OjciecTadeusz committed
Commit 97b4be5 · verified · 1 parent: 3c345b6

Update app.py

Files changed (1): app.py (+37 -30)
app.py CHANGED
@@ -48,7 +48,6 @@ async def chat_completion(request: Request):
     data = await request.json()
     messages = data.get("messages", [])
 
-    # Prepare the payload for the Inference API
     payload = {
         "inputs": {
             "messages": messages
@@ -61,7 +60,6 @@ async def chat_completion(request: Request):
         }
     }
 
-    # Get response from model
     response = await query_model(payload)
 
     if isinstance(response, dict) and "error" in response:
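The helper `query_model` is defined outside this diff. For reference, a minimal sketch of what such a helper might look like, assuming the Space forwards the payload to the Hugging Face Inference API over `httpx`; the endpoint URL, the `HF_TOKEN` environment variable, and the timeout are illustrative assumptions, not code from this commit:

    import os

    import httpx

    # Illustrative values; the real app.py defines its own endpoint and auth.
    API_URL = "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-Coder-32B-Instruct"
    HEADERS = {"Authorization": f"Bearer {os.environ.get('HF_TOKEN', '')}"}

    async def query_model(payload):
        # POST the payload and return the decoded JSON body. The route above
        # treats a dict with an "error" key as a failure, so API errors are
        # returned as-is rather than raised.
        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.post(API_URL, headers=HEADERS, json=payload)
            return response.json()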
@@ -73,13 +71,7 @@ async def chat_completion(request: Request):
         response_text = response[0]["generated_text"]
 
         return JSONResponse(
-            content=format_chat_response(
-                response_text,
-                # Note: Actual token counts would need to be calculated differently
-                # or obtained from the API response if available
-                prompt_tokens=0,
-                completion_tokens=0
-            )
+            content=format_chat_response(response_text)
         )
     except Exception as e:
         return JSONResponse(
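`format_chat_response` is also defined outside this diff; the change above implies it now supplies default token counts rather than requiring them at every call site. A sketch of the OpenAI-style `chat.completion` body such a helper would plausibly return, where the `id`, `model`, and default arguments are assumptions rather than the commit's actual implementation:

    import time
    import uuid

    def format_chat_response(response_text, prompt_tokens=0, completion_tokens=0):
        # Assumed OpenAI-compatible response shape; the real helper may differ.
        return {
            "id": f"chatcmpl-{uuid.uuid4().hex}",
            "object": "chat.completion",
            "created": int(time.time()),
            "model": "Qwen/Qwen2.5-Coder-32B-Instruct",
            "choices": [{
                "index": 0,
                "message": {"role": "assistant", "content": response_text},
                "finish_reason": "stop",
            }],
            "usage": {
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": prompt_tokens + completion_tokens,
            },
        }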
@@ -87,7 +79,6 @@ async def chat_completion(request: Request):
             content={"error": str(e)}
         )
 
-# Synchronous function to generate response for Gradio
 def generate_response(messages):
     payload = {
         "inputs": {
@@ -109,31 +100,47 @@ def generate_response(messages):
 
     return result[0]["generated_text"]
 
-# Gradio interface for testing
-def chat_interface(message, history):
-    history = history or []
-    messages = []
-
-    # Convert history to messages format
-    for user_msg, assistant_msg in history:
-        messages.append({"role": "user", "content": user_msg})
-        messages.append({"role": "assistant", "content": assistant_msg})
+def chat_interface(message, chat_history):
+    if message.strip() == "":
+        return chat_history
 
-    # Add current message
-    messages.append({"role": "user", "content": message})
-
-    # Generate response synchronously
     try:
-        response_text = generate_response(messages)
-        return response_text
+        # Format the message history in the OpenAI style
+        messages = []
+        for msg in chat_history:
+            messages.append({"role": "user", "content": msg[0]})
+            if msg[1] is not None:
+                messages.append({"role": "assistant", "content": msg[1]})
+
+        # Add the current message
+        messages.append({"role": "user", "content": message})
+
+        # Get response
+        response = generate_response(messages)
+
+        # Update history in the new format
+        chat_history.append((message, response))
+        return chat_history
     except Exception as e:
-        return f"Error generating response: {str(e)}"
+        chat_history.append((message, f"Error: {str(e)}"))
+        return chat_history
 
-interface = gr.ChatInterface(
-    chat_interface,
+# Create Gradio interface with new message format
+demo = gr.ChatInterface(
+    fn=chat_interface,
     title="Qwen2.5-Coder-32B Chat",
-    description="Chat with Qwen2.5-Coder-32B model via Hugging Face Inference API. This Space also provides a /v1/chat/completions endpoint."
+    description="Chat with Qwen2.5-Coder-32B model via Hugging Face Inference API",
+    examples=["Hello! Can you help me with coding?",
+              "Write a simple Python function to calculate factorial"],
+    retry_btn="Retry",
+    undo_btn="Undo last message",
+    clear_btn="Clear conversation",
 )
 
 # Mount both FastAPI and Gradio
-app = gr.mount_gradio_app(app, interface, path="/")
+app = gr.mount_gradio_app(app, demo, path="/")
+
+# For running with uvicorn directly
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=7860)
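With the Space running, the `/v1/chat/completions` route that sits alongside the Gradio UI can be exercised directly. A minimal client-side check, assuming the app is reachable at the uvicorn address used above; the base URL is a placeholder for wherever the Space is actually hosted:

    import requests

    BASE_URL = "http://localhost:7860"  # placeholder; substitute the actual Space URL

    resp = requests.post(
        f"{BASE_URL}/v1/chat/completions",
        json={"messages": [{"role": "user", "content": "Write a factorial function."}]},
    )
    print(resp.json())

One version caveat: the tuple-based chat history and the `retry_btn`, `undo_btn`, and `clear_btn` keyword arguments match the Gradio 4.x `gr.ChatInterface` API; Gradio 5 removed those parameters, so this code appears to assume a 4.x pin.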