Update app.py

app.py CHANGED
@@ -48,7 +48,6 @@ async def chat_completion(request: Request):
         data = await request.json()
         messages = data.get("messages", [])
 
-        # Prepare the payload for the Inference API
         payload = {
             "inputs": {
                 "messages": messages
@@ -61,7 +60,6 @@ async def chat_completion(request: Request):
             }
         }
 
-        # Get response from model
         response = await query_model(payload)
 
         if isinstance(response, dict) and "error" in response:
@@ -73,13 +71,7 @@ async def chat_completion(request: Request):
         response_text = response[0]["generated_text"]
 
         return JSONResponse(
-            content=format_chat_response(
-                response_text,
-                # Note: Actual token counts would need to be calculated differently
-                # or obtained from the API response if available
-                prompt_tokens=0,
-                completion_tokens=0
-            )
+            content=format_chat_response(response_text)
         )
     except Exception as e:
         return JSONResponse(
@@ -87,7 +79,6 @@ async def chat_completion(request: Request):
             content={"error": str(e)}
         )
 
-# Synchronous function to generate response for Gradio
 def generate_response(messages):
     payload = {
         "inputs": {
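The helpers `query_model` and `format_chat_response` called above are defined outside the changed hunks, so their bodies don't appear in this diff. A minimal sketch of what the call sites assume; the model id, endpoint URL, HF_TOKEN env var, and response wrapper are assumptions, not code from this commit:

# Hypothetical reconstruction of the helpers referenced above; not part of
# this commit. Model id, endpoint URL, and env var name are assumed.
import os
import httpx

API_URL = "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-Coder-32B-Instruct"
HEADERS = {"Authorization": f"Bearer {os.environ.get('HF_TOKEN', '')}"}

async def query_model(payload):
    # The route awaits this call, so an async HTTP client fits here.
    async with httpx.AsyncClient(timeout=60.0) as client:
        resp = await client.post(API_URL, headers=HEADERS, json=payload)
        # Returns either [{"generated_text": ...}] or {"error": ...},
        # matching the isinstance(...) check in the route above.
        return resp.json()

def format_chat_response(response_text):
    # After this commit the helper takes only the text, so it presumably
    # wraps it in an OpenAI-style completion dict without token counts.
    return {"choices": [{"message": {"role": "assistant", "content": response_text}}]}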
@@ -109,31 +100,47 @@ def generate_response(messages):
 
     return result[0]["generated_text"]
 
-
-
-
-    messages = []
-
-    # Convert history to messages format
-    for user_msg, assistant_msg in history:
-        messages.append({"role": "user", "content": user_msg})
-        messages.append({"role": "assistant", "content": assistant_msg})
+def chat_interface(message, chat_history):
+    if message.strip() == "":
+        return chat_history
 
-    # Add current message
-    messages.append({"role": "user", "content": message})
-
-    # Generate response synchronously
     try:
-
-
+        # Format the message history in the OpenAI style
+        messages = []
+        for msg in chat_history:
+            messages.append({"role": "user", "content": msg[0]})
+            if msg[1] is not None:
+                messages.append({"role": "assistant", "content": msg[1]})
+
+        # Add the current message
+        messages.append({"role": "user", "content": message})
+
+        # Get response
+        response = generate_response(messages)
+
+        # Update history in the new format
+        chat_history.append((message, response))
+        return chat_history
     except Exception as e:
-
+        chat_history.append((message, f"Error: {str(e)}"))
+        return chat_history
 
-interface = gr.ChatInterface(
-
+# Create Gradio interface with new message format
+demo = gr.ChatInterface(
+    fn=chat_interface,
     title="Qwen2.5-Coder-32B Chat",
-    description="Chat with Qwen2.5-Coder-32B model via Hugging Face Inference API"
+    description="Chat with Qwen2.5-Coder-32B model via Hugging Face Inference API",
+    examples=["Hello! Can you help me with coding?",
+              "Write a simple Python function to calculate factorial"],
+    retry_btn="Retry",
+    undo_btn="Undo last message",
+    clear_btn="Clear conversation",
 )
 
 # Mount both FastAPI and Gradio
-app = gr.mount_gradio_app(app, interface, path="/")
+app = gr.mount_gradio_app(app, demo, path="/")
+
+# For running with uvicorn directly
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=7860)
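The loop added in `chat_interface` converts Gradio's tuple-based history into the OpenAI-style message list that `generate_response` expects. A worked example with made-up history:

# Worked example of the conversion added above (sample data only).
chat_history = [("Hi", "Hello! How can I help?"), ("What is 2+2?", "4")]
messages = []
for msg in chat_history:
    messages.append({"role": "user", "content": msg[0]})
    if msg[1] is not None:
        messages.append({"role": "assistant", "content": msg[1]})
messages.append({"role": "user", "content": "Now in binary"})
# messages == [
#     {"role": "user", "content": "Hi"},
#     {"role": "assistant", "content": "Hello! How can I help?"},
#     {"role": "user", "content": "What is 2+2?"},
#     {"role": "assistant", "content": "4"},
#     {"role": "user", "content": "Now in binary"},
# ]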
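Because `chat_interface` is a plain synchronous function, it can be smoke-tested without launching the UI. A sketch, assuming app.py imports cleanly and the inference token is configured:

# Hypothetical smoke test for the handler added in this commit; importing
# app also builds the Gradio app and mounts it, which is harmless here.
from app import chat_interface

history = []
history = chat_interface("Write a factorial function in Python", history)
print(history[-1][1])  # the model reply, or "Error: ..." if the API call failed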