Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,15 +1,18 @@
|
|
1 |
import gradio as gr
|
2 |
-
from fastapi import FastAPI, Request
|
3 |
from fastapi.responses import JSONResponse
|
4 |
import datetime
|
5 |
import requests
|
6 |
import os
|
7 |
-
import
|
8 |
-
import asyncio
|
9 |
|
10 |
# Initialize FastAPI
|
11 |
app = FastAPI()
|
12 |
|
|
|
|
|
|
|
|
|
13 |
# Configuration
|
14 |
API_URL = "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-Coder-32B"
|
15 |
headers = {
|
@@ -39,29 +42,31 @@ def format_chat_response(response_text, prompt_tokens=0, completion_tokens=0):
|
|
39 |
}
|
40 |
|
41 |
async def query_model(payload):
|
42 |
-
|
43 |
-
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
@app.get("/status")
|
46 |
async def status():
|
47 |
try:
|
48 |
response_text = "it's working"
|
49 |
-
|
50 |
-
return JSONResponse(
|
51 |
-
content=format_chat_response(response_text)
|
52 |
-
)
|
53 |
except Exception as e:
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
)
|
58 |
-
|
59 |
@app.post("/v1/chat/completions")
|
60 |
async def chat_completion(request: Request):
|
61 |
try:
|
62 |
data = await request.json()
|
63 |
messages = data.get("messages", [])
|
64 |
-
|
|
|
|
|
65 |
payload = {
|
66 |
"inputs": {
|
67 |
"messages": messages
|
@@ -77,21 +82,17 @@ async def chat_completion(request: Request):
|
|
77 |
response = await query_model(payload)
|
78 |
|
79 |
if isinstance(response, dict) and "error" in response:
|
80 |
-
|
81 |
-
status_code=500,
|
82 |
-
content={"error": response["error"]}
|
83 |
-
)
|
84 |
|
85 |
response_text = response[0]["generated_text"]
|
86 |
|
87 |
-
return JSONResponse(
|
88 |
-
|
89 |
-
)
|
|
|
90 |
except Exception as e:
|
91 |
-
|
92 |
-
|
93 |
-
content={"error": str(e)}
|
94 |
-
)
|
95 |
|
96 |
def generate_response(messages):
|
97 |
payload = {
|
@@ -106,13 +107,18 @@ def generate_response(messages):
|
|
106 |
}
|
107 |
}
|
108 |
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
|
|
|
|
|
|
|
|
|
|
116 |
|
117 |
def chat_interface(messages):
|
118 |
chat_history = []
|
@@ -128,7 +134,6 @@ def chat_interface(messages):
|
|
128 |
|
129 |
# Create Gradio interface
|
130 |
def gradio_app():
|
131 |
-
#return gr.chat_interface(gr.Chatbot(placeholder="placeholder"), type="messages", value=[])
|
132 |
return gr.ChatInterface(chat_interface, type="messages")
|
133 |
|
134 |
# Mount both FastAPI and Gradio
|
|
|
1 |
import gradio as gr
|
2 |
+
from fastapi import FastAPI, Request, HTTPException
|
3 |
from fastapi.responses import JSONResponse
|
4 |
import datetime
|
5 |
import requests
|
6 |
import os
|
7 |
+
import logging
|
|
|
8 |
|
9 |
# Initialize FastAPI
|
10 |
app = FastAPI()
|
11 |
|
12 |
+
# Configure logging
|
13 |
+
logging.basicConfig(level=logging.INFO)
|
14 |
+
logger = logging.getLogger(__name__)
|
15 |
+
|
16 |
# Configuration
|
17 |
API_URL = "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-Coder-32B"
|
18 |
headers = {
|
|
|
42 |
}
|
43 |
|
44 |
async def query_model(payload):
    """Post ``payload`` to the inference endpoint and return the decoded JSON.

    The HTTP call uses the synchronous ``requests`` library, so it is run on
    the event loop's default executor. Calling it directly inside this
    coroutine would stall the event loop (and every other in-flight request)
    until the model answered.

    Args:
        payload: JSON-serialisable request body sent to ``API_URL``.

    Returns:
        The parsed JSON body of the response.

    Raises:
        HTTPException: status 500 when the request fails, times out, or the
            endpoint answers with an HTTP error status.
    """
    import asyncio

    def _post():
        # Without a timeout, a stalled upstream would hold a worker thread
        # forever; requests.exceptions.Timeout is a RequestException, so it
        # is reported through the same handler below.
        response = requests.post(
            API_URL, headers=headers, json=payload, timeout=120
        )
        response.raise_for_status()
        return response.json()

    try:
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, _post)
    except requests.exceptions.RequestException as e:
        logger.error(f"Request failed: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
52 |
|
53 |
@app.get("/status")
async def status():
    """Health-check endpoint.

    Returns a JSON response built by ``format_chat_response`` (defined
    elsewhere in this file) from a fixed confirmation message. Any failure
    is logged and reported to the client as an HTTP 500.
    """
    try:
        body = format_chat_response("it's working")
        return JSONResponse(content=body)
    except Exception as exc:
        logger.error(f"Status check failed: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
|
61 |
+
|
|
|
|
|
62 |
@app.post("/v1/chat/completions")
|
63 |
async def chat_completion(request: Request):
|
64 |
try:
|
65 |
data = await request.json()
|
66 |
messages = data.get("messages", [])
|
67 |
+
if not messages:
|
68 |
+
raise HTTPException(status_code=400, detail="Messages are required")
|
69 |
+
|
70 |
payload = {
|
71 |
"inputs": {
|
72 |
"messages": messages
|
|
|
82 |
response = await query_model(payload)
|
83 |
|
84 |
if isinstance(response, dict) and "error" in response:
|
85 |
+
raise HTTPException(status_code=500, detail=response["error"])
|
|
|
|
|
|
|
86 |
|
87 |
response_text = response[0]["generated_text"]
|
88 |
|
89 |
+
return JSONResponse(content=format_chat_response(response_text))
|
90 |
+
except HTTPException as e:
|
91 |
+
logger.error(f"Chat completion failed: {e.detail}")
|
92 |
+
raise e
|
93 |
except Exception as e:
|
94 |
+
logger.error(f"Unexpected error: {e}")
|
95 |
+
raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
96 |
|
97 |
def generate_response(messages):
|
98 |
payload = {
|
|
|
107 |
}
|
108 |
}
|
109 |
|
110 |
+
try:
|
111 |
+
response = requests.post(API_URL, headers=headers, json=payload)
|
112 |
+
response.raise_for_status()
|
113 |
+
result = response.json()
|
114 |
+
|
115 |
+
if isinstance(result, dict) and "error" in result:
|
116 |
+
return f"Error: {result['error']}"
|
117 |
+
|
118 |
+
return result[0]["generated_text"]
|
119 |
+
except requests.exceptions.RequestException as e:
|
120 |
+
logger.error(f"Request failed: {e}")
|
121 |
+
return f"Error: {e}"
|
122 |
|
123 |
def chat_interface(messages):
|
124 |
chat_history = []
|
|
|
134 |
|
135 |
# Create Gradio interface
|
136 |
def gradio_app():
    """Build the Gradio chat UI that delegates to ``chat_interface``."""
    chat_ui = gr.ChatInterface(chat_interface, type="messages")
    return chat_ui
|
138 |
|
139 |
# Mount both FastAPI and Gradio
|