Update app.py
app.py CHANGED
@@ -5,6 +5,7 @@ import json
 from fastapi import FastAPI, Request
 from fastapi.responses import JSONResponse
 import datetime
+import asyncio
 
 # Initialize FastAPI
 app = FastAPI()
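
The hunks below call module-level tokenizer and model objects that app.py defines elsewhere and this diff does not show. As a rough sketch only, such globals are usually created with Hugging Face transformers along these lines; the model id, dtype, and device placement here are placeholder assumptions, not values taken from this Space:

# Sketch, not part of the commit: the globals that generate_response relies on.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "org/chat-model"  # placeholder assumption, not the Space's real model id

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,  # assumes a GPU; the default float32 is safer on CPU
    device_map="auto",          # requires the accelerate package
)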
@@ -81,6 +82,27 @@ async def chat_completion(request: Request):
             content={"error": str(e)}
         )
 
+# Synchronous function to generate response
+def generate_response(messages):
+    # Convert messages to model input format
+    prompt = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )
+
+    # Generate response
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    outputs = model.generate(
+        **inputs,
+        max_new_tokens=2048,
+        temperature=0.7,
+        top_p=0.95,
+        do_sample=True
+    )
+
+    return tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
+
 # Gradio interface for testing
 def chat_interface(message, history):
     history = history or []
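
For a quick sanity check, generate_response can be exercised on its own, since it takes the same OpenAI-style message list that chat_completion and chat_interface build. A small example (it only runs inside the Space because it depends on the model and tokenizer globals; the sample messages are made up):

# Example only: exercising generate_response with a hand-built message list.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Say hello in one sentence."},
]
reply = generate_response(messages)  # returns only the newly generated text, prompt stripped
print(reply)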
@@ -94,16 +116,12 @@ def chat_interface(message, history):
     # Add current message
     messages.append({"role": "user", "content": message})
 
-    #
-
-
-
-
-
-    if isinstance(response, JSONResponse):
-        response_data = json.loads(response.body.decode())
-        return response_data["choices"][0]["message"]["content"]
-    return "Error generating response"
+    # Generate response synchronously
+    try:
+        response_text = generate_response(messages)
+        return response_text
+    except Exception as e:
+        return f"Error generating response: {str(e)}"
 
 interface = gr.ChatInterface(
     chat_interface,
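
app.py now defines both a FastAPI app and a Gradio interface; this commit does not show how the two are served together. One common pattern on Spaces is to mount the Gradio UI onto the FastAPI app, sketched below; the /gradio path, the uvicorn call, and port 7860 are assumptions rather than lines from this file:

# Sketch only: serving the FastAPI routes and the Gradio UI from one process.
import gradio as gr
import uvicorn

app = gr.mount_gradio_app(app, interface, path="/gradio")

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)  # 7860 is the default Spaces port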