Hugging Face Space — status: Runtime error
Commit: "Update app.py" (Browse files)
File changed: app.py
@@ -12,11 +12,14 @@ app = FastAPI()
Before this commit (removed lines marked "-"):

    12      # Load model and tokenizer
    13      model_name = "Qwen/Qwen2.5-Coder-32B"
    14      tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    15      model = AutoModelForCausalLM.from_pretrained(
    16          model_name,
    17          device_map="auto",
    18          trust_remote_code=True,
    19  -       torch_dtype=torch.float16
    20      )
    21
    22      def format_chat_response(response_text, prompt_tokens, completion_tokens):

@@ -91,12 +94,15 @@ def chat_interface(message, history):
Before this commit (removed lines marked "-"; note the removed code referenced
an undefined name `response_json` after assigning `response_data`):

    91          # Add current message
    92          messages.append({"role": "user", "content": message})
    93
    94          # Get response
    95  -       response = chat_completion(
    96          if isinstance(response, JSONResponse):
    97  -           response_data = response.body.decode()
    98  -
    99  -           return response_json["choices"][0]["message"]["content"]
   100          return "Error generating response"
   101
   102      interface = gr.ChatInterface(
|
|
# Load model and tokenizer for the Space's chat backend.
model_name = "Qwen/Qwen2.5-Coder-32B"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Configure model loading with specific parameters.
# NOTE(review): a 32B-parameter checkpoint in float16 needs on the order of
# 64 GB of accelerator/CPU memory even with low_cpu_mem_usage=True; on a
# small Space this load itself is a plausible cause of the "Runtime error"
# shown on the page — confirm the hardware tier.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",          # let accelerate place layers on available devices
    trust_remote_code=True,
    torch_dtype=torch.float16,  # half precision: halves memory vs float32
    low_cpu_mem_usage=True,     # stream weights instead of materializing twice
)
|
24 |
|
25 |
def format_chat_response(response_text, prompt_tokens, completion_tokens):
|
|
|
94 |
# Add current message
|
95 |
messages.append({"role": "user", "content": message})
|
96 |
|
97 |
+
# Create a mock request object with the messages
|
98 |
+
mock_request = Request(scope={"type": "http"}, receive=None)
|
99 |
+
mock_request.json = lambda: {"messages": messages}
|
100 |
+
|
101 |
# Get response
|
102 |
+
response = await chat_completion(mock_request)
|
103 |
if isinstance(response, JSONResponse):
|
104 |
+
response_data = json.loads(response.body.decode())
|
105 |
+
return response_data["choices"][0]["message"]["content"]
|
|
|
106 |
return "Error generating response"
|
107 |
|
108 |
interface = gr.ChatInterface(
|