Update app.py
app.py CHANGED
@@ -13,24 +13,33 @@ MODEL_ENDPOINTS = {
     "Qwen2.5-Coder-32B-Instruct": "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-Coder-32B-Instruct",
 }
 
-def query_model(prompt, model_endpoint):
+# System prompts for each model
+SYSTEM_PROMPTS = {
+    "Qwen2.5-72B-Instruct": "System: You are a knowledgeable assistant for general inquiries.",
+    "Llama3.3-70B-Instruct": "System: You are a research expert assistant specialized in in-depth analysis.",
+    "Qwen2.5-Coder-32B-Instruct": "System: You are a coding expert who helps with code-related tasks.",
+}
+
+def query_model(prompt, model_endpoint, system_prompt):
     headers = {
         "Authorization": f"Bearer {HF_API_KEY}",
         "Content-Type": "application/json",
         "Accept": "application/json"
     }
-    #
-    formatted_prompt = f"
+    # Combine the system prompt with the user prompt
+    formatted_prompt = f"{system_prompt}\nUser: {prompt}\nAssistant:"
+
     data = {
         "inputs": formatted_prompt,
         "parameters": {
             "max_new_tokens": 512,
-            "temperature": 0.
+            "temperature": 0.6,  # All models use a temperature of 0.6
         }
     }
+
     response = requests.post(model_endpoint, headers=headers, json=data)
 
-    #
+    # Uncomment the following line to print the raw API response for debugging:
     # print("Raw response:", response.text)
 
     try:
@@ -38,12 +47,10 @@ def query_model(prompt, model_endpoint):
     except Exception:
         return f"Error: Unable to parse JSON. Response: {response.text}"
 
-    # If the API returns an error message, surface it.
     if isinstance(result, dict) and "error" in result:
         return f"Error: {result['error']}"
 
     try:
-        # Expecting a list of outputs with a "generated_text" field.
         return result[0].get("generated_text", "No generated_text found in response")
     except Exception:
         return f"Error: Unexpected response format: {json.dumps(result)}"
@@ -51,7 +58,8 @@ def query_model(prompt, model_endpoint):
 def chat_with_models(user_input, history):
     responses = []
     for model_name, endpoint in MODEL_ENDPOINTS.items():
-        model_response = query_model(user_input, endpoint)
+        system_prompt = SYSTEM_PROMPTS.get(model_name, "")
+        model_response = query_model(user_input, endpoint, system_prompt)
         responses.append(f"**{model_name}**: {model_response}")
     combined_answer = "\n\n".join(responses)
     history.append((user_input, combined_answer))
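For context, a minimal sketch of what the new prompt plumbing produces for one model. The SYSTEM_PROMPTS entry, the f-string, and the payload shape are taken verbatim from the diff; the example user prompt is invented for illustration, and the rest of app.py (imports, HF_API_KEY, the Gradio wiring) is not shown here.

SYSTEM_PROMPTS = {
    "Qwen2.5-Coder-32B-Instruct": "System: You are a coding expert who helps with code-related tasks.",
}

user_prompt = "Write a function that reverses a string."  # invented example input
system_prompt = SYSTEM_PROMPTS["Qwen2.5-Coder-32B-Instruct"]

# Same construction as the new formatted_prompt line in query_model
formatted_prompt = f"{system_prompt}\nUser: {user_prompt}\nAssistant:"

data = {
    "inputs": formatted_prompt,
    "parameters": {
        "max_new_tokens": 512,
        "temperature": 0.6,
    },
}

print(formatted_prompt)
# System: You are a coding expert who helps with code-related tasks.
# User: Write a function that reverses a string.
# Assistant:

One detail worth noting: chat_with_models looks prompts up with SYSTEM_PROMPTS.get(model_name, ""), so a model missing from the dict is still queried; its formatted prompt simply begins with an empty system line before "User:".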
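As for the parsing branches near the end of query_model: the hosted Inference API's text-generation task normally returns a JSON list with one object per generated sequence, while failures (including a model still loading on a cold start) come back as a dict with an "error" key. A sketch of the two shapes the code distinguishes, with illustrative values:

# Success: a list of outputs, each carrying "generated_text"
ok_response = [{"generated_text": "def reverse(s):\n    return s[::-1]"}]

# Failure: a dict with an "error" message (wording here is illustrative)
error_response = {"error": "Model Qwen/Qwen2.5-Coder-32B-Instruct is currently loading"}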