luminoussg committed on
Commit c9870b1 · verified · 1 Parent(s): c1e5d4c

Update app.py

Files changed (1)
  1. app.py +35 -8
app.py CHANGED
@@ -22,24 +22,50 @@ def query_model(model_name: str, messages: List[Dict[str, str]]) -> str:
         "Content-Type": "application/json"
     }

-    # Format the prompt according to each model's requirements
-    prompt = "\n".join([f"{msg['role']}: {msg['content']}" for msg in messages])
-
+    # Model-specific prompt formatting
+    model_prompts = {
+        "Qwen2.5-72B-Instruct": (
+            f"<|im_start|>user\n{messages[-1]['content']}<|im_end|>\n<|im_start|>assistant\n"
+        ),
+        "Llama3.3-70B-Instruct": (
+            "<|begin_of_text|>"
+            "<|start_header_id|>user<|end_header_id|>\n\n"
+            f"{messages[-1]['content']}<|eot_id|>"
+            "<|start_header_id|>assistant<|end_header_id|>\n\n"
+        ),
+        "Qwen2.5-Coder-32B-Instruct": (
+            f"<|im_start|>user\n{messages[-1]['content']}<|im_end|>\n<|im_start|>assistant\n"
+        )
+    }
+
+    # Model-specific stop sequences
+    stop_sequences = {
+        "Qwen2.5-72B-Instruct": ["<|im_end|>", "<|endoftext|>"],
+        "Llama3.3-70B-Instruct": ["<|eot_id|>", "\nuser:"],
+        "Qwen2.5-Coder-32B-Instruct": ["<|im_end|>", "<|endoftext|>"]
+    }
+
     payload = {
-        "inputs": prompt,
+        "inputs": model_prompts[model_name],
         "parameters": {
             "max_tokens": 1024,
             "temperature": 0.7,
-            "stop_sequences": ["\nUser:", "\nAssistant:", "###"]
+            "stop_sequences": stop_sequences[model_name],
+            "return_full_text": False
         }
     }

     try:
         response = requests.post(endpoint, json=payload, headers=headers)
         response.raise_for_status()
-        return response.json()[0]['generated_text']
+        result = response.json()[0]['generated_text']
+        # Clean up response formatting
+        result = result.split('<|')[0]  # Remove any remaining special tokens
+        result = result.replace('**', '').replace('##', '')  # Remove markdown
+        result = result.strip()  # Remove leading/trailing whitespace
+        return result.split('\n\n')[0]  # Return only first paragraph
     except Exception as e:
-        return f"Error from {model_name}: {str(e)}"
+        return f"{model_name} error: {str(e)}"

 def respond(message: str, history: List[List[str]]) -> str:
     """Handle chat responses from all models"""
@@ -67,7 +93,8 @@ def respond(message: str, history: List[List[str]]) -> str:
     for model_name, response in results.items():
         responses.append(f"**{model_name}**:\n{response}")

-    return "\n\n".join(responses)
+    # Format responses with clear separation
+    return "\n\n----------------------------------------\n\n".join(responses)

 # Create the Gradio interface
 chat_interface = gr.ChatInterface(
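Note on the new prompt templates: both Qwen models use the ChatML wrapper and Llama3.3 uses the header/eot token format, and in every case only the latest user message (messages[-1]) is wrapped, so earlier turns are not sent. A minimal sketch of what the rendered "inputs" string looks like, using a made-up message purely for illustration:

# Illustration only: re-renders the same template strings added in query_model
# above for a hypothetical last user message.
messages = [{"role": "user", "content": "What is the capital of France?"}]

qwen_prompt = (
    f"<|im_start|>user\n{messages[-1]['content']}<|im_end|>\n<|im_start|>assistant\n"
)
llama_prompt = (
    "<|begin_of_text|>"
    "<|start_header_id|>user<|end_header_id|>\n\n"
    f"{messages[-1]['content']}<|eot_id|>"
    "<|start_header_id|>assistant<|end_header_id|>\n\n"
)

print(qwen_prompt)
# <|im_start|>user
# What is the capital of France?
# <|im_end|>
# <|im_start|>assistant

Generation is then cut at each model's own end-of-turn tokens via the new stop_sequences map, and "return_full_text": False asks the endpoint to return only the completion rather than echoing the prompt.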
 
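On the respond() side, the commit only changes how the per-model answers are joined. A rough end-to-end sketch of that aggregation, assuming results maps each model name to its reply from query_model; the sequential dict comprehension here is a stand-in for whatever fan-out the app actually uses:

def respond(message, history):
    """Sketch: ask each model for a reply and join the answers with the separator."""
    model_names = [
        "Qwen2.5-72B-Instruct",
        "Llama3.3-70B-Instruct",
        "Qwen2.5-Coder-32B-Instruct",
    ]
    messages = [{"role": "user", "content": message}]

    # Stand-in fan-out: the real app may query the endpoints concurrently.
    results = {name: query_model(name, messages) for name in model_names}

    responses = []
    for model_name, response in results.items():
        responses.append(f"**{model_name}**:\n{response}")

    # Same separator the commit introduces: a horizontal rule between answers.
    return "\n\n----------------------------------------\n\n".join(responses)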