Update app.py
app.py CHANGED
@@ -22,24 +22,50 @@ def query_model(model_name: str, messages: List[Dict[str, str]]) -> str:
         "Content-Type": "application/json"
     }
 
-    #
-
-
+    # Model-specific prompt formatting
+    model_prompts = {
+        "Qwen2.5-72B-Instruct": (
+            f"<|im_start|>user\n{messages[-1]['content']}<|im_end|>\n<|im_start|>assistant\n"
+        ),
+        "Llama3.3-70B-Instruct": (
+            "<|begin_of_text|>"
+            "<|start_header_id|>user<|end_header_id|>\n\n"
+            f"{messages[-1]['content']}<|eot_id|>"
+            "<|start_header_id|>assistant<|end_header_id|>\n\n"
+        ),
+        "Qwen2.5-Coder-32B-Instruct": (
+            f"<|im_start|>user\n{messages[-1]['content']}<|im_end|>\n<|im_start|>assistant\n"
+        )
+    }
+
+    # Model-specific stop sequences
+    stop_sequences = {
+        "Qwen2.5-72B-Instruct": ["<|im_end|>", "<|endoftext|>"],
+        "Llama3.3-70B-Instruct": ["<|eot_id|>", "\nuser:"],
+        "Qwen2.5-Coder-32B-Instruct": ["<|im_end|>", "<|endoftext|>"]
+    }
+
     payload = {
-        "inputs":
+        "inputs": model_prompts[model_name],
         "parameters": {
             "max_tokens": 1024,
             "temperature": 0.7,
-            "stop_sequences": [
+            "stop_sequences": stop_sequences[model_name],
+            "return_full_text": False
         }
     }
 
     try:
         response = requests.post(endpoint, json=payload, headers=headers)
         response.raise_for_status()
-
+        result = response.json()[0]['generated_text']
+        # Clean up response formatting
+        result = result.split('<|')[0]  # Remove any remaining special tokens
+        result = result.replace('**', '').replace('##', '')  # Remove markdown
+        result = result.strip()  # Remove leading/trailing whitespace
+        return result.split('\n\n')[0]  # Return only first paragraph
     except Exception as e:
-        return f"
+        return f"{model_name} error: {str(e)}"
 
 def respond(message: str, history: List[List[str]]) -> str:
     """Handle chat responses from all models"""
@@ -67,7 +93,8 @@ def respond(message: str, history: List[List[str]]) -> str:
     for model_name, response in results.items():
         responses.append(f"**{model_name}**:\n{response}")
 
-
+    # Format responses with clear separation
+    return "\n\n----------------------------------------\n\n".join(responses)
 
 # Create the Gradio interface
 chat_interface = gr.ChatInterface(
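For reference, a minimal self-contained sketch of the per-model prompt formatting this commit introduces, so the rendered prompts can be inspected outside the Space. The helper name build_prompt, the sample message, and the standalone script structure are illustrative assumptions and are not part of app.py.

# Illustrative only: reproduces the prompt templates from the commit above.
from typing import Dict, List

def build_prompt(model_name: str, messages: List[Dict[str, str]]) -> str:
    """Render the last user message into the chat template for the given model."""
    user_text = messages[-1]["content"]
    model_prompts = {
        "Qwen2.5-72B-Instruct": (
            f"<|im_start|>user\n{user_text}<|im_end|>\n<|im_start|>assistant\n"
        ),
        "Llama3.3-70B-Instruct": (
            "<|begin_of_text|>"
            "<|start_header_id|>user<|end_header_id|>\n\n"
            f"{user_text}<|eot_id|>"
            "<|start_header_id|>assistant<|end_header_id|>\n\n"
        ),
        "Qwen2.5-Coder-32B-Instruct": (
            f"<|im_start|>user\n{user_text}<|im_end|>\n<|im_start|>assistant\n"
        ),
    }
    return model_prompts[model_name]

if __name__ == "__main__":
    sample = [{"role": "user", "content": "Write a haiku about autumn."}]
    for name in ("Qwen2.5-72B-Instruct", "Llama3.3-70B-Instruct", "Qwen2.5-Coder-32B-Instruct"):
        print(f"--- {name} ---")
        print(build_prompt(name, sample))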