luminoussg committed on
Commit
ab9f329
·
verified ·
1 Parent(s): fdaf591

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -3
app.py CHANGED
@@ -10,13 +10,25 @@ from session_manager import SessionManager
10
  session_manager = SessionManager()
11
  HF_API_KEY = os.getenv("HF_API_KEY")
12
 
13
- # Model endpoints configuration
14
  MODEL_ENDPOINTS = {
15
  "Qwen2.5-72B-Instruct": "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-72B-Instruct",
16
  "Llama3.3-70B-Instruct": "https://api-inference.huggingface.co/models/meta-llama/Llama-3.3-70B-Instruct",
17
  "Qwen2.5-Coder-32B-Instruct": "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-Coder-32B-Instruct",
18
  }
19
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  def query_model(model_name: str, messages: List[Dict[str, str]]) -> str:
21
  """Query a single model with the chat history"""
22
  endpoint = MODEL_ENDPOINTS[model_name]
@@ -55,8 +67,8 @@ def query_model(model_name: str, messages: List[Dict[str, str]]) -> str:
55
  payload = {
56
  "inputs": model_prompts[model_name],
57
  "parameters": {
58
- "max_tokens": 2048,
59
- "temperature": 0.7,
60
  "stop_sequences": stop_sequences[model_name],
61
  "return_full_text": False
62
  }
 
10
  session_manager = SessionManager()
11
  HF_API_KEY = os.getenv("HF_API_KEY")
12
 
13
+ # Model configurations
14
  MODEL_ENDPOINTS = {
15
  "Qwen2.5-72B-Instruct": "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-72B-Instruct",
16
  "Llama3.3-70B-Instruct": "https://api-inference.huggingface.co/models/meta-llama/Llama-3.3-70B-Instruct",
17
  "Qwen2.5-Coder-32B-Instruct": "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-Coder-32B-Instruct",
18
  }
19
 
20
+ MODEL_CONTEXT_WINDOWS = {
21
+ "Qwen2.5-72B-Instruct": 128000,
22
+ "Llama3.3-70B-Instruct": 128000,
23
+ "Qwen2.5-Coder-32B-Instruct": 128000,
24
+ }
25
+
26
+ MODEL_MAX_TOKENS = {
27
+ "Qwen2.5-72B-Instruct": 8192,
28
+ "Llama3.3-70B-Instruct": 2048,
29
+ "Qwen2.5-Coder-32B-Instruct": 8192,
30
+ }
31
+
32
  def query_model(model_name: str, messages: List[Dict[str, str]]) -> str:
33
  """Query a single model with the chat history"""
34
  endpoint = MODEL_ENDPOINTS[model_name]
 
67
  payload = {
68
  "inputs": model_prompts[model_name],
69
  "parameters": {
70
+ "max_tokens": MODEL_MAX_TOKENS[model_name],
71
+ "temperature": 0.6,
72
  "stop_sequences": stop_sequences[model_name],
73
  "return_full_text": False
74
  }