luminoussg committed on
Commit
ab9f329
·
verified ·
1 Parent(s): fdaf591

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -3
app.py CHANGED
@@ -10,13 +10,25 @@ from session_manager import SessionManager
10
  session_manager = SessionManager()
11
  HF_API_KEY = os.getenv("HF_API_KEY")
12
 
13
- # Model endpoints configuration
14
  MODEL_ENDPOINTS = {
15
  "Qwen2.5-72B-Instruct": "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-72B-Instruct",
16
  "Llama3.3-70B-Instruct": "https://api-inference.huggingface.co/models/meta-llama/Llama-3.3-70B-Instruct",
17
  "Qwen2.5-Coder-32B-Instruct": "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-Coder-32B-Instruct",
18
  }
19
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  def query_model(model_name: str, messages: List[Dict[str, str]]) -> str:
21
  """Query a single model with the chat history"""
22
  endpoint = MODEL_ENDPOINTS[model_name]
@@ -55,8 +67,8 @@ def query_model(model_name: str, messages: List[Dict[str, str]]) -> str:
55
  payload = {
56
  "inputs": model_prompts[model_name],
57
  "parameters": {
58
- "max_tokens": 2048,
59
- "temperature": 0.7,
60
  "stop_sequences": stop_sequences[model_name],
61
  "return_full_text": False
62
  }
 
10
  session_manager = SessionManager()
11
  HF_API_KEY = os.getenv("HF_API_KEY")
12
 
13
+ # Model configurations
14
  MODEL_ENDPOINTS = {
15
  "Qwen2.5-72B-Instruct": "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-72B-Instruct",
16
  "Llama3.3-70B-Instruct": "https://api-inference.huggingface.co/models/meta-llama/Llama-3.3-70B-Instruct",
17
  "Qwen2.5-Coder-32B-Instruct": "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-Coder-32B-Instruct",
18
  }
19
 
20
+ MODEL_CONTEXT_WINDOWS = {
21
+ "Qwen2.5-72B-Instruct": 128000,
22
+ "Llama3.3-70B-Instruct": 128000,
23
+ "Qwen2.5-Coder-32B-Instruct": 128000,
24
+ }
25
+
26
+ MODEL_MAX_TOKENS = {
27
+ "Qwen2.5-72B-Instruct": 8192,
28
+ "Llama3.3-70B-Instruct": 2048,
29
+ "Qwen2.5-Coder-32B-Instruct": 8192,
30
+ }
31
+
32
  def query_model(model_name: str, messages: List[Dict[str, str]]) -> str:
33
  """Query a single model with the chat history"""
34
  endpoint = MODEL_ENDPOINTS[model_name]
 
67
  payload = {
68
  "inputs": model_prompts[model_name],
69
  "parameters": {
70
+ "max_tokens": MODEL_MAX_TOKENS[model_name],
71
+ "temperature": 0.6,
72
  "stop_sequences": stop_sequences[model_name],
73
  "return_full_text": False
74
  }