Shreyas094 committed
Commit 84ed5b1 · verified · 1 Parent(s): bd4b7f4

Update app.py
Files changed (1): app.py +18 -1
app.py CHANGED
@@ -28,6 +28,14 @@ MODELS = [
     "meta-llama/Meta-Llama-3.1-70B-Instruct"
 ]
 
+MODEL_TOKEN_LIMITS = {
+    "mistralai/Mistral-7B-Instruct-v0.3": 32768,
+    "mistralai/Mixtral-8x7B-Instruct-v0.1": 32768,
+    "mistralai/Mistral-Nemo-Instruct-2407": 32768,
+    "meta-llama/Meta-Llama-3.1-8B-Instruct": 8192,
+    "meta-llama/Meta-Llama-3.1-70B-Instruct": 8192,
+}
+
 def get_embeddings():
     return HuggingFaceEmbeddings(model_name="sentence-transformers/stsb-roberta-large")
 
@@ -113,11 +121,20 @@ After writing the document, please provide a list of sources used in your respon
         # Use Hugging Face API
         client = InferenceClient(model, token=huggingface_token)
 
+        # Calculate input tokens (this is an approximation, you might need a more accurate method)
+        input_tokens = len(prompt.split())
+
+        # Get the token limit for the current model
+        model_token_limit = MODEL_TOKEN_LIMITS.get(model, 8192)  # Default to 8192 if model not found
+
+        # Calculate max_new_tokens
+        max_new_tokens = min(model_token_limit - input_tokens, 4096)  # Cap at 4096 to be safe
+
         main_content = ""
         for i in range(num_calls):
             for message in client.chat_completion(
                 messages=[{"role": "user", "content": prompt}],
-                max_tokens=10000,
+                max_new_tokens=max_new_tokens,
                 temperature=temperature,
                 stream=False,
             ):
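
For reference, below is a minimal sketch of the token-budget logic this commit introduces. It mirrors the commit's per-model limits, 8192 fallback, and 4096 cap, but swaps the whitespace word count for a tokenizer-based count, as the commit's own comment suggests a more accurate method may be needed. The helper name compute_max_new_tokens and the AutoTokenizer usage are illustrative assumptions and are not part of app.py; gated checkpoints (e.g. the Llama 3.1 models) may require a Hugging Face token to load their tokenizer.

# Sketch only (not part of app.py): budget calculation from this commit,
# with transformers' AutoTokenizer in place of the whitespace approximation.
from transformers import AutoTokenizer

MODEL_TOKEN_LIMITS = {
    "mistralai/Mistral-7B-Instruct-v0.3": 32768,
    "mistralai/Mixtral-8x7B-Instruct-v0.1": 32768,
    "mistralai/Mistral-Nemo-Instruct-2407": 32768,
    "meta-llama/Meta-Llama-3.1-8B-Instruct": 8192,
    "meta-llama/Meta-Llama-3.1-70B-Instruct": 8192,
}

def compute_max_new_tokens(prompt: str, model: str, cap: int = 4096) -> int:
    """Return a generation budget that fits inside the model's context window."""
    tokenizer = AutoTokenizer.from_pretrained(model)
    input_tokens = len(tokenizer.encode(prompt))
    # Fall back to 8192 for unknown models, as the commit does.
    model_token_limit = MODEL_TOKEN_LIMITS.get(model, 8192)
    # Leave room for the prompt and never request more than the cap.
    return max(1, min(model_token_limit - input_tokens, cap))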