Shreyas094 committed
Commit 84ed5b1 · verified · 1 Parent(s): bd4b7f4

Update app.py
Files changed (1): app.py +18 -1
app.py CHANGED
@@ -28,6 +28,14 @@ MODELS = [
     "meta-llama/Meta-Llama-3.1-70B-Instruct"
 ]
 
+MODEL_TOKEN_LIMITS = {
+    "mistralai/Mistral-7B-Instruct-v0.3": 32768,
+    "mistralai/Mixtral-8x7B-Instruct-v0.1": 32768,
+    "mistralai/Mistral-Nemo-Instruct-2407": 32768,
+    "meta-llama/Meta-Llama-3.1-8B-Instruct": 8192,
+    "meta-llama/Meta-Llama-3.1-70B-Instruct": 8192,
+}
+
 def get_embeddings():
     return HuggingFaceEmbeddings(model_name="sentence-transformers/stsb-roberta-large")
 
@@ -113,11 +121,20 @@ After writing the document, please provide a list of sources used in your respon
         # Use Hugging Face API
         client = InferenceClient(model, token=huggingface_token)
 
+        # Calculate input tokens (this is an approximation, you might need a more accurate method)
+        input_tokens = len(prompt.split())
+
+        # Get the token limit for the current model
+        model_token_limit = MODEL_TOKEN_LIMITS.get(model, 8192)  # Default to 8192 if model not found
+
+        # Calculate max_new_tokens
+        max_new_tokens = min(model_token_limit - input_tokens, 4096)  # Cap at 4096 to be safe
+
         main_content = ""
         for i in range(num_calls):
             for message in client.chat_completion(
                 messages=[{"role": "user", "content": prompt}],
-                max_tokens=10000,
+                max_new_tokens=max_new_tokens,
                 temperature=temperature,
                 stream=False,
             ):
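
For reference, below is a minimal sketch of the token-budget logic this commit introduces. It mirrors the commit's per-model limits, 8192 fallback, and 4096 cap, but swaps the whitespace word count for a tokenizer-based count, as the commit's own comment suggests a more accurate method may be needed. The helper name compute_max_new_tokens and the AutoTokenizer usage are illustrative assumptions and are not part of app.py; gated checkpoints (e.g. the Llama 3.1 models) may require a Hugging Face token to load their tokenizer.

# Sketch only (not part of app.py): budget calculation from this commit,
# with transformers' AutoTokenizer in place of the whitespace approximation.
from transformers import AutoTokenizer

MODEL_TOKEN_LIMITS = {
    "mistralai/Mistral-7B-Instruct-v0.3": 32768,
    "mistralai/Mixtral-8x7B-Instruct-v0.1": 32768,
    "mistralai/Mistral-Nemo-Instruct-2407": 32768,
    "meta-llama/Meta-Llama-3.1-8B-Instruct": 8192,
    "meta-llama/Meta-Llama-3.1-70B-Instruct": 8192,
}

def compute_max_new_tokens(prompt: str, model: str, cap: int = 4096) -> int:
    """Return a generation budget that fits inside the model's context window."""
    tokenizer = AutoTokenizer.from_pretrained(model)
    input_tokens = len(tokenizer.encode(prompt))
    # Fall back to 8192 for unknown models, as the commit does.
    model_token_limit = MODEL_TOKEN_LIMITS.get(model, 8192)
    # Leave room for the prompt and never request more than the cap.
    return max(1, min(model_token_limit - input_tokens, cap))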