Update app.py
app.py CHANGED
@@ -28,6 +28,14 @@ MODELS = [
     "meta-llama/Meta-Llama-3.1-70B-Instruct"
 ]
 
+MODEL_TOKEN_LIMITS = {
+    "mistralai/Mistral-7B-Instruct-v0.3": 32768,
+    "mistralai/Mixtral-8x7B-Instruct-v0.1": 32768,
+    "mistralai/Mistral-Nemo-Instruct-2407": 32768,
+    "meta-llama/Meta-Llama-3.1-8B-Instruct": 8192,
+    "meta-llama/Meta-Llama-3.1-70B-Instruct": 8192,
+}
+
 def get_embeddings():
     return HuggingFaceEmbeddings(model_name="sentence-transformers/stsb-roberta-large")
 
@@ -113,11 +121,20 @@ After writing the document, please provide a list of sources used in your response
     # Use Hugging Face API
     client = InferenceClient(model, token=huggingface_token)
 
+    # Calculate input tokens (this is an approximation, you might need a more accurate method)
+    input_tokens = len(prompt.split())
+
+    # Get the token limit for the current model
+    model_token_limit = MODEL_TOKEN_LIMITS.get(model, 8192)  # Default to 8192 if model not found
+
+    # Calculate max_new_tokens
+    max_new_tokens = min(model_token_limit - input_tokens, 4096)  # Cap at 4096 to be safe
+
     main_content = ""
     for i in range(num_calls):
         for message in client.chat_completion(
             messages=[{"role": "user", "content": prompt}],
-
+            max_new_tokens=max_new_tokens,
             temperature=temperature,
             stream=False,
         ):
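The budget logic this commit adds counts tokens by splitting the prompt on whitespace, which undercounts for most tokenizers (the diff's own comment flags it as an approximation), and the subtraction can go negative once a prompt approaches the model's context window. A minimal sketch of a stricter version, assuming transformers is installed and the model repo is accessible; the helper name compute_max_new_tokens is illustrative, not part of this commit:

from transformers import AutoTokenizer

def compute_max_new_tokens(prompt: str, model: str, cap: int = 4096) -> int:
    # Count real tokens with the model's own tokenizer instead of whitespace words.
    # Gated repos (e.g. the Llama models above) may need token=huggingface_token here.
    tokenizer = AutoTokenizer.from_pretrained(model)
    input_tokens = len(tokenizer.encode(prompt))
    # MODEL_TOKEN_LIMITS is the dict added at the top of this commit.
    model_token_limit = MODEL_TOKEN_LIMITS.get(model, 8192)
    # Clamp to at least 1 so a very long prompt never yields a negative budget.
    return max(1, min(model_token_limit - input_tokens, cap))

One caveat: depending on the installed huggingface_hub version, chat_completion may expect the OpenAI-style max_tokens argument rather than max_new_tokens, so the keyword used in the diff is worth verifying against the local signature.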