Saif Rehman Nasir
committed on
Commit
·
9aaf62d
1
Parent(s):
b29383b
Reduce output token size to keep it under the rate limit
Browse files
rag.py
CHANGED
@@ -23,7 +23,7 @@ vector_index = os.getenv("VECTOR_INDEX")
|
|
23 |
chat_llm = HuggingFaceEndpoint(
|
24 |
repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
|
25 |
task="text-generation",
|
26 |
-
max_new_tokens=
|
27 |
do_sample=False,
|
28 |
)
|
29 |
|
|
|
23 |
chat_llm = HuggingFaceEndpoint(
|
24 |
repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
|
25 |
task="text-generation",
|
26 |
+
max_new_tokens=6000,
|
27 |
do_sample=False,
|
28 |
)
|
29 |
|