Saif Rehman Nasir committed on
Commit
9aaf62d
·
1 Parent(s): b29383b

Reduce output token size to keep it under rate limit

Browse files
Files changed (1) hide show
  1. rag.py +1 -1
rag.py CHANGED
@@ -23,7 +23,7 @@ vector_index = os.getenv("VECTOR_INDEX")
23
  chat_llm = HuggingFaceEndpoint(
24
  repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
25
  task="text-generation",
26
- max_new_tokens=8048,
27
  do_sample=False,
28
  )
29
 
 
23
  chat_llm = HuggingFaceEndpoint(
24
  repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
25
  task="text-generation",
26
+ max_new_tokens=6000,
27
  do_sample=False,
28
  )
29