fullstuckdev committed on
Commit
529f4f3
·
1 Parent(s): 2e3ad26

change max tokens

Browse files
Files changed (1) hide show
  1. app.py +2 -1
app.py CHANGED
@@ -15,7 +15,8 @@ async def generate_response(user_input):
15
  response = client.chat.completions.create(
16
  model= 'nvidia/Llama-3.1-Nemotron-70B-Instruct-HF',
17
  messages=messages,
18
- max_tokens= 500,
 
19
  )
20
 
21
  return response.choices[0].message.content
 
15
  response = client.chat.completions.create(
16
  model= 'nvidia/Llama-3.1-Nemotron-70B-Instruct-HF',
17
  messages=messages,
18
+ max_tokens=16384,
19
+ max_completion_tokens=16384
20
  )
21
 
22
  return response.choices[0].message.content