Shreyas094 committed
Commit a6a5ca5 · verified · 1 Parent(s): 7a3b01a

Update app.py

Files changed (1): app.py +5 -5
app.py CHANGED
@@ -347,7 +347,7 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
 
     max_attempts = 5
     context_reduction_factor = 0.7
-    max_estimated_tokens = 30000  # Leave some room for the model's response
+    max_tokens = 32000  # Maximum tokens allowed by the model
 
     if web_search:
         contextualized_question, topics, entity_tracker, instructions = chatbot.process_question(question)
@@ -403,7 +403,7 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
                 # Estimate token count
                 estimated_tokens = estimate_tokens(formatted_prompt)
 
-                if estimated_tokens <= max_estimated_tokens:
+                if estimated_tokens <= max_tokens - 1000:  # Leave 1000 tokens for the model's response
                     break
 
                 # Reduce context if estimated token count is too high
@@ -415,7 +415,7 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
                 if len(current_context) + len(current_conv_context) + len(str(current_topics)) + len(str(current_entities)) < 100:
                     raise ValueError("Context reduced too much. Unable to process the query.")
 
-            full_response = generate_chunked_response(model, formatted_prompt)
+            full_response = generate_chunked_response(model, formatted_prompt, max_tokens=1000)
             answer = extract_answer(full_response, instructions)
             all_answers.append(answer)
             break
@@ -464,7 +464,7 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
 
             estimated_tokens = estimate_tokens(formatted_prompt)
 
-            if estimated_tokens <= max_estimated_tokens:
+            if estimated_tokens <= max_tokens - 1000:  # Leave 1000 tokens for the model's response
                 break
 
             # Reduce context if estimated token count is too high
@@ -473,7 +473,7 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
             if len(context_str) < 100:
                 raise ValueError("Context reduced too much. Unable to process the query.")
 
-        full_response = generate_chunked_response(model, formatted_prompt)
+        full_response = generate_chunked_response(model, formatted_prompt, max_tokens=1000)
         answer = extract_answer(full_response)
 
     return answer
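
The edited function follows a common prompt-budgeting pattern: estimate the prompt's token count, and if it would not leave room for the response inside the model's 32,000-token window, shrink the context by a fixed factor and retry. Below is a minimal, self-contained sketch of that loop under stated assumptions: the 4-characters-per-token heuristic in estimate_tokens, the build_prompt template, and the model.generate call are illustrative stand-ins, not the actual helpers in app.py; only the constants and the control flow come from the diff above.

MAX_TOKENS = 32000               # model's total token window (from the diff)
RESPONSE_RESERVE = 1000          # tokens reserved for the response (from the diff)
MAX_ATTEMPTS = 5                 # from the diff
CONTEXT_REDUCTION_FACTOR = 0.7   # from the diff

def estimate_tokens(text: str) -> int:
    # Assumed heuristic: roughly 4 characters per token for English text.
    return len(text) // 4

def build_prompt(question: str, context: str) -> str:
    # Hypothetical prompt template standing in for the real one in app.py.
    return f"Context:\n{context}\n\nQuestion: {question}\nAnswer:"

def answer_with_budget(model, question: str, context: str) -> str:
    for _ in range(MAX_ATTEMPTS):
        prompt = build_prompt(question, context)
        if estimate_tokens(prompt) <= MAX_TOKENS - RESPONSE_RESERVE:
            # Prompt fits; cap generation at the reserved budget, mirroring
            # generate_chunked_response(..., max_tokens=1000) in the diff.
            # model.generate is a hypothetical interface for illustration.
            return model.generate(prompt, max_tokens=RESPONSE_RESERVE)
        # Too long: shrink the context and retry.
        context = context[: int(len(context) * CONTEXT_REDUCTION_FACTOR)]
        if len(context) < 100:
            raise ValueError("Context reduced too much. Unable to process the query.")
    raise ValueError("Could not fit the prompt within the model's token limit.")

Using the same 1000-token reserve on both sides of the call (the budget check and the generation cap) is what keeps the prompt and the response consistent with a single 32,000-token window, which is the point of this commit.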