Update app.py
app.py CHANGED
@@ -347,7 +347,7 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
 
     max_attempts = 5
     context_reduction_factor = 0.7
-
+    max_tokens = 32000  # Maximum tokens allowed by the model
 
     if web_search:
         contextualized_question, topics, entity_tracker, instructions = chatbot.process_question(question)
@@ -403,7 +403,7 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
             # Estimate token count
             estimated_tokens = estimate_tokens(formatted_prompt)
 
-            if estimated_tokens <=
+            if estimated_tokens <= max_tokens - 1000:  # Leave 1000 tokens for the model's response
                 break
 
             # Reduce context if estimated token count is too high
@@ -415,7 +415,7 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
             if len(current_context) + len(current_conv_context) + len(str(current_topics)) + len(str(current_entities)) < 100:
                 raise ValueError("Context reduced too much. Unable to process the query.")
 
-            full_response = generate_chunked_response(model, formatted_prompt)
+            full_response = generate_chunked_response(model, formatted_prompt, max_tokens=1000)
             answer = extract_answer(full_response, instructions)
             all_answers.append(answer)
             break
@@ -464,7 +464,7 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
 
             estimated_tokens = estimate_tokens(formatted_prompt)
 
-            if estimated_tokens <=
+            if estimated_tokens <= max_tokens - 1000:  # Leave 1000 tokens for the model's response
                 break
 
             # Reduce context if estimated token count is too high
@@ -473,7 +473,7 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
             if len(context_str) < 100:
                 raise ValueError("Context reduced too much. Unable to process the query.")
 
-            full_response = generate_chunked_response(model, formatted_prompt)
+            full_response = generate_chunked_response(model, formatted_prompt, max_tokens=1000)
             answer = extract_answer(full_response)
 
             return answer
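
The pattern behind these edits is a fixed token budget: estimate the prompt's size, reserve 1000 tokens for the reply, and keep shrinking the gathered context by context_reduction_factor until the prompt fits (or give up once the context drops below 100 characters). A minimal standalone sketch of that loop follows; the constants mirror the diff, but estimate_tokens, build_prompt and fit_prompt below are hedged stand-ins, since the rest of app.py is not part of this commit view.

# Illustrative sketch only. estimate_tokens() and generate_chunked_response() exist in
# app.py but their bodies are not shown in this diff, so the 4-chars-per-token heuristic
# and the fit_prompt helper below are assumptions, not the Space's actual implementation.

MAX_TOKENS = 32000             # context window used by the commit
RESPONSE_RESERVE = 1000        # tokens kept free for the model's reply
CONTEXT_REDUCTION_FACTOR = 0.7
MAX_ATTEMPTS = 5

def estimate_tokens(text: str) -> int:
    # Rough heuristic (assumption): roughly 4 characters per token for English text.
    return len(text) // 4

def fit_prompt(build_prompt, context: str) -> str:
    """Shrink `context` until the prompt built from it fits the token budget.

    `build_prompt` is a hypothetical callable that formats the final prompt
    from the (possibly truncated) context string.
    """
    for _ in range(MAX_ATTEMPTS):
        prompt = build_prompt(context)
        if estimate_tokens(prompt) <= MAX_TOKENS - RESPONSE_RESERVE:
            return prompt
        # Truncate the context by the reduction factor, as the diffed code does
        # for each of its context pieces.
        context = context[: int(len(context) * CONTEXT_REDUCTION_FACTOR)]
        if len(context) < 100:
            raise ValueError("Context reduced too much. Unable to process the query.")
    return build_prompt(context)

if __name__ == "__main__":
    question = "What changed in this commit?"
    context = "web search result snippet ... " * 10000  # deliberately oversized
    prompt = fit_prompt(
        lambda c: f"Context:\n{c}\n\nQuestion: {question}\nAnswer:", context
    )
    print(estimate_tokens(prompt), "estimated prompt tokens after reduction")

Passing max_tokens=1000 to generate_chunked_response presumably caps the generated reply at the same reserve the estimate leaves free, so prompt plus response stays inside the 32000-token window.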