Update app.py
app.py
CHANGED
@@ -349,8 +349,14 @@ def google_search(term, num_results=5, lang="en", timeout=5, safe="active", ssl_
     return all_results
 
 def estimate_tokens(text):
-    #
-    return len(text)
+    # A more accurate estimation (still an estimate, but better than 1 token = 4 chars)
+    return len(text.split())
+
+def truncate_text(text, max_tokens):
+    words = text.split()
+    if len(words) <= max_tokens:
+        return text
+    return ' '.join(words[:max_tokens])
 
 def ask_question(question, temperature, top_p, repetition_penalty, web_search, chatbot, user_instructions):
     if not question:
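A quick sanity check of the two helpers introduced above (illustrative only; the sample string and numbers are not part of the commit, and the snippet assumes estimate_tokens and truncate_text from this hunk are in scope): estimate_tokens now counts whitespace-separated words instead of raw characters, and truncate_text keeps just the first max_tokens words.

text = "the quick brown fox jumps over the lazy dog"  # sample input, not from app.py
print(estimate_tokens(text))    # 9 (one "token" per word)
print(truncate_text(text, 4))   # "the quick brown fox"
print(truncate_text(text, 50))  # returned unchanged: already under the limit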
@@ -370,7 +376,8 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
 
     max_attempts = 3
     context_reduction_factor = 0.7
-
+    max_input_tokens = 31000  # Leave room for the model's response
+    max_output_tokens = 1000
 
     if web_search:
         contextualized_question, topics, entity_tracker, _ = chatbot.process_question(question)
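The two new constants split a combined budget of 32,000 tokens between prompt and reply (31000 + 1000); whether that matches the deployed model's context window is not stated in the diff, only the numbers are. A trivial sketch of how the budget is used by the checks further down (fits is a hypothetical helper, not from app.py):

max_input_tokens = 31000   # prompt budget added in this commit
max_output_tokens = 1000   # response budget added in this commit

def fits(prompt_tokens):
    # Mirrors the check used later in ask_question: only call the model
    # when the estimated prompt stays inside the input budget.
    return prompt_tokens <= max_input_tokens

print(max_input_tokens + max_output_tokens)  # 32000 tokens in total
print(fits(30500), fits(31500))              # True False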
@@ -432,23 +439,29 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
                     entities=json.dumps(current_entities)
                 )
 
-                estimated_tokens =
+                estimated_tokens = estimate_tokens(formatted_prompt)
 
-                if estimated_tokens <=
+                if estimated_tokens <= max_input_tokens:
                     break
 
-
-
+                # Reduce context sizes
+                current_context = truncate_text(current_context, int(estimate_tokens(current_context) * context_reduction_factor))
+                current_conv_context = truncate_text(current_conv_context, int(estimate_tokens(current_conv_context) * context_reduction_factor))
                 current_topics = current_topics[:max(1, int(len(current_topics) * context_reduction_factor))]
                 current_entities = {k: v[:max(1, int(len(v) * context_reduction_factor))] for k, v in current_entities.items()}
 
-                if
+                if estimate_tokens(current_context) + estimate_tokens(current_conv_context) + estimate_tokens(", ".join(current_topics)) + estimate_tokens(json.dumps(current_entities)) < 100:
                     raise ValueError("Context reduced too much. Unable to process the query.")
 
-
-
-
-
+                try:
+                    full_response = generate_chunked_response(model, formatted_prompt, max_tokens=max_output_tokens)
+                    answer = extract_answer(full_response, user_instructions)
+                    all_answers.append(answer)
+                    break
+                except Exception as e:
+                    print(f"Error in generate_chunked_response: {e}")
+                    if attempt == max_attempts - 1:
+                        all_answers.append(f"I apologize, but I encountered an error while generating the response. Please try again with a simpler question.")
 
             except ValueError as ve:
                 print(f"Error in ask_question (attempt {attempt + 1}): {ve}")
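The hunk above changes the web-search branch so that, on each attempt, the prompt is re-estimated and every context piece is shrunk by context_reduction_factor until the prompt fits, with a floor guard before giving up. A self-contained sketch of that shrink-until-it-fits loop (shrink_to_fit and the toy numbers are illustrative, not code from app.py; the two helpers mirror the ones added in this commit):

def estimate_tokens(text):
    return len(text.split())

def truncate_text(text, max_tokens):
    words = text.split()
    return text if len(words) <= max_tokens else ' '.join(words[:max_tokens])

def shrink_to_fit(context, max_input_tokens, reduction_factor=0.7, max_attempts=3):
    # Shrink the context by reduction_factor per attempt until it fits the budget,
    # refusing to go below a minimum size (same guard message as in the diff).
    for attempt in range(max_attempts):
        if estimate_tokens(context) <= max_input_tokens:
            return context
        context = truncate_text(context, int(estimate_tokens(context) * reduction_factor))
        if estimate_tokens(context) < 100:
            raise ValueError("Context reduced too much. Unable to process the query.")
    return context

reduced = shrink_to_fit("word " * 4000, max_input_tokens=2000)
print(estimate_tokens(reduced))  # fits the 2000-token budget after two reductions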
@@ -496,18 +509,22 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
 
                 estimated_tokens = estimate_tokens(formatted_prompt)
 
-                if estimated_tokens <=
+                if estimated_tokens <= max_input_tokens:
                     break
 
-                context_str = context_str
+                context_str = truncate_text(context_str, int(estimate_tokens(context_str) * context_reduction_factor))
 
-                if
+                if estimate_tokens(context_str) < 100:
                     raise ValueError("Context reduced too much. Unable to process the query.")
 
-
-
-
-
+                try:
+                    full_response = generate_chunked_response(model, formatted_prompt, max_tokens=max_output_tokens)
+                    answer = extract_answer(full_response, user_instructions)
+                    return answer
+                except Exception as e:
+                    print(f"Error in generate_chunked_response: {e}")
+                    if attempt == max_attempts - 1:
+                        return f"I apologize, but I encountered an error while generating the response. Please try again with a simpler question."
 
             except ValueError as ve:
                 print(f"Error in ask_question (attempt {attempt + 1}): {ve}")
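Both branches now wrap the model call itself in a try/except and only fall back to an apology on the final attempt. A minimal, self-contained sketch of that fallback pattern (guarded_answer and flaky_generate are stand-ins, not functions from app.py):

def guarded_answer(prompt, generate, max_attempts=3):
    # Retry the model call; after the last failed attempt return the same kind
    # of fallback message that this commit appends or returns in ask_question.
    for attempt in range(max_attempts):
        try:
            return generate(prompt)
        except Exception as e:
            print(f"Generation failed (attempt {attempt + 1}): {e}")
            if attempt == max_attempts - 1:
                return ("I apologize, but I encountered an error while generating "
                        "the response. Please try again with a simpler question.")

def flaky_generate(prompt):
    # Stand-in for generate_chunked_response that always fails.
    raise RuntimeError("model backend unavailable")

print(guarded_answer("hello", flaky_generate))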