Shreyas094 committed
Commit b526692 · verified · 1 Parent(s): 8e9b65b

Update app.py

Files changed (1): app.py (+36 -19)
app.py CHANGED
@@ -349,8 +349,14 @@ def google_search(term, num_results=5, lang="en", timeout=5, safe="active", ssl_
     return all_results
 
 def estimate_tokens(text):
-    # Rough estimate: 1 token ~= 4 characters
-    return len(text) // 4
+    # A more accurate estimation (still an estimate, but better than 1 token = 4 chars)
+    return len(text.split())
+
+def truncate_text(text, max_tokens):
+    words = text.split()
+    if len(words) <= max_tokens:
+        return text
+    return ' '.join(words[:max_tokens])
 
 def ask_question(question, temperature, top_p, repetition_penalty, web_search, chatbot, user_instructions):
     if not question:
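The new estimate_tokens counts whitespace-separated words instead of dividing the character count by four, and truncate_text cuts a string down to a word budget. A minimal sketch of the two heuristics side by side (the helpers are copied from this commit; the sample string is illustrative only):

# Helpers as introduced in this commit.
def estimate_tokens(text):
    return len(text.split())

def truncate_text(text, max_tokens):
    words = text.split()
    if len(words) <= max_tokens:
        return text
    return ' '.join(words[:max_tokens])

sample = "Summarize the latest news about renewable energy policy in the EU"
print(len(sample) // 4)          # old heuristic: 1 token ~= 4 characters
print(estimate_tokens(sample))   # new heuristic: 1 token per word
print(truncate_text(sample, 5))  # -> "Summarize the latest news about"

Word counts typically undercount real subword tokens, so this stays a rough budget; if exact counts mattered, the model's own tokenizer would be the reliable source of truth.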
@@ -370,7 +376,8 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
 
     max_attempts = 3
     context_reduction_factor = 0.7
-    max_tokens = 32000
+    max_input_tokens = 31000  # Leave room for the model's response
+    max_output_tokens = 1000
 
     if web_search:
         contextualized_question, topics, entity_tracker, _ = chatbot.process_question(question)
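Splitting the old max_tokens = 32000 into separate input and output budgets makes the arithmetic explicit: 31,000 prompt tokens plus 1,000 response tokens fit within a 32k context window (the window size is implied by the old constant, not stated anywhere in app.py). A small sketch of the invariant and the exit test the reduction loops below rely on:

MODEL_CONTEXT_WINDOW = 32000  # assumption: inferred from the old max_tokens value
max_input_tokens = 31000      # leave room for the model's response
max_output_tokens = 1000
assert max_input_tokens + max_output_tokens <= MODEL_CONTEXT_WINDOW

def fits(formatted_prompt, estimate_tokens=lambda t: len(t.split())):
    # Exit condition of the reduction loops: stop shrinking once the prompt fits.
    return estimate_tokens(formatted_prompt) <= max_input_tokens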
@@ -432,23 +439,29 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
                     entities=json.dumps(current_entities)
                 )
 
-                estimated_tokens = len(formatted_prompt) // 4
+                estimated_tokens = estimate_tokens(formatted_prompt)
 
-                if estimated_tokens <= max_tokens - 1000:
+                if estimated_tokens <= max_input_tokens:
                     break
 
-                current_context = current_context[:int(len(current_context) * context_reduction_factor)]
-                current_conv_context = current_conv_context[:int(len(current_conv_context) * context_reduction_factor)]
+                # Reduce context sizes
+                current_context = truncate_text(current_context, int(estimate_tokens(current_context) * context_reduction_factor))
+                current_conv_context = truncate_text(current_conv_context, int(estimate_tokens(current_conv_context) * context_reduction_factor))
                 current_topics = current_topics[:max(1, int(len(current_topics) * context_reduction_factor))]
                 current_entities = {k: v[:max(1, int(len(v) * context_reduction_factor))] for k, v in current_entities.items()}
 
-                if len(current_context) + len(current_conv_context) + len(str(current_topics)) + len(str(current_entities)) < 100:
+                if estimate_tokens(current_context) + estimate_tokens(current_conv_context) + estimate_tokens(", ".join(current_topics)) + estimate_tokens(json.dumps(current_entities)) < 100:
                     raise ValueError("Context reduced too much. Unable to process the query.")
 
-            full_response = generate_chunked_response(model, formatted_prompt, max_tokens=1000)
-            answer = extract_answer(full_response, user_instructions)
-            all_answers.append(answer)
-            break
+            try:
+                full_response = generate_chunked_response(model, formatted_prompt, max_tokens=max_output_tokens)
+                answer = extract_answer(full_response, user_instructions)
+                all_answers.append(answer)
+                break
+            except Exception as e:
+                print(f"Error in generate_chunked_response: {e}")
+                if attempt == max_attempts - 1:
+                    all_answers.append(f"I apologize, but I encountered an error while generating the response. Please try again with a simpler question.")
 
         except ValueError as ve:
             print(f"Error in ask_question (attempt {attempt + 1}): {ve}")
@@ -496,18 +509,22 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
 
                 estimated_tokens = estimate_tokens(formatted_prompt)
 
-                if estimated_tokens <= max_tokens - 1000:
+                if estimated_tokens <= max_input_tokens:
                     break
 
-                context_str = context_str[:int(len(context_str) * context_reduction_factor)]
+                context_str = truncate_text(context_str, int(estimate_tokens(context_str) * context_reduction_factor))
 
-                if len(context_str) < 100:
+                if estimate_tokens(context_str) < 100:
                     raise ValueError("Context reduced too much. Unable to process the query.")
 
-            full_response = generate_chunked_response(model, formatted_prompt, max_tokens=1000)
-            answer = extract_answer(full_response, user_instructions)
-
-            return answer
+            try:
+                full_response = generate_chunked_response(model, formatted_prompt, max_tokens=max_output_tokens)
+                answer = extract_answer(full_response, user_instructions)
+                return answer
+            except Exception as e:
+                print(f"Error in generate_chunked_response: {e}")
+                if attempt == max_attempts - 1:
+                    return f"I apologize, but I encountered an error while generating the response. Please try again with a simpler question."
 
         except ValueError as ve:
             print(f"Error in ask_question (attempt {attempt + 1}): {ve}")