Update app.py
app.py
CHANGED
@@ -349,8 +349,14 @@ def google_search(term, num_results=5, lang="en", timeout=5, safe="active", ssl_
     return all_results
 
 def estimate_tokens(text):
-    #
-    return len(text)
+    # A more accurate estimation (still an estimate, but better than 1 token = 4 chars)
+    return len(text.split())
+
+def truncate_text(text, max_tokens):
+    words = text.split()
+    if len(words) <= max_tokens:
+        return text
+    return ' '.join(words[:max_tokens])
 
 def ask_question(question, temperature, top_p, repetition_penalty, web_search, chatbot, user_instructions):
     if not question:
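A quick sanity check of the two helpers introduced above (illustrative only; the sample string and numbers are not part of the commit, and the snippet assumes estimate_tokens and truncate_text from this hunk are in scope): estimate_tokens now counts whitespace-separated words instead of raw characters, and truncate_text keeps just the first max_tokens words.

text = "the quick brown fox jumps over the lazy dog"  # sample input, not from app.py
print(estimate_tokens(text))    # 9 (one "token" per word)
print(truncate_text(text, 4))   # "the quick brown fox"
print(truncate_text(text, 50))  # returned unchanged: already under the limit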
@@ -370,7 +376,8 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
 
     max_attempts = 3
     context_reduction_factor = 0.7
-
+    max_input_tokens = 31000  # Leave room for the model's response
+    max_output_tokens = 1000
 
     if web_search:
         contextualized_question, topics, entity_tracker, _ = chatbot.process_question(question)
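The two new constants split a combined budget of 32,000 tokens between prompt and reply (31000 + 1000); whether that matches the deployed model's context window is not stated in the diff, only the numbers are. A trivial sketch of how the budget is used by the checks further down (fits is a hypothetical helper, not from app.py):

max_input_tokens = 31000   # prompt budget added in this commit
max_output_tokens = 1000   # response budget added in this commit

def fits(prompt_tokens):
    # Mirrors the check used later in ask_question: only call the model
    # when the estimated prompt stays inside the input budget.
    return prompt_tokens <= max_input_tokens

print(max_input_tokens + max_output_tokens)  # 32000 tokens in total
print(fits(30500), fits(31500))              # True False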
@@ -432,23 +439,29 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
                     entities=json.dumps(current_entities)
                 )
 
-                estimated_tokens =
+                estimated_tokens = estimate_tokens(formatted_prompt)
 
-                if estimated_tokens <=
+                if estimated_tokens <= max_input_tokens:
                     break
 
-
-
+                # Reduce context sizes
+                current_context = truncate_text(current_context, int(estimate_tokens(current_context) * context_reduction_factor))
+                current_conv_context = truncate_text(current_conv_context, int(estimate_tokens(current_conv_context) * context_reduction_factor))
                 current_topics = current_topics[:max(1, int(len(current_topics) * context_reduction_factor))]
                 current_entities = {k: v[:max(1, int(len(v) * context_reduction_factor))] for k, v in current_entities.items()}
 
-                if
+                if estimate_tokens(current_context) + estimate_tokens(current_conv_context) + estimate_tokens(", ".join(current_topics)) + estimate_tokens(json.dumps(current_entities)) < 100:
                     raise ValueError("Context reduced too much. Unable to process the query.")
 
-
-
-
-
+                try:
+                    full_response = generate_chunked_response(model, formatted_prompt, max_tokens=max_output_tokens)
+                    answer = extract_answer(full_response, user_instructions)
+                    all_answers.append(answer)
+                    break
+                except Exception as e:
+                    print(f"Error in generate_chunked_response: {e}")
+                    if attempt == max_attempts - 1:
+                        all_answers.append(f"I apologize, but I encountered an error while generating the response. Please try again with a simpler question.")
 
             except ValueError as ve:
                 print(f"Error in ask_question (attempt {attempt + 1}): {ve}")
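The hunk above changes the web-search branch so that, on each attempt, the prompt is re-estimated and every context piece is shrunk by context_reduction_factor until the prompt fits, with a floor guard before giving up. A self-contained sketch of that shrink-until-it-fits loop (shrink_to_fit and the toy numbers are illustrative, not code from app.py; the two helpers mirror the ones added in this commit):

def estimate_tokens(text):
    return len(text.split())

def truncate_text(text, max_tokens):
    words = text.split()
    return text if len(words) <= max_tokens else ' '.join(words[:max_tokens])

def shrink_to_fit(context, max_input_tokens, reduction_factor=0.7, max_attempts=3):
    # Shrink the context by reduction_factor per attempt until it fits the budget,
    # refusing to go below a minimum size (same guard message as in the diff).
    for attempt in range(max_attempts):
        if estimate_tokens(context) <= max_input_tokens:
            return context
        context = truncate_text(context, int(estimate_tokens(context) * reduction_factor))
        if estimate_tokens(context) < 100:
            raise ValueError("Context reduced too much. Unable to process the query.")
    return context

reduced = shrink_to_fit("word " * 4000, max_input_tokens=2000)
print(estimate_tokens(reduced))  # fits the 2000-token budget after two reductions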
@@ -496,18 +509,22 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
 
                 estimated_tokens = estimate_tokens(formatted_prompt)
 
-                if estimated_tokens <=
+                if estimated_tokens <= max_input_tokens:
                     break
 
-                context_str = context_str
+                context_str = truncate_text(context_str, int(estimate_tokens(context_str) * context_reduction_factor))
 
-                if
+                if estimate_tokens(context_str) < 100:
                     raise ValueError("Context reduced too much. Unable to process the query.")
 
-
-
-
-
+                try:
+                    full_response = generate_chunked_response(model, formatted_prompt, max_tokens=max_output_tokens)
+                    answer = extract_answer(full_response, user_instructions)
+                    return answer
+                except Exception as e:
+                    print(f"Error in generate_chunked_response: {e}")
+                    if attempt == max_attempts - 1:
+                        return f"I apologize, but I encountered an error while generating the response. Please try again with a simpler question."
 
             except ValueError as ve:
                 print(f"Error in ask_question (attempt {attempt + 1}): {ve}")
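Both branches now wrap the model call itself in a try/except and only fall back to an apology on the final attempt. A minimal, self-contained sketch of that fallback pattern (guarded_answer and flaky_generate are stand-ins, not functions from app.py):

def guarded_answer(prompt, generate, max_attempts=3):
    # Retry the model call; after the last failed attempt return the same kind
    # of fallback message that this commit appends or returns in ask_question.
    for attempt in range(max_attempts):
        try:
            return generate(prompt)
        except Exception as e:
            print(f"Generation failed (attempt {attempt + 1}): {e}")
            if attempt == max_attempts - 1:
                return ("I apologize, but I encountered an error while generating "
                        "the response. Please try again with a simpler question.")

def flaky_generate(prompt):
    # Stand-in for generate_chunked_response that always fails.
    raise RuntimeError("model backend unavailable")

print(guarded_answer("hello", flaky_generate))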