Update app.py
app.py CHANGED
@@ -347,7 +347,7 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
 
     max_attempts = 5
     context_reduction_factor = 0.7
-
+    max_tokens = 32000  # Maximum tokens allowed by the model
 
     if web_search:
         contextualized_question, topics, entity_tracker, instructions = chatbot.process_question(question)
@@ -403,7 +403,7 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
             # Estimate token count
             estimated_tokens = estimate_tokens(formatted_prompt)
 
-            if estimated_tokens <=
+            if estimated_tokens <= max_tokens - 1000:  # Leave 1000 tokens for the model's response
                 break
 
             # Reduce context if estimated token count is too high
@@ -415,7 +415,7 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
             if len(current_context) + len(current_conv_context) + len(str(current_topics)) + len(str(current_entities)) < 100:
                 raise ValueError("Context reduced too much. Unable to process the query.")
 
-            full_response = generate_chunked_response(model, formatted_prompt)
+            full_response = generate_chunked_response(model, formatted_prompt, max_tokens=1000)
             answer = extract_answer(full_response, instructions)
             all_answers.append(answer)
             break
@@ -464,7 +464,7 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
 
             estimated_tokens = estimate_tokens(formatted_prompt)
 
-            if estimated_tokens <=
+            if estimated_tokens <= max_tokens - 1000:  # Leave 1000 tokens for the model's response
                 break
 
             # Reduce context if estimated token count is too high
@@ -473,7 +473,7 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
             if len(context_str) < 100:
                 raise ValueError("Context reduced too much. Unable to process the query.")
 
-            full_response = generate_chunked_response(model, formatted_prompt)
+            full_response = generate_chunked_response(model, formatted_prompt, max_tokens=1000)
             answer = extract_answer(full_response)
 
             return answer
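
The pattern behind these edits is a fixed token budget: estimate the prompt's size, reserve 1000 tokens for the reply, and keep shrinking the gathered context by context_reduction_factor until the prompt fits (or give up once the context drops below 100 characters). A minimal standalone sketch of that loop follows; the constants mirror the diff, but estimate_tokens, build_prompt and fit_prompt below are hedged stand-ins, since the rest of app.py is not part of this commit view.

# Illustrative sketch only. estimate_tokens() and generate_chunked_response() exist in
# app.py but their bodies are not shown in this diff, so the 4-chars-per-token heuristic
# and the fit_prompt helper below are assumptions, not the Space's actual implementation.

MAX_TOKENS = 32000             # context window used by the commit
RESPONSE_RESERVE = 1000        # tokens kept free for the model's reply
CONTEXT_REDUCTION_FACTOR = 0.7
MAX_ATTEMPTS = 5

def estimate_tokens(text: str) -> int:
    # Rough heuristic (assumption): roughly 4 characters per token for English text.
    return len(text) // 4

def fit_prompt(build_prompt, context: str) -> str:
    """Shrink `context` until the prompt built from it fits the token budget.

    `build_prompt` is a hypothetical callable that formats the final prompt
    from the (possibly truncated) context string.
    """
    for _ in range(MAX_ATTEMPTS):
        prompt = build_prompt(context)
        if estimate_tokens(prompt) <= MAX_TOKENS - RESPONSE_RESERVE:
            return prompt
        # Truncate the context by the reduction factor, as the diffed code does
        # for each of its context pieces.
        context = context[: int(len(context) * CONTEXT_REDUCTION_FACTOR)]
        if len(context) < 100:
            raise ValueError("Context reduced too much. Unable to process the query.")
    return build_prompt(context)

if __name__ == "__main__":
    question = "What changed in this commit?"
    context = "web search result snippet ... " * 10000  # deliberately oversized
    prompt = fit_prompt(
        lambda c: f"Context:\n{c}\n\nQuestion: {question}\nAnswer:", context
    )
    print(estimate_tokens(prompt), "estimated prompt tokens after reduction")

Passing max_tokens=1000 to generate_chunked_response presumably caps the generated reply at the same reserve the estimate leaves free, so prompt plus response stays inside the 32000-token window.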