Update app.py
Browse files
app.py
CHANGED
@@ -233,6 +233,9 @@ def generate_chunked_response(model, prompt, max_tokens=1000, max_chunks=5):
|
|
233 |
full_response += chunk
|
234 |
except Exception as e:
|
235 |
print(f"Error in generate_chunked_response: {e}")
|
|
|
|
|
|
|
236 |
break
|
237 |
return full_response.strip()
|
238 |
|
@@ -346,8 +349,8 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
|
|
346 |
database = None
|
347 |
|
348 |
max_attempts = 5
|
349 |
-
context_reduction_factor = 0.
|
350 |
-
max_estimated_tokens =
|
351 |
|
352 |
if web_search:
|
353 |
contextualized_question, topics, entity_tracker, instructions = chatbot.process_question(question)
|
@@ -358,7 +361,7 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
|
|
358 |
|
359 |
for attempt in range(max_attempts):
|
360 |
try:
|
361 |
-
web_docs = [Document(page_content=result["text"], metadata={"source": result["link"]}) for result in search_results if result["text"]]
|
362 |
|
363 |
if database is None:
|
364 |
database = FAISS.from_documents(web_docs, embed)
|
@@ -372,20 +375,16 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
|
|
372 |
instruction_prompt = f"User Instructions: {instructions}\n" if instructions else ""
|
373 |
|
374 |
prompt_template = f"""
|
375 |
-
Answer
|
376 |
-
|
377 |
-
{{
|
378 |
-
Conversation Context: {{conv_context}}
|
379 |
-
Current Question: {{question}}
|
380 |
Topics: {{topics}}
|
381 |
-
|
382 |
{instruction_prompt}
|
383 |
-
Provide a concise and relevant answer to the question.
|
384 |
"""
|
385 |
|
386 |
prompt_val = ChatPromptTemplate.from_template(prompt_template)
|
387 |
|
388 |
-
# Start with full context and progressively reduce if necessary
|
389 |
current_context = context_str
|
390 |
current_conv_context = chatbot.get_context()
|
391 |
current_topics = topics
|
@@ -393,14 +392,13 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
|
|
393 |
|
394 |
while True:
|
395 |
formatted_prompt = prompt_val.format(
|
396 |
-
context=current_context,
|
397 |
-
conv_context=current_conv_context,
|
398 |
question=question,
|
399 |
-
topics=", ".join(current_topics),
|
400 |
-
entities=json.dumps(current_entities)
|
401 |
)
|
402 |
|
403 |
-
# Estimate token count
|
404 |
estimated_tokens = estimate_tokens(formatted_prompt)
|
405 |
|
406 |
if estimated_tokens <= max_estimated_tokens:
|
|
|
233 |
full_response += chunk
|
234 |
except Exception as e:
|
235 |
print(f"Error in generate_chunked_response: {e}")
|
236 |
+
if "Input validation error" in str(e):
|
237 |
+
# If we hit the token limit, return what we have so far
|
238 |
+
return full_response if full_response else "The input was too long to process. Please try a shorter query."
|
239 |
break
|
240 |
return full_response.strip()
|
241 |
|
|
|
349 |
database = None
|
350 |
|
351 |
max_attempts = 5
|
352 |
+
context_reduction_factor = 0.5 # More aggressive reduction
|
353 |
+
max_estimated_tokens = 25000 # Further reduced to leave more room for response
|
354 |
|
355 |
if web_search:
|
356 |
contextualized_question, topics, entity_tracker, instructions = chatbot.process_question(question)
|
|
|
361 |
|
362 |
for attempt in range(max_attempts):
|
363 |
try:
|
364 |
+
web_docs = [Document(page_content=result["text"][:1000], metadata={"source": result["link"]}) for result in search_results if result["text"]] # Limit each result to 1000 characters
|
365 |
|
366 |
if database is None:
|
367 |
database = FAISS.from_documents(web_docs, embed)
|
|
|
375 |
instruction_prompt = f"User Instructions: {instructions}\n" if instructions else ""
|
376 |
|
377 |
prompt_template = f"""
|
378 |
+
Answer based on: Web Results: {{context}}
|
379 |
+
Context: {{conv_context}}
|
380 |
+
Question: {{question}}
|
|
|
|
|
381 |
Topics: {{topics}}
|
382 |
+
Entities: {{entities}}
|
383 |
{instruction_prompt}
|
|
|
384 |
"""
|
385 |
|
386 |
prompt_val = ChatPromptTemplate.from_template(prompt_template)
|
387 |
|
|
|
388 |
current_context = context_str
|
389 |
current_conv_context = chatbot.get_context()
|
390 |
current_topics = topics
|
|
|
392 |
|
393 |
while True:
|
394 |
formatted_prompt = prompt_val.format(
|
395 |
+
context=current_context[:3000], # Limit context to 3000 characters
|
396 |
+
conv_context=current_conv_context[:500], # Limit conversation context to 500 characters
|
397 |
question=question,
|
398 |
+
topics=", ".join(current_topics[:5]), # Limit to 5 topics
|
399 |
+
entities=json.dumps({k: v[:2] for k, v in current_entities.items()}) # Limit to 2 entities per type
|
400 |
)
|
401 |
|
|
|
402 |
estimated_tokens = estimate_tokens(formatted_prompt)
|
403 |
|
404 |
if estimated_tokens <= max_estimated_tokens:
|