Update app.py

app.py CHANGED
@@ -358,6 +358,36 @@ def truncate_text(text, max_tokens):
         return text
     return ' '.join(words[:max_tokens])
 
+def estimate_tokens(text):
+    return len(text.split())
+
+def truncate_text(text, max_tokens):
+    words = text.split()
+    if len(words) <= max_tokens:
+        return text
+    return ' '.join(words[:max_tokens])
+
+def rerank_documents(query: str, documents: List[Document], top_k: int = 5) -> List[Document]:
+    query_embedding = sentence_model.encode([query])[0]
+    doc_embeddings = sentence_model.encode([doc.page_content for doc in documents])
+
+    similarities = cosine_similarity([query_embedding], doc_embeddings)[0]
+
+    ranked_indices = similarities.argsort()[::-1][:top_k]
+    return [documents[i] for i in ranked_indices]
+
+def prepare_context(query: str, documents: List[Document], max_tokens: int) -> str:
+    reranked_docs = rerank_documents(query, documents)
+
+    context = ""
+    for doc in reranked_docs:
+        doc_content = f"Source: {doc.metadata.get('source', 'Unknown')}\nContent: {doc.page_content}\n\n"
+        if estimate_tokens(context + doc_content) > max_tokens:
+            break
+        context += doc_content
+
+    return truncate_text(context, max_tokens)
+
 def ask_question(question, temperature, top_p, repetition_penalty, web_search, chatbot, user_instructions):
     if not question:
         return "Please enter a question."
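The new helpers use whitespace word counts as a cheap token estimate, and `rerank_documents`/`prepare_context` re-order retrieved documents by cosine similarity before packing the best ones into a token budget. A minimal standalone sketch of that reranking flow, assuming the names this diff relies on (`sentence_model` from sentence-transformers, `cosine_similarity` from scikit-learn, `Document` from the pre-0.1 LangChain layout); the `all-MiniLM-L6-v2` model choice is an assumption, since the commit never shows how `sentence_model` is built:

    from typing import List

    from langchain.docstore.document import Document
    from sentence_transformers import SentenceTransformer
    from sklearn.metrics.pairwise import cosine_similarity

    # Assumed setup; app.py references a module-level `sentence_model`.
    sentence_model = SentenceTransformer("all-MiniLM-L6-v2")

    def rerank_documents(query: str, documents: List[Document], top_k: int = 5) -> List[Document]:
        # Embed the query and every candidate document with the same model.
        query_embedding = sentence_model.encode([query])[0]
        doc_embeddings = sentence_model.encode([doc.page_content for doc in documents])
        # Cosine similarity of the query against each document embedding.
        similarities = cosine_similarity([query_embedding], doc_embeddings)[0]
        # Indices of the top_k most similar documents, best first.
        return [documents[i] for i in similarities.argsort()[::-1][:top_k]]

    docs = [
        Document(page_content="FAISS indexes vectors for similarity search.", metadata={"source": "a.pdf"}),
        Document(page_content="Bananas are rich in potassium.", metadata={"source": "b.pdf"}),
    ]
    # Expect the vector-search document to win for a vector-search question.
    print(rerank_documents("how does vector similarity search work?", docs, top_k=1)[0].metadata["source"])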
@@ -375,7 +405,6 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, chatbot, user_instructions):
     database = None
 
     max_attempts = 3
-    context_reduction_factor = 0.7
     max_input_tokens = 31000  # Leave room for the model's response
     max_output_tokens = 1000
 
@@ -386,7 +415,7 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, chatbot, user_instructions):
         print(f"User Instructions: {user_instructions}")
 
         try:
-            search_results = google_search(contextualized_question, num_results=3)
+            search_results = google_search(contextualized_question, num_results=5)  # Increased from 3 to 5
         except Exception as e:
             print(f"Error in web search: {e}")
             return f"I apologize, but I encountered an error while searching for information: {str(e)}"
@@ -407,7 +436,8 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, chatbot, user_instructions):
 
         database.save_local("faiss_database")
 
-
+        # Prepare context using reranking
+        context_str = prepare_context(contextualized_question, web_docs, max_input_tokens // 2)  # Use half of max_input_tokens for context
 
         instruction_prompt = f"User Instructions: {user_instructions}\n" if user_instructions else ""
 
@@ -425,33 +455,20 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, chatbot, user_instructions):
 
         prompt_val = ChatPromptTemplate.from_template(prompt_template)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            estimated_tokens = estimate_tokens(formatted_prompt)
-
-            if estimated_tokens <= max_input_tokens:
-                break
-
-            # Reduce context sizes
-            current_context = truncate_text(current_context, int(estimate_tokens(current_context) * context_reduction_factor))
-            current_conv_context = truncate_text(current_conv_context, int(estimate_tokens(current_conv_context) * context_reduction_factor))
-            current_topics = current_topics[:max(1, int(len(current_topics) * context_reduction_factor))]
-            current_entities = {k: v[:max(1, int(len(v) * context_reduction_factor))] for k, v in current_entities.items()}
-
-            if estimate_tokens(current_context) + estimate_tokens(current_conv_context) + estimate_tokens(", ".join(current_topics)) + estimate_tokens(json.dumps(current_entities)) < 100:
-                raise ValueError("Context reduced too much. Unable to process the query.")
+        current_conv_context = truncate_text(chatbot.get_context(), max_input_tokens // 4)  # Use quarter of max_input_tokens for conversation context
+        current_topics = topics[:5]  # Limit to top 5 topics
+        current_entities = {k: list(v)[:3] for k, v in entity_tracker.items()}  # Limit to top 3 entities per type
+
+        formatted_prompt = prompt_val.format(
+            context=context_str,
+            conv_context=current_conv_context,
+            question=question,
+            topics=", ".join(current_topics),
+            entities=json.dumps(current_entities)
+        )
+
+        if estimate_tokens(formatted_prompt) > max_input_tokens:
+            formatted_prompt = truncate_text(formatted_prompt, max_input_tokens)
 
         try:
             full_response = generate_chunked_response(model, formatted_prompt, max_tokens=max_output_tokens)
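This hunk swaps the old strategy (format the prompt, and if it is too long, shrink every context source by `context_reduction_factor` and retry, giving up below 100 tokens) for a fixed up-front budget: half of `max_input_tokens` for retrieved context, a quarter for conversation history, at most five topics and three entities per type, plus one final truncation as a backstop. A toy check of that budget arithmetic and backstop, reusing the word-count helpers defined earlier in this diff:

    def estimate_tokens(text):
        # Whitespace word count as a cheap token proxy, as in the diff.
        return len(text.split())

    def truncate_text(text, max_tokens):
        words = text.split()
        return text if len(words) <= max_tokens else ' '.join(words[:max_tokens])

    max_input_tokens = 31000
    print(max_input_tokens // 2)   # 15500 words for retrieved document context
    print(max_input_tokens // 4)   # 7750 words for conversation history

    formatted_prompt = "word " * 40000          # deliberately oversized prompt
    if estimate_tokens(formatted_prompt) > max_input_tokens:
        formatted_prompt = truncate_text(formatted_prompt, max_input_tokens)
    assert estimate_tokens(formatted_prompt) == 31000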
@@ -463,11 +480,6 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, chatbot, user_instructions):
             if attempt == max_attempts - 1:
                 all_answers.append(f"I apologize, but I encountered an error while generating the response. Please try again with a simpler question.")
 
-        except ValueError as ve:
-            print(f"Error in ask_question (attempt {attempt + 1}): {ve}")
-            if attempt == max_attempts - 1:
-                all_answers.append(f"I apologize, but I'm having trouble processing the query due to its length or complexity. Could you please try asking a more specific or shorter question?")
-
         except Exception as e:
             print(f"Error in ask_question (attempt {attempt + 1}): {e}")
             if attempt == max_attempts - 1:
@@ -488,9 +500,11 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, chatbot, user_instructions):
     if database is None:
         return "No documents available. Please upload PDF documents to answer questions."
 
-    retriever = database.as_retriever()
+    retriever = database.as_retriever(search_kwargs={"k": 10})  # Retrieve more documents for reranking
     relevant_docs = retriever.get_relevant_documents(question)
-
+
+    # Prepare context using reranking
+    context_str = prepare_context(question, relevant_docs, max_input_tokens // 2)  # Use half of max_input_tokens for context
 
     instruction_prompt = f"User Instructions: {user_instructions}\n" if user_instructions else ""
 
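Raising the retriever's k to 10 while `prepare_context` reranks down to 5 is a retrieve-then-rerank pattern: the wider fetch buys recall, the reranker restores precision. A minimal sketch of this retriever setup (the embedding class and `from_texts` construction are assumptions for illustration; the diff only shows `as_retriever(search_kwargs={"k": 10})` on an existing FAISS database):

    from langchain.embeddings import HuggingFaceEmbeddings
    from langchain.vectorstores import FAISS

    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    database = FAISS.from_texts(["first document text", "second document text"], embeddings)

    retriever = database.as_retriever(search_kwargs={"k": 10})  # over-fetch candidates for reranking
    relevant_docs = retriever.get_relevant_documents("my question")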
@@ -503,19 +517,11 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, chatbot, user_instructions):
     Provide a summarized and direct answer to the question.
     """
 
-
-
-        formatted_prompt = prompt_val.format(context=context_str, question=question)
-
-        estimated_tokens = estimate_tokens(formatted_prompt)
-
-        if estimated_tokens <= max_input_tokens:
-            break
-
-        context_str = truncate_text(context_str, int(estimate_tokens(context_str) * context_reduction_factor))
+    prompt_val = ChatPromptTemplate.from_template(prompt_template)
+    formatted_prompt = prompt_val.format(context=context_str, question=question)
 
-
-
+    if estimate_tokens(formatted_prompt) > max_input_tokens:
+        formatted_prompt = truncate_text(formatted_prompt, max_input_tokens)
 
     try:
         full_response = generate_chunked_response(model, formatted_prompt, max_tokens=max_output_tokens)
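As in the web-search branch, the inner reduce-and-retry loop is collapsed into one format-then-truncate pass. A minimal sketch of the `ChatPromptTemplate` usage this relies on (the template text here is an abbreviation, not the Space's full prompt):

    from langchain.prompts import ChatPromptTemplate

    prompt_template = (
        "Context: {context}\n"
        "Question: {question}\n"
        "Provide a summarized and direct answer to the question."
    )
    prompt_val = ChatPromptTemplate.from_template(prompt_template)
    formatted_prompt = prompt_val.format(context="...retrieved text...", question="...")
    print(formatted_prompt)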
@@ -526,11 +532,6 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, chatbot, user_instructions):
         if attempt == max_attempts - 1:
             return f"I apologize, but I encountered an error while generating the response. Please try again with a simpler question."
 
-    except ValueError as ve:
-        print(f"Error in ask_question (attempt {attempt + 1}): {ve}")
-        if attempt == max_attempts - 1:
-            return f"I apologize, but I'm having trouble processing your question due to the complexity of the document. Could you please try asking a more specific or shorter question?"
-
     except Exception as e:
         print(f"Error in ask_question (attempt {attempt + 1}): {e}")
         if attempt == max_attempts - 1: