SearchGPT

Running

App Files Community

Shreyas094 committed on Jul 20, 2024

Commit

4920472

verified ·

1 Parent(s): 7dd2856

Update app.py

Browse files

Files changed (1) hide show

app.py +91 -54

app.py CHANGED Viewed

@@ -64,11 +64,11 @@ class Agent1:
     def process(self, user_input: str) -> Dict[str, List[Dict[str, str]]]:
         queries = self.rephrase_and_split(user_input)
-        print("Rephrased queries:", queries)  # Add this line
         results = {}
         for query in queries:
             results[query] = google_search(query)
-        return results
 def load_document(file: NamedTemporaryFile) -> List[Document]:
     """Loads and splits the document into pages."""
@@ -240,33 +240,75 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search):
     max_attempts = 3
     context_reduction_factor = 0.7
-    for attempt in range(max_attempts):
-        try:
-            if web_search:
-                search_results = agent1.process(question)
-                web_docs = []
-                for query, results in search_results.items():
-                    web_docs.extend([Document(page_content=result["text"], metadata={"source": result["link"], "query": query}) for result in results if result["text"]])
-                if database is None:
-                    database = FAISS.from_documents(web_docs, embed)
-                else:
-                    database.add_documents(web_docs)
-                database.save_local("faiss_database")
-                context_str = "\n".join([f"Query: {doc.metadata['query']}\nSource: {doc.metadata['source']}\nContent: {doc.page_content}" for doc in web_docs])
-                prompt_template = """
-                Answer the question based on the following web search results:
-                Web Search Results:
-                {context}
-                Original Question: {question}
-                If the web search results don't contain relevant information, state that the information is not available in the search results.
-                Provide a concise and direct answer to the original question without mentioning the web search or these instructions.
-                Do not include any source information in your answer.
-                """
-            else:
                 if database is None:
                     return "No documents available. Please upload documents or enable web search to answer questions."
@@ -288,40 +330,35 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search):
                 Do not include any source information in your answer.
                 """
-            prompt_val = ChatPromptTemplate.from_template(prompt_template)
-            formatted_prompt = prompt_val.format(context=context_str, question=question)
-            full_response = generate_chunked_response(model, formatted_prompt)
-            answer_patterns = [
-                r"Provide a concise and direct answer to the question without mentioning the web search or these instructions:",
-                r"Provide a concise and direct answer to the question:",
-                r"Answer:",
-                r"Provide a concise and direct answer to the original question without mentioning the web search or these instructions:",
-                r"Do not include any source information in your answer."
-            ]
-            for pattern in answer_patterns:
-                match = re.split(pattern, full_response, flags=re.IGNORECASE)
-                if len(match) > 1:
-                    answer = match[-1].strip()
-                    break
-            else:
-                answer = full_response.strip()
-            if web_search:
-                sources = set(doc.metadata['source'] for doc in web_docs)
-                sources_section = "\n\nSources:\n" + "\n".join(f"- {source}" for source in sources)
-                answer += sources_section
-            return answer
-        except Exception as e:
-            print(f"Error in ask_question (attempt {attempt + 1}): {e}")
-            if "Input validation error" in str(e) and attempt < max_attempts - 1:
-                print(f"Reducing context length for next attempt")
-            elif attempt == max_attempts - 1:
-                return f"I apologize, but I'm having trouble processing your question due to its length or complexity. Could you please try rephrasing it more concisely?"
     return "An unexpected error occurred. Please try again later."

     def process(self, user_input: str) -> Dict[str, List[Dict[str, str]]]:
         queries = self.rephrase_and_split(user_input)
+        print("Rephrased queries:", queries)
         results = {}
         for query in queries:
             results[query] = google_search(query)
+        return queries, results
 def load_document(file: NamedTemporaryFile) -> List[Document]:
     """Loads and splits the document into pages."""
     max_attempts = 3
     context_reduction_factor = 0.7
+    if web_search:
+        queries, search_results = agent1.process(question)
+        all_answers = []
+        for query in queries:
+            for attempt in range(max_attempts):
+                try:
+                    web_docs = [Document(page_content=result["text"], metadata={"source": result["link"], "query": query}) for result in search_results[query] if result["text"]]
+                    if database is None:
+                        database = FAISS.from_documents(web_docs, embed)
+                    else:
+                        database.add_documents(web_docs)
+                    database.save_local("faiss_database")
+                    context_str = "\n".join([f"Query: {doc.metadata['query']}\nSource: {doc.metadata['source']}\nContent: {doc.page_content}" for doc in web_docs])
+                    prompt_template = """
+                    Answer the question based on the following web search results:
+                    Web Search Results:
+                    {context}
+                    Original Question: {question}
+                    If the web search results don't contain relevant information, state that the information is not available in the search results.
+                    Provide a concise and direct answer to the original question without mentioning the web search or these instructions.
+                    Do not include any source information in your answer.
+                    """
+                    prompt_val = ChatPromptTemplate.from_template(prompt_template)
+                    formatted_prompt = prompt_val.format(context=context_str, question=query)
+                    full_response = generate_chunked_response(model, formatted_prompt)
+                    answer_patterns = [
+                        r"Provide a concise and direct answer to the question without mentioning the web search or these instructions:",
+                        r"Provide a concise and direct answer to the question:",
+                        r"Answer:",
+                        r"Provide a concise and direct answer to the original question without mentioning the web search or these instructions:",
+                        r"Do not include any source information in your answer."
+                    ]
+                    for pattern in answer_patterns:
+                        match = re.split(pattern, full_response, flags=re.IGNORECASE)
+                        if len(match) > 1:
+                            answer = match[-1].strip()
+                            break
+                    else:
+                        answer = full_response.strip()
+                    all_answers.append(answer)
+                    break
+                except Exception as e:
+                    print(f"Error in ask_question for query '{query}' (attempt {attempt + 1}): {e}")
+                    if "Input validation error" in str(e) and attempt < max_attempts - 1:
+                        print(f"Reducing context length for next attempt")
+                    elif attempt == max_attempts - 1:
+                        all_answers.append(f"I apologize, but I'm having trouble processing the query '{query}' due to its length or complexity.")
+        answer = "\n\n".join(all_answers)
+        sources = set(doc.metadata['source'] for docs in search_results.values() for doc in [Document(page_content=result["text"], metadata={"source": result["link"]}) for result in docs if result["text"]])
+        sources_section = "\n\nSources:\n" + "\n".join(f"- {source}" for source in sources)
+        answer += sources_section
+        return answer
+    else:
+        for attempt in range(max_attempts):
+            try:
                 if database is None:
                     return "No documents available. Please upload documents or enable web search to answer questions."
                 Do not include any source information in your answer.
                 """
+                prompt_val = ChatPromptTemplate.from_template(prompt_template)
+                formatted_prompt = prompt_val.format(context=context_str, question=question)
+                full_response = generate_chunked_response(model, formatted_prompt)
+                answer_patterns = [
+                    r"Provide a concise and direct answer to the question without mentioning the web search or these instructions:",
+                    r"Provide a concise and direct answer to the question:",
+                    r"Answer:",
+                    r"Provide a concise and direct answer to the original question without mentioning the web search or these instructions:",
+                    r"Do not include any source information in your answer."
+                ]
+                for pattern in answer_patterns:
+                    match = re.split(pattern, full_response, flags=re.IGNORECASE)
+                    if len(match) > 1:
+                        answer = match[-1].strip()
+                        break
+                else:
+                    answer = full_response.strip()
+                return answer
+            except Exception as e:
+                print(f"Error in ask_question (attempt {attempt + 1}): {e}")
+                if "Input validation error" in str(e) and attempt < max_attempts - 1:
+                    print(f"Reducing context length for next attempt")
+                elif attempt == max_attempts - 1:
+                    return f"I apologize, but I'm having trouble processing your question due to its length or complexity. Could you please try rephrasing it more concisely?"
     return "An unexpected error occurred. Please try again later."