Shreyas094 committed
Commit
e4b2310
1 Parent(s): 4706059

Update app.py

Files changed (1)
  1. app.py +33 -27
app.py CHANGED
@@ -255,33 +255,26 @@ def scrape_full_content(url, scraper="trafilatura", max_chars=3000):
         logger.error(f"Error scraping full content from {url}: {e}")
         return ""
 
-def llm_summarize(query, documents, llm_client, temperature=0.2):
-    system_prompt = """You are Sentinel, a world class Financial analysis AI model who is expert at searching the web and answering user's queries. You are also an expert at summarizing web pages or documents and searching for content in them."""
-
-    # Prepare the context from the documents
-    context = "\n\n".join([f"Document {i+1}:\nTitle: {doc['title']}\nURL: {doc['url']}\n(SCRAPED CONTENT)\n{doc['full_content']}\n(/SCRAPED CONTENT)" for i, doc in enumerate(documents)])
+def llm_summarize(json_input, llm_client, temperature=0.2):
+    system_prompt = """You are Sentinel, a world-class Financial analysis AI model who is expert at searching the web and answering user's queries. You are also an expert at summarizing web pages or documents and searching for content in them."""
 
     user_prompt = f"""
-Query: {query}
-
-Context: {context}
-Instructions: Write a detailed, long and complete research document that is informative and relevant to the user, who is a financial analyst, query based on provided context (the context consists of search results containing a brief description of the content of that page). You must use this context to answer the user's query in the best way possible.
-Use an unbiased and writer tone in your response. Do not repeat the text. You must provide the answer in the response itself. If the user asks for links you can provide them.
-If the user asks to summarize content from some links, you will be provided the entire content of the page inside the (SCRAPED CONTENT) block.
-You can then use this content to summarize the text.Your responses should be detailed in length be informative, accurate and relevant to the user's query.
-You can use markdowns to format your response. You should use bullet points to list the information.
-Make sure the answer is long and is informative in a research document style. You have to cite the answer using [number] notation along with the appropriate source URL embedded in the notation.
-You must cite the sentences with their relevant context number.
-You must cite each and every part of the answer so the user can know where the information is coming from. Place these citations at the end of that particular sentence.
-You can cite the same sentence multiple times if it is relevant to the user's query like [number1][number2].
-However you do not need to cite it using the same number. You can use different numbers to cite the same sentence multiple times.
-The number refers to the number of the search result (passed in the context) used to generate that part of the answer. Anything inside the following (SCRAPED CONTENT) block provided below is for your knowledge returned by the search engine and is not shared by the user.
-You have to answer question on the basis of it and cite the relevant information from it but you do not have to talk about the context in your response.
-If you think there's nothing relevant in the search results, you can say that 'Hmm, sorry I could not find any relevant information on this topic. Would you like me to search again or ask something else?'.
-You do not need to do this for summarization tasks. Anything between the (SCRAPED CONTENT) is retrieved from a search engine and is not a part of the conversation with the user.
-
-Please provide a comprehensive summary based on the above instructions:
-"""
+    Please provide a comprehensive summary based on the following JSON input:
+    {json_input}
+
+    Instructions:
+    1. Analyze the query and the provided documents.
+    2. Write a detailed, long, and complete research document that is informative and relevant to the user's query.
+    3. Use an unbiased and professional tone in your response.
+    4. Do not repeat text verbatim from the input.
+    5. Provide the answer in the response itself.
+    6. You can use markdown to format your response.
+    7. Use bullet points to list information where appropriate.
+    8. Cite the answer using [number] notation along with the appropriate source URL embedded in the notation.
+    9. Place these citations at the end of the relevant sentences.
+    10. You can cite the same sentence multiple times if it's relevant to different parts of your answer.
+
+    Your response should be detailed, informative, accurate, and directly relevant to the user's query."""
 
     messages = [
         {"role": "system", "content": system_prompt},
@@ -491,10 +484,23 @@ def search_and_scrape(query, chat_history, num_results=5, scraper="trafilatura",
         for doc in reranked_docs[:num_results]:
             full_content = scrape_full_content(doc['url'], scraper, max_chars)
             doc['full_content'] = full_content
-
+
+        # Prepare JSON for LLM
+        llm_input = {
+            "query": query,
+            "documents": [
+                {
+                    "title": doc['title'],
+                    "url": doc['url'],
+                    "summary": doc['summary'],
+                    "full_content": doc['full_content']
+                } for doc in reranked_docs[:num_results]
+            ]
+        }
+
         # Step 6: LLM Summarization
-        llm_summary = llm_summarize(query, reranked_docs[:num_results], client, temperature=llm_temperature)
-
+        llm_summary = llm_summarize(json.dumps(llm_input), client, temperature=llm_temperature)
+
         return llm_summary
 
     except Exception as e:
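
The hunks above only change how llm_summarize builds its prompt; the completion call that consumes the messages list sits outside this diff. A minimal sketch of how the updated function could look end to end, assuming an OpenAI-style chat-completions client and a placeholder model name (neither is shown in this commit, and the prompt text is abbreviated here):

def llm_summarize(json_input, llm_client, temperature=0.2):
    # System prompt shortened for illustration; the commit keeps the full Sentinel persona text.
    system_prompt = "You are Sentinel, a world-class financial analysis AI model."

    # The JSON string produced by the caller is interpolated directly into the user prompt.
    user_prompt = f"""
    Please provide a comprehensive summary based on the following JSON input:
    {json_input}

    Instructions: analyze the query and the provided documents, write a detailed
    research document, and cite sources with [number] notation.
    """

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    # Assumption: an OpenAI-compatible client; app.py's actual call is not part of this diff.
    response = llm_client.chat.completions.create(
        model="placeholder-model",  # hypothetical model name
        messages=messages,
        temperature=temperature,
    )
    return response.choices[0].message.content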
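
On the caller side, the second hunk bundles the query and the reranked documents into a single JSON payload before handing it to llm_summarize. A self-contained illustration of that payload shape, using made-up document values; note that json.dumps requires an import json at the top of app.py, which this diff does not show:

import json

# Hypothetical reranked documents; in app.py these come from search, reranking, and scraping.
reranked_docs = [
    {
        "title": "Q2 earnings beat expectations",
        "url": "https://example.com/q2-earnings",
        "summary": "Short search-result snippet.",
        "full_content": "Full scraped article text goes here.",
    },
]
query = "Summarize the latest Q2 earnings commentary"

# Same payload shape as the new code in search_and_scrape.
llm_input = {
    "query": query,
    "documents": [
        {
            "title": doc["title"],
            "url": doc["url"],
            "summary": doc["summary"],
            "full_content": doc["full_content"],
        }
        for doc in reranked_docs
    ],
}

# llm_summarize now receives one serialized string instead of separate query/documents arguments.
print(json.dumps(llm_input, indent=2))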