SearXNG-WebSearch-Agent

Running

App Files Files Community

Shreyas094 committed on Oct 1, 2024

Commit

6057fe5

verified ·

1 Parent(s): 218de65

Update app.py

Browse files

Files changed (1) hide show

app.py +5 -4

app.py CHANGED Viewed

@@ -225,7 +225,7 @@ Remember to focus on financial aspects and implications in your assessment and s
         logger.error(f"Error assessing relevance and summarizing with LLM: {e}")
         return "Error: Unable to assess relevance and summarize"
-def scrape_full_content(url, scraper="trafilatura"):
     try:
         logger.info(f"Scraping full content from: {url}")
@@ -246,7 +246,8 @@ def scrape_full_content(url, scraper="trafilatura"):
             downloaded = fetch_url(url)
             content = extract(downloaded, include_comments=False, include_tables=True, no_fallback=False)
-        return content or ""
     except Exception as e:
         logger.error(f"Error scraping full content from {url}: {e}")
         return ""
@@ -295,7 +296,7 @@ Please provide a comprehensive summary based on the above instructions:
         logger.error(f"Error in LLM summarization: {e}")
         return "Error: Unable to generate a summary. Please try again."
-def search_and_scrape(query, chat_history, num_results=5, scraper="trafilatura", max_chars=2000, time_range="", language="all", category="",
                       engines=[], safesearch=2, method="GET", llm_temperature=0.2):
     try:
         # Step 1: Rephrase the Query
@@ -435,7 +436,7 @@ def search_and_scrape(query, chat_history, num_results=5, scraper="trafilatura",
         # Step 5: Scrape full content for top 5 documents
         for doc in reranked_docs[:5]:
-            full_content = scrape_full_content(doc['url'], scraper)
             doc['full_content'] = full_content
         # Step 6: LLM Summarization

         logger.error(f"Error assessing relevance and summarizing with LLM: {e}")
         return "Error: Unable to assess relevance and summarize"
+def scrape_full_content(url, scraper="trafilatura", max_chars=3000):
     try:
         logger.info(f"Scraping full content from: {url}")
             downloaded = fetch_url(url)
             content = extract(downloaded, include_comments=False, include_tables=True, no_fallback=False)
+        # Limit the content to max_chars
+        return content[:max_chars] if content else ""
     except Exception as e:
         logger.error(f"Error scraping full content from {url}: {e}")
         return ""
         logger.error(f"Error in LLM summarization: {e}")
         return "Error: Unable to generate a summary. Please try again."
+def search_and_scrape(query, chat_history, num_results=5, scraper="trafilatura", max_chars=3000, time_range="", language="all", category="",
                       engines=[], safesearch=2, method="GET", llm_temperature=0.2):
     try:
         # Step 1: Rephrase the Query
         # Step 5: Scrape full content for top 5 documents
         for doc in reranked_docs[:5]:
+            full_content = scrape_full_content(doc['url'], scraper, max_chars)
             doc['full_content'] = full_content
         # Step 6: LLM Summarization