Commit 6057fe5
Shreyas094 committed
1 Parent(s): 218de65

Update app.py

Files changed (1)
  1. app.py +5 -4
app.py CHANGED
@@ -225,7 +225,7 @@ Remember to focus on financial aspects and implications in your assessment and s
         logger.error(f"Error assessing relevance and summarizing with LLM: {e}")
         return "Error: Unable to assess relevance and summarize"
 
-def scrape_full_content(url, scraper="trafilatura"):
+def scrape_full_content(url, scraper="trafilatura", max_chars=3000):
     try:
         logger.info(f"Scraping full content from: {url}")
 
@@ -246,7 +246,8 @@ def scrape_full_content(url, scraper="trafilatura"):
         downloaded = fetch_url(url)
         content = extract(downloaded, include_comments=False, include_tables=True, no_fallback=False)
 
-        return content or ""
+        # Limit the content to max_chars
+        return content[:max_chars] if content else ""
     except Exception as e:
         logger.error(f"Error scraping full content from {url}: {e}")
         return ""
@@ -295,7 +296,7 @@ Please provide a comprehensive summary based on the above instructions:
         logger.error(f"Error in LLM summarization: {e}")
         return "Error: Unable to generate a summary. Please try again."
 
-def search_and_scrape(query, chat_history, num_results=5, scraper="trafilatura", max_chars=2000, time_range="", language="all", category="",
+def search_and_scrape(query, chat_history, num_results=5, scraper="trafilatura", max_chars=3000, time_range="", language="all", category="",
                       engines=[], safesearch=2, method="GET", llm_temperature=0.2):
     try:
         # Step 1: Rephrase the Query
@@ -435,7 +436,7 @@ def search_and_scrape(query, chat_history, num_results=5, scraper="trafilatura",
 
         # Step 5: Scrape full content for top 5 documents
         for doc in reranked_docs[:5]:
-            full_content = scrape_full_content(doc['url'], scraper)
+            full_content = scrape_full_content(doc['url'], scraper, max_chars)
            doc['full_content'] = full_content
 
         # Step 6: LLM Summarization
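For reference, the net effect of the commit is that scrape_full_content now caps the extracted text at max_chars (default 3000) and search_and_scrape forwards its own max_chars value when scraping the top reranked documents. The sketch below shows only the trafilatura path visible in the changed hunks; the logging setup and example URL are illustrative, and whatever app.py does for other scraper values (the lines between the two hunks) is omitted.

import logging

from trafilatura import extract, fetch_url

logger = logging.getLogger(__name__)

def scrape_full_content(url, scraper="trafilatura", max_chars=3000):
    """Fetch a page and return at most max_chars of extracted text."""
    try:
        logger.info(f"Scraping full content from: {url}")
        # Only the trafilatura branch from the diff is shown; handling of
        # other `scraper` values lies outside the changed hunks.
        downloaded = fetch_url(url)
        content = extract(downloaded, include_comments=False,
                          include_tables=True, no_fallback=False)
        # The change introduced by this commit: truncate before returning
        return content[:max_chars] if content else ""
    except Exception as e:
        logger.error(f"Error scraping full content from {url}: {e}")
        return ""

# Illustrative call: cap the scraped text at 500 characters
snippet = scrape_full_content("https://example.com", max_chars=500)

Truncating at scrape time bounds the per-document text handed to the later LLM summarization step (Step 6), which is consistent with the default max_chars in search_and_scrape also being raised from 2000 to 3000 in this commit.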