Shreyas094
committed on
Commit
•
6057fe5
1
Parent(s):
218de65
Update app.py
Browse files
app.py
CHANGED
@@ -225,7 +225,7 @@ Remember to focus on financial aspects and implications in your assessment and s
|
|
225 |
logger.error(f"Error assessing relevance and summarizing with LLM: {e}")
|
226 |
return "Error: Unable to assess relevance and summarize"
|
227 |
|
228 |
-
def scrape_full_content(url, scraper="trafilatura"):
|
229 |
try:
|
230 |
logger.info(f"Scraping full content from: {url}")
|
231 |
|
@@ -246,7 +246,8 @@ def scrape_full_content(url, scraper="trafilatura"):
|
|
246 |
downloaded = fetch_url(url)
|
247 |
content = extract(downloaded, include_comments=False, include_tables=True, no_fallback=False)
|
248 |
|
249 |
-
|
|
|
250 |
except Exception as e:
|
251 |
logger.error(f"Error scraping full content from {url}: {e}")
|
252 |
return ""
|
@@ -295,7 +296,7 @@ Please provide a comprehensive summary based on the above instructions:
|
|
295 |
logger.error(f"Error in LLM summarization: {e}")
|
296 |
return "Error: Unable to generate a summary. Please try again."
|
297 |
|
298 |
-
def search_and_scrape(query, chat_history, num_results=5, scraper="trafilatura", max_chars=
|
299 |
engines=[], safesearch=2, method="GET", llm_temperature=0.2):
|
300 |
try:
|
301 |
# Step 1: Rephrase the Query
|
@@ -435,7 +436,7 @@ def search_and_scrape(query, chat_history, num_results=5, scraper="trafilatura",
|
|
435 |
|
436 |
# Step 5: Scrape full content for top 5 documents
|
437 |
for doc in reranked_docs[:5]:
|
438 |
-
full_content = scrape_full_content(doc['url'], scraper)
|
439 |
doc['full_content'] = full_content
|
440 |
|
441 |
# Step 6: LLM Summarization
|
|
|
225 |
logger.error(f"Error assessing relevance and summarizing with LLM: {e}")
|
226 |
return "Error: Unable to assess relevance and summarize"
|
227 |
|
228 |
+
def scrape_full_content(url, scraper="trafilatura", max_chars=3000):
|
229 |
try:
|
230 |
logger.info(f"Scraping full content from: {url}")
|
231 |
|
|
|
246 |
downloaded = fetch_url(url)
|
247 |
content = extract(downloaded, include_comments=False, include_tables=True, no_fallback=False)
|
248 |
|
249 |
+
# Limit the content to max_chars
|
250 |
+
return content[:max_chars] if content else ""
|
251 |
except Exception as e:
|
252 |
logger.error(f"Error scraping full content from {url}: {e}")
|
253 |
return ""
|
|
|
296 |
logger.error(f"Error in LLM summarization: {e}")
|
297 |
return "Error: Unable to generate a summary. Please try again."
|
298 |
|
299 |
+
def search_and_scrape(query, chat_history, num_results=5, scraper="trafilatura", max_chars=3000, time_range="", language="all", category="",
|
300 |
engines=[], safesearch=2, method="GET", llm_temperature=0.2):
|
301 |
try:
|
302 |
# Step 1: Rephrase the Query
|
|
|
436 |
|
437 |
# Step 5: Scrape full content for top 5 documents
|
438 |
for doc in reranked_docs[:5]:
|
439 |
+
full_content = scrape_full_content(doc['url'], scraper, max_chars)
|
440 |
doc['full_content'] = full_content
|
441 |
|
442 |
# Step 6: LLM Summarization
|