Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -703,6 +703,36 @@ class ChatBot:
|
|
703 |
logger.error(f'Error in search_and_summarize: {e}')
|
704 |
return f"Error occurred: {str(e)}"
|
705 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
706 |
async def get_response(self,
|
707 |
query: str,
|
708 |
history: List[List[str]],
|
|
|
703 |
logger.error(f'Error in search_and_summarize: {e}')
|
704 |
return f"Error occurred: {str(e)}"
|
705 |
|
706 |
+
async def scrape_specific_urls(self, urls: List[str], max_chars: int) -> List[Dict[str, Any]]:
    """Scrape specific URLs provided by the user.

    Args:
        urls: URLs to fetch directly.
        max_chars: Passed through unchanged to ``scrape_urls_parallel``.

    Returns:
        One dict per scraped article with the keys ``url``, ``title``,
        ``content``, ``publish_date``, ``score`` and ``engine``. An empty
        list is returned when no URLs are given, when nothing could be
        scraped, or when an error occurs (the error is logged, not raised).
    """
    # Nothing to scrape: skip the scraper call entirely.
    if not urls:
        return []

    logger.info(f'Scraping specific URLs: {urls}')
    try:
        # scrape_urls_parallel is handed result-style dicts; only the "url"
        # key is populated here.
        results = [{"url": url} for url in urls]
        valid_results = await scrape_urls_parallel(results, max_chars)

        if not valid_results:
            logger.info("No valid content found from provided URLs")
            return []

        processed_articles = []
        for result, article in valid_results:
            if not article:
                continue
            url = result["url"]
            processed_articles.append({
                "url": url,
                # Use the domain as the title. netloc is empty for URLs
                # without a scheme (e.g. "example.com/page"), so fall back
                # to the raw URL rather than emitting a blank title.
                "title": urlparse(url).netloc or url,
                "content": article["content"],
                "publish_date": article["publish_date"],
                "score": 1.0,  # Direct URL scraping, so score is 1.0
                "engine": "direct_url",
            })

        return processed_articles

    except Exception as e:
        # Best-effort: callers get an empty list instead of an exception.
        logger.error(f'Error scraping specific URLs: {e}')
        return []
|
735 |
+
|
736 |
async def get_response(self,
|
737 |
query: str,
|
738 |
history: List[List[str]],
|