Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -6,6 +6,7 @@ import requests
|
|
6 |
import yaml
|
7 |
import os
|
8 |
from typing import Dict, List, Optional
|
|
|
9 |
|
10 |
@tool
|
11 |
def fetch_news(topic: str, num_results: int = 5) -> List[Dict]:
|
@@ -70,10 +71,19 @@ def scrape_articles(articles: List[Dict]) -> List[Dict]:
|
|
70 |
|
71 |
for article in articles:
|
72 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
full_content = webpage_tool.forward(article['link'])
|
74 |
-
|
|
|
|
|
|
|
75 |
except Exception as e:
|
76 |
-
article['full_content'] =
|
77 |
|
78 |
return articles
|
79 |
|
|
|
6 |
import yaml
|
7 |
import os
|
8 |
from typing import Dict, List, Optional
|
9 |
+
import re
|
10 |
|
11 |
@tool
|
12 |
def fetch_news(topic: str, num_results: int = 5) -> List[Dict]:
|
|
|
71 |
|
72 |
for article in articles:
|
73 |
try:
|
74 |
+
# Skip known paywalled sites
|
75 |
+
domain = article['link'].lower()
|
76 |
+
if any(site in domain for site in ['nytimes.com', 'wsj.com', 'ft.com']):
|
77 |
+
article['full_content'] = f"Content not accessible - {article['source']} article requires subscription"
|
78 |
+
continue
|
79 |
+
|
80 |
full_content = webpage_tool.forward(article['link'])
|
81 |
+
if full_content and len(full_content.strip()) > 0:
|
82 |
+
article['full_content'] = full_content
|
83 |
+
else:
|
84 |
+
article['full_content'] = article['snippet']
|
85 |
except Exception as e:
|
86 |
+
article['full_content'] = article['snippet']
|
87 |
|
88 |
return articles
|
89 |
|