fdaudens HF staff committed on
Commit
f73e1fc
·
verified ·
1 Parent(s): 0f51b42

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -2
app.py CHANGED
@@ -6,6 +6,7 @@ import requests
6
  import yaml
7
  import os
8
  from typing import Dict, List, Optional
 
9
 
10
  @tool
11
  def fetch_news(topic: str, num_results: int = 5) -> List[Dict]:
@@ -70,10 +71,19 @@ def scrape_articles(articles: List[Dict]) -> List[Dict]:
70
 
71
  for article in articles:
72
  try:
 
 
 
 
 
 
73
  full_content = webpage_tool.forward(article['link'])
74
- article['full_content'] = full_content
 
 
 
75
  except Exception as e:
76
- article['full_content'] = f"Failed to scrape content: {str(e)}"
77
 
78
  return articles
79
 
 
6
  import yaml
7
  import os
8
  from typing import Dict, List, Optional
9
+ import re
10
 
11
  @tool
12
  def fetch_news(topic: str, num_results: int = 5) -> List[Dict]:
 
71
 
72
  for article in articles:
73
  try:
74
+ # Skip known paywalled sites
75
+ domain = article['link'].lower()
76
+ if any(site in domain for site in ['nytimes.com', 'wsj.com', 'ft.com']):
77
+ article['full_content'] = f"Content not accessible - {article['source']} article requires subscription"
78
+ continue
79
+
80
  full_content = webpage_tool.forward(article['link'])
81
+ if full_content and len(full_content.strip()) > 0:
82
+ article['full_content'] = full_content
83
+ else:
84
+ article['full_content'] = article['snippet']
85
  except Exception as e:
86
+ article['full_content'] = article['snippet']
87
 
88
  return articles
89