Update app.py

app.py CHANGED
@@ -1,155 +1,51 @@
-from ...
-from tools...
-from tools.visit_webpage import VisitWebpageTool
-from Gradio_UI import GradioUI
 import requests
-import yaml
-import os
-from typing import List, Dict
-import re
 
-[... @tool decorator, function definition, and docstring opening not recoverable from this view ...]
-    Args:
-        topic: The topic to search for news about
-        num_results: Number of news articles to retrieve (default: 5)
-
-    Returns:
-        List of dictionaries containing article information
-    """
-    try:
-        api_key = os.environ.get("SERPER_API_KEY")
-        if not api_key:
-            return "Error: SERPER_API_KEY not found in environment variables"
-
-        url = f"https://google.serper.dev/news"
-        headers = {
-            "X-API-KEY": api_key
-        }
-        params = {
-            "q": topic,
-            "gl": "us",
-            "hl": "en"
-        }
-
-        response = requests.get(url, headers=headers, params=params)
-        response.raise_for_status()
-
-        results = response.json()
-
-        if "news" not in results:
-            return []
-
-        articles = []
-        for article in results["news"][:num_results]:
-            articles.append({
-                'title': article.get('title', 'No title'),
-                'source': article.get('source', 'Unknown source'),
-                'date': article.get('date', 'No date'),
-                'link': article.get('link', 'No link'),
-                'snippet': article.get('snippet', 'No preview available')
-            })
-
-        return articles
-
-    except Exception as e:
-        return f"Error: {str(e)}"
 
-
-def scrape_articles(articles: List[Dict]) -> List[Dict]:
-    """Scrapes the full content of news articles from their URLs.
-
-    Args:
-        articles: List of article dictionaries containing article information
-
-    Returns:
-        List of articles with additional full_content field
-    """
-    webpage_tool = VisitWebpageTool()
-
-    for article in articles:
        try:
-[... body of scrape_articles and the opening of the summarizer tool not recoverable from this view ...]
-    """Creates a summary of the news articles followed by a list of sources.
-
-    Args:
-        articles: List of article dictionaries containing title, source, date, link, snippet, and full_content
-
-    Returns:
-        A string containing a summary followed by article references
-    """
-    if not articles or not isinstance(articles, list):
-        return "No articles to summarize"
-
-    # Collect all content for the overall summary
-    all_content = [article.get('full_content', article['snippet']) for article in articles]
-
-    # Create a high-level summary from content
-    summary = "📰 Summary:\n"
-    summary += "Latest news covers " + ", ".join(set(article['source'] for article in articles)) + ". "
-    summary += "Key points: " + ". ".join(all_content[:2]) + "\n\n"
-
-    # List individual articles
-    summary += "🔍 Articles:\n"
-    for idx, article in enumerate(articles, 1):
-        title = article['title']
-        link = article['link']
-        date = article['date']
-        content = article.get('full_content', article['snippet'])
-        snippet = content[:200] + "..." if len(content) > 200 else content
-
-        summary += f"{idx}. **{title}**\n"
-        summary += f"   {snippet}\n"
-        summary += f"   [Read more]({link}) ({date})\n\n"
-
-    return summary
 
-# ...
-with open("prompts.yaml", 'r') as stream:
-    prompt_templates = yaml.safe_load(stream)
 
-# ...
-model = HfApiModel(
-    max_tokens=2096,
-    temperature=0.5,
-    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
-    custom_role_conversions=None,
-)
 
-
 
-agent = CodeAgent(
-[... model, tools, and other agent arguments not recoverable from this view ...]
-    grammar=None,
-    planning_interval=None,
-    name="News Agent",
-    description="An agent that fetches and summarizes news about any topic",
-    prompt_templates=prompt_templates
-)
 
-
-
-GradioUI(agent).launch()
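The removed scrape_articles body is lost in this view. Since the function instantiates VisitWebpageTool and the summarizer reads a full_content key with snippet as the fallback, a minimal sketch of what the loop plausibly did (an assumption, not the commit's actual code):

    # Hypothetical reconstruction of the lost loop body -- an assumption,
    # not the original code. Fetch each article's page and store it under
    # 'full_content', falling back to the snippet if the fetch fails.
    for article in articles:
        try:
            article['full_content'] = webpage_tool.forward(article['link'])
        except Exception:
            article['full_content'] = article.get('snippet', '')
    return articles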
+from typing import Any, Optional
+from smolagents.tools import Tool
 import requests
+import markdownify
+import smolagents
+import re  # Add re import here
 
+class VisitWebpageTool(Tool):
+    name = "visit_webpage"
+    description = "Visits a webpage at the given url and reads its content as a markdown string. Use this to browse webpages."
+    inputs = {'url': {'type': 'string', 'description': 'The url of the webpage to visit.'}}
+    output_type = "string"
 
+    def forward(self, url: str) -> str:
        try:
+            import requests
+            from markdownify import markdownify
+            from requests.exceptions import RequestException
+
+            from smolagents.utils import truncate_content
+        except ImportError as e:
+            raise ImportError(
+                "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
+            ) from e
+        try:
+            # Add user agent to avoid some blocking
+            headers = {
+                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+            }
+
+            # Send a GET request to the URL with a 20-second timeout
+            response = requests.get(url, timeout=20, headers=headers)
+            response.raise_for_status()
 
+            # Convert the HTML content to Markdown
+            markdown_content = markdownify(response.text).strip()
 
+            # Remove multiple line breaks
+            markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
 
+            return truncate_content(markdown_content, 10000)
 
+        except requests.exceptions.Timeout:
+            return "The request timed out. Please try again later or check the URL."
+        except RequestException as e:
+            return f"Error fetching the webpage: {str(e)}"
+        except Exception as e:
+            return f"An unexpected error occurred: {str(e)}"
 
+    def __init__(self, *args, **kwargs):
+        self.is_initialized = False
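A quick way to exercise the new tool on its own (a minimal sketch; the URL is an arbitrary example):

    # Minimal usage sketch for VisitWebpageTool; the URL is an arbitrary example.
    tool = VisitWebpageTool()
    print(tool.forward("https://example.com"))

The __init__ that only sets is_initialized = False, with no super().__init__ call, appears to match the stub smolagents generates for exported tools, so it is expected boilerplate rather than a bug.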