fdaudens (HF staff) committed
Commit 81db657 · verified · 1 Parent(s): f73e1fc

Update app.py

Files changed (1):
  1. app.py: +42 -146
app.py CHANGED
@@ -1,155 +1,51 @@
-from smolagents import CodeAgent, HfApiModel, tool
-from tools.final_answer import FinalAnswerTool
-from tools.visit_webpage import VisitWebpageTool
-from Gradio_UI import GradioUI
+from typing import Any, Optional
+from smolagents.tools import Tool
 import requests
-import yaml
-import os
-from typing import Dict, List, Optional
-import re
+import markdownify
+import smolagents
+import re # Add re import here
 
-@tool
-def fetch_news(topic: str, num_results: int = 5) -> List[Dict]:
-    """Fetches recent news articles about any topic using Serper.dev.
-
-    Args:
-        topic: The topic to search for news about
-        num_results: Number of news articles to retrieve (default: 5)
-
-    Returns:
-        List of dictionaries containing article information
-    """
-    try:
-        api_key = os.environ.get("SERPER_API_KEY")
-        if not api_key:
-            return "Error: SERPER_API_KEY not found in environment variables"
-
-        url = f"https://google.serper.dev/news"
-        headers = {
-            "X-API-KEY": api_key
-        }
-        params = {
-            "q": topic,
-            "gl": "us",
-            "hl": "en"
-        }
-
-        response = requests.get(url, headers=headers, params=params)
-        response.raise_for_status()
-
-        results = response.json()
-
-        if "news" not in results:
-            return []
-
-        articles = []
-        for article in results["news"][:num_results]:
-            articles.append({
-                'title': article.get('title', 'No title'),
-                'source': article.get('source', 'Unknown source'),
-                'date': article.get('date', 'No date'),
-                'link': article.get('link', 'No link'),
-                'snippet': article.get('snippet', 'No preview available')
-            })
-
-        return articles
-
-    except Exception as e:
-        return f"Error: {str(e)}"
+class VisitWebpageTool(Tool):
+    name = "visit_webpage"
+    description = "Visits a webpage at the given url and reads its content as a markdown string. Use this to browse webpages."
+    inputs = {'url': {'type': 'string', 'description': 'The url of the webpage to visit.'}}
+    output_type = "string"
 
-@tool
-def scrape_articles(articles: List[Dict]) -> List[Dict]:
-    """Scrapes the full content of news articles from their URLs.
-
-    Args:
-        articles: List of article dictionaries containing article information
-
-    Returns:
-        List of articles with additional full_content field
-    """
-    webpage_tool = VisitWebpageTool()
-
-    for article in articles:
+    def forward(self, url: str) -> str:
         try:
-            # Skip known paywalled sites
-            domain = article['link'].lower()
-            if any(site in domain for site in ['nytimes.com', 'wsj.com', 'ft.com']):
-                article['full_content'] = f"Content not accessible - {article['source']} article requires subscription"
-                continue
-
-            full_content = webpage_tool.forward(article['link'])
-            if full_content and len(full_content.strip()) > 0:
-                article['full_content'] = full_content
-            else:
-                article['full_content'] = article['snippet']
-        except Exception as e:
-            article['full_content'] = article['snippet']
-
-    return articles
-
-@tool
-def summarize_news(articles: List[Dict]) -> str:
-    """Creates a summary of the news articles followed by a list of sources.
-
-    Args:
-        articles: List of article dictionaries containing title, source, date, link, snippet, and full_content
-
-    Returns:
-        A string containing a summary followed by article references
-    """
-    if not articles or not isinstance(articles, list):
-        return "No articles to summarize"
-
-    # Collect all content for the overall summary
-    all_content = [article.get('full_content', article['snippet']) for article in articles]
-
-    # Create a high-level summary from content
-    summary = "📰 Summary:\n"
-    summary += "Latest news covers " + ", ".join(set(article['source'] for article in articles)) + ". "
-    summary += "Key points: " + ". ".join(all_content[:2]) + "\n\n"
-
-    # List individual articles
-    summary += "🔍 Articles:\n"
-    for idx, article in enumerate(articles, 1):
-        title = article['title']
-        link = article['link']
-        date = article['date']
-        content = article.get('full_content', article['snippet'])
-        snippet = content[:200] + "..." if len(content) > 200 else content
-
-        summary += f"{idx}. **{title}**\n"
-        summary += f"   {snippet}\n"
-        summary += f"   [Read more]({link}) ({date})\n\n"
-
-    return summary
+            import requests
+            from markdownify import markdownify
+            from requests.exceptions import RequestException
+
+            from smolagents.utils import truncate_content
+        except ImportError as e:
+            raise ImportError(
+                "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
+            ) from e
+        try:
+            # Add user agent to avoid some blocking
+            headers = {
+                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+            }
+
+            # Send a GET request to the URL with a 20-second timeout
+            response = requests.get(url, timeout=20, headers=headers)
+            response.raise_for_status()
 
-# Load prompt templates
-with open("prompts.yaml", 'r') as stream:
-    prompt_templates = yaml.safe_load(stream)
+            # Convert the HTML content to Markdown
+            markdown_content = markdownify(response.text).strip()
 
-# Initialize the model
-model = HfApiModel(
-    max_tokens=2096,
-    temperature=0.5,
-    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
-    custom_role_conversions=None,
-)
+            # Remove multiple line breaks
+            markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
 
-final_answer = FinalAnswerTool()
+            return truncate_content(markdown_content, 10000)
 
-# Create the agent with all tools
-agent = CodeAgent(
-    model=model,
-    tools=[fetch_news, scrape_articles, summarize_news, final_answer], # Added scrape_articles
-    max_steps=6,
-    verbosity_level=1,
-    grammar=None,
-    planning_interval=None,
-    name="News Agent",
-    description="An agent that fetches and summarizes news about any topic",
-    prompt_templates=prompt_templates
-)
+        except requests.exceptions.Timeout:
+            return "The request timed out. Please try again later or check the URL."
+        except RequestException as e:
+            return f"Error fetching the webpage: {str(e)}"
+        except Exception as e:
+            return f"An unexpected error occurred: {str(e)}"
 
-# Launch the Gradio interface
-if __name__ == "__main__":
-    GradioUI(agent).launch()
+    def __init__(self, *args, **kwargs):
+        self.is_initialized = False
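
For a quick smoke test of the rewritten app.py, here is a minimal sketch. Assumptions not in the commit: `requests`, `markdownify`, and `smolagents` are installed so the module imports cleanly, and the URL is an arbitrary example.

    # Hypothetical usage sketch, not part of this commit.
    # Assumes requests, markdownify, and smolagents are installed.
    from app import VisitWebpageTool

    tool = VisitWebpageTool()
    # forward() fetches the page, converts the HTML to Markdown,
    # and truncates the result to 10,000 characters.
    markdown = tool.forward("https://huggingface.co/blog")  # example URL (assumption)
    print(markdown[:300])  # preview the converted content

Calling forward() directly skips the smolagents tool-call plumbing, which should be enough to verify the fetch-and-convert path works.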