# my-news-agent / app.py
from smolagents import CodeAgent, HfApiModel, tool
from tools.final_answer import FinalAnswerTool
from tools.visit_webpage import VisitWebpageTool
from Gradio_UI import GradioUI
import requests
import yaml
import os
from typing import Dict, List


@tool
def fetch_news(topic: str, num_results: int = 5) -> List[Dict]:
    """Fetches recent news articles about any topic using Serper.dev.

    Args:
        topic: The topic to search for news about
        num_results: Number of news articles to retrieve (default: 5)

    Returns:
        List of dictionaries containing article information, or an error string
        if the API key is missing or the request fails
    """
    try:
        api_key = os.environ.get("SERPER_API_KEY")
        if not api_key:
            return "Error: SERPER_API_KEY not found in environment variables"

        url = "https://google.serper.dev/news"
        headers = {
            "X-API-KEY": api_key
        }
        params = {
            "q": topic,
            "gl": "us",
            "hl": "en"
        }

        response = requests.get(url, headers=headers, params=params)
        response.raise_for_status()
        results = response.json()

        if "news" not in results:
            return []

        # Keep only the requested number of results and normalize the fields
        articles = []
        for article in results["news"][:num_results]:
            articles.append({
                'title': article.get('title', 'No title'),
                'source': article.get('source', 'Unknown source'),
                'date': article.get('date', 'No date'),
                'link': article.get('link', 'No link'),
                'snippet': article.get('snippet', 'No preview available')
            })
        return articles
    except Exception as e:
        return f"Error: {str(e)}"
@tool
def scrape_articles(articles: List[Dict]) -> List[Dict]:
    """Scrapes the full content of news articles from their URLs.

    Args:
        articles: List of article dictionaries containing article information

    Returns:
        List of articles with an additional full_content field
    """
    webpage_tool = VisitWebpageTool()
    for article in articles:
        try:
            # Skip known paywalled sites
            domain = article['link'].lower()
            if any(site in domain for site in ['nytimes.com', 'wsj.com', 'ft.com']):
                article['full_content'] = f"Content not accessible - {article['source']} article requires subscription"
                continue

            full_content = webpage_tool.forward(article['link'])
            if full_content and len(full_content.strip()) > 0:
                article['full_content'] = full_content
            else:
                article['full_content'] = article['snippet']
        except Exception:
            # Fall back to the search snippet if the page cannot be fetched
            article['full_content'] = article['snippet']
    return articles
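
# Illustrative usage only (not part of the original app): scrape_articles is meant
# to be chained after fetch_news, enriching each article dict with full_content:
#
#   enriched = scrape_articles(fetch_news("climate policy", num_results=3))
#   # each item now has 'full_content' (scraped text, or the snippet as a fallback)
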
@tool
def summarize_news(articles: List[Dict]) -> str:
    """Creates a summary of the news articles followed by a list of sources.

    Args:
        articles: List of article dictionaries containing title, source, date, link, snippet, and full_content

    Returns:
        A string containing a summary followed by article references
    """
    if not articles or not isinstance(articles, list):
        return "No articles to summarize"

    # Collect all content for the overall summary
    all_content = [article.get('full_content', article['snippet']) for article in articles]

    # Create a high-level summary from the sources and the first two articles' content
    summary = "📰 Summary:\n"
    summary += "Latest news covers " + ", ".join(set(article['source'] for article in articles)) + ". "
    summary += "Key points: " + ". ".join(all_content[:2]) + "\n\n"

    # List individual articles with a short preview, link, and date
    summary += "🔍 Articles:\n"
    for idx, article in enumerate(articles, 1):
        title = article['title']
        link = article['link']
        date = article['date']
        content = article.get('full_content', article['snippet'])
        snippet = content[:200] + "..." if len(content) > 200 else content

        summary += f"{idx}. **{title}**\n"
        summary += f" {snippet}\n"
        summary += f" [Read more]({link}) ({date})\n\n"
    return summary
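
# Illustrative usage only (not part of the original app): the three tools form a
# simple pipeline the agent can follow, e.g.:
#
#   report = summarize_news(scrape_articles(fetch_news("semiconductors")))
#   print(report)  # "📰 Summary:" section followed by a numbered "🔍 Articles:" list
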
# Load prompt templates
with open("prompts.yaml", 'r') as stream:
    prompt_templates = yaml.safe_load(stream)

# Initialize the model
model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
    custom_role_conversions=None,
)

final_answer = FinalAnswerTool()

# Create the agent with all tools
agent = CodeAgent(
    model=model,
    tools=[fetch_news, scrape_articles, summarize_news, final_answer],
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name="News Agent",
    description="An agent that fetches and summarizes news about any topic",
    prompt_templates=prompt_templates
)
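
# Illustrative usage only (not part of the original app): the agent can also be run
# programmatically instead of through the Gradio UI, e.g.:
#
#   result = agent.run("Give me a summary of today's news about renewable energy")
#   print(result)
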
# Launch the Gradio interface
if __name__ == "__main__":
    GradioUI(agent).launch()