from smolagents import CodeAgent, HfApiModel, tool
from tools.final_answer import FinalAnswerTool
from tools.visit_webpage import VisitWebpageTool
from Gradio_UI import GradioUI
import requests
import yaml
import os
from typing import Dict, List

@tool
def fetch_news(topic: str, num_results: int = 5) -> List[Dict]:
    """Fetches recent news articles about any topic using Serper.dev.

    Args:
        topic: The topic to search for news about
        num_results: Number of news articles to retrieve (default: 5)

    Returns:
        List of dictionaries containing article information
    """
    try:
        api_key = os.environ.get("SERPER_API_KEY")
        if not api_key:
            return "Error: SERPER_API_KEY not found in environment variables"

        # Serper.dev's documented API takes a POST request with a JSON body,
        # not a GET with query parameters
        url = "https://google.serper.dev/news"
        headers = {"X-API-KEY": api_key}
        payload = {
            "q": topic,
            "gl": "us",
            "hl": "en"
        }
        response = requests.post(url, headers=headers, json=payload)
        response.raise_for_status()
        results = response.json()

        if "news" not in results:
            return []

        articles = []
        for article in results["news"][:num_results]:
            articles.append({
                'title': article.get('title', 'No title'),
                'source': article.get('source', 'Unknown source'),
                'date': article.get('date', 'No date'),
                'link': article.get('link', 'No link'),
                'snippet': article.get('snippet', 'No preview available')
            })
        return articles
    except Exception as e:
        return f"Error: {str(e)}"

@tool
def scrape_articles(articles: List[Dict]) -> List[Dict]:
    """Scrapes the full content of news articles from their URLs.

    Args:
        articles: List of article dictionaries containing article information

    Returns:
        List of articles with an additional full_content field
    """
    webpage_tool = VisitWebpageTool()
    for article in articles:
        try:
            # Skip known paywalled sites
            domain = article['link'].lower()
            if any(site in domain for site in ['nytimes.com', 'wsj.com', 'ft.com']):
                article['full_content'] = f"Content not accessible - {article['source']} article requires subscription"
                continue
            full_content = webpage_tool.forward(article['link'])
            if full_content and len(full_content.strip()) > 0:
                article['full_content'] = full_content
            else:
                article['full_content'] = article['snippet']
        except Exception:
            # Fall back to the search snippet if the page cannot be fetched
            article['full_content'] = article['snippet']
    return articles
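
# VisitWebpageTool.forward(url) returns the page text (markdown, if it mirrors
# the standard smolagents tool of the same name); empty or failed fetches fall
# back to the search snippet, so scrape_articles itself never raises.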

# Load prompt templates
with open("prompts.yaml", 'r') as stream:
    prompt_templates = yaml.safe_load(stream)

# Initialize the models
qwen_model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
    custom_role_conversions=None,
)

deepseek_model = HfApiModel(
    max_tokens=2096,
    temperature=0.3,  # Lower temperature for more focused summaries
    model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B',
    custom_role_conversions=None,
)
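
# Usage sketch: HfApiModel instances are invoked with a chat-style message
# list rather than a raw prompt string, e.g.:
#   reply = qwen_model([{"role": "user", "content": "Hello"}])
#   print(reply.content)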

@tool
def summarize_news(articles: List[Dict]) -> str:
    """Creates a summary of the news articles followed by a list of sources.

    Args:
        articles: List of article dictionaries containing news article information including
            title, source, date, link, snippet, and full_content fields

    Returns:
        A string containing a summary followed by article references
    """
    if not articles or not isinstance(articles, list):
        return "No articles to summarize"

    # Prepare content for summarization
    content_to_summarize = ""
    for article in articles:
        content = article.get('full_content', article['snippet'])
        content_to_summarize += f"Title: {article['title']}\nSource: {article['source']}\nDate: {article['date']}\nLink: {article['link']}\nContent: {content}\n\n"

    summary_prompt = f"""Summarize these news articles in a clear and concise way. Include the most important developments and add links to the original articles.

{content_to_summarize}"""

    try:
        # HfApiModel has no .complete() method; it is called with a
        # chat-style message list and returns a message whose .content
        # attribute holds the generated text
        response = deepseek_model([{"role": "user", "content": summary_prompt}])
        return response.content.strip()
    except Exception as e:
        print(f"DeepSeek summarization failed: {str(e)}")
        return original_summary_format(articles)

def original_summary_format(articles: List[Dict]) -> str:
    # Original summary format, used as a fallback when the model call fails
    summary = "📰 Summary:\n"
    summary += "Latest news covers " + ", ".join(set(article['source'] for article in articles)) + ". "
    all_snippets = [article.get('full_content', article['snippet']) for article in articles]
    summary += "Key points: " + ". ".join(all_snippets[:2]) + "\n\n"
    summary += "🔍 Articles:\n"
    for idx, article in enumerate(articles, 1):
        title = article['title']
        link = article['link']
        date = article['date']
        content = article.get('full_content', article['snippet'])
        snippet = (content[:200] + "...") if len(content) > 200 else content
        summary += f"{idx}. **{title}**\n"
        summary += f"   {snippet}\n"
        summary += f"   [Read more]({link}) ({date})\n\n"
    return summary
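
# The fallback renders markdown along these lines (illustrative output):
#   📰 Summary:
#   Latest news covers Reuters, BBC. Key points: ...
#
#   🔍 Articles:
#   1. **Some headline**
#      First 200 characters of the article...
#      [Read more](https://example.com/story) (2 hours ago)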

final_answer = FinalAnswerTool()

# Create the agent with all tools
agent = CodeAgent(
    model=qwen_model,  # Use the Qwen model for the main agent loop
    tools=[fetch_news, scrape_articles, summarize_news, final_answer],
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name="News Agent",
    description="An agent that fetches and summarizes news about any topic",
    prompt_templates=prompt_templates
)

# Launch the Gradio interface
if __name__ == "__main__":
    GradioUI(agent).launch()
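
# Illustrative direct usage (outside the Gradio UI), assuming SERPER_API_KEY
# is set and prompts.yaml is present:
#   agent.run("Summarize the latest news about open-source AI models")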