from smolagents import CodeAgent, HfApiModel, tool
from tools.final_answer import FinalAnswerTool
from tools.visit_webpage import VisitWebpageTool
from Gradio_UI import GradioUI
import requests
import yaml
import os
from typing import Dict, List

@tool
def fetch_news(topic: str, num_results: int = 5) -> List[Dict]:
    """Fetches recent news articles about any topic using Serper.dev.
    
    Args:
        topic: The topic to search for news about
        num_results: Number of news articles to retrieve (default: 5)
    
    Returns:
        List of dictionaries containing article information
    """
    api_key = os.environ.get("SERPER_API_KEY")
    if not api_key:
        raise ValueError("SERPER_API_KEY not found in environment variables")

    # Serper's documented news endpoint expects a POST request with a JSON body
    url = "https://google.serper.dev/news"
    headers = {
        "X-API-KEY": api_key,
        "Content-Type": "application/json",
    }
    payload = {
        "q": topic,
        "gl": "us",
        "hl": "en",
    }

    response = requests.post(url, headers=headers, json=payload)
    response.raise_for_status()

    results = response.json()

    if "news" not in results:
        return []

    articles = []
    for article in results["news"][:num_results]:
        articles.append({
            'title': article.get('title', 'No title'),
            'source': article.get('source', 'Unknown source'),
            'date': article.get('date', 'No date'),
            'link': article.get('link', 'No link'),
            'snippet': article.get('snippet', 'No preview available')
        })

    return articles

@tool
def scrape_articles(articles: List[Dict]) -> List[Dict]:
    """Scrapes the full content of news articles from their URLs.
    
    Args:
        articles: List of article dictionaries containing article information
    
    Returns:
        List of articles with additional full_content field
    """
    webpage_tool = VisitWebpageTool()
    
    for article in articles:
        try:
            # Skip known paywalled sites
            domain = article['link'].lower()
            if any(site in domain for site in ['nytimes.com', 'wsj.com', 'ft.com']):
                article['full_content'] = f"Content not accessible - {article['source']} article requires subscription"
                continue
                
            full_content = webpage_tool.forward(article['link'])
            if full_content and len(full_content.strip()) > 0:
                article['full_content'] = full_content
            else:
                article['full_content'] = article['snippet']
        except Exception:
            # Fall back to the search snippet if the page can't be fetched
            article['full_content'] = article['snippet']
    
    return articles
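
# Typical flow sketch (hypothetical call): each article dict gains a
# 'full_content' field, falling back to the search snippet for paywalled
# or failing pages:
#
#   enriched = scrape_articles(fetch_news("renewable energy"))
#   # enriched[0]['full_content'] -> page text, or the snippet on failure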

# Load prompt templates
with open("prompts.yaml", 'r') as stream:
    prompt_templates = yaml.safe_load(stream)
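
# The templates are assumed to follow the smolagents prompt layout, where
# at minimum a 'system_prompt' key is provided, e.g.:
#
#   system_prompt: |
#     You are an expert assistant who solves tasks by writing code...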

# Initialize the models
qwen_model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
    custom_role_conversions=None,
)

deepseek_model = HfApiModel(
    max_tokens=2096,
    temperature=0.3,  # Lower temperature for more focused summaries
    model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B',
    custom_role_conversions=None,
)

@tool
def summarize_news(articles: List[Dict]) -> str:
    """Creates a summary of the news articles followed by a list of sources."""
    if not articles or not isinstance(articles, list):
        return "No articles to summarize"
    
    # Prepare content for summarization
    content_to_summarize = ""
    for article in articles:
        content = article.get('full_content', article['snippet'])
        content_to_summarize += f"Title: {article['title']}\nSource: {article['source']}\nContent: {content}\n\n"
    
    # Use DeepSeek model to generate a concise summary
    summary_prompt = f"""Please provide a concise summary of these news articles, focusing on the key points and main developments. Then list the individual articles with brief descriptions.

Articles to summarize:
{content_to_summarize}

Format the output as:
📰 Summary: [overall summary]

🔍 Key Articles:
1. [Title] - [brief description]
[Read more link + date]
"""
    
    try:
        # HfApiModel instances are called with a list of chat messages and
        # return a message object whose .content holds the generated text
        messages = [{"role": "user", "content": summary_prompt}]
        return deepseek_model(messages).content.strip()
    except Exception:
        # Fall back to the hand-built summary format if the model call fails
        return original_summary_format(articles)

def original_summary_format(articles: List[Dict]) -> str:
    # Original summary format as fallback
    summary = "📰 Summary:\n"
    summary += "Latest news covers " + ", ".join(set(article['source'] for article in articles)) + ". "
    all_snippets = [article.get('full_content', article['snippet']) for article in articles]
    summary += "Key points: " + ". ".join(all_snippets[:2]) + "\n\n"
    
    summary += "🔍 Articles:\n"
    for idx, article in enumerate(articles, 1):
        title = article['title']
        link = article['link']
        date = article['date']
        content = article.get('full_content', article['snippet'])
        snippet = content[:200] + "..." if len(content) > 200 else content
        
        summary += f"{idx}. **{title}**\n"
        summary += f"   {snippet}\n"
        summary += f"   [Read more]({link}) ({date})\n\n"
    
    return summary

final_answer = FinalAnswerTool()

# Create the agent with all tools
agent = CodeAgent(
    model=qwen_model,  # Use Qwen model for main agent
    tools=[fetch_news, scrape_articles, summarize_news, final_answer],
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name="News Agent",
    description="An agent that fetches and summarizes news about any topic",
    prompt_templates=prompt_templates
)
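
# Example of driving the agent directly (hypothetical query) instead of
# going through the Gradio UI:
#
#   result = agent.run("Summarize this week's news about electric vehicles")
#   print(result)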

# Launch the Gradio interface
if __name__ == "__main__":
    GradioUI(agent).launch()