from smolagents import CodeAgent, HfApiModel, tool
from tools.final_answer import FinalAnswerTool
from tools.visit_webpage import VisitWebpageTool
from Gradio_UI import GradioUI
import requests
import yaml
import os
from typing import Dict, List

@tool
def fetch_news(topic: str, num_results: int = 5) -> List[Dict]:
    """Fetches recent news articles about any topic using Serper.dev.

    Args:
        topic: The topic to search for news about
        num_results: Number of news articles to retrieve (default: 5)

    Returns:
        List of dictionaries containing article information
    """
    try:
        api_key = os.environ.get("SERPER_API_KEY")
        if not api_key:
            return "Error: SERPER_API_KEY not found in environment variables"

        # Serper.dev's documented API takes a POST request with a JSON body,
        # not a GET with query parameters
        url = "https://google.serper.dev/news"
        headers = {"X-API-KEY": api_key}
        payload = {
            "q": topic,
            "gl": "us",
            "hl": "en"
        }
        response = requests.post(url, headers=headers, json=payload)
        response.raise_for_status()
        results = response.json()

        if "news" not in results:
            return []

        articles = []
        for article in results["news"][:num_results]:
            articles.append({
                'title': article.get('title', 'No title'),
                'source': article.get('source', 'Unknown source'),
                'date': article.get('date', 'No date'),
                'link': article.get('link', 'No link'),
                'snippet': article.get('snippet', 'No preview available')
            })
        return articles
    except Exception as e:
        return f"Error: {str(e)}"

@tool
def scrape_articles(articles: List[Dict]) -> List[Dict]:
    """Scrapes the full content of news articles from their URLs.

    Args:
        articles: List of article dictionaries containing article information

    Returns:
        List of articles with an additional full_content field
    """
    webpage_tool = VisitWebpageTool()
    for article in articles:
        try:
            # Skip known paywalled sites
            domain = article['link'].lower()
            if any(site in domain for site in ['nytimes.com', 'wsj.com', 'ft.com']):
                article['full_content'] = f"Content not accessible - {article['source']} article requires subscription"
                continue
            full_content = webpage_tool.forward(article['link'])
            if full_content and len(full_content.strip()) > 0:
                article['full_content'] = full_content
            else:
                article['full_content'] = article['snippet']
        except Exception:
            # Fall back to the search snippet if the page cannot be fetched
            article['full_content'] = article['snippet']
    return articles
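
# VisitWebpageTool.forward(url) returns the page text (markdown, if it mirrors
# the standard smolagents tool of the same name); empty or failed fetches fall
# back to the search snippet, so scrape_articles itself never raises.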

# Load prompt templates
with open("prompts.yaml", 'r') as stream:
    prompt_templates = yaml.safe_load(stream)

# Initialize the models
qwen_model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
    custom_role_conversions=None,
)

deepseek_model = HfApiModel(
    max_tokens=2096,
    temperature=0.3,  # Lower temperature for more focused summaries
    model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B',
    custom_role_conversions=None,
)
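
# Usage sketch: HfApiModel instances are invoked with a chat-style message
# list rather than a raw prompt string, e.g.:
#   reply = qwen_model([{"role": "user", "content": "Hello"}])
#   print(reply.content)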

@tool
def summarize_news(articles: List[Dict]) -> str:
    """Creates a summary of the news articles followed by a list of sources.

    Args:
        articles: List of article dictionaries containing news article information including
            title, source, date, link, snippet, and full_content fields

    Returns:
        A string containing a summary followed by article references
    """
    if not articles or not isinstance(articles, list):
        return "No articles to summarize"

    # Prepare content for summarization
    content_to_summarize = ""
    for article in articles:
        content = article.get('full_content', article['snippet'])
        content_to_summarize += f"Title: {article['title']}\nSource: {article['source']}\nDate: {article['date']}\nLink: {article['link']}\nContent: {content}\n\n"

    summary_prompt = f"""Summarize these news articles in a clear and concise way. Include the most important developments and add links to the original articles.

{content_to_summarize}"""

    try:
        # HfApiModel has no .complete() method; it is called with a
        # chat-style message list and returns a message whose .content
        # attribute holds the generated text
        response = deepseek_model([{"role": "user", "content": summary_prompt}])
        return response.content.strip()
    except Exception as e:
        print(f"DeepSeek summarization failed: {str(e)}")
        return original_summary_format(articles)

def original_summary_format(articles: List[Dict]) -> str:
    # Original summary format, used as a fallback when the model call fails
    summary = "📰 Summary:\n"
    summary += "Latest news covers " + ", ".join(set(article['source'] for article in articles)) + ". "
    all_snippets = [article.get('full_content', article['snippet']) for article in articles]
    summary += "Key points: " + ". ".join(all_snippets[:2]) + "\n\n"
    summary += "🔍 Articles:\n"
    for idx, article in enumerate(articles, 1):
        title = article['title']
        link = article['link']
        date = article['date']
        content = article.get('full_content', article['snippet'])
        snippet = (content[:200] + "...") if len(content) > 200 else content
        summary += f"{idx}. **{title}**\n"
        summary += f"   {snippet}\n"
        summary += f"   [Read more]({link}) ({date})\n\n"
    return summary
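
# The fallback renders markdown along these lines (illustrative output):
#   📰 Summary:
#   Latest news covers Reuters, BBC. Key points: ...
#
#   🔍 Articles:
#   1. **Some headline**
#      First 200 characters of the article...
#      [Read more](https://example.com/story) (2 hours ago)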

final_answer = FinalAnswerTool()

# Create the agent with all tools
agent = CodeAgent(
    model=qwen_model,  # Use the Qwen model for the main agent loop
    tools=[fetch_news, scrape_articles, summarize_news, final_answer],
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name="News Agent",
    description="An agent that fetches and summarizes news about any topic",
    prompt_templates=prompt_templates
)

# Launch the Gradio interface
if __name__ == "__main__":
    GradioUI(agent).launch()
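
# Illustrative direct usage (outside the Gradio UI), assuming SERPER_API_KEY
# is set and prompts.yaml is present:
#   agent.run("Summarize the latest news about open-source AI models")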