Update app.py
app.py CHANGED
@@ -1,12 +1,10 @@
 from smolagents import CodeAgent, HfApiModel, tool
 from tools.final_answer import FinalAnswerTool
-from tools.visit_webpage import VisitWebpageTool
 from Gradio_UI import GradioUI
 import requests
 import yaml
 import os
 from typing import Dict, List, Optional
-import re  # Add this import at the top with other imports
 
 @tool
 def fetch_news(topic: str, num_results: int = 5) -> List[Dict]:
@@ -57,62 +55,12 @@ def fetch_news(topic: str, num_results: int = 5) -> List[Dict]:
     except Exception as e:
         return f"Error: {str(e)}"
 
-@tool
-def scrape_articles(articles: List[Dict]) -> List[Dict]:
-    """Scrapes the full content of news articles from their URLs.
-
-    Args:
-        articles: List of article dictionaries containing article information
-
-    Returns:
-        List of articles with additional full_content field
-    """
-    webpage_tool = VisitWebpageTool()
-
-    for article in articles:
-        try:
-            # Skip known paywalled sites
-            domain = article['link'].lower()
-            if any(site in domain for site in ['nytimes.com', 'wsj.com', 'ft.com']):
-                article['full_content'] = f"Content not accessible - {article['source']} article requires subscription"
-                continue
-
-            full_content = webpage_tool.forward(article['link'])
-            if full_content and len(full_content.strip()) > 0:
-                article['full_content'] = full_content
-            else:
-                article['full_content'] = article['snippet']
-        except Exception as e:
-            article['full_content'] = article['snippet']
-
-    return articles
-
-# Load prompt templates
-with open("prompts.yaml", 'r') as stream:
-    prompt_templates = yaml.safe_load(stream)
-
-# Initialize the models
-qwen_model = HfApiModel(
-    max_tokens=2096,
-    temperature=0.5,
-    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
-    custom_role_conversions=None,
-)
-
-deepseek_model = HfApiModel(
-    max_tokens=2096,
-    temperature=0.3,  # Lower temperature for more focused summaries
-    model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B',
-    custom_role_conversions=None,
-)
-
 @tool
 def summarize_news(articles: List[Dict]) -> str:
     """Creates a summary of the news articles followed by a list of sources.
 
     Args:
-        articles: List of article dictionaries containing
-            source, date, link, snippet, and full_content fields
+        articles: List of article dictionaries containing title, source, date, link, and snippet
 
     Returns:
         A string containing a summary followed by article references
@@ -120,48 +68,21 @@ def summarize_news(articles: List[Dict]) -> str:
     if not articles or not isinstance(articles, list):
         return "No articles to summarize"
 
-
-        for article in articles
-            content = article.get('full_content', article['snippet'])
-            content_to_summarize += f"Title: {article['title']}\nSource: {article['source']}\nDate: {article['date']}\nLink: {article['link']}\nContent: {content}\n\n"
-
-        summary_prompt = f"""Create a clear news summary from these articles. Focus on the main facts and developments.
-
-        For each article, extract the most newsworthy information and remove any redundant content, formatting artifacts, or navigation elements.
-
-        Present the information in this format:
-        1. Start with a brief overview of the main story
-        2. Include specific details and numbers when relevant
-        3. Add source links in markdown format: [Source Name](URL)
-
-        {content_to_summarize}"""
-
-        summary = deepseek_model.complete(summary_prompt).strip()
-        # Ensure proper markdown link formatting
-        for article in articles:
-            source = article['source']
-            link = article['link']
-            summary = summary.replace(f"[{source}]", f"[{source}]({link})")
-        return summary
-    except Exception as e:
-        print(f"DeepSeek summarization failed: {str(e)}")
-        return original_summary_format(articles)
-
-def original_summary_format(articles: List[Dict]) -> str:
-    # Original summary format as fallback
+    # Collect all snippets for the overall summary
+    all_snippets = [article['snippet'] for article in articles if article.get('snippet')]
+
+    # Create a high-level summary from snippets
     summary = "📰 Summary:\n"
     summary += "Latest news covers " + ", ".join(set(article['source'] for article in articles)) + ". "
-    all_snippets = [article.get('full_content', article['snippet']) for article in articles]
     summary += "Key points: " + ". ".join(all_snippets[:2]) + "\n\n"
 
+    # List individual articles
     summary += "🔍 Articles:\n"
     for idx, article in enumerate(articles, 1):
        title = article['title']
        link = article['link']
        date = article['date']
-
-        snippet = content[:200] + "..." if len(content) > 200 else content
+        snippet = article['snippet'][:100] + "..." if len(article['snippet']) > 100 else article['snippet']
 
        summary += f"{idx}. **{title}**\n"
        summary += f" {snippet}\n"
@@ -169,12 +90,24 @@ def original_summary_format(articles: List[Dict]) -> str:
 
     return summary
 
+# Load prompt templates
+with open("prompts.yaml", 'r') as stream:
+    prompt_templates = yaml.safe_load(stream)
+
+# Initialize the model
+model = HfApiModel(
+    max_tokens=2096,
+    temperature=0.5,
+    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
+    custom_role_conversions=None,
+)
+
 final_answer = FinalAnswerTool()
 
 # Create the agent with all tools
 agent = CodeAgent(
-    model=
-    tools=[fetch_news,
+    model=model,
+    tools=[fetch_news, summarize_news, final_answer],
     max_steps=6,
     verbosity_level=1,
     grammar=None,
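
The body of fetch_news is collapsed between the first two hunks, so only its signature, docstring header, and error handling are visible. Below is a minimal sketch of a tool with that contract, assuming a generic JSON search endpoint configured through hypothetical NEWS_API_URL and NEWS_API_KEY environment variables; the actual body in app.py may call a different service, but the returned dict shape (title, source, date, link, snippet) is what summarize_news consumes.

# Hypothetical sketch of the collapsed fetch_news body. The endpoint URL, the
# NEWS_API_URL / NEWS_API_KEY environment variables, and the "news_results"
# response key are illustrative assumptions, not taken from the diff.
import os
from typing import Dict, List

import requests
from smolagents import tool


@tool
def fetch_news(topic: str, num_results: int = 5) -> List[Dict]:
    """Fetches recent news articles about a topic.

    Args:
        topic: The topic to search news for
        num_results: Maximum number of articles to return

    Returns:
        List of article dictionaries with title, source, date, link, and snippet fields
    """
    try:
        response = requests.get(
            os.environ.get("NEWS_API_URL", "https://example.com/news/search"),
            params={
                "q": topic,
                "num": num_results,
                "api_key": os.environ.get("NEWS_API_KEY", ""),
            },
            timeout=10,
        )
        response.raise_for_status()
        results = response.json().get("news_results", [])
        # Normalize each result into the dict shape summarize_news expects
        return [
            {
                "title": item.get("title", ""),
                "source": item.get("source", ""),
                "date": item.get("date", ""),
                "link": item.get("link", ""),
                "snippet": item.get("snippet", ""),
            }
            for item in results[:num_results]
        ]
    except Exception as e:
        # Mirrors the error handling visible in the diff (returns a string on failure)
        return f"Error: {str(e)}"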
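
The last hunk cuts off at grammar=None, so the remainder of the CodeAgent call and the launch of the Gradio interface imported at the top are not shown. The sketch below is an assumed continuation of app.py in the style of the common smolagents Space template, reusing the objects defined in the diff above; the trailing keyword arguments and the use of the loaded prompt_templates are assumptions, not confirmed by the diff.

# Assumed continuation of app.py after the last hunk; not shown in the diff.
agent = CodeAgent(
    model=model,
    tools=[fetch_news, summarize_news, final_answer],
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,             # assumed, following the common template
    name=None,                          # assumed
    description=None,                   # assumed
    prompt_templates=prompt_templates,  # assumed use of the templates loaded from prompts.yaml
)

# Serve the agent through the Gradio UI imported at the top of app.py
GradioUI(agent).launch()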