fdaudens HF staff committed on
Commit
a32ad6f
·
verified ·
1 Parent(s): 97a3f8b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -87
app.py CHANGED
@@ -1,12 +1,10 @@
1
  from smolagents import CodeAgent, HfApiModel, tool
2
  from tools.final_answer import FinalAnswerTool
3
- from tools.visit_webpage import VisitWebpageTool
4
  from Gradio_UI import GradioUI
5
  import requests
6
  import yaml
7
  import os
8
  from typing import Dict, List, Optional
9
- import re # Add this import at the top with other imports
10
 
11
  @tool
12
  def fetch_news(topic: str, num_results: int = 5) -> List[Dict]:
@@ -57,62 +55,12 @@ def fetch_news(topic: str, num_results: int = 5) -> List[Dict]:
57
  except Exception as e:
58
  return f"Error: {str(e)}"
59
 
60
- @tool
61
- def scrape_articles(articles: List[Dict]) -> List[Dict]:
62
- """Scrapes the full content of news articles from their URLs.
63
-
64
- Args:
65
- articles: List of article dictionaries containing article information
66
-
67
- Returns:
68
- List of articles with additional full_content field
69
- """
70
- webpage_tool = VisitWebpageTool()
71
-
72
- for article in articles:
73
- try:
74
- # Skip known paywalled sites
75
- domain = article['link'].lower()
76
- if any(site in domain for site in ['nytimes.com', 'wsj.com', 'ft.com']):
77
- article['full_content'] = f"Content not accessible - {article['source']} article requires subscription"
78
- continue
79
-
80
- full_content = webpage_tool.forward(article['link'])
81
- if full_content and len(full_content.strip()) > 0:
82
- article['full_content'] = full_content
83
- else:
84
- article['full_content'] = article['snippet']
85
- except Exception as e:
86
- article['full_content'] = article['snippet']
87
-
88
- return articles
89
-
90
- # Load prompt templates
91
- with open("prompts.yaml", 'r') as stream:
92
- prompt_templates = yaml.safe_load(stream)
93
-
94
- # Initialize the models
95
- qwen_model = HfApiModel(
96
- max_tokens=2096,
97
- temperature=0.5,
98
- model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
99
- custom_role_conversions=None,
100
- )
101
-
102
- deepseek_model = HfApiModel(
103
- max_tokens=2096,
104
- temperature=0.3, # Lower temperature for more focused summaries
105
- model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B',
106
- custom_role_conversions=None,
107
- )
108
-
109
  @tool
110
  def summarize_news(articles: List[Dict]) -> str:
111
  """Creates a summary of the news articles followed by a list of sources.
112
 
113
  Args:
114
- articles: List of article dictionaries containing news article information including title,
115
- source, date, link, snippet, and full_content fields
116
 
117
  Returns:
118
  A string containing a summary followed by article references
@@ -120,48 +68,21 @@ def summarize_news(articles: List[Dict]) -> str:
120
  if not articles or not isinstance(articles, list):
121
  return "No articles to summarize"
122
 
123
- content_to_summarize = ""
124
- for article in articles:
125
- content = article.get('full_content', article['snippet'])
126
- content_to_summarize += f"Title: {article['title']}\nSource: {article['source']}\nDate: {article['date']}\nLink: {article['link']}\nContent: {content}\n\n"
127
-
128
- summary_prompt = f"""Create a clear news summary from these articles. Focus on the main facts and developments.
129
-
130
- For each article, extract the most newsworthy information and remove any redundant content, formatting artifacts, or navigation elements.
131
-
132
- Present the information in this format:
133
- 1. Start with a brief overview of the main story
134
- 2. Include specific details and numbers when relevant
135
- 3. Add source links in markdown format: [Source Name](URL)
136
-
137
- {content_to_summarize}"""
138
 
139
- try:
140
- summary = deepseek_model.complete(summary_prompt).strip()
141
- # Ensure proper markdown link formatting
142
- for article in articles:
143
- source = article['source']
144
- link = article['link']
145
- summary = summary.replace(f"[{source}]", f"[{source}]({link})")
146
- return summary
147
- except Exception as e:
148
- print(f"DeepSeek summarization failed: {str(e)}")
149
- return original_summary_format(articles)
150
-
151
- def original_summary_format(articles: List[Dict]) -> str:
152
- # Original summary format as fallback
153
  summary = "📰 Summary:\n"
154
  summary += "Latest news covers " + ", ".join(set(article['source'] for article in articles)) + ". "
155
- all_snippets = [article.get('full_content', article['snippet']) for article in articles]
156
  summary += "Key points: " + ". ".join(all_snippets[:2]) + "\n\n"
157
 
 
158
  summary += "🔍 Articles:\n"
159
  for idx, article in enumerate(articles, 1):
160
  title = article['title']
161
  link = article['link']
162
  date = article['date']
163
- content = article.get('full_content', article['snippet'])
164
- snippet = content[:200] + "..." if len(content) > 200 else content
165
 
166
  summary += f"{idx}. **{title}**\n"
167
  summary += f" {snippet}\n"
@@ -169,12 +90,24 @@ def original_summary_format(articles: List[Dict]) -> str:
169
 
170
  return summary
171
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  final_answer = FinalAnswerTool()
173
 
174
  # Create the agent with all tools
175
  agent = CodeAgent(
176
- model=qwen_model, # Use Qwen model for main agent
177
- tools=[fetch_news, scrape_articles, summarize_news, final_answer],
178
  max_steps=6,
179
  verbosity_level=1,
180
  grammar=None,
 
1
  from smolagents import CodeAgent, HfApiModel, tool
2
  from tools.final_answer import FinalAnswerTool
 
3
  from Gradio_UI import GradioUI
4
  import requests
5
  import yaml
6
  import os
7
  from typing import Dict, List, Optional
 
8
 
9
  @tool
10
  def fetch_news(topic: str, num_results: int = 5) -> List[Dict]:
 
55
  except Exception as e:
56
  return f"Error: {str(e)}"
57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  @tool
59
  def summarize_news(articles: List[Dict]) -> str:
60
  """Creates a summary of the news articles followed by a list of sources.
61
 
62
  Args:
63
+ articles: List of article dictionaries containing title, source, date, link, and snippet
 
64
 
65
  Returns:
66
  A string containing a summary followed by article references
 
68
  if not articles or not isinstance(articles, list):
69
  return "No articles to summarize"
70
 
71
+ # Collect all snippets for the overall summary
72
+ all_snippets = [article['snippet'] for article in articles if article.get('snippet')]
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
+ # Create a high-level summary from snippets
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  summary = "📰 Summary:\n"
76
  summary += "Latest news covers " + ", ".join(set(article['source'] for article in articles)) + ". "
 
77
  summary += "Key points: " + ". ".join(all_snippets[:2]) + "\n\n"
78
 
79
+ # List individual articles
80
  summary += "🔍 Articles:\n"
81
  for idx, article in enumerate(articles, 1):
82
  title = article['title']
83
  link = article['link']
84
  date = article['date']
85
+ snippet = article['snippet'][:100] + "..." if len(article['snippet']) > 100 else article['snippet']
 
86
 
87
  summary += f"{idx}. **{title}**\n"
88
  summary += f" {snippet}\n"
 
90
 
91
  return summary
92
 
93
+ # Load prompt templates
94
+ with open("prompts.yaml", 'r') as stream:
95
+ prompt_templates = yaml.safe_load(stream)
96
+
97
+ # Initialize the model
98
+ model = HfApiModel(
99
+ max_tokens=2096,
100
+ temperature=0.5,
101
+ model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
102
+ custom_role_conversions=None,
103
+ )
104
+
105
  final_answer = FinalAnswerTool()
106
 
107
  # Create the agent with all tools
108
  agent = CodeAgent(
109
+ model=model,
110
+ tools=[fetch_news, summarize_news, final_answer],
111
  max_steps=6,
112
  verbosity_level=1,
113
  grammar=None,