fdaudens (HF staff) committed
Commit 81db657 · verified · 1 Parent(s): f73e1fc

Update app.py

Files changed (1):
  1. app.py: +42 -146
app.py CHANGED
@@ -1,155 +1,51 @@
-from smolagents import CodeAgent, HfApiModel, tool
-from tools.final_answer import FinalAnswerTool
-from tools.visit_webpage import VisitWebpageTool
-from Gradio_UI import GradioUI
+from typing import Any, Optional
+from smolagents.tools import Tool
 import requests
-import yaml
-import os
-from typing import Dict, List, Optional
-import re
+import markdownify
+import smolagents
+import re # Add re import here
 
-@tool
-def fetch_news(topic: str, num_results: int = 5) -> List[Dict]:
-    """Fetches recent news articles about any topic using Serper.dev.
-
-    Args:
-        topic: The topic to search for news about
-        num_results: Number of news articles to retrieve (default: 5)
-
-    Returns:
-        List of dictionaries containing article information
-    """
-    try:
-        api_key = os.environ.get("SERPER_API_KEY")
-        if not api_key:
-            return "Error: SERPER_API_KEY not found in environment variables"
-
-        url = f"https://google.serper.dev/news"
-        headers = {
-            "X-API-KEY": api_key
-        }
-        params = {
-            "q": topic,
-            "gl": "us",
-            "hl": "en"
-        }
-
-        response = requests.get(url, headers=headers, params=params)
-        response.raise_for_status()
-
-        results = response.json()
-
-        if "news" not in results:
-            return []
-
-        articles = []
-        for article in results["news"][:num_results]:
-            articles.append({
-                'title': article.get('title', 'No title'),
-                'source': article.get('source', 'Unknown source'),
-                'date': article.get('date', 'No date'),
-                'link': article.get('link', 'No link'),
-                'snippet': article.get('snippet', 'No preview available')
-            })
-
-        return articles
-
-    except Exception as e:
-        return f"Error: {str(e)}"
+class VisitWebpageTool(Tool):
+    name = "visit_webpage"
+    description = "Visits a webpage at the given url and reads its content as a markdown string. Use this to browse webpages."
+    inputs = {'url': {'type': 'string', 'description': 'The url of the webpage to visit.'}}
+    output_type = "string"
 
-@tool
-def scrape_articles(articles: List[Dict]) -> List[Dict]:
-    """Scrapes the full content of news articles from their URLs.
-
-    Args:
-        articles: List of article dictionaries containing article information
-
-    Returns:
-        List of articles with additional full_content field
-    """
-    webpage_tool = VisitWebpageTool()
-
-    for article in articles:
+    def forward(self, url: str) -> str:
         try:
-            # Skip known paywalled sites
-            domain = article['link'].lower()
-            if any(site in domain for site in ['nytimes.com', 'wsj.com', 'ft.com']):
-                article['full_content'] = f"Content not accessible - {article['source']} article requires subscription"
-                continue
-
-            full_content = webpage_tool.forward(article['link'])
-            if full_content and len(full_content.strip()) > 0:
-                article['full_content'] = full_content
-            else:
-                article['full_content'] = article['snippet']
-        except Exception as e:
-            article['full_content'] = article['snippet']
-
-    return articles
-
-@tool
-def summarize_news(articles: List[Dict]) -> str:
-    """Creates a summary of the news articles followed by a list of sources.
-
-    Args:
-        articles: List of article dictionaries containing title, source, date, link, snippet, and full_content
-
-    Returns:
-        A string containing a summary followed by article references
-    """
-    if not articles or not isinstance(articles, list):
-        return "No articles to summarize"
-
-    # Collect all content for the overall summary
-    all_content = [article.get('full_content', article['snippet']) for article in articles]
-
-    # Create a high-level summary from content
-    summary = "📰 Summary:\n"
-    summary += "Latest news covers " + ", ".join(set(article['source'] for article in articles)) + ". "
-    summary += "Key points: " + ". ".join(all_content[:2]) + "\n\n"
-
-    # List individual articles
-    summary += "🔍 Articles:\n"
-    for idx, article in enumerate(articles, 1):
-        title = article['title']
-        link = article['link']
-        date = article['date']
-        content = article.get('full_content', article['snippet'])
-        snippet = content[:200] + "..." if len(content) > 200 else content
-
-        summary += f"{idx}. **{title}**\n"
-        summary += f"   {snippet}\n"
-        summary += f"   [Read more]({link}) ({date})\n\n"
-
-    return summary
+            import requests
+            from markdownify import markdownify
+            from requests.exceptions import RequestException
+
+            from smolagents.utils import truncate_content
+        except ImportError as e:
+            raise ImportError(
+                "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
+            ) from e
+        try:
+            # Add user agent to avoid some blocking
+            headers = {
+                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+            }
+
+            # Send a GET request to the URL with a 20-second timeout
+            response = requests.get(url, timeout=20, headers=headers)
+            response.raise_for_status()
 
-# Load prompt templates
-with open("prompts.yaml", 'r') as stream:
-    prompt_templates = yaml.safe_load(stream)
+            # Convert the HTML content to Markdown
+            markdown_content = markdownify(response.text).strip()
 
-# Initialize the model
-model = HfApiModel(
-    max_tokens=2096,
-    temperature=0.5,
-    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
-    custom_role_conversions=None,
-)
+            # Remove multiple line breaks
+            markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
 
-final_answer = FinalAnswerTool()
+            return truncate_content(markdown_content, 10000)
 
-# Create the agent with all tools
-agent = CodeAgent(
-    model=model,
-    tools=[fetch_news, scrape_articles, summarize_news, final_answer], # Added scrape_articles
-    max_steps=6,
-    verbosity_level=1,
-    grammar=None,
-    planning_interval=None,
-    name="News Agent",
-    description="An agent that fetches and summarizes news about any topic",
-    prompt_templates=prompt_templates
-)
+        except requests.exceptions.Timeout:
+            return "The request timed out. Please try again later or check the URL."
+        except RequestException as e:
+            return f"Error fetching the webpage: {str(e)}"
+        except Exception as e:
+            return f"An unexpected error occurred: {str(e)}"
 
-# Launch the Gradio interface
-if __name__ == "__main__":
-    GradioUI(agent).launch()
+    def __init__(self, *args, **kwargs):
+        self.is_initialized = False
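
For a quick smoke test of the rewritten app.py, here is a minimal sketch. Assumptions not in the commit: `requests`, `markdownify`, and `smolagents` are installed so the module imports cleanly, and the URL is an arbitrary example.

    # Hypothetical usage sketch, not part of this commit.
    # Assumes requests, markdownify, and smolagents are installed.
    from app import VisitWebpageTool

    tool = VisitWebpageTool()
    # forward() fetches the page, converts the HTML to Markdown,
    # and truncates the result to 10,000 characters.
    markdown = tool.forward("https://huggingface.co/blog")  # example URL (assumption)
    print(markdown[:300])  # preview the converted content

Calling forward() directly skips the smolagents tool-call plumbing, which should be enough to verify the fetch-and-convert path works.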