Adding a custom tool to scrape the top news result from the ZeroHedge News feed

#1
Files changed (1) hide show
  1. app.py +45 -9
app.py CHANGED
@@ -7,16 +7,52 @@ from tools.final_answer import FinalAnswerTool
7
 
8
  from Gradio_UI import GradioUI
9
 
10
- # Below is an example of a tool that does nothing. Amaze us with your creativity !
 
11
  @tool
12
- def my_custom_tool(arg1:str, arg2:int)-> str: #it's import to specify the return type
13
- #Keep this format for the description / args / args description but feel free to modify the tool
14
- """A tool that does nothing yet
15
- Args:
16
- arg1: the first argument
17
- arg2: the second argument
 
 
18
  """
19
- return "What magic will you build ?"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  @tool
22
  def get_current_time_in_timezone(timezone: str) -> str:
@@ -55,7 +91,7 @@ with open("prompts.yaml", 'r') as stream:
55
 
56
  agent = CodeAgent(
57
  model=model,
58
- tools=[final_answer], ## add your tools here (don't remove final answer)
59
  max_steps=6,
60
  verbosity_level=1,
61
  grammar=None,
 
7
 
8
  from Gradio_UI import GradioUI
9
 
10
+ from selenium import webdriver
11
+
12
  @tool
13
def get_zh_top_news() -> tuple[str, str]:
    """A tool that retrieves the current top news article's title and URL from www.zerohedge.com.

    Lookup failures are reported in-band rather than raised: if the page fails
    to load or the expected element is not found, a placeholder title and the
    site's homepage URL are returned instead.

    Returns:
        tuple[str, str]: A tuple containing the article title (str) and its URL (str),
            or ("Error: Headline not found", "https://www.zerohedge.com") on failure.

    Raises:
        Exception: Only if the Chrome WebDriver itself cannot be started; the
            driver is created before the guarded section, so that error propagates.
    """
    # Imported locally so the tool is self-contained: the module only imports
    # `webdriver`, and without these two names the function fails with NameError.
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.common.by import By

    # Set up Chrome options for headless browsing
    chrome_options = Options()
    chrome_options.add_argument("--headless")  # Run without opening a browser window
    chrome_options.add_argument("--disable-gpu")  # Disable GPU acceleration in headless mode

    # Initialize the Selenium WebDriver (assumes chromedriver is in PATH or resolvable by Selenium)
    driver = webdriver.Chrome(options=chrome_options)

    try:
        # Navigate to ZeroHedge homepage
        driver.get("https://www.zerohedge.com")

        # find_element returns the first match in document order, which is
        # taken to be the top headline.
        # NOTE(review): the "__mK4rX" suffix looks like a build-generated CSS
        # module hash and may change when the site is redeployed - confirm.
        top_article = driver.find_element(
            By.CSS_SELECTOR, "article .ArticleTeaser_titleLink__mK4rX"
        )

        # Title is the visible text of the link; URL is its href attribute
        article_title = top_article.text.strip()
        article_link = top_article.get_attribute("href")

        # Ensure the link is absolute in case a relative href is returned.
        # A missing href (None) raises here and falls through to the fallback below.
        if not article_link.startswith("http"):
            article_link = f"https://www.zerohedge.com{article_link}"

        return article_title, article_link

    except Exception:
        # Deliberate best-effort: the calling agent receives a readable
        # placeholder instead of a stack trace.
        return "Error: Headline not found", "https://www.zerohedge.com"

    finally:
        # Always close the browser session to free resources
        driver.quit()
56
 
57
  @tool
58
  def get_current_time_in_timezone(timezone: str) -> str:
 
91
 
92
  agent = CodeAgent(
93
  model=model,
94
+ tools=[get_zh_top_news,get_current_time_in_timezone,final_answer], ## add your tools here (don't remove final answer)
95
  max_steps=6,
96
  verbosity_level=1,
97
  grammar=None,