First_agent_template

Sleeping

App Files Community

dvt81 committed on Mar 19

Commit

a5390b7

verified ·

1 Parent(s): ac0f86b

simplifying with curl

Browse files

removing selenium to reduce complexity

Files changed (1) hide show

app.py +34 -43

app.py CHANGED Viewed

@@ -5,12 +5,8 @@ import pytz
 import yaml
 from tools.final_answer import FinalAnswerTool
 from Gradio_UI import GradioUI
-from selenium import webdriver
-from selenium.webdriver.chrome.options import Options
-from selenium.webdriver.common.by import By
-from selenium.webdriver.chrome.service import Service
-from webdriver_manager.chrome import ChromeDriverManager
 @tool
 def get_zh_top_news() -> tuple[str, str]:
@@ -18,57 +14,52 @@ def get_zh_top_news() -> tuple[str, str]:
     Returns:
         tuple[str, str]: A tuple containing the article title (str) and its URL (str).
-    Raises:
-        Exception: If the page fails to load or the expected element is not found.
     """
-    # Set up Chrome options for headless browsing
-    chrome_options = Options()
-    chrome_options.add_argument("--headless")
-    chrome_options.add_argument("--disable-gpu")
-    chrome_options.add_argument("--no-sandbox")
-    chrome_options.add_argument("--disable-dev-shm-usage")
-    # Specify ChromeDriver path (installed via Dockerfile)
-    service = Service(executable_path="/usr/local/bin/chromedriver")
-    try:
-        # Initialize the WebDriver
-        driver = webdriver.Chrome(service=service, options=chrome_options)
-        print("DEBUG: WebDriver initialized successfully")
-    except Exception as e:
-        print(f"DEBUG: Failed to initialize WebDriver: {e}")
-        return "Error: WebDriver failed", "https://www.zerohedge.com"
     try:
-        # Navigate to ZeroHedge homepage
-        driver.get("https://www.zerohedge.com")
-        print("DEBUG: Page loaded")
         # Find the first <h2> with class starting with 'Article_title___'
-        top_article = driver.find_element(By.CSS_SELECTOR, "h2[class^='Article_title___']")
-        print(f"DEBUG: Found article object: {top_article}")
-        # Extract the title from the <a> tag inside the <h2>
-        article_title = top_article.find_element(By.TAG_NAME, "a").text.strip() or "No title found"
-        # Extract the URL from the href attribute of the <a> tag
-        article_link = top_article.find_element(By.TAG_NAME, "a").get_attribute("href") or "https://www.zerohedge.com"
         # Ensure the link is absolute
         if not article_link.startswith("http"):
             article_link = f"https://www.zerohedge.com{article_link}"
-        print(f"DEBUG: Returning title='{article_title}', link='{article_link}'")
         return article_title, article_link
     except Exception as e:
-        print(f"Error retrieving top headline: {e}")
         return "Error: Headline not found", "https://www.zerohedge.com"
-    finally:
-        driver.quit()
 @tool
 def get_current_time_in_timezone(timezone: str) -> str:
     """A tool that fetches the current local time in a specified timezone.

 import yaml
 from tools.final_answer import FinalAnswerTool
 from Gradio_UI import GradioUI
+from bs4 import BeautifulSoup
+import subprocess
 @tool
 def get_zh_top_news() -> tuple[str, str]:
     Returns:
         tuple[str, str]: A tuple containing the article title (str) and its URL (str).
     """
     try:
+        # Use curl to fetch the HTML content
+        result = subprocess.run(
+            ["curl", "-s", "https://www.zerohedge.com"],
+            capture_output=True,
+            text=True,
+            check=True
+        )
+        html_content = result.stdout
+        print(f"DEBUG: Fetched HTML length: {len(html_content)}")  # Debug: Check if content is retrieved
+        # Parse HTML with BeautifulSoup
+        soup = BeautifulSoup(html_content, "html.parser")
         # Find the first <h2> with class starting with 'Article_title___'
+        top_article = soup.find("h2", class_=lambda x: x and x.startswith("Article_title___"))
+        if not top_article:
+            print("DEBUG: No matching <h2> found")
+            return "Error: Headline not found", "https://www.zerohedge.com"
+        # Extract the <a> tag inside the <h2>
+        link_tag = top_article.find("a")
+        if not link_tag:
+            print("DEBUG: No <a> tag found in top article")
+            return "Error: Headline not found", "https://www.zerohedge.com"
+        # Get title and URL
+        article_title = link_tag.get_text(strip=True) or "No title found"
+        article_link = link_tag.get("href") or "https://www.zerohedge.com"
         # Ensure the link is absolute
         if not article_link.startswith("http"):
             article_link = f"https://www.zerohedge.com{article_link}"
+        print(f"DEBUG: Title = '{article_title}', Link = '{article_link}'")
         return article_title, article_link
+    except subprocess.CalledProcessError as e:
+        print(f"Error fetching page with curl: {e}")
+        return "Error: Fetch failed", "https://www.zerohedge.com"
     except Exception as e:
+        print(f"Error parsing content: {e}")
         return "Error: Headline not found", "https://www.zerohedge.com"
 @tool
 def get_current_time_in_timezone(timezone: str) -> str:
     """A tool that fetches the current local time in a specified timezone.