First_agent_template

Sleeping

dvt81 committed on Mar 19

Commit

db85dcc

verified ·

1 Parent(s): dade8b2

updated scraper function

Updated the scraper function to more accurately scrape ZeroHedge

Files changed (1) hide show

app.py CHANGED Viewed

@@ -36,12 +36,14 @@ def get_zh_top_news() -> tuple[str, str]:
         # Navigate to ZeroHedge homepage
         driver.get("https://www.zerohedge.com")
-        # Find the top article element using a CSS selector
-        top_article = driver.find_element(By.CSS_SELECTOR, "article .ArticleTeaser_titleLink__mK4rX")
-        # Extract the title and URL
-        article_title = top_article.text.strip()
-        article_link = top_article.get_attribute("href")
         # Ensure the link is absolute
         if not article_link.startswith("http"):

         # Navigate to ZeroHedge homepage
         driver.get("https://www.zerohedge.com")
+        # Find the first <h2> with class starting with 'Article_title___'
+        top_article = driver.find_element(By.CSS_SELECTOR, "h2[class^='Article_title___']")
+        # Extract the title from the <a> tag inside the <h2>
+        article_title = top_article.find_element(By.TAG_NAME, "a").text.strip()
+        # Extract the URL from the href attribute of the <a> tag
+        article_link = top_article.find_element(By.TAG_NAME, "a").get_attribute("href")
         # Ensure the link is absolute
         if not article_link.startswith("http"):