dvt81 committed on
Commit
db85dcc
·
verified ·
1 Parent(s): dade8b2

updated scraper function

Browse files

updated the scraper function to more accurately scrape ZeroHedge

Files changed (1) hide show
  1. app.py +7 -5
app.py CHANGED
@@ -36,12 +36,14 @@ def get_zh_top_news() -> tuple[str, str]:
36
  # Navigate to ZeroHedge homepage
37
  driver.get("https://www.zerohedge.com")
38
 
39
- # Find the top article element using a CSS selector
40
- top_article = driver.find_element(By.CSS_SELECTOR, "article .ArticleTeaser_titleLink__mK4rX")
41
 
42
- # Extract the title and URL
43
- article_title = top_article.text.strip()
44
- article_link = top_article.get_attribute("href")
 
 
45
 
46
  # Ensure the link is absolute
47
  if not article_link.startswith("http"):
 
36
  # Navigate to ZeroHedge homepage
37
  driver.get("https://www.zerohedge.com")
38
 
39
+ # Find the first <h2> with class starting with 'Article_title___'
40
+ top_article = driver.find_element(By.CSS_SELECTOR, "h2[class^='Article_title___']")
41
 
42
+ # Extract the title from the <a> tag inside the <h2>
43
+ article_title = top_article.find_element(By.TAG_NAME, "a").text.strip()
44
+
45
+ # Extract the URL from the href attribute of the <a> tag
46
+ article_link = top_article.find_element(By.TAG_NAME, "a").get_attribute("href")
47
 
48
  # Ensure the link is absolute
49
  if not article_link.startswith("http"):