Spaces:
Sleeping
Sleeping
updated scraper function
Browse files — updated the scraper function to more accurately scrape ZeroHedge
app.py
CHANGED
@@ -36,12 +36,14 @@ def get_zh_top_news() -> tuple[str, str]:
|
|
36 |
# Navigate to ZeroHedge homepage
|
37 |
driver.get("https://www.zerohedge.com")
|
38 |
|
39 |
-
# Find the
|
40 |
-
top_article = driver.find_element(By.CSS_SELECTOR, "
|
41 |
|
42 |
-
# Extract the title
|
43 |
-
article_title = top_article.text.strip()
|
44 |
-
|
|
|
|
|
45 |
|
46 |
# Ensure the link is absolute
|
47 |
if not article_link.startswith("http"):
|
|
|
36 |
# Navigate to ZeroHedge homepage
|
37 |
driver.get("https://www.zerohedge.com")
|
38 |
|
39 |
+
# Find the first <h2> with class starting with 'Article_title___'
|
40 |
+
top_article = driver.find_element(By.CSS_SELECTOR, "h2[class^='Article_title___']")
|
41 |
|
42 |
+
# Extract the title from the <a> tag inside the <h2>
|
43 |
+
article_title = top_article.find_element(By.TAG_NAME, "a").text.strip()
|
44 |
+
|
45 |
+
# Extract the URL from the href attribute of the <a> tag
|
46 |
+
article_link = top_article.find_element(By.TAG_NAME, "a").get_attribute("href")
|
47 |
|
48 |
# Ensure the link is absolute
|
49 |
if not article_link.startswith("http"):
|