Spaces:
Sleeping
Sleeping
added more debugging
Browse files
more debugging to solve the ChromeDriver problem
app.py
CHANGED
@@ -24,36 +24,42 @@ def get_zh_top_news() -> tuple[str, str]:
|
|
24 |
"""
|
25 |
# Set up Chrome options for headless browsing
|
26 |
chrome_options = Options()
|
27 |
-
chrome_options.add_argument("--headless")
|
28 |
-
chrome_options.add_argument("--disable-gpu")
|
29 |
-
chrome_options.add_argument("--no-sandbox")
|
30 |
-
chrome_options.add_argument("--disable-dev-shm-usage")
|
31 |
|
32 |
# Specify ChromeDriver path (installed via Dockerfile)
|
33 |
service = Service(executable_path="/usr/local/bin/chromedriver")
|
34 |
|
35 |
-
|
36 |
-
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
try:
|
39 |
# Navigate to ZeroHedge homepage
|
40 |
driver.get("https://www.zerohedge.com")
|
|
|
41 |
|
42 |
# Find the first <h2> with class starting with 'Article_title___'
|
43 |
top_article = driver.find_element(By.CSS_SELECTOR, "h2[class^='Article_title___']")
|
|
|
44 |
|
45 |
-
print(f" article object: {top_article}")
|
46 |
-
|
47 |
# Extract the title from the <a> tag inside the <h2>
|
48 |
-
article_title = top_article.find_element(By.TAG_NAME, "a").text.strip()
|
49 |
-
|
50 |
# Extract the URL from the href attribute of the <a> tag
|
51 |
-
article_link = top_article.find_element(By.TAG_NAME, "a").get_attribute("href")
|
52 |
|
53 |
# Ensure the link is absolute
|
54 |
if not article_link.startswith("http"):
|
55 |
article_link = f"https://www.zerohedge.com{article_link}"
|
56 |
|
|
|
57 |
return article_title, article_link
|
58 |
|
59 |
except Exception as e:
|
|
|
24 |
"""
|
25 |
# Set up Chrome options for headless browsing
|
26 |
chrome_options = Options()
|
27 |
+
chrome_options.add_argument("--headless")
|
28 |
+
chrome_options.add_argument("--disable-gpu")
|
29 |
+
chrome_options.add_argument("--no-sandbox")
|
30 |
+
chrome_options.add_argument("--disable-dev-shm-usage")
|
31 |
|
32 |
# Specify ChromeDriver path (installed via Dockerfile)
|
33 |
service = Service(executable_path="/usr/local/bin/chromedriver")
|
34 |
|
35 |
+
try:
|
36 |
+
# Initialize the WebDriver
|
37 |
+
driver = webdriver.Chrome(service=service, options=chrome_options)
|
38 |
+
print("DEBUG: WebDriver initialized successfully")
|
39 |
+
except Exception as e:
|
40 |
+
print(f"DEBUG: Failed to initialize WebDriver: {e}")
|
41 |
+
return "Error: WebDriver failed", "https://www.zerohedge.com"
|
42 |
|
43 |
try:
|
44 |
# Navigate to ZeroHedge homepage
|
45 |
driver.get("https://www.zerohedge.com")
|
46 |
+
print("DEBUG: Page loaded")
|
47 |
|
48 |
# Find the first <h2> with class starting with 'Article_title___'
|
49 |
top_article = driver.find_element(By.CSS_SELECTOR, "h2[class^='Article_title___']")
|
50 |
+
print(f"DEBUG: Found article object: {top_article}")
|
51 |
|
|
|
|
|
52 |
# Extract the title from the <a> tag inside the <h2>
|
53 |
+
article_title = top_article.find_element(By.TAG_NAME, "a").text.strip() or "No title found"
|
54 |
+
|
55 |
# Extract the URL from the href attribute of the <a> tag
|
56 |
+
article_link = top_article.find_element(By.TAG_NAME, "a").get_attribute("href") or "https://www.zerohedge.com"
|
57 |
|
58 |
# Ensure the link is absolute
|
59 |
if not article_link.startswith("http"):
|
60 |
article_link = f"https://www.zerohedge.com{article_link}"
|
61 |
|
62 |
+
print(f"DEBUG: Returning title='{article_title}', link='{article_link}'")
|
63 |
return article_title, article_link
|
64 |
|
65 |
except Exception as e:
|