dvt81 committed on
Commit
ac0f86b
·
verified ·
1 Parent(s): f9e8166

added more debugging

Browse files

More debugging output to help solve the ChromeDriver problem.

Files changed (1) hide show
  1. app.py +17 -11
app.py CHANGED
@@ -24,36 +24,42 @@ def get_zh_top_news() -> tuple[str, str]:
24
  """
25
  # Set up Chrome options for headless browsing
26
  chrome_options = Options()
27
- chrome_options.add_argument("--headless") # Run without opening a browser window
28
- chrome_options.add_argument("--disable-gpu") # Disable GPU for headless mode
29
- chrome_options.add_argument("--no-sandbox") # Required for Docker environments
30
- chrome_options.add_argument("--disable-dev-shm-usage") # Avoid memory issues in containers
31
 
32
  # Specify ChromeDriver path (installed via Dockerfile)
33
  service = Service(executable_path="/usr/local/bin/chromedriver")
34
 
35
- # Initialize the WebDriver
36
- driver = webdriver.Chrome(service=service, options=chrome_options)
 
 
 
 
 
37
 
38
  try:
39
  # Navigate to ZeroHedge homepage
40
  driver.get("https://www.zerohedge.com")
 
41
 
42
  # Find the first <h2> with class starting with 'Article_title___'
43
  top_article = driver.find_element(By.CSS_SELECTOR, "h2[class^='Article_title___']")
 
44
 
45
- print(f" article object: {top_article}")
46
-
47
  # Extract the title from the <a> tag inside the <h2>
48
- article_title = top_article.find_element(By.TAG_NAME, "a").text.strip()
49
-
50
  # Extract the URL from the href attribute of the <a> tag
51
- article_link = top_article.find_element(By.TAG_NAME, "a").get_attribute("href")
52
 
53
  # Ensure the link is absolute
54
  if not article_link.startswith("http"):
55
  article_link = f"https://www.zerohedge.com{article_link}"
56
 
 
57
  return article_title, article_link
58
 
59
  except Exception as e:
 
24
  """
25
  # Set up Chrome options for headless browsing
26
  chrome_options = Options()
27
+ chrome_options.add_argument("--headless")
28
+ chrome_options.add_argument("--disable-gpu")
29
+ chrome_options.add_argument("--no-sandbox")
30
+ chrome_options.add_argument("--disable-dev-shm-usage")
31
 
32
  # Specify ChromeDriver path (installed via Dockerfile)
33
  service = Service(executable_path="/usr/local/bin/chromedriver")
34
 
35
+ try:
36
+ # Initialize the WebDriver
37
+ driver = webdriver.Chrome(service=service, options=chrome_options)
38
+ print("DEBUG: WebDriver initialized successfully")
39
+ except Exception as e:
40
+ print(f"DEBUG: Failed to initialize WebDriver: {e}")
41
+ return "Error: WebDriver failed", "https://www.zerohedge.com"
42
 
43
  try:
44
  # Navigate to ZeroHedge homepage
45
  driver.get("https://www.zerohedge.com")
46
+ print("DEBUG: Page loaded")
47
 
48
  # Find the first <h2> with class starting with 'Article_title___'
49
  top_article = driver.find_element(By.CSS_SELECTOR, "h2[class^='Article_title___']")
50
+ print(f"DEBUG: Found article object: {top_article}")
51
 
 
 
52
  # Extract the title from the <a> tag inside the <h2>
53
+ article_title = top_article.find_element(By.TAG_NAME, "a").text.strip() or "No title found"
54
+
55
  # Extract the URL from the href attribute of the <a> tag
56
+ article_link = top_article.find_element(By.TAG_NAME, "a").get_attribute("href") or "https://www.zerohedge.com"
57
 
58
  # Ensure the link is absolute
59
  if not article_link.startswith("http"):
60
  article_link = f"https://www.zerohedge.com{article_link}"
61
 
62
+ print(f"DEBUG: Returning title='{article_title}', link='{article_link}'")
63
  return article_title, article_link
64
 
65
  except Exception as e: