acecalisto3 committed on
Commit
0a68e68
·
verified ·
1 Parent(s): adb910f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -30
app.py CHANGED
@@ -10,6 +10,7 @@ from selenium.webdriver.chrome.service import Service
10
  from selenium.webdriver.chrome.options import Options
11
  from webdriver_manager.chrome import ChromeDriverManager
12
  from huggingface_hub import InferenceClient
 
13
 
14
  # Check for Hugging Face API key
15
  api_key = os.getenv('access')
@@ -63,36 +64,38 @@ def monitor_urls(storage_location, url1, url2, scrape_interval, content_type):
63
  options.add_argument("--no-sandbox")
64
  options.add_argument("--disable-dev-shm-usage")
65
 
66
- with webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options) as driver:
67
- try:
68
- while True:
69
- for i, url in enumerate(urls):
70
- try:
71
- driver.get(url)
72
- time.sleep(2) # Wait for the page to load
73
- if content_type == "text":
74
- current_content = driver.page_source
75
- elif content_type == "media":
76
- current_content = driver.find_elements_by_tag_name("img")
77
- else:
78
- current_content = driver.page_source
79
-
80
- current_hash = hashlib.md5(str(current_content).encode('utf-8')).hexdigest()
81
-
82
- if current_hash != previous_hashes[i]:
83
- previous_hashes[i] = current_hash
84
- date_time_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
85
- history.append(f"Change detected at {url} on {date_time_str}")
86
- csv_toolkit.writerow({"date": date_time_str.split()[0], "time": date_time_str.split()[1], "url": url, "change": "Content changed"})
87
- logging.info(f"Change detected at {url} on {date_time_str}")
88
- except Exception as e:
89
- logging.error(f"Error accessing {url}: {e}")
90
-
91
- time.sleep(scrape_interval * 60) # Check every scrape_interval minutes
92
- except KeyboardInterrupt:
93
- logging.info("Monitoring stopped by user.")
94
- finally:
95
- driver.quit()
 
 
96
 
97
  # Define main function to handle user input
98
  def handle_input(task, storage_location, url1, url2, scrape_interval, content_type):
 
10
  from selenium.webdriver.chrome.options import Options
11
  from webdriver_manager.chrome import ChromeDriverManager
12
  from huggingface_hub import InferenceClient
13
+ from selenium.common.exceptions import WebDriverException
14
 
15
  # Check for Hugging Face API key
16
  api_key = os.getenv('access')
 
64
  options.add_argument("--no-sandbox")
65
  options.add_argument("--disable-dev-shm-usage")
66
 
67
+ try:
68
+ with webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options) as driver:
69
+ try:
70
+ while True:
71
+ for i, url in enumerate(urls):
72
+ try:
73
+ driver.get(url)
74
+ time.sleep(2) # Wait for the page to load
75
+ if content_type == "text":
76
+ current_content = driver.page_source
77
+ elif content_type == "media":
78
+ current_content = driver.find_elements_by_tag_name("img")
79
+ else:
80
+ current_content = driver.page_source
81
+
82
+ current_hash = hashlib.md5(str(current_content).encode('utf-8')).hexdigest()
83
+
84
+ if current_hash != previous_hashes[i]:
85
+ previous_hashes[i] = current_hash
86
+ date_time_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
87
+ history.append(f"Change detected at {url} on {date_time_str}")
88
+ csv_toolkit.writerow({"date": date_time_str.split()[0], "time": date_time_str.split()[1], "url": url, "change": "Content changed"})
89
+ logging.info(f"Change detected at {url} on {date_time_str}")
90
+ except Exception as e:
91
+ logging.error(f"Error accessing {url}: {e}")
92
+
93
+ time.sleep(scrape_interval * 60) # Check every scrape_interval minutes
94
+ except KeyboardInterrupt:
95
+ logging.info("Monitoring stopped by user.")
96
+ except WebDriverException as e:
97
+ logging.error(f"WebDriverException: {e}")
98
+ raise
99
 
100
  # Define main function to handle user input
101
  def handle_input(task, storage_location, url1, url2, scrape_interval, content_type):