Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -10,6 +10,7 @@ from selenium.webdriver.chrome.service import Service
|
|
10 |
from selenium.webdriver.chrome.options import Options
|
11 |
from webdriver_manager.chrome import ChromeDriverManager
|
12 |
from huggingface_hub import InferenceClient
|
|
|
13 |
|
14 |
# Check for Hugging Face API key
|
15 |
api_key = os.getenv('access')
|
@@ -63,36 +64,38 @@ def monitor_urls(storage_location, url1, url2, scrape_interval, content_type):
|
|
63 |
options.add_argument("--no-sandbox")
|
64 |
options.add_argument("--disable-dev-shm-usage")
|
65 |
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
previous_hashes[i]
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
|
|
|
|
96 |
|
97 |
# Define main function to handle user input
|
98 |
def handle_input(task, storage_location, url1, url2, scrape_interval, content_type):
|
|
|
10 |
from selenium.webdriver.chrome.options import Options
|
11 |
from webdriver_manager.chrome import ChromeDriverManager
|
12 |
from huggingface_hub import InferenceClient
|
13 |
+
from selenium.common.exceptions import WebDriverException
|
14 |
|
15 |
# Check for Hugging Face API key
|
16 |
api_key = os.getenv('access')
|
|
|
64 |
options.add_argument("--no-sandbox")
|
65 |
options.add_argument("--disable-dev-shm-usage")
|
66 |
|
67 |
+
try:
|
68 |
+
with webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options) as driver:
|
69 |
+
try:
|
70 |
+
while True:
|
71 |
+
for i, url in enumerate(urls):
|
72 |
+
try:
|
73 |
+
driver.get(url)
|
74 |
+
time.sleep(2) # Wait for the page to load
|
75 |
+
if content_type == "text":
|
76 |
+
current_content = driver.page_source
|
77 |
+
elif content_type == "media":
|
78 |
+
current_content = driver.find_elements_by_tag_name("img")
|
79 |
+
else:
|
80 |
+
current_content = driver.page_source
|
81 |
+
|
82 |
+
current_hash = hashlib.md5(str(current_content).encode('utf-8')).hexdigest()
|
83 |
+
|
84 |
+
if current_hash != previous_hashes[i]:
|
85 |
+
previous_hashes[i] = current_hash
|
86 |
+
date_time_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
87 |
+
history.append(f"Change detected at {url} on {date_time_str}")
|
88 |
+
csv_toolkit.writerow({"date": date_time_str.split()[0], "time": date_time_str.split()[1], "url": url, "change": "Content changed"})
|
89 |
+
logging.info(f"Change detected at {url} on {date_time_str}")
|
90 |
+
except Exception as e:
|
91 |
+
logging.error(f"Error accessing {url}: {e}")
|
92 |
+
|
93 |
+
time.sleep(scrape_interval * 60) # Check every scrape_interval minutes
|
94 |
+
except KeyboardInterrupt:
|
95 |
+
logging.info("Monitoring stopped by user.")
|
96 |
+
except WebDriverException as e:
|
97 |
+
logging.error(f"WebDriverException: {e}")
|
98 |
+
raise
|
99 |
|
100 |
# Define main function to handle user input
|
101 |
def handle_input(task, storage_location, url1, url2, scrape_interval, content_type):
|