import datetime
import os
import csv
import time
import hashlib
import threading
from pathlib import Path
import gradio as gr
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from huggingface_hub import InferenceClient
import mysql.connector
import feedparser  # For parsing RSS feeds
import sqlite3  # For simple local storage if needed
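# NOTE: Selenium needs a Chrome/Chromium binary available in the runtime environment;
# webdriver_manager only downloads a matching chromedriver, not the browser itself.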
# Configuration (replace with your actual values or environment variables)
DB_HOST = os.environ.get("DB_HOST", "your_host")
DB_USER = os.environ.get("DB_USER", "your_user")
DB_PASSWORD = os.environ.get("DB_PASSWORD", "your_password")
DB_NAME = os.environ.get("DB_NAME", "your_database")
# Global variables
monitoring_thread = None
stop_event = threading.Event()
db_connection = None
current_task = None
history = []
# Function to establish a database connection
def get_db_connection():
    global db_connection
    if db_connection is None or not db_connection.is_connected():
        try:
            db_connection = mysql.connector.connect(
                host=DB_HOST,
                user=DB_USER,
                password=DB_PASSWORD,
                database=DB_NAME
            )
            return db_connection
        except Exception as e:
            print(f"Error connecting to database: {e}")
            return None
    else:
        return db_connection
# Function to create the articles table if it doesn't exist
def create_articles_table():
    conn = get_db_connection()
    if conn:
        cursor = conn.cursor()
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS articles (
                id INT AUTO_INCREMENT PRIMARY KEY,
                url VARCHAR(255) NOT NULL,
                title VARCHAR(255),
                content TEXT,
                hash VARCHAR(32),
                timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)
        conn.commit()
        cursor.close()
# Initialize the articles table
create_articles_table()
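# NOTE: the table is created at import time; if MySQL is unreachable, get_db_connection()
# only prints the error, so the app still starts but detected changes are not persisted.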
# Function to monitor URLs for changes
def monitor_urls(target_urls, storage_location, feed_rss, stop_event):
    global history
    previous_hashes = {url: "" for url in target_urls}
    options = Options()
    options.add_argument("--headless")  # options.headless is deprecated in recent Selenium releases
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    try:
        while not stop_event.is_set():
            for url in target_urls:
                try:
                    driver.get(url)
                    time.sleep(2)
                    current_content = driver.page_source
                    # Hash the full page source: any byte-level change (including dynamic
                    # markup such as ads or timestamps) is reported as a change.
                    current_hash = hashlib.md5(current_content.encode('utf-8')).hexdigest()
                    if current_hash != previous_hashes[url]:
                        previous_hashes[url] = current_hash
                        timestamp = datetime.datetime.now()
                        title = driver.title or "No Title"
                        history.append(f"Change detected at {url} on {timestamp.strftime('%Y-%m-%d %H:%M:%S')}")
                        if storage_location:
                            save_to_storage(storage_location, url, title, current_content, timestamp)
                        if feed_rss:
                            save_to_database(url, title, current_content, current_hash)
                except Exception as e:
                    print(f"Error accessing {url}: {e}")
            # Wait up to 5 minutes between polling rounds, waking early if monitoring is stopped
            stop_event.wait(300)
    except Exception as e:
        print(f"Unexpected error in monitoring thread: {e}")
    finally:
        driver.quit()
        print("Monitoring thread has been stopped.")
# Function to save data to local storage (CSV)
def save_to_storage(storage_location, url, title, content, timestamp):
    try:
        with open(storage_location, "a", newline='', encoding='utf-8') as csvfile:
            csv_writer = csv.writer(csvfile)
            csv_writer.writerow([timestamp.strftime("%Y-%m-%d %H:%M:%S"), url, title, content])
    except Exception as e:
        print(f"Error saving to storage: {e}")
# Function to save data to the database
def save_to_database(url, title, content, content_hash):
    conn = get_db_connection()
    if conn:
        cursor = conn.cursor()
        try:
            sql = "INSERT INTO articles (url, title, content, hash) VALUES (%s, %s, %s, %s)"
            val = (url, title, content, content_hash)
            cursor.execute(sql, val)
            conn.commit()
        except Exception as e:
            print(f"Error saving to database: {e}")
        finally:
            cursor.close()
# Function to build a feed-style dictionary of detected changes from the database.
# feedparser can only parse feeds, not generate them, so the feed is assembled as a
# plain dict here and rendered by the gr.JSON component in the UI.
def generate_rss_feed():
    conn = get_db_connection()
    if conn:
        cursor = conn.cursor()
        try:
            cursor.execute("SELECT url, title, content, timestamp FROM articles ORDER BY timestamp DESC")
            articles = cursor.fetchall()
            return {
                'title': 'Website Changes Feed',
                'link': 'http://yourwebsite.com/feed',  # Replace with your actual feed URL
                'description': 'Feed of changes detected on monitored websites.',
                'entries': [
                    {
                        'title': title,
                        'link': url,
                        'description': content,
                        'published': str(timestamp),
                    }
                    for url, title, content, timestamp in articles
                ],
            }
        except Exception as e:
            print(f"Error generating RSS feed: {e}")
        finally:
            cursor.close()
    return None
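# NOTE: to publish a real RSS/Atom XML feed instead of this JSON preview, a feed-writing
# library (e.g. feedgen) or hand-built XML would be needed; that is left out here.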
# Function to start monitoring
# (history is returned as a newline-joined string because the History output is a gr.Textbox)
def start_monitoring(target_urls, storage_location, feed_rss):
    global monitoring_thread, stop_event, current_task, history
    if monitoring_thread and monitoring_thread.is_alive():
        return "Monitoring is already running.", "\n".join(history)
    stop_event.clear()
    current_task = f"Monitoring URLs: {', '.join(target_urls)}"
    history.append(f"Task started: {current_task}")
    monitoring_thread = threading.Thread(
        target=monitor_urls,
        args=(target_urls, storage_location, feed_rss, stop_event),
        daemon=True
    )
    monitoring_thread.start()
    return "Monitoring started.", "\n".join(history)
# Function to stop monitoring
def stop_monitoring():
    global monitoring_thread, stop_event, current_task, history
    if monitoring_thread and monitoring_thread.is_alive():
        stop_event.set()
        monitoring_thread.join()
        current_task = None
        history.append("Monitoring stopped by user.")
        return "Monitoring stopped.", "\n".join(history)
    else:
        return "No monitoring task is currently running.", "\n".join(history)
# Function to handle chatbot responses
def chatbot_response(message, history):
    # Replace this with your actual chatbot logic; this calls 'mistralai/Mixtral-8x7B-Instruct-v0.1'
    # through Hugging Face's InferenceClient (the client has no `inference` method;
    # text_generation is the text-in/text-out call).
    client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
    response = client.text_generation(message, max_new_tokens=512)
    history.append((message, response))
    return history
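# NOTE: calls to the hosted Mixtral model may require a Hugging Face token (e.g. passing
# token=... to InferenceClient or setting the HF_TOKEN environment variable), depending on
# the deployment; creating the client once at module level would also avoid per-message setup.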
# --- Gradio Interface ---
with gr.Blocks() as demo:
    gr.Markdown("# Website Monitor and Chatbot")
    # Configuration Tab
    with gr.Tab("Configuration"):
        with gr.Row():
            target_urls = gr.Textbox(
                label="Target URLs (comma-separated)",
                placeholder="https://example.com, https://another-site.com"
            )
        with gr.Row():
            storage_location = gr.Textbox(
                label="Storage Location (CSV file path)",
                placeholder="/path/to/your/file.csv",
                visible=False
            )
        with gr.Row():
            feed_rss_checkbox = gr.Checkbox(label="Enable RSS Feed")
        with gr.Row():
            start_button = gr.Button("Start Monitoring")
            stop_button = gr.Button("Stop Monitoring")
        with gr.Row():
            status_text = gr.Textbox(label="Status", interactive=False)
        with gr.Row():
            history_text = gr.Textbox(
                label="History", lines=10, interactive=False
            )
    # User-End View Tab
    with gr.Tab("User-End View"):
        with gr.Row():
            feed_content = gr.JSON(label="RSS Feed Content")
    # Chatbot Tab
    with gr.Tab("Chatbot"):
        chatbot_interface = gr.Chatbot()
        with gr.Row():
            message_input = gr.Textbox(placeholder="Type your message here...")
            send_button = gr.Button("Send")
    # --- Event Handlers ---
    # Start monitoring button click
    def on_start_click(target_urls_str, storage_loc, feed_enabled):
        try:
            # Split the comma-separated input and drop empty entries
            urls = [url.strip() for url in target_urls_str.split(",") if url.strip()]
            if not urls:
                return "Please enter valid URLs.", "\n".join(history)
            status, history_str = start_monitoring(urls, storage_loc if storage_loc else None, feed_enabled)
            return status, history_str
        except Exception as e:
            return f"Error starting monitoring: {e}", "\n".join(history)
    start_button.click(
        on_start_click,
        inputs=[target_urls, storage_location, feed_rss_checkbox],
        outputs=[status_text, history_text]
    )
    # Stop monitoring button click
    stop_button.click(
        stop_monitoring,
        outputs=[status_text, history_text]
    )
    # Send message to chatbot button click
    send_button.click(
        chatbot_response,
        inputs=[message_input, chatbot_interface],
        outputs=[chatbot_interface]
    )
    # Update RSS feed content periodically
    def update_feed_content():
        return generate_rss_feed()
    demo.load(update_feed_content, outputs=feed_content, every=5)  # Update every 5 seconds
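# NOTE: depending on the Gradio version, the queue may need to be enabled (demo.queue())
# for the `every=` polling above to take effect.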
if __name__ == "__main__":
    demo.launch()