import csv
import datetime
import hashlib
import os
import threading
import time

import gradio as gr
import mysql.connector
from feedgen.feed import FeedGenerator  # For generating the RSS feed
from huggingface_hub import InferenceClient
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

# Configuration (replace with your actual values or environment variables)
DB_HOST = os.environ.get("DB_HOST", "your_host")
DB_USER = os.environ.get("DB_USER", "your_user")
DB_PASSWORD = os.environ.get("DB_PASSWORD", "your_password")
DB_NAME = os.environ.get("DB_NAME", "your_database")

# Global variables
monitoring_thread = None
stop_event = threading.Event()
db_connection = None
current_task = None
history = []


# Function to establish a database connection.
# Note: a single shared connection is not strictly thread-safe under heavy
# concurrent use; it is adequate for this script's light read/write pattern.
def get_db_connection():
    global db_connection
    if db_connection is None or not db_connection.is_connected():
        try:
            db_connection = mysql.connector.connect(
                host=DB_HOST,
                user=DB_USER,
                password=DB_PASSWORD,
                database=DB_NAME,
            )
            return db_connection
        except Exception as e:
            print(f"Error connecting to database: {e}")
            return None
    return db_connection


# Function to create the articles table if it doesn't exist
def create_articles_table():
    conn = get_db_connection()
    if conn:
        cursor = conn.cursor()
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS articles (
                id INT AUTO_INCREMENT PRIMARY KEY,
                url VARCHAR(255) NOT NULL,
                title VARCHAR(255),
                content TEXT,
                hash VARCHAR(32),
                timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)
        conn.commit()
        cursor.close()


# Initialize the articles table
create_articles_table()


# Function to monitor URLs for changes
def monitor_urls(target_urls, storage_location, feed_rss, stop_event):
    global history
    previous_hashes = {url: "" for url in target_urls}

    options = Options()
    # `options.headless = True` is deprecated in Selenium 4; use the flag instead.
    options.add_argument("--headless=new")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    driver = webdriver.Chrome(
        service=Service(ChromeDriverManager().install()), options=options
    )

    try:
        while not stop_event.is_set():
            for url in target_urls:
                try:
                    driver.get(url)
                    time.sleep(2)  # Give dynamic content a moment to render
                    current_content = driver.page_source
                    # MD5 serves only as a cheap change-detection fingerprint,
                    # not for any security purpose.
                    current_hash = hashlib.md5(
                        current_content.encode("utf-8")
                    ).hexdigest()
                    if current_hash != previous_hashes[url]:
                        previous_hashes[url] = current_hash
                        timestamp = datetime.datetime.now()
                        # driver.title is more reliable than reading the
                        # <title> element's .text, which is empty because the
                        # element is never rendered.
                        title = driver.title or "No Title"
                        history.append(
                            f"Change detected at {url} on "
                            f"{timestamp.strftime('%Y-%m-%d %H:%M:%S')}"
                        )
                        if storage_location:
                            save_to_storage(
                                storage_location, url, title, current_content, timestamp
                            )
                        if feed_rss:
                            save_to_database(url, title, current_content, current_hash)
                except Exception as e:
                    print(f"Error accessing {url}: {e}")

            # Wait up to 5 minutes between passes, waking immediately if a
            # stop is requested (unlike time.sleep, which would block).
            stop_event.wait(300)
    except Exception as e:
        print(f"Unexpected error in monitoring thread: {e}")
    finally:
        driver.quit()
        print("Monitoring thread has been stopped.")


# Function to save data to local storage (CSV)
def save_to_storage(storage_location, url, title, content, timestamp):
    try:
        with open(storage_location, "a", newline="", encoding="utf-8") as csvfile:
            csv_writer = csv.writer(csvfile)
            csv_writer.writerow(
                [timestamp.strftime("%Y-%m-%d %H:%M:%S"), url, title, content]
            )
    except Exception as e:
        print(f"Error saving to storage: {e}")
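
# Selenium is used above because it can render JavaScript-heavy pages. As a
# lighter-weight sketch (an assumption, not part of the original design):
# for purely static pages the same fingerprinting works over plain HTTP,
# avoiding the browser entirely. `fetch_page_hash` is a hypothetical helper
# and is not used elsewhere in this script; it assumes the `requests`
# package is installed.
def fetch_page_hash(url: str) -> str:
    """Hypothetical helper: MD5 fingerprint of a static page via plain HTTP."""
    import requests  # Local import so the rest of the script runs without it

    response = requests.get(url, timeout=30)
    response.raise_for_status()
    return hashlib.md5(response.text.encode("utf-8")).hexdigest()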

# Function to save data to the database
def save_to_database(url, title, content, content_hash):
    conn = get_db_connection()
    if conn:
        cursor = conn.cursor()
        try:
            sql = (
                "INSERT INTO articles (url, title, content, hash) "
                "VALUES (%s, %s, %s, %s)"
            )
            val = (url, title, content, content_hash)
            cursor.execute(sql, val)
            conn.commit()
        except Exception as e:
            print(f"Error saving to database: {e}")
        finally:
            cursor.close()


# Function to generate an RSS feed from the database.
# feedparser can only parse feeds, not build them, so feedgen is used here.
def generate_rss_feed():
    conn = get_db_connection()
    if conn:
        cursor = conn.cursor()
        try:
            cursor.execute("SELECT * FROM articles ORDER BY timestamp DESC")
            articles = cursor.fetchall()

            fg = FeedGenerator()
            fg.title("Website Changes Feed")
            fg.link(href="http://yourwebsite.com/feed")  # Replace with your actual feed URL
            fg.description("Feed of changes detected on monitored websites.")

            for article in articles:
                fe = fg.add_entry()
                fe.title(article[2] or "No Title")  # Title column
                fe.link(href=article[1])  # URL column
                fe.description(article[3])  # Content column
                # Timestamp column; feedgen requires a timezone-aware
                # datetime, so UTC is assumed here.
                fe.pubDate(article[5].replace(tzinfo=datetime.timezone.utc))

            return fg.rss_str(pretty=True).decode("utf-8")
        except Exception as e:
            print(f"Error generating RSS feed: {e}")
        finally:
            cursor.close()
    return None


# Function to start monitoring
def start_monitoring(target_urls, storage_location, feed_rss):
    global monitoring_thread, stop_event, current_task, history
    if monitoring_thread and monitoring_thread.is_alive():
        return "Monitoring is already running.", history

    stop_event.clear()
    current_task = f"Monitoring URLs: {', '.join(target_urls)}"
    history.append(f"Task started: {current_task}")
    monitoring_thread = threading.Thread(
        target=monitor_urls,
        args=(target_urls, storage_location, feed_rss, stop_event),
        daemon=True,
    )
    monitoring_thread.start()
    return "Monitoring started.", history


# Function to stop monitoring
def stop_monitoring():
    global monitoring_thread, stop_event, current_task, history
    if monitoring_thread and monitoring_thread.is_alive():
        stop_event.set()
        # The worker wakes from stop_event.wait() promptly; the timeout
        # caps the join in case a page load is still in progress.
        monitoring_thread.join(timeout=30)
        current_task = None
        history.append("Monitoring stopped by user.")
        return "Monitoring stopped.", history
    return "No monitoring task is currently running.", history


# Function to handle chatbot responses using 'mistralai/Mixtral-8x7B-Instruct-v0.1'.
# InferenceClient has no `inference` method; text_generation is the correct
# call for an instruct model. A Hugging Face API token may be required
# (pass token=... or set the HF_TOKEN environment variable).
def chatbot_response(message, chat_history):
    client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
    try:
        response = client.text_generation(message, max_new_tokens=512)
    except Exception as e:
        response = f"Error querying model: {e}"
    chat_history = chat_history or []
    chat_history.append((message, response))
    # Second return value clears the message input box.
    return chat_history, ""


# --- Gradio Interface ---
with gr.Blocks() as demo:
    gr.Markdown("# Website Monitor and Chatbot")

    # Configuration Tab
    with gr.Tab("Configuration"):
        with gr.Row():
            target_urls = gr.Textbox(
                label="Target URLs (comma-separated)",
                placeholder="https://example.com, https://another-site.com",
            )
        with gr.Row():
            storage_location = gr.Textbox(
                label="Storage Location (CSV file path)",
                placeholder="/path/to/your/file.csv",
                visible=False,
            )
        with gr.Row():
            feed_rss_checkbox = gr.Checkbox(label="Enable RSS Feed")
        with gr.Row():
            start_button = gr.Button("Start Monitoring")
            stop_button = gr.Button("Stop Monitoring")
        with gr.Row():
            status_text = gr.Textbox(label="Status", interactive=False)
        with gr.Row():
            history_text = gr.Textbox(label="History", lines=10, interactive=False)

    # User-End View Tab
    with gr.Tab("User-End View"):
        with gr.Row():
            # The generated feed is an XML string, so a Textbox is a better
            # fit than gr.JSON.
            feed_content = gr.Textbox(
                label="RSS Feed Content", lines=20, interactive=False
            )
    # Chatbot Tab
    with gr.Tab("Chatbot"):
        chatbot_interface = gr.Chatbot()
        with gr.Row():
            message_input = gr.Textbox(placeholder="Type your message here...")
            send_button = gr.Button("Send")

    # --- Event Handlers ---

    # Start monitoring button click
    def on_start_click(target_urls_str, storage_loc, feed_enabled):
        global history
        try:
            target_urls = [url.strip() for url in target_urls_str.split(",")]
            if not all(target_urls):
                return "Please enter valid URLs.", "\n".join(history)
            status, history = start_monitoring(
                target_urls, storage_loc if storage_loc else None, feed_enabled
            )
            return status, "\n".join(history)
        except Exception as e:
            return f"Error starting monitoring: {e}", "\n".join(history)

    start_button.click(
        on_start_click,
        inputs=[target_urls, storage_location, feed_rss_checkbox],
        outputs=[status_text, history_text],
    )

    # Stop monitoring button click
    def on_stop_click():
        status, hist = stop_monitoring()
        return status, "\n".join(hist)

    stop_button.click(on_stop_click, outputs=[status_text, history_text])

    # Send message to chatbot button click
    send_button.click(
        chatbot_response,
        inputs=[message_input, chatbot_interface],
        outputs=[chatbot_interface, message_input],
    )

    # Update RSS feed content periodically
    def update_feed_content():
        return generate_rss_feed()

    demo.load(update_feed_content, outputs=feed_content, every=5)  # Refresh every 5 seconds


if __name__ == "__main__":
    demo.launch()
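
# --- Usage notes (a sketch of the assumed setup, not part of the app logic) ---
# The imports above imply roughly these packages; exact versions are assumptions:
#   pip install gradio selenium webdriver-manager huggingface_hub \
#       mysql-connector-python feedgen
# Configure the MySQL connection through the DB_HOST, DB_USER, DB_PASSWORD,
# and DB_NAME environment variables before launching, e.g. (assuming the file
# is saved as app.py):
#   DB_HOST=localhost DB_USER=monitor DB_PASSWORD=secret DB_NAME=monitor \
#       python app.py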