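"""Website Monitor and Chatbot.

A Gradio app that polls a set of URLs with headless Chrome, records content
changes to a CSV file and/or a MySQL-backed RSS feed, and exposes a simple
Mixtral-8x7B chatbot tab.
"""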
import datetime
import os
import csv
import time
import hashlib
import threading
from pathlib import Path
import gradio as gr
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from huggingface_hub import InferenceClient
from feedgen.feed import FeedGenerator  # feedparser only parses feeds; feedgen builds them
import mysql.connector
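# Pip packages behind these imports (names assumed from the usual mappings):
# gradio, selenium, webdriver-manager, huggingface-hub, mysql-connector-python, feedgen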
# Configuration (replace with your actual values or environment variables)
DB_HOST = os.environ.get("DB_HOST", "your_host")
DB_USER = os.environ.get("DB_USER", "your_user")
DB_PASSWORD = os.environ.get("DB_PASSWORD", "your_password")
DB_NAME = os.environ.get("DB_NAME", "your_database")
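# Example setup with hypothetical values, e.g. via Space secrets or a local shell:
#   export DB_HOST=localhost
#   export DB_USER=monitor
#   export DB_PASSWORD=secret
#   export DB_NAME=website_monitor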
# Global variables
monitoring_thread = None
stop_event = threading.Event()
db_connection = None
current_task = None
history = []
# Function to establish a database connection (reused across calls)
def get_db_connection():
    global db_connection
    if db_connection is None or not db_connection.is_connected():
        try:
            db_connection = mysql.connector.connect(
                host=DB_HOST,
                user=DB_USER,
                password=DB_PASSWORD,
                database=DB_NAME,
            )
        except Exception as e:
            print(f"Error connecting to database: {e}")
            return None
    return db_connection
# Function to create the articles table if it doesn't exist
def create_articles_table():
    conn = get_db_connection()
    if conn:
        cursor = conn.cursor()
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS articles (
                id INT AUTO_INCREMENT PRIMARY KEY,
                url VARCHAR(255) NOT NULL,
                title VARCHAR(255),
                content TEXT,
                hash VARCHAR(32),
                timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)
        conn.commit()
        cursor.close()
# Initialize the articles table
create_articles_table()
# Function to monitor URLs for changes
def monitor_urls(target_urls, storage_location, feed_rss, stop_event):
    global history
    previous_hashes = {url: "" for url in target_urls}
    options = Options()
    options.add_argument("--headless=new")  # Selenium 4 deprecated the `options.headless` attribute
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    try:
        while not stop_event.is_set():
            for url in target_urls:
                try:
                    driver.get(url)
                    time.sleep(2)  # Give dynamic content a moment to render
                    current_content = driver.page_source
                    # Note: hashing the full page source means any dynamic markup
                    # (timestamps, ads) will register as a change.
                    current_hash = hashlib.md5(current_content.encode("utf-8")).hexdigest()
                    if current_hash != previous_hashes[url]:
                        previous_hashes[url] = current_hash
                        timestamp = datetime.datetime.now()
                        # driver.title is more reliable than find_element(By.TAG_NAME, "title").text,
                        # which is usually empty because the <title> element is never rendered.
                        title = driver.title or "No Title"
                        history.append(f"Change detected at {url} on {timestamp.strftime('%Y-%m-%d %H:%M:%S')}")
                        if storage_location:
                            save_to_storage(storage_location, url, title, current_content, timestamp)
                        if feed_rss:
                            save_to_database(url, title, current_content, current_hash)
                except Exception as e:
                    print(f"Error accessing {url}: {e}")
            # Wait up to 5 minutes, waking immediately if a stop is requested;
            # a plain time.sleep(300) would make stop_monitoring block for the full interval.
            stop_event.wait(300)
    except Exception as e:
        print(f"Unexpected error in monitoring thread: {e}")
    finally:
        driver.quit()
        print("Monitoring thread has been stopped.")
# Function to append a change record to local storage (CSV)
def save_to_storage(storage_location, url, title, content, timestamp):
    try:
        with open(storage_location, "a", newline="", encoding="utf-8") as csvfile:
            csv_writer = csv.writer(csvfile)
            csv_writer.writerow([timestamp.strftime("%Y-%m-%d %H:%M:%S"), url, title, content])
    except Exception as e:
        print(f"Error saving to storage: {e}")
# Function to save a change record to the database
def save_to_database(url, title, content, content_hash):  # renamed from `hash`, which shadows the builtin
    conn = get_db_connection()
    if conn:
        cursor = conn.cursor()
        try:
            sql = "INSERT INTO articles (url, title, content, hash) VALUES (%s, %s, %s, %s)"
            cursor.execute(sql, (url, title, content, content_hash))
            conn.commit()
        except Exception as e:
            print(f"Error saving to database: {e}")
        finally:
            cursor.close()
# Function to generate an RSS feed from the database
def generate_rss_feed():
    conn = get_db_connection()
    if conn:
        cursor = conn.cursor()
        try:
            cursor.execute("SELECT * FROM articles ORDER BY timestamp DESC")
            articles = cursor.fetchall()
            # feedparser cannot build feeds (and feedparser.FeedGenerator does
            # not exist); the feedgen library is used instead.
            fg = FeedGenerator()
            fg.title("Website Changes Feed")
            fg.link(href="http://yourwebsite.com/feed")  # Replace with your actual feed URL
            fg.description("Feed of changes detected on monitored websites.")
            for article in articles:
                entry = fg.add_entry()
                entry.title(article[2] or "No Title")  # title column
                entry.link(href=article[1])            # url column
                entry.description(article[3])          # content column
                # feedgen requires timezone-aware datetimes; MySQL returns naive
                # ones, assumed here to be UTC.
                entry.pubDate(article[5].replace(tzinfo=datetime.timezone.utc))
            return fg.rss_str(pretty=True).decode("utf-8")
        except Exception as e:
            print(f"Error generating RSS feed: {e}")
        finally:
            cursor.close()
    return None
# Function to start monitoring
def start_monitoring(target_urls, storage_location, feed_rss):
    global monitoring_thread, stop_event, current_task, history
    if monitoring_thread and monitoring_thread.is_alive():
        return "Monitoring is already running.", "\n".join(history)
    stop_event.clear()
    current_task = f"Monitoring URLs: {', '.join(target_urls)}"
    history.append(f"Task started: {current_task}")
    monitoring_thread = threading.Thread(
        target=monitor_urls,
        args=(target_urls, storage_location, feed_rss, stop_event),
        daemon=True,
    )
    monitoring_thread.start()
    # Join the history into a string so it renders cleanly in the Textbox
    return "Monitoring started.", "\n".join(history)
# Function to stop monitoring
def stop_monitoring():
    global monitoring_thread, stop_event, current_task, history
    if monitoring_thread and monitoring_thread.is_alive():
        stop_event.set()
        # The monitor loop waits on stop_event, so this returns promptly
        monitoring_thread.join(timeout=10)
        current_task = None
        history.append("Monitoring stopped by user.")
        return "Monitoring stopped.", "\n".join(history)
    else:
        return "No monitoring task is currently running.", "\n".join(history)
# Function to handle chatbot responses
def chatbot_response(message, history):
    # InferenceClient has no `inference()` method; text_generation() is the
    # standard call for a text-generation model like Mixtral-8x7B-Instruct.
    client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
    response = client.text_generation(message, max_new_tokens=512)
    history.append((message, response))
    return history, ""  # the second value clears the message input
# --- Gradio Interface ---
with gr.Blocks() as demo:
    gr.Markdown("# Website Monitor and Chatbot")
    # Configuration Tab
    with gr.Tab("Configuration"):
        with gr.Row():
            target_urls = gr.Textbox(
                label="Target URLs (comma-separated)",
                placeholder="https://example.com, https://another-site.com",
            )
        with gr.Row():
            storage_location = gr.Textbox(
                label="Storage Location (CSV file path)",
                placeholder="/path/to/your/file.csv",
                visible=False,
            )
        with gr.Row():
            feed_rss_checkbox = gr.Checkbox(label="Enable RSS Feed")
        with gr.Row():
            start_button = gr.Button("Start Monitoring")
            stop_button = gr.Button("Stop Monitoring")
        with gr.Row():
            status_text = gr.Textbox(label="Status", interactive=False)
        with gr.Row():
            history_text = gr.Textbox(
                label="History", lines=10, interactive=False
            )
    # User-End View Tab
    with gr.Tab("User-End View"):
        with gr.Row():
            # The feed is RSS XML, so a read-only Textbox fits better than gr.JSON
            feed_content = gr.Textbox(label="RSS Feed Content", lines=20, interactive=False)
    # Chatbot Tab
    with gr.Tab("Chatbot"):
        chatbot_interface = gr.Chatbot()
        with gr.Row():
            message_input = gr.Textbox(placeholder="Type your message here...")
            send_button = gr.Button("Send")
    # --- Event Handlers ---
    # Start monitoring button click
    def on_start_click(target_urls_str, storage_loc, feed_enabled):
        try:
            # The original comprehension (`for url.strip() in ...`) was a
            # SyntaxError; strip each URL and drop empty entries instead.
            target_urls = [url.strip() for url in target_urls_str.split(",") if url.strip()]
            if not target_urls:
                return "Please enter valid URLs.", "\n".join(history)
            return start_monitoring(target_urls, storage_loc if storage_loc else None, feed_enabled)
        except Exception as e:
            return f"Error starting monitoring: {e}", "\n".join(history)
    start_button.click(
        on_start_click,
        inputs=[target_urls, storage_location, feed_rss_checkbox],
        outputs=[status_text, history_text],
    )
    # Stop monitoring button click
    stop_button.click(
        stop_monitoring,
        outputs=[status_text, history_text],
    )
    # Send message to chatbot button click
    send_button.click(
        chatbot_response,
        inputs=[message_input, chatbot_interface],
        outputs=[chatbot_interface, message_input],  # listing the same component twice is redundant; clear the input instead
    )
    # Update RSS feed content periodically
    def update_feed_content():
        return generate_rss_feed()

    demo.load(update_feed_content, outputs=feed_content, every=5)  # Refresh every 5 seconds
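
# Assumed local workflow: `python app.py` serves the UI on http://127.0.0.1:7860
# (Gradio's default port); on Hugging Face Spaces, launch() is picked up automatically.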
if __name__ == "__main__":
    demo.launch()