# NOTE: removed Hugging Face Spaces page-scrape residue (status banner,
# commit hashes, line-number gutter) that preceded the actual source and
# made the file unparseable as Python.
import datetime
import os
import csv
import time
import hashlib
import threading
from pathlib import Path
import logging
import gradio as gr
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.common.exceptions import WebDriverException, NoSuchElementException
from webdriver_manager.chrome import ChromeDriverManager
from huggingface_hub import InferenceClient
import mysql.connector
import feedparser # For parsing RSS feeds
import sqlite3 # For simple local storage if needed
# Configure logging: timestamped, level-tagged messages at INFO and above.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Configuration (replace with your actual values or environment variables).
# The "your_*" strings are placeholders used only when the corresponding
# environment variable is unset — connecting with them will fail.
DB_HOST = os.environ.get("DB_HOST", "your_host")
DB_USER = os.environ.get("DB_USER", "your_user")
DB_PASSWORD = os.environ.get("DB_PASSWORD", "your_password")
DB_NAME = os.environ.get("DB_NAME", "your_database")
HUGGINGFACE_API_KEY = os.environ.get("HUGGINGFACE_API_KEY") # Token for the HF Inference API (None if unset)
# Global mutable state shared between the Gradio handlers and the
# background monitoring thread.
monitoring_thread = None    # threading.Thread running monitor_urls, or None
stop_event = threading.Event()  # set to ask the monitoring thread to exit
db_connection = None        # cached MySQL connection reused by get_db_connection()
current_task = None         # human-readable description of the active task
history = []                # event log shown in the UI
url_monitoring_intervals = {} # per-URL polling interval in seconds
def get_db_connection():
    """Return a live MySQL connection, opening one if needed.

    Reuses the cached module-level connection while it is still open.
    Returns None when a new connection cannot be established.
    """
    global db_connection
    # Fast path: hand back the cached connection if it is still usable.
    if db_connection is not None and db_connection.is_connected():
        return db_connection
    try:
        db_connection = mysql.connector.connect(
            host=DB_HOST,
            user=DB_USER,
            password=DB_PASSWORD,
            database=DB_NAME,
        )
    except Exception as exc:
        logging.error(f"Error connecting to database: {exc}")
        return None
    return db_connection
def create_articles_table():
    """Create the `articles` table if it does not already exist.

    Silently does nothing when no database connection is available.
    """
    conn = get_db_connection()
    if not conn:
        return
    cursor = conn.cursor()
    ddl = """
            CREATE TABLE IF NOT EXISTS articles (
                id INT AUTO_INCREMENT PRIMARY KEY,
                url VARCHAR(255) NOT NULL,
                title VARCHAR(255),
                content TEXT,
                hash VARCHAR(32),
                timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """
    cursor.execute(ddl)
    conn.commit()
    cursor.close()
# Create the articles table once at import time so later inserts cannot
# fail on a missing table (a no-op if the database is unreachable).
create_articles_table()
def monitor_urls(target_urls, storage_location, feed_rss, stop_event):
    """Poll each URL in a loop and record page changes until stop_event is set.

    Args:
        target_urls: list of URLs to watch.
        storage_location: CSV file path for change records, or falsy to skip.
        feed_rss: truthy to also persist changes to the database feed.
        stop_event: threading.Event used to request shutdown.

    Runs in a daemon thread; appends human-readable entries to the global
    `history` list and adapts `url_monitoring_intervals` per URL.
    """
    global history, url_monitoring_intervals
    previous_hashes = {url: "" for url in target_urls}
    options = Options()
    # BUG FIX: `options.headless = True` was removed in Selenium 4.x;
    # headless mode is requested via a Chrome argument instead.
    options.add_argument("--headless")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    try:
        while not stop_event.is_set():
            # BUG FIX: `interval` was previously assigned only inside the URL
            # loop, so an empty target_urls list raised NameError at sleep time.
            interval = 300
            for url in target_urls:
                try:
                    # Dynamic monitoring interval (default 5 minutes).
                    interval = url_monitoring_intervals.get(url, 300)
                    driver.get(url)
                    time.sleep(2)  # Allow page to load
                    current_content = driver.page_source
                    current_hash = hashlib.md5(current_content.encode('utf-8')).hexdigest()
                    if current_hash != previous_hashes[url]:
                        previous_hashes[url] = current_hash
                        timestamp = datetime.datetime.now()
                        try:
                            title_element = driver.find_element(By.TAG_NAME, "title")
                            title = title_element.text
                        except NoSuchElementException:
                            title = "No Title"
                        history.append(f"Change detected at {url} on {timestamp.strftime('%Y-%m-%d %H:%M:%S')}")
                        if storage_location:
                            save_to_storage(storage_location, url, title, current_content, timestamp)
                        if feed_rss:
                            save_to_database(url, title, current_content, current_hash)
                        # Check more frequently right after a change.
                        url_monitoring_intervals[url] = 60
                    else:
                        # Back off gradually when nothing changes (max 10 min).
                        # BUG FIX: use .get() so a URL without a pre-seeded
                        # interval cannot raise KeyError.
                        url_monitoring_intervals[url] = min(url_monitoring_intervals.get(url, 300) + 60, 600)
                except WebDriverException as e:
                    logging.error(f"Error accessing {url}: {e}")
                if stop_event.is_set():
                    break  # Exit inner loop if stop event is set
            if not stop_event.is_set():
                # IMPROVEMENT: wait on the event instead of time.sleep() so
                # stop_monitoring() is not blocked for the full interval.
                stop_event.wait(interval)
    except Exception as e:
        logging.error(f"Unexpected error in monitoring thread: {e}")
    finally:
        driver.quit()
        logging.info("Monitoring thread has been stopped.")
def save_to_storage(storage_location, url, title, content, timestamp):
    """Append one change record as a CSV row to the file at storage_location.

    Row layout: formatted timestamp, URL, page title, page content.
    Errors are logged rather than raised.
    """
    try:
        with open(storage_location, "a", newline='', encoding='utf-8') as handle:
            row = [timestamp.strftime("%Y-%m-%d %H:%M:%S"), url, title, content]
            csv.writer(handle).writerow(row)
    except Exception as exc:
        logging.error(f"Error saving to storage: {exc}")
def save_to_database(url, title, content, hash):
    """Insert one detected change into the `articles` table.

    Does nothing when no database connection is available; insert
    errors are logged rather than raised.
    """
    conn = get_db_connection()
    if not conn:
        return
    cursor = conn.cursor()
    try:
        cursor.execute(
            "INSERT INTO articles (url, title, content, hash) VALUES (%s, %s, %s, %s)",
            (url, title, content, hash),
        )
        conn.commit()
    except Exception as exc:
        logging.error(f"Error saving to database: {exc}")
    finally:
        cursor.close()
def generate_rss_feed():
    """Build an RSS 2.0 XML string from the stored articles, newest first.

    Returns:
        The feed XML as a str, or None when the database is unavailable
        or feed generation fails.

    BUG FIX: the previous implementation tried to *generate* a feed with
    `feedparser`, which is a parse-only library — `feedparser.FeedGenerator`
    does not exist, so every call raised AttributeError and returned None.
    The feed is now assembled with the stdlib ElementTree instead.
    """
    import xml.etree.ElementTree as ET  # local import; module import block unchanged

    conn = get_db_connection()
    if not conn:
        return None
    cursor = conn.cursor()
    try:
        cursor.execute("SELECT * FROM articles ORDER BY timestamp DESC")
        articles = cursor.fetchall()
        rss = ET.Element("rss", version="2.0")
        channel = ET.SubElement(rss, "channel")
        ET.SubElement(channel, "title").text = 'Website Changes Feed'
        ET.SubElement(channel, "link").text = 'http://yourwebsite.com/feed'  # Replace with your actual feed URL
        ET.SubElement(channel, "description").text = 'Feed of changes detected on monitored websites.'
        for article in articles:
            # Row layout: (id, url, title, content, hash, timestamp)
            item = ET.SubElement(channel, "item")
            ET.SubElement(item, "title").text = article[2]
            ET.SubElement(item, "link").text = article[1]
            ET.SubElement(item, "description").text = article[3]
            ET.SubElement(item, "pubDate").text = str(article[5])
        return ET.tostring(rss, encoding="unicode")
    except Exception as e:
        logging.error(f"Error generating RSS feed: {e}")
        return None
    finally:
        cursor.close()
def start_monitoring(target_urls, storage_location, feed_rss):
    """Spawn the background monitoring thread unless one is already running.

    Returns a (status message, history) tuple for the UI.
    """
    global monitoring_thread, stop_event, current_task, history
    already_running = monitoring_thread is not None and monitoring_thread.is_alive()
    if already_running:
        return "Monitoring is already running.", history
    stop_event.clear()
    current_task = f"Monitoring URLs: {', '.join(target_urls)}"
    history.append(f"Task started: {current_task}")
    # Daemon thread: it must not keep the process alive on exit.
    worker = threading.Thread(
        target=monitor_urls,
        args=(target_urls, storage_location, feed_rss, stop_event),
        daemon=True,
    )
    monitoring_thread = worker
    worker.start()
    return "Monitoring started.", history
def stop_monitoring():
    """Signal the monitoring thread to stop and wait for it to finish.

    Returns a (status message, history) tuple for the UI.
    """
    global monitoring_thread, stop_event, current_task, history
    if not (monitoring_thread and monitoring_thread.is_alive()):
        return "No monitoring task is currently running.", history
    stop_event.set()
    monitoring_thread.join()
    current_task = None
    history.append("Monitoring stopped by user.")
    return "Monitoring stopped.", history
def chatbot_response(message, history):
    """Query the HF Inference API and append (message, reply) to history.

    Args:
        message: the user's input text.
        history: list of (user, bot) tuples maintained by the gr.Chatbot.

    Returns:
        (history, history) — the same list twice, matching the two
        chatbot outputs wired up in the Gradio interface.

    BUG FIX: `InferenceClient` has no `inference()` method, so every call
    previously raised AttributeError and fell into the error branch.
    `text_generation()` is the correct call for this instruct model.
    """
    try:
        client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1", token=HUGGINGFACE_API_KEY)
        response = client.text_generation(message)
        history.append((message, response))
        return history, history
    except Exception as e:
        logging.error(f"Error getting chatbot response: {e}")
        history.append((message, "Error: Could not get a response from the chatbot."))
        return history, history
# --- Gradio Interface ---
# Three tabs: monitoring configuration/controls, a read-only feed view,
# and a chatbot backed by the HF Inference API.
with gr.Blocks() as demo:
    gr.Markdown("# Website Monitor and Chatbot")
    # Configuration Tab
    with gr.Tab("Configuration"):
        with gr.Row():
            target_urls = gr.Textbox(
                label="Target URLs (comma-separated)",
                placeholder="https://example.com, https://another-site.com"
            )
        with gr.Row():
            storage_location = gr.Textbox(
                label="Storage Location (CSV file path)",
                placeholder="/path/to/your/file.csv",
                visible=False  # You can enable this if you want CSV storage
            )
        with gr.Row():
            feed_rss_checkbox = gr.Checkbox(label="Enable RSS Feed")
        with gr.Row():
            start_button = gr.Button("Start Monitoring")
            stop_button = gr.Button("Stop Monitoring")
        with gr.Row():
            status_text = gr.Textbox(label="Status", interactive=False)
        with gr.Row():
            history_text = gr.Textbox(
                label="History", lines=10, interactive=False
            )
    # User-End View Tab
    with gr.Tab("User-End View"):
        with gr.Row():
            feed_content = gr.JSON(label="RSS Feed Content")
    # Chatbot Tab
    with gr.Tab("Chatbot"):
        chatbot_interface = gr.Chatbot()
        with gr.Row():
            message_input = gr.Textbox(placeholder="Type your message here...")
            send_button = gr.Button("Send")
    # --- Event Handlers ---
    def on_start_click(target_urls_str, storage_loc, feed_enabled):
        """Parse the URL textbox and kick off monitoring.

        Returns a (status message, history) tuple for the two outputs.
        """
        global history, url_monitoring_intervals
        try:
            # BUG FIX: the original comprehension used `for url.strip() in ...`,
            # which is a SyntaxError (a call is not a valid assignment target)
            # and prevented the whole module from importing.
            target_urls = [url.strip() for url in target_urls_str.split(",")]
            if not all(target_urls):
                return "Please enter valid URLs.", history
            # Reset monitoring intervals when starting.
            url_monitoring_intervals = {url: 300 for url in target_urls}
            status, history = start_monitoring(target_urls, storage_loc if storage_loc else None, feed_enabled)
            return status, history
        except Exception as e:
            return f"Error starting monitoring: {e}", history
    # Start monitoring button click
    start_button.click(
        on_start_click,
        inputs=[target_urls, storage_location, feed_rss_checkbox],
        outputs=[status_text, history_text]
    )
    # Stop monitoring button click
    stop_button.click(
        stop_monitoring,
        outputs=[status_text, history_text]
    )
    # Send message to chatbot button click
    send_button.click(
        chatbot_response,
        inputs=[message_input, chatbot_interface],
        outputs=[chatbot_interface, chatbot_interface]
    )
    # Update RSS feed content periodically
    def update_feed_content():
        """Regenerate the RSS feed for the User-End View tab."""
        return generate_rss_feed()
    demo.load(update_feed_content, outputs=feed_content, every=5)  # Update every 5 seconds
if __name__ == "__main__":
    demo.launch()