import datetime
import os
import csv
import time
import hashlib
import threading
import logging

import gradio as gr
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import WebDriverException
from webdriver_manager.chrome import ChromeDriverManager
from huggingface_hub import InferenceClient
import mysql.connector

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Configuration (replace with your actual values or environment variables)
DB_HOST = os.environ.get("DB_HOST", "your_host")
DB_USER = os.environ.get("DB_USER", "your_user")
DB_PASSWORD = os.environ.get("DB_PASSWORD", "your_password")
DB_NAME = os.environ.get("DB_NAME", "your_database")
HUGGINGFACE_API_KEY = os.environ.get("HUGGINGFACE_API_KEY")  # Add API key
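# Example environment setup (placeholder values, adjust for your deployment):
#   export DB_HOST=localhost DB_USER=monitor DB_PASSWORD=secret DB_NAME=monitoring
#   export HUGGINGFACE_API_KEY=<your HF token>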

# Global variables
monitoring_thread = None
stop_event = threading.Event()
db_connection = None
current_task = None
history = []
url_monitoring_intervals = {}  # Store monitoring intervals for each URL

# Function to establish a database connection
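# Reuses a single module-level connection and reconnects if it has dropped.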
def get_db_connection():
    global db_connection
    if db_connection is None or not db_connection.is_connected():
        try:
            db_connection = mysql.connector.connect(
                host=DB_HOST,
                user=DB_USER,
                password=DB_PASSWORD,
                database=DB_NAME
            )
            return db_connection
        except Exception as e:
            logging.error(f"Error connecting to database: {e}")
            return None
    else:
        return db_connection

# Function to create the articles table if it doesn't exist
def create_articles_table():
    conn = get_db_connection()
    if conn:
        cursor = conn.cursor()
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS articles (
                id INT AUTO_INCREMENT PRIMARY KEY,
                url VARCHAR(255) NOT NULL,
                title VARCHAR(255),
                content TEXT,
                hash VARCHAR(32),
                timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)
        conn.commit()
        cursor.close()

# Initialize the articles table
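# (runs at import time; if the database is unreachable the error is logged
# and the UI still starts, but nothing will be persisted)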
create_articles_table()

# Function to monitor URLs for changes
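# target_urls: pages to watch; storage_location: optional CSV path;
# feed_rss: when True, detected changes are also written to the database.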
def monitor_urls(target_urls, storage_location, feed_rss, stop_event):
    global history, url_monitoring_intervals
    previous_hashes = {url: "" for url in target_urls}
    options = Options()
    options.add_argument("--headless=new")  # Options.headless is deprecated in Selenium 4
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")

    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

    try:
        interval = 300  # Fallback so the wait below is defined even with no URLs
        while not stop_event.is_set():
            for url in target_urls:
                try:
                    # Dynamic per-URL monitoring interval (default 5 minutes)
                    interval = url_monitoring_intervals.get(url, 300)

                    driver.get(url)
                    time.sleep(2)  # Allow page to load
                    current_content = driver.page_source
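                    # MD5 here is a cheap change detector, not a security hash;
                    # dynamic markup (ads, tokens) also registers as a change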
                    current_hash = hashlib.md5(current_content.encode('utf-8')).hexdigest()

                    if current_hash != previous_hashes[url]:
                        previous_hashes[url] = current_hash
                        timestamp = datetime.datetime.now()

                        # driver.title is more reliable than reading the <title>
                        # element's .text, which headless Chrome reports as empty
                        title = driver.title or "No Title"

                        history.append(f"Change detected at {url} on {timestamp.strftime('%Y-%m-%d %H:%M:%S')}")

                        if storage_location:
                            save_to_storage(storage_location, url, title, current_content, timestamp)

                        if feed_rss:
                            save_to_database(url, title, current_content, current_hash)

                        # Adjust monitoring interval based on change frequency (example)
                        url_monitoring_intervals[url] = 60  # Check more frequently after a change

                    else:
                        # Back off when no change is detected, capped at 10 minutes
                        url_monitoring_intervals[url] = min(url_monitoring_intervals.get(url, 300) + 60, 600)

                except WebDriverException as e:
                    logging.error(f"Error accessing {url}: {e}")

                if stop_event.is_set():
                    break  # Exit inner loop if stop event is set

            if not stop_event.is_set():
                # wait() rather than sleep() so stop_monitoring() can interrupt
                # promptly; uses the most recently computed per-URL interval
                stop_event.wait(interval)

    except Exception as e:
        logging.error(f"Unexpected error in monitoring thread: {e}")
    finally:
        driver.quit()
        logging.info("Monitoring thread has been stopped.")

# Function to save data to local storage (CSV)
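# Appends one row per detected change; the file is created on first write.
# Full page HTML can make rows very large.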
def save_to_storage(storage_location, url, title, content, timestamp):
    try:
        with open(storage_location, "a", newline='', encoding='utf-8') as csvfile:
            csv_writer = csv.writer(csvfile)
            csv_writer.writerow([timestamp.strftime("%Y-%m-%d %H:%M:%S"), url, title, content])
    except Exception as e:
        logging.error(f"Error saving to storage: {e}")

# Function to save data to the database
def save_to_database(url, title, content, content_hash):
    conn = get_db_connection()
    if conn:
        cursor = conn.cursor()
        try:
            sql = "INSERT INTO articles (url, title, content, hash) VALUES (%s, %s, %s, %s)"
            val = (url, title, content, content_hash)
            cursor.execute(sql, val)
            conn.commit()
        except Exception as e:
            logging.error(f"Error saving to database: {e}")
        finally:
            cursor.close()

# Function to build a feed of recent changes from the database.
# feedparser can only parse feeds, not generate them, so this returns a
# JSON-serializable dict for display in the gr.JSON component; emitting
# real RSS XML would require a generator library such as feedgen.
def generate_rss_feed():
    conn = get_db_connection()
    if conn:
        cursor = conn.cursor()
        try:
            cursor.execute(
                "SELECT url, title, content, timestamp FROM articles "
                "ORDER BY timestamp DESC LIMIT 50"
            )
            articles = cursor.fetchall()
            return {
                'title': 'Website Changes Feed',
                'link': 'http://yourwebsite.com/feed',  # Replace with your actual feed URL
                'description': 'Feed of changes detected on monitored websites.',
                'entries': [
                    {
                        'title': title,
                        'link': url,
                        'description': (content or '')[:500],  # Trim long page dumps
                        'published': str(timestamp),
                    }
                    for url, title, content, timestamp in articles
                ],
            }
        except Exception as e:
            logging.error(f"Error generating RSS feed: {e}")
        finally:
            cursor.close()
    return None

# Function to start monitoring
def start_monitoring(target_urls, storage_location, feed_rss):
    global monitoring_thread, stop_event, current_task, history
    if monitoring_thread and monitoring_thread.is_alive():
        return "Monitoring is already running.", history

    stop_event.clear()
    current_task = f"Monitoring URLs: {', '.join(target_urls)}"
    history.append(f"Task started: {current_task}")
    monitoring_thread = threading.Thread(
        target=monitor_urls,
        args=(target_urls, storage_location, feed_rss, stop_event),
        daemon=True
    )
    monitoring_thread.start()
    return "Monitoring started.", history

# Function to stop monitoring
def stop_monitoring():
    global monitoring_thread, stop_event, current_task, history
    if monitoring_thread and monitoring_thread.is_alive():
        stop_event.set()
        monitoring_thread.join()
        current_task = None
        history.append("Monitoring stopped by user.")
        return "Monitoring stopped.", history
    else:
        return "No monitoring task is currently running.", history

# Function to handle chatbot responses
def chatbot_response(message, history):
    try:
        client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1", token=HUGGINGFACE_API_KEY)
        # text_generation() is the InferenceClient method for instruct models
        response = client.text_generation(message, max_new_tokens=512)
        history.append((message, response))
        return history, ""
    except Exception as e:
        logging.error(f"Error getting chatbot response: {e}")
        history.append((message, "Error: Could not get a response from the chatbot."))
        return history, ""

# --- Gradio Interface ---
with gr.Blocks() as demo:
    gr.Markdown("# Website Monitor and Chatbot")

    # Configuration Tab
    with gr.Tab("Configuration"):
        with gr.Row():
            target_urls = gr.Textbox(
                label="Target URLs (comma-separated)",
                placeholder="https://example.com, https://another-site.com"
            )
        with gr.Row():
            storage_location = gr.Textbox(
                label="Storage Location (CSV file path)",
                placeholder="/path/to/your/file.csv",
                visible=False  # You can enable this if you want CSV storage
            )
        with gr.Row():
            feed_rss_checkbox = gr.Checkbox(label="Enable RSS Feed")
        with gr.Row():
            start_button = gr.Button("Start Monitoring")
            stop_button = gr.Button("Stop Monitoring")
        with gr.Row():
            status_text = gr.Textbox(label="Status", interactive=False)
        with gr.Row():
            history_text = gr.Textbox(
                label="History", lines=10, interactive=False
            )

    # User-End View Tab
    with gr.Tab("User-End View"):
        with gr.Row():
            feed_content = gr.JSON(label="RSS Feed Content")

    # Chatbot Tab
    with gr.Tab("Chatbot"):
        chatbot_interface = gr.Chatbot()
        with gr.Row():
            message_input = gr.Textbox(placeholder="Type your message here...")
            send_button = gr.Button("Send")

    # --- Event Handlers ---

    # Start monitoring button click
    def on_start_click(target_urls_str, storage_loc, feed_enabled):
        global history, url_monitoring_intervals
        try:
            # Split and drop empty entries (trailing commas, stray spaces)
            target_urls = [url.strip() for url in target_urls_str.split(",") if url.strip()]
            if not target_urls:
                return "Please enter valid URLs.", "\n".join(history)

            # Reset monitoring intervals when starting
            url_monitoring_intervals = {url: 300 for url in target_urls}

            status, history = start_monitoring(target_urls, storage_loc if storage_loc else None, feed_enabled)
            # The history Textbox expects a string, not a list
            return status, "\n".join(history)
        except Exception as e:
            return f"Error starting monitoring: {e}", "\n".join(history)

    start_button.click(
        on_start_click,
        inputs=[target_urls, storage_location, feed_rss_checkbox],
        outputs=[status_text, history_text]
    )

    # Stop monitoring button click
    stop_button.click(
        stop_monitoring,
        outputs=[status_text, history_text]
    )

    # Send message to chatbot button click (also clears the input box)
    send_button.click(
        chatbot_response,
        inputs=[message_input, chatbot_interface],
        outputs=[chatbot_interface, message_input]
    )

    # Update RSS feed content periodically
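    # Refresh via `every` relies on Gradio's queue (enabled by default in
    # recent versions; older Gradio needs demo.queue() before launch).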
    def update_feed_content():
        return generate_rss_feed()

    demo.load(update_feed_content, outputs=feed_content, every=5)  # Update every 5 seconds

if __name__ == "__main__":
    demo.launch()