import asyncio
import logging
from datetime import datetime
from typing import Any, Dict, List

import gradio as gr
import pandas as pd
import requests
from bs4 import BeautifulSoup
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sqlalchemy import Column, DateTime, Integer, String, Text, text
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
from sqlalchemy.future import select
from sqlalchemy.orm import declarative_base, sessionmaker

# Global variables for database session and engine
db_session = None
engine = None
monitoring_task = None
logger = logging.getLogger(__name__)

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
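
# The code below queries an `Article` ORM model that was missing from this
# file. The schema here is an assumption (a minimal sketch) inferred from the
# fields used in fetch_feed_content; adjust it to match your real table.
Base = declarative_base()

class Article(Base):
    __tablename__ = "articles"

    id = Column(Integer, primary_key=True)
    url = Column(String(2048))   # Monitored page URL
    title = Column(String(512))  # Headline shown in the feed
    content = Column(Text)       # Change description / page excerpt
    timestamp = Column(DateTime, default=datetime.utcnow)  # When the change was seen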

# Function for dynamically setting the database connection
async def set_db_connection(host: str, port: str, user: str, password: str, db_name: str):
    global db_session, engine
    try:
        engine = create_async_engine(f"mysql+aiomysql://{user}:{password}@{host}:{port}/{db_name}", echo=False)
        Session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
        db_session = Session()
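        # Assumed convenience step: create the articles table on first connect
        # so a fresh database works out of the box. Drop this if the schema is
        # managed elsewhere (e.g. by migrations).
        async with engine.begin() as conn:
            await conn.run_sync(Base.metadata.create_all)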
        return "Database connection established."
    except Exception as e:
        logger.error(f"Failed to establish database connection: {e}")
        return f"Failed to connect to database: {e}"

# Periodically refresh the feed content, logging (but surviving) any errors
async def periodic_update_with_error_handling():
    while True:
        try:
            await asyncio.sleep(300)  # Wait 5 minutes between refreshes.
            await update_feed_content()
        except Exception as e:  # Catch all exceptions so the loop never dies.
            logger.error(f"Error in periodic update: {e}")

# Build the feed dict from the most recent articles in the database; the
# provided URL is only embedded as the feed's own link
async def fetch_feed_content(feed_url: str) -> Dict[str, Any]:
    if db_session is None:
        return {}
    try:
        result = await db_session.execute(select(Article).order_by(Article.timestamp.desc()).limit(20))
        articles = result.scalars().all()  # Latest 20 articles
        feed = {
            'title': 'Website Changes Feed',
            'link': feed_url,
            'description': 'Feed of changes detected on monitored websites.',
            'items': [{'title': a.title, 'link': a.url, 'description': a.content, 'pubDate': str(a.timestamp)} for a in articles]
        }
        return feed
    except Exception as e:
        logger.error(f"Error fetching feed content: {e}")
        return {}
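
# Helper assumed by both the periodic updater and the UI timer below; it was
# referenced but never defined. The feed URL is a placeholder for this sketch.
async def update_feed_content() -> Dict[str, Any]:
    return await fetch_feed_content("http://localhost/feed")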

# Function to scrape website content
async def scrape_website(url: str) -> str:
    try:
        # requests is blocking, so run it in a worker thread to keep the
        # event loop responsive.
        response = await asyncio.to_thread(requests.get, url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        return soup.get_text()
    except Exception as e:
        logger.error(f"Error scraping website: {e}")
        return ""

# Function to analyze website content
async def analyze_website_content(content: str) -> Dict[str, Any]:
    try:
        # Toy sentiment heuristic: compare keyword counts, case-insensitively.
        # Swap in a real NLP model for anything beyond a demo.
        text_lower = content.lower()
        sentiment = "Positive" if text_lower.count("good") > text_lower.count("bad") else "Negative"
        return {'sentiment': sentiment}
    except Exception as e:
        logger.error(f"Error analyzing website content: {e}")
        return {}

# Function to predict website traffic
async def predict_website_traffic(url: str) -> Dict[str, Any]:
    try:
        # Placeholder model: linear regression fitted on synthetic numeric
        # data (URL length vs. traffic), since no real traffic history is
        # wired up yet. The original version tried to fit on a single string
        # sample, which LinearRegression cannot do. Replace with real data.
        X = pd.DataFrame({'url_length': [10, 20, 30, 40, 50]})
        y = pd.Series([100, 200, 300, 400, 500], name='traffic')
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        model = LinearRegression()
        model.fit(X_train, y_train)
        prediction = model.predict(pd.DataFrame({'url_length': [len(url)]}))
        return {'traffic': float(prediction[0])}
    except Exception as e:
        logger.error(f"Error predicting website traffic: {e}")
        return {}

# Function to report the database connection status
async def update_db_status():
    if db_session is None:
        return "Disconnected"
    try:
        await db_session.execute(text("SELECT 1"))  # Raw SQL needs text() in SQLAlchemy 2.x
        return "Connected"
    except SQLAlchemyError:
        return "Disconnected"

# Main application that builds the Gradio UI and starts background tasks
async def main():
    demo = gr.Blocks(title="Website Monitor and Chatbot")
    # Define the Gradio interface
    with demo:
        gr.Markdown("# Website Monitor and Chatbot")
        with gr.Row():
            with gr.Column():
                gr.Markdown("## Database Settings")
                db_host = gr.Textbox(label="Database Host", placeholder="localhost", value="localhost")
                db_port = gr.Textbox(label="Database Port", placeholder="3306", value="3306")
                db_user = gr.Textbox(label="Database User", placeholder="username", value="")
                db_pass = gr.Textbox(label="Database Password", placeholder="password", type="password", value="")
                db_name = gr.Textbox(label="Database Name", placeholder="database_name", value="monitoring")
                db_status_textbox = gr.Textbox(label="Database Status", interactive=False)
                status_text = gr.Textbox(label="Status", interactive=False)
                gr.Markdown("## RSS Feed Reader Settings")
                feed_target_url = gr.Textbox(label="RSS Feed Target URL", placeholder="http://yourwebsite.com/feed")
                view_button = gr.Button("View Feed")
                target_urls = gr.Textbox(label="Target URLs (comma-separated)", placeholder="https://example.com, https://another-site.com")
                storage_location = gr.Textbox(label="Storage Location (CSV file path)", placeholder="/path/to/your/file.csv")
                feed_rss_checkbox = gr.Checkbox(label="Enable RSS Feed")
                start_button = gr.Button("Start Monitoring")
                stop_button = gr.Button("Stop Monitoring")
            with gr.Column():
                feed_content = gr.JSON(label="RSS Feed Content")
                chatbot_interface = gr.Chatbot(type='messages')  # History is kept per session by Gradio
                message_input = gr.Textbox(placeholder="Type your message here...")
                send_button = gr.Button("Send")
                scrape_button = gr.Button("Scrape Website")
                analyze_button = gr.Button("Analyze Website Content")
                predict_button = gr.Button("Predict Website Traffic")
                scrape_output = gr.Textbox(label="Scraped Website Content", interactive=False)
                analyze_output = gr.JSON(label="Website Content Analysis")
                predict_output = gr.JSON(label="Website Traffic Prediction")
        # Define button actions
        async def on_start_click(target_urls_str: str, storage_loc: str, feed_enabled: bool, host: str, port: str, user: str, password: str, db_name: str):
            global monitoring_task
            urls = [url.strip() for url in target_urls_str.split(",")]
            await set_db_connection(host, port, user, password, db_name)
            monitoring_task = asyncio.create_task(start_monitoring(urls, storage_loc, feed_enabled))
            return "Monitoring started."
        async def on_stop_click():
            global monitoring_task
            if monitoring_task:
                monitoring_task.cancel()
                monitoring_task = None
            return "Monitoring stopped."
        async def on_view_feed_click(feed_url: str):
            return await fetch_feed_content(feed_url)
        async def on_scrape_click(url: str):
            return await scrape_website(url)
        async def on_analyze_click(content: str):
            return await analyze_website_content(content)
        async def on_predict_click(url: str):
            return await predict_website_traffic(url)
        start_button.click(on_start_click, inputs=[target_urls, storage_location, feed_rss_checkbox, db_host, db_port, db_user, db_pass, db_name], outputs=[status_text])
        stop_button.click(on_stop_click, outputs=[status_text])
        view_button.click(on_view_feed_click, inputs=[feed_target_url], outputs=[feed_content])
        scrape_button.click(on_scrape_click, inputs=[target_urls], outputs=[scrape_output])
        analyze_button.click(on_analyze_click, inputs=[scrape_output], outputs=[analyze_output])
        predict_button.click(on_predict_click, inputs=[target_urls], outputs=[predict_output])
        send_button.click(chatbot_response, inputs=[message_input, chatbot_interface], outputs=[chatbot_interface, message_input])
        # Refresh the feed content shown in the UI every 300 seconds
        feed_updater = gr.Timer(300)
        feed_updater.tick(fn=update_feed_content, outputs=feed_content)
        # Check database status when the UI is loaded
        demo.load(update_db_status, outputs=db_status_textbox)
    # Start the background updater on this event loop, then launch the UI.
    # prevent_thread_lock hands control back so the loop (and the updater
    # task) keeps running while the server is up.
    asyncio.create_task(periodic_update_with_error_handling())
    demo.launch(prevent_thread_lock=True)
    await asyncio.Event().wait()  # Keep the event loop (and server) alive

# Chatbot handler; history uses the 'messages' format (role/content dicts)
# expected by gr.Chatbot(type='messages')
async def chatbot_response(message: str, chat_history: List[Dict[str, str]]):
    chat_history = chat_history or []
    chat_history.append({"role": "user", "content": message})
    chat_history.append({"role": "assistant", "content": f"Echo: {message}"})
    return chat_history, ""

# Function to start monitoring. Minimal sketch: periodically scrape each URL;
# real change detection, CSV storage, and RSS publishing still need to be
# implemented here.
async def start_monitoring(urls: List[str], storage_location: str, feed_enabled: bool):
    logger.info(f"Starting monitoring for {urls}, saving to {storage_location}, RSS enabled: {feed_enabled}")
    while True:
        for url in urls:
            content = await scrape_website(url)
            logger.info(f"Fetched {len(content)} characters from {url}")
        await asyncio.sleep(300)  # Re-check every 5 minutes.

# Launch the app using asyncio
if __name__ == "__main__":
    asyncio.run(main())