acecalisto3 committed
Commit 8bbdb9e
1 Parent(s): 92d8005

Update app.py

Files changed (1):
  app.py +73 -264
app.py CHANGED
@@ -1,282 +1,91 @@
- import csv
- import logging
- import os
- from typing import List, Tuple
-
- import aiohttp
- import datetime
- import difflib
- import hashlib
- from pathlib import Path

- import feedparser
  import gradio as gr
- from huggingface_hub import InferenceClient
- from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime
- from sqlalchemy.orm import declarative_base, sessionmaker
  from sqlalchemy.exc import SQLAlchemyError
- import validators
- import asyncio

- # Configure logging
- logging.basicConfig(level=logging.INFO,
-                     format='%(asctime)s - %(levelname)s - %(message)s')
  logger = logging.getLogger(__name__)
-
- # Configuration
- HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY")
- DEFAULT_MONITORING_INTERVAL = 300
- MAX_MONITORING_INTERVAL = 600
- CHANGE_FREQUENCY_THRESHOLD = 3

  # Global variables
- monitoring_tasks = {}
- url_monitoring_intervals = {}
- change_counts = {}
- history = []
- engine = None # Initialize the database engine globally
-
- # Database setup
- Base = declarative_base()
-
- def generate_rss_feed():
-     session = Session()
-     try:
-         articles = session.query(Article).order_by(
-             Article.timestamp.desc()).limit(20).all()
-         feed = feedparser.FeedParserDict()
-         feed['title'] = 'Website Changes Feed'
-         feed['link'] = 'http://yourwebsite.com/feed' # Replace if needed
-         feed['description'] = 'Feed of changes detected on monitored websites.'
-         feed['entries'] = [{
-             'title': article.title,
-             'link': article.url,
-             'description': article.content,
-             'published': article.timestamp
-         } for article in articles]
-         return feedparser.FeedGenerator().feed_from_dictionary(
-             feed).writeString('utf-8')
-     except SQLAlchemyError as e:
-         logger.error(f"Database error: {e}")
-         return None
-     finally:
-         session.close()
-
- async def update_feed_content():
-     return generate_rss_feed()
-
- # Periodic update function
- async def periodic_update():
-     while True:
-         await asyncio.sleep(300) # Wait for 5 minutes
-         await update_feed_content()
-
- async def update_feed_content():
-     return generate_rss_feed()
-
- def start_periodic_task():
-     loop = asyncio.get_event_loop()
-     if loop.is_running():
-         asyncio.create_task(periodic_update())
-     else:
-         loop.run_until_complete(periodic_update())
-
- # Start the periodic update task
- start_periodic_task()
-
- class Article(Base):
-     __tablename__ = 'articles'
-     id = Column(Integer, primary_key=True)
-     url = Column(String(255), nullable=False)
-     title = Column(String(255))
-     content = Column(Text)
-     hash = Column(String(32))
-     timestamp = Column(DateTime, default=datetime.datetime.utcnow)
-
-
- # Utility functions
- def sanitize_url(url: str) -> str:
-     return validators.url(url)
-
-
- async def fetch_url_content(url: str,
-                             session: aiohttp.ClientSession) -> Tuple[str, str]:
-     async with session.get(url) as response:
-         content = await response.text()
-         soup = BeautifulSoup(content, 'html.parser')
-         title = soup.title.string if soup.title else "No Title"
-         return title, content
-
-
- def calculate_hash(content: str) -> str:
-     return hashlib.md5(content.encode('utf-8')).hexdigest()
-
-
- async def save_to_database(url: str, title: str, content: str, hash: str):
-     session = Session()
-     try:
-         article = Article(url=url, title=title, content=content, hash=hash)
-         session.add(article)
-         session.commit()
-     except SQLAlchemyError as e:
-         logger.error(f"Database error: {e}")
-         session.rollback()
-     finally:
-         session.close()

-
- def save_to_csv(storage_location: str, url: str, title: str, content: str,
-                 timestamp: datetime.datetime):
-     try:
-         with open(storage_location, "a", newline='', encoding="utf-8") as csvfile:
-             csv_writer = csv.writer(csvfile)
-             csv_writer.writerow([
-                 timestamp.strftime("%Y-%m-%d %H:%M:%S"), url, title, content
-             ])
-     except Exception as e:
-         logger.error(f"Error saving to CSV: {e}")
-
-
- async def monitor_url(url: str, interval: int, storage_location: str,
-                       feed_rss: bool):
-     previous_hash = ""
-     async with aiohttp.ClientSession() as session:
-         while True:
-             try:
-                 title, content = await fetch_url_content(url, session)
-                 current_hash = calculate_hash(content)
-
-                 if current_hash != previous_hash:
-                     previous_hash = current_hash
-                     timestamp = datetime.datetime.now()
-
-                     if feed_rss:
-                         await save_to_database(url, title, content,
-                                                current_hash)
-
-                     if storage_location:
-                         save_to_csv(storage_location, url, title, content,
-                                     timestamp)
-
-                     history.append(
-                         f"Change detected at {url} on {timestamp.strftime('%Y-%m-%d %H:%M:%S')}"
-                     )
-                     logger.info(f"Change detected at {url}")
-
-                     change_counts[url] = change_counts.get(url, 0) + 1
-                     if change_counts[url] >= CHANGE_FREQUENCY_THRESHOLD:
-                         interval = max(60, interval // 2)
-                 else:
-                     change_counts[url] = 0
-                     interval = min(interval * 2, MAX_MONITORING_INTERVAL)
-
-                 url_monitoring_intervals[url] = interval
-             except Exception as e:
-                 logger.error(f"Error monitoring {url}: {e}")
-                 history.append(f"Error monitoring {url}: {e}")
-
-             await asyncio.sleep(interval)
-
- async def start_monitoring(urls: List[str], storage_location: str,
-                            feed_rss: bool):
-     for url in urls:
-         if url not in monitoring_tasks:
-             sanitized_url = sanitize_url(url)
-             if sanitized_url:
-                 task = asyncio.create_task(
-                     monitor_url(sanitized_url, DEFAULT_MONITORING_INTERVAL,
-                                 storage_location, feed_rss))
-                 monitoring_tasks[sanitized_url] = task
-             else:
-                 logger.warning(f"Invalid URL: {url}")
-                 history.append(f"Invalid URL: {url}")
-
-
- def stop_monitoring(url: str):
-     if url in monitoring_tasks:
-         monitoring_tasks[url].cancel()
-         del monitoring_tasks[url]
-
- async def chatbot_response(message: str, history: List[Tuple[str, str]]):
      try:
-         client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1",
-                                  token=HUGGINGFACE_API_KEY)
-         response = await client.inference(message)
-
-         # Format the response as a dictionary
-         history.append({"role": "user", "content": message}) # Add user message
-         history.append({
-             "role": "assistant",
-             "content": response
-         }) # Add assistant response
-
-         return history, history
      except Exception as e:
-         logger.error(f"Chatbot error: {e}")
-         history.append({"role": "user", "content": message}) # Add user message
-         history.append({
-             "role": "assistant",
-             "content": "Error: Could not get a response from the chatbot."
-         }) # Add error message
-         return history, history

-
- def create_db_engine(db_url):
-     global engine, Base, Session
      try:
-         engine = create_engine(db_url)
-         Base.metadata.create_all(engine)
-         Session = sessionmaker(bind=engine)
-         return "Database connected successfully!"
-     except SQLAlchemyError as e:
-         logger.error(f"Database error: {e}")
-         return f"Database error: {e}"
-
-
- # Gradio interface
- with gr.Blocks() as demo:
-     gr.Markdown("# Website Monitor and Chatbot")

-     with gr.Row():
-         with gr.Column(): # Side pane for database configuration
-             db_url = gr.Textbox(label="Database URL",
-                                 placeholder="e.g., sqlite:///monitoring.db")
-             db_connect_button = gr.Button("Connect to Database")
-             db_status = gr.Textbox(label="Database Status",
-                                    interactive=False,
-                                    value="Not connected")
-             db_connect_button.click(create_db_engine,
-                                     inputs=db_url,
-                                     outputs=db_status)

-         with gr.Column(): # Main pane for monitoring and chatbot
-             with gr.Tab("Configuration"):
-                 target_urls = gr.Textbox(
-                     label="Target URLs (comma-separated)",
-                     placeholder=
-                     "https://example.com, https://another-site.com")
-                 storage_location = gr.Textbox(
-                     label="Storage Location (CSV file path)",
-                     placeholder="/path/to/your/file.csv")
-                 feed_rss_checkbox = gr.Checkbox(label="Enable RSS Feed")
-                 start_button = gr.Button("Start Monitoring")
-                 stop_button = gr.Button("Stop Monitoring")
-                 status_text = gr.Textbox(label="Status", interactive=False)
-                 history_text = gr.Textbox(label="History",
-                                           lines=10,
-                                           interactive=False)

-             with gr.Tab("User-End View"):
-                 feed_content = gr.JSON(label="RSS Feed Content")

-             with gr.Tab("Chatbot"):
-                 chatbot_interface = gr.Chatbot(type='messages')
-                 message_input = gr.Textbox(
-                     placeholder="Type your message here...")
-                 send_button = gr.Button("Send")

  if __name__ == "__main__":
-     loop = asyncio.get_event_loop()
-     if not loop.is_running():
-         loop.run_until_complete(periodic_update())
-     demo.launch()
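Aside (not part of the commit): the removed monitor_url() loop adapted its polling cadence. After three consecutive cycles that detected a change it halved the interval (never below 60 seconds), and after any quiet cycle it reset the streak and doubled the interval up to MAX_MONITORING_INTERVAL. A minimal restatement of that rule as a pure function, for reference; MIN_MONITORING_INTERVAL is an added name for the literal 60 in the original:

DEFAULT_MONITORING_INTERVAL = 300
MAX_MONITORING_INTERVAL = 600
CHANGE_FREQUENCY_THRESHOLD = 3
MIN_MONITORING_INTERVAL = 60  # added name for the literal 60 in the removed code


def next_interval(interval: int, change_count: int, changed: bool) -> tuple:
    """Return (new_interval, new_change_count) after one polling cycle."""
    if changed:
        change_count += 1
        if change_count >= CHANGE_FREQUENCY_THRESHOLD:
            # Frequent changes: poll twice as often, but never below the floor.
            interval = max(MIN_MONITORING_INTERVAL, interval // 2)
    else:
        # Quiet cycle: reset the streak and back off, up to the cap.
        change_count = 0
        interval = min(interval * 2, MAX_MONITORING_INTERVAL)
    return interval, change_count


# Three consecutive changes at the default interval drop it from 300s to 150s:
# i, c = DEFAULT_MONITORING_INTERVAL, 0
# for _ in range(3):
#     i, c = next_interval(i, c, changed=True)
# assert (i, c) == (150, 3)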
 
 
 
 
 
 
 
 
 
 
 

+ import asyncio
  import gradio as gr
  from sqlalchemy.exc import SQLAlchemyError
+ from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession
+ from sqlalchemy.future import select
+ from sqlalchemy.orm import sessionmaker
+ import logging
+ import threading
+ import time

  logger = logging.getLogger(__name__)
+ logging.basicConfig(level=logging.INFO)

  # Global variables
+ db_session = None
+ engine = None

+ # Function for dynamically setting the database connection
+ async def set_db_connection(host, port, user, password, db_name):
+     global db_session, engine
      try:
+         engine = create_async_engine(
+             f"mysql+aiomysql://{user}:{password}@{host}:{port}/{db_name}",
+             echo=False
+         )
+         Session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
+         db_session = Session()
+         logger.info("Database connection established.")
+         return "Database connection established."
      except Exception as e:
+         logger.error(f"Failed to establish database connection: {e}")
+         return f"Failed to connect to database: {e}"

+ # Function to update database status
+ def update_db_status():
+     global db_session
      try:
+         if db_session:
+             asyncio.run(db_session.execute(select(1)))
+             return "Connected"
+         else:
+             return "Not connected"
+     except SQLAlchemyError:
+         return "Disconnected"
+
+ # Background task to update status
+ def background_update(db_status_textbox):
+     while True:
+         status = update_db_status()
+         db_status_textbox.value = status
+         logger.info(f"Database status updated: {status}")
+         time.sleep(60)
+
+ # Main application that runs Gradio UI and background tasks
+ def main():
+     with gr.Blocks() as demo:
+         gr.Markdown("# Website Monitor and Chatbot")
+
+         with gr.Row():
+             with gr.Column():
+                 gr.Markdown("## Database Settings")
+                 db_host = gr.Textbox(label="Database Host", placeholder="localhost", value="localhost")
+                 db_port = gr.Textbox(label="Database Port", placeholder="3306", value="3306")
+                 db_user = gr.Textbox(label="Database User", placeholder="username", value="")
+                 db_pass = gr.Textbox(label="Database Password", placeholder="password", type="password", value="")
+                 db_name = gr.Textbox(label="Database Name", placeholder="database_name", value="monitoring")
+
+                 db_status_textbox = gr.Textbox(label="Database Status", interactive=False)
+                 status_text = gr.Textbox(label="Status", interactive=False)

+                 connect_button = gr.Button("Connect to Database")

+                 # Connect button click event
+                 connect_button.click(
+                     set_db_connection,
+                     inputs=[db_host, db_port, db_user, db_pass, db_name],
+                     outputs=[status_text]
+                 )

+         # Start background task to update status
+         threading.Thread(target=background_update, args=(db_status_textbox,), daemon=True).start()

+     # Launch the Gradio interface with a timeout
+     logger.info("Launching Gradio interface...")
+     demo.launch(prevent_thread_lock=True)
+     logger.info("Gradio interface launched successfully.")

  if __name__ == "__main__":
+     main()
+     logger.info("Main function completed. App is running.")
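One note on the new status check: update_db_status() calls asyncio.run() on every poll from a plain thread, executing against a session whose connection was created on a different event loop, and background_update() then assigns the result to db_status_textbox.value, which by itself is unlikely to refresh an already-rendered Gradio page. A minimal alternative sketch, not part of the commit; start_status_poller and on_status are illustrative names, and the aiomysql URL mirrors the one set_db_connection builds. It keeps the engine and the periodic SELECT 1 on a single event loop owned by the background thread:

import asyncio
import threading

from sqlalchemy import text
from sqlalchemy.ext.asyncio import create_async_engine


def start_status_poller(db_url: str, on_status, interval: int = 60) -> threading.Thread:
    """Poll the database with SELECT 1 on an event loop owned by one thread."""

    async def poll() -> None:
        # Create the engine on the same loop that will use it, so the driver's
        # connections never cross event loops.
        engine = create_async_engine(db_url, echo=False)
        while True:
            try:
                async with engine.connect() as conn:
                    await conn.execute(text("SELECT 1"))
                on_status("Connected")
            except Exception as exc:
                on_status(f"Disconnected: {exc}")
            await asyncio.sleep(interval)

    thread = threading.Thread(target=lambda: asyncio.run(poll()), daemon=True)
    thread.start()
    return thread


# Example usage (hypothetical credentials):
# start_status_poller("mysql+aiomysql://user:password@localhost:3306/monitoring",
#                     on_status=print)

Surfacing the result in the UI would still need to go through a Gradio event or periodic refresh rather than direct assignment to the component.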