acecalisto3 committed on
Commit
7b31f93
·
verified ·
1 Parent(s): c97adcc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +148 -80
app.py CHANGED
@@ -1,39 +1,45 @@
1
- import os
2
  import asyncio
3
  import csv
4
  import logging
 
5
  from typing import List, Tuple
6
- from dotenv import load_dotenv
7
- import aiohttp
8
- import gradio as gr
9
- from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime
10
- from sqlalchemy.orm import declarative_base
11
- from sqlalchemy.orm import sessionmaker
12
- from sqlalchemy.exc import SQLAlchemyError
13
 
14
- import hashlib
15
  import datetime
 
 
 
 
16
  import feedparser
 
17
  from huggingface_hub import InferenceClient
 
 
 
18
  import validators
19
 
20
- # Load environment variables
21
- load_dotenv()
22
-
23
  # Configure logging
24
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
25
  logger = logging.getLogger(__name__)
26
 
27
  # Configuration
28
- DB_URL = os.getenv('DB_URL', 'sqlite:///monitoring.db')
29
- HUGGINGFACE_API_KEY = os.getenv('HUGGINGFACE_API_KEY')
30
  DEFAULT_MONITORING_INTERVAL = 300
31
  MAX_MONITORING_INTERVAL = 600
32
  CHANGE_FREQUENCY_THRESHOLD = 3
33
 
 
 
 
 
 
 
 
34
  # Database setup
35
  Base = declarative_base()
36
 
 
37
  class Article(Base):
38
  __tablename__ = 'articles'
39
  id = Column(Integer, primary_key=True)
@@ -43,30 +49,25 @@ class Article(Base):
43
  hash = Column(String(32))
44
  timestamp = Column(DateTime, default=datetime.datetime.utcnow)
45
 
46
- engine = create_engine(DB_URL)
47
- Base.metadata.create_all(engine)
48
- Session = sessionmaker(bind=engine)
49
-
50
- # Global variables
51
- monitoring_tasks = {}
52
- url_monitoring_intervals = {}
53
- change_counts = {}
54
- history = []
55
 
56
  # Utility functions
57
  def sanitize_url(url: str) -> str:
58
  return validators.url(url)
59
 
60
- async def fetch_url_content(url: str, session: aiohttp.ClientSession) -> Tuple[str, str]:
 
 
61
  async with session.get(url) as response:
62
  content = await response.text()
63
  soup = BeautifulSoup(content, 'html.parser')
64
  title = soup.title.string if soup.title else "No Title"
65
  return title, content
66
 
 
67
  def calculate_hash(content: str) -> str:
68
  return hashlib.md5(content.encode('utf-8')).hexdigest()
69
 
 
70
  async def save_to_database(url: str, title: str, content: str, hash: str):
71
  session = Session()
72
  try:
@@ -79,15 +80,21 @@ async def save_to_database(url: str, title: str, content: str, hash: str):
79
  finally:
80
  session.close()
81
 
82
- def save_to_csv(storage_location: str, url: str, title: str, content: str, timestamp: datetime.datetime):
 
 
83
  try:
84
- with open(storage_location, "a", newline='', encoding='utf-8') as csvfile:
85
  csv_writer = csv.writer(csvfile)
86
- csv_writer.writerow([timestamp.strftime("%Y-%m-%d %H:%M:%S"), url, title, content])
 
 
87
  except Exception as e:
88
  logger.error(f"Error saving to CSV: {e}")
89
 
90
- async def monitor_url(url: str, interval: int, storage_location: str, feed_rss: bool):
 
 
91
  previous_hash = ""
92
  async with aiohttp.ClientSession() as session:
93
  while True:
@@ -98,14 +105,18 @@ async def monitor_url(url: str, interval: int, storage_location: str, feed_rss:
98
  if current_hash != previous_hash:
99
  previous_hash = current_hash
100
  timestamp = datetime.datetime.now()
101
-
102
  if feed_rss:
103
- await save_to_database(url, title, content, current_hash)
104
-
 
105
  if storage_location:
106
- save_to_csv(storage_location, url, title, content, timestamp)
107
-
108
- history.append(f"Change detected at {url} on {timestamp.strftime('%Y-%m-%d %H:%M:%S')}")
 
 
 
109
  logger.info(f"Change detected at {url}")
110
 
111
  change_counts[url] = change_counts.get(url, 0) + 1
@@ -122,76 +133,135 @@ async def monitor_url(url: str, interval: int, storage_location: str, feed_rss:
122
 
123
  await asyncio.sleep(interval)
124
 
125
- async def start_monitoring(urls: List[str], storage_location: str, feed_rss: bool):
 
 
126
  for url in urls:
127
  if url not in monitoring_tasks:
128
  sanitized_url = sanitize_url(url)
129
  if sanitized_url:
130
- task = asyncio.create_task(monitor_url(sanitized_url, DEFAULT_MONITORING_INTERVAL, storage_location, feed_rss))
 
 
131
  monitoring_tasks[sanitized_url] = task
132
  else:
133
  logger.warning(f"Invalid URL: {url}")
134
  history.append(f"Invalid URL: {url}")
135
 
 
136
  def stop_monitoring(url: str):
137
  if url in monitoring_tasks:
138
  monitoring_tasks[url].cancel()
139
  del monitoring_tasks[url]
140
 
 
141
  def generate_rss_feed():
142
  session = Session()
143
  try:
144
- articles = session.query(Article).order_by(Article.timestamp.desc()).limit(20).all()
 
145
  feed = feedparser.FeedParserDict()
146
  feed['title'] = 'Website Changes Feed'
147
  feed['link'] = 'http://yourwebsite.com/feed'
148
  feed['description'] = 'Feed of changes detected on monitored websites.'
149
- feed['entries'] = [
150
- {'title': article.title, 'link': article.url, 'description': article.content, 'published': article.timestamp}
151
- for article in articles
152
- ]
153
- return feedparser.FeedGenerator().feed_from_dictionary(feed).writeString('utf-8')
 
 
 
154
  except SQLAlchemyError as e:
155
  logger.error(f"Database error: {e}")
156
  return None
157
  finally:
158
  session.close()
159
 
 
160
  async def chatbot_response(message: str, history: List[Tuple[str, str]]):
161
  try:
162
- client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1", token=HUGGINGFACE_API_KEY)
 
163
  response = await client.inference(message)
164
- history.append((message, response))
 
 
 
 
 
 
 
165
  return history, history
166
  except Exception as e:
167
  logger.error(f"Chatbot error: {e}")
168
- history.append((message, "Error: Could not get a response from the chatbot."))
 
 
 
 
169
  return history, history
170
 
171
- # Gradio interface
172
- with gr.Blocks() as demo:
173
- gr.Markdown("# Website Monitor and Chatbot")
174
 
175
- with gr.Tab("Configuration"):
176
- target_urls = gr.Textbox(label="Target URLs (comma-separated)", placeholder="https://example.com, https://another-site.com")
177
- storage_location = gr.Textbox(label="Storage Location (CSV file path)", placeholder="/path/to/your/file.csv")
178
- feed_rss_checkbox = gr.Checkbox(label="Enable RSS Feed")
179
- start_button = gr.Button("Start Monitoring")
180
- stop_button = gr.Button("Stop Monitoring")
181
- status_text = gr.Textbox(label="Status", interactive=False)
182
- history_text = gr.Textbox(label="History", lines=10, interactive=False)
 
 
183
 
184
- with gr.Tab("User-End View"):
185
- feed_content = gr.JSON(label="RSS Feed Content")
186
 
187
- with gr.Tab("Chatbot"):
188
- chatbot_interface = gr.Chatbot(type='messages')
189
- message_input = gr.Textbox(placeholder="Type your message here...")
190
- send_button = gr.Button("Send")
191
 
192
- async def on_start_click(target_urls_str: str, storage_loc: str, feed_enabled: bool):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
  urls = [url.strip() for url in target_urls_str.split(",")]
194
- await start_monitoring(urls, storage_loc if storage_loc else None, feed_enabled)
 
195
  return "Monitoring started for valid URLs."
196
 
197
  async def on_stop_click():
@@ -199,23 +269,21 @@ with gr.Blocks() as demo:
199
  stop_monitoring(url)
200
  return "Monitoring stopped for all URLs."
201
 
202
- start_button.click(on_start_click, inputs=[target_urls, storage_location, feed_rss_checkbox], outputs=[status_text])
 
 
 
203
  stop_button.click(on_stop_click, outputs=[status_text])
204
- send_button.click(chatbot_response, inputs=[message_input, chatbot_interface], outputs=[chatbot_interface, chatbot_interface])
 
 
 
205
 
206
- async def periodic_rss_update():
207
- """Update the RSS feed content every 300 seconds."""
208
- while True:
209
- updated_feed = generate_rss_feed()
210
- feed_content.update(value=updated_feed)
211
- await asyncio.sleep(300) # Wait for 300 seconds before updating again
212
 
213
- async def main():
214
- # Launch the periodic RSS update task and the Gradio app together
215
- await asyncio.gather(
216
- periodic_rss_update(),
217
- demo.launch(share=True)
218
- )
219
 
220
  if __name__ == "__main__":
221
- asyncio.run(main())
 
 
1
  import asyncio
2
  import csv
3
  import logging
4
+ import os
5
  from typing import List, Tuple
 
 
 
 
 
 
 
6
 
7
+ import aiohttp
8
  import datetime
9
+ import difflib
10
+ import hashlib
11
+ from pathlib import Path
12
+
13
  import feedparser
14
+ import gradio as gr
15
  from huggingface_hub import InferenceClient
16
+ from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime
17
+ from sqlalchemy.orm import declarative_base, sessionmaker
18
+ from sqlalchemy.exc import SQLAlchemyError
19
  import validators
20
 
 
 
 
21
  # Configure logging
22
+ logging.basicConfig(level=logging.INFO,
23
+ format='%(asctime)s - %(levelname)s - %(message)s')
24
  logger = logging.getLogger(__name__)
25
 
26
  # Configuration
27
+ HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY")
 
28
  DEFAULT_MONITORING_INTERVAL = 300
29
  MAX_MONITORING_INTERVAL = 600
30
  CHANGE_FREQUENCY_THRESHOLD = 3
31
 
32
+ # Global variables
33
+ monitoring_tasks = {}
34
+ url_monitoring_intervals = {}
35
+ change_counts = {}
36
+ history = []
37
+ engine = None # Initialize the database engine globally
38
+
39
  # Database setup
40
  Base = declarative_base()
41
 
42
+
43
  class Article(Base):
44
  __tablename__ = 'articles'
45
  id = Column(Integer, primary_key=True)
 
49
  hash = Column(String(32))
50
  timestamp = Column(DateTime, default=datetime.datetime.utcnow)
51
 
 
 
 
 
 
 
 
 
 
52
 
53
  # Utility functions
54
def sanitize_url(url: str) -> str:
    """Return *url* (stripped) if it is a well-formed URL, else ``""``.

    ``validators.url`` reports validity only: it returns ``True`` for a
    valid URL and a falsy failure object otherwise. Returning that value
    directly would hand callers ``True`` instead of the URL, so the
    validated string itself is returned here. The result is still falsy
    for invalid input, so truthiness checks keep working.

    Args:
        url: Candidate URL as entered by the user.

    Returns:
        The stripped URL when valid, otherwise an empty string.
    """
    candidate = url.strip() if isinstance(url, str) else ""
    if candidate and validators.url(candidate):
        return candidate
    return ""
56
 
57
+
58
def _extract_title(markup: str) -> str:
    """Return the text of the first ``<title>`` element, or ``"No Title"``."""
    # Local import: only the standard library is needed, and the file's
    # import list does not have to change.
    from html.parser import HTMLParser

    class _TitleParser(HTMLParser):
        def __init__(self):
            super().__init__(convert_charrefs=True)
            self.in_title = False
            self.finished = False
            self.parts = []

        def handle_starttag(self, tag, attrs):
            # Only the first <title> counts (later ones may be SVG titles).
            if tag == "title" and not self.finished:
                self.in_title = True

        def handle_endtag(self, tag):
            if tag == "title" and self.in_title:
                self.in_title = False
                self.finished = True

        def handle_data(self, data):
            if self.in_title:
                self.parts.append(data)

    parser = _TitleParser()
    parser.feed(markup)
    parser.close()
    return "".join(parser.parts).strip() or "No Title"


async def fetch_url_content(url: str,
                            session: "aiohttp.ClientSession") -> Tuple[str, str]:
    """Download *url* and return ``(title, content)``.

    The previous implementation called ``BeautifulSoup``, which is not
    imported anywhere in this module, so every call failed with
    ``NameError``. The title is now extracted with the standard library.

    Args:
        url: Address to fetch.
        session: Open HTTP session used for the request.

    Returns:
        A tuple of the page title (``"No Title"`` when the page has no
        non-empty ``<title>``) and the full response body as text.
    """
    async with session.get(url) as response:
        content = await response.text()
    return _extract_title(content), content
65
 
66
+
67
def calculate_hash(content: str) -> str:
    """Return the hexadecimal MD5 digest of *content* encoded as UTF-8."""
    digest = hashlib.md5()
    digest.update(content.encode('utf-8'))
    return digest.hexdigest()
69
 
70
+
71
  async def save_to_database(url: str, title: str, content: str, hash: str):
72
  session = Session()
73
  try:
 
80
  finally:
81
  session.close()
82
 
83
+
84
def save_to_csv(storage_location: str, url: str, title: str, content: str,
                timestamp: datetime.datetime):
    """Append one change record to the CSV file at *storage_location*.

    The row written is ``[timestamp, url, title, content]`` with the
    timestamp formatted as ``YYYY-MM-DD HH:MM:SS``. Any failure is logged
    and swallowed; nothing is raised to the caller.
    """
    try:
        with open(storage_location, "a", newline='', encoding="utf-8") as out:
            stamp = timestamp.strftime("%Y-%m-%d %H:%M:%S")
            record = [stamp, url, title, content]
            csv.writer(out).writerow(record)
    except Exception as e:
        logger.error(f"Error saving to CSV: {e}")
94
 
95
+
96
+ async def monitor_url(url: str, interval: int, storage_location: str,
97
+ feed_rss: bool):
98
  previous_hash = ""
99
  async with aiohttp.ClientSession() as session:
100
  while True:
 
105
  if current_hash != previous_hash:
106
  previous_hash = current_hash
107
  timestamp = datetime.datetime.now()
108
+
109
  if feed_rss:
110
+ await save_to_database(url, title, content,
111
+ current_hash)
112
+
113
  if storage_location:
114
+ save_to_csv(storage_location, url, title, content,
115
+ timestamp)
116
+
117
+ history.append(
118
+ f"Change detected at {url} on {timestamp.strftime('%Y-%m-%d %H:%M:%S')}"
119
+ )
120
  logger.info(f"Change detected at {url}")
121
 
122
  change_counts[url] = change_counts.get(url, 0) + 1
 
133
 
134
  await asyncio.sleep(interval)
135
 
136
+
137
async def start_monitoring(urls: List[str], storage_location: str,
                           feed_rss: bool):
    """Start one background monitoring task per valid, not-yet-monitored URL.

    The URL string itself is used both as the ``monitoring_tasks`` key and
    as the address handed to ``monitor_url``. Previously the return value
    of ``sanitize_url`` was used for both, while the duplicate check used
    the raw URL, so the two could disagree.

    Args:
        urls: Candidate URLs.
        storage_location: CSV path forwarded to ``monitor_url`` (may be
            ``None``).
        feed_rss: Forwarded to ``monitor_url``.

    Invalid URLs are logged and recorded in ``history``; they do not raise.
    """
    for url in urls:
        if url in monitoring_tasks:
            continue  # already being monitored

        # Only the truthiness of the validator result is relied upon.
        if not sanitize_url(url):
            logger.warning(f"Invalid URL: {url}")
            history.append(f"Invalid URL: {url}")
            continue

        monitoring_tasks[url] = asyncio.create_task(
            monitor_url(url, DEFAULT_MONITORING_INTERVAL,
                        storage_location, feed_rss))
150
 
151
+
152
def stop_monitoring(url: str):
    """Cancel and forget the monitoring task registered for *url*, if any."""
    try:
        task = monitoring_tasks[url]
    except KeyError:
        return  # nothing is being monitored under this URL
    task.cancel()
    del monitoring_tasks[url]
156
 
157
+
158
def generate_rss_feed():
    """Return the 20 most recent recorded changes as a feed-shaped dict.

    ``feedparser`` only parses feeds; it has no ``FeedGenerator``, so the
    previous implementation always failed with an ``AttributeError`` that
    the ``SQLAlchemyError`` handler did not catch. The feed is now returned
    as a plain, JSON-serialisable dict.

    Returns:
        ``{'title', 'link', 'description', 'entries': [...]}`` where each
        entry has ``title``, ``link``, ``description`` and ``published``
        (ISO-8601 string or ``None``); or ``None`` when no database is
        connected yet or the query fails.
    """
    # `Session` is only bound once create_db_engine() has succeeded.
    session_factory = globals().get("Session")
    if session_factory is None:
        logger.warning("RSS feed requested before a database was connected")
        return None

    session = session_factory()
    try:
        articles = (session.query(Article)
                    .order_by(Article.timestamp.desc())
                    .limit(20)
                    .all())
        return {
            'title': 'Website Changes Feed',
            'link': 'http://yourwebsite.com/feed',
            'description': 'Feed of changes detected on monitored websites.',
            'entries': [{
                'title': article.title,
                'link': article.url,
                'description': article.content,
                # datetime objects are not JSON-serialisable as-is.
                'published': (article.timestamp.isoformat()
                              if article.timestamp else None),
            } for article in articles],
        }
    except SQLAlchemyError as e:
        logger.error(f"Database error: {e}")
        return None
    finally:
        session.close()
180
 
181
+
182
async def chatbot_response(message: str, history: List[dict]):
    """Send *message* to the hosted model and append the exchange to *history*.

    ``InferenceClient`` is synchronous and has no ``inference`` method, so
    the previous ``await client.inference(message)`` always fell into the
    error branch. The chat-completion call is now run in a worker thread so
    the event loop is not blocked.

    Args:
        message: The user's new message.
        history: Prior conversation as ``{"role", "content"}`` dicts (the
            "messages" chatbot format); ``None`` is treated as empty.

    Returns:
        The updated history twice, matching the two outputs wired to the
        send button. On failure the assistant turn contains an error notice
        instead of raising.
    """
    history = list(history or [])
    history.append({"role": "user", "content": message})

    try:
        client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1",
                                 token=HUGGINGFACE_API_KEY)
        # Forward only role/content of textual turns; UI-side extras in the
        # history entries are not part of the chat-completion payload.
        prompt = [{"role": turn["role"], "content": turn["content"]}
                  for turn in history
                  if isinstance(turn, dict)
                  and isinstance(turn.get("content"), str)]
        loop = asyncio.get_running_loop()
        completion = await loop.run_in_executor(
            None,
            lambda: client.chat_completion(messages=prompt, max_tokens=512))
        reply = completion.choices[0].message.content
    except Exception as e:
        # Top-level UI boundary: report the failure in the chat, don't crash.
        logger.error(f"Chatbot error: {e}")
        reply = "Error: Could not get a response from the chatbot."

    history.append({"role": "assistant", "content": reply})
    return history, history
204
 
 
 
 
205
 
206
def create_db_engine(db_url):
    """Connect to *db_url*, create the tables, and rebind the global session factory.

    The module-level ``engine`` and ``Session`` are only replaced after the
    new engine has been created and ``Base.metadata.create_all`` has
    succeeded, so a failed attempt leaves any existing connection intact.
    A previously connected engine is disposed once the new one is ready.

    Args:
        db_url: SQLAlchemy database URL, e.g. ``sqlite:///monitoring.db``.

    Returns:
        A human-readable status string for the "Database Status" textbox.
    """
    global engine, Base, Session

    db_url = (db_url or "").strip()
    if not db_url:
        return "Database error: no database URL provided."

    try:
        new_engine = create_engine(db_url)
        Base.metadata.create_all(new_engine)
    except (SQLAlchemyError, ImportError) as e:
        # ImportError covers a URL whose DBAPI driver is not installed.
        logger.error(f"Database error: {e}")
        return f"Database error: {e}"

    if engine is not None:
        engine.dispose()  # release the old connection pool on reconnect
    engine = new_engine
    Session = sessionmaker(bind=engine)
    return "Database connected successfully!"
216
 
 
 
217
 
218
+ # Gradio interface
219
+ with gr.Blocks() as demo:
220
+ gr.Markdown("# Website Monitor and Chatbot")
 
221
 
222
+ with gr.Row():
223
+ with gr.Column(): # Side pane for database configuration
224
+ db_url = gr.Textbox(label="Database URL",
225
+ placeholder="e.g., sqlite:///monitoring.db")
226
+ db_connect_button = gr.Button("Connect to Database")
227
+ db_status = gr.Textbox(label="Database Status",
228
+ interactive=False,
229
+ value="Not connected")
230
+ db_connect_button.click(create_db_engine,
231
+ inputs=db_url,
232
+ outputs=db_status)
233
+
234
+ with gr.Column(): # Main pane for monitoring and chatbot
235
+ with gr.Tab("Configuration"):
236
+ target_urls = gr.Textbox(
237
+ label="Target URLs (comma-separated)",
238
+ placeholder=
239
+ "https://example.com, https://another-site.com")
240
+ storage_location = gr.Textbox(
241
+ label="Storage Location (CSV file path)",
242
+ placeholder="/path/to/your/file.csv")
243
+ feed_rss_checkbox = gr.Checkbox(label="Enable RSS Feed")
244
+ start_button = gr.Button("Start Monitoring")
245
+ stop_button = gr.Button("Stop Monitoring")
246
+ status_text = gr.Textbox(label="Status", interactive=False)
247
+ history_text = gr.Textbox(label="History",
248
+ lines=10,
249
+ interactive=False)
250
+
251
+ with gr.Tab("User-End View"):
252
+ feed_content = gr.JSON(label="RSS Feed Content")
253
+
254
+ with gr.Tab("Chatbot"):
255
+ chatbot_interface = gr.Chatbot(type='messages')
256
+ message_input = gr.Textbox(
257
+ placeholder="Type your message here...")
258
+ send_button = gr.Button("Send")
259
+
260
async def on_start_click(target_urls_str: str, storage_loc: str,
                         feed_enabled: bool):
    """Handle the "Start Monitoring" button.

    Splits the comma-separated URL field, drops blank entries (empty input
    or trailing commas would otherwise be passed on as URLs), and starts
    monitoring for the rest.

    Returns:
        Status text for the "Status" textbox.
    """
    pieces = (target_urls_str or "").split(",")
    urls = [piece.strip() for piece in pieces if piece.strip()]
    if not urls:
        return "No URLs provided."

    await start_monitoring(urls, storage_loc if storage_loc else None,
                           feed_enabled)
    return "Monitoring started for valid URLs."
266
 
267
  async def on_stop_click():
 
269
  stop_monitoring(url)
270
  return "Monitoring stopped for all URLs."
271
 
272
+ start_button.click(
273
+ on_start_click,
274
+ inputs=[target_urls, storage_location, feed_rss_checkbox],
275
+ outputs=[status_text])
276
  stop_button.click(on_stop_click, outputs=[status_text])
277
+ send_button.click(
278
+ chatbot_response,
279
+ inputs=[message_input, chatbot_interface],
280
+ outputs=[chatbot_interface, chatbot_interface])
281
 
282
# NOTE(review): presumably nested inside the `with gr.Blocks() as demo:`
# block -- keep that nesting when applying this change.
async def update_feed_content():
    """Return the current feed payload for the "RSS Feed Content" panel."""
    return generate_rss_feed()


# gr.Timer takes the tick interval in seconds as its `value` argument;
# `every=` is not a Timer constructor parameter.
feed_updater = gr.Timer(300)
feed_updater.tick(fn=update_feed_content, outputs=feed_content)
 
 
 
 
287
 
288
  if __name__ == "__main__":
289
+ demo.launch()