Spaces:
Runtime error
Runtime error
acecalisto3
committed on
Commit
•
8bbdb9e
1
Parent(s):
92d8005
Update app.py
Browse files
app.py
CHANGED
@@ -1,282 +1,91 @@
|
|
1 |
-
import csv
|
2 |
-
import logging
|
3 |
-
import os
|
4 |
-
from typing import List, Tuple
|
5 |
-
|
6 |
-
import aiohttp
|
7 |
-
import datetime
|
8 |
-
import difflib
|
9 |
-
import hashlib
|
10 |
-
from pathlib import Path
|
11 |
|
12 |
-
import
|
13 |
import gradio as gr
|
14 |
-
from huggingface_hub import InferenceClient
|
15 |
-
from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime
|
16 |
-
from sqlalchemy.orm import declarative_base, sessionmaker
|
17 |
from sqlalchemy.exc import SQLAlchemyError
|
18 |
-
import
|
19 |
-
import
|
|
|
|
|
|
|
|
|
20 |
|
21 |
-
# Configure logging
# Root logger: INFO and above, each record prefixed with time and level.
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Configuration
# API key read from the environment; None when the variable is unset.
HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY")
# Initial polling interval handed to monitor_url(); it is passed to
# asyncio.sleep(), so the unit is seconds.
DEFAULT_MONITORING_INTERVAL = 300
# Upper bound applied when monitor_url() doubles its polling interval.
MAX_MONITORING_INTERVAL = 600
# Per-URL change count at which monitor_url() starts halving its interval.
CHANGE_FREQUENCY_THRESHOLD = 3
|
31 |
|
32 |
# Global variables
|
33 |
-
|
34 |
-
|
35 |
-
change_counts = {}
|
36 |
-
history = []
|
37 |
-
engine = None # Initialize the database engine globally
|
38 |
-
|
39 |
-
# Database setup
|
40 |
-
Base = declarative_base()
|
41 |
-
|
42 |
-
def generate_rss_feed():
    """Build an RSS 2.0 document from the 20 most recent articles.

    Returns:
        The feed serialized as an XML string, or ``None`` when the database
        query fails (the error is logged).
    """
    # feedparser only parses feeds and has no generator API, so the document
    # is assembled with the standard library instead.
    from email.utils import format_datetime
    from xml.etree import ElementTree as ET

    session = Session()
    try:
        articles = (session.query(Article)
                    .order_by(Article.timestamp.desc())
                    .limit(20)
                    .all())

        rss = ET.Element('rss', version='2.0')
        channel = ET.SubElement(rss, 'channel')
        ET.SubElement(channel, 'title').text = 'Website Changes Feed'
        ET.SubElement(channel, 'link').text = 'http://yourwebsite.com/feed'  # Replace if needed
        ET.SubElement(channel, 'description').text = (
            'Feed of changes detected on monitored websites.')

        for article in articles:
            item = ET.SubElement(channel, 'item')
            ET.SubElement(item, 'title').text = article.title or ''
            ET.SubElement(item, 'link').text = article.url or ''
            ET.SubElement(item, 'description').text = article.content or ''
            if article.timestamp is not None:
                # RSS 2.0 expects RFC 822 style dates.
                ET.SubElement(item, 'pubDate').text = format_datetime(
                    article.timestamp)

        return ET.tostring(rss, encoding='unicode')
    except SQLAlchemyError as e:
        logger.error(f"Database error: {e}")
        return None
    finally:
        session.close()
|
64 |
-
|
65 |
-
async def update_feed_content():
    """Regenerate the RSS feed and return the result of generate_rss_feed()."""
    return generate_rss_feed()


# Periodic update function
async def periodic_update():
    """Refresh the feed forever, sleeping 300 seconds before each refresh.

    A failed refresh is logged and the loop continues, so one bad cycle does
    not end the periodic task. Cancellation still propagates because
    ``asyncio.CancelledError`` is not an ``Exception`` subclass on
    Python 3.8+.
    """
    while True:
        await asyncio.sleep(300)  # Wait for 5 minutes
        try:
            await update_feed_content()
        except Exception as e:
            logger.error(f"Periodic feed update failed: {e}")

# The second, identical definition of update_feed_content() that used to
# follow periodic_update() was removed; it only rebound the same name.
|
76 |
-
|
77 |
-
def start_periodic_task():
    """Start periodic_update() in the background without blocking the caller.

    When an event loop is already running in this thread, the coroutine is
    scheduled on it as a task. Otherwise it runs on its own event loop inside
    a daemon thread. The previous implementation called
    ``loop.run_until_complete()`` on a coroutine that never finishes, which
    blocked the caller forever.

    Returns:
        The created ``asyncio.Task`` or ``threading.Thread``, so the caller
        may keep a reference. Existing callers can ignore it.
    """
    import threading  # local import: only this function needs it

    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread.
        loop = None

    if loop is not None:
        return loop.create_task(periodic_update())

    worker = threading.Thread(
        target=lambda: asyncio.run(periodic_update()),
        name="periodic-update",
        daemon=True,  # must not keep the process alive on shutdown
    )
    worker.start()
    return worker


# Start the periodic update task
start_periodic_task()
|
86 |
-
|
87 |
-
class Article(Base):
    """ORM model for the ``articles`` table.

    save_to_database() inserts one row per call with the URL, title, content
    and content hash it is given.
    """
    __tablename__ = 'articles'
    # Integer primary key.
    id = Column(Integer, primary_key=True)
    # Page address; the only column declared NOT NULL.
    url = Column(String(255), nullable=False)
    title = Column(String(255))
    content = Column(Text)
    # 32 characters fits the MD5 hex digest produced by calculate_hash().
    # NOTE(review): the attribute name shadows the builtin hash() inside the
    # class body.
    hash = Column(String(32))
    # The function itself (not its result) is passed as the default.
    # datetime.utcnow returns a naive datetime.
    timestamp = Column(DateTime, default=datetime.datetime.utcnow)
|
95 |
-
|
96 |
-
|
97 |
-
# Utility functions
|
98 |
-
def sanitize_url(url: str) -> str:
    """Return *url* without surrounding whitespace if it is a valid URL.

    Args:
        url: Candidate URL as entered by the user.

    Returns:
        The stripped URL when ``validators.url`` accepts it, otherwise an
        empty string. The result is therefore always a ``str`` and is falsy
        for invalid input, so ``if sanitize_url(x):`` checks keep working.
    """
    if not isinstance(url, str):
        return ""
    candidate = url.strip()
    # validators.url() returns True on success and a falsy failure object
    # otherwise, never the URL itself, so map its verdict back to the string.
    return candidate if candidate and validators.url(candidate) else ""
|
100 |
-
|
101 |
-
|
102 |
-
async def fetch_url_content(url: str,
                            session: aiohttp.ClientSession) -> Tuple[str, str]:
    """Download *url* and return ``(title, content)``.

    Args:
        url: Address to fetch.
        session: Open aiohttp client session used for the request.

    Returns:
        The page title and the response body as text. The title is
        ``"No Title"`` when the document has no usable ``<title>`` text.
    """
    async with session.get(url) as response:
        content = await response.text()

    soup = BeautifulSoup(content, 'html.parser')
    # .string is None when <title> is empty or contains nested tags; without
    # this guard the function could return None as the title.
    raw_title = soup.title.string if soup.title else None
    title = str(raw_title).strip() if raw_title else ""
    return title or "No Title", content
|
109 |
-
|
110 |
-
|
111 |
-
def calculate_hash(content: str) -> str:
    """Return the MD5 hex digest of *content* encoded as UTF-8."""
    digest = hashlib.md5()
    digest.update(content.encode('utf-8'))
    return digest.hexdigest()
|
113 |
-
|
114 |
-
|
115 |
-
async def save_to_database(url: str, title: str, content: str, hash: str):
    """Insert one Article row built from the given values.

    A SQLAlchemyError is logged and the transaction rolled back. The session
    is closed in every case.
    """
    db = Session()
    try:
        db.add(Article(url=url, title=title, content=content, hash=hash))
        db.commit()
    except SQLAlchemyError as exc:
        logger.error(f"Database error: {exc}")
        db.rollback()
    finally:
        db.close()
|
126 |
|
127 |
-
|
128 |
-
def
|
129 |
-
|
130 |
-
try:
|
131 |
-
with open(storage_location, "a", newline='', encoding="utf-8") as csvfile:
|
132 |
-
csv_writer = csv.writer(csvfile)
|
133 |
-
csv_writer.writerow([
|
134 |
-
timestamp.strftime("%Y-%m-%d %H:%M:%S"), url, title, content
|
135 |
-
])
|
136 |
-
except Exception as e:
|
137 |
-
logger.error(f"Error saving to CSV: {e}")
|
138 |
-
|
139 |
-
|
140 |
-
async def monitor_url(url: str, interval: int, storage_location: str,
                      feed_rss: bool):
    """Poll *url* forever and record every change of its content hash.

    Args:
        url: Address to poll.
        interval: Initial delay between polls, passed to asyncio.sleep().
        storage_location: CSV path handed to save_to_csv(); skipped if falsy.
        feed_rss: When true, each change is also stored via save_to_database().

    NOTE(review): the scraped source lost all indentation. The nesting below
    is a reconstruction and should be confirmed against the real file.
    """
    # An MD5 hex digest is never empty, so the first fetch counts as a change.
    previous_hash = ""
    async with aiohttp.ClientSession() as session:
        while True:
            try:
                title, content = await fetch_url_content(url, session)
                current_hash = calculate_hash(content)

                if current_hash != previous_hash:
                    previous_hash = current_hash
                    timestamp = datetime.datetime.now()

                    if feed_rss:
                        await save_to_database(url, title, content,
                                               current_hash)

                    if storage_location:
                        save_to_csv(storage_location, url, title, content,
                                    timestamp)

                    history.append(
                        f"Change detected at {url} on {timestamp.strftime('%Y-%m-%d %H:%M:%S')}"
                    )
                    logger.info(f"Change detected at {url}")

                    # Once enough changes are counted, poll faster: halve the
                    # interval, but never go below 60.
                    change_counts[url] = change_counts.get(url, 0) + 1
                    if change_counts[url] >= CHANGE_FREQUENCY_THRESHOLD:
                        interval = max(60, interval // 2)
                # NOTE(review): this else is paired here with the hash
                # comparison (no change: reset the counter and back off up to
                # MAX_MONITORING_INTERVAL). It could instead belong to the
                # threshold check above. Confirm against the real file.
                else:
                    change_counts[url] = 0
                    interval = min(interval * 2, MAX_MONITORING_INTERVAL)

                # NOTE(review): url_monitoring_intervals is not defined in the
                # visible part of the file.
                url_monitoring_intervals[url] = interval
            except Exception as e:
                logger.error(f"Error monitoring {url}: {e}")
                history.append(f"Error monitoring {url}: {e}")

            # Placed outside the try so a failed poll also waits before the
            # next attempt (placement reconstructed, see the note above).
            await asyncio.sleep(interval)
|
179 |
-
|
180 |
-
async def start_monitoring(urls: List[str], storage_location: str,
                           feed_rss: bool):
    """Start one monitor_url() task per valid URL that is not yet monitored.

    Args:
        urls: Candidate URLs.
        storage_location: CSV path forwarded to monitor_url().
        feed_rss: Forwarded to monitor_url().

    Invalid URLs are logged and appended to ``history``. Tasks are stored in
    ``monitoring_tasks`` under the same string that is monitored, so the
    duplicate check and stop_monitoring() use one consistent key.
    """
    for url in urls:
        checked = sanitize_url(url)
        if not checked:
            logger.warning(f"Invalid URL: {url}")
            history.append(f"Invalid URL: {url}")
            continue

        # sanitize_url() may return a bare truthy flag instead of the URL.
        # Never use such a flag as the address or as the dict key.
        target = checked if isinstance(checked, str) else url
        if target in monitoring_tasks:
            continue

        monitoring_tasks[target] = asyncio.create_task(
            monitor_url(target, DEFAULT_MONITORING_INTERVAL,
                        storage_location, feed_rss))
|
193 |
-
|
194 |
-
|
195 |
-
def stop_monitoring(url: str):
    """Cancel and forget the monitoring task for *url*, if one exists."""
    task = monitoring_tasks.get(url)
    if task is None and url not in monitoring_tasks:
        return
    task.cancel()
    del monitoring_tasks[url]
|
199 |
-
|
200 |
-
async def chatbot_response(message: str, history: List[Tuple[str, str]]):
|
201 |
try:
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
"content": response
|
211 |
-
}) # Add assistant response
|
212 |
-
|
213 |
-
return history, history
|
214 |
except Exception as e:
|
215 |
-
logger.error(f"
|
216 |
-
|
217 |
-
history.append({
|
218 |
-
"role": "assistant",
|
219 |
-
"content": "Error: Could not get a response from the chatbot."
|
220 |
-
}) # Add error message
|
221 |
-
return history, history
|
222 |
|
223 |
-
|
224 |
-
def
|
225 |
-
global
|
226 |
try:
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
return
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
239 |
|
240 |
-
|
241 |
-
with gr.Column(): # Side pane for database configuration
|
242 |
-
db_url = gr.Textbox(label="Database URL",
|
243 |
-
placeholder="e.g., sqlite:///monitoring.db")
|
244 |
-
db_connect_button = gr.Button("Connect to Database")
|
245 |
-
db_status = gr.Textbox(label="Database Status",
|
246 |
-
interactive=False,
|
247 |
-
value="Not connected")
|
248 |
-
db_connect_button.click(create_db_engine,
|
249 |
-
inputs=db_url,
|
250 |
-
outputs=db_status)
|
251 |
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
storage_location = gr.Textbox(
|
259 |
-
label="Storage Location (CSV file path)",
|
260 |
-
placeholder="/path/to/your/file.csv")
|
261 |
-
feed_rss_checkbox = gr.Checkbox(label="Enable RSS Feed")
|
262 |
-
start_button = gr.Button("Start Monitoring")
|
263 |
-
stop_button = gr.Button("Stop Monitoring")
|
264 |
-
status_text = gr.Textbox(label="Status", interactive=False)
|
265 |
-
history_text = gr.Textbox(label="History",
|
266 |
-
lines=10,
|
267 |
-
interactive=False)
|
268 |
|
269 |
-
|
270 |
-
|
271 |
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
send_button = gr.Button("Send")
|
277 |
|
278 |
if __name__ == "__main__":
|
279 |
-
|
280 |
-
|
281 |
-
loop.run_until_complete(periodic_update())
|
282 |
-
demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
|
2 |
+
import asyncio
|
3 |
import gradio as gr
|
|
|
|
|
|
|
4 |
from sqlalchemy.exc import SQLAlchemyError
|
5 |
+
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession
|
6 |
+
from sqlalchemy.future import select
|
7 |
+
from sqlalchemy.orm import sessionmaker
|
8 |
+
import logging
|
9 |
+
import threading
|
10 |
+
import time
|
11 |
|
|
|
|
|
|
|
12 |
logger = logging.getLogger(__name__)
# Root logger emits INFO and above.
logging.basicConfig(level=logging.INFO)

# Global variables
# AsyncSession assigned by set_db_connection(); None until then.
db_session = None
# Async engine assigned by set_db_connection(); None until then.
engine = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
+
# Function for dynamically setting the database connection
|
20 |
+
async def set_db_connection(host, port, user, password, db_name):
    """Create the global async engine/session and verify the connection.

    Args:
        host: Database host name.
        port: Port as int or numeric string (the UI supplies text); blank
            means the driver default.
        user: User name; blank means none.
        password: Password; blank means none.
        db_name: Database (schema) name.

    Returns:
        A status message for the UI: success, or the failure reason. The
        globals ``engine`` and ``db_session`` are replaced only on success.
    """
    global db_session, engine
    from sqlalchemy import text
    from sqlalchemy.engine import URL

    new_engine = None
    try:
        # URL.create escapes each component, so credentials containing
        # characters such as '@', ':' or '/' no longer corrupt the URL.
        url = URL.create(
            "mysql+aiomysql",
            username=user or None,
            password=password or None,
            host=host,
            port=int(port) if str(port).strip() else None,
            database=db_name,
        )
        new_engine = create_async_engine(url, echo=False)

        # Engines connect lazily; run a trivial query so bad credentials or
        # an unreachable host are reported now instead of as false success.
        async with new_engine.connect() as connection:
            await connection.execute(text("SELECT 1"))

        Session = sessionmaker(new_engine, class_=AsyncSession,
                               expire_on_commit=False)
        new_session = Session()
    except Exception as e:
        logger.error(f"Failed to establish database connection: {e}")
        if new_engine is not None:
            try:
                await new_engine.dispose()
            except Exception as dispose_error:
                logger.warning(f"Failed to dispose engine: {dispose_error}")
        return f"Failed to connect to database: {e}"

    old_session, old_engine = db_session, engine
    db_session, engine = new_session, new_engine

    # Release what the previous connection held so reconnecting does not
    # leak pooled connections.
    try:
        if old_session is not None:
            await old_session.close()
        if old_engine is not None:
            await old_engine.dispose()
    except Exception as e:
        logger.warning(f"Failed to release previous database connection: {e}")

    logger.info("Database connection established.")
    return "Database connection established."
|
|
|
|
|
|
|
|
|
|
|
34 |
|
35 |
+
# Function to update database status
|
36 |
+
def update_db_status():
    """Return a short health label for the global database session.

    Returns:
        ``"Not connected"`` when no session has been set,
        ``"Connected"`` when a trivial ``SELECT 1`` succeeds,
        ``"Disconnected"`` when the probe fails for any reason.
    """
    if db_session is None:
        return "Not connected"

    try:
        # NOTE(review): asyncio.run() creates a fresh event loop for each
        # probe, while the session was created on another loop. Async drivers
        # are usually bound to their loop, so this may fail even when the
        # database is healthy. Consider running the probe on the loop that
        # owns the session.
        asyncio.run(db_session.execute(select(1)))
    except Exception as e:
        # Loop and driver errors are not SQLAlchemyError subclasses. Catching
        # broadly keeps a failed probe from crashing the caller.
        logger.warning(f"Database status check failed: {e}")
        return "Disconnected"
    return "Connected"
|
46 |
+
|
47 |
+
# Background task to update status
|
48 |
+
def background_update(db_status_textbox, interval=60):
    """Refresh the database status label forever. Intended for a daemon thread.

    Args:
        db_status_textbox: Component whose ``value`` receives the status.
        interval: Seconds to sleep between checks (default 60, as before).

    NOTE(review): this only assigns the component object's ``value``
    attribute on the server. Confirm whether open browser sessions actually
    receive the update; an event-driven refresh may be needed.
    """
    while True:
        try:
            status = update_db_status()
        except Exception as e:
            # An unexpected error must not end the monitoring thread.
            logger.error(f"Database status check raised: {e}")
            status = "Disconnected"
        db_status_textbox.value = status
        logger.info(f"Database status updated: {status}")
        time.sleep(interval)
|
54 |
+
|
55 |
+
# Main application that runs Gradio UI and background tasks
|
56 |
+
def main():
    """Build the Gradio UI, start the status thread and serve until stopped.

    NOTE(review): the scraped source lost its indentation. The status boxes
    and the connect button are placed in the settings column here; confirm
    the intended layout.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# Website Monitor and Chatbot")

        with gr.Row():
            with gr.Column():
                gr.Markdown("## Database Settings")
                db_host = gr.Textbox(label="Database Host", placeholder="localhost", value="localhost")
                db_port = gr.Textbox(label="Database Port", placeholder="3306", value="3306")
                db_user = gr.Textbox(label="Database User", placeholder="username", value="")
                db_pass = gr.Textbox(label="Database Password", placeholder="password", type="password", value="")
                db_name = gr.Textbox(label="Database Name", placeholder="database_name", value="monitoring")

                db_status_textbox = gr.Textbox(label="Database Status", interactive=False)
                status_text = gr.Textbox(label="Status", interactive=False)

                connect_button = gr.Button("Connect to Database")

        # Connect button click event
        connect_button.click(
            set_db_connection,
            inputs=[db_host, db_port, db_user, db_pass, db_name],
            outputs=[status_text]
        )

    # Start background task to update status
    threading.Thread(target=background_update, args=(db_status_textbox,), daemon=True).start()

    # Launch without blocking so the success message can be logged, then
    # park the main thread. Without block_thread() main() returns at once,
    # the interpreter exits and the server goes down with it.
    logger.info("Launching Gradio interface...")
    demo.launch(prevent_thread_lock=True)
    logger.info("Gradio interface launched successfully.")
    demo.block_thread()


if __name__ == "__main__":
    main()
    # Reached only after the server has been stopped.
    logger.info("Main function completed. App has shut down.")
|
|
|
|