File size: 11,439 Bytes
6bf8a84
 
 
 
 
 
34bc10c
 
6bf8a84
 
 
 
34bc10c
6bf8a84
 
 
 
34bc10c
 
 
 
 
 
 
 
6bf8a84
 
 
 
 
 
 
34bc10c
 
6bf8a84
 
34bc10c
6bf8a84
 
34bc10c
 
6bf8a84
34bc10c
 
6bf8a84
 
34bc10c
6bf8a84
34bc10c
 
 
 
 
6bf8a84
34bc10c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6bf8a84
34bc10c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6bf8a84
34bc10c
 
 
 
 
 
 
6bf8a84
 
34bc10c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6bf8a84
 
 
 
 
 
 
 
 
 
 
 
 
34bc10c
 
 
 
 
 
 
6bf8a84
34bc10c
6bf8a84
34bc10c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6bf8a84
 
34bc10c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6bf8a84
 
 
 
34bc10c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6bf8a84
34bc10c
 
 
 
 
 
 
 
 
 
 
 
6bf8a84
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
import datetime
import os
import csv
import time
import hashlib
import logging
import threading
from pathlib import Path
import gradio as gr
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from huggingface_hub import InferenceClient

# Configure logging: records at INFO and above are sent both to the file
# "monitoring.log" (relative to the current working directory) and to a
# console stream handler.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("monitoring.log"),
        logging.StreamHandler()
    ]
)

# Prompt templates with named placeholders. Neither is referenced by the code
# visible in this file -- presumably left over from an agent-style prompt;
# confirm before removing.
PREFIX = "Task started at {date_time_str}. Purpose: {purpose}"
TASK_PROMPT = "Current task: {task}. History:\n{history}"

# Free-text description of the monitor's job; its name matches the {purpose}
# placeholder in PREFIX.
purpose = """
You monitor Culvers sites continuously, seeking changes since your last observation.
Any new changes are logged and dumped into a CSV, stored in your log folder at user/app/scraped_data.
"""

# Shared module-level state:
#   history           - list of human-readable event strings; rebound by
#                       start_monitoring and appended to by the monitoring thread
#   current_task      - description of the running task, or None when idle
#   monitoring_thread - the background threading.Thread, or None before first start
#   stop_event        - set by stop_monitoring to ask the thread to exit
history = []
current_task = None
monitoring_thread = None
stop_event = threading.Event()

# Default CSV location (a relative path, so it resolves against the current
# working directory); pathlib keeps the separators cross-platform.
default_file_path = Path("user/app/scraped_data/culver/culvers_changes.csv")

# Create the default output directory at import time so the first write
# does not fail on a missing folder.
default_file_path.parent.mkdir(parents=True, exist_ok=True)

def _collect_image_sources(driver):
    """Return the ``src`` of every ``<img>`` on the current page, skipping images that have none."""
    sources = []
    for img in driver.find_elements(By.TAG_NAME, "img"):
        src = img.get_attribute('src')
        # get_attribute returns None for a missing attribute; joining None
        # into a string would raise TypeError, so such images are skipped.
        if src:
            sources.append(src)
    return sources


def _snapshot_content(driver, content_type):
    """Return the string that represents the loaded page for the given content type."""
    if content_type == "media":
        return ''.join(_collect_image_sources(driver))
    if content_type == "both":
        image_part = ''.join(_collect_image_sources(driver))
        return driver.page_source + image_part
    # "text" and any unrecognised value fall back to the raw page source.
    return driver.page_source


def _append_change_row(storage_path, fieldnames, url, date_time):
    """Append one "Content changed" row for *url* to the CSV at *storage_path*."""
    with storage_path.open("a", newline='', encoding='utf-8') as csvfile:
        csv.DictWriter(csvfile, fieldnames=fieldnames).writerow({
            "date": date_time.strftime("%Y-%m-%d"),
            "time": date_time.strftime("%H:%M:%S"),
            "url": url,
            "change": "Content changed"
        })


def monitor_urls(storage_location, urls, scrape_interval, content_type, stop_event):
    """
    Monitor the given URLs for changes and log them into a CSV file.
    Runs in a separate thread.

    Each cycle loads every URL in a headless Chrome session, hashes the
    selected content and, when the hash differs from the previous one for
    that URL, appends a row to the CSV, appends a line to the module-level
    ``history`` list and logs it. The baseline hash starts empty, so the
    first successful fetch of each URL is always recorded as a change.

    Args:
        storage_location: Path of the CSV file. Missing parent directories
            are created; a header row is written if the file does not exist.
        urls: Iterable of URLs to poll. Blank entries are ignored; if nothing
            remains, the function returns without starting a browser.
        scrape_interval: Minutes to wait between cycles (int or float).
        content_type: "text" (page source), "media" (image ``src`` values)
            or "both"; any other value is treated like "text".
        stop_event: ``threading.Event``; once set, the loop exits promptly.

    Returns:
        None.
    """
    global history
    fieldnames = ["date", "time", "url", "change"]
    storage_path = Path(storage_location)

    # A user-supplied path may point into a folder that does not exist yet.
    storage_path.parent.mkdir(parents=True, exist_ok=True)

    # Initialize CSV file: write header if file doesn't exist
    if not storage_path.exists():
        with storage_path.open("w", newline='', encoding='utf-8') as csvfile:
            csv.DictWriter(csvfile, fieldnames=fieldnames).writeheader()

    # Drop blank entries (e.g. an empty URL textbox) instead of failing on
    # them every cycle, and avoid launching Chrome when nothing is left.
    urls = [url for url in urls if url and str(url).strip()]
    if not urls:
        logging.warning("No URLs to monitor; monitoring thread exiting.")
        return

    previous_hashes = [""] * len(urls)
    driver = None

    try:
        # float() accepts both int and float slider values; range() did not.
        wait_seconds = max(float(scrape_interval) * 60, 0.0)

        options = Options()
        # The Options.headless setter is gone from current Selenium 4
        # releases; the command-line switch works on every version.
        options.add_argument("--headless")
        options.add_argument("--disable-gpu")
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")

        # Created inside the try block so a failed browser launch is logged
        # rather than silently killing the thread.
        driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

        while not stop_event.is_set():
            for i, url in enumerate(urls):
                if stop_event.is_set():
                    break
                try:
                    driver.get(url)
                    time.sleep(2)  # Wait for the page to load
                    current_content = _snapshot_content(driver, content_type)
                    current_hash = hashlib.md5(current_content.encode('utf-8')).hexdigest()

                    if current_hash != previous_hashes[i]:
                        previous_hashes[i] = current_hash
                        date_time = datetime.datetime.now()
                        date_time_str = date_time.strftime("%Y-%m-%d %H:%M:%S")
                        history_entry = f"Change detected at {url} on {date_time_str}"
                        history.append(history_entry)
                        _append_change_row(storage_path, fieldnames, url, date_time)
                        logging.info(history_entry)
                except Exception as e:
                    # One failing URL must not stop monitoring of the others.
                    logging.error(f"Error accessing {url}: {e}")
            # Event.wait returns as soon as the event is set, so shutdown is
            # immediate instead of waiting for a polling sleep to finish.
            stop_event.wait(wait_seconds)
    except Exception as e:
        logging.error(f"Unexpected error in monitoring thread: {e}")
    finally:
        if driver is not None:
            driver.quit()
        logging.info("Monitoring thread has been stopped.")

def start_monitoring(storage_location, url1, url2, scrape_interval, content_type):
    """
    Launch the background monitoring thread for the two given URLs.

    Returns a ``(status_message, history)`` pair. If a monitoring thread is
    already alive, nothing is started and the existing history is returned.
    Otherwise the module-level history is reset, the stop event is cleared
    and a daemon thread running ``monitor_urls`` is started.
    """
    global current_task, monitoring_thread, stop_event, history

    already_running = bool(monitoring_thread) and monitoring_thread.is_alive()
    if already_running:
        return "Monitoring is already running.", history

    # Fresh run: describe the task and start the history from scratch.
    current_task = f"Monitoring URLs: {url1}, {url2}"
    history = [f"Task started: {current_task}"]
    logging.info(current_task)

    # Make sure a stop request from a previous run does not end this one.
    stop_event.clear()

    worker_args = (storage_location, [url1, url2], scrape_interval, content_type, stop_event)
    monitoring_thread = threading.Thread(target=monitor_urls, args=worker_args, daemon=True)
    monitoring_thread.start()

    return "Monitoring started.", history

def stop_monitoring():
    """
    Ask the monitoring thread to stop and wait for it to finish.

    Returns a ``(status_message, history)`` pair. When no thread is alive
    the state is left untouched and only a status message is reported.
    """
    global current_task, monitoring_thread, stop_event, history

    thread_is_active = bool(monitoring_thread) and monitoring_thread.is_alive()
    if not thread_is_active:
        return "No monitoring task is currently running.", history

    # Signal the worker, then block until it has actually exited.
    stop_event.set()
    monitoring_thread.join()

    stopped_note = "Monitoring stopped by user."
    history.append(stopped_note)
    logging.info(stopped_note)
    current_task = None
    return "Monitoring stopped.", history

# Inference client for the chat model, built once at import time and used by
# respond() below for every chat request.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """
    Stream a chat completion for *message*, yielding the reply as it grows.

    Args:
        message: The new user message.
        history: Earlier ``(user, assistant)`` pairs; ``None`` or empty means
            no prior turns. Empty halves of a pair are skipped.
        system_message: Content of the leading system message.
        max_tokens: Forwarded to ``client.chat_completion``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.

    Yields:
        The accumulated response text after each streamed chunk. If the
        request fails, the error is logged and a single fallback message is
        yielded instead.
    """
    messages = [{"role": "system", "content": system_message}]

    # Treat a missing history (None) the same as an empty conversation.
    for user_msg, assistant_msg in history or []:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    messages.append({"role": "user", "content": message})

    response = ""

    try:
        for msg in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # Some chunks carry no choices at all; nothing to append.
            if not msg.choices:
                continue
            # "content" can be present but None (e.g. the closing chunk), in
            # which case .get() returns None rather than the default.
            token = msg.choices[0].delta.get("content", "") or ""
            response += token
            yield response
    except Exception as e:
        logging.error(f"Error in chatbot response: {e}")
        yield "An error occurred while generating the response."

# Create Gradio interface: a "Monitor" tab that drives the background
# monitoring thread and a "Chatbot" tab that streams replies from respond().
with gr.Blocks() as demo:
    gr.Markdown("# Culvers Site Monitor and Chatbot")
    gr.Markdown(
        "Monitor changes on Culvers' websites and log them into a CSV file. "
        "Also, chat with a friendly chatbot."
    )

    with gr.Tab("Monitor"):
        with gr.Row():
            storage_location = gr.Textbox(
                value=str(default_file_path),
                label="Storage Location",
                placeholder="Path to CSV file where changes will be logged"
            )
        with gr.Row():
            url1 = gr.Textbox(
                value="https://www.culver.k12.in.us/",
                label="URL 1",
                placeholder="First URL to monitor"
            )
            url2 = gr.Textbox(
                value="https://www.facebook.com/CulverCommunitySchools",
                label="URL 2",
                placeholder="Second URL to monitor"
            )
        with gr.Row():
            scrape_interval = gr.Slider(
                minimum=1,
                maximum=60,
                value=5,
                step=1,
                label="Scrape Interval (minutes)"
            )
            content_type = gr.Radio(
                choices=["text", "media", "both"],
                value="text",
                label="Content Type"
            )
        with gr.Row():
            start_button = gr.Button("Start Monitoring")
            stop_button = gr.Button("Stop Monitoring")
            # Changes found by the background thread only reach the UI when
            # something re-reads the history; this button does that on demand.
            refresh_button = gr.Button("Refresh History")
        with gr.Row():
            monitoring_status = gr.Textbox(
                value="No active monitoring.",
                label="Monitoring Status",
                interactive=False
            )
        with gr.Row():
            monitoring_history = gr.Textbox(
                value="",
                label="Monitoring History",
                lines=10,
                interactive=False
            )

    with gr.Tab("Chatbot"):
        chatbot = gr.Chatbot(label="Chat with the Assistant")
        with gr.Row():
            system_message = gr.Textbox(
                value="You are a friendly Chatbot.",
                label="System Message",
                visible=False
            )
        with gr.Row():
            user_input = gr.Textbox(
                label="You:",
                placeholder="Type your message here..."
            )
            submit_button = gr.Button("Send")
        # Parameters
        max_tokens = gr.Slider(
            minimum=1,
            maximum=2048,
            value=512,
            step=1,
            label="Max new tokens"
        )
        temperature = gr.Slider(
            minimum=0.1,
            maximum=4.0,
            value=0.7,
            step=0.1,
            label="Temperature"
        )
        top_p = gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)"
        )

    # Define interactions
    def update_monitoring_history(message, history_text):
        """Return *history_text* with *message* appended on its own line."""
        return history_text + message + "\n"

    def display_history(status, hist):
        """Turn a (status, list-of-lines) pair into (status, newline-joined text)."""
        return status, "\n".join(hist)

    def refresh_monitoring(status, hist):
        """Return the status unchanged together with the history joined into text."""
        return status, "\n".join(hist)

    def _start_and_display(storage, first_url, second_url, interval, kind):
        """Start monitoring and format the returned history list for the Textbox."""
        return display_history(*start_monitoring(storage, first_url, second_url, interval, kind))

    def _stop_and_display():
        """Stop monitoring and format the returned history list for the Textbox."""
        return display_history(*stop_monitoring())

    def _refresh_display(status):
        """Re-read the module-level history so newly detected changes are shown."""
        return refresh_monitoring(status, history)

    def _stream_chat(msg, hist, sys_msg, max_t, temp, tp):
        """Yield successive Chatbot values while the assistant's reply streams in."""
        past_turns = list(hist or [])
        if not msg or not msg.strip():
            # Nothing to send; leave the conversation as it is.
            yield past_turns
            return
        # Show the user's message immediately, before the first token arrives.
        yield past_turns + [(msg, "")]
        for partial in respond(msg, past_turns, sys_msg, max_t, temp, tp):
            yield past_turns + [(msg, partial)]

    # The handlers below pass the history through display_history because a
    # Textbox needs a string; handing it the raw list would show its repr.
    start_button.click(
        fn=_start_and_display,
        inputs=[storage_location, url1, url2, scrape_interval, content_type],
        outputs=[monitoring_status, monitoring_history],
        queue=False
    )

    stop_button.click(
        fn=_stop_and_display,
        inputs=None,
        outputs=[monitoring_status, monitoring_history],
        queue=False
    )

    refresh_button.click(
        fn=_refresh_display,
        inputs=[monitoring_status],
        outputs=[monitoring_status, monitoring_history],
        queue=False
    )

    # Streaming requires the handler itself to be a generator function; a
    # lambda that merely returns a generator object is not iterated by Gradio.
    chat_inputs = [user_input, chatbot, system_message, max_tokens, temperature, top_p]

    user_input.submit(
        _stream_chat,
        inputs=chat_inputs,
        outputs=[chatbot]
    )

    submit_button.click(
        _stream_chat,
        inputs=chat_inputs,
        outputs=[chatbot]
    )

# Start the Gradio server only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()