acecalisto3 committed
Commit 885ce0d · verified · 1 Parent(s): d4febba

Update app.py

Files changed (1)
  1. app.py +143 -133
app.py CHANGED
@@ -1,150 +1,160 @@
  import datetime
  import os
- import csv
- import time
- import hashlib
  import logging
- import gradio as gr
- from selenium import webdriver
- from selenium.webdriver.chrome.service import Service
- from selenium.webdriver.chrome.options import Options
- from webdriver_manager.chrome import ChromeDriverManager
- from huggingface_hub import InferenceClient
- import random
- import yaml

- # Configure logging
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

- # Define constants
- DATE_TIME_STR = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
- PURPOSE = f"You go to Culvers sites, you continuously seek changes on them since your last observation. Anything new that gets logged and dumped into csv, stored in your log folder at user/app/scraped_data."
- HISTORY = []
- CURRENT_TASK = None
- DEFAULT_FILE_PATH = "user/app/scraped_data/culver/culvers_changes.csv"

  # Ensure the directory exists
- os.makedirs(os.path.dirname(DEFAULT_FILE_PATH), exist_ok=True)

- # Function to monitor URLs for changes
- def monitor_urls(storage_location, urls, scrape_interval, content_type):
-     global HISTORY
-     previous_hashes = [""] * len(urls)

      try:
-         with webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=Options()) as driver:
-             while True:
-                 for i, url in enumerate(urls):
-                     try:
-                         driver.get(url)
-                         time.sleep(2)  # Wait for the page to load
-                         if content_type == "text":
-                             current_content = driver.page_source
-                         elif content_type == "media":
-                             current_content = driver.find_elements_by_tag_name("img")
-                         else:
-                             current_content = driver.page_source
-                         current_hash = hashlib.md5(str(current_content).encode('utf-8')).hexdigest()
-                         if current_hash != previous_hashes[i]:
-                             previous_hashes[i] = current_hash
-                             date_time_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-                             HISTORY.append(f"Change detected at {url} on {date_time_str}")
-                             with open(storage_location, "a", newline="") as csvfile:
-                                 csv_writer = csv.DictWriter(csvfile, fieldnames=["date", "time", "url", "change"])
-                                 csv_writer.writerow({"date": date_time_str.split()[0], "time": date_time_str.split()[1], "url": url, "change": "Content changed"})
-                             logging.info(f"Change detected at {url} on {date_time_str}")
-                     except Exception as e:
-                         logging.error(f"Error accessing {url}: {e}")
-                 time.sleep(scrape_interval * 60)  # Check every scrape_interval minutes
      except Exception as e:
-         logging.error(f"Error starting ChromeDriver: {e}")

- # Define main function to handle user input
- def handle_input(storage_location, urls, scrape_interval, content_type):
-     global CURRENT_TASK, HISTORY

-     CURRENT_TASK = f"Monitoring URLs: {', '.join(urls)}"
-     HISTORY.append(f"Task started: {CURRENT_TASK}")
-     monitor_urls(storage_location, urls, scrape_interval, content_type)
-     return TASK_PROMPT.format(task=CURRENT_TASK, history="\n".join(map(str, HISTORY)))

- # Load custom prompts
- try:
-     with open("custom_prompts.yaml", "r") as fp:
-         custom_prompts = yaml.safe_load(fp)
- except FileNotFoundError:
-     custom_prompts = {"WEB_DEV": "", "AI_SYSTEM_PROMPT": "", "PYTHON_CODE_DEV": "", "CODE_GENERATION": "", "CODE_INTERPRETATION": "", "CODE_TRANSLATION": "", "CODE_IMPLEMENTATION": ""}
-
- # Define agents
- AGENTS = ["WEB_DEV", "AI_SYSTEM_PROMPT", "PYTHON_CODE_DEV", "CODE_GENERATION", "CODE_INTERPRETATION", "CODE_TRANSLATION", "CODE_IMPLEMENTATION"]
-
- # Define the Mistral inference client
- client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
-
- # Define the chat response function
- def respond(message, history, system_message, max_tokens, temperature, top_p):
-     return generate(message, history, system_message, max_tokens, temperature, top_p)
-
- def start_scraping(storage_location, url1, url2, url3, url4, url5, url6, url7, url8, url9, url10, scrape_interval, content_type):
-     urls = [url for url in [url1, url2, url3, url4, url5, url6, url7, url8, url9, url10] if url]
-     handle_input(storage_location, urls, scrape_interval, content_type)
-     # Start transaction
-     inspector.start_transaction('start_scraping')
-     # Scrape data
-     while True:
-         # Check for scrape_interval
-         time.sleep(scrape_interval * 60)  # Check every scrape_interval minutes
-     # End transaction
-     inspector.end_transaction()
-     return f"Started scraping {', '.join(urls)} every {scrape_interval} minutes."
-
- # Function to display CSV content
- def display_csv(storage_location):
-     if os.path.exists(storage_location):
-         with open(storage_location, "r") as file:
-             return file.read()
-     else:
-         return "No data available."
-
- # Create Gradio interface
- def chat_interface(message, system_message, max_tokens, temperature, top_p, storage_location, url1, url2, url3, url4, url5, url6, url7, url8, url9, url10, scrape_interval, content_type):
-     global HISTORY
-     response = respond(message, HISTORY, system_message, max_tokens, temperature, top_p)
-     HISTORY.append((message, response))
-     return HISTORY, ""
-
- demo = gr.Blocks()
-
- with demo:
-     with gr.Row():
-         with gr.Column():
-             message = gr.Textbox(label="Message")
-             system_message = gr.Textbox(value="You are a friendly Chatbot.", label="System message")
-             max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
-             temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
-             top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
-             storage_location = gr.Textbox(value=DEFAULT_FILE_PATH, label="Storage Location")
-             url1 = gr.Textbox(value="https://www.culver.k12.in/", label="URL 1")
-             url2 = gr.Textbox(value="https://www.facebook.com/CulverCommunitySchools", label="URL 2")
-             url3 = gr.Textbox(label="URL 3")
-             url4 = gr.Textbox(label="URL 4")
-             url5 = gr.Textbox(label="URL 5")
-             url6 = gr.Textbox(label="URL 6")
-             url7 = gr.Textbox(label="URL 7")
-             url8 = gr.Textbox(label="URL 8")
-             url9 = gr.Textbox(label="URL 9")
-             url10 = gr.Textbox(label="URL 10")
-             scrape_interval = gr.Slider(minimum=1, maximum=60, value=5, step=1, label="Scrape Interval (minutes)")
-             content_type = gr.Radio(choices=["text", "media", "both"], value="text", label="Content Type")
-             start_button = gr.Button("Start Scraping")
-             csv_output = gr.Textbox(label="CSV Output", interactive=False)

-         with gr.Column():
-             chat_history = gr.Chatbot(label="Chat History")
-             response_box = gr.Textbox(label="Response")
-
-     start_button.click(start_scraping, inputs=[storage_location, url1, url2, url3, url4, url5, url6, url7, url8, url9, url10, scrape_interval, content_type], outputs=csv_output)
-     message.submit(chat_interface, inputs=[message, system_message, max_tokens, temperature, top_p, storage_location, url1, url2, url3, url4, url5, url6, url7, url8, url9, url10, scrape_interval, content_type], outputs=[chat_history, response_box])

  if __name__ == "__main__":
-     demo.launch()
+
+ import gradio as gr
+ import pandas as pd
+ import sqlite3
+ from feedgen.feed import FeedGenerator
  import datetime
  import os
  import logging
+ import sys
+ import csv
+ import traceback

+ sys.path.append('/home/user')
+ from app.background_tasks import start_background_monitoring, create_database

+ # Set up absolute paths
+ BASE_DIR = '/home/user/app/scraped_data/culver'
+ LOG_FILE = os.path.join(BASE_DIR, 'main.log')
+ CSV_FILE = os.path.join(BASE_DIR, 'culvers_changes.csv')
+ DB_FILE = os.path.join(BASE_DIR, 'culvers_changes.db')
+ XML_FILE = os.path.join(BASE_DIR, 'culvers_changes.xml')

  # Ensure the directory exists
+ try:
+     os.makedirs(BASE_DIR, exist_ok=True)
+     print(f"Directory created or already exists: {BASE_DIR}")
+ except Exception as e:
+     print(f"Error creating directory: {e}")
+     traceback.print_exc()

+ # Configure logging
+ try:
+     logging.basicConfig(filename=LOG_FILE, level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+     print(f"Logging configured. Log file: {LOG_FILE}")
+ except Exception as e:
+     print(f"Error configuring logging: {e}")
+     traceback.print_exc()

+ # Write directly to log file
+ try:
+     with open(LOG_FILE, 'w') as log_file:
+         log_file.write(f"Log file created at {datetime.datetime.now()}\n")
+     print(f"Log file created: {LOG_FILE}")
+ except Exception as e:
+     print(f"Error writing to log file: {e}")
+     traceback.print_exc()
+
+ # Write directly to CSV file
+ try:
+     with open(CSV_FILE, 'w', newline='') as csv_file:
+         writer = csv.writer(csv_file)
+         writer.writerow(['date', 'time', 'url', 'change'])
+         writer.writerow([datetime.datetime.now().strftime("%Y-%m-%d"), datetime.datetime.now().strftime("%H:%M:%S"), 'Initial', 'CSV file created'])
+     print(f"CSV file created: {CSV_FILE}")
+ except Exception as e:
+     print(f"Error writing to CSV file: {e}")
+     traceback.print_exc()
+
+ # Start background monitoring
+ urls = ["https://www.culver.k12.in.us/", "https://www.facebook.com/CulverCommunitySchools"]
+ try:
+     start_background_monitoring(CSV_FILE, urls, 1, "text")  # Changed interval to 1 minute for testing
+     print("Background monitoring started")
+ except Exception as e:
+     print(f"Error starting background monitoring: {e}")
+     traceback.print_exc()
+
+ logging.info("Background monitoring initiated from main.py")
+
+ def view_scraped_data():
      try:
+         create_database()  # Ensure the database and table exist
+         conn = sqlite3.connect(DB_FILE)
+         df = pd.read_sql_query("SELECT * FROM changes ORDER BY date DESC, time DESC LIMIT 50", conn)
+         conn.close()
+         return df
      except Exception as e:
+         print(f"Error viewing scraped data: {e}")
+         traceback.print_exc()
+         return pd.DataFrame()

+ def view_rss_feed():
+     try:
+         with open(XML_FILE, 'r') as file:
+             return file.read()
+     except FileNotFoundError:
+         return "RSS feed not generated yet."
+     except Exception as e:
+         print(f"Error viewing RSS feed: {e}")
+         traceback.print_exc()
+         return "Error viewing RSS feed"

+ def generate_rss_feed():
+     try:
+         create_database()  # Ensure the database and table exist
+         fg = FeedGenerator()
+         fg.title('Culvers Site Changes')
+         fg.link(href='http://example.com', rel='alternate')
+         fg.description('Recent changes detected on Culvers websites')
+
+         conn = sqlite3.connect(DB_FILE)
+         c = conn.cursor()
+         c.execute("SELECT * FROM changes ORDER BY date DESC, time DESC LIMIT 20")
+         changes = c.fetchall()
+
+         for change in changes:
+             fe = fg.add_entry()
+             fe.id(str(change[0]))
+             fe.title(f'Change detected at {change[3]}')
+             fe.link(href=change[3])
+             fe.description(change[4])
+             fe.pubDate(datetime.datetime.strptime(f"{change[1]} {change[2]}", "%Y-%m-%d %H:%M:%S"))
+
+         conn.close()
+
+         fg.rss_file(XML_FILE)
+         return "RSS feed generated successfully."
+     except Exception as e:
+         print(f"Error generating RSS feed: {e}")
+         traceback.print_exc()
+         return "Error generating RSS feed"

+ def create_viewer():
+     with gr.Blocks() as demo:
+         gr.Markdown("# Culvers Site Monitor and Viewer")
+
+         with gr.Tab("Monitor Status"):
+             gr.Markdown("Continuous monitoring is active for the following URLs:")
+             for url in urls:
+                 gr.Markdown(f"- {url}")
+             gr.Markdown(f"Monitoring interval: 1 minute")
+             gr.Markdown(f"Data is being stored in: {CSV_FILE}")

+         with gr.Tab("View Scraped Data"):
+             gr.DataFrame(view_scraped_data, label="Recent Changes")
+
+         with gr.Tab("View RSS Feed"):
+             gr.TextArea(view_rss_feed, label="RSS Feed Content")
+             gr.Button("Generate RSS Feed").click(generate_rss_feed, outputs=gr.TextArea(label="Generation Status"))
+
+     return demo

  if __name__ == "__main__":
+     try:
+         # Create the database and table before launching the viewer
+         create_database()
+         print("Database created")
+
+         # Create and launch the viewer
+         viewer = create_viewer()
+         print("Viewer created")
+         viewer.launch()
+         print("Viewer launched")
+
+         logging.info("Web-based viewer created and launched with continuous monitoring.")
+     except Exception as e:
+         print(f"Error in main execution: {e}")
+         traceback.print_exc()
+
+ print("Main application file updated with error handling, console logging, and all necessary functions.")
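For context, the updated app.py imports create_database and start_background_monitoring from app.background_tasks, a module that is not included in this commit. Below is a minimal sketch of what that module might look like: only the two function names come from the imports above, the changes table schema (id, date, time, url, change) is inferred from the queries in app.py, and everything else (plain HTTP fetches in a daemon thread instead of the removed Selenium loop) is an assumption, not the author's actual implementation.

# Hypothetical sketch of app/background_tasks.py -- NOT part of this commit.
# Only create_database and start_background_monitoring are known from the
# imports in app.py; the table schema is inferred from its SQL queries.
import datetime
import hashlib
import logging
import sqlite3
import threading
import time

import requests  # assumption: simple HTTP fetches instead of the removed Selenium driver

DB_FILE = '/home/user/app/scraped_data/culver/culvers_changes.db'


def create_database():
    """Create the SQLite database and the `changes` table if they do not exist."""
    conn = sqlite3.connect(DB_FILE)
    conn.execute(
        "CREATE TABLE IF NOT EXISTS changes ("
        "id INTEGER PRIMARY KEY AUTOINCREMENT, "
        "date TEXT, time TEXT, url TEXT, change TEXT)"
    )
    conn.commit()
    conn.close()


def _monitor(storage_location, urls, scrape_interval, content_type):
    """Poll each URL, hash its content, and record a row whenever the hash changes."""
    previous_hashes = {url: "" for url in urls}
    while True:
        for url in urls:
            try:
                content = requests.get(url, timeout=30).text
                current_hash = hashlib.md5(content.encode("utf-8")).hexdigest()
                if current_hash != previous_hashes[url]:
                    previous_hashes[url] = current_hash
                    now = datetime.datetime.now()
                    conn = sqlite3.connect(DB_FILE)
                    conn.execute(
                        "INSERT INTO changes (date, time, url, change) VALUES (?, ?, ?, ?)",
                        (now.strftime("%Y-%m-%d"), now.strftime("%H:%M:%S"), url, "Content changed"),
                    )
                    conn.commit()
                    conn.close()
            except Exception as e:
                logging.error(f"Error accessing {url}: {e}")
        time.sleep(scrape_interval * 60)  # wait scrape_interval minutes between passes


def start_background_monitoring(storage_location, urls, scrape_interval, content_type):
    """Run the monitoring loop in a daemon thread so the Gradio app stays responsive.

    storage_location and content_type are accepted to match the call in app.py,
    but this sketch only writes to the SQLite database.
    """
    thread = threading.Thread(
        target=_monitor,
        args=(storage_location, urls, scrape_interval, content_type),
        daemon=True,
    )
    thread.start()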