import csv
import datetime
import hashlib
import os
import time

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from inspector import Configuration, Inspector

# Application monitoring (Inspector APM)
config = Configuration('5713ec1deb658fd2e6c069ce313ddaa34e2feee3')
inspector = Inspector(config)
inspector.start_transaction('my python script')

# Module-level state shared by the handlers below
CURRENT_INPUT = None
SYSTEM_OUTPUT = ""
OUTPUT_HISTORY = []
ERROR_HISTORY = []


def handle_input(value):
    """Record the most recent input so outputs and errors can be tied to it."""
    global CURRENT_INPUT
    CURRENT_INPUT = value


def handle_output(output):
    """Log an output message together with the input that produced it."""
    OUTPUT_HISTORY.append((CURRENT_INPUT, output))


def handle_system(output=None):
    """Update and/or report the current system-level status message."""
    global SYSTEM_OUTPUT
    if output is not None:
        SYSTEM_OUTPUT = output
    handle_output(f"System: {SYSTEM_OUTPUT}")


def handle_error(error):
    """Log an error together with the input that was being processed."""
    ERROR_HISTORY.append((CURRENT_INPUT, error))


def start_scraping(storage_location, urls, scrape_interval, content_type):
    """Scrape the given URLs every `scrape_interval` minutes and record changes."""
    urls = [url for url in urls if url]
    handle_input(f"Start scraping {', '.join(urls)} every {scrape_interval} minutes.")

    csv_file_path = os.path.join(storage_location, "scraped_data.csv")
    csv_fieldnames = ["date", "time", "url", "change"]

    # Create the storage directory and CSV file if they do not exist yet
    os.makedirs(storage_location, exist_ok=True)
    if not os.path.exists(csv_file_path):
        with open(csv_file_path, 'w', newline='') as csvfile:
            csv_writer = csv.DictWriter(csvfile, fieldnames=csv_fieldnames)
            csv_writer.writeheader()

    while True:
        # Wait for the configured interval between scraping passes
        time.sleep(scrape_interval * 60)

        for url in urls:
            # Start a headless Chrome session for this URL
            options = Options()
            options.add_argument('--headless')
            driver = webdriver.Chrome(
                service=Service(ChromeDriverManager().install()),
                options=options,
            )
            try:
                driver.set_window_size(1920, 1080)
                driver.implicitly_wait(10)
                driver.get(url)

                # Wait until the page body is present before reading the source
                wait = WebDriverWait(driver, 10)
                wait.until(EC.presence_of_element_located((By.TAG_NAME, 'body')))

                soup = BeautifulSoup(driver.page_source, 'html.parser')
            except Exception as exc:
                handle_error(exc)
                continue
            finally:
                driver.quit()

            # Extract the requested kind of content
            if content_type == 'text':
                content = soup.get_text()
            elif content_type == 'media':
                content = [img['src'] for img in soup.find_all('img') if img.get('src')]
            else:
                raise ValueError('Invalid content type')

            # Hash the content so changes can be detected between passes
            content_hash = hashlib.md5(str(content).encode('utf-8')).hexdigest()

            # Skip the URL if its content is unchanged since the last record
            with open(csv_file_path, 'r', newline='') as csvfile:
                rows = list(csv.DictReader(csvfile))
            if rows:
                last_row = rows[-1]
                if last_row['url'] == url and last_row['change'] == content_hash:
                    print(f"No changes detected on {url}")
                    continue

            # Append the change record to the CSV file
            now = datetime.datetime.now()
            with open(csv_file_path, 'a', newline='') as csvfile:
                csv_writer = csv.DictWriter(csvfile, fieldnames=csv_fieldnames)
                csv_writer.writerow({
                    "date": now.strftime("%Y-%m-%d"),
                    "time": now.strftime("%H:%M:%S"),
                    "url": url,
                    "change": content_hash,
                })

            # Save the scraped content under a per-page directory
            page_dir = os.path.join(storage_location, url.rstrip('/').split('/')[-1])
            os.makedirs(page_dir, exist_ok=True)
            if content_type == 'text':
                scrape_path = os.path.join(page_dir, f"scrape.{content_type}")
                with open(scrape_path, 'w') as f:
                    f.write(content)
            else:  # content_type == 'media'
                scrape_path = os.path.join(page_dir, "scrape_media")
                os.makedirs(scrape_path, exist_ok=True)
                for img_url in content:
                    response = requests.get(img_url)
                    img_name = hashlib.md5(response.content).hexdigest()[:10]
                    with open(os.path.join(scrape_path, f"{img_name}.jpg"), 'wb') as img_file:
                        img_file.write(response.content)

            handle_output(f"Scraped {url} and saved data to {csv_file_path}")
            handle_output(f"Scraped {url} and saved data to {scrape_path}")

        inspector.end_transaction()

        # Report any errors collected so far
        for recorded_input, error in ERROR_HISTORY:
            handle_output(f"Error while processing '{recorded_input}': {error}")

        # Report scraping status
        handle_output(f"Scraping {', '.join(urls)} every {scrape_interval} minutes.")


def handle_ui():
    """Start scraping with the hard-coded URL list and settings."""
    urls = [
        'https://www.culver.org/',
        'https://www.culver.org/about-us/',
        'https://www.culver.org/academics/',
        'https://www.culver.org/athletics/',
        'https://www.culver.org/arts-and-humanities/',
        'https://www.culver.org/fine-and-performing-arts/',
        'https://www.culver.org/clubs/',
        'https://www.culver.org/community-education/',
        'https://www.culver.org/community-outreach/',
    ]
    scrape_interval = 5    # minutes between scraping passes
    content_type = 'text'  # 'text' or 'media'
    start_scraping('scrape_data', urls, scrape_interval, content_type)


if __name__ == '__main__':
    # Read input
    user_input = "Start scraping https://www.culver.org/ and save data to scrape_data directory."

    # Call functions
    handle_input(user_input)
    handle_system()

    # Run system
    handle_ui()