acecalisto3 committed
Commit da20049
1 Parent(s): 4e5b9ff

Update app.py

Files changed (1)
  1. app.py +51 -147
app.py CHANGED
@@ -17,169 +17,73 @@ import yaml
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
 # Define constants
-PREFIX = "Task started at {date_time_str}. Purpose: {purpose}"
-TASK_PROMPT = "Current task: {task}. History:\n{history}"
-
-# Define current date/time
-date_time_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-
-# Define purpose
-purpose = """
-You go to Culvers sites, you continuously seek changes on them since your last observation.
-Anything new that gets logged and dumped into csv, stored in your log folder at user/app/scraped_data.
-"""
-
-# Define history
-history = []
-
-# Define current task
-current_task = None
-
-# Default file path
-default_file_path = "user/app/scraped_data/culver/culvers_changes.csv"
+DATE_TIME_STR = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+PURPOSE = f"You go to Culvers sites, you continuously seek changes on them since your last observation. Anything new that gets logged and dumped into csv, stored in your log folder at user/app/scraped_data."
+HISTORY = []
+CURRENT_TASK = None
+DEFAULT_FILE_PATH = "user/app/scraped_data/culver/culvers_changes.csv"
 
 # Ensure the directory exists
-os.makedirs(os.path.dirname(default_file_path), exist_ok=True)
+os.makedirs(os.path.dirname(DEFAULT_FILE_PATH), exist_ok=True)
 
 # Function to monitor URLs for changes
 def monitor_urls(storage_location, urls, scrape_interval, content_type):
-    global history
+    global HISTORY
     previous_hashes = [""] * len(urls)
-
-    # Ensure the directory exists
-    os.makedirs(os.path.dirname(storage_location), exist_ok=True)
-
-    with open(storage_location, "w", newline='') as csvfile:
-        csv_toolkit = csv.DictWriter(csvfile, fieldnames=["date", "time", "url", "change"])
-        csv_toolkit.writeheader()
-
-        options = Options()
-        options.headless = True
-        options.add_argument("--disable-gpu")
-        options.add_argument("--no-sandbox")
-        options.add_argument("--disable-dev-shm-usage")
-
-        try:
-            with webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options) as driver:
-                while True:
-                    for i, url in enumerate(urls):
-                        try:
-                            driver.get(url)
-                            time.sleep(2)  # Wait for the page to load
-                            if content_type == "text":
-                                current_content = driver.page_source
-                            elif content_type == "media":
-                                current_content = driver.find_elements_by_tag_name("img")
-                            else:
-                                current_content = driver.page_source
-
-                            current_hash = hashlib.md5(str(current_content).encode('utf-8')).hexdigest()
-
-                            if current_hash != previous_hashes[i]:
-                                previous_hashes[i] = current_hash
-                                date_time_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-                                history.append(f"Change detected at {url} on {date_time_str}")
-                                csv_toolkit.writerow({"date": date_time_str.split()[0], "time": date_time_str.split()[1], "url": url, "change": "Content changed"})
+
+    try:
+        with webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=Options()) as driver:
+            while True:
+                for i, url in enumerate(urls):
+                    try:
+                        driver.get(url)
+                        time.sleep(2)  # Wait for the page to load
+                        if content_type == "text":
+                            current_content = driver.page_source
+                        elif content_type == "media":
+                            current_content = driver.find_elements_by_tag_name("img")
+                        else:
+                            current_content = driver.page_source
+                        current_hash = hashlib.md5(str(current_content).encode('utf-8')).hexdigest()
+                        if current_hash != previous_hashes[i]:
+                            previous_hashes[i] = current_hash
+                            date_time_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+                            HISTORY.append(f"Change detected at {url} on {date_time_str}")
+                            with open(storage_location, "a", newline="") as csvfile:
+                                csv_writer = csv.DictWriter(csvfile, fieldnames=["date", "time", "url", "change"])
+                                csv_writer.writerow({"date": date_time_str.split()[0], "time": date_time_str.split()[1], "url": url, "change": "Content changed"})
                             logging.info(f"Change detected at {url} on {date_time_str}")
-                        except Exception as e:
-                            logging.error(f"Error accessing {url}: {e}")
-
-                    time.sleep(scrape_interval * 60)  # Check every scrape_interval minutes
-        except Exception as e:
-            logging.error(f"Error starting ChromeDriver: {e}")
+                    except Exception as e:
+                        logging.error(f"Error accessing {url}: {e}")
+                time.sleep(scrape_interval * 60)  # Check every scrape_interval minutes
+    except Exception as e:
+        logging.error(f"Error starting ChromeDriver: {e}")
 
 # Define main function to handle user input
 def handle_input(storage_location, urls, scrape_interval, content_type):
-    global current_task, history
+    global CURRENT_TASK, HISTORY
 
-    current_task = f"Monitoring URLs: {', '.join(urls)}"
-    history.append(f"Task started: {current_task}")
+    CURRENT_TASK = f"Monitoring URLs: {', '.join(urls)}"
+    HISTORY.append(f"Task started: {CURRENT_TASK}")
     monitor_urls(storage_location, urls, scrape_interval, content_type)
-    return TASK_PROMPT.format(task=current_task, history="\n".join(map(str, history)))
+    return TASK_PROMPT.format(task=CURRENT_TASK, history="\n".join(map(str, HISTORY)))
 
 # Load custom prompts
 try:
-    with open('custom_prompts.yaml', 'r') as fp:
+    with open("custom_prompts.yaml", "r") as fp:
         custom_prompts = yaml.safe_load(fp)
 except FileNotFoundError:
-    custom_prompts = {
-        "WEB_DEV": "",
-        "AI_SYSTEM_PROMPT": "",
-        "PYTHON_CODE_DEV": "",
-        "CODE_GENERATION": "",
-        "CODE_INTERPRETATION": "",
-        "CODE_TRANSLATION": "",
-        "CODE_IMPLEMENTATION": ""
-    }
+    custom_prompts = {"WEB_DEV": "", "AI_SYSTEM_PROMPT": "", "PYTHON_CODE_DEV": "", "CODE_GENERATION": "", "CODE_INTERPRETATION": "", "CODE_TRANSLATION": "", "CODE_IMPLEMENTATION": ""}
+
+# Define agents
+AGENTS = ["WEB_DEV", "AI_SYSTEM_PROMPT", "PYTHON_CODE_DEV", "CODE_GENERATION", "CODE_INTERPRETATION", "CODE_TRANSLATION", "CODE_IMPLEMENTATION"]
 
 # Define the Mistral inference client
 client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
 
-VERBOSE = True
-MAX_HISTORY = 125
-
-def format_prompt(message, history):
-    prompt = "<s>"
-    for entry in history:
-        if isinstance(entry, tuple) and len(entry) == 2:
-            user_prompt, bot_response = entry
-            prompt += f"[INST] {user_prompt} [/INST]"
-            prompt += f" {bot_response}</s> "
-    prompt += f"[INST] {message} [/INST]"
-    return prompt
-
-agents = [
-    "WEB_DEV",
-    "AI_SYSTEM_PROMPT",
-    "PYTHON_CODE_DEV",
-    "CODE_GENERATION",
-    "CODE_INTERPRETATION",
-    "CODE_TRANSLATION",
-    "CODE_IMPLEMENTATION"
-]
-
-def generate(
-    prompt, history, agent_name=agents[0], sys_prompt="", temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.7,
-):
-    seed = random.randint(1, 1111111111111111)
-    agent = custom_prompts[agent_name]
-
-    system_prompt = agent if sys_prompt == "" else sys_prompt
-    temperature = max(float(temperature), 1e-2)
-    top_p = float(top_p)
-
-    generate_kwargs = dict(
-        temperature=temperature,
-        max_new_tokens=max_new_tokens,
-        top_p=top_p,
-        repetition_penalty=repetition_penalty,
-        do_sample=True,
-        seed=seed,
-    )
-
-    formatted_prompt = format_prompt(f"{system_prompt}\n\n{prompt}", history)
-    output = client.text_generation(formatted_prompt, **generate_kwargs, stream=False, return_full_text=False)
-
-    return output
-
 # Define the chat response function
-def respond(
-    message,
-    history,
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    response = generate(
-        prompt=message,
-        history=history,
-        sys_prompt=system_message,
-        temperature=temperature,
-        max_new_tokens=max_tokens,
-        top_p=top_p
-    )
-    return response
+def respond(message, history, system_message, max_tokens, temperature, top_p):
+    return generate(message, history, system_message, max_tokens, temperature, top_p)
 
 # Function to start scraping
 def start_scraping(storage_location, url1, url2, url3, url4, url5, url6, url7, url8, url9, url10, scrape_interval, content_type):
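For reference, here is the [INST] prompt convention that the deleted format_prompt helper implemented, as a minimal self-contained sketch. It assumes huggingface_hub is installed and that history entries are (user_prompt, bot_response) tuples, as in the old code; the sampling values mirror the old generate() defaults, and the example message is illustrative.

import random
from huggingface_hub import InferenceClient

client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")

def format_prompt(message, history):
    # Mixtral-instruct convention: each past user turn wrapped in [INST] ... [/INST],
    # each bot reply closed with </s>, then the new user message appended.
    prompt = "<s>"
    for user_prompt, bot_response in history:
        prompt += f"[INST] {user_prompt} [/INST] {bot_response}</s> "
    prompt += f"[INST] {message} [/INST]"
    return prompt

output = client.text_generation(
    format_prompt("Summarize today's site changes.", []),
    temperature=0.9,
    max_new_tokens=256,
    top_p=0.95,
    repetition_penalty=1.7,
    do_sample=True,
    seed=random.randint(1, 1111111111111111),  # the old generate() reseeded each call this way
)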
@@ -197,10 +101,10 @@ def display_csv(storage_location):
 
 # Create Gradio interface
 def chat_interface(message, system_message, max_tokens, temperature, top_p, storage_location, url1, url2, url3, url4, url5, url6, url7, url8, url9, url10, scrape_interval, content_type):
-    global history
-    response = respond(message, history, system_message, max_tokens, temperature, top_p)
-    history.append((message, response))
-    return history, ""
+    global HISTORY
+    response = respond(message, HISTORY, system_message, max_tokens, temperature, top_p)
+    HISTORY.append((message, response))
+    return HISTORY, ""
 
 demo = gr.Blocks()
 
@@ -212,8 +116,8 @@ with demo:
     max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
     temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
     top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
-    storage_location = gr.Textbox(value=default_file_path, label="Storage Location")
-    url1 = gr.Textbox(value="https://www.culver.k12.in.us/", label="URL 1")
+    storage_location = gr.Textbox(value=DEFAULT_FILE_PATH, label="Storage Location")
+    url1 = gr.Textbox(value="https://www.culver.k12.in/", label="URL 1")
     url2 = gr.Textbox(value="https://www.facebook.com/CulverCommunitySchools", label="URL 2")
     url3 = gr.Textbox(label="URL 3")
     url4 = gr.Textbox(label="URL 4")
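The last two hunks configure the Gradio components and the chat_interface handler, but the wiring between them falls outside the diff. Below is a generic gr.Blocks wiring sketch of that pattern, with an illustrative echo handler and a reduced component set standing in for the app's own.

import gradio as gr

def chat_interface(message, max_tokens, temperature, top_p):
    # Stand-in for app.py's handler, which routes these values into respond().
    return f"echo: {message} (max_tokens={max_tokens}, temperature={temperature}, top_p={top_p})"

with gr.Blocks() as demo:
    message = gr.Textbox(label="Message")
    max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
    temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
    top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
    response = gr.Textbox(label="Response")
    send = gr.Button("Send")
    send.click(chat_interface, inputs=[message, max_tokens, temperature, top_p], outputs=response)

demo.launch()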
 
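Finally, a self-contained sketch of one pass of the hash-based change detection that monitor_urls loops over, using current Selenium 4 APIs: find_elements_by_tag_name, as called in app.py, was removed in Selenium 4 in favor of find_elements(By.TAG_NAME, ...). The check_once name is hypothetical; hashing the img src attributes (rather than the WebElement list, whose repr differs on every load) and writing a CSV header into an empty file are assumptions of this sketch, not app.py behavior.

import csv
import datetime
import hashlib

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager

def check_once(driver, urls, previous_hashes, csv_path, content_type="text"):
    # One pass: fetch each URL, hash its content, and append a CSV row when the hash moves.
    for i, url in enumerate(urls):
        driver.get(url)
        if content_type == "media":
            content = [img.get_attribute("src") for img in driver.find_elements(By.TAG_NAME, "img")]
        else:
            content = driver.page_source
        current_hash = hashlib.md5(str(content).encode("utf-8")).hexdigest()
        if current_hash != previous_hashes[i]:
            previous_hashes[i] = current_hash
            date_str, time_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S").split()
            with open(csv_path, "a", newline="") as f:
                writer = csv.DictWriter(f, fieldnames=["date", "time", "url", "change"])
                if f.tell() == 0:  # append mode never writes a header, so add one to a fresh file
                    writer.writeheader()
                writer.writerow({"date": date_str, "time": time_str, "url": url, "change": "Content changed"})

options = Options()
options.add_argument("--headless=new")  # a bare Options(), as in the new code, runs a visible browser
with webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options) as driver:
    hashes = [""]
    check_once(driver, ["https://www.culver.k12.in.us/"], hashes, "culvers_changes.csv")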