Spaces:
Runtime error
Runtime error
acecalisto3
committed on
Commit
•
4e5b9ff
1
Parent(s):
414583f
Update app.py
Browse files
app.py
CHANGED
@@ -42,10 +42,9 @@ default_file_path = "user/app/scraped_data/culver/culvers_changes.csv"
|
|
42 |
os.makedirs(os.path.dirname(default_file_path), exist_ok=True)
|
43 |
|
44 |
# Function to monitor URLs for changes
|
45 |
-
def monitor_urls(storage_location,
|
46 |
global history
|
47 |
-
|
48 |
-
previous_hashes = ["", ""]
|
49 |
|
50 |
# Ensure the directory exists
|
51 |
os.makedirs(os.path.dirname(storage_location), exist_ok=True)
|
@@ -90,12 +89,12 @@ def monitor_urls(storage_location, url1, url2, scrape_interval, content_type):
|
|
90 |
logging.error(f"Error starting ChromeDriver: {e}")
|
91 |
|
92 |
# Define main function to handle user input
|
93 |
-
def handle_input(storage_location,
|
94 |
global current_task, history
|
95 |
|
96 |
-
current_task = f"Monitoring URLs: {
|
97 |
history.append(f"Task started: {current_task}")
|
98 |
-
monitor_urls(storage_location,
|
99 |
return TASK_PROMPT.format(task=current_task, history="\n".join(map(str, history)))
|
100 |
|
101 |
# Load custom prompts
|
@@ -183,9 +182,10 @@ def respond(
|
|
183 |
return response
|
184 |
|
185 |
# Function to start scraping
|
186 |
-
def start_scraping(storage_location, url1, url2, scrape_interval, content_type):
|
187 |
-
|
188 |
-
|
|
|
189 |
|
190 |
# Function to display CSV content
|
191 |
def display_csv(storage_location):
|
@@ -196,7 +196,7 @@ def display_csv(storage_location):
|
|
196 |
return "No data available."
|
197 |
|
198 |
# Create Gradio interface
|
199 |
-
def chat_interface(message, system_message, max_tokens, temperature, top_p, storage_location, url1, url2, scrape_interval, content_type):
|
200 |
global history
|
201 |
response = respond(message, history, system_message, max_tokens, temperature, top_p)
|
202 |
history.append((message, response))
|
@@ -215,6 +215,14 @@ with demo:
|
|
215 |
storage_location = gr.Textbox(value=default_file_path, label="Storage Location")
|
216 |
url1 = gr.Textbox(value="https://www.culver.k12.in.us/", label="URL 1")
|
217 |
url2 = gr.Textbox(value="https://www.facebook.com/CulverCommunitySchools", label="URL 2")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
218 |
scrape_interval = gr.Slider(minimum=1, maximum=60, value=5, step=1, label="Scrape Interval (minutes)")
|
219 |
content_type = gr.Radio(choices=["text", "media", "both"], value="text", label="Content Type")
|
220 |
start_button = gr.Button("Start Scraping")
|
@@ -224,8 +232,8 @@ with demo:
|
|
224 |
chat_history = gr.Chatbot(label="Chat History")
|
225 |
response_box = gr.Textbox(label="Response")
|
226 |
|
227 |
-
start_button.click(start_scraping, inputs=[storage_location, url1, url2, scrape_interval, content_type], outputs=csv_output)
|
228 |
-
message.submit(chat_interface, inputs=[message, system_message, max_tokens, temperature, top_p, storage_location, url1, url2, scrape_interval, content_type], outputs=[chat_history, response_box])
|
229 |
|
230 |
if __name__ == "__main__":
|
231 |
demo.launch()
|
|
|
42 |
os.makedirs(os.path.dirname(default_file_path), exist_ok=True)
|
43 |
|
44 |
# Function to monitor URLs for changes
|
45 |
+
def monitor_urls(storage_location, urls, scrape_interval, content_type):
|
46 |
global history
|
47 |
+
previous_hashes = [""] * len(urls)
|
|
|
48 |
|
49 |
# Ensure the directory exists
|
50 |
os.makedirs(os.path.dirname(storage_location), exist_ok=True)
|
|
|
89 |
logging.error(f"Error starting ChromeDriver: {e}")
|
90 |
|
91 |
# Define main function to handle user input
|
92 |
+
def handle_input(storage_location, urls, scrape_interval, content_type):
    """Record a new monitoring task and start monitoring the given URLs.

    Args:
        storage_location: Path of the CSV file where detected changes are stored.
        urls: List of URLs to monitor for changes.
        scrape_interval: Minutes between successive scrapes.
        content_type: Which content to compare ("text", "media", or "both").

    Returns:
        The TASK_PROMPT template filled in with the current task and the
        accumulated history.
    """
    global current_task, history

    current_task = f"Monitoring URLs: {', '.join(urls)}"
    history.append(f"Task started: {current_task}")
    # NOTE(review): monitor_urls is invoked synchronously here; if its
    # monitoring loop runs until stopped, this call blocks and the return
    # below is never reached — confirm and consider a background thread.
    monitor_urls(storage_location, urls, scrape_interval, content_type)
    return TASK_PROMPT.format(task=current_task, history="\n".join(map(str, history)))
|
99 |
|
100 |
# Load custom prompts
|
|
|
182 |
return response
|
183 |
|
184 |
# Function to start scraping
|
185 |
+
def start_scraping(storage_location, url1, url2, url3, url4, url5, url6, url7, url8, url9, url10, scrape_interval, content_type):
    """Collect the non-empty URL inputs and launch a monitoring task.

    The ten individual URL parameters mirror the ten Gradio textboxes wired
    to this callback; their order and count must stay in sync with the
    ``start_button.click(..., inputs=[...])`` registration.

    Args:
        storage_location: Path of the CSV file where detected changes are stored.
        url1..url10: Individual URL textbox values; empty ones are ignored.
        scrape_interval: Minutes between successive scrapes.
        content_type: Which content to compare ("text", "media", or "both").

    Returns:
        A human-readable confirmation message shown in the UI.
    """
    # Drop blank textboxes so only real URLs are monitored.
    urls = [url for url in [url1, url2, url3, url4, url5, url6, url7, url8, url9, url10] if url]
    handle_input(storage_location, urls, scrape_interval, content_type)
    return f"Started scraping {', '.join(urls)} every {scrape_interval} minutes."
|
189 |
|
190 |
# Function to display CSV content
|
191 |
def display_csv(storage_location):
|
|
|
196 |
return "No data available."
|
197 |
|
198 |
# Create Gradio interface
|
199 |
+
def chat_interface(message, system_message, max_tokens, temperature, top_p, storage_location, url1, url2, url3, url4, url5, url6, url7, url8, url9, url10, scrape_interval, content_type):
|
200 |
global history
|
201 |
response = respond(message, history, system_message, max_tokens, temperature, top_p)
|
202 |
history.append((message, response))
|
|
|
215 |
storage_location = gr.Textbox(value=default_file_path, label="Storage Location")
|
216 |
url1 = gr.Textbox(value="https://www.culver.k12.in.us/", label="URL 1")
|
217 |
url2 = gr.Textbox(value="https://www.facebook.com/CulverCommunitySchools", label="URL 2")
|
218 |
+
url3 = gr.Textbox(label="URL 3")
|
219 |
+
url4 = gr.Textbox(label="URL 4")
|
220 |
+
url5 = gr.Textbox(label="URL 5")
|
221 |
+
url6 = gr.Textbox(label="URL 6")
|
222 |
+
url7 = gr.Textbox(label="URL 7")
|
223 |
+
url8 = gr.Textbox(label="URL 8")
|
224 |
+
url9 = gr.Textbox(label="URL 9")
|
225 |
+
url10 = gr.Textbox(label="URL 10")
|
226 |
scrape_interval = gr.Slider(minimum=1, maximum=60, value=5, step=1, label="Scrape Interval (minutes)")
|
227 |
content_type = gr.Radio(choices=["text", "media", "both"], value="text", label="Content Type")
|
228 |
start_button = gr.Button("Start Scraping")
|
|
|
232 |
chat_history = gr.Chatbot(label="Chat History")
|
233 |
response_box = gr.Textbox(label="Response")
|
234 |
|
235 |
+
start_button.click(start_scraping, inputs=[storage_location, url1, url2, url3, url4, url5, url6, url7, url8, url9, url10, scrape_interval, content_type], outputs=csv_output)
|
236 |
+
message.submit(chat_interface, inputs=[message, system_message, max_tokens, temperature, top_p, storage_location, url1, url2, url3, url4, url5, url6, url7, url8, url9, url10, scrape_interval, content_type], outputs=[chat_history, response_box])
|
237 |
|
238 |
if __name__ == "__main__":
|
239 |
demo.launch()
|