Spaces:
Running
on
Zero
Running
on
Zero
jedick
committed on
Commit
·
e4391fe
1
Parent(s):
3472410
Cleanup data directories
Browse files- app.py +23 -3
- prompts.py +23 -23
app.py
CHANGED
@@ -8,6 +8,7 @@ from main import openai_model, model_id
|
|
8 |
from util import get_sources, get_start_end_months
|
9 |
from git import Repo
|
10 |
import zipfile
|
|
|
11 |
import spaces
|
12 |
import torch
|
13 |
import uuid
|
@@ -263,7 +264,7 @@ with gr.Blocks(
|
|
263 |
render=False,
|
264 |
)
|
265 |
data_error = gr.Textbox(
|
266 |
-
value="App is unavailable.
|
267 |
lines=1,
|
268 |
label="Error downloading or extracting data",
|
269 |
visible=False,
|
@@ -561,6 +562,19 @@ with gr.Blocks(
|
|
561 |
# Data loading
|
562 |
# ------------
|
563 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
564 |
def download():
|
565 |
"""Download the db.zip file"""
|
566 |
|
@@ -586,8 +600,14 @@ with gr.Blocks(
|
|
586 |
|
587 |
zip_file_path = "./R-help-db/db.zip"
|
588 |
extract_to_path = "./"
|
589 |
-
|
590 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
591 |
|
592 |
return None
|
593 |
|
|
|
8 |
from util import get_sources, get_start_end_months
|
9 |
from git import Repo
|
10 |
import zipfile
|
11 |
+
import shutil
|
12 |
import spaces
|
13 |
import torch
|
14 |
import uuid
|
|
|
264 |
render=False,
|
265 |
)
|
266 |
data_error = gr.Textbox(
|
267 |
+
value="App is unavailable because data could not be loaded. Try reloading the page, then contact the maintainer if the problem persists.",
|
268 |
lines=1,
|
269 |
label="Error downloading or extracting data",
|
270 |
visible=False,
|
|
|
562 |
# Data loading
|
563 |
# ------------
|
564 |
|
565 |
+
def rm_directory(directory_path):
    """Forcefully and recursively delete a directory, like rm -rf.

    Deletion is best-effort: failures are reported on stdout and never
    raised, so a cleanup attempt cannot itself crash the caller.

    Args:
        directory_path: Path of the directory tree to remove.

    Returns:
        True if the directory was deleted, False if it was missing or
        could not be removed.
    """
    try:
        # Keep only the call that can fail inside the try body.
        shutil.rmtree(directory_path)
    except FileNotFoundError:
        print(f"Directory not found: {directory_path}")
    except PermissionError:
        print(f"Permission denied: {directory_path}")
    except Exception as e:
        # Include the path so the message is as traceable as the others.
        print(f"An error occurred while deleting {directory_path}: {e}")
    else:
        print(f"Successfully deleted: {directory_path}")
        return True
    return False
|
577 |
+
|
578 |
def download():
|
579 |
"""Download the db.zip file"""
|
580 |
|
|
|
600 |
|
601 |
zip_file_path = "./R-help-db/db.zip"
|
602 |
extract_to_path = "./"
|
603 |
+
try:
|
604 |
+
with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
|
605 |
+
zip_ref.extractall(extract_to_path)
|
606 |
+
except:
|
607 |
+
# If there were any errors, clean up directories to
|
608 |
+
# initiate a new download when app is reloaded
|
609 |
+
rm_directory("./db")
|
610 |
+
rm_directory("./R-help-db")
|
611 |
|
612 |
return None
|
613 |
|
prompts.py
CHANGED
@@ -14,22 +14,22 @@ def retrieve_prompt(compute_mode):
|
|
14 |
start, end = get_start_end_months(get_sources())
|
15 |
|
16 |
retrieve_prompt = (
|
17 |
-
f"Today Date: {date.today()}.
|
18 |
-
"You are a helpful RAG chatbot designed to answer questions about R programming based on the R-help mailing list.
|
19 |
-
"Do not ask the user for more information, but retrieve emails from the R-help mailing list archives.
|
20 |
# gpt-4o-mini says last two months aren't available with this: Emails from from {start} to {end} are available for retrieval.
|
21 |
-
f"The emails available for retrieval are from {start} to {end}.
|
22 |
-
"Write a search query based on the user's question, but do not answer the question just yet.
|
23 |
-
"For questions about differences or comparison between X and Y, retrieve emails about X and Y.
|
24 |
-
"If the user's question is about years, use retrieve_emails(search_query=, start_year=, end_year=) (this month is this year).
|
25 |
-
"Example: to retrieve emails about R from a month in any year use retrieve_emails(search_query='R', months=<month>).
|
26 |
# This confuses gpt-4o-mini (empty search_query - token problem?)
|
27 |
-
"Use 3-letter month abbreviations: Jan for January, Jul for July.
|
28 |
-
"If you decide not to retrieve emails, tell the user why and suggest how to improve their question to chat with the R-help mailing list.
|
29 |
)
|
30 |
# A sanity check that we don't have unassigned variables
|
31 |
# (this causes KeyError in parsing by ToolCallingLLM)
|
32 |
-
matches = re.findall(r"\{.*?\}", "".join(retrieve_prompt))
|
33 |
if matches:
|
34 |
raise ValueError(f"Unassigned variables in prompt: {' '.join(matches)}")
|
35 |
return retrieve_prompt
|
@@ -38,21 +38,21 @@ def retrieve_prompt(compute_mode):
|
|
38 |
def answer_prompt(with_tools=True):
|
39 |
"""Return system prompt for generate step"""
|
40 |
answer_prompt = (
|
41 |
-
f"Today Date: {date.today()}.
|
42 |
-
"You are a helpful RAG chatbot designed to answer questions about R programming based on the R-help mailing list.
|
43 |
-
"Summarize the retrieved emails from the R-help mailing list archives to answer the user's question or query.
|
44 |
-
"If any of the retrieved emails are irrelevant (e.g. wrong dates), then do not use them.
|
45 |
-
"Tell the user if there are no retrieved emails or if you are unable to answer the question based on the information in the emails.
|
46 |
-
"Do not give an answer based on your own knowledge or memory, and do not include examples that aren't based on the retrieved emails.
|
47 |
-
"Example: For a question about writing formulas for lm(), make your answer about formulas for lm() from the retrieved emails.
|
48 |
-
"Do not respond with packages that are only listed under sessionInfo, session info, or other attached packages.
|
49 |
-
"Include inline citations (email senders and dates) in your response.
|
50 |
-
"Only answer general questions about R if the answer is given in the retrieved emails.
|
51 |
-
"Respond with 300 words maximum and 30 lines of code maximum and include any relevant URLs from the retrieved emails.
|
52 |
)
|
53 |
if with_tools:
|
54 |
answer_prompt += "Use answer_with_citations to provide the complete answer and all citations used. "
|
55 |
-
matches = re.findall(r"\{.*?\}", "".join(answer_prompt))
|
56 |
if matches:
|
57 |
raise ValueError(f"Unassigned variables in prompt: {' '.join(matches)}")
|
58 |
return answer_prompt
|
|
|
14 |
start, end = get_start_end_months(get_sources())
|
15 |
|
16 |
retrieve_prompt = (
|
17 |
+
f"Today Date: {date.today()}."
|
18 |
+
"You are a helpful RAG chatbot designed to answer questions about R programming based on the R-help mailing list."
|
19 |
+
"Do not ask the user for more information, but retrieve emails from the R-help mailing list archives."
|
20 |
# gpt-4o-mini says last two months aren't available with this: Emails from from {start} to {end} are available for retrieval.
|
21 |
+
f"The emails available for retrieval are from {start} to {end}."
|
22 |
+
"Write a search query based on the user's question, but do not answer the question just yet."
|
23 |
+
"For questions about differences or comparison between X and Y, retrieve emails about X and Y."
|
24 |
+
"If the user's question is about years, use retrieve_emails(search_query=, start_year=, end_year=) (this month is this year)."
|
25 |
+
"Example: to retrieve emails about R from a month in any year use retrieve_emails(search_query='R', months=<month>)."
|
26 |
# This confuses gpt-4o-mini (empty search_query - token problem?)
|
27 |
+
"Use 3-letter month abbreviations: Jan for January, Jul for July."
|
28 |
+
"If you decide not to retrieve emails, tell the user why and suggest how to improve their question to chat with the R-help mailing list."
|
29 |
)
|
30 |
# A sanity check that we don't have unassigned variables
|
31 |
# (this causes KeyError in parsing by ToolCallingLLM)
|
32 |
+
matches = re.findall(r"\{.*?\}", " ".join(retrieve_prompt))
|
33 |
if matches:
|
34 |
raise ValueError(f"Unassigned variables in prompt: {' '.join(matches)}")
|
35 |
return retrieve_prompt
|
|
|
38 |
def answer_prompt(with_tools=True):
    """Return system prompt for generate step.

    Args:
        with_tools: When true, append the instruction to respond through
            the answer_with_citations tool.

    Returns:
        The system prompt as one string, with a single space between
        consecutive sentences.

    Raises:
        ValueError: If the assembled prompt still contains a ``{...}``
            placeholder.
    """
    sentences = (
        f"Today Date: {date.today()}.",
        "You are a helpful RAG chatbot designed to answer questions about R programming based on the R-help mailing list.",
        "Summarize the retrieved emails from the R-help mailing list archives to answer the user's question or query.",
        "If any of the retrieved emails are irrelevant (e.g. wrong dates), then do not use them.",
        "Tell the user if there are no retrieved emails or if you are unable to answer the question based on the information in the emails.",
        "Do not give an answer based on your own knowledge or memory, and do not include examples that aren't based on the retrieved emails.",
        "Example: For a question about writing formulas for lm(), make your answer about formulas for lm() from the retrieved emails.",
        "Do not respond with packages that are only listed under sessionInfo, session info, or other attached packages.",
        "Include inline citations (email senders and dates) in your response.",
        "Only answer general questions about R if the answer is given in the retrieved emails.",
        "Respond with 300 words maximum and 30 lines of code maximum and include any relevant URLs from the retrieved emails.",
    )
    # Join the sentences explicitly: adjacent string literals would be
    # concatenated with no separator, gluing sentences together.
    prompt = " ".join(sentences)
    if with_tools:
        prompt += " Use answer_with_citations to provide the complete answer and all citations used. "
    # Sanity check that no brace placeholder was left unformatted.
    matches = re.findall(r"\{.*?\}", prompt)
    if matches:
        raise ValueError(f"Unassigned variables in prompt: {' '.join(matches)}")
    return prompt
|