Spaces:
Running
Running
Pratyush Maini
committed on
Commit
·
4b8641e
1
Parent(s):
e4c887e
test
Browse files
- app.py +10 -6
- uploads.py +4 -25
- versions/testing.csv +0 -2
- versions/testing2.csv +0 -2
app.py
CHANGED
@@ -20,13 +20,21 @@ LEADERBOARD_PATH = f"locuslab/tofu_leaderboard"
|
|
20 |
def restart_space():
|
21 |
api.restart_space(repo_id=LEADERBOARD_PATH, token=TOKEN)
|
22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
# Function to load data from a given CSV file
|
24 |
def load_data(model,version,metrics):
|
25 |
version = version.replace("%", "p")
|
26 |
file_path = f'versions/{model}-{version}.csv' # Replace with your file paths
|
27 |
-
df =
|
|
|
28 |
# we only want specific columns and in a specific order
|
29 |
-
|
30 |
column_names = ["Method", "Submitted By",
|
31 |
"Model Utility", "Forget Quality",
|
32 |
"ROUGE Real Authors", "Truth Ratio Real Authors", "Prob. Real Authors",
|
@@ -57,10 +65,6 @@ def load_data(model,version,metrics):
|
|
57 |
return df
|
58 |
|
59 |
|
60 |
-
# def style_leaderboard(df):
|
61 |
-
# make color red for background if column has "Forget" in it
|
62 |
-
|
63 |
-
|
64 |
# Function for searching in the leaderboard
|
65 |
def search_leaderboard(df, query):
|
66 |
if query == "":
|
|
|
20 |
def restart_space():
|
21 |
api.restart_space(repo_id=LEADERBOARD_PATH, token=TOKEN)
|
22 |
|
23 |
+
def make_df_from_results_files(results_files):
    """Load one or more results CSV files into a single DataFrame.

    Args:
        results_files: Either a single path (``str`` or ``os.PathLike``) or
            an iterable of paths / file-like objects accepted by
            ``pd.read_csv``.

    Returns:
        The row-wise concatenation of every file, in the given order.
        Each file's own row index is kept as-is.

    Raises:
        ValueError: If ``results_files`` is an empty iterable (raised by
            ``pd.concat``).
    """
    import os

    # A bare path must be wrapped: iterating over a string would otherwise
    # hand ``pd.read_csv`` one character at a time.
    if isinstance(results_files, (str, os.PathLike)):
        results_files = [results_files]
    return pd.concat([pd.read_csv(path) for path in results_files])
|
29 |
+
|
30 |
# Function to load data from a given CSV file
|
31 |
def load_data(model,version,metrics):
|
32 |
version = version.replace("%", "p")
|
33 |
file_path = f'versions/{model}-{version}.csv' # Replace with your file paths
|
34 |
+
df = make_df_from_results_files(file_path)
|
35 |
+
|
36 |
# we only want specific columns and in a specific order
|
37 |
+
|
38 |
column_names = ["Method", "Submitted By",
|
39 |
"Model Utility", "Forget Quality",
|
40 |
"ROUGE Real Authors", "Truth Ratio Real Authors", "Prob. Real Authors",
|
|
|
65 |
return df
|
66 |
|
67 |
|
|
|
|
|
|
|
|
|
68 |
# Function for searching in the leaderboard
|
69 |
def search_leaderboard(df, query):
|
70 |
if query == "":
|
uploads.py
CHANGED
@@ -5,8 +5,6 @@ import datetime
|
|
5 |
|
6 |
|
7 |
OWNER="locuslab"
|
8 |
-
SUBMISSION_DATASET = f"{OWNER}/submissions_internal"
|
9 |
-
RESULTS_DATASET = f"{OWNER}/results_public"
|
10 |
LEADERBOARD_PATH = f"{OWNER}/tofu_leaderboard"
|
11 |
api = HfApi()
|
12 |
TOKEN = os.environ.get("TOKEN", None)
|
@@ -27,10 +25,9 @@ def model_hyperlink(link, model_name):
|
|
27 |
|
28 |
|
29 |
def add_new_eval(
|
30 |
-
val_or_test: str,
|
31 |
model: str,
|
32 |
model_family: str,
|
33 |
-
|
34 |
url: str,
|
35 |
path_to_file: str,
|
36 |
organisation: str,
|
@@ -52,40 +49,22 @@ def add_new_eval(
|
|
52 |
|
53 |
# Save submitted file
|
54 |
api.upload_file(
|
55 |
-
repo_id=
|
56 |
path_or_fileobj=path_to_file.name,
|
57 |
path_in_repo=f"{organisation}/{model}/{YEAR_VERSION}_{val_or_test}_raw_{datetime.datetime.today()}.jsonl",
|
58 |
-
repo_type="
|
59 |
token=TOKEN
|
60 |
)
|
61 |
-
|
62 |
-
# Compute score
|
63 |
-
|
64 |
|
65 |
-
# Save scored file
|
66 |
-
api.upload_file(
|
67 |
-
repo_id=SUBMISSION_DATASET,
|
68 |
-
path_or_fileobj=f"scored/{organisation}_{model}.jsonl",
|
69 |
-
path_in_repo=f"{organisation}/{model}/{YEAR_VERSION}_{val_or_test}_scored_{datetime.datetime.today()}.jsonl",
|
70 |
-
repo_type="dataset",
|
71 |
-
token=TOKEN
|
72 |
-
)
|
73 |
|
74 |
# Actual submission
|
75 |
eval_entry = {
|
76 |
"model": model,
|
77 |
"model_family": model_family,
|
78 |
-
"system_prompt": system_prompt,
|
79 |
"url": url,
|
80 |
"organisation": organisation,
|
81 |
"mail": mail,
|
82 |
-
|
83 |
-
# "score_level1": scores[1]/num_questions[1],
|
84 |
-
# "score_level2": scores[2]/num_questions[2],
|
85 |
-
# "score_level3": scores[3]/num_questions[3],
|
86 |
}
|
87 |
-
# eval_results[val_or_test] = eval_results[val_or_test].add_item(eval_entry)
|
88 |
-
# print(eval_results)
|
89 |
-
# eval_results.push_to_hub(RESULTS_DATASET, config_name = YEAR_VERSION, token=TOKEN)
|
90 |
|
91 |
return format_log(f"Model {model} submitted by {organisation} successfully. \nPlease refresh the leaderboard, and wait a bit to see the score displayed")
|
|
|
5 |
|
6 |
|
7 |
OWNER="locuslab"
|
|
|
|
|
8 |
LEADERBOARD_PATH = f"{OWNER}/tofu_leaderboard"
|
9 |
api = HfApi()
|
10 |
TOKEN = os.environ.get("TOKEN", None)
|
|
|
25 |
|
26 |
|
27 |
def add_new_eval(
|
|
|
28 |
model: str,
|
29 |
model_family: str,
|
30 |
+
forget_rate: str,
|
31 |
url: str,
|
32 |
path_to_file: str,
|
33 |
organisation: str,
|
|
|
49 |
|
50 |
# Save submitted file
|
51 |
api.upload_file(
|
52 |
+
repo_id=LEADERBOARD_PATH,
|
53 |
path_or_fileobj=path_to_file.name,
|
54 |
path_in_repo=f"{organisation}/{model}/{YEAR_VERSION}_{val_or_test}_raw_{datetime.datetime.today()}.jsonl",
|
55 |
+
repo_type="spaces",
|
56 |
token=TOKEN
|
57 |
)
|
|
|
|
|
|
|
58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
|
60 |
# Actual submission
|
61 |
eval_entry = {
|
62 |
"model": model,
|
63 |
"model_family": model_family,
|
|
|
64 |
"url": url,
|
65 |
"organisation": organisation,
|
66 |
"mail": mail,
|
67 |
+
"forget_rate": forget_rate,
|
|
|
|
|
|
|
68 |
}
|
|
|
|
|
|
|
69 |
|
70 |
return format_log(f"Model {model} submitted by {organisation} successfully. \nPlease refresh the leaderboard, and wait a bit to see the score displayed")
|
versions/testing.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
ROUGE Real Authors,Prob. Real Authors,Truth Ratio Real Authors,ROUGE Real World,Prob. Real World,Truth Ratio Real World,ROUGE Retain,Prob. Retain,Truth Ratio Retain,ROUGE Forget,Prob. Forget,Truth Ratio Forget,Model Utility,Forget Quality,Method,Submitted By
|
2 |
-
0.935,0.5133317480627517,0.6680206968738261,0.9002849002849004,0.4921771855127988,0.6327186485295044,0.4416031977243739,0.5930417134750143,0.404405870623577,0.40822150323723994,0.48222263099896223,0.634110229406756,0.574674941212654,2.8915775251270757e-11,temp,john
|
|
|
|
|
|
versions/testing2.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
ROUGE Real Authors,Prob. Real Authors,Truth Ratio Real Authors,ROUGE Real World,Prob. Real World,Truth Ratio Real World,ROUGE Retain,Prob. Retain,Truth Ratio Retain,ROUGE Forget,Prob. Forget,Truth Ratio Forget,Model Utility,Forget Quality,Method,Submitted By
|
2 |
-
0.935,0.5133317480627517,0.6680206968738261,0.9002849002849004,0.4921771855127988,0.6327186485295044,0.4416031977243739,0.5930417134750143,0.404405870623577,0.40822150323723994,0.48222263099896223,0.634110229406756,0.574674941212654,2.8915775251270757e-11,temp,john
|
|
|
|
|
|