Pratyush Maini committed on
Commit
4b8641e
·
1 Parent(s): e4c887e
Files changed (4) hide show
  1. app.py +10 -6
  2. uploads.py +4 -25
  3. versions/testing.csv +0 -2
  4. versions/testing2.csv +0 -2
app.py CHANGED
@@ -20,13 +20,21 @@ LEADERBOARD_PATH = f"locuslab/tofu_leaderboard"
20
  def restart_space():
21
  api.restart_space(repo_id=LEADERBOARD_PATH, token=TOKEN)
22
 
 
 
 
 
 
 
 
23
  # Function to load data from a given CSV file
24
  def load_data(model,version,metrics):
25
  version = version.replace("%", "p")
26
  file_path = f'versions/{model}-{version}.csv' # Replace with your file paths
27
- df = pd.read_csv(file_path)
 
28
  # we only want specific columns and in a specific order
29
- # column_names : Method,Model,WD,Forget Rate,Epoch,LR,Compute,ROUGE Real Authors,ROUGE SEM Real Authors,Truth Ratio Real Authors,Truth Ratio SEM Real Authors,Prob. Real Authors,Prob. SEM Real Authors,ROUGE-P Real Authors,ROUGE-P SEM Real Authors,TTR Real Authors,TTR SEM Real Authors,ROUGE Real World,ROUGE SEM Real World,Truth Ratio Real World,Truth Ratio SEM Real World,Prob. Real World,Prob. SEM Real World,ROUGE-P Real World,ROUGE-P SEM Real World,TTR Real World,TTR SEM Real World,ROUGE Retain,ROUGE SEM Retain,Truth Ratio Retain,Truth Ratio SEM Retain,Prob. Retain,Prob. SEM Retain,ROUGE-P Retain,ROUGE-P SEM Retain,TTR Retain,TTR SEM Retain,KS Test Retain,Wilcoxon PVal Retain,Wilcoxon Stat Retain,ROUGE Forget,ROUGE SEM Forget,Truth Ratio Forget,Truth Ratio SEM Forget,Prob. Forget,Prob. SEM Forget,ROUGE-P Forget,ROUGE-P SEM Forget,TTR Forget,TTR SEM Forget,KS Test Forget,Wilcoxon PVal Forget,Wilcoxon Stat Forget,KS Test Real Authors,KS Test PVal Real Authors,Wilcoxon PVal Real Authors,Wilcoxon Stat Real Authors,KS Test Real World,KS Test PVal Real World,Wilcoxon PVal Real World,Wilcoxon Stat Real World,KS Test PVal Retain,KS Test PVal Forget,Model Utility,Forget Quality
30
  column_names = ["Method", "Submitted By",
31
  "Model Utility", "Forget Quality",
32
  "ROUGE Real Authors", "Truth Ratio Real Authors", "Prob. Real Authors",
@@ -57,10 +65,6 @@ def load_data(model,version,metrics):
57
  return df
58
 
59
 
60
- # def style_leaderboard(df):
61
- # make color red for background if column has "Forget" in it
62
-
63
-
64
  # Function for searching in the leaderboard
65
  def search_leaderboard(df, query):
66
  if query == "":
 
20
  def restart_space():
21
  api.restart_space(repo_id=LEADERBOARD_PATH, token=TOKEN)
22
 
23
+ def make_df_from_results_files(results_files):
24
+ dfs = []
25
+ for file in results_files:
26
+ df = pd.read_csv(file)
27
+ dfs.append(df)
28
+ return pd.concat(dfs)
29
+
30
  # Function to load data from a given CSV file
31
  def load_data(model,version,metrics):
32
  version = version.replace("%", "p")
33
  file_path = f'versions/{model}-{version}.csv' # Replace with your file paths
34
+ df = make_df_from_results_files(file_path)
35
+
36
  # we only want specific columns and in a specific order
37
+
38
  column_names = ["Method", "Submitted By",
39
  "Model Utility", "Forget Quality",
40
  "ROUGE Real Authors", "Truth Ratio Real Authors", "Prob. Real Authors",
 
65
  return df
66
 
67
 
 
 
 
 
68
  # Function for searching in the leaderboard
69
  def search_leaderboard(df, query):
70
  if query == "":
uploads.py CHANGED
@@ -5,8 +5,6 @@ import datetime
5
 
6
 
7
  OWNER="locuslab"
8
- SUBMISSION_DATASET = f"{OWNER}/submissions_internal"
9
- RESULTS_DATASET = f"{OWNER}/results_public"
10
  LEADERBOARD_PATH = f"{OWNER}/tofu_leaderboard"
11
  api = HfApi()
12
  TOKEN = os.environ.get("TOKEN", None)
@@ -27,10 +25,9 @@ def model_hyperlink(link, model_name):
27
 
28
 
29
  def add_new_eval(
30
- val_or_test: str,
31
  model: str,
32
  model_family: str,
33
- system_prompt: str,
34
  url: str,
35
  path_to_file: str,
36
  organisation: str,
@@ -52,40 +49,22 @@ def add_new_eval(
52
 
53
  # Save submitted file
54
  api.upload_file(
55
- repo_id=SUBMISSION_DATASET,
56
  path_or_fileobj=path_to_file.name,
57
  path_in_repo=f"{organisation}/{model}/{YEAR_VERSION}_{val_or_test}_raw_{datetime.datetime.today()}.jsonl",
58
- repo_type="dataset",
59
  token=TOKEN
60
  )
61
-
62
- # Compute score
63
-
64
 
65
- # Save scored file
66
- api.upload_file(
67
- repo_id=SUBMISSION_DATASET,
68
- path_or_fileobj=f"scored/{organisation}_{model}.jsonl",
69
- path_in_repo=f"{organisation}/{model}/{YEAR_VERSION}_{val_or_test}_scored_{datetime.datetime.today()}.jsonl",
70
- repo_type="dataset",
71
- token=TOKEN
72
- )
73
 
74
  # Actual submission
75
  eval_entry = {
76
  "model": model,
77
  "model_family": model_family,
78
- "system_prompt": system_prompt,
79
  "url": url,
80
  "organisation": organisation,
81
  "mail": mail,
82
- # "score": scores["all"]/num_questions["all"],
83
- # "score_level1": scores[1]/num_questions[1],
84
- # "score_level2": scores[2]/num_questions[2],
85
- # "score_level3": scores[3]/num_questions[3],
86
  }
87
- # eval_results[val_or_test] = eval_results[val_or_test].add_item(eval_entry)
88
- # print(eval_results)
89
- # eval_results.push_to_hub(RESULTS_DATASET, config_name = YEAR_VERSION, token=TOKEN)
90
 
91
  return format_log(f"Model {model} submitted by {organisation} successfully. \nPlease refresh the leaderboard, and wait a bit to see the score displayed")
 
5
 
6
 
7
  OWNER="locuslab"
 
 
8
  LEADERBOARD_PATH = f"{OWNER}/tofu_leaderboard"
9
  api = HfApi()
10
  TOKEN = os.environ.get("TOKEN", None)
 
25
 
26
 
27
  def add_new_eval(
 
28
  model: str,
29
  model_family: str,
30
+ forget_rate: str,
31
  url: str,
32
  path_to_file: str,
33
  organisation: str,
 
49
 
50
  # Save submitted file
51
  api.upload_file(
52
+ repo_id=LEADERBOARD_PATH,
53
  path_or_fileobj=path_to_file.name,
54
  path_in_repo=f"{organisation}/{model}/{YEAR_VERSION}_{val_or_test}_raw_{datetime.datetime.today()}.jsonl",
55
+ repo_type="spaces",
56
  token=TOKEN
57
  )
 
 
 
58
 
 
 
 
 
 
 
 
 
59
 
60
  # Actual submission
61
  eval_entry = {
62
  "model": model,
63
  "model_family": model_family,
 
64
  "url": url,
65
  "organisation": organisation,
66
  "mail": mail,
67
+ "forget_rate": forget_rate,
 
 
 
68
  }
 
 
 
69
 
70
  return format_log(f"Model {model} submitted by {organisation} successfully. \nPlease refresh the leaderboard, and wait a bit to see the score displayed")
versions/testing.csv DELETED
@@ -1,2 +0,0 @@
1
- ROUGE Real Authors,Prob. Real Authors,Truth Ratio Real Authors,ROUGE Real World,Prob. Real World,Truth Ratio Real World,ROUGE Retain,Prob. Retain,Truth Ratio Retain,ROUGE Forget,Prob. Forget,Truth Ratio Forget,Model Utility,Forget Quality,Method,Submitted By
2
- 0.935,0.5133317480627517,0.6680206968738261,0.9002849002849004,0.4921771855127988,0.6327186485295044,0.4416031977243739,0.5930417134750143,0.404405870623577,0.40822150323723994,0.48222263099896223,0.634110229406756,0.574674941212654,2.8915775251270757e-11,temp,john
 
 
 
versions/testing2.csv DELETED
@@ -1,2 +0,0 @@
1
- ROUGE Real Authors,Prob. Real Authors,Truth Ratio Real Authors,ROUGE Real World,Prob. Real World,Truth Ratio Real World,ROUGE Retain,Prob. Retain,Truth Ratio Retain,ROUGE Forget,Prob. Forget,Truth Ratio Forget,Model Utility,Forget Quality,Method,Submitted By
2
- 0.935,0.5133317480627517,0.6680206968738261,0.9002849002849004,0.4921771855127988,0.6327186485295044,0.4416031977243739,0.5930417134750143,0.404405870623577,0.40822150323723994,0.48222263099896223,0.634110229406756,0.574674941212654,2.8915775251270757e-11,temp,john