Spaces:

locuslab
/

tofu_leaderboard

Running

App Files Community

Pratyush Maini committed on Mar 13, 2024

Commit

4b8641e

1 Parent(s): e4c887e

test

Browse files

Files changed (4) hide show

app.py +10 -6
uploads.py +4 -25
versions/testing.csv +0 -2
versions/testing2.csv +0 -2

app.py CHANGED Viewed

@@ -20,13 +20,21 @@ LEADERBOARD_PATH = f"locuslab/tofu_leaderboard"
 def restart_space():
     api.restart_space(repo_id=LEADERBOARD_PATH, token=TOKEN)
 # Function to load data from a given CSV file
 def load_data(model,version,metrics):
     version = version.replace("%", "p")
     file_path = f'versions/{model}-{version}.csv'  # Replace with your file paths
-    df = pd.read_csv(file_path)
     # we only want specific columns and in a specific order
-    # column_names : Method,Model,WD,Forget Rate,Epoch,LR,Compute,ROUGE Real Authors,ROUGE SEM Real Authors,Truth Ratio Real Authors,Truth Ratio SEM Real Authors,Prob. Real Authors,Prob. SEM Real Authors,ROUGE-P Real Authors,ROUGE-P SEM Real Authors,TTR Real Authors,TTR SEM Real Authors,ROUGE Real World,ROUGE SEM Real World,Truth Ratio Real World,Truth Ratio SEM Real World,Prob. Real World,Prob. SEM Real World,ROUGE-P Real World,ROUGE-P SEM Real World,TTR Real World,TTR SEM Real World,ROUGE Retain,ROUGE SEM Retain,Truth Ratio Retain,Truth Ratio SEM Retain,Prob. Retain,Prob. SEM Retain,ROUGE-P Retain,ROUGE-P SEM Retain,TTR Retain,TTR SEM Retain,KS Test Retain,Wilcoxon PVal Retain,Wilcoxon Stat Retain,ROUGE Forget,ROUGE SEM Forget,Truth Ratio Forget,Truth Ratio SEM Forget,Prob. Forget,Prob. SEM Forget,ROUGE-P Forget,ROUGE-P SEM Forget,TTR Forget,TTR SEM Forget,KS Test Forget,Wilcoxon PVal Forget,Wilcoxon Stat Forget,KS Test Real Authors,KS Test PVal Real Authors,Wilcoxon PVal Real Authors,Wilcoxon Stat Real Authors,KS Test Real World,KS Test PVal Real World,Wilcoxon PVal Real World,Wilcoxon Stat Real World,KS Test PVal Retain,KS Test PVal Forget,Model Utility,Forget Quality
     column_names = ["Method", "Submitted By",
                     "Model Utility", "Forget Quality",
                     "ROUGE Real Authors", "Truth Ratio Real Authors", "Prob. Real Authors",
@@ -57,10 +65,6 @@ def load_data(model,version,metrics):
     return df
-# def style_leaderboard(df):
-    # make color red for background if column has "Forget" in it
 # Function for searching in the leaderboard
 def search_leaderboard(df, query):
     if query == "":

 def restart_space():
     api.restart_space(repo_id=LEADERBOARD_PATH, token=TOKEN)
+def make_df_from_results_files(results_files):
+    dfs = []
+    for file in results_files:
+        df = pd.read_csv(file)
+        dfs.append(df)
+    return pd.concat(dfs)
 # Function to load data from a given CSV file
 def load_data(model,version,metrics):
     version = version.replace("%", "p")
     file_path = f'versions/{model}-{version}.csv'  # Replace with your file paths
+    df = make_df_from_results_files(file_path)
     # we only want specific columns and in a specific order
     column_names = ["Method", "Submitted By",
                     "Model Utility", "Forget Quality",
                     "ROUGE Real Authors", "Truth Ratio Real Authors", "Prob. Real Authors",
     return df
 # Function for searching in the leaderboard
 def search_leaderboard(df, query):
     if query == "":

uploads.py CHANGED Viewed

@@ -5,8 +5,6 @@ import datetime
 OWNER="locuslab"
-SUBMISSION_DATASET = f"{OWNER}/submissions_internal"
-RESULTS_DATASET = f"{OWNER}/results_public"
 LEADERBOARD_PATH = f"{OWNER}/tofu_leaderboard"
 api = HfApi()
 TOKEN = os.environ.get("TOKEN", None)
@@ -27,10 +25,9 @@ def model_hyperlink(link, model_name):
 def add_new_eval(
-    val_or_test: str,
     model: str,
     model_family: str,
-    system_prompt: str,
     url: str,
     path_to_file: str,
     organisation: str,
@@ -52,40 +49,22 @@ def add_new_eval(
     # Save submitted file
     api.upload_file(
-        repo_id=SUBMISSION_DATASET,
         path_or_fileobj=path_to_file.name,
         path_in_repo=f"{organisation}/{model}/{YEAR_VERSION}_{val_or_test}_raw_{datetime.datetime.today()}.jsonl",
-        repo_type="dataset",
         token=TOKEN
     )
-    # Compute score
-    # Save scored file
-    api.upload_file(
-        repo_id=SUBMISSION_DATASET,
-        path_or_fileobj=f"scored/{organisation}_{model}.jsonl",
-        path_in_repo=f"{organisation}/{model}/{YEAR_VERSION}_{val_or_test}_scored_{datetime.datetime.today()}.jsonl",
-        repo_type="dataset",
-        token=TOKEN
-    )
     # Actual submission
     eval_entry = {
         "model": model,
         "model_family": model_family,
-        "system_prompt": system_prompt,
         "url": url,
         "organisation": organisation,
         "mail": mail,
-        # "score": scores["all"]/num_questions["all"],
-        # "score_level1": scores[1]/num_questions[1],
-        # "score_level2": scores[2]/num_questions[2],
-        # "score_level3": scores[3]/num_questions[3],
     }
-    # eval_results[val_or_test] = eval_results[val_or_test].add_item(eval_entry)
-    # print(eval_results)
-    # eval_results.push_to_hub(RESULTS_DATASET, config_name = YEAR_VERSION, token=TOKEN)
     return format_log(f"Model {model} submitted by {organisation} successfully. \nPlease refresh the leaderboard, and wait a bit to see the score displayed")

 OWNER="locuslab"
 LEADERBOARD_PATH = f"{OWNER}/tofu_leaderboard"
 api = HfApi()
 TOKEN = os.environ.get("TOKEN", None)
 def add_new_eval(
     model: str,
     model_family: str,
+    forget_rate: str,
     url: str,
     path_to_file: str,
     organisation: str,
     # Save submitted file
     api.upload_file(
+        repo_id=LEADERBOARD_PATH,
         path_or_fileobj=path_to_file.name,
         path_in_repo=f"{organisation}/{model}/{YEAR_VERSION}_{val_or_test}_raw_{datetime.datetime.today()}.jsonl",
+        repo_type="spaces",
         token=TOKEN
     )
     # Actual submission
     eval_entry = {
         "model": model,
         "model_family": model_family,
         "url": url,
         "organisation": organisation,
         "mail": mail,
+        "forget_rate": forget_rate,
     }
     return format_log(f"Model {model} submitted by {organisation} successfully. \nPlease refresh the leaderboard, and wait a bit to see the score displayed")

versions/testing.csv DELETED Viewed

	@@ -1,2 +0,0 @@
1	- ROUGE Real Authors,Prob. Real Authors,Truth Ratio Real Authors,ROUGE Real World,Prob. Real World,Truth Ratio Real World,ROUGE Retain,Prob. Retain,Truth Ratio Retain,ROUGE Forget,Prob. Forget,Truth Ratio Forget,Model Utility,Forget Quality,Method,Submitted By
2	- 0.935,0.5133317480627517,0.6680206968738261,0.9002849002849004,0.4921771855127988,0.6327186485295044,0.4416031977243739,0.5930417134750143,0.404405870623577,0.40822150323723994,0.48222263099896223,0.634110229406756,0.574674941212654,2.8915775251270757e-11,temp,john

versions/testing2.csv DELETED Viewed

	@@ -1,2 +0,0 @@
1	- ROUGE Real Authors,Prob. Real Authors,Truth Ratio Real Authors,ROUGE Real World,Prob. Real World,Truth Ratio Real World,ROUGE Retain,Prob. Retain,Truth Ratio Retain,ROUGE Forget,Prob. Forget,Truth Ratio Forget,Model Utility,Forget Quality,Method,Submitted By
2	- 0.935,0.5133317480627517,0.6680206968738261,0.9002849002849004,0.4921771855127988,0.6327186485295044,0.4416031977243739,0.5930417134750143,0.404405870623577,0.40822150323723994,0.48222263099896223,0.634110229406756,0.574674941212654,2.8915775251270757e-11,temp,john