Jacqueline Garrahan
committed on
Commit
•
0ccfdb5
1
Parent(s):
3c7b0e5
Check in the leaderboard
Browse files- src/about.py +8 -11
- src/leaderboard/read_evals.py +1 -2
src/about.py
CHANGED
@@ -13,14 +13,11 @@ class Task:
|
|
13 |
# ---------------------------------------------------
|
14 |
class Tasks(Enum):
|
15 |
# task_key in the json file, metric_key in the json file, name to display in the leaderboard
|
16 |
-
task0 = Task("aiera_speaker_assign", "accuracy,none", "Speaker ID")
|
17 |
-
task1 = Task("aiera_transcript_sentiment", "accuracy,none","
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
task5 = Task("flare_finqa", "exact_match_manual,none","finqa")
|
22 |
-
task6 = Task("flare_fiqasa", "accuracy,none","fiqasa")
|
23 |
-
task7 = Task("flare_ner", "accuracy,none","flare-ner")
|
24 |
|
25 |
|
26 |
NUM_FEWSHOT = 0 # Change with your few shot
|
@@ -32,17 +29,17 @@ TITLE = """<h1 align="center" id="space-title">Aiera Leaderboard</h1>"""
|
|
32 |
|
33 |
# What does your leaderboard evaluate?
|
34 |
INTRODUCTION_TEXT = """
|
35 |
-
The Aiera Financial Leaderboard evaluates the performance of LLMs on
|
36 |
"""
|
37 |
|
38 |
# Which evaluations are you running? how can people reproduce what you have?
|
39 |
LLM_BENCHMARKS_TEXT = f"""
|
40 |
## How it works
|
41 |
|
42 |
-
Proof something
|
43 |
|
44 |
## Reproducibility
|
45 |
-
|
46 |
|
47 |
"""
|
48 |
|
|
|
13 |
# ---------------------------------------------------
|
14 |
class Tasks(Enum):
|
15 |
# task_key in the json file, metric_key in the json file, name to display in the leaderboard
|
16 |
+
task0 = Task("aiera_speaker_assign", "accuracy,none", "Speaker ID", reference_url="https://huggingface.co/datasets/Aiera/aiera-speaker-assign")
|
17 |
+
task1 = Task("aiera_transcript_sentiment", "accuracy,none","Sentiment", reference_url="https://huggingface.co/datasets/Aiera/aiera-transcript-sentiment")
|
18 |
+
task4 = Task("aiera_ect_sum", "rougeLsum,none","aiera_ect_sum", reference_url="https://huggingface.co/datasets/Aiera/aiera-ect-sum")
|
19 |
+
task5 = Task("finqa", "exact_match_manual,none","finqa", reference_url="https://huggingface.co/datasets/Aiera/finqa-verified")
|
20 |
+
#task7 = Task("flare_ner", "accuracy,none","flare-ner", reference_url="test")
|
|
|
|
|
|
|
21 |
|
22 |
|
23 |
NUM_FEWSHOT = 0 # Change with your few shot
|
|
|
29 |
|
30 |
# What does your leaderboard evaluate?
|
31 |
INTRODUCTION_TEXT = """
|
32 |
+
The Aiera Financial Leaderboard evaluates the performance of LLMs on ...
|
33 |
"""
|
34 |
|
35 |
# Which evaluations are you running? how can people reproduce what you have?
|
36 |
LLM_BENCHMARKS_TEXT = f"""
|
37 |
## How it works
|
38 |
|
39 |
+
Proof something happened
|
40 |
|
41 |
## Reproducibility
|
42 |
+
A guide for running Aiera's tasks using EleutherAI's lm-evaluation-harness is available on GitHub at [https://github.com/aiera-inc/aiera-benchmark-tasks](https://github.com/aiera-inc/aiera-benchmark-tasks).
|
43 |
|
44 |
"""
|
45 |
|
src/leaderboard/read_evals.py
CHANGED
@@ -95,7 +95,6 @@ class EvalResult:
|
|
95 |
"""Finds the relevant request file for the current model and updates info with it"""
|
96 |
request_file = get_request_file_for_model(requests_path, self.full_model)
|
97 |
|
98 |
-
|
99 |
#try:
|
100 |
with open(request_file, "r") as f:
|
101 |
request = json.load(f)
|
@@ -103,7 +102,7 @@ class EvalResult:
|
|
103 |
self.weight_type = WeightType[request.get("weight_type", "Unknown")]
|
104 |
self.license = request.get("license", "?")
|
105 |
self.likes = request.get("likes", 0)
|
106 |
-
self.num_params = request.get("params",
|
107 |
self.date = request.get("submitted_time", "")
|
108 |
#except Exception:
|
109 |
# print(f"Could not find request file for {self.org}/{self.model}")
|
|
|
95 |
"""Finds the relevant request file for the current model and updates info with it"""
|
96 |
request_file = get_request_file_for_model(requests_path, self.full_model)
|
97 |
|
|
|
98 |
#try:
|
99 |
with open(request_file, "r") as f:
|
100 |
request = json.load(f)
|
|
|
102 |
self.weight_type = WeightType[request.get("weight_type", "Unknown")]
|
103 |
self.license = request.get("license", "?")
|
104 |
self.likes = request.get("likes", 0)
|
105 |
+
self.num_params = request.get("params", "?")
|
106 |
self.date = request.get("submitted_time", "")
|
107 |
#except Exception:
|
108 |
# print(f"Could not find request file for {self.org}/{self.model}")
|