Jacqueline Garrahan committed
Commit 0ccfdb5
1 Parent(s): 3c7b0e5

Check in the leaderboard

Files changed (2):
  1. src/about.py +8 -11
  2. src/leaderboard/read_evals.py +1 -2
src/about.py CHANGED
@@ -13,14 +13,11 @@ class Task:
 # ---------------------------------------------------
 class Tasks(Enum):
     # task_key in the json file, metric_key in the json file, name to display in the leaderboard
-    task0 = Task("aiera_speaker_assign", "accuracy,none", "Speaker ID")
-    task1 = Task("aiera_transcript_sentiment", "accuracy,none", "Speaker Sentiment")
-    task2 = Task("bbh_zeroshot_causal_judgement", "exact_match,flexible-extract", "BBH-causal-judgement")
-    task3 = Task("flare_ectsum", "recall,none", "flare-ect-sum")
-    task4 = Task("flare_edtsum", "rougeLsum,none", "flare-edt-sum")
-    task5 = Task("flare_finqa", "exact_match_manual,none", "finqa")
-    task6 = Task("flare_fiqasa", "accuracy,none", "fiqasa")
-    task7 = Task("flare_ner", "accuracy,none", "flare-ner")
+    task0 = Task("aiera_speaker_assign", "accuracy,none", "Speaker ID", reference_url="https://huggingface.co/datasets/Aiera/aiera-speaker-assign")
+    task1 = Task("aiera_transcript_sentiment", "accuracy,none", "Sentiment", reference_url="https://huggingface.co/datasets/Aiera/aiera-transcript-sentiment")
+    task4 = Task("aiera_ect_sum", "rougeLsum,none", "aiera_ect_sum", reference_url="https://huggingface.co/datasets/Aiera/aiera-ect-sum")
+    task5 = Task("finqa", "exact_match_manual,none", "finqa", reference_url="https://huggingface.co/datasets/Aiera/finqa-verified")
+    #task7 = Task("flare_ner", "accuracy,none", "flare-ner", reference_url="test")
 
 
 NUM_FEWSHOT = 0  # Change with your few shot
@@ -32,17 +29,17 @@ TITLE = """<h1 align="center" id="space-title">Aiera Leaderboard</h1>"""
 
 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
-The Aiera Financial Leaderboard evaluates the performance of LLMs on a variety of tasks tailored to financial services.
+The Aiera Financial Leaderboard evaluates the performance of LLMs on ...
 """
 
 # Which evaluations are you running? how can people reproduce what you have?
 LLM_BENCHMARKS_TEXT = f"""
 ## How it works
 
-Proof something happenind
+Proof something happened
 
 ## Reproducibility
-To reproduce our results, here are the commands you can run:
+A guide for running Aiera's tasks with EleutherAI's lm-evaluation-harness is available on GitHub at [https://github.com/aiera-inc/aiera-benchmark-tasks](https://github.com/aiera-inc/aiera-benchmark-tasks).
 
 """
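For the new `reference_url` keyword to be accepted, the `Task` dataclass named in the hunk header has to carry a matching field. A minimal sketch of what that definition could look like, assuming the stock Hugging Face leaderboard-template field names (`benchmark`, `metric`, `col_name`); the `reference_url` field and its default are assumptions, not shown in this diff:

```python
from dataclasses import dataclass
from enum import Enum

@dataclass
class Task:
    benchmark: str           # task_key in the results json, e.g. "aiera_speaker_assign"
    metric: str              # metric_key in the results json, e.g. "accuracy,none"
    col_name: str            # column name displayed in the leaderboard
    reference_url: str = ""  # assumed new field: link to the backing dataset

class Tasks(Enum):
    task0 = Task(
        "aiera_speaker_assign",
        "accuracy,none",
        "Speaker ID",
        reference_url="https://huggingface.co/datasets/Aiera/aiera-speaker-assign",
    )
```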
 
src/leaderboard/read_evals.py CHANGED
@@ -95,7 +95,6 @@ class EvalResult:
         """Finds the relevant request file for the current model and updates info with it"""
         request_file = get_request_file_for_model(requests_path, self.full_model)
 
-
         #try:
         with open(request_file, "r") as f:
             request = json.load(f)
@@ -103,7 +102,7 @@ class EvalResult:
         self.weight_type = WeightType[request.get("weight_type", "Unknown")]
         self.license = request.get("license", "?")
         self.likes = request.get("likes", 0)
-        self.num_params = request.get("params", 0)
+        self.num_params = request.get("params", "?")
         self.date = request.get("submitted_time", "")
         #except Exception:
         #    print(f"Could not find request file for {self.org}/{self.model}")
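The one-line change in `read_evals.py` swaps the fallback for a missing `"params"` key from `0` to `"?"`, so a submission without a recorded parameter count displays as unknown rather than as a zero-parameter model. A small illustration of the two defaults, using hypothetical request-file contents:

```python
import json

# Hypothetical request file that omits "params", e.g. an older submission
# recorded before parameter counts were tracked.
request = json.loads('{"license": "apache-2.0", "likes": 12}')

print(request.get("params", 0))    # -> 0   old default: looks like a 0-param model
print(request.get("params", "?"))  # -> ?   new default: reads as "size unknown"
```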