Jacqueline Garrahan
committed on
Commit
•
0ccfdb5
1
Parent(s):
3c7b0e5
Check in the leaderboard
Browse files- src/about.py +8 -11
- src/leaderboard/read_evals.py +1 -2
src/about.py
CHANGED
@@ -13,14 +13,11 @@ class Task:
|
|
13 |
# ---------------------------------------------------
|
14 |
class Tasks(Enum):
|
15 |
# task_key in the json file, metric_key in the json file, name to display in the leaderboard
|
16 |
-
task0 = Task("aiera_speaker_assign", "accuracy,none", "Speaker ID")
|
17 |
-
task1 = Task("aiera_transcript_sentiment", "accuracy,none","
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
task5 = Task("flare_finqa", "exact_match_manual,none","finqa")
|
22 |
-
task6 = Task("flare_fiqasa", "accuracy,none","fiqasa")
|
23 |
-
task7 = Task("flare_ner", "accuracy,none","flare-ner")
|
24 |
|
25 |
|
26 |
NUM_FEWSHOT = 0 # Change with your few shot
|
@@ -32,17 +29,17 @@ TITLE = """<h1 align="center" id="space-title">Aiera Leaderboard</h1>"""
|
|
32 |
|
33 |
# What does your leaderboard evaluate?
|
34 |
INTRODUCTION_TEXT = """
|
35 |
-
The Aiera Financial Leaderboard evaluates the performance of LLMs on
|
36 |
"""
|
37 |
|
38 |
# Which evaluations are you running? how can people reproduce what you have?
|
39 |
LLM_BENCHMARKS_TEXT = f"""
|
40 |
## How it works
|
41 |
|
42 |
-
Proof something
|
43 |
|
44 |
## Reproducibility
|
45 |
-
|
46 |
|
47 |
"""
|
48 |
|
|
|
13 |
# ---------------------------------------------------
|
14 |
class Tasks(Enum):
|
15 |
# task_key in the json file, metric_key in the json file, name to display in the leaderboard
|
16 |
+
task0 = Task("aiera_speaker_assign", "accuracy,none", "Speaker ID", reference_url="https://huggingface.co/datasets/Aiera/aiera-speaker-assign")
|
17 |
+
task1 = Task("aiera_transcript_sentiment", "accuracy,none","Sentiment", reference_url="https://huggingface.co/datasets/Aiera/aiera-transcript-sentiment")
|
18 |
+
task4 = Task("aiera_ect_sum", "rougeLsum,none","aiera_ect_sum", reference_url="https://huggingface.co/datasets/Aiera/aiera-ect-sum")
|
19 |
+
task5 = Task("finqa", "exact_match_manual,none","finqa", reference_url="https://huggingface.co/datasets/Aiera/finqa-verified")
|
20 |
+
#task7 = Task("flare_ner", "accuracy,none","flare-ner", reference_url="test")
|
|
|
|
|
|
|
21 |
|
22 |
|
23 |
NUM_FEWSHOT = 0 # Change with your few shot
|
|
|
29 |
|
30 |
# What does your leaderboard evaluate?
|
31 |
INTRODUCTION_TEXT = """
|
32 |
+
The Aiera Financial Leaderboard evaluates the performance of LLMs on ...
|
33 |
"""
|
34 |
|
35 |
# Which evaluations are you running? how can people reproduce what you have?
|
36 |
LLM_BENCHMARKS_TEXT = f"""
|
37 |
## How it works
|
38 |
|
39 |
+
Proof something happened
|
40 |
|
41 |
## Reproducibility
|
42 |
+
A guide for running Aiera's tasks using EleutherAI's lm-evaluation-harness is available on GitHub at [https://github.com/aiera-inc/aiera-benchmark-tasks](https://github.com/aiera-inc/aiera-benchmark-tasks).
|
43 |
|
44 |
"""
|
45 |
|
src/leaderboard/read_evals.py
CHANGED
@@ -95,7 +95,6 @@ class EvalResult:
|
|
95 |
"""Finds the relevant request file for the current model and updates info with it"""
|
96 |
request_file = get_request_file_for_model(requests_path, self.full_model)
|
97 |
|
98 |
-
|
99 |
#try:
|
100 |
with open(request_file, "r") as f:
|
101 |
request = json.load(f)
|
@@ -103,7 +102,7 @@ class EvalResult:
|
|
103 |
self.weight_type = WeightType[request.get("weight_type", "Unknown")]
|
104 |
self.license = request.get("license", "?")
|
105 |
self.likes = request.get("likes", 0)
|
106 |
-
self.num_params = request.get("params",
|
107 |
self.date = request.get("submitted_time", "")
|
108 |
#except Exception:
|
109 |
# print(f"Could not find request file for {self.org}/{self.model}")
|
|
|
95 |
"""Finds the relevant request file for the current model and updates info with it"""
|
96 |
request_file = get_request_file_for_model(requests_path, self.full_model)
|
97 |
|
|
|
98 |
#try:
|
99 |
with open(request_file, "r") as f:
|
100 |
request = json.load(f)
|
|
|
102 |
self.weight_type = WeightType[request.get("weight_type", "Unknown")]
|
103 |
self.license = request.get("license", "?")
|
104 |
self.likes = request.get("likes", 0)
|
105 |
+
self.num_params = request.get("params", "?")
|
106 |
self.date = request.get("submitted_time", "")
|
107 |
#except Exception:
|
108 |
# print(f"Could not find request file for {self.org}/{self.model}")
|