Spaces:

llm-jp
/

open-japanese-llm-leaderboard

Running on CPU Upgrade

sh1gechan committed on Aug 7, 2024

Commit

bd685e1

verified ·

1 Parent(s): 087bfcc

Update src/leaderboard/read_evals.py

Files changed (1) hide show

src/leaderboard/read_evals.py CHANGED Viewed

@@ -67,28 +67,17 @@ class EvalResult:
             architectures = getattr(model_config, "architectures", None)
             if architectures:
                 architecture = ";".join(architectures)
-        # # Extract results available in this file (some results are split in several files)
-        # results = {}
-        # for task in Tasks:
-        #     task = task.value
-        #     # We average all scores of a given metric (not all metrics are present in all files)
-        #     accs = np.array([v for k, v in data[task.benchmark].items() if task.metric == k])
-        #     if accs.size == 0 or any([acc is None for acc in accs]):
-        #         continue
-        #     mean_acc = np.mean(accs) * 100.0
-        #     results[task.benchmark] = mean_acc
         if "scores" not in data:
             raise KeyError(f"'scores' key not found in JSON file: {json_filepath}")
         scores = data["scores"]
         results = {}
         for task in Tasks:
-            task = task.value
-            if task.metric in scores:
-                results[task.benchmark] = Decimal(scores[task.metric])
         return self(
             eval_name=result_key,

             architectures = getattr(model_config, "architectures", None)
             if architectures:
                 architecture = ";".join(architectures)
         if "scores" not in data:
             raise KeyError(f"'scores' key not found in JSON file: {json_filepath}")
         scores = data["scores"]
         results = {}
         for task in Tasks:
+            task_value = task.value
+            if task_value.metric in scores:
+                results[task_value.benchmark] = Decimal(scores[task_value.metric])
         return self(
             eval_name=result_key,