jjkim committed
Commit · dc264fe
1 Parent(s): f435ec5
- code_eval.py  +8 -6
code_eval.py CHANGED
@@ -20,7 +20,7 @@ import itertools
 import os
 from collections import Counter, defaultdict
 from concurrent.futures import CancelledError, ThreadPoolExecutor, as_completed
-from typing import List, Optional
+from typing import Dict, List, Optional
 import time
 from string import Template
 
@@ -145,8 +145,8 @@ class CodeEval(evaluate.Metric):
             # This defines the format of each prediction and reference
             features=datasets.Features(
                 {
-                    "predictions":
-                    "references":
+                    "predictions": List[Dict],
+                    "references": List[Dict],
                 }
             ),
             homepage="https://github.com/openai/human-eval",
@@ -178,7 +178,7 @@ class CodeEval(evaluate.Metric):
             raise NotImplementedError(
                 "This metric is currently not supported on Windows."
             )
-
+
         predictions = sorted(predictions, key=lambda x: x["id"])
         references = sorted(references, key=lambda x: x["id"])
         with ThreadPoolExecutor(max_workers=num_workers) as executor:
@@ -186,7 +186,7 @@ class CodeEval(evaluate.Metric):
             for pred_d, ref_d in zip(predictions, references):
                 assert pred_d["id"] == ref_d["id"]
                 tid = pred_d["id"]
-
+
                 results[tid] = []
                 pred = pred_d[pred_key]
                 ref = ref_d[ref_key]
@@ -204,7 +204,9 @@ class CodeEval(evaluate.Metric):
                     result.add(future)
                     results[tid].append(result)
 
-            pbar = tqdm(
+            pbar = tqdm(
+                total=sum(len(r) for r in results.values()), disable=disable_tqdm
+            )
             prev_done_count = 0
             done = False
             while not done: