Spaces:
Sleeping
Sleeping
File size: 1,345 Bytes
0f87dc1 da384b4 0f87dc1 da384b4 7cb9920 da384b4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
import os
import json
import gzip
from typing import Dict, Iterable
def stream_jsonl(filename: str) -> Iterable[Dict]:
    """
    Parses each jsonl line and yields it as a dictionary

    Files whose name ends with ".gz" are decompressed on the fly; any other
    file is read as plain text. Lines that are empty or contain only
    whitespace are skipped. Parsing is lazy: one line is decoded per item
    requested from the returned generator, and the file is closed once the
    generator is exhausted or closed.

    Raises whatever ``open``/``gzip.open`` raise for an unreadable file and
    ``json.JSONDecodeError`` for a non-blank line that is not valid JSON.
    """
    # gzip.open accepts a path directly, so no separate binary handle is needed.
    opener = gzip.open if filename.endswith(".gz") else open
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, which would corrupt or reject non-ASCII data on some systems.
    with opener(filename, "rt", encoding="utf-8") as fp:
        for line in fp:
            # strip() leaves an empty (falsy) string for whitespace-only lines.
            if line.strip():
                yield json.loads(line)
def load_solutions(samples) -> Iterable[Dict]:
    """
    Checks each sample for the expected fields and yields it back, tagged.

    Every sample must contain the keys "task_id", "res_id", "test",
    "solution" and "entry_point", and its "solution" value must be a string;
    the first check that fails raises an AssertionError with a message naming
    the problem. Each sample is modified in place: an "_identifier" entry is
    added, made of the task id followed by the sample's 1-based position in
    ``samples``. Samples are processed lazily, one per item requested.
    """
    # Required keys paired with the message reported when one is absent;
    # they are checked in this order.
    required = (
        ("task_id", "No task_id found in sample!"),
        ("res_id", "No res_id found in sample!"),
        ("test", "No test found in sample!"),
        ("solution", "No solution found in sample!"),
        ("entry_point", "No entry_point found in sample!"),
    )
    for i, sample in enumerate(samples):
        for key, complaint in required:
            assert key in sample, complaint
        solution_is_text = isinstance(sample["solution"], str)
        assert (
            solution_is_text
        ), "Solution must be a string! If you have multiple solutions, please repeat the task_id."
        task_id = sample["task_id"]
        # enumerate counts from 0; the label shown to users counts from 1.
        sample["_identifier"] = task_id + f" (line {i+1} )"
        yield sample
|