Backup-bdg's picture
Upload 964 files
51ff9e5 verified
import json
# Paths of the JSONL file to read and the JSONL file to (over)write.
input_file = 'XXX.jsonl'
output_file = 'YYY.jsonl'


def convert_record(item):
    """Build the flattened output record for one parsed input record.

    Args:
        item: dict decoded from one input line. The keys read are ``org``,
            ``repo``, ``number``, ``resolved_issues`` (a list of dicts with
            ``title`` / ``body``) and ``base`` (a dict with ``sha``).

    Returns:
        A new dict with the keys ``repo``, ``instance_id``,
        ``problem_statement``, ``FAIL_TO_PASS``, ``PASS_TO_PASS``,
        ``base_commit`` and ``version``, in that order.
    """
    # Extract the raw fields.
    org = item.get('org', '')
    repo = item.get('repo', '')
    number = str(item.get('number', ''))

    # Only the first resolved issue contributes to the problem statement.
    # A missing, null or empty list falls back to an empty issue instead of
    # raising KeyError / IndexError.
    issues = item.get('resolved_issues') or []
    issue = (issues[0] if issues else None) or {}
    # `.get(key, '')` still returns None when the key is present with a JSON
    # null value, which would make the string concatenation below raise
    # TypeError; `or ''` covers both the missing and the null case.
    title = issue.get('title') or ''
    body = issue.get('body') or ''

    # Tolerate a missing or null `base` object the same way.
    base = item.get('base') or {}

    return {
        'repo': f'{org}/{repo}',
        'instance_id': f'{org}__{repo}-{number}',
        'problem_statement': title + '\n' + body,
        # Both test lists are always emitted empty by this script.
        'FAIL_TO_PASS': [],
        'PASS_TO_PASS': [],
        'base_commit': base.get('sha', ''),
        # Hard-coded; the original note here just said "depends", so this
        # presumably needs adjusting per dataset - TODO confirm.
        'version': '0.1',
    }


def main():
    """Convert every non-blank line of ``input_file`` into ``output_file``.

    Each input line is parsed as JSON, mapped through ``convert_record`` and
    written back as a single JSON line. Non-ASCII characters are written
    unescaped (``ensure_ascii=False``).

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
        OSError: if either file cannot be opened.
    """
    with (
        open(input_file, 'r', encoding='utf-8') as fin,
        open(output_file, 'w', encoding='utf-8') as fout,
    ):
        for line in fin:
            line = line.strip()
            if not line:
                # Skip blank lines rather than failing in json.loads.
                continue
            record = convert_record(json.loads(line))
            fout.write(json.dumps(record, ensure_ascii=False) + '\n')


if __name__ == '__main__':
    main()