# File size: 1,068 Bytes
# 51ff9e5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import json

input_file = 'XXX.jsonl'
output_file = 'YYY.jsonl'


def _or_empty(value):
    """Return *value*, or ``''`` when it is ``None`` (e.g. a JSON ``null``)."""
    return '' if value is None else value


def convert_record(item):
    """Map one parsed input record to the output record layout.

    Args:
        item: Dict decoded from one input line. The keys read are ``org``,
            ``repo``, ``number``, ``resolved_issues`` (a list of dicts with
            ``title`` / ``body``) and ``base`` (a dict with ``sha``).

    Returns:
        A new dict with the keys ``repo``, ``instance_id``,
        ``problem_statement``, ``FAIL_TO_PASS``, ``PASS_TO_PASS``,
        ``base_commit`` and ``version``, in that order.

    Missing keys, an empty ``resolved_issues`` list and JSON ``null`` values
    for the issue title/body or the base sha all fall back to empty strings
    instead of raising, so one sparse record does not abort a whole run.
    """
    # Extract the raw fields.
    org = item.get('org', '')
    repo = item.get('repo', '')
    number = str(item.get('number', ''))

    # Only the first resolved issue contributes to the problem statement.
    issues = item.get('resolved_issues') or [{}]
    first_issue = issues[0] or {}
    # `.get(key, '')` would still yield None for an explicit JSON null, and
    # concatenating None to a str raises TypeError, hence the normalisation.
    title = _or_empty(first_issue.get('title'))
    body = _or_empty(first_issue.get('body'))

    base = item.get('base') or {}

    return {
        'repo': f'{org}/{repo}',
        'instance_id': f'{org}__{repo}-{number}',
        'problem_statement': f'{title}\n{body}',
        # Test lists are emitted empty; nothing in this script fills them.
        'FAIL_TO_PASS': [],
        'PASS_TO_PASS': [],
        'base_commit': _or_empty(base.get('sha')),
        'version': '0.1',  # depends
    }


def convert_file(src_path, dst_path):
    """Convert the JSONL file at *src_path* into a JSONL file at *dst_path*.

    Each non-blank input line is parsed as JSON, passed through
    :func:`convert_record`, and written as one line of JSON. Non-ASCII text
    is written unescaped (``ensure_ascii=False``) in UTF-8.

    Args:
        src_path: Path of the UTF-8 JSONL file to read.
        dst_path: Path of the JSONL file to create or overwrite.

    Returns:
        The number of records written.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        OSError: If either file cannot be opened.
    """
    written = 0
    with (
        open(src_path, 'r', encoding='utf-8') as fin,
        open(dst_path, 'w', encoding='utf-8') as fout,
    ):
        for line in fin:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue

            record = convert_record(json.loads(line))
            fout.write(json.dumps(record, ensure_ascii=False) + '\n')
            written += 1
    return written


if __name__ == '__main__':
    convert_file(input_file, output_file)