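# Merge two models' JSONL answers into one file of pairwise comparisons for judging;
# each pair is emitted in both presentation orders so position bias cancels out.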
import json

json_path1 = "llama_adapter_7b.json"
json_path2 = "alpaca_lora_7b.json"
out_path = "llama_adapter_vs_alpaca_lora.json"

# Each input file is JSONL: one JSON object per line, aligned by question index.
with open(json_path1) as f:
    data1 = f.readlines()
with open(json_path2) as f:
    data2 = f.readlines()
with open('question.jsonl') as f:
    question = f.readlines()

assert len(data1) == len(data2) == len(question), "answer and question files must be aligned"

out_data = []
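# First pass: llama_adapter's answer as response1, alpaca_lora's as response2.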
for i, (d1, d2, q) in enumerate(zip(data1, data2, question)):
    d1 = json.loads(d1)
    d2 = json.loads(d2)
    q = json.loads(q)
    out_d = {
        'question_id': i,
        'instruction': q['text'],
        'input': '',
        "response1": d1['text'],
        "response2": d2['text'],
    }
    out_data.append(out_d)

# Second pass: swap the presentation order (model 2 as response1) so each pair
# appears both ways, which removes position bias in the pairwise judging.
for i, (d1, d2, q) in enumerate(zip(data2, data1, question)):
    d1 = json.loads(d1)
    d2 = json.loads(d2)
    q = json.loads(q)
    out_d = {
        'question_id': i + len(data1),  # continue numbering after the first pass
        'instruction': q['text'],
        'input': '',
        "response1": d1['text'],
        "response2": d2['text'],
    }
    out_data.append(out_d)

# Write the merged comparisons as a single JSON array.
with open(out_path, 'w') as f:
    # Alternative: emit JSONL instead of a JSON array.
    # f.write("\n".join([json.dumps(x) for x in out_data]))
    json.dump(out_data, f)