File size: 611 Bytes
b599481
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import random
import shutil

# Fixed seed so the shuffle below (and therefore the split) is deterministic
# for a given input file.
random.seed(42)

with open('train_data_link.jsonl', encoding='utf-8') as f:
    # writelines() adds no separators, so a final record that lacks a trailing
    # newline would be glued onto the following record once the lines are
    # shuffled and written back. Make sure every line carries its terminator.
    data = [line if line.endswith('\n') else line + '\n' for line in f]
all_data_len = len(data)
print(all_data_len)

random.shuffle(data)
# The first 10% of the shuffled records (rounded down) become the validation
# set; everything after that is the training set.
valid_size = int(all_data_len * 0.1)
valid_data = data[:valid_size]
train_data = data[valid_size:]
print(len(train_data), len(valid_data))


def save_data(file_name, data):
    """Write the strings in ``data`` to ``file_name`` as UTF-8 text.

    The file is created or truncated. The strings are written back to back
    with no separator added, so each one must already carry its own line
    terminator.
    """
    with open(file_name, mode='w', encoding='utf-8') as sink:
        sink.write(''.join(data))


# Persist both halves of the split, one output file per subset.
for out_path, records in (
    ('train_data.jsonl', train_data),
    ('valid_data.jsonl', valid_data),
):
    save_data(out_path, records)

# The test set is not split or shuffled; it is copied over unchanged under
# the new name.
shutil.copyfile('test_data_link.jsonl', 'test_data.jsonl')