"""Split 'train_data_link.jsonl' into shuffled train/validation JSONL files.

Running this file as a script reads 'train_data_link.jsonl', shuffles its
lines with a fixed seed, writes the first 10% to 'valid_data.jsonl' and the
rest to 'train_data.jsonl', and copies 'test_data_link.jsonl' to
'test_data.jsonl' unchanged.
"""
import random
import shutil


def load_lines(file_name):
    """Return the lines of *file_name*, each terminated by a newline.

    Args:
        file_name: Path of a UTF-8 text file to read.

    Returns:
        A list of lines, every one ending in '\\n'.
    """
    with open(file_name, encoding='utf-8') as f:
        lines = f.readlines()
    # Only the final line can lack a terminator. Without this fix-up the
    # shuffle may move it to the middle of an output file, where
    # writelines() would glue it to the following record.
    if lines and not lines[-1].endswith('\n'):
        lines[-1] += '\n'
    return lines


def split_data(data, valid_ratio=0.1, seed=42):
    """Shuffle *data* deterministically and split it into two lists.

    Args:
        data: Sequence of items to split; it is not modified.
        valid_ratio: Fraction of items placed in the validation list. The
            validation size is ``int(len(data) * valid_ratio)``.
        seed: Seed for the shuffle. A dedicated ``random.Random(seed)`` is
            used, which yields the same order as seeding the module-level
            generator with the same value and calling ``random.shuffle``.

    Returns:
        A ``(train_data, valid_data)`` tuple of lists.
    """
    shuffled = list(data)
    random.Random(seed).shuffle(shuffled)
    cut = int(len(shuffled) * valid_ratio)
    return shuffled[cut:], shuffled[:cut]


def save_data(file_name, data):
    """Write the strings in *data* to *file_name* as UTF-8, without separators.

    Args:
        file_name: Path of the file to create or overwrite.
        data: Iterable of strings; each is written as-is, so items are
            expected to carry their own trailing newline.
    """
    with open(file_name, 'w', encoding='utf-8') as f:
        f.writelines(data)


def main():
    """Build the train/valid files and copy the test file."""
    data = load_lines('train_data_link.jsonl')
    print(len(data))

    train_data, valid_data = split_data(data)
    print(len(train_data), len(valid_data))

    save_data('train_data.jsonl', train_data)
    save_data('valid_data.jsonl', valid_data)
    shutil.copyfile('test_data_link.jsonl', 'test_data.jsonl')


if __name__ == '__main__':
    main()