"""Convert a ``prompt→response`` text dataset into an indexed form.

Reads ``dset{lang}.txt`` (one lower-cased ``prompt→response`` pair per line,
with literal ``\\n`` sequences standing for newlines) and writes:

* ``dataset{lang}.json`` -- maps each prompt to the index of its response;
* ``responses{lang}.txt`` -- the distinct responses, in first-seen order.
"""
import json

from tqdm import tqdm

lang = ""

# The separator is non-ASCII, so every file is opened explicitly as UTF-8
# instead of relying on the platform's default encoding.
with open(f"dset{lang}.txt", "r", encoding="utf-8") as f:
    raw_lines = f.readlines()

lines = []
for line_no, raw in enumerate(tqdm(raw_lines), start=1):
    fields = raw.rstrip("\n").lower().split("→")
    if len(fields) < 2:
        if not raw.strip():
            # Blank lines (e.g. a trailing empty line) carry no sample.
            continue
        raise ValueError(
            f"dset{lang}.txt line {line_no}: expected 'prompt→response', "
            f"got {raw!r}"
        )
    # Only the first two fields are used; "\\n" escapes become real newlines.
    lines.append(
        (fields[0].replace("\\n", "\n"), fields[1].replace("\\n", "\n"))
    )

# Assign each distinct response an index in first-seen order. The dict gives
# O(1) lookups where repeated list scans would be quadratic.
response_index = {}
responses = []
for _, response in tqdm(lines):
    if response not in response_index:
        response_index[response] = len(responses)
        responses.append(response)

# Map every prompt to its response index; a repeated prompt keeps the index
# of its last occurrence.
dset = {}
for prompt, response in tqdm(lines):
    dset[prompt] = response_index[response]

with open(f"dataset{lang}.json", "w", encoding="utf-8") as f:
    json.dump(dset, f, ensure_ascii=False)

# NOTE(review): responses may contain real newlines after un-escaping, so a
# single response can span several lines of this file -- confirm the reader
# does not assume "line number == response index".
with open(f"responses{lang}.txt", "w", encoding="utf-8") as f:
    for response in tqdm(responses):
        f.write(response + "\n")