"""Filter the LRS2 split list files down to entries marked as valid.

For each file named in ``filenames`` (read from ``lrs2_dirpath``), keep only
the lines whose first space-delimited token appears in the valid-pairs file
at ``valid_lrs2_filepath``, and write the sorted survivors to
``../data/LRS2_CTC{CTC_SCALE}_{filename}``.
"""
import os

CTC_SCALE = 2
lrs2_dirpath = '/media/milselarch/47FC4BC577667AAD/LRS2'
valid_lrs2_filepath = f'../data/LRS2-CTC{CTC_SCALE}-valid-pairs.txt'
filenames = ['train.txt', 'test.txt', 'val.txt']


def load_valid_pairs(filepath):
    """Return the set of stripped, non-empty lines found in *filepath*.

    Blank lines are dropped so that an empty line in a split file can never
    be matched against the set and exported as a "valid" entry.
    """
    with open(filepath) as pairs_file:
        stripped = (line.strip() for line in pairs_file)
        return {entry for entry in stripped if entry}


def extract_pair_id(line):
    """Return the text before the first space in *line*, stripped.

    When *line* contains no space the whole line (stripped) is returned.
    """
    return line.partition(' ')[0].strip()


def filter_valid_lines(lines, valid_pairs):
    """Return the sorted identifiers from *lines* that are in *valid_pairs*.

    Duplicated identifiers in *lines* are kept as duplicates in the result.
    """
    pair_ids = (extract_pair_id(line) for line in lines)
    return sorted(pair_id for pair_id in pair_ids if pair_id in valid_pairs)


def process_split(filename, valid_pairs):
    """Filter one split file, write the export file, and print a summary.

    Returns a ``(valid_count, total_count)`` tuple.
    """
    filepath = os.path.join(lrs2_dirpath, filename)
    with open(filepath, 'r') as split_file:
        lines = split_file.readlines()

    valid_lines = filter_valid_lines(lines, valid_pairs)

    export_filename = f'../data/LRS2_CTC{CTC_SCALE}_{filename}'
    # The context manager guarantees the output is flushed and closed
    # before the next split is processed.
    with open(export_filename, 'w') as export_file:
        export_file.write('\n'.join(valid_lines))

    print(f'<<< {filename} >>>')
    print(f'VALID: {len(valid_lines)}')
    print(f'TOTAL: {len(lines)}')
    return len(valid_lines), len(lines)


def main():
    """Load the valid-pairs set once and process every configured split."""
    valid_lrs2_pairs = load_valid_pairs(valid_lrs2_filepath)
    for filename in filenames:
        process_split(filename, valid_lrs2_pairs)


if __name__ == '__main__':
    main()