# -*- coding: utf-8 -*- #
"""*********************************************************************************************"""
#   FileName     [ preprocess_alignment.py ]
#   Synopsis     [ preprocess phone alignment for the LibriSpeech dataset ]
#   Author       [ Andy T. Liu (Andi611) ]
#   Copyright    [ Copyleft(c), Speech Lab, NTU, Taiwan ]
#   Reference    [ https://github.com/BogiHsu/Phone-Recognizer/blob/815cf9375045c053fa57d17fad0fa14fdc3c7bee/loader.py#L28 ]
"""*********************************************************************************************"""


###############
# IMPORTATION #
###############
import os
import pickle
import argparse
import numpy as np
import pandas as pd
from tqdm import tqdm
from pathlib import Path
from utility.audio import sample_rate, _stft_parameters


#############################
# PREPROCESS CONFIGURATIONS #
#############################
def get_preprocess_args():
    """Parse the command line and return the namespace with `data_path` and `output_path`."""
    parser = argparse.ArgumentParser(description='preprocess arguments for LibriSpeech dataset.')
    parser.add_argument('--data_path', default='./data/libri_alignment', type=str, help='Path to raw LibriSpeech alignment')
    parser.add_argument('--output_path', default='./data/libri_phone', type=str, help='Path to store output', required=False)
    args = parser.parse_args()
    return args


###########
# HELPERS #
###########
def _is_aligned(path, unaligned):
    """Return True if `path` is an alignment file that should be processed."""
    # the check name is the file name up to its first dot
    check_name = path.name.split('.')[0]
    # ignore the unaligned files and `unaligned.txt` itself
    return check_name not in unaligned and check_name != 'unaligned'


def _read_alignment(path):
    """Read one alignment file and return a list of (start, end, phone) string triples.

    Blank lines are skipped. A non-blank line with fewer than three
    whitespace-separated fields raises ValueError naming the file and line.
    """
    entries = []
    with open(path, encoding='utf-8') as fp:
        for line_no, line in enumerate(fp, start=1):
            fields = line.split()
            if not fields:
                continue
            if len(fields) < 3:
                raise ValueError('Malformed alignment line {} in {}: {!r}'.format(line_no, path, line))
            entries.append((fields[0], fields[1], fields[2]))
    return entries


####################
# PHONE PREPROCESS #
####################
def phone_preprocess(data_path, output_path, sets, unaligned):
    """Interactively pick data sets, build the phone vocabulary and dump frame-level labels.

    Args:
        data_path: root directory holding one sub-directory of `*.txt` alignment files per set.
        output_path: root directory receiving `phone2idx.pkl` and the per-utterance `.pkl` files.
        sets: candidate set names; the user selects a subset by index on stdin.
        unaligned: utterance ids (file stems) to skip.

    Side effects:
        Writes `<output_path>/phone2idx.pkl` (phone -> index, in first-seen order) and, for every
        processed alignment file, a pickled numpy array of per-frame phone indices at the same
        relative location under `output_path`, with the suffix changed to `.pkl`.
    """
    print('Data sets :')
    for idx, s in enumerate(sets):
        print('\t', idx, ':', s)
    todo_sets = input('Please enter the index for preprocessing sets (seperate w/ space): ')
    # split on any whitespace so repeated or trailing spaces do not yield empty indices
    sets = [sets[int(s)] for s in todo_sets.split()]

    # membership is tested once per file, so use a set for O(1) lookups
    unaligned = set(unaligned)
    data_root = Path(data_path)
    output_root = Path(output_path)
    os.makedirs(output_path, exist_ok=True)

    # compute phone2idx
    phone2idx = {}
    for s in sets:
        print('')
        print('Computing', s, 'data...')
        for path in tqdm(list(Path(os.path.join(data_path, s)).rglob("*.txt"))):
            if not _is_aligned(path, unaligned):
                continue
            for _, _, phone in _read_alignment(path):
                if phone not in phone2idx:
                    # indices are assigned in first-seen order
                    phone2idx[phone] = len(phone2idx)
    print('Phone set:')
    print(phone2idx)
    print(len(phone2idx), 'distinct phones found in', sets)
    with open(os.path.join(output_path, 'phone2idx.pkl'), "wb") as fp:
        pickle.dump(phone2idx, fp)

    for s in sets:
        print('')
        print('Preprocessing', s, 'data...')
        todo = list(Path(os.path.join(data_path, s)).rglob("*.txt"))
        print(len(todo), 'audio files found in', s)
        os.makedirs(os.path.join(output_path, s), exist_ok=True)

        print('Preprocessing phone alignments...', flush=True)
        for path in tqdm(todo):
            if not _is_aligned(path, unaligned):
                continue
            x = []
            for start, end, phone in _read_alignment(path):
                x += time_to_frame(start_time=float(start), end_time=float(end), phone=phone2idx[phone])
            x = np.asarray(x)
            # mirror the input layout under output_path; only the suffix changes
            path_to_save = (output_root / path.relative_to(data_root)).with_suffix('.pkl')
            # rglob may descend into sub-directories that do not exist on the output side yet
            path_to_save.parent.mkdir(parents=True, exist_ok=True)
            with open(path_to_save, "wb") as fp:
                pickle.dump(x, fp)
    print('Phone preprocessing complete!')


#################
# TIME TO FRAME #
#################
def time_to_frame(start_time, end_time, phone):
    """Expand one aligned segment into a list of per-frame labels.

    Args:
        start_time: segment start, multiplied by `sample_rate` to obtain a sample index.
        end_time: segment end, converted the same way.
        phone: label to repeat for every frame of the segment.

    Returns:
        A list holding `phone` once per multiple of `hop_length` that falls in the half-open
        interval [start, end), after both bounds are shifted back by half a window
        (bounds smaller than half a window are clamped to 0). Empty if the count is not positive.
    """
    start_time = int(start_time * sample_rate)
    end_time = int(end_time * sample_rate)

    _, hop_length, win_length = _stft_parameters(sample_rate=sample_rate)
    h_window = win_length * 0.5  # select the middle of a window
    start_time = (start_time - h_window) if start_time >= h_window else 0
    end_time = (end_time - h_window) if end_time >= h_window else 0

    # floor difference, corrected so a bound lying exactly on a hop is counted for
    # the start (inclusive) and not for the end (exclusive)
    times = (end_time // hop_length) - (start_time // hop_length) \
        + (1 if start_time % hop_length == 0 else 0) - (1 if end_time % hop_length == 0 else 0)
    return [phone] * int(times)


########
# MAIN #
########
def main():
    """Script entry point: dump the unaligned-utterance list, then preprocess the chosen sets.

    Raises:
        ValueError: if `train-clean-360/unaligned.txt` cannot be read under `data_path`.
    """
    # get arguments
    args = get_preprocess_args()

    # mkdir
    os.makedirs(args.output_path, exist_ok=True)

    # dump unaligned text
    unaligned_txt = os.path.join(args.data_path, 'train-clean-360/unaligned.txt')
    try:
        # only the read is guarded, so unrelated failures are not misreported as a missing file
        with open(unaligned_txt, encoding='utf-8') as fp:
            lines = fp.readlines()
    except OSError as err:
        raise ValueError('Did not find unaligned.txt!') from err

    # the utterance id is the first whitespace-delimited token of each non-blank line
    unaligned = [line.split()[0] for line in lines if line.strip()]
    print('Unaligned list: ', unaligned)
    # NOTE(review): entries carry a '.npy' suffix while this script writes '.pkl' files --
    # presumably matched against features produced elsewhere; confirm with the consumer.
    unaligned_pkl = ['train-clean-360/' + u + '.npy' for u in unaligned]
    with open(os.path.join(args.output_path, 'unaligned.pkl'), "wb") as fp:
        pickle.dump(unaligned_pkl, fp)

    # Process data
    sets = ['train-clean-360', 'test-clean']  # only two sets available for now
    # sets = ['train-clean-100','train-clean-360','train-other-500','dev-clean','dev-other','test-clean','test-other']
    phone_preprocess(args.data_path, args.output_path, sets, unaligned)


if __name__ == '__main__':
    main()