|
|
|
"""*********************************************************************************************""" |
|
|
|
|
|
|
|
|
|
|
|
"""*********************************************************************************************""" |
|
|
|
|
|
|
|
|
|
|
|
import os |
|
import pickle |
|
import argparse |
|
import numpy as np |
|
import pandas as pd |
|
from tqdm import tqdm |
|
from pathlib import Path |
|
from utility.audio import sample_rate, _stft_parameters |
|
|
|
|
|
|
|
|
|
|
|
def get_preprocess_args(): |
|
|
|
parser = argparse.ArgumentParser(description='preprocess arguments for LibriSpeech dataset.') |
|
parser.add_argument('--data_path', default='./data/libri_alignment', type=str, help='Path to raw LibriSpeech alignment') |
|
parser.add_argument('--output_path', default='./data/libri_phone', type=str, help='Path to store output', required=False) |
|
args = parser.parse_args() |
|
return args |
|
|
|
|
|
|
|
|
|
|
|
def phone_preprocess(data_path, output_path, sets, unaligned): |
|
|
|
print('Data sets :') |
|
for idx, s in enumerate(sets): |
|
print('\t', idx, ':', s) |
|
todo_sets = input('Please enter the index for preprocessing sets (seperate w/ space): ') |
|
sets = [sets[int(s)] for s in todo_sets.split(' ')] |
|
|
|
|
|
idx = 0 |
|
phone2idx = {} |
|
for s in sets: |
|
print('') |
|
print('Computing', s, 'data...') |
|
for path in tqdm(list(Path(os.path.join(data_path, s)).rglob("*.txt"))): |
|
check_name = path.as_posix().split('/')[-1].split('.')[0] |
|
if check_name not in unaligned and check_name != 'unaligned': |
|
for line in open(path).readlines(): |
|
phone = line.strip('\n').split(' ')[-1] |
|
if phone not in phone2idx: |
|
phone2idx[phone] = idx |
|
idx += 1 |
|
print('Phone set:') |
|
print(phone2idx) |
|
print(len(phone2idx), 'distinct phones found in', sets) |
|
with open(os.path.join(output_path, 'phone2idx.pkl'), "wb") as fp: |
|
pickle.dump(phone2idx, fp) |
|
|
|
for s in sets: |
|
print('') |
|
print('Preprocessing', s, 'data...') |
|
todo = list(Path(os.path.join(data_path, s)).rglob("*.txt")) |
|
print(len(todo),'audio files found in', s) |
|
if not os.path.exists(os.path.join(output_path, s)): |
|
os.makedirs(os.path.join(output_path, s)) |
|
|
|
print('Preprocessing phone alignments...', flush=True) |
|
for path in tqdm(todo): |
|
check_name = path.as_posix().split('/')[-1].split('.')[0] |
|
if check_name not in unaligned and check_name != 'unaligned': |
|
x = [] |
|
file = open(path).readlines() |
|
for line in file: |
|
line = line.strip('\n').split(' ') |
|
x += time_to_frame(start_time=float(line[0]), end_time=float(line[1]), phone=phone2idx[line[2]]) |
|
x = np.asarray(x) |
|
path_to_save = str(path).replace(data_path.split('/')[-1], output_path.split('/')[-1]).replace('txt', 'pkl') |
|
with open(path_to_save, "wb") as fp: |
|
pickle.dump(x, fp) |
|
|
|
print('Phone preprocessing complete!') |
|
|
|
|
|
|
|
|
|
|
|
def time_to_frame(start_time, end_time, phone): |
|
phones = [] |
|
|
|
start_time = int(start_time * sample_rate) |
|
end_time = int(end_time * sample_rate) |
|
|
|
_, hop_length, win_length = _stft_parameters(sample_rate=sample_rate) |
|
h_window = win_length * 0.5 |
|
|
|
start_time = (start_time - h_window) if start_time >= h_window else 0 |
|
end_time = (end_time - h_window) if end_time >= h_window else 0 |
|
times = (end_time // hop_length) - (start_time // hop_length) \ |
|
+ (1 if start_time % hop_length == 0 else 0) - (1 if end_time % hop_length == 0 else 0) |
|
phones += [phone] * int(times) |
|
return phones |
|
|
|
|
|
|
|
|
|
|
|
def main(): |
|
|
|
|
|
args = get_preprocess_args() |
|
|
|
|
|
if not os.path.exists(args.output_path): |
|
os.makedirs(args.output_path) |
|
|
|
|
|
try: |
|
file = open(os.path.join(args.data_path, 'train-clean-360/unaligned.txt')).readlines() |
|
unaligned = [str(line).split('\t')[0].split(' ')[0] for line in file] |
|
print('Unaligned list: ', unaligned) |
|
unaligned_pkl = ['train-clean-360/' + u + '.npy' for u in unaligned] |
|
with open(os.path.join(args.output_path, 'unaligned.pkl'), "wb") as fp: |
|
pickle.dump(unaligned_pkl, fp) |
|
except: |
|
raise ValueError('Did not find unaligned.txt!') |
|
|
|
|
|
sets = ['train-clean-360', 'test-clean'] |
|
|
|
phone_preprocess(args.data_path, args.output_path, sets, unaligned) |
|
|
|
|
|
if __name__ == '__main__': |
|
main() |