wavlm-large / s3prl_s3prl_main /s3prl /preprocess /preprocess_alignment.py

Upload 1162 files

0b32ad6 verified 9 months ago

5.53 kB

	# -- coding: utf-8 -- #
	"""*********************************************************************************************"""
	# FileName [ preprocess_alignment.py ]
	# Synopsis [ preprocess phone alignment for the LibriSpeech dataset ]
	# Author [ Andy T. Liu (Andi611) ]
	# Copyright [ Copyleft(c), Speech Lab, NTU, Taiwan ]
	# Reference [ https://github.com/BogiHsu/Phone-Recognizer/blob/815cf9375045c053fa57d17fad0fa14fdc3c7bee/loader.py#L28 ]
	"""*********************************************************************************************"""


	###############
	# IMPORTATION #
	###############
	import os
	import pickle
	import argparse
	import numpy as np
	import pandas as pd
	from tqdm import tqdm
	from pathlib import Path
	from utility.audio import sample_rate, _stft_parameters


	#############################
	# PREPROCESS CONFIGURATIONS #
	#############################
	def get_preprocess_args():

	parser = argparse.ArgumentParser(description='preprocess arguments for LibriSpeech dataset.')
	parser.add_argument('--data_path', default='./data/libri_alignment', type=str, help='Path to raw LibriSpeech alignment')
	parser.add_argument('--output_path', default='./data/libri_phone', type=str, help='Path to store output', required=False)
	args = parser.parse_args()
	return args


	####################
	# PHONE PREPROCESS #
	####################
	def phone_preprocess(data_path, output_path, sets, unaligned):

	print('Data sets :')
	for idx, s in enumerate(sets):
	print('\t', idx, ':', s)
	todo_sets = input('Please enter the index for preprocessing sets (seperate w/ space): ')
	sets = [sets[int(s)] for s in todo_sets.split(' ')]

	# compute phone2idx
	idx = 0
	phone2idx = {}
	for s in sets:
	print('')
	print('Computing', s, 'data...')
	for path in tqdm(list(Path(os.path.join(data_path, s)).rglob("*.txt"))):
	check_name = path.as_posix().split('/')[-1].split('.')[0]
	if check_name not in unaligned and check_name != 'unaligned': # ignore the unaligned files and `unaligned.txt` itself
	for line in open(path).readlines():
	phone = line.strip('\n').split(' ')[-1]
	if phone not in phone2idx:
	phone2idx[phone] = idx
	idx += 1
	print('Phone set:')
	print(phone2idx)
	print(len(phone2idx), 'distinct phones found in', sets)
	with open(os.path.join(output_path, 'phone2idx.pkl'), "wb") as fp:
	pickle.dump(phone2idx, fp)

	for s in sets:
	print('')
	print('Preprocessing', s, 'data...')
	todo = list(Path(os.path.join(data_path, s)).rglob("*.txt"))
	print(len(todo),'audio files found in', s)
	if not os.path.exists(os.path.join(output_path, s)):
	os.makedirs(os.path.join(output_path, s))

	print('Preprocessing phone alignments...', flush=True)
	for path in tqdm(todo):
	check_name = path.as_posix().split('/')[-1].split('.')[0]
	if check_name not in unaligned and check_name != 'unaligned': # ignore the unaligned files and `unaligned.txt` itself
	x = []
	file = open(path).readlines()
	for line in file:
	line = line.strip('\n').split(' ')
	x += time_to_frame(start_time=float(line[0]), end_time=float(line[1]), phone=phone2idx[line[2]])
	x = np.asarray(x)
	path_to_save = str(path).replace(data_path.split('/')[-1], output_path.split('/')[-1]).replace('txt', 'pkl')
	with open(path_to_save, "wb") as fp:
	pickle.dump(x, fp)

	print('Phone preprocessing complete!')


	#################
	# TIME TO FRAME #
	#################
	def time_to_frame(start_time, end_time, phone):
	phones = []

	start_time = int(start_time * sample_rate)
	end_time = int(end_time * sample_rate)

	_, hop_length, win_length = _stft_parameters(sample_rate=sample_rate)
	h_window = win_length * 0.5 # select the middle of a window

	start_time = (start_time - h_window) if start_time >= h_window else 0
	end_time = (end_time - h_window) if end_time >= h_window else 0
	times = (end_time // hop_length) - (start_time // hop_length) \
	+ (1 if start_time % hop_length == 0 else 0) - (1 if end_time % hop_length == 0 else 0)
	phones += [phone] * int(times)
	return phones


	########
	# MAIN #
	########
	def main():

	# get arguments
	args = get_preprocess_args()

	# mkdir
	if not os.path.exists(args.output_path):
	os.makedirs(args.output_path)

	# dump unaligned text
	try:
	file = open(os.path.join(args.data_path, 'train-clean-360/unaligned.txt')).readlines()
	unaligned = [str(line).split('\t')[0].split(' ')[0] for line in file]
	print('Unaligned list: ', unaligned)
	unaligned_pkl = ['train-clean-360/' + u + '.npy' for u in unaligned]
	with open(os.path.join(args.output_path, 'unaligned.pkl'), "wb") as fp:
	pickle.dump(unaligned_pkl, fp)
	except:
	raise ValueError('Did not find unaligned.txt!')

	# Process data
	sets = ['train-clean-360', 'test-clean'] # only two sets available for now
	# sets = ['train-clean-100','train-clean-360','train-other-500','dev-clean','dev-other','test-clean','test-other']
	phone_preprocess(args.data_path, args.output_path, sets, unaligned)


	if __name__ == '__main__':
	main()