from data_gen.tts.base_preprocess import BasePreprocessor


class WenetSpeechPreprocess(BasePreprocessor):
    def meta_data(self):
        wavfn2text = {}

        def get_wavfn2text():
            # wenetspeech.txt lists "<wav_path>\t<transcript>" pairs; keep only
            # non-empty lines from the "podcast" subset.
            with open(f'{self.raw_data_dir}/extracted_wav/wenetspeech.txt') as f:
                lines = f.readlines()
            lines = [l.strip().split("\t") for l in lines if l.strip() != '' and 'podcast' in l]
            wavfn2text.update({l[0]: l[1] for l in lines})

        get_wavfn2text()
        all_wavs = sorted(wavfn2text.keys())
        for wav_fn in all_wavs:
            # Build a unique item name from the last two path components
            # (parent directory + file name).
            wav_basename = wav_fn.split("/")[-2] + "_" + wav_fn.split("/")[-1]
            spk_name = 'asr_data'
            item_name = f'{spk_name}_{wav_basename}'
            yield {
                'item_name': item_name,
                # strip the absolute prefix so wav paths are relative to the project root
                'wav_fn': wav_fn.replace("/home/jzy/dict_idea/NeuralSeq/", ""),
                'txt': wavfn2text[wav_fn],
                'spk_name': spk_name
            }
if __name__ == "__main__":
    # Instantiate the preprocessor before calling process().
    WenetSpeechPreprocess().process()
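
# Illustrative sketch only; the placeholders below are hypothetical, not taken
# from the real dataset. Each kept line of wenetspeech.txt is expected to be
# tab-separated:
#   <wav_path containing "podcast">\t<transcript>
# and meta_data() then yields one dict per wav, e.g.:
#   {'item_name': 'asr_data_<parent_dir>_<file_name>',
#    'wav_fn': '<wav_path relative to the project root>',
#    'txt': '<transcript>',
#    'spk_name': 'asr_data'}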