import glob

from data_gen.tts.base_preprocess import BasePreprocessor


class WenetSpeechPreprocess(BasePreprocessor):
    def meta_data(self):
        # Load the wav-path -> transcript mapping from the tab-separated index
        # produced by the extraction step; only the 'podcast' subset is kept.
        def get_wavfn2text():
            with open(f'{self.raw_data_dir}/extracted_wav/wenetspeech.txt') as f:
                lines = f.readlines()
            items = [l.strip().split("\t") for l in lines if l.strip() != '' and 'podcast' in l]
            return {l[0]: l[1] for l in items}

        wavfn2text = get_wavfn2text()
        for wav_fn in sorted(wavfn2text.keys()):
            # Build a unique item name from the last two path components.
            wav_basename = wav_fn.split("/")[-2] + "_" + wav_fn.split("/")[-1]
            spk_name = 'asr_data'
            item_name = f'{spk_name}_{wav_basename}'
            yield {
                'item_name': item_name,
                # Strip the absolute prefix so the wav path is relative to the repo root.
                'wav_fn': wav_fn.replace("/home/jzy/dict_idea/NeuralSeq/", ""),
                'txt': wavfn2text[wav_fn],
                'spk_name': spk_name,
            }


if __name__ == "__main__":
    WenetSpeechPreprocess().process()
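
# --- Notes and usage sketch (assumptions, not part of the original pipeline) ---
# The index file `{raw_data_dir}/extracted_wav/wenetspeech.txt` is assumed to be
# tab-separated with one utterance per line, `<wav_path>\t<transcript>`; only
# paths containing 'podcast' are kept by meta_data() above.
#
# A minimal way to spot-check the generator before a full preprocessing run,
# assuming the repo's hparams (and hence self.raw_data_dir) are already
# configured as for any other BasePreprocessor subclass:
#
#   preprocessor = WenetSpeechPreprocess()
#   for i, item in enumerate(preprocessor.meta_data()):
#       print(item['item_name'], item['wav_fn'], item['txt'])
#       if i >= 2:
#           break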