# lmzjms's picture
# Upload 1162 files
# 0b32ad6 verified
from .base import SequentialDataPipe
from .common_pipes import EncodeText, GenerateTokenizer, LoadAudio, SetOutputKeys
class Speech2TextPipe(SequentialDataPipe):
    """Sequential data pipe for speech-to-text examples.

    Runs, in this order: ``LoadAudio``, ``GenerateTokenizer``,
    ``EncodeText`` and ``SetOutputKeys``.

    Each item in the input dataset should have:
        wav_path: str
        transcription: str
    """

    def __init__(
        self,
        generate_tokenizer: bool = False,
        vocab_type: str = "character",
        text_file: str = None,
        vocab_file: str = None,
        slots_file: str = None,
        vocab_args: dict = None,
    ):
        """Build the four stages and pass them to the base constructor.

        Every argument is forwarded unchanged to ``GenerateTokenizer``;
        ``generate_tokenizer`` is passed as its ``generate`` keyword and the
        remaining arguments keep their own names.

        NOTE(review): the meaning of each tokenizer option is defined by
        ``GenerateTokenizer`` and is not visible from this class.
        """
        # Mapping handed to SetOutputKeys as ``output_keys``.
        # NOTE(review): presumably final key -> intermediate field name;
        # confirm against SetOutputKeys.
        key_mapping = {
            "x": "wav",
            "x_len": "wav_len",
            "labels": "transcription",
            "class_ids": "tokenized_text",
            "unique_name": "id",
        }

        # Stages are instantiated in the same order they are chained.
        stages = [
            LoadAudio(),
            GenerateTokenizer(
                generate=generate_tokenizer,
                vocab_type=vocab_type,
                text_file=text_file,
                vocab_file=vocab_file,
                slots_file=slots_file,
                vocab_args=vocab_args,
            ),
            EncodeText(),
            SetOutputKeys(output_keys=key_mapping),
        ]
        super().__init__(*stages)