|
from .base import SequentialDataPipe |
|
from .common_pipes import EncodeText, GenerateTokenizer, LoadAudio, SetOutputKeys |
|
|
|
|
|
class Speech2TextPipe(SequentialDataPipe):
    """Sequential data pipe for speech-to-text datasets.

    Each item in the input dataset should have:
        wav_path: str
        transcription: str

    The pipe is assembled from four stages, constructed and passed to
    ``SequentialDataPipe`` in this order: ``LoadAudio``,
    ``GenerateTokenizer``, ``EncodeText`` and ``SetOutputKeys``.

    Every constructor argument is forwarded by keyword, unchanged, to
    ``GenerateTokenizer`` (``generate_tokenizer`` is passed as its
    ``generate`` argument); see that class for their exact meaning.
    """

    def __init__(
        self,
        generate_tokenizer: bool = False,
        vocab_type: str = "character",
        text_file: str = None,
        vocab_file: str = None,
        slots_file: str = None,
        vocab_args: dict = None,
    ):
        # Mapping handed to SetOutputKeys as-is. How the keys and values are
        # interpreted is defined by SetOutputKeys, not here.
        output_keys = {
            "x": "wav",
            "x_len": "wav_len",
            "labels": "transcription",
            "class_ids": "tokenized_text",
            "unique_name": "id",
        }

        # Stages are instantiated in the order in which they are chained.
        stages = (
            LoadAudio(),
            GenerateTokenizer(
                generate=generate_tokenizer,
                vocab_type=vocab_type,
                text_file=text_file,
                vocab_file=vocab_file,
                slots_file=slots_file,
                vocab_args=vocab_args,
            ),
            EncodeText(),
            SetOutputKeys(output_keys=output_keys),
        )

        super().__init__(*stages)
|
|