# wavlm-large / s3prl_s3prl_main /test /test_tokenizer.py
# lmzjms's picture
# Upload 1162 files
# 0b32ad6 verified
from s3prl.dataio.encoder.tokenizer import CharacterTokenizer, default_phoneme_tokenizer
def test_tokenizer():
    """Round-trip a sample string through the character tokenizer.

    Encodes the text, decodes the result, and checks that the encoded form
    is a ``list`` and that decoding reproduces the original string.
    """
    text_codec = CharacterTokenizer()
    # Built but not used by the assertions below; constructing it is the
    # only exercise the phoneme tokenizer gets in this test.
    _phoneme_codec = default_phoneme_tokenizer()

    sample = "HELLO WORLD"
    encoded = text_codec.encode(sample)
    restored = text_codec.decode(encoded)

    assert isinstance(encoded, list)
    assert sample == restored