from s3prl.dataio.encoder.tokenizer import CharacterTokenizer, default_phoneme_tokenizer | |
def test_tokenizer():
    """Check that ``CharacterTokenizer`` round-trips a plain string.

    The sample text is encoded, the encoded form is checked to be a ``list``,
    and decoding it must reproduce the original text exactly. The default
    phoneme tokenizer is only constructed; its output is not inspected.
    """
    char_tokenizer = CharacterTokenizer()
    # Construction-only check: building the default phoneme tokenizer must not
    # raise. The instance is not needed afterwards, so it is not bound.
    default_phoneme_tokenizer()

    char_text = "HELLO WORLD"
    char_text_enc = char_tokenizer.encode(char_text)
    char_text_dec = char_tokenizer.decode(char_text_enc)

    assert isinstance(char_text_enc, list)
    assert char_text == char_text_dec