# Spaces: Sleeping
# (Status banner captured from the hosting web page; not part of the module.)
import unittest

from training.preprocess.tokenizer_ipa import TokenizerIPA
class TestTokenizerIPA(unittest.TestCase): | |
def setUp(self): | |
self.tokenizer = TokenizerIPA() | |
def test_init(self): | |
self.assertEqual(self.tokenizer.lang, "en_us") | |
self.assertIsNotNone(self.tokenizer.phonemizer) | |
self.assertIsNotNone(self.tokenizer.tokenizer) | |
def test_call(self): | |
text = "hello world" | |
phones_ipa, tokens = self.tokenizer(text) | |
self.assertIsInstance(phones_ipa, str) | |
self.assertIsInstance(tokens, list) | |
self.assertTrue(all(isinstance(token, int) for token in tokens)) | |
def test_call_with_punctuation(self): | |
text = "hello world" | |
phones_ipa, tokens = self.tokenizer(text) | |
text2 = "Hello, world!" | |
phones_ipa2, tokens2 = self.tokenizer(text2) | |
self.assertNotEqual(phones_ipa, phones_ipa2) | |
self.assertNotEqual(tokens, tokens2) | |
if __name__ == "__main__":
    # Run this module's test cases when the file is executed directly.
    unittest.main()