Spaces:

aletrn
/

ai-pronunciation-trainer

Running

App Files Community

alessandro trinca tornidor committed on Dec 5, 2024

Commit

d1b2b5d

1 Parent(s): 4cafb0a

test: update test cases for pronunciationTrainer module

Browse files

Files changed (4) hide show

aip_trainer/utils/split_cosmic_ray_report.py +1 -1
cosmic_ray_config.toml +2 -2
tests/lambdas/test_lambdaSpeechToScore.py +1 -3
tests/test_pronunciationtrainer.py +77 -16

aip_trainer/utils/split_cosmic_ray_report.py CHANGED Viewed

@@ -25,5 +25,5 @@ def get_cosmic_ray_report_filtered(input_filename, suffix="filtered", separator=
 if __name__ == "__main__":
     from aip_trainer import PROJECT_ROOT_FOLDER
-    _input_filename =  "cosmic-ray-lambdagetsample4.txt"
     get_cosmic_ray_report_filtered(PROJECT_ROOT_FOLDER / "tmp" / _input_filename)

 if __name__ == "__main__":
     from aip_trainer import PROJECT_ROOT_FOLDER
+    _input_filename =  "cosmic-ray-pronunciationtrainer1.txt"
     get_cosmic_ray_report_filtered(PROJECT_ROOT_FOLDER / "tmp" / _input_filename)

cosmic_ray_config.toml CHANGED Viewed

@@ -1,8 +1,8 @@
 [cosmic-ray]
-module-path = "aip_trainer/WordMatching.py"
 timeout = 30.0
 excluded-modules = []
-test-command = "python -m pytest tests/test_worldmatching.py"
 [cosmic-ray.distributor]
 name = "local"

 [cosmic-ray]
+module-path = "aip_trainer/pronunciationTrainer.py"
 timeout = 30.0
 excluded-modules = []
+test-command = "python -m pytest tests/test_pronunciationtrainer.py"
 [cosmic-ray.distributor]
 name = "local"

tests/lambdas/test_lambdaSpeechToScore.py CHANGED Viewed

@@ -20,9 +20,7 @@ def set_seed(seed=0):
     torch.manual_seed(seed)
-def assert_raises_get_speech_to_score_dict(
-    cls, real_text, file_bytes_or_audiotmpfile, language, exc, error_message
-):
     from aip_trainer.lambdas import lambdaSpeechToScore
     with cls.assertRaises(exc):

     torch.manual_seed(seed)
+def assert_raises_get_speech_to_score_dict(cls, real_text, file_bytes_or_audiotmpfile, language, exc, error_message):
     from aip_trainer.lambdas import lambdaSpeechToScore
     with cls.assertRaises(exc):

tests/test_pronunciationtrainer.py CHANGED Viewed

@@ -40,64 +40,90 @@ class TestScore(unittest.TestCase):
     def test_exact_transcription_de(self):
         set_seed()
         phrase_real = phrases["de"]["real"]
-        real_and_transcribed_words, _, _ = trainer_SST_lambda_de.matchSampleAndRecordedWords(phrase_real, phrase_real)
-        pronunciation_accuracy, _ = trainer_SST_lambda_de.getPronunciationAccuracy(real_and_transcribed_words)
         self.assertEqual(int(pronunciation_accuracy), 100)
     def test_transcription_de(self):
         set_seed()
         phrase_real = phrases["de"]["real"]
         phrase_transcribed = phrases["de"]["transcribed"]
-        real_and_transcribed_words, _, _ = trainer_SST_lambda_de.matchSampleAndRecordedWords(phrase_real, phrase_transcribed)
-        pronunciation_accuracy, _ = trainer_SST_lambda_de.getPronunciationAccuracy(real_and_transcribed_words)
         self.assertEqual(int(pronunciation_accuracy), 100)
     def test_partial_transcription_de(self):
         set_seed()
         phrase_real = phrases["de"]["real"]
         phrase_partial = phrases["de"]["partial"]
-        real_and_transcribed_words, _, _ = trainer_SST_lambda_de.matchSampleAndRecordedWords(phrase_real, phrase_partial)
-        pronunciation_accuracy, _ = trainer_SST_lambda_de.getPronunciationAccuracy(real_and_transcribed_words)
         self.assertEqual(int(pronunciation_accuracy), 71)
     def test_incorrect_transcription_with_correct_words_de(self):
         set_seed()
         phrase_real = phrases["de"]["real"]
         phrase_transcribed_incorrect = phrases["de"]["incorrect"]
-        real_and_transcribed_words, _, _ = trainer_SST_lambda_de.matchSampleAndRecordedWords(phrase_real, phrase_transcribed_incorrect)
-        pronunciation_accuracy, _ = trainer_SST_lambda_de.getPronunciationAccuracy(real_and_transcribed_words)
         self.assertEqual(int(pronunciation_accuracy), 71)
     def test_exact_transcription_en(self):
         set_seed()
         phrase_real = phrases["en"]["real"]
-        real_and_transcribed_words, _, _ = trainer_SST_lambda_en.matchSampleAndRecordedWords(phrase_real, phrase_real)
-        pronunciation_accuracy, _ = trainer_SST_lambda_en.getPronunciationAccuracy(real_and_transcribed_words)
         self.assertEqual(int(pronunciation_accuracy), 100)
     def test_transcription_en(self):
         set_seed()
         phrase_real = phrases["en"]["real"]
         phrase_transcribed = phrases["en"]["transcribed"]
-        real_and_transcribed_words, _, _ = trainer_SST_lambda_en.matchSampleAndRecordedWords(phrase_real, phrase_transcribed)
-        pronunciation_accuracy, _ = trainer_SST_lambda_en.getPronunciationAccuracy(real_and_transcribed_words)
         self.assertEqual(int(pronunciation_accuracy), 94)
     def test_partial_transcription_en(self):
         set_seed()
         phrase_real = phrases["en"]["real"]
         phrase_partial = phrases["en"]["partial"]
-        real_and_transcribed_words, _, _ = trainer_SST_lambda_en.matchSampleAndRecordedWords(phrase_real, phrase_partial)
-        pronunciation_accuracy, _ = trainer_SST_lambda_en.getPronunciationAccuracy(real_and_transcribed_words)
         self.assertEqual(int(pronunciation_accuracy), 56)
     def test_incorrect_transcription_with_correct_words_en(self):
         set_seed()
         phrase_real = phrases["en"]["real"]
         phrase_transcribed_incorrect = phrases["en"]["incorrect"]
-        real_and_transcribed_words, _, _ = trainer_SST_lambda_en.matchSampleAndRecordedWords(phrase_real, phrase_transcribed_incorrect)
-        pronunciation_accuracy, _ = trainer_SST_lambda_en.getPronunciationAccuracy(real_and_transcribed_words)
         self.assertEqual(int(pronunciation_accuracy), 69)
     def test_processAudioForGivenText_getTranscriptAndWordsLocations_de(self):
         set_seed()
@@ -202,6 +228,41 @@ class TestScore(unittest.TestCase):
             all_categories.append(category)
         self.assertEqual(all_categories, expected_categories)
 if __name__ == '__main__':
     unittest.main()

     def test_exact_transcription_de(self):
         set_seed()
         phrase_real = phrases["de"]["real"]
+        real_and_transcribed_words, real_and_transcribed_words_ipa, mapped_words_indices = trainer_SST_lambda_de.matchSampleAndRecordedWords(phrase_real, phrase_real)
+        self.assertEqual(real_and_transcribed_words_ipa, [('haloː,', 'haloː,'), ('viː', 'viː'), ('ɡeːt', 'ɡeːt'), ('ɛːs', 'ɛːs'), ('diːr?', 'diːr?')])
+        self.assertEqual(mapped_words_indices, [0, 1, 2, 3, 4])
+        pronunciation_accuracy, current_words_pronunciation_accuracy = trainer_SST_lambda_de.getPronunciationAccuracy(real_and_transcribed_words)
         self.assertEqual(int(pronunciation_accuracy), 100)
+        self.assertEqual(current_words_pronunciation_accuracy, [100, 100, 100, 100, 100])
     def test_transcription_de(self):
         set_seed()
         phrase_real = phrases["de"]["real"]
         phrase_transcribed = phrases["de"]["transcribed"]
+        real_and_transcribed_words, real_and_transcribed_words_ipa, mapped_words_indices = trainer_SST_lambda_de.matchSampleAndRecordedWords(phrase_real, phrase_transcribed)
+        self.assertEqual(real_and_transcribed_words_ipa, [('haloː,', 'haloː'), ('viː', 'viː'), ('ɡeːt', 'ɡeːt'), ('ɛːs', 'ɛːs'), ('diːr?', 'diːɐ̯')])
+        self.assertEqual(mapped_words_indices, [0, 1, 2, 3, 4])
+        pronunciation_accuracy, current_words_pronunciation_accuracy= trainer_SST_lambda_de.getPronunciationAccuracy(real_and_transcribed_words)
         self.assertEqual(int(pronunciation_accuracy), 100)
+        self.assertEqual(current_words_pronunciation_accuracy, [100, 100, 100, 100, 100])
     def test_partial_transcription_de(self):
         set_seed()
         phrase_real = phrases["de"]["real"]
         phrase_partial = phrases["de"]["partial"]
+        real_and_transcribed_words, real_and_transcribed_words_ipa, mapped_words_indices = trainer_SST_lambda_de.matchSampleAndRecordedWords(phrase_real, phrase_partial)
+        pronunciation_accuracy, current_words_pronunciation_accuracy= trainer_SST_lambda_de.getPronunciationAccuracy(real_and_transcribed_words)
+        self.assertEqual(real_and_transcribed_words_ipa, [('haloː,', 'haloː'), ('viː', 'viː'), ('ɡeːt', 'ɡeːt'), ('ɛːs', '-'), ('diːr?', '-')])
+        self.assertEqual(mapped_words_indices, [0, 1, 2, -1, -1])
         self.assertEqual(int(pronunciation_accuracy), 71)
+        self.assertEqual(current_words_pronunciation_accuracy, [100, 100, 100, 0, 0])
     def test_incorrect_transcription_with_correct_words_de(self):
         set_seed()
         phrase_real = phrases["de"]["real"]
         phrase_transcribed_incorrect = phrases["de"]["incorrect"]
+        real_and_transcribed_words, real_and_transcribed_words_ipa, mapped_words_indices = trainer_SST_lambda_de.matchSampleAndRecordedWords(phrase_real, phrase_transcribed_incorrect)
+        self.assertEqual(real_and_transcribed_words_ipa, [('haloː,', 'haɪ̯l'), ('viː', 'viː'), ('ɡeːt', 'ɡiːt'), ('ɛːs', 'ɛːs'), ('diːr?', 'diːɐ̯')])
+        self.assertEqual(mapped_words_indices, [0, 1, 2, 3, 4])
+        pronunciation_accuracy, current_words_pronunciation_accuracy= trainer_SST_lambda_de.getPronunciationAccuracy(real_and_transcribed_words)
         self.assertEqual(int(pronunciation_accuracy), 71)
+        for accuracy, expected_accuracy in zip(current_words_pronunciation_accuracy, [60.0, 66.666666, 50.0, 100.0, 100.0]):
+            self.assertAlmostEqual(accuracy, expected_accuracy, places=2)
     def test_exact_transcription_en(self):
         set_seed()
         phrase_real = phrases["en"]["real"]
+        real_and_transcribed_words, real_and_transcribed_words_ipa, mapped_words_indices = trainer_SST_lambda_en.matchSampleAndRecordedWords(phrase_real, phrase_real)
+        self.assertEqual(real_and_transcribed_words_ipa, [('haɪ', 'haɪ'), ('ðɛr,', 'ðɛr,'), ('haʊ', 'haʊ'), ('ər', 'ər'), ('ju?', 'ju?')])
+        self.assertEqual(mapped_words_indices, [0, 1, 2, 3, 4])
+        pronunciation_accuracy, current_words_pronunciation_accuracy= trainer_SST_lambda_en.getPronunciationAccuracy(real_and_transcribed_words)
         self.assertEqual(int(pronunciation_accuracy), 100)
+        self.assertEqual(current_words_pronunciation_accuracy, [100, 100, 100, 100, 100])
     def test_transcription_en(self):
         set_seed()
         phrase_real = phrases["en"]["real"]
         phrase_transcribed = phrases["en"]["transcribed"]
+        real_and_transcribed_words, real_and_transcribed_words_ipa, mapped_words_indices = trainer_SST_lambda_en.matchSampleAndRecordedWords(phrase_real, phrase_transcribed)
+        self.assertEqual(real_and_transcribed_words_ipa, [('haɪ', 'aɪ'), ('ðɛr,', 'ðɛr'), ('haʊ', 'haʊ'), ('ər', 'ər'), ('ju?', 'ju')])
+        self.assertEqual(mapped_words_indices, [0, 1, 2, 3, 4])
+        pronunciation_accuracy, current_words_pronunciation_accuracy= trainer_SST_lambda_en.getPronunciationAccuracy(real_and_transcribed_words)
         self.assertEqual(int(pronunciation_accuracy), 94)
+        self.assertEqual(current_words_pronunciation_accuracy, [50.0, 100.0, 100.0, 100.0, 100.0])
     def test_partial_transcription_en(self):
         set_seed()
         phrase_real = phrases["en"]["real"]
         phrase_partial = phrases["en"]["partial"]
+        real_and_transcribed_words, real_and_transcribed_words_ipa, mapped_words_indices = trainer_SST_lambda_en.matchSampleAndRecordedWords(phrase_real, phrase_partial)
+        self.assertEqual(real_and_transcribed_words_ipa, [('haɪ', 'aɪ'), ('ðɛr,', 'ðɛr'), ('haʊ', 'haʊ'), ('ər', ''), ('ju?', '')])
+        self.assertEqual(mapped_words_indices, [0, 1, 2, -1, -1])
+        pronunciation_accuracy, current_words_pronunciation_accuracy= trainer_SST_lambda_en.getPronunciationAccuracy(real_and_transcribed_words)
         self.assertEqual(int(pronunciation_accuracy), 56)
+        self.assertEqual(current_words_pronunciation_accuracy, [50.0, 100.0, 100.0, 0.0, 0.0])
     def test_incorrect_transcription_with_correct_words_en(self):
         set_seed()
         phrase_real = phrases["en"]["real"]
         phrase_transcribed_incorrect = phrases["en"]["incorrect"]
+        real_and_transcribed_words, real_and_transcribed_words_ipa, mapped_words_indices = trainer_SST_lambda_en.matchSampleAndRecordedWords(phrase_real, phrase_transcribed_incorrect)
+        self.assertEqual(real_and_transcribed_words_ipa, [('haɪ', 'aɪ'), ('ðɛr,', 'hir'), ('haʊ', 'haʊ'), ('ər', 'ri'), ('ju?', 'juθ')])
+        self.assertEqual(mapped_words_indices, [0, 1, 2, 3, 4])
+        pronunciation_accuracy, current_words_pronunciation_accuracy= trainer_SST_lambda_en.getPronunciationAccuracy(real_and_transcribed_words)
         self.assertEqual(int(pronunciation_accuracy), 69)
+        for accuracy, expected_accuracy in zip(current_words_pronunciation_accuracy, [50.0, 80.0, 100.0, 66.666666, 33.333333]):
+            self.assertAlmostEqual(accuracy, expected_accuracy, places=2)
     def test_processAudioForGivenText_getTranscriptAndWordsLocations_de(self):
         set_seed()
             all_categories.append(category)
         self.assertEqual(all_categories, expected_categories)
+    def test_matchSampleAndRecordedWords(self):
+        set_seed()
+        phrase_real = phrases["de"]["real"]
+        phrase_transcribed = phrases["de"]["transcribed"]
+        real_and_transcribed_words, real_words, transcribed_words = trainer_SST_lambda_de.matchSampleAndRecordedWords(phrase_real, phrase_transcribed)
+        self.assertIsInstance(real_and_transcribed_words, list)
+        self.assertIsInstance(real_words, list)
+        self.assertIsInstance(transcribed_words, list)
+        self.assertEqual(len(real_and_transcribed_words), len(real_words))
+        self.assertEqual(len(real_and_transcribed_words), len(transcribed_words))
+    def test_removePunctuation_en(self):
+        word = "hello,"
+        cleaned_word = trainer_SST_lambda_en.removePunctuation(word)
+        self.assertEqual(cleaned_word, "hello")
+        word = "hello,\n\rworld..."
+        cleaned_word = trainer_SST_lambda_en.removePunctuation(word)
+        self.assertEqual(cleaned_word, "hello\n\rworld")
+    def test_getWordsPronunciationCategory_en(self):
+        accuracies = [x for x in range(-121, 121, 10)] + [np.inf, -np.inf, np.nan, 1.5, -1.5]
+        expected_categories = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2]
+        categories = trainer_SST_lambda_en.getWordsPronunciationCategory(accuracies)
+        self.assertEqual(categories, expected_categories)
+    def test_preprocessAudio_en(self):
+        output_hash = utilities.hash_calculate(signal_en, is_file=False)
+        assert output_hash == b'zBAV/y7mecyPHLGiitHRP9vK7oU9hnYvyuatU0PQfts='
+        signal_transformed = transform(torch.Tensor(signal_en)).unsqueeze(0)
+        processed_audio = trainer_SST_lambda_en.preprocessAudio(signal_transformed)
+        self.assertIsInstance(processed_audio, torch.Tensor)
+        self.assertEqual(processed_audio.shape, (1, 16800))
+        output_hash = utilities.hash_calculate(processed_audio.numpy(), is_file=False)
+        assert output_hash == b'KsyH1MXIc+5e5B6CcijhitsGPUDRJjrJU2qg8bQi600='
 if __name__ == '__main__':
     unittest.main()