alessandro trinca tornidor committed on
Commit 20172dc · 1 Parent(s): 6957865

feat: move the JS js_play_audio function to js.py, update README.md

Files changed (3)
  1. README.md +2 -1
  2. aip_trainer/lambdas/js.py +41 -0
  3. app.py +1 -41
README.md CHANGED
@@ -17,7 +17,7 @@ You can try my [refactored version](https://github.com/trincadev/ai-pronunciatio

 [![<https://aletrn-ai-pronunciation-trainer.hf.space/>](images/MainScreen.png)](https://aletrn-ai-pronunciation-trainer.hf.space/)

-My [HuggingFace Space](https://huggingface.co/spaces/aletrn/ai-pronunciation-trainer) is a free of charge: for this reason is the less powerful version and the speech recognition could take some seconds.
+My [HuggingFace Space](https://huggingface.co/spaces/aletrn/ai-pronunciation-trainer) is free of charge: for this reason it is the less powerful version, and the speech recognition can take a few seconds.

 ## Installation

@@ -72,6 +72,7 @@ pnpm playwright test

 ### TODO

+- save the synthetic speech from the text-to-speech voice as an audio file, to play it within the Gradio audio component
 - play the isolated words in the recordings, to compare the 'ideal' pronunciation with the learner pronunciation (now it's possible on the old frontend, complicated to implement with Gradio - waiting for [this](https://github.com/gradio-app/gradio/issues/9823))
 - improve documentation (especially function docstrings), backend tests
 - move from pytorch to onnxruntime (if possible)
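The new TODO entry about saving the synthetic speech as a playable file already has a backend counterpart in `lambdaTTS.get_tts`, which app.py wires to `btn_run_tts_backend`. Below is a minimal sketch of the idea, assuming a gTTS-based synthesizer; the project's real `get_tts` may be implemented differently, and this function is illustrative only:

```python
import tempfile

from gtts import gTTS  # assumption: any TTS library that can write an audio file would do


def get_tts(text: str, language: str) -> str:
    """Synthesize `text` and return a file path that a gr.Audio output can play."""
    if not text or not language:
        raise ValueError("both text and language are required")
    # gTTS expects a two-letter language code such as "en" or "de"
    speech = gTTS(text=text, lang=language)
    tmp = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
    tmp.close()  # close before writing so this also works on Windows
    speech.save(tmp.name)
    return tmp.name
```

Returning the path is enough: when such a function is the `fn` of a click event whose output is a `gr.Audio` component, Gradio serves the file and renders a player for it.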
aip_trainer/lambdas/js.py CHANGED
@@ -21,3 +21,44 @@ function updateCssText(text, letters) {
     }
 }
 """
+
+js_play_audio = """
+function playAudio(text, language) {
+    let voice_idx = 0;
+    let voice_synth = null;
+    let synth = window.speechSynthesis;
+
+    function setSpeech() {
+        return new Promise(
+            function (resolve, reject) {
+                let id;
+
+                id = setInterval(() => {
+                    if (synth.getVoices().length !== 0) {
+                        resolve(synth.getVoices());
+                        clearInterval(id);
+                    }
+                }, 10);
+            }
+        )
+    }
+
+    let s = setSpeech();
+    s.then((voices) => {
+        for (let idx = 0; idx < voices.length; idx++) {
+            if (voices[idx].lang.slice(0, 2) == language) {
+                voice_synth = voices[idx];
+                break;
+            }
+        }
+
+        var utterThis = new SpeechSynthesisUtterance(text);
+        utterThis.voice = voice_synth;
+        utterThis.rate = 0.7;
+
+        synth.speak(utterThis);
+        // todo: capture audio from speech synthesis to reuse on the frontend
+        // https://stackoverflow.com/questions/45003548/how-to-capture-generated-audio-from-window-speechsynthesis-speak-call
+    });
+}
+"""
app.py CHANGED
@@ -10,46 +10,6 @@ css = """
 .speech-output-container {align-items: center; min-height: 60px; padding-left: 8px; padding-right: 8px; margin-top: -12px; border-width: 1px; border-style: solid; border-color: lightgrey;}
 """

-js_play_audio = """
-function playAudio(text, language) {
-    let voice_idx = 0;
-    let voice_synth = null;
-    let synth = window.speechSynthesis;
-
-    function setSpeech() {
-        return new Promise(
-            function (resolve, reject) {
-                let id;
-
-                id = setInterval(() => {
-                    if (synth.getVoices().length !== 0) {
-                        resolve(synth.getVoices());
-                        clearInterval(id);
-                    }
-                }, 10);
-            }
-        )
-    }
-
-    let s = setSpeech();
-    s.then((voices) => {
-        for (idx = 0; idx < voices.length; idx++) {
-            if (voices[idx].lang.slice(0, 2) == language) {
-                voice_synth = voices[idx];
-                break;
-            }
-        }
-
-        var utterThis = new SpeechSynthesisUtterance(text);
-        utterThis.voice = voice_synth;
-        utterThis.rate = 0.7;
-
-        synth.speak(utterThis);
-        return utterThis;
-    });
-}
-"""
-

 def clear():
     return None

@@ -196,7 +156,7 @@ with gr.Blocks(css=css) as gradio_app:
             number_score_de, number_score_en
         ],
     )
-    btn_run_tts.click(fn=None, inputs=[text_learner_transcription, radio_language], outputs=audio_tts, js=js_play_audio)
+    btn_run_tts.click(fn=None, inputs=[text_learner_transcription, radio_language], outputs=audio_tts, js=js.js_play_audio)
     btn_run_tts_backend.click(
        fn=lambdaTTS.get_tts,
        inputs=[text_learner_transcription, radio_language],
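The replacement line references `js.js_play_audio`, so app.py must import the `js` module from `aip_trainer.lambdas`; the hunk context already shows `lambdaTTS` in use from the same package. A plausible import line (the exact form is not shown in this diff):

```python
from aip_trainer.lambdas import js, lambdaTTS  # js.js_play_audio replaces the inlined snippet
```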