alessandro trinca tornidor committed on
Commit 20172dc · 1 Parent(s): 6957865

feat: move the JS js_play_audio function to js.py, update README.md

Files changed (3)
  1. README.md +2 -1
  2. aip_trainer/lambdas/js.py +41 -0
  3. app.py +1 -41
README.md CHANGED
@@ -17,7 +17,7 @@ You can try my [refactored version](https://github.com/trincadev/ai-pronunciatio

 [![<https://aletrn-ai-pronunciation-trainer.hf.space/>](images/MainScreen.png)](https://aletrn-ai-pronunciation-trainer.hf.space/)

-My [HuggingFace Space](https://huggingface.co/spaces/aletrn/ai-pronunciation-trainer) is a free of charge: for this reason is the less powerful version and the speech recognition could take some seconds.
+My [HuggingFace Space](https://huggingface.co/spaces/aletrn/ai-pronunciation-trainer) is free of charge: for this reason it is the less powerful version, and the speech recognition can take a few seconds.

 ## Installation

@@ -72,6 +72,7 @@ pnpm playwright test

 ### TODO

+- save the synthetic speech from the text-to-speech voice as an audio file, to play it within the Gradio audio component
 - play the isolated words in the recordings, to compare the 'ideal' pronunciation with the learner pronunciation (now it's possible on the old frontend, complicated to implement with Gradio - waiting for [this](https://github.com/gradio-app/gradio/issues/9823))
 - improve documentation (especially function docstrings), backend tests
 - move from pytorch to onnxruntime (if possible)
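The new TODO entry about saving the synthetic speech as a playable file already has a backend counterpart in `lambdaTTS.get_tts`, which app.py wires to `btn_run_tts_backend`. Below is a minimal sketch of the idea, assuming a gTTS-based synthesizer; the project's real `get_tts` may be implemented differently, and this function is illustrative only:

```python
import tempfile

from gtts import gTTS  # assumption: any TTS library that can write an audio file would do


def get_tts(text: str, language: str) -> str:
    """Synthesize `text` and return a file path that a gr.Audio output can play."""
    if not text or not language:
        raise ValueError("both text and language are required")
    # gTTS expects a two-letter language code such as "en" or "de"
    speech = gTTS(text=text, lang=language)
    tmp = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
    tmp.close()  # close before writing so this also works on Windows
    speech.save(tmp.name)
    return tmp.name
```

Returning the path is enough: when such a function is the `fn` of a click event whose output is a `gr.Audio` component, Gradio serves the file and renders a player for it.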
aip_trainer/lambdas/js.py CHANGED
@@ -21,3 +21,44 @@ function updateCssText(text, letters) {
     }
 }
 """
+
+js_play_audio = """
+function playAudio(text, language) {
+    let voice_idx = 0;
+    let voice_synth = null;
+    let synth = window.speechSynthesis;
+
+    function setSpeech() {
+        return new Promise(
+            function (resolve, reject) {
+                let id;
+
+                id = setInterval(() => {
+                    if (synth.getVoices().length !== 0) {
+                        resolve(synth.getVoices());
+                        clearInterval(id);
+                    }
+                }, 10);
+            }
+        )
+    }
+
+    let s = setSpeech();
+    s.then((voices) => {
+        for (let idx = 0; idx < voices.length; idx++) {
+            if (voices[idx].lang.slice(0, 2) == language) {
+                voice_synth = voices[idx];
+                break;
+            }
+        }
+
+        var utterThis = new SpeechSynthesisUtterance(text);
+        utterThis.voice = voice_synth;
+        utterThis.rate = 0.7;
+
+        synth.speak(utterThis);
+        // todo: capture audio from speech synthesis to reuse on the frontend
+        // https://stackoverflow.com/questions/45003548/how-to-capture-generated-audio-from-window-speechsynthesis-speak-call
+    });
+}
+"""
app.py CHANGED
@@ -10,46 +10,6 @@ css = """
 .speech-output-container {align-items: center; min-height: 60px; padding-left: 8px; padding-right: 8px; margin-top: -12px; border-width: 1px; border-style: solid; border-color: lightgrey;}
 """

-js_play_audio = """
-function playAudio(text, language) {
-    let voice_idx = 0;
-    let voice_synth = null;
-    let synth = window.speechSynthesis;
-
-    function setSpeech() {
-        return new Promise(
-            function (resolve, reject) {
-                let id;
-
-                id = setInterval(() => {
-                    if (synth.getVoices().length !== 0) {
-                        resolve(synth.getVoices());
-                        clearInterval(id);
-                    }
-                }, 10);
-            }
-        )
-    }
-
-    let s = setSpeech();
-    s.then((voices) => {
-        for (idx = 0; idx < voices.length; idx++) {
-            if (voices[idx].lang.slice(0, 2) == language) {
-                voice_synth = voices[idx];
-                break;
-            }
-        }
-
-        var utterThis = new SpeechSynthesisUtterance(text);
-        utterThis.voice = voice_synth;
-        utterThis.rate = 0.7;
-
-        synth.speak(utterThis);
-        return utterThis;
-    });
-}
-"""
-

 def clear():
     return None

@@ -196,7 +156,7 @@ with gr.Blocks(css=css) as gradio_app:
             number_score_de, number_score_en
         ],
     )
-    btn_run_tts.click(fn=None, inputs=[text_learner_transcription, radio_language], outputs=audio_tts, js=js_play_audio)
+    btn_run_tts.click(fn=None, inputs=[text_learner_transcription, radio_language], outputs=audio_tts, js=js.js_play_audio)
     btn_run_tts_backend.click(
        fn=lambdaTTS.get_tts,
        inputs=[text_learner_transcription, radio_language],
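The replacement line references `js.js_play_audio`, so app.py must import the `js` module from `aip_trainer.lambdas`; the hunk context already shows `lambdaTTS` in use from the same package. A plausible import line (the exact form is not shown in this diff):

```python
from aip_trainer.lambdas import js, lambdaTTS  # js.js_play_audio replaces the inlined snippet
```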