Irpan committed on
Commit
ce63f6f
1 Parent(s): 734a7ea
Files changed (2) hide show
  1. app.py +2 -6
  2. tts.py +26 -0
app.py CHANGED
@@ -1,12 +1,8 @@
1
  import gradio as gr
2
  import util
 
3
 
4
  # Functions
5
- def generate_example_pronunciation(input_text, script):
6
- # Placeholder for generating example pronunciation
7
- example_audio = None # Replace with actual example audio generation logic
8
- return example_audio
9
-
10
  def check_pronunciation(input_text, script, user_audio):
11
  # Placeholder logic for pronunciation checking
12
  transcript_ugArab_box = "Automatic transcription of your audio (Arabic)..."
@@ -105,7 +101,7 @@ with gr.Blocks() as app:
105
  )
106
 
107
  tts_btn.click(
108
- generate_example_pronunciation,
109
  inputs=[input_text, script_choice],
110
  outputs=[example_audio]
111
  )
 
1
  import gradio as gr
2
  import util
3
+ import tts
4
 
5
  # Functions
 
 
 
 
 
6
  def check_pronunciation(input_text, script, user_audio):
7
  # Placeholder logic for pronunciation checking
8
  transcript_ugArab_box = "Automatic transcription of your audio (Arabic)..."
 
101
  )
102
 
103
  tts_btn.click(
104
+ tts.generate_example_pronunciation,
105
  inputs=[input_text, script_choice],
106
  outputs=[example_audio]
107
  )
tts.py CHANGED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import VitsModel, AutoTokenizer
2
+ import torch
3
+ from umsc import UgMultiScriptConverter
4
+ import scipy.io.wavfile
5
+ import os
6
+
7
+ tts_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-uig-script_arabic")
8
+ tts_model = VitsModel.from_pretrained("facebook/mms-tts-uig-script_arabic")
9
+
10
def generate_example_pronunciation(input_text, script):
    """Synthesize example speech for *input_text* and return a WAV file path.

    The text is tokenized with the module-level ``tts_tokenizer`` and rendered
    by the module-level ``tts_model`` (both loaded from
    ``facebook/mms-tts-uig-script_arabic``).

    Args:
        input_text: Text to pronounce. ``None`` or blank text yields ``None``.
        script: Name of the script ``input_text`` is written in. Any value
            other than ``"Uyghur Arabic"`` triggers conversion before
            synthesis.

    Returns:
        Path of the generated ``.wav`` file, or ``None`` when there is no
        text to synthesize.
    """
    # Function-scope import keeps this block self-contained without touching
    # the file's top-level import list.
    import tempfile

    # Nothing to say: skip the tokenizer/model entirely instead of feeding
    # them an empty sequence.
    if input_text is None or not input_text.strip():
        return None

    # The checkpoint is the Arabic-script one, so other input is converted
    # first. Build the converter only when it is actually needed.
    # NOTE(review): 'ULS' -> 'UAS' is presumably Uyghur Latin Script ->
    # Uyghur Arabic Script; confirm against the umsc package.
    if script != "Uyghur Arabic":
        ug_latn_to_arab = UgMultiScriptConverter('ULS', 'UAS')
        input_text = ug_latn_to_arab(input_text)

    tts_inputs = tts_tokenizer(input_text, return_tensors="pt")
    with torch.no_grad():  # inference only, no gradients required
        waveform = tts_model(**tts_inputs).waveform

    # Take the rate from the loaded model rather than hard-coding it, so the
    # WAV header always matches the audio the model produced.
    sample_rate = tts_model.config.sampling_rate

    # Use a unique file per call: a fixed shared filename lets overlapping
    # requests overwrite each other's audio.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name

    # waveform has a leading batch dimension; write the single item in it.
    scipy.io.wavfile.write(
        output_path,
        rate=sample_rate,
        data=waveform[0].cpu().numpy(),
    )

    # Return the audio file path
    return output_path