Irpan
committed on
Commit
•
ce63f6f
1
Parent(s):
734a7ea
app
Browse files
app.py
CHANGED
@@ -1,12 +1,8 @@
|
|
1 |
import gradio as gr
|
2 |
import util
|
|
|
3 |
|
4 |
# Functions
|
5 |
-
def generate_example_pronunciation(input_text, script):
|
6 |
-
# Placeholder for generating example pronunciation
|
7 |
-
example_audio = None # Replace with actual example audio generation logic
|
8 |
-
return example_audio
|
9 |
-
|
10 |
def check_pronunciation(input_text, script, user_audio):
|
11 |
# Placeholder logic for pronunciation checking
|
12 |
transcript_ugArab_box = "Automatic transcription of your audio (Arabic)..."
|
@@ -105,7 +101,7 @@ with gr.Blocks() as app:
|
|
105 |
)
|
106 |
|
107 |
tts_btn.click(
|
108 |
-
generate_example_pronunciation,
|
109 |
inputs=[input_text, script_choice],
|
110 |
outputs=[example_audio]
|
111 |
)
|
|
|
1 |
import gradio as gr
|
2 |
import util
|
3 |
+
import tts
|
4 |
|
5 |
# Functions
|
|
|
|
|
|
|
|
|
|
|
6 |
def check_pronunciation(input_text, script, user_audio):
|
7 |
# Placeholder logic for pronunciation checking
|
8 |
transcript_ugArab_box = "Automatic transcription of your audio (Arabic)..."
|
|
|
101 |
)
|
102 |
|
103 |
tts_btn.click(
|
104 |
+
tts.generate_example_pronunciation,
|
105 |
inputs=[input_text, script_choice],
|
106 |
outputs=[example_audio]
|
107 |
)
|
tts.py
CHANGED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import VitsModel, AutoTokenizer
|
2 |
+
import torch
|
3 |
+
from umsc import UgMultiScriptConverter
|
4 |
+
import scipy.io.wavfile
|
5 |
+
import os
|
6 |
+
|
7 |
+
tts_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-uig-script_arabic")
|
8 |
+
tts_model = VitsModel.from_pretrained("facebook/mms-tts-uig-script_arabic")
|
9 |
+
|
10 |
+
def generate_example_pronunciation(input_text, script):
    """Synthesize a spoken example of ``input_text`` and save it as a WAV file.

    The text is tokenized with the module-level ``tts_tokenizer`` and fed to
    the module-level ``tts_model``; the resulting waveform is written to disk.

    Args:
        input_text: The text to pronounce.
        script: Name of the script ``input_text`` is written in. Any value
            other than ``"Uyghur Arabic"`` causes the text to be passed
            through the ``'ULS'`` -> ``'UAS'`` converter first (presumably
            Uyghur Latin to Uyghur Arabic script -- confirm against the
            ``umsc`` documentation).

    Returns:
        Path of the generated WAV file, or ``None`` when ``input_text`` is
        empty or whitespace-only (nothing to synthesize).
    """
    # Function-scope import keeps this block self-contained.
    import tempfile

    # Nothing to speak: skip the model entirely instead of feeding it an
    # empty token sequence.
    if not input_text or not input_text.strip():
        return None

    # Only build the converter when a conversion is actually required.
    if script != "Uyghur Arabic":
        ug_latn_to_arab = UgMultiScriptConverter('ULS', 'UAS')
        input_text = ug_latn_to_arab(input_text)

    tts_inputs = tts_tokenizer(input_text, return_tensors="pt")
    with torch.no_grad():  # inference only, no gradients needed
        tts_output = tts_model(**tts_inputs).waveform

    # Write to a unique temporary file per call: a single fixed filename would
    # let concurrent requests overwrite each other's audio.
    # NOTE(review): delete=False means these files are not removed here;
    # confirm whether the caller / host environment cleans them up.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        scipy.io.wavfile.write(
            wav_file,
            # Take the rate from the loaded model rather than hard-coding it.
            rate=tts_model.config.sampling_rate,
            # [0] selects the first (only) item -- assumes the waveform is
            # batched along the first axis; TODO confirm.
            data=tts_output.numpy()[0],
        )

    # Return the audio file path
    return wav_file.name
|