Spaces:
Running
Running
Irpan
committed on
Commit
·
e2dd467
1
Parent(s):
8bef169
asr
Browse files
app.py
CHANGED
@@ -18,7 +18,7 @@ mms_transcribe = gr.Interface(
|
|
18 |
gr.Textbox(label="Uyghur Arabic Transcription"),
|
19 |
gr.Textbox(label="Uyghur Latin Transcription"),
|
20 |
],
|
21 |
-
|
22 |
title="Speech-to-text",
|
23 |
description=(
|
24 |
"Transcribe Uyghur speech audio from a microphone or input file."
|
@@ -40,7 +40,7 @@ mms_synthesize = gr.Interface(
|
|
40 |
outputs=[
|
41 |
gr.Audio(label="Generated Audio"),
|
42 |
],
|
43 |
-
|
44 |
title="Text-to-speech",
|
45 |
description=(
|
46 |
"Generate audio from input Uyghur text."
|
|
|
18 |
gr.Textbox(label="Uyghur Arabic Transcription"),
|
19 |
gr.Textbox(label="Uyghur Latin Transcription"),
|
20 |
],
|
21 |
+
examples=util.asr_examples,
|
22 |
title="Speech-to-text",
|
23 |
description=(
|
24 |
"Transcribe Uyghur speech audio from a microphone or input file."
|
|
|
40 |
outputs=[
|
41 |
gr.Audio(label="Generated Audio"),
|
42 |
],
|
43 |
+
examples=util.tts_examples,
|
44 |
title="Text-to-speech",
|
45 |
description=(
|
46 |
"Generate audio from input Uyghur text."
|
tts.py
CHANGED
@@ -53,15 +53,17 @@ def synthesize(text, model_id):
|
|
53 |
inputs = processor(text, return_tensors="pt").to(device)
|
54 |
|
55 |
with torch.no_grad():
|
56 |
-
output = model(**inputs).waveform.cpu() # Move output back to CPU for saving
|
57 |
|
58 |
output_path = "tts_output.wav"
|
59 |
sample_rate = model.config.sampling_rate
|
60 |
-
scipy.io.wavfile.write(output_path, rate=sample_rate, data=output
|
61 |
|
62 |
return output_path
|
63 |
|
64 |
def synthesize_turkic_tts(text):
|
|
|
|
|
65 |
text = normalization(text, 'uyghur')
|
66 |
|
67 |
with torch.no_grad():
|
|
|
53 |
inputs = processor(text, return_tensors="pt").to(device)
|
54 |
|
55 |
with torch.no_grad():
|
56 |
+
output = model(**inputs).waveform.cpu().numpy()[0] # Move output back to CPU for saving
|
57 |
|
58 |
output_path = "tts_output.wav"
|
59 |
sample_rate = model.config.sampling_rate
|
60 |
+
scipy.io.wavfile.write(output_path, rate=sample_rate, data=output)
|
61 |
|
62 |
return output_path
|
63 |
|
64 |
def synthesize_turkic_tts(text):
|
65 |
+
text = util.ug_arab_to_latn(text)
|
66 |
+
|
67 |
text = normalization(text, 'uyghur')
|
68 |
|
69 |
with torch.no_grad():
|