import asyncio
import base64
import os
import tempfile
import time

import gradio as gr

from tts import voices, tts, get_task_result, Voice


def generate_speech(text, voice_name, custom_audio=None, custom_prompt_text=None):
    """Generate speech from text using the selected voice or a custom voice.

    Args:
        text: Text to synthesize. ``None``, empty, or whitespace-only input
            is rejected with a status message instead of raising.
        voice_name: Key into ``voices`` selecting a predefined voice. Used
            only when no complete custom voice is supplied.
        custom_audio: Optional voice sample, passed through unchanged as the
            ``promptAudio`` field of the custom voice.
        custom_prompt_text: Transcription of ``custom_audio``. The custom
            voice is used only when ``custom_audio`` is given and this text
            is non-blank; otherwise the predefined voice is used.

    Returns:
        A ``(audio_path, status_message)`` tuple. ``audio_path`` is the path
        of a newly written ``.wav`` file on success and ``None`` on any
        failure; ``status_message`` always describes the outcome.
    """
    poll_interval_seconds = 1
    # Upper bound on polling so a task stuck in PENDING cannot block forever.
    max_wait_seconds = 300

    if not text or not text.strip():
        return None, "Please enter some text"

    if custom_audio is not None and custom_prompt_text and custom_prompt_text.strip():
        voice = {
            "name": "Custom Voice",
            "promptText": custom_prompt_text,
            "promptAudio": custom_audio,
        }
    else:
        try:
            voice = voices[voice_name]
        except KeyError:
            # Covers both an unknown key and a cleared (None) selection.
            return None, "Please select a valid voice"

    async def process_tts():
        try:
            task_id = await tts(text, voice)

            deadline = time.monotonic() + max_wait_seconds
            while True:
                result = await get_task_result(task_id)
                if result['status'] != 'PENDING':
                    break
                if time.monotonic() >= deadline:
                    return None, "TTS generation timed out"
                await asyncio.sleep(poll_interval_seconds)

            if result['status'] != 'SUCCESS':
                return None, f"TTS generation failed: {result['message']}"

            # The payload is base64; drop the header before the first comma
            # when one is present.
            audio_data = result['audio_url']
            if ',' in audio_data:
                audio_data = audio_data.split(',', 1)[1]
            # Decode before creating the file so bad data leaves no empty file.
            audio_bytes = base64.b64decode(audio_data)

            # A unique file per request: a fixed name would be overwritten by
            # the next request. delete=False because the caller reads the
            # file after this function returns.
            with tempfile.NamedTemporaryFile(
                prefix="tts_", suffix=".wav", delete=False
            ) as f:
                f.write(audio_bytes)
                output_file = f.name

            return output_file, f"Successfully generated audio using {voice['name']}"
        except Exception as e:
            # UI boundary: report the failure as a status message.
            return None, f"Error: {str(e)}"

    return asyncio.run(process_tts())


# Map each key of ``voices`` to that voice's "name" field.
voice_options = {
    voice_id: voice_spec["name"] for voice_id, voice_spec in voices.items()
}


# NOTE(review): the original indentation was lost, so the Row/Column/Group
# nesting below is a reconstruction -- confirm it matches the intended layout.

# First predefined voice, or None when no voices are registered.
default_voice_id = next(iter(voice_options), None)


def _selected_voice_markdown(voice_id):
    """Return the "Selected Voice" markdown line for ``voice_id``.

    Falls back to a placeholder instead of raising when ``voice_id`` is not
    a key of ``voice_options`` (for example when the dropdown is cleared).
    """
    return f"Selected Voice: {voice_options.get(voice_id, '(none)')}"


with gr.Blocks(title="Cantonese Text-to-Speech") as demo:
    gr.Markdown("# Cantonese Text-to-Speech Demo")
    gr.Markdown("Enter text in Cantonese and select a voice to generate speech.")

    with gr.Row():
        with gr.Column(scale=2):
            text_input = gr.Textbox(
                placeholder="輸入廣東話文字...",
                label="Text to convert",
                lines=5,
            )

            with gr.Group():
                gr.Markdown("### Choose a voice option")
                voice_dropdown = gr.Dropdown(
                    choices=list(voice_options),
                    value=default_voice_id,
                    label="Select Predefined Voice",
                    info="Choose a voice for synthesis",
                )

                # Shows the display name behind the selected voice key.
                voice_name_display = gr.Markdown(
                    value=_selected_voice_markdown(default_voice_id)
                )

        with gr.Column(scale=2):
            with gr.Group():
                gr.Markdown("### Or upload your own voice (optional)")
                custom_audio = gr.Audio(
                    label="Upload Voice Sample (WAV format)",
                    type="filepath",
                    format="wav",
                )
                custom_prompt_text = gr.Textbox(
                    placeholder="Enter the exact transcription of the uploaded audio...",
                    label="Transcription of Uploaded Audio (required if using custom voice)",
                    lines=2,
                )
                gr.Markdown("*Note: The custom voice sample should be clear with minimal background noise.*")

            generate_btn = gr.Button("Generate Speech", variant="primary")

        with gr.Column(scale=3):
            audio_output = gr.Audio(label="Generated Speech", type="filepath")
            status_text = gr.Markdown("Ready to generate speech")

    # Keep the display name in sync with the dropdown selection.
    voice_dropdown.change(
        fn=_selected_voice_markdown,
        inputs=voice_dropdown,
        outputs=voice_name_display,
    )

    # concurrency_limit=1 is passed through unchanged from the original call.
    generate_btn.click(
        fn=generate_speech,
        inputs=[text_input, voice_dropdown, custom_audio, custom_prompt_text],
        outputs=[audio_output, status_text],
        concurrency_limit=1,
    )


# Launch the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()