import gradio as gr
import asyncio
import base64
from tts import voices, tts, get_task_result
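
# The local `tts` module is assumed (not verified here) to provide:
#   - `voices`: a dict of voice entries, each with "name", "promptText", and
#     "promptAudio" keys (the custom-voice dict below mirrors this structure);
#   - `tts(text, voice)`: an async submission call that returns a task id;
#   - `get_task_result(task_id)`: an async call returning a dict with "status"
#     ("PENDING"/"SUCCESS"/...), a base64 "audio_url" on success, and a
#     "message" on failure.
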
def generate_speech(text, voice_name, custom_audio=None, custom_prompt_text=None):
"""Generate speech from text using the selected voice or custom voice"""
if not text.strip():
return None, "Please enter some text"
output_file = "temp_output.wav"
# Handle custom voice upload
if custom_audio is not None and custom_prompt_text and custom_prompt_text.strip():
# Create a temporary Voice object with the uploaded audio
temp_audio_path = custom_audio
voice = {
"name": "Custom Voice",
"promptText": custom_prompt_text,
"promptAudio": temp_audio_path
}
else:
# Use predefined voice
voice = voices[voice_name]

    async def process_tts():
        try:
            task_id = await tts(text, voice)
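            # Poll roughly once per second until the task leaves the PENDING state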
            while True:
                result = await get_task_result(task_id)
                if result['status'] != 'PENDING':
                    break
                await asyncio.sleep(1)

            if result['status'] == 'SUCCESS':
                audio_data = result['audio_url']
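                # The audio appears to come back base64-encoded, possibly as a
                # data URI ("data:...;base64,<payload>"); keep only the payload.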
                if ',' in audio_data:
                    audio_data = audio_data.split(',')[1]
                with open(output_file, 'wb') as f:
                    f.write(base64.b64decode(audio_data))
                return output_file, f"Successfully generated audio using {voice['name']}"
            else:
                return None, f"TTS generation failed: {result['message']}"
        except Exception as e:
            return None, f"Error: {str(e)}"
    return asyncio.run(process_tts())


# Create a dictionary of voice names for the dropdown
voice_options = {k: v["name"] for k, v in voices.items()}

# Create the Gradio interface
with gr.Blocks(title="Cantonese Text-to-Speech") as demo:
gr.Markdown("# Cantonese Text-to-Speech Demo")
gr.Markdown("Enter text in Cantonese and select a voice to generate speech.")
with gr.Row():
with gr.Column(scale=2):
text_input = gr.Textbox(
placeholder="輸入廣東話文字...",
label="Text to convert",
lines=5
)
with gr.Group():
gr.Markdown("### Choose a voice option")
voice_dropdown = gr.Dropdown(
choices=list(voice_options.keys()),
value=list(voice_options.keys())[0],
label="Select Predefined Voice",
info="Choose a voice for synthesis"
)
# Display the actual voice name based on the selection
voice_name_display = gr.Markdown(value=f"Selected Voice: {voice_options[list(voice_options.keys())[0]]}")
        with gr.Column(scale=2):
            with gr.Group():
                gr.Markdown("### Or upload your own voice (optional)")
                custom_audio = gr.Audio(
                    label="Upload Voice Sample (WAV format)",
                    type="filepath",
                    format="wav"
                )
                custom_prompt_text = gr.Textbox(
                    placeholder="Enter the exact transcription of the uploaded audio...",
                    label="Transcription of Uploaded Audio (required if using custom voice)",
                    lines=2
                )
                gr.Markdown("*Note: The custom voice sample should be clear with minimal background noise.*")

            generate_btn = gr.Button("Generate Speech", variant="primary")
        with gr.Column(scale=3):
            audio_output = gr.Audio(label="Generated Speech", type="filepath")
            status_text = gr.Markdown("Ready to generate speech")

    # Update the voice name display when the dropdown selection changes
    voice_dropdown.change(
        fn=lambda x: f"Selected Voice: {voice_options[x]}",
        inputs=voice_dropdown,
        outputs=voice_name_display
    )

    # Generate speech when the button is clicked
    generate_btn.click(
        fn=generate_speech,
        inputs=[text_input, voice_dropdown, custom_audio, custom_prompt_text],
        outputs=[audio_output, status_text],
        concurrency_limit=1
    )


if __name__ == "__main__":
    demo.launch()