import gradio as gr
import asyncio
import base64
from tts import voices, tts, get_task_result
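
# The local `tts` module is assumed (not verified here) to provide:
#   - `voices`: a dict of voice entries, each with "name", "promptText", and
#     "promptAudio" keys (the custom-voice dict below mirrors this structure);
#   - `tts(text, voice)`: an async submission call that returns a task id;
#   - `get_task_result(task_id)`: an async call returning a dict with "status"
#     ("PENDING"/"SUCCESS"/...), a base64 "audio_url" on success, and a
#     "message" on failure.
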
def generate_speech(text, voice_name, custom_audio=None, custom_prompt_text=None):
"""Generate speech from text using the selected voice or custom voice"""
if not text.strip():
return None, "Please enter some text"
output_file = "temp_output.wav"
# Handle custom voice upload
if custom_audio is not None and custom_prompt_text and custom_prompt_text.strip():
# Create a temporary Voice object with the uploaded audio
temp_audio_path = custom_audio
voice = {
"name": "Custom Voice",
"promptText": custom_prompt_text,
"promptAudio": temp_audio_path
}
else:
# Use predefined voice
voice = voices[voice_name]

    async def process_tts():
        try:
            task_id = await tts(text, voice)
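            # Poll roughly once per second until the task leaves the PENDING state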
            while True:
                result = await get_task_result(task_id)
                if result['status'] != 'PENDING':
                    break
                await asyncio.sleep(1)

            if result['status'] == 'SUCCESS':
                audio_data = result['audio_url']
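                # The audio appears to come back base64-encoded, possibly as a
                # data URI ("data:...;base64,<payload>"); keep only the payload.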
                if ',' in audio_data:
                    audio_data = audio_data.split(',')[1]
                with open(output_file, 'wb') as f:
                    f.write(base64.b64decode(audio_data))
                return output_file, f"Successfully generated audio using {voice['name']}"
            else:
                return None, f"TTS generation failed: {result['message']}"
        except Exception as e:
            return None, f"Error: {str(e)}"
    return asyncio.run(process_tts())


# Create a dictionary of voice names for the dropdown
voice_options = {k: v["name"] for k, v in voices.items()}

# Create the Gradio interface
with gr.Blocks(title="Cantonese Text-to-Speech") as demo:
gr.Markdown("# Cantonese Text-to-Speech Demo")
gr.Markdown("Enter text in Cantonese and select a voice to generate speech.")
with gr.Row():
with gr.Column(scale=2):
text_input = gr.Textbox(
placeholder="輸入廣東話文字...",
label="Text to convert",
lines=5
)
with gr.Group():
gr.Markdown("### Choose a voice option")
voice_dropdown = gr.Dropdown(
choices=list(voice_options.keys()),
value=list(voice_options.keys())[0],
label="Select Predefined Voice",
info="Choose a voice for synthesis"
)
# Display the actual voice name based on the selection
voice_name_display = gr.Markdown(value=f"Selected Voice: {voice_options[list(voice_options.keys())[0]]}")
        with gr.Column(scale=2):
            with gr.Group():
                gr.Markdown("### Or upload your own voice (optional)")
                custom_audio = gr.Audio(
                    label="Upload Voice Sample (WAV format)",
                    type="filepath",
                    format="wav"
                )
                custom_prompt_text = gr.Textbox(
                    placeholder="Enter the exact transcription of the uploaded audio...",
                    label="Transcription of Uploaded Audio (required if using custom voice)",
                    lines=2
                )
                gr.Markdown("*Note: The custom voice sample should be clear with minimal background noise.*")

            generate_btn = gr.Button("Generate Speech", variant="primary")
        with gr.Column(scale=3):
            audio_output = gr.Audio(label="Generated Speech", type="filepath")
            status_text = gr.Markdown("Ready to generate speech")

    # Update the voice name display when the dropdown selection changes
    voice_dropdown.change(
        fn=lambda x: f"Selected Voice: {voice_options[x]}",
        inputs=voice_dropdown,
        outputs=voice_name_display
    )

    # Generate speech when the button is clicked
    generate_btn.click(
        fn=generate_speech,
        inputs=[text_input, voice_dropdown, custom_audio, custom_prompt_text],
        outputs=[audio_output, status_text],
        concurrency_limit=1
    )


if __name__ == "__main__":
    demo.launch()