# Spaces status at capture time: Runtime error
import os
import tempfile

import gradio as gr
import torch
from TTS.api import TTS
# Presumably pre-accepts the Coqui terms-of-service prompt so the model can be
# fetched without interactive input (TODO confirm against the TTS docs). It is
# set before the model is instantiated further down in this file.
os.environ["COQUI_TOS_AGREED"] = "1"

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Initialize TTS model
def load_tts_model():
    """Build the XTTS v2 model and move it to the selected device.

    Returns:
        The ``TTS`` instance for ``tts_models/multilingual/multi-dataset/xtts_v2``
        after ``.to(device)`` has been applied.
    """
    return TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)


# Created once at import time; the request handlers below reuse this instance.
tts = load_tts_model()
# Celebrity voices (example list, you may want to expand or modify this).
# Maps the name shown in the voice dropdown to the reference recording that is
# passed to the model as ``speaker_wav``.
# NOTE(review): the "path/to/..." values look like placeholders; point them at
# real sample files before relying on the TTS tab.
celebrity_voices = {
    "Morgan Freeman": "path/to/morgan_freeman_sample.wav",
    "Scarlett Johansson": "path/to/scarlett_johansson_sample.wav",
    "David Attenborough": "path/to/david_attenborough_sample.wav",
}
def tts_generate(text, voice, language):
    """Synthesize ``text`` in one of the predefined celebrity voices.

    Args:
        text: The text to speak.
        voice: A key of ``celebrity_voices`` selecting the reference sample.
        language: Language code forwarded to ``tts.tts_to_file``.

    Returns:
        Path of the generated ``.wav`` file. The file is created with
        ``delete=False`` so it still exists when the caller reads it.

    Raises:
        gr.Error: If the text is empty, no known voice is selected, or the
            reference sample for the voice does not exist on disk.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to speak.")

    # The dropdown can submit None when nothing is selected; report that
    # clearly instead of failing with a bare KeyError.
    speaker_wav = celebrity_voices.get(voice)
    if speaker_wav is None:
        raise gr.Error("Please select a celebrity voice.")
    if not os.path.isfile(speaker_wav):
        raise gr.Error(f"The reference sample for {voice} is not available.")

    # Only the unique name is needed here; the handle is closed on leaving the
    # ``with`` block so the model can write to the path afterwards.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        temp_audio_path = temp_audio.name

    try:
        tts.tts_to_file(
            text=text,
            speaker_wav=speaker_wav,
            language=language,
            file_path=temp_audio_path,
        )
    except Exception:
        # Do not leave an empty or partial file behind when synthesis fails.
        if os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
        raise
    return temp_audio_path
def clone_voice(text, audio_file, language):
    """Synthesize ``text`` using an uploaded recording as the voice reference.

    Args:
        text: The text to speak.
        audio_file: Reference recording, forwarded as ``speaker_wav``. The UI
            in this file supplies it as a file path.
        language: Language code forwarded to ``tts.tts_to_file``.

    Returns:
        Path of the generated ``.wav`` file. The file is created with
        ``delete=False`` so it still exists when the caller reads it.

    Raises:
        gr.Error: If the text is empty or no reference audio was provided.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to speak.")
    # The audio component submits None when nothing was uploaded or recorded.
    if not audio_file:
        raise gr.Error("Please provide a reference audio file.")

    # Only the unique name is needed here; the handle is closed on leaving the
    # ``with`` block so the model can write to the path afterwards.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        temp_audio_path = temp_audio.name

    try:
        tts.tts_to_file(
            text=text,
            speaker_wav=audio_file,
            language=language,
            file_path=temp_audio_path,
        )
    except Exception:
        # Do not leave an empty or partial file behind when synthesis fails.
        if os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
        raise
    return temp_audio_path
# Placeholder function for Talking Image tab
def talking_image_placeholder():
    """Return the notice shown while the Talking Image feature is unimplemented."""
    return "Talking Image functionality not implemented yet."
# Define Gradio interface
# Language codes offered by both the TTS tab and the Clone Voice tab.
LANGUAGE_CHOICES = ["en", "es", "fr", "de", "it"]

with gr.Blocks() as demo:
    gr.Markdown("# Advanced Voice Synthesis")
    with gr.Tabs():
        with gr.TabItem("TTS"):
            with gr.Row():
                tts_text = gr.Textbox(label="Text to speak")
                tts_voice = gr.Dropdown(
                    choices=list(celebrity_voices.keys()),
                    label="Celebrity Voice",
                    # Preselect the first voice so the handler is not called
                    # with None when the user never touches the dropdown.
                    value=next(iter(celebrity_voices), None),
                )
                tts_language = gr.Dropdown(LANGUAGE_CHOICES, label="Language", value="en")
            tts_generate_btn = gr.Button("Generate")
            tts_output = gr.Audio(label="Generated Audio")
            tts_generate_btn.click(
                tts_generate,
                inputs=[tts_text, tts_voice, tts_language],
                outputs=tts_output,
            )
        with gr.TabItem("Talking Image"):
            gr.Markdown("Talking Image functionality coming soon!")
        with gr.TabItem("Clone Voice"):
            with gr.Row():
                clone_text = gr.Textbox(label="Text to speak")
                # type="filepath" hands the handler a path on disk.
                clone_audio = gr.Audio(label="Voice reference audio file", type="filepath")
                clone_language = gr.Dropdown(LANGUAGE_CHOICES, label="Language", value="en")
            clone_generate_btn = gr.Button("Generate")
            clone_output = gr.Audio(label="Generated Audio")
            clone_generate_btn.click(
                clone_voice,
                inputs=[clone_text, clone_audio, clone_language],
                outputs=clone_output,
            )

# Launch the interface
demo.launch()
# Clean up temporary files (this will run after the Gradio server is closed)
def cleanup_temp_wav_files(directory=None):
    """Delete leftover ``tmp*.wav`` files and report how many were removed.

    Args:
        directory: Directory to scan. Defaults to ``tempfile.gettempdir()``,
            which is where ``tempfile.NamedTemporaryFile`` creates its files
            (with the default ``tmp`` prefix) unless told otherwise.

    Returns:
        The number of files that were deleted.
    """
    if directory is None:
        directory = tempfile.gettempdir()

    try:
        entries = os.listdir(directory)
    except OSError:
        # Nothing to clean if the directory is missing or unreadable.
        return 0

    removed = 0
    for name in entries:
        if not (name.startswith("tmp") and name.endswith(".wav")):
            continue
        try:
            # Join with the directory: the names from listdir() are relative.
            os.remove(os.path.join(directory, name))
        except OSError:
            # Best-effort cleanup: skip entries that vanished or are not
            # removable (e.g. a directory that happens to match the pattern).
            continue
        removed += 1
    return removed


cleanup_temp_wav_files()