|
import gradio as gr |
|
import torch |
|
from TTS.api import TTS |
|
import os |
|
import spaces |
|
import tempfile |
|
|
|
# Set the Coqui terms-of-service flag in the environment before any model is
# loaded (presumably this pre-accepts the licence prompt — confirm against the
# TTS library documentation).
os.environ["COQUI_TOS_AGREED"] = "1"

# Run on the GPU when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
|
|
|
|
|
def load_tts_model():
    """Instantiate the XTTS v2 model and move it onto ``device``.

    Returns:
        Whatever ``TTS(...).to(device)`` returns, i.e. the model placed on the
        module-level ``device``.
    """
    model = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
    return model.to(device)


# Single shared model instance used by every request handler in this file.
tts = load_tts_model()
|
|
|
|
|
# Display names whose reference recording is stored as "./voices/<name>.mp3".
# The order here is the order in which the names are inserted into the table.
_REGULAR_VOICE_NAMES = (
    "Morgan Freeman",
    "Scarlett Johansson",
    "David Attenborough",
    "Tom Hanks",
    "Emma Watson",
    "Batman",
    "Spongebob",
    "Darth Vader",
    "Homer Simpson",
    "Mario",
    "PewDiePie",
    "Pokimane",
    "Ninja",
    "Shroud",
    "Tfue",
    "Barack Obama",
    "Donald Trump",
    "Angela Merkel",
    "Justin Trudeau",
    "Emmanuel Macron",
    "Serena Williams",
    "Michael Jordan",
    "Lionel Messi",
    "LeBron James",
)

# Maps each selectable voice name to the path of its reference audio sample.
celebrity_voices = {
    name: f"./voices/{name}.mp3" for name in _REGULAR_VOICE_NAMES
}
# This entry's file name does not follow the "<name>.mp3" pattern.
celebrity_voices["Usain Bolt"] = "./voices/Usain.mp3"
|
|
|
def check_voice_files(voices=None):
    """Report which reference voice recordings are missing on disk.

    Args:
        voices: Optional mapping of voice name to audio file path. When
            omitted, the module-level ``celebrity_voices`` table is checked.

    Returns:
        A Markdown string. If any path does not point at an existing regular
        file, a header followed by a bulleted ``name: path`` list of the
        missing entries; otherwise a confirmation message.
    """
    if voices is None:
        voices = celebrity_voices

    # isfile rather than exists: a directory at that path cannot serve as a
    # speaker sample, so it is reported as missing too.
    missing = [
        f"- {voice}: {path}"
        for voice, path in voices.items()
        if not os.path.isfile(path)
    ]
    if not missing:
        return "**All voice files are present.** 🎉"

    # Markdown collapses single newlines into one paragraph, so the entries
    # are emitted as a list (blank line after the header, "- " per item) to
    # keep one missing file per rendered line.
    return "**Missing Voice Files:**\n\n" + "\n".join(missing)
|
|
|
@spaces.GPU(duration=120)
def tts_generate(text, voice, language):
    """Synthesize ``text`` in one of the predefined voices.

    Args:
        text: The text to speak.
        voice: A key of ``celebrity_voices`` selecting the reference sample.
        language: Language code passed through to the TTS model.

    Returns:
        Path of a newly created ``.wav`` file containing the generated audio.
        The file is intentionally not deleted here because the caller needs
        to read it after this function returns.

    Raises:
        gr.Error: If ``text`` is empty, no known voice is selected, or the
            selected voice's sample file does not exist.
    """
    # Validate up front so the user gets a readable message instead of a raw
    # KeyError (no voice selected) or a failure deep inside the model.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to speak.")
    speaker_wav = celebrity_voices.get(voice)
    if speaker_wav is None:
        raise gr.Error("Please select a celebrity voice.")
    if not os.path.isfile(speaker_wav):
        raise gr.Error(f"Voice sample for {voice} is missing: {speaker_wav}")

    # Only the unique name is needed; the handle is closed immediately so the
    # TTS library can write to the path itself.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        temp_audio_path = temp_audio.name

    try:
        tts.tts_to_file(
            text=text,
            speaker_wav=speaker_wav,
            language=language,
            file_path=temp_audio_path,
        )
    except Exception:
        # Synthesis failed: do not leave the empty placeholder file behind.
        try:
            os.remove(temp_audio_path)
        except OSError:
            pass  # best-effort cleanup; the original error matters more
        raise

    return temp_audio_path
|
|
|
# NOTE(review): `enable_queue` may be deprecated/ignored by recent releases of
# the `spaces` package — confirm and consider aligning with tts_generate.
@spaces.GPU(enable_queue=True)
def clone_voice(text, audio_file, language):
    """Synthesize ``text`` using an uploaded recording as the voice reference.

    Args:
        text: The text to speak.
        audio_file: Path of the reference audio file supplied by the user.
        language: Language code passed through to the TTS model.

    Returns:
        Path of a newly created ``.wav`` file containing the generated audio.
        The file is intentionally not deleted here because the caller needs
        to read it after this function returns.

    Raises:
        gr.Error: If ``text`` is empty or no reference audio was provided.
    """
    # Validate up front so the user gets a readable message instead of a
    # failure deep inside the model when an input was left blank.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to speak.")
    if not audio_file:
        raise gr.Error("Please provide a voice reference audio file.")

    # Only the unique name is needed; the handle is closed immediately so the
    # TTS library can write to the path itself.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        temp_audio_path = temp_audio.name

    try:
        tts.tts_to_file(
            text=text,
            speaker_wav=audio_file,
            language=language,
            file_path=temp_audio_path,
        )
    except Exception:
        # Synthesis failed: do not leave the empty placeholder file behind.
        try:
            os.remove(temp_audio_path)
        except OSError:
            pass  # best-effort cleanup; the original error matters more
        raise

    return temp_audio_path
|
|
|
|
|
def talking_image_placeholder():
    """Return the fixed notice shown while the Talking Image feature is absent."""
    notice = "Talking Image functionality not implemented yet."
    return notice
|
|
|
|
|
# Build the Gradio interface: a status banner followed by three tabs.
with gr.Blocks() as demo:
    gr.Markdown("# Advanced Voice Synthesis")

    # Show at the top of the page whether every voice sample was found.
    voice_status = check_voice_files()
    gr.Markdown(voice_status)

    with gr.Tabs():
        # Tab 1: speak text in one of the predefined voices.
        with gr.TabItem("TTS"):
            with gr.Row():
                tts_text = gr.Textbox(label="Text to speak")
                tts_voice = gr.Dropdown(
                    choices=list(celebrity_voices),
                    label="Celebrity Voice",
                )
                tts_language = gr.Dropdown(
                    choices=["en", "es", "fr", "de", "it", "ar"],
                    value="en",
                    label="Language",
                )
            tts_generate_btn = gr.Button("Generate")
            tts_output = gr.Audio(label="Generated Audio")

            tts_generate_btn.click(
                fn=tts_generate,
                inputs=[tts_text, tts_voice, tts_language],
                outputs=tts_output,
            )

        # Tab 2: placeholder text only; no handler is attached.
        with gr.TabItem("Talking Image"):
            gr.Markdown("Talking Image functionality coming soon!")

        # Tab 3: speak text using a user-supplied reference recording.
        with gr.TabItem("Clone Voice"):
            with gr.Row():
                clone_text = gr.Textbox(label="Text to speak")
                clone_audio = gr.Audio(
                    label="Voice reference audio file",
                    type="filepath",
                )
                clone_language = gr.Dropdown(
                    choices=["en", "es", "fr", "de", "it", "ar"],
                    value="en",
                    label="Language",
                )
            clone_generate_btn = gr.Button("Generate")
            clone_output = gr.Audio(label="Generated Audio")

            clone_generate_btn.click(
                fn=clone_voice,
                inputs=[clone_text, clone_audio, clone_language],
                outputs=clone_output,
            )
|
|
|
# JavaScript snippet that reloads the page with `__theme=dark` in the query
# string unless it is already set.
# NOTE(review): nothing in the visible code passes js_func to Gradio, so it
# currently has no effect — confirm whether it should be wired in or removed.
js_func = """
function refresh() {
    const url = new URL(window.location);

    if (url.searchParams.get('__theme') !== 'dark') {
        url.searchParams.set('__theme', 'dark');
        window.location.href = url.href;
    }
}
"""


demo.launch()


# Remove leftover generated audio once launch() has returned.
# The handlers create their output with tempfile.NamedTemporaryFile, which
# writes to tempfile.gettempdir() rather than the working directory, so the
# system temp directory is scanned as well as the current directory.
# dict.fromkeys de-duplicates while keeping order in case both are the same.
cleanup_dirs = dict.fromkeys(
    os.path.abspath(path) for path in (os.getcwd(), tempfile.gettempdir())
)
for directory in cleanup_dirs:
    for file in os.listdir(directory):
        # "tmp" is NamedTemporaryFile's default prefix; ".wav" is the suffix
        # the handlers request.
        if file.endswith('.wav') and file.startswith('tmp'):
            try:
                os.remove(os.path.join(directory, file))
            except OSError:
                # Best-effort: the file may already be gone or be in use.
                pass
|
|