# NOTE: file-viewer residue captured together with the source; not part of the program.
# File size: 3,534 Bytes
# Commit hashes from the blame gutter: 88f7073 3cdb410 11023cf e37c63c 3cdb410 e37c63c 471fe68 a4f7fa8 471fe68 7e4b5db 471fe68 e37c63c 471fe68 1a0b3dd 471fe68 e37c63c 471fe68 88f7073 471fe68 0c252d1 471fe68 6af9f14 471fe68 88f7073 8a71b86 88f7073 d536f9b 88f7073 e37c63c
# (The viewer's line-number gutter, 1-110, followed here.)
import gradio as gr
import torch
from TTS.api import TTS
import os
import spaces
import tempfile
# Mark the Coqui terms of service as agreed via the environment.
# NOTE(review): presumably this keeps the model load below from prompting
# for consent - confirm against the Coqui TTS documentation.
os.environ["COQUI_TOS_AGREED"] = "1"

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
# Initialize TTS model
def load_tts_model():
    """Create the XTTS v2 model and move it onto ``device``.

    Returns:
        The ``TTS`` instance returned by ``.to(device)``.
    """
    model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
    model = TTS(model_name)
    return model.to(device)


# Built once at import time; both synthesis handlers below use this instance.
tts = load_tts_model()
# Preset voices for the TTS tab, mapping the name shown in the dropdown to
# the reference audio sample used as the speaker voice.
# This is an example list; extend or replace it as needed.
# NOTE(review): the last two paths look like unfilled placeholders - confirm
# the sample files exist before relying on those voices.
celebrity_voices = {
    "morgan": "./voices/morgan.mp3",
    "Scarlett Johansson": "path/to/scarlett_johansson_sample.wav",
    "David Attenborough": "path/to/david_attenborough_sample.wav",
}
@spaces.GPU(duration=120)
def tts_generate(text, voice, language):
    """Synthesize ``text`` in one of the preset voices.

    Args:
        text: The text to speak.
        voice: A key of ``celebrity_voices`` selecting the reference sample.
        language: Language code forwarded to the TTS model (e.g. ``"en"``).

    Returns:
        Path of a newly created ``.wav`` file containing the generated audio.

    Raises:
        gr.Error: If ``text`` is empty, ``voice`` is missing or unknown, or
            the reference sample for ``voice`` does not exist on disk.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to speak.")

    # The dropdown may submit None (nothing selected); report that to the
    # user instead of failing with a bare KeyError.
    speaker_wav = celebrity_voices.get(voice)
    if speaker_wav is None:
        raise gr.Error("Please select a celebrity voice.")
    if not os.path.isfile(speaker_wav):
        raise gr.Error(f"No reference audio is available for voice '{voice}'.")

    # Reserve a unique output path, then close the handle before the model
    # writes to it (an open handle blocks the write on some platforms).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        temp_audio_path = temp_audio.name

    try:
        tts.tts_to_file(
            text=text,
            speaker_wav=speaker_wav,
            language=language,
            file_path=temp_audio_path,
        )
    except Exception:
        # Synthesis failed: remove the reserved file so it does not leak,
        # then let the original error propagate.
        if os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
        raise
    return temp_audio_path
# `enable_queue` is a legacy argument of the ZeroGPU decorator; use the same
# explicit GPU time budget as tts_generate instead.
@spaces.GPU(duration=120)
def clone_voice(text, audio_file, language):
    """Synthesize ``text`` in the voice of an uploaded reference recording.

    Args:
        text: The text to speak.
        audio_file: Path to the reference recording; ``None`` when the user
            has not provided one.
        language: Language code forwarded to the TTS model (e.g. ``"en"``).

    Returns:
        Path of a newly created ``.wav`` file containing the generated audio.

    Raises:
        gr.Error: If ``text`` is empty or no reference audio was provided.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to speak.")
    if not audio_file:
        raise gr.Error("Please provide a voice reference audio file.")

    # Reserve a unique output path, then close the handle before the model
    # writes to it (an open handle blocks the write on some platforms).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        temp_audio_path = temp_audio.name

    try:
        tts.tts_to_file(
            text=text,
            speaker_wav=audio_file,
            language=language,
            file_path=temp_audio_path,
        )
    except Exception:
        # Synthesis failed: remove the reserved file so it does not leak,
        # then let the original error propagate.
        if os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
        raise
    return temp_audio_path
# Placeholder function for Talking Image tab
def talking_image_placeholder():
    """Return a fixed "not implemented" notice for the Talking Image feature."""
    notice = "Talking Image functionality not implemented yet."
    return notice
# Define Gradio interface
# Language codes offered in both synthesis tabs; defined once so the two
# dropdowns cannot drift apart.
SUPPORTED_LANGUAGES = ("en", "es", "fr", "de", "it", "ar")

with gr.Blocks() as demo:
    gr.Markdown("# Advanced Voice Synthesis")

    with gr.Tabs():
        # Tab 1: speak text in one of the preset voices.
        with gr.TabItem("TTS"):
            with gr.Row():
                tts_text = gr.Textbox(label="Text to speak")
                tts_voice = gr.Dropdown(
                    choices=list(celebrity_voices),
                    # Preselect the first voice so pressing "Generate" with an
                    # untouched dropdown does not submit None.
                    value=next(iter(celebrity_voices), None),
                    label="Celebrity Voice",
                )
                tts_language = gr.Dropdown(
                    list(SUPPORTED_LANGUAGES), label="Language", value="en"
                )
            tts_generate_btn = gr.Button("Generate")
            tts_output = gr.Audio(label="Generated Audio")
            tts_generate_btn.click(
                tts_generate,
                inputs=[tts_text, tts_voice, tts_language],
                outputs=tts_output,
            )

        # Tab 2: not implemented yet; shows a notice only.
        with gr.TabItem("Talking Image"):
            gr.Markdown("Talking Image functionality coming soon!")

        # Tab 3: speak text in the voice of an uploaded reference recording.
        with gr.TabItem("Clone Voice"):
            with gr.Row():
                clone_text = gr.Textbox(label="Text to speak")
                # type="filepath" hands the handler a path on disk.
                clone_audio = gr.Audio(
                    label="Voice reference audio file", type="filepath"
                )
                clone_language = gr.Dropdown(
                    list(SUPPORTED_LANGUAGES), label="Language", value="en"
                )
            clone_generate_btn = gr.Button("Generate")
            clone_output = gr.Audio(label="Generated Audio")
            clone_generate_btn.click(
                clone_voice,
                inputs=[clone_text, clone_audio, clone_language],
                outputs=clone_output,
            )
# JavaScript source kept as a string. The `refresh` function it defines
# reloads the page with the `__theme` query parameter set to 'dark' unless
# the parameter already has that value.
# NOTE(review): the gr.Blocks() call that builds `demo` is not given this
# string, so it currently has no effect - presumably it was meant to be passed
# as gr.Blocks(js=js_func); confirm before wiring it in.
js_func = """
function refresh() {
const url = new URL(window.location);
if (url.searchParams.get('__theme') !== 'dark') {
url.searchParams.set('__theme', 'dark');
window.location.href = url.href;
}
}
"""
# Launch the interface
# TODO: js_func (the dark-theme redirect) is defined but unused; pass it as
# gr.Blocks(js=js_func) when the UI is built to enable it.
try:
    demo.launch()
finally:
    # Clean up temporary files once the Gradio server has stopped (or failed
    # to start). The handlers create their outputs with
    # tempfile.NamedTemporaryFile(suffix=".wav"), which writes to
    # tempfile.gettempdir() with the default "tmp" prefix - not to the
    # working directory - so that is the directory to scan.
    # NOTE: the temp directory is shared; any other "tmp*.wav" files in it
    # are removed too.
    temp_dir = tempfile.gettempdir()
    for file_name in os.listdir(temp_dir):
        if not (file_name.startswith("tmp") and file_name.endswith(".wav")):
            continue
        try:
            os.remove(os.path.join(temp_dir, file_name))
        except OSError:
            # Best-effort cleanup: the file may already be gone or be in use.
            continue