# Gradio demo app: text-to-speech with preset voices and voice cloning,
# built on the Coqui XTTS v2 model.
import gradio as gr
import torch
from TTS.api import TTS
import os
import spaces
import tempfile

# NOTE(review): presumably marks the Coqui terms of service as accepted so the
# model load does not stop to ask — confirm against the TTS documentation.
os.environ["COQUI_TOS_AGREED"] = "1"

# Use the GPU when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Initialize TTS model
def load_tts_model():
    """Construct the XTTS v2 model and move it to the selected device.

    Returns:
        Whatever ``TTS(...).to(device)`` returns for the multilingual
        ``xtts_v2`` model.
    """
    model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
    model = TTS(model_name)
    return model.to(device)


# Loaded once at import time and shared by the request handlers in this file.
tts = load_tts_model()

# Celebrity voices (example list, you may want to expand or modify this).
# Maps the name listed in the voice dropdown to the audio file that is handed
# to the model as the speaker reference.
# NOTE(review): the "path/to/..." entries look like placeholders — confirm the
# sample files exist before offering these voices.
celebrity_voices = {
    "morgan": "./voices/morgan.mp3",
    "Scarlett Johansson": "path/to/scarlett_johansson_sample.wav",
    "David Attenborough": "path/to/david_attenborough_sample.wav",
}
@spaces.GPU(duration=120)
def tts_generate(text, voice, language):
    """Synthesize ``text`` in one of the preset voices and return the WAV path.

    Args:
        text: The text to speak.
        voice: A key of ``celebrity_voices`` selecting the speaker reference.
        language: Language code forwarded to the model (e.g. ``"en"``).

    Returns:
        Path of a temporary ``.wav`` file containing the generated audio. The
        file is created with ``delete=False``, so it is not removed here.

    Raises:
        gr.Error: If ``text`` is blank or ``voice`` is not a known preset.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to speak.")

    # The dropdown can deliver None when nothing is selected; report that as a
    # readable UI error instead of letting a KeyError escape.
    speaker_wav = celebrity_voices.get(voice)
    if speaker_wav is None:
        raise gr.Error("Please select a voice.")

    # Reserve a unique output path. The handle is closed immediately so the
    # model can write to the path on its own.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        temp_audio_path = temp_audio.name

    try:
        tts.tts_to_file(
            text=text,
            speaker_wav=speaker_wav,
            language=language,
            file_path=temp_audio_path,
        )
    except Exception:
        # Do not leave the reserved (empty or partial) file behind on failure.
        if os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
        raise

    return temp_audio_path
    
# NOTE(review): `enable_queue` looks like a legacy option of `spaces.GPU` —
# confirm it is still honored by the installed version.
@spaces.GPU(enable_queue=True)
def clone_voice(text, audio_file, language):
    """Synthesize ``text`` in the voice of an uploaded reference recording.

    Args:
        text: The text to speak.
        audio_file: Path of the reference recording (the audio input is
            configured with ``type="filepath"``); ``None`` when none was given.
        language: Language code forwarded to the model (e.g. ``"en"``).

    Returns:
        Path of a temporary ``.wav`` file containing the generated audio. The
        file is created with ``delete=False``, so it is not removed here.

    Raises:
        gr.Error: If ``text`` is blank or no reference audio was provided.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to speak.")

    # Without a reference recording there is no voice to clone; say so in the
    # UI rather than failing inside the model call.
    if not audio_file:
        raise gr.Error("Please provide a voice reference audio file.")

    # Reserve a unique output path. The handle is closed immediately so the
    # model can write to the path on its own.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        temp_audio_path = temp_audio.name

    try:
        tts.tts_to_file(
            text=text,
            speaker_wav=audio_file,
            language=language,
            file_path=temp_audio_path,
        )
    except Exception:
        # Do not leave the reserved (empty or partial) file behind on failure.
        if os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
        raise

    return temp_audio_path

# Placeholder function for Talking Image tab
def talking_image_placeholder() -> str:
    """Return a fixed notice that the Talking Image feature is not implemented."""
    notice = "Talking Image functionality not implemented yet."
    return notice

# Define Gradio interface

# Language codes offered by both the TTS and Clone Voice tabs; kept in one
# place so the two dropdowns cannot drift apart.
LANGUAGE_CHOICES = ["en", "es", "fr", "de", "it", "ar"]

# Preset voice names in dictionary order; the first one is preselected so the
# TTS handler does not receive None when the user never opens the dropdown.
voice_names = list(celebrity_voices)
default_voice = voice_names[0] if voice_names else None

with gr.Blocks() as demo:
    gr.Markdown("# Advanced Voice Synthesis")

    with gr.Tabs():
        # Tab 1: speak text in one of the preset voices.
        with gr.TabItem("TTS"):
            with gr.Row():
                tts_text = gr.Textbox(label="Text to speak")
                tts_voice = gr.Dropdown(
                    choices=voice_names,
                    value=default_voice,
                    label="Celebrity Voice",
                )
                tts_language = gr.Dropdown(
                    LANGUAGE_CHOICES, label="Language", value="en"
                )
            tts_generate_btn = gr.Button("Generate")
            tts_output = gr.Audio(label="Generated Audio")

            tts_generate_btn.click(
                tts_generate,
                inputs=[tts_text, tts_voice, tts_language],
                outputs=tts_output,
            )

        # Tab 2: static notice only; no handler is wired up.
        with gr.TabItem("Talking Image"):
            gr.Markdown("Talking Image functionality coming soon!")

        # Tab 3: speak text in the voice of an uploaded reference recording.
        with gr.TabItem("Clone Voice"):
            with gr.Row():
                clone_text = gr.Textbox(label="Text to speak")
                # type="filepath" makes the handler receive a path string.
                clone_audio = gr.Audio(
                    label="Voice reference audio file", type="filepath"
                )
                clone_language = gr.Dropdown(
                    LANGUAGE_CHOICES, label="Language", value="en"
                )
            clone_generate_btn = gr.Button("Generate")
            clone_output = gr.Audio(label="Generated Audio")

            clone_generate_btn.click(
                clone_voice,
                inputs=[clone_text, clone_audio, clone_language],
                outputs=clone_output,
            )

# JavaScript snippet defining a `refresh()` function that redirects to the
# current URL with `__theme=dark` added, unless that query parameter is
# already 'dark'.
# NOTE(review): nothing in the visible code passes this string to Gradio, so
# it appears to be unused — confirm.
js_func = """
function refresh() {
    const url = new URL(window.location);

    if (url.searchParams.get('__theme') !== 'dark') {
        url.searchParams.set('__theme', 'dark');
        window.location.href = url.href;
    }
}
"""

# Launch the interface
# NOTE: js_func is not wired in; enabling it would mean constructing the UI
# with `gr.Blocks(js=js_func)`.
try:
    demo.launch()
finally:
    # Clean up temporary files (this will run after the Gradio server is
    # closed, including when launch() exits with an exception).
    # NamedTemporaryFile places its files in the system temp directory, not in
    # the current working directory, so that is where the scan has to happen.
    # NOTE(review): this matches every "tmp*.wav" in that directory, which may
    # include files from other processes — confirm that is acceptable.
    temp_dir = tempfile.gettempdir()
    for file in os.listdir(temp_dir):
        if file.startswith('tmp') and file.endswith('.wav'):
            try:
                os.remove(os.path.join(temp_dir, file))
            except OSError:
                # Best-effort cleanup: the file may already be gone or be
                # owned by someone else; skip it and keep going.
                continue