File size: 1,135 Bytes
c636952
a416ccf
9581ca3
 
 
a416ccf
d8fe51c
 
 
a416ccf
d8fe51c
a416ccf
 
 
 
d8fe51c
a416ccf
 
 
 
c636952
d8fe51c
c636952
a416ccf
d8fe51c
c636952
a416ccf
d8fe51c
 
a416ccf
d8fe51c
a416ccf
d8fe51c
a416ccf
 
 
 
 
c636952
d8fe51c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import os
import tempfile

import gradio as gr
from TTS.api import TTS

# Pre-accept the Coqui model terms of service through the environment so that
# model setup does not have to ask for confirmation interactively.
os.environ["COQUI_TOS_AGREED"] = "1"

# Load the XTTS-v2 model once at startup; every request reuses this instance.
# NOTE: `trust_remote_code` is a Hugging Face `from_pretrained` option, not a
# parameter of the Coqui `TTS` constructor, so it must not be passed here
# (an unknown keyword argument makes the constructor raise TypeError).
tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2")
tts.to("cpu")

# Function to synthesize voice
def generate_voice(text: str, speaker_audio, language: str = "en") -> str:
    """Synthesize ``text`` in the voice of the given reference recording.

    Args:
        text: The text to speak.
        speaker_audio: Path to the reference recording whose voice is cloned.
            The UI passes a file path, or ``None`` when nothing was uploaded.
        language: Language code forwarded to the model. Defaults to ``"en"``,
            which is what the UI uses.

    Returns:
        Path of the WAV file that was written.

    Raises:
        gr.Error: If ``text`` is empty/whitespace-only or no speaker audio
            was provided, so the UI can show a readable message.
    """
    # Reject unusable input up front instead of letting the model fail with
    # an opaque error deep inside synthesis.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")
    if not speaker_audio:
        raise gr.Error("Please upload a speaker audio sample.")

    # Use a unique file per request: a single shared "output.wav" would be
    # overwritten when two requests run close together, handing one user the
    # other user's audio. The file is intentionally left in the system temp
    # directory because the caller still needs to read it after we return.
    fd, output_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)  # only the path is needed; the model opens the file itself

    tts.tts_to_file(
        text=text,
        speaker_wav=speaker_audio,
        file_path=output_path,
        language=language,
    )
    return output_path

# Gradio interface: a text box and a reference-audio upload feed
# generate_voice, whose resulting file is played back in an audio widget.
with gr.Blocks() as demo:
    gr.Markdown("# 🗣️ Voice Cloning with Coqui XTTS-v2")

    # Both inputs share one row.
    with gr.Row():
        text_input = gr.Textbox(
            placeholder="Type the text you want to synthesize...",
            label="Enter Text",
        )
        speaker_audio_input = gr.Audio(
            type="filepath",
            label="Upload Speaker Audio (WAV)",
        )

    # Result player and the button that triggers synthesis.
    output_audio = gr.Audio(type="filepath", label="Generated Voice")
    generate_button = gr.Button("Generate Voice")

    # Wire the button: (text, speaker clip) -> generate_voice -> output player.
    generate_button.click(
        generate_voice,
        [text_input, speaker_audio_input],
        output_audio,
    )

# Start serving the app.
demo.launch()