import torch
from TTS.api import TTS
import gradio as gr
import spaces
import os

# Accept the Coqui model license non-interactively so the model can download.
os.environ["COQUI_TOS_AGREED"] = "1"

# Use the GPU when available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the multilingual XTTS v2 voice-cloning model onto the selected device.
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)


@spaces.GPU
def generate_speech(text, speaker_wav, language):
    # Clone the reference voice and synthesize the text, writing the result to a WAV file.
    file_path = "output.wav"
    tts.tts_to_file(text=text,
                    file_path=file_path,
                    speaker_wav=speaker_wav,
                    language=language)
    return file_path


interface = gr.Interface(
    fn=generate_speech,
    inputs=[
        gr.Textbox(label="Enter your text"),
        gr.Textbox(label="Path to target speaker WAV file", value="/content/speaker.wav"),
        gr.Dropdown(label="Language", choices=["en"], value="en")
    ],
    outputs="audio",
    title="Voice Synthesis and Cloning with Coqui-XTTS",
    description="Synthesize speech using a target voice and language."
)

interface.launch()