|
import torch |
|
from TTS.api import TTS |
|
import gradio as gr |
|
import os |
|
import spaces |
|
|
|
|
|
|
|
|
|
|
|
|
|
def init_TTS():
    """Load the XTTS v2 model and place it on the best available device.

    Returns:
        The ``TTS`` object for ``tts_models/multilingual/multi-dataset/xtts_v2``
        after ``.to(...)`` has been applied with ``"cuda"`` when
        ``torch.cuda.is_available()`` is true, and ``"cpu"`` otherwise.
    """
    if torch.cuda.is_available():
        target = "cuda"
    else:
        target = "cpu"

    model = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
    return model.to(target)
|
|
|
|
|
def _get_tts():
    """Return the shared XTTS model, creating it on first use.

    A module-level ``tts`` is reused when one is already bound; otherwise the
    model is built with ``init_TTS()`` and stored under that name so later
    calls in the same process reuse it.
    """
    global tts
    try:
        return tts
    except NameError:
        # No module-level model has been bound yet: load it now and keep it.
        tts = init_TTS()
        return tts


@spaces.GPU
def generate_speech(text):
    """Synthesize ``text`` to a WAV file using the predefined voice and language.

    Args:
        text: The text to speak, as entered in the Gradio textbox.

    Returns:
        The path of the written audio file (``"output.wav"``).
    """
    file_path = "output.wav"
    speaker_wav = "/content/speaker.wav"  # reference recording of the target voice
    language = "en"

    # Resolve the model through the helper instead of a bare ``tts`` name:
    # nothing guarantees a module-level ``tts`` exists when a request arrives.
    _get_tts().tts_to_file(
        text=text,
        file_path=file_path,
        speaker_wav=speaker_wav,
        language=language,
    )
    return file_path
|
|
|
|
|
|
|
def main():
    """Load the TTS model, build the Gradio interface, and launch it.

    Returns:
        ``0`` once ``interface.launch()`` has returned.
    """
    # The request handler resolves ``tts`` as a module-level name, so the model
    # has to be bound globally; a local variable here would be invisible to it.
    global tts
    tts = init_TTS()

    interface = gr.Interface(
        fn=generate_speech,
        inputs=[
            gr.Textbox(label="Enter your text"),
        ],
        outputs="audio",
        title="Voice Synthesis with Coqui-XTTS",
        description="Synthesize speech using predefined target voice and language.",
    )

    interface.launch()
    return 0
|
|
|
# Start the app only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    main()
|
|
|
|
|
|
|
|
|
|