|
|
|
import torch |
|
from TTS.api import TTS |
|
import gradio as gr |
|
import os |
|
import spaces |
|
import yaml |
|
|
|
|
|
|
|
def get_config():
    """Load the application configuration from a YAML file.

    The file location is taken from the ``CONFIG_PATH`` environment variable
    and the document is parsed with ``yaml.safe_load``.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file's content.

    Raises:
        KeyError: If ``CONFIG_PATH`` is not set in the environment.
        OSError: If the file cannot be opened.
    """
    try:
        config_path = os.environ["CONFIG_PATH"]
    except KeyError:
        # Still a KeyError (callers may catch it), but with a message that
        # says what to do instead of just echoing the missing key.
        raise KeyError(
            "CONFIG_PATH environment variable is not set; "
            "point it at the YAML configuration file"
        ) from None

    # Decode explicitly as UTF-8 instead of relying on the platform's
    # locale-dependent default encoding.
    with open(config_path, "r", encoding="utf-8") as file:
        return yaml.safe_load(file)
|
|
|
|
|
def init_TTS(config):
    """Instantiate the TTS model named in the config and move it to a device.

    Args:
        config: Mapping providing ``config['inference']['model']``, the model
            identifier handed to ``TTS``.

    Returns:
        The object returned by ``TTS(...).to(device)``.
    """
    # Prefer the GPU when torch reports one, otherwise stay on the CPU.
    if torch.cuda.is_available():
        target_device = "cuda"
    else:
        target_device = "cpu"

    model_name = config['inference']['model']
    engine = TTS(model_name)
    return engine.to(target_device)
|
|
|
|
|
@spaces.GPU
def generate_speech(microphone, text):
    """Synthesize ``text`` via the module-level ``tts`` object.

    The output path, speaker reference and language are all read from the
    module-level ``config['inference']`` mapping.

    Args:
        microphone: Value of the audio input component. Accepted so the
            signature matches the interface inputs, but not used here.
        text: The text to synthesize.

    Returns:
        ``config['inference']['file_path']`` -- the same path that was passed
        to ``tts.tts_to_file`` as ``file_path``.
    """
    synthesize = tts.tts_to_file
    settings = config['inference']

    synthesize(
        text=text,
        file_path=settings['file_path'],
        speaker_wav=settings['speaker_wav'],
        language=settings['language'],
    )
    return settings['file_path']
|
|
|
|
|
|
|
def main(config):
    """Build the Gradio interface around ``generate_speech`` and launch it.

    Args:
        config: The loaded configuration. Not read in this function; kept in
            the signature so existing ``main(config)`` callers keep working.

    Returns:
        0, once ``demo.launch()`` has returned.
    """
    demo = gr.Interface(
        fn=generate_speech,
        inputs=[
            # Plain list elements (no name binding inside the literal); the
            # order maps positionally onto generate_speech(microphone, text).
            gr.Audio(label="Audio", sources="microphone", type="filepath", elem_id='audio'),
            gr.Textbox(label="Enter your text"),
        ],
        outputs="audio",
        title="Voice Synthesis with Coqui-XTTS",
        description="Synthesize speech using predefined target voice and language.",
    )

    demo.launch()
    return 0
|
|
|
|
|
|
|
if __name__ == "__main__":
    # ``config`` and ``tts`` are deliberately bound at module level:
    # generate_speech() looks both of them up as globals.
    config = get_config()
    tts = init_TTS(config)

    # Build and launch the UI.
    main(config)
|
|
|
|
|
|
|
|
|
|