File size: 2,378 Bytes
0f20c1d 4110922 d33f32d 4110922 6afc51e 3a4510f 4110922 0f20c1d 3a4510f 0f20c1d 4110922 3a4510f 4af5dfa 3a4510f 4af5dfa 4110922 0a5aead 486cbec 0a5aead 486cbec d49cd75 0a5aead 3a4510f dd46461 3a4510f 4049f7e 0a5aead 4af5dfa 0f20c1d 1b3c5cb 486cbec 5b06c99 486cbec 5b06c99 4af5dfa c5ee403 486cbec 5b06c99 c5ee403 486cbec c5ee403 4af5dfa c5ee403 4af5dfa 0f20c1d 4af5dfa 3a4510f 1b3c5cb 3a4510f 1b3c5cb 4af5dfa 45fac78 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
###################################### imports ######################################
import torch
from TTS.api import TTS
import gradio as gr
import os
import spaces
import yaml
###################################### utilities ######################################
def get_config():
    """Load the application configuration from the YAML file named by CONFIG_PATH.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file contents.

    Raises:
        KeyError: if the CONFIG_PATH environment variable is not set.
        OSError: if the file cannot be opened (e.g. FileNotFoundError).
    """
    # get config path
    try:
        config_path = os.environ["CONFIG_PATH"]
    except KeyError:
        # Same exception type as before, but with a message that says what to do.
        raise KeyError(
            "CONFIG_PATH environment variable is not set; "
            "it must point to the YAML configuration file"
        ) from None
    # Parse the YAML file. The encoding is pinned so decoding does not depend on
    # the platform's locale default.
    with open(config_path, 'r', encoding='utf-8') as file:
        return yaml.safe_load(file)
def init_TTS(config):
    """Build the TTS model named in the config and move it to the available device.

    Args:
        config: mapping read as ``config['inference']['model']``.

    Returns:
        The ``TTS`` instance after ``.to(device)``, where device is "cuda" when
        ``torch.cuda.is_available()`` is true and "cpu" otherwise.
    """
    # Pick the GPU when torch reports one, otherwise stay on the CPU
    if torch.cuda.is_available():
        target_device = "cuda"
    else:
        target_device = "cpu"
    # Instantiate the model and place it on the chosen device
    model_name = config['inference']['model']
    return TTS(model_name).to(target_device)
@spaces.GPU
def generate_speech(voice_choice, microphone, text):
    """Synthesize ``text`` with the selected speaker voice and return the output path.

    Reads the module-level ``config`` and ``tts`` objects, which are assigned in
    the ``__main__`` block of this file.

    Args:
        voice_choice: value of the UI radio; "Record" selects the recorded clip,
            any other value selects the predefined speaker from the config.
        microphone: the recorded clip as passed by the Audio input (configured
            with ``type="filepath"``); presumably None when nothing was
            recorded -- confirm against the Gradio version in use.
        text: the text to synthesize.

    Returns:
        ``config['inference']['file_path']``, the file the speech was written to.
    """
    inference_cfg = config['inference']
    # "Record" is the default radio value and the UI label promises
    # "Record or skip to use predefined voice", so a skipped recording must fall
    # back to the predefined speaker instead of passing an empty speaker_wav.
    if voice_choice == "Record" and microphone:
        speaker = microphone
    else:
        speaker = inference_cfg['speaker_wav']
    output_path = inference_cfg['file_path']
    # Generate speech using the provided text, speaker voice, and language
    tts.tts_to_file(text=text,
                    file_path=output_path,
                    speaker_wav=speaker,
                    language=inference_cfg['language'])
    return output_path
###################################### main ######################################
def UI(config):
    """Assemble the Gradio interface around ``generate_speech`` and launch it.

    Args:
        config: not read inside this function; kept so the existing call
            ``UI(config)`` continues to work.

    Returns:
        0 once ``demo.launch()`` has returned.
    """
    # Input widgets, created in the order generate_speech receives them:
    # voice choice, microphone recording, text to speak
    input_components = [
        gr.Radio(
            label="Record or skip to use predefined voice.",
            choices=["Record", "Predefined (Nancy)"],
            value="Record",
        ),
        gr.Audio(
            label="Audio",
            sources="microphone",
            type="filepath",
            elem_id='audio',
        ),
        gr.Textbox(label="Enter your text"),
    ]
    # Wire the widgets to the synthesis function
    demo = gr.Interface(
        fn=generate_speech,
        inputs=input_components,
        outputs="audio",
        title="Voice cloning and Synthesis with Coqui-XTTS",
        description="Clone your voice and Synthesize speech using predefined target voice and language.",
    )
    # Start serving the interface
    demo.launch()
    return 0
###################################### Execute ######################################
if __name__ == "__main__":
    # `config` and `tts` are intentionally bound at module level:
    # generate_speech looks both names up as globals when it is called.
    # Load the YAML configuration first, since the model name comes from it.
    config = get_config()
    # Build the TTS model described by the configuration.
    tts = init_TTS(config)
    # Build and launch the Gradio interface.
    UI(config)
|