File size: 2,818 Bytes
0f20c1d 4110922 d33f32d 4110922 6afc51e 3a4510f 4110922 0f20c1d 3a4510f 0f20c1d 4110922 3a4510f 4af5dfa 3a4510f 4af5dfa 4110922 0a5aead 6ba8d0a 0a5aead 486cbec d49cd75 0a5aead 3a4510f dd46461 3a4510f 4049f7e 0a5aead 4af5dfa 0f20c1d 1b3c5cb 486cbec 5b06c99 4c6fefc 486cbec cf0681a 8af32ca 16b24b7 5b06c99 4af5dfa c5ee403 486cbec 6ba8d0a 5b06c99 c5ee403 486cbec 4c6fefc c5ee403 4af5dfa c5ee403 4af5dfa 0f20c1d 4af5dfa 3a4510f 1b3c5cb 3a4510f 1b3c5cb 4af5dfa 45fac78 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
###################################### imports ######################################
import torch
from TTS.api import TTS
import gradio as gr
import os
import spaces
import yaml
###################################### utilities ######################################
def get_config():
    """Load the application configuration from a YAML file.

    The file location is taken from the ``CONFIG_PATH`` environment variable.

    Returns:
        The parsed YAML document as returned by ``yaml.safe_load``.

    Raises:
        KeyError: If ``CONFIG_PATH`` is not set in the environment.
        OSError: If the file cannot be opened.
    """
    # get config path
    config_path = os.environ["CONFIG_PATH"]
    # Decode explicitly as UTF-8 so parsing does not depend on the
    # platform's locale encoding.
    with open(config_path, "r", encoding="utf-8") as file:
        config = yaml.safe_load(file)
    return config
def init_TTS(config):
    """Create the TTS model named in the configuration and place it on a device.

    Args:
        config: Mapping that provides the model identifier at
            ``config['inference']['model']``.

    Returns:
        The object returned by ``TTS(...).to(device)``.
    """
    # Prefer the GPU when torch reports one, otherwise fall back to the CPU.
    if torch.cuda.is_available():
        target_device = "cuda"
    else:
        target_device = "cpu"

    model_name = config['inference']['model']
    return TTS(model_name).to(target_device)
@spaces.GPU
def generate_speech(voice_choice, markdown, microphone, text):
    """Synthesize ``text`` to an audio file using the selected speaker voice.

    Relies on the module-level ``config`` and ``tts`` objects.

    Args:
        voice_choice: Value of the voice radio button. ``"Record"`` selects the
            microphone recording; any other value selects the predefined
            speaker sample from the configuration.
        markdown: Unused. Present only because the Markdown component is
            listed among the interface inputs.
        microphone: File path of the recorded audio, or an empty value when
            nothing has been recorded.
        text: The text to synthesize.

    Returns:
        The path of the generated audio file
        (``config['inference']['file_path']``).

    Raises:
        gr.Error: If "Record" is selected but no recording was provided.
    """
    settings = config['inference']

    if voice_choice == "Record":
        # Fail early with a message shown in the UI rather than passing an
        # empty speaker reference down into the TTS library.
        if not microphone:
            raise gr.Error(
                "No recording found. Please record your voice or choose the predefined voice."
            )
        speaker = microphone
    else:
        speaker = settings['speaker_wav']

    output_path = settings['file_path']
    tts.tts_to_file(
        text=text,
        file_path=output_path,
        speaker_wav=speaker,
        language=settings['language'],
    )
    return output_path
###################################### main ######################################
def UI(config):
    """Assemble the Gradio interface for voice cloning and launch it.

    Args:
        config: Accepted for symmetry with the other helpers; not read here.

    Returns:
        ``0`` once ``launch()`` returns.
    """
    # Input widgets, created in the order they are handed to generate_speech.
    voice_selector = gr.Radio(
        label="Record or use a predefined voice.",
        choices=["Record", "Predefined (Nancy)"],
        value="Record",
    )
    recording_hint = gr.Markdown("""If recording, speak loud and clearly. Recommended speaking track
'*printing, in the only sense with which we are at present concerned,
differs from, most if not all, the arts and crafts in the exhibition.*'""")
    mic_input = gr.Audio(label="Audio", sources="microphone", type="filepath", elem_id='audio')
    text_input = gr.Textbox(label="Enter your text")

    input_components = [voice_selector, recording_hint, mic_input, text_input]

    # Wire the widgets to the synthesis callback.
    interface = gr.Interface(
        fn=generate_speech,
        inputs=input_components,
        outputs="audio",
        title="Voice cloning and Synthesis with Coqui-XTTS",
        description="Clone your voice and Synthesize speech using predefined target voice and language. It takes a 10-20 seconds to download the model, so wait to record until the app is *Running on Zero* to begin.",
    )

    # Start serving the app.
    interface.launch()
    return 0
###################################### Execute ######################################
if __name__ == "__main__":
    # `config` and `tts` are bound at module scope on purpose:
    # generate_speech looks them up as globals when handling a request.
    config = get_config()      # settings loaded from the YAML file
    tts = init_TTS(config)     # model built once, before the UI starts
    # Build and launch the Gradio app.
    UI(config)
|