|
import numpy as np |
|
import torch |
|
import gradio as gr |
|
from TTS.api import TTS |
|
|
|
|
|
class VoiceCloner:
    """Hold a voice model read from a NumPy file plus a ``TTS`` engine.

    Both attributes start as ``None``; ``load_model`` fills ``self.model`` and
    ``load_tts_model`` fills ``self.tts``.
    """

    def __init__(self):
        # Data read by ``load_model``; ``None`` until a model has been loaded.
        self.model = None
        # ``TTS`` instance built by ``load_tts_model``; ``None`` until then.
        self.tts = None

    def load_model(self, npz_file):
        """Read *npz_file* with ``numpy.load`` and store the result in ``self.model``.

        For an ``.npz`` archive every stored array is read immediately and
        kept in a plain ``dict`` keyed by array name, after which the archive
        is closed. Anything else ``numpy.load`` returns (e.g. the single
        array of a ``.npy`` file) is stored as-is.

        Args:
            npz_file: Path or file object accepted by ``numpy.load``.
        """
        loaded = np.load(npz_file)
        if not hasattr(loaded, "files"):
            # Not an archive: there is no underlying handle to release.
            self.model = loaded
            return
        # ``np.load`` reads archives lazily and keeps the file open until the
        # returned object is closed. Copy the arrays out and close it so the
        # handle is not leaked and the data outlives the source file.
        with loaded:
            self.model = {key: loaded[key] for key in loaded.files}

    def clone_voice(self, audio_file):
        """Return *audio_file* unchanged.

        No conversion is performed here; the input is passed straight through.
        """
        return audio_file

    def load_tts_model(self):
        """Instantiate the ``TTS`` engine and keep it in ``self.tts``."""
        self.tts = TTS(model_name="tts_models/en/ljspeech/glow-tts")

    def text_to_speech(self, text):
        """Synthesize *text* with the loaded ``TTS`` engine.

        Returns:
            The value of ``self.tts.tts(text)``, or the string
            ``"TTS model not loaded!"`` when ``load_tts_model`` has not been
            called yet.
        """
        if self.tts is None:
            return "TTS model not loaded!"
        return self.tts.tts(text)
|
|
|
|
|
def create_interface():
    """Build the Gradio Blocks UI for model loading, voice cloning and TTS.

    Returns:
        The assembled ``gr.Blocks`` app. It is not launched here.
    """
    cloner = VoiceCloner()

    def load_and_initialize(npz):
        """Load the uploaded model and the TTS engine; return a status message."""
        if npz is None:
            # The button can be clicked before any file has been uploaded.
            return "Please upload a .npz voice model first."
        # The upload arrives either as an object exposing ``.name`` or as a
        # plain path string; accept both.
        cloner.load_model(getattr(npz, "name", npz))
        cloner.load_tts_model()
        return "Model Loaded!"

    def synthesize(text):
        """Adapt ``cloner.text_to_speech`` output to what ``gr.Audio`` accepts."""
        result = cloner.text_to_speech(text)
        if isinstance(result, str):
            # ``text_to_speech`` reports "not loaded" as a string; an Audio
            # output would treat that as a file path, so show it as an error.
            raise gr.Error(result)
        # An Audio output takes ``(sample_rate, samples)``.
        # NOTE(review): assumes the TTS object exposes
        # ``synthesizer.output_sample_rate`` - confirm for the installed version.
        sample_rate = cloner.tts.synthesizer.output_sample_rate
        return sample_rate, np.asarray(result, dtype=np.float32)

    with gr.Blocks() as demo:
        gr.Markdown("## Voice Cloning and TTS Application")

        npz_file = gr.File(label="Upload Your .npz Voice Model")
        # NOTE(review): ``source=`` is kept as written; newer Gradio releases
        # spell this ``sources=[...]`` - confirm against the pinned version.
        audio_input = gr.Audio(source="upload", type="filepath", label="Upload Original Audio")
        text_input = gr.Textbox(label="Text Input for TTS")

        output_audio = gr.Audio(label="Cloned Voice Output or TTS Output")
        # Event listeners in Blocks need a real component as output, so the
        # load status gets its own read-only text box.
        status_output = gr.Textbox(label="Status", interactive=False)

        upload_button = gr.Button("Load Model")
        clone_button = gr.Button("Clone Voice")
        tts_button = gr.Button("Convert Text to Speech")

        upload_button.click(fn=load_and_initialize, inputs=npz_file, outputs=status_output)
        clone_button.click(fn=cloner.clone_voice, inputs=audio_input, outputs=output_audio)
        tts_button.click(fn=synthesize, inputs=text_input, outputs=output_audio)

    return demo
|
|
|
if __name__ == "__main__":
    # Build the UI and start serving it when the file is run as a script.
    create_interface().launch()
|
|