Spaces:

szili2011
/

SonicNpz

Build error

File size: 2,717 Bytes

1700fc9
 
 
cca9d56
1700fc9
 
 
 
 
cca9d56
1700fc9
 
 
 
 
 
 
cca9d56
1700fc9
 
 
cca9d56
 
 
 
 
 
 
 
 
 
 
 
1700fc9
 
 
 
 
cca9d56
1700fc9
 
 
 
cca9d56
 
 
1700fc9
 
 
 
 
cca9d56
 
 
 
1700fc9
 
 
cca9d56
1700fc9
 
 
 
 
cca9d56
 
 
 
1700fc9

import numpy as np
import torch
import gradio as gr
from TTS.api import TTS  # Import TTS library

# Placeholder for model loading and voice cloning logic
class VoiceCloner:
    """Hold a user-supplied voice model and a pretrained TTS engine.

    ``load_model`` and ``clone_voice`` are still placeholders: the arrays
    from the ``.npz`` file are only stored, and cloning returns its input
    unchanged.
    """

    def __init__(self):
        # Arrays read from the uploaded archive; None until load_model() runs.
        self.model = None
        # TTS engine instance; None until load_tts_model() runs.
        self.tts = None

    def load_model(self, npz_file):
        """Read the arrays stored in ``npz_file`` into ``self.model``.

        Args:
            npz_file: Path or binary file object accepted by ``np.load``.
        """
        # The file comes from an upload, so never allow pickled object
        # arrays: unpickling untrusted data can execute arbitrary code.
        loaded = np.load(npz_file, allow_pickle=False)
        if hasattr(loaded, "files"):
            # An .npz archive is read lazily and keeps its file handle open.
            # Materialise every array and close the archive so the handle is
            # not leaked and the data outlives the uploaded temp file.
            with loaded:
                self.model = {name: loaded[name] for name in loaded.files}
        else:
            # A plain .npy payload is already a fully loaded array.
            self.model = loaded

    def clone_voice(self, audio_file):
        """Return the cloned audio for ``audio_file``.

        Placeholder: no cloning is implemented yet, so the input is returned
        unchanged.
        """
        return audio_file

    def load_tts_model(self):
        """Instantiate the pretrained TTS model and keep it in ``self.tts``."""
        # A different model name can be substituted here if needed.
        self.tts = TTS(model_name="tts_models/en/ljspeech/glow-tts")

    def text_to_speech(self, text):
        """Synthesize ``text`` with the loaded TTS model.

        Args:
            text: The sentence(s) to speak.

        Returns:
            A ``(sample_rate, waveform)`` tuple with the waveform converted to
            a float32 NumPy array, which is a format ``gr.Audio`` accepts as
            an output value.

        Raises:
            gr.Error: If the TTS model has not been loaded or ``text`` is
                blank. Raising (instead of returning a message string) keeps
                non-audio values out of the audio output component.
        """
        if self.tts is None:
            raise gr.Error("TTS model not loaded!")
        if not text or not text.strip():
            raise gr.Error("Text input is empty!")

        samples = self.tts.tts(text)
        waveform = np.asarray(samples, dtype=np.float32)
        # The synthesizer knows the rate the waveform was generated at.
        return self.tts.synthesizer.output_sample_rate, waveform

# Create the Gradio interface
def create_interface():
    """Build the Gradio Blocks UI for model loading, cloning and TTS.

    Returns:
        The assembled ``gr.Blocks`` app (not yet launched).
    """
    cloner = VoiceCloner()

    with gr.Blocks() as demo:
        gr.Markdown("## Voice Cloning and TTS Application")

        # User uploads their .npz file
        npz_file = gr.File(label="Upload Your .npz Voice Model")
        # No `source=` keyword: Gradio 3.x already defaults to "upload",
        # and newer releases reject that keyword (it became `sources=`),
        # so omitting it keeps the component valid on both.
        audio_input = gr.Audio(type="filepath", label="Upload Original Audio")
        text_input = gr.Textbox(label="Text Input for TTS")

        output_audio = gr.Audio(label="Cloned Voice Output or TTS Output")

        upload_button = gr.Button("Load Model")
        # Event listeners in Blocks need real components as outputs; this
        # box receives the message returned by load_and_initialize.
        status = gr.Textbox(label="Status", interactive=False)

        # Button to clone voice
        clone_button = gr.Button("Clone Voice")

        # Button to convert text to speech
        tts_button = gr.Button("Convert Text to Speech")

        def load_and_initialize(npz):
            """Load the uploaded .npz model and the TTS model; return a status message."""
            if npz is None:
                # The button was pressed before any file was uploaded.
                return "Please upload a .npz file first."
            # Depending on the Gradio version the value is a file wrapper
            # exposing `.name` or already a path string; accept both.
            path = getattr(npz, "name", npz)
            cloner.load_model(path)
            if cloner.tts is None:
                # Instantiate the TTS model only once, not on every click.
                cloner.load_tts_model()
            return "Model Loaded!"

        upload_button.click(fn=load_and_initialize, inputs=npz_file, outputs=status)

        # Clone the voice when the button is pressed
        clone_button.click(fn=cloner.clone_voice, inputs=audio_input, outputs=output_audio)

        # Convert text to speech when the button is pressed
        tts_button.click(fn=cloner.text_to_speech, inputs=text_input, outputs=output_audio)

    return demo

if __name__ == "__main__":
    # Build the UI and launch it only when this file is executed as a script,
    # so importing the module has no side effects.
    # NOTE(review): the `demo` name and the `demo.launch()` form are kept
    # as-is; Gradio tooling may look for them — confirm before restyling.
    demo = create_interface()
    demo.launch()