Spaces:

szili2011
/

SonicNpz

Build error

App Files Files Community

szili2011 committed on Oct 20, 2024

Commit

cca9d56

verified ·

1 Parent(s): 3e967d2

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -7

app.py CHANGED Viewed

@@ -1,11 +1,13 @@
 import numpy as np
 import torch
 import gradio as gr
 # Placeholder for model loading and voice cloning logic
 class VoiceCloner:
     def __init__(self):
         self.model = None
     def load_model(self, npz_file):
         data = np.load(npz_file)
@@ -13,38 +15,57 @@ class VoiceCloner:
         # Initialize your model here with the loaded parameters
         self.model = data  # Example; replace with your actual model loading code
-    def clone_voice(self, audio_file, text=None):
         # Implement the logic to clone voice from the uploaded audio file
-        # and possibly from the text if provided
         return audio_file  # Placeholder; return processed audio
 # Create the Gradio interface
 def create_interface():
     cloner = VoiceCloner()
     with gr.Blocks() as demo:
-        gr.Markdown("## Voice Cloning Application")
         # User uploads their .npz file
         npz_file = gr.File(label="Upload Your .npz Voice Model")
         audio_input = gr.Audio(source="upload", type="filepath", label="Upload Original Audio")
-        text_input = gr.Textbox(label="Text Input for TTS (Optional)")
-        output_audio = gr.Audio(label="Cloned Voice Output")
         upload_button = gr.Button("Load Model")
         # Button to clone voice
         clone_button = gr.Button("Clone Voice")
         # Load the model when the user uploads the .npz file
         def load_and_initialize(npz):
             cloner.load_model(npz.name)  # Use the file path to load the model
             return "Model Loaded!"
         upload_button.click(fn=load_and_initialize, inputs=npz_file, outputs="text")
         # Clone the voice when the button is pressed
-        clone_button.click(fn=cloner.clone_voice, inputs=[audio_input, text_input], outputs=output_audio)
     return demo

 import numpy as np
 import torch
 import gradio as gr
+from TTS.api import TTS  # Import TTS library
 # Placeholder for model loading and voice cloning logic
 class VoiceCloner:
     """Hold the uploaded voice parameters and a pretrained TTS model.

     ``model`` is filled by :meth:`load_model` and ``tts`` by
     :meth:`load_tts_model`; both are ``None`` until then.
     """

     def __init__(self):
         # Arrays read from the uploaded .npz archive, keyed by array name.
         self.model = None
         # Pretrained TTS object; None until load_tts_model() has been called.
         self.tts = None

     def load_model(self, npz_file):
         """Read every array stored in the ``.npz`` archive into ``self.model``.

         Args:
             npz_file: Path (or file object) of the ``.npz`` archive.
         """
         # np.load returns a lazy NpzFile that keeps the file open. Copy the
         # arrays out and close it so the data stays usable even if the
         # uploaded file is removed later.
         # Example; replace with your actual model loading code.
         with np.load(npz_file) as archive:
             self.model = {name: archive[name] for name in archive.files}

     def clone_voice(self, audio_file):
         """Placeholder for voice cloning: return *audio_file* unchanged."""
         return audio_file

     def load_tts_model(self):
         """Load the pretrained Glow-TTS LJSpeech model into ``self.tts``."""
         # You can choose a different model name if needed.
         self.tts = TTS(model_name="tts_models/en/ljspeech/glow-tts")

     def text_to_speech(self, text):
         """Synthesize *text* with the loaded TTS model.

         Args:
             text: The sentence(s) to speak.

         Returns:
             A ``(sample_rate, waveform)`` tuple, where ``waveform`` is a
             float32 NumPy array, suitable for a ``gr.Audio`` output.

         Raises:
             RuntimeError: If :meth:`load_tts_model` has not been called yet.
             ValueError: If *text* is empty or only whitespace.
         """
         if self.tts is None:
             # Raise instead of returning the message: the result is wired to
             # an audio output, which cannot display an error string.
             raise RuntimeError("TTS model not loaded!")
         if not text or not text.strip():
             raise ValueError("No text provided for speech synthesis.")

         waveform = np.asarray(self.tts.tts(text), dtype=np.float32)
         # NOTE(review): assumes the TTS object exposes its output rate as
         # synthesizer.output_sample_rate - confirm against the installed TTS
         # version.
         sample_rate = self.tts.synthesizer.output_sample_rate
         return sample_rate, waveform
 # Create the Gradio interface
 def create_interface():
     """Build the Gradio Blocks UI for voice cloning and text-to-speech.

     A single ``VoiceCloner`` instance is shared by all three buttons.

     Returns:
         The assembled ``gr.Blocks`` app; this function does not launch it.
     """
     cloner = VoiceCloner()
     with gr.Blocks() as demo:
         gr.Markdown("## Voice Cloning and TTS Application")
         # User uploads their .npz file
         npz_file = gr.File(label="Upload Your .npz Voice Model")
         # Gradio 4 replaced the `source` keyword with the list-valued `sources`.
         audio_input = gr.Audio(sources=["upload"], type="filepath", label="Upload Original Audio")
         text_input = gr.Textbox(label="Text Input for TTS")
         output_audio = gr.Audio(label="Cloned Voice Output or TTS Output")
         # Event outputs inside Blocks must be component instances, so the
         # load result gets its own read-only textbox.
         status = gr.Textbox(label="Status", interactive=False)
         upload_button = gr.Button("Load Model")
         # Button to clone voice
         clone_button = gr.Button("Clone Voice")
         # Button to convert text to speech
         tts_button = gr.Button("Convert Text to Speech")

         # Load the model when the user presses "Load Model"
         def load_and_initialize(npz):
             """Load the uploaded .npz and the TTS model; return a status message."""
             if npz is None:
                 # Button pressed before any file was uploaded.
                 return "Please upload a .npz file first."
             # The upload may arrive as an object with a .name path or as a
             # plain path string; accept both.
             npz_path = getattr(npz, "name", npz)
             cloner.load_model(npz_path)
             cloner.load_tts_model()  # Load the TTS model
             return "Model Loaded!"

         upload_button.click(fn=load_and_initialize, inputs=npz_file, outputs=status)
         # Clone the voice when the button is pressed
         clone_button.click(fn=cloner.clone_voice, inputs=audio_input, outputs=output_audio)
         # Convert text to speech when the button is pressed
         tts_button.click(fn=cloner.text_to_speech, inputs=text_input, outputs=output_audio)
     return demo