Spaces:

Neomindapp
/

trained_tts

Sleeping

App Files Community

Neomindapp committed on Sep 2, 2024

Commit

5622ebb

verified ·

1 Parent(s): f3c0815

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -18

app.py CHANGED Viewed

@@ -1,34 +1,48 @@
 import torch
 import gradio as gr
-from transformers import TTSConfig, TTSForConditionalGeneration
 import numpy as np
 import soundfile as sf
-# Load your model and configuration
 def load_model(model_path, config_path):
-    config = TTSConfig.from_json_file(config_path)
-    model = TTSForConditionalGeneration.from_pretrained(model_path, config=config)
     model.eval()
     return model
-# Define the path to your model and config
-MODEL_PATH = 'path/to/best_model.pth'
-CONFIG_PATH = 'path/to/config.json'
 model = load_model(MODEL_PATH, CONFIG_PATH)
 # Define the function to generate speech
 def generate_speech(text):
-    # Convert text to input format
-    inputs = tokenizer(text, return_tensors="pt")
     with torch.no_grad():
-        # Generate speech
-        outputs = model.generate(inputs['input_ids'])
-    # Convert outputs to numpy array (audio waveform)
-    # This conversion depends on your model
-    audio_waveform = outputs.squeeze().numpy()
     # Save the waveform to a temporary file
     temp_file = 'temp.wav'
@@ -41,10 +55,10 @@ interface = gr.Interface(
     fn=generate_speech,
     inputs="text",
     outputs="audio",
-    title="Text-to-Speech Model",
-    description="Generate speech from text using your TTS model."
 )
 # Launch the Gradio interface
 if __name__ == "__main__":
-    interface.launch()

 import torch
 import gradio as gr
+import json
 import numpy as np
 import soundfile as sf
+# Import your Glow-TTS model and related utilities
+from glow_tts.models import GlowTTS
+from glow_tts.utils import text_to_sequence, sequence_to_mel  # Replace with actual functions if different
+# Define paths to your model and configuration (relative paths)
+MODEL_PATH = 'best_model.pth'
+CONFIG_PATH = 'config.json'
+# Load configuration and model
 def load_model(model_path, config_path):
+    # Load the model configuration
+    with open(config_path, 'r') as f:
+        config = json.load(f)
+    # Initialize the Glow-TTS model
+    model = GlowTTS(config)
+    # Load the trained model weights
+    model.load_state_dict(torch.load(model_path))
     model.eval()
     return model
+# Load the model
 model = load_model(MODEL_PATH, CONFIG_PATH)
 # Define the function to generate speech
 def generate_speech(text):
+    # Convert text to sequence
+    sequence = text_to_sequence(text)
+    inputs = torch.tensor(sequence).unsqueeze(0)  # Add batch dimension
     with torch.no_grad():
+        # Generate mel spectrogram from text sequence
+        mel_output = model(inputs)
+    # Convert mel spectrogram to waveform
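+    # This step requires a vocoder (e.g., HiFi-GAN) to convert mel spectrograms to audio.
+    # NOTE(review): `mel_to_audio` is not imported or defined in the visible hunks (only
+    # `sequence_to_mel` is imported above) - confirm it exists, or this call raises NameError.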
+    audio_waveform = mel_to_audio(mel_output)  # Replace with actual conversion if needed
     # Save the waveform to a temporary file
     temp_file = 'temp.wav'
     fn=generate_speech,
     inputs="text",
     outputs="audio",
+    title="Glow-TTS Model",
+    description="Generate speech from text using the Glow-TTS model."
 )
 # Launch the Gradio interface
 if __name__ == "__main__":
+    interface.launch()