trained_tts / app.py
Neomindapp's picture
Update app.py
5622ebb verified
raw
history blame
1.88 kB
import torch
import gradio as gr
import json
import numpy as np
import soundfile as sf
# Import your Glow-TTS model and related utilities
from glow_tts.models import GlowTTS
from glow_tts.utils import text_to_sequence, sequence_to_mel # Replace with actual functions if different
# Define paths to your model and configuration (relative paths)
MODEL_PATH: str = "best_model.pth"  # checkpoint file handed to torch.load by load_model
CONFIG_PATH: str = "config.json"  # JSON file parsed into the GlowTTS config by load_model
# Load configuration and model
def load_model(model_path, config_path):
    """Build a Glow-TTS model from a JSON config and load its trained weights.

    Args:
        model_path: Path to the checkpoint file read with ``torch.load``.
        config_path: Path to the JSON configuration passed to ``GlowTTS``.

    Returns:
        The ``GlowTTS`` instance with the weights loaded, switched to
        evaluation mode.
    """
    # JSON is UTF-8 by specification; do not rely on the platform default encoding.
    with open(config_path, 'r', encoding='utf-8') as f:
        config = json.load(f)

    model = GlowTTS(config)

    # map_location='cpu' lets a checkpoint saved from a GPU run load on a
    # CPU-only host instead of failing while restoring CUDA tensors.
    state_dict = torch.load(model_path, map_location='cpu')
    model.load_state_dict(state_dict)

    # Inference only: put the module in evaluation mode.
    model.eval()
    return model
# Load the model once at import time; generate_speech reads this module-level instance.
model = load_model(model_path=MODEL_PATH, config_path=CONFIG_PATH)
# Define the function to generate speech
def generate_speech(text):
    """Synthesize speech for ``text`` and return the path of the written WAV file.

    The text is converted with ``text_to_sequence``, run through the
    module-level ``model``, turned into a waveform by ``mel_to_audio`` and
    written with ``soundfile`` at 22050 Hz.

    Args:
        text: The input string coming from the Gradio text box.

    Returns:
        Path (str) of a newly created ``.wav`` file. The file is not removed
        by this function.

    Raises:
        gr.Error: If ``text_to_sequence`` yields an empty sequence.
    """
    import tempfile  # local import: only needed here

    sequence = text_to_sequence(text)
    if len(sequence) == 0:
        # An empty sequence would become a (1, 0) tensor; fail early with a
        # message the UI can show instead of an opaque model error.
        raise gr.Error("Please enter some text to synthesize.")

    inputs = torch.tensor(sequence).unsqueeze(0)  # Add batch dimension

    with torch.no_grad():
        # Generate mel spectrogram from text sequence
        mel_output = model(inputs)
        # NOTE(review): mel_to_audio is neither defined nor imported in the
        # visible part of this file (only sequence_to_mel is imported), so this
        # call raises NameError unless it is provided elsewhere. A vocoder
        # (e.g. HiFi-GAN) is presumably intended here - TODO confirm and wire in.
        audio_waveform = mel_to_audio(mel_output)

    # Use a unique file per call: a fixed 'temp.wav' would be overwritten when
    # two requests are processed at the same time.
    with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp:
        output_path = tmp.name

    sf.write(output_path, audio_waveform, 22050)  # Adjust sample rate if necessary
    return output_path
# Gradio UI: one text input fed to generate_speech, one audio output.
interface = gr.Interface(
    title="Glow-TTS Model",
    description="Generate speech from text using the Glow-TTS model.",
    fn=generate_speech,
    inputs="text",
    outputs="audio",
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()