Spaces:

Neomindapp
/

trained_tts

Sleeping

File size: 1,882 Bytes

f3c0815
 
5622ebb
f3c0815
 
 
5622ebb
 
 
 
 
 
 
 
 
f3c0815
5622ebb
 
 
 
 
 
 
 
 
f3c0815
5622ebb
f3c0815
 
5622ebb
f3c0815
 
 
 
5622ebb
 
 
f3c0815
 
5622ebb
 
f3c0815
5622ebb
 
 
f3c0815
 
 
 
 
 
 
 
 
 
 
 
5622ebb
 
f3c0815
 
 
 
5622ebb

import torch
import gradio as gr
import json
import numpy as np
import soundfile as sf

# Import your Glow-TTS model and related utilities
from glow_tts.models import GlowTTS
from glow_tts.utils import text_to_sequence, sequence_to_mel  # Replace with actual functions if different

# Locations of the trained checkpoint and its JSON configuration.
# Both are relative paths, so they resolve against the current working directory.
MODEL_PATH: str = "best_model.pth"
CONFIG_PATH: str = "config.json"

# Load configuration and model
def load_model(model_path, config_path):
    """Build a GlowTTS model from a JSON config and load trained weights into it.

    Args:
        model_path: Path to the checkpoint file handed to ``torch.load``.
        config_path: Path to the JSON configuration file.

    Returns:
        The ``GlowTTS`` instance with the weights loaded and ``eval()`` applied.
    """
    # Load the model configuration (JSON is UTF-8; don't depend on the locale default)
    with open(config_path, 'r', encoding='utf-8') as f:
        config = json.load(f)

    # Initialize the Glow-TTS model
    model = GlowTTS(config)

    # map_location='cpu' lets a checkpoint that was saved on a GPU load on a
    # CPU-only host; load_state_dict then copies the values into the model's
    # own parameters, so the result is the same on GPU machines.
    # NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
    state_dict = torch.load(model_path, map_location='cpu')
    model.load_state_dict(state_dict)

    # Switch to inference mode
    model.eval()

    return model

# Load the model once at module level so every generate_speech call reuses it.
# This is an import-time side effect: load_model reads both CONFIG_PATH and
# MODEL_PATH from disk as soon as this file is imported or run.
model = load_model(MODEL_PATH, CONFIG_PATH)

# Define the function to generate speech
def generate_speech(text):
    """Synthesize speech for ``text`` and return the path of the written WAV file.

    Args:
        text: The input string to synthesize.

    Returns:
        Path (str) to a WAV file written at a 22050 Hz sample rate.

    Raises:
        ValueError: If ``text`` is None, empty, or only whitespace.
    """
    import tempfile

    # Reject empty input up front instead of failing somewhere inside the model
    if text is None or not text.strip():
        raise ValueError("Input text must not be empty.")

    # Convert text to sequence
    sequence = text_to_sequence(text)
    inputs = torch.tensor(sequence).unsqueeze(0)  # Add batch dimension

    with torch.no_grad():
        # Generate mel spectrogram from text sequence
        mel_output = model(inputs)

    # Convert mel spectrogram to waveform
    # This step might require a vocoder (e.g., HiFi-GAN) to convert mel spectrograms to audio
    # NOTE(review): `mel_to_audio` is not defined or imported in the visible part
    # of this file; unless it is provided elsewhere this line raises NameError.
    audio_waveform = mel_to_audio(mel_output)  # Replace with actual conversion if needed

    # Write to a unique temporary file per call: a single shared 'temp.wav'
    # would be overwritten when two requests are handled at the same time.
    # delete=False keeps the file on disk after the handle is closed so the
    # caller can read it by path.
    with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp:
        output_path = tmp.name
    sf.write(output_path, audio_waveform, 22050)  # Adjust sample rate if necessary

    return output_path

# Build the Gradio UI: one text input wired to generate_speech, one audio output
interface = gr.Interface(
    title="Glow-TTS Model",
    description="Generate speech from text using the Glow-TTS model.",
    fn=generate_speech,
    inputs="text",
    outputs="audio",
)

# Launch the Gradio interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    interface.launch()