# Spaces: Sleeping
import json
import tempfile

import gradio as gr
import numpy as np
import soundfile as sf
import torch

# Import your Glow-TTS model and related utilities
from glow_tts.models import GlowTTS
from glow_tts.utils import text_to_sequence, sequence_to_mel  # Replace with actual functions if different
# Define paths to your model and configuration (relative paths)
# Both are resolved against the process's current working directory.
MODEL_PATH = 'best_model.pth'
CONFIG_PATH = 'config.json'
# Load configuration and model
def load_model(model_path, config_path):
    """Build a Glow-TTS model from a JSON config and load trained weights into it.

    Args:
        model_path: Path to the checkpoint file passed to ``torch.load``.
        config_path: Path to the JSON configuration handed to ``GlowTTS``.

    Returns:
        The ``GlowTTS`` instance with weights loaded, switched to eval mode.

    Raises:
        FileNotFoundError: If ``config_path`` (or ``model_path``) does not exist.
    """
    # Load the model configuration
    with open(config_path, 'r', encoding='utf-8') as f:
        config = json.load(f)

    # Initialize the Glow-TTS model
    model = GlowTTS(config)

    # Load the trained model weights.
    # map_location="cpu" lets a checkpoint saved on a GPU machine deserialize
    # on a CPU-only host; without it torch.load raises when CUDA is missing.
    state_dict = torch.load(model_path, map_location="cpu")
    model.load_state_dict(state_dict)

    # Inference only: switch the module to evaluation mode.
    model.eval()
    return model
# Load the model
# Loaded once at import time so every request reuses the same instance.
model = load_model(MODEL_PATH, CONFIG_PATH)
# Define the function to generate speech
def generate_speech(text):
    """Synthesize speech for *text* and return the path of the written WAV file.

    Args:
        text: The text to synthesize, as received from the Gradio text input.

    Returns:
        Path of a newly created ``.wav`` file holding the generated waveform.

    Raises:
        ValueError: If *text* is empty or contains only whitespace.
    """
    # Reject blank input up front instead of feeding an empty sequence to the model.
    if not text or not text.strip():
        raise ValueError("Input text must not be empty.")

    # Convert text to sequence
    sequence = text_to_sequence(text)
    inputs = torch.tensor(sequence).unsqueeze(0)  # Add batch dimension

    # Inference only: no autograd graph is needed for either stage below.
    with torch.no_grad():
        # Generate mel spectrogram from text sequence
        mel_output = model(inputs)

        # Convert mel spectrogram to waveform
        # This step might require a vocoder (e.g., HiFi-GAN) to convert mel spectrograms to audio
        # NOTE(review): `mel_to_audio` is not imported or defined in the visible
        # part of this file -- wire in the real vocoder call before deploying.
        audio_waveform = mel_to_audio(mel_output)  # Replace with actual conversion if needed

    # Write to a unique temporary file so overlapping requests cannot overwrite
    # each other's output (a fixed 'temp.wav' would be shared by every caller).
    # delete=False keeps the file on disk after the handle closes so the caller
    # can still read it from the returned path.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        temp_file = tmp.name
    sf.write(temp_file, audio_waveform, 22050)  # Adjust sample rate if necessary
    return temp_file
# Define Gradio interface
# One text input feeds generate_speech; the file path it returns is passed
# to the "audio" output component.
interface = gr.Interface(
    fn=generate_speech,
    inputs="text",
    outputs="audio",
    title="Glow-TTS Model",
    description="Generate speech from text using the Glow-TTS model.",
)
# Launch the Gradio interface
# Only start the web server when executed as a script, not when imported.
if __name__ == "__main__":
    interface.launch()