Spaces:

Neomindapp
/

trained_tts

Sleeping

App Files Files Community

trained_tts / app.py

Neomindapp

Update app.py

5622ebb verified 10 months ago

raw

history blame

1.88 kB

	import torch
	import gradio as gr
	import json
	import numpy as np
	import soundfile as sf

	# Import your Glow-TTS model and related utilities
	from glow_tts.models import GlowTTS
	from glow_tts.utils import text_to_sequence, sequence_to_mel # Replace with actual functions if different

	# Checkpoint and JSON config locations; both are relative paths.
	CONFIG_PATH = "config.json"
	MODEL_PATH = "best_model.pth"

	# Load configuration and model
	def load_model(model_path, config_path):
	    """Build a GlowTTS model from a JSON config and load its trained weights.

	    Args:
	        model_path: Path to the checkpoint file read with ``torch.load``.
	        config_path: Path to the JSON configuration handed to ``GlowTTS``.

	    Returns:
	        The ``GlowTTS`` instance with the weights loaded, in eval mode.
	    """
	    # JSON is UTF-8 by specification; do not depend on the locale default.
	    with open(config_path, 'r', encoding='utf-8') as f:
	        config = json.load(f)

	    model = GlowTTS(config)

	    # map_location='cpu' lets a checkpoint that was saved on a GPU load on a
	    # CPU-only host instead of failing while restoring CUDA tensors.
	    # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
	    state_dict = torch.load(model_path, map_location='cpu')
	    model.load_state_dict(state_dict)

	    # Inference only: switch off training-time behaviour such as dropout.
	    model.eval()

	    return model

	# Build the model once at module import; generate_speech reads this global.
	model = load_model(model_path=MODEL_PATH, config_path=CONFIG_PATH)

	# Define the function to generate speech
	def generate_speech(text):
	    """Synthesize speech for *text* and return the path of the WAV file.

	    Args:
	        text: Input string passed to ``text_to_sequence``.

	    Returns:
	        Path (str) of a newly created WAV file written at 22050 Hz.
	    """
	    # Local import: tempfile is not among the file's visible imports.
	    import tempfile

	    # Encode the text and add a leading batch dimension.
	    sequence = text_to_sequence(text)
	    inputs = torch.tensor(sequence).unsqueeze(0)

	    with torch.no_grad():
	        # Run the module-level model on the encoded text.
	        mel_output = model(inputs)

	    # NOTE(review): mel_to_audio is neither defined nor imported in the visible
	    # source; unless it is provided elsewhere this call raises NameError.
	    # Converting a mel spectrogram to audio presumably needs a vocoder
	    # (e.g., HiFi-GAN) -- TODO confirm and wire in the real conversion.
	    audio_waveform = mel_to_audio(mel_output)

	    # soundfile works on array data; convert explicitly if a tensor came back
	    # (it may live on another device or still be attached to the graph).
	    if isinstance(audio_waveform, torch.Tensor):
	        audio_waveform = audio_waveform.detach().cpu().numpy()

	    # Use a unique file per call: a fixed 'temp.wav' would be overwritten by
	    # concurrent requests, handing one user another user's audio.
	    with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp:
	        temp_file = tmp.name
	    sf.write(temp_file, audio_waveform, 22050)  # Adjust sample rate if necessary

	    return temp_file

	# Gradio UI wiring: text input -> generate_speech -> audio output.
	interface = gr.Interface(
	    title="Glow-TTS Model",
	    description="Generate speech from text using the Glow-TTS model.",
	    fn=generate_speech,
	    inputs="text",
	    outputs="audio",
	)

	# Start the web UI only when this file is executed as a script.
	if __name__ == "__main__":
	    interface.launch()