trained_tts / app.py
Neomindapp's picture
Create app.py
f3c0815 verified
raw
history blame
1.44 kB
import torch
import gradio as gr
from transformers import TTSConfig, TTSForConditionalGeneration
import numpy as np
import soundfile as sf
# Load your model and configuration
def load_model(model_path, config_path):
    """Load a TTS model plus its JSON config and switch it to inference mode.

    Parameters:
        model_path (str): Path to the pretrained model weights.
        config_path (str): Path to the model's JSON configuration file.

    Returns:
        The loaded model with `eval()` applied (dropout/batch-norm in
        inference mode).
    """
    # NOTE(review): `TTSConfig` and `TTSForConditionalGeneration` are not
    # classes shipped by the `transformers` package — these imports will fail
    # at runtime. Verify which library/classes this model actually needs
    # (e.g. Coqui TTS or a specific transformers model class) and fix the
    # imports accordingly.
    config = TTSConfig.from_json_file(config_path)
    model = TTSForConditionalGeneration.from_pretrained(model_path, config=config)
    model.eval()
    return model
# Define the path to your model and config
# NOTE(review): these are placeholder paths — replace with the real file
# locations before running, otherwise loading fails at import time.
MODEL_PATH = 'path/to/best_model.pth'
CONFIG_PATH = 'path/to/config.json'
# Loaded once at module import so every request reuses the same model instance.
model = load_model(MODEL_PATH, CONFIG_PATH)
# Define the function to generate speech
def generate_speech(text):
    """Synthesize speech for *text* and return the path to a WAV file.

    Parameters:
        text (str): The text to convert to speech.

    Returns:
        str: Filesystem path of the generated 16-bit WAV file, suitable for
        Gradio's "audio" output component.
    """
    import tempfile  # local import: only needed by this request handler

    # BUG(review): `tokenizer` is never defined anywhere in this file, so this
    # line raises NameError on the first request. A tokenizer matching the
    # model must be loaded at startup (alongside `load_model`) — TODO fix.
    inputs = tokenizer(text, return_tensors="pt")
    with torch.no_grad():
        # Generate speech; assumes the model returns the raw audio waveform
        # tensor directly — TODO confirm against the actual model's API.
        outputs = model.generate(inputs['input_ids'])
    # Drop singleton batch/channel dims and move to a NumPy array for soundfile.
    audio_waveform = outputs.squeeze().numpy()
    # Write to a unique temp file rather than a fixed 'temp.wav': with the
    # original fixed name, two concurrent Gradio requests would clobber each
    # other's audio output.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        temp_path = tmp.name
    # 22050 Hz is assumed; adjust if the model was trained at another rate.
    sf.write(temp_path, audio_waveform, 22050)
    return temp_path
# UI copy kept in module-level constants so it is easy to locate and edit.
_TITLE = "Text-to-Speech Model"
_DESCRIPTION = "Generate speech from text using your TTS model."

# Wire the synthesis function into a simple text-in / audio-out web UI.
interface = gr.Interface(
    fn=generate_speech,
    inputs="text",
    outputs="audio",
    title=_TITLE,
    description=_DESCRIPTION,
)

if __name__ == "__main__":
    # Start the web server only when executed as a script, not on import.
    interface.launch()