# NOTE: the original paste began with Hugging Face Spaces page chrome
# ("Spaces:" / "Sleeping" status lines) — kept here as a comment so the
# file parses as Python.
import tempfile

import gradio as gr
import numpy as np
import soundfile as sf
import torch
from transformers import AutoTokenizer, TTSConfig, TTSForConditionalGeneration
# Load the TTS model and its configuration from disk.
def load_model(model_path, config_path):
    """Build a TTS model from a checkpoint and JSON config, ready for inference.

    Parameters
    ----------
    model_path : str
        Path to the pretrained model checkpoint.
    config_path : str
        Path to the JSON configuration file.

    Returns
    -------
    The loaded model, switched to evaluation mode.
    """
    cfg = TTSConfig.from_json_file(config_path)
    tts_model = TTSForConditionalGeneration.from_pretrained(model_path, config=cfg)
    # Module.eval() returns the module itself, so this is the same object.
    return tts_model.eval()
# Paths to the trained checkpoint and its configuration.
MODEL_PATH = 'path/to/best_model.pth'
CONFIG_PATH = 'path/to/config.json'

model = load_model(MODEL_PATH, CONFIG_PATH)

# BUG FIX: the original script referenced `tokenizer` inside generate_speech()
# without ever defining it, raising NameError on the first request. Load one
# here at startup alongside the model.
# NOTE(review): assumes a tokenizer was saved with the checkpoint at
# MODEL_PATH — confirm the actual tokenizer location for this model.
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
# Define the function to generate speech
def generate_speech(text):
    """Synthesize speech for `text` and return the path to a WAV file.

    Parameters
    ----------
    text : str
        Input text to convert to speech.

    Returns
    -------
    str
        Path to a WAV file containing the generated audio (consumed by the
        Gradio "audio" output component).
    """
    # Convert text to model input tensors.
    inputs = tokenizer(text, return_tensors="pt")
    with torch.no_grad():
        outputs = model.generate(inputs['input_ids'])
    # Move to CPU before .numpy(): .numpy() raises if the tensor lives on a
    # GPU. (No-op when already on CPU.)
    audio_waveform = outputs.squeeze().cpu().numpy()
    # Write to a unique temp file rather than a fixed 'temp.wav' — a shared
    # name is silently clobbered when two requests run concurrently.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        # NOTE(review): sample rate 22050 is assumed — confirm it matches
        # the model's config.
        sf.write(tmp.name, audio_waveform, 22050)
        return tmp.name
# Wire the synthesis function into a simple Gradio app: one text box in,
# one audio player out.
interface = gr.Interface(
    fn=generate_speech,
    inputs="text",
    outputs="audio",
    title="Text-to-Speech Model",
    description="Generate speech from text using your TTS model.",
)

# Only start the web server when run as a script, not on import.
if __name__ == "__main__":
    interface.launch()