"""Gradio web demo that turns text into speech with a custom Coqui TTS model."""

import gradio as gr
from TTS.utils.generic_utils import download_model
from TTS.utils.io import load_config
from TTS import TTS
import numpy as np
import soundfile as sf

# NOTE(review): the TTS import paths above and the `TTS(config, model_path=...)`,
# `tts.synthesize(...)` and `tts.sampling_rate` usages below are kept exactly as
# originally written. Confirm they exist in the installed Coqui TTS release
# (newer releases expose the high-level class via `TTS.api`) -- TODO verify.

# Paths to the trained checkpoint and its configuration file.
model_path = "best_model.pth"
config_path = "config.json"

# Load the model once at import time so every request reuses the same instance.
config = load_config(config_path)
tts = TTS(config, model_path=model_path)


def generate_speech(text):
    """Synthesize ``text`` and return it in the form Gradio's Audio output expects.

    Args:
        text: The sentence(s) to speak, as entered in the textbox.

    Returns:
        A ``(sample_rate, samples)`` tuple where ``samples`` is a NumPy array,
        or ``None`` when ``text`` is empty or only whitespace (which leaves the
        audio output empty instead of calling the synthesizer with no input).
    """
    if not text or not text.strip():
        return None

    wav = tts.synthesize(text)
    samples = np.asarray(wav)
    if np.issubdtype(samples.dtype, np.floating):
        # float32 is the floating-point format Gradio converts to 16-bit audio
        # across versions; a list of Python floats would otherwise be float64.
        samples = samples.astype(np.float32)

    # Return the samples directly instead of writing a shared "output.wav":
    # a fixed file name lets concurrent requests overwrite each other's audio,
    # and raw file bytes are not a file path, which is what a file-typed Audio
    # output expects.
    return int(tts.sampling_rate), samples


# Define the Gradio interface.
iface = gr.Interface(
    fn=generate_speech,
    inputs=gr.Textbox(lines=2, placeholder="Enter text here..."),
    outputs=gr.Audio(type="numpy"),
    title="Text-to-Speech with Coqui TTS",
    description="Generate speech from text using a custom Coqui TTS model.",
)

if __name__ == "__main__":
    iface.launch()