"""Gradio demo: synthesize speech with the IndicF5 model from a reference prompt."""

import gradio as gr
from numpy import float32, int16
from transformers import AutoModel

# Sample rate (Hz) reported to Gradio alongside the generated waveform.
OUTPUT_SAMPLE_RATE = 24000

# Divisor that rescales 16-bit PCM samples to floats in [-1.0, 1.0).
INT16_SCALE = 32768.0

# NOTE(review): trust_remote_code=True runs Python code shipped in the Hub
# repository; only keep it for repositories you trust.
model = AutoModel.from_pretrained(
    "shethjenil/IndicF5",
    trust_remote_code=True,
).to("cpu")


def synthesize_speech(text, ref_audio, ref_text):
    """Generate speech for *text*, conditioned on a reference audio prompt.

    Args:
        text: The text to synthesize.
        ref_audio: Filesystem path of the reference prompt audio. Gradio
            passes ``None`` when the user has not provided a clip.
        ref_text: The text spoken in the reference prompt audio.

    Returns:
        A ``(sample_rate, waveform)`` tuple for a ``gr.Audio(type="numpy")``
        output. Waveforms the model returns as int16 are converted to
        float32 and divided by 32768; any other dtype is passed through
        unchanged.

    Raises:
        gr.Error: If *text* is blank or no reference audio was supplied.
    """
    # Fail early with a message Gradio shows to the user instead of letting
    # the model raise an opaque error on missing inputs.
    if not text or not text.strip():
        raise gr.Error("Please enter the text to synthesize.")
    if not ref_audio:
        raise gr.Error("Please provide a reference prompt audio clip.")

    # Presumably returns a NumPy array (it is used via .dtype / .astype).
    audio = model(text, ref_audio_path=ref_audio, ref_text=ref_text)

    if audio.dtype == int16:
        audio = audio.astype(float32) / INT16_SCALE

    return OUTPUT_SAMPLE_RATE, audio


demo = gr.Interface(
    synthesize_speech,
    [
        gr.Textbox(label="Text to Synthesize"),
        gr.Audio(type="filepath", label="Reference Prompt Audio"),
        gr.Textbox(label="Text in Reference Prompt Audio"),
    ],
    gr.Audio(label="Generated Speech", type="numpy"),
)

demo.launch()