Spaces:
Running
Running
from numpy import int16,float32 | |
import gradio as gr | |
from transformers import AutoModel | |
# Load the IndicF5 checkpoint once at import time so every request reuses it.
# trust_remote_code=True is passed because the checkpoint is loaded through
# AutoModel with custom code from the Hub repository.
model = AutoModel.from_pretrained(
    "ai4bharat/IndicF5",
    trust_remote_code=True,
)
model = model.to("cpu")
def synthesize_speech(text, ref_audio, ref_text):
    """Generate speech for ``text`` using a reference recording as the prompt.

    Args:
        text: Text to synthesize.
        ref_audio: Filesystem path of the reference prompt audio. The Gradio
            audio input passes ``None`` when no recording has been supplied.
        ref_text: Text spoken in the reference prompt audio.

    Returns:
        A ``(sample_rate, samples)`` tuple: the fixed rate ``24000`` and the
        array returned by the model, rescaled to float32 when it was int16.

    Raises:
        gr.Error: If ``text`` is blank or ``ref_audio`` is missing, so the UI
            shows a readable message instead of an opaque model failure.
    """
    # Fail fast on unusable input before invoking the (slow) model.
    if not text or not text.strip():
        raise gr.Error("Please enter the text to synthesize.")
    if not ref_audio:
        raise gr.Error("Please provide a reference prompt audio.")

    audio = model(text, ref_audio_path=ref_audio, ref_text=ref_text)
    if audio.dtype == int16:
        # Rescale 16-bit integer samples to float32 in [-1.0, 1.0).
        audio = audio.astype(float32) / 32768.0
    # NOTE(review): 24000 is presumably the model's output sample rate — confirm.
    return 24000, audio
# Build the web UI: two text boxes and a reference-audio upload feed
# synthesize_speech, whose (sample_rate, samples) result is played back.
demo = gr.Interface(
    synthesize_speech,
    [
        gr.Textbox(label="Text to Synthesize"),
        gr.Audio(type="filepath", label="Reference Prompt Audio"),
        gr.Textbox(label="Text in Reference Prompt Audio"),
    ],
    gr.Audio(label="Generated Speech", type="numpy"),
)
demo.launch()