"""Gradio demo: transcribe microphone audio with a Gujarati wav2vec2 ASR model."""

from transformers import pipeline
import gradio as gr
import numpy as np
import librosa

# Sample rate (Hz) the waveform is resampled to before it is handed to the model.
TARGET_SAMPLE_RATE = 16000

# Loaded once at start-up so every request reuses the same model instance.
transcriber_gujarati = pipeline(
    "automatic-speech-recognition",
    model="ai4bharat/indicwav2vec_v1_gujarati",
)


def transcribe(audio):
    """Return the transcription of a microphone recording.

    Args:
        audio: ``(sample_rate, samples)`` tuple as delivered by the Gradio
            audio input, or ``None`` when nothing was recorded. ``samples``
            is a NumPy array, either 1-D (mono) or 2-D
            ``(samples, channels)``.

    Returns:
        The recognised text, or an empty string when there is no audio
        to transcribe.
    """
    # Gradio passes None when the user submits without recording anything.
    if audio is None:
        return ""

    sr, y = audio
    y = np.asarray(y).astype(np.float32)

    # Down-mix multi-channel recordings: librosa.resample works on the last
    # axis, which for (samples, channels) data would be the channel axis.
    if y.ndim > 1:
        y = y.mean(axis=1)

    if y.size == 0:
        return ""

    # Peak-normalise to [-1, 1]; skip for pure silence to avoid dividing by
    # zero and feeding NaNs to the model.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    resampled = librosa.resample(y=y, orig_sr=sr, target_sr=TARGET_SAMPLE_RATE)
    return transcriber_gujarati(resampled)["text"]


gr.Interface(transcribe, inputs="microphone", outputs="text").launch()