import gradio as gr
from transformers import pipeline
import librosa
# Initialize the ASR model
asr_model = pipeline("automatic-speech-recognition", model="openai/whisper-small")
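# (Optional) For recordings longer than ~30 seconds, the pipeline can be created
# with chunked long-form decoding, e.g. pipeline(..., chunk_length_s=30); the
# simple setup above is kept here.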
def transcribe(audio_path):
    # With type="filepath", Gradio passes the recorded or uploaded audio
    # as a path on disk, which librosa can load directly.
    # Load the audio and resample to 16 kHz, the rate Whisper expects
    data, samplerate = librosa.load(audio_path, sr=16000)
    # Pass the raw waveform and its sampling rate to the model for transcription
    transcription = asr_model({"raw": data, "sampling_rate": samplerate})
    return transcription["text"]
# Create the Gradio interface
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath", label="Record or Upload Audio"),
    outputs="text",
)
iface.launch()
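# A minimal alternative sketch (assuming ffmpeg is available on the system):
# the transformers ASR pipeline can also decode an audio file path directly,
# so the librosa loading step could be dropped entirely:
#
#     def transcribe(audio_path):
#         return asr_model(audio_path)["text"]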