Spaces:
Sleeping
Sleeping
File size: 872 Bytes
d7aa11b 05e9e3a 3ecb0fd f3c7107 3ecb0fd 05e9e3a c575d84 c227f48 3ecb0fd c227f48 3ecb0fd 482a875 05e9e3a f3c7107 3ecb0fd d7aa11b 05e9e3a 1bfa778 d7aa11b f3c7107 c227f48 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
import gradio as gr
from transformers import pipeline
import librosa
# Build the speech-recognition pipeline once at import time so every
# request reuses the same loaded model.
asr_model = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-small",
)
def transcribe(audio_data):
    """Transcribe an audio clip to text with the module-level ASR pipeline.

    Args:
        audio_data: Audio handed over by the Gradio ``Audio`` input. It is
            forwarded to ``librosa.load``; when a tuple is received, its
            second element is used as the audio source. ``None`` means no
            audio was submitted.

    Returns:
        The transcribed text, or an empty string when no audio was provided.
    """
    # Nothing was recorded or uploaded, so there is nothing to transcribe.
    if audio_data is None:
        return ""

    # When the input arrives as a tuple, the audio source is its second item.
    if isinstance(audio_data, tuple):
        audio_data = audio_data[1]

    # Load directly at the rate the model's feature extractor expects, so the
    # pipeline does not have to resample (or misinterpret) the samples.
    target_rate = asr_model.feature_extractor.sampling_rate
    data, samplerate = librosa.load(audio_data, sr=target_rate)

    # The pipeline call has no `sampling_rate` keyword; the rate has to be
    # passed together with the raw samples in a dict.
    transcription = asr_model({"raw": data, "sampling_rate": samplerate})
    return transcription["text"]
# Create the Gradio interface.
# type="filepath" hands the callback a path to the recorded/uploaded audio,
# which librosa can open directly; the older "file" value is deprecated or
# rejected by current Gradio releases.
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath", label="Record or Upload Audio"),
    outputs="text",
)
iface.launch()