File size: 872 Bytes
d7aa11b
05e9e3a
3ecb0fd
f3c7107
3ecb0fd
05e9e3a
c575d84
c227f48
 
 
 
 
 
3ecb0fd
c227f48
3ecb0fd
482a875
05e9e3a
f3c7107
3ecb0fd
d7aa11b
05e9e3a
1bfa778
d7aa11b
 
f3c7107
c227f48
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import gradio as gr
from transformers import pipeline
import librosa

# Initialize the model
# Build the Hugging Face ASR pipeline once at import time with OpenAI's
# Whisper-small checkpoint; the first run downloads the model weights.
asr_model = pipeline("automatic-speech-recognition", model="openai/whisper-small")

def transcribe(audio_data):
    """Transcribe recorded or uploaded audio to text with the Whisper pipeline.

    Parameters
    ----------
    audio_data : str | tuple
        Either a file path (Gradio ``type="filepath"``) or a
        ``(sample_rate, numpy_array)`` tuple (Gradio ``type="numpy"``).
        Note Gradio's tuple order is (rate, data), not (name, file).

    Returns
    -------
    str
        The transcribed text.
    """
    if isinstance(audio_data, tuple):
        # Gradio numpy mode: (sample_rate, data) with data often int16.
        samplerate, data = audio_data
        data = data.astype("float32")
        # Collapse stereo to mono; Whisper expects a single channel.
        if data.ndim > 1:
            data = data.mean(axis=1)
    else:
        # A path (or file-like object): let librosa decode it.
        # sr=None preserves the file's native sampling rate.
        data, samplerate = librosa.load(audio_data, sr=None)

    # The ASR pipeline takes the waveform and its rate together as a dict;
    # a bare `sampling_rate=` keyword is not an accepted call argument.
    transcription = asr_model({"raw": data, "sampling_rate": samplerate})
    return transcription["text"]

# Create the Gradio interface.
# NOTE: "file" is not a valid gr.Audio type in current Gradio releases
# (valid options are "filepath" and "numpy"); "filepath" hands transcribe()
# a plain path string that librosa.load can open directly.
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath", label="Record or Upload Audio"),
    outputs="text",
)

iface.launch()