File size: 872 Bytes
d7aa11b
05e9e3a
3ecb0fd
f3c7107
3ecb0fd
05e9e3a
c575d84
c227f48
 
 
 
 
 
3ecb0fd
c227f48
3ecb0fd
482a875
05e9e3a
f3c7107
3ecb0fd
d7aa11b
05e9e3a
1bfa778
d7aa11b
 
f3c7107
c227f48
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import gradio as gr
from transformers import pipeline
import librosa

# Initialize the model
# Build the Hugging Face ASR pipeline once at import time with OpenAI's
# Whisper-small checkpoint; the first run downloads the model weights.
asr_model = pipeline("automatic-speech-recognition", model="openai/whisper-small")

def transcribe(audio_data):
    """Transcribe recorded or uploaded audio to text with the Whisper pipeline.

    Parameters
    ----------
    audio_data : str | tuple
        Either a file path (Gradio ``type="filepath"``) or a
        ``(sample_rate, numpy_array)`` tuple (Gradio ``type="numpy"``).
        Note Gradio's tuple order is (rate, data), not (name, file).

    Returns
    -------
    str
        The transcribed text.
    """
    if isinstance(audio_data, tuple):
        # Gradio numpy mode: (sample_rate, data) with data often int16.
        samplerate, data = audio_data
        data = data.astype("float32")
        # Collapse stereo to mono; Whisper expects a single channel.
        if data.ndim > 1:
            data = data.mean(axis=1)
    else:
        # A path (or file-like object): let librosa decode it.
        # sr=None preserves the file's native sampling rate.
        data, samplerate = librosa.load(audio_data, sr=None)

    # The ASR pipeline takes the waveform and its rate together as a dict;
    # a bare `sampling_rate=` keyword is not an accepted call argument.
    transcription = asr_model({"raw": data, "sampling_rate": samplerate})
    return transcription["text"]

# Create the Gradio interface.
# NOTE: "file" is not a valid gr.Audio type in current Gradio releases
# (valid options are "filepath" and "numpy"); "filepath" hands transcribe()
# a plain path string that librosa.load can open directly.
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath", label="Record or Upload Audio"),
    outputs="text",
)

iface.launch()