# transcribeapi / app.py
# Whisper speech-to-text Gradio Space (author: peterkros, commit f345224).
import gradio as gr
import whisper
import numpy as np
import wave
import io
# Load the Whisper ASR model once at import time so every request reuses it.
# "base" trades accuracy for speed; swap for "small", "medium", etc. as needed.
model = whisper.load_model("base") # You can change to any other model like "small", "medium", etc.
def transcribe(audio):
    """Transcribe an uploaded WAV file with the module-level Whisper model.

    Args:
        audio: Raw bytes of a WAV file (as delivered by ``gr.Audio`` with
            ``type="bytes"``).

    Returns:
        The transcribed text (``result["text"]``).

    Raises:
        wave.Error: If ``audio`` is not a valid WAV stream.
        ValueError: If the WAV sample width is unsupported.
    """
    with wave.open(io.BytesIO(audio), "rb") as wav_reader:
        n_channels = wav_reader.getnchannels()
        sample_width = wav_reader.getsampwidth()
        frame_rate = wav_reader.getframerate()
        frames = wav_reader.readframes(wav_reader.getnframes())

    # Decode PCM into float32 in [-1, 1]. The original assumed 16-bit PCM,
    # which silently misdecodes 8-bit and 32-bit WAVs.
    if sample_width == 2:
        samples = np.frombuffer(frames, dtype=np.int16).astype(np.float32) / np.iinfo(np.int16).max
    elif sample_width == 4:
        samples = np.frombuffer(frames, dtype=np.int32).astype(np.float32) / np.iinfo(np.int32).max
    elif sample_width == 1:
        # 8-bit WAV is unsigned, centered at 128.
        samples = (np.frombuffer(frames, dtype=np.uint8).astype(np.float32) - 128.0) / 128.0
    else:
        raise ValueError(f"Unsupported WAV sample width: {sample_width} bytes")

    # Down-mix interleaved multi-channel audio to mono; Whisper expects one channel.
    if n_channels > 1:
        samples = samples.reshape(-1, n_channels).mean(axis=1)

    # Whisper's transcribe() treats a raw ndarray as 16 kHz audio; resample
    # (linear interpolation) when the file uses any other rate.
    target_rate = 16000
    if frame_rate != target_rate and samples.size > 0:
        target_len = int(round(samples.size * target_rate / frame_rate))
        samples = np.interp(
            np.linspace(0.0, samples.size - 1, target_len),
            np.arange(samples.size),
            samples,
        ).astype(np.float32)

    # Transcribe the audio using the Whisper model
    result = model.transcribe(samples)
    return result["text"]
# Create a Gradio Interface that exposes transcribe() over the web UI / API.
# NOTE(review): `source="upload"` and `type="bytes"` are Gradio-version-sensitive —
# Gradio 4.x renamed `source` to `sources` and restricted `type` to
# "numpy"/"filepath". This only works on the Gradio version the Space pins;
# confirm against requirements.txt before upgrading.
interface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(source="upload", type="bytes"),
    outputs="text",
    title="Whisper Speech-to-Text API",
    description="Upload an audio file and get a transcription using OpenAI's Whisper model."
)
# Launch the interface as an API
# Blocks the process and serves the app (default http://127.0.0.1:7860).
interface.launch()