# -- Scraped page metadata (preserved from original listing; not code) --
# Spaces: Sleeping
# File size: 1,062 Bytes
# Commits: f345224 / d222613 (alternating, 33 gutter entries from the page view)
import gradio as gr
import whisper
import numpy as np
import wave
import io
# Load Whisper model
# Loaded once at module import so every request reuses the same weights;
# "base" trades accuracy for speed/memory.
model = whisper.load_model("base") # You can change to any other model like "small", "medium", etc.
def transcribe(audio):
    """Transcribe uploaded WAV audio with the module-level Whisper model.

    Args:
        audio: Raw bytes of a WAV file (as delivered by the Gradio Audio
            input configured with type="bytes").

    Returns:
        The transcribed text as a string.

    Raises:
        ValueError: If the WAV data is not 16-bit PCM.
        wave.Error: If the bytes are not a valid WAV file.
    """
    samples, n_channels, frame_rate = decode_wav(audio)
    # Whisper's array API expects mono float32 audio at 16 kHz; the
    # original code passed interleaved multi-channel data at the file's
    # native rate, which garbles stereo input and mistimes speech.
    mono = to_mono(samples, n_channels)
    audio_16k = resample_linear(mono, frame_rate, 16000)
    result = model.transcribe(audio_16k)
    return result["text"]


def decode_wav(data):
    """Decode WAV bytes into (float32 samples in [-1, 1], channels, frame rate)."""
    with wave.open(io.BytesIO(data), "rb") as wav_reader:
        # Guard: the np.int16 decode below is only valid for 16-bit PCM.
        if wav_reader.getsampwidth() != 2:
            raise ValueError("Only 16-bit PCM WAV files are supported")
        n_channels = wav_reader.getnchannels()
        frame_rate = wav_reader.getframerate()
        frames = wav_reader.readframes(wav_reader.getnframes())
    samples = np.frombuffer(frames, dtype=np.int16).astype(np.float32)
    return samples / np.iinfo(np.int16).max, n_channels, frame_rate


def to_mono(samples, n_channels):
    """Average interleaved channels down to a single mono signal."""
    if n_channels == 1:
        return samples
    return samples.reshape(-1, n_channels).mean(axis=1).astype(np.float32)


def resample_linear(samples, src_rate, dst_rate):
    """Linearly resample a 1-D signal from src_rate to dst_rate Hz."""
    if src_rate == dst_rate or samples.size == 0:
        return samples
    n_out = int(round(samples.size * dst_rate / src_rate))
    positions = np.linspace(0.0, samples.size - 1, num=n_out)
    return np.interp(positions, np.arange(samples.size), samples).astype(np.float32)
# Create a Gradio Interface
# Wires the transcription function into a simple web UI / API endpoint.
interface = gr.Interface(
    fn=transcribe,
    # NOTE(review): current Gradio versions accept type="numpy" or
    # type="filepath" for Audio inputs — "bytes" (and the `source` kwarg,
    # renamed `sources` in Gradio 4.x) may be rejected. Confirm against the
    # pinned gradio version before deploying; transcribe() expects raw bytes.
    inputs=gr.Audio(source="upload", type="bytes"),
    outputs="text",
    title="Whisper Speech-to-Text API",
    description="Upload an audio file and get a transcription using OpenAI's Whisper model."
)
# Launch the interface as an API
# Starts the local web server (blocking call).
interface.launch()