# NOTE: the original page capture began with Hugging Face Spaces UI text
# ("Spaces: Sleeping") that is not part of the program source.
# Standard library
import io
import wave

# Third-party
import gradio as gr
import numpy as np
import whisper

# Load the Whisper ASR model once at import time so every request reuses it.
# "base" trades accuracy for speed; any other checkpoint name works here
# ("small", "medium", "large", ...).
model = whisper.load_model("base")
def transcribe(audio):
    """Transcribe an uploaded WAV file (raw bytes) with Whisper.

    Args:
        audio: Raw bytes of a complete WAV file. The PCM payload is
            reinterpreted as int16, so this assumes 16-bit samples;
            multi-channel audio is passed through interleaved and the
            sample rate is not resampled -- Whisper expects 16 kHz mono,
            TODO confirm uploads match that.

    Returns:
        str: The transcribed text.
    """
    # Decode the WAV container and pull out all raw PCM frames; the
    # context manager guarantees the reader is closed afterwards.
    with wave.open(io.BytesIO(audio), "rb") as wav_reader:
        n_frames = wav_reader.getnframes()
        pcm_bytes = wav_reader.readframes(n_frames)

    # Reinterpret the PCM bytes as int16 and normalize to [-1.0, 1.0]
    # float32, the in-memory format model.transcribe() accepts.
    pcm_int16 = np.frombuffer(pcm_bytes, dtype=np.int16)
    samples = pcm_int16.astype(np.float32) / np.iinfo(np.int16).max

    result = model.transcribe(samples)
    return result["text"]
# Build the Gradio UI: one uploaded audio file in, transcribed text out.
# NOTE(review): `source=` and `type="bytes"` are the legacy Gradio 2.x/3.x
# Audio API; Gradio 4+ renamed/removed these (`sources=["upload"]`, and
# "bytes" is no longer a valid `type`) -- confirm the pinned gradio
# version before upgrading.
interface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(source="upload", type="bytes"),
    outputs="text",
    title="Whisper Speech-to-Text API",
    description="Upload an audio file and get a transcription using OpenAI's Whisper model.",
)

# Launch the web app; Gradio also exposes the wrapped function over HTTP,
# which is what makes this usable as an API.
interface.launch()