File size: 1,062 Bytes
f345224
d222613
 
 
f345224
d222613
 
f345224
d222613
f345224
 
 
d222613
f345224
 
d222613
 
 
 
f345224
 
 
 
 
 
 
 
 
 
d222613
f345224
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import gradio as gr
import whisper
import numpy as np
import wave
import io

# Load Whisper model
# NOTE(review): this runs at import time, so process startup blocks on the
# (possibly network-backed) checkpoint load; "base" trades accuracy for speed.
model = whisper.load_model("base")  # You can change to any other model like "small", "medium", etc.

def transcribe(audio):
    """Transcribe the bytes of an uploaded WAV file with the Whisper model.

    Parameters
    ----------
    audio : bytes
        Raw WAV file contents (as delivered by the ``gr.Audio`` input).

    Returns
    -------
    str
        The transcribed text.

    Raises
    ------
    ValueError
        If the WAV data is not 16-bit PCM.
    wave.Error
        If the bytes are not a valid WAV file.
    """
    # Decode the WAV container into a float32 waveform in [-1.0, 1.0].
    with wave.open(io.BytesIO(audio), "rb") as wav_reader:
        if wav_reader.getsampwidth() != 2:
            # The np.int16 decode below is only correct for 16-bit PCM;
            # 8/24/32-bit data would silently turn into noise.
            raise ValueError("Only 16-bit PCM WAV files are supported.")
        n_channels = wav_reader.getnchannels()
        frames = wav_reader.readframes(wav_reader.getnframes())
        samples = np.frombuffer(frames, dtype=np.int16)
        if n_channels > 1:
            # WAV frames interleave channels; without this, stereo input is
            # fed to Whisper at double speed and garbled. Downmix to mono.
            samples = samples.reshape(-1, n_channels).mean(axis=1)
        audio_as_np_float32 = samples.astype(np.float32) / np.iinfo(np.int16).max
        # NOTE(review): Whisper expects 16 kHz audio; other sample rates are
        # passed through unresampled here — TODO confirm/resample upstream.

    # Transcribe the audio using the Whisper model
    result = model.transcribe(audio_as_np_float32)
    return result["text"]

# Create a Gradio Interface
# NOTE(review): gr.Audio's documented `type` values are "filepath" and
# "numpy" — type="bytes" is not listed in the Gradio docs; verify it is
# honored by the installed Gradio version (transcribe() expects raw bytes).
# NOTE(review): `source=` was renamed `sources=` in Gradio 4.x — confirm
# the pinned Gradio version accepts this keyword.
interface = gr.Interface(
    fn=transcribe, 
    inputs=gr.Audio(source="upload", type="bytes"), 
    outputs="text",
    title="Whisper Speech-to-Text API",
    description="Upload an audio file and get a transcription using OpenAI's Whisper model."
)

# Launch the interface as an API
# launch() blocks the process and serves the UI/API on the default host/port.
interface.launch()