peterkros commited on
Commit
f345224
·
verified ·
1 Parent(s): f22ede2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -15
app.py CHANGED
@@ -1,27 +1,32 @@
1
- from fastapi import FastAPI, File, UploadFile
2
  import whisper
3
  import numpy as np
4
- import io
5
  import wave
6
-
7
- app = FastAPI()
8
 
9
  # Load Whisper model
10
- model = whisper.load_model("base") # Change to the model you want to use
11
 
12
@app.post("/transcribe/")
async def transcribe(file: UploadFile = File(...)):
    """Transcribe an uploaded WAV file with the Whisper model.

    Expects a PCM WAV upload with 16-bit samples.  Returns JSON of the
    form {"transcription": "<text>"}.
    """
    audio_data = await file.read()

    # Decode the WAV container into a float32 waveform in [-1, 1], the
    # input format whisper's transcribe() accepts for an ndarray.
    with wave.open(io.BytesIO(audio_data), "rb") as wav_reader:
        n_channels = wav_reader.getnchannels()
        n_frames = wav_reader.getnframes()
        frames = wav_reader.readframes(n_frames)

    audio_as_np_int16 = np.frombuffer(frames, dtype=np.int16)
    audio_as_np_float32 = audio_as_np_int16.astype(np.float32) / np.iinfo(np.int16).max

    # Fix: the original passed the interleaved sample stream straight
    # through, so a stereo file was treated as mono at double speed.
    # Average the channels down to mono instead.
    if n_channels > 1:
        audio_as_np_float32 = audio_as_np_float32.reshape(-1, n_channels).mean(axis=1)

    # NOTE(review): whisper expects 16 kHz input when given a raw array;
    # WAVs at other sample rates will transcribe poorly.  No resampling
    # is performed here — confirm upload constraints with callers.
    result = model.transcribe(audio_as_np_float32)
    text = result["text"].strip()

    return {"transcription": text}
 
 
1
+ import gradio as gr
2
  import whisper
3
  import numpy as np
 
4
  import wave
5
+ import io
 
6
 
7
# Load the Whisper model once at import time so every request reuses it.
model = whisper.load_model("base") # You can change to any other model like "small", "medium", etc.
9
 
10
def transcribe(audio):
    """Transcribe a WAV recording with the Whisper model.

    Parameters
    ----------
    audio : bytes | str
        Raw WAV file contents, or a path to a WAV file.  Both forms are
        accepted (backward-compatible generalization): ``wave.open``
        reads either a file-like object or a filename.

    Returns
    -------
    str
        The transcribed text.
    """
    # Wrap raw bytes in a BytesIO; pass path strings straight through.
    source = io.BytesIO(audio) if isinstance(audio, (bytes, bytearray)) else audio

    # Decode the WAV into a float32 waveform in [-1, 1].
    with wave.open(source, "rb") as wav_reader:
        n_channels = wav_reader.getnchannels()
        samples = wav_reader.getnframes()
        audio_data = wav_reader.readframes(samples)

    audio_as_np_int16 = np.frombuffer(audio_data, dtype=np.int16)
    audio_as_np_float32 = audio_as_np_int16.astype(np.float32) / np.iinfo(np.int16).max

    # Fix: a stereo WAV would otherwise be fed to the model as an
    # interleaved stream, i.e. garbled mono at double speed.
    if n_channels > 1:
        audio_as_np_float32 = audio_as_np_float32.reshape(-1, n_channels).mean(axis=1)

    # NOTE(review): whisper expects 16 kHz input for a raw array; other
    # sample rates are not resampled here — confirm upload constraints.
    result = model.transcribe(audio_as_np_float32)
    return result["text"]
21
+
22
# Create the Gradio interface.
# Fix: gr.Audio has no type="bytes" option — valid values are "filepath"
# and "numpy" — and Gradio 4.x renamed `source=` to `sources=[...]`, so
# the original arguments raise at startup.  "filepath" hands fn the path
# of the uploaded file.
interface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["upload"], type="filepath"),
    outputs="text",
    title="Whisper Speech-to-Text API",
    description="Upload an audio file and get a transcription using OpenAI's Whisper model.",
)

# Launch the interface; the Gradio server also exposes it as an HTTP API.
interface.launch()