peterkros commited on
Commit
f345224
·
verified ·
1 Parent(s): f22ede2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -15
app.py CHANGED
@@ -1,27 +1,32 @@
1
- from fastapi import FastAPI, File, UploadFile
2
  import whisper
3
  import numpy as np
4
- import io
5
  import wave
6
-
7
- app = FastAPI()
8
 
9
  # Load Whisper model
10
- model = whisper.load_model("base") # Change to the model you want to use
11
 
12
@app.post("/transcribe/")
async def transcribe(file: UploadFile = File(...)):
    """Transcribe an uploaded WAV file with the Whisper model.

    Expects a PCM WAV upload with 16-bit samples.  Returns JSON of the
    form {"transcription": "<text>"}.
    """
    audio_data = await file.read()

    # Decode the WAV container into a float32 waveform in [-1, 1], the
    # input format whisper's transcribe() accepts for an ndarray.
    with wave.open(io.BytesIO(audio_data), "rb") as wav_reader:
        n_channels = wav_reader.getnchannels()
        n_frames = wav_reader.getnframes()
        frames = wav_reader.readframes(n_frames)

    audio_as_np_int16 = np.frombuffer(frames, dtype=np.int16)
    audio_as_np_float32 = audio_as_np_int16.astype(np.float32) / np.iinfo(np.int16).max

    # Fix: the original passed the interleaved sample stream straight
    # through, so a stereo file was treated as mono at double speed.
    # Average the channels down to mono instead.
    if n_channels > 1:
        audio_as_np_float32 = audio_as_np_float32.reshape(-1, n_channels).mean(axis=1)

    # NOTE(review): whisper expects 16 kHz input when given a raw array;
    # WAVs at other sample rates will transcribe poorly.  No resampling
    # is performed here — confirm upload constraints with callers.
    result = model.transcribe(audio_as_np_float32)
    text = result["text"].strip()

    return {"transcription": text}
 
 
1
+ import gradio as gr
2
  import whisper
3
  import numpy as np
 
4
  import wave
5
+ import io
 
6
 
7
# Load the Whisper model once at import time so every request reuses it.
model = whisper.load_model("base") # You can change to any other model like "small", "medium", etc.
9
 
10
def transcribe(audio):
    """Transcribe a WAV recording with the Whisper model.

    Parameters
    ----------
    audio : bytes | str
        Raw WAV file contents, or a path to a WAV file.  Both forms are
        accepted (backward-compatible generalization): ``wave.open``
        reads either a file-like object or a filename.

    Returns
    -------
    str
        The transcribed text.
    """
    # Wrap raw bytes in a BytesIO; pass path strings straight through.
    source = io.BytesIO(audio) if isinstance(audio, (bytes, bytearray)) else audio

    # Decode the WAV into a float32 waveform in [-1, 1].
    with wave.open(source, "rb") as wav_reader:
        n_channels = wav_reader.getnchannels()
        samples = wav_reader.getnframes()
        audio_data = wav_reader.readframes(samples)

    audio_as_np_int16 = np.frombuffer(audio_data, dtype=np.int16)
    audio_as_np_float32 = audio_as_np_int16.astype(np.float32) / np.iinfo(np.int16).max

    # Fix: a stereo WAV would otherwise be fed to the model as an
    # interleaved stream, i.e. garbled mono at double speed.
    if n_channels > 1:
        audio_as_np_float32 = audio_as_np_float32.reshape(-1, n_channels).mean(axis=1)

    # NOTE(review): whisper expects 16 kHz input for a raw array; other
    # sample rates are not resampled here — confirm upload constraints.
    result = model.transcribe(audio_as_np_float32)
    return result["text"]
21
+
22
# Create the Gradio interface.
# Fix: gr.Audio has no type="bytes" option — valid values are "filepath"
# and "numpy" — and Gradio 4.x renamed `source=` to `sources=[...]`, so
# the original arguments raise at startup.  "filepath" hands fn the path
# of the uploaded file.
interface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["upload"], type="filepath"),
    outputs="text",
    title="Whisper Speech-to-Text API",
    description="Upload an audio file and get a transcription using OpenAI's Whisper model.",
)

# Launch the interface; the Gradio server also exposes it as an HTTP API.
interface.launch()