peterkros committed on
Commit
dd6327f
·
verified ·
1 Parent(s): f345224

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -18
app.py CHANGED
@@ -1,32 +1,23 @@
1
  import gradio as gr
2
- import whisper
3
- import numpy as np
4
- import wave
5
- import io
6
 
7
- # Load Whisper model
8
- model = whisper.load_model("base") # You can change to any other model like "small", "medium", etc.
 
9
 
10
  def transcribe(audio):
11
- # Convert the uploaded audio file to a numpy array
12
- with wave.open(io.BytesIO(audio), "rb") as wav_reader:
13
- samples = wav_reader.getnframes()
14
- audio_data = wav_reader.readframes(samples)
15
- audio_as_np_int16 = np.frombuffer(audio_data, dtype=np.int16)
16
- audio_as_np_float32 = audio_as_np_int16.astype(np.float32) / np.iinfo(np.int16).max
17
-
18
  # Transcribe the audio using the Whisper model
19
- result = model.transcribe(audio_as_np_float32)
20
- return result["text"]
21
 
22
  # Create a Gradio Interface
23
  interface = gr.Interface(
24
  fn=transcribe,
25
- inputs=gr.Audio(source="upload", type="bytes"),
26
  outputs="text",
27
  title="Whisper Speech-to-Text API",
28
- description="Upload an audio file and get a transcription using OpenAI's Whisper model."
29
  )
30
 
31
  # Launch the interface as an API
32
- interface.launch()
 
1
  import gradio as gr
2
+ from transformers import pipeline
 
 
 
3
 
4
+ # Load Whisper model from Hugging Face
5
+ # This uses the `transformers` library's pipeline to load the model
6
+ transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3")
7
 
8
  def transcribe(audio):
 
 
 
 
 
 
 
9
  # Transcribe the audio using the Whisper model
10
+ result = transcriber(audio)["text"]
11
+ return result
12
 
13
  # Create a Gradio Interface
14
  interface = gr.Interface(
15
  fn=transcribe,
16
+ inputs=gr.Audio(source="upload", type="filepath"),
17
  outputs="text",
18
  title="Whisper Speech-to-Text API",
19
+ description="Upload an audio file and get a transcription using OpenAI's Whisper model from Hugging Face."
20
  )
21
 
22
  # Launch the interface as an API
23
+ interface.launch()