demavior committed on
Commit
42aa5ee
·
verified ·
1 Parent(s): bbc5e4c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -21
app.py CHANGED
@@ -1,41 +1,36 @@
1
- import os
2
- os.system('pip install soundfile')
3
-
4
  import gradio as gr
5
  from transformers import pipeline
6
  import torch
7
- import soundfile as sf
8
- import io
9
 
10
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
11
 
# Lazily created Whisper pipeline, shared by every request.
_asr_pipeline = None


def _get_asr_pipeline():
    """Build the Whisper pipeline on first use and reuse it afterwards."""
    global _asr_pipeline
    if _asr_pipeline is None:
        _asr_pipeline = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-small",
            chunk_length_s=30,
            device=device,
        )
    return _asr_pipeline


def transcribe(audio):
    """Transcribe a Gradio audio input to text with Whisper.

    Args:
        audio: Value delivered by ``gr.Audio``: a ``(sample_rate, samples)``
            tuple, a path to an audio file, or ``None`` when nothing was
            submitted.

    Returns:
        The recognised text, or an empty string when ``audio`` is ``None``.
    """
    if audio is None:
        return ""

    if isinstance(audio, tuple):
        # Gradio's numpy audio is (sample_rate, samples): the samples are the
        # second element, and the real rate must be used for the encoding
        # rather than an assumed 16 kHz.
        sample_rate, samples = audio
        audio_flac = io.BytesIO()
        sf.write(audio_flac, samples, sample_rate, format="flac")
        # The pipeline accepts encoded audio as bytes, not as a file object.
        pipe_input = audio_flac.getvalue()
    else:
        # A file path can be handed to the pipeline unchanged.
        pipe_input = audio

    prediction = _get_asr_pipeline()(pipe_input, batch_size=8)["text"]
    return prediction
31
 
# UI components: one audio input, one text box for the transcription.
_audio_input = gr.Audio(label="Input")
_text_output = gr.Textbox(label="Result")

# Gradio interface wiring the components to the transcription function.
gradio_app = gr.Interface(
    fn=transcribe,
    inputs=_audio_input,
    outputs=_text_output,
    title="Transcribed",
)

# Start the web server only when executed as a script.
if __name__ == "__main__":
    gradio_app.launch()
 
 
 
 
1
  import gradio as gr
2
  from transformers import pipeline
3
  import torch
4
+ import torchaudio
 
5
 
6
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
7
 
# Lazily created Whisper pipeline, shared by every request.
_asr_pipeline = None


def _get_asr_pipeline():
    """Build the Whisper pipeline on first use and reuse it afterwards."""
    global _asr_pipeline
    if _asr_pipeline is None:
        _asr_pipeline = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-small",
            chunk_length_s=30,
            device=device,
        )
    return _asr_pipeline


def transcribe(audio):
    """Transcribe a Gradio audio input to text with Whisper.

    Args:
        audio: Value delivered by ``gr.Audio``: a ``(sample_rate, samples)``
            tuple, a path to an audio file, or ``None`` when nothing was
            submitted.

    Returns:
        The recognised text, or an empty string when ``audio`` is ``None``.
    """
    if audio is None:
        return ""

    if isinstance(audio, tuple):
        # Gradio's numpy audio is (sample_rate, samples); index 0 is the
        # integer rate, not something torchaudio could load.
        sample_rate, samples = audio
        waveform = torch.as_tensor(samples)
        if not waveform.is_floating_point():
            # Scale integer PCM into the [-1.0, 1.0] float range.
            full_scale = torch.iinfo(waveform.dtype).max
            waveform = waveform.to(torch.float32) / full_scale
        if waveform.ndim > 1:
            # Multi-channel numpy audio is (samples, channels): mix to mono.
            waveform = waveform.mean(dim=1)
    else:
        # Anything else is treated as a path (or file-like) for torchaudio.
        waveform, sample_rate = torchaudio.load(audio)
        # torchaudio returns (channels, frames): mix to mono.
        waveform = waveform.mean(dim=0)

    # Passing the rate with the raw samples lets the pipeline resample to
    # the rate the model expects instead of silently assuming it.
    pipe_input = {
        "raw": waveform.to(torch.float32).numpy(),
        "sampling_rate": int(sample_rate),
    }

    prediction = _get_asr_pipeline()(pipe_input)["text"]
    return prediction
27
 
# UI components: one audio input, one text box for the transcription.
_audio_input = gr.Audio(label="Input")
_text_output = gr.Textbox(label="Result")

# Gradio interface wiring the components to the transcription function.
gradio_app = gr.Interface(
    transcribe,
    inputs=_audio_input,
    outputs=_text_output,
    title="Transcribed",
)

# Start the web server only when executed as a script.
if __name__ == "__main__":
    gradio_app.launch()