chinmaydan committed on
Commit
b6907f5
·
1 Parent(s): ed9aac5

Trying a commit

Browse files
Files changed (1) hide show
  1. app.py +1 -19
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import gradio as gr
2
- import librosa
3
  import torch
4
 
5
  from transformers import WhisperProcessor, WhisperForConditionalGeneration
@@ -9,24 +9,6 @@ model = SpeechT5ForSpeechToText.from_pretrained("openai/whisper-large")
9
 
10
  model.config.forced_decoder_ids = WhisperProcessor.get_decoder_prompt_ids(language="english", task="transcribe")
11
 
12
- def process_audio(sampling_rate, waveform):
13
- # convert from int16 to floating point
14
- waveform = waveform / 32678.0
15
-
16
- # convert to mono if stereo
17
- if len(waveform.shape) > 1:
18
- waveform = librosa.to_mono(waveform.T)
19
-
20
- # resample to 16 kHz if necessary
21
- if sampling_rate != 16000:
22
- waveform = librosa.resample(waveform, orig_sr=sampling_rate, target_sr=16000)
23
-
24
- # limit to 30 seconds
25
- waveform = waveform[:16000*30]
26
-
27
- # make PyTorch tensor
28
- waveform = torch.tensor(waveform)
29
- return waveform
30
 
31
  def predict(audio, mic_audio=None):
32
  # audio = tuple (sample_rate, frames) or (sample_rate, (frames, channels))
 
1
  import gradio as gr
2
+ #import librosa
3
  import torch
4
 
5
  from transformers import WhisperProcessor, WhisperForConditionalGeneration
 
9
 
10
  model.config.forced_decoder_ids = WhisperProcessor.get_decoder_prompt_ids(language="english", task="transcribe")
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  def predict(audio, mic_audio=None):
14
  # audio = tuple (sample_rate, frames) or (sample_rate, (frames, channels))