chinmaydan committed on
Commit
f370e2a
·
1 Parent(s): d37215e

changed from large model to medium

Browse files
Files changed (1) hide show
  1. app.py +18 -0
app.py CHANGED
@@ -9,6 +9,24 @@ model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-medium")
9
 
10
  model.config.forced_decoder_ids = None
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  def predict(audio, mic_audio=None):
14
  # audio = tuple (sample_rate, frames) or (sample_rate, (frames, channels))
 
9
 
10
  model.config.forced_decoder_ids = None
11
 
12
def process_audio(sampling_rate, waveform):
    """Prepare a raw audio clip for the speech model.

    The clip is scaled to floating point, down-mixed to mono, resampled to
    16 kHz, truncated to at most 30 seconds, and returned as a PyTorch tensor.

    Args:
        sampling_rate: Sample rate of ``waveform`` in Hz.
        waveform: Array of audio samples, either ``(frames,)`` or
            ``(frames, channels)``. Assumed to hold int16 PCM values, since
            the scaling below divides by the int16 full-scale magnitude.

    Returns:
        A 1-D ``torch.Tensor`` with at most ``16000 * 30`` samples.
    """
    target_rate = 16000
    max_seconds = 30

    # int16 spans [-32768, 32767]; dividing by 2**15 maps it into [-1.0, 1.0).
    # (The previous divisor, 32678, was a digit transposition of 32768.)
    waveform = waveform / 32768.0

    # Convert to mono if stereo. The input is (frames, channels) while
    # librosa expects (channels, frames), hence the transpose.
    if len(waveform.shape) > 1:
        waveform = librosa.to_mono(waveform.T)

    # Resample to 16 kHz if necessary.
    if sampling_rate != target_rate:
        waveform = librosa.resample(
            waveform, orig_sr=sampling_rate, target_sr=target_rate
        )

    # Limit to 30 seconds.
    waveform = waveform[: target_rate * max_seconds]

    # Make PyTorch tensor.
    return torch.tensor(waveform)
30
 
31
  def predict(audio, mic_audio=None):
32
  # audio = tuple (sample_rate, frames) or (sample_rate, (frames, channels))