Spaces:
Sleeping
Sleeping
Commit
·
f370e2a
1
Parent(s):
d37215e
changed from large model to medium
Browse files
app.py
CHANGED
@@ -9,6 +9,24 @@ model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-medium")
|
|
9 |
|
10 |
model.config.forced_decoder_ids = None
|
11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
def predict(audio, mic_audio=None):
|
14 |
# audio = tuple (sample_rate, frames) or (sample_rate, (frames, channels))
|
|
|
9 |
|
10 |
model.config.forced_decoder_ids = None
|
11 |
|
12 |
+
def process_audio(sampling_rate, waveform):
    """Normalize raw PCM audio to a mono 16 kHz float tensor, capped at 30 s.

    Parameters:
        sampling_rate: sample rate of `waveform` in Hz.
        waveform: numpy array of int16 PCM samples; shape (n,) for mono
            or (n, channels) for multi-channel audio.

    Returns:
        torch.Tensor: 1-D float tensor of at most 16000 * 30 samples,
        scaled to [-1.0, 1.0] at 16 kHz.
    """
    # convert from int16 to floating point in [-1, 1].
    # int16 full scale is 32768 (2**15) — the original divisor 32678.0
    # was a typo that mis-scaled every sample and let values exceed 1.0.
    waveform = waveform / 32768.0

    # convert to mono if stereo (librosa expects channels-first, hence .T)
    if len(waveform.shape) > 1:
        waveform = librosa.to_mono(waveform.T)

    # resample to 16 kHz if necessary (Whisper's expected input rate)
    if sampling_rate != 16000:
        waveform = librosa.resample(waveform, orig_sr=sampling_rate, target_sr=16000)

    # limit to 30 seconds (Whisper's maximum context window)
    waveform = waveform[:16000 * 30]

    # make PyTorch tensor
    waveform = torch.tensor(waveform)
    return waveform
|
30 |
|
31 |
def predict(audio, mic_audio=None):
|
32 |
# audio = tuple (sample_rate, frames) or (sample_rate, (frames, channels))
|