Antoniskaraolis committed on
Commit
f3c7107
·
1 Parent(s): d31bc10

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -0
app.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
3
+ import torchaudio
4
+
5
_MODEL_NAME = "facebook/wav2vec2-base-960h"
# Audio is resampled to this rate before being passed to the model.
_TARGET_SAMPLE_RATE = 16000
# Holds the loaded (tokenizer, model) pair so repeated calls do not reload it.
_MODEL_CACHE = {}


def _load_tokenizer_and_model():
    """Return the ``(tokenizer, model)`` pair, loading it only on first use."""
    if "pair" not in _MODEL_CACHE:
        tokenizer = Wav2Vec2Tokenizer.from_pretrained(_MODEL_NAME)
        model = Wav2Vec2ForCTC.from_pretrained(_MODEL_NAME)
        _MODEL_CACHE["pair"] = (tokenizer, model)
    return _MODEL_CACHE["pair"]


def speech_recognition(audio_file_path):
    """Transcribe the speech in an audio file with a Wav2Vec2 CTC model.

    The audio is loaded with ``torchaudio``, downmixed to a single channel,
    resampled to 16 kHz when necessary, and decoded greedily (arg-max over
    the model's logits at each time step).

    Args:
        audio_file_path: Path to the audio file, passed to ``torchaudio.load``.

    Returns:
        The decoded transcription string for the file.
    """
    tokenizer, model = _load_tokenizer_and_model()

    waveform, sample_rate = torchaudio.load(audio_file_path)

    # Collapse the channel axis so the tokenizer always receives a 1-D signal.
    # A plain squeeze() only does that for mono input; for stereo files it
    # would leave a 2-D array that is then misread as one utterance.
    # NOTE(review): assumes torchaudio's default (channels, frames) layout.
    if waveform.dim() > 1:
        waveform = waveform.mean(dim=0)

    if sample_rate != _TARGET_SAMPLE_RATE:
        resampler = torchaudio.transforms.Resample(
            orig_freq=sample_rate, new_freq=_TARGET_SAMPLE_RATE
        )
        waveform = resampler(waveform)

    input_values = tokenizer(
        waveform.numpy(), return_tensors="pt", padding="longest"
    ).input_values

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(input_values).logits

    # Greedy decoding: most likely token id per frame, then decode the ids
    # back to text.
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = tokenizer.batch_decode(predicted_ids)

    return transcription[0]