Simonlob committed on
Commit
ca80823
1 Parent(s): 75dfdb4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -10
app.py CHANGED
@@ -12,16 +12,11 @@ model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
12
 
13
  def transcribe(file_):
14
  arr_audio, _ = librosa.load(file_, sr=16000)
15
- inputs = processor(arr_audio, sampling_rate=16_000, return_tensors="pt", padding=True)
16
-
17
- with torch.no_grad():
18
- logits = model(inputs.input_values, attention_mask=inputs.attention_mask).logits
19
-
20
- pred_ids = torch.argmax(logits, dim=-1)
21
- text = processor.batch_decode(pred_ids)[0]
22
- return text.lower()
23
-
24
-
25
 
26
 
27
  iface = gr.Interface(
 
12
 
13
  def transcribe(file_):
14
  arr_audio, _ = librosa.load(file_, sr=16000)
15
+ input_values = processor(arr_audio, return_tensors="pt", padding="longest").input_values
16
+ logits = model(input_values).logits
17
+ predicted_ids = torch.argmax(logits, dim=-1)
18
+ transcription = processor.batch_decode(predicted_ids)
19
+ return transcription[0].lower()
 
 
 
 
 
20
 
21
 
22
  iface = gr.Interface(