LuisG07 committed on
Commit
34e7595
·
1 Parent(s): 577c3f8

fix numpy conversion

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -34,7 +34,7 @@ def predict_and_ctc_lm_decode(input_file, model_name):
34
  input_values = processor(speech, return_tensors="pt", sampling_rate=16000).input_values
35
  logits = model(input_values).logits.cpu().detach().numpy()[0]
36
 
37
- pred = processor.batch_decode(logits.numpy()).text
38
 
39
  transcribed_text = fix_transcription_casing(pred[0].lower())
40
 
@@ -59,11 +59,11 @@ def return_all_predictions(input_file, model_name):
59
 
60
 
61
  gr.Interface(return_all_predictions,
62
- inputs = [gr.inputs.Audio(source="microphone", type="filepath", label="Record/ Drop audio"), gr.inputs.Dropdown(["jonatasgrosman/wav2vec2-large-xlsr-53-spanish", "jonatasgrosman/wav2vec2-large-xlsr-53-spanish"], label="Model Name")],
63
  outputs = [gr.outputs.Textbox(label="Beam CTC decoding w/ LM"), gr.outputs.Textbox(label="Greedy decoding")],
64
  title="ASR using Wav2Vec2 & pyctcdecode in spanish",
65
  description = "Comparing greedy decoder with beam search CTC decoder, record/ drop your audio!",
66
  layout = "horizontal",
67
- examples = [["test1.wav", "jonatasgrosman/wav2vec2-large-xlsr-53-spanish"], ["test2.wav", "jonatasgrosman/wav2vec2-large-xlsr-53-spanish"]],
68
  theme="huggingface",
69
  enable_queue=True).launch()
 
34
  input_values = processor(speech, return_tensors="pt", sampling_rate=16000).input_values
35
  logits = model(input_values).logits.cpu().detach().numpy()[0]
36
 
37
+ pred = processor.batch_decode(logits).text
38
 
39
  transcribed_text = fix_transcription_casing(pred[0].lower())
40
 
 
59
 
60
 
61
  gr.Interface(return_all_predictions,
62
+ inputs = [gr.inputs.Audio(source="microphone", type="filepath", label="Record/ Drop audio"), gr.inputs.Dropdown(["jonatasgrosman/wav2vec2-large-xlsr-53-spanish", "jonatasgrosman/wav2vec2-xls-r-1b-spanish"], label="Model Name")],
63
  outputs = [gr.outputs.Textbox(label="Beam CTC decoding w/ LM"), gr.outputs.Textbox(label="Greedy decoding")],
64
  title="ASR using Wav2Vec2 & pyctcdecode in spanish",
65
  description = "Comparing greedy decoder with beam search CTC decoder, record/ drop your audio!",
66
  layout = "horizontal",
67
+ examples = [["test1.wav", "jonatasgrosman/wav2vec2-large-xlsr-53-spanish"], ["test2.wav", "jonatasgrosman/wav2vec2-xls-r-1b-spanish"]],
68
  theme="huggingface",
69
  enable_queue=True).launch()