Norphel committed on
Commit
93df753
·
verified ·
1 Parent(s): 27ddc0e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -7
app.py CHANGED
@@ -2,15 +2,19 @@ import numpy as np
2
  import gradio as gr
3
  from transformers import Wav2Vec2ForCTC,Wav2Vec2Processor
4
 
5
- # asr_model_id = "Norphel/wav2vec2-large-mms-1b-dzo-colab"
6
- # asr_model = Wav2Vec2ForCTC.from_pretrained(asr_model_id, target_lang="dzo")
7
- # asr_processor = Wav2Vec2Processor.from_pretrained(asr_model_id)
8
- # asr_processor.tokenizer.set_target_lang("dzo")
9
 
10
  def generate_text(audio):
11
  sr, data = audio
12
  print(data)
13
- return (sr, np.flipud(data))
 
 
 
 
14
 
15
  input_audio = gr.Audio(
16
  sources=["microphone"],
@@ -22,9 +26,9 @@ input_audio = gr.Audio(
22
  ),
23
  )
24
  demo = gr.Interface(
25
- fn=reverse_audio,
26
  inputs=input_audio,
27
- outputs="audio"
28
  )
29
 
30
  if __name__ == "__main__":
 
2
  import gradio as gr
3
  from transformers import Wav2Vec2ForCTC,Wav2Vec2Processor
4
 
5
# Load the Dzongkha (dzo) fine-tuned MMS Wav2Vec2 ASR checkpoint once at module
# import time so every Gradio request reuses the same model and processor.
asr_model_id = "Norphel/wav2vec2-large-mms-1b-dzo-colab"
asr_model = Wav2Vec2ForCTC.from_pretrained(asr_model_id, target_lang="dzo")
asr_processor = Wav2Vec2Processor.from_pretrained(asr_model_id)
# Point the tokenizer at the Dzongkha vocabulary so decoding matches the
# model's "dzo" language adapter selected above.
asr_processor.tokenizer.set_target_lang("dzo")
9
 
10
def generate_text(audio):
    """Transcribe a Gradio microphone recording to Dzongkha text.

    Parameters
    ----------
    audio : tuple
        Gradio ``Audio`` value: ``(sample_rate, samples)`` where ``samples``
        is a numpy array (typically int16 for microphone input).

    Returns
    -------
    str
        The decoded transcription from the MMS Wav2Vec2 CTC model.
    """
    # Local import: torch is not imported at the top of this file, but the
    # model call below needs it for device checks and argmax decoding.
    import torch

    sr, data = audio
    print(data)

    # Wav2Vec2Processor expects float waveforms; Gradio microphone audio is
    # usually int16, so normalize integer samples into [-1.0, 1.0].
    samples = data.astype(np.float32)
    if np.issubdtype(data.dtype, np.integer):
        samples /= np.iinfo(data.dtype).max

    # BUG FIX: the original referenced an undefined name ``aud_arr``; the
    # unpacked waveform is ``data`` (normalized to ``samples`` above).
    # NOTE(review): sampling_rate is hard-coded to 16 kHz while the actual
    # microphone rate ``sr`` may differ — resampling upstream should be
    # confirmed against the gr.Audio configuration.
    input_dict = asr_processor(
        samples, sampling_rate=16_000, return_tensors="pt", padding=True
    )

    # BUG FIX: the original unconditionally moved inputs to "cuda", which
    # crashes on CPU-only hosts; follow the model's own device instead.
    device = next(asr_model.parameters()).device
    logits = asr_model(input_dict.input_values.to(device)).logits
    pred_ids = torch.argmax(logits, dim=-1)[0]

    return asr_processor.decode(pred_ids)
18
 
19
  input_audio = gr.Audio(
20
  sources=["microphone"],
 
26
  ),
27
  )
28
# Wire the microphone input through the ASR transcription function and show
# the decoded Dzongkha text in a plain text output component.
demo = gr.Interface(
    fn=generate_text,
    inputs=input_audio,
    outputs="text"
)
33
 
34
  if __name__ == "__main__":