flozi00 committed on
Commit
793e132
·
1 Parent(s): 6e3264b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -1
app.py CHANGED
@@ -7,6 +7,8 @@ from pyctcdecode import BeamSearchDecoderCTC
7
  lmID = "aware-ai/german-lowercase-5gram-kenlm"
8
  decoder = BeamSearchDecoderCTC.load_from_hf_hub(lmID)
9
  p = pipeline("automatic-speech-recognition", model="aware-ai/robust-wav2vec2-xls-r-300m-german-lowercase", decoder=decoder)
 
 
10
 
11
  model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_1.2B")
12
  tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_1.2B")
@@ -25,7 +27,9 @@ def translate(src, tgt, text):
25
  def transcribe(audio):
26
  transcribed = p(audio, chunk_length_s=10, stride_length_s=(4, 2))["text"]
27
 
28
- return transcribed
 
 
29
 
30
  def get_asr_interface():
31
  return gr.Interface(
@@ -34,6 +38,7 @@ def get_asr_interface():
34
  gr.inputs.Audio(source="microphone", type="filepath")
35
  ],
36
  outputs=[
 
37
  "textbox"
38
  ])
39
 
 
7
  lmID = "aware-ai/german-lowercase-5gram-kenlm"
8
  decoder = BeamSearchDecoderCTC.load_from_hf_hub(lmID)
9
  p = pipeline("automatic-speech-recognition", model="aware-ai/robust-wav2vec2-xls-r-300m-german-lowercase", decoder=decoder)
10
+ ttp = pipeline("text2text-generation", model="aware-ai/marian-german-grammar")
11
+
12
 
13
  model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_1.2B")
14
  tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_1.2B")
 
27
  def transcribe(audio):
28
  transcribed = p(audio, chunk_length_s=10, stride_length_s=(4, 2))["text"]
29
 
30
+ punctuated = ttp(transcribed, max_length = 512)[0]["generated_text"]
31
+
32
+ return transcribed, punctuated
33
 
34
  def get_asr_interface():
35
  return gr.Interface(
 
38
  gr.inputs.Audio(source="microphone", type="filepath")
39
  ],
40
  outputs=[
41
+ "textbox",
42
  "textbox"
43
  ])
44