flozi00 committed on
Commit
1e7bdcd
·
1 Parent(s): 1449a19

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -20
app.py CHANGED
@@ -2,43 +2,35 @@ from transformers import pipeline
2
  import gradio as gr
3
  from pyctcdecode import BeamSearchDecoderCTC
4
 
5
-
6
- lmID = "aware-ai/german-lowercase-4gram-kenlm"
7
  decoder = BeamSearchDecoderCTC.load_from_hf_hub(lmID)
8
- p = pipeline("automatic-speech-recognition", model="aware-ai/robust-wav2vec2-base-german-lowercase", decoder=decoder)
9
  ttp = pipeline("text2text-generation", model="aware-ai/marian-german-grammar")
10
 
11
  def transcribe(audio):
12
- transcribed = p(audio[1], chunk_length_s=20, stride_length_s=(0, 0))["text"]
13
- return transcribed
 
14
 
15
- def punctuate(text):
16
- punctuated = ttp(text, max_length = 512)[0]["generated_text"]
17
- return punctuated
18
 
19
  def get_asr_interface():
20
  return gr.Interface(
21
  fn=transcribe,
22
  inputs=[
23
- gr.inputs.Audio(source="microphone")
24
  ],
25
  outputs=[
26
  "textbox",
27
- ])
28
-
29
- def get_punctuate_interface():
30
- return gr.Interface(
31
- fn=punctuate,
32
- inputs=[
33
  "textbox"
34
- ],
35
- outputs=[
36
- "textbox",
37
  ])
38
 
39
  interfaces = [
40
  get_asr_interface(),
41
- get_punctuate_interface(),
42
  ]
43
 
44
- gr.Series(get_asr_interface(),get_punctuate_interface()).launch(server_name = "0.0.0.0")
 
 
 
 
 
2
  import gradio as gr
3
  from pyctcdecode import BeamSearchDecoderCTC
4
 
5
+ lmID = "aware-ai/german-lowercase-5gram-kenlm"
 
6
  decoder = BeamSearchDecoderCTC.load_from_hf_hub(lmID)
7
+ p = pipeline("automatic-speech-recognition", model="aware-ai/robust-wav2vec2-xls-r-300m-german-lowercase", decoder=decoder)
8
  ttp = pipeline("text2text-generation", model="aware-ai/marian-german-grammar")
9
 
10
  def transcribe(audio):
11
+ transcribed = p(audio, chunk_length_s=16, stride_length_s=(4, 0))["text"]
12
+
13
+ punctuated = ttp(transcribed, max_length = 512)[0]["generated_text"]
14
 
15
+ return transcribed, punctuated
 
 
16
 
17
  def get_asr_interface():
18
  return gr.Interface(
19
  fn=transcribe,
20
  inputs=[
21
+ gr.inputs.Audio(source="microphone", type="filepath")
22
  ],
23
  outputs=[
24
  "textbox",
 
 
 
 
 
 
25
  "textbox"
 
 
 
26
  ])
27
 
28
  interfaces = [
29
  get_asr_interface(),
 
30
  ]
31
 
32
+ names = [
33
+ "ASR",
34
+ ]
35
+
36
+ gr.TabbedInterface(interfaces, names).launch(server_name = "0.0.0.0")