Spaces:

aware-ai
/

german-asr

Runtime error

flozi00 committed on Jun 4, 2022

Commit

1e7bdcd

1 Parent(s): 1449a19

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,43 +2,35 @@ from transformers import pipeline
 import gradio as gr
 from pyctcdecode import BeamSearchDecoderCTC
-lmID = "aware-ai/german-lowercase-4gram-kenlm"
 decoder = BeamSearchDecoderCTC.load_from_hf_hub(lmID)
-p = pipeline("automatic-speech-recognition", model="aware-ai/robust-wav2vec2-base-german-lowercase", decoder=decoder)
 ttp = pipeline("text2text-generation", model="aware-ai/marian-german-grammar")
 def transcribe(audio):
-    transcribed = p(audio[1], chunk_length_s=20, stride_length_s=(0, 0))["text"]
-    return transcribed
-def punctuate(text):
-    punctuated = ttp(text, max_length = 512)[0]["generated_text"]
-    return punctuated
 def get_asr_interface():
     return gr.Interface(
         fn=transcribe,
         inputs=[
-            gr.inputs.Audio(source="microphone")
         ],
         outputs=[
             "textbox",
-        ])
-def get_punctuate_interface():
-    return gr.Interface(
-        fn=punctuate,
-        inputs=[
             "textbox"
-        ],
-        outputs=[
-            "textbox",
         ])
 interfaces = [
     get_asr_interface(),
-    get_punctuate_interface(),
 ]
-gr.Series(get_asr_interface(),get_punctuate_interface()).launch(server_name = "0.0.0.0")

 import gradio as gr
 from pyctcdecode import BeamSearchDecoderCTC
+lmID = "aware-ai/german-lowercase-5gram-kenlm"
 decoder = BeamSearchDecoderCTC.load_from_hf_hub(lmID)
+p = pipeline("automatic-speech-recognition", model="aware-ai/robust-wav2vec2-xls-r-300m-german-lowercase", decoder=decoder)
 ttp = pipeline("text2text-generation", model="aware-ai/marian-german-grammar")
 def transcribe(audio):
+    transcribed = p(audio, chunk_length_s=16, stride_length_s=(4, 0))["text"]
+    punctuated = ttp(transcribed, max_length = 512)[0]["generated_text"]
+    return transcribed, punctuated
 def get_asr_interface():
     return gr.Interface(
         fn=transcribe,
         inputs=[
+            gr.inputs.Audio(source="microphone", type="filepath")
         ],
         outputs=[
             "textbox",
             "textbox"
         ])
 interfaces = [
     get_asr_interface(),
 ]
+names = [
+    "ASR",
+]
+gr.TabbedInterface(interfaces, names).launch(server_name = "0.0.0.0")