"""Gradio demo with two tabs: speech recognition ("ASR") and text2text ("GRAMMAR").

The ASR tab records audio from the microphone and transcribes it with the
``aware-ai/robust-wav2vec2-xls-r-300m-german-lowercase`` model, decoded through
a ``pyctcdecode`` beam-search decoder loaded from the Hugging Face Hub. The
GRAMMAR tab feeds text through the ``aware-ai/marian-german-grammar``
text2text-generation model.

Importing/running this module loads both models and launches the web server.
"""

from typing import Optional

import gradio as gr
from pyctcdecode import BeamSearchDecoderCTC
from transformers import pipeline

# Hub id of the language-model-backed CTC decoder used by the ASR pipeline.
lmID = "aware-ai/german-lowercase-5gram-kenlm"
decoder = BeamSearchDecoderCTC.load_from_hf_hub(lmID)

# Speech-to-text pipeline; decoding is delegated to ``decoder``.
p = pipeline(
    "automatic-speech-recognition",
    model="aware-ai/robust-wav2vec2-xls-r-300m-german-lowercase",
    decoder=decoder,
)

# Text-to-text pipeline used by ``punctuate``.
ttp = pipeline("text2text-generation", model="aware-ai/marian-german-grammar")

# Words forwarded to the ASR call as ``hotwords``
# (presumably boosted by the beam-search decoder -- confirm against pyctcdecode).
hotwords = ["hilfe"]


def transcribe(audio: Optional[str]) -> str:
    """Transcribe a recorded audio file to text.

    Args:
        audio: Path of the recorded audio file (the Audio input is configured
            with ``type="filepath"``), or ``None`` when nothing was recorded.

    Returns:
        The ``"text"`` field of the ASR pipeline result, or an empty string
        when no audio was supplied.
    """
    # Gradio passes None when the form is submitted without a recording;
    # the pipeline cannot process that, so answer with an empty transcript.
    if audio is None:
        return ""

    result = p(
        audio,
        chunk_length_s=16,
        stride_length_s=(4, 0),
        hotwords=hotwords,
    )
    return result["text"]


def punctuate(transcribed: Optional[str]) -> str:
    """Run text through the text2text model and return the generated text.

    Args:
        transcribed: Input text, e.g. a raw transcript from ``transcribe``.

    Returns:
        The ``"generated_text"`` of the first generation result, or an empty
        string when the input is missing or blank.
    """
    # Nothing meaningful to generate from empty/whitespace-only input.
    if not transcribed or not transcribed.strip():
        return ""

    generations = ttp(transcribed, max_length=512)
    return generations[0]["generated_text"]


def get_asr_interface():
    """Build the microphone-to-text interface backed by ``transcribe``."""
    # NOTE(review): ``gr.inputs.Audio`` is the legacy component namespace;
    # kept as-is because the installed Gradio version is not visible here.
    return gr.Interface(
        fn=transcribe,
        inputs=[
            gr.inputs.Audio(source="microphone", type="filepath"),
        ],
        outputs=[
            "textbox",
        ],
    )


def get_punctuation_interface():
    """Build the text-to-text interface backed by ``punctuate``."""
    return gr.Interface(
        fn=punctuate,
        inputs=[
            "textbox",
        ],
        outputs=[
            "textbox",
        ],
    )


interfaces = [
    get_asr_interface(),
    get_punctuation_interface(),
]

# Tab titles, in the same order as ``interfaces``.
names = [
    "ASR",
    "GRAMMAR",
]

# "0.0.0.0" binds the server on all network interfaces.
gr.TabbedInterface(interfaces, names).launch(
    server_name="0.0.0.0",
    enable_queue=False,
)