"""Gradio demo with two tabs: German speech recognition and grammar correction.

Both Hugging Face pipelines are created once at import time and shared by the
request handlers; the tabbed UI is launched at the bottom of the module.
"""

from transformers import pipeline
import gradio as gr
from pyctcdecode import BeamSearchDecoderCTC

# Currently disabled: loading a standalone language-model decoder from the Hub.
# lmID = "aware-ai/german-lowercase-wiki-5gram"
# decoder = BeamSearchDecoderCTC.load_from_hf_hub(lmID)

# Speech-to-text pipeline used by the "ASR" tab.
p = pipeline(
    "automatic-speech-recognition",
    model="aware-ai/wav2vec2-xls-r-1b-5gram-german",
)
# Text-to-text pipeline used by the "GRAMMAR" tab.
ttp = pipeline("text2text-generation", model="aware-ai/marian-german-grammar")


def transcribe(audio):
    """Return the transcript of a recorded audio file.

    Args:
        audio: Path to the recording as delivered by the Gradio audio input
            (``type="filepath"``), or ``None`` when nothing was recorded.

    Returns:
        The recognised text, or an empty string when no audio was supplied.
    """
    # Gradio hands over None when the user submits without recording;
    # forwarding that to the pipeline would raise instead of answering.
    if audio is None:
        return ""
    # The recording is processed in 16 s chunks with a (4, 0) stride.
    result = p(audio, chunk_length_s=16, stride_length_s=(4, 0))
    return result["text"]


def punctuate(transcribed):
    """Run the grammar model over *transcribed* and return its output text.

    Args:
        transcribed: Raw text, typically an ASR transcript.

    Returns:
        The text generated by the grammar pipeline, or an empty string when
        the input is empty or whitespace-only.
    """
    # Nothing to correct: skip the model call rather than let it generate
    # text from an empty prompt.
    if not transcribed or not transcribed.strip():
        return ""
    generations = ttp(transcribed, max_length=512)
    return generations[0]["generated_text"]


def get_asr_interface():
    """Build the interface that maps a microphone recording to a transcript."""
    microphone = gr.inputs.Audio(source="microphone", type="filepath")
    return gr.Interface(
        fn=transcribe,
        inputs=[microphone],
        outputs=["textbox"],
    )


def get_punctuation_interface():
    """Build the interface that maps a text box to corrected text."""
    return gr.Interface(
        fn=punctuate,
        inputs=["textbox"],
        outputs=["textbox"],
    )


# Tab contents and their labels, kept in matching order.
interfaces = [
    get_asr_interface(),
    get_punctuation_interface(),
]
names = [
    "ASR",
    "GRAMMAR",
]

# Listen on all network interfaces with request queueing turned off.
gr.TabbedInterface(interfaces, names).launch(
    server_name="0.0.0.0",
    enable_queue=False,
)