File size: 1,334 Bytes
25fcb65
412c852
ab662d2
922cd73
1e7bdcd
ab662d2
1e7bdcd
793e132
 
7770adb
 
 
 
191d30d
7770adb
7226ea6
 
1e7bdcd
7226ea6
1e7bdcd
7226ea6
412c852
2e8cc61
 
 
 
1e7bdcd
2e8cc61
 
793e132
7226ea6
 
 
 
 
 
 
 
 
 
25fcb65
160cee9
2e8cc61
 
7226ea6
2e8cc61
 
1e7bdcd
 
7226ea6
1e7bdcd
 
ea344d5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
from transformers import pipeline
import gradio as gr
from pyctcdecode import BeamSearchDecoderCTC

# Hub repo id of the language model that backs the CTC beam-search decoder.
lmID = "aware-ai/german-lowercase-5gram-kenlm"
decoder = BeamSearchDecoderCTC.load_from_hf_hub(lmID)

# Speech-recognition pipeline, decoding with the language-model decoder above.
p = pipeline(
    task="automatic-speech-recognition",
    model="aware-ai/robust-wav2vec2-xls-r-300m-german-lowercase",
    decoder=decoder,
)

# Text-to-text pipeline used by `punctuate` to post-process transcripts.
ttp = pipeline(
    task="text2text-generation",
    model="aware-ai/marian-german-grammar",
)

# Words handed to the speech-recognition pipeline as `hotwords` on every call.
hotwords = ["hilfe"]

def transcribe(audio):
    """Transcribe a recorded audio clip to text.

    Args:
        audio: Audio input as delivered by the interface (a file path, since
            the Audio component is configured with ``type="filepath"``), or
            ``None`` when no recording was supplied.

    Returns:
        The ``"text"`` field of the speech-recognition pipeline result, or an
        empty string when ``audio`` is ``None``.
    """
    # Submitting the form without a recording hands over None; skip the model
    # rather than letting the pipeline fail on a missing input.
    if audio is None:
        return ""

    result = p(
        audio,
        chunk_length_s=16,
        stride_length_s=(4, 0),
        hotwords=hotwords,
    )
    return result["text"]
    
def punctuate(transcribed):
    """Run text through the text-to-text model and return its output.

    Args:
        transcribed: Input text, e.g. the raw output of ``transcribe``.

    Returns:
        The ``"generated_text"`` of the first pipeline result, or an empty
        string when the input is ``None``, empty, or whitespace only.
    """
    # Nothing to rewrite for blank input; avoid invoking the model, which
    # would otherwise generate text from an empty prompt.
    if not transcribed or not transcribed.strip():
        return ""

    outputs = ttp(transcribed, max_length=512)
    return outputs[0]["generated_text"]

def get_asr_interface():
    """Build the speech-recognition interface: microphone audio in, text out.

    Returns:
        A ``gr.Interface`` that calls ``transcribe`` with the recorded audio
        (passed as a file path) and shows the result in a textbox.
    """
    microphone = gr.inputs.Audio(source="microphone", type="filepath")
    return gr.Interface(fn=transcribe, inputs=[microphone], outputs=["textbox"])
        
def get_punctuation_interface():
    """Build the text interface that feeds a textbox through ``punctuate``.

    Returns:
        A ``gr.Interface`` with one textbox input and one textbox output.
    """
    text_in = ["textbox"]
    text_out = ["textbox"]
    return gr.Interface(fn=punctuate, inputs=text_in, outputs=text_out)

# Tab titles, listed in the same order as the interfaces they label.
names = ["ASR", "GRAMMAR"]
interfaces = [get_asr_interface(), get_punctuation_interface()]

# Serve both interfaces as tabs of a single app, bound to 0.0.0.0.
tabbed_app = gr.TabbedInterface(interfaces, names)
tabbed_app.launch(server_name="0.0.0.0", enable_queue=False)