File size: 1,248 Bytes
25fcb65
412c852
ab662d2
922cd73
1576661
 
 
793e132
 
7770adb
191d30d
1576661
7226ea6
1e7bdcd
7226ea6
1e7bdcd
7226ea6
412c852
2e8cc61
 
 
 
1e7bdcd
2e8cc61
 
793e132
7226ea6
 
 
 
 
 
 
 
 
 
25fcb65
160cee9
2e8cc61
 
7226ea6
2e8cc61
 
1e7bdcd
 
7226ea6
1e7bdcd
 
ea344d5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
from transformers import pipeline
import gradio as gr
from pyctcdecode import BeamSearchDecoderCTC

# Disabled: loading a standalone BeamSearchDecoderCTC from the hub.
# lmID = "aware-ai/german-lowercase-wiki-5gram"
# decoder = BeamSearchDecoderCTC.load_from_hf_hub(lmID)

# Model identifiers handed to transformers.pipeline below.
ASR_MODEL = "aware-ai/wav2vec2-xls-r-1b-5gram-german"
GRAMMAR_MODEL = "aware-ai/marian-german-grammar"

# Pipelines are built once at module load and shared by the handlers below.
p = pipeline("automatic-speech-recognition", model=ASR_MODEL)
ttp = pipeline("text2text-generation", model=GRAMMAR_MODEL)


def transcribe(audio):
    """Transcribe an audio recording to text with the ASR pipeline ``p``.

    Args:
        audio: Input accepted by the speech-recognition pipeline (the ASR
            interface in this file supplies a file path), or ``None`` /
            an empty string when nothing was recorded.

    Returns:
        The ``"text"`` field of the pipeline result, or an empty string
        when no audio was supplied.
    """
    # Submitting the form without a recording yields no input; the pipeline
    # would raise on it, so answer with an empty transcript instead.
    if audio is None or (isinstance(audio, str) and not audio.strip()):
        return ""

    # Chunked inference: 16 s windows with a (4, 0) second stride.
    result = p(audio, chunk_length_s=16, stride_length_s=(4, 0))
    return result["text"]
    
def punctuate(transcribed):
    """Run text through the text2text pipeline ``ttp`` and return its output.

    Args:
        transcribed: Text to process, e.g. the output of ``transcribe``.
            ``None``, empty, or whitespace-only input is accepted.

    Returns:
        The ``"generated_text"`` of the first pipeline result, or an empty
        string when there is no text to process.
    """
    # Nothing to rewrite: skip the model call rather than letting it
    # generate output from an empty prompt.
    if transcribed is None or not transcribed.strip():
        return ""

    candidates = ttp(transcribed, max_length=512)
    return candidates[0]["generated_text"]

def get_asr_interface():
    """Build the Gradio interface that feeds microphone audio to ``transcribe``.

    Returns:
        A ``gr.Interface`` with one microphone audio input (passed to the
        handler as a file path) and one textbox output.
    """
    microphone = gr.inputs.Audio(source="microphone", type="filepath")
    asr_tab = gr.Interface(
        fn=transcribe,
        inputs=[microphone],
        outputs=["textbox"],
    )
    return asr_tab
        
def get_punctuation_interface():
    """Build the Gradio interface that passes typed text to ``punctuate``.

    Returns:
        A ``gr.Interface`` with one textbox input and one textbox output.
    """
    text_in = ["textbox"]
    text_out = ["textbox"]
    grammar_tab = gr.Interface(fn=punctuate, inputs=text_in, outputs=text_out)
    return grammar_tab

# Each tab is a (title, builder) pair so a title cannot drift away from the
# interface it labels.
_TABS = (
    ("ASR", get_asr_interface),
    ("GRAMMAR", get_punctuation_interface),
)

# Builders run in tab order; titles are collected in the same order.
interfaces = [build() for _, build in _TABS]
names = [title for title, _ in _TABS]

# Listen on all network interfaces; request queueing is turned off.
demo = gr.TabbedInterface(interfaces, names)
demo.launch(server_name="0.0.0.0", enable_queue=False)