File size: 1,233 Bytes
25fcb65
412c852
aa756f5
160cee9
ab662d2
922cd73
60ce8fc
c906256
922cd73
412c852
5f6cbd7
ab662d2
5f6cbd7
793e132
 
191d30d
25fcb65
 
793e132
25fcb65
 
 
412c852
2e8cc61
 
 
 
cb4b34e
2e8cc61
 
793e132
2e8cc61
 
25fcb65
 
 
 
 
 
 
 
 
160cee9
2e8cc61
 
25fcb65
2e8cc61
 
25fcb65
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
from transformers import pipeline
import gradio as gr
import re
import torch
from pyctcdecode import BeamSearchDecoderCTC
import torch
import librosa
import time


# Hub id of the n-gram language model used to build the CTC beam-search decoder.
lmID = "aware-ai/german-lowercase-4gram-kenlm"
decoder = BeamSearchDecoderCTC.load_from_hf_hub(lmID)

# Speech-recognition pipeline; the decoder loaded above is passed in explicitly.
p = pipeline(
    "automatic-speech-recognition",
    model="aware-ai/robust-wav2vec2-base-german-lowercase",
    decoder=decoder,
)

# Text-to-text pipeline that punctuate() runs over raw text.
ttp = pipeline(
    "text2text-generation",
    model="aware-ai/marian-german-grammar",
)

def transcribe(audio):
    """Transcribe a recorded clip with the module-level ASR pipeline ``p``.

    Args:
        audio: An indexable pair whose second element is the waveform handed
            to the pipeline, or ``None`` when nothing was recorded.

    Returns:
        The ``"text"`` field of the pipeline result, or an empty string when
        ``audio`` is ``None``.
    """
    # Submitting without a recording yields None; indexing it would raise
    # TypeError, so answer with an empty transcript instead.
    if audio is None:
        return ""

    # NOTE(review): audio[0] (presumably the capture sample rate) is discarded
    # here — confirm the recording rate matches what the model expects.
    return p(audio[1], chunk_length_s=20, stride_length_s=(0, 0))["text"]
    
def punctuate(text):
    """Run ``text`` through the module-level text2text pipeline ``ttp``.

    Args:
        text: The input string handed to the pipeline.

    Returns:
        The ``"generated_text"`` field of the first pipeline result.
    """
    first_result = ttp(text, max_length=512)[0]
    return first_result["generated_text"]

def get_asr_interface():
    """Build the Gradio interface that feeds microphone audio to ``transcribe``.

    Returns:
        A ``gr.Interface`` with a single microphone audio input and a single
        textbox output.
    """
    microphone = gr.inputs.Audio(source="microphone")
    return gr.Interface(fn=transcribe, inputs=[microphone], outputs=["textbox"])
        
def get_punctuate_interface():
    """Build the Gradio interface that wraps ``punctuate``.

    Returns:
        A ``gr.Interface`` with a single textbox input and a single textbox
        output.
    """
    return gr.Interface(fn=punctuate, inputs=["textbox"], outputs=["textbox"])

# Interfaces in execution order: speech recognition first, then punctuation.
interfaces = [
    get_asr_interface(),
    get_punctuate_interface(),
]

# The package is imported as ``gr`` (the bare name ``gradio`` is undefined in
# this module), and Series takes its interfaces as separate positional
# arguments, so the list is unpacked.
gr.Series(*interfaces).launch(server_name="0.0.0.0")