Update app.py
app.py CHANGED
@@ -3,12 +3,21 @@ import gradio as gr
 import re
 import torch
 from pyctcdecode import BeamSearchDecoderCTC
+import torch
+
 
 lmID = "aware-ai/german-lowercase-wiki-4gram"
 decoder = BeamSearchDecoderCTC.load_from_hf_hub(lmID)
 p = pipeline("automatic-speech-recognition", model="aware-ai/robust-wav2vec2-xls-r-300m-german-lowercase", decoder=decoder)
 ttp = pipeline("text2text-generation", model="aware-ai/marian-german-grammar")
 
+vadmodel, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',
+                                 model='silero_vad',
+                                 force_reload=False)
+
+(get_speech_timestamps,
+ _, read_audio,
+ *_) = utils
 
 #model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_1.2B")
 #tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_1.2B")
@@ -25,7 +34,11 @@ def translate(src, tgt, text):
     return result
 
 def transcribe(audio):
-
+    sampling_rate = 16000
+    audio, sr = librosa.load(audio, sr=sampling_rate)
+    speech_timestamps = get_speech_timestamps(audio, model, sampling_rate=sampling_rate)
+    chunks = [audio[i["start"]:i["end"]] for i in speech_timestamps]
+    transcribed = " ".join([text["text"] for text in p(chunks, chunk_length_s=20, stride_length_s=(0, 0))])
 
     punctuated = ttp(transcribed, max_length = 512)[0]["generated_text"]
 
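For orientation, here is a minimal sketch of the flow this commit ends up with: Silero VAD splits the uploaded audio into speech-only chunks, the wav2vec2 pipeline p transcribes each chunk, and the Marian grammar model ttp restores punctuation. The librosa and transformers imports, the audio_path parameter name, and the final return are assumptions (the diff does not show them), and the sketch passes vadmodel to get_speech_timestamps, whereas the committed line passes model, a name that only appears in the commented-out M2M100 block.

import librosa
import torch
from pyctcdecode import BeamSearchDecoderCTC
from transformers import pipeline

lmID = "aware-ai/german-lowercase-wiki-4gram"
decoder = BeamSearchDecoderCTC.load_from_hf_hub(lmID)
p = pipeline("automatic-speech-recognition",
             model="aware-ai/robust-wav2vec2-xls-r-300m-german-lowercase",
             decoder=decoder)
ttp = pipeline("text2text-generation", model="aware-ai/marian-german-grammar")

# Silero VAD model plus its helper functions, loaded from torch.hub as in the diff.
vadmodel, utils = torch.hub.load(repo_or_dir="snakers4/silero-vad",
                                 model="silero_vad",
                                 force_reload=False)
(get_speech_timestamps, _, read_audio, *_) = utils

def transcribe(audio_path):  # parameter name is an assumption
    sampling_rate = 16000
    # Decode the uploaded file to a 16 kHz mono float array.
    audio, _ = librosa.load(audio_path, sr=sampling_rate)
    # Each timestamp dict holds "start"/"end" sample indices of one speech region.
    # The commit passes `model` here; `vadmodel` is assumed to be the intent.
    speech_timestamps = get_speech_timestamps(audio, vadmodel,
                                              sampling_rate=sampling_rate)
    # Cut the waveform into speech-only chunks and transcribe them in one batch.
    chunks = [audio[ts["start"]:ts["end"]] for ts in speech_timestamps]
    transcribed = " ".join(out["text"] for out in
                           p(chunks, chunk_length_s=20, stride_length_s=(0, 0)))
    # Restore casing and punctuation with the grammar model.
    punctuated = ttp(transcribed, max_length=512)[0]["generated_text"]
    return punctuated  # the diff cuts off before the return; assumed here

Chunking on VAD boundaries keeps silence out of the ASR input and lets the pipeline batch the speech segments, which is presumably why the commit sets stride_length_s=(0, 0): the chunks are already non-overlapping.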
|