Spaces:
Runtime error
Runtime error
Duplicate from unilux/ASR_for_Luxembourgish
Browse filesCo-authored-by: Peter Gilles <[email protected]>
- .gitattributes +39 -0
- Chamber2022_1.wav +3 -0
- Chamber2022_2.wav +3 -0
- Chamber2022_3.wav +3 -0
- Chamber2022_4.wav +3 -0
- ChamberMeisch.wav +3 -0
- Chamber_Fayot_2005.wav +3 -0
- Erlieft-a-Verzielt.wav +3 -0
- README.md +14 -0
- Schnessen-Beispill2.wav +0 -0
- Schnessen-Beispill3.wav +0 -0
- Schnessen-Beispill4.wav +3 -0
- Schnessen_Beispill.wav +0 -0
- app.py +50 -0
- requirements.txt +8 -0
.gitattributes
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
23 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
26 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
32 |
+
Chamber_Fayot_2005.wav filter=lfs diff=lfs merge=lfs -text
|
33 |
+
ChamberMeisch.wav filter=lfs diff=lfs merge=lfs -text
|
34 |
+
Erlieft-a-Verzielt.wav filter=lfs diff=lfs merge=lfs -text
|
35 |
+
Schnessen-Beispill4.wav filter=lfs diff=lfs merge=lfs -text
|
36 |
+
Chamber2022_1.wav filter=lfs diff=lfs merge=lfs -text
|
37 |
+
Chamber2022_2.wav filter=lfs diff=lfs merge=lfs -text
|
38 |
+
Chamber2022_3.wav filter=lfs diff=lfs merge=lfs -text
|
39 |
+
Chamber2022_4.wav filter=lfs diff=lfs merge=lfs -text
|
Chamber2022_1.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ef24b1b469aab2a81486161c67da49cce3b27edf7c7a021361fe513694e110d
|
3 |
+
size 4139966
|
Chamber2022_2.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:65ab924ee6bd62da5bb12645506242ba7bb77f3f0ddfea4d036c923e8418ba26
|
3 |
+
size 4117268
|
Chamber2022_3.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4054c9422eda1743045ab42e51bcab42cdbf992ca2e9b72351fdbaea1f0c9a45
|
3 |
+
size 3685558
|
Chamber2022_4.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8363e3f92dac07c777e6900ee619e2f6bcfa2eca172d8e2607a7a3ddf2bfaf09
|
3 |
+
size 2337106
|
ChamberMeisch.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5a846e1ca78cf0c1230102905a510faba5b85124dc49eb46f6f10fc2096d5141
|
3 |
+
size 1222566
|
Chamber_Fayot_2005.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f149107b2690ea0ce333416c5031d9b87b9d4fb485f1db7c42723237a295e51
|
3 |
+
size 3460164
|
Erlieft-a-Verzielt.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:73919b09cd71c46b9c83e3d99f07800b90f41f32e93a10a643f38e22bba6bb53
|
3 |
+
size 3503086
|
README.md
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: ASR For Luxembourgish
|
3 |
+
emoji: 🏃
|
4 |
+
colorFrom: red
|
5 |
+
colorTo: indigo
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 3.3
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
license: mit
|
11 |
+
duplicated_from: unilux/ASR_for_Luxembourgish
|
12 |
+
---
|
13 |
+
|
14 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
Schnessen-Beispill2.wav
ADDED
Binary file (683 kB). View file
|
|
Schnessen-Beispill3.wav
ADDED
Binary file (272 kB). View file
|
|
Schnessen-Beispill4.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:923e0df9a2d15eba4ba3cc64b63f64f5c8a1eaf13649b15f74769f00783a8c5c
|
3 |
+
size 1119566
|
Schnessen_Beispill.wav
ADDED
Binary file (874 kB). View file
|
|
app.py
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# from: https://gradio.app/real_time_speech_recognition/
|
2 |
+
|
3 |
+
from transformers import pipeline
|
4 |
+
import torch
|
5 |
+
import gradio as gr
|
6 |
+
import librosa
|
7 |
+
import os
|
8 |
+
import time
|
9 |
+
|
10 |
+
#Loading the model and the tokenizer
|
11 |
+
# Hub access token read from the environment; None when the variable is unset.
token_key = os.environ.get("HUGGING_FACE_HUB_TOKEN")

model_name = "pgilles/whisper-large-v2-lb_cased_01"

# Earlier pipeline construction, kept for reference:
#p = pipeline("automatic-speech-recognition", model=model, tokenizer=tokenizer, feature_extractor=processor.feature_extractor, decoder=processor.decoder, use_auth_token=token_key)

# A hard-coded device=0 requires a CUDA GPU and makes start-up fail on
# CPU-only hardware, so fall back to the CPU (-1) when CUDA is unavailable.
p = pipeline(
    "automatic-speech-recognition",
    model=model_name,
    device=0 if torch.cuda.is_available() else -1,
    use_auth_token=token_key,
)
|
17 |
+
|
18 |
+
def load_data(input_file, sampling_rate=16_000):
    """Load an audio file as a mono signal resampled to ``sampling_rate``.

    Args:
        input_file: Audio source handed to ``librosa.load``.
        sampling_rate: Target sampling rate in Hz. Defaults to 16 kHz.

    Returns:
        The audio signal returned by ``librosa.load``; the sampling rate it
        reports alongside is discarded because it equals ``sampling_rate``.
    """
    speech, _ = librosa.load(input_file, sr=sampling_rate, mono=True)
    return speech
|
27 |
+
|
28 |
+
def asr_pipe(input_file, input_file_microphone, chunks):
    """Transcribe one audio file with the module-level ASR pipeline ``p``.

    Args:
        input_file: Path of an uploaded audio file, or a falsy value when
            nothing was uploaded.
        input_file_microphone: Path of a microphone recording, or a falsy
            value. Takes precedence over ``input_file`` when both are set.
        chunks: Chunk length in seconds, forwarded as ``chunk_length_s``.

    Returns:
        The ``"text"`` entry of the pipeline result.

    Raises:
        ValueError: If neither audio source was provided.
    """
    audio_path = input_file_microphone or input_file
    if not audio_path:
        # Fail with a readable message instead of handing None to the pipeline.
        raise ValueError(
            "No audio provided: upload a file or record with the microphone."
        )
    result = p(audio_path, chunk_length_s=chunks, stride_length_s=None)
    return result["text"]
|
32 |
+
|
33 |
+
# Two alternative audio sources (file upload or microphone) plus the chunk
# length that is forwarded to the pipeline. Both audio inputs are optional.
inputs = [
    gr.inputs.Audio(source="upload", type="filepath", label="Eng Audio-Datei eroplueden...", optional=True),
    gr.inputs.Audio(source="microphone", type="filepath", label="... oder direkt mam Mikro ophuelen", optional=True),
    gr.Slider(minimum=3, maximum=32, value=29, step=0.5, label="Chunk Length"),
]

outputs = [gr.outputs.Textbox(label="Erkannten Text")]

# Each row is [upload, microphone, chunk length]. asr_pipe transcribes the
# microphone entry whenever it is set, so both audio columns name the same
# file; otherwise the upload column would show a different recording than the
# one that is actually transcribed.
samples = [
    ["Chamber2022_1.wav", "Chamber2022_1.wav", 30],
    ["Chamber2022_2.wav", "Chamber2022_2.wav", 20],
    ["Chamber2022_3.wav", "Chamber2022_3.wav", 30],
    ["Chamber2022_4.wav", "Chamber2022_4.wav", 29],
]
|
40 |
+
|
41 |
+
# Assemble the web UI around asr_pipe and start serving it. No public share
# link is requested, and show_error=True is passed to launch().
gr.Interface(
    fn=asr_pipe,
    inputs=inputs,
    outputs=outputs,
    examples=samples,
    examples_per_page=10,
    theme="default",
    title="Sproocherkennung fir d'Lëtzebuergescht @uni.lu, based on Whisper-large-v2",
    description="Dës App convertéiert Är geschwate Sprooch an de (méi oder manner richtegen ;-)) Text!",
    article="Beschreiwung: Dir kënnt Iech selwer iwwer de Mikro ophuelen, eng Datei eroplueden oder e Beispill auswielen. Dëse Modell ass trainéiert mam neisten Sproocherkennungsalgorithmus vun OpenAI: Whisper. Anescht wéi bei deene meeschten Applikatiounen, déi op dem Whisper baséieren, ass dëse lëtzebuergeschen zousätzlech mat enger grousser, kontrolléierter Datebasis trainéiert ginn ('fine-tuning' mat 70 Stonne Lëtzebuergesch aus verschiddene sproochleche Genren). Domat ass eng niddereg Feelerquote méiglech, déi virdrun net denkbar war. D'Grouss- a Klengschreiwung an och d'Punktuatioun gi gréisstendeels richteg ëmgesat. Am Géigesaz zum Wav2vec 2.0-Algorithmus, deen och héich Erkennungsraten huet an och op ville Sproochen trainéiert ass, ass beim Whisper fir vill Sproochen net nëmmen d'Akustik mee och den Text mattrainéiert ginn ('weak-supervised pre-training'). Domat ass net nëmmen déi allgemeng Erkennungsrat méi héich wéi beim Wav2vec 2.0, mee och méisproocheg Schwätze gëtt däitlech besser erkannt. Et kann een also z.B. tëscht Lëtzebuergescht a Franséisch (oder Däitsch, Englesch, Spuenesch, Chineesesch) hin- an hierwiesselen an de System produzéiert de richtegen Text. 't dauert ongeféier e Fënneftel bis e Véierel vun der Dauer vun der Opnam, bis d'Transkriptioun verschafft ass.",
).launch(share=False, show_error=True)
|
50 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
librosa
|
2 |
+
#torch
|
3 |
+
#pyctcdecode
|
4 |
+
#git+https://github.com/kpu/kenlm.git
|
5 |
+
transformers
|
6 |
+
--extra-index-url https://download.pytorch.org/whl/cu113
|
7 |
+
torch
|
8 |
+
gradio==3.14
|