pgilles pgilles committed on
Commit
d7d1406
·
0 Parent(s):

Duplicate from unilux/ASR_for_Luxembourgish

Browse files

Co-authored-by: Peter Gilles <[email protected]>

.gitattributes ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ftz filter=lfs diff=lfs merge=lfs -text
6
+ *.gz filter=lfs diff=lfs merge=lfs -text
7
+ *.h5 filter=lfs diff=lfs merge=lfs -text
8
+ *.joblib filter=lfs diff=lfs merge=lfs -text
9
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
10
+ *.model filter=lfs diff=lfs merge=lfs -text
11
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
12
+ *.npy filter=lfs diff=lfs merge=lfs -text
13
+ *.npz filter=lfs diff=lfs merge=lfs -text
14
+ *.onnx filter=lfs diff=lfs merge=lfs -text
15
+ *.ot filter=lfs diff=lfs merge=lfs -text
16
+ *.parquet filter=lfs diff=lfs merge=lfs -text
17
+ *.pickle filter=lfs diff=lfs merge=lfs -text
18
+ *.pkl filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pt filter=lfs diff=lfs merge=lfs -text
21
+ *.pth filter=lfs diff=lfs merge=lfs -text
22
+ *.rar filter=lfs diff=lfs merge=lfs -text
23
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
24
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
25
+ *.tflite filter=lfs diff=lfs merge=lfs -text
26
+ *.tgz filter=lfs diff=lfs merge=lfs -text
27
+ *.wasm filter=lfs diff=lfs merge=lfs -text
28
+ *.xz filter=lfs diff=lfs merge=lfs -text
29
+ *.zip filter=lfs diff=lfs merge=lfs -text
30
+ *.zst filter=lfs diff=lfs merge=lfs -text
31
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
32
+ Chamber_Fayot_2005.wav filter=lfs diff=lfs merge=lfs -text
33
+ ChamberMeisch.wav filter=lfs diff=lfs merge=lfs -text
34
+ Erlieft-a-Verzielt.wav filter=lfs diff=lfs merge=lfs -text
35
+ Schnessen-Beispill4.wav filter=lfs diff=lfs merge=lfs -text
36
+ Chamber2022_1.wav filter=lfs diff=lfs merge=lfs -text
37
+ Chamber2022_2.wav filter=lfs diff=lfs merge=lfs -text
38
+ Chamber2022_3.wav filter=lfs diff=lfs merge=lfs -text
39
+ Chamber2022_4.wav filter=lfs diff=lfs merge=lfs -text
Chamber2022_1.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ef24b1b469aab2a81486161c67da49cce3b27edf7c7a021361fe513694e110d
3
+ size 4139966
Chamber2022_2.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65ab924ee6bd62da5bb12645506242ba7bb77f3f0ddfea4d036c923e8418ba26
3
+ size 4117268
Chamber2022_3.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4054c9422eda1743045ab42e51bcab42cdbf992ca2e9b72351fdbaea1f0c9a45
3
+ size 3685558
Chamber2022_4.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8363e3f92dac07c777e6900ee619e2f6bcfa2eca172d8e2607a7a3ddf2bfaf09
3
+ size 2337106
ChamberMeisch.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a846e1ca78cf0c1230102905a510faba5b85124dc49eb46f6f10fc2096d5141
3
+ size 1222566
Chamber_Fayot_2005.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f149107b2690ea0ce333416c5031d9b87b9d4fb485f1db7c42723237a295e51
3
+ size 3460164
Erlieft-a-Verzielt.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73919b09cd71c46b9c83e3d99f07800b90f41f32e93a10a643f38e22bba6bb53
3
+ size 3503086
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: ASR For Luxembourgish
3
+ emoji: 🏃
4
+ colorFrom: red
5
+ colorTo: indigo
6
+ sdk: gradio
7
+ sdk_version: 3.3
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ duplicated_from: unilux/ASR_for_Luxembourgish
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
Schnessen-Beispill2.wav ADDED
Binary file (683 kB). View file
 
Schnessen-Beispill3.wav ADDED
Binary file (272 kB). View file
 
Schnessen-Beispill4.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:923e0df9a2d15eba4ba3cc64b63f64f5c8a1eaf13649b15f74769f00783a8c5c
3
+ size 1119566
Schnessen_Beispill.wav ADDED
Binary file (874 kB). View file
 
app.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # from: https://gradio.app/real_time_speech_recognition/
2
+
3
+ from transformers import pipeline
4
+ import torch
5
+ import gradio as gr
6
+ import librosa
7
+ import os
8
+ import time
9
+
10
# Load the speech-recognition pipeline once at import time.

# Hub access token, read from the environment so it is never committed;
# `os.environ.get` yields None when the variable is not set.
token_key = os.environ.get("HUGGING_FACE_HUB_TOKEN")

# Hub identifier of the speech-recognition checkpoint.
model_name = "pgilles/whisper-large-v2-lb_cased_01"

# Run on the first CUDA device when one is available and fall back to the
# CPU (-1 in the transformers pipeline API) instead of failing at start-up
# on hardware without a GPU.
p = pipeline(
    "automatic-speech-recognition",
    model=model_name,
    device=0 if torch.cuda.is_available() else -1,
    use_auth_token=token_key,
)
17
+
18
def load_data(input_file, sampling_rate=16_000):
    """Load an audio file as a mono waveform at a fixed sampling rate.

    Args:
        input_file: Audio source handed to ``librosa.load`` (e.g. a file
            path).
        sampling_rate: Target rate in Hz requested from ``librosa.load``.
            Defaults to 16 kHz.

    Returns:
        The waveform returned by ``librosa.load``.
    """
    # librosa also returns the sample rate it used; it was requested
    # explicitly via `sr`, so only the samples are kept.
    speech, _ = librosa.load(input_file, sr=sampling_rate, mono=True)
    return speech
27
+
28
def asr_pipe(input_file, input_file_microphone, chunks):
    """Transcribe an uploaded or recorded audio file.

    Args:
        input_file: Path of the uploaded audio file, or a falsy value when
            nothing was uploaded.
        input_file_microphone: Path of the microphone recording; it takes
            precedence over ``input_file`` whenever it is truthy.
        chunks: Value forwarded to the pipeline as ``chunk_length_s``.

    Returns:
        The ``"text"`` entry of the pipeline result.

    Raises:
        ValueError: If neither an upload nor a recording was provided.
    """
    audio_path = input_file_microphone or input_file
    if not audio_path:
        # Both inputs are optional in the UI; fail with a readable message
        # instead of passing None into the pipeline.
        raise ValueError(
            "No audio provided: upload a file or record with the microphone."
        )
    result = p(audio_path, chunk_length_s=chunks, stride_length_s=None)
    return result["text"]
32
+
33
# Input widgets, in the order asr_pipe receives them:
# uploaded file, microphone recording, chunk length.
# NOTE(review): the slider allows values up to 32 while the examples use at
# most 30 — confirm that chunk lengths above 30 s behave as intended for
# this model.
inputs = [
    gr.inputs.Audio(source="upload", type='filepath', label="Eng Audio-Datei eroplueden...", optional=True),
    gr.inputs.Audio(source="microphone", type="filepath", label="... oder direkt mam Mikro ophuelen", optional=True),
    gr.Slider(minimum=3, maximum=32, value=29, step=0.5, label="Chunk Length"),
]

outputs = [gr.outputs.Textbox(label="Erkannten Text")]

# Each example row fills (upload, microphone, chunk length). asr_pipe uses
# the microphone column when it is set, so the upload column is kept
# identical to it: the file shown in the upload widget is then the file
# that is actually transcribed.
samples = [
    ["Chamber2022_1.wav", "Chamber2022_1.wav", 30],
    ["Chamber2022_2.wav", "Chamber2022_2.wav", 20],
    ["Chamber2022_3.wav", "Chamber2022_3.wav", 30],
    ["Chamber2022_4.wav", "Chamber2022_4.wav", 29],
]

# Build the web UI and start serving it; show_error=True surfaces
# exceptions raised by asr_pipe in the browser.
gr.Interface(
    fn=asr_pipe,
    inputs=inputs,
    outputs=outputs,
    title="Sproocherkennung fir d'Lëtzebuergescht @uni.lu, based on Whisper-large-v2",
    description="Dës App convertéiert Är geschwate Sprooch an de (méi oder manner richtegen ;-)) Text!",
    examples=samples,
    examples_per_page=10,
    article="Beschreiwung: Dir kënnt Iech selwer iwwer de Mikro ophuelen, eng Datei eroplueden oder e Beispill auswielen. Dëse Modell ass trainéiert mam neisten Sproocherkennungsalgorithmus vun OpenAI: Whisper. Anescht wéi bei deene meeschten Applikatiounen, déi op dem Whisper baséieren, ass dëse lëtzebuergeschen zousätzlech mat enger grousser, kontrolléierter Datebasis trainéiert ginn ('fine-tuning' mat 70 Stonne Lëtzebuergesch aus verschiddene sproochleche Genren). Domat ass eng niddereg Feelerquote méiglech, déi virdrun net denkbar war. D'Grouss- a Klengschreiwung an och d'Punktuatioun gi gréisstendeels richteg ëmgesat. Am Géigesaz zum Wav2vec 2.0-Algorithmus, deen och héich Erkennungsraten huet an och op ville Sproochen trainéiert ass, ass beim Whisper fir vill Sproochen net nëmmen d'Akustik mee och den Text mattrainéiert ginn ('weak-supervised pre-training'). Domat ass net nëmmen déi allgemeng Erkennungsrat méi héich wéi beim Wav2vec 2.0, mee och méisproocheg Schwätze gëtt däitlech besser erkannt. Et kann een also z.B. tëscht Lëtzebuergescht a Franséisch (oder Däitsch, Englesch, Spuenesch, Chineesesch) hin- an hierwiesselen an de System produzéiert de richtegen Text. 't dauert ongeféier e Fënneftel bis e Véierel vun der Dauer vun der Opnam, bis d'Transkriptioun verschafft ass.",
    theme="default",
).launch(share=False, show_error=True)
50
+
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ librosa
2
+ #torch
3
+ #pyctcdecode
4
+ #git+https://github.com/kpu/kenlm.git
5
+ transformers
6
+ --extra-index-url https://download.pytorch.org/whl/cu113
7
+ torch
8
+ gradio==3.14