luganda-asr

Sleeping

App Files Files Community

MheniDevs

cahya committed on Jul 23, 2023

Commit

e917798

0 Parent(s):

Duplicate from indonesian-nlp/luganda-asr

Browse files

Co-authored-by: Cahya Wirawan <[email protected]>

Files changed (6) hide show

.gitattributes +27 -0
5gram.bin +3 -0
README.md +39 -0
app.py +71 -0
packages.txt +2 -0
requirements.txt +8 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,27 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bin.* filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zstandard filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

5gram.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:46e982596dbb0c7c225dd9b88ef89c733ba6d718befc3c3b833b1daddc60816a
+size 11939611

README.md ADDED Viewed

	@@ -0,0 +1,39 @@

+---
+title: Luganda ASR
+emoji: 🌍
+colorFrom: blue
+colorTo: green
+sdk: gradio
+sdk_version: 3.0.5
+app_file: app.py
+pinned: false
+duplicated_from: indonesian-nlp/luganda-asr
+---
+# Configuration
+`title`: _string_
+Display title for the Space
+`emoji`: _string_
+Space emoji (only emoji characters are allowed)
+`colorFrom`: _string_
+Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
+`colorTo`: _string_
+Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
+`sdk`: _string_
+Can be either `gradio` or `streamlit`
+`sdk_version`: _string_
+Only applicable for `streamlit` SDK.
+See [doc](https://hf.co/docs/hub/spaces) for more info on supported versions.
+`app_file`: _string_
+Path to your main application file (which contains either `gradio` or `streamlit` Python code).
+Path is relative to the root of the repository.
+`pinned`: _boolean_
+Whether the Space stays on top of your list.

app.py ADDED Viewed

	@@ -0,0 +1,71 @@

+import soundfile as sf
+import torch
+from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
+from pyctcdecode import build_ctcdecoder
+import gradio as gr
+import librosa
+import os
+from multiprocessing import Pool
+class KenLM:
+    def __init__(self, tokenizer, model_name, num_workers=8, beam_width=128):
+        self.num_workers = num_workers
+        self.beam_width = beam_width
+        vocab_dict = tokenizer.get_vocab()
+        self.vocabulary = [x[0] for x in sorted(vocab_dict.items(), key=lambda x: x[1], reverse=False)]
+        # Workaround for wrong number of vocabularies:
+        self.vocabulary = self.vocabulary[:-2]
+        self.decoder = build_ctcdecoder(self.vocabulary, model_name)
+    @staticmethod
+    def lm_postprocess(text):
+        return ' '.join([x if len(x) > 1 else "" for x in text.split()]).strip()
+    def decode(self, logits):
+        probs = logits.cpu().numpy()
+        # probs = logits.numpy()
+        with Pool(self.num_workers) as pool:
+            text = self.decoder.decode_batch(pool, probs)
+            text = [KenLM.lm_postprocess(x) for x in text]
+        return text
+def convert(inputfile, outfile):
+    target_sr = 16000
+    data, sample_rate = librosa.load(inputfile)
+    data = librosa.resample(data, orig_sr=sample_rate, target_sr=target_sr)
+    sf.write(outfile, data, target_sr)
+# Access token read from the environment; None when API_TOKEN is not set.
+api_token = os.getenv("API_TOKEN")
+# Identifier of the pretrained checkpoint used for both processor and model.
+model_name = "indonesian-nlp/wav2vec2-luganda"
+# Loaded once at import time and shared by every transcription request.
+processor = Wav2Vec2Processor.from_pretrained(model_name, use_auth_token=api_token)
+model = Wav2Vec2ForCTC.from_pretrained(model_name, use_auth_token=api_token)
+# Language-model decoder built from the processor's tokenizer and "5gram.bin".
+kenlm = KenLM(processor.tokenizer, "5gram.bin")
+def parse_transcription(wav_file):
+    filename = wav_file.name.split('.')[0]
+    convert(wav_file.name, filename + "16k.wav")
+    speech, _ = sf.read(filename + "16k.wav")
+    input_values = processor(speech, sampling_rate=16_000, return_tensors="pt").input_values
+    with torch.no_grad():
+        logits = model(input_values).logits
+    transcription = kenlm.decode(logits)[0]
+    return transcription
+output = gr.outputs.Textbox(label="The transcript")
+input_ = gr.inputs.Audio(source="microphone", type="file")
+gr.Interface(parse_transcription, inputs=input_,  outputs=[output],
+             analytics_enabled=False,
+             title="Automatic Speech Recognition for Luganda",
+             description="Speech Recognition Live Demo for Luganda",
+             article="This demo was built for the "
+                     "<a href='https://zindi.africa/competitions/mozilla-luganda-automatic-speech-recognition' target='_blank'>Mozilla Luganda Automatic Speech Recognition Competition</a>. "
+                     "It uses the <a href='https://huggingface.co/indonesian-nlp/wav2vec2-luganda' target='_blank'>indonesian-nlp/wav2vec2-luganda</a> model "
+                     "which was fine-tuned on Luganda Common Voice speech datasets.",
+             enable_queue=True).launch(inline=False, server_name="0.0.0.0", show_tips=False, enable_queue=True)

packages.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ libsndfile1
2	+ sox

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+gradio
+soundfile
+torch
+transformers
+librosa
+sentencepiece
+pyctcdecode==0.3.0
+kenlm @ https://github.com/kpu/kenlm/archive/master.zip