Spaces: indonesian-nlp/luganda-asr

Runtime error

App Files Community

cahya committed on May 27, 2022

Commit

0d80de1

1 Parent(s): 37c396e

add KenLM

Browse files

Files changed (2) hide show

app.py +7 -10
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -3,7 +3,7 @@ import torch
 from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 from pyctcdecode import build_ctcdecoder
 import gradio as gr
-import sox
 import os
 from multiprocessing import Pool
@@ -30,12 +30,12 @@ class KenLM:
             text = [KenLM.lm_postprocess(x) for x in text]
         return text
 def convert(inputfile, outfile):
-    sox_tfm = sox.Transformer()
-    sox_tfm.set_output_format(
-        file_type="wav", channels=1, encoding="signed-integer", rate=16000, bits=16
-    )
-    sox_tfm.build(inputfile, outfile)
 api_token = os.getenv("API_TOKEN")
@@ -62,13 +62,10 @@ input_ = gr.inputs.Audio(source="microphone", type="file")
 gr.Interface(parse_transcription, inputs=input_,  outputs=[output],
              analytics_enabled=False,
-             show_tips=False,
-             theme='huggingface',
-             layout='vertical',
              title="Automatic Speech Recognition for Luganda",
              description="Speech Recognition Live Demo for Luganda",
              article="This demo was built for the "
                      "<a href='https://zindi.africa/competitions/mozilla-luganda-automatic-speech-recognition' target='_blank'>Mozilla Luganda Automatic Speech Recognition Competition</a>. "
                      "It uses the <a href='https://huggingface.co/indonesian-nlp/wav2vec2-luganda' target='_blank'>indonesian-nlp/wav2vec2-luganda</a> model "
                      "which was fine-tuned on Luganda Common Voice speech datasets.",
-             enable_queue=True).launch( inline=False)

 from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 from pyctcdecode import build_ctcdecoder
 import gradio as gr
+import librosa
 import os
 from multiprocessing import Pool
             text = [KenLM.lm_postprocess(x) for x in text]
         return text
 def convert(inputfile, outfile):
+    target_sr = 16000
+    data, sample_rate = librosa.load(inputfile)
+    data = librosa.resample(data, orig_sr=sample_rate, target_sr=target_sr)
+    sf.write(outfile, data, target_sr)
 api_token = os.getenv("API_TOKEN")
 gr.Interface(parse_transcription, inputs=input_,  outputs=[output],
              analytics_enabled=False,
              title="Automatic Speech Recognition for Luganda",
              description="Speech Recognition Live Demo for Luganda",
              article="This demo was built for the "
                      "<a href='https://zindi.africa/competitions/mozilla-luganda-automatic-speech-recognition' target='_blank'>Mozilla Luganda Automatic Speech Recognition Competition</a>. "
                      "It uses the <a href='https://huggingface.co/indonesian-nlp/wav2vec2-luganda' target='_blank'>indonesian-nlp/wav2vec2-luganda</a> model "
                      "which was fine-tuned on Luganda Common Voice speech datasets.",
+             enable_queue=True).launch(inline=False, server_name="0.0.0.0", show_tips=False, enable_queue=True)

requirements.txt CHANGED Viewed

@@ -2,7 +2,7 @@ gradio
 soundfile
 torch
 transformers
-sox
 sentencepiece
 pyctcdecode==0.3.0
 kenlm @ https://github.com/kpu/kenlm/archive/master.zip

 soundfile
 torch
 transformers
+librosa
 sentencepiece
 pyctcdecode==0.3.0
 kenlm @ https://github.com/kpu/kenlm/archive/master.zip