Spaces:

DigitalUmuganda
/

Kinyarwanda-asr

Runtime error

rutsam committed on Sep 21, 2022

Commit

8d72534

1 Parent(s): d857f13

transcribe files as well

Files changed (2) hide show

app.py CHANGED Viewed

@@ -50,7 +50,7 @@ gradio_ui = gr.Interface(
     article = """
     This demo showcases two pretrained STT models the first model from speechbrain(wave2vec+CTC models)(1,2gb) is 30 times larger compared to the coqui STT (deepspeech model)(45mb).
     """,
-    inputs=[gr.inputs.Audio(source="microphone", type="file", optional=False, label="Record from microphone")],
     outputs=[gr.outputs.Textbox(label="Recognized speech from speechbrain model"),
              gr.outputs.Textbox(label="Recognized speech from coqui STT model"),
              gr.outputs.Textbox(label="Recognized speech from NVIDIA Conformer transduver large model")]

     article = """
     This demo showcases two pretrained STT models the first model from speechbrain(wave2vec+CTC models)(1,2gb) is 30 times larger compared to the coqui STT (deepspeech model)(45mb).
     """,
+    inputs=[gr.inputs.Audio(label="Upload Audio File", type="file", optional=True), gr.inputs.Audio(source="microphone", type="file", optional=False, label="Record from microphone")],
     outputs=[gr.outputs.Textbox(label="Recognized speech from speechbrain model"),
              gr.outputs.Textbox(label="Recognized speech from coqui STT model"),
              gr.outputs.Textbox(label="Recognized speech from NVIDIA Conformer transduver large model")]

requirements.txt CHANGED Viewed

@@ -10,4 +10,5 @@ ffmpeg-python
 soundfile==0.10.3.post1
 wget
 aiofiles
 git+https://github.com/NVIDIA/NeMo.git@<branch>#egg=nemo_toolkit[all]

 soundfile==0.10.3.post1
 wget
 aiofiles
+pydub
 git+https://github.com/NVIDIA/NeMo.git@<branch>#egg=nemo_toolkit[all]