# Spaces status at capture time: Build error (hosting-page residue, not program code).
import os

import gradio as gr
import pyaudioconvert as pac
from pydub import AudioSegment
import nemo
import nemo.collections.asr as nemo_asr
# Location of the pre-trained Kinyarwanda checkpoint. The default is the
# original developer-local path; set the STT_MODEL_PATH environment variable
# to point at the checkpoint elsewhere without editing this file.
DEFAULT_MODEL_PATH = (
    "/home/yonas/stt/demo/model/Kinyarwanda_nemo_stt_conformer_model.nemo"
)
MODEL_PATH = os.environ.get("STT_MODEL_PATH", DEFAULT_MODEL_PATH)

# Load the pre-trained model once at import time so every request reuses it.
model = nemo_asr.models.EncDecCTCModelBPE.restore_from(restore_path=MODEL_PATH)
# Sanity check that the restored object is a CTC model.
assert isinstance(model, nemo.collections.asr.models.EncDecCTCModel)
def convert(file_name):
    """Normalize an audio file in place to 16-bit mono WAV data.

    The file type is decided by its extension, compared case-insensitively
    and including the leading dot, so ``clip.MP3`` is accepted while a name
    that merely ends in the letters ``mp3`` is not.

    Args:
        file_name: Path to the audio file. Its contents are overwritten.

    Returns:
        ``True`` if the file had a supported extension and was converted,
        ``False`` if the extension is not one of mp3, wav, or ogg.
    """
    lowered = file_name.lower()
    if not lowered.endswith((".mp3", ".wav", ".ogg")):
        return False

    # Compressed inputs are first re-encoded as WAV, written back to the
    # same path (the file keeps its original name).
    if lowered.endswith(".mp3"):
        sound = AudioSegment.from_mp3(file_name)
        sound.export(file_name, format="wav")
    elif lowered.endswith(".ogg"):
        sound = AudioSegment.from_ogg(file_name)
        sound.export(file_name, format="wav")

    # Source and destination are the same path, so the result replaces the
    # input file.
    pac.convert_wav_to_16bit_mono(file_name, file_name)
    return True
def transcribe(audio):
    """Transcribe one audio clip and return the recognized text.

    Args:
        audio: Path to the audio file supplied by the UI, or a falsy value
            (``None`` or an empty string) when nothing was provided.

    Returns:
        The recognized text, or a human-readable message when no audio was
        given or the file extension is not supported.
    """
    if not audio:
        return "No audio provided"
    if not convert(audio):
        return "The format must be mp3, wav, or ogg"

    results = model.transcribe([audio])
    if not results:
        # Defensive: nothing came back for the single file we submitted.
        return ""

    best = results[0]
    # Depending on the NeMo release, each entry is either a plain string or
    # a Hypothesis-like object that carries the transcript in ``.text``.
    return getattr(best, "text", best)
# Input widget: accepts an uploaded file or a microphone recording and passes
# the handler a file path.
audio_input = gr.Audio(
    label="Upload Audio File or Record from microphone",
    sources=["upload", "microphone"],
    type="filepath",
    format="wav",
)

# Output widget: shows the text returned by ``transcribe``.
transcript_output = gr.Text(label="Recognized speech")

gradio_ui = gr.Interface(
    fn=transcribe,
    title="Kinyarwanda Speech Recognition",
    description="Upload an audio clip or record from browser using microphone.",
    inputs=[audio_input],
    outputs=transcript_output,
)

# Launch the Gradio app
gradio_ui.launch(share=True, debug=True)