# Kinyarwanda-asr / nemo_asr.py
# author: rutsam — "use pyaudioconvert" (commit ece6ab6)
import aiofiles
import nemo
import nemo.collections.asr as nemo_asr
import pyaudioconvert as pac
import soundfile as sf
from pydub import AudioSegment
def transcribe(file, modelName="stt_rw_conformer_transducer_large"):
    """Transcribe a Kinyarwanda audio file and return the recognized text.

    Parameters
    ----------
    file : file-like object with a ``.name`` attribute (e.g. a gradio upload)
        Audio in any format pydub/ffmpeg can decode (mp3, wav, ...).
    modelName : str
        NOTE(review): currently unused — the CTC model below is hard-coded.
        Confirm whether the transducer model named in the default should be
        loaded instead before wiring this parameter through.

    Returns
    -------
    str
        The transcription of the whole file (first element of the model's
        batch output).
    """
    # NOTE(review): the pretrained model is (re-)downloaded/loaded on every
    # call, which is expensive; consider caching it at module level.
    asr_model = nemo_asr.models.EncDecCTCModelBPE.from_pretrained(
        model_name="stt_rw_conformer_ctc_large")

    # Decode whatever container/codec the caller uploaded and re-export as
    # WAV, then force 16-bit mono — the sample format the NeMo model expects.
    wav_path = "test.wav"
    sound = AudioSegment.from_file(file.name)
    sound.export(wav_path, format="wav")
    # BUG FIX: the original exported to "test.wav" but then converted and
    # transcribed "Test2.wav", so the caller's audio was never transcribed.
    # Use one consistent path for export, conversion, and transcription.
    pac.convert_wav_to_16bit_mono(wav_path, wav_path)

    transcription = asr_model.transcribe([wav_path])
    return transcription[0]