import aiofiles
import nemo
import nemo.collections.asr as nemo_asr
import librosa
import soundfile as sf


def transcribe(file, modelName="stt_rw_conformer_transducer_large"):
    """Transcribe an uploaded audio file with a pretrained NeMo RNNT model.

    Args:
        file: an uploaded-file-like object exposing a ``.name`` path on disk
            (presumably a web-framework upload handle — confirm with caller).
        modelName: pretrained NeMo model identifier forwarded to
            ``EncDecRNNTBPEModel.from_pretrained``.

    Returns:
        dict: ``{"text": <raw transcribe() result>, "filename": file.name}``.
    """
    # NOTE(review): the model is fetched/loaded on every call; if this serves
    # requests, cache the loaded model at module level instead.
    asr_model = nemo_asr.models.EncDecRNNTBPEModel.from_pretrained(
        model_name=modelName)

    # Resample to 16 kHz at load time. The original decoded at librosa's
    # default 22050 Hz and then stamped a 16000 Hz rate on the output file,
    # which mislabels the audio (pitch/speed shift) for the ASR model.
    audio, sample_rate = librosa.load(file.name, sr=16000)

    # soundfile.write's signature is (file, data, samplerate); the original
    # passed unsupported keywords ``sr=`` / ``norm=``, raising TypeError.
    sf.write("Test.wav", audio, sample_rate)

    transcription = asr_model.transcribe(["Test.wav"])
    print(transcription)
    return {"text": transcription, "filename": file.name}