Spaces:

ProzisTech
/

translate

Sleeping

Hugo Rodrigues committed on Feb 7, 2024

Commit

40cf25d

1 Parent(s): b2b9472

change transcribe audio method

Files changed (1) hide show

main.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import time
-from scipy.io.wavfile import write
 import numpy as np
@@ -100,9 +100,10 @@ async def audio(inputs, src_lang="eng", tgt_lang="por", speaker_id=5):
 async def transcribe_audio(soundFile: UploadFile, tgt_lang='eng'):
     start_time = time.time()
-    inputFile = soundFile.file.read()
-    audio_data = np.frombuffer(inputFile, dtype=np.int16)
     audio_inputs = processor(
         audios=audio_data, return_tensors="pt").to(device)
@@ -110,7 +111,7 @@ async def transcribe_audio(soundFile: UploadFile, tgt_lang='eng'):
     audio_array_from_audio = model.generate(
         **audio_inputs, tgt_lang=tgt_lang)[0].cpu().numpy().squeeze()
-    write(f"/tmp/output{start_time}.wav", model.config.sampling_rate,
           audio_array_from_audio)
     print("Time took to process the request and return response is {} sec".format(

 import time
+from scipy.io.wavfile import write, read
 import numpy as np
 async def transcribe_audio(soundFile: UploadFile, tgt_lang='eng'):
     start_time = time.time()
+    with open(f"/tmp/{soundFile.filename}", "wb") as buffer:
+        buffer.write(soundFile.file.read())
+    sample_rate, audio_data = read(f"/tmp/{soundFile.filename}")
     audio_inputs = processor(
         audios=audio_data, return_tensors="pt").to(device)
     audio_array_from_audio = model.generate(
         **audio_inputs, tgt_lang=tgt_lang)[0].cpu().numpy().squeeze()
+    write(f"/tmp/output{start_time}.wav", sample_rate,
           audio_array_from_audio)
     print("Time took to process the request and return response is {} sec".format(