Hugo Rodrigues committed on
Commit
40cf25d
·
1 Parent(s): b2b9472

change transcribe audio method

Browse files
Files changed (1) hide show
  1. main.py +5 -4
main.py CHANGED
@@ -1,6 +1,6 @@
1
 
2
  import time
3
- from scipy.io.wavfile import write
4
  import numpy as np
5
 
6
 
@@ -100,9 +100,10 @@ async def audio(inputs, src_lang="eng", tgt_lang="por", speaker_id=5):
100
  async def transcribe_audio(soundFile: UploadFile, tgt_lang='eng'):
101
  start_time = time.time()
102
 
103
- inputFile = soundFile.file.read()
 
104
 
105
- audio_data = np.frombuffer(inputFile, dtype=np.int16)
106
 
107
  audio_inputs = processor(
108
  audios=audio_data, return_tensors="pt").to(device)
@@ -110,7 +111,7 @@ async def transcribe_audio(soundFile: UploadFile, tgt_lang='eng'):
110
  audio_array_from_audio = model.generate(
111
  **audio_inputs, tgt_lang=tgt_lang)[0].cpu().numpy().squeeze()
112
 
113
- write(f"/tmp/output{start_time}.wav", model.config.sampling_rate,
114
  audio_array_from_audio)
115
 
116
  print("Time took to process the request and return response is {} sec".format(
 
1
 
2
  import time
3
+ from scipy.io.wavfile import write, read
4
  import numpy as np
5
 
6
 
 
100
  async def transcribe_audio(soundFile: UploadFile, tgt_lang='eng'):
101
  start_time = time.time()
102
 
103
+ with open(f"/tmp/{soundFile.filename}", "wb") as buffer:
104
+ buffer.write(soundFile.file.read())
105
 
106
+ sample_rate, audio_data = read(f"/tmp/{soundFile.filename}")
107
 
108
  audio_inputs = processor(
109
  audios=audio_data, return_tensors="pt").to(device)
 
111
  audio_array_from_audio = model.generate(
112
  **audio_inputs, tgt_lang=tgt_lang)[0].cpu().numpy().squeeze()
113
 
114
+ write(f"/tmp/output{start_time}.wav", sample_rate,
115
  audio_array_from_audio)
116
 
117
  print("Time took to process the request and return response is {} sec".format(