Hugo Rodrigues committed on
Commit
6db451f
·
1 Parent(s): 357cae7

transcribe-audio

Browse files
Files changed (1) hide show
  1. main.py +25 -1
main.py CHANGED
@@ -1,10 +1,14 @@
 
1
  import time
2
  from scipy.io.wavfile import write
 
 
3
 
4
 
5
  # from typing import Union
6
  # from pydantic import BaseModel
7
- from fastapi import FastAPI
 
8
  from fastapi.middleware.cors import CORSMiddleware
9
  from fastapi.responses import FileResponse
10
 
@@ -93,3 +97,23 @@ async def audio(inputs, src_lang="eng", tgt_lang="por", speaker_id=5):
93
  audio_array_from_text)
94
 
95
  return FileResponse(f"/tmp/output{start_time}.wav", media_type="audio/mpeg")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
  import time
3
  from scipy.io.wavfile import write
4
+ import torchaudio
5
+ import numpy as np
6
 
7
 
8
  # from typing import Union
9
  # from pydantic import BaseModel
10
+
11
+ from fastapi import FastAPI, UploadFile
12
  from fastapi.middleware.cors import CORSMiddleware
13
  from fastapi.responses import FileResponse
14
 
 
97
  audio_array_from_text)
98
 
99
  return FileResponse(f"/tmp/output{start_time}.wav", media_type="audio/mpeg")
100
+
101
+
102
@app.post("/transcribe-audio")
async def transcribe_audio(soundFile: UploadFile, tgt_lang='eng'):
    """Run the speech model on an uploaded audio file and return generated audio.

    The upload is decoded with ``torchaudio`` rather than reinterpreting its
    raw bytes as int16 samples, so the container header is not fed to the
    model as audio and the file's real sample rate and channel count are
    respected.

    Args:
        soundFile: Uploaded audio file. It must be in a format the installed
            torchaudio backend can decode.
        tgt_lang: Target language code forwarded to ``model.generate``.

    Returns:
        A ``FileResponse`` serving the generated waveform, written as a WAV
        file under ``/tmp``.

    Raises:
        HTTPException: With status 400 when the upload cannot be decoded or
            contains no audio samples.
    """
    # Imported locally so this block does not depend on the file-level
    # import line also exposing HTTPException.
    from fastapi import HTTPException

    start_time = time.time()

    # NOTE(review): assumes the processor expects input at the same rate the
    # model reports for its output (model.config.sampling_rate) -- confirm
    # against the processor's feature-extractor configuration.
    target_rate = model.config.sampling_rate

    # Decode the container (header, encoding, channels) instead of treating
    # the raw upload bytes as PCM samples.
    try:
        waveform, source_rate = torchaudio.load(soundFile.file)
    except (RuntimeError, ValueError) as err:
        # NOTE(review): exception types raised on undecodable input depend on
        # the torchaudio backend in use -- widen if others are observed.
        raise HTTPException(
            status_code=400,
            detail="Could not decode the uploaded audio file.",
        ) from err

    if waveform.numel() == 0:
        raise HTTPException(
            status_code=400,
            detail="The uploaded audio file contains no samples.",
        )

    # torchaudio returns (channels, frames); average to a single channel so a
    # multi-channel upload is handed to the processor as one utterance.
    if waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    if source_rate != target_rate:
        waveform = torchaudio.functional.resample(
            waveform, orig_freq=source_rate, new_freq=target_rate)

    audio_inputs = processor(
        audios=waveform, return_tensors="pt").to(device)

    audio_array_from_audio = model.generate(
        **audio_inputs, tgt_lang=tgt_lang)[0].cpu().numpy().squeeze()

    # Same naming scheme as the text endpoint: one file per request start time.
    output_path = f"/tmp/output{start_time}.wav"
    write(output_path, model.config.sampling_rate, audio_array_from_audio)

    return FileResponse(output_path, media_type="audio/wav")