raphaelbiojout committed on
Commit
a0580e3
·
1 Parent(s): 25edf8d

update handler

Browse files
Files changed (1) hide show
  1. handler.py +57 -1
handler.py CHANGED
@@ -8,6 +8,8 @@ import base64
8
  import subprocess
9
  import numpy as np
10
 
 
 
11
  # from transformers.pipelines.audio_utils import ffmpeg_read
12
  from typing import Dict, List, Any
13
 
@@ -26,6 +28,52 @@ def whisper_config():
26
  compute_type = "float16" if device == "cuda" else "int8"
27
  return device, batch_size, compute_type, whisper_model
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  def ffmpeg_read(bpayload: bytes, sampling_rate: int) -> np.array:
30
  """
31
  Helper function to read an audio file through ffmpeg.
@@ -186,8 +234,16 @@ class EndpointHandler():
186
  language = parameters["language"]
187
 
188
  inputs = base64.b64decode(inputs_encoded)
 
 
 
 
 
 
 
 
189
 
190
- audio_nparray = ffmpeg_read(inputs, SAMPLE_RATE)
191
  # audio_tensor= torch.from_numpy(audio_nparray)
192
 
193
  results = []
 
8
  import subprocess
9
  import numpy as np
10
 
11
# Sink used to discard child-process output. subprocess.DEVNULL is a sentinel
# understood by Popen, so no file object has to stay open for the lifetime of
# the module (and no `os` import is needed just for this constant).
DEVNULL = subprocess.DEVNULL
12
+
13
  # from transformers.pipelines.audio_utils import ffmpeg_read
14
  from typing import Dict, List, Any
15
 
 
28
  compute_type = "float16" if device == "cuda" else "int8"
29
  return device, batch_size, compute_type, whisper_model
30
 
31
+ # load_audio can not detect the input type
32
def ffmpeg_load_audio(filename, sr=44100, mono=False, normalize=True, in_type=np.int16, out_type=np.float32):
    """Decode an audio file into a NumPy array by piping it through ffmpeg.

    ffmpeg is asked to emit raw little-endian PCM on stdout, which is then
    reinterpreted as ``in_type`` samples and converted to ``out_type``.

    Args:
        filename: Path (or any input ffmpeg accepts after ``-i``) to decode.
        sr: Target sample rate passed to ffmpeg via ``-ar``.
        mono: If true, request one channel; otherwise two.
        normalize: For floating ``out_type`` only: scale so the absolute peak
            is 1.0. When false and ``in_type`` is an integer type, samples are
            instead divided by the maximum value of ``in_type``.
        in_type: NumPy scalar type of the PCM stream requested from ffmpeg.
            Must be one of float64, float32, int16, int32, uint32.
        out_type: NumPy scalar type of the returned array.

    Returns:
        A 1-D array of samples when ``mono`` is true, otherwise an array of
        shape ``(2, n_frames)``. An empty array is returned when ffmpeg
        produced no output (for example because decoding failed; ffmpeg's
        stderr is discarded).

    Raises:
        KeyError: If ``in_type`` has no matching ffmpeg sample format.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    channels = 1 if mono else 2
    format_strings = {
        np.float64: 'f64le',
        np.float32: 'f32le',
        np.int16: 's16le',
        np.int32: 's32le',
        np.uint32: 'u32le',
    }
    format_string = format_strings[in_type]
    command = [
        'ffmpeg',
        '-i', filename,
        '-f', format_string,
        '-acodec', 'pcm_' + format_string,
        '-ar', str(sr),
        '-ac', str(channels),
        '-',
    ]
    # The context manager plus communicate() drains stdout in one go and
    # reaps the child, so no zombie process or open pipe is left behind.
    # stdin is detached so ffmpeg never tries to read the caller's stdin.
    with subprocess.Popen(
        command,
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
    ) as process:
        raw, _ = process.communicate()

    # Drop a trailing partial frame (truncated output) so that both the
    # buffer decode and the per-channel reshape below are well defined.
    frame_size = np.dtype(in_type).itemsize * channels
    usable = len(raw) - len(raw) % frame_size

    # frombuffer yields a read-only view; astype() makes the writable copy
    # that the in-place scaling below needs.
    audio = np.frombuffer(raw[:usable], dtype=in_type).astype(out_type)
    if channels > 1:
        # Interleaved frames -> one row per channel.
        audio = audio.reshape((-1, channels)).transpose()
    if audio.size == 0:
        return audio
    if np.issubdtype(out_type, np.floating):
        if normalize:
            peak = np.abs(audio).max()
            if peak > 0:
                audio /= peak
        elif np.issubdtype(in_type, np.integer):
            audio /= np.iinfo(in_type).max
    return audio
75
+
76
+
77
  def ffmpeg_read(bpayload: bytes, sampling_rate: int) -> np.array:
78
  """
79
  Helper function to read an audio file through ffmpeg.
 
234
  language = parameters["language"]
235
 
236
  inputs = base64.b64decode(inputs_encoded)
237
+ # make a tmp file
238
+ with open('/tmp/myfile.tmp', 'wb') as w:
239
+ w.write(inputs)
240
+
241
+ # audio_nparray = ffmpeg_load_audio('/tmp/myfile.tmp', sr=SAMPLE_RATE, mono=True, out_type=np.float32)
242
+ audio_nparray = load_audio('/tmp/myfile.tmp', sr=SAMPLE_RATE)
243
+ # clean up
244
+ os.remove('/tmp/myfile.tmp')
245
 
246
+ # audio_nparray = ffmpeg_read(inputs, SAMPLE_RATE)
247
  # audio_tensor= torch.from_numpy(audio_nparray)
248
 
249
  results = []