transcription_service / src /runners /audio_amplitude.py
aleger's picture
add bentoml files
d8d26b1
import bentoml
import numpy as np
from pydub import AudioSegment
from pydub.utils import mediainfo
class AudioAmplitude(bentoml.Runnable):
    """BentoML runnable that decodes an audio file into a normalized mono waveform."""

    # Resource and threading declarations for the BentoML runner framework.
    SUPPORTED_RESOURCES = ("nvidia.com/gpu", "cpu")
    SUPPORTS_CPU_MULTI_THREADING = True
    # NOTE(review): not referenced anywhere in this class; no resampling to
    # this rate is performed here — confirm whether callers rely on it.
    SAMPLE_RATE = 16000

    def __init__(self):
        pass

    @bentoml.Runnable.method(batchable=False)
    def get_audio_amplitude(self, temp_file_path):
        """Load an audio file and return its samples as a mono float array.

        The file is decoded with ``pydub.AudioSegment.from_file``. Integer
        samples are scaled by the full-scale value implied by the decoded
        sample width, so the result lies in ``[-1.0, 1.0)`` for any bit
        depth. Multi-channel audio is down-mixed to mono by averaging the
        interleaved channels of each frame.

        Args:
            temp_file_path: Path (or file-like object) accepted by
                ``AudioSegment.from_file``.

        Returns:
            A one-dimensional ``numpy.ndarray`` of ``float64`` amplitudes,
            one value per audio frame, at the file's native frame rate.
        """
        audio = AudioSegment.from_file(temp_file_path)

        sample_rate = audio.frame_rate   # frames per second
        sample_size = audio.sample_width  # bytes per sample
        channels = audio.channels
        print("INFO: ", sample_rate, sample_size, channels)

        samples = np.asarray(audio.get_array_of_samples(), dtype=np.float64)

        # Signed PCM full scale depends on the sample width: 2**7 for 8-bit,
        # 2**15 for 16-bit, 2**31 for 32-bit. A fixed 2**15 divisor is only
        # correct for 16-bit input.
        full_scale = float(2 ** (8 * sample_size - 1))
        normalized_audio = samples / full_scale

        # Samples are interleaved frame by frame (ch0, ch1, ..., ch0, ch1, ...),
        # so one row per frame and a row-wise mean yields the mono mix for
        # any channel count (identical to (L + R) / 2 for stereo).
        if channels > 1:
            normalized_audio = normalized_audio.reshape(-1, channels).mean(axis=1)

        return normalized_audio