import bentoml
import numpy as np
from pydub import AudioSegment
from pydub.utils import mediainfo


class AudioAmplitude(bentoml.Runnable):
    """BentoML runnable that decodes an audio file into a normalized mono waveform."""

    SUPPORTED_RESOURCES = ("nvidia.com/gpu", "cpu")
    SUPPORTS_CPU_MULTI_THREADING = True
    # Not referenced inside this class and no resampling is performed here.
    # NOTE(review): presumably the rate expected by downstream consumers - confirm.
    SAMPLE_RATE = 16000

    def __init__(self):
        pass

    @bentoml.Runnable.method(batchable=False)
    def get_audio_amplitude(self, temp_file_path):
        """Load an audio file and return its samples as mono floats.

        Args:
            temp_file_path: Location of the audio file, passed straight to
                ``AudioSegment.from_file``.

        Returns:
            A 1-D float ``numpy.ndarray`` holding one value per frame. Samples
            are divided by the full-scale value implied by the file's sample
            width, and multi-channel audio is averaged across channels. The
            audio is kept at its original frame rate.
        """
        audio = AudioSegment.from_file(temp_file_path)

        sample_rate = audio.frame_rate
        # Number of bytes used to store a single sample.
        sample_size = audio.sample_width
        channels = audio.channels
        print("INFO: ", sample_rate, sample_size, channels)

        audio_array = np.array(audio.get_array_of_samples())

        # Scale by the full-scale magnitude for the actual sample width
        # (2**15 for 16-bit audio) rather than hard-coding 16-bit, so that
        # 8-bit and 32-bit input is normalized to the same range.
        # NOTE(review): assumes pydub exposes signed integer samples - confirm.
        full_scale = 2 ** (8 * sample_size - 1)
        normalized_audio = audio_array / full_scale

        # Samples of the different channels are interleaved frame by frame;
        # average each frame to get mono. For stereo this equals
        # (left + right) / 2, and it also covers more than two channels.
        if channels > 1:
            normalized_audio = normalized_audio.reshape(-1, channels).mean(axis=1)

        return normalized_audio