Spaces:
Sleeping
Sleeping
import bentoml | |
import numpy as np | |
from pydub import AudioSegment | |
from pydub.utils import mediainfo | |
class AudioAmplitude(bentoml.Runnable):
    """Runnable that decodes an audio file into a normalized mono waveform.

    The class-level attributes below are read by the BentoML runner
    machinery (presumably for scheduling -- confirm against the BentoML
    version in use); they are not referenced by the methods of this class.
    """

    SUPPORTED_RESOURCES = ("nvidia.com/gpu", "cpu")
    SUPPORTS_CPU_MULTI_THREADING = True
    # NOTE(review): not used in this class; no resampling is performed here,
    # the decoded audio keeps whatever frame rate the file has.
    SAMPLE_RATE = 16000

    def __init__(self):
        pass

    @staticmethod
    def _to_normalized_mono(samples, sample_width, channels):
        """Scale interleaved integer PCM samples to floats and down-mix to mono.

        Args:
            samples: 1-D array-like of signed integer samples, interleaved by
                channel (c0, c1, ..., c0, c1, ...).
            sample_width: Number of bytes per sample (1, 2 or 4).
            channels: Number of interleaved channels.

        Returns:
            1-D float64 ``numpy.ndarray`` with values in [-1.0, 1.0); one
            value per frame (the mean across channels).
        """
        # Full-scale magnitude of a signed integer of this width,
        # e.g. 2 bytes -> 2**15 = 32768.
        full_scale = float(2 ** (8 * sample_width - 1))
        # Convert to float64 *before* dividing so the divisor is never
        # coerced to the (narrower) integer sample dtype.
        normalized = np.asarray(samples, dtype=np.float64) / full_scale
        if channels > 1:
            # De-interleave into (frames, channels) and average the channels.
            normalized = normalized.reshape(-1, channels).mean(axis=1)
        return normalized

    def get_audio_amplitude(self, temp_file_path):
        """Load an audio file and return its amplitude as a mono float array.

        Args:
            temp_file_path: Path (or file-like object) accepted by
                ``pydub.AudioSegment.from_file``.

        Returns:
            1-D float64 ``numpy.ndarray`` normalized to [-1.0, 1.0) according
            to the file's actual sample width; multi-channel audio is
            averaged down to a single channel.
        """
        audio = AudioSegment.from_file(temp_file_path)

        sample_rate = audio.frame_rate    # frames per second
        sample_size = audio.sample_width  # bytes per sample
        channels = audio.channels

        print("INFO: ", sample_rate, sample_size, channels)

        return self._to_normalized_mono(
            audio.get_array_of_samples(), sample_size, channels
        )