Spaces:
Sleeping
Sleeping
File size: 1,291 Bytes
d8d26b1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
import bentoml
import numpy as np
from pydub import AudioSegment
from pydub.utils import mediainfo
class AudioAmplitude(bentoml.Runnable):
    """BentoML runnable that decodes an audio file into a normalized mono waveform."""

    SUPPORTED_RESOURCES = ("nvidia.com/gpu", "cpu")
    SUPPORTS_CPU_MULTI_THREADING = True
    # NOTE(review): not referenced by the code in this class; the decoded audio
    # is returned at its native frame rate (no resampling happens here).
    SAMPLE_RATE = 16000

    def __init__(self):
        pass

    @bentoml.Runnable.method(batchable=False)
    def get_audio_amplitude(self, temp_file_path):
        """Decode an audio file and return its samples as a normalized mono array.

        Args:
            temp_file_path: Path (or file-like object) accepted by
                ``AudioSegment.from_file``.

        Returns:
            A 1-D float ``numpy.ndarray`` with one value per frame. Samples are
            scaled by the full-scale value of the file's sample width, and
            multi-channel audio is averaged across channels.
        """
        audio = AudioSegment.from_file(temp_file_path)

        sample_rate = audio.frame_rate
        sample_size = audio.sample_width  # bytes per sample
        channels = audio.channels
        print("INFO: ", sample_rate, sample_size, channels)

        audio_array = np.array(audio.get_array_of_samples())

        # Scale by the full-scale magnitude for the actual sample width rather
        # than a hard-coded 2 ** 15, which is only correct for 16-bit audio.
        # Assumes pydub exposes signed integer samples of `sample_size` bytes.
        full_scale = 2 ** (8 * sample_size - 1)
        normalized_audio = audio_array / full_scale

        # Samples are interleaved per frame (the original de-interleaved stereo
        # with [::2] / [1::2]); average every frame's channels to get mono.
        # For two channels this equals (left + right) / 2.
        if channels > 1:
            normalized_audio = normalized_audio.reshape(-1, channels).mean(axis=1)

        return normalized_audio