Spaces:

sinabis
/

transcription_service

Sleeping

File size: 1,291 Bytes

d8d26b1

import bentoml
import numpy as np
from pydub import AudioSegment
from pydub.utils import mediainfo


class AudioAmplitude(bentoml.Runnable):
    """BentoML runnable that decodes an audio file into a normalized mono signal."""

    # Resource / threading declarations read by BentoML when scheduling the runnable.
    SUPPORTED_RESOURCES = ("nvidia.com/gpu", "cpu")
    SUPPORTS_CPU_MULTI_THREADING = True

    # NOTE(review): not referenced inside this class; the decoded audio keeps
    # its native frame rate. Presumably consumed by callers -- confirm.
    SAMPLE_RATE = 16000

    def __init__(self):
        pass

    @bentoml.Runnable.method(batchable=False)
    def get_audio_amplitude(self, temp_file_path):
        """Load an audio file and return its samples as mono floats.

        Args:
            temp_file_path: Path (or file-like object) handed to
                ``AudioSegment.from_file``.

        Returns:
            A 1-D ``numpy`` float array with one value per frame, scaled by
            the full-scale value of the file's sample width so that samples
            lie in ``[-1, 1)``. Multi-channel audio is averaged down to a
            single channel.
        """
        audio = AudioSegment.from_file(temp_file_path)

        sample_rate = audio.frame_rate    # frames per second
        sample_size = audio.sample_width  # bytes per sample
        channels = audio.channels

        print("INFO: ", sample_rate, sample_size, channels)

        samples = np.array(audio.get_array_of_samples())

        # Full scale depends on the sample width (2 bytes -> 2**15,
        # 4 bytes -> 2**31); a fixed 2**15 is only correct for 16-bit audio.
        full_scale = float(1 << (8 * sample_size - 1))
        normalized_audio = samples / full_scale

        # Samples are interleaved per frame (the original stereo code relied
        # on this via [::2] / [1::2]); average across channels for any
        # channel count, not just stereo.
        if channels > 1:
            normalized_audio = normalized_audio.reshape(-1, channels).mean(axis=1)

        return normalized_audio