Spaces:

sinabis
/

transcription_service

Sleeping

transcription_service / src /runners /audio_amplitude.py

add bentoml files

d8d26b1 almost 2 years ago

1.29 kB

	import bentoml
	import numpy as np
	from pydub import AudioSegment
	from pydub.utils import mediainfo


	class AudioAmplitude(bentoml.Runnable):
	    """BentoML runnable that loads an audio file and returns its samples
	    as a mono floating-point amplitude array scaled to [-1.0, 1.0)."""

	    # Resource/threading declarations read by the BentoML runner machinery.
	    SUPPORTED_RESOURCES = ("nvidia.com/gpu", "cpu")
	    SUPPORTS_CPU_MULTI_THREADING = True

	    # NOTE(review): not referenced inside this class; presumably the sample
	    # rate expected by downstream consumers — confirm against callers.
	    SAMPLE_RATE = 16000

	    def __init__(self):
	        pass

	    @bentoml.Runnable.method(batchable=False)
	    def get_audio_amplitude(self, temp_file_path: str) -> np.ndarray:
	        """Decode an audio file into a normalized mono amplitude array.

	        Args:
	            temp_file_path: Path of the audio file to decode with pydub.

	        Returns:
	            A 1-D float array with one value per frame. Samples are divided
	            by the full-scale value for the file's sample width, and
	            multi-channel audio is averaged down to a single channel.
	        """
	        audio = AudioSegment.from_file(temp_file_path)

	        sample_rate = audio.frame_rate
	        # Number of bytes per sample (e.g. 2 for 16-bit audio).
	        sample_size = audio.sample_width
	        channels = audio.channels

	        print("INFO: ", sample_rate, sample_size, channels)

	        audio_array = np.array(audio.get_array_of_samples())

	        # Scale by the full-scale magnitude of a signed integer of the actual
	        # sample width (2**15 for 16-bit, 2**31 for 32-bit, ...) rather than
	        # a hard-coded 16-bit constant, so the result stays within [-1, 1)
	        # for every bit depth.
	        full_scale = float(1 << (8 * sample_size - 1))
	        normalized_audio = audio_array / full_scale

	        # Samples are interleaved per frame (ch0, ch1, ..., ch0, ch1, ...);
	        # fold each frame into a row and average across channels so any
	        # multi-channel layout, not just stereo, is reduced to mono.
	        if channels > 1:
	            normalized_audio = normalized_audio.reshape(-1, channels).mean(axis=1)

	        return normalized_audio