# Ajay Karthick Senthil Kumar
# add src
# 381c43b
import librosa
import numpy as np
class SpectralFeatureExtractor:
    """
    Extract spectral features from audio data using the librosa library.

    Every feature method returns a 1-D numpy array. Features that librosa
    returns as a 2-D array are flattened with ``ndarray.flatten()``.

    Attributes:
        y (numpy.array): Audio time series.
        sr (int): Sampling rate of the audio time series.

    Methods:
        extract(features_to_extract=None): Extracts specified spectral features from audio.
        spectral_centroid(): Computes the spectral centroid of the audio.
        spectral_bandwidth(): Computes the spectral bandwidth of the audio.
        spectral_contrast(): Computes the spectral contrast of the audio.
        spectral_flatness(): Computes the spectral flatness of the audio.
        spectral_rolloff(): Computes the spectral rolloff of the audio.
        zero_crossing_rate(): Computes the zero crossing rate of the audio.
        mfccs(n_mfcc=13): Computes the Mel-frequency cepstral coefficients (MFCCs) of the audio.
        chroma_stft(): Computes the chromagram of the audio.
        spectral_flux(): Computes the spectral flux of the audio.
    """

    # Feature names accepted by extract(). Each name is also the name of the
    # method that computes it; this order is used when extracting everything.
    _FEATURE_NAMES = (
        'spectral_centroid',
        'spectral_bandwidth',
        'spectral_contrast',
        'spectral_flatness',
        'spectral_rolloff',
        'zero_crossing_rate',
        'mfccs',
        'chroma_stft',
        'spectral_flux',
    )

    def __init__(self, y, sr):
        """
        Initializes the SpectralFeatureExtractor with audio data.

        Args:
            y (numpy.array): Audio time series.
            sr (int): Sampling rate of ``y``.
        """
        self.y = y
        self.sr = sr

    def extract(self, features_to_extract=None):
        """
        Extracts the specified spectral features.

        Args:
            features_to_extract (str or iterable of str, optional): Feature
                name(s) to extract. A single string is treated as one feature
                name. Defaults to extracting all available features if None.

        Returns:
            dict: Maps each recognised requested name to its extracted
            feature, in the order the names were first requested. Names that
            are not available features are skipped without an error.
        """
        if features_to_extract is None:
            requested = self._FEATURE_NAMES
        elif isinstance(features_to_extract, str):
            # A bare string would otherwise be iterated character by
            # character and silently match no feature at all.
            requested = (features_to_extract,)
        else:
            requested = features_to_extract

        features = {}
        for name in requested:
            # Skip names already computed (a repeat would only recompute the
            # same value) and names this extractor does not provide.
            if name in features or name not in self._FEATURE_NAMES:
                continue
            features[name] = getattr(self, name)()
        return features

    def spectral_centroid(self):
        """
        Computes the spectral centroid of the audio.

        Returns:
            numpy.ndarray: Flattened output of ``librosa.feature.spectral_centroid``.
        """
        return librosa.feature.spectral_centroid(y=self.y, sr=self.sr).flatten()

    def spectral_bandwidth(self):
        """
        Computes the spectral bandwidth of the audio.

        Returns:
            numpy.ndarray: Flattened output of ``librosa.feature.spectral_bandwidth``.
        """
        return librosa.feature.spectral_bandwidth(y=self.y, sr=self.sr).flatten()

    def spectral_contrast(self):
        """
        Computes the spectral contrast of the audio.

        Returns:
            numpy.ndarray: Flattened output of ``librosa.feature.spectral_contrast``.
        """
        return librosa.feature.spectral_contrast(y=self.y, sr=self.sr).flatten()

    def spectral_flatness(self):
        """
        Computes the spectral flatness of the audio.

        Returns:
            numpy.ndarray: Flattened output of ``librosa.feature.spectral_flatness``.
        """
        return librosa.feature.spectral_flatness(y=self.y).flatten()

    def spectral_rolloff(self):
        """
        Computes the spectral rolloff point of the audio.

        Returns:
            numpy.ndarray: Flattened output of ``librosa.feature.spectral_rolloff``.
        """
        return librosa.feature.spectral_rolloff(y=self.y, sr=self.sr).flatten()

    def zero_crossing_rate(self):
        """
        Computes the zero crossing rate of the audio.

        Returns:
            numpy.ndarray: Flattened output of ``librosa.feature.zero_crossing_rate``.
        """
        return librosa.feature.zero_crossing_rate(self.y).flatten()

    def mfccs(self, n_mfcc=13):
        """
        Computes the Mel-frequency cepstral coefficients (MFCCs) of the audio.

        Args:
            n_mfcc (int, optional): Number of coefficients requested from
                librosa. Defaults to 13.

        Returns:
            numpy.ndarray: Flattened output of ``librosa.feature.mfcc``.
        """
        return librosa.feature.mfcc(y=self.y, sr=self.sr, n_mfcc=n_mfcc).flatten()

    def chroma_stft(self):
        """
        Computes the chromagram of the audio waveform.

        Returns:
            numpy.ndarray: Flattened output of ``librosa.feature.chroma_stft``.
        """
        return librosa.feature.chroma_stft(y=self.y, sr=self.sr).flatten()

    def spectral_flux(self):
        """
        Computes the spectral flux of the audio.

        The flux is the Euclidean norm of the difference between successive
        columns of the STFT magnitude, so the result has one entry fewer than
        the STFT has columns.

        Returns:
            numpy.ndarray: One flux value per pair of consecutive STFT columns.
        """
        # NOTE(review): axis=1 / axis=0 assume a 2-D STFT, i.e. mono audio.
        magnitude = np.abs(librosa.stft(self.y))
        column_deltas = np.diff(magnitude, axis=1)
        return np.sqrt(np.sum(column_deltas**2, axis=0))