import librosa
import numpy as np
import scipy


def preprocess_audio(audio, sr=16000):
    """Load an audio source and resample it to a fixed rate.

    Args:
        audio: Anything ``librosa.load`` accepts as its first argument
            (typically a path to an audio file).
        sr: Target sample rate passed to ``librosa.load``. Defaults to
            16000, the value this function always used previously.

    Returns:
        A ``(y, sr)`` tuple exactly as returned by ``librosa.load``: the
        audio time series and the sample rate it was loaded at.
    """
    y, sr = librosa.load(audio, sr=sr)
    return y, sr


def clean_audio(y, sr):
    """Trim and normalize an audio signal.

    Args:
        y: Audio time series.
        sr: Sample rate. Currently unused; kept so the signature matches
            the other pipeline steps and existing callers keep working.

    Returns:
        The signal after ``librosa.effects.trim`` followed by
        ``librosa.util.normalize``, both with librosa's default settings.
    """
    # trim() returns (trimmed_signal, interval_indices); only the signal is kept.
    y = librosa.effects.trim(y)[0]
    y = librosa.util.normalize(y)
    return y


def extract_features(y, sr, n_mfcc=13):
    """Compute a dictionary of summary spectral features for a signal.

    Each librosa feature matrix is reduced with ``.mean(axis=1)``, giving
    one 1-D vector per feature.

    Args:
        y: Audio time series.
        sr: Sample rate of ``y``.
        n_mfcc: Number of MFCC coefficients to compute. Defaults to 13,
            the value this function always used previously.

    Returns:
        A dict with keys ``"mfcc"``, ``"chroma"``, ``"mel"``,
        ``"contrast"`` and ``"tonnetz"``, each mapping to the axis-1 mean
        of the corresponding librosa feature.
    """
    features = {
        "mfcc": librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc).mean(axis=1),
        "chroma": librosa.feature.chroma_stft(y=y, sr=sr).mean(axis=1),
        "mel": librosa.feature.melspectrogram(y=y, sr=sr).mean(axis=1),
        "contrast": librosa.feature.spectral_contrast(y=y, sr=sr).mean(axis=1),
        "tonnetz": librosa.feature.tonnetz(y=y, sr=sr).mean(axis=1),
    }
    return features


def store_preprocessed_data(features, filename):
    """Save a feature dictionary to disk with ``numpy.savez``.

    Args:
        features: Mapping of feature name to array; each entry is stored
            under its key as a named array in the archive.
        filename: Destination passed straight to ``numpy.savez``.
    """
    np.savez(filename, **features)