import io
import os
from typing import Sequence, Union

import librosa
import librosa.display
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.figure import Figure
from matplotlib.font_manager import fontManager
from PIL import Image
from scipy.signal import butter, lfilter

FILTER_UPPER_BOUND = 20000
FILTER_LOWER_BOUND = 0

# Register the bundled Noto Sans TC font and make it the default family for
# every figure. The path is relative, so it is resolved against the current
# working directory: ./fonts/NotoSansTC-Regular.ttf
fontManager.addfont("fonts/NotoSansTC-Regular.ttf")
matplotlib.rc("font", family="Noto Sans TC")


def butter_filter(
    data: np.ndarray,
    cutoff: Union[float, Sequence[float]],
    fs: int,
    btype: str,
    order=5,
):
    """Filter ``data`` with a digital Butterworth filter.

    Args:
        data: Samples to filter.
        cutoff: Cutoff frequency in the same unit as ``fs``. A single number
            when ``btype`` is ``"low"`` or ``"high"``; otherwise an iterable
            of edge frequencies (e.g. ``[low, high]`` for ``"band"``).
        fs: Sampling rate of ``data``.
        btype: Filter type, forwarded to ``scipy.signal.butter``.
        order: Filter order, forwarded to ``scipy.signal.butter``.

    Returns:
        The filtered signal as returned by ``scipy.signal.lfilter``.
    """
    nyquist = 0.5 * fs
    # butter() expects critical frequencies normalised by the Nyquist rate.
    if btype in ["low", "high"]:
        normal_cutoff = cutoff / nyquist
    else:  # 'band'
        normal_cutoff = [c / nyquist for c in cutoff]
    b, a = butter(order, normal_cutoff, btype=btype, analog=False)
    return lfilter(b, a, data)


def plt_to_numpy(plt) -> np.ndarray:
    """Render a figure to PNG in memory and return the decoded pixels.

    Args:
        plt: Either the ``matplotlib.pyplot`` module (its current figure is
            rendered) or a ``Figure`` instance. The parameter name shadows
            the module-level ``plt`` import inside this function; it is kept
            so existing keyword callers continue to work.

    Returns:
        The PNG image decoded by PIL and converted with ``np.array``.
    """
    figure = plt if isinstance(plt, Figure) else plt.gcf()
    buf = io.BytesIO()
    try:
        figure.savefig(buf, format="png")
        buf.seek(0)
        return np.array(Image.open(buf))
    finally:
        # Figures created through pyplot stay registered until closed; without
        # this every call leaks one figure. The local ``plt`` is the argument,
        # so reach the pyplot module through the ``matplotlib`` package.
        matplotlib.pyplot.close(figure)


def apply_filters(
    y: np.ndarray,
    sr: int,
    highpass_cutoff: int,
    lowpass_cutoff: int,
    bandpass_low: int,
    bandpass_high: int,
):
    """Apply the optional high-pass, low-pass and band-pass stages in order.

    A stage runs only when its cutoff(s) lie strictly between
    ``FILTER_LOWER_BOUND`` and the Nyquist frequency (``sr / 2``); the
    band-pass stage additionally requires ``bandpass_low < bandpass_high``.
    Values outside that range disable the stage instead of letting
    ``scipy.signal.butter`` raise ``ValueError``.

    Args:
        y: Audio samples.
        sr: Sampling rate of ``y``.
        highpass_cutoff: High-pass cutoff frequency.
        lowpass_cutoff: Low-pass cutoff frequency.
        bandpass_low: Lower edge of the band-pass filter.
        bandpass_high: Upper edge of the band-pass filter.

    Returns:
        The signal after every enabled stage has been applied.
    """
    nyquist = sr / 2
    if FILTER_LOWER_BOUND < highpass_cutoff < nyquist:
        y = butter_filter(y, highpass_cutoff, sr, "high")
    if FILTER_LOWER_BOUND < lowpass_cutoff < nyquist:
        y = butter_filter(y, lowpass_cutoff, sr, "low")
    if FILTER_LOWER_BOUND < bandpass_low < bandpass_high < nyquist:
        y = butter_filter(y, [bandpass_low, bandpass_high], sr, "band")
    return y


def analyze_audio(
    file: str,
    highpass_cutoff: int,
    lowpass_cutoff: int,
    bandpass_low: int,
    bandpass_high: int,
):
    """Load an audio file, filter it and render a set of analysis plots.

    Args:
        file: Path of the audio file, loaded with ``librosa.load``.
        highpass_cutoff: Forwarded to ``apply_filters``.
        lowpass_cutoff: Forwarded to ``apply_filters``.
        bandpass_low: Forwarded to ``apply_filters``.
        bandpass_high: Forwarded to ``apply_filters``.

    Returns:
        An 11-tuple of images (one array per plot) in this order: waveform,
        spectrogram, MFCC, zero crossing rate, spectral centroid, spectral
        bandwidth, RMS energy, spectral contrast, spectral rolloff, tempo,
        tempogram.
    """
    filename = os.path.basename(file)
    y, sr = librosa.load(file)
    y = apply_filters(
        y, sr, highpass_cutoff, lowpass_cutoff, bandpass_low, bandpass_high
    )

    def frame_times(values: np.ndarray, sr: int) -> np.ndarray:
        """Return the time stamp of each frame in ``values``."""
        # Pass sr explicitly so the axis stays correct even if the loading
        # sample rate ever differs from librosa's default.
        return librosa.frames_to_time(np.arange(len(values)), sr=sr)

    def plot_hz_curve(
        values: np.ndarray, sr: int, label: str, title: str
    ) -> np.ndarray:
        """Plot a per-frame frequency curve on a logarithmic y axis."""
        t = frame_times(values, sr)
        plt.figure(figsize=(14, 5))
        plt.semilogy(t, values, label=label)
        plt.title(f"{title} ({filename})")
        plt.xlabel("Time")
        plt.ylabel("Hz")
        return plt_to_numpy(plt)

    def plot_waveform(y: np.ndarray, sr: int) -> np.ndarray:
        """Plot the time-domain waveform."""
        plt.figure(figsize=(14, 5))
        librosa.display.waveshow(y, sr=sr)
        plt.title(f"Waveform ({filename})")
        plt.xlabel("Time")
        plt.ylabel("Amplitude")
        return plt_to_numpy(plt)

    def plot_spectrogram(y: np.ndarray, sr: int) -> np.ndarray:
        """Plot the STFT magnitude in dB (relative to its maximum)."""
        plt.figure(figsize=(14, 5))
        D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
        librosa.display.specshow(D, sr=sr, x_axis="time", y_axis="log")
        plt.colorbar(format="%+2.0f dB")
        plt.title(f"Spectrogram ({filename})")
        return plt_to_numpy(plt)

    def plot_mfcc(y: np.ndarray, sr: int) -> np.ndarray:
        """Plot 13 MFCC coefficients over time."""
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        plt.figure(figsize=(14, 5))
        librosa.display.specshow(mfccs, sr=sr, x_axis="time")
        plt.colorbar()
        plt.title(f"MFCC ({filename})")
        return plt_to_numpy(plt)

    def plot_zcr(y: np.ndarray) -> np.ndarray:
        """Plot the zero crossing rate per frame."""
        zcr = librosa.feature.zero_crossing_rate(y=y)
        plt.figure(figsize=(14, 5))
        plt.plot(zcr[0])
        plt.title(f"Zero Crossing Rate ({filename})")
        plt.xlabel("Frames")
        plt.ylabel("Rate")
        return plt_to_numpy(plt)

    def plot_spectral_centroid(y: np.ndarray, sr: int) -> np.ndarray:
        """Plot the spectral centroid over time."""
        spectral_centroids = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
        return plot_hz_curve(
            spectral_centroids, sr, "Spectral centroid", "Spectral Centroid"
        )

    def plot_spectral_bandwidth(y: np.ndarray, sr: int) -> np.ndarray:
        """Plot the spectral bandwidth over time."""
        spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)[0]
        return plot_hz_curve(
            spectral_bandwidth, sr, "Spectral bandwidth", "Spectral Bandwidth"
        )

    def plot_rms(y: np.ndarray) -> np.ndarray:
        """Plot the RMS energy per frame."""
        rms = librosa.feature.rms(y=y)[0]
        plt.figure(figsize=(14, 5))
        plt.plot(rms)
        plt.title(f"RMS Energy ({filename})")
        plt.xlabel("Frames")
        plt.ylabel("RMS")
        return plt_to_numpy(plt)

    def plot_spectral_contrast(y: np.ndarray, sr: int) -> np.ndarray:
        """Plot the spectral contrast over time."""
        spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
        plt.figure(figsize=(14, 5))
        librosa.display.specshow(spectral_contrast, sr=sr, x_axis="time")
        plt.colorbar()
        plt.title(f"Spectral Contrast ({filename})")
        return plt_to_numpy(plt)

    def plot_spectral_rolloff(y: np.ndarray, sr: int) -> np.ndarray:
        """Plot the spectral rolloff frequency over time."""
        spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)[0]
        return plot_hz_curve(
            spectral_rolloff, sr, "Spectral rolloff", "Spectral Rolloff"
        )

    def plot_tempo(onset_env: np.ndarray, sr: int) -> np.ndarray:
        """Plot the per-frame (non-aggregated) tempo estimate."""
        dtempo = librosa.feature.tempo(
            onset_envelope=onset_env, sr=sr, aggregate=None
        )
        t = frame_times(dtempo, sr)
        plt.figure(figsize=(14, 5))
        plt.plot(t, dtempo, label="Tempo")
        plt.title(f"Tempo ({filename})")
        plt.xlabel("Time")
        plt.ylabel("Tempo")
        return plt_to_numpy(plt)

    def plot_tempogram(onset_env: np.ndarray, sr: int) -> np.ndarray:
        """Plot the tempogram computed from the onset envelope."""
        tempogram = librosa.feature.tempogram(onset_envelope=onset_env, sr=sr)
        plt.figure(figsize=(14, 5))
        librosa.display.specshow(tempogram, sr=sr, x_axis="time")
        plt.colorbar()
        plt.title(f"Tempogram ({filename})")
        return plt_to_numpy(plt)

    waveform = plot_waveform(y, sr)
    spectrogram = plot_spectrogram(y, sr)
    mfcc = plot_mfcc(y, sr)
    zcr = plot_zcr(y)
    spectral_centroid = plot_spectral_centroid(y, sr)
    spectral_bandwidth = plot_spectral_bandwidth(y, sr)
    rms = plot_rms(y)
    spectral_contrast = plot_spectral_contrast(y, sr)
    spectral_rolloff = plot_spectral_rolloff(y, sr)

    # The onset envelope is shared by both tempo-related plots.
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    tempo = plot_tempo(onset_env, sr)
    tempogram = plot_tempogram(onset_env, sr)

    return (
        waveform,
        spectrogram,
        mfcc,
        zcr,
        spectral_centroid,
        spectral_bandwidth,
        rms,
        spectral_contrast,
        spectral_rolloff,
        tempo,
        tempogram,
    )