# audio-vis / gen.py
# github-actions[bot]
# Sync to HuggingFace Spaces
# 3f16dbd
import io
import os

import librosa
import librosa.display
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.font_manager import fontManager
from PIL import Image
from scipy.signal import butter, lfilter, sosfilt
# Bounds for the filter cutoff values (presumably in Hz — confirm with the UI).
# NOTE(review): FILTER_UPPER_BOUND is not referenced anywhere in the visible
# code; presumably it is consumed by a caller (e.g. slider limits) — confirm.
FILTER_UPPER_BOUND = 20000
# apply_filters treats a cutoff that is not strictly greater than this value
# as "filter disabled".
FILTER_LOWER_BOUND = 0

# Register the bundled Noto Sans TC font file and make it matplotlib's default
# family (presumably so CJK characters in file names render in plot titles —
# confirm). The path is relative, so it is resolved against the current
# working directory at import time.
fontManager.addfont("fonts/NotoSansTC-Regular.ttf")
matplotlib.rc("font", family="Noto Sans TC")
def butter_filter(
    data: np.ndarray,
    cutoff: "float | list[float]",
    fs: int,
    btype: str,
    order=5,
) -> np.ndarray:
    """Apply a digital Butterworth filter to ``data``.

    Args:
        data: Signal to filter; filtering runs along the last axis.
        cutoff: Critical frequency in the same unit as ``fs``. A single
            number for ``"low"``/``"high"``, or a ``[low, high]`` pair for
            ``"band"``.
        fs: Sampling rate of ``data``.
        btype: Filter type forwarded to ``scipy.signal.butter``
            (``"low"``, ``"high"`` or ``"band"``).
        order: Order of the Butterworth design.

    Returns:
        The filtered signal, with the same shape as ``data``.

    Raises:
        ValueError: Raised by ``scipy.signal.butter`` when a normalised
            cutoff is not strictly between 0 and 1, or when the number of
            cutoffs does not match ``btype``.
    """
    nyquist = 0.5 * fs
    # np.asarray lets the same expression normalise both a scalar cutoff and
    # a [low, high] pair, so no per-btype branching is needed.
    normal_cutoff = np.asarray(cutoff, dtype=float) / nyquist
    # Design and run the filter as second-order sections: the (b, a)
    # polynomial form loses precision for higher orders and for cutoffs that
    # are small relative to the sampling rate, which can make the filter
    # inaccurate or unstable.
    sos = butter(order, normal_cutoff, btype=btype, analog=False, output="sos")
    return sosfilt(sos, data)
def plt_to_numpy(plt: plt.Figure) -> np.ndarray:
    """Render a matplotlib figure to PNG in memory and return it as an array.

    Args:
        plt: Object exposing ``savefig``. Note that the parameter name
            shadows the module-level ``plt`` alias inside this function.
            Callers in this file pass the ``matplotlib.pyplot`` module
            itself (which saves the current figure); a ``Figure`` instance
            is accepted as well.

    Returns:
        The decoded PNG image as a NumPy array.
    """
    with io.BytesIO() as buf:
        plt.savefig(buf, format="png")
        buf.seek(0)
        # np.array forces PIL to decode the whole image while the buffer is
        # still open.
        image = np.array(Image.open(buf))

    # Close the figure once it has been rasterised; otherwise pyplot keeps
    # every figure alive and memory grows with each call. close(None) closes
    # the current figure, which is the one pyplot's savefig just rendered.
    figure = plt if isinstance(plt, matplotlib.figure.Figure) else None
    matplotlib.pyplot.close(figure)
    return image
def apply_filters(
    y: np.ndarray,
    sr: int,
    highpass_cutoff: int,
    lowpass_cutoff: int,
    bandpass_low: int,
    bandpass_high: int,
) -> np.ndarray:
    """Apply the enabled high-pass, low-pass and band-pass filters in turn.

    A filter is applied only when its cutoff(s) lie strictly between
    ``FILTER_LOWER_BOUND`` and the Nyquist frequency (``sr / 2``); otherwise
    it is skipped and the signal passes through unchanged. The band-pass
    filter additionally requires ``bandpass_low < bandpass_high``.

    Args:
        y: Audio signal.
        sr: Sampling rate of ``y``.
        highpass_cutoff: High-pass cutoff frequency.
        lowpass_cutoff: Low-pass cutoff frequency.
        bandpass_low: Lower edge of the band-pass filter.
        bandpass_high: Upper edge of the band-pass filter.

    Returns:
        The filtered signal (``y`` itself when every filter is skipped).
    """
    nyquist = sr / 2

    # Every cutoff must be below Nyquist: the filter design rejects
    # normalised frequencies >= 1 with a ValueError, so out-of-range values
    # are treated as "filter disabled" instead of crashing the analysis.
    if FILTER_LOWER_BOUND < highpass_cutoff < nyquist:
        y = butter_filter(y, highpass_cutoff, sr, "high")
    if FILTER_LOWER_BOUND < lowpass_cutoff < nyquist:
        y = butter_filter(y, lowpass_cutoff, sr, "low")
    # An empty or inverted band (low >= high) is not a valid band-pass
    # design, so it is skipped as well.
    if FILTER_LOWER_BOUND < bandpass_low < bandpass_high < nyquist:
        y = butter_filter(y, [bandpass_low, bandpass_high], sr, "band")
    return y
def analyze_audio(
    file: str,
    highpass_cutoff: int,
    lowpass_cutoff: int,
    bandpass_low: int,
    bandpass_high: int,
):
    """Load an audio file, filter it, and render a set of analysis plots.

    The file is loaded with ``librosa.load`` (no explicit sample rate, so
    librosa's default applies), passed through ``apply_filters`` and then
    visualised. Every plot title includes the file's base name.

    Args:
        file: Path of the audio file to analyse.
        highpass_cutoff: High-pass cutoff forwarded to ``apply_filters``.
        lowpass_cutoff: Low-pass cutoff forwarded to ``apply_filters``.
        bandpass_low: Lower band-pass edge forwarded to ``apply_filters``.
        bandpass_high: Upper band-pass edge forwarded to ``apply_filters``.

    Returns:
        A tuple of 11 images (as returned by ``plt_to_numpy``), in order:
        waveform, spectrogram, MFCC, zero crossing rate, spectral centroid,
        spectral bandwidth, RMS energy, spectral contrast, spectral rolloff,
        tempo and tempogram.
    """
    filename = os.path.basename(file)
    y, sr = librosa.load(file)
    y = apply_filters(
        y, sr, highpass_cutoff, lowpass_cutoff, bandpass_low, bandpass_high
    )

    def frame_times(values: np.ndarray) -> np.ndarray:
        """Return the time stamp of each frame in a per-frame feature."""
        # Pass sr explicitly so the time axis follows the rate the audio was
        # actually loaded at instead of frames_to_time's own default.
        return librosa.frames_to_time(np.arange(len(values)), sr=sr)

    def plot_waveform(y: np.ndarray, sr: int) -> np.ndarray:
        """Plot the amplitude envelope of the signal over time."""
        plt.figure(figsize=(14, 5))
        librosa.display.waveshow(y, sr=sr)
        plt.title(f"Waveform ({filename})")
        plt.xlabel("Time")
        plt.ylabel("Amplitude")
        return plt_to_numpy(plt)

    def plot_spectrogram(y: np.ndarray, sr: int) -> np.ndarray:
        """Plot the STFT magnitude in dB on a log-frequency axis."""
        plt.figure(figsize=(14, 5))
        # dB values are relative to the loudest bin (ref=np.max).
        D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
        librosa.display.specshow(D, sr=sr, x_axis="time", y_axis="log")
        plt.colorbar(format="%+2.0f dB")
        plt.title(f"Spectrogram ({filename})")
        return plt_to_numpy(plt)

    def plot_mfcc(y: np.ndarray, sr: int) -> np.ndarray:
        """Plot 13 MFCC coefficients over time."""
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        plt.figure(figsize=(14, 5))
        librosa.display.specshow(mfccs, sr=sr, x_axis="time")
        plt.colorbar()
        plt.title(f"MFCC ({filename})")
        return plt_to_numpy(plt)

    def plot_zcr(y: np.ndarray) -> np.ndarray:
        """Plot the zero crossing rate per frame."""
        zcr = librosa.feature.zero_crossing_rate(y=y)
        plt.figure(figsize=(14, 5))
        plt.plot(zcr[0])
        plt.title(f"Zero Crossing Rate ({filename})")
        plt.xlabel("Frames")
        plt.ylabel("Rate")
        return plt_to_numpy(plt)

    def plot_spectral_centroid(y: np.ndarray, sr: int) -> np.ndarray:
        """Plot the spectral centroid over time on a log-scaled y axis."""
        spectral_centroids = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
        t = frame_times(spectral_centroids)
        plt.figure(figsize=(14, 5))
        plt.semilogy(t, spectral_centroids, label="Spectral centroid")
        plt.title(f"Spectral Centroid ({filename})")
        plt.xlabel("Time")
        plt.ylabel("Hz")
        return plt_to_numpy(plt)

    def plot_spectral_bandwidth(y: np.ndarray, sr: int) -> np.ndarray:
        """Plot the spectral bandwidth over time on a log-scaled y axis."""
        spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)[0]
        t = frame_times(spectral_bandwidth)
        plt.figure(figsize=(14, 5))
        plt.semilogy(t, spectral_bandwidth, label="Spectral bandwidth")
        plt.title(f"Spectral Bandwidth ({filename})")
        plt.xlabel("Time")
        plt.ylabel("Hz")
        return plt_to_numpy(plt)

    def plot_rms(y: np.ndarray) -> np.ndarray:
        """Plot the RMS energy per frame."""
        rms = librosa.feature.rms(y=y)[0]
        plt.figure(figsize=(14, 5))
        plt.plot(rms)
        plt.title(f"RMS Energy ({filename})")
        plt.xlabel("Frames")
        plt.ylabel("RMS")
        return plt_to_numpy(plt)

    def plot_spectral_contrast(y: np.ndarray, sr: int) -> np.ndarray:
        """Plot the spectral contrast bands over time."""
        spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
        plt.figure(figsize=(14, 5))
        librosa.display.specshow(spectral_contrast, sr=sr, x_axis="time")
        plt.colorbar()
        plt.title(f"Spectral Contrast ({filename})")
        return plt_to_numpy(plt)

    def plot_spectral_rolloff(y: np.ndarray, sr: int) -> np.ndarray:
        """Plot the spectral rolloff over time on a log-scaled y axis."""
        spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)[0]
        t = frame_times(spectral_rolloff)
        plt.figure(figsize=(14, 5))
        plt.semilogy(t, spectral_rolloff, label="Spectral rolloff")
        plt.xlabel("Time")
        plt.ylabel("Hz")
        plt.title(f"Spectral Rolloff ({filename})")
        return plt_to_numpy(plt)

    def plot_tempo(onset_env: np.ndarray, sr: int) -> np.ndarray:
        """Plot the per-frame tempo estimate over time."""
        # aggregate=None requests one tempo value per frame rather than a
        # single aggregated estimate.
        dtempo = librosa.feature.tempo(
            onset_envelope=onset_env, sr=sr, aggregate=None
        )
        t = frame_times(dtempo)
        plt.figure(figsize=(14, 5))
        plt.plot(t, dtempo, label="Tempo")
        plt.title(f"Tempo ({filename})")
        plt.xlabel("Time")
        plt.ylabel("Tempo")
        return plt_to_numpy(plt)

    def plot_tempogram(onset_env: np.ndarray, sr: int) -> np.ndarray:
        """Plot the tempogram computed from the onset strength envelope."""
        tempogram = librosa.feature.tempogram(onset_envelope=onset_env, sr=sr)
        plt.figure(figsize=(14, 5))
        librosa.display.specshow(tempogram, sr=sr, x_axis="time")
        plt.colorbar()
        plt.title(f"Tempogram ({filename})")
        return plt_to_numpy(plt)

    waveform = plot_waveform(y, sr)
    spectrogram = plot_spectrogram(y, sr)
    mfcc = plot_mfcc(y, sr)
    zcr = plot_zcr(y)
    spectral_centroid = plot_spectral_centroid(y, sr)
    spectral_bandwidth = plot_spectral_bandwidth(y, sr)
    rms = plot_rms(y)
    spectral_contrast = plot_spectral_contrast(y, sr)
    spectral_rolloff = plot_spectral_rolloff(y, sr)

    # The onset envelope is computed once and shared by both tempo plots.
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    tempo = plot_tempo(onset_env, sr)
    tempogram = plot_tempogram(onset_env, sr)

    return (
        waveform,
        spectrogram,
        mfcc,
        zcr,
        spectral_centroid,
        spectral_bandwidth,
        rms,
        spectral_contrast,
        spectral_rolloff,
        tempo,
        tempogram,
    )