#!/usr/bin/python3 # -*- coding: utf-8 -*- import math import numpy as np def freq2erb(freq_hz: float) -> float: """ https://www.cnblogs.com/LXP-Never/p/16011229.html 1 / (24.7 * 9.265) = 0.00436976 """ return 9.265 * math.log(freq_hz / (24.7 * 9.265) + 1) def erb2freq(n_erb: float) -> float: return 24.7 * 9.265 * (math.exp(n_erb / 9.265) - 1) def get_erb_widths(sample_rate: int, fft_size: int, erb_bins: int, min_freq_bins_for_erb: int) -> np.ndarray: """ https://github.com/Rikorose/DeepFilterNet/blob/main/libDF/src/lib.rs :param sample_rate: :param fft_size: :param erb_bins: erb (Equivalent Rectangular Bandwidth) 等效矩形带宽的通道数. :param min_freq_bins_for_erb: Minimum number of frequency bands per erb band :return: """ nyq_freq = sample_rate / 2. freq_width: float = sample_rate / fft_size min_erb: float = freq2erb(0.) max_erb: float = freq2erb(nyq_freq) erb = [0] * erb_bins step = (max_erb - min_erb) / erb_bins prev_freq_bin = 0 freq_over = 0 for i in range(1, erb_bins + 1): f = erb2freq(min_erb + i * step) freq_bin = int(round(f / freq_width)) freq_bins = freq_bin - prev_freq_bin - freq_over if freq_bins < min_freq_bins_for_erb: freq_over = min_freq_bins_for_erb - freq_bins freq_bins = min_freq_bins_for_erb else: freq_over = 0 erb[i - 1] = freq_bins prev_freq_bin = freq_bin erb[erb_bins - 1] += 1 too_large = sum(erb) - (fft_size / 2 + 1) if too_large > 0: erb[erb_bins - 1] -= too_large return np.array(erb, dtype=np.uint64) def get_erb_filter_bank(erb_widths: np.ndarray, sample_rate: int, normalized: bool = True, inverse: bool = False, ): num_freq_bins = int(np.sum(erb_widths)) num_erb_bins = len(erb_widths) fb: np.ndarray = np.zeros(shape=(num_freq_bins, num_erb_bins)) points = np.cumsum([0] + erb_widths.tolist()).astype(int)[:-1] for i, (b, w) in enumerate(zip(points.tolist(), erb_widths.tolist())): fb[b: b + w, i] = 1 if inverse: fb = fb.T if not normalized: fb /= np.sum(fb, axis=1, keepdims=True) else: if normalized: fb /= np.sum(fb, axis=0) return fb def spec2erb(spec: np.ndarray, erb_fb: np.ndarray, db: bool = True): """ ERB filterbank and transform to decibel scale. :param spec: Spectrum of shape [B, C, T, F]. :param erb_fb: ERB filterbank array of shape [B] containing the ERB widths, where B are the number of ERB bins. :param db: Whether to transform the output into decibel scale. Defaults to `True`. :return: """ # complex spec to power spec. (real * real + image * image) spec_ = np.abs(spec) ** 2 # spec to erb feature. erb_feat = np.matmul(spec_, erb_fb) if db: erb_feat = 10 * np.log10(erb_feat + 1e-10) erb_feat = np.array(erb_feat, dtype=np.float32) return erb_feat def main(): erb_widths = get_erb_widths( sample_rate=8000, fft_size=512, erb_bins=32, min_freq_bins_for_erb=2, ) erb_fb = get_erb_filter_bank( erb_widths=erb_widths, sample_rate=8000, ) print(erb_fb.shape) return if __name__ == "__main__": main()