Spaces:

qgyd2021
/

nx_denoise

Running

App Files Files Community

nx_denoise / toolbox /torchaudio /models /dfnet3 /features.py

HoneyTian

first commit

bd94e77 3 months ago

raw

history blame contribute delete

5.74 kB

	#!/usr/bin/python3
	# -*- coding: utf-8 -*-
	import math

	import numpy as np


	def freq2erb(freq_hz: float) -> float:
	    """
	    Map a frequency in Hz onto the ERB-rate scale.

	    Reference: https://www.cnblogs.com/LXP-Never/p/16011229.html
	    Note: 1 / (24.7 * 9.265) = 0.00436976

	    :param freq_hz: frequency in Hz.
	    :return: the ERB-rate value, ``9.265 * ln(freq_hz / (24.7 * 9.265) + 1)``.
	    """
	    hz_per_unit = 24.7 * 9.265
	    scaled = freq_hz / hz_per_unit
	    return 9.265 * math.log(scaled + 1)


	def erb2freq(n_erb: float) -> float:
	    """
	    Map an ERB-rate value back to a frequency in Hz.

	    :param n_erb: value on the ERB-rate scale.
	    :return: ``24.7 * 9.265 * (exp(n_erb / 9.265) - 1)``.
	    """
	    hz_per_unit = 24.7 * 9.265
	    growth = math.exp(n_erb / 9.265)
	    return hz_per_unit * (growth - 1)


	def get_erb_widths(sample_rate: int, fft_size: int, erb_bins: int, min_freq_bins_for_erb: int) -> np.ndarray:
	    """
	    Compute how many FFT frequency bins are assigned to each ERB band.

	    Reference: https://github.com/Rikorose/DeepFilterNet/blob/main/libDF/src/lib.rs

	    The range [0, sample_rate / 2] is split into ``erb_bins`` steps that are
	    equally spaced on the ERB-rate scale. Each band edge is rounded to the
	    nearest FFT bin, and every band is forced to hold at least
	    ``min_freq_bins_for_erb`` bins; bins borrowed that way are deducted from
	    the following band.

	    :param sample_rate: sample rate in Hz.
	    :param fft_size: FFT length; the one-sided spectrum has ``fft_size // 2 + 1`` bins.
	    :param erb_bins: number of ERB (Equivalent Rectangular Bandwidth) bands.
	    :param min_freq_bins_for_erb: Minimum number of frequency bins per ERB band.
	    :return: array of shape (erb_bins,), dtype uint64, with the width of each band.
	    """
	    nyq_freq = sample_rate / 2.
	    freq_width: float = sample_rate / fft_size
	    # Integer division keeps the bin total (and every width) an int,
	    # including for odd fft_size.
	    num_freq_bins = fft_size // 2 + 1

	    min_erb: float = freq2erb(0.)
	    max_erb: float = freq2erb(nyq_freq)

	    erb = [0] * erb_bins
	    step = (max_erb - min_erb) / erb_bins

	    prev_freq_bin = 0  # upper edge (FFT bin index) of the previous band
	    freq_over = 0  # bins the previous band borrowed to reach the minimum
	    for i in range(1, erb_bins + 1):
	        f = erb2freq(min_erb + i * step)
	        freq_bin = int(round(f / freq_width))
	        freq_bins = freq_bin - prev_freq_bin - freq_over

	        if freq_bins < min_freq_bins_for_erb:
	            # Too narrow: enforce the minimum and remember the overshoot.
	            freq_over = min_freq_bins_for_erb - freq_bins
	            freq_bins = min_freq_bins_for_erb
	        else:
	            freq_over = 0
	        erb[i - 1] = freq_bins
	        prev_freq_bin = freq_bin

	    # The last band also takes the extra "+ 1" bin of the one-sided spectrum.
	    erb[erb_bins - 1] += 1
	    too_large = sum(erb) - num_freq_bins
	    if too_large > 0:
	        erb[erb_bins - 1] -= too_large
	    return np.array(erb, dtype=np.uint64)


	def get_erb_filter_bank(erb_widths: np.ndarray,
	                        sample_rate: int,
	                        normalized: bool = True,
	                        inverse: bool = False,
	                        ):
	    """
	    Build a rectangular ERB filterbank matrix from per-band widths.

	    :param erb_widths: number of frequency bins in each ERB band.
	    :param sample_rate: not used by this function.
	    :param normalized: forward bank: divide each band column by its bin count.
	        Inverse bank: the rows are divided by their bin count only when this is False.
	    :param inverse: when True, return the transposed bank of shape (num_erb_bins, num_freq_bins).
	    :return: array of shape (num_freq_bins, num_erb_bins), or its transpose when ``inverse`` is set.
	    """
	    widths = erb_widths.tolist()
	    n_bands = len(erb_widths)
	    n_freqs = int(np.sum(erb_widths))

	    bank: np.ndarray = np.zeros(shape=(n_freqs, n_bands))

	    # Index of the first frequency bin of every band.
	    starts = np.cumsum([0] + widths).astype(int)[:-1].tolist()
	    for band, start in enumerate(starts):
	        bank[start: start + widths[band], band] = 1

	    if not inverse:
	        if normalized:
	            bank /= np.sum(bank, axis=0)
	        return bank

	    bank = bank.T
	    if not normalized:
	        bank /= np.sum(bank, axis=1, keepdims=True)
	    return bank


	def spec2erb(spec: np.ndarray, erb_fb: np.ndarray, db: bool = True):
	    """
	    Apply an ERB filterbank to a spectrum and optionally convert to decibels.

	    :param spec: Spectrum of shape [B, C, T, F].
	    :param erb_fb: ERB filterbank matrix that the power spectrum is right-multiplied by
	        (its first dimension must match F; presumably of shape [F, E] with E ERB bins).
	    :param db: Whether to transform the output into decibel scale. Defaults to `True`.
	    :return: float32 array of ERB features.
	    """
	    # Power spectrum: |spec| squared.
	    magnitude = np.abs(spec)
	    power = magnitude * magnitude

	    # Sum the power of the frequency bins belonging to each band.
	    bands = np.matmul(power, erb_fb)

	    if db:
	        # The small offset keeps log10 finite for silent bands.
	        bands = 10 * np.log10(bands + 1e-10)

	    return np.array(bands, dtype=np.float32)


	def _calculate_norm_alpha(sample_rate: int, hop_size: int, tau: float):
	"""Exponential decay factor alpha for a given tau (decay window size [s])."""
	dt = hop_size / sample_rate
	result = math.exp(-dt / tau)
	return result


	def get_norm_alpha(sample_rate: int, hop_size: int, norm_tau: float) -> float:
	    """
	    Return the normalization decay factor rounded to as few digits as possible.

	    The raw factor from `_calculate_norm_alpha` is rounded to 3 decimal places;
	    the precision is increased one digit at a time for as long as the rounded
	    value is still >= 1.0.

	    :param sample_rate: sample rate in Hz.
	    :param hop_size: hop between successive frames, in samples.
	    :param norm_tau: decay window size in seconds.
	    :return: rounded decay factor, strictly below 1.0.
	    :raises ValueError: if the raw factor is already >= 1.0 (e.g. ``hop_size <= 0``);
	        no amount of rounding can bring it below 1.0, so the search would never end.
	    """
	    a_ = _calculate_norm_alpha(sample_rate=sample_rate, hop_size=hop_size, tau=norm_tau)
	    if a_ >= 1.0:
	        raise ValueError(
	            f"norm alpha must be < 1.0, got {a_} "
	            f"(sample_rate={sample_rate}, hop_size={hop_size}, norm_tau={norm_tau})"
	        )

	    precision = 3
	    a = 1.0
	    while a >= 1.0:
	        # A value such as 0.9997 rounds up to 1.0 at 3 digits; retry with more digits.
	        a = round(a_, precision)
	        precision += 1

	    return a


	# Initial ERB normalization state: values for the first and the last ERB band.
	MEAN_NORM_INIT = [-60.0, -90.0]


	def make_erb_norm_state(erb_bins: int, channels: int) -> np.ndarray:
	    """
	    Create the initial state for `erb_normalize`.

	    :param erb_bins: number of ERB bands.
	    :param channels: number of rows (audio channels) in the state.
	    :return: array of shape (channels, erb_bins); every row is a linear ramp
	        from MEAN_NORM_INIT[0] to MEAN_NORM_INIT[1].
	    """
	    first, last = MEAN_NORM_INIT[0], MEAN_NORM_INIT[1]
	    ramp = np.linspace(first, last, erb_bins)

	    # state shape: (audio_channels, erb_bins)
	    return np.repeat(ramp[np.newaxis, :], channels, axis=0)


	def erb_normalize(erb_feat: np.ndarray, alpha: float, state: np.ndarray = None):
	    """
	    Normalize ERB features with an exponentially smoothed running mean.

	    For every time step the state is updated as
	    ``state = x * (1 - alpha) + state * alpha`` and the output is
	    ``(x - state) / 40``. The recurrence runs along the time axis only, so it
	    is computed for all batch entries and bands at once.

	    :param erb_feat: features of shape (batch_size, time_steps, erb_bins). Not modified;
	        non-floating input is promoted to float64 so the division is not truncated.
	    :param alpha: smoothing factor of the running mean.
	    :param state: running mean, at least of shape (batch_size, erb_bins). It is updated
	        IN PLACE, so the same array can be passed to the next call.
	        When None, a fresh state from `make_erb_norm_state` is used.
	    :return: normalized copy of ``erb_feat`` with the same shape.
	    """
	    erb_feat = np.copy(erb_feat)
	    if not np.issubdtype(erb_feat.dtype, np.inexact):
	        erb_feat = erb_feat.astype(np.float64)
	    batch_size, time_steps, erb_bins = erb_feat.shape

	    if state is None:
	        state = make_erb_norm_state(erb_bins, batch_size)

	    # View on the part of the state that is used; writes go through to `state`.
	    running = state[:batch_size, :erb_bins]

	    for j in range(time_steps):
	        frame = erb_feat[:, j, :]  # view: in-place ops below write into erb_feat
	        running[...] = frame * (1. - alpha) + running * alpha
	        frame -= running
	        frame /= 40.

	    return erb_feat


	# Initial spectrum normalization state: values for the first and the last bin.
	UNIT_NORM_INIT = [0.001, 0.0001]


	def make_spec_norm_state(df_bins: int, channels: int) -> np.ndarray:
	    """
	    Create the initial state for `spec_normalize`.

	    :param df_bins: number of frequency bins covered by the state.
	    :param channels: number of rows (audio channels) in the state.
	    :return: array of shape (channels, df_bins); every row is a linear ramp
	        from UNIT_NORM_INIT[0] to UNIT_NORM_INIT[1].
	    """
	    first, last = UNIT_NORM_INIT[0], UNIT_NORM_INIT[1]
	    ramp = np.linspace(first, last, df_bins)

	    # state shape: (audio_channels, df_bins)
	    return np.repeat(ramp[np.newaxis, :], channels, axis=0)


	def spec_normalize(spec_feat: np.ndarray, alpha: float, state: np.ndarray = None):
	    """
	    Normalize a spectrum by the square root of its smoothed magnitude.

	    For every time step the state is updated as
	    ``state = |x| * (1 - alpha) + state * alpha`` and the output is
	    ``x / sqrt(state)``. The recurrence runs along the time axis only, so it
	    is computed for all batch entries and bins at once.

	    :param spec_feat: spectrum of shape (batch_size, time_steps, df_bins). Not modified;
	        non-floating, non-complex input is promoted to float64 so the division is not truncated.
	    :param alpha: smoothing factor of the running magnitude.
	    :param state: running magnitude, at least of shape (batch_size, df_bins). It is updated
	        IN PLACE, so the same array can be passed to the next call.
	        When None, a fresh state from `make_spec_norm_state` is used.
	    :return: normalized copy of ``spec_feat`` with the same shape.
	    """
	    spec_feat = np.copy(spec_feat)
	    if not np.issubdtype(spec_feat.dtype, np.inexact):
	        spec_feat = spec_feat.astype(np.float64)
	    batch_size, time_steps, df_bins = spec_feat.shape

	    if state is None:
	        state = make_spec_norm_state(df_bins, batch_size)

	    # View on the part of the state that is used; writes go through to `state`.
	    running = state[:batch_size, :df_bins]

	    for j in range(time_steps):
	        frame = spec_feat[:, j, :]  # view: the in-place division writes into spec_feat
	        running[...] = np.abs(frame) * (1. - alpha) + running * alpha
	        frame /= np.sqrt(running)

	    return spec_feat


	if __name__ == "__main__":
	    # Nothing is executed when this module is run as a script.
	    pass