Spaces:

qgyd2021
/

cc_denoise

Running

App Files Files Community

cc_denoise / toolbox /torchaudio /losses /perceptual.py

HoneyTian

update

e86d760 4 months ago

raw

history blame

2.15 kB

	#!/usr/bin/python3
	# -*- coding: utf-8 -*-
	"""
	https://zhuanlan.zhihu.com/p/627039860
	"""
	import torch
	import torch.nn as nn
	from torch_stoi import NegSTOILoss as TorchNegSTOILoss


	class PMSQELoss:
	    """
	    Placeholder for the PMSQE loss; no behaviour is implemented in this class.

	    Background reading:

	    * A Deep Learning Loss Function based on the Perceptual Evaluation of the Speech Quality
	      https://sigmat.ugr.es/PMSQE/
	    * On Loss Functions for Supervised Monaural Time-Domain Speech Enhancement
	      https://arxiv.org/abs/1909.01019
	    * Reference implementation in asteroid:
	      https://github.com/asteroid-team/asteroid/blob/master/asteroid/losses/pmsqe.py
	    """


	class NegSTOILoss(nn.Module):
	    """
	    Negative STOI loss with a configurable reduction.

	    STOI (Short-Time Objective Intelligibility) predicts how intelligible
	    speech is by measuring the correlation between time-domain and
	    frequency-domain features of the speech signals. Scores range from 0 to 1,
	    and a higher score means higher intelligibility. It is suited to
	    evaluating intelligibility improvements in noisy conditions.

	    The loss itself is computed by ``NegSTOILoss`` from the ``torch_stoi``
	    package; this wrapper only reduces that result with a mean or a sum.

	    https://github.com/mpariente/pytorch_stoi
	    https://github.com/mpariente/pystoi
	    https://github.com/speechbrain/speechbrain/blob/develop/speechbrain/nnet/loss/stoi_loss.py
	    """
	    def __init__(self,
	                 sample_rate: int,
	                 reduction: str = "mean",
	                 ):
	        """
	        :param sample_rate: sample rate passed through to the torch_stoi loss.
	        :param reduction: "mean" or "sum"; how the computed loss values are combined.
	        :raises AssertionError: if reduction is neither "sum" nor "mean".
	        """
	        super().__init__()

	        # Validate first so that a bad argument fails before the underlying
	        # loss module is constructed.
	        if reduction not in ("sum", "mean"):
	            raise AssertionError("param reduction must be sum or mean.")

	        self.reduction = reduction
	        self.loss_fn = TorchNegSTOILoss(sample_rate=sample_rate)

	    def forward(self, denoise: torch.Tensor, clean: torch.Tensor) -> torch.Tensor:
	        """
	        Compute the reduced loss between an enhanced signal and its clean reference.

	        :param denoise: enhanced (denoised) signal; assumed to be shaped
	            (batch, samples) -- TODO confirm against torch_stoi.
	        :param clean: clean reference signal; assumed to match denoise in shape -- TODO confirm.
	        :return: the mean or the sum of the values produced by the wrapped loss.
	        :raises AssertionError: if self.reduction is neither "mean" nor "sum".
	        """
	        # Call the module itself rather than .forward() so that any hooks
	        # registered on it are run.
	        batch_loss = self.loss_fn(denoise, clean)

	        if self.reduction == "mean":
	            return torch.mean(batch_loss)
	        if self.reduction == "sum":
	            return torch.sum(batch_loss)

	        # Only reachable when self.reduction was reassigned after construction.
	        raise AssertionError("param reduction must be sum or mean.")


	def main():
	    """Smoke test: print the mean-reduced NegSTOILoss of two random signal batches."""
	    sample_rate = 16000

	    loss_func = NegSTOILoss(sample_rate=sample_rate, reduction="mean")

	    # Two random batches of shape (2, sample_rate): two signals of
	    # sample_rate samples each, i.e. one second of audio at that rate.
	    denoise = torch.randn(2, sample_rate)
	    clean = torch.randn(2, sample_rate)

	    # Invoke the module rather than .forward() so nn.Module.__call__
	    # (and any registered hooks) is used.
	    loss = loss_func(denoise, clean)
	    print(loss)


	# Run the demo only when this file is executed directly as a script.
	if __name__ == "__main__":
	    main()