Spaces:

qgyd2021
/

cc_denoise

Running

App Files Community

HoneyTian committed on Feb 5

Commit

60e65ac

1 Parent(s): cbe8ba1

update

Browse files

Files changed (2) hide show

examples/spectrum_unet_irm_aishell/step_2_train_model.py +7 -14
examples/test.py +18 -0

examples/spectrum_unet_irm_aishell/step_2_train_model.py CHANGED Viewed

@@ -79,7 +79,7 @@ class CollateFunction(object):
                  hop_length: int = 80,
                  window_fn: str = "hamming",
                  irm_beta: float = 1.0,
-                 epsilon: float = 1e-7,
                  ):
         self.n_fft = n_fft
         self.win_length = win_length
@@ -138,17 +138,10 @@ class CollateFunction(object):
             # noise_spec, speech_spec, mix_spec, speech_irm
             # shape: [freq_dim, time_steps]
-            if torch.any(torch.isnan(speech_spec)) or torch.any(torch.isinf(speech_spec)):
-                raise AssertionError("nan or inf in speech_spec")
-            if torch.any(torch.isnan(noise_spec)) or torch.any(torch.isinf(noise_spec)):
-                raise AssertionError("nan or inf in noise_spec")
             snr_db: torch.Tensor = 10 * torch.log10(
-                speech_spec / (noise_spec + self.epsilon)
             )
-            if torch.any(torch.isnan(snr_db)) or torch.any(torch.isinf(snr_db)):
-                raise AssertionError("nan or inf in snr_db")
             snr_db_ = torch.unsqueeze(snr_db, dim=0)
             snr_db_ = torch.unsqueeze(snr_db_, dim=0)
             snr_db_ = self.make_unfold_snr_db(snr_db_, n_time_steps=3)
@@ -305,7 +298,7 @@ def main():
             snr_loss = snr_mse_loss.forward(lsnr_prediction, snr_db_target)
             if torch.any(torch.isnan(snr_loss)) or torch.any(torch.isinf(snr_loss)):
                 raise AssertionError("nan or inf in snr_loss")
-            loss = irm_loss + 0 * snr_loss
             # loss = irm_loss
             total_loss += loss.item()
@@ -343,11 +336,11 @@ def main():
                 if torch.any(torch.isnan(lsnr_prediction)) or torch.any(torch.isinf(lsnr_prediction)):
                     raise AssertionError("nan or inf in lsnr_prediction")
                 irm_loss = irm_mse_loss.forward(speech_irm_prediction, speech_irm_target)
-                # snr_loss = snr_mse_loss.forward(lsnr_prediction, snr_db_target)
-                # loss = irm_loss + 0*snr_loss
-                loss = irm_loss
-            total_loss += loss.item()
             total_examples += mix_spec.size(0)
             evaluation_loss = total_loss / total_examples

                  hop_length: int = 80,
                  window_fn: str = "hamming",
                  irm_beta: float = 1.0,
+                 epsilon: float = 1e-8,
                  ):
         self.n_fft = n_fft
         self.win_length = win_length
             # noise_spec, speech_spec, mix_spec, speech_irm
             # shape: [freq_dim, time_steps]
             snr_db: torch.Tensor = 10 * torch.log10(
+                speech_spec / (noise_spec + self.epsilon) + self.epsilon
             )
             snr_db_ = torch.unsqueeze(snr_db, dim=0)
             snr_db_ = torch.unsqueeze(snr_db_, dim=0)
             snr_db_ = self.make_unfold_snr_db(snr_db_, n_time_steps=3)
             snr_loss = snr_mse_loss.forward(lsnr_prediction, snr_db_target)
             if torch.any(torch.isnan(snr_loss)) or torch.any(torch.isinf(snr_loss)):
                 raise AssertionError("nan or inf in snr_loss")
+            loss = irm_loss + 0.1 * snr_loss
             # loss = irm_loss
             total_loss += loss.item()
                 if torch.any(torch.isnan(lsnr_prediction)) or torch.any(torch.isinf(lsnr_prediction)):
                     raise AssertionError("nan or inf in lsnr_prediction")
                 irm_loss = irm_mse_loss.forward(speech_irm_prediction, speech_irm_target)
+                snr_loss = snr_mse_loss.forward(lsnr_prediction, snr_db_target)
+                loss = irm_loss + 0.1 * snr_loss
+                # loss = irm_loss
+                total_loss += loss.item()
             total_examples += mix_spec.size(0)
             evaluation_loss = total_loss / total_examples

examples/test.py ADDED Viewed

	@@ -0,0 +1,18 @@

+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+import torch
+speech_spec = torch.tensor([0], dtype=torch.float32)
+noise_spec = torch.tensor([0], dtype=torch.float32)
+epsilon = 1e-8
+result = torch.log10(
+    speech_spec / (noise_spec + epsilon) + epsilon
+)
+print(result)
+if __name__ == '__main__':
+    pass