HoneyTian committed on
Commit
58d1381
·
1 Parent(s): ce96198
examples/nx_mpnet/yaml/config.yaml CHANGED
@@ -22,8 +22,8 @@ tsfm_dropout_rate: 0.0
22
  tsfm_max_time_relative_position: 2048
23
  tsfm_max_freq_relative_position: 256
24
  tsfm_chunk_size: 1
25
- tsfm_num_left_chunks: 64
26
- tsfm_num_right_chunks: 2
27
 
28
  discriminator_dim: 32
29
  discriminator_in_channel: 2
 
22
  tsfm_max_time_relative_position: 2048
23
  tsfm_max_freq_relative_position: 256
24
  tsfm_chunk_size: 1
25
+ tsfm_num_left_chunks: 96
26
+ tsfm_num_right_chunks: 3
27
 
28
  discriminator_dim: 32
29
  discriminator_in_channel: 2
toolbox/torchaudio/models/nx_mpnet/{inference_mpnet.py → inference_nx_mpnet.py} RENAMED
@@ -78,23 +78,23 @@ class InferenceNXMPNet(object):
78
 
79
 
80
  def main():
81
- model_zip_file = project_path / "trained_models/mpnet-aishell-1-epoch.zip"
82
  infer_mpnet = InferenceNXMPNet(model_zip_file)
83
 
84
  sample_rate = 8000
85
- noisy_audio_file = project_path / "data/examples/ai_agent/dfaaf264-b5e3-4ca2-b5cb-5b6d637d962d_section_1.wav"
86
  noisy_audio, _ = librosa.load(
87
  noisy_audio_file.as_posix(),
88
  sr=sample_rate,
89
  )
90
- noisy_audio = noisy_audio[int(7*sample_rate):int(9*sample_rate)]
91
  noisy_audio = torch.tensor(noisy_audio, dtype=torch.float32)
92
  noisy_audio = noisy_audio.unsqueeze(dim=0)
93
 
94
  enhanced_audio = infer_mpnet.enhancement_by_tensor(noisy_audio)
95
 
96
  filename = "enhanced_audio.wav"
97
- torchaudio.save(filename, enhanced_audio.detach().cpu(), sample_rate)
98
 
99
  return
100
 
 
78
 
79
 
80
  def main():
81
+ model_zip_file = project_path / "trained_models/nx-mpnet-aishell-2-epoch.zip"
82
  infer_mpnet = InferenceNXMPNet(model_zip_file)
83
 
84
  sample_rate = 8000
85
+ noisy_audio_file = project_path / "data/examples/ai_agent/dfaaf264-b5e3-4ca2-b5cb-5b6d637d962d_section_2.wav"
86
  noisy_audio, _ = librosa.load(
87
  noisy_audio_file.as_posix(),
88
  sr=sample_rate,
89
  )
90
+ # noisy_audio = noisy_audio[int(7*sample_rate):int(9*sample_rate)]
91
  noisy_audio = torch.tensor(noisy_audio, dtype=torch.float32)
92
  noisy_audio = noisy_audio.unsqueeze(dim=0)
93
 
94
  enhanced_audio = infer_mpnet.enhancement_by_tensor(noisy_audio)
95
 
96
  filename = "enhanced_audio.wav"
97
+ torchaudio.save(filename, enhanced_audio.unsqueeze(dim=0).detach().cpu(), sample_rate)
98
 
99
  return
100