Spaces:
Running
Running
update
Browse files
examples/nx_mpnet/yaml/config.yaml
CHANGED
@@ -22,8 +22,8 @@ tsfm_dropout_rate: 0.0
|
|
22 |
tsfm_max_time_relative_position: 2048
|
23 |
tsfm_max_freq_relative_position: 256
|
24 |
tsfm_chunk_size: 1
|
25 |
-
tsfm_num_left_chunks:
|
26 |
-
tsfm_num_right_chunks:
|
27 |
|
28 |
discriminator_dim: 32
|
29 |
discriminator_in_channel: 2
|
|
|
22 |
tsfm_max_time_relative_position: 2048
|
23 |
tsfm_max_freq_relative_position: 256
|
24 |
tsfm_chunk_size: 1
|
25 |
+
tsfm_num_left_chunks: 96
|
26 |
+
tsfm_num_right_chunks: 3
|
27 |
|
28 |
discriminator_dim: 32
|
29 |
discriminator_in_channel: 2
|
toolbox/torchaudio/models/nx_mpnet/{inference_mpnet.py → inference_nx_mpnet.py}
RENAMED
@@ -78,23 +78,23 @@ class InferenceNXMPNet(object):
|
|
78 |
|
79 |
|
80 |
def main():
|
81 |
-
model_zip_file = project_path / "trained_models/mpnet-aishell-
|
82 |
infer_mpnet = InferenceNXMPNet(model_zip_file)
|
83 |
|
84 |
sample_rate = 8000
|
85 |
-
noisy_audio_file = project_path / "data/examples/ai_agent/dfaaf264-b5e3-4ca2-b5cb-
|
86 |
noisy_audio, _ = librosa.load(
|
87 |
noisy_audio_file.as_posix(),
|
88 |
sr=sample_rate,
|
89 |
)
|
90 |
-
noisy_audio = noisy_audio[int(7*sample_rate):int(9*sample_rate)]
|
91 |
noisy_audio = torch.tensor(noisy_audio, dtype=torch.float32)
|
92 |
noisy_audio = noisy_audio.unsqueeze(dim=0)
|
93 |
|
94 |
enhanced_audio = infer_mpnet.enhancement_by_tensor(noisy_audio)
|
95 |
|
96 |
filename = "enhanced_audio.wav"
|
97 |
-
torchaudio.save(filename, enhanced_audio.detach().cpu(), sample_rate)
|
98 |
|
99 |
return
|
100 |
|
|
|
78 |
|
79 |
|
80 |
def main():
|
81 |
+
model_zip_file = project_path / "trained_models/nx-mpnet-aishell-2-epoch.zip"
|
82 |
infer_mpnet = InferenceNXMPNet(model_zip_file)
|
83 |
|
84 |
sample_rate = 8000
|
85 |
+
noisy_audio_file = project_path / "data/examples/ai_agent/dfaaf264-b5e3-4ca2-b5cb-5b6d637d962d_section_2.wav"
|
86 |
noisy_audio, _ = librosa.load(
|
87 |
noisy_audio_file.as_posix(),
|
88 |
sr=sample_rate,
|
89 |
)
|
90 |
+
# noisy_audio = noisy_audio[int(7*sample_rate):int(9*sample_rate)]
|
91 |
noisy_audio = torch.tensor(noisy_audio, dtype=torch.float32)
|
92 |
noisy_audio = noisy_audio.unsqueeze(dim=0)
|
93 |
|
94 |
enhanced_audio = infer_mpnet.enhancement_by_tensor(noisy_audio)
|
95 |
|
96 |
filename = "enhanced_audio.wav"
|
97 |
+
torchaudio.save(filename, enhanced_audio.unsqueeze(dim=0).detach().cpu(), sample_rate)
|
98 |
|
99 |
return
|
100 |
|