Spaces:
Running
Running
update
Browse files
examples/spectrum_unet_irm_aishell/run.sh
CHANGED
@@ -8,7 +8,7 @@ sh run.sh --stage 2 --stop_stage 2 --system_version windows --file_folder_name f
|
|
8 |
--speech_dir "E:/programmer/asr_datasets/aishell/data_aishell/wav/train"
|
9 |
|
10 |
|
11 |
-
sh run.sh --stage [… remainder of removed line truncated in extraction]
|
12 |
--noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise" \
|
13 |
--speech_dir "/data/tianxing/HuggingDatasets/aishell/data_aishell/wav/train"
|
14 |
|
|
|
8 |
--speech_dir "E:/programmer/asr_datasets/aishell/data_aishell/wav/train"
|
9 |
|
10 |
|
11 |
+
sh run.sh --stage 1 --stop_stage 3 --system_version centos --file_folder_name file_dir \
|
12 |
--noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise" \
|
13 |
--speech_dir "/data/tianxing/HuggingDatasets/aishell/data_aishell/wav/train"
|
14 |
|
examples/spectrum_unet_irm_aishell/step_1_prepare_data.py
CHANGED
@@ -37,8 +37,8 @@ def get_args():
|
|
37 |
parser.add_argument("--valid_dataset", default="valid.xlsx", type=str)
|
38 |
|
39 |
parser.add_argument("--duration", default=2.0, type=float)
|
40 |
-
parser.add_argument("--[… remainder of removed line truncated in extraction]
|
41 |
-
parser.add_argument("--[… remainder of removed line truncated in extraction]
|
42 |
|
43 |
parser.add_argument("--target_sample_rate", default=8000, type=int)
|
44 |
|
@@ -124,7 +124,7 @@ def get_dataset(args):
|
|
124 |
"speech_offset": speech_offset,
|
125 |
"speech_duration": speech_duration,
|
126 |
|
127 |
-
"snr_db": random.uniform(args.[… remainder of removed line truncated in extraction]
|
128 |
|
129 |
"random1": random1,
|
130 |
"random2": random2,
|
|
|
37 |
parser.add_argument("--valid_dataset", default="valid.xlsx", type=str)
|
38 |
|
39 |
parser.add_argument("--duration", default=2.0, type=float)
|
40 |
+
parser.add_argument("--min_snr_db", default=-10, type=float)
|
41 |
+
parser.add_argument("--max_snr_db", default=20, type=float)
|
42 |
|
43 |
parser.add_argument("--target_sample_rate", default=8000, type=int)
|
44 |
|
|
|
124 |
"speech_offset": speech_offset,
|
125 |
"speech_duration": speech_duration,
|
126 |
|
127 |
+
"snr_db": random.uniform(args.min_snr_db, args.max_snr_db),
|
128 |
|
129 |
"random1": random1,
|
130 |
"random2": random2,
|
examples/spectrum_unet_irm_aishell/yaml/config.yaml
CHANGED
@@ -26,8 +26,8 @@ encoder_emb_skip_op: "none"
|
|
26 |
encoder_emb_linear_groups: 16
|
27 |
encoder_emb_hidden_size: 256
|
28 |
|
29 |
-
lsnr_max: [… previous value truncated in extraction]
|
30 |
-
lsnr_min: -[… previous value truncated in extraction]
|
31 |
|
32 |
decoder_emb_num_layers: 3
|
33 |
decoder_emb_skip_op: "none"
|
|
|
26 |
encoder_emb_linear_groups: 16
|
27 |
encoder_emb_hidden_size: 256
|
28 |
|
29 |
+
lsnr_max: 30
|
30 |
+
lsnr_min: -15
|
31 |
|
32 |
decoder_emb_num_layers: 3
|
33 |
decoder_emb_skip_op: "none"
|
toolbox/torchaudio/models/spectrum_unet_irm/configuration_specturm_unet_irm.py
CHANGED
@@ -25,8 +25,8 @@ class SpectrumUnetIRMConfig(PretrainedConfig):
|
|
25 |
encoder_emb_linear_groups: int = 16,
|
26 |
encoder_emb_hidden_size: int = 256,
|
27 |
|
28 |
-
lsnr_max: int = [… previous default truncated in extraction]
|
29 |
-
lsnr_min: int = -[… previous default truncated in extraction]
|
30 |
|
31 |
decoder_emb_num_layers: int = 3,
|
32 |
decoder_emb_skip_op: str = "none",
|
|
|
25 |
encoder_emb_linear_groups: int = 16,
|
26 |
encoder_emb_hidden_size: int = 256,
|
27 |
|
28 |
+
lsnr_max: int = 30,
|
29 |
+
lsnr_min: int = -15,
|
30 |
|
31 |
decoder_emb_num_layers: int = 3,
|
32 |
decoder_emb_skip_op: str = "none",
|