HoneyTian committed on
Commit
9e01c3d
·
1 Parent(s): bd94e77
examples/spectrum_unet_irm_aishell/run.sh CHANGED
@@ -8,7 +8,7 @@ sh run.sh --stage 2 --stop_stage 2 --system_version windows --file_folder_name f
8
  --speech_dir "E:/programmer/asr_datasets/aishell/data_aishell/wav/train"
9
 
10
 
11
- sh run.sh --stage 3 --stop_stage 3 --system_version centos --file_folder_name file_dir \
12
  --noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise" \
13
  --speech_dir "/data/tianxing/HuggingDatasets/aishell/data_aishell/wav/train"
14
 
 
8
  --speech_dir "E:/programmer/asr_datasets/aishell/data_aishell/wav/train"
9
 
10
 
11
+ sh run.sh --stage 1 --stop_stage 3 --system_version centos --file_folder_name file_dir \
12
  --noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise" \
13
  --speech_dir "/data/tianxing/HuggingDatasets/aishell/data_aishell/wav/train"
14
 
examples/spectrum_unet_irm_aishell/step_1_prepare_data.py CHANGED
@@ -37,8 +37,8 @@ def get_args():
37
  parser.add_argument("--valid_dataset", default="valid.xlsx", type=str)
38
 
39
  parser.add_argument("--duration", default=2.0, type=float)
40
- parser.add_argument("--min_nsr_db", default=-20, type=float)
41
- parser.add_argument("--max_nsr_db", default=5, type=float)
42
 
43
  parser.add_argument("--target_sample_rate", default=8000, type=int)
44
 
@@ -124,7 +124,7 @@ def get_dataset(args):
124
  "speech_offset": speech_offset,
125
  "speech_duration": speech_duration,
126
 
127
- "snr_db": random.uniform(args.min_nsr_db, args.max_nsr_db),
128
 
129
  "random1": random1,
130
  "random2": random2,
 
37
  parser.add_argument("--valid_dataset", default="valid.xlsx", type=str)
38
 
39
  parser.add_argument("--duration", default=2.0, type=float)
40
+ parser.add_argument("--min_snr_db", default=-10, type=float)
41
+ parser.add_argument("--max_snr_db", default=20, type=float)
42
 
43
  parser.add_argument("--target_sample_rate", default=8000, type=int)
44
 
 
124
  "speech_offset": speech_offset,
125
  "speech_duration": speech_duration,
126
 
127
+ "snr_db": random.uniform(args.min_snr_db, args.max_snr_db),
128
 
129
  "random1": random1,
130
  "random2": random2,
examples/spectrum_unet_irm_aishell/yaml/config.yaml CHANGED
@@ -26,8 +26,8 @@ encoder_emb_skip_op: "none"
26
  encoder_emb_linear_groups: 16
27
  encoder_emb_hidden_size: 256
28
 
29
- lsnr_max: 20
30
- lsnr_min: -10
31
 
32
  decoder_emb_num_layers: 3
33
  decoder_emb_skip_op: "none"
 
26
  encoder_emb_linear_groups: 16
27
  encoder_emb_hidden_size: 256
28
 
29
+ lsnr_max: 30
30
+ lsnr_min: -15
31
 
32
  decoder_emb_num_layers: 3
33
  decoder_emb_skip_op: "none"
toolbox/torchaudio/models/spectrum_unet_irm/configuration_specturm_unet_irm.py CHANGED
@@ -25,8 +25,8 @@ class SpectrumUnetIRMConfig(PretrainedConfig):
25
  encoder_emb_linear_groups: int = 16,
26
  encoder_emb_hidden_size: int = 256,
27
 
28
- lsnr_max: int = 20,
29
- lsnr_min: int = -10,
30
 
31
  decoder_emb_num_layers: int = 3,
32
  decoder_emb_skip_op: str = "none",
 
25
  encoder_emb_linear_groups: int = 16,
26
  encoder_emb_hidden_size: int = 256,
27
 
28
+ lsnr_max: int = 30,
29
+ lsnr_min: int = -15,
30
 
31
  decoder_emb_num_layers: int = 3,
32
  decoder_emb_skip_op: str = "none",