ESPnet
multilingual
audio
codec
JinchuanTian committed on
Commit
54749e3
·
1 Parent(s): 2c173ae

Update model

Browse files
Files changed (29) hide show
  1. README.md +5 -3
  2. exp/codec_encodec_ss4_16k/120epoch.pth +3 -0
  3. exp/codec_encodec_ss4_16k/config.yaml +5 -3
  4. exp/codec_encodec_ss4_16k/images/adv_loss.png +0 -0
  5. exp/codec_encodec_ss4_16k/images/codec_commit_loss.png +0 -0
  6. exp/codec_encodec_ss4_16k/images/codec_loss.png +0 -0
  7. exp/codec_encodec_ss4_16k/images/codec_quantization_loss.png +0 -0
  8. exp/codec_encodec_ss4_16k/images/discriminator_backward_time.png +0 -0
  9. exp/codec_encodec_ss4_16k/images/discriminator_forward_time.png +0 -0
  10. exp/codec_encodec_ss4_16k/images/discriminator_loss.png +0 -0
  11. exp/codec_encodec_ss4_16k/images/discriminator_optim_step_time.png +0 -0
  12. exp/codec_encodec_ss4_16k/images/discriminator_train_time.png +0 -0
  13. exp/codec_encodec_ss4_16k/images/fake_loss.png +0 -0
  14. exp/codec_encodec_ss4_16k/images/feat_match_loss.png +0 -0
  15. exp/codec_encodec_ss4_16k/images/generator_backward_time.png +0 -0
  16. exp/codec_encodec_ss4_16k/images/generator_forward_time.png +0 -0
  17. exp/codec_encodec_ss4_16k/images/generator_optim_step_time.png +0 -0
  18. exp/codec_encodec_ss4_16k/images/generator_train_time.png +0 -0
  19. exp/codec_encodec_ss4_16k/images/gpu_max_cached_mem_GB.png +0 -0
  20. exp/codec_encodec_ss4_16k/images/iter_time.png +0 -0
  21. exp/codec_encodec_ss4_16k/images/loss.png +0 -0
  22. exp/codec_encodec_ss4_16k/images/mel_loss.png +0 -0
  23. exp/codec_encodec_ss4_16k/images/mel_loss_real.png +0 -0
  24. exp/codec_encodec_ss4_16k/images/optim0_lr0.png +0 -0
  25. exp/codec_encodec_ss4_16k/images/optim1_lr0.png +0 -0
  26. exp/codec_encodec_ss4_16k/images/real_loss.png +0 -0
  27. exp/codec_encodec_ss4_16k/images/reconstruct_loss.png +0 -0
  28. exp/codec_encodec_ss4_16k/images/train_time.png +0 -0
  29. meta.yaml +2 -2
README.md CHANGED
@@ -61,6 +61,7 @@ sharded_ddp: false
61
  cudnn_enabled: true
62
  cudnn_benchmark: false
63
  cudnn_deterministic: false
 
64
  collect_stats: false
65
  write_collected_feats: false
66
  max_epoch: 120
@@ -92,7 +93,7 @@ no_forward_run: false
92
  resume: true
93
  train_dtype: float32
94
  use_amp: false
95
- log_interval: 500
96
  use_matplotlib: true
97
  use_tensorboard: true
98
  create_graph_in_tensorboard: false
@@ -117,9 +118,9 @@ valid_batch_size: null
117
  batch_bins: 1000000
118
  valid_batch_bins: null
119
  train_shape_file:
120
- - exp/codec_stats_raw/train/audio_shape
121
  valid_shape_file:
122
- - exp/codec_stats_raw/valid/audio_shape
123
  batch_type: unsorted
124
  valid_batch_type: null
125
  fold_length:
@@ -141,6 +142,7 @@ valid_data_path_and_name_and_type:
141
  - - dump_16k/raw/dev-clean/wav.scp
142
  - audio
143
  - sound
 
144
  allow_variable_data_keys: false
145
  max_cache_size: 0.0
146
  max_cache_fd: 32
 
61
  cudnn_enabled: true
62
  cudnn_benchmark: false
63
  cudnn_deterministic: false
64
+ use_tf32: false
65
  collect_stats: false
66
  write_collected_feats: false
67
  max_epoch: 120
 
93
  resume: true
94
  train_dtype: float32
95
  use_amp: false
96
+ log_interval: 5
97
  use_matplotlib: true
98
  use_tensorboard: true
99
  create_graph_in_tensorboard: false
 
118
  batch_bins: 1000000
119
  valid_batch_bins: null
120
  train_shape_file:
121
+ - exp/stats_16k/train/audio_shape
122
  valid_shape_file:
123
+ - exp/stats_16k/valid/audio_shape
124
  batch_type: unsorted
125
  valid_batch_type: null
126
  fold_length:
 
142
  - - dump_16k/raw/dev-clean/wav.scp
143
  - audio
144
  - sound
145
+ multi_task_dataset: false
146
  allow_variable_data_keys: false
147
  max_cache_size: 0.0
148
  max_cache_fd: 32
exp/codec_encodec_ss4_16k/120epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7e1d2abc857c8da7a8207fbff1b5bdcfa2e7b4d279f3d65c699b31cb539303b
3
+ size 215606502
exp/codec_encodec_ss4_16k/config.yaml CHANGED
@@ -24,6 +24,7 @@ sharded_ddp: false
24
  cudnn_enabled: true
25
  cudnn_benchmark: false
26
  cudnn_deterministic: false
 
27
  collect_stats: false
28
  write_collected_feats: false
29
  max_epoch: 120
@@ -55,7 +56,7 @@ no_forward_run: false
55
  resume: true
56
  train_dtype: float32
57
  use_amp: false
58
- log_interval: 500
59
  use_matplotlib: true
60
  use_tensorboard: true
61
  create_graph_in_tensorboard: false
@@ -80,9 +81,9 @@ valid_batch_size: null
80
  batch_bins: 1000000
81
  valid_batch_bins: null
82
  train_shape_file:
83
- - exp/codec_stats_raw/train/audio_shape
84
  valid_shape_file:
85
- - exp/codec_stats_raw/valid/audio_shape
86
  batch_type: unsorted
87
  valid_batch_type: null
88
  fold_length:
@@ -104,6 +105,7 @@ valid_data_path_and_name_and_type:
104
  - - dump_16k/raw/dev-clean/wav.scp
105
  - audio
106
  - sound
 
107
  allow_variable_data_keys: false
108
  max_cache_size: 0.0
109
  max_cache_fd: 32
 
24
  cudnn_enabled: true
25
  cudnn_benchmark: false
26
  cudnn_deterministic: false
27
+ use_tf32: false
28
  collect_stats: false
29
  write_collected_feats: false
30
  max_epoch: 120
 
56
  resume: true
57
  train_dtype: float32
58
  use_amp: false
59
+ log_interval: 5
60
  use_matplotlib: true
61
  use_tensorboard: true
62
  create_graph_in_tensorboard: false
 
81
  batch_bins: 1000000
82
  valid_batch_bins: null
83
  train_shape_file:
84
+ - exp/stats_16k/train/audio_shape
85
  valid_shape_file:
86
+ - exp/stats_16k/valid/audio_shape
87
  batch_type: unsorted
88
  valid_batch_type: null
89
  fold_length:
 
105
  - - dump_16k/raw/dev-clean/wav.scp
106
  - audio
107
  - sound
108
+ multi_task_dataset: false
109
  allow_variable_data_keys: false
110
  max_cache_size: 0.0
111
  max_cache_fd: 32
exp/codec_encodec_ss4_16k/images/adv_loss.png CHANGED
exp/codec_encodec_ss4_16k/images/codec_commit_loss.png CHANGED
exp/codec_encodec_ss4_16k/images/codec_loss.png CHANGED
exp/codec_encodec_ss4_16k/images/codec_quantization_loss.png CHANGED
exp/codec_encodec_ss4_16k/images/discriminator_backward_time.png CHANGED
exp/codec_encodec_ss4_16k/images/discriminator_forward_time.png CHANGED
exp/codec_encodec_ss4_16k/images/discriminator_loss.png CHANGED
exp/codec_encodec_ss4_16k/images/discriminator_optim_step_time.png CHANGED
exp/codec_encodec_ss4_16k/images/discriminator_train_time.png CHANGED
exp/codec_encodec_ss4_16k/images/fake_loss.png CHANGED
exp/codec_encodec_ss4_16k/images/feat_match_loss.png CHANGED
exp/codec_encodec_ss4_16k/images/generator_backward_time.png CHANGED
exp/codec_encodec_ss4_16k/images/generator_forward_time.png CHANGED
exp/codec_encodec_ss4_16k/images/generator_optim_step_time.png CHANGED
exp/codec_encodec_ss4_16k/images/generator_train_time.png CHANGED
exp/codec_encodec_ss4_16k/images/gpu_max_cached_mem_GB.png CHANGED
exp/codec_encodec_ss4_16k/images/iter_time.png CHANGED
exp/codec_encodec_ss4_16k/images/loss.png CHANGED
exp/codec_encodec_ss4_16k/images/mel_loss.png CHANGED
exp/codec_encodec_ss4_16k/images/mel_loss_real.png CHANGED
exp/codec_encodec_ss4_16k/images/optim0_lr0.png CHANGED
exp/codec_encodec_ss4_16k/images/optim1_lr0.png CHANGED
exp/codec_encodec_ss4_16k/images/real_loss.png CHANGED
exp/codec_encodec_ss4_16k/images/reconstruct_loss.png CHANGED
exp/codec_encodec_ss4_16k/images/train_time.png CHANGED
meta.yaml CHANGED
@@ -1,8 +1,8 @@
1
  espnet: '202402'
2
  files:
3
- model_file: exp/codec_encodec_ss4_16k/81epoch.pth
4
  python: 3.10.14 (main, May 6 2024, 19:42:50) [GCC 11.2.0]
5
- timestamp: 1718729736.568983
6
  torch: 2.0.1
7
  yaml_files:
8
  train_config: exp/codec_encodec_ss4_16k/config.yaml
 
1
  espnet: '202402'
2
  files:
3
+ model_file: exp/codec_encodec_ss4_16k/120epoch.pth
4
  python: 3.10.14 (main, May 6 2024, 19:42:50) [GCC 11.2.0]
5
+ timestamp: 1718989322.954464
6
  torch: 2.0.1
7
  yaml_files:
8
  train_config: exp/codec_encodec_ss4_16k/config.yaml