Definite committed on
Commit
ffa97eb
·
verified ·
1 Parent(s): b464e5c

Upload 10 files

Browse files
CKPT.yaml CHANGED
@@ -1,4 +1,4 @@
1
  # yamllint disable
2
- ErrorRate: 0.3987189829349518
3
  end-of-epoch: true
4
- unixtime: 1717514247.4720354
 
1
  # yamllint disable
2
+ ErrorRate: 0.04531625285744667
3
  end-of-epoch: true
4
+ unixtime: 1717664391.509517
brain.ckpt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:847c6ac3bf30588a0216e35313ad9505053ca7e62871c1160c33d532b5c8815f
3
  size 50
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e49e946f9de64ad4fcaac843a9a9e7712219f2797cd4afe46cbf28c17c68c7b6
3
  size 50
classifier.ckpt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d292d66608240df48e5d2f530c94544a6fec4c02d7337aec2a2ef54b26c9337a
3
  size 3840482
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19747c44e51418be40e88ac862d700f3d4f4751716f137711f93b83e5e92a788
3
  size 3840482
counter.ckpt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b
3
  size 1
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef2d127de37b942baad06145e54b0c619a1f22327b2ebbcfbec78f5564afe39d
3
  size 1
embedding_model.ckpt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f039c9c9eb7c802f53f434c3e2a5ff6e6a2450fb878518ef5a2b94ccda65e47
3
  size 16887535
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47fda3e15d508365300a56bd22aefcc199af83ec18ab9a5a23565aef2e93efed
3
  size 16887535
hyperparams.yaml CHANGED
@@ -1,4 +1,4 @@
1
- # Generated 2024-06-04 from:
2
  # /content/speechbrain/recipes/VoxCeleb/SpeakerRec/hparams/train_x_vectors.yaml
3
  # yamllint disable
4
  # ################################
@@ -11,7 +11,6 @@ seed: 1986
11
  __set_seed: !apply:torch.manual_seed [1986]
12
  output_folder: results/xvect_augment/1986
13
  save_folder: results/xvect_augment/1986/save
14
- pretrained_path: Definite/hwaja_insic
15
  train_log: results/xvect_augment/1986/train_log.txt
16
 
17
  # Data for augmentation
@@ -42,7 +41,7 @@ skip_prep: true
42
  ckpt_interval_minutes: 15 # save checkpoint every N min
43
 
44
  # Training parameters
45
- number_of_epochs: 1
46
  batch_size: 16
47
  lr: 0.001
48
  lr_final: 0.0001
@@ -62,11 +61,11 @@ deltas: false
62
  out_n_neurons: 1349 #1211 for vox1 # 5994 for vox2, 7205 for vox1+vox2
63
  emb_dim: 512
64
 
65
- num_workers: 4
66
  dataloader_options:
67
  batch_size: 16
68
  shuffle: true
69
- num_workers: 4
70
 
71
  # Functions
72
  compute_features: &id005 !new:speechbrain.lobes.features.Fbank
@@ -92,7 +91,7 @@ classifier: &id007 !new:speechbrain.lobes.models.Xvector.Classifier
92
  out_neurons: 1349
93
 
94
  epoch_counter: &id009 !new:speechbrain.utils.epoch_loop.EpochCounter
95
- limit: 1
96
 
97
  ############################## Augmentations ###################################
98
 
@@ -112,7 +111,7 @@ add_noise: &id001 !new:speechbrain.augment.time_domain.AddNoise
112
  snr_high: 15
113
  noise_sample_rate: 16000
114
  clean_sample_rate: 16000
115
- num_workers: 4
116
 
117
  # Download and prepare the dataset of room impulse responses for augmentation
118
  prepare_rir_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
@@ -127,7 +126,7 @@ add_reverb: &id002 !new:speechbrain.augment.time_domain.AddReverb
127
  csv_file: results/xvect_augment/1986/save/rir.csv
128
  reverb_sample_rate: 16000
129
  clean_sample_rate: 16000
130
- num_workers: 4
131
 
132
  # Frequency drop: randomly drops a number of frequency bands to zero.
133
  drop_freq: &id003 !new:speechbrain.augment.time_domain.DropFreq
@@ -159,12 +158,6 @@ mean_var_norm: &id008 !new:speechbrain.processing.features.InputNormalization
159
  norm_type: sentence
160
  std_norm: false
161
 
162
- mean_var_norm_emb: !new:speechbrain.processing.features.InputNormalization
163
- norm_type: global
164
- std_norm: False
165
-
166
- label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder
167
-
168
  modules:
169
  compute_features: *id005
170
  embedding_model: *id006
@@ -180,7 +173,7 @@ opt_class: !name:torch.optim.Adam
180
  lr_annealing: !new:speechbrain.nnet.schedulers.LinearScheduler
181
  initial_value: 0.001
182
  final_value: 0.0001
183
- epoch_count: 1
184
 
185
  # Logging + checkpoints
186
  train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
@@ -197,15 +190,3 @@ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
197
  classifier: *id007
198
  normalizer: *id008
199
  counter: *id009
200
-
201
- pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
202
- loadables:
203
- embedding_model: *id006
204
- mean_var_norm: *id008
205
- classifier: *id007
206
- label_encoder: !ref <label_encoder>
207
- paths:
208
- embedding_model: !ref <pretrained_path>/embedding_model.ckpt
209
- mean_var_norm: !ref <pretrained_path>/normalizer.ckpt
210
- classifier: !ref <pretrained_path>/classifier.ckpt
211
- label_encoder: !ref <pretrained_path>/label_encoder.txt
 
1
+ # Generated 2024-06-06 from:
2
  # /content/speechbrain/recipes/VoxCeleb/SpeakerRec/hparams/train_x_vectors.yaml
3
  # yamllint disable
4
  # ################################
 
11
  __set_seed: !apply:torch.manual_seed [1986]
12
  output_folder: results/xvect_augment/1986
13
  save_folder: results/xvect_augment/1986/save
 
14
  train_log: results/xvect_augment/1986/train_log.txt
15
 
16
  # Data for augmentation
 
41
  ckpt_interval_minutes: 15 # save checkpoint every N min
42
 
43
  # Training parameters
44
+ number_of_epochs: 5
45
  batch_size: 16
46
  lr: 0.001
47
  lr_final: 0.0001
 
61
  out_n_neurons: 1349 #1211 for vox1 # 5994 for vox2, 7205 for vox1+vox2
62
  emb_dim: 512
63
 
64
+ num_workers: 2
65
  dataloader_options:
66
  batch_size: 16
67
  shuffle: true
68
+ num_workers: 2
69
 
70
  # Functions
71
  compute_features: &id005 !new:speechbrain.lobes.features.Fbank
 
91
  out_neurons: 1349
92
 
93
  epoch_counter: &id009 !new:speechbrain.utils.epoch_loop.EpochCounter
94
+ limit: 5
95
 
96
  ############################## Augmentations ###################################
97
 
 
111
  snr_high: 15
112
  noise_sample_rate: 16000
113
  clean_sample_rate: 16000
114
+ num_workers: 2
115
 
116
  # Download and prepare the dataset of room impulse responses for augmentation
117
  prepare_rir_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
 
126
  csv_file: results/xvect_augment/1986/save/rir.csv
127
  reverb_sample_rate: 16000
128
  clean_sample_rate: 16000
129
+ num_workers: 2
130
 
131
  # Frequency drop: randomly drops a number of frequency bands to zero.
132
  drop_freq: &id003 !new:speechbrain.augment.time_domain.DropFreq
 
158
  norm_type: sentence
159
  std_norm: false
160
 
 
 
 
 
 
 
161
  modules:
162
  compute_features: *id005
163
  embedding_model: *id006
 
173
  lr_annealing: !new:speechbrain.nnet.schedulers.LinearScheduler
174
  initial_value: 0.001
175
  final_value: 0.0001
176
+ epoch_count: 5
177
 
178
  # Logging + checkpoints
179
  train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
 
190
  classifier: *id007
191
  normalizer: *id008
192
  counter: *id009
 
 
 
 
 
 
 
 
 
 
 
 
normalizer.ckpt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92244ada292c7d670d1dc88549e74ed24b3e25e70f27fe443420cf4832d6811b
3
  size 1578
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b98d47768bfba78eaa8a052f7f3e864308f5fff7e34051c8cb2adfef9f451948
3
  size 1578
optimizer.ckpt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50c29db152d0f54bec401f30610ed2f1b8039829a334331a4840b8d43546e5c7
3
  size 41371844
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ff8086bb599afadf5aa6bb4f2149adbd574cd7c91974b563a4a8e223bc3754d
3
  size 41371844