Upload 10 files
Browse files
- CKPT.yaml +2 -2
- brain.ckpt +1 -1
- classifier.ckpt +1 -1
- counter.ckpt +1 -1
- embedding_model.ckpt +1 -1
- hyperparams.yaml +8 -27
- normalizer.ckpt +1 -1
- optimizer.ckpt +1 -1
CKPT.yaml
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
# yamllint disable
|
2 |
-
ErrorRate: 0.
|
3 |
end-of-epoch: true
|
4 |
-
unixtime:
|
|
|
1 |
# yamllint disable
|
2 |
+
ErrorRate: 0.04531625285744667
|
3 |
end-of-epoch: true
|
4 |
+
unixtime: 1717664391.509517
|
brain.ckpt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 50
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e49e946f9de64ad4fcaac843a9a9e7712219f2797cd4afe46cbf28c17c68c7b6
|
3 |
size 50
|
classifier.ckpt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3840482
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:19747c44e51418be40e88ac862d700f3d4f4751716f137711f93b83e5e92a788
|
3 |
size 3840482
|
counter.ckpt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef2d127de37b942baad06145e54b0c619a1f22327b2ebbcfbec78f5564afe39d
|
3 |
size 1
|
embedding_model.ckpt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 16887535
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:47fda3e15d508365300a56bd22aefcc199af83ec18ab9a5a23565aef2e93efed
|
3 |
size 16887535
|
hyperparams.yaml
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
# Generated 2024-06-
|
2 |
# /content/speechbrain/recipes/VoxCeleb/SpeakerRec/hparams/train_x_vectors.yaml
|
3 |
# yamllint disable
|
4 |
# ################################
|
@@ -11,7 +11,6 @@ seed: 1986
|
|
11 |
__set_seed: !apply:torch.manual_seed [1986]
|
12 |
output_folder: results/xvect_augment/1986
|
13 |
save_folder: results/xvect_augment/1986/save
|
14 |
-
pretrained_path: Definite/hwaja_insic
|
15 |
train_log: results/xvect_augment/1986/train_log.txt
|
16 |
|
17 |
# Data for augmentation
|
@@ -42,7 +41,7 @@ skip_prep: true
|
|
42 |
ckpt_interval_minutes: 15 # save checkpoint every N min
|
43 |
|
44 |
# Training parameters
|
45 |
-
number_of_epochs:
|
46 |
batch_size: 16
|
47 |
lr: 0.001
|
48 |
lr_final: 0.0001
|
@@ -62,11 +61,11 @@ deltas: false
|
|
62 |
out_n_neurons: 1349 #1211 for vox1 # 5994 for vox2, 7205 for vox1+vox2
|
63 |
emb_dim: 512
|
64 |
|
65 |
-
num_workers:
|
66 |
dataloader_options:
|
67 |
batch_size: 16
|
68 |
shuffle: true
|
69 |
-
num_workers:
|
70 |
|
71 |
# Functions
|
72 |
compute_features: &id005 !new:speechbrain.lobes.features.Fbank
|
@@ -92,7 +91,7 @@ classifier: &id007 !new:speechbrain.lobes.models.Xvector.Classifier
|
|
92 |
out_neurons: 1349
|
93 |
|
94 |
epoch_counter: &id009 !new:speechbrain.utils.epoch_loop.EpochCounter
|
95 |
-
limit:
|
96 |
|
97 |
############################## Augmentations ###################################
|
98 |
|
@@ -112,7 +111,7 @@ add_noise: &id001 !new:speechbrain.augment.time_domain.AddNoise
|
|
112 |
snr_high: 15
|
113 |
noise_sample_rate: 16000
|
114 |
clean_sample_rate: 16000
|
115 |
-
num_workers:
|
116 |
|
117 |
# Download and prepare the dataset of room impulse responses for augmentation
|
118 |
prepare_rir_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
|
@@ -127,7 +126,7 @@ add_reverb: &id002 !new:speechbrain.augment.time_domain.AddReverb
|
|
127 |
csv_file: results/xvect_augment/1986/save/rir.csv
|
128 |
reverb_sample_rate: 16000
|
129 |
clean_sample_rate: 16000
|
130 |
-
num_workers:
|
131 |
|
132 |
# Frequency drop: randomly drops a number of frequency bands to zero.
|
133 |
drop_freq: &id003 !new:speechbrain.augment.time_domain.DropFreq
|
@@ -159,12 +158,6 @@ mean_var_norm: &id008 !new:speechbrain.processing.features.InputNormalization
|
|
159 |
norm_type: sentence
|
160 |
std_norm: false
|
161 |
|
162 |
-
mean_var_norm_emb: !new:speechbrain.processing.features.InputNormalization
|
163 |
-
norm_type: global
|
164 |
-
std_norm: False
|
165 |
-
|
166 |
-
label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder
|
167 |
-
|
168 |
modules:
|
169 |
compute_features: *id005
|
170 |
embedding_model: *id006
|
@@ -180,7 +173,7 @@ opt_class: !name:torch.optim.Adam
|
|
180 |
lr_annealing: !new:speechbrain.nnet.schedulers.LinearScheduler
|
181 |
initial_value: 0.001
|
182 |
final_value: 0.0001
|
183 |
-
epoch_count:
|
184 |
|
185 |
# Logging + checkpoints
|
186 |
train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
|
@@ -197,15 +190,3 @@ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
|
|
197 |
classifier: *id007
|
198 |
normalizer: *id008
|
199 |
counter: *id009
|
200 |
-
|
201 |
-
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
|
202 |
-
loadables:
|
203 |
-
embedding_model: *id006
|
204 |
-
mean_var_norm: *id008
|
205 |
-
classifier: *id007
|
206 |
-
label_encoder: !ref <label_encoder>
|
207 |
-
paths:
|
208 |
-
embedding_model: !ref <pretrained_path>/embedding_model.ckpt
|
209 |
-
mean_var_norm: !ref <pretrained_path>/normalizer.ckpt
|
210 |
-
classifier: !ref <pretrained_path>/classifier.ckpt
|
211 |
-
label_encoder: !ref <pretrained_path>/label_encoder.txt
|
|
|
1 |
+
# Generated 2024-06-06 from:
|
2 |
# /content/speechbrain/recipes/VoxCeleb/SpeakerRec/hparams/train_x_vectors.yaml
|
3 |
# yamllint disable
|
4 |
# ################################
|
|
|
11 |
__set_seed: !apply:torch.manual_seed [1986]
|
12 |
output_folder: results/xvect_augment/1986
|
13 |
save_folder: results/xvect_augment/1986/save
|
|
|
14 |
train_log: results/xvect_augment/1986/train_log.txt
|
15 |
|
16 |
# Data for augmentation
|
|
|
41 |
ckpt_interval_minutes: 15 # save checkpoint every N min
|
42 |
|
43 |
# Training parameters
|
44 |
+
number_of_epochs: 5
|
45 |
batch_size: 16
|
46 |
lr: 0.001
|
47 |
lr_final: 0.0001
|
|
|
61 |
out_n_neurons: 1349 #1211 for vox1 # 5994 for vox2, 7205 for vox1+vox2
|
62 |
emb_dim: 512
|
63 |
|
64 |
+
num_workers: 2
|
65 |
dataloader_options:
|
66 |
batch_size: 16
|
67 |
shuffle: true
|
68 |
+
num_workers: 2
|
69 |
|
70 |
# Functions
|
71 |
compute_features: &id005 !new:speechbrain.lobes.features.Fbank
|
|
|
91 |
out_neurons: 1349
|
92 |
|
93 |
epoch_counter: &id009 !new:speechbrain.utils.epoch_loop.EpochCounter
|
94 |
+
limit: 5
|
95 |
|
96 |
############################## Augmentations ###################################
|
97 |
|
|
|
111 |
snr_high: 15
|
112 |
noise_sample_rate: 16000
|
113 |
clean_sample_rate: 16000
|
114 |
+
num_workers: 2
|
115 |
|
116 |
# Download and prepare the dataset of room impulse responses for augmentation
|
117 |
prepare_rir_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
|
|
|
126 |
csv_file: results/xvect_augment/1986/save/rir.csv
|
127 |
reverb_sample_rate: 16000
|
128 |
clean_sample_rate: 16000
|
129 |
+
num_workers: 2
|
130 |
|
131 |
# Frequency drop: randomly drops a number of frequency bands to zero.
|
132 |
drop_freq: &id003 !new:speechbrain.augment.time_domain.DropFreq
|
|
|
158 |
norm_type: sentence
|
159 |
std_norm: false
|
160 |
|
|
|
|
|
|
|
|
|
|
|
|
|
161 |
modules:
|
162 |
compute_features: *id005
|
163 |
embedding_model: *id006
|
|
|
173 |
lr_annealing: !new:speechbrain.nnet.schedulers.LinearScheduler
|
174 |
initial_value: 0.001
|
175 |
final_value: 0.0001
|
176 |
+
epoch_count: 5
|
177 |
|
178 |
# Logging + checkpoints
|
179 |
train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
|
|
|
190 |
classifier: *id007
|
191 |
normalizer: *id008
|
192 |
counter: *id009
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
normalizer.ckpt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1578
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b98d47768bfba78eaa8a052f7f3e864308f5fff7e34051c8cb2adfef9f451948
|
3 |
size 1578
|
optimizer.ckpt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 41371844
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4ff8086bb599afadf5aa6bb4f2149adbd574cd7c91974b563a4a8e223bc3754d
|
3 |
size 41371844
|