# Hyperparameters for x-vector speaker-embedding training with waveform
# augmentation (HyperPyYAML syntax: !new / !name / !apply / !ref tags).
# NOTE(review): this appears to be a resolved dump (literal values and
# auto-generated &idNNN anchors). Nesting was reconstructed from flattened
# text -- confirm against the original recipe before relying on it.

# Seed and experiment output locations
seed: 1986
__set_seed: !apply:torch.manual_seed [1986]
output_folder: results/xvect_augment/1986
save_folder: results/xvect_augment/1986/save
pretrained_path: Definite/hwaja_insic
train_log: results/xvect_augment/1986/train_log.txt

# Download locations of the noise and RIR archives used for augmentation
NOISE_DATASET_URL:
    https://www.dropbox.com/scl/fi/a09pj97s5ifan81dqhi4n/noises.zip?rlkey=j8b0n9kdjdr32o1f06t0cw5b7&dl=1
RIR_DATASET_URL:
    https://www.dropbox.com/scl/fi/linhy77c36mu10965a836/RIRs.zip?rlkey=pg9cu8vrpn2u173vhiqyu743u&dl=1

# Data folders and annotation CSV files
data_folder: ./Voxceleb
data_folder_noise: ./Voxceleb/noise
data_folder_rir: ./Voxceleb/rir
train_annotation: results/xvect_augment/1986/save/train.csv
valid_annotation: results/xvect_augment/1986/save/dev.csv
noise_annotation: results/xvect_augment/1986/save/noise.csv
rir_annotation: results/xvect_augment/1986/save/rir.csv

verification_file: ./Voxceleb/save/veri_test.txt

split_ratio: [90, 10]
skip_prep: true
ckpt_interval_minutes: 15

# Training parameters
number_of_epochs: 1
batch_size: 16
lr: 0.001
lr_final: 0.0001

sample_rate: 16000
sentence_len: 3.0
shuffle: true
random_chunk: false

# Feature parameters
n_mels: 24
left_frames: 0
right_frames: 0
deltas: false

# Model sizes; out_n_neurons is the classifier output dimension
# (presumably the number of training speakers -- verify against the data).
out_n_neurons: 1349
emb_dim: 512

num_workers: 4
dataloader_options:
    batch_size: 16
    shuffle: true
    num_workers: 4

# Feature extraction (n_mels matches embedding_model.in_channels)
compute_features: &id005 !new:speechbrain.lobes.features.Fbank
    n_mels: 24
    left_frames: 0
    right_frames: 0
    deltas: false

embedding_model: &id006 !new:speechbrain.lobes.models.Xvector.Xvector
    in_channels: 24
    activation: !name:torch.nn.LeakyReLU
    tdnn_blocks: 5
    tdnn_channels: [512, 512, 512, 512, 1500]
    tdnn_kernel_sizes: [5, 3, 3, 1, 1]
    tdnn_dilations: [1, 2, 3, 1, 1]
    lin_neurons: 512

classifier: &id007 !new:speechbrain.lobes.models.Xvector.Classifier
    input_shape: [null, null, 512]
    activation: !name:torch.nn.LeakyReLU
    lin_blocks: 1
    lin_neurons: 512
    out_neurons: 1349

epoch_counter: &id009 !new:speechbrain.utils.epoch_loop.EpochCounter
    limit: 1

# Noise data preparation (same URL / folder / CSV as the values above)
prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
    URL:
        https://www.dropbox.com/scl/fi/a09pj97s5ifan81dqhi4n/noises.zip?rlkey=j8b0n9kdjdr32o1f06t0cw5b7&dl=1
    dest_folder: ./Voxceleb/noise
    ext: wav
    csv_file: results/xvect_augment/1986/save/noise.csv

add_noise: &id001 !new:speechbrain.augment.time_domain.AddNoise
    csv_file: results/xvect_augment/1986/save/noise.csv
    snr_low: 0
    snr_high: 15
    noise_sample_rate: 16000
    clean_sample_rate: 16000
    num_workers: 4

# RIR data preparation (same URL / folder / CSV as the values above)
prepare_rir_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
    URL:
        https://www.dropbox.com/scl/fi/linhy77c36mu10965a836/RIRs.zip?rlkey=pg9cu8vrpn2u173vhiqyu743u&dl=1
    dest_folder: ./Voxceleb/rir
    ext: wav
    csv_file: results/xvect_augment/1986/save/rir.csv

add_reverb: &id002 !new:speechbrain.augment.time_domain.AddReverb
    csv_file: results/xvect_augment/1986/save/rir.csv
    reverb_sample_rate: 16000
    clean_sample_rate: 16000
    num_workers: 4

drop_freq: &id003 !new:speechbrain.augment.time_domain.DropFreq
    drop_freq_low: 0
    drop_freq_high: 1
    drop_freq_count_low: 1
    drop_freq_count_high: 3
    drop_freq_width: 0.05

drop_chunk: &id004 !new:speechbrain.augment.time_domain.DropChunk
    drop_length_low: 1000
    drop_length_high: 2000
    drop_count_low: 1
    drop_count_high: 5

# Combines the four augmentations defined above; min and max are both 4,
# i.e. equal to the length of the augmentations list.
wav_augment: !new:speechbrain.augment.augmenter.Augmenter
    parallel_augment: true
    concat_original: true
    min_augmentations: 4
    max_augmentations: 4
    augment_prob: 1.0
    augmentations: [*id001, *id002, *id003, *id004]

mean_var_norm: &id008 !new:speechbrain.processing.features.InputNormalization
    norm_type: sentence
    std_norm: false

mean_var_norm_emb: !new:speechbrain.processing.features.InputNormalization
    norm_type: global
    std_norm: false

label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder

modules:
    compute_features: *id005
    embedding_model: *id006
    classifier: *id007
    mean_var_norm: *id008

# Cost and optimization
# NOTE(review): compute_cost is restored as a top-level key, not a member of
# `modules` -- confirm against the training script.
compute_cost: !name:speechbrain.nnet.losses.nll_loss

opt_class: !name:torch.optim.Adam
    lr: 0.001
    weight_decay: 0.000002

lr_annealing: !new:speechbrain.nnet.schedulers.LinearScheduler
    initial_value: 0.001
    final_value: 0.0001
    epoch_count: 1

# Logging and checkpointing
train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
    save_file: results/xvect_augment/1986/train_log.txt

error_stats: !name:speechbrain.utils.metric_stats.MetricStats
    metric: !name:speechbrain.nnet.losses.classification_error
        reduction: batch

checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
    checkpoints_dir: results/xvect_augment/1986/save
    recoverables:
        embedding_model: *id006
        classifier: *id007
        normalizer: *id008
        counter: *id009

# Pretrained parameter loading; every path is built from <pretrained_path>.
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
    loadables:
        embedding_model: *id006
        mean_var_norm: *id008
        classifier: *id007
        label_encoder: !ref <label_encoder>
    paths:
        embedding_model: !ref <pretrained_path>/embedding_model.ckpt
        mean_var_norm: !ref <pretrained_path>/normalizer.ckpt
        classifier: !ref <pretrained_path>/classifier.ckpt
        label_encoder: !ref <pretrained_path>/label_encoder.txt