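# NOTE(review): the four lines below look like web-page residue captured with
# the file (repository path, avatar alt text, commit message, commit hash).
# They are kept as comments so the document parses as YAML.
# hwaja_insic / hyperparams.yaml
# Definite's picture
# Upload hyperparams.yaml
# b72ec2c verified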
# Generated 2024-06-04 from:
# /content/speechbrain/recipes/VoxCeleb/SpeakerRec/hparams/train_x_vectors.yaml
# yamllint disable
# ################################
# Model: Speaker identification with x-vectors
# Authors: Hwidong Na & Mirco Ravanelli
# ################################
# Basic parameters
# Random seed. The same literal is passed to torch.manual_seed on load and
# appears in the result paths below.
seed: 1986
__set_seed: !apply:torch.manual_seed [1986]

# Experiment output locations.
output_folder: 'results/xvect_augment/1986'
save_folder: 'results/xvect_augment/1986/save'

# Prefix referenced by the pretrainer paths (<pretrained_path>/...).
pretrained_path: 'Definite/hwaja_insic'

# Same file the train_logger writes to.
train_log: 'results/xvect_augment/1986/train_log.txt'
# Data for augmentation
# Archives holding the noise recordings and the room impulse responses.
# Each URL is written on the same line as its key and quoted, so the value
# cannot become detached from the key and '?', '&' and '=' are taken literally.
NOISE_DATASET_URL: 'https://www.dropbox.com/scl/fi/a09pj97s5ifan81dqhi4n/noises.zip?rlkey=j8b0n9kdjdr32o1f06t0cw5b7&dl=1'
RIR_DATASET_URL: 'https://www.dropbox.com/scl/fi/linhy77c36mu10965a836/RIRs.zip?rlkey=pg9cu8vrpn2u173vhiqyu743u&dl=1'
# Data files
# Dataset root, e.g. /path/to/Voxceleb
data_folder: './Voxceleb'
# The noisy sequences for data augmentation are downloaded here automatically.
data_folder_noise: './Voxceleb/noise'
# The impulse responses for data augmentation are downloaded here automatically.
data_folder_rir: './Voxceleb/rir'

# CSV manifests, all located in the save folder.
train_annotation: 'results/xvect_augment/1986/save/train.csv'
valid_annotation: 'results/xvect_augment/1986/save/dev.csv'
noise_annotation: 'results/xvect_augment/1986/save/noise.csv'
rir_annotation: 'results/xvect_augment/1986/save/rir.csv'

# Official VoxCeleb verification splits:
#   VoxCeleb1 (cleaned):   https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/veri_test2.txt
#   VoxCeleb1-H (cleaned): https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/list_test_hard2.txt
#   VoxCeleb1-E (cleaned): https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/list_test_all2.txt
# The VoxCeleb1-E and VoxCeleb1-H lists are drawn from the VoxCeleb1 training
# set, so no VoxCeleb1 file may be used for training when testing on them.
verification_file: './Voxceleb/save/veri_test.txt'

# NOTE(review): presumably train/validation percentages - confirm against the
# data-preparation script.
split_ratio: [90, 10]
skip_prep: true
# Save a checkpoint every N minutes.
ckpt_interval_minutes: 15
# Training parameters
# NOTE: this file is a generated, fully resolved dump (see the header), so the
# values below are repeated as literals wherever they are used. Keep the copies
# in sync when editing by hand.
# number_of_epochs equals epoch_counter's limit and lr_annealing's epoch_count.
number_of_epochs: 1
# Same value as dataloader_options' batch_size.
batch_size: 16
# Learning-rate start and end values; the same numbers appear as opt_class lr
# and as lr_annealing initial_value / final_value.
lr: 0.001
lr_final: 0.0001
# Same rate as the *_sample_rate arguments of add_noise and add_reverb.
sample_rate: 16000
sentence_len: 3.0 # seconds
shuffle: true
random_chunk: false
# Feature parameters
# These four values are repeated as the arguments of compute_features;
# n_mels also equals embedding_model's in_channels.
n_mels: 24
left_frames: 0
right_frames: 0
deltas: false
# Number of speakers
# out_n_neurons equals classifier's out_neurons; the inline note lists the
# counts for the standard VoxCeleb sets.
out_n_neurons: 1349 #1211 for vox1 # 5994 for vox2, 7205 for vox1+vox2
# Same value as the lin_neurons of embedding_model / classifier and the last
# entry of classifier's input_shape.
emb_dim: 512
# Same value as num_workers in dataloader_options, add_noise and add_reverb.
num_workers: 4
# Options grouped for the data loader. The values mirror the top-level
# batch_size, shuffle and num_workers settings.
dataloader_options:
  batch_size: 16
  shuffle: true
  num_workers: 4
# Functions
# Fbank feature extractor. Anchored as id005 so that `modules` can alias the
# same object. Argument values mirror the top-level feature parameters.
compute_features: &id005 !new:speechbrain.lobes.features.Fbank
  n_mels: 24
  left_frames: 0
  right_frames: 0
  deltas: false
# Xvector embedding network. Anchored as id006; aliased by `modules`,
# `checkpointer` and `pretrainer`.
embedding_model: &id006 !new:speechbrain.lobes.models.Xvector.Xvector
  in_channels: 24  # equals n_mels
  activation: !name:torch.nn.LeakyReLU
  # The three lists below each have tdnn_blocks (5) entries.
  tdnn_blocks: 5
  tdnn_channels: [512, 512, 512, 512, 1500]
  tdnn_kernel_sizes: [5, 3, 3, 1, 1]
  tdnn_dilations: [1, 2, 3, 1, 1]
  lin_neurons: 512  # equals emb_dim
# Classifier placed on top of the embeddings. Anchored as id007; aliased by
# `modules`, `checkpointer` and `pretrainer`.
classifier: &id007 !new:speechbrain.lobes.models.Xvector.Classifier
  input_shape: [null, null, 512]  # last entry equals emb_dim
  activation: !name:torch.nn.LeakyReLU
  lin_blocks: 1
  lin_neurons: 512
  out_neurons: 1349  # equals out_n_neurons
# Epoch counter. Anchored as id009 so the checkpointer can save it as
# `counter`. The limit equals number_of_epochs.
epoch_counter: &id009 !new:speechbrain.utils.epoch_loop.EpochCounter
  limit: 1
############################## Augmentations ###################################
# Download and prepare the dataset of noisy sequences for augmentation.
# `!name:` yields a callable with these keyword arguments pre-filled. The URL,
# destination folder and CSV path equal NOISE_DATASET_URL, data_folder_noise
# and noise_annotation respectively.
prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
  URL: 'https://www.dropbox.com/scl/fi/a09pj97s5ifan81dqhi4n/noises.zip?rlkey=j8b0n9kdjdr32o1f06t0cw5b7&dl=1'
  dest_folder: ./Voxceleb/noise
  ext: wav
  csv_file: results/xvect_augment/1986/save/noise.csv
# Add noise to input signal.
# Reads the noise manifest written by prepare_noise_data (same csv_file).
# Anchored as id001 for use in wav_augment's augmentation list.
add_noise: &id001 !new:speechbrain.augment.time_domain.AddNoise
  csv_file: results/xvect_augment/1986/save/noise.csv
  snr_low: 0
  snr_high: 15
  noise_sample_rate: 16000
  clean_sample_rate: 16000
  num_workers: 4
# Download and prepare the dataset of room impulse responses for augmentation.
# The URL, destination folder and CSV path equal RIR_DATASET_URL,
# data_folder_rir and rir_annotation respectively.
prepare_rir_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
  URL: 'https://www.dropbox.com/scl/fi/linhy77c36mu10965a836/RIRs.zip?rlkey=pg9cu8vrpn2u173vhiqyu743u&dl=1'
  dest_folder: ./Voxceleb/rir
  ext: wav
  csv_file: results/xvect_augment/1986/save/rir.csv
# Add reverberation to input signal.
# Reads the impulse-response manifest written by prepare_rir_data (same
# csv_file). Anchored as id002 for use in wav_augment's augmentation list.
add_reverb: &id002 !new:speechbrain.augment.time_domain.AddReverb
  csv_file: results/xvect_augment/1986/save/rir.csv
  reverb_sample_rate: 16000
  clean_sample_rate: 16000
  num_workers: 4
# Frequency drop: randomly drops a number of frequency bands to zero.
# Anchored as id003 for use in wav_augment's augmentation list.
drop_freq: &id003 !new:speechbrain.augment.time_domain.DropFreq
  drop_freq_low: 0
  drop_freq_high: 1
  drop_freq_count_low: 1
  drop_freq_count_high: 3
  drop_freq_width: 0.05
# Time drop: randomly drops a number of temporal chunks.
# Anchored as id004 for use in wav_augment's augmentation list.
# NOTE(review): the drop lengths are presumably in samples - confirm against
# the DropChunk documentation.
drop_chunk: &id004 !new:speechbrain.augment.time_domain.DropChunk
  drop_length_low: 1000
  drop_length_high: 2000
  drop_count_low: 1
  drop_count_high: 5
# Augmenter: combines the previously defined augmentations to perform data
# augmentation. min_augmentations and max_augmentations are both 4, which is
# the length of the augmentation list (add_noise, add_reverb, drop_freq,
# drop_chunk, in that order).
wav_augment: !new:speechbrain.augment.augmenter.Augmenter
  parallel_augment: true
  concat_original: true
  min_augmentations: 4
  max_augmentations: 4
  augment_prob: 1.0
  augmentations: [*id001, *id002, *id003, *id004]
# Input normalization with norm_type "sentence". Anchored as id008; aliased by
# `modules`, by `checkpointer` (as `normalizer`) and by `pretrainer`.
mean_var_norm: &id008 !new:speechbrain.processing.features.InputNormalization
  norm_type: sentence
  std_norm: false

# Second normalizer with norm_type "global".
# NOTE(review): judging by the name this is meant for embeddings; it is not
# referenced elsewhere in this file.
mean_var_norm_emb: !new:speechbrain.processing.features.InputNormalization
  norm_type: global
  std_norm: false

# Label encoder; the pretrainer loads it from label_encoder.txt.
label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder

# Named modules; each entry aliases an object defined above.
modules:
  compute_features: *id005
  embedding_model: *id006
  classifier: *id007
  mean_var_norm: *id008

# Cost + optimization
compute_cost: !name:speechbrain.nnet.losses.nll_loss
# compute_error: !name:speechbrain.nnet.losses.classification_error
# Adam optimizer factory; lr equals the top-level lr value.
opt_class: !name:torch.optim.Adam
  lr: 0.001
  weight_decay: 0.000002
# Linear learning-rate schedule from lr (0.001) to lr_final (0.0001) over
# number_of_epochs (1) epochs.
lr_annealing: !new:speechbrain.nnet.schedulers.LinearScheduler
  initial_value: 0.001
  final_value: 0.0001
  epoch_count: 1
# Logging + checkpoints
# File logger; save_file equals the top-level train_log path.
train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
  save_file: results/xvect_augment/1986/train_log.txt
# MetricStats factory. The metric is itself a pre-filled callable:
# classification_error with reduction set to "batch", which is why `reduction`
# is nested under `metric`.
error_stats: !name:speechbrain.utils.metric_stats.MetricStats
  metric: !name:speechbrain.nnet.losses.classification_error
    reduction: batch
# Checkpointer writing to the save folder. `recoverables` maps checkpoint
# names to the anchored objects; mean_var_norm (id008) is stored under the
# name `normalizer`.
checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
  checkpoints_dir: results/xvect_augment/1986/save
  recoverables:
    embedding_model: *id006
    classifier: *id007
    normalizer: *id008
    counter: *id009
# Pretrainer: `loadables` names the objects to fill and `paths` gives, under
# the same keys, the file each one is loaded from. All paths live below
# <pretrained_path>; mean_var_norm is read from normalizer.ckpt, the name the
# checkpointer saves it under.
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
  loadables:
    embedding_model: *id006
    mean_var_norm: *id008
    classifier: *id007
    label_encoder: !ref <label_encoder>
  paths:
    embedding_model: !ref <pretrained_path>/embedding_model.ckpt
    mean_var_norm: !ref <pretrained_path>/normalizer.ckpt
    classifier: !ref <pretrained_path>/classifier.ckpt
    label_encoder: !ref <pretrained_path>/label_encoder.txt