Definite
/

hwaja_insic

Model card Files Files and versions Community

hwaja_insic / hyperparams.yaml

Definite

Upload hyperparams.yaml

b72ec2c verified about 1 year ago

raw

history blame contribute delete

6.96 kB

	# Generated 2024-06-04 from:
	# /content/speechbrain/recipes/VoxCeleb/SpeakerRec/hparams/train_x_vectors.yaml
	# yamllint disable
	# ################################
	# Model: Speaker identification with x-vectors
	# Authors: Hwidong Na & Mirco Ravanelli
	# ################################

	# Basic parameters
	# `seed` feeds both the torch RNG and the experiment directory name. The
	# derived entries below reference it, so they follow an override of `seed`
	# or `output_folder` instead of silently keeping stale literals.
	seed: 1986
	__set_seed: !apply:torch.manual_seed [!ref <seed>]
	output_folder: !ref results/xvect_augment/<seed>
	save_folder: !ref <output_folder>/save
	# Base location that the pretrainer's `paths` entries are built from.
	pretrained_path: Definite/hwaja_insic
	train_log: !ref <output_folder>/train_log.txt

	# Data for augmentation
	# Each URL is kept on the same line as its key and quoted: a value placed on
	# the following line at the key's own indentation leaves the key null and
	# the URL as an orphaned scalar.
	NOISE_DATASET_URL: 'https://www.dropbox.com/scl/fi/a09pj97s5ifan81dqhi4n/noises.zip?rlkey=j8b0n9kdjdr32o1f06t0cw5b7&dl=1'
	RIR_DATASET_URL: 'https://www.dropbox.com/scl/fi/linhy77c36mu10965a836/RIRs.zip?rlkey=pg9cu8vrpn2u173vhiqyu743u&dl=1'

	# Data files
	# Everything below is derived from `data_folder` or `save_folder`, so moving
	# the dataset or the experiment directory only needs one edit.
	data_folder: ./Voxceleb  # e.g. /path/to/Voxceleb
	# The noisy sequences for data augmentation will automatically be downloaded here.
	data_folder_noise: !ref <data_folder>/noise
	# The impulse responses used for data augmentation will automatically be downloaded here.
	data_folder_rir: !ref <data_folder>/rir
	# CSV manifests, all kept in the experiment's save folder.
	train_annotation: !ref <save_folder>/train.csv
	valid_annotation: !ref <save_folder>/dev.csv
	noise_annotation: !ref <save_folder>/noise.csv
	rir_annotation: !ref <save_folder>/rir.csv

	# Use the following links for the official VoxCeleb splits:
	# VoxCeleb1 (cleaned): https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/veri_test2.txt
	# VoxCeleb1-H (cleaned): https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/list_test_hard2.txt
	# VoxCeleb1-E (cleaned): https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/list_test_all2.txt
	# The VoxCeleb1-E and VoxCeleb1-H lists are drawn from the VoxCeleb1 training set,
	# so you cannot use any VoxCeleb1 files for training if you test with these lists.
	# Verification list; here it is a local path under ./Voxceleb/save rather
	# than a remote URL.
	verification_file: ./Voxceleb/save/veri_test.txt

	# NOTE(review): presumably the train/validation split in percent — confirm
	# against the data-preparation script.
	split_ratio: [90, 10]
	# NOTE(review): with `true`, data preparation is presumably skipped, so the
	# CSV manifests must already exist — confirm.
	skip_prep: true
	ckpt_interval_minutes: 15 # save checkpoint every N min

	# Training parameters
	# The same value is used as epoch_counter's `limit` and lr_annealing's `epoch_count`.
	number_of_epochs: 1
	# The same value is used as dataloader_options' `batch_size`.
	batch_size: 16
	# lr / lr_final equal lr_annealing's initial_value / final_value; lr is also
	# the learning rate given to opt_class.
	lr: 0.001
	lr_final: 0.0001

	# The same value is used for the sample rates of add_noise and add_reverb.
	sample_rate: 16000
	sentence_len: 3.0 # seconds
	# The same value is used as dataloader_options' `shuffle`.
	shuffle: true
	# NOTE(review): presumably selects random vs. fixed chunking in the training
	# script — confirm against the caller.
	random_chunk: false

	# Feature parameters
	# These four values are the arguments of compute_features (Fbank); n_mels is
	# also the `in_channels` of embedding_model.
	n_mels: 24
	left_frames: 0
	right_frames: 0
	deltas: false

	# Number of speakers
	# The same value is used as classifier's `out_neurons`.
	# NOTE(review): 1349 matches none of the VoxCeleb counts listed inline —
	# presumably a custom speaker set; confirm.
	out_n_neurons: 1349 #1211 for vox1 # 5994 for vox2, 7205 for vox1+vox2
	# The same value is used as `lin_neurons` of embedding_model and classifier
	# and as the last entry of classifier's `input_shape`.
	emb_dim: 512

	num_workers: 4
	# Data-loading options grouped as one mapping. The children are indented so
	# they belong to `dataloader_options` instead of redefining the top-level
	# keys of the same name, and they reference those keys as the single source
	# of truth.
	dataloader_options:
	  batch_size: !ref <batch_size>
	  shuffle: !ref <shuffle>
	  num_workers: !ref <num_workers>

	# Functions
	# Fbank feature extractor. Its constructor arguments are nested under the
	# key and read from the feature parameters defined earlier in this file.
	compute_features: &id005 !new:speechbrain.lobes.features.Fbank
	  n_mels: !ref <n_mels>
	  left_frames: !ref <left_frames>
	  right_frames: !ref <right_frames>
	  deltas: !ref <deltas>

	# Xvector embedding network. Constructor arguments are nested under the key.
	embedding_model: &id006 !new:speechbrain.lobes.models.Xvector.Xvector
	  in_channels: !ref <n_mels>  # input width follows the number of mel bins
	  activation: !name:torch.nn.LeakyReLU
	  tdnn_blocks: 5
	  # The three lists below each hold one entry per TDNN block (5 entries).
	  tdnn_channels: [512, 512, 512, 512, 1500]
	  tdnn_kernel_sizes: [5, 3, 3, 1, 1]
	  tdnn_dilations: [1, 2, 3, 1, 1]
	  lin_neurons: !ref <emb_dim>

	# Speaker classifier placed on top of the embeddings. Constructor arguments
	# are nested under the key; sizes follow `emb_dim` and `out_n_neurons`.
	classifier: &id007 !new:speechbrain.lobes.models.Xvector.Classifier
	  input_shape: [null, null, !ref <emb_dim>]
	  activation: !name:torch.nn.LeakyReLU
	  lin_blocks: 1
	  lin_neurons: !ref <emb_dim>
	  out_neurons: !ref <out_n_neurons>

	# Epoch counter; `limit` is a constructor argument and follows
	# `number_of_epochs`.
	epoch_counter: &id009 !new:speechbrain.utils.epoch_loop.EpochCounter
	  limit: !ref <number_of_epochs>

	############################## Augmentations ###################################

	# Download and prepare the dataset of noisy sequences for augmentation.
	# `!name:` binds the arguments below to the function without calling it.
	prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
	  # Same URL as NOISE_DATASET_URL; kept on the key's line and quoted.
	  URL: 'https://www.dropbox.com/scl/fi/a09pj97s5ifan81dqhi4n/noises.zip?rlkey=j8b0n9kdjdr32o1f06t0cw5b7&dl=1'
	  dest_folder: !ref <data_folder_noise>
	  ext: wav
	  csv_file: !ref <noise_annotation>


	# Add noise to input signal.
	# Noise clips are listed in the noise manifest; the SNR is bounded by
	# snr_low and snr_high.
	add_noise: &id001 !new:speechbrain.augment.time_domain.AddNoise
	  csv_file: !ref <noise_annotation>
	  snr_low: 0
	  snr_high: 15
	  noise_sample_rate: !ref <sample_rate>
	  clean_sample_rate: !ref <sample_rate>
	  num_workers: !ref <num_workers>

	# Download and prepare the dataset of room impulse responses for augmentation.
	# `!name:` binds the arguments below to the function without calling it.
	prepare_rir_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
	  # Same URL as RIR_DATASET_URL; kept on the key's line and quoted.
	  URL: 'https://www.dropbox.com/scl/fi/linhy77c36mu10965a836/RIRs.zip?rlkey=pg9cu8vrpn2u173vhiqyu743u&dl=1'
	  dest_folder: !ref <data_folder_rir>
	  ext: wav
	  csv_file: !ref <rir_annotation>

	# Add reverberation to input signal.
	# Impulse responses are listed in the RIR manifest.
	add_reverb: &id002 !new:speechbrain.augment.time_domain.AddReverb
	  csv_file: !ref <rir_annotation>
	  reverb_sample_rate: !ref <sample_rate>
	  clean_sample_rate: !ref <sample_rate>
	  num_workers: !ref <num_workers>

	# Frequency drop: randomly drops a number of frequency bands to zero.
	# The arguments are nested so they reach the DropFreq constructor rather
	# than becoming unrelated top-level keys.
	drop_freq: &id003 !new:speechbrain.augment.time_domain.DropFreq
	  drop_freq_low: 0
	  drop_freq_high: 1
	  drop_freq_count_low: 1
	  drop_freq_count_high: 3
	  drop_freq_width: 0.05

	# Time drop: randomly drops a number of temporal chunks.
	# The arguments are nested so they reach the DropChunk constructor rather
	# than becoming unrelated top-level keys.
	drop_chunk: &id004 !new:speechbrain.augment.time_domain.DropChunk
	  drop_length_low: 1000
	  drop_length_high: 2000
	  drop_count_low: 1
	  drop_count_high: 5

	# Augmenter: Combines previously defined augmentations to perform data augmentation
	wav_augment: !new:speechbrain.augment.augmenter.Augmenter
	  parallel_augment: true
	  concat_original: true
	  min_augmentations: 4
	  max_augmentations: 4
	  augment_prob: 1.0
	  # Each entry is an alias (leading `*`) to an anchored augmentation object:
	  # add_noise, add_reverb, drop_freq, drop_chunk. Without the `*` the list
	  # would hold the plain strings "id001".."id004" instead of the objects.
	  augmentations:
	    - *id001
	    - *id002
	    - *id003
	    - *id004

	# Feature normaliser. `norm_type` and `std_norm` are its constructor
	# arguments, so they sit directly under the key; a blank line and an
	# unrelated section heading previously separated them from it.
	mean_var_norm: &id008 !new:speechbrain.processing.features.InputNormalization
	  norm_type: sentence
	  std_norm: false

	# Second InputNormalization instance, configured with global statistics.
	# Arguments are nested under the key; the boolean uses the canonical
	# lowercase spelling like the rest of the file.
	mean_var_norm_emb: !new:speechbrain.processing.features.InputNormalization
	  norm_type: global
	  std_norm: false

	# Categorical label encoder, created with no constructor arguments. The
	# pretrainer block lists it as a loadable backed by label_encoder.txt.
	label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder

	# Modules handed to the training script as one mapping. Each entry aliases
	# an object anchored earlier in this file, and the entries are indented so
	# they belong to `modules` instead of overwriting the top-level keys of the
	# same name.
	modules:
	  compute_features: *id005
	  embedding_model: *id006
	  classifier: *id007
	  mean_var_norm: *id008
	# Cost + optimization
	# Loss function referenced by name (`!name:` yields a callable, it is not
	# invoked here). The classification-error alternative is left commented out.
	compute_cost: !name:speechbrain.nnet.losses.nll_loss
	# compute_error: !name:speechbrain.nnet.losses.classification_error

	# Optimiser factory: `!name:` pre-binds the keyword arguments below to
	# torch.optim.Adam. They are nested so they reach Adam rather than becoming
	# stray top-level keys; `lr` follows the top-level learning rate.
	opt_class: !name:torch.optim.Adam
	  lr: !ref <lr>
	  weight_decay: 0.000002

	# Linear learning-rate schedule from `lr` to `lr_final` over
	# `number_of_epochs` epochs. Arguments are nested under the key.
	lr_annealing: !new:speechbrain.nnet.schedulers.LinearScheduler
	  initial_value: !ref <lr>
	  final_value: !ref <lr_final>
	  epoch_count: !ref <number_of_epochs>

	# Logging + checkpoints
	# File logger writing to the training log path defined in the basic
	# parameters.
	train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
	  save_file: !ref <train_log>

	# Metric-tracker factory. `metric` is an argument of MetricStats, and
	# `reduction` is in turn an argument bound to the metric function, hence
	# the two levels of nesting.
	error_stats: !name:speechbrain.utils.metric_stats.MetricStats
	  metric: !name:speechbrain.nnet.losses.classification_error
	    reduction: batch

	# Checkpointer writing to the save folder. `recoverables` maps checkpoint
	# names to the anchored objects that are saved and restored; note that the
	# feature normaliser is stored under the name `normalizer`.
	checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
	  checkpoints_dir: !ref <save_folder>
	  recoverables:
	    embedding_model: *id006
	    classifier: *id007
	    normalizer: *id008
	    counter: *id009

	# Pretrainer: loads saved parameters into the objects named in `loadables`
	# from the files named in `paths`. The two mappings use the same child
	# keys, so each must be nested under its own parent to avoid collisions.
	pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
	  loadables:
	    embedding_model: *id006
	    mean_var_norm: *id008
	    classifier: *id007
	    label_encoder: !ref <label_encoder>
	  paths:
	    embedding_model: !ref <pretrained_path>/embedding_model.ckpt
	    # The file name matches the `normalizer` entry of the checkpointer.
	    mean_var_norm: !ref <pretrained_path>/normalizer.ckpt
	    classifier: !ref <pretrained_path>/classifier.ckpt
	    label_encoder: !ref <pretrained_path>/label_encoder.txt