# BEST-RQ-12L-LS960 / config.yaml
# Source: MorenoLaQuatra — "Added model, config and complete checkpoint" (commit f85a01d)
---
# Data-augmentation switches — all disabled for this pretraining run.
augmentation:
  augment_pitch_shift: false
  augment_time_stretch: false
  noise_injection: false
# Dataset locations and acoustic feature-extraction settings.
data:
  audio_normalization_type: standard
  eval_dataset_path: /mnt/disk4/datasets/librispeech/valid.tsv
  feature_type: mel                # mel filterbanks (n_mfcc below likely unused here — confirm in loader)
  hop_length: 160                  # samples between frames = 10 ms at sample_rate 16000
  max_length: 12.5                 # presumably seconds of audio — verify against the dataset code
  n_fft: 400
  n_mels: 80                       # must match model.input_dim
  n_mfcc: 13
  normalize_audio: false
  normalize_features: true
  pad_to_max_length: true
  sample_rate: 16000
  train_dataset_path: /mnt/disk4/datasets/librispeech/train.tsv
  truncate_to_max_length: true
  win_length: 400                  # samples per window = 25 ms at 16 kHz (same as n_fft)
# Validation schedule and (disabled) early-stopping criterion.
evaluation:
  early_stopping: false
  eval_interval: 1                 # presumably epochs between evaluations — confirm in trainer
  metric: validation_accuracy
  metric_lower_is_better: false    # higher accuracy is better
  patience: 10                     # only relevant when early_stopping is enabled
# Checkpoint and batching used when running the model for inference.
inference:
  batch_size: 1
  # Same directory as logging.save_dir — inference loads what training saves.
  checkpoint_path: /mnt/disk3/bestrq_ckpts/bestrq-mel-pt-subsampled/12L-ls960-V8K-P16-M.05NM8-MEL/
# Logging and checkpointing destinations / intervals.
logging:
  checkpoint_interval: 1           # presumably epochs between checkpoint saves — confirm in trainer
  log_dir: /mnt/disk3/bestrq_ckpts/logs-mel-pt-subsampled/12L-ls960-V8K-P16-M.05NM8-MEL/
  log_interval: 4
  save_dir: /mnt/disk3/bestrq_ckpts/bestrq-mel-pt-subsampled/12L-ls960-V8K-P16-M.05NM8-MEL/
# BEST-RQ input-masking configuration (the M.05NM8 part of the experiment name).
masking:
  bert_style_masking: false
  mask_len: 8                      # mask span length — units (frames) unverified from this file
  mask_prob: 0.05
  mask_type: random
# Conformer encoder and random-projection quantizer hyperparameters.
model:
  conformer_depth: 12              # the "12L" in the experiment name
  conformer_dim: 768
  dim_head: 96                     # heads (8) x dim_head (96) = conformer_dim (768)
  dropout: 0.1
  ff_mult: 4
  heads: 8
  input_dim: 80                    # matches data.n_mels
  kernel_size: 31
  proj_dim: 16                     # quantizer projection dim (P16 in the experiment name)
  quantizer_simvq_mode: false
  subsampled_dim: 768
  use_subsampling: true
  vocab_size: 8192                 # codebook size (V8K in the experiment name)
# Optimizer, LR schedule and distributed-training settings.
training:
  accelerate_config: configs/accelerate_2GPU_config.yaml
  batch_size: 128                  # presumably per forward pass, before gradient accumulation — confirm
  # Quoted: plain scalar starts with a digit and contains '.', so force string type.
  comet_experiment_name: "12L-ls960-V8K-P16-M.05NM8-MEL"
  comet_project_name: bestrq-pt-1124
  gradient_accumulation_steps: 32
  gradient_clipping: 5.0
  learning_rate: 0.001
  lr_scheduler: warmup_linear
  max_checkpoints: 5
  mixed_precision: false
  multi_gpu: true                  # paired with the 2-GPU accelerate config above
  num_epochs: 500
  num_workers: 16
  optimizer: adamw
  use_comet: true
  use_cuda: true
  warmup_ratio: 0.05               # fraction of training used for LR warmup
  weight_decay: 0.05