# Training configuration for a `wav2vec_ctc` model optimised with the `ctc`
# criterion. Each top-level key is a configuration group; its options are
# nested two spaces beneath it.
# NOTE(review): group membership was reconstructed from the blank-line
# separated stanzas of the source; confirm against the consumer's schema.

common:
  memory_efficient_fp16: true
  log_format: json
  log_interval: 100
  model_parallel_size: 1

checkpoint:
  no_epoch_checkpoints: true
  best_checkpoint_metric: wer
  # Site-specific absolute path; adjust for the machine this runs on.
  save_dir: /esat/spchtemp/scratch/jponcele/selfsupervised_exps/result/finetune_VW_base_all

task:
  _name: audio_pretraining
  # Site-specific absolute path; adjust for the machine this runs on.
  data: /users/spraak/jponcele/BenchmarkingSS/data/cgn_phone_10ms_w2v2_all
  normalize: true
  labels: ltr
  segments: true
  max_length: 800000

dataset:
  num_workers: 6
  batch_size: 4
  max_tokens: 32000000
  skip_invalid_size_inputs_valid_test: true
  # NOTE(review): together with `best_checkpoint_metric: wer`, this presumably
  # means the best checkpoint is chosen on the subset named `test`; confirm
  # that is intended.
  valid_subset: test
  data_buffer_size: 2

distributed_training:
  ddp_backend: legacy_ddp
  distributed_world_size: 1

criterion:
  _name: ctc
  zero_infinity: true

optimization:
  max_update: 500000
  lr: [0.00003]
  sentence_avg: true
  update_freq: [4]

optimizer:
  _name: adam
  # Plain scalar: YAML loads this as the string "(0.9,0.98)", not a sequence.
  adam_betas: (0.9,0.98)
  # NOTE(review): without a decimal point, YAML 1.1 loaders such as PyYAML
  # read this as a string; presumably the consumer converts it; verify.
  adam_eps: 1e-08

lr_scheduler:
  _name: tri_stage
  phase_ratio: [0.1, 0.4, 0.5]
  final_lr_scale: 0.05

model:
  _name: wav2vec_ctc
  # Site-specific absolute path to a checkpoint file (.pt).
  w2v_path: /esat/spchtemp/scratch/jponcele/selfsupervised_exps/result/pretrain_w2v2_cgn-unsup-VW_base/checkpoint_74_250000.pt
  apply_mask: true
  mask_prob: 0.65
  mask_channel_prob: 0.5
  mask_channel_length: 64
  layerdrop: 0.1
  activation_dropout: 0.1
  feature_grad_mult: 0.0
  freeze_finetune_updates: 0