# Training run configuration.
#
# NOTE(review): this file was previously collapsed onto a single line, which
# is not parseable YAML (a plain scalar cannot contain ": "). The nesting
# below was reconstructed from the key names; confirm it against the schema
# expected by the code that loads this file.

# Top-level run settings.
mode: pt
device: gpu
precision: bf16
eval_only: false
predict_only: false
seed: 2137

# Model selection and config overrides.
model:
  klass: hf_t5
  # NOTE(review): machine-specific absolute path; the run will presumably
  # fail on hosts where this directory does not exist -- confirm or override.
  name: /home/jovyan/bert-train/nanot5/templates/base_slavic_120k_sub
  overwrite:
    dropout_rate: 0.1
  add_config:
    is_bf16: true
  # Empty string as written in the original; presumably means "no
  # checkpoint to load" -- TODO confirm against the loader.
  checkpoint_path: ''
  random_init: true
  compile: false

# Input pipeline settings.
data:
  input_length: 512
  mlm_probability: 0.15
  mean_noise_span_length: 3.0
  num_workers: 8

# Optimizer and learning-rate schedule settings.
optim:
  name: adafactor
  base_lr: 0.02
  batch_size: 128
  total_steps: 120000
  # Presumably a sentinel meaning "not used, rely on total_steps" --
  # TODO confirm against the training loop.
  epochs: -1
  warmup_steps: 10000
  lr_scheduler: legacy
  weight_decay: 0.0
  grad_clip: 1.0
  grad_acc: 8
  final_cosine: 1.0e-05

# Evaluation cadence.
eval:
  every_steps: 5000
  steps: 500

# Checkpoint cadence.
checkpoint:
  every_steps: 10000

# Logging settings.
logging:
  neptune: false
  neptune_creds:
    project: null
    api_token: null
    tags: ''
  every_steps: 100
  grad_l2: true
  weights_l2: true