# slavic-t5-base / hydra /config.yaml
# dhladek's picture
# Upload folder using huggingface_hub
# dcf87c3 verified
# raw
# history blame
# 834 Bytes
# Run configuration (Hydra-style nested groups).
# NOTE(review): the nesting below was reconstructed after the original
# indentation was lost; it follows the upstream nanoT5 default config
# layout. Confirm against the run's original .hydra/config.yaml.
# All keys and values are unchanged; only the indentation is restored.

# Top-level experiment settings.
mode: pt
device: gpu
precision: bf16
eval_only: false
predict_only: false
seed: 2137

model:
  klass: hf_t5
  # Absolute path on the training host; not portable to other machines.
  name: /home/jovyan/bert-train/nanot5/templates/base_slavic_120k_sub
  overwrite:
    dropout_rate: 0.1
  add_config:
    is_bf16: true
  # Empty string: no checkpoint path is set.
  checkpoint_path: ''
  random_init: true
  compile: false

data:
  input_length: 512
  mlm_probability: 0.15
  mean_noise_span_length: 3.0
  num_workers: 8

optim:
  name: adafactor
  base_lr: 0.02
  batch_size: 128
  total_steps: 120000
  # presumably -1 means "use total_steps instead of an epoch count";
  # verify against the training script
  epochs: -1
  warmup_steps: 10000
  lr_scheduler: legacy
  weight_decay: 0.0
  grad_clip: 1.0
  grad_acc: 8
  final_cosine: 1.0e-05

# `every_steps` appears under eval, checkpoint and logging. Each must stay
# inside its own group; at a single level they would be duplicate keys.
eval:
  every_steps: 5000
  steps: 500

checkpoint:
  every_steps: 10000

logging:
  neptune: false
  neptune_creds:
    project: null
    api_token: null
    tags: ''
  every_steps: 100
  grad_l2: true
  weights_l2: true