runner:
  total_steps: 25000
  gradient_clipping: 1
  gradient_accumulate_steps: 1
  log_step: 100
  eval_step: 1000
  save_step: 5000
  max_keep: 10
  eval_dataloaders:
    - valid

optimizer:
  name: AdamW
  lr: 1.0e-5

# Comment out the whole scheduler config block
# to disable learning rate scheduling.
scheduler:
  name: linear_schedule_with_warmup
  num_warmup_steps: 1000

downstream_expert:
  datarc:
    sws2013_root: /corpora/sws2013Database
    sws2013_scoring_root: /corpora/sws2013Database/scoring_atwv_sws2013
    quesst2014_root: /corpora/quesst14Database
    num_workers: 8
    batch_size: 16
    valid_size: 1000

  modelrc:
    bottleneck_dim: 256
    hidden_dim: 1024

  lossrc:
    margin: -1.0
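
# ----------------------------------------------------------------------
# A minimal sketch (kept as comments so this file stays valid YAML) of how
# the runner/optimizer/scheduler fields above are typically wired together.
# This is an illustrative assumption, not the framework's actual runner
# code; `config.yaml` and the `model` placeholder are hypothetical.
#
#   import yaml
#   import torch
#   from transformers import get_linear_schedule_with_warmup
#
#   with open("config.yaml") as f:        # this file
#       cfg = yaml.safe_load(f)
#
#   model = torch.nn.Linear(1024, 256)    # placeholder for the downstream model
#   optimizer = torch.optim.AdamW(model.parameters(),
#                                 lr=float(cfg["optimizer"]["lr"]))
#   scheduler = get_linear_schedule_with_warmup(
#       optimizer,
#       num_warmup_steps=cfg["scheduler"]["num_warmup_steps"],
#       num_training_steps=cfg["runner"]["total_steps"],
#   )
# ----------------------------------------------------------------------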