---
# Experiment configuration: runner, optimizer, scheduler and downstream
# expert settings.
#
# NOTE(review): the nesting below was reconstructed after the file lost its
# indentation (keys with empty values are treated as parent mappings, wide
# blank gaps as top-level stanza breaks). Confirm against the schema the
# consuming code expects.

runner:
  total_steps: 25000
  gradient_clipping: 1
  gradient_accumulate_steps: 1

  # Intervals below are presumably counted in training steps - TODO confirm.
  log_step: 100
  eval_step: 1000
  save_step: 5000
  max_keep: 10
  eval_dataloaders:
    - valid

optimizer:
  name: AdamW
  # Keep the decimal point and the signed exponent: YAML 1.1 loaders only
  # resolve this form as a float; a bare `1e-5` can load as a string.
  lr: 1.0e-5

scheduler:
  name: linear_schedule_with_warmup
  num_warmup_steps: 1000

downstream_expert:
  datarc:
    # NOTE(review): absolute, site-specific corpus locations - adjust to the
    # local environment before running.
    sws2013_root: /corpora/sws2013Database
    sws2013_scoring_root: /corpora/sws2013Database/scoring_atwv_sws2013
    quesst2014_root: /corpora/quesst14Database
    num_workers: 8
    batch_size: 16
    valid_size: 1000
  modelrc:
    bottleneck_dim: 256
    hidden_dim: 1024
  lossrc:
    margin: -1.0