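# Hydra fine-tuning config in the fairseq style (assumed context): RoBERTa
# sentence-pair classification. `???` is OmegaConf's marker for a required
# value that must be supplied on the command line.

# Mixed-precision (fp16) training with a dynamic loss scaler; training
# progress is logged as JSON records every 200 updates.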
common:
  fp16: true
  fp16_init_scale: 4
  threshold_loss_scale: 1
  fp16_scale_window: 128
  log_format: json
  log_interval: 200

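# Binary sentence-pair classification over inputs truncated to 512
# positions. Tokens 0 and 2 are assumed to be <s> and </s> in the default
# fairseq dictionary; `data` must point at the preprocessed dataset.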
task:
  _name: sentence_prediction
  data: ???
  init_token: 0
  separator_token: 2
  num_classes: 2
  max_positions: 512

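# Fine-tuning starts from a pretrained checkpoint (`restore_file`), so the
# optimizer, dataloader, and meter state stored with it are discarded.
# The best checkpoint is the one with the highest validation accuracy;
# per-epoch checkpoints are suppressed to save disk space.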
checkpoint:
  restore_file: ???
  reset_optimizer: true
  reset_dataloader: true
  reset_meters: true
  best_checkpoint_metric: accuracy
  maximize_best_checkpoint_metric: true
  no_epoch_checkpoints: true

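# Single GPU by default; `find_unused_parameters` keeps DDP from erroring
# when some pretrained parameters receive no gradient during fine-tuning.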
distributed_training:
  find_unused_parameters: true
  distributed_world_size: 1

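# Classification loss over the sentence-prediction head.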
criterion:
  _name: sentence_prediction

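# Batches of 32 examples, additionally capped at 4400 tokens per batch.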
dataset:
  batch_size: 32
  required_batch_size_multiple: 1
  max_tokens: 4400

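# Adam with the (0.9, 0.98) betas and 1e-06 epsilon commonly used for
# RoBERTa, plus a weight decay of 0.1.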
optimizer:
  _name: adam
  weight_decay: 0.1
  adam_betas: (0.9,0.98)
  adam_eps: 1e-06

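# Linear warmup over the first 1986 updates (about 6% of max_update, the
# usual RoBERTa fine-tuning heuristic), then polynomial decay.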
lr_scheduler:
  _name: polynomial_decay
  warmup_updates: 1986

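# Peak learning rate of 1e-05 with gradient clipping disabled; training
# stops at whichever of max_update or max_epoch is hit first.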
optimization:
  clip_norm: 0.0
  lr: [1e-05]
  max_update: 33112
  max_epoch: 10

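# RoBERTa encoder with standard dropout rates; the pretrained weights are
# loaded from checkpoint.restore_file above.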
model:
  _name: roberta
  dropout: 0.1
  attention_dropout: 0.1