|
{ |
|
"best_metric": 0.6807650327682495, |
|
"best_model_checkpoint": "./LongT5-XLarge-NSPCC/checkpoint-566", |
|
"epoch": 3.984105960264901, |
|
"eval_steps": 500, |
|
"global_step": 752, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9960264900662251, |
|
"grad_norm": 0.6866635680198669, |
|
"learning_rate": 0.00026365056951661766, |
|
"loss": 3.6911, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.9960264900662251, |
|
"eval_gen_len": 360.7021, |
|
"eval_loss": 0.7291953563690186, |
|
"eval_rouge1": 0.4665, |
|
"eval_rouge2": 0.1826, |
|
"eval_rougeL": 0.2611, |
|
"eval_rougeLsum": 0.2611, |
|
"eval_runtime": 6342.3266, |
|
"eval_samples_per_second": 0.015, |
|
"eval_steps_per_second": 0.015, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 1.9973509933774833, |
|
"grad_norm": 0.37886831164360046, |
|
"learning_rate": 0.00015678507557860595, |
|
"loss": 0.8701, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 1.9973509933774833, |
|
"eval_gen_len": 365.3298, |
|
"eval_loss": 0.6966970562934875, |
|
"eval_rouge1": 0.4886, |
|
"eval_rouge2": 0.2073, |
|
"eval_rougeL": 0.2805, |
|
"eval_rougeLsum": 0.2799, |
|
"eval_runtime": 6456.3838, |
|
"eval_samples_per_second": 0.015, |
|
"eval_steps_per_second": 0.015, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 2.9986754966887417, |
|
"grad_norm": 0.5063174366950989, |
|
"learning_rate": 4.566183323963369e-05, |
|
"loss": 0.7849, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 2.9986754966887417, |
|
"eval_gen_len": 332.3191, |
|
"eval_loss": 0.6807650327682495, |
|
"eval_rouge1": 0.5116, |
|
"eval_rouge2": 0.2302, |
|
"eval_rougeL": 0.2995, |
|
"eval_rougeLsum": 0.2997, |
|
"eval_runtime": 5946.7561, |
|
"eval_samples_per_second": 0.016, |
|
"eval_steps_per_second": 0.016, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 3.984105960264901, |
|
"grad_norm": 0.3437921106815338, |
|
"learning_rate": 0.0, |
|
"loss": 0.7769, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 3.984105960264901, |
|
"eval_gen_len": 337.6809, |
|
"eval_loss": 0.6843053102493286, |
|
"eval_rouge1": 0.5138, |
|
"eval_rouge2": 0.2297, |
|
"eval_rougeL": 0.2999, |
|
"eval_rougeLsum": 0.2995, |
|
"eval_runtime": 6050.9414, |
|
"eval_samples_per_second": 0.016, |
|
"eval_steps_per_second": 0.016, |
|
"step": 752 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 752, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 500, |
|
"total_flos": 8.260168119704617e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|