|
{ |
|
"best_metric": 17.698, |
|
"best_model_checkpoint": "bin/liputan6-base/checkpoint-315", |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 315, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 5.591057300567627, |
|
"learning_rate": 0.0008, |
|
"loss": 3.8271, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 47.473, |
|
"eval_loss": 3.9787089824676514, |
|
"eval_rouge1": 14.5233, |
|
"eval_rouge2": 4.127, |
|
"eval_rougeL": 12.7611, |
|
"eval_rougeLsum": 13.5205, |
|
"eval_runtime": 251.9921, |
|
"eval_samples_per_second": 3.968, |
|
"eval_steps_per_second": 0.127, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 5.016976833343506, |
|
"learning_rate": 0.0006, |
|
"loss": 2.2739, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_gen_len": 44.229, |
|
"eval_loss": 4.131580352783203, |
|
"eval_rouge1": 15.9563, |
|
"eval_rouge2": 4.7752, |
|
"eval_rougeL": 13.8242, |
|
"eval_rougeLsum": 14.8005, |
|
"eval_runtime": 252.4735, |
|
"eval_samples_per_second": 3.961, |
|
"eval_steps_per_second": 0.127, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 4.183892726898193, |
|
"learning_rate": 0.0004, |
|
"loss": 1.2999, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_gen_len": 33.112, |
|
"eval_loss": 4.484961032867432, |
|
"eval_rouge1": 17.2932, |
|
"eval_rouge2": 4.6352, |
|
"eval_rougeL": 14.8582, |
|
"eval_rougeLsum": 16.1555, |
|
"eval_runtime": 221.3655, |
|
"eval_samples_per_second": 4.517, |
|
"eval_steps_per_second": 0.145, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 4.1966776847839355, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6423, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_gen_len": 36.399, |
|
"eval_loss": 4.919987678527832, |
|
"eval_rouge1": 17.5707, |
|
"eval_rouge2": 4.9772, |
|
"eval_rougeL": 14.949, |
|
"eval_rougeLsum": 16.1838, |
|
"eval_runtime": 217.3583, |
|
"eval_samples_per_second": 4.601, |
|
"eval_steps_per_second": 0.147, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 2.510921001434326, |
|
"learning_rate": 0.0, |
|
"loss": 0.2536, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_gen_len": 31.108, |
|
"eval_loss": 5.426603317260742, |
|
"eval_rouge1": 17.698, |
|
"eval_rouge2": 4.7021, |
|
"eval_rougeL": 14.8138, |
|
"eval_rougeLsum": 16.3595, |
|
"eval_runtime": 159.266, |
|
"eval_samples_per_second": 6.279, |
|
"eval_steps_per_second": 0.201, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 315, |
|
"total_flos": 3423786762240000.0, |
|
"train_loss": 1.6593605313982283, |
|
"train_runtime": 1412.1276, |
|
"train_samples_per_second": 3.541, |
|
"train_steps_per_second": 0.223 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 315, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"total_flos": 3423786762240000.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|