liputan6-base / trainer_state.json
apwic's picture
End of training
229226e verified
{
"best_metric": 39.0802,
"best_model_checkpoint": "bin/liputan6-base/checkpoint-252",
"epoch": 5.0,
"eval_steps": 500,
"global_step": 315,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 6.927645683288574,
"learning_rate": 0.0008,
"loss": 1.6488,
"step": 63
},
{
"epoch": 1.0,
"eval_gen_len": 65.191,
"eval_loss": 0.7318153381347656,
"eval_rouge1": 34.224,
"eval_rouge2": 24.5266,
"eval_rougeL": 31.0318,
"eval_rougeLsum": 32.8875,
"eval_runtime": 255.5421,
"eval_samples_per_second": 3.913,
"eval_steps_per_second": 0.125,
"step": 63
},
{
"epoch": 2.0,
"grad_norm": 3.6224966049194336,
"learning_rate": 0.0006,
"loss": 0.6983,
"step": 126
},
{
"epoch": 2.0,
"eval_gen_len": 65.46,
"eval_loss": 0.6432784795761108,
"eval_rouge1": 37.3155,
"eval_rouge2": 27.3019,
"eval_rougeL": 33.9529,
"eval_rougeLsum": 36.1013,
"eval_runtime": 250.329,
"eval_samples_per_second": 3.995,
"eval_steps_per_second": 0.128,
"step": 126
},
{
"epoch": 3.0,
"grad_norm": 3.090550184249878,
"learning_rate": 0.0004,
"loss": 0.4226,
"step": 189
},
{
"epoch": 3.0,
"eval_gen_len": 59.969,
"eval_loss": 0.5831208229064941,
"eval_rouge1": 36.9679,
"eval_rouge2": 26.3535,
"eval_rougeL": 33.5956,
"eval_rougeLsum": 35.7604,
"eval_runtime": 241.9062,
"eval_samples_per_second": 4.134,
"eval_steps_per_second": 0.132,
"step": 189
},
{
"epoch": 4.0,
"grad_norm": 4.525897026062012,
"learning_rate": 0.0002,
"loss": 0.242,
"step": 252
},
{
"epoch": 4.0,
"eval_gen_len": 55.301,
"eval_loss": 0.5539225339889526,
"eval_rouge1": 39.0802,
"eval_rouge2": 28.4622,
"eval_rougeL": 35.8085,
"eval_rougeLsum": 37.8181,
"eval_runtime": 216.2243,
"eval_samples_per_second": 4.625,
"eval_steps_per_second": 0.148,
"step": 252
},
{
"epoch": 5.0,
"grad_norm": 0.9606621861457825,
"learning_rate": 0.0,
"loss": 0.1248,
"step": 315
},
{
"epoch": 5.0,
"eval_gen_len": 56.589,
"eval_loss": 0.5169788599014282,
"eval_rouge1": 38.108,
"eval_rouge2": 27.5573,
"eval_rougeL": 34.7198,
"eval_rougeLsum": 36.6919,
"eval_runtime": 211.3459,
"eval_samples_per_second": 4.732,
"eval_steps_per_second": 0.151,
"step": 315
},
{
"epoch": 5.0,
"step": 315,
"total_flos": 3423786762240000.0,
"train_loss": 0.6272867081657288,
"train_runtime": 1485.4068,
"train_samples_per_second": 3.366,
"train_steps_per_second": 0.212
}
],
"logging_steps": 500,
"max_steps": 315,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"total_flos": 3423786762240000.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}