longt5_xl_summ_screen_bp_10 / trainer_state.json
learn3r's picture
End of training
901c470
{
"best_metric": 1.3323031663894653,
"best_model_checkpoint": "longt5_xl_summ_screen_bp_10/checkpoint-57",
"epoch": 9.73913043478261,
"eval_steps": 500,
"global_step": 140,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.14,
"learning_rate": 0.001,
"loss": 2.8751,
"step": 2
},
{
"epoch": 0.28,
"learning_rate": 0.001,
"loss": 3.5717,
"step": 4
},
{
"epoch": 0.42,
"learning_rate": 0.001,
"loss": 2.8585,
"step": 6
},
{
"epoch": 0.56,
"learning_rate": 0.001,
"loss": 2.5104,
"step": 8
},
{
"epoch": 0.7,
"learning_rate": 0.001,
"loss": 3.2659,
"step": 10
},
{
"epoch": 0.83,
"learning_rate": 0.001,
"loss": 3.4634,
"step": 12
},
{
"epoch": 0.97,
"learning_rate": 0.001,
"loss": 2.4559,
"step": 14
},
{
"epoch": 0.97,
"eval_gen_len": 511.0,
"eval_loss": 2.0707387924194336,
"eval_rouge1": 11.7833,
"eval_rouge2": 1.6011,
"eval_rougeL": 11.1858,
"eval_rougeLsum": 10.3025,
"eval_runtime": 1812.8885,
"eval_samples_per_second": 0.186,
"eval_steps_per_second": 0.024,
"step": 14
},
{
"epoch": 1.11,
"learning_rate": 0.001,
"loss": 2.1517,
"step": 16
},
{
"epoch": 1.25,
"learning_rate": 0.001,
"loss": 2.2029,
"step": 18
},
{
"epoch": 1.39,
"learning_rate": 0.001,
"loss": 2.1161,
"step": 20
},
{
"epoch": 1.53,
"learning_rate": 0.001,
"loss": 1.9513,
"step": 22
},
{
"epoch": 1.67,
"learning_rate": 0.001,
"loss": 1.7095,
"step": 24
},
{
"epoch": 1.81,
"learning_rate": 0.001,
"loss": 1.6535,
"step": 26
},
{
"epoch": 1.95,
"learning_rate": 0.001,
"loss": 1.6238,
"step": 28
},
{
"epoch": 1.95,
"eval_gen_len": 511.0,
"eval_loss": 1.5286704301834106,
"eval_rouge1": 19.0489,
"eval_rouge2": 4.687,
"eval_rougeL": 16.6504,
"eval_rougeLsum": 17.1808,
"eval_runtime": 1807.4269,
"eval_samples_per_second": 0.187,
"eval_steps_per_second": 0.024,
"step": 28
},
{
"epoch": 2.09,
"learning_rate": 0.001,
"loss": 1.5804,
"step": 30
},
{
"epoch": 2.23,
"learning_rate": 0.001,
"loss": 1.511,
"step": 32
},
{
"epoch": 2.37,
"learning_rate": 0.001,
"loss": 1.4961,
"step": 34
},
{
"epoch": 2.5,
"learning_rate": 0.001,
"loss": 1.4334,
"step": 36
},
{
"epoch": 2.64,
"learning_rate": 0.001,
"loss": 1.3994,
"step": 38
},
{
"epoch": 2.78,
"learning_rate": 0.001,
"loss": 1.4018,
"step": 40
},
{
"epoch": 2.92,
"learning_rate": 0.001,
"loss": 1.3964,
"step": 42
},
{
"epoch": 2.99,
"eval_gen_len": 511.0,
"eval_loss": 1.3520147800445557,
"eval_rouge1": 21.9994,
"eval_rouge2": 5.8519,
"eval_rougeL": 18.9231,
"eval_rougeLsum": 19.958,
"eval_runtime": 1809.4299,
"eval_samples_per_second": 0.187,
"eval_steps_per_second": 0.024,
"step": 43
},
{
"epoch": 3.06,
"learning_rate": 0.001,
"loss": 1.3428,
"step": 44
},
{
"epoch": 3.2,
"learning_rate": 0.001,
"loss": 1.3034,
"step": 46
},
{
"epoch": 3.34,
"learning_rate": 0.001,
"loss": 1.4137,
"step": 48
},
{
"epoch": 3.48,
"learning_rate": 0.001,
"loss": 1.4083,
"step": 50
},
{
"epoch": 3.62,
"learning_rate": 0.001,
"loss": 1.3075,
"step": 52
},
{
"epoch": 3.76,
"learning_rate": 0.001,
"loss": 1.2527,
"step": 54
},
{
"epoch": 3.9,
"learning_rate": 0.001,
"loss": 1.2538,
"step": 56
},
{
"epoch": 3.97,
"eval_gen_len": 497.2455621301775,
"eval_loss": 1.3323031663894653,
"eval_rouge1": 22.9554,
"eval_rouge2": 6.4509,
"eval_rougeL": 19.7437,
"eval_rougeLsum": 20.923,
"eval_runtime": 1810.7532,
"eval_samples_per_second": 0.187,
"eval_steps_per_second": 0.024,
"step": 57
},
{
"epoch": 4.03,
"learning_rate": 0.001,
"loss": 1.2028,
"step": 58
},
{
"epoch": 4.17,
"learning_rate": 0.001,
"loss": 1.0981,
"step": 60
},
{
"epoch": 4.31,
"learning_rate": 0.001,
"loss": 1.1033,
"step": 62
},
{
"epoch": 4.45,
"learning_rate": 0.001,
"loss": 1.1303,
"step": 64
},
{
"epoch": 4.59,
"learning_rate": 0.001,
"loss": 1.1675,
"step": 66
},
{
"epoch": 4.73,
"learning_rate": 0.001,
"loss": 1.3701,
"step": 68
},
{
"epoch": 4.87,
"learning_rate": 0.001,
"loss": 1.277,
"step": 70
},
{
"epoch": 4.94,
"eval_gen_len": 507.2278106508876,
"eval_loss": 1.5462373495101929,
"eval_rouge1": 14.6326,
"eval_rouge2": 3.6509,
"eval_rougeL": 12.4805,
"eval_rougeLsum": 13.5001,
"eval_runtime": 1806.4311,
"eval_samples_per_second": 0.187,
"eval_steps_per_second": 0.024,
"step": 71
},
{
"epoch": 5.01,
"learning_rate": 0.001,
"loss": 1.3884,
"step": 72
},
{
"epoch": 5.15,
"learning_rate": 0.001,
"loss": 1.0428,
"step": 74
},
{
"epoch": 5.29,
"learning_rate": 0.001,
"loss": 1.0266,
"step": 76
},
{
"epoch": 5.43,
"learning_rate": 0.001,
"loss": 1.0247,
"step": 78
},
{
"epoch": 5.57,
"learning_rate": 0.001,
"loss": 0.9732,
"step": 80
},
{
"epoch": 5.7,
"learning_rate": 0.001,
"loss": 1.0042,
"step": 82
},
{
"epoch": 5.84,
"learning_rate": 0.001,
"loss": 1.0099,
"step": 84
},
{
"epoch": 5.98,
"learning_rate": 0.001,
"loss": 1.0071,
"step": 86
},
{
"epoch": 5.98,
"eval_gen_len": 429.7721893491124,
"eval_loss": 1.3604055643081665,
"eval_rouge1": 29.5352,
"eval_rouge2": 9.9544,
"eval_rougeL": 22.1073,
"eval_rougeLsum": 28.1204,
"eval_runtime": 1808.4033,
"eval_samples_per_second": 0.187,
"eval_steps_per_second": 0.024,
"step": 86
},
{
"epoch": 6.12,
"learning_rate": 0.001,
"loss": 0.8375,
"step": 88
},
{
"epoch": 6.26,
"learning_rate": 0.001,
"loss": 0.8301,
"step": 90
},
{
"epoch": 6.4,
"learning_rate": 0.001,
"loss": 0.8551,
"step": 92
},
{
"epoch": 6.54,
"learning_rate": 0.001,
"loss": 0.823,
"step": 94
},
{
"epoch": 6.68,
"learning_rate": 0.001,
"loss": 0.8783,
"step": 96
},
{
"epoch": 6.82,
"learning_rate": 0.001,
"loss": 0.885,
"step": 98
},
{
"epoch": 6.96,
"learning_rate": 0.001,
"loss": 0.8685,
"step": 100
},
{
"epoch": 6.96,
"eval_gen_len": 451.78402366863907,
"eval_loss": 1.4360722303390503,
"eval_rouge1": 31.0337,
"eval_rouge2": 10.6724,
"eval_rougeL": 22.3815,
"eval_rougeLsum": 29.6325,
"eval_runtime": 1808.8854,
"eval_samples_per_second": 0.187,
"eval_steps_per_second": 0.024,
"step": 100
},
{
"epoch": 7.1,
"learning_rate": 0.001,
"loss": 0.7653,
"step": 102
},
{
"epoch": 7.23,
"learning_rate": 0.001,
"loss": 0.7402,
"step": 104
},
{
"epoch": 7.37,
"learning_rate": 0.001,
"loss": 0.7582,
"step": 106
},
{
"epoch": 7.51,
"learning_rate": 0.001,
"loss": 0.7518,
"step": 108
},
{
"epoch": 7.65,
"learning_rate": 0.001,
"loss": 0.7486,
"step": 110
},
{
"epoch": 7.79,
"learning_rate": 0.001,
"loss": 0.7645,
"step": 112
},
{
"epoch": 7.93,
"learning_rate": 0.001,
"loss": 0.7498,
"step": 114
},
{
"epoch": 8.0,
"eval_gen_len": 473.896449704142,
"eval_loss": 1.530242681503296,
"eval_rouge1": 28.433,
"eval_rouge2": 8.4887,
"eval_rougeL": 21.3588,
"eval_rougeLsum": 26.6817,
"eval_runtime": 1807.993,
"eval_samples_per_second": 0.187,
"eval_steps_per_second": 0.024,
"step": 115
},
{
"epoch": 8.07,
"learning_rate": 0.001,
"loss": 0.6877,
"step": 116
},
{
"epoch": 8.21,
"learning_rate": 0.001,
"loss": 0.6278,
"step": 118
},
{
"epoch": 8.35,
"learning_rate": 0.001,
"loss": 0.6602,
"step": 120
},
{
"epoch": 8.49,
"learning_rate": 0.001,
"loss": 0.6408,
"step": 122
},
{
"epoch": 8.63,
"learning_rate": 0.001,
"loss": 0.6514,
"step": 124
},
{
"epoch": 8.77,
"learning_rate": 0.001,
"loss": 0.6434,
"step": 126
},
{
"epoch": 8.9,
"learning_rate": 0.001,
"loss": 0.6226,
"step": 128
},
{
"epoch": 8.97,
"eval_gen_len": 358.76627218934914,
"eval_loss": 1.628932237625122,
"eval_rouge1": 37.251,
"eval_rouge2": 12.8214,
"eval_rougeL": 24.8704,
"eval_rougeLsum": 36.0027,
"eval_runtime": 1807.901,
"eval_samples_per_second": 0.187,
"eval_steps_per_second": 0.024,
"step": 129
},
{
"epoch": 9.04,
"learning_rate": 0.001,
"loss": 0.5826,
"step": 130
},
{
"epoch": 9.18,
"learning_rate": 0.001,
"loss": 0.5105,
"step": 132
},
{
"epoch": 9.32,
"learning_rate": 0.001,
"loss": 0.5395,
"step": 134
},
{
"epoch": 9.46,
"learning_rate": 0.001,
"loss": 0.5103,
"step": 136
},
{
"epoch": 9.6,
"learning_rate": 0.001,
"loss": 0.5377,
"step": 138
},
{
"epoch": 9.74,
"learning_rate": 0.001,
"loss": 0.5558,
"step": 140
},
{
"epoch": 9.74,
"eval_gen_len": 284.0266272189349,
"eval_loss": 1.5811121463775635,
"eval_rouge1": 35.4657,
"eval_rouge2": 12.0036,
"eval_rougeL": 24.7787,
"eval_rougeLsum": 34.3775,
"eval_runtime": 1740.8347,
"eval_samples_per_second": 0.194,
"eval_steps_per_second": 0.025,
"step": 140
},
{
"epoch": 9.74,
"step": 140,
"total_flos": 2.447850236380324e+18,
"train_loss": 1.2823251613548823,
"train_runtime": 53785.754,
"train_samples_per_second": 0.683,
"train_steps_per_second": 0.003
}
],
"logging_steps": 2,
"max_steps": 140,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 2.447850236380324e+18,
"trial_name": null,
"trial_params": null
}