|
{ |
|
"best_metric": 1.3323031663894653, |
|
"best_model_checkpoint": "longt5_xl_summ_screen_bp_10/checkpoint-57", |
|
"epoch": 9.73913043478261, |
|
"eval_steps": 500, |
|
"global_step": 140, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.001, |
|
"loss": 2.8751, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.001, |
|
"loss": 3.5717, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.001, |
|
"loss": 2.8585, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.001, |
|
"loss": 2.5104, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.001, |
|
"loss": 3.2659, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.001, |
|
"loss": 3.4634, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.001, |
|
"loss": 2.4559, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_gen_len": 511.0, |
|
"eval_loss": 2.0707387924194336, |
|
"eval_rouge1": 11.7833, |
|
"eval_rouge2": 1.6011, |
|
"eval_rougeL": 11.1858, |
|
"eval_rougeLsum": 10.3025, |
|
"eval_runtime": 1812.8885, |
|
"eval_samples_per_second": 0.186, |
|
"eval_steps_per_second": 0.024, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.001, |
|
"loss": 2.1517, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.001, |
|
"loss": 2.2029, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.001, |
|
"loss": 2.1161, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.001, |
|
"loss": 1.9513, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.001, |
|
"loss": 1.7095, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.001, |
|
"loss": 1.6535, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.001, |
|
"loss": 1.6238, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_gen_len": 511.0, |
|
"eval_loss": 1.5286704301834106, |
|
"eval_rouge1": 19.0489, |
|
"eval_rouge2": 4.687, |
|
"eval_rougeL": 16.6504, |
|
"eval_rougeLsum": 17.1808, |
|
"eval_runtime": 1807.4269, |
|
"eval_samples_per_second": 0.187, |
|
"eval_steps_per_second": 0.024, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.001, |
|
"loss": 1.5804, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 0.001, |
|
"loss": 1.511, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 0.001, |
|
"loss": 1.4961, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 0.001, |
|
"loss": 1.4334, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 0.001, |
|
"loss": 1.3994, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 0.001, |
|
"loss": 1.4018, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 0.001, |
|
"loss": 1.3964, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_gen_len": 511.0, |
|
"eval_loss": 1.3520147800445557, |
|
"eval_rouge1": 21.9994, |
|
"eval_rouge2": 5.8519, |
|
"eval_rougeL": 18.9231, |
|
"eval_rougeLsum": 19.958, |
|
"eval_runtime": 1809.4299, |
|
"eval_samples_per_second": 0.187, |
|
"eval_steps_per_second": 0.024, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 0.001, |
|
"loss": 1.3428, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 0.001, |
|
"loss": 1.3034, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 0.001, |
|
"loss": 1.4137, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 0.001, |
|
"loss": 1.4083, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 0.001, |
|
"loss": 1.3075, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 0.001, |
|
"loss": 1.2527, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 0.001, |
|
"loss": 1.2538, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"eval_gen_len": 497.2455621301775, |
|
"eval_loss": 1.3323031663894653, |
|
"eval_rouge1": 22.9554, |
|
"eval_rouge2": 6.4509, |
|
"eval_rougeL": 19.7437, |
|
"eval_rougeLsum": 20.923, |
|
"eval_runtime": 1810.7532, |
|
"eval_samples_per_second": 0.187, |
|
"eval_steps_per_second": 0.024, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 0.001, |
|
"loss": 1.2028, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 0.001, |
|
"loss": 1.0981, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 0.001, |
|
"loss": 1.1033, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 0.001, |
|
"loss": 1.1303, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 0.001, |
|
"loss": 1.1675, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 0.001, |
|
"loss": 1.3701, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 0.001, |
|
"loss": 1.277, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"eval_gen_len": 507.2278106508876, |
|
"eval_loss": 1.5462373495101929, |
|
"eval_rouge1": 14.6326, |
|
"eval_rouge2": 3.6509, |
|
"eval_rougeL": 12.4805, |
|
"eval_rougeLsum": 13.5001, |
|
"eval_runtime": 1806.4311, |
|
"eval_samples_per_second": 0.187, |
|
"eval_steps_per_second": 0.024, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 0.001, |
|
"loss": 1.3884, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 0.001, |
|
"loss": 1.0428, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 0.001, |
|
"loss": 1.0266, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 0.001, |
|
"loss": 1.0247, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 0.001, |
|
"loss": 0.9732, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 0.001, |
|
"loss": 1.0042, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 0.001, |
|
"loss": 1.0099, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"learning_rate": 0.001, |
|
"loss": 1.0071, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"eval_gen_len": 429.7721893491124, |
|
"eval_loss": 1.3604055643081665, |
|
"eval_rouge1": 29.5352, |
|
"eval_rouge2": 9.9544, |
|
"eval_rougeL": 22.1073, |
|
"eval_rougeLsum": 28.1204, |
|
"eval_runtime": 1808.4033, |
|
"eval_samples_per_second": 0.187, |
|
"eval_steps_per_second": 0.024, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 0.001, |
|
"loss": 0.8375, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 0.001, |
|
"loss": 0.8301, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 0.001, |
|
"loss": 0.8551, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 0.001, |
|
"loss": 0.823, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 6.68, |
|
"learning_rate": 0.001, |
|
"loss": 0.8783, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"learning_rate": 0.001, |
|
"loss": 0.885, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 0.001, |
|
"loss": 0.8685, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"eval_gen_len": 451.78402366863907, |
|
"eval_loss": 1.4360722303390503, |
|
"eval_rouge1": 31.0337, |
|
"eval_rouge2": 10.6724, |
|
"eval_rougeL": 22.3815, |
|
"eval_rougeLsum": 29.6325, |
|
"eval_runtime": 1808.8854, |
|
"eval_samples_per_second": 0.187, |
|
"eval_steps_per_second": 0.024, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 0.001, |
|
"loss": 0.7653, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 7.23, |
|
"learning_rate": 0.001, |
|
"loss": 0.7402, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 0.001, |
|
"loss": 0.7582, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 0.001, |
|
"loss": 0.7518, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"learning_rate": 0.001, |
|
"loss": 0.7486, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 0.001, |
|
"loss": 0.7645, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"learning_rate": 0.001, |
|
"loss": 0.7498, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_gen_len": 473.896449704142, |
|
"eval_loss": 1.530242681503296, |
|
"eval_rouge1": 28.433, |
|
"eval_rouge2": 8.4887, |
|
"eval_rougeL": 21.3588, |
|
"eval_rougeLsum": 26.6817, |
|
"eval_runtime": 1807.993, |
|
"eval_samples_per_second": 0.187, |
|
"eval_steps_per_second": 0.024, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 0.001, |
|
"loss": 0.6877, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 8.21, |
|
"learning_rate": 0.001, |
|
"loss": 0.6278, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"learning_rate": 0.001, |
|
"loss": 0.6602, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 0.001, |
|
"loss": 0.6408, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 8.63, |
|
"learning_rate": 0.001, |
|
"loss": 0.6514, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"learning_rate": 0.001, |
|
"loss": 0.6434, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"learning_rate": 0.001, |
|
"loss": 0.6226, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"eval_gen_len": 358.76627218934914, |
|
"eval_loss": 1.628932237625122, |
|
"eval_rouge1": 37.251, |
|
"eval_rouge2": 12.8214, |
|
"eval_rougeL": 24.8704, |
|
"eval_rougeLsum": 36.0027, |
|
"eval_runtime": 1807.901, |
|
"eval_samples_per_second": 0.187, |
|
"eval_steps_per_second": 0.024, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 0.001, |
|
"loss": 0.5826, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"learning_rate": 0.001, |
|
"loss": 0.5105, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 0.001, |
|
"loss": 0.5395, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 9.46, |
|
"learning_rate": 0.001, |
|
"loss": 0.5103, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 0.001, |
|
"loss": 0.5377, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 9.74, |
|
"learning_rate": 0.001, |
|
"loss": 0.5558, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 9.74, |
|
"eval_gen_len": 284.0266272189349, |
|
"eval_loss": 1.5811121463775635, |
|
"eval_rouge1": 35.4657, |
|
"eval_rouge2": 12.0036, |
|
"eval_rougeL": 24.7787, |
|
"eval_rougeLsum": 34.3775, |
|
"eval_runtime": 1740.8347, |
|
"eval_samples_per_second": 0.194, |
|
"eval_steps_per_second": 0.025, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 9.74, |
|
"step": 140, |
|
"total_flos": 2.447850236380324e+18, |
|
"train_loss": 1.2823251613548823, |
|
"train_runtime": 53785.754, |
|
"train_samples_per_second": 0.683, |
|
"train_steps_per_second": 0.003 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 140, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 2.447850236380324e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|