ltg
/

flan-t5-definition-en-large / trainer_state.json
ltgoslo's picture
Large model
e0e38cf
raw
history blame
7.9 kB
{
"best_metric": 35.2849,
"best_model_checkpoint": "large_ox-wn_cod_15ep_eap/checkpoint-38360",
"epoch": 15.0,
"global_step": 41100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 4.666666666666667e-05,
"loss": 2.1769,
"step": 2740
},
{
"epoch": 1.0,
"eval_gen_len": 11.342940924045202,
"eval_loss": 1.905047059059143,
"eval_rouge1": 28.7222,
"eval_rouge2": 9.1873,
"eval_rougeL": 26.6888,
"eval_rougeLsum": 26.6937,
"eval_runtime": 86.0596,
"eval_samples_per_second": 162.469,
"eval_steps_per_second": 1.278,
"step": 2740
},
{
"epoch": 2.0,
"learning_rate": 4.3333333333333334e-05,
"loss": 1.9408,
"step": 5480
},
{
"epoch": 2.0,
"eval_gen_len": 11.416464025175225,
"eval_loss": 1.8151417970657349,
"eval_rouge1": 29.8799,
"eval_rouge2": 10.2327,
"eval_rougeL": 27.7947,
"eval_rougeLsum": 27.8044,
"eval_runtime": 98.7019,
"eval_samples_per_second": 141.659,
"eval_steps_per_second": 1.114,
"step": 5480
},
{
"epoch": 3.0,
"learning_rate": 4e-05,
"loss": 1.8124,
"step": 8220
},
{
"epoch": 3.0,
"eval_gen_len": 11.531039908453726,
"eval_loss": 1.7607892751693726,
"eval_rouge1": 30.9845,
"eval_rouge2": 10.9982,
"eval_rougeL": 28.8059,
"eval_rougeLsum": 28.8131,
"eval_runtime": 96.5011,
"eval_samples_per_second": 144.889,
"eval_steps_per_second": 1.14,
"step": 8220
},
{
"epoch": 4.0,
"learning_rate": 3.6666666666666666e-05,
"loss": 1.7118,
"step": 10960
},
{
"epoch": 4.0,
"eval_gen_len": 11.703690459161779,
"eval_loss": 1.7228699922561646,
"eval_rouge1": 31.6943,
"eval_rouge2": 11.7412,
"eval_rougeL": 29.4967,
"eval_rougeLsum": 29.5319,
"eval_runtime": 87.7321,
"eval_samples_per_second": 159.372,
"eval_steps_per_second": 1.254,
"step": 10960
},
{
"epoch": 5.0,
"learning_rate": 3.3333333333333335e-05,
"loss": 1.6286,
"step": 13700
},
{
"epoch": 5.0,
"eval_gen_len": 11.77835788871406,
"eval_loss": 1.6936795711517334,
"eval_rouge1": 32.5839,
"eval_rouge2": 12.2431,
"eval_rougeL": 30.1799,
"eval_rougeLsum": 30.206,
"eval_runtime": 84.5028,
"eval_samples_per_second": 165.462,
"eval_steps_per_second": 1.302,
"step": 13700
},
{
"epoch": 6.0,
"learning_rate": 3e-05,
"loss": 1.5597,
"step": 16440
},
{
"epoch": 6.0,
"eval_gen_len": 11.597410956944643,
"eval_loss": 1.674757480621338,
"eval_rouge1": 32.9915,
"eval_rouge2": 12.8514,
"eval_rougeL": 30.7016,
"eval_rougeLsum": 30.7145,
"eval_runtime": 87.802,
"eval_samples_per_second": 159.245,
"eval_steps_per_second": 1.253,
"step": 16440
},
{
"epoch": 7.0,
"learning_rate": 2.6666666666666667e-05,
"loss": 1.4982,
"step": 19180
},
{
"epoch": 7.0,
"eval_gen_len": 11.358031755113718,
"eval_loss": 1.6578471660614014,
"eval_rouge1": 33.2157,
"eval_rouge2": 13.1389,
"eval_rougeL": 30.9428,
"eval_rougeLsum": 30.9519,
"eval_runtime": 89.406,
"eval_samples_per_second": 156.388,
"eval_steps_per_second": 1.23,
"step": 19180
},
{
"epoch": 8.0,
"learning_rate": 2.3333333333333336e-05,
"loss": 1.4468,
"step": 21920
},
{
"epoch": 8.0,
"eval_gen_len": 11.572378772707767,
"eval_loss": 1.6473166942596436,
"eval_rouge1": 33.6146,
"eval_rouge2": 13.5922,
"eval_rougeL": 31.3001,
"eval_rougeLsum": 31.3235,
"eval_runtime": 98.6248,
"eval_samples_per_second": 141.77,
"eval_steps_per_second": 1.115,
"step": 21920
},
{
"epoch": 9.0,
"learning_rate": 2e-05,
"loss": 1.4022,
"step": 24660
},
{
"epoch": 9.0,
"eval_gen_len": 11.738878558146189,
"eval_loss": 1.6383947134017944,
"eval_rouge1": 34.1711,
"eval_rouge2": 14.1117,
"eval_rougeL": 31.7951,
"eval_rougeLsum": 31.8066,
"eval_runtime": 89.5372,
"eval_samples_per_second": 156.159,
"eval_steps_per_second": 1.229,
"step": 24660
},
{
"epoch": 10.0,
"learning_rate": 1.6666666666666667e-05,
"loss": 1.364,
"step": 27400
},
{
"epoch": 10.0,
"eval_gen_len": 11.665856100700902,
"eval_loss": 1.6336920261383057,
"eval_rouge1": 34.5489,
"eval_rouge2": 14.5012,
"eval_rougeL": 32.1329,
"eval_rougeLsum": 32.1446,
"eval_runtime": 103.7766,
"eval_samples_per_second": 134.732,
"eval_steps_per_second": 1.06,
"step": 27400
},
{
"epoch": 11.0,
"learning_rate": 1.3333333333333333e-05,
"loss": 1.3321,
"step": 30140
},
{
"epoch": 11.0,
"eval_gen_len": 11.800314690316121,
"eval_loss": 1.6291483640670776,
"eval_rouge1": 34.7133,
"eval_rouge2": 14.7297,
"eval_rougeL": 32.3042,
"eval_rougeLsum": 32.314,
"eval_runtime": 91.3961,
"eval_samples_per_second": 152.982,
"eval_steps_per_second": 1.204,
"step": 30140
},
{
"epoch": 12.0,
"learning_rate": 1e-05,
"loss": 1.3054,
"step": 32880
},
{
"epoch": 12.0,
"eval_gen_len": 11.761908167644114,
"eval_loss": 1.6267131567001343,
"eval_rouge1": 34.9411,
"eval_rouge2": 15.0282,
"eval_rougeL": 32.5335,
"eval_rougeLsum": 32.5451,
"eval_runtime": 98.5092,
"eval_samples_per_second": 141.936,
"eval_steps_per_second": 1.117,
"step": 32880
},
{
"epoch": 13.0,
"learning_rate": 6.666666666666667e-06,
"loss": 1.2845,
"step": 35620
},
{
"epoch": 13.0,
"eval_gen_len": 11.831712201401801,
"eval_loss": 1.626239538192749,
"eval_rouge1": 35.1648,
"eval_rouge2": 15.2154,
"eval_rougeL": 32.7387,
"eval_rougeLsum": 32.742,
"eval_runtime": 85.528,
"eval_samples_per_second": 163.479,
"eval_steps_per_second": 1.286,
"step": 35620
},
{
"epoch": 14.0,
"learning_rate": 3.3333333333333333e-06,
"loss": 1.2699,
"step": 38360
},
{
"epoch": 14.0,
"eval_gen_len": 11.816764411386067,
"eval_loss": 1.6257190704345703,
"eval_rouge1": 35.2849,
"eval_rouge2": 15.3109,
"eval_rougeL": 32.8508,
"eval_rougeLsum": 32.853,
"eval_runtime": 84.6116,
"eval_samples_per_second": 165.249,
"eval_steps_per_second": 1.3,
"step": 38360
},
{
"epoch": 15.0,
"learning_rate": 0.0,
"loss": 1.2595,
"step": 41100
},
{
"epoch": 15.0,
"eval_gen_len": 11.797096266628522,
"eval_loss": 1.6273423433303833,
"eval_rouge1": 35.2224,
"eval_rouge2": 15.2781,
"eval_rougeL": 32.7718,
"eval_rougeLsum": 32.7826,
"eval_runtime": 95.1523,
"eval_samples_per_second": 146.943,
"eval_steps_per_second": 1.156,
"step": 41100
},
{
"epoch": 15.0,
"step": 41100,
"total_flos": 9.049973435337277e+17,
"train_loss": 1.5328590292826185,
"train_runtime": 18417.6233,
"train_samples_per_second": 142.797,
"train_steps_per_second": 2.232
}
],
"max_steps": 41100,
"num_train_epochs": 15,
"total_flos": 9.049973435337277e+17,
"trial_name": null,
"trial_params": null
}