indo-t5-base-v2 / trainer_state.json
w11wo's picture
End of training
0de48d4
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.1103594409128905,
"global_step": 10000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06,
"learning_rate": 0.00016,
"loss": 8.4604,
"step": 200
},
{
"epoch": 0.12,
"learning_rate": 0.00032,
"loss": 4.2921,
"step": 400
},
{
"epoch": 0.19,
"learning_rate": 0.00048,
"loss": 3.3774,
"step": 600
},
{
"epoch": 0.25,
"learning_rate": 0.00064,
"loss": 2.9308,
"step": 800
},
{
"epoch": 0.31,
"learning_rate": 0.0008,
"loss": 2.641,
"step": 1000
},
{
"epoch": 0.37,
"learning_rate": 0.0007822222222222222,
"loss": 2.4213,
"step": 1200
},
{
"epoch": 0.44,
"learning_rate": 0.0007644444444444445,
"loss": 2.2593,
"step": 1400
},
{
"epoch": 0.5,
"learning_rate": 0.0007466666666666667,
"loss": 2.1346,
"step": 1600
},
{
"epoch": 0.56,
"learning_rate": 0.0007288888888888889,
"loss": 2.0382,
"step": 1800
},
{
"epoch": 0.62,
"learning_rate": 0.0007111111111111111,
"loss": 1.955,
"step": 2000
},
{
"epoch": 0.68,
"learning_rate": 0.0006933333333333333,
"loss": 1.8814,
"step": 2200
},
{
"epoch": 0.75,
"learning_rate": 0.0006755555555555555,
"loss": 1.8179,
"step": 2400
},
{
"epoch": 0.81,
"learning_rate": 0.0006577777777777777,
"loss": 1.7623,
"step": 2600
},
{
"epoch": 0.87,
"learning_rate": 0.00064,
"loss": 1.7135,
"step": 2800
},
{
"epoch": 0.93,
"learning_rate": 0.0006222222222222223,
"loss": 1.6689,
"step": 3000
},
{
"epoch": 1.0,
"learning_rate": 0.0006044444444444445,
"loss": 1.6295,
"step": 3200
},
{
"epoch": 1.06,
"learning_rate": 0.0005866666666666667,
"loss": 1.5857,
"step": 3400
},
{
"epoch": 1.12,
"learning_rate": 0.000568888888888889,
"loss": 1.5541,
"step": 3600
},
{
"epoch": 1.18,
"learning_rate": 0.0005511111111111112,
"loss": 1.5221,
"step": 3800
},
{
"epoch": 1.24,
"learning_rate": 0.0005333333333333334,
"loss": 1.4947,
"step": 4000
},
{
"epoch": 1.31,
"learning_rate": 0.0005155555555555557,
"loss": 1.4683,
"step": 4200
},
{
"epoch": 1.37,
"learning_rate": 0.0004977777777777778,
"loss": 1.4415,
"step": 4400
},
{
"epoch": 1.43,
"learning_rate": 0.00048,
"loss": 1.4183,
"step": 4600
},
{
"epoch": 1.49,
"learning_rate": 0.0004622222222222222,
"loss": 1.3975,
"step": 4800
},
{
"epoch": 1.56,
"learning_rate": 0.00044444444444444447,
"loss": 1.3756,
"step": 5000
},
{
"epoch": 1.56,
"eval_bleu": 2.3992,
"eval_gen_len": 18.9403,
"eval_loss": 1.535530686378479,
"eval_runtime": 7626.9892,
"eval_samples_per_second": 248.341,
"eval_steps_per_second": 1.94,
"step": 5000
},
{
"epoch": 1.62,
"learning_rate": 0.00042666666666666667,
"loss": 1.358,
"step": 5200
},
{
"epoch": 1.68,
"learning_rate": 0.00040888888888888887,
"loss": 1.339,
"step": 5400
},
{
"epoch": 1.74,
"learning_rate": 0.0003911111111111111,
"loss": 1.3213,
"step": 5600
},
{
"epoch": 1.8,
"learning_rate": 0.0003733333333333334,
"loss": 1.3068,
"step": 5800
},
{
"epoch": 1.87,
"learning_rate": 0.00035555555555555557,
"loss": 1.2927,
"step": 6000
},
{
"epoch": 1.93,
"learning_rate": 0.00033777777777777777,
"loss": 1.279,
"step": 6200
},
{
"epoch": 1.99,
"learning_rate": 0.00032,
"loss": 1.2641,
"step": 6400
},
{
"epoch": 2.05,
"learning_rate": 0.0003022222222222222,
"loss": 1.2455,
"step": 6600
},
{
"epoch": 2.12,
"learning_rate": 0.0002844444444444445,
"loss": 1.2329,
"step": 6800
},
{
"epoch": 2.18,
"learning_rate": 0.0002666666666666667,
"loss": 1.2239,
"step": 7000
},
{
"epoch": 2.24,
"learning_rate": 0.0002488888888888889,
"loss": 1.2123,
"step": 7200
},
{
"epoch": 2.3,
"learning_rate": 0.0002311111111111111,
"loss": 1.2036,
"step": 7400
},
{
"epoch": 2.36,
"learning_rate": 0.00021333333333333333,
"loss": 1.1937,
"step": 7600
},
{
"epoch": 2.43,
"learning_rate": 0.00019555555555555556,
"loss": 1.1851,
"step": 7800
},
{
"epoch": 2.49,
"learning_rate": 0.00017777777777777779,
"loss": 1.1772,
"step": 8000
},
{
"epoch": 2.55,
"learning_rate": 0.00016,
"loss": 1.1708,
"step": 8200
},
{
"epoch": 2.61,
"learning_rate": 0.00014222222222222224,
"loss": 1.1634,
"step": 8400
},
{
"epoch": 2.67,
"learning_rate": 0.00012444444444444444,
"loss": 1.158,
"step": 8600
},
{
"epoch": 2.74,
"learning_rate": 0.00010666666666666667,
"loss": 1.1541,
"step": 8800
},
{
"epoch": 2.8,
"learning_rate": 8.888888888888889e-05,
"loss": 1.1494,
"step": 9000
},
{
"epoch": 2.86,
"learning_rate": 7.111111111111112e-05,
"loss": 1.1435,
"step": 9200
},
{
"epoch": 2.92,
"learning_rate": 5.333333333333333e-05,
"loss": 1.1406,
"step": 9400
},
{
"epoch": 2.99,
"learning_rate": 3.555555555555556e-05,
"loss": 1.1375,
"step": 9600
},
{
"epoch": 3.05,
"learning_rate": 1.777777777777778e-05,
"loss": 1.1315,
"step": 9800
},
{
"epoch": 3.11,
"learning_rate": 0.0,
"loss": 1.1295,
"step": 10000
},
{
"epoch": 3.11,
"eval_bleu": 2.543,
"eval_gen_len": 18.9389,
"eval_loss": 1.5099098682403564,
"eval_runtime": 7653.3773,
"eval_samples_per_second": 247.485,
"eval_steps_per_second": 1.934,
"step": 10000
},
{
"epoch": 3.11,
"step": 10000,
"total_flos": 1.2278131895086088e+19,
"train_loss": 1.7310921798706054,
"train_runtime": 38945.4898,
"train_samples_per_second": 1051.726,
"train_steps_per_second": 0.257
}
],
"max_steps": 10000,
"num_train_epochs": 4,
"total_flos": 1.2278131895086088e+19,
"trial_name": null,
"trial_params": null
}