tiny-bert-qa / trainer_state.json
srcocotero's picture
Upload trainer_state.json
6ae7bcf
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"global_step": 52629,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 2.9714986034315684e-05,
"loss": 4.8185,
"step": 500
},
{
"epoch": 0.06,
"learning_rate": 2.942997206863136e-05,
"loss": 4.2601,
"step": 1000
},
{
"epoch": 0.09,
"learning_rate": 2.9144958102947044e-05,
"loss": 4.1555,
"step": 1500
},
{
"epoch": 0.11,
"learning_rate": 2.8859944137262728e-05,
"loss": 3.9985,
"step": 2000
},
{
"epoch": 0.14,
"learning_rate": 2.857493017157841e-05,
"loss": 3.9299,
"step": 2500
},
{
"epoch": 0.17,
"learning_rate": 2.8289916205894088e-05,
"loss": 3.8222,
"step": 3000
},
{
"epoch": 0.2,
"learning_rate": 2.800490224020977e-05,
"loss": 3.7104,
"step": 3500
},
{
"epoch": 0.23,
"learning_rate": 2.7719888274525454e-05,
"loss": 3.593,
"step": 4000
},
{
"epoch": 0.26,
"learning_rate": 2.743487430884113e-05,
"loss": 3.5155,
"step": 4500
},
{
"epoch": 0.29,
"learning_rate": 2.7149860343156814e-05,
"loss": 3.4417,
"step": 5000
},
{
"epoch": 0.31,
"learning_rate": 2.6864846377472498e-05,
"loss": 3.3628,
"step": 5500
},
{
"epoch": 0.34,
"learning_rate": 2.657983241178818e-05,
"loss": 3.3508,
"step": 6000
},
{
"epoch": 0.37,
"learning_rate": 2.6294818446103858e-05,
"loss": 3.3291,
"step": 6500
},
{
"epoch": 0.4,
"learning_rate": 2.600980448041954e-05,
"loss": 3.2381,
"step": 7000
},
{
"epoch": 0.43,
"learning_rate": 2.5724790514735224e-05,
"loss": 3.2735,
"step": 7500
},
{
"epoch": 0.46,
"learning_rate": 2.54397765490509e-05,
"loss": 3.2627,
"step": 8000
},
{
"epoch": 0.48,
"learning_rate": 2.5154762583366584e-05,
"loss": 3.146,
"step": 8500
},
{
"epoch": 0.51,
"learning_rate": 2.4869748617682268e-05,
"loss": 3.0925,
"step": 9000
},
{
"epoch": 0.54,
"learning_rate": 2.458473465199795e-05,
"loss": 3.1043,
"step": 9500
},
{
"epoch": 0.57,
"learning_rate": 2.4299720686313628e-05,
"loss": 3.0534,
"step": 10000
},
{
"epoch": 0.6,
"learning_rate": 2.401470672062931e-05,
"loss": 3.122,
"step": 10500
},
{
"epoch": 0.63,
"learning_rate": 2.3729692754944995e-05,
"loss": 3.0576,
"step": 11000
},
{
"epoch": 0.66,
"learning_rate": 2.344467878926067e-05,
"loss": 3.0045,
"step": 11500
},
{
"epoch": 0.68,
"learning_rate": 2.3159664823576355e-05,
"loss": 2.9903,
"step": 12000
},
{
"epoch": 0.71,
"learning_rate": 2.2874650857892038e-05,
"loss": 3.0002,
"step": 12500
},
{
"epoch": 0.74,
"learning_rate": 2.258963689220772e-05,
"loss": 2.9681,
"step": 13000
},
{
"epoch": 0.77,
"learning_rate": 2.2304622926523398e-05,
"loss": 2.9442,
"step": 13500
},
{
"epoch": 0.8,
"learning_rate": 2.201960896083908e-05,
"loss": 2.9579,
"step": 14000
},
{
"epoch": 0.83,
"learning_rate": 2.1734594995154765e-05,
"loss": 2.9277,
"step": 14500
},
{
"epoch": 0.86,
"learning_rate": 2.1449581029470445e-05,
"loss": 2.9364,
"step": 15000
},
{
"epoch": 0.88,
"learning_rate": 2.1164567063786125e-05,
"loss": 2.8828,
"step": 15500
},
{
"epoch": 0.91,
"learning_rate": 2.0879553098101808e-05,
"loss": 2.9341,
"step": 16000
},
{
"epoch": 0.94,
"learning_rate": 2.059453913241749e-05,
"loss": 2.9137,
"step": 16500
},
{
"epoch": 0.97,
"learning_rate": 2.0309525166733168e-05,
"loss": 2.8994,
"step": 17000
},
{
"epoch": 1.0,
"learning_rate": 2.002451120104885e-05,
"loss": 2.8516,
"step": 17500
},
{
"epoch": 1.03,
"learning_rate": 1.9739497235364535e-05,
"loss": 2.7599,
"step": 18000
},
{
"epoch": 1.05,
"learning_rate": 1.9454483269680215e-05,
"loss": 2.8004,
"step": 18500
},
{
"epoch": 1.08,
"learning_rate": 1.9169469303995895e-05,
"loss": 2.7549,
"step": 19000
},
{
"epoch": 1.11,
"learning_rate": 1.8884455338311578e-05,
"loss": 2.7771,
"step": 19500
},
{
"epoch": 1.14,
"learning_rate": 1.859944137262726e-05,
"loss": 2.7675,
"step": 20000
},
{
"epoch": 1.17,
"learning_rate": 1.8314427406942938e-05,
"loss": 2.6916,
"step": 20500
},
{
"epoch": 1.2,
"learning_rate": 1.802941344125862e-05,
"loss": 2.7554,
"step": 21000
},
{
"epoch": 1.23,
"learning_rate": 1.7744399475574305e-05,
"loss": 2.7621,
"step": 21500
},
{
"epoch": 1.25,
"learning_rate": 1.7459385509889985e-05,
"loss": 2.7171,
"step": 22000
},
{
"epoch": 1.28,
"learning_rate": 1.7174371544205665e-05,
"loss": 2.7549,
"step": 22500
},
{
"epoch": 1.31,
"learning_rate": 1.688935757852135e-05,
"loss": 2.7533,
"step": 23000
},
{
"epoch": 1.34,
"learning_rate": 1.6604343612837032e-05,
"loss": 2.7607,
"step": 23500
},
{
"epoch": 1.37,
"learning_rate": 1.631932964715271e-05,
"loss": 2.7582,
"step": 24000
},
{
"epoch": 1.4,
"learning_rate": 1.6034315681468392e-05,
"loss": 2.7138,
"step": 24500
},
{
"epoch": 1.43,
"learning_rate": 1.5749301715784075e-05,
"loss": 2.7392,
"step": 25000
},
{
"epoch": 1.45,
"learning_rate": 1.5464287750099755e-05,
"loss": 2.6482,
"step": 25500
},
{
"epoch": 1.48,
"learning_rate": 1.5179273784415437e-05,
"loss": 2.7164,
"step": 26000
},
{
"epoch": 1.51,
"learning_rate": 1.4894259818731119e-05,
"loss": 2.7996,
"step": 26500
},
{
"epoch": 1.54,
"learning_rate": 1.4609245853046799e-05,
"loss": 2.7637,
"step": 27000
},
{
"epoch": 1.57,
"learning_rate": 1.4324231887362482e-05,
"loss": 2.7042,
"step": 27500
},
{
"epoch": 1.6,
"learning_rate": 1.4039217921678162e-05,
"loss": 2.6762,
"step": 28000
},
{
"epoch": 1.62,
"learning_rate": 1.3754203955993845e-05,
"loss": 2.7382,
"step": 28500
},
{
"epoch": 1.65,
"learning_rate": 1.3469189990309525e-05,
"loss": 2.7503,
"step": 29000
},
{
"epoch": 1.68,
"learning_rate": 1.3184176024625207e-05,
"loss": 2.7201,
"step": 29500
},
{
"epoch": 1.71,
"learning_rate": 1.2899162058940889e-05,
"loss": 2.6753,
"step": 30000
},
{
"epoch": 1.74,
"learning_rate": 1.2614148093256569e-05,
"loss": 2.7339,
"step": 30500
},
{
"epoch": 1.77,
"learning_rate": 1.2329134127572252e-05,
"loss": 2.7222,
"step": 31000
},
{
"epoch": 1.8,
"learning_rate": 1.2044120161887932e-05,
"loss": 2.6654,
"step": 31500
},
{
"epoch": 1.82,
"learning_rate": 1.1759106196203616e-05,
"loss": 2.7559,
"step": 32000
},
{
"epoch": 1.85,
"learning_rate": 1.1474092230519296e-05,
"loss": 2.6557,
"step": 32500
},
{
"epoch": 1.88,
"learning_rate": 1.1189078264834977e-05,
"loss": 2.6873,
"step": 33000
},
{
"epoch": 1.91,
"learning_rate": 1.0904064299150659e-05,
"loss": 2.7063,
"step": 33500
},
{
"epoch": 1.94,
"learning_rate": 1.0619050333466339e-05,
"loss": 2.6581,
"step": 34000
},
{
"epoch": 1.97,
"learning_rate": 1.0334036367782022e-05,
"loss": 2.703,
"step": 34500
},
{
"epoch": 2.0,
"learning_rate": 1.0049022402097702e-05,
"loss": 2.625,
"step": 35000
},
{
"epoch": 2.02,
"learning_rate": 9.764008436413386e-06,
"loss": 2.6155,
"step": 35500
},
{
"epoch": 2.05,
"learning_rate": 9.478994470729066e-06,
"loss": 2.5546,
"step": 36000
},
{
"epoch": 2.08,
"learning_rate": 9.193980505044747e-06,
"loss": 2.6148,
"step": 36500
},
{
"epoch": 2.11,
"learning_rate": 8.908966539360429e-06,
"loss": 2.5755,
"step": 37000
},
{
"epoch": 2.14,
"learning_rate": 8.623952573676109e-06,
"loss": 2.5831,
"step": 37500
},
{
"epoch": 2.17,
"learning_rate": 8.338938607991792e-06,
"loss": 2.6104,
"step": 38000
},
{
"epoch": 2.19,
"learning_rate": 8.053924642307472e-06,
"loss": 2.5912,
"step": 38500
},
{
"epoch": 2.22,
"learning_rate": 7.768910676623156e-06,
"loss": 2.5808,
"step": 39000
},
{
"epoch": 2.25,
"learning_rate": 7.483896710938836e-06,
"loss": 2.5927,
"step": 39500
},
{
"epoch": 2.28,
"learning_rate": 7.1988827452545175e-06,
"loss": 2.5863,
"step": 40000
},
{
"epoch": 2.31,
"learning_rate": 6.913868779570199e-06,
"loss": 2.589,
"step": 40500
},
{
"epoch": 2.34,
"learning_rate": 6.628854813885881e-06,
"loss": 2.6383,
"step": 41000
},
{
"epoch": 2.37,
"learning_rate": 6.343840848201562e-06,
"loss": 2.5883,
"step": 41500
},
{
"epoch": 2.39,
"learning_rate": 6.0588268825172434e-06,
"loss": 2.6081,
"step": 42000
},
{
"epoch": 2.42,
"learning_rate": 5.773812916832925e-06,
"loss": 2.5842,
"step": 42500
},
{
"epoch": 2.45,
"learning_rate": 5.488798951148607e-06,
"loss": 2.5788,
"step": 43000
},
{
"epoch": 2.48,
"learning_rate": 5.203784985464288e-06,
"loss": 2.5318,
"step": 43500
},
{
"epoch": 2.51,
"learning_rate": 4.918771019779969e-06,
"loss": 2.6007,
"step": 44000
},
{
"epoch": 2.54,
"learning_rate": 4.633757054095651e-06,
"loss": 2.5745,
"step": 44500
},
{
"epoch": 2.57,
"learning_rate": 4.348743088411332e-06,
"loss": 2.5289,
"step": 45000
},
{
"epoch": 2.59,
"learning_rate": 4.063729122727014e-06,
"loss": 2.5818,
"step": 45500
},
{
"epoch": 2.62,
"learning_rate": 3.778715157042695e-06,
"loss": 2.5497,
"step": 46000
},
{
"epoch": 2.65,
"learning_rate": 3.4937011913583766e-06,
"loss": 2.5743,
"step": 46500
},
{
"epoch": 2.68,
"learning_rate": 3.2086872256740582e-06,
"loss": 2.5017,
"step": 47000
},
{
"epoch": 2.71,
"learning_rate": 2.92367325998974e-06,
"loss": 2.5483,
"step": 47500
},
{
"epoch": 2.74,
"learning_rate": 2.6386592943054208e-06,
"loss": 2.6047,
"step": 48000
},
{
"epoch": 2.76,
"learning_rate": 2.3536453286211025e-06,
"loss": 2.5553,
"step": 48500
},
{
"epoch": 2.79,
"learning_rate": 2.068631362936784e-06,
"loss": 2.6252,
"step": 49000
},
{
"epoch": 2.82,
"learning_rate": 1.7836173972524654e-06,
"loss": 2.5519,
"step": 49500
},
{
"epoch": 2.85,
"learning_rate": 1.498603431568147e-06,
"loss": 2.5817,
"step": 50000
},
{
"epoch": 2.88,
"learning_rate": 1.2135894658838284e-06,
"loss": 2.5457,
"step": 50500
},
{
"epoch": 2.91,
"learning_rate": 9.285755001995098e-07,
"loss": 2.5664,
"step": 51000
},
{
"epoch": 2.94,
"learning_rate": 6.435615345151913e-07,
"loss": 2.5883,
"step": 51500
},
{
"epoch": 2.96,
"learning_rate": 3.5854756883087273e-07,
"loss": 2.5447,
"step": 52000
},
{
"epoch": 2.99,
"learning_rate": 7.353360314655419e-08,
"loss": 2.5813,
"step": 52500
},
{
"epoch": 3.0,
"step": 52629,
"total_flos": 320970658516992.0,
"train_loss": 2.87008975942873,
"train_runtime": 858.012,
"train_samples_per_second": 306.688,
"train_steps_per_second": 61.338
}
],
"max_steps": 52629,
"num_train_epochs": 3,
"total_flos": 320970658516992.0,
"trial_name": null,
"trial_params": null
}