{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.2754030638590854, |
|
"eval_steps": 500, |
|
"global_step": 2400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.166666666666667e-08, |
|
"loss": 0.6931, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"loss": 0.6932, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"loss": 0.6926, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 6.25e-07, |
|
"loss": 0.6907, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 8.333333333333333e-07, |
|
"loss": 0.6867, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.0416666666666667e-06, |
|
"loss": 0.6798, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.25e-06, |
|
"loss": 0.6683, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.4583333333333335e-06, |
|
"loss": 0.6532, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"loss": 0.635, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.8750000000000003e-06, |
|
"loss": 0.6108, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.0833333333333334e-06, |
|
"loss": 0.5844, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.2916666666666666e-06, |
|
"loss": 0.5544, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.5227, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.7083333333333334e-06, |
|
"loss": 0.4867, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.916666666666667e-06, |
|
"loss": 0.4471, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.125e-06, |
|
"loss": 0.4093, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.3671, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.5416666666666673e-06, |
|
"loss": 0.3131, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"loss": 0.267, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.958333333333333e-06, |
|
"loss": 0.2313, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.166666666666667e-06, |
|
"loss": 0.1819, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.3750000000000005e-06, |
|
"loss": 0.142, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.583333333333333e-06, |
|
"loss": 0.1105, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.791666666666668e-06, |
|
"loss": 0.0886, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0729, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.208333333333334e-06, |
|
"loss": 0.0656, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.416666666666667e-06, |
|
"loss": 0.0502, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5.625e-06, |
|
"loss": 0.0511, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5.833333333333334e-06, |
|
"loss": 0.0404, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6.041666666666667e-06, |
|
"loss": 0.0422, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6.25e-06, |
|
"loss": 0.0345, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6.458333333333334e-06, |
|
"loss": 0.0282, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.0292, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6.875e-06, |
|
"loss": 0.0279, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.083333333333335e-06, |
|
"loss": 0.0236, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.291666666666667e-06, |
|
"loss": 0.0219, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.0223, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.708333333333334e-06, |
|
"loss": 0.0225, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.916666666666667e-06, |
|
"loss": 0.0175, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.125000000000001e-06, |
|
"loss": 0.0152, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.0153, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.541666666666666e-06, |
|
"loss": 0.0132, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.750000000000001e-06, |
|
"loss": 0.0134, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.958333333333334e-06, |
|
"loss": 0.0144, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.166666666666666e-06, |
|
"loss": 0.0123, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.375000000000001e-06, |
|
"loss": 0.0115, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.583333333333335e-06, |
|
"loss": 0.0123, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.791666666666666e-06, |
|
"loss": 0.0091, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0087, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.999867788160888e-06, |
|
"loss": 0.0095, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.999471159635538e-06, |
|
"loss": 0.0089, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.998810135399545e-06, |
|
"loss": 0.0075, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.997884750411004e-06, |
|
"loss": 0.0082, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.996695053608651e-06, |
|
"loss": 0.007, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.99524110790929e-06, |
|
"loss": 0.0067, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.993522990204453e-06, |
|
"loss": 0.0075, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.991540791356342e-06, |
|
"loss": 0.0058, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.989294616193018e-06, |
|
"loss": 0.0066, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.986784583502863e-06, |
|
"loss": 0.0049, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.984010826028289e-06, |
|
"loss": 0.0067, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.980973490458728e-06, |
|
"loss": 0.0055, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.97767273742287e-06, |
|
"loss": 0.0069, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.974108741480167e-06, |
|
"loss": 0.0067, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.970281691111598e-06, |
|
"loss": 0.0051, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.966191788709716e-06, |
|
"loss": 0.0053, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.961839250567925e-06, |
|
"loss": 0.0051, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.957224306869053e-06, |
|
"loss": 0.0047, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.952347201673181e-06, |
|
"loss": 0.0053, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.947208192904722e-06, |
|
"loss": 0.0052, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.941807552338805e-06, |
|
"loss": 0.0052, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.936145565586871e-06, |
|
"loss": 0.0046, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.930222532081597e-06, |
|
"loss": 0.0048, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.924038765061042e-06, |
|
"loss": 0.005, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.91759459155209e-06, |
|
"loss": 0.0034, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.910890352353154e-06, |
|
"loss": 0.0042, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.903926402016153e-06, |
|
"loss": 0.0045, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.896703108827758e-06, |
|
"loss": 0.0037, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.88922085478992e-06, |
|
"loss": 0.0043, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.881480035599667e-06, |
|
"loss": 0.0036, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.873481060628175e-06, |
|
"loss": 0.0034, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.86522435289912e-06, |
|
"loss": 0.004, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.856710349066307e-06, |
|
"loss": 0.005, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.847939499390581e-06, |
|
"loss": 0.0042, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.838912267716006e-06, |
|
"loss": 0.0033, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.829629131445342e-06, |
|
"loss": 0.0032, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.820090581514799e-06, |
|
"loss": 0.0034, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.810297122368067e-06, |
|
"loss": 0.0043, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.800249271929645e-06, |
|
"loss": 0.004, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.789947561577445e-06, |
|
"loss": 0.0036, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.779392536114698e-06, |
|
"loss": 0.0029, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.768584753741134e-06, |
|
"loss": 0.004, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.75752478602347e-06, |
|
"loss": 0.0035, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.74621321786517e-06, |
|
"loss": 0.0026, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.73465064747553e-06, |
|
"loss": 0.0024, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.722837686338025e-06, |
|
"loss": 0.0027, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.710774959177983e-06, |
|
"loss": 0.0031, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.698463103929542e-06, |
|
"loss": 0.0034, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.685902771701913e-06, |
|
"loss": 0.0044, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.673094626744944e-06, |
|
"loss": 0.0028, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.660039346413994e-06, |
|
"loss": 0.003, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.646737621134112e-06, |
|
"loss": 0.0027, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.633190154363527e-06, |
|
"loss": 0.0042, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.619397662556434e-06, |
|
"loss": 0.0024, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.605360875125119e-06, |
|
"loss": 0.0023, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.591080534401371e-06, |
|
"loss": 0.0034, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.576557395597237e-06, |
|
"loss": 0.002, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.561792226765072e-06, |
|
"loss": 0.0034, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.546785808756926e-06, |
|
"loss": 0.0028, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.531538935183252e-06, |
|
"loss": 0.0033, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.516052412370922e-06, |
|
"loss": 0.0028, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.500327059320606e-06, |
|
"loss": 0.0032, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.484363707663443e-06, |
|
"loss": 0.0035, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.468163201617063e-06, |
|
"loss": 0.0029, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.451726397940946e-06, |
|
"loss": 0.0032, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.43505416589111e-06, |
|
"loss": 0.0028, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.41814738717414e-06, |
|
"loss": 0.0021, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.401006955900555e-06, |
|
"loss": 0.002, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.38363377853754e-06, |
|
"loss": 0.0023, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.366028773860981e-06, |
|
"loss": 0.0024, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.348192872906896e-06, |
|
"loss": 0.0029, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.330127018922195e-06, |
|
"loss": 0.0014, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.311832167314788e-06, |
|
"loss": 0.0021, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.293309285603066e-06, |
|
"loss": 0.0016, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.274559353364734e-06, |
|
"loss": 0.0021, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.255583362184998e-06, |
|
"loss": 0.0025, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.23638231560414e-06, |
|
"loss": 0.0021, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.21695722906443e-06, |
|
"loss": 0.0026, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.197309129856433e-06, |
|
"loss": 0.0026, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.177439057064684e-06, |
|
"loss": 0.0026, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.157348061512728e-06, |
|
"loss": 0.0027, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.137037205707552e-06, |
|
"loss": 0.0015, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.116507563783402e-06, |
|
"loss": 0.0015, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.09576022144496e-06, |
|
"loss": 0.0019, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.074796275909941e-06, |
|
"loss": 0.0024, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.053616835851062e-06, |
|
"loss": 0.0025, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.032223021337415e-06, |
|
"loss": 0.0021, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.01061596377522e-06, |
|
"loss": 0.0021, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 8.988796805848008e-06, |
|
"loss": 0.0022, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 8.966766701456177e-06, |
|
"loss": 0.0025, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 8.944526815655974e-06, |
|
"loss": 0.0019, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 8.92207832459788e-06, |
|
"loss": 0.0026, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 8.899422415464409e-06, |
|
"loss": 0.0019, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 8.87656028640733e-06, |
|
"loss": 0.0019, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 8.85349314648429e-06, |
|
"loss": 0.0015, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 8.83022221559489e-06, |
|
"loss": 0.0017, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 8.806748724416156e-06, |
|
"loss": 0.002, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 8.783073914337465e-06, |
|
"loss": 0.0023, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 8.759199037394888e-06, |
|
"loss": 0.0019, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 8.735125356204982e-06, |
|
"loss": 0.0018, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.710854143898008e-06, |
|
"loss": 0.0018, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.68638668405062e-06, |
|
"loss": 0.0017, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.661724270617961e-06, |
|
"loss": 0.002, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.636868207865244e-06, |
|
"loss": 0.0023, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.611819810298778e-06, |
|
"loss": 0.0019, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.586580402596448e-06, |
|
"loss": 0.0017, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.561151319537656e-06, |
|
"loss": 0.0018, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.535533905932739e-06, |
|
"loss": 0.0019, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.509729516551842e-06, |
|
"loss": 0.0022, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.483739516053276e-06, |
|
"loss": 0.0015, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.457565278911349e-06, |
|
"loss": 0.0017, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.43120818934367e-06, |
|
"loss": 0.0024, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.404669641237952e-06, |
|
"loss": 0.0013, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.377951038078303e-06, |
|
"loss": 0.0017, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.35105379287098e-06, |
|
"loss": 0.002, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.323979328069689e-06, |
|
"loss": 0.0015, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.296729075500345e-06, |
|
"loss": 0.0026, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 8.269304476285349e-06, |
|
"loss": 0.0017, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 8.241706980767382e-06, |
|
"loss": 0.0017, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 8.213938048432697e-06, |
|
"loss": 0.0011, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 8.185999147833943e-06, |
|
"loss": 0.0013, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 8.157891756512488e-06, |
|
"loss": 0.0016, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 8.129617360920297e-06, |
|
"loss": 0.0017, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 8.101177456341301e-06, |
|
"loss": 0.0018, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 8.072573546812338e-06, |
|
"loss": 0.0012, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 8.043807145043604e-06, |
|
"loss": 0.002, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 8.014879772338649e-06, |
|
"loss": 0.0012, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 7.985792958513932e-06, |
|
"loss": 0.0017, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 7.956548241817914e-06, |
|
"loss": 0.0015, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 7.927147168849704e-06, |
|
"loss": 0.0013, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 7.897591294477276e-06, |
|
"loss": 0.0016, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 7.86788218175523e-06, |
|
"loss": 0.0014, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 7.838021401842145e-06, |
|
"loss": 0.0013, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 7.808010533917465e-06, |
|
"loss": 0.0014, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 7.777851165098012e-06, |
|
"loss": 0.0017, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 7.747544890354031e-06, |
|
"loss": 0.0017, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 7.71709331242485e-06, |
|
"loss": 0.0014, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 7.686498041734121e-06, |
|
"loss": 0.0019, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 7.655760696304642e-06, |
|
"loss": 0.0012, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 7.624882901672801e-06, |
|
"loss": 0.0017, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 7.593866290802608e-06, |
|
"loss": 0.0012, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 7.562712503999327e-06, |
|
"loss": 0.0009, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 7.531423188822738e-06, |
|
"loss": 0.0014, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.0014, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 7.468444599338152e-06, |
|
"loss": 0.0012, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 7.4367586556362125e-06, |
|
"loss": 0.0011, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 7.404943844596939e-06, |
|
"loss": 0.0011, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 7.373001848738203e-06, |
|
"loss": 0.0015, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 7.340934357304011e-06, |
|
"loss": 0.002, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 7.308743066175172e-06, |
|
"loss": 0.0012, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 7.276429677779603e-06, |
|
"loss": 0.0016, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 7.243995901002312e-06, |
|
"loss": 0.0019, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 7.211443451095007e-06, |
|
"loss": 0.0013, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 7.178774049585397e-06, |
|
"loss": 0.0014, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 7.145989424186146e-06, |
|
"loss": 0.0021, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 7.113091308703498e-06, |
|
"loss": 0.0012, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 7.080081442945597e-06, |
|
"loss": 0.0014, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 7.046961572630463e-06, |
|
"loss": 0.0013, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 7.0137334492936875e-06, |
|
"loss": 0.0014, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 6.980398830195785e-06, |
|
"loss": 0.0014, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 6.946959478229277e-06, |
|
"loss": 0.0013, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 6.913417161825449e-06, |
|
"loss": 0.001, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 6.8797736548608405e-06, |
|
"loss": 0.0019, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 6.8460307365634225e-06, |
|
"loss": 0.0014, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 6.812190191418508e-06, |
|
"loss": 0.0013, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 6.778253809074384e-06, |
|
"loss": 0.0009, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 6.7442233842476545e-06, |
|
"loss": 0.0015, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 6.710100716628345e-06, |
|
"loss": 0.0009, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 6.675887610784708e-06, |
|
"loss": 0.0014, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 6.641585876067807e-06, |
|
"loss": 0.0015, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 6.607197326515808e-06, |
|
"loss": 0.001, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 6.572723780758069e-06, |
|
"loss": 0.0008, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 6.538167061918942e-06, |
|
"loss": 0.0011, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 6.503528997521365e-06, |
|
"loss": 0.0017, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 6.468811419390222e-06, |
|
"loss": 0.0016, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 6.434016163555452e-06, |
|
"loss": 0.0012, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 6.399145070154962e-06, |
|
"loss": 0.0014, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 6.364199983337306e-06, |
|
"loss": 0.0012, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 6.329182751164164e-06, |
|
"loss": 0.0013, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 6.294095225512604e-06, |
|
"loss": 0.0011, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 6.2589392619771435e-06, |
|
"loss": 0.0012, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 6.2237167197716195e-06, |
|
"loss": 0.0011, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 6.188429461630866e-06, |
|
"loss": 0.0012, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 6.153079353712201e-06, |
|
"loss": 0.0016, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 6.117668265496738e-06, |
|
"loss": 0.0009, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 6.0821980696905145e-06, |
|
"loss": 0.0016, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 6.046670642125461e-06, |
|
"loss": 0.0009, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 6.011087861660191e-06, |
|
"loss": 0.0015, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 5.975451610080643e-06, |
|
"loss": 0.0015, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 5.93976377200056e-06, |
|
"loss": 0.0013, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 5.904026234761827e-06, |
|
"loss": 0.001, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 5.8682408883346535e-06, |
|
"loss": 0.0013, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 5.832409625217623e-06, |
|
"loss": 0.0014, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 5.796534340337614e-06, |
|
"loss": 0.0017, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 5.760616930949584e-06, |
|
"loss": 0.0015, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 5.724659296536234e-06, |
|
"loss": 0.001, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 5.688663338707554e-06, |
|
"loss": 0.0013, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 5.65263096110026e-06, |
|
"loss": 0.001, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 5.616564069277111e-06, |
|
"loss": 0.0012, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 5.5804645706261515e-06, |
|
"loss": 0.0021, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 5.544334374259823e-06, |
|
"loss": 0.0012, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 5.50817539091401e-06, |
|
"loss": 0.0015, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 5.471989532846987e-06, |
|
"loss": 0.0008, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 5.435778713738292e-06, |
|
"loss": 0.0009, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 5.3995448485875205e-06, |
|
"loss": 0.0013, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 5.363289853613054e-06, |
|
"loss": 0.0012, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 5.327015646150716e-06, |
|
"loss": 0.0017, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 5.290724144552379e-06, |
|
"loss": 0.0009, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 5.254417268084514e-06, |
|
"loss": 0.0009, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 5.218096936826681e-06, |
|
"loss": 0.0011, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 5.18176507157e-06, |
|
"loss": 0.0009, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 5.145423593715558e-06, |
|
"loss": 0.0012, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 5.109074425172806e-06, |
|
"loss": 0.0016, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 5.072719488257915e-06, |
|
"loss": 0.0014, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 5.03636070559211e-06, |
|
"loss": 0.0014, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 5e-06, |
|
"loss": 0.001, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.963639294407893e-06, |
|
"loss": 0.0009, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.927280511742087e-06, |
|
"loss": 0.0011, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.890925574827195e-06, |
|
"loss": 0.001, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.854576406284443e-06, |
|
"loss": 0.0007, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.818234928430003e-06, |
|
"loss": 0.0009, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.781903063173321e-06, |
|
"loss": 0.0014, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.745582731915488e-06, |
|
"loss": 0.0008, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.7092758554476215e-06, |
|
"loss": 0.0007, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.672984353849285e-06, |
|
"loss": 0.0019, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.636710146386948e-06, |
|
"loss": 0.0012, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.600455151412482e-06, |
|
"loss": 0.0016, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.564221286261709e-06, |
|
"loss": 0.0014, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.528010467153015e-06, |
|
"loss": 0.0006, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.4918246090859905e-06, |
|
"loss": 0.0011, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.4556656257401786e-06, |
|
"loss": 0.0013, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.4195354293738484e-06, |
|
"loss": 0.0009, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.38343593072289e-06, |
|
"loss": 0.0009, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.347369038899744e-06, |
|
"loss": 0.0012, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.311336661292447e-06, |
|
"loss": 0.0007, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.275340703463767e-06, |
|
"loss": 0.0007, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.239383069050417e-06, |
|
"loss": 0.001, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.203465659662388e-06, |
|
"loss": 0.001, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.1675903747823795e-06, |
|
"loss": 0.0009, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.131759111665349e-06, |
|
"loss": 0.0012, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.0959737652381745e-06, |
|
"loss": 0.001, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.060236227999441e-06, |
|
"loss": 0.0012, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.02454838991936e-06, |
|
"loss": 0.0009, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.988912138339812e-06, |
|
"loss": 0.0016, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.95332935787454e-06, |
|
"loss": 0.0013, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.917801930309486e-06, |
|
"loss": 0.0012, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.882331734503263e-06, |
|
"loss": 0.0011, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.8469206462878e-06, |
|
"loss": 0.0011, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.8115705383691354e-06, |
|
"loss": 0.0012, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.776283280228381e-06, |
|
"loss": 0.001, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.741060738022858e-06, |
|
"loss": 0.0006, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.705904774487396e-06, |
|
"loss": 0.0009, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.6708172488358364e-06, |
|
"loss": 0.0011, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.6358000166626966e-06, |
|
"loss": 0.0012, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.6008549298450403e-06, |
|
"loss": 0.0011, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.5659838364445505e-06, |
|
"loss": 0.0011, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.531188580609778e-06, |
|
"loss": 0.0015, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.4964710024786354e-06, |
|
"loss": 0.0009, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.461832938081059e-06, |
|
"loss": 0.0013, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.427276219241933e-06, |
|
"loss": 0.0008, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.3928026734841935e-06, |
|
"loss": 0.0006, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.3584141239321953e-06, |
|
"loss": 0.0008, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.3241123892152925e-06, |
|
"loss": 0.0009, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.289899283371657e-06, |
|
"loss": 0.0011, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.2557766157523467e-06, |
|
"loss": 0.0007, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.2217461909256186e-06, |
|
"loss": 0.0007, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.1878098085814926e-06, |
|
"loss": 0.0016, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.1539692634365788e-06, |
|
"loss": 0.0007, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.1202263451391603e-06, |
|
"loss": 0.0011, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.0865828381745515e-06, |
|
"loss": 0.0013, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.053040521770726e-06, |
|
"loss": 0.0009, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.019601169804216e-06, |
|
"loss": 0.0007, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.986266550706315e-06, |
|
"loss": 0.001, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.9530384273695373e-06, |
|
"loss": 0.0008, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.9199185570544054e-06, |
|
"loss": 0.0009, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.886908691296504e-06, |
|
"loss": 0.0007, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.854010575813856e-06, |
|
"loss": 0.001, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.8212259504146045e-06, |
|
"loss": 0.0011, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.7885565489049948e-06, |
|
"loss": 0.0009, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.7560040989976894e-06, |
|
"loss": 0.0009, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.723570322220399e-06, |
|
"loss": 0.0009, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.6912569338248317e-06, |
|
"loss": 0.0012, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.6590656426959906e-06, |
|
"loss": 0.001, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.626998151261798e-06, |
|
"loss": 0.0011, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.595056155403063e-06, |
|
"loss": 0.0007, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.5632413443637887e-06, |
|
"loss": 0.001, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.5315554006618487e-06, |
|
"loss": 0.0012, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.5000000000000015e-06, |
|
"loss": 0.0009, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.4685768111772647e-06, |
|
"loss": 0.001, |
|
"step": 1685 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.437287496000674e-06, |
|
"loss": 0.0011, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.406133709197392e-06, |
|
"loss": 0.0007, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.3751170983272e-06, |
|
"loss": 0.001, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.3442393036953614e-06, |
|
"loss": 0.0006, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.3135019582658803e-06, |
|
"loss": 0.0007, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.282906687575151e-06, |
|
"loss": 0.0016, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.2524551096459703e-06, |
|
"loss": 0.0013, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.2221488349019903e-06, |
|
"loss": 0.0013, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.1919894660825362e-06, |
|
"loss": 0.0009, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.161978598157857e-06, |
|
"loss": 0.0007, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.132117818244771e-06, |
|
"loss": 0.0013, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.102408705522725e-06, |
|
"loss": 0.0009, |
|
"step": 1745 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.0728528311502977e-06, |
|
"loss": 0.0009, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.0434517581820893e-06, |
|
"loss": 0.0008, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.0142070414860704e-06, |
|
"loss": 0.001, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.9851202276613524e-06, |
|
"loss": 0.0008, |
|
"step": 1765 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.956192854956397e-06, |
|
"loss": 0.0008, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.927426453187663e-06, |
|
"loss": 0.0009, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.8988225436587005e-06, |
|
"loss": 0.0008, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.8703826390797047e-06, |
|
"loss": 0.0006, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.8421082434875133e-06, |
|
"loss": 0.0014, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.814000852166059e-06, |
|
"loss": 0.001, |
|
"step": 1795 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.7860619515673034e-06, |
|
"loss": 0.0005, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.7582930192326187e-06, |
|
"loss": 0.0011, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.7306955237146523e-06, |
|
"loss": 0.0015, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.7032709244996559e-06, |
|
"loss": 0.0007, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.6760206719303107e-06, |
|
"loss": 0.0011, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.6489462071290213e-06, |
|
"loss": 0.0004, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.6220489619216988e-06, |
|
"loss": 0.001, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.5953303587620472e-06, |
|
"loss": 0.0011, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.5687918106563326e-06, |
|
"loss": 0.0009, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.5424347210886538e-06, |
|
"loss": 0.0005, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.5162604839467265e-06, |
|
"loss": 0.0013, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.4902704834481585e-06, |
|
"loss": 0.0011, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.4644660940672628e-06, |
|
"loss": 0.0006, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.4388486804623464e-06, |
|
"loss": 0.0007, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.4134195974035525e-06, |
|
"loss": 0.0006, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.3881801897012225e-06, |
|
"loss": 0.0014, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.3631317921347564e-06, |
|
"loss": 0.0012, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.3382757293820408e-06, |
|
"loss": 0.0012, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.3136133159493803e-06, |
|
"loss": 0.0008, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.2891458561019914e-06, |
|
"loss": 0.0006, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.264874643795021e-06, |
|
"loss": 0.0009, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.2408009626051137e-06, |
|
"loss": 0.0013, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.2169260856625358e-06, |
|
"loss": 0.0011, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.1932512755838448e-06, |
|
"loss": 0.001, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.1697777844051105e-06, |
|
"loss": 0.0009, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.1465068535157098e-06, |
|
"loss": 0.0012, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.1234397135926705e-06, |
|
"loss": 0.0011, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.100577584535592e-06, |
|
"loss": 0.001, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.0779216754021215e-06, |
|
"loss": 0.001, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.0554731843440275e-06, |
|
"loss": 0.0013, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.0332332985438248e-06, |
|
"loss": 0.0007, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.0112031941519934e-06, |
|
"loss": 0.0013, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.893840362247809e-07, |
|
"loss": 0.0008, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.677769786625869e-07, |
|
"loss": 0.0011, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.463831641489391e-07, |
|
"loss": 0.0009, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.252037240900618e-07, |
|
"loss": 0.0012, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.042397785550405e-07, |
|
"loss": 0.0006, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 8.834924362165992e-07, |
|
"loss": 0.0008, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 8.629627942924473e-07, |
|
"loss": 0.0012, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 8.426519384872733e-07, |
|
"loss": 0.0008, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 8.225609429353187e-07, |
|
"loss": 0.0006, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 8.026908701435681e-07, |
|
"loss": 0.0005, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.830427709355726e-07, |
|
"loss": 0.0014, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.636176843958599e-07, |
|
"loss": 0.0009, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.444166378150014e-07, |
|
"loss": 0.0007, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.254406466352682e-07, |
|
"loss": 0.0008, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.066907143969353e-07, |
|
"loss": 0.0011, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 6.881678326852137e-07, |
|
"loss": 0.0008, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 6.698729810778065e-07, |
|
"loss": 0.001, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 6.518071270931059e-07, |
|
"loss": 0.0007, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 6.339712261390213e-07, |
|
"loss": 0.0011, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 6.163662214624616e-07, |
|
"loss": 0.0011, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 5.989930440994451e-07, |
|
"loss": 0.0008, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 5.818526128258622e-07, |
|
"loss": 0.0009, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 5.649458341088915e-07, |
|
"loss": 0.0011, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 5.482736020590551e-07, |
|
"loss": 0.0005, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 5.318367983829393e-07, |
|
"loss": 0.0012, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 5.188574484306829e-07, |
|
"loss": 0.0016, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 5.028465979562792e-07, |
|
"loss": 0.0005, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.87073578250698e-07, |
|
"loss": 0.001, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.7153922346591554e-07, |
|
"loss": 0.0009, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.562443551321788e-07, |
|
"loss": 0.001, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.4118978211455723e-07, |
|
"loss": 0.0008, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.263763005701649e-07, |
|
"loss": 0.0009, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.118046939060566e-07, |
|
"loss": 0.0004, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.9747573273779816e-07, |
|
"loss": 0.0014, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.833901748487151e-07, |
|
"loss": 0.0012, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.6954876514981084e-07, |
|
"loss": 0.0013, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.5595223564037884e-07, |
|
"loss": 0.0011, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.426013053692878e-07, |
|
"loss": 0.0009, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.294966803969574e-07, |
|
"loss": 0.0009, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.166390537580122e-07, |
|
"loss": 0.0008, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.0402910542463915e-07, |
|
"loss": 0.0009, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.916675022706239e-07, |
|
"loss": 0.0013, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.7955489803607907e-07, |
|
"loss": 0.0013, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.676919332928785e-07, |
|
"loss": 0.001, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.560792354107777e-07, |
|
"loss": 0.0009, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.447174185242324e-07, |
|
"loss": 0.0011, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.33607083499926e-07, |
|
"loss": 0.0009, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.2274881790498914e-07, |
|
"loss": 0.0007, |
|
"step": 2195 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.1214319597592792e-07, |
|
"loss": 0.0008, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.0179077858825445e-07, |
|
"loss": 0.001, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.916921132268229e-07, |
|
"loss": 0.0015, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.8184773395688527e-07, |
|
"loss": 0.0009, |
|
"step": 2215 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.7225816139583407e-07, |
|
"loss": 0.0009, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.6292390268568103e-07, |
|
"loss": 0.001, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.5384545146622854e-07, |
|
"loss": 0.0008, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.450232878489699e-07, |
|
"loss": 0.0008, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.3645787839169755e-07, |
|
"loss": 0.001, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.2814967607382433e-07, |
|
"loss": 0.0011, |
|
"step": 2245 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.2009912027243386e-07, |
|
"loss": 0.0009, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.123066367390424e-07, |
|
"loss": 0.001, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.0477263757708078e-07, |
|
"loss": 0.0011, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.749752122010347e-08, |
|
"loss": 0.0006, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.048167241071548e-08, |
|
"loss": 0.0006, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 8.372546218022747e-08, |
|
"loss": 0.0007, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.722924782902985e-08, |
|
"loss": 0.0015, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.09933729077017e-08, |
|
"loss": 0.0008, |
|
"step": 2285 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 6.501816719884091e-08, |
|
"loss": 0.0006, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 5.9303946699620365e-08, |
|
"loss": 0.0011, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 5.3851013605080717e-08, |
|
"loss": 0.0009, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.865965629214819e-08, |
|
"loss": 0.001, |
|
"step": 2305 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.37301493043818e-08, |
|
"loss": 0.0012, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.9062753337454354e-08, |
|
"loss": 0.0016, |
|
"step": 2315 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.465771522536854e-08, |
|
"loss": 0.0014, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.0515267927400116e-08, |
|
"loss": 0.0006, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.6635630515779996e-08, |
|
"loss": 0.0012, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.301900816410574e-08, |
|
"loss": 0.0009, |
|
"step": 2335 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.966559213649577e-08, |
|
"loss": 0.0007, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.657555977746972e-08, |
|
"loss": 0.0015, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.3749074502572012e-08, |
|
"loss": 0.0009, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.1186285789728247e-08, |
|
"loss": 0.0006, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 8.887329171343717e-09, |
|
"loss": 0.001, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 6.852326227130835e-09, |
|
"loss": 0.001, |
|
"step": 2365 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 5.08138457768148e-09, |
|
"loss": 0.0011, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.574597878777675e-09, |
|
"loss": 0.0011, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.3320458164355352e-09, |
|
"loss": 0.0006, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.3537941026914302e-09, |
|
"loss": 0.0006, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 6.398944721297539e-10, |
|
"loss": 0.0012, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.903846791434516e-10, |
|
"loss": 0.0007, |
|
"step": 2395 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 5.288495938948757e-12, |
|
"loss": 0.001, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"step": 2400, |
|
"total_flos": 3.3948129315510026e+18, |
|
"train_loss": 0.025274057275091764, |
|
"train_runtime": 26505.156, |
|
"train_samples_per_second": 1.449, |
|
"train_steps_per_second": 0.091 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 2400, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 3.3948129315510026e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |