zephyr-7b-pl-qlora / trainer_state.json
sengi's picture
Model save
1b7e55e verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.2754030638590854,
"eval_steps": 500,
"global_step": 2400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.166666666666667e-08,
"loss": 0.6931,
"step": 1
},
{
"epoch": 0.0,
"learning_rate": 2.0833333333333333e-07,
"loss": 0.6932,
"step": 5
},
{
"epoch": 0.0,
"learning_rate": 4.1666666666666667e-07,
"loss": 0.6926,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 6.25e-07,
"loss": 0.6907,
"step": 15
},
{
"epoch": 0.0,
"learning_rate": 8.333333333333333e-07,
"loss": 0.6867,
"step": 20
},
{
"epoch": 0.0,
"learning_rate": 1.0416666666666667e-06,
"loss": 0.6798,
"step": 25
},
{
"epoch": 0.0,
"learning_rate": 1.25e-06,
"loss": 0.6683,
"step": 30
},
{
"epoch": 0.0,
"learning_rate": 1.4583333333333335e-06,
"loss": 0.6532,
"step": 35
},
{
"epoch": 0.0,
"learning_rate": 1.6666666666666667e-06,
"loss": 0.635,
"step": 40
},
{
"epoch": 0.01,
"learning_rate": 1.8750000000000003e-06,
"loss": 0.6108,
"step": 45
},
{
"epoch": 0.01,
"learning_rate": 2.0833333333333334e-06,
"loss": 0.5844,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 2.2916666666666666e-06,
"loss": 0.5544,
"step": 55
},
{
"epoch": 0.01,
"learning_rate": 2.5e-06,
"loss": 0.5227,
"step": 60
},
{
"epoch": 0.01,
"learning_rate": 2.7083333333333334e-06,
"loss": 0.4867,
"step": 65
},
{
"epoch": 0.01,
"learning_rate": 2.916666666666667e-06,
"loss": 0.4471,
"step": 70
},
{
"epoch": 0.01,
"learning_rate": 3.125e-06,
"loss": 0.4093,
"step": 75
},
{
"epoch": 0.01,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.3671,
"step": 80
},
{
"epoch": 0.01,
"learning_rate": 3.5416666666666673e-06,
"loss": 0.3131,
"step": 85
},
{
"epoch": 0.01,
"learning_rate": 3.7500000000000005e-06,
"loss": 0.267,
"step": 90
},
{
"epoch": 0.01,
"learning_rate": 3.958333333333333e-06,
"loss": 0.2313,
"step": 95
},
{
"epoch": 0.01,
"learning_rate": 4.166666666666667e-06,
"loss": 0.1819,
"step": 100
},
{
"epoch": 0.01,
"learning_rate": 4.3750000000000005e-06,
"loss": 0.142,
"step": 105
},
{
"epoch": 0.01,
"learning_rate": 4.583333333333333e-06,
"loss": 0.1105,
"step": 110
},
{
"epoch": 0.01,
"learning_rate": 4.791666666666668e-06,
"loss": 0.0886,
"step": 115
},
{
"epoch": 0.01,
"learning_rate": 5e-06,
"loss": 0.0729,
"step": 120
},
{
"epoch": 0.01,
"learning_rate": 5.208333333333334e-06,
"loss": 0.0656,
"step": 125
},
{
"epoch": 0.01,
"learning_rate": 5.416666666666667e-06,
"loss": 0.0502,
"step": 130
},
{
"epoch": 0.02,
"learning_rate": 5.625e-06,
"loss": 0.0511,
"step": 135
},
{
"epoch": 0.02,
"learning_rate": 5.833333333333334e-06,
"loss": 0.0404,
"step": 140
},
{
"epoch": 0.02,
"learning_rate": 6.041666666666667e-06,
"loss": 0.0422,
"step": 145
},
{
"epoch": 0.02,
"learning_rate": 6.25e-06,
"loss": 0.0345,
"step": 150
},
{
"epoch": 0.02,
"learning_rate": 6.458333333333334e-06,
"loss": 0.0282,
"step": 155
},
{
"epoch": 0.02,
"learning_rate": 6.666666666666667e-06,
"loss": 0.0292,
"step": 160
},
{
"epoch": 0.02,
"learning_rate": 6.875e-06,
"loss": 0.0279,
"step": 165
},
{
"epoch": 0.02,
"learning_rate": 7.083333333333335e-06,
"loss": 0.0236,
"step": 170
},
{
"epoch": 0.02,
"learning_rate": 7.291666666666667e-06,
"loss": 0.0219,
"step": 175
},
{
"epoch": 0.02,
"learning_rate": 7.500000000000001e-06,
"loss": 0.0223,
"step": 180
},
{
"epoch": 0.02,
"learning_rate": 7.708333333333334e-06,
"loss": 0.0225,
"step": 185
},
{
"epoch": 0.02,
"learning_rate": 7.916666666666667e-06,
"loss": 0.0175,
"step": 190
},
{
"epoch": 0.02,
"learning_rate": 8.125000000000001e-06,
"loss": 0.0152,
"step": 195
},
{
"epoch": 0.02,
"learning_rate": 8.333333333333334e-06,
"loss": 0.0153,
"step": 200
},
{
"epoch": 0.02,
"learning_rate": 8.541666666666666e-06,
"loss": 0.0132,
"step": 205
},
{
"epoch": 0.02,
"learning_rate": 8.750000000000001e-06,
"loss": 0.0134,
"step": 210
},
{
"epoch": 0.02,
"learning_rate": 8.958333333333334e-06,
"loss": 0.0144,
"step": 215
},
{
"epoch": 0.03,
"learning_rate": 9.166666666666666e-06,
"loss": 0.0123,
"step": 220
},
{
"epoch": 0.03,
"learning_rate": 9.375000000000001e-06,
"loss": 0.0115,
"step": 225
},
{
"epoch": 0.03,
"learning_rate": 9.583333333333335e-06,
"loss": 0.0123,
"step": 230
},
{
"epoch": 0.03,
"learning_rate": 9.791666666666666e-06,
"loss": 0.0091,
"step": 235
},
{
"epoch": 0.03,
"learning_rate": 1e-05,
"loss": 0.0087,
"step": 240
},
{
"epoch": 0.03,
"learning_rate": 9.999867788160888e-06,
"loss": 0.0095,
"step": 245
},
{
"epoch": 0.03,
"learning_rate": 9.999471159635538e-06,
"loss": 0.0089,
"step": 250
},
{
"epoch": 0.03,
"learning_rate": 9.998810135399545e-06,
"loss": 0.0075,
"step": 255
},
{
"epoch": 0.03,
"learning_rate": 9.997884750411004e-06,
"loss": 0.0082,
"step": 260
},
{
"epoch": 0.03,
"learning_rate": 9.996695053608651e-06,
"loss": 0.007,
"step": 265
},
{
"epoch": 0.03,
"learning_rate": 9.99524110790929e-06,
"loss": 0.0067,
"step": 270
},
{
"epoch": 0.03,
"learning_rate": 9.993522990204453e-06,
"loss": 0.0075,
"step": 275
},
{
"epoch": 0.03,
"learning_rate": 9.991540791356342e-06,
"loss": 0.0058,
"step": 280
},
{
"epoch": 0.03,
"learning_rate": 9.989294616193018e-06,
"loss": 0.0066,
"step": 285
},
{
"epoch": 0.03,
"learning_rate": 9.986784583502863e-06,
"loss": 0.0049,
"step": 290
},
{
"epoch": 0.03,
"learning_rate": 9.984010826028289e-06,
"loss": 0.0067,
"step": 295
},
{
"epoch": 0.03,
"learning_rate": 9.980973490458728e-06,
"loss": 0.0055,
"step": 300
},
{
"epoch": 0.03,
"learning_rate": 9.97767273742287e-06,
"loss": 0.0069,
"step": 305
},
{
"epoch": 0.04,
"learning_rate": 9.974108741480167e-06,
"loss": 0.0067,
"step": 310
},
{
"epoch": 0.04,
"learning_rate": 9.970281691111598e-06,
"loss": 0.0051,
"step": 315
},
{
"epoch": 0.04,
"learning_rate": 9.966191788709716e-06,
"loss": 0.0053,
"step": 320
},
{
"epoch": 0.04,
"learning_rate": 9.961839250567925e-06,
"loss": 0.0051,
"step": 325
},
{
"epoch": 0.04,
"learning_rate": 9.957224306869053e-06,
"loss": 0.0047,
"step": 330
},
{
"epoch": 0.04,
"learning_rate": 9.952347201673181e-06,
"loss": 0.0053,
"step": 335
},
{
"epoch": 0.04,
"learning_rate": 9.947208192904722e-06,
"loss": 0.0052,
"step": 340
},
{
"epoch": 0.04,
"learning_rate": 9.941807552338805e-06,
"loss": 0.0052,
"step": 345
},
{
"epoch": 0.04,
"learning_rate": 9.936145565586871e-06,
"loss": 0.0046,
"step": 350
},
{
"epoch": 0.04,
"learning_rate": 9.930222532081597e-06,
"loss": 0.0048,
"step": 355
},
{
"epoch": 0.04,
"learning_rate": 9.924038765061042e-06,
"loss": 0.005,
"step": 360
},
{
"epoch": 0.04,
"learning_rate": 9.91759459155209e-06,
"loss": 0.0034,
"step": 365
},
{
"epoch": 0.04,
"learning_rate": 9.910890352353154e-06,
"loss": 0.0042,
"step": 370
},
{
"epoch": 0.04,
"learning_rate": 9.903926402016153e-06,
"loss": 0.0045,
"step": 375
},
{
"epoch": 0.04,
"learning_rate": 9.896703108827758e-06,
"loss": 0.0037,
"step": 380
},
{
"epoch": 0.04,
"learning_rate": 9.88922085478992e-06,
"loss": 0.0043,
"step": 385
},
{
"epoch": 0.04,
"learning_rate": 9.881480035599667e-06,
"loss": 0.0036,
"step": 390
},
{
"epoch": 0.05,
"learning_rate": 9.873481060628175e-06,
"loss": 0.0034,
"step": 395
},
{
"epoch": 0.05,
"learning_rate": 9.86522435289912e-06,
"loss": 0.004,
"step": 400
},
{
"epoch": 0.05,
"learning_rate": 9.856710349066307e-06,
"loss": 0.005,
"step": 405
},
{
"epoch": 0.05,
"learning_rate": 9.847939499390581e-06,
"loss": 0.0042,
"step": 410
},
{
"epoch": 0.05,
"learning_rate": 9.838912267716006e-06,
"loss": 0.0033,
"step": 415
},
{
"epoch": 0.05,
"learning_rate": 9.829629131445342e-06,
"loss": 0.0032,
"step": 420
},
{
"epoch": 0.05,
"learning_rate": 9.820090581514799e-06,
"loss": 0.0034,
"step": 425
},
{
"epoch": 0.05,
"learning_rate": 9.810297122368067e-06,
"loss": 0.0043,
"step": 430
},
{
"epoch": 0.05,
"learning_rate": 9.800249271929645e-06,
"loss": 0.004,
"step": 435
},
{
"epoch": 0.05,
"learning_rate": 9.789947561577445e-06,
"loss": 0.0036,
"step": 440
},
{
"epoch": 0.05,
"learning_rate": 9.779392536114698e-06,
"loss": 0.0029,
"step": 445
},
{
"epoch": 0.05,
"learning_rate": 9.768584753741134e-06,
"loss": 0.004,
"step": 450
},
{
"epoch": 0.05,
"learning_rate": 9.75752478602347e-06,
"loss": 0.0035,
"step": 455
},
{
"epoch": 0.05,
"learning_rate": 9.74621321786517e-06,
"loss": 0.0026,
"step": 460
},
{
"epoch": 0.05,
"learning_rate": 9.73465064747553e-06,
"loss": 0.0024,
"step": 465
},
{
"epoch": 0.05,
"learning_rate": 9.722837686338025e-06,
"loss": 0.0027,
"step": 470
},
{
"epoch": 0.05,
"learning_rate": 9.710774959177983e-06,
"loss": 0.0031,
"step": 475
},
{
"epoch": 0.06,
"learning_rate": 9.698463103929542e-06,
"loss": 0.0034,
"step": 480
},
{
"epoch": 0.06,
"learning_rate": 9.685902771701913e-06,
"loss": 0.0044,
"step": 485
},
{
"epoch": 0.06,
"learning_rate": 9.673094626744944e-06,
"loss": 0.0028,
"step": 490
},
{
"epoch": 0.06,
"learning_rate": 9.660039346413994e-06,
"loss": 0.003,
"step": 495
},
{
"epoch": 0.06,
"learning_rate": 9.646737621134112e-06,
"loss": 0.0027,
"step": 500
},
{
"epoch": 0.06,
"learning_rate": 9.633190154363527e-06,
"loss": 0.0042,
"step": 505
},
{
"epoch": 0.06,
"learning_rate": 9.619397662556434e-06,
"loss": 0.0024,
"step": 510
},
{
"epoch": 0.06,
"learning_rate": 9.605360875125119e-06,
"loss": 0.0023,
"step": 515
},
{
"epoch": 0.06,
"learning_rate": 9.591080534401371e-06,
"loss": 0.0034,
"step": 520
},
{
"epoch": 0.06,
"learning_rate": 9.576557395597237e-06,
"loss": 0.002,
"step": 525
},
{
"epoch": 0.06,
"learning_rate": 9.561792226765072e-06,
"loss": 0.0034,
"step": 530
},
{
"epoch": 0.06,
"learning_rate": 9.546785808756926e-06,
"loss": 0.0028,
"step": 535
},
{
"epoch": 0.06,
"learning_rate": 9.531538935183252e-06,
"loss": 0.0033,
"step": 540
},
{
"epoch": 0.06,
"learning_rate": 9.516052412370922e-06,
"loss": 0.0028,
"step": 545
},
{
"epoch": 0.06,
"learning_rate": 9.500327059320606e-06,
"loss": 0.0032,
"step": 550
},
{
"epoch": 0.06,
"learning_rate": 9.484363707663443e-06,
"loss": 0.0035,
"step": 555
},
{
"epoch": 0.06,
"learning_rate": 9.468163201617063e-06,
"loss": 0.0029,
"step": 560
},
{
"epoch": 0.06,
"learning_rate": 9.451726397940946e-06,
"loss": 0.0032,
"step": 565
},
{
"epoch": 0.07,
"learning_rate": 9.43505416589111e-06,
"loss": 0.0028,
"step": 570
},
{
"epoch": 0.07,
"learning_rate": 9.41814738717414e-06,
"loss": 0.0021,
"step": 575
},
{
"epoch": 0.07,
"learning_rate": 9.401006955900555e-06,
"loss": 0.002,
"step": 580
},
{
"epoch": 0.07,
"learning_rate": 9.38363377853754e-06,
"loss": 0.0023,
"step": 585
},
{
"epoch": 0.07,
"learning_rate": 9.366028773860981e-06,
"loss": 0.0024,
"step": 590
},
{
"epoch": 0.07,
"learning_rate": 9.348192872906896e-06,
"loss": 0.0029,
"step": 595
},
{
"epoch": 0.07,
"learning_rate": 9.330127018922195e-06,
"loss": 0.0014,
"step": 600
},
{
"epoch": 0.07,
"learning_rate": 9.311832167314788e-06,
"loss": 0.0021,
"step": 605
},
{
"epoch": 0.07,
"learning_rate": 9.293309285603066e-06,
"loss": 0.0016,
"step": 610
},
{
"epoch": 0.07,
"learning_rate": 9.274559353364734e-06,
"loss": 0.0021,
"step": 615
},
{
"epoch": 0.07,
"learning_rate": 9.255583362184998e-06,
"loss": 0.0025,
"step": 620
},
{
"epoch": 0.07,
"learning_rate": 9.23638231560414e-06,
"loss": 0.0021,
"step": 625
},
{
"epoch": 0.07,
"learning_rate": 9.21695722906443e-06,
"loss": 0.0026,
"step": 630
},
{
"epoch": 0.07,
"learning_rate": 9.197309129856433e-06,
"loss": 0.0026,
"step": 635
},
{
"epoch": 0.07,
"learning_rate": 9.177439057064684e-06,
"loss": 0.0026,
"step": 640
},
{
"epoch": 0.07,
"learning_rate": 9.157348061512728e-06,
"loss": 0.0027,
"step": 645
},
{
"epoch": 0.07,
"learning_rate": 9.137037205707552e-06,
"loss": 0.0015,
"step": 650
},
{
"epoch": 0.08,
"learning_rate": 9.116507563783402e-06,
"loss": 0.0015,
"step": 655
},
{
"epoch": 0.08,
"learning_rate": 9.09576022144496e-06,
"loss": 0.0019,
"step": 660
},
{
"epoch": 0.08,
"learning_rate": 9.074796275909941e-06,
"loss": 0.0024,
"step": 665
},
{
"epoch": 0.08,
"learning_rate": 9.053616835851062e-06,
"loss": 0.0025,
"step": 670
},
{
"epoch": 0.08,
"learning_rate": 9.032223021337415e-06,
"loss": 0.0021,
"step": 675
},
{
"epoch": 0.08,
"learning_rate": 9.01061596377522e-06,
"loss": 0.0021,
"step": 680
},
{
"epoch": 0.08,
"learning_rate": 8.988796805848008e-06,
"loss": 0.0022,
"step": 685
},
{
"epoch": 0.08,
"learning_rate": 8.966766701456177e-06,
"loss": 0.0025,
"step": 690
},
{
"epoch": 0.08,
"learning_rate": 8.944526815655974e-06,
"loss": 0.0019,
"step": 695
},
{
"epoch": 0.08,
"learning_rate": 8.92207832459788e-06,
"loss": 0.0026,
"step": 700
},
{
"epoch": 0.08,
"learning_rate": 8.899422415464409e-06,
"loss": 0.0019,
"step": 705
},
{
"epoch": 0.08,
"learning_rate": 8.87656028640733e-06,
"loss": 0.0019,
"step": 710
},
{
"epoch": 0.08,
"learning_rate": 8.85349314648429e-06,
"loss": 0.0015,
"step": 715
},
{
"epoch": 0.08,
"learning_rate": 8.83022221559489e-06,
"loss": 0.0017,
"step": 720
},
{
"epoch": 0.08,
"learning_rate": 8.806748724416156e-06,
"loss": 0.002,
"step": 725
},
{
"epoch": 0.08,
"learning_rate": 8.783073914337465e-06,
"loss": 0.0023,
"step": 730
},
{
"epoch": 0.08,
"learning_rate": 8.759199037394888e-06,
"loss": 0.0019,
"step": 735
},
{
"epoch": 0.08,
"learning_rate": 8.735125356204982e-06,
"loss": 0.0018,
"step": 740
},
{
"epoch": 0.09,
"learning_rate": 8.710854143898008e-06,
"loss": 0.0018,
"step": 745
},
{
"epoch": 0.09,
"learning_rate": 8.68638668405062e-06,
"loss": 0.0017,
"step": 750
},
{
"epoch": 0.09,
"learning_rate": 8.661724270617961e-06,
"loss": 0.002,
"step": 755
},
{
"epoch": 0.09,
"learning_rate": 8.636868207865244e-06,
"loss": 0.0023,
"step": 760
},
{
"epoch": 0.09,
"learning_rate": 8.611819810298778e-06,
"loss": 0.0019,
"step": 765
},
{
"epoch": 0.09,
"learning_rate": 8.586580402596448e-06,
"loss": 0.0017,
"step": 770
},
{
"epoch": 0.09,
"learning_rate": 8.561151319537656e-06,
"loss": 0.0018,
"step": 775
},
{
"epoch": 0.09,
"learning_rate": 8.535533905932739e-06,
"loss": 0.0019,
"step": 780
},
{
"epoch": 0.09,
"learning_rate": 8.509729516551842e-06,
"loss": 0.0022,
"step": 785
},
{
"epoch": 0.09,
"learning_rate": 8.483739516053276e-06,
"loss": 0.0015,
"step": 790
},
{
"epoch": 0.09,
"learning_rate": 8.457565278911349e-06,
"loss": 0.0017,
"step": 795
},
{
"epoch": 0.09,
"learning_rate": 8.43120818934367e-06,
"loss": 0.0024,
"step": 800
},
{
"epoch": 0.09,
"learning_rate": 8.404669641237952e-06,
"loss": 0.0013,
"step": 805
},
{
"epoch": 0.09,
"learning_rate": 8.377951038078303e-06,
"loss": 0.0017,
"step": 810
},
{
"epoch": 0.09,
"learning_rate": 8.35105379287098e-06,
"loss": 0.002,
"step": 815
},
{
"epoch": 0.09,
"learning_rate": 8.323979328069689e-06,
"loss": 0.0015,
"step": 820
},
{
"epoch": 0.09,
"learning_rate": 8.296729075500345e-06,
"loss": 0.0026,
"step": 825
},
{
"epoch": 0.1,
"learning_rate": 8.269304476285349e-06,
"loss": 0.0017,
"step": 830
},
{
"epoch": 0.1,
"learning_rate": 8.241706980767382e-06,
"loss": 0.0017,
"step": 835
},
{
"epoch": 0.1,
"learning_rate": 8.213938048432697e-06,
"loss": 0.0011,
"step": 840
},
{
"epoch": 0.1,
"learning_rate": 8.185999147833943e-06,
"loss": 0.0013,
"step": 845
},
{
"epoch": 0.1,
"learning_rate": 8.157891756512488e-06,
"loss": 0.0016,
"step": 850
},
{
"epoch": 0.1,
"learning_rate": 8.129617360920297e-06,
"loss": 0.0017,
"step": 855
},
{
"epoch": 0.1,
"learning_rate": 8.101177456341301e-06,
"loss": 0.0018,
"step": 860
},
{
"epoch": 0.1,
"learning_rate": 8.072573546812338e-06,
"loss": 0.0012,
"step": 865
},
{
"epoch": 0.1,
"learning_rate": 8.043807145043604e-06,
"loss": 0.002,
"step": 870
},
{
"epoch": 0.1,
"learning_rate": 8.014879772338649e-06,
"loss": 0.0012,
"step": 875
},
{
"epoch": 0.1,
"learning_rate": 7.985792958513932e-06,
"loss": 0.0017,
"step": 880
},
{
"epoch": 0.1,
"learning_rate": 7.956548241817914e-06,
"loss": 0.0015,
"step": 885
},
{
"epoch": 0.1,
"learning_rate": 7.927147168849704e-06,
"loss": 0.0013,
"step": 890
},
{
"epoch": 0.1,
"learning_rate": 7.897591294477276e-06,
"loss": 0.0016,
"step": 895
},
{
"epoch": 0.1,
"learning_rate": 7.86788218175523e-06,
"loss": 0.0014,
"step": 900
},
{
"epoch": 0.1,
"learning_rate": 7.838021401842145e-06,
"loss": 0.0013,
"step": 905
},
{
"epoch": 0.1,
"learning_rate": 7.808010533917465e-06,
"loss": 0.0014,
"step": 910
},
{
"epoch": 0.1,
"learning_rate": 7.777851165098012e-06,
"loss": 0.0017,
"step": 915
},
{
"epoch": 0.11,
"learning_rate": 7.747544890354031e-06,
"loss": 0.0017,
"step": 920
},
{
"epoch": 0.11,
"learning_rate": 7.71709331242485e-06,
"loss": 0.0014,
"step": 925
},
{
"epoch": 0.11,
"learning_rate": 7.686498041734121e-06,
"loss": 0.0019,
"step": 930
},
{
"epoch": 0.11,
"learning_rate": 7.655760696304642e-06,
"loss": 0.0012,
"step": 935
},
{
"epoch": 0.11,
"learning_rate": 7.624882901672801e-06,
"loss": 0.0017,
"step": 940
},
{
"epoch": 0.11,
"learning_rate": 7.593866290802608e-06,
"loss": 0.0012,
"step": 945
},
{
"epoch": 0.11,
"learning_rate": 7.562712503999327e-06,
"loss": 0.0009,
"step": 950
},
{
"epoch": 0.11,
"learning_rate": 7.531423188822738e-06,
"loss": 0.0014,
"step": 955
},
{
"epoch": 0.11,
"learning_rate": 7.500000000000001e-06,
"loss": 0.0014,
"step": 960
},
{
"epoch": 0.11,
"learning_rate": 7.468444599338152e-06,
"loss": 0.0012,
"step": 965
},
{
"epoch": 0.11,
"learning_rate": 7.4367586556362125e-06,
"loss": 0.0011,
"step": 970
},
{
"epoch": 0.11,
"learning_rate": 7.404943844596939e-06,
"loss": 0.0011,
"step": 975
},
{
"epoch": 0.11,
"learning_rate": 7.373001848738203e-06,
"loss": 0.0015,
"step": 980
},
{
"epoch": 0.11,
"learning_rate": 7.340934357304011e-06,
"loss": 0.002,
"step": 985
},
{
"epoch": 0.11,
"learning_rate": 7.308743066175172e-06,
"loss": 0.0012,
"step": 990
},
{
"epoch": 0.11,
"learning_rate": 7.276429677779603e-06,
"loss": 0.0016,
"step": 995
},
{
"epoch": 0.11,
"learning_rate": 7.243995901002312e-06,
"loss": 0.0019,
"step": 1000
},
{
"epoch": 0.12,
"learning_rate": 7.211443451095007e-06,
"loss": 0.0013,
"step": 1005
},
{
"epoch": 0.12,
"learning_rate": 7.178774049585397e-06,
"loss": 0.0014,
"step": 1010
},
{
"epoch": 0.12,
"learning_rate": 7.145989424186146e-06,
"loss": 0.0021,
"step": 1015
},
{
"epoch": 0.12,
"learning_rate": 7.113091308703498e-06,
"loss": 0.0012,
"step": 1020
},
{
"epoch": 0.12,
"learning_rate": 7.080081442945597e-06,
"loss": 0.0014,
"step": 1025
},
{
"epoch": 0.12,
"learning_rate": 7.046961572630463e-06,
"loss": 0.0013,
"step": 1030
},
{
"epoch": 0.12,
"learning_rate": 7.0137334492936875e-06,
"loss": 0.0014,
"step": 1035
},
{
"epoch": 0.12,
"learning_rate": 6.980398830195785e-06,
"loss": 0.0014,
"step": 1040
},
{
"epoch": 0.12,
"learning_rate": 6.946959478229277e-06,
"loss": 0.0013,
"step": 1045
},
{
"epoch": 0.12,
"learning_rate": 6.913417161825449e-06,
"loss": 0.001,
"step": 1050
},
{
"epoch": 0.12,
"learning_rate": 6.8797736548608405e-06,
"loss": 0.0019,
"step": 1055
},
{
"epoch": 0.12,
"learning_rate": 6.8460307365634225e-06,
"loss": 0.0014,
"step": 1060
},
{
"epoch": 0.12,
"learning_rate": 6.812190191418508e-06,
"loss": 0.0013,
"step": 1065
},
{
"epoch": 0.12,
"learning_rate": 6.778253809074384e-06,
"loss": 0.0009,
"step": 1070
},
{
"epoch": 0.12,
"learning_rate": 6.7442233842476545e-06,
"loss": 0.0015,
"step": 1075
},
{
"epoch": 0.12,
"learning_rate": 6.710100716628345e-06,
"loss": 0.0009,
"step": 1080
},
{
"epoch": 0.12,
"learning_rate": 6.675887610784708e-06,
"loss": 0.0014,
"step": 1085
},
{
"epoch": 0.13,
"learning_rate": 6.641585876067807e-06,
"loss": 0.0015,
"step": 1090
},
{
"epoch": 0.13,
"learning_rate": 6.607197326515808e-06,
"loss": 0.001,
"step": 1095
},
{
"epoch": 0.13,
"learning_rate": 6.572723780758069e-06,
"loss": 0.0008,
"step": 1100
},
{
"epoch": 0.13,
"learning_rate": 6.538167061918942e-06,
"loss": 0.0011,
"step": 1105
},
{
"epoch": 0.13,
"learning_rate": 6.503528997521365e-06,
"loss": 0.0017,
"step": 1110
},
{
"epoch": 0.13,
"learning_rate": 6.468811419390222e-06,
"loss": 0.0016,
"step": 1115
},
{
"epoch": 0.13,
"learning_rate": 6.434016163555452e-06,
"loss": 0.0012,
"step": 1120
},
{
"epoch": 0.13,
"learning_rate": 6.399145070154962e-06,
"loss": 0.0014,
"step": 1125
},
{
"epoch": 0.13,
"learning_rate": 6.364199983337306e-06,
"loss": 0.0012,
"step": 1130
},
{
"epoch": 0.13,
"learning_rate": 6.329182751164164e-06,
"loss": 0.0013,
"step": 1135
},
{
"epoch": 0.13,
"learning_rate": 6.294095225512604e-06,
"loss": 0.0011,
"step": 1140
},
{
"epoch": 0.13,
"learning_rate": 6.2589392619771435e-06,
"loss": 0.0012,
"step": 1145
},
{
"epoch": 0.13,
"learning_rate": 6.2237167197716195e-06,
"loss": 0.0011,
"step": 1150
},
{
"epoch": 0.13,
"learning_rate": 6.188429461630866e-06,
"loss": 0.0012,
"step": 1155
},
{
"epoch": 0.13,
"learning_rate": 6.153079353712201e-06,
"loss": 0.0016,
"step": 1160
},
{
"epoch": 0.13,
"learning_rate": 6.117668265496738e-06,
"loss": 0.0009,
"step": 1165
},
{
"epoch": 0.13,
"learning_rate": 6.0821980696905145e-06,
"loss": 0.0016,
"step": 1170
},
{
"epoch": 0.13,
"learning_rate": 6.046670642125461e-06,
"loss": 0.0009,
"step": 1175
},
{
"epoch": 0.14,
"learning_rate": 6.011087861660191e-06,
"loss": 0.0015,
"step": 1180
},
{
"epoch": 0.14,
"learning_rate": 5.975451610080643e-06,
"loss": 0.0015,
"step": 1185
},
{
"epoch": 0.14,
"learning_rate": 5.93976377200056e-06,
"loss": 0.0013,
"step": 1190
},
{
"epoch": 0.14,
"learning_rate": 5.904026234761827e-06,
"loss": 0.001,
"step": 1195
},
{
"epoch": 0.14,
"learning_rate": 5.8682408883346535e-06,
"loss": 0.0013,
"step": 1200
},
{
"epoch": 0.14,
"learning_rate": 5.832409625217623e-06,
"loss": 0.0014,
"step": 1205
},
{
"epoch": 0.14,
"learning_rate": 5.796534340337614e-06,
"loss": 0.0017,
"step": 1210
},
{
"epoch": 0.14,
"learning_rate": 5.760616930949584e-06,
"loss": 0.0015,
"step": 1215
},
{
"epoch": 0.14,
"learning_rate": 5.724659296536234e-06,
"loss": 0.001,
"step": 1220
},
{
"epoch": 0.14,
"learning_rate": 5.688663338707554e-06,
"loss": 0.0013,
"step": 1225
},
{
"epoch": 0.14,
"learning_rate": 5.65263096110026e-06,
"loss": 0.001,
"step": 1230
},
{
"epoch": 0.14,
"learning_rate": 5.616564069277111e-06,
"loss": 0.0012,
"step": 1235
},
{
"epoch": 0.14,
"learning_rate": 5.5804645706261515e-06,
"loss": 0.0021,
"step": 1240
},
{
"epoch": 0.14,
"learning_rate": 5.544334374259823e-06,
"loss": 0.0012,
"step": 1245
},
{
"epoch": 0.14,
"learning_rate": 5.50817539091401e-06,
"loss": 0.0015,
"step": 1250
},
{
"epoch": 0.14,
"learning_rate": 5.471989532846987e-06,
"loss": 0.0008,
"step": 1255
},
{
"epoch": 0.14,
"learning_rate": 5.435778713738292e-06,
"loss": 0.0009,
"step": 1260
},
{
"epoch": 0.15,
"learning_rate": 5.3995448485875205e-06,
"loss": 0.0013,
"step": 1265
},
{
"epoch": 0.15,
"learning_rate": 5.363289853613054e-06,
"loss": 0.0012,
"step": 1270
},
{
"epoch": 0.15,
"learning_rate": 5.327015646150716e-06,
"loss": 0.0017,
"step": 1275
},
{
"epoch": 0.15,
"learning_rate": 5.290724144552379e-06,
"loss": 0.0009,
"step": 1280
},
{
"epoch": 0.15,
"learning_rate": 5.254417268084514e-06,
"loss": 0.0009,
"step": 1285
},
{
"epoch": 0.15,
"learning_rate": 5.218096936826681e-06,
"loss": 0.0011,
"step": 1290
},
{
"epoch": 0.15,
"learning_rate": 5.18176507157e-06,
"loss": 0.0009,
"step": 1295
},
{
"epoch": 0.15,
"learning_rate": 5.145423593715558e-06,
"loss": 0.0012,
"step": 1300
},
{
"epoch": 0.15,
"learning_rate": 5.109074425172806e-06,
"loss": 0.0016,
"step": 1305
},
{
"epoch": 0.15,
"learning_rate": 5.072719488257915e-06,
"loss": 0.0014,
"step": 1310
},
{
"epoch": 0.15,
"learning_rate": 5.03636070559211e-06,
"loss": 0.0014,
"step": 1315
},
{
"epoch": 0.15,
"learning_rate": 5e-06,
"loss": 0.001,
"step": 1320
},
{
"epoch": 0.15,
"learning_rate": 4.963639294407893e-06,
"loss": 0.0009,
"step": 1325
},
{
"epoch": 0.15,
"learning_rate": 4.927280511742087e-06,
"loss": 0.0011,
"step": 1330
},
{
"epoch": 0.15,
"learning_rate": 4.890925574827195e-06,
"loss": 0.001,
"step": 1335
},
{
"epoch": 0.15,
"learning_rate": 4.854576406284443e-06,
"loss": 0.0007,
"step": 1340
},
{
"epoch": 0.15,
"learning_rate": 4.818234928430003e-06,
"loss": 0.0009,
"step": 1345
},
{
"epoch": 0.15,
"learning_rate": 4.781903063173321e-06,
"loss": 0.0014,
"step": 1350
},
{
"epoch": 0.16,
"learning_rate": 4.745582731915488e-06,
"loss": 0.0008,
"step": 1355
},
{
"epoch": 0.16,
"learning_rate": 4.7092758554476215e-06,
"loss": 0.0007,
"step": 1360
},
{
"epoch": 0.16,
"learning_rate": 4.672984353849285e-06,
"loss": 0.0019,
"step": 1365
},
{
"epoch": 0.16,
"learning_rate": 4.636710146386948e-06,
"loss": 0.0012,
"step": 1370
},
{
"epoch": 0.16,
"learning_rate": 4.600455151412482e-06,
"loss": 0.0016,
"step": 1375
},
{
"epoch": 0.16,
"learning_rate": 4.564221286261709e-06,
"loss": 0.0014,
"step": 1380
},
{
"epoch": 0.16,
"learning_rate": 4.528010467153015e-06,
"loss": 0.0006,
"step": 1385
},
{
"epoch": 0.16,
"learning_rate": 4.4918246090859905e-06,
"loss": 0.0011,
"step": 1390
},
{
"epoch": 0.16,
"learning_rate": 4.4556656257401786e-06,
"loss": 0.0013,
"step": 1395
},
{
"epoch": 0.16,
"learning_rate": 4.4195354293738484e-06,
"loss": 0.0009,
"step": 1400
},
{
"epoch": 0.16,
"learning_rate": 4.38343593072289e-06,
"loss": 0.0009,
"step": 1405
},
{
"epoch": 0.16,
"learning_rate": 4.347369038899744e-06,
"loss": 0.0012,
"step": 1410
},
{
"epoch": 0.16,
"learning_rate": 4.311336661292447e-06,
"loss": 0.0007,
"step": 1415
},
{
"epoch": 0.16,
"learning_rate": 4.275340703463767e-06,
"loss": 0.0007,
"step": 1420
},
{
"epoch": 0.16,
"learning_rate": 4.239383069050417e-06,
"loss": 0.001,
"step": 1425
},
{
"epoch": 0.16,
"learning_rate": 4.203465659662388e-06,
"loss": 0.001,
"step": 1430
},
{
"epoch": 0.16,
"learning_rate": 4.1675903747823795e-06,
"loss": 0.0009,
"step": 1435
},
{
"epoch": 0.17,
"learning_rate": 4.131759111665349e-06,
"loss": 0.0012,
"step": 1440
},
{
"epoch": 0.17,
"learning_rate": 4.0959737652381745e-06,
"loss": 0.001,
"step": 1445
},
{
"epoch": 0.17,
"learning_rate": 4.060236227999441e-06,
"loss": 0.0012,
"step": 1450
},
{
"epoch": 0.17,
"learning_rate": 4.02454838991936e-06,
"loss": 0.0009,
"step": 1455
},
{
"epoch": 0.17,
"learning_rate": 3.988912138339812e-06,
"loss": 0.0016,
"step": 1460
},
{
"epoch": 0.17,
"learning_rate": 3.95332935787454e-06,
"loss": 0.0013,
"step": 1465
},
{
"epoch": 0.17,
"learning_rate": 3.917801930309486e-06,
"loss": 0.0012,
"step": 1470
},
{
"epoch": 0.17,
"learning_rate": 3.882331734503263e-06,
"loss": 0.0011,
"step": 1475
},
{
"epoch": 0.17,
"learning_rate": 3.8469206462878e-06,
"loss": 0.0011,
"step": 1480
},
{
"epoch": 0.17,
"learning_rate": 3.8115705383691354e-06,
"loss": 0.0012,
"step": 1485
},
{
"epoch": 0.17,
"learning_rate": 3.776283280228381e-06,
"loss": 0.001,
"step": 1490
},
{
"epoch": 0.17,
"learning_rate": 3.741060738022858e-06,
"loss": 0.0006,
"step": 1495
},
{
"epoch": 0.17,
"learning_rate": 3.705904774487396e-06,
"loss": 0.0009,
"step": 1500
},
{
"epoch": 0.17,
"learning_rate": 3.6708172488358364e-06,
"loss": 0.0011,
"step": 1505
},
{
"epoch": 0.17,
"learning_rate": 3.6358000166626966e-06,
"loss": 0.0012,
"step": 1510
},
{
"epoch": 0.17,
"learning_rate": 3.6008549298450403e-06,
"loss": 0.0011,
"step": 1515
},
{
"epoch": 0.17,
"learning_rate": 3.5659838364445505e-06,
"loss": 0.0011,
"step": 1520
},
{
"epoch": 0.17,
"learning_rate": 3.531188580609778e-06,
"loss": 0.0015,
"step": 1525
},
{
"epoch": 0.18,
"learning_rate": 3.4964710024786354e-06,
"loss": 0.0009,
"step": 1530
},
{
"epoch": 0.18,
"learning_rate": 3.461832938081059e-06,
"loss": 0.0013,
"step": 1535
},
{
"epoch": 0.18,
"learning_rate": 3.427276219241933e-06,
"loss": 0.0008,
"step": 1540
},
{
"epoch": 0.18,
"learning_rate": 3.3928026734841935e-06,
"loss": 0.0006,
"step": 1545
},
{
"epoch": 0.18,
"learning_rate": 3.3584141239321953e-06,
"loss": 0.0008,
"step": 1550
},
{
"epoch": 0.18,
"learning_rate": 3.3241123892152925e-06,
"loss": 0.0009,
"step": 1555
},
{
"epoch": 0.18,
"learning_rate": 3.289899283371657e-06,
"loss": 0.0011,
"step": 1560
},
{
"epoch": 0.18,
"learning_rate": 3.2557766157523467e-06,
"loss": 0.0007,
"step": 1565
},
{
"epoch": 0.18,
"learning_rate": 3.2217461909256186e-06,
"loss": 0.0007,
"step": 1570
},
{
"epoch": 0.18,
"learning_rate": 3.1878098085814926e-06,
"loss": 0.0016,
"step": 1575
},
{
"epoch": 0.18,
"learning_rate": 3.1539692634365788e-06,
"loss": 0.0007,
"step": 1580
},
{
"epoch": 0.18,
"learning_rate": 3.1202263451391603e-06,
"loss": 0.0011,
"step": 1585
},
{
"epoch": 0.18,
"learning_rate": 3.0865828381745515e-06,
"loss": 0.0013,
"step": 1590
},
{
"epoch": 0.18,
"learning_rate": 3.053040521770726e-06,
"loss": 0.0009,
"step": 1595
},
{
"epoch": 0.18,
"learning_rate": 3.019601169804216e-06,
"loss": 0.0007,
"step": 1600
},
{
"epoch": 0.18,
"learning_rate": 2.986266550706315e-06,
"loss": 0.001,
"step": 1605
},
{
"epoch": 0.18,
"learning_rate": 2.9530384273695373e-06,
"loss": 0.0008,
"step": 1610
},
{
"epoch": 0.19,
"learning_rate": 2.9199185570544054e-06,
"loss": 0.0009,
"step": 1615
},
{
"epoch": 0.19,
"learning_rate": 2.886908691296504e-06,
"loss": 0.0007,
"step": 1620
},
{
"epoch": 0.19,
"learning_rate": 2.854010575813856e-06,
"loss": 0.001,
"step": 1625
},
{
"epoch": 0.19,
"learning_rate": 2.8212259504146045e-06,
"loss": 0.0011,
"step": 1630
},
{
"epoch": 0.19,
"learning_rate": 2.7885565489049948e-06,
"loss": 0.0009,
"step": 1635
},
{
"epoch": 0.19,
"learning_rate": 2.7560040989976894e-06,
"loss": 0.0009,
"step": 1640
},
{
"epoch": 0.19,
"learning_rate": 2.723570322220399e-06,
"loss": 0.0009,
"step": 1645
},
{
"epoch": 0.19,
"learning_rate": 2.6912569338248317e-06,
"loss": 0.0012,
"step": 1650
},
{
"epoch": 0.19,
"learning_rate": 2.6590656426959906e-06,
"loss": 0.001,
"step": 1655
},
{
"epoch": 0.19,
"learning_rate": 2.626998151261798e-06,
"loss": 0.0011,
"step": 1660
},
{
"epoch": 0.19,
"learning_rate": 2.595056155403063e-06,
"loss": 0.0007,
"step": 1665
},
{
"epoch": 0.19,
"learning_rate": 2.5632413443637887e-06,
"loss": 0.001,
"step": 1670
},
{
"epoch": 0.19,
"learning_rate": 2.5315554006618487e-06,
"loss": 0.0012,
"step": 1675
},
{
"epoch": 0.19,
"learning_rate": 2.5000000000000015e-06,
"loss": 0.0009,
"step": 1680
},
{
"epoch": 0.19,
"learning_rate": 2.4685768111772647e-06,
"loss": 0.001,
"step": 1685
},
{
"epoch": 0.19,
"learning_rate": 2.437287496000674e-06,
"loss": 0.0011,
"step": 1690
},
{
"epoch": 0.19,
"learning_rate": 2.406133709197392e-06,
"loss": 0.0007,
"step": 1695
},
{
"epoch": 0.2,
"learning_rate": 2.3751170983272e-06,
"loss": 0.001,
"step": 1700
},
{
"epoch": 0.2,
"learning_rate": 2.3442393036953614e-06,
"loss": 0.0006,
"step": 1705
},
{
"epoch": 0.2,
"learning_rate": 2.3135019582658803e-06,
"loss": 0.0007,
"step": 1710
},
{
"epoch": 0.2,
"learning_rate": 2.282906687575151e-06,
"loss": 0.0016,
"step": 1715
},
{
"epoch": 0.2,
"learning_rate": 2.2524551096459703e-06,
"loss": 0.0013,
"step": 1720
},
{
"epoch": 0.2,
"learning_rate": 2.2221488349019903e-06,
"loss": 0.0013,
"step": 1725
},
{
"epoch": 0.2,
"learning_rate": 2.1919894660825362e-06,
"loss": 0.0009,
"step": 1730
},
{
"epoch": 0.2,
"learning_rate": 2.161978598157857e-06,
"loss": 0.0007,
"step": 1735
},
{
"epoch": 0.2,
"learning_rate": 2.132117818244771e-06,
"loss": 0.0013,
"step": 1740
},
{
"epoch": 0.2,
"learning_rate": 2.102408705522725e-06,
"loss": 0.0009,
"step": 1745
},
{
"epoch": 0.2,
"learning_rate": 2.0728528311502977e-06,
"loss": 0.0009,
"step": 1750
},
{
"epoch": 0.2,
"learning_rate": 2.0434517581820893e-06,
"loss": 0.0008,
"step": 1755
},
{
"epoch": 0.2,
"learning_rate": 2.0142070414860704e-06,
"loss": 0.001,
"step": 1760
},
{
"epoch": 0.2,
"learning_rate": 1.9851202276613524e-06,
"loss": 0.0008,
"step": 1765
},
{
"epoch": 0.2,
"learning_rate": 1.956192854956397e-06,
"loss": 0.0008,
"step": 1770
},
{
"epoch": 0.2,
"learning_rate": 1.927426453187663e-06,
"loss": 0.0009,
"step": 1775
},
{
"epoch": 0.2,
"learning_rate": 1.8988225436587005e-06,
"loss": 0.0008,
"step": 1780
},
{
"epoch": 0.2,
"learning_rate": 1.8703826390797047e-06,
"loss": 0.0006,
"step": 1785
},
{
"epoch": 0.21,
"learning_rate": 1.8421082434875133e-06,
"loss": 0.0014,
"step": 1790
},
{
"epoch": 0.21,
"learning_rate": 1.814000852166059e-06,
"loss": 0.001,
"step": 1795
},
{
"epoch": 0.21,
"learning_rate": 1.7860619515673034e-06,
"loss": 0.0005,
"step": 1800
},
{
"epoch": 0.21,
"learning_rate": 1.7582930192326187e-06,
"loss": 0.0011,
"step": 1805
},
{
"epoch": 0.21,
"learning_rate": 1.7306955237146523e-06,
"loss": 0.0015,
"step": 1810
},
{
"epoch": 0.21,
"learning_rate": 1.7032709244996559e-06,
"loss": 0.0007,
"step": 1815
},
{
"epoch": 0.21,
"learning_rate": 1.6760206719303107e-06,
"loss": 0.0011,
"step": 1820
},
{
"epoch": 0.21,
"learning_rate": 1.6489462071290213e-06,
"loss": 0.0004,
"step": 1825
},
{
"epoch": 0.21,
"learning_rate": 1.6220489619216988e-06,
"loss": 0.001,
"step": 1830
},
{
"epoch": 0.21,
"learning_rate": 1.5953303587620472e-06,
"loss": 0.0011,
"step": 1835
},
{
"epoch": 0.21,
"learning_rate": 1.5687918106563326e-06,
"loss": 0.0009,
"step": 1840
},
{
"epoch": 0.21,
"learning_rate": 1.5424347210886538e-06,
"loss": 0.0005,
"step": 1845
},
{
"epoch": 0.21,
"learning_rate": 1.5162604839467265e-06,
"loss": 0.0013,
"step": 1850
},
{
"epoch": 0.21,
"learning_rate": 1.4902704834481585e-06,
"loss": 0.0011,
"step": 1855
},
{
"epoch": 0.21,
"learning_rate": 1.4644660940672628e-06,
"loss": 0.0006,
"step": 1860
},
{
"epoch": 0.21,
"learning_rate": 1.4388486804623464e-06,
"loss": 0.0007,
"step": 1865
},
{
"epoch": 0.21,
"learning_rate": 1.4134195974035525e-06,
"loss": 0.0006,
"step": 1870
},
{
"epoch": 0.22,
"learning_rate": 1.3881801897012225e-06,
"loss": 0.0014,
"step": 1875
},
{
"epoch": 0.22,
"learning_rate": 1.3631317921347564e-06,
"loss": 0.0012,
"step": 1880
},
{
"epoch": 0.22,
"learning_rate": 1.3382757293820408e-06,
"loss": 0.0012,
"step": 1885
},
{
"epoch": 0.22,
"learning_rate": 1.3136133159493803e-06,
"loss": 0.0008,
"step": 1890
},
{
"epoch": 0.22,
"learning_rate": 1.2891458561019914e-06,
"loss": 0.0006,
"step": 1895
},
{
"epoch": 0.22,
"learning_rate": 1.264874643795021e-06,
"loss": 0.0009,
"step": 1900
},
{
"epoch": 0.22,
"learning_rate": 1.2408009626051137e-06,
"loss": 0.0013,
"step": 1905
},
{
"epoch": 0.22,
"learning_rate": 1.2169260856625358e-06,
"loss": 0.0011,
"step": 1910
},
{
"epoch": 0.22,
"learning_rate": 1.1932512755838448e-06,
"loss": 0.001,
"step": 1915
},
{
"epoch": 0.22,
"learning_rate": 1.1697777844051105e-06,
"loss": 0.0009,
"step": 1920
},
{
"epoch": 0.22,
"learning_rate": 1.1465068535157098e-06,
"loss": 0.0012,
"step": 1925
},
{
"epoch": 0.22,
"learning_rate": 1.1234397135926705e-06,
"loss": 0.0011,
"step": 1930
},
{
"epoch": 0.22,
"learning_rate": 1.100577584535592e-06,
"loss": 0.001,
"step": 1935
},
{
"epoch": 0.22,
"learning_rate": 1.0779216754021215e-06,
"loss": 0.001,
"step": 1940
},
{
"epoch": 0.22,
"learning_rate": 1.0554731843440275e-06,
"loss": 0.0013,
"step": 1945
},
{
"epoch": 0.22,
"learning_rate": 1.0332332985438248e-06,
"loss": 0.0007,
"step": 1950
},
{
"epoch": 0.22,
"learning_rate": 1.0112031941519934e-06,
"loss": 0.0013,
"step": 1955
},
{
"epoch": 0.22,
"learning_rate": 9.893840362247809e-07,
"loss": 0.0008,
"step": 1960
},
{
"epoch": 0.23,
"learning_rate": 9.677769786625869e-07,
"loss": 0.0011,
"step": 1965
},
{
"epoch": 0.23,
"learning_rate": 9.463831641489391e-07,
"loss": 0.0009,
"step": 1970
},
{
"epoch": 0.23,
"learning_rate": 9.252037240900618e-07,
"loss": 0.0012,
"step": 1975
},
{
"epoch": 0.23,
"learning_rate": 9.042397785550405e-07,
"loss": 0.0006,
"step": 1980
},
{
"epoch": 0.23,
"learning_rate": 8.834924362165992e-07,
"loss": 0.0008,
"step": 1985
},
{
"epoch": 0.23,
"learning_rate": 8.629627942924473e-07,
"loss": 0.0012,
"step": 1990
},
{
"epoch": 0.23,
"learning_rate": 8.426519384872733e-07,
"loss": 0.0008,
"step": 1995
},
{
"epoch": 0.23,
"learning_rate": 8.225609429353187e-07,
"loss": 0.0006,
"step": 2000
},
{
"epoch": 0.23,
"learning_rate": 8.026908701435681e-07,
"loss": 0.0005,
"step": 2005
},
{
"epoch": 0.23,
"learning_rate": 7.830427709355726e-07,
"loss": 0.0014,
"step": 2010
},
{
"epoch": 0.23,
"learning_rate": 7.636176843958599e-07,
"loss": 0.0009,
"step": 2015
},
{
"epoch": 0.23,
"learning_rate": 7.444166378150014e-07,
"loss": 0.0007,
"step": 2020
},
{
"epoch": 0.23,
"learning_rate": 7.254406466352682e-07,
"loss": 0.0008,
"step": 2025
},
{
"epoch": 0.23,
"learning_rate": 7.066907143969353e-07,
"loss": 0.0011,
"step": 2030
},
{
"epoch": 0.23,
"learning_rate": 6.881678326852137e-07,
"loss": 0.0008,
"step": 2035
},
{
"epoch": 0.23,
"learning_rate": 6.698729810778065e-07,
"loss": 0.001,
"step": 2040
},
{
"epoch": 0.23,
"learning_rate": 6.518071270931059e-07,
"loss": 0.0007,
"step": 2045
},
{
"epoch": 0.24,
"learning_rate": 6.339712261390213e-07,
"loss": 0.0011,
"step": 2050
},
{
"epoch": 0.24,
"learning_rate": 6.163662214624616e-07,
"loss": 0.0011,
"step": 2055
},
{
"epoch": 0.24,
"learning_rate": 5.989930440994451e-07,
"loss": 0.0008,
"step": 2060
},
{
"epoch": 0.24,
"learning_rate": 5.818526128258622e-07,
"loss": 0.0009,
"step": 2065
},
{
"epoch": 0.24,
"learning_rate": 5.649458341088915e-07,
"loss": 0.0011,
"step": 2070
},
{
"epoch": 0.24,
"learning_rate": 5.482736020590551e-07,
"loss": 0.0005,
"step": 2075
},
{
"epoch": 0.24,
"learning_rate": 5.318367983829393e-07,
"loss": 0.0012,
"step": 2080
},
{
"epoch": 0.24,
"learning_rate": 5.188574484306829e-07,
"loss": 0.0016,
"step": 2085
},
{
"epoch": 0.24,
"learning_rate": 5.028465979562792e-07,
"loss": 0.0005,
"step": 2090
},
{
"epoch": 0.24,
"learning_rate": 4.87073578250698e-07,
"loss": 0.001,
"step": 2095
},
{
"epoch": 0.24,
"learning_rate": 4.7153922346591554e-07,
"loss": 0.0009,
"step": 2100
},
{
"epoch": 0.24,
"learning_rate": 4.562443551321788e-07,
"loss": 0.001,
"step": 2105
},
{
"epoch": 0.24,
"learning_rate": 4.4118978211455723e-07,
"loss": 0.0008,
"step": 2110
},
{
"epoch": 0.24,
"learning_rate": 4.263763005701649e-07,
"loss": 0.0009,
"step": 2115
},
{
"epoch": 0.24,
"learning_rate": 4.118046939060566e-07,
"loss": 0.0004,
"step": 2120
},
{
"epoch": 0.24,
"learning_rate": 3.9747573273779816e-07,
"loss": 0.0014,
"step": 2125
},
{
"epoch": 0.24,
"learning_rate": 3.833901748487151e-07,
"loss": 0.0012,
"step": 2130
},
{
"epoch": 0.24,
"learning_rate": 3.6954876514981084e-07,
"loss": 0.0013,
"step": 2135
},
{
"epoch": 0.25,
"learning_rate": 3.5595223564037884e-07,
"loss": 0.0011,
"step": 2140
},
{
"epoch": 0.25,
"learning_rate": 3.426013053692878e-07,
"loss": 0.0009,
"step": 2145
},
{
"epoch": 0.25,
"learning_rate": 3.294966803969574e-07,
"loss": 0.0009,
"step": 2150
},
{
"epoch": 0.25,
"learning_rate": 3.166390537580122e-07,
"loss": 0.0008,
"step": 2155
},
{
"epoch": 0.25,
"learning_rate": 3.0402910542463915e-07,
"loss": 0.0009,
"step": 2160
},
{
"epoch": 0.25,
"learning_rate": 2.916675022706239e-07,
"loss": 0.0013,
"step": 2165
},
{
"epoch": 0.25,
"learning_rate": 2.7955489803607907e-07,
"loss": 0.0013,
"step": 2170
},
{
"epoch": 0.25,
"learning_rate": 2.676919332928785e-07,
"loss": 0.001,
"step": 2175
},
{
"epoch": 0.25,
"learning_rate": 2.560792354107777e-07,
"loss": 0.0009,
"step": 2180
},
{
"epoch": 0.25,
"learning_rate": 2.447174185242324e-07,
"loss": 0.0011,
"step": 2185
},
{
"epoch": 0.25,
"learning_rate": 2.33607083499926e-07,
"loss": 0.0009,
"step": 2190
},
{
"epoch": 0.25,
"learning_rate": 2.2274881790498914e-07,
"loss": 0.0007,
"step": 2195
},
{
"epoch": 0.25,
"learning_rate": 2.1214319597592792e-07,
"loss": 0.0008,
"step": 2200
},
{
"epoch": 0.25,
"learning_rate": 2.0179077858825445e-07,
"loss": 0.001,
"step": 2205
},
{
"epoch": 0.25,
"learning_rate": 1.916921132268229e-07,
"loss": 0.0015,
"step": 2210
},
{
"epoch": 0.25,
"learning_rate": 1.8184773395688527e-07,
"loss": 0.0009,
"step": 2215
},
{
"epoch": 0.25,
"learning_rate": 1.7225816139583407e-07,
"loss": 0.0009,
"step": 2220
},
{
"epoch": 0.26,
"learning_rate": 1.6292390268568103e-07,
"loss": 0.001,
"step": 2225
},
{
"epoch": 0.26,
"learning_rate": 1.5384545146622854e-07,
"loss": 0.0008,
"step": 2230
},
{
"epoch": 0.26,
"learning_rate": 1.450232878489699e-07,
"loss": 0.0008,
"step": 2235
},
{
"epoch": 0.26,
"learning_rate": 1.3645787839169755e-07,
"loss": 0.001,
"step": 2240
},
{
"epoch": 0.26,
"learning_rate": 1.2814967607382433e-07,
"loss": 0.0011,
"step": 2245
},
{
"epoch": 0.26,
"learning_rate": 1.2009912027243386e-07,
"loss": 0.0009,
"step": 2250
},
{
"epoch": 0.26,
"learning_rate": 1.123066367390424e-07,
"loss": 0.001,
"step": 2255
},
{
"epoch": 0.26,
"learning_rate": 1.0477263757708078e-07,
"loss": 0.0011,
"step": 2260
},
{
"epoch": 0.26,
"learning_rate": 9.749752122010347e-08,
"loss": 0.0006,
"step": 2265
},
{
"epoch": 0.26,
"learning_rate": 9.048167241071548e-08,
"loss": 0.0006,
"step": 2270
},
{
"epoch": 0.26,
"learning_rate": 8.372546218022747e-08,
"loss": 0.0007,
"step": 2275
},
{
"epoch": 0.26,
"learning_rate": 7.722924782902985e-08,
"loss": 0.0015,
"step": 2280
},
{
"epoch": 0.26,
"learning_rate": 7.09933729077017e-08,
"loss": 0.0008,
"step": 2285
},
{
"epoch": 0.26,
"learning_rate": 6.501816719884091e-08,
"loss": 0.0006,
"step": 2290
},
{
"epoch": 0.26,
"learning_rate": 5.9303946699620365e-08,
"loss": 0.0011,
"step": 2295
},
{
"epoch": 0.26,
"learning_rate": 5.3851013605080717e-08,
"loss": 0.0009,
"step": 2300
},
{
"epoch": 0.26,
"learning_rate": 4.865965629214819e-08,
"loss": 0.001,
"step": 2305
},
{
"epoch": 0.27,
"learning_rate": 4.37301493043818e-08,
"loss": 0.0012,
"step": 2310
},
{
"epoch": 0.27,
"learning_rate": 3.9062753337454354e-08,
"loss": 0.0016,
"step": 2315
},
{
"epoch": 0.27,
"learning_rate": 3.465771522536854e-08,
"loss": 0.0014,
"step": 2320
},
{
"epoch": 0.27,
"learning_rate": 3.0515267927400116e-08,
"loss": 0.0006,
"step": 2325
},
{
"epoch": 0.27,
"learning_rate": 2.6635630515779996e-08,
"loss": 0.0012,
"step": 2330
},
{
"epoch": 0.27,
"learning_rate": 2.301900816410574e-08,
"loss": 0.0009,
"step": 2335
},
{
"epoch": 0.27,
"learning_rate": 1.966559213649577e-08,
"loss": 0.0007,
"step": 2340
},
{
"epoch": 0.27,
"learning_rate": 1.657555977746972e-08,
"loss": 0.0015,
"step": 2345
},
{
"epoch": 0.27,
"learning_rate": 1.3749074502572012e-08,
"loss": 0.0009,
"step": 2350
},
{
"epoch": 0.27,
"learning_rate": 1.1186285789728247e-08,
"loss": 0.0006,
"step": 2355
},
{
"epoch": 0.27,
"learning_rate": 8.887329171343717e-09,
"loss": 0.001,
"step": 2360
},
{
"epoch": 0.27,
"learning_rate": 6.852326227130835e-09,
"loss": 0.001,
"step": 2365
},
{
"epoch": 0.27,
"learning_rate": 5.08138457768148e-09,
"loss": 0.0011,
"step": 2370
},
{
"epoch": 0.27,
"learning_rate": 3.574597878777675e-09,
"loss": 0.0011,
"step": 2375
},
{
"epoch": 0.27,
"learning_rate": 2.3320458164355352e-09,
"loss": 0.0006,
"step": 2380
},
{
"epoch": 0.27,
"learning_rate": 1.3537941026914302e-09,
"loss": 0.0006,
"step": 2385
},
{
"epoch": 0.27,
"learning_rate": 6.398944721297539e-10,
"loss": 0.0012,
"step": 2390
},
{
"epoch": 0.27,
"learning_rate": 1.903846791434516e-10,
"loss": 0.0007,
"step": 2395
},
{
"epoch": 0.28,
"learning_rate": 5.288495938948757e-12,
"loss": 0.001,
"step": 2400
},
{
"epoch": 0.28,
"step": 2400,
"total_flos": 3.3948129315510026e+18,
"train_loss": 0.025274057275091764,
"train_runtime": 26505.156,
"train_samples_per_second": 1.449,
"train_steps_per_second": 0.091
}
],
"logging_steps": 5,
"max_steps": 2400,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 3.3948129315510026e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}