zephyr-7b-pl-qlora / trainer_state.json
sengi's picture
Model save
bc841eb verified
raw
history blame
93.3 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.05308909385623915,
"eval_steps": 500,
"global_step": 3701,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1
},
{
"epoch": 0.0,
"learning_rate": 0.0,
"loss": 0.0,
"step": 5
},
{
"epoch": 0.0,
"learning_rate": 0.0,
"loss": 0.0,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 2.2948938611589213e-07,
"loss": 0.0,
"step": 15
},
{
"epoch": 0.0,
"learning_rate": 6.884681583476765e-07,
"loss": -3.4891,
"step": 20
},
{
"epoch": 0.0,
"learning_rate": 1.2621916236374069e-06,
"loss": -49.4445,
"step": 25
},
{
"epoch": 0.0,
"learning_rate": 1.721170395869191e-06,
"loss": -195.5057,
"step": 30
},
{
"epoch": 0.0,
"learning_rate": 2.2948938611589215e-06,
"loss": -497.6609,
"step": 35
},
{
"epoch": 0.0,
"learning_rate": 2.868617326448652e-06,
"loss": -1031.3963,
"step": 40
},
{
"epoch": 0.0,
"learning_rate": 3.442340791738382e-06,
"loss": -1685.1105,
"step": 45
},
{
"epoch": 0.0,
"learning_rate": 4.016064257028113e-06,
"loss": -2646.2359,
"step": 50
},
{
"epoch": 0.0,
"learning_rate": 4.589787722317843e-06,
"loss": -3794.2527,
"step": 55
},
{
"epoch": 0.0,
"learning_rate": 5.163511187607573e-06,
"loss": -5546.7516,
"step": 60
},
{
"epoch": 0.0,
"learning_rate": 5.737234652897304e-06,
"loss": -7180.4102,
"step": 65
},
{
"epoch": 0.0,
"learning_rate": 6.310958118187034e-06,
"loss": -10111.5016,
"step": 70
},
{
"epoch": 0.0,
"learning_rate": 6.884681583476764e-06,
"loss": -12410.3125,
"step": 75
},
{
"epoch": 0.0,
"learning_rate": 7.4584050487664955e-06,
"loss": -16288.9766,
"step": 80
},
{
"epoch": 0.0,
"learning_rate": 8.032128514056226e-06,
"loss": -19432.7328,
"step": 85
},
{
"epoch": 0.01,
"learning_rate": 8.605851979345956e-06,
"loss": -23566.2641,
"step": 90
},
{
"epoch": 0.01,
"learning_rate": 9.179575444635686e-06,
"loss": -28124.4,
"step": 95
},
{
"epoch": 0.01,
"learning_rate": 9.753298909925416e-06,
"loss": -33192.5531,
"step": 100
},
{
"epoch": 0.01,
"learning_rate": 1.0327022375215146e-05,
"loss": -40076.6687,
"step": 105
},
{
"epoch": 0.01,
"learning_rate": 1.0900745840504876e-05,
"loss": -46810.9156,
"step": 110
},
{
"epoch": 0.01,
"learning_rate": 1.1474469305794608e-05,
"loss": -51101.25,
"step": 115
},
{
"epoch": 0.01,
"learning_rate": 1.2048192771084338e-05,
"loss": -56708.5563,
"step": 120
},
{
"epoch": 0.01,
"learning_rate": 1.2621916236374069e-05,
"loss": -59233.925,
"step": 125
},
{
"epoch": 0.01,
"learning_rate": 1.3195639701663797e-05,
"loss": -64193.0687,
"step": 130
},
{
"epoch": 0.01,
"learning_rate": 1.3769363166953527e-05,
"loss": -72945.3625,
"step": 135
},
{
"epoch": 0.01,
"learning_rate": 1.434308663224326e-05,
"loss": -87274.1375,
"step": 140
},
{
"epoch": 0.01,
"learning_rate": 1.4916810097532991e-05,
"loss": -106688.7375,
"step": 145
},
{
"epoch": 0.01,
"learning_rate": 1.549053356282272e-05,
"loss": -126941.775,
"step": 150
},
{
"epoch": 0.01,
"learning_rate": 1.606425702811245e-05,
"loss": -147417.55,
"step": 155
},
{
"epoch": 0.01,
"learning_rate": 1.663798049340218e-05,
"loss": -173031.2875,
"step": 160
},
{
"epoch": 0.01,
"learning_rate": 1.721170395869191e-05,
"loss": -197558.775,
"step": 165
},
{
"epoch": 0.01,
"learning_rate": 1.7785427423981642e-05,
"loss": -223929.7,
"step": 170
},
{
"epoch": 0.01,
"learning_rate": 1.8244406196213425e-05,
"loss": -241816.85,
"step": 175
},
{
"epoch": 0.01,
"learning_rate": 1.8703384968445207e-05,
"loss": -249209.5,
"step": 180
},
{
"epoch": 0.01,
"learning_rate": 1.9162363740676993e-05,
"loss": -260632.2,
"step": 185
},
{
"epoch": 0.01,
"learning_rate": 1.9736087205966724e-05,
"loss": -274344.5,
"step": 190
},
{
"epoch": 0.01,
"learning_rate": 2.0309810671256454e-05,
"loss": -283097.8,
"step": 195
},
{
"epoch": 0.01,
"learning_rate": 2.0883534136546184e-05,
"loss": -291180.1,
"step": 200
},
{
"epoch": 0.01,
"learning_rate": 2.1457257601835918e-05,
"loss": -296606.175,
"step": 205
},
{
"epoch": 0.01,
"learning_rate": 2.2030981067125648e-05,
"loss": -297583.2,
"step": 210
},
{
"epoch": 0.01,
"learning_rate": 2.2604704532415378e-05,
"loss": -304103.9,
"step": 215
},
{
"epoch": 0.01,
"learning_rate": 2.3178427997705108e-05,
"loss": -305754.625,
"step": 220
},
{
"epoch": 0.01,
"learning_rate": 2.3752151462994838e-05,
"loss": -308790.725,
"step": 225
},
{
"epoch": 0.01,
"learning_rate": 2.432587492828457e-05,
"loss": -311960.7,
"step": 230
},
{
"epoch": 0.01,
"learning_rate": 2.48995983935743e-05,
"loss": -313037.675,
"step": 235
},
{
"epoch": 0.01,
"learning_rate": 2.547332185886403e-05,
"loss": -315313.825,
"step": 240
},
{
"epoch": 0.01,
"learning_rate": 2.6047045324153755e-05,
"loss": -316755.775,
"step": 245
},
{
"epoch": 0.01,
"learning_rate": 2.662076878944349e-05,
"loss": -317376.0,
"step": 250
},
{
"epoch": 0.01,
"learning_rate": 2.7194492254733223e-05,
"loss": -319665.85,
"step": 255
},
{
"epoch": 0.01,
"learning_rate": 2.776821572002295e-05,
"loss": -318471.65,
"step": 260
},
{
"epoch": 0.02,
"learning_rate": 2.8341939185312683e-05,
"loss": -318968.55,
"step": 265
},
{
"epoch": 0.02,
"learning_rate": 2.891566265060241e-05,
"loss": -320207.2,
"step": 270
},
{
"epoch": 0.02,
"learning_rate": 2.9489386115892143e-05,
"loss": -322860.7,
"step": 275
},
{
"epoch": 0.02,
"learning_rate": 3.006310958118187e-05,
"loss": -322976.675,
"step": 280
},
{
"epoch": 0.02,
"learning_rate": 3.0636833046471604e-05,
"loss": -323030.4,
"step": 285
},
{
"epoch": 0.02,
"learning_rate": 3.121055651176134e-05,
"loss": -325049.75,
"step": 290
},
{
"epoch": 0.02,
"learning_rate": 3.1784279977051064e-05,
"loss": -323279.2,
"step": 295
},
{
"epoch": 0.02,
"learning_rate": 3.23580034423408e-05,
"loss": -326394.3,
"step": 300
},
{
"epoch": 0.02,
"learning_rate": 3.2931726907630524e-05,
"loss": -327546.45,
"step": 305
},
{
"epoch": 0.02,
"learning_rate": 3.350545037292026e-05,
"loss": -326381.95,
"step": 310
},
{
"epoch": 0.02,
"learning_rate": 3.4079173838209984e-05,
"loss": -329224.875,
"step": 315
},
{
"epoch": 0.02,
"learning_rate": 3.465289730349972e-05,
"loss": -327763.2,
"step": 320
},
{
"epoch": 0.02,
"learning_rate": 3.5226620768789445e-05,
"loss": -327674.775,
"step": 325
},
{
"epoch": 0.02,
"learning_rate": 3.580034423407918e-05,
"loss": -328802.05,
"step": 330
},
{
"epoch": 0.02,
"learning_rate": 3.6374067699368905e-05,
"loss": -331079.025,
"step": 335
},
{
"epoch": 0.02,
"learning_rate": 3.694779116465863e-05,
"loss": -329059.075,
"step": 340
},
{
"epoch": 0.02,
"learning_rate": 3.7521514629948365e-05,
"loss": -329749.675,
"step": 345
},
{
"epoch": 0.02,
"learning_rate": 3.809523809523809e-05,
"loss": -331523.05,
"step": 350
},
{
"epoch": 0.02,
"learning_rate": 3.8668961560527826e-05,
"loss": -332232.7,
"step": 355
},
{
"epoch": 0.02,
"learning_rate": 3.924268502581756e-05,
"loss": -332449.35,
"step": 360
},
{
"epoch": 0.02,
"learning_rate": 3.9816408491107286e-05,
"loss": -334742.55,
"step": 365
},
{
"epoch": 0.02,
"learning_rate": 4.039013195639702e-05,
"loss": -333736.15,
"step": 370
},
{
"epoch": 0.02,
"learning_rate": 4.0963855421686746e-05,
"loss": -333792.225,
"step": 375
},
{
"epoch": 0.02,
"learning_rate": 4.153757888697648e-05,
"loss": -335570.45,
"step": 380
},
{
"epoch": 0.02,
"learning_rate": 4.211130235226621e-05,
"loss": -333872.35,
"step": 385
},
{
"epoch": 0.02,
"learning_rate": 4.268502581755594e-05,
"loss": -337072.675,
"step": 390
},
{
"epoch": 0.02,
"learning_rate": 4.3258749282845674e-05,
"loss": -333726.425,
"step": 395
},
{
"epoch": 0.02,
"learning_rate": 4.38324727481354e-05,
"loss": -336459.75,
"step": 400
},
{
"epoch": 0.02,
"learning_rate": 4.4406196213425134e-05,
"loss": -336860.1,
"step": 405
},
{
"epoch": 0.02,
"learning_rate": 4.497991967871486e-05,
"loss": -335018.575,
"step": 410
},
{
"epoch": 0.02,
"learning_rate": 4.543889845094665e-05,
"loss": -338024.925,
"step": 415
},
{
"epoch": 0.02,
"learning_rate": 4.601262191623638e-05,
"loss": -336729.825,
"step": 420
},
{
"epoch": 0.02,
"learning_rate": 4.658634538152611e-05,
"loss": -337681.2,
"step": 425
},
{
"epoch": 0.02,
"learning_rate": 4.716006884681584e-05,
"loss": -339095.5,
"step": 430
},
{
"epoch": 0.02,
"learning_rate": 4.773379231210557e-05,
"loss": -339637.8,
"step": 435
},
{
"epoch": 0.03,
"learning_rate": 4.83075157773953e-05,
"loss": -340333.75,
"step": 440
},
{
"epoch": 0.03,
"learning_rate": 4.888123924268503e-05,
"loss": -340316.65,
"step": 445
},
{
"epoch": 0.03,
"learning_rate": 4.945496270797476e-05,
"loss": -339421.7,
"step": 450
},
{
"epoch": 0.03,
"learning_rate": 5.002868617326449e-05,
"loss": -338028.95,
"step": 455
},
{
"epoch": 0.03,
"learning_rate": 5.060240963855422e-05,
"loss": -342042.3,
"step": 460
},
{
"epoch": 0.03,
"learning_rate": 5.1176133103843945e-05,
"loss": -341357.9,
"step": 465
},
{
"epoch": 0.03,
"learning_rate": 5.174985656913368e-05,
"loss": -341011.45,
"step": 470
},
{
"epoch": 0.03,
"learning_rate": 5.232358003442341e-05,
"loss": -340367.0,
"step": 475
},
{
"epoch": 0.03,
"learning_rate": 5.289730349971313e-05,
"loss": -341737.35,
"step": 480
},
{
"epoch": 0.03,
"learning_rate": 5.3471026965002866e-05,
"loss": -340334.725,
"step": 485
},
{
"epoch": 0.03,
"learning_rate": 5.40447504302926e-05,
"loss": -341142.3,
"step": 490
},
{
"epoch": 0.03,
"learning_rate": 5.461847389558233e-05,
"loss": -342525.2,
"step": 495
},
{
"epoch": 0.03,
"learning_rate": 5.519219736087207e-05,
"loss": -344089.7,
"step": 500
},
{
"epoch": 0.03,
"learning_rate": 5.576592082616179e-05,
"loss": -343456.65,
"step": 505
},
{
"epoch": 0.03,
"learning_rate": 5.633964429145152e-05,
"loss": -345602.1,
"step": 510
},
{
"epoch": 0.03,
"learning_rate": 5.6913367756741254e-05,
"loss": -343630.25,
"step": 515
},
{
"epoch": 0.03,
"learning_rate": 5.748709122203099e-05,
"loss": -342957.575,
"step": 520
},
{
"epoch": 0.03,
"learning_rate": 5.806081468732072e-05,
"loss": -343699.775,
"step": 525
},
{
"epoch": 0.03,
"learning_rate": 5.863453815261044e-05,
"loss": -345816.2,
"step": 530
},
{
"epoch": 0.03,
"learning_rate": 5.9208261617900174e-05,
"loss": -343947.55,
"step": 535
},
{
"epoch": 0.03,
"learning_rate": 5.978198508318991e-05,
"loss": -346215.525,
"step": 540
},
{
"epoch": 0.03,
"learning_rate": 6.035570854847964e-05,
"loss": -345165.4,
"step": 545
},
{
"epoch": 0.03,
"learning_rate": 6.092943201376936e-05,
"loss": -344990.8,
"step": 550
},
{
"epoch": 0.03,
"learning_rate": 6.15031554790591e-05,
"loss": -347143.95,
"step": 555
},
{
"epoch": 0.03,
"learning_rate": 6.207687894434883e-05,
"loss": -346346.4,
"step": 560
},
{
"epoch": 0.03,
"learning_rate": 6.265060240963856e-05,
"loss": -344992.35,
"step": 565
},
{
"epoch": 0.03,
"learning_rate": 6.32243258749283e-05,
"loss": -346478.0,
"step": 570
},
{
"epoch": 0.03,
"learning_rate": 6.379804934021802e-05,
"loss": -347404.6,
"step": 575
},
{
"epoch": 0.03,
"learning_rate": 6.437177280550775e-05,
"loss": -347950.25,
"step": 580
},
{
"epoch": 0.03,
"learning_rate": 6.494549627079748e-05,
"loss": -346535.225,
"step": 585
},
{
"epoch": 0.03,
"learning_rate": 6.551921973608722e-05,
"loss": -344840.4,
"step": 590
},
{
"epoch": 0.03,
"learning_rate": 6.609294320137695e-05,
"loss": -347641.45,
"step": 595
},
{
"epoch": 0.03,
"learning_rate": 6.666666666666667e-05,
"loss": -347243.75,
"step": 600
},
{
"epoch": 0.03,
"learning_rate": 6.72403901319564e-05,
"loss": -344839.0,
"step": 605
},
{
"epoch": 0.03,
"learning_rate": 6.781411359724614e-05,
"loss": -346694.6,
"step": 610
},
{
"epoch": 0.04,
"learning_rate": 6.838783706253587e-05,
"loss": -346482.1,
"step": 615
},
{
"epoch": 0.04,
"learning_rate": 6.896156052782559e-05,
"loss": -346953.025,
"step": 620
},
{
"epoch": 0.04,
"learning_rate": 6.953528399311532e-05,
"loss": -346146.7,
"step": 625
},
{
"epoch": 0.04,
"learning_rate": 7.010900745840506e-05,
"loss": -348154.8,
"step": 630
},
{
"epoch": 0.04,
"learning_rate": 7.068273092369478e-05,
"loss": -348350.55,
"step": 635
},
{
"epoch": 0.04,
"learning_rate": 7.125645438898451e-05,
"loss": -349716.075,
"step": 640
},
{
"epoch": 0.04,
"learning_rate": 7.183017785427424e-05,
"loss": -347439.1,
"step": 645
},
{
"epoch": 0.04,
"learning_rate": 7.240390131956398e-05,
"loss": -348563.45,
"step": 650
},
{
"epoch": 0.04,
"learning_rate": 7.29776247848537e-05,
"loss": -348391.825,
"step": 655
},
{
"epoch": 0.04,
"learning_rate": 7.355134825014343e-05,
"loss": -350483.05,
"step": 660
},
{
"epoch": 0.04,
"learning_rate": 7.412507171543317e-05,
"loss": -350731.125,
"step": 665
},
{
"epoch": 0.04,
"learning_rate": 7.469879518072289e-05,
"loss": -349929.325,
"step": 670
},
{
"epoch": 0.04,
"learning_rate": 7.527251864601262e-05,
"loss": -351055.2,
"step": 675
},
{
"epoch": 0.04,
"learning_rate": 7.584624211130235e-05,
"loss": -350916.6,
"step": 680
},
{
"epoch": 0.04,
"learning_rate": 7.641996557659209e-05,
"loss": -350173.8,
"step": 685
},
{
"epoch": 0.04,
"learning_rate": 7.69936890418818e-05,
"loss": -351153.0,
"step": 690
},
{
"epoch": 0.04,
"learning_rate": 7.756741250717154e-05,
"loss": -349882.6,
"step": 695
},
{
"epoch": 0.04,
"learning_rate": 7.814113597246127e-05,
"loss": -352142.4,
"step": 700
},
{
"epoch": 0.04,
"learning_rate": 7.8714859437751e-05,
"loss": -350261.2,
"step": 705
},
{
"epoch": 0.04,
"learning_rate": 7.928858290304074e-05,
"loss": -350545.325,
"step": 710
},
{
"epoch": 0.04,
"learning_rate": 7.986230636833046e-05,
"loss": -351217.85,
"step": 715
},
{
"epoch": 0.04,
"learning_rate": 8.04360298336202e-05,
"loss": -351533.35,
"step": 720
},
{
"epoch": 0.04,
"learning_rate": 8.100975329890993e-05,
"loss": -350845.225,
"step": 725
},
{
"epoch": 0.04,
"learning_rate": 8.158347676419966e-05,
"loss": -352349.6,
"step": 730
},
{
"epoch": 0.04,
"learning_rate": 8.21572002294894e-05,
"loss": -351294.8,
"step": 735
},
{
"epoch": 0.04,
"learning_rate": 8.273092369477911e-05,
"loss": -351867.35,
"step": 740
},
{
"epoch": 0.04,
"learning_rate": 8.330464716006885e-05,
"loss": -351626.4,
"step": 745
},
{
"epoch": 0.04,
"learning_rate": 8.387837062535858e-05,
"loss": -353080.1,
"step": 750
},
{
"epoch": 0.04,
"learning_rate": 8.445209409064832e-05,
"loss": -352073.8,
"step": 755
},
{
"epoch": 0.04,
"learning_rate": 8.502581755593804e-05,
"loss": -351819.75,
"step": 760
},
{
"epoch": 0.04,
"learning_rate": 8.559954102122777e-05,
"loss": -352359.45,
"step": 765
},
{
"epoch": 0.04,
"learning_rate": 8.61732644865175e-05,
"loss": -354546.0,
"step": 770
},
{
"epoch": 0.04,
"learning_rate": 8.674698795180724e-05,
"loss": -353960.15,
"step": 775
},
{
"epoch": 0.04,
"learning_rate": 8.732071141709697e-05,
"loss": -353622.075,
"step": 780
},
{
"epoch": 0.05,
"learning_rate": 8.789443488238669e-05,
"loss": -352742.6,
"step": 785
},
{
"epoch": 0.05,
"learning_rate": 8.846815834767642e-05,
"loss": -353329.025,
"step": 790
},
{
"epoch": 0.05,
"learning_rate": 8.904188181296616e-05,
"loss": -353635.4,
"step": 795
},
{
"epoch": 0.05,
"learning_rate": 8.961560527825589e-05,
"loss": -352711.4,
"step": 800
},
{
"epoch": 0.05,
"learning_rate": 9.018932874354562e-05,
"loss": -353263.5,
"step": 805
},
{
"epoch": 0.05,
"learning_rate": 9.076305220883534e-05,
"loss": -354502.3,
"step": 810
},
{
"epoch": 0.05,
"learning_rate": 9.133677567412508e-05,
"loss": -353995.45,
"step": 815
},
{
"epoch": 0.05,
"learning_rate": 9.191049913941481e-05,
"loss": -353419.0,
"step": 820
},
{
"epoch": 0.05,
"learning_rate": 9.248422260470454e-05,
"loss": -352814.725,
"step": 825
},
{
"epoch": 0.05,
"learning_rate": 9.305794606999426e-05,
"loss": -353624.425,
"step": 830
},
{
"epoch": 0.05,
"learning_rate": 9.3631669535284e-05,
"loss": -354307.7,
"step": 835
},
{
"epoch": 0.05,
"learning_rate": 9.420539300057373e-05,
"loss": -356302.85,
"step": 840
},
{
"epoch": 0.05,
"learning_rate": 9.477911646586346e-05,
"loss": -354939.2,
"step": 845
},
{
"epoch": 0.05,
"learning_rate": 9.53528399311532e-05,
"loss": -355997.0,
"step": 850
},
{
"epoch": 0.05,
"learning_rate": 9.592656339644292e-05,
"loss": -355250.575,
"step": 855
},
{
"epoch": 0.05,
"learning_rate": 9.650028686173265e-05,
"loss": -356108.8,
"step": 860
},
{
"epoch": 0.05,
"learning_rate": 9.707401032702239e-05,
"loss": -355865.0,
"step": 865
},
{
"epoch": 0.05,
"learning_rate": 9.764773379231212e-05,
"loss": -357137.325,
"step": 870
},
{
"epoch": 0.05,
"learning_rate": 9.822145725760184e-05,
"loss": -356673.4,
"step": 875
},
{
"epoch": 0.05,
"learning_rate": 9.879518072289157e-05,
"loss": -356866.7,
"step": 880
},
{
"epoch": 0.05,
"learning_rate": 9.93689041881813e-05,
"loss": -357192.3,
"step": 885
},
{
"epoch": 0.05,
"learning_rate": 9.994262765347103e-05,
"loss": -357552.65,
"step": 890
},
{
"epoch": 0.05,
"learning_rate": 0.00010051635111876076,
"loss": -356663.25,
"step": 895
},
{
"epoch": 0.05,
"learning_rate": 0.00010109007458405049,
"loss": -357417.25,
"step": 900
},
{
"epoch": 0.05,
"learning_rate": 0.00010166379804934023,
"loss": -355161.525,
"step": 905
},
{
"epoch": 0.05,
"learning_rate": 0.00010223752151462995,
"loss": -357030.05,
"step": 910
},
{
"epoch": 0.05,
"learning_rate": 0.00010281124497991968,
"loss": -355600.7,
"step": 915
},
{
"epoch": 0.05,
"learning_rate": 0.00010338496844520941,
"loss": -354340.6,
"step": 920
},
{
"epoch": 0.05,
"learning_rate": 0.00010395869191049913,
"loss": -355534.325,
"step": 925
},
{
"epoch": 0.05,
"learning_rate": 0.00010453241537578888,
"loss": -353909.575,
"step": 930
},
{
"epoch": 0.05,
"learning_rate": 0.0001051061388410786,
"loss": -355348.65,
"step": 935
},
{
"epoch": 0.05,
"learning_rate": 0.00010567986230636832,
"loss": -356926.1,
"step": 940
},
{
"epoch": 0.05,
"learning_rate": 0.00010625358577165807,
"loss": -354640.25,
"step": 945
},
{
"epoch": 0.05,
"learning_rate": 0.00010682730923694779,
"loss": -358615.6,
"step": 950
},
{
"epoch": 0.05,
"learning_rate": 0.00010740103270223754,
"loss": -357837.25,
"step": 955
},
{
"epoch": 0.06,
"learning_rate": 0.00010797475616752726,
"loss": -355897.675,
"step": 960
},
{
"epoch": 0.06,
"learning_rate": 0.00010854847963281698,
"loss": -356186.9,
"step": 965
},
{
"epoch": 0.06,
"learning_rate": 0.00010912220309810672,
"loss": -359630.8,
"step": 970
},
{
"epoch": 0.06,
"learning_rate": 0.00010969592656339644,
"loss": -358727.8,
"step": 975
},
{
"epoch": 0.06,
"learning_rate": 0.00011026965002868619,
"loss": -357651.55,
"step": 980
},
{
"epoch": 0.06,
"learning_rate": 0.00011084337349397591,
"loss": -355946.15,
"step": 985
},
{
"epoch": 0.06,
"learning_rate": 0.00011141709695926563,
"loss": -358857.8,
"step": 990
},
{
"epoch": 0.06,
"learning_rate": 0.00011199082042455538,
"loss": -356487.45,
"step": 995
},
{
"epoch": 0.06,
"learning_rate": 0.0001125645438898451,
"loss": -357650.75,
"step": 1000
},
{
"epoch": 0.06,
"learning_rate": 0.00011313826735513482,
"loss": -355688.925,
"step": 1005
},
{
"epoch": 0.06,
"learning_rate": 0.00011371199082042456,
"loss": -357238.75,
"step": 1010
},
{
"epoch": 0.06,
"learning_rate": 0.00011428571428571428,
"loss": -357904.6,
"step": 1015
},
{
"epoch": 0.06,
"learning_rate": 0.00011485943775100403,
"loss": -357494.35,
"step": 1020
},
{
"epoch": 0.06,
"learning_rate": 0.00011543316121629375,
"loss": -359447.875,
"step": 1025
},
{
"epoch": 0.06,
"learning_rate": 0.00011600688468158347,
"loss": -360096.1,
"step": 1030
},
{
"epoch": 0.06,
"learning_rate": 0.00011658060814687322,
"loss": -358404.4,
"step": 1035
},
{
"epoch": 0.06,
"learning_rate": 0.00011715433161216294,
"loss": -357519.725,
"step": 1040
},
{
"epoch": 0.06,
"learning_rate": 0.00011772805507745268,
"loss": -358595.95,
"step": 1045
},
{
"epoch": 0.06,
"learning_rate": 0.0001183017785427424,
"loss": -358205.5,
"step": 1050
},
{
"epoch": 0.06,
"learning_rate": 0.00011887550200803212,
"loss": -357902.125,
"step": 1055
},
{
"epoch": 0.06,
"learning_rate": 0.00011944922547332187,
"loss": -355905.0,
"step": 1060
},
{
"epoch": 0.06,
"learning_rate": 0.00012002294893861159,
"loss": -358576.875,
"step": 1065
},
{
"epoch": 0.06,
"learning_rate": 0.00012059667240390134,
"loss": -357161.15,
"step": 1070
},
{
"epoch": 0.06,
"learning_rate": 0.00012117039586919106,
"loss": -356980.475,
"step": 1075
},
{
"epoch": 0.06,
"learning_rate": 0.00012174411933448078,
"loss": -358677.8,
"step": 1080
},
{
"epoch": 0.06,
"learning_rate": 0.00012231784279977053,
"loss": -361365.85,
"step": 1085
},
{
"epoch": 0.06,
"learning_rate": 0.00012289156626506023,
"loss": -358446.7,
"step": 1090
},
{
"epoch": 0.06,
"learning_rate": 0.00012346528973035,
"loss": -360241.0,
"step": 1095
},
{
"epoch": 0.06,
"learning_rate": 0.0001240390131956397,
"loss": -357614.0,
"step": 1100
},
{
"epoch": 0.06,
"learning_rate": 0.00012461273666092943,
"loss": -358462.175,
"step": 1105
},
{
"epoch": 0.06,
"learning_rate": 0.00012518646012621917,
"loss": -359095.15,
"step": 1110
},
{
"epoch": 0.06,
"learning_rate": 0.0001257601835915089,
"loss": -360547.825,
"step": 1115
},
{
"epoch": 0.06,
"learning_rate": 0.00012633390705679863,
"loss": -358881.2,
"step": 1120
},
{
"epoch": 0.06,
"learning_rate": 0.00012690763052208837,
"loss": -358529.925,
"step": 1125
},
{
"epoch": 0.06,
"learning_rate": 0.00012748135398737807,
"loss": -360481.125,
"step": 1130
},
{
"epoch": 0.07,
"learning_rate": 0.00012805507745266783,
"loss": -360687.55,
"step": 1135
},
{
"epoch": 0.07,
"learning_rate": 0.00012862880091795754,
"loss": -361122.95,
"step": 1140
},
{
"epoch": 0.07,
"learning_rate": 0.0001292025243832473,
"loss": -360700.1,
"step": 1145
},
{
"epoch": 0.07,
"learning_rate": 0.000129776247848537,
"loss": -359606.85,
"step": 1150
},
{
"epoch": 0.07,
"learning_rate": 0.00013034997131382674,
"loss": -359177.65,
"step": 1155
},
{
"epoch": 0.07,
"learning_rate": 0.00013092369477911648,
"loss": -360164.15,
"step": 1160
},
{
"epoch": 0.07,
"learning_rate": 0.0001314974182444062,
"loss": -360854.2,
"step": 1165
},
{
"epoch": 0.07,
"learning_rate": 0.00013207114170969592,
"loss": -361289.55,
"step": 1170
},
{
"epoch": 0.07,
"learning_rate": 0.00013264486517498568,
"loss": -359114.45,
"step": 1175
},
{
"epoch": 0.07,
"learning_rate": 0.00013321858864027538,
"loss": -360385.675,
"step": 1180
},
{
"epoch": 0.07,
"learning_rate": 0.00013379231210556514,
"loss": -362112.625,
"step": 1185
},
{
"epoch": 0.07,
"learning_rate": 0.00013436603557085485,
"loss": -362259.45,
"step": 1190
},
{
"epoch": 0.07,
"learning_rate": 0.00013493975903614458,
"loss": -363982.25,
"step": 1195
},
{
"epoch": 0.07,
"learning_rate": 0.00013551348250143432,
"loss": -362026.25,
"step": 1200
},
{
"epoch": 0.07,
"learning_rate": 0.00013608720596672405,
"loss": -362380.625,
"step": 1205
},
{
"epoch": 0.07,
"learning_rate": 0.00013666092943201378,
"loss": -360134.85,
"step": 1210
},
{
"epoch": 0.07,
"learning_rate": 0.00013723465289730352,
"loss": -363139.05,
"step": 1215
},
{
"epoch": 0.07,
"learning_rate": 0.00013780837636259322,
"loss": -361801.5,
"step": 1220
},
{
"epoch": 0.07,
"learning_rate": 0.00013838209982788298,
"loss": -361492.95,
"step": 1225
},
{
"epoch": 0.07,
"learning_rate": 0.0001389558232931727,
"loss": -361177.0,
"step": 1230
},
{
"epoch": 0.07,
"learning_rate": 0.00013952954675846242,
"loss": -359543.225,
"step": 1235
},
{
"epoch": 0.07,
"learning_rate": 0.00014010327022375216,
"loss": -361196.2,
"step": 1240
},
{
"epoch": 0.07,
"learning_rate": 0.0001406769936890419,
"loss": -362095.25,
"step": 1245
},
{
"epoch": 0.07,
"learning_rate": 0.00014125071715433162,
"loss": -360909.45,
"step": 1250
},
{
"epoch": 0.07,
"learning_rate": 0.00014182444061962136,
"loss": -360705.05,
"step": 1255
},
{
"epoch": 0.07,
"learning_rate": 0.0001423981640849111,
"loss": -359824.35,
"step": 1260
},
{
"epoch": 0.07,
"learning_rate": 0.0001429718875502008,
"loss": -363746.275,
"step": 1265
},
{
"epoch": 0.07,
"learning_rate": 0.00014354561101549053,
"loss": -361251.4,
"step": 1270
},
{
"epoch": 0.07,
"learning_rate": 0.00014411933448078027,
"loss": -362261.975,
"step": 1275
},
{
"epoch": 0.07,
"learning_rate": 0.00014469305794607,
"loss": -360860.95,
"step": 1280
},
{
"epoch": 0.07,
"learning_rate": 0.00014526678141135973,
"loss": -360790.65,
"step": 1285
},
{
"epoch": 0.07,
"learning_rate": 0.00014584050487664947,
"loss": -363039.325,
"step": 1290
},
{
"epoch": 0.07,
"learning_rate": 0.00014641422834193917,
"loss": -361957.125,
"step": 1295
},
{
"epoch": 0.07,
"learning_rate": 0.00014698795180722893,
"loss": -363227.75,
"step": 1300
},
{
"epoch": 0.07,
"learning_rate": 0.00014756167527251864,
"loss": -363475.55,
"step": 1305
},
{
"epoch": 0.08,
"learning_rate": 0.00014813539873780837,
"loss": -363008.35,
"step": 1310
},
{
"epoch": 0.08,
"learning_rate": 0.0001487091222030981,
"loss": -363893.85,
"step": 1315
},
{
"epoch": 0.08,
"learning_rate": 0.00014928284566838784,
"loss": -364085.125,
"step": 1320
},
{
"epoch": 0.08,
"learning_rate": 0.00014985656913367757,
"loss": -364163.2,
"step": 1325
},
{
"epoch": 0.08,
"learning_rate": 0.0001504302925989673,
"loss": -365433.5,
"step": 1330
},
{
"epoch": 0.08,
"learning_rate": 0.00015100401606425701,
"loss": -363770.375,
"step": 1335
},
{
"epoch": 0.08,
"learning_rate": 0.00015157773952954677,
"loss": -363925.7,
"step": 1340
},
{
"epoch": 0.08,
"learning_rate": 0.00015215146299483648,
"loss": -365160.825,
"step": 1345
},
{
"epoch": 0.08,
"learning_rate": 0.00015272518646012624,
"loss": -365413.325,
"step": 1350
},
{
"epoch": 0.08,
"learning_rate": 0.00015329890992541595,
"loss": -366124.675,
"step": 1355
},
{
"epoch": 0.08,
"learning_rate": 0.00015387263339070568,
"loss": -365539.125,
"step": 1360
},
{
"epoch": 0.08,
"learning_rate": 0.00015444635685599542,
"loss": -366397.4,
"step": 1365
},
{
"epoch": 0.08,
"learning_rate": 0.00015502008032128515,
"loss": -365261.925,
"step": 1370
},
{
"epoch": 0.08,
"learning_rate": 0.00015559380378657488,
"loss": -365231.175,
"step": 1375
},
{
"epoch": 0.08,
"learning_rate": 0.00015616752725186462,
"loss": -365399.5,
"step": 1380
},
{
"epoch": 0.08,
"learning_rate": 0.00015674125071715432,
"loss": -363881.425,
"step": 1385
},
{
"epoch": 0.08,
"learning_rate": 0.00015731497418244408,
"loss": -365521.1,
"step": 1390
},
{
"epoch": 0.08,
"learning_rate": 0.0001578886976477338,
"loss": -365215.9,
"step": 1395
},
{
"epoch": 0.08,
"learning_rate": 0.00015846242111302355,
"loss": -364513.7,
"step": 1400
},
{
"epoch": 0.08,
"learning_rate": 0.00015903614457831326,
"loss": -364748.6,
"step": 1405
},
{
"epoch": 0.08,
"learning_rate": 0.000159609868043603,
"loss": -363320.75,
"step": 1410
},
{
"epoch": 0.08,
"learning_rate": 0.00016018359150889272,
"loss": -363812.2,
"step": 1415
},
{
"epoch": 0.08,
"learning_rate": 0.00016075731497418246,
"loss": -364653.9,
"step": 1420
},
{
"epoch": 0.08,
"learning_rate": 0.0001613310384394722,
"loss": -364665.55,
"step": 1425
},
{
"epoch": 0.08,
"learning_rate": 0.00016190476190476192,
"loss": -365881.2,
"step": 1430
},
{
"epoch": 0.08,
"learning_rate": 0.00016247848537005163,
"loss": -366169.075,
"step": 1435
},
{
"epoch": 0.08,
"learning_rate": 0.0001630522088353414,
"loss": -365195.8,
"step": 1440
},
{
"epoch": 0.08,
"learning_rate": 0.0001636259323006311,
"loss": -362595.15,
"step": 1445
},
{
"epoch": 0.08,
"learning_rate": 0.00016419965576592083,
"loss": -362970.7,
"step": 1450
},
{
"epoch": 0.08,
"learning_rate": 0.00016477337923121056,
"loss": -364486.675,
"step": 1455
},
{
"epoch": 0.08,
"learning_rate": 0.0001653471026965003,
"loss": -366094.55,
"step": 1460
},
{
"epoch": 0.08,
"learning_rate": 0.00016592082616179003,
"loss": -366584.675,
"step": 1465
},
{
"epoch": 0.08,
"learning_rate": 0.00016649454962707977,
"loss": -365571.7,
"step": 1470
},
{
"epoch": 0.08,
"learning_rate": 0.00016706827309236947,
"loss": -366305.35,
"step": 1475
},
{
"epoch": 0.08,
"learning_rate": 0.00016764199655765923,
"loss": -364763.65,
"step": 1480
},
{
"epoch": 0.09,
"learning_rate": 0.00016821572002294894,
"loss": -366345.225,
"step": 1485
},
{
"epoch": 0.09,
"learning_rate": 0.00016878944348823867,
"loss": -365956.0,
"step": 1490
},
{
"epoch": 0.09,
"learning_rate": 0.0001693631669535284,
"loss": -365422.15,
"step": 1495
},
{
"epoch": 0.09,
"learning_rate": 0.00016993689041881814,
"loss": -366308.25,
"step": 1500
},
{
"epoch": 0.09,
"learning_rate": 0.00017051061388410787,
"loss": -367875.15,
"step": 1505
},
{
"epoch": 0.09,
"learning_rate": 0.0001710843373493976,
"loss": -364890.275,
"step": 1510
},
{
"epoch": 0.09,
"learning_rate": 0.00017165806081468734,
"loss": -364779.925,
"step": 1515
},
{
"epoch": 0.09,
"learning_rate": 0.00017223178427997705,
"loss": -367256.55,
"step": 1520
},
{
"epoch": 0.09,
"learning_rate": 0.00017280550774526678,
"loss": -367925.85,
"step": 1525
},
{
"epoch": 0.09,
"learning_rate": 0.00017337923121055651,
"loss": -366563.575,
"step": 1530
},
{
"epoch": 0.09,
"learning_rate": 0.00017395295467584625,
"loss": -367666.95,
"step": 1535
},
{
"epoch": 0.09,
"learning_rate": 0.00017452667814113598,
"loss": -366986.625,
"step": 1540
},
{
"epoch": 0.09,
"learning_rate": 0.00017510040160642571,
"loss": -365646.4,
"step": 1545
},
{
"epoch": 0.09,
"learning_rate": 0.00017567412507171542,
"loss": -367983.925,
"step": 1550
},
{
"epoch": 0.09,
"learning_rate": 0.00017624784853700518,
"loss": -366123.125,
"step": 1555
},
{
"epoch": 0.09,
"learning_rate": 0.0001768215720022949,
"loss": -367173.025,
"step": 1560
},
{
"epoch": 0.09,
"learning_rate": 0.00017739529546758465,
"loss": -369332.575,
"step": 1565
},
{
"epoch": 0.09,
"learning_rate": 0.00017796901893287436,
"loss": -368102.05,
"step": 1570
},
{
"epoch": 0.09,
"learning_rate": 0.0001785427423981641,
"loss": -368694.55,
"step": 1575
},
{
"epoch": 0.09,
"learning_rate": 0.00017911646586345382,
"loss": -369101.475,
"step": 1580
},
{
"epoch": 0.09,
"learning_rate": 0.00017969018932874356,
"loss": -370433.575,
"step": 1585
},
{
"epoch": 0.09,
"learning_rate": 0.00018026391279403326,
"loss": -368229.25,
"step": 1590
},
{
"epoch": 0.09,
"learning_rate": 0.00018083763625932302,
"loss": -370543.525,
"step": 1595
},
{
"epoch": 0.09,
"learning_rate": 0.00018141135972461273,
"loss": -368325.2,
"step": 1600
},
{
"epoch": 0.09,
"learning_rate": 0.0001819850831899025,
"loss": -366829.8,
"step": 1605
},
{
"epoch": 0.09,
"learning_rate": 0.0001825588066551922,
"loss": -370263.3,
"step": 1610
},
{
"epoch": 0.09,
"learning_rate": 0.00018313253012048193,
"loss": -370699.8,
"step": 1615
},
{
"epoch": 0.09,
"learning_rate": 0.00018370625358577166,
"loss": -369021.75,
"step": 1620
},
{
"epoch": 0.09,
"learning_rate": 0.0001842799770510614,
"loss": -367038.475,
"step": 1625
},
{
"epoch": 0.09,
"learning_rate": 0.00018485370051635113,
"loss": -370615.5,
"step": 1630
},
{
"epoch": 0.09,
"learning_rate": 0.00018542742398164086,
"loss": -370063.65,
"step": 1635
},
{
"epoch": 0.09,
"learning_rate": 0.00018600114744693057,
"loss": -372375.2,
"step": 1640
},
{
"epoch": 0.09,
"learning_rate": 0.00018657487091222033,
"loss": -370148.15,
"step": 1645
},
{
"epoch": 0.09,
"learning_rate": 0.00018714859437751004,
"loss": -371129.6,
"step": 1650
},
{
"epoch": 0.09,
"learning_rate": 0.0001877223178427998,
"loss": -369848.225,
"step": 1655
},
{
"epoch": 0.1,
"learning_rate": 0.0001882960413080895,
"loss": -371611.225,
"step": 1660
},
{
"epoch": 0.1,
"learning_rate": 0.00018886976477337924,
"loss": -370429.4,
"step": 1665
},
{
"epoch": 0.1,
"learning_rate": 0.00018944348823866897,
"loss": -369488.8,
"step": 1670
},
{
"epoch": 0.1,
"learning_rate": 0.0001900172117039587,
"loss": -368976.45,
"step": 1675
},
{
"epoch": 0.1,
"learning_rate": 0.00019059093516924844,
"loss": -369330.65,
"step": 1680
},
{
"epoch": 0.1,
"learning_rate": 0.00019116465863453817,
"loss": -369349.7,
"step": 1685
},
{
"epoch": 0.1,
"learning_rate": 0.00019173838209982788,
"loss": -370086.65,
"step": 1690
},
{
"epoch": 0.1,
"learning_rate": 0.00019231210556511764,
"loss": -368275.9,
"step": 1695
},
{
"epoch": 0.1,
"learning_rate": 0.00019288582903040735,
"loss": -371039.125,
"step": 1700
},
{
"epoch": 0.1,
"learning_rate": 0.0001934595524956971,
"loss": -368649.225,
"step": 1705
},
{
"epoch": 0.1,
"learning_rate": 0.0001940332759609868,
"loss": -370923.7,
"step": 1710
},
{
"epoch": 0.1,
"learning_rate": 0.00019460699942627655,
"loss": -370107.775,
"step": 1715
},
{
"epoch": 0.1,
"learning_rate": 0.00019518072289156628,
"loss": -371011.2,
"step": 1720
},
{
"epoch": 0.1,
"learning_rate": 0.00019575444635685601,
"loss": -368382.9,
"step": 1725
},
{
"epoch": 0.1,
"learning_rate": 0.00019632816982214572,
"loss": -372018.875,
"step": 1730
},
{
"epoch": 0.1,
"learning_rate": 0.00019690189328743548,
"loss": -369914.925,
"step": 1735
},
{
"epoch": 0.1,
"learning_rate": 0.0001974756167527252,
"loss": -370497.65,
"step": 1740
},
{
"epoch": 0.1,
"learning_rate": 0.00019804934021801492,
"loss": -371605.45,
"step": 1745
},
{
"epoch": 0.1,
"learning_rate": 0.00019862306368330465,
"loss": -370938.55,
"step": 1750
},
{
"epoch": 0.1,
"learning_rate": 0.0001991967871485944,
"loss": -369546.8,
"step": 1755
},
{
"epoch": 0.1,
"learning_rate": 0.00019977051061388412,
"loss": -371349.5,
"step": 1760
},
{
"epoch": 0.1,
"learning_rate": 0.0001999999819495587,
"loss": -369888.225,
"step": 1765
},
{
"epoch": 0.1,
"learning_rate": 0.00019999987164132972,
"loss": -372648.9,
"step": 1770
},
{
"epoch": 0.1,
"learning_rate": 0.00019999966105300522,
"loss": -372354.45,
"step": 1775
},
{
"epoch": 0.1,
"learning_rate": 0.0001999993501847964,
"loss": -371826.925,
"step": 1780
},
{
"epoch": 0.1,
"learning_rate": 0.00019999893903701498,
"loss": -373579.85,
"step": 1785
},
{
"epoch": 0.1,
"learning_rate": 0.00019999842761007325,
"loss": -372518.15,
"step": 1790
},
{
"epoch": 0.1,
"learning_rate": 0.0001999978159044841,
"loss": -373327.5,
"step": 1795
},
{
"epoch": 0.1,
"learning_rate": 0.0001999971039208609,
"loss": -374031.8,
"step": 1800
},
{
"epoch": 0.1,
"learning_rate": 0.00019999629165991768,
"loss": -375497.475,
"step": 1805
},
{
"epoch": 0.1,
"learning_rate": 0.00019999537912246892,
"loss": -371809.95,
"step": 1810
},
{
"epoch": 0.1,
"learning_rate": 0.00019999436630942975,
"loss": -372979.475,
"step": 1815
},
{
"epoch": 0.1,
"learning_rate": 0.00019999325322181584,
"loss": -373760.6,
"step": 1820
},
{
"epoch": 0.1,
"learning_rate": 0.0001999920398607434,
"loss": -374876.575,
"step": 1825
},
{
"epoch": 0.1,
"learning_rate": 0.00019999072622742912,
"loss": -375073.9,
"step": 1830
},
{
"epoch": 0.11,
"learning_rate": 0.0001999893123231904,
"loss": -374234.1,
"step": 1835
},
{
"epoch": 0.11,
"learning_rate": 0.00019998779814944505,
"loss": -374232.375,
"step": 1840
},
{
"epoch": 0.11,
"learning_rate": 0.00019998618370771148,
"loss": -373467.0,
"step": 1845
},
{
"epoch": 0.11,
"learning_rate": 0.0001999844689996087,
"loss": -374076.4,
"step": 1850
},
{
"epoch": 0.11,
"learning_rate": 0.00019998265402685622,
"loss": -374997.65,
"step": 1855
},
{
"epoch": 0.11,
"learning_rate": 0.00019998073879127408,
"loss": -375139.15,
"step": 1860
},
{
"epoch": 0.11,
"learning_rate": 0.00019997872329478286,
"loss": -374068.475,
"step": 1865
},
{
"epoch": 0.11,
"learning_rate": 0.00019997660753940375,
"loss": -374126.4,
"step": 1870
},
{
"epoch": 0.11,
"learning_rate": 0.0001999743915272584,
"loss": -375399.95,
"step": 1875
},
{
"epoch": 0.11,
"learning_rate": 0.00019997207526056907,
"loss": -375400.4,
"step": 1880
},
{
"epoch": 0.11,
"learning_rate": 0.0001999696587416585,
"loss": -373046.0,
"step": 1885
},
{
"epoch": 0.11,
"learning_rate": 0.00019996714197294995,
"loss": -372806.025,
"step": 1890
},
{
"epoch": 0.11,
"learning_rate": 0.00019996452495696726,
"loss": -373136.95,
"step": 1895
},
{
"epoch": 0.11,
"learning_rate": 0.0001999618076963348,
"loss": -373399.475,
"step": 1900
},
{
"epoch": 0.11,
"learning_rate": 0.00019995899019377742,
"loss": -374226.25,
"step": 1905
},
{
"epoch": 0.11,
"learning_rate": 0.00019995607245212052,
"loss": -373977.6,
"step": 1910
},
{
"epoch": 0.11,
"learning_rate": 0.00019995305447429002,
"loss": -372141.325,
"step": 1915
},
{
"epoch": 0.11,
"learning_rate": 0.00019994993626331237,
"loss": -374759.85,
"step": 1920
},
{
"epoch": 0.11,
"learning_rate": 0.00019994671782231453,
"loss": -375066.575,
"step": 1925
},
{
"epoch": 0.11,
"learning_rate": 0.00019994339915452395,
"loss": -373136.5,
"step": 1930
},
{
"epoch": 0.11,
"learning_rate": 0.00019993998026326853,
"loss": -376071.1,
"step": 1935
},
{
"epoch": 0.11,
"learning_rate": 0.00019993646115197685,
"loss": -373615.7,
"step": 1940
},
{
"epoch": 0.11,
"learning_rate": 0.0001999328418241778,
"loss": -374433.275,
"step": 1945
},
{
"epoch": 0.11,
"learning_rate": 0.0001999291222835009,
"loss": -374596.6,
"step": 1950
},
{
"epoch": 0.11,
"learning_rate": 0.00019992530253367608,
"loss": -372932.975,
"step": 1955
},
{
"epoch": 0.11,
"learning_rate": 0.0001999213825785338,
"loss": -375415.55,
"step": 1960
},
{
"epoch": 0.11,
"learning_rate": 0.00019991736242200506,
"loss": -374878.0,
"step": 1965
},
{
"epoch": 0.11,
"learning_rate": 0.00019991324206812116,
"loss": -375141.8,
"step": 1970
},
{
"epoch": 0.11,
"learning_rate": 0.00019990902152101412,
"loss": -373080.95,
"step": 1975
},
{
"epoch": 0.11,
"learning_rate": 0.00019990470078491625,
"loss": -376766.65,
"step": 1980
},
{
"epoch": 0.11,
"learning_rate": 0.0001999002798641604,
"loss": -374474.125,
"step": 1985
},
{
"epoch": 0.11,
"learning_rate": 0.00019989575876317993,
"loss": -378217.05,
"step": 1990
},
{
"epoch": 0.11,
"learning_rate": 0.0001998911374865085,
"loss": -374596.1,
"step": 1995
},
{
"epoch": 0.11,
"learning_rate": 0.00019988641603878047,
"loss": -375783.3,
"step": 2000
},
{
"epoch": 0.12,
"learning_rate": 0.0001998815944247304,
"loss": -376494.65,
"step": 2005
},
{
"epoch": 0.12,
"learning_rate": 0.0001998766726491935,
"loss": -376611.325,
"step": 2010
},
{
"epoch": 0.12,
"learning_rate": 0.00019987165071710527,
"loss": -374997.375,
"step": 2015
},
{
"epoch": 0.12,
"learning_rate": 0.0001998665286335018,
"loss": -375301.7,
"step": 2020
},
{
"epoch": 0.12,
"learning_rate": 0.00019986130640351946,
"loss": -375884.9,
"step": 2025
},
{
"epoch": 0.12,
"learning_rate": 0.00019985598403239512,
"loss": -374623.95,
"step": 2030
},
{
"epoch": 0.12,
"learning_rate": 0.00019985056152546608,
"loss": -375190.3,
"step": 2035
},
{
"epoch": 0.12,
"learning_rate": 0.00019984503888817004,
"loss": -376001.15,
"step": 2040
},
{
"epoch": 0.12,
"learning_rate": 0.0001998394161260451,
"loss": -376446.7,
"step": 2045
},
{
"epoch": 0.12,
"learning_rate": 0.00019983369324472983,
"loss": -376786.6,
"step": 2050
},
{
"epoch": 0.12,
"learning_rate": 0.00019982787024996307,
"loss": -377994.125,
"step": 2055
},
{
"epoch": 0.12,
"learning_rate": 0.00019982194714758417,
"loss": -377141.1,
"step": 2060
},
{
"epoch": 0.12,
"learning_rate": 0.00019981592394353287,
"loss": -377329.175,
"step": 2065
},
{
"epoch": 0.12,
"learning_rate": 0.00019980980064384916,
"loss": -378761.0,
"step": 2070
},
{
"epoch": 0.12,
"learning_rate": 0.0001998035772546736,
"loss": -378007.975,
"step": 2075
},
{
"epoch": 0.12,
"learning_rate": 0.00019979725378224697,
"loss": -376513.1,
"step": 2080
},
{
"epoch": 0.12,
"learning_rate": 0.00019979083023291043,
"loss": -374871.65,
"step": 2085
},
{
"epoch": 0.12,
"learning_rate": 0.0001997843066131056,
"loss": -377070.95,
"step": 2090
},
{
"epoch": 0.12,
"learning_rate": 0.00019977768292937431,
"loss": -376784.475,
"step": 2095
},
{
"epoch": 0.12,
"learning_rate": 0.00019977095918835886,
"loss": -376203.975,
"step": 2100
},
{
"epoch": 0.12,
"learning_rate": 0.00019976413539680182,
"loss": -376471.5,
"step": 2105
},
{
"epoch": 0.12,
"learning_rate": 0.00019975721156154606,
"loss": -378366.35,
"step": 2110
},
{
"epoch": 0.12,
"learning_rate": 0.0001997501876895349,
"loss": -380470.325,
"step": 2115
},
{
"epoch": 0.12,
"learning_rate": 0.0001997430637878118,
"loss": -379603.125,
"step": 2120
},
{
"epoch": 0.12,
"learning_rate": 0.0001997358398635207,
"loss": -379410.3,
"step": 2125
},
{
"epoch": 0.12,
"learning_rate": 0.00019972851592390574,
"loss": -378729.1,
"step": 2130
},
{
"epoch": 0.12,
"learning_rate": 0.00019972109197631137,
"loss": -381401.65,
"step": 2135
},
{
"epoch": 0.12,
"learning_rate": 0.00019971356802818234,
"loss": -376794.175,
"step": 2140
},
{
"epoch": 0.12,
"learning_rate": 0.00019970594408706373,
"loss": -377792.475,
"step": 2145
},
{
"epoch": 0.12,
"learning_rate": 0.0001996982201606008,
"loss": -379310.85,
"step": 2150
},
{
"epoch": 0.12,
"learning_rate": 0.00019969039625653912,
"loss": -376466.35,
"step": 2155
},
{
"epoch": 0.12,
"learning_rate": 0.00019968247238272456,
"loss": -377192.2,
"step": 2160
},
{
"epoch": 0.12,
"learning_rate": 0.0001996744485471031,
"loss": -379389.775,
"step": 2165
},
{
"epoch": 0.12,
"learning_rate": 0.0001996663247577212,
"loss": -378994.55,
"step": 2170
},
{
"epoch": 0.12,
"learning_rate": 0.0001996581010227253,
"loss": -377849.05,
"step": 2175
},
{
"epoch": 0.13,
"learning_rate": 0.00019964977735036223,
"loss": -380360.15,
"step": 2180
},
{
"epoch": 0.13,
"learning_rate": 0.000199641353748979,
"loss": -376151.325,
"step": 2185
},
{
"epoch": 0.13,
"learning_rate": 0.0001996328302270228,
"loss": -379724.675,
"step": 2190
},
{
"epoch": 0.13,
"learning_rate": 0.00019962420679304102,
"loss": -380463.875,
"step": 2195
},
{
"epoch": 0.13,
"learning_rate": 0.00019961548345568127,
"loss": -377221.05,
"step": 2200
},
{
"epoch": 0.13,
"learning_rate": 0.00019960666022369132,
"loss": -379056.475,
"step": 2205
},
{
"epoch": 0.13,
"learning_rate": 0.00019959773710591917,
"loss": -378709.625,
"step": 2210
},
{
"epoch": 0.13,
"learning_rate": 0.0001995887141113129,
"loss": -379229.1,
"step": 2215
},
{
"epoch": 0.13,
"learning_rate": 0.0001995795912489208,
"loss": -380581.875,
"step": 2220
},
{
"epoch": 0.13,
"learning_rate": 0.0001995703685278913,
"loss": -380266.475,
"step": 2225
},
{
"epoch": 0.13,
"learning_rate": 0.00019956104595747297,
"loss": -380923.05,
"step": 2230
},
{
"epoch": 0.13,
"learning_rate": 0.00019955162354701452,
"loss": -379820.1,
"step": 2235
},
{
"epoch": 0.13,
"learning_rate": 0.0001995421013059647,
"loss": -378100.25,
"step": 2240
},
{
"epoch": 0.13,
"learning_rate": 0.00019953247924387252,
"loss": -378512.35,
"step": 2245
},
{
"epoch": 0.13,
"learning_rate": 0.00019952275737038696,
"loss": -378658.975,
"step": 2250
},
{
"epoch": 0.13,
"learning_rate": 0.00019951293569525714,
"loss": -380244.1,
"step": 2255
},
{
"epoch": 0.13,
"learning_rate": 0.00019950301422833223,
"loss": -379381.4,
"step": 2260
},
{
"epoch": 0.13,
"learning_rate": 0.00019949299297956157,
"loss": -379346.25,
"step": 2265
},
{
"epoch": 0.13,
"learning_rate": 0.00019948287195899441,
"loss": -381164.475,
"step": 2270
},
{
"epoch": 0.13,
"learning_rate": 0.0001994726511767802,
"loss": -381646.4,
"step": 2275
},
{
"epoch": 0.13,
"learning_rate": 0.0001994623306431683,
"loss": -382291.45,
"step": 2280
},
{
"epoch": 0.13,
"learning_rate": 0.00019945191036850824,
"loss": -381426.8,
"step": 2285
},
{
"epoch": 0.13,
"learning_rate": 0.00019944139036324942,
"loss": -380680.5,
"step": 2290
},
{
"epoch": 0.13,
"learning_rate": 0.00019943077063794138,
"loss": -379971.85,
"step": 2295
},
{
"epoch": 0.13,
"learning_rate": 0.00019942005120323356,
"loss": -380942.95,
"step": 2300
},
{
"epoch": 0.13,
"learning_rate": 0.0001994092320698755,
"loss": -380319.025,
"step": 2305
},
{
"epoch": 0.13,
"learning_rate": 0.00019939831324871656,
"loss": -382115.65,
"step": 2310
},
{
"epoch": 0.13,
"learning_rate": 0.00019938729475070624,
"loss": -381474.275,
"step": 2315
},
{
"epoch": 0.13,
"learning_rate": 0.00019937617658689384,
"loss": -380908.375,
"step": 2320
},
{
"epoch": 0.13,
"learning_rate": 0.00019936495876842877,
"loss": -382077.0,
"step": 2325
},
{
"epoch": 0.13,
"learning_rate": 0.0001993536413065602,
"loss": -381410.025,
"step": 2330
},
{
"epoch": 0.13,
"learning_rate": 0.00019934222421263734,
"loss": -382348.975,
"step": 2335
},
{
"epoch": 0.13,
"learning_rate": 0.00019933070749810928,
"loss": -380548.25,
"step": 2340
},
{
"epoch": 0.13,
"learning_rate": 0.00019931909117452498,
"loss": -381308.7,
"step": 2345
},
{
"epoch": 0.13,
"learning_rate": 0.00019930737525353337,
"loss": -383941.775,
"step": 2350
},
{
"epoch": 0.14,
"learning_rate": 0.00019929555974688317,
"loss": -381028.575,
"step": 2355
},
{
"epoch": 0.14,
"learning_rate": 0.000199283644666423,
"loss": -381415.6,
"step": 2360
},
{
"epoch": 0.14,
"learning_rate": 0.00019927163002410133,
"loss": -380644.25,
"step": 2365
},
{
"epoch": 0.14,
"learning_rate": 0.0001992595158319664,
"loss": -379236.3,
"step": 2370
},
{
"epoch": 0.14,
"learning_rate": 0.00019924730210216652,
"loss": -383746.7,
"step": 2375
},
{
"epoch": 0.14,
"learning_rate": 0.00019923498884694948,
"loss": -383131.4,
"step": 2380
},
{
"epoch": 0.14,
"learning_rate": 0.0001992225760786631,
"loss": -383897.0,
"step": 2385
},
{
"epoch": 0.14,
"learning_rate": 0.00019921006380975496,
"loss": -382192.75,
"step": 2390
},
{
"epoch": 0.14,
"learning_rate": 0.00019919745205277236,
"loss": -384938.525,
"step": 2395
},
{
"epoch": 0.14,
"learning_rate": 0.0001991847408203624,
"loss": -382230.3,
"step": 2400
},
{
"epoch": 0.14,
"learning_rate": 0.00019917193012527194,
"loss": -383274.35,
"step": 2405
},
{
"epoch": 0.14,
"learning_rate": 0.00019915901998034756,
"loss": -382033.775,
"step": 2410
},
{
"epoch": 0.14,
"learning_rate": 0.00019914601039853558,
"loss": -384337.975,
"step": 2415
},
{
"epoch": 0.14,
"learning_rate": 0.00019913290139288208,
"loss": -383088.7,
"step": 2420
},
{
"epoch": 0.14,
"learning_rate": 0.00019911969297653275,
"loss": -384575.35,
"step": 2425
},
{
"epoch": 0.14,
"learning_rate": 0.000199106385162733,
"loss": -384732.75,
"step": 2430
},
{
"epoch": 0.14,
"learning_rate": 0.000199092977964828,
"loss": -381957.525,
"step": 2435
},
{
"epoch": 0.14,
"learning_rate": 0.00019907947139626253,
"loss": -383987.4,
"step": 2440
},
{
"epoch": 0.14,
"learning_rate": 0.00019906586547058095,
"loss": -385980.9,
"step": 2445
},
{
"epoch": 0.14,
"learning_rate": 0.00019905216020142734,
"loss": -384454.15,
"step": 2450
},
{
"epoch": 0.14,
"learning_rate": 0.00019903835560254536,
"loss": -383701.65,
"step": 2455
},
{
"epoch": 0.14,
"learning_rate": 0.0001990244516877783,
"loss": -386540.15,
"step": 2460
},
{
"epoch": 0.14,
"learning_rate": 0.00019901044847106904,
"loss": -384910.45,
"step": 2465
},
{
"epoch": 0.14,
"learning_rate": 0.00019899634596646002,
"loss": -385993.225,
"step": 2470
},
{
"epoch": 0.14,
"learning_rate": 0.0001989821441880933,
"loss": -384914.4,
"step": 2475
},
{
"epoch": 0.14,
"learning_rate": 0.0001989678431502104,
"loss": -384792.5,
"step": 2480
},
{
"epoch": 0.14,
"learning_rate": 0.0001989534428671525,
"loss": -386860.775,
"step": 2485
},
{
"epoch": 0.14,
"learning_rate": 0.00019893894335336017,
"loss": -386077.15,
"step": 2490
},
{
"epoch": 0.14,
"learning_rate": 0.0001989243446233736,
"loss": -387537.225,
"step": 2495
},
{
"epoch": 0.14,
"learning_rate": 0.0001989096466918324,
"loss": -386423.8,
"step": 2500
},
{
"epoch": 0.14,
"learning_rate": 0.00019889484957347575,
"loss": -387536.45,
"step": 2505
},
{
"epoch": 0.14,
"learning_rate": 0.00019887995328314215,
"loss": -387344.85,
"step": 2510
},
{
"epoch": 0.14,
"learning_rate": 0.0001988649578357697,
"loss": -385629.8,
"step": 2515
},
{
"epoch": 0.14,
"learning_rate": 0.0001988498632463958,
"loss": -386901.975,
"step": 2520
},
{
"epoch": 0.14,
"learning_rate": 0.0001988346695301574,
"loss": -385991.45,
"step": 2525
},
{
"epoch": 0.15,
"learning_rate": 0.00019881937670229076,
"loss": -387151.35,
"step": 2530
},
{
"epoch": 0.15,
"learning_rate": 0.00019880398477813157,
"loss": -388640.1,
"step": 2535
},
{
"epoch": 0.15,
"learning_rate": 0.0001987884937731149,
"loss": -386557.35,
"step": 2540
},
{
"epoch": 0.15,
"learning_rate": 0.00019877290370277516,
"loss": -387900.95,
"step": 2545
},
{
"epoch": 0.15,
"learning_rate": 0.0001987572145827461,
"loss": -388339.55,
"step": 2550
},
{
"epoch": 0.15,
"learning_rate": 0.0001987414264287608,
"loss": -389637.45,
"step": 2555
},
{
"epoch": 0.15,
"learning_rate": 0.00019872553925665164,
"loss": -387942.825,
"step": 2560
},
{
"epoch": 0.15,
"learning_rate": 0.00019870955308235036,
"loss": -386618.725,
"step": 2565
},
{
"epoch": 0.15,
"learning_rate": 0.00019869346792188787,
"loss": -388027.925,
"step": 2570
},
{
"epoch": 0.15,
"learning_rate": 0.0001986772837913945,
"loss": -388338.6,
"step": 2575
},
{
"epoch": 0.15,
"learning_rate": 0.00019866100070709962,
"loss": -387953.725,
"step": 2580
},
{
"epoch": 0.15,
"learning_rate": 0.00019864461868533201,
"loss": -387030.95,
"step": 2585
},
{
"epoch": 0.15,
"learning_rate": 0.0001986281377425196,
"loss": -387630.4,
"step": 2590
},
{
"epoch": 0.15,
"learning_rate": 0.00019861155789518948,
"loss": -387084.9,
"step": 2595
},
{
"epoch": 0.15,
"learning_rate": 0.00019859487915996796,
"loss": -387936.3,
"step": 2600
},
{
"epoch": 0.15,
"learning_rate": 0.00019857810155358058,
"loss": -387557.475,
"step": 2605
},
{
"epoch": 0.15,
"learning_rate": 0.0001985612250928519,
"loss": -387064.125,
"step": 2610
},
{
"epoch": 0.15,
"learning_rate": 0.00019854424979470566,
"loss": -389489.6,
"step": 2615
},
{
"epoch": 0.15,
"learning_rate": 0.00019852717567616477,
"loss": -386877.95,
"step": 2620
},
{
"epoch": 0.15,
"learning_rate": 0.00019851000275435116,
"loss": -388247.55,
"step": 2625
},
{
"epoch": 0.15,
"learning_rate": 0.00019849273104648592,
"loss": -388444.175,
"step": 2630
},
{
"epoch": 0.15,
"learning_rate": 0.00019847536056988912,
"loss": -388085.125,
"step": 2635
},
{
"epoch": 0.15,
"learning_rate": 0.0001984578913419799,
"loss": -390132.025,
"step": 2640
},
{
"epoch": 0.15,
"learning_rate": 0.00019844032338027647,
"loss": -390666.85,
"step": 2645
},
{
"epoch": 0.15,
"learning_rate": 0.000198422656702396,
"loss": -387382.45,
"step": 2650
},
{
"epoch": 0.15,
"learning_rate": 0.00019840489132605472,
"loss": -389585.45,
"step": 2655
},
{
"epoch": 0.15,
"learning_rate": 0.00019838702726906774,
"loss": -387664.7,
"step": 2660
},
{
"epoch": 0.15,
"learning_rate": 0.00019836906454934918,
"loss": -389010.9,
"step": 2665
},
{
"epoch": 0.15,
"learning_rate": 0.00019835100318491214,
"loss": -388872.45,
"step": 2670
},
{
"epoch": 0.15,
"learning_rate": 0.00019833284319386855,
"loss": -389068.725,
"step": 2675
},
{
"epoch": 0.15,
"learning_rate": 0.00019831458459442931,
"loss": -386642.95,
"step": 2680
},
{
"epoch": 0.15,
"learning_rate": 0.0001982962274049042,
"loss": -389572.8,
"step": 2685
},
{
"epoch": 0.15,
"learning_rate": 0.00019827777164370178,
"loss": -389664.675,
"step": 2690
},
{
"epoch": 0.15,
"learning_rate": 0.0001982592173293296,
"loss": -389267.95,
"step": 2695
},
{
"epoch": 0.15,
"learning_rate": 0.00019824056448039396,
"loss": -389436.8,
"step": 2700
},
{
"epoch": 0.16,
"learning_rate": 0.00019822181311559994,
"loss": -391930.425,
"step": 2705
},
{
"epoch": 0.16,
"learning_rate": 0.00019820296325375153,
"loss": -387965.325,
"step": 2710
},
{
"epoch": 0.16,
"learning_rate": 0.00019818401491375132,
"loss": -388510.625,
"step": 2715
},
{
"epoch": 0.16,
"learning_rate": 0.0001981649681146008,
"loss": -388122.625,
"step": 2720
},
{
"epoch": 0.16,
"learning_rate": 0.0001981458228754001,
"loss": -388644.0,
"step": 2725
},
{
"epoch": 0.16,
"learning_rate": 0.00019812657921534818,
"loss": -389955.125,
"step": 2730
},
{
"epoch": 0.16,
"learning_rate": 0.0001981072371537426,
"loss": -388322.6,
"step": 2735
},
{
"epoch": 0.16,
"learning_rate": 0.00019808779670997957,
"loss": -389384.75,
"step": 2740
},
{
"epoch": 0.16,
"learning_rate": 0.00019806825790355406,
"loss": -389707.55,
"step": 2745
},
{
"epoch": 0.16,
"learning_rate": 0.0001980486207540596,
"loss": -389918.25,
"step": 2750
},
{
"epoch": 0.16,
"learning_rate": 0.0001980288852811884,
"loss": -390486.3,
"step": 2755
},
{
"epoch": 0.16,
"learning_rate": 0.00019800905150473125,
"loss": -390199.275,
"step": 2760
},
{
"epoch": 0.16,
"learning_rate": 0.00019798911944457744,
"loss": -392517.55,
"step": 2765
},
{
"epoch": 0.16,
"learning_rate": 0.00019796908912071495,
"loss": -390256.425,
"step": 2770
},
{
"epoch": 0.16,
"learning_rate": 0.00019794896055323017,
"loss": -388095.225,
"step": 2775
},
{
"epoch": 0.16,
"learning_rate": 0.00019792873376230815,
"loss": -389728.6,
"step": 2780
},
{
"epoch": 0.16,
"learning_rate": 0.00019790840876823232,
"loss": -387683.15,
"step": 2785
},
{
"epoch": 0.16,
"learning_rate": 0.00019788798559138457,
"loss": -389113.1,
"step": 2790
},
{
"epoch": 0.16,
"learning_rate": 0.00019786746425224543,
"loss": -392035.95,
"step": 2795
},
{
"epoch": 0.16,
"learning_rate": 0.00019784684477139366,
"loss": -390833.95,
"step": 2800
},
{
"epoch": 0.16,
"learning_rate": 0.00019782612716950652,
"loss": -390160.825,
"step": 2805
},
{
"epoch": 0.16,
"learning_rate": 0.00019780531146735972,
"loss": -393249.15,
"step": 2810
},
{
"epoch": 0.16,
"learning_rate": 0.00019778439768582725,
"loss": -391862.85,
"step": 2815
},
{
"epoch": 0.16,
"learning_rate": 0.00019776338584588153,
"loss": -389768.2,
"step": 2820
},
{
"epoch": 0.16,
"learning_rate": 0.00019774227596859324,
"loss": -391630.1,
"step": 2825
},
{
"epoch": 0.16,
"learning_rate": 0.00019772106807513146,
"loss": -392128.275,
"step": 2830
},
{
"epoch": 0.16,
"learning_rate": 0.00019769976218676346,
"loss": -391493.35,
"step": 2835
},
{
"epoch": 0.16,
"learning_rate": 0.00019767835832485485,
"loss": -390874.9,
"step": 2840
},
{
"epoch": 0.16,
"learning_rate": 0.0001976568565108695,
"loss": -392315.375,
"step": 2845
},
{
"epoch": 0.16,
"learning_rate": 0.00019763525676636947,
"loss": -393350.75,
"step": 2850
},
{
"epoch": 0.16,
"learning_rate": 0.000197613559113015,
"loss": -392181.875,
"step": 2855
},
{
"epoch": 0.16,
"learning_rate": 0.00019759176357256455,
"loss": -392112.25,
"step": 2860
},
{
"epoch": 0.16,
"learning_rate": 0.00019756987016687476,
"loss": -393530.525,
"step": 2865
},
{
"epoch": 0.16,
"learning_rate": 0.00019754787891790039,
"loss": -392892.175,
"step": 2870
},
{
"epoch": 0.16,
"learning_rate": 0.00019752578984769432,
"loss": -391638.65,
"step": 2875
},
{
"epoch": 0.17,
"learning_rate": 0.00019750360297840746,
"loss": -390896.4,
"step": 2880
},
{
"epoch": 0.17,
"learning_rate": 0.00019748131833228892,
"loss": -393401.7,
"step": 2885
},
{
"epoch": 0.17,
"learning_rate": 0.00019745893593168577,
"loss": -391131.8,
"step": 2890
},
{
"epoch": 0.17,
"learning_rate": 0.00019743645579904312,
"loss": -394043.15,
"step": 2895
},
{
"epoch": 0.17,
"learning_rate": 0.00019741387795690408,
"loss": -391290.3,
"step": 2900
},
{
"epoch": 0.17,
"learning_rate": 0.00019739120242790982,
"loss": -392632.6,
"step": 2905
},
{
"epoch": 0.17,
"learning_rate": 0.00019736842923479935,
"loss": -393573.65,
"step": 2910
},
{
"epoch": 0.17,
"learning_rate": 0.0001973455584004097,
"loss": -392218.625,
"step": 2915
},
{
"epoch": 0.17,
"learning_rate": 0.00019732258994767583,
"loss": -392381.75,
"step": 2920
},
{
"epoch": 0.17,
"learning_rate": 0.00019729952389963053,
"loss": -395136.075,
"step": 2925
},
{
"epoch": 0.17,
"learning_rate": 0.00019727636027940445,
"loss": -396081.0,
"step": 2930
},
{
"epoch": 0.17,
"learning_rate": 0.00019725309911022617,
"loss": -394324.7,
"step": 2935
},
{
"epoch": 0.17,
"learning_rate": 0.00019722974041542203,
"loss": -394096.8,
"step": 2940
},
{
"epoch": 0.17,
"learning_rate": 0.00019720628421841617,
"loss": -395350.625,
"step": 2945
},
{
"epoch": 0.17,
"learning_rate": 0.00019718273054273051,
"loss": -393836.025,
"step": 2950
},
{
"epoch": 0.17,
"learning_rate": 0.00019715907941198477,
"loss": -393069.25,
"step": 2955
},
{
"epoch": 0.17,
"learning_rate": 0.0001971353308498963,
"loss": -393328.0,
"step": 2960
},
{
"epoch": 0.17,
"learning_rate": 0.00019711148488028025,
"loss": -391875.05,
"step": 2965
},
{
"epoch": 0.17,
"learning_rate": 0.0001970875415270494,
"loss": -393253.6,
"step": 2970
},
{
"epoch": 0.17,
"learning_rate": 0.0001970635008142142,
"loss": -393998.3,
"step": 2975
},
{
"epoch": 0.17,
"learning_rate": 0.00019703936276588272,
"loss": -392105.475,
"step": 2980
},
{
"epoch": 0.17,
"learning_rate": 0.00019701512740626068,
"loss": -393645.9,
"step": 2985
},
{
"epoch": 0.17,
"learning_rate": 0.00019699079475965132,
"loss": -395303.55,
"step": 2990
},
{
"epoch": 0.17,
"learning_rate": 0.00019696636485045548,
"loss": -395705.85,
"step": 2995
},
{
"epoch": 0.17,
"learning_rate": 0.00019694183770317154,
"loss": -394201.825,
"step": 3000
},
{
"epoch": 0.17,
"learning_rate": 0.00019691721334239536,
"loss": -395230.95,
"step": 3005
},
{
"epoch": 0.17,
"learning_rate": 0.00019689249179282033,
"loss": -394836.2,
"step": 3010
},
{
"epoch": 0.17,
"learning_rate": 0.00019686767307923727,
"loss": -393954.225,
"step": 3015
},
{
"epoch": 0.17,
"learning_rate": 0.0001968427572265344,
"loss": -396644.95,
"step": 3020
},
{
"epoch": 0.17,
"learning_rate": 0.00019681774425969745,
"loss": -395644.775,
"step": 3025
},
{
"epoch": 0.17,
"learning_rate": 0.00019679263420380942,
"loss": -395046.575,
"step": 3030
},
{
"epoch": 0.17,
"learning_rate": 0.0001967674270840508,
"loss": -394854.95,
"step": 3035
},
{
"epoch": 0.17,
"learning_rate": 0.0001967421229256993,
"loss": -394871.25,
"step": 3040
},
{
"epoch": 0.17,
"learning_rate": 0.00019671672175413,
"loss": -393875.2,
"step": 3045
},
{
"epoch": 0.17,
"learning_rate": 0.00019669122359481525,
"loss": -393479.0,
"step": 3050
},
{
"epoch": 0.18,
"learning_rate": 0.00019666562847332463,
"loss": -397136.75,
"step": 3055
},
{
"epoch": 0.18,
"learning_rate": 0.00019663993641532508,
"loss": -395903.5,
"step": 3060
},
{
"epoch": 0.18,
"learning_rate": 0.00019661414744658054,
"loss": -394798.65,
"step": 3065
},
{
"epoch": 0.18,
"learning_rate": 0.0001965882615929523,
"loss": -395297.3,
"step": 3070
},
{
"epoch": 0.18,
"learning_rate": 0.00019656227888039876,
"loss": -395166.65,
"step": 3075
},
{
"epoch": 0.18,
"learning_rate": 0.00019653619933497542,
"loss": -395804.4,
"step": 3080
},
{
"epoch": 0.18,
"learning_rate": 0.00019651002298283493,
"loss": -394364.975,
"step": 3085
},
{
"epoch": 0.18,
"learning_rate": 0.00019648374985022695,
"loss": -395611.7,
"step": 3090
},
{
"epoch": 0.18,
"learning_rate": 0.00019645737996349828,
"loss": -395336.9,
"step": 3095
},
{
"epoch": 0.18,
"learning_rate": 0.0001964309133490927,
"loss": -396879.65,
"step": 3100
},
{
"epoch": 0.18,
"learning_rate": 0.00019640435003355098,
"loss": -397369.15,
"step": 3105
},
{
"epoch": 0.18,
"learning_rate": 0.00019637769004351085,
"loss": -396093.0,
"step": 3110
},
{
"epoch": 0.18,
"learning_rate": 0.00019635093340570706,
"loss": -397766.0,
"step": 3115
},
{
"epoch": 0.18,
"learning_rate": 0.0001963240801469712,
"loss": -395342.125,
"step": 3120
},
{
"epoch": 0.18,
"learning_rate": 0.0001962971302942317,
"loss": -395822.7,
"step": 3125
},
{
"epoch": 0.18,
"learning_rate": 0.0001962700838745141,
"loss": -397181.9,
"step": 3130
},
{
"epoch": 0.18,
"learning_rate": 0.00019624294091494045,
"loss": -394394.1,
"step": 3135
},
{
"epoch": 0.18,
"learning_rate": 0.00019621570144272987,
"loss": -396456.625,
"step": 3140
},
{
"epoch": 0.18,
"learning_rate": 0.00019618836548519812,
"loss": -396242.55,
"step": 3145
},
{
"epoch": 0.18,
"learning_rate": 0.0001961609330697578,
"loss": -399480.3,
"step": 3150
},
{
"epoch": 0.18,
"learning_rate": 0.0001961334042239181,
"loss": -395399.025,
"step": 3155
},
{
"epoch": 0.18,
"learning_rate": 0.00019610577897528515,
"loss": -397448.45,
"step": 3160
},
{
"epoch": 0.18,
"learning_rate": 0.0001960780573515615,
"loss": -397776.85,
"step": 3165
},
{
"epoch": 0.18,
"learning_rate": 0.0001960502393805465,
"loss": -397287.55,
"step": 3170
},
{
"epoch": 0.18,
"learning_rate": 0.00019602232509013606,
"loss": -397851.1,
"step": 3175
},
{
"epoch": 0.18,
"learning_rate": 0.0001959943145083227,
"loss": -397603.525,
"step": 3180
},
{
"epoch": 0.18,
"learning_rate": 0.0001959662076631955,
"loss": -398141.85,
"step": 3185
},
{
"epoch": 0.18,
"learning_rate": 0.00019593800458294006,
"loss": -397223.125,
"step": 3190
},
{
"epoch": 0.18,
"learning_rate": 0.00019590970529583852,
"loss": -388728.0,
"step": 3195
},
{
"epoch": 0.18,
"learning_rate": 0.00019588130983026942,
"loss": -397655.1,
"step": 3200
},
{
"epoch": 0.18,
"learning_rate": 0.00019585281821470781,
"loss": -396993.0,
"step": 3205
},
{
"epoch": 0.18,
"learning_rate": 0.0001958242304777252,
"loss": -398863.0,
"step": 3210
},
{
"epoch": 0.18,
"learning_rate": 0.00019579554664798937,
"loss": -396665.725,
"step": 3215
},
{
"epoch": 0.18,
"learning_rate": 0.00019576676675426454,
"loss": -397439.25,
"step": 3220
},
{
"epoch": 0.19,
"learning_rate": 0.00019573789082541124,
"loss": -398167.975,
"step": 3225
},
{
"epoch": 0.19,
"learning_rate": 0.0001957089188903864,
"loss": -397248.225,
"step": 3230
},
{
"epoch": 0.19,
"learning_rate": 0.000195679850978243,
"loss": -396948.4,
"step": 3235
},
{
"epoch": 0.19,
"learning_rate": 0.00019565068711813058,
"loss": -397715.95,
"step": 3240
},
{
"epoch": 0.19,
"learning_rate": 0.00019562142733929458,
"loss": -398244.45,
"step": 3245
},
{
"epoch": 0.19,
"learning_rate": 0.00019559207167107684,
"loss": -399350.225,
"step": 3250
},
{
"epoch": 0.19,
"learning_rate": 0.00019556262014291527,
"loss": -398066.65,
"step": 3255
},
{
"epoch": 0.19,
"learning_rate": 0.0001955330727843439,
"loss": -398218.25,
"step": 3260
},
{
"epoch": 0.19,
"learning_rate": 0.00019550342962499295,
"loss": -399172.75,
"step": 3265
},
{
"epoch": 0.19,
"learning_rate": 0.00019547369069458862,
"loss": -398298.225,
"step": 3270
},
{
"epoch": 0.19,
"learning_rate": 0.00019544385602295318,
"loss": -399286.825,
"step": 3275
},
{
"epoch": 0.19,
"learning_rate": 0.00019541392564000488,
"loss": -399987.925,
"step": 3280
},
{
"epoch": 0.19,
"learning_rate": 0.00019538389957575802,
"loss": -397577.95,
"step": 3285
},
{
"epoch": 0.19,
"learning_rate": 0.00019535377786032276,
"loss": -398827.45,
"step": 3290
},
{
"epoch": 0.19,
"learning_rate": 0.00019532356052390524,
"loss": -399248.95,
"step": 3295
},
{
"epoch": 0.19,
"learning_rate": 0.0001952932475968075,
"loss": -398613.525,
"step": 3300
},
{
"epoch": 0.19,
"learning_rate": 0.00019526283910942734,
"loss": -398833.35,
"step": 3305
},
{
"epoch": 0.19,
"learning_rate": 0.00019523233509225854,
"loss": -397386.525,
"step": 3310
},
{
"epoch": 0.19,
"learning_rate": 0.00019520173557589055,
"loss": -399222.55,
"step": 3315
},
{
"epoch": 0.19,
"learning_rate": 0.00019517104059100864,
"loss": -402082.975,
"step": 3320
},
{
"epoch": 0.19,
"learning_rate": 0.00019514025016839385,
"loss": -399304.1,
"step": 3325
},
{
"epoch": 0.19,
"learning_rate": 0.0001951093643389228,
"loss": -397691.575,
"step": 3330
},
{
"epoch": 0.19,
"learning_rate": 0.00019507838313356795,
"loss": -401452.35,
"step": 3335
},
{
"epoch": 0.19,
"learning_rate": 0.0001950473065833973,
"loss": -399971.9,
"step": 3340
},
{
"epoch": 0.19,
"learning_rate": 0.00019501613471957445,
"loss": -399510.8,
"step": 3345
},
{
"epoch": 0.19,
"learning_rate": 0.00019498486757335866,
"loss": -400017.45,
"step": 3350
},
{
"epoch": 0.19,
"learning_rate": 0.00019495350517610466,
"loss": -398051.725,
"step": 3355
},
{
"epoch": 0.19,
"learning_rate": 0.00019492204755926275,
"loss": -399884.7,
"step": 3360
},
{
"epoch": 0.19,
"learning_rate": 0.00019489049475437871,
"loss": -400094.3,
"step": 3365
},
{
"epoch": 0.19,
"learning_rate": 0.00019485884679309374,
"loss": -399655.45,
"step": 3370
},
{
"epoch": 0.19,
"learning_rate": 0.0001948271037071445,
"loss": -399197.55,
"step": 3375
},
{
"epoch": 0.19,
"learning_rate": 0.00019479526552836302,
"loss": -398674.65,
"step": 3380
},
{
"epoch": 0.19,
"learning_rate": 0.0001947633322886767,
"loss": -398032.525,
"step": 3385
},
{
"epoch": 0.19,
"learning_rate": 0.00019473130402010829,
"loss": -401128.2,
"step": 3390
},
{
"epoch": 0.19,
"learning_rate": 0.00019469918075477573,
"loss": -400998.175,
"step": 3395
},
{
"epoch": 0.2,
"learning_rate": 0.0001946669625248924,
"loss": -400135.4,
"step": 3400
},
{
"epoch": 0.2,
"learning_rate": 0.00019463464936276677,
"loss": -400291.95,
"step": 3405
},
{
"epoch": 0.2,
"learning_rate": 0.00019460224130080255,
"loss": -400901.8,
"step": 3410
},
{
"epoch": 0.2,
"learning_rate": 0.00019456973837149861,
"loss": -399600.0,
"step": 3415
},
{
"epoch": 0.2,
"learning_rate": 0.00019453714060744897,
"loss": -398669.55,
"step": 3420
},
{
"epoch": 0.2,
"learning_rate": 0.00019450444804134276,
"loss": -401376.125,
"step": 3425
},
{
"epoch": 0.2,
"learning_rate": 0.0001944716607059641,
"loss": -400382.35,
"step": 3430
},
{
"epoch": 0.2,
"learning_rate": 0.00019443877863419226,
"loss": -401191.475,
"step": 3435
},
{
"epoch": 0.2,
"learning_rate": 0.00019440580185900146,
"loss": -398218.35,
"step": 3440
},
{
"epoch": 0.2,
"learning_rate": 0.00019437273041346084,
"loss": -398885.275,
"step": 3445
},
{
"epoch": 0.2,
"learning_rate": 0.00019433956433073454,
"loss": -400480.625,
"step": 3450
},
{
"epoch": 0.2,
"learning_rate": 0.00019430630364408158,
"loss": -401564.5,
"step": 3455
},
{
"epoch": 0.2,
"learning_rate": 0.0001942729483868559,
"loss": -400380.425,
"step": 3460
},
{
"epoch": 0.2,
"learning_rate": 0.00019423949859250612,
"loss": -400995.05,
"step": 3465
},
{
"epoch": 0.2,
"learning_rate": 0.00019420595429457587,
"loss": -399079.45,
"step": 3470
},
{
"epoch": 0.2,
"learning_rate": 0.0001941723155267034,
"loss": -400499.1,
"step": 3475
},
{
"epoch": 0.2,
"learning_rate": 0.00019413858232262174,
"loss": -400577.25,
"step": 3480
},
{
"epoch": 0.2,
"learning_rate": 0.00019410475471615864,
"loss": -402237.525,
"step": 3485
},
{
"epoch": 0.2,
"learning_rate": 0.00019407083274123647,
"loss": -401237.55,
"step": 3490
},
{
"epoch": 0.2,
"learning_rate": 0.0001940368164318723,
"loss": -400670.4,
"step": 3495
},
{
"epoch": 0.2,
"learning_rate": 0.00019400270582217775,
"loss": -396488.45,
"step": 3500
},
{
"epoch": 0.2,
"learning_rate": 0.00019396850094635903,
"loss": -399970.975,
"step": 3505
},
{
"epoch": 0.2,
"learning_rate": 0.00019393420183871682,
"loss": -398315.575,
"step": 3510
},
{
"epoch": 0.2,
"learning_rate": 0.0001938998085336464,
"loss": -399823.95,
"step": 3515
},
{
"epoch": 0.2,
"learning_rate": 0.00019386532106563736,
"loss": -400712.55,
"step": 3520
},
{
"epoch": 0.2,
"learning_rate": 0.00019383073946927396,
"loss": -399312.4,
"step": 3525
},
{
"epoch": 0.2,
"learning_rate": 0.00019379606377923456,
"loss": -398362.975,
"step": 3530
},
{
"epoch": 0.2,
"learning_rate": 0.0001937612940302921,
"loss": -400884.1,
"step": 3535
},
{
"epoch": 0.2,
"learning_rate": 0.0001937264302573137,
"loss": -399776.4,
"step": 3540
},
{
"epoch": 0.2,
"learning_rate": 0.0001936914724952609,
"loss": -401188.525,
"step": 3545
},
{
"epoch": 0.2,
"learning_rate": 0.00019365642077918938,
"loss": -401755.075,
"step": 3550
},
{
"epoch": 0.2,
"learning_rate": 0.0001936212751442491,
"loss": -400210.55,
"step": 3555
},
{
"epoch": 0.2,
"learning_rate": 0.00019358603562568416,
"loss": -399624.95,
"step": 3560
},
{
"epoch": 0.2,
"learning_rate": 0.00019355070225883286,
"loss": -401644.9,
"step": 3565
},
{
"epoch": 0.2,
"learning_rate": 0.00019351527507912753,
"loss": -400682.0,
"step": 3570
},
{
"epoch": 0.21,
"learning_rate": 0.00019347975412209464,
"loss": -401702.8,
"step": 3575
},
{
"epoch": 0.21,
"learning_rate": 0.00019344413942335472,
"loss": -400528.5,
"step": 3580
},
{
"epoch": 0.21,
"learning_rate": 0.00019340843101862227,
"loss": -399457.5,
"step": 3585
},
{
"epoch": 0.21,
"learning_rate": 0.00019337262894370571,
"loss": -401715.05,
"step": 3590
},
{
"epoch": 0.21,
"learning_rate": 0.0001933367332345075,
"loss": -400080.725,
"step": 3595
},
{
"epoch": 0.21,
"learning_rate": 0.0001933007439270239,
"loss": -401478.75,
"step": 3600
},
{
"epoch": 0.21,
"learning_rate": 0.00019326466105734502,
"loss": -402403.775,
"step": 3605
},
{
"epoch": 0.21,
"learning_rate": 0.00019322848466165495,
"loss": -401417.75,
"step": 3610
},
{
"epoch": 0.21,
"learning_rate": 0.0001931922147762314,
"loss": -400876.6,
"step": 3615
},
{
"epoch": 0.21,
"learning_rate": 0.0001931558514374459,
"loss": -400685.55,
"step": 3620
},
{
"epoch": 0.21,
"learning_rate": 0.00019311939468176368,
"loss": -403060.075,
"step": 3625
},
{
"epoch": 0.21,
"learning_rate": 0.00019308284454574363,
"loss": -403845.575,
"step": 3630
},
{
"epoch": 0.21,
"learning_rate": 0.00019304620106603838,
"loss": -400785.5,
"step": 3635
},
{
"epoch": 0.21,
"learning_rate": 0.00019300946427939407,
"loss": -400253.05,
"step": 3640
},
{
"epoch": 0.21,
"learning_rate": 0.00019297263422265039,
"loss": -403627.075,
"step": 3645
},
{
"epoch": 0.21,
"learning_rate": 0.00019293571093274066,
"loss": -403050.35,
"step": 3650
},
{
"epoch": 0.21,
"learning_rate": 0.0001928986944466916,
"loss": -401369.1,
"step": 3655
},
{
"epoch": 0.21,
"learning_rate": 0.0001928615848016234,
"loss": -403266.25,
"step": 3660
},
{
"epoch": 0.21,
"learning_rate": 0.00019282438203474978,
"loss": -403239.3,
"step": 3665
},
{
"epoch": 0.21,
"learning_rate": 0.00019278708618337766,
"loss": -403675.45,
"step": 3670
},
{
"epoch": 0.21,
"learning_rate": 0.00019274969728490747,
"loss": -402626.525,
"step": 3675
},
{
"epoch": 0.21,
"learning_rate": 0.00019271221537683287,
"loss": -401520.5,
"step": 3680
},
{
"epoch": 0.21,
"learning_rate": 0.0001926746404967408,
"loss": -403857.225,
"step": 3685
},
{
"epoch": 0.21,
"learning_rate": 0.00019263697268231142,
"loss": -403731.9,
"step": 3690
},
{
"epoch": 0.21,
"learning_rate": 0.0001925992119713181,
"loss": -400502.8,
"step": 3695
},
{
"epoch": 0.21,
"learning_rate": 0.0001925613584016274,
"loss": -401636.4,
"step": 3700
},
{
"epoch": 0.05,
"step": 3701,
"total_flos": 2.617011781310087e+18,
"train_loss": 0.0,
"train_runtime": 10.3365,
"train_samples_per_second": 0.193,
"train_steps_per_second": 0.097
}
],
"logging_steps": 5,
"max_steps": 1,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 2.617011781310087e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}