{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.05308909385623915, "eval_steps": 500, "global_step": 3701, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0, "loss": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 0.0, "loss": 0.0, "step": 5 }, { "epoch": 0.0, "learning_rate": 0.0, "loss": 0.0, "step": 10 }, { "epoch": 0.0, "learning_rate": 2.2948938611589213e-07, "loss": 0.0, "step": 15 }, { "epoch": 0.0, "learning_rate": 6.884681583476765e-07, "loss": -3.4891, "step": 20 }, { "epoch": 0.0, "learning_rate": 1.2621916236374069e-06, "loss": -49.4445, "step": 25 }, { "epoch": 0.0, "learning_rate": 1.721170395869191e-06, "loss": -195.5057, "step": 30 }, { "epoch": 0.0, "learning_rate": 2.2948938611589215e-06, "loss": -497.6609, "step": 35 }, { "epoch": 0.0, "learning_rate": 2.868617326448652e-06, "loss": -1031.3963, "step": 40 }, { "epoch": 0.0, "learning_rate": 3.442340791738382e-06, "loss": -1685.1105, "step": 45 }, { "epoch": 0.0, "learning_rate": 4.016064257028113e-06, "loss": -2646.2359, "step": 50 }, { "epoch": 0.0, "learning_rate": 4.589787722317843e-06, "loss": -3794.2527, "step": 55 }, { "epoch": 0.0, "learning_rate": 5.163511187607573e-06, "loss": -5546.7516, "step": 60 }, { "epoch": 0.0, "learning_rate": 5.737234652897304e-06, "loss": -7180.4102, "step": 65 }, { "epoch": 0.0, "learning_rate": 6.310958118187034e-06, "loss": -10111.5016, "step": 70 }, { "epoch": 0.0, "learning_rate": 6.884681583476764e-06, "loss": -12410.3125, "step": 75 }, { "epoch": 0.0, "learning_rate": 7.4584050487664955e-06, "loss": -16288.9766, "step": 80 }, { "epoch": 0.0, "learning_rate": 8.032128514056226e-06, "loss": -19432.7328, "step": 85 }, { "epoch": 0.01, "learning_rate": 8.605851979345956e-06, "loss": -23566.2641, "step": 90 }, { "epoch": 0.01, "learning_rate": 9.179575444635686e-06, "loss": -28124.4, "step": 95 }, { "epoch": 0.01, "learning_rate": 9.753298909925416e-06, "loss": -33192.5531, "step": 100 }, { "epoch": 0.01, "learning_rate": 1.0327022375215146e-05, "loss": -40076.6687, "step": 105 }, { "epoch": 0.01, "learning_rate": 1.0900745840504876e-05, "loss": -46810.9156, "step": 110 }, { "epoch": 0.01, "learning_rate": 1.1474469305794608e-05, "loss": -51101.25, "step": 115 }, { "epoch": 0.01, "learning_rate": 1.2048192771084338e-05, "loss": -56708.5563, "step": 120 }, { "epoch": 0.01, "learning_rate": 1.2621916236374069e-05, "loss": -59233.925, "step": 125 }, { "epoch": 0.01, "learning_rate": 1.3195639701663797e-05, "loss": -64193.0687, "step": 130 }, { "epoch": 0.01, "learning_rate": 1.3769363166953527e-05, "loss": -72945.3625, "step": 135 }, { "epoch": 0.01, "learning_rate": 1.434308663224326e-05, "loss": -87274.1375, "step": 140 }, { "epoch": 0.01, "learning_rate": 1.4916810097532991e-05, "loss": -106688.7375, "step": 145 }, { "epoch": 0.01, "learning_rate": 1.549053356282272e-05, "loss": -126941.775, "step": 150 }, { "epoch": 0.01, "learning_rate": 1.606425702811245e-05, "loss": -147417.55, "step": 155 }, { "epoch": 0.01, "learning_rate": 1.663798049340218e-05, "loss": -173031.2875, "step": 160 }, { "epoch": 0.01, "learning_rate": 1.721170395869191e-05, "loss": -197558.775, "step": 165 }, { "epoch": 0.01, "learning_rate": 1.7785427423981642e-05, "loss": -223929.7, "step": 170 }, { "epoch": 0.01, "learning_rate": 1.8244406196213425e-05, "loss": -241816.85, "step": 175 }, { "epoch": 0.01, "learning_rate": 1.8703384968445207e-05, "loss": -249209.5, "step": 180 }, { "epoch": 0.01, "learning_rate": 1.9162363740676993e-05, "loss": -260632.2, "step": 185 }, { "epoch": 0.01, "learning_rate": 1.9736087205966724e-05, "loss": -274344.5, "step": 190 }, { "epoch": 0.01, "learning_rate": 2.0309810671256454e-05, "loss": -283097.8, "step": 195 }, { "epoch": 0.01, "learning_rate": 2.0883534136546184e-05, "loss": -291180.1, "step": 200 }, { "epoch": 0.01, "learning_rate": 2.1457257601835918e-05, "loss": -296606.175, "step": 205 }, { "epoch": 0.01, "learning_rate": 2.2030981067125648e-05, "loss": -297583.2, "step": 210 }, { "epoch": 0.01, "learning_rate": 2.2604704532415378e-05, "loss": -304103.9, "step": 215 }, { "epoch": 0.01, "learning_rate": 2.3178427997705108e-05, "loss": -305754.625, "step": 220 }, { "epoch": 0.01, "learning_rate": 2.3752151462994838e-05, "loss": -308790.725, "step": 225 }, { "epoch": 0.01, "learning_rate": 2.432587492828457e-05, "loss": -311960.7, "step": 230 }, { "epoch": 0.01, "learning_rate": 2.48995983935743e-05, "loss": -313037.675, "step": 235 }, { "epoch": 0.01, "learning_rate": 2.547332185886403e-05, "loss": -315313.825, "step": 240 }, { "epoch": 0.01, "learning_rate": 2.6047045324153755e-05, "loss": -316755.775, "step": 245 }, { "epoch": 0.01, "learning_rate": 2.662076878944349e-05, "loss": -317376.0, "step": 250 }, { "epoch": 0.01, "learning_rate": 2.7194492254733223e-05, "loss": -319665.85, "step": 255 }, { "epoch": 0.01, "learning_rate": 2.776821572002295e-05, "loss": -318471.65, "step": 260 }, { "epoch": 0.02, "learning_rate": 2.8341939185312683e-05, "loss": -318968.55, "step": 265 }, { "epoch": 0.02, "learning_rate": 2.891566265060241e-05, "loss": -320207.2, "step": 270 }, { "epoch": 0.02, "learning_rate": 2.9489386115892143e-05, "loss": -322860.7, "step": 275 }, { "epoch": 0.02, "learning_rate": 3.006310958118187e-05, "loss": -322976.675, "step": 280 }, { "epoch": 0.02, "learning_rate": 3.0636833046471604e-05, "loss": -323030.4, "step": 285 }, { "epoch": 0.02, "learning_rate": 3.121055651176134e-05, "loss": -325049.75, "step": 290 }, { "epoch": 0.02, "learning_rate": 3.1784279977051064e-05, "loss": -323279.2, "step": 295 }, { "epoch": 0.02, "learning_rate": 3.23580034423408e-05, "loss": -326394.3, "step": 300 }, { "epoch": 0.02, "learning_rate": 3.2931726907630524e-05, "loss": -327546.45, "step": 305 }, { "epoch": 0.02, "learning_rate": 3.350545037292026e-05, "loss": -326381.95, "step": 310 }, { "epoch": 0.02, "learning_rate": 3.4079173838209984e-05, "loss": -329224.875, "step": 315 }, { "epoch": 0.02, "learning_rate": 3.465289730349972e-05, "loss": -327763.2, "step": 320 }, { "epoch": 0.02, "learning_rate": 3.5226620768789445e-05, "loss": -327674.775, "step": 325 }, { "epoch": 0.02, "learning_rate": 3.580034423407918e-05, "loss": -328802.05, "step": 330 }, { "epoch": 0.02, "learning_rate": 3.6374067699368905e-05, "loss": -331079.025, "step": 335 }, { "epoch": 0.02, "learning_rate": 3.694779116465863e-05, "loss": -329059.075, "step": 340 }, { "epoch": 0.02, "learning_rate": 3.7521514629948365e-05, "loss": -329749.675, "step": 345 }, { "epoch": 0.02, "learning_rate": 3.809523809523809e-05, "loss": -331523.05, "step": 350 }, { "epoch": 0.02, "learning_rate": 3.8668961560527826e-05, "loss": -332232.7, "step": 355 }, { "epoch": 0.02, "learning_rate": 3.924268502581756e-05, "loss": -332449.35, "step": 360 }, { "epoch": 0.02, "learning_rate": 3.9816408491107286e-05, "loss": -334742.55, "step": 365 }, { "epoch": 0.02, "learning_rate": 4.039013195639702e-05, "loss": -333736.15, "step": 370 }, { "epoch": 0.02, "learning_rate": 4.0963855421686746e-05, "loss": -333792.225, "step": 375 }, { "epoch": 0.02, "learning_rate": 4.153757888697648e-05, "loss": -335570.45, "step": 380 }, { "epoch": 0.02, "learning_rate": 4.211130235226621e-05, "loss": -333872.35, "step": 385 }, { "epoch": 0.02, "learning_rate": 4.268502581755594e-05, "loss": -337072.675, "step": 390 }, { "epoch": 0.02, "learning_rate": 4.3258749282845674e-05, "loss": -333726.425, "step": 395 }, { "epoch": 0.02, "learning_rate": 4.38324727481354e-05, "loss": -336459.75, "step": 400 }, { "epoch": 0.02, "learning_rate": 4.4406196213425134e-05, "loss": -336860.1, "step": 405 }, { "epoch": 0.02, "learning_rate": 4.497991967871486e-05, "loss": -335018.575, "step": 410 }, { "epoch": 0.02, "learning_rate": 4.543889845094665e-05, "loss": -338024.925, "step": 415 }, { "epoch": 0.02, "learning_rate": 4.601262191623638e-05, "loss": -336729.825, "step": 420 }, { "epoch": 0.02, "learning_rate": 4.658634538152611e-05, "loss": -337681.2, "step": 425 }, { "epoch": 0.02, "learning_rate": 4.716006884681584e-05, "loss": -339095.5, "step": 430 }, { "epoch": 0.02, "learning_rate": 4.773379231210557e-05, "loss": -339637.8, "step": 435 }, { "epoch": 0.03, "learning_rate": 4.83075157773953e-05, "loss": -340333.75, "step": 440 }, { "epoch": 0.03, "learning_rate": 4.888123924268503e-05, "loss": -340316.65, "step": 445 }, { "epoch": 0.03, "learning_rate": 4.945496270797476e-05, "loss": -339421.7, "step": 450 }, { "epoch": 0.03, "learning_rate": 5.002868617326449e-05, "loss": -338028.95, "step": 455 }, { "epoch": 0.03, "learning_rate": 5.060240963855422e-05, "loss": -342042.3, "step": 460 }, { "epoch": 0.03, "learning_rate": 5.1176133103843945e-05, "loss": -341357.9, "step": 465 }, { "epoch": 0.03, "learning_rate": 5.174985656913368e-05, "loss": -341011.45, "step": 470 }, { "epoch": 0.03, "learning_rate": 5.232358003442341e-05, "loss": -340367.0, "step": 475 }, { "epoch": 0.03, "learning_rate": 5.289730349971313e-05, "loss": -341737.35, "step": 480 }, { "epoch": 0.03, "learning_rate": 5.3471026965002866e-05, "loss": -340334.725, "step": 485 }, { "epoch": 0.03, "learning_rate": 5.40447504302926e-05, "loss": -341142.3, "step": 490 }, { "epoch": 0.03, "learning_rate": 5.461847389558233e-05, "loss": -342525.2, "step": 495 }, { "epoch": 0.03, "learning_rate": 5.519219736087207e-05, "loss": -344089.7, "step": 500 }, { "epoch": 0.03, "learning_rate": 5.576592082616179e-05, "loss": -343456.65, "step": 505 }, { "epoch": 0.03, "learning_rate": 5.633964429145152e-05, "loss": -345602.1, "step": 510 }, { "epoch": 0.03, "learning_rate": 5.6913367756741254e-05, "loss": -343630.25, "step": 515 }, { "epoch": 0.03, "learning_rate": 5.748709122203099e-05, "loss": -342957.575, "step": 520 }, { "epoch": 0.03, "learning_rate": 5.806081468732072e-05, "loss": -343699.775, "step": 525 }, { "epoch": 0.03, "learning_rate": 5.863453815261044e-05, "loss": -345816.2, "step": 530 }, { "epoch": 0.03, "learning_rate": 5.9208261617900174e-05, "loss": -343947.55, "step": 535 }, { "epoch": 0.03, "learning_rate": 5.978198508318991e-05, "loss": -346215.525, "step": 540 }, { "epoch": 0.03, "learning_rate": 6.035570854847964e-05, "loss": -345165.4, "step": 545 }, { "epoch": 0.03, "learning_rate": 6.092943201376936e-05, "loss": -344990.8, "step": 550 }, { "epoch": 0.03, "learning_rate": 6.15031554790591e-05, "loss": -347143.95, "step": 555 }, { "epoch": 0.03, "learning_rate": 6.207687894434883e-05, "loss": -346346.4, "step": 560 }, { "epoch": 0.03, "learning_rate": 6.265060240963856e-05, "loss": -344992.35, "step": 565 }, { "epoch": 0.03, "learning_rate": 6.32243258749283e-05, "loss": -346478.0, "step": 570 }, { "epoch": 0.03, "learning_rate": 6.379804934021802e-05, "loss": -347404.6, "step": 575 }, { "epoch": 0.03, "learning_rate": 6.437177280550775e-05, "loss": -347950.25, "step": 580 }, { "epoch": 0.03, "learning_rate": 6.494549627079748e-05, "loss": -346535.225, "step": 585 }, { "epoch": 0.03, "learning_rate": 6.551921973608722e-05, "loss": -344840.4, "step": 590 }, { "epoch": 0.03, "learning_rate": 6.609294320137695e-05, "loss": -347641.45, "step": 595 }, { "epoch": 0.03, "learning_rate": 6.666666666666667e-05, "loss": -347243.75, "step": 600 }, { "epoch": 0.03, "learning_rate": 6.72403901319564e-05, "loss": -344839.0, "step": 605 }, { "epoch": 0.03, "learning_rate": 6.781411359724614e-05, "loss": -346694.6, "step": 610 }, { "epoch": 0.04, "learning_rate": 6.838783706253587e-05, "loss": -346482.1, "step": 615 }, { "epoch": 0.04, "learning_rate": 6.896156052782559e-05, "loss": -346953.025, "step": 620 }, { "epoch": 0.04, "learning_rate": 6.953528399311532e-05, "loss": -346146.7, "step": 625 }, { "epoch": 0.04, "learning_rate": 7.010900745840506e-05, "loss": -348154.8, "step": 630 }, { "epoch": 0.04, "learning_rate": 7.068273092369478e-05, "loss": -348350.55, "step": 635 }, { "epoch": 0.04, "learning_rate": 7.125645438898451e-05, "loss": -349716.075, "step": 640 }, { "epoch": 0.04, "learning_rate": 7.183017785427424e-05, "loss": -347439.1, "step": 645 }, { "epoch": 0.04, "learning_rate": 7.240390131956398e-05, "loss": -348563.45, "step": 650 }, { "epoch": 0.04, "learning_rate": 7.29776247848537e-05, "loss": -348391.825, "step": 655 }, { "epoch": 0.04, "learning_rate": 7.355134825014343e-05, "loss": -350483.05, "step": 660 }, { "epoch": 0.04, "learning_rate": 7.412507171543317e-05, "loss": -350731.125, "step": 665 }, { "epoch": 0.04, "learning_rate": 7.469879518072289e-05, "loss": -349929.325, "step": 670 }, { "epoch": 0.04, "learning_rate": 7.527251864601262e-05, "loss": -351055.2, "step": 675 }, { "epoch": 0.04, "learning_rate": 7.584624211130235e-05, "loss": -350916.6, "step": 680 }, { "epoch": 0.04, "learning_rate": 7.641996557659209e-05, "loss": -350173.8, "step": 685 }, { "epoch": 0.04, "learning_rate": 7.69936890418818e-05, "loss": -351153.0, "step": 690 }, { "epoch": 0.04, "learning_rate": 7.756741250717154e-05, "loss": -349882.6, "step": 695 }, { "epoch": 0.04, "learning_rate": 7.814113597246127e-05, "loss": -352142.4, "step": 700 }, { "epoch": 0.04, "learning_rate": 7.8714859437751e-05, "loss": -350261.2, "step": 705 }, { "epoch": 0.04, "learning_rate": 7.928858290304074e-05, "loss": -350545.325, "step": 710 }, { "epoch": 0.04, "learning_rate": 7.986230636833046e-05, "loss": -351217.85, "step": 715 }, { "epoch": 0.04, "learning_rate": 8.04360298336202e-05, "loss": -351533.35, "step": 720 }, { "epoch": 0.04, "learning_rate": 8.100975329890993e-05, "loss": -350845.225, "step": 725 }, { "epoch": 0.04, "learning_rate": 8.158347676419966e-05, "loss": -352349.6, "step": 730 }, { "epoch": 0.04, "learning_rate": 8.21572002294894e-05, "loss": -351294.8, "step": 735 }, { "epoch": 0.04, "learning_rate": 8.273092369477911e-05, "loss": -351867.35, "step": 740 }, { "epoch": 0.04, "learning_rate": 8.330464716006885e-05, "loss": -351626.4, "step": 745 }, { "epoch": 0.04, "learning_rate": 8.387837062535858e-05, "loss": -353080.1, "step": 750 }, { "epoch": 0.04, "learning_rate": 8.445209409064832e-05, "loss": -352073.8, "step": 755 }, { "epoch": 0.04, "learning_rate": 8.502581755593804e-05, "loss": -351819.75, "step": 760 }, { "epoch": 0.04, "learning_rate": 8.559954102122777e-05, "loss": -352359.45, "step": 765 }, { "epoch": 0.04, "learning_rate": 8.61732644865175e-05, "loss": -354546.0, "step": 770 }, { "epoch": 0.04, "learning_rate": 8.674698795180724e-05, "loss": -353960.15, "step": 775 }, { "epoch": 0.04, "learning_rate": 8.732071141709697e-05, "loss": -353622.075, "step": 780 }, { "epoch": 0.05, "learning_rate": 8.789443488238669e-05, "loss": -352742.6, "step": 785 }, { "epoch": 0.05, "learning_rate": 8.846815834767642e-05, "loss": -353329.025, "step": 790 }, { "epoch": 0.05, "learning_rate": 8.904188181296616e-05, "loss": -353635.4, "step": 795 }, { "epoch": 0.05, "learning_rate": 8.961560527825589e-05, "loss": -352711.4, "step": 800 }, { "epoch": 0.05, "learning_rate": 9.018932874354562e-05, "loss": -353263.5, "step": 805 }, { "epoch": 0.05, "learning_rate": 9.076305220883534e-05, "loss": -354502.3, "step": 810 }, { "epoch": 0.05, "learning_rate": 9.133677567412508e-05, "loss": -353995.45, "step": 815 }, { "epoch": 0.05, "learning_rate": 9.191049913941481e-05, "loss": -353419.0, "step": 820 }, { "epoch": 0.05, "learning_rate": 9.248422260470454e-05, "loss": -352814.725, "step": 825 }, { "epoch": 0.05, "learning_rate": 9.305794606999426e-05, "loss": -353624.425, "step": 830 }, { "epoch": 0.05, "learning_rate": 9.3631669535284e-05, "loss": -354307.7, "step": 835 }, { "epoch": 0.05, "learning_rate": 9.420539300057373e-05, "loss": -356302.85, "step": 840 }, { "epoch": 0.05, "learning_rate": 9.477911646586346e-05, "loss": -354939.2, "step": 845 }, { "epoch": 0.05, "learning_rate": 9.53528399311532e-05, "loss": -355997.0, "step": 850 }, { "epoch": 0.05, "learning_rate": 9.592656339644292e-05, "loss": -355250.575, "step": 855 }, { "epoch": 0.05, "learning_rate": 9.650028686173265e-05, "loss": -356108.8, "step": 860 }, { "epoch": 0.05, "learning_rate": 9.707401032702239e-05, "loss": -355865.0, "step": 865 }, { "epoch": 0.05, "learning_rate": 9.764773379231212e-05, "loss": -357137.325, "step": 870 }, { "epoch": 0.05, "learning_rate": 9.822145725760184e-05, "loss": -356673.4, "step": 875 }, { "epoch": 0.05, "learning_rate": 9.879518072289157e-05, "loss": -356866.7, "step": 880 }, { "epoch": 0.05, "learning_rate": 9.93689041881813e-05, "loss": -357192.3, "step": 885 }, { "epoch": 0.05, "learning_rate": 9.994262765347103e-05, "loss": -357552.65, "step": 890 }, { "epoch": 0.05, "learning_rate": 0.00010051635111876076, "loss": -356663.25, "step": 895 }, { "epoch": 0.05, "learning_rate": 0.00010109007458405049, "loss": -357417.25, "step": 900 }, { "epoch": 0.05, "learning_rate": 0.00010166379804934023, "loss": -355161.525, "step": 905 }, { "epoch": 0.05, "learning_rate": 0.00010223752151462995, "loss": -357030.05, "step": 910 }, { "epoch": 0.05, "learning_rate": 0.00010281124497991968, "loss": -355600.7, "step": 915 }, { "epoch": 0.05, "learning_rate": 0.00010338496844520941, "loss": -354340.6, "step": 920 }, { "epoch": 0.05, "learning_rate": 0.00010395869191049913, "loss": -355534.325, "step": 925 }, { "epoch": 0.05, "learning_rate": 0.00010453241537578888, "loss": -353909.575, "step": 930 }, { "epoch": 0.05, "learning_rate": 0.0001051061388410786, "loss": -355348.65, "step": 935 }, { "epoch": 0.05, "learning_rate": 0.00010567986230636832, "loss": -356926.1, "step": 940 }, { "epoch": 0.05, "learning_rate": 0.00010625358577165807, "loss": -354640.25, "step": 945 }, { "epoch": 0.05, "learning_rate": 0.00010682730923694779, "loss": -358615.6, "step": 950 }, { "epoch": 0.05, "learning_rate": 0.00010740103270223754, "loss": -357837.25, "step": 955 }, { "epoch": 0.06, "learning_rate": 0.00010797475616752726, "loss": -355897.675, "step": 960 }, { "epoch": 0.06, "learning_rate": 0.00010854847963281698, "loss": -356186.9, "step": 965 }, { "epoch": 0.06, "learning_rate": 0.00010912220309810672, "loss": -359630.8, "step": 970 }, { "epoch": 0.06, "learning_rate": 0.00010969592656339644, "loss": -358727.8, "step": 975 }, { "epoch": 0.06, "learning_rate": 0.00011026965002868619, "loss": -357651.55, "step": 980 }, { "epoch": 0.06, "learning_rate": 0.00011084337349397591, "loss": -355946.15, "step": 985 }, { "epoch": 0.06, "learning_rate": 0.00011141709695926563, "loss": -358857.8, "step": 990 }, { "epoch": 0.06, "learning_rate": 0.00011199082042455538, "loss": -356487.45, "step": 995 }, { "epoch": 0.06, "learning_rate": 0.0001125645438898451, "loss": -357650.75, "step": 1000 }, { "epoch": 0.06, "learning_rate": 0.00011313826735513482, "loss": -355688.925, "step": 1005 }, { "epoch": 0.06, "learning_rate": 0.00011371199082042456, "loss": -357238.75, "step": 1010 }, { "epoch": 0.06, "learning_rate": 0.00011428571428571428, "loss": -357904.6, "step": 1015 }, { "epoch": 0.06, "learning_rate": 0.00011485943775100403, "loss": -357494.35, "step": 1020 }, { "epoch": 0.06, "learning_rate": 0.00011543316121629375, "loss": -359447.875, "step": 1025 }, { "epoch": 0.06, "learning_rate": 0.00011600688468158347, "loss": -360096.1, "step": 1030 }, { "epoch": 0.06, "learning_rate": 0.00011658060814687322, "loss": -358404.4, "step": 1035 }, { "epoch": 0.06, "learning_rate": 0.00011715433161216294, "loss": -357519.725, "step": 1040 }, { "epoch": 0.06, "learning_rate": 0.00011772805507745268, "loss": -358595.95, "step": 1045 }, { "epoch": 0.06, "learning_rate": 0.0001183017785427424, "loss": -358205.5, "step": 1050 }, { "epoch": 0.06, "learning_rate": 0.00011887550200803212, "loss": -357902.125, "step": 1055 }, { "epoch": 0.06, "learning_rate": 0.00011944922547332187, "loss": -355905.0, "step": 1060 }, { "epoch": 0.06, "learning_rate": 0.00012002294893861159, "loss": -358576.875, "step": 1065 }, { "epoch": 0.06, "learning_rate": 0.00012059667240390134, "loss": -357161.15, "step": 1070 }, { "epoch": 0.06, "learning_rate": 0.00012117039586919106, "loss": -356980.475, "step": 1075 }, { "epoch": 0.06, "learning_rate": 0.00012174411933448078, "loss": -358677.8, "step": 1080 }, { "epoch": 0.06, "learning_rate": 0.00012231784279977053, "loss": -361365.85, "step": 1085 }, { "epoch": 0.06, "learning_rate": 0.00012289156626506023, "loss": -358446.7, "step": 1090 }, { "epoch": 0.06, "learning_rate": 0.00012346528973035, "loss": -360241.0, "step": 1095 }, { "epoch": 0.06, "learning_rate": 0.0001240390131956397, "loss": -357614.0, "step": 1100 }, { "epoch": 0.06, "learning_rate": 0.00012461273666092943, "loss": -358462.175, "step": 1105 }, { "epoch": 0.06, "learning_rate": 0.00012518646012621917, "loss": -359095.15, "step": 1110 }, { "epoch": 0.06, "learning_rate": 0.0001257601835915089, "loss": -360547.825, "step": 1115 }, { "epoch": 0.06, "learning_rate": 0.00012633390705679863, "loss": -358881.2, "step": 1120 }, { "epoch": 0.06, "learning_rate": 0.00012690763052208837, "loss": -358529.925, "step": 1125 }, { "epoch": 0.06, "learning_rate": 0.00012748135398737807, "loss": -360481.125, "step": 1130 }, { "epoch": 0.07, "learning_rate": 0.00012805507745266783, "loss": -360687.55, "step": 1135 }, { "epoch": 0.07, "learning_rate": 0.00012862880091795754, "loss": -361122.95, "step": 1140 }, { "epoch": 0.07, "learning_rate": 0.0001292025243832473, "loss": -360700.1, "step": 1145 }, { "epoch": 0.07, "learning_rate": 0.000129776247848537, "loss": -359606.85, "step": 1150 }, { "epoch": 0.07, "learning_rate": 0.00013034997131382674, "loss": -359177.65, "step": 1155 }, { "epoch": 0.07, "learning_rate": 0.00013092369477911648, "loss": -360164.15, "step": 1160 }, { "epoch": 0.07, "learning_rate": 0.0001314974182444062, "loss": -360854.2, "step": 1165 }, { "epoch": 0.07, "learning_rate": 0.00013207114170969592, "loss": -361289.55, "step": 1170 }, { "epoch": 0.07, "learning_rate": 0.00013264486517498568, "loss": -359114.45, "step": 1175 }, { "epoch": 0.07, "learning_rate": 0.00013321858864027538, "loss": -360385.675, "step": 1180 }, { "epoch": 0.07, "learning_rate": 0.00013379231210556514, "loss": -362112.625, "step": 1185 }, { "epoch": 0.07, "learning_rate": 0.00013436603557085485, "loss": -362259.45, "step": 1190 }, { "epoch": 0.07, "learning_rate": 0.00013493975903614458, "loss": -363982.25, "step": 1195 }, { "epoch": 0.07, "learning_rate": 0.00013551348250143432, "loss": -362026.25, "step": 1200 }, { "epoch": 0.07, "learning_rate": 0.00013608720596672405, "loss": -362380.625, "step": 1205 }, { "epoch": 0.07, "learning_rate": 0.00013666092943201378, "loss": -360134.85, "step": 1210 }, { "epoch": 0.07, "learning_rate": 0.00013723465289730352, "loss": -363139.05, "step": 1215 }, { "epoch": 0.07, "learning_rate": 0.00013780837636259322, "loss": -361801.5, "step": 1220 }, { "epoch": 0.07, "learning_rate": 0.00013838209982788298, "loss": -361492.95, "step": 1225 }, { "epoch": 0.07, "learning_rate": 0.0001389558232931727, "loss": -361177.0, "step": 1230 }, { "epoch": 0.07, "learning_rate": 0.00013952954675846242, "loss": -359543.225, "step": 1235 }, { "epoch": 0.07, "learning_rate": 0.00014010327022375216, "loss": -361196.2, "step": 1240 }, { "epoch": 0.07, "learning_rate": 0.0001406769936890419, "loss": -362095.25, "step": 1245 }, { "epoch": 0.07, "learning_rate": 0.00014125071715433162, "loss": -360909.45, "step": 1250 }, { "epoch": 0.07, "learning_rate": 0.00014182444061962136, "loss": -360705.05, "step": 1255 }, { "epoch": 0.07, "learning_rate": 0.0001423981640849111, "loss": -359824.35, "step": 1260 }, { "epoch": 0.07, "learning_rate": 0.0001429718875502008, "loss": -363746.275, "step": 1265 }, { "epoch": 0.07, "learning_rate": 0.00014354561101549053, "loss": -361251.4, "step": 1270 }, { "epoch": 0.07, "learning_rate": 0.00014411933448078027, "loss": -362261.975, "step": 1275 }, { "epoch": 0.07, "learning_rate": 0.00014469305794607, "loss": -360860.95, "step": 1280 }, { "epoch": 0.07, "learning_rate": 0.00014526678141135973, "loss": -360790.65, "step": 1285 }, { "epoch": 0.07, "learning_rate": 0.00014584050487664947, "loss": -363039.325, "step": 1290 }, { "epoch": 0.07, "learning_rate": 0.00014641422834193917, "loss": -361957.125, "step": 1295 }, { "epoch": 0.07, "learning_rate": 0.00014698795180722893, "loss": -363227.75, "step": 1300 }, { "epoch": 0.07, "learning_rate": 0.00014756167527251864, "loss": -363475.55, "step": 1305 }, { "epoch": 0.08, "learning_rate": 0.00014813539873780837, "loss": -363008.35, "step": 1310 }, { "epoch": 0.08, "learning_rate": 0.0001487091222030981, "loss": -363893.85, "step": 1315 }, { "epoch": 0.08, "learning_rate": 0.00014928284566838784, "loss": -364085.125, "step": 1320 }, { "epoch": 0.08, "learning_rate": 0.00014985656913367757, "loss": -364163.2, "step": 1325 }, { "epoch": 0.08, "learning_rate": 0.0001504302925989673, "loss": -365433.5, "step": 1330 }, { "epoch": 0.08, "learning_rate": 0.00015100401606425701, "loss": -363770.375, "step": 1335 }, { "epoch": 0.08, "learning_rate": 0.00015157773952954677, "loss": -363925.7, "step": 1340 }, { "epoch": 0.08, "learning_rate": 0.00015215146299483648, "loss": -365160.825, "step": 1345 }, { "epoch": 0.08, "learning_rate": 0.00015272518646012624, "loss": -365413.325, "step": 1350 }, { "epoch": 0.08, "learning_rate": 0.00015329890992541595, "loss": -366124.675, "step": 1355 }, { "epoch": 0.08, "learning_rate": 0.00015387263339070568, "loss": -365539.125, "step": 1360 }, { "epoch": 0.08, "learning_rate": 0.00015444635685599542, "loss": -366397.4, "step": 1365 }, { "epoch": 0.08, "learning_rate": 0.00015502008032128515, "loss": -365261.925, "step": 1370 }, { "epoch": 0.08, "learning_rate": 0.00015559380378657488, "loss": -365231.175, "step": 1375 }, { "epoch": 0.08, "learning_rate": 0.00015616752725186462, "loss": -365399.5, "step": 1380 }, { "epoch": 0.08, "learning_rate": 0.00015674125071715432, "loss": -363881.425, "step": 1385 }, { "epoch": 0.08, "learning_rate": 0.00015731497418244408, "loss": -365521.1, "step": 1390 }, { "epoch": 0.08, "learning_rate": 0.0001578886976477338, "loss": -365215.9, "step": 1395 }, { "epoch": 0.08, "learning_rate": 0.00015846242111302355, "loss": -364513.7, "step": 1400 }, { "epoch": 0.08, "learning_rate": 0.00015903614457831326, "loss": -364748.6, "step": 1405 }, { "epoch": 0.08, "learning_rate": 0.000159609868043603, "loss": -363320.75, "step": 1410 }, { "epoch": 0.08, "learning_rate": 0.00016018359150889272, "loss": -363812.2, "step": 1415 }, { "epoch": 0.08, "learning_rate": 0.00016075731497418246, "loss": -364653.9, "step": 1420 }, { "epoch": 0.08, "learning_rate": 0.0001613310384394722, "loss": -364665.55, "step": 1425 }, { "epoch": 0.08, "learning_rate": 0.00016190476190476192, "loss": -365881.2, "step": 1430 }, { "epoch": 0.08, "learning_rate": 0.00016247848537005163, "loss": -366169.075, "step": 1435 }, { "epoch": 0.08, "learning_rate": 0.0001630522088353414, "loss": -365195.8, "step": 1440 }, { "epoch": 0.08, "learning_rate": 0.0001636259323006311, "loss": -362595.15, "step": 1445 }, { "epoch": 0.08, "learning_rate": 0.00016419965576592083, "loss": -362970.7, "step": 1450 }, { "epoch": 0.08, "learning_rate": 0.00016477337923121056, "loss": -364486.675, "step": 1455 }, { "epoch": 0.08, "learning_rate": 0.0001653471026965003, "loss": -366094.55, "step": 1460 }, { "epoch": 0.08, "learning_rate": 0.00016592082616179003, "loss": -366584.675, "step": 1465 }, { "epoch": 0.08, "learning_rate": 0.00016649454962707977, "loss": -365571.7, "step": 1470 }, { "epoch": 0.08, "learning_rate": 0.00016706827309236947, "loss": -366305.35, "step": 1475 }, { "epoch": 0.08, "learning_rate": 0.00016764199655765923, "loss": -364763.65, "step": 1480 }, { "epoch": 0.09, "learning_rate": 0.00016821572002294894, "loss": -366345.225, "step": 1485 }, { "epoch": 0.09, "learning_rate": 0.00016878944348823867, "loss": -365956.0, "step": 1490 }, { "epoch": 0.09, "learning_rate": 0.0001693631669535284, "loss": -365422.15, "step": 1495 }, { "epoch": 0.09, "learning_rate": 0.00016993689041881814, "loss": -366308.25, "step": 1500 }, { "epoch": 0.09, "learning_rate": 0.00017051061388410787, "loss": -367875.15, "step": 1505 }, { "epoch": 0.09, "learning_rate": 0.0001710843373493976, "loss": -364890.275, "step": 1510 }, { "epoch": 0.09, "learning_rate": 0.00017165806081468734, "loss": -364779.925, "step": 1515 }, { "epoch": 0.09, "learning_rate": 0.00017223178427997705, "loss": -367256.55, "step": 1520 }, { "epoch": 0.09, "learning_rate": 0.00017280550774526678, "loss": -367925.85, "step": 1525 }, { "epoch": 0.09, "learning_rate": 0.00017337923121055651, "loss": -366563.575, "step": 1530 }, { "epoch": 0.09, "learning_rate": 0.00017395295467584625, "loss": -367666.95, "step": 1535 }, { "epoch": 0.09, "learning_rate": 0.00017452667814113598, "loss": -366986.625, "step": 1540 }, { "epoch": 0.09, "learning_rate": 0.00017510040160642571, "loss": -365646.4, "step": 1545 }, { "epoch": 0.09, "learning_rate": 0.00017567412507171542, "loss": -367983.925, "step": 1550 }, { "epoch": 0.09, "learning_rate": 0.00017624784853700518, "loss": -366123.125, "step": 1555 }, { "epoch": 0.09, "learning_rate": 0.0001768215720022949, "loss": -367173.025, "step": 1560 }, { "epoch": 0.09, "learning_rate": 0.00017739529546758465, "loss": -369332.575, "step": 1565 }, { "epoch": 0.09, "learning_rate": 0.00017796901893287436, "loss": -368102.05, "step": 1570 }, { "epoch": 0.09, "learning_rate": 0.0001785427423981641, "loss": -368694.55, "step": 1575 }, { "epoch": 0.09, "learning_rate": 0.00017911646586345382, "loss": -369101.475, "step": 1580 }, { "epoch": 0.09, "learning_rate": 0.00017969018932874356, "loss": -370433.575, "step": 1585 }, { "epoch": 0.09, "learning_rate": 0.00018026391279403326, "loss": -368229.25, "step": 1590 }, { "epoch": 0.09, "learning_rate": 0.00018083763625932302, "loss": -370543.525, "step": 1595 }, { "epoch": 0.09, "learning_rate": 0.00018141135972461273, "loss": -368325.2, "step": 1600 }, { "epoch": 0.09, "learning_rate": 0.0001819850831899025, "loss": -366829.8, "step": 1605 }, { "epoch": 0.09, "learning_rate": 0.0001825588066551922, "loss": -370263.3, "step": 1610 }, { "epoch": 0.09, "learning_rate": 0.00018313253012048193, "loss": -370699.8, "step": 1615 }, { "epoch": 0.09, "learning_rate": 0.00018370625358577166, "loss": -369021.75, "step": 1620 }, { "epoch": 0.09, "learning_rate": 0.0001842799770510614, "loss": -367038.475, "step": 1625 }, { "epoch": 0.09, "learning_rate": 0.00018485370051635113, "loss": -370615.5, "step": 1630 }, { "epoch": 0.09, "learning_rate": 0.00018542742398164086, "loss": -370063.65, "step": 1635 }, { "epoch": 0.09, "learning_rate": 0.00018600114744693057, "loss": -372375.2, "step": 1640 }, { "epoch": 0.09, "learning_rate": 0.00018657487091222033, "loss": -370148.15, "step": 1645 }, { "epoch": 0.09, "learning_rate": 0.00018714859437751004, "loss": -371129.6, "step": 1650 }, { "epoch": 0.09, "learning_rate": 0.0001877223178427998, "loss": -369848.225, "step": 1655 }, { "epoch": 0.1, "learning_rate": 0.0001882960413080895, "loss": -371611.225, "step": 1660 }, { "epoch": 0.1, "learning_rate": 0.00018886976477337924, "loss": -370429.4, "step": 1665 }, { "epoch": 0.1, "learning_rate": 0.00018944348823866897, "loss": -369488.8, "step": 1670 }, { "epoch": 0.1, "learning_rate": 0.0001900172117039587, "loss": -368976.45, "step": 1675 }, { "epoch": 0.1, "learning_rate": 0.00019059093516924844, "loss": -369330.65, "step": 1680 }, { "epoch": 0.1, "learning_rate": 0.00019116465863453817, "loss": -369349.7, "step": 1685 }, { "epoch": 0.1, "learning_rate": 0.00019173838209982788, "loss": -370086.65, "step": 1690 }, { "epoch": 0.1, "learning_rate": 0.00019231210556511764, "loss": -368275.9, "step": 1695 }, { "epoch": 0.1, "learning_rate": 0.00019288582903040735, "loss": -371039.125, "step": 1700 }, { "epoch": 0.1, "learning_rate": 0.0001934595524956971, "loss": -368649.225, "step": 1705 }, { "epoch": 0.1, "learning_rate": 0.0001940332759609868, "loss": -370923.7, "step": 1710 }, { "epoch": 0.1, "learning_rate": 0.00019460699942627655, "loss": -370107.775, "step": 1715 }, { "epoch": 0.1, "learning_rate": 0.00019518072289156628, "loss": -371011.2, "step": 1720 }, { "epoch": 0.1, "learning_rate": 0.00019575444635685601, "loss": -368382.9, "step": 1725 }, { "epoch": 0.1, "learning_rate": 0.00019632816982214572, "loss": -372018.875, "step": 1730 }, { "epoch": 0.1, "learning_rate": 0.00019690189328743548, "loss": -369914.925, "step": 1735 }, { "epoch": 0.1, "learning_rate": 0.0001974756167527252, "loss": -370497.65, "step": 1740 }, { "epoch": 0.1, "learning_rate": 0.00019804934021801492, "loss": -371605.45, "step": 1745 }, { "epoch": 0.1, "learning_rate": 0.00019862306368330465, "loss": -370938.55, "step": 1750 }, { "epoch": 0.1, "learning_rate": 0.0001991967871485944, "loss": -369546.8, "step": 1755 }, { "epoch": 0.1, "learning_rate": 0.00019977051061388412, "loss": -371349.5, "step": 1760 }, { "epoch": 0.1, "learning_rate": 0.0001999999819495587, "loss": -369888.225, "step": 1765 }, { "epoch": 0.1, "learning_rate": 0.00019999987164132972, "loss": -372648.9, "step": 1770 }, { "epoch": 0.1, "learning_rate": 0.00019999966105300522, "loss": -372354.45, "step": 1775 }, { "epoch": 0.1, "learning_rate": 0.0001999993501847964, "loss": -371826.925, "step": 1780 }, { "epoch": 0.1, "learning_rate": 0.00019999893903701498, "loss": -373579.85, "step": 1785 }, { "epoch": 0.1, "learning_rate": 0.00019999842761007325, "loss": -372518.15, "step": 1790 }, { "epoch": 0.1, "learning_rate": 0.0001999978159044841, "loss": -373327.5, "step": 1795 }, { "epoch": 0.1, "learning_rate": 0.0001999971039208609, "loss": -374031.8, "step": 1800 }, { "epoch": 0.1, "learning_rate": 0.00019999629165991768, "loss": -375497.475, "step": 1805 }, { "epoch": 0.1, "learning_rate": 0.00019999537912246892, "loss": -371809.95, "step": 1810 }, { "epoch": 0.1, "learning_rate": 0.00019999436630942975, "loss": -372979.475, "step": 1815 }, { "epoch": 0.1, "learning_rate": 0.00019999325322181584, "loss": -373760.6, "step": 1820 }, { "epoch": 0.1, "learning_rate": 0.0001999920398607434, "loss": -374876.575, "step": 1825 }, { "epoch": 0.1, "learning_rate": 0.00019999072622742912, "loss": -375073.9, "step": 1830 }, { "epoch": 0.11, "learning_rate": 0.0001999893123231904, "loss": -374234.1, "step": 1835 }, { "epoch": 0.11, "learning_rate": 0.00019998779814944505, "loss": -374232.375, "step": 1840 }, { "epoch": 0.11, "learning_rate": 0.00019998618370771148, "loss": -373467.0, "step": 1845 }, { "epoch": 0.11, "learning_rate": 0.0001999844689996087, "loss": -374076.4, "step": 1850 }, { "epoch": 0.11, "learning_rate": 0.00019998265402685622, "loss": -374997.65, "step": 1855 }, { "epoch": 0.11, "learning_rate": 0.00019998073879127408, "loss": -375139.15, "step": 1860 }, { "epoch": 0.11, "learning_rate": 0.00019997872329478286, "loss": -374068.475, "step": 1865 }, { "epoch": 0.11, "learning_rate": 0.00019997660753940375, "loss": -374126.4, "step": 1870 }, { "epoch": 0.11, "learning_rate": 0.0001999743915272584, "loss": -375399.95, "step": 1875 }, { "epoch": 0.11, "learning_rate": 0.00019997207526056907, "loss": -375400.4, "step": 1880 }, { "epoch": 0.11, "learning_rate": 0.0001999696587416585, "loss": -373046.0, "step": 1885 }, { "epoch": 0.11, "learning_rate": 0.00019996714197294995, "loss": -372806.025, "step": 1890 }, { "epoch": 0.11, "learning_rate": 0.00019996452495696726, "loss": -373136.95, "step": 1895 }, { "epoch": 0.11, "learning_rate": 0.0001999618076963348, "loss": -373399.475, "step": 1900 }, { "epoch": 0.11, "learning_rate": 0.00019995899019377742, "loss": -374226.25, "step": 1905 }, { "epoch": 0.11, "learning_rate": 0.00019995607245212052, "loss": -373977.6, "step": 1910 }, { "epoch": 0.11, "learning_rate": 0.00019995305447429002, "loss": -372141.325, "step": 1915 }, { "epoch": 0.11, "learning_rate": 0.00019994993626331237, "loss": -374759.85, "step": 1920 }, { "epoch": 0.11, "learning_rate": 0.00019994671782231453, "loss": -375066.575, "step": 1925 }, { "epoch": 0.11, "learning_rate": 0.00019994339915452395, "loss": -373136.5, "step": 1930 }, { "epoch": 0.11, "learning_rate": 0.00019993998026326853, "loss": -376071.1, "step": 1935 }, { "epoch": 0.11, "learning_rate": 0.00019993646115197685, "loss": -373615.7, "step": 1940 }, { "epoch": 0.11, "learning_rate": 0.0001999328418241778, "loss": -374433.275, "step": 1945 }, { "epoch": 0.11, "learning_rate": 0.0001999291222835009, "loss": -374596.6, "step": 1950 }, { "epoch": 0.11, "learning_rate": 0.00019992530253367608, "loss": -372932.975, "step": 1955 }, { "epoch": 0.11, "learning_rate": 0.0001999213825785338, "loss": -375415.55, "step": 1960 }, { "epoch": 0.11, "learning_rate": 0.00019991736242200506, "loss": -374878.0, "step": 1965 }, { "epoch": 0.11, "learning_rate": 0.00019991324206812116, "loss": -375141.8, "step": 1970 }, { "epoch": 0.11, "learning_rate": 0.00019990902152101412, "loss": -373080.95, "step": 1975 }, { "epoch": 0.11, "learning_rate": 0.00019990470078491625, "loss": -376766.65, "step": 1980 }, { "epoch": 0.11, "learning_rate": 0.0001999002798641604, "loss": -374474.125, "step": 1985 }, { "epoch": 0.11, "learning_rate": 0.00019989575876317993, "loss": -378217.05, "step": 1990 }, { "epoch": 0.11, "learning_rate": 0.0001998911374865085, "loss": -374596.1, "step": 1995 }, { "epoch": 0.11, "learning_rate": 0.00019988641603878047, "loss": -375783.3, "step": 2000 }, { "epoch": 0.12, "learning_rate": 0.0001998815944247304, "loss": -376494.65, "step": 2005 }, { "epoch": 0.12, "learning_rate": 0.0001998766726491935, "loss": -376611.325, "step": 2010 }, { "epoch": 0.12, "learning_rate": 0.00019987165071710527, "loss": -374997.375, "step": 2015 }, { "epoch": 0.12, "learning_rate": 0.0001998665286335018, "loss": -375301.7, "step": 2020 }, { "epoch": 0.12, "learning_rate": 0.00019986130640351946, "loss": -375884.9, "step": 2025 }, { "epoch": 0.12, "learning_rate": 0.00019985598403239512, "loss": -374623.95, "step": 2030 }, { "epoch": 0.12, "learning_rate": 0.00019985056152546608, "loss": -375190.3, "step": 2035 }, { "epoch": 0.12, "learning_rate": 0.00019984503888817004, "loss": -376001.15, "step": 2040 }, { "epoch": 0.12, "learning_rate": 0.0001998394161260451, "loss": -376446.7, "step": 2045 }, { "epoch": 0.12, "learning_rate": 0.00019983369324472983, "loss": -376786.6, "step": 2050 }, { "epoch": 0.12, "learning_rate": 0.00019982787024996307, "loss": -377994.125, "step": 2055 }, { "epoch": 0.12, "learning_rate": 0.00019982194714758417, "loss": -377141.1, "step": 2060 }, { "epoch": 0.12, "learning_rate": 0.00019981592394353287, "loss": -377329.175, "step": 2065 }, { "epoch": 0.12, "learning_rate": 0.00019980980064384916, "loss": -378761.0, "step": 2070 }, { "epoch": 0.12, "learning_rate": 0.0001998035772546736, "loss": -378007.975, "step": 2075 }, { "epoch": 0.12, "learning_rate": 0.00019979725378224697, "loss": -376513.1, "step": 2080 }, { "epoch": 0.12, "learning_rate": 0.00019979083023291043, "loss": -374871.65, "step": 2085 }, { "epoch": 0.12, "learning_rate": 0.0001997843066131056, "loss": -377070.95, "step": 2090 }, { "epoch": 0.12, "learning_rate": 0.00019977768292937431, "loss": -376784.475, "step": 2095 }, { "epoch": 0.12, "learning_rate": 0.00019977095918835886, "loss": -376203.975, "step": 2100 }, { "epoch": 0.12, "learning_rate": 0.00019976413539680182, "loss": -376471.5, "step": 2105 }, { "epoch": 0.12, "learning_rate": 0.00019975721156154606, "loss": -378366.35, "step": 2110 }, { "epoch": 0.12, "learning_rate": 0.0001997501876895349, "loss": -380470.325, "step": 2115 }, { "epoch": 0.12, "learning_rate": 0.0001997430637878118, "loss": -379603.125, "step": 2120 }, { "epoch": 0.12, "learning_rate": 0.0001997358398635207, "loss": -379410.3, "step": 2125 }, { "epoch": 0.12, "learning_rate": 0.00019972851592390574, "loss": -378729.1, "step": 2130 }, { "epoch": 0.12, "learning_rate": 0.00019972109197631137, "loss": -381401.65, "step": 2135 }, { "epoch": 0.12, "learning_rate": 0.00019971356802818234, "loss": -376794.175, "step": 2140 }, { "epoch": 0.12, "learning_rate": 0.00019970594408706373, "loss": -377792.475, "step": 2145 }, { "epoch": 0.12, "learning_rate": 0.0001996982201606008, "loss": -379310.85, "step": 2150 }, { "epoch": 0.12, "learning_rate": 0.00019969039625653912, "loss": -376466.35, "step": 2155 }, { "epoch": 0.12, "learning_rate": 0.00019968247238272456, "loss": -377192.2, "step": 2160 }, { "epoch": 0.12, "learning_rate": 0.0001996744485471031, "loss": -379389.775, "step": 2165 }, { "epoch": 0.12, "learning_rate": 0.0001996663247577212, "loss": -378994.55, "step": 2170 }, { "epoch": 0.12, "learning_rate": 0.0001996581010227253, "loss": -377849.05, "step": 2175 }, { "epoch": 0.13, "learning_rate": 0.00019964977735036223, "loss": -380360.15, "step": 2180 }, { "epoch": 0.13, "learning_rate": 0.000199641353748979, "loss": -376151.325, "step": 2185 }, { "epoch": 0.13, "learning_rate": 0.0001996328302270228, "loss": -379724.675, "step": 2190 }, { "epoch": 0.13, "learning_rate": 0.00019962420679304102, "loss": -380463.875, "step": 2195 }, { "epoch": 0.13, "learning_rate": 0.00019961548345568127, "loss": -377221.05, "step": 2200 }, { "epoch": 0.13, "learning_rate": 0.00019960666022369132, "loss": -379056.475, "step": 2205 }, { "epoch": 0.13, "learning_rate": 0.00019959773710591917, "loss": -378709.625, "step": 2210 }, { "epoch": 0.13, "learning_rate": 0.0001995887141113129, "loss": -379229.1, "step": 2215 }, { "epoch": 0.13, "learning_rate": 0.0001995795912489208, "loss": -380581.875, "step": 2220 }, { "epoch": 0.13, "learning_rate": 0.0001995703685278913, "loss": -380266.475, "step": 2225 }, { "epoch": 0.13, "learning_rate": 0.00019956104595747297, "loss": -380923.05, "step": 2230 }, { "epoch": 0.13, "learning_rate": 0.00019955162354701452, "loss": -379820.1, "step": 2235 }, { "epoch": 0.13, "learning_rate": 0.0001995421013059647, "loss": -378100.25, "step": 2240 }, { "epoch": 0.13, "learning_rate": 0.00019953247924387252, "loss": -378512.35, "step": 2245 }, { "epoch": 0.13, "learning_rate": 0.00019952275737038696, "loss": -378658.975, "step": 2250 }, { "epoch": 0.13, "learning_rate": 0.00019951293569525714, "loss": -380244.1, "step": 2255 }, { "epoch": 0.13, "learning_rate": 0.00019950301422833223, "loss": -379381.4, "step": 2260 }, { "epoch": 0.13, "learning_rate": 0.00019949299297956157, "loss": -379346.25, "step": 2265 }, { "epoch": 0.13, "learning_rate": 0.00019948287195899441, "loss": -381164.475, "step": 2270 }, { "epoch": 0.13, "learning_rate": 0.0001994726511767802, "loss": -381646.4, "step": 2275 }, { "epoch": 0.13, "learning_rate": 0.0001994623306431683, "loss": -382291.45, "step": 2280 }, { "epoch": 0.13, "learning_rate": 0.00019945191036850824, "loss": -381426.8, "step": 2285 }, { "epoch": 0.13, "learning_rate": 0.00019944139036324942, "loss": -380680.5, "step": 2290 }, { "epoch": 0.13, "learning_rate": 0.00019943077063794138, "loss": -379971.85, "step": 2295 }, { "epoch": 0.13, "learning_rate": 0.00019942005120323356, "loss": -380942.95, "step": 2300 }, { "epoch": 0.13, "learning_rate": 0.0001994092320698755, "loss": -380319.025, "step": 2305 }, { "epoch": 0.13, "learning_rate": 0.00019939831324871656, "loss": -382115.65, "step": 2310 }, { "epoch": 0.13, "learning_rate": 0.00019938729475070624, "loss": -381474.275, "step": 2315 }, { "epoch": 0.13, "learning_rate": 0.00019937617658689384, "loss": -380908.375, "step": 2320 }, { "epoch": 0.13, "learning_rate": 0.00019936495876842877, "loss": -382077.0, "step": 2325 }, { "epoch": 0.13, "learning_rate": 0.0001993536413065602, "loss": -381410.025, "step": 2330 }, { "epoch": 0.13, "learning_rate": 0.00019934222421263734, "loss": -382348.975, "step": 2335 }, { "epoch": 0.13, "learning_rate": 0.00019933070749810928, "loss": -380548.25, "step": 2340 }, { "epoch": 0.13, "learning_rate": 0.00019931909117452498, "loss": -381308.7, "step": 2345 }, { "epoch": 0.13, "learning_rate": 0.00019930737525353337, "loss": -383941.775, "step": 2350 }, { "epoch": 0.14, "learning_rate": 0.00019929555974688317, "loss": -381028.575, "step": 2355 }, { "epoch": 0.14, "learning_rate": 0.000199283644666423, "loss": -381415.6, "step": 2360 }, { "epoch": 0.14, "learning_rate": 0.00019927163002410133, "loss": -380644.25, "step": 2365 }, { "epoch": 0.14, "learning_rate": 0.0001992595158319664, "loss": -379236.3, "step": 2370 }, { "epoch": 0.14, "learning_rate": 0.00019924730210216652, "loss": -383746.7, "step": 2375 }, { "epoch": 0.14, "learning_rate": 0.00019923498884694948, "loss": -383131.4, "step": 2380 }, { "epoch": 0.14, "learning_rate": 0.0001992225760786631, "loss": -383897.0, "step": 2385 }, { "epoch": 0.14, "learning_rate": 0.00019921006380975496, "loss": -382192.75, "step": 2390 }, { "epoch": 0.14, "learning_rate": 0.00019919745205277236, "loss": -384938.525, "step": 2395 }, { "epoch": 0.14, "learning_rate": 0.0001991847408203624, "loss": -382230.3, "step": 2400 }, { "epoch": 0.14, "learning_rate": 0.00019917193012527194, "loss": -383274.35, "step": 2405 }, { "epoch": 0.14, "learning_rate": 0.00019915901998034756, "loss": -382033.775, "step": 2410 }, { "epoch": 0.14, "learning_rate": 0.00019914601039853558, "loss": -384337.975, "step": 2415 }, { "epoch": 0.14, "learning_rate": 0.00019913290139288208, "loss": -383088.7, "step": 2420 }, { "epoch": 0.14, "learning_rate": 0.00019911969297653275, "loss": -384575.35, "step": 2425 }, { "epoch": 0.14, "learning_rate": 0.000199106385162733, "loss": -384732.75, "step": 2430 }, { "epoch": 0.14, "learning_rate": 0.000199092977964828, "loss": -381957.525, "step": 2435 }, { "epoch": 0.14, "learning_rate": 0.00019907947139626253, "loss": -383987.4, "step": 2440 }, { "epoch": 0.14, "learning_rate": 0.00019906586547058095, "loss": -385980.9, "step": 2445 }, { "epoch": 0.14, "learning_rate": 0.00019905216020142734, "loss": -384454.15, "step": 2450 }, { "epoch": 0.14, "learning_rate": 0.00019903835560254536, "loss": -383701.65, "step": 2455 }, { "epoch": 0.14, "learning_rate": 0.0001990244516877783, "loss": -386540.15, "step": 2460 }, { "epoch": 0.14, "learning_rate": 0.00019901044847106904, "loss": -384910.45, "step": 2465 }, { "epoch": 0.14, "learning_rate": 0.00019899634596646002, "loss": -385993.225, "step": 2470 }, { "epoch": 0.14, "learning_rate": 0.0001989821441880933, "loss": -384914.4, "step": 2475 }, { "epoch": 0.14, "learning_rate": 0.0001989678431502104, "loss": -384792.5, "step": 2480 }, { "epoch": 0.14, "learning_rate": 0.0001989534428671525, "loss": -386860.775, "step": 2485 }, { "epoch": 0.14, "learning_rate": 0.00019893894335336017, "loss": -386077.15, "step": 2490 }, { "epoch": 0.14, "learning_rate": 0.0001989243446233736, "loss": -387537.225, "step": 2495 }, { "epoch": 0.14, "learning_rate": 0.0001989096466918324, "loss": -386423.8, "step": 2500 }, { "epoch": 0.14, "learning_rate": 0.00019889484957347575, "loss": -387536.45, "step": 2505 }, { "epoch": 0.14, "learning_rate": 0.00019887995328314215, "loss": -387344.85, "step": 2510 }, { "epoch": 0.14, "learning_rate": 0.0001988649578357697, "loss": -385629.8, "step": 2515 }, { "epoch": 0.14, "learning_rate": 0.0001988498632463958, "loss": -386901.975, "step": 2520 }, { "epoch": 0.14, "learning_rate": 0.0001988346695301574, "loss": -385991.45, "step": 2525 }, { "epoch": 0.15, "learning_rate": 0.00019881937670229076, "loss": -387151.35, "step": 2530 }, { "epoch": 0.15, "learning_rate": 0.00019880398477813157, "loss": -388640.1, "step": 2535 }, { "epoch": 0.15, "learning_rate": 0.0001987884937731149, "loss": -386557.35, "step": 2540 }, { "epoch": 0.15, "learning_rate": 0.00019877290370277516, "loss": -387900.95, "step": 2545 }, { "epoch": 0.15, "learning_rate": 0.0001987572145827461, "loss": -388339.55, "step": 2550 }, { "epoch": 0.15, "learning_rate": 0.0001987414264287608, "loss": -389637.45, "step": 2555 }, { "epoch": 0.15, "learning_rate": 0.00019872553925665164, "loss": -387942.825, "step": 2560 }, { "epoch": 0.15, "learning_rate": 0.00019870955308235036, "loss": -386618.725, "step": 2565 }, { "epoch": 0.15, "learning_rate": 0.00019869346792188787, "loss": -388027.925, "step": 2570 }, { "epoch": 0.15, "learning_rate": 0.0001986772837913945, "loss": -388338.6, "step": 2575 }, { "epoch": 0.15, "learning_rate": 0.00019866100070709962, "loss": -387953.725, "step": 2580 }, { "epoch": 0.15, "learning_rate": 0.00019864461868533201, "loss": -387030.95, "step": 2585 }, { "epoch": 0.15, "learning_rate": 0.0001986281377425196, "loss": -387630.4, "step": 2590 }, { "epoch": 0.15, "learning_rate": 0.00019861155789518948, "loss": -387084.9, "step": 2595 }, { "epoch": 0.15, "learning_rate": 0.00019859487915996796, "loss": -387936.3, "step": 2600 }, { "epoch": 0.15, "learning_rate": 0.00019857810155358058, "loss": -387557.475, "step": 2605 }, { "epoch": 0.15, "learning_rate": 0.0001985612250928519, "loss": -387064.125, "step": 2610 }, { "epoch": 0.15, "learning_rate": 0.00019854424979470566, "loss": -389489.6, "step": 2615 }, { "epoch": 0.15, "learning_rate": 0.00019852717567616477, "loss": -386877.95, "step": 2620 }, { "epoch": 0.15, "learning_rate": 0.00019851000275435116, "loss": -388247.55, "step": 2625 }, { "epoch": 0.15, "learning_rate": 0.00019849273104648592, "loss": -388444.175, "step": 2630 }, { "epoch": 0.15, "learning_rate": 0.00019847536056988912, "loss": -388085.125, "step": 2635 }, { "epoch": 0.15, "learning_rate": 0.0001984578913419799, "loss": -390132.025, "step": 2640 }, { "epoch": 0.15, "learning_rate": 0.00019844032338027647, "loss": -390666.85, "step": 2645 }, { "epoch": 0.15, "learning_rate": 0.000198422656702396, "loss": -387382.45, "step": 2650 }, { "epoch": 0.15, "learning_rate": 0.00019840489132605472, "loss": -389585.45, "step": 2655 }, { "epoch": 0.15, "learning_rate": 0.00019838702726906774, "loss": -387664.7, "step": 2660 }, { "epoch": 0.15, "learning_rate": 0.00019836906454934918, "loss": -389010.9, "step": 2665 }, { "epoch": 0.15, "learning_rate": 0.00019835100318491214, "loss": -388872.45, "step": 2670 }, { "epoch": 0.15, "learning_rate": 0.00019833284319386855, "loss": -389068.725, "step": 2675 }, { "epoch": 0.15, "learning_rate": 0.00019831458459442931, "loss": -386642.95, "step": 2680 }, { "epoch": 0.15, "learning_rate": 0.0001982962274049042, "loss": -389572.8, "step": 2685 }, { "epoch": 0.15, "learning_rate": 0.00019827777164370178, "loss": -389664.675, "step": 2690 }, { "epoch": 0.15, "learning_rate": 0.0001982592173293296, "loss": -389267.95, "step": 2695 }, { "epoch": 0.15, "learning_rate": 0.00019824056448039396, "loss": -389436.8, "step": 2700 }, { "epoch": 0.16, "learning_rate": 0.00019822181311559994, "loss": -391930.425, "step": 2705 }, { "epoch": 0.16, "learning_rate": 0.00019820296325375153, "loss": -387965.325, "step": 2710 }, { "epoch": 0.16, "learning_rate": 0.00019818401491375132, "loss": -388510.625, "step": 2715 }, { "epoch": 0.16, "learning_rate": 0.0001981649681146008, "loss": -388122.625, "step": 2720 }, { "epoch": 0.16, "learning_rate": 0.0001981458228754001, "loss": -388644.0, "step": 2725 }, { "epoch": 0.16, "learning_rate": 0.00019812657921534818, "loss": -389955.125, "step": 2730 }, { "epoch": 0.16, "learning_rate": 0.0001981072371537426, "loss": -388322.6, "step": 2735 }, { "epoch": 0.16, "learning_rate": 0.00019808779670997957, "loss": -389384.75, "step": 2740 }, { "epoch": 0.16, "learning_rate": 0.00019806825790355406, "loss": -389707.55, "step": 2745 }, { "epoch": 0.16, "learning_rate": 0.0001980486207540596, "loss": -389918.25, "step": 2750 }, { "epoch": 0.16, "learning_rate": 0.0001980288852811884, "loss": -390486.3, "step": 2755 }, { "epoch": 0.16, "learning_rate": 0.00019800905150473125, "loss": -390199.275, "step": 2760 }, { "epoch": 0.16, "learning_rate": 0.00019798911944457744, "loss": -392517.55, "step": 2765 }, { "epoch": 0.16, "learning_rate": 0.00019796908912071495, "loss": -390256.425, "step": 2770 }, { "epoch": 0.16, "learning_rate": 0.00019794896055323017, "loss": -388095.225, "step": 2775 }, { "epoch": 0.16, "learning_rate": 0.00019792873376230815, "loss": -389728.6, "step": 2780 }, { "epoch": 0.16, "learning_rate": 0.00019790840876823232, "loss": -387683.15, "step": 2785 }, { "epoch": 0.16, "learning_rate": 0.00019788798559138457, "loss": -389113.1, "step": 2790 }, { "epoch": 0.16, "learning_rate": 0.00019786746425224543, "loss": -392035.95, "step": 2795 }, { "epoch": 0.16, "learning_rate": 0.00019784684477139366, "loss": -390833.95, "step": 2800 }, { "epoch": 0.16, "learning_rate": 0.00019782612716950652, "loss": -390160.825, "step": 2805 }, { "epoch": 0.16, "learning_rate": 0.00019780531146735972, "loss": -393249.15, "step": 2810 }, { "epoch": 0.16, "learning_rate": 0.00019778439768582725, "loss": -391862.85, "step": 2815 }, { "epoch": 0.16, "learning_rate": 0.00019776338584588153, "loss": -389768.2, "step": 2820 }, { "epoch": 0.16, "learning_rate": 0.00019774227596859324, "loss": -391630.1, "step": 2825 }, { "epoch": 0.16, "learning_rate": 0.00019772106807513146, "loss": -392128.275, "step": 2830 }, { "epoch": 0.16, "learning_rate": 0.00019769976218676346, "loss": -391493.35, "step": 2835 }, { "epoch": 0.16, "learning_rate": 0.00019767835832485485, "loss": -390874.9, "step": 2840 }, { "epoch": 0.16, "learning_rate": 0.0001976568565108695, "loss": -392315.375, "step": 2845 }, { "epoch": 0.16, "learning_rate": 0.00019763525676636947, "loss": -393350.75, "step": 2850 }, { "epoch": 0.16, "learning_rate": 0.000197613559113015, "loss": -392181.875, "step": 2855 }, { "epoch": 0.16, "learning_rate": 0.00019759176357256455, "loss": -392112.25, "step": 2860 }, { "epoch": 0.16, "learning_rate": 0.00019756987016687476, "loss": -393530.525, "step": 2865 }, { "epoch": 0.16, "learning_rate": 0.00019754787891790039, "loss": -392892.175, "step": 2870 }, { "epoch": 0.16, "learning_rate": 0.00019752578984769432, "loss": -391638.65, "step": 2875 }, { "epoch": 0.17, "learning_rate": 0.00019750360297840746, "loss": -390896.4, "step": 2880 }, { "epoch": 0.17, "learning_rate": 0.00019748131833228892, "loss": -393401.7, "step": 2885 }, { "epoch": 0.17, "learning_rate": 0.00019745893593168577, "loss": -391131.8, "step": 2890 }, { "epoch": 0.17, "learning_rate": 0.00019743645579904312, "loss": -394043.15, "step": 2895 }, { "epoch": 0.17, "learning_rate": 0.00019741387795690408, "loss": -391290.3, "step": 2900 }, { "epoch": 0.17, "learning_rate": 0.00019739120242790982, "loss": -392632.6, "step": 2905 }, { "epoch": 0.17, "learning_rate": 0.00019736842923479935, "loss": -393573.65, "step": 2910 }, { "epoch": 0.17, "learning_rate": 0.0001973455584004097, "loss": -392218.625, "step": 2915 }, { "epoch": 0.17, "learning_rate": 0.00019732258994767583, "loss": -392381.75, "step": 2920 }, { "epoch": 0.17, "learning_rate": 0.00019729952389963053, "loss": -395136.075, "step": 2925 }, { "epoch": 0.17, "learning_rate": 0.00019727636027940445, "loss": -396081.0, "step": 2930 }, { "epoch": 0.17, "learning_rate": 0.00019725309911022617, "loss": -394324.7, "step": 2935 }, { "epoch": 0.17, "learning_rate": 0.00019722974041542203, "loss": -394096.8, "step": 2940 }, { "epoch": 0.17, "learning_rate": 0.00019720628421841617, "loss": -395350.625, "step": 2945 }, { "epoch": 0.17, "learning_rate": 0.00019718273054273051, "loss": -393836.025, "step": 2950 }, { "epoch": 0.17, "learning_rate": 0.00019715907941198477, "loss": -393069.25, "step": 2955 }, { "epoch": 0.17, "learning_rate": 0.0001971353308498963, "loss": -393328.0, "step": 2960 }, { "epoch": 0.17, "learning_rate": 0.00019711148488028025, "loss": -391875.05, "step": 2965 }, { "epoch": 0.17, "learning_rate": 0.0001970875415270494, "loss": -393253.6, "step": 2970 }, { "epoch": 0.17, "learning_rate": 0.0001970635008142142, "loss": -393998.3, "step": 2975 }, { "epoch": 0.17, "learning_rate": 0.00019703936276588272, "loss": -392105.475, "step": 2980 }, { "epoch": 0.17, "learning_rate": 0.00019701512740626068, "loss": -393645.9, "step": 2985 }, { "epoch": 0.17, "learning_rate": 0.00019699079475965132, "loss": -395303.55, "step": 2990 }, { "epoch": 0.17, "learning_rate": 0.00019696636485045548, "loss": -395705.85, "step": 2995 }, { "epoch": 0.17, "learning_rate": 0.00019694183770317154, "loss": -394201.825, "step": 3000 }, { "epoch": 0.17, "learning_rate": 0.00019691721334239536, "loss": -395230.95, "step": 3005 }, { "epoch": 0.17, "learning_rate": 0.00019689249179282033, "loss": -394836.2, "step": 3010 }, { "epoch": 0.17, "learning_rate": 0.00019686767307923727, "loss": -393954.225, "step": 3015 }, { "epoch": 0.17, "learning_rate": 0.0001968427572265344, "loss": -396644.95, "step": 3020 }, { "epoch": 0.17, "learning_rate": 0.00019681774425969745, "loss": -395644.775, "step": 3025 }, { "epoch": 0.17, "learning_rate": 0.00019679263420380942, "loss": -395046.575, "step": 3030 }, { "epoch": 0.17, "learning_rate": 0.0001967674270840508, "loss": -394854.95, "step": 3035 }, { "epoch": 0.17, "learning_rate": 0.0001967421229256993, "loss": -394871.25, "step": 3040 }, { "epoch": 0.17, "learning_rate": 0.00019671672175413, "loss": -393875.2, "step": 3045 }, { "epoch": 0.17, "learning_rate": 0.00019669122359481525, "loss": -393479.0, "step": 3050 }, { "epoch": 0.18, "learning_rate": 0.00019666562847332463, "loss": -397136.75, "step": 3055 }, { "epoch": 0.18, "learning_rate": 0.00019663993641532508, "loss": -395903.5, "step": 3060 }, { "epoch": 0.18, "learning_rate": 0.00019661414744658054, "loss": -394798.65, "step": 3065 }, { "epoch": 0.18, "learning_rate": 0.0001965882615929523, "loss": -395297.3, "step": 3070 }, { "epoch": 0.18, "learning_rate": 0.00019656227888039876, "loss": -395166.65, "step": 3075 }, { "epoch": 0.18, "learning_rate": 0.00019653619933497542, "loss": -395804.4, "step": 3080 }, { "epoch": 0.18, "learning_rate": 0.00019651002298283493, "loss": -394364.975, "step": 3085 }, { "epoch": 0.18, "learning_rate": 0.00019648374985022695, "loss": -395611.7, "step": 3090 }, { "epoch": 0.18, "learning_rate": 0.00019645737996349828, "loss": -395336.9, "step": 3095 }, { "epoch": 0.18, "learning_rate": 0.0001964309133490927, "loss": -396879.65, "step": 3100 }, { "epoch": 0.18, "learning_rate": 0.00019640435003355098, "loss": -397369.15, "step": 3105 }, { "epoch": 0.18, "learning_rate": 0.00019637769004351085, "loss": -396093.0, "step": 3110 }, { "epoch": 0.18, "learning_rate": 0.00019635093340570706, "loss": -397766.0, "step": 3115 }, { "epoch": 0.18, "learning_rate": 0.0001963240801469712, "loss": -395342.125, "step": 3120 }, { "epoch": 0.18, "learning_rate": 0.0001962971302942317, "loss": -395822.7, "step": 3125 }, { "epoch": 0.18, "learning_rate": 0.0001962700838745141, "loss": -397181.9, "step": 3130 }, { "epoch": 0.18, "learning_rate": 0.00019624294091494045, "loss": -394394.1, "step": 3135 }, { "epoch": 0.18, "learning_rate": 0.00019621570144272987, "loss": -396456.625, "step": 3140 }, { "epoch": 0.18, "learning_rate": 0.00019618836548519812, "loss": -396242.55, "step": 3145 }, { "epoch": 0.18, "learning_rate": 0.0001961609330697578, "loss": -399480.3, "step": 3150 }, { "epoch": 0.18, "learning_rate": 0.0001961334042239181, "loss": -395399.025, "step": 3155 }, { "epoch": 0.18, "learning_rate": 0.00019610577897528515, "loss": -397448.45, "step": 3160 }, { "epoch": 0.18, "learning_rate": 0.0001960780573515615, "loss": -397776.85, "step": 3165 }, { "epoch": 0.18, "learning_rate": 0.0001960502393805465, "loss": -397287.55, "step": 3170 }, { "epoch": 0.18, "learning_rate": 0.00019602232509013606, "loss": -397851.1, "step": 3175 }, { "epoch": 0.18, "learning_rate": 0.0001959943145083227, "loss": -397603.525, "step": 3180 }, { "epoch": 0.18, "learning_rate": 0.0001959662076631955, "loss": -398141.85, "step": 3185 }, { "epoch": 0.18, "learning_rate": 0.00019593800458294006, "loss": -397223.125, "step": 3190 }, { "epoch": 0.18, "learning_rate": 0.00019590970529583852, "loss": -388728.0, "step": 3195 }, { "epoch": 0.18, "learning_rate": 0.00019588130983026942, "loss": -397655.1, "step": 3200 }, { "epoch": 0.18, "learning_rate": 0.00019585281821470781, "loss": -396993.0, "step": 3205 }, { "epoch": 0.18, "learning_rate": 0.0001958242304777252, "loss": -398863.0, "step": 3210 }, { "epoch": 0.18, "learning_rate": 0.00019579554664798937, "loss": -396665.725, "step": 3215 }, { "epoch": 0.18, "learning_rate": 0.00019576676675426454, "loss": -397439.25, "step": 3220 }, { "epoch": 0.19, "learning_rate": 0.00019573789082541124, "loss": -398167.975, "step": 3225 }, { "epoch": 0.19, "learning_rate": 0.0001957089188903864, "loss": -397248.225, "step": 3230 }, { "epoch": 0.19, "learning_rate": 0.000195679850978243, "loss": -396948.4, "step": 3235 }, { "epoch": 0.19, "learning_rate": 0.00019565068711813058, "loss": -397715.95, "step": 3240 }, { "epoch": 0.19, "learning_rate": 0.00019562142733929458, "loss": -398244.45, "step": 3245 }, { "epoch": 0.19, "learning_rate": 0.00019559207167107684, "loss": -399350.225, "step": 3250 }, { "epoch": 0.19, "learning_rate": 0.00019556262014291527, "loss": -398066.65, "step": 3255 }, { "epoch": 0.19, "learning_rate": 0.0001955330727843439, "loss": -398218.25, "step": 3260 }, { "epoch": 0.19, "learning_rate": 0.00019550342962499295, "loss": -399172.75, "step": 3265 }, { "epoch": 0.19, "learning_rate": 0.00019547369069458862, "loss": -398298.225, "step": 3270 }, { "epoch": 0.19, "learning_rate": 0.00019544385602295318, "loss": -399286.825, "step": 3275 }, { "epoch": 0.19, "learning_rate": 0.00019541392564000488, "loss": -399987.925, "step": 3280 }, { "epoch": 0.19, "learning_rate": 0.00019538389957575802, "loss": -397577.95, "step": 3285 }, { "epoch": 0.19, "learning_rate": 0.00019535377786032276, "loss": -398827.45, "step": 3290 }, { "epoch": 0.19, "learning_rate": 0.00019532356052390524, "loss": -399248.95, "step": 3295 }, { "epoch": 0.19, "learning_rate": 0.0001952932475968075, "loss": -398613.525, "step": 3300 }, { "epoch": 0.19, "learning_rate": 0.00019526283910942734, "loss": -398833.35, "step": 3305 }, { "epoch": 0.19, "learning_rate": 0.00019523233509225854, "loss": -397386.525, "step": 3310 }, { "epoch": 0.19, "learning_rate": 0.00019520173557589055, "loss": -399222.55, "step": 3315 }, { "epoch": 0.19, "learning_rate": 0.00019517104059100864, "loss": -402082.975, "step": 3320 }, { "epoch": 0.19, "learning_rate": 0.00019514025016839385, "loss": -399304.1, "step": 3325 }, { "epoch": 0.19, "learning_rate": 0.0001951093643389228, "loss": -397691.575, "step": 3330 }, { "epoch": 0.19, "learning_rate": 0.00019507838313356795, "loss": -401452.35, "step": 3335 }, { "epoch": 0.19, "learning_rate": 0.0001950473065833973, "loss": -399971.9, "step": 3340 }, { "epoch": 0.19, "learning_rate": 0.00019501613471957445, "loss": -399510.8, "step": 3345 }, { "epoch": 0.19, "learning_rate": 0.00019498486757335866, "loss": -400017.45, "step": 3350 }, { "epoch": 0.19, "learning_rate": 0.00019495350517610466, "loss": -398051.725, "step": 3355 }, { "epoch": 0.19, "learning_rate": 0.00019492204755926275, "loss": -399884.7, "step": 3360 }, { "epoch": 0.19, "learning_rate": 0.00019489049475437871, "loss": -400094.3, "step": 3365 }, { "epoch": 0.19, "learning_rate": 0.00019485884679309374, "loss": -399655.45, "step": 3370 }, { "epoch": 0.19, "learning_rate": 0.0001948271037071445, "loss": -399197.55, "step": 3375 }, { "epoch": 0.19, "learning_rate": 0.00019479526552836302, "loss": -398674.65, "step": 3380 }, { "epoch": 0.19, "learning_rate": 0.0001947633322886767, "loss": -398032.525, "step": 3385 }, { "epoch": 0.19, "learning_rate": 0.00019473130402010829, "loss": -401128.2, "step": 3390 }, { "epoch": 0.19, "learning_rate": 0.00019469918075477573, "loss": -400998.175, "step": 3395 }, { "epoch": 0.2, "learning_rate": 0.0001946669625248924, "loss": -400135.4, "step": 3400 }, { "epoch": 0.2, "learning_rate": 0.00019463464936276677, "loss": -400291.95, "step": 3405 }, { "epoch": 0.2, "learning_rate": 0.00019460224130080255, "loss": -400901.8, "step": 3410 }, { "epoch": 0.2, "learning_rate": 0.00019456973837149861, "loss": -399600.0, "step": 3415 }, { "epoch": 0.2, "learning_rate": 0.00019453714060744897, "loss": -398669.55, "step": 3420 }, { "epoch": 0.2, "learning_rate": 0.00019450444804134276, "loss": -401376.125, "step": 3425 }, { "epoch": 0.2, "learning_rate": 0.0001944716607059641, "loss": -400382.35, "step": 3430 }, { "epoch": 0.2, "learning_rate": 0.00019443877863419226, "loss": -401191.475, "step": 3435 }, { "epoch": 0.2, "learning_rate": 0.00019440580185900146, "loss": -398218.35, "step": 3440 }, { "epoch": 0.2, "learning_rate": 0.00019437273041346084, "loss": -398885.275, "step": 3445 }, { "epoch": 0.2, "learning_rate": 0.00019433956433073454, "loss": -400480.625, "step": 3450 }, { "epoch": 0.2, "learning_rate": 0.00019430630364408158, "loss": -401564.5, "step": 3455 }, { "epoch": 0.2, "learning_rate": 0.0001942729483868559, "loss": -400380.425, "step": 3460 }, { "epoch": 0.2, "learning_rate": 0.00019423949859250612, "loss": -400995.05, "step": 3465 }, { "epoch": 0.2, "learning_rate": 0.00019420595429457587, "loss": -399079.45, "step": 3470 }, { "epoch": 0.2, "learning_rate": 0.0001941723155267034, "loss": -400499.1, "step": 3475 }, { "epoch": 0.2, "learning_rate": 0.00019413858232262174, "loss": -400577.25, "step": 3480 }, { "epoch": 0.2, "learning_rate": 0.00019410475471615864, "loss": -402237.525, "step": 3485 }, { "epoch": 0.2, "learning_rate": 0.00019407083274123647, "loss": -401237.55, "step": 3490 }, { "epoch": 0.2, "learning_rate": 0.0001940368164318723, "loss": -400670.4, "step": 3495 }, { "epoch": 0.2, "learning_rate": 0.00019400270582217775, "loss": -396488.45, "step": 3500 }, { "epoch": 0.2, "learning_rate": 0.00019396850094635903, "loss": -399970.975, "step": 3505 }, { "epoch": 0.2, "learning_rate": 0.00019393420183871682, "loss": -398315.575, "step": 3510 }, { "epoch": 0.2, "learning_rate": 0.0001938998085336464, "loss": -399823.95, "step": 3515 }, { "epoch": 0.2, "learning_rate": 0.00019386532106563736, "loss": -400712.55, "step": 3520 }, { "epoch": 0.2, "learning_rate": 0.00019383073946927396, "loss": -399312.4, "step": 3525 }, { "epoch": 0.2, "learning_rate": 0.00019379606377923456, "loss": -398362.975, "step": 3530 }, { "epoch": 0.2, "learning_rate": 0.0001937612940302921, "loss": -400884.1, "step": 3535 }, { "epoch": 0.2, "learning_rate": 0.0001937264302573137, "loss": -399776.4, "step": 3540 }, { "epoch": 0.2, "learning_rate": 0.0001936914724952609, "loss": -401188.525, "step": 3545 }, { "epoch": 0.2, "learning_rate": 0.00019365642077918938, "loss": -401755.075, "step": 3550 }, { "epoch": 0.2, "learning_rate": 0.0001936212751442491, "loss": -400210.55, "step": 3555 }, { "epoch": 0.2, "learning_rate": 0.00019358603562568416, "loss": -399624.95, "step": 3560 }, { "epoch": 0.2, "learning_rate": 0.00019355070225883286, "loss": -401644.9, "step": 3565 }, { "epoch": 0.2, "learning_rate": 0.00019351527507912753, "loss": -400682.0, "step": 3570 }, { "epoch": 0.21, "learning_rate": 0.00019347975412209464, "loss": -401702.8, "step": 3575 }, { "epoch": 0.21, "learning_rate": 0.00019344413942335472, "loss": -400528.5, "step": 3580 }, { "epoch": 0.21, "learning_rate": 0.00019340843101862227, "loss": -399457.5, "step": 3585 }, { "epoch": 0.21, "learning_rate": 0.00019337262894370571, "loss": -401715.05, "step": 3590 }, { "epoch": 0.21, "learning_rate": 0.0001933367332345075, "loss": -400080.725, "step": 3595 }, { "epoch": 0.21, "learning_rate": 0.0001933007439270239, "loss": -401478.75, "step": 3600 }, { "epoch": 0.21, "learning_rate": 0.00019326466105734502, "loss": -402403.775, "step": 3605 }, { "epoch": 0.21, "learning_rate": 0.00019322848466165495, "loss": -401417.75, "step": 3610 }, { "epoch": 0.21, "learning_rate": 0.0001931922147762314, "loss": -400876.6, "step": 3615 }, { "epoch": 0.21, "learning_rate": 0.0001931558514374459, "loss": -400685.55, "step": 3620 }, { "epoch": 0.21, "learning_rate": 0.00019311939468176368, "loss": -403060.075, "step": 3625 }, { "epoch": 0.21, "learning_rate": 0.00019308284454574363, "loss": -403845.575, "step": 3630 }, { "epoch": 0.21, "learning_rate": 0.00019304620106603838, "loss": -400785.5, "step": 3635 }, { "epoch": 0.21, "learning_rate": 0.00019300946427939407, "loss": -400253.05, "step": 3640 }, { "epoch": 0.21, "learning_rate": 0.00019297263422265039, "loss": -403627.075, "step": 3645 }, { "epoch": 0.21, "learning_rate": 0.00019293571093274066, "loss": -403050.35, "step": 3650 }, { "epoch": 0.21, "learning_rate": 0.0001928986944466916, "loss": -401369.1, "step": 3655 }, { "epoch": 0.21, "learning_rate": 0.0001928615848016234, "loss": -403266.25, "step": 3660 }, { "epoch": 0.21, "learning_rate": 0.00019282438203474978, "loss": -403239.3, "step": 3665 }, { "epoch": 0.21, "learning_rate": 0.00019278708618337766, "loss": -403675.45, "step": 3670 }, { "epoch": 0.21, "learning_rate": 0.00019274969728490747, "loss": -402626.525, "step": 3675 }, { "epoch": 0.21, "learning_rate": 0.00019271221537683287, "loss": -401520.5, "step": 3680 }, { "epoch": 0.21, "learning_rate": 0.0001926746404967408, "loss": -403857.225, "step": 3685 }, { "epoch": 0.21, "learning_rate": 0.00019263697268231142, "loss": -403731.9, "step": 3690 }, { "epoch": 0.21, "learning_rate": 0.0001925992119713181, "loss": -400502.8, "step": 3695 }, { "epoch": 0.21, "learning_rate": 0.0001925613584016274, "loss": -401636.4, "step": 3700 }, { "epoch": 0.05, "step": 3701, "total_flos": 2.617011781310087e+18, "train_loss": 0.0, "train_runtime": 10.3365, "train_samples_per_second": 0.193, "train_steps_per_second": 0.097 } ], "logging_steps": 5, "max_steps": 1, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 2.617011781310087e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }