{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.999995191868488, "eval_steps": 500, "global_step": 51995, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.9232526048052467e-05, "grad_norm": 332017.82194713707, "learning_rate": 3.846153846153847e-08, "loss": 12108.0527, "step": 1 }, { "epoch": 0.00019232526048052465, "grad_norm": 245508.79891245405, "learning_rate": 3.846153846153847e-07, "loss": 12036.0373, "step": 10 }, { "epoch": 0.0003846505209610493, "grad_norm": 115889.53374107159, "learning_rate": 7.692307692307694e-07, "loss": 10648.9656, "step": 20 }, { "epoch": 0.000576975781441574, "grad_norm": 206614.59793923626, "learning_rate": 1.153846153846154e-06, "loss": 7566.8242, "step": 30 }, { "epoch": 0.0007693010419220986, "grad_norm": 160550.51842977668, "learning_rate": 1.5384615384615387e-06, "loss": 5169.2719, "step": 40 }, { "epoch": 0.0009616263024026233, "grad_norm": 39134.623275659105, "learning_rate": 1.9230769230769234e-06, "loss": 3722.1172, "step": 50 }, { "epoch": 0.001153951562883148, "grad_norm": 11341.981755467272, "learning_rate": 2.307692307692308e-06, "loss": 2501.6607, "step": 60 }, { "epoch": 0.0013462768233636726, "grad_norm": 14727.353496582424, "learning_rate": 2.6923076923076923e-06, "loss": 1709.3086, "step": 70 }, { "epoch": 0.0015386020838441972, "grad_norm": 7376.485461270763, "learning_rate": 3.0769230769230774e-06, "loss": 1362.1637, "step": 80 }, { "epoch": 0.0017309273443247219, "grad_norm": 4689.786549065178, "learning_rate": 3.4615384615384617e-06, "loss": 1136.6605, "step": 90 }, { "epoch": 0.0019232526048052465, "grad_norm": 3136.885512323355, "learning_rate": 3.846153846153847e-06, "loss": 1077.0688, "step": 100 }, { "epoch": 0.0021155778652857714, "grad_norm": 2884.0784288983627, "learning_rate": 4.230769230769231e-06, "loss": 976.0121, "step": 110 }, { "epoch": 0.002307903125766296, "grad_norm": 3280.7089284628487, "learning_rate": 4.615384615384616e-06, "loss": 928.786, "step": 120 }, { "epoch": 0.0025002283862468207, "grad_norm": 2939.4442984784437, "learning_rate": 5e-06, "loss": 851.515, "step": 130 }, { "epoch": 0.002692553646727345, "grad_norm": 2912.9567570884933, "learning_rate": 5.384615384615385e-06, "loss": 817.7365, "step": 140 }, { "epoch": 0.00288487890720787, "grad_norm": 2067.4095594512014, "learning_rate": 5.769230769230769e-06, "loss": 789.5539, "step": 150 }, { "epoch": 0.0030772041676883945, "grad_norm": 1952.0933429185154, "learning_rate": 6.153846153846155e-06, "loss": 757.9412, "step": 160 }, { "epoch": 0.0032695294281689193, "grad_norm": 1976.9487027681635, "learning_rate": 6.538461538461539e-06, "loss": 740.7521, "step": 170 }, { "epoch": 0.0034618546886494438, "grad_norm": 1786.3723313558642, "learning_rate": 6.923076923076923e-06, "loss": 728.6194, "step": 180 }, { "epoch": 0.0036541799491299687, "grad_norm": 2310.282588762028, "learning_rate": 7.307692307692308e-06, "loss": 704.6479, "step": 190 }, { "epoch": 0.003846505209610493, "grad_norm": 2074.87412983887, "learning_rate": 7.692307692307694e-06, "loss": 704.4103, "step": 200 }, { "epoch": 0.0040388304700910175, "grad_norm": 2300.109138357241, "learning_rate": 8.076923076923077e-06, "loss": 664.9248, "step": 210 }, { "epoch": 0.004231155730571543, "grad_norm": 1911.5281782387024, "learning_rate": 8.461538461538462e-06, "loss": 660.9208, "step": 220 }, { "epoch": 0.004423480991052067, "grad_norm": 1469.8004882525568, "learning_rate": 8.846153846153847e-06, "loss": 642.6973, "step": 230 }, { "epoch": 0.004615806251532592, "grad_norm": 1875.6806394934072, "learning_rate": 9.230769230769232e-06, "loss": 642.3829, "step": 240 }, { "epoch": 0.004808131512013117, "grad_norm": 1954.1006277348115, "learning_rate": 9.615384615384616e-06, "loss": 643.9223, "step": 250 }, { "epoch": 0.0050004567724936414, "grad_norm": 1737.3193764809394, "learning_rate": 1e-05, "loss": 645.1771, "step": 260 }, { "epoch": 0.005192782032974166, "grad_norm": 1327.6624239260284, "learning_rate": 1.0384615384615386e-05, "loss": 637.8946, "step": 270 }, { "epoch": 0.00538510729345469, "grad_norm": 7981.270677692642, "learning_rate": 1.076923076923077e-05, "loss": 618.1356, "step": 280 }, { "epoch": 0.005577432553935216, "grad_norm": 1709.4807102572415, "learning_rate": 1.1153846153846154e-05, "loss": 630.8181, "step": 290 }, { "epoch": 0.00576975781441574, "grad_norm": 1644.4225063710128, "learning_rate": 1.1538461538461538e-05, "loss": 605.8881, "step": 300 }, { "epoch": 0.0059620830748962645, "grad_norm": 1966.4081016099562, "learning_rate": 1.1923076923076925e-05, "loss": 604.7532, "step": 310 }, { "epoch": 0.006154408335376789, "grad_norm": 1491.86389210639, "learning_rate": 1.230769230769231e-05, "loss": 609.7085, "step": 320 }, { "epoch": 0.006346733595857314, "grad_norm": 1485.4453324763408, "learning_rate": 1.2692307692307693e-05, "loss": 602.9562, "step": 330 }, { "epoch": 0.006539058856337839, "grad_norm": 1310.387260927013, "learning_rate": 1.3076923076923078e-05, "loss": 595.0032, "step": 340 }, { "epoch": 0.006731384116818363, "grad_norm": 2275.2781855062035, "learning_rate": 1.3461538461538463e-05, "loss": 579.4093, "step": 350 }, { "epoch": 0.0069237093772988876, "grad_norm": 1849.868176409446, "learning_rate": 1.3846153846153847e-05, "loss": 585.0145, "step": 360 }, { "epoch": 0.007116034637779413, "grad_norm": 1615.7507754722587, "learning_rate": 1.4230769230769232e-05, "loss": 592.8799, "step": 370 }, { "epoch": 0.007308359898259937, "grad_norm": 1610.504408373516, "learning_rate": 1.4615384615384615e-05, "loss": 589.673, "step": 380 }, { "epoch": 0.007500685158740462, "grad_norm": 1704.5476494298723, "learning_rate": 1.5000000000000002e-05, "loss": 582.5739, "step": 390 }, { "epoch": 0.007693010419220986, "grad_norm": 1502.1680695792734, "learning_rate": 1.5384615384615387e-05, "loss": 571.0479, "step": 400 }, { "epoch": 0.00788533567970151, "grad_norm": 1579.6577195492494, "learning_rate": 1.576923076923077e-05, "loss": 563.5073, "step": 410 }, { "epoch": 0.008077660940182035, "grad_norm": 1785.8398259450537, "learning_rate": 1.6153846153846154e-05, "loss": 581.3297, "step": 420 }, { "epoch": 0.008269986200662561, "grad_norm": 1771.438162106497, "learning_rate": 1.653846153846154e-05, "loss": 568.3677, "step": 430 }, { "epoch": 0.008462311461143086, "grad_norm": 1363.7967686916552, "learning_rate": 1.6923076923076924e-05, "loss": 567.2524, "step": 440 }, { "epoch": 0.00865463672162361, "grad_norm": 1417.5481457867793, "learning_rate": 1.730769230769231e-05, "loss": 592.7938, "step": 450 }, { "epoch": 0.008846961982104135, "grad_norm": 2912.185460436148, "learning_rate": 1.7692307692307694e-05, "loss": 558.3062, "step": 460 }, { "epoch": 0.009039287242584659, "grad_norm": 1419.8660565163475, "learning_rate": 1.807692307692308e-05, "loss": 563.9717, "step": 470 }, { "epoch": 0.009231612503065183, "grad_norm": 1538.9205592163212, "learning_rate": 1.8461538461538465e-05, "loss": 541.6976, "step": 480 }, { "epoch": 0.009423937763545708, "grad_norm": 1674.7941560707338, "learning_rate": 1.8846153846153846e-05, "loss": 562.8638, "step": 490 }, { "epoch": 0.009616263024026234, "grad_norm": 1184.5116494790307, "learning_rate": 1.923076923076923e-05, "loss": 550.3092, "step": 500 }, { "epoch": 0.009808588284506758, "grad_norm": 1918.2405919716136, "learning_rate": 1.9615384615384617e-05, "loss": 569.4998, "step": 510 }, { "epoch": 0.010000913544987283, "grad_norm": 1258.73028504583, "learning_rate": 2e-05, "loss": 562.6359, "step": 520 }, { "epoch": 0.010193238805467807, "grad_norm": 1957.1855022650734, "learning_rate": 1.999999813758258e-05, "loss": 540.9211, "step": 530 }, { "epoch": 0.010385564065948332, "grad_norm": 1445.1918558753534, "learning_rate": 1.9999992550331007e-05, "loss": 552.493, "step": 540 }, { "epoch": 0.010577889326428856, "grad_norm": 1425.9554496443363, "learning_rate": 1.999998323824736e-05, "loss": 551.791, "step": 550 }, { "epoch": 0.01077021458690938, "grad_norm": 1387.5175764634103, "learning_rate": 1.999997020133512e-05, "loss": 539.2529, "step": 560 }, { "epoch": 0.010962539847389905, "grad_norm": 1282.1917414212555, "learning_rate": 1.9999953439599132e-05, "loss": 543.0103, "step": 570 }, { "epoch": 0.011154865107870431, "grad_norm": 1387.5597126230348, "learning_rate": 1.9999932953045638e-05, "loss": 549.0967, "step": 580 }, { "epoch": 0.011347190368350956, "grad_norm": 1495.9706529903751, "learning_rate": 1.999990874168228e-05, "loss": 542.2873, "step": 590 }, { "epoch": 0.01153951562883148, "grad_norm": 1366.7575608631, "learning_rate": 1.9999880805518067e-05, "loss": 550.7072, "step": 600 }, { "epoch": 0.011731840889312005, "grad_norm": 1550.7141727742994, "learning_rate": 1.9999849144563406e-05, "loss": 536.1631, "step": 610 }, { "epoch": 0.011924166149792529, "grad_norm": 1799.438677775851, "learning_rate": 1.9999813758830092e-05, "loss": 533.2322, "step": 620 }, { "epoch": 0.012116491410273053, "grad_norm": 1594.2553453970218, "learning_rate": 1.9999774648331307e-05, "loss": 531.8908, "step": 630 }, { "epoch": 0.012308816670753578, "grad_norm": 1380.3126968166453, "learning_rate": 1.9999731813081616e-05, "loss": 539.0061, "step": 640 }, { "epoch": 0.012501141931234102, "grad_norm": 1288.9094624087347, "learning_rate": 1.9999685253096975e-05, "loss": 533.8396, "step": 650 }, { "epoch": 0.012693467191714628, "grad_norm": 1467.6587965930335, "learning_rate": 1.999963496839473e-05, "loss": 511.2933, "step": 660 }, { "epoch": 0.012885792452195153, "grad_norm": 1144.4252030903722, "learning_rate": 1.9999580958993606e-05, "loss": 518.686, "step": 670 }, { "epoch": 0.013078117712675677, "grad_norm": 1510.7992006303907, "learning_rate": 1.9999523224913722e-05, "loss": 541.2946, "step": 680 }, { "epoch": 0.013270442973156202, "grad_norm": 1459.5247736769318, "learning_rate": 1.999946176617659e-05, "loss": 531.0906, "step": 690 }, { "epoch": 0.013462768233636726, "grad_norm": 1704.7295855200716, "learning_rate": 1.999939658280509e-05, "loss": 515.0679, "step": 700 }, { "epoch": 0.01365509349411725, "grad_norm": 1369.6286303265301, "learning_rate": 1.9999327674823513e-05, "loss": 529.9729, "step": 710 }, { "epoch": 0.013847418754597775, "grad_norm": 1171.6944991940852, "learning_rate": 1.9999255042257522e-05, "loss": 523.1766, "step": 720 }, { "epoch": 0.0140397440150783, "grad_norm": 1733.2115650083967, "learning_rate": 1.999917868513417e-05, "loss": 512.975, "step": 730 }, { "epoch": 0.014232069275558826, "grad_norm": 1410.6921404307557, "learning_rate": 1.9999098603481895e-05, "loss": 496.8678, "step": 740 }, { "epoch": 0.01442439453603935, "grad_norm": 1053.8093124978518, "learning_rate": 1.9999014797330536e-05, "loss": 511.1349, "step": 750 }, { "epoch": 0.014616719796519875, "grad_norm": 1194.4866698136166, "learning_rate": 1.99989272667113e-05, "loss": 514.8527, "step": 760 }, { "epoch": 0.014809045057000399, "grad_norm": 1126.7421355720714, "learning_rate": 1.99988360116568e-05, "loss": 514.3693, "step": 770 }, { "epoch": 0.015001370317480923, "grad_norm": 1178.4782527711411, "learning_rate": 1.999874103220102e-05, "loss": 506.5624, "step": 780 }, { "epoch": 0.015193695577961448, "grad_norm": 1297.0010276174844, "learning_rate": 1.999864232837934e-05, "loss": 519.1336, "step": 790 }, { "epoch": 0.015386020838441972, "grad_norm": 1069.667364513505, "learning_rate": 1.9998539900228526e-05, "loss": 514.1775, "step": 800 }, { "epoch": 0.015578346098922499, "grad_norm": 1262.000307745403, "learning_rate": 1.999843374778673e-05, "loss": 511.9821, "step": 810 }, { "epoch": 0.01577067135940302, "grad_norm": 1500.5916093620833, "learning_rate": 1.999832387109349e-05, "loss": 505.3807, "step": 820 }, { "epoch": 0.015962996619883547, "grad_norm": 1398.4282733840143, "learning_rate": 1.9998210270189736e-05, "loss": 503.6255, "step": 830 }, { "epoch": 0.01615532188036407, "grad_norm": 1364.4269370667485, "learning_rate": 1.9998092945117786e-05, "loss": 495.5404, "step": 840 }, { "epoch": 0.016347647140844596, "grad_norm": 1458.3289472638558, "learning_rate": 1.999797189592134e-05, "loss": 500.7938, "step": 850 }, { "epoch": 0.016539972401325122, "grad_norm": 35408.406023696065, "learning_rate": 1.999784712264548e-05, "loss": 541.5755, "step": 860 }, { "epoch": 0.016732297661805645, "grad_norm": 1259.0220001600435, "learning_rate": 1.9997718625336686e-05, "loss": 488.8561, "step": 870 }, { "epoch": 0.01692462292228617, "grad_norm": 1115.7727735781639, "learning_rate": 1.9997586404042825e-05, "loss": 503.5537, "step": 880 }, { "epoch": 0.017116948182766694, "grad_norm": 1261.659255730506, "learning_rate": 1.9997450458813142e-05, "loss": 489.9525, "step": 890 }, { "epoch": 0.01730927344324722, "grad_norm": 1174.6527357294908, "learning_rate": 1.9997310789698276e-05, "loss": 484.3942, "step": 900 }, { "epoch": 0.017501598703727743, "grad_norm": 1060.893242229332, "learning_rate": 1.999716739675025e-05, "loss": 494.4259, "step": 910 }, { "epoch": 0.01769392396420827, "grad_norm": 1378.6178778139686, "learning_rate": 1.9997020280022482e-05, "loss": 492.2053, "step": 920 }, { "epoch": 0.017886249224688795, "grad_norm": 1384.0970365292112, "learning_rate": 1.9996869439569767e-05, "loss": 486.3698, "step": 930 }, { "epoch": 0.018078574485169318, "grad_norm": 1141.0195763132995, "learning_rate": 1.9996714875448286e-05, "loss": 484.9983, "step": 940 }, { "epoch": 0.018270899745649844, "grad_norm": 1534.190545857729, "learning_rate": 1.9996556587715617e-05, "loss": 491.6367, "step": 950 }, { "epoch": 0.018463225006130367, "grad_norm": 1260.3034572615545, "learning_rate": 1.9996394576430716e-05, "loss": 491.5582, "step": 960 }, { "epoch": 0.018655550266610893, "grad_norm": 1131.6626054510543, "learning_rate": 1.9996228841653932e-05, "loss": 492.3444, "step": 970 }, { "epoch": 0.018847875527091416, "grad_norm": 1080.0350168322668, "learning_rate": 1.9996059383447e-05, "loss": 481.0039, "step": 980 }, { "epoch": 0.019040200787571942, "grad_norm": 1186.8576062599248, "learning_rate": 1.999588620187303e-05, "loss": 488.0979, "step": 990 }, { "epoch": 0.019232526048052468, "grad_norm": 1284.027907525847, "learning_rate": 1.9995709296996545e-05, "loss": 490.5315, "step": 1000 }, { "epoch": 0.01942485130853299, "grad_norm": 1084.408370907811, "learning_rate": 1.999552866888343e-05, "loss": 494.6817, "step": 1010 }, { "epoch": 0.019617176569013517, "grad_norm": 1078.4092927240245, "learning_rate": 1.9995344317600965e-05, "loss": 502.1682, "step": 1020 }, { "epoch": 0.01980950182949404, "grad_norm": 2212.2117524226715, "learning_rate": 1.9995156243217824e-05, "loss": 497.8876, "step": 1030 }, { "epoch": 0.020001827089974566, "grad_norm": 1450.9938826004914, "learning_rate": 1.9994964445804054e-05, "loss": 500.4225, "step": 1040 }, { "epoch": 0.02019415235045509, "grad_norm": 1439.5951412209038, "learning_rate": 1.9994768925431104e-05, "loss": 478.5474, "step": 1050 }, { "epoch": 0.020386477610935615, "grad_norm": 1111.8307979449899, "learning_rate": 1.9994569682171796e-05, "loss": 472.0606, "step": 1060 }, { "epoch": 0.020578802871416137, "grad_norm": 1130.0088246480677, "learning_rate": 1.9994366716100346e-05, "loss": 479.0884, "step": 1070 }, { "epoch": 0.020771128131896664, "grad_norm": 1368.426354451509, "learning_rate": 1.999416002729236e-05, "loss": 475.8784, "step": 1080 }, { "epoch": 0.02096345339237719, "grad_norm": 1471.6865813297497, "learning_rate": 1.999394961582482e-05, "loss": 493.142, "step": 1090 }, { "epoch": 0.021155778652857712, "grad_norm": 1151.4689769800118, "learning_rate": 1.9993735481776105e-05, "loss": 467.3699, "step": 1100 }, { "epoch": 0.02134810391333824, "grad_norm": 1103.1388757982427, "learning_rate": 1.9993517625225976e-05, "loss": 468.3865, "step": 1110 }, { "epoch": 0.02154042917381876, "grad_norm": 915.9358718055254, "learning_rate": 1.9993296046255578e-05, "loss": 462.7445, "step": 1120 }, { "epoch": 0.021732754434299287, "grad_norm": 997.6094400871649, "learning_rate": 1.9993070744947447e-05, "loss": 475.512, "step": 1130 }, { "epoch": 0.02192507969477981, "grad_norm": 1202.0248958828354, "learning_rate": 1.9992841721385508e-05, "loss": 464.4746, "step": 1140 }, { "epoch": 0.022117404955260336, "grad_norm": 11237.009349666056, "learning_rate": 1.999260897565506e-05, "loss": 486.6293, "step": 1150 }, { "epoch": 0.022309730215740863, "grad_norm": 1032.047329108433, "learning_rate": 1.9992372507842807e-05, "loss": 484.2495, "step": 1160 }, { "epoch": 0.022502055476221385, "grad_norm": 1290.0903297656175, "learning_rate": 1.9992132318036825e-05, "loss": 481.1325, "step": 1170 }, { "epoch": 0.02269438073670191, "grad_norm": 1037.6319401213834, "learning_rate": 1.9991888406326575e-05, "loss": 486.5979, "step": 1180 }, { "epoch": 0.022886705997182434, "grad_norm": 1077.555863211059, "learning_rate": 1.9991640772802916e-05, "loss": 467.6946, "step": 1190 }, { "epoch": 0.02307903125766296, "grad_norm": 1173.5983183577753, "learning_rate": 1.9991389417558088e-05, "loss": 469.8534, "step": 1200 }, { "epoch": 0.023271356518143483, "grad_norm": 1047.9155015558533, "learning_rate": 1.9991134340685713e-05, "loss": 463.1124, "step": 1210 }, { "epoch": 0.02346368177862401, "grad_norm": 1197.2310531685393, "learning_rate": 1.999087554228081e-05, "loss": 463.741, "step": 1220 }, { "epoch": 0.023656007039104535, "grad_norm": 949.005837498942, "learning_rate": 1.999061302243977e-05, "loss": 470.0569, "step": 1230 }, { "epoch": 0.023848332299585058, "grad_norm": 1222.0283637424163, "learning_rate": 1.9990346781260378e-05, "loss": 469.6994, "step": 1240 }, { "epoch": 0.024040657560065584, "grad_norm": 1721.3575140480177, "learning_rate": 1.9990076818841805e-05, "loss": 467.2944, "step": 1250 }, { "epoch": 0.024232982820546107, "grad_norm": 1169.3409682586346, "learning_rate": 1.998980313528461e-05, "loss": 471.5979, "step": 1260 }, { "epoch": 0.024425308081026633, "grad_norm": 1358.8689502222835, "learning_rate": 1.9989525730690736e-05, "loss": 467.8751, "step": 1270 }, { "epoch": 0.024617633341507156, "grad_norm": 1166.25480338363, "learning_rate": 1.998924460516351e-05, "loss": 478.3506, "step": 1280 }, { "epoch": 0.024809958601987682, "grad_norm": 1048.097700016728, "learning_rate": 1.9988959758807645e-05, "loss": 469.7929, "step": 1290 }, { "epoch": 0.025002283862468205, "grad_norm": 1041.7763831771526, "learning_rate": 1.9988671191729243e-05, "loss": 478.3628, "step": 1300 }, { "epoch": 0.02519460912294873, "grad_norm": 1182.0911908058742, "learning_rate": 1.998837890403579e-05, "loss": 470.6274, "step": 1310 }, { "epoch": 0.025386934383429257, "grad_norm": 1211.438189548962, "learning_rate": 1.998808289583616e-05, "loss": 461.9727, "step": 1320 }, { "epoch": 0.02557925964390978, "grad_norm": 1013.8385452188595, "learning_rate": 1.998778316724061e-05, "loss": 458.8419, "step": 1330 }, { "epoch": 0.025771584904390306, "grad_norm": 970.6944019104968, "learning_rate": 1.998747971836078e-05, "loss": 446.9404, "step": 1340 }, { "epoch": 0.02596391016487083, "grad_norm": 1021.9827701608976, "learning_rate": 1.9987172549309707e-05, "loss": 467.6753, "step": 1350 }, { "epoch": 0.026156235425351355, "grad_norm": 1501.7882814399545, "learning_rate": 1.9986861660201802e-05, "loss": 460.099, "step": 1360 }, { "epoch": 0.026348560685831877, "grad_norm": 1119.8102022832766, "learning_rate": 1.998654705115287e-05, "loss": 476.4499, "step": 1370 }, { "epoch": 0.026540885946312404, "grad_norm": 1607.4317521625792, "learning_rate": 1.9986228722280093e-05, "loss": 479.8248, "step": 1380 }, { "epoch": 0.02673321120679293, "grad_norm": 1111.9087737119664, "learning_rate": 1.998590667370204e-05, "loss": 454.7312, "step": 1390 }, { "epoch": 0.026925536467273452, "grad_norm": 1047.659163378557, "learning_rate": 1.998558090553868e-05, "loss": 465.4294, "step": 1400 }, { "epoch": 0.02711786172775398, "grad_norm": 871.5336873619162, "learning_rate": 1.9985251417911347e-05, "loss": 454.3733, "step": 1410 }, { "epoch": 0.0273101869882345, "grad_norm": 1003.1759990372688, "learning_rate": 1.9984918210942776e-05, "loss": 449.2252, "step": 1420 }, { "epoch": 0.027502512248715028, "grad_norm": 896.2012244693171, "learning_rate": 1.9984581284757074e-05, "loss": 458.1591, "step": 1430 }, { "epoch": 0.02769483750919555, "grad_norm": 909.5865635412036, "learning_rate": 1.9984240639479745e-05, "loss": 453.5459, "step": 1440 }, { "epoch": 0.027887162769676076, "grad_norm": 1671.0348857078125, "learning_rate": 1.9983896275237677e-05, "loss": 451.2657, "step": 1450 }, { "epoch": 0.0280794880301566, "grad_norm": 1199.632444130952, "learning_rate": 1.9983548192159132e-05, "loss": 450.7684, "step": 1460 }, { "epoch": 0.028271813290637125, "grad_norm": 1180.9154545121703, "learning_rate": 1.998319639037377e-05, "loss": 460.3022, "step": 1470 }, { "epoch": 0.02846413855111765, "grad_norm": 1171.2654855596793, "learning_rate": 1.9982840870012626e-05, "loss": 469.2199, "step": 1480 }, { "epoch": 0.028656463811598174, "grad_norm": 1350.7356071250701, "learning_rate": 1.9982481631208138e-05, "loss": 447.264, "step": 1490 }, { "epoch": 0.0288487890720787, "grad_norm": 1340.1830938477137, "learning_rate": 1.9982118674094104e-05, "loss": 453.9175, "step": 1500 }, { "epoch": 0.029041114332559223, "grad_norm": 1285.6520270172832, "learning_rate": 1.9981751998805725e-05, "loss": 447.0713, "step": 1510 }, { "epoch": 0.02923343959303975, "grad_norm": 985.5172319049026, "learning_rate": 1.998138160547958e-05, "loss": 450.3087, "step": 1520 }, { "epoch": 0.029425764853520272, "grad_norm": 1103.5772288640608, "learning_rate": 1.9981007494253638e-05, "loss": 446.5301, "step": 1530 }, { "epoch": 0.029618090114000798, "grad_norm": 1168.413254974082, "learning_rate": 1.998062966526724e-05, "loss": 454.4448, "step": 1540 }, { "epoch": 0.029810415374481324, "grad_norm": 1073.2938121474965, "learning_rate": 1.998024811866113e-05, "loss": 454.6996, "step": 1550 }, { "epoch": 0.030002740634961847, "grad_norm": 1300.1753681145342, "learning_rate": 1.9979862854577427e-05, "loss": 457.9132, "step": 1560 }, { "epoch": 0.030195065895442373, "grad_norm": 1162.6163973584632, "learning_rate": 1.9979473873159635e-05, "loss": 448.1717, "step": 1570 }, { "epoch": 0.030387391155922896, "grad_norm": 1074.2415654472136, "learning_rate": 1.9979081174552638e-05, "loss": 454.8047, "step": 1580 }, { "epoch": 0.030579716416403422, "grad_norm": 919.2474230803733, "learning_rate": 1.997868475890271e-05, "loss": 453.3545, "step": 1590 }, { "epoch": 0.030772041676883945, "grad_norm": 1204.5616691859047, "learning_rate": 1.997828462635752e-05, "loss": 440.2779, "step": 1600 }, { "epoch": 0.03096436693736447, "grad_norm": 1043.1071384961128, "learning_rate": 1.99778807770661e-05, "loss": 444.5987, "step": 1610 }, { "epoch": 0.031156692197844997, "grad_norm": 1259.4732265726643, "learning_rate": 1.9977473211178882e-05, "loss": 476.3025, "step": 1620 }, { "epoch": 0.03134901745832552, "grad_norm": 1009.046667196821, "learning_rate": 1.9977061928847676e-05, "loss": 455.5854, "step": 1630 }, { "epoch": 0.03154134271880604, "grad_norm": 939.047196075382, "learning_rate": 1.9976646930225678e-05, "loss": 441.729, "step": 1640 }, { "epoch": 0.03173366797928657, "grad_norm": 906.5229132871757, "learning_rate": 1.997622821546747e-05, "loss": 441.6997, "step": 1650 }, { "epoch": 0.031925993239767095, "grad_norm": 993.2324977509386, "learning_rate": 1.9975805784729008e-05, "loss": 445.505, "step": 1660 }, { "epoch": 0.03211831850024762, "grad_norm": 1093.0301208448795, "learning_rate": 1.9975379638167654e-05, "loss": 462.6874, "step": 1670 }, { "epoch": 0.03231064376072814, "grad_norm": 1025.2680021460812, "learning_rate": 1.9974949775942134e-05, "loss": 433.165, "step": 1680 }, { "epoch": 0.032502969021208666, "grad_norm": 972.8447632264914, "learning_rate": 1.997451619821256e-05, "loss": 451.5788, "step": 1690 }, { "epoch": 0.03269529428168919, "grad_norm": 810.4883911530105, "learning_rate": 1.997407890514044e-05, "loss": 444.4225, "step": 1700 }, { "epoch": 0.03288761954216972, "grad_norm": 923.0036790051319, "learning_rate": 1.9973637896888652e-05, "loss": 436.9401, "step": 1710 }, { "epoch": 0.033079944802650245, "grad_norm": 1755.782027319123, "learning_rate": 1.997319317362147e-05, "loss": 440.4286, "step": 1720 }, { "epoch": 0.033272270063130764, "grad_norm": 979.1780716001408, "learning_rate": 1.9972744735504542e-05, "loss": 428.3424, "step": 1730 }, { "epoch": 0.03346459532361129, "grad_norm": 907.9041043732557, "learning_rate": 1.9972292582704905e-05, "loss": 448.4176, "step": 1740 }, { "epoch": 0.033656920584091816, "grad_norm": 1043.591930171376, "learning_rate": 1.997183671539098e-05, "loss": 442.5342, "step": 1750 }, { "epoch": 0.03384924584457234, "grad_norm": 1079.8265419064423, "learning_rate": 1.9971377133732567e-05, "loss": 439.2012, "step": 1760 }, { "epoch": 0.03404157110505287, "grad_norm": 823.2231568926577, "learning_rate": 1.997091383790086e-05, "loss": 451.7246, "step": 1770 }, { "epoch": 0.03423389636553339, "grad_norm": 820.2895789250954, "learning_rate": 1.997044682806842e-05, "loss": 430.022, "step": 1780 }, { "epoch": 0.034426221626013914, "grad_norm": 907.6838320401715, "learning_rate": 1.9969976104409202e-05, "loss": 440.6493, "step": 1790 }, { "epoch": 0.03461854688649444, "grad_norm": 1222.668516608212, "learning_rate": 1.9969501667098547e-05, "loss": 448.0128, "step": 1800 }, { "epoch": 0.03481087214697497, "grad_norm": 1022.7512877024485, "learning_rate": 1.996902351631317e-05, "loss": 431.8569, "step": 1810 }, { "epoch": 0.035003197407455486, "grad_norm": 976.8999279841343, "learning_rate": 1.996854165223118e-05, "loss": 430.3105, "step": 1820 }, { "epoch": 0.03519552266793601, "grad_norm": 994.2191083156916, "learning_rate": 1.9968056075032058e-05, "loss": 428.3338, "step": 1830 }, { "epoch": 0.03538784792841654, "grad_norm": 927.5937101726631, "learning_rate": 1.9967566784896676e-05, "loss": 422.1842, "step": 1840 }, { "epoch": 0.035580173188897064, "grad_norm": 804.752796715029, "learning_rate": 1.996707378200729e-05, "loss": 420.1022, "step": 1850 }, { "epoch": 0.03577249844937759, "grad_norm": 875.2326312461167, "learning_rate": 1.9966577066547526e-05, "loss": 428.2064, "step": 1860 }, { "epoch": 0.03596482370985811, "grad_norm": 892.9573667028674, "learning_rate": 1.9966076638702412e-05, "loss": 437.3851, "step": 1870 }, { "epoch": 0.036157148970338636, "grad_norm": 792.2376623578725, "learning_rate": 1.9965572498658346e-05, "loss": 433.6019, "step": 1880 }, { "epoch": 0.03634947423081916, "grad_norm": 952.014487394457, "learning_rate": 1.996506464660311e-05, "loss": 431.6811, "step": 1890 }, { "epoch": 0.03654179949129969, "grad_norm": 1041.9240608157052, "learning_rate": 1.9964553082725873e-05, "loss": 425.2496, "step": 1900 }, { "epoch": 0.03673412475178021, "grad_norm": 1226.2781807241984, "learning_rate": 1.996403780721718e-05, "loss": 427.2512, "step": 1910 }, { "epoch": 0.036926450012260734, "grad_norm": 1252.631493328946, "learning_rate": 1.9963518820268968e-05, "loss": 437.5074, "step": 1920 }, { "epoch": 0.03711877527274126, "grad_norm": 1019.2683176579892, "learning_rate": 1.996299612207455e-05, "loss": 425.1087, "step": 1930 }, { "epoch": 0.037311100533221786, "grad_norm": 946.588907288007, "learning_rate": 1.9962469712828613e-05, "loss": 436.6689, "step": 1940 }, { "epoch": 0.03750342579370231, "grad_norm": 972.8227997856911, "learning_rate": 1.996193959272725e-05, "loss": 413.2797, "step": 1950 }, { "epoch": 0.03769575105418283, "grad_norm": 850.4193662342182, "learning_rate": 1.9961405761967914e-05, "loss": 424.1742, "step": 1960 }, { "epoch": 0.03788807631466336, "grad_norm": 1050.2895049038134, "learning_rate": 1.996086822074945e-05, "loss": 419.6982, "step": 1970 }, { "epoch": 0.038080401575143884, "grad_norm": 1058.8252707233637, "learning_rate": 1.996032696927208e-05, "loss": 428.4798, "step": 1980 }, { "epoch": 0.03827272683562441, "grad_norm": 831.6684934800379, "learning_rate": 1.9959782007737418e-05, "loss": 433.5806, "step": 1990 }, { "epoch": 0.038465052096104936, "grad_norm": 955.7162157930247, "learning_rate": 1.9959233336348452e-05, "loss": 413.9336, "step": 2000 }, { "epoch": 0.038657377356585455, "grad_norm": 1099.209046114368, "learning_rate": 1.9958680955309546e-05, "loss": 428.3293, "step": 2010 }, { "epoch": 0.03884970261706598, "grad_norm": 1021.2233849816279, "learning_rate": 1.9958124864826457e-05, "loss": 430.3237, "step": 2020 }, { "epoch": 0.03904202787754651, "grad_norm": 976.7190757307437, "learning_rate": 1.9957565065106318e-05, "loss": 415.4346, "step": 2030 }, { "epoch": 0.039234353138027034, "grad_norm": 1054.2423629433902, "learning_rate": 1.9957001556357652e-05, "loss": 419.6495, "step": 2040 }, { "epoch": 0.03942667839850755, "grad_norm": 1070.0019862922163, "learning_rate": 1.995643433879035e-05, "loss": 412.5856, "step": 2050 }, { "epoch": 0.03961900365898808, "grad_norm": 1482.1736202677944, "learning_rate": 1.9955863412615693e-05, "loss": 428.2741, "step": 2060 }, { "epoch": 0.039811328919468605, "grad_norm": 1073.4692781718343, "learning_rate": 1.9955288778046338e-05, "loss": 426.4848, "step": 2070 }, { "epoch": 0.04000365417994913, "grad_norm": 1013.6713168858257, "learning_rate": 1.995471043529633e-05, "loss": 418.8785, "step": 2080 }, { "epoch": 0.04019597944042966, "grad_norm": 837.5462305927567, "learning_rate": 1.99541283845811e-05, "loss": 418.0007, "step": 2090 }, { "epoch": 0.04038830470091018, "grad_norm": 857.5303704163729, "learning_rate": 1.9953542626117437e-05, "loss": 422.5934, "step": 2100 }, { "epoch": 0.0405806299613907, "grad_norm": 869.4962463265555, "learning_rate": 1.9952953160123537e-05, "loss": 426.6799, "step": 2110 }, { "epoch": 0.04077295522187123, "grad_norm": 949.3244808560535, "learning_rate": 1.995235998681896e-05, "loss": 422.9911, "step": 2120 }, { "epoch": 0.040965280482351756, "grad_norm": 966.6456323863318, "learning_rate": 1.9951763106424658e-05, "loss": 423.5903, "step": 2130 }, { "epoch": 0.041157605742832275, "grad_norm": 919.3932010760028, "learning_rate": 1.9951162519162962e-05, "loss": 417.3357, "step": 2140 }, { "epoch": 0.0413499310033128, "grad_norm": 875.4564352669491, "learning_rate": 1.9950558225257574e-05, "loss": 419.633, "step": 2150 }, { "epoch": 0.04154225626379333, "grad_norm": 786.7485120612257, "learning_rate": 1.9949950224933584e-05, "loss": 417.6999, "step": 2160 }, { "epoch": 0.04173458152427385, "grad_norm": 868.8891187995704, "learning_rate": 1.994933851841747e-05, "loss": 416.0338, "step": 2170 }, { "epoch": 0.04192690678475438, "grad_norm": 892.5026500827428, "learning_rate": 1.994872310593707e-05, "loss": 416.687, "step": 2180 }, { "epoch": 0.0421192320452349, "grad_norm": 970.6300400036097, "learning_rate": 1.994810398772162e-05, "loss": 418.6071, "step": 2190 }, { "epoch": 0.042311557305715425, "grad_norm": 838.8530553637123, "learning_rate": 1.994748116400174e-05, "loss": 413.7869, "step": 2200 }, { "epoch": 0.04250388256619595, "grad_norm": 1024.561778936901, "learning_rate": 1.994685463500941e-05, "loss": 418.3925, "step": 2210 }, { "epoch": 0.04269620782667648, "grad_norm": 862.8999327027489, "learning_rate": 1.9946224400978006e-05, "loss": 416.8202, "step": 2220 }, { "epoch": 0.042888533087157, "grad_norm": 864.6845014807649, "learning_rate": 1.994559046214228e-05, "loss": 415.3689, "step": 2230 }, { "epoch": 0.04308085834763752, "grad_norm": 1207.8281915584857, "learning_rate": 1.9944952818738366e-05, "loss": 424.385, "step": 2240 }, { "epoch": 0.04327318360811805, "grad_norm": 856.9755440271097, "learning_rate": 1.9944311471003775e-05, "loss": 427.1351, "step": 2250 }, { "epoch": 0.043465508868598575, "grad_norm": 797.2202312561943, "learning_rate": 1.9943666419177392e-05, "loss": 401.6336, "step": 2260 }, { "epoch": 0.0436578341290791, "grad_norm": 840.993989742356, "learning_rate": 1.9943017663499497e-05, "loss": 413.9854, "step": 2270 }, { "epoch": 0.04385015938955962, "grad_norm": 926.8012079644898, "learning_rate": 1.9942365204211734e-05, "loss": 413.2362, "step": 2280 }, { "epoch": 0.044042484650040147, "grad_norm": 24676.784984066762, "learning_rate": 1.9941709041557134e-05, "loss": 419.889, "step": 2290 }, { "epoch": 0.04423480991052067, "grad_norm": 946.0603847587324, "learning_rate": 1.994104917578011e-05, "loss": 435.1779, "step": 2300 }, { "epoch": 0.0444271351710012, "grad_norm": 962.153063049471, "learning_rate": 1.9940385607126456e-05, "loss": 410.552, "step": 2310 }, { "epoch": 0.044619460431481725, "grad_norm": 965.2673431893036, "learning_rate": 1.9939718335843326e-05, "loss": 420.7652, "step": 2320 }, { "epoch": 0.044811785691962244, "grad_norm": 1268.9167278481075, "learning_rate": 1.9939047362179283e-05, "loss": 418.1449, "step": 2330 }, { "epoch": 0.04500411095244277, "grad_norm": 836.5103474682797, "learning_rate": 1.9938372686384244e-05, "loss": 403.4094, "step": 2340 }, { "epoch": 0.0451964362129233, "grad_norm": 753.7340795731817, "learning_rate": 1.9937694308709514e-05, "loss": 409.9182, "step": 2350 }, { "epoch": 0.04538876147340382, "grad_norm": 821.820710879436, "learning_rate": 1.9937012229407783e-05, "loss": 419.3094, "step": 2360 }, { "epoch": 0.04558108673388434, "grad_norm": 851.0589297896992, "learning_rate": 1.9936326448733107e-05, "loss": 416.9893, "step": 2370 }, { "epoch": 0.04577341199436487, "grad_norm": 1075.1200638865676, "learning_rate": 1.9935636966940938e-05, "loss": 405.4084, "step": 2380 }, { "epoch": 0.045965737254845394, "grad_norm": 956.2910990830981, "learning_rate": 1.993494378428809e-05, "loss": 416.4961, "step": 2390 }, { "epoch": 0.04615806251532592, "grad_norm": 815.6936262963642, "learning_rate": 1.9934246901032764e-05, "loss": 402.3952, "step": 2400 }, { "epoch": 0.04635038777580645, "grad_norm": 2360.759105306595, "learning_rate": 1.9933546317434536e-05, "loss": 413.8724, "step": 2410 }, { "epoch": 0.046542713036286966, "grad_norm": 943.481299935079, "learning_rate": 1.9932842033754363e-05, "loss": 419.4986, "step": 2420 }, { "epoch": 0.04673503829676749, "grad_norm": 846.2634230104537, "learning_rate": 1.9932134050254578e-05, "loss": 413.2273, "step": 2430 }, { "epoch": 0.04692736355724802, "grad_norm": 871.5530094276198, "learning_rate": 1.9931422367198893e-05, "loss": 410.6645, "step": 2440 }, { "epoch": 0.047119688817728544, "grad_norm": 918.4069177694724, "learning_rate": 1.9930706984852403e-05, "loss": 407.7853, "step": 2450 }, { "epoch": 0.04731201407820907, "grad_norm": 828.3693733593514, "learning_rate": 1.992998790348157e-05, "loss": 407.0076, "step": 2460 }, { "epoch": 0.04750433933868959, "grad_norm": 802.7119429227891, "learning_rate": 1.992926512335424e-05, "loss": 417.0812, "step": 2470 }, { "epoch": 0.047696664599170116, "grad_norm": 783.1281826299662, "learning_rate": 1.992853864473964e-05, "loss": 404.1433, "step": 2480 }, { "epoch": 0.04788898985965064, "grad_norm": 732.2406768577065, "learning_rate": 1.992780846790837e-05, "loss": 411.2688, "step": 2490 }, { "epoch": 0.04808131512013117, "grad_norm": 975.6063398004994, "learning_rate": 1.9927074593132412e-05, "loss": 408.5953, "step": 2500 }, { "epoch": 0.04827364038061169, "grad_norm": 832.3209682983506, "learning_rate": 1.9926337020685114e-05, "loss": 417.095, "step": 2510 }, { "epoch": 0.048465965641092214, "grad_norm": 1000.2467965932198, "learning_rate": 1.992559575084122e-05, "loss": 415.1272, "step": 2520 }, { "epoch": 0.04865829090157274, "grad_norm": 843.4947995629191, "learning_rate": 1.9924850783876832e-05, "loss": 409.7536, "step": 2530 }, { "epoch": 0.048850616162053266, "grad_norm": 909.1986478819955, "learning_rate": 1.992410212006944e-05, "loss": 411.6839, "step": 2540 }, { "epoch": 0.04904294142253379, "grad_norm": 727.4074209194242, "learning_rate": 1.9923349759697915e-05, "loss": 402.9094, "step": 2550 }, { "epoch": 0.04923526668301431, "grad_norm": 784.9272579755053, "learning_rate": 1.992259370304249e-05, "loss": 399.2156, "step": 2560 }, { "epoch": 0.04942759194349484, "grad_norm": 801.5705870825869, "learning_rate": 1.992183395038479e-05, "loss": 408.6303, "step": 2570 }, { "epoch": 0.049619917203975364, "grad_norm": 776.0226228782816, "learning_rate": 1.9921070502007808e-05, "loss": 406.1699, "step": 2580 }, { "epoch": 0.04981224246445589, "grad_norm": 791.2287159718755, "learning_rate": 1.9920303358195916e-05, "loss": 401.3249, "step": 2590 }, { "epoch": 0.05000456772493641, "grad_norm": 859.5715851591746, "learning_rate": 1.9919532519234858e-05, "loss": 406.9079, "step": 2600 }, { "epoch": 0.050196892985416935, "grad_norm": 950.0375357774553, "learning_rate": 1.9918757985411767e-05, "loss": 398.9475, "step": 2610 }, { "epoch": 0.05038921824589746, "grad_norm": 3697.0217417772647, "learning_rate": 1.991797975701514e-05, "loss": 413.058, "step": 2620 }, { "epoch": 0.05058154350637799, "grad_norm": 21484.157037856894, "learning_rate": 1.9917197834334858e-05, "loss": 457.0581, "step": 2630 }, { "epoch": 0.050773868766858514, "grad_norm": 840.8131994500599, "learning_rate": 1.9916412217662162e-05, "loss": 395.2674, "step": 2640 }, { "epoch": 0.05096619402733903, "grad_norm": 848.3677713278562, "learning_rate": 1.9915622907289695e-05, "loss": 393.97, "step": 2650 }, { "epoch": 0.05115851928781956, "grad_norm": 773.3080723966837, "learning_rate": 1.9914829903511458e-05, "loss": 397.9258, "step": 2660 }, { "epoch": 0.051350844548300086, "grad_norm": 722.654539565636, "learning_rate": 1.9914033206622828e-05, "loss": 408.9118, "step": 2670 }, { "epoch": 0.05154316980878061, "grad_norm": 766.6787271546868, "learning_rate": 1.9913232816920565e-05, "loss": 405.4553, "step": 2680 }, { "epoch": 0.05173549506926114, "grad_norm": 820.7846023812327, "learning_rate": 1.99124287347028e-05, "loss": 407.4249, "step": 2690 }, { "epoch": 0.05192782032974166, "grad_norm": 696.7079214788865, "learning_rate": 1.991162096026904e-05, "loss": 412.3783, "step": 2700 }, { "epoch": 0.05212014559022218, "grad_norm": 995.1881132131933, "learning_rate": 1.9910809493920172e-05, "loss": 405.0052, "step": 2710 }, { "epoch": 0.05231247085070271, "grad_norm": 900.7112278783013, "learning_rate": 1.990999433595845e-05, "loss": 406.1632, "step": 2720 }, { "epoch": 0.052504796111183236, "grad_norm": 882.6939502542203, "learning_rate": 1.99091754866875e-05, "loss": 406.4494, "step": 2730 }, { "epoch": 0.052697121371663755, "grad_norm": 793.843854468107, "learning_rate": 1.990835294641234e-05, "loss": 403.5715, "step": 2740 }, { "epoch": 0.05288944663214428, "grad_norm": 768.9838928381889, "learning_rate": 1.990752671543935e-05, "loss": 402.1033, "step": 2750 }, { "epoch": 0.05308177189262481, "grad_norm": 763.1077293076072, "learning_rate": 1.9906696794076284e-05, "loss": 403.7594, "step": 2760 }, { "epoch": 0.05327409715310533, "grad_norm": 708.1979175398459, "learning_rate": 1.9905863182632285e-05, "loss": 403.3684, "step": 2770 }, { "epoch": 0.05346642241358586, "grad_norm": 780.7564832522181, "learning_rate": 1.990502588141784e-05, "loss": 403.7348, "step": 2780 }, { "epoch": 0.05365874767406638, "grad_norm": 788.2704196253177, "learning_rate": 1.990418489074485e-05, "loss": 390.1319, "step": 2790 }, { "epoch": 0.053851072934546905, "grad_norm": 777.7139743744849, "learning_rate": 1.9903340210926555e-05, "loss": 391.7088, "step": 2800 }, { "epoch": 0.05404339819502743, "grad_norm": 779.8392157540648, "learning_rate": 1.9902491842277592e-05, "loss": 409.1859, "step": 2810 }, { "epoch": 0.05423572345550796, "grad_norm": 939.190832945537, "learning_rate": 1.9901639785113967e-05, "loss": 402.1446, "step": 2820 }, { "epoch": 0.05442804871598848, "grad_norm": 798.106765825879, "learning_rate": 1.990078403975305e-05, "loss": 411.2067, "step": 2830 }, { "epoch": 0.054620373976469, "grad_norm": 744.7205098821862, "learning_rate": 1.9899924606513593e-05, "loss": 397.5615, "step": 2840 }, { "epoch": 0.05481269923694953, "grad_norm": 828.5586351859567, "learning_rate": 1.9899061485715726e-05, "loss": 392.6941, "step": 2850 }, { "epoch": 0.055005024497430055, "grad_norm": 743.7545184832085, "learning_rate": 1.9898194677680943e-05, "loss": 401.0686, "step": 2860 }, { "epoch": 0.05519734975791058, "grad_norm": 787.9522241003444, "learning_rate": 1.9897324182732118e-05, "loss": 396.134, "step": 2870 }, { "epoch": 0.0553896750183911, "grad_norm": 698.5125841213974, "learning_rate": 1.989645000119349e-05, "loss": 393.7889, "step": 2880 }, { "epoch": 0.05558200027887163, "grad_norm": 738.498781475107, "learning_rate": 1.9895572133390687e-05, "loss": 394.8915, "step": 2890 }, { "epoch": 0.05577432553935215, "grad_norm": 754.9880133446674, "learning_rate": 1.9894690579650694e-05, "loss": 399.7915, "step": 2900 }, { "epoch": 0.05596665079983268, "grad_norm": 838.9035752511419, "learning_rate": 1.9893805340301876e-05, "loss": 399.2658, "step": 2910 }, { "epoch": 0.0561589760603132, "grad_norm": 887.0247498898594, "learning_rate": 1.989291641567397e-05, "loss": 395.2259, "step": 2920 }, { "epoch": 0.056351301320793724, "grad_norm": 6533.06697295512, "learning_rate": 1.9892023806098083e-05, "loss": 386.8454, "step": 2930 }, { "epoch": 0.05654362658127425, "grad_norm": 729.544176376949, "learning_rate": 1.9891127511906703e-05, "loss": 391.1382, "step": 2940 }, { "epoch": 0.05673595184175478, "grad_norm": 836.7696344728582, "learning_rate": 1.9890227533433685e-05, "loss": 392.0289, "step": 2950 }, { "epoch": 0.0569282771022353, "grad_norm": 949.1072841508442, "learning_rate": 1.988932387101425e-05, "loss": 405.9888, "step": 2960 }, { "epoch": 0.05712060236271582, "grad_norm": 758.5882201805449, "learning_rate": 1.9888416524985e-05, "loss": 388.5938, "step": 2970 }, { "epoch": 0.05731292762319635, "grad_norm": 743.6184716836035, "learning_rate": 1.988750549568391e-05, "loss": 399.7922, "step": 2980 }, { "epoch": 0.057505252883676874, "grad_norm": 717.041287798182, "learning_rate": 1.9886590783450317e-05, "loss": 381.7268, "step": 2990 }, { "epoch": 0.0576975781441574, "grad_norm": 861.2531062344228, "learning_rate": 1.9885672388624942e-05, "loss": 393.7416, "step": 3000 }, { "epoch": 0.05788990340463793, "grad_norm": 1594.7740384574417, "learning_rate": 1.9884750311549868e-05, "loss": 408.4041, "step": 3010 }, { "epoch": 0.058082228665118446, "grad_norm": 983.0430145740795, "learning_rate": 1.9883824552568557e-05, "loss": 398.9454, "step": 3020 }, { "epoch": 0.05827455392559897, "grad_norm": 683.6360021672427, "learning_rate": 1.9882895112025835e-05, "loss": 404.4664, "step": 3030 }, { "epoch": 0.0584668791860795, "grad_norm": 1104.3304960459254, "learning_rate": 1.9881961990267906e-05, "loss": 385.0514, "step": 3040 }, { "epoch": 0.058659204446560025, "grad_norm": 1031.366504909178, "learning_rate": 1.988102518764234e-05, "loss": 397.696, "step": 3050 }, { "epoch": 0.058851529707040544, "grad_norm": 951.7019853471531, "learning_rate": 1.9880084704498084e-05, "loss": 393.2042, "step": 3060 }, { "epoch": 0.05904385496752107, "grad_norm": 1037.7212633536412, "learning_rate": 1.987914054118545e-05, "loss": 404.7147, "step": 3070 }, { "epoch": 0.059236180228001596, "grad_norm": 890.2787056501567, "learning_rate": 1.9878192698056125e-05, "loss": 394.0958, "step": 3080 }, { "epoch": 0.05942850548848212, "grad_norm": 850.4448760599917, "learning_rate": 1.9877241175463165e-05, "loss": 391.8196, "step": 3090 }, { "epoch": 0.05962083074896265, "grad_norm": 768.3356728445398, "learning_rate": 1.9876285973760993e-05, "loss": 399.0993, "step": 3100 }, { "epoch": 0.05981315600944317, "grad_norm": 792.6694642669811, "learning_rate": 1.9875327093305405e-05, "loss": 393.3022, "step": 3110 }, { "epoch": 0.060005481269923694, "grad_norm": 866.5568098952982, "learning_rate": 1.9874364534453577e-05, "loss": 392.7167, "step": 3120 }, { "epoch": 0.06019780653040422, "grad_norm": 845.036169611911, "learning_rate": 1.9873398297564036e-05, "loss": 395.2803, "step": 3130 }, { "epoch": 0.060390131790884746, "grad_norm": 751.8245712608418, "learning_rate": 1.9872428382996697e-05, "loss": 399.1471, "step": 3140 }, { "epoch": 0.060582457051365265, "grad_norm": 767.234381644173, "learning_rate": 1.987145479111283e-05, "loss": 385.6532, "step": 3150 }, { "epoch": 0.06077478231184579, "grad_norm": 1181.862494733402, "learning_rate": 1.987047752227509e-05, "loss": 396.884, "step": 3160 }, { "epoch": 0.06096710757232632, "grad_norm": 685.2155147894781, "learning_rate": 1.9869496576847488e-05, "loss": 382.569, "step": 3170 }, { "epoch": 0.061159432832806844, "grad_norm": 791.469058721252, "learning_rate": 1.9868511955195407e-05, "loss": 391.4913, "step": 3180 }, { "epoch": 0.06135175809328737, "grad_norm": 930.5923387819682, "learning_rate": 1.986752365768561e-05, "loss": 373.7412, "step": 3190 }, { "epoch": 0.06154408335376789, "grad_norm": 949.0032349730969, "learning_rate": 1.986653168468622e-05, "loss": 391.077, "step": 3200 }, { "epoch": 0.061736408614248416, "grad_norm": 934.8037434742106, "learning_rate": 1.9865536036566727e-05, "loss": 389.2291, "step": 3210 }, { "epoch": 0.06192873387472894, "grad_norm": 826.8212148070362, "learning_rate": 1.9864536713697992e-05, "loss": 402.5538, "step": 3220 }, { "epoch": 0.06212105913520947, "grad_norm": 858.983103558221, "learning_rate": 1.986353371645225e-05, "loss": 399.0848, "step": 3230 }, { "epoch": 0.062313384395689994, "grad_norm": 733.6433381123185, "learning_rate": 1.9862527045203105e-05, "loss": 392.7022, "step": 3240 }, { "epoch": 0.06250570965617051, "grad_norm": 752.8523887221875, "learning_rate": 1.986151670032552e-05, "loss": 388.4725, "step": 3250 }, { "epoch": 0.06269803491665105, "grad_norm": 1020.4632534326246, "learning_rate": 1.986050268219583e-05, "loss": 386.5425, "step": 3260 }, { "epoch": 0.06289036017713157, "grad_norm": 896.9055389206998, "learning_rate": 1.9859484991191742e-05, "loss": 397.7183, "step": 3270 }, { "epoch": 0.06308268543761208, "grad_norm": 795.1442032924809, "learning_rate": 1.985846362769233e-05, "loss": 400.3567, "step": 3280 }, { "epoch": 0.06327501069809262, "grad_norm": 828.0036465962801, "learning_rate": 1.9857438592078034e-05, "loss": 388.7515, "step": 3290 }, { "epoch": 0.06346733595857314, "grad_norm": 764.7935820700304, "learning_rate": 1.9856409884730667e-05, "loss": 393.2867, "step": 3300 }, { "epoch": 0.06365966121905367, "grad_norm": 782.433216331679, "learning_rate": 1.98553775060334e-05, "loss": 384.751, "step": 3310 }, { "epoch": 0.06385198647953419, "grad_norm": 770.5239118718077, "learning_rate": 1.9854341456370777e-05, "loss": 379.5989, "step": 3320 }, { "epoch": 0.06404431174001471, "grad_norm": 830.1033045195829, "learning_rate": 1.9853301736128712e-05, "loss": 390.4898, "step": 3330 }, { "epoch": 0.06423663700049524, "grad_norm": 926.2149843676547, "learning_rate": 1.9852258345694486e-05, "loss": 400.0977, "step": 3340 }, { "epoch": 0.06442896226097576, "grad_norm": 809.9690346247447, "learning_rate": 1.9851211285456738e-05, "loss": 389.6271, "step": 3350 }, { "epoch": 0.06462128752145628, "grad_norm": 755.2125565665904, "learning_rate": 1.9850160555805485e-05, "loss": 401.8327, "step": 3360 }, { "epoch": 0.06481361278193681, "grad_norm": 741.8845538744961, "learning_rate": 1.9849106157132105e-05, "loss": 399.9646, "step": 3370 }, { "epoch": 0.06500593804241733, "grad_norm": 686.883082073912, "learning_rate": 1.9848048089829347e-05, "loss": 394.8569, "step": 3380 }, { "epoch": 0.06519826330289787, "grad_norm": 736.2050614077401, "learning_rate": 1.9846986354291324e-05, "loss": 383.8164, "step": 3390 }, { "epoch": 0.06539058856337839, "grad_norm": 803.9426233603608, "learning_rate": 1.9845920950913506e-05, "loss": 400.4979, "step": 3400 }, { "epoch": 0.0655829138238589, "grad_norm": 926.4258115884888, "learning_rate": 1.9844851880092748e-05, "loss": 380.4864, "step": 3410 }, { "epoch": 0.06577523908433944, "grad_norm": 886.5657112848164, "learning_rate": 1.9843779142227258e-05, "loss": 388.6589, "step": 3420 }, { "epoch": 0.06596756434481996, "grad_norm": 907.7194073038047, "learning_rate": 1.984270273771661e-05, "loss": 388.0827, "step": 3430 }, { "epoch": 0.06615988960530049, "grad_norm": 874.0977699889922, "learning_rate": 1.9841622666961756e-05, "loss": 394.0443, "step": 3440 }, { "epoch": 0.06635221486578101, "grad_norm": 744.3752919633399, "learning_rate": 1.9840538930364992e-05, "loss": 391.6839, "step": 3450 }, { "epoch": 0.06654454012626153, "grad_norm": 748.822685279414, "learning_rate": 1.983945152833e-05, "loss": 378.0743, "step": 3460 }, { "epoch": 0.06673686538674206, "grad_norm": 877.9787494251104, "learning_rate": 1.9838360461261817e-05, "loss": 390.5966, "step": 3470 }, { "epoch": 0.06692919064722258, "grad_norm": 670.5103547705536, "learning_rate": 1.9837265729566853e-05, "loss": 378.118, "step": 3480 }, { "epoch": 0.06712151590770311, "grad_norm": 755.2276168722524, "learning_rate": 1.9836167333652866e-05, "loss": 394.8704, "step": 3490 }, { "epoch": 0.06731384116818363, "grad_norm": 864.8964584726651, "learning_rate": 1.9835065273929002e-05, "loss": 384.7451, "step": 3500 }, { "epoch": 0.06750616642866415, "grad_norm": 711.2318778664892, "learning_rate": 1.9833959550805754e-05, "loss": 387.1255, "step": 3510 }, { "epoch": 0.06769849168914469, "grad_norm": 904.7148051183538, "learning_rate": 1.9832850164694983e-05, "loss": 383.078, "step": 3520 }, { "epoch": 0.0678908169496252, "grad_norm": 1449.0431850047282, "learning_rate": 1.9831737116009924e-05, "loss": 384.6444, "step": 3530 }, { "epoch": 0.06808314221010574, "grad_norm": 796.7373246812646, "learning_rate": 1.9830620405165164e-05, "loss": 382.1027, "step": 3540 }, { "epoch": 0.06827546747058626, "grad_norm": 742.1817234384118, "learning_rate": 1.982950003257666e-05, "loss": 394.5984, "step": 3550 }, { "epoch": 0.06846779273106678, "grad_norm": 735.6763919798921, "learning_rate": 1.9828375998661738e-05, "loss": 388.0653, "step": 3560 }, { "epoch": 0.06866011799154731, "grad_norm": 705.0031427898053, "learning_rate": 1.9827248303839073e-05, "loss": 364.7262, "step": 3570 }, { "epoch": 0.06885244325202783, "grad_norm": 889.1804309525011, "learning_rate": 1.982611694852872e-05, "loss": 379.9477, "step": 3580 }, { "epoch": 0.06904476851250835, "grad_norm": 841.0288674010438, "learning_rate": 1.9824981933152087e-05, "loss": 373.4608, "step": 3590 }, { "epoch": 0.06923709377298888, "grad_norm": 730.5043399291195, "learning_rate": 1.9823843258131945e-05, "loss": 381.3542, "step": 3600 }, { "epoch": 0.0694294190334694, "grad_norm": 967.5864584721487, "learning_rate": 1.9822700923892438e-05, "loss": 398.1269, "step": 3610 }, { "epoch": 0.06962174429394993, "grad_norm": 845.1950823294573, "learning_rate": 1.9821554930859066e-05, "loss": 385.2426, "step": 3620 }, { "epoch": 0.06981406955443045, "grad_norm": 782.6785606969343, "learning_rate": 1.982040527945869e-05, "loss": 386.971, "step": 3630 }, { "epoch": 0.07000639481491097, "grad_norm": 724.8900255479541, "learning_rate": 1.9819251970119534e-05, "loss": 390.1686, "step": 3640 }, { "epoch": 0.0701987200753915, "grad_norm": 731.9915541707064, "learning_rate": 1.9818095003271193e-05, "loss": 377.8536, "step": 3650 }, { "epoch": 0.07039104533587202, "grad_norm": 782.9719906016511, "learning_rate": 1.9816934379344613e-05, "loss": 382.285, "step": 3660 }, { "epoch": 0.07058337059635256, "grad_norm": 779.6638708072722, "learning_rate": 1.9815770098772108e-05, "loss": 378.9326, "step": 3670 }, { "epoch": 0.07077569585683308, "grad_norm": 775.5533482950776, "learning_rate": 1.9814602161987354e-05, "loss": 375.4497, "step": 3680 }, { "epoch": 0.0709680211173136, "grad_norm": 825.1492664708319, "learning_rate": 1.981343056942539e-05, "loss": 376.8522, "step": 3690 }, { "epoch": 0.07116034637779413, "grad_norm": 790.2378369347501, "learning_rate": 1.9812255321522614e-05, "loss": 396.3279, "step": 3700 }, { "epoch": 0.07135267163827465, "grad_norm": 703.9794990286542, "learning_rate": 1.981107641871678e-05, "loss": 380.933, "step": 3710 }, { "epoch": 0.07154499689875518, "grad_norm": 738.1836530230871, "learning_rate": 1.980989386144702e-05, "loss": 379.7828, "step": 3720 }, { "epoch": 0.0717373221592357, "grad_norm": 684.3928445081146, "learning_rate": 1.980870765015381e-05, "loss": 386.5646, "step": 3730 }, { "epoch": 0.07192964741971622, "grad_norm": 720.6833523018175, "learning_rate": 1.9807517785278997e-05, "loss": 381.8005, "step": 3740 }, { "epoch": 0.07212197268019675, "grad_norm": 735.5504960392883, "learning_rate": 1.9806324267265786e-05, "loss": 380.8302, "step": 3750 }, { "epoch": 0.07231429794067727, "grad_norm": 723.363734759545, "learning_rate": 1.9805127096558742e-05, "loss": 380.8795, "step": 3760 }, { "epoch": 0.0725066232011578, "grad_norm": 779.084986349587, "learning_rate": 1.980392627360379e-05, "loss": 383.6675, "step": 3770 }, { "epoch": 0.07269894846163832, "grad_norm": 924.4286417371314, "learning_rate": 1.9802721798848225e-05, "loss": 389.7277, "step": 3780 }, { "epoch": 0.07289127372211884, "grad_norm": 741.5577517729787, "learning_rate": 1.980151367274068e-05, "loss": 374.1295, "step": 3790 }, { "epoch": 0.07308359898259938, "grad_norm": 763.0281988934574, "learning_rate": 1.9800301895731172e-05, "loss": 388.0132, "step": 3800 }, { "epoch": 0.0732759242430799, "grad_norm": 723.2651647549225, "learning_rate": 1.9799086468271065e-05, "loss": 382.7571, "step": 3810 }, { "epoch": 0.07346824950356041, "grad_norm": 751.877215063999, "learning_rate": 1.9797867390813086e-05, "loss": 376.49, "step": 3820 }, { "epoch": 0.07366057476404095, "grad_norm": 823.0893082654114, "learning_rate": 1.9796644663811318e-05, "loss": 382.0146, "step": 3830 }, { "epoch": 0.07385290002452147, "grad_norm": 783.869557455776, "learning_rate": 1.9795418287721215e-05, "loss": 379.0613, "step": 3840 }, { "epoch": 0.074045225285002, "grad_norm": 916.0368052570652, "learning_rate": 1.9794188262999574e-05, "loss": 388.5759, "step": 3850 }, { "epoch": 0.07423755054548252, "grad_norm": 846.4465131910968, "learning_rate": 1.979295459010456e-05, "loss": 379.8879, "step": 3860 }, { "epoch": 0.07442987580596304, "grad_norm": 735.5392846914212, "learning_rate": 1.9791717269495698e-05, "loss": 371.2041, "step": 3870 }, { "epoch": 0.07462220106644357, "grad_norm": 955.2603207902944, "learning_rate": 1.979047630163387e-05, "loss": 374.3904, "step": 3880 }, { "epoch": 0.07481452632692409, "grad_norm": 754.946047484617, "learning_rate": 1.9789231686981313e-05, "loss": 386.2628, "step": 3890 }, { "epoch": 0.07500685158740462, "grad_norm": 892.6055943942398, "learning_rate": 1.978798342600163e-05, "loss": 374.2574, "step": 3900 }, { "epoch": 0.07519917684788514, "grad_norm": 816.3551244875831, "learning_rate": 1.978673151915977e-05, "loss": 383.0144, "step": 3910 }, { "epoch": 0.07539150210836566, "grad_norm": 671.0823774479612, "learning_rate": 1.9785475966922055e-05, "loss": 382.7799, "step": 3920 }, { "epoch": 0.0755838273688462, "grad_norm": 792.6868282216401, "learning_rate": 1.9784216769756156e-05, "loss": 374.9068, "step": 3930 }, { "epoch": 0.07577615262932672, "grad_norm": 722.6049277567629, "learning_rate": 1.97829539281311e-05, "loss": 372.5016, "step": 3940 }, { "epoch": 0.07596847788980725, "grad_norm": 938.273755253152, "learning_rate": 1.9781687442517278e-05, "loss": 379.5824, "step": 3950 }, { "epoch": 0.07616080315028777, "grad_norm": 943.205581022564, "learning_rate": 1.9780417313386433e-05, "loss": 385.3431, "step": 3960 }, { "epoch": 0.07635312841076829, "grad_norm": 838.245722493269, "learning_rate": 1.9779143541211664e-05, "loss": 377.7045, "step": 3970 }, { "epoch": 0.07654545367124882, "grad_norm": 809.1246764452217, "learning_rate": 1.9777866126467436e-05, "loss": 367.2468, "step": 3980 }, { "epoch": 0.07673777893172934, "grad_norm": 811.6322890808202, "learning_rate": 1.9776585069629566e-05, "loss": 371.5746, "step": 3990 }, { "epoch": 0.07693010419220987, "grad_norm": 749.3625528702271, "learning_rate": 1.9775300371175225e-05, "loss": 374.6235, "step": 4000 }, { "epoch": 0.07712242945269039, "grad_norm": 927.368083067949, "learning_rate": 1.9774012031582935e-05, "loss": 368.9598, "step": 4010 }, { "epoch": 0.07731475471317091, "grad_norm": 696.3606303210178, "learning_rate": 1.9772720051332585e-05, "loss": 375.025, "step": 4020 }, { "epoch": 0.07750707997365144, "grad_norm": 1067.7751944482493, "learning_rate": 1.977142443090542e-05, "loss": 373.9631, "step": 4030 }, { "epoch": 0.07769940523413196, "grad_norm": 796.1897032237234, "learning_rate": 1.9770125170784035e-05, "loss": 377.0201, "step": 4040 }, { "epoch": 0.07789173049461248, "grad_norm": 912.7540015233076, "learning_rate": 1.9768822271452385e-05, "loss": 386.0567, "step": 4050 }, { "epoch": 0.07808405575509302, "grad_norm": 804.5646961662917, "learning_rate": 1.9767515733395774e-05, "loss": 375.4905, "step": 4060 }, { "epoch": 0.07827638101557353, "grad_norm": 910.9621386160426, "learning_rate": 1.976620555710087e-05, "loss": 378.288, "step": 4070 }, { "epoch": 0.07846870627605407, "grad_norm": 808.0390835997099, "learning_rate": 1.976489174305569e-05, "loss": 376.1768, "step": 4080 }, { "epoch": 0.07866103153653459, "grad_norm": 694.1471357236522, "learning_rate": 1.9763574291749603e-05, "loss": 365.676, "step": 4090 }, { "epoch": 0.0788533567970151, "grad_norm": 669.5197035263998, "learning_rate": 1.9762253203673348e-05, "loss": 366.5308, "step": 4100 }, { "epoch": 0.07904568205749564, "grad_norm": 785.2512576017631, "learning_rate": 1.9760928479319003e-05, "loss": 378.3666, "step": 4110 }, { "epoch": 0.07923800731797616, "grad_norm": 688.3637184992946, "learning_rate": 1.9759600119180005e-05, "loss": 374.6983, "step": 4120 }, { "epoch": 0.07943033257845669, "grad_norm": 651.3619326538491, "learning_rate": 1.975826812375115e-05, "loss": 369.3953, "step": 4130 }, { "epoch": 0.07962265783893721, "grad_norm": 719.4246492086702, "learning_rate": 1.9756932493528583e-05, "loss": 357.6536, "step": 4140 }, { "epoch": 0.07981498309941773, "grad_norm": 770.6968075867582, "learning_rate": 1.97555932290098e-05, "loss": 418.1854, "step": 4150 }, { "epoch": 0.08000730835989826, "grad_norm": 749.0590226592501, "learning_rate": 1.9754250330693658e-05, "loss": 372.4671, "step": 4160 }, { "epoch": 0.08019963362037878, "grad_norm": 609.8059454924893, "learning_rate": 1.9752903799080366e-05, "loss": 383.8606, "step": 4170 }, { "epoch": 0.08039195888085932, "grad_norm": 649.5123407312859, "learning_rate": 1.9751553634671485e-05, "loss": 363.1326, "step": 4180 }, { "epoch": 0.08058428414133983, "grad_norm": 682.8437842372026, "learning_rate": 1.9750199837969922e-05, "loss": 367.2999, "step": 4190 }, { "epoch": 0.08077660940182035, "grad_norm": 672.827466269095, "learning_rate": 1.9748842409479953e-05, "loss": 374.4353, "step": 4200 }, { "epoch": 0.08096893466230089, "grad_norm": 710.9532577309513, "learning_rate": 1.9747481349707197e-05, "loss": 380.4755, "step": 4210 }, { "epoch": 0.0811612599227814, "grad_norm": 746.9490197406803, "learning_rate": 1.9746116659158618e-05, "loss": 374.4888, "step": 4220 }, { "epoch": 0.08135358518326194, "grad_norm": 777.1558146388504, "learning_rate": 1.9744748338342546e-05, "loss": 370.0567, "step": 4230 }, { "epoch": 0.08154591044374246, "grad_norm": 718.6288805842618, "learning_rate": 1.974337638776866e-05, "loss": 373.2439, "step": 4240 }, { "epoch": 0.08173823570422298, "grad_norm": 706.8570694706435, "learning_rate": 1.9742000807947986e-05, "loss": 365.4311, "step": 4250 }, { "epoch": 0.08193056096470351, "grad_norm": 688.9848362082146, "learning_rate": 1.9740621599392907e-05, "loss": 368.1906, "step": 4260 }, { "epoch": 0.08212288622518403, "grad_norm": 725.5829260231745, "learning_rate": 1.9739238762617155e-05, "loss": 367.0304, "step": 4270 }, { "epoch": 0.08231521148566455, "grad_norm": 717.2467725821838, "learning_rate": 1.973785229813581e-05, "loss": 375.8666, "step": 4280 }, { "epoch": 0.08250753674614508, "grad_norm": 745.5834559902293, "learning_rate": 1.973646220646531e-05, "loss": 378.2281, "step": 4290 }, { "epoch": 0.0826998620066256, "grad_norm": 707.8169237140677, "learning_rate": 1.973506848812344e-05, "loss": 368.5782, "step": 4300 }, { "epoch": 0.08289218726710613, "grad_norm": 681.9545939357413, "learning_rate": 1.9733671143629342e-05, "loss": 387.3969, "step": 4310 }, { "epoch": 0.08308451252758665, "grad_norm": 733.1412882987519, "learning_rate": 1.9732270173503493e-05, "loss": 372.3555, "step": 4320 }, { "epoch": 0.08327683778806717, "grad_norm": 686.8459966312021, "learning_rate": 1.9730865578267745e-05, "loss": 364.6364, "step": 4330 }, { "epoch": 0.0834691630485477, "grad_norm": 751.0190659936467, "learning_rate": 1.972945735844528e-05, "loss": 370.8999, "step": 4340 }, { "epoch": 0.08366148830902823, "grad_norm": 678.5074425036346, "learning_rate": 1.972804551456063e-05, "loss": 374.4807, "step": 4350 }, { "epoch": 0.08385381356950876, "grad_norm": 759.0010394301813, "learning_rate": 1.9726630047139695e-05, "loss": 375.4909, "step": 4360 }, { "epoch": 0.08404613882998928, "grad_norm": 634.9276459035196, "learning_rate": 1.9725210956709707e-05, "loss": 361.6657, "step": 4370 }, { "epoch": 0.0842384640904698, "grad_norm": 691.5276471240323, "learning_rate": 1.9723788243799253e-05, "loss": 378.3001, "step": 4380 }, { "epoch": 0.08443078935095033, "grad_norm": 765.1228459329928, "learning_rate": 1.972236190893827e-05, "loss": 364.8584, "step": 4390 }, { "epoch": 0.08462311461143085, "grad_norm": 997.4870547867154, "learning_rate": 1.972093195265805e-05, "loss": 380.687, "step": 4400 }, { "epoch": 0.08481543987191138, "grad_norm": 735.5785138605547, "learning_rate": 1.9719498375491224e-05, "loss": 364.8229, "step": 4410 }, { "epoch": 0.0850077651323919, "grad_norm": 777.5815048764625, "learning_rate": 1.9718061177971777e-05, "loss": 368.2549, "step": 4420 }, { "epoch": 0.08520009039287242, "grad_norm": 670.5791581190966, "learning_rate": 1.9716620360635036e-05, "loss": 372.5692, "step": 4430 }, { "epoch": 0.08539241565335295, "grad_norm": 762.1654045664332, "learning_rate": 1.971517592401769e-05, "loss": 381.9217, "step": 4440 }, { "epoch": 0.08558474091383347, "grad_norm": 992.1490506568286, "learning_rate": 1.9713727868657764e-05, "loss": 364.6855, "step": 4450 }, { "epoch": 0.085777066174314, "grad_norm": 840.4760098460681, "learning_rate": 1.971227619509463e-05, "loss": 362.5352, "step": 4460 }, { "epoch": 0.08596939143479453, "grad_norm": 968.5539944692922, "learning_rate": 1.971082090386902e-05, "loss": 377.6954, "step": 4470 }, { "epoch": 0.08616171669527505, "grad_norm": 2843.7545849064295, "learning_rate": 1.9709361995523e-05, "loss": 365.9315, "step": 4480 }, { "epoch": 0.08635404195575558, "grad_norm": 851.154691325466, "learning_rate": 1.9707899470599998e-05, "loss": 379.393, "step": 4490 }, { "epoch": 0.0865463672162361, "grad_norm": 723.639897000417, "learning_rate": 1.970643332964477e-05, "loss": 363.6261, "step": 4500 }, { "epoch": 0.08673869247671662, "grad_norm": 707.6678333878551, "learning_rate": 1.9704963573203435e-05, "loss": 365.22, "step": 4510 }, { "epoch": 0.08693101773719715, "grad_norm": 1093.9096422715052, "learning_rate": 1.970349020182345e-05, "loss": 375.2929, "step": 4520 }, { "epoch": 0.08712334299767767, "grad_norm": 781.9974730618354, "learning_rate": 1.9702013216053623e-05, "loss": 358.5067, "step": 4530 }, { "epoch": 0.0873156682581582, "grad_norm": 708.7466764529239, "learning_rate": 1.9700532616444114e-05, "loss": 372.7084, "step": 4540 }, { "epoch": 0.08750799351863872, "grad_norm": 1063.5562486170961, "learning_rate": 1.969904840354641e-05, "loss": 369.8918, "step": 4550 }, { "epoch": 0.08770031877911924, "grad_norm": 693.4258589690572, "learning_rate": 1.9697560577913358e-05, "loss": 382.4882, "step": 4560 }, { "epoch": 0.08789264403959977, "grad_norm": 791.7809123061226, "learning_rate": 1.9696069140099152e-05, "loss": 363.5707, "step": 4570 }, { "epoch": 0.08808496930008029, "grad_norm": 877.4785235656661, "learning_rate": 1.969457409065933e-05, "loss": 364.0813, "step": 4580 }, { "epoch": 0.08827729456056083, "grad_norm": 751.3586040127548, "learning_rate": 1.969307543015077e-05, "loss": 364.6259, "step": 4590 }, { "epoch": 0.08846961982104135, "grad_norm": 829.6726739476259, "learning_rate": 1.9691573159131696e-05, "loss": 366.9448, "step": 4600 }, { "epoch": 0.08866194508152186, "grad_norm": 876.1646962391158, "learning_rate": 1.9690067278161686e-05, "loss": 367.6636, "step": 4610 }, { "epoch": 0.0888542703420024, "grad_norm": 899.4193387993457, "learning_rate": 1.9688557787801647e-05, "loss": 367.4333, "step": 4620 }, { "epoch": 0.08904659560248292, "grad_norm": 757.6314580666606, "learning_rate": 1.968704468861385e-05, "loss": 368.2446, "step": 4630 }, { "epoch": 0.08923892086296345, "grad_norm": 738.5256711356122, "learning_rate": 1.968552798116189e-05, "loss": 351.7744, "step": 4640 }, { "epoch": 0.08943124612344397, "grad_norm": 775.1896435013546, "learning_rate": 1.9684007666010716e-05, "loss": 361.262, "step": 4650 }, { "epoch": 0.08962357138392449, "grad_norm": 641.0132364280885, "learning_rate": 1.9682483743726624e-05, "loss": 370.4693, "step": 4660 }, { "epoch": 0.08981589664440502, "grad_norm": 752.5796369691224, "learning_rate": 1.968095621487725e-05, "loss": 363.7262, "step": 4670 }, { "epoch": 0.09000822190488554, "grad_norm": 752.5472495333028, "learning_rate": 1.9679425080031574e-05, "loss": 360.372, "step": 4680 }, { "epoch": 0.09020054716536607, "grad_norm": 837.3847840187569, "learning_rate": 1.9677890339759914e-05, "loss": 369.5652, "step": 4690 }, { "epoch": 0.0903928724258466, "grad_norm": 775.2121393653373, "learning_rate": 1.967635199463394e-05, "loss": 362.2298, "step": 4700 }, { "epoch": 0.09058519768632711, "grad_norm": 778.091423998472, "learning_rate": 1.9674810045226658e-05, "loss": 366.6832, "step": 4710 }, { "epoch": 0.09077752294680765, "grad_norm": 676.5260672928845, "learning_rate": 1.967326449211242e-05, "loss": 358.6009, "step": 4720 }, { "epoch": 0.09096984820728816, "grad_norm": 787.7257095915079, "learning_rate": 1.9671715335866915e-05, "loss": 359.7101, "step": 4730 }, { "epoch": 0.09116217346776868, "grad_norm": 726.7840286038974, "learning_rate": 1.9670162577067182e-05, "loss": 378.2719, "step": 4740 }, { "epoch": 0.09135449872824922, "grad_norm": 860.1818991378439, "learning_rate": 1.9668606216291598e-05, "loss": 363.3302, "step": 4750 }, { "epoch": 0.09154682398872974, "grad_norm": 813.8855071102088, "learning_rate": 1.9667046254119878e-05, "loss": 356.8751, "step": 4760 }, { "epoch": 0.09173914924921027, "grad_norm": 699.9496488839774, "learning_rate": 1.966548269113309e-05, "loss": 364.8364, "step": 4770 }, { "epoch": 0.09193147450969079, "grad_norm": 689.1103427101364, "learning_rate": 1.9663915527913628e-05, "loss": 364.6322, "step": 4780 }, { "epoch": 0.09212379977017131, "grad_norm": 743.5321878825619, "learning_rate": 1.9662344765045237e-05, "loss": 372.8812, "step": 4790 }, { "epoch": 0.09231612503065184, "grad_norm": 638.1040062486387, "learning_rate": 1.9660770403112996e-05, "loss": 362.1715, "step": 4800 }, { "epoch": 0.09250845029113236, "grad_norm": 807.1382883930204, "learning_rate": 1.9659192442703336e-05, "loss": 365.8542, "step": 4810 }, { "epoch": 0.0927007755516129, "grad_norm": 752.1295341406421, "learning_rate": 1.965761088440402e-05, "loss": 367.9731, "step": 4820 }, { "epoch": 0.09289310081209341, "grad_norm": 687.0910114607802, "learning_rate": 1.9656025728804147e-05, "loss": 364.8634, "step": 4830 }, { "epoch": 0.09308542607257393, "grad_norm": 632.6157545516736, "learning_rate": 1.9654436976494165e-05, "loss": 359.9041, "step": 4840 }, { "epoch": 0.09327775133305446, "grad_norm": 661.1788050399986, "learning_rate": 1.9652844628065857e-05, "loss": 355.8173, "step": 4850 }, { "epoch": 0.09347007659353498, "grad_norm": 702.4271151111567, "learning_rate": 1.965124868411235e-05, "loss": 371.1025, "step": 4860 }, { "epoch": 0.09366240185401552, "grad_norm": 735.7176251611235, "learning_rate": 1.96496491452281e-05, "loss": 358.993, "step": 4870 }, { "epoch": 0.09385472711449604, "grad_norm": 701.9085546602543, "learning_rate": 1.9648046012008916e-05, "loss": 355.9271, "step": 4880 }, { "epoch": 0.09404705237497656, "grad_norm": 668.1042470286606, "learning_rate": 1.9646439285051936e-05, "loss": 366.6708, "step": 4890 }, { "epoch": 0.09423937763545709, "grad_norm": 1329.9780620536628, "learning_rate": 1.9644828964955633e-05, "loss": 372.2765, "step": 4900 }, { "epoch": 0.09443170289593761, "grad_norm": 654.2950590246775, "learning_rate": 1.9643215052319836e-05, "loss": 373.0559, "step": 4910 }, { "epoch": 0.09462402815641814, "grad_norm": 702.124295888291, "learning_rate": 1.9641597547745694e-05, "loss": 375.7776, "step": 4920 }, { "epoch": 0.09481635341689866, "grad_norm": 747.2709381896793, "learning_rate": 1.9639976451835698e-05, "loss": 376.6263, "step": 4930 }, { "epoch": 0.09500867867737918, "grad_norm": 749.2638300155742, "learning_rate": 1.9638351765193685e-05, "loss": 371.4045, "step": 4940 }, { "epoch": 0.09520100393785971, "grad_norm": 798.0008789179675, "learning_rate": 1.9636723488424823e-05, "loss": 350.803, "step": 4950 }, { "epoch": 0.09539332919834023, "grad_norm": 652.989139825633, "learning_rate": 1.9635091622135616e-05, "loss": 367.1494, "step": 4960 }, { "epoch": 0.09558565445882075, "grad_norm": 627.0662273778905, "learning_rate": 1.963345616693391e-05, "loss": 359.2938, "step": 4970 }, { "epoch": 0.09577797971930128, "grad_norm": 697.185717014197, "learning_rate": 1.9631817123428883e-05, "loss": 366.4442, "step": 4980 }, { "epoch": 0.0959703049797818, "grad_norm": 711.409397793336, "learning_rate": 1.9630174492231052e-05, "loss": 358.1978, "step": 4990 }, { "epoch": 0.09616263024026234, "grad_norm": 666.5162533135881, "learning_rate": 1.962852827395227e-05, "loss": 375.9189, "step": 5000 }, { "epoch": 0.09635495550074286, "grad_norm": 642.8915335248588, "learning_rate": 1.962687846920573e-05, "loss": 360.1929, "step": 5010 }, { "epoch": 0.09654728076122338, "grad_norm": 785.8465530759893, "learning_rate": 1.9625225078605946e-05, "loss": 358.44, "step": 5020 }, { "epoch": 0.09673960602170391, "grad_norm": 678.1645670579533, "learning_rate": 1.9623568102768792e-05, "loss": 364.0865, "step": 5030 }, { "epoch": 0.09693193128218443, "grad_norm": 695.7291546323536, "learning_rate": 1.9621907542311457e-05, "loss": 364.631, "step": 5040 }, { "epoch": 0.09712425654266496, "grad_norm": 829.7757885688217, "learning_rate": 1.9620243397852473e-05, "loss": 349.2464, "step": 5050 }, { "epoch": 0.09731658180314548, "grad_norm": 688.8738361253692, "learning_rate": 1.9618575670011705e-05, "loss": 348.4353, "step": 5060 }, { "epoch": 0.097508907063626, "grad_norm": 940.3285039849533, "learning_rate": 1.9616904359410357e-05, "loss": 355.8785, "step": 5070 }, { "epoch": 0.09770123232410653, "grad_norm": 684.5118133090992, "learning_rate": 1.9615229466670963e-05, "loss": 358.3975, "step": 5080 }, { "epoch": 0.09789355758458705, "grad_norm": 672.031305833946, "learning_rate": 1.9613550992417396e-05, "loss": 357.6419, "step": 5090 }, { "epoch": 0.09808588284506758, "grad_norm": 645.4269998111222, "learning_rate": 1.961186893727486e-05, "loss": 357.7922, "step": 5100 }, { "epoch": 0.0982782081055481, "grad_norm": 694.6823162953259, "learning_rate": 1.9610183301869882e-05, "loss": 347.938, "step": 5110 }, { "epoch": 0.09847053336602862, "grad_norm": 757.6132463114961, "learning_rate": 1.9608494086830348e-05, "loss": 353.2071, "step": 5120 }, { "epoch": 0.09866285862650916, "grad_norm": 691.0368103490606, "learning_rate": 1.9606801292785452e-05, "loss": 360.3291, "step": 5130 }, { "epoch": 0.09885518388698968, "grad_norm": 632.6504849744924, "learning_rate": 1.960510492036574e-05, "loss": 360.4072, "step": 5140 }, { "epoch": 0.09904750914747021, "grad_norm": 692.0316509173975, "learning_rate": 1.9603404970203078e-05, "loss": 363.7523, "step": 5150 }, { "epoch": 0.09923983440795073, "grad_norm": 777.8436046072212, "learning_rate": 1.9601701442930667e-05, "loss": 371.8547, "step": 5160 }, { "epoch": 0.09943215966843125, "grad_norm": 710.4627615354874, "learning_rate": 1.9599994339183047e-05, "loss": 363.1294, "step": 5170 }, { "epoch": 0.09962448492891178, "grad_norm": 692.4692790921382, "learning_rate": 1.9598283659596084e-05, "loss": 352.3487, "step": 5180 }, { "epoch": 0.0998168101893923, "grad_norm": 1150.4552627716428, "learning_rate": 1.9596569404806983e-05, "loss": 354.4399, "step": 5190 }, { "epoch": 0.10000913544987282, "grad_norm": 695.4429732404097, "learning_rate": 1.9594851575454266e-05, "loss": 352.4259, "step": 5200 }, { "epoch": 0.10020146071035335, "grad_norm": 677.9990103567972, "learning_rate": 1.9593130172177806e-05, "loss": 369.2995, "step": 5210 }, { "epoch": 0.10039378597083387, "grad_norm": 710.1974302890869, "learning_rate": 1.959140519561879e-05, "loss": 359.1806, "step": 5220 }, { "epoch": 0.1005861112313144, "grad_norm": 686.7699233718275, "learning_rate": 1.9589676646419744e-05, "loss": 360.2993, "step": 5230 }, { "epoch": 0.10077843649179492, "grad_norm": 709.0860221895744, "learning_rate": 1.958794452522453e-05, "loss": 365.8043, "step": 5240 }, { "epoch": 0.10097076175227544, "grad_norm": 618.6174095299081, "learning_rate": 1.9586208832678328e-05, "loss": 355.2659, "step": 5250 }, { "epoch": 0.10116308701275598, "grad_norm": 636.9513429109644, "learning_rate": 1.958446956942766e-05, "loss": 362.7319, "step": 5260 }, { "epoch": 0.1013554122732365, "grad_norm": 634.1443462789841, "learning_rate": 1.9582726736120365e-05, "loss": 354.1151, "step": 5270 }, { "epoch": 0.10154773753371703, "grad_norm": 694.5956914001258, "learning_rate": 1.958098033340563e-05, "loss": 358.8986, "step": 5280 }, { "epoch": 0.10174006279419755, "grad_norm": 722.9248946007976, "learning_rate": 1.9579230361933952e-05, "loss": 371.4438, "step": 5290 }, { "epoch": 0.10193238805467807, "grad_norm": 724.2618423668106, "learning_rate": 1.9577476822357174e-05, "loss": 349.7284, "step": 5300 }, { "epoch": 0.1021247133151586, "grad_norm": 792.4144490762708, "learning_rate": 1.9575719715328457e-05, "loss": 367.7335, "step": 5310 }, { "epoch": 0.10231703857563912, "grad_norm": 622.6828197063883, "learning_rate": 1.957395904150229e-05, "loss": 348.4879, "step": 5320 }, { "epoch": 0.10250936383611965, "grad_norm": 717.6866249410048, "learning_rate": 1.9572194801534504e-05, "loss": 359.715, "step": 5330 }, { "epoch": 0.10270168909660017, "grad_norm": 668.9250455978879, "learning_rate": 1.9570426996082238e-05, "loss": 358.7419, "step": 5340 }, { "epoch": 0.10289401435708069, "grad_norm": 819.6418580879654, "learning_rate": 1.9568655625803982e-05, "loss": 350.3772, "step": 5350 }, { "epoch": 0.10308633961756122, "grad_norm": 631.5286785486417, "learning_rate": 1.956688069135954e-05, "loss": 360.9678, "step": 5360 }, { "epoch": 0.10327866487804174, "grad_norm": 590.7187007886677, "learning_rate": 1.9565102193410035e-05, "loss": 351.1492, "step": 5370 }, { "epoch": 0.10347099013852228, "grad_norm": 639.5551477174666, "learning_rate": 1.956332013261794e-05, "loss": 355.9312, "step": 5380 }, { "epoch": 0.1036633153990028, "grad_norm": 713.853127241327, "learning_rate": 1.9561534509647038e-05, "loss": 346.8531, "step": 5390 }, { "epoch": 0.10385564065948331, "grad_norm": 678.2709414907933, "learning_rate": 1.9559745325162445e-05, "loss": 365.8308, "step": 5400 }, { "epoch": 0.10404796591996385, "grad_norm": 696.1133632246981, "learning_rate": 1.9557952579830604e-05, "loss": 358.5058, "step": 5410 }, { "epoch": 0.10424029118044437, "grad_norm": 609.1631720205271, "learning_rate": 1.955615627431928e-05, "loss": 360.643, "step": 5420 }, { "epoch": 0.10443261644092489, "grad_norm": 588.216185762683, "learning_rate": 1.955435640929757e-05, "loss": 346.53, "step": 5430 }, { "epoch": 0.10462494170140542, "grad_norm": 687.938172394234, "learning_rate": 1.9552552985435893e-05, "loss": 348.8743, "step": 5440 }, { "epoch": 0.10481726696188594, "grad_norm": 646.2257032802848, "learning_rate": 1.9550746003405996e-05, "loss": 353.3726, "step": 5450 }, { "epoch": 0.10500959222236647, "grad_norm": 615.3275930754071, "learning_rate": 1.9548935463880945e-05, "loss": 361.7423, "step": 5460 }, { "epoch": 0.10520191748284699, "grad_norm": 920.3547594582011, "learning_rate": 1.9547121367535143e-05, "loss": 368.0178, "step": 5470 }, { "epoch": 0.10539424274332751, "grad_norm": 673.3909171381894, "learning_rate": 1.9545303715044305e-05, "loss": 363.9247, "step": 5480 }, { "epoch": 0.10558656800380804, "grad_norm": 668.479775303425, "learning_rate": 1.9543482507085484e-05, "loss": 360.2404, "step": 5490 }, { "epoch": 0.10577889326428856, "grad_norm": 701.994986175009, "learning_rate": 1.9541657744337038e-05, "loss": 349.8952, "step": 5500 }, { "epoch": 0.1059712185247691, "grad_norm": 705.6009801245955, "learning_rate": 1.9539829427478675e-05, "loss": 362.0219, "step": 5510 }, { "epoch": 0.10616354378524961, "grad_norm": 661.6704816373879, "learning_rate": 1.95379975571914e-05, "loss": 367.3877, "step": 5520 }, { "epoch": 0.10635586904573013, "grad_norm": 687.3223282814702, "learning_rate": 1.953616213415756e-05, "loss": 358.3039, "step": 5530 }, { "epoch": 0.10654819430621067, "grad_norm": 616.9728859653206, "learning_rate": 1.9534323159060824e-05, "loss": 365.5358, "step": 5540 }, { "epoch": 0.10674051956669119, "grad_norm": 794.7069763948178, "learning_rate": 1.9532480632586175e-05, "loss": 358.5932, "step": 5550 }, { "epoch": 0.10693284482717172, "grad_norm": 677.6407233525834, "learning_rate": 1.953063455541992e-05, "loss": 365.166, "step": 5560 }, { "epoch": 0.10712517008765224, "grad_norm": 629.0218444824332, "learning_rate": 1.9528784928249703e-05, "loss": 352.1474, "step": 5570 }, { "epoch": 0.10731749534813276, "grad_norm": 666.7341636614977, "learning_rate": 1.9526931751764467e-05, "loss": 345.3576, "step": 5580 }, { "epoch": 0.10750982060861329, "grad_norm": 629.6700672193283, "learning_rate": 1.95250750266545e-05, "loss": 354.5343, "step": 5590 }, { "epoch": 0.10770214586909381, "grad_norm": 711.6264440434755, "learning_rate": 1.9523214753611398e-05, "loss": 343.4514, "step": 5600 }, { "epoch": 0.10789447112957434, "grad_norm": 739.432375882402, "learning_rate": 1.952135093332808e-05, "loss": 363.1715, "step": 5610 }, { "epoch": 0.10808679639005486, "grad_norm": 803.0180117993846, "learning_rate": 1.9519483566498788e-05, "loss": 351.7206, "step": 5620 }, { "epoch": 0.10827912165053538, "grad_norm": 650.5084174821643, "learning_rate": 1.9517612653819088e-05, "loss": 354.0946, "step": 5630 }, { "epoch": 0.10847144691101591, "grad_norm": 658.895998477785, "learning_rate": 1.9515738195985868e-05, "loss": 364.1155, "step": 5640 }, { "epoch": 0.10866377217149643, "grad_norm": 704.3438306020199, "learning_rate": 1.951386019369732e-05, "loss": 349.9277, "step": 5650 }, { "epoch": 0.10885609743197695, "grad_norm": 641.1259216105735, "learning_rate": 1.9511978647652984e-05, "loss": 346.7582, "step": 5660 }, { "epoch": 0.10904842269245749, "grad_norm": 644.5491587588273, "learning_rate": 1.9510093558553687e-05, "loss": 351.1167, "step": 5670 }, { "epoch": 0.109240747952938, "grad_norm": 620.6813360337377, "learning_rate": 1.950820492710161e-05, "loss": 348.5378, "step": 5680 }, { "epoch": 0.10943307321341854, "grad_norm": 644.5416008845856, "learning_rate": 1.9506312754000235e-05, "loss": 351.0252, "step": 5690 }, { "epoch": 0.10962539847389906, "grad_norm": 654.059831443735, "learning_rate": 1.9504417039954357e-05, "loss": 348.4304, "step": 5700 }, { "epoch": 0.10981772373437958, "grad_norm": 644.1505153246875, "learning_rate": 1.9502517785670098e-05, "loss": 359.1387, "step": 5710 }, { "epoch": 0.11001004899486011, "grad_norm": 769.20654559226, "learning_rate": 1.950061499185491e-05, "loss": 359.4678, "step": 5720 }, { "epoch": 0.11020237425534063, "grad_norm": 679.3344717558374, "learning_rate": 1.9498708659217542e-05, "loss": 350.7424, "step": 5730 }, { "epoch": 0.11039469951582116, "grad_norm": 657.8142033708348, "learning_rate": 1.9496798788468077e-05, "loss": 342.7845, "step": 5740 }, { "epoch": 0.11058702477630168, "grad_norm": 741.24155744283, "learning_rate": 1.9494885380317906e-05, "loss": 346.9994, "step": 5750 }, { "epoch": 0.1107793500367822, "grad_norm": 742.9321197573132, "learning_rate": 1.9492968435479744e-05, "loss": 345.0701, "step": 5760 }, { "epoch": 0.11097167529726273, "grad_norm": 639.619633875009, "learning_rate": 1.949104795466762e-05, "loss": 346.2675, "step": 5770 }, { "epoch": 0.11116400055774325, "grad_norm": 635.3939443905493, "learning_rate": 1.9489123938596886e-05, "loss": 338.2012, "step": 5780 }, { "epoch": 0.11135632581822379, "grad_norm": 606.3943827545633, "learning_rate": 1.94871963879842e-05, "loss": 351.4652, "step": 5790 }, { "epoch": 0.1115486510787043, "grad_norm": 854.4458033211714, "learning_rate": 1.9485265303547547e-05, "loss": 347.1553, "step": 5800 }, { "epoch": 0.11174097633918482, "grad_norm": 732.4929729662073, "learning_rate": 1.9483330686006223e-05, "loss": 353.6084, "step": 5810 }, { "epoch": 0.11193330159966536, "grad_norm": 662.4497031342546, "learning_rate": 1.948139253608084e-05, "loss": 372.1042, "step": 5820 }, { "epoch": 0.11212562686014588, "grad_norm": 766.0917879572403, "learning_rate": 1.9479450854493327e-05, "loss": 348.1985, "step": 5830 }, { "epoch": 0.1123179521206264, "grad_norm": 750.8279452274128, "learning_rate": 1.9477505641966933e-05, "loss": 344.6354, "step": 5840 }, { "epoch": 0.11251027738110693, "grad_norm": 735.8955775076766, "learning_rate": 1.9475556899226213e-05, "loss": 350.4105, "step": 5850 }, { "epoch": 0.11270260264158745, "grad_norm": 623.2418608358216, "learning_rate": 1.9473604626997037e-05, "loss": 342.1461, "step": 5860 }, { "epoch": 0.11289492790206798, "grad_norm": 617.0366422133485, "learning_rate": 1.94716488260066e-05, "loss": 347.9125, "step": 5870 }, { "epoch": 0.1130872531625485, "grad_norm": 711.3109167472553, "learning_rate": 1.946968949698341e-05, "loss": 348.1037, "step": 5880 }, { "epoch": 0.11327957842302902, "grad_norm": 705.609924571381, "learning_rate": 1.9467726640657277e-05, "loss": 343.4828, "step": 5890 }, { "epoch": 0.11347190368350955, "grad_norm": 736.4183661571752, "learning_rate": 1.9465760257759336e-05, "loss": 348.7198, "step": 5900 }, { "epoch": 0.11366422894399007, "grad_norm": 611.3064252789177, "learning_rate": 1.9463790349022027e-05, "loss": 338.8815, "step": 5910 }, { "epoch": 0.1138565542044706, "grad_norm": 603.676672088241, "learning_rate": 1.9461816915179117e-05, "loss": 346.2747, "step": 5920 }, { "epoch": 0.11404887946495113, "grad_norm": 713.939519252679, "learning_rate": 1.945983995696567e-05, "loss": 352.6707, "step": 5930 }, { "epoch": 0.11424120472543164, "grad_norm": 624.5364853448472, "learning_rate": 1.9457859475118077e-05, "loss": 345.4377, "step": 5940 }, { "epoch": 0.11443352998591218, "grad_norm": 694.9688179948644, "learning_rate": 1.9455875470374027e-05, "loss": 361.0383, "step": 5950 }, { "epoch": 0.1146258552463927, "grad_norm": 714.9516773147197, "learning_rate": 1.9453887943472532e-05, "loss": 336.5125, "step": 5960 }, { "epoch": 0.11481818050687323, "grad_norm": 734.8832746480035, "learning_rate": 1.945189689515392e-05, "loss": 359.7186, "step": 5970 }, { "epoch": 0.11501050576735375, "grad_norm": 646.6308216216573, "learning_rate": 1.9449902326159815e-05, "loss": 338.3782, "step": 5980 }, { "epoch": 0.11520283102783427, "grad_norm": 697.1561776823747, "learning_rate": 1.9447904237233164e-05, "loss": 349.8259, "step": 5990 }, { "epoch": 0.1153951562883148, "grad_norm": 669.4929816486336, "learning_rate": 1.9445902629118223e-05, "loss": 342.1568, "step": 6000 }, { "epoch": 0.11558748154879532, "grad_norm": 602.6243015218835, "learning_rate": 1.9443897502560555e-05, "loss": 354.5575, "step": 6010 }, { "epoch": 0.11577980680927585, "grad_norm": 635.3106979930712, "learning_rate": 1.9441888858307042e-05, "loss": 364.4904, "step": 6020 }, { "epoch": 0.11597213206975637, "grad_norm": 605.2859705768255, "learning_rate": 1.943987669710586e-05, "loss": 340.5251, "step": 6030 }, { "epoch": 0.11616445733023689, "grad_norm": 633.4166407738802, "learning_rate": 1.9437861019706522e-05, "loss": 342.5962, "step": 6040 }, { "epoch": 0.11635678259071743, "grad_norm": 645.851267654988, "learning_rate": 1.943584182685982e-05, "loss": 354.7195, "step": 6050 }, { "epoch": 0.11654910785119794, "grad_norm": 846.2031756676997, "learning_rate": 1.9433819119317878e-05, "loss": 348.1721, "step": 6060 }, { "epoch": 0.11674143311167846, "grad_norm": 654.0855910798667, "learning_rate": 1.9431792897834115e-05, "loss": 343.3366, "step": 6070 }, { "epoch": 0.116933758372159, "grad_norm": 851.0349887230075, "learning_rate": 1.9429763163163273e-05, "loss": 356.5918, "step": 6080 }, { "epoch": 0.11712608363263952, "grad_norm": 695.9728498156604, "learning_rate": 1.942772991606139e-05, "loss": 338.5591, "step": 6090 }, { "epoch": 0.11731840889312005, "grad_norm": 796.0992941708482, "learning_rate": 1.9425693157285816e-05, "loss": 347.8489, "step": 6100 }, { "epoch": 0.11751073415360057, "grad_norm": 738.4894558855128, "learning_rate": 1.942365288759521e-05, "loss": 338.323, "step": 6110 }, { "epoch": 0.11770305941408109, "grad_norm": 647.0316876095417, "learning_rate": 1.9421609107749542e-05, "loss": 354.4408, "step": 6120 }, { "epoch": 0.11789538467456162, "grad_norm": 714.2423151992201, "learning_rate": 1.9419561818510085e-05, "loss": 347.4304, "step": 6130 }, { "epoch": 0.11808770993504214, "grad_norm": 662.1491125046128, "learning_rate": 1.9417511020639416e-05, "loss": 345.5496, "step": 6140 }, { "epoch": 0.11828003519552267, "grad_norm": 615.1461389522196, "learning_rate": 1.9415456714901432e-05, "loss": 350.8063, "step": 6150 }, { "epoch": 0.11847236045600319, "grad_norm": 606.9754745270544, "learning_rate": 1.941339890206132e-05, "loss": 352.5946, "step": 6160 }, { "epoch": 0.11866468571648371, "grad_norm": 634.6995729232873, "learning_rate": 1.9411337582885587e-05, "loss": 344.4547, "step": 6170 }, { "epoch": 0.11885701097696424, "grad_norm": 710.4985092822184, "learning_rate": 1.9409272758142034e-05, "loss": 338.1419, "step": 6180 }, { "epoch": 0.11904933623744476, "grad_norm": 687.113663528976, "learning_rate": 1.940720442859978e-05, "loss": 347.3788, "step": 6190 }, { "epoch": 0.1192416614979253, "grad_norm": 604.0974446875828, "learning_rate": 1.940513259502924e-05, "loss": 347.2637, "step": 6200 }, { "epoch": 0.11943398675840582, "grad_norm": 625.1258844503341, "learning_rate": 1.9403057258202144e-05, "loss": 346.1806, "step": 6210 }, { "epoch": 0.11962631201888634, "grad_norm": 618.2532754834317, "learning_rate": 1.940097841889151e-05, "loss": 343.0665, "step": 6220 }, { "epoch": 0.11981863727936687, "grad_norm": 599.6548476871179, "learning_rate": 1.939889607787168e-05, "loss": 352.7839, "step": 6230 }, { "epoch": 0.12001096253984739, "grad_norm": 666.041125551419, "learning_rate": 1.9396810235918287e-05, "loss": 348.3168, "step": 6240 }, { "epoch": 0.12020328780032792, "grad_norm": 669.6349246964463, "learning_rate": 1.939472089380828e-05, "loss": 343.3021, "step": 6250 }, { "epoch": 0.12039561306080844, "grad_norm": 664.6639247045922, "learning_rate": 1.9392628052319895e-05, "loss": 348.0812, "step": 6260 }, { "epoch": 0.12058793832128896, "grad_norm": 578.9207598443023, "learning_rate": 1.9390531712232687e-05, "loss": 347.8523, "step": 6270 }, { "epoch": 0.12078026358176949, "grad_norm": 727.9456926484588, "learning_rate": 1.9388431874327504e-05, "loss": 358.9287, "step": 6280 }, { "epoch": 0.12097258884225001, "grad_norm": 841.7329823391808, "learning_rate": 1.9386328539386502e-05, "loss": 350.1853, "step": 6290 }, { "epoch": 0.12116491410273053, "grad_norm": 863.8318261379947, "learning_rate": 1.938422170819314e-05, "loss": 349.7141, "step": 6300 }, { "epoch": 0.12135723936321106, "grad_norm": 741.1597664964307, "learning_rate": 1.938211138153218e-05, "loss": 337.517, "step": 6310 }, { "epoch": 0.12154956462369158, "grad_norm": 710.6308803333314, "learning_rate": 1.9379997560189677e-05, "loss": 342.7385, "step": 6320 }, { "epoch": 0.12174188988417212, "grad_norm": 587.5917142179576, "learning_rate": 1.9377880244953e-05, "loss": 338.0116, "step": 6330 }, { "epoch": 0.12193421514465264, "grad_norm": 708.5960196524559, "learning_rate": 1.9375759436610813e-05, "loss": 344.2697, "step": 6340 }, { "epoch": 0.12212654040513315, "grad_norm": 818.7838214632518, "learning_rate": 1.937363513595308e-05, "loss": 346.9611, "step": 6350 }, { "epoch": 0.12231886566561369, "grad_norm": 743.013756708669, "learning_rate": 1.937150734377107e-05, "loss": 332.9242, "step": 6360 }, { "epoch": 0.12251119092609421, "grad_norm": 695.2887217131536, "learning_rate": 1.9369376060857354e-05, "loss": 332.5873, "step": 6370 }, { "epoch": 0.12270351618657474, "grad_norm": 724.3502879672002, "learning_rate": 1.936724128800579e-05, "loss": 348.6979, "step": 6380 }, { "epoch": 0.12289584144705526, "grad_norm": 607.9127161403849, "learning_rate": 1.9365103026011555e-05, "loss": 344.3744, "step": 6390 }, { "epoch": 0.12308816670753578, "grad_norm": 717.833106513343, "learning_rate": 1.9362961275671112e-05, "loss": 347.6986, "step": 6400 }, { "epoch": 0.12328049196801631, "grad_norm": 647.4911508762264, "learning_rate": 1.936081603778223e-05, "loss": 357.9332, "step": 6410 }, { "epoch": 0.12347281722849683, "grad_norm": 622.8712891042829, "learning_rate": 1.9358667313143972e-05, "loss": 345.6661, "step": 6420 }, { "epoch": 0.12366514248897736, "grad_norm": 761.8234931548434, "learning_rate": 1.93565151025567e-05, "loss": 346.5215, "step": 6430 }, { "epoch": 0.12385746774945788, "grad_norm": 623.572070801288, "learning_rate": 1.9354359406822084e-05, "loss": 348.8408, "step": 6440 }, { "epoch": 0.1240497930099384, "grad_norm": 581.4091063182992, "learning_rate": 1.935220022674308e-05, "loss": 337.5883, "step": 6450 }, { "epoch": 0.12424211827041894, "grad_norm": 616.7900679575442, "learning_rate": 1.9350037563123947e-05, "loss": 331.99, "step": 6460 }, { "epoch": 0.12443444353089946, "grad_norm": 599.017758292423, "learning_rate": 1.9347871416770245e-05, "loss": 348.6796, "step": 6470 }, { "epoch": 0.12462676879137999, "grad_norm": 634.0349459514991, "learning_rate": 1.9345701788488825e-05, "loss": 336.4672, "step": 6480 }, { "epoch": 0.12481909405186051, "grad_norm": 610.0361773632778, "learning_rate": 1.9343528679087837e-05, "loss": 345.7589, "step": 6490 }, { "epoch": 0.12501141931234103, "grad_norm": 617.2343731916664, "learning_rate": 1.934135208937673e-05, "loss": 342.7926, "step": 6500 }, { "epoch": 0.12520374457282155, "grad_norm": 616.367124603368, "learning_rate": 1.9339172020166245e-05, "loss": 351.4255, "step": 6510 }, { "epoch": 0.1253960698333021, "grad_norm": 632.0282420968152, "learning_rate": 1.933698847226843e-05, "loss": 346.1836, "step": 6520 }, { "epoch": 0.1255883950937826, "grad_norm": 687.1816525001773, "learning_rate": 1.9334801446496606e-05, "loss": 339.5251, "step": 6530 }, { "epoch": 0.12578072035426313, "grad_norm": 597.5403840054404, "learning_rate": 1.933261094366542e-05, "loss": 343.9508, "step": 6540 }, { "epoch": 0.12597304561474365, "grad_norm": 676.3999630134922, "learning_rate": 1.9330416964590787e-05, "loss": 331.0079, "step": 6550 }, { "epoch": 0.12616537087522417, "grad_norm": 701.7101529357468, "learning_rate": 1.932821951008993e-05, "loss": 352.7236, "step": 6560 }, { "epoch": 0.12635769613570472, "grad_norm": 630.7615641777062, "learning_rate": 1.9326018580981373e-05, "loss": 341.4861, "step": 6570 }, { "epoch": 0.12655002139618524, "grad_norm": 752.0100341378687, "learning_rate": 1.9323814178084914e-05, "loss": 336.0607, "step": 6580 }, { "epoch": 0.12674234665666576, "grad_norm": 625.633523638851, "learning_rate": 1.9321606302221662e-05, "loss": 346.4885, "step": 6590 }, { "epoch": 0.12693467191714627, "grad_norm": 602.5569802194531, "learning_rate": 1.9319394954214013e-05, "loss": 347.0677, "step": 6600 }, { "epoch": 0.1271269971776268, "grad_norm": 640.8650204211425, "learning_rate": 1.931718013488566e-05, "loss": 343.656, "step": 6610 }, { "epoch": 0.12731932243810734, "grad_norm": 659.4461697183357, "learning_rate": 1.9314961845061584e-05, "loss": 357.7287, "step": 6620 }, { "epoch": 0.12751164769858786, "grad_norm": 902.6648312801847, "learning_rate": 1.9312740085568063e-05, "loss": 354.6147, "step": 6630 }, { "epoch": 0.12770397295906838, "grad_norm": 718.5158139674871, "learning_rate": 1.9310514857232666e-05, "loss": 335.9984, "step": 6640 }, { "epoch": 0.1278962982195489, "grad_norm": 633.9312661204737, "learning_rate": 1.930828616088425e-05, "loss": 330.3928, "step": 6650 }, { "epoch": 0.12808862348002942, "grad_norm": 600.8964809056338, "learning_rate": 1.9306053997352973e-05, "loss": 336.6688, "step": 6660 }, { "epoch": 0.12828094874050994, "grad_norm": 596.5066188530008, "learning_rate": 1.9303818367470274e-05, "loss": 338.1062, "step": 6670 }, { "epoch": 0.12847327400099048, "grad_norm": 776.3918205095136, "learning_rate": 1.9301579272068894e-05, "loss": 340.7341, "step": 6680 }, { "epoch": 0.128665599261471, "grad_norm": 623.1809290465961, "learning_rate": 1.9299336711982853e-05, "loss": 342.0376, "step": 6690 }, { "epoch": 0.12885792452195152, "grad_norm": 596.649370007816, "learning_rate": 1.9297090688047473e-05, "loss": 342.6335, "step": 6700 }, { "epoch": 0.12905024978243204, "grad_norm": 629.7070535141022, "learning_rate": 1.929484120109936e-05, "loss": 338.046, "step": 6710 }, { "epoch": 0.12924257504291256, "grad_norm": 634.0553810209492, "learning_rate": 1.9292588251976404e-05, "loss": 334.4757, "step": 6720 }, { "epoch": 0.1294349003033931, "grad_norm": 710.2582775948679, "learning_rate": 1.92903318415178e-05, "loss": 342.4301, "step": 6730 }, { "epoch": 0.12962722556387363, "grad_norm": 715.7001005567705, "learning_rate": 1.9288071970564015e-05, "loss": 350.7262, "step": 6740 }, { "epoch": 0.12981955082435415, "grad_norm": 621.9873318526683, "learning_rate": 1.9285808639956823e-05, "loss": 343.0189, "step": 6750 }, { "epoch": 0.13001187608483467, "grad_norm": 610.5442544220016, "learning_rate": 1.9283541850539272e-05, "loss": 340.7758, "step": 6760 }, { "epoch": 0.13020420134531518, "grad_norm": 687.8958445182545, "learning_rate": 1.92812716031557e-05, "loss": 333.1836, "step": 6770 }, { "epoch": 0.13039652660579573, "grad_norm": 649.6750384549912, "learning_rate": 1.9278997898651746e-05, "loss": 342.2303, "step": 6780 }, { "epoch": 0.13058885186627625, "grad_norm": 604.4225607392067, "learning_rate": 1.9276720737874327e-05, "loss": 340.7247, "step": 6790 }, { "epoch": 0.13078117712675677, "grad_norm": 602.3709031839262, "learning_rate": 1.9274440121671637e-05, "loss": 332.4386, "step": 6800 }, { "epoch": 0.1309735023872373, "grad_norm": 617.7365851893695, "learning_rate": 1.9272156050893173e-05, "loss": 340.4061, "step": 6810 }, { "epoch": 0.1311658276477178, "grad_norm": 794.7884869155839, "learning_rate": 1.926986852638972e-05, "loss": 341.3555, "step": 6820 }, { "epoch": 0.13135815290819836, "grad_norm": 830.0588303420416, "learning_rate": 1.926757754901333e-05, "loss": 336.2551, "step": 6830 }, { "epoch": 0.13155047816867887, "grad_norm": 710.8954264814854, "learning_rate": 1.926528311961737e-05, "loss": 342.6202, "step": 6840 }, { "epoch": 0.1317428034291594, "grad_norm": 617.4477314684151, "learning_rate": 1.9262985239056463e-05, "loss": 345.4343, "step": 6850 }, { "epoch": 0.1319351286896399, "grad_norm": 675.1644760008425, "learning_rate": 1.9260683908186544e-05, "loss": 336.6608, "step": 6860 }, { "epoch": 0.13212745395012043, "grad_norm": 604.4334772531856, "learning_rate": 1.9258379127864808e-05, "loss": 334.0639, "step": 6870 }, { "epoch": 0.13231977921060098, "grad_norm": 774.9197996782158, "learning_rate": 1.925607089894976e-05, "loss": 334.4737, "step": 6880 }, { "epoch": 0.1325121044710815, "grad_norm": 642.9739454617887, "learning_rate": 1.9253759222301168e-05, "loss": 338.4757, "step": 6890 }, { "epoch": 0.13270442973156202, "grad_norm": 641.4484476637223, "learning_rate": 1.9251444098780095e-05, "loss": 338.1292, "step": 6900 }, { "epoch": 0.13289675499204254, "grad_norm": 591.0390873304048, "learning_rate": 1.924912552924889e-05, "loss": 336.706, "step": 6910 }, { "epoch": 0.13308908025252306, "grad_norm": 736.4347602075876, "learning_rate": 1.924680351457118e-05, "loss": 340.7009, "step": 6920 }, { "epoch": 0.1332814055130036, "grad_norm": 640.9332097910735, "learning_rate": 1.9244478055611875e-05, "loss": 331.8275, "step": 6930 }, { "epoch": 0.13347373077348412, "grad_norm": 615.3553578255887, "learning_rate": 1.9242149153237175e-05, "loss": 346.1782, "step": 6940 }, { "epoch": 0.13366605603396464, "grad_norm": 666.5546352548604, "learning_rate": 1.923981680831455e-05, "loss": 329.8955, "step": 6950 }, { "epoch": 0.13385838129444516, "grad_norm": 578.9245733843179, "learning_rate": 1.923748102171277e-05, "loss": 342.6567, "step": 6960 }, { "epoch": 0.13405070655492568, "grad_norm": 684.1602269539654, "learning_rate": 1.9235141794301867e-05, "loss": 339.303, "step": 6970 }, { "epoch": 0.13424303181540623, "grad_norm": 772.5479196530189, "learning_rate": 1.9232799126953173e-05, "loss": 341.7923, "step": 6980 }, { "epoch": 0.13443535707588675, "grad_norm": 780.8375673557058, "learning_rate": 1.9230453020539285e-05, "loss": 340.7893, "step": 6990 }, { "epoch": 0.13462768233636727, "grad_norm": 680.6012761445076, "learning_rate": 1.9228103475934096e-05, "loss": 330.813, "step": 7000 }, { "epoch": 0.13482000759684779, "grad_norm": 620.675619049162, "learning_rate": 1.9225750494012767e-05, "loss": 344.353, "step": 7010 }, { "epoch": 0.1350123328573283, "grad_norm": 585.4078426498832, "learning_rate": 1.9223394075651748e-05, "loss": 342.1984, "step": 7020 }, { "epoch": 0.13520465811780885, "grad_norm": 638.19237560778, "learning_rate": 1.9221034221728764e-05, "loss": 335.0086, "step": 7030 }, { "epoch": 0.13539698337828937, "grad_norm": 642.7386042549266, "learning_rate": 1.9218670933122826e-05, "loss": 339.8461, "step": 7040 }, { "epoch": 0.1355893086387699, "grad_norm": 681.7536763670116, "learning_rate": 1.9216304210714213e-05, "loss": 330.6961, "step": 7050 }, { "epoch": 0.1357816338992504, "grad_norm": 620.7059218852446, "learning_rate": 1.9213934055384498e-05, "loss": 351.9697, "step": 7060 }, { "epoch": 0.13597395915973093, "grad_norm": 576.6468376808148, "learning_rate": 1.9211560468016516e-05, "loss": 343.2772, "step": 7070 }, { "epoch": 0.13616628442021148, "grad_norm": 617.909082245975, "learning_rate": 1.9209183449494397e-05, "loss": 331.628, "step": 7080 }, { "epoch": 0.136358609680692, "grad_norm": 705.1055113964394, "learning_rate": 1.9206803000703534e-05, "loss": 340.7463, "step": 7090 }, { "epoch": 0.1365509349411725, "grad_norm": 675.4917083850122, "learning_rate": 1.9204419122530614e-05, "loss": 337.46, "step": 7100 }, { "epoch": 0.13674326020165303, "grad_norm": 562.871960310014, "learning_rate": 1.9202031815863583e-05, "loss": 336.0697, "step": 7110 }, { "epoch": 0.13693558546213355, "grad_norm": 667.0141217498331, "learning_rate": 1.919964108159168e-05, "loss": 337.3682, "step": 7120 }, { "epoch": 0.13712791072261407, "grad_norm": 641.5830951888042, "learning_rate": 1.9197246920605408e-05, "loss": 328.0353, "step": 7130 }, { "epoch": 0.13732023598309462, "grad_norm": 710.6090854819964, "learning_rate": 1.9194849333796557e-05, "loss": 333.5805, "step": 7140 }, { "epoch": 0.13751256124357514, "grad_norm": 705.2696170093016, "learning_rate": 1.9192448322058187e-05, "loss": 340.3354, "step": 7150 }, { "epoch": 0.13770488650405566, "grad_norm": 707.3442795351257, "learning_rate": 1.9190043886284635e-05, "loss": 336.0162, "step": 7160 }, { "epoch": 0.13789721176453618, "grad_norm": 606.1381628215864, "learning_rate": 1.9187636027371518e-05, "loss": 328.333, "step": 7170 }, { "epoch": 0.1380895370250167, "grad_norm": 766.2171264805419, "learning_rate": 1.9185224746215714e-05, "loss": 346.3918, "step": 7180 }, { "epoch": 0.13828186228549724, "grad_norm": 709.0230045615772, "learning_rate": 1.9182810043715394e-05, "loss": 333.1865, "step": 7190 }, { "epoch": 0.13847418754597776, "grad_norm": 620.7512744942927, "learning_rate": 1.9180391920769993e-05, "loss": 333.1366, "step": 7200 }, { "epoch": 0.13866651280645828, "grad_norm": 665.6664450047932, "learning_rate": 1.9177970378280215e-05, "loss": 335.7814, "step": 7210 }, { "epoch": 0.1388588380669388, "grad_norm": 611.292760408907, "learning_rate": 1.9175545417148056e-05, "loss": 343.0603, "step": 7220 }, { "epoch": 0.13905116332741932, "grad_norm": 734.063965140063, "learning_rate": 1.9173117038276766e-05, "loss": 340.2624, "step": 7230 }, { "epoch": 0.13924348858789987, "grad_norm": 720.0361974054264, "learning_rate": 1.9170685242570878e-05, "loss": 343.442, "step": 7240 }, { "epoch": 0.13943581384838039, "grad_norm": 594.3230591421019, "learning_rate": 1.9168250030936195e-05, "loss": 331.0506, "step": 7250 }, { "epoch": 0.1396281391088609, "grad_norm": 664.3136613492181, "learning_rate": 1.91658114042798e-05, "loss": 337.0721, "step": 7260 }, { "epoch": 0.13982046436934142, "grad_norm": 607.1773913861623, "learning_rate": 1.9163369363510026e-05, "loss": 330.6804, "step": 7270 }, { "epoch": 0.14001278962982194, "grad_norm": 599.1714129942029, "learning_rate": 1.916092390953651e-05, "loss": 335.4198, "step": 7280 }, { "epoch": 0.1402051148903025, "grad_norm": 632.3992832483767, "learning_rate": 1.915847504327013e-05, "loss": 334.0288, "step": 7290 }, { "epoch": 0.140397440150783, "grad_norm": 674.1409222683313, "learning_rate": 1.9156022765623057e-05, "loss": 334.1434, "step": 7300 }, { "epoch": 0.14058976541126353, "grad_norm": 669.1862374108495, "learning_rate": 1.9153567077508718e-05, "loss": 329.2249, "step": 7310 }, { "epoch": 0.14078209067174405, "grad_norm": 638.9009796322381, "learning_rate": 1.9151107979841824e-05, "loss": 332.2039, "step": 7320 }, { "epoch": 0.14097441593222457, "grad_norm": 595.6871552404173, "learning_rate": 1.9148645473538338e-05, "loss": 328.8862, "step": 7330 }, { "epoch": 0.14116674119270511, "grad_norm": 591.4480288783271, "learning_rate": 1.9146179559515507e-05, "loss": 329.3257, "step": 7340 }, { "epoch": 0.14135906645318563, "grad_norm": 619.5815660735237, "learning_rate": 1.9143710238691847e-05, "loss": 331.3541, "step": 7350 }, { "epoch": 0.14155139171366615, "grad_norm": 588.7563903968278, "learning_rate": 1.9141237511987137e-05, "loss": 329.303, "step": 7360 }, { "epoch": 0.14174371697414667, "grad_norm": 666.945728559143, "learning_rate": 1.9138761380322425e-05, "loss": 340.8805, "step": 7370 }, { "epoch": 0.1419360422346272, "grad_norm": 655.9890050856072, "learning_rate": 1.913628184462003e-05, "loss": 332.1415, "step": 7380 }, { "epoch": 0.14212836749510774, "grad_norm": 658.4454242978924, "learning_rate": 1.913379890580354e-05, "loss": 336.3184, "step": 7390 }, { "epoch": 0.14232069275558826, "grad_norm": 665.714018548699, "learning_rate": 1.9131312564797805e-05, "loss": 333.4196, "step": 7400 }, { "epoch": 0.14251301801606878, "grad_norm": 686.1007421990183, "learning_rate": 1.912882282252895e-05, "loss": 323.0851, "step": 7410 }, { "epoch": 0.1427053432765493, "grad_norm": 608.6239734024941, "learning_rate": 1.9126329679924364e-05, "loss": 337.3194, "step": 7420 }, { "epoch": 0.14289766853702981, "grad_norm": 604.2857848168327, "learning_rate": 1.9123833137912693e-05, "loss": 334.6706, "step": 7430 }, { "epoch": 0.14308999379751036, "grad_norm": 657.9875766448478, "learning_rate": 1.9121333197423867e-05, "loss": 350.4117, "step": 7440 }, { "epoch": 0.14328231905799088, "grad_norm": 774.1198900493707, "learning_rate": 1.9118829859389067e-05, "loss": 344.4096, "step": 7450 }, { "epoch": 0.1434746443184714, "grad_norm": 698.4358990600406, "learning_rate": 1.9116323124740748e-05, "loss": 331.8275, "step": 7460 }, { "epoch": 0.14366696957895192, "grad_norm": 716.0417295608296, "learning_rate": 1.9113812994412627e-05, "loss": 334.3449, "step": 7470 }, { "epoch": 0.14385929483943244, "grad_norm": 674.3569202341058, "learning_rate": 1.911129946933968e-05, "loss": 335.0988, "step": 7480 }, { "epoch": 0.14405162009991299, "grad_norm": 577.068949304984, "learning_rate": 1.9108782550458164e-05, "loss": 345.8265, "step": 7490 }, { "epoch": 0.1442439453603935, "grad_norm": 586.7220912778528, "learning_rate": 1.9106262238705583e-05, "loss": 341.1002, "step": 7500 }, { "epoch": 0.14443627062087402, "grad_norm": 540.3048891693815, "learning_rate": 1.9103738535020713e-05, "loss": 339.9908, "step": 7510 }, { "epoch": 0.14462859588135454, "grad_norm": 598.8228092314863, "learning_rate": 1.910121144034359e-05, "loss": 327.1496, "step": 7520 }, { "epoch": 0.14482092114183506, "grad_norm": 643.0008655568517, "learning_rate": 1.909868095561552e-05, "loss": 333.6696, "step": 7530 }, { "epoch": 0.1450132464023156, "grad_norm": 558.8109142460015, "learning_rate": 1.9096147081779063e-05, "loss": 341.1401, "step": 7540 }, { "epoch": 0.14520557166279613, "grad_norm": 636.1715695277825, "learning_rate": 1.9093609819778044e-05, "loss": 334.1333, "step": 7550 }, { "epoch": 0.14539789692327665, "grad_norm": 584.1294965803066, "learning_rate": 1.9091069170557554e-05, "loss": 340.2397, "step": 7560 }, { "epoch": 0.14559022218375717, "grad_norm": 588.7759487555222, "learning_rate": 1.9088525135063944e-05, "loss": 334.4876, "step": 7570 }, { "epoch": 0.1457825474442377, "grad_norm": 586.7412371937382, "learning_rate": 1.9085977714244822e-05, "loss": 331.9889, "step": 7580 }, { "epoch": 0.1459748727047182, "grad_norm": 623.1962765941188, "learning_rate": 1.908342690904906e-05, "loss": 337.2166, "step": 7590 }, { "epoch": 0.14616719796519875, "grad_norm": 605.0510054429278, "learning_rate": 1.9080872720426793e-05, "loss": 333.1549, "step": 7600 }, { "epoch": 0.14635952322567927, "grad_norm": 652.5794801569881, "learning_rate": 1.9078315149329413e-05, "loss": 322.555, "step": 7610 }, { "epoch": 0.1465518484861598, "grad_norm": 597.5804096642445, "learning_rate": 1.9075754196709574e-05, "loss": 339.9878, "step": 7620 }, { "epoch": 0.1467441737466403, "grad_norm": 582.7306056941077, "learning_rate": 1.9073189863521184e-05, "loss": 330.2846, "step": 7630 }, { "epoch": 0.14693649900712083, "grad_norm": 653.4866928502033, "learning_rate": 1.9070622150719423e-05, "loss": 334.186, "step": 7640 }, { "epoch": 0.14712882426760138, "grad_norm": 656.2065448271608, "learning_rate": 1.9068051059260716e-05, "loss": 333.487, "step": 7650 }, { "epoch": 0.1473211495280819, "grad_norm": 739.5742222454226, "learning_rate": 1.9065476590102752e-05, "loss": 338.4543, "step": 7660 }, { "epoch": 0.14751347478856242, "grad_norm": 659.8715115346957, "learning_rate": 1.906289874420448e-05, "loss": 344.8408, "step": 7670 }, { "epoch": 0.14770580004904293, "grad_norm": 585.4940058032927, "learning_rate": 1.9060317522526105e-05, "loss": 325.9033, "step": 7680 }, { "epoch": 0.14789812530952345, "grad_norm": 607.4854347187479, "learning_rate": 1.905773292602909e-05, "loss": 335.7657, "step": 7690 }, { "epoch": 0.148090450570004, "grad_norm": 667.362261826713, "learning_rate": 1.905514495567615e-05, "loss": 329.7076, "step": 7700 }, { "epoch": 0.14828277583048452, "grad_norm": 536.0276096399463, "learning_rate": 1.9052553612431268e-05, "loss": 345.2904, "step": 7710 }, { "epoch": 0.14847510109096504, "grad_norm": 614.1735022171694, "learning_rate": 1.9049958897259674e-05, "loss": 327.6999, "step": 7720 }, { "epoch": 0.14866742635144556, "grad_norm": 565.0650579387441, "learning_rate": 1.904736081112785e-05, "loss": 332.4991, "step": 7730 }, { "epoch": 0.14885975161192608, "grad_norm": 698.5517829582707, "learning_rate": 1.9044759355003552e-05, "loss": 336.9797, "step": 7740 }, { "epoch": 0.14905207687240662, "grad_norm": 654.2539263044354, "learning_rate": 1.904215452985577e-05, "loss": 330.1729, "step": 7750 }, { "epoch": 0.14924440213288714, "grad_norm": 657.149243090159, "learning_rate": 1.9039546336654765e-05, "loss": 340.3054, "step": 7760 }, { "epoch": 0.14943672739336766, "grad_norm": 595.0120563008803, "learning_rate": 1.903693477637204e-05, "loss": 327.7932, "step": 7770 }, { "epoch": 0.14962905265384818, "grad_norm": 586.3951566892398, "learning_rate": 1.903431984998036e-05, "loss": 331.5635, "step": 7780 }, { "epoch": 0.1498213779143287, "grad_norm": 652.1093705478022, "learning_rate": 1.9031701558453747e-05, "loss": 348.5615, "step": 7790 }, { "epoch": 0.15001370317480925, "grad_norm": 657.4240718892927, "learning_rate": 1.902907990276746e-05, "loss": 321.6034, "step": 7800 }, { "epoch": 0.15020602843528977, "grad_norm": 695.5448517014397, "learning_rate": 1.9026454883898036e-05, "loss": 338.977, "step": 7810 }, { "epoch": 0.1503983536957703, "grad_norm": 619.4563587059992, "learning_rate": 1.902382650282324e-05, "loss": 326.9431, "step": 7820 }, { "epoch": 0.1505906789562508, "grad_norm": 610.2707928349413, "learning_rate": 1.902119476052211e-05, "loss": 322.5898, "step": 7830 }, { "epoch": 0.15078300421673133, "grad_norm": 689.2975637286215, "learning_rate": 1.9018559657974918e-05, "loss": 332.3525, "step": 7840 }, { "epoch": 0.15097532947721187, "grad_norm": 720.2792218360873, "learning_rate": 1.90159211961632e-05, "loss": 363.9882, "step": 7850 }, { "epoch": 0.1511676547376924, "grad_norm": 757.0191041019314, "learning_rate": 1.901327937606974e-05, "loss": 331.6766, "step": 7860 }, { "epoch": 0.1513599799981729, "grad_norm": 668.9888292883863, "learning_rate": 1.901063419867857e-05, "loss": 337.8688, "step": 7870 }, { "epoch": 0.15155230525865343, "grad_norm": 609.4311837295198, "learning_rate": 1.900798566497498e-05, "loss": 330.1886, "step": 7880 }, { "epoch": 0.15174463051913395, "grad_norm": 592.320955820024, "learning_rate": 1.9005333775945496e-05, "loss": 333.4431, "step": 7890 }, { "epoch": 0.1519369557796145, "grad_norm": 652.1124104405058, "learning_rate": 1.9002678532577915e-05, "loss": 325.7347, "step": 7900 }, { "epoch": 0.15212928104009502, "grad_norm": 617.7623400505815, "learning_rate": 1.900001993586126e-05, "loss": 337.0063, "step": 7910 }, { "epoch": 0.15232160630057553, "grad_norm": 601.888597412138, "learning_rate": 1.8997357986785822e-05, "loss": 341.1846, "step": 7920 }, { "epoch": 0.15251393156105605, "grad_norm": 758.839415022158, "learning_rate": 1.899469268634313e-05, "loss": 338.0605, "step": 7930 }, { "epoch": 0.15270625682153657, "grad_norm": 611.9683144775163, "learning_rate": 1.8992024035525964e-05, "loss": 325.2646, "step": 7940 }, { "epoch": 0.15289858208201712, "grad_norm": 607.2554279084135, "learning_rate": 1.8989352035328352e-05, "loss": 335.0041, "step": 7950 }, { "epoch": 0.15309090734249764, "grad_norm": 641.7327001857283, "learning_rate": 1.8986676686745572e-05, "loss": 333.3883, "step": 7960 }, { "epoch": 0.15328323260297816, "grad_norm": 654.5740927509847, "learning_rate": 1.8983997990774145e-05, "loss": 332.6246, "step": 7970 }, { "epoch": 0.15347555786345868, "grad_norm": 589.067280724116, "learning_rate": 1.8981315948411842e-05, "loss": 336.7391, "step": 7980 }, { "epoch": 0.1536678831239392, "grad_norm": 645.77199995531, "learning_rate": 1.897863056065768e-05, "loss": 337.1307, "step": 7990 }, { "epoch": 0.15386020838441974, "grad_norm": 587.5748922401691, "learning_rate": 1.8975941828511923e-05, "loss": 329.6803, "step": 8000 }, { "epoch": 0.15405253364490026, "grad_norm": 624.962479288896, "learning_rate": 1.8973249752976075e-05, "loss": 330.1949, "step": 8010 }, { "epoch": 0.15424485890538078, "grad_norm": 678.6662294430486, "learning_rate": 1.8970554335052897e-05, "loss": 327.6478, "step": 8020 }, { "epoch": 0.1544371841658613, "grad_norm": 617.7029085283698, "learning_rate": 1.8967855575746375e-05, "loss": 329.804, "step": 8030 }, { "epoch": 0.15462950942634182, "grad_norm": 763.7413110459936, "learning_rate": 1.8965153476061763e-05, "loss": 335.0965, "step": 8040 }, { "epoch": 0.15482183468682234, "grad_norm": 618.8287237521282, "learning_rate": 1.896244803700555e-05, "loss": 330.6702, "step": 8050 }, { "epoch": 0.1550141599473029, "grad_norm": 625.8824151776017, "learning_rate": 1.8959739259585458e-05, "loss": 329.7756, "step": 8060 }, { "epoch": 0.1552064852077834, "grad_norm": 630.6961204218625, "learning_rate": 1.895702714481047e-05, "loss": 319.1156, "step": 8070 }, { "epoch": 0.15539881046826393, "grad_norm": 575.5776074392129, "learning_rate": 1.8954311693690798e-05, "loss": 317.7853, "step": 8080 }, { "epoch": 0.15559113572874445, "grad_norm": 705.1483458938271, "learning_rate": 1.8951592907237906e-05, "loss": 337.7698, "step": 8090 }, { "epoch": 0.15578346098922496, "grad_norm": 789.2054488690464, "learning_rate": 1.8948870786464496e-05, "loss": 339.1818, "step": 8100 }, { "epoch": 0.1559757862497055, "grad_norm": 604.9163142409899, "learning_rate": 1.8946145332384515e-05, "loss": 321.4155, "step": 8110 }, { "epoch": 0.15616811151018603, "grad_norm": 761.7838080703335, "learning_rate": 1.8943416546013148e-05, "loss": 340.1508, "step": 8120 }, { "epoch": 0.15636043677066655, "grad_norm": 687.0311909921215, "learning_rate": 1.894068442836682e-05, "loss": 327.8359, "step": 8130 }, { "epoch": 0.15655276203114707, "grad_norm": 665.0449909303669, "learning_rate": 1.8937948980463207e-05, "loss": 325.6881, "step": 8140 }, { "epoch": 0.1567450872916276, "grad_norm": 680.2703750609774, "learning_rate": 1.893521020332121e-05, "loss": 339.19, "step": 8150 }, { "epoch": 0.15693741255210814, "grad_norm": 712.2445486851191, "learning_rate": 1.8932468097960988e-05, "loss": 325.6005, "step": 8160 }, { "epoch": 0.15712973781258865, "grad_norm": 634.5783082217703, "learning_rate": 1.892972266540392e-05, "loss": 317.995, "step": 8170 }, { "epoch": 0.15732206307306917, "grad_norm": 590.7140843673521, "learning_rate": 1.8926973906672635e-05, "loss": 321.0524, "step": 8180 }, { "epoch": 0.1575143883335497, "grad_norm": 622.6352617042718, "learning_rate": 1.892422182279101e-05, "loss": 333.9439, "step": 8190 }, { "epoch": 0.1577067135940302, "grad_norm": 612.4240165467864, "learning_rate": 1.892146641478414e-05, "loss": 328.3868, "step": 8200 }, { "epoch": 0.15789903885451076, "grad_norm": 646.1644417693855, "learning_rate": 1.8918707683678376e-05, "loss": 319.4142, "step": 8210 }, { "epoch": 0.15809136411499128, "grad_norm": 640.908573407253, "learning_rate": 1.8915945630501296e-05, "loss": 326.9317, "step": 8220 }, { "epoch": 0.1582836893754718, "grad_norm": 692.7289847209751, "learning_rate": 1.8913180256281723e-05, "loss": 328.568, "step": 8230 }, { "epoch": 0.15847601463595232, "grad_norm": 594.7027713128756, "learning_rate": 1.8910411562049706e-05, "loss": 321.2016, "step": 8240 }, { "epoch": 0.15866833989643284, "grad_norm": 568.8287473525063, "learning_rate": 1.8907639548836548e-05, "loss": 325.0564, "step": 8250 }, { "epoch": 0.15886066515691338, "grad_norm": 639.3651153095016, "learning_rate": 1.8904864217674766e-05, "loss": 339.9999, "step": 8260 }, { "epoch": 0.1590529904173939, "grad_norm": 693.6719533383076, "learning_rate": 1.8902085569598136e-05, "loss": 319.3183, "step": 8270 }, { "epoch": 0.15924531567787442, "grad_norm": 566.8352443265295, "learning_rate": 1.889930360564165e-05, "loss": 322.6493, "step": 8280 }, { "epoch": 0.15943764093835494, "grad_norm": 550.8923192127909, "learning_rate": 1.8896518326841554e-05, "loss": 318.822, "step": 8290 }, { "epoch": 0.15962996619883546, "grad_norm": 636.4844212484624, "learning_rate": 1.889372973423531e-05, "loss": 329.6007, "step": 8300 }, { "epoch": 0.159822291459316, "grad_norm": 523.7027264193889, "learning_rate": 1.889093782886162e-05, "loss": 325.9975, "step": 8310 }, { "epoch": 0.16001461671979653, "grad_norm": 628.7320461357699, "learning_rate": 1.8888142611760433e-05, "loss": 324.9394, "step": 8320 }, { "epoch": 0.16020694198027705, "grad_norm": 632.8876609487307, "learning_rate": 1.8885344083972912e-05, "loss": 321.4737, "step": 8330 }, { "epoch": 0.16039926724075756, "grad_norm": 672.6796542766039, "learning_rate": 1.8882542246541468e-05, "loss": 341.4587, "step": 8340 }, { "epoch": 0.16059159250123808, "grad_norm": 685.4735672135316, "learning_rate": 1.887973710050974e-05, "loss": 339.7623, "step": 8350 }, { "epoch": 0.16078391776171863, "grad_norm": 656.0235413997698, "learning_rate": 1.887692864692259e-05, "loss": 321.4192, "step": 8360 }, { "epoch": 0.16097624302219915, "grad_norm": 605.1988479424814, "learning_rate": 1.887411688682613e-05, "loss": 331.8671, "step": 8370 }, { "epoch": 0.16116856828267967, "grad_norm": 607.0216375856667, "learning_rate": 1.887130182126769e-05, "loss": 335.5238, "step": 8380 }, { "epoch": 0.1613608935431602, "grad_norm": 648.5801451386891, "learning_rate": 1.8868483451295835e-05, "loss": 329.0254, "step": 8390 }, { "epoch": 0.1615532188036407, "grad_norm": 666.9006726646376, "learning_rate": 1.8865661777960366e-05, "loss": 324.4137, "step": 8400 }, { "epoch": 0.16174554406412125, "grad_norm": 612.2706066542921, "learning_rate": 1.88628368023123e-05, "loss": 330.1027, "step": 8410 }, { "epoch": 0.16193786932460177, "grad_norm": 1170.1623289110332, "learning_rate": 1.8860008525403903e-05, "loss": 344.3691, "step": 8420 }, { "epoch": 0.1621301945850823, "grad_norm": 657.262736303299, "learning_rate": 1.885717694828866e-05, "loss": 328.7068, "step": 8430 }, { "epoch": 0.1623225198455628, "grad_norm": 582.5110756513626, "learning_rate": 1.8854342072021282e-05, "loss": 318.0018, "step": 8440 }, { "epoch": 0.16251484510604333, "grad_norm": 633.038781675672, "learning_rate": 1.8851503897657717e-05, "loss": 328.3173, "step": 8450 }, { "epoch": 0.16270717036652388, "grad_norm": 582.4576397316638, "learning_rate": 1.8848662426255135e-05, "loss": 333.9782, "step": 8460 }, { "epoch": 0.1628994956270044, "grad_norm": 591.1262231592767, "learning_rate": 1.8845817658871942e-05, "loss": 327.9783, "step": 8470 }, { "epoch": 0.16309182088748492, "grad_norm": 607.7430738976485, "learning_rate": 1.8842969596567765e-05, "loss": 334.9482, "step": 8480 }, { "epoch": 0.16328414614796544, "grad_norm": 596.9041121165554, "learning_rate": 1.884011824040346e-05, "loss": 338.8978, "step": 8490 }, { "epoch": 0.16347647140844596, "grad_norm": 668.8208564240986, "learning_rate": 1.883726359144111e-05, "loss": 330.8432, "step": 8500 }, { "epoch": 0.16366879666892648, "grad_norm": 566.7436484831923, "learning_rate": 1.8834405650744023e-05, "loss": 325.4643, "step": 8510 }, { "epoch": 0.16386112192940702, "grad_norm": 650.1926791446019, "learning_rate": 1.883154441937674e-05, "loss": 332.9925, "step": 8520 }, { "epoch": 0.16405344718988754, "grad_norm": 553.1903423115506, "learning_rate": 1.8828679898405015e-05, "loss": 316.6825, "step": 8530 }, { "epoch": 0.16424577245036806, "grad_norm": 641.6381934900176, "learning_rate": 1.8825812088895835e-05, "loss": 329.6671, "step": 8540 }, { "epoch": 0.16443809771084858, "grad_norm": 610.3182236346137, "learning_rate": 1.882294099191742e-05, "loss": 332.7418, "step": 8550 }, { "epoch": 0.1646304229713291, "grad_norm": 637.8895889859295, "learning_rate": 1.88200666085392e-05, "loss": 324.0332, "step": 8560 }, { "epoch": 0.16482274823180965, "grad_norm": 621.1644254638762, "learning_rate": 1.8817188939831838e-05, "loss": 334.8323, "step": 8570 }, { "epoch": 0.16501507349229017, "grad_norm": 620.6175953613363, "learning_rate": 1.8814307986867214e-05, "loss": 318.596, "step": 8580 }, { "epoch": 0.16520739875277068, "grad_norm": 570.7161465732909, "learning_rate": 1.881142375071844e-05, "loss": 308.5833, "step": 8590 }, { "epoch": 0.1653997240132512, "grad_norm": 687.5877126737203, "learning_rate": 1.8808536232459844e-05, "loss": 332.4939, "step": 8600 }, { "epoch": 0.16559204927373172, "grad_norm": 520.6326252944293, "learning_rate": 1.8805645433166976e-05, "loss": 337.6086, "step": 8610 }, { "epoch": 0.16578437453421227, "grad_norm": 625.2257120664489, "learning_rate": 1.8802751353916618e-05, "loss": 331.5252, "step": 8620 }, { "epoch": 0.1659766997946928, "grad_norm": 633.4463197650607, "learning_rate": 1.8799853995786763e-05, "loss": 327.5775, "step": 8630 }, { "epoch": 0.1661690250551733, "grad_norm": 578.659849792283, "learning_rate": 1.8796953359856626e-05, "loss": 310.7596, "step": 8640 }, { "epoch": 0.16636135031565383, "grad_norm": 635.077697877117, "learning_rate": 1.879404944720665e-05, "loss": 324.8619, "step": 8650 }, { "epoch": 0.16655367557613435, "grad_norm": 638.3681282519153, "learning_rate": 1.8791142258918496e-05, "loss": 326.9639, "step": 8660 }, { "epoch": 0.1667460008366149, "grad_norm": 692.6432931836723, "learning_rate": 1.8788231796075037e-05, "loss": 334.5431, "step": 8670 }, { "epoch": 0.1669383260970954, "grad_norm": 599.4384153302473, "learning_rate": 1.8785318059760384e-05, "loss": 318.0557, "step": 8680 }, { "epoch": 0.16713065135757593, "grad_norm": 680.5360194556843, "learning_rate": 1.8782401051059838e-05, "loss": 321.4521, "step": 8690 }, { "epoch": 0.16732297661805645, "grad_norm": 537.1051080229824, "learning_rate": 1.8779480771059954e-05, "loss": 312.5561, "step": 8700 }, { "epoch": 0.16751530187853697, "grad_norm": 601.6596919036647, "learning_rate": 1.8776557220848477e-05, "loss": 322.7146, "step": 8710 }, { "epoch": 0.16770762713901752, "grad_norm": 1843.240441019918, "learning_rate": 1.8773630401514388e-05, "loss": 322.8632, "step": 8720 }, { "epoch": 0.16789995239949804, "grad_norm": 588.6423817884859, "learning_rate": 1.877070031414787e-05, "loss": 327.944, "step": 8730 }, { "epoch": 0.16809227765997856, "grad_norm": 713.3265183877421, "learning_rate": 1.876776695984034e-05, "loss": 322.0146, "step": 8740 }, { "epoch": 0.16828460292045908, "grad_norm": 587.0364122055531, "learning_rate": 1.8764830339684426e-05, "loss": 318.2402, "step": 8750 }, { "epoch": 0.1684769281809396, "grad_norm": 592.3967061258917, "learning_rate": 1.8761890454773965e-05, "loss": 322.9118, "step": 8760 }, { "epoch": 0.16866925344142014, "grad_norm": 517.6083040950411, "learning_rate": 1.8758947306204012e-05, "loss": 331.4004, "step": 8770 }, { "epoch": 0.16886157870190066, "grad_norm": 606.6972131828855, "learning_rate": 1.8756000895070854e-05, "loss": 311.8005, "step": 8780 }, { "epoch": 0.16905390396238118, "grad_norm": 559.9634998313228, "learning_rate": 1.8753051222471968e-05, "loss": 331.0142, "step": 8790 }, { "epoch": 0.1692462292228617, "grad_norm": 629.1482731144652, "learning_rate": 1.8750098289506066e-05, "loss": 321.3594, "step": 8800 }, { "epoch": 0.16943855448334222, "grad_norm": 631.8403884212312, "learning_rate": 1.8747142097273057e-05, "loss": 317.4358, "step": 8810 }, { "epoch": 0.16963087974382277, "grad_norm": 540.2865520805574, "learning_rate": 1.8744182646874085e-05, "loss": 321.2016, "step": 8820 }, { "epoch": 0.16982320500430328, "grad_norm": 541.4145711976639, "learning_rate": 1.8741219939411494e-05, "loss": 333.4331, "step": 8830 }, { "epoch": 0.1700155302647838, "grad_norm": 607.313765297245, "learning_rate": 1.873825397598884e-05, "loss": 330.6782, "step": 8840 }, { "epoch": 0.17020785552526432, "grad_norm": 653.2116764640416, "learning_rate": 1.8735284757710897e-05, "loss": 324.4951, "step": 8850 }, { "epoch": 0.17040018078574484, "grad_norm": 777.4636895302846, "learning_rate": 1.873231228568365e-05, "loss": 329.9455, "step": 8860 }, { "epoch": 0.1705925060462254, "grad_norm": 596.4840023893854, "learning_rate": 1.8729336561014294e-05, "loss": 331.0089, "step": 8870 }, { "epoch": 0.1707848313067059, "grad_norm": 609.0019120766575, "learning_rate": 1.8726357584811242e-05, "loss": 318.682, "step": 8880 }, { "epoch": 0.17097715656718643, "grad_norm": 556.8876107013019, "learning_rate": 1.8723375358184107e-05, "loss": 329.2189, "step": 8890 }, { "epoch": 0.17116948182766695, "grad_norm": 586.9369407016659, "learning_rate": 1.8720389882243722e-05, "loss": 319.5048, "step": 8900 }, { "epoch": 0.17136180708814747, "grad_norm": 586.363607153541, "learning_rate": 1.871740115810213e-05, "loss": 332.6656, "step": 8910 }, { "epoch": 0.171554132348628, "grad_norm": 612.540106430632, "learning_rate": 1.8714409186872578e-05, "loss": 329.3621, "step": 8920 }, { "epoch": 0.17174645760910853, "grad_norm": 594.4632984832891, "learning_rate": 1.871141396966953e-05, "loss": 331.4114, "step": 8930 }, { "epoch": 0.17193878286958905, "grad_norm": 555.042190549085, "learning_rate": 1.8708415507608647e-05, "loss": 318.5437, "step": 8940 }, { "epoch": 0.17213110813006957, "grad_norm": 604.785455902435, "learning_rate": 1.8705413801806817e-05, "loss": 325.3128, "step": 8950 }, { "epoch": 0.1723234333905501, "grad_norm": 557.973065606185, "learning_rate": 1.8702408853382114e-05, "loss": 318.5841, "step": 8960 }, { "epoch": 0.1725157586510306, "grad_norm": 582.253221118452, "learning_rate": 1.8699400663453842e-05, "loss": 319.9609, "step": 8970 }, { "epoch": 0.17270808391151116, "grad_norm": 610.8792579268109, "learning_rate": 1.8696389233142498e-05, "loss": 319.7314, "step": 8980 }, { "epoch": 0.17290040917199168, "grad_norm": 634.1242949498051, "learning_rate": 1.8693374563569792e-05, "loss": 319.977, "step": 8990 }, { "epoch": 0.1730927344324722, "grad_norm": 575.2208012699871, "learning_rate": 1.8690356655858634e-05, "loss": 322.9117, "step": 9000 }, { "epoch": 0.17328505969295271, "grad_norm": 632.0699801492443, "learning_rate": 1.868733551113315e-05, "loss": 330.6622, "step": 9010 }, { "epoch": 0.17347738495343323, "grad_norm": 574.6326927324745, "learning_rate": 1.8684311130518663e-05, "loss": 317.9359, "step": 9020 }, { "epoch": 0.17366971021391378, "grad_norm": 615.0153731040023, "learning_rate": 1.8681283515141705e-05, "loss": 319.9495, "step": 9030 }, { "epoch": 0.1738620354743943, "grad_norm": 592.5605615329267, "learning_rate": 1.8678252666130016e-05, "loss": 324.2824, "step": 9040 }, { "epoch": 0.17405436073487482, "grad_norm": 629.4690838948, "learning_rate": 1.8675218584612534e-05, "loss": 322.147, "step": 9050 }, { "epoch": 0.17424668599535534, "grad_norm": 638.7425197425805, "learning_rate": 1.8672181271719406e-05, "loss": 326.0541, "step": 9060 }, { "epoch": 0.17443901125583586, "grad_norm": 624.8859833589399, "learning_rate": 1.866914072858198e-05, "loss": 329.8311, "step": 9070 }, { "epoch": 0.1746313365163164, "grad_norm": 586.6595133477191, "learning_rate": 1.8666096956332805e-05, "loss": 321.3435, "step": 9080 }, { "epoch": 0.17482366177679692, "grad_norm": 574.6930742523509, "learning_rate": 1.8663049956105642e-05, "loss": 321.0171, "step": 9090 }, { "epoch": 0.17501598703727744, "grad_norm": 556.3218335808341, "learning_rate": 1.8659999729035445e-05, "loss": 319.513, "step": 9100 }, { "epoch": 0.17520831229775796, "grad_norm": 573.8952191533285, "learning_rate": 1.8656946276258373e-05, "loss": 320.1525, "step": 9110 }, { "epoch": 0.17540063755823848, "grad_norm": 591.4819894648808, "learning_rate": 1.8653889598911787e-05, "loss": 328.5546, "step": 9120 }, { "epoch": 0.17559296281871903, "grad_norm": 612.9698796081294, "learning_rate": 1.865082969813425e-05, "loss": 318.3815, "step": 9130 }, { "epoch": 0.17578528807919955, "grad_norm": 575.3068076396554, "learning_rate": 1.8647766575065523e-05, "loss": 322.6839, "step": 9140 }, { "epoch": 0.17597761333968007, "grad_norm": 761.8499357944903, "learning_rate": 1.864470023084657e-05, "loss": 339.5503, "step": 9150 }, { "epoch": 0.17616993860016059, "grad_norm": 533.6905660311675, "learning_rate": 1.864163066661955e-05, "loss": 323.9123, "step": 9160 }, { "epoch": 0.1763622638606411, "grad_norm": 558.9204987916131, "learning_rate": 1.8638557883527833e-05, "loss": 318.4725, "step": 9170 }, { "epoch": 0.17655458912112165, "grad_norm": 568.923807076423, "learning_rate": 1.8635481882715975e-05, "loss": 312.6829, "step": 9180 }, { "epoch": 0.17674691438160217, "grad_norm": 544.9506885773501, "learning_rate": 1.863240266532973e-05, "loss": 314.0525, "step": 9190 }, { "epoch": 0.1769392396420827, "grad_norm": 615.8507370787281, "learning_rate": 1.8629320232516063e-05, "loss": 324.0767, "step": 9200 }, { "epoch": 0.1771315649025632, "grad_norm": 559.5180910964125, "learning_rate": 1.862623458542313e-05, "loss": 321.9804, "step": 9210 }, { "epoch": 0.17732389016304373, "grad_norm": 551.4510927176239, "learning_rate": 1.862314572520028e-05, "loss": 319.111, "step": 9220 }, { "epoch": 0.17751621542352428, "grad_norm": 591.3738793871839, "learning_rate": 1.862005365299806e-05, "loss": 313.194, "step": 9230 }, { "epoch": 0.1777085406840048, "grad_norm": 618.7373130272308, "learning_rate": 1.8616958369968223e-05, "loss": 336.732, "step": 9240 }, { "epoch": 0.17790086594448531, "grad_norm": 590.6551751931191, "learning_rate": 1.8613859877263708e-05, "loss": 320.7936, "step": 9250 }, { "epoch": 0.17809319120496583, "grad_norm": 559.5270659580083, "learning_rate": 1.8610758176038647e-05, "loss": 311.5099, "step": 9260 }, { "epoch": 0.17828551646544635, "grad_norm": 631.9487065025297, "learning_rate": 1.860765326744838e-05, "loss": 316.0143, "step": 9270 }, { "epoch": 0.1784778417259269, "grad_norm": 632.8101121945842, "learning_rate": 1.8604545152649426e-05, "loss": 324.1879, "step": 9280 }, { "epoch": 0.17867016698640742, "grad_norm": 551.5142150541681, "learning_rate": 1.860143383279952e-05, "loss": 326.5638, "step": 9290 }, { "epoch": 0.17886249224688794, "grad_norm": 630.096978179395, "learning_rate": 1.859831930905756e-05, "loss": 315.0706, "step": 9300 }, { "epoch": 0.17905481750736846, "grad_norm": 607.1346668962717, "learning_rate": 1.8595201582583668e-05, "loss": 320.8782, "step": 9310 }, { "epoch": 0.17924714276784898, "grad_norm": 573.3499482985779, "learning_rate": 1.859208065453914e-05, "loss": 316.0985, "step": 9320 }, { "epoch": 0.17943946802832952, "grad_norm": 613.3873449690732, "learning_rate": 1.8588956526086472e-05, "loss": 333.3562, "step": 9330 }, { "epoch": 0.17963179328881004, "grad_norm": 543.1026188086619, "learning_rate": 1.8585829198389347e-05, "loss": 315.4055, "step": 9340 }, { "epoch": 0.17982411854929056, "grad_norm": 553.3238573858466, "learning_rate": 1.8582698672612646e-05, "loss": 315.6242, "step": 9350 }, { "epoch": 0.18001644380977108, "grad_norm": 552.0196877946281, "learning_rate": 1.8579564949922438e-05, "loss": 316.9576, "step": 9360 }, { "epoch": 0.1802087690702516, "grad_norm": 564.4067501038841, "learning_rate": 1.8576428031485984e-05, "loss": 310.0551, "step": 9370 }, { "epoch": 0.18040109433073215, "grad_norm": 567.4778220652782, "learning_rate": 1.8573287918471728e-05, "loss": 319.9249, "step": 9380 }, { "epoch": 0.18059341959121267, "grad_norm": 636.6557564391225, "learning_rate": 1.8570144612049322e-05, "loss": 319.27, "step": 9390 }, { "epoch": 0.1807857448516932, "grad_norm": 593.7142403903853, "learning_rate": 1.856699811338958e-05, "loss": 318.0193, "step": 9400 }, { "epoch": 0.1809780701121737, "grad_norm": 712.8194338129573, "learning_rate": 1.8563848423664536e-05, "loss": 326.8473, "step": 9410 }, { "epoch": 0.18117039537265422, "grad_norm": 620.172842857781, "learning_rate": 1.8560695544047388e-05, "loss": 335.5854, "step": 9420 }, { "epoch": 0.18136272063313474, "grad_norm": 740.7605188517435, "learning_rate": 1.8557539475712538e-05, "loss": 338.9682, "step": 9430 }, { "epoch": 0.1815550458936153, "grad_norm": 563.4029680194013, "learning_rate": 1.855438021983556e-05, "loss": 317.2997, "step": 9440 }, { "epoch": 0.1817473711540958, "grad_norm": 612.5074396086102, "learning_rate": 1.8551217777593233e-05, "loss": 318.5311, "step": 9450 }, { "epoch": 0.18193969641457633, "grad_norm": 597.0074091244863, "learning_rate": 1.8548052150163514e-05, "loss": 314.7106, "step": 9460 }, { "epoch": 0.18213202167505685, "grad_norm": 554.7269004078719, "learning_rate": 1.8544883338725544e-05, "loss": 306.6892, "step": 9470 }, { "epoch": 0.18232434693553737, "grad_norm": 577.8852459584037, "learning_rate": 1.8541711344459652e-05, "loss": 317.9633, "step": 9480 }, { "epoch": 0.18251667219601792, "grad_norm": 668.2529372949699, "learning_rate": 1.8538536168547353e-05, "loss": 326.373, "step": 9490 }, { "epoch": 0.18270899745649843, "grad_norm": 706.4714323223383, "learning_rate": 1.8535357812171356e-05, "loss": 330.0399, "step": 9500 }, { "epoch": 0.18290132271697895, "grad_norm": 630.4043554538246, "learning_rate": 1.8532176276515538e-05, "loss": 316.1966, "step": 9510 }, { "epoch": 0.18309364797745947, "grad_norm": 606.5355772061898, "learning_rate": 1.8528991562764967e-05, "loss": 318.0851, "step": 9520 }, { "epoch": 0.18328597323794, "grad_norm": 552.573228905615, "learning_rate": 1.85258036721059e-05, "loss": 313.2862, "step": 9530 }, { "epoch": 0.18347829849842054, "grad_norm": 544.2464163634174, "learning_rate": 1.8522612605725777e-05, "loss": 323.5065, "step": 9540 }, { "epoch": 0.18367062375890106, "grad_norm": 628.0910797173485, "learning_rate": 1.8519418364813215e-05, "loss": 318.409, "step": 9550 }, { "epoch": 0.18386294901938158, "grad_norm": 595.5542255882365, "learning_rate": 1.851622095055801e-05, "loss": 316.1256, "step": 9560 }, { "epoch": 0.1840552742798621, "grad_norm": 568.4977141118377, "learning_rate": 1.8513020364151155e-05, "loss": 317.5389, "step": 9570 }, { "epoch": 0.18424759954034262, "grad_norm": 690.7197119756217, "learning_rate": 1.850981660678481e-05, "loss": 315.9089, "step": 9580 }, { "epoch": 0.18443992480082316, "grad_norm": 617.4316572779147, "learning_rate": 1.8506609679652323e-05, "loss": 313.6196, "step": 9590 }, { "epoch": 0.18463225006130368, "grad_norm": 615.4863352262362, "learning_rate": 1.8503399583948224e-05, "loss": 325.9829, "step": 9600 }, { "epoch": 0.1848245753217842, "grad_norm": 560.5758724132193, "learning_rate": 1.8500186320868215e-05, "loss": 308.7002, "step": 9610 }, { "epoch": 0.18501690058226472, "grad_norm": 567.0365575400731, "learning_rate": 1.8496969891609186e-05, "loss": 329.2433, "step": 9620 }, { "epoch": 0.18520922584274524, "grad_norm": 568.0367621336482, "learning_rate": 1.8493750297369208e-05, "loss": 325.427, "step": 9630 }, { "epoch": 0.1854015511032258, "grad_norm": 550.4794735105326, "learning_rate": 1.849052753934752e-05, "loss": 326.7041, "step": 9640 }, { "epoch": 0.1855938763637063, "grad_norm": 569.5641753096411, "learning_rate": 1.8487301618744552e-05, "loss": 328.1982, "step": 9650 }, { "epoch": 0.18578620162418683, "grad_norm": 569.9761034507699, "learning_rate": 1.84840725367619e-05, "loss": 310.536, "step": 9660 }, { "epoch": 0.18597852688466734, "grad_norm": 601.6190225257928, "learning_rate": 1.8480840294602352e-05, "loss": 314.3488, "step": 9670 }, { "epoch": 0.18617085214514786, "grad_norm": 618.2553619325734, "learning_rate": 1.8477604893469857e-05, "loss": 327.1187, "step": 9680 }, { "epoch": 0.1863631774056284, "grad_norm": 752.9474167793641, "learning_rate": 1.847436633456955e-05, "loss": 325.2218, "step": 9690 }, { "epoch": 0.18655550266610893, "grad_norm": 603.0636498303711, "learning_rate": 1.8471124619107744e-05, "loss": 311.5493, "step": 9700 }, { "epoch": 0.18674782792658945, "grad_norm": 556.6395524569551, "learning_rate": 1.846787974829192e-05, "loss": 313.517, "step": 9710 }, { "epoch": 0.18694015318706997, "grad_norm": 565.1641454765296, "learning_rate": 1.8464631723330745e-05, "loss": 318.9753, "step": 9720 }, { "epoch": 0.1871324784475505, "grad_norm": 545.5890638241866, "learning_rate": 1.8461380545434054e-05, "loss": 306.4056, "step": 9730 }, { "epoch": 0.18732480370803103, "grad_norm": 537.9031786708614, "learning_rate": 1.8458126215812848e-05, "loss": 309.4228, "step": 9740 }, { "epoch": 0.18751712896851155, "grad_norm": 544.290504443795, "learning_rate": 1.845486873567932e-05, "loss": 313.062, "step": 9750 }, { "epoch": 0.18770945422899207, "grad_norm": 530.1043127865822, "learning_rate": 1.8451608106246822e-05, "loss": 309.1896, "step": 9760 }, { "epoch": 0.1879017794894726, "grad_norm": 1974.8696274493948, "learning_rate": 1.8448344328729893e-05, "loss": 323.1416, "step": 9770 }, { "epoch": 0.1880941047499531, "grad_norm": 587.2253624359105, "learning_rate": 1.8445077404344226e-05, "loss": 309.5772, "step": 9780 }, { "epoch": 0.18828643001043366, "grad_norm": 609.2034365251025, "learning_rate": 1.8441807334306702e-05, "loss": 313.9833, "step": 9790 }, { "epoch": 0.18847875527091418, "grad_norm": 536.3689693630588, "learning_rate": 1.8438534119835365e-05, "loss": 308.4715, "step": 9800 }, { "epoch": 0.1886710805313947, "grad_norm": 564.0070248860729, "learning_rate": 1.8435257762149436e-05, "loss": 323.2902, "step": 9810 }, { "epoch": 0.18886340579187522, "grad_norm": 541.1094813955598, "learning_rate": 1.8431978262469305e-05, "loss": 324.6447, "step": 9820 }, { "epoch": 0.18905573105235574, "grad_norm": 574.3562023468872, "learning_rate": 1.8428695622016532e-05, "loss": 310.8277, "step": 9830 }, { "epoch": 0.18924805631283628, "grad_norm": 573.6950744377439, "learning_rate": 1.8425409842013843e-05, "loss": 322.8399, "step": 9840 }, { "epoch": 0.1894403815733168, "grad_norm": 524.8989840894691, "learning_rate": 1.8422120923685135e-05, "loss": 307.1694, "step": 9850 }, { "epoch": 0.18963270683379732, "grad_norm": 571.3089044268454, "learning_rate": 1.8418828868255484e-05, "loss": 312.775, "step": 9860 }, { "epoch": 0.18982503209427784, "grad_norm": 553.3508873042184, "learning_rate": 1.8415533676951117e-05, "loss": 321.306, "step": 9870 }, { "epoch": 0.19001735735475836, "grad_norm": 565.7110978394054, "learning_rate": 1.8412235350999444e-05, "loss": 324.4741, "step": 9880 }, { "epoch": 0.19020968261523888, "grad_norm": 621.1426134637459, "learning_rate": 1.840893389162903e-05, "loss": 346.3106, "step": 9890 }, { "epoch": 0.19040200787571943, "grad_norm": 535.0733987249391, "learning_rate": 1.8405629300069626e-05, "loss": 320.5854, "step": 9900 }, { "epoch": 0.19059433313619994, "grad_norm": 670.4813425818097, "learning_rate": 1.840232157755213e-05, "loss": 320.277, "step": 9910 }, { "epoch": 0.19078665839668046, "grad_norm": 617.809155563652, "learning_rate": 1.8399010725308616e-05, "loss": 311.902, "step": 9920 }, { "epoch": 0.19097898365716098, "grad_norm": 568.6148701158531, "learning_rate": 1.839569674457232e-05, "loss": 318.7672, "step": 9930 }, { "epoch": 0.1911713089176415, "grad_norm": 569.9689571022637, "learning_rate": 1.8392379636577647e-05, "loss": 328.9152, "step": 9940 }, { "epoch": 0.19136363417812205, "grad_norm": 574.9266939103386, "learning_rate": 1.8389059402560165e-05, "loss": 318.1828, "step": 9950 }, { "epoch": 0.19155595943860257, "grad_norm": 624.1369244831499, "learning_rate": 1.8385736043756605e-05, "loss": 313.1912, "step": 9960 }, { "epoch": 0.1917482846990831, "grad_norm": 556.790792342805, "learning_rate": 1.838240956140486e-05, "loss": 319.589, "step": 9970 }, { "epoch": 0.1919406099595636, "grad_norm": 565.7194728615357, "learning_rate": 1.8379079956743996e-05, "loss": 327.9372, "step": 9980 }, { "epoch": 0.19213293522004413, "grad_norm": 557.3348851818414, "learning_rate": 1.8375747231014233e-05, "loss": 316.3132, "step": 9990 }, { "epoch": 0.19232526048052467, "grad_norm": 644.8743942262005, "learning_rate": 1.8372411385456956e-05, "loss": 317.5455, "step": 10000 }, { "epoch": 0.1925175857410052, "grad_norm": 611.0130672157912, "learning_rate": 1.8369072421314717e-05, "loss": 311.7699, "step": 10010 }, { "epoch": 0.1927099110014857, "grad_norm": 643.2944485951936, "learning_rate": 1.8365730339831212e-05, "loss": 310.4235, "step": 10020 }, { "epoch": 0.19290223626196623, "grad_norm": 620.125833263131, "learning_rate": 1.8362385142251328e-05, "loss": 317.8711, "step": 10030 }, { "epoch": 0.19309456152244675, "grad_norm": 642.5480738830829, "learning_rate": 1.8359036829821085e-05, "loss": 311.2124, "step": 10040 }, { "epoch": 0.1932868867829273, "grad_norm": 542.278779870074, "learning_rate": 1.8355685403787677e-05, "loss": 308.168, "step": 10050 }, { "epoch": 0.19347921204340782, "grad_norm": 570.9987527543067, "learning_rate": 1.8352330865399457e-05, "loss": 307.4103, "step": 10060 }, { "epoch": 0.19367153730388834, "grad_norm": 588.362824919098, "learning_rate": 1.834897321590593e-05, "loss": 311.6161, "step": 10070 }, { "epoch": 0.19386386256436886, "grad_norm": 555.9423039717024, "learning_rate": 1.8345612456557767e-05, "loss": 303.3604, "step": 10080 }, { "epoch": 0.19405618782484937, "grad_norm": 620.6001646296406, "learning_rate": 1.83422485886068e-05, "loss": 308.114, "step": 10090 }, { "epoch": 0.19424851308532992, "grad_norm": 599.7921359122605, "learning_rate": 1.833888161330601e-05, "loss": 325.1521, "step": 10100 }, { "epoch": 0.19444083834581044, "grad_norm": 511.20343896387277, "learning_rate": 1.833551153190954e-05, "loss": 309.2157, "step": 10110 }, { "epoch": 0.19463316360629096, "grad_norm": 516.257174776799, "learning_rate": 1.8332138345672686e-05, "loss": 309.4907, "step": 10120 }, { "epoch": 0.19482548886677148, "grad_norm": 563.1608572740786, "learning_rate": 1.832876205585191e-05, "loss": 316.6437, "step": 10130 }, { "epoch": 0.195017814127252, "grad_norm": 553.3475752491817, "learning_rate": 1.8325382663704826e-05, "loss": 312.1191, "step": 10140 }, { "epoch": 0.19521013938773255, "grad_norm": 531.5864215766379, "learning_rate": 1.8322000170490194e-05, "loss": 306.8882, "step": 10150 }, { "epoch": 0.19540246464821306, "grad_norm": 598.789798633388, "learning_rate": 1.831861457746794e-05, "loss": 313.6952, "step": 10160 }, { "epoch": 0.19559478990869358, "grad_norm": 555.5193570635953, "learning_rate": 1.8315225885899144e-05, "loss": 317.2797, "step": 10170 }, { "epoch": 0.1957871151691741, "grad_norm": 554.3786845937254, "learning_rate": 1.8311834097046038e-05, "loss": 308.5955, "step": 10180 }, { "epoch": 0.19597944042965462, "grad_norm": 667.1400002145294, "learning_rate": 1.8308439212172e-05, "loss": 318.206, "step": 10190 }, { "epoch": 0.19617176569013517, "grad_norm": 563.4817826452128, "learning_rate": 1.830504123254158e-05, "loss": 323.7995, "step": 10200 }, { "epoch": 0.1963640909506157, "grad_norm": 510.24096084298566, "learning_rate": 1.830164015942046e-05, "loss": 315.2723, "step": 10210 }, { "epoch": 0.1965564162110962, "grad_norm": 897.7329135630494, "learning_rate": 1.8298235994075488e-05, "loss": 307.7924, "step": 10220 }, { "epoch": 0.19674874147157673, "grad_norm": 569.6326868493514, "learning_rate": 1.829482873777466e-05, "loss": 322.1561, "step": 10230 }, { "epoch": 0.19694106673205725, "grad_norm": 14005.331331945481, "learning_rate": 1.8291418391787116e-05, "loss": 336.6291, "step": 10240 }, { "epoch": 0.1971333919925378, "grad_norm": 580.7591541191108, "learning_rate": 1.8288004957383162e-05, "loss": 313.3958, "step": 10250 }, { "epoch": 0.1973257172530183, "grad_norm": 589.4898875747193, "learning_rate": 1.8284588435834242e-05, "loss": 314.5349, "step": 10260 }, { "epoch": 0.19751804251349883, "grad_norm": 596.9254042414015, "learning_rate": 1.828116882841295e-05, "loss": 316.0492, "step": 10270 }, { "epoch": 0.19771036777397935, "grad_norm": 627.1956505220512, "learning_rate": 1.8277746136393042e-05, "loss": 323.5244, "step": 10280 }, { "epoch": 0.19790269303445987, "grad_norm": 605.5364380774679, "learning_rate": 1.827432036104941e-05, "loss": 312.2124, "step": 10290 }, { "epoch": 0.19809501829494042, "grad_norm": 554.8727958777251, "learning_rate": 1.8270891503658096e-05, "loss": 311.6395, "step": 10300 }, { "epoch": 0.19828734355542094, "grad_norm": 650.6178601497293, "learning_rate": 1.8267459565496298e-05, "loss": 317.9526, "step": 10310 }, { "epoch": 0.19847966881590146, "grad_norm": 596.3615622397797, "learning_rate": 1.8264024547842346e-05, "loss": 313.5765, "step": 10320 }, { "epoch": 0.19867199407638197, "grad_norm": 556.4179671335443, "learning_rate": 1.8260586451975745e-05, "loss": 307.384, "step": 10330 }, { "epoch": 0.1988643193368625, "grad_norm": 630.927531284662, "learning_rate": 1.825714527917711e-05, "loss": 321.1718, "step": 10340 }, { "epoch": 0.199056644597343, "grad_norm": 589.9843909541497, "learning_rate": 1.8253701030728235e-05, "loss": 316.1164, "step": 10350 }, { "epoch": 0.19924896985782356, "grad_norm": 606.7908564946719, "learning_rate": 1.8250253707912036e-05, "loss": 317.7305, "step": 10360 }, { "epoch": 0.19944129511830408, "grad_norm": 545.2147663391177, "learning_rate": 1.8246803312012593e-05, "loss": 303.6862, "step": 10370 }, { "epoch": 0.1996336203787846, "grad_norm": 541.3311699591419, "learning_rate": 1.8243349844315116e-05, "loss": 304.131, "step": 10380 }, { "epoch": 0.19982594563926512, "grad_norm": 747.9351319944042, "learning_rate": 1.8239893306105966e-05, "loss": 305.2509, "step": 10390 }, { "epoch": 0.20001827089974564, "grad_norm": 609.5978708681915, "learning_rate": 1.823643369867264e-05, "loss": 308.1789, "step": 10400 }, { "epoch": 0.20021059616022618, "grad_norm": 3345.2680882620302, "learning_rate": 1.8232971023303798e-05, "loss": 303.1881, "step": 10410 }, { "epoch": 0.2004029214207067, "grad_norm": 685.981103328202, "learning_rate": 1.8229505281289216e-05, "loss": 320.6667, "step": 10420 }, { "epoch": 0.20059524668118722, "grad_norm": 527.8690783067844, "learning_rate": 1.8226036473919836e-05, "loss": 302.881, "step": 10430 }, { "epoch": 0.20078757194166774, "grad_norm": 554.5850449966307, "learning_rate": 1.8222564602487724e-05, "loss": 311.9064, "step": 10440 }, { "epoch": 0.20097989720214826, "grad_norm": 588.4499184429349, "learning_rate": 1.82190896682861e-05, "loss": 309.5534, "step": 10450 }, { "epoch": 0.2011722224626288, "grad_norm": 597.2463973919886, "learning_rate": 1.8215611672609316e-05, "loss": 298.9231, "step": 10460 }, { "epoch": 0.20136454772310933, "grad_norm": 594.1158512309214, "learning_rate": 1.821213061675287e-05, "loss": 308.9411, "step": 10470 }, { "epoch": 0.20155687298358985, "grad_norm": 589.4478962209655, "learning_rate": 1.8208646502013395e-05, "loss": 309.6522, "step": 10480 }, { "epoch": 0.20174919824407037, "grad_norm": 564.6156159595868, "learning_rate": 1.820515932968867e-05, "loss": 310.7461, "step": 10490 }, { "epoch": 0.20194152350455088, "grad_norm": 609.3176191883252, "learning_rate": 1.8201669101077608e-05, "loss": 310.0776, "step": 10500 }, { "epoch": 0.20213384876503143, "grad_norm": 529.8941187328708, "learning_rate": 1.819817581748026e-05, "loss": 324.803, "step": 10510 }, { "epoch": 0.20232617402551195, "grad_norm": 591.0983066934331, "learning_rate": 1.8194679480197817e-05, "loss": 308.572, "step": 10520 }, { "epoch": 0.20251849928599247, "grad_norm": 563.2375949857469, "learning_rate": 1.8191180090532608e-05, "loss": 313.5502, "step": 10530 }, { "epoch": 0.202710824546473, "grad_norm": 521.668327254525, "learning_rate": 1.8187677649788097e-05, "loss": 305.6655, "step": 10540 }, { "epoch": 0.2029031498069535, "grad_norm": 538.8431736144586, "learning_rate": 1.8184172159268884e-05, "loss": 314.5525, "step": 10550 }, { "epoch": 0.20309547506743406, "grad_norm": 532.0437684267256, "learning_rate": 1.818066362028071e-05, "loss": 312.4004, "step": 10560 }, { "epoch": 0.20328780032791458, "grad_norm": 571.2943931355435, "learning_rate": 1.8177152034130442e-05, "loss": 321.7054, "step": 10570 }, { "epoch": 0.2034801255883951, "grad_norm": 527.4672384896986, "learning_rate": 1.8173637402126093e-05, "loss": 322.5302, "step": 10580 }, { "epoch": 0.2036724508488756, "grad_norm": 555.3133093459373, "learning_rate": 1.8170119725576808e-05, "loss": 313.6648, "step": 10590 }, { "epoch": 0.20386477610935613, "grad_norm": 550.6922694561971, "learning_rate": 1.8166599005792852e-05, "loss": 310.2605, "step": 10600 }, { "epoch": 0.20405710136983668, "grad_norm": 574.4122805106684, "learning_rate": 1.816307524408565e-05, "loss": 305.6629, "step": 10610 }, { "epoch": 0.2042494266303172, "grad_norm": 572.2872382916447, "learning_rate": 1.8159548441767732e-05, "loss": 308.1092, "step": 10620 }, { "epoch": 0.20444175189079772, "grad_norm": 494.4107401724572, "learning_rate": 1.8156018600152777e-05, "loss": 310.6198, "step": 10630 }, { "epoch": 0.20463407715127824, "grad_norm": 593.3457969007505, "learning_rate": 1.81524857205556e-05, "loss": 315.9913, "step": 10640 }, { "epoch": 0.20482640241175876, "grad_norm": 596.9842940297239, "learning_rate": 1.814894980429213e-05, "loss": 327.6448, "step": 10650 }, { "epoch": 0.2050187276722393, "grad_norm": 620.556813154289, "learning_rate": 1.8145410852679447e-05, "loss": 313.397, "step": 10660 }, { "epoch": 0.20521105293271982, "grad_norm": 545.4983018426418, "learning_rate": 1.8141868867035745e-05, "loss": 307.4965, "step": 10670 }, { "epoch": 0.20540337819320034, "grad_norm": 517.2041095178209, "learning_rate": 1.8138323848680354e-05, "loss": 306.488, "step": 10680 }, { "epoch": 0.20559570345368086, "grad_norm": 542.41660542042, "learning_rate": 1.813477579893374e-05, "loss": 308.95, "step": 10690 }, { "epoch": 0.20578802871416138, "grad_norm": 623.2558012166824, "learning_rate": 1.8131224719117497e-05, "loss": 309.7012, "step": 10700 }, { "epoch": 0.20598035397464193, "grad_norm": 568.6246804880005, "learning_rate": 1.8127670610554332e-05, "loss": 310.0226, "step": 10710 }, { "epoch": 0.20617267923512245, "grad_norm": 583.3577234354464, "learning_rate": 1.81241134745681e-05, "loss": 306.5802, "step": 10720 }, { "epoch": 0.20636500449560297, "grad_norm": 665.236530462498, "learning_rate": 1.812055331248377e-05, "loss": 315.9872, "step": 10730 }, { "epoch": 0.20655732975608349, "grad_norm": 548.2231390472602, "learning_rate": 1.811699012562745e-05, "loss": 310.2577, "step": 10740 }, { "epoch": 0.206749655016564, "grad_norm": 600.6657202388725, "learning_rate": 1.8113423915326362e-05, "loss": 320.0417, "step": 10750 }, { "epoch": 0.20694198027704455, "grad_norm": 596.3607001974965, "learning_rate": 1.8109854682908864e-05, "loss": 319.0931, "step": 10760 }, { "epoch": 0.20713430553752507, "grad_norm": 575.9230733072659, "learning_rate": 1.8106282429704436e-05, "loss": 312.4939, "step": 10770 }, { "epoch": 0.2073266307980056, "grad_norm": 591.2000295821231, "learning_rate": 1.810270715704368e-05, "loss": 319.2311, "step": 10780 }, { "epoch": 0.2075189560584861, "grad_norm": 587.3434212700096, "learning_rate": 1.809912886625833e-05, "loss": 310.2359, "step": 10790 }, { "epoch": 0.20771128131896663, "grad_norm": 559.4690945914582, "learning_rate": 1.8095547558681243e-05, "loss": 323.4319, "step": 10800 }, { "epoch": 0.20790360657944715, "grad_norm": 547.0120576898984, "learning_rate": 1.809196323564639e-05, "loss": 320.4141, "step": 10810 }, { "epoch": 0.2080959318399277, "grad_norm": 558.1889120357515, "learning_rate": 1.8088375898488873e-05, "loss": 304.7489, "step": 10820 }, { "epoch": 0.20828825710040821, "grad_norm": 556.2533478506813, "learning_rate": 1.808478554854492e-05, "loss": 313.6703, "step": 10830 }, { "epoch": 0.20848058236088873, "grad_norm": 563.1230411656593, "learning_rate": 1.8081192187151873e-05, "loss": 309.7366, "step": 10840 }, { "epoch": 0.20867290762136925, "grad_norm": 591.210113740253, "learning_rate": 1.8077595815648202e-05, "loss": 304.9538, "step": 10850 }, { "epoch": 0.20886523288184977, "grad_norm": 10459.65265964558, "learning_rate": 1.8073996435373494e-05, "loss": 315.4726, "step": 10860 }, { "epoch": 0.20905755814233032, "grad_norm": 582.7877514749445, "learning_rate": 1.8070394047668466e-05, "loss": 310.0286, "step": 10870 }, { "epoch": 0.20924988340281084, "grad_norm": 524.5587946163101, "learning_rate": 1.8066788653874936e-05, "loss": 299.4925, "step": 10880 }, { "epoch": 0.20944220866329136, "grad_norm": 593.4216247321357, "learning_rate": 1.806318025533586e-05, "loss": 310.3626, "step": 10890 }, { "epoch": 0.20963453392377188, "grad_norm": 567.0255107174645, "learning_rate": 1.805956885339531e-05, "loss": 319.6213, "step": 10900 }, { "epoch": 0.2098268591842524, "grad_norm": 668.0588558218071, "learning_rate": 1.8055954449398472e-05, "loss": 320.125, "step": 10910 }, { "epoch": 0.21001918444473294, "grad_norm": 608.1719502025485, "learning_rate": 1.8052337044691648e-05, "loss": 306.7742, "step": 10920 }, { "epoch": 0.21021150970521346, "grad_norm": 547.1221358290404, "learning_rate": 1.8048716640622262e-05, "loss": 315.7775, "step": 10930 }, { "epoch": 0.21040383496569398, "grad_norm": 561.1660562013045, "learning_rate": 1.8045093238538856e-05, "loss": 318.5856, "step": 10940 }, { "epoch": 0.2105961602261745, "grad_norm": 521.8198888881208, "learning_rate": 1.8041466839791087e-05, "loss": 308.4359, "step": 10950 }, { "epoch": 0.21078848548665502, "grad_norm": 569.5603157795923, "learning_rate": 1.8037837445729733e-05, "loss": 309.5919, "step": 10960 }, { "epoch": 0.21098081074713557, "grad_norm": 735.8905510143819, "learning_rate": 1.803420505770668e-05, "loss": 305.6322, "step": 10970 }, { "epoch": 0.21117313600761609, "grad_norm": 558.9935899233416, "learning_rate": 1.803056967707493e-05, "loss": 306.0594, "step": 10980 }, { "epoch": 0.2113654612680966, "grad_norm": 560.0566283194227, "learning_rate": 1.8026931305188603e-05, "loss": 315.5964, "step": 10990 }, { "epoch": 0.21155778652857712, "grad_norm": 565.3773377963382, "learning_rate": 1.802328994340294e-05, "loss": 314.1717, "step": 11000 }, { "epoch": 0.21175011178905764, "grad_norm": 657.1041754965652, "learning_rate": 1.8019645593074275e-05, "loss": 309.5446, "step": 11010 }, { "epoch": 0.2119424370495382, "grad_norm": 6572.125609919242, "learning_rate": 1.8015998255560082e-05, "loss": 301.9791, "step": 11020 }, { "epoch": 0.2121347623100187, "grad_norm": 520.7740018235891, "learning_rate": 1.801234793221892e-05, "loss": 302.8429, "step": 11030 }, { "epoch": 0.21232708757049923, "grad_norm": 521.0593018524652, "learning_rate": 1.800869462441049e-05, "loss": 291.2706, "step": 11040 }, { "epoch": 0.21251941283097975, "grad_norm": 588.3576071221225, "learning_rate": 1.8005038333495572e-05, "loss": 311.5564, "step": 11050 }, { "epoch": 0.21271173809146027, "grad_norm": 569.3722260088634, "learning_rate": 1.8001379060836088e-05, "loss": 310.9369, "step": 11060 }, { "epoch": 0.21290406335194081, "grad_norm": 522.31194426954, "learning_rate": 1.7997716807795046e-05, "loss": 305.1383, "step": 11070 }, { "epoch": 0.21309638861242133, "grad_norm": 630.9407644759252, "learning_rate": 1.7994051575736585e-05, "loss": 303.0762, "step": 11080 }, { "epoch": 0.21328871387290185, "grad_norm": 577.9988210606358, "learning_rate": 1.7990383366025935e-05, "loss": 313.8966, "step": 11090 }, { "epoch": 0.21348103913338237, "grad_norm": 596.8309543571359, "learning_rate": 1.7986712180029448e-05, "loss": 302.5571, "step": 11100 }, { "epoch": 0.2136733643938629, "grad_norm": 608.5128534922504, "learning_rate": 1.798303801911458e-05, "loss": 307.915, "step": 11110 }, { "epoch": 0.21386568965434344, "grad_norm": 507.2044641262266, "learning_rate": 1.7979360884649894e-05, "loss": 305.7907, "step": 11120 }, { "epoch": 0.21405801491482396, "grad_norm": 618.3844228500747, "learning_rate": 1.7975680778005058e-05, "loss": 303.9178, "step": 11130 }, { "epoch": 0.21425034017530448, "grad_norm": 531.483007694421, "learning_rate": 1.7971997700550856e-05, "loss": 311.8147, "step": 11140 }, { "epoch": 0.214442665435785, "grad_norm": 522.6184963030615, "learning_rate": 1.7968311653659177e-05, "loss": 312.576, "step": 11150 }, { "epoch": 0.21463499069626552, "grad_norm": 576.1439487221288, "learning_rate": 1.7964622638703003e-05, "loss": 314.4616, "step": 11160 }, { "epoch": 0.21482731595674606, "grad_norm": 658.0935489057877, "learning_rate": 1.796093065705644e-05, "loss": 303.3928, "step": 11170 }, { "epoch": 0.21501964121722658, "grad_norm": 541.920868642979, "learning_rate": 1.7957235710094686e-05, "loss": 296.9919, "step": 11180 }, { "epoch": 0.2152119664777071, "grad_norm": 537.1510752699605, "learning_rate": 1.7953537799194042e-05, "loss": 309.1538, "step": 11190 }, { "epoch": 0.21540429173818762, "grad_norm": 542.4923321336655, "learning_rate": 1.7949836925731934e-05, "loss": 319.2759, "step": 11200 }, { "epoch": 0.21559661699866814, "grad_norm": 534.3221139524255, "learning_rate": 1.7946133091086858e-05, "loss": 312.7573, "step": 11210 }, { "epoch": 0.21578894225914869, "grad_norm": 531.0779717398163, "learning_rate": 1.7942426296638447e-05, "loss": 311.4548, "step": 11220 }, { "epoch": 0.2159812675196292, "grad_norm": 580.0357496194683, "learning_rate": 1.7938716543767412e-05, "loss": 306.2069, "step": 11230 }, { "epoch": 0.21617359278010972, "grad_norm": 663.0224193800212, "learning_rate": 1.7935003833855576e-05, "loss": 314.1557, "step": 11240 }, { "epoch": 0.21636591804059024, "grad_norm": 596.3035535643022, "learning_rate": 1.7931288168285863e-05, "loss": 309.4287, "step": 11250 }, { "epoch": 0.21655824330107076, "grad_norm": 604.8046540891174, "learning_rate": 1.79275695484423e-05, "loss": 306.127, "step": 11260 }, { "epoch": 0.21675056856155128, "grad_norm": 683.3040649358255, "learning_rate": 1.7923847975710003e-05, "loss": 312.069, "step": 11270 }, { "epoch": 0.21694289382203183, "grad_norm": 580.2089930819467, "learning_rate": 1.7920123451475203e-05, "loss": 317.7584, "step": 11280 }, { "epoch": 0.21713521908251235, "grad_norm": 715.5375686891132, "learning_rate": 1.7916395977125227e-05, "loss": 301.8664, "step": 11290 }, { "epoch": 0.21732754434299287, "grad_norm": 598.8332989322347, "learning_rate": 1.7912665554048486e-05, "loss": 311.0309, "step": 11300 }, { "epoch": 0.2175198696034734, "grad_norm": 521.3338148124917, "learning_rate": 1.7908932183634515e-05, "loss": 305.5654, "step": 11310 }, { "epoch": 0.2177121948639539, "grad_norm": 544.6096900019014, "learning_rate": 1.790519586727392e-05, "loss": 316.0747, "step": 11320 }, { "epoch": 0.21790452012443445, "grad_norm": 594.5251674337779, "learning_rate": 1.790145660635843e-05, "loss": 309.1104, "step": 11330 }, { "epoch": 0.21809684538491497, "grad_norm": 523.6243561827803, "learning_rate": 1.7897714402280844e-05, "loss": 310.518, "step": 11340 }, { "epoch": 0.2182891706453955, "grad_norm": 596.961000222273, "learning_rate": 1.789396925643508e-05, "loss": 322.9565, "step": 11350 }, { "epoch": 0.218481495905876, "grad_norm": 583.2129825484865, "learning_rate": 1.7890221170216144e-05, "loss": 305.7768, "step": 11360 }, { "epoch": 0.21867382116635653, "grad_norm": 565.6471703498117, "learning_rate": 1.788647014502013e-05, "loss": 302.2495, "step": 11370 }, { "epoch": 0.21886614642683708, "grad_norm": 547.5601541733321, "learning_rate": 1.7882716182244242e-05, "loss": 300.4357, "step": 11380 }, { "epoch": 0.2190584716873176, "grad_norm": 549.4726268170266, "learning_rate": 1.7878959283286758e-05, "loss": 305.4491, "step": 11390 }, { "epoch": 0.21925079694779812, "grad_norm": 559.2964976846993, "learning_rate": 1.787519944954707e-05, "loss": 302.4354, "step": 11400 }, { "epoch": 0.21944312220827863, "grad_norm": 611.1297539064526, "learning_rate": 1.7871436682425645e-05, "loss": 304.8266, "step": 11410 }, { "epoch": 0.21963544746875915, "grad_norm": 1160.4401872213225, "learning_rate": 1.786767098332406e-05, "loss": 311.4399, "step": 11420 }, { "epoch": 0.2198277727292397, "grad_norm": 580.2492717341304, "learning_rate": 1.7863902353644972e-05, "loss": 296.7177, "step": 11430 }, { "epoch": 0.22002009798972022, "grad_norm": 515.5698565524134, "learning_rate": 1.7860130794792137e-05, "loss": 304.8289, "step": 11440 }, { "epoch": 0.22021242325020074, "grad_norm": 648.2413118586277, "learning_rate": 1.7856356308170394e-05, "loss": 312.8327, "step": 11450 }, { "epoch": 0.22040474851068126, "grad_norm": 537.3760888697842, "learning_rate": 1.7852578895185675e-05, "loss": 318.3744, "step": 11460 }, { "epoch": 0.22059707377116178, "grad_norm": 780.1642022244691, "learning_rate": 1.7848798557245008e-05, "loss": 312.9608, "step": 11470 }, { "epoch": 0.22078939903164232, "grad_norm": 568.0645992415976, "learning_rate": 1.7845015295756506e-05, "loss": 315.363, "step": 11480 }, { "epoch": 0.22098172429212284, "grad_norm": 542.4850324846815, "learning_rate": 1.784122911212937e-05, "loss": 306.4943, "step": 11490 }, { "epoch": 0.22117404955260336, "grad_norm": 570.1950611492051, "learning_rate": 1.7837440007773895e-05, "loss": 301.757, "step": 11500 }, { "epoch": 0.22136637481308388, "grad_norm": 534.0707811987077, "learning_rate": 1.783364798410146e-05, "loss": 303.0547, "step": 11510 }, { "epoch": 0.2215587000735644, "grad_norm": 581.8037279006699, "learning_rate": 1.782985304252452e-05, "loss": 305.9409, "step": 11520 }, { "epoch": 0.22175102533404495, "grad_norm": 612.2218836941645, "learning_rate": 1.7826055184456643e-05, "loss": 312.1448, "step": 11530 }, { "epoch": 0.22194335059452547, "grad_norm": 585.198505690767, "learning_rate": 1.7822254411312455e-05, "loss": 310.4949, "step": 11540 }, { "epoch": 0.222135675855006, "grad_norm": 509.0699217340508, "learning_rate": 1.781845072450769e-05, "loss": 302.3834, "step": 11550 }, { "epoch": 0.2223280011154865, "grad_norm": 1289.932570834128, "learning_rate": 1.7814644125459157e-05, "loss": 322.5668, "step": 11560 }, { "epoch": 0.22252032637596703, "grad_norm": 572.8635606759849, "learning_rate": 1.781083461558475e-05, "loss": 305.3428, "step": 11570 }, { "epoch": 0.22271265163644757, "grad_norm": 590.3681787819465, "learning_rate": 1.7807022196303447e-05, "loss": 308.0463, "step": 11580 }, { "epoch": 0.2229049768969281, "grad_norm": 572.7963502973921, "learning_rate": 1.7803206869035318e-05, "loss": 304.7383, "step": 11590 }, { "epoch": 0.2230973021574086, "grad_norm": 592.1830499673953, "learning_rate": 1.7799388635201498e-05, "loss": 309.5392, "step": 11600 }, { "epoch": 0.22328962741788913, "grad_norm": 618.3588998095855, "learning_rate": 1.7795567496224226e-05, "loss": 305.4437, "step": 11610 }, { "epoch": 0.22348195267836965, "grad_norm": 613.8310598933958, "learning_rate": 1.7791743453526812e-05, "loss": 310.5372, "step": 11620 }, { "epoch": 0.2236742779388502, "grad_norm": 559.7342391082025, "learning_rate": 1.778791650853364e-05, "loss": 306.2013, "step": 11630 }, { "epoch": 0.22386660319933072, "grad_norm": 570.6339732172067, "learning_rate": 1.77840866626702e-05, "loss": 303.6731, "step": 11640 }, { "epoch": 0.22405892845981124, "grad_norm": 608.3936964239032, "learning_rate": 1.7780253917363026e-05, "loss": 307.5745, "step": 11650 }, { "epoch": 0.22425125372029175, "grad_norm": 561.8810491748844, "learning_rate": 1.7776418274039767e-05, "loss": 305.4122, "step": 11660 }, { "epoch": 0.22444357898077227, "grad_norm": 601.7923080450054, "learning_rate": 1.7772579734129136e-05, "loss": 308.8326, "step": 11670 }, { "epoch": 0.2246359042412528, "grad_norm": 510.0679196459565, "learning_rate": 1.776873829906092e-05, "loss": 304.9228, "step": 11680 }, { "epoch": 0.22482822950173334, "grad_norm": 580.1129868599272, "learning_rate": 1.776489397026599e-05, "loss": 319.2673, "step": 11690 }, { "epoch": 0.22502055476221386, "grad_norm": 558.125191843454, "learning_rate": 1.7761046749176302e-05, "loss": 301.482, "step": 11700 }, { "epoch": 0.22521288002269438, "grad_norm": 578.7053744596622, "learning_rate": 1.7757196637224874e-05, "loss": 308.5137, "step": 11710 }, { "epoch": 0.2254052052831749, "grad_norm": 565.3965773509369, "learning_rate": 1.7753343635845817e-05, "loss": 305.2033, "step": 11720 }, { "epoch": 0.22559753054365542, "grad_norm": 601.6063592003611, "learning_rate": 1.7749487746474305e-05, "loss": 304.9521, "step": 11730 }, { "epoch": 0.22578985580413596, "grad_norm": 563.3044930296599, "learning_rate": 1.7745628970546592e-05, "loss": 306.7528, "step": 11740 }, { "epoch": 0.22598218106461648, "grad_norm": 566.3592920601307, "learning_rate": 1.774176730950001e-05, "loss": 293.4409, "step": 11750 }, { "epoch": 0.226174506325097, "grad_norm": 559.484689119316, "learning_rate": 1.7737902764772967e-05, "loss": 313.2567, "step": 11760 }, { "epoch": 0.22636683158557752, "grad_norm": 683.5063044158878, "learning_rate": 1.773403533780494e-05, "loss": 303.2523, "step": 11770 }, { "epoch": 0.22655915684605804, "grad_norm": 549.2141830491124, "learning_rate": 1.7730165030036482e-05, "loss": 302.4703, "step": 11780 }, { "epoch": 0.2267514821065386, "grad_norm": 509.9092349300344, "learning_rate": 1.772629184290922e-05, "loss": 302.844, "step": 11790 }, { "epoch": 0.2269438073670191, "grad_norm": 509.02943504371177, "learning_rate": 1.7722415777865845e-05, "loss": 296.8754, "step": 11800 }, { "epoch": 0.22713613262749963, "grad_norm": 507.21021105769086, "learning_rate": 1.771853683635014e-05, "loss": 303.3082, "step": 11810 }, { "epoch": 0.22732845788798015, "grad_norm": 560.8562615770735, "learning_rate": 1.7714655019806932e-05, "loss": 305.1188, "step": 11820 }, { "epoch": 0.22752078314846066, "grad_norm": 584.0407112283326, "learning_rate": 1.7710770329682145e-05, "loss": 316.5687, "step": 11830 }, { "epoch": 0.2277131084089412, "grad_norm": 559.5412673922286, "learning_rate": 1.770688276742276e-05, "loss": 307.9617, "step": 11840 }, { "epoch": 0.22790543366942173, "grad_norm": 555.3979357564721, "learning_rate": 1.770299233447682e-05, "loss": 306.0619, "step": 11850 }, { "epoch": 0.22809775892990225, "grad_norm": 548.153270289919, "learning_rate": 1.769909903229346e-05, "loss": 299.7813, "step": 11860 }, { "epoch": 0.22829008419038277, "grad_norm": 847.3025142500062, "learning_rate": 1.7695202862322863e-05, "loss": 303.0395, "step": 11870 }, { "epoch": 0.2284824094508633, "grad_norm": 562.1608863876545, "learning_rate": 1.769130382601629e-05, "loss": 310.7528, "step": 11880 }, { "epoch": 0.22867473471134384, "grad_norm": 545.5164611487909, "learning_rate": 1.768740192482607e-05, "loss": 307.0152, "step": 11890 }, { "epoch": 0.22886705997182435, "grad_norm": 562.817471848758, "learning_rate": 1.7683497160205595e-05, "loss": 308.5016, "step": 11900 }, { "epoch": 0.22905938523230487, "grad_norm": 565.3214936806978, "learning_rate": 1.7679589533609323e-05, "loss": 310.3926, "step": 11910 }, { "epoch": 0.2292517104927854, "grad_norm": 521.7265229421238, "learning_rate": 1.767567904649278e-05, "loss": 316.9577, "step": 11920 }, { "epoch": 0.2294440357532659, "grad_norm": 599.3494243960317, "learning_rate": 1.7671765700312567e-05, "loss": 310.6751, "step": 11930 }, { "epoch": 0.22963636101374646, "grad_norm": 527.4853462062857, "learning_rate": 1.7667849496526327e-05, "loss": 299.9745, "step": 11940 }, { "epoch": 0.22982868627422698, "grad_norm": 646.3771999166254, "learning_rate": 1.7663930436592793e-05, "loss": 307.4025, "step": 11950 }, { "epoch": 0.2300210115347075, "grad_norm": 604.8359393697335, "learning_rate": 1.7660008521971744e-05, "loss": 305.6533, "step": 11960 }, { "epoch": 0.23021333679518802, "grad_norm": 550.8517134078223, "learning_rate": 1.765608375412403e-05, "loss": 303.4489, "step": 11970 }, { "epoch": 0.23040566205566854, "grad_norm": 518.0078862451358, "learning_rate": 1.7652156134511563e-05, "loss": 308.9563, "step": 11980 }, { "epoch": 0.23059798731614908, "grad_norm": 532.9641436854245, "learning_rate": 1.7648225664597314e-05, "loss": 310.4812, "step": 11990 }, { "epoch": 0.2307903125766296, "grad_norm": 571.0244842024215, "learning_rate": 1.764429234584532e-05, "loss": 307.4173, "step": 12000 }, { "epoch": 0.23098263783711012, "grad_norm": 501.98426173204206, "learning_rate": 1.7640356179720674e-05, "loss": 290.2313, "step": 12010 }, { "epoch": 0.23117496309759064, "grad_norm": 561.4131530710366, "learning_rate": 1.7636417167689538e-05, "loss": 311.5085, "step": 12020 }, { "epoch": 0.23136728835807116, "grad_norm": 516.0838738035736, "learning_rate": 1.7632475311219125e-05, "loss": 297.6313, "step": 12030 }, { "epoch": 0.2315596136185517, "grad_norm": 521.037984161895, "learning_rate": 1.7628530611777716e-05, "loss": 304.9156, "step": 12040 }, { "epoch": 0.23175193887903223, "grad_norm": 591.7189014436991, "learning_rate": 1.7624583070834646e-05, "loss": 309.6885, "step": 12050 }, { "epoch": 0.23194426413951275, "grad_norm": 569.7181944729994, "learning_rate": 1.7620632689860298e-05, "loss": 310.1962, "step": 12060 }, { "epoch": 0.23213658939999327, "grad_norm": 609.9481837210487, "learning_rate": 1.761667947032614e-05, "loss": 311.4228, "step": 12070 }, { "epoch": 0.23232891466047378, "grad_norm": 656.8367275661511, "learning_rate": 1.761272341370467e-05, "loss": 302.2965, "step": 12080 }, { "epoch": 0.23252123992095433, "grad_norm": 523.6629882282335, "learning_rate": 1.7608764521469456e-05, "loss": 296.982, "step": 12090 }, { "epoch": 0.23271356518143485, "grad_norm": 509.9413106798182, "learning_rate": 1.760480279509512e-05, "loss": 303.0857, "step": 12100 }, { "epoch": 0.23290589044191537, "grad_norm": 500.69792839245997, "learning_rate": 1.7600838236057342e-05, "loss": 305.0519, "step": 12110 }, { "epoch": 0.2330982157023959, "grad_norm": 547.4583321583707, "learning_rate": 1.759687084583285e-05, "loss": 301.2934, "step": 12120 }, { "epoch": 0.2332905409628764, "grad_norm": 591.861994057418, "learning_rate": 1.7592900625899437e-05, "loss": 309.8254, "step": 12130 }, { "epoch": 0.23348286622335693, "grad_norm": 555.3955269832737, "learning_rate": 1.7588927577735943e-05, "loss": 297.1861, "step": 12140 }, { "epoch": 0.23367519148383747, "grad_norm": 579.9502284682283, "learning_rate": 1.758495170282226e-05, "loss": 303.1287, "step": 12150 }, { "epoch": 0.233867516744318, "grad_norm": 525.6276995371792, "learning_rate": 1.7580973002639337e-05, "loss": 308.7534, "step": 12160 }, { "epoch": 0.2340598420047985, "grad_norm": 539.1975318065454, "learning_rate": 1.7576991478669174e-05, "loss": 304.014, "step": 12170 }, { "epoch": 0.23425216726527903, "grad_norm": 510.54652607605846, "learning_rate": 1.7573007132394823e-05, "loss": 304.0979, "step": 12180 }, { "epoch": 0.23444449252575955, "grad_norm": 512.4275361943896, "learning_rate": 1.756901996530039e-05, "loss": 305.9756, "step": 12190 }, { "epoch": 0.2346368177862401, "grad_norm": 544.3164113279283, "learning_rate": 1.7565029978871025e-05, "loss": 308.1389, "step": 12200 }, { "epoch": 0.23482914304672062, "grad_norm": 508.8993668217466, "learning_rate": 1.7561037174592933e-05, "loss": 302.3686, "step": 12210 }, { "epoch": 0.23502146830720114, "grad_norm": 648.9558393513472, "learning_rate": 1.7557041553953368e-05, "loss": 299.2363, "step": 12220 }, { "epoch": 0.23521379356768166, "grad_norm": 530.6570961756034, "learning_rate": 1.7553043118440634e-05, "loss": 302.2812, "step": 12230 }, { "epoch": 0.23540611882816218, "grad_norm": 558.0991365727535, "learning_rate": 1.7549041869544077e-05, "loss": 301.6841, "step": 12240 }, { "epoch": 0.23559844408864272, "grad_norm": 606.5203842871052, "learning_rate": 1.7545037808754105e-05, "loss": 297.3742, "step": 12250 }, { "epoch": 0.23579076934912324, "grad_norm": 583.2151066740137, "learning_rate": 1.754103093756216e-05, "loss": 292.108, "step": 12260 }, { "epoch": 0.23598309460960376, "grad_norm": 480.17492009611, "learning_rate": 1.7537021257460732e-05, "loss": 298.9061, "step": 12270 }, { "epoch": 0.23617541987008428, "grad_norm": 565.2323195979287, "learning_rate": 1.7533008769943366e-05, "loss": 297.8132, "step": 12280 }, { "epoch": 0.2363677451305648, "grad_norm": 537.2276979691027, "learning_rate": 1.7528993476504644e-05, "loss": 300.3809, "step": 12290 }, { "epoch": 0.23656007039104535, "grad_norm": 503.6743938791051, "learning_rate": 1.7524975378640198e-05, "loss": 300.62, "step": 12300 }, { "epoch": 0.23675239565152587, "grad_norm": 515.7159681803287, "learning_rate": 1.75209544778467e-05, "loss": 301.4, "step": 12310 }, { "epoch": 0.23694472091200638, "grad_norm": 618.2664143332121, "learning_rate": 1.7516930775621873e-05, "loss": 316.3602, "step": 12320 }, { "epoch": 0.2371370461724869, "grad_norm": 620.7039598249363, "learning_rate": 1.751290427346448e-05, "loss": 303.4737, "step": 12330 }, { "epoch": 0.23732937143296742, "grad_norm": 482.7419963875554, "learning_rate": 1.7508874972874325e-05, "loss": 306.8661, "step": 12340 }, { "epoch": 0.23752169669344797, "grad_norm": 582.2451716073994, "learning_rate": 1.7504842875352254e-05, "loss": 301.6255, "step": 12350 }, { "epoch": 0.2377140219539285, "grad_norm": 598.501108814587, "learning_rate": 1.7500807982400155e-05, "loss": 301.3222, "step": 12360 }, { "epoch": 0.237906347214409, "grad_norm": 511.4566218165681, "learning_rate": 1.749677029552097e-05, "loss": 298.636, "step": 12370 }, { "epoch": 0.23809867247488953, "grad_norm": 479.60512775342823, "learning_rate": 1.7492729816218656e-05, "loss": 304.15, "step": 12380 }, { "epoch": 0.23829099773537005, "grad_norm": 615.06572835905, "learning_rate": 1.7488686545998237e-05, "loss": 295.4768, "step": 12390 }, { "epoch": 0.2384833229958506, "grad_norm": 569.7022305860479, "learning_rate": 1.7484640486365757e-05, "loss": 301.8458, "step": 12400 }, { "epoch": 0.2386756482563311, "grad_norm": 597.0175513962797, "learning_rate": 1.748059163882831e-05, "loss": 303.6509, "step": 12410 }, { "epoch": 0.23886797351681163, "grad_norm": 544.3996289087911, "learning_rate": 1.747654000489402e-05, "loss": 299.9608, "step": 12420 }, { "epoch": 0.23906029877729215, "grad_norm": 531.5376345597161, "learning_rate": 1.7472485586072062e-05, "loss": 298.2016, "step": 12430 }, { "epoch": 0.23925262403777267, "grad_norm": 619.75704443402, "learning_rate": 1.746842838387264e-05, "loss": 294.9825, "step": 12440 }, { "epoch": 0.23944494929825322, "grad_norm": 714.9424099709997, "learning_rate": 1.746436839980698e-05, "loss": 310.8927, "step": 12450 }, { "epoch": 0.23963727455873374, "grad_norm": 525.0519228337564, "learning_rate": 1.7460305635387372e-05, "loss": 309.1251, "step": 12460 }, { "epoch": 0.23982959981921426, "grad_norm": 567.3426431180301, "learning_rate": 1.745624009212713e-05, "loss": 304.2225, "step": 12470 }, { "epoch": 0.24002192507969478, "grad_norm": 656.0862941060705, "learning_rate": 1.7452171771540593e-05, "loss": 306.3733, "step": 12480 }, { "epoch": 0.2402142503401753, "grad_norm": 594.1934987842768, "learning_rate": 1.744810067514315e-05, "loss": 303.9965, "step": 12490 }, { "epoch": 0.24040657560065584, "grad_norm": 500.73218516629913, "learning_rate": 1.7444026804451214e-05, "loss": 301.8244, "step": 12500 }, { "epoch": 0.24059890086113636, "grad_norm": 744.5292787989708, "learning_rate": 1.7439950160982236e-05, "loss": 300.2846, "step": 12510 }, { "epoch": 0.24079122612161688, "grad_norm": 533.8349242343193, "learning_rate": 1.7435870746254697e-05, "loss": 301.0945, "step": 12520 }, { "epoch": 0.2409835513820974, "grad_norm": 567.8827021058287, "learning_rate": 1.7431788561788116e-05, "loss": 298.9499, "step": 12530 }, { "epoch": 0.24117587664257792, "grad_norm": 549.3287660004423, "learning_rate": 1.742770360910303e-05, "loss": 308.0892, "step": 12540 }, { "epoch": 0.24136820190305847, "grad_norm": 492.5093797278217, "learning_rate": 1.7423615889721027e-05, "loss": 297.5649, "step": 12550 }, { "epoch": 0.24156052716353899, "grad_norm": 557.3529802738032, "learning_rate": 1.7419525405164705e-05, "loss": 310.8019, "step": 12560 }, { "epoch": 0.2417528524240195, "grad_norm": 621.6773333995523, "learning_rate": 1.7415432156957713e-05, "loss": 306.2981, "step": 12570 }, { "epoch": 0.24194517768450002, "grad_norm": 496.63227621071644, "learning_rate": 1.741133614662471e-05, "loss": 310.3273, "step": 12580 }, { "epoch": 0.24213750294498054, "grad_norm": 617.2419228856705, "learning_rate": 1.7407237375691394e-05, "loss": 314.5568, "step": 12590 }, { "epoch": 0.24232982820546106, "grad_norm": 602.1228111301185, "learning_rate": 1.740313584568449e-05, "loss": 297.8498, "step": 12600 }, { "epoch": 0.2425221534659416, "grad_norm": 571.1331473048172, "learning_rate": 1.739903155813175e-05, "loss": 309.2491, "step": 12610 }, { "epoch": 0.24271447872642213, "grad_norm": 575.4803355379195, "learning_rate": 1.7394924514561955e-05, "loss": 302.9375, "step": 12620 }, { "epoch": 0.24290680398690265, "grad_norm": 501.7659896461661, "learning_rate": 1.739081471650491e-05, "loss": 295.4767, "step": 12630 }, { "epoch": 0.24309912924738317, "grad_norm": 502.862777557259, "learning_rate": 1.7386702165491443e-05, "loss": 308.9037, "step": 12640 }, { "epoch": 0.24329145450786369, "grad_norm": 540.9587849078885, "learning_rate": 1.738258686305342e-05, "loss": 298.2269, "step": 12650 }, { "epoch": 0.24348377976834423, "grad_norm": 560.3407627782135, "learning_rate": 1.7378468810723713e-05, "loss": 310.2495, "step": 12660 }, { "epoch": 0.24367610502882475, "grad_norm": 492.6380104274809, "learning_rate": 1.7374348010036235e-05, "loss": 305.7097, "step": 12670 }, { "epoch": 0.24386843028930527, "grad_norm": 525.378463611019, "learning_rate": 1.7370224462525916e-05, "loss": 302.5172, "step": 12680 }, { "epoch": 0.2440607555497858, "grad_norm": 755.9612244630933, "learning_rate": 1.736609816972871e-05, "loss": 306.4417, "step": 12690 }, { "epoch": 0.2442530808102663, "grad_norm": 499.8778357657815, "learning_rate": 1.7361969133181585e-05, "loss": 291.3082, "step": 12700 }, { "epoch": 0.24444540607074686, "grad_norm": 532.941702434763, "learning_rate": 1.735783735442255e-05, "loss": 303.3684, "step": 12710 }, { "epoch": 0.24463773133122738, "grad_norm": 555.5077925873111, "learning_rate": 1.7353702834990617e-05, "loss": 322.8935, "step": 12720 }, { "epoch": 0.2448300565917079, "grad_norm": 674.4471482697896, "learning_rate": 1.7349565576425828e-05, "loss": 299.7307, "step": 12730 }, { "epoch": 0.24502238185218841, "grad_norm": 512.1654025301481, "learning_rate": 1.7345425580269245e-05, "loss": 304.8884, "step": 12740 }, { "epoch": 0.24521470711266893, "grad_norm": 504.974209543955, "learning_rate": 1.734128284806295e-05, "loss": 299.1169, "step": 12750 }, { "epoch": 0.24540703237314948, "grad_norm": 528.5600347707219, "learning_rate": 1.7337137381350033e-05, "loss": 298.6342, "step": 12760 }, { "epoch": 0.24559935763363, "grad_norm": 510.04817930568515, "learning_rate": 1.7332989181674623e-05, "loss": 291.0542, "step": 12770 }, { "epoch": 0.24579168289411052, "grad_norm": 535.6167463029005, "learning_rate": 1.7328838250581846e-05, "loss": 300.2511, "step": 12780 }, { "epoch": 0.24598400815459104, "grad_norm": 520.3089821326004, "learning_rate": 1.7324684589617862e-05, "loss": 296.7046, "step": 12790 }, { "epoch": 0.24617633341507156, "grad_norm": 596.9425948418302, "learning_rate": 1.7320528200329846e-05, "loss": 304.3464, "step": 12800 }, { "epoch": 0.2463686586755521, "grad_norm": 581.5105053657546, "learning_rate": 1.7316369084265973e-05, "loss": 292.5362, "step": 12810 }, { "epoch": 0.24656098393603262, "grad_norm": 640.1870699333763, "learning_rate": 1.731220724297545e-05, "loss": 290.2909, "step": 12820 }, { "epoch": 0.24675330919651314, "grad_norm": 550.8889959690337, "learning_rate": 1.730804267800849e-05, "loss": 301.2725, "step": 12830 }, { "epoch": 0.24694563445699366, "grad_norm": 615.6400733063065, "learning_rate": 1.7303875390916338e-05, "loss": 293.0394, "step": 12840 }, { "epoch": 0.24713795971747418, "grad_norm": 572.3040979754213, "learning_rate": 1.729970538325122e-05, "loss": 293.7845, "step": 12850 }, { "epoch": 0.24733028497795473, "grad_norm": 573.7230864891717, "learning_rate": 1.7295532656566413e-05, "loss": 305.99, "step": 12860 }, { "epoch": 0.24752261023843525, "grad_norm": 525.6066716731832, "learning_rate": 1.729135721241618e-05, "loss": 296.0017, "step": 12870 }, { "epoch": 0.24771493549891577, "grad_norm": 519.4868282111524, "learning_rate": 1.7287179052355803e-05, "loss": 306.5752, "step": 12880 }, { "epoch": 0.2479072607593963, "grad_norm": 582.4963962470334, "learning_rate": 1.7282998177941586e-05, "loss": 309.5309, "step": 12890 }, { "epoch": 0.2480995860198768, "grad_norm": 699.9081612715943, "learning_rate": 1.7278814590730826e-05, "loss": 296.2731, "step": 12900 }, { "epoch": 0.24829191128035735, "grad_norm": 583.8544732208774, "learning_rate": 1.7274628292281846e-05, "loss": 294.651, "step": 12910 }, { "epoch": 0.24848423654083787, "grad_norm": 516.028225369162, "learning_rate": 1.727043928415397e-05, "loss": 295.5151, "step": 12920 }, { "epoch": 0.2486765618013184, "grad_norm": 556.5898797919142, "learning_rate": 1.726624756790754e-05, "loss": 301.7446, "step": 12930 }, { "epoch": 0.2488688870617989, "grad_norm": 569.5119985607591, "learning_rate": 1.7262053145103893e-05, "loss": 304.4961, "step": 12940 }, { "epoch": 0.24906121232227943, "grad_norm": 634.5322402933849, "learning_rate": 1.7257856017305387e-05, "loss": 299.3397, "step": 12950 }, { "epoch": 0.24925353758275998, "grad_norm": 597.663695630516, "learning_rate": 1.725365618607538e-05, "loss": 293.8806, "step": 12960 }, { "epoch": 0.2494458628432405, "grad_norm": 579.0377545012869, "learning_rate": 1.7249453652978244e-05, "loss": 295.638, "step": 12970 }, { "epoch": 0.24963818810372101, "grad_norm": 495.84654086795075, "learning_rate": 1.7245248419579353e-05, "loss": 302.5208, "step": 12980 }, { "epoch": 0.24983051336420153, "grad_norm": 527.5841204917268, "learning_rate": 1.7241040487445082e-05, "loss": 295.0076, "step": 12990 }, { "epoch": 0.25002283862468205, "grad_norm": 495.69539542594634, "learning_rate": 1.723682985814282e-05, "loss": 296.1797, "step": 13000 }, { "epoch": 0.2502151638851626, "grad_norm": 584.3560064789656, "learning_rate": 1.7232616533240958e-05, "loss": 300.3373, "step": 13010 }, { "epoch": 0.2504074891456431, "grad_norm": 544.5313476904952, "learning_rate": 1.7228400514308884e-05, "loss": 304.6298, "step": 13020 }, { "epoch": 0.2505998144061236, "grad_norm": 579.1546103880229, "learning_rate": 1.7224181802917003e-05, "loss": 295.4002, "step": 13030 }, { "epoch": 0.2507921396666042, "grad_norm": 516.2741820851571, "learning_rate": 1.721996040063671e-05, "loss": 303.0126, "step": 13040 }, { "epoch": 0.2509844649270847, "grad_norm": 630.6213058366193, "learning_rate": 1.7215736309040408e-05, "loss": 297.3093, "step": 13050 }, { "epoch": 0.2511767901875652, "grad_norm": 559.9087309239119, "learning_rate": 1.7211509529701507e-05, "loss": 295.7827, "step": 13060 }, { "epoch": 0.25136911544804574, "grad_norm": 614.6784026929857, "learning_rate": 1.7207280064194403e-05, "loss": 300.8992, "step": 13070 }, { "epoch": 0.25156144070852626, "grad_norm": 580.7944634711013, "learning_rate": 1.7203047914094514e-05, "loss": 295.1916, "step": 13080 }, { "epoch": 0.2517537659690068, "grad_norm": 565.5970850023269, "learning_rate": 1.7198813080978235e-05, "loss": 298.4104, "step": 13090 }, { "epoch": 0.2519460912294873, "grad_norm": 582.0710508609554, "learning_rate": 1.7194575566422975e-05, "loss": 302.5477, "step": 13100 }, { "epoch": 0.2521384164899678, "grad_norm": 522.6514913090786, "learning_rate": 1.719033537200714e-05, "loss": 290.3607, "step": 13110 }, { "epoch": 0.25233074175044834, "grad_norm": 544.8798463472215, "learning_rate": 1.7186092499310133e-05, "loss": 300.6581, "step": 13120 }, { "epoch": 0.25252306701092886, "grad_norm": 509.1442320602432, "learning_rate": 1.7181846949912347e-05, "loss": 295.8213, "step": 13130 }, { "epoch": 0.25271539227140943, "grad_norm": 477.28183928493957, "learning_rate": 1.717759872539519e-05, "loss": 291.7312, "step": 13140 }, { "epoch": 0.25290771753188995, "grad_norm": 549.2815820130234, "learning_rate": 1.7173347827341046e-05, "loss": 295.4985, "step": 13150 }, { "epoch": 0.25310004279237047, "grad_norm": 520.6918095978543, "learning_rate": 1.7169094257333307e-05, "loss": 307.3299, "step": 13160 }, { "epoch": 0.253292368052851, "grad_norm": 579.072012808588, "learning_rate": 1.716483801695636e-05, "loss": 302.4981, "step": 13170 }, { "epoch": 0.2534846933133315, "grad_norm": 540.1969180342111, "learning_rate": 1.7160579107795587e-05, "loss": 301.8908, "step": 13180 }, { "epoch": 0.25367701857381203, "grad_norm": 498.0216408820849, "learning_rate": 1.715631753143735e-05, "loss": 293.6805, "step": 13190 }, { "epoch": 0.25386934383429255, "grad_norm": 532.8962458202869, "learning_rate": 1.715205328946903e-05, "loss": 304.525, "step": 13200 }, { "epoch": 0.25406166909477307, "grad_norm": 503.52289463582196, "learning_rate": 1.7147786383478978e-05, "loss": 294.37, "step": 13210 }, { "epoch": 0.2542539943552536, "grad_norm": 479.37862765508373, "learning_rate": 1.7143516815056545e-05, "loss": 303.5988, "step": 13220 }, { "epoch": 0.2544463196157341, "grad_norm": 487.75782537001106, "learning_rate": 1.713924458579208e-05, "loss": 312.04, "step": 13230 }, { "epoch": 0.2546386448762147, "grad_norm": 557.1345963784541, "learning_rate": 1.7134969697276912e-05, "loss": 294.1135, "step": 13240 }, { "epoch": 0.2548309701366952, "grad_norm": 532.3653339709429, "learning_rate": 1.7130692151103373e-05, "loss": 294.1397, "step": 13250 }, { "epoch": 0.2550232953971757, "grad_norm": 559.8926544482599, "learning_rate": 1.7126411948864776e-05, "loss": 295.2776, "step": 13260 }, { "epoch": 0.25521562065765624, "grad_norm": 602.2117571941698, "learning_rate": 1.7122129092155422e-05, "loss": 308.6386, "step": 13270 }, { "epoch": 0.25540794591813676, "grad_norm": 611.6861783713705, "learning_rate": 1.7117843582570608e-05, "loss": 296.0421, "step": 13280 }, { "epoch": 0.2556002711786173, "grad_norm": 549.0821252476653, "learning_rate": 1.711355542170661e-05, "loss": 288.2859, "step": 13290 }, { "epoch": 0.2557925964390978, "grad_norm": 607.3341639249296, "learning_rate": 1.710926461116071e-05, "loss": 288.6771, "step": 13300 }, { "epoch": 0.2559849216995783, "grad_norm": 477.49183556700274, "learning_rate": 1.710497115253115e-05, "loss": 298.7382, "step": 13310 }, { "epoch": 0.25617724696005884, "grad_norm": 608.0060738334538, "learning_rate": 1.7100675047417178e-05, "loss": 295.9122, "step": 13320 }, { "epoch": 0.25636957222053935, "grad_norm": 566.5000757965348, "learning_rate": 1.7096376297419027e-05, "loss": 297.2796, "step": 13330 }, { "epoch": 0.2565618974810199, "grad_norm": 547.3218261512828, "learning_rate": 1.70920749041379e-05, "loss": 300.8601, "step": 13340 }, { "epoch": 0.25675422274150045, "grad_norm": 556.6420527426902, "learning_rate": 1.7087770869176005e-05, "loss": 296.1757, "step": 13350 }, { "epoch": 0.25694654800198097, "grad_norm": 540.4378875181017, "learning_rate": 1.7083464194136517e-05, "loss": 295.9015, "step": 13360 }, { "epoch": 0.2571388732624615, "grad_norm": 558.8648666429518, "learning_rate": 1.707915488062361e-05, "loss": 299.0723, "step": 13370 }, { "epoch": 0.257331198522942, "grad_norm": 534.5828983448623, "learning_rate": 1.7074842930242418e-05, "loss": 293.0366, "step": 13380 }, { "epoch": 0.2575235237834225, "grad_norm": 538.5962490105335, "learning_rate": 1.7070528344599083e-05, "loss": 297.8597, "step": 13390 }, { "epoch": 0.25771584904390304, "grad_norm": 569.7058279581657, "learning_rate": 1.7066211125300713e-05, "loss": 290.0898, "step": 13400 }, { "epoch": 0.25790817430438356, "grad_norm": 553.1988829519527, "learning_rate": 1.70618912739554e-05, "loss": 288.9417, "step": 13410 }, { "epoch": 0.2581004995648641, "grad_norm": 501.10003835685666, "learning_rate": 1.705756879217222e-05, "loss": 294.8937, "step": 13420 }, { "epoch": 0.2582928248253446, "grad_norm": 603.161560784316, "learning_rate": 1.7053243681561225e-05, "loss": 316.7672, "step": 13430 }, { "epoch": 0.2584851500858251, "grad_norm": 614.697465602219, "learning_rate": 1.7048915943733444e-05, "loss": 304.4859, "step": 13440 }, { "epoch": 0.2586774753463057, "grad_norm": 505.5117365134719, "learning_rate": 1.704458558030089e-05, "loss": 293.5885, "step": 13450 }, { "epoch": 0.2588698006067862, "grad_norm": 531.6401952165609, "learning_rate": 1.704025259287656e-05, "loss": 285.6869, "step": 13460 }, { "epoch": 0.25906212586726673, "grad_norm": 553.5805254108343, "learning_rate": 1.7035916983074405e-05, "loss": 296.7148, "step": 13470 }, { "epoch": 0.25925445112774725, "grad_norm": 595.8440058985766, "learning_rate": 1.7031578752509377e-05, "loss": 306.7942, "step": 13480 }, { "epoch": 0.2594467763882278, "grad_norm": 510.47098422006616, "learning_rate": 1.70272379027974e-05, "loss": 289.0648, "step": 13490 }, { "epoch": 0.2596391016487083, "grad_norm": 684.6421265091172, "learning_rate": 1.7022894435555356e-05, "loss": 295.8367, "step": 13500 }, { "epoch": 0.2598314269091888, "grad_norm": 561.8937544173967, "learning_rate": 1.7018548352401123e-05, "loss": 294.5657, "step": 13510 }, { "epoch": 0.26002375216966933, "grad_norm": 577.4846372742451, "learning_rate": 1.7014199654953543e-05, "loss": 290.9038, "step": 13520 }, { "epoch": 0.26021607743014985, "grad_norm": 588.4380967028559, "learning_rate": 1.700984834483244e-05, "loss": 293.1427, "step": 13530 }, { "epoch": 0.26040840269063037, "grad_norm": 534.1212486978449, "learning_rate": 1.7005494423658598e-05, "loss": 296.2717, "step": 13540 }, { "epoch": 0.26060072795111094, "grad_norm": 502.98505404872185, "learning_rate": 1.7001137893053782e-05, "loss": 288.3195, "step": 13550 }, { "epoch": 0.26079305321159146, "grad_norm": 540.1714886611936, "learning_rate": 1.6996778754640727e-05, "loss": 292.3107, "step": 13560 }, { "epoch": 0.260985378472072, "grad_norm": 660.9975055108199, "learning_rate": 1.6992417010043144e-05, "loss": 293.8949, "step": 13570 }, { "epoch": 0.2611777037325525, "grad_norm": 680.5519555020162, "learning_rate": 1.6988052660885707e-05, "loss": 291.8404, "step": 13580 }, { "epoch": 0.261370028993033, "grad_norm": 560.9298138071008, "learning_rate": 1.6983685708794064e-05, "loss": 288.3742, "step": 13590 }, { "epoch": 0.26156235425351354, "grad_norm": 525.2119638390872, "learning_rate": 1.6979316155394834e-05, "loss": 294.1705, "step": 13600 }, { "epoch": 0.26175467951399406, "grad_norm": 538.9753596662515, "learning_rate": 1.6974944002315605e-05, "loss": 289.604, "step": 13610 }, { "epoch": 0.2619470047744746, "grad_norm": 525.4341029167142, "learning_rate": 1.697056925118493e-05, "loss": 290.6618, "step": 13620 }, { "epoch": 0.2621393300349551, "grad_norm": 607.1097781492762, "learning_rate": 1.696619190363233e-05, "loss": 308.0432, "step": 13630 }, { "epoch": 0.2623316552954356, "grad_norm": 556.9796057232214, "learning_rate": 1.69618119612883e-05, "loss": 289.7933, "step": 13640 }, { "epoch": 0.2625239805559162, "grad_norm": 525.1332058711408, "learning_rate": 1.695742942578429e-05, "loss": 293.5802, "step": 13650 }, { "epoch": 0.2627163058163967, "grad_norm": 528.0396211366046, "learning_rate": 1.6953044298752724e-05, "loss": 292.3849, "step": 13660 }, { "epoch": 0.26290863107687723, "grad_norm": 564.8615567957876, "learning_rate": 1.694865658182699e-05, "loss": 308.7998, "step": 13670 }, { "epoch": 0.26310095633735775, "grad_norm": 480.5667912058225, "learning_rate": 1.6944266276641442e-05, "loss": 295.2425, "step": 13680 }, { "epoch": 0.26329328159783827, "grad_norm": 572.5110911428648, "learning_rate": 1.6939873384831394e-05, "loss": 287.6731, "step": 13690 }, { "epoch": 0.2634856068583188, "grad_norm": 498.2965572713883, "learning_rate": 1.6935477908033124e-05, "loss": 297.5873, "step": 13700 }, { "epoch": 0.2636779321187993, "grad_norm": 532.8350243166844, "learning_rate": 1.6931079847883877e-05, "loss": 295.7148, "step": 13710 }, { "epoch": 0.2638702573792798, "grad_norm": 551.2833474544038, "learning_rate": 1.692667920602186e-05, "loss": 296.3375, "step": 13720 }, { "epoch": 0.26406258263976035, "grad_norm": 572.5843623757257, "learning_rate": 1.6922275984086233e-05, "loss": 303.0679, "step": 13730 }, { "epoch": 0.26425490790024087, "grad_norm": 628.1976489547187, "learning_rate": 1.691787018371713e-05, "loss": 292.7349, "step": 13740 }, { "epoch": 0.2644472331607214, "grad_norm": 582.1594080748301, "learning_rate": 1.6913461806555635e-05, "loss": 303.1469, "step": 13750 }, { "epoch": 0.26463955842120196, "grad_norm": 552.6292632725243, "learning_rate": 1.6909050854243797e-05, "loss": 297.7123, "step": 13760 }, { "epoch": 0.2648318836816825, "grad_norm": 487.2885543181726, "learning_rate": 1.690463732842462e-05, "loss": 294.969, "step": 13770 }, { "epoch": 0.265024208942163, "grad_norm": 530.462477027445, "learning_rate": 1.6900221230742073e-05, "loss": 297.3343, "step": 13780 }, { "epoch": 0.2652165342026435, "grad_norm": 556.9178408469701, "learning_rate": 1.689580256284108e-05, "loss": 304.0897, "step": 13790 }, { "epoch": 0.26540885946312404, "grad_norm": 566.6930947552812, "learning_rate": 1.689138132636752e-05, "loss": 292.183, "step": 13800 }, { "epoch": 0.26560118472360456, "grad_norm": 480.87919387494645, "learning_rate": 1.688695752296823e-05, "loss": 288.8656, "step": 13810 }, { "epoch": 0.2657935099840851, "grad_norm": 563.1264854543442, "learning_rate": 1.6882531154291007e-05, "loss": 293.3224, "step": 13820 }, { "epoch": 0.2659858352445656, "grad_norm": 616.8299049814059, "learning_rate": 1.6878102221984593e-05, "loss": 303.3631, "step": 13830 }, { "epoch": 0.2661781605050461, "grad_norm": 526.4508153390387, "learning_rate": 1.6873670727698702e-05, "loss": 304.258, "step": 13840 }, { "epoch": 0.26637048576552663, "grad_norm": 534.6748436898739, "learning_rate": 1.686923667308398e-05, "loss": 296.1364, "step": 13850 }, { "epoch": 0.2665628110260072, "grad_norm": 482.08390550350896, "learning_rate": 1.6864800059792057e-05, "loss": 292.8136, "step": 13860 }, { "epoch": 0.2667551362864877, "grad_norm": 510.38221446309745, "learning_rate": 1.686036088947548e-05, "loss": 311.2935, "step": 13870 }, { "epoch": 0.26694746154696825, "grad_norm": 568.7434663673843, "learning_rate": 1.6855919163787777e-05, "loss": 301.1041, "step": 13880 }, { "epoch": 0.26713978680744876, "grad_norm": 527.7892689123219, "learning_rate": 1.6851474884383416e-05, "loss": 289.4896, "step": 13890 }, { "epoch": 0.2673321120679293, "grad_norm": 514.7795158654546, "learning_rate": 1.6847028052917814e-05, "loss": 294.7565, "step": 13900 }, { "epoch": 0.2675244373284098, "grad_norm": 549.8434435771735, "learning_rate": 1.6842578671047345e-05, "loss": 290.3513, "step": 13910 }, { "epoch": 0.2677167625888903, "grad_norm": 532.5415571416761, "learning_rate": 1.683812674042933e-05, "loss": 286.0049, "step": 13920 }, { "epoch": 0.26790908784937084, "grad_norm": 487.7051289803812, "learning_rate": 1.683367226272204e-05, "loss": 292.6955, "step": 13930 }, { "epoch": 0.26810141310985136, "grad_norm": 543.9904864676188, "learning_rate": 1.6829215239584695e-05, "loss": 294.8505, "step": 13940 }, { "epoch": 0.2682937383703319, "grad_norm": 520.4800505776105, "learning_rate": 1.6824755672677458e-05, "loss": 294.392, "step": 13950 }, { "epoch": 0.26848606363081245, "grad_norm": 642.2395485566753, "learning_rate": 1.682029356366145e-05, "loss": 295.3867, "step": 13960 }, { "epoch": 0.268678388891293, "grad_norm": 475.886560055544, "learning_rate": 1.6815828914198732e-05, "loss": 281.3129, "step": 13970 }, { "epoch": 0.2688707141517735, "grad_norm": 541.2896630354487, "learning_rate": 1.6811361725952308e-05, "loss": 303.7885, "step": 13980 }, { "epoch": 0.269063039412254, "grad_norm": 499.94100153781005, "learning_rate": 1.6806892000586135e-05, "loss": 292.4338, "step": 13990 }, { "epoch": 0.26925536467273453, "grad_norm": 575.2959380169674, "learning_rate": 1.6802419739765114e-05, "loss": 291.7208, "step": 14000 }, { "epoch": 0.26944768993321505, "grad_norm": 605.5459299534795, "learning_rate": 1.679794494515508e-05, "loss": 294.1945, "step": 14010 }, { "epoch": 0.26964001519369557, "grad_norm": 518.5757592090939, "learning_rate": 1.6793467618422828e-05, "loss": 289.982, "step": 14020 }, { "epoch": 0.2698323404541761, "grad_norm": 550.9170740451434, "learning_rate": 1.6788987761236088e-05, "loss": 296.3643, "step": 14030 }, { "epoch": 0.2700246657146566, "grad_norm": 542.1601511092452, "learning_rate": 1.6784505375263533e-05, "loss": 291.0375, "step": 14040 }, { "epoch": 0.27021699097513713, "grad_norm": 516.7184261407725, "learning_rate": 1.678002046217477e-05, "loss": 291.1083, "step": 14050 }, { "epoch": 0.2704093162356177, "grad_norm": 534.4485602349314, "learning_rate": 1.6775533023640363e-05, "loss": 308.9214, "step": 14060 }, { "epoch": 0.2706016414960982, "grad_norm": 694.0153284362075, "learning_rate": 1.6771043061331806e-05, "loss": 312.7127, "step": 14070 }, { "epoch": 0.27079396675657874, "grad_norm": 529.2729478771689, "learning_rate": 1.6766550576921533e-05, "loss": 290.4187, "step": 14080 }, { "epoch": 0.27098629201705926, "grad_norm": 560.1423028923992, "learning_rate": 1.676205557208293e-05, "loss": 297.6623, "step": 14090 }, { "epoch": 0.2711786172775398, "grad_norm": 586.9693772440653, "learning_rate": 1.67575580484903e-05, "loss": 281.7389, "step": 14100 }, { "epoch": 0.2713709425380203, "grad_norm": 515.9894478725496, "learning_rate": 1.6753058007818906e-05, "loss": 303.8303, "step": 14110 }, { "epoch": 0.2715632677985008, "grad_norm": 492.5634222475541, "learning_rate": 1.674855545174493e-05, "loss": 288.6625, "step": 14120 }, { "epoch": 0.27175559305898134, "grad_norm": 497.2167725431496, "learning_rate": 1.6744050381945507e-05, "loss": 297.8267, "step": 14130 }, { "epoch": 0.27194791831946186, "grad_norm": 553.3915355713392, "learning_rate": 1.67395428000987e-05, "loss": 290.1727, "step": 14140 }, { "epoch": 0.2721402435799424, "grad_norm": 546.5497919918348, "learning_rate": 1.6735032707883502e-05, "loss": 292.7723, "step": 14150 }, { "epoch": 0.27233256884042295, "grad_norm": 549.7254365469638, "learning_rate": 1.6730520106979855e-05, "loss": 282.8527, "step": 14160 }, { "epoch": 0.27252489410090347, "grad_norm": 476.7620428067308, "learning_rate": 1.672600499906863e-05, "loss": 292.9566, "step": 14170 }, { "epoch": 0.272717219361384, "grad_norm": 491.790227535059, "learning_rate": 1.6721487385831622e-05, "loss": 297.068, "step": 14180 }, { "epoch": 0.2729095446218645, "grad_norm": 503.8566371838667, "learning_rate": 1.6716967268951574e-05, "loss": 294.9846, "step": 14190 }, { "epoch": 0.273101869882345, "grad_norm": 521.8168280236863, "learning_rate": 1.6712444650112152e-05, "loss": 299.8933, "step": 14200 }, { "epoch": 0.27329419514282555, "grad_norm": 531.7919913215958, "learning_rate": 1.6707919530997956e-05, "loss": 300.1008, "step": 14210 }, { "epoch": 0.27348652040330607, "grad_norm": 542.4734961324131, "learning_rate": 1.6703391913294524e-05, "loss": 293.8181, "step": 14220 }, { "epoch": 0.2736788456637866, "grad_norm": 611.879581939818, "learning_rate": 1.6698861798688312e-05, "loss": 296.7324, "step": 14230 }, { "epoch": 0.2738711709242671, "grad_norm": 582.7807271420512, "learning_rate": 1.6694329188866717e-05, "loss": 296.1785, "step": 14240 }, { "epoch": 0.2740634961847476, "grad_norm": 593.1503592417329, "learning_rate": 1.6689794085518057e-05, "loss": 285.3363, "step": 14250 }, { "epoch": 0.27425582144522814, "grad_norm": 631.3094913957555, "learning_rate": 1.668525649033159e-05, "loss": 301.2793, "step": 14260 }, { "epoch": 0.2744481467057087, "grad_norm": 531.7179642840727, "learning_rate": 1.6680716404997482e-05, "loss": 291.4309, "step": 14270 }, { "epoch": 0.27464047196618924, "grad_norm": 586.9634234033227, "learning_rate": 1.667617383120686e-05, "loss": 296.095, "step": 14280 }, { "epoch": 0.27483279722666976, "grad_norm": 532.4330647997987, "learning_rate": 1.667162877065174e-05, "loss": 293.1977, "step": 14290 }, { "epoch": 0.2750251224871503, "grad_norm": 467.56773092250654, "learning_rate": 1.6667081225025087e-05, "loss": 289.0699, "step": 14300 }, { "epoch": 0.2752174477476308, "grad_norm": 561.7862042410331, "learning_rate": 1.666253119602079e-05, "loss": 297.6428, "step": 14310 }, { "epoch": 0.2754097730081113, "grad_norm": 576.062253058774, "learning_rate": 1.665797868533366e-05, "loss": 286.4333, "step": 14320 }, { "epoch": 0.27560209826859183, "grad_norm": 548.2936707215093, "learning_rate": 1.6653423694659433e-05, "loss": 302.9739, "step": 14330 }, { "epoch": 0.27579442352907235, "grad_norm": 519.4515504305616, "learning_rate": 1.6648866225694757e-05, "loss": 293.9797, "step": 14340 }, { "epoch": 0.27598674878955287, "grad_norm": 558.4126124005084, "learning_rate": 1.6644306280137227e-05, "loss": 290.5317, "step": 14350 }, { "epoch": 0.2761790740500334, "grad_norm": 515.0605976761065, "learning_rate": 1.6639743859685336e-05, "loss": 297.4429, "step": 14360 }, { "epoch": 0.27637139931051397, "grad_norm": 523.441858085406, "learning_rate": 1.663517896603852e-05, "loss": 293.4781, "step": 14370 }, { "epoch": 0.2765637245709945, "grad_norm": 496.2810553965151, "learning_rate": 1.6630611600897126e-05, "loss": 293.7562, "step": 14380 }, { "epoch": 0.276756049831475, "grad_norm": 533.9125856608609, "learning_rate": 1.6626041765962413e-05, "loss": 300.4407, "step": 14390 }, { "epoch": 0.2769483750919555, "grad_norm": 489.4734113881238, "learning_rate": 1.662146946293658e-05, "loss": 292.072, "step": 14400 }, { "epoch": 0.27714070035243604, "grad_norm": 511.10923050644755, "learning_rate": 1.6616894693522727e-05, "loss": 287.8735, "step": 14410 }, { "epoch": 0.27733302561291656, "grad_norm": 514.2390174675193, "learning_rate": 1.6612317459424884e-05, "loss": 286.7239, "step": 14420 }, { "epoch": 0.2775253508733971, "grad_norm": 516.3737981947766, "learning_rate": 1.6607737762347987e-05, "loss": 292.7905, "step": 14430 }, { "epoch": 0.2777176761338776, "grad_norm": 515.7956591308133, "learning_rate": 1.6603155603997908e-05, "loss": 291.3975, "step": 14440 }, { "epoch": 0.2779100013943581, "grad_norm": 530.2704433262519, "learning_rate": 1.6598570986081424e-05, "loss": 291.6472, "step": 14450 }, { "epoch": 0.27810232665483864, "grad_norm": 502.7541578391744, "learning_rate": 1.6593983910306225e-05, "loss": 290.5978, "step": 14460 }, { "epoch": 0.2782946519153192, "grad_norm": 500.9140327890009, "learning_rate": 1.658939437838092e-05, "loss": 285.6688, "step": 14470 }, { "epoch": 0.27848697717579973, "grad_norm": 506.4368375498071, "learning_rate": 1.658480239201504e-05, "loss": 290.2608, "step": 14480 }, { "epoch": 0.27867930243628025, "grad_norm": 524.0776124408978, "learning_rate": 1.6580207952919018e-05, "loss": 285.2238, "step": 14490 }, { "epoch": 0.27887162769676077, "grad_norm": 486.986613601712, "learning_rate": 1.657561106280421e-05, "loss": 289.2197, "step": 14500 }, { "epoch": 0.2790639529572413, "grad_norm": 527.7534929398079, "learning_rate": 1.6571011723382882e-05, "loss": 288.2382, "step": 14510 }, { "epoch": 0.2792562782177218, "grad_norm": 523.2433747738955, "learning_rate": 1.6566409936368207e-05, "loss": 294.6493, "step": 14520 }, { "epoch": 0.27944860347820233, "grad_norm": 506.96334814682155, "learning_rate": 1.6561805703474285e-05, "loss": 284.9292, "step": 14530 }, { "epoch": 0.27964092873868285, "grad_norm": 517.331582635516, "learning_rate": 1.655719902641611e-05, "loss": 291.482, "step": 14540 }, { "epoch": 0.27983325399916337, "grad_norm": 526.1254768542298, "learning_rate": 1.6552589906909586e-05, "loss": 285.2927, "step": 14550 }, { "epoch": 0.2800255792596439, "grad_norm": 505.8240819909218, "learning_rate": 1.654797834667155e-05, "loss": 279.6156, "step": 14560 }, { "epoch": 0.28021790452012446, "grad_norm": 543.4726265798994, "learning_rate": 1.6543364347419714e-05, "loss": 294.4749, "step": 14570 }, { "epoch": 0.280410229780605, "grad_norm": 536.3630879008642, "learning_rate": 1.6538747910872733e-05, "loss": 292.2666, "step": 14580 }, { "epoch": 0.2806025550410855, "grad_norm": 507.5555154178302, "learning_rate": 1.6534129038750145e-05, "loss": 289.061, "step": 14590 }, { "epoch": 0.280794880301566, "grad_norm": 536.5650200108057, "learning_rate": 1.65295077327724e-05, "loss": 288.3877, "step": 14600 }, { "epoch": 0.28098720556204654, "grad_norm": 541.7930329558253, "learning_rate": 1.652488399466087e-05, "loss": 281.9151, "step": 14610 }, { "epoch": 0.28117953082252706, "grad_norm": 537.5973429956484, "learning_rate": 1.6520257826137807e-05, "loss": 294.4806, "step": 14620 }, { "epoch": 0.2813718560830076, "grad_norm": 582.071556846933, "learning_rate": 1.6515629228926396e-05, "loss": 296.5689, "step": 14630 }, { "epoch": 0.2815641813434881, "grad_norm": 523.4653139210134, "learning_rate": 1.6510998204750702e-05, "loss": 301.8642, "step": 14640 }, { "epoch": 0.2817565066039686, "grad_norm": 484.41401024861267, "learning_rate": 1.650636475533571e-05, "loss": 288.9062, "step": 14650 }, { "epoch": 0.28194883186444913, "grad_norm": 630.3587033045286, "learning_rate": 1.6501728882407305e-05, "loss": 291.7619, "step": 14660 }, { "epoch": 0.28214115712492965, "grad_norm": 517.8249178111148, "learning_rate": 1.649709058769227e-05, "loss": 283.0053, "step": 14670 }, { "epoch": 0.28233348238541023, "grad_norm": 558.3279959930346, "learning_rate": 1.6492449872918293e-05, "loss": 282.5668, "step": 14680 }, { "epoch": 0.28252580764589075, "grad_norm": 496.024160108566, "learning_rate": 1.6487806739813966e-05, "loss": 287.0649, "step": 14690 }, { "epoch": 0.28271813290637127, "grad_norm": 518.8328203843299, "learning_rate": 1.6483161190108778e-05, "loss": 287.9272, "step": 14700 }, { "epoch": 0.2829104581668518, "grad_norm": 527.9861405296551, "learning_rate": 1.6478513225533117e-05, "loss": 295.1303, "step": 14710 }, { "epoch": 0.2831027834273323, "grad_norm": 483.3714306016936, "learning_rate": 1.647386284781828e-05, "loss": 293.6143, "step": 14720 }, { "epoch": 0.2832951086878128, "grad_norm": 474.9589950497544, "learning_rate": 1.6469210058696448e-05, "loss": 295.0874, "step": 14730 }, { "epoch": 0.28348743394829334, "grad_norm": 523.3837780029888, "learning_rate": 1.646455485990071e-05, "loss": 285.4751, "step": 14740 }, { "epoch": 0.28367975920877386, "grad_norm": 492.0379669582084, "learning_rate": 1.645989725316506e-05, "loss": 286.6602, "step": 14750 }, { "epoch": 0.2838720844692544, "grad_norm": 484.59617082131706, "learning_rate": 1.6455237240224364e-05, "loss": 284.2392, "step": 14760 }, { "epoch": 0.2840644097297349, "grad_norm": 526.1945236290193, "learning_rate": 1.6450574822814412e-05, "loss": 290.8893, "step": 14770 }, { "epoch": 0.2842567349902155, "grad_norm": 507.346995927789, "learning_rate": 1.6445910002671872e-05, "loss": 290.3344, "step": 14780 }, { "epoch": 0.284449060250696, "grad_norm": 485.57558249831214, "learning_rate": 1.644124278153431e-05, "loss": 292.5404, "step": 14790 }, { "epoch": 0.2846413855111765, "grad_norm": 522.1363442611338, "learning_rate": 1.64365731611402e-05, "loss": 292.2073, "step": 14800 }, { "epoch": 0.28483371077165703, "grad_norm": 481.477040786785, "learning_rate": 1.6431901143228888e-05, "loss": 283.1542, "step": 14810 }, { "epoch": 0.28502603603213755, "grad_norm": 546.037380587678, "learning_rate": 1.6427226729540623e-05, "loss": 284.7875, "step": 14820 }, { "epoch": 0.2852183612926181, "grad_norm": 490.40573605475146, "learning_rate": 1.6422549921816556e-05, "loss": 292.0841, "step": 14830 }, { "epoch": 0.2854106865530986, "grad_norm": 504.07270542810454, "learning_rate": 1.641787072179871e-05, "loss": 285.0437, "step": 14840 }, { "epoch": 0.2856030118135791, "grad_norm": 522.3888537112274, "learning_rate": 1.6413189131230022e-05, "loss": 285.5351, "step": 14850 }, { "epoch": 0.28579533707405963, "grad_norm": 517.7551877217255, "learning_rate": 1.6408505151854292e-05, "loss": 287.3235, "step": 14860 }, { "epoch": 0.28598766233454015, "grad_norm": 493.6430189679726, "learning_rate": 1.6403818785416236e-05, "loss": 283.4672, "step": 14870 }, { "epoch": 0.2861799875950207, "grad_norm": 465.872406438963, "learning_rate": 1.6399130033661444e-05, "loss": 298.2659, "step": 14880 }, { "epoch": 0.28637231285550124, "grad_norm": 521.8523892610701, "learning_rate": 1.6394438898336402e-05, "loss": 288.5079, "step": 14890 }, { "epoch": 0.28656463811598176, "grad_norm": 577.4684491701477, "learning_rate": 1.6389745381188475e-05, "loss": 295.961, "step": 14900 }, { "epoch": 0.2867569633764623, "grad_norm": 538.2385550799697, "learning_rate": 1.6385049483965926e-05, "loss": 283.0698, "step": 14910 }, { "epoch": 0.2869492886369428, "grad_norm": 542.6443433651598, "learning_rate": 1.6380351208417897e-05, "loss": 287.7351, "step": 14920 }, { "epoch": 0.2871416138974233, "grad_norm": 520.5855883193285, "learning_rate": 1.6375650556294417e-05, "loss": 285.7551, "step": 14930 }, { "epoch": 0.28733393915790384, "grad_norm": 530.7103527091674, "learning_rate": 1.6370947529346404e-05, "loss": 288.2293, "step": 14940 }, { "epoch": 0.28752626441838436, "grad_norm": 541.268105811768, "learning_rate": 1.6366242129325652e-05, "loss": 289.2752, "step": 14950 }, { "epoch": 0.2877185896788649, "grad_norm": 473.16286300124784, "learning_rate": 1.636153435798485e-05, "loss": 292.9462, "step": 14960 }, { "epoch": 0.2879109149393454, "grad_norm": 503.52311442459086, "learning_rate": 1.6356824217077564e-05, "loss": 278.4941, "step": 14970 }, { "epoch": 0.28810324019982597, "grad_norm": 516.2886240153437, "learning_rate": 1.6352111708358243e-05, "loss": 287.0422, "step": 14980 }, { "epoch": 0.2882955654603065, "grad_norm": 540.5420920883236, "learning_rate": 1.6347396833582224e-05, "loss": 291.1956, "step": 14990 }, { "epoch": 0.288487890720787, "grad_norm": 535.6816182130684, "learning_rate": 1.634267959450571e-05, "loss": 284.8218, "step": 15000 }, { "epoch": 0.28868021598126753, "grad_norm": 531.0880916730234, "learning_rate": 1.63379599928858e-05, "loss": 297.1833, "step": 15010 }, { "epoch": 0.28887254124174805, "grad_norm": 452.18802455534757, "learning_rate": 1.6333238030480473e-05, "loss": 290.4448, "step": 15020 }, { "epoch": 0.28906486650222857, "grad_norm": 551.1378948565866, "learning_rate": 1.6328513709048573e-05, "loss": 281.2265, "step": 15030 }, { "epoch": 0.2892571917627091, "grad_norm": 531.8185124943761, "learning_rate": 1.6323787030349833e-05, "loss": 287.2374, "step": 15040 }, { "epoch": 0.2894495170231896, "grad_norm": 544.5212845421312, "learning_rate": 1.6319057996144868e-05, "loss": 290.1431, "step": 15050 }, { "epoch": 0.2896418422836701, "grad_norm": 488.4442261682339, "learning_rate": 1.631432660819516e-05, "loss": 280.7577, "step": 15060 }, { "epoch": 0.28983416754415064, "grad_norm": 527.3468863746809, "learning_rate": 1.6309592868263075e-05, "loss": 287.4474, "step": 15070 }, { "epoch": 0.2900264928046312, "grad_norm": 515.9194998870802, "learning_rate": 1.630485677811185e-05, "loss": 292.0219, "step": 15080 }, { "epoch": 0.29021881806511174, "grad_norm": 524.2664270558995, "learning_rate": 1.630011833950561e-05, "loss": 288.1353, "step": 15090 }, { "epoch": 0.29041114332559226, "grad_norm": 511.65504954355316, "learning_rate": 1.6295377554209338e-05, "loss": 290.7549, "step": 15100 }, { "epoch": 0.2906034685860728, "grad_norm": 560.0968704815492, "learning_rate": 1.6290634423988897e-05, "loss": 291.5172, "step": 15110 }, { "epoch": 0.2907957938465533, "grad_norm": 562.3453899403073, "learning_rate": 1.6285888950611023e-05, "loss": 293.7454, "step": 15120 }, { "epoch": 0.2909881191070338, "grad_norm": 581.0782142311772, "learning_rate": 1.6281141135843334e-05, "loss": 284.4832, "step": 15130 }, { "epoch": 0.29118044436751434, "grad_norm": 507.7186426134632, "learning_rate": 1.6276390981454306e-05, "loss": 288.9794, "step": 15140 }, { "epoch": 0.29137276962799485, "grad_norm": 540.4939759787093, "learning_rate": 1.6271638489213297e-05, "loss": 295.8042, "step": 15150 }, { "epoch": 0.2915650948884754, "grad_norm": 569.0475529586984, "learning_rate": 1.6266883660890527e-05, "loss": 294.2423, "step": 15160 }, { "epoch": 0.2917574201489559, "grad_norm": 557.2936792072236, "learning_rate": 1.6262126498257098e-05, "loss": 287.2764, "step": 15170 }, { "epoch": 0.2919497454094364, "grad_norm": 508.37719933528643, "learning_rate": 1.625736700308497e-05, "loss": 289.2119, "step": 15180 }, { "epoch": 0.292142070669917, "grad_norm": 565.1171992592233, "learning_rate": 1.6252605177146978e-05, "loss": 289.3611, "step": 15190 }, { "epoch": 0.2923343959303975, "grad_norm": 510.1849774791188, "learning_rate": 1.624784102221682e-05, "loss": 297.7609, "step": 15200 }, { "epoch": 0.292526721190878, "grad_norm": 516.8600679003113, "learning_rate": 1.6243074540069067e-05, "loss": 283.5954, "step": 15210 }, { "epoch": 0.29271904645135854, "grad_norm": 509.4394451179388, "learning_rate": 1.6238305732479158e-05, "loss": 292.1387, "step": 15220 }, { "epoch": 0.29291137171183906, "grad_norm": 517.029756206194, "learning_rate": 1.6233534601223396e-05, "loss": 288.12, "step": 15230 }, { "epoch": 0.2931036969723196, "grad_norm": 477.2522619687717, "learning_rate": 1.6228761148078943e-05, "loss": 290.5635, "step": 15240 }, { "epoch": 0.2932960222328001, "grad_norm": 507.15852330827, "learning_rate": 1.622398537482383e-05, "loss": 293.5147, "step": 15250 }, { "epoch": 0.2934883474932806, "grad_norm": 533.1843285064343, "learning_rate": 1.621920728323696e-05, "loss": 293.4276, "step": 15260 }, { "epoch": 0.29368067275376114, "grad_norm": 514.1786183919115, "learning_rate": 1.621442687509809e-05, "loss": 287.0028, "step": 15270 }, { "epoch": 0.29387299801424166, "grad_norm": 520.3083702372918, "learning_rate": 1.6209644152187848e-05, "loss": 293.8355, "step": 15280 }, { "epoch": 0.29406532327472223, "grad_norm": 543.6579665627644, "learning_rate": 1.620485911628771e-05, "loss": 293.7546, "step": 15290 }, { "epoch": 0.29425764853520275, "grad_norm": 469.0832516060488, "learning_rate": 1.6200071769180026e-05, "loss": 274.7911, "step": 15300 }, { "epoch": 0.2944499737956833, "grad_norm": 492.4973677012339, "learning_rate": 1.6195282112648007e-05, "loss": 287.7066, "step": 15310 }, { "epoch": 0.2946422990561638, "grad_norm": 528.688635376134, "learning_rate": 1.6190490148475724e-05, "loss": 283.4887, "step": 15320 }, { "epoch": 0.2948346243166443, "grad_norm": 490.60267757464464, "learning_rate": 1.6185695878448094e-05, "loss": 290.6048, "step": 15330 }, { "epoch": 0.29502694957712483, "grad_norm": 555.9835794695705, "learning_rate": 1.6180899304350915e-05, "loss": 277.0562, "step": 15340 }, { "epoch": 0.29521927483760535, "grad_norm": 524.6071123671601, "learning_rate": 1.6176100427970826e-05, "loss": 283.2577, "step": 15350 }, { "epoch": 0.29541160009808587, "grad_norm": 524.1591809928741, "learning_rate": 1.6171299251095324e-05, "loss": 285.3737, "step": 15360 }, { "epoch": 0.2956039253585664, "grad_norm": 574.4022909974907, "learning_rate": 1.6166495775512777e-05, "loss": 296.8317, "step": 15370 }, { "epoch": 0.2957962506190469, "grad_norm": 531.5310987046762, "learning_rate": 1.6161690003012392e-05, "loss": 291.3665, "step": 15380 }, { "epoch": 0.2959885758795275, "grad_norm": 499.6099135110918, "learning_rate": 1.615688193538425e-05, "loss": 282.2095, "step": 15390 }, { "epoch": 0.296180901140008, "grad_norm": 594.1663922426048, "learning_rate": 1.615207157441927e-05, "loss": 290.5614, "step": 15400 }, { "epoch": 0.2963732264004885, "grad_norm": 512.0859341425991, "learning_rate": 1.6147258921909236e-05, "loss": 283.3074, "step": 15410 }, { "epoch": 0.29656555166096904, "grad_norm": 545.6213383466101, "learning_rate": 1.6142443979646774e-05, "loss": 299.5071, "step": 15420 }, { "epoch": 0.29675787692144956, "grad_norm": 487.5165986744006, "learning_rate": 1.6137626749425377e-05, "loss": 282.3447, "step": 15430 }, { "epoch": 0.2969502021819301, "grad_norm": 542.058428442061, "learning_rate": 1.6132807233039382e-05, "loss": 286.6273, "step": 15440 }, { "epoch": 0.2971425274424106, "grad_norm": 475.7983625499094, "learning_rate": 1.612798543228398e-05, "loss": 284.6235, "step": 15450 }, { "epoch": 0.2973348527028911, "grad_norm": 533.1886356499948, "learning_rate": 1.612316134895521e-05, "loss": 289.368, "step": 15460 }, { "epoch": 0.29752717796337164, "grad_norm": 493.71259909282486, "learning_rate": 1.611833498484997e-05, "loss": 288.4728, "step": 15470 }, { "epoch": 0.29771950322385216, "grad_norm": 483.3422549506859, "learning_rate": 1.611350634176599e-05, "loss": 289.1593, "step": 15480 }, { "epoch": 0.29791182848433273, "grad_norm": 550.8641844276592, "learning_rate": 1.6108675421501865e-05, "loss": 291.3389, "step": 15490 }, { "epoch": 0.29810415374481325, "grad_norm": 486.47445034849903, "learning_rate": 1.610384222585704e-05, "loss": 296.147, "step": 15500 }, { "epoch": 0.29829647900529377, "grad_norm": 471.2088660255904, "learning_rate": 1.609900675663179e-05, "loss": 281.4358, "step": 15510 }, { "epoch": 0.2984888042657743, "grad_norm": 518.0627511329293, "learning_rate": 1.609416901562725e-05, "loss": 277.0075, "step": 15520 }, { "epoch": 0.2986811295262548, "grad_norm": 487.7412202062457, "learning_rate": 1.60893290046454e-05, "loss": 285.6411, "step": 15530 }, { "epoch": 0.2988734547867353, "grad_norm": 548.9052584462366, "learning_rate": 1.608448672548907e-05, "loss": 289.8053, "step": 15540 }, { "epoch": 0.29906578004721585, "grad_norm": 498.9607684600205, "learning_rate": 1.6079642179961917e-05, "loss": 276.2588, "step": 15550 }, { "epoch": 0.29925810530769636, "grad_norm": 501.02411342709263, "learning_rate": 1.6074795369868463e-05, "loss": 290.0694, "step": 15560 }, { "epoch": 0.2994504305681769, "grad_norm": 485.3502661698066, "learning_rate": 1.6069946297014064e-05, "loss": 284.116, "step": 15570 }, { "epoch": 0.2996427558286574, "grad_norm": 523.5369260488685, "learning_rate": 1.6065094963204915e-05, "loss": 282.9058, "step": 15580 }, { "epoch": 0.2998350810891379, "grad_norm": 489.7669709899146, "learning_rate": 1.6060241370248064e-05, "loss": 287.4737, "step": 15590 }, { "epoch": 0.3000274063496185, "grad_norm": 521.4870696588582, "learning_rate": 1.605538551995139e-05, "loss": 290.3221, "step": 15600 }, { "epoch": 0.300219731610099, "grad_norm": 491.1485641743671, "learning_rate": 1.6050527414123616e-05, "loss": 284.1289, "step": 15610 }, { "epoch": 0.30041205687057954, "grad_norm": 529.0215173613176, "learning_rate": 1.604566705457431e-05, "loss": 275.0153, "step": 15620 }, { "epoch": 0.30060438213106006, "grad_norm": 521.1648195009329, "learning_rate": 1.604080444311387e-05, "loss": 280.3334, "step": 15630 }, { "epoch": 0.3007967073915406, "grad_norm": 625.2767107928526, "learning_rate": 1.6035939581553543e-05, "loss": 289.4082, "step": 15640 }, { "epoch": 0.3009890326520211, "grad_norm": 632.0418719421094, "learning_rate": 1.603107247170541e-05, "loss": 293.2012, "step": 15650 }, { "epoch": 0.3011813579125016, "grad_norm": 587.1309367442407, "learning_rate": 1.6026203115382392e-05, "loss": 294.6388, "step": 15660 }, { "epoch": 0.30137368317298213, "grad_norm": 541.275040449254, "learning_rate": 1.6021331514398233e-05, "loss": 278.5768, "step": 15670 }, { "epoch": 0.30156600843346265, "grad_norm": 504.9462380983586, "learning_rate": 1.6016457670567535e-05, "loss": 294.8726, "step": 15680 }, { "epoch": 0.30175833369394317, "grad_norm": 502.5374819203824, "learning_rate": 1.6011581585705715e-05, "loss": 286.1138, "step": 15690 }, { "epoch": 0.30195065895442375, "grad_norm": 534.9752537584386, "learning_rate": 1.600670326162904e-05, "loss": 291.6314, "step": 15700 }, { "epoch": 0.30214298421490426, "grad_norm": 474.3314493571712, "learning_rate": 1.600182270015461e-05, "loss": 280.5248, "step": 15710 }, { "epoch": 0.3023353094753848, "grad_norm": 503.11554185218836, "learning_rate": 1.5996939903100338e-05, "loss": 278.0978, "step": 15720 }, { "epoch": 0.3025276347358653, "grad_norm": 504.01519958601074, "learning_rate": 1.5992054872285005e-05, "loss": 281.9744, "step": 15730 }, { "epoch": 0.3027199599963458, "grad_norm": 604.2841576339626, "learning_rate": 1.5987167609528187e-05, "loss": 288.0925, "step": 15740 }, { "epoch": 0.30291228525682634, "grad_norm": 588.0113227879084, "learning_rate": 1.598227811665032e-05, "loss": 294.2167, "step": 15750 }, { "epoch": 0.30310461051730686, "grad_norm": 579.8760044105293, "learning_rate": 1.597738639547265e-05, "loss": 277.3983, "step": 15760 }, { "epoch": 0.3032969357777874, "grad_norm": 514.2280040722753, "learning_rate": 1.597249244781727e-05, "loss": 287.1545, "step": 15770 }, { "epoch": 0.3034892610382679, "grad_norm": 488.35231534242575, "learning_rate": 1.5967596275507094e-05, "loss": 289.3087, "step": 15780 }, { "epoch": 0.3036815862987484, "grad_norm": 491.15281409562715, "learning_rate": 1.5962697880365863e-05, "loss": 278.6546, "step": 15790 }, { "epoch": 0.303873911559229, "grad_norm": 500.22375470777, "learning_rate": 1.5957797264218145e-05, "loss": 276.2161, "step": 15800 }, { "epoch": 0.3040662368197095, "grad_norm": 549.1234763745196, "learning_rate": 1.5952894428889347e-05, "loss": 300.0928, "step": 15810 }, { "epoch": 0.30425856208019003, "grad_norm": 473.8553015419329, "learning_rate": 1.594798937620569e-05, "loss": 278.4122, "step": 15820 }, { "epoch": 0.30445088734067055, "grad_norm": 512.2970050190106, "learning_rate": 1.594308210799422e-05, "loss": 284.3674, "step": 15830 }, { "epoch": 0.30464321260115107, "grad_norm": 618.2516934853405, "learning_rate": 1.5938172626082823e-05, "loss": 289.8339, "step": 15840 }, { "epoch": 0.3048355378616316, "grad_norm": 552.9841071734476, "learning_rate": 1.5933260932300192e-05, "loss": 284.5295, "step": 15850 }, { "epoch": 0.3050278631221121, "grad_norm": 575.6943627252812, "learning_rate": 1.5928347028475855e-05, "loss": 309.3565, "step": 15860 }, { "epoch": 0.3052201883825926, "grad_norm": 540.8615864018034, "learning_rate": 1.592343091644016e-05, "loss": 282.5193, "step": 15870 }, { "epoch": 0.30541251364307315, "grad_norm": 521.4431990604672, "learning_rate": 1.5918512598024275e-05, "loss": 272.7361, "step": 15880 }, { "epoch": 0.30560483890355367, "grad_norm": 560.764237488359, "learning_rate": 1.59135920750602e-05, "loss": 294.7301, "step": 15890 }, { "epoch": 0.30579716416403424, "grad_norm": 510.4617815619798, "learning_rate": 1.590866934938074e-05, "loss": 287.0498, "step": 15900 }, { "epoch": 0.30598948942451476, "grad_norm": 564.1033224230508, "learning_rate": 1.590374442281953e-05, "loss": 278.6576, "step": 15910 }, { "epoch": 0.3061818146849953, "grad_norm": 550.1546968774793, "learning_rate": 1.5898817297211028e-05, "loss": 280.9709, "step": 15920 }, { "epoch": 0.3063741399454758, "grad_norm": 521.1080363962003, "learning_rate": 1.589388797439051e-05, "loss": 289.1891, "step": 15930 }, { "epoch": 0.3065664652059563, "grad_norm": 504.7766153289126, "learning_rate": 1.5888956456194056e-05, "loss": 284.62, "step": 15940 }, { "epoch": 0.30675879046643684, "grad_norm": 494.8050240522159, "learning_rate": 1.588402274445858e-05, "loss": 295.6602, "step": 15950 }, { "epoch": 0.30695111572691736, "grad_norm": 496.2823060475834, "learning_rate": 1.5879086841021815e-05, "loss": 288.8745, "step": 15960 }, { "epoch": 0.3071434409873979, "grad_norm": 519.9110449577126, "learning_rate": 1.5874148747722294e-05, "loss": 287.3097, "step": 15970 }, { "epoch": 0.3073357662478784, "grad_norm": 487.69124540647744, "learning_rate": 1.5869208466399382e-05, "loss": 284.0637, "step": 15980 }, { "epoch": 0.3075280915083589, "grad_norm": 539.631381040546, "learning_rate": 1.586426599889325e-05, "loss": 288.6262, "step": 15990 }, { "epoch": 0.3077204167688395, "grad_norm": 594.2390774409421, "learning_rate": 1.5859321347044882e-05, "loss": 293.5237, "step": 16000 }, { "epoch": 0.30791274202932, "grad_norm": 477.3472878789492, "learning_rate": 1.5854374512696084e-05, "loss": 289.5295, "step": 16010 }, { "epoch": 0.3081050672898005, "grad_norm": 582.688043220394, "learning_rate": 1.584942549768947e-05, "loss": 284.3594, "step": 16020 }, { "epoch": 0.30829739255028105, "grad_norm": 526.1582192938724, "learning_rate": 1.584447430386846e-05, "loss": 295.4727, "step": 16030 }, { "epoch": 0.30848971781076157, "grad_norm": 480.97555346584437, "learning_rate": 1.58395209330773e-05, "loss": 273.5935, "step": 16040 }, { "epoch": 0.3086820430712421, "grad_norm": 580.7129005239941, "learning_rate": 1.5834565387161034e-05, "loss": 290.0589, "step": 16050 }, { "epoch": 0.3088743683317226, "grad_norm": 595.7538220710202, "learning_rate": 1.5829607667965524e-05, "loss": 282.5735, "step": 16060 }, { "epoch": 0.3090666935922031, "grad_norm": 741.90393978779, "learning_rate": 1.5824647777337433e-05, "loss": 304.68, "step": 16070 }, { "epoch": 0.30925901885268364, "grad_norm": 503.28682760979336, "learning_rate": 1.5819685717124245e-05, "loss": 278.2125, "step": 16080 }, { "epoch": 0.30945134411316416, "grad_norm": 529.0526285631472, "learning_rate": 1.5814721489174246e-05, "loss": 281.5895, "step": 16090 }, { "epoch": 0.3096436693736447, "grad_norm": 523.754863430061, "learning_rate": 1.580975509533652e-05, "loss": 287.9395, "step": 16100 }, { "epoch": 0.30983599463412526, "grad_norm": 513.340387068993, "learning_rate": 1.5804786537460972e-05, "loss": 284.7428, "step": 16110 }, { "epoch": 0.3100283198946058, "grad_norm": 498.4498258034029, "learning_rate": 1.5799815817398312e-05, "loss": 282.2421, "step": 16120 }, { "epoch": 0.3102206451550863, "grad_norm": 644.6228277837, "learning_rate": 1.579484293700004e-05, "loss": 287.2761, "step": 16130 }, { "epoch": 0.3104129704155668, "grad_norm": 553.0587483533304, "learning_rate": 1.578986789811849e-05, "loss": 293.0727, "step": 16140 }, { "epoch": 0.31060529567604733, "grad_norm": 467.04001081599125, "learning_rate": 1.5784890702606763e-05, "loss": 279.3575, "step": 16150 }, { "epoch": 0.31079762093652785, "grad_norm": 561.0758916704282, "learning_rate": 1.5779911352318792e-05, "loss": 283.7236, "step": 16160 }, { "epoch": 0.31098994619700837, "grad_norm": 489.45034803369253, "learning_rate": 1.5774929849109303e-05, "loss": 282.6329, "step": 16170 }, { "epoch": 0.3111822714574889, "grad_norm": 491.5180048936539, "learning_rate": 1.5769946194833816e-05, "loss": 296.9812, "step": 16180 }, { "epoch": 0.3113745967179694, "grad_norm": 705.4764466767829, "learning_rate": 1.5764960391348666e-05, "loss": 292.4866, "step": 16190 }, { "epoch": 0.31156692197844993, "grad_norm": 454.96297015142267, "learning_rate": 1.5759972440510985e-05, "loss": 272.6013, "step": 16200 }, { "epoch": 0.3117592472389305, "grad_norm": 476.0777823907391, "learning_rate": 1.5754982344178697e-05, "loss": 275.2302, "step": 16210 }, { "epoch": 0.311951572499411, "grad_norm": 460.34322903100036, "learning_rate": 1.5749990104210534e-05, "loss": 286.4499, "step": 16220 }, { "epoch": 0.31214389775989154, "grad_norm": 482.8467760461587, "learning_rate": 1.574499572246602e-05, "loss": 280.7481, "step": 16230 }, { "epoch": 0.31233622302037206, "grad_norm": 564.6705875897441, "learning_rate": 1.5739999200805483e-05, "loss": 297.4406, "step": 16240 }, { "epoch": 0.3125285482808526, "grad_norm": 479.0560359918922, "learning_rate": 1.573500054109004e-05, "loss": 285.5159, "step": 16250 }, { "epoch": 0.3127208735413331, "grad_norm": 505.97033396095134, "learning_rate": 1.5729999745181617e-05, "loss": 282.9737, "step": 16260 }, { "epoch": 0.3129131988018136, "grad_norm": 504.80656370046, "learning_rate": 1.572499681494292e-05, "loss": 281.4261, "step": 16270 }, { "epoch": 0.31310552406229414, "grad_norm": 517.9894277752834, "learning_rate": 1.571999175223746e-05, "loss": 278.8282, "step": 16280 }, { "epoch": 0.31329784932277466, "grad_norm": 521.6867817039416, "learning_rate": 1.571498455892954e-05, "loss": 289.0921, "step": 16290 }, { "epoch": 0.3134901745832552, "grad_norm": 536.1863005509615, "learning_rate": 1.570997523688426e-05, "loss": 289.1859, "step": 16300 }, { "epoch": 0.31368249984373575, "grad_norm": 547.7252933988581, "learning_rate": 1.570496378796751e-05, "loss": 280.1739, "step": 16310 }, { "epoch": 0.31387482510421627, "grad_norm": 520.4258705378184, "learning_rate": 1.5699950214045966e-05, "loss": 290.1589, "step": 16320 }, { "epoch": 0.3140671503646968, "grad_norm": 510.2997733235102, "learning_rate": 1.5694934516987102e-05, "loss": 288.2046, "step": 16330 }, { "epoch": 0.3142594756251773, "grad_norm": 522.3530989708389, "learning_rate": 1.5689916698659193e-05, "loss": 290.9881, "step": 16340 }, { "epoch": 0.31445180088565783, "grad_norm": 540.4821080464747, "learning_rate": 1.568489676093128e-05, "loss": 273.7589, "step": 16350 }, { "epoch": 0.31464412614613835, "grad_norm": 549.9771541360382, "learning_rate": 1.5679874705673215e-05, "loss": 288.1429, "step": 16360 }, { "epoch": 0.31483645140661887, "grad_norm": 504.49624922154203, "learning_rate": 1.5674850534755628e-05, "loss": 287.5587, "step": 16370 }, { "epoch": 0.3150287766670994, "grad_norm": 540.5603433636334, "learning_rate": 1.566982425004994e-05, "loss": 283.5713, "step": 16380 }, { "epoch": 0.3152211019275799, "grad_norm": 564.9688463492913, "learning_rate": 1.5664795853428357e-05, "loss": 283.333, "step": 16390 }, { "epoch": 0.3154134271880604, "grad_norm": 487.59097667101787, "learning_rate": 1.565976534676388e-05, "loss": 279.123, "step": 16400 }, { "epoch": 0.315605752448541, "grad_norm": 514.6270182263046, "learning_rate": 1.5654732731930286e-05, "loss": 284.5096, "step": 16410 }, { "epoch": 0.3157980777090215, "grad_norm": 538.9822198776212, "learning_rate": 1.5649698010802138e-05, "loss": 300.1915, "step": 16420 }, { "epoch": 0.31599040296950204, "grad_norm": 481.9075128550809, "learning_rate": 1.564466118525479e-05, "loss": 279.3204, "step": 16430 }, { "epoch": 0.31618272822998256, "grad_norm": 484.67382443706265, "learning_rate": 1.5639622257164372e-05, "loss": 283.7897, "step": 16440 }, { "epoch": 0.3163750534904631, "grad_norm": 493.3385433360078, "learning_rate": 1.5634581228407807e-05, "loss": 274.0433, "step": 16450 }, { "epoch": 0.3165673787509436, "grad_norm": 598.5308599205382, "learning_rate": 1.562953810086279e-05, "loss": 283.401, "step": 16460 }, { "epoch": 0.3167597040114241, "grad_norm": 511.7552982060595, "learning_rate": 1.562449287640781e-05, "loss": 278.011, "step": 16470 }, { "epoch": 0.31695202927190463, "grad_norm": 512.0514524825103, "learning_rate": 1.5619445556922118e-05, "loss": 289.432, "step": 16480 }, { "epoch": 0.31714435453238515, "grad_norm": 518.3319904929779, "learning_rate": 1.561439614428577e-05, "loss": 287.8469, "step": 16490 }, { "epoch": 0.3173366797928657, "grad_norm": 543.2992007596245, "learning_rate": 1.5609344640379585e-05, "loss": 284.8363, "step": 16500 }, { "epoch": 0.3175290050533462, "grad_norm": 538.4865382684914, "learning_rate": 1.560429104708516e-05, "loss": 277.2691, "step": 16510 }, { "epoch": 0.31772133031382677, "grad_norm": 483.3174815768532, "learning_rate": 1.5599235366284874e-05, "loss": 278.3753, "step": 16520 }, { "epoch": 0.3179136555743073, "grad_norm": 493.07700587670973, "learning_rate": 1.5594177599861894e-05, "loss": 286.4425, "step": 16530 }, { "epoch": 0.3181059808347878, "grad_norm": 545.0671539989104, "learning_rate": 1.5589117749700147e-05, "loss": 283.1981, "step": 16540 }, { "epoch": 0.3182983060952683, "grad_norm": 485.1514583115408, "learning_rate": 1.5584055817684346e-05, "loss": 275.4777, "step": 16550 }, { "epoch": 0.31849063135574884, "grad_norm": 585.9405464455358, "learning_rate": 1.5578991805699975e-05, "loss": 284.9682, "step": 16560 }, { "epoch": 0.31868295661622936, "grad_norm": 510.4638462378715, "learning_rate": 1.5573925715633297e-05, "loss": 281.915, "step": 16570 }, { "epoch": 0.3188752818767099, "grad_norm": 521.0007405153933, "learning_rate": 1.5568857549371348e-05, "loss": 286.4713, "step": 16580 }, { "epoch": 0.3190676071371904, "grad_norm": 599.2231679338023, "learning_rate": 1.5563787308801934e-05, "loss": 283.5192, "step": 16590 }, { "epoch": 0.3192599323976709, "grad_norm": 528.6500518662007, "learning_rate": 1.5558714995813636e-05, "loss": 279.1317, "step": 16600 }, { "epoch": 0.31945225765815144, "grad_norm": 490.38248331872586, "learning_rate": 1.5553640612295807e-05, "loss": 279.3707, "step": 16610 }, { "epoch": 0.319644582918632, "grad_norm": 481.72437032816214, "learning_rate": 1.5548564160138572e-05, "loss": 286.9246, "step": 16620 }, { "epoch": 0.31983690817911253, "grad_norm": 499.9533911609183, "learning_rate": 1.5543485641232825e-05, "loss": 276.9663, "step": 16630 }, { "epoch": 0.32002923343959305, "grad_norm": 499.9996559319061, "learning_rate": 1.553840505747023e-05, "loss": 275.5364, "step": 16640 }, { "epoch": 0.32022155870007357, "grad_norm": 457.3820162231778, "learning_rate": 1.5533322410743223e-05, "loss": 281.6447, "step": 16650 }, { "epoch": 0.3204138839605541, "grad_norm": 506.4994518370763, "learning_rate": 1.5528237702945e-05, "loss": 279.5296, "step": 16660 }, { "epoch": 0.3206062092210346, "grad_norm": 471.5715076032157, "learning_rate": 1.5523150935969534e-05, "loss": 276.193, "step": 16670 }, { "epoch": 0.32079853448151513, "grad_norm": 549.1394218718974, "learning_rate": 1.5518062111711566e-05, "loss": 277.7524, "step": 16680 }, { "epoch": 0.32099085974199565, "grad_norm": 626.3914644799332, "learning_rate": 1.5512971232066593e-05, "loss": 277.2895, "step": 16690 }, { "epoch": 0.32118318500247617, "grad_norm": 535.4837034208131, "learning_rate": 1.5507878298930888e-05, "loss": 284.2514, "step": 16700 }, { "epoch": 0.3213755102629567, "grad_norm": 484.6434168970839, "learning_rate": 1.5502783314201478e-05, "loss": 282.1756, "step": 16710 }, { "epoch": 0.32156783552343726, "grad_norm": 494.733905875667, "learning_rate": 1.549768627977617e-05, "loss": 282.7522, "step": 16720 }, { "epoch": 0.3217601607839178, "grad_norm": 546.9176674970975, "learning_rate": 1.5492587197553517e-05, "loss": 288.5908, "step": 16730 }, { "epoch": 0.3219524860443983, "grad_norm": 567.2076576521231, "learning_rate": 1.5487486069432848e-05, "loss": 279.9842, "step": 16740 }, { "epoch": 0.3221448113048788, "grad_norm": 486.6796310621719, "learning_rate": 1.5482382897314243e-05, "loss": 277.7258, "step": 16750 }, { "epoch": 0.32233713656535934, "grad_norm": 561.8742935723349, "learning_rate": 1.5477277683098555e-05, "loss": 290.0802, "step": 16760 }, { "epoch": 0.32252946182583986, "grad_norm": 572.7351707165404, "learning_rate": 1.547217042868739e-05, "loss": 275.8038, "step": 16770 }, { "epoch": 0.3227217870863204, "grad_norm": 488.6701126797395, "learning_rate": 1.546706113598312e-05, "loss": 274.9992, "step": 16780 }, { "epoch": 0.3229141123468009, "grad_norm": 503.4435960152653, "learning_rate": 1.5461949806888867e-05, "loss": 274.8207, "step": 16790 }, { "epoch": 0.3231064376072814, "grad_norm": 478.409455223133, "learning_rate": 1.5456836443308512e-05, "loss": 280.9919, "step": 16800 }, { "epoch": 0.32329876286776194, "grad_norm": 610.258382612928, "learning_rate": 1.545172104714671e-05, "loss": 281.0779, "step": 16810 }, { "epoch": 0.3234910881282425, "grad_norm": 483.17836284432815, "learning_rate": 1.544660362030886e-05, "loss": 279.3705, "step": 16820 }, { "epoch": 0.32368341338872303, "grad_norm": 536.135670339746, "learning_rate": 1.544148416470111e-05, "loss": 284.1549, "step": 16830 }, { "epoch": 0.32387573864920355, "grad_norm": 499.98491209411674, "learning_rate": 1.5436362682230378e-05, "loss": 287.784, "step": 16840 }, { "epoch": 0.32406806390968407, "grad_norm": 532.9883687853627, "learning_rate": 1.543123917480433e-05, "loss": 282.5017, "step": 16850 }, { "epoch": 0.3242603891701646, "grad_norm": 523.7473473726294, "learning_rate": 1.542611364433139e-05, "loss": 286.9162, "step": 16860 }, { "epoch": 0.3244527144306451, "grad_norm": 505.14528914900814, "learning_rate": 1.5420986092720735e-05, "loss": 274.7977, "step": 16870 }, { "epoch": 0.3246450396911256, "grad_norm": 539.9288293906241, "learning_rate": 1.541585652188229e-05, "loss": 276.965, "step": 16880 }, { "epoch": 0.32483736495160614, "grad_norm": 493.824063240275, "learning_rate": 1.5410724933726732e-05, "loss": 285.4889, "step": 16890 }, { "epoch": 0.32502969021208666, "grad_norm": 484.25876968391384, "learning_rate": 1.5405591330165503e-05, "loss": 271.0561, "step": 16900 }, { "epoch": 0.3252220154725672, "grad_norm": 519.8669603964919, "learning_rate": 1.5400455713110777e-05, "loss": 283.1997, "step": 16910 }, { "epoch": 0.32541434073304776, "grad_norm": 542.3394815067633, "learning_rate": 1.539531808447549e-05, "loss": 287.2446, "step": 16920 }, { "epoch": 0.3256066659935283, "grad_norm": 508.3324806387887, "learning_rate": 1.5390178446173325e-05, "loss": 283.9394, "step": 16930 }, { "epoch": 0.3257989912540088, "grad_norm": 482.09653086508064, "learning_rate": 1.538503680011871e-05, "loss": 279.455, "step": 16940 }, { "epoch": 0.3259913165144893, "grad_norm": 503.93733588294157, "learning_rate": 1.537989314822682e-05, "loss": 278.5873, "step": 16950 }, { "epoch": 0.32618364177496983, "grad_norm": 474.553669869395, "learning_rate": 1.5374747492413587e-05, "loss": 283.2114, "step": 16960 }, { "epoch": 0.32637596703545035, "grad_norm": 552.9804402398042, "learning_rate": 1.536959983459568e-05, "loss": 283.8131, "step": 16970 }, { "epoch": 0.3265682922959309, "grad_norm": 523.9944541708164, "learning_rate": 1.536445017669052e-05, "loss": 277.2351, "step": 16980 }, { "epoch": 0.3267606175564114, "grad_norm": 535.0358808170469, "learning_rate": 1.535929852061626e-05, "loss": 273.6, "step": 16990 }, { "epoch": 0.3269529428168919, "grad_norm": 492.0514777933171, "learning_rate": 1.5354144868291817e-05, "loss": 289.0141, "step": 17000 }, { "epoch": 0.32714526807737243, "grad_norm": 525.554946727861, "learning_rate": 1.5348989221636835e-05, "loss": 276.8913, "step": 17010 }, { "epoch": 0.32733759333785295, "grad_norm": 561.7663668886851, "learning_rate": 1.5343831582571706e-05, "loss": 295.5605, "step": 17020 }, { "epoch": 0.3275299185983335, "grad_norm": 517.5711871233265, "learning_rate": 1.5338671953017576e-05, "loss": 275.8393, "step": 17030 }, { "epoch": 0.32772224385881404, "grad_norm": 498.35236385335435, "learning_rate": 1.5333510334896308e-05, "loss": 277.6092, "step": 17040 }, { "epoch": 0.32791456911929456, "grad_norm": 562.1171764611048, "learning_rate": 1.532834673013053e-05, "loss": 285.218, "step": 17050 }, { "epoch": 0.3281068943797751, "grad_norm": 492.69519703076077, "learning_rate": 1.5323181140643598e-05, "loss": 276.2242, "step": 17060 }, { "epoch": 0.3282992196402556, "grad_norm": 493.8678578390804, "learning_rate": 1.5318013568359603e-05, "loss": 279.1683, "step": 17070 }, { "epoch": 0.3284915449007361, "grad_norm": 482.7530348221218, "learning_rate": 1.531284401520338e-05, "loss": 274.2736, "step": 17080 }, { "epoch": 0.32868387016121664, "grad_norm": 528.8986365666045, "learning_rate": 1.530767248310051e-05, "loss": 278.8286, "step": 17090 }, { "epoch": 0.32887619542169716, "grad_norm": 479.2144729451598, "learning_rate": 1.53024989739773e-05, "loss": 278.8063, "step": 17100 }, { "epoch": 0.3290685206821777, "grad_norm": 481.86655683857055, "learning_rate": 1.5297323489760792e-05, "loss": 277.2227, "step": 17110 }, { "epoch": 0.3292608459426582, "grad_norm": 505.88855151867824, "learning_rate": 1.5292146032378778e-05, "loss": 282.797, "step": 17120 }, { "epoch": 0.3294531712031388, "grad_norm": 560.1317997466638, "learning_rate": 1.5286966603759767e-05, "loss": 279.9418, "step": 17130 }, { "epoch": 0.3296454964636193, "grad_norm": 623.2484739964339, "learning_rate": 1.5281785205833013e-05, "loss": 276.0334, "step": 17140 }, { "epoch": 0.3298378217240998, "grad_norm": 518.7324905589361, "learning_rate": 1.52766018405285e-05, "loss": 275.4337, "step": 17150 }, { "epoch": 0.33003014698458033, "grad_norm": 499.7788592259437, "learning_rate": 1.5271416509776948e-05, "loss": 274.5938, "step": 17160 }, { "epoch": 0.33022247224506085, "grad_norm": 506.7003651963242, "learning_rate": 1.5266229215509806e-05, "loss": 276.1904, "step": 17170 }, { "epoch": 0.33041479750554137, "grad_norm": 591.2069086162949, "learning_rate": 1.5261039959659257e-05, "loss": 282.4061, "step": 17180 }, { "epoch": 0.3306071227660219, "grad_norm": 530.2401027869066, "learning_rate": 1.5255848744158214e-05, "loss": 268.4294, "step": 17190 }, { "epoch": 0.3307994480265024, "grad_norm": 503.438411688062, "learning_rate": 1.5250655570940317e-05, "loss": 290.5155, "step": 17200 }, { "epoch": 0.3309917732869829, "grad_norm": 507.0000846841116, "learning_rate": 1.5245460441939934e-05, "loss": 282.9155, "step": 17210 }, { "epoch": 0.33118409854746345, "grad_norm": 510.5138871865681, "learning_rate": 1.5240263359092167e-05, "loss": 280.4435, "step": 17220 }, { "epoch": 0.331376423807944, "grad_norm": 467.48821668726924, "learning_rate": 1.5235064324332846e-05, "loss": 286.6052, "step": 17230 }, { "epoch": 0.33156874906842454, "grad_norm": 522.1372416385899, "learning_rate": 1.5229863339598528e-05, "loss": 274.6612, "step": 17240 }, { "epoch": 0.33176107432890506, "grad_norm": 501.7652116015993, "learning_rate": 1.5224660406826486e-05, "loss": 272.3773, "step": 17250 }, { "epoch": 0.3319533995893856, "grad_norm": 533.9846585342436, "learning_rate": 1.5219455527954732e-05, "loss": 287.6437, "step": 17260 }, { "epoch": 0.3321457248498661, "grad_norm": 468.84762379401434, "learning_rate": 1.5214248704921995e-05, "loss": 277.3463, "step": 17270 }, { "epoch": 0.3323380501103466, "grad_norm": 532.0771512669993, "learning_rate": 1.5209039939667731e-05, "loss": 278.6008, "step": 17280 }, { "epoch": 0.33253037537082714, "grad_norm": 521.8244517674843, "learning_rate": 1.5203829234132118e-05, "loss": 277.7722, "step": 17290 }, { "epoch": 0.33272270063130766, "grad_norm": 463.5135411104897, "learning_rate": 1.5198616590256064e-05, "loss": 281.7509, "step": 17300 }, { "epoch": 0.3329150258917882, "grad_norm": 500.148078934557, "learning_rate": 1.5193402009981187e-05, "loss": 267.7973, "step": 17310 }, { "epoch": 0.3331073511522687, "grad_norm": 507.9846930240541, "learning_rate": 1.5188185495249832e-05, "loss": 283.1407, "step": 17320 }, { "epoch": 0.33329967641274927, "grad_norm": 483.99928509173293, "learning_rate": 1.5182967048005069e-05, "loss": 279.5925, "step": 17330 }, { "epoch": 0.3334920016732298, "grad_norm": 491.625892638982, "learning_rate": 1.5177746670190674e-05, "loss": 274.332, "step": 17340 }, { "epoch": 0.3336843269337103, "grad_norm": 465.77803815953786, "learning_rate": 1.5172524363751162e-05, "loss": 273.0145, "step": 17350 }, { "epoch": 0.3338766521941908, "grad_norm": 543.3911445883701, "learning_rate": 1.5167300130631748e-05, "loss": 278.0825, "step": 17360 }, { "epoch": 0.33406897745467135, "grad_norm": 523.9792196412394, "learning_rate": 1.516207397277838e-05, "loss": 277.4151, "step": 17370 }, { "epoch": 0.33426130271515186, "grad_norm": 499.3017131309414, "learning_rate": 1.5156845892137711e-05, "loss": 267.7405, "step": 17380 }, { "epoch": 0.3344536279756324, "grad_norm": 518.06996096728, "learning_rate": 1.5151615890657113e-05, "loss": 281.213, "step": 17390 }, { "epoch": 0.3346459532361129, "grad_norm": 491.79445451709057, "learning_rate": 1.5146383970284679e-05, "loss": 278.501, "step": 17400 }, { "epoch": 0.3348382784965934, "grad_norm": 464.2798517580624, "learning_rate": 1.514115013296921e-05, "loss": 277.1603, "step": 17410 }, { "epoch": 0.33503060375707394, "grad_norm": 504.1890016819973, "learning_rate": 1.513591438066023e-05, "loss": 279.4511, "step": 17420 }, { "epoch": 0.33522292901755446, "grad_norm": 513.4741153847451, "learning_rate": 1.5130676715307962e-05, "loss": 289.9366, "step": 17430 }, { "epoch": 0.33541525427803504, "grad_norm": 487.91926779546486, "learning_rate": 1.5125437138863353e-05, "loss": 288.2803, "step": 17440 }, { "epoch": 0.33560757953851555, "grad_norm": 504.73540403510736, "learning_rate": 1.512019565327806e-05, "loss": 281.8395, "step": 17450 }, { "epoch": 0.3357999047989961, "grad_norm": 528.102721630349, "learning_rate": 1.5114952260504448e-05, "loss": 288.5389, "step": 17460 }, { "epoch": 0.3359922300594766, "grad_norm": 505.9273984774669, "learning_rate": 1.5109706962495596e-05, "loss": 280.3888, "step": 17470 }, { "epoch": 0.3361845553199571, "grad_norm": 549.5731745610719, "learning_rate": 1.510445976120529e-05, "loss": 275.3772, "step": 17480 }, { "epoch": 0.33637688058043763, "grad_norm": 513.3616780422686, "learning_rate": 1.5099210658588029e-05, "loss": 279.3206, "step": 17490 }, { "epoch": 0.33656920584091815, "grad_norm": 505.5728776805078, "learning_rate": 1.5093959656599008e-05, "loss": 280.9716, "step": 17500 }, { "epoch": 0.33676153110139867, "grad_norm": 511.8719650519859, "learning_rate": 1.5088706757194147e-05, "loss": 272.791, "step": 17510 }, { "epoch": 0.3369538563618792, "grad_norm": 479.78900731898904, "learning_rate": 1.5083451962330062e-05, "loss": 275.2819, "step": 17520 }, { "epoch": 0.3371461816223597, "grad_norm": 523.6620599199387, "learning_rate": 1.5078195273964081e-05, "loss": 273.6671, "step": 17530 }, { "epoch": 0.3373385068828403, "grad_norm": 537.1971920513456, "learning_rate": 1.5072936694054222e-05, "loss": 274.4501, "step": 17540 }, { "epoch": 0.3375308321433208, "grad_norm": 529.84177522821, "learning_rate": 1.5067676224559231e-05, "loss": 279.5901, "step": 17550 }, { "epoch": 0.3377231574038013, "grad_norm": 508.1453158554725, "learning_rate": 1.506241386743854e-05, "loss": 278.676, "step": 17560 }, { "epoch": 0.33791548266428184, "grad_norm": 523.0601712437133, "learning_rate": 1.5057149624652297e-05, "loss": 268.4308, "step": 17570 }, { "epoch": 0.33810780792476236, "grad_norm": 491.62081580979446, "learning_rate": 1.5051883498161334e-05, "loss": 273.8124, "step": 17580 }, { "epoch": 0.3383001331852429, "grad_norm": 501.30099692217374, "learning_rate": 1.5046615489927206e-05, "loss": 273.8414, "step": 17590 }, { "epoch": 0.3384924584457234, "grad_norm": 489.6074219863892, "learning_rate": 1.5041345601912154e-05, "loss": 275.304, "step": 17600 }, { "epoch": 0.3386847837062039, "grad_norm": 491.03167632258294, "learning_rate": 1.5036073836079128e-05, "loss": 282.0901, "step": 17610 }, { "epoch": 0.33887710896668444, "grad_norm": 506.6295762251429, "learning_rate": 1.5030800194391773e-05, "loss": 269.5102, "step": 17620 }, { "epoch": 0.33906943422716496, "grad_norm": 519.1804558435261, "learning_rate": 1.5025524678814428e-05, "loss": 279.2036, "step": 17630 }, { "epoch": 0.33926175948764553, "grad_norm": 479.64658892197633, "learning_rate": 1.5020247291312138e-05, "loss": 280.2888, "step": 17640 }, { "epoch": 0.33945408474812605, "grad_norm": 497.4550420103091, "learning_rate": 1.5014968033850647e-05, "loss": 291.1086, "step": 17650 }, { "epoch": 0.33964641000860657, "grad_norm": 549.9160651598164, "learning_rate": 1.5009686908396388e-05, "loss": 279.9692, "step": 17660 }, { "epoch": 0.3398387352690871, "grad_norm": 484.1959080508733, "learning_rate": 1.5004403916916494e-05, "loss": 278.8785, "step": 17670 }, { "epoch": 0.3400310605295676, "grad_norm": 518.9782936604537, "learning_rate": 1.4999119061378791e-05, "loss": 279.7993, "step": 17680 }, { "epoch": 0.3402233857900481, "grad_norm": 500.82782772726847, "learning_rate": 1.4993832343751802e-05, "loss": 290.7989, "step": 17690 }, { "epoch": 0.34041571105052865, "grad_norm": 497.78970508729935, "learning_rate": 1.4988543766004737e-05, "loss": 272.7477, "step": 17700 }, { "epoch": 0.34060803631100917, "grad_norm": 513.4342711301089, "learning_rate": 1.498325333010751e-05, "loss": 267.0763, "step": 17710 }, { "epoch": 0.3408003615714897, "grad_norm": 499.06077693041084, "learning_rate": 1.497796103803072e-05, "loss": 280.9972, "step": 17720 }, { "epoch": 0.3409926868319702, "grad_norm": 550.4500521767796, "learning_rate": 1.4972666891745655e-05, "loss": 279.5483, "step": 17730 }, { "epoch": 0.3411850120924508, "grad_norm": 474.5120322786387, "learning_rate": 1.49673708932243e-05, "loss": 267.1358, "step": 17740 }, { "epoch": 0.3413773373529313, "grad_norm": 511.2861397904143, "learning_rate": 1.4962073044439328e-05, "loss": 281.4607, "step": 17750 }, { "epoch": 0.3415696626134118, "grad_norm": 485.9737847449894, "learning_rate": 1.4956773347364095e-05, "loss": 281.6694, "step": 17760 }, { "epoch": 0.34176198787389234, "grad_norm": 543.3463788744507, "learning_rate": 1.4951471803972657e-05, "loss": 276.9335, "step": 17770 }, { "epoch": 0.34195431313437286, "grad_norm": 570.2492342485151, "learning_rate": 1.4946168416239746e-05, "loss": 296.8713, "step": 17780 }, { "epoch": 0.3421466383948534, "grad_norm": 542.5114847191128, "learning_rate": 1.4940863186140788e-05, "loss": 288.7994, "step": 17790 }, { "epoch": 0.3423389636553339, "grad_norm": 538.8198821083244, "learning_rate": 1.4935556115651898e-05, "loss": 267.133, "step": 17800 }, { "epoch": 0.3425312889158144, "grad_norm": 539.7385609767242, "learning_rate": 1.4930247206749863e-05, "loss": 270.0881, "step": 17810 }, { "epoch": 0.34272361417629493, "grad_norm": 531.2502518020995, "learning_rate": 1.4924936461412172e-05, "loss": 279.517, "step": 17820 }, { "epoch": 0.34291593943677545, "grad_norm": 478.10313732077884, "learning_rate": 1.4919623881616989e-05, "loss": 271.1312, "step": 17830 }, { "epoch": 0.343108264697256, "grad_norm": 526.6498889840595, "learning_rate": 1.4914309469343158e-05, "loss": 265.5551, "step": 17840 }, { "epoch": 0.34330058995773655, "grad_norm": 563.5118198740181, "learning_rate": 1.4908993226570214e-05, "loss": 290.039, "step": 17850 }, { "epoch": 0.34349291521821707, "grad_norm": 555.612150809867, "learning_rate": 1.4903675155278365e-05, "loss": 271.889, "step": 17860 }, { "epoch": 0.3436852404786976, "grad_norm": 457.22209280509924, "learning_rate": 1.4898355257448508e-05, "loss": 278.4439, "step": 17870 }, { "epoch": 0.3438775657391781, "grad_norm": 450.13527306824096, "learning_rate": 1.4893033535062219e-05, "loss": 271.5201, "step": 17880 }, { "epoch": 0.3440698909996586, "grad_norm": 481.2346702407072, "learning_rate": 1.4887709990101748e-05, "loss": 276.1723, "step": 17890 }, { "epoch": 0.34426221626013914, "grad_norm": 482.0542976273173, "learning_rate": 1.4882384624550028e-05, "loss": 279.5917, "step": 17900 }, { "epoch": 0.34445454152061966, "grad_norm": 570.5918494552345, "learning_rate": 1.4877057440390672e-05, "loss": 267.7453, "step": 17910 }, { "epoch": 0.3446468667811002, "grad_norm": 489.2071285520791, "learning_rate": 1.4871728439607967e-05, "loss": 276.4078, "step": 17920 }, { "epoch": 0.3448391920415807, "grad_norm": 499.4925409907639, "learning_rate": 1.4866397624186877e-05, "loss": 274.1754, "step": 17930 }, { "epoch": 0.3450315173020612, "grad_norm": 489.3010735053375, "learning_rate": 1.4861064996113042e-05, "loss": 270.5771, "step": 17940 }, { "epoch": 0.3452238425625418, "grad_norm": 516.4476601167114, "learning_rate": 1.485573055737278e-05, "loss": 281.167, "step": 17950 }, { "epoch": 0.3454161678230223, "grad_norm": 502.5953209048784, "learning_rate": 1.485039430995308e-05, "loss": 276.3996, "step": 17960 }, { "epoch": 0.34560849308350283, "grad_norm": 509.1630480153723, "learning_rate": 1.4845056255841608e-05, "loss": 279.1599, "step": 17970 }, { "epoch": 0.34580081834398335, "grad_norm": 511.0730622472175, "learning_rate": 1.48397163970267e-05, "loss": 286.7167, "step": 17980 }, { "epoch": 0.34599314360446387, "grad_norm": 543.6350962657646, "learning_rate": 1.4834374735497362e-05, "loss": 275.0893, "step": 17990 }, { "epoch": 0.3461854688649444, "grad_norm": 549.0933276042018, "learning_rate": 1.4829031273243277e-05, "loss": 283.2004, "step": 18000 }, { "epoch": 0.3463777941254249, "grad_norm": 478.9966551116924, "learning_rate": 1.4823686012254798e-05, "loss": 270.4572, "step": 18010 }, { "epoch": 0.34657011938590543, "grad_norm": 451.09551493351705, "learning_rate": 1.4818338954522943e-05, "loss": 267.2886, "step": 18020 }, { "epoch": 0.34676244464638595, "grad_norm": 487.70379309691907, "learning_rate": 1.4812990102039411e-05, "loss": 272.4028, "step": 18030 }, { "epoch": 0.34695476990686647, "grad_norm": 461.8328123924969, "learning_rate": 1.480763945679655e-05, "loss": 270.2243, "step": 18040 }, { "epoch": 0.34714709516734704, "grad_norm": 513.3241681581126, "learning_rate": 1.4802287020787396e-05, "loss": 273.2115, "step": 18050 }, { "epoch": 0.34733942042782756, "grad_norm": 583.6831276889415, "learning_rate": 1.4796932796005634e-05, "loss": 278.092, "step": 18060 }, { "epoch": 0.3475317456883081, "grad_norm": 500.1706108032943, "learning_rate": 1.4791576784445632e-05, "loss": 270.2284, "step": 18070 }, { "epoch": 0.3477240709487886, "grad_norm": 513.7577102398449, "learning_rate": 1.4786218988102414e-05, "loss": 271.5205, "step": 18080 }, { "epoch": 0.3479163962092691, "grad_norm": 517.6885947849202, "learning_rate": 1.4780859408971668e-05, "loss": 267.0917, "step": 18090 }, { "epoch": 0.34810872146974964, "grad_norm": 493.7372168706724, "learning_rate": 1.4775498049049754e-05, "loss": 277.2727, "step": 18100 }, { "epoch": 0.34830104673023016, "grad_norm": 502.8970072394122, "learning_rate": 1.4770134910333684e-05, "loss": 280.6166, "step": 18110 }, { "epoch": 0.3484933719907107, "grad_norm": 453.5355996943572, "learning_rate": 1.4764769994821145e-05, "loss": 280.9081, "step": 18120 }, { "epoch": 0.3486856972511912, "grad_norm": 584.3013429138648, "learning_rate": 1.4759403304510472e-05, "loss": 281.7896, "step": 18130 }, { "epoch": 0.3488780225116717, "grad_norm": 510.58687368556485, "learning_rate": 1.475403484140067e-05, "loss": 281.7353, "step": 18140 }, { "epoch": 0.3490703477721523, "grad_norm": 478.761783282778, "learning_rate": 1.4748664607491408e-05, "loss": 274.2493, "step": 18150 }, { "epoch": 0.3492626730326328, "grad_norm": 478.48081702040633, "learning_rate": 1.4743292604783008e-05, "loss": 278.2816, "step": 18160 }, { "epoch": 0.34945499829311333, "grad_norm": 490.50956716839545, "learning_rate": 1.4737918835276451e-05, "loss": 273.5343, "step": 18170 }, { "epoch": 0.34964732355359385, "grad_norm": 499.3343957519696, "learning_rate": 1.4732543300973374e-05, "loss": 274.0835, "step": 18180 }, { "epoch": 0.34983964881407437, "grad_norm": 496.47168303627546, "learning_rate": 1.472716600387608e-05, "loss": 274.201, "step": 18190 }, { "epoch": 0.3500319740745549, "grad_norm": 501.4186703132157, "learning_rate": 1.4721786945987519e-05, "loss": 282.1016, "step": 18200 }, { "epoch": 0.3502242993350354, "grad_norm": 482.18739096287226, "learning_rate": 1.4716406129311307e-05, "loss": 277.8248, "step": 18210 }, { "epoch": 0.3504166245955159, "grad_norm": 492.65093701630104, "learning_rate": 1.4711023555851702e-05, "loss": 281.1395, "step": 18220 }, { "epoch": 0.35060894985599644, "grad_norm": 510.5813213558361, "learning_rate": 1.470563922761363e-05, "loss": 264.6669, "step": 18230 }, { "epoch": 0.35080127511647696, "grad_norm": 484.1044532276803, "learning_rate": 1.470025314660266e-05, "loss": 275.079, "step": 18240 }, { "epoch": 0.35099360037695754, "grad_norm": 492.22338237898106, "learning_rate": 1.4694865314825024e-05, "loss": 279.9116, "step": 18250 }, { "epoch": 0.35118592563743806, "grad_norm": 582.2876021990764, "learning_rate": 1.4689475734287596e-05, "loss": 275.7554, "step": 18260 }, { "epoch": 0.3513782508979186, "grad_norm": 491.92303937431313, "learning_rate": 1.4684084406997903e-05, "loss": 278.5537, "step": 18270 }, { "epoch": 0.3515705761583991, "grad_norm": 522.9623001197829, "learning_rate": 1.467869133496413e-05, "loss": 275.3861, "step": 18280 }, { "epoch": 0.3517629014188796, "grad_norm": 509.6981462756343, "learning_rate": 1.4673296520195105e-05, "loss": 270.7336, "step": 18290 }, { "epoch": 0.35195522667936013, "grad_norm": 443.24550556703184, "learning_rate": 1.4667899964700309e-05, "loss": 273.7989, "step": 18300 }, { "epoch": 0.35214755193984065, "grad_norm": 489.07076765329754, "learning_rate": 1.466250167048987e-05, "loss": 273.8396, "step": 18310 }, { "epoch": 0.35233987720032117, "grad_norm": 461.5638593100279, "learning_rate": 1.4657101639574563e-05, "loss": 281.685, "step": 18320 }, { "epoch": 0.3525322024608017, "grad_norm": 483.6051806875028, "learning_rate": 1.4651699873965808e-05, "loss": 276.0564, "step": 18330 }, { "epoch": 0.3527245277212822, "grad_norm": 475.19970902453315, "learning_rate": 1.4646296375675676e-05, "loss": 280.6133, "step": 18340 }, { "epoch": 0.35291685298176273, "grad_norm": 467.261602599556, "learning_rate": 1.464089114671688e-05, "loss": 284.0963, "step": 18350 }, { "epoch": 0.3531091782422433, "grad_norm": 565.2773359794652, "learning_rate": 1.4635484189102776e-05, "loss": 273.4974, "step": 18360 }, { "epoch": 0.3533015035027238, "grad_norm": 610.7791523439159, "learning_rate": 1.4630075504847373e-05, "loss": 274.8716, "step": 18370 }, { "epoch": 0.35349382876320434, "grad_norm": 561.8525351287174, "learning_rate": 1.4624665095965311e-05, "loss": 269.1808, "step": 18380 }, { "epoch": 0.35368615402368486, "grad_norm": 534.9783175143453, "learning_rate": 1.4619252964471881e-05, "loss": 270.5985, "step": 18390 }, { "epoch": 0.3538784792841654, "grad_norm": 518.9606747472129, "learning_rate": 1.461383911238301e-05, "loss": 284.0321, "step": 18400 }, { "epoch": 0.3540708045446459, "grad_norm": 589.8949359179564, "learning_rate": 1.4608423541715273e-05, "loss": 271.4509, "step": 18410 }, { "epoch": 0.3542631298051264, "grad_norm": 486.3948360072605, "learning_rate": 1.4603006254485874e-05, "loss": 274.5822, "step": 18420 }, { "epoch": 0.35445545506560694, "grad_norm": 492.48757346366045, "learning_rate": 1.4597587252712666e-05, "loss": 274.8331, "step": 18430 }, { "epoch": 0.35464778032608746, "grad_norm": 484.7721593245906, "learning_rate": 1.4592166538414136e-05, "loss": 266.309, "step": 18440 }, { "epoch": 0.354840105586568, "grad_norm": 547.3019627975193, "learning_rate": 1.4586744113609416e-05, "loss": 276.1605, "step": 18450 }, { "epoch": 0.35503243084704855, "grad_norm": 499.784131490363, "learning_rate": 1.4581319980318266e-05, "loss": 278.8771, "step": 18460 }, { "epoch": 0.35522475610752907, "grad_norm": 547.802960934263, "learning_rate": 1.4575894140561086e-05, "loss": 265.872, "step": 18470 }, { "epoch": 0.3554170813680096, "grad_norm": 584.7809345359788, "learning_rate": 1.4570466596358914e-05, "loss": 281.2158, "step": 18480 }, { "epoch": 0.3556094066284901, "grad_norm": 462.78468535250977, "learning_rate": 1.4565037349733415e-05, "loss": 278.9555, "step": 18490 }, { "epoch": 0.35580173188897063, "grad_norm": 611.0583743982891, "learning_rate": 1.45596064027069e-05, "loss": 273.9848, "step": 18500 }, { "epoch": 0.35599405714945115, "grad_norm": 586.6556436330482, "learning_rate": 1.4554173757302303e-05, "loss": 266.5396, "step": 18510 }, { "epoch": 0.35618638240993167, "grad_norm": 535.2620975374997, "learning_rate": 1.4548739415543197e-05, "loss": 268.6847, "step": 18520 }, { "epoch": 0.3563787076704122, "grad_norm": 532.6024427590063, "learning_rate": 1.454330337945378e-05, "loss": 268.69, "step": 18530 }, { "epoch": 0.3565710329308927, "grad_norm": 451.5225454658764, "learning_rate": 1.4537865651058893e-05, "loss": 266.809, "step": 18540 }, { "epoch": 0.3567633581913732, "grad_norm": 508.726118035006, "learning_rate": 1.4532426232383998e-05, "loss": 271.8431, "step": 18550 }, { "epoch": 0.3569556834518538, "grad_norm": 435.3919759200938, "learning_rate": 1.4526985125455184e-05, "loss": 273.8349, "step": 18560 }, { "epoch": 0.3571480087123343, "grad_norm": 475.22283493396196, "learning_rate": 1.4521542332299177e-05, "loss": 277.1238, "step": 18570 }, { "epoch": 0.35734033397281484, "grad_norm": 454.663188481646, "learning_rate": 1.4516097854943325e-05, "loss": 269.471, "step": 18580 }, { "epoch": 0.35753265923329536, "grad_norm": 489.6256450352957, "learning_rate": 1.4510651695415612e-05, "loss": 263.6692, "step": 18590 }, { "epoch": 0.3577249844937759, "grad_norm": 523.4703439970984, "learning_rate": 1.4505203855744637e-05, "loss": 287.1177, "step": 18600 }, { "epoch": 0.3579173097542564, "grad_norm": 487.45110951567614, "learning_rate": 1.4499754337959628e-05, "loss": 275.8579, "step": 18610 }, { "epoch": 0.3581096350147369, "grad_norm": 471.63288603724055, "learning_rate": 1.449430314409045e-05, "loss": 268.8969, "step": 18620 }, { "epoch": 0.35830196027521743, "grad_norm": 509.35368761915225, "learning_rate": 1.4488850276167572e-05, "loss": 272.5918, "step": 18630 }, { "epoch": 0.35849428553569795, "grad_norm": 496.2132943043483, "learning_rate": 1.44833957362221e-05, "loss": 278.5671, "step": 18640 }, { "epoch": 0.3586866107961785, "grad_norm": 516.1593242842114, "learning_rate": 1.4477939526285767e-05, "loss": 281.0237, "step": 18650 }, { "epoch": 0.35887893605665905, "grad_norm": 500.2385340599565, "learning_rate": 1.4472481648390914e-05, "loss": 284.4294, "step": 18660 }, { "epoch": 0.35907126131713957, "grad_norm": 495.0134020699203, "learning_rate": 1.4467022104570514e-05, "loss": 276.252, "step": 18670 }, { "epoch": 0.3592635865776201, "grad_norm": 522.2266800170015, "learning_rate": 1.4461560896858156e-05, "loss": 273.3659, "step": 18680 }, { "epoch": 0.3594559118381006, "grad_norm": 493.21767026047456, "learning_rate": 1.4456098027288046e-05, "loss": 270.9548, "step": 18690 }, { "epoch": 0.3596482370985811, "grad_norm": 515.6318969442698, "learning_rate": 1.4450633497895017e-05, "loss": 271.9803, "step": 18700 }, { "epoch": 0.35984056235906164, "grad_norm": 492.05182539050395, "learning_rate": 1.4445167310714514e-05, "loss": 278.6881, "step": 18710 }, { "epoch": 0.36003288761954216, "grad_norm": 518.9349390572469, "learning_rate": 1.4439699467782602e-05, "loss": 268.8568, "step": 18720 }, { "epoch": 0.3602252128800227, "grad_norm": 472.47978878508525, "learning_rate": 1.4434229971135965e-05, "loss": 274.5661, "step": 18730 }, { "epoch": 0.3604175381405032, "grad_norm": 462.5800958576359, "learning_rate": 1.4428758822811894e-05, "loss": 268.6313, "step": 18740 }, { "epoch": 0.3606098634009837, "grad_norm": 556.8318001826748, "learning_rate": 1.442328602484831e-05, "loss": 279.1211, "step": 18750 }, { "epoch": 0.3608021886614643, "grad_norm": 475.38132861284896, "learning_rate": 1.441781157928373e-05, "loss": 267.9185, "step": 18760 }, { "epoch": 0.3609945139219448, "grad_norm": 444.2590438167381, "learning_rate": 1.44123354881573e-05, "loss": 264.6602, "step": 18770 }, { "epoch": 0.36118683918242533, "grad_norm": 593.5762996181691, "learning_rate": 1.4406857753508772e-05, "loss": 277.1706, "step": 18780 }, { "epoch": 0.36137916444290585, "grad_norm": 498.61812297220695, "learning_rate": 1.4401378377378512e-05, "loss": 274.0979, "step": 18790 }, { "epoch": 0.3615714897033864, "grad_norm": 464.1576611937751, "learning_rate": 1.43958973618075e-05, "loss": 265.5725, "step": 18800 }, { "epoch": 0.3617638149638669, "grad_norm": 521.681813003637, "learning_rate": 1.4390414708837322e-05, "loss": 266.8785, "step": 18810 }, { "epoch": 0.3619561402243474, "grad_norm": 487.28500492160447, "learning_rate": 1.4384930420510173e-05, "loss": 268.4358, "step": 18820 }, { "epoch": 0.36214846548482793, "grad_norm": 2610.6643410670968, "learning_rate": 1.4379444498868864e-05, "loss": 269.2086, "step": 18830 }, { "epoch": 0.36234079074530845, "grad_norm": 537.8437440046431, "learning_rate": 1.4373956945956807e-05, "loss": 284.2879, "step": 18840 }, { "epoch": 0.36253311600578897, "grad_norm": 499.0172686886838, "learning_rate": 1.4368467763818026e-05, "loss": 266.1877, "step": 18850 }, { "epoch": 0.3627254412662695, "grad_norm": 502.55642424490685, "learning_rate": 1.436297695449715e-05, "loss": 277.1923, "step": 18860 }, { "epoch": 0.36291776652675006, "grad_norm": 502.29657409854235, "learning_rate": 1.4357484520039412e-05, "loss": 267.7814, "step": 18870 }, { "epoch": 0.3631100917872306, "grad_norm": 509.966259301657, "learning_rate": 1.4351990462490662e-05, "loss": 270.7, "step": 18880 }, { "epoch": 0.3633024170477111, "grad_norm": 470.24477104277145, "learning_rate": 1.434649478389734e-05, "loss": 265.3593, "step": 18890 }, { "epoch": 0.3634947423081916, "grad_norm": 469.71886315911667, "learning_rate": 1.4340997486306491e-05, "loss": 278.0755, "step": 18900 }, { "epoch": 0.36368706756867214, "grad_norm": 453.5521401137459, "learning_rate": 1.4335498571765777e-05, "loss": 277.2238, "step": 18910 }, { "epoch": 0.36387939282915266, "grad_norm": 498.7986418858856, "learning_rate": 1.4329998042323447e-05, "loss": 262.8456, "step": 18920 }, { "epoch": 0.3640717180896332, "grad_norm": 492.73132779524826, "learning_rate": 1.4324495900028358e-05, "loss": 277.1212, "step": 18930 }, { "epoch": 0.3642640433501137, "grad_norm": 491.47538477827686, "learning_rate": 1.4318992146929967e-05, "loss": 270.7526, "step": 18940 }, { "epoch": 0.3644563686105942, "grad_norm": 453.66565326216687, "learning_rate": 1.4313486785078335e-05, "loss": 269.5168, "step": 18950 }, { "epoch": 0.36464869387107474, "grad_norm": 468.63668189220294, "learning_rate": 1.4307979816524111e-05, "loss": 274.1952, "step": 18960 }, { "epoch": 0.3648410191315553, "grad_norm": 475.51421977181815, "learning_rate": 1.4302471243318554e-05, "loss": 275.7419, "step": 18970 }, { "epoch": 0.36503334439203583, "grad_norm": 449.11468583079665, "learning_rate": 1.4296961067513519e-05, "loss": 271.0766, "step": 18980 }, { "epoch": 0.36522566965251635, "grad_norm": 496.7789064901429, "learning_rate": 1.4291449291161452e-05, "loss": 269.3588, "step": 18990 }, { "epoch": 0.36541799491299687, "grad_norm": 520.561948848321, "learning_rate": 1.4285935916315401e-05, "loss": 268.3722, "step": 19000 }, { "epoch": 0.3656103201734774, "grad_norm": 556.507199075098, "learning_rate": 1.4280420945029004e-05, "loss": 280.2751, "step": 19010 }, { "epoch": 0.3658026454339579, "grad_norm": 459.33758252701904, "learning_rate": 1.4274904379356498e-05, "loss": 273.0975, "step": 19020 }, { "epoch": 0.3659949706944384, "grad_norm": 510.58230771907785, "learning_rate": 1.4269386221352714e-05, "loss": 270.5178, "step": 19030 }, { "epoch": 0.36618729595491895, "grad_norm": 467.9697920669793, "learning_rate": 1.4263866473073076e-05, "loss": 268.6472, "step": 19040 }, { "epoch": 0.36637962121539946, "grad_norm": 529.010191046606, "learning_rate": 1.4258345136573595e-05, "loss": 264.4419, "step": 19050 }, { "epoch": 0.36657194647588, "grad_norm": 513.1160209826901, "learning_rate": 1.4252822213910878e-05, "loss": 272.1163, "step": 19060 }, { "epoch": 0.36676427173636056, "grad_norm": 519.091158590268, "learning_rate": 1.4247297707142126e-05, "loss": 268.8429, "step": 19070 }, { "epoch": 0.3669565969968411, "grad_norm": 457.3375080569709, "learning_rate": 1.4241771618325123e-05, "loss": 268.8333, "step": 19080 }, { "epoch": 0.3671489222573216, "grad_norm": 456.79388228896937, "learning_rate": 1.4236243949518249e-05, "loss": 294.9989, "step": 19090 }, { "epoch": 0.3673412475178021, "grad_norm": 490.57042549721854, "learning_rate": 1.4230714702780466e-05, "loss": 269.3215, "step": 19100 }, { "epoch": 0.36753357277828264, "grad_norm": 491.6619434120859, "learning_rate": 1.422518388017133e-05, "loss": 274.9493, "step": 19110 }, { "epoch": 0.36772589803876315, "grad_norm": 458.52854246821556, "learning_rate": 1.4219651483750978e-05, "loss": 267.6029, "step": 19120 }, { "epoch": 0.3679182232992437, "grad_norm": 489.66741656386336, "learning_rate": 1.4214117515580139e-05, "loss": 272.3837, "step": 19130 }, { "epoch": 0.3681105485597242, "grad_norm": 528.4465308827021, "learning_rate": 1.4208581977720124e-05, "loss": 275.2206, "step": 19140 }, { "epoch": 0.3683028738202047, "grad_norm": 517.4333131273371, "learning_rate": 1.420304487223283e-05, "loss": 265.3739, "step": 19150 }, { "epoch": 0.36849519908068523, "grad_norm": 454.28865672013114, "learning_rate": 1.4197506201180737e-05, "loss": 270.5704, "step": 19160 }, { "epoch": 0.3686875243411658, "grad_norm": 486.3927113540107, "learning_rate": 1.4191965966626908e-05, "loss": 261.7958, "step": 19170 }, { "epoch": 0.3688798496016463, "grad_norm": 466.78083849002485, "learning_rate": 1.418642417063499e-05, "loss": 263.7457, "step": 19180 }, { "epoch": 0.36907217486212684, "grad_norm": 451.2882059299501, "learning_rate": 1.4180880815269207e-05, "loss": 261.4978, "step": 19190 }, { "epoch": 0.36926450012260736, "grad_norm": 467.6685539093852, "learning_rate": 1.4175335902594372e-05, "loss": 267.75, "step": 19200 }, { "epoch": 0.3694568253830879, "grad_norm": 473.7323917470346, "learning_rate": 1.416978943467587e-05, "loss": 273.3609, "step": 19210 }, { "epoch": 0.3696491506435684, "grad_norm": 509.82071496217156, "learning_rate": 1.4164241413579669e-05, "loss": 274.5023, "step": 19220 }, { "epoch": 0.3698414759040489, "grad_norm": 573.7533306731347, "learning_rate": 1.4158691841372318e-05, "loss": 276.4014, "step": 19230 }, { "epoch": 0.37003380116452944, "grad_norm": 484.9879111344704, "learning_rate": 1.4153140720120936e-05, "loss": 266.9, "step": 19240 }, { "epoch": 0.37022612642500996, "grad_norm": 506.2506264507777, "learning_rate": 1.4147588051893233e-05, "loss": 266.8248, "step": 19250 }, { "epoch": 0.3704184516854905, "grad_norm": 471.4549656744703, "learning_rate": 1.4142033838757476e-05, "loss": 268.2752, "step": 19260 }, { "epoch": 0.370610776945971, "grad_norm": 463.65869752370816, "learning_rate": 1.4136478082782525e-05, "loss": 268.3878, "step": 19270 }, { "epoch": 0.3708031022064516, "grad_norm": 503.20251992758267, "learning_rate": 1.4130920786037798e-05, "loss": 269.5339, "step": 19280 }, { "epoch": 0.3709954274669321, "grad_norm": 508.63307278815176, "learning_rate": 1.412536195059331e-05, "loss": 274.3348, "step": 19290 }, { "epoch": 0.3711877527274126, "grad_norm": 534.9970695527994, "learning_rate": 1.4119801578519625e-05, "loss": 275.4488, "step": 19300 }, { "epoch": 0.37138007798789313, "grad_norm": 465.4836706905475, "learning_rate": 1.4114239671887892e-05, "loss": 277.5209, "step": 19310 }, { "epoch": 0.37157240324837365, "grad_norm": 523.6302016167372, "learning_rate": 1.4108676232769831e-05, "loss": 269.7658, "step": 19320 }, { "epoch": 0.37176472850885417, "grad_norm": 490.53360691166097, "learning_rate": 1.410311126323773e-05, "loss": 269.1929, "step": 19330 }, { "epoch": 0.3719570537693347, "grad_norm": 478.5589465618871, "learning_rate": 1.409754476536445e-05, "loss": 263.6918, "step": 19340 }, { "epoch": 0.3721493790298152, "grad_norm": 473.541200896869, "learning_rate": 1.4091976741223414e-05, "loss": 263.5954, "step": 19350 }, { "epoch": 0.3723417042902957, "grad_norm": 596.4861586242048, "learning_rate": 1.408640719288863e-05, "loss": 282.8791, "step": 19360 }, { "epoch": 0.37253402955077625, "grad_norm": 454.33202659460784, "learning_rate": 1.408083612243465e-05, "loss": 275.5541, "step": 19370 }, { "epoch": 0.3727263548112568, "grad_norm": 487.7196103265241, "learning_rate": 1.4075263531936614e-05, "loss": 273.0452, "step": 19380 }, { "epoch": 0.37291868007173734, "grad_norm": 520.2335507430199, "learning_rate": 1.4069689423470219e-05, "loss": 264.2038, "step": 19390 }, { "epoch": 0.37311100533221786, "grad_norm": 493.9525057118881, "learning_rate": 1.4064113799111725e-05, "loss": 271.7883, "step": 19400 }, { "epoch": 0.3733033305926984, "grad_norm": 494.2036565145267, "learning_rate": 1.405853666093796e-05, "loss": 266.2629, "step": 19410 }, { "epoch": 0.3734956558531789, "grad_norm": 560.1117951292418, "learning_rate": 1.405295801102632e-05, "loss": 270.0681, "step": 19420 }, { "epoch": 0.3736879811136594, "grad_norm": 490.2307029611529, "learning_rate": 1.4047377851454758e-05, "loss": 275.5943, "step": 19430 }, { "epoch": 0.37388030637413994, "grad_norm": 491.3590565361957, "learning_rate": 1.4041796184301788e-05, "loss": 262.3873, "step": 19440 }, { "epoch": 0.37407263163462046, "grad_norm": 492.35585428770435, "learning_rate": 1.4036213011646496e-05, "loss": 274.749, "step": 19450 }, { "epoch": 0.374264956895101, "grad_norm": 568.1851303891046, "learning_rate": 1.4030628335568515e-05, "loss": 276.2821, "step": 19460 }, { "epoch": 0.3744572821555815, "grad_norm": 495.200883399743, "learning_rate": 1.4025042158148048e-05, "loss": 268.6443, "step": 19470 }, { "epoch": 0.37464960741606207, "grad_norm": 535.8971360833916, "learning_rate": 1.4019454481465853e-05, "loss": 271.2471, "step": 19480 }, { "epoch": 0.3748419326765426, "grad_norm": 502.58996637213914, "learning_rate": 1.4013865307603248e-05, "loss": 264.8398, "step": 19490 }, { "epoch": 0.3750342579370231, "grad_norm": 473.907021395529, "learning_rate": 1.4008274638642103e-05, "loss": 275.0792, "step": 19500 }, { "epoch": 0.3752265831975036, "grad_norm": 447.78039303906274, "learning_rate": 1.4002682476664857e-05, "loss": 265.4133, "step": 19510 }, { "epoch": 0.37541890845798415, "grad_norm": 469.60597295755775, "learning_rate": 1.3997088823754494e-05, "loss": 261.3435, "step": 19520 }, { "epoch": 0.37561123371846467, "grad_norm": 548.0002170760671, "learning_rate": 1.399149368199456e-05, "loss": 272.6939, "step": 19530 }, { "epoch": 0.3758035589789452, "grad_norm": 449.1889657975298, "learning_rate": 1.398589705346915e-05, "loss": 266.1523, "step": 19540 }, { "epoch": 0.3759958842394257, "grad_norm": 528.7858663903933, "learning_rate": 1.3980298940262918e-05, "loss": 267.7942, "step": 19550 }, { "epoch": 0.3761882094999062, "grad_norm": 490.66748329643616, "learning_rate": 1.3974699344461065e-05, "loss": 266.4364, "step": 19560 }, { "epoch": 0.37638053476038674, "grad_norm": 495.24507533583477, "learning_rate": 1.396909826814935e-05, "loss": 271.0768, "step": 19570 }, { "epoch": 0.3765728600208673, "grad_norm": 503.8560176492099, "learning_rate": 1.3963495713414085e-05, "loss": 267.4308, "step": 19580 }, { "epoch": 0.37676518528134784, "grad_norm": 422.63297351147776, "learning_rate": 1.3957891682342127e-05, "loss": 270.8489, "step": 19590 }, { "epoch": 0.37695751054182836, "grad_norm": 525.3053386685739, "learning_rate": 1.3952286177020879e-05, "loss": 269.879, "step": 19600 }, { "epoch": 0.3771498358023089, "grad_norm": 464.7862153008228, "learning_rate": 1.3946679199538308e-05, "loss": 264.1066, "step": 19610 }, { "epoch": 0.3773421610627894, "grad_norm": 443.0844224733384, "learning_rate": 1.3941070751982917e-05, "loss": 263.533, "step": 19620 }, { "epoch": 0.3775344863232699, "grad_norm": 453.0124217089098, "learning_rate": 1.3935460836443758e-05, "loss": 266.8518, "step": 19630 }, { "epoch": 0.37772681158375043, "grad_norm": 476.2180584075246, "learning_rate": 1.3929849455010433e-05, "loss": 273.0895, "step": 19640 }, { "epoch": 0.37791913684423095, "grad_norm": 495.1021887537681, "learning_rate": 1.3924236609773094e-05, "loss": 269.4132, "step": 19650 }, { "epoch": 0.37811146210471147, "grad_norm": 480.3981339495872, "learning_rate": 1.3918622302822425e-05, "loss": 267.0504, "step": 19660 }, { "epoch": 0.378303787365192, "grad_norm": 460.9074351957358, "learning_rate": 1.391300653624967e-05, "loss": 275.5027, "step": 19670 }, { "epoch": 0.37849611262567256, "grad_norm": 440.80488409349806, "learning_rate": 1.39073893121466e-05, "loss": 269.0255, "step": 19680 }, { "epoch": 0.3786884378861531, "grad_norm": 496.1958422879764, "learning_rate": 1.3901770632605546e-05, "loss": 267.1534, "step": 19690 }, { "epoch": 0.3788807631466336, "grad_norm": 472.26094418744105, "learning_rate": 1.3896150499719372e-05, "loss": 263.1359, "step": 19700 }, { "epoch": 0.3790730884071141, "grad_norm": 485.96661106237144, "learning_rate": 1.3890528915581482e-05, "loss": 269.3168, "step": 19710 }, { "epoch": 0.37926541366759464, "grad_norm": 469.9738376511845, "learning_rate": 1.3884905882285829e-05, "loss": 269.5454, "step": 19720 }, { "epoch": 0.37945773892807516, "grad_norm": 531.450695464184, "learning_rate": 1.3879281401926894e-05, "loss": 263.9205, "step": 19730 }, { "epoch": 0.3796500641885557, "grad_norm": 475.7810674544225, "learning_rate": 1.3873655476599707e-05, "loss": 264.1166, "step": 19740 }, { "epoch": 0.3798423894490362, "grad_norm": 459.702197523339, "learning_rate": 1.3868028108399829e-05, "loss": 266.0681, "step": 19750 }, { "epoch": 0.3800347147095167, "grad_norm": 485.09482582756414, "learning_rate": 1.3862399299423364e-05, "loss": 266.1789, "step": 19760 }, { "epoch": 0.38022703996999724, "grad_norm": 512.4605800057825, "learning_rate": 1.3856769051766947e-05, "loss": 267.8942, "step": 19770 }, { "epoch": 0.38041936523047776, "grad_norm": 455.0610010176158, "learning_rate": 1.3851137367527757e-05, "loss": 257.9983, "step": 19780 }, { "epoch": 0.38061169049095833, "grad_norm": 494.80515284171105, "learning_rate": 1.3845504248803501e-05, "loss": 270.2606, "step": 19790 }, { "epoch": 0.38080401575143885, "grad_norm": 463.62877696576777, "learning_rate": 1.383986969769242e-05, "loss": 276.2922, "step": 19800 }, { "epoch": 0.38099634101191937, "grad_norm": 481.8753010398581, "learning_rate": 1.38342337162933e-05, "loss": 270.7017, "step": 19810 }, { "epoch": 0.3811886662723999, "grad_norm": 518.2911861398973, "learning_rate": 1.3828596306705442e-05, "loss": 264.0347, "step": 19820 }, { "epoch": 0.3813809915328804, "grad_norm": 476.25043948059556, "learning_rate": 1.3822957471028693e-05, "loss": 258.8927, "step": 19830 }, { "epoch": 0.38157331679336093, "grad_norm": 456.0857333807183, "learning_rate": 1.3817317211363422e-05, "loss": 259.2476, "step": 19840 }, { "epoch": 0.38176564205384145, "grad_norm": 591.7758307029712, "learning_rate": 1.3811675529810535e-05, "loss": 264.3246, "step": 19850 }, { "epoch": 0.38195796731432197, "grad_norm": 480.1235787290731, "learning_rate": 1.3806032428471463e-05, "loss": 267.5823, "step": 19860 }, { "epoch": 0.3821502925748025, "grad_norm": 490.12823640205306, "learning_rate": 1.3800387909448171e-05, "loss": 268.1071, "step": 19870 }, { "epoch": 0.382342617835283, "grad_norm": 482.5637101031617, "learning_rate": 1.3794741974843154e-05, "loss": 272.2428, "step": 19880 }, { "epoch": 0.3825349430957636, "grad_norm": 463.864497799353, "learning_rate": 1.3789094626759419e-05, "loss": 262.553, "step": 19890 }, { "epoch": 0.3827272683562441, "grad_norm": 451.326551759344, "learning_rate": 1.3783445867300515e-05, "loss": 271.138, "step": 19900 }, { "epoch": 0.3829195936167246, "grad_norm": 482.5981306829182, "learning_rate": 1.3777795698570511e-05, "loss": 271.6907, "step": 19910 }, { "epoch": 0.38311191887720514, "grad_norm": 462.3405191740387, "learning_rate": 1.3772144122674e-05, "loss": 261.1419, "step": 19920 }, { "epoch": 0.38330424413768566, "grad_norm": 456.3619855606467, "learning_rate": 1.3766491141716103e-05, "loss": 263.4425, "step": 19930 }, { "epoch": 0.3834965693981662, "grad_norm": 470.32262807585965, "learning_rate": 1.3760836757802462e-05, "loss": 267.8558, "step": 19940 }, { "epoch": 0.3836888946586467, "grad_norm": 458.30893218538426, "learning_rate": 1.3755180973039241e-05, "loss": 269.731, "step": 19950 }, { "epoch": 0.3838812199191272, "grad_norm": 490.4249697361625, "learning_rate": 1.3749523789533128e-05, "loss": 265.6844, "step": 19960 }, { "epoch": 0.38407354517960773, "grad_norm": 451.9255863952605, "learning_rate": 1.374386520939133e-05, "loss": 261.8062, "step": 19970 }, { "epoch": 0.38426587044008825, "grad_norm": 587.6749626221176, "learning_rate": 1.3738205234721568e-05, "loss": 279.9919, "step": 19980 }, { "epoch": 0.3844581957005688, "grad_norm": 516.8899606675635, "learning_rate": 1.3732543867632098e-05, "loss": 266.4001, "step": 19990 }, { "epoch": 0.38465052096104935, "grad_norm": 480.5518795120685, "learning_rate": 1.3726881110231682e-05, "loss": 265.4916, "step": 20000 }, { "epoch": 0.38484284622152987, "grad_norm": 555.3792537867905, "learning_rate": 1.3721216964629605e-05, "loss": 269.9944, "step": 20010 }, { "epoch": 0.3850351714820104, "grad_norm": 506.87884985064585, "learning_rate": 1.3715551432935664e-05, "loss": 276.7794, "step": 20020 }, { "epoch": 0.3852274967424909, "grad_norm": 478.03257009871555, "learning_rate": 1.3709884517260178e-05, "loss": 277.7954, "step": 20030 }, { "epoch": 0.3854198220029714, "grad_norm": 457.637534653076, "learning_rate": 1.3704216219713984e-05, "loss": 260.3147, "step": 20040 }, { "epoch": 0.38561214726345194, "grad_norm": 515.880634704802, "learning_rate": 1.3698546542408424e-05, "loss": 267.1376, "step": 20050 }, { "epoch": 0.38580447252393246, "grad_norm": 479.2124703567474, "learning_rate": 1.369287548745536e-05, "loss": 264.1758, "step": 20060 }, { "epoch": 0.385996797784413, "grad_norm": 683.7181567994502, "learning_rate": 1.3687203056967165e-05, "loss": 269.4074, "step": 20070 }, { "epoch": 0.3861891230448935, "grad_norm": 546.1825043796105, "learning_rate": 1.368152925305673e-05, "loss": 272.3967, "step": 20080 }, { "epoch": 0.3863814483053741, "grad_norm": 469.7635754984195, "learning_rate": 1.3675854077837449e-05, "loss": 268.2429, "step": 20090 }, { "epoch": 0.3865737735658546, "grad_norm": 431.77778742437175, "learning_rate": 1.3670177533423234e-05, "loss": 258.7614, "step": 20100 }, { "epoch": 0.3867660988263351, "grad_norm": 500.68929215513714, "learning_rate": 1.3664499621928502e-05, "loss": 264.5214, "step": 20110 }, { "epoch": 0.38695842408681563, "grad_norm": 465.9139099305224, "learning_rate": 1.3658820345468183e-05, "loss": 282.7994, "step": 20120 }, { "epoch": 0.38715074934729615, "grad_norm": 497.18925828839093, "learning_rate": 1.365313970615771e-05, "loss": 260.5789, "step": 20130 }, { "epoch": 0.38734307460777667, "grad_norm": 522.7577508611184, "learning_rate": 1.3647457706113031e-05, "loss": 270.7295, "step": 20140 }, { "epoch": 0.3875353998682572, "grad_norm": 484.58468255015976, "learning_rate": 1.36417743474506e-05, "loss": 271.1447, "step": 20150 }, { "epoch": 0.3877277251287377, "grad_norm": 455.9399960984987, "learning_rate": 1.3636089632287369e-05, "loss": 278.3718, "step": 20160 }, { "epoch": 0.38792005038921823, "grad_norm": 438.9810801149823, "learning_rate": 1.36304035627408e-05, "loss": 270.3828, "step": 20170 }, { "epoch": 0.38811237564969875, "grad_norm": 521.92431656634, "learning_rate": 1.3624716140928861e-05, "loss": 276.9672, "step": 20180 }, { "epoch": 0.38830470091017927, "grad_norm": 479.2658690926107, "learning_rate": 1.3619027368970025e-05, "loss": 270.019, "step": 20190 }, { "epoch": 0.38849702617065984, "grad_norm": 539.2950852697795, "learning_rate": 1.3613337248983265e-05, "loss": 264.5474, "step": 20200 }, { "epoch": 0.38868935143114036, "grad_norm": 521.2340223855009, "learning_rate": 1.3607645783088055e-05, "loss": 269.4898, "step": 20210 }, { "epoch": 0.3888816766916209, "grad_norm": 478.6268067284272, "learning_rate": 1.3601952973404376e-05, "loss": 274.8288, "step": 20220 }, { "epoch": 0.3890740019521014, "grad_norm": 452.593598304092, "learning_rate": 1.35962588220527e-05, "loss": 268.0504, "step": 20230 }, { "epoch": 0.3892663272125819, "grad_norm": 488.63591962836944, "learning_rate": 1.3590563331154008e-05, "loss": 276.3167, "step": 20240 }, { "epoch": 0.38945865247306244, "grad_norm": 527.0343169877652, "learning_rate": 1.3584866502829774e-05, "loss": 270.3584, "step": 20250 }, { "epoch": 0.38965097773354296, "grad_norm": 508.6791754239123, "learning_rate": 1.3579168339201975e-05, "loss": 267.6406, "step": 20260 }, { "epoch": 0.3898433029940235, "grad_norm": 500.0040765716491, "learning_rate": 1.3573468842393077e-05, "loss": 264.213, "step": 20270 }, { "epoch": 0.390035628254504, "grad_norm": 480.87523411146907, "learning_rate": 1.356776801452606e-05, "loss": 268.1814, "step": 20280 }, { "epoch": 0.3902279535149845, "grad_norm": 497.87665383882864, "learning_rate": 1.3562065857724378e-05, "loss": 268.274, "step": 20290 }, { "epoch": 0.3904202787754651, "grad_norm": 517.1153571266282, "learning_rate": 1.3556362374111993e-05, "loss": 264.8947, "step": 20300 }, { "epoch": 0.3906126040359456, "grad_norm": 516.7694257062676, "learning_rate": 1.3550657565813362e-05, "loss": 271.0774, "step": 20310 }, { "epoch": 0.39080492929642613, "grad_norm": 471.1677066651554, "learning_rate": 1.3544951434953423e-05, "loss": 261.9783, "step": 20320 }, { "epoch": 0.39099725455690665, "grad_norm": 475.4164801434219, "learning_rate": 1.3539243983657627e-05, "loss": 266.1654, "step": 20330 }, { "epoch": 0.39118957981738717, "grad_norm": 465.25438418996094, "learning_rate": 1.3533535214051896e-05, "loss": 265.0667, "step": 20340 }, { "epoch": 0.3913819050778677, "grad_norm": 636.6946393412486, "learning_rate": 1.3527825128262656e-05, "loss": 266.78, "step": 20350 }, { "epoch": 0.3915742303383482, "grad_norm": 454.03043826764105, "learning_rate": 1.3522113728416821e-05, "loss": 273.3805, "step": 20360 }, { "epoch": 0.3917665555988287, "grad_norm": 439.79846213408223, "learning_rate": 1.3516401016641793e-05, "loss": 262.7015, "step": 20370 }, { "epoch": 0.39195888085930924, "grad_norm": 459.73154660133, "learning_rate": 1.351068699506546e-05, "loss": 267.3195, "step": 20380 }, { "epoch": 0.39215120611978976, "grad_norm": 461.65266457074955, "learning_rate": 1.3504971665816202e-05, "loss": 263.594, "step": 20390 }, { "epoch": 0.39234353138027034, "grad_norm": 462.50120916625644, "learning_rate": 1.3499255031022887e-05, "loss": 257.6826, "step": 20400 }, { "epoch": 0.39253585664075086, "grad_norm": 448.3118307481313, "learning_rate": 1.3493537092814863e-05, "loss": 268.2437, "step": 20410 }, { "epoch": 0.3927281819012314, "grad_norm": 559.7940647151852, "learning_rate": 1.348781785332197e-05, "loss": 265.3423, "step": 20420 }, { "epoch": 0.3929205071617119, "grad_norm": 435.63117445349616, "learning_rate": 1.3482097314674526e-05, "loss": 263.3138, "step": 20430 }, { "epoch": 0.3931128324221924, "grad_norm": 463.97021445369614, "learning_rate": 1.3476375479003347e-05, "loss": 263.2743, "step": 20440 }, { "epoch": 0.39330515768267293, "grad_norm": 460.0537663900258, "learning_rate": 1.3470652348439715e-05, "loss": 264.744, "step": 20450 }, { "epoch": 0.39349748294315345, "grad_norm": 479.3908671830001, "learning_rate": 1.3464927925115405e-05, "loss": 277.0105, "step": 20460 }, { "epoch": 0.393689808203634, "grad_norm": 481.4851262904071, "learning_rate": 1.3459202211162663e-05, "loss": 263.0484, "step": 20470 }, { "epoch": 0.3938821334641145, "grad_norm": 503.2751886846145, "learning_rate": 1.345347520871423e-05, "loss": 263.6013, "step": 20480 }, { "epoch": 0.394074458724595, "grad_norm": 457.05885782521125, "learning_rate": 1.3447746919903318e-05, "loss": 260.8919, "step": 20490 }, { "epoch": 0.3942667839850756, "grad_norm": 434.93552767656496, "learning_rate": 1.3442017346863618e-05, "loss": 266.9873, "step": 20500 }, { "epoch": 0.3944591092455561, "grad_norm": 491.80384884722565, "learning_rate": 1.3436286491729306e-05, "loss": 273.6724, "step": 20510 }, { "epoch": 0.3946514345060366, "grad_norm": 496.57294155739976, "learning_rate": 1.3430554356635029e-05, "loss": 268.6439, "step": 20520 }, { "epoch": 0.39484375976651714, "grad_norm": 445.4804759916785, "learning_rate": 1.342482094371591e-05, "loss": 261.7515, "step": 20530 }, { "epoch": 0.39503608502699766, "grad_norm": 449.89297898819564, "learning_rate": 1.341908625510755e-05, "loss": 273.4439, "step": 20540 }, { "epoch": 0.3952284102874782, "grad_norm": 463.5409245133629, "learning_rate": 1.341335029294603e-05, "loss": 261.5422, "step": 20550 }, { "epoch": 0.3954207355479587, "grad_norm": 468.96643929554153, "learning_rate": 1.3407613059367898e-05, "loss": 264.3, "step": 20560 }, { "epoch": 0.3956130608084392, "grad_norm": 457.0973214500332, "learning_rate": 1.3401874556510182e-05, "loss": 264.3025, "step": 20570 }, { "epoch": 0.39580538606891974, "grad_norm": 498.0662983959548, "learning_rate": 1.3396134786510375e-05, "loss": 269.4016, "step": 20580 }, { "epoch": 0.39599771132940026, "grad_norm": 472.88080784831743, "learning_rate": 1.3390393751506452e-05, "loss": 265.9489, "step": 20590 }, { "epoch": 0.39619003658988083, "grad_norm": 438.3456069369478, "learning_rate": 1.338465145363685e-05, "loss": 259.0075, "step": 20600 }, { "epoch": 0.39638236185036135, "grad_norm": 489.050290751274, "learning_rate": 1.3378907895040478e-05, "loss": 267.6819, "step": 20610 }, { "epoch": 0.3965746871108419, "grad_norm": 481.1230216655527, "learning_rate": 1.3373163077856724e-05, "loss": 261.676, "step": 20620 }, { "epoch": 0.3967670123713224, "grad_norm": 477.93123040190807, "learning_rate": 1.3367417004225429e-05, "loss": 268.3795, "step": 20630 }, { "epoch": 0.3969593376318029, "grad_norm": 489.0123489085017, "learning_rate": 1.3361669676286919e-05, "loss": 268.391, "step": 20640 }, { "epoch": 0.39715166289228343, "grad_norm": 456.9461427745397, "learning_rate": 1.335592109618197e-05, "loss": 254.8374, "step": 20650 }, { "epoch": 0.39734398815276395, "grad_norm": 468.7076295634319, "learning_rate": 1.335017126605184e-05, "loss": 269.8656, "step": 20660 }, { "epoch": 0.39753631341324447, "grad_norm": 416.20876899899446, "learning_rate": 1.3344420188038243e-05, "loss": 259.3088, "step": 20670 }, { "epoch": 0.397728638673725, "grad_norm": 457.12126878500476, "learning_rate": 1.333866786428336e-05, "loss": 267.4308, "step": 20680 }, { "epoch": 0.3979209639342055, "grad_norm": 496.816954787181, "learning_rate": 1.3332914296929838e-05, "loss": 271.6264, "step": 20690 }, { "epoch": 0.398113289194686, "grad_norm": 494.4445362407307, "learning_rate": 1.3327159488120784e-05, "loss": 263.8834, "step": 20700 }, { "epoch": 0.3983056144551666, "grad_norm": 499.98248915990706, "learning_rate": 1.3321403439999775e-05, "loss": 265.676, "step": 20710 }, { "epoch": 0.3984979397156471, "grad_norm": 513.2703757254474, "learning_rate": 1.3315646154710835e-05, "loss": 271.5798, "step": 20720 }, { "epoch": 0.39869026497612764, "grad_norm": 487.88181581667743, "learning_rate": 1.3309887634398466e-05, "loss": 272.9735, "step": 20730 }, { "epoch": 0.39888259023660816, "grad_norm": 455.8014438117221, "learning_rate": 1.3304127881207614e-05, "loss": 260.4117, "step": 20740 }, { "epoch": 0.3990749154970887, "grad_norm": 508.33662467425575, "learning_rate": 1.3298366897283697e-05, "loss": 270.7414, "step": 20750 }, { "epoch": 0.3992672407575692, "grad_norm": 467.02838435024915, "learning_rate": 1.3292604684772585e-05, "loss": 263.7842, "step": 20760 }, { "epoch": 0.3994595660180497, "grad_norm": 476.0550107206296, "learning_rate": 1.3286841245820605e-05, "loss": 262.7772, "step": 20770 }, { "epoch": 0.39965189127853024, "grad_norm": 449.47408887229426, "learning_rate": 1.3281076582574548e-05, "loss": 262.4583, "step": 20780 }, { "epoch": 0.39984421653901075, "grad_norm": 474.12126929879054, "learning_rate": 1.3275310697181652e-05, "loss": 261.6002, "step": 20790 }, { "epoch": 0.4000365417994913, "grad_norm": 488.14956613833726, "learning_rate": 1.3269543591789616e-05, "loss": 258.3711, "step": 20800 }, { "epoch": 0.40022886705997185, "grad_norm": 481.0221593212846, "learning_rate": 1.3263775268546588e-05, "loss": 266.2099, "step": 20810 }, { "epoch": 0.40042119232045237, "grad_norm": 452.76960322492914, "learning_rate": 1.3258005729601178e-05, "loss": 266.4222, "step": 20820 }, { "epoch": 0.4006135175809329, "grad_norm": 501.9836865809758, "learning_rate": 1.325223497710244e-05, "loss": 269.5421, "step": 20830 }, { "epoch": 0.4008058428414134, "grad_norm": 440.709245488314, "learning_rate": 1.3246463013199882e-05, "loss": 270.561, "step": 20840 }, { "epoch": 0.4009981681018939, "grad_norm": 523.5778750164358, "learning_rate": 1.3240689840043475e-05, "loss": 265.8489, "step": 20850 }, { "epoch": 0.40119049336237445, "grad_norm": 511.2207268835909, "learning_rate": 1.323491545978362e-05, "loss": 262.5227, "step": 20860 }, { "epoch": 0.40138281862285496, "grad_norm": 484.05020111853077, "learning_rate": 1.3229139874571186e-05, "loss": 262.5708, "step": 20870 }, { "epoch": 0.4015751438833355, "grad_norm": 497.8572817887082, "learning_rate": 1.3223363086557477e-05, "loss": 260.0307, "step": 20880 }, { "epoch": 0.401767469143816, "grad_norm": 476.81565277174053, "learning_rate": 1.3217585097894255e-05, "loss": 253.8691, "step": 20890 }, { "epoch": 0.4019597944042965, "grad_norm": 462.57048442453475, "learning_rate": 1.3211805910733724e-05, "loss": 269.6239, "step": 20900 }, { "epoch": 0.4021521196647771, "grad_norm": 416.18844114101756, "learning_rate": 1.3206025527228535e-05, "loss": 262.9443, "step": 20910 }, { "epoch": 0.4023444449252576, "grad_norm": 541.2801206050265, "learning_rate": 1.3200243949531788e-05, "loss": 268.8235, "step": 20920 }, { "epoch": 0.40253677018573814, "grad_norm": 492.4633272315584, "learning_rate": 1.3194461179797023e-05, "loss": 260.9503, "step": 20930 }, { "epoch": 0.40272909544621865, "grad_norm": 463.147895519567, "learning_rate": 1.3188677220178225e-05, "loss": 252.2378, "step": 20940 }, { "epoch": 0.4029214207066992, "grad_norm": 490.0795226107119, "learning_rate": 1.3182892072829828e-05, "loss": 253.5228, "step": 20950 }, { "epoch": 0.4031137459671797, "grad_norm": 807.586274480264, "learning_rate": 1.3177105739906702e-05, "loss": 254.9583, "step": 20960 }, { "epoch": 0.4033060712276602, "grad_norm": 463.13335147863427, "learning_rate": 1.3171318223564156e-05, "loss": 261.9763, "step": 20970 }, { "epoch": 0.40349839648814073, "grad_norm": 433.7243442154332, "learning_rate": 1.3165529525957947e-05, "loss": 265.0036, "step": 20980 }, { "epoch": 0.40369072174862125, "grad_norm": 465.2932577637766, "learning_rate": 1.3159739649244271e-05, "loss": 265.3626, "step": 20990 }, { "epoch": 0.40388304700910177, "grad_norm": 499.9595149596224, "learning_rate": 1.3153948595579764e-05, "loss": 266.7549, "step": 21000 }, { "epoch": 0.40407537226958234, "grad_norm": 447.17264132416426, "learning_rate": 1.3148156367121491e-05, "loss": 268.7102, "step": 21010 }, { "epoch": 0.40426769753006286, "grad_norm": 505.2112041051111, "learning_rate": 1.3142362966026967e-05, "loss": 262.4011, "step": 21020 }, { "epoch": 0.4044600227905434, "grad_norm": 481.5070474788416, "learning_rate": 1.3136568394454136e-05, "loss": 263.7487, "step": 21030 }, { "epoch": 0.4046523480510239, "grad_norm": 434.4098885621158, "learning_rate": 1.313077265456138e-05, "loss": 260.0427, "step": 21040 }, { "epoch": 0.4048446733115044, "grad_norm": 504.62258472747294, "learning_rate": 1.3124975748507514e-05, "loss": 267.7885, "step": 21050 }, { "epoch": 0.40503699857198494, "grad_norm": 540.5160667691246, "learning_rate": 1.3119177678451793e-05, "loss": 271.2426, "step": 21060 }, { "epoch": 0.40522932383246546, "grad_norm": 462.26980614363873, "learning_rate": 1.3113378446553903e-05, "loss": 269.1644, "step": 21070 }, { "epoch": 0.405421649092946, "grad_norm": 487.8128331673341, "learning_rate": 1.3107578054973962e-05, "loss": 265.5426, "step": 21080 }, { "epoch": 0.4056139743534265, "grad_norm": 468.65292747516736, "learning_rate": 1.3101776505872516e-05, "loss": 263.9606, "step": 21090 }, { "epoch": 0.405806299613907, "grad_norm": 538.382125547612, "learning_rate": 1.309597380141055e-05, "loss": 262.1304, "step": 21100 }, { "epoch": 0.40599862487438754, "grad_norm": 452.34882209656166, "learning_rate": 1.3090169943749475e-05, "loss": 261.6204, "step": 21110 }, { "epoch": 0.4061909501348681, "grad_norm": 465.4738287303043, "learning_rate": 1.3084364935051132e-05, "loss": 261.3079, "step": 21120 }, { "epoch": 0.40638327539534863, "grad_norm": 498.179225067857, "learning_rate": 1.307855877747779e-05, "loss": 267.8903, "step": 21130 }, { "epoch": 0.40657560065582915, "grad_norm": 439.4053969868385, "learning_rate": 1.307275147319215e-05, "loss": 254.3707, "step": 21140 }, { "epoch": 0.40676792591630967, "grad_norm": 472.626843666605, "learning_rate": 1.3066943024357333e-05, "loss": 254.7389, "step": 21150 }, { "epoch": 0.4069602511767902, "grad_norm": 494.5128574772442, "learning_rate": 1.306113343313689e-05, "loss": 265.8481, "step": 21160 }, { "epoch": 0.4071525764372707, "grad_norm": 478.0834604506982, "learning_rate": 1.3055322701694801e-05, "loss": 268.7855, "step": 21170 }, { "epoch": 0.4073449016977512, "grad_norm": 514.8479937865139, "learning_rate": 1.3049510832195466e-05, "loss": 268.5113, "step": 21180 }, { "epoch": 0.40753722695823175, "grad_norm": 456.1037000944643, "learning_rate": 1.3043697826803707e-05, "loss": 259.2839, "step": 21190 }, { "epoch": 0.40772955221871227, "grad_norm": 485.5182817775172, "learning_rate": 1.303788368768478e-05, "loss": 252.2747, "step": 21200 }, { "epoch": 0.4079218774791928, "grad_norm": 504.7148204922667, "learning_rate": 1.3032068417004351e-05, "loss": 266.3797, "step": 21210 }, { "epoch": 0.40811420273967336, "grad_norm": 469.8707805820898, "learning_rate": 1.302625201692851e-05, "loss": 262.4649, "step": 21220 }, { "epoch": 0.4083065280001539, "grad_norm": 441.98200994127427, "learning_rate": 1.302043448962378e-05, "loss": 257.2601, "step": 21230 }, { "epoch": 0.4084988532606344, "grad_norm": 478.50193947638127, "learning_rate": 1.301461583725708e-05, "loss": 267.2607, "step": 21240 }, { "epoch": 0.4086911785211149, "grad_norm": 631.6467442795822, "learning_rate": 1.3008796061995772e-05, "loss": 266.5208, "step": 21250 }, { "epoch": 0.40888350378159544, "grad_norm": 482.0852494397625, "learning_rate": 1.3002975166007618e-05, "loss": 258.947, "step": 21260 }, { "epoch": 0.40907582904207596, "grad_norm": 528.167999492159, "learning_rate": 1.2997153151460814e-05, "loss": 261.8091, "step": 21270 }, { "epoch": 0.4092681543025565, "grad_norm": 461.33456620866116, "learning_rate": 1.299133002052396e-05, "loss": 259.8377, "step": 21280 }, { "epoch": 0.409460479563037, "grad_norm": 433.35539917068337, "learning_rate": 1.2985505775366079e-05, "loss": 264.6542, "step": 21290 }, { "epoch": 0.4096528048235175, "grad_norm": 482.49429400187677, "learning_rate": 1.2979680418156604e-05, "loss": 271.3772, "step": 21300 }, { "epoch": 0.40984513008399803, "grad_norm": 569.2518002103108, "learning_rate": 1.297385395106538e-05, "loss": 265.4428, "step": 21310 }, { "epoch": 0.4100374553444786, "grad_norm": 522.7769096559927, "learning_rate": 1.2968026376262679e-05, "loss": 251.5035, "step": 21320 }, { "epoch": 0.4102297806049591, "grad_norm": 519.5724509483889, "learning_rate": 1.2962197695919167e-05, "loss": 268.33, "step": 21330 }, { "epoch": 0.41042210586543965, "grad_norm": 450.5461123970926, "learning_rate": 1.2956367912205939e-05, "loss": 264.3903, "step": 21340 }, { "epoch": 0.41061443112592017, "grad_norm": 428.5107754836988, "learning_rate": 1.2950537027294487e-05, "loss": 258.1454, "step": 21350 }, { "epoch": 0.4108067563864007, "grad_norm": 498.8952887293403, "learning_rate": 1.2944705043356722e-05, "loss": 262.4074, "step": 21360 }, { "epoch": 0.4109990816468812, "grad_norm": 451.5979215755134, "learning_rate": 1.2938871962564965e-05, "loss": 257.2829, "step": 21370 }, { "epoch": 0.4111914069073617, "grad_norm": 549.7022528597881, "learning_rate": 1.2933037787091935e-05, "loss": 268.7281, "step": 21380 }, { "epoch": 0.41138373216784224, "grad_norm": 540.6518831658659, "learning_rate": 1.2927202519110775e-05, "loss": 271.0587, "step": 21390 }, { "epoch": 0.41157605742832276, "grad_norm": 466.1715980949634, "learning_rate": 1.2921366160795017e-05, "loss": 261.0138, "step": 21400 }, { "epoch": 0.4117683826888033, "grad_norm": 417.2243875517313, "learning_rate": 1.2915528714318612e-05, "loss": 267.694, "step": 21410 }, { "epoch": 0.41196070794928386, "grad_norm": 442.2236286965045, "learning_rate": 1.2909690181855914e-05, "loss": 259.7224, "step": 21420 }, { "epoch": 0.4121530332097644, "grad_norm": 471.0412096039758, "learning_rate": 1.290385056558168e-05, "loss": 261.5262, "step": 21430 }, { "epoch": 0.4123453584702449, "grad_norm": 440.8680488355472, "learning_rate": 1.2898009867671066e-05, "loss": 261.603, "step": 21440 }, { "epoch": 0.4125376837307254, "grad_norm": 493.29723829932004, "learning_rate": 1.2892168090299639e-05, "loss": 256.7897, "step": 21450 }, { "epoch": 0.41273000899120593, "grad_norm": 503.248727037316, "learning_rate": 1.2886325235643367e-05, "loss": 258.0366, "step": 21460 }, { "epoch": 0.41292233425168645, "grad_norm": 441.1032360486992, "learning_rate": 1.288048130587861e-05, "loss": 256.1215, "step": 21470 }, { "epoch": 0.41311465951216697, "grad_norm": 473.53679642492835, "learning_rate": 1.287463630318214e-05, "loss": 261.4138, "step": 21480 }, { "epoch": 0.4133069847726475, "grad_norm": 589.302144938101, "learning_rate": 1.2868790229731123e-05, "loss": 262.8046, "step": 21490 }, { "epoch": 0.413499310033128, "grad_norm": 550.2172648288438, "learning_rate": 1.2862943087703127e-05, "loss": 259.2098, "step": 21500 }, { "epoch": 0.41369163529360853, "grad_norm": 478.9256732741642, "learning_rate": 1.2857094879276115e-05, "loss": 266.143, "step": 21510 }, { "epoch": 0.4138839605540891, "grad_norm": 589.5836595002756, "learning_rate": 1.2851245606628447e-05, "loss": 262.937, "step": 21520 }, { "epoch": 0.4140762858145696, "grad_norm": 494.61229077440214, "learning_rate": 1.2845395271938876e-05, "loss": 259.5427, "step": 21530 }, { "epoch": 0.41426861107505014, "grad_norm": 547.5338493808231, "learning_rate": 1.2839543877386562e-05, "loss": 255.1824, "step": 21540 }, { "epoch": 0.41446093633553066, "grad_norm": 528.3897621747665, "learning_rate": 1.283369142515105e-05, "loss": 264.4911, "step": 21550 }, { "epoch": 0.4146532615960112, "grad_norm": 528.8073980332322, "learning_rate": 1.282783791741228e-05, "loss": 271.3275, "step": 21560 }, { "epoch": 0.4148455868564917, "grad_norm": 1853.9268192141426, "learning_rate": 1.2821983356350593e-05, "loss": 280.1635, "step": 21570 }, { "epoch": 0.4150379121169722, "grad_norm": 472.4090220473149, "learning_rate": 1.2816127744146711e-05, "loss": 261.4486, "step": 21580 }, { "epoch": 0.41523023737745274, "grad_norm": 466.8237635234257, "learning_rate": 1.2810271082981755e-05, "loss": 261.6149, "step": 21590 }, { "epoch": 0.41542256263793326, "grad_norm": 459.7344290567423, "learning_rate": 1.2804413375037232e-05, "loss": 256.9443, "step": 21600 }, { "epoch": 0.4156148878984138, "grad_norm": 526.0563777738271, "learning_rate": 1.2798554622495042e-05, "loss": 260.6255, "step": 21610 }, { "epoch": 0.4158072131588943, "grad_norm": 506.1385356690671, "learning_rate": 1.2792694827537477e-05, "loss": 263.7782, "step": 21620 }, { "epoch": 0.41599953841937487, "grad_norm": 437.80211070436144, "learning_rate": 1.2786833992347212e-05, "loss": 258.7773, "step": 21630 }, { "epoch": 0.4161918636798554, "grad_norm": 414.17361030926605, "learning_rate": 1.2780972119107312e-05, "loss": 263.2555, "step": 21640 }, { "epoch": 0.4163841889403359, "grad_norm": 430.04944197431513, "learning_rate": 1.2775109210001225e-05, "loss": 254.02, "step": 21650 }, { "epoch": 0.41657651420081643, "grad_norm": 502.979970285786, "learning_rate": 1.2769245267212789e-05, "loss": 260.4777, "step": 21660 }, { "epoch": 0.41676883946129695, "grad_norm": 504.3363926046776, "learning_rate": 1.2763380292926227e-05, "loss": 259.9194, "step": 21670 }, { "epoch": 0.41696116472177747, "grad_norm": 481.40444777783983, "learning_rate": 1.2757514289326146e-05, "loss": 257.8855, "step": 21680 }, { "epoch": 0.417153489982258, "grad_norm": 471.2924993212668, "learning_rate": 1.2751647258597531e-05, "loss": 259.6434, "step": 21690 }, { "epoch": 0.4173458152427385, "grad_norm": 444.72767899018174, "learning_rate": 1.274577920292576e-05, "loss": 250.2615, "step": 21700 }, { "epoch": 0.417538140503219, "grad_norm": 469.30635449947005, "learning_rate": 1.2739910124496585e-05, "loss": 263.5958, "step": 21710 }, { "epoch": 0.41773046576369954, "grad_norm": 470.64646428910925, "learning_rate": 1.2734040025496137e-05, "loss": 255.7097, "step": 21720 }, { "epoch": 0.4179227910241801, "grad_norm": 456.6113086401947, "learning_rate": 1.2728168908110937e-05, "loss": 258.9102, "step": 21730 }, { "epoch": 0.41811511628466064, "grad_norm": 463.02709316818124, "learning_rate": 1.2722296774527871e-05, "loss": 251.8326, "step": 21740 }, { "epoch": 0.41830744154514116, "grad_norm": 425.60263211467986, "learning_rate": 1.271642362693422e-05, "loss": 270.1656, "step": 21750 }, { "epoch": 0.4184997668056217, "grad_norm": 473.88188215245344, "learning_rate": 1.2710549467517628e-05, "loss": 257.0249, "step": 21760 }, { "epoch": 0.4186920920661022, "grad_norm": 433.8415365562208, "learning_rate": 1.270467429846613e-05, "loss": 246.6032, "step": 21770 }, { "epoch": 0.4188844173265827, "grad_norm": 453.1147717742959, "learning_rate": 1.2698798121968122e-05, "loss": 256.7895, "step": 21780 }, { "epoch": 0.41907674258706323, "grad_norm": 453.940042470423, "learning_rate": 1.2692920940212387e-05, "loss": 267.6017, "step": 21790 }, { "epoch": 0.41926906784754375, "grad_norm": 422.5455373258571, "learning_rate": 1.2687042755388077e-05, "loss": 253.5791, "step": 21800 }, { "epoch": 0.41946139310802427, "grad_norm": 450.2053775453577, "learning_rate": 1.2681163569684718e-05, "loss": 262.1306, "step": 21810 }, { "epoch": 0.4196537183685048, "grad_norm": 451.0061372027502, "learning_rate": 1.2675283385292212e-05, "loss": 261.4113, "step": 21820 }, { "epoch": 0.41984604362898537, "grad_norm": 481.0762192391593, "learning_rate": 1.2669402204400825e-05, "loss": 254.4698, "step": 21830 }, { "epoch": 0.4200383688894659, "grad_norm": 469.2880736612743, "learning_rate": 1.2663520029201206e-05, "loss": 251.548, "step": 21840 }, { "epoch": 0.4202306941499464, "grad_norm": 480.6530382268694, "learning_rate": 1.2657636861884363e-05, "loss": 256.3476, "step": 21850 }, { "epoch": 0.4204230194104269, "grad_norm": 503.05366976347256, "learning_rate": 1.2651752704641686e-05, "loss": 259.4078, "step": 21860 }, { "epoch": 0.42061534467090744, "grad_norm": 438.03418759023884, "learning_rate": 1.2645867559664918e-05, "loss": 261.3653, "step": 21870 }, { "epoch": 0.42080766993138796, "grad_norm": 415.9489057170127, "learning_rate": 1.2639981429146184e-05, "loss": 250.693, "step": 21880 }, { "epoch": 0.4209999951918685, "grad_norm": 475.3108477134088, "learning_rate": 1.2634094315277967e-05, "loss": 256.4124, "step": 21890 }, { "epoch": 0.421192320452349, "grad_norm": 500.94492016642886, "learning_rate": 1.262820622025312e-05, "loss": 265.2192, "step": 21900 }, { "epoch": 0.4213846457128295, "grad_norm": 531.1664146710855, "learning_rate": 1.2622317146264864e-05, "loss": 262.586, "step": 21910 }, { "epoch": 0.42157697097331004, "grad_norm": 461.37948060079873, "learning_rate": 1.2616427095506779e-05, "loss": 261.5061, "step": 21920 }, { "epoch": 0.4217692962337906, "grad_norm": 427.53926808009055, "learning_rate": 1.2610536070172815e-05, "loss": 261.1253, "step": 21930 }, { "epoch": 0.42196162149427113, "grad_norm": 476.7539894271758, "learning_rate": 1.2604644072457277e-05, "loss": 256.7359, "step": 21940 }, { "epoch": 0.42215394675475165, "grad_norm": 464.2967261014823, "learning_rate": 1.259875110455484e-05, "loss": 260.7578, "step": 21950 }, { "epoch": 0.42234627201523217, "grad_norm": 449.9215572169176, "learning_rate": 1.2592857168660535e-05, "loss": 259.3151, "step": 21960 }, { "epoch": 0.4225385972757127, "grad_norm": 469.06798265730447, "learning_rate": 1.2586962266969758e-05, "loss": 261.4145, "step": 21970 }, { "epoch": 0.4227309225361932, "grad_norm": 521.4087708846176, "learning_rate": 1.2581066401678261e-05, "loss": 271.0152, "step": 21980 }, { "epoch": 0.42292324779667373, "grad_norm": 445.9083651602901, "learning_rate": 1.2575169574982158e-05, "loss": 256.6363, "step": 21990 }, { "epoch": 0.42311557305715425, "grad_norm": 440.7079893689907, "learning_rate": 1.2569271789077919e-05, "loss": 261.3805, "step": 22000 }, { "epoch": 0.42330789831763477, "grad_norm": 485.5922587252784, "learning_rate": 1.256337304616237e-05, "loss": 254.0244, "step": 22010 }, { "epoch": 0.4235002235781153, "grad_norm": 435.75623709112523, "learning_rate": 1.25574733484327e-05, "loss": 254.9253, "step": 22020 }, { "epoch": 0.4236925488385958, "grad_norm": 499.78357667385853, "learning_rate": 1.2551572698086446e-05, "loss": 262.204, "step": 22030 }, { "epoch": 0.4238848740990764, "grad_norm": 461.2841644547413, "learning_rate": 1.2545671097321497e-05, "loss": 258.2505, "step": 22040 }, { "epoch": 0.4240771993595569, "grad_norm": 426.4627395338991, "learning_rate": 1.2539768548336112e-05, "loss": 259.5051, "step": 22050 }, { "epoch": 0.4242695246200374, "grad_norm": 483.58233623795405, "learning_rate": 1.2533865053328886e-05, "loss": 259.016, "step": 22060 }, { "epoch": 0.42446184988051794, "grad_norm": 453.30443322323816, "learning_rate": 1.2527960614498778e-05, "loss": 263.4157, "step": 22070 }, { "epoch": 0.42465417514099846, "grad_norm": 510.33885311252754, "learning_rate": 1.252205523404509e-05, "loss": 264.2783, "step": 22080 }, { "epoch": 0.424846500401479, "grad_norm": 468.45344532539286, "learning_rate": 1.2516148914167481e-05, "loss": 262.4432, "step": 22090 }, { "epoch": 0.4250388256619595, "grad_norm": 425.64539208833423, "learning_rate": 1.2510241657065958e-05, "loss": 250.1267, "step": 22100 }, { "epoch": 0.42523115092244, "grad_norm": 444.8032661433248, "learning_rate": 1.2504333464940874e-05, "loss": 251.268, "step": 22110 }, { "epoch": 0.42542347618292053, "grad_norm": 2047.4576304833924, "learning_rate": 1.2498424339992934e-05, "loss": 258.2935, "step": 22120 }, { "epoch": 0.42561580144340105, "grad_norm": 496.2123843311693, "learning_rate": 1.2492514284423194e-05, "loss": 259.7029, "step": 22130 }, { "epoch": 0.42580812670388163, "grad_norm": 451.31672227430596, "learning_rate": 1.2486603300433045e-05, "loss": 256.3716, "step": 22140 }, { "epoch": 0.42600045196436215, "grad_norm": 476.5778994566375, "learning_rate": 1.2480691390224238e-05, "loss": 262.5988, "step": 22150 }, { "epoch": 0.42619277722484267, "grad_norm": 436.23722902174603, "learning_rate": 1.2474778555998855e-05, "loss": 246.5431, "step": 22160 }, { "epoch": 0.4263851024853232, "grad_norm": 459.69425102316336, "learning_rate": 1.246886479995933e-05, "loss": 262.2735, "step": 22170 }, { "epoch": 0.4265774277458037, "grad_norm": 455.6661641040029, "learning_rate": 1.2462950124308444e-05, "loss": 261.5882, "step": 22180 }, { "epoch": 0.4267697530062842, "grad_norm": 503.4685081267895, "learning_rate": 1.2457034531249313e-05, "loss": 257.1507, "step": 22190 }, { "epoch": 0.42696207826676474, "grad_norm": 481.41008037318215, "learning_rate": 1.2451118022985402e-05, "loss": 267.3524, "step": 22200 }, { "epoch": 0.42715440352724526, "grad_norm": 689.4060625509051, "learning_rate": 1.2445200601720504e-05, "loss": 250.75, "step": 22210 }, { "epoch": 0.4273467287877258, "grad_norm": 476.36390421373187, "learning_rate": 1.243928226965877e-05, "loss": 256.4727, "step": 22220 }, { "epoch": 0.4275390540482063, "grad_norm": 440.12614855325927, "learning_rate": 1.2433363029004675e-05, "loss": 254.9294, "step": 22230 }, { "epoch": 0.4277313793086869, "grad_norm": 442.62699204163243, "learning_rate": 1.2427442881963042e-05, "loss": 257.3285, "step": 22240 }, { "epoch": 0.4279237045691674, "grad_norm": 466.74214551272325, "learning_rate": 1.242152183073902e-05, "loss": 260.5592, "step": 22250 }, { "epoch": 0.4281160298296479, "grad_norm": 471.2808107279145, "learning_rate": 1.2415599877538111e-05, "loss": 250.9756, "step": 22260 }, { "epoch": 0.42830835509012843, "grad_norm": 441.43787592468067, "learning_rate": 1.2409677024566145e-05, "loss": 265.465, "step": 22270 }, { "epoch": 0.42850068035060895, "grad_norm": 483.1493834726963, "learning_rate": 1.2403753274029281e-05, "loss": 261.402, "step": 22280 }, { "epoch": 0.4286930056110895, "grad_norm": 457.95176673999674, "learning_rate": 1.2397828628134028e-05, "loss": 250.1696, "step": 22290 }, { "epoch": 0.42888533087157, "grad_norm": 476.74012808717566, "learning_rate": 1.2391903089087208e-05, "loss": 257.1226, "step": 22300 }, { "epoch": 0.4290776561320505, "grad_norm": 485.91918552417684, "learning_rate": 1.2385976659095993e-05, "loss": 255.824, "step": 22310 }, { "epoch": 0.42926998139253103, "grad_norm": 486.2026801353059, "learning_rate": 1.2380049340367876e-05, "loss": 251.959, "step": 22320 }, { "epoch": 0.42946230665301155, "grad_norm": 530.8842861874508, "learning_rate": 1.2374121135110688e-05, "loss": 267.2906, "step": 22330 }, { "epoch": 0.4296546319134921, "grad_norm": 449.3700140095058, "learning_rate": 1.2368192045532586e-05, "loss": 257.396, "step": 22340 }, { "epoch": 0.42984695717397264, "grad_norm": 499.66813060099645, "learning_rate": 1.236226207384206e-05, "loss": 253.6241, "step": 22350 }, { "epoch": 0.43003928243445316, "grad_norm": 459.71767235372903, "learning_rate": 1.2356331222247929e-05, "loss": 260.5438, "step": 22360 }, { "epoch": 0.4302316076949337, "grad_norm": 455.8846773640258, "learning_rate": 1.2350399492959328e-05, "loss": 262.5015, "step": 22370 }, { "epoch": 0.4304239329554142, "grad_norm": 493.59394894441596, "learning_rate": 1.234446688818574e-05, "loss": 258.3056, "step": 22380 }, { "epoch": 0.4306162582158947, "grad_norm": 476.04763204624425, "learning_rate": 1.233853341013695e-05, "loss": 255.3356, "step": 22390 }, { "epoch": 0.43080858347637524, "grad_norm": 449.13623647617436, "learning_rate": 1.233259906102309e-05, "loss": 256.283, "step": 22400 }, { "epoch": 0.43100090873685576, "grad_norm": 418.47346689864503, "learning_rate": 1.2326663843054603e-05, "loss": 253.7366, "step": 22410 }, { "epoch": 0.4311932339973363, "grad_norm": 467.19840956139757, "learning_rate": 1.2320727758442264e-05, "loss": 260.7724, "step": 22420 }, { "epoch": 0.4313855592578168, "grad_norm": 476.0566488083017, "learning_rate": 1.231479080939716e-05, "loss": 265.8249, "step": 22430 }, { "epoch": 0.43157788451829737, "grad_norm": 426.5182115420512, "learning_rate": 1.230885299813071e-05, "loss": 265.0076, "step": 22440 }, { "epoch": 0.4317702097787779, "grad_norm": 502.60556109571854, "learning_rate": 1.2302914326854651e-05, "loss": 258.2654, "step": 22450 }, { "epoch": 0.4319625350392584, "grad_norm": 476.6196111642156, "learning_rate": 1.2296974797781036e-05, "loss": 265.3442, "step": 22460 }, { "epoch": 0.43215486029973893, "grad_norm": 491.5977996643741, "learning_rate": 1.2291034413122247e-05, "loss": 268.0509, "step": 22470 }, { "epoch": 0.43234718556021945, "grad_norm": 485.45530561680806, "learning_rate": 1.2285093175090976e-05, "loss": 256.3687, "step": 22480 }, { "epoch": 0.43253951082069997, "grad_norm": 477.6220432991964, "learning_rate": 1.227915108590024e-05, "loss": 251.5692, "step": 22490 }, { "epoch": 0.4327318360811805, "grad_norm": 426.1059902665939, "learning_rate": 1.2273208147763363e-05, "loss": 246.4806, "step": 22500 }, { "epoch": 0.432924161341661, "grad_norm": 474.1517991404114, "learning_rate": 1.2267264362893995e-05, "loss": 258.7537, "step": 22510 }, { "epoch": 0.4331164866021415, "grad_norm": 470.00063181770497, "learning_rate": 1.2261319733506096e-05, "loss": 257.5305, "step": 22520 }, { "epoch": 0.43330881186262205, "grad_norm": 493.36629571964477, "learning_rate": 1.2255374261813944e-05, "loss": 264.3011, "step": 22530 }, { "epoch": 0.43350113712310256, "grad_norm": 456.7249590892431, "learning_rate": 1.2249427950032127e-05, "loss": 261.1499, "step": 22540 }, { "epoch": 0.43369346238358314, "grad_norm": 506.462238555744, "learning_rate": 1.224348080037555e-05, "loss": 267.5491, "step": 22550 }, { "epoch": 0.43388578764406366, "grad_norm": 478.62500351555593, "learning_rate": 1.2237532815059427e-05, "loss": 254.4973, "step": 22560 }, { "epoch": 0.4340781129045442, "grad_norm": 435.6395897127192, "learning_rate": 1.2231583996299285e-05, "loss": 255.2659, "step": 22570 }, { "epoch": 0.4342704381650247, "grad_norm": 466.68234096018296, "learning_rate": 1.2225634346310962e-05, "loss": 252.281, "step": 22580 }, { "epoch": 0.4344627634255052, "grad_norm": 444.44283779714925, "learning_rate": 1.22196838673106e-05, "loss": 260.5664, "step": 22590 }, { "epoch": 0.43465508868598574, "grad_norm": 482.24347012851155, "learning_rate": 1.2213732561514657e-05, "loss": 262.3756, "step": 22600 }, { "epoch": 0.43484741394646625, "grad_norm": 437.2513456863256, "learning_rate": 1.2207780431139894e-05, "loss": 260.8468, "step": 22610 }, { "epoch": 0.4350397392069468, "grad_norm": 478.72624998312307, "learning_rate": 1.2201827478403385e-05, "loss": 255.498, "step": 22620 }, { "epoch": 0.4352320644674273, "grad_norm": 429.7146375248195, "learning_rate": 1.2195873705522508e-05, "loss": 244.75, "step": 22630 }, { "epoch": 0.4354243897279078, "grad_norm": 530.5027567651761, "learning_rate": 1.2189919114714936e-05, "loss": 247.1865, "step": 22640 }, { "epoch": 0.4356167149883884, "grad_norm": 478.31770513426164, "learning_rate": 1.2183963708198668e-05, "loss": 265.0552, "step": 22650 }, { "epoch": 0.4358090402488689, "grad_norm": 457.59647146706635, "learning_rate": 1.2178007488191983e-05, "loss": 253.0527, "step": 22660 }, { "epoch": 0.4360013655093494, "grad_norm": 443.13810983288755, "learning_rate": 1.2172050456913482e-05, "loss": 259.3701, "step": 22670 }, { "epoch": 0.43619369076982994, "grad_norm": 459.5323209224299, "learning_rate": 1.2166092616582055e-05, "loss": 247.5747, "step": 22680 }, { "epoch": 0.43638601603031046, "grad_norm": 492.00659687861616, "learning_rate": 1.2160133969416903e-05, "loss": 264.748, "step": 22690 }, { "epoch": 0.436578341290791, "grad_norm": 450.67653171967225, "learning_rate": 1.2154174517637526e-05, "loss": 255.2781, "step": 22700 }, { "epoch": 0.4367706665512715, "grad_norm": 451.8655364691753, "learning_rate": 1.2148214263463718e-05, "loss": 248.3648, "step": 22710 }, { "epoch": 0.436962991811752, "grad_norm": 468.4579968499324, "learning_rate": 1.2142253209115577e-05, "loss": 256.2649, "step": 22720 }, { "epoch": 0.43715531707223254, "grad_norm": 450.5044203985601, "learning_rate": 1.2136291356813494e-05, "loss": 265.551, "step": 22730 }, { "epoch": 0.43734764233271306, "grad_norm": 466.91794975899256, "learning_rate": 1.2130328708778162e-05, "loss": 255.8264, "step": 22740 }, { "epoch": 0.43753996759319363, "grad_norm": 441.5102350206535, "learning_rate": 1.2124365267230571e-05, "loss": 259.3831, "step": 22750 }, { "epoch": 0.43773229285367415, "grad_norm": 487.24572690603037, "learning_rate": 1.2118401034392003e-05, "loss": 258.8978, "step": 22760 }, { "epoch": 0.4379246181141547, "grad_norm": 460.67781077753483, "learning_rate": 1.2112436012484035e-05, "loss": 251.7436, "step": 22770 }, { "epoch": 0.4381169433746352, "grad_norm": 446.12348528741194, "learning_rate": 1.210647020372854e-05, "loss": 258.8787, "step": 22780 }, { "epoch": 0.4383092686351157, "grad_norm": 447.42075578421685, "learning_rate": 1.2100503610347686e-05, "loss": 253.0285, "step": 22790 }, { "epoch": 0.43850159389559623, "grad_norm": 454.04740862017104, "learning_rate": 1.2094536234563927e-05, "loss": 259.1296, "step": 22800 }, { "epoch": 0.43869391915607675, "grad_norm": 446.70185991473926, "learning_rate": 1.2088568078600013e-05, "loss": 254.9898, "step": 22810 }, { "epoch": 0.43888624441655727, "grad_norm": 485.8524665221522, "learning_rate": 1.2082599144678983e-05, "loss": 263.8659, "step": 22820 }, { "epoch": 0.4390785696770378, "grad_norm": 465.4629602099813, "learning_rate": 1.2076629435024168e-05, "loss": 258.2121, "step": 22830 }, { "epoch": 0.4392708949375183, "grad_norm": 425.6049277705674, "learning_rate": 1.2070658951859183e-05, "loss": 269.3211, "step": 22840 }, { "epoch": 0.4394632201979989, "grad_norm": 437.8373454637032, "learning_rate": 1.2064687697407939e-05, "loss": 266.0314, "step": 22850 }, { "epoch": 0.4396555454584794, "grad_norm": 464.0106575435338, "learning_rate": 1.2058715673894625e-05, "loss": 250.2758, "step": 22860 }, { "epoch": 0.4398478707189599, "grad_norm": 586.2208323692147, "learning_rate": 1.2052742883543724e-05, "loss": 268.0162, "step": 22870 }, { "epoch": 0.44004019597944044, "grad_norm": 484.90330169836903, "learning_rate": 1.2046769328580004e-05, "loss": 251.0423, "step": 22880 }, { "epoch": 0.44023252123992096, "grad_norm": 456.29633878520326, "learning_rate": 1.204079501122851e-05, "loss": 257.7815, "step": 22890 }, { "epoch": 0.4404248465004015, "grad_norm": 479.2590284121029, "learning_rate": 1.2034819933714576e-05, "loss": 253.3188, "step": 22900 }, { "epoch": 0.440617171760882, "grad_norm": 455.2239237704403, "learning_rate": 1.2028844098263827e-05, "loss": 257.8539, "step": 22910 }, { "epoch": 0.4408094970213625, "grad_norm": 541.7350127978118, "learning_rate": 1.2022867507102159e-05, "loss": 261.571, "step": 22920 }, { "epoch": 0.44100182228184304, "grad_norm": 480.2745326515579, "learning_rate": 1.2016890162455752e-05, "loss": 255.3, "step": 22930 }, { "epoch": 0.44119414754232356, "grad_norm": 443.47417519914757, "learning_rate": 1.2010912066551072e-05, "loss": 250.8066, "step": 22940 }, { "epoch": 0.4413864728028041, "grad_norm": 478.64850379222105, "learning_rate": 1.2004933221614854e-05, "loss": 250.1601, "step": 22950 }, { "epoch": 0.44157879806328465, "grad_norm": 544.0752856101535, "learning_rate": 1.1998953629874126e-05, "loss": 251.2745, "step": 22960 }, { "epoch": 0.44177112332376517, "grad_norm": 452.350869860681, "learning_rate": 1.1992973293556184e-05, "loss": 262.7483, "step": 22970 }, { "epoch": 0.4419634485842457, "grad_norm": 482.2065768136144, "learning_rate": 1.1986992214888607e-05, "loss": 263.3633, "step": 22980 }, { "epoch": 0.4421557738447262, "grad_norm": 463.3403672791953, "learning_rate": 1.1981010396099244e-05, "loss": 255.3194, "step": 22990 }, { "epoch": 0.4423480991052067, "grad_norm": 466.78729720574364, "learning_rate": 1.1975027839416227e-05, "loss": 255.3379, "step": 23000 }, { "epoch": 0.44254042436568725, "grad_norm": 499.88773139956356, "learning_rate": 1.1969044547067961e-05, "loss": 254.3, "step": 23010 }, { "epoch": 0.44273274962616777, "grad_norm": 432.9218738967188, "learning_rate": 1.1963060521283117e-05, "loss": 250.6967, "step": 23020 }, { "epoch": 0.4429250748866483, "grad_norm": 441.4274391268516, "learning_rate": 1.1957075764290652e-05, "loss": 253.8183, "step": 23030 }, { "epoch": 0.4431174001471288, "grad_norm": 420.4371347752832, "learning_rate": 1.1951090278319784e-05, "loss": 263.0922, "step": 23040 }, { "epoch": 0.4433097254076093, "grad_norm": 438.9999602213165, "learning_rate": 1.1945104065600013e-05, "loss": 248.7696, "step": 23050 }, { "epoch": 0.4435020506680899, "grad_norm": 450.09212765840107, "learning_rate": 1.1939117128361101e-05, "loss": 254.6416, "step": 23060 }, { "epoch": 0.4436943759285704, "grad_norm": 595.5102962288122, "learning_rate": 1.1933129468833087e-05, "loss": 249.2376, "step": 23070 }, { "epoch": 0.44388670118905094, "grad_norm": 440.65433686268125, "learning_rate": 1.192714108924627e-05, "loss": 251.1425, "step": 23080 }, { "epoch": 0.44407902644953146, "grad_norm": 517.268187199839, "learning_rate": 1.1921151991831225e-05, "loss": 258.6075, "step": 23090 }, { "epoch": 0.444271351710012, "grad_norm": 432.79297789317343, "learning_rate": 1.1915162178818793e-05, "loss": 261.2917, "step": 23100 }, { "epoch": 0.4444636769704925, "grad_norm": 515.8398529898058, "learning_rate": 1.1909171652440079e-05, "loss": 258.0972, "step": 23110 }, { "epoch": 0.444656002230973, "grad_norm": 422.71187944443324, "learning_rate": 1.1903180414926457e-05, "loss": 253.5417, "step": 23120 }, { "epoch": 0.44484832749145353, "grad_norm": 481.9994465707499, "learning_rate": 1.189718846850956e-05, "loss": 249.982, "step": 23130 }, { "epoch": 0.44504065275193405, "grad_norm": 451.9385686125299, "learning_rate": 1.1891195815421293e-05, "loss": 242.0835, "step": 23140 }, { "epoch": 0.44523297801241457, "grad_norm": 458.08573929983805, "learning_rate": 1.1885202457893819e-05, "loss": 253.6223, "step": 23150 }, { "epoch": 0.44542530327289515, "grad_norm": 433.60250292147254, "learning_rate": 1.1879208398159563e-05, "loss": 256.1767, "step": 23160 }, { "epoch": 0.44561762853337566, "grad_norm": 417.2649607614848, "learning_rate": 1.1873213638451215e-05, "loss": 247.6789, "step": 23170 }, { "epoch": 0.4458099537938562, "grad_norm": 454.1867546595952, "learning_rate": 1.1867218181001725e-05, "loss": 256.11, "step": 23180 }, { "epoch": 0.4460022790543367, "grad_norm": 488.40089969381876, "learning_rate": 1.1861222028044301e-05, "loss": 263.1078, "step": 23190 }, { "epoch": 0.4461946043148172, "grad_norm": 462.6002840704471, "learning_rate": 1.1855225181812408e-05, "loss": 248.574, "step": 23200 }, { "epoch": 0.44638692957529774, "grad_norm": 454.15626057905314, "learning_rate": 1.1849227644539781e-05, "loss": 249.628, "step": 23210 }, { "epoch": 0.44657925483577826, "grad_norm": 435.102089963084, "learning_rate": 1.1843229418460391e-05, "loss": 247.6553, "step": 23220 }, { "epoch": 0.4467715800962588, "grad_norm": 447.0186794227221, "learning_rate": 1.1837230505808485e-05, "loss": 247.3917, "step": 23230 }, { "epoch": 0.4469639053567393, "grad_norm": 482.5627136822678, "learning_rate": 1.1831230908818563e-05, "loss": 249.0831, "step": 23240 }, { "epoch": 0.4471562306172198, "grad_norm": 568.4407633078066, "learning_rate": 1.1825230629725366e-05, "loss": 259.6739, "step": 23250 }, { "epoch": 0.4473485558777004, "grad_norm": 441.3757223921175, "learning_rate": 1.1819229670763908e-05, "loss": 257.217, "step": 23260 }, { "epoch": 0.4475408811381809, "grad_norm": 515.8922182715229, "learning_rate": 1.1813228034169442e-05, "loss": 262.5166, "step": 23270 }, { "epoch": 0.44773320639866143, "grad_norm": 446.86607750340266, "learning_rate": 1.180722572217748e-05, "loss": 262.1952, "step": 23280 }, { "epoch": 0.44792553165914195, "grad_norm": 503.1736591134505, "learning_rate": 1.1801222737023783e-05, "loss": 255.8003, "step": 23290 }, { "epoch": 0.44811785691962247, "grad_norm": 425.88902325715725, "learning_rate": 1.1795219080944366e-05, "loss": 253.7978, "step": 23300 }, { "epoch": 0.448310182180103, "grad_norm": 434.44983563432226, "learning_rate": 1.178921475617549e-05, "loss": 253.9643, "step": 23310 }, { "epoch": 0.4485025074405835, "grad_norm": 459.8931623631675, "learning_rate": 1.1783209764953666e-05, "loss": 258.2973, "step": 23320 }, { "epoch": 0.44869483270106403, "grad_norm": 465.89200127424294, "learning_rate": 1.1777204109515653e-05, "loss": 261.593, "step": 23330 }, { "epoch": 0.44888715796154455, "grad_norm": 470.4229758549265, "learning_rate": 1.1771197792098465e-05, "loss": 252.345, "step": 23340 }, { "epoch": 0.44907948322202507, "grad_norm": 439.1415837531606, "learning_rate": 1.1765190814939351e-05, "loss": 255.1918, "step": 23350 }, { "epoch": 0.4492718084825056, "grad_norm": 473.3546636933446, "learning_rate": 1.175918318027581e-05, "loss": 248.6485, "step": 23360 }, { "epoch": 0.44946413374298616, "grad_norm": 473.26223567284796, "learning_rate": 1.1753174890345591e-05, "loss": 255.5117, "step": 23370 }, { "epoch": 0.4496564590034667, "grad_norm": 474.064042377261, "learning_rate": 1.174716594738668e-05, "loss": 256.0353, "step": 23380 }, { "epoch": 0.4498487842639472, "grad_norm": 530.1382579394015, "learning_rate": 1.1741156353637304e-05, "loss": 247.7151, "step": 23390 }, { "epoch": 0.4500411095244277, "grad_norm": 427.55116437237126, "learning_rate": 1.1735146111335945e-05, "loss": 251.0648, "step": 23400 }, { "epoch": 0.45023343478490824, "grad_norm": 506.92651809226555, "learning_rate": 1.1729135222721315e-05, "loss": 250.9444, "step": 23410 }, { "epoch": 0.45042576004538876, "grad_norm": 473.1743739295858, "learning_rate": 1.1723123690032376e-05, "loss": 242.5035, "step": 23420 }, { "epoch": 0.4506180853058693, "grad_norm": 434.1947656086353, "learning_rate": 1.171711151550832e-05, "loss": 264.2066, "step": 23430 }, { "epoch": 0.4508104105663498, "grad_norm": 477.5351230643194, "learning_rate": 1.1711098701388581e-05, "loss": 253.424, "step": 23440 }, { "epoch": 0.4510027358268303, "grad_norm": 447.8328377689963, "learning_rate": 1.1705085249912837e-05, "loss": 250.5625, "step": 23450 }, { "epoch": 0.45119506108731083, "grad_norm": 420.65551830062424, "learning_rate": 1.1699071163320997e-05, "loss": 255.4377, "step": 23460 }, { "epoch": 0.4513873863477914, "grad_norm": 446.5525879917818, "learning_rate": 1.169305644385321e-05, "loss": 249.6609, "step": 23470 }, { "epoch": 0.4515797116082719, "grad_norm": 520.951696992055, "learning_rate": 1.168704109374986e-05, "loss": 252.9367, "step": 23480 }, { "epoch": 0.45177203686875245, "grad_norm": 441.8970751124328, "learning_rate": 1.1681025115251566e-05, "loss": 256.3322, "step": 23490 }, { "epoch": 0.45196436212923297, "grad_norm": 470.8406876867624, "learning_rate": 1.1675008510599176e-05, "loss": 246.6226, "step": 23500 }, { "epoch": 0.4521566873897135, "grad_norm": 478.44083732437895, "learning_rate": 1.1668991282033784e-05, "loss": 251.7677, "step": 23510 }, { "epoch": 0.452349012650194, "grad_norm": 435.3744683996766, "learning_rate": 1.16629734317967e-05, "loss": 258.2138, "step": 23520 }, { "epoch": 0.4525413379106745, "grad_norm": 411.29350727643117, "learning_rate": 1.165695496212948e-05, "loss": 263.2326, "step": 23530 }, { "epoch": 0.45273366317115504, "grad_norm": 482.24576860111307, "learning_rate": 1.1650935875273901e-05, "loss": 256.6767, "step": 23540 }, { "epoch": 0.45292598843163556, "grad_norm": 418.03357553708787, "learning_rate": 1.1644916173471976e-05, "loss": 251.8268, "step": 23550 }, { "epoch": 0.4531183136921161, "grad_norm": 427.5402308809369, "learning_rate": 1.1638895858965942e-05, "loss": 251.8567, "step": 23560 }, { "epoch": 0.45331063895259666, "grad_norm": 411.4118899248775, "learning_rate": 1.1632874933998268e-05, "loss": 250.1026, "step": 23570 }, { "epoch": 0.4535029642130772, "grad_norm": 422.26116455188844, "learning_rate": 1.1626853400811649e-05, "loss": 254.3002, "step": 23580 }, { "epoch": 0.4536952894735577, "grad_norm": 409.3825065374503, "learning_rate": 1.1620831261649003e-05, "loss": 248.4919, "step": 23590 }, { "epoch": 0.4538876147340382, "grad_norm": 502.34269113830806, "learning_rate": 1.1614808518753485e-05, "loss": 253.1792, "step": 23600 }, { "epoch": 0.45407993999451873, "grad_norm": 427.2176160351355, "learning_rate": 1.1608785174368461e-05, "loss": 255.2146, "step": 23610 }, { "epoch": 0.45427226525499925, "grad_norm": 456.8769255260778, "learning_rate": 1.1602761230737531e-05, "loss": 264.9469, "step": 23620 }, { "epoch": 0.45446459051547977, "grad_norm": 470.04123453353424, "learning_rate": 1.1596736690104514e-05, "loss": 267.1819, "step": 23630 }, { "epoch": 0.4546569157759603, "grad_norm": 465.0725243037591, "learning_rate": 1.1590711554713452e-05, "loss": 249.3309, "step": 23640 }, { "epoch": 0.4548492410364408, "grad_norm": 448.69630738371256, "learning_rate": 1.1584685826808604e-05, "loss": 261.6062, "step": 23650 }, { "epoch": 0.45504156629692133, "grad_norm": 546.2729015603361, "learning_rate": 1.157865950863446e-05, "loss": 254.6912, "step": 23660 }, { "epoch": 0.4552338915574019, "grad_norm": 496.91664114275807, "learning_rate": 1.1572632602435717e-05, "loss": 261.0411, "step": 23670 }, { "epoch": 0.4554262168178824, "grad_norm": 434.51523632768345, "learning_rate": 1.1566605110457305e-05, "loss": 249.2502, "step": 23680 }, { "epoch": 0.45561854207836294, "grad_norm": 463.51171216675505, "learning_rate": 1.1560577034944364e-05, "loss": 255.7892, "step": 23690 }, { "epoch": 0.45581086733884346, "grad_norm": 458.64992963281185, "learning_rate": 1.1554548378142249e-05, "loss": 256.925, "step": 23700 }, { "epoch": 0.456003192599324, "grad_norm": 515.4003121675826, "learning_rate": 1.1548519142296541e-05, "loss": 255.3865, "step": 23710 }, { "epoch": 0.4561955178598045, "grad_norm": 451.14221472506534, "learning_rate": 1.1542489329653024e-05, "loss": 260.194, "step": 23720 }, { "epoch": 0.456387843120285, "grad_norm": 454.22786161886063, "learning_rate": 1.153645894245771e-05, "loss": 245.0625, "step": 23730 }, { "epoch": 0.45658016838076554, "grad_norm": 502.9341806779307, "learning_rate": 1.1530427982956813e-05, "loss": 259.9284, "step": 23740 }, { "epoch": 0.45677249364124606, "grad_norm": 459.0166320562402, "learning_rate": 1.1524396453396767e-05, "loss": 252.9541, "step": 23750 }, { "epoch": 0.4569648189017266, "grad_norm": 491.7858496182521, "learning_rate": 1.1518364356024219e-05, "loss": 257.7152, "step": 23760 }, { "epoch": 0.45715714416220715, "grad_norm": 463.23611240589327, "learning_rate": 1.1512331693086025e-05, "loss": 246.3303, "step": 23770 }, { "epoch": 0.45734946942268767, "grad_norm": 486.631687372408, "learning_rate": 1.1506298466829256e-05, "loss": 259.1904, "step": 23780 }, { "epoch": 0.4575417946831682, "grad_norm": 469.2758785973338, "learning_rate": 1.1500264679501181e-05, "loss": 250.8088, "step": 23790 }, { "epoch": 0.4577341199436487, "grad_norm": 453.26075096354646, "learning_rate": 1.1494230333349292e-05, "loss": 256.325, "step": 23800 }, { "epoch": 0.45792644520412923, "grad_norm": 460.5524748738918, "learning_rate": 1.1488195430621284e-05, "loss": 252.0218, "step": 23810 }, { "epoch": 0.45811877046460975, "grad_norm": 439.6939650960091, "learning_rate": 1.1482159973565051e-05, "loss": 259.9782, "step": 23820 }, { "epoch": 0.45831109572509027, "grad_norm": 508.0750320206364, "learning_rate": 1.147612396442871e-05, "loss": 252.5928, "step": 23830 }, { "epoch": 0.4585034209855708, "grad_norm": 424.31436388147495, "learning_rate": 1.1470087405460572e-05, "loss": 246.4426, "step": 23840 }, { "epoch": 0.4586957462460513, "grad_norm": 450.3182060568921, "learning_rate": 1.1464050298909153e-05, "loss": 245.9021, "step": 23850 }, { "epoch": 0.4588880715065318, "grad_norm": 433.24856456922555, "learning_rate": 1.1458012647023178e-05, "loss": 238.0203, "step": 23860 }, { "epoch": 0.45908039676701234, "grad_norm": 482.7448499182103, "learning_rate": 1.1451974452051572e-05, "loss": 255.4715, "step": 23870 }, { "epoch": 0.4592727220274929, "grad_norm": 475.0546091941969, "learning_rate": 1.1445935716243463e-05, "loss": 251.2, "step": 23880 }, { "epoch": 0.45946504728797344, "grad_norm": 484.97060105554647, "learning_rate": 1.143989644184818e-05, "loss": 253.436, "step": 23890 }, { "epoch": 0.45965737254845396, "grad_norm": 468.34297447904, "learning_rate": 1.1433856631115252e-05, "loss": 261.0416, "step": 23900 }, { "epoch": 0.4598496978089345, "grad_norm": 405.1608644677801, "learning_rate": 1.142781628629441e-05, "loss": 248.2056, "step": 23910 }, { "epoch": 0.460042023069415, "grad_norm": 433.94796888196214, "learning_rate": 1.1421775409635585e-05, "loss": 253.7709, "step": 23920 }, { "epoch": 0.4602343483298955, "grad_norm": 599.1512606561172, "learning_rate": 1.1415734003388899e-05, "loss": 257.7739, "step": 23930 }, { "epoch": 0.46042667359037603, "grad_norm": 467.75025045078866, "learning_rate": 1.1409692069804678e-05, "loss": 256.9482, "step": 23940 }, { "epoch": 0.46061899885085655, "grad_norm": 457.6043584147629, "learning_rate": 1.1403649611133444e-05, "loss": 244.3143, "step": 23950 }, { "epoch": 0.4608113241113371, "grad_norm": 463.9535672458291, "learning_rate": 1.1397606629625913e-05, "loss": 258.4995, "step": 23960 }, { "epoch": 0.4610036493718176, "grad_norm": 492.8718149401336, "learning_rate": 1.1391563127532992e-05, "loss": 250.6034, "step": 23970 }, { "epoch": 0.46119597463229817, "grad_norm": 437.8979606711717, "learning_rate": 1.1385519107105791e-05, "loss": 252.3589, "step": 23980 }, { "epoch": 0.4613882998927787, "grad_norm": 521.9274380503093, "learning_rate": 1.1379474570595604e-05, "loss": 255.4463, "step": 23990 }, { "epoch": 0.4615806251532592, "grad_norm": 428.0969800586121, "learning_rate": 1.1373429520253922e-05, "loss": 244.703, "step": 24000 }, { "epoch": 0.4617729504137397, "grad_norm": 485.61974563074085, "learning_rate": 1.1367383958332427e-05, "loss": 247.4231, "step": 24010 }, { "epoch": 0.46196527567422024, "grad_norm": 522.8341813022549, "learning_rate": 1.1361337887082991e-05, "loss": 251.8473, "step": 24020 }, { "epoch": 0.46215760093470076, "grad_norm": 455.9985505121523, "learning_rate": 1.1355291308757672e-05, "loss": 255.5515, "step": 24030 }, { "epoch": 0.4623499261951813, "grad_norm": 445.6984750674221, "learning_rate": 1.1349244225608727e-05, "loss": 252.9669, "step": 24040 }, { "epoch": 0.4625422514556618, "grad_norm": 428.94798433415053, "learning_rate": 1.1343196639888591e-05, "loss": 256.4869, "step": 24050 }, { "epoch": 0.4627345767161423, "grad_norm": 510.4003073113367, "learning_rate": 1.133714855384989e-05, "loss": 257.6965, "step": 24060 }, { "epoch": 0.46292690197662284, "grad_norm": 460.91183608948995, "learning_rate": 1.1331099969745439e-05, "loss": 250.8719, "step": 24070 }, { "epoch": 0.4631192272371034, "grad_norm": 495.272702207888, "learning_rate": 1.132505088982823e-05, "loss": 264.4658, "step": 24080 }, { "epoch": 0.46331155249758393, "grad_norm": 452.1273384529998, "learning_rate": 1.131900131635145e-05, "loss": 247.3377, "step": 24090 }, { "epoch": 0.46350387775806445, "grad_norm": 478.12129665054374, "learning_rate": 1.1312951251568461e-05, "loss": 254.158, "step": 24100 }, { "epoch": 0.463696203018545, "grad_norm": 421.7954517930514, "learning_rate": 1.1306900697732816e-05, "loss": 246.6665, "step": 24110 }, { "epoch": 0.4638885282790255, "grad_norm": 438.8070098208251, "learning_rate": 1.1300849657098248e-05, "loss": 252.4575, "step": 24120 }, { "epoch": 0.464080853539506, "grad_norm": 431.1325105841485, "learning_rate": 1.1294798131918665e-05, "loss": 246.7184, "step": 24130 }, { "epoch": 0.46427317879998653, "grad_norm": 436.79399854155474, "learning_rate": 1.1288746124448164e-05, "loss": 248.6332, "step": 24140 }, { "epoch": 0.46446550406046705, "grad_norm": 407.84909784600154, "learning_rate": 1.1282693636941013e-05, "loss": 243.9928, "step": 24150 }, { "epoch": 0.46465782932094757, "grad_norm": 425.53119656555805, "learning_rate": 1.1276640671651671e-05, "loss": 259.5484, "step": 24160 }, { "epoch": 0.4648501545814281, "grad_norm": 467.32434582852153, "learning_rate": 1.1270587230834757e-05, "loss": 256.35, "step": 24170 }, { "epoch": 0.46504247984190866, "grad_norm": 446.9943765525869, "learning_rate": 1.1264533316745088e-05, "loss": 249.4387, "step": 24180 }, { "epoch": 0.4652348051023892, "grad_norm": 450.3452801542163, "learning_rate": 1.1258478931637641e-05, "loss": 250.4805, "step": 24190 }, { "epoch": 0.4654271303628697, "grad_norm": 476.0199473912133, "learning_rate": 1.1252424077767577e-05, "loss": 237.7216, "step": 24200 }, { "epoch": 0.4656194556233502, "grad_norm": 534.2949042190356, "learning_rate": 1.1246368757390231e-05, "loss": 246.3689, "step": 24210 }, { "epoch": 0.46581178088383074, "grad_norm": 431.38590706435883, "learning_rate": 1.1240312972761105e-05, "loss": 246.3344, "step": 24220 }, { "epoch": 0.46600410614431126, "grad_norm": 472.0800795568094, "learning_rate": 1.1234256726135882e-05, "loss": 250.9139, "step": 24230 }, { "epoch": 0.4661964314047918, "grad_norm": 475.8731776955852, "learning_rate": 1.1228200019770412e-05, "loss": 238.8638, "step": 24240 }, { "epoch": 0.4663887566652723, "grad_norm": 591.6405623294445, "learning_rate": 1.1222142855920719e-05, "loss": 254.9071, "step": 24250 }, { "epoch": 0.4665810819257528, "grad_norm": 464.7131966163036, "learning_rate": 1.1216085236842997e-05, "loss": 248.4065, "step": 24260 }, { "epoch": 0.46677340718623334, "grad_norm": 454.0690584045458, "learning_rate": 1.1210027164793609e-05, "loss": 257.0167, "step": 24270 }, { "epoch": 0.46696573244671385, "grad_norm": 511.8551061919269, "learning_rate": 1.1203968642029086e-05, "loss": 254.8844, "step": 24280 }, { "epoch": 0.46715805770719443, "grad_norm": 476.62536954453935, "learning_rate": 1.1197909670806126e-05, "loss": 250.0282, "step": 24290 }, { "epoch": 0.46735038296767495, "grad_norm": 506.67464828242356, "learning_rate": 1.1191850253381602e-05, "loss": 260.3468, "step": 24300 }, { "epoch": 0.46754270822815547, "grad_norm": 413.12764072167204, "learning_rate": 1.1185790392012538e-05, "loss": 249.6076, "step": 24310 }, { "epoch": 0.467735033488636, "grad_norm": 490.1677673641274, "learning_rate": 1.117973008895614e-05, "loss": 250.4889, "step": 24320 }, { "epoch": 0.4679273587491165, "grad_norm": 442.1200404680506, "learning_rate": 1.1173669346469767e-05, "loss": 256.6804, "step": 24330 }, { "epoch": 0.468119684009597, "grad_norm": 420.3265496395681, "learning_rate": 1.1167608166810948e-05, "loss": 248.3983, "step": 24340 }, { "epoch": 0.46831200927007754, "grad_norm": 481.82760767730286, "learning_rate": 1.1161546552237368e-05, "loss": 262.9398, "step": 24350 }, { "epoch": 0.46850433453055806, "grad_norm": 502.98826463841664, "learning_rate": 1.1155484505006884e-05, "loss": 252.0988, "step": 24360 }, { "epoch": 0.4686966597910386, "grad_norm": 453.78842173853565, "learning_rate": 1.1149422027377501e-05, "loss": 252.8134, "step": 24370 }, { "epoch": 0.4688889850515191, "grad_norm": 421.40902934543436, "learning_rate": 1.1143359121607397e-05, "loss": 248.2464, "step": 24380 }, { "epoch": 0.4690813103119997, "grad_norm": 457.17537546098157, "learning_rate": 1.1137295789954904e-05, "loss": 245.1052, "step": 24390 }, { "epoch": 0.4692736355724802, "grad_norm": 443.40464034342074, "learning_rate": 1.1131232034678513e-05, "loss": 248.9347, "step": 24400 }, { "epoch": 0.4694659608329607, "grad_norm": 454.3725000763719, "learning_rate": 1.1125167858036874e-05, "loss": 249.8428, "step": 24410 }, { "epoch": 0.46965828609344124, "grad_norm": 658.893834386438, "learning_rate": 1.1119103262288788e-05, "loss": 249.1869, "step": 24420 }, { "epoch": 0.46985061135392175, "grad_norm": 403.0624609424853, "learning_rate": 1.1113038249693221e-05, "loss": 247.1238, "step": 24430 }, { "epoch": 0.4700429366144023, "grad_norm": 434.98247786393125, "learning_rate": 1.1106972822509287e-05, "loss": 248.6417, "step": 24440 }, { "epoch": 0.4702352618748828, "grad_norm": 588.2224711232709, "learning_rate": 1.1100906982996257e-05, "loss": 257.6896, "step": 24450 }, { "epoch": 0.4704275871353633, "grad_norm": 409.4694456668267, "learning_rate": 1.109484073341356e-05, "loss": 250.3586, "step": 24460 }, { "epoch": 0.47061991239584383, "grad_norm": 447.49656036589306, "learning_rate": 1.1088774076020772e-05, "loss": 240.8952, "step": 24470 }, { "epoch": 0.47081223765632435, "grad_norm": 493.25521247994993, "learning_rate": 1.1082707013077625e-05, "loss": 246.0719, "step": 24480 }, { "epoch": 0.4710045629168049, "grad_norm": 464.28867493228563, "learning_rate": 1.1076639546843993e-05, "loss": 251.0546, "step": 24490 }, { "epoch": 0.47119688817728544, "grad_norm": 436.4159537450911, "learning_rate": 1.1070571679579913e-05, "loss": 252.1561, "step": 24500 }, { "epoch": 0.47138921343776596, "grad_norm": 421.21392822926066, "learning_rate": 1.1064503413545565e-05, "loss": 254.469, "step": 24510 }, { "epoch": 0.4715815386982465, "grad_norm": 460.5343484928838, "learning_rate": 1.1058434751001272e-05, "loss": 251.8184, "step": 24520 }, { "epoch": 0.471773863958727, "grad_norm": 411.91893550393957, "learning_rate": 1.1052365694207516e-05, "loss": 247.0095, "step": 24530 }, { "epoch": 0.4719661892192075, "grad_norm": 449.8590650120734, "learning_rate": 1.1046296245424922e-05, "loss": 244.535, "step": 24540 }, { "epoch": 0.47215851447968804, "grad_norm": 434.80418999565836, "learning_rate": 1.1040226406914254e-05, "loss": 245.4892, "step": 24550 }, { "epoch": 0.47235083974016856, "grad_norm": 455.9794907514911, "learning_rate": 1.103415618093643e-05, "loss": 257.8027, "step": 24560 }, { "epoch": 0.4725431650006491, "grad_norm": 477.37782837369866, "learning_rate": 1.1028085569752512e-05, "loss": 250.1352, "step": 24570 }, { "epoch": 0.4727354902611296, "grad_norm": 421.5207501313153, "learning_rate": 1.1022014575623695e-05, "loss": 253.0865, "step": 24580 }, { "epoch": 0.4729278155216102, "grad_norm": 481.2263235604556, "learning_rate": 1.1015943200811325e-05, "loss": 247.887, "step": 24590 }, { "epoch": 0.4731201407820907, "grad_norm": 429.53295926869913, "learning_rate": 1.1009871447576894e-05, "loss": 243.6173, "step": 24600 }, { "epoch": 0.4733124660425712, "grad_norm": 464.9981357871874, "learning_rate": 1.100379931818203e-05, "loss": 252.1286, "step": 24610 }, { "epoch": 0.47350479130305173, "grad_norm": 517.5348927538537, "learning_rate": 1.0997726814888497e-05, "loss": 261.3658, "step": 24620 }, { "epoch": 0.47369711656353225, "grad_norm": 442.3162599786568, "learning_rate": 1.0991653939958203e-05, "loss": 241.7305, "step": 24630 }, { "epoch": 0.47388944182401277, "grad_norm": 424.1894663482793, "learning_rate": 1.0985580695653193e-05, "loss": 253.9543, "step": 24640 }, { "epoch": 0.4740817670844933, "grad_norm": 448.4381082664568, "learning_rate": 1.0979507084235653e-05, "loss": 251.0728, "step": 24650 }, { "epoch": 0.4742740923449738, "grad_norm": 458.87743375958837, "learning_rate": 1.0973433107967901e-05, "loss": 246.6305, "step": 24660 }, { "epoch": 0.4744664176054543, "grad_norm": 517.6346244095042, "learning_rate": 1.0967358769112389e-05, "loss": 251.4199, "step": 24670 }, { "epoch": 0.47465874286593485, "grad_norm": 451.07296821258916, "learning_rate": 1.0961284069931717e-05, "loss": 245.2185, "step": 24680 }, { "epoch": 0.4748510681264154, "grad_norm": 447.18620961813315, "learning_rate": 1.0955209012688602e-05, "loss": 249.5758, "step": 24690 }, { "epoch": 0.47504339338689594, "grad_norm": 409.3526850856575, "learning_rate": 1.094913359964591e-05, "loss": 244.5644, "step": 24700 }, { "epoch": 0.47523571864737646, "grad_norm": 426.6026512686952, "learning_rate": 1.0943057833066622e-05, "loss": 252.9506, "step": 24710 }, { "epoch": 0.475428043907857, "grad_norm": 416.51572810563215, "learning_rate": 1.093698171521387e-05, "loss": 249.4896, "step": 24720 }, { "epoch": 0.4756203691683375, "grad_norm": 462.1845830799221, "learning_rate": 1.0930905248350903e-05, "loss": 240.598, "step": 24730 }, { "epoch": 0.475812694428818, "grad_norm": 472.54977842161674, "learning_rate": 1.0924828434741101e-05, "loss": 251.475, "step": 24740 }, { "epoch": 0.47600501968929854, "grad_norm": 436.0025743451127, "learning_rate": 1.0918751276647988e-05, "loss": 251.9488, "step": 24750 }, { "epoch": 0.47619734494977906, "grad_norm": 458.33776527955024, "learning_rate": 1.0912673776335194e-05, "loss": 245.6878, "step": 24760 }, { "epoch": 0.4763896702102596, "grad_norm": 455.90399573802995, "learning_rate": 1.0906595936066496e-05, "loss": 248.6308, "step": 24770 }, { "epoch": 0.4765819954707401, "grad_norm": 415.12883392843173, "learning_rate": 1.090051775810578e-05, "loss": 246.4492, "step": 24780 }, { "epoch": 0.4767743207312206, "grad_norm": 476.5103535122165, "learning_rate": 1.0894439244717075e-05, "loss": 254.9108, "step": 24790 }, { "epoch": 0.4769666459917012, "grad_norm": 466.93904612563324, "learning_rate": 1.0888360398164521e-05, "loss": 260.6951, "step": 24800 }, { "epoch": 0.4771589712521817, "grad_norm": 425.51825024616903, "learning_rate": 1.088228122071239e-05, "loss": 252.0665, "step": 24810 }, { "epoch": 0.4773512965126622, "grad_norm": 458.7198288357738, "learning_rate": 1.0876201714625076e-05, "loss": 250.3464, "step": 24820 }, { "epoch": 0.47754362177314275, "grad_norm": 453.30732082967415, "learning_rate": 1.0870121882167095e-05, "loss": 247.5671, "step": 24830 }, { "epoch": 0.47773594703362326, "grad_norm": 436.2087674068979, "learning_rate": 1.0864041725603085e-05, "loss": 247.0716, "step": 24840 }, { "epoch": 0.4779282722941038, "grad_norm": 454.10356148142944, "learning_rate": 1.08579612471978e-05, "loss": 243.4011, "step": 24850 }, { "epoch": 0.4781205975545843, "grad_norm": 513.6293858830724, "learning_rate": 1.0851880449216123e-05, "loss": 243.0631, "step": 24860 }, { "epoch": 0.4783129228150648, "grad_norm": 443.6660767597183, "learning_rate": 1.0845799333923045e-05, "loss": 251.9735, "step": 24870 }, { "epoch": 0.47850524807554534, "grad_norm": 439.97080458978786, "learning_rate": 1.0839717903583684e-05, "loss": 246.0687, "step": 24880 }, { "epoch": 0.47869757333602586, "grad_norm": 455.412674235121, "learning_rate": 1.0833636160463273e-05, "loss": 247.1598, "step": 24890 }, { "epoch": 0.47888989859650644, "grad_norm": 446.6137596327817, "learning_rate": 1.0827554106827162e-05, "loss": 250.6849, "step": 24900 }, { "epoch": 0.47908222385698696, "grad_norm": 433.7040588188312, "learning_rate": 1.0821471744940812e-05, "loss": 245.5958, "step": 24910 }, { "epoch": 0.4792745491174675, "grad_norm": 417.6454876241638, "learning_rate": 1.0815389077069805e-05, "loss": 241.6265, "step": 24920 }, { "epoch": 0.479466874377948, "grad_norm": 489.9466623664129, "learning_rate": 1.0809306105479834e-05, "loss": 251.8081, "step": 24930 }, { "epoch": 0.4796591996384285, "grad_norm": 441.5493420550708, "learning_rate": 1.0803222832436701e-05, "loss": 242.8504, "step": 24940 }, { "epoch": 0.47985152489890903, "grad_norm": 493.3406496849026, "learning_rate": 1.0797139260206331e-05, "loss": 248.1648, "step": 24950 }, { "epoch": 0.48004385015938955, "grad_norm": 438.3408598809831, "learning_rate": 1.079105539105475e-05, "loss": 251.575, "step": 24960 }, { "epoch": 0.48023617541987007, "grad_norm": 552.3292854874302, "learning_rate": 1.0784971227248104e-05, "loss": 254.1856, "step": 24970 }, { "epoch": 0.4804285006803506, "grad_norm": 438.2062557228392, "learning_rate": 1.077888677105264e-05, "loss": 254.4444, "step": 24980 }, { "epoch": 0.4806208259408311, "grad_norm": 422.1076010878798, "learning_rate": 1.0772802024734716e-05, "loss": 244.7023, "step": 24990 }, { "epoch": 0.4808131512013117, "grad_norm": 450.69549656803775, "learning_rate": 1.07667169905608e-05, "loss": 253.7605, "step": 25000 }, { "epoch": 0.4810054764617922, "grad_norm": 466.94073546656665, "learning_rate": 1.0760631670797468e-05, "loss": 245.616, "step": 25010 }, { "epoch": 0.4811978017222727, "grad_norm": 467.1817616308966, "learning_rate": 1.07545460677114e-05, "loss": 241.4176, "step": 25020 }, { "epoch": 0.48139012698275324, "grad_norm": 511.62849953671076, "learning_rate": 1.0748460183569385e-05, "loss": 250.8242, "step": 25030 }, { "epoch": 0.48158245224323376, "grad_norm": 447.31858981256545, "learning_rate": 1.0742374020638315e-05, "loss": 248.7033, "step": 25040 }, { "epoch": 0.4817747775037143, "grad_norm": 421.99882091451155, "learning_rate": 1.073628758118518e-05, "loss": 255.6246, "step": 25050 }, { "epoch": 0.4819671027641948, "grad_norm": 463.3980795704606, "learning_rate": 1.0730200867477083e-05, "loss": 245.073, "step": 25060 }, { "epoch": 0.4821594280246753, "grad_norm": 467.94884056645355, "learning_rate": 1.0724113881781222e-05, "loss": 247.966, "step": 25070 }, { "epoch": 0.48235175328515584, "grad_norm": 511.85541483845395, "learning_rate": 1.0718026626364902e-05, "loss": 257.6318, "step": 25080 }, { "epoch": 0.48254407854563636, "grad_norm": 429.096382339394, "learning_rate": 1.0711939103495515e-05, "loss": 245.9294, "step": 25090 }, { "epoch": 0.48273640380611693, "grad_norm": 427.3352790828323, "learning_rate": 1.0705851315440575e-05, "loss": 248.8446, "step": 25100 }, { "epoch": 0.48292872906659745, "grad_norm": 416.78510611831143, "learning_rate": 1.0699763264467675e-05, "loss": 248.8376, "step": 25110 }, { "epoch": 0.48312105432707797, "grad_norm": 512.1657711730046, "learning_rate": 1.069367495284452e-05, "loss": 246.5034, "step": 25120 }, { "epoch": 0.4833133795875585, "grad_norm": 442.83674156966015, "learning_rate": 1.06875863828389e-05, "loss": 238.0761, "step": 25130 }, { "epoch": 0.483505704848039, "grad_norm": 488.48749967161496, "learning_rate": 1.0681497556718706e-05, "loss": 236.0335, "step": 25140 }, { "epoch": 0.4836980301085195, "grad_norm": 456.7560492291506, "learning_rate": 1.067540847675193e-05, "loss": 272.2428, "step": 25150 }, { "epoch": 0.48389035536900005, "grad_norm": 442.06040364008766, "learning_rate": 1.0669319145206646e-05, "loss": 250.605, "step": 25160 }, { "epoch": 0.48408268062948057, "grad_norm": 566.2449344280966, "learning_rate": 1.066322956435104e-05, "loss": 253.4595, "step": 25170 }, { "epoch": 0.4842750058899611, "grad_norm": 425.96797194049253, "learning_rate": 1.0657139736453375e-05, "loss": 256.1166, "step": 25180 }, { "epoch": 0.4844673311504416, "grad_norm": 438.731119986068, "learning_rate": 1.0651049663782007e-05, "loss": 255.1635, "step": 25190 }, { "epoch": 0.4846596564109221, "grad_norm": 451.60955684592363, "learning_rate": 1.0644959348605397e-05, "loss": 248.3435, "step": 25200 }, { "epoch": 0.4848519816714027, "grad_norm": 429.3271931261198, "learning_rate": 1.0638868793192079e-05, "loss": 244.5909, "step": 25210 }, { "epoch": 0.4850443069318832, "grad_norm": 481.2447928300619, "learning_rate": 1.0632777999810685e-05, "loss": 241.698, "step": 25220 }, { "epoch": 0.48523663219236374, "grad_norm": 470.2915240312469, "learning_rate": 1.062668697072994e-05, "loss": 246.9471, "step": 25230 }, { "epoch": 0.48542895745284426, "grad_norm": 443.08875142900945, "learning_rate": 1.0620595708218646e-05, "loss": 244.9708, "step": 25240 }, { "epoch": 0.4856212827133248, "grad_norm": 443.63371264850485, "learning_rate": 1.06145042145457e-05, "loss": 251.5037, "step": 25250 }, { "epoch": 0.4858136079738053, "grad_norm": 402.69981315065496, "learning_rate": 1.0608412491980084e-05, "loss": 242.0354, "step": 25260 }, { "epoch": 0.4860059332342858, "grad_norm": 426.4094260441273, "learning_rate": 1.0602320542790866e-05, "loss": 245.9982, "step": 25270 }, { "epoch": 0.48619825849476633, "grad_norm": 471.6454580413846, "learning_rate": 1.0596228369247188e-05, "loss": 249.9412, "step": 25280 }, { "epoch": 0.48639058375524685, "grad_norm": 458.64366829222826, "learning_rate": 1.0590135973618294e-05, "loss": 243.9949, "step": 25290 }, { "epoch": 0.48658290901572737, "grad_norm": 472.520503573571, "learning_rate": 1.0584043358173492e-05, "loss": 245.1111, "step": 25300 }, { "epoch": 0.48677523427620795, "grad_norm": 439.99698347493904, "learning_rate": 1.0577950525182189e-05, "loss": 247.9348, "step": 25310 }, { "epoch": 0.48696755953668847, "grad_norm": 453.09732479627297, "learning_rate": 1.0571857476913856e-05, "loss": 238.9893, "step": 25320 }, { "epoch": 0.487159884797169, "grad_norm": 455.28499634673665, "learning_rate": 1.056576421563806e-05, "loss": 250.3431, "step": 25330 }, { "epoch": 0.4873522100576495, "grad_norm": 436.19548366840934, "learning_rate": 1.0559670743624439e-05, "loss": 237.5524, "step": 25340 }, { "epoch": 0.48754453531813, "grad_norm": 473.48646363292704, "learning_rate": 1.0553577063142705e-05, "loss": 254.6918, "step": 25350 }, { "epoch": 0.48773686057861054, "grad_norm": 484.1705295658253, "learning_rate": 1.0547483176462662e-05, "loss": 251.9564, "step": 25360 }, { "epoch": 0.48792918583909106, "grad_norm": 2366.65366049417, "learning_rate": 1.0541389085854177e-05, "loss": 249.7325, "step": 25370 }, { "epoch": 0.4881215110995716, "grad_norm": 492.1477589057321, "learning_rate": 1.0535294793587197e-05, "loss": 250.2501, "step": 25380 }, { "epoch": 0.4883138363600521, "grad_norm": 424.883471031728, "learning_rate": 1.0529200301931747e-05, "loss": 238.7042, "step": 25390 }, { "epoch": 0.4885061616205326, "grad_norm": 423.5637215784562, "learning_rate": 1.052310561315793e-05, "loss": 246.725, "step": 25400 }, { "epoch": 0.4886984868810132, "grad_norm": 423.773731351179, "learning_rate": 1.0517010729535903e-05, "loss": 251.5593, "step": 25410 }, { "epoch": 0.4888908121414937, "grad_norm": 474.33298819325177, "learning_rate": 1.0510915653335925e-05, "loss": 243.2532, "step": 25420 }, { "epoch": 0.48908313740197423, "grad_norm": 477.6213952086895, "learning_rate": 1.05048203868283e-05, "loss": 254.729, "step": 25430 }, { "epoch": 0.48927546266245475, "grad_norm": 472.98644804818156, "learning_rate": 1.0498724932283419e-05, "loss": 246.5291, "step": 25440 }, { "epoch": 0.48946778792293527, "grad_norm": 483.10040934297575, "learning_rate": 1.0492629291971738e-05, "loss": 250.702, "step": 25450 }, { "epoch": 0.4896601131834158, "grad_norm": 440.58067932925127, "learning_rate": 1.0486533468163782e-05, "loss": 244.2888, "step": 25460 }, { "epoch": 0.4898524384438963, "grad_norm": 394.8266921690608, "learning_rate": 1.0480437463130145e-05, "loss": 241.5677, "step": 25470 }, { "epoch": 0.49004476370437683, "grad_norm": 471.3324989295299, "learning_rate": 1.0474341279141486e-05, "loss": 252.2472, "step": 25480 }, { "epoch": 0.49023708896485735, "grad_norm": 425.21399499721247, "learning_rate": 1.0468244918468538e-05, "loss": 249.1725, "step": 25490 }, { "epoch": 0.49042941422533787, "grad_norm": 447.2930020504913, "learning_rate": 1.0462148383382086e-05, "loss": 244.4379, "step": 25500 }, { "epoch": 0.49062173948581844, "grad_norm": 485.2339050530055, "learning_rate": 1.0456051676152996e-05, "loss": 242.2057, "step": 25510 }, { "epoch": 0.49081406474629896, "grad_norm": 646.7097083310359, "learning_rate": 1.0449954799052189e-05, "loss": 251.574, "step": 25520 }, { "epoch": 0.4910063900067795, "grad_norm": 547.2044713815735, "learning_rate": 1.044385775435065e-05, "loss": 253.0338, "step": 25530 }, { "epoch": 0.49119871526726, "grad_norm": 440.4239265355097, "learning_rate": 1.043776054431943e-05, "loss": 245.8395, "step": 25540 }, { "epoch": 0.4913910405277405, "grad_norm": 416.9467677099068, "learning_rate": 1.0431663171229636e-05, "loss": 245.2929, "step": 25550 }, { "epoch": 0.49158336578822104, "grad_norm": 431.6267169132507, "learning_rate": 1.0425565637352441e-05, "loss": 249.7551, "step": 25560 }, { "epoch": 0.49177569104870156, "grad_norm": 465.0519131476555, "learning_rate": 1.0419467944959072e-05, "loss": 249.6737, "step": 25570 }, { "epoch": 0.4919680163091821, "grad_norm": 452.2856803244398, "learning_rate": 1.0413370096320823e-05, "loss": 238.5953, "step": 25580 }, { "epoch": 0.4921603415696626, "grad_norm": 455.0734526442802, "learning_rate": 1.0407272093709038e-05, "loss": 245.2968, "step": 25590 }, { "epoch": 0.4923526668301431, "grad_norm": 463.6321192841197, "learning_rate": 1.0401173939395128e-05, "loss": 244.2217, "step": 25600 }, { "epoch": 0.4925449920906237, "grad_norm": 485.84517009301635, "learning_rate": 1.0395075635650549e-05, "loss": 241.1392, "step": 25610 }, { "epoch": 0.4927373173511042, "grad_norm": 419.260555430691, "learning_rate": 1.038897718474682e-05, "loss": 246.546, "step": 25620 }, { "epoch": 0.49292964261158473, "grad_norm": 433.4192507139854, "learning_rate": 1.0382878588955517e-05, "loss": 251.3673, "step": 25630 }, { "epoch": 0.49312196787206525, "grad_norm": 452.670606842314, "learning_rate": 1.0376779850548257e-05, "loss": 246.1752, "step": 25640 }, { "epoch": 0.49331429313254577, "grad_norm": 429.65471329485945, "learning_rate": 1.0370680971796732e-05, "loss": 241.0601, "step": 25650 }, { "epoch": 0.4935066183930263, "grad_norm": 467.61475859467316, "learning_rate": 1.0364581954972662e-05, "loss": 240.7547, "step": 25660 }, { "epoch": 0.4936989436535068, "grad_norm": 442.5152961101893, "learning_rate": 1.0358482802347838e-05, "loss": 248.5261, "step": 25670 }, { "epoch": 0.4938912689139873, "grad_norm": 552.073007450886, "learning_rate": 1.0352383516194088e-05, "loss": 246.4259, "step": 25680 }, { "epoch": 0.49408359417446784, "grad_norm": 446.08519703837686, "learning_rate": 1.0346284098783304e-05, "loss": 247.5358, "step": 25690 }, { "epoch": 0.49427591943494836, "grad_norm": 442.76307004936734, "learning_rate": 1.0340184552387406e-05, "loss": 246.4056, "step": 25700 }, { "epoch": 0.4944682446954289, "grad_norm": 497.5888563331776, "learning_rate": 1.0334084879278381e-05, "loss": 246.5515, "step": 25710 }, { "epoch": 0.49466056995590946, "grad_norm": 425.2440346101982, "learning_rate": 1.032798508172826e-05, "loss": 246.9058, "step": 25720 }, { "epoch": 0.49485289521639, "grad_norm": 402.90499175113706, "learning_rate": 1.0321885162009111e-05, "loss": 244.8918, "step": 25730 }, { "epoch": 0.4950452204768705, "grad_norm": 402.03117143256645, "learning_rate": 1.0315785122393053e-05, "loss": 242.0849, "step": 25740 }, { "epoch": 0.495237545737351, "grad_norm": 476.15911193233376, "learning_rate": 1.0309684965152254e-05, "loss": 248.6827, "step": 25750 }, { "epoch": 0.49542987099783153, "grad_norm": 475.220916630608, "learning_rate": 1.030358469255892e-05, "loss": 250.3099, "step": 25760 }, { "epoch": 0.49562219625831205, "grad_norm": 501.3222759334091, "learning_rate": 1.0297484306885304e-05, "loss": 244.1939, "step": 25770 }, { "epoch": 0.4958145215187926, "grad_norm": 397.63599090176484, "learning_rate": 1.0291383810403697e-05, "loss": 245.2867, "step": 25780 }, { "epoch": 0.4960068467792731, "grad_norm": 450.37578679972626, "learning_rate": 1.028528320538643e-05, "loss": 246.428, "step": 25790 }, { "epoch": 0.4961991720397536, "grad_norm": 424.172891105323, "learning_rate": 1.0279182494105879e-05, "loss": 240.4534, "step": 25800 }, { "epoch": 0.49639149730023413, "grad_norm": 442.1861728522265, "learning_rate": 1.0273081678834462e-05, "loss": 239.6894, "step": 25810 }, { "epoch": 0.4965838225607147, "grad_norm": 420.1162594312604, "learning_rate": 1.026698076184463e-05, "loss": 238.7768, "step": 25820 }, { "epoch": 0.4967761478211952, "grad_norm": 512.6485580085946, "learning_rate": 1.0260879745408876e-05, "loss": 246.5191, "step": 25830 }, { "epoch": 0.49696847308167574, "grad_norm": 503.2180245457224, "learning_rate": 1.0254778631799722e-05, "loss": 237.8291, "step": 25840 }, { "epoch": 0.49716079834215626, "grad_norm": 460.8060087247976, "learning_rate": 1.024867742328974e-05, "loss": 251.3185, "step": 25850 }, { "epoch": 0.4973531236026368, "grad_norm": 502.38381757118526, "learning_rate": 1.024257612215152e-05, "loss": 247.1877, "step": 25860 }, { "epoch": 0.4975454488631173, "grad_norm": 466.06905872812473, "learning_rate": 1.02364747306577e-05, "loss": 247.8401, "step": 25870 }, { "epoch": 0.4977377741235978, "grad_norm": 464.63045897343216, "learning_rate": 1.023037325108095e-05, "loss": 244.7431, "step": 25880 }, { "epoch": 0.49793009938407834, "grad_norm": 434.22745832265616, "learning_rate": 1.022427168569397e-05, "loss": 240.8298, "step": 25890 }, { "epoch": 0.49812242464455886, "grad_norm": 412.5713094350459, "learning_rate": 1.021817003676949e-05, "loss": 247.3949, "step": 25900 }, { "epoch": 0.4983147499050394, "grad_norm": 523.4399446351441, "learning_rate": 1.0212068306580274e-05, "loss": 250.9017, "step": 25910 }, { "epoch": 0.49850707516551995, "grad_norm": 429.88017138861875, "learning_rate": 1.0205966497399118e-05, "loss": 236.3815, "step": 25920 }, { "epoch": 0.49869940042600047, "grad_norm": 466.77705788075514, "learning_rate": 1.0199864611498841e-05, "loss": 250.0941, "step": 25930 }, { "epoch": 0.498891725686481, "grad_norm": 418.65971555969014, "learning_rate": 1.0193762651152299e-05, "loss": 250.9745, "step": 25940 }, { "epoch": 0.4990840509469615, "grad_norm": 449.96314508050347, "learning_rate": 1.018766061863237e-05, "loss": 243.4652, "step": 25950 }, { "epoch": 0.49927637620744203, "grad_norm": 489.6657370422981, "learning_rate": 1.018155851621196e-05, "loss": 245.073, "step": 25960 }, { "epoch": 0.49946870146792255, "grad_norm": 420.4610545344127, "learning_rate": 1.0175456346164e-05, "loss": 242.4358, "step": 25970 }, { "epoch": 0.49966102672840307, "grad_norm": 438.7971060637843, "learning_rate": 1.0169354110761447e-05, "loss": 243.3489, "step": 25980 }, { "epoch": 0.4998533519888836, "grad_norm": 436.5208933561538, "learning_rate": 1.0163251812277289e-05, "loss": 241.6723, "step": 25990 }, { "epoch": 0.5000456772493641, "grad_norm": 381.4129983150638, "learning_rate": 1.0157149452984523e-05, "loss": 245.7078, "step": 26000 }, { "epoch": 0.5002380025098446, "grad_norm": 433.79289200845756, "learning_rate": 1.0151047035156182e-05, "loss": 244.9868, "step": 26010 }, { "epoch": 0.5004303277703251, "grad_norm": 460.1908207982285, "learning_rate": 1.0144944561065316e-05, "loss": 241.7464, "step": 26020 }, { "epoch": 0.5006226530308057, "grad_norm": 433.40965322735116, "learning_rate": 1.0138842032984996e-05, "loss": 257.1567, "step": 26030 }, { "epoch": 0.5008149782912862, "grad_norm": 426.9756940354048, "learning_rate": 1.013273945318831e-05, "loss": 249.8501, "step": 26040 }, { "epoch": 0.5010073035517667, "grad_norm": 411.1509040454855, "learning_rate": 1.0126636823948373e-05, "loss": 239.5032, "step": 26050 }, { "epoch": 0.5011996288122472, "grad_norm": 478.99791632474876, "learning_rate": 1.0120534147538305e-05, "loss": 247.6124, "step": 26060 }, { "epoch": 0.5013919540727279, "grad_norm": 409.0573405470129, "learning_rate": 1.011443142623126e-05, "loss": 251.2825, "step": 26070 }, { "epoch": 0.5015842793332084, "grad_norm": 423.1886250758101, "learning_rate": 1.0108328662300399e-05, "loss": 243.0762, "step": 26080 }, { "epoch": 0.5017766045936889, "grad_norm": 449.89564875024155, "learning_rate": 1.0102225858018902e-05, "loss": 239.3468, "step": 26090 }, { "epoch": 0.5019689298541694, "grad_norm": 424.42439216757805, "learning_rate": 1.009612301565996e-05, "loss": 244.7099, "step": 26100 }, { "epoch": 0.5021612551146499, "grad_norm": 425.75243466856773, "learning_rate": 1.0090020137496783e-05, "loss": 242.121, "step": 26110 }, { "epoch": 0.5023535803751304, "grad_norm": 516.8689704463773, "learning_rate": 1.008391722580259e-05, "loss": 253.884, "step": 26120 }, { "epoch": 0.502545905635611, "grad_norm": 462.37143231682705, "learning_rate": 1.0077814282850617e-05, "loss": 258.1923, "step": 26130 }, { "epoch": 0.5027382308960915, "grad_norm": 424.00888323067863, "learning_rate": 1.0071711310914111e-05, "loss": 243.5125, "step": 26140 }, { "epoch": 0.502930556156572, "grad_norm": 420.92516082000077, "learning_rate": 1.0065608312266324e-05, "loss": 246.5957, "step": 26150 }, { "epoch": 0.5031228814170525, "grad_norm": 482.59303401324553, "learning_rate": 1.005950528918052e-05, "loss": 246.1805, "step": 26160 }, { "epoch": 0.503315206677533, "grad_norm": 487.29229809229616, "learning_rate": 1.0053402243929986e-05, "loss": 251.2692, "step": 26170 }, { "epoch": 0.5035075319380136, "grad_norm": 457.9805797458841, "learning_rate": 1.0047299178787993e-05, "loss": 245.5148, "step": 26180 }, { "epoch": 0.5036998571984941, "grad_norm": 458.17136665917167, "learning_rate": 1.0041196096027841e-05, "loss": 241.697, "step": 26190 }, { "epoch": 0.5038921824589746, "grad_norm": 435.9264394427311, "learning_rate": 1.003509299792282e-05, "loss": 242.2176, "step": 26200 }, { "epoch": 0.5040845077194551, "grad_norm": 521.6332069641547, "learning_rate": 1.0028989886746241e-05, "loss": 234.7725, "step": 26210 }, { "epoch": 0.5042768329799356, "grad_norm": 484.582780022787, "learning_rate": 1.0022886764771405e-05, "loss": 242.2138, "step": 26220 }, { "epoch": 0.5044691582404162, "grad_norm": 444.34829278353914, "learning_rate": 1.0016783634271626e-05, "loss": 239.8629, "step": 26230 }, { "epoch": 0.5046614835008967, "grad_norm": 457.58805933722306, "learning_rate": 1.001068049752022e-05, "loss": 243.8282, "step": 26240 }, { "epoch": 0.5048538087613772, "grad_norm": 412.9655789715028, "learning_rate": 1.0004577356790506e-05, "loss": 237.5452, "step": 26250 }, { "epoch": 0.5050461340218577, "grad_norm": 435.98213475968964, "learning_rate": 9.998474214355805e-06, "loss": 243.1981, "step": 26260 }, { "epoch": 0.5052384592823382, "grad_norm": 489.4462227900642, "learning_rate": 9.992371072489434e-06, "loss": 249.8664, "step": 26270 }, { "epoch": 0.5054307845428189, "grad_norm": 424.6057841930922, "learning_rate": 9.986267933464707e-06, "loss": 251.808, "step": 26280 }, { "epoch": 0.5056231098032994, "grad_norm": 424.2281977649041, "learning_rate": 9.980164799554953e-06, "loss": 247.8646, "step": 26290 }, { "epoch": 0.5058154350637799, "grad_norm": 487.14898516916384, "learning_rate": 9.97406167303348e-06, "loss": 244.8658, "step": 26300 }, { "epoch": 0.5060077603242604, "grad_norm": 494.80154517631826, "learning_rate": 9.967958556173612e-06, "loss": 246.6637, "step": 26310 }, { "epoch": 0.5062000855847409, "grad_norm": 513.2465036657109, "learning_rate": 9.961855451248645e-06, "loss": 239.1971, "step": 26320 }, { "epoch": 0.5063924108452215, "grad_norm": 446.21360839134826, "learning_rate": 9.955752360531896e-06, "loss": 250.5916, "step": 26330 }, { "epoch": 0.506584736105702, "grad_norm": 501.0889678350533, "learning_rate": 9.949649286296663e-06, "loss": 244.9018, "step": 26340 }, { "epoch": 0.5067770613661825, "grad_norm": 465.91921016468143, "learning_rate": 9.943546230816236e-06, "loss": 244.6792, "step": 26350 }, { "epoch": 0.506969386626663, "grad_norm": 457.0191770380282, "learning_rate": 9.937443196363908e-06, "loss": 249.4264, "step": 26360 }, { "epoch": 0.5071617118871435, "grad_norm": 416.0070003608556, "learning_rate": 9.931340185212955e-06, "loss": 238.2517, "step": 26370 }, { "epoch": 0.5073540371476241, "grad_norm": 476.43934334898836, "learning_rate": 9.925237199636652e-06, "loss": 236.8563, "step": 26380 }, { "epoch": 0.5075463624081046, "grad_norm": 472.0325579638252, "learning_rate": 9.919134241908252e-06, "loss": 242.1155, "step": 26390 }, { "epoch": 0.5077386876685851, "grad_norm": 464.23313098685463, "learning_rate": 9.913031314301016e-06, "loss": 249.2232, "step": 26400 }, { "epoch": 0.5079310129290656, "grad_norm": 439.9356440277744, "learning_rate": 9.906928419088178e-06, "loss": 239.3025, "step": 26410 }, { "epoch": 0.5081233381895461, "grad_norm": 452.02894711317396, "learning_rate": 9.900825558542965e-06, "loss": 248.1302, "step": 26420 }, { "epoch": 0.5083156634500267, "grad_norm": 471.0250597842824, "learning_rate": 9.894722734938595e-06, "loss": 253.5517, "step": 26430 }, { "epoch": 0.5085079887105072, "grad_norm": 942.0966381147898, "learning_rate": 9.888619950548267e-06, "loss": 241.6604, "step": 26440 }, { "epoch": 0.5087003139709877, "grad_norm": 408.9074201010136, "learning_rate": 9.88251720764517e-06, "loss": 236.8139, "step": 26450 }, { "epoch": 0.5088926392314682, "grad_norm": 436.98994804399365, "learning_rate": 9.876414508502468e-06, "loss": 242.7639, "step": 26460 }, { "epoch": 0.5090849644919487, "grad_norm": 542.8409474696687, "learning_rate": 9.870311855393324e-06, "loss": 247.9546, "step": 26470 }, { "epoch": 0.5092772897524294, "grad_norm": 492.2766221913677, "learning_rate": 9.864209250590875e-06, "loss": 236.1556, "step": 26480 }, { "epoch": 0.5094696150129099, "grad_norm": 422.21524007550494, "learning_rate": 9.858106696368235e-06, "loss": 246.9354, "step": 26490 }, { "epoch": 0.5096619402733904, "grad_norm": 413.45117700231884, "learning_rate": 9.852004194998503e-06, "loss": 240.2883, "step": 26500 }, { "epoch": 0.5098542655338709, "grad_norm": 436.66097150765677, "learning_rate": 9.845901748754767e-06, "loss": 241.1681, "step": 26510 }, { "epoch": 0.5100465907943514, "grad_norm": 401.0233561621916, "learning_rate": 9.839799359910087e-06, "loss": 245.1936, "step": 26520 }, { "epoch": 0.510238916054832, "grad_norm": 419.60610598498783, "learning_rate": 9.833697030737495e-06, "loss": 255.0598, "step": 26530 }, { "epoch": 0.5104312413153125, "grad_norm": 438.01226464221594, "learning_rate": 9.827594763510016e-06, "loss": 247.8375, "step": 26540 }, { "epoch": 0.510623566575793, "grad_norm": 424.9794514517412, "learning_rate": 9.821492560500641e-06, "loss": 240.593, "step": 26550 }, { "epoch": 0.5108158918362735, "grad_norm": 429.37616141942925, "learning_rate": 9.815390423982339e-06, "loss": 242.2971, "step": 26560 }, { "epoch": 0.511008217096754, "grad_norm": 467.3883836438913, "learning_rate": 9.809288356228051e-06, "loss": 241.1749, "step": 26570 }, { "epoch": 0.5112005423572346, "grad_norm": 455.14106394254037, "learning_rate": 9.803186359510701e-06, "loss": 240.6573, "step": 26580 }, { "epoch": 0.5113928676177151, "grad_norm": 476.20805163386433, "learning_rate": 9.797084436103186e-06, "loss": 243.7622, "step": 26590 }, { "epoch": 0.5115851928781956, "grad_norm": 431.13745569498144, "learning_rate": 9.790982588278362e-06, "loss": 245.6231, "step": 26600 }, { "epoch": 0.5117775181386761, "grad_norm": 441.6160319727721, "learning_rate": 9.784880818309076e-06, "loss": 240.248, "step": 26610 }, { "epoch": 0.5119698433991566, "grad_norm": 436.37144776940096, "learning_rate": 9.778779128468133e-06, "loss": 258.2943, "step": 26620 }, { "epoch": 0.5121621686596372, "grad_norm": 438.78771472645104, "learning_rate": 9.77267752102831e-06, "loss": 243.6253, "step": 26630 }, { "epoch": 0.5123544939201177, "grad_norm": 449.6758644467159, "learning_rate": 9.766575998262353e-06, "loss": 239.4953, "step": 26640 }, { "epoch": 0.5125468191805982, "grad_norm": 457.8962878078774, "learning_rate": 9.760474562442984e-06, "loss": 256.9004, "step": 26650 }, { "epoch": 0.5127391444410787, "grad_norm": 439.947448184104, "learning_rate": 9.754373215842884e-06, "loss": 245.4871, "step": 26660 }, { "epoch": 0.5129314697015592, "grad_norm": 497.6137428470242, "learning_rate": 9.748271960734708e-06, "loss": 250.7543, "step": 26670 }, { "epoch": 0.5131237949620397, "grad_norm": 429.4401569117728, "learning_rate": 9.742170799391063e-06, "loss": 234.9992, "step": 26680 }, { "epoch": 0.5133161202225204, "grad_norm": 430.57617568758025, "learning_rate": 9.736069734084541e-06, "loss": 234.2473, "step": 26690 }, { "epoch": 0.5135084454830009, "grad_norm": 451.1655195728304, "learning_rate": 9.729968767087685e-06, "loss": 239.7917, "step": 26700 }, { "epoch": 0.5137007707434814, "grad_norm": 447.969824881567, "learning_rate": 9.723867900673e-06, "loss": 235.8606, "step": 26710 }, { "epoch": 0.5138930960039619, "grad_norm": 430.6511466725646, "learning_rate": 9.717767137112964e-06, "loss": 242.5631, "step": 26720 }, { "epoch": 0.5140854212644425, "grad_norm": 479.60565448422625, "learning_rate": 9.711666478680007e-06, "loss": 246.9556, "step": 26730 }, { "epoch": 0.514277746524923, "grad_norm": 433.5290199664727, "learning_rate": 9.705565927646526e-06, "loss": 240.6249, "step": 26740 }, { "epoch": 0.5144700717854035, "grad_norm": 493.95997173958085, "learning_rate": 9.699465486284871e-06, "loss": 240.8255, "step": 26750 }, { "epoch": 0.514662397045884, "grad_norm": 458.84923128222306, "learning_rate": 9.693365156867363e-06, "loss": 237.2341, "step": 26760 }, { "epoch": 0.5148547223063645, "grad_norm": 489.1075081727761, "learning_rate": 9.68726494166627e-06, "loss": 238.0743, "step": 26770 }, { "epoch": 0.515047047566845, "grad_norm": 456.55188867455564, "learning_rate": 9.681164842953816e-06, "loss": 241.2977, "step": 26780 }, { "epoch": 0.5152393728273256, "grad_norm": 435.1984211593493, "learning_rate": 9.675064863002196e-06, "loss": 242.6891, "step": 26790 }, { "epoch": 0.5154316980878061, "grad_norm": 486.9208367025354, "learning_rate": 9.668965004083549e-06, "loss": 246.2692, "step": 26800 }, { "epoch": 0.5156240233482866, "grad_norm": 488.1283763686715, "learning_rate": 9.66286526846997e-06, "loss": 244.1956, "step": 26810 }, { "epoch": 0.5158163486087671, "grad_norm": 430.6743548810669, "learning_rate": 9.656765658433507e-06, "loss": 238.2999, "step": 26820 }, { "epoch": 0.5160086738692476, "grad_norm": 480.70242654871583, "learning_rate": 9.650666176246171e-06, "loss": 240.7867, "step": 26830 }, { "epoch": 0.5162009991297282, "grad_norm": 475.7212384357653, "learning_rate": 9.644566824179916e-06, "loss": 235.7113, "step": 26840 }, { "epoch": 0.5163933243902087, "grad_norm": 424.0596093705765, "learning_rate": 9.638467604506648e-06, "loss": 243.6052, "step": 26850 }, { "epoch": 0.5165856496506892, "grad_norm": 412.0695301887001, "learning_rate": 9.632368519498224e-06, "loss": 239.4665, "step": 26860 }, { "epoch": 0.5167779749111697, "grad_norm": 438.08596085568695, "learning_rate": 9.626269571426456e-06, "loss": 249.0358, "step": 26870 }, { "epoch": 0.5169703001716502, "grad_norm": 424.6792926233746, "learning_rate": 9.620170762563103e-06, "loss": 235.2679, "step": 26880 }, { "epoch": 0.5171626254321309, "grad_norm": 455.23997407909974, "learning_rate": 9.614072095179862e-06, "loss": 240.3182, "step": 26890 }, { "epoch": 0.5173549506926114, "grad_norm": 460.3046566571023, "learning_rate": 9.607973571548396e-06, "loss": 248.8181, "step": 26900 }, { "epoch": 0.5175472759530919, "grad_norm": 431.60376319277043, "learning_rate": 9.601875193940301e-06, "loss": 235.6848, "step": 26910 }, { "epoch": 0.5177396012135724, "grad_norm": 427.4764491510471, "learning_rate": 9.595776964627119e-06, "loss": 238.2073, "step": 26920 }, { "epoch": 0.517931926474053, "grad_norm": 405.6455663795896, "learning_rate": 9.589678885880342e-06, "loss": 243.4056, "step": 26930 }, { "epoch": 0.5181242517345335, "grad_norm": 450.5246242432972, "learning_rate": 9.583580959971402e-06, "loss": 240.6424, "step": 26940 }, { "epoch": 0.518316576995014, "grad_norm": 434.50219769503724, "learning_rate": 9.577483189171681e-06, "loss": 247.0905, "step": 26950 }, { "epoch": 0.5185089022554945, "grad_norm": 405.72505005902167, "learning_rate": 9.571385575752487e-06, "loss": 237.8846, "step": 26960 }, { "epoch": 0.518701227515975, "grad_norm": 499.28480534345977, "learning_rate": 9.565288121985093e-06, "loss": 249.2304, "step": 26970 }, { "epoch": 0.5188935527764555, "grad_norm": 430.11596495560156, "learning_rate": 9.559190830140695e-06, "loss": 244.7042, "step": 26980 }, { "epoch": 0.5190858780369361, "grad_norm": 440.6001477530912, "learning_rate": 9.553093702490433e-06, "loss": 237.3319, "step": 26990 }, { "epoch": 0.5192782032974166, "grad_norm": 439.4747838485815, "learning_rate": 9.54699674130538e-06, "loss": 240.8133, "step": 27000 }, { "epoch": 0.5194705285578971, "grad_norm": 386.079623940713, "learning_rate": 9.540899948856561e-06, "loss": 233.2224, "step": 27010 }, { "epoch": 0.5196628538183776, "grad_norm": 410.97870398563015, "learning_rate": 9.534803327414931e-06, "loss": 246.0451, "step": 27020 }, { "epoch": 0.5198551790788581, "grad_norm": 465.3544311644236, "learning_rate": 9.52870687925138e-06, "loss": 243.134, "step": 27030 }, { "epoch": 0.5200475043393387, "grad_norm": 475.8821377875846, "learning_rate": 9.522610606636728e-06, "loss": 243.3544, "step": 27040 }, { "epoch": 0.5202398295998192, "grad_norm": 448.41143347877056, "learning_rate": 9.516514511841745e-06, "loss": 243.6413, "step": 27050 }, { "epoch": 0.5204321548602997, "grad_norm": 432.3181110874596, "learning_rate": 9.51041859713712e-06, "loss": 233.4422, "step": 27060 }, { "epoch": 0.5206244801207802, "grad_norm": 426.23762058439485, "learning_rate": 9.50432286479348e-06, "loss": 239.3122, "step": 27070 }, { "epoch": 0.5208168053812607, "grad_norm": 445.9134298836942, "learning_rate": 9.498227317081387e-06, "loss": 241.3759, "step": 27080 }, { "epoch": 0.5210091306417413, "grad_norm": 475.92520609272293, "learning_rate": 9.492131956271334e-06, "loss": 240.0812, "step": 27090 }, { "epoch": 0.5212014559022219, "grad_norm": 473.55989336650714, "learning_rate": 9.486036784633738e-06, "loss": 241.4949, "step": 27100 }, { "epoch": 0.5213937811627024, "grad_norm": 467.69774689538417, "learning_rate": 9.47994180443895e-06, "loss": 235.2704, "step": 27110 }, { "epoch": 0.5215861064231829, "grad_norm": 453.1111708888243, "learning_rate": 9.473847017957254e-06, "loss": 240.8744, "step": 27120 }, { "epoch": 0.5217784316836634, "grad_norm": 446.0245501939967, "learning_rate": 9.467752427458851e-06, "loss": 243.1803, "step": 27130 }, { "epoch": 0.521970756944144, "grad_norm": 387.91540224222604, "learning_rate": 9.461658035213878e-06, "loss": 239.4184, "step": 27140 }, { "epoch": 0.5221630822046245, "grad_norm": 427.88291545084644, "learning_rate": 9.455563843492397e-06, "loss": 242.6393, "step": 27150 }, { "epoch": 0.522355407465105, "grad_norm": 482.08854329715166, "learning_rate": 9.449469854564393e-06, "loss": 236.5583, "step": 27160 }, { "epoch": 0.5225477327255855, "grad_norm": 418.32751303485225, "learning_rate": 9.443376070699778e-06, "loss": 242.7046, "step": 27170 }, { "epoch": 0.522740057986066, "grad_norm": 476.7775685830085, "learning_rate": 9.437282494168379e-06, "loss": 237.5189, "step": 27180 }, { "epoch": 0.5229323832465466, "grad_norm": 437.47874987469135, "learning_rate": 9.431189127239962e-06, "loss": 248.3712, "step": 27190 }, { "epoch": 0.5231247085070271, "grad_norm": 461.8676345622946, "learning_rate": 9.4250959721842e-06, "loss": 236.9921, "step": 27200 }, { "epoch": 0.5233170337675076, "grad_norm": 459.48575264396266, "learning_rate": 9.419003031270692e-06, "loss": 241.2283, "step": 27210 }, { "epoch": 0.5235093590279881, "grad_norm": 411.71970850264603, "learning_rate": 9.412910306768959e-06, "loss": 235.0745, "step": 27220 }, { "epoch": 0.5237016842884686, "grad_norm": 429.01473497405584, "learning_rate": 9.40681780094844e-06, "loss": 233.6244, "step": 27230 }, { "epoch": 0.5238940095489492, "grad_norm": 456.5527863494388, "learning_rate": 9.400725516078496e-06, "loss": 238.7592, "step": 27240 }, { "epoch": 0.5240863348094297, "grad_norm": 457.7871086447985, "learning_rate": 9.394633454428396e-06, "loss": 245.8387, "step": 27250 }, { "epoch": 0.5242786600699102, "grad_norm": 413.2930024819135, "learning_rate": 9.388541618267341e-06, "loss": 243.7098, "step": 27260 }, { "epoch": 0.5244709853303907, "grad_norm": 431.8288670302755, "learning_rate": 9.382450009864434e-06, "loss": 245.2352, "step": 27270 }, { "epoch": 0.5246633105908712, "grad_norm": 410.1399914861093, "learning_rate": 9.376358631488697e-06, "loss": 230.0802, "step": 27280 }, { "epoch": 0.5248556358513518, "grad_norm": 407.2477501744951, "learning_rate": 9.37026748540907e-06, "loss": 242.6817, "step": 27290 }, { "epoch": 0.5250479611118324, "grad_norm": 466.8828377221575, "learning_rate": 9.364176573894404e-06, "loss": 234.4508, "step": 27300 }, { "epoch": 0.5252402863723129, "grad_norm": 502.122779679048, "learning_rate": 9.358085899213467e-06, "loss": 241.0622, "step": 27310 }, { "epoch": 0.5254326116327934, "grad_norm": 413.17550450905907, "learning_rate": 9.351995463634925e-06, "loss": 237.4504, "step": 27320 }, { "epoch": 0.5256249368932739, "grad_norm": 429.2760879714301, "learning_rate": 9.345905269427374e-06, "loss": 236.3551, "step": 27330 }, { "epoch": 0.5258172621537545, "grad_norm": 389.53842007431916, "learning_rate": 9.33981531885931e-06, "loss": 243.789, "step": 27340 }, { "epoch": 0.526009587414235, "grad_norm": 459.72382965221465, "learning_rate": 9.333725614199132e-06, "loss": 241.6083, "step": 27350 }, { "epoch": 0.5262019126747155, "grad_norm": 494.718895807746, "learning_rate": 9.32763615771516e-06, "loss": 243.3535, "step": 27360 }, { "epoch": 0.526394237935196, "grad_norm": 563.2931188281974, "learning_rate": 9.321546951675616e-06, "loss": 242.2146, "step": 27370 }, { "epoch": 0.5265865631956765, "grad_norm": 404.87914951646826, "learning_rate": 9.315457998348627e-06, "loss": 236.3093, "step": 27380 }, { "epoch": 0.5267788884561571, "grad_norm": 421.0440915530679, "learning_rate": 9.309369300002224e-06, "loss": 249.0604, "step": 27390 }, { "epoch": 0.5269712137166376, "grad_norm": 455.40221618630517, "learning_rate": 9.303280858904356e-06, "loss": 234.8021, "step": 27400 }, { "epoch": 0.5271635389771181, "grad_norm": 439.8625948302137, "learning_rate": 9.297192677322862e-06, "loss": 245.3899, "step": 27410 }, { "epoch": 0.5273558642375986, "grad_norm": 419.8388523998324, "learning_rate": 9.291104757525486e-06, "loss": 245.9283, "step": 27420 }, { "epoch": 0.5275481894980791, "grad_norm": 472.0594473081434, "learning_rate": 9.28501710177988e-06, "loss": 232.1825, "step": 27430 }, { "epoch": 0.5277405147585597, "grad_norm": 452.6552138263836, "learning_rate": 9.278929712353595e-06, "loss": 245.7812, "step": 27440 }, { "epoch": 0.5279328400190402, "grad_norm": 415.5844956131069, "learning_rate": 9.272842591514083e-06, "loss": 234.7244, "step": 27450 }, { "epoch": 0.5281251652795207, "grad_norm": 458.11289825003047, "learning_rate": 9.2667557415287e-06, "loss": 242.4748, "step": 27460 }, { "epoch": 0.5283174905400012, "grad_norm": 496.39703944737977, "learning_rate": 9.260669164664687e-06, "loss": 245.5343, "step": 27470 }, { "epoch": 0.5285098158004817, "grad_norm": 427.5664185579466, "learning_rate": 9.254582863189205e-06, "loss": 246.3929, "step": 27480 }, { "epoch": 0.5287021410609622, "grad_norm": 511.72804177530463, "learning_rate": 9.248496839369293e-06, "loss": 234.7416, "step": 27490 }, { "epoch": 0.5288944663214428, "grad_norm": 431.86408011885624, "learning_rate": 9.242411095471897e-06, "loss": 258.5262, "step": 27500 }, { "epoch": 0.5290867915819234, "grad_norm": 399.58378239258275, "learning_rate": 9.236325633763856e-06, "loss": 238.2345, "step": 27510 }, { "epoch": 0.5292791168424039, "grad_norm": 390.55367240708824, "learning_rate": 9.230240456511905e-06, "loss": 250.9607, "step": 27520 }, { "epoch": 0.5294714421028844, "grad_norm": 425.48540248480083, "learning_rate": 9.224155565982673e-06, "loss": 235.8497, "step": 27530 }, { "epoch": 0.529663767363365, "grad_norm": 432.15339751457645, "learning_rate": 9.218070964442673e-06, "loss": 241.456, "step": 27540 }, { "epoch": 0.5298560926238455, "grad_norm": 435.0571206833872, "learning_rate": 9.21198665415833e-06, "loss": 240.1762, "step": 27550 }, { "epoch": 0.530048417884326, "grad_norm": 425.7631194357518, "learning_rate": 9.205902637395943e-06, "loss": 242.1126, "step": 27560 }, { "epoch": 0.5302407431448065, "grad_norm": 464.8717870887204, "learning_rate": 9.199818916421706e-06, "loss": 234.666, "step": 27570 }, { "epoch": 0.530433068405287, "grad_norm": 415.10545006700994, "learning_rate": 9.193735493501707e-06, "loss": 239.2713, "step": 27580 }, { "epoch": 0.5306253936657676, "grad_norm": 453.1645297087392, "learning_rate": 9.187652370901925e-06, "loss": 245.0213, "step": 27590 }, { "epoch": 0.5308177189262481, "grad_norm": 425.0915885104322, "learning_rate": 9.181569550888217e-06, "loss": 246.4904, "step": 27600 }, { "epoch": 0.5310100441867286, "grad_norm": 449.4949800991947, "learning_rate": 9.175487035726332e-06, "loss": 236.694, "step": 27610 }, { "epoch": 0.5312023694472091, "grad_norm": 410.62219552058235, "learning_rate": 9.169404827681912e-06, "loss": 234.5753, "step": 27620 }, { "epoch": 0.5313946947076896, "grad_norm": 430.7558462469215, "learning_rate": 9.163322929020476e-06, "loss": 234.0876, "step": 27630 }, { "epoch": 0.5315870199681701, "grad_norm": 420.93098003193916, "learning_rate": 9.157241342007428e-06, "loss": 240.2349, "step": 27640 }, { "epoch": 0.5317793452286507, "grad_norm": 458.06884617397304, "learning_rate": 9.15116006890806e-06, "loss": 242.8684, "step": 27650 }, { "epoch": 0.5319716704891312, "grad_norm": 442.557301828324, "learning_rate": 9.145079111987552e-06, "loss": 243.5506, "step": 27660 }, { "epoch": 0.5321639957496117, "grad_norm": 396.668803221925, "learning_rate": 9.138998473510953e-06, "loss": 237.6186, "step": 27670 }, { "epoch": 0.5323563210100922, "grad_norm": 412.50622043995537, "learning_rate": 9.1329181557432e-06, "loss": 240.7753, "step": 27680 }, { "epoch": 0.5325486462705727, "grad_norm": 444.6259890702195, "learning_rate": 9.126838160949119e-06, "loss": 239.7045, "step": 27690 }, { "epoch": 0.5327409715310533, "grad_norm": 419.37101807850524, "learning_rate": 9.120758491393402e-06, "loss": 246.308, "step": 27700 }, { "epoch": 0.5329332967915339, "grad_norm": 410.37578221719366, "learning_rate": 9.114679149340623e-06, "loss": 244.9286, "step": 27710 }, { "epoch": 0.5331256220520144, "grad_norm": 389.95589899489056, "learning_rate": 9.10860013705524e-06, "loss": 238.3731, "step": 27720 }, { "epoch": 0.5333179473124949, "grad_norm": 400.30015713332745, "learning_rate": 9.102521456801582e-06, "loss": 233.7713, "step": 27730 }, { "epoch": 0.5335102725729755, "grad_norm": 426.4517554236645, "learning_rate": 9.096443110843864e-06, "loss": 237.3978, "step": 27740 }, { "epoch": 0.533702597833456, "grad_norm": 431.4516304307552, "learning_rate": 9.090365101446157e-06, "loss": 239.0948, "step": 27750 }, { "epoch": 0.5338949230939365, "grad_norm": 460.3296232740923, "learning_rate": 9.084287430872435e-06, "loss": 239.439, "step": 27760 }, { "epoch": 0.534087248354417, "grad_norm": 459.6042345801548, "learning_rate": 9.078210101386518e-06, "loss": 236.2329, "step": 27770 }, { "epoch": 0.5342795736148975, "grad_norm": 464.4047365868709, "learning_rate": 9.072133115252113e-06, "loss": 238.4873, "step": 27780 }, { "epoch": 0.534471898875378, "grad_norm": 454.55527372899735, "learning_rate": 9.066056474732798e-06, "loss": 238.1163, "step": 27790 }, { "epoch": 0.5346642241358586, "grad_norm": 452.77022611762703, "learning_rate": 9.059980182092022e-06, "loss": 233.5605, "step": 27800 }, { "epoch": 0.5348565493963391, "grad_norm": 434.5649516138401, "learning_rate": 9.053904239593106e-06, "loss": 235.687, "step": 27810 }, { "epoch": 0.5350488746568196, "grad_norm": 470.9282306823315, "learning_rate": 9.047828649499236e-06, "loss": 241.3732, "step": 27820 }, { "epoch": 0.5352411999173001, "grad_norm": 441.4699336661971, "learning_rate": 9.041753414073463e-06, "loss": 236.423, "step": 27830 }, { "epoch": 0.5354335251777806, "grad_norm": 475.386688320379, "learning_rate": 9.035678535578723e-06, "loss": 236.9872, "step": 27840 }, { "epoch": 0.5356258504382612, "grad_norm": 446.43459437258434, "learning_rate": 9.029604016277798e-06, "loss": 238.6817, "step": 27850 }, { "epoch": 0.5358181756987417, "grad_norm": 486.423545734665, "learning_rate": 9.02352985843335e-06, "loss": 243.9364, "step": 27860 }, { "epoch": 0.5360105009592222, "grad_norm": 414.58581917593847, "learning_rate": 9.017456064307904e-06, "loss": 239.5746, "step": 27870 }, { "epoch": 0.5362028262197027, "grad_norm": 432.0216440586158, "learning_rate": 9.01138263616385e-06, "loss": 240.0917, "step": 27880 }, { "epoch": 0.5363951514801832, "grad_norm": 428.7506577189589, "learning_rate": 9.005309576263436e-06, "loss": 237.1916, "step": 27890 }, { "epoch": 0.5365874767406638, "grad_norm": 453.0900898673735, "learning_rate": 8.999236886868772e-06, "loss": 233.2634, "step": 27900 }, { "epoch": 0.5367798020011444, "grad_norm": 446.1302851353409, "learning_rate": 8.993164570241844e-06, "loss": 235.4085, "step": 27910 }, { "epoch": 0.5369721272616249, "grad_norm": 424.69855258932245, "learning_rate": 8.987092628644483e-06, "loss": 238.4318, "step": 27920 }, { "epoch": 0.5371644525221054, "grad_norm": 435.9865284409231, "learning_rate": 8.981021064338388e-06, "loss": 237.3137, "step": 27930 }, { "epoch": 0.537356777782586, "grad_norm": 450.13633070604385, "learning_rate": 8.974949879585118e-06, "loss": 239.8066, "step": 27940 }, { "epoch": 0.5375491030430665, "grad_norm": 452.0404500781918, "learning_rate": 8.968879076646093e-06, "loss": 237.128, "step": 27950 }, { "epoch": 0.537741428303547, "grad_norm": 406.93366557763164, "learning_rate": 8.96280865778258e-06, "loss": 236.3275, "step": 27960 }, { "epoch": 0.5379337535640275, "grad_norm": 424.73323086920385, "learning_rate": 8.956738625255709e-06, "loss": 241.5757, "step": 27970 }, { "epoch": 0.538126078824508, "grad_norm": 446.79774320592577, "learning_rate": 8.950668981326473e-06, "loss": 234.8669, "step": 27980 }, { "epoch": 0.5383184040849885, "grad_norm": 445.36588525474866, "learning_rate": 8.94459972825571e-06, "loss": 236.9913, "step": 27990 }, { "epoch": 0.5385107293454691, "grad_norm": 415.8629825529101, "learning_rate": 8.938530868304121e-06, "loss": 235.3133, "step": 28000 }, { "epoch": 0.5387030546059496, "grad_norm": 438.071277790469, "learning_rate": 8.932462403732248e-06, "loss": 237.3404, "step": 28010 }, { "epoch": 0.5388953798664301, "grad_norm": 410.3930296964305, "learning_rate": 8.926394336800502e-06, "loss": 245.5063, "step": 28020 }, { "epoch": 0.5390877051269106, "grad_norm": 445.83349527723215, "learning_rate": 8.920326669769134e-06, "loss": 243.1, "step": 28030 }, { "epoch": 0.5392800303873911, "grad_norm": 574.7024178289419, "learning_rate": 8.914259404898247e-06, "loss": 239.4048, "step": 28040 }, { "epoch": 0.5394723556478717, "grad_norm": 429.4323270904091, "learning_rate": 8.908192544447803e-06, "loss": 238.0667, "step": 28050 }, { "epoch": 0.5396646809083522, "grad_norm": 403.7219766955144, "learning_rate": 8.902126090677605e-06, "loss": 227.862, "step": 28060 }, { "epoch": 0.5398570061688327, "grad_norm": 402.35099634705676, "learning_rate": 8.896060045847305e-06, "loss": 238.9521, "step": 28070 }, { "epoch": 0.5400493314293132, "grad_norm": 439.902198155717, "learning_rate": 8.889994412216403e-06, "loss": 235.4164, "step": 28080 }, { "epoch": 0.5402416566897937, "grad_norm": 436.839214987937, "learning_rate": 8.883929192044254e-06, "loss": 233.4839, "step": 28090 }, { "epoch": 0.5404339819502743, "grad_norm": 414.3816461233584, "learning_rate": 8.877864387590049e-06, "loss": 241.2454, "step": 28100 }, { "epoch": 0.5406263072107548, "grad_norm": 431.4843908096932, "learning_rate": 8.871800001112822e-06, "loss": 239.5498, "step": 28110 }, { "epoch": 0.5408186324712354, "grad_norm": 461.2126036583687, "learning_rate": 8.865736034871468e-06, "loss": 243.9344, "step": 28120 }, { "epoch": 0.5410109577317159, "grad_norm": 476.51559008933083, "learning_rate": 8.859672491124706e-06, "loss": 239.6141, "step": 28130 }, { "epoch": 0.5412032829921964, "grad_norm": 421.76539767904706, "learning_rate": 8.853609372131105e-06, "loss": 240.9978, "step": 28140 }, { "epoch": 0.541395608252677, "grad_norm": 437.24856175150074, "learning_rate": 8.84754668014908e-06, "loss": 237.6165, "step": 28150 }, { "epoch": 0.5415879335131575, "grad_norm": 403.52709788469804, "learning_rate": 8.841484417436886e-06, "loss": 235.1001, "step": 28160 }, { "epoch": 0.541780258773638, "grad_norm": 414.3479546667509, "learning_rate": 8.835422586252613e-06, "loss": 244.4629, "step": 28170 }, { "epoch": 0.5419725840341185, "grad_norm": 417.15151179403546, "learning_rate": 8.829361188854194e-06, "loss": 227.7996, "step": 28180 }, { "epoch": 0.542164909294599, "grad_norm": 414.44938855553704, "learning_rate": 8.823300227499393e-06, "loss": 234.9486, "step": 28190 }, { "epoch": 0.5423572345550796, "grad_norm": 521.6171209970335, "learning_rate": 8.817239704445827e-06, "loss": 240.6794, "step": 28200 }, { "epoch": 0.5425495598155601, "grad_norm": 442.6538531980378, "learning_rate": 8.811179621950937e-06, "loss": 233.0766, "step": 28210 }, { "epoch": 0.5427418850760406, "grad_norm": 467.63584333633014, "learning_rate": 8.805119982272001e-06, "loss": 238.1453, "step": 28220 }, { "epoch": 0.5429342103365211, "grad_norm": 409.399843981876, "learning_rate": 8.799060787666142e-06, "loss": 240.8451, "step": 28230 }, { "epoch": 0.5431265355970016, "grad_norm": 393.13164901766714, "learning_rate": 8.793002040390304e-06, "loss": 240.9221, "step": 28240 }, { "epoch": 0.5433188608574822, "grad_norm": 423.8686013710147, "learning_rate": 8.786943742701273e-06, "loss": 235.5499, "step": 28250 }, { "epoch": 0.5435111861179627, "grad_norm": 441.5146110006175, "learning_rate": 8.780885896855659e-06, "loss": 232.8464, "step": 28260 }, { "epoch": 0.5437035113784432, "grad_norm": 457.2506720772257, "learning_rate": 8.774828505109918e-06, "loss": 243.7003, "step": 28270 }, { "epoch": 0.5438958366389237, "grad_norm": 462.57648847522546, "learning_rate": 8.768771569720324e-06, "loss": 237.9007, "step": 28280 }, { "epoch": 0.5440881618994042, "grad_norm": 453.40683408313174, "learning_rate": 8.762715092942983e-06, "loss": 246.5548, "step": 28290 }, { "epoch": 0.5442804871598848, "grad_norm": 420.21343945072533, "learning_rate": 8.756659077033838e-06, "loss": 238.1265, "step": 28300 }, { "epoch": 0.5444728124203653, "grad_norm": 421.3046042008414, "learning_rate": 8.750603524248653e-06, "loss": 244.3051, "step": 28310 }, { "epoch": 0.5446651376808459, "grad_norm": 407.10993830391544, "learning_rate": 8.744548436843021e-06, "loss": 240.005, "step": 28320 }, { "epoch": 0.5448574629413264, "grad_norm": 489.2844336922994, "learning_rate": 8.738493817072359e-06, "loss": 238.9589, "step": 28330 }, { "epoch": 0.5450497882018069, "grad_norm": 454.2845303295166, "learning_rate": 8.73243966719192e-06, "loss": 240.3854, "step": 28340 }, { "epoch": 0.5452421134622875, "grad_norm": 450.523359024384, "learning_rate": 8.726385989456764e-06, "loss": 235.6842, "step": 28350 }, { "epoch": 0.545434438722768, "grad_norm": 421.99590031991517, "learning_rate": 8.7203327861218e-06, "loss": 238.8083, "step": 28360 }, { "epoch": 0.5456267639832485, "grad_norm": 424.87954375263115, "learning_rate": 8.71428005944173e-06, "loss": 237.5431, "step": 28370 }, { "epoch": 0.545819089243729, "grad_norm": 414.1600756892164, "learning_rate": 8.708227811671112e-06, "loss": 233.075, "step": 28380 }, { "epoch": 0.5460114145042095, "grad_norm": 435.18702464708923, "learning_rate": 8.702176045064296e-06, "loss": 237.1588, "step": 28390 }, { "epoch": 0.54620373976469, "grad_norm": 398.9502647931613, "learning_rate": 8.696124761875467e-06, "loss": 239.082, "step": 28400 }, { "epoch": 0.5463960650251706, "grad_norm": 385.8835734502926, "learning_rate": 8.690073964358635e-06, "loss": 244.7883, "step": 28410 }, { "epoch": 0.5465883902856511, "grad_norm": 502.0638298680671, "learning_rate": 8.684023654767613e-06, "loss": 238.5478, "step": 28420 }, { "epoch": 0.5467807155461316, "grad_norm": 420.8500437506724, "learning_rate": 8.677973835356048e-06, "loss": 232.1797, "step": 28430 }, { "epoch": 0.5469730408066121, "grad_norm": 393.7392482397256, "learning_rate": 8.671924508377392e-06, "loss": 234.0055, "step": 28440 }, { "epoch": 0.5471653660670927, "grad_norm": 434.21303276379894, "learning_rate": 8.665875676084927e-06, "loss": 237.1666, "step": 28450 }, { "epoch": 0.5473576913275732, "grad_norm": 452.9313713458954, "learning_rate": 8.659827340731738e-06, "loss": 241.5489, "step": 28460 }, { "epoch": 0.5475500165880537, "grad_norm": 455.40813596529097, "learning_rate": 8.653779504570728e-06, "loss": 237.8429, "step": 28470 }, { "epoch": 0.5477423418485342, "grad_norm": 455.1225522038088, "learning_rate": 8.647732169854622e-06, "loss": 239.701, "step": 28480 }, { "epoch": 0.5479346671090147, "grad_norm": 394.37135795117234, "learning_rate": 8.641685338835947e-06, "loss": 237.3779, "step": 28490 }, { "epoch": 0.5481269923694952, "grad_norm": 429.59407273639226, "learning_rate": 8.635639013767053e-06, "loss": 237.9776, "step": 28500 }, { "epoch": 0.5483193176299758, "grad_norm": 455.9025364678148, "learning_rate": 8.629593196900088e-06, "loss": 235.7486, "step": 28510 }, { "epoch": 0.5485116428904563, "grad_norm": 473.7664791092251, "learning_rate": 8.62354789048703e-06, "loss": 236.7516, "step": 28520 }, { "epoch": 0.5487039681509369, "grad_norm": 440.4126155208757, "learning_rate": 8.617503096779648e-06, "loss": 244.196, "step": 28530 }, { "epoch": 0.5488962934114174, "grad_norm": 440.3030451777663, "learning_rate": 8.61145881802953e-06, "loss": 240.0682, "step": 28540 }, { "epoch": 0.549088618671898, "grad_norm": 391.99883130957693, "learning_rate": 8.605415056488067e-06, "loss": 237.6824, "step": 28550 }, { "epoch": 0.5492809439323785, "grad_norm": 417.1824535424529, "learning_rate": 8.599371814406465e-06, "loss": 232.3002, "step": 28560 }, { "epoch": 0.549473269192859, "grad_norm": 440.18805675726577, "learning_rate": 8.59332909403573e-06, "loss": 259.531, "step": 28570 }, { "epoch": 0.5496655944533395, "grad_norm": 459.376879692458, "learning_rate": 8.587286897626672e-06, "loss": 237.1712, "step": 28580 }, { "epoch": 0.54985791971382, "grad_norm": 423.95785600861467, "learning_rate": 8.581245227429918e-06, "loss": 241.3493, "step": 28590 }, { "epoch": 0.5500502449743006, "grad_norm": 404.24463532965217, "learning_rate": 8.575204085695887e-06, "loss": 230.9917, "step": 28600 }, { "epoch": 0.5502425702347811, "grad_norm": 449.0925299991616, "learning_rate": 8.5691634746748e-06, "loss": 241.6851, "step": 28610 }, { "epoch": 0.5504348954952616, "grad_norm": 437.8093108272611, "learning_rate": 8.563123396616683e-06, "loss": 230.9252, "step": 28620 }, { "epoch": 0.5506272207557421, "grad_norm": 439.6946376244109, "learning_rate": 8.557083853771377e-06, "loss": 235.145, "step": 28630 }, { "epoch": 0.5508195460162226, "grad_norm": 505.0433888629805, "learning_rate": 8.551044848388502e-06, "loss": 239.8914, "step": 28640 }, { "epoch": 0.5510118712767031, "grad_norm": 414.46081005848254, "learning_rate": 8.545006382717487e-06, "loss": 237.5509, "step": 28650 }, { "epoch": 0.5512041965371837, "grad_norm": 450.1853383823327, "learning_rate": 8.538968459007569e-06, "loss": 244.1873, "step": 28660 }, { "epoch": 0.5513965217976642, "grad_norm": 429.76686210562667, "learning_rate": 8.532931079507772e-06, "loss": 236.1655, "step": 28670 }, { "epoch": 0.5515888470581447, "grad_norm": 427.8557260174465, "learning_rate": 8.526894246466916e-06, "loss": 233.7107, "step": 28680 }, { "epoch": 0.5517811723186252, "grad_norm": 408.41928612310375, "learning_rate": 8.520857962133623e-06, "loss": 233.7812, "step": 28690 }, { "epoch": 0.5519734975791057, "grad_norm": 424.209194049445, "learning_rate": 8.514822228756311e-06, "loss": 240.2727, "step": 28700 }, { "epoch": 0.5521658228395863, "grad_norm": 419.8385413745241, "learning_rate": 8.508787048583191e-06, "loss": 235.4688, "step": 28710 }, { "epoch": 0.5523581481000668, "grad_norm": 436.7339005060254, "learning_rate": 8.502752423862264e-06, "loss": 239.427, "step": 28720 }, { "epoch": 0.5525504733605474, "grad_norm": 404.90859921527954, "learning_rate": 8.496718356841335e-06, "loss": 231.2506, "step": 28730 }, { "epoch": 0.5527427986210279, "grad_norm": 404.1225956730404, "learning_rate": 8.49068484976799e-06, "loss": 237.4833, "step": 28740 }, { "epoch": 0.5529351238815085, "grad_norm": 421.7255506220515, "learning_rate": 8.484651904889614e-06, "loss": 232.7491, "step": 28750 }, { "epoch": 0.553127449141989, "grad_norm": 447.86602621506404, "learning_rate": 8.478619524453369e-06, "loss": 234.2143, "step": 28760 }, { "epoch": 0.5533197744024695, "grad_norm": 415.51295028594865, "learning_rate": 8.472587710706232e-06, "loss": 230.9796, "step": 28770 }, { "epoch": 0.55351209966295, "grad_norm": 436.53604856654425, "learning_rate": 8.466556465894942e-06, "loss": 238.3564, "step": 28780 }, { "epoch": 0.5537044249234305, "grad_norm": 439.25119264483914, "learning_rate": 8.460525792266046e-06, "loss": 241.4103, "step": 28790 }, { "epoch": 0.553896750183911, "grad_norm": 445.6663629979755, "learning_rate": 8.454495692065862e-06, "loss": 243.7715, "step": 28800 }, { "epoch": 0.5540890754443916, "grad_norm": 430.0362692599995, "learning_rate": 8.448466167540514e-06, "loss": 232.4944, "step": 28810 }, { "epoch": 0.5542814007048721, "grad_norm": 430.04124912487634, "learning_rate": 8.442437220935893e-06, "loss": 244.3331, "step": 28820 }, { "epoch": 0.5544737259653526, "grad_norm": 400.5778873177401, "learning_rate": 8.436408854497679e-06, "loss": 239.7221, "step": 28830 }, { "epoch": 0.5546660512258331, "grad_norm": 464.9492929250041, "learning_rate": 8.430381070471348e-06, "loss": 239.5716, "step": 28840 }, { "epoch": 0.5548583764863136, "grad_norm": 427.00728512860155, "learning_rate": 8.424353871102144e-06, "loss": 232.512, "step": 28850 }, { "epoch": 0.5550507017467942, "grad_norm": 455.05319687920013, "learning_rate": 8.4183272586351e-06, "loss": 237.7265, "step": 28860 }, { "epoch": 0.5552430270072747, "grad_norm": 430.9555904005068, "learning_rate": 8.412301235315026e-06, "loss": 245.7041, "step": 28870 }, { "epoch": 0.5554353522677552, "grad_norm": 430.51117604369654, "learning_rate": 8.406275803386525e-06, "loss": 238.007, "step": 28880 }, { "epoch": 0.5556276775282357, "grad_norm": 446.39510440851063, "learning_rate": 8.400250965093968e-06, "loss": 238.4255, "step": 28890 }, { "epoch": 0.5558200027887162, "grad_norm": 465.8734504330715, "learning_rate": 8.394226722681498e-06, "loss": 242.5858, "step": 28900 }, { "epoch": 0.5560123280491968, "grad_norm": 454.0886816478026, "learning_rate": 8.38820307839306e-06, "loss": 235.7706, "step": 28910 }, { "epoch": 0.5562046533096773, "grad_norm": 392.2356594557625, "learning_rate": 8.382180034472353e-06, "loss": 227.9209, "step": 28920 }, { "epoch": 0.5563969785701578, "grad_norm": 436.39915523910065, "learning_rate": 8.376157593162867e-06, "loss": 232.2681, "step": 28930 }, { "epoch": 0.5565893038306384, "grad_norm": 480.64260870326405, "learning_rate": 8.370135756707853e-06, "loss": 236.8256, "step": 28940 }, { "epoch": 0.556781629091119, "grad_norm": 417.1925180290578, "learning_rate": 8.364114527350357e-06, "loss": 234.1597, "step": 28950 }, { "epoch": 0.5569739543515995, "grad_norm": 449.36298230254397, "learning_rate": 8.358093907333182e-06, "loss": 231.3657, "step": 28960 }, { "epoch": 0.55716627961208, "grad_norm": 471.0711818909541, "learning_rate": 8.35207389889891e-06, "loss": 238.2488, "step": 28970 }, { "epoch": 0.5573586048725605, "grad_norm": 435.1234091862365, "learning_rate": 8.346054504289888e-06, "loss": 240.501, "step": 28980 }, { "epoch": 0.557550930133041, "grad_norm": 423.04558933868947, "learning_rate": 8.34003572574825e-06, "loss": 232.0768, "step": 28990 }, { "epoch": 0.5577432553935215, "grad_norm": 438.87182140576124, "learning_rate": 8.334017565515892e-06, "loss": 238.6667, "step": 29000 }, { "epoch": 0.5579355806540021, "grad_norm": 439.66266151022955, "learning_rate": 8.328000025834472e-06, "loss": 226.9736, "step": 29010 }, { "epoch": 0.5581279059144826, "grad_norm": 436.7555100469507, "learning_rate": 8.321983108945431e-06, "loss": 236.197, "step": 29020 }, { "epoch": 0.5583202311749631, "grad_norm": 439.97985744223655, "learning_rate": 8.315966817089972e-06, "loss": 241.0712, "step": 29030 }, { "epoch": 0.5585125564354436, "grad_norm": 433.4090724994676, "learning_rate": 8.309951152509057e-06, "loss": 235.3943, "step": 29040 }, { "epoch": 0.5587048816959241, "grad_norm": 436.90969806228907, "learning_rate": 8.303936117443422e-06, "loss": 242.7536, "step": 29050 }, { "epoch": 0.5588972069564047, "grad_norm": 404.12443658387616, "learning_rate": 8.297921714133576e-06, "loss": 238.2426, "step": 29060 }, { "epoch": 0.5590895322168852, "grad_norm": 468.93512599776744, "learning_rate": 8.291907944819782e-06, "loss": 232.6437, "step": 29070 }, { "epoch": 0.5592818574773657, "grad_norm": 1199.7744068590969, "learning_rate": 8.285894811742065e-06, "loss": 226.6045, "step": 29080 }, { "epoch": 0.5594741827378462, "grad_norm": 434.02002733519527, "learning_rate": 8.279882317140224e-06, "loss": 229.8454, "step": 29090 }, { "epoch": 0.5596665079983267, "grad_norm": 452.2935112989161, "learning_rate": 8.273870463253813e-06, "loss": 235.6263, "step": 29100 }, { "epoch": 0.5598588332588073, "grad_norm": 450.0834081645108, "learning_rate": 8.267859252322144e-06, "loss": 239.8458, "step": 29110 }, { "epoch": 0.5600511585192878, "grad_norm": 446.1097247813684, "learning_rate": 8.261848686584293e-06, "loss": 230.9927, "step": 29120 }, { "epoch": 0.5602434837797683, "grad_norm": 443.7628055153972, "learning_rate": 8.255838768279106e-06, "loss": 232.7486, "step": 29130 }, { "epoch": 0.5604358090402489, "grad_norm": 433.5340503963023, "learning_rate": 8.249829499645167e-06, "loss": 235.3675, "step": 29140 }, { "epoch": 0.5606281343007294, "grad_norm": 434.9334074012724, "learning_rate": 8.243820882920837e-06, "loss": 229.7794, "step": 29150 }, { "epoch": 0.56082045956121, "grad_norm": 472.2739399899253, "learning_rate": 8.23781292034422e-06, "loss": 233.5955, "step": 29160 }, { "epoch": 0.5610127848216905, "grad_norm": 417.91950790778156, "learning_rate": 8.231805614153192e-06, "loss": 232.6033, "step": 29170 }, { "epoch": 0.561205110082171, "grad_norm": 432.3120678574319, "learning_rate": 8.22579896658537e-06, "loss": 231.9148, "step": 29180 }, { "epoch": 0.5613974353426515, "grad_norm": 457.3705948304885, "learning_rate": 8.219792979878126e-06, "loss": 229.5217, "step": 29190 }, { "epoch": 0.561589760603132, "grad_norm": 433.83089134028205, "learning_rate": 8.213787656268599e-06, "loss": 238.5942, "step": 29200 }, { "epoch": 0.5617820858636126, "grad_norm": 414.6576865837906, "learning_rate": 8.20778299799367e-06, "loss": 234.8118, "step": 29210 }, { "epoch": 0.5619744111240931, "grad_norm": 418.0767112332765, "learning_rate": 8.201779007289975e-06, "loss": 231.9389, "step": 29220 }, { "epoch": 0.5621667363845736, "grad_norm": 458.975100774114, "learning_rate": 8.195775686393898e-06, "loss": 242.5277, "step": 29230 }, { "epoch": 0.5623590616450541, "grad_norm": 455.0985297645168, "learning_rate": 8.189773037541585e-06, "loss": 236.6801, "step": 29240 }, { "epoch": 0.5625513869055346, "grad_norm": 413.6803791152206, "learning_rate": 8.183771062968917e-06, "loss": 239.2418, "step": 29250 }, { "epoch": 0.5627437121660152, "grad_norm": 479.33810263701804, "learning_rate": 8.177769764911528e-06, "loss": 236.2911, "step": 29260 }, { "epoch": 0.5629360374264957, "grad_norm": 431.8818470494597, "learning_rate": 8.171769145604812e-06, "loss": 237.3531, "step": 29270 }, { "epoch": 0.5631283626869762, "grad_norm": 393.4924122227779, "learning_rate": 8.165769207283891e-06, "loss": 231.8063, "step": 29280 }, { "epoch": 0.5633206879474567, "grad_norm": 441.90694572883865, "learning_rate": 8.15976995218365e-06, "loss": 236.3916, "step": 29290 }, { "epoch": 0.5635130132079372, "grad_norm": 443.6313797272376, "learning_rate": 8.153771382538706e-06, "loss": 227.402, "step": 29300 }, { "epoch": 0.5637053384684177, "grad_norm": 463.69689906755025, "learning_rate": 8.147773500583434e-06, "loss": 236.4332, "step": 29310 }, { "epoch": 0.5638976637288983, "grad_norm": 410.4716215607669, "learning_rate": 8.141776308551942e-06, "loss": 234.3254, "step": 29320 }, { "epoch": 0.5640899889893788, "grad_norm": 444.71168832516685, "learning_rate": 8.135779808678084e-06, "loss": 236.9215, "step": 29330 }, { "epoch": 0.5642823142498593, "grad_norm": 426.33049987883237, "learning_rate": 8.129784003195458e-06, "loss": 227.8894, "step": 29340 }, { "epoch": 0.5644746395103399, "grad_norm": 438.2620539316175, "learning_rate": 8.123788894337405e-06, "loss": 232.5625, "step": 29350 }, { "epoch": 0.5646669647708205, "grad_norm": 426.74264904474813, "learning_rate": 8.117794484337003e-06, "loss": 234.1855, "step": 29360 }, { "epoch": 0.564859290031301, "grad_norm": 437.32046894423706, "learning_rate": 8.111800775427066e-06, "loss": 233.2375, "step": 29370 }, { "epoch": 0.5650516152917815, "grad_norm": 428.3537798232405, "learning_rate": 8.10580776984016e-06, "loss": 232.6249, "step": 29380 }, { "epoch": 0.565243940552262, "grad_norm": 392.1065371992853, "learning_rate": 8.099815469808573e-06, "loss": 232.3645, "step": 29390 }, { "epoch": 0.5654362658127425, "grad_norm": 416.9722829901236, "learning_rate": 8.093823877564343e-06, "loss": 233.1531, "step": 29400 }, { "epoch": 0.565628591073223, "grad_norm": 423.60496820830866, "learning_rate": 8.087832995339236e-06, "loss": 232.5088, "step": 29410 }, { "epoch": 0.5658209163337036, "grad_norm": 429.36969833486455, "learning_rate": 8.081842825364756e-06, "loss": 228.5401, "step": 29420 }, { "epoch": 0.5660132415941841, "grad_norm": 430.1413141416627, "learning_rate": 8.075853369872149e-06, "loss": 234.6433, "step": 29430 }, { "epoch": 0.5662055668546646, "grad_norm": 408.5181320541568, "learning_rate": 8.069864631092377e-06, "loss": 234.2195, "step": 29440 }, { "epoch": 0.5663978921151451, "grad_norm": 426.82140710699855, "learning_rate": 8.063876611256158e-06, "loss": 236.027, "step": 29450 }, { "epoch": 0.5665902173756256, "grad_norm": 396.1152339466765, "learning_rate": 8.057889312593924e-06, "loss": 238.9753, "step": 29460 }, { "epoch": 0.5667825426361062, "grad_norm": 1433.6892418418208, "learning_rate": 8.051902737335847e-06, "loss": 244.3272, "step": 29470 }, { "epoch": 0.5669748678965867, "grad_norm": 454.6451528307559, "learning_rate": 8.045916887711822e-06, "loss": 237.8165, "step": 29480 }, { "epoch": 0.5671671931570672, "grad_norm": 438.9242695035424, "learning_rate": 8.039931765951485e-06, "loss": 242.5767, "step": 29490 }, { "epoch": 0.5673595184175477, "grad_norm": 430.1708938564893, "learning_rate": 8.033947374284196e-06, "loss": 229.8049, "step": 29500 }, { "epoch": 0.5675518436780282, "grad_norm": 433.9020765366243, "learning_rate": 8.02796371493904e-06, "loss": 238.5978, "step": 29510 }, { "epoch": 0.5677441689385088, "grad_norm": 457.179755585707, "learning_rate": 8.021980790144828e-06, "loss": 226.3361, "step": 29520 }, { "epoch": 0.5679364941989893, "grad_norm": 401.10126184229546, "learning_rate": 8.015998602130107e-06, "loss": 233.015, "step": 29530 }, { "epoch": 0.5681288194594698, "grad_norm": 419.8783587524023, "learning_rate": 8.01001715312314e-06, "loss": 230.0096, "step": 29540 }, { "epoch": 0.5683211447199504, "grad_norm": 406.5105505968295, "learning_rate": 8.00403644535191e-06, "loss": 233.0671, "step": 29550 }, { "epoch": 0.568513469980431, "grad_norm": 429.09092078313, "learning_rate": 7.998056481044146e-06, "loss": 235.6919, "step": 29560 }, { "epoch": 0.5687057952409115, "grad_norm": 482.7599097446289, "learning_rate": 7.992077262427282e-06, "loss": 243.4911, "step": 29570 }, { "epoch": 0.568898120501392, "grad_norm": 443.50735620417, "learning_rate": 7.986098791728475e-06, "loss": 231.8863, "step": 29580 }, { "epoch": 0.5690904457618725, "grad_norm": 428.7615281032246, "learning_rate": 7.9801210711746e-06, "loss": 237.9173, "step": 29590 }, { "epoch": 0.569282771022353, "grad_norm": 408.8363081666945, "learning_rate": 7.974144102992273e-06, "loss": 232.385, "step": 29600 }, { "epoch": 0.5694750962828335, "grad_norm": 504.4538415192509, "learning_rate": 7.968167889407813e-06, "loss": 229.3145, "step": 29610 }, { "epoch": 0.5696674215433141, "grad_norm": 407.0660969430595, "learning_rate": 7.96219243264725e-06, "loss": 236.7475, "step": 29620 }, { "epoch": 0.5698597468037946, "grad_norm": 429.1831615564374, "learning_rate": 7.956217734936353e-06, "loss": 231.8069, "step": 29630 }, { "epoch": 0.5700520720642751, "grad_norm": 422.59361402290205, "learning_rate": 7.950243798500593e-06, "loss": 229.0326, "step": 29640 }, { "epoch": 0.5702443973247556, "grad_norm": 455.3871506965756, "learning_rate": 7.94427062556517e-06, "loss": 236.7605, "step": 29650 }, { "epoch": 0.5704367225852361, "grad_norm": 413.4690621057088, "learning_rate": 7.938298218354985e-06, "loss": 231.2507, "step": 29660 }, { "epoch": 0.5706290478457167, "grad_norm": 454.06650192224396, "learning_rate": 7.932326579094665e-06, "loss": 236.3325, "step": 29670 }, { "epoch": 0.5708213731061972, "grad_norm": 424.6707673047249, "learning_rate": 7.926355710008545e-06, "loss": 233.2701, "step": 29680 }, { "epoch": 0.5710136983666777, "grad_norm": 431.309560668897, "learning_rate": 7.920385613320675e-06, "loss": 230.0675, "step": 29690 }, { "epoch": 0.5712060236271582, "grad_norm": 503.5816889167475, "learning_rate": 7.914416291254817e-06, "loss": 224.8446, "step": 29700 }, { "epoch": 0.5713983488876387, "grad_norm": 418.8314412889683, "learning_rate": 7.908447746034447e-06, "loss": 228.1362, "step": 29710 }, { "epoch": 0.5715906741481193, "grad_norm": 419.6478281748611, "learning_rate": 7.902479979882749e-06, "loss": 238.2103, "step": 29720 }, { "epoch": 0.5717829994085998, "grad_norm": 447.38569161606716, "learning_rate": 7.896512995022614e-06, "loss": 234.149, "step": 29730 }, { "epoch": 0.5719753246690803, "grad_norm": 387.5736557529939, "learning_rate": 7.890546793676652e-06, "loss": 231.6947, "step": 29740 }, { "epoch": 0.5721676499295609, "grad_norm": 424.8616425416752, "learning_rate": 7.884581378067171e-06, "loss": 234.9736, "step": 29750 }, { "epoch": 0.5723599751900414, "grad_norm": 409.924725771009, "learning_rate": 7.878616750416186e-06, "loss": 230.9598, "step": 29760 }, { "epoch": 0.572552300450522, "grad_norm": 478.96694226206637, "learning_rate": 7.872652912945426e-06, "loss": 231.2302, "step": 29770 }, { "epoch": 0.5727446257110025, "grad_norm": 478.47508680884954, "learning_rate": 7.866689867876323e-06, "loss": 235.5065, "step": 29780 }, { "epoch": 0.572936950971483, "grad_norm": 481.5899600576842, "learning_rate": 7.860727617430013e-06, "loss": 232.1499, "step": 29790 }, { "epoch": 0.5731292762319635, "grad_norm": 455.6893184536974, "learning_rate": 7.85476616382733e-06, "loss": 224.5898, "step": 29800 }, { "epoch": 0.573321601492444, "grad_norm": 452.9548592326789, "learning_rate": 7.848805509288824e-06, "loss": 239.2905, "step": 29810 }, { "epoch": 0.5735139267529246, "grad_norm": 443.6704667489931, "learning_rate": 7.84284565603474e-06, "loss": 230.783, "step": 29820 }, { "epoch": 0.5737062520134051, "grad_norm": 429.7996519098853, "learning_rate": 7.83688660628502e-06, "loss": 233.0479, "step": 29830 }, { "epoch": 0.5738985772738856, "grad_norm": 426.8290524830637, "learning_rate": 7.830928362259313e-06, "loss": 232.428, "step": 29840 }, { "epoch": 0.5740909025343661, "grad_norm": 424.59455158508, "learning_rate": 7.824970926176967e-06, "loss": 235.2198, "step": 29850 }, { "epoch": 0.5742832277948466, "grad_norm": 429.35119612704057, "learning_rate": 7.819014300257033e-06, "loss": 230.9038, "step": 29860 }, { "epoch": 0.5744755530553272, "grad_norm": 403.15251024914807, "learning_rate": 7.813058486718252e-06, "loss": 235.8594, "step": 29870 }, { "epoch": 0.5746678783158077, "grad_norm": 415.4461327359759, "learning_rate": 7.80710348777906e-06, "loss": 233.234, "step": 29880 }, { "epoch": 0.5748602035762882, "grad_norm": 441.5589073334, "learning_rate": 7.801149305657609e-06, "loss": 233.0159, "step": 29890 }, { "epoch": 0.5750525288367687, "grad_norm": 409.6369009422606, "learning_rate": 7.795195942571722e-06, "loss": 242.0823, "step": 29900 }, { "epoch": 0.5752448540972492, "grad_norm": 413.24560529311753, "learning_rate": 7.789243400738934e-06, "loss": 231.3054, "step": 29910 }, { "epoch": 0.5754371793577298, "grad_norm": 418.90371337350683, "learning_rate": 7.783291682376465e-06, "loss": 241.4429, "step": 29920 }, { "epoch": 0.5756295046182103, "grad_norm": 433.6583464583209, "learning_rate": 7.77734078970124e-06, "loss": 240.439, "step": 29930 }, { "epoch": 0.5758218298786908, "grad_norm": 447.94753747349483, "learning_rate": 7.77139072492986e-06, "loss": 230.878, "step": 29940 }, { "epoch": 0.5760141551391713, "grad_norm": 422.5523702070567, "learning_rate": 7.765441490278625e-06, "loss": 231.5587, "step": 29950 }, { "epoch": 0.5762064803996519, "grad_norm": 405.55562650065235, "learning_rate": 7.759493087963535e-06, "loss": 235.8433, "step": 29960 }, { "epoch": 0.5763988056601325, "grad_norm": 371.66110667266116, "learning_rate": 7.753545520200264e-06, "loss": 236.9481, "step": 29970 }, { "epoch": 0.576591130920613, "grad_norm": 416.8493988111023, "learning_rate": 7.747598789204183e-06, "loss": 230.5512, "step": 29980 }, { "epoch": 0.5767834561810935, "grad_norm": 447.3445072834849, "learning_rate": 7.741652897190355e-06, "loss": 235.6195, "step": 29990 }, { "epoch": 0.576975781441574, "grad_norm": 401.13914360154604, "learning_rate": 7.735707846373527e-06, "loss": 229.3117, "step": 30000 }, { "epoch": 0.5771681067020545, "grad_norm": 443.54738499440026, "learning_rate": 7.72976363896813e-06, "loss": 233.2199, "step": 30010 }, { "epoch": 0.5773604319625351, "grad_norm": 413.1042957854533, "learning_rate": 7.723820277188278e-06, "loss": 235.5748, "step": 30020 }, { "epoch": 0.5775527572230156, "grad_norm": 423.718236725885, "learning_rate": 7.717877763247787e-06, "loss": 231.1978, "step": 30030 }, { "epoch": 0.5777450824834961, "grad_norm": 455.63615005758646, "learning_rate": 7.711936099360138e-06, "loss": 236.8507, "step": 30040 }, { "epoch": 0.5779374077439766, "grad_norm": 446.42643797268255, "learning_rate": 7.705995287738498e-06, "loss": 231.9344, "step": 30050 }, { "epoch": 0.5781297330044571, "grad_norm": 388.7642847848498, "learning_rate": 7.70005533059573e-06, "loss": 233.6973, "step": 30060 }, { "epoch": 0.5783220582649377, "grad_norm": 421.4703086973421, "learning_rate": 7.694116230144368e-06, "loss": 231.8808, "step": 30070 }, { "epoch": 0.5785143835254182, "grad_norm": 407.19610921994956, "learning_rate": 7.688177988596628e-06, "loss": 234.6228, "step": 30080 }, { "epoch": 0.5787067087858987, "grad_norm": 455.4291490158142, "learning_rate": 7.682240608164401e-06, "loss": 226.4843, "step": 30090 }, { "epoch": 0.5788990340463792, "grad_norm": 441.8403519094396, "learning_rate": 7.676304091059273e-06, "loss": 231.8175, "step": 30100 }, { "epoch": 0.5790913593068597, "grad_norm": 450.8278827515652, "learning_rate": 7.670368439492495e-06, "loss": 232.8311, "step": 30110 }, { "epoch": 0.5792836845673403, "grad_norm": 405.8194780721003, "learning_rate": 7.664433655674995e-06, "loss": 219.8278, "step": 30120 }, { "epoch": 0.5794760098278208, "grad_norm": 427.7900353555768, "learning_rate": 7.658499741817383e-06, "loss": 238.838, "step": 30130 }, { "epoch": 0.5796683350883013, "grad_norm": 419.69001637825784, "learning_rate": 7.65256670012995e-06, "loss": 228.6759, "step": 30140 }, { "epoch": 0.5798606603487818, "grad_norm": 456.5769461953817, "learning_rate": 7.646634532822652e-06, "loss": 233.5072, "step": 30150 }, { "epoch": 0.5800529856092624, "grad_norm": 432.19645898671143, "learning_rate": 7.64070324210512e-06, "loss": 237.3528, "step": 30160 }, { "epoch": 0.580245310869743, "grad_norm": 414.74355142617253, "learning_rate": 7.634772830186668e-06, "loss": 226.0943, "step": 30170 }, { "epoch": 0.5804376361302235, "grad_norm": 463.38022675404983, "learning_rate": 7.628843299276276e-06, "loss": 238.0814, "step": 30180 }, { "epoch": 0.580629961390704, "grad_norm": 458.96808206393257, "learning_rate": 7.62291465158259e-06, "loss": 228.016, "step": 30190 }, { "epoch": 0.5808222866511845, "grad_norm": 388.6820310962333, "learning_rate": 7.616986889313939e-06, "loss": 226.2567, "step": 30200 }, { "epoch": 0.581014611911665, "grad_norm": 420.74530790257467, "learning_rate": 7.611060014678313e-06, "loss": 228.9455, "step": 30210 }, { "epoch": 0.5812069371721456, "grad_norm": 404.46120195011787, "learning_rate": 7.605134029883381e-06, "loss": 231.8223, "step": 30220 }, { "epoch": 0.5813992624326261, "grad_norm": 427.0435056406804, "learning_rate": 7.599208937136465e-06, "loss": 230.0195, "step": 30230 }, { "epoch": 0.5815915876931066, "grad_norm": 492.652091815168, "learning_rate": 7.593284738644574e-06, "loss": 236.554, "step": 30240 }, { "epoch": 0.5817839129535871, "grad_norm": 405.2876325245797, "learning_rate": 7.58736143661437e-06, "loss": 231.29, "step": 30250 }, { "epoch": 0.5819762382140676, "grad_norm": 400.54105191631095, "learning_rate": 7.5814390332521824e-06, "loss": 231.5176, "step": 30260 }, { "epoch": 0.5821685634745482, "grad_norm": 452.9209422209049, "learning_rate": 7.575517530764011e-06, "loss": 237.1046, "step": 30270 }, { "epoch": 0.5823608887350287, "grad_norm": 419.08458295175444, "learning_rate": 7.569596931355517e-06, "loss": 230.7505, "step": 30280 }, { "epoch": 0.5825532139955092, "grad_norm": 404.9006220897538, "learning_rate": 7.56367723723203e-06, "loss": 229.8239, "step": 30290 }, { "epoch": 0.5827455392559897, "grad_norm": 430.54315501259185, "learning_rate": 7.557758450598534e-06, "loss": 234.9123, "step": 30300 }, { "epoch": 0.5829378645164702, "grad_norm": 439.26702337885746, "learning_rate": 7.551840573659677e-06, "loss": 233.0573, "step": 30310 }, { "epoch": 0.5831301897769507, "grad_norm": 424.3910357431075, "learning_rate": 7.5459236086197775e-06, "loss": 235.5535, "step": 30320 }, { "epoch": 0.5833225150374313, "grad_norm": 435.8422419126059, "learning_rate": 7.5400075576828e-06, "loss": 230.3998, "step": 30330 }, { "epoch": 0.5835148402979118, "grad_norm": 467.59057678324507, "learning_rate": 7.534092423052382e-06, "loss": 229.3895, "step": 30340 }, { "epoch": 0.5837071655583923, "grad_norm": 392.83174014064605, "learning_rate": 7.5281782069318075e-06, "loss": 236.8895, "step": 30350 }, { "epoch": 0.5838994908188728, "grad_norm": 408.64504988459606, "learning_rate": 7.522264911524031e-06, "loss": 225.9544, "step": 30360 }, { "epoch": 0.5840918160793535, "grad_norm": 458.60682984834364, "learning_rate": 7.516352539031654e-06, "loss": 231.1042, "step": 30370 }, { "epoch": 0.584284141339834, "grad_norm": 442.10506943256286, "learning_rate": 7.510441091656933e-06, "loss": 230.3113, "step": 30380 }, { "epoch": 0.5844764666003145, "grad_norm": 424.90061971921887, "learning_rate": 7.504530571601792e-06, "loss": 232.1466, "step": 30390 }, { "epoch": 0.584668791860795, "grad_norm": 450.53099398000296, "learning_rate": 7.498620981067799e-06, "loss": 231.1732, "step": 30400 }, { "epoch": 0.5848611171212755, "grad_norm": 444.83171206775637, "learning_rate": 7.492712322256177e-06, "loss": 235.6026, "step": 30410 }, { "epoch": 0.585053442381756, "grad_norm": 408.7054957045418, "learning_rate": 7.486804597367807e-06, "loss": 227.3443, "step": 30420 }, { "epoch": 0.5852457676422366, "grad_norm": 472.02099183170355, "learning_rate": 7.480897808603219e-06, "loss": 227.5788, "step": 30430 }, { "epoch": 0.5854380929027171, "grad_norm": 399.4266232448373, "learning_rate": 7.474991958162594e-06, "loss": 228.8603, "step": 30440 }, { "epoch": 0.5856304181631976, "grad_norm": 424.9655179956938, "learning_rate": 7.469087048245758e-06, "loss": 228.3165, "step": 30450 }, { "epoch": 0.5858227434236781, "grad_norm": 492.2468964577878, "learning_rate": 7.463183081052201e-06, "loss": 246.5162, "step": 30460 }, { "epoch": 0.5860150686841586, "grad_norm": 470.4055813416328, "learning_rate": 7.457280058781049e-06, "loss": 234.8857, "step": 30470 }, { "epoch": 0.5862073939446392, "grad_norm": 391.8138703097309, "learning_rate": 7.451377983631078e-06, "loss": 240.1962, "step": 30480 }, { "epoch": 0.5863997192051197, "grad_norm": 440.67473922789134, "learning_rate": 7.445476857800717e-06, "loss": 232.0519, "step": 30490 }, { "epoch": 0.5865920444656002, "grad_norm": 441.44048588385436, "learning_rate": 7.439576683488039e-06, "loss": 236.3313, "step": 30500 }, { "epoch": 0.5867843697260807, "grad_norm": 413.6776950795708, "learning_rate": 7.4336774628907604e-06, "loss": 233.8738, "step": 30510 }, { "epoch": 0.5869766949865612, "grad_norm": 377.0394250130346, "learning_rate": 7.427779198206238e-06, "loss": 227.6462, "step": 30520 }, { "epoch": 0.5871690202470418, "grad_norm": 417.4955303849164, "learning_rate": 7.421881891631487e-06, "loss": 230.2861, "step": 30530 }, { "epoch": 0.5873613455075223, "grad_norm": 452.8235469494731, "learning_rate": 7.415985545363152e-06, "loss": 227.2522, "step": 30540 }, { "epoch": 0.5875536707680028, "grad_norm": 405.2523553072771, "learning_rate": 7.410090161597523e-06, "loss": 229.1613, "step": 30550 }, { "epoch": 0.5877459960284833, "grad_norm": 430.0011771345925, "learning_rate": 7.404195742530533e-06, "loss": 228.5989, "step": 30560 }, { "epoch": 0.587938321288964, "grad_norm": 439.6467105663855, "learning_rate": 7.398302290357763e-06, "loss": 229.5614, "step": 30570 }, { "epoch": 0.5881306465494445, "grad_norm": 436.81704954605465, "learning_rate": 7.392409807274421e-06, "loss": 226.3135, "step": 30580 }, { "epoch": 0.588322971809925, "grad_norm": 474.10804028632, "learning_rate": 7.386518295475355e-06, "loss": 231.6304, "step": 30590 }, { "epoch": 0.5885152970704055, "grad_norm": 467.3658345242713, "learning_rate": 7.380627757155065e-06, "loss": 233.0394, "step": 30600 }, { "epoch": 0.588707622330886, "grad_norm": 399.486822025666, "learning_rate": 7.374738194507675e-06, "loss": 227.9455, "step": 30610 }, { "epoch": 0.5888999475913665, "grad_norm": 454.2506372473318, "learning_rate": 7.3688496097269494e-06, "loss": 229.979, "step": 30620 }, { "epoch": 0.5890922728518471, "grad_norm": 445.4615069161232, "learning_rate": 7.362962005006286e-06, "loss": 240.3873, "step": 30630 }, { "epoch": 0.5892845981123276, "grad_norm": 402.9525346358271, "learning_rate": 7.3570753825387275e-06, "loss": 223.9229, "step": 30640 }, { "epoch": 0.5894769233728081, "grad_norm": 417.43736079717536, "learning_rate": 7.35118974451694e-06, "loss": 223.1825, "step": 30650 }, { "epoch": 0.5896692486332886, "grad_norm": 426.8909589760677, "learning_rate": 7.345305093133226e-06, "loss": 226.7633, "step": 30660 }, { "epoch": 0.5898615738937691, "grad_norm": 408.00056600101846, "learning_rate": 7.3394214305795175e-06, "loss": 231.6228, "step": 30670 }, { "epoch": 0.5900538991542497, "grad_norm": 418.52003638147767, "learning_rate": 7.33353875904739e-06, "loss": 228.9093, "step": 30680 }, { "epoch": 0.5902462244147302, "grad_norm": 423.2060876939536, "learning_rate": 7.327657080728032e-06, "loss": 226.2645, "step": 30690 }, { "epoch": 0.5904385496752107, "grad_norm": 431.7907857457317, "learning_rate": 7.321776397812279e-06, "loss": 228.9725, "step": 30700 }, { "epoch": 0.5906308749356912, "grad_norm": 426.5910519938674, "learning_rate": 7.315896712490584e-06, "loss": 232.8618, "step": 30710 }, { "epoch": 0.5908232001961717, "grad_norm": 415.5122035914214, "learning_rate": 7.310018026953036e-06, "loss": 230.8512, "step": 30720 }, { "epoch": 0.5910155254566523, "grad_norm": 416.11562562473597, "learning_rate": 7.304140343389348e-06, "loss": 230.4957, "step": 30730 }, { "epoch": 0.5912078507171328, "grad_norm": 413.03003196926915, "learning_rate": 7.298263663988853e-06, "loss": 236.0105, "step": 30740 }, { "epoch": 0.5914001759776133, "grad_norm": 489.76692735114614, "learning_rate": 7.292387990940526e-06, "loss": 221.4764, "step": 30750 }, { "epoch": 0.5915925012380938, "grad_norm": 442.67044421149234, "learning_rate": 7.286513326432953e-06, "loss": 229.0771, "step": 30760 }, { "epoch": 0.5917848264985743, "grad_norm": 434.77302435504646, "learning_rate": 7.2806396726543526e-06, "loss": 239.3977, "step": 30770 }, { "epoch": 0.591977151759055, "grad_norm": 487.6630040877233, "learning_rate": 7.2747670317925625e-06, "loss": 234.1094, "step": 30780 }, { "epoch": 0.5921694770195355, "grad_norm": 530.3969122692031, "learning_rate": 7.268895406035046e-06, "loss": 227.5739, "step": 30790 }, { "epoch": 0.592361802280016, "grad_norm": 449.93679596780515, "learning_rate": 7.263024797568884e-06, "loss": 236.7667, "step": 30800 }, { "epoch": 0.5925541275404965, "grad_norm": 409.5199402061422, "learning_rate": 7.257155208580778e-06, "loss": 228.957, "step": 30810 }, { "epoch": 0.592746452800977, "grad_norm": 423.41739669071717, "learning_rate": 7.251286641257062e-06, "loss": 232.9693, "step": 30820 }, { "epoch": 0.5929387780614576, "grad_norm": 440.0582684109187, "learning_rate": 7.245419097783674e-06, "loss": 238.0952, "step": 30830 }, { "epoch": 0.5931311033219381, "grad_norm": 413.04356947514185, "learning_rate": 7.239552580346181e-06, "loss": 222.2364, "step": 30840 }, { "epoch": 0.5933234285824186, "grad_norm": 454.83446336969416, "learning_rate": 7.233687091129757e-06, "loss": 239.9237, "step": 30850 }, { "epoch": 0.5935157538428991, "grad_norm": 454.8123010774503, "learning_rate": 7.227822632319208e-06, "loss": 240.0741, "step": 30860 }, { "epoch": 0.5937080791033796, "grad_norm": 475.77692664060123, "learning_rate": 7.221959206098945e-06, "loss": 244.5912, "step": 30870 }, { "epoch": 0.5939004043638602, "grad_norm": 453.8827590251814, "learning_rate": 7.216096814652992e-06, "loss": 242.6609, "step": 30880 }, { "epoch": 0.5940927296243407, "grad_norm": 411.61717707446314, "learning_rate": 7.210235460165002e-06, "loss": 231.2817, "step": 30890 }, { "epoch": 0.5942850548848212, "grad_norm": 404.19490631455994, "learning_rate": 7.2043751448182275e-06, "loss": 229.3745, "step": 30900 }, { "epoch": 0.5944773801453017, "grad_norm": 396.702491867281, "learning_rate": 7.198515870795542e-06, "loss": 227.3233, "step": 30910 }, { "epoch": 0.5946697054057822, "grad_norm": 417.1059933929704, "learning_rate": 7.192657640279421e-06, "loss": 235.2362, "step": 30920 }, { "epoch": 0.5948620306662628, "grad_norm": 412.98431677687495, "learning_rate": 7.18680045545197e-06, "loss": 235.2402, "step": 30930 }, { "epoch": 0.5950543559267433, "grad_norm": 522.2108226665455, "learning_rate": 7.180944318494888e-06, "loss": 234.2923, "step": 30940 }, { "epoch": 0.5952466811872238, "grad_norm": 416.30528816933514, "learning_rate": 7.175089231589485e-06, "loss": 234.4751, "step": 30950 }, { "epoch": 0.5954390064477043, "grad_norm": 430.34876434075846, "learning_rate": 7.1692351969166905e-06, "loss": 241.9152, "step": 30960 }, { "epoch": 0.5956313317081848, "grad_norm": 432.6461229166163, "learning_rate": 7.163382216657033e-06, "loss": 227.4343, "step": 30970 }, { "epoch": 0.5958236569686655, "grad_norm": 391.851122062776, "learning_rate": 7.157530292990654e-06, "loss": 228.7255, "step": 30980 }, { "epoch": 0.596015982229146, "grad_norm": 416.6609842411124, "learning_rate": 7.151679428097291e-06, "loss": 221.7838, "step": 30990 }, { "epoch": 0.5962083074896265, "grad_norm": 436.3104190053486, "learning_rate": 7.145829624156304e-06, "loss": 232.5095, "step": 31000 }, { "epoch": 0.596400632750107, "grad_norm": 444.625772468422, "learning_rate": 7.1399808833466445e-06, "loss": 228.7998, "step": 31010 }, { "epoch": 0.5965929580105875, "grad_norm": 408.19194897682854, "learning_rate": 7.134133207846869e-06, "loss": 231.0402, "step": 31020 }, { "epoch": 0.5967852832710681, "grad_norm": 441.69043582928697, "learning_rate": 7.128286599835139e-06, "loss": 235.2315, "step": 31030 }, { "epoch": 0.5969776085315486, "grad_norm": 429.1934329341106, "learning_rate": 7.122441061489228e-06, "loss": 220.99, "step": 31040 }, { "epoch": 0.5971699337920291, "grad_norm": 436.46288146683776, "learning_rate": 7.1165965949864934e-06, "loss": 231.0572, "step": 31050 }, { "epoch": 0.5973622590525096, "grad_norm": 413.3573834085358, "learning_rate": 7.110753202503906e-06, "loss": 229.231, "step": 31060 }, { "epoch": 0.5975545843129901, "grad_norm": 429.7575862430089, "learning_rate": 7.104910886218036e-06, "loss": 231.5436, "step": 31070 }, { "epoch": 0.5977469095734707, "grad_norm": 452.85150154079275, "learning_rate": 7.0990696483050466e-06, "loss": 230.5596, "step": 31080 }, { "epoch": 0.5979392348339512, "grad_norm": 407.79552272283394, "learning_rate": 7.093229490940704e-06, "loss": 231.8558, "step": 31090 }, { "epoch": 0.5981315600944317, "grad_norm": 410.29861285862563, "learning_rate": 7.087390416300364e-06, "loss": 223.9269, "step": 31100 }, { "epoch": 0.5983238853549122, "grad_norm": 423.527518905979, "learning_rate": 7.081552426558995e-06, "loss": 237.3905, "step": 31110 }, { "epoch": 0.5985162106153927, "grad_norm": 416.8528848057666, "learning_rate": 7.075715523891146e-06, "loss": 224.5927, "step": 31120 }, { "epoch": 0.5987085358758732, "grad_norm": 435.14326974674015, "learning_rate": 7.069879710470965e-06, "loss": 226.3886, "step": 31130 }, { "epoch": 0.5989008611363538, "grad_norm": 444.6357715018171, "learning_rate": 7.064044988472204e-06, "loss": 222.7304, "step": 31140 }, { "epoch": 0.5990931863968343, "grad_norm": 435.06287248578263, "learning_rate": 7.058211360068196e-06, "loss": 226.1482, "step": 31150 }, { "epoch": 0.5992855116573148, "grad_norm": 425.1248581785124, "learning_rate": 7.052378827431871e-06, "loss": 231.0841, "step": 31160 }, { "epoch": 0.5994778369177953, "grad_norm": 460.4416805066328, "learning_rate": 7.046547392735747e-06, "loss": 224.6262, "step": 31170 }, { "epoch": 0.5996701621782758, "grad_norm": 447.77334630031237, "learning_rate": 7.040717058151945e-06, "loss": 225.4802, "step": 31180 }, { "epoch": 0.5998624874387565, "grad_norm": 414.48880746781487, "learning_rate": 7.034887825852164e-06, "loss": 229.6972, "step": 31190 }, { "epoch": 0.600054812699237, "grad_norm": 396.412252112278, "learning_rate": 7.029059698007699e-06, "loss": 222.5981, "step": 31200 }, { "epoch": 0.6002471379597175, "grad_norm": 462.33603927648323, "learning_rate": 7.023232676789424e-06, "loss": 238.546, "step": 31210 }, { "epoch": 0.600439463220198, "grad_norm": 465.2785037641388, "learning_rate": 7.01740676436782e-06, "loss": 229.8366, "step": 31220 }, { "epoch": 0.6006317884806786, "grad_norm": 411.1024134751334, "learning_rate": 7.011581962912936e-06, "loss": 232.3649, "step": 31230 }, { "epoch": 0.6008241137411591, "grad_norm": 442.1245248314739, "learning_rate": 7.005758274594412e-06, "loss": 229.8007, "step": 31240 }, { "epoch": 0.6010164390016396, "grad_norm": 415.9724184568621, "learning_rate": 6.999935701581482e-06, "loss": 228.002, "step": 31250 }, { "epoch": 0.6012087642621201, "grad_norm": 422.07573573152905, "learning_rate": 6.9941142460429555e-06, "loss": 229.0496, "step": 31260 }, { "epoch": 0.6014010895226006, "grad_norm": 411.8871133271173, "learning_rate": 6.988293910147229e-06, "loss": 229.6827, "step": 31270 }, { "epoch": 0.6015934147830811, "grad_norm": 438.6845728167455, "learning_rate": 6.982474696062278e-06, "loss": 230.1404, "step": 31280 }, { "epoch": 0.6017857400435617, "grad_norm": 421.96162422719874, "learning_rate": 6.97665660595567e-06, "loss": 229.8157, "step": 31290 }, { "epoch": 0.6019780653040422, "grad_norm": 437.0338238568071, "learning_rate": 6.970839641994545e-06, "loss": 226.9675, "step": 31300 }, { "epoch": 0.6021703905645227, "grad_norm": 429.8338738138326, "learning_rate": 6.965023806345619e-06, "loss": 226.8889, "step": 31310 }, { "epoch": 0.6023627158250032, "grad_norm": 376.2411129408347, "learning_rate": 6.959209101175206e-06, "loss": 225.3643, "step": 31320 }, { "epoch": 0.6025550410854837, "grad_norm": 420.77861340778287, "learning_rate": 6.9533955286491805e-06, "loss": 233.2466, "step": 31330 }, { "epoch": 0.6027473663459643, "grad_norm": 375.10389459286114, "learning_rate": 6.947583090933008e-06, "loss": 222.4511, "step": 31340 }, { "epoch": 0.6029396916064448, "grad_norm": 384.4566297787739, "learning_rate": 6.941771790191716e-06, "loss": 224.1897, "step": 31350 }, { "epoch": 0.6031320168669253, "grad_norm": 422.8293228385227, "learning_rate": 6.9359616285899266e-06, "loss": 230.1716, "step": 31360 }, { "epoch": 0.6033243421274058, "grad_norm": 473.8511483921668, "learning_rate": 6.930152608291829e-06, "loss": 236.4207, "step": 31370 }, { "epoch": 0.6035166673878863, "grad_norm": 409.07980676046975, "learning_rate": 6.924344731461179e-06, "loss": 222.9459, "step": 31380 }, { "epoch": 0.603708992648367, "grad_norm": 404.4186209304979, "learning_rate": 6.918538000261325e-06, "loss": 225.3304, "step": 31390 }, { "epoch": 0.6039013179088475, "grad_norm": 470.42591099954507, "learning_rate": 6.912732416855171e-06, "loss": 232.8654, "step": 31400 }, { "epoch": 0.604093643169328, "grad_norm": 446.1054671043929, "learning_rate": 6.906927983405207e-06, "loss": 234.0471, "step": 31410 }, { "epoch": 0.6042859684298085, "grad_norm": 404.6723379679115, "learning_rate": 6.901124702073481e-06, "loss": 227.3843, "step": 31420 }, { "epoch": 0.604478293690289, "grad_norm": 412.32031400244597, "learning_rate": 6.895322575021628e-06, "loss": 233.438, "step": 31430 }, { "epoch": 0.6046706189507696, "grad_norm": 442.68633185419355, "learning_rate": 6.88952160441084e-06, "loss": 233.5317, "step": 31440 }, { "epoch": 0.6048629442112501, "grad_norm": 428.652802850576, "learning_rate": 6.8837217924018825e-06, "loss": 227.9286, "step": 31450 }, { "epoch": 0.6050552694717306, "grad_norm": 405.59024106209904, "learning_rate": 6.877923141155087e-06, "loss": 234.7568, "step": 31460 }, { "epoch": 0.6052475947322111, "grad_norm": 429.74687944027386, "learning_rate": 6.87212565283036e-06, "loss": 223.7243, "step": 31470 }, { "epoch": 0.6054399199926916, "grad_norm": 413.62916687397916, "learning_rate": 6.8663293295871715e-06, "loss": 224.6487, "step": 31480 }, { "epoch": 0.6056322452531722, "grad_norm": 403.35266000421103, "learning_rate": 6.86053417358455e-06, "loss": 232.3997, "step": 31490 }, { "epoch": 0.6058245705136527, "grad_norm": 418.18742835234315, "learning_rate": 6.854740186981102e-06, "loss": 228.476, "step": 31500 }, { "epoch": 0.6060168957741332, "grad_norm": 413.3692710430336, "learning_rate": 6.848947371934989e-06, "loss": 232.8474, "step": 31510 }, { "epoch": 0.6062092210346137, "grad_norm": 458.7192780655746, "learning_rate": 6.843155730603939e-06, "loss": 234.3798, "step": 31520 }, { "epoch": 0.6064015462950942, "grad_norm": 410.4532868082447, "learning_rate": 6.837365265145237e-06, "loss": 224.8932, "step": 31530 }, { "epoch": 0.6065938715555748, "grad_norm": 441.7061583346476, "learning_rate": 6.831575977715745e-06, "loss": 230.1555, "step": 31540 }, { "epoch": 0.6067861968160553, "grad_norm": 392.5810126768839, "learning_rate": 6.825787870471873e-06, "loss": 225.8656, "step": 31550 }, { "epoch": 0.6069785220765358, "grad_norm": 510.8174711552602, "learning_rate": 6.820000945569592e-06, "loss": 226.7284, "step": 31560 }, { "epoch": 0.6071708473370163, "grad_norm": 479.66101806827083, "learning_rate": 6.814215205164444e-06, "loss": 237.7188, "step": 31570 }, { "epoch": 0.6073631725974968, "grad_norm": 399.1802075372459, "learning_rate": 6.808430651411518e-06, "loss": 225.9488, "step": 31580 }, { "epoch": 0.6075554978579775, "grad_norm": 428.7763591661641, "learning_rate": 6.802647286465461e-06, "loss": 228.123, "step": 31590 }, { "epoch": 0.607747823118458, "grad_norm": 425.208346229042, "learning_rate": 6.796865112480482e-06, "loss": 235.3086, "step": 31600 }, { "epoch": 0.6079401483789385, "grad_norm": 410.9462737738275, "learning_rate": 6.79108413161035e-06, "loss": 230.0048, "step": 31610 }, { "epoch": 0.608132473639419, "grad_norm": 434.71890880773276, "learning_rate": 6.785304346008381e-06, "loss": 229.3023, "step": 31620 }, { "epoch": 0.6083247988998995, "grad_norm": 402.8449693305862, "learning_rate": 6.779525757827452e-06, "loss": 224.3566, "step": 31630 }, { "epoch": 0.6085171241603801, "grad_norm": 423.4436545904073, "learning_rate": 6.773748369219986e-06, "loss": 232.5006, "step": 31640 }, { "epoch": 0.6087094494208606, "grad_norm": 416.3640244341056, "learning_rate": 6.767972182337974e-06, "loss": 236.615, "step": 31650 }, { "epoch": 0.6089017746813411, "grad_norm": 407.73175172586537, "learning_rate": 6.762197199332945e-06, "loss": 227.2565, "step": 31660 }, { "epoch": 0.6090940999418216, "grad_norm": 387.546886934667, "learning_rate": 6.756423422355981e-06, "loss": 223.1566, "step": 31670 }, { "epoch": 0.6092864252023021, "grad_norm": 426.7701797203757, "learning_rate": 6.750650853557728e-06, "loss": 222.9499, "step": 31680 }, { "epoch": 0.6094787504627827, "grad_norm": 401.4472983638562, "learning_rate": 6.744879495088364e-06, "loss": 231.3808, "step": 31690 }, { "epoch": 0.6096710757232632, "grad_norm": 422.555188962367, "learning_rate": 6.7391093490976285e-06, "loss": 220.4079, "step": 31700 }, { "epoch": 0.6098634009837437, "grad_norm": 422.1159712098415, "learning_rate": 6.7333404177348036e-06, "loss": 220.7019, "step": 31710 }, { "epoch": 0.6100557262442242, "grad_norm": 401.1683902465003, "learning_rate": 6.727572703148726e-06, "loss": 223.8618, "step": 31720 }, { "epoch": 0.6102480515047047, "grad_norm": 399.4895379726904, "learning_rate": 6.721806207487769e-06, "loss": 228.9129, "step": 31730 }, { "epoch": 0.6104403767651853, "grad_norm": 410.35629069607234, "learning_rate": 6.716040932899857e-06, "loss": 235.2316, "step": 31740 }, { "epoch": 0.6106327020256658, "grad_norm": 402.15512413169114, "learning_rate": 6.710276881532463e-06, "loss": 228.6174, "step": 31750 }, { "epoch": 0.6108250272861463, "grad_norm": 404.59875865191776, "learning_rate": 6.704514055532597e-06, "loss": 230.2955, "step": 31760 }, { "epoch": 0.6110173525466268, "grad_norm": 444.9015145453593, "learning_rate": 6.698752457046822e-06, "loss": 223.154, "step": 31770 }, { "epoch": 0.6112096778071073, "grad_norm": 436.20073319166994, "learning_rate": 6.692992088221231e-06, "loss": 233.3152, "step": 31780 }, { "epoch": 0.6114020030675879, "grad_norm": 405.4522546906219, "learning_rate": 6.687232951201473e-06, "loss": 222.9069, "step": 31790 }, { "epoch": 0.6115943283280685, "grad_norm": 419.9575646012201, "learning_rate": 6.681475048132729e-06, "loss": 234.9125, "step": 31800 }, { "epoch": 0.611786653588549, "grad_norm": 416.17713050775745, "learning_rate": 6.675718381159719e-06, "loss": 225.6077, "step": 31810 }, { "epoch": 0.6119789788490295, "grad_norm": 418.3594856479087, "learning_rate": 6.6699629524267114e-06, "loss": 224.1185, "step": 31820 }, { "epoch": 0.61217130410951, "grad_norm": 409.5014753040369, "learning_rate": 6.664208764077507e-06, "loss": 230.5952, "step": 31830 }, { "epoch": 0.6123636293699906, "grad_norm": 409.58535223196526, "learning_rate": 6.658455818255445e-06, "loss": 221.038, "step": 31840 }, { "epoch": 0.6125559546304711, "grad_norm": 401.4956852117134, "learning_rate": 6.652704117103401e-06, "loss": 222.4708, "step": 31850 }, { "epoch": 0.6127482798909516, "grad_norm": 408.45207805125233, "learning_rate": 6.646953662763796e-06, "loss": 237.5248, "step": 31860 }, { "epoch": 0.6129406051514321, "grad_norm": 424.513095277039, "learning_rate": 6.6412044573785725e-06, "loss": 227.6676, "step": 31870 }, { "epoch": 0.6131329304119126, "grad_norm": 422.6860046503193, "learning_rate": 6.635456503089217e-06, "loss": 226.0225, "step": 31880 }, { "epoch": 0.6133252556723932, "grad_norm": 457.4857511993487, "learning_rate": 6.6297098020367435e-06, "loss": 242.5258, "step": 31890 }, { "epoch": 0.6135175809328737, "grad_norm": 426.9327896119648, "learning_rate": 6.623964356361707e-06, "loss": 240.1359, "step": 31900 }, { "epoch": 0.6137099061933542, "grad_norm": 430.3015965170402, "learning_rate": 6.618220168204193e-06, "loss": 225.0844, "step": 31910 }, { "epoch": 0.6139022314538347, "grad_norm": 388.5793558755709, "learning_rate": 6.6124772397038115e-06, "loss": 227.3012, "step": 31920 }, { "epoch": 0.6140945567143152, "grad_norm": 436.1814936283576, "learning_rate": 6.606735572999714e-06, "loss": 234.0395, "step": 31930 }, { "epoch": 0.6142868819747958, "grad_norm": 398.50637975193155, "learning_rate": 6.600995170230575e-06, "loss": 218.5695, "step": 31940 }, { "epoch": 0.6144792072352763, "grad_norm": 464.0005666559156, "learning_rate": 6.595256033534598e-06, "loss": 223.3937, "step": 31950 }, { "epoch": 0.6146715324957568, "grad_norm": 413.6983316480439, "learning_rate": 6.589518165049514e-06, "loss": 223.8607, "step": 31960 }, { "epoch": 0.6148638577562373, "grad_norm": 424.0484035368132, "learning_rate": 6.5837815669125906e-06, "loss": 229.2501, "step": 31970 }, { "epoch": 0.6150561830167178, "grad_norm": 399.37052022136805, "learning_rate": 6.5780462412606124e-06, "loss": 217.4186, "step": 31980 }, { "epoch": 0.6152485082771983, "grad_norm": 414.7301268176305, "learning_rate": 6.572312190229895e-06, "loss": 224.6406, "step": 31990 }, { "epoch": 0.615440833537679, "grad_norm": 407.4929905602582, "learning_rate": 6.5665794159562734e-06, "loss": 224.0125, "step": 32000 }, { "epoch": 0.6156331587981595, "grad_norm": 445.25430596855256, "learning_rate": 6.560847920575118e-06, "loss": 220.6891, "step": 32010 }, { "epoch": 0.61582548405864, "grad_norm": 404.02473259242674, "learning_rate": 6.5551177062213126e-06, "loss": 232.0424, "step": 32020 }, { "epoch": 0.6160178093191205, "grad_norm": 401.4137244071718, "learning_rate": 6.5493887750292616e-06, "loss": 224.4488, "step": 32030 }, { "epoch": 0.616210134579601, "grad_norm": 394.8810662544867, "learning_rate": 6.54366112913291e-06, "loss": 234.6445, "step": 32040 }, { "epoch": 0.6164024598400816, "grad_norm": 441.38737658537883, "learning_rate": 6.537934770665701e-06, "loss": 233.0354, "step": 32050 }, { "epoch": 0.6165947851005621, "grad_norm": 433.46738262206486, "learning_rate": 6.532209701760615e-06, "loss": 237.4941, "step": 32060 }, { "epoch": 0.6167871103610426, "grad_norm": 421.3984738399414, "learning_rate": 6.526485924550138e-06, "loss": 232.097, "step": 32070 }, { "epoch": 0.6169794356215231, "grad_norm": 568.8816481180494, "learning_rate": 6.520763441166291e-06, "loss": 230.0154, "step": 32080 }, { "epoch": 0.6171717608820037, "grad_norm": 385.5136898476257, "learning_rate": 6.515042253740601e-06, "loss": 216.763, "step": 32090 }, { "epoch": 0.6173640861424842, "grad_norm": 429.7704586362384, "learning_rate": 6.509322364404112e-06, "loss": 229.7286, "step": 32100 }, { "epoch": 0.6175564114029647, "grad_norm": 397.18492624274734, "learning_rate": 6.503603775287395e-06, "loss": 227.5604, "step": 32110 }, { "epoch": 0.6177487366634452, "grad_norm": 428.32726061371864, "learning_rate": 6.497886488520524e-06, "loss": 222.9204, "step": 32120 }, { "epoch": 0.6179410619239257, "grad_norm": 452.0128461453846, "learning_rate": 6.4921705062331e-06, "loss": 234.3032, "step": 32130 }, { "epoch": 0.6181333871844062, "grad_norm": 410.44888770343476, "learning_rate": 6.486455830554224e-06, "loss": 222.3085, "step": 32140 }, { "epoch": 0.6183257124448868, "grad_norm": 485.15354076366333, "learning_rate": 6.4807424636125285e-06, "loss": 226.4709, "step": 32150 }, { "epoch": 0.6185180377053673, "grad_norm": 430.227140917458, "learning_rate": 6.475030407536141e-06, "loss": 228.9367, "step": 32160 }, { "epoch": 0.6187103629658478, "grad_norm": 400.26791701588655, "learning_rate": 6.469319664452709e-06, "loss": 227.1511, "step": 32170 }, { "epoch": 0.6189026882263283, "grad_norm": 420.3334240601306, "learning_rate": 6.463610236489391e-06, "loss": 221.2679, "step": 32180 }, { "epoch": 0.6190950134868088, "grad_norm": 415.8398682323889, "learning_rate": 6.457902125772854e-06, "loss": 226.6218, "step": 32190 }, { "epoch": 0.6192873387472894, "grad_norm": 491.827731218485, "learning_rate": 6.452195334429277e-06, "loss": 229.5702, "step": 32200 }, { "epoch": 0.61947966400777, "grad_norm": 424.2696377102414, "learning_rate": 6.446489864584341e-06, "loss": 224.0806, "step": 32210 }, { "epoch": 0.6196719892682505, "grad_norm": 397.0421844748927, "learning_rate": 6.440785718363245e-06, "loss": 223.3903, "step": 32220 }, { "epoch": 0.619864314528731, "grad_norm": 418.63420095871084, "learning_rate": 6.435082897890688e-06, "loss": 223.8234, "step": 32230 }, { "epoch": 0.6200566397892115, "grad_norm": 414.6394142402439, "learning_rate": 6.429381405290873e-06, "loss": 223.4642, "step": 32240 }, { "epoch": 0.6202489650496921, "grad_norm": 417.6273198855874, "learning_rate": 6.4236812426875124e-06, "loss": 228.2614, "step": 32250 }, { "epoch": 0.6204412903101726, "grad_norm": 428.1871115389233, "learning_rate": 6.4179824122038244e-06, "loss": 226.9813, "step": 32260 }, { "epoch": 0.6206336155706531, "grad_norm": 459.7291468508094, "learning_rate": 6.412284915962532e-06, "loss": 228.2366, "step": 32270 }, { "epoch": 0.6208259408311336, "grad_norm": 406.90371089574955, "learning_rate": 6.406588756085849e-06, "loss": 222.4472, "step": 32280 }, { "epoch": 0.6210182660916141, "grad_norm": 422.53534585686907, "learning_rate": 6.400893934695514e-06, "loss": 224.6374, "step": 32290 }, { "epoch": 0.6212105913520947, "grad_norm": 424.33570838179855, "learning_rate": 6.395200453912747e-06, "loss": 230.1573, "step": 32300 }, { "epoch": 0.6214029166125752, "grad_norm": 402.7245190674156, "learning_rate": 6.389508315858272e-06, "loss": 228.3169, "step": 32310 }, { "epoch": 0.6215952418730557, "grad_norm": 430.8290670915051, "learning_rate": 6.38381752265232e-06, "loss": 224.8335, "step": 32320 }, { "epoch": 0.6217875671335362, "grad_norm": 470.47378827039034, "learning_rate": 6.378128076414619e-06, "loss": 230.406, "step": 32330 }, { "epoch": 0.6219798923940167, "grad_norm": 431.366732515625, "learning_rate": 6.372439979264393e-06, "loss": 229.6548, "step": 32340 }, { "epoch": 0.6221722176544973, "grad_norm": 414.25317450164516, "learning_rate": 6.3667532333203655e-06, "loss": 221.0754, "step": 32350 }, { "epoch": 0.6223645429149778, "grad_norm": 384.6862073261352, "learning_rate": 6.361067840700747e-06, "loss": 223.1284, "step": 32360 }, { "epoch": 0.6225568681754583, "grad_norm": 403.96040133608227, "learning_rate": 6.355383803523265e-06, "loss": 224.7733, "step": 32370 }, { "epoch": 0.6227491934359388, "grad_norm": 409.6805576624066, "learning_rate": 6.349701123905123e-06, "loss": 225.2049, "step": 32380 }, { "epoch": 0.6229415186964193, "grad_norm": 398.9666378801448, "learning_rate": 6.344019803963021e-06, "loss": 227.3894, "step": 32390 }, { "epoch": 0.6231338439568999, "grad_norm": 417.0033530660901, "learning_rate": 6.338339845813164e-06, "loss": 223.8857, "step": 32400 }, { "epoch": 0.6233261692173805, "grad_norm": 406.4564969629636, "learning_rate": 6.332661251571241e-06, "loss": 222.1794, "step": 32410 }, { "epoch": 0.623518494477861, "grad_norm": 409.63481695095373, "learning_rate": 6.326984023352435e-06, "loss": 229.1074, "step": 32420 }, { "epoch": 0.6237108197383415, "grad_norm": 401.4548811884464, "learning_rate": 6.321308163271413e-06, "loss": 224.6141, "step": 32430 }, { "epoch": 0.623903144998822, "grad_norm": 427.1591386049867, "learning_rate": 6.315633673442349e-06, "loss": 224.6687, "step": 32440 }, { "epoch": 0.6240954702593026, "grad_norm": 414.42085249198607, "learning_rate": 6.309960555978894e-06, "loss": 221.5361, "step": 32450 }, { "epoch": 0.6242877955197831, "grad_norm": 396.68173985942974, "learning_rate": 6.304288812994183e-06, "loss": 222.9928, "step": 32460 }, { "epoch": 0.6244801207802636, "grad_norm": 405.2462982459528, "learning_rate": 6.298618446600856e-06, "loss": 232.3568, "step": 32470 }, { "epoch": 0.6246724460407441, "grad_norm": 408.38965061786035, "learning_rate": 6.292949458911029e-06, "loss": 226.5822, "step": 32480 }, { "epoch": 0.6248647713012246, "grad_norm": 406.85472408021496, "learning_rate": 6.287281852036304e-06, "loss": 219.727, "step": 32490 }, { "epoch": 0.6250570965617052, "grad_norm": 406.3130271794487, "learning_rate": 6.2816156280877675e-06, "loss": 223.7757, "step": 32500 }, { "epoch": 0.6252494218221857, "grad_norm": 368.8070749656955, "learning_rate": 6.275950789176002e-06, "loss": 226.879, "step": 32510 }, { "epoch": 0.6254417470826662, "grad_norm": 399.78400131991526, "learning_rate": 6.270287337411064e-06, "loss": 222.7621, "step": 32520 }, { "epoch": 0.6256340723431467, "grad_norm": 418.7943588434436, "learning_rate": 6.264625274902492e-06, "loss": 229.581, "step": 32530 }, { "epoch": 0.6258263976036272, "grad_norm": 408.46925834619316, "learning_rate": 6.258964603759311e-06, "loss": 231.0025, "step": 32540 }, { "epoch": 0.6260187228641078, "grad_norm": 425.725554948789, "learning_rate": 6.2533053260900345e-06, "loss": 223.7737, "step": 32550 }, { "epoch": 0.6262110481245883, "grad_norm": 379.102124914087, "learning_rate": 6.247647444002644e-06, "loss": 227.2497, "step": 32560 }, { "epoch": 0.6264033733850688, "grad_norm": 410.724015359739, "learning_rate": 6.241990959604607e-06, "loss": 232.2127, "step": 32570 }, { "epoch": 0.6265956986455493, "grad_norm": 389.35958129429366, "learning_rate": 6.2363358750028745e-06, "loss": 225.1557, "step": 32580 }, { "epoch": 0.6267880239060298, "grad_norm": 403.83741678591196, "learning_rate": 6.23068219230387e-06, "loss": 220.9148, "step": 32590 }, { "epoch": 0.6269803491665104, "grad_norm": 449.17386156052123, "learning_rate": 6.2250299136134925e-06, "loss": 221.5463, "step": 32600 }, { "epoch": 0.6271726744269909, "grad_norm": 411.41821537712275, "learning_rate": 6.219379041037128e-06, "loss": 227.4787, "step": 32610 }, { "epoch": 0.6273649996874715, "grad_norm": 849.7831508922266, "learning_rate": 6.213729576679632e-06, "loss": 224.3808, "step": 32620 }, { "epoch": 0.627557324947952, "grad_norm": 403.5203729518764, "learning_rate": 6.208081522645339e-06, "loss": 219.9894, "step": 32630 }, { "epoch": 0.6277496502084325, "grad_norm": 443.87665459898096, "learning_rate": 6.202434881038048e-06, "loss": 224.0993, "step": 32640 }, { "epoch": 0.6279419754689131, "grad_norm": 378.8980746815382, "learning_rate": 6.196789653961048e-06, "loss": 222.3341, "step": 32650 }, { "epoch": 0.6281343007293936, "grad_norm": 375.0334287873214, "learning_rate": 6.191145843517093e-06, "loss": 222.9797, "step": 32660 }, { "epoch": 0.6283266259898741, "grad_norm": 432.9549116883272, "learning_rate": 6.185503451808401e-06, "loss": 226.5957, "step": 32670 }, { "epoch": 0.6285189512503546, "grad_norm": 405.67308004001814, "learning_rate": 6.1798624809366755e-06, "loss": 231.5721, "step": 32680 }, { "epoch": 0.6287112765108351, "grad_norm": 428.6002921642038, "learning_rate": 6.174222933003084e-06, "loss": 226.5769, "step": 32690 }, { "epoch": 0.6289036017713157, "grad_norm": 427.70493777899384, "learning_rate": 6.168584810108269e-06, "loss": 227.2838, "step": 32700 }, { "epoch": 0.6290959270317962, "grad_norm": 451.3863609797896, "learning_rate": 6.162948114352328e-06, "loss": 232.0322, "step": 32710 }, { "epoch": 0.6292882522922767, "grad_norm": 429.05655546731646, "learning_rate": 6.157312847834848e-06, "loss": 225.3299, "step": 32720 }, { "epoch": 0.6294805775527572, "grad_norm": 392.0913172198605, "learning_rate": 6.1516790126548695e-06, "loss": 222.9261, "step": 32730 }, { "epoch": 0.6296729028132377, "grad_norm": 448.13366902461837, "learning_rate": 6.146046610910899e-06, "loss": 226.53, "step": 32740 }, { "epoch": 0.6298652280737183, "grad_norm": 419.7198476188548, "learning_rate": 6.140415644700915e-06, "loss": 235.4107, "step": 32750 }, { "epoch": 0.6300575533341988, "grad_norm": 430.6483742843943, "learning_rate": 6.13478611612236e-06, "loss": 227.4435, "step": 32760 }, { "epoch": 0.6302498785946793, "grad_norm": 384.4994474298553, "learning_rate": 6.129158027272144e-06, "loss": 224.8102, "step": 32770 }, { "epoch": 0.6304422038551598, "grad_norm": 442.829800147313, "learning_rate": 6.123531380246632e-06, "loss": 221.9298, "step": 32780 }, { "epoch": 0.6306345291156403, "grad_norm": 373.9780847542322, "learning_rate": 6.117906177141657e-06, "loss": 227.9078, "step": 32790 }, { "epoch": 0.6308268543761208, "grad_norm": 447.63520768166217, "learning_rate": 6.112282420052518e-06, "loss": 222.5495, "step": 32800 }, { "epoch": 0.6310191796366014, "grad_norm": 396.84245210532055, "learning_rate": 6.106660111073969e-06, "loss": 225.7975, "step": 32810 }, { "epoch": 0.631211504897082, "grad_norm": 403.2168245481589, "learning_rate": 6.101039252300227e-06, "loss": 228.843, "step": 32820 }, { "epoch": 0.6314038301575625, "grad_norm": 404.034799768103, "learning_rate": 6.095419845824971e-06, "loss": 223.8758, "step": 32830 }, { "epoch": 0.631596155418043, "grad_norm": 434.55380619672695, "learning_rate": 6.089801893741338e-06, "loss": 225.8411, "step": 32840 }, { "epoch": 0.6317884806785236, "grad_norm": 416.5709962472005, "learning_rate": 6.084185398141921e-06, "loss": 224.7379, "step": 32850 }, { "epoch": 0.6319808059390041, "grad_norm": 410.53352332312016, "learning_rate": 6.078570361118768e-06, "loss": 227.0122, "step": 32860 }, { "epoch": 0.6321731311994846, "grad_norm": 427.8234096788836, "learning_rate": 6.072956784763393e-06, "loss": 224.9295, "step": 32870 }, { "epoch": 0.6323654564599651, "grad_norm": 387.1536452080818, "learning_rate": 6.067344671166757e-06, "loss": 227.618, "step": 32880 }, { "epoch": 0.6325577817204456, "grad_norm": 431.11970604539044, "learning_rate": 6.061734022419281e-06, "loss": 224.875, "step": 32890 }, { "epoch": 0.6327501069809262, "grad_norm": 378.35286774475384, "learning_rate": 6.056124840610839e-06, "loss": 215.4979, "step": 32900 }, { "epoch": 0.6329424322414067, "grad_norm": 420.8170335069947, "learning_rate": 6.050517127830761e-06, "loss": 225.6952, "step": 32910 }, { "epoch": 0.6331347575018872, "grad_norm": 433.34233228942406, "learning_rate": 6.044910886167825e-06, "loss": 232.8873, "step": 32920 }, { "epoch": 0.6333270827623677, "grad_norm": 434.79753443137196, "learning_rate": 6.03930611771026e-06, "loss": 222.2902, "step": 32930 }, { "epoch": 0.6335194080228482, "grad_norm": 437.6716142386955, "learning_rate": 6.033702824545755e-06, "loss": 219.4517, "step": 32940 }, { "epoch": 0.6337117332833287, "grad_norm": 415.6155024311574, "learning_rate": 6.028101008761445e-06, "loss": 215.7741, "step": 32950 }, { "epoch": 0.6339040585438093, "grad_norm": 413.1861406045952, "learning_rate": 6.022500672443907e-06, "loss": 222.5636, "step": 32960 }, { "epoch": 0.6340963838042898, "grad_norm": 455.1841454265467, "learning_rate": 6.016901817679177e-06, "loss": 229.8508, "step": 32970 }, { "epoch": 0.6342887090647703, "grad_norm": 406.19998642742985, "learning_rate": 6.011304446552741e-06, "loss": 220.1547, "step": 32980 }, { "epoch": 0.6344810343252508, "grad_norm": 408.67321509130966, "learning_rate": 6.005708561149523e-06, "loss": 224.5256, "step": 32990 }, { "epoch": 0.6346733595857313, "grad_norm": 454.1508846177071, "learning_rate": 6.000114163553894e-06, "loss": 227.7325, "step": 33000 }, { "epoch": 0.6348656848462119, "grad_norm": 407.94469387292355, "learning_rate": 5.994521255849684e-06, "loss": 229.7664, "step": 33010 }, { "epoch": 0.6350580101066924, "grad_norm": 383.1222949482725, "learning_rate": 5.988929840120151e-06, "loss": 226.5475, "step": 33020 }, { "epoch": 0.635250335367173, "grad_norm": 430.6352316115025, "learning_rate": 5.983339918448008e-06, "loss": 225.6059, "step": 33030 }, { "epoch": 0.6354426606276535, "grad_norm": 439.07054562109374, "learning_rate": 5.977751492915404e-06, "loss": 220.4708, "step": 33040 }, { "epoch": 0.635634985888134, "grad_norm": 404.56295238553315, "learning_rate": 5.972164565603944e-06, "loss": 224.4825, "step": 33050 }, { "epoch": 0.6358273111486146, "grad_norm": 406.995529099741, "learning_rate": 5.966579138594661e-06, "loss": 231.6047, "step": 33060 }, { "epoch": 0.6360196364090951, "grad_norm": 400.01546417372987, "learning_rate": 5.960995213968033e-06, "loss": 226.5418, "step": 33070 }, { "epoch": 0.6362119616695756, "grad_norm": 419.6904858661481, "learning_rate": 5.955412793803983e-06, "loss": 220.0837, "step": 33080 }, { "epoch": 0.6364042869300561, "grad_norm": 485.6585027234312, "learning_rate": 5.949831880181869e-06, "loss": 237.2516, "step": 33090 }, { "epoch": 0.6365966121905366, "grad_norm": 394.2503466781619, "learning_rate": 5.944252475180487e-06, "loss": 233.9495, "step": 33100 }, { "epoch": 0.6367889374510172, "grad_norm": 395.8595153683831, "learning_rate": 5.938674580878077e-06, "loss": 224.2514, "step": 33110 }, { "epoch": 0.6369812627114977, "grad_norm": 390.84158609300476, "learning_rate": 5.933098199352307e-06, "loss": 228.95, "step": 33120 }, { "epoch": 0.6371735879719782, "grad_norm": 413.4804885818183, "learning_rate": 5.927523332680296e-06, "loss": 228.8225, "step": 33130 }, { "epoch": 0.6373659132324587, "grad_norm": 408.0006026856194, "learning_rate": 5.921949982938583e-06, "loss": 224.0146, "step": 33140 }, { "epoch": 0.6375582384929392, "grad_norm": 437.68887809019344, "learning_rate": 5.916378152203146e-06, "loss": 228.0215, "step": 33150 }, { "epoch": 0.6377505637534198, "grad_norm": 386.16005900095394, "learning_rate": 5.910807842549409e-06, "loss": 222.9319, "step": 33160 }, { "epoch": 0.6379428890139003, "grad_norm": 408.9391249353531, "learning_rate": 5.905239056052212e-06, "loss": 221.9154, "step": 33170 }, { "epoch": 0.6381352142743808, "grad_norm": 429.83712295028107, "learning_rate": 5.8996717947858395e-06, "loss": 224.3552, "step": 33180 }, { "epoch": 0.6383275395348613, "grad_norm": 424.9036419643019, "learning_rate": 5.894106060824005e-06, "loss": 224.208, "step": 33190 }, { "epoch": 0.6385198647953418, "grad_norm": 393.39569978460133, "learning_rate": 5.888541856239854e-06, "loss": 219.0052, "step": 33200 }, { "epoch": 0.6387121900558224, "grad_norm": 403.41128611404923, "learning_rate": 5.882979183105959e-06, "loss": 224.0252, "step": 33210 }, { "epoch": 0.6389045153163029, "grad_norm": 375.57838578237636, "learning_rate": 5.8774180434943184e-06, "loss": 221.4567, "step": 33220 }, { "epoch": 0.6390968405767835, "grad_norm": 400.0832045059267, "learning_rate": 5.871858439476374e-06, "loss": 224.6318, "step": 33230 }, { "epoch": 0.639289165837264, "grad_norm": 403.3555899226912, "learning_rate": 5.86630037312298e-06, "loss": 222.7588, "step": 33240 }, { "epoch": 0.6394814910977445, "grad_norm": 394.8468972266905, "learning_rate": 5.860743846504429e-06, "loss": 227.7456, "step": 33250 }, { "epoch": 0.6396738163582251, "grad_norm": 390.4306289888978, "learning_rate": 5.8551888616904305e-06, "loss": 224.8804, "step": 33260 }, { "epoch": 0.6398661416187056, "grad_norm": 402.6392375588884, "learning_rate": 5.849635420750131e-06, "loss": 233.8076, "step": 33270 }, { "epoch": 0.6400584668791861, "grad_norm": 405.4030079861563, "learning_rate": 5.84408352575209e-06, "loss": 224.683, "step": 33280 }, { "epoch": 0.6402507921396666, "grad_norm": 422.18051938551014, "learning_rate": 5.8385331787642956e-06, "loss": 216.603, "step": 33290 }, { "epoch": 0.6404431174001471, "grad_norm": 414.40698456003713, "learning_rate": 5.8329843818541665e-06, "loss": 225.3564, "step": 33300 }, { "epoch": 0.6406354426606277, "grad_norm": 4053.9080014364586, "learning_rate": 5.827437137088535e-06, "loss": 217.1704, "step": 33310 }, { "epoch": 0.6408277679211082, "grad_norm": 443.65653348116734, "learning_rate": 5.8218914465336585e-06, "loss": 222.8102, "step": 33320 }, { "epoch": 0.6410200931815887, "grad_norm": 407.5381632684021, "learning_rate": 5.816347312255209e-06, "loss": 220.9344, "step": 33330 }, { "epoch": 0.6412124184420692, "grad_norm": 432.8726081584538, "learning_rate": 5.810804736318295e-06, "loss": 218.7138, "step": 33340 }, { "epoch": 0.6414047437025497, "grad_norm": 409.6094247013621, "learning_rate": 5.805263720787426e-06, "loss": 215.6701, "step": 33350 }, { "epoch": 0.6415970689630303, "grad_norm": 421.3961216263699, "learning_rate": 5.799724267726547e-06, "loss": 230.2704, "step": 33360 }, { "epoch": 0.6417893942235108, "grad_norm": 436.8606416476537, "learning_rate": 5.794186379199004e-06, "loss": 224.7854, "step": 33370 }, { "epoch": 0.6419817194839913, "grad_norm": 385.19126170909993, "learning_rate": 5.7886500572675774e-06, "loss": 229.8884, "step": 33380 }, { "epoch": 0.6421740447444718, "grad_norm": 410.08083947779033, "learning_rate": 5.783115303994451e-06, "loss": 228.7794, "step": 33390 }, { "epoch": 0.6423663700049523, "grad_norm": 492.054819820757, "learning_rate": 5.777582121441227e-06, "loss": 227.8315, "step": 33400 }, { "epoch": 0.6425586952654329, "grad_norm": 445.95883835936723, "learning_rate": 5.772050511668931e-06, "loss": 224.836, "step": 33410 }, { "epoch": 0.6427510205259134, "grad_norm": 435.3855244057773, "learning_rate": 5.766520476737993e-06, "loss": 227.5989, "step": 33420 }, { "epoch": 0.642943345786394, "grad_norm": 389.5184874367246, "learning_rate": 5.760992018708253e-06, "loss": 221.791, "step": 33430 }, { "epoch": 0.6431356710468745, "grad_norm": 414.25805197352906, "learning_rate": 5.755465139638983e-06, "loss": 212.4072, "step": 33440 }, { "epoch": 0.643327996307355, "grad_norm": 454.88337487877595, "learning_rate": 5.749939841588846e-06, "loss": 233.1634, "step": 33450 }, { "epoch": 0.6435203215678356, "grad_norm": 394.29976695267965, "learning_rate": 5.744416126615926e-06, "loss": 222.2909, "step": 33460 }, { "epoch": 0.6437126468283161, "grad_norm": 420.64409327834727, "learning_rate": 5.738893996777713e-06, "loss": 227.7246, "step": 33470 }, { "epoch": 0.6439049720887966, "grad_norm": 419.37745056673526, "learning_rate": 5.7333734541311144e-06, "loss": 218.7318, "step": 33480 }, { "epoch": 0.6440972973492771, "grad_norm": 475.780322785805, "learning_rate": 5.727854500732435e-06, "loss": 227.803, "step": 33490 }, { "epoch": 0.6442896226097576, "grad_norm": 376.13930068645567, "learning_rate": 5.722337138637402e-06, "loss": 218.5222, "step": 33500 }, { "epoch": 0.6444819478702382, "grad_norm": 389.52465494080644, "learning_rate": 5.716821369901131e-06, "loss": 221.9775, "step": 33510 }, { "epoch": 0.6446742731307187, "grad_norm": 417.33202921995843, "learning_rate": 5.711307196578166e-06, "loss": 218.3189, "step": 33520 }, { "epoch": 0.6448665983911992, "grad_norm": 384.08840911503916, "learning_rate": 5.70579462072244e-06, "loss": 221.0843, "step": 33530 }, { "epoch": 0.6450589236516797, "grad_norm": 422.5164232195794, "learning_rate": 5.700283644387292e-06, "loss": 223.5748, "step": 33540 }, { "epoch": 0.6452512489121602, "grad_norm": 394.31213942669797, "learning_rate": 5.6947742696254794e-06, "loss": 227.8395, "step": 33550 }, { "epoch": 0.6454435741726408, "grad_norm": 388.4902837593827, "learning_rate": 5.689266498489148e-06, "loss": 223.9905, "step": 33560 }, { "epoch": 0.6456358994331213, "grad_norm": 389.7127812151404, "learning_rate": 5.683760333029851e-06, "loss": 227.3252, "step": 33570 }, { "epoch": 0.6458282246936018, "grad_norm": 404.24183921875766, "learning_rate": 5.678255775298542e-06, "loss": 219.1205, "step": 33580 }, { "epoch": 0.6460205499540823, "grad_norm": 479.49408614188127, "learning_rate": 5.672752827345584e-06, "loss": 219.6218, "step": 33590 }, { "epoch": 0.6462128752145628, "grad_norm": 415.62002185370955, "learning_rate": 5.667251491220731e-06, "loss": 215.895, "step": 33600 }, { "epoch": 0.6464052004750434, "grad_norm": 434.1394585893389, "learning_rate": 5.661751768973136e-06, "loss": 221.4117, "step": 33610 }, { "epoch": 0.6465975257355239, "grad_norm": 493.94627005401446, "learning_rate": 5.656253662651362e-06, "loss": 227.1449, "step": 33620 }, { "epoch": 0.6467898509960044, "grad_norm": 425.0606242239906, "learning_rate": 5.650757174303356e-06, "loss": 216.2708, "step": 33630 }, { "epoch": 0.646982176256485, "grad_norm": 443.4465030931299, "learning_rate": 5.645262305976476e-06, "loss": 224.5514, "step": 33640 }, { "epoch": 0.6471745015169655, "grad_norm": 408.6957233228005, "learning_rate": 5.639769059717462e-06, "loss": 219.8099, "step": 33650 }, { "epoch": 0.6473668267774461, "grad_norm": 486.7217919823491, "learning_rate": 5.634277437572466e-06, "loss": 222.9049, "step": 33660 }, { "epoch": 0.6475591520379266, "grad_norm": 450.18604970929465, "learning_rate": 5.6287874415870225e-06, "loss": 223.3881, "step": 33670 }, { "epoch": 0.6477514772984071, "grad_norm": 423.56262269041434, "learning_rate": 5.623299073806063e-06, "loss": 219.3294, "step": 33680 }, { "epoch": 0.6479438025588876, "grad_norm": 395.5691473922273, "learning_rate": 5.617812336273912e-06, "loss": 223.5912, "step": 33690 }, { "epoch": 0.6481361278193681, "grad_norm": 421.742192253228, "learning_rate": 5.612327231034296e-06, "loss": 218.6008, "step": 33700 }, { "epoch": 0.6483284530798487, "grad_norm": 390.25443910740637, "learning_rate": 5.606843760130321e-06, "loss": 219.9142, "step": 33710 }, { "epoch": 0.6485207783403292, "grad_norm": 430.8700159313394, "learning_rate": 5.601361925604485e-06, "loss": 221.7454, "step": 33720 }, { "epoch": 0.6487131036008097, "grad_norm": 436.781275015363, "learning_rate": 5.595881729498691e-06, "loss": 217.4805, "step": 33730 }, { "epoch": 0.6489054288612902, "grad_norm": 408.3330101282725, "learning_rate": 5.590403173854215e-06, "loss": 224.3699, "step": 33740 }, { "epoch": 0.6490977541217707, "grad_norm": 388.9927700273765, "learning_rate": 5.584926260711732e-06, "loss": 224.5737, "step": 33750 }, { "epoch": 0.6492900793822513, "grad_norm": 385.55469692825466, "learning_rate": 5.579450992111294e-06, "loss": 225.4747, "step": 33760 }, { "epoch": 0.6494824046427318, "grad_norm": 375.76089468010906, "learning_rate": 5.573977370092358e-06, "loss": 216.5367, "step": 33770 }, { "epoch": 0.6496747299032123, "grad_norm": 421.7683933429201, "learning_rate": 5.568505396693749e-06, "loss": 224.6679, "step": 33780 }, { "epoch": 0.6498670551636928, "grad_norm": 424.2931956692947, "learning_rate": 5.563035073953691e-06, "loss": 228.2854, "step": 33790 }, { "epoch": 0.6500593804241733, "grad_norm": 428.7432092626859, "learning_rate": 5.557566403909794e-06, "loss": 216.1636, "step": 33800 }, { "epoch": 0.6502517056846538, "grad_norm": 410.74148038217317, "learning_rate": 5.552099388599042e-06, "loss": 224.7191, "step": 33810 }, { "epoch": 0.6504440309451344, "grad_norm": 434.5821354657664, "learning_rate": 5.5466340300578095e-06, "loss": 217.314, "step": 33820 }, { "epoch": 0.6506363562056149, "grad_norm": 409.0372763908047, "learning_rate": 5.541170330321845e-06, "loss": 220.3839, "step": 33830 }, { "epoch": 0.6508286814660955, "grad_norm": 366.64841615178324, "learning_rate": 5.535708291426297e-06, "loss": 222.9088, "step": 33840 }, { "epoch": 0.651021006726576, "grad_norm": 397.5235155205105, "learning_rate": 5.53024791540568e-06, "loss": 218.9031, "step": 33850 }, { "epoch": 0.6512133319870566, "grad_norm": 386.4828244785691, "learning_rate": 5.5247892042938944e-06, "loss": 216.198, "step": 33860 }, { "epoch": 0.6514056572475371, "grad_norm": 404.84116459039456, "learning_rate": 5.519332160124215e-06, "loss": 221.2257, "step": 33870 }, { "epoch": 0.6515979825080176, "grad_norm": 430.55704155310667, "learning_rate": 5.513876784929311e-06, "loss": 216.755, "step": 33880 }, { "epoch": 0.6517903077684981, "grad_norm": 412.7398547381667, "learning_rate": 5.5084230807412135e-06, "loss": 215.7313, "step": 33890 }, { "epoch": 0.6519826330289786, "grad_norm": 450.4316455206422, "learning_rate": 5.502971049591332e-06, "loss": 227.2195, "step": 33900 }, { "epoch": 0.6521749582894592, "grad_norm": 388.22484360431287, "learning_rate": 5.497520693510469e-06, "loss": 221.7378, "step": 33910 }, { "epoch": 0.6523672835499397, "grad_norm": 380.15952453851884, "learning_rate": 5.492072014528783e-06, "loss": 224.4312, "step": 33920 }, { "epoch": 0.6525596088104202, "grad_norm": 416.64682782605706, "learning_rate": 5.4866250146758235e-06, "loss": 221.9234, "step": 33930 }, { "epoch": 0.6527519340709007, "grad_norm": 412.32675595857177, "learning_rate": 5.481179695980503e-06, "loss": 222.2215, "step": 33940 }, { "epoch": 0.6529442593313812, "grad_norm": 370.1401697001076, "learning_rate": 5.475736060471117e-06, "loss": 226.2021, "step": 33950 }, { "epoch": 0.6531365845918617, "grad_norm": 406.84148359695627, "learning_rate": 5.470294110175329e-06, "loss": 222.4338, "step": 33960 }, { "epoch": 0.6533289098523423, "grad_norm": 380.2284682536495, "learning_rate": 5.464853847120169e-06, "loss": 216.6895, "step": 33970 }, { "epoch": 0.6535212351128228, "grad_norm": 408.10588551318745, "learning_rate": 5.459415273332056e-06, "loss": 222.4578, "step": 33980 }, { "epoch": 0.6537135603733033, "grad_norm": 422.16612821356006, "learning_rate": 5.453978390836763e-06, "loss": 215.5359, "step": 33990 }, { "epoch": 0.6539058856337838, "grad_norm": 383.1771770680644, "learning_rate": 5.44854320165944e-06, "loss": 227.0804, "step": 34000 }, { "epoch": 0.6540982108942643, "grad_norm": 406.17556187166747, "learning_rate": 5.443109707824599e-06, "loss": 227.7723, "step": 34010 }, { "epoch": 0.6542905361547449, "grad_norm": 397.91469372714204, "learning_rate": 5.437677911356137e-06, "loss": 220.6621, "step": 34020 }, { "epoch": 0.6544828614152254, "grad_norm": 433.8150369675571, "learning_rate": 5.432247814277305e-06, "loss": 215.9695, "step": 34030 }, { "epoch": 0.6546751866757059, "grad_norm": 445.71808778097335, "learning_rate": 5.426819418610718e-06, "loss": 226.9019, "step": 34040 }, { "epoch": 0.6548675119361865, "grad_norm": 427.87873663873904, "learning_rate": 5.4213927263783725e-06, "loss": 215.9779, "step": 34050 }, { "epoch": 0.655059837196667, "grad_norm": 420.4167125671266, "learning_rate": 5.415967739601616e-06, "loss": 214.3531, "step": 34060 }, { "epoch": 0.6552521624571476, "grad_norm": 408.00528417635167, "learning_rate": 5.41054446030117e-06, "loss": 220.8566, "step": 34070 }, { "epoch": 0.6554444877176281, "grad_norm": 460.8509257662566, "learning_rate": 5.405122890497114e-06, "loss": 229.1481, "step": 34080 }, { "epoch": 0.6556368129781086, "grad_norm": 389.10711964885326, "learning_rate": 5.399703032208896e-06, "loss": 221.9489, "step": 34090 }, { "epoch": 0.6558291382385891, "grad_norm": 374.6333246079594, "learning_rate": 5.3942848874553235e-06, "loss": 219.252, "step": 34100 }, { "epoch": 0.6560214634990696, "grad_norm": 436.0036236312717, "learning_rate": 5.388868458254565e-06, "loss": 213.0718, "step": 34110 }, { "epoch": 0.6562137887595502, "grad_norm": 429.8354606863401, "learning_rate": 5.3834537466241455e-06, "loss": 218.5837, "step": 34120 }, { "epoch": 0.6564061140200307, "grad_norm": 424.2748165593473, "learning_rate": 5.378040754580964e-06, "loss": 213.8902, "step": 34130 }, { "epoch": 0.6565984392805112, "grad_norm": 406.9017677770473, "learning_rate": 5.37262948414127e-06, "loss": 222.2124, "step": 34140 }, { "epoch": 0.6567907645409917, "grad_norm": 434.01457954406874, "learning_rate": 5.367219937320663e-06, "loss": 215.3664, "step": 34150 }, { "epoch": 0.6569830898014722, "grad_norm": 412.75081737851093, "learning_rate": 5.361812116134122e-06, "loss": 219.1794, "step": 34160 }, { "epoch": 0.6571754150619528, "grad_norm": 415.4118554333351, "learning_rate": 5.356406022595963e-06, "loss": 224.351, "step": 34170 }, { "epoch": 0.6573677403224333, "grad_norm": 429.41077008673665, "learning_rate": 5.351001658719872e-06, "loss": 222.8747, "step": 34180 }, { "epoch": 0.6575600655829138, "grad_norm": 369.8348048500022, "learning_rate": 5.345599026518877e-06, "loss": 221.3077, "step": 34190 }, { "epoch": 0.6577523908433943, "grad_norm": 426.6929621375697, "learning_rate": 5.3401981280053745e-06, "loss": 228.286, "step": 34200 }, { "epoch": 0.6579447161038748, "grad_norm": 385.2522102795723, "learning_rate": 5.334798965191115e-06, "loss": 220.0016, "step": 34210 }, { "epoch": 0.6581370413643554, "grad_norm": 414.4667890306486, "learning_rate": 5.329401540087188e-06, "loss": 216.608, "step": 34220 }, { "epoch": 0.6583293666248359, "grad_norm": 386.13409626904894, "learning_rate": 5.3240058547040525e-06, "loss": 213.6617, "step": 34230 }, { "epoch": 0.6585216918853164, "grad_norm": 395.2595710193424, "learning_rate": 5.318611911051512e-06, "loss": 217.3819, "step": 34240 }, { "epoch": 0.658714017145797, "grad_norm": 382.1077538938697, "learning_rate": 5.313219711138717e-06, "loss": 220.277, "step": 34250 }, { "epoch": 0.6589063424062775, "grad_norm": 410.2347197492905, "learning_rate": 5.307829256974173e-06, "loss": 215.7177, "step": 34260 }, { "epoch": 0.6590986676667581, "grad_norm": 408.4589098291733, "learning_rate": 5.302440550565739e-06, "loss": 210.0758, "step": 34270 }, { "epoch": 0.6592909929272386, "grad_norm": 389.65221267945395, "learning_rate": 5.297053593920618e-06, "loss": 212.2247, "step": 34280 }, { "epoch": 0.6594833181877191, "grad_norm": 384.61484155685713, "learning_rate": 5.291668389045362e-06, "loss": 218.25, "step": 34290 }, { "epoch": 0.6596756434481996, "grad_norm": 409.0366665702512, "learning_rate": 5.286284937945866e-06, "loss": 220.1443, "step": 34300 }, { "epoch": 0.6598679687086801, "grad_norm": 407.88290789083067, "learning_rate": 5.280903242627384e-06, "loss": 223.4864, "step": 34310 }, { "epoch": 0.6600602939691607, "grad_norm": 628.1510297138167, "learning_rate": 5.2755233050945076e-06, "loss": 221.4672, "step": 34320 }, { "epoch": 0.6602526192296412, "grad_norm": 427.69772756075423, "learning_rate": 5.270145127351168e-06, "loss": 226.2193, "step": 34330 }, { "epoch": 0.6604449444901217, "grad_norm": 434.90129355321193, "learning_rate": 5.264768711400656e-06, "loss": 219.5553, "step": 34340 }, { "epoch": 0.6606372697506022, "grad_norm": 399.23687171864344, "learning_rate": 5.25939405924559e-06, "loss": 221.3278, "step": 34350 }, { "epoch": 0.6608295950110827, "grad_norm": 411.3223621323265, "learning_rate": 5.254021172887947e-06, "loss": 226.4097, "step": 34360 }, { "epoch": 0.6610219202715633, "grad_norm": 400.7998974581744, "learning_rate": 5.248650054329032e-06, "loss": 227.0978, "step": 34370 }, { "epoch": 0.6612142455320438, "grad_norm": 399.3956126061256, "learning_rate": 5.2432807055695035e-06, "loss": 226.1593, "step": 34380 }, { "epoch": 0.6614065707925243, "grad_norm": 431.821138441697, "learning_rate": 5.237913128609352e-06, "loss": 221.4863, "step": 34390 }, { "epoch": 0.6615988960530048, "grad_norm": 387.98993645508483, "learning_rate": 5.232547325447908e-06, "loss": 228.9696, "step": 34400 }, { "epoch": 0.6617912213134853, "grad_norm": 455.2606264976643, "learning_rate": 5.227183298083854e-06, "loss": 219.1108, "step": 34410 }, { "epoch": 0.6619835465739659, "grad_norm": 434.27210616795634, "learning_rate": 5.221821048515193e-06, "loss": 220.9553, "step": 34420 }, { "epoch": 0.6621758718344464, "grad_norm": 405.2208327967891, "learning_rate": 5.216460578739278e-06, "loss": 217.0133, "step": 34430 }, { "epoch": 0.6623681970949269, "grad_norm": 417.8550831398497, "learning_rate": 5.211101890752792e-06, "loss": 216.4514, "step": 34440 }, { "epoch": 0.6625605223554074, "grad_norm": 406.3149397970147, "learning_rate": 5.205744986551763e-06, "loss": 217.8611, "step": 34450 }, { "epoch": 0.662752847615888, "grad_norm": 412.35480112393446, "learning_rate": 5.200389868131547e-06, "loss": 227.1226, "step": 34460 }, { "epoch": 0.6629451728763686, "grad_norm": 390.8784820652284, "learning_rate": 5.195036537486833e-06, "loss": 225.5386, "step": 34470 }, { "epoch": 0.6631374981368491, "grad_norm": 410.0052035346963, "learning_rate": 5.189684996611657e-06, "loss": 225.1919, "step": 34480 }, { "epoch": 0.6633298233973296, "grad_norm": 371.4325178548429, "learning_rate": 5.18433524749937e-06, "loss": 216.1162, "step": 34490 }, { "epoch": 0.6635221486578101, "grad_norm": 474.5518626843271, "learning_rate": 5.178987292142674e-06, "loss": 231.3805, "step": 34500 }, { "epoch": 0.6637144739182906, "grad_norm": 389.5231699921982, "learning_rate": 5.173641132533586e-06, "loss": 219.1944, "step": 34510 }, { "epoch": 0.6639067991787712, "grad_norm": 390.4161380012965, "learning_rate": 5.16829677066347e-06, "loss": 226.4411, "step": 34520 }, { "epoch": 0.6640991244392517, "grad_norm": 426.3713495748812, "learning_rate": 5.16295420852301e-06, "loss": 218.0286, "step": 34530 }, { "epoch": 0.6642914496997322, "grad_norm": 392.93237883825964, "learning_rate": 5.157613448102219e-06, "loss": 222.1857, "step": 34540 }, { "epoch": 0.6644837749602127, "grad_norm": 367.1567326154693, "learning_rate": 5.152274491390441e-06, "loss": 220.2064, "step": 34550 }, { "epoch": 0.6646761002206932, "grad_norm": 414.91270187754816, "learning_rate": 5.1469373403763555e-06, "loss": 222.7994, "step": 34560 }, { "epoch": 0.6648684254811738, "grad_norm": 391.92457518505773, "learning_rate": 5.14160199704796e-06, "loss": 218.081, "step": 34570 }, { "epoch": 0.6650607507416543, "grad_norm": 440.6149088282293, "learning_rate": 5.136268463392578e-06, "loss": 217.0255, "step": 34580 }, { "epoch": 0.6652530760021348, "grad_norm": 428.32323036007244, "learning_rate": 5.13093674139687e-06, "loss": 219.5003, "step": 34590 }, { "epoch": 0.6654454012626153, "grad_norm": 386.524370795735, "learning_rate": 5.12560683304681e-06, "loss": 215.506, "step": 34600 }, { "epoch": 0.6656377265230958, "grad_norm": 389.87164413735854, "learning_rate": 5.120278740327702e-06, "loss": 217.1482, "step": 34610 }, { "epoch": 0.6658300517835763, "grad_norm": 391.91386671069597, "learning_rate": 5.114952465224168e-06, "loss": 219.4571, "step": 34620 }, { "epoch": 0.6660223770440569, "grad_norm": 419.52780364454793, "learning_rate": 5.109628009720162e-06, "loss": 226.8344, "step": 34630 }, { "epoch": 0.6662147023045374, "grad_norm": 456.95884508220956, "learning_rate": 5.104305375798958e-06, "loss": 221.8309, "step": 34640 }, { "epoch": 0.6664070275650179, "grad_norm": 418.5649634874358, "learning_rate": 5.098984565443148e-06, "loss": 224.2618, "step": 34650 }, { "epoch": 0.6665993528254985, "grad_norm": 435.9904573132652, "learning_rate": 5.093665580634639e-06, "loss": 215.8238, "step": 34660 }, { "epoch": 0.6667916780859791, "grad_norm": 410.1478997836662, "learning_rate": 5.088348423354674e-06, "loss": 215.894, "step": 34670 }, { "epoch": 0.6669840033464596, "grad_norm": 399.34382371602294, "learning_rate": 5.0830330955838045e-06, "loss": 223.2651, "step": 34680 }, { "epoch": 0.6671763286069401, "grad_norm": 408.08863746996576, "learning_rate": 5.077719599301895e-06, "loss": 220.5595, "step": 34690 }, { "epoch": 0.6673686538674206, "grad_norm": 475.0752896157902, "learning_rate": 5.072407936488145e-06, "loss": 233.0844, "step": 34700 }, { "epoch": 0.6675609791279011, "grad_norm": 383.36865412071796, "learning_rate": 5.067098109121058e-06, "loss": 220.24, "step": 34710 }, { "epoch": 0.6677533043883817, "grad_norm": 397.5417620210139, "learning_rate": 5.0617901191784536e-06, "loss": 220.2389, "step": 34720 }, { "epoch": 0.6679456296488622, "grad_norm": 400.3219953075971, "learning_rate": 5.05648396863747e-06, "loss": 219.4802, "step": 34730 }, { "epoch": 0.6681379549093427, "grad_norm": 427.2479469307242, "learning_rate": 5.051179659474568e-06, "loss": 222.0553, "step": 34740 }, { "epoch": 0.6683302801698232, "grad_norm": 405.8582281354667, "learning_rate": 5.045877193665508e-06, "loss": 221.493, "step": 34750 }, { "epoch": 0.6685226054303037, "grad_norm": 434.3289414696171, "learning_rate": 5.040576573185372e-06, "loss": 219.562, "step": 34760 }, { "epoch": 0.6687149306907842, "grad_norm": 428.99912820381775, "learning_rate": 5.0352778000085565e-06, "loss": 217.7414, "step": 34770 }, { "epoch": 0.6689072559512648, "grad_norm": 379.5355414447412, "learning_rate": 5.029980876108762e-06, "loss": 221.4646, "step": 34780 }, { "epoch": 0.6690995812117453, "grad_norm": 371.29290585646544, "learning_rate": 5.024685803459013e-06, "loss": 214.9381, "step": 34790 }, { "epoch": 0.6692919064722258, "grad_norm": 370.2864344194036, "learning_rate": 5.019392584031628e-06, "loss": 213.0196, "step": 34800 }, { "epoch": 0.6694842317327063, "grad_norm": 432.81366640852184, "learning_rate": 5.0141012197982534e-06, "loss": 227.414, "step": 34810 }, { "epoch": 0.6696765569931868, "grad_norm": 421.46565519766983, "learning_rate": 5.0088117127298285e-06, "loss": 217.3553, "step": 34820 }, { "epoch": 0.6698688822536674, "grad_norm": 384.9203523314455, "learning_rate": 5.003524064796608e-06, "loss": 220.0766, "step": 34830 }, { "epoch": 0.6700612075141479, "grad_norm": 401.2230869106227, "learning_rate": 4.998238277968149e-06, "loss": 220.3158, "step": 34840 }, { "epoch": 0.6702535327746284, "grad_norm": 399.2223901244871, "learning_rate": 4.992954354213331e-06, "loss": 215.849, "step": 34850 }, { "epoch": 0.6704458580351089, "grad_norm": 456.7888903253584, "learning_rate": 4.98767229550032e-06, "loss": 224.2743, "step": 34860 }, { "epoch": 0.6706381832955896, "grad_norm": 415.7765312303501, "learning_rate": 4.982392103796595e-06, "loss": 216.1302, "step": 34870 }, { "epoch": 0.6708305085560701, "grad_norm": 422.25682451052126, "learning_rate": 4.977113781068945e-06, "loss": 224.4988, "step": 34880 }, { "epoch": 0.6710228338165506, "grad_norm": 387.5028416458855, "learning_rate": 4.971837329283458e-06, "loss": 222.0692, "step": 34890 }, { "epoch": 0.6712151590770311, "grad_norm": 377.7802855481398, "learning_rate": 4.966562750405517e-06, "loss": 227.673, "step": 34900 }, { "epoch": 0.6714074843375116, "grad_norm": 424.1443555863365, "learning_rate": 4.9612900463998274e-06, "loss": 218.8016, "step": 34910 }, { "epoch": 0.6715998095979921, "grad_norm": 445.07863344730316, "learning_rate": 4.9560192192303735e-06, "loss": 227.9261, "step": 34920 }, { "epoch": 0.6717921348584727, "grad_norm": 419.1258770683227, "learning_rate": 4.95075027086046e-06, "loss": 216.7835, "step": 34930 }, { "epoch": 0.6719844601189532, "grad_norm": 425.4784161543398, "learning_rate": 4.9454832032526755e-06, "loss": 225.8217, "step": 34940 }, { "epoch": 0.6721767853794337, "grad_norm": 410.22499642384236, "learning_rate": 4.940218018368924e-06, "loss": 220.4243, "step": 34950 }, { "epoch": 0.6723691106399142, "grad_norm": 381.72086856082564, "learning_rate": 4.934954718170396e-06, "loss": 218.1714, "step": 34960 }, { "epoch": 0.6725614359003947, "grad_norm": 416.010977343979, "learning_rate": 4.9296933046175834e-06, "loss": 214.2865, "step": 34970 }, { "epoch": 0.6727537611608753, "grad_norm": 402.40914639630927, "learning_rate": 4.924433779670271e-06, "loss": 214.6974, "step": 34980 }, { "epoch": 0.6729460864213558, "grad_norm": 391.17333376277037, "learning_rate": 4.9191761452875554e-06, "loss": 227.203, "step": 34990 }, { "epoch": 0.6731384116818363, "grad_norm": 404.8483203612921, "learning_rate": 4.913920403427812e-06, "loss": 217.7973, "step": 35000 }, { "epoch": 0.6733307369423168, "grad_norm": 445.71152017928637, "learning_rate": 4.908666556048719e-06, "loss": 215.3907, "step": 35010 }, { "epoch": 0.6735230622027973, "grad_norm": 406.2545068423606, "learning_rate": 4.903414605107244e-06, "loss": 217.489, "step": 35020 }, { "epoch": 0.6737153874632779, "grad_norm": 390.5927867242238, "learning_rate": 4.89816455255966e-06, "loss": 219.0073, "step": 35030 }, { "epoch": 0.6739077127237584, "grad_norm": 407.93375759457336, "learning_rate": 4.892916400361516e-06, "loss": 220.319, "step": 35040 }, { "epoch": 0.6741000379842389, "grad_norm": 397.9938323002778, "learning_rate": 4.887670150467671e-06, "loss": 218.9028, "step": 35050 }, { "epoch": 0.6742923632447194, "grad_norm": 378.5453163839792, "learning_rate": 4.882425804832258e-06, "loss": 218.4058, "step": 35060 }, { "epoch": 0.6744846885052, "grad_norm": 1701.0645554894036, "learning_rate": 4.8771833654087165e-06, "loss": 219.2215, "step": 35070 }, { "epoch": 0.6746770137656806, "grad_norm": 421.12677006161465, "learning_rate": 4.8719428341497665e-06, "loss": 219.8677, "step": 35080 }, { "epoch": 0.6748693390261611, "grad_norm": 403.05812729571164, "learning_rate": 4.866704213007413e-06, "loss": 222.7858, "step": 35090 }, { "epoch": 0.6750616642866416, "grad_norm": 400.47874375477545, "learning_rate": 4.861467503932965e-06, "loss": 220.94, "step": 35100 }, { "epoch": 0.6752539895471221, "grad_norm": 371.1653158650625, "learning_rate": 4.856232708877007e-06, "loss": 213.8064, "step": 35110 }, { "epoch": 0.6754463148076026, "grad_norm": 449.53977222491653, "learning_rate": 4.8509998297894075e-06, "loss": 225.4592, "step": 35120 }, { "epoch": 0.6756386400680832, "grad_norm": 425.94483782608006, "learning_rate": 4.845768868619338e-06, "loss": 219.3791, "step": 35130 }, { "epoch": 0.6758309653285637, "grad_norm": 426.54675977469947, "learning_rate": 4.8405398273152405e-06, "loss": 224.3054, "step": 35140 }, { "epoch": 0.6760232905890442, "grad_norm": 410.1514685033484, "learning_rate": 4.8353127078248455e-06, "loss": 228.5359, "step": 35150 }, { "epoch": 0.6762156158495247, "grad_norm": 404.99268776020017, "learning_rate": 4.830087512095164e-06, "loss": 215.9931, "step": 35160 }, { "epoch": 0.6764079411100052, "grad_norm": 396.35087915909986, "learning_rate": 4.824864242072506e-06, "loss": 216.081, "step": 35170 }, { "epoch": 0.6766002663704858, "grad_norm": 429.4034664446162, "learning_rate": 4.819642899702447e-06, "loss": 219.0898, "step": 35180 }, { "epoch": 0.6767925916309663, "grad_norm": 413.4915093918844, "learning_rate": 4.814423486929846e-06, "loss": 218.1461, "step": 35190 }, { "epoch": 0.6769849168914468, "grad_norm": 431.1310407809555, "learning_rate": 4.809206005698856e-06, "loss": 216.2095, "step": 35200 }, { "epoch": 0.6771772421519273, "grad_norm": 391.036190075778, "learning_rate": 4.803990457952903e-06, "loss": 230.6294, "step": 35210 }, { "epoch": 0.6773695674124078, "grad_norm": 437.8428504398393, "learning_rate": 4.798776845634688e-06, "loss": 215.9749, "step": 35220 }, { "epoch": 0.6775618926728884, "grad_norm": 474.30686517047116, "learning_rate": 4.793565170686196e-06, "loss": 221.5434, "step": 35230 }, { "epoch": 0.6777542179333689, "grad_norm": 391.9764584980503, "learning_rate": 4.7883554350486925e-06, "loss": 225.1253, "step": 35240 }, { "epoch": 0.6779465431938494, "grad_norm": 390.4915306979907, "learning_rate": 4.783147640662716e-06, "loss": 214.2675, "step": 35250 }, { "epoch": 0.6781388684543299, "grad_norm": 404.8724496864521, "learning_rate": 4.777941789468086e-06, "loss": 217.5677, "step": 35260 }, { "epoch": 0.6783311937148104, "grad_norm": 410.4147782816225, "learning_rate": 4.772737883403889e-06, "loss": 225.9869, "step": 35270 }, { "epoch": 0.6785235189752911, "grad_norm": 417.6396121262922, "learning_rate": 4.767535924408504e-06, "loss": 222.0417, "step": 35280 }, { "epoch": 0.6787158442357716, "grad_norm": 398.7773521099608, "learning_rate": 4.76233591441957e-06, "loss": 220.6352, "step": 35290 }, { "epoch": 0.6789081694962521, "grad_norm": 444.68245930806336, "learning_rate": 4.757137855374001e-06, "loss": 216.2313, "step": 35300 }, { "epoch": 0.6791004947567326, "grad_norm": 417.2630411286579, "learning_rate": 4.751941749207996e-06, "loss": 222.229, "step": 35310 }, { "epoch": 0.6792928200172131, "grad_norm": 380.1821619331443, "learning_rate": 4.746747597857014e-06, "loss": 217.6884, "step": 35320 }, { "epoch": 0.6794851452776937, "grad_norm": 411.9494189243853, "learning_rate": 4.74155540325579e-06, "loss": 220.5652, "step": 35330 }, { "epoch": 0.6796774705381742, "grad_norm": 446.72028111730015, "learning_rate": 4.736365167338333e-06, "loss": 225.6941, "step": 35340 }, { "epoch": 0.6798697957986547, "grad_norm": 427.20146684548655, "learning_rate": 4.731176892037918e-06, "loss": 215.628, "step": 35350 }, { "epoch": 0.6800621210591352, "grad_norm": 400.4752864673295, "learning_rate": 4.725990579287097e-06, "loss": 213.7563, "step": 35360 }, { "epoch": 0.6802544463196157, "grad_norm": 480.5398254124123, "learning_rate": 4.720806231017676e-06, "loss": 221.9207, "step": 35370 }, { "epoch": 0.6804467715800963, "grad_norm": 404.4653045016176, "learning_rate": 4.7156238491607506e-06, "loss": 219.9062, "step": 35380 }, { "epoch": 0.6806390968405768, "grad_norm": 398.92275156228794, "learning_rate": 4.710443435646666e-06, "loss": 216.9354, "step": 35390 }, { "epoch": 0.6808314221010573, "grad_norm": 396.44172072203673, "learning_rate": 4.705264992405043e-06, "loss": 216.3257, "step": 35400 }, { "epoch": 0.6810237473615378, "grad_norm": 414.9739739372363, "learning_rate": 4.700088521364761e-06, "loss": 216.6547, "step": 35410 }, { "epoch": 0.6812160726220183, "grad_norm": 402.9635413856429, "learning_rate": 4.694914024453977e-06, "loss": 221.0535, "step": 35420 }, { "epoch": 0.6814083978824989, "grad_norm": 448.0603991656356, "learning_rate": 4.689741503600103e-06, "loss": 220.5415, "step": 35430 }, { "epoch": 0.6816007231429794, "grad_norm": 468.4523285944009, "learning_rate": 4.684570960729818e-06, "loss": 219.5447, "step": 35440 }, { "epoch": 0.6817930484034599, "grad_norm": 395.9661355731399, "learning_rate": 4.679402397769057e-06, "loss": 214.6342, "step": 35450 }, { "epoch": 0.6819853736639404, "grad_norm": 442.422516066603, "learning_rate": 4.674235816643035e-06, "loss": 228.8218, "step": 35460 }, { "epoch": 0.6821776989244209, "grad_norm": 365.55331513023737, "learning_rate": 4.66907121927621e-06, "loss": 218.3393, "step": 35470 }, { "epoch": 0.6823700241849016, "grad_norm": 406.7754196266838, "learning_rate": 4.663908607592316e-06, "loss": 215.1052, "step": 35480 }, { "epoch": 0.6825623494453821, "grad_norm": 385.4351186325653, "learning_rate": 4.658747983514334e-06, "loss": 209.8509, "step": 35490 }, { "epoch": 0.6827546747058626, "grad_norm": 430.42020327718194, "learning_rate": 4.653589348964517e-06, "loss": 218.0502, "step": 35500 }, { "epoch": 0.6829469999663431, "grad_norm": 417.76945690414715, "learning_rate": 4.648432705864369e-06, "loss": 237.4081, "step": 35510 }, { "epoch": 0.6831393252268236, "grad_norm": 415.3004662423424, "learning_rate": 4.64327805613465e-06, "loss": 225.0952, "step": 35520 }, { "epoch": 0.6833316504873042, "grad_norm": 461.4128726600178, "learning_rate": 4.638125401695391e-06, "loss": 216.5117, "step": 35530 }, { "epoch": 0.6835239757477847, "grad_norm": 437.23718739371304, "learning_rate": 4.632974744465865e-06, "loss": 224.5861, "step": 35540 }, { "epoch": 0.6837163010082652, "grad_norm": 421.5048453080918, "learning_rate": 4.627826086364603e-06, "loss": 226.2318, "step": 35550 }, { "epoch": 0.6839086262687457, "grad_norm": 502.55783888187017, "learning_rate": 4.622679429309404e-06, "loss": 220.68, "step": 35560 }, { "epoch": 0.6841009515292262, "grad_norm": 390.21991768610644, "learning_rate": 4.617534775217307e-06, "loss": 209.8581, "step": 35570 }, { "epoch": 0.6842932767897068, "grad_norm": 393.66885736023653, "learning_rate": 4.6123921260046135e-06, "loss": 214.7617, "step": 35580 }, { "epoch": 0.6844856020501873, "grad_norm": 393.9994217123655, "learning_rate": 4.607251483586869e-06, "loss": 223.121, "step": 35590 }, { "epoch": 0.6846779273106678, "grad_norm": 394.6677257224646, "learning_rate": 4.6021128498788855e-06, "loss": 212.4204, "step": 35600 }, { "epoch": 0.6848702525711483, "grad_norm": 390.0223437269778, "learning_rate": 4.596976226794718e-06, "loss": 213.0069, "step": 35610 }, { "epoch": 0.6850625778316288, "grad_norm": 365.9931384181439, "learning_rate": 4.591841616247669e-06, "loss": 225.4255, "step": 35620 }, { "epoch": 0.6852549030921093, "grad_norm": 425.7434583247429, "learning_rate": 4.586709020150297e-06, "loss": 216.6788, "step": 35630 }, { "epoch": 0.6854472283525899, "grad_norm": 416.9298132211286, "learning_rate": 4.581578440414417e-06, "loss": 214.8177, "step": 35640 }, { "epoch": 0.6856395536130704, "grad_norm": 391.189193703694, "learning_rate": 4.576449878951079e-06, "loss": 221.3543, "step": 35650 }, { "epoch": 0.6858318788735509, "grad_norm": 456.13133359288577, "learning_rate": 4.571323337670584e-06, "loss": 218.6048, "step": 35660 }, { "epoch": 0.6860242041340314, "grad_norm": 356.172617503518, "learning_rate": 4.566198818482494e-06, "loss": 219.5613, "step": 35670 }, { "epoch": 0.686216529394512, "grad_norm": 377.8119828173668, "learning_rate": 4.5610763232956e-06, "loss": 221.6812, "step": 35680 }, { "epoch": 0.6864088546549926, "grad_norm": 389.65658442797036, "learning_rate": 4.55595585401795e-06, "loss": 226.5807, "step": 35690 }, { "epoch": 0.6866011799154731, "grad_norm": 425.29899536955804, "learning_rate": 4.550837412556828e-06, "loss": 229.349, "step": 35700 }, { "epoch": 0.6867935051759536, "grad_norm": 401.015777030877, "learning_rate": 4.545721000818778e-06, "loss": 226.9609, "step": 35710 }, { "epoch": 0.6869858304364341, "grad_norm": 399.04573981872664, "learning_rate": 4.540606620709575e-06, "loss": 234.3499, "step": 35720 }, { "epoch": 0.6871781556969146, "grad_norm": 395.16422376910265, "learning_rate": 4.535494274134236e-06, "loss": 223.3513, "step": 35730 }, { "epoch": 0.6873704809573952, "grad_norm": 412.30307874966866, "learning_rate": 4.530383962997036e-06, "loss": 220.3366, "step": 35740 }, { "epoch": 0.6875628062178757, "grad_norm": 435.66609210857285, "learning_rate": 4.525275689201476e-06, "loss": 229.2069, "step": 35750 }, { "epoch": 0.6877551314783562, "grad_norm": 460.42100774759336, "learning_rate": 4.520169454650299e-06, "loss": 222.6201, "step": 35760 }, { "epoch": 0.6879474567388367, "grad_norm": 463.9467816745536, "learning_rate": 4.515065261245498e-06, "loss": 224.6992, "step": 35770 }, { "epoch": 0.6881397819993172, "grad_norm": 400.3259681576388, "learning_rate": 4.509963110888305e-06, "loss": 218.8042, "step": 35780 }, { "epoch": 0.6883321072597978, "grad_norm": 398.00445988717433, "learning_rate": 4.504863005479182e-06, "loss": 214.1886, "step": 35790 }, { "epoch": 0.6885244325202783, "grad_norm": 453.40343361828576, "learning_rate": 4.499764946917834e-06, "loss": 222.4477, "step": 35800 }, { "epoch": 0.6887167577807588, "grad_norm": 377.2414076529407, "learning_rate": 4.494668937103201e-06, "loss": 219.7592, "step": 35810 }, { "epoch": 0.6889090830412393, "grad_norm": 384.9161514166962, "learning_rate": 4.489574977933467e-06, "loss": 216.2119, "step": 35820 }, { "epoch": 0.6891014083017198, "grad_norm": 409.8341540854874, "learning_rate": 4.484483071306048e-06, "loss": 226.0465, "step": 35830 }, { "epoch": 0.6892937335622004, "grad_norm": 366.05182434451, "learning_rate": 4.479393219117588e-06, "loss": 215.4577, "step": 35840 }, { "epoch": 0.6894860588226809, "grad_norm": 379.5787758698706, "learning_rate": 4.474305423263984e-06, "loss": 216.8345, "step": 35850 }, { "epoch": 0.6896783840831614, "grad_norm": 436.45034008295283, "learning_rate": 4.469219685640348e-06, "loss": 213.4985, "step": 35860 }, { "epoch": 0.6898707093436419, "grad_norm": 438.91448163518584, "learning_rate": 4.4641360081410356e-06, "loss": 219.4, "step": 35870 }, { "epoch": 0.6900630346041224, "grad_norm": 409.6116397181822, "learning_rate": 4.4590543926596285e-06, "loss": 220.7814, "step": 35880 }, { "epoch": 0.6902553598646031, "grad_norm": 418.8105487840902, "learning_rate": 4.453974841088953e-06, "loss": 223.7794, "step": 35890 }, { "epoch": 0.6904476851250836, "grad_norm": 398.5432586878791, "learning_rate": 4.448897355321049e-06, "loss": 218.6897, "step": 35900 }, { "epoch": 0.6906400103855641, "grad_norm": 383.66018857637016, "learning_rate": 4.443821937247205e-06, "loss": 217.4083, "step": 35910 }, { "epoch": 0.6908323356460446, "grad_norm": 414.73046501485646, "learning_rate": 4.438748588757922e-06, "loss": 218.7771, "step": 35920 }, { "epoch": 0.6910246609065251, "grad_norm": 380.7592696692757, "learning_rate": 4.433677311742945e-06, "loss": 216.4235, "step": 35930 }, { "epoch": 0.6912169861670057, "grad_norm": 383.20210251628, "learning_rate": 4.428608108091241e-06, "loss": 216.2294, "step": 35940 }, { "epoch": 0.6914093114274862, "grad_norm": 389.83690133773337, "learning_rate": 4.4235409796909965e-06, "loss": 217.3402, "step": 35950 }, { "epoch": 0.6916016366879667, "grad_norm": 409.7192019886682, "learning_rate": 4.418475928429644e-06, "loss": 218.1749, "step": 35960 }, { "epoch": 0.6917939619484472, "grad_norm": 411.4669615637612, "learning_rate": 4.413412956193826e-06, "loss": 225.1929, "step": 35970 }, { "epoch": 0.6919862872089277, "grad_norm": 440.62809127856247, "learning_rate": 4.4083520648694156e-06, "loss": 210.2017, "step": 35980 }, { "epoch": 0.6921786124694083, "grad_norm": 386.5606776080072, "learning_rate": 4.4032932563415075e-06, "loss": 216.4489, "step": 35990 }, { "epoch": 0.6923709377298888, "grad_norm": 389.3415345420483, "learning_rate": 4.398236532494434e-06, "loss": 217.0247, "step": 36000 }, { "epoch": 0.6925632629903693, "grad_norm": 437.0736833560942, "learning_rate": 4.393181895211735e-06, "loss": 227.7811, "step": 36010 }, { "epoch": 0.6927555882508498, "grad_norm": 411.9620831442087, "learning_rate": 4.388129346376177e-06, "loss": 217.2434, "step": 36020 }, { "epoch": 0.6929479135113303, "grad_norm": 401.59662123749564, "learning_rate": 4.383078887869759e-06, "loss": 214.7082, "step": 36030 }, { "epoch": 0.6931402387718109, "grad_norm": 366.91846840815407, "learning_rate": 4.378030521573683e-06, "loss": 214.2149, "step": 36040 }, { "epoch": 0.6933325640322914, "grad_norm": 401.0252302703506, "learning_rate": 4.372984249368393e-06, "loss": 217.7893, "step": 36050 }, { "epoch": 0.6935248892927719, "grad_norm": 370.61991209202967, "learning_rate": 4.367940073133533e-06, "loss": 215.8928, "step": 36060 }, { "epoch": 0.6937172145532524, "grad_norm": 372.9909954708285, "learning_rate": 4.362897994747982e-06, "loss": 218.2172, "step": 36070 }, { "epoch": 0.6939095398137329, "grad_norm": 377.8590749980655, "learning_rate": 4.35785801608983e-06, "loss": 210.704, "step": 36080 }, { "epoch": 0.6941018650742136, "grad_norm": 443.0240719634579, "learning_rate": 4.352820139036379e-06, "loss": 217.6218, "step": 36090 }, { "epoch": 0.6942941903346941, "grad_norm": 378.81805403458264, "learning_rate": 4.347784365464163e-06, "loss": 216.5826, "step": 36100 }, { "epoch": 0.6944865155951746, "grad_norm": 377.0661857394148, "learning_rate": 4.342750697248922e-06, "loss": 223.8753, "step": 36110 }, { "epoch": 0.6946788408556551, "grad_norm": 423.67280073615655, "learning_rate": 4.337719136265614e-06, "loss": 211.0271, "step": 36120 }, { "epoch": 0.6948711661161356, "grad_norm": 360.0370892686145, "learning_rate": 4.332689684388408e-06, "loss": 214.1245, "step": 36130 }, { "epoch": 0.6950634913766162, "grad_norm": 391.7009479640741, "learning_rate": 4.327662343490701e-06, "loss": 217.2823, "step": 36140 }, { "epoch": 0.6952558166370967, "grad_norm": 419.38673821848687, "learning_rate": 4.322637115445088e-06, "loss": 224.3325, "step": 36150 }, { "epoch": 0.6954481418975772, "grad_norm": 413.7845806064118, "learning_rate": 4.3176140021233845e-06, "loss": 215.707, "step": 36160 }, { "epoch": 0.6956404671580577, "grad_norm": 390.1556774787163, "learning_rate": 4.312593005396615e-06, "loss": 218.6496, "step": 36170 }, { "epoch": 0.6958327924185382, "grad_norm": 387.4228282520312, "learning_rate": 4.307574127135022e-06, "loss": 213.2394, "step": 36180 }, { "epoch": 0.6960251176790188, "grad_norm": 436.92378932308515, "learning_rate": 4.3025573692080516e-06, "loss": 219.7877, "step": 36190 }, { "epoch": 0.6962174429394993, "grad_norm": 398.60931934556857, "learning_rate": 4.297542733484364e-06, "loss": 214.5497, "step": 36200 }, { "epoch": 0.6964097681999798, "grad_norm": 362.11414098938627, "learning_rate": 4.292530221831832e-06, "loss": 219.6255, "step": 36210 }, { "epoch": 0.6966020934604603, "grad_norm": 389.04778788562544, "learning_rate": 4.2875198361175305e-06, "loss": 218.1958, "step": 36220 }, { "epoch": 0.6967944187209408, "grad_norm": 382.16408108309616, "learning_rate": 4.282511578207746e-06, "loss": 215.1704, "step": 36230 }, { "epoch": 0.6969867439814214, "grad_norm": 396.8392630552316, "learning_rate": 4.277505449967967e-06, "loss": 220.5081, "step": 36240 }, { "epoch": 0.6971790692419019, "grad_norm": 378.9595549794043, "learning_rate": 4.2725014532629015e-06, "loss": 220.177, "step": 36250 }, { "epoch": 0.6973713945023824, "grad_norm": 437.6280921220109, "learning_rate": 4.267499589956453e-06, "loss": 217.4724, "step": 36260 }, { "epoch": 0.6975637197628629, "grad_norm": 397.7583302478522, "learning_rate": 4.262499861911727e-06, "loss": 223.5542, "step": 36270 }, { "epoch": 0.6977560450233434, "grad_norm": 408.00787437210926, "learning_rate": 4.257502270991048e-06, "loss": 219.0948, "step": 36280 }, { "epoch": 0.697948370283824, "grad_norm": 402.04654195431567, "learning_rate": 4.252506819055934e-06, "loss": 220.6372, "step": 36290 }, { "epoch": 0.6981406955443046, "grad_norm": 414.8568135035925, "learning_rate": 4.2475135079671045e-06, "loss": 217.2164, "step": 36300 }, { "epoch": 0.6983330208047851, "grad_norm": 368.07813678844695, "learning_rate": 4.242522339584486e-06, "loss": 213.7817, "step": 36310 }, { "epoch": 0.6985253460652656, "grad_norm": 378.734022113604, "learning_rate": 4.2375333157672114e-06, "loss": 218.8537, "step": 36320 }, { "epoch": 0.6987176713257461, "grad_norm": 377.1309120295078, "learning_rate": 4.232546438373604e-06, "loss": 217.3216, "step": 36330 }, { "epoch": 0.6989099965862267, "grad_norm": 413.2698859907694, "learning_rate": 4.227561709261198e-06, "loss": 210.7284, "step": 36340 }, { "epoch": 0.6991023218467072, "grad_norm": 401.79411990133167, "learning_rate": 4.222579130286716e-06, "loss": 220.0384, "step": 36350 }, { "epoch": 0.6992946471071877, "grad_norm": 413.46076823933174, "learning_rate": 4.217598703306095e-06, "loss": 222.4217, "step": 36360 }, { "epoch": 0.6994869723676682, "grad_norm": 388.77647912409503, "learning_rate": 4.212620430174457e-06, "loss": 222.7028, "step": 36370 }, { "epoch": 0.6996792976281487, "grad_norm": 427.063085247276, "learning_rate": 4.207644312746124e-06, "loss": 217.6686, "step": 36380 }, { "epoch": 0.6998716228886293, "grad_norm": 385.49363023837566, "learning_rate": 4.202670352874625e-06, "loss": 215.446, "step": 36390 }, { "epoch": 0.7000639481491098, "grad_norm": 394.7322835256111, "learning_rate": 4.197698552412672e-06, "loss": 224.6516, "step": 36400 }, { "epoch": 0.7002562734095903, "grad_norm": 388.8321210388741, "learning_rate": 4.192728913212181e-06, "loss": 220.3064, "step": 36410 }, { "epoch": 0.7004485986700708, "grad_norm": 378.6688332614007, "learning_rate": 4.187761437124256e-06, "loss": 210.9347, "step": 36420 }, { "epoch": 0.7006409239305513, "grad_norm": 508.0817570342048, "learning_rate": 4.182796125999207e-06, "loss": 222.0558, "step": 36430 }, { "epoch": 0.7008332491910318, "grad_norm": 480.9940368722151, "learning_rate": 4.177832981686526e-06, "loss": 221.5692, "step": 36440 }, { "epoch": 0.7010255744515124, "grad_norm": 415.9012148632597, "learning_rate": 4.172872006034899e-06, "loss": 218.7598, "step": 36450 }, { "epoch": 0.7012178997119929, "grad_norm": 431.4645534519969, "learning_rate": 4.167913200892217e-06, "loss": 214.2937, "step": 36460 }, { "epoch": 0.7014102249724734, "grad_norm": 381.06772321132337, "learning_rate": 4.162956568105543e-06, "loss": 225.915, "step": 36470 }, { "epoch": 0.7016025502329539, "grad_norm": 388.60806706428485, "learning_rate": 4.158002109521149e-06, "loss": 214.21, "step": 36480 }, { "epoch": 0.7017948754934344, "grad_norm": 408.77425431935796, "learning_rate": 4.153049826984482e-06, "loss": 218.154, "step": 36490 }, { "epoch": 0.7019872007539151, "grad_norm": 369.10178348734564, "learning_rate": 4.148099722340192e-06, "loss": 225.4862, "step": 36500 }, { "epoch": 0.7021795260143956, "grad_norm": 396.3483999643876, "learning_rate": 4.143151797432109e-06, "loss": 216.5708, "step": 36510 }, { "epoch": 0.7023718512748761, "grad_norm": 411.0565684847517, "learning_rate": 4.1382060541032505e-06, "loss": 219.8197, "step": 36520 }, { "epoch": 0.7025641765353566, "grad_norm": 401.13262557200215, "learning_rate": 4.133262494195824e-06, "loss": 217.943, "step": 36530 }, { "epoch": 0.7027565017958372, "grad_norm": 364.3826246453327, "learning_rate": 4.12832111955123e-06, "loss": 209.2772, "step": 36540 }, { "epoch": 0.7029488270563177, "grad_norm": 405.0066517413073, "learning_rate": 4.123381932010044e-06, "loss": 220.1532, "step": 36550 }, { "epoch": 0.7031411523167982, "grad_norm": 373.58866199094336, "learning_rate": 4.11844493341203e-06, "loss": 218.792, "step": 36560 }, { "epoch": 0.7033334775772787, "grad_norm": 382.6080284475787, "learning_rate": 4.113510125596145e-06, "loss": 220.5175, "step": 36570 }, { "epoch": 0.7035258028377592, "grad_norm": 383.4123986557622, "learning_rate": 4.1085775104005186e-06, "loss": 210.9176, "step": 36580 }, { "epoch": 0.7037181280982397, "grad_norm": 426.4322933050965, "learning_rate": 4.10364708966247e-06, "loss": 219.6772, "step": 36590 }, { "epoch": 0.7039104533587203, "grad_norm": 379.7676666846126, "learning_rate": 4.098718865218496e-06, "loss": 210.9602, "step": 36600 }, { "epoch": 0.7041027786192008, "grad_norm": 397.40373076708136, "learning_rate": 4.0937928389042815e-06, "loss": 224.6631, "step": 36610 }, { "epoch": 0.7042951038796813, "grad_norm": 375.13173179317903, "learning_rate": 4.088869012554694e-06, "loss": 212.3754, "step": 36620 }, { "epoch": 0.7044874291401618, "grad_norm": 401.3622644076648, "learning_rate": 4.08394738800377e-06, "loss": 217.7254, "step": 36630 }, { "epoch": 0.7046797544006423, "grad_norm": 373.0768116647785, "learning_rate": 4.07902796708474e-06, "loss": 212.6043, "step": 36640 }, { "epoch": 0.7048720796611229, "grad_norm": 407.3650275149077, "learning_rate": 4.074110751630005e-06, "loss": 223.1159, "step": 36650 }, { "epoch": 0.7050644049216034, "grad_norm": 387.43968848525844, "learning_rate": 4.0691957434711446e-06, "loss": 214.1859, "step": 36660 }, { "epoch": 0.7052567301820839, "grad_norm": 388.9674021574162, "learning_rate": 4.0642829444389165e-06, "loss": 215.2391, "step": 36670 }, { "epoch": 0.7054490554425644, "grad_norm": 375.57479401836554, "learning_rate": 4.059372356363263e-06, "loss": 224.5012, "step": 36680 }, { "epoch": 0.7056413807030449, "grad_norm": 369.53617259237427, "learning_rate": 4.054463981073296e-06, "loss": 216.8208, "step": 36690 }, { "epoch": 0.7058337059635255, "grad_norm": 395.5861650392418, "learning_rate": 4.049557820397297e-06, "loss": 218.5601, "step": 36700 }, { "epoch": 0.7060260312240061, "grad_norm": 428.8634696467282, "learning_rate": 4.044653876162738e-06, "loss": 209.5418, "step": 36710 }, { "epoch": 0.7062183564844866, "grad_norm": 395.0019917828623, "learning_rate": 4.039752150196257e-06, "loss": 223.0564, "step": 36720 }, { "epoch": 0.7064106817449671, "grad_norm": 384.39564430705565, "learning_rate": 4.034852644323661e-06, "loss": 220.2196, "step": 36730 }, { "epoch": 0.7066030070054476, "grad_norm": 378.0906398002916, "learning_rate": 4.029955360369935e-06, "loss": 212.0748, "step": 36740 }, { "epoch": 0.7067953322659282, "grad_norm": 412.94105531361157, "learning_rate": 4.0250603001592416e-06, "loss": 209.6829, "step": 36750 }, { "epoch": 0.7069876575264087, "grad_norm": 367.7742189188349, "learning_rate": 4.020167465514903e-06, "loss": 212.8914, "step": 36760 }, { "epoch": 0.7071799827868892, "grad_norm": 466.9708614206917, "learning_rate": 4.015276858259427e-06, "loss": 216.3343, "step": 36770 }, { "epoch": 0.7073723080473697, "grad_norm": 417.7808912930527, "learning_rate": 4.0103884802144775e-06, "loss": 218.156, "step": 36780 }, { "epoch": 0.7075646333078502, "grad_norm": 382.6623648613474, "learning_rate": 4.0055023332009e-06, "loss": 217.9552, "step": 36790 }, { "epoch": 0.7077569585683308, "grad_norm": 380.1630706460532, "learning_rate": 4.000618419038702e-06, "loss": 205.2449, "step": 36800 }, { "epoch": 0.7079492838288113, "grad_norm": 390.85828417221916, "learning_rate": 3.9957367395470555e-06, "loss": 220.6086, "step": 36810 }, { "epoch": 0.7081416090892918, "grad_norm": 384.39487026876157, "learning_rate": 3.990857296544315e-06, "loss": 209.5735, "step": 36820 }, { "epoch": 0.7083339343497723, "grad_norm": 394.3181175812435, "learning_rate": 3.985980091847985e-06, "loss": 213.1617, "step": 36830 }, { "epoch": 0.7085262596102528, "grad_norm": 402.4696077077873, "learning_rate": 3.981105127274748e-06, "loss": 218.0245, "step": 36840 }, { "epoch": 0.7087185848707334, "grad_norm": 418.98189710713734, "learning_rate": 3.976232404640441e-06, "loss": 208.4677, "step": 36850 }, { "epoch": 0.7089109101312139, "grad_norm": 377.55295471670394, "learning_rate": 3.971361925760081e-06, "loss": 216.8333, "step": 36860 }, { "epoch": 0.7091032353916944, "grad_norm": 399.90882363551344, "learning_rate": 3.966493692447838e-06, "loss": 218.0203, "step": 36870 }, { "epoch": 0.7092955606521749, "grad_norm": 387.3373044280473, "learning_rate": 3.961627706517044e-06, "loss": 214.0818, "step": 36880 }, { "epoch": 0.7094878859126554, "grad_norm": 380.9525935212469, "learning_rate": 3.956763969780206e-06, "loss": 214.3401, "step": 36890 }, { "epoch": 0.709680211173136, "grad_norm": 372.8655952139652, "learning_rate": 3.951902484048978e-06, "loss": 212.3872, "step": 36900 }, { "epoch": 0.7098725364336166, "grad_norm": 428.66956815989255, "learning_rate": 3.94704325113419e-06, "loss": 213.0268, "step": 36910 }, { "epoch": 0.7100648616940971, "grad_norm": 371.1782345954982, "learning_rate": 3.942186272845821e-06, "loss": 213.4061, "step": 36920 }, { "epoch": 0.7102571869545776, "grad_norm": 381.22663818214284, "learning_rate": 3.937331550993021e-06, "loss": 216.1139, "step": 36930 }, { "epoch": 0.7104495122150581, "grad_norm": 423.725917758182, "learning_rate": 3.932479087384089e-06, "loss": 220.7802, "step": 36940 }, { "epoch": 0.7106418374755387, "grad_norm": 385.8627263522587, "learning_rate": 3.927628883826488e-06, "loss": 215.2264, "step": 36950 }, { "epoch": 0.7108341627360192, "grad_norm": 390.6378826256979, "learning_rate": 3.922780942126837e-06, "loss": 215.5406, "step": 36960 }, { "epoch": 0.7110264879964997, "grad_norm": 366.64284324379037, "learning_rate": 3.91793526409092e-06, "loss": 215.8305, "step": 36970 }, { "epoch": 0.7112188132569802, "grad_norm": 405.37768285901956, "learning_rate": 3.913091851523667e-06, "loss": 216.9105, "step": 36980 }, { "epoch": 0.7114111385174607, "grad_norm": 381.79533253671025, "learning_rate": 3.908250706229168e-06, "loss": 211.014, "step": 36990 }, { "epoch": 0.7116034637779413, "grad_norm": 375.0037377936083, "learning_rate": 3.903411830010676e-06, "loss": 221.757, "step": 37000 }, { "epoch": 0.7117957890384218, "grad_norm": 392.06676748232667, "learning_rate": 3.8985752246705885e-06, "loss": 215.6115, "step": 37010 }, { "epoch": 0.7119881142989023, "grad_norm": 398.8947573310577, "learning_rate": 3.893740892010463e-06, "loss": 216.497, "step": 37020 }, { "epoch": 0.7121804395593828, "grad_norm": 384.7264355834237, "learning_rate": 3.888908833831002e-06, "loss": 220.2314, "step": 37030 }, { "epoch": 0.7123727648198633, "grad_norm": 376.65721600794285, "learning_rate": 3.884079051932073e-06, "loss": 216.734, "step": 37040 }, { "epoch": 0.7125650900803439, "grad_norm": 382.61379253815494, "learning_rate": 3.879251548112692e-06, "loss": 221.2815, "step": 37050 }, { "epoch": 0.7127574153408244, "grad_norm": 448.9567243317206, "learning_rate": 3.874426324171019e-06, "loss": 228.9296, "step": 37060 }, { "epoch": 0.7129497406013049, "grad_norm": 464.1160680085575, "learning_rate": 3.869603381904377e-06, "loss": 217.6615, "step": 37070 }, { "epoch": 0.7131420658617854, "grad_norm": 385.99393433823667, "learning_rate": 3.864782723109227e-06, "loss": 210.8737, "step": 37080 }, { "epoch": 0.7133343911222659, "grad_norm": 401.45338608556045, "learning_rate": 3.859964349581187e-06, "loss": 213.8944, "step": 37090 }, { "epoch": 0.7135267163827465, "grad_norm": 405.72798247152645, "learning_rate": 3.855148263115017e-06, "loss": 214.8662, "step": 37100 }, { "epoch": 0.713719041643227, "grad_norm": 386.78157222311626, "learning_rate": 3.850334465504637e-06, "loss": 217.1708, "step": 37110 }, { "epoch": 0.7139113669037076, "grad_norm": 414.9903177788113, "learning_rate": 3.845522958543104e-06, "loss": 222.5964, "step": 37120 }, { "epoch": 0.7141036921641881, "grad_norm": 445.21050251339625, "learning_rate": 3.840713744022624e-06, "loss": 217.331, "step": 37130 }, { "epoch": 0.7142960174246686, "grad_norm": 396.368690572658, "learning_rate": 3.835906823734548e-06, "loss": 220.5609, "step": 37140 }, { "epoch": 0.7144883426851492, "grad_norm": 403.12981001721914, "learning_rate": 3.831102199469379e-06, "loss": 230.0353, "step": 37150 }, { "epoch": 0.7146806679456297, "grad_norm": 386.08035290109416, "learning_rate": 3.826299873016758e-06, "loss": 219.3719, "step": 37160 }, { "epoch": 0.7148729932061102, "grad_norm": 402.72050886512335, "learning_rate": 3.821499846165468e-06, "loss": 208.788, "step": 37170 }, { "epoch": 0.7150653184665907, "grad_norm": 383.4067361784764, "learning_rate": 3.816702120703449e-06, "loss": 215.059, "step": 37180 }, { "epoch": 0.7152576437270712, "grad_norm": 376.40450746521014, "learning_rate": 3.8119066984177654e-06, "loss": 212.6686, "step": 37190 }, { "epoch": 0.7154499689875518, "grad_norm": 413.62893387422844, "learning_rate": 3.8071135810946415e-06, "loss": 217.3548, "step": 37200 }, { "epoch": 0.7156422942480323, "grad_norm": 412.3567630502925, "learning_rate": 3.802322770519424e-06, "loss": 211.1593, "step": 37210 }, { "epoch": 0.7158346195085128, "grad_norm": 359.2289363382688, "learning_rate": 3.7975342684766215e-06, "loss": 211.115, "step": 37220 }, { "epoch": 0.7160269447689933, "grad_norm": 448.77799802289206, "learning_rate": 3.792748076749867e-06, "loss": 217.7228, "step": 37230 }, { "epoch": 0.7162192700294738, "grad_norm": 455.2804006658165, "learning_rate": 3.787964197121934e-06, "loss": 218.0768, "step": 37240 }, { "epoch": 0.7164115952899544, "grad_norm": 395.4492883309248, "learning_rate": 3.7831826313747454e-06, "loss": 209.8682, "step": 37250 }, { "epoch": 0.7166039205504349, "grad_norm": 411.3581220117286, "learning_rate": 3.778403381289353e-06, "loss": 210.5628, "step": 37260 }, { "epoch": 0.7167962458109154, "grad_norm": 382.0133005085732, "learning_rate": 3.7736264486459486e-06, "loss": 212.949, "step": 37270 }, { "epoch": 0.7169885710713959, "grad_norm": 416.7489572781986, "learning_rate": 3.7688518352238555e-06, "loss": 213.1978, "step": 37280 }, { "epoch": 0.7171808963318764, "grad_norm": 405.33642727439207, "learning_rate": 3.7640795428015462e-06, "loss": 217.8278, "step": 37290 }, { "epoch": 0.717373221592357, "grad_norm": 381.83188029984956, "learning_rate": 3.7593095731566186e-06, "loss": 212.8764, "step": 37300 }, { "epoch": 0.7175655468528375, "grad_norm": 381.97402060451054, "learning_rate": 3.7545419280658025e-06, "loss": 215.6786, "step": 37310 }, { "epoch": 0.7177578721133181, "grad_norm": 394.3325526817493, "learning_rate": 3.749776609304975e-06, "loss": 214.9202, "step": 37320 }, { "epoch": 0.7179501973737986, "grad_norm": 409.54189789928955, "learning_rate": 3.7450136186491315e-06, "loss": 218.1853, "step": 37330 }, { "epoch": 0.7181425226342791, "grad_norm": 407.07468646164665, "learning_rate": 3.7402529578724134e-06, "loss": 215.7882, "step": 37340 }, { "epoch": 0.7183348478947597, "grad_norm": 377.22379337736726, "learning_rate": 3.735494628748082e-06, "loss": 214.9534, "step": 37350 }, { "epoch": 0.7185271731552402, "grad_norm": 394.0257390962629, "learning_rate": 3.730738633048543e-06, "loss": 211.4239, "step": 37360 }, { "epoch": 0.7187194984157207, "grad_norm": 389.58732067506895, "learning_rate": 3.7259849725453225e-06, "loss": 217.8116, "step": 37370 }, { "epoch": 0.7189118236762012, "grad_norm": 377.0923953290185, "learning_rate": 3.7212336490090815e-06, "loss": 212.8401, "step": 37380 }, { "epoch": 0.7191041489366817, "grad_norm": 403.96954872872465, "learning_rate": 3.7164846642096053e-06, "loss": 216.3146, "step": 37390 }, { "epoch": 0.7192964741971622, "grad_norm": 385.1954453311772, "learning_rate": 3.7117380199158204e-06, "loss": 221.3708, "step": 37400 }, { "epoch": 0.7194887994576428, "grad_norm": 382.8224178957188, "learning_rate": 3.706993717895768e-06, "loss": 212.1186, "step": 37410 }, { "epoch": 0.7196811247181233, "grad_norm": 420.2076096408132, "learning_rate": 3.70225175991662e-06, "loss": 214.2058, "step": 37420 }, { "epoch": 0.7198734499786038, "grad_norm": 418.72564680723696, "learning_rate": 3.697512147744684e-06, "loss": 220.3151, "step": 37430 }, { "epoch": 0.7200657752390843, "grad_norm": 410.8615680402483, "learning_rate": 3.6927748831453835e-06, "loss": 210.4465, "step": 37440 }, { "epoch": 0.7202581004995648, "grad_norm": 451.79272070794457, "learning_rate": 3.688039967883269e-06, "loss": 215.5568, "step": 37450 }, { "epoch": 0.7204504257600454, "grad_norm": 383.8605944001642, "learning_rate": 3.683307403722025e-06, "loss": 212.7483, "step": 37460 }, { "epoch": 0.7206427510205259, "grad_norm": 395.36973397526606, "learning_rate": 3.678577192424445e-06, "loss": 212.0434, "step": 37470 }, { "epoch": 0.7208350762810064, "grad_norm": 453.8066554156848, "learning_rate": 3.6738493357524628e-06, "loss": 212.7815, "step": 37480 }, { "epoch": 0.7210274015414869, "grad_norm": 380.37639243922393, "learning_rate": 3.6691238354671233e-06, "loss": 216.1363, "step": 37490 }, { "epoch": 0.7212197268019674, "grad_norm": 441.8785987891232, "learning_rate": 3.664400693328595e-06, "loss": 216.0949, "step": 37500 }, { "epoch": 0.721412052062448, "grad_norm": 390.01442903407167, "learning_rate": 3.6596799110961746e-06, "loss": 210.2427, "step": 37510 }, { "epoch": 0.7216043773229286, "grad_norm": 402.5980550515958, "learning_rate": 3.6549614905282724e-06, "loss": 219.8088, "step": 37520 }, { "epoch": 0.7217967025834091, "grad_norm": 390.03219720239906, "learning_rate": 3.6502454333824224e-06, "loss": 217.078, "step": 37530 }, { "epoch": 0.7219890278438896, "grad_norm": 398.9283709306277, "learning_rate": 3.6455317414152803e-06, "loss": 215.0608, "step": 37540 }, { "epoch": 0.7221813531043701, "grad_norm": 386.8608648754265, "learning_rate": 3.640820416382618e-06, "loss": 216.3629, "step": 37550 }, { "epoch": 0.7223736783648507, "grad_norm": 365.46810760796393, "learning_rate": 3.6361114600393242e-06, "loss": 207.1098, "step": 37560 }, { "epoch": 0.7225660036253312, "grad_norm": 409.28456355992006, "learning_rate": 3.6314048741394057e-06, "loss": 219.487, "step": 37570 }, { "epoch": 0.7227583288858117, "grad_norm": 386.7037923787949, "learning_rate": 3.6267006604359943e-06, "loss": 216.4873, "step": 37580 }, { "epoch": 0.7229506541462922, "grad_norm": 407.7254066263435, "learning_rate": 3.6219988206813285e-06, "loss": 221.3888, "step": 37590 }, { "epoch": 0.7231429794067727, "grad_norm": 379.2419560029266, "learning_rate": 3.6172993566267623e-06, "loss": 211.0802, "step": 37600 }, { "epoch": 0.7233353046672533, "grad_norm": 423.416286781411, "learning_rate": 3.6126022700227715e-06, "loss": 215.6808, "step": 37610 }, { "epoch": 0.7235276299277338, "grad_norm": 392.0818348934516, "learning_rate": 3.6079075626189476e-06, "loss": 216.8167, "step": 37620 }, { "epoch": 0.7237199551882143, "grad_norm": 375.6976536586021, "learning_rate": 3.603215236163987e-06, "loss": 209.7872, "step": 37630 }, { "epoch": 0.7239122804486948, "grad_norm": 381.61006376092143, "learning_rate": 3.5985252924057023e-06, "loss": 211.1029, "step": 37640 }, { "epoch": 0.7241046057091753, "grad_norm": 360.58643961335366, "learning_rate": 3.5938377330910245e-06, "loss": 214.0201, "step": 37650 }, { "epoch": 0.7242969309696559, "grad_norm": 389.5572225164136, "learning_rate": 3.5891525599659905e-06, "loss": 214.1953, "step": 37660 }, { "epoch": 0.7244892562301364, "grad_norm": 376.96841726163024, "learning_rate": 3.5844697747757496e-06, "loss": 211.0877, "step": 37670 }, { "epoch": 0.7246815814906169, "grad_norm": 385.9190622298062, "learning_rate": 3.5797893792645577e-06, "loss": 214.5382, "step": 37680 }, { "epoch": 0.7248739067510974, "grad_norm": 441.936096418896, "learning_rate": 3.5751113751757925e-06, "loss": 216.2822, "step": 37690 }, { "epoch": 0.7250662320115779, "grad_norm": 371.7068358969222, "learning_rate": 3.5704357642519295e-06, "loss": 211.2753, "step": 37700 }, { "epoch": 0.7252585572720585, "grad_norm": 439.6478301760872, "learning_rate": 3.5657625482345526e-06, "loss": 212.1111, "step": 37710 }, { "epoch": 0.725450882532539, "grad_norm": 428.51900380435933, "learning_rate": 3.5610917288643655e-06, "loss": 213.3385, "step": 37720 }, { "epoch": 0.7256432077930196, "grad_norm": 432.0612694325686, "learning_rate": 3.556423307881167e-06, "loss": 216.1817, "step": 37730 }, { "epoch": 0.7258355330535001, "grad_norm": 419.2711745131861, "learning_rate": 3.551757287023865e-06, "loss": 216.0665, "step": 37740 }, { "epoch": 0.7260278583139806, "grad_norm": 354.73624102156936, "learning_rate": 3.547093668030479e-06, "loss": 217.3852, "step": 37750 }, { "epoch": 0.7262201835744612, "grad_norm": 411.31539865415704, "learning_rate": 3.542432452638126e-06, "loss": 215.8667, "step": 37760 }, { "epoch": 0.7264125088349417, "grad_norm": 375.5391671951594, "learning_rate": 3.5377736425830366e-06, "loss": 219.6735, "step": 37770 }, { "epoch": 0.7266048340954222, "grad_norm": 445.47570070130257, "learning_rate": 3.5331172396005354e-06, "loss": 209.6886, "step": 37780 }, { "epoch": 0.7267971593559027, "grad_norm": 387.302323914234, "learning_rate": 3.528463245425062e-06, "loss": 220.6175, "step": 37790 }, { "epoch": 0.7269894846163832, "grad_norm": 371.3444822515457, "learning_rate": 3.5238116617901486e-06, "loss": 213.4917, "step": 37800 }, { "epoch": 0.7271818098768638, "grad_norm": 413.97165815655575, "learning_rate": 3.519162490428433e-06, "loss": 220.2051, "step": 37810 }, { "epoch": 0.7273741351373443, "grad_norm": 400.79588450286514, "learning_rate": 3.5145157330716516e-06, "loss": 211.9624, "step": 37820 }, { "epoch": 0.7275664603978248, "grad_norm": 382.4262973156571, "learning_rate": 3.509871391450652e-06, "loss": 220.5928, "step": 37830 }, { "epoch": 0.7277587856583053, "grad_norm": 376.69697537133334, "learning_rate": 3.505229467295371e-06, "loss": 213.096, "step": 37840 }, { "epoch": 0.7279511109187858, "grad_norm": 382.8646927653982, "learning_rate": 3.5005899623348493e-06, "loss": 224.9132, "step": 37850 }, { "epoch": 0.7281434361792664, "grad_norm": 391.33572040404994, "learning_rate": 3.495952878297221e-06, "loss": 217.0161, "step": 37860 }, { "epoch": 0.7283357614397469, "grad_norm": 386.14417599405647, "learning_rate": 3.4913182169097315e-06, "loss": 210.7214, "step": 37870 }, { "epoch": 0.7285280867002274, "grad_norm": 372.181013680401, "learning_rate": 3.4866859798987084e-06, "loss": 212.2185, "step": 37880 }, { "epoch": 0.7287204119607079, "grad_norm": 474.6264707198723, "learning_rate": 3.4820561689895906e-06, "loss": 215.612, "step": 37890 }, { "epoch": 0.7289127372211884, "grad_norm": 416.08600144946297, "learning_rate": 3.4774287859068988e-06, "loss": 209.9914, "step": 37900 }, { "epoch": 0.729105062481669, "grad_norm": 412.40189661700816, "learning_rate": 3.472803832374263e-06, "loss": 207.5467, "step": 37910 }, { "epoch": 0.7292973877421495, "grad_norm": 378.48672886471013, "learning_rate": 3.4681813101144e-06, "loss": 215.2839, "step": 37920 }, { "epoch": 0.7294897130026301, "grad_norm": 369.0745295431209, "learning_rate": 3.4635612208491197e-06, "loss": 217.8814, "step": 37930 }, { "epoch": 0.7296820382631106, "grad_norm": 365.8571157845644, "learning_rate": 3.458943566299334e-06, "loss": 205.6632, "step": 37940 }, { "epoch": 0.7298743635235911, "grad_norm": 396.1648681981586, "learning_rate": 3.454328348185042e-06, "loss": 206.6167, "step": 37950 }, { "epoch": 0.7300666887840717, "grad_norm": 420.6619390886531, "learning_rate": 3.4497155682253314e-06, "loss": 220.5103, "step": 37960 }, { "epoch": 0.7302590140445522, "grad_norm": 370.7848964152775, "learning_rate": 3.4451052281383922e-06, "loss": 208.4737, "step": 37970 }, { "epoch": 0.7304513393050327, "grad_norm": 436.12107681847715, "learning_rate": 3.440497329641499e-06, "loss": 217.1044, "step": 37980 }, { "epoch": 0.7306436645655132, "grad_norm": 405.2681207341782, "learning_rate": 3.435891874451017e-06, "loss": 211.4854, "step": 37990 }, { "epoch": 0.7308359898259937, "grad_norm": 401.76486804390674, "learning_rate": 3.431288864282398e-06, "loss": 211.495, "step": 38000 }, { "epoch": 0.7310283150864743, "grad_norm": 393.80215459274297, "learning_rate": 3.4266883008501937e-06, "loss": 208.8307, "step": 38010 }, { "epoch": 0.7312206403469548, "grad_norm": 403.0547242021537, "learning_rate": 3.4220901858680365e-06, "loss": 219.4676, "step": 38020 }, { "epoch": 0.7314129656074353, "grad_norm": 386.04037789868806, "learning_rate": 3.4174945210486445e-06, "loss": 216.0579, "step": 38030 }, { "epoch": 0.7316052908679158, "grad_norm": 371.44900781854693, "learning_rate": 3.4129013081038285e-06, "loss": 216.7225, "step": 38040 }, { "epoch": 0.7317976161283963, "grad_norm": 372.5616089910205, "learning_rate": 3.40831054874449e-06, "loss": 212.3076, "step": 38050 }, { "epoch": 0.7319899413888769, "grad_norm": 390.34243525406276, "learning_rate": 3.403722244680606e-06, "loss": 211.6561, "step": 38060 }, { "epoch": 0.7321822666493574, "grad_norm": 399.363061548625, "learning_rate": 3.3991363976212423e-06, "loss": 221.2642, "step": 38070 }, { "epoch": 0.7323745919098379, "grad_norm": 390.86162224395724, "learning_rate": 3.394553009274556e-06, "loss": 208.7783, "step": 38080 }, { "epoch": 0.7325669171703184, "grad_norm": 376.030082914757, "learning_rate": 3.389972081347782e-06, "loss": 211.4085, "step": 38090 }, { "epoch": 0.7327592424307989, "grad_norm": 421.28527494798163, "learning_rate": 3.385393615547239e-06, "loss": 221.0359, "step": 38100 }, { "epoch": 0.7329515676912794, "grad_norm": 385.6668608487081, "learning_rate": 3.3808176135783276e-06, "loss": 225.6684, "step": 38110 }, { "epoch": 0.73314389295176, "grad_norm": 395.77734220139604, "learning_rate": 3.3762440771455386e-06, "loss": 217.6572, "step": 38120 }, { "epoch": 0.7333362182122405, "grad_norm": 360.22850059152927, "learning_rate": 3.371673007952435e-06, "loss": 211.7321, "step": 38130 }, { "epoch": 0.7335285434727211, "grad_norm": 451.19564657478725, "learning_rate": 3.3671044077016634e-06, "loss": 223.9502, "step": 38140 }, { "epoch": 0.7337208687332016, "grad_norm": 377.72615175686684, "learning_rate": 3.3625382780949576e-06, "loss": 217.7665, "step": 38150 }, { "epoch": 0.7339131939936822, "grad_norm": 415.1236291122088, "learning_rate": 3.357974620833121e-06, "loss": 216.6595, "step": 38160 }, { "epoch": 0.7341055192541627, "grad_norm": 400.53581474755936, "learning_rate": 3.353413437616039e-06, "loss": 218.0245, "step": 38170 }, { "epoch": 0.7342978445146432, "grad_norm": 404.45658003650885, "learning_rate": 3.3488547301426786e-06, "loss": 203.9033, "step": 38180 }, { "epoch": 0.7344901697751237, "grad_norm": 405.42808882473145, "learning_rate": 3.344298500111087e-06, "loss": 213.4934, "step": 38190 }, { "epoch": 0.7346824950356042, "grad_norm": 385.6353083140758, "learning_rate": 3.3397447492183833e-06, "loss": 213.109, "step": 38200 }, { "epoch": 0.7348748202960848, "grad_norm": 395.50540149928486, "learning_rate": 3.3351934791607576e-06, "loss": 215.3671, "step": 38210 }, { "epoch": 0.7350671455565653, "grad_norm": 396.4688833152432, "learning_rate": 3.330644691633492e-06, "loss": 213.5135, "step": 38220 }, { "epoch": 0.7352594708170458, "grad_norm": 416.1352364224095, "learning_rate": 3.3260983883309306e-06, "loss": 220.9658, "step": 38230 }, { "epoch": 0.7354517960775263, "grad_norm": 396.2827182497824, "learning_rate": 3.321554570946497e-06, "loss": 211.0146, "step": 38240 }, { "epoch": 0.7356441213380068, "grad_norm": 392.59965837030563, "learning_rate": 3.317013241172684e-06, "loss": 213.7918, "step": 38250 }, { "epoch": 0.7358364465984873, "grad_norm": 377.3783215380771, "learning_rate": 3.3124744007010688e-06, "loss": 211.642, "step": 38260 }, { "epoch": 0.7360287718589679, "grad_norm": 407.9626386487798, "learning_rate": 3.3079380512222904e-06, "loss": 213.8329, "step": 38270 }, { "epoch": 0.7362210971194484, "grad_norm": 399.81884197970874, "learning_rate": 3.3034041944260654e-06, "loss": 214.722, "step": 38280 }, { "epoch": 0.7364134223799289, "grad_norm": 400.74149208559845, "learning_rate": 3.2988728320011774e-06, "loss": 211.8562, "step": 38290 }, { "epoch": 0.7366057476404094, "grad_norm": 398.42200787287084, "learning_rate": 3.294343965635489e-06, "loss": 212.6863, "step": 38300 }, { "epoch": 0.7367980729008899, "grad_norm": 360.8047174002981, "learning_rate": 3.289817597015923e-06, "loss": 206.4632, "step": 38310 }, { "epoch": 0.7369903981613705, "grad_norm": 420.24950274864705, "learning_rate": 3.2852937278284837e-06, "loss": 216.2751, "step": 38320 }, { "epoch": 0.737182723421851, "grad_norm": 413.1067219285103, "learning_rate": 3.28077235975823e-06, "loss": 212.4115, "step": 38330 }, { "epoch": 0.7373750486823316, "grad_norm": 393.10881815630324, "learning_rate": 3.2762534944893033e-06, "loss": 216.4406, "step": 38340 }, { "epoch": 0.7375673739428121, "grad_norm": 362.0353279532254, "learning_rate": 3.271737133704904e-06, "loss": 208.488, "step": 38350 }, { "epoch": 0.7377596992032927, "grad_norm": 409.13149928697777, "learning_rate": 3.2672232790872983e-06, "loss": 214.1322, "step": 38360 }, { "epoch": 0.7379520244637732, "grad_norm": 398.8009694634766, "learning_rate": 3.262711932317828e-06, "loss": 216.5977, "step": 38370 }, { "epoch": 0.7381443497242537, "grad_norm": 397.3899555932812, "learning_rate": 3.258203095076894e-06, "loss": 212.8879, "step": 38380 }, { "epoch": 0.7383366749847342, "grad_norm": 423.94228008383357, "learning_rate": 3.2536967690439592e-06, "loss": 222.6395, "step": 38390 }, { "epoch": 0.7385290002452147, "grad_norm": 366.08395855529665, "learning_rate": 3.249192955897562e-06, "loss": 213.8753, "step": 38400 }, { "epoch": 0.7387213255056952, "grad_norm": 382.8523485325907, "learning_rate": 3.2446916573152955e-06, "loss": 214.5685, "step": 38410 }, { "epoch": 0.7389136507661758, "grad_norm": 370.92386982816544, "learning_rate": 3.24019287497382e-06, "loss": 210.0728, "step": 38420 }, { "epoch": 0.7391059760266563, "grad_norm": 386.27770105026764, "learning_rate": 3.235696610548852e-06, "loss": 206.765, "step": 38430 }, { "epoch": 0.7392983012871368, "grad_norm": 377.6579108413092, "learning_rate": 3.231202865715184e-06, "loss": 215.869, "step": 38440 }, { "epoch": 0.7394906265476173, "grad_norm": 364.18110165419216, "learning_rate": 3.226711642146655e-06, "loss": 221.5153, "step": 38450 }, { "epoch": 0.7396829518080978, "grad_norm": 366.34314623436023, "learning_rate": 3.222222941516179e-06, "loss": 211.9042, "step": 38460 }, { "epoch": 0.7398752770685784, "grad_norm": 372.8199171418252, "learning_rate": 3.2177367654957137e-06, "loss": 205.8132, "step": 38470 }, { "epoch": 0.7400676023290589, "grad_norm": 412.9839015956195, "learning_rate": 3.213253115756295e-06, "loss": 213.2388, "step": 38480 }, { "epoch": 0.7402599275895394, "grad_norm": 384.3722887546716, "learning_rate": 3.208771993968003e-06, "loss": 213.9061, "step": 38490 }, { "epoch": 0.7404522528500199, "grad_norm": 432.2626937087747, "learning_rate": 3.2042934017999795e-06, "loss": 209.8503, "step": 38500 }, { "epoch": 0.7406445781105004, "grad_norm": 368.4995964807424, "learning_rate": 3.1998173409204326e-06, "loss": 211.2958, "step": 38510 }, { "epoch": 0.740836903370981, "grad_norm": 411.90924050817864, "learning_rate": 3.1953438129966175e-06, "loss": 221.98, "step": 38520 }, { "epoch": 0.7410292286314615, "grad_norm": 399.9467861844467, "learning_rate": 3.190872819694849e-06, "loss": 220.5172, "step": 38530 }, { "epoch": 0.741221553891942, "grad_norm": 362.32565209940924, "learning_rate": 3.1864043626804953e-06, "loss": 207.5476, "step": 38540 }, { "epoch": 0.7414138791524226, "grad_norm": 387.2613273555262, "learning_rate": 3.18193844361799e-06, "loss": 206.5205, "step": 38550 }, { "epoch": 0.7416062044129031, "grad_norm": 381.62547798058756, "learning_rate": 3.1774750641708095e-06, "loss": 209.4551, "step": 38560 }, { "epoch": 0.7417985296733837, "grad_norm": 400.43162824113284, "learning_rate": 3.1730142260014875e-06, "loss": 213.3418, "step": 38570 }, { "epoch": 0.7419908549338642, "grad_norm": 399.7989303182746, "learning_rate": 3.1685559307716187e-06, "loss": 210.5069, "step": 38580 }, { "epoch": 0.7421831801943447, "grad_norm": 404.96576419568987, "learning_rate": 3.164100180141839e-06, "loss": 212.9981, "step": 38590 }, { "epoch": 0.7423755054548252, "grad_norm": 399.1935336954331, "learning_rate": 3.159646975771842e-06, "loss": 216.6827, "step": 38600 }, { "epoch": 0.7425678307153057, "grad_norm": 414.73239676081874, "learning_rate": 3.155196319320374e-06, "loss": 220.2067, "step": 38610 }, { "epoch": 0.7427601559757863, "grad_norm": 407.57386494826847, "learning_rate": 3.1507482124452337e-06, "loss": 217.2531, "step": 38620 }, { "epoch": 0.7429524812362668, "grad_norm": 401.86931011984944, "learning_rate": 3.146302656803266e-06, "loss": 209.7045, "step": 38630 }, { "epoch": 0.7431448064967473, "grad_norm": 388.65518628770843, "learning_rate": 3.1418596540503653e-06, "loss": 209.4193, "step": 38640 }, { "epoch": 0.7433371317572278, "grad_norm": 404.19009529074054, "learning_rate": 3.1374192058414755e-06, "loss": 220.9851, "step": 38650 }, { "epoch": 0.7435294570177083, "grad_norm": 392.35560514186375, "learning_rate": 3.1329813138305944e-06, "loss": 216.9903, "step": 38660 }, { "epoch": 0.7437217822781889, "grad_norm": 378.99140078284177, "learning_rate": 3.128545979670762e-06, "loss": 205.9095, "step": 38670 }, { "epoch": 0.7439141075386694, "grad_norm": 356.97991826053993, "learning_rate": 3.124113205014063e-06, "loss": 209.2848, "step": 38680 }, { "epoch": 0.7441064327991499, "grad_norm": 409.0252085946919, "learning_rate": 3.119682991511639e-06, "loss": 214.9606, "step": 38690 }, { "epoch": 0.7442987580596304, "grad_norm": 376.6255874008177, "learning_rate": 3.1152553408136686e-06, "loss": 211.097, "step": 38700 }, { "epoch": 0.7444910833201109, "grad_norm": 423.33406546829696, "learning_rate": 3.110830254569378e-06, "loss": 214.8367, "step": 38710 }, { "epoch": 0.7446834085805915, "grad_norm": 376.6874226133999, "learning_rate": 3.106407734427037e-06, "loss": 214.7831, "step": 38720 }, { "epoch": 0.744875733841072, "grad_norm": 418.0899045821614, "learning_rate": 3.101987782033966e-06, "loss": 214.9129, "step": 38730 }, { "epoch": 0.7450680591015525, "grad_norm": 400.30569583700344, "learning_rate": 3.097570399036519e-06, "loss": 213.6869, "step": 38740 }, { "epoch": 0.7452603843620331, "grad_norm": 383.42602546053075, "learning_rate": 3.0931555870801033e-06, "loss": 206.9788, "step": 38750 }, { "epoch": 0.7454527096225136, "grad_norm": 356.1900325540024, "learning_rate": 3.0887433478091587e-06, "loss": 210.5475, "step": 38760 }, { "epoch": 0.7456450348829942, "grad_norm": 367.03826476126704, "learning_rate": 3.0843336828671765e-06, "loss": 211.6151, "step": 38770 }, { "epoch": 0.7458373601434747, "grad_norm": 413.7632781708218, "learning_rate": 3.079926593896683e-06, "loss": 210.51, "step": 38780 }, { "epoch": 0.7460296854039552, "grad_norm": 399.30631891094725, "learning_rate": 3.0755220825392397e-06, "loss": 208.5259, "step": 38790 }, { "epoch": 0.7462220106644357, "grad_norm": 405.0131366510525, "learning_rate": 3.0711201504354628e-06, "loss": 212.0677, "step": 38800 }, { "epoch": 0.7464143359249162, "grad_norm": 360.94292117850114, "learning_rate": 3.0667207992249948e-06, "loss": 214.5323, "step": 38810 }, { "epoch": 0.7466066611853968, "grad_norm": 385.09193853354145, "learning_rate": 3.062324030546523e-06, "loss": 213.5694, "step": 38820 }, { "epoch": 0.7467989864458773, "grad_norm": 398.3778364003248, "learning_rate": 3.057929846037767e-06, "loss": 209.2806, "step": 38830 }, { "epoch": 0.7469913117063578, "grad_norm": 742.5675863119249, "learning_rate": 3.0535382473354945e-06, "loss": 214.8567, "step": 38840 }, { "epoch": 0.7471836369668383, "grad_norm": 408.47781024558793, "learning_rate": 3.0491492360755003e-06, "loss": 214.0556, "step": 38850 }, { "epoch": 0.7473759622273188, "grad_norm": 421.4223228558872, "learning_rate": 3.0447628138926153e-06, "loss": 205.1925, "step": 38860 }, { "epoch": 0.7475682874877994, "grad_norm": 402.6709587511956, "learning_rate": 3.0403789824207165e-06, "loss": 206.5705, "step": 38870 }, { "epoch": 0.7477606127482799, "grad_norm": 399.43451685824147, "learning_rate": 3.0359977432927013e-06, "loss": 209.7825, "step": 38880 }, { "epoch": 0.7479529380087604, "grad_norm": 378.2326058062348, "learning_rate": 3.0316190981405147e-06, "loss": 209.3171, "step": 38890 }, { "epoch": 0.7481452632692409, "grad_norm": 383.2054001914671, "learning_rate": 3.0272430485951244e-06, "loss": 205.747, "step": 38900 }, { "epoch": 0.7483375885297214, "grad_norm": 381.85301956063654, "learning_rate": 3.0228695962865438e-06, "loss": 213.7018, "step": 38910 }, { "epoch": 0.748529913790202, "grad_norm": 381.84347906420965, "learning_rate": 3.018498742843806e-06, "loss": 215.2569, "step": 38920 }, { "epoch": 0.7487222390506825, "grad_norm": 389.72086948502, "learning_rate": 3.014130489894982e-06, "loss": 212.5456, "step": 38930 }, { "epoch": 0.748914564311163, "grad_norm": 381.69047349620934, "learning_rate": 3.0097648390671765e-06, "loss": 205.8806, "step": 38940 }, { "epoch": 0.7491068895716435, "grad_norm": 395.4374140124631, "learning_rate": 3.005401791986522e-06, "loss": 217.4661, "step": 38950 }, { "epoch": 0.7492992148321241, "grad_norm": 412.10893285433343, "learning_rate": 3.00104135027818e-06, "loss": 216.992, "step": 38960 }, { "epoch": 0.7494915400926047, "grad_norm": 384.5676629741645, "learning_rate": 2.99668351556634e-06, "loss": 215.7178, "step": 38970 }, { "epoch": 0.7496838653530852, "grad_norm": 387.03736723957786, "learning_rate": 2.99232828947423e-06, "loss": 215.4647, "step": 38980 }, { "epoch": 0.7498761906135657, "grad_norm": 394.12633978471496, "learning_rate": 2.987975673624096e-06, "loss": 208.3384, "step": 38990 }, { "epoch": 0.7500685158740462, "grad_norm": 406.5301865142197, "learning_rate": 2.9836256696372178e-06, "loss": 216.6155, "step": 39000 }, { "epoch": 0.7502608411345267, "grad_norm": 382.50716260252875, "learning_rate": 2.9792782791338936e-06, "loss": 213.395, "step": 39010 }, { "epoch": 0.7504531663950073, "grad_norm": 385.7269047709961, "learning_rate": 2.9749335037334604e-06, "loss": 214.8669, "step": 39020 }, { "epoch": 0.7506454916554878, "grad_norm": 375.7249782620801, "learning_rate": 2.9705913450542777e-06, "loss": 207.3446, "step": 39030 }, { "epoch": 0.7508378169159683, "grad_norm": 395.05859468248786, "learning_rate": 2.9662518047137214e-06, "loss": 209.4555, "step": 39040 }, { "epoch": 0.7510301421764488, "grad_norm": 436.94216794977086, "learning_rate": 2.961914884328203e-06, "loss": 213.5024, "step": 39050 }, { "epoch": 0.7512224674369293, "grad_norm": 376.9192390926667, "learning_rate": 2.9575805855131546e-06, "loss": 211.8836, "step": 39060 }, { "epoch": 0.7514147926974099, "grad_norm": 360.944377478836, "learning_rate": 2.9532489098830274e-06, "loss": 204.9103, "step": 39070 }, { "epoch": 0.7516071179578904, "grad_norm": 391.2692282582815, "learning_rate": 2.9489198590512967e-06, "loss": 210.6938, "step": 39080 }, { "epoch": 0.7517994432183709, "grad_norm": 380.3382717877565, "learning_rate": 2.9445934346304706e-06, "loss": 212.9648, "step": 39090 }, { "epoch": 0.7519917684788514, "grad_norm": 375.75944537292645, "learning_rate": 2.940269638232065e-06, "loss": 211.1945, "step": 39100 }, { "epoch": 0.7521840937393319, "grad_norm": 370.8254249722766, "learning_rate": 2.935948471466622e-06, "loss": 208.9388, "step": 39110 }, { "epoch": 0.7523764189998124, "grad_norm": 375.93976811394225, "learning_rate": 2.9316299359437085e-06, "loss": 210.7232, "step": 39120 }, { "epoch": 0.752568744260293, "grad_norm": 388.6608756366859, "learning_rate": 2.9273140332719064e-06, "loss": 209.3444, "step": 39130 }, { "epoch": 0.7527610695207735, "grad_norm": 394.8411038032195, "learning_rate": 2.923000765058818e-06, "loss": 211.9417, "step": 39140 }, { "epoch": 0.752953394781254, "grad_norm": 357.8108760822515, "learning_rate": 2.9186901329110605e-06, "loss": 206.0361, "step": 39150 }, { "epoch": 0.7531457200417346, "grad_norm": 411.33338976079426, "learning_rate": 2.9143821384342808e-06, "loss": 208.3241, "step": 39160 }, { "epoch": 0.7533380453022152, "grad_norm": 406.8763755050772, "learning_rate": 2.9100767832331277e-06, "loss": 209.0823, "step": 39170 }, { "epoch": 0.7535303705626957, "grad_norm": 404.6217829369774, "learning_rate": 2.9057740689112822e-06, "loss": 216.5944, "step": 39180 }, { "epoch": 0.7537226958231762, "grad_norm": 395.94485031572174, "learning_rate": 2.901473997071428e-06, "loss": 210.6494, "step": 39190 }, { "epoch": 0.7539150210836567, "grad_norm": 364.8912713794418, "learning_rate": 2.8971765693152767e-06, "loss": 209.9009, "step": 39200 }, { "epoch": 0.7541073463441372, "grad_norm": 368.61475603258856, "learning_rate": 2.8928817872435465e-06, "loss": 209.4373, "step": 39210 }, { "epoch": 0.7542996716046177, "grad_norm": 383.7738753542534, "learning_rate": 2.8885896524559696e-06, "loss": 213.8918, "step": 39220 }, { "epoch": 0.7544919968650983, "grad_norm": 396.9673238524041, "learning_rate": 2.8843001665513016e-06, "loss": 211.1548, "step": 39230 }, { "epoch": 0.7546843221255788, "grad_norm": 398.0556002414846, "learning_rate": 2.8800133311273016e-06, "loss": 217.4401, "step": 39240 }, { "epoch": 0.7548766473860593, "grad_norm": 407.99467701478494, "learning_rate": 2.875729147780745e-06, "loss": 214.796, "step": 39250 }, { "epoch": 0.7550689726465398, "grad_norm": 430.7337783857894, "learning_rate": 2.871447618107417e-06, "loss": 210.8239, "step": 39260 }, { "epoch": 0.7552612979070203, "grad_norm": 395.9840164375906, "learning_rate": 2.867168743702122e-06, "loss": 206.1601, "step": 39270 }, { "epoch": 0.7554536231675009, "grad_norm": 409.001009160928, "learning_rate": 2.8628925261586683e-06, "loss": 216.4979, "step": 39280 }, { "epoch": 0.7556459484279814, "grad_norm": 430.96511143901336, "learning_rate": 2.8586189670698717e-06, "loss": 212.8584, "step": 39290 }, { "epoch": 0.7558382736884619, "grad_norm": 369.7596818222909, "learning_rate": 2.854348068027568e-06, "loss": 209.4182, "step": 39300 }, { "epoch": 0.7560305989489424, "grad_norm": 402.3889795623061, "learning_rate": 2.850079830622593e-06, "loss": 215.6611, "step": 39310 }, { "epoch": 0.7562229242094229, "grad_norm": 398.99672659650656, "learning_rate": 2.845814256444799e-06, "loss": 221.0187, "step": 39320 }, { "epoch": 0.7564152494699035, "grad_norm": 367.41047924702616, "learning_rate": 2.8415513470830357e-06, "loss": 218.1719, "step": 39330 }, { "epoch": 0.756607574730384, "grad_norm": 406.3223290528226, "learning_rate": 2.837291104125174e-06, "loss": 215.5918, "step": 39340 }, { "epoch": 0.7567998999908645, "grad_norm": 363.89980218495265, "learning_rate": 2.833033529158079e-06, "loss": 208.7063, "step": 39350 }, { "epoch": 0.7569922252513451, "grad_norm": 400.34670914874675, "learning_rate": 2.8287786237676253e-06, "loss": 219.3123, "step": 39360 }, { "epoch": 0.7571845505118256, "grad_norm": 389.73127859276985, "learning_rate": 2.824526389538701e-06, "loss": 216.983, "step": 39370 }, { "epoch": 0.7573768757723062, "grad_norm": 385.61119778823263, "learning_rate": 2.8202768280551894e-06, "loss": 208.8682, "step": 39380 }, { "epoch": 0.7575692010327867, "grad_norm": 402.8467762543396, "learning_rate": 2.8160299408999827e-06, "loss": 219.3151, "step": 39390 }, { "epoch": 0.7577615262932672, "grad_norm": 388.6695812007182, "learning_rate": 2.811785729654972e-06, "loss": 213.4402, "step": 39400 }, { "epoch": 0.7579538515537477, "grad_norm": 433.95926371201625, "learning_rate": 2.8075441959010628e-06, "loss": 209.4541, "step": 39410 }, { "epoch": 0.7581461768142282, "grad_norm": 389.98800289470756, "learning_rate": 2.8033053412181543e-06, "loss": 209.9189, "step": 39420 }, { "epoch": 0.7583385020747088, "grad_norm": 350.80924189818757, "learning_rate": 2.799069167185148e-06, "loss": 199.8208, "step": 39430 }, { "epoch": 0.7585308273351893, "grad_norm": 390.00383181463206, "learning_rate": 2.7948356753799466e-06, "loss": 219.3106, "step": 39440 }, { "epoch": 0.7587231525956698, "grad_norm": 389.23729622559796, "learning_rate": 2.7906048673794593e-06, "loss": 209.4522, "step": 39450 }, { "epoch": 0.7589154778561503, "grad_norm": 372.12571019976934, "learning_rate": 2.7863767447595946e-06, "loss": 215.2538, "step": 39460 }, { "epoch": 0.7591078031166308, "grad_norm": 368.5555867845163, "learning_rate": 2.7821513090952523e-06, "loss": 208.9072, "step": 39470 }, { "epoch": 0.7593001283771114, "grad_norm": 400.9093183377035, "learning_rate": 2.7779285619603446e-06, "loss": 211.3843, "step": 39480 }, { "epoch": 0.7594924536375919, "grad_norm": 362.03232664261355, "learning_rate": 2.77370850492777e-06, "loss": 224.0575, "step": 39490 }, { "epoch": 0.7596847788980724, "grad_norm": 412.70488746760253, "learning_rate": 2.7694911395694324e-06, "loss": 212.1221, "step": 39500 }, { "epoch": 0.7598771041585529, "grad_norm": 383.4012591281291, "learning_rate": 2.765276467456225e-06, "loss": 215.0577, "step": 39510 }, { "epoch": 0.7600694294190334, "grad_norm": 426.3263710762801, "learning_rate": 2.761064490158052e-06, "loss": 213.2636, "step": 39520 }, { "epoch": 0.760261754679514, "grad_norm": 379.15038772281486, "learning_rate": 2.7568552092438018e-06, "loss": 211.3147, "step": 39530 }, { "epoch": 0.7604540799399945, "grad_norm": 361.5405567790523, "learning_rate": 2.7526486262813578e-06, "loss": 209.4867, "step": 39540 }, { "epoch": 0.760646405200475, "grad_norm": 406.0292213017384, "learning_rate": 2.7484447428376094e-06, "loss": 204.6452, "step": 39550 }, { "epoch": 0.7608387304609555, "grad_norm": 396.3097126086856, "learning_rate": 2.7442435604784313e-06, "loss": 213.2015, "step": 39560 }, { "epoch": 0.7610310557214361, "grad_norm": 418.29638892861743, "learning_rate": 2.740045080768694e-06, "loss": 211.9402, "step": 39570 }, { "epoch": 0.7612233809819167, "grad_norm": 379.33138573699574, "learning_rate": 2.7358493052722603e-06, "loss": 208.9075, "step": 39580 }, { "epoch": 0.7614157062423972, "grad_norm": 438.1353530896628, "learning_rate": 2.7316562355519904e-06, "loss": 209.6229, "step": 39590 }, { "epoch": 0.7616080315028777, "grad_norm": 429.43310704191356, "learning_rate": 2.727465873169729e-06, "loss": 211.5358, "step": 39600 }, { "epoch": 0.7618003567633582, "grad_norm": 375.30528783560396, "learning_rate": 2.723278219686324e-06, "loss": 211.6839, "step": 39610 }, { "epoch": 0.7619926820238387, "grad_norm": 389.7426065337725, "learning_rate": 2.7190932766615998e-06, "loss": 213.2397, "step": 39620 }, { "epoch": 0.7621850072843193, "grad_norm": 361.26565453677784, "learning_rate": 2.714911045654385e-06, "loss": 205.8367, "step": 39630 }, { "epoch": 0.7623773325447998, "grad_norm": 410.2427715654697, "learning_rate": 2.7107315282224878e-06, "loss": 209.0607, "step": 39640 }, { "epoch": 0.7625696578052803, "grad_norm": 389.6081665585904, "learning_rate": 2.7065547259227078e-06, "loss": 213.5682, "step": 39650 }, { "epoch": 0.7627619830657608, "grad_norm": 414.38171683702114, "learning_rate": 2.7023806403108397e-06, "loss": 215.9959, "step": 39660 }, { "epoch": 0.7629543083262413, "grad_norm": 373.52675098944684, "learning_rate": 2.698209272941659e-06, "loss": 206.277, "step": 39670 }, { "epoch": 0.7631466335867219, "grad_norm": 427.9547862737436, "learning_rate": 2.694040625368931e-06, "loss": 220.976, "step": 39680 }, { "epoch": 0.7633389588472024, "grad_norm": 394.99758024115886, "learning_rate": 2.689874699145405e-06, "loss": 208.6515, "step": 39690 }, { "epoch": 0.7635312841076829, "grad_norm": 372.7943856847385, "learning_rate": 2.685711495822827e-06, "loss": 208.8916, "step": 39700 }, { "epoch": 0.7637236093681634, "grad_norm": 395.39271770215197, "learning_rate": 2.6815510169519164e-06, "loss": 207.7454, "step": 39710 }, { "epoch": 0.7639159346286439, "grad_norm": 386.7147699847869, "learning_rate": 2.677393264082381e-06, "loss": 220.2939, "step": 39720 }, { "epoch": 0.7641082598891245, "grad_norm": 352.90171295598793, "learning_rate": 2.673238238762921e-06, "loss": 215.5709, "step": 39730 }, { "epoch": 0.764300585149605, "grad_norm": 412.79674189734624, "learning_rate": 2.6690859425412075e-06, "loss": 211.8218, "step": 39740 }, { "epoch": 0.7644929104100855, "grad_norm": 440.5740240739491, "learning_rate": 2.6649363769639103e-06, "loss": 221.7515, "step": 39750 }, { "epoch": 0.764685235670566, "grad_norm": 401.37888801809254, "learning_rate": 2.660789543576667e-06, "loss": 211.859, "step": 39760 }, { "epoch": 0.7648775609310466, "grad_norm": 369.5282264370912, "learning_rate": 2.6566454439241107e-06, "loss": 213.5541, "step": 39770 }, { "epoch": 0.7650698861915272, "grad_norm": 397.52046623148067, "learning_rate": 2.652504079549848e-06, "loss": 218.0958, "step": 39780 }, { "epoch": 0.7652622114520077, "grad_norm": 369.68359955230426, "learning_rate": 2.648365451996466e-06, "loss": 213.4677, "step": 39790 }, { "epoch": 0.7654545367124882, "grad_norm": 358.15209748122936, "learning_rate": 2.6442295628055346e-06, "loss": 205.2239, "step": 39800 }, { "epoch": 0.7656468619729687, "grad_norm": 382.9311218183089, "learning_rate": 2.64009641351761e-06, "loss": 208.5467, "step": 39810 }, { "epoch": 0.7658391872334492, "grad_norm": 362.4926086620384, "learning_rate": 2.635966005672218e-06, "loss": 211.9969, "step": 39820 }, { "epoch": 0.7660315124939298, "grad_norm": 391.845281686503, "learning_rate": 2.631838340807865e-06, "loss": 215.461, "step": 39830 }, { "epoch": 0.7662238377544103, "grad_norm": 389.38735272616975, "learning_rate": 2.6277134204620436e-06, "loss": 211.6494, "step": 39840 }, { "epoch": 0.7664161630148908, "grad_norm": 431.8984457834533, "learning_rate": 2.6235912461712167e-06, "loss": 218.689, "step": 39850 }, { "epoch": 0.7666084882753713, "grad_norm": 402.3679808093642, "learning_rate": 2.619471819470821e-06, "loss": 209.1339, "step": 39860 }, { "epoch": 0.7668008135358518, "grad_norm": 371.1726689792626, "learning_rate": 2.6153551418952827e-06, "loss": 206.0153, "step": 39870 }, { "epoch": 0.7669931387963324, "grad_norm": 372.4524033856654, "learning_rate": 2.6112412149779888e-06, "loss": 212.4483, "step": 39880 }, { "epoch": 0.7671854640568129, "grad_norm": 390.2470029755241, "learning_rate": 2.6071300402513165e-06, "loss": 213.3157, "step": 39890 }, { "epoch": 0.7673777893172934, "grad_norm": 395.5268433720969, "learning_rate": 2.603021619246604e-06, "loss": 214.1548, "step": 39900 }, { "epoch": 0.7675701145777739, "grad_norm": 374.38766170504414, "learning_rate": 2.5989159534941768e-06, "loss": 210.0371, "step": 39910 }, { "epoch": 0.7677624398382544, "grad_norm": 348.97056465125155, "learning_rate": 2.5948130445233232e-06, "loss": 210.1506, "step": 39920 }, { "epoch": 0.767954765098735, "grad_norm": 385.74405268303934, "learning_rate": 2.5907128938623093e-06, "loss": 211.9513, "step": 39930 }, { "epoch": 0.7681470903592155, "grad_norm": 423.85196996392256, "learning_rate": 2.5866155030383722e-06, "loss": 217.6935, "step": 39940 }, { "epoch": 0.768339415619696, "grad_norm": 407.2722872398317, "learning_rate": 2.582520873577726e-06, "loss": 206.3344, "step": 39950 }, { "epoch": 0.7685317408801765, "grad_norm": 352.25993822206067, "learning_rate": 2.578429007005552e-06, "loss": 210.1658, "step": 39960 }, { "epoch": 0.768724066140657, "grad_norm": 386.0638546789341, "learning_rate": 2.5743399048460004e-06, "loss": 215.3121, "step": 39970 }, { "epoch": 0.7689163914011377, "grad_norm": 369.5798666043826, "learning_rate": 2.570253568622193e-06, "loss": 211.6251, "step": 39980 }, { "epoch": 0.7691087166616182, "grad_norm": 374.4656310115537, "learning_rate": 2.5661699998562286e-06, "loss": 212.1195, "step": 39990 }, { "epoch": 0.7693010419220987, "grad_norm": 409.6320917581006, "learning_rate": 2.5620892000691643e-06, "loss": 220.7276, "step": 40000 }, { "epoch": 0.7694933671825792, "grad_norm": 399.7185184456794, "learning_rate": 2.5580111707810296e-06, "loss": 205.4715, "step": 40010 }, { "epoch": 0.7696856924430597, "grad_norm": 417.77311432139237, "learning_rate": 2.5539359135108244e-06, "loss": 212.5975, "step": 40020 }, { "epoch": 0.7698780177035403, "grad_norm": 357.71082793451444, "learning_rate": 2.549863429776519e-06, "loss": 207.3461, "step": 40030 }, { "epoch": 0.7700703429640208, "grad_norm": 381.80855188560895, "learning_rate": 2.5457937210950433e-06, "loss": 211.3806, "step": 40040 }, { "epoch": 0.7702626682245013, "grad_norm": 389.6639341252145, "learning_rate": 2.541726788982294e-06, "loss": 210.348, "step": 40050 }, { "epoch": 0.7704549934849818, "grad_norm": 377.51986488485994, "learning_rate": 2.5376626349531395e-06, "loss": 211.5141, "step": 40060 }, { "epoch": 0.7706473187454623, "grad_norm": 376.46020036282295, "learning_rate": 2.53360126052141e-06, "loss": 215.3596, "step": 40070 }, { "epoch": 0.7708396440059428, "grad_norm": 391.33115229090686, "learning_rate": 2.529542667199896e-06, "loss": 215.8145, "step": 40080 }, { "epoch": 0.7710319692664234, "grad_norm": 403.976553907104, "learning_rate": 2.525486856500363e-06, "loss": 208.0852, "step": 40090 }, { "epoch": 0.7712242945269039, "grad_norm": 362.49665409997715, "learning_rate": 2.5214338299335306e-06, "loss": 211.3493, "step": 40100 }, { "epoch": 0.7714166197873844, "grad_norm": 366.49316080226913, "learning_rate": 2.5173835890090826e-06, "loss": 214.2764, "step": 40110 }, { "epoch": 0.7716089450478649, "grad_norm": 404.7037842540386, "learning_rate": 2.5133361352356666e-06, "loss": 209.8697, "step": 40120 }, { "epoch": 0.7718012703083454, "grad_norm": 361.8774100387627, "learning_rate": 2.5092914701208958e-06, "loss": 204.0543, "step": 40130 }, { "epoch": 0.771993595568826, "grad_norm": 356.3596220835635, "learning_rate": 2.5052495951713406e-06, "loss": 210.4549, "step": 40140 }, { "epoch": 0.7721859208293065, "grad_norm": 358.213439768463, "learning_rate": 2.5012105118925267e-06, "loss": 208.4516, "step": 40150 }, { "epoch": 0.772378246089787, "grad_norm": 399.55786379734417, "learning_rate": 2.497174221788955e-06, "loss": 208.3724, "step": 40160 }, { "epoch": 0.7725705713502675, "grad_norm": 405.0462877160036, "learning_rate": 2.4931407263640683e-06, "loss": 210.5768, "step": 40170 }, { "epoch": 0.7727628966107482, "grad_norm": 393.1726569261335, "learning_rate": 2.489110027120285e-06, "loss": 213.4158, "step": 40180 }, { "epoch": 0.7729552218712287, "grad_norm": 374.1379140084526, "learning_rate": 2.4850821255589664e-06, "loss": 208.5962, "step": 40190 }, { "epoch": 0.7731475471317092, "grad_norm": 353.74974249857166, "learning_rate": 2.4810570231804463e-06, "loss": 217.174, "step": 40200 }, { "epoch": 0.7733398723921897, "grad_norm": 379.0423595891199, "learning_rate": 2.4770347214840063e-06, "loss": 208.6815, "step": 40210 }, { "epoch": 0.7735321976526702, "grad_norm": 352.99146402263244, "learning_rate": 2.473015221967886e-06, "loss": 207.484, "step": 40220 }, { "epoch": 0.7737245229131507, "grad_norm": 421.33861937517287, "learning_rate": 2.4689985261292805e-06, "loss": 208.2637, "step": 40230 }, { "epoch": 0.7739168481736313, "grad_norm": 407.46918761914634, "learning_rate": 2.464984635464348e-06, "loss": 208.6252, "step": 40240 }, { "epoch": 0.7741091734341118, "grad_norm": 354.16983107885034, "learning_rate": 2.460973551468194e-06, "loss": 198.5188, "step": 40250 }, { "epoch": 0.7743014986945923, "grad_norm": 393.1502003953757, "learning_rate": 2.456965275634878e-06, "loss": 214.0779, "step": 40260 }, { "epoch": 0.7744938239550728, "grad_norm": 370.7356071511445, "learning_rate": 2.4529598094574226e-06, "loss": 213.3693, "step": 40270 }, { "epoch": 0.7746861492155533, "grad_norm": 380.25155597419234, "learning_rate": 2.4489571544277944e-06, "loss": 207.8812, "step": 40280 }, { "epoch": 0.7748784744760339, "grad_norm": 384.6823808337117, "learning_rate": 2.444957312036914e-06, "loss": 216.7433, "step": 40290 }, { "epoch": 0.7750707997365144, "grad_norm": 373.41206974329594, "learning_rate": 2.4409602837746625e-06, "loss": 206.5674, "step": 40300 }, { "epoch": 0.7752631249969949, "grad_norm": 361.24037894315006, "learning_rate": 2.4369660711298603e-06, "loss": 206.8981, "step": 40310 }, { "epoch": 0.7754554502574754, "grad_norm": 401.287058424949, "learning_rate": 2.4329746755902917e-06, "loss": 217.8212, "step": 40320 }, { "epoch": 0.7756477755179559, "grad_norm": 396.7202974685465, "learning_rate": 2.428986098642684e-06, "loss": 207.4115, "step": 40330 }, { "epoch": 0.7758401007784365, "grad_norm": 380.77805355964557, "learning_rate": 2.425000341772711e-06, "loss": 209.3615, "step": 40340 }, { "epoch": 0.776032426038917, "grad_norm": 400.86785566184125, "learning_rate": 2.4210174064650084e-06, "loss": 203.576, "step": 40350 }, { "epoch": 0.7762247512993975, "grad_norm": 416.89772600492324, "learning_rate": 2.4170372942031506e-06, "loss": 210.3003, "step": 40360 }, { "epoch": 0.776417076559878, "grad_norm": 404.7180500533755, "learning_rate": 2.4130600064696618e-06, "loss": 209.1882, "step": 40370 }, { "epoch": 0.7766094018203585, "grad_norm": 388.5533773879684, "learning_rate": 2.4090855447460205e-06, "loss": 213.25, "step": 40380 }, { "epoch": 0.7768017270808392, "grad_norm": 393.5919657426778, "learning_rate": 2.4051139105126463e-06, "loss": 210.339, "step": 40390 }, { "epoch": 0.7769940523413197, "grad_norm": 426.08640397184075, "learning_rate": 2.4011451052489064e-06, "loss": 214.112, "step": 40400 }, { "epoch": 0.7771863776018002, "grad_norm": 399.0350070963163, "learning_rate": 2.3971791304331125e-06, "loss": 206.2459, "step": 40410 }, { "epoch": 0.7773787028622807, "grad_norm": 366.80926515212064, "learning_rate": 2.393215987542531e-06, "loss": 209.712, "step": 40420 }, { "epoch": 0.7775710281227612, "grad_norm": 386.78936525839777, "learning_rate": 2.3892556780533606e-06, "loss": 223.2839, "step": 40430 }, { "epoch": 0.7777633533832418, "grad_norm": 386.6402615053517, "learning_rate": 2.385298203440758e-06, "loss": 208.233, "step": 40440 }, { "epoch": 0.7779556786437223, "grad_norm": 367.298475936663, "learning_rate": 2.3813435651788107e-06, "loss": 209.0829, "step": 40450 }, { "epoch": 0.7781480039042028, "grad_norm": 363.53060180816016, "learning_rate": 2.377391764740562e-06, "loss": 205.0176, "step": 40460 }, { "epoch": 0.7783403291646833, "grad_norm": 373.2186635977586, "learning_rate": 2.3734428035979883e-06, "loss": 202.7703, "step": 40470 }, { "epoch": 0.7785326544251638, "grad_norm": 352.4191148747799, "learning_rate": 2.3694966832220123e-06, "loss": 209.5993, "step": 40480 }, { "epoch": 0.7787249796856444, "grad_norm": 392.4156549700369, "learning_rate": 2.365553405082501e-06, "loss": 212.0213, "step": 40490 }, { "epoch": 0.7789173049461249, "grad_norm": 361.5778541776504, "learning_rate": 2.3616129706482604e-06, "loss": 211.7287, "step": 40500 }, { "epoch": 0.7791096302066054, "grad_norm": 409.50114908696054, "learning_rate": 2.357675381387036e-06, "loss": 211.5296, "step": 40510 }, { "epoch": 0.7793019554670859, "grad_norm": 370.6351263812637, "learning_rate": 2.3537406387655114e-06, "loss": 207.166, "step": 40520 }, { "epoch": 0.7794942807275664, "grad_norm": 398.10616343751417, "learning_rate": 2.349808744249321e-06, "loss": 209.6109, "step": 40530 }, { "epoch": 0.779686605988047, "grad_norm": 371.82780440909073, "learning_rate": 2.345879699303025e-06, "loss": 209.3387, "step": 40540 }, { "epoch": 0.7798789312485275, "grad_norm": 409.6346665746718, "learning_rate": 2.3419535053901264e-06, "loss": 209.4224, "step": 40550 }, { "epoch": 0.780071256509008, "grad_norm": 466.3919975552398, "learning_rate": 2.338030163973073e-06, "loss": 222.971, "step": 40560 }, { "epoch": 0.7802635817694885, "grad_norm": 454.49359412655474, "learning_rate": 2.334109676513242e-06, "loss": 210.0865, "step": 40570 }, { "epoch": 0.780455907029969, "grad_norm": 361.61006941299746, "learning_rate": 2.330192044470948e-06, "loss": 206.8723, "step": 40580 }, { "epoch": 0.7806482322904497, "grad_norm": 379.20199257545136, "learning_rate": 2.3262772693054457e-06, "loss": 216.4956, "step": 40590 }, { "epoch": 0.7808405575509302, "grad_norm": 388.46386274133926, "learning_rate": 2.322365352474928e-06, "loss": 208.4771, "step": 40600 }, { "epoch": 0.7810328828114107, "grad_norm": 415.436739259405, "learning_rate": 2.3184562954365153e-06, "loss": 214.4146, "step": 40610 }, { "epoch": 0.7812252080718912, "grad_norm": 398.5329126749291, "learning_rate": 2.3145500996462656e-06, "loss": 215.2695, "step": 40620 }, { "epoch": 0.7814175333323717, "grad_norm": 438.33163439495354, "learning_rate": 2.310646766559177e-06, "loss": 217.8767, "step": 40630 }, { "epoch": 0.7816098585928523, "grad_norm": 367.6509677730949, "learning_rate": 2.3067462976291744e-06, "loss": 221.5393, "step": 40640 }, { "epoch": 0.7818021838533328, "grad_norm": 365.1621423978296, "learning_rate": 2.302848694309118e-06, "loss": 214.411, "step": 40650 }, { "epoch": 0.7819945091138133, "grad_norm": 371.1656255314514, "learning_rate": 2.2989539580507957e-06, "loss": 212.0582, "step": 40660 }, { "epoch": 0.7821868343742938, "grad_norm": 366.49136219272657, "learning_rate": 2.2950620903049414e-06, "loss": 213.4375, "step": 40670 }, { "epoch": 0.7823791596347743, "grad_norm": 382.3267572102029, "learning_rate": 2.2911730925212073e-06, "loss": 212.9401, "step": 40680 }, { "epoch": 0.7825714848952549, "grad_norm": 385.54656457172945, "learning_rate": 2.2872869661481766e-06, "loss": 205.6087, "step": 40690 }, { "epoch": 0.7827638101557354, "grad_norm": 381.35934430726485, "learning_rate": 2.283403712633375e-06, "loss": 205.9314, "step": 40700 }, { "epoch": 0.7829561354162159, "grad_norm": 395.2955811950514, "learning_rate": 2.279523333423247e-06, "loss": 217.6813, "step": 40710 }, { "epoch": 0.7831484606766964, "grad_norm": 364.21103978588957, "learning_rate": 2.2756458299631667e-06, "loss": 203.4238, "step": 40720 }, { "epoch": 0.7833407859371769, "grad_norm": 420.8925373105516, "learning_rate": 2.271771203697445e-06, "loss": 208.1244, "step": 40730 }, { "epoch": 0.7835331111976575, "grad_norm": 402.48721611435695, "learning_rate": 2.267899456069311e-06, "loss": 211.8673, "step": 40740 }, { "epoch": 0.783725436458138, "grad_norm": 387.2492381568478, "learning_rate": 2.2640305885209336e-06, "loss": 215.0771, "step": 40750 }, { "epoch": 0.7839177617186185, "grad_norm": 490.7325616931793, "learning_rate": 2.2601646024933976e-06, "loss": 218.0952, "step": 40760 }, { "epoch": 0.784110086979099, "grad_norm": 372.99874000973307, "learning_rate": 2.256301499426716e-06, "loss": 208.6124, "step": 40770 }, { "epoch": 0.7843024122395795, "grad_norm": 381.8762460911402, "learning_rate": 2.252441280759838e-06, "loss": 208.2737, "step": 40780 }, { "epoch": 0.78449473750006, "grad_norm": 373.99020109006455, "learning_rate": 2.248583947930628e-06, "loss": 204.0592, "step": 40790 }, { "epoch": 0.7846870627605407, "grad_norm": 364.2311543412736, "learning_rate": 2.2447295023758755e-06, "loss": 208.1338, "step": 40800 }, { "epoch": 0.7848793880210212, "grad_norm": 378.7443553582108, "learning_rate": 2.2408779455313035e-06, "loss": 209.0336, "step": 40810 }, { "epoch": 0.7850717132815017, "grad_norm": 373.838788342394, "learning_rate": 2.2370292788315505e-06, "loss": 212.4266, "step": 40820 }, { "epoch": 0.7852640385419822, "grad_norm": 388.422659611989, "learning_rate": 2.2331835037101825e-06, "loss": 203.205, "step": 40830 }, { "epoch": 0.7854563638024628, "grad_norm": 379.6486678988751, "learning_rate": 2.2293406215996814e-06, "loss": 208.6698, "step": 40840 }, { "epoch": 0.7856486890629433, "grad_norm": 358.08450003870985, "learning_rate": 2.2255006339314667e-06, "loss": 209.7238, "step": 40850 }, { "epoch": 0.7858410143234238, "grad_norm": 426.21040775390503, "learning_rate": 2.2216635421358623e-06, "loss": 205.0207, "step": 40860 }, { "epoch": 0.7860333395839043, "grad_norm": 388.51231251878716, "learning_rate": 2.2178293476421276e-06, "loss": 206.4274, "step": 40870 }, { "epoch": 0.7862256648443848, "grad_norm": 465.4097706179838, "learning_rate": 2.213998051878431e-06, "loss": 207.1637, "step": 40880 }, { "epoch": 0.7864179901048653, "grad_norm": 421.3127931274906, "learning_rate": 2.2101696562718735e-06, "loss": 203.6789, "step": 40890 }, { "epoch": 0.7866103153653459, "grad_norm": 397.25827661302765, "learning_rate": 2.206344162248466e-06, "loss": 216.7128, "step": 40900 }, { "epoch": 0.7868026406258264, "grad_norm": 381.4484702235938, "learning_rate": 2.2025215712331383e-06, "loss": 210.033, "step": 40910 }, { "epoch": 0.7869949658863069, "grad_norm": 395.54586204684006, "learning_rate": 2.1987018846497487e-06, "loss": 205.8125, "step": 40920 }, { "epoch": 0.7871872911467874, "grad_norm": 367.7789538166651, "learning_rate": 2.194885103921064e-06, "loss": 211.1847, "step": 40930 }, { "epoch": 0.787379616407268, "grad_norm": 393.0109345617761, "learning_rate": 2.191071230468772e-06, "loss": 212.5428, "step": 40940 }, { "epoch": 0.7875719416677485, "grad_norm": 416.3467328375286, "learning_rate": 2.1872602657134757e-06, "loss": 212.9899, "step": 40950 }, { "epoch": 0.787764266928229, "grad_norm": 376.86730881825264, "learning_rate": 2.1834522110747014e-06, "loss": 209.9806, "step": 40960 }, { "epoch": 0.7879565921887095, "grad_norm": 370.94172546040613, "learning_rate": 2.179647067970885e-06, "loss": 207.9988, "step": 40970 }, { "epoch": 0.78814891744919, "grad_norm": 484.27605644855134, "learning_rate": 2.1758448378193743e-06, "loss": 214.4127, "step": 40980 }, { "epoch": 0.7883412427096705, "grad_norm": 391.2194759694569, "learning_rate": 2.1720455220364443e-06, "loss": 208.5151, "step": 40990 }, { "epoch": 0.7885335679701512, "grad_norm": 396.5174047992866, "learning_rate": 2.168249122037275e-06, "loss": 205.8971, "step": 41000 }, { "epoch": 0.7887258932306317, "grad_norm": 380.90194724114434, "learning_rate": 2.1644556392359583e-06, "loss": 207.6405, "step": 41010 }, { "epoch": 0.7889182184911122, "grad_norm": 375.352348977756, "learning_rate": 2.160665075045508e-06, "loss": 199.7787, "step": 41020 }, { "epoch": 0.7891105437515927, "grad_norm": 372.62798697200947, "learning_rate": 2.1568774308778494e-06, "loss": 211.7347, "step": 41030 }, { "epoch": 0.7893028690120732, "grad_norm": 406.53854028717467, "learning_rate": 2.1530927081438148e-06, "loss": 219.5607, "step": 41040 }, { "epoch": 0.7894951942725538, "grad_norm": 423.68231539923755, "learning_rate": 2.1493109082531473e-06, "loss": 200.8829, "step": 41050 }, { "epoch": 0.7896875195330343, "grad_norm": 396.1324553868352, "learning_rate": 2.1455320326145103e-06, "loss": 203.8986, "step": 41060 }, { "epoch": 0.7898798447935148, "grad_norm": 364.4211746942873, "learning_rate": 2.141756082635471e-06, "loss": 206.797, "step": 41070 }, { "epoch": 0.7900721700539953, "grad_norm": 389.649453891042, "learning_rate": 2.137983059722507e-06, "loss": 201.7376, "step": 41080 }, { "epoch": 0.7902644953144758, "grad_norm": 387.0089249503018, "learning_rate": 2.1342129652810063e-06, "loss": 208.0186, "step": 41090 }, { "epoch": 0.7904568205749564, "grad_norm": 394.4162726860938, "learning_rate": 2.1304458007152694e-06, "loss": 204.6994, "step": 41100 }, { "epoch": 0.7906491458354369, "grad_norm": 397.59111280299146, "learning_rate": 2.1266815674285026e-06, "loss": 216.6823, "step": 41110 }, { "epoch": 0.7908414710959174, "grad_norm": 384.82380167561735, "learning_rate": 2.1229202668228197e-06, "loss": 218.8221, "step": 41120 }, { "epoch": 0.7910337963563979, "grad_norm": 381.19785425991427, "learning_rate": 2.1191619002992405e-06, "loss": 201.5279, "step": 41130 }, { "epoch": 0.7912261216168784, "grad_norm": 462.92689135610516, "learning_rate": 2.1154064692577e-06, "loss": 207.5111, "step": 41140 }, { "epoch": 0.791418446877359, "grad_norm": 381.3694790807442, "learning_rate": 2.111653975097029e-06, "loss": 206.0437, "step": 41150 }, { "epoch": 0.7916107721378395, "grad_norm": 410.5500321929193, "learning_rate": 2.1079044192149713e-06, "loss": 208.7455, "step": 41160 }, { "epoch": 0.79180309739832, "grad_norm": 379.1735991775821, "learning_rate": 2.1041578030081777e-06, "loss": 208.1734, "step": 41170 }, { "epoch": 0.7919954226588005, "grad_norm": 374.2296064865967, "learning_rate": 2.100414127872198e-06, "loss": 203.8804, "step": 41180 }, { "epoch": 0.792187747919281, "grad_norm": 368.2118493745568, "learning_rate": 2.0966733952014904e-06, "loss": 215.8449, "step": 41190 }, { "epoch": 0.7923800731797617, "grad_norm": 336.3000961510837, "learning_rate": 2.0929356063894125e-06, "loss": 207.8467, "step": 41200 }, { "epoch": 0.7925723984402422, "grad_norm": 417.828983799324, "learning_rate": 2.089200762828234e-06, "loss": 209.6514, "step": 41210 }, { "epoch": 0.7927647237007227, "grad_norm": 371.89545464401465, "learning_rate": 2.0854688659091203e-06, "loss": 210.8139, "step": 41220 }, { "epoch": 0.7929570489612032, "grad_norm": 379.7393062396534, "learning_rate": 2.08173991702214e-06, "loss": 201.6136, "step": 41230 }, { "epoch": 0.7931493742216837, "grad_norm": 401.7372233329505, "learning_rate": 2.0780139175562675e-06, "loss": 210.716, "step": 41240 }, { "epoch": 0.7933416994821643, "grad_norm": 382.9525792334095, "learning_rate": 2.0742908688993746e-06, "loss": 205.0247, "step": 41250 }, { "epoch": 0.7935340247426448, "grad_norm": 373.84164533215096, "learning_rate": 2.070570772438236e-06, "loss": 204.1771, "step": 41260 }, { "epoch": 0.7937263500031253, "grad_norm": 375.2773060248128, "learning_rate": 2.066853629558524e-06, "loss": 209.8484, "step": 41270 }, { "epoch": 0.7939186752636058, "grad_norm": 358.09329970319953, "learning_rate": 2.0631394416448157e-06, "loss": 200.9646, "step": 41280 }, { "epoch": 0.7941110005240863, "grad_norm": 363.4687126312382, "learning_rate": 2.059428210080583e-06, "loss": 202.5251, "step": 41290 }, { "epoch": 0.7943033257845669, "grad_norm": 412.07035716220867, "learning_rate": 2.0557199362482005e-06, "loss": 210.6676, "step": 41300 }, { "epoch": 0.7944956510450474, "grad_norm": 371.78941453302485, "learning_rate": 2.052014621528935e-06, "loss": 208.0328, "step": 41310 }, { "epoch": 0.7946879763055279, "grad_norm": 367.2522493217452, "learning_rate": 2.048312267302961e-06, "loss": 206.5413, "step": 41320 }, { "epoch": 0.7948803015660084, "grad_norm": 363.6215668802465, "learning_rate": 2.044612874949341e-06, "loss": 203.8802, "step": 41330 }, { "epoch": 0.7950726268264889, "grad_norm": 388.5862279885824, "learning_rate": 2.040916445846034e-06, "loss": 214.138, "step": 41340 }, { "epoch": 0.7952649520869695, "grad_norm": 478.54260791926976, "learning_rate": 2.037222981369905e-06, "loss": 204.9165, "step": 41350 }, { "epoch": 0.79545727734745, "grad_norm": 396.8818735865609, "learning_rate": 2.033532482896707e-06, "loss": 206.2721, "step": 41360 }, { "epoch": 0.7956496026079305, "grad_norm": 400.0979738904692, "learning_rate": 2.0298449518010875e-06, "loss": 206.8979, "step": 41370 }, { "epoch": 0.795841927868411, "grad_norm": 430.2141975558431, "learning_rate": 2.0261603894565897e-06, "loss": 207.9649, "step": 41380 }, { "epoch": 0.7960342531288915, "grad_norm": 374.930373576075, "learning_rate": 2.0224787972356574e-06, "loss": 211.1537, "step": 41390 }, { "epoch": 0.796226578389372, "grad_norm": 378.1549978452382, "learning_rate": 2.0188001765096198e-06, "loss": 211.6395, "step": 41400 }, { "epoch": 0.7964189036498527, "grad_norm": 383.3548792639214, "learning_rate": 2.0151245286486998e-06, "loss": 208.4597, "step": 41410 }, { "epoch": 0.7966112289103332, "grad_norm": 374.38924242464293, "learning_rate": 2.011451855022021e-06, "loss": 211.3832, "step": 41420 }, { "epoch": 0.7968035541708137, "grad_norm": 377.74745787461427, "learning_rate": 2.0077821569975885e-06, "loss": 207.8268, "step": 41430 }, { "epoch": 0.7969958794312942, "grad_norm": 401.8075312843853, "learning_rate": 2.0041154359423087e-06, "loss": 214.7287, "step": 41440 }, { "epoch": 0.7971882046917748, "grad_norm": 408.96477305382166, "learning_rate": 2.000451693221971e-06, "loss": 214.4973, "step": 41450 }, { "epoch": 0.7973805299522553, "grad_norm": 390.1806889630553, "learning_rate": 1.9967909302012635e-06, "loss": 206.4686, "step": 41460 }, { "epoch": 0.7975728552127358, "grad_norm": 374.1116299851007, "learning_rate": 1.9931331482437553e-06, "loss": 209.1153, "step": 41470 }, { "epoch": 0.7977651804732163, "grad_norm": 365.5129898815175, "learning_rate": 1.989478348711913e-06, "loss": 206.2217, "step": 41480 }, { "epoch": 0.7979575057336968, "grad_norm": 416.0808687396897, "learning_rate": 1.9858265329670844e-06, "loss": 205.5742, "step": 41490 }, { "epoch": 0.7981498309941774, "grad_norm": 398.41175782165334, "learning_rate": 1.9821777023695178e-06, "loss": 206.7222, "step": 41500 }, { "epoch": 0.7983421562546579, "grad_norm": 369.40730826166464, "learning_rate": 1.9785318582783375e-06, "loss": 205.7667, "step": 41510 }, { "epoch": 0.7985344815151384, "grad_norm": 370.9989182735855, "learning_rate": 1.9748890020515577e-06, "loss": 209.4025, "step": 41520 }, { "epoch": 0.7987268067756189, "grad_norm": 361.7503771232004, "learning_rate": 1.9712491350460895e-06, "loss": 202.9891, "step": 41530 }, { "epoch": 0.7989191320360994, "grad_norm": 345.97152031372815, "learning_rate": 1.967612258617718e-06, "loss": 204.3035, "step": 41540 }, { "epoch": 0.79911145729658, "grad_norm": 404.66309614884284, "learning_rate": 1.9639783741211218e-06, "loss": 209.4293, "step": 41550 }, { "epoch": 0.7993037825570605, "grad_norm": 376.599238149299, "learning_rate": 1.960347482909859e-06, "loss": 209.0351, "step": 41560 }, { "epoch": 0.799496107817541, "grad_norm": 426.6318711373734, "learning_rate": 1.956719586336382e-06, "loss": 211.9038, "step": 41570 }, { "epoch": 0.7996884330780215, "grad_norm": 372.0986377358308, "learning_rate": 1.953094685752017e-06, "loss": 209.4806, "step": 41580 }, { "epoch": 0.799880758338502, "grad_norm": 350.6518750560805, "learning_rate": 1.949472782506984e-06, "loss": 211.3456, "step": 41590 }, { "epoch": 0.8000730835989825, "grad_norm": 392.1056829673286, "learning_rate": 1.945853877950382e-06, "loss": 218.7484, "step": 41600 }, { "epoch": 0.8002654088594632, "grad_norm": 426.1488961164271, "learning_rate": 1.942237973430192e-06, "loss": 213.8978, "step": 41610 }, { "epoch": 0.8004577341199437, "grad_norm": 393.5070429472998, "learning_rate": 1.9386250702932784e-06, "loss": 210.5322, "step": 41620 }, { "epoch": 0.8006500593804242, "grad_norm": 375.03818369316474, "learning_rate": 1.9350151698853857e-06, "loss": 210.4921, "step": 41630 }, { "epoch": 0.8008423846409047, "grad_norm": 360.15439987391665, "learning_rate": 1.9314082735511475e-06, "loss": 208.094, "step": 41640 }, { "epoch": 0.8010347099013853, "grad_norm": 381.0743658570249, "learning_rate": 1.92780438263407e-06, "loss": 205.0781, "step": 41650 }, { "epoch": 0.8012270351618658, "grad_norm": 353.8982612598771, "learning_rate": 1.9242034984765436e-06, "loss": 208.9497, "step": 41660 }, { "epoch": 0.8014193604223463, "grad_norm": 369.89037903693855, "learning_rate": 1.9206056224198346e-06, "loss": 203.1918, "step": 41670 }, { "epoch": 0.8016116856828268, "grad_norm": 392.1642696750914, "learning_rate": 1.9170107558040983e-06, "loss": 214.6508, "step": 41680 }, { "epoch": 0.8018040109433073, "grad_norm": 364.8607112589798, "learning_rate": 1.9134188999683613e-06, "loss": 202.766, "step": 41690 }, { "epoch": 0.8019963362037879, "grad_norm": 386.51606403550096, "learning_rate": 1.9098300562505266e-06, "loss": 207.0134, "step": 41700 }, { "epoch": 0.8021886614642684, "grad_norm": 364.748551191837, "learning_rate": 1.9062442259873847e-06, "loss": 211.1021, "step": 41710 }, { "epoch": 0.8023809867247489, "grad_norm": 401.25946898896325, "learning_rate": 1.9026614105145935e-06, "loss": 216.4434, "step": 41720 }, { "epoch": 0.8025733119852294, "grad_norm": 392.9856943967848, "learning_rate": 1.8990816111666976e-06, "loss": 208.1454, "step": 41730 }, { "epoch": 0.8027656372457099, "grad_norm": 390.4493611998971, "learning_rate": 1.8955048292771083e-06, "loss": 205.0919, "step": 41740 }, { "epoch": 0.8029579625061904, "grad_norm": 415.085700641082, "learning_rate": 1.891931066178122e-06, "loss": 200.7006, "step": 41750 }, { "epoch": 0.803150287766671, "grad_norm": 339.2285846018839, "learning_rate": 1.888360323200904e-06, "loss": 199.2415, "step": 41760 }, { "epoch": 0.8033426130271515, "grad_norm": 401.20788185343383, "learning_rate": 1.8847926016754947e-06, "loss": 206.1355, "step": 41770 }, { "epoch": 0.803534938287632, "grad_norm": 412.1138494801861, "learning_rate": 1.8812279029308177e-06, "loss": 205.3744, "step": 41780 }, { "epoch": 0.8037272635481125, "grad_norm": 376.8151987996318, "learning_rate": 1.87766622829466e-06, "loss": 208.3064, "step": 41790 }, { "epoch": 0.803919588808593, "grad_norm": 382.2229894827946, "learning_rate": 1.874107579093688e-06, "loss": 219.2669, "step": 41800 }, { "epoch": 0.8041119140690736, "grad_norm": 368.6116072457296, "learning_rate": 1.870551956653437e-06, "loss": 211.5302, "step": 41810 }, { "epoch": 0.8043042393295542, "grad_norm": 385.280062826804, "learning_rate": 1.8669993622983217e-06, "loss": 212.2618, "step": 41820 }, { "epoch": 0.8044965645900347, "grad_norm": 354.77223705156314, "learning_rate": 1.863449797351624e-06, "loss": 207.746, "step": 41830 }, { "epoch": 0.8046888898505152, "grad_norm": 367.2867320331355, "learning_rate": 1.8599032631354963e-06, "loss": 210.4903, "step": 41840 }, { "epoch": 0.8048812151109958, "grad_norm": 453.4886518814372, "learning_rate": 1.8563597609709626e-06, "loss": 216.1181, "step": 41850 }, { "epoch": 0.8050735403714763, "grad_norm": 375.4274482466092, "learning_rate": 1.852819292177922e-06, "loss": 209.5629, "step": 41860 }, { "epoch": 0.8052658656319568, "grad_norm": 412.8022239124099, "learning_rate": 1.8492818580751414e-06, "loss": 218.483, "step": 41870 }, { "epoch": 0.8054581908924373, "grad_norm": 386.54951328148917, "learning_rate": 1.8457474599802527e-06, "loss": 203.4288, "step": 41880 }, { "epoch": 0.8056505161529178, "grad_norm": 386.6269139117028, "learning_rate": 1.842216099209767e-06, "loss": 212.1619, "step": 41890 }, { "epoch": 0.8058428414133983, "grad_norm": 362.9212830565881, "learning_rate": 1.8386877770790524e-06, "loss": 205.978, "step": 41900 }, { "epoch": 0.8060351666738789, "grad_norm": 444.8007768212546, "learning_rate": 1.8351624949023539e-06, "loss": 214.794, "step": 41910 }, { "epoch": 0.8062274919343594, "grad_norm": 374.81534673955224, "learning_rate": 1.8316402539927757e-06, "loss": 209.651, "step": 41920 }, { "epoch": 0.8064198171948399, "grad_norm": 395.46132099994793, "learning_rate": 1.8281210556623007e-06, "loss": 202.8083, "step": 41930 }, { "epoch": 0.8066121424553204, "grad_norm": 385.1851609238975, "learning_rate": 1.8246049012217693e-06, "loss": 203.7979, "step": 41940 }, { "epoch": 0.8068044677158009, "grad_norm": 376.71585137162646, "learning_rate": 1.8210917919808891e-06, "loss": 213.5396, "step": 41950 }, { "epoch": 0.8069967929762815, "grad_norm": 383.27855075608784, "learning_rate": 1.817581729248239e-06, "loss": 208.0222, "step": 41960 }, { "epoch": 0.807189118236762, "grad_norm": 428.5429226169241, "learning_rate": 1.8140747143312588e-06, "loss": 206.4999, "step": 41970 }, { "epoch": 0.8073814434972425, "grad_norm": 412.9275824586872, "learning_rate": 1.8105707485362511e-06, "loss": 201.0712, "step": 41980 }, { "epoch": 0.807573768757723, "grad_norm": 357.2183740508322, "learning_rate": 1.8070698331683844e-06, "loss": 208.2365, "step": 41990 }, { "epoch": 0.8077660940182035, "grad_norm": 371.07263761142406, "learning_rate": 1.8035719695316955e-06, "loss": 215.9635, "step": 42000 }, { "epoch": 0.8079584192786841, "grad_norm": 370.5917801017485, "learning_rate": 1.800077158929081e-06, "loss": 208.0769, "step": 42010 }, { "epoch": 0.8081507445391647, "grad_norm": 390.5821982735599, "learning_rate": 1.7965854026622953e-06, "loss": 205.1271, "step": 42020 }, { "epoch": 0.8083430697996452, "grad_norm": 408.2207047930982, "learning_rate": 1.7930967020319667e-06, "loss": 211.125, "step": 42030 }, { "epoch": 0.8085353950601257, "grad_norm": 387.5800192722426, "learning_rate": 1.7896110583375747e-06, "loss": 200.1964, "step": 42040 }, { "epoch": 0.8087277203206062, "grad_norm": 339.61653075244897, "learning_rate": 1.7861284728774652e-06, "loss": 207.2948, "step": 42050 }, { "epoch": 0.8089200455810868, "grad_norm": 374.41868452555894, "learning_rate": 1.7826489469488395e-06, "loss": 204.1716, "step": 42060 }, { "epoch": 0.8091123708415673, "grad_norm": 389.567981799662, "learning_rate": 1.7791724818477708e-06, "loss": 204.8572, "step": 42070 }, { "epoch": 0.8093046961020478, "grad_norm": 378.25298561747064, "learning_rate": 1.7756990788691797e-06, "loss": 203.1536, "step": 42080 }, { "epoch": 0.8094970213625283, "grad_norm": 353.5955856533669, "learning_rate": 1.772228739306854e-06, "loss": 206.8032, "step": 42090 }, { "epoch": 0.8096893466230088, "grad_norm": 405.7057134537505, "learning_rate": 1.7687614644534333e-06, "loss": 204.1162, "step": 42100 }, { "epoch": 0.8098816718834894, "grad_norm": 417.5545628769309, "learning_rate": 1.7652972556004267e-06, "loss": 212.9713, "step": 42110 }, { "epoch": 0.8100739971439699, "grad_norm": 400.07789244321077, "learning_rate": 1.7618361140381922e-06, "loss": 217.3094, "step": 42120 }, { "epoch": 0.8102663224044504, "grad_norm": 399.8002995939122, "learning_rate": 1.7583780410559449e-06, "loss": 209.0708, "step": 42130 }, { "epoch": 0.8104586476649309, "grad_norm": 375.89655749514503, "learning_rate": 1.7549230379417636e-06, "loss": 209.8407, "step": 42140 }, { "epoch": 0.8106509729254114, "grad_norm": 407.82013037505374, "learning_rate": 1.7514711059825773e-06, "loss": 203.6781, "step": 42150 }, { "epoch": 0.810843298185892, "grad_norm": 361.21082426808533, "learning_rate": 1.7480222464641783e-06, "loss": 219.0545, "step": 42160 }, { "epoch": 0.8110356234463725, "grad_norm": 381.5994405974679, "learning_rate": 1.7445764606712024e-06, "loss": 204.033, "step": 42170 }, { "epoch": 0.811227948706853, "grad_norm": 388.86811375164496, "learning_rate": 1.7411337498871561e-06, "loss": 205.6674, "step": 42180 }, { "epoch": 0.8114202739673335, "grad_norm": 402.71771828181284, "learning_rate": 1.737694115394387e-06, "loss": 210.5151, "step": 42190 }, { "epoch": 0.811612599227814, "grad_norm": 380.73564784427685, "learning_rate": 1.7342575584741018e-06, "loss": 206.2773, "step": 42200 }, { "epoch": 0.8118049244882946, "grad_norm": 397.428208511356, "learning_rate": 1.7308240804063648e-06, "loss": 205.6393, "step": 42210 }, { "epoch": 0.8119972497487751, "grad_norm": 351.08297928426407, "learning_rate": 1.7273936824700888e-06, "loss": 212.4962, "step": 42220 }, { "epoch": 0.8121895750092557, "grad_norm": 348.61299077882325, "learning_rate": 1.7239663659430384e-06, "loss": 207.6494, "step": 42230 }, { "epoch": 0.8123819002697362, "grad_norm": 404.09632401668375, "learning_rate": 1.7205421321018312e-06, "loss": 212.5592, "step": 42240 }, { "epoch": 0.8125742255302167, "grad_norm": 353.0282682863881, "learning_rate": 1.7171209822219427e-06, "loss": 214.9213, "step": 42250 }, { "epoch": 0.8127665507906973, "grad_norm": 374.0724173205953, "learning_rate": 1.713702917577692e-06, "loss": 210.6041, "step": 42260 }, { "epoch": 0.8129588760511778, "grad_norm": 377.9211951109493, "learning_rate": 1.71028793944225e-06, "loss": 206.3823, "step": 42270 }, { "epoch": 0.8131512013116583, "grad_norm": 369.4642192940653, "learning_rate": 1.7068760490876425e-06, "loss": 205.6524, "step": 42280 }, { "epoch": 0.8133435265721388, "grad_norm": 364.6709525429443, "learning_rate": 1.7034672477847402e-06, "loss": 208.5839, "step": 42290 }, { "epoch": 0.8135358518326193, "grad_norm": 363.41763280913244, "learning_rate": 1.700061536803268e-06, "loss": 204.9894, "step": 42300 }, { "epoch": 0.8137281770930999, "grad_norm": 461.2436710840498, "learning_rate": 1.696658917411793e-06, "loss": 205.032, "step": 42310 }, { "epoch": 0.8139205023535804, "grad_norm": 350.8760346611292, "learning_rate": 1.6932593908777394e-06, "loss": 209.3596, "step": 42320 }, { "epoch": 0.8141128276140609, "grad_norm": 368.2903131814151, "learning_rate": 1.689862958467372e-06, "loss": 204.6411, "step": 42330 }, { "epoch": 0.8143051528745414, "grad_norm": 399.36844148503786, "learning_rate": 1.6864696214458065e-06, "loss": 207.4226, "step": 42340 }, { "epoch": 0.8144974781350219, "grad_norm": 371.03722013969127, "learning_rate": 1.683079381077003e-06, "loss": 211.7935, "step": 42350 }, { "epoch": 0.8146898033955025, "grad_norm": 372.1826920492772, "learning_rate": 1.6796922386237724e-06, "loss": 214.8903, "step": 42360 }, { "epoch": 0.814882128655983, "grad_norm": 399.4672720882924, "learning_rate": 1.67630819534777e-06, "loss": 208.9046, "step": 42370 }, { "epoch": 0.8150744539164635, "grad_norm": 415.16717761610704, "learning_rate": 1.6729272525094908e-06, "loss": 208.0337, "step": 42380 }, { "epoch": 0.815266779176944, "grad_norm": 389.78413077117796, "learning_rate": 1.6695494113682874e-06, "loss": 210.0916, "step": 42390 }, { "epoch": 0.8154591044374245, "grad_norm": 388.33934171479245, "learning_rate": 1.6661746731823458e-06, "loss": 205.351, "step": 42400 }, { "epoch": 0.815651429697905, "grad_norm": 367.4188093087156, "learning_rate": 1.6628030392087001e-06, "loss": 209.2656, "step": 42410 }, { "epoch": 0.8158437549583856, "grad_norm": 369.82074700429, "learning_rate": 1.6594345107032273e-06, "loss": 212.7064, "step": 42420 }, { "epoch": 0.8160360802188662, "grad_norm": 362.7657596873365, "learning_rate": 1.6560690889206499e-06, "loss": 210.4991, "step": 42430 }, { "epoch": 0.8162284054793467, "grad_norm": 365.01707799881785, "learning_rate": 1.6527067751145354e-06, "loss": 201.3554, "step": 42440 }, { "epoch": 0.8164207307398272, "grad_norm": 375.0802696652425, "learning_rate": 1.6493475705372863e-06, "loss": 207.5702, "step": 42450 }, { "epoch": 0.8166130560003078, "grad_norm": 403.0493934491084, "learning_rate": 1.6459914764401497e-06, "loss": 213.1153, "step": 42460 }, { "epoch": 0.8168053812607883, "grad_norm": 396.8085935361489, "learning_rate": 1.64263849407322e-06, "loss": 207.8117, "step": 42470 }, { "epoch": 0.8169977065212688, "grad_norm": 360.9792495982528, "learning_rate": 1.6392886246854234e-06, "loss": 197.5292, "step": 42480 }, { "epoch": 0.8171900317817493, "grad_norm": 399.14962520313526, "learning_rate": 1.6359418695245311e-06, "loss": 204.9792, "step": 42490 }, { "epoch": 0.8173823570422298, "grad_norm": 387.88111820914077, "learning_rate": 1.632598229837158e-06, "loss": 207.9681, "step": 42500 }, { "epoch": 0.8175746823027104, "grad_norm": 369.77251215534267, "learning_rate": 1.629257706868751e-06, "loss": 206.1137, "step": 42510 }, { "epoch": 0.8177670075631909, "grad_norm": 360.0090283721679, "learning_rate": 1.6259203018636016e-06, "loss": 204.9569, "step": 42520 }, { "epoch": 0.8179593328236714, "grad_norm": 392.34529343451965, "learning_rate": 1.6225860160648343e-06, "loss": 213.567, "step": 42530 }, { "epoch": 0.8181516580841519, "grad_norm": 367.09983865534264, "learning_rate": 1.6192548507144213e-06, "loss": 210.2467, "step": 42540 }, { "epoch": 0.8183439833446324, "grad_norm": 361.2438051846816, "learning_rate": 1.6159268070531642e-06, "loss": 203.0745, "step": 42550 }, { "epoch": 0.818536308605113, "grad_norm": 415.82254238437633, "learning_rate": 1.6126018863207005e-06, "loss": 215.2535, "step": 42560 }, { "epoch": 0.8187286338655935, "grad_norm": 374.52827279819735, "learning_rate": 1.609280089755515e-06, "loss": 208.4515, "step": 42570 }, { "epoch": 0.818920959126074, "grad_norm": 394.0702665850043, "learning_rate": 1.6059614185949157e-06, "loss": 209.876, "step": 42580 }, { "epoch": 0.8191132843865545, "grad_norm": 388.4257304191313, "learning_rate": 1.6026458740750584e-06, "loss": 198.6361, "step": 42590 }, { "epoch": 0.819305609647035, "grad_norm": 358.9323469595191, "learning_rate": 1.5993334574309238e-06, "loss": 210.6616, "step": 42600 }, { "epoch": 0.8194979349075155, "grad_norm": 368.07816743601427, "learning_rate": 1.5960241698963374e-06, "loss": 201.8158, "step": 42610 }, { "epoch": 0.8196902601679961, "grad_norm": 371.98819391646555, "learning_rate": 1.592718012703951e-06, "loss": 206.5568, "step": 42620 }, { "epoch": 0.8198825854284766, "grad_norm": 366.2847503045154, "learning_rate": 1.589414987085255e-06, "loss": 198.923, "step": 42630 }, { "epoch": 0.8200749106889572, "grad_norm": 377.38426657334304, "learning_rate": 1.5861150942705672e-06, "loss": 204.7531, "step": 42640 }, { "epoch": 0.8202672359494377, "grad_norm": 345.66962036636977, "learning_rate": 1.5828183354890504e-06, "loss": 199.6333, "step": 42650 }, { "epoch": 0.8204595612099183, "grad_norm": 366.6233608059497, "learning_rate": 1.5795247119686885e-06, "loss": 200.4812, "step": 42660 }, { "epoch": 0.8206518864703988, "grad_norm": 390.75195589742253, "learning_rate": 1.5762342249363006e-06, "loss": 212.3442, "step": 42670 }, { "epoch": 0.8208442117308793, "grad_norm": 357.6144815084936, "learning_rate": 1.5729468756175426e-06, "loss": 199.1743, "step": 42680 }, { "epoch": 0.8210365369913598, "grad_norm": 391.48648376608594, "learning_rate": 1.5696626652368973e-06, "loss": 203.6902, "step": 42690 }, { "epoch": 0.8212288622518403, "grad_norm": 411.47456657443206, "learning_rate": 1.5663815950176742e-06, "loss": 206.812, "step": 42700 }, { "epoch": 0.8214211875123208, "grad_norm": 403.3799944984759, "learning_rate": 1.5631036661820232e-06, "loss": 212.0593, "step": 42710 }, { "epoch": 0.8216135127728014, "grad_norm": 352.70857191156745, "learning_rate": 1.5598288799509153e-06, "loss": 203.5993, "step": 42720 }, { "epoch": 0.8218058380332819, "grad_norm": 399.7315480094786, "learning_rate": 1.5565572375441573e-06, "loss": 205.3522, "step": 42730 }, { "epoch": 0.8219981632937624, "grad_norm": 420.6868483884786, "learning_rate": 1.5532887401803787e-06, "loss": 208.7667, "step": 42740 }, { "epoch": 0.8221904885542429, "grad_norm": 385.3181638305602, "learning_rate": 1.5500233890770434e-06, "loss": 206.6352, "step": 42750 }, { "epoch": 0.8223828138147234, "grad_norm": 377.90850155617335, "learning_rate": 1.5467611854504406e-06, "loss": 201.4559, "step": 42760 }, { "epoch": 0.822575139075204, "grad_norm": 389.8834038356547, "learning_rate": 1.5435021305156862e-06, "loss": 208.0062, "step": 42770 }, { "epoch": 0.8227674643356845, "grad_norm": 381.4462336169545, "learning_rate": 1.5402462254867222e-06, "loss": 213.6258, "step": 42780 }, { "epoch": 0.822959789596165, "grad_norm": 405.7552092019749, "learning_rate": 1.5369934715763235e-06, "loss": 207.9729, "step": 42790 }, { "epoch": 0.8231521148566455, "grad_norm": 361.5114360632329, "learning_rate": 1.533743869996086e-06, "loss": 202.8256, "step": 42800 }, { "epoch": 0.823344440117126, "grad_norm": 377.7792467971178, "learning_rate": 1.5304974219564318e-06, "loss": 205.7259, "step": 42810 }, { "epoch": 0.8235367653776066, "grad_norm": 391.09395136335803, "learning_rate": 1.5272541286666075e-06, "loss": 202.7261, "step": 42820 }, { "epoch": 0.8237290906380871, "grad_norm": 411.43715038388666, "learning_rate": 1.5240139913346906e-06, "loss": 207.5069, "step": 42830 }, { "epoch": 0.8239214158985677, "grad_norm": 418.0467421549725, "learning_rate": 1.5207770111675735e-06, "loss": 213.0044, "step": 42840 }, { "epoch": 0.8241137411590482, "grad_norm": 348.3304180145024, "learning_rate": 1.5175431893709836e-06, "loss": 208.9385, "step": 42850 }, { "epoch": 0.8243060664195287, "grad_norm": 382.98146330525566, "learning_rate": 1.5143125271494607e-06, "loss": 204.573, "step": 42860 }, { "epoch": 0.8244983916800093, "grad_norm": 380.2728364984045, "learning_rate": 1.511085025706378e-06, "loss": 204.909, "step": 42870 }, { "epoch": 0.8246907169404898, "grad_norm": 364.8850353229922, "learning_rate": 1.5078606862439248e-06, "loss": 203.9125, "step": 42880 }, { "epoch": 0.8248830422009703, "grad_norm": 385.4312039366786, "learning_rate": 1.5046395099631106e-06, "loss": 207.736, "step": 42890 }, { "epoch": 0.8250753674614508, "grad_norm": 415.9649915079839, "learning_rate": 1.5014214980637754e-06, "loss": 204.9748, "step": 42900 }, { "epoch": 0.8252676927219313, "grad_norm": 382.41490357263086, "learning_rate": 1.4982066517445748e-06, "loss": 202.9691, "step": 42910 }, { "epoch": 0.8254600179824119, "grad_norm": 425.19013791079897, "learning_rate": 1.4949949722029811e-06, "loss": 205.6067, "step": 42920 }, { "epoch": 0.8256523432428924, "grad_norm": 389.78754848592973, "learning_rate": 1.4917864606352983e-06, "loss": 215.3066, "step": 42930 }, { "epoch": 0.8258446685033729, "grad_norm": 378.0103159459695, "learning_rate": 1.4885811182366406e-06, "loss": 211.2018, "step": 42940 }, { "epoch": 0.8260369937638534, "grad_norm": 374.5487429698657, "learning_rate": 1.485378946200946e-06, "loss": 206.4354, "step": 42950 }, { "epoch": 0.8262293190243339, "grad_norm": 337.45494276169416, "learning_rate": 1.4821799457209684e-06, "loss": 203.6319, "step": 42960 }, { "epoch": 0.8264216442848145, "grad_norm": 395.82782939758397, "learning_rate": 1.478984117988287e-06, "loss": 208.2151, "step": 42970 }, { "epoch": 0.826613969545295, "grad_norm": 366.9216357376794, "learning_rate": 1.4757914641932924e-06, "loss": 202.5619, "step": 42980 }, { "epoch": 0.8268062948057755, "grad_norm": 355.8624239101601, "learning_rate": 1.4726019855251928e-06, "loss": 208.0176, "step": 42990 }, { "epoch": 0.826998620066256, "grad_norm": 354.09193575255347, "learning_rate": 1.4694156831720185e-06, "loss": 204.8916, "step": 43000 }, { "epoch": 0.8271909453267365, "grad_norm": 378.4216589868182, "learning_rate": 1.4662325583206172e-06, "loss": 210.9614, "step": 43010 }, { "epoch": 0.8273832705872171, "grad_norm": 354.49938820199424, "learning_rate": 1.463052612156649e-06, "loss": 208.6167, "step": 43020 }, { "epoch": 0.8275755958476976, "grad_norm": 396.37355740819686, "learning_rate": 1.4598758458645878e-06, "loss": 208.938, "step": 43030 }, { "epoch": 0.8277679211081782, "grad_norm": 395.3336639522541, "learning_rate": 1.4567022606277314e-06, "loss": 205.7786, "step": 43040 }, { "epoch": 0.8279602463686587, "grad_norm": 386.8890530766187, "learning_rate": 1.4535318576281854e-06, "loss": 202.6189, "step": 43050 }, { "epoch": 0.8281525716291392, "grad_norm": 377.7512675996208, "learning_rate": 1.4503646380468729e-06, "loss": 202.4282, "step": 43060 }, { "epoch": 0.8283448968896198, "grad_norm": 409.4644380711561, "learning_rate": 1.4472006030635288e-06, "loss": 208.351, "step": 43070 }, { "epoch": 0.8285372221501003, "grad_norm": 428.1367689688268, "learning_rate": 1.4440397538567086e-06, "loss": 210.8609, "step": 43080 }, { "epoch": 0.8287295474105808, "grad_norm": 352.9025028590039, "learning_rate": 1.4408820916037735e-06, "loss": 204.6995, "step": 43090 }, { "epoch": 0.8289218726710613, "grad_norm": 463.0241299362715, "learning_rate": 1.4377276174808984e-06, "loss": 204.3954, "step": 43100 }, { "epoch": 0.8291141979315418, "grad_norm": 409.4859384209472, "learning_rate": 1.4345763326630768e-06, "loss": 206.5103, "step": 43110 }, { "epoch": 0.8293065231920224, "grad_norm": 377.0778820136871, "learning_rate": 1.4314282383241097e-06, "loss": 204.2329, "step": 43120 }, { "epoch": 0.8294988484525029, "grad_norm": 389.2982591056965, "learning_rate": 1.4282833356366066e-06, "loss": 204.242, "step": 43130 }, { "epoch": 0.8296911737129834, "grad_norm": 351.0782059364485, "learning_rate": 1.4251416257719962e-06, "loss": 203.7137, "step": 43140 }, { "epoch": 0.8298834989734639, "grad_norm": 411.833262302244, "learning_rate": 1.4220031099005094e-06, "loss": 206.6646, "step": 43150 }, { "epoch": 0.8300758242339444, "grad_norm": 354.1078997029452, "learning_rate": 1.4188677891911961e-06, "loss": 198.2409, "step": 43160 }, { "epoch": 0.830268149494425, "grad_norm": 385.26701070746, "learning_rate": 1.4157356648119103e-06, "loss": 201.8019, "step": 43170 }, { "epoch": 0.8304604747549055, "grad_norm": 398.0017416010933, "learning_rate": 1.412606737929313e-06, "loss": 209.9437, "step": 43180 }, { "epoch": 0.830652800015386, "grad_norm": 350.38366159555943, "learning_rate": 1.4094810097088817e-06, "loss": 206.9258, "step": 43190 }, { "epoch": 0.8308451252758665, "grad_norm": 376.1112166408293, "learning_rate": 1.4063584813148979e-06, "loss": 204.1622, "step": 43200 }, { "epoch": 0.831037450536347, "grad_norm": 381.8069347895772, "learning_rate": 1.4032391539104484e-06, "loss": 209.0268, "step": 43210 }, { "epoch": 0.8312297757968276, "grad_norm": 366.5592143026922, "learning_rate": 1.4001230286574363e-06, "loss": 208.1606, "step": 43220 }, { "epoch": 0.8314221010573081, "grad_norm": 399.8774281314007, "learning_rate": 1.3970101067165642e-06, "loss": 206.0998, "step": 43230 }, { "epoch": 0.8316144263177886, "grad_norm": 377.794880820037, "learning_rate": 1.3939003892473446e-06, "loss": 203.2025, "step": 43240 }, { "epoch": 0.8318067515782692, "grad_norm": 430.77880711968334, "learning_rate": 1.390793877408093e-06, "loss": 207.3121, "step": 43250 }, { "epoch": 0.8319990768387497, "grad_norm": 365.30208837721693, "learning_rate": 1.3876905723559397e-06, "loss": 202.6982, "step": 43260 }, { "epoch": 0.8321914020992303, "grad_norm": 348.6124368238359, "learning_rate": 1.3845904752468075e-06, "loss": 207.9971, "step": 43270 }, { "epoch": 0.8323837273597108, "grad_norm": 426.8109912064802, "learning_rate": 1.3814935872354385e-06, "loss": 203.388, "step": 43280 }, { "epoch": 0.8325760526201913, "grad_norm": 370.86422185206516, "learning_rate": 1.3783999094753653e-06, "loss": 205.4613, "step": 43290 }, { "epoch": 0.8327683778806718, "grad_norm": 391.34146476262, "learning_rate": 1.3753094431189385e-06, "loss": 210.4081, "step": 43300 }, { "epoch": 0.8329607031411523, "grad_norm": 363.34214430757095, "learning_rate": 1.3722221893173027e-06, "loss": 199.529, "step": 43310 }, { "epoch": 0.8331530284016329, "grad_norm": 358.37837525956473, "learning_rate": 1.369138149220407e-06, "loss": 205.2364, "step": 43320 }, { "epoch": 0.8333453536621134, "grad_norm": 407.2503065166098, "learning_rate": 1.3660573239770091e-06, "loss": 210.4062, "step": 43330 }, { "epoch": 0.8335376789225939, "grad_norm": 381.8922203649801, "learning_rate": 1.3629797147346635e-06, "loss": 214.432, "step": 43340 }, { "epoch": 0.8337300041830744, "grad_norm": 377.12143221474616, "learning_rate": 1.3599053226397275e-06, "loss": 209.5975, "step": 43350 }, { "epoch": 0.8339223294435549, "grad_norm": 412.8418477082048, "learning_rate": 1.3568341488373637e-06, "loss": 208.5336, "step": 43360 }, { "epoch": 0.8341146547040355, "grad_norm": 365.8775906786246, "learning_rate": 1.3537661944715342e-06, "loss": 204.7445, "step": 43370 }, { "epoch": 0.834306979964516, "grad_norm": 354.70476910530317, "learning_rate": 1.350701460685e-06, "loss": 210.1551, "step": 43380 }, { "epoch": 0.8344993052249965, "grad_norm": 393.22456570496587, "learning_rate": 1.3476399486193214e-06, "loss": 203.1828, "step": 43390 }, { "epoch": 0.834691630485477, "grad_norm": 408.1931928165359, "learning_rate": 1.3445816594148654e-06, "loss": 214.0573, "step": 43400 }, { "epoch": 0.8348839557459575, "grad_norm": 427.5986714768565, "learning_rate": 1.3415265942107925e-06, "loss": 210.9196, "step": 43410 }, { "epoch": 0.835076281006438, "grad_norm": 345.37005481901633, "learning_rate": 1.3384747541450615e-06, "loss": 199.947, "step": 43420 }, { "epoch": 0.8352686062669186, "grad_norm": 400.3090468199459, "learning_rate": 1.3354261403544345e-06, "loss": 206.9206, "step": 43430 }, { "epoch": 0.8354609315273991, "grad_norm": 396.8117352224526, "learning_rate": 1.3323807539744726e-06, "loss": 215.036, "step": 43440 }, { "epoch": 0.8356532567878797, "grad_norm": 388.14309009985914, "learning_rate": 1.32933859613953e-06, "loss": 207.7481, "step": 43450 }, { "epoch": 0.8358455820483602, "grad_norm": 400.6428246283045, "learning_rate": 1.3262996679827567e-06, "loss": 208.6612, "step": 43460 }, { "epoch": 0.8360379073088408, "grad_norm": 393.72973428537125, "learning_rate": 1.3232639706361083e-06, "loss": 207.2187, "step": 43470 }, { "epoch": 0.8362302325693213, "grad_norm": 424.8615231714765, "learning_rate": 1.3202315052303304e-06, "loss": 197.9963, "step": 43480 }, { "epoch": 0.8364225578298018, "grad_norm": 366.87618959401823, "learning_rate": 1.3172022728949651e-06, "loss": 206.0512, "step": 43490 }, { "epoch": 0.8366148830902823, "grad_norm": 385.2714230521603, "learning_rate": 1.3141762747583498e-06, "loss": 200.3243, "step": 43500 }, { "epoch": 0.8368072083507628, "grad_norm": 379.18669169817093, "learning_rate": 1.3111535119476237e-06, "loss": 206.8235, "step": 43510 }, { "epoch": 0.8369995336112434, "grad_norm": 393.2839572386095, "learning_rate": 1.3081339855887133e-06, "loss": 212.3775, "step": 43520 }, { "epoch": 0.8371918588717239, "grad_norm": 453.1823512102, "learning_rate": 1.3051176968063407e-06, "loss": 209.7109, "step": 43530 }, { "epoch": 0.8373841841322044, "grad_norm": 389.20289934192317, "learning_rate": 1.302104646724026e-06, "loss": 205.8315, "step": 43540 }, { "epoch": 0.8375765093926849, "grad_norm": 378.9919446636174, "learning_rate": 1.299094836464081e-06, "loss": 208.582, "step": 43550 }, { "epoch": 0.8377688346531654, "grad_norm": 384.9649496654457, "learning_rate": 1.2960882671476062e-06, "loss": 203.4462, "step": 43560 }, { "epoch": 0.837961159913646, "grad_norm": 346.4524821498244, "learning_rate": 1.2930849398945033e-06, "loss": 203.5101, "step": 43570 }, { "epoch": 0.8381534851741265, "grad_norm": 363.42465926197104, "learning_rate": 1.2900848558234625e-06, "loss": 206.3717, "step": 43580 }, { "epoch": 0.838345810434607, "grad_norm": 358.66666701920167, "learning_rate": 1.2870880160519628e-06, "loss": 206.9612, "step": 43590 }, { "epoch": 0.8385381356950875, "grad_norm": 384.55230391340456, "learning_rate": 1.2840944216962802e-06, "loss": 209.1919, "step": 43600 }, { "epoch": 0.838730460955568, "grad_norm": 364.057246624706, "learning_rate": 1.2811040738714742e-06, "loss": 204.3007, "step": 43610 }, { "epoch": 0.8389227862160485, "grad_norm": 357.7533222636988, "learning_rate": 1.2781169736914067e-06, "loss": 202.8367, "step": 43620 }, { "epoch": 0.8391151114765291, "grad_norm": 365.36288961938175, "learning_rate": 1.275133122268719e-06, "loss": 199.7843, "step": 43630 }, { "epoch": 0.8393074367370096, "grad_norm": 372.39789154120996, "learning_rate": 1.2721525207148456e-06, "loss": 205.1193, "step": 43640 }, { "epoch": 0.8394997619974901, "grad_norm": 392.8532414713598, "learning_rate": 1.2691751701400145e-06, "loss": 206.081, "step": 43650 }, { "epoch": 0.8396920872579707, "grad_norm": 374.00260622159925, "learning_rate": 1.2662010716532392e-06, "loss": 206.8555, "step": 43660 }, { "epoch": 0.8398844125184513, "grad_norm": 391.1200221624074, "learning_rate": 1.2632302263623198e-06, "loss": 200.8548, "step": 43670 }, { "epoch": 0.8400767377789318, "grad_norm": 360.55748336623765, "learning_rate": 1.2602626353738479e-06, "loss": 207.4963, "step": 43680 }, { "epoch": 0.8402690630394123, "grad_norm": 407.6834107541414, "learning_rate": 1.2572982997932037e-06, "loss": 210.2553, "step": 43690 }, { "epoch": 0.8404613882998928, "grad_norm": 400.4503739308167, "learning_rate": 1.2543372207245508e-06, "loss": 205.8775, "step": 43700 }, { "epoch": 0.8406537135603733, "grad_norm": 360.2130206354292, "learning_rate": 1.2513793992708467e-06, "loss": 210.1699, "step": 43710 }, { "epoch": 0.8408460388208538, "grad_norm": 395.0485487885795, "learning_rate": 1.2484248365338248e-06, "loss": 206.1793, "step": 43720 }, { "epoch": 0.8410383640813344, "grad_norm": 367.2410192807635, "learning_rate": 1.2454735336140167e-06, "loss": 207.9479, "step": 43730 }, { "epoch": 0.8412306893418149, "grad_norm": 363.75268193113, "learning_rate": 1.2425254916107321e-06, "loss": 205.6623, "step": 43740 }, { "epoch": 0.8414230146022954, "grad_norm": 371.9467390396526, "learning_rate": 1.2395807116220648e-06, "loss": 207.7912, "step": 43750 }, { "epoch": 0.8416153398627759, "grad_norm": 387.1542990519759, "learning_rate": 1.236639194744902e-06, "loss": 210.2121, "step": 43760 }, { "epoch": 0.8418076651232564, "grad_norm": 406.63128959488984, "learning_rate": 1.233700942074907e-06, "loss": 205.1486, "step": 43770 }, { "epoch": 0.841999990383737, "grad_norm": 345.39095881535553, "learning_rate": 1.230765954706531e-06, "loss": 202.5575, "step": 43780 }, { "epoch": 0.8421923156442175, "grad_norm": 400.62383242473885, "learning_rate": 1.227834233733005e-06, "loss": 209.2507, "step": 43790 }, { "epoch": 0.842384640904698, "grad_norm": 363.9228370647116, "learning_rate": 1.2249057802463527e-06, "loss": 206.0571, "step": 43800 }, { "epoch": 0.8425769661651785, "grad_norm": 356.91017574922256, "learning_rate": 1.221980595337372e-06, "loss": 204.8864, "step": 43810 }, { "epoch": 0.842769291425659, "grad_norm": 373.6667604329769, "learning_rate": 1.219058680095644e-06, "loss": 207.9622, "step": 43820 }, { "epoch": 0.8429616166861396, "grad_norm": 367.0553791020593, "learning_rate": 1.2161400356095376e-06, "loss": 200.6157, "step": 43830 }, { "epoch": 0.8431539419466201, "grad_norm": 359.1084804345651, "learning_rate": 1.2132246629661948e-06, "loss": 204.2739, "step": 43840 }, { "epoch": 0.8433462672071006, "grad_norm": 355.012140131159, "learning_rate": 1.21031256325155e-06, "loss": 206.1288, "step": 43850 }, { "epoch": 0.8435385924675812, "grad_norm": 409.81794687657094, "learning_rate": 1.2074037375503056e-06, "loss": 198.1729, "step": 43860 }, { "epoch": 0.8437309177280617, "grad_norm": 379.0431068053593, "learning_rate": 1.2044981869459571e-06, "loss": 210.0953, "step": 43870 }, { "epoch": 0.8439232429885423, "grad_norm": 377.71797648753795, "learning_rate": 1.201595912520771e-06, "loss": 208.7903, "step": 43880 }, { "epoch": 0.8441155682490228, "grad_norm": 379.4799052424159, "learning_rate": 1.198696915355796e-06, "loss": 215.2234, "step": 43890 }, { "epoch": 0.8443078935095033, "grad_norm": 395.77615052504984, "learning_rate": 1.1958011965308624e-06, "loss": 210.4527, "step": 43900 }, { "epoch": 0.8445002187699838, "grad_norm": 392.10683098479126, "learning_rate": 1.192908757124578e-06, "loss": 206.6592, "step": 43910 }, { "epoch": 0.8446925440304643, "grad_norm": 362.38496475020617, "learning_rate": 1.190019598214327e-06, "loss": 208.6194, "step": 43920 }, { "epoch": 0.8448848692909449, "grad_norm": 409.81221917113635, "learning_rate": 1.1871337208762723e-06, "loss": 202.5024, "step": 43930 }, { "epoch": 0.8450771945514254, "grad_norm": 417.1247358705236, "learning_rate": 1.1842511261853596e-06, "loss": 210.2868, "step": 43940 }, { "epoch": 0.8452695198119059, "grad_norm": 372.9501483526634, "learning_rate": 1.1813718152153054e-06, "loss": 202.5984, "step": 43950 }, { "epoch": 0.8454618450723864, "grad_norm": 389.8741194615128, "learning_rate": 1.1784957890386051e-06, "loss": 204.0407, "step": 43960 }, { "epoch": 0.8456541703328669, "grad_norm": 363.2134290907783, "learning_rate": 1.1756230487265296e-06, "loss": 203.0969, "step": 43970 }, { "epoch": 0.8458464955933475, "grad_norm": 392.96502388654324, "learning_rate": 1.1727535953491308e-06, "loss": 204.6773, "step": 43980 }, { "epoch": 0.846038820853828, "grad_norm": 367.1321391536074, "learning_rate": 1.1698874299752293e-06, "loss": 199.7048, "step": 43990 }, { "epoch": 0.8462311461143085, "grad_norm": 354.87337064746697, "learning_rate": 1.1670245536724267e-06, "loss": 205.9254, "step": 44000 }, { "epoch": 0.846423471374789, "grad_norm": 367.77289151048666, "learning_rate": 1.1641649675070975e-06, "loss": 207.6062, "step": 44010 }, { "epoch": 0.8466157966352695, "grad_norm": 412.32926517504535, "learning_rate": 1.161308672544389e-06, "loss": 219.878, "step": 44020 }, { "epoch": 0.84680812189575, "grad_norm": 381.3936188019946, "learning_rate": 1.1584556698482252e-06, "loss": 204.3105, "step": 44030 }, { "epoch": 0.8470004471562306, "grad_norm": 393.4578562868416, "learning_rate": 1.1556059604812985e-06, "loss": 203.6952, "step": 44040 }, { "epoch": 0.8471927724167111, "grad_norm": 378.85770801966174, "learning_rate": 1.1527595455050844e-06, "loss": 206.0731, "step": 44050 }, { "epoch": 0.8473850976771916, "grad_norm": 428.11879829537617, "learning_rate": 1.1499164259798223e-06, "loss": 206.6966, "step": 44060 }, { "epoch": 0.8475774229376722, "grad_norm": 344.52758866783785, "learning_rate": 1.1470766029645253e-06, "loss": 209.6004, "step": 44070 }, { "epoch": 0.8477697481981528, "grad_norm": 391.69909105604853, "learning_rate": 1.1442400775169849e-06, "loss": 208.7681, "step": 44080 }, { "epoch": 0.8479620734586333, "grad_norm": 350.48260471174507, "learning_rate": 1.141406850693757e-06, "loss": 199.2871, "step": 44090 }, { "epoch": 0.8481543987191138, "grad_norm": 360.88210862046384, "learning_rate": 1.1385769235501742e-06, "loss": 209.238, "step": 44100 }, { "epoch": 0.8483467239795943, "grad_norm": 404.2304100493653, "learning_rate": 1.1357502971403335e-06, "loss": 208.8373, "step": 44110 }, { "epoch": 0.8485390492400748, "grad_norm": 382.13975041926614, "learning_rate": 1.132926972517111e-06, "loss": 203.786, "step": 44120 }, { "epoch": 0.8487313745005554, "grad_norm": 425.13146544979304, "learning_rate": 1.130106950732145e-06, "loss": 201.2365, "step": 44130 }, { "epoch": 0.8489236997610359, "grad_norm": 377.39230241799714, "learning_rate": 1.1272902328358514e-06, "loss": 205.3801, "step": 44140 }, { "epoch": 0.8491160250215164, "grad_norm": 427.3905961534483, "learning_rate": 1.1244768198774047e-06, "loss": 212.7839, "step": 44150 }, { "epoch": 0.8493083502819969, "grad_norm": 414.7022196347994, "learning_rate": 1.121666712904762e-06, "loss": 223.0878, "step": 44160 }, { "epoch": 0.8495006755424774, "grad_norm": 351.3713145163187, "learning_rate": 1.1188599129646382e-06, "loss": 205.506, "step": 44170 }, { "epoch": 0.849693000802958, "grad_norm": 415.7091690217121, "learning_rate": 1.116056421102517e-06, "loss": 213.8812, "step": 44180 }, { "epoch": 0.8498853260634385, "grad_norm": 367.5481271184139, "learning_rate": 1.113256238362659e-06, "loss": 210.2667, "step": 44190 }, { "epoch": 0.850077651323919, "grad_norm": 405.32896873771574, "learning_rate": 1.1104593657880812e-06, "loss": 205.0123, "step": 44200 }, { "epoch": 0.8502699765843995, "grad_norm": 404.8301095268004, "learning_rate": 1.1076658044205746e-06, "loss": 212.5438, "step": 44210 }, { "epoch": 0.85046230184488, "grad_norm": 378.41032147574657, "learning_rate": 1.1048755553006928e-06, "loss": 211.2558, "step": 44220 }, { "epoch": 0.8506546271053606, "grad_norm": 365.29816154303506, "learning_rate": 1.1020886194677605e-06, "loss": 201.2551, "step": 44230 }, { "epoch": 0.8508469523658411, "grad_norm": 381.32882627359896, "learning_rate": 1.0993049979598635e-06, "loss": 206.3111, "step": 44240 }, { "epoch": 0.8510392776263216, "grad_norm": 372.40198755307824, "learning_rate": 1.0965246918138529e-06, "loss": 213.805, "step": 44250 }, { "epoch": 0.8512316028868021, "grad_norm": 374.1485402508953, "learning_rate": 1.093747702065351e-06, "loss": 204.924, "step": 44260 }, { "epoch": 0.8514239281472827, "grad_norm": 378.64906319014085, "learning_rate": 1.090974029748736e-06, "loss": 207.0282, "step": 44270 }, { "epoch": 0.8516162534077633, "grad_norm": 369.9100250219373, "learning_rate": 1.0882036758971592e-06, "loss": 205.483, "step": 44280 }, { "epoch": 0.8518085786682438, "grad_norm": 377.76284828261345, "learning_rate": 1.0854366415425289e-06, "loss": 214.42, "step": 44290 }, { "epoch": 0.8520009039287243, "grad_norm": 332.81151595857335, "learning_rate": 1.0826729277155224e-06, "loss": 205.6704, "step": 44300 }, { "epoch": 0.8521932291892048, "grad_norm": 392.98028017869154, "learning_rate": 1.0799125354455752e-06, "loss": 213.0135, "step": 44310 }, { "epoch": 0.8523855544496853, "grad_norm": 373.1936930199224, "learning_rate": 1.0771554657608896e-06, "loss": 206.1825, "step": 44320 }, { "epoch": 0.8525778797101659, "grad_norm": 410.2686245364778, "learning_rate": 1.0744017196884248e-06, "loss": 204.4684, "step": 44330 }, { "epoch": 0.8527702049706464, "grad_norm": 374.00561560123856, "learning_rate": 1.0716512982539106e-06, "loss": 201.9522, "step": 44340 }, { "epoch": 0.8529625302311269, "grad_norm": 362.53881544070924, "learning_rate": 1.0689042024818307e-06, "loss": 205.2763, "step": 44350 }, { "epoch": 0.8531548554916074, "grad_norm": 438.3463551343566, "learning_rate": 1.0661604333954312e-06, "loss": 210.8474, "step": 44360 }, { "epoch": 0.8533471807520879, "grad_norm": 345.4678441637618, "learning_rate": 1.0634199920167255e-06, "loss": 202.4046, "step": 44370 }, { "epoch": 0.8535395060125684, "grad_norm": 372.98725728652727, "learning_rate": 1.0606828793664804e-06, "loss": 204.8702, "step": 44380 }, { "epoch": 0.853731831273049, "grad_norm": 403.59533892318296, "learning_rate": 1.0579490964642247e-06, "loss": 207.6489, "step": 44390 }, { "epoch": 0.8539241565335295, "grad_norm": 406.1277687126905, "learning_rate": 1.0552186443282464e-06, "loss": 205.9058, "step": 44400 }, { "epoch": 0.85411648179401, "grad_norm": 359.94142885986656, "learning_rate": 1.0524915239755939e-06, "loss": 200.386, "step": 44410 }, { "epoch": 0.8543088070544905, "grad_norm": 402.329057345141, "learning_rate": 1.0497677364220792e-06, "loss": 205.3322, "step": 44420 }, { "epoch": 0.854501132314971, "grad_norm": 350.8751641643714, "learning_rate": 1.047047282682262e-06, "loss": 210.0956, "step": 44430 }, { "epoch": 0.8546934575754516, "grad_norm": 387.1363118151032, "learning_rate": 1.0443301637694713e-06, "loss": 212.7379, "step": 44440 }, { "epoch": 0.8548857828359321, "grad_norm": 390.1386210830029, "learning_rate": 1.0416163806957857e-06, "loss": 197.3423, "step": 44450 }, { "epoch": 0.8550781080964126, "grad_norm": 386.67706049782305, "learning_rate": 1.0389059344720475e-06, "loss": 206.0977, "step": 44460 }, { "epoch": 0.8552704333568931, "grad_norm": 361.4399360569785, "learning_rate": 1.0361988261078482e-06, "loss": 202.2557, "step": 44470 }, { "epoch": 0.8554627586173738, "grad_norm": 374.41295180218486, "learning_rate": 1.0334950566115466e-06, "loss": 200.1122, "step": 44480 }, { "epoch": 0.8556550838778543, "grad_norm": 375.90329301969854, "learning_rate": 1.0307946269902492e-06, "loss": 206.7902, "step": 44490 }, { "epoch": 0.8558474091383348, "grad_norm": 358.0379686092457, "learning_rate": 1.0280975382498225e-06, "loss": 207.3438, "step": 44500 }, { "epoch": 0.8560397343988153, "grad_norm": 370.3599343400562, "learning_rate": 1.0254037913948845e-06, "loss": 195.9171, "step": 44510 }, { "epoch": 0.8562320596592958, "grad_norm": 384.37643309280776, "learning_rate": 1.0227133874288152e-06, "loss": 210.6145, "step": 44520 }, { "epoch": 0.8564243849197763, "grad_norm": 361.6373646381273, "learning_rate": 1.0200263273537458e-06, "loss": 205.4525, "step": 44530 }, { "epoch": 0.8566167101802569, "grad_norm": 368.8958150630338, "learning_rate": 1.0173426121705577e-06, "loss": 209.8104, "step": 44540 }, { "epoch": 0.8568090354407374, "grad_norm": 382.9865762354701, "learning_rate": 1.0146622428788943e-06, "loss": 206.8092, "step": 44550 }, { "epoch": 0.8570013607012179, "grad_norm": 393.93271411196594, "learning_rate": 1.0119852204771463e-06, "loss": 213.0676, "step": 44560 }, { "epoch": 0.8571936859616984, "grad_norm": 358.78545290721587, "learning_rate": 1.0093115459624637e-06, "loss": 203.6277, "step": 44570 }, { "epoch": 0.857386011222179, "grad_norm": 353.05483385192855, "learning_rate": 1.0066412203307419e-06, "loss": 210.3207, "step": 44580 }, { "epoch": 0.8575783364826595, "grad_norm": 397.2383677595859, "learning_rate": 1.0039742445766376e-06, "loss": 204.5051, "step": 44590 }, { "epoch": 0.85777066174314, "grad_norm": 382.3735376840602, "learning_rate": 1.0013106196935528e-06, "loss": 210.2912, "step": 44600 }, { "epoch": 0.8579629870036205, "grad_norm": 410.49734290936505, "learning_rate": 9.986503466736419e-07, "loss": 202.3068, "step": 44610 }, { "epoch": 0.858155312264101, "grad_norm": 373.4030813457647, "learning_rate": 9.959934265078176e-07, "loss": 212.7593, "step": 44620 }, { "epoch": 0.8583476375245815, "grad_norm": 413.0165296247096, "learning_rate": 9.933398601857347e-07, "loss": 203.608, "step": 44630 }, { "epoch": 0.8585399627850621, "grad_norm": 373.5339538200506, "learning_rate": 9.90689648695804e-07, "loss": 202.5312, "step": 44640 }, { "epoch": 0.8587322880455426, "grad_norm": 390.02380358076636, "learning_rate": 9.880427930251834e-07, "loss": 203.2612, "step": 44650 }, { "epoch": 0.8589246133060231, "grad_norm": 356.10087692230826, "learning_rate": 9.853992941597878e-07, "loss": 203.1021, "step": 44660 }, { "epoch": 0.8591169385665036, "grad_norm": 364.3438617370437, "learning_rate": 9.827591530842729e-07, "loss": 201.0466, "step": 44670 }, { "epoch": 0.8593092638269842, "grad_norm": 343.3440077623596, "learning_rate": 9.801223707820484e-07, "loss": 202.8622, "step": 44680 }, { "epoch": 0.8595015890874648, "grad_norm": 363.3457369517634, "learning_rate": 9.774889482352735e-07, "loss": 200.0119, "step": 44690 }, { "epoch": 0.8596939143479453, "grad_norm": 338.7743387512505, "learning_rate": 9.74858886424852e-07, "loss": 202.2917, "step": 44700 }, { "epoch": 0.8598862396084258, "grad_norm": 404.3062313829463, "learning_rate": 9.722321863304418e-07, "loss": 208.2456, "step": 44710 }, { "epoch": 0.8600785648689063, "grad_norm": 396.1293156302092, "learning_rate": 9.696088489304412e-07, "loss": 208.8752, "step": 44720 }, { "epoch": 0.8602708901293868, "grad_norm": 367.6745241601311, "learning_rate": 9.669888752020061e-07, "loss": 204.5631, "step": 44730 }, { "epoch": 0.8604632153898674, "grad_norm": 349.8437209042745, "learning_rate": 9.643722661210285e-07, "loss": 197.7485, "step": 44740 }, { "epoch": 0.8606555406503479, "grad_norm": 354.87852525496646, "learning_rate": 9.617590226621543e-07, "loss": 202.4761, "step": 44750 }, { "epoch": 0.8608478659108284, "grad_norm": 366.24691164947046, "learning_rate": 9.5914914579877e-07, "loss": 200.2327, "step": 44760 }, { "epoch": 0.8610401911713089, "grad_norm": 375.11075292850165, "learning_rate": 9.565426365030172e-07, "loss": 206.2503, "step": 44770 }, { "epoch": 0.8612325164317894, "grad_norm": 346.9034725974352, "learning_rate": 9.539394957457737e-07, "loss": 212.3791, "step": 44780 }, { "epoch": 0.86142484169227, "grad_norm": 334.45724346266996, "learning_rate": 9.51339724496666e-07, "loss": 203.4135, "step": 44790 }, { "epoch": 0.8616171669527505, "grad_norm": 333.1165354642953, "learning_rate": 9.487433237240695e-07, "loss": 200.082, "step": 44800 }, { "epoch": 0.861809492213231, "grad_norm": 376.32224355143666, "learning_rate": 9.461502943950973e-07, "loss": 210.6318, "step": 44810 }, { "epoch": 0.8620018174737115, "grad_norm": 322.53950362849884, "learning_rate": 9.435606374756123e-07, "loss": 197.1338, "step": 44820 }, { "epoch": 0.862194142734192, "grad_norm": 367.08690999989363, "learning_rate": 9.409743539302152e-07, "loss": 197.6756, "step": 44830 }, { "epoch": 0.8623864679946726, "grad_norm": 389.54716928078994, "learning_rate": 9.383914447222576e-07, "loss": 206.0583, "step": 44840 }, { "epoch": 0.8625787932551531, "grad_norm": 395.36836908466665, "learning_rate": 9.358119108138309e-07, "loss": 207.0507, "step": 44850 }, { "epoch": 0.8627711185156336, "grad_norm": 364.116585749919, "learning_rate": 9.332357531657644e-07, "loss": 197.6354, "step": 44860 }, { "epoch": 0.8629634437761141, "grad_norm": 351.18462138856256, "learning_rate": 9.306629727376404e-07, "loss": 203.6384, "step": 44870 }, { "epoch": 0.8631557690365947, "grad_norm": 383.54192358548283, "learning_rate": 9.280935704877736e-07, "loss": 200.413, "step": 44880 }, { "epoch": 0.8633480942970753, "grad_norm": 401.6479467350847, "learning_rate": 9.255275473732239e-07, "loss": 207.4342, "step": 44890 }, { "epoch": 0.8635404195575558, "grad_norm": 382.1092868960059, "learning_rate": 9.229649043497924e-07, "loss": 207.5034, "step": 44900 }, { "epoch": 0.8637327448180363, "grad_norm": 425.518572987113, "learning_rate": 9.20405642372022e-07, "loss": 208.9183, "step": 44910 }, { "epoch": 0.8639250700785168, "grad_norm": 422.7285676914632, "learning_rate": 9.178497623931959e-07, "loss": 206.2202, "step": 44920 }, { "epoch": 0.8641173953389973, "grad_norm": 383.11994673804685, "learning_rate": 9.152972653653369e-07, "loss": 202.4289, "step": 44930 }, { "epoch": 0.8643097205994779, "grad_norm": 384.1125325853366, "learning_rate": 9.127481522392068e-07, "loss": 206.058, "step": 44940 }, { "epoch": 0.8645020458599584, "grad_norm": 366.9270733253421, "learning_rate": 9.102024239643092e-07, "loss": 204.4223, "step": 44950 }, { "epoch": 0.8646943711204389, "grad_norm": 424.3447502841698, "learning_rate": 9.076600814888869e-07, "loss": 206.8118, "step": 44960 }, { "epoch": 0.8648866963809194, "grad_norm": 367.60663290787295, "learning_rate": 9.051211257599169e-07, "loss": 207.4621, "step": 44970 }, { "epoch": 0.8650790216413999, "grad_norm": 345.05789977402355, "learning_rate": 9.025855577231224e-07, "loss": 206.2592, "step": 44980 }, { "epoch": 0.8652713469018805, "grad_norm": 379.141354890545, "learning_rate": 9.000533783229581e-07, "loss": 207.8849, "step": 44990 }, { "epoch": 0.865463672162361, "grad_norm": 349.9815884882839, "learning_rate": 8.975245885026207e-07, "loss": 209.9808, "step": 45000 }, { "epoch": 0.8656559974228415, "grad_norm": 432.743954605365, "learning_rate": 8.949991892040399e-07, "loss": 207.3274, "step": 45010 }, { "epoch": 0.865848322683322, "grad_norm": 402.31527873343225, "learning_rate": 8.92477181367889e-07, "loss": 201.7501, "step": 45020 }, { "epoch": 0.8660406479438025, "grad_norm": 358.62038236501905, "learning_rate": 8.899585659335719e-07, "loss": 203.6723, "step": 45030 }, { "epoch": 0.866232973204283, "grad_norm": 414.9560503106033, "learning_rate": 8.874433438392305e-07, "loss": 219.7011, "step": 45040 }, { "epoch": 0.8664252984647636, "grad_norm": 365.7653499690012, "learning_rate": 8.849315160217465e-07, "loss": 204.9315, "step": 45050 }, { "epoch": 0.8666176237252441, "grad_norm": 401.5348786535948, "learning_rate": 8.824230834167325e-07, "loss": 204.5615, "step": 45060 }, { "epoch": 0.8668099489857246, "grad_norm": 359.1950619519764, "learning_rate": 8.799180469585378e-07, "loss": 216.4338, "step": 45070 }, { "epoch": 0.8670022742462051, "grad_norm": 368.6054977733908, "learning_rate": 8.77416407580246e-07, "loss": 204.5957, "step": 45080 }, { "epoch": 0.8671945995066858, "grad_norm": 390.0618423988173, "learning_rate": 8.749181662136785e-07, "loss": 210.4847, "step": 45090 }, { "epoch": 0.8673869247671663, "grad_norm": 386.23430742941576, "learning_rate": 8.724233237893897e-07, "loss": 203.7169, "step": 45100 }, { "epoch": 0.8675792500276468, "grad_norm": 381.93347106147223, "learning_rate": 8.699318812366641e-07, "loss": 206.1659, "step": 45110 }, { "epoch": 0.8677715752881273, "grad_norm": 377.03377994121126, "learning_rate": 8.67443839483526e-07, "loss": 209.6646, "step": 45120 }, { "epoch": 0.8679639005486078, "grad_norm": 361.7981862240369, "learning_rate": 8.649591994567275e-07, "loss": 202.7763, "step": 45130 }, { "epoch": 0.8681562258090884, "grad_norm": 347.0297282655106, "learning_rate": 8.62477962081758e-07, "loss": 204.6304, "step": 45140 }, { "epoch": 0.8683485510695689, "grad_norm": 401.05624166746236, "learning_rate": 8.600001282828341e-07, "loss": 211.8197, "step": 45150 }, { "epoch": 0.8685408763300494, "grad_norm": 389.49478016128694, "learning_rate": 8.57525698982914e-07, "loss": 206.5129, "step": 45160 }, { "epoch": 0.8687332015905299, "grad_norm": 355.5411996169538, "learning_rate": 8.550546751036759e-07, "loss": 211.5079, "step": 45170 }, { "epoch": 0.8689255268510104, "grad_norm": 397.39041976636537, "learning_rate": 8.525870575655393e-07, "loss": 205.2016, "step": 45180 }, { "epoch": 0.869117852111491, "grad_norm": 378.08690407276947, "learning_rate": 8.501228472876466e-07, "loss": 208.413, "step": 45190 }, { "epoch": 0.8693101773719715, "grad_norm": 367.74648454741344, "learning_rate": 8.476620451878803e-07, "loss": 203.0439, "step": 45200 }, { "epoch": 0.869502502632452, "grad_norm": 362.45996382295584, "learning_rate": 8.45204652182846e-07, "loss": 207.6312, "step": 45210 }, { "epoch": 0.8696948278929325, "grad_norm": 368.74715038315435, "learning_rate": 8.427506691878806e-07, "loss": 207.3251, "step": 45220 }, { "epoch": 0.869887153153413, "grad_norm": 444.66132386606756, "learning_rate": 8.403000971170561e-07, "loss": 202.5711, "step": 45230 }, { "epoch": 0.8700794784138935, "grad_norm": 368.5699517376908, "learning_rate": 8.378529368831667e-07, "loss": 205.0931, "step": 45240 }, { "epoch": 0.8702718036743741, "grad_norm": 373.52988715555995, "learning_rate": 8.354091893977401e-07, "loss": 202.8805, "step": 45250 }, { "epoch": 0.8704641289348546, "grad_norm": 330.02472461193537, "learning_rate": 8.329688555710336e-07, "loss": 205.5159, "step": 45260 }, { "epoch": 0.8706564541953351, "grad_norm": 369.8401631096334, "learning_rate": 8.305319363120279e-07, "loss": 209.6239, "step": 45270 }, { "epoch": 0.8708487794558156, "grad_norm": 371.93020182746807, "learning_rate": 8.280984325284392e-07, "loss": 204.057, "step": 45280 }, { "epoch": 0.8710411047162963, "grad_norm": 366.4124863602406, "learning_rate": 8.256683451267044e-07, "loss": 202.2217, "step": 45290 }, { "epoch": 0.8712334299767768, "grad_norm": 401.3745714268526, "learning_rate": 8.232416750119921e-07, "loss": 204.0654, "step": 45300 }, { "epoch": 0.8714257552372573, "grad_norm": 438.6099221597723, "learning_rate": 8.208184230881966e-07, "loss": 204.2088, "step": 45310 }, { "epoch": 0.8716180804977378, "grad_norm": 382.21974873877775, "learning_rate": 8.183985902579405e-07, "loss": 202.3825, "step": 45320 }, { "epoch": 0.8718104057582183, "grad_norm": 349.6362483656875, "learning_rate": 8.159821774225685e-07, "loss": 197.3225, "step": 45330 }, { "epoch": 0.8720027310186989, "grad_norm": 373.74093574235343, "learning_rate": 8.13569185482157e-07, "loss": 201.8258, "step": 45340 }, { "epoch": 0.8721950562791794, "grad_norm": 344.8620381836919, "learning_rate": 8.111596153355061e-07, "loss": 198.8958, "step": 45350 }, { "epoch": 0.8723873815396599, "grad_norm": 363.25728616523907, "learning_rate": 8.08753467880139e-07, "loss": 205.9329, "step": 45360 }, { "epoch": 0.8725797068001404, "grad_norm": 361.92353161253294, "learning_rate": 8.063507440123052e-07, "loss": 200.0677, "step": 45370 }, { "epoch": 0.8727720320606209, "grad_norm": 402.9977827066541, "learning_rate": 8.039514446269836e-07, "loss": 209.2742, "step": 45380 }, { "epoch": 0.8729643573211014, "grad_norm": 377.60017382719764, "learning_rate": 8.015555706178702e-07, "loss": 203.6551, "step": 45390 }, { "epoch": 0.873156682581582, "grad_norm": 390.44910722744527, "learning_rate": 7.991631228773889e-07, "loss": 207.5807, "step": 45400 }, { "epoch": 0.8733490078420625, "grad_norm": 416.6786635853311, "learning_rate": 7.967741022966857e-07, "loss": 200.2847, "step": 45410 }, { "epoch": 0.873541333102543, "grad_norm": 377.07772320469735, "learning_rate": 7.943885097656356e-07, "loss": 198.2747, "step": 45420 }, { "epoch": 0.8737336583630235, "grad_norm": 365.40415920328473, "learning_rate": 7.920063461728311e-07, "loss": 200.7008, "step": 45430 }, { "epoch": 0.873925983623504, "grad_norm": 369.26419427899185, "learning_rate": 7.896276124055846e-07, "loss": 202.8512, "step": 45440 }, { "epoch": 0.8741183088839846, "grad_norm": 382.19082452593113, "learning_rate": 7.872523093499396e-07, "loss": 206.8361, "step": 45450 }, { "epoch": 0.8743106341444651, "grad_norm": 368.6169331143544, "learning_rate": 7.848804378906561e-07, "loss": 200.6371, "step": 45460 }, { "epoch": 0.8745029594049456, "grad_norm": 332.67222186556825, "learning_rate": 7.825119989112173e-07, "loss": 198.5944, "step": 45470 }, { "epoch": 0.8746952846654261, "grad_norm": 375.18432048767204, "learning_rate": 7.801469932938255e-07, "loss": 205.6325, "step": 45480 }, { "epoch": 0.8748876099259066, "grad_norm": 345.5457286430055, "learning_rate": 7.777854219194092e-07, "loss": 199.7954, "step": 45490 }, { "epoch": 0.8750799351863873, "grad_norm": 358.8387028070915, "learning_rate": 7.754272856676126e-07, "loss": 208.0702, "step": 45500 }, { "epoch": 0.8752722604468678, "grad_norm": 411.84363556380816, "learning_rate": 7.73072585416802e-07, "loss": 208.7903, "step": 45510 }, { "epoch": 0.8754645857073483, "grad_norm": 372.36643076886344, "learning_rate": 7.707213220440679e-07, "loss": 198.5439, "step": 45520 }, { "epoch": 0.8756569109678288, "grad_norm": 354.6240118019231, "learning_rate": 7.683734964252143e-07, "loss": 203.2871, "step": 45530 }, { "epoch": 0.8758492362283093, "grad_norm": 400.4324176672373, "learning_rate": 7.66029109434766e-07, "loss": 207.1067, "step": 45540 }, { "epoch": 0.8760415614887899, "grad_norm": 361.84110404032936, "learning_rate": 7.636881619459724e-07, "loss": 204.4386, "step": 45550 }, { "epoch": 0.8762338867492704, "grad_norm": 413.3732866995702, "learning_rate": 7.613506548307936e-07, "loss": 205.0402, "step": 45560 }, { "epoch": 0.8764262120097509, "grad_norm": 389.5679749808248, "learning_rate": 7.590165889599166e-07, "loss": 202.9151, "step": 45570 }, { "epoch": 0.8766185372702314, "grad_norm": 345.7123025795795, "learning_rate": 7.566859652027381e-07, "loss": 200.5478, "step": 45580 }, { "epoch": 0.8768108625307119, "grad_norm": 393.9553002767725, "learning_rate": 7.543587844273814e-07, "loss": 204.4718, "step": 45590 }, { "epoch": 0.8770031877911925, "grad_norm": 380.5869976110789, "learning_rate": 7.52035047500681e-07, "loss": 199.6537, "step": 45600 }, { "epoch": 0.877195513051673, "grad_norm": 379.71790600150086, "learning_rate": 7.497147552881901e-07, "loss": 205.6837, "step": 45610 }, { "epoch": 0.8773878383121535, "grad_norm": 391.56070334823016, "learning_rate": 7.473979086541772e-07, "loss": 205.2985, "step": 45620 }, { "epoch": 0.877580163572634, "grad_norm": 375.6810724889429, "learning_rate": 7.450845084616332e-07, "loss": 201.014, "step": 45630 }, { "epoch": 0.8777724888331145, "grad_norm": 355.06306727775114, "learning_rate": 7.427745555722598e-07, "loss": 211.5624, "step": 45640 }, { "epoch": 0.8779648140935951, "grad_norm": 382.6134881670004, "learning_rate": 7.404680508464767e-07, "loss": 207.2711, "step": 45650 }, { "epoch": 0.8781571393540756, "grad_norm": 415.84241390706507, "learning_rate": 7.381649951434167e-07, "loss": 206.2194, "step": 45660 }, { "epoch": 0.8783494646145561, "grad_norm": 390.43837367641703, "learning_rate": 7.358653893209333e-07, "loss": 211.0992, "step": 45670 }, { "epoch": 0.8785417898750366, "grad_norm": 345.89120578020845, "learning_rate": 7.335692342355882e-07, "loss": 204.2241, "step": 45680 }, { "epoch": 0.8787341151355171, "grad_norm": 358.7017660828885, "learning_rate": 7.312765307426662e-07, "loss": 199.9989, "step": 45690 }, { "epoch": 0.8789264403959978, "grad_norm": 365.00133067731844, "learning_rate": 7.28987279696155e-07, "loss": 206.2718, "step": 45700 }, { "epoch": 0.8791187656564783, "grad_norm": 372.8764711544401, "learning_rate": 7.267014819487695e-07, "loss": 209.8032, "step": 45710 }, { "epoch": 0.8793110909169588, "grad_norm": 371.093758204294, "learning_rate": 7.244191383519272e-07, "loss": 202.6339, "step": 45720 }, { "epoch": 0.8795034161774393, "grad_norm": 394.87972330526776, "learning_rate": 7.221402497557629e-07, "loss": 200.9425, "step": 45730 }, { "epoch": 0.8796957414379198, "grad_norm": 376.815066501531, "learning_rate": 7.198648170091294e-07, "loss": 199.555, "step": 45740 }, { "epoch": 0.8798880666984004, "grad_norm": 385.33257332086646, "learning_rate": 7.175928409595844e-07, "loss": 203.5715, "step": 45750 }, { "epoch": 0.8800803919588809, "grad_norm": 378.2450231973383, "learning_rate": 7.153243224534001e-07, "loss": 207.6875, "step": 45760 }, { "epoch": 0.8802727172193614, "grad_norm": 382.7940984393643, "learning_rate": 7.130592623355659e-07, "loss": 201.5336, "step": 45770 }, { "epoch": 0.8804650424798419, "grad_norm": 349.33252287289235, "learning_rate": 7.10797661449778e-07, "loss": 203.0433, "step": 45780 }, { "epoch": 0.8806573677403224, "grad_norm": 384.9729025551196, "learning_rate": 7.085395206384449e-07, "loss": 204.3773, "step": 45790 }, { "epoch": 0.880849693000803, "grad_norm": 352.9406174020772, "learning_rate": 7.062848407426859e-07, "loss": 200.2767, "step": 45800 }, { "epoch": 0.8810420182612835, "grad_norm": 380.2513907905553, "learning_rate": 7.040336226023336e-07, "loss": 199.0211, "step": 45810 }, { "epoch": 0.881234343521764, "grad_norm": 355.21992081418955, "learning_rate": 7.017858670559274e-07, "loss": 206.7003, "step": 45820 }, { "epoch": 0.8814266687822445, "grad_norm": 389.61976290166365, "learning_rate": 6.99541574940722e-07, "loss": 208.8777, "step": 45830 }, { "epoch": 0.881618994042725, "grad_norm": 374.2619308356377, "learning_rate": 6.973007470926774e-07, "loss": 204.061, "step": 45840 }, { "epoch": 0.8818113193032056, "grad_norm": 372.2038190396962, "learning_rate": 6.95063384346466e-07, "loss": 204.5984, "step": 45850 }, { "epoch": 0.8820036445636861, "grad_norm": 366.9288915472056, "learning_rate": 6.92829487535468e-07, "loss": 207.5679, "step": 45860 }, { "epoch": 0.8821959698241666, "grad_norm": 368.095059287682, "learning_rate": 6.905990574917709e-07, "loss": 206.0688, "step": 45870 }, { "epoch": 0.8823882950846471, "grad_norm": 367.48039929585497, "learning_rate": 6.88372095046177e-07, "loss": 199.513, "step": 45880 }, { "epoch": 0.8825806203451276, "grad_norm": 375.3178781970457, "learning_rate": 6.861486010281915e-07, "loss": 210.4137, "step": 45890 }, { "epoch": 0.8827729456056082, "grad_norm": 367.39012211890025, "learning_rate": 6.839285762660275e-07, "loss": 198.4013, "step": 45900 }, { "epoch": 0.8829652708660888, "grad_norm": 351.84173165269743, "learning_rate": 6.81712021586608e-07, "loss": 203.327, "step": 45910 }, { "epoch": 0.8831575961265693, "grad_norm": 353.607786444627, "learning_rate": 6.794989378155659e-07, "loss": 206.263, "step": 45920 }, { "epoch": 0.8833499213870498, "grad_norm": 355.1080079478987, "learning_rate": 6.772893257772361e-07, "loss": 209.7235, "step": 45930 }, { "epoch": 0.8835422466475303, "grad_norm": 379.0470728426872, "learning_rate": 6.750831862946605e-07, "loss": 214.05, "step": 45940 }, { "epoch": 0.8837345719080109, "grad_norm": 378.9822573013641, "learning_rate": 6.728805201895949e-07, "loss": 208.5714, "step": 45950 }, { "epoch": 0.8839268971684914, "grad_norm": 379.19435655325555, "learning_rate": 6.70681328282492e-07, "loss": 193.3402, "step": 45960 }, { "epoch": 0.8841192224289719, "grad_norm": 365.1627720191045, "learning_rate": 6.684856113925143e-07, "loss": 204.8902, "step": 45970 }, { "epoch": 0.8843115476894524, "grad_norm": 390.8553183688744, "learning_rate": 6.662933703375307e-07, "loss": 210.1179, "step": 45980 }, { "epoch": 0.8845038729499329, "grad_norm": 381.18685966428393, "learning_rate": 6.641046059341171e-07, "loss": 197.886, "step": 45990 }, { "epoch": 0.8846961982104135, "grad_norm": 414.52283383046336, "learning_rate": 6.619193189975515e-07, "loss": 206.7159, "step": 46000 }, { "epoch": 0.884888523470894, "grad_norm": 364.0085577681013, "learning_rate": 6.597375103418135e-07, "loss": 202.9025, "step": 46010 }, { "epoch": 0.8850808487313745, "grad_norm": 335.3442443427356, "learning_rate": 6.575591807795944e-07, "loss": 196.0612, "step": 46020 }, { "epoch": 0.885273173991855, "grad_norm": 366.0912740882515, "learning_rate": 6.553843311222863e-07, "loss": 200.8568, "step": 46030 }, { "epoch": 0.8854654992523355, "grad_norm": 348.7560643861536, "learning_rate": 6.532129621799832e-07, "loss": 204.3942, "step": 46040 }, { "epoch": 0.885657824512816, "grad_norm": 402.2090307279005, "learning_rate": 6.510450747614816e-07, "loss": 209.5204, "step": 46050 }, { "epoch": 0.8858501497732966, "grad_norm": 355.3617180768231, "learning_rate": 6.488806696742889e-07, "loss": 212.6424, "step": 46060 }, { "epoch": 0.8860424750337771, "grad_norm": 369.40341690346924, "learning_rate": 6.46719747724609e-07, "loss": 216.6485, "step": 46070 }, { "epoch": 0.8862348002942576, "grad_norm": 383.3276577951005, "learning_rate": 6.44562309717347e-07, "loss": 210.4688, "step": 46080 }, { "epoch": 0.8864271255547381, "grad_norm": 403.9067351721234, "learning_rate": 6.424083564561134e-07, "loss": 207.8883, "step": 46090 }, { "epoch": 0.8866194508152186, "grad_norm": 360.23802338025024, "learning_rate": 6.402578887432232e-07, "loss": 200.543, "step": 46100 }, { "epoch": 0.8868117760756993, "grad_norm": 404.9765070823738, "learning_rate": 6.381109073796865e-07, "loss": 207.1947, "step": 46110 }, { "epoch": 0.8870041013361798, "grad_norm": 377.127081222995, "learning_rate": 6.359674131652204e-07, "loss": 199.1138, "step": 46120 }, { "epoch": 0.8871964265966603, "grad_norm": 419.97193095252965, "learning_rate": 6.338274068982408e-07, "loss": 205.2515, "step": 46130 }, { "epoch": 0.8873887518571408, "grad_norm": 373.6805519913914, "learning_rate": 6.316908893758656e-07, "loss": 202.9596, "step": 46140 }, { "epoch": 0.8875810771176214, "grad_norm": 377.849801341338, "learning_rate": 6.295578613939113e-07, "loss": 204.4356, "step": 46150 }, { "epoch": 0.8877734023781019, "grad_norm": 371.3649719915459, "learning_rate": 6.274283237468948e-07, "loss": 198.1371, "step": 46160 }, { "epoch": 0.8879657276385824, "grad_norm": 366.7234511654086, "learning_rate": 6.25302277228036e-07, "loss": 207.253, "step": 46170 }, { "epoch": 0.8881580528990629, "grad_norm": 398.98612201339967, "learning_rate": 6.231797226292502e-07, "loss": 209.8231, "step": 46180 }, { "epoch": 0.8883503781595434, "grad_norm": 365.6950231288033, "learning_rate": 6.210606607411529e-07, "loss": 198.6315, "step": 46190 }, { "epoch": 0.888542703420024, "grad_norm": 356.54294065426336, "learning_rate": 6.189450923530627e-07, "loss": 199.4474, "step": 46200 }, { "epoch": 0.8887350286805045, "grad_norm": 355.00000362213683, "learning_rate": 6.168330182529924e-07, "loss": 208.0717, "step": 46210 }, { "epoch": 0.888927353940985, "grad_norm": 365.0013600609673, "learning_rate": 6.147244392276541e-07, "loss": 204.4221, "step": 46220 }, { "epoch": 0.8891196792014655, "grad_norm": 333.9679464941792, "learning_rate": 6.126193560624583e-07, "loss": 207.7982, "step": 46230 }, { "epoch": 0.889312004461946, "grad_norm": 368.20901956424706, "learning_rate": 6.105177695415165e-07, "loss": 203.0013, "step": 46240 }, { "epoch": 0.8895043297224265, "grad_norm": 349.5582846764632, "learning_rate": 6.084196804476317e-07, "loss": 201.766, "step": 46250 }, { "epoch": 0.8896966549829071, "grad_norm": 362.5981944507473, "learning_rate": 6.063250895623096e-07, "loss": 200.6178, "step": 46260 }, { "epoch": 0.8898889802433876, "grad_norm": 468.65567792156855, "learning_rate": 6.042339976657486e-07, "loss": 210.9137, "step": 46270 }, { "epoch": 0.8900813055038681, "grad_norm": 374.7715979663631, "learning_rate": 6.021464055368498e-07, "loss": 204.2966, "step": 46280 }, { "epoch": 0.8902736307643486, "grad_norm": 342.5629581474716, "learning_rate": 6.000623139532036e-07, "loss": 210.7192, "step": 46290 }, { "epoch": 0.8904659560248291, "grad_norm": 347.0291314072588, "learning_rate": 5.979817236910979e-07, "loss": 204.352, "step": 46300 }, { "epoch": 0.8906582812853097, "grad_norm": 359.55541967698986, "learning_rate": 5.959046355255238e-07, "loss": 215.4892, "step": 46310 }, { "epoch": 0.8908506065457903, "grad_norm": 367.9546075898875, "learning_rate": 5.93831050230158e-07, "loss": 203.4867, "step": 46320 }, { "epoch": 0.8910429318062708, "grad_norm": 369.75157000110255, "learning_rate": 5.917609685773784e-07, "loss": 201.9396, "step": 46330 }, { "epoch": 0.8912352570667513, "grad_norm": 365.94807170332615, "learning_rate": 5.896943913382547e-07, "loss": 197.4694, "step": 46340 }, { "epoch": 0.8914275823272318, "grad_norm": 346.1240882698529, "learning_rate": 5.876313192825544e-07, "loss": 199.2389, "step": 46350 }, { "epoch": 0.8916199075877124, "grad_norm": 388.23454574162827, "learning_rate": 5.855717531787375e-07, "loss": 216.136, "step": 46360 }, { "epoch": 0.8918122328481929, "grad_norm": 482.01798595457234, "learning_rate": 5.835156937939568e-07, "loss": 209.7548, "step": 46370 }, { "epoch": 0.8920045581086734, "grad_norm": 383.9791955451826, "learning_rate": 5.814631418940641e-07, "loss": 202.5437, "step": 46380 }, { "epoch": 0.8921968833691539, "grad_norm": 366.6715312043861, "learning_rate": 5.794140982435981e-07, "loss": 203.0349, "step": 46390 }, { "epoch": 0.8923892086296344, "grad_norm": 377.4855170659537, "learning_rate": 5.773685636057924e-07, "loss": 204.4214, "step": 46400 }, { "epoch": 0.892581533890115, "grad_norm": 356.8191381889658, "learning_rate": 5.753265387425777e-07, "loss": 213.0747, "step": 46410 }, { "epoch": 0.8927738591505955, "grad_norm": 445.6339628561458, "learning_rate": 5.732880244145744e-07, "loss": 200.147, "step": 46420 }, { "epoch": 0.892966184411076, "grad_norm": 346.3965130011756, "learning_rate": 5.712530213810951e-07, "loss": 205.3982, "step": 46430 }, { "epoch": 0.8931585096715565, "grad_norm": 359.86112888369723, "learning_rate": 5.692215304001447e-07, "loss": 200.0912, "step": 46440 }, { "epoch": 0.893350834932037, "grad_norm": 371.2252252217051, "learning_rate": 5.671935522284177e-07, "loss": 207.2773, "step": 46450 }, { "epoch": 0.8935431601925176, "grad_norm": 387.7891305057454, "learning_rate": 5.651690876213067e-07, "loss": 209.9808, "step": 46460 }, { "epoch": 0.8937354854529981, "grad_norm": 376.3163557617063, "learning_rate": 5.631481373328895e-07, "loss": 210.1483, "step": 46470 }, { "epoch": 0.8939278107134786, "grad_norm": 374.02670898027696, "learning_rate": 5.61130702115934e-07, "loss": 207.9196, "step": 46480 }, { "epoch": 0.8941201359739591, "grad_norm": 395.47811864916605, "learning_rate": 5.591167827219057e-07, "loss": 204.4367, "step": 46490 }, { "epoch": 0.8943124612344396, "grad_norm": 418.67294003082634, "learning_rate": 5.571063799009546e-07, "loss": 205.7631, "step": 46500 }, { "epoch": 0.8945047864949202, "grad_norm": 358.3868145255379, "learning_rate": 5.550994944019216e-07, "loss": 204.9341, "step": 46510 }, { "epoch": 0.8946971117554008, "grad_norm": 385.46057471016724, "learning_rate": 5.53096126972339e-07, "loss": 206.154, "step": 46520 }, { "epoch": 0.8948894370158813, "grad_norm": 379.22927767318134, "learning_rate": 5.510962783584295e-07, "loss": 206.324, "step": 46530 }, { "epoch": 0.8950817622763618, "grad_norm": 391.7849643196316, "learning_rate": 5.490999493051008e-07, "loss": 202.4588, "step": 46540 }, { "epoch": 0.8952740875368423, "grad_norm": 367.13564177532015, "learning_rate": 5.471071405559547e-07, "loss": 204.6717, "step": 46550 }, { "epoch": 0.8954664127973229, "grad_norm": 367.52771561929217, "learning_rate": 5.451178528532786e-07, "loss": 205.6467, "step": 46560 }, { "epoch": 0.8956587380578034, "grad_norm": 370.97247477686295, "learning_rate": 5.431320869380519e-07, "loss": 195.571, "step": 46570 }, { "epoch": 0.8958510633182839, "grad_norm": 354.1037625399484, "learning_rate": 5.411498435499363e-07, "loss": 208.7288, "step": 46580 }, { "epoch": 0.8960433885787644, "grad_norm": 370.04010756323106, "learning_rate": 5.391711234272856e-07, "loss": 200.8438, "step": 46590 }, { "epoch": 0.8962357138392449, "grad_norm": 342.60862362183866, "learning_rate": 5.371959273071414e-07, "loss": 205.324, "step": 46600 }, { "epoch": 0.8964280390997255, "grad_norm": 340.18217719968425, "learning_rate": 5.352242559252308e-07, "loss": 196.882, "step": 46610 }, { "epoch": 0.896620364360206, "grad_norm": 354.5573567298894, "learning_rate": 5.332561100159683e-07, "loss": 205.8079, "step": 46620 }, { "epoch": 0.8968126896206865, "grad_norm": 373.54553948249486, "learning_rate": 5.312914903124566e-07, "loss": 201.3235, "step": 46630 }, { "epoch": 0.897005014881167, "grad_norm": 373.0912869085149, "learning_rate": 5.293303975464836e-07, "loss": 203.0003, "step": 46640 }, { "epoch": 0.8971973401416475, "grad_norm": 387.36782777616816, "learning_rate": 5.273728324485261e-07, "loss": 206.0036, "step": 46650 }, { "epoch": 0.8973896654021281, "grad_norm": 374.1344936069173, "learning_rate": 5.254187957477397e-07, "loss": 215.6528, "step": 46660 }, { "epoch": 0.8975819906626086, "grad_norm": 366.80684221497444, "learning_rate": 5.234682881719766e-07, "loss": 205.1975, "step": 46670 }, { "epoch": 0.8977743159230891, "grad_norm": 354.88616736075994, "learning_rate": 5.215213104477645e-07, "loss": 202.4825, "step": 46680 }, { "epoch": 0.8979666411835696, "grad_norm": 361.8222081322325, "learning_rate": 5.195778633003223e-07, "loss": 199.2351, "step": 46690 }, { "epoch": 0.8981589664440501, "grad_norm": 352.66703581608317, "learning_rate": 5.176379474535509e-07, "loss": 201.5392, "step": 46700 }, { "epoch": 0.8983512917045307, "grad_norm": 381.60741418949686, "learning_rate": 5.1570156363004e-07, "loss": 207.155, "step": 46710 }, { "epoch": 0.8985436169650112, "grad_norm": 370.25547878484844, "learning_rate": 5.13768712551057e-07, "loss": 210.8079, "step": 46720 }, { "epoch": 0.8987359422254918, "grad_norm": 347.8823398235995, "learning_rate": 5.118393949365574e-07, "loss": 203.7626, "step": 46730 }, { "epoch": 0.8989282674859723, "grad_norm": 447.6125248578692, "learning_rate": 5.099136115051829e-07, "loss": 204.6365, "step": 46740 }, { "epoch": 0.8991205927464528, "grad_norm": 364.81190244947885, "learning_rate": 5.079913629742539e-07, "loss": 205.7409, "step": 46750 }, { "epoch": 0.8993129180069334, "grad_norm": 402.75434587675386, "learning_rate": 5.060726500597768e-07, "loss": 210.9729, "step": 46760 }, { "epoch": 0.8995052432674139, "grad_norm": 371.0447523532784, "learning_rate": 5.041574734764376e-07, "loss": 203.5738, "step": 46770 }, { "epoch": 0.8996975685278944, "grad_norm": 379.84309738134664, "learning_rate": 5.022458339376124e-07, "loss": 202.9529, "step": 46780 }, { "epoch": 0.8998898937883749, "grad_norm": 373.46681249710264, "learning_rate": 5.003377321553538e-07, "loss": 208.3639, "step": 46790 }, { "epoch": 0.9000822190488554, "grad_norm": 360.8372071765622, "learning_rate": 4.984331688403976e-07, "loss": 206.3781, "step": 46800 }, { "epoch": 0.900274544309336, "grad_norm": 367.41910152285726, "learning_rate": 4.96532144702162e-07, "loss": 201.8339, "step": 46810 }, { "epoch": 0.9004668695698165, "grad_norm": 367.18844600287684, "learning_rate": 4.946346604487462e-07, "loss": 214.1873, "step": 46820 }, { "epoch": 0.900659194830297, "grad_norm": 362.82922440388523, "learning_rate": 4.927407167869346e-07, "loss": 202.1727, "step": 46830 }, { "epoch": 0.9008515200907775, "grad_norm": 377.3317954957144, "learning_rate": 4.908503144221877e-07, "loss": 203.4602, "step": 46840 }, { "epoch": 0.901043845351258, "grad_norm": 362.0899497662295, "learning_rate": 4.889634540586518e-07, "loss": 204.7197, "step": 46850 }, { "epoch": 0.9012361706117386, "grad_norm": 380.6867409610158, "learning_rate": 4.870801363991484e-07, "loss": 211.6256, "step": 46860 }, { "epoch": 0.9014284958722191, "grad_norm": 353.2190297394774, "learning_rate": 4.852003621451829e-07, "loss": 200.2173, "step": 46870 }, { "epoch": 0.9016208211326996, "grad_norm": 383.00260016165333, "learning_rate": 4.833241319969395e-07, "loss": 199.2871, "step": 46880 }, { "epoch": 0.9018131463931801, "grad_norm": 352.3503283548052, "learning_rate": 4.814514466532849e-07, "loss": 203.7786, "step": 46890 }, { "epoch": 0.9020054716536606, "grad_norm": 392.9533483299655, "learning_rate": 4.795823068117622e-07, "loss": 207.2969, "step": 46900 }, { "epoch": 0.9021977969141411, "grad_norm": 369.9887388449985, "learning_rate": 4.777167131685945e-07, "loss": 198.2329, "step": 46910 }, { "epoch": 0.9023901221746217, "grad_norm": 350.70354203666636, "learning_rate": 4.7585466641868696e-07, "loss": 209.4631, "step": 46920 }, { "epoch": 0.9025824474351023, "grad_norm": 336.65393823123486, "learning_rate": 4.7399616725561925e-07, "loss": 200.1722, "step": 46930 }, { "epoch": 0.9027747726955828, "grad_norm": 399.165053561979, "learning_rate": 4.721412163716521e-07, "loss": 203.2035, "step": 46940 }, { "epoch": 0.9029670979560633, "grad_norm": 387.47311179419717, "learning_rate": 4.702898144577228e-07, "loss": 215.1573, "step": 46950 }, { "epoch": 0.9031594232165439, "grad_norm": 364.63146834015566, "learning_rate": 4.6844196220345086e-07, "loss": 200.2567, "step": 46960 }, { "epoch": 0.9033517484770244, "grad_norm": 370.6505007186757, "learning_rate": 4.665976602971278e-07, "loss": 207.1559, "step": 46970 }, { "epoch": 0.9035440737375049, "grad_norm": 418.33878762694326, "learning_rate": 4.647569094257276e-07, "loss": 209.9307, "step": 46980 }, { "epoch": 0.9037363989979854, "grad_norm": 354.09666090788795, "learning_rate": 4.629197102748984e-07, "loss": 199.1513, "step": 46990 }, { "epoch": 0.9039287242584659, "grad_norm": 379.9613313164776, "learning_rate": 4.610860635289671e-07, "loss": 205.9776, "step": 47000 }, { "epoch": 0.9041210495189465, "grad_norm": 342.8654608475949, "learning_rate": 4.592559698709387e-07, "loss": 204.1239, "step": 47010 }, { "epoch": 0.904313374779427, "grad_norm": 365.03178984759643, "learning_rate": 4.5742942998248774e-07, "loss": 203.5788, "step": 47020 }, { "epoch": 0.9045057000399075, "grad_norm": 400.44008202355786, "learning_rate": 4.5560644454397563e-07, "loss": 204.3993, "step": 47030 }, { "epoch": 0.904698025300388, "grad_norm": 357.6281828460192, "learning_rate": 4.537870142344314e-07, "loss": 205.6249, "step": 47040 }, { "epoch": 0.9048903505608685, "grad_norm": 356.1279600757491, "learning_rate": 4.5197113973156403e-07, "loss": 202.473, "step": 47050 }, { "epoch": 0.905082675821349, "grad_norm": 392.38275587321226, "learning_rate": 4.5015882171175476e-07, "loss": 209.4659, "step": 47060 }, { "epoch": 0.9052750010818296, "grad_norm": 440.67633214524574, "learning_rate": 4.483500608500657e-07, "loss": 202.5479, "step": 47070 }, { "epoch": 0.9054673263423101, "grad_norm": 405.8475253122133, "learning_rate": 4.4654485782022697e-07, "loss": 203.5308, "step": 47080 }, { "epoch": 0.9056596516027906, "grad_norm": 367.5869952051399, "learning_rate": 4.447432132946472e-07, "loss": 208.0369, "step": 47090 }, { "epoch": 0.9058519768632711, "grad_norm": 407.84748177285456, "learning_rate": 4.429451279444119e-07, "loss": 205.149, "step": 47100 }, { "epoch": 0.9060443021237516, "grad_norm": 370.4969781493726, "learning_rate": 4.411506024392753e-07, "loss": 208.2056, "step": 47110 }, { "epoch": 0.9062366273842322, "grad_norm": 432.42100237714845, "learning_rate": 4.393596374476705e-07, "loss": 198.6828, "step": 47120 }, { "epoch": 0.9064289526447128, "grad_norm": 352.0118656013363, "learning_rate": 4.3757223363670055e-07, "loss": 196.4775, "step": 47130 }, { "epoch": 0.9066212779051933, "grad_norm": 353.0874115951717, "learning_rate": 4.3578839167214505e-07, "loss": 207.3963, "step": 47140 }, { "epoch": 0.9068136031656738, "grad_norm": 346.2629225427659, "learning_rate": 4.3400811221845693e-07, "loss": 206.5523, "step": 47150 }, { "epoch": 0.9070059284261544, "grad_norm": 356.06970196521286, "learning_rate": 4.322313959387592e-07, "loss": 204.9614, "step": 47160 }, { "epoch": 0.9071982536866349, "grad_norm": 352.3578156247154, "learning_rate": 4.304582434948479e-07, "loss": 203.4813, "step": 47170 }, { "epoch": 0.9073905789471154, "grad_norm": 331.86270353744106, "learning_rate": 4.2868865554719583e-07, "loss": 204.4491, "step": 47180 }, { "epoch": 0.9075829042075959, "grad_norm": 345.3433093695026, "learning_rate": 4.269226327549447e-07, "loss": 206.9256, "step": 47190 }, { "epoch": 0.9077752294680764, "grad_norm": 381.57903165582337, "learning_rate": 4.251601757759061e-07, "loss": 197.6213, "step": 47200 }, { "epoch": 0.907967554728557, "grad_norm": 377.9686273076321, "learning_rate": 4.234012852665703e-07, "loss": 210.6211, "step": 47210 }, { "epoch": 0.9081598799890375, "grad_norm": 384.2630539010244, "learning_rate": 4.2164596188209226e-07, "loss": 208.5283, "step": 47220 }, { "epoch": 0.908352205249518, "grad_norm": 404.399858694891, "learning_rate": 4.198942062763023e-07, "loss": 206.963, "step": 47230 }, { "epoch": 0.9085445305099985, "grad_norm": 385.48323831190004, "learning_rate": 4.181460191016984e-07, "loss": 200.9335, "step": 47240 }, { "epoch": 0.908736855770479, "grad_norm": 345.5274778341019, "learning_rate": 4.1640140100945304e-07, "loss": 208.171, "step": 47250 }, { "epoch": 0.9089291810309595, "grad_norm": 400.21735481896087, "learning_rate": 4.146603526494086e-07, "loss": 203.9448, "step": 47260 }, { "epoch": 0.9091215062914401, "grad_norm": 406.9966190864523, "learning_rate": 4.129228746700742e-07, "loss": 209.6477, "step": 47270 }, { "epoch": 0.9093138315519206, "grad_norm": 457.7861940909638, "learning_rate": 4.111889677186354e-07, "loss": 204.5692, "step": 47280 }, { "epoch": 0.9095061568124011, "grad_norm": 378.529792369946, "learning_rate": 4.094586324409411e-07, "loss": 200.7375, "step": 47290 }, { "epoch": 0.9096984820728816, "grad_norm": 348.5234843857561, "learning_rate": 4.0773186948151246e-07, "loss": 207.2667, "step": 47300 }, { "epoch": 0.9098908073333621, "grad_norm": 410.0679852549603, "learning_rate": 4.060086794835405e-07, "loss": 201.6419, "step": 47310 }, { "epoch": 0.9100831325938427, "grad_norm": 354.9593348868804, "learning_rate": 4.042890630888863e-07, "loss": 201.8753, "step": 47320 }, { "epoch": 0.9102754578543232, "grad_norm": 346.93318134298937, "learning_rate": 4.025730209380774e-07, "loss": 196.3145, "step": 47330 }, { "epoch": 0.9104677831148038, "grad_norm": 383.25819779485136, "learning_rate": 4.0086055367031027e-07, "loss": 209.8385, "step": 47340 }, { "epoch": 0.9106601083752843, "grad_norm": 339.2740254487882, "learning_rate": 3.9915166192345365e-07, "loss": 202.9469, "step": 47350 }, { "epoch": 0.9108524336357648, "grad_norm": 356.6812401563461, "learning_rate": 3.9744634633403944e-07, "loss": 200.4444, "step": 47360 }, { "epoch": 0.9110447588962454, "grad_norm": 355.92559105823244, "learning_rate": 3.957446075372706e-07, "loss": 198.258, "step": 47370 }, { "epoch": 0.9112370841567259, "grad_norm": 344.25196126803843, "learning_rate": 3.940464461670135e-07, "loss": 208.3938, "step": 47380 }, { "epoch": 0.9114294094172064, "grad_norm": 371.65925046265824, "learning_rate": 3.923518628558087e-07, "loss": 206.099, "step": 47390 }, { "epoch": 0.9116217346776869, "grad_norm": 357.89725758710927, "learning_rate": 3.9066085823485923e-07, "loss": 204.5455, "step": 47400 }, { "epoch": 0.9118140599381674, "grad_norm": 364.16976377382895, "learning_rate": 3.8897343293403777e-07, "loss": 210.3828, "step": 47410 }, { "epoch": 0.912006385198648, "grad_norm": 387.65288304202727, "learning_rate": 3.872895875818794e-07, "loss": 203.672, "step": 47420 }, { "epoch": 0.9121987104591285, "grad_norm": 381.59155738979234, "learning_rate": 3.856093228055924e-07, "loss": 200.3493, "step": 47430 }, { "epoch": 0.912391035719609, "grad_norm": 366.5548177347879, "learning_rate": 3.83932639231045e-07, "loss": 205.937, "step": 47440 }, { "epoch": 0.9125833609800895, "grad_norm": 377.39625723752096, "learning_rate": 3.822595374827742e-07, "loss": 201.2788, "step": 47450 }, { "epoch": 0.91277568624057, "grad_norm": 347.20240379430714, "learning_rate": 3.805900181839839e-07, "loss": 206.0641, "step": 47460 }, { "epoch": 0.9129680115010506, "grad_norm": 365.1487363771161, "learning_rate": 3.789240819565432e-07, "loss": 201.2513, "step": 47470 }, { "epoch": 0.9131603367615311, "grad_norm": 343.1728654780718, "learning_rate": 3.772617294209835e-07, "loss": 203.1564, "step": 47480 }, { "epoch": 0.9133526620220116, "grad_norm": 368.1403117998596, "learning_rate": 3.7560296119650396e-07, "loss": 203.5886, "step": 47490 }, { "epoch": 0.9135449872824921, "grad_norm": 409.34288816991506, "learning_rate": 3.739477779009704e-07, "loss": 214.804, "step": 47500 }, { "epoch": 0.9137373125429726, "grad_norm": 361.75045353472785, "learning_rate": 3.7229618015091065e-07, "loss": 206.2953, "step": 47510 }, { "epoch": 0.9139296378034532, "grad_norm": 330.3557653212239, "learning_rate": 3.7064816856151484e-07, "loss": 201.3823, "step": 47520 }, { "epoch": 0.9141219630639337, "grad_norm": 439.2700796390219, "learning_rate": 3.6900374374664425e-07, "loss": 209.4542, "step": 47530 }, { "epoch": 0.9143142883244143, "grad_norm": 368.11447628322605, "learning_rate": 3.6736290631881667e-07, "loss": 206.7973, "step": 47540 }, { "epoch": 0.9145066135848948, "grad_norm": 358.6895190021785, "learning_rate": 3.657256568892187e-07, "loss": 208.0302, "step": 47550 }, { "epoch": 0.9146989388453753, "grad_norm": 384.87962908381985, "learning_rate": 3.6409199606769806e-07, "loss": 197.1002, "step": 47560 }, { "epoch": 0.9148912641058559, "grad_norm": 377.1110216795921, "learning_rate": 3.6246192446276694e-07, "loss": 200.3165, "step": 47570 }, { "epoch": 0.9150835893663364, "grad_norm": 365.9667458775916, "learning_rate": 3.6083544268160077e-07, "loss": 206.7293, "step": 47580 }, { "epoch": 0.9152759146268169, "grad_norm": 356.25417010013473, "learning_rate": 3.5921255133003483e-07, "loss": 202.4675, "step": 47590 }, { "epoch": 0.9154682398872974, "grad_norm": 383.3454193146976, "learning_rate": 3.5759325101257013e-07, "loss": 206.6678, "step": 47600 }, { "epoch": 0.9156605651477779, "grad_norm": 425.3300779116341, "learning_rate": 3.559775423323708e-07, "loss": 203.6999, "step": 47610 }, { "epoch": 0.9158528904082585, "grad_norm": 358.3871817832119, "learning_rate": 3.54365425891261e-07, "loss": 198.2385, "step": 47620 }, { "epoch": 0.916045215668739, "grad_norm": 369.534451186069, "learning_rate": 3.527569022897259e-07, "loss": 208.2636, "step": 47630 }, { "epoch": 0.9162375409292195, "grad_norm": 363.39713092435045, "learning_rate": 3.511519721269163e-07, "loss": 206.4843, "step": 47640 }, { "epoch": 0.9164298661897, "grad_norm": 361.1899849085721, "learning_rate": 3.4955063600064177e-07, "loss": 199.0428, "step": 47650 }, { "epoch": 0.9166221914501805, "grad_norm": 367.102044228421, "learning_rate": 3.479528945073707e-07, "loss": 202.6954, "step": 47660 }, { "epoch": 0.916814516710661, "grad_norm": 370.2284489568778, "learning_rate": 3.4635874824223924e-07, "loss": 205.7312, "step": 47670 }, { "epoch": 0.9170068419711416, "grad_norm": 1491.2089817496505, "learning_rate": 3.4476819779903694e-07, "loss": 203.5768, "step": 47680 }, { "epoch": 0.9171991672316221, "grad_norm": 385.33103089603003, "learning_rate": 3.43181243770222e-07, "loss": 202.2294, "step": 47690 }, { "epoch": 0.9173914924921026, "grad_norm": 386.7401731294933, "learning_rate": 3.4159788674690386e-07, "loss": 200.2038, "step": 47700 }, { "epoch": 0.9175838177525831, "grad_norm": 360.4659660069398, "learning_rate": 3.4001812731886077e-07, "loss": 197.0495, "step": 47710 }, { "epoch": 0.9177761430130636, "grad_norm": 338.9240260409679, "learning_rate": 3.384419660745253e-07, "loss": 198.7667, "step": 47720 }, { "epoch": 0.9179684682735442, "grad_norm": 395.8833580109662, "learning_rate": 3.368694036009923e-07, "loss": 206.6854, "step": 47730 }, { "epoch": 0.9181607935340247, "grad_norm": 359.3387069270951, "learning_rate": 3.353004404840121e-07, "loss": 195.6251, "step": 47740 }, { "epoch": 0.9183531187945053, "grad_norm": 388.5566870010379, "learning_rate": 3.3373507730800167e-07, "loss": 214.3476, "step": 47750 }, { "epoch": 0.9185454440549858, "grad_norm": 401.7933169326655, "learning_rate": 3.321733146560324e-07, "loss": 202.7705, "step": 47760 }, { "epoch": 0.9187377693154664, "grad_norm": 395.89760442687924, "learning_rate": 3.306151531098323e-07, "loss": 206.8543, "step": 47770 }, { "epoch": 0.9189300945759469, "grad_norm": 385.3185963686976, "learning_rate": 3.2906059324979255e-07, "loss": 203.3513, "step": 47780 }, { "epoch": 0.9191224198364274, "grad_norm": 429.4162990212621, "learning_rate": 3.275096356549612e-07, "loss": 208.1671, "step": 47790 }, { "epoch": 0.9193147450969079, "grad_norm": 356.6887472597652, "learning_rate": 3.2596228090304496e-07, "loss": 195.5573, "step": 47800 }, { "epoch": 0.9195070703573884, "grad_norm": 366.6174730315061, "learning_rate": 3.2441852957040607e-07, "loss": 203.6505, "step": 47810 }, { "epoch": 0.919699395617869, "grad_norm": 334.85261334029383, "learning_rate": 3.228783822320669e-07, "loss": 201.472, "step": 47820 }, { "epoch": 0.9198917208783495, "grad_norm": 338.19620511147923, "learning_rate": 3.213418394617085e-07, "loss": 203.9608, "step": 47830 }, { "epoch": 0.92008404613883, "grad_norm": 372.11493415277323, "learning_rate": 3.1980890183166633e-07, "loss": 202.6368, "step": 47840 }, { "epoch": 0.9202763713993105, "grad_norm": 362.7566210990523, "learning_rate": 3.1827956991293374e-07, "loss": 205.0013, "step": 47850 }, { "epoch": 0.920468696659791, "grad_norm": 356.19409593986904, "learning_rate": 3.167538442751639e-07, "loss": 205.7227, "step": 47860 }, { "epoch": 0.9206610219202715, "grad_norm": 359.43992074056297, "learning_rate": 3.1523172548666215e-07, "loss": 204.0532, "step": 47870 }, { "epoch": 0.9208533471807521, "grad_norm": 371.40706282104145, "learning_rate": 3.1371321411439284e-07, "loss": 208.5364, "step": 47880 }, { "epoch": 0.9210456724412326, "grad_norm": 371.0894884672173, "learning_rate": 3.1219831072397787e-07, "loss": 204.8019, "step": 47890 }, { "epoch": 0.9212379977017131, "grad_norm": 337.6790563896661, "learning_rate": 3.1068701587969375e-07, "loss": 211.4002, "step": 47900 }, { "epoch": 0.9214303229621936, "grad_norm": 404.55132204350554, "learning_rate": 3.091793301444701e-07, "loss": 204.9296, "step": 47910 }, { "epoch": 0.9216226482226741, "grad_norm": 366.7035027521802, "learning_rate": 3.076752540798977e-07, "loss": 204.2317, "step": 47920 }, { "epoch": 0.9218149734831547, "grad_norm": 372.1984744229368, "learning_rate": 3.061747882462185e-07, "loss": 206.4927, "step": 47930 }, { "epoch": 0.9220072987436352, "grad_norm": 356.53694210546337, "learning_rate": 3.0467793320233306e-07, "loss": 204.2181, "step": 47940 }, { "epoch": 0.9221996240041158, "grad_norm": 344.61641376271024, "learning_rate": 3.031846895057922e-07, "loss": 206.777, "step": 47950 }, { "epoch": 0.9223919492645963, "grad_norm": 376.2867652943092, "learning_rate": 3.0169505771280747e-07, "loss": 203.469, "step": 47960 }, { "epoch": 0.9225842745250769, "grad_norm": 360.2214225164992, "learning_rate": 3.002090383782408e-07, "loss": 200.5217, "step": 47970 }, { "epoch": 0.9227765997855574, "grad_norm": 403.49409568696825, "learning_rate": 2.9872663205561035e-07, "loss": 201.0238, "step": 47980 }, { "epoch": 0.9229689250460379, "grad_norm": 381.81450013793, "learning_rate": 2.972478392970857e-07, "loss": 207.7948, "step": 47990 }, { "epoch": 0.9231612503065184, "grad_norm": 411.2418845791998, "learning_rate": 2.9577266065349716e-07, "loss": 201.1859, "step": 48000 }, { "epoch": 0.9233535755669989, "grad_norm": 349.44293308445873, "learning_rate": 2.9430109667432096e-07, "loss": 203.7939, "step": 48010 }, { "epoch": 0.9235459008274794, "grad_norm": 404.8199214393542, "learning_rate": 2.9283314790769177e-07, "loss": 210.031, "step": 48020 }, { "epoch": 0.92373822608796, "grad_norm": 360.9611356091389, "learning_rate": 2.913688149003946e-07, "loss": 201.0061, "step": 48030 }, { "epoch": 0.9239305513484405, "grad_norm": 377.75875322922377, "learning_rate": 2.899080981978719e-07, "loss": 208.6834, "step": 48040 }, { "epoch": 0.924122876608921, "grad_norm": 347.49444750421264, "learning_rate": 2.8845099834421517e-07, "loss": 198.3466, "step": 48050 }, { "epoch": 0.9243152018694015, "grad_norm": 363.1898996353905, "learning_rate": 2.869975158821681e-07, "loss": 202.7241, "step": 48060 }, { "epoch": 0.924507527129882, "grad_norm": 373.73072467323504, "learning_rate": 2.8554765135313303e-07, "loss": 210.3304, "step": 48070 }, { "epoch": 0.9246998523903626, "grad_norm": 346.3678165713118, "learning_rate": 2.8410140529715803e-07, "loss": 198.656, "step": 48080 }, { "epoch": 0.9248921776508431, "grad_norm": 408.8899856385577, "learning_rate": 2.826587782529444e-07, "loss": 197.3259, "step": 48090 }, { "epoch": 0.9250845029113236, "grad_norm": 395.28756397129325, "learning_rate": 2.812197707578501e-07, "loss": 206.7726, "step": 48100 }, { "epoch": 0.9252768281718041, "grad_norm": 374.73883939745434, "learning_rate": 2.797843833478797e-07, "loss": 203.0115, "step": 48110 }, { "epoch": 0.9254691534322846, "grad_norm": 414.48937512172563, "learning_rate": 2.7835261655769217e-07, "loss": 198.1998, "step": 48120 }, { "epoch": 0.9256614786927652, "grad_norm": 383.48483959778997, "learning_rate": 2.769244709205976e-07, "loss": 202.3205, "step": 48130 }, { "epoch": 0.9258538039532457, "grad_norm": 354.60566734302694, "learning_rate": 2.7549994696855376e-07, "loss": 199.4333, "step": 48140 }, { "epoch": 0.9260461292137262, "grad_norm": 351.7661442332772, "learning_rate": 2.740790452321751e-07, "loss": 197.2775, "step": 48150 }, { "epoch": 0.9262384544742068, "grad_norm": 347.18242948752805, "learning_rate": 2.726617662407238e-07, "loss": 198.4915, "step": 48160 }, { "epoch": 0.9264307797346873, "grad_norm": 363.7257243553536, "learning_rate": 2.7124811052211097e-07, "loss": 207.6766, "step": 48170 }, { "epoch": 0.9266231049951679, "grad_norm": 373.9905698629852, "learning_rate": 2.698380786029031e-07, "loss": 199.6323, "step": 48180 }, { "epoch": 0.9268154302556484, "grad_norm": 380.6972835135455, "learning_rate": 2.6843167100831125e-07, "loss": 204.1734, "step": 48190 }, { "epoch": 0.9270077555161289, "grad_norm": 365.96935316520796, "learning_rate": 2.6702888826219965e-07, "loss": 205.8778, "step": 48200 }, { "epoch": 0.9272000807766094, "grad_norm": 367.64405489146344, "learning_rate": 2.6562973088708146e-07, "loss": 201.8723, "step": 48210 }, { "epoch": 0.92739240603709, "grad_norm": 361.5171836618392, "learning_rate": 2.6423419940412086e-07, "loss": 199.7775, "step": 48220 }, { "epoch": 0.9275847312975705, "grad_norm": 360.6448170792559, "learning_rate": 2.628422943331288e-07, "loss": 206.8077, "step": 48230 }, { "epoch": 0.927777056558051, "grad_norm": 355.49062067023806, "learning_rate": 2.614540161925683e-07, "loss": 207.698, "step": 48240 }, { "epoch": 0.9279693818185315, "grad_norm": 359.92747535186066, "learning_rate": 2.6006936549954784e-07, "loss": 203.5888, "step": 48250 }, { "epoch": 0.928161707079012, "grad_norm": 335.6376748689618, "learning_rate": 2.5868834276983057e-07, "loss": 206.4889, "step": 48260 }, { "epoch": 0.9283540323394925, "grad_norm": 353.81266355058114, "learning_rate": 2.573109485178216e-07, "loss": 208.7306, "step": 48270 }, { "epoch": 0.9285463575999731, "grad_norm": 342.592150067986, "learning_rate": 2.5593718325657713e-07, "loss": 208.251, "step": 48280 }, { "epoch": 0.9287386828604536, "grad_norm": 387.7713118351166, "learning_rate": 2.545670474978057e-07, "loss": 197.3804, "step": 48290 }, { "epoch": 0.9289310081209341, "grad_norm": 377.23164123419025, "learning_rate": 2.532005417518568e-07, "loss": 199.8187, "step": 48300 }, { "epoch": 0.9291233333814146, "grad_norm": 360.85145185611924, "learning_rate": 2.5183766652773336e-07, "loss": 200.0176, "step": 48310 }, { "epoch": 0.9293156586418951, "grad_norm": 376.82586839149235, "learning_rate": 2.504784223330814e-07, "loss": 200.3287, "step": 48320 }, { "epoch": 0.9295079839023757, "grad_norm": 387.4454318593123, "learning_rate": 2.4912280967419934e-07, "loss": 204.5187, "step": 48330 }, { "epoch": 0.9297003091628562, "grad_norm": 370.70729747122306, "learning_rate": 2.477708290560299e-07, "loss": 212.5554, "step": 48340 }, { "epoch": 0.9298926344233367, "grad_norm": 358.11856272218097, "learning_rate": 2.464224809821614e-07, "loss": 214.3949, "step": 48350 }, { "epoch": 0.9300849596838173, "grad_norm": 363.064909752825, "learning_rate": 2.450777659548353e-07, "loss": 206.2752, "step": 48360 }, { "epoch": 0.9302772849442978, "grad_norm": 379.0780338120148, "learning_rate": 2.4373668447493225e-07, "loss": 208.6998, "step": 48370 }, { "epoch": 0.9304696102047784, "grad_norm": 328.99901329137055, "learning_rate": 2.4239923704198476e-07, "loss": 201.4028, "step": 48380 }, { "epoch": 0.9306619354652589, "grad_norm": 345.89962421584164, "learning_rate": 2.410654241541688e-07, "loss": 199.6826, "step": 48390 }, { "epoch": 0.9308542607257394, "grad_norm": 359.66192706034724, "learning_rate": 2.3973524630830804e-07, "loss": 204.3283, "step": 48400 }, { "epoch": 0.9310465859862199, "grad_norm": 363.39198243705334, "learning_rate": 2.3840870399987283e-07, "loss": 206.5529, "step": 48410 }, { "epoch": 0.9312389112467004, "grad_norm": 341.8669495957109, "learning_rate": 2.370857977229768e-07, "loss": 206.5942, "step": 48420 }, { "epoch": 0.931431236507181, "grad_norm": 349.1426673710083, "learning_rate": 2.3576652797038247e-07, "loss": 205.0635, "step": 48430 }, { "epoch": 0.9316235617676615, "grad_norm": 373.8402796169833, "learning_rate": 2.344508952334934e-07, "loss": 201.2203, "step": 48440 }, { "epoch": 0.931815887028142, "grad_norm": 358.8377869045671, "learning_rate": 2.3313890000236316e-07, "loss": 199.2215, "step": 48450 }, { "epoch": 0.9320082122886225, "grad_norm": 351.36239148372766, "learning_rate": 2.3183054276568752e-07, "loss": 201.9332, "step": 48460 }, { "epoch": 0.932200537549103, "grad_norm": 362.1154369119544, "learning_rate": 2.305258240108077e-07, "loss": 199.5552, "step": 48470 }, { "epoch": 0.9323928628095836, "grad_norm": 375.7211434386111, "learning_rate": 2.2922474422371166e-07, "loss": 202.7204, "step": 48480 }, { "epoch": 0.9325851880700641, "grad_norm": 347.4714011997012, "learning_rate": 2.279273038890273e-07, "loss": 205.913, "step": 48490 }, { "epoch": 0.9327775133305446, "grad_norm": 369.7647960670378, "learning_rate": 2.2663350349003134e-07, "loss": 203.0409, "step": 48500 }, { "epoch": 0.9329698385910251, "grad_norm": 363.10036707668166, "learning_rate": 2.2534334350864274e-07, "loss": 196.7114, "step": 48510 }, { "epoch": 0.9331621638515056, "grad_norm": 407.3035070825069, "learning_rate": 2.2405682442542487e-07, "loss": 204.9385, "step": 48520 }, { "epoch": 0.9333544891119862, "grad_norm": 344.4587572631576, "learning_rate": 2.2277394671958442e-07, "loss": 204.7839, "step": 48530 }, { "epoch": 0.9335468143724667, "grad_norm": 367.51882233901233, "learning_rate": 2.2149471086897355e-07, "loss": 203.4901, "step": 48540 }, { "epoch": 0.9337391396329472, "grad_norm": 430.9405742211201, "learning_rate": 2.202191173500845e-07, "loss": 207.0275, "step": 48550 }, { "epoch": 0.9339314648934277, "grad_norm": 399.93485206691724, "learning_rate": 2.1894716663805716e-07, "loss": 206.4945, "step": 48560 }, { "epoch": 0.9341237901539083, "grad_norm": 383.76664637446873, "learning_rate": 2.176788592066692e-07, "loss": 214.786, "step": 48570 }, { "epoch": 0.9343161154143889, "grad_norm": 386.38907715344436, "learning_rate": 2.164141955283472e-07, "loss": 198.5718, "step": 48580 }, { "epoch": 0.9345084406748694, "grad_norm": 359.70027705460774, "learning_rate": 2.1515317607415654e-07, "loss": 204.1084, "step": 48590 }, { "epoch": 0.9347007659353499, "grad_norm": 345.1851583304933, "learning_rate": 2.1389580131380373e-07, "loss": 218.2766, "step": 48600 }, { "epoch": 0.9348930911958304, "grad_norm": 396.48516202017555, "learning_rate": 2.126420717156441e-07, "loss": 213.2414, "step": 48610 }, { "epoch": 0.9350854164563109, "grad_norm": 379.4163502223697, "learning_rate": 2.113919877466686e-07, "loss": 199.8932, "step": 48620 }, { "epoch": 0.9352777417167915, "grad_norm": 431.10562221510736, "learning_rate": 2.1014554987251356e-07, "loss": 209.9968, "step": 48630 }, { "epoch": 0.935470066977272, "grad_norm": 355.1958101296038, "learning_rate": 2.0890275855745546e-07, "loss": 202.85, "step": 48640 }, { "epoch": 0.9356623922377525, "grad_norm": 361.6536819389774, "learning_rate": 2.0766361426441505e-07, "loss": 200.2586, "step": 48650 }, { "epoch": 0.935854717498233, "grad_norm": 351.229851464096, "learning_rate": 2.0642811745495206e-07, "loss": 210.561, "step": 48660 }, { "epoch": 0.9360470427587135, "grad_norm": 387.1118790965528, "learning_rate": 2.0519626858926944e-07, "loss": 203.5569, "step": 48670 }, { "epoch": 0.936239368019194, "grad_norm": 362.0983303705198, "learning_rate": 2.0396806812621018e-07, "loss": 203.5939, "step": 48680 }, { "epoch": 0.9364316932796746, "grad_norm": 372.9376111079694, "learning_rate": 2.0274351652325942e-07, "loss": 211.234, "step": 48690 }, { "epoch": 0.9366240185401551, "grad_norm": 358.5530843818397, "learning_rate": 2.0152261423654118e-07, "loss": 207.7548, "step": 48700 }, { "epoch": 0.9368163438006356, "grad_norm": 346.21485175206715, "learning_rate": 2.003053617208217e-07, "loss": 201.5005, "step": 48710 }, { "epoch": 0.9370086690611161, "grad_norm": 380.7950642401895, "learning_rate": 1.9909175942950832e-07, "loss": 211.7329, "step": 48720 }, { "epoch": 0.9372009943215966, "grad_norm": 409.8248611235089, "learning_rate": 1.9788180781464716e-07, "loss": 204.814, "step": 48730 }, { "epoch": 0.9373933195820772, "grad_norm": 501.76856256447684, "learning_rate": 1.9667550732692554e-07, "loss": 206.4797, "step": 48740 }, { "epoch": 0.9375856448425577, "grad_norm": 339.3430298952168, "learning_rate": 1.9547285841566843e-07, "loss": 198.2602, "step": 48750 }, { "epoch": 0.9377779701030382, "grad_norm": 368.6174017289793, "learning_rate": 1.9427386152884643e-07, "loss": 202.4432, "step": 48760 }, { "epoch": 0.9379702953635188, "grad_norm": 389.6407474801892, "learning_rate": 1.9307851711306336e-07, "loss": 205.2368, "step": 48770 }, { "epoch": 0.9381626206239994, "grad_norm": 353.2704467807482, "learning_rate": 1.918868256135653e-07, "loss": 207.9949, "step": 48780 }, { "epoch": 0.9383549458844799, "grad_norm": 344.1559176437801, "learning_rate": 1.9069878747423943e-07, "loss": 203.307, "step": 48790 }, { "epoch": 0.9385472711449604, "grad_norm": 392.8586903856337, "learning_rate": 1.8951440313760838e-07, "loss": 204.1177, "step": 48800 }, { "epoch": 0.9387395964054409, "grad_norm": 362.4137931939056, "learning_rate": 1.8833367304483708e-07, "loss": 206.1665, "step": 48810 }, { "epoch": 0.9389319216659214, "grad_norm": 359.6413979311788, "learning_rate": 1.8715659763572703e-07, "loss": 203.5739, "step": 48820 }, { "epoch": 0.939124246926402, "grad_norm": 366.1872098363556, "learning_rate": 1.8598317734872086e-07, "loss": 201.9677, "step": 48830 }, { "epoch": 0.9393165721868825, "grad_norm": 381.0241662016051, "learning_rate": 1.8481341262089668e-07, "loss": 205.5354, "step": 48840 }, { "epoch": 0.939508897447363, "grad_norm": 410.5420664912381, "learning_rate": 1.836473038879727e-07, "loss": 200.2537, "step": 48850 }, { "epoch": 0.9397012227078435, "grad_norm": 416.3983696284498, "learning_rate": 1.8248485158430696e-07, "loss": 203.7448, "step": 48860 }, { "epoch": 0.939893547968324, "grad_norm": 386.79674397757145, "learning_rate": 1.813260561428909e-07, "loss": 202.6938, "step": 48870 }, { "epoch": 0.9400858732288045, "grad_norm": 390.39280922089506, "learning_rate": 1.8017091799535924e-07, "loss": 199.2614, "step": 48880 }, { "epoch": 0.9402781984892851, "grad_norm": 350.1015938046084, "learning_rate": 1.7901943757198003e-07, "loss": 199.1779, "step": 48890 }, { "epoch": 0.9404705237497656, "grad_norm": 355.49705519233663, "learning_rate": 1.7787161530166242e-07, "loss": 203.8312, "step": 48900 }, { "epoch": 0.9406628490102461, "grad_norm": 386.0596600024102, "learning_rate": 1.7672745161194992e-07, "loss": 205.6768, "step": 48910 }, { "epoch": 0.9408551742707266, "grad_norm": 362.82443817959654, "learning_rate": 1.75586946929025e-07, "loss": 201.8567, "step": 48920 }, { "epoch": 0.9410474995312071, "grad_norm": 362.41809499919225, "learning_rate": 1.7445010167770672e-07, "loss": 206.5927, "step": 48930 }, { "epoch": 0.9412398247916877, "grad_norm": 375.539960908434, "learning_rate": 1.7331691628145076e-07, "loss": 206.4405, "step": 48940 }, { "epoch": 0.9414321500521682, "grad_norm": 382.0279265982589, "learning_rate": 1.7218739116235061e-07, "loss": 203.9179, "step": 48950 }, { "epoch": 0.9416244753126487, "grad_norm": 344.5599209163815, "learning_rate": 1.710615267411353e-07, "loss": 198.841, "step": 48960 }, { "epoch": 0.9418168005731293, "grad_norm": 382.27322880808947, "learning_rate": 1.6993932343717158e-07, "loss": 203.6364, "step": 48970 }, { "epoch": 0.9420091258336099, "grad_norm": 361.94592765984186, "learning_rate": 1.6882078166846173e-07, "loss": 198.9499, "step": 48980 }, { "epoch": 0.9422014510940904, "grad_norm": 359.8541416803671, "learning_rate": 1.677059018516425e-07, "loss": 199.0995, "step": 48990 }, { "epoch": 0.9423937763545709, "grad_norm": 401.1738769568403, "learning_rate": 1.6659468440198835e-07, "loss": 206.322, "step": 49000 }, { "epoch": 0.9425861016150514, "grad_norm": 330.51819131816546, "learning_rate": 1.6548712973341152e-07, "loss": 216.3232, "step": 49010 }, { "epoch": 0.9427784268755319, "grad_norm": 376.554847946768, "learning_rate": 1.6438323825845647e-07, "loss": 200.087, "step": 49020 }, { "epoch": 0.9429707521360124, "grad_norm": 368.0763010305208, "learning_rate": 1.6328301038830429e-07, "loss": 205.7377, "step": 49030 }, { "epoch": 0.943163077396493, "grad_norm": 389.1818584560128, "learning_rate": 1.621864465327716e-07, "loss": 197.0627, "step": 49040 }, { "epoch": 0.9433554026569735, "grad_norm": 407.1234660885087, "learning_rate": 1.6109354710031167e-07, "loss": 207.463, "step": 49050 }, { "epoch": 0.943547727917454, "grad_norm": 337.339471767984, "learning_rate": 1.6000431249800995e-07, "loss": 194.6463, "step": 49060 }, { "epoch": 0.9437400531779345, "grad_norm": 331.3329776157531, "learning_rate": 1.5891874313158862e-07, "loss": 194.299, "step": 49070 }, { "epoch": 0.943932378438415, "grad_norm": 366.63202388553077, "learning_rate": 1.5783683940540528e-07, "loss": 200.2874, "step": 49080 }, { "epoch": 0.9441247036988956, "grad_norm": 348.4823437808373, "learning_rate": 1.5675860172244982e-07, "loss": 199.1079, "step": 49090 }, { "epoch": 0.9443170289593761, "grad_norm": 355.51891158510244, "learning_rate": 1.5568403048434877e-07, "loss": 198.4023, "step": 49100 }, { "epoch": 0.9445093542198566, "grad_norm": 341.23308673530204, "learning_rate": 1.5461312609136192e-07, "loss": 201.2382, "step": 49110 }, { "epoch": 0.9447016794803371, "grad_norm": 379.54409283688267, "learning_rate": 1.5354588894238465e-07, "loss": 198.9856, "step": 49120 }, { "epoch": 0.9448940047408176, "grad_norm": 352.0549935720582, "learning_rate": 1.524823194349434e-07, "loss": 206.9746, "step": 49130 }, { "epoch": 0.9450863300012982, "grad_norm": 379.5768404078221, "learning_rate": 1.514224179652013e-07, "loss": 206.9802, "step": 49140 }, { "epoch": 0.9452786552617787, "grad_norm": 388.7938587044275, "learning_rate": 1.5036618492795473e-07, "loss": 204.4031, "step": 49150 }, { "epoch": 0.9454709805222592, "grad_norm": 355.03687574565714, "learning_rate": 1.4931362071663125e-07, "loss": 202.7154, "step": 49160 }, { "epoch": 0.9456633057827397, "grad_norm": 357.95611089396147, "learning_rate": 1.4826472572329498e-07, "loss": 202.3218, "step": 49170 }, { "epoch": 0.9458556310432203, "grad_norm": 374.21875249515443, "learning_rate": 1.4721950033864118e-07, "loss": 197.0, "step": 49180 }, { "epoch": 0.9460479563037009, "grad_norm": 349.2312173960439, "learning_rate": 1.4617794495199956e-07, "loss": 206.4769, "step": 49190 }, { "epoch": 0.9462402815641814, "grad_norm": 374.2752050855024, "learning_rate": 1.4514005995133197e-07, "loss": 205.4025, "step": 49200 }, { "epoch": 0.9464326068246619, "grad_norm": 360.80127601919077, "learning_rate": 1.441058457232336e-07, "loss": 199.2605, "step": 49210 }, { "epoch": 0.9466249320851424, "grad_norm": 369.57335289896054, "learning_rate": 1.43075302652933e-07, "loss": 209.8724, "step": 49220 }, { "epoch": 0.9468172573456229, "grad_norm": 356.4890944029722, "learning_rate": 1.4204843112428867e-07, "loss": 204.1399, "step": 49230 }, { "epoch": 0.9470095826061035, "grad_norm": 356.8180844794629, "learning_rate": 1.4102523151979574e-07, "loss": 202.7515, "step": 49240 }, { "epoch": 0.947201907866584, "grad_norm": 403.9491451758941, "learning_rate": 1.40005704220576e-07, "loss": 206.3632, "step": 49250 }, { "epoch": 0.9473942331270645, "grad_norm": 362.0320227388929, "learning_rate": 1.3898984960638907e-07, "loss": 204.8017, "step": 49260 }, { "epoch": 0.947586558387545, "grad_norm": 410.84140402433786, "learning_rate": 1.3797766805562328e-07, "loss": 207.2748, "step": 49270 }, { "epoch": 0.9477788836480255, "grad_norm": 408.68346359505136, "learning_rate": 1.3696915994530048e-07, "loss": 203.7896, "step": 49280 }, { "epoch": 0.9479712089085061, "grad_norm": 346.2945964479388, "learning_rate": 1.359643256510701e-07, "loss": 198.9533, "step": 49290 }, { "epoch": 0.9481635341689866, "grad_norm": 364.7453645051926, "learning_rate": 1.3496316554722056e-07, "loss": 201.9824, "step": 49300 }, { "epoch": 0.9483558594294671, "grad_norm": 373.742128369879, "learning_rate": 1.3396568000666467e-07, "loss": 204.5055, "step": 49310 }, { "epoch": 0.9485481846899476, "grad_norm": 361.09834362350585, "learning_rate": 1.3297186940094853e-07, "loss": 207.0482, "step": 49320 }, { "epoch": 0.9487405099504281, "grad_norm": 407.5412021337874, "learning_rate": 1.3198173410025383e-07, "loss": 200.9387, "step": 49330 }, { "epoch": 0.9489328352109087, "grad_norm": 384.1957083876091, "learning_rate": 1.3099527447338668e-07, "loss": 205.2417, "step": 49340 }, { "epoch": 0.9491251604713892, "grad_norm": 376.81351588748106, "learning_rate": 1.3001249088778756e-07, "loss": 208.6732, "step": 49350 }, { "epoch": 0.9493174857318697, "grad_norm": 417.5765423310436, "learning_rate": 1.29033383709527e-07, "loss": 204.7751, "step": 49360 }, { "epoch": 0.9495098109923502, "grad_norm": 389.2639735642371, "learning_rate": 1.2805795330330774e-07, "loss": 206.6264, "step": 49370 }, { "epoch": 0.9497021362528308, "grad_norm": 355.58374485327874, "learning_rate": 1.2708620003245908e-07, "loss": 210.9091, "step": 49380 }, { "epoch": 0.9498944615133114, "grad_norm": 358.71833706596266, "learning_rate": 1.2611812425894487e-07, "loss": 195.0542, "step": 49390 }, { "epoch": 0.9500867867737919, "grad_norm": 366.26793443105026, "learning_rate": 1.2515372634335666e-07, "loss": 199.6854, "step": 49400 }, { "epoch": 0.9502791120342724, "grad_norm": 370.2548974783838, "learning_rate": 1.2419300664491707e-07, "loss": 214.1474, "step": 49410 }, { "epoch": 0.9504714372947529, "grad_norm": 339.2438335057985, "learning_rate": 1.2323596552147876e-07, "loss": 204.4429, "step": 49420 }, { "epoch": 0.9506637625552334, "grad_norm": 364.2600805628213, "learning_rate": 1.2228260332952213e-07, "loss": 204.4185, "step": 49430 }, { "epoch": 0.950856087815714, "grad_norm": 405.59084423019425, "learning_rate": 1.213329204241609e-07, "loss": 199.0191, "step": 49440 }, { "epoch": 0.9510484130761945, "grad_norm": 389.25992145419013, "learning_rate": 1.203869171591343e-07, "loss": 198.2744, "step": 49450 }, { "epoch": 0.951240738336675, "grad_norm": 418.872131031006, "learning_rate": 1.1944459388681496e-07, "loss": 211.5102, "step": 49460 }, { "epoch": 0.9514330635971555, "grad_norm": 381.99020641868276, "learning_rate": 1.1850595095820095e-07, "loss": 205.557, "step": 49470 }, { "epoch": 0.951625388857636, "grad_norm": 400.31684008483734, "learning_rate": 1.175709887229215e-07, "loss": 199.0176, "step": 49480 }, { "epoch": 0.9518177141181166, "grad_norm": 390.2782834876259, "learning_rate": 1.1663970752923581e-07, "loss": 201.231, "step": 49490 }, { "epoch": 0.9520100393785971, "grad_norm": 336.31515318587105, "learning_rate": 1.1571210772402975e-07, "loss": 203.0084, "step": 49500 }, { "epoch": 0.9522023646390776, "grad_norm": 370.2327882528829, "learning_rate": 1.1478818965281912e-07, "loss": 200.8858, "step": 49510 }, { "epoch": 0.9523946898995581, "grad_norm": 414.32139466194104, "learning_rate": 1.1386795365974757e-07, "loss": 208.771, "step": 49520 }, { "epoch": 0.9525870151600386, "grad_norm": 373.1270070639388, "learning_rate": 1.1295140008758864e-07, "loss": 202.6968, "step": 49530 }, { "epoch": 0.9527793404205191, "grad_norm": 485.10358612647025, "learning_rate": 1.1203852927774372e-07, "loss": 205.5088, "step": 49540 }, { "epoch": 0.9529716656809997, "grad_norm": 397.7323628375016, "learning_rate": 1.111293415702408e-07, "loss": 201.8083, "step": 49550 }, { "epoch": 0.9531639909414802, "grad_norm": 376.61355488506143, "learning_rate": 1.1022383730373897e-07, "loss": 205.5113, "step": 49560 }, { "epoch": 0.9533563162019607, "grad_norm": 356.5223479198953, "learning_rate": 1.093220168155218e-07, "loss": 202.1081, "step": 49570 }, { "epoch": 0.9535486414624412, "grad_norm": 383.8961525318942, "learning_rate": 1.0842388044150387e-07, "loss": 203.0571, "step": 49580 }, { "epoch": 0.9537409667229219, "grad_norm": 373.77944159230293, "learning_rate": 1.0752942851622649e-07, "loss": 201.3082, "step": 49590 }, { "epoch": 0.9539332919834024, "grad_norm": 363.3856229004409, "learning_rate": 1.066386613728565e-07, "loss": 200.6104, "step": 49600 }, { "epoch": 0.9541256172438829, "grad_norm": 373.2443593283993, "learning_rate": 1.0575157934319069e-07, "loss": 206.9029, "step": 49610 }, { "epoch": 0.9543179425043634, "grad_norm": 354.611394383355, "learning_rate": 1.0486818275765364e-07, "loss": 202.265, "step": 49620 }, { "epoch": 0.9545102677648439, "grad_norm": 370.1579322913445, "learning_rate": 1.0398847194529437e-07, "loss": 205.4919, "step": 49630 }, { "epoch": 0.9547025930253245, "grad_norm": 376.87278121082795, "learning_rate": 1.0311244723379188e-07, "loss": 203.0618, "step": 49640 }, { "epoch": 0.954894918285805, "grad_norm": 360.9044723112168, "learning_rate": 1.0224010894944958e-07, "loss": 208.2209, "step": 49650 }, { "epoch": 0.9550872435462855, "grad_norm": 356.3859438699043, "learning_rate": 1.0137145741719867e-07, "loss": 224.3437, "step": 49660 }, { "epoch": 0.955279568806766, "grad_norm": 415.253070479042, "learning_rate": 1.0050649296060033e-07, "loss": 217.9474, "step": 49670 }, { "epoch": 0.9554718940672465, "grad_norm": 382.4375103604002, "learning_rate": 9.964521590183684e-08, "loss": 206.8328, "step": 49680 }, { "epoch": 0.955664219327727, "grad_norm": 382.3781089448624, "learning_rate": 9.878762656172159e-08, "loss": 203.4472, "step": 49690 }, { "epoch": 0.9558565445882076, "grad_norm": 358.7459553032876, "learning_rate": 9.793372525969125e-08, "loss": 211.4204, "step": 49700 }, { "epoch": 0.9560488698486881, "grad_norm": 388.9944664591722, "learning_rate": 9.70835123138103e-08, "loss": 206.2478, "step": 49710 }, { "epoch": 0.9562411951091686, "grad_norm": 372.34198076520255, "learning_rate": 9.623698804076875e-08, "loss": 206.4913, "step": 49720 }, { "epoch": 0.9564335203696491, "grad_norm": 382.5906584218293, "learning_rate": 9.539415275588326e-08, "loss": 212.3889, "step": 49730 }, { "epoch": 0.9566258456301296, "grad_norm": 350.8513474884998, "learning_rate": 9.455500677309603e-08, "loss": 219.9304, "step": 49740 }, { "epoch": 0.9568181708906102, "grad_norm": 388.96588130347004, "learning_rate": 9.371955040497371e-08, "loss": 202.6126, "step": 49750 }, { "epoch": 0.9570104961510907, "grad_norm": 375.62725859907977, "learning_rate": 9.288778396271292e-08, "loss": 211.2534, "step": 49760 }, { "epoch": 0.9572028214115712, "grad_norm": 378.96835722770936, "learning_rate": 9.205970775613027e-08, "loss": 204.1733, "step": 49770 }, { "epoch": 0.9573951466720517, "grad_norm": 444.15361922264344, "learning_rate": 9.123532209367237e-08, "loss": 211.4552, "step": 49780 }, { "epoch": 0.9575874719325324, "grad_norm": 356.41757443249867, "learning_rate": 9.04146272824069e-08, "loss": 195.6299, "step": 49790 }, { "epoch": 0.9577797971930129, "grad_norm": 339.34410097238435, "learning_rate": 8.959762362803159e-08, "loss": 201.799, "step": 49800 }, { "epoch": 0.9579721224534934, "grad_norm": 334.8532234197658, "learning_rate": 8.87843114348652e-08, "loss": 200.7965, "step": 49810 }, { "epoch": 0.9581644477139739, "grad_norm": 369.6196269310955, "learning_rate": 8.797469100585432e-08, "loss": 208.1472, "step": 49820 }, { "epoch": 0.9583567729744544, "grad_norm": 433.6968760664219, "learning_rate": 8.716876264256768e-08, "loss": 215.906, "step": 49830 }, { "epoch": 0.958549098234935, "grad_norm": 359.4031513674069, "learning_rate": 8.636652664520184e-08, "loss": 205.0442, "step": 49840 }, { "epoch": 0.9587414234954155, "grad_norm": 371.4292114478069, "learning_rate": 8.556798331257555e-08, "loss": 205.6551, "step": 49850 }, { "epoch": 0.958933748755896, "grad_norm": 391.06737577632396, "learning_rate": 8.477313294213307e-08, "loss": 204.2198, "step": 49860 }, { "epoch": 0.9591260740163765, "grad_norm": 359.97384413061303, "learning_rate": 8.398197582994316e-08, "loss": 197.127, "step": 49870 }, { "epoch": 0.959318399276857, "grad_norm": 365.9605009515636, "learning_rate": 8.319451227069897e-08, "loss": 205.0339, "step": 49880 }, { "epoch": 0.9595107245373375, "grad_norm": 357.00235980264677, "learning_rate": 8.241074255771808e-08, "loss": 200.3495, "step": 49890 }, { "epoch": 0.9597030497978181, "grad_norm": 357.12807224336836, "learning_rate": 8.163066698294031e-08, "loss": 204.6414, "step": 49900 }, { "epoch": 0.9598953750582986, "grad_norm": 358.2252431919054, "learning_rate": 8.085428583693211e-08, "loss": 201.7902, "step": 49910 }, { "epoch": 0.9600877003187791, "grad_norm": 369.9463084300626, "learning_rate": 8.008159940888216e-08, "loss": 203.2062, "step": 49920 }, { "epoch": 0.9602800255792596, "grad_norm": 358.4508390369491, "learning_rate": 7.931260798660356e-08, "loss": 206.8046, "step": 49930 }, { "epoch": 0.9604723508397401, "grad_norm": 338.20180019751376, "learning_rate": 7.854731185653386e-08, "loss": 199.1347, "step": 49940 }, { "epoch": 0.9606646761002207, "grad_norm": 346.69498247382717, "learning_rate": 7.778571130373059e-08, "loss": 202.7521, "step": 49950 }, { "epoch": 0.9608570013607012, "grad_norm": 359.8435702581509, "learning_rate": 7.702780661188014e-08, "loss": 206.7363, "step": 49960 }, { "epoch": 0.9610493266211817, "grad_norm": 353.89086273997054, "learning_rate": 7.627359806328782e-08, "loss": 205.0232, "step": 49970 }, { "epoch": 0.9612416518816622, "grad_norm": 373.7622693781548, "learning_rate": 7.552308593888558e-08, "loss": 204.5835, "step": 49980 }, { "epoch": 0.9614339771421427, "grad_norm": 373.6093164309671, "learning_rate": 7.477627051822534e-08, "loss": 209.0156, "step": 49990 }, { "epoch": 0.9616263024026234, "grad_norm": 394.8648310465121, "learning_rate": 7.403315207948236e-08, "loss": 212.7868, "step": 50000 }, { "epoch": 0.9618186276631039, "grad_norm": 389.73245897893435, "learning_rate": 7.329373089945968e-08, "loss": 197.4793, "step": 50010 }, { "epoch": 0.9620109529235844, "grad_norm": 361.56549853055265, "learning_rate": 7.255800725357586e-08, "loss": 205.0992, "step": 50020 }, { "epoch": 0.9622032781840649, "grad_norm": 366.5302465927919, "learning_rate": 7.182598141587838e-08, "loss": 205.1815, "step": 50030 }, { "epoch": 0.9623956034445454, "grad_norm": 371.9301266193586, "learning_rate": 7.109765365903243e-08, "loss": 208.482, "step": 50040 }, { "epoch": 0.962587928705026, "grad_norm": 357.2384001956751, "learning_rate": 7.03730242543299e-08, "loss": 205.4693, "step": 50050 }, { "epoch": 0.9627802539655065, "grad_norm": 337.4818013276292, "learning_rate": 6.965209347168156e-08, "loss": 199.8143, "step": 50060 }, { "epoch": 0.962972579225987, "grad_norm": 358.4629665251457, "learning_rate": 6.89348615796237e-08, "loss": 199.1667, "step": 50070 }, { "epoch": 0.9631649044864675, "grad_norm": 357.08603315142034, "learning_rate": 6.822132884531373e-08, "loss": 201.4948, "step": 50080 }, { "epoch": 0.963357229746948, "grad_norm": 400.5110726502833, "learning_rate": 6.751149553452907e-08, "loss": 208.5828, "step": 50090 }, { "epoch": 0.9635495550074286, "grad_norm": 350.9263576414659, "learning_rate": 6.680536191167263e-08, "loss": 200.1319, "step": 50100 }, { "epoch": 0.9637418802679091, "grad_norm": 359.42279037814063, "learning_rate": 6.610292823976628e-08, "loss": 199.5466, "step": 50110 }, { "epoch": 0.9639342055283896, "grad_norm": 360.1887352280769, "learning_rate": 6.540419478045623e-08, "loss": 207.0207, "step": 50120 }, { "epoch": 0.9641265307888701, "grad_norm": 378.47852751386193, "learning_rate": 6.470916179400765e-08, "loss": 198.0305, "step": 50130 }, { "epoch": 0.9643188560493506, "grad_norm": 363.80575600351375, "learning_rate": 6.401782953931013e-08, "loss": 200.3281, "step": 50140 }, { "epoch": 0.9645111813098312, "grad_norm": 380.6795736776294, "learning_rate": 6.33301982738721e-08, "loss": 201.7145, "step": 50150 }, { "epoch": 0.9647035065703117, "grad_norm": 342.60556435465, "learning_rate": 6.264626825382647e-08, "loss": 209.3092, "step": 50160 }, { "epoch": 0.9648958318307922, "grad_norm": 355.6897515699277, "learning_rate": 6.196603973392501e-08, "loss": 208.415, "step": 50170 }, { "epoch": 0.9650881570912727, "grad_norm": 376.9843894696672, "learning_rate": 6.12895129675406e-08, "loss": 199.2474, "step": 50180 }, { "epoch": 0.9652804823517532, "grad_norm": 378.4277548540897, "learning_rate": 6.061668820667055e-08, "loss": 206.1151, "step": 50190 }, { "epoch": 0.9654728076122339, "grad_norm": 355.19572710632053, "learning_rate": 5.994756570192994e-08, "loss": 208.4154, "step": 50200 }, { "epoch": 0.9656651328727144, "grad_norm": 361.552674747987, "learning_rate": 5.928214570255497e-08, "loss": 195.7584, "step": 50210 }, { "epoch": 0.9658574581331949, "grad_norm": 386.79872658684997, "learning_rate": 5.862042845640403e-08, "loss": 201.5981, "step": 50220 }, { "epoch": 0.9660497833936754, "grad_norm": 363.0551393693288, "learning_rate": 5.796241420995663e-08, "loss": 195.1777, "step": 50230 }, { "epoch": 0.9662421086541559, "grad_norm": 349.4889589444486, "learning_rate": 5.730810320831226e-08, "loss": 204.0646, "step": 50240 }, { "epoch": 0.9664344339146365, "grad_norm": 355.09980958283677, "learning_rate": 5.66574956951893e-08, "loss": 205.1864, "step": 50250 }, { "epoch": 0.966626759175117, "grad_norm": 435.38762972989383, "learning_rate": 5.6010591912930565e-08, "loss": 208.1305, "step": 50260 }, { "epoch": 0.9668190844355975, "grad_norm": 354.15731877745867, "learning_rate": 5.5367392102495534e-08, "loss": 204.5344, "step": 50270 }, { "epoch": 0.967011409696078, "grad_norm": 377.60355751663684, "learning_rate": 5.472789650346588e-08, "loss": 205.7249, "step": 50280 }, { "epoch": 0.9672037349565585, "grad_norm": 365.5187625608226, "learning_rate": 5.4092105354043304e-08, "loss": 205.6885, "step": 50290 }, { "epoch": 0.967396060217039, "grad_norm": 363.4031489148836, "learning_rate": 5.346001889104946e-08, "loss": 197.1267, "step": 50300 }, { "epoch": 0.9675883854775196, "grad_norm": 386.9760149331261, "learning_rate": 5.2831637349926026e-08, "loss": 201.4931, "step": 50310 }, { "epoch": 0.9677807107380001, "grad_norm": 383.4784305510812, "learning_rate": 5.220696096473465e-08, "loss": 200.2673, "step": 50320 }, { "epoch": 0.9679730359984806, "grad_norm": 450.6443804678607, "learning_rate": 5.1585989968157e-08, "loss": 215.3417, "step": 50330 }, { "epoch": 0.9681653612589611, "grad_norm": 333.14887862669724, "learning_rate": 5.096872459149471e-08, "loss": 201.8255, "step": 50340 }, { "epoch": 0.9683576865194417, "grad_norm": 375.73528120989096, "learning_rate": 5.035516506466942e-08, "loss": 212.2653, "step": 50350 }, { "epoch": 0.9685500117799222, "grad_norm": 446.9680920916504, "learning_rate": 4.9745311616220535e-08, "loss": 213.5823, "step": 50360 }, { "epoch": 0.9687423370404027, "grad_norm": 348.24251205490697, "learning_rate": 4.913916447330858e-08, "loss": 203.9064, "step": 50370 }, { "epoch": 0.9689346623008832, "grad_norm": 361.1064584240234, "learning_rate": 4.853672386171515e-08, "loss": 209.2491, "step": 50380 }, { "epoch": 0.9691269875613637, "grad_norm": 357.17661301751633, "learning_rate": 4.793799000583743e-08, "loss": 202.6957, "step": 50390 }, { "epoch": 0.9693193128218442, "grad_norm": 389.8714792917872, "learning_rate": 4.73429631286948e-08, "loss": 202.3398, "step": 50400 }, { "epoch": 0.9695116380823249, "grad_norm": 375.9401820395255, "learning_rate": 4.675164345192551e-08, "loss": 196.6517, "step": 50410 }, { "epoch": 0.9697039633428054, "grad_norm": 364.1204881832129, "learning_rate": 4.6164031195785606e-08, "loss": 204.5044, "step": 50420 }, { "epoch": 0.9698962886032859, "grad_norm": 340.01497015692127, "learning_rate": 4.558012657915112e-08, "loss": 203.8972, "step": 50430 }, { "epoch": 0.9700886138637664, "grad_norm": 376.7845513146719, "learning_rate": 4.4999929819515844e-08, "loss": 209.5113, "step": 50440 }, { "epoch": 0.970280939124247, "grad_norm": 392.9127299781355, "learning_rate": 4.442344113299579e-08, "loss": 204.1648, "step": 50450 }, { "epoch": 0.9704732643847275, "grad_norm": 363.11778607954176, "learning_rate": 4.385066073432143e-08, "loss": 198.4782, "step": 50460 }, { "epoch": 0.970665589645208, "grad_norm": 392.83199707541826, "learning_rate": 4.3281588836844303e-08, "loss": 202.6459, "step": 50470 }, { "epoch": 0.9708579149056885, "grad_norm": 363.29121051422067, "learning_rate": 4.271622565253486e-08, "loss": 198.3532, "step": 50480 }, { "epoch": 0.971050240166169, "grad_norm": 383.5253643785734, "learning_rate": 4.2154571391982425e-08, "loss": 201.4317, "step": 50490 }, { "epoch": 0.9712425654266496, "grad_norm": 371.63736854051393, "learning_rate": 4.159662626439187e-08, "loss": 205.1226, "step": 50500 }, { "epoch": 0.9714348906871301, "grad_norm": 413.13774609311355, "learning_rate": 4.104239047758918e-08, "loss": 207.9182, "step": 50510 }, { "epoch": 0.9716272159476106, "grad_norm": 356.77074943930876, "learning_rate": 4.049186423801921e-08, "loss": 196.8607, "step": 50520 }, { "epoch": 0.9718195412080911, "grad_norm": 398.50404199376686, "learning_rate": 3.994504775074237e-08, "loss": 198.2637, "step": 50530 }, { "epoch": 0.9720118664685716, "grad_norm": 373.8703519544385, "learning_rate": 3.9401941219440186e-08, "loss": 206.9144, "step": 50540 }, { "epoch": 0.9722041917290521, "grad_norm": 355.8067911671731, "learning_rate": 3.8862544846409725e-08, "loss": 200.8955, "step": 50550 }, { "epoch": 0.9723965169895327, "grad_norm": 375.83505663357437, "learning_rate": 3.832685883256915e-08, "loss": 205.2627, "step": 50560 }, { "epoch": 0.9725888422500132, "grad_norm": 349.45645297098065, "learning_rate": 3.7794883377449966e-08, "loss": 200.1641, "step": 50570 }, { "epoch": 0.9727811675104937, "grad_norm": 348.5611055810571, "learning_rate": 3.726661867920478e-08, "loss": 203.0395, "step": 50580 }, { "epoch": 0.9729734927709742, "grad_norm": 358.2669006617166, "learning_rate": 3.674206493460508e-08, "loss": 200.8857, "step": 50590 }, { "epoch": 0.9731658180314547, "grad_norm": 351.54670609819493, "learning_rate": 3.622122233903791e-08, "loss": 199.7689, "step": 50600 }, { "epoch": 0.9733581432919354, "grad_norm": 407.56600378938055, "learning_rate": 3.5704091086508076e-08, "loss": 207.6954, "step": 50610 }, { "epoch": 0.9735504685524159, "grad_norm": 387.4352155478232, "learning_rate": 3.519067136963705e-08, "loss": 201.6304, "step": 50620 }, { "epoch": 0.9737427938128964, "grad_norm": 390.14497586458174, "learning_rate": 3.468096337966853e-08, "loss": 203.7833, "step": 50630 }, { "epoch": 0.9739351190733769, "grad_norm": 396.4177518961923, "learning_rate": 3.417496730645731e-08, "loss": 203.5137, "step": 50640 }, { "epoch": 0.9741274443338575, "grad_norm": 359.62905979548225, "learning_rate": 3.3672683338480396e-08, "loss": 206.6287, "step": 50650 }, { "epoch": 0.974319769594338, "grad_norm": 374.5785033485421, "learning_rate": 3.317411166282813e-08, "loss": 201.4154, "step": 50660 }, { "epoch": 0.9745120948548185, "grad_norm": 344.7383211872308, "learning_rate": 3.2679252465213085e-08, "loss": 203.9449, "step": 50670 }, { "epoch": 0.974704420115299, "grad_norm": 379.8235967438272, "learning_rate": 3.218810592996113e-08, "loss": 205.5725, "step": 50680 }, { "epoch": 0.9748967453757795, "grad_norm": 437.5386935924966, "learning_rate": 3.170067224001483e-08, "loss": 214.244, "step": 50690 }, { "epoch": 0.97508907063626, "grad_norm": 340.66102436358256, "learning_rate": 3.1216951576936714e-08, "loss": 194.8719, "step": 50700 }, { "epoch": 0.9752813958967406, "grad_norm": 354.3046604099427, "learning_rate": 3.073694412090489e-08, "loss": 200.0747, "step": 50710 }, { "epoch": 0.9754737211572211, "grad_norm": 404.6759521674685, "learning_rate": 3.026065005071188e-08, "loss": 205.5647, "step": 50720 }, { "epoch": 0.9756660464177016, "grad_norm": 368.34396678368597, "learning_rate": 2.9788069543772445e-08, "loss": 208.5045, "step": 50730 }, { "epoch": 0.9758583716781821, "grad_norm": 343.731348335998, "learning_rate": 2.9319202776113553e-08, "loss": 199.9978, "step": 50740 }, { "epoch": 0.9760506969386626, "grad_norm": 358.37688261755903, "learning_rate": 2.8854049922379946e-08, "loss": 200.5989, "step": 50750 }, { "epoch": 0.9762430221991432, "grad_norm": 398.3606329947295, "learning_rate": 2.839261115583303e-08, "loss": 206.8188, "step": 50760 }, { "epoch": 0.9764353474596237, "grad_norm": 390.08735321288367, "learning_rate": 2.793488664835309e-08, "loss": 203.6234, "step": 50770 }, { "epoch": 0.9766276727201042, "grad_norm": 373.6140125054845, "learning_rate": 2.7480876570433746e-08, "loss": 198.9102, "step": 50780 }, { "epoch": 0.9768199979805847, "grad_norm": 369.86485291057227, "learning_rate": 2.7030581091186393e-08, "loss": 208.798, "step": 50790 }, { "epoch": 0.9770123232410652, "grad_norm": 375.2328900326083, "learning_rate": 2.658400037833686e-08, "loss": 201.3384, "step": 50800 }, { "epoch": 0.9772046485015459, "grad_norm": 351.08290942092873, "learning_rate": 2.6141134598233197e-08, "loss": 201.1264, "step": 50810 }, { "epoch": 0.9773969737620264, "grad_norm": 407.0875589024692, "learning_rate": 2.5701983915831232e-08, "loss": 206.2367, "step": 50820 }, { "epoch": 0.9775892990225069, "grad_norm": 335.8769076080468, "learning_rate": 2.5266548494710108e-08, "loss": 203.3587, "step": 50830 }, { "epoch": 0.9777816242829874, "grad_norm": 347.6112306832183, "learning_rate": 2.4834828497062315e-08, "loss": 198.6425, "step": 50840 }, { "epoch": 0.977973949543468, "grad_norm": 355.4252037339203, "learning_rate": 2.4406824083694768e-08, "loss": 206.2745, "step": 50850 }, { "epoch": 0.9781662748039485, "grad_norm": 358.60436061549416, "learning_rate": 2.398253541403217e-08, "loss": 210.9274, "step": 50860 }, { "epoch": 0.978358600064429, "grad_norm": 357.160490329138, "learning_rate": 2.3561962646116988e-08, "loss": 202.8984, "step": 50870 }, { "epoch": 0.9785509253249095, "grad_norm": 365.29160886992554, "learning_rate": 2.3145105936603906e-08, "loss": 195.4823, "step": 50880 }, { "epoch": 0.97874325058539, "grad_norm": 439.6208461726787, "learning_rate": 2.273196544076539e-08, "loss": 209.3478, "step": 50890 }, { "epoch": 0.9789355758458705, "grad_norm": 342.78350491530574, "learning_rate": 2.2322541312490565e-08, "loss": 203.3775, "step": 50900 }, { "epoch": 0.9791279011063511, "grad_norm": 371.94100727044844, "learning_rate": 2.1916833704281882e-08, "loss": 213.3107, "step": 50910 }, { "epoch": 0.9793202263668316, "grad_norm": 374.65242784405365, "learning_rate": 2.1514842767258458e-08, "loss": 196.9479, "step": 50920 }, { "epoch": 0.9795125516273121, "grad_norm": 362.6499251722214, "learning_rate": 2.1116568651156076e-08, "loss": 207.6976, "step": 50930 }, { "epoch": 0.9797048768877926, "grad_norm": 359.99189821552505, "learning_rate": 2.0722011504326066e-08, "loss": 202.3579, "step": 50940 }, { "epoch": 0.9798972021482731, "grad_norm": 358.3485630916027, "learning_rate": 2.033117147373309e-08, "loss": 199.6103, "step": 50950 }, { "epoch": 0.9800895274087537, "grad_norm": 358.5260330266349, "learning_rate": 1.9944048704959583e-08, "loss": 204.0412, "step": 50960 }, { "epoch": 0.9802818526692342, "grad_norm": 331.59994950907867, "learning_rate": 1.956064334220131e-08, "loss": 199.497, "step": 50970 }, { "epoch": 0.9804741779297147, "grad_norm": 351.14588732668517, "learning_rate": 1.9180955528270706e-08, "loss": 207.2379, "step": 50980 }, { "epoch": 0.9806665031901952, "grad_norm": 397.7205106483465, "learning_rate": 1.8804985404595743e-08, "loss": 206.9686, "step": 50990 }, { "epoch": 0.9808588284506757, "grad_norm": 342.2036064924589, "learning_rate": 1.843273311121885e-08, "loss": 198.5449, "step": 51000 }, { "epoch": 0.9810511537111563, "grad_norm": 973.6236059783471, "learning_rate": 1.806419878679799e-08, "loss": 207.533, "step": 51010 }, { "epoch": 0.9812434789716369, "grad_norm": 353.3697998451452, "learning_rate": 1.7699382568605595e-08, "loss": 199.2419, "step": 51020 }, { "epoch": 0.9814358042321174, "grad_norm": 358.8817119846803, "learning_rate": 1.733828459253073e-08, "loss": 205.8519, "step": 51030 }, { "epoch": 0.9816281294925979, "grad_norm": 361.6388583905268, "learning_rate": 1.6980904993075808e-08, "loss": 201.8072, "step": 51040 }, { "epoch": 0.9818204547530784, "grad_norm": 370.99855580721135, "learning_rate": 1.6627243903357682e-08, "loss": 216.963, "step": 51050 }, { "epoch": 0.982012780013559, "grad_norm": 332.18711761413573, "learning_rate": 1.6277301455110972e-08, "loss": 201.4797, "step": 51060 }, { "epoch": 0.9822051052740395, "grad_norm": 360.9403903695172, "learning_rate": 1.5931077778682524e-08, "loss": 205.7921, "step": 51070 }, { "epoch": 0.98239743053452, "grad_norm": 405.309678195462, "learning_rate": 1.5588573003035847e-08, "loss": 205.7948, "step": 51080 }, { "epoch": 0.9825897557950005, "grad_norm": 425.3253946702827, "learning_rate": 1.5249787255747774e-08, "loss": 205.9136, "step": 51090 }, { "epoch": 0.982782081055481, "grad_norm": 342.8363078901211, "learning_rate": 1.4914720663009585e-08, "loss": 201.0298, "step": 51100 }, { "epoch": 0.9829744063159616, "grad_norm": 364.46943806717525, "learning_rate": 1.4583373349629226e-08, "loss": 201.3648, "step": 51110 }, { "epoch": 0.9831667315764421, "grad_norm": 356.8904847406518, "learning_rate": 1.4255745439027968e-08, "loss": 200.1133, "step": 51120 }, { "epoch": 0.9833590568369226, "grad_norm": 382.6854737957742, "learning_rate": 1.3931837053241526e-08, "loss": 206.7351, "step": 51130 }, { "epoch": 0.9835513820974031, "grad_norm": 365.6773762375214, "learning_rate": 1.3611648312920057e-08, "loss": 207.3315, "step": 51140 }, { "epoch": 0.9837437073578836, "grad_norm": 375.61365436912155, "learning_rate": 1.3295179337329267e-08, "loss": 203.3566, "step": 51150 }, { "epoch": 0.9839360326183642, "grad_norm": 361.52019708468623, "learning_rate": 1.2982430244347088e-08, "loss": 210.0051, "step": 51160 }, { "epoch": 0.9841283578788447, "grad_norm": 400.3697977142987, "learning_rate": 1.2673401150470333e-08, "loss": 209.121, "step": 51170 }, { "epoch": 0.9843206831393252, "grad_norm": 376.62403287433347, "learning_rate": 1.2368092170804702e-08, "loss": 204.1128, "step": 51180 }, { "epoch": 0.9845130083998057, "grad_norm": 415.609338948867, "learning_rate": 1.2066503419073672e-08, "loss": 202.9911, "step": 51190 }, { "epoch": 0.9847053336602862, "grad_norm": 351.03770142823123, "learning_rate": 1.1768635007614049e-08, "loss": 203.4782, "step": 51200 }, { "epoch": 0.9848976589207667, "grad_norm": 348.4563186322979, "learning_rate": 1.1474487047375971e-08, "loss": 202.7223, "step": 51210 }, { "epoch": 0.9850899841812474, "grad_norm": 468.1294713250719, "learning_rate": 1.1184059647926238e-08, "loss": 207.7692, "step": 51220 }, { "epoch": 0.9852823094417279, "grad_norm": 401.7357054109847, "learning_rate": 1.0897352917443871e-08, "loss": 208.4456, "step": 51230 }, { "epoch": 0.9854746347022084, "grad_norm": 379.80491056057997, "learning_rate": 1.0614366962721223e-08, "loss": 206.1894, "step": 51240 }, { "epoch": 0.9856669599626889, "grad_norm": 332.7708771596052, "learning_rate": 1.033510188916731e-08, "loss": 194.6979, "step": 51250 }, { "epoch": 0.9858592852231695, "grad_norm": 367.69120235667486, "learning_rate": 1.0059557800802256e-08, "loss": 203.0271, "step": 51260 }, { "epoch": 0.98605161048365, "grad_norm": 354.47859012941774, "learning_rate": 9.78773480026396e-09, "loss": 197.9222, "step": 51270 }, { "epoch": 0.9862439357441305, "grad_norm": 368.979646170427, "learning_rate": 9.519632988800321e-09, "loss": 207.5625, "step": 51280 }, { "epoch": 0.986436261004611, "grad_norm": 350.6849494358281, "learning_rate": 9.25525246627479e-09, "loss": 206.7379, "step": 51290 }, { "epoch": 0.9866285862650915, "grad_norm": 371.93165701237086, "learning_rate": 8.994593331165257e-09, "loss": 208.2924, "step": 51300 }, { "epoch": 0.986820911525572, "grad_norm": 355.56485864819984, "learning_rate": 8.737655680562951e-09, "loss": 197.2067, "step": 51310 }, { "epoch": 0.9870132367860526, "grad_norm": 387.0998506662651, "learning_rate": 8.484439610172424e-09, "loss": 202.9401, "step": 51320 }, { "epoch": 0.9872055620465331, "grad_norm": 355.26681707954, "learning_rate": 8.234945214312673e-09, "loss": 205.5573, "step": 51330 }, { "epoch": 0.9873978873070136, "grad_norm": 347.49241835232834, "learning_rate": 7.989172585917138e-09, "loss": 204.8943, "step": 51340 }, { "epoch": 0.9875902125674941, "grad_norm": 366.33187406736545, "learning_rate": 7.747121816530368e-09, "loss": 202.9648, "step": 51350 }, { "epoch": 0.9877825378279746, "grad_norm": 361.2236680092179, "learning_rate": 7.508792996313573e-09, "loss": 205.3263, "step": 51360 }, { "epoch": 0.9879748630884552, "grad_norm": 346.94050202589585, "learning_rate": 7.274186214040191e-09, "loss": 199.8622, "step": 51370 }, { "epoch": 0.9881671883489357, "grad_norm": 362.9462198362998, "learning_rate": 7.043301557096982e-09, "loss": 200.5334, "step": 51380 }, { "epoch": 0.9883595136094162, "grad_norm": 366.0453311046353, "learning_rate": 6.816139111484044e-09, "loss": 202.2913, "step": 51390 }, { "epoch": 0.9885518388698967, "grad_norm": 350.9302385734949, "learning_rate": 6.592698961818134e-09, "loss": 205.0694, "step": 51400 }, { "epoch": 0.9887441641303772, "grad_norm": 334.7278843456203, "learning_rate": 6.37298119132379e-09, "loss": 197.3568, "step": 51410 }, { "epoch": 0.9889364893908578, "grad_norm": 362.22652421114105, "learning_rate": 6.156985881844435e-09, "loss": 201.2631, "step": 51420 }, { "epoch": 0.9891288146513384, "grad_norm": 361.4713180098413, "learning_rate": 5.944713113833489e-09, "loss": 202.7434, "step": 51430 }, { "epoch": 0.9893211399118189, "grad_norm": 345.8168883809075, "learning_rate": 5.736162966359926e-09, "loss": 205.4553, "step": 51440 }, { "epoch": 0.9895134651722994, "grad_norm": 357.7816003446847, "learning_rate": 5.531335517104941e-09, "loss": 201.2375, "step": 51450 }, { "epoch": 0.98970579043278, "grad_norm": 376.926498796295, "learning_rate": 5.3302308423641704e-09, "loss": 201.9192, "step": 51460 }, { "epoch": 0.9898981156932605, "grad_norm": 377.41667615426076, "learning_rate": 5.132849017044361e-09, "loss": 206.9932, "step": 51470 }, { "epoch": 0.990090440953741, "grad_norm": 396.061243250311, "learning_rate": 4.939190114666703e-09, "loss": 210.7631, "step": 51480 }, { "epoch": 0.9902827662142215, "grad_norm": 357.7638461008757, "learning_rate": 4.749254207367937e-09, "loss": 199.3093, "step": 51490 }, { "epoch": 0.990475091474702, "grad_norm": 334.91060176109545, "learning_rate": 4.563041365894805e-09, "loss": 204.0571, "step": 51500 }, { "epoch": 0.9906674167351825, "grad_norm": 412.1332012506797, "learning_rate": 4.380551659608489e-09, "loss": 205.031, "step": 51510 }, { "epoch": 0.9908597419956631, "grad_norm": 354.85025322793365, "learning_rate": 4.2017851564835065e-09, "loss": 202.9292, "step": 51520 }, { "epoch": 0.9910520672561436, "grad_norm": 362.5359086568774, "learning_rate": 4.026741923107702e-09, "loss": 199.1497, "step": 51530 }, { "epoch": 0.9912443925166241, "grad_norm": 390.7978788203712, "learning_rate": 3.855422024681144e-09, "loss": 208.5175, "step": 51540 }, { "epoch": 0.9914367177771046, "grad_norm": 319.8806252743432, "learning_rate": 3.6878255250183406e-09, "loss": 195.2692, "step": 51550 }, { "epoch": 0.9916290430375851, "grad_norm": 329.742320677582, "learning_rate": 3.5239524865460227e-09, "loss": 199.9458, "step": 51560 }, { "epoch": 0.9918213682980657, "grad_norm": 389.1102937204921, "learning_rate": 3.363802970304253e-09, "loss": 211.3452, "step": 51570 }, { "epoch": 0.9920136935585462, "grad_norm": 363.6836465777458, "learning_rate": 3.207377035946424e-09, "loss": 205.9241, "step": 51580 }, { "epoch": 0.9922060188190267, "grad_norm": 354.83362189362083, "learning_rate": 3.05467474173704e-09, "loss": 204.5754, "step": 51590 }, { "epoch": 0.9923983440795072, "grad_norm": 380.2079650855752, "learning_rate": 2.9056961445572686e-09, "loss": 202.7342, "step": 51600 }, { "epoch": 0.9925906693399877, "grad_norm": 368.47379931382955, "learning_rate": 2.7604412998982754e-09, "loss": 204.7153, "step": 51610 }, { "epoch": 0.9927829946004683, "grad_norm": 389.45078787660464, "learning_rate": 2.61891026186456e-09, "loss": 200.7578, "step": 51620 }, { "epoch": 0.9929753198609489, "grad_norm": 370.698779462313, "learning_rate": 2.4811030831739525e-09, "loss": 213.4457, "step": 51630 }, { "epoch": 0.9931676451214294, "grad_norm": 379.0056945762546, "learning_rate": 2.347019815158724e-09, "loss": 206.9128, "step": 51640 }, { "epoch": 0.9933599703819099, "grad_norm": 396.11509190807016, "learning_rate": 2.216660507762258e-09, "loss": 204.9282, "step": 51650 }, { "epoch": 0.9935522956423904, "grad_norm": 368.6581037478492, "learning_rate": 2.0900252095401583e-09, "loss": 199.7001, "step": 51660 }, { "epoch": 0.993744620902871, "grad_norm": 385.23305238769063, "learning_rate": 1.9671139676624707e-09, "loss": 202.9382, "step": 51670 }, { "epoch": 0.9939369461633515, "grad_norm": 371.93326834522463, "learning_rate": 1.8479268279125717e-09, "loss": 205.3309, "step": 51680 }, { "epoch": 0.994129271423832, "grad_norm": 357.21354124749047, "learning_rate": 1.73246383468495e-09, "loss": 198.5947, "step": 51690 }, { "epoch": 0.9943215966843125, "grad_norm": 348.09372276413296, "learning_rate": 1.6207250309874246e-09, "loss": 203.0926, "step": 51700 }, { "epoch": 0.994513921944793, "grad_norm": 401.7305325144985, "learning_rate": 1.512710458442257e-09, "loss": 206.9878, "step": 51710 }, { "epoch": 0.9947062472052736, "grad_norm": 365.2095163742593, "learning_rate": 1.408420157280599e-09, "loss": 209.9957, "step": 51720 }, { "epoch": 0.9948985724657541, "grad_norm": 355.9166980705559, "learning_rate": 1.3078541663502642e-09, "loss": 206.524, "step": 51730 }, { "epoch": 0.9950908977262346, "grad_norm": 368.11151387147976, "learning_rate": 1.2110125231112879e-09, "loss": 208.0215, "step": 51740 }, { "epoch": 0.9952832229867151, "grad_norm": 385.0085528021469, "learning_rate": 1.117895263633706e-09, "loss": 204.5201, "step": 51750 }, { "epoch": 0.9954755482471956, "grad_norm": 390.7948995271977, "learning_rate": 1.0285024226042162e-09, "loss": 204.7518, "step": 51760 }, { "epoch": 0.9956678735076762, "grad_norm": 344.1379499338581, "learning_rate": 9.428340333184072e-10, "loss": 199.1081, "step": 51770 }, { "epoch": 0.9958601987681567, "grad_norm": 375.4426110016105, "learning_rate": 8.608901276874193e-10, "loss": 212.3385, "step": 51780 }, { "epoch": 0.9960525240286372, "grad_norm": 397.50610033894924, "learning_rate": 7.826707362335039e-10, "loss": 208.1914, "step": 51790 }, { "epoch": 0.9962448492891177, "grad_norm": 383.62146782858906, "learning_rate": 7.081758880911338e-10, "loss": 220.8641, "step": 51800 }, { "epoch": 0.9964371745495982, "grad_norm": 348.1050821023941, "learning_rate": 6.374056110103332e-10, "loss": 197.8274, "step": 51810 }, { "epoch": 0.9966294998100788, "grad_norm": 384.4545751015449, "learning_rate": 5.703599313511277e-10, "loss": 201.8037, "step": 51820 }, { "epoch": 0.9968218250705593, "grad_norm": 360.0229515815529, "learning_rate": 5.070388740868737e-10, "loss": 205.0513, "step": 51830 }, { "epoch": 0.9970141503310399, "grad_norm": 381.8984436367163, "learning_rate": 4.474424628031493e-10, "loss": 205.1911, "step": 51840 }, { "epoch": 0.9972064755915204, "grad_norm": 345.9292320856205, "learning_rate": 3.9157071969997407e-10, "loss": 197.5689, "step": 51850 }, { "epoch": 0.9973988008520009, "grad_norm": 391.7863656828509, "learning_rate": 3.394236655873684e-10, "loss": 206.6955, "step": 51860 }, { "epoch": 0.9975911261124815, "grad_norm": 350.84394351790695, "learning_rate": 2.910013198886841e-10, "loss": 205.6138, "step": 51870 }, { "epoch": 0.997783451372962, "grad_norm": 384.4586453573272, "learning_rate": 2.4630370064171463e-10, "loss": 205.7148, "step": 51880 }, { "epoch": 0.9979757766334425, "grad_norm": 352.8663929488422, "learning_rate": 2.0533082449647467e-10, "loss": 202.1314, "step": 51890 }, { "epoch": 0.998168101893923, "grad_norm": 359.50165438039494, "learning_rate": 1.6808270671186954e-10, "loss": 209.9645, "step": 51900 }, { "epoch": 0.9983604271544035, "grad_norm": 415.44156285872293, "learning_rate": 1.345593611645768e-10, "loss": 200.2923, "step": 51910 }, { "epoch": 0.9985527524148841, "grad_norm": 377.8012119963822, "learning_rate": 1.0476080034016456e-10, "loss": 212.1618, "step": 51920 }, { "epoch": 0.9987450776753646, "grad_norm": 376.1355865143914, "learning_rate": 7.868703533864264e-11, "loss": 204.8333, "step": 51930 }, { "epoch": 0.9989374029358451, "grad_norm": 396.76812887551205, "learning_rate": 5.633807587224205e-11, "loss": 205.867, "step": 51940 }, { "epoch": 0.9991297281963256, "grad_norm": 367.44817221330374, "learning_rate": 3.771393026541503e-11, "loss": 201.6554, "step": 51950 }, { "epoch": 0.9993220534568061, "grad_norm": 354.4649358181581, "learning_rate": 2.281460545594527e-11, "loss": 199.9556, "step": 51960 }, { "epoch": 0.9995143787172867, "grad_norm": 360.9267721712571, "learning_rate": 1.164010699272744e-11, "loss": 201.5196, "step": 51970 }, { "epoch": 0.9997067039777672, "grad_norm": 356.4807932466468, "learning_rate": 4.190439037987659e-12, "loss": 198.2291, "step": 51980 }, { "epoch": 0.9998990292382477, "grad_norm": 368.023569934151, "learning_rate": 4.656043661732668e-13, "loss": 220.5766, "step": 51990 }, { "epoch": 0.999995191868488, "eval_loss": 242.9601287841797, "eval_runtime": 167.1518, "eval_samples_per_second": 4.164, "eval_steps_per_second": 0.132, "step": 51995 }, { "epoch": 0.999995191868488, "step": 51995, "total_flos": 7.252179007674778e+16, "train_loss": 276.11489062246443, "train_runtime": 988432.6936, "train_samples_per_second": 3.367, "train_steps_per_second": 0.053 } ], "logging_steps": 10, "max_steps": 51995, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.252179007674778e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }