{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.999990383783212, "eval_steps": 500, "global_step": 51995, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.9232433575982536e-05, "grad_norm": 94608.6885681578, "learning_rate": 3.846153846153847e-08, "loss": 12643.8086, "step": 1 }, { "epoch": 0.00019232433575982536, "grad_norm": 100578.99176905143, "learning_rate": 3.846153846153847e-07, "loss": 12553.6059, "step": 10 }, { "epoch": 0.0003846486715196507, "grad_norm": 88772.19286704408, "learning_rate": 7.692307692307694e-07, "loss": 11972.1172, "step": 20 }, { "epoch": 0.0005769730072794761, "grad_norm": 59360.15295432608, "learning_rate": 1.153846153846154e-06, "loss": 9967.2266, "step": 30 }, { "epoch": 0.0007692973430393014, "grad_norm": 28494.91588680842, "learning_rate": 1.5384615384615387e-06, "loss": 7446.3711, "step": 40 }, { "epoch": 0.0009616216787991269, "grad_norm": 16366.768222931018, "learning_rate": 1.9230769230769234e-06, "loss": 5403.1641, "step": 50 }, { "epoch": 0.0011539460145589522, "grad_norm": 8839.161009449497, "learning_rate": 2.307692307692308e-06, "loss": 3814.0988, "step": 60 }, { "epoch": 0.0013462703503187776, "grad_norm": 5383.497342018133, "learning_rate": 2.6923076923076923e-06, "loss": 2743.5736, "step": 70 }, { "epoch": 0.001538594686078603, "grad_norm": 3725.296652871175, "learning_rate": 3.0769230769230774e-06, "loss": 2219.7875, "step": 80 }, { "epoch": 0.0017309190218384282, "grad_norm": 3599.0985769624353, "learning_rate": 3.4615384615384617e-06, "loss": 1872.5768, "step": 90 }, { "epoch": 0.0019232433575982538, "grad_norm": 3094.252836745076, "learning_rate": 3.846153846153847e-06, "loss": 1735.4064, "step": 100 }, { "epoch": 0.002115567693358079, "grad_norm": 3300.1362331979553, "learning_rate": 4.230769230769231e-06, "loss": 1558.9357, "step": 110 }, { "epoch": 0.0023078920291179045, "grad_norm": 2104.1956388095073, "learning_rate": 4.615384615384616e-06, "loss": 1429.3346, "step": 120 }, { "epoch": 0.00250021636487773, "grad_norm": 2513.861867038951, "learning_rate": 5e-06, "loss": 1316.1369, "step": 130 }, { "epoch": 0.002692540700637555, "grad_norm": 3141.9987854701485, "learning_rate": 5.384615384615385e-06, "loss": 1260.3841, "step": 140 }, { "epoch": 0.0028848650363973807, "grad_norm": 1951.151761018987, "learning_rate": 5.769230769230769e-06, "loss": 1205.7934, "step": 150 }, { "epoch": 0.003077189372157206, "grad_norm": 1503.5840998796243, "learning_rate": 6.153846153846155e-06, "loss": 1149.7426, "step": 160 }, { "epoch": 0.0032695137079170314, "grad_norm": 1550.680002852119, "learning_rate": 6.538461538461539e-06, "loss": 1118.103, "step": 170 }, { "epoch": 0.0034618380436768565, "grad_norm": 1560.5468989439112, "learning_rate": 6.923076923076923e-06, "loss": 1092.9717, "step": 180 }, { "epoch": 0.003654162379436682, "grad_norm": 1425.4218406177279, "learning_rate": 7.307692307692308e-06, "loss": 1052.8212, "step": 190 }, { "epoch": 0.0038464867151965076, "grad_norm": 1516.6197391542155, "learning_rate": 7.692307692307694e-06, "loss": 1052.4535, "step": 200 }, { "epoch": 0.004038811050956333, "grad_norm": 1780.8347820865515, "learning_rate": 8.076923076923077e-06, "loss": 999.0068, "step": 210 }, { "epoch": 0.004231135386716158, "grad_norm": 1318.6042934766288, "learning_rate": 8.461538461538462e-06, "loss": 976.9977, "step": 220 }, { "epoch": 0.004423459722475984, "grad_norm": 1107.1960966004538, "learning_rate": 8.846153846153847e-06, "loss": 960.0191, "step": 230 }, { "epoch": 0.004615784058235809, "grad_norm": 1243.931843838465, "learning_rate": 9.230769230769232e-06, "loss": 950.4475, "step": 240 }, { "epoch": 0.004808108393995634, "grad_norm": 1603.148311812429, "learning_rate": 9.615384615384616e-06, "loss": 950.857, "step": 250 }, { "epoch": 0.00500043272975546, "grad_norm": 1234.584566075074, "learning_rate": 1e-05, "loss": 954.7617, "step": 260 }, { "epoch": 0.005192757065515285, "grad_norm": 1457.858865452839, "learning_rate": 1.0384615384615386e-05, "loss": 932.0896, "step": 270 }, { "epoch": 0.00538508140127511, "grad_norm": 1626.39266826436, "learning_rate": 1.076923076923077e-05, "loss": 900.2836, "step": 280 }, { "epoch": 0.005577405737034935, "grad_norm": 1341.1192889501265, "learning_rate": 1.1153846153846154e-05, "loss": 905.4902, "step": 290 }, { "epoch": 0.005769730072794761, "grad_norm": 2004.7690948696184, "learning_rate": 1.1538461538461538e-05, "loss": 872.6172, "step": 300 }, { "epoch": 0.0059620544085545865, "grad_norm": 1513.3944788118977, "learning_rate": 1.1923076923076925e-05, "loss": 869.792, "step": 310 }, { "epoch": 0.006154378744314412, "grad_norm": 1113.0910903993315, "learning_rate": 1.230769230769231e-05, "loss": 874.9273, "step": 320 }, { "epoch": 0.006346703080074238, "grad_norm": 3193.214792895934, "learning_rate": 1.2692307692307693e-05, "loss": 858.4352, "step": 330 }, { "epoch": 0.006539027415834063, "grad_norm": 1117.0052091046994, "learning_rate": 1.3076923076923078e-05, "loss": 843.8179, "step": 340 }, { "epoch": 0.006731351751593888, "grad_norm": 1068.3254606697517, "learning_rate": 1.3461538461538463e-05, "loss": 811.4118, "step": 350 }, { "epoch": 0.006923676087353713, "grad_norm": 1131.4811824271526, "learning_rate": 1.3846153846153847e-05, "loss": 823.7328, "step": 360 }, { "epoch": 0.007116000423113539, "grad_norm": 1245.6413691901007, "learning_rate": 1.4230769230769232e-05, "loss": 838.5344, "step": 370 }, { "epoch": 0.007308324758873364, "grad_norm": 1188.0536582603752, "learning_rate": 1.4615384615384615e-05, "loss": 833.5658, "step": 380 }, { "epoch": 0.007500649094633189, "grad_norm": 1033.2554609126928, "learning_rate": 1.5000000000000002e-05, "loss": 816.3561, "step": 390 }, { "epoch": 0.007692973430393015, "grad_norm": 1106.619330781247, "learning_rate": 1.5384615384615387e-05, "loss": 799.7768, "step": 400 }, { "epoch": 0.00788529776615284, "grad_norm": 965.6234161186003, "learning_rate": 1.576923076923077e-05, "loss": 783.5753, "step": 410 }, { "epoch": 0.008077622101912665, "grad_norm": 1063.0371172029031, "learning_rate": 1.6153846153846154e-05, "loss": 807.2202, "step": 420 }, { "epoch": 0.00826994643767249, "grad_norm": 1401.141549219412, "learning_rate": 1.653846153846154e-05, "loss": 782.2932, "step": 430 }, { "epoch": 0.008462270773432316, "grad_norm": 961.9169300295785, "learning_rate": 1.6923076923076924e-05, "loss": 783.0447, "step": 440 }, { "epoch": 0.008654595109192142, "grad_norm": 1113.6035912659463, "learning_rate": 1.730769230769231e-05, "loss": 817.2932, "step": 450 }, { "epoch": 0.008846919444951968, "grad_norm": 1018.1114190982277, "learning_rate": 1.7692307692307694e-05, "loss": 768.6484, "step": 460 }, { "epoch": 0.009039243780711793, "grad_norm": 994.1764027298699, "learning_rate": 1.807692307692308e-05, "loss": 767.3088, "step": 470 }, { "epoch": 0.009231568116471618, "grad_norm": 1079.9442609599444, "learning_rate": 1.8461538461538465e-05, "loss": 746.3848, "step": 480 }, { "epoch": 0.009423892452231443, "grad_norm": 1132.6673906167728, "learning_rate": 1.8846153846153846e-05, "loss": 767.0798, "step": 490 }, { "epoch": 0.009616216787991268, "grad_norm": 972.2311975595254, "learning_rate": 1.923076923076923e-05, "loss": 750.1532, "step": 500 }, { "epoch": 0.009808541123751093, "grad_norm": 953.2034517617658, "learning_rate": 1.9615384615384617e-05, "loss": 765.4885, "step": 510 }, { "epoch": 0.01000086545951092, "grad_norm": 1080.8681332093806, "learning_rate": 2e-05, "loss": 762.509, "step": 520 }, { "epoch": 0.010193189795270745, "grad_norm": 1388.4857033156295, "learning_rate": 1.999999813758258e-05, "loss": 732.4688, "step": 530 }, { "epoch": 0.01038551413103057, "grad_norm": 1106.684533714725, "learning_rate": 1.9999992550331007e-05, "loss": 740.4309, "step": 540 }, { "epoch": 0.010577838466790395, "grad_norm": 1090.5369324499081, "learning_rate": 1.999998323824736e-05, "loss": 739.5119, "step": 550 }, { "epoch": 0.01077016280255022, "grad_norm": 1129.680526403887, "learning_rate": 1.999997020133512e-05, "loss": 732.7231, "step": 560 }, { "epoch": 0.010962487138310046, "grad_norm": 930.7364821135466, "learning_rate": 1.9999953439599132e-05, "loss": 733.2015, "step": 570 }, { "epoch": 0.01115481147406987, "grad_norm": 933.4120343343305, "learning_rate": 1.9999932953045638e-05, "loss": 737.304, "step": 580 }, { "epoch": 0.011347135809829698, "grad_norm": 915.0633771476615, "learning_rate": 1.999990874168228e-05, "loss": 724.2898, "step": 590 }, { "epoch": 0.011539460145589523, "grad_norm": 1128.361341996108, "learning_rate": 1.9999880805518067e-05, "loss": 735.0713, "step": 600 }, { "epoch": 0.011731784481349348, "grad_norm": 849.0208141614002, "learning_rate": 1.9999849144563406e-05, "loss": 720.8895, "step": 610 }, { "epoch": 0.011924108817109173, "grad_norm": 1015.5962484886528, "learning_rate": 1.9999813758830092e-05, "loss": 713.2033, "step": 620 }, { "epoch": 0.012116433152868998, "grad_norm": 1026.2741464178252, "learning_rate": 1.9999774648331307e-05, "loss": 716.7091, "step": 630 }, { "epoch": 0.012308757488628823, "grad_norm": 969.4774400659321, "learning_rate": 1.9999731813081616e-05, "loss": 718.2322, "step": 640 }, { "epoch": 0.012501081824388648, "grad_norm": 967.3757615945976, "learning_rate": 1.9999685253096975e-05, "loss": 710.2178, "step": 650 }, { "epoch": 0.012693406160148475, "grad_norm": 871.670618965462, "learning_rate": 1.999963496839473e-05, "loss": 688.6732, "step": 660 }, { "epoch": 0.0128857304959083, "grad_norm": 918.4170497435479, "learning_rate": 1.9999580958993606e-05, "loss": 692.1547, "step": 670 }, { "epoch": 0.013078054831668125, "grad_norm": 957.1468754682305, "learning_rate": 1.9999523224913722e-05, "loss": 714.7746, "step": 680 }, { "epoch": 0.01327037916742795, "grad_norm": 865.7781073300224, "learning_rate": 1.999946176617659e-05, "loss": 705.0376, "step": 690 }, { "epoch": 0.013462703503187776, "grad_norm": 936.9998829159986, "learning_rate": 1.999939658280509e-05, "loss": 687.2993, "step": 700 }, { "epoch": 0.0136550278389476, "grad_norm": 906.7755440766291, "learning_rate": 1.9999327674823513e-05, "loss": 706.7441, "step": 710 }, { "epoch": 0.013847352174707426, "grad_norm": 856.9815132697244, "learning_rate": 1.9999255042257522e-05, "loss": 697.0999, "step": 720 }, { "epoch": 0.014039676510467253, "grad_norm": 927.2050883683178, "learning_rate": 1.999917868513417e-05, "loss": 681.3204, "step": 730 }, { "epoch": 0.014232000846227078, "grad_norm": 922.0296694917098, "learning_rate": 1.9999098603481895e-05, "loss": 662.6776, "step": 740 }, { "epoch": 0.014424325181986903, "grad_norm": 959.1175136904378, "learning_rate": 1.9999014797330536e-05, "loss": 681.5562, "step": 750 }, { "epoch": 0.014616649517746728, "grad_norm": 816.3936907296245, "learning_rate": 1.99989272667113e-05, "loss": 685.389, "step": 760 }, { "epoch": 0.014808973853506553, "grad_norm": 874.4326400369638, "learning_rate": 1.99988360116568e-05, "loss": 678.6024, "step": 770 }, { "epoch": 0.015001298189266378, "grad_norm": 798.9969239141585, "learning_rate": 1.999874103220102e-05, "loss": 669.6026, "step": 780 }, { "epoch": 0.015193622525026203, "grad_norm": 904.5693607601238, "learning_rate": 1.999864232837934e-05, "loss": 676.1861, "step": 790 }, { "epoch": 0.01538594686078603, "grad_norm": 1098.9356397046763, "learning_rate": 1.9998539900228526e-05, "loss": 677.7401, "step": 800 }, { "epoch": 0.015578271196545855, "grad_norm": 838.3649701940624, "learning_rate": 1.999843374778673e-05, "loss": 675.1596, "step": 810 }, { "epoch": 0.01577059553230568, "grad_norm": 910.3491848511147, "learning_rate": 1.999832387109349e-05, "loss": 661.2108, "step": 820 }, { "epoch": 0.015962919868065507, "grad_norm": 839.7251367189293, "learning_rate": 1.9998210270189736e-05, "loss": 653.5074, "step": 830 }, { "epoch": 0.01615524420382533, "grad_norm": 922.4346859381994, "learning_rate": 1.9998092945117786e-05, "loss": 652.9832, "step": 840 }, { "epoch": 0.016347568539585158, "grad_norm": 797.9334887512913, "learning_rate": 1.999797189592134e-05, "loss": 658.8486, "step": 850 }, { "epoch": 0.01653989287534498, "grad_norm": 26733.46203732987, "learning_rate": 1.999784712264548e-05, "loss": 698.5595, "step": 860 }, { "epoch": 0.016732217211104808, "grad_norm": 828.643285218162, "learning_rate": 1.9997718625336686e-05, "loss": 651.8198, "step": 870 }, { "epoch": 0.01692454154686463, "grad_norm": 898.1396587421336, "learning_rate": 1.9997586404042825e-05, "loss": 671.2599, "step": 880 }, { "epoch": 0.017116865882624458, "grad_norm": 802.1362445755046, "learning_rate": 1.9997450458813142e-05, "loss": 645.942, "step": 890 }, { "epoch": 0.017309190218384285, "grad_norm": 1016.8188200039426, "learning_rate": 1.9997310789698276e-05, "loss": 639.8075, "step": 900 }, { "epoch": 0.01750151455414411, "grad_norm": 821.0411601718398, "learning_rate": 1.999716739675025e-05, "loss": 655.8576, "step": 910 }, { "epoch": 0.017693838889903935, "grad_norm": 1936.8704555826098, "learning_rate": 1.9997020280022482e-05, "loss": 647.9312, "step": 920 }, { "epoch": 0.01788616322566376, "grad_norm": 1018.8993638712327, "learning_rate": 1.9996869439569767e-05, "loss": 638.7674, "step": 930 }, { "epoch": 0.018078487561423585, "grad_norm": 904.9237018635373, "learning_rate": 1.9996714875448286e-05, "loss": 641.0137, "step": 940 }, { "epoch": 0.01827081189718341, "grad_norm": 1003.3093857172769, "learning_rate": 1.9996556587715617e-05, "loss": 649.2616, "step": 950 }, { "epoch": 0.018463136232943236, "grad_norm": 960.2339889734326, "learning_rate": 1.9996394576430716e-05, "loss": 645.8987, "step": 960 }, { "epoch": 0.018655460568703063, "grad_norm": 836.5678203183564, "learning_rate": 1.9996228841653932e-05, "loss": 647.2, "step": 970 }, { "epoch": 0.018847784904462886, "grad_norm": 798.1541783785667, "learning_rate": 1.9996059383447e-05, "loss": 632.6011, "step": 980 }, { "epoch": 0.019040109240222713, "grad_norm": 1001.9244344517015, "learning_rate": 1.999588620187303e-05, "loss": 637.499, "step": 990 }, { "epoch": 0.019232433575982536, "grad_norm": 927.656831492326, "learning_rate": 1.9995709296996545e-05, "loss": 644.8208, "step": 1000 }, { "epoch": 0.019424757911742363, "grad_norm": 767.8154780334495, "learning_rate": 1.999552866888343e-05, "loss": 642.4208, "step": 1010 }, { "epoch": 0.019617082247502186, "grad_norm": 761.3185760521654, "learning_rate": 1.9995344317600965e-05, "loss": 652.1784, "step": 1020 }, { "epoch": 0.019809406583262013, "grad_norm": 1340.2077546097548, "learning_rate": 1.9995156243217824e-05, "loss": 641.2602, "step": 1030 }, { "epoch": 0.02000173091902184, "grad_norm": 846.3841401165058, "learning_rate": 1.9994964445804054e-05, "loss": 656.1286, "step": 1040 }, { "epoch": 0.020194055254781663, "grad_norm": 806.7606160265955, "learning_rate": 1.9994768925431104e-05, "loss": 634.8761, "step": 1050 }, { "epoch": 0.02038637959054149, "grad_norm": 707.4507239896416, "learning_rate": 1.9994569682171796e-05, "loss": 628.0639, "step": 1060 }, { "epoch": 0.020578703926301314, "grad_norm": 756.2349192400593, "learning_rate": 1.9994366716100346e-05, "loss": 631.8284, "step": 1070 }, { "epoch": 0.02077102826206114, "grad_norm": 891.7453215514997, "learning_rate": 1.999416002729236e-05, "loss": 629.0493, "step": 1080 }, { "epoch": 0.020963352597820964, "grad_norm": 908.0499458662863, "learning_rate": 1.999394961582482e-05, "loss": 645.8078, "step": 1090 }, { "epoch": 0.02115567693358079, "grad_norm": 858.8921391277137, "learning_rate": 1.9993735481776105e-05, "loss": 613.4529, "step": 1100 }, { "epoch": 0.021348001269340618, "grad_norm": 1634.7873260446481, "learning_rate": 1.9993517625225976e-05, "loss": 617.2008, "step": 1110 }, { "epoch": 0.02154032560510044, "grad_norm": 820.0560949206612, "learning_rate": 1.9993296046255578e-05, "loss": 608.7404, "step": 1120 }, { "epoch": 0.021732649940860268, "grad_norm": 804.3695313812165, "learning_rate": 1.9993070744947447e-05, "loss": 626.9961, "step": 1130 }, { "epoch": 0.02192497427662009, "grad_norm": 1114.8561548281725, "learning_rate": 1.9992841721385508e-05, "loss": 611.4933, "step": 1140 }, { "epoch": 0.022117298612379918, "grad_norm": 3419.5258436900294, "learning_rate": 1.999260897565506e-05, "loss": 614.8227, "step": 1150 }, { "epoch": 0.02230962294813974, "grad_norm": 1076.6006001682858, "learning_rate": 1.9992372507842807e-05, "loss": 627.309, "step": 1160 }, { "epoch": 0.02250194728389957, "grad_norm": 732.8498874304387, "learning_rate": 1.9992132318036825e-05, "loss": 615.4989, "step": 1170 }, { "epoch": 0.022694271619659395, "grad_norm": 995.8140259128272, "learning_rate": 1.9991888406326575e-05, "loss": 630.0663, "step": 1180 }, { "epoch": 0.02288659595541922, "grad_norm": 763.5901984809835, "learning_rate": 1.9991640772802916e-05, "loss": 606.8447, "step": 1190 }, { "epoch": 0.023078920291179045, "grad_norm": 727.5466510360628, "learning_rate": 1.9991389417558088e-05, "loss": 612.981, "step": 1200 }, { "epoch": 0.02327124462693887, "grad_norm": 821.0411142467469, "learning_rate": 1.9991134340685713e-05, "loss": 602.0013, "step": 1210 }, { "epoch": 0.023463568962698696, "grad_norm": 840.802859921454, "learning_rate": 1.999087554228081e-05, "loss": 598.6377, "step": 1220 }, { "epoch": 0.02365589329845852, "grad_norm": 756.3177316860919, "learning_rate": 1.999061302243977e-05, "loss": 612.0126, "step": 1230 }, { "epoch": 0.023848217634218346, "grad_norm": 1195.0447257910407, "learning_rate": 1.9990346781260378e-05, "loss": 616.7188, "step": 1240 }, { "epoch": 0.024040541969978173, "grad_norm": 922.9388146082537, "learning_rate": 1.9990076818841805e-05, "loss": 596.7434, "step": 1250 }, { "epoch": 0.024232866305737996, "grad_norm": 700.6416809095787, "learning_rate": 1.998980313528461e-05, "loss": 609.2294, "step": 1260 }, { "epoch": 0.024425190641497823, "grad_norm": 1257.6949867941905, "learning_rate": 1.9989525730690736e-05, "loss": 609.9669, "step": 1270 }, { "epoch": 0.024617514977257646, "grad_norm": 903.549613148072, "learning_rate": 1.998924460516351e-05, "loss": 620.5952, "step": 1280 }, { "epoch": 0.024809839313017473, "grad_norm": 764.6321620873817, "learning_rate": 1.9988959758807645e-05, "loss": 614.1899, "step": 1290 }, { "epoch": 0.025002163648777297, "grad_norm": 778.3259603306221, "learning_rate": 1.9988671191729243e-05, "loss": 624.8979, "step": 1300 }, { "epoch": 0.025194487984537123, "grad_norm": 1005.8223323049509, "learning_rate": 1.998837890403579e-05, "loss": 615.0917, "step": 1310 }, { "epoch": 0.02538681232029695, "grad_norm": 1440.4206048755198, "learning_rate": 1.998808289583616e-05, "loss": 604.339, "step": 1320 }, { "epoch": 0.025579136656056774, "grad_norm": 999.4877466798821, "learning_rate": 1.998778316724061e-05, "loss": 595.0511, "step": 1330 }, { "epoch": 0.0257714609918166, "grad_norm": 856.5499567850246, "learning_rate": 1.998747971836078e-05, "loss": 582.4183, "step": 1340 }, { "epoch": 0.025963785327576424, "grad_norm": 771.1349971770352, "learning_rate": 1.9987172549309707e-05, "loss": 608.028, "step": 1350 }, { "epoch": 0.02615610966333625, "grad_norm": 855.138500276869, "learning_rate": 1.9986861660201802e-05, "loss": 596.8151, "step": 1360 }, { "epoch": 0.026348433999096074, "grad_norm": 792.8188188026194, "learning_rate": 1.998654705115287e-05, "loss": 623.3355, "step": 1370 }, { "epoch": 0.0265407583348559, "grad_norm": 717.0052410633494, "learning_rate": 1.9986228722280093e-05, "loss": 615.0966, "step": 1380 }, { "epoch": 0.026733082670615728, "grad_norm": 743.0989252905181, "learning_rate": 1.998590667370204e-05, "loss": 595.9803, "step": 1390 }, { "epoch": 0.02692540700637555, "grad_norm": 760.5612612863387, "learning_rate": 1.998558090553868e-05, "loss": 609.7504, "step": 1400 }, { "epoch": 0.027117731342135378, "grad_norm": 775.1687306616708, "learning_rate": 1.9985251417911347e-05, "loss": 593.0434, "step": 1410 }, { "epoch": 0.0273100556778952, "grad_norm": 741.3442683045452, "learning_rate": 1.9984918210942776e-05, "loss": 585.3288, "step": 1420 }, { "epoch": 0.02750238001365503, "grad_norm": 730.328436539225, "learning_rate": 1.9984581284757074e-05, "loss": 596.1948, "step": 1430 }, { "epoch": 0.027694704349414852, "grad_norm": 698.1410628688395, "learning_rate": 1.9984240639479745e-05, "loss": 589.9968, "step": 1440 }, { "epoch": 0.02788702868517468, "grad_norm": 886.6719463044703, "learning_rate": 1.9983896275237677e-05, "loss": 586.9773, "step": 1450 }, { "epoch": 0.028079353020934505, "grad_norm": 848.7504027618456, "learning_rate": 1.9983548192159132e-05, "loss": 587.161, "step": 1460 }, { "epoch": 0.02827167735669433, "grad_norm": 796.0128772973629, "learning_rate": 1.998319639037377e-05, "loss": 598.3176, "step": 1470 }, { "epoch": 0.028464001692454156, "grad_norm": 932.687167624479, "learning_rate": 1.9982840870012626e-05, "loss": 589.4393, "step": 1480 }, { "epoch": 0.02865632602821398, "grad_norm": 769.0697081505059, "learning_rate": 1.9982481631208138e-05, "loss": 582.1562, "step": 1490 }, { "epoch": 0.028848650363973806, "grad_norm": 927.1602634645985, "learning_rate": 1.9982118674094104e-05, "loss": 587.0485, "step": 1500 }, { "epoch": 0.02904097469973363, "grad_norm": 827.0889935686532, "learning_rate": 1.9981751998805725e-05, "loss": 581.6353, "step": 1510 }, { "epoch": 0.029233299035493456, "grad_norm": 996.163060843575, "learning_rate": 1.998138160547958e-05, "loss": 590.505, "step": 1520 }, { "epoch": 0.029425623371253283, "grad_norm": 914.838305631208, "learning_rate": 1.9981007494253638e-05, "loss": 588.7489, "step": 1530 }, { "epoch": 0.029617947707013106, "grad_norm": 911.7105459419249, "learning_rate": 1.998062966526724e-05, "loss": 594.3789, "step": 1540 }, { "epoch": 0.029810272042772933, "grad_norm": 705.4340084272872, "learning_rate": 1.998024811866113e-05, "loss": 593.7086, "step": 1550 }, { "epoch": 0.030002596378532757, "grad_norm": 861.251850051679, "learning_rate": 1.9979862854577427e-05, "loss": 597.0841, "step": 1560 }, { "epoch": 0.030194920714292584, "grad_norm": 750.188392481234, "learning_rate": 1.9979473873159635e-05, "loss": 584.6696, "step": 1570 }, { "epoch": 0.030387245050052407, "grad_norm": 757.1086820583647, "learning_rate": 1.9979081174552638e-05, "loss": 594.2866, "step": 1580 }, { "epoch": 0.030579569385812234, "grad_norm": 760.443045425971, "learning_rate": 1.997868475890271e-05, "loss": 589.7756, "step": 1590 }, { "epoch": 0.03077189372157206, "grad_norm": 760.888452583621, "learning_rate": 1.997828462635752e-05, "loss": 575.5263, "step": 1600 }, { "epoch": 0.030964218057331884, "grad_norm": 939.2634714082963, "learning_rate": 1.99778807770661e-05, "loss": 578.2944, "step": 1610 }, { "epoch": 0.03115654239309171, "grad_norm": 817.6277925232175, "learning_rate": 1.9977473211178882e-05, "loss": 593.5157, "step": 1620 }, { "epoch": 0.03134886672885154, "grad_norm": 736.0775460201876, "learning_rate": 1.9977061928847676e-05, "loss": 586.7161, "step": 1630 }, { "epoch": 0.03154119106461136, "grad_norm": 711.1986925625198, "learning_rate": 1.9976646930225678e-05, "loss": 592.29, "step": 1640 }, { "epoch": 0.031733515400371184, "grad_norm": 876.663942507758, "learning_rate": 1.997622821546747e-05, "loss": 580.1834, "step": 1650 }, { "epoch": 0.031925839736131015, "grad_norm": 754.3849455814952, "learning_rate": 1.9975805784729008e-05, "loss": 581.797, "step": 1660 }, { "epoch": 0.03211816407189084, "grad_norm": 781.673019472033, "learning_rate": 1.9975379638167654e-05, "loss": 620.744, "step": 1670 }, { "epoch": 0.03231048840765066, "grad_norm": 692.1175529132593, "learning_rate": 1.9974949775942134e-05, "loss": 564.507, "step": 1680 }, { "epoch": 0.032502812743410485, "grad_norm": 731.9105990335349, "learning_rate": 1.997451619821256e-05, "loss": 589.0906, "step": 1690 }, { "epoch": 0.032695137079170315, "grad_norm": 856.315165121257, "learning_rate": 1.997407890514044e-05, "loss": 576.5989, "step": 1700 }, { "epoch": 0.03288746141493014, "grad_norm": 797.0996376202884, "learning_rate": 1.9973637896888652e-05, "loss": 564.9523, "step": 1710 }, { "epoch": 0.03307978575068996, "grad_norm": 682.1407433412485, "learning_rate": 1.997319317362147e-05, "loss": 571.1717, "step": 1720 }, { "epoch": 0.03327211008644979, "grad_norm": 722.5978016546433, "learning_rate": 1.9972744735504542e-05, "loss": 557.932, "step": 1730 }, { "epoch": 0.033464434422209616, "grad_norm": 1066.4776783343032, "learning_rate": 1.9972292582704905e-05, "loss": 583.6356, "step": 1740 }, { "epoch": 0.03365675875796944, "grad_norm": 1091.442768853797, "learning_rate": 1.997183671539098e-05, "loss": 575.6317, "step": 1750 }, { "epoch": 0.03384908309372926, "grad_norm": 690.316815081678, "learning_rate": 1.9971377133732567e-05, "loss": 570.6772, "step": 1760 }, { "epoch": 0.03404140742948909, "grad_norm": 671.2236932546693, "learning_rate": 1.997091383790086e-05, "loss": 582.4128, "step": 1770 }, { "epoch": 0.034233731765248916, "grad_norm": 757.1943605183349, "learning_rate": 1.997044682806842e-05, "loss": 558.5421, "step": 1780 }, { "epoch": 0.03442605610100874, "grad_norm": 753.0373151865076, "learning_rate": 1.9969976104409202e-05, "loss": 576.9115, "step": 1790 }, { "epoch": 0.03461838043676857, "grad_norm": 815.0380471542311, "learning_rate": 1.9969501667098547e-05, "loss": 582.4467, "step": 1800 }, { "epoch": 0.03481070477252839, "grad_norm": 829.7918441384254, "learning_rate": 1.996902351631317e-05, "loss": 561.6938, "step": 1810 }, { "epoch": 0.03500302910828822, "grad_norm": 963.6197521916137, "learning_rate": 1.996854165223118e-05, "loss": 560.3726, "step": 1820 }, { "epoch": 0.03519535344404804, "grad_norm": 796.0395944595921, "learning_rate": 1.9968056075032058e-05, "loss": 560.0353, "step": 1830 }, { "epoch": 0.03538767777980787, "grad_norm": 694.7128512741667, "learning_rate": 1.9967566784896676e-05, "loss": 557.3366, "step": 1840 }, { "epoch": 0.035580002115567694, "grad_norm": 692.2461914066811, "learning_rate": 1.996707378200729e-05, "loss": 547.8145, "step": 1850 }, { "epoch": 0.03577232645132752, "grad_norm": 1022.0757647180731, "learning_rate": 1.9966577066547526e-05, "loss": 556.1324, "step": 1860 }, { "epoch": 0.03596465078708735, "grad_norm": 754.5291255951288, "learning_rate": 1.9966076638702412e-05, "loss": 567.7352, "step": 1870 }, { "epoch": 0.03615697512284717, "grad_norm": 658.4566590087529, "learning_rate": 1.9965572498658346e-05, "loss": 564.1984, "step": 1880 }, { "epoch": 0.036349299458606994, "grad_norm": 703.9357386627887, "learning_rate": 1.996506464660311e-05, "loss": 562.5533, "step": 1890 }, { "epoch": 0.03654162379436682, "grad_norm": 1354.1007956418598, "learning_rate": 1.9964553082725873e-05, "loss": 550.8262, "step": 1900 }, { "epoch": 0.03673394813012665, "grad_norm": 965.1647425113485, "learning_rate": 1.996403780721718e-05, "loss": 557.0399, "step": 1910 }, { "epoch": 0.03692627246588647, "grad_norm": 856.3895064371682, "learning_rate": 1.9963518820268968e-05, "loss": 568.2071, "step": 1920 }, { "epoch": 0.037118596801646295, "grad_norm": 726.6624727792246, "learning_rate": 1.996299612207455e-05, "loss": 552.3866, "step": 1930 }, { "epoch": 0.037310921137406125, "grad_norm": 833.5772157518286, "learning_rate": 1.9962469712828613e-05, "loss": 571.4761, "step": 1940 }, { "epoch": 0.03750324547316595, "grad_norm": 767.736432325189, "learning_rate": 1.996193959272725e-05, "loss": 543.1805, "step": 1950 }, { "epoch": 0.03769556980892577, "grad_norm": 716.1463033104436, "learning_rate": 1.9961405761967914e-05, "loss": 558.2827, "step": 1960 }, { "epoch": 0.037887894144685595, "grad_norm": 760.6145737591669, "learning_rate": 1.996086822074945e-05, "loss": 551.1155, "step": 1970 }, { "epoch": 0.038080218480445426, "grad_norm": 780.5334606807841, "learning_rate": 1.996032696927208e-05, "loss": 556.977, "step": 1980 }, { "epoch": 0.03827254281620525, "grad_norm": 1030.3170317696865, "learning_rate": 1.9959782007737418e-05, "loss": 556.1245, "step": 1990 }, { "epoch": 0.03846486715196507, "grad_norm": 826.3580099606195, "learning_rate": 1.9959233336348452e-05, "loss": 541.4277, "step": 2000 }, { "epoch": 0.0386571914877249, "grad_norm": 693.1981882140224, "learning_rate": 1.9958680955309546e-05, "loss": 560.1344, "step": 2010 }, { "epoch": 0.038849515823484726, "grad_norm": 1055.8601947595325, "learning_rate": 1.9958124864826457e-05, "loss": 558.6848, "step": 2020 }, { "epoch": 0.03904184015924455, "grad_norm": 748.4590657616682, "learning_rate": 1.9957565065106318e-05, "loss": 542.2576, "step": 2030 }, { "epoch": 0.03923416449500437, "grad_norm": 668.2717036729244, "learning_rate": 1.9957001556357652e-05, "loss": 547.7366, "step": 2040 }, { "epoch": 0.0394264888307642, "grad_norm": 771.5865978323359, "learning_rate": 1.995643433879035e-05, "loss": 538.1941, "step": 2050 }, { "epoch": 0.039618813166524026, "grad_norm": 677.3405758876926, "learning_rate": 1.9955863412615693e-05, "loss": 550.9306, "step": 2060 }, { "epoch": 0.03981113750228385, "grad_norm": 743.9922405719423, "learning_rate": 1.9955288778046338e-05, "loss": 555.3232, "step": 2070 }, { "epoch": 0.04000346183804368, "grad_norm": 698.9378102470318, "learning_rate": 1.995471043529633e-05, "loss": 550.2594, "step": 2080 }, { "epoch": 0.040195786173803504, "grad_norm": 941.1099281364698, "learning_rate": 1.99541283845811e-05, "loss": 551.2195, "step": 2090 }, { "epoch": 0.04038811050956333, "grad_norm": 758.8590943012098, "learning_rate": 1.9953542626117437e-05, "loss": 553.01, "step": 2100 }, { "epoch": 0.04058043484532315, "grad_norm": 728.6791740588354, "learning_rate": 1.9952953160123537e-05, "loss": 560.2909, "step": 2110 }, { "epoch": 0.04077275918108298, "grad_norm": 704.9364163187533, "learning_rate": 1.995235998681896e-05, "loss": 551.6747, "step": 2120 }, { "epoch": 0.040965083516842804, "grad_norm": 867.618758450042, "learning_rate": 1.9951763106424658e-05, "loss": 552.1828, "step": 2130 }, { "epoch": 0.04115740785260263, "grad_norm": 671.4482081969627, "learning_rate": 1.9951162519162962e-05, "loss": 546.2693, "step": 2140 }, { "epoch": 0.04134973218836246, "grad_norm": 681.6085942169542, "learning_rate": 1.9950558225257574e-05, "loss": 544.3579, "step": 2150 }, { "epoch": 0.04154205652412228, "grad_norm": 659.0744185297452, "learning_rate": 1.9949950224933584e-05, "loss": 545.4658, "step": 2160 }, { "epoch": 0.041734380859882104, "grad_norm": 669.992502636515, "learning_rate": 1.994933851841747e-05, "loss": 545.2745, "step": 2170 }, { "epoch": 0.04192670519564193, "grad_norm": 738.3814003175634, "learning_rate": 1.994872310593707e-05, "loss": 547.8611, "step": 2180 }, { "epoch": 0.04211902953140176, "grad_norm": 771.670011949177, "learning_rate": 1.994810398772162e-05, "loss": 549.3855, "step": 2190 }, { "epoch": 0.04231135386716158, "grad_norm": 739.3839708115137, "learning_rate": 1.994748116400174e-05, "loss": 539.5586, "step": 2200 }, { "epoch": 0.042503678202921405, "grad_norm": 756.7255259576679, "learning_rate": 1.994685463500941e-05, "loss": 548.5997, "step": 2210 }, { "epoch": 0.042696002538681235, "grad_norm": 797.4589123090307, "learning_rate": 1.9946224400978006e-05, "loss": 540.3652, "step": 2220 }, { "epoch": 0.04288832687444106, "grad_norm": 1007.0318957752902, "learning_rate": 1.994559046214228e-05, "loss": 541.5651, "step": 2230 }, { "epoch": 0.04308065121020088, "grad_norm": 754.2131677595725, "learning_rate": 1.9944952818738366e-05, "loss": 555.0396, "step": 2240 }, { "epoch": 0.043272975545960705, "grad_norm": 842.0956363485972, "learning_rate": 1.9944311471003775e-05, "loss": 555.0413, "step": 2250 }, { "epoch": 0.043465299881720536, "grad_norm": 662.8958046412165, "learning_rate": 1.9943666419177392e-05, "loss": 525.6874, "step": 2260 }, { "epoch": 0.04365762421748036, "grad_norm": 786.6938179819454, "learning_rate": 1.9943017663499497e-05, "loss": 541.0315, "step": 2270 }, { "epoch": 0.04384994855324018, "grad_norm": 851.9566168187908, "learning_rate": 1.9942365204211734e-05, "loss": 538.0033, "step": 2280 }, { "epoch": 0.04404227288900001, "grad_norm": 1014.0267740996094, "learning_rate": 1.9941709041557134e-05, "loss": 541.5473, "step": 2290 }, { "epoch": 0.044234597224759836, "grad_norm": 880.1497073582835, "learning_rate": 1.994104917578011e-05, "loss": 558.7345, "step": 2300 }, { "epoch": 0.04442692156051966, "grad_norm": 857.6755267738223, "learning_rate": 1.9940385607126456e-05, "loss": 535.1024, "step": 2310 }, { "epoch": 0.04461924589627948, "grad_norm": 708.2875686175457, "learning_rate": 1.9939718335843326e-05, "loss": 548.1394, "step": 2320 }, { "epoch": 0.04481157023203931, "grad_norm": 707.9581612505445, "learning_rate": 1.9939047362179283e-05, "loss": 545.4577, "step": 2330 }, { "epoch": 0.04500389456779914, "grad_norm": 1014.2719202407706, "learning_rate": 1.9938372686384244e-05, "loss": 528.5873, "step": 2340 }, { "epoch": 0.04519621890355896, "grad_norm": 646.5182432340446, "learning_rate": 1.9937694308709514e-05, "loss": 536.911, "step": 2350 }, { "epoch": 0.04538854323931879, "grad_norm": 728.5652322845784, "learning_rate": 1.9937012229407783e-05, "loss": 545.6943, "step": 2360 }, { "epoch": 0.045580867575078614, "grad_norm": 705.5261108419323, "learning_rate": 1.9936326448733107e-05, "loss": 543.6517, "step": 2370 }, { "epoch": 0.04577319191083844, "grad_norm": 641.42054684661, "learning_rate": 1.9935636966940938e-05, "loss": 530.3073, "step": 2380 }, { "epoch": 0.04596551624659826, "grad_norm": 890.057060664522, "learning_rate": 1.993494378428809e-05, "loss": 539.9008, "step": 2390 }, { "epoch": 0.04615784058235809, "grad_norm": 976.3084239308178, "learning_rate": 1.9934246901032764e-05, "loss": 525.3227, "step": 2400 }, { "epoch": 0.046350164918117914, "grad_norm": 1298.319451345551, "learning_rate": 1.9933546317434536e-05, "loss": 538.6964, "step": 2410 }, { "epoch": 0.04654248925387774, "grad_norm": 847.2600433963399, "learning_rate": 1.9932842033754363e-05, "loss": 542.708, "step": 2420 }, { "epoch": 0.04673481358963757, "grad_norm": 750.3045876851812, "learning_rate": 1.9932134050254578e-05, "loss": 537.4875, "step": 2430 }, { "epoch": 0.04692713792539739, "grad_norm": 761.5212981614922, "learning_rate": 1.9931422367198893e-05, "loss": 529.205, "step": 2440 }, { "epoch": 0.047119462261157215, "grad_norm": 729.1939858107567, "learning_rate": 1.9930706984852403e-05, "loss": 533.5258, "step": 2450 }, { "epoch": 0.04731178659691704, "grad_norm": 693.8060340474619, "learning_rate": 1.992998790348157e-05, "loss": 533.9992, "step": 2460 }, { "epoch": 0.04750411093267687, "grad_norm": 685.9913702085655, "learning_rate": 1.992926512335424e-05, "loss": 548.7127, "step": 2470 }, { "epoch": 0.04769643526843669, "grad_norm": 659.4068670772733, "learning_rate": 1.992853864473964e-05, "loss": 532.6922, "step": 2480 }, { "epoch": 0.047888759604196515, "grad_norm": 744.2674592726822, "learning_rate": 1.992780846790837e-05, "loss": 537.2677, "step": 2490 }, { "epoch": 0.048081083939956346, "grad_norm": 742.7395697062876, "learning_rate": 1.9927074593132412e-05, "loss": 534.2826, "step": 2500 }, { "epoch": 0.04827340827571617, "grad_norm": 765.7297010871453, "learning_rate": 1.9926337020685114e-05, "loss": 543.3615, "step": 2510 }, { "epoch": 0.04846573261147599, "grad_norm": 705.1322359295946, "learning_rate": 1.992559575084122e-05, "loss": 541.1059, "step": 2520 }, { "epoch": 0.048658056947235816, "grad_norm": 729.6861253272316, "learning_rate": 1.9924850783876832e-05, "loss": 532.5268, "step": 2530 }, { "epoch": 0.048850381282995646, "grad_norm": 990.1228190611076, "learning_rate": 1.992410212006944e-05, "loss": 535.9039, "step": 2540 }, { "epoch": 0.04904270561875547, "grad_norm": 724.5479997891864, "learning_rate": 1.9923349759697915e-05, "loss": 526.4707, "step": 2550 }, { "epoch": 0.04923502995451529, "grad_norm": 835.7920156059688, "learning_rate": 1.992259370304249e-05, "loss": 523.1281, "step": 2560 }, { "epoch": 0.04942735429027512, "grad_norm": 750.6988564118371, "learning_rate": 1.992183395038479e-05, "loss": 535.6861, "step": 2570 }, { "epoch": 0.049619678626034947, "grad_norm": 649.3047660881562, "learning_rate": 1.9921070502007808e-05, "loss": 528.5413, "step": 2580 }, { "epoch": 0.04981200296179477, "grad_norm": 635.5945736581797, "learning_rate": 1.9920303358195916e-05, "loss": 524.0945, "step": 2590 }, { "epoch": 0.05000432729755459, "grad_norm": 741.3078339093645, "learning_rate": 1.9919532519234858e-05, "loss": 533.6659, "step": 2600 }, { "epoch": 0.050196651633314424, "grad_norm": 665.3001146406066, "learning_rate": 1.9918757985411767e-05, "loss": 522.319, "step": 2610 }, { "epoch": 0.05038897596907425, "grad_norm": 763.3581388836408, "learning_rate": 1.991797975701514e-05, "loss": 533.2575, "step": 2620 }, { "epoch": 0.05058130030483407, "grad_norm": 1598.675239624877, "learning_rate": 1.9917197834334858e-05, "loss": 522.5906, "step": 2630 }, { "epoch": 0.0507736246405939, "grad_norm": 862.7382749409518, "learning_rate": 1.9916412217662162e-05, "loss": 520.266, "step": 2640 }, { "epoch": 0.050965948976353724, "grad_norm": 656.1481470342628, "learning_rate": 1.9915622907289695e-05, "loss": 522.355, "step": 2650 }, { "epoch": 0.05115827331211355, "grad_norm": 733.1657818999223, "learning_rate": 1.9914829903511458e-05, "loss": 519.6336, "step": 2660 }, { "epoch": 0.05135059764787337, "grad_norm": 811.9638977805398, "learning_rate": 1.9914033206622828e-05, "loss": 535.8986, "step": 2670 }, { "epoch": 0.0515429219836332, "grad_norm": 718.5237045758081, "learning_rate": 1.9913232816920565e-05, "loss": 529.5738, "step": 2680 }, { "epoch": 0.051735246319393025, "grad_norm": 709.1296709887417, "learning_rate": 1.99124287347028e-05, "loss": 533.9658, "step": 2690 }, { "epoch": 0.05192757065515285, "grad_norm": 681.1701492583577, "learning_rate": 1.991162096026904e-05, "loss": 533.8906, "step": 2700 }, { "epoch": 0.05211989499091268, "grad_norm": 665.1251504988176, "learning_rate": 1.9910809493920172e-05, "loss": 525.5392, "step": 2710 }, { "epoch": 0.0523122193266725, "grad_norm": 676.1904534285137, "learning_rate": 1.990999433595845e-05, "loss": 529.293, "step": 2720 }, { "epoch": 0.052504543662432325, "grad_norm": 888.0866247953736, "learning_rate": 1.99091754866875e-05, "loss": 527.442, "step": 2730 }, { "epoch": 0.05269686799819215, "grad_norm": 734.1583158081239, "learning_rate": 1.990835294641234e-05, "loss": 521.3226, "step": 2740 }, { "epoch": 0.05288919233395198, "grad_norm": 611.4985369452567, "learning_rate": 1.990752671543935e-05, "loss": 525.7815, "step": 2750 }, { "epoch": 0.0530815166697118, "grad_norm": 678.5443627783844, "learning_rate": 1.9906696794076284e-05, "loss": 527.4831, "step": 2760 }, { "epoch": 0.053273841005471625, "grad_norm": 621.1472476181906, "learning_rate": 1.9905863182632285e-05, "loss": 530.1809, "step": 2770 }, { "epoch": 0.053466165341231456, "grad_norm": 613.6255766300031, "learning_rate": 1.990502588141784e-05, "loss": 519.0969, "step": 2780 }, { "epoch": 0.05365848967699128, "grad_norm": 631.3053582863905, "learning_rate": 1.990418489074485e-05, "loss": 509.2854, "step": 2790 }, { "epoch": 0.0538508140127511, "grad_norm": 753.1234591908322, "learning_rate": 1.9903340210926555e-05, "loss": 510.0524, "step": 2800 }, { "epoch": 0.054043138348510926, "grad_norm": 745.726058901604, "learning_rate": 1.9902491842277592e-05, "loss": 531.1826, "step": 2810 }, { "epoch": 0.054235462684270756, "grad_norm": 786.1870834276812, "learning_rate": 1.9901639785113967e-05, "loss": 526.6046, "step": 2820 }, { "epoch": 0.05442778702003058, "grad_norm": 816.9446739495014, "learning_rate": 1.990078403975305e-05, "loss": 537.0502, "step": 2830 }, { "epoch": 0.0546201113557904, "grad_norm": 662.5454116835482, "learning_rate": 1.9899924606513593e-05, "loss": 521.8435, "step": 2840 }, { "epoch": 0.05481243569155023, "grad_norm": 697.6644935840952, "learning_rate": 1.9899061485715726e-05, "loss": 512.287, "step": 2850 }, { "epoch": 0.05500476002731006, "grad_norm": 657.042049649758, "learning_rate": 1.9898194677680943e-05, "loss": 522.9424, "step": 2860 }, { "epoch": 0.05519708436306988, "grad_norm": 704.66905540707, "learning_rate": 1.9897324182732118e-05, "loss": 515.9715, "step": 2870 }, { "epoch": 0.055389408698829704, "grad_norm": 653.9461352074152, "learning_rate": 1.989645000119349e-05, "loss": 513.6467, "step": 2880 }, { "epoch": 0.055581733034589534, "grad_norm": 648.845462458859, "learning_rate": 1.9895572133390687e-05, "loss": 517.3306, "step": 2890 }, { "epoch": 0.05577405737034936, "grad_norm": 617.4711130107623, "learning_rate": 1.9894690579650694e-05, "loss": 518.482, "step": 2900 }, { "epoch": 0.05596638170610918, "grad_norm": 737.4042827552958, "learning_rate": 1.9893805340301876e-05, "loss": 521.9151, "step": 2910 }, { "epoch": 0.05615870604186901, "grad_norm": 665.3499871063476, "learning_rate": 1.989291641567397e-05, "loss": 516.3638, "step": 2920 }, { "epoch": 0.056351030377628834, "grad_norm": 634.0834194937779, "learning_rate": 1.9892023806098083e-05, "loss": 502.445, "step": 2930 }, { "epoch": 0.05654335471338866, "grad_norm": 626.6913492134656, "learning_rate": 1.9891127511906703e-05, "loss": 511.0661, "step": 2940 }, { "epoch": 0.05673567904914848, "grad_norm": 637.3891412661674, "learning_rate": 1.9890227533433685e-05, "loss": 515.9606, "step": 2950 }, { "epoch": 0.05692800338490831, "grad_norm": 774.722354100866, "learning_rate": 1.988932387101425e-05, "loss": 530.3114, "step": 2960 }, { "epoch": 0.057120327720668135, "grad_norm": 661.0222702671855, "learning_rate": 1.9888416524985e-05, "loss": 512.457, "step": 2970 }, { "epoch": 0.05731265205642796, "grad_norm": 596.821145071028, "learning_rate": 1.988750549568391e-05, "loss": 523.493, "step": 2980 }, { "epoch": 0.05750497639218779, "grad_norm": 684.7345599870473, "learning_rate": 1.9886590783450317e-05, "loss": 501.1971, "step": 2990 }, { "epoch": 0.05769730072794761, "grad_norm": 626.9730550631467, "learning_rate": 1.9885672388624942e-05, "loss": 510.5386, "step": 3000 }, { "epoch": 0.057889625063707435, "grad_norm": 1118.3852260251188, "learning_rate": 1.9884750311549868e-05, "loss": 522.3505, "step": 3010 }, { "epoch": 0.05808194939946726, "grad_norm": 647.0029106792531, "learning_rate": 1.9883824552568557e-05, "loss": 520.7158, "step": 3020 }, { "epoch": 0.05827427373522709, "grad_norm": 662.8229603194394, "learning_rate": 1.9882895112025835e-05, "loss": 526.3525, "step": 3030 }, { "epoch": 0.05846659807098691, "grad_norm": 704.9460673982611, "learning_rate": 1.9881961990267906e-05, "loss": 500.5047, "step": 3040 }, { "epoch": 0.058658922406746736, "grad_norm": 1698.837077403239, "learning_rate": 1.988102518764234e-05, "loss": 521.0469, "step": 3050 }, { "epoch": 0.058851246742506566, "grad_norm": 597.8717905084069, "learning_rate": 1.9880084704498084e-05, "loss": 519.1069, "step": 3060 }, { "epoch": 0.05904357107826639, "grad_norm": 712.4482798190649, "learning_rate": 1.987914054118545e-05, "loss": 532.2404, "step": 3070 }, { "epoch": 0.05923589541402621, "grad_norm": 871.7664288960812, "learning_rate": 1.9878192698056125e-05, "loss": 516.484, "step": 3080 }, { "epoch": 0.059428219749786036, "grad_norm": 621.4967780730051, "learning_rate": 1.9877241175463165e-05, "loss": 516.0212, "step": 3090 }, { "epoch": 0.05962054408554587, "grad_norm": 622.6022269382079, "learning_rate": 1.9876285973760993e-05, "loss": 523.7819, "step": 3100 }, { "epoch": 0.05981286842130569, "grad_norm": 654.5456179019733, "learning_rate": 1.9875327093305405e-05, "loss": 517.6488, "step": 3110 }, { "epoch": 0.06000519275706551, "grad_norm": 674.4448361971492, "learning_rate": 1.9874364534453577e-05, "loss": 514.8555, "step": 3120 }, { "epoch": 0.060197517092825344, "grad_norm": 682.141457904393, "learning_rate": 1.9873398297564036e-05, "loss": 517.7271, "step": 3130 }, { "epoch": 0.06038984142858517, "grad_norm": 550.8065749052461, "learning_rate": 1.9872428382996697e-05, "loss": 521.9763, "step": 3140 }, { "epoch": 0.06058216576434499, "grad_norm": 937.2468781077604, "learning_rate": 1.987145479111283e-05, "loss": 503.1943, "step": 3150 }, { "epoch": 0.060774490100104814, "grad_norm": 763.5125743270676, "learning_rate": 1.987047752227509e-05, "loss": 513.4222, "step": 3160 }, { "epoch": 0.060966814435864644, "grad_norm": 633.7946765874968, "learning_rate": 1.9869496576847488e-05, "loss": 499.4493, "step": 3170 }, { "epoch": 0.06115913877162447, "grad_norm": 629.8401231851526, "learning_rate": 1.9868511955195407e-05, "loss": 514.617, "step": 3180 }, { "epoch": 0.06135146310738429, "grad_norm": 722.274363839671, "learning_rate": 1.986752365768561e-05, "loss": 491.6552, "step": 3190 }, { "epoch": 0.06154378744314412, "grad_norm": 698.004629411705, "learning_rate": 1.986653168468622e-05, "loss": 519.5537, "step": 3200 }, { "epoch": 0.061736111778903945, "grad_norm": 778.116353487612, "learning_rate": 1.9865536036566727e-05, "loss": 511.8583, "step": 3210 }, { "epoch": 0.06192843611466377, "grad_norm": 670.6659366755904, "learning_rate": 1.9864536713697992e-05, "loss": 521.8036, "step": 3220 }, { "epoch": 0.06212076045042359, "grad_norm": 613.7695500085715, "learning_rate": 1.986353371645225e-05, "loss": 523.6647, "step": 3230 }, { "epoch": 0.06231308478618342, "grad_norm": 652.4045245318075, "learning_rate": 1.9862527045203105e-05, "loss": 515.5668, "step": 3240 }, { "epoch": 0.06250540912194325, "grad_norm": 640.2665108728766, "learning_rate": 1.986151670032552e-05, "loss": 510.1916, "step": 3250 }, { "epoch": 0.06269773345770308, "grad_norm": 672.8530359257265, "learning_rate": 1.986050268219583e-05, "loss": 500.603, "step": 3260 }, { "epoch": 0.06289005779346289, "grad_norm": 643.0303715876427, "learning_rate": 1.9859484991191742e-05, "loss": 518.3387, "step": 3270 }, { "epoch": 0.06308238212922272, "grad_norm": 667.8351982269853, "learning_rate": 1.985846362769233e-05, "loss": 522.2421, "step": 3280 }, { "epoch": 0.06327470646498255, "grad_norm": 706.1733963549115, "learning_rate": 1.9857438592078034e-05, "loss": 509.3566, "step": 3290 }, { "epoch": 0.06346703080074237, "grad_norm": 823.7219002972914, "learning_rate": 1.9856409884730667e-05, "loss": 512.2886, "step": 3300 }, { "epoch": 0.0636593551365022, "grad_norm": 604.2720020574882, "learning_rate": 1.98553775060334e-05, "loss": 502.6998, "step": 3310 }, { "epoch": 0.06385167947226203, "grad_norm": 626.5921201793383, "learning_rate": 1.9854341456370777e-05, "loss": 493.7466, "step": 3320 }, { "epoch": 0.06404400380802185, "grad_norm": 674.7114276650651, "learning_rate": 1.9853301736128712e-05, "loss": 510.9708, "step": 3330 }, { "epoch": 0.06423632814378168, "grad_norm": 727.7131791678172, "learning_rate": 1.9852258345694486e-05, "loss": 521.3875, "step": 3340 }, { "epoch": 0.06442865247954149, "grad_norm": 708.5336543391381, "learning_rate": 1.9851211285456738e-05, "loss": 507.2278, "step": 3350 }, { "epoch": 0.06462097681530132, "grad_norm": 699.332774977859, "learning_rate": 1.9850160555805485e-05, "loss": 518.902, "step": 3360 }, { "epoch": 0.06481330115106115, "grad_norm": 754.9990775585326, "learning_rate": 1.9849106157132105e-05, "loss": 521.9002, "step": 3370 }, { "epoch": 0.06500562548682097, "grad_norm": 628.1105872878063, "learning_rate": 1.9848048089829347e-05, "loss": 513.4633, "step": 3380 }, { "epoch": 0.0651979498225808, "grad_norm": 607.6423577389, "learning_rate": 1.9846986354291324e-05, "loss": 501.1044, "step": 3390 }, { "epoch": 0.06539027415834063, "grad_norm": 685.3186577512565, "learning_rate": 1.9845920950913506e-05, "loss": 516.6853, "step": 3400 }, { "epoch": 0.06558259849410045, "grad_norm": 663.6144566856839, "learning_rate": 1.9844851880092748e-05, "loss": 495.0083, "step": 3410 }, { "epoch": 0.06577492282986028, "grad_norm": 655.7296760262857, "learning_rate": 1.9843779142227258e-05, "loss": 503.3423, "step": 3420 }, { "epoch": 0.06596724716562011, "grad_norm": 619.243560898213, "learning_rate": 1.984270273771661e-05, "loss": 504.1719, "step": 3430 }, { "epoch": 0.06615957150137992, "grad_norm": 694.8542065917636, "learning_rate": 1.9841622666961756e-05, "loss": 503.6424, "step": 3440 }, { "epoch": 0.06635189583713975, "grad_norm": 673.2736125358978, "learning_rate": 1.9840538930364992e-05, "loss": 514.9401, "step": 3450 }, { "epoch": 0.06654422017289958, "grad_norm": 628.7796022492911, "learning_rate": 1.983945152833e-05, "loss": 494.9331, "step": 3460 }, { "epoch": 0.0667365445086594, "grad_norm": 696.7574145593314, "learning_rate": 1.9838360461261817e-05, "loss": 512.15, "step": 3470 }, { "epoch": 0.06692886884441923, "grad_norm": 637.5080529680322, "learning_rate": 1.9837265729566853e-05, "loss": 496.1206, "step": 3480 }, { "epoch": 0.06712119318017905, "grad_norm": 699.5220534995685, "learning_rate": 1.9836167333652866e-05, "loss": 514.8845, "step": 3490 }, { "epoch": 0.06731351751593888, "grad_norm": 639.8540894236521, "learning_rate": 1.9835065273929002e-05, "loss": 501.3714, "step": 3500 }, { "epoch": 0.06750584185169871, "grad_norm": 633.729983004065, "learning_rate": 1.9833959550805754e-05, "loss": 501.0371, "step": 3510 }, { "epoch": 0.06769816618745852, "grad_norm": 802.133374494615, "learning_rate": 1.9832850164694983e-05, "loss": 500.5452, "step": 3520 }, { "epoch": 0.06789049052321836, "grad_norm": 830.7796341589134, "learning_rate": 1.9831737116009924e-05, "loss": 497.9683, "step": 3530 }, { "epoch": 0.06808281485897819, "grad_norm": 630.399254399982, "learning_rate": 1.9830620405165164e-05, "loss": 493.7095, "step": 3540 }, { "epoch": 0.068275139194738, "grad_norm": 644.411241438413, "learning_rate": 1.982950003257666e-05, "loss": 517.4553, "step": 3550 }, { "epoch": 0.06846746353049783, "grad_norm": 588.2682798659015, "learning_rate": 1.9828375998661738e-05, "loss": 507.9328, "step": 3560 }, { "epoch": 0.06865978786625766, "grad_norm": 617.9867513432937, "learning_rate": 1.9827248303839073e-05, "loss": 478.5115, "step": 3570 }, { "epoch": 0.06885211220201748, "grad_norm": 650.826612173906, "learning_rate": 1.982611694852872e-05, "loss": 500.3652, "step": 3580 }, { "epoch": 0.06904443653777731, "grad_norm": 649.8580989326786, "learning_rate": 1.9824981933152087e-05, "loss": 489.6465, "step": 3590 }, { "epoch": 0.06923676087353714, "grad_norm": 621.0773120313047, "learning_rate": 1.9823843258131945e-05, "loss": 500.1181, "step": 3600 }, { "epoch": 0.06942908520929696, "grad_norm": 703.0441909093954, "learning_rate": 1.9822700923892438e-05, "loss": 517.5681, "step": 3610 }, { "epoch": 0.06962140954505679, "grad_norm": 655.7881223496453, "learning_rate": 1.9821554930859066e-05, "loss": 502.2795, "step": 3620 }, { "epoch": 0.0698137338808166, "grad_norm": 717.8903663067391, "learning_rate": 1.982040527945869e-05, "loss": 506.7368, "step": 3630 }, { "epoch": 0.07000605821657643, "grad_norm": 573.5901477923879, "learning_rate": 1.9819251970119534e-05, "loss": 511.8817, "step": 3640 }, { "epoch": 0.07019838255233626, "grad_norm": 687.4724535149858, "learning_rate": 1.9818095003271193e-05, "loss": 496.4292, "step": 3650 }, { "epoch": 0.07039070688809608, "grad_norm": 639.0458926308955, "learning_rate": 1.9816934379344613e-05, "loss": 501.0932, "step": 3660 }, { "epoch": 0.07058303122385591, "grad_norm": 680.6213480239904, "learning_rate": 1.9815770098772108e-05, "loss": 499.6543, "step": 3670 }, { "epoch": 0.07077535555961574, "grad_norm": 705.8244315480342, "learning_rate": 1.9814602161987354e-05, "loss": 490.7324, "step": 3680 }, { "epoch": 0.07096767989537556, "grad_norm": 583.136369592737, "learning_rate": 1.981343056942539e-05, "loss": 490.0691, "step": 3690 }, { "epoch": 0.07116000423113539, "grad_norm": 715.6044089791625, "learning_rate": 1.9812255321522614e-05, "loss": 515.5474, "step": 3700 }, { "epoch": 0.07135232856689522, "grad_norm": 581.4536845406911, "learning_rate": 1.981107641871678e-05, "loss": 496.5484, "step": 3710 }, { "epoch": 0.07154465290265503, "grad_norm": 657.1353485262981, "learning_rate": 1.980989386144702e-05, "loss": 494.1127, "step": 3720 }, { "epoch": 0.07173697723841486, "grad_norm": 614.2914353568027, "learning_rate": 1.980870765015381e-05, "loss": 502.2589, "step": 3730 }, { "epoch": 0.0719293015741747, "grad_norm": 698.3911986377062, "learning_rate": 1.9807517785278997e-05, "loss": 501.1257, "step": 3740 }, { "epoch": 0.07212162590993451, "grad_norm": 630.4361176425793, "learning_rate": 1.9806324267265786e-05, "loss": 498.7661, "step": 3750 }, { "epoch": 0.07231395024569434, "grad_norm": 615.211225033671, "learning_rate": 1.9805127096558742e-05, "loss": 498.0036, "step": 3760 }, { "epoch": 0.07250627458145416, "grad_norm": 719.8962573967688, "learning_rate": 1.980392627360379e-05, "loss": 501.0617, "step": 3770 }, { "epoch": 0.07269859891721399, "grad_norm": 741.5868184209745, "learning_rate": 1.9802721798848225e-05, "loss": 509.3568, "step": 3780 }, { "epoch": 0.07289092325297382, "grad_norm": 665.4832813423726, "learning_rate": 1.980151367274068e-05, "loss": 488.9466, "step": 3790 }, { "epoch": 0.07308324758873364, "grad_norm": 603.8607554267116, "learning_rate": 1.9800301895731172e-05, "loss": 502.7106, "step": 3800 }, { "epoch": 0.07327557192449347, "grad_norm": 627.9793823986435, "learning_rate": 1.9799086468271065e-05, "loss": 497.9342, "step": 3810 }, { "epoch": 0.0734678962602533, "grad_norm": 636.4435313966623, "learning_rate": 1.9797867390813086e-05, "loss": 490.8221, "step": 3820 }, { "epoch": 0.07366022059601311, "grad_norm": 632.9088111605367, "learning_rate": 1.9796644663811318e-05, "loss": 497.4042, "step": 3830 }, { "epoch": 0.07385254493177294, "grad_norm": 718.9900162397643, "learning_rate": 1.9795418287721215e-05, "loss": 492.4667, "step": 3840 }, { "epoch": 0.07404486926753277, "grad_norm": 687.1497841147152, "learning_rate": 1.9794188262999574e-05, "loss": 504.8531, "step": 3850 }, { "epoch": 0.07423719360329259, "grad_norm": 642.1877408136563, "learning_rate": 1.979295459010456e-05, "loss": 495.9551, "step": 3860 }, { "epoch": 0.07442951793905242, "grad_norm": 642.986179627092, "learning_rate": 1.9791717269495698e-05, "loss": 486.4746, "step": 3870 }, { "epoch": 0.07462184227481225, "grad_norm": 722.7348743238974, "learning_rate": 1.979047630163387e-05, "loss": 489.8139, "step": 3880 }, { "epoch": 0.07481416661057207, "grad_norm": 713.3383230851642, "learning_rate": 1.9789231686981313e-05, "loss": 507.3725, "step": 3890 }, { "epoch": 0.0750064909463319, "grad_norm": 584.1448448604131, "learning_rate": 1.978798342600163e-05, "loss": 489.6483, "step": 3900 }, { "epoch": 0.07519881528209171, "grad_norm": 642.4517334947999, "learning_rate": 1.978673151915977e-05, "loss": 499.9061, "step": 3910 }, { "epoch": 0.07539113961785154, "grad_norm": 587.61012014815, "learning_rate": 1.9785475966922055e-05, "loss": 497.4966, "step": 3920 }, { "epoch": 0.07558346395361137, "grad_norm": 647.6673123961527, "learning_rate": 1.9784216769756156e-05, "loss": 489.6694, "step": 3930 }, { "epoch": 0.07577578828937119, "grad_norm": 767.1562758524656, "learning_rate": 1.97829539281311e-05, "loss": 486.8688, "step": 3940 }, { "epoch": 0.07596811262513102, "grad_norm": 622.1411124647373, "learning_rate": 1.9781687442517278e-05, "loss": 495.8756, "step": 3950 }, { "epoch": 0.07616043696089085, "grad_norm": 611.8275224683359, "learning_rate": 1.9780417313386433e-05, "loss": 503.6595, "step": 3960 }, { "epoch": 0.07635276129665067, "grad_norm": 635.9795739979108, "learning_rate": 1.9779143541211664e-05, "loss": 492.0267, "step": 3970 }, { "epoch": 0.0765450856324105, "grad_norm": 605.1944045484206, "learning_rate": 1.9777866126467436e-05, "loss": 474.1518, "step": 3980 }, { "epoch": 0.07673740996817033, "grad_norm": 688.9065962615937, "learning_rate": 1.9776585069629566e-05, "loss": 488.8311, "step": 3990 }, { "epoch": 0.07692973430393014, "grad_norm": 616.907512994056, "learning_rate": 1.9775300371175225e-05, "loss": 495.4689, "step": 4000 }, { "epoch": 0.07712205863968997, "grad_norm": 869.3229600963, "learning_rate": 1.9774012031582935e-05, "loss": 484.9483, "step": 4010 }, { "epoch": 0.0773143829754498, "grad_norm": 621.4725522442566, "learning_rate": 1.9772720051332585e-05, "loss": 492.811, "step": 4020 }, { "epoch": 0.07750670731120962, "grad_norm": 883.3922953575607, "learning_rate": 1.977142443090542e-05, "loss": 490.3932, "step": 4030 }, { "epoch": 0.07769903164696945, "grad_norm": 624.9347849676961, "learning_rate": 1.9770125170784035e-05, "loss": 491.0477, "step": 4040 }, { "epoch": 0.07789135598272927, "grad_norm": 646.4092159406114, "learning_rate": 1.9768822271452385e-05, "loss": 501.6062, "step": 4050 }, { "epoch": 0.0780836803184891, "grad_norm": 627.6555519603643, "learning_rate": 1.9767515733395774e-05, "loss": 491.7407, "step": 4060 }, { "epoch": 0.07827600465424893, "grad_norm": 1351.6676370674604, "learning_rate": 1.976620555710087e-05, "loss": 491.9886, "step": 4070 }, { "epoch": 0.07846832899000875, "grad_norm": 629.1803078798862, "learning_rate": 1.976489174305569e-05, "loss": 494.6177, "step": 4080 }, { "epoch": 0.07866065332576858, "grad_norm": 578.7649810787432, "learning_rate": 1.9763574291749603e-05, "loss": 479.5029, "step": 4090 }, { "epoch": 0.0788529776615284, "grad_norm": 540.8769728899618, "learning_rate": 1.9762253203673348e-05, "loss": 483.3324, "step": 4100 }, { "epoch": 0.07904530199728822, "grad_norm": 656.8305147234112, "learning_rate": 1.9760928479319003e-05, "loss": 494.2564, "step": 4110 }, { "epoch": 0.07923762633304805, "grad_norm": 571.891691206646, "learning_rate": 1.9759600119180005e-05, "loss": 490.0733, "step": 4120 }, { "epoch": 0.07942995066880788, "grad_norm": 568.0849472261627, "learning_rate": 1.975826812375115e-05, "loss": 483.9812, "step": 4130 }, { "epoch": 0.0796222750045677, "grad_norm": 620.4971891151242, "learning_rate": 1.9756932493528583e-05, "loss": 468.2408, "step": 4140 }, { "epoch": 0.07981459934032753, "grad_norm": 690.5422084610557, "learning_rate": 1.97555932290098e-05, "loss": 516.2057, "step": 4150 }, { "epoch": 0.08000692367608736, "grad_norm": 631.7672672240832, "learning_rate": 1.9754250330693658e-05, "loss": 485.5635, "step": 4160 }, { "epoch": 0.08019924801184718, "grad_norm": 548.6785879389789, "learning_rate": 1.9752903799080366e-05, "loss": 499.1404, "step": 4170 }, { "epoch": 0.08039157234760701, "grad_norm": 580.5544406665524, "learning_rate": 1.9751553634671485e-05, "loss": 474.7078, "step": 4180 }, { "epoch": 0.08058389668336682, "grad_norm": 707.3800613133382, "learning_rate": 1.9750199837969922e-05, "loss": 479.0868, "step": 4190 }, { "epoch": 0.08077622101912665, "grad_norm": 607.9172149397061, "learning_rate": 1.9748842409479953e-05, "loss": 487.9297, "step": 4200 }, { "epoch": 0.08096854535488648, "grad_norm": 686.0145755679489, "learning_rate": 1.9747481349707197e-05, "loss": 497.5591, "step": 4210 }, { "epoch": 0.0811608696906463, "grad_norm": 669.0150916061837, "learning_rate": 1.9746116659158618e-05, "loss": 488.224, "step": 4220 }, { "epoch": 0.08135319402640613, "grad_norm": 597.9819691648493, "learning_rate": 1.9744748338342546e-05, "loss": 481.8612, "step": 4230 }, { "epoch": 0.08154551836216596, "grad_norm": 654.7117910540604, "learning_rate": 1.974337638776866e-05, "loss": 486.0175, "step": 4240 }, { "epoch": 0.08173784269792578, "grad_norm": 594.8033843059904, "learning_rate": 1.9742000807947986e-05, "loss": 476.3571, "step": 4250 }, { "epoch": 0.08193016703368561, "grad_norm": 641.1753752875821, "learning_rate": 1.9740621599392907e-05, "loss": 479.6258, "step": 4260 }, { "epoch": 0.08212249136944544, "grad_norm": 639.0411963835763, "learning_rate": 1.9739238762617155e-05, "loss": 479.2322, "step": 4270 }, { "epoch": 0.08231481570520525, "grad_norm": 609.621309326222, "learning_rate": 1.973785229813581e-05, "loss": 489.9551, "step": 4280 }, { "epoch": 0.08250714004096509, "grad_norm": 595.0031502326282, "learning_rate": 1.973646220646531e-05, "loss": 492.7053, "step": 4290 }, { "epoch": 0.08269946437672492, "grad_norm": 605.150045864125, "learning_rate": 1.973506848812344e-05, "loss": 482.1862, "step": 4300 }, { "epoch": 0.08289178871248473, "grad_norm": 643.5863400964225, "learning_rate": 1.9733671143629342e-05, "loss": 501.3153, "step": 4310 }, { "epoch": 0.08308411304824456, "grad_norm": 590.4906240524667, "learning_rate": 1.9732270173503493e-05, "loss": 483.9846, "step": 4320 }, { "epoch": 0.08327643738400438, "grad_norm": 583.8559636423248, "learning_rate": 1.9730865578267745e-05, "loss": 476.9609, "step": 4330 }, { "epoch": 0.08346876171976421, "grad_norm": 579.2100154212275, "learning_rate": 1.972945735844528e-05, "loss": 482.5675, "step": 4340 }, { "epoch": 0.08366108605552404, "grad_norm": 628.1190919574348, "learning_rate": 1.972804551456063e-05, "loss": 488.1154, "step": 4350 }, { "epoch": 0.08385341039128386, "grad_norm": 635.6027094674298, "learning_rate": 1.9726630047139695e-05, "loss": 490.5554, "step": 4360 }, { "epoch": 0.08404573472704369, "grad_norm": 563.4844302115933, "learning_rate": 1.9725210956709707e-05, "loss": 474.7476, "step": 4370 }, { "epoch": 0.08423805906280352, "grad_norm": 663.2736077910538, "learning_rate": 1.9723788243799253e-05, "loss": 494.2354, "step": 4380 }, { "epoch": 0.08443038339856333, "grad_norm": 577.906945078492, "learning_rate": 1.972236190893827e-05, "loss": 473.9004, "step": 4390 }, { "epoch": 0.08462270773432316, "grad_norm": 819.4941920482539, "learning_rate": 1.972093195265805e-05, "loss": 502.3352, "step": 4400 }, { "epoch": 0.084815032070083, "grad_norm": 715.8667133204852, "learning_rate": 1.9719498375491224e-05, "loss": 476.0669, "step": 4410 }, { "epoch": 0.08500735640584281, "grad_norm": 639.8410343934174, "learning_rate": 1.9718061177971777e-05, "loss": 477.4385, "step": 4420 }, { "epoch": 0.08519968074160264, "grad_norm": 568.7116382152404, "learning_rate": 1.9716620360635036e-05, "loss": 486.7625, "step": 4430 }, { "epoch": 0.08539200507736247, "grad_norm": 731.286113852741, "learning_rate": 1.971517592401769e-05, "loss": 497.6304, "step": 4440 }, { "epoch": 0.08558432941312229, "grad_norm": 615.9179155234693, "learning_rate": 1.9713727868657764e-05, "loss": 476.1781, "step": 4450 }, { "epoch": 0.08577665374888212, "grad_norm": 650.3429361883706, "learning_rate": 1.971227619509463e-05, "loss": 472.3176, "step": 4460 }, { "epoch": 0.08596897808464193, "grad_norm": 551.4978595475194, "learning_rate": 1.971082090386902e-05, "loss": 493.2781, "step": 4470 }, { "epoch": 0.08616130242040176, "grad_norm": 4711.236231799215, "learning_rate": 1.9709361995523e-05, "loss": 482.4984, "step": 4480 }, { "epoch": 0.0863536267561616, "grad_norm": 715.8205450069757, "learning_rate": 1.9707899470599998e-05, "loss": 497.31, "step": 4490 }, { "epoch": 0.08654595109192141, "grad_norm": 665.0607681375076, "learning_rate": 1.970643332964477e-05, "loss": 478.2403, "step": 4500 }, { "epoch": 0.08673827542768124, "grad_norm": 613.021352941912, "learning_rate": 1.9704963573203435e-05, "loss": 474.9517, "step": 4510 }, { "epoch": 0.08693059976344107, "grad_norm": 637.2150147876924, "learning_rate": 1.970349020182345e-05, "loss": 489.9517, "step": 4520 }, { "epoch": 0.08712292409920089, "grad_norm": 599.3458803137219, "learning_rate": 1.9702013216053623e-05, "loss": 469.1975, "step": 4530 }, { "epoch": 0.08731524843496072, "grad_norm": 610.6760803314122, "learning_rate": 1.9700532616444114e-05, "loss": 485.5857, "step": 4540 }, { "epoch": 0.08750757277072055, "grad_norm": 601.7390395855866, "learning_rate": 1.969904840354641e-05, "loss": 479.4551, "step": 4550 }, { "epoch": 0.08769989710648037, "grad_norm": 599.5265816559986, "learning_rate": 1.9697560577913358e-05, "loss": 493.1255, "step": 4560 }, { "epoch": 0.0878922214422402, "grad_norm": 596.6153359263633, "learning_rate": 1.9696069140099152e-05, "loss": 476.3837, "step": 4570 }, { "epoch": 0.08808454577800003, "grad_norm": 600.991320286704, "learning_rate": 1.969457409065933e-05, "loss": 478.4364, "step": 4580 }, { "epoch": 0.08827687011375984, "grad_norm": 559.9529040626103, "learning_rate": 1.969307543015077e-05, "loss": 477.7218, "step": 4590 }, { "epoch": 0.08846919444951967, "grad_norm": 589.8268999503209, "learning_rate": 1.9691573159131696e-05, "loss": 481.6495, "step": 4600 }, { "epoch": 0.08866151878527949, "grad_norm": 597.181130058833, "learning_rate": 1.9690067278161686e-05, "loss": 482.775, "step": 4610 }, { "epoch": 0.08885384312103932, "grad_norm": 635.0713423449403, "learning_rate": 1.9688557787801647e-05, "loss": 477.3972, "step": 4620 }, { "epoch": 0.08904616745679915, "grad_norm": 583.9772768431457, "learning_rate": 1.968704468861385e-05, "loss": 484.344, "step": 4630 }, { "epoch": 0.08923849179255897, "grad_norm": 556.6993701208112, "learning_rate": 1.968552798116189e-05, "loss": 465.836, "step": 4640 }, { "epoch": 0.0894308161283188, "grad_norm": 659.3678227798055, "learning_rate": 1.9684007666010716e-05, "loss": 471.6841, "step": 4650 }, { "epoch": 0.08962314046407863, "grad_norm": 577.0937028794929, "learning_rate": 1.9682483743726624e-05, "loss": 485.9341, "step": 4660 }, { "epoch": 0.08981546479983844, "grad_norm": 593.5870471604508, "learning_rate": 1.968095621487725e-05, "loss": 480.7809, "step": 4670 }, { "epoch": 0.09000778913559827, "grad_norm": 569.4212941663662, "learning_rate": 1.9679425080031574e-05, "loss": 474.5443, "step": 4680 }, { "epoch": 0.0902001134713581, "grad_norm": 620.810725310614, "learning_rate": 1.9677890339759914e-05, "loss": 494.733, "step": 4690 }, { "epoch": 0.09039243780711792, "grad_norm": 641.0236578652243, "learning_rate": 1.967635199463394e-05, "loss": 472.8692, "step": 4700 }, { "epoch": 0.09058476214287775, "grad_norm": 588.2953424814439, "learning_rate": 1.9674810045226658e-05, "loss": 478.5095, "step": 4710 }, { "epoch": 0.09077708647863758, "grad_norm": 606.7000221463143, "learning_rate": 1.967326449211242e-05, "loss": 473.909, "step": 4720 }, { "epoch": 0.0909694108143974, "grad_norm": 615.4246537147368, "learning_rate": 1.9671715335866915e-05, "loss": 472.5508, "step": 4730 }, { "epoch": 0.09116173515015723, "grad_norm": 641.6407347507667, "learning_rate": 1.9670162577067182e-05, "loss": 494.7355, "step": 4740 }, { "epoch": 0.09135405948591704, "grad_norm": 641.7626247280142, "learning_rate": 1.9668606216291598e-05, "loss": 474.9429, "step": 4750 }, { "epoch": 0.09154638382167687, "grad_norm": 661.5624953738348, "learning_rate": 1.9667046254119878e-05, "loss": 468.8927, "step": 4760 }, { "epoch": 0.0917387081574367, "grad_norm": 547.2739020006313, "learning_rate": 1.966548269113309e-05, "loss": 478.5688, "step": 4770 }, { "epoch": 0.09193103249319652, "grad_norm": 588.1387559099504, "learning_rate": 1.9663915527913628e-05, "loss": 480.806, "step": 4780 }, { "epoch": 0.09212335682895635, "grad_norm": 622.5556008004595, "learning_rate": 1.9662344765045237e-05, "loss": 486.7343, "step": 4790 }, { "epoch": 0.09231568116471618, "grad_norm": 588.7538340142726, "learning_rate": 1.9660770403112996e-05, "loss": 477.6282, "step": 4800 }, { "epoch": 0.092508005500476, "grad_norm": 599.5534553495175, "learning_rate": 1.9659192442703336e-05, "loss": 480.692, "step": 4810 }, { "epoch": 0.09270032983623583, "grad_norm": 629.8478171353657, "learning_rate": 1.965761088440402e-05, "loss": 482.3751, "step": 4820 }, { "epoch": 0.09289265417199566, "grad_norm": 574.2939691622477, "learning_rate": 1.9656025728804147e-05, "loss": 479.2723, "step": 4830 }, { "epoch": 0.09308497850775548, "grad_norm": 571.5501890537847, "learning_rate": 1.9654436976494165e-05, "loss": 472.0582, "step": 4840 }, { "epoch": 0.0932773028435153, "grad_norm": 581.3352572393229, "learning_rate": 1.9652844628065857e-05, "loss": 465.4456, "step": 4850 }, { "epoch": 0.09346962717927514, "grad_norm": 600.1076490653732, "learning_rate": 1.965124868411235e-05, "loss": 484.4775, "step": 4860 }, { "epoch": 0.09366195151503495, "grad_norm": 632.8925638683768, "learning_rate": 1.96496491452281e-05, "loss": 468.282, "step": 4870 }, { "epoch": 0.09385427585079478, "grad_norm": 623.0519412152863, "learning_rate": 1.9648046012008916e-05, "loss": 466.2744, "step": 4880 }, { "epoch": 0.0940466001865546, "grad_norm": 588.051955241533, "learning_rate": 1.9646439285051936e-05, "loss": 477.8211, "step": 4890 }, { "epoch": 0.09423892452231443, "grad_norm": 1277.4530072499047, "learning_rate": 1.9644828964955633e-05, "loss": 481.9776, "step": 4900 }, { "epoch": 0.09443124885807426, "grad_norm": 634.1287597467085, "learning_rate": 1.9643215052319836e-05, "loss": 486.0713, "step": 4910 }, { "epoch": 0.09462357319383408, "grad_norm": 643.2370525932306, "learning_rate": 1.9641597547745694e-05, "loss": 489.6987, "step": 4920 }, { "epoch": 0.0948158975295939, "grad_norm": 577.7070532458126, "learning_rate": 1.9639976451835698e-05, "loss": 488.959, "step": 4930 }, { "epoch": 0.09500822186535374, "grad_norm": 602.5186322884324, "learning_rate": 1.9638351765193685e-05, "loss": 482.8837, "step": 4940 }, { "epoch": 0.09520054620111355, "grad_norm": 581.9110060326702, "learning_rate": 1.9636723488424823e-05, "loss": 459.0135, "step": 4950 }, { "epoch": 0.09539287053687338, "grad_norm": 598.9069945196771, "learning_rate": 1.9635091622135616e-05, "loss": 478.5167, "step": 4960 }, { "epoch": 0.09558519487263321, "grad_norm": 554.4119378962503, "learning_rate": 1.963345616693391e-05, "loss": 471.1859, "step": 4970 }, { "epoch": 0.09577751920839303, "grad_norm": 633.1868778820232, "learning_rate": 1.9631817123428883e-05, "loss": 478.766, "step": 4980 }, { "epoch": 0.09596984354415286, "grad_norm": 671.3344848959622, "learning_rate": 1.9630174492231052e-05, "loss": 470.221, "step": 4990 }, { "epoch": 0.09616216787991269, "grad_norm": 589.7150538203424, "learning_rate": 1.962852827395227e-05, "loss": 491.5578, "step": 5000 }, { "epoch": 0.09635449221567251, "grad_norm": 587.107949743048, "learning_rate": 1.962687846920573e-05, "loss": 472.3505, "step": 5010 }, { "epoch": 0.09654681655143234, "grad_norm": 666.7828972123168, "learning_rate": 1.9625225078605946e-05, "loss": 468.2737, "step": 5020 }, { "epoch": 0.09673914088719215, "grad_norm": 555.3826730095677, "learning_rate": 1.9623568102768792e-05, "loss": 473.1111, "step": 5030 }, { "epoch": 0.09693146522295198, "grad_norm": 612.9316558466332, "learning_rate": 1.9621907542311457e-05, "loss": 473.7984, "step": 5040 }, { "epoch": 0.09712378955871181, "grad_norm": 631.319563204153, "learning_rate": 1.9620243397852473e-05, "loss": 457.7611, "step": 5050 }, { "epoch": 0.09731611389447163, "grad_norm": 585.1780378158704, "learning_rate": 1.9618575670011705e-05, "loss": 457.3799, "step": 5060 }, { "epoch": 0.09750843823023146, "grad_norm": 602.3842116350421, "learning_rate": 1.9616904359410357e-05, "loss": 465.7362, "step": 5070 }, { "epoch": 0.09770076256599129, "grad_norm": 532.7673408628551, "learning_rate": 1.9615229466670963e-05, "loss": 470.901, "step": 5080 }, { "epoch": 0.09789308690175111, "grad_norm": 559.1806780182956, "learning_rate": 1.9613550992417396e-05, "loss": 470.3552, "step": 5090 }, { "epoch": 0.09808541123751094, "grad_norm": 543.1499297463367, "learning_rate": 1.961186893727486e-05, "loss": 468.5514, "step": 5100 }, { "epoch": 0.09827773557327077, "grad_norm": 654.4171332285497, "learning_rate": 1.9610183301869882e-05, "loss": 455.5978, "step": 5110 }, { "epoch": 0.09847005990903059, "grad_norm": 619.8780298423139, "learning_rate": 1.9608494086830348e-05, "loss": 462.9577, "step": 5120 }, { "epoch": 0.09866238424479042, "grad_norm": 594.9394519211523, "learning_rate": 1.9606801292785452e-05, "loss": 471.1731, "step": 5130 }, { "epoch": 0.09885470858055025, "grad_norm": 564.264842204434, "learning_rate": 1.960510492036574e-05, "loss": 471.615, "step": 5140 }, { "epoch": 0.09904703291631006, "grad_norm": 600.5482761333604, "learning_rate": 1.9603404970203078e-05, "loss": 475.2653, "step": 5150 }, { "epoch": 0.09923935725206989, "grad_norm": 585.9806699820225, "learning_rate": 1.9601701442930667e-05, "loss": 482.664, "step": 5160 }, { "epoch": 0.09943168158782971, "grad_norm": 633.2626147438331, "learning_rate": 1.9599994339183047e-05, "loss": 475.1929, "step": 5170 }, { "epoch": 0.09962400592358954, "grad_norm": 663.6061801987727, "learning_rate": 1.9598283659596084e-05, "loss": 461.7257, "step": 5180 }, { "epoch": 0.09981633025934937, "grad_norm": 705.174353073422, "learning_rate": 1.9596569404806983e-05, "loss": 462.4165, "step": 5190 }, { "epoch": 0.10000865459510919, "grad_norm": 604.372733532367, "learning_rate": 1.9594851575454266e-05, "loss": 460.5077, "step": 5200 }, { "epoch": 0.10020097893086902, "grad_norm": 539.4772132662268, "learning_rate": 1.9593130172177806e-05, "loss": 480.9955, "step": 5210 }, { "epoch": 0.10039330326662885, "grad_norm": 644.937214270631, "learning_rate": 1.959140519561879e-05, "loss": 471.4614, "step": 5220 }, { "epoch": 0.10058562760238866, "grad_norm": 619.1647634857868, "learning_rate": 1.9589676646419744e-05, "loss": 473.1965, "step": 5230 }, { "epoch": 0.1007779519381485, "grad_norm": 552.5511189129061, "learning_rate": 1.958794452522453e-05, "loss": 477.8996, "step": 5240 }, { "epoch": 0.10097027627390832, "grad_norm": 614.2803353963909, "learning_rate": 1.9586208832678328e-05, "loss": 464.7093, "step": 5250 }, { "epoch": 0.10116260060966814, "grad_norm": 598.2840360964544, "learning_rate": 1.958446956942766e-05, "loss": 471.2755, "step": 5260 }, { "epoch": 0.10135492494542797, "grad_norm": 554.1458141527467, "learning_rate": 1.9582726736120365e-05, "loss": 464.1896, "step": 5270 }, { "epoch": 0.1015472492811878, "grad_norm": 583.5294180046756, "learning_rate": 1.958098033340563e-05, "loss": 468.7423, "step": 5280 }, { "epoch": 0.10173957361694762, "grad_norm": 614.4101624241066, "learning_rate": 1.9579230361933952e-05, "loss": 483.4132, "step": 5290 }, { "epoch": 0.10193189795270745, "grad_norm": 629.9302868086264, "learning_rate": 1.9577476822357174e-05, "loss": 458.8688, "step": 5300 }, { "epoch": 0.10212422228846726, "grad_norm": 552.3628393502764, "learning_rate": 1.9575719715328457e-05, "loss": 480.5301, "step": 5310 }, { "epoch": 0.1023165466242271, "grad_norm": 618.3053732075059, "learning_rate": 1.957395904150229e-05, "loss": 458.3162, "step": 5320 }, { "epoch": 0.10250887095998693, "grad_norm": 628.7291530324881, "learning_rate": 1.9572194801534504e-05, "loss": 470.3111, "step": 5330 }, { "epoch": 0.10270119529574674, "grad_norm": 585.4955338752067, "learning_rate": 1.9570426996082238e-05, "loss": 469.1822, "step": 5340 }, { "epoch": 0.10289351963150657, "grad_norm": 594.2863616162359, "learning_rate": 1.9568655625803982e-05, "loss": 458.7862, "step": 5350 }, { "epoch": 0.1030858439672664, "grad_norm": 553.7806659575447, "learning_rate": 1.956688069135954e-05, "loss": 470.0884, "step": 5360 }, { "epoch": 0.10327816830302622, "grad_norm": 557.6974868851648, "learning_rate": 1.9565102193410035e-05, "loss": 462.1694, "step": 5370 }, { "epoch": 0.10347049263878605, "grad_norm": 569.6451341412975, "learning_rate": 1.956332013261794e-05, "loss": 465.1062, "step": 5380 }, { "epoch": 0.10366281697454588, "grad_norm": 615.4239535877139, "learning_rate": 1.9561534509647038e-05, "loss": 452.582, "step": 5390 }, { "epoch": 0.1038551413103057, "grad_norm": 664.4226117018029, "learning_rate": 1.9559745325162445e-05, "loss": 477.0128, "step": 5400 }, { "epoch": 0.10404746564606553, "grad_norm": 569.4112852430824, "learning_rate": 1.9557952579830604e-05, "loss": 468.2923, "step": 5410 }, { "epoch": 0.10423978998182536, "grad_norm": 504.5785090520457, "learning_rate": 1.955615627431928e-05, "loss": 468.452, "step": 5420 }, { "epoch": 0.10443211431758517, "grad_norm": 577.922135541517, "learning_rate": 1.955435640929757e-05, "loss": 452.7184, "step": 5430 }, { "epoch": 0.104624438653345, "grad_norm": 573.4704796404985, "learning_rate": 1.9552552985435893e-05, "loss": 454.6495, "step": 5440 }, { "epoch": 0.10481676298910482, "grad_norm": 591.1003705233481, "learning_rate": 1.9550746003405996e-05, "loss": 460.658, "step": 5450 }, { "epoch": 0.10500908732486465, "grad_norm": 559.711833742097, "learning_rate": 1.9548935463880945e-05, "loss": 472.0356, "step": 5460 }, { "epoch": 0.10520141166062448, "grad_norm": 731.4096514399223, "learning_rate": 1.9547121367535143e-05, "loss": 475.5825, "step": 5470 }, { "epoch": 0.1053937359963843, "grad_norm": 675.1186885970267, "learning_rate": 1.9545303715044305e-05, "loss": 472.9401, "step": 5480 }, { "epoch": 0.10558606033214413, "grad_norm": 621.5446954509481, "learning_rate": 1.9543482507085484e-05, "loss": 470.6092, "step": 5490 }, { "epoch": 0.10577838466790396, "grad_norm": 564.8005480639721, "learning_rate": 1.9541657744337038e-05, "loss": 457.4794, "step": 5500 }, { "epoch": 0.10597070900366377, "grad_norm": 557.3165313706415, "learning_rate": 1.9539829427478675e-05, "loss": 472.0952, "step": 5510 }, { "epoch": 0.1061630333394236, "grad_norm": 596.9725020294912, "learning_rate": 1.95379975571914e-05, "loss": 477.8515, "step": 5520 }, { "epoch": 0.10635535767518343, "grad_norm": 610.5795461524461, "learning_rate": 1.953616213415756e-05, "loss": 465.1081, "step": 5530 }, { "epoch": 0.10654768201094325, "grad_norm": 532.9527100251266, "learning_rate": 1.9534323159060824e-05, "loss": 477.3551, "step": 5540 }, { "epoch": 0.10674000634670308, "grad_norm": 618.7761228781006, "learning_rate": 1.9532480632586175e-05, "loss": 479.9234, "step": 5550 }, { "epoch": 0.10693233068246291, "grad_norm": 586.1059966469652, "learning_rate": 1.953063455541992e-05, "loss": 476.143, "step": 5560 }, { "epoch": 0.10712465501822273, "grad_norm": 644.2358708785018, "learning_rate": 1.9528784928249703e-05, "loss": 460.9408, "step": 5570 }, { "epoch": 0.10731697935398256, "grad_norm": 561.5106647817859, "learning_rate": 1.9526931751764467e-05, "loss": 450.0845, "step": 5580 }, { "epoch": 0.10750930368974237, "grad_norm": 586.2308262734489, "learning_rate": 1.95250750266545e-05, "loss": 464.2511, "step": 5590 }, { "epoch": 0.1077016280255022, "grad_norm": 713.7751058815495, "learning_rate": 1.9523214753611398e-05, "loss": 450.9627, "step": 5600 }, { "epoch": 0.10789395236126204, "grad_norm": 563.3784121067714, "learning_rate": 1.952135093332808e-05, "loss": 474.7403, "step": 5610 }, { "epoch": 0.10808627669702185, "grad_norm": 541.5234523770434, "learning_rate": 1.9519483566498788e-05, "loss": 459.177, "step": 5620 }, { "epoch": 0.10827860103278168, "grad_norm": 533.0503444636863, "learning_rate": 1.9517612653819088e-05, "loss": 462.7783, "step": 5630 }, { "epoch": 0.10847092536854151, "grad_norm": 601.3699334313936, "learning_rate": 1.9515738195985868e-05, "loss": 479.1894, "step": 5640 }, { "epoch": 0.10866324970430133, "grad_norm": 565.7627426136539, "learning_rate": 1.951386019369732e-05, "loss": 458.9444, "step": 5650 }, { "epoch": 0.10885557404006116, "grad_norm": 601.6013229962927, "learning_rate": 1.9511978647652984e-05, "loss": 453.5309, "step": 5660 }, { "epoch": 0.10904789837582099, "grad_norm": 572.5979504101963, "learning_rate": 1.9510093558553687e-05, "loss": 460.8308, "step": 5670 }, { "epoch": 0.1092402227115808, "grad_norm": 555.5591057204285, "learning_rate": 1.950820492710161e-05, "loss": 457.2931, "step": 5680 }, { "epoch": 0.10943254704734064, "grad_norm": 627.0639773158829, "learning_rate": 1.9506312754000235e-05, "loss": 460.4723, "step": 5690 }, { "epoch": 0.10962487138310047, "grad_norm": 567.7280179595467, "learning_rate": 1.9504417039954357e-05, "loss": 454.7062, "step": 5700 }, { "epoch": 0.10981719571886028, "grad_norm": 589.7552420184576, "learning_rate": 1.9502517785670098e-05, "loss": 472.0097, "step": 5710 }, { "epoch": 0.11000952005462011, "grad_norm": 609.0864005083679, "learning_rate": 1.950061499185491e-05, "loss": 472.8547, "step": 5720 }, { "epoch": 0.11020184439037993, "grad_norm": 574.8393854788927, "learning_rate": 1.9498708659217542e-05, "loss": 458.5378, "step": 5730 }, { "epoch": 0.11039416872613976, "grad_norm": 593.3147818039952, "learning_rate": 1.9496798788468077e-05, "loss": 451.2261, "step": 5740 }, { "epoch": 0.11058649306189959, "grad_norm": 591.8585009842711, "learning_rate": 1.9494885380317906e-05, "loss": 453.768, "step": 5750 }, { "epoch": 0.11077881739765941, "grad_norm": 556.8757756377022, "learning_rate": 1.9492968435479744e-05, "loss": 450.8655, "step": 5760 }, { "epoch": 0.11097114173341924, "grad_norm": 567.0838774827757, "learning_rate": 1.949104795466762e-05, "loss": 453.8706, "step": 5770 }, { "epoch": 0.11116346606917907, "grad_norm": 572.239523238843, "learning_rate": 1.9489123938596886e-05, "loss": 444.5173, "step": 5780 }, { "epoch": 0.11135579040493888, "grad_norm": 572.460747506397, "learning_rate": 1.94871963879842e-05, "loss": 460.8298, "step": 5790 }, { "epoch": 0.11154811474069871, "grad_norm": 547.762611217189, "learning_rate": 1.9485265303547547e-05, "loss": 456.2129, "step": 5800 }, { "epoch": 0.11174043907645854, "grad_norm": 681.0375694733043, "learning_rate": 1.9483330686006223e-05, "loss": 463.6338, "step": 5810 }, { "epoch": 0.11193276341221836, "grad_norm": 685.4994674000491, "learning_rate": 1.948139253608084e-05, "loss": 486.1659, "step": 5820 }, { "epoch": 0.11212508774797819, "grad_norm": 598.1425119667529, "learning_rate": 1.9479450854493327e-05, "loss": 456.0472, "step": 5830 }, { "epoch": 0.11231741208373802, "grad_norm": 570.9363313117783, "learning_rate": 1.9477505641966933e-05, "loss": 451.9121, "step": 5840 }, { "epoch": 0.11250973641949784, "grad_norm": 577.1784111188559, "learning_rate": 1.9475556899226213e-05, "loss": 460.049, "step": 5850 }, { "epoch": 0.11270206075525767, "grad_norm": 607.1651708353087, "learning_rate": 1.9473604626997037e-05, "loss": 451.1603, "step": 5860 }, { "epoch": 0.11289438509101749, "grad_norm": 575.331530518834, "learning_rate": 1.94716488260066e-05, "loss": 458.459, "step": 5870 }, { "epoch": 0.11308670942677732, "grad_norm": 608.6725118986951, "learning_rate": 1.946968949698341e-05, "loss": 456.1864, "step": 5880 }, { "epoch": 0.11327903376253715, "grad_norm": 645.1061651579687, "learning_rate": 1.9467726640657277e-05, "loss": 450.295, "step": 5890 }, { "epoch": 0.11347135809829696, "grad_norm": 586.8906624550228, "learning_rate": 1.9465760257759336e-05, "loss": 455.3526, "step": 5900 }, { "epoch": 0.11366368243405679, "grad_norm": 612.8186262513243, "learning_rate": 1.9463790349022027e-05, "loss": 446.5723, "step": 5910 }, { "epoch": 0.11385600676981662, "grad_norm": 526.1600416399643, "learning_rate": 1.9461816915179117e-05, "loss": 456.4721, "step": 5920 }, { "epoch": 0.11404833110557644, "grad_norm": 624.0233025646951, "learning_rate": 1.945983995696567e-05, "loss": 462.4404, "step": 5930 }, { "epoch": 0.11424065544133627, "grad_norm": 597.7485044490996, "learning_rate": 1.9457859475118077e-05, "loss": 454.947, "step": 5940 }, { "epoch": 0.1144329797770961, "grad_norm": 582.2865791588138, "learning_rate": 1.9455875470374027e-05, "loss": 469.912, "step": 5950 }, { "epoch": 0.11462530411285592, "grad_norm": 527.2170495374962, "learning_rate": 1.9453887943472532e-05, "loss": 440.7697, "step": 5960 }, { "epoch": 0.11481762844861575, "grad_norm": 609.9048837986057, "learning_rate": 1.945189689515392e-05, "loss": 467.5322, "step": 5970 }, { "epoch": 0.11500995278437558, "grad_norm": 517.0803414211166, "learning_rate": 1.9449902326159815e-05, "loss": 443.7057, "step": 5980 }, { "epoch": 0.1152022771201354, "grad_norm": 559.1504196549349, "learning_rate": 1.9447904237233164e-05, "loss": 457.1309, "step": 5990 }, { "epoch": 0.11539460145589522, "grad_norm": 717.0088099356094, "learning_rate": 1.9445902629118223e-05, "loss": 451.0554, "step": 6000 }, { "epoch": 0.11558692579165504, "grad_norm": 553.1601859777684, "learning_rate": 1.9443897502560555e-05, "loss": 465.8052, "step": 6010 }, { "epoch": 0.11577925012741487, "grad_norm": 556.5726901982019, "learning_rate": 1.9441888858307042e-05, "loss": 476.5284, "step": 6020 }, { "epoch": 0.1159715744631747, "grad_norm": 529.381321358997, "learning_rate": 1.943987669710586e-05, "loss": 444.8907, "step": 6030 }, { "epoch": 0.11616389879893452, "grad_norm": 571.4391456040348, "learning_rate": 1.9437861019706522e-05, "loss": 445.6038, "step": 6040 }, { "epoch": 0.11635622313469435, "grad_norm": 609.125673479725, "learning_rate": 1.943584182685982e-05, "loss": 463.3053, "step": 6050 }, { "epoch": 0.11654854747045418, "grad_norm": 865.9092772804071, "learning_rate": 1.9433819119317878e-05, "loss": 456.327, "step": 6060 }, { "epoch": 0.116740871806214, "grad_norm": 609.1420721393692, "learning_rate": 1.9431792897834115e-05, "loss": 449.2634, "step": 6070 }, { "epoch": 0.11693319614197382, "grad_norm": 602.5476476123913, "learning_rate": 1.9429763163163273e-05, "loss": 467.8189, "step": 6080 }, { "epoch": 0.11712552047773366, "grad_norm": 545.4484795961454, "learning_rate": 1.942772991606139e-05, "loss": 441.9267, "step": 6090 }, { "epoch": 0.11731784481349347, "grad_norm": 736.771299779919, "learning_rate": 1.9425693157285816e-05, "loss": 454.4545, "step": 6100 }, { "epoch": 0.1175101691492533, "grad_norm": 592.854521574643, "learning_rate": 1.942365288759521e-05, "loss": 443.1416, "step": 6110 }, { "epoch": 0.11770249348501313, "grad_norm": 556.0528704358943, "learning_rate": 1.9421609107749542e-05, "loss": 464.8642, "step": 6120 }, { "epoch": 0.11789481782077295, "grad_norm": 538.044132231871, "learning_rate": 1.9419561818510085e-05, "loss": 454.9487, "step": 6130 }, { "epoch": 0.11808714215653278, "grad_norm": 579.3182249998647, "learning_rate": 1.9417511020639416e-05, "loss": 455.0798, "step": 6140 }, { "epoch": 0.1182794664922926, "grad_norm": 560.7693432823665, "learning_rate": 1.9415456714901432e-05, "loss": 460.8459, "step": 6150 }, { "epoch": 0.11847179082805243, "grad_norm": 548.6833992249758, "learning_rate": 1.941339890206132e-05, "loss": 459.5579, "step": 6160 }, { "epoch": 0.11866411516381226, "grad_norm": 546.2777243774797, "learning_rate": 1.9411337582885587e-05, "loss": 451.0988, "step": 6170 }, { "epoch": 0.11885643949957207, "grad_norm": 569.7658880448489, "learning_rate": 1.9409272758142034e-05, "loss": 442.799, "step": 6180 }, { "epoch": 0.1190487638353319, "grad_norm": 598.5469988105174, "learning_rate": 1.940720442859978e-05, "loss": 454.6981, "step": 6190 }, { "epoch": 0.11924108817109173, "grad_norm": 520.1388083901044, "learning_rate": 1.940513259502924e-05, "loss": 455.6266, "step": 6200 }, { "epoch": 0.11943341250685155, "grad_norm": 592.2642859558092, "learning_rate": 1.9403057258202144e-05, "loss": 452.3042, "step": 6210 }, { "epoch": 0.11962573684261138, "grad_norm": 538.2159895835466, "learning_rate": 1.940097841889151e-05, "loss": 449.2261, "step": 6220 }, { "epoch": 0.11981806117837121, "grad_norm": 555.7028374629331, "learning_rate": 1.939889607787168e-05, "loss": 461.0196, "step": 6230 }, { "epoch": 0.12001038551413103, "grad_norm": 581.2120867911883, "learning_rate": 1.9396810235918287e-05, "loss": 456.7599, "step": 6240 }, { "epoch": 0.12020270984989086, "grad_norm": 585.3890168566849, "learning_rate": 1.939472089380828e-05, "loss": 450.5199, "step": 6250 }, { "epoch": 0.12039503418565069, "grad_norm": 537.6026806644555, "learning_rate": 1.9392628052319895e-05, "loss": 457.9451, "step": 6260 }, { "epoch": 0.1205873585214105, "grad_norm": 582.1901000546167, "learning_rate": 1.9390531712232687e-05, "loss": 454.9115, "step": 6270 }, { "epoch": 0.12077968285717033, "grad_norm": 556.1801994889596, "learning_rate": 1.9388431874327504e-05, "loss": 468.9773, "step": 6280 }, { "epoch": 0.12097200719293016, "grad_norm": 655.9631280020292, "learning_rate": 1.9386328539386502e-05, "loss": 458.9917, "step": 6290 }, { "epoch": 0.12116433152868998, "grad_norm": 536.1963488882964, "learning_rate": 1.938422170819314e-05, "loss": 458.6655, "step": 6300 }, { "epoch": 0.12135665586444981, "grad_norm": 566.4790735212034, "learning_rate": 1.938211138153218e-05, "loss": 444.7368, "step": 6310 }, { "epoch": 0.12154898020020963, "grad_norm": 540.2566765307556, "learning_rate": 1.9379997560189677e-05, "loss": 450.3039, "step": 6320 }, { "epoch": 0.12174130453596946, "grad_norm": 588.4333892241063, "learning_rate": 1.9377880244953e-05, "loss": 447.3329, "step": 6330 }, { "epoch": 0.12193362887172929, "grad_norm": 559.2034383246234, "learning_rate": 1.9375759436610813e-05, "loss": 456.1573, "step": 6340 }, { "epoch": 0.1221259532074891, "grad_norm": 675.705887382901, "learning_rate": 1.937363513595308e-05, "loss": 456.2759, "step": 6350 }, { "epoch": 0.12231827754324893, "grad_norm": 557.1691693464365, "learning_rate": 1.937150734377107e-05, "loss": 439.1136, "step": 6360 }, { "epoch": 0.12251060187900877, "grad_norm": 529.5951367319399, "learning_rate": 1.9369376060857354e-05, "loss": 441.9863, "step": 6370 }, { "epoch": 0.12270292621476858, "grad_norm": 574.2605059738312, "learning_rate": 1.936724128800579e-05, "loss": 460.2209, "step": 6380 }, { "epoch": 0.12289525055052841, "grad_norm": 553.2665262790448, "learning_rate": 1.9365103026011555e-05, "loss": 453.9132, "step": 6390 }, { "epoch": 0.12308757488628824, "grad_norm": 605.4233027827128, "learning_rate": 1.9362961275671112e-05, "loss": 457.6689, "step": 6400 }, { "epoch": 0.12327989922204806, "grad_norm": 586.7659932436243, "learning_rate": 1.936081603778223e-05, "loss": 467.9555, "step": 6410 }, { "epoch": 0.12347222355780789, "grad_norm": 579.1158928957905, "learning_rate": 1.9358667313143972e-05, "loss": 455.3643, "step": 6420 }, { "epoch": 0.12366454789356772, "grad_norm": 599.4161547485973, "learning_rate": 1.93565151025567e-05, "loss": 454.3246, "step": 6430 }, { "epoch": 0.12385687222932754, "grad_norm": 590.9679448435286, "learning_rate": 1.9354359406822084e-05, "loss": 459.4577, "step": 6440 }, { "epoch": 0.12404919656508737, "grad_norm": 570.0842259149974, "learning_rate": 1.935220022674308e-05, "loss": 445.058, "step": 6450 }, { "epoch": 0.12424152090084718, "grad_norm": 574.0282928884872, "learning_rate": 1.9350037563123947e-05, "loss": 437.3688, "step": 6460 }, { "epoch": 0.12443384523660701, "grad_norm": 567.5621462821342, "learning_rate": 1.9347871416770245e-05, "loss": 457.1672, "step": 6470 }, { "epoch": 0.12462616957236684, "grad_norm": 627.125907188958, "learning_rate": 1.9345701788488825e-05, "loss": 443.653, "step": 6480 }, { "epoch": 0.12481849390812666, "grad_norm": 528.493118446666, "learning_rate": 1.9343528679087837e-05, "loss": 452.8219, "step": 6490 }, { "epoch": 0.1250108182438865, "grad_norm": 589.1564062016865, "learning_rate": 1.934135208937673e-05, "loss": 451.268, "step": 6500 }, { "epoch": 0.12520314257964632, "grad_norm": 588.1787184927141, "learning_rate": 1.9339172020166245e-05, "loss": 460.5093, "step": 6510 }, { "epoch": 0.12539546691540615, "grad_norm": 559.0172907781034, "learning_rate": 1.933698847226843e-05, "loss": 454.6871, "step": 6520 }, { "epoch": 0.12558779125116595, "grad_norm": 522.4693165831601, "learning_rate": 1.9334801446496606e-05, "loss": 445.7012, "step": 6530 }, { "epoch": 0.12578011558692578, "grad_norm": 530.8457545579569, "learning_rate": 1.933261094366542e-05, "loss": 451.3096, "step": 6540 }, { "epoch": 0.12597243992268561, "grad_norm": 562.1488463433628, "learning_rate": 1.9330416964590787e-05, "loss": 436.5796, "step": 6550 }, { "epoch": 0.12616476425844544, "grad_norm": 615.5752112743173, "learning_rate": 1.932821951008993e-05, "loss": 461.9782, "step": 6560 }, { "epoch": 0.12635708859420527, "grad_norm": 610.040926518066, "learning_rate": 1.9326018580981373e-05, "loss": 447.5911, "step": 6570 }, { "epoch": 0.1265494129299651, "grad_norm": 550.5832465583508, "learning_rate": 1.9323814178084914e-05, "loss": 440.8309, "step": 6580 }, { "epoch": 0.1267417372657249, "grad_norm": 591.049276600134, "learning_rate": 1.9321606302221662e-05, "loss": 454.5273, "step": 6590 }, { "epoch": 0.12693406160148474, "grad_norm": 593.8169916602016, "learning_rate": 1.9319394954214013e-05, "loss": 455.2549, "step": 6600 }, { "epoch": 0.12712638593724457, "grad_norm": 620.7210887126705, "learning_rate": 1.931718013488566e-05, "loss": 451.1783, "step": 6610 }, { "epoch": 0.1273187102730044, "grad_norm": 631.7601020832637, "learning_rate": 1.9314961845061584e-05, "loss": 455.9168, "step": 6620 }, { "epoch": 0.12751103460876423, "grad_norm": 790.7017724612514, "learning_rate": 1.9312740085568063e-05, "loss": 463.5684, "step": 6630 }, { "epoch": 0.12770335894452406, "grad_norm": 534.8783070848594, "learning_rate": 1.9310514857232666e-05, "loss": 440.6818, "step": 6640 }, { "epoch": 0.12789568328028386, "grad_norm": 535.2064559744528, "learning_rate": 1.930828616088425e-05, "loss": 434.7803, "step": 6650 }, { "epoch": 0.1280880076160437, "grad_norm": 545.1513186662329, "learning_rate": 1.9306053997352973e-05, "loss": 442.1026, "step": 6660 }, { "epoch": 0.12828033195180352, "grad_norm": 555.7306761069738, "learning_rate": 1.9303818367470274e-05, "loss": 444.6883, "step": 6670 }, { "epoch": 0.12847265628756335, "grad_norm": 541.9976527210671, "learning_rate": 1.9301579272068894e-05, "loss": 448.924, "step": 6680 }, { "epoch": 0.12866498062332318, "grad_norm": 585.6049150429797, "learning_rate": 1.9299336711982853e-05, "loss": 449.9302, "step": 6690 }, { "epoch": 0.12885730495908299, "grad_norm": 575.5148590127932, "learning_rate": 1.9297090688047473e-05, "loss": 450.3919, "step": 6700 }, { "epoch": 0.12904962929484282, "grad_norm": 563.0153338400012, "learning_rate": 1.929484120109936e-05, "loss": 444.159, "step": 6710 }, { "epoch": 0.12924195363060265, "grad_norm": 561.6015685341068, "learning_rate": 1.9292588251976404e-05, "loss": 439.5263, "step": 6720 }, { "epoch": 0.12943427796636248, "grad_norm": 618.0408070492041, "learning_rate": 1.92903318415178e-05, "loss": 448.6772, "step": 6730 }, { "epoch": 0.1296266023021223, "grad_norm": 560.1286072818397, "learning_rate": 1.9288071970564015e-05, "loss": 460.3758, "step": 6740 }, { "epoch": 0.12981892663788214, "grad_norm": 579.6981835780392, "learning_rate": 1.9285808639956823e-05, "loss": 450.6625, "step": 6750 }, { "epoch": 0.13001125097364194, "grad_norm": 569.7116518232411, "learning_rate": 1.9283541850539272e-05, "loss": 446.0654, "step": 6760 }, { "epoch": 0.13020357530940177, "grad_norm": 546.358091447522, "learning_rate": 1.92812716031557e-05, "loss": 440.4798, "step": 6770 }, { "epoch": 0.1303958996451616, "grad_norm": 574.5179906842806, "learning_rate": 1.9278997898651746e-05, "loss": 451.1808, "step": 6780 }, { "epoch": 0.13058822398092143, "grad_norm": 549.0151048075636, "learning_rate": 1.9276720737874327e-05, "loss": 448.2003, "step": 6790 }, { "epoch": 0.13078054831668126, "grad_norm": 572.9689293748356, "learning_rate": 1.9274440121671637e-05, "loss": 437.8925, "step": 6800 }, { "epoch": 0.13097287265244106, "grad_norm": 562.673525311016, "learning_rate": 1.9272156050893173e-05, "loss": 446.0264, "step": 6810 }, { "epoch": 0.1311651969882009, "grad_norm": 658.2692168239895, "learning_rate": 1.926986852638972e-05, "loss": 447.0198, "step": 6820 }, { "epoch": 0.13135752132396072, "grad_norm": 677.0630203293408, "learning_rate": 1.926757754901333e-05, "loss": 439.7201, "step": 6830 }, { "epoch": 0.13154984565972055, "grad_norm": 558.8686078058347, "learning_rate": 1.926528311961737e-05, "loss": 445.9571, "step": 6840 }, { "epoch": 0.13174216999548038, "grad_norm": 554.1681325389345, "learning_rate": 1.9262985239056463e-05, "loss": 453.8511, "step": 6850 }, { "epoch": 0.13193449433124022, "grad_norm": 602.1042896629361, "learning_rate": 1.9260683908186544e-05, "loss": 442.8998, "step": 6860 }, { "epoch": 0.13212681866700002, "grad_norm": 539.0167513606403, "learning_rate": 1.9258379127864808e-05, "loss": 438.5836, "step": 6870 }, { "epoch": 0.13231914300275985, "grad_norm": 612.1029205950629, "learning_rate": 1.925607089894976e-05, "loss": 440.011, "step": 6880 }, { "epoch": 0.13251146733851968, "grad_norm": 545.441470772547, "learning_rate": 1.9253759222301168e-05, "loss": 445.2561, "step": 6890 }, { "epoch": 0.1327037916742795, "grad_norm": 547.8940711851035, "learning_rate": 1.9251444098780095e-05, "loss": 444.2211, "step": 6900 }, { "epoch": 0.13289611601003934, "grad_norm": 567.2072916810628, "learning_rate": 1.924912552924889e-05, "loss": 444.5792, "step": 6910 }, { "epoch": 0.13308844034579917, "grad_norm": 690.1238130681841, "learning_rate": 1.924680351457118e-05, "loss": 449.0135, "step": 6920 }, { "epoch": 0.13328076468155897, "grad_norm": 625.3567629918222, "learning_rate": 1.9244478055611875e-05, "loss": 435.281, "step": 6930 }, { "epoch": 0.1334730890173188, "grad_norm": 562.8921699418266, "learning_rate": 1.9242149153237175e-05, "loss": 452.8662, "step": 6940 }, { "epoch": 0.13366541335307863, "grad_norm": 566.8539377652648, "learning_rate": 1.923981680831455e-05, "loss": 434.2891, "step": 6950 }, { "epoch": 0.13385773768883846, "grad_norm": 529.1648959684138, "learning_rate": 1.923748102171277e-05, "loss": 449.7102, "step": 6960 }, { "epoch": 0.1340500620245983, "grad_norm": 622.7985948956308, "learning_rate": 1.9235141794301867e-05, "loss": 443.65, "step": 6970 }, { "epoch": 0.1342423863603581, "grad_norm": 653.9551519168286, "learning_rate": 1.9232799126953173e-05, "loss": 446.7727, "step": 6980 }, { "epoch": 0.13443471069611793, "grad_norm": 580.8320821440503, "learning_rate": 1.9230453020539285e-05, "loss": 447.424, "step": 6990 }, { "epoch": 0.13462703503187776, "grad_norm": 613.0775728874669, "learning_rate": 1.9228103475934096e-05, "loss": 434.6862, "step": 7000 }, { "epoch": 0.1348193593676376, "grad_norm": 544.3346854360603, "learning_rate": 1.9225750494012767e-05, "loss": 452.8522, "step": 7010 }, { "epoch": 0.13501168370339742, "grad_norm": 589.083557180598, "learning_rate": 1.9223394075651748e-05, "loss": 449.5531, "step": 7020 }, { "epoch": 0.13520400803915725, "grad_norm": 609.831405804266, "learning_rate": 1.9221034221728764e-05, "loss": 441.9063, "step": 7030 }, { "epoch": 0.13539633237491705, "grad_norm": 610.3204446716359, "learning_rate": 1.9218670933122826e-05, "loss": 446.5802, "step": 7040 }, { "epoch": 0.13558865671067688, "grad_norm": 575.5031137801874, "learning_rate": 1.9216304210714213e-05, "loss": 435.0925, "step": 7050 }, { "epoch": 0.1357809810464367, "grad_norm": 568.071511098741, "learning_rate": 1.9213934055384498e-05, "loss": 463.8728, "step": 7060 }, { "epoch": 0.13597330538219654, "grad_norm": 564.9895317015021, "learning_rate": 1.9211560468016516e-05, "loss": 452.2563, "step": 7070 }, { "epoch": 0.13616562971795637, "grad_norm": 601.5690005736805, "learning_rate": 1.9209183449494397e-05, "loss": 437.316, "step": 7080 }, { "epoch": 0.13635795405371617, "grad_norm": 612.0170495369792, "learning_rate": 1.9206803000703534e-05, "loss": 447.462, "step": 7090 }, { "epoch": 0.136550278389476, "grad_norm": 536.0231868247605, "learning_rate": 1.9204419122530614e-05, "loss": 442.7653, "step": 7100 }, { "epoch": 0.13674260272523583, "grad_norm": 569.45332421543, "learning_rate": 1.9202031815863583e-05, "loss": 443.6591, "step": 7110 }, { "epoch": 0.13693492706099566, "grad_norm": 533.1756418302409, "learning_rate": 1.919964108159168e-05, "loss": 445.0802, "step": 7120 }, { "epoch": 0.1371272513967555, "grad_norm": 526.6507440457178, "learning_rate": 1.9197246920605408e-05, "loss": 432.5921, "step": 7130 }, { "epoch": 0.13731957573251533, "grad_norm": 545.0966495594668, "learning_rate": 1.9194849333796557e-05, "loss": 440.4764, "step": 7140 }, { "epoch": 0.13751190006827513, "grad_norm": 565.1981793829112, "learning_rate": 1.9192448322058187e-05, "loss": 449.5943, "step": 7150 }, { "epoch": 0.13770422440403496, "grad_norm": 516.7266678204488, "learning_rate": 1.9190043886284635e-05, "loss": 443.0106, "step": 7160 }, { "epoch": 0.1378965487397948, "grad_norm": 595.3751727839116, "learning_rate": 1.9187636027371518e-05, "loss": 433.9862, "step": 7170 }, { "epoch": 0.13808887307555462, "grad_norm": 658.1726007929548, "learning_rate": 1.9185224746215714e-05, "loss": 457.4265, "step": 7180 }, { "epoch": 0.13828119741131445, "grad_norm": 602.2515559857055, "learning_rate": 1.9182810043715394e-05, "loss": 438.1577, "step": 7190 }, { "epoch": 0.13847352174707428, "grad_norm": 523.6108092055384, "learning_rate": 1.9180391920769993e-05, "loss": 438.2888, "step": 7200 }, { "epoch": 0.13866584608283408, "grad_norm": 577.8699183900699, "learning_rate": 1.9177970378280215e-05, "loss": 441.081, "step": 7210 }, { "epoch": 0.1388581704185939, "grad_norm": 555.7984157792681, "learning_rate": 1.9175545417148056e-05, "loss": 449.9366, "step": 7220 }, { "epoch": 0.13905049475435374, "grad_norm": 601.0036850989364, "learning_rate": 1.9173117038276766e-05, "loss": 445.1274, "step": 7230 }, { "epoch": 0.13924281909011357, "grad_norm": 649.1687776871014, "learning_rate": 1.9170685242570878e-05, "loss": 450.8615, "step": 7240 }, { "epoch": 0.1394351434258734, "grad_norm": 527.2314933455276, "learning_rate": 1.9168250030936195e-05, "loss": 432.5535, "step": 7250 }, { "epoch": 0.1396274677616332, "grad_norm": 532.4805675864951, "learning_rate": 1.91658114042798e-05, "loss": 443.7549, "step": 7260 }, { "epoch": 0.13981979209739304, "grad_norm": 580.159318741437, "learning_rate": 1.9163369363510026e-05, "loss": 435.2532, "step": 7270 }, { "epoch": 0.14001211643315287, "grad_norm": 578.2713835246826, "learning_rate": 1.916092390953651e-05, "loss": 441.5688, "step": 7280 }, { "epoch": 0.1402044407689127, "grad_norm": 558.372779805351, "learning_rate": 1.915847504327013e-05, "loss": 439.2564, "step": 7290 }, { "epoch": 0.14039676510467253, "grad_norm": 529.1449548373271, "learning_rate": 1.9156022765623057e-05, "loss": 439.4631, "step": 7300 }, { "epoch": 0.14058908944043236, "grad_norm": 545.7670719788337, "learning_rate": 1.9153567077508718e-05, "loss": 432.0012, "step": 7310 }, { "epoch": 0.14078141377619216, "grad_norm": 595.2851457029889, "learning_rate": 1.9151107979841824e-05, "loss": 436.38, "step": 7320 }, { "epoch": 0.140973738111952, "grad_norm": 538.6557840713295, "learning_rate": 1.9148645473538338e-05, "loss": 430.5198, "step": 7330 }, { "epoch": 0.14116606244771182, "grad_norm": 546.0718350433435, "learning_rate": 1.9146179559515507e-05, "loss": 434.9198, "step": 7340 }, { "epoch": 0.14135838678347165, "grad_norm": 604.7659032605735, "learning_rate": 1.9143710238691847e-05, "loss": 435.8765, "step": 7350 }, { "epoch": 0.14155071111923148, "grad_norm": 544.1282262601212, "learning_rate": 1.9141237511987137e-05, "loss": 427.6289, "step": 7360 }, { "epoch": 0.14174303545499128, "grad_norm": 557.6461067823977, "learning_rate": 1.9138761380322425e-05, "loss": 448.1304, "step": 7370 }, { "epoch": 0.14193535979075111, "grad_norm": 609.9076211931574, "learning_rate": 1.913628184462003e-05, "loss": 436.6113, "step": 7380 }, { "epoch": 0.14212768412651094, "grad_norm": 705.8008060059544, "learning_rate": 1.913379890580354e-05, "loss": 441.1925, "step": 7390 }, { "epoch": 0.14232000846227078, "grad_norm": 534.110582723359, "learning_rate": 1.9131312564797805e-05, "loss": 439.0979, "step": 7400 }, { "epoch": 0.1425123327980306, "grad_norm": 636.9791987731925, "learning_rate": 1.912882282252895e-05, "loss": 425.6083, "step": 7410 }, { "epoch": 0.14270465713379044, "grad_norm": 556.9531193170636, "learning_rate": 1.9126329679924364e-05, "loss": 443.8383, "step": 7420 }, { "epoch": 0.14289698146955024, "grad_norm": 544.4647112172504, "learning_rate": 1.9123833137912693e-05, "loss": 440.5781, "step": 7430 }, { "epoch": 0.14308930580531007, "grad_norm": 527.4625931596618, "learning_rate": 1.9121333197423867e-05, "loss": 458.2057, "step": 7440 }, { "epoch": 0.1432816301410699, "grad_norm": 2117.5634089858195, "learning_rate": 1.9118829859389067e-05, "loss": 451.6985, "step": 7450 }, { "epoch": 0.14347395447682973, "grad_norm": 598.4948095429457, "learning_rate": 1.9116323124740748e-05, "loss": 436.7958, "step": 7460 }, { "epoch": 0.14366627881258956, "grad_norm": 570.4295972789612, "learning_rate": 1.9113812994412627e-05, "loss": 440.0646, "step": 7470 }, { "epoch": 0.1438586031483494, "grad_norm": 576.0305848119193, "learning_rate": 1.911129946933968e-05, "loss": 440.1847, "step": 7480 }, { "epoch": 0.1440509274841092, "grad_norm": 542.6346419853453, "learning_rate": 1.9108782550458164e-05, "loss": 453.2081, "step": 7490 }, { "epoch": 0.14424325181986902, "grad_norm": 575.3669732122477, "learning_rate": 1.9106262238705583e-05, "loss": 449.5358, "step": 7500 }, { "epoch": 0.14443557615562885, "grad_norm": 537.6084302704626, "learning_rate": 1.9103738535020713e-05, "loss": 446.0702, "step": 7510 }, { "epoch": 0.14462790049138868, "grad_norm": 543.6680320093975, "learning_rate": 1.910121144034359e-05, "loss": 428.9582, "step": 7520 }, { "epoch": 0.1448202248271485, "grad_norm": 542.7679299190743, "learning_rate": 1.909868095561552e-05, "loss": 436.3199, "step": 7530 }, { "epoch": 0.14501254916290832, "grad_norm": 524.253006819328, "learning_rate": 1.9096147081779063e-05, "loss": 445.5708, "step": 7540 }, { "epoch": 0.14520487349866815, "grad_norm": 554.829815846205, "learning_rate": 1.9093609819778044e-05, "loss": 437.1036, "step": 7550 }, { "epoch": 0.14539719783442798, "grad_norm": 501.9055615810935, "learning_rate": 1.9091069170557554e-05, "loss": 443.9232, "step": 7560 }, { "epoch": 0.1455895221701878, "grad_norm": 559.9313886576359, "learning_rate": 1.9088525135063944e-05, "loss": 437.4566, "step": 7570 }, { "epoch": 0.14578184650594764, "grad_norm": 517.4741249400627, "learning_rate": 1.9085977714244822e-05, "loss": 435.0832, "step": 7580 }, { "epoch": 0.14597417084170747, "grad_norm": 550.9501878286719, "learning_rate": 1.908342690904906e-05, "loss": 443.4718, "step": 7590 }, { "epoch": 0.14616649517746727, "grad_norm": 603.8445489928295, "learning_rate": 1.9080872720426793e-05, "loss": 440.3922, "step": 7600 }, { "epoch": 0.1463588195132271, "grad_norm": 561.9610965814381, "learning_rate": 1.9078315149329413e-05, "loss": 420.2626, "step": 7610 }, { "epoch": 0.14655114384898693, "grad_norm": 511.9348084977807, "learning_rate": 1.9075754196709574e-05, "loss": 446.088, "step": 7620 }, { "epoch": 0.14674346818474676, "grad_norm": 514.9396544232271, "learning_rate": 1.9073189863521184e-05, "loss": 433.6025, "step": 7630 }, { "epoch": 0.1469357925205066, "grad_norm": 629.7431275383268, "learning_rate": 1.9070622150719423e-05, "loss": 439.0813, "step": 7640 }, { "epoch": 0.1471281168562664, "grad_norm": 608.7647068301741, "learning_rate": 1.9068051059260716e-05, "loss": 437.1541, "step": 7650 }, { "epoch": 0.14732044119202622, "grad_norm": 591.4084677037018, "learning_rate": 1.9065476590102752e-05, "loss": 443.5418, "step": 7660 }, { "epoch": 0.14751276552778605, "grad_norm": 670.2283456290832, "learning_rate": 1.906289874420448e-05, "loss": 451.7118, "step": 7670 }, { "epoch": 0.14770508986354589, "grad_norm": 530.2510227646507, "learning_rate": 1.9060317522526105e-05, "loss": 430.0094, "step": 7680 }, { "epoch": 0.14789741419930572, "grad_norm": 582.8801334902874, "learning_rate": 1.905773292602909e-05, "loss": 442.6443, "step": 7690 }, { "epoch": 0.14808973853506555, "grad_norm": 548.2699834322779, "learning_rate": 1.905514495567615e-05, "loss": 433.7478, "step": 7700 }, { "epoch": 0.14828206287082535, "grad_norm": 546.8084915195556, "learning_rate": 1.9052553612431268e-05, "loss": 453.4112, "step": 7710 }, { "epoch": 0.14847438720658518, "grad_norm": 593.3055536179546, "learning_rate": 1.9049958897259674e-05, "loss": 434.2642, "step": 7720 }, { "epoch": 0.148666711542345, "grad_norm": 508.2996949859877, "learning_rate": 1.904736081112785e-05, "loss": 439.8122, "step": 7730 }, { "epoch": 0.14885903587810484, "grad_norm": 589.5306799327531, "learning_rate": 1.9044759355003552e-05, "loss": 441.0922, "step": 7740 }, { "epoch": 0.14905136021386467, "grad_norm": 585.7134046183394, "learning_rate": 1.904215452985577e-05, "loss": 433.4066, "step": 7750 }, { "epoch": 0.1492436845496245, "grad_norm": 691.8373481101901, "learning_rate": 1.9039546336654765e-05, "loss": 440.9248, "step": 7760 }, { "epoch": 0.1494360088853843, "grad_norm": 627.92254723191, "learning_rate": 1.903693477637204e-05, "loss": 431.1621, "step": 7770 }, { "epoch": 0.14962833322114413, "grad_norm": 611.342956564641, "learning_rate": 1.903431984998036e-05, "loss": 436.4596, "step": 7780 }, { "epoch": 0.14982065755690396, "grad_norm": 580.9808788783724, "learning_rate": 1.9031701558453747e-05, "loss": 453.6431, "step": 7790 }, { "epoch": 0.1500129818926638, "grad_norm": 508.2592836367386, "learning_rate": 1.902907990276746e-05, "loss": 418.0719, "step": 7800 }, { "epoch": 0.15020530622842362, "grad_norm": 566.2762269851959, "learning_rate": 1.9026454883898036e-05, "loss": 441.5839, "step": 7810 }, { "epoch": 0.15039763056418343, "grad_norm": 553.876784327105, "learning_rate": 1.902382650282324e-05, "loss": 428.4753, "step": 7820 }, { "epoch": 0.15058995489994326, "grad_norm": 509.25067367581033, "learning_rate": 1.902119476052211e-05, "loss": 424.0172, "step": 7830 }, { "epoch": 0.1507822792357031, "grad_norm": 573.8549114711305, "learning_rate": 1.9018559657974918e-05, "loss": 436.3267, "step": 7840 }, { "epoch": 0.15097460357146292, "grad_norm": 578.8517407293792, "learning_rate": 1.90159211961632e-05, "loss": 476.6084, "step": 7850 }, { "epoch": 0.15116692790722275, "grad_norm": 604.5224267412457, "learning_rate": 1.901327937606974e-05, "loss": 436.9109, "step": 7860 }, { "epoch": 0.15135925224298258, "grad_norm": 556.5233939076458, "learning_rate": 1.901063419867857e-05, "loss": 444.9464, "step": 7870 }, { "epoch": 0.15155157657874238, "grad_norm": 566.4432173386184, "learning_rate": 1.900798566497498e-05, "loss": 437.7201, "step": 7880 }, { "epoch": 0.1517439009145022, "grad_norm": 523.0909791332912, "learning_rate": 1.9005333775945496e-05, "loss": 439.1833, "step": 7890 }, { "epoch": 0.15193622525026204, "grad_norm": 590.1169918399377, "learning_rate": 1.9002678532577915e-05, "loss": 430.2732, "step": 7900 }, { "epoch": 0.15212854958602187, "grad_norm": 538.7629382249178, "learning_rate": 1.900001993586126e-05, "loss": 441.559, "step": 7910 }, { "epoch": 0.1523208739217817, "grad_norm": 556.1148531125073, "learning_rate": 1.8997357986785822e-05, "loss": 446.4016, "step": 7920 }, { "epoch": 0.1525131982575415, "grad_norm": 643.330176953753, "learning_rate": 1.899469268634313e-05, "loss": 442.2081, "step": 7930 }, { "epoch": 0.15270552259330133, "grad_norm": 502.7658311238469, "learning_rate": 1.8992024035525964e-05, "loss": 426.8118, "step": 7940 }, { "epoch": 0.15289784692906117, "grad_norm": 563.7536501867388, "learning_rate": 1.8989352035328352e-05, "loss": 438.9939, "step": 7950 }, { "epoch": 0.153090171264821, "grad_norm": 595.729334769332, "learning_rate": 1.8986676686745572e-05, "loss": 431.4548, "step": 7960 }, { "epoch": 0.15328249560058083, "grad_norm": 541.6423282934109, "learning_rate": 1.8983997990774145e-05, "loss": 434.93, "step": 7970 }, { "epoch": 0.15347481993634066, "grad_norm": 584.8342988997091, "learning_rate": 1.8981315948411842e-05, "loss": 443.9966, "step": 7980 }, { "epoch": 0.15366714427210046, "grad_norm": 620.1016078368535, "learning_rate": 1.897863056065768e-05, "loss": 444.0354, "step": 7990 }, { "epoch": 0.1538594686078603, "grad_norm": 543.2149365694071, "learning_rate": 1.8975941828511923e-05, "loss": 434.4141, "step": 8000 }, { "epoch": 0.15405179294362012, "grad_norm": 549.2536788675371, "learning_rate": 1.8973249752976075e-05, "loss": 433.059, "step": 8010 }, { "epoch": 0.15424411727937995, "grad_norm": 566.1975761435316, "learning_rate": 1.8970554335052897e-05, "loss": 428.917, "step": 8020 }, { "epoch": 0.15443644161513978, "grad_norm": 601.2707463310079, "learning_rate": 1.8967855575746375e-05, "loss": 434.3125, "step": 8030 }, { "epoch": 0.1546287659508996, "grad_norm": 572.9370704510053, "learning_rate": 1.8965153476061763e-05, "loss": 441.1062, "step": 8040 }, { "epoch": 0.1548210902866594, "grad_norm": 544.8283044070936, "learning_rate": 1.896244803700555e-05, "loss": 434.5202, "step": 8050 }, { "epoch": 0.15501341462241924, "grad_norm": 551.435718815261, "learning_rate": 1.8959739259585458e-05, "loss": 435.1253, "step": 8060 }, { "epoch": 0.15520573895817907, "grad_norm": 580.6890855690795, "learning_rate": 1.895702714481047e-05, "loss": 421.8108, "step": 8070 }, { "epoch": 0.1553980632939389, "grad_norm": 519.697868206756, "learning_rate": 1.8954311693690798e-05, "loss": 417.7399, "step": 8080 }, { "epoch": 0.15559038762969873, "grad_norm": 533.1929671672967, "learning_rate": 1.8951592907237906e-05, "loss": 432.3487, "step": 8090 }, { "epoch": 0.15578271196545854, "grad_norm": 690.4581820900596, "learning_rate": 1.8948870786464496e-05, "loss": 444.6823, "step": 8100 }, { "epoch": 0.15597503630121837, "grad_norm": 592.2038275260893, "learning_rate": 1.8946145332384515e-05, "loss": 423.3146, "step": 8110 }, { "epoch": 0.1561673606369782, "grad_norm": 629.1982185777041, "learning_rate": 1.8943416546013148e-05, "loss": 441.5614, "step": 8120 }, { "epoch": 0.15635968497273803, "grad_norm": 500.79431278963153, "learning_rate": 1.894068442836682e-05, "loss": 432.5976, "step": 8130 }, { "epoch": 0.15655200930849786, "grad_norm": 575.801820386397, "learning_rate": 1.8937948980463207e-05, "loss": 428.3051, "step": 8140 }, { "epoch": 0.1567443336442577, "grad_norm": 585.8597750759566, "learning_rate": 1.893521020332121e-05, "loss": 445.5108, "step": 8150 }, { "epoch": 0.1569366579800175, "grad_norm": 581.9024039306552, "learning_rate": 1.8932468097960988e-05, "loss": 429.6226, "step": 8160 }, { "epoch": 0.15712898231577732, "grad_norm": 548.2506755004573, "learning_rate": 1.892972266540392e-05, "loss": 420.4457, "step": 8170 }, { "epoch": 0.15732130665153715, "grad_norm": 518.0948250716367, "learning_rate": 1.8926973906672635e-05, "loss": 422.4104, "step": 8180 }, { "epoch": 0.15751363098729698, "grad_norm": 539.6317603547405, "learning_rate": 1.892422182279101e-05, "loss": 438.4853, "step": 8190 }, { "epoch": 0.1577059553230568, "grad_norm": 560.3636254848747, "learning_rate": 1.892146641478414e-05, "loss": 431.9881, "step": 8200 }, { "epoch": 0.15789827965881661, "grad_norm": 581.6132816498555, "learning_rate": 1.8918707683678376e-05, "loss": 421.6939, "step": 8210 }, { "epoch": 0.15809060399457645, "grad_norm": 608.7072010580962, "learning_rate": 1.8915945630501296e-05, "loss": 431.0424, "step": 8220 }, { "epoch": 0.15828292833033628, "grad_norm": 562.8788592397115, "learning_rate": 1.8913180256281723e-05, "loss": 433.2843, "step": 8230 }, { "epoch": 0.1584752526660961, "grad_norm": 519.4687530271713, "learning_rate": 1.8910411562049706e-05, "loss": 423.8484, "step": 8240 }, { "epoch": 0.15866757700185594, "grad_norm": 540.379523632361, "learning_rate": 1.8907639548836548e-05, "loss": 429.4329, "step": 8250 }, { "epoch": 0.15885990133761577, "grad_norm": 584.1014209106212, "learning_rate": 1.8904864217674766e-05, "loss": 447.2186, "step": 8260 }, { "epoch": 0.15905222567337557, "grad_norm": 611.6965536357849, "learning_rate": 1.8902085569598136e-05, "loss": 421.9906, "step": 8270 }, { "epoch": 0.1592445500091354, "grad_norm": 549.0467627488447, "learning_rate": 1.889930360564165e-05, "loss": 425.6736, "step": 8280 }, { "epoch": 0.15943687434489523, "grad_norm": 553.4719219076825, "learning_rate": 1.8896518326841554e-05, "loss": 419.4191, "step": 8290 }, { "epoch": 0.15962919868065506, "grad_norm": 539.6735805099422, "learning_rate": 1.889372973423531e-05, "loss": 430.6573, "step": 8300 }, { "epoch": 0.1598215230164149, "grad_norm": 501.766952099889, "learning_rate": 1.889093782886162e-05, "loss": 428.0232, "step": 8310 }, { "epoch": 0.16001384735217472, "grad_norm": 551.0356240631205, "learning_rate": 1.8888142611760433e-05, "loss": 426.1552, "step": 8320 }, { "epoch": 0.16020617168793452, "grad_norm": 562.525599816841, "learning_rate": 1.8885344083972912e-05, "loss": 423.6793, "step": 8330 }, { "epoch": 0.16039849602369435, "grad_norm": 583.1779236079669, "learning_rate": 1.8882542246541468e-05, "loss": 445.9307, "step": 8340 }, { "epoch": 0.16059082035945418, "grad_norm": 615.1845129614437, "learning_rate": 1.887973710050974e-05, "loss": 443.9851, "step": 8350 }, { "epoch": 0.16078314469521401, "grad_norm": 584.8857841386294, "learning_rate": 1.887692864692259e-05, "loss": 423.9522, "step": 8360 }, { "epoch": 0.16097546903097384, "grad_norm": 553.4133223514797, "learning_rate": 1.887411688682613e-05, "loss": 438.2559, "step": 8370 }, { "epoch": 0.16116779336673365, "grad_norm": 570.8960463191577, "learning_rate": 1.887130182126769e-05, "loss": 441.2844, "step": 8380 }, { "epoch": 0.16136011770249348, "grad_norm": 667.5638120400549, "learning_rate": 1.8868483451295835e-05, "loss": 433.5899, "step": 8390 }, { "epoch": 0.1615524420382533, "grad_norm": 569.025894881582, "learning_rate": 1.8865661777960366e-05, "loss": 427.1694, "step": 8400 }, { "epoch": 0.16174476637401314, "grad_norm": 513.2618461942593, "learning_rate": 1.88628368023123e-05, "loss": 432.4041, "step": 8410 }, { "epoch": 0.16193709070977297, "grad_norm": 620.9651312400831, "learning_rate": 1.8860008525403903e-05, "loss": 449.5799, "step": 8420 }, { "epoch": 0.1621294150455328, "grad_norm": 523.06292503676, "learning_rate": 1.885717694828866e-05, "loss": 431.8335, "step": 8430 }, { "epoch": 0.1623217393812926, "grad_norm": 653.042768179501, "learning_rate": 1.8854342072021282e-05, "loss": 418.552, "step": 8440 }, { "epoch": 0.16251406371705243, "grad_norm": 511.6522064427705, "learning_rate": 1.8851503897657717e-05, "loss": 434.6689, "step": 8450 }, { "epoch": 0.16270638805281226, "grad_norm": 521.8439186722719, "learning_rate": 1.8848662426255135e-05, "loss": 440.1868, "step": 8460 }, { "epoch": 0.1628987123885721, "grad_norm": 532.5330484085582, "learning_rate": 1.8845817658871942e-05, "loss": 434.4043, "step": 8470 }, { "epoch": 0.16309103672433192, "grad_norm": 546.976816839853, "learning_rate": 1.8842969596567765e-05, "loss": 441.8241, "step": 8480 }, { "epoch": 0.16328336106009173, "grad_norm": 576.69187127638, "learning_rate": 1.884011824040346e-05, "loss": 449.426, "step": 8490 }, { "epoch": 0.16347568539585156, "grad_norm": 621.7037817843438, "learning_rate": 1.883726359144111e-05, "loss": 434.0773, "step": 8500 }, { "epoch": 0.16366800973161139, "grad_norm": 504.5021465667742, "learning_rate": 1.8834405650744023e-05, "loss": 426.3316, "step": 8510 }, { "epoch": 0.16386033406737122, "grad_norm": 628.6596615396477, "learning_rate": 1.883154441937674e-05, "loss": 437.8812, "step": 8520 }, { "epoch": 0.16405265840313105, "grad_norm": 542.6726097126638, "learning_rate": 1.8828679898405015e-05, "loss": 416.4292, "step": 8530 }, { "epoch": 0.16424498273889088, "grad_norm": 554.6754415804415, "learning_rate": 1.8825812088895835e-05, "loss": 434.5456, "step": 8540 }, { "epoch": 0.16443730707465068, "grad_norm": 529.5274527416682, "learning_rate": 1.882294099191742e-05, "loss": 435.6156, "step": 8550 }, { "epoch": 0.1646296314104105, "grad_norm": 545.3900996071341, "learning_rate": 1.88200666085392e-05, "loss": 426.0449, "step": 8560 }, { "epoch": 0.16482195574617034, "grad_norm": 561.5017344100828, "learning_rate": 1.8817188939831838e-05, "loss": 434.16, "step": 8570 }, { "epoch": 0.16501428008193017, "grad_norm": 524.1371948724865, "learning_rate": 1.8814307986867214e-05, "loss": 415.4602, "step": 8580 }, { "epoch": 0.16520660441769, "grad_norm": 538.5403613520467, "learning_rate": 1.881142375071844e-05, "loss": 408.1654, "step": 8590 }, { "epoch": 0.16539892875344983, "grad_norm": 660.09730136131, "learning_rate": 1.8808536232459844e-05, "loss": 438.1817, "step": 8600 }, { "epoch": 0.16559125308920963, "grad_norm": 509.92483346796786, "learning_rate": 1.8805645433166976e-05, "loss": 441.5352, "step": 8610 }, { "epoch": 0.16578357742496946, "grad_norm": 560.7575613908119, "learning_rate": 1.8802751353916618e-05, "loss": 437.8606, "step": 8620 }, { "epoch": 0.1659759017607293, "grad_norm": 525.5924896458009, "learning_rate": 1.8799853995786763e-05, "loss": 430.4469, "step": 8630 }, { "epoch": 0.16616822609648912, "grad_norm": 563.1080871985357, "learning_rate": 1.8796953359856626e-05, "loss": 409.7498, "step": 8640 }, { "epoch": 0.16636055043224895, "grad_norm": 582.690469897271, "learning_rate": 1.879404944720665e-05, "loss": 427.2426, "step": 8650 }, { "epoch": 0.16655287476800876, "grad_norm": 560.4089513612888, "learning_rate": 1.8791142258918496e-05, "loss": 429.3333, "step": 8660 }, { "epoch": 0.1667451991037686, "grad_norm": 573.2907731727554, "learning_rate": 1.8788231796075037e-05, "loss": 438.3569, "step": 8670 }, { "epoch": 0.16693752343952842, "grad_norm": 616.8718364832039, "learning_rate": 1.8785318059760384e-05, "loss": 418.9777, "step": 8680 }, { "epoch": 0.16712984777528825, "grad_norm": 536.2940676573769, "learning_rate": 1.8782401051059838e-05, "loss": 424.638, "step": 8690 }, { "epoch": 0.16732217211104808, "grad_norm": 517.2732316134967, "learning_rate": 1.8779480771059954e-05, "loss": 413.4565, "step": 8700 }, { "epoch": 0.1675144964468079, "grad_norm": 548.6441593095126, "learning_rate": 1.8776557220848477e-05, "loss": 424.9253, "step": 8710 }, { "epoch": 0.1677068207825677, "grad_norm": 704.5101201740893, "learning_rate": 1.8773630401514388e-05, "loss": 425.7483, "step": 8720 }, { "epoch": 0.16789914511832754, "grad_norm": 570.644947874167, "learning_rate": 1.877070031414787e-05, "loss": 430.1442, "step": 8730 }, { "epoch": 0.16809146945408737, "grad_norm": 753.9641535372008, "learning_rate": 1.876776695984034e-05, "loss": 423.4344, "step": 8740 }, { "epoch": 0.1682837937898472, "grad_norm": 601.3750020968382, "learning_rate": 1.8764830339684426e-05, "loss": 419.546, "step": 8750 }, { "epoch": 0.16847611812560703, "grad_norm": 555.995162734277, "learning_rate": 1.8761890454773965e-05, "loss": 424.8769, "step": 8760 }, { "epoch": 0.16866844246136684, "grad_norm": 555.6905106933057, "learning_rate": 1.8758947306204012e-05, "loss": 434.1687, "step": 8770 }, { "epoch": 0.16886076679712667, "grad_norm": 563.0678086087067, "learning_rate": 1.8756000895070854e-05, "loss": 410.9748, "step": 8780 }, { "epoch": 0.1690530911328865, "grad_norm": 525.1484063246721, "learning_rate": 1.8753051222471968e-05, "loss": 433.5676, "step": 8790 }, { "epoch": 0.16924541546864633, "grad_norm": 582.5866661832113, "learning_rate": 1.8750098289506066e-05, "loss": 421.106, "step": 8800 }, { "epoch": 0.16943773980440616, "grad_norm": 582.2388981661788, "learning_rate": 1.8747142097273057e-05, "loss": 417.332, "step": 8810 }, { "epoch": 0.169630064140166, "grad_norm": 515.6525576635153, "learning_rate": 1.8744182646874085e-05, "loss": 425.4491, "step": 8820 }, { "epoch": 0.1698223884759258, "grad_norm": 518.7076773187805, "learning_rate": 1.8741219939411494e-05, "loss": 439.8529, "step": 8830 }, { "epoch": 0.17001471281168562, "grad_norm": 567.5260696340699, "learning_rate": 1.873825397598884e-05, "loss": 434.1176, "step": 8840 }, { "epoch": 0.17020703714744545, "grad_norm": 509.8572598415664, "learning_rate": 1.8735284757710897e-05, "loss": 428.2947, "step": 8850 }, { "epoch": 0.17039936148320528, "grad_norm": 578.9582231928556, "learning_rate": 1.873231228568365e-05, "loss": 433.9714, "step": 8860 }, { "epoch": 0.1705916858189651, "grad_norm": 542.4515139172046, "learning_rate": 1.8729336561014294e-05, "loss": 434.2283, "step": 8870 }, { "epoch": 0.17078401015472494, "grad_norm": 543.8338730974002, "learning_rate": 1.8726357584811242e-05, "loss": 420.8061, "step": 8880 }, { "epoch": 0.17097633449048474, "grad_norm": 532.7654305582791, "learning_rate": 1.8723375358184107e-05, "loss": 433.0151, "step": 8890 }, { "epoch": 0.17116865882624457, "grad_norm": 512.5359719309298, "learning_rate": 1.8720389882243722e-05, "loss": 420.664, "step": 8900 }, { "epoch": 0.1713609831620044, "grad_norm": 534.9811641267368, "learning_rate": 1.871740115810213e-05, "loss": 438.8542, "step": 8910 }, { "epoch": 0.17155330749776423, "grad_norm": 527.4542376506438, "learning_rate": 1.8714409186872578e-05, "loss": 433.1957, "step": 8920 }, { "epoch": 0.17174563183352407, "grad_norm": 534.2467297715082, "learning_rate": 1.871141396966953e-05, "loss": 434.9628, "step": 8930 }, { "epoch": 0.17193795616928387, "grad_norm": 548.1164226003325, "learning_rate": 1.8708415507608647e-05, "loss": 420.644, "step": 8940 }, { "epoch": 0.1721302805050437, "grad_norm": 492.71640548491524, "learning_rate": 1.8705413801806817e-05, "loss": 427.233, "step": 8950 }, { "epoch": 0.17232260484080353, "grad_norm": 547.2007064477028, "learning_rate": 1.8702408853382114e-05, "loss": 420.3109, "step": 8960 }, { "epoch": 0.17251492917656336, "grad_norm": 523.5479372406793, "learning_rate": 1.8699400663453842e-05, "loss": 422.3723, "step": 8970 }, { "epoch": 0.1727072535123232, "grad_norm": 519.4993779419578, "learning_rate": 1.8696389233142498e-05, "loss": 419.6555, "step": 8980 }, { "epoch": 0.17289957784808302, "grad_norm": 502.62735831219584, "learning_rate": 1.8693374563569792e-05, "loss": 420.121, "step": 8990 }, { "epoch": 0.17309190218384282, "grad_norm": 566.7886844877982, "learning_rate": 1.8690356655858634e-05, "loss": 424.814, "step": 9000 }, { "epoch": 0.17328422651960265, "grad_norm": 591.3701541543867, "learning_rate": 1.868733551113315e-05, "loss": 433.5979, "step": 9010 }, { "epoch": 0.17347655085536248, "grad_norm": 553.2749494894153, "learning_rate": 1.8684311130518663e-05, "loss": 420.8928, "step": 9020 }, { "epoch": 0.1736688751911223, "grad_norm": 613.200549157228, "learning_rate": 1.8681283515141705e-05, "loss": 421.6006, "step": 9030 }, { "epoch": 0.17386119952688214, "grad_norm": 516.7305214545012, "learning_rate": 1.8678252666130016e-05, "loss": 424.7881, "step": 9040 }, { "epoch": 0.17405352386264195, "grad_norm": 566.6093792031394, "learning_rate": 1.8675218584612534e-05, "loss": 421.7676, "step": 9050 }, { "epoch": 0.17424584819840178, "grad_norm": 511.2119317871291, "learning_rate": 1.8672181271719406e-05, "loss": 427.5226, "step": 9060 }, { "epoch": 0.1744381725341616, "grad_norm": 596.8432639819043, "learning_rate": 1.866914072858198e-05, "loss": 435.9604, "step": 9070 }, { "epoch": 0.17463049686992144, "grad_norm": 558.3075578096414, "learning_rate": 1.8666096956332805e-05, "loss": 426.1812, "step": 9080 }, { "epoch": 0.17482282120568127, "grad_norm": 511.4292529690994, "learning_rate": 1.8663049956105642e-05, "loss": 422.1414, "step": 9090 }, { "epoch": 0.1750151455414411, "grad_norm": 522.7046412746399, "learning_rate": 1.8659999729035445e-05, "loss": 420.2742, "step": 9100 }, { "epoch": 0.1752074698772009, "grad_norm": 493.81231795962515, "learning_rate": 1.8656946276258373e-05, "loss": 420.0853, "step": 9110 }, { "epoch": 0.17539979421296073, "grad_norm": 571.5146890274276, "learning_rate": 1.8653889598911787e-05, "loss": 431.4177, "step": 9120 }, { "epoch": 0.17559211854872056, "grad_norm": 521.4456421773015, "learning_rate": 1.865082969813425e-05, "loss": 418.0671, "step": 9130 }, { "epoch": 0.1757844428844804, "grad_norm": 564.0059254649086, "learning_rate": 1.8647766575065523e-05, "loss": 425.494, "step": 9140 }, { "epoch": 0.17597676722024022, "grad_norm": 577.5935009146527, "learning_rate": 1.864470023084657e-05, "loss": 444.1621, "step": 9150 }, { "epoch": 0.17616909155600005, "grad_norm": 518.2614350062311, "learning_rate": 1.864163066661955e-05, "loss": 427.1668, "step": 9160 }, { "epoch": 0.17636141589175985, "grad_norm": 602.2692324132776, "learning_rate": 1.8638557883527833e-05, "loss": 419.9159, "step": 9170 }, { "epoch": 0.17655374022751968, "grad_norm": 591.646699534163, "learning_rate": 1.8635481882715975e-05, "loss": 413.4255, "step": 9180 }, { "epoch": 0.17674606456327951, "grad_norm": 489.06767888855086, "learning_rate": 1.863240266532973e-05, "loss": 414.8031, "step": 9190 }, { "epoch": 0.17693838889903934, "grad_norm": 575.6543170412791, "learning_rate": 1.8629320232516063e-05, "loss": 427.3708, "step": 9200 }, { "epoch": 0.17713071323479918, "grad_norm": 549.4387331358224, "learning_rate": 1.862623458542313e-05, "loss": 422.5452, "step": 9210 }, { "epoch": 0.17732303757055898, "grad_norm": 524.0464185455141, "learning_rate": 1.862314572520028e-05, "loss": 419.5607, "step": 9220 }, { "epoch": 0.1775153619063188, "grad_norm": 595.4925303858677, "learning_rate": 1.862005365299806e-05, "loss": 414.3376, "step": 9230 }, { "epoch": 0.17770768624207864, "grad_norm": 532.4377562848898, "learning_rate": 1.8616958369968223e-05, "loss": 441.0964, "step": 9240 }, { "epoch": 0.17790001057783847, "grad_norm": 503.47459913660003, "learning_rate": 1.8613859877263708e-05, "loss": 422.7668, "step": 9250 }, { "epoch": 0.1780923349135983, "grad_norm": 505.177979755498, "learning_rate": 1.8610758176038647e-05, "loss": 410.8366, "step": 9260 }, { "epoch": 0.17828465924935813, "grad_norm": 580.1178516448416, "learning_rate": 1.860765326744838e-05, "loss": 416.7083, "step": 9270 }, { "epoch": 0.17847698358511793, "grad_norm": 531.6274491521162, "learning_rate": 1.8604545152649426e-05, "loss": 425.9408, "step": 9280 }, { "epoch": 0.17866930792087776, "grad_norm": 511.7090105790692, "learning_rate": 1.860143383279952e-05, "loss": 430.9702, "step": 9290 }, { "epoch": 0.1788616322566376, "grad_norm": 578.6974413585342, "learning_rate": 1.859831930905756e-05, "loss": 415.5254, "step": 9300 }, { "epoch": 0.17905395659239742, "grad_norm": 552.8420613889978, "learning_rate": 1.8595201582583668e-05, "loss": 423.8861, "step": 9310 }, { "epoch": 0.17924628092815725, "grad_norm": 519.5427538259271, "learning_rate": 1.859208065453914e-05, "loss": 416.2268, "step": 9320 }, { "epoch": 0.17943860526391706, "grad_norm": 553.5123808009505, "learning_rate": 1.8588956526086472e-05, "loss": 433.5628, "step": 9330 }, { "epoch": 0.1796309295996769, "grad_norm": 549.8761157194938, "learning_rate": 1.8585829198389347e-05, "loss": 416.0442, "step": 9340 }, { "epoch": 0.17982325393543672, "grad_norm": 562.8631278908741, "learning_rate": 1.8582698672612646e-05, "loss": 417.3363, "step": 9350 }, { "epoch": 0.18001557827119655, "grad_norm": 586.7017995530338, "learning_rate": 1.8579564949922438e-05, "loss": 417.5483, "step": 9360 }, { "epoch": 0.18020790260695638, "grad_norm": 528.6126212847728, "learning_rate": 1.8576428031485984e-05, "loss": 409.244, "step": 9370 }, { "epoch": 0.1804002269427162, "grad_norm": 530.6686096377931, "learning_rate": 1.8573287918471728e-05, "loss": 421.8057, "step": 9380 }, { "epoch": 0.180592551278476, "grad_norm": 585.0771653810731, "learning_rate": 1.8570144612049322e-05, "loss": 417.3723, "step": 9390 }, { "epoch": 0.18078487561423584, "grad_norm": 559.0368429444012, "learning_rate": 1.856699811338958e-05, "loss": 419.4453, "step": 9400 }, { "epoch": 0.18097719994999567, "grad_norm": 654.8321479458963, "learning_rate": 1.8563848423664536e-05, "loss": 430.0406, "step": 9410 }, { "epoch": 0.1811695242857555, "grad_norm": 495.766456885269, "learning_rate": 1.8560695544047388e-05, "loss": 436.637, "step": 9420 }, { "epoch": 0.18136184862151533, "grad_norm": 578.2104264562734, "learning_rate": 1.8557539475712538e-05, "loss": 446.099, "step": 9430 }, { "epoch": 0.18155417295727516, "grad_norm": 578.2861321575405, "learning_rate": 1.855438021983556e-05, "loss": 420.0583, "step": 9440 }, { "epoch": 0.18174649729303496, "grad_norm": 659.5385427960081, "learning_rate": 1.8551217777593233e-05, "loss": 420.5492, "step": 9450 }, { "epoch": 0.1819388216287948, "grad_norm": 534.7839812928478, "learning_rate": 1.8548052150163514e-05, "loss": 416.5834, "step": 9460 }, { "epoch": 0.18213114596455462, "grad_norm": 522.8373626061169, "learning_rate": 1.8544883338725544e-05, "loss": 404.8246, "step": 9470 }, { "epoch": 0.18232347030031446, "grad_norm": 520.1017128871034, "learning_rate": 1.8541711344459652e-05, "loss": 418.9685, "step": 9480 }, { "epoch": 0.18251579463607429, "grad_norm": 560.0962448637667, "learning_rate": 1.8538536168547353e-05, "loss": 428.3481, "step": 9490 }, { "epoch": 0.1827081189718341, "grad_norm": 622.9568825824865, "learning_rate": 1.8535357812171356e-05, "loss": 434.1384, "step": 9500 }, { "epoch": 0.18290044330759392, "grad_norm": 555.6372958533534, "learning_rate": 1.8532176276515538e-05, "loss": 421.5643, "step": 9510 }, { "epoch": 0.18309276764335375, "grad_norm": 522.7995245990104, "learning_rate": 1.8528991562764967e-05, "loss": 420.3086, "step": 9520 }, { "epoch": 0.18328509197911358, "grad_norm": 521.8334563410268, "learning_rate": 1.85258036721059e-05, "loss": 416.3376, "step": 9530 }, { "epoch": 0.1834774163148734, "grad_norm": 499.68224234408467, "learning_rate": 1.8522612605725777e-05, "loss": 426.5406, "step": 9540 }, { "epoch": 0.18366974065063324, "grad_norm": 578.4621884185445, "learning_rate": 1.8519418364813215e-05, "loss": 419.4123, "step": 9550 }, { "epoch": 0.18386206498639304, "grad_norm": 522.0543225762189, "learning_rate": 1.851622095055801e-05, "loss": 434.0053, "step": 9560 }, { "epoch": 0.18405438932215287, "grad_norm": 523.6593173386211, "learning_rate": 1.8513020364151155e-05, "loss": 419.1064, "step": 9570 }, { "epoch": 0.1842467136579127, "grad_norm": 662.5500637187648, "learning_rate": 1.850981660678481e-05, "loss": 418.0609, "step": 9580 }, { "epoch": 0.18443903799367253, "grad_norm": 529.6694191680093, "learning_rate": 1.8506609679652323e-05, "loss": 415.039, "step": 9590 }, { "epoch": 0.18463136232943236, "grad_norm": 572.3020842816497, "learning_rate": 1.8503399583948224e-05, "loss": 429.1895, "step": 9600 }, { "epoch": 0.1848236866651922, "grad_norm": 496.5845821703022, "learning_rate": 1.8500186320868215e-05, "loss": 408.0225, "step": 9610 }, { "epoch": 0.185016011000952, "grad_norm": 531.7513028622932, "learning_rate": 1.8496969891609186e-05, "loss": 434.5869, "step": 9620 }, { "epoch": 0.18520833533671183, "grad_norm": 589.8906599286073, "learning_rate": 1.8493750297369208e-05, "loss": 430.4186, "step": 9630 }, { "epoch": 0.18540065967247166, "grad_norm": 539.6848176097469, "learning_rate": 1.849052753934752e-05, "loss": 430.6536, "step": 9640 }, { "epoch": 0.1855929840082315, "grad_norm": 520.5519171859622, "learning_rate": 1.8487301618744552e-05, "loss": 431.9636, "step": 9650 }, { "epoch": 0.18578530834399132, "grad_norm": 526.9089236180882, "learning_rate": 1.84840725367619e-05, "loss": 409.1337, "step": 9660 }, { "epoch": 0.18597763267975112, "grad_norm": 549.8399229515857, "learning_rate": 1.8480840294602352e-05, "loss": 414.6718, "step": 9670 }, { "epoch": 0.18616995701551095, "grad_norm": 514.7280426238358, "learning_rate": 1.8477604893469857e-05, "loss": 429.3853, "step": 9680 }, { "epoch": 0.18636228135127078, "grad_norm": 558.315549484784, "learning_rate": 1.847436633456955e-05, "loss": 426.2568, "step": 9690 }, { "epoch": 0.1865546056870306, "grad_norm": 528.9386090871537, "learning_rate": 1.8471124619107744e-05, "loss": 410.7179, "step": 9700 }, { "epoch": 0.18674693002279044, "grad_norm": 497.6646727906393, "learning_rate": 1.846787974829192e-05, "loss": 413.8442, "step": 9710 }, { "epoch": 0.18693925435855027, "grad_norm": 569.0517040183468, "learning_rate": 1.8464631723330745e-05, "loss": 423.8359, "step": 9720 }, { "epoch": 0.18713157869431007, "grad_norm": 544.2586133501704, "learning_rate": 1.8461380545434054e-05, "loss": 406.8935, "step": 9730 }, { "epoch": 0.1873239030300699, "grad_norm": 554.8309689483496, "learning_rate": 1.8458126215812848e-05, "loss": 408.8487, "step": 9740 }, { "epoch": 0.18751622736582974, "grad_norm": 690.3802857522378, "learning_rate": 1.845486873567932e-05, "loss": 415.6544, "step": 9750 }, { "epoch": 0.18770855170158957, "grad_norm": 487.27608875891974, "learning_rate": 1.8451608106246822e-05, "loss": 408.5747, "step": 9760 }, { "epoch": 0.1879008760373494, "grad_norm": 1665.5502816082571, "learning_rate": 1.8448344328729893e-05, "loss": 424.0524, "step": 9770 }, { "epoch": 0.1880932003731092, "grad_norm": 523.0179811206775, "learning_rate": 1.8445077404344226e-05, "loss": 408.2082, "step": 9780 }, { "epoch": 0.18828552470886903, "grad_norm": 550.6570585819196, "learning_rate": 1.8441807334306702e-05, "loss": 415.0986, "step": 9790 }, { "epoch": 0.18847784904462886, "grad_norm": 519.3988019514969, "learning_rate": 1.8438534119835365e-05, "loss": 407.2675, "step": 9800 }, { "epoch": 0.1886701733803887, "grad_norm": 542.7050796549003, "learning_rate": 1.8435257762149436e-05, "loss": 420.9396, "step": 9810 }, { "epoch": 0.18886249771614852, "grad_norm": 500.71488346322286, "learning_rate": 1.8431978262469305e-05, "loss": 427.5086, "step": 9820 }, { "epoch": 0.18905482205190835, "grad_norm": 599.7988511294999, "learning_rate": 1.8428695622016532e-05, "loss": 410.2052, "step": 9830 }, { "epoch": 0.18924714638766815, "grad_norm": 529.0811037911369, "learning_rate": 1.8425409842013843e-05, "loss": 424.4892, "step": 9840 }, { "epoch": 0.18943947072342798, "grad_norm": 494.85889023281277, "learning_rate": 1.8422120923685135e-05, "loss": 406.3137, "step": 9850 }, { "epoch": 0.1896317950591878, "grad_norm": 528.8127401570667, "learning_rate": 1.8418828868255484e-05, "loss": 412.9516, "step": 9860 }, { "epoch": 0.18982411939494764, "grad_norm": 537.0267192602489, "learning_rate": 1.8415533676951117e-05, "loss": 421.6755, "step": 9870 }, { "epoch": 0.19001644373070747, "grad_norm": 513.3969136082288, "learning_rate": 1.8412235350999444e-05, "loss": 425.0687, "step": 9880 }, { "epoch": 0.1902087680664673, "grad_norm": 527.09258917363, "learning_rate": 1.840893389162903e-05, "loss": 445.8872, "step": 9890 }, { "epoch": 0.1904010924022271, "grad_norm": 500.25795635803814, "learning_rate": 1.8405629300069626e-05, "loss": 421.5463, "step": 9900 }, { "epoch": 0.19059341673798694, "grad_norm": 545.3130687777858, "learning_rate": 1.840232157755213e-05, "loss": 423.7159, "step": 9910 }, { "epoch": 0.19078574107374677, "grad_norm": 608.618059123915, "learning_rate": 1.8399010725308616e-05, "loss": 411.9484, "step": 9920 }, { "epoch": 0.1909780654095066, "grad_norm": 545.1218608516824, "learning_rate": 1.839569674457232e-05, "loss": 421.4794, "step": 9930 }, { "epoch": 0.19117038974526643, "grad_norm": 504.94712412278, "learning_rate": 1.8392379636577647e-05, "loss": 432.3682, "step": 9940 }, { "epoch": 0.19136271408102623, "grad_norm": 546.9329024442096, "learning_rate": 1.8389059402560165e-05, "loss": 423.1993, "step": 9950 }, { "epoch": 0.19155503841678606, "grad_norm": 502.32540284404547, "learning_rate": 1.8385736043756605e-05, "loss": 416.5917, "step": 9960 }, { "epoch": 0.1917473627525459, "grad_norm": 509.708538396133, "learning_rate": 1.838240956140486e-05, "loss": 421.7241, "step": 9970 }, { "epoch": 0.19193968708830572, "grad_norm": 525.1293879716845, "learning_rate": 1.8379079956743996e-05, "loss": 433.5008, "step": 9980 }, { "epoch": 0.19213201142406555, "grad_norm": 497.61637306609146, "learning_rate": 1.8375747231014233e-05, "loss": 418.8824, "step": 9990 }, { "epoch": 0.19232433575982538, "grad_norm": 572.406381802282, "learning_rate": 1.8372411385456956e-05, "loss": 420.2644, "step": 10000 }, { "epoch": 0.19251666009558518, "grad_norm": 536.8888580901031, "learning_rate": 1.8369072421314717e-05, "loss": 416.1535, "step": 10010 }, { "epoch": 0.19270898443134502, "grad_norm": 539.5311818166617, "learning_rate": 1.8365730339831212e-05, "loss": 410.9119, "step": 10020 }, { "epoch": 0.19290130876710485, "grad_norm": 523.245654348139, "learning_rate": 1.8362385142251328e-05, "loss": 414.0204, "step": 10030 }, { "epoch": 0.19309363310286468, "grad_norm": 536.474516444986, "learning_rate": 1.8359036829821085e-05, "loss": 410.5013, "step": 10040 }, { "epoch": 0.1932859574386245, "grad_norm": 515.0024761825331, "learning_rate": 1.8355685403787677e-05, "loss": 407.9228, "step": 10050 }, { "epoch": 0.1934782817743843, "grad_norm": 527.230463860649, "learning_rate": 1.8352330865399457e-05, "loss": 407.3625, "step": 10060 }, { "epoch": 0.19367060611014414, "grad_norm": 642.3091996399853, "learning_rate": 1.834897321590593e-05, "loss": 412.4108, "step": 10070 }, { "epoch": 0.19386293044590397, "grad_norm": 540.0323798323193, "learning_rate": 1.8345612456557767e-05, "loss": 403.2625, "step": 10080 }, { "epoch": 0.1940552547816638, "grad_norm": 575.5953061828302, "learning_rate": 1.83422485886068e-05, "loss": 406.2676, "step": 10090 }, { "epoch": 0.19424757911742363, "grad_norm": 656.8468315467643, "learning_rate": 1.833888161330601e-05, "loss": 428.1458, "step": 10100 }, { "epoch": 0.19443990345318346, "grad_norm": 576.5050128483706, "learning_rate": 1.833551153190954e-05, "loss": 411.1276, "step": 10110 }, { "epoch": 0.19463222778894326, "grad_norm": 566.5844914913063, "learning_rate": 1.8332138345672686e-05, "loss": 407.6996, "step": 10120 }, { "epoch": 0.1948245521247031, "grad_norm": 544.3805983252685, "learning_rate": 1.832876205585191e-05, "loss": 418.7956, "step": 10130 }, { "epoch": 0.19501687646046292, "grad_norm": 501.0720341320704, "learning_rate": 1.8325382663704826e-05, "loss": 412.451, "step": 10140 }, { "epoch": 0.19520920079622275, "grad_norm": 574.3306194291852, "learning_rate": 1.8322000170490194e-05, "loss": 404.8666, "step": 10150 }, { "epoch": 0.19540152513198258, "grad_norm": 544.7437684730154, "learning_rate": 1.831861457746794e-05, "loss": 415.9554, "step": 10160 }, { "epoch": 0.19559384946774241, "grad_norm": 529.9547074984308, "learning_rate": 1.8315225885899144e-05, "loss": 417.0008, "step": 10170 }, { "epoch": 0.19578617380350222, "grad_norm": 528.9189770261082, "learning_rate": 1.8311834097046038e-05, "loss": 406.9135, "step": 10180 }, { "epoch": 0.19597849813926205, "grad_norm": 548.3876544489895, "learning_rate": 1.8308439212172e-05, "loss": 417.8647, "step": 10190 }, { "epoch": 0.19617082247502188, "grad_norm": 557.4102901046656, "learning_rate": 1.830504123254158e-05, "loss": 425.4412, "step": 10200 }, { "epoch": 0.1963631468107817, "grad_norm": 521.4352191559834, "learning_rate": 1.830164015942046e-05, "loss": 417.0783, "step": 10210 }, { "epoch": 0.19655547114654154, "grad_norm": 1354.3653106645966, "learning_rate": 1.8298235994075488e-05, "loss": 408.7513, "step": 10220 }, { "epoch": 0.19674779548230134, "grad_norm": 502.4758460590438, "learning_rate": 1.829482873777466e-05, "loss": 423.2251, "step": 10230 }, { "epoch": 0.19694011981806117, "grad_norm": 1317.3278761813658, "learning_rate": 1.8291418391787116e-05, "loss": 426.1462, "step": 10240 }, { "epoch": 0.197132444153821, "grad_norm": 500.45854920060214, "learning_rate": 1.8288004957383162e-05, "loss": 414.126, "step": 10250 }, { "epoch": 0.19732476848958083, "grad_norm": 530.5593261671512, "learning_rate": 1.8284588435834242e-05, "loss": 415.1306, "step": 10260 }, { "epoch": 0.19751709282534066, "grad_norm": 519.9739315938061, "learning_rate": 1.828116882841295e-05, "loss": 416.7035, "step": 10270 }, { "epoch": 0.1977094171611005, "grad_norm": 524.8884270767219, "learning_rate": 1.8277746136393042e-05, "loss": 426.8112, "step": 10280 }, { "epoch": 0.1979017414968603, "grad_norm": 524.3284840384099, "learning_rate": 1.827432036104941e-05, "loss": 412.6839, "step": 10290 }, { "epoch": 0.19809406583262013, "grad_norm": 503.33586436995387, "learning_rate": 1.8270891503658096e-05, "loss": 412.2217, "step": 10300 }, { "epoch": 0.19828639016837996, "grad_norm": 546.0982813263964, "learning_rate": 1.8267459565496298e-05, "loss": 419.6713, "step": 10310 }, { "epoch": 0.19847871450413979, "grad_norm": 511.52602813136303, "learning_rate": 1.8264024547842346e-05, "loss": 413.6944, "step": 10320 }, { "epoch": 0.19867103883989962, "grad_norm": 518.0320879974444, "learning_rate": 1.8260586451975745e-05, "loss": 409.5191, "step": 10330 }, { "epoch": 0.19886336317565942, "grad_norm": 554.6757703597846, "learning_rate": 1.825714527917711e-05, "loss": 424.7147, "step": 10340 }, { "epoch": 0.19905568751141925, "grad_norm": 623.6783765698033, "learning_rate": 1.8253701030728235e-05, "loss": 418.4444, "step": 10350 }, { "epoch": 0.19924801184717908, "grad_norm": 515.3750985319824, "learning_rate": 1.8250253707912036e-05, "loss": 420.7123, "step": 10360 }, { "epoch": 0.1994403361829389, "grad_norm": 515.2598514122711, "learning_rate": 1.8246803312012593e-05, "loss": 401.7974, "step": 10370 }, { "epoch": 0.19963266051869874, "grad_norm": 519.8969995253459, "learning_rate": 1.8243349844315116e-05, "loss": 404.8949, "step": 10380 }, { "epoch": 0.19982498485445857, "grad_norm": 515.4214128174964, "learning_rate": 1.8239893306105966e-05, "loss": 406.2662, "step": 10390 }, { "epoch": 0.20001730919021837, "grad_norm": 486.61132653951995, "learning_rate": 1.823643369867264e-05, "loss": 407.4249, "step": 10400 }, { "epoch": 0.2002096335259782, "grad_norm": 474.6687855552454, "learning_rate": 1.8232971023303798e-05, "loss": 400.1477, "step": 10410 }, { "epoch": 0.20040195786173803, "grad_norm": 516.3606444909349, "learning_rate": 1.8229505281289216e-05, "loss": 421.7604, "step": 10420 }, { "epoch": 0.20059428219749786, "grad_norm": 492.3888246658609, "learning_rate": 1.8226036473919836e-05, "loss": 401.3217, "step": 10430 }, { "epoch": 0.2007866065332577, "grad_norm": 495.7204526571697, "learning_rate": 1.8222564602487724e-05, "loss": 412.7046, "step": 10440 }, { "epoch": 0.20097893086901752, "grad_norm": 510.8913180339026, "learning_rate": 1.82190896682861e-05, "loss": 410.9804, "step": 10450 }, { "epoch": 0.20117125520477733, "grad_norm": 523.1719450732343, "learning_rate": 1.8215611672609316e-05, "loss": 397.5099, "step": 10460 }, { "epoch": 0.20136357954053716, "grad_norm": 523.0320537596587, "learning_rate": 1.821213061675287e-05, "loss": 409.6579, "step": 10470 }, { "epoch": 0.201555903876297, "grad_norm": 503.92571864409314, "learning_rate": 1.8208646502013395e-05, "loss": 410.3252, "step": 10480 }, { "epoch": 0.20174822821205682, "grad_norm": 515.077737494529, "learning_rate": 1.820515932968867e-05, "loss": 410.6644, "step": 10490 }, { "epoch": 0.20194055254781665, "grad_norm": 531.9273618650429, "learning_rate": 1.8201669101077608e-05, "loss": 412.3847, "step": 10500 }, { "epoch": 0.20213287688357645, "grad_norm": 551.2360124120286, "learning_rate": 1.819817581748026e-05, "loss": 428.8473, "step": 10510 }, { "epoch": 0.20232520121933628, "grad_norm": 511.39960278942294, "learning_rate": 1.8194679480197817e-05, "loss": 409.6806, "step": 10520 }, { "epoch": 0.2025175255550961, "grad_norm": 534.2559471885919, "learning_rate": 1.8191180090532608e-05, "loss": 417.9859, "step": 10530 }, { "epoch": 0.20270984989085594, "grad_norm": 501.39575407441913, "learning_rate": 1.8187677649788097e-05, "loss": 406.1442, "step": 10540 }, { "epoch": 0.20290217422661577, "grad_norm": 528.6779947784553, "learning_rate": 1.8184172159268884e-05, "loss": 417.8737, "step": 10550 }, { "epoch": 0.2030944985623756, "grad_norm": 524.4025728183602, "learning_rate": 1.818066362028071e-05, "loss": 411.8988, "step": 10560 }, { "epoch": 0.2032868228981354, "grad_norm": 595.632420522306, "learning_rate": 1.8177152034130442e-05, "loss": 426.585, "step": 10570 }, { "epoch": 0.20347914723389524, "grad_norm": 510.89813590025693, "learning_rate": 1.8173637402126093e-05, "loss": 425.3784, "step": 10580 }, { "epoch": 0.20367147156965507, "grad_norm": 513.174119758378, "learning_rate": 1.8170119725576808e-05, "loss": 415.6281, "step": 10590 }, { "epoch": 0.2038637959054149, "grad_norm": 505.80519222164645, "learning_rate": 1.8166599005792852e-05, "loss": 410.2682, "step": 10600 }, { "epoch": 0.20405612024117473, "grad_norm": 544.6945237539323, "learning_rate": 1.816307524408565e-05, "loss": 404.582, "step": 10610 }, { "epoch": 0.20424844457693453, "grad_norm": 520.879276910382, "learning_rate": 1.8159548441767732e-05, "loss": 408.4701, "step": 10620 }, { "epoch": 0.20444076891269436, "grad_norm": 477.1172774106002, "learning_rate": 1.8156018600152777e-05, "loss": 412.6134, "step": 10630 }, { "epoch": 0.2046330932484542, "grad_norm": 604.3311480874454, "learning_rate": 1.81524857205556e-05, "loss": 416.6437, "step": 10640 }, { "epoch": 0.20482541758421402, "grad_norm": 554.351834134022, "learning_rate": 1.814894980429213e-05, "loss": 430.0104, "step": 10650 }, { "epoch": 0.20501774191997385, "grad_norm": 513.705109326578, "learning_rate": 1.8145410852679447e-05, "loss": 411.1, "step": 10660 }, { "epoch": 0.20521006625573368, "grad_norm": 512.0090545380464, "learning_rate": 1.8141868867035745e-05, "loss": 407.035, "step": 10670 }, { "epoch": 0.20540239059149348, "grad_norm": 535.4145618185576, "learning_rate": 1.8138323848680354e-05, "loss": 405.9104, "step": 10680 }, { "epoch": 0.2055947149272533, "grad_norm": 500.0198126110811, "learning_rate": 1.813477579893374e-05, "loss": 409.5342, "step": 10690 }, { "epoch": 0.20578703926301314, "grad_norm": 542.989652958391, "learning_rate": 1.8131224719117497e-05, "loss": 409.1924, "step": 10700 }, { "epoch": 0.20597936359877297, "grad_norm": 539.8265827173909, "learning_rate": 1.8127670610554332e-05, "loss": 412.5566, "step": 10710 }, { "epoch": 0.2061716879345328, "grad_norm": 516.3627271813201, "learning_rate": 1.81241134745681e-05, "loss": 405.9106, "step": 10720 }, { "epoch": 0.20636401227029263, "grad_norm": 537.7886605094616, "learning_rate": 1.812055331248377e-05, "loss": 418.2211, "step": 10730 }, { "epoch": 0.20655633660605244, "grad_norm": 531.4364475833352, "learning_rate": 1.811699012562745e-05, "loss": 414.2586, "step": 10740 }, { "epoch": 0.20674866094181227, "grad_norm": 538.2785709537496, "learning_rate": 1.8113423915326362e-05, "loss": 422.3838, "step": 10750 }, { "epoch": 0.2069409852775721, "grad_norm": 537.5690068034183, "learning_rate": 1.8109854682908864e-05, "loss": 421.5552, "step": 10760 }, { "epoch": 0.20713330961333193, "grad_norm": 561.8138977513785, "learning_rate": 1.8106282429704436e-05, "loss": 413.7626, "step": 10770 }, { "epoch": 0.20732563394909176, "grad_norm": 530.3932512048439, "learning_rate": 1.810270715704368e-05, "loss": 421.6035, "step": 10780 }, { "epoch": 0.20751795828485156, "grad_norm": 528.0991394333488, "learning_rate": 1.809912886625833e-05, "loss": 410.8647, "step": 10790 }, { "epoch": 0.2077102826206114, "grad_norm": 484.9974792580638, "learning_rate": 1.8095547558681243e-05, "loss": 426.7599, "step": 10800 }, { "epoch": 0.20790260695637122, "grad_norm": 542.5439490570113, "learning_rate": 1.809196323564639e-05, "loss": 423.1881, "step": 10810 }, { "epoch": 0.20809493129213105, "grad_norm": 498.19894917262286, "learning_rate": 1.8088375898488873e-05, "loss": 405.2069, "step": 10820 }, { "epoch": 0.20828725562789088, "grad_norm": 503.4427791105701, "learning_rate": 1.808478554854492e-05, "loss": 416.0496, "step": 10830 }, { "epoch": 0.2084795799636507, "grad_norm": 504.0248423442197, "learning_rate": 1.8081192187151873e-05, "loss": 409.1388, "step": 10840 }, { "epoch": 0.20867190429941052, "grad_norm": 507.40350465388576, "learning_rate": 1.8077595815648202e-05, "loss": 404.2461, "step": 10850 }, { "epoch": 0.20886422863517035, "grad_norm": 490.9277981461252, "learning_rate": 1.8073996435373494e-05, "loss": 413.7467, "step": 10860 }, { "epoch": 0.20905655297093018, "grad_norm": 531.0572375677967, "learning_rate": 1.8070394047668466e-05, "loss": 410.2942, "step": 10870 }, { "epoch": 0.20924887730669, "grad_norm": 502.7509631142382, "learning_rate": 1.8066788653874936e-05, "loss": 399.2792, "step": 10880 }, { "epoch": 0.20944120164244984, "grad_norm": 501.4967718909897, "learning_rate": 1.806318025533586e-05, "loss": 411.6022, "step": 10890 }, { "epoch": 0.20963352597820964, "grad_norm": 541.2113298892342, "learning_rate": 1.805956885339531e-05, "loss": 421.027, "step": 10900 }, { "epoch": 0.20982585031396947, "grad_norm": 554.7804161009711, "learning_rate": 1.8055954449398472e-05, "loss": 423.1687, "step": 10910 }, { "epoch": 0.2100181746497293, "grad_norm": 577.628870388658, "learning_rate": 1.8052337044691648e-05, "loss": 407.1125, "step": 10920 }, { "epoch": 0.21021049898548913, "grad_norm": 481.78927926055235, "learning_rate": 1.8048716640622262e-05, "loss": 415.8896, "step": 10930 }, { "epoch": 0.21040282332124896, "grad_norm": 516.7605948800522, "learning_rate": 1.8045093238538856e-05, "loss": 422.0905, "step": 10940 }, { "epoch": 0.2105951476570088, "grad_norm": 516.2539525082283, "learning_rate": 1.8041466839791087e-05, "loss": 410.3786, "step": 10950 }, { "epoch": 0.2107874719927686, "grad_norm": 493.1980218771841, "learning_rate": 1.8037837445729733e-05, "loss": 410.5156, "step": 10960 }, { "epoch": 0.21097979632852842, "grad_norm": 542.3396341658416, "learning_rate": 1.803420505770668e-05, "loss": 405.1088, "step": 10970 }, { "epoch": 0.21117212066428825, "grad_norm": 517.8850704321621, "learning_rate": 1.803056967707493e-05, "loss": 405.2751, "step": 10980 }, { "epoch": 0.21136444500004808, "grad_norm": 482.43393376057685, "learning_rate": 1.8026931305188603e-05, "loss": 416.6449, "step": 10990 }, { "epoch": 0.21155676933580791, "grad_norm": 497.11512978520756, "learning_rate": 1.802328994340294e-05, "loss": 415.077, "step": 11000 }, { "epoch": 0.21174909367156775, "grad_norm": 582.0300690596135, "learning_rate": 1.8019645593074275e-05, "loss": 409.7937, "step": 11010 }, { "epoch": 0.21194141800732755, "grad_norm": 504.7725007010852, "learning_rate": 1.8015998255560082e-05, "loss": 394.9882, "step": 11020 }, { "epoch": 0.21213374234308738, "grad_norm": 528.387777493482, "learning_rate": 1.801234793221892e-05, "loss": 400.3989, "step": 11030 }, { "epoch": 0.2123260666788472, "grad_norm": 467.06890144295636, "learning_rate": 1.800869462441049e-05, "loss": 386.9421, "step": 11040 }, { "epoch": 0.21251839101460704, "grad_norm": 510.04313871139175, "learning_rate": 1.8005038333495572e-05, "loss": 409.0464, "step": 11050 }, { "epoch": 0.21271071535036687, "grad_norm": 564.1915961993303, "learning_rate": 1.8001379060836088e-05, "loss": 410.9805, "step": 11060 }, { "epoch": 0.21290303968612667, "grad_norm": 495.65682272039265, "learning_rate": 1.7997716807795046e-05, "loss": 418.5792, "step": 11070 }, { "epoch": 0.2130953640218865, "grad_norm": 532.2225276470311, "learning_rate": 1.7994051575736585e-05, "loss": 402.7211, "step": 11080 }, { "epoch": 0.21328768835764633, "grad_norm": 460.54738570783405, "learning_rate": 1.7990383366025935e-05, "loss": 413.6778, "step": 11090 }, { "epoch": 0.21348001269340616, "grad_norm": 563.9381616777556, "learning_rate": 1.7986712180029448e-05, "loss": 402.2324, "step": 11100 }, { "epoch": 0.213672337029166, "grad_norm": 557.7393630089101, "learning_rate": 1.798303801911458e-05, "loss": 409.0407, "step": 11110 }, { "epoch": 0.21386466136492582, "grad_norm": 464.7240432785937, "learning_rate": 1.7979360884649894e-05, "loss": 406.1634, "step": 11120 }, { "epoch": 0.21405698570068563, "grad_norm": 519.6839412427632, "learning_rate": 1.7975680778005058e-05, "loss": 403.6662, "step": 11130 }, { "epoch": 0.21424931003644546, "grad_norm": 520.6865750830053, "learning_rate": 1.7971997700550856e-05, "loss": 412.701, "step": 11140 }, { "epoch": 0.2144416343722053, "grad_norm": 532.444937859847, "learning_rate": 1.7968311653659177e-05, "loss": 413.2475, "step": 11150 }, { "epoch": 0.21463395870796512, "grad_norm": 513.7990626397482, "learning_rate": 1.7964622638703003e-05, "loss": 414.746, "step": 11160 }, { "epoch": 0.21482628304372495, "grad_norm": 572.7112169973356, "learning_rate": 1.796093065705644e-05, "loss": 401.9031, "step": 11170 }, { "epoch": 0.21501860737948475, "grad_norm": 539.3257332781393, "learning_rate": 1.7957235710094686e-05, "loss": 394.6945, "step": 11180 }, { "epoch": 0.21521093171524458, "grad_norm": 497.9512755554518, "learning_rate": 1.7953537799194042e-05, "loss": 411.4992, "step": 11190 }, { "epoch": 0.2154032560510044, "grad_norm": 514.0453246141834, "learning_rate": 1.7949836925731934e-05, "loss": 421.7444, "step": 11200 }, { "epoch": 0.21559558038676424, "grad_norm": 511.29439032055143, "learning_rate": 1.7946133091086858e-05, "loss": 414.8357, "step": 11210 }, { "epoch": 0.21578790472252407, "grad_norm": 529.0137973053264, "learning_rate": 1.7942426296638447e-05, "loss": 411.2592, "step": 11220 }, { "epoch": 0.2159802290582839, "grad_norm": 486.62988226461977, "learning_rate": 1.7938716543767412e-05, "loss": 405.5414, "step": 11230 }, { "epoch": 0.2161725533940437, "grad_norm": 562.0868523718697, "learning_rate": 1.7935003833855576e-05, "loss": 416.3538, "step": 11240 }, { "epoch": 0.21636487772980353, "grad_norm": 536.2486147099664, "learning_rate": 1.7931288168285863e-05, "loss": 408.2126, "step": 11250 }, { "epoch": 0.21655720206556336, "grad_norm": 523.3446221690112, "learning_rate": 1.79275695484423e-05, "loss": 403.6002, "step": 11260 }, { "epoch": 0.2167495264013232, "grad_norm": 604.8486062384638, "learning_rate": 1.7923847975710003e-05, "loss": 410.7343, "step": 11270 }, { "epoch": 0.21694185073708303, "grad_norm": 524.8062386188335, "learning_rate": 1.7920123451475203e-05, "loss": 421.6781, "step": 11280 }, { "epoch": 0.21713417507284286, "grad_norm": 501.58986967916536, "learning_rate": 1.7916395977125227e-05, "loss": 402.3847, "step": 11290 }, { "epoch": 0.21732649940860266, "grad_norm": 497.1449132876496, "learning_rate": 1.7912665554048486e-05, "loss": 413.3773, "step": 11300 }, { "epoch": 0.2175188237443625, "grad_norm": 492.83356620960893, "learning_rate": 1.7908932183634515e-05, "loss": 407.8787, "step": 11310 }, { "epoch": 0.21771114808012232, "grad_norm": 493.8232105420126, "learning_rate": 1.790519586727392e-05, "loss": 413.0008, "step": 11320 }, { "epoch": 0.21790347241588215, "grad_norm": 511.52334341120667, "learning_rate": 1.790145660635843e-05, "loss": 409.4102, "step": 11330 }, { "epoch": 0.21809579675164198, "grad_norm": 485.98400907009767, "learning_rate": 1.7897714402280844e-05, "loss": 412.8913, "step": 11340 }, { "epoch": 0.21828812108740178, "grad_norm": 529.7334552106086, "learning_rate": 1.789396925643508e-05, "loss": 426.9078, "step": 11350 }, { "epoch": 0.2184804454231616, "grad_norm": 511.9475347544775, "learning_rate": 1.7890221170216144e-05, "loss": 404.4276, "step": 11360 }, { "epoch": 0.21867276975892144, "grad_norm": 493.1437768338328, "learning_rate": 1.788647014502013e-05, "loss": 401.8671, "step": 11370 }, { "epoch": 0.21886509409468127, "grad_norm": 528.4241937381482, "learning_rate": 1.7882716182244242e-05, "loss": 400.7938, "step": 11380 }, { "epoch": 0.2190574184304411, "grad_norm": 514.5513086963199, "learning_rate": 1.7878959283286758e-05, "loss": 404.7781, "step": 11390 }, { "epoch": 0.21924974276620093, "grad_norm": 572.177159998548, "learning_rate": 1.787519944954707e-05, "loss": 403.674, "step": 11400 }, { "epoch": 0.21944206710196074, "grad_norm": 587.3937210816053, "learning_rate": 1.7871436682425645e-05, "loss": 405.5933, "step": 11410 }, { "epoch": 0.21963439143772057, "grad_norm": 1165.6479538826209, "learning_rate": 1.786767098332406e-05, "loss": 413.9093, "step": 11420 }, { "epoch": 0.2198267157734804, "grad_norm": 478.1020254282917, "learning_rate": 1.7863902353644972e-05, "loss": 393.565, "step": 11430 }, { "epoch": 0.22001904010924023, "grad_norm": 492.4046983344083, "learning_rate": 1.7860130794792137e-05, "loss": 404.3131, "step": 11440 }, { "epoch": 0.22021136444500006, "grad_norm": 551.1213544422876, "learning_rate": 1.7856356308170394e-05, "loss": 413.0437, "step": 11450 }, { "epoch": 0.22040368878075986, "grad_norm": 578.7197861461077, "learning_rate": 1.7852578895185675e-05, "loss": 421.9933, "step": 11460 }, { "epoch": 0.2205960131165197, "grad_norm": 647.3976116167437, "learning_rate": 1.7848798557245008e-05, "loss": 413.7521, "step": 11470 }, { "epoch": 0.22078833745227952, "grad_norm": 535.4830253335278, "learning_rate": 1.7845015295756506e-05, "loss": 417.5281, "step": 11480 }, { "epoch": 0.22098066178803935, "grad_norm": 486.6803793001602, "learning_rate": 1.784122911212937e-05, "loss": 406.7004, "step": 11490 }, { "epoch": 0.22117298612379918, "grad_norm": 473.37143544185335, "learning_rate": 1.7837440007773895e-05, "loss": 401.1356, "step": 11500 }, { "epoch": 0.221365310459559, "grad_norm": 537.4054390682203, "learning_rate": 1.783364798410146e-05, "loss": 403.5251, "step": 11510 }, { "epoch": 0.22155763479531881, "grad_norm": 545.1805428427086, "learning_rate": 1.782985304252452e-05, "loss": 404.6236, "step": 11520 }, { "epoch": 0.22174995913107864, "grad_norm": 519.245572912035, "learning_rate": 1.7826055184456643e-05, "loss": 414.8062, "step": 11530 }, { "epoch": 0.22194228346683847, "grad_norm": 658.6581715472673, "learning_rate": 1.7822254411312455e-05, "loss": 411.0006, "step": 11540 }, { "epoch": 0.2221346078025983, "grad_norm": 505.36060857534454, "learning_rate": 1.781845072450769e-05, "loss": 399.6282, "step": 11550 }, { "epoch": 0.22232693213835814, "grad_norm": 628.4760229429728, "learning_rate": 1.7814644125459157e-05, "loss": 412.5159, "step": 11560 }, { "epoch": 0.22251925647411797, "grad_norm": 570.8506741190848, "learning_rate": 1.781083461558475e-05, "loss": 402.9419, "step": 11570 }, { "epoch": 0.22271158080987777, "grad_norm": 513.782891668989, "learning_rate": 1.7807022196303447e-05, "loss": 408.1303, "step": 11580 }, { "epoch": 0.2229039051456376, "grad_norm": 501.2028742366539, "learning_rate": 1.7803206869035318e-05, "loss": 403.4169, "step": 11590 }, { "epoch": 0.22309622948139743, "grad_norm": 519.7860185158811, "learning_rate": 1.7799388635201498e-05, "loss": 410.635, "step": 11600 }, { "epoch": 0.22328855381715726, "grad_norm": 471.10481959750166, "learning_rate": 1.7795567496224226e-05, "loss": 404.7676, "step": 11610 }, { "epoch": 0.2234808781529171, "grad_norm": 525.033983104948, "learning_rate": 1.7791743453526812e-05, "loss": 411.6758, "step": 11620 }, { "epoch": 0.2236732024886769, "grad_norm": 506.5371381669754, "learning_rate": 1.778791650853364e-05, "loss": 405.416, "step": 11630 }, { "epoch": 0.22386552682443672, "grad_norm": 542.2771407282709, "learning_rate": 1.77840866626702e-05, "loss": 402.695, "step": 11640 }, { "epoch": 0.22405785116019655, "grad_norm": 489.49733714748487, "learning_rate": 1.7780253917363026e-05, "loss": 407.5623, "step": 11650 }, { "epoch": 0.22425017549595638, "grad_norm": 537.2319933491199, "learning_rate": 1.7776418274039767e-05, "loss": 404.8456, "step": 11660 }, { "epoch": 0.2244424998317162, "grad_norm": 598.2556782268568, "learning_rate": 1.7772579734129136e-05, "loss": 411.4863, "step": 11670 }, { "epoch": 0.22463482416747604, "grad_norm": 475.40109318967745, "learning_rate": 1.776873829906092e-05, "loss": 405.2609, "step": 11680 }, { "epoch": 0.22482714850323585, "grad_norm": 580.8312358329363, "learning_rate": 1.776489397026599e-05, "loss": 422.9873, "step": 11690 }, { "epoch": 0.22501947283899568, "grad_norm": 541.0152834448922, "learning_rate": 1.7761046749176302e-05, "loss": 399.0734, "step": 11700 }, { "epoch": 0.2252117971747555, "grad_norm": 540.985137145699, "learning_rate": 1.7757196637224874e-05, "loss": 409.2649, "step": 11710 }, { "epoch": 0.22540412151051534, "grad_norm": 497.6752914536555, "learning_rate": 1.7753343635845817e-05, "loss": 402.5908, "step": 11720 }, { "epoch": 0.22559644584627517, "grad_norm": 490.4517266066059, "learning_rate": 1.7749487746474305e-05, "loss": 402.6892, "step": 11730 }, { "epoch": 0.22578877018203497, "grad_norm": 508.15890225317537, "learning_rate": 1.7745628970546592e-05, "loss": 408.5553, "step": 11740 }, { "epoch": 0.2259810945177948, "grad_norm": 496.42361433271526, "learning_rate": 1.774176730950001e-05, "loss": 388.7166, "step": 11750 }, { "epoch": 0.22617341885355463, "grad_norm": 500.959891259976, "learning_rate": 1.7737902764772967e-05, "loss": 417.2532, "step": 11760 }, { "epoch": 0.22636574318931446, "grad_norm": 626.9393137696261, "learning_rate": 1.773403533780494e-05, "loss": 402.5101, "step": 11770 }, { "epoch": 0.2265580675250743, "grad_norm": 491.34585223492417, "learning_rate": 1.7730165030036482e-05, "loss": 401.105, "step": 11780 }, { "epoch": 0.22675039186083412, "grad_norm": 529.215403286338, "learning_rate": 1.772629184290922e-05, "loss": 402.1561, "step": 11790 }, { "epoch": 0.22694271619659392, "grad_norm": 508.9339115369465, "learning_rate": 1.7722415777865845e-05, "loss": 395.756, "step": 11800 }, { "epoch": 0.22713504053235375, "grad_norm": 520.80420386598, "learning_rate": 1.771853683635014e-05, "loss": 402.6091, "step": 11810 }, { "epoch": 0.22732736486811359, "grad_norm": 476.04083255495846, "learning_rate": 1.7714655019806932e-05, "loss": 403.5805, "step": 11820 }, { "epoch": 0.22751968920387342, "grad_norm": 550.7913044470721, "learning_rate": 1.7710770329682145e-05, "loss": 416.7787, "step": 11830 }, { "epoch": 0.22771201353963325, "grad_norm": 505.5853126590449, "learning_rate": 1.770688276742276e-05, "loss": 409.7005, "step": 11840 }, { "epoch": 0.22790433787539308, "grad_norm": 515.014467614758, "learning_rate": 1.770299233447682e-05, "loss": 404.2626, "step": 11850 }, { "epoch": 0.22809666221115288, "grad_norm": 473.6618477989999, "learning_rate": 1.769909903229346e-05, "loss": 398.4137, "step": 11860 }, { "epoch": 0.2282889865469127, "grad_norm": 689.5768301005184, "learning_rate": 1.7695202862322863e-05, "loss": 403.3624, "step": 11870 }, { "epoch": 0.22848131088267254, "grad_norm": 513.5343481211617, "learning_rate": 1.769130382601629e-05, "loss": 410.9995, "step": 11880 }, { "epoch": 0.22867363521843237, "grad_norm": 537.7866624459691, "learning_rate": 1.768740192482607e-05, "loss": 408.0978, "step": 11890 }, { "epoch": 0.2288659595541922, "grad_norm": 519.0439904283717, "learning_rate": 1.7683497160205595e-05, "loss": 408.7737, "step": 11900 }, { "epoch": 0.229058283889952, "grad_norm": 520.000781685986, "learning_rate": 1.7679589533609323e-05, "loss": 412.1072, "step": 11910 }, { "epoch": 0.22925060822571183, "grad_norm": 504.07478428993613, "learning_rate": 1.767567904649278e-05, "loss": 418.4932, "step": 11920 }, { "epoch": 0.22944293256147166, "grad_norm": 548.4436125907348, "learning_rate": 1.7671765700312567e-05, "loss": 412.4563, "step": 11930 }, { "epoch": 0.2296352568972315, "grad_norm": 516.7824892769144, "learning_rate": 1.7667849496526327e-05, "loss": 399.2371, "step": 11940 }, { "epoch": 0.22982758123299132, "grad_norm": 548.6939758778841, "learning_rate": 1.7663930436592793e-05, "loss": 409.4398, "step": 11950 }, { "epoch": 0.23001990556875115, "grad_norm": 490.7281621447608, "learning_rate": 1.7660008521971744e-05, "loss": 405.1733, "step": 11960 }, { "epoch": 0.23021222990451096, "grad_norm": 511.16713007607234, "learning_rate": 1.765608375412403e-05, "loss": 403.498, "step": 11970 }, { "epoch": 0.2304045542402708, "grad_norm": 515.4528928202382, "learning_rate": 1.7652156134511563e-05, "loss": 410.0643, "step": 11980 }, { "epoch": 0.23059687857603062, "grad_norm": 479.16690893280617, "learning_rate": 1.7648225664597314e-05, "loss": 412.6704, "step": 11990 }, { "epoch": 0.23078920291179045, "grad_norm": 494.1166178415702, "learning_rate": 1.764429234584532e-05, "loss": 406.721, "step": 12000 }, { "epoch": 0.23098152724755028, "grad_norm": 482.23576240889537, "learning_rate": 1.7640356179720674e-05, "loss": 386.7119, "step": 12010 }, { "epoch": 0.23117385158331008, "grad_norm": 509.9384623017886, "learning_rate": 1.7636417167689538e-05, "loss": 411.914, "step": 12020 }, { "epoch": 0.2313661759190699, "grad_norm": 524.1668014236113, "learning_rate": 1.7632475311219125e-05, "loss": 394.6435, "step": 12030 }, { "epoch": 0.23155850025482974, "grad_norm": 518.9214490502344, "learning_rate": 1.7628530611777716e-05, "loss": 405.5806, "step": 12040 }, { "epoch": 0.23175082459058957, "grad_norm": 525.9921426863668, "learning_rate": 1.7624583070834646e-05, "loss": 410.241, "step": 12050 }, { "epoch": 0.2319431489263494, "grad_norm": 586.9930140832246, "learning_rate": 1.7620632689860298e-05, "loss": 410.2238, "step": 12060 }, { "epoch": 0.23213547326210923, "grad_norm": 535.9314428181268, "learning_rate": 1.761667947032614e-05, "loss": 414.5618, "step": 12070 }, { "epoch": 0.23232779759786903, "grad_norm": 493.13645952804507, "learning_rate": 1.761272341370467e-05, "loss": 400.5868, "step": 12080 }, { "epoch": 0.23252012193362886, "grad_norm": 502.0074543058723, "learning_rate": 1.7608764521469456e-05, "loss": 394.2096, "step": 12090 }, { "epoch": 0.2327124462693887, "grad_norm": 447.8403171622099, "learning_rate": 1.760480279509512e-05, "loss": 400.9692, "step": 12100 }, { "epoch": 0.23290477060514853, "grad_norm": 487.7660518837125, "learning_rate": 1.7600838236057342e-05, "loss": 408.061, "step": 12110 }, { "epoch": 0.23309709494090836, "grad_norm": 504.32252923341, "learning_rate": 1.759687084583285e-05, "loss": 400.5435, "step": 12120 }, { "epoch": 0.2332894192766682, "grad_norm": 521.4183192579749, "learning_rate": 1.7592900625899437e-05, "loss": 410.5279, "step": 12130 }, { "epoch": 0.233481743612428, "grad_norm": 520.2086183398725, "learning_rate": 1.7588927577735943e-05, "loss": 396.2346, "step": 12140 }, { "epoch": 0.23367406794818782, "grad_norm": 541.2095691095466, "learning_rate": 1.758495170282226e-05, "loss": 402.8773, "step": 12150 }, { "epoch": 0.23386639228394765, "grad_norm": 497.34174095448043, "learning_rate": 1.7580973002639337e-05, "loss": 409.8557, "step": 12160 }, { "epoch": 0.23405871661970748, "grad_norm": 515.004309869215, "learning_rate": 1.7576991478669174e-05, "loss": 403.8827, "step": 12170 }, { "epoch": 0.2342510409554673, "grad_norm": 490.6503805558587, "learning_rate": 1.7573007132394823e-05, "loss": 403.6582, "step": 12180 }, { "epoch": 0.2344433652912271, "grad_norm": 486.47122799114857, "learning_rate": 1.756901996530039e-05, "loss": 406.2761, "step": 12190 }, { "epoch": 0.23463568962698694, "grad_norm": 510.7373931463473, "learning_rate": 1.7565029978871025e-05, "loss": 409.7734, "step": 12200 }, { "epoch": 0.23482801396274677, "grad_norm": 503.7204131351527, "learning_rate": 1.7561037174592933e-05, "loss": 402.4177, "step": 12210 }, { "epoch": 0.2350203382985066, "grad_norm": 613.9987338779152, "learning_rate": 1.7557041553953368e-05, "loss": 396.2206, "step": 12220 }, { "epoch": 0.23521266263426643, "grad_norm": 497.95913507578257, "learning_rate": 1.7553043118440634e-05, "loss": 400.1966, "step": 12230 }, { "epoch": 0.23540498697002626, "grad_norm": 515.1885704157239, "learning_rate": 1.7549041869544077e-05, "loss": 401.2695, "step": 12240 }, { "epoch": 0.23559731130578607, "grad_norm": 526.6068757020573, "learning_rate": 1.7545037808754105e-05, "loss": 394.7766, "step": 12250 }, { "epoch": 0.2357896356415459, "grad_norm": 499.4230551838952, "learning_rate": 1.754103093756216e-05, "loss": 387.9822, "step": 12260 }, { "epoch": 0.23598195997730573, "grad_norm": 463.07631244890723, "learning_rate": 1.7537021257460732e-05, "loss": 398.1867, "step": 12270 }, { "epoch": 0.23617428431306556, "grad_norm": 490.5667042577999, "learning_rate": 1.7533008769943366e-05, "loss": 394.0945, "step": 12280 }, { "epoch": 0.2363666086488254, "grad_norm": 486.8045600759694, "learning_rate": 1.7528993476504644e-05, "loss": 398.946, "step": 12290 }, { "epoch": 0.2365589329845852, "grad_norm": 477.7223469608313, "learning_rate": 1.7524975378640198e-05, "loss": 399.5925, "step": 12300 }, { "epoch": 0.23675125732034502, "grad_norm": 490.00348840587196, "learning_rate": 1.75209544778467e-05, "loss": 400.93, "step": 12310 }, { "epoch": 0.23694358165610485, "grad_norm": 526.6182594372053, "learning_rate": 1.7516930775621873e-05, "loss": 417.465, "step": 12320 }, { "epoch": 0.23713590599186468, "grad_norm": 511.61066304254655, "learning_rate": 1.751290427346448e-05, "loss": 400.8351, "step": 12330 }, { "epoch": 0.2373282303276245, "grad_norm": 494.9780163791452, "learning_rate": 1.7508874972874325e-05, "loss": 408.4519, "step": 12340 }, { "epoch": 0.23752055466338434, "grad_norm": 546.2624513285988, "learning_rate": 1.7504842875352254e-05, "loss": 403.3538, "step": 12350 }, { "epoch": 0.23771287899914414, "grad_norm": 499.7649936159823, "learning_rate": 1.7500807982400155e-05, "loss": 400.1042, "step": 12360 }, { "epoch": 0.23790520333490398, "grad_norm": 483.472968093783, "learning_rate": 1.749677029552097e-05, "loss": 397.2355, "step": 12370 }, { "epoch": 0.2380975276706638, "grad_norm": 490.58246762273086, "learning_rate": 1.7492729816218656e-05, "loss": 404.0796, "step": 12380 }, { "epoch": 0.23828985200642364, "grad_norm": 512.0959230228034, "learning_rate": 1.7488686545998237e-05, "loss": 392.8472, "step": 12390 }, { "epoch": 0.23848217634218347, "grad_norm": 577.4452246167682, "learning_rate": 1.7484640486365757e-05, "loss": 401.7335, "step": 12400 }, { "epoch": 0.2386745006779433, "grad_norm": 558.3170516246616, "learning_rate": 1.748059163882831e-05, "loss": 406.5239, "step": 12410 }, { "epoch": 0.2388668250137031, "grad_norm": 505.0730131020004, "learning_rate": 1.747654000489402e-05, "loss": 399.255, "step": 12420 }, { "epoch": 0.23905914934946293, "grad_norm": 482.447998986777, "learning_rate": 1.7472485586072062e-05, "loss": 397.8921, "step": 12430 }, { "epoch": 0.23925147368522276, "grad_norm": 527.1088462627331, "learning_rate": 1.746842838387264e-05, "loss": 393.3306, "step": 12440 }, { "epoch": 0.2394437980209826, "grad_norm": 545.7977664145235, "learning_rate": 1.746436839980698e-05, "loss": 402.0523, "step": 12450 }, { "epoch": 0.23963612235674242, "grad_norm": 476.520326166508, "learning_rate": 1.7460305635387372e-05, "loss": 407.1924, "step": 12460 }, { "epoch": 0.23982844669250222, "grad_norm": 535.8142212509558, "learning_rate": 1.745624009212713e-05, "loss": 402.2665, "step": 12470 }, { "epoch": 0.24002077102826205, "grad_norm": 523.9774108562103, "learning_rate": 1.7452171771540593e-05, "loss": 406.2526, "step": 12480 }, { "epoch": 0.24021309536402188, "grad_norm": 518.210626682501, "learning_rate": 1.744810067514315e-05, "loss": 404.5391, "step": 12490 }, { "epoch": 0.24040541969978171, "grad_norm": 464.7851437183551, "learning_rate": 1.7444026804451214e-05, "loss": 401.0928, "step": 12500 }, { "epoch": 0.24059774403554154, "grad_norm": 511.1698751849852, "learning_rate": 1.7439950160982236e-05, "loss": 398.2425, "step": 12510 }, { "epoch": 0.24079006837130137, "grad_norm": 494.84867844615843, "learning_rate": 1.7435870746254697e-05, "loss": 399.8055, "step": 12520 }, { "epoch": 0.24098239270706118, "grad_norm": 489.06260400465635, "learning_rate": 1.7431788561788116e-05, "loss": 397.8201, "step": 12530 }, { "epoch": 0.241174717042821, "grad_norm": 460.25957431996915, "learning_rate": 1.742770360910303e-05, "loss": 409.0396, "step": 12540 }, { "epoch": 0.24136704137858084, "grad_norm": 471.6208125880307, "learning_rate": 1.7423615889721027e-05, "loss": 395.8972, "step": 12550 }, { "epoch": 0.24155936571434067, "grad_norm": 485.304085396627, "learning_rate": 1.7419525405164705e-05, "loss": 411.7617, "step": 12560 }, { "epoch": 0.2417516900501005, "grad_norm": 478.4964002415521, "learning_rate": 1.7415432156957713e-05, "loss": 403.1268, "step": 12570 }, { "epoch": 0.24194401438586033, "grad_norm": 542.8550894111927, "learning_rate": 1.741133614662471e-05, "loss": 403.8331, "step": 12580 }, { "epoch": 0.24213633872162013, "grad_norm": 523.1138850783602, "learning_rate": 1.7407237375691394e-05, "loss": 414.3643, "step": 12590 }, { "epoch": 0.24232866305737996, "grad_norm": 524.4502760690234, "learning_rate": 1.740313584568449e-05, "loss": 395.4663, "step": 12600 }, { "epoch": 0.2425209873931398, "grad_norm": 478.83937448034806, "learning_rate": 1.739903155813175e-05, "loss": 409.1269, "step": 12610 }, { "epoch": 0.24271331172889962, "grad_norm": 506.720439151921, "learning_rate": 1.7394924514561955e-05, "loss": 403.15, "step": 12620 }, { "epoch": 0.24290563606465945, "grad_norm": 453.23440418633027, "learning_rate": 1.739081471650491e-05, "loss": 394.0672, "step": 12630 }, { "epoch": 0.24309796040041926, "grad_norm": 489.07783280922706, "learning_rate": 1.7386702165491443e-05, "loss": 409.9712, "step": 12640 }, { "epoch": 0.24329028473617909, "grad_norm": 520.0588970362297, "learning_rate": 1.738258686305342e-05, "loss": 396.5729, "step": 12650 }, { "epoch": 0.24348260907193892, "grad_norm": 549.5339785495672, "learning_rate": 1.7378468810723713e-05, "loss": 407.8526, "step": 12660 }, { "epoch": 0.24367493340769875, "grad_norm": 500.9440479359089, "learning_rate": 1.7374348010036235e-05, "loss": 405.7856, "step": 12670 }, { "epoch": 0.24386725774345858, "grad_norm": 493.83052897723354, "learning_rate": 1.7370224462525916e-05, "loss": 401.2475, "step": 12680 }, { "epoch": 0.2440595820792184, "grad_norm": 550.5896558027431, "learning_rate": 1.736609816972871e-05, "loss": 404.3938, "step": 12690 }, { "epoch": 0.2442519064149782, "grad_norm": 493.96843272790596, "learning_rate": 1.7361969133181585e-05, "loss": 385.9142, "step": 12700 }, { "epoch": 0.24444423075073804, "grad_norm": 540.9977871999351, "learning_rate": 1.735783735442255e-05, "loss": 403.5269, "step": 12710 }, { "epoch": 0.24463655508649787, "grad_norm": 510.8629280468882, "learning_rate": 1.7353702834990617e-05, "loss": 425.3035, "step": 12720 }, { "epoch": 0.2448288794222577, "grad_norm": 544.1964570733115, "learning_rate": 1.7349565576425828e-05, "loss": 386.6944, "step": 12730 }, { "epoch": 0.24502120375801753, "grad_norm": 516.9311598070396, "learning_rate": 1.7345425580269245e-05, "loss": 403.9177, "step": 12740 }, { "epoch": 0.24521352809377733, "grad_norm": 539.7679779782113, "learning_rate": 1.734128284806295e-05, "loss": 397.8656, "step": 12750 }, { "epoch": 0.24540585242953716, "grad_norm": 514.0789548166919, "learning_rate": 1.7337137381350033e-05, "loss": 397.7061, "step": 12760 }, { "epoch": 0.245598176765297, "grad_norm": 503.9537545664038, "learning_rate": 1.7332989181674623e-05, "loss": 386.2186, "step": 12770 }, { "epoch": 0.24579050110105682, "grad_norm": 517.8654270094286, "learning_rate": 1.7328838250581846e-05, "loss": 395.1542, "step": 12780 }, { "epoch": 0.24598282543681665, "grad_norm": 512.873321453811, "learning_rate": 1.7324684589617862e-05, "loss": 394.3579, "step": 12790 }, { "epoch": 0.24617514977257648, "grad_norm": 582.1269460735923, "learning_rate": 1.7320528200329846e-05, "loss": 403.6785, "step": 12800 }, { "epoch": 0.2463674741083363, "grad_norm": 548.9962404996943, "learning_rate": 1.7316369084265973e-05, "loss": 390.926, "step": 12810 }, { "epoch": 0.24655979844409612, "grad_norm": 555.163049282594, "learning_rate": 1.731220724297545e-05, "loss": 386.4555, "step": 12820 }, { "epoch": 0.24675212277985595, "grad_norm": 631.6775856350101, "learning_rate": 1.730804267800849e-05, "loss": 401.4694, "step": 12830 }, { "epoch": 0.24694444711561578, "grad_norm": 602.7890327694288, "learning_rate": 1.7303875390916338e-05, "loss": 388.7352, "step": 12840 }, { "epoch": 0.2471367714513756, "grad_norm": 526.4574971592747, "learning_rate": 1.729970538325122e-05, "loss": 389.842, "step": 12850 }, { "epoch": 0.24732909578713544, "grad_norm": 529.7133168402188, "learning_rate": 1.7295532656566413e-05, "loss": 405.9639, "step": 12860 }, { "epoch": 0.24752142012289524, "grad_norm": 507.10701570307333, "learning_rate": 1.729135721241618e-05, "loss": 392.3189, "step": 12870 }, { "epoch": 0.24771374445865507, "grad_norm": 519.7929294037986, "learning_rate": 1.7287179052355803e-05, "loss": 407.6865, "step": 12880 }, { "epoch": 0.2479060687944149, "grad_norm": 545.1179594167253, "learning_rate": 1.7282998177941586e-05, "loss": 410.6187, "step": 12890 }, { "epoch": 0.24809839313017473, "grad_norm": 479.04999404581645, "learning_rate": 1.7278814590730826e-05, "loss": 393.8491, "step": 12900 }, { "epoch": 0.24829071746593456, "grad_norm": 488.16137893157065, "learning_rate": 1.7274628292281846e-05, "loss": 392.4036, "step": 12910 }, { "epoch": 0.24848304180169437, "grad_norm": 491.38506954197476, "learning_rate": 1.727043928415397e-05, "loss": 396.4322, "step": 12920 }, { "epoch": 0.2486753661374542, "grad_norm": 500.6580624908905, "learning_rate": 1.726624756790754e-05, "loss": 395.8262, "step": 12930 }, { "epoch": 0.24886769047321403, "grad_norm": 481.581111220438, "learning_rate": 1.7262053145103893e-05, "loss": 405.2584, "step": 12940 }, { "epoch": 0.24906001480897386, "grad_norm": 559.6000491084448, "learning_rate": 1.7257856017305387e-05, "loss": 402.569, "step": 12950 }, { "epoch": 0.2492523391447337, "grad_norm": 523.9419639673177, "learning_rate": 1.725365618607538e-05, "loss": 393.6709, "step": 12960 }, { "epoch": 0.24944466348049352, "grad_norm": 484.91979811737974, "learning_rate": 1.7249453652978244e-05, "loss": 394.9398, "step": 12970 }, { "epoch": 0.24963698781625332, "grad_norm": 458.8732700503137, "learning_rate": 1.7245248419579353e-05, "loss": 404.7233, "step": 12980 }, { "epoch": 0.24982931215201315, "grad_norm": 577.4922009839557, "learning_rate": 1.7241040487445082e-05, "loss": 392.0792, "step": 12990 }, { "epoch": 0.250021636487773, "grad_norm": 478.1826052259454, "learning_rate": 1.723682985814282e-05, "loss": 394.2405, "step": 13000 }, { "epoch": 0.2502139608235328, "grad_norm": 531.693456444788, "learning_rate": 1.7232616533240958e-05, "loss": 399.8074, "step": 13010 }, { "epoch": 0.25040628515929264, "grad_norm": 522.370861953215, "learning_rate": 1.7228400514308884e-05, "loss": 404.5146, "step": 13020 }, { "epoch": 0.25059860949505247, "grad_norm": 538.5987587828238, "learning_rate": 1.7224181802917003e-05, "loss": 393.8064, "step": 13030 }, { "epoch": 0.2507909338308123, "grad_norm": 488.79228764564897, "learning_rate": 1.721996040063671e-05, "loss": 401.6155, "step": 13040 }, { "epoch": 0.25098325816657213, "grad_norm": 575.877792016825, "learning_rate": 1.7215736309040408e-05, "loss": 394.1593, "step": 13050 }, { "epoch": 0.2511755825023319, "grad_norm": 573.6376108302794, "learning_rate": 1.7211509529701507e-05, "loss": 394.0724, "step": 13060 }, { "epoch": 0.25136790683809174, "grad_norm": 502.46104757318847, "learning_rate": 1.7207280064194403e-05, "loss": 400.841, "step": 13070 }, { "epoch": 0.25156023117385157, "grad_norm": 593.6212569720309, "learning_rate": 1.7203047914094514e-05, "loss": 392.9266, "step": 13080 }, { "epoch": 0.2517525555096114, "grad_norm": 534.8493134328581, "learning_rate": 1.7198813080978235e-05, "loss": 398.4978, "step": 13090 }, { "epoch": 0.25194487984537123, "grad_norm": 508.50000822439955, "learning_rate": 1.7194575566422975e-05, "loss": 402.9541, "step": 13100 }, { "epoch": 0.25213720418113106, "grad_norm": 503.46795981620033, "learning_rate": 1.719033537200714e-05, "loss": 387.8704, "step": 13110 }, { "epoch": 0.2523295285168909, "grad_norm": 552.8418782386735, "learning_rate": 1.7186092499310133e-05, "loss": 400.2994, "step": 13120 }, { "epoch": 0.2525218528526507, "grad_norm": 490.59823005418565, "learning_rate": 1.7181846949912347e-05, "loss": 394.2668, "step": 13130 }, { "epoch": 0.25271417718841055, "grad_norm": 456.0405912709396, "learning_rate": 1.717759872539519e-05, "loss": 387.0123, "step": 13140 }, { "epoch": 0.2529065015241704, "grad_norm": 527.3645399737716, "learning_rate": 1.7173347827341046e-05, "loss": 391.4127, "step": 13150 }, { "epoch": 0.2530988258599302, "grad_norm": 500.5945889224094, "learning_rate": 1.7169094257333307e-05, "loss": 407.2625, "step": 13160 }, { "epoch": 0.25329115019569, "grad_norm": 560.8620724587252, "learning_rate": 1.716483801695636e-05, "loss": 402.7937, "step": 13170 }, { "epoch": 0.2534834745314498, "grad_norm": 515.6376667044018, "learning_rate": 1.7160579107795587e-05, "loss": 402.5097, "step": 13180 }, { "epoch": 0.25367579886720965, "grad_norm": 495.8000810614592, "learning_rate": 1.715631753143735e-05, "loss": 390.1797, "step": 13190 }, { "epoch": 0.2538681232029695, "grad_norm": 506.2588397467559, "learning_rate": 1.715205328946903e-05, "loss": 406.007, "step": 13200 }, { "epoch": 0.2540604475387293, "grad_norm": 486.92849925236, "learning_rate": 1.7147786383478978e-05, "loss": 393.3736, "step": 13210 }, { "epoch": 0.25425277187448914, "grad_norm": 472.11951109854493, "learning_rate": 1.7143516815056545e-05, "loss": 405.0434, "step": 13220 }, { "epoch": 0.25444509621024897, "grad_norm": 485.4528015488731, "learning_rate": 1.713924458579208e-05, "loss": 413.6899, "step": 13230 }, { "epoch": 0.2546374205460088, "grad_norm": 464.97778065736014, "learning_rate": 1.7134969697276912e-05, "loss": 391.6427, "step": 13240 }, { "epoch": 0.2548297448817686, "grad_norm": 518.6565357349871, "learning_rate": 1.7130692151103373e-05, "loss": 391.0698, "step": 13250 }, { "epoch": 0.25502206921752846, "grad_norm": 520.9885861519131, "learning_rate": 1.7126411948864776e-05, "loss": 394.4865, "step": 13260 }, { "epoch": 0.2552143935532883, "grad_norm": 538.6008653690043, "learning_rate": 1.7122129092155422e-05, "loss": 409.9459, "step": 13270 }, { "epoch": 0.2554067178890481, "grad_norm": 523.6484876908094, "learning_rate": 1.7117843582570608e-05, "loss": 394.9764, "step": 13280 }, { "epoch": 0.2555990422248079, "grad_norm": 500.8952287163879, "learning_rate": 1.711355542170661e-05, "loss": 382.9939, "step": 13290 }, { "epoch": 0.2557913665605677, "grad_norm": 506.43507480449324, "learning_rate": 1.710926461116071e-05, "loss": 385.2951, "step": 13300 }, { "epoch": 0.25598369089632755, "grad_norm": 465.0683375799653, "learning_rate": 1.710497115253115e-05, "loss": 397.325, "step": 13310 }, { "epoch": 0.2561760152320874, "grad_norm": 479.49249798039165, "learning_rate": 1.7100675047417178e-05, "loss": 394.8781, "step": 13320 }, { "epoch": 0.2563683395678472, "grad_norm": 491.94065035437836, "learning_rate": 1.7096376297419027e-05, "loss": 395.2518, "step": 13330 }, { "epoch": 0.25656066390360704, "grad_norm": 482.79102308649334, "learning_rate": 1.70920749041379e-05, "loss": 399.4447, "step": 13340 }, { "epoch": 0.2567529882393669, "grad_norm": 501.13073800526644, "learning_rate": 1.7087770869176005e-05, "loss": 393.9273, "step": 13350 }, { "epoch": 0.2569453125751267, "grad_norm": 487.66926214288037, "learning_rate": 1.7083464194136517e-05, "loss": 394.5412, "step": 13360 }, { "epoch": 0.25713763691088654, "grad_norm": 501.87717967736893, "learning_rate": 1.707915488062361e-05, "loss": 397.7266, "step": 13370 }, { "epoch": 0.25732996124664637, "grad_norm": 476.5338352256334, "learning_rate": 1.7074842930242418e-05, "loss": 391.353, "step": 13380 }, { "epoch": 0.2575222855824062, "grad_norm": 498.2845587910708, "learning_rate": 1.7070528344599083e-05, "loss": 396.7885, "step": 13390 }, { "epoch": 0.25771460991816597, "grad_norm": 461.0064445443026, "learning_rate": 1.7066211125300713e-05, "loss": 386.897, "step": 13400 }, { "epoch": 0.2579069342539258, "grad_norm": 560.8637582656743, "learning_rate": 1.70618912739554e-05, "loss": 386.8501, "step": 13410 }, { "epoch": 0.25809925858968563, "grad_norm": 469.4646679721641, "learning_rate": 1.705756879217222e-05, "loss": 393.7109, "step": 13420 }, { "epoch": 0.25829158292544546, "grad_norm": 511.92778143924806, "learning_rate": 1.7053243681561225e-05, "loss": 410.817, "step": 13430 }, { "epoch": 0.2584839072612053, "grad_norm": 528.1499790472774, "learning_rate": 1.7048915943733444e-05, "loss": 398.5653, "step": 13440 }, { "epoch": 0.2586762315969651, "grad_norm": 462.0393087196226, "learning_rate": 1.704458558030089e-05, "loss": 391.2169, "step": 13450 }, { "epoch": 0.25886855593272495, "grad_norm": 523.8683383069764, "learning_rate": 1.704025259287656e-05, "loss": 381.1235, "step": 13460 }, { "epoch": 0.2590608802684848, "grad_norm": 513.565424708402, "learning_rate": 1.7035916983074405e-05, "loss": 396.7118, "step": 13470 }, { "epoch": 0.2592532046042446, "grad_norm": 501.4891746489902, "learning_rate": 1.7031578752509377e-05, "loss": 408.4267, "step": 13480 }, { "epoch": 0.25944552894000444, "grad_norm": 487.54437565653166, "learning_rate": 1.70272379027974e-05, "loss": 384.9325, "step": 13490 }, { "epoch": 0.2596378532757643, "grad_norm": 564.2934710791556, "learning_rate": 1.7022894435555356e-05, "loss": 394.9, "step": 13500 }, { "epoch": 0.25983017761152405, "grad_norm": 539.3788348620219, "learning_rate": 1.7018548352401123e-05, "loss": 393.6208, "step": 13510 }, { "epoch": 0.2600225019472839, "grad_norm": 473.30599910217467, "learning_rate": 1.7014199654953543e-05, "loss": 389.3117, "step": 13520 }, { "epoch": 0.2602148262830437, "grad_norm": 515.5798223507278, "learning_rate": 1.700984834483244e-05, "loss": 392.133, "step": 13530 }, { "epoch": 0.26040715061880354, "grad_norm": 504.6545423291964, "learning_rate": 1.7005494423658598e-05, "loss": 395.9388, "step": 13540 }, { "epoch": 0.26059947495456337, "grad_norm": 483.3783504290054, "learning_rate": 1.7001137893053782e-05, "loss": 385.5046, "step": 13550 }, { "epoch": 0.2607917992903232, "grad_norm": 490.27407806206344, "learning_rate": 1.6996778754640727e-05, "loss": 389.0065, "step": 13560 }, { "epoch": 0.26098412362608303, "grad_norm": 589.0909621649263, "learning_rate": 1.6992417010043144e-05, "loss": 392.4468, "step": 13570 }, { "epoch": 0.26117644796184286, "grad_norm": 520.0976561948931, "learning_rate": 1.6988052660885707e-05, "loss": 390.2126, "step": 13580 }, { "epoch": 0.2613687722976027, "grad_norm": 463.3302057527136, "learning_rate": 1.6983685708794064e-05, "loss": 386.2181, "step": 13590 }, { "epoch": 0.2615610966333625, "grad_norm": 477.1543438658674, "learning_rate": 1.6979316155394834e-05, "loss": 393.59, "step": 13600 }, { "epoch": 0.26175342096912235, "grad_norm": 496.3409429170343, "learning_rate": 1.6974944002315605e-05, "loss": 388.0618, "step": 13610 }, { "epoch": 0.2619457453048821, "grad_norm": 497.3053983864858, "learning_rate": 1.697056925118493e-05, "loss": 390.2847, "step": 13620 }, { "epoch": 0.26213806964064196, "grad_norm": 461.43407013193405, "learning_rate": 1.696619190363233e-05, "loss": 404.1869, "step": 13630 }, { "epoch": 0.2623303939764018, "grad_norm": 506.0156931045527, "learning_rate": 1.69618119612883e-05, "loss": 386.771, "step": 13640 }, { "epoch": 0.2625227183121616, "grad_norm": 487.07559184360855, "learning_rate": 1.695742942578429e-05, "loss": 393.2927, "step": 13650 }, { "epoch": 0.26271504264792145, "grad_norm": 469.9499839843871, "learning_rate": 1.6953044298752724e-05, "loss": 389.4216, "step": 13660 }, { "epoch": 0.2629073669836813, "grad_norm": 538.5546733998156, "learning_rate": 1.694865658182699e-05, "loss": 410.8956, "step": 13670 }, { "epoch": 0.2630996913194411, "grad_norm": 476.89414366245006, "learning_rate": 1.6944266276641442e-05, "loss": 393.5947, "step": 13680 }, { "epoch": 0.26329201565520094, "grad_norm": 501.89278804852927, "learning_rate": 1.6939873384831394e-05, "loss": 385.8564, "step": 13690 }, { "epoch": 0.26348433999096077, "grad_norm": 456.58366158960615, "learning_rate": 1.6935477908033124e-05, "loss": 397.3424, "step": 13700 }, { "epoch": 0.2636766643267206, "grad_norm": 485.9892859826436, "learning_rate": 1.6931079847883877e-05, "loss": 395.0168, "step": 13710 }, { "epoch": 0.26386898866248043, "grad_norm": 515.9973931537564, "learning_rate": 1.692667920602186e-05, "loss": 395.985, "step": 13720 }, { "epoch": 0.2640613129982402, "grad_norm": 497.03522776962814, "learning_rate": 1.6922275984086233e-05, "loss": 402.1692, "step": 13730 }, { "epoch": 0.26425363733400004, "grad_norm": 564.9704882150211, "learning_rate": 1.691787018371713e-05, "loss": 391.1026, "step": 13740 }, { "epoch": 0.26444596166975987, "grad_norm": 454.8470592230021, "learning_rate": 1.6913461806555635e-05, "loss": 401.463, "step": 13750 }, { "epoch": 0.2646382860055197, "grad_norm": 477.6611023031933, "learning_rate": 1.6909050854243797e-05, "loss": 395.7901, "step": 13760 }, { "epoch": 0.2648306103412795, "grad_norm": 486.2583210183533, "learning_rate": 1.690463732842462e-05, "loss": 392.4859, "step": 13770 }, { "epoch": 0.26502293467703936, "grad_norm": 482.2059275168781, "learning_rate": 1.6900221230742073e-05, "loss": 397.4946, "step": 13780 }, { "epoch": 0.2652152590127992, "grad_norm": 515.6381207804257, "learning_rate": 1.689580256284108e-05, "loss": 406.7184, "step": 13790 }, { "epoch": 0.265407583348559, "grad_norm": 492.64407558052153, "learning_rate": 1.689138132636752e-05, "loss": 391.2247, "step": 13800 }, { "epoch": 0.26559990768431885, "grad_norm": 482.2207179184786, "learning_rate": 1.688695752296823e-05, "loss": 387.1333, "step": 13810 }, { "epoch": 0.2657922320200787, "grad_norm": 496.6269979700899, "learning_rate": 1.6882531154291007e-05, "loss": 391.7797, "step": 13820 }, { "epoch": 0.2659845563558385, "grad_norm": 494.5378802620604, "learning_rate": 1.6878102221984593e-05, "loss": 402.8014, "step": 13830 }, { "epoch": 0.26617688069159834, "grad_norm": 518.8220787544024, "learning_rate": 1.6873670727698702e-05, "loss": 405.8197, "step": 13840 }, { "epoch": 0.2663692050273581, "grad_norm": 437.1255653537561, "learning_rate": 1.686923667308398e-05, "loss": 396.0104, "step": 13850 }, { "epoch": 0.26656152936311794, "grad_norm": 489.23596809080226, "learning_rate": 1.6864800059792057e-05, "loss": 393.2639, "step": 13860 }, { "epoch": 0.2667538536988778, "grad_norm": 479.96032692242295, "learning_rate": 1.686036088947548e-05, "loss": 415.1752, "step": 13870 }, { "epoch": 0.2669461780346376, "grad_norm": 531.1733812480209, "learning_rate": 1.6855919163787777e-05, "loss": 401.2976, "step": 13880 }, { "epoch": 0.26713850237039743, "grad_norm": 475.35689357547085, "learning_rate": 1.6851474884383416e-05, "loss": 387.0333, "step": 13890 }, { "epoch": 0.26733082670615727, "grad_norm": 491.80161008614084, "learning_rate": 1.6847028052917814e-05, "loss": 393.2367, "step": 13900 }, { "epoch": 0.2675231510419171, "grad_norm": 573.1466331439958, "learning_rate": 1.6842578671047345e-05, "loss": 386.1845, "step": 13910 }, { "epoch": 0.2677154753776769, "grad_norm": 493.13460555719877, "learning_rate": 1.683812674042933e-05, "loss": 379.9144, "step": 13920 }, { "epoch": 0.26790779971343676, "grad_norm": 462.4759753129118, "learning_rate": 1.683367226272204e-05, "loss": 390.1884, "step": 13930 }, { "epoch": 0.2681001240491966, "grad_norm": 466.18931766774114, "learning_rate": 1.6829215239584695e-05, "loss": 393.8679, "step": 13940 }, { "epoch": 0.2682924483849564, "grad_norm": 491.7159484394516, "learning_rate": 1.6824755672677458e-05, "loss": 393.7705, "step": 13950 }, { "epoch": 0.2684847727207162, "grad_norm": 588.9248694701199, "learning_rate": 1.682029356366145e-05, "loss": 393.9963, "step": 13960 }, { "epoch": 0.268677097056476, "grad_norm": 501.10526433530686, "learning_rate": 1.6815828914198732e-05, "loss": 377.7531, "step": 13970 }, { "epoch": 0.26886942139223585, "grad_norm": 523.2026065633, "learning_rate": 1.6811361725952308e-05, "loss": 405.5974, "step": 13980 }, { "epoch": 0.2690617457279957, "grad_norm": 506.67394583214536, "learning_rate": 1.6806892000586135e-05, "loss": 390.8128, "step": 13990 }, { "epoch": 0.2692540700637555, "grad_norm": 544.1896223915375, "learning_rate": 1.6802419739765114e-05, "loss": 389.8078, "step": 14000 }, { "epoch": 0.26944639439951534, "grad_norm": 491.96316076754937, "learning_rate": 1.679794494515508e-05, "loss": 391.9018, "step": 14010 }, { "epoch": 0.2696387187352752, "grad_norm": 542.9530459510884, "learning_rate": 1.6793467618422828e-05, "loss": 385.7537, "step": 14020 }, { "epoch": 0.269831043071035, "grad_norm": 535.404844202274, "learning_rate": 1.6788987761236088e-05, "loss": 395.4638, "step": 14030 }, { "epoch": 0.27002336740679483, "grad_norm": 489.38268937887545, "learning_rate": 1.6784505375263533e-05, "loss": 387.0627, "step": 14040 }, { "epoch": 0.27021569174255466, "grad_norm": 493.74504632673415, "learning_rate": 1.678002046217477e-05, "loss": 390.6436, "step": 14050 }, { "epoch": 0.2704080160783145, "grad_norm": 599.7337319808317, "learning_rate": 1.6775533023640363e-05, "loss": 426.1557, "step": 14060 }, { "epoch": 0.27060034041407427, "grad_norm": 623.8127113724317, "learning_rate": 1.6771043061331806e-05, "loss": 415.4726, "step": 14070 }, { "epoch": 0.2707926647498341, "grad_norm": 495.45378039437037, "learning_rate": 1.6766550576921533e-05, "loss": 387.8605, "step": 14080 }, { "epoch": 0.27098498908559393, "grad_norm": 504.15813492014405, "learning_rate": 1.676205557208293e-05, "loss": 396.13, "step": 14090 }, { "epoch": 0.27117731342135376, "grad_norm": 547.2105305436389, "learning_rate": 1.67575580484903e-05, "loss": 377.4808, "step": 14100 }, { "epoch": 0.2713696377571136, "grad_norm": 476.53585613180485, "learning_rate": 1.6753058007818906e-05, "loss": 395.6949, "step": 14110 }, { "epoch": 0.2715619620928734, "grad_norm": 512.0339523348306, "learning_rate": 1.674855545174493e-05, "loss": 385.6241, "step": 14120 }, { "epoch": 0.27175428642863325, "grad_norm": 486.4501433186579, "learning_rate": 1.6744050381945507e-05, "loss": 398.1865, "step": 14130 }, { "epoch": 0.2719466107643931, "grad_norm": 481.17986956039204, "learning_rate": 1.67395428000987e-05, "loss": 386.8289, "step": 14140 }, { "epoch": 0.2721389351001529, "grad_norm": 498.87643368094456, "learning_rate": 1.6735032707883502e-05, "loss": 391.9912, "step": 14150 }, { "epoch": 0.27233125943591274, "grad_norm": 487.406389720137, "learning_rate": 1.6730520106979855e-05, "loss": 379.5662, "step": 14160 }, { "epoch": 0.2725235837716726, "grad_norm": 514.7910188117253, "learning_rate": 1.672600499906863e-05, "loss": 392.7333, "step": 14170 }, { "epoch": 0.27271590810743235, "grad_norm": 511.7755234491555, "learning_rate": 1.6721487385831622e-05, "loss": 395.4198, "step": 14180 }, { "epoch": 0.2729082324431922, "grad_norm": 487.05765967267655, "learning_rate": 1.6716967268951574e-05, "loss": 393.7244, "step": 14190 }, { "epoch": 0.273100556778952, "grad_norm": 486.21869972819553, "learning_rate": 1.6712444650112152e-05, "loss": 398.4978, "step": 14200 }, { "epoch": 0.27329288111471184, "grad_norm": 559.8126928923966, "learning_rate": 1.6707919530997956e-05, "loss": 401.5352, "step": 14210 }, { "epoch": 0.27348520545047167, "grad_norm": 525.4416841159325, "learning_rate": 1.6703391913294524e-05, "loss": 387.6162, "step": 14220 }, { "epoch": 0.2736775297862315, "grad_norm": 518.055564266573, "learning_rate": 1.6698861798688312e-05, "loss": 395.3866, "step": 14230 }, { "epoch": 0.27386985412199133, "grad_norm": 504.06500478413204, "learning_rate": 1.6694329188866717e-05, "loss": 394.1962, "step": 14240 }, { "epoch": 0.27406217845775116, "grad_norm": 476.36847485542984, "learning_rate": 1.6689794085518057e-05, "loss": 382.5779, "step": 14250 }, { "epoch": 0.274254502793511, "grad_norm": 524.5001467805347, "learning_rate": 1.668525649033159e-05, "loss": 401.2712, "step": 14260 }, { "epoch": 0.2744468271292708, "grad_norm": 511.7771635917971, "learning_rate": 1.6680716404997482e-05, "loss": 390.3644, "step": 14270 }, { "epoch": 0.27463915146503065, "grad_norm": 495.52370130659904, "learning_rate": 1.667617383120686e-05, "loss": 396.2208, "step": 14280 }, { "epoch": 0.2748314758007904, "grad_norm": 474.7706071353199, "learning_rate": 1.667162877065174e-05, "loss": 390.4807, "step": 14290 }, { "epoch": 0.27502380013655026, "grad_norm": 479.8887676170485, "learning_rate": 1.6667081225025087e-05, "loss": 387.1438, "step": 14300 }, { "epoch": 0.2752161244723101, "grad_norm": 515.0957857214537, "learning_rate": 1.666253119602079e-05, "loss": 394.5681, "step": 14310 }, { "epoch": 0.2754084488080699, "grad_norm": 503.76659880261764, "learning_rate": 1.665797868533366e-05, "loss": 382.0825, "step": 14320 }, { "epoch": 0.27560077314382975, "grad_norm": 536.9037136753394, "learning_rate": 1.6653423694659433e-05, "loss": 404.3942, "step": 14330 }, { "epoch": 0.2757930974795896, "grad_norm": 558.0341787521504, "learning_rate": 1.6648866225694757e-05, "loss": 394.4466, "step": 14340 }, { "epoch": 0.2759854218153494, "grad_norm": 487.9536840470224, "learning_rate": 1.6644306280137227e-05, "loss": 388.3453, "step": 14350 }, { "epoch": 0.27617774615110924, "grad_norm": 549.9656942677718, "learning_rate": 1.6639743859685336e-05, "loss": 397.5311, "step": 14360 }, { "epoch": 0.27637007048686907, "grad_norm": 467.65786759439186, "learning_rate": 1.663517896603852e-05, "loss": 391.5482, "step": 14370 }, { "epoch": 0.2765623948226289, "grad_norm": 520.5947403263569, "learning_rate": 1.6630611600897126e-05, "loss": 392.006, "step": 14380 }, { "epoch": 0.27675471915838873, "grad_norm": 491.3663480775731, "learning_rate": 1.6626041765962413e-05, "loss": 403.8781, "step": 14390 }, { "epoch": 0.27694704349414856, "grad_norm": 465.5328672239248, "learning_rate": 1.662146946293658e-05, "loss": 391.9352, "step": 14400 }, { "epoch": 0.27713936782990833, "grad_norm": 493.89827320644065, "learning_rate": 1.6616894693522727e-05, "loss": 385.1233, "step": 14410 }, { "epoch": 0.27733169216566816, "grad_norm": 499.00496418938826, "learning_rate": 1.6612317459424884e-05, "loss": 383.6975, "step": 14420 }, { "epoch": 0.277524016501428, "grad_norm": 499.0241251917486, "learning_rate": 1.6607737762347987e-05, "loss": 391.0595, "step": 14430 }, { "epoch": 0.2777163408371878, "grad_norm": 488.43702927538936, "learning_rate": 1.6603155603997908e-05, "loss": 391.3056, "step": 14440 }, { "epoch": 0.27790866517294766, "grad_norm": 513.7568596869821, "learning_rate": 1.6598570986081424e-05, "loss": 389.4551, "step": 14450 }, { "epoch": 0.2781009895087075, "grad_norm": 488.8984804680765, "learning_rate": 1.6593983910306225e-05, "loss": 386.0653, "step": 14460 }, { "epoch": 0.2782933138444673, "grad_norm": 475.5486903350814, "learning_rate": 1.658939437838092e-05, "loss": 381.9182, "step": 14470 }, { "epoch": 0.27848563818022715, "grad_norm": 483.7020740351836, "learning_rate": 1.658480239201504e-05, "loss": 388.15, "step": 14480 }, { "epoch": 0.278677962515987, "grad_norm": 474.2297926591958, "learning_rate": 1.6580207952919018e-05, "loss": 380.9073, "step": 14490 }, { "epoch": 0.2788702868517468, "grad_norm": 532.3122715536738, "learning_rate": 1.657561106280421e-05, "loss": 386.3968, "step": 14500 }, { "epoch": 0.27906261118750664, "grad_norm": 487.3535681886864, "learning_rate": 1.6571011723382882e-05, "loss": 387.3301, "step": 14510 }, { "epoch": 0.2792549355232664, "grad_norm": 488.6986066079575, "learning_rate": 1.6566409936368207e-05, "loss": 393.3085, "step": 14520 }, { "epoch": 0.27944725985902624, "grad_norm": 510.40771262185467, "learning_rate": 1.6561805703474285e-05, "loss": 380.7416, "step": 14530 }, { "epoch": 0.2796395841947861, "grad_norm": 492.0546940217477, "learning_rate": 1.655719902641611e-05, "loss": 390.49, "step": 14540 }, { "epoch": 0.2798319085305459, "grad_norm": 474.56344068259085, "learning_rate": 1.6552589906909586e-05, "loss": 383.2617, "step": 14550 }, { "epoch": 0.28002423286630573, "grad_norm": 536.2365934811677, "learning_rate": 1.654797834667155e-05, "loss": 374.7162, "step": 14560 }, { "epoch": 0.28021655720206556, "grad_norm": 535.0234100697963, "learning_rate": 1.6543364347419714e-05, "loss": 395.434, "step": 14570 }, { "epoch": 0.2804088815378254, "grad_norm": 534.4528084084313, "learning_rate": 1.6538747910872733e-05, "loss": 390.3084, "step": 14580 }, { "epoch": 0.2806012058735852, "grad_norm": 518.1222162094313, "learning_rate": 1.6534129038750145e-05, "loss": 386.5567, "step": 14590 }, { "epoch": 0.28079353020934505, "grad_norm": 632.019829050338, "learning_rate": 1.65295077327724e-05, "loss": 387.525, "step": 14600 }, { "epoch": 0.2809858545451049, "grad_norm": 484.1064687775313, "learning_rate": 1.652488399466087e-05, "loss": 377.9122, "step": 14610 }, { "epoch": 0.2811781788808647, "grad_norm": 469.8652315484747, "learning_rate": 1.6520257826137807e-05, "loss": 392.4902, "step": 14620 }, { "epoch": 0.2813705032166245, "grad_norm": 540.3384561437886, "learning_rate": 1.6515629228926396e-05, "loss": 396.9217, "step": 14630 }, { "epoch": 0.2815628275523843, "grad_norm": 476.7879068094032, "learning_rate": 1.6510998204750702e-05, "loss": 402.6011, "step": 14640 }, { "epoch": 0.28175515188814415, "grad_norm": 458.3851532035752, "learning_rate": 1.650636475533571e-05, "loss": 386.3834, "step": 14650 }, { "epoch": 0.281947476223904, "grad_norm": 488.3971521177812, "learning_rate": 1.6501728882407305e-05, "loss": 391.1553, "step": 14660 }, { "epoch": 0.2821398005596638, "grad_norm": 519.911671875725, "learning_rate": 1.649709058769227e-05, "loss": 378.8972, "step": 14670 }, { "epoch": 0.28233212489542364, "grad_norm": 517.2565307701216, "learning_rate": 1.6492449872918293e-05, "loss": 378.7213, "step": 14680 }, { "epoch": 0.28252444923118347, "grad_norm": 463.86330751564213, "learning_rate": 1.6487806739813966e-05, "loss": 382.2467, "step": 14690 }, { "epoch": 0.2827167735669433, "grad_norm": 491.7713185776623, "learning_rate": 1.6483161190108778e-05, "loss": 385.834, "step": 14700 }, { "epoch": 0.28290909790270313, "grad_norm": 492.04990272489835, "learning_rate": 1.6478513225533117e-05, "loss": 393.276, "step": 14710 }, { "epoch": 0.28310142223846296, "grad_norm": 473.92846724740224, "learning_rate": 1.647386284781828e-05, "loss": 392.6294, "step": 14720 }, { "epoch": 0.2832937465742228, "grad_norm": 473.51366958046833, "learning_rate": 1.6469210058696448e-05, "loss": 396.0201, "step": 14730 }, { "epoch": 0.28348607090998257, "grad_norm": 513.5508599611892, "learning_rate": 1.646455485990071e-05, "loss": 383.8247, "step": 14740 }, { "epoch": 0.2836783952457424, "grad_norm": 497.50634405561897, "learning_rate": 1.645989725316506e-05, "loss": 384.7214, "step": 14750 }, { "epoch": 0.28387071958150223, "grad_norm": 471.5952807884002, "learning_rate": 1.6455237240224364e-05, "loss": 379.8508, "step": 14760 }, { "epoch": 0.28406304391726206, "grad_norm": 497.51785648599713, "learning_rate": 1.6450574822814412e-05, "loss": 387.6232, "step": 14770 }, { "epoch": 0.2842553682530219, "grad_norm": 462.968041044798, "learning_rate": 1.6445910002671872e-05, "loss": 387.0854, "step": 14780 }, { "epoch": 0.2844476925887817, "grad_norm": 511.1203351303572, "learning_rate": 1.644124278153431e-05, "loss": 389.8439, "step": 14790 }, { "epoch": 0.28464001692454155, "grad_norm": 543.5307978614018, "learning_rate": 1.64365731611402e-05, "loss": 390.1835, "step": 14800 }, { "epoch": 0.2848323412603014, "grad_norm": 468.3616680586934, "learning_rate": 1.6431901143228888e-05, "loss": 378.7895, "step": 14810 }, { "epoch": 0.2850246655960612, "grad_norm": 488.52103847851134, "learning_rate": 1.6427226729540623e-05, "loss": 380.1911, "step": 14820 }, { "epoch": 0.28521698993182104, "grad_norm": 487.61978203376117, "learning_rate": 1.6422549921816556e-05, "loss": 393.6369, "step": 14830 }, { "epoch": 0.28540931426758087, "grad_norm": 472.1358548853999, "learning_rate": 1.641787072179871e-05, "loss": 381.1461, "step": 14840 }, { "epoch": 0.28560163860334065, "grad_norm": 520.1675409127897, "learning_rate": 1.6413189131230022e-05, "loss": 380.0909, "step": 14850 }, { "epoch": 0.2857939629391005, "grad_norm": 485.978698749814, "learning_rate": 1.6408505151854292e-05, "loss": 384.8645, "step": 14860 }, { "epoch": 0.2859862872748603, "grad_norm": 495.28185861111274, "learning_rate": 1.6403818785416236e-05, "loss": 380.8901, "step": 14870 }, { "epoch": 0.28617861161062014, "grad_norm": 458.01453173995077, "learning_rate": 1.6399130033661444e-05, "loss": 397.4296, "step": 14880 }, { "epoch": 0.28637093594637997, "grad_norm": 476.64320326477275, "learning_rate": 1.6394438898336402e-05, "loss": 385.6506, "step": 14890 }, { "epoch": 0.2865632602821398, "grad_norm": 541.8504396274184, "learning_rate": 1.6389745381188475e-05, "loss": 396.2868, "step": 14900 }, { "epoch": 0.28675558461789963, "grad_norm": 476.24906697976616, "learning_rate": 1.6385049483965926e-05, "loss": 380.1042, "step": 14910 }, { "epoch": 0.28694790895365946, "grad_norm": 512.7942479475595, "learning_rate": 1.6380351208417897e-05, "loss": 385.5607, "step": 14920 }, { "epoch": 0.2871402332894193, "grad_norm": 471.00853555682755, "learning_rate": 1.6375650556294417e-05, "loss": 382.3335, "step": 14930 }, { "epoch": 0.2873325576251791, "grad_norm": 499.52034733524675, "learning_rate": 1.6370947529346404e-05, "loss": 385.2976, "step": 14940 }, { "epoch": 0.28752488196093895, "grad_norm": 486.82498278013855, "learning_rate": 1.6366242129325652e-05, "loss": 386.699, "step": 14950 }, { "epoch": 0.2877172062966988, "grad_norm": 461.49359895748785, "learning_rate": 1.636153435798485e-05, "loss": 391.78, "step": 14960 }, { "epoch": 0.28790953063245855, "grad_norm": 493.8995605913665, "learning_rate": 1.6356824217077564e-05, "loss": 373.2573, "step": 14970 }, { "epoch": 0.2881018549682184, "grad_norm": 521.052487777547, "learning_rate": 1.6352111708358243e-05, "loss": 384.3046, "step": 14980 }, { "epoch": 0.2882941793039782, "grad_norm": 487.92242776300156, "learning_rate": 1.6347396833582224e-05, "loss": 389.7221, "step": 14990 }, { "epoch": 0.28848650363973805, "grad_norm": 463.67397745910336, "learning_rate": 1.634267959450571e-05, "loss": 380.6968, "step": 15000 }, { "epoch": 0.2886788279754979, "grad_norm": 481.26091086966454, "learning_rate": 1.63379599928858e-05, "loss": 396.7292, "step": 15010 }, { "epoch": 0.2888711523112577, "grad_norm": 434.5124362503905, "learning_rate": 1.6333238030480473e-05, "loss": 387.3863, "step": 15020 }, { "epoch": 0.28906347664701754, "grad_norm": 491.90650335544296, "learning_rate": 1.6328513709048573e-05, "loss": 378.2793, "step": 15030 }, { "epoch": 0.28925580098277737, "grad_norm": 546.9311713653154, "learning_rate": 1.6323787030349833e-05, "loss": 384.7709, "step": 15040 }, { "epoch": 0.2894481253185372, "grad_norm": 530.2226410803686, "learning_rate": 1.6319057996144868e-05, "loss": 389.6169, "step": 15050 }, { "epoch": 0.289640449654297, "grad_norm": 495.49817364137556, "learning_rate": 1.631432660819516e-05, "loss": 376.3631, "step": 15060 }, { "epoch": 0.28983277399005686, "grad_norm": 517.459453193237, "learning_rate": 1.6309592868263075e-05, "loss": 386.113, "step": 15070 }, { "epoch": 0.29002509832581663, "grad_norm": 520.8782391961886, "learning_rate": 1.630485677811185e-05, "loss": 390.316, "step": 15080 }, { "epoch": 0.29021742266157646, "grad_norm": 494.6786563660572, "learning_rate": 1.630011833950561e-05, "loss": 385.1296, "step": 15090 }, { "epoch": 0.2904097469973363, "grad_norm": 496.32032175509653, "learning_rate": 1.6295377554209338e-05, "loss": 390.7008, "step": 15100 }, { "epoch": 0.2906020713330961, "grad_norm": 524.6278616686512, "learning_rate": 1.6290634423988897e-05, "loss": 388.9809, "step": 15110 }, { "epoch": 0.29079439566885595, "grad_norm": 490.09439006565714, "learning_rate": 1.6285888950611023e-05, "loss": 391.4503, "step": 15120 }, { "epoch": 0.2909867200046158, "grad_norm": 486.966021625674, "learning_rate": 1.6281141135843334e-05, "loss": 383.3823, "step": 15130 }, { "epoch": 0.2911790443403756, "grad_norm": 488.781315739911, "learning_rate": 1.6276390981454306e-05, "loss": 386.7401, "step": 15140 }, { "epoch": 0.29137136867613544, "grad_norm": 503.57567011536383, "learning_rate": 1.6271638489213297e-05, "loss": 397.332, "step": 15150 }, { "epoch": 0.2915636930118953, "grad_norm": 673.8851475203368, "learning_rate": 1.6266883660890527e-05, "loss": 398.2306, "step": 15160 }, { "epoch": 0.2917560173476551, "grad_norm": 496.3247441244141, "learning_rate": 1.6262126498257098e-05, "loss": 384.9945, "step": 15170 }, { "epoch": 0.29194834168341494, "grad_norm": 476.35917625149597, "learning_rate": 1.625736700308497e-05, "loss": 387.6532, "step": 15180 }, { "epoch": 0.2921406660191747, "grad_norm": 473.77252573428257, "learning_rate": 1.6252605177146978e-05, "loss": 383.0787, "step": 15190 }, { "epoch": 0.29233299035493454, "grad_norm": 481.34409260225357, "learning_rate": 1.624784102221682e-05, "loss": 392.2435, "step": 15200 }, { "epoch": 0.29252531469069437, "grad_norm": 475.5771470039311, "learning_rate": 1.6243074540069067e-05, "loss": 382.1738, "step": 15210 }, { "epoch": 0.2927176390264542, "grad_norm": 517.829452811324, "learning_rate": 1.6238305732479158e-05, "loss": 392.2777, "step": 15220 }, { "epoch": 0.29290996336221403, "grad_norm": 526.4621373465495, "learning_rate": 1.6233534601223396e-05, "loss": 387.8406, "step": 15230 }, { "epoch": 0.29310228769797386, "grad_norm": 478.25607538973367, "learning_rate": 1.6228761148078943e-05, "loss": 391.3679, "step": 15240 }, { "epoch": 0.2932946120337337, "grad_norm": 475.47471105435926, "learning_rate": 1.622398537482383e-05, "loss": 393.5634, "step": 15250 }, { "epoch": 0.2934869363694935, "grad_norm": 560.4740962824944, "learning_rate": 1.621920728323696e-05, "loss": 392.568, "step": 15260 }, { "epoch": 0.29367926070525335, "grad_norm": 494.87613487730755, "learning_rate": 1.621442687509809e-05, "loss": 385.4871, "step": 15270 }, { "epoch": 0.2938715850410132, "grad_norm": 512.9864101092595, "learning_rate": 1.6209644152187848e-05, "loss": 392.8923, "step": 15280 }, { "epoch": 0.294063909376773, "grad_norm": 486.19639377330276, "learning_rate": 1.620485911628771e-05, "loss": 394.0132, "step": 15290 }, { "epoch": 0.2942562337125328, "grad_norm": 470.3526868614509, "learning_rate": 1.6200071769180026e-05, "loss": 368.9018, "step": 15300 }, { "epoch": 0.2944485580482926, "grad_norm": 445.00269907400354, "learning_rate": 1.6195282112648007e-05, "loss": 386.3872, "step": 15310 }, { "epoch": 0.29464088238405245, "grad_norm": 506.9645941907079, "learning_rate": 1.6190490148475724e-05, "loss": 381.0153, "step": 15320 }, { "epoch": 0.2948332067198123, "grad_norm": 455.66778163181453, "learning_rate": 1.6185695878448094e-05, "loss": 389.7398, "step": 15330 }, { "epoch": 0.2950255310555721, "grad_norm": 434.4250529789861, "learning_rate": 1.6180899304350915e-05, "loss": 372.087, "step": 15340 }, { "epoch": 0.29521785539133194, "grad_norm": 500.35196090291123, "learning_rate": 1.6176100427970826e-05, "loss": 380.515, "step": 15350 }, { "epoch": 0.29541017972709177, "grad_norm": 484.0534434395125, "learning_rate": 1.6171299251095324e-05, "loss": 384.4088, "step": 15360 }, { "epoch": 0.2956025040628516, "grad_norm": 496.5740929991856, "learning_rate": 1.6166495775512777e-05, "loss": 396.1473, "step": 15370 }, { "epoch": 0.29579482839861143, "grad_norm": 517.0120237674353, "learning_rate": 1.6161690003012392e-05, "loss": 390.5824, "step": 15380 }, { "epoch": 0.29598715273437126, "grad_norm": 494.9591564232326, "learning_rate": 1.615688193538425e-05, "loss": 378.619, "step": 15390 }, { "epoch": 0.2961794770701311, "grad_norm": 571.1103475973266, "learning_rate": 1.615207157441927e-05, "loss": 389.4476, "step": 15400 }, { "epoch": 0.29637180140589087, "grad_norm": 474.7299271462728, "learning_rate": 1.6147258921909236e-05, "loss": 380.0867, "step": 15410 }, { "epoch": 0.2965641257416507, "grad_norm": 560.9972230006571, "learning_rate": 1.6142443979646774e-05, "loss": 398.957, "step": 15420 }, { "epoch": 0.2967564500774105, "grad_norm": 448.23823648255143, "learning_rate": 1.6137626749425377e-05, "loss": 377.3599, "step": 15430 }, { "epoch": 0.29694877441317036, "grad_norm": 479.3145501516596, "learning_rate": 1.6132807233039382e-05, "loss": 386.1266, "step": 15440 }, { "epoch": 0.2971410987489302, "grad_norm": 528.0677323746904, "learning_rate": 1.612798543228398e-05, "loss": 383.0138, "step": 15450 }, { "epoch": 0.29733342308469, "grad_norm": 490.62521823390904, "learning_rate": 1.612316134895521e-05, "loss": 387.7146, "step": 15460 }, { "epoch": 0.29752574742044985, "grad_norm": 455.12196157480594, "learning_rate": 1.611833498484997e-05, "loss": 387.1293, "step": 15470 }, { "epoch": 0.2977180717562097, "grad_norm": 540.2640954521449, "learning_rate": 1.611350634176599e-05, "loss": 389.78, "step": 15480 }, { "epoch": 0.2979103960919695, "grad_norm": 500.9857814104484, "learning_rate": 1.6108675421501865e-05, "loss": 391.0004, "step": 15490 }, { "epoch": 0.29810272042772934, "grad_norm": 491.1924764973054, "learning_rate": 1.610384222585704e-05, "loss": 388.4165, "step": 15500 }, { "epoch": 0.29829504476348917, "grad_norm": 473.6500171513281, "learning_rate": 1.609900675663179e-05, "loss": 376.6896, "step": 15510 }, { "epoch": 0.298487369099249, "grad_norm": 477.5587209017475, "learning_rate": 1.609416901562725e-05, "loss": 371.4006, "step": 15520 }, { "epoch": 0.2986796934350088, "grad_norm": 507.91221115477657, "learning_rate": 1.60893290046454e-05, "loss": 384.6357, "step": 15530 }, { "epoch": 0.2988720177707686, "grad_norm": 493.17435750231357, "learning_rate": 1.608448672548907e-05, "loss": 389.1049, "step": 15540 }, { "epoch": 0.29906434210652844, "grad_norm": 530.1929565826192, "learning_rate": 1.6079642179961917e-05, "loss": 372.4196, "step": 15550 }, { "epoch": 0.29925666644228827, "grad_norm": 513.9989479149148, "learning_rate": 1.6074795369868463e-05, "loss": 388.418, "step": 15560 }, { "epoch": 0.2994489907780481, "grad_norm": 469.28213013333186, "learning_rate": 1.6069946297014064e-05, "loss": 382.0257, "step": 15570 }, { "epoch": 0.2996413151138079, "grad_norm": 519.1702966244364, "learning_rate": 1.6065094963204915e-05, "loss": 380.3084, "step": 15580 }, { "epoch": 0.29983363944956776, "grad_norm": 458.70827536114274, "learning_rate": 1.6060241370248064e-05, "loss": 386.5981, "step": 15590 }, { "epoch": 0.3000259637853276, "grad_norm": 494.09100236320444, "learning_rate": 1.605538551995139e-05, "loss": 389.0104, "step": 15600 }, { "epoch": 0.3002182881210874, "grad_norm": 499.8257326714157, "learning_rate": 1.6050527414123616e-05, "loss": 381.525, "step": 15610 }, { "epoch": 0.30041061245684725, "grad_norm": 514.2821900561872, "learning_rate": 1.604566705457431e-05, "loss": 370.4945, "step": 15620 }, { "epoch": 0.3006029367926071, "grad_norm": 495.99600409824876, "learning_rate": 1.604080444311387e-05, "loss": 376.4167, "step": 15630 }, { "epoch": 0.30079526112836685, "grad_norm": 496.12042045174064, "learning_rate": 1.6035939581553543e-05, "loss": 389.8231, "step": 15640 }, { "epoch": 0.3009875854641267, "grad_norm": 500.4805466121026, "learning_rate": 1.603107247170541e-05, "loss": 394.658, "step": 15650 }, { "epoch": 0.3011799097998865, "grad_norm": 488.2017398651639, "learning_rate": 1.6026203115382392e-05, "loss": 394.7382, "step": 15660 }, { "epoch": 0.30137223413564634, "grad_norm": 505.8285315529847, "learning_rate": 1.6021331514398233e-05, "loss": 376.4537, "step": 15670 }, { "epoch": 0.3015645584714062, "grad_norm": 538.4909365883506, "learning_rate": 1.6016457670567535e-05, "loss": 396.45, "step": 15680 }, { "epoch": 0.301756882807166, "grad_norm": 520.0627945628461, "learning_rate": 1.6011581585705715e-05, "loss": 384.5404, "step": 15690 }, { "epoch": 0.30194920714292584, "grad_norm": 533.1833578871967, "learning_rate": 1.600670326162904e-05, "loss": 391.3917, "step": 15700 }, { "epoch": 0.30214153147868567, "grad_norm": 496.59726655207214, "learning_rate": 1.600182270015461e-05, "loss": 376.9667, "step": 15710 }, { "epoch": 0.3023338558144455, "grad_norm": 470.33865795550236, "learning_rate": 1.5996939903100338e-05, "loss": 373.5374, "step": 15720 }, { "epoch": 0.3025261801502053, "grad_norm": 494.7224862314734, "learning_rate": 1.5992054872285005e-05, "loss": 377.8496, "step": 15730 }, { "epoch": 0.30271850448596516, "grad_norm": 462.42162046846744, "learning_rate": 1.5987167609528187e-05, "loss": 386.7111, "step": 15740 }, { "epoch": 0.30291082882172493, "grad_norm": 549.5813713322203, "learning_rate": 1.598227811665032e-05, "loss": 394.5676, "step": 15750 }, { "epoch": 0.30310315315748476, "grad_norm": 479.453865277826, "learning_rate": 1.597738639547265e-05, "loss": 372.977, "step": 15760 }, { "epoch": 0.3032954774932446, "grad_norm": 437.4432263127666, "learning_rate": 1.597249244781727e-05, "loss": 387.8498, "step": 15770 }, { "epoch": 0.3034878018290044, "grad_norm": 480.8793637734585, "learning_rate": 1.5967596275507094e-05, "loss": 391.69, "step": 15780 }, { "epoch": 0.30368012616476425, "grad_norm": 480.18538183593864, "learning_rate": 1.5962697880365863e-05, "loss": 374.785, "step": 15790 }, { "epoch": 0.3038724505005241, "grad_norm": 485.7658136927617, "learning_rate": 1.5957797264218145e-05, "loss": 372.742, "step": 15800 }, { "epoch": 0.3040647748362839, "grad_norm": 526.1846228185616, "learning_rate": 1.5952894428889347e-05, "loss": 402.5581, "step": 15810 }, { "epoch": 0.30425709917204374, "grad_norm": 474.2349139837495, "learning_rate": 1.594798937620569e-05, "loss": 374.9354, "step": 15820 }, { "epoch": 0.3044494235078036, "grad_norm": 487.1475350282878, "learning_rate": 1.594308210799422e-05, "loss": 381.5031, "step": 15830 }, { "epoch": 0.3046417478435634, "grad_norm": 526.0957708674093, "learning_rate": 1.5938172626082823e-05, "loss": 389.1943, "step": 15840 }, { "epoch": 0.30483407217932323, "grad_norm": 484.2232035646498, "learning_rate": 1.5933260932300192e-05, "loss": 382.6517, "step": 15850 }, { "epoch": 0.305026396515083, "grad_norm": 481.9646853442586, "learning_rate": 1.5928347028475855e-05, "loss": 385.7438, "step": 15860 }, { "epoch": 0.30521872085084284, "grad_norm": 506.3687257691799, "learning_rate": 1.592343091644016e-05, "loss": 379.9382, "step": 15870 }, { "epoch": 0.30541104518660267, "grad_norm": 550.512446852848, "learning_rate": 1.5918512598024275e-05, "loss": 368.3928, "step": 15880 }, { "epoch": 0.3056033695223625, "grad_norm": 516.7421443143606, "learning_rate": 1.59135920750602e-05, "loss": 394.3213, "step": 15890 }, { "epoch": 0.30579569385812233, "grad_norm": 479.36253297341716, "learning_rate": 1.590866934938074e-05, "loss": 386.138, "step": 15900 }, { "epoch": 0.30598801819388216, "grad_norm": 457.4125480024917, "learning_rate": 1.590374442281953e-05, "loss": 375.0515, "step": 15910 }, { "epoch": 0.306180342529642, "grad_norm": 507.5572160106832, "learning_rate": 1.5898817297211028e-05, "loss": 377.9083, "step": 15920 }, { "epoch": 0.3063726668654018, "grad_norm": 513.6180166698787, "learning_rate": 1.589388797439051e-05, "loss": 387.941, "step": 15930 }, { "epoch": 0.30656499120116165, "grad_norm": 540.7423722592335, "learning_rate": 1.5888956456194056e-05, "loss": 379.9804, "step": 15940 }, { "epoch": 0.3067573155369215, "grad_norm": 494.6165946657413, "learning_rate": 1.588402274445858e-05, "loss": 397.443, "step": 15950 }, { "epoch": 0.3069496398726813, "grad_norm": 485.2879526777606, "learning_rate": 1.5879086841021815e-05, "loss": 387.4972, "step": 15960 }, { "epoch": 0.30714196420844114, "grad_norm": 483.77146964448315, "learning_rate": 1.5874148747722294e-05, "loss": 385.6518, "step": 15970 }, { "epoch": 0.3073342885442009, "grad_norm": 500.09250591109884, "learning_rate": 1.5869208466399382e-05, "loss": 381.8277, "step": 15980 }, { "epoch": 0.30752661287996075, "grad_norm": 519.7212693848686, "learning_rate": 1.586426599889325e-05, "loss": 388.2954, "step": 15990 }, { "epoch": 0.3077189372157206, "grad_norm": 519.4154904155673, "learning_rate": 1.5859321347044882e-05, "loss": 393.8029, "step": 16000 }, { "epoch": 0.3079112615514804, "grad_norm": 475.0755911633883, "learning_rate": 1.5854374512696084e-05, "loss": 388.3107, "step": 16010 }, { "epoch": 0.30810358588724024, "grad_norm": 515.056518059897, "learning_rate": 1.584942549768947e-05, "loss": 383.1705, "step": 16020 }, { "epoch": 0.30829591022300007, "grad_norm": 489.79656202666257, "learning_rate": 1.584447430386846e-05, "loss": 395.4364, "step": 16030 }, { "epoch": 0.3084882345587599, "grad_norm": 471.0922792299047, "learning_rate": 1.58395209330773e-05, "loss": 368.5746, "step": 16040 }, { "epoch": 0.30868055889451973, "grad_norm": 507.9029922206225, "learning_rate": 1.5834565387161034e-05, "loss": 389.2412, "step": 16050 }, { "epoch": 0.30887288323027956, "grad_norm": 505.36307091146045, "learning_rate": 1.5829607667965524e-05, "loss": 379.7645, "step": 16060 }, { "epoch": 0.3090652075660394, "grad_norm": 533.0401365312647, "learning_rate": 1.5824647777337433e-05, "loss": 402.0754, "step": 16070 }, { "epoch": 0.3092575319017992, "grad_norm": 477.822109995146, "learning_rate": 1.5819685717124245e-05, "loss": 378.4888, "step": 16080 }, { "epoch": 0.309449856237559, "grad_norm": 491.97574484715693, "learning_rate": 1.5814721489174246e-05, "loss": 378.9121, "step": 16090 }, { "epoch": 0.3096421805733188, "grad_norm": 504.15390175929986, "learning_rate": 1.580975509533652e-05, "loss": 386.5091, "step": 16100 }, { "epoch": 0.30983450490907866, "grad_norm": 472.79917790946797, "learning_rate": 1.5804786537460972e-05, "loss": 374.8191, "step": 16110 }, { "epoch": 0.3100268292448385, "grad_norm": 527.9536707655863, "learning_rate": 1.5799815817398312e-05, "loss": 380.7867, "step": 16120 }, { "epoch": 0.3102191535805983, "grad_norm": 540.2626487985623, "learning_rate": 1.579484293700004e-05, "loss": 385.7923, "step": 16130 }, { "epoch": 0.31041147791635815, "grad_norm": 487.14746385446807, "learning_rate": 1.578986789811849e-05, "loss": 393.2775, "step": 16140 }, { "epoch": 0.310603802252118, "grad_norm": 442.3817324977554, "learning_rate": 1.5784890702606763e-05, "loss": 376.5007, "step": 16150 }, { "epoch": 0.3107961265878778, "grad_norm": 530.164851806514, "learning_rate": 1.5779911352318792e-05, "loss": 381.8134, "step": 16160 }, { "epoch": 0.31098845092363764, "grad_norm": 562.094692156776, "learning_rate": 1.5774929849109303e-05, "loss": 380.7581, "step": 16170 }, { "epoch": 0.31118077525939747, "grad_norm": 472.9884759106849, "learning_rate": 1.5769946194833816e-05, "loss": 395.6762, "step": 16180 }, { "epoch": 0.3113730995951573, "grad_norm": 653.2759011964929, "learning_rate": 1.5764960391348666e-05, "loss": 391.3308, "step": 16190 }, { "epoch": 0.3115654239309171, "grad_norm": 453.7480143451823, "learning_rate": 1.5759972440510985e-05, "loss": 370.0301, "step": 16200 }, { "epoch": 0.3117577482666769, "grad_norm": 462.127878654082, "learning_rate": 1.5754982344178697e-05, "loss": 372.8345, "step": 16210 }, { "epoch": 0.31195007260243673, "grad_norm": 458.6353890517486, "learning_rate": 1.5749990104210534e-05, "loss": 385.0438, "step": 16220 }, { "epoch": 0.31214239693819656, "grad_norm": 505.3959018430225, "learning_rate": 1.574499572246602e-05, "loss": 377.8, "step": 16230 }, { "epoch": 0.3123347212739564, "grad_norm": 530.6902398817294, "learning_rate": 1.5739999200805483e-05, "loss": 396.8385, "step": 16240 }, { "epoch": 0.3125270456097162, "grad_norm": 469.0159835432282, "learning_rate": 1.573500054109004e-05, "loss": 382.3637, "step": 16250 }, { "epoch": 0.31271936994547606, "grad_norm": 468.0069546163065, "learning_rate": 1.5729999745181617e-05, "loss": 380.0171, "step": 16260 }, { "epoch": 0.3129116942812359, "grad_norm": 477.51891179615933, "learning_rate": 1.572499681494292e-05, "loss": 379.426, "step": 16270 }, { "epoch": 0.3131040186169957, "grad_norm": 480.5057157160723, "learning_rate": 1.571999175223746e-05, "loss": 376.0035, "step": 16280 }, { "epoch": 0.31329634295275555, "grad_norm": 499.78679071245256, "learning_rate": 1.571498455892954e-05, "loss": 387.6434, "step": 16290 }, { "epoch": 0.3134886672885154, "grad_norm": 476.4294157500738, "learning_rate": 1.570997523688426e-05, "loss": 388.008, "step": 16300 }, { "epoch": 0.31368099162427515, "grad_norm": 447.5674210996581, "learning_rate": 1.570496378796751e-05, "loss": 377.4202, "step": 16310 }, { "epoch": 0.313873315960035, "grad_norm": 467.2936933672317, "learning_rate": 1.5699950214045966e-05, "loss": 390.1317, "step": 16320 }, { "epoch": 0.3140656402957948, "grad_norm": 592.210511601536, "learning_rate": 1.5694934516987102e-05, "loss": 387.3247, "step": 16330 }, { "epoch": 0.31425796463155464, "grad_norm": 509.6399695265995, "learning_rate": 1.5689916698659193e-05, "loss": 391.6255, "step": 16340 }, { "epoch": 0.3144502889673145, "grad_norm": 475.9107510277038, "learning_rate": 1.568489676093128e-05, "loss": 370.1673, "step": 16350 }, { "epoch": 0.3146426133030743, "grad_norm": 479.4800658769051, "learning_rate": 1.5679874705673215e-05, "loss": 387.8395, "step": 16360 }, { "epoch": 0.31483493763883413, "grad_norm": 471.42216324652395, "learning_rate": 1.5674850534755628e-05, "loss": 387.203, "step": 16370 }, { "epoch": 0.31502726197459396, "grad_norm": 469.9233379455086, "learning_rate": 1.566982425004994e-05, "loss": 384.5128, "step": 16380 }, { "epoch": 0.3152195863103538, "grad_norm": 477.30929266097127, "learning_rate": 1.5664795853428357e-05, "loss": 380.8335, "step": 16390 }, { "epoch": 0.3154119106461136, "grad_norm": 450.40161426989624, "learning_rate": 1.565976534676388e-05, "loss": 375.3493, "step": 16400 }, { "epoch": 0.31560423498187345, "grad_norm": 508.10142288422304, "learning_rate": 1.5654732731930286e-05, "loss": 387.4695, "step": 16410 }, { "epoch": 0.31579655931763323, "grad_norm": 520.6726798846985, "learning_rate": 1.5649698010802138e-05, "loss": 388.4852, "step": 16420 }, { "epoch": 0.31598888365339306, "grad_norm": 505.42801805197837, "learning_rate": 1.564466118525479e-05, "loss": 376.6891, "step": 16430 }, { "epoch": 0.3161812079891529, "grad_norm": 477.95172533009156, "learning_rate": 1.5639622257164372e-05, "loss": 381.4712, "step": 16440 }, { "epoch": 0.3163735323249127, "grad_norm": 496.54564262131134, "learning_rate": 1.5634581228407807e-05, "loss": 369.9816, "step": 16450 }, { "epoch": 0.31656585666067255, "grad_norm": 486.65491609609694, "learning_rate": 1.562953810086279e-05, "loss": 380.1699, "step": 16460 }, { "epoch": 0.3167581809964324, "grad_norm": 508.2947239889114, "learning_rate": 1.562449287640781e-05, "loss": 374.3872, "step": 16470 }, { "epoch": 0.3169505053321922, "grad_norm": 475.81346313671924, "learning_rate": 1.5619445556922118e-05, "loss": 382.2745, "step": 16480 }, { "epoch": 0.31714282966795204, "grad_norm": 470.4893587917357, "learning_rate": 1.561439614428577e-05, "loss": 385.8443, "step": 16490 }, { "epoch": 0.3173351540037119, "grad_norm": 485.9223921477143, "learning_rate": 1.5609344640379585e-05, "loss": 384.2544, "step": 16500 }, { "epoch": 0.3175274783394717, "grad_norm": 521.2054049523408, "learning_rate": 1.560429104708516e-05, "loss": 373.9025, "step": 16510 }, { "epoch": 0.31771980267523153, "grad_norm": 485.2264953291109, "learning_rate": 1.5599235366284874e-05, "loss": 376.6931, "step": 16520 }, { "epoch": 0.31791212701099136, "grad_norm": 480.9287832495446, "learning_rate": 1.5594177599861894e-05, "loss": 385.223, "step": 16530 }, { "epoch": 0.31810445134675114, "grad_norm": 489.4742675039022, "learning_rate": 1.5589117749700147e-05, "loss": 381.2006, "step": 16540 }, { "epoch": 0.31829677568251097, "grad_norm": 458.57385493405224, "learning_rate": 1.5584055817684346e-05, "loss": 372.0469, "step": 16550 }, { "epoch": 0.3184891000182708, "grad_norm": 517.4634174509583, "learning_rate": 1.5578991805699975e-05, "loss": 382.9156, "step": 16560 }, { "epoch": 0.31868142435403063, "grad_norm": 496.50548891733985, "learning_rate": 1.5573925715633297e-05, "loss": 378.2142, "step": 16570 }, { "epoch": 0.31887374868979046, "grad_norm": 485.7748332980264, "learning_rate": 1.5568857549371348e-05, "loss": 385.6914, "step": 16580 }, { "epoch": 0.3190660730255503, "grad_norm": 514.8659900741009, "learning_rate": 1.5563787308801934e-05, "loss": 378.1995, "step": 16590 }, { "epoch": 0.3192583973613101, "grad_norm": 465.27092153134146, "learning_rate": 1.5558714995813636e-05, "loss": 378.0243, "step": 16600 }, { "epoch": 0.31945072169706995, "grad_norm": 480.4739220381383, "learning_rate": 1.5553640612295807e-05, "loss": 377.963, "step": 16610 }, { "epoch": 0.3196430460328298, "grad_norm": 468.7087140912121, "learning_rate": 1.5548564160138572e-05, "loss": 388.8835, "step": 16620 }, { "epoch": 0.3198353703685896, "grad_norm": 481.73616248602275, "learning_rate": 1.5543485641232825e-05, "loss": 374.0541, "step": 16630 }, { "epoch": 0.32002769470434944, "grad_norm": 489.5761439883091, "learning_rate": 1.553840505747023e-05, "loss": 371.5378, "step": 16640 }, { "epoch": 0.3202200190401092, "grad_norm": 452.1163008133125, "learning_rate": 1.5533322410743223e-05, "loss": 380.3839, "step": 16650 }, { "epoch": 0.32041234337586905, "grad_norm": 474.70436798658284, "learning_rate": 1.5528237702945e-05, "loss": 378.9041, "step": 16660 }, { "epoch": 0.3206046677116289, "grad_norm": 466.62285022413806, "learning_rate": 1.5523150935969534e-05, "loss": 373.3779, "step": 16670 }, { "epoch": 0.3207969920473887, "grad_norm": 474.8741849334241, "learning_rate": 1.5518062111711566e-05, "loss": 375.5115, "step": 16680 }, { "epoch": 0.32098931638314854, "grad_norm": 518.483571008647, "learning_rate": 1.5512971232066593e-05, "loss": 375.157, "step": 16690 }, { "epoch": 0.32118164071890837, "grad_norm": 484.72655755235917, "learning_rate": 1.5507878298930888e-05, "loss": 375.746, "step": 16700 }, { "epoch": 0.3213739650546682, "grad_norm": 498.67486744118185, "learning_rate": 1.5502783314201478e-05, "loss": 379.8609, "step": 16710 }, { "epoch": 0.32156628939042803, "grad_norm": 477.88687550637076, "learning_rate": 1.549768627977617e-05, "loss": 382.8419, "step": 16720 }, { "epoch": 0.32175861372618786, "grad_norm": 484.6069756423416, "learning_rate": 1.5492587197553517e-05, "loss": 388.9117, "step": 16730 }, { "epoch": 0.3219509380619477, "grad_norm": 505.736213411871, "learning_rate": 1.5487486069432848e-05, "loss": 378.6362, "step": 16740 }, { "epoch": 0.3221432623977075, "grad_norm": 497.96417750337025, "learning_rate": 1.5482382897314243e-05, "loss": 376.4035, "step": 16750 }, { "epoch": 0.3223355867334673, "grad_norm": 494.82135744094677, "learning_rate": 1.5477277683098555e-05, "loss": 392.0643, "step": 16760 }, { "epoch": 0.3225279110692271, "grad_norm": 484.427335221297, "learning_rate": 1.547217042868739e-05, "loss": 370.8638, "step": 16770 }, { "epoch": 0.32272023540498695, "grad_norm": 462.99282168935787, "learning_rate": 1.546706113598312e-05, "loss": 371.024, "step": 16780 }, { "epoch": 0.3229125597407468, "grad_norm": 491.05421379025603, "learning_rate": 1.5461949806888867e-05, "loss": 371.8982, "step": 16790 }, { "epoch": 0.3231048840765066, "grad_norm": 508.7786608188533, "learning_rate": 1.5456836443308512e-05, "loss": 379.871, "step": 16800 }, { "epoch": 0.32329720841226645, "grad_norm": 520.4386966025636, "learning_rate": 1.545172104714671e-05, "loss": 378.7275, "step": 16810 }, { "epoch": 0.3234895327480263, "grad_norm": 521.6918398886307, "learning_rate": 1.544660362030886e-05, "loss": 375.9082, "step": 16820 }, { "epoch": 0.3236818570837861, "grad_norm": 512.1836726299016, "learning_rate": 1.544148416470111e-05, "loss": 383.3218, "step": 16830 }, { "epoch": 0.32387418141954594, "grad_norm": 489.12754225502795, "learning_rate": 1.5436362682230378e-05, "loss": 386.0422, "step": 16840 }, { "epoch": 0.32406650575530577, "grad_norm": 466.65374940666953, "learning_rate": 1.543123917480433e-05, "loss": 381.8087, "step": 16850 }, { "epoch": 0.3242588300910656, "grad_norm": 461.40002697156746, "learning_rate": 1.542611364433139e-05, "loss": 385.4735, "step": 16860 }, { "epoch": 0.32445115442682537, "grad_norm": 459.4202788763098, "learning_rate": 1.5420986092720735e-05, "loss": 371.1076, "step": 16870 }, { "epoch": 0.3246434787625852, "grad_norm": 449.98582200463227, "learning_rate": 1.541585652188229e-05, "loss": 373.7883, "step": 16880 }, { "epoch": 0.32483580309834503, "grad_norm": 457.0824188517946, "learning_rate": 1.5410724933726732e-05, "loss": 385.3052, "step": 16890 }, { "epoch": 0.32502812743410486, "grad_norm": 470.0828548991895, "learning_rate": 1.5405591330165503e-05, "loss": 367.4301, "step": 16900 }, { "epoch": 0.3252204517698647, "grad_norm": 494.3550963053494, "learning_rate": 1.5400455713110777e-05, "loss": 381.5691, "step": 16910 }, { "epoch": 0.3254127761056245, "grad_norm": 495.14418149445703, "learning_rate": 1.539531808447549e-05, "loss": 385.7987, "step": 16920 }, { "epoch": 0.32560510044138435, "grad_norm": 472.9642363089013, "learning_rate": 1.5390178446173325e-05, "loss": 383.8926, "step": 16930 }, { "epoch": 0.3257974247771442, "grad_norm": 517.9876336877029, "learning_rate": 1.538503680011871e-05, "loss": 376.7862, "step": 16940 }, { "epoch": 0.325989749112904, "grad_norm": 505.3528534634227, "learning_rate": 1.537989314822682e-05, "loss": 376.2068, "step": 16950 }, { "epoch": 0.32618207344866385, "grad_norm": 474.1088352158935, "learning_rate": 1.5374747492413587e-05, "loss": 381.843, "step": 16960 }, { "epoch": 0.3263743977844237, "grad_norm": 509.77581047387105, "learning_rate": 1.536959983459568e-05, "loss": 381.9839, "step": 16970 }, { "epoch": 0.32656672212018345, "grad_norm": 466.4720994420121, "learning_rate": 1.536445017669052e-05, "loss": 373.449, "step": 16980 }, { "epoch": 0.3267590464559433, "grad_norm": 499.65117401835397, "learning_rate": 1.535929852061626e-05, "loss": 370.986, "step": 16990 }, { "epoch": 0.3269513707917031, "grad_norm": 502.879039502589, "learning_rate": 1.5354144868291817e-05, "loss": 387.5408, "step": 17000 }, { "epoch": 0.32714369512746294, "grad_norm": 509.3116178854893, "learning_rate": 1.5348989221636835e-05, "loss": 375.3288, "step": 17010 }, { "epoch": 0.32733601946322277, "grad_norm": 495.2204101094012, "learning_rate": 1.5343831582571706e-05, "loss": 395.3084, "step": 17020 }, { "epoch": 0.3275283437989826, "grad_norm": 458.64683776833766, "learning_rate": 1.5338671953017576e-05, "loss": 373.3518, "step": 17030 }, { "epoch": 0.32772066813474243, "grad_norm": 446.723133129233, "learning_rate": 1.5333510334896308e-05, "loss": 375.0342, "step": 17040 }, { "epoch": 0.32791299247050226, "grad_norm": 71497.58265802731, "learning_rate": 1.532834673013053e-05, "loss": 393.9046, "step": 17050 }, { "epoch": 0.3281053168062621, "grad_norm": 465.9825861908541, "learning_rate": 1.5323181140643598e-05, "loss": 373.7633, "step": 17060 }, { "epoch": 0.3282976411420219, "grad_norm": 511.23713354141057, "learning_rate": 1.5318013568359603e-05, "loss": 375.508, "step": 17070 }, { "epoch": 0.32848996547778175, "grad_norm": 468.334192498827, "learning_rate": 1.531284401520338e-05, "loss": 371.5967, "step": 17080 }, { "epoch": 0.3286822898135416, "grad_norm": 467.54562368668144, "learning_rate": 1.530767248310051e-05, "loss": 377.1894, "step": 17090 }, { "epoch": 0.32887461414930136, "grad_norm": 460.4204135496014, "learning_rate": 1.53024989739773e-05, "loss": 377.0717, "step": 17100 }, { "epoch": 0.3290669384850612, "grad_norm": 482.75785645727285, "learning_rate": 1.5297323489760792e-05, "loss": 374.5172, "step": 17110 }, { "epoch": 0.329259262820821, "grad_norm": 459.16051444697376, "learning_rate": 1.5292146032378778e-05, "loss": 383.0635, "step": 17120 }, { "epoch": 0.32945158715658085, "grad_norm": 488.2350542751421, "learning_rate": 1.5286966603759767e-05, "loss": 378.1121, "step": 17130 }, { "epoch": 0.3296439114923407, "grad_norm": 529.2036957718933, "learning_rate": 1.5281785205833013e-05, "loss": 373.3984, "step": 17140 }, { "epoch": 0.3298362358281005, "grad_norm": 495.58232268012483, "learning_rate": 1.52766018405285e-05, "loss": 374.0483, "step": 17150 }, { "epoch": 0.33002856016386034, "grad_norm": 462.7853111302842, "learning_rate": 1.5271416509776948e-05, "loss": 371.5238, "step": 17160 }, { "epoch": 0.33022088449962017, "grad_norm": 455.4394390356612, "learning_rate": 1.5266229215509806e-05, "loss": 375.2193, "step": 17170 }, { "epoch": 0.33041320883538, "grad_norm": 546.3913112434052, "learning_rate": 1.5261039959659257e-05, "loss": 382.3061, "step": 17180 }, { "epoch": 0.33060553317113983, "grad_norm": 516.4477945404207, "learning_rate": 1.5255848744158214e-05, "loss": 361.9489, "step": 17190 }, { "epoch": 0.33079785750689966, "grad_norm": 449.4434195607374, "learning_rate": 1.5250655570940317e-05, "loss": 390.1684, "step": 17200 }, { "epoch": 0.33099018184265944, "grad_norm": 490.190130280452, "learning_rate": 1.5245460441939934e-05, "loss": 382.9938, "step": 17210 }, { "epoch": 0.33118250617841927, "grad_norm": 527.3113368564473, "learning_rate": 1.5240263359092167e-05, "loss": 380.4742, "step": 17220 }, { "epoch": 0.3313748305141791, "grad_norm": 470.0570538586981, "learning_rate": 1.5235064324332846e-05, "loss": 386.2793, "step": 17230 }, { "epoch": 0.3315671548499389, "grad_norm": 486.9989422305508, "learning_rate": 1.5229863339598528e-05, "loss": 371.9208, "step": 17240 }, { "epoch": 0.33175947918569876, "grad_norm": 508.8063668870747, "learning_rate": 1.5224660406826486e-05, "loss": 368.9817, "step": 17250 }, { "epoch": 0.3319518035214586, "grad_norm": 504.0257942047159, "learning_rate": 1.5219455527954732e-05, "loss": 388.4633, "step": 17260 }, { "epoch": 0.3321441278572184, "grad_norm": 472.301557222717, "learning_rate": 1.5214248704921995e-05, "loss": 373.2826, "step": 17270 }, { "epoch": 0.33233645219297825, "grad_norm": 517.4597406367698, "learning_rate": 1.5209039939667731e-05, "loss": 376.0201, "step": 17280 }, { "epoch": 0.3325287765287381, "grad_norm": 545.273822728609, "learning_rate": 1.5203829234132118e-05, "loss": 375.969, "step": 17290 }, { "epoch": 0.3327211008644979, "grad_norm": 562.1083999001257, "learning_rate": 1.5198616590256064e-05, "loss": 382.0835, "step": 17300 }, { "epoch": 0.33291342520025774, "grad_norm": 495.6276533090367, "learning_rate": 1.5193402009981187e-05, "loss": 361.7445, "step": 17310 }, { "epoch": 0.3331057495360175, "grad_norm": 476.47835534030764, "learning_rate": 1.5188185495249832e-05, "loss": 381.098, "step": 17320 }, { "epoch": 0.33329807387177735, "grad_norm": 444.11661287010764, "learning_rate": 1.5182967048005069e-05, "loss": 376.8885, "step": 17330 }, { "epoch": 0.3334903982075372, "grad_norm": 470.2516077821844, "learning_rate": 1.5177746670190674e-05, "loss": 370.4099, "step": 17340 }, { "epoch": 0.333682722543297, "grad_norm": 482.9856806878494, "learning_rate": 1.5172524363751162e-05, "loss": 369.4453, "step": 17350 }, { "epoch": 0.33387504687905684, "grad_norm": 532.1990923733988, "learning_rate": 1.5167300130631748e-05, "loss": 376.5783, "step": 17360 }, { "epoch": 0.33406737121481667, "grad_norm": 485.45210052577, "learning_rate": 1.516207397277838e-05, "loss": 373.4351, "step": 17370 }, { "epoch": 0.3342596955505765, "grad_norm": 469.7862255954336, "learning_rate": 1.5156845892137711e-05, "loss": 361.6263, "step": 17380 }, { "epoch": 0.3344520198863363, "grad_norm": 478.2530516753436, "learning_rate": 1.5151615890657113e-05, "loss": 380.667, "step": 17390 }, { "epoch": 0.33464434422209616, "grad_norm": 469.38863379886374, "learning_rate": 1.5146383970284679e-05, "loss": 375.3877, "step": 17400 }, { "epoch": 0.334836668557856, "grad_norm": 417.1455417074075, "learning_rate": 1.514115013296921e-05, "loss": 372.6206, "step": 17410 }, { "epoch": 0.3350289928936158, "grad_norm": 494.0037941883318, "learning_rate": 1.513591438066023e-05, "loss": 374.6683, "step": 17420 }, { "epoch": 0.3352213172293756, "grad_norm": 505.1937804210555, "learning_rate": 1.5130676715307962e-05, "loss": 391.8111, "step": 17430 }, { "epoch": 0.3354136415651354, "grad_norm": 480.34724741981717, "learning_rate": 1.5125437138863353e-05, "loss": 390.2885, "step": 17440 }, { "epoch": 0.33560596590089525, "grad_norm": 478.2841053518114, "learning_rate": 1.512019565327806e-05, "loss": 379.6284, "step": 17450 }, { "epoch": 0.3357982902366551, "grad_norm": 483.98494783902385, "learning_rate": 1.5114952260504448e-05, "loss": 388.126, "step": 17460 }, { "epoch": 0.3359906145724149, "grad_norm": 465.02478443190154, "learning_rate": 1.5109706962495596e-05, "loss": 378.6843, "step": 17470 }, { "epoch": 0.33618293890817474, "grad_norm": 498.44092870110927, "learning_rate": 1.510445976120529e-05, "loss": 373.266, "step": 17480 }, { "epoch": 0.3363752632439346, "grad_norm": 474.0369123843142, "learning_rate": 1.5099210658588029e-05, "loss": 376.8539, "step": 17490 }, { "epoch": 0.3365675875796944, "grad_norm": 486.4450465086325, "learning_rate": 1.5093959656599008e-05, "loss": 380.5545, "step": 17500 }, { "epoch": 0.33675991191545424, "grad_norm": 484.1369924739329, "learning_rate": 1.5088706757194147e-05, "loss": 368.5146, "step": 17510 }, { "epoch": 0.33695223625121407, "grad_norm": 475.1897734956671, "learning_rate": 1.5083451962330062e-05, "loss": 370.8672, "step": 17520 }, { "epoch": 0.3371445605869739, "grad_norm": 504.2752531643805, "learning_rate": 1.5078195273964081e-05, "loss": 369.6901, "step": 17530 }, { "epoch": 0.33733688492273367, "grad_norm": 446.3898974181883, "learning_rate": 1.5072936694054222e-05, "loss": 370.656, "step": 17540 }, { "epoch": 0.3375292092584935, "grad_norm": 487.0597556124582, "learning_rate": 1.5067676224559231e-05, "loss": 377.0279, "step": 17550 }, { "epoch": 0.33772153359425333, "grad_norm": 479.5423938975111, "learning_rate": 1.506241386743854e-05, "loss": 376.4824, "step": 17560 }, { "epoch": 0.33791385793001316, "grad_norm": 477.43146565089535, "learning_rate": 1.5057149624652297e-05, "loss": 363.8776, "step": 17570 }, { "epoch": 0.338106182265773, "grad_norm": 541.270034106954, "learning_rate": 1.5051883498161334e-05, "loss": 369.7278, "step": 17580 }, { "epoch": 0.3382985066015328, "grad_norm": 520.5892848187016, "learning_rate": 1.5046615489927206e-05, "loss": 369.1312, "step": 17590 }, { "epoch": 0.33849083093729265, "grad_norm": 475.4938536630045, "learning_rate": 1.5041345601912154e-05, "loss": 371.8933, "step": 17600 }, { "epoch": 0.3386831552730525, "grad_norm": 498.1856271685703, "learning_rate": 1.5036073836079128e-05, "loss": 379.6265, "step": 17610 }, { "epoch": 0.3388754796088123, "grad_norm": 487.4943707001513, "learning_rate": 1.5030800194391773e-05, "loss": 366.008, "step": 17620 }, { "epoch": 0.33906780394457214, "grad_norm": 489.1970871708965, "learning_rate": 1.5025524678814428e-05, "loss": 378.8491, "step": 17630 }, { "epoch": 0.339260128280332, "grad_norm": 461.82368666616, "learning_rate": 1.5020247291312138e-05, "loss": 378.9849, "step": 17640 }, { "epoch": 0.3394524526160918, "grad_norm": 489.880742186845, "learning_rate": 1.5014968033850647e-05, "loss": 392.6186, "step": 17650 }, { "epoch": 0.3396447769518516, "grad_norm": 587.6277073218204, "learning_rate": 1.5009686908396388e-05, "loss": 378.6439, "step": 17660 }, { "epoch": 0.3398371012876114, "grad_norm": 448.2616848898422, "learning_rate": 1.5004403916916494e-05, "loss": 377.4304, "step": 17670 }, { "epoch": 0.34002942562337124, "grad_norm": 460.38681861251325, "learning_rate": 1.4999119061378791e-05, "loss": 379.6803, "step": 17680 }, { "epoch": 0.34022174995913107, "grad_norm": 475.82737567797994, "learning_rate": 1.4993832343751802e-05, "loss": 390.8007, "step": 17690 }, { "epoch": 0.3404140742948909, "grad_norm": 450.60313886665847, "learning_rate": 1.4988543766004737e-05, "loss": 369.8139, "step": 17700 }, { "epoch": 0.34060639863065073, "grad_norm": 485.42499080953183, "learning_rate": 1.498325333010751e-05, "loss": 362.2229, "step": 17710 }, { "epoch": 0.34079872296641056, "grad_norm": 466.7144395419084, "learning_rate": 1.497796103803072e-05, "loss": 378.5834, "step": 17720 }, { "epoch": 0.3409910473021704, "grad_norm": 491.45758109377846, "learning_rate": 1.4972666891745655e-05, "loss": 376.6357, "step": 17730 }, { "epoch": 0.3411833716379302, "grad_norm": 479.1086596452101, "learning_rate": 1.49673708932243e-05, "loss": 361.8653, "step": 17740 }, { "epoch": 0.34137569597369005, "grad_norm": 469.8843122325225, "learning_rate": 1.4962073044439328e-05, "loss": 379.0124, "step": 17750 }, { "epoch": 0.3415680203094499, "grad_norm": 462.44513637880374, "learning_rate": 1.4956773347364095e-05, "loss": 380.2196, "step": 17760 }, { "epoch": 0.34176034464520966, "grad_norm": 518.7330123514714, "learning_rate": 1.4951471803972657e-05, "loss": 375.6815, "step": 17770 }, { "epoch": 0.3419526689809695, "grad_norm": 530.270547201222, "learning_rate": 1.4946168416239746e-05, "loss": 372.9629, "step": 17780 }, { "epoch": 0.3421449933167293, "grad_norm": 482.13253350069255, "learning_rate": 1.4940863186140788e-05, "loss": 385.9331, "step": 17790 }, { "epoch": 0.34233731765248915, "grad_norm": 483.2864946447858, "learning_rate": 1.4935556115651898e-05, "loss": 364.5144, "step": 17800 }, { "epoch": 0.342529641988249, "grad_norm": 461.10352016301266, "learning_rate": 1.4930247206749863e-05, "loss": 367.5356, "step": 17810 }, { "epoch": 0.3427219663240088, "grad_norm": 500.35232414817864, "learning_rate": 1.4924936461412172e-05, "loss": 377.9878, "step": 17820 }, { "epoch": 0.34291429065976864, "grad_norm": 490.674885762262, "learning_rate": 1.4919623881616989e-05, "loss": 367.6417, "step": 17830 }, { "epoch": 0.34310661499552847, "grad_norm": 467.642760018096, "learning_rate": 1.4914309469343158e-05, "loss": 361.5473, "step": 17840 }, { "epoch": 0.3432989393312883, "grad_norm": 478.12902654557894, "learning_rate": 1.4908993226570214e-05, "loss": 389.5556, "step": 17850 }, { "epoch": 0.34349126366704813, "grad_norm": 486.2603294498519, "learning_rate": 1.4903675155278365e-05, "loss": 369.2395, "step": 17860 }, { "epoch": 0.34368358800280796, "grad_norm": 466.3609027262824, "learning_rate": 1.4898355257448508e-05, "loss": 378.8496, "step": 17870 }, { "epoch": 0.34387591233856774, "grad_norm": 446.2215810772876, "learning_rate": 1.4893033535062219e-05, "loss": 369.5298, "step": 17880 }, { "epoch": 0.34406823667432757, "grad_norm": 465.8005000615126, "learning_rate": 1.4887709990101748e-05, "loss": 374.6228, "step": 17890 }, { "epoch": 0.3442605610100874, "grad_norm": 487.0060744397368, "learning_rate": 1.4882384624550028e-05, "loss": 380.0092, "step": 17900 }, { "epoch": 0.3444528853458472, "grad_norm": 484.50067612771096, "learning_rate": 1.4877057440390672e-05, "loss": 364.349, "step": 17910 }, { "epoch": 0.34464520968160706, "grad_norm": 484.7299552912734, "learning_rate": 1.4871728439607967e-05, "loss": 371.8014, "step": 17920 }, { "epoch": 0.3448375340173669, "grad_norm": 459.16385695759345, "learning_rate": 1.4866397624186877e-05, "loss": 370.6781, "step": 17930 }, { "epoch": 0.3450298583531267, "grad_norm": 443.9478373940645, "learning_rate": 1.4861064996113042e-05, "loss": 367.4106, "step": 17940 }, { "epoch": 0.34522218268888655, "grad_norm": 495.3128492277379, "learning_rate": 1.485573055737278e-05, "loss": 379.6042, "step": 17950 }, { "epoch": 0.3454145070246464, "grad_norm": 511.57493450094523, "learning_rate": 1.485039430995308e-05, "loss": 374.9716, "step": 17960 }, { "epoch": 0.3456068313604062, "grad_norm": 509.76636955661064, "learning_rate": 1.4845056255841608e-05, "loss": 378.0369, "step": 17970 }, { "epoch": 0.34579915569616604, "grad_norm": 518.4135318649124, "learning_rate": 1.48397163970267e-05, "loss": 386.0346, "step": 17980 }, { "epoch": 0.3459914800319258, "grad_norm": 477.46111326271, "learning_rate": 1.4834374735497362e-05, "loss": 373.7529, "step": 17990 }, { "epoch": 0.34618380436768564, "grad_norm": 517.0975109192298, "learning_rate": 1.4829031273243277e-05, "loss": 382.4908, "step": 18000 }, { "epoch": 0.3463761287034455, "grad_norm": 481.90150309383307, "learning_rate": 1.4823686012254798e-05, "loss": 368.5236, "step": 18010 }, { "epoch": 0.3465684530392053, "grad_norm": 432.72358211087396, "learning_rate": 1.4818338954522943e-05, "loss": 363.4754, "step": 18020 }, { "epoch": 0.34676077737496513, "grad_norm": 475.8098001656779, "learning_rate": 1.4812990102039411e-05, "loss": 368.5819, "step": 18030 }, { "epoch": 0.34695310171072496, "grad_norm": 477.2507632238008, "learning_rate": 1.480763945679655e-05, "loss": 367.3182, "step": 18040 }, { "epoch": 0.3471454260464848, "grad_norm": 481.42520559266967, "learning_rate": 1.4802287020787396e-05, "loss": 367.6229, "step": 18050 }, { "epoch": 0.3473377503822446, "grad_norm": 521.5074361785358, "learning_rate": 1.4796932796005634e-05, "loss": 376.0522, "step": 18060 }, { "epoch": 0.34753007471800446, "grad_norm": 474.8537616330755, "learning_rate": 1.4791576784445632e-05, "loss": 366.5921, "step": 18070 }, { "epoch": 0.3477223990537643, "grad_norm": 473.9143272349859, "learning_rate": 1.4786218988102414e-05, "loss": 367.4316, "step": 18080 }, { "epoch": 0.3479147233895241, "grad_norm": 477.1176968146836, "learning_rate": 1.4780859408971668e-05, "loss": 363.1174, "step": 18090 }, { "epoch": 0.3481070477252839, "grad_norm": 482.7478512382826, "learning_rate": 1.4775498049049754e-05, "loss": 375.75, "step": 18100 }, { "epoch": 0.3482993720610437, "grad_norm": 468.48901283900074, "learning_rate": 1.4770134910333684e-05, "loss": 378.7698, "step": 18110 }, { "epoch": 0.34849169639680355, "grad_norm": 470.13430588110907, "learning_rate": 1.4764769994821145e-05, "loss": 381.0392, "step": 18120 }, { "epoch": 0.3486840207325634, "grad_norm": 521.5453057145618, "learning_rate": 1.4759403304510472e-05, "loss": 380.8982, "step": 18130 }, { "epoch": 0.3488763450683232, "grad_norm": 489.6819104975119, "learning_rate": 1.475403484140067e-05, "loss": 382.0617, "step": 18140 }, { "epoch": 0.34906866940408304, "grad_norm": 493.11120681882915, "learning_rate": 1.4748664607491408e-05, "loss": 373.9825, "step": 18150 }, { "epoch": 0.3492609937398429, "grad_norm": 531.8998048889769, "learning_rate": 1.4743292604783008e-05, "loss": 378.1855, "step": 18160 }, { "epoch": 0.3494533180756027, "grad_norm": 460.6150979845959, "learning_rate": 1.4737918835276451e-05, "loss": 369.2993, "step": 18170 }, { "epoch": 0.34964564241136253, "grad_norm": 566.5690486177965, "learning_rate": 1.4732543300973374e-05, "loss": 373.2924, "step": 18180 }, { "epoch": 0.34983796674712236, "grad_norm": 507.62894627917535, "learning_rate": 1.472716600387608e-05, "loss": 370.3426, "step": 18190 }, { "epoch": 0.3500302910828822, "grad_norm": 454.14600624306524, "learning_rate": 1.4721786945987519e-05, "loss": 378.0637, "step": 18200 }, { "epoch": 0.350222615418642, "grad_norm": 487.7970833153401, "learning_rate": 1.4716406129311307e-05, "loss": 375.8229, "step": 18210 }, { "epoch": 0.3504149397544018, "grad_norm": 557.1772760818087, "learning_rate": 1.4711023555851702e-05, "loss": 379.5749, "step": 18220 }, { "epoch": 0.35060726409016163, "grad_norm": 472.7505135280188, "learning_rate": 1.470563922761363e-05, "loss": 358.2809, "step": 18230 }, { "epoch": 0.35079958842592146, "grad_norm": 447.35925071647546, "learning_rate": 1.470025314660266e-05, "loss": 371.3621, "step": 18240 }, { "epoch": 0.3509919127616813, "grad_norm": 472.2543227029913, "learning_rate": 1.4694865314825024e-05, "loss": 378.2429, "step": 18250 }, { "epoch": 0.3511842370974411, "grad_norm": 513.1253676462142, "learning_rate": 1.4689475734287596e-05, "loss": 372.8578, "step": 18260 }, { "epoch": 0.35137656143320095, "grad_norm": 460.6382226840667, "learning_rate": 1.4684084406997903e-05, "loss": 376.7589, "step": 18270 }, { "epoch": 0.3515688857689608, "grad_norm": 493.2486285313152, "learning_rate": 1.467869133496413e-05, "loss": 372.8126, "step": 18280 }, { "epoch": 0.3517612101047206, "grad_norm": 460.3758687516171, "learning_rate": 1.4673296520195105e-05, "loss": 366.9916, "step": 18290 }, { "epoch": 0.35195353444048044, "grad_norm": 424.9863611582297, "learning_rate": 1.4667899964700309e-05, "loss": 369.9615, "step": 18300 }, { "epoch": 0.3521458587762403, "grad_norm": 471.53786531469933, "learning_rate": 1.466250167048987e-05, "loss": 371.1262, "step": 18310 }, { "epoch": 0.3523381831120001, "grad_norm": 477.33761854951507, "learning_rate": 1.4657101639574563e-05, "loss": 380.0143, "step": 18320 }, { "epoch": 0.3525305074477599, "grad_norm": 472.84647606185706, "learning_rate": 1.4651699873965808e-05, "loss": 373.6507, "step": 18330 }, { "epoch": 0.3527228317835197, "grad_norm": 492.7427337478879, "learning_rate": 1.4646296375675676e-05, "loss": 381.0266, "step": 18340 }, { "epoch": 0.35291515611927954, "grad_norm": 489.60803894214706, "learning_rate": 1.464089114671688e-05, "loss": 384.5405, "step": 18350 }, { "epoch": 0.35310748045503937, "grad_norm": 537.1395429440232, "learning_rate": 1.4635484189102776e-05, "loss": 369.3183, "step": 18360 }, { "epoch": 0.3532998047907992, "grad_norm": 507.2652867755757, "learning_rate": 1.4630075504847373e-05, "loss": 372.9186, "step": 18370 }, { "epoch": 0.35349212912655903, "grad_norm": 490.8104944284974, "learning_rate": 1.4624665095965311e-05, "loss": 359.4041, "step": 18380 }, { "epoch": 0.35368445346231886, "grad_norm": 500.95964086887125, "learning_rate": 1.4619252964471881e-05, "loss": 366.1107, "step": 18390 }, { "epoch": 0.3538767777980787, "grad_norm": 486.0674668984741, "learning_rate": 1.461383911238301e-05, "loss": 382.9969, "step": 18400 }, { "epoch": 0.3540691021338385, "grad_norm": 527.2809508699592, "learning_rate": 1.4608423541715273e-05, "loss": 368.5316, "step": 18410 }, { "epoch": 0.35426142646959835, "grad_norm": 501.4414508551843, "learning_rate": 1.4603006254485874e-05, "loss": 373.755, "step": 18420 }, { "epoch": 0.3544537508053582, "grad_norm": 466.86780574911705, "learning_rate": 1.4597587252712666e-05, "loss": 371.6094, "step": 18430 }, { "epoch": 0.35464607514111796, "grad_norm": 483.7910960149139, "learning_rate": 1.4592166538414136e-05, "loss": 360.8165, "step": 18440 }, { "epoch": 0.3548383994768778, "grad_norm": 473.212601935416, "learning_rate": 1.4586744113609416e-05, "loss": 374.5418, "step": 18450 }, { "epoch": 0.3550307238126376, "grad_norm": 457.5055478376314, "learning_rate": 1.4581319980318266e-05, "loss": 377.9521, "step": 18460 }, { "epoch": 0.35522304814839745, "grad_norm": 496.8836948958783, "learning_rate": 1.4575894140561086e-05, "loss": 361.657, "step": 18470 }, { "epoch": 0.3554153724841573, "grad_norm": 556.6375691072625, "learning_rate": 1.4570466596358914e-05, "loss": 381.3114, "step": 18480 }, { "epoch": 0.3556076968199171, "grad_norm": 466.10931472007314, "learning_rate": 1.4565037349733415e-05, "loss": 367.4475, "step": 18490 }, { "epoch": 0.35580002115567694, "grad_norm": 468.4653706677871, "learning_rate": 1.45596064027069e-05, "loss": 371.8457, "step": 18500 }, { "epoch": 0.35599234549143677, "grad_norm": 478.9980049459254, "learning_rate": 1.4554173757302303e-05, "loss": 361.1702, "step": 18510 }, { "epoch": 0.3561846698271966, "grad_norm": 514.8019688614664, "learning_rate": 1.4548739415543197e-05, "loss": 363.7348, "step": 18520 }, { "epoch": 0.35637699416295643, "grad_norm": 460.8756793552097, "learning_rate": 1.454330337945378e-05, "loss": 364.9218, "step": 18530 }, { "epoch": 0.35656931849871626, "grad_norm": 451.6966638431431, "learning_rate": 1.4537865651058893e-05, "loss": 364.4594, "step": 18540 }, { "epoch": 0.35676164283447603, "grad_norm": 483.5397751016099, "learning_rate": 1.4532426232383998e-05, "loss": 368.4219, "step": 18550 }, { "epoch": 0.35695396717023586, "grad_norm": 479.499384908411, "learning_rate": 1.4526985125455184e-05, "loss": 371.799, "step": 18560 }, { "epoch": 0.3571462915059957, "grad_norm": 480.66528347190183, "learning_rate": 1.4521542332299177e-05, "loss": 376.2643, "step": 18570 }, { "epoch": 0.3573386158417555, "grad_norm": 440.27272620204917, "learning_rate": 1.4516097854943325e-05, "loss": 364.4861, "step": 18580 }, { "epoch": 0.35753094017751536, "grad_norm": 470.423216406799, "learning_rate": 1.4510651695415612e-05, "loss": 358.5396, "step": 18590 }, { "epoch": 0.3577232645132752, "grad_norm": 544.9613137094686, "learning_rate": 1.4505203855744637e-05, "loss": 372.2926, "step": 18600 }, { "epoch": 0.357915588849035, "grad_norm": 469.33719869134734, "learning_rate": 1.4499754337959628e-05, "loss": 373.9996, "step": 18610 }, { "epoch": 0.35810791318479485, "grad_norm": 466.49979759304574, "learning_rate": 1.449430314409045e-05, "loss": 364.2124, "step": 18620 }, { "epoch": 0.3583002375205547, "grad_norm": 463.99293359458255, "learning_rate": 1.4488850276167572e-05, "loss": 370.5665, "step": 18630 }, { "epoch": 0.3584925618563145, "grad_norm": 470.6263087023456, "learning_rate": 1.44833957362221e-05, "loss": 377.4652, "step": 18640 }, { "epoch": 0.35868488619207434, "grad_norm": 499.339775205238, "learning_rate": 1.4477939526285767e-05, "loss": 379.4685, "step": 18650 }, { "epoch": 0.3588772105278341, "grad_norm": 483.2300276479665, "learning_rate": 1.4472481648390914e-05, "loss": 383.0591, "step": 18660 }, { "epoch": 0.35906953486359394, "grad_norm": 521.9206208099488, "learning_rate": 1.4467022104570514e-05, "loss": 376.079, "step": 18670 }, { "epoch": 0.3592618591993538, "grad_norm": 531.1641275059612, "learning_rate": 1.4461560896858156e-05, "loss": 370.4005, "step": 18680 }, { "epoch": 0.3594541835351136, "grad_norm": 467.6617049989319, "learning_rate": 1.4456098027288046e-05, "loss": 366.0533, "step": 18690 }, { "epoch": 0.35964650787087343, "grad_norm": 498.8822631824095, "learning_rate": 1.4450633497895017e-05, "loss": 368.0153, "step": 18700 }, { "epoch": 0.35983883220663326, "grad_norm": 461.3429916911708, "learning_rate": 1.4445167310714514e-05, "loss": 379.8898, "step": 18710 }, { "epoch": 0.3600311565423931, "grad_norm": 489.02969225920174, "learning_rate": 1.4439699467782602e-05, "loss": 365.7172, "step": 18720 }, { "epoch": 0.3602234808781529, "grad_norm": 483.3793464633447, "learning_rate": 1.4434229971135965e-05, "loss": 373.2573, "step": 18730 }, { "epoch": 0.36041580521391275, "grad_norm": 472.3147493799003, "learning_rate": 1.4428758822811894e-05, "loss": 366.5052, "step": 18740 }, { "epoch": 0.3606081295496726, "grad_norm": 537.1914996117569, "learning_rate": 1.442328602484831e-05, "loss": 378.5692, "step": 18750 }, { "epoch": 0.3608004538854324, "grad_norm": 469.97997058596286, "learning_rate": 1.441781157928373e-05, "loss": 363.9328, "step": 18760 }, { "epoch": 0.36099277822119225, "grad_norm": 463.8028779875606, "learning_rate": 1.44123354881573e-05, "loss": 359.4484, "step": 18770 }, { "epoch": 0.361185102556952, "grad_norm": 495.25411281555085, "learning_rate": 1.4406857753508772e-05, "loss": 375.4433, "step": 18780 }, { "epoch": 0.36137742689271185, "grad_norm": 500.1093255159945, "learning_rate": 1.4401378377378512e-05, "loss": 372.3941, "step": 18790 }, { "epoch": 0.3615697512284717, "grad_norm": 500.5009044830819, "learning_rate": 1.43958973618075e-05, "loss": 360.7825, "step": 18800 }, { "epoch": 0.3617620755642315, "grad_norm": 436.95753915479196, "learning_rate": 1.4390414708837322e-05, "loss": 362.9499, "step": 18810 }, { "epoch": 0.36195439989999134, "grad_norm": 481.0786028952087, "learning_rate": 1.4384930420510173e-05, "loss": 364.6544, "step": 18820 }, { "epoch": 0.36214672423575117, "grad_norm": 687.8924801110628, "learning_rate": 1.4379444498868864e-05, "loss": 362.6777, "step": 18830 }, { "epoch": 0.362339048571511, "grad_norm": 507.4525976413074, "learning_rate": 1.4373956945956807e-05, "loss": 382.8648, "step": 18840 }, { "epoch": 0.36253137290727083, "grad_norm": 482.6485776818181, "learning_rate": 1.4368467763818026e-05, "loss": 362.4334, "step": 18850 }, { "epoch": 0.36272369724303066, "grad_norm": 478.8925713377844, "learning_rate": 1.436297695449715e-05, "loss": 376.4112, "step": 18860 }, { "epoch": 0.3629160215787905, "grad_norm": 492.0712608783987, "learning_rate": 1.4357484520039412e-05, "loss": 364.4529, "step": 18870 }, { "epoch": 0.3631083459145503, "grad_norm": 549.5561926360407, "learning_rate": 1.4351990462490662e-05, "loss": 368.4865, "step": 18880 }, { "epoch": 0.3633006702503101, "grad_norm": 435.3191540823031, "learning_rate": 1.434649478389734e-05, "loss": 361.5154, "step": 18890 }, { "epoch": 0.36349299458606993, "grad_norm": 454.04073118695345, "learning_rate": 1.4340997486306491e-05, "loss": 376.9093, "step": 18900 }, { "epoch": 0.36368531892182976, "grad_norm": 478.90101693098524, "learning_rate": 1.4335498571765777e-05, "loss": 377.3971, "step": 18910 }, { "epoch": 0.3638776432575896, "grad_norm": 489.3196143523086, "learning_rate": 1.4329998042323447e-05, "loss": 357.2565, "step": 18920 }, { "epoch": 0.3640699675933494, "grad_norm": 487.7085603866977, "learning_rate": 1.4324495900028358e-05, "loss": 376.0046, "step": 18930 }, { "epoch": 0.36426229192910925, "grad_norm": 517.9821557927096, "learning_rate": 1.4318992146929967e-05, "loss": 367.6477, "step": 18940 }, { "epoch": 0.3644546162648691, "grad_norm": 465.237196888301, "learning_rate": 1.4313486785078335e-05, "loss": 365.7833, "step": 18950 }, { "epoch": 0.3646469406006289, "grad_norm": 494.7187196327382, "learning_rate": 1.4307979816524111e-05, "loss": 371.6348, "step": 18960 }, { "epoch": 0.36483926493638874, "grad_norm": 475.7723785490395, "learning_rate": 1.4302471243318554e-05, "loss": 374.1505, "step": 18970 }, { "epoch": 0.36503158927214857, "grad_norm": 446.2522602947378, "learning_rate": 1.4296961067513519e-05, "loss": 368.5986, "step": 18980 }, { "epoch": 0.3652239136079084, "grad_norm": 483.7849072662519, "learning_rate": 1.4291449291161452e-05, "loss": 365.7878, "step": 18990 }, { "epoch": 0.3654162379436682, "grad_norm": 447.9650747489853, "learning_rate": 1.4285935916315401e-05, "loss": 365.5911, "step": 19000 }, { "epoch": 0.365608562279428, "grad_norm": 540.3015209984369, "learning_rate": 1.4280420945029004e-05, "loss": 379.3237, "step": 19010 }, { "epoch": 0.36580088661518784, "grad_norm": 453.9456310980776, "learning_rate": 1.4274904379356498e-05, "loss": 370.3269, "step": 19020 }, { "epoch": 0.36599321095094767, "grad_norm": 478.07166752877004, "learning_rate": 1.4269386221352714e-05, "loss": 367.498, "step": 19030 }, { "epoch": 0.3661855352867075, "grad_norm": 474.19457759997107, "learning_rate": 1.4263866473073076e-05, "loss": 363.9463, "step": 19040 }, { "epoch": 0.36637785962246733, "grad_norm": 475.50977595489707, "learning_rate": 1.4258345136573595e-05, "loss": 360.9163, "step": 19050 }, { "epoch": 0.36657018395822716, "grad_norm": 490.7470611620901, "learning_rate": 1.4252822213910878e-05, "loss": 369.506, "step": 19060 }, { "epoch": 0.366762508293987, "grad_norm": 515.9097384940071, "learning_rate": 1.4247297707142126e-05, "loss": 365.0083, "step": 19070 }, { "epoch": 0.3669548326297468, "grad_norm": 463.39629709100745, "learning_rate": 1.4241771618325123e-05, "loss": 366.3742, "step": 19080 }, { "epoch": 0.36714715696550665, "grad_norm": 469.1828279196171, "learning_rate": 1.4236243949518249e-05, "loss": 384.597, "step": 19090 }, { "epoch": 0.3673394813012665, "grad_norm": 455.8680547902369, "learning_rate": 1.4230714702780466e-05, "loss": 367.4729, "step": 19100 }, { "epoch": 0.36753180563702625, "grad_norm": 467.31672538611986, "learning_rate": 1.422518388017133e-05, "loss": 374.8073, "step": 19110 }, { "epoch": 0.3677241299727861, "grad_norm": 457.39484363915466, "learning_rate": 1.4219651483750978e-05, "loss": 363.8252, "step": 19120 }, { "epoch": 0.3679164543085459, "grad_norm": 450.0351127175283, "learning_rate": 1.4214117515580139e-05, "loss": 370.6834, "step": 19130 }, { "epoch": 0.36810877864430575, "grad_norm": 480.7195428253656, "learning_rate": 1.4208581977720124e-05, "loss": 374.3641, "step": 19140 }, { "epoch": 0.3683011029800656, "grad_norm": 455.6731196219641, "learning_rate": 1.420304487223283e-05, "loss": 361.2098, "step": 19150 }, { "epoch": 0.3684934273158254, "grad_norm": 466.3299826478185, "learning_rate": 1.4197506201180737e-05, "loss": 367.955, "step": 19160 }, { "epoch": 0.36868575165158524, "grad_norm": 498.94977562167395, "learning_rate": 1.4191965966626908e-05, "loss": 357.7075, "step": 19170 }, { "epoch": 0.36887807598734507, "grad_norm": 461.95530452123086, "learning_rate": 1.418642417063499e-05, "loss": 360.8351, "step": 19180 }, { "epoch": 0.3690704003231049, "grad_norm": 492.1237723966069, "learning_rate": 1.4180880815269207e-05, "loss": 356.3411, "step": 19190 }, { "epoch": 0.3692627246588647, "grad_norm": 492.5773755139487, "learning_rate": 1.4175335902594372e-05, "loss": 365.5533, "step": 19200 }, { "epoch": 0.36945504899462456, "grad_norm": 478.34406730903527, "learning_rate": 1.416978943467587e-05, "loss": 370.8607, "step": 19210 }, { "epoch": 0.3696473733303844, "grad_norm": 490.27236538774264, "learning_rate": 1.4164241413579669e-05, "loss": 372.9625, "step": 19220 }, { "epoch": 0.36983969766614416, "grad_norm": 469.43304628567404, "learning_rate": 1.4158691841372318e-05, "loss": 372.9711, "step": 19230 }, { "epoch": 0.370032022001904, "grad_norm": 470.9117717020862, "learning_rate": 1.4153140720120936e-05, "loss": 363.0882, "step": 19240 }, { "epoch": 0.3702243463376638, "grad_norm": 456.1484694341779, "learning_rate": 1.4147588051893233e-05, "loss": 363.6552, "step": 19250 }, { "epoch": 0.37041667067342365, "grad_norm": 455.73373772994705, "learning_rate": 1.4142033838757476e-05, "loss": 364.5586, "step": 19260 }, { "epoch": 0.3706089950091835, "grad_norm": 442.4048520737741, "learning_rate": 1.4136478082782525e-05, "loss": 365.4893, "step": 19270 }, { "epoch": 0.3708013193449433, "grad_norm": 450.78640381123984, "learning_rate": 1.4130920786037798e-05, "loss": 366.0341, "step": 19280 }, { "epoch": 0.37099364368070314, "grad_norm": 465.85437142306637, "learning_rate": 1.412536195059331e-05, "loss": 373.8014, "step": 19290 }, { "epoch": 0.371185968016463, "grad_norm": 450.2548230321448, "learning_rate": 1.4119801578519625e-05, "loss": 375.3096, "step": 19300 }, { "epoch": 0.3713782923522228, "grad_norm": 467.73712802617746, "learning_rate": 1.4114239671887892e-05, "loss": 375.294, "step": 19310 }, { "epoch": 0.37157061668798264, "grad_norm": 519.3381325164414, "learning_rate": 1.4108676232769831e-05, "loss": 367.66, "step": 19320 }, { "epoch": 0.37176294102374247, "grad_norm": 491.61357238814594, "learning_rate": 1.410311126323773e-05, "loss": 369.1357, "step": 19330 }, { "epoch": 0.37195526535950224, "grad_norm": 520.1743038584326, "learning_rate": 1.409754476536445e-05, "loss": 359.8314, "step": 19340 }, { "epoch": 0.37214758969526207, "grad_norm": 455.0012210234154, "learning_rate": 1.4091976741223414e-05, "loss": 360.3283, "step": 19350 }, { "epoch": 0.3723399140310219, "grad_norm": 529.730473525027, "learning_rate": 1.408640719288863e-05, "loss": 382.2609, "step": 19360 }, { "epoch": 0.37253223836678173, "grad_norm": 463.0720758737707, "learning_rate": 1.408083612243465e-05, "loss": 375.0657, "step": 19370 }, { "epoch": 0.37272456270254156, "grad_norm": 458.8636394086308, "learning_rate": 1.4075263531936614e-05, "loss": 371.4612, "step": 19380 }, { "epoch": 0.3729168870383014, "grad_norm": 475.31286439351413, "learning_rate": 1.4069689423470219e-05, "loss": 360.5068, "step": 19390 }, { "epoch": 0.3731092113740612, "grad_norm": 475.28123605968545, "learning_rate": 1.4064113799111725e-05, "loss": 369.6281, "step": 19400 }, { "epoch": 0.37330153570982105, "grad_norm": 525.3843914109004, "learning_rate": 1.405853666093796e-05, "loss": 363.4456, "step": 19410 }, { "epoch": 0.3734938600455809, "grad_norm": 527.4287327678679, "learning_rate": 1.405295801102632e-05, "loss": 366.8147, "step": 19420 }, { "epoch": 0.3736861843813407, "grad_norm": 486.28243353481656, "learning_rate": 1.4047377851454758e-05, "loss": 374.8594, "step": 19430 }, { "epoch": 0.37387850871710054, "grad_norm": 459.48720811620757, "learning_rate": 1.4041796184301788e-05, "loss": 357.8949, "step": 19440 }, { "epoch": 0.3740708330528603, "grad_norm": 496.1689285256519, "learning_rate": 1.4036213011646496e-05, "loss": 375.414, "step": 19450 }, { "epoch": 0.37426315738862015, "grad_norm": 504.21145921952154, "learning_rate": 1.4030628335568515e-05, "loss": 376.2479, "step": 19460 }, { "epoch": 0.37445548172438, "grad_norm": 487.32683024066483, "learning_rate": 1.4025042158148048e-05, "loss": 365.3398, "step": 19470 }, { "epoch": 0.3746478060601398, "grad_norm": 500.42549569346295, "learning_rate": 1.4019454481465853e-05, "loss": 368.8787, "step": 19480 }, { "epoch": 0.37484013039589964, "grad_norm": 487.6545501117345, "learning_rate": 1.4013865307603248e-05, "loss": 360.3625, "step": 19490 }, { "epoch": 0.37503245473165947, "grad_norm": 485.5966714022845, "learning_rate": 1.4008274638642103e-05, "loss": 372.803, "step": 19500 }, { "epoch": 0.3752247790674193, "grad_norm": 464.27557752870007, "learning_rate": 1.4002682476664857e-05, "loss": 361.566, "step": 19510 }, { "epoch": 0.37541710340317913, "grad_norm": 468.2358679428454, "learning_rate": 1.3997088823754494e-05, "loss": 357.5025, "step": 19520 }, { "epoch": 0.37560942773893896, "grad_norm": 548.9838545754756, "learning_rate": 1.399149368199456e-05, "loss": 372.736, "step": 19530 }, { "epoch": 0.3758017520746988, "grad_norm": 470.50530292960065, "learning_rate": 1.398589705346915e-05, "loss": 362.6622, "step": 19540 }, { "epoch": 0.3759940764104586, "grad_norm": 472.32480058256823, "learning_rate": 1.3980298940262918e-05, "loss": 364.1768, "step": 19550 }, { "epoch": 0.3761864007462184, "grad_norm": 454.8190138741512, "learning_rate": 1.3974699344461065e-05, "loss": 363.9456, "step": 19560 }, { "epoch": 0.3763787250819782, "grad_norm": 462.7899615553797, "learning_rate": 1.396909826814935e-05, "loss": 361.9878, "step": 19570 }, { "epoch": 0.37657104941773806, "grad_norm": 496.5285422127198, "learning_rate": 1.3963495713414085e-05, "loss": 365.7559, "step": 19580 }, { "epoch": 0.3767633737534979, "grad_norm": 463.41592531727036, "learning_rate": 1.3957891682342127e-05, "loss": 369.7199, "step": 19590 }, { "epoch": 0.3769556980892577, "grad_norm": 513.3141011068556, "learning_rate": 1.3952286177020879e-05, "loss": 368.0453, "step": 19600 }, { "epoch": 0.37714802242501755, "grad_norm": 493.3014176705202, "learning_rate": 1.3946679199538308e-05, "loss": 359.7146, "step": 19610 }, { "epoch": 0.3773403467607774, "grad_norm": 457.4156258324006, "learning_rate": 1.3941070751982917e-05, "loss": 359.958, "step": 19620 }, { "epoch": 0.3775326710965372, "grad_norm": 458.2063917914075, "learning_rate": 1.3935460836443758e-05, "loss": 364.2169, "step": 19630 }, { "epoch": 0.37772499543229704, "grad_norm": 465.3891272534213, "learning_rate": 1.3929849455010433e-05, "loss": 371.9292, "step": 19640 }, { "epoch": 0.37791731976805687, "grad_norm": 473.1018145388463, "learning_rate": 1.3924236609773094e-05, "loss": 366.6291, "step": 19650 }, { "epoch": 0.3781096441038167, "grad_norm": 452.29374844932244, "learning_rate": 1.3918622302822425e-05, "loss": 364.0584, "step": 19660 }, { "epoch": 0.3783019684395765, "grad_norm": 452.7003382353477, "learning_rate": 1.391300653624967e-05, "loss": 373.7889, "step": 19670 }, { "epoch": 0.3784942927753363, "grad_norm": 431.7075819021544, "learning_rate": 1.39073893121466e-05, "loss": 367.3789, "step": 19680 }, { "epoch": 0.37868661711109614, "grad_norm": 474.0787559246814, "learning_rate": 1.3901770632605546e-05, "loss": 364.3479, "step": 19690 }, { "epoch": 0.37887894144685597, "grad_norm": 456.01172132628227, "learning_rate": 1.3896150499719372e-05, "loss": 359.5419, "step": 19700 }, { "epoch": 0.3790712657826158, "grad_norm": 487.28696942430304, "learning_rate": 1.3890528915581482e-05, "loss": 367.0523, "step": 19710 }, { "epoch": 0.3792635901183756, "grad_norm": 495.7553666359008, "learning_rate": 1.3884905882285829e-05, "loss": 367.023, "step": 19720 }, { "epoch": 0.37945591445413546, "grad_norm": 431.17508580800506, "learning_rate": 1.3879281401926894e-05, "loss": 360.7892, "step": 19730 }, { "epoch": 0.3796482387898953, "grad_norm": 477.5925671809703, "learning_rate": 1.3873655476599707e-05, "loss": 358.9177, "step": 19740 }, { "epoch": 0.3798405631256551, "grad_norm": 445.276611087243, "learning_rate": 1.3868028108399829e-05, "loss": 362.4861, "step": 19750 }, { "epoch": 0.38003288746141495, "grad_norm": 446.24543483448787, "learning_rate": 1.3862399299423364e-05, "loss": 363.028, "step": 19760 }, { "epoch": 0.3802252117971748, "grad_norm": 475.1323465293365, "learning_rate": 1.3856769051766947e-05, "loss": 366.61, "step": 19770 }, { "epoch": 0.3804175361329346, "grad_norm": 453.86170588270767, "learning_rate": 1.3851137367527757e-05, "loss": 352.2456, "step": 19780 }, { "epoch": 0.3806098604686944, "grad_norm": 457.86424782971244, "learning_rate": 1.3845504248803501e-05, "loss": 366.6129, "step": 19790 }, { "epoch": 0.3808021848044542, "grad_norm": 471.2643524137598, "learning_rate": 1.383986969769242e-05, "loss": 377.4821, "step": 19800 }, { "epoch": 0.38099450914021404, "grad_norm": 482.84436395394437, "learning_rate": 1.38342337162933e-05, "loss": 368.8804, "step": 19810 }, { "epoch": 0.3811868334759739, "grad_norm": 457.05015934093933, "learning_rate": 1.3828596306705442e-05, "loss": 360.2316, "step": 19820 }, { "epoch": 0.3813791578117337, "grad_norm": 452.45954436302424, "learning_rate": 1.3822957471028693e-05, "loss": 354.8089, "step": 19830 }, { "epoch": 0.38157148214749353, "grad_norm": 493.8366998821656, "learning_rate": 1.3817317211363422e-05, "loss": 355.6104, "step": 19840 }, { "epoch": 0.38176380648325337, "grad_norm": 533.0477094840109, "learning_rate": 1.3811675529810535e-05, "loss": 362.2145, "step": 19850 }, { "epoch": 0.3819561308190132, "grad_norm": 501.3865763672517, "learning_rate": 1.3806032428471463e-05, "loss": 365.4072, "step": 19860 }, { "epoch": 0.382148455154773, "grad_norm": 477.5726462219742, "learning_rate": 1.3800387909448171e-05, "loss": 366.6195, "step": 19870 }, { "epoch": 0.38234077949053286, "grad_norm": 483.90916861451507, "learning_rate": 1.3794741974843154e-05, "loss": 372.125, "step": 19880 }, { "epoch": 0.3825331038262927, "grad_norm": 466.06497337132845, "learning_rate": 1.3789094626759419e-05, "loss": 358.4106, "step": 19890 }, { "epoch": 0.38272542816205246, "grad_norm": 439.4312828146732, "learning_rate": 1.3783445867300515e-05, "loss": 368.8052, "step": 19900 }, { "epoch": 0.3829177524978123, "grad_norm": 448.6623636390141, "learning_rate": 1.3777795698570511e-05, "loss": 371.1609, "step": 19910 }, { "epoch": 0.3831100768335721, "grad_norm": 534.8038140927481, "learning_rate": 1.3772144122674e-05, "loss": 356.932, "step": 19920 }, { "epoch": 0.38330240116933195, "grad_norm": 445.9865402663116, "learning_rate": 1.3766491141716103e-05, "loss": 360.1602, "step": 19930 }, { "epoch": 0.3834947255050918, "grad_norm": 471.0283681984944, "learning_rate": 1.3760836757802462e-05, "loss": 366.6469, "step": 19940 }, { "epoch": 0.3836870498408516, "grad_norm": 468.1572778667453, "learning_rate": 1.3755180973039241e-05, "loss": 366.2488, "step": 19950 }, { "epoch": 0.38387937417661144, "grad_norm": 506.7616728459948, "learning_rate": 1.3749523789533128e-05, "loss": 362.2985, "step": 19960 }, { "epoch": 0.3840716985123713, "grad_norm": 472.9821945188994, "learning_rate": 1.374386520939133e-05, "loss": 358.6702, "step": 19970 }, { "epoch": 0.3842640228481311, "grad_norm": 509.3010479427292, "learning_rate": 1.3738205234721568e-05, "loss": 379.9848, "step": 19980 }, { "epoch": 0.38445634718389093, "grad_norm": 485.03072891581843, "learning_rate": 1.3732543867632098e-05, "loss": 362.7552, "step": 19990 }, { "epoch": 0.38464867151965076, "grad_norm": 453.2582155133797, "learning_rate": 1.3726881110231682e-05, "loss": 362.8308, "step": 20000 }, { "epoch": 0.38484099585541054, "grad_norm": 460.1577311549998, "learning_rate": 1.3721216964629605e-05, "loss": 366.6201, "step": 20010 }, { "epoch": 0.38503332019117037, "grad_norm": 461.28718521613234, "learning_rate": 1.3715551432935664e-05, "loss": 377.0017, "step": 20020 }, { "epoch": 0.3852256445269302, "grad_norm": 472.8270687580648, "learning_rate": 1.3709884517260178e-05, "loss": 378.4079, "step": 20030 }, { "epoch": 0.38541796886269003, "grad_norm": 479.17719695105575, "learning_rate": 1.3704216219713984e-05, "loss": 356.01, "step": 20040 }, { "epoch": 0.38561029319844986, "grad_norm": 491.09679332711613, "learning_rate": 1.3698546542408424e-05, "loss": 366.6224, "step": 20050 }, { "epoch": 0.3858026175342097, "grad_norm": 472.94637955620817, "learning_rate": 1.369287548745536e-05, "loss": 358.1708, "step": 20060 }, { "epoch": 0.3859949418699695, "grad_norm": 633.3068023514486, "learning_rate": 1.3687203056967165e-05, "loss": 367.8477, "step": 20070 }, { "epoch": 0.38618726620572935, "grad_norm": 493.0946979828676, "learning_rate": 1.368152925305673e-05, "loss": 370.3541, "step": 20080 }, { "epoch": 0.3863795905414892, "grad_norm": 465.67294376250896, "learning_rate": 1.3675854077837449e-05, "loss": 365.836, "step": 20090 }, { "epoch": 0.386571914877249, "grad_norm": 438.3645445878314, "learning_rate": 1.3670177533423234e-05, "loss": 354.1514, "step": 20100 }, { "epoch": 0.38676423921300884, "grad_norm": 458.8383294964573, "learning_rate": 1.3664499621928502e-05, "loss": 361.6879, "step": 20110 }, { "epoch": 0.3869565635487686, "grad_norm": 479.9508481813655, "learning_rate": 1.3658820345468183e-05, "loss": 367.6514, "step": 20120 }, { "epoch": 0.38714888788452845, "grad_norm": 450.4661863666756, "learning_rate": 1.365313970615771e-05, "loss": 357.3286, "step": 20130 }, { "epoch": 0.3873412122202883, "grad_norm": 476.57308222670775, "learning_rate": 1.3647457706113031e-05, "loss": 368.3551, "step": 20140 }, { "epoch": 0.3875335365560481, "grad_norm": 478.1509107793025, "learning_rate": 1.36417743474506e-05, "loss": 369.8673, "step": 20150 }, { "epoch": 0.38772586089180794, "grad_norm": 450.1315484878896, "learning_rate": 1.3636089632287369e-05, "loss": 376.2891, "step": 20160 }, { "epoch": 0.38791818522756777, "grad_norm": 466.403720146349, "learning_rate": 1.36304035627408e-05, "loss": 368.5667, "step": 20170 }, { "epoch": 0.3881105095633276, "grad_norm": 438.24305150782135, "learning_rate": 1.3624716140928861e-05, "loss": 376.8687, "step": 20180 }, { "epoch": 0.38830283389908743, "grad_norm": 448.70357037877056, "learning_rate": 1.3619027368970025e-05, "loss": 369.4069, "step": 20190 }, { "epoch": 0.38849515823484726, "grad_norm": 488.48898350293024, "learning_rate": 1.3613337248983265e-05, "loss": 360.4751, "step": 20200 }, { "epoch": 0.3886874825706071, "grad_norm": 478.67847656757516, "learning_rate": 1.3607645783088055e-05, "loss": 366.1508, "step": 20210 }, { "epoch": 0.3888798069063669, "grad_norm": 460.859899181671, "learning_rate": 1.3601952973404376e-05, "loss": 373.7587, "step": 20220 }, { "epoch": 0.3890721312421267, "grad_norm": 427.59912678933176, "learning_rate": 1.35962588220527e-05, "loss": 376.6293, "step": 20230 }, { "epoch": 0.3892644555778865, "grad_norm": 483.46941360647116, "learning_rate": 1.3590563331154008e-05, "loss": 376.1973, "step": 20240 }, { "epoch": 0.38945677991364636, "grad_norm": 462.9376483176115, "learning_rate": 1.3584866502829774e-05, "loss": 368.2831, "step": 20250 }, { "epoch": 0.3896491042494062, "grad_norm": 478.95172047171945, "learning_rate": 1.3579168339201975e-05, "loss": 366.7303, "step": 20260 }, { "epoch": 0.389841428585166, "grad_norm": 463.70949404051413, "learning_rate": 1.3573468842393077e-05, "loss": 361.8117, "step": 20270 }, { "epoch": 0.39003375292092585, "grad_norm": 458.8731170959038, "learning_rate": 1.356776801452606e-05, "loss": 366.3602, "step": 20280 }, { "epoch": 0.3902260772566857, "grad_norm": 446.06714177503085, "learning_rate": 1.3562065857724378e-05, "loss": 366.7551, "step": 20290 }, { "epoch": 0.3904184015924455, "grad_norm": 518.5084723170153, "learning_rate": 1.3556362374111993e-05, "loss": 362.1282, "step": 20300 }, { "epoch": 0.39061072592820534, "grad_norm": 508.5864425234927, "learning_rate": 1.3550657565813362e-05, "loss": 369.1649, "step": 20310 }, { "epoch": 0.39080305026396517, "grad_norm": 470.47778414380946, "learning_rate": 1.3544951434953423e-05, "loss": 358.6583, "step": 20320 }, { "epoch": 0.390995374599725, "grad_norm": 468.5432529014539, "learning_rate": 1.3539243983657627e-05, "loss": 363.1038, "step": 20330 }, { "epoch": 0.39118769893548483, "grad_norm": 453.1641573954629, "learning_rate": 1.3533535214051896e-05, "loss": 363.9854, "step": 20340 }, { "epoch": 0.3913800232712446, "grad_norm": 484.96424082685286, "learning_rate": 1.3527825128262656e-05, "loss": 367.4173, "step": 20350 }, { "epoch": 0.39157234760700443, "grad_norm": 448.83586770584895, "learning_rate": 1.3522113728416821e-05, "loss": 372.012, "step": 20360 }, { "epoch": 0.39176467194276426, "grad_norm": 458.8468604306312, "learning_rate": 1.3516401016641793e-05, "loss": 359.7743, "step": 20370 }, { "epoch": 0.3919569962785241, "grad_norm": 494.97774992361997, "learning_rate": 1.351068699506546e-05, "loss": 365.4681, "step": 20380 }, { "epoch": 0.3921493206142839, "grad_norm": 436.9973681365952, "learning_rate": 1.3504971665816202e-05, "loss": 360.1445, "step": 20390 }, { "epoch": 0.39234164495004376, "grad_norm": 467.59108084396087, "learning_rate": 1.3499255031022887e-05, "loss": 353.143, "step": 20400 }, { "epoch": 0.3925339692858036, "grad_norm": 455.5612168175912, "learning_rate": 1.3493537092814863e-05, "loss": 366.5097, "step": 20410 }, { "epoch": 0.3927262936215634, "grad_norm": 525.4407351856285, "learning_rate": 1.348781785332197e-05, "loss": 363.0196, "step": 20420 }, { "epoch": 0.39291861795732325, "grad_norm": 479.77735869209687, "learning_rate": 1.3482097314674526e-05, "loss": 359.4147, "step": 20430 }, { "epoch": 0.3931109422930831, "grad_norm": 501.05363325645897, "learning_rate": 1.3476375479003347e-05, "loss": 357.4058, "step": 20440 }, { "epoch": 0.3933032666288429, "grad_norm": 499.8402290218398, "learning_rate": 1.3470652348439715e-05, "loss": 361.6666, "step": 20450 }, { "epoch": 0.3934955909646027, "grad_norm": 778.1187248257576, "learning_rate": 1.3464927925115405e-05, "loss": 379.1409, "step": 20460 }, { "epoch": 0.3936879153003625, "grad_norm": 480.1673190600996, "learning_rate": 1.3459202211162663e-05, "loss": 359.0808, "step": 20470 }, { "epoch": 0.39388023963612234, "grad_norm": 524.8709136005673, "learning_rate": 1.345347520871423e-05, "loss": 360.9238, "step": 20480 }, { "epoch": 0.3940725639718822, "grad_norm": 478.43321389043695, "learning_rate": 1.3447746919903318e-05, "loss": 356.7554, "step": 20490 }, { "epoch": 0.394264888307642, "grad_norm": 435.3551589654242, "learning_rate": 1.3442017346863618e-05, "loss": 366.2917, "step": 20500 }, { "epoch": 0.39445721264340183, "grad_norm": 468.6913303477722, "learning_rate": 1.3436286491729306e-05, "loss": 372.9864, "step": 20510 }, { "epoch": 0.39464953697916166, "grad_norm": 466.00802438493156, "learning_rate": 1.3430554356635029e-05, "loss": 367.0901, "step": 20520 }, { "epoch": 0.3948418613149215, "grad_norm": 452.08959574790794, "learning_rate": 1.342482094371591e-05, "loss": 359.0619, "step": 20530 }, { "epoch": 0.3950341856506813, "grad_norm": 453.30372623543707, "learning_rate": 1.341908625510755e-05, "loss": 372.5314, "step": 20540 }, { "epoch": 0.39522650998644115, "grad_norm": 434.6113949594394, "learning_rate": 1.341335029294603e-05, "loss": 357.4971, "step": 20550 }, { "epoch": 0.395418834322201, "grad_norm": 441.5489979908519, "learning_rate": 1.3407613059367898e-05, "loss": 360.6357, "step": 20560 }, { "epoch": 0.39561115865796076, "grad_norm": 498.03942572073885, "learning_rate": 1.3401874556510182e-05, "loss": 361.7844, "step": 20570 }, { "epoch": 0.3958034829937206, "grad_norm": 477.9780355552977, "learning_rate": 1.3396134786510375e-05, "loss": 368.7838, "step": 20580 }, { "epoch": 0.3959958073294804, "grad_norm": 534.6909859502371, "learning_rate": 1.3390393751506452e-05, "loss": 365.4386, "step": 20590 }, { "epoch": 0.39618813166524025, "grad_norm": 459.42743548275524, "learning_rate": 1.338465145363685e-05, "loss": 354.5065, "step": 20600 }, { "epoch": 0.3963804560010001, "grad_norm": 472.3562074849193, "learning_rate": 1.3378907895040478e-05, "loss": 367.1442, "step": 20610 }, { "epoch": 0.3965727803367599, "grad_norm": 467.66253604461303, "learning_rate": 1.3373163077856724e-05, "loss": 359.797, "step": 20620 }, { "epoch": 0.39676510467251974, "grad_norm": 461.52725967280736, "learning_rate": 1.3367417004225429e-05, "loss": 367.3525, "step": 20630 }, { "epoch": 0.39695742900827957, "grad_norm": 451.3392313190913, "learning_rate": 1.3361669676286919e-05, "loss": 366.7544, "step": 20640 }, { "epoch": 0.3971497533440394, "grad_norm": 459.12627050804315, "learning_rate": 1.335592109618197e-05, "loss": 349.8331, "step": 20650 }, { "epoch": 0.39734207767979923, "grad_norm": 472.8396109907538, "learning_rate": 1.335017126605184e-05, "loss": 367.9225, "step": 20660 }, { "epoch": 0.39753440201555906, "grad_norm": 446.27922333169556, "learning_rate": 1.3344420188038243e-05, "loss": 356.1516, "step": 20670 }, { "epoch": 0.39772672635131884, "grad_norm": 453.5543158766576, "learning_rate": 1.333866786428336e-05, "loss": 365.2807, "step": 20680 }, { "epoch": 0.39791905068707867, "grad_norm": 480.12907925444244, "learning_rate": 1.3332914296929838e-05, "loss": 370.1983, "step": 20690 }, { "epoch": 0.3981113750228385, "grad_norm": 440.4908319470761, "learning_rate": 1.3327159488120784e-05, "loss": 360.8958, "step": 20700 }, { "epoch": 0.39830369935859833, "grad_norm": 478.82279115675357, "learning_rate": 1.3321403439999775e-05, "loss": 363.128, "step": 20710 }, { "epoch": 0.39849602369435816, "grad_norm": 462.46524586081466, "learning_rate": 1.3315646154710835e-05, "loss": 372.2664, "step": 20720 }, { "epoch": 0.398688348030118, "grad_norm": 483.8009497849133, "learning_rate": 1.3309887634398466e-05, "loss": 372.2889, "step": 20730 }, { "epoch": 0.3988806723658778, "grad_norm": 477.1379085372006, "learning_rate": 1.3304127881207614e-05, "loss": 357.0821, "step": 20740 }, { "epoch": 0.39907299670163765, "grad_norm": 481.4974602358291, "learning_rate": 1.3298366897283697e-05, "loss": 369.5656, "step": 20750 }, { "epoch": 0.3992653210373975, "grad_norm": 435.3978580088629, "learning_rate": 1.3292604684772585e-05, "loss": 361.2189, "step": 20760 }, { "epoch": 0.3994576453731573, "grad_norm": 440.91582015318886, "learning_rate": 1.3286841245820605e-05, "loss": 359.8629, "step": 20770 }, { "epoch": 0.39964996970891714, "grad_norm": 451.1196756325273, "learning_rate": 1.3281076582574548e-05, "loss": 359.707, "step": 20780 }, { "epoch": 0.3998422940446769, "grad_norm": 452.4582348651019, "learning_rate": 1.3275310697181652e-05, "loss": 357.2123, "step": 20790 }, { "epoch": 0.40003461838043675, "grad_norm": 452.9152479344711, "learning_rate": 1.3269543591789616e-05, "loss": 354.137, "step": 20800 }, { "epoch": 0.4002269427161966, "grad_norm": 466.0739270843239, "learning_rate": 1.3263775268546588e-05, "loss": 363.9303, "step": 20810 }, { "epoch": 0.4004192670519564, "grad_norm": 448.51023925319134, "learning_rate": 1.3258005729601178e-05, "loss": 365.6434, "step": 20820 }, { "epoch": 0.40061159138771624, "grad_norm": 476.91273009734255, "learning_rate": 1.325223497710244e-05, "loss": 369.1298, "step": 20830 }, { "epoch": 0.40080391572347607, "grad_norm": 483.803283035268, "learning_rate": 1.3246463013199882e-05, "loss": 369.1609, "step": 20840 }, { "epoch": 0.4009962400592359, "grad_norm": 576.2222722747586, "learning_rate": 1.3240689840043475e-05, "loss": 365.8833, "step": 20850 }, { "epoch": 0.40118856439499573, "grad_norm": 496.65959556817023, "learning_rate": 1.323491545978362e-05, "loss": 358.0746, "step": 20860 }, { "epoch": 0.40138088873075556, "grad_norm": 456.8910471692591, "learning_rate": 1.3229139874571186e-05, "loss": 359.431, "step": 20870 }, { "epoch": 0.4015732130665154, "grad_norm": 460.0008152653636, "learning_rate": 1.3223363086557477e-05, "loss": 355.9718, "step": 20880 }, { "epoch": 0.4017655374022752, "grad_norm": 466.65609178345323, "learning_rate": 1.3217585097894255e-05, "loss": 349.2668, "step": 20890 }, { "epoch": 0.40195786173803505, "grad_norm": 477.95348563997453, "learning_rate": 1.3211805910733724e-05, "loss": 367.3587, "step": 20900 }, { "epoch": 0.4021501860737948, "grad_norm": 524.414526216332, "learning_rate": 1.3206025527228535e-05, "loss": 359.227, "step": 20910 }, { "epoch": 0.40234251040955465, "grad_norm": 469.5417929189521, "learning_rate": 1.3200243949531788e-05, "loss": 368.1941, "step": 20920 }, { "epoch": 0.4025348347453145, "grad_norm": 470.9943123772341, "learning_rate": 1.3194461179797023e-05, "loss": 358.7273, "step": 20930 }, { "epoch": 0.4027271590810743, "grad_norm": 452.1509868117934, "learning_rate": 1.3188677220178225e-05, "loss": 345.5576, "step": 20940 }, { "epoch": 0.40291948341683415, "grad_norm": 480.1860625748449, "learning_rate": 1.3182892072829828e-05, "loss": 347.6384, "step": 20950 }, { "epoch": 0.403111807752594, "grad_norm": 529.9280626935827, "learning_rate": 1.3177105739906702e-05, "loss": 349.8616, "step": 20960 }, { "epoch": 0.4033041320883538, "grad_norm": 472.1405012673907, "learning_rate": 1.3171318223564156e-05, "loss": 361.0386, "step": 20970 }, { "epoch": 0.40349645642411364, "grad_norm": 438.29909332627955, "learning_rate": 1.3165529525957947e-05, "loss": 363.7229, "step": 20980 }, { "epoch": 0.40368878075987347, "grad_norm": 493.9356324770707, "learning_rate": 1.3159739649244271e-05, "loss": 361.6345, "step": 20990 }, { "epoch": 0.4038811050956333, "grad_norm": 484.5949545828873, "learning_rate": 1.3153948595579764e-05, "loss": 364.8096, "step": 21000 }, { "epoch": 0.4040734294313931, "grad_norm": 418.17653626818446, "learning_rate": 1.3148156367121491e-05, "loss": 368.4321, "step": 21010 }, { "epoch": 0.4042657537671529, "grad_norm": 499.708926652198, "learning_rate": 1.3142362966026967e-05, "loss": 359.458, "step": 21020 }, { "epoch": 0.40445807810291273, "grad_norm": 459.6739275932414, "learning_rate": 1.3136568394454136e-05, "loss": 362.5559, "step": 21030 }, { "epoch": 0.40465040243867256, "grad_norm": 441.3454725459896, "learning_rate": 1.313077265456138e-05, "loss": 357.4751, "step": 21040 }, { "epoch": 0.4048427267744324, "grad_norm": 482.74303749486506, "learning_rate": 1.3124975748507514e-05, "loss": 365.2877, "step": 21050 }, { "epoch": 0.4050350511101922, "grad_norm": 454.77702380243915, "learning_rate": 1.3119177678451793e-05, "loss": 371.1625, "step": 21060 }, { "epoch": 0.40522737544595205, "grad_norm": 447.1521105136589, "learning_rate": 1.3113378446553903e-05, "loss": 369.1298, "step": 21070 }, { "epoch": 0.4054196997817119, "grad_norm": 470.1238170890203, "learning_rate": 1.3107578054973962e-05, "loss": 363.8412, "step": 21080 }, { "epoch": 0.4056120241174717, "grad_norm": 449.4914150454394, "learning_rate": 1.3101776505872516e-05, "loss": 363.123, "step": 21090 }, { "epoch": 0.40580434845323154, "grad_norm": 501.1557750397434, "learning_rate": 1.309597380141055e-05, "loss": 359.8808, "step": 21100 }, { "epoch": 0.4059966727889914, "grad_norm": 491.25296606898326, "learning_rate": 1.3090169943749475e-05, "loss": 357.7032, "step": 21110 }, { "epoch": 0.4061889971247512, "grad_norm": 464.6999900958375, "learning_rate": 1.3084364935051132e-05, "loss": 359.5759, "step": 21120 }, { "epoch": 0.406381321460511, "grad_norm": 465.8542145995899, "learning_rate": 1.307855877747779e-05, "loss": 367.1791, "step": 21130 }, { "epoch": 0.4065736457962708, "grad_norm": 467.3494052218952, "learning_rate": 1.307275147319215e-05, "loss": 350.4204, "step": 21140 }, { "epoch": 0.40676597013203064, "grad_norm": 477.22362338579916, "learning_rate": 1.3066943024357333e-05, "loss": 351.9511, "step": 21150 }, { "epoch": 0.40695829446779047, "grad_norm": 505.4582191018218, "learning_rate": 1.306113343313689e-05, "loss": 362.5992, "step": 21160 }, { "epoch": 0.4071506188035503, "grad_norm": 473.4444719359127, "learning_rate": 1.3055322701694801e-05, "loss": 369.0544, "step": 21170 }, { "epoch": 0.40734294313931013, "grad_norm": 516.8069004273458, "learning_rate": 1.3049510832195466e-05, "loss": 364.5951, "step": 21180 }, { "epoch": 0.40753526747506996, "grad_norm": 494.8109604879005, "learning_rate": 1.3043697826803707e-05, "loss": 355.1831, "step": 21190 }, { "epoch": 0.4077275918108298, "grad_norm": 442.1788140648132, "learning_rate": 1.303788368768478e-05, "loss": 347.0145, "step": 21200 }, { "epoch": 0.4079199161465896, "grad_norm": 483.60524242773596, "learning_rate": 1.3032068417004351e-05, "loss": 366.5362, "step": 21210 }, { "epoch": 0.40811224048234945, "grad_norm": 483.5920979964635, "learning_rate": 1.302625201692851e-05, "loss": 360.8435, "step": 21220 }, { "epoch": 0.4083045648181093, "grad_norm": 486.00452919303007, "learning_rate": 1.302043448962378e-05, "loss": 353.2764, "step": 21230 }, { "epoch": 0.40849688915386906, "grad_norm": 556.2820255780632, "learning_rate": 1.301461583725708e-05, "loss": 365.7417, "step": 21240 }, { "epoch": 0.4086892134896289, "grad_norm": 557.8909420988612, "learning_rate": 1.3008796061995772e-05, "loss": 363.3791, "step": 21250 }, { "epoch": 0.4088815378253887, "grad_norm": 446.95090091587434, "learning_rate": 1.3002975166007618e-05, "loss": 355.1656, "step": 21260 }, { "epoch": 0.40907386216114855, "grad_norm": 483.7531629355145, "learning_rate": 1.2997153151460814e-05, "loss": 360.441, "step": 21270 }, { "epoch": 0.4092661864969084, "grad_norm": 449.98429188400655, "learning_rate": 1.299133002052396e-05, "loss": 356.2514, "step": 21280 }, { "epoch": 0.4094585108326682, "grad_norm": 519.9671908061794, "learning_rate": 1.2985505775366079e-05, "loss": 364.107, "step": 21290 }, { "epoch": 0.40965083516842804, "grad_norm": 475.6833797538685, "learning_rate": 1.2979680418156604e-05, "loss": 370.397, "step": 21300 }, { "epoch": 0.40984315950418787, "grad_norm": 505.61185361114025, "learning_rate": 1.297385395106538e-05, "loss": 362.5435, "step": 21310 }, { "epoch": 0.4100354838399477, "grad_norm": 467.9278210641104, "learning_rate": 1.2968026376262679e-05, "loss": 345.4082, "step": 21320 }, { "epoch": 0.41022780817570753, "grad_norm": 474.7007701764091, "learning_rate": 1.2962197695919167e-05, "loss": 366.5094, "step": 21330 }, { "epoch": 0.41042013251146736, "grad_norm": 440.36632850954436, "learning_rate": 1.2956367912205939e-05, "loss": 364.4144, "step": 21340 }, { "epoch": 0.41061245684722714, "grad_norm": 439.9674708133569, "learning_rate": 1.2950537027294487e-05, "loss": 355.7068, "step": 21350 }, { "epoch": 0.41080478118298697, "grad_norm": 460.83750225291146, "learning_rate": 1.2944705043356722e-05, "loss": 359.8593, "step": 21360 }, { "epoch": 0.4109971055187468, "grad_norm": 470.44102474487124, "learning_rate": 1.2938871962564965e-05, "loss": 352.2021, "step": 21370 }, { "epoch": 0.4111894298545066, "grad_norm": 490.9972094918669, "learning_rate": 1.2933037787091935e-05, "loss": 368.0638, "step": 21380 }, { "epoch": 0.41138175419026646, "grad_norm": 488.8015397753577, "learning_rate": 1.2927202519110775e-05, "loss": 372.5835, "step": 21390 }, { "epoch": 0.4115740785260263, "grad_norm": 458.14919799764624, "learning_rate": 1.2921366160795017e-05, "loss": 358.4192, "step": 21400 }, { "epoch": 0.4117664028617861, "grad_norm": 432.9748994515109, "learning_rate": 1.2915528714318612e-05, "loss": 368.0081, "step": 21410 }, { "epoch": 0.41195872719754595, "grad_norm": 460.7771613286394, "learning_rate": 1.2909690181855914e-05, "loss": 356.3427, "step": 21420 }, { "epoch": 0.4121510515333058, "grad_norm": 505.9727688541971, "learning_rate": 1.290385056558168e-05, "loss": 360.1893, "step": 21430 }, { "epoch": 0.4123433758690656, "grad_norm": 477.4148390171022, "learning_rate": 1.2898009867671066e-05, "loss": 357.5101, "step": 21440 }, { "epoch": 0.41253570020482544, "grad_norm": 496.8967425078629, "learning_rate": 1.2892168090299639e-05, "loss": 352.5651, "step": 21450 }, { "epoch": 0.41272802454058527, "grad_norm": 475.16826605810854, "learning_rate": 1.2886325235643367e-05, "loss": 353.5881, "step": 21460 }, { "epoch": 0.41292034887634504, "grad_norm": 441.48714774515406, "learning_rate": 1.288048130587861e-05, "loss": 352.4681, "step": 21470 }, { "epoch": 0.4131126732121049, "grad_norm": 463.2352084650711, "learning_rate": 1.287463630318214e-05, "loss": 358.8107, "step": 21480 }, { "epoch": 0.4133049975478647, "grad_norm": 485.4626674830089, "learning_rate": 1.2868790229731123e-05, "loss": 359.6656, "step": 21490 }, { "epoch": 0.41349732188362454, "grad_norm": 468.374109728343, "learning_rate": 1.2862943087703127e-05, "loss": 355.3887, "step": 21500 }, { "epoch": 0.41368964621938437, "grad_norm": 451.3774990309563, "learning_rate": 1.2857094879276115e-05, "loss": 363.3622, "step": 21510 }, { "epoch": 0.4138819705551442, "grad_norm": 491.3516656098727, "learning_rate": 1.2851245606628447e-05, "loss": 359.746, "step": 21520 }, { "epoch": 0.414074294890904, "grad_norm": 473.53688745001983, "learning_rate": 1.2845395271938876e-05, "loss": 357.2325, "step": 21530 }, { "epoch": 0.41426661922666386, "grad_norm": 511.927983187308, "learning_rate": 1.2839543877386562e-05, "loss": 351.3271, "step": 21540 }, { "epoch": 0.4144589435624237, "grad_norm": 449.0944658901166, "learning_rate": 1.283369142515105e-05, "loss": 361.6248, "step": 21550 }, { "epoch": 0.4146512678981835, "grad_norm": 465.4977112841231, "learning_rate": 1.282783791741228e-05, "loss": 370.761, "step": 21560 }, { "epoch": 0.41484359223394335, "grad_norm": 5484.992293667798, "learning_rate": 1.2821983356350593e-05, "loss": 380.6152, "step": 21570 }, { "epoch": 0.4150359165697031, "grad_norm": 471.3213825403217, "learning_rate": 1.2816127744146711e-05, "loss": 359.9935, "step": 21580 }, { "epoch": 0.41522824090546295, "grad_norm": 447.56332457228893, "learning_rate": 1.2810271082981755e-05, "loss": 359.3304, "step": 21590 }, { "epoch": 0.4154205652412228, "grad_norm": 498.61162266238523, "learning_rate": 1.2804413375037232e-05, "loss": 351.4054, "step": 21600 }, { "epoch": 0.4156128895769826, "grad_norm": 490.62680887650953, "learning_rate": 1.2798554622495042e-05, "loss": 357.0392, "step": 21610 }, { "epoch": 0.41580521391274244, "grad_norm": 455.8535222343372, "learning_rate": 1.2792694827537477e-05, "loss": 362.5506, "step": 21620 }, { "epoch": 0.4159975382485023, "grad_norm": 486.23131546873833, "learning_rate": 1.2786833992347212e-05, "loss": 356.4791, "step": 21630 }, { "epoch": 0.4161898625842621, "grad_norm": 432.2308869162765, "learning_rate": 1.2780972119107312e-05, "loss": 361.85, "step": 21640 }, { "epoch": 0.41638218692002193, "grad_norm": 436.94642001588204, "learning_rate": 1.2775109210001225e-05, "loss": 349.1665, "step": 21650 }, { "epoch": 0.41657451125578177, "grad_norm": 516.9578667716966, "learning_rate": 1.2769245267212789e-05, "loss": 357.1654, "step": 21660 }, { "epoch": 0.4167668355915416, "grad_norm": 483.2205202416933, "learning_rate": 1.2763380292926227e-05, "loss": 356.3035, "step": 21670 }, { "epoch": 0.4169591599273014, "grad_norm": 451.76594064335154, "learning_rate": 1.2757514289326146e-05, "loss": 353.5011, "step": 21680 }, { "epoch": 0.4171514842630612, "grad_norm": 461.48383791289245, "learning_rate": 1.2751647258597531e-05, "loss": 358.6389, "step": 21690 }, { "epoch": 0.41734380859882103, "grad_norm": 481.68683753036305, "learning_rate": 1.274577920292576e-05, "loss": 346.4288, "step": 21700 }, { "epoch": 0.41753613293458086, "grad_norm": 440.34213962332115, "learning_rate": 1.2739910124496585e-05, "loss": 361.8271, "step": 21710 }, { "epoch": 0.4177284572703407, "grad_norm": 464.86968818352574, "learning_rate": 1.2734040025496137e-05, "loss": 352.9531, "step": 21720 }, { "epoch": 0.4179207816061005, "grad_norm": 462.0077211343192, "learning_rate": 1.2728168908110937e-05, "loss": 355.3349, "step": 21730 }, { "epoch": 0.41811310594186035, "grad_norm": 436.0814640607625, "learning_rate": 1.2722296774527871e-05, "loss": 347.6623, "step": 21740 }, { "epoch": 0.4183054302776202, "grad_norm": 432.97812381628853, "learning_rate": 1.271642362693422e-05, "loss": 367.2855, "step": 21750 }, { "epoch": 0.41849775461338, "grad_norm": 538.6227091185007, "learning_rate": 1.2710549467517628e-05, "loss": 353.0426, "step": 21760 }, { "epoch": 0.41869007894913984, "grad_norm": 443.31428722031995, "learning_rate": 1.270467429846613e-05, "loss": 340.3564, "step": 21770 }, { "epoch": 0.4188824032848997, "grad_norm": 431.40700391780126, "learning_rate": 1.2698798121968122e-05, "loss": 352.4626, "step": 21780 }, { "epoch": 0.4190747276206595, "grad_norm": 457.55028204982324, "learning_rate": 1.2692920940212387e-05, "loss": 366.8255, "step": 21790 }, { "epoch": 0.4192670519564193, "grad_norm": 445.569704287574, "learning_rate": 1.2687042755388077e-05, "loss": 347.9427, "step": 21800 }, { "epoch": 0.4194593762921791, "grad_norm": 428.7648916948125, "learning_rate": 1.2681163569684718e-05, "loss": 359.4237, "step": 21810 }, { "epoch": 0.41965170062793894, "grad_norm": 437.35057059980613, "learning_rate": 1.2675283385292212e-05, "loss": 360.2542, "step": 21820 }, { "epoch": 0.41984402496369877, "grad_norm": 488.98922795721256, "learning_rate": 1.2669402204400825e-05, "loss": 350.4583, "step": 21830 }, { "epoch": 0.4200363492994586, "grad_norm": 495.8385665928512, "learning_rate": 1.2663520029201206e-05, "loss": 347.4213, "step": 21840 }, { "epoch": 0.42022867363521843, "grad_norm": 551.2231368701516, "learning_rate": 1.2657636861884363e-05, "loss": 352.0523, "step": 21850 }, { "epoch": 0.42042099797097826, "grad_norm": 475.12387201222555, "learning_rate": 1.2651752704641686e-05, "loss": 356.7493, "step": 21860 }, { "epoch": 0.4206133223067381, "grad_norm": 483.72055523352617, "learning_rate": 1.2645867559664918e-05, "loss": 358.9258, "step": 21870 }, { "epoch": 0.4208056466424979, "grad_norm": 422.2589474800384, "learning_rate": 1.2639981429146184e-05, "loss": 346.4987, "step": 21880 }, { "epoch": 0.42099797097825775, "grad_norm": 532.9596887838914, "learning_rate": 1.2634094315277967e-05, "loss": 352.5222, "step": 21890 }, { "epoch": 0.4211902953140176, "grad_norm": 482.06855643493924, "learning_rate": 1.262820622025312e-05, "loss": 363.8119, "step": 21900 }, { "epoch": 0.42138261964977736, "grad_norm": 521.8497325382895, "learning_rate": 1.2622317146264864e-05, "loss": 361.689, "step": 21910 }, { "epoch": 0.4215749439855372, "grad_norm": 490.2268848417426, "learning_rate": 1.2616427095506779e-05, "loss": 358.514, "step": 21920 }, { "epoch": 0.421767268321297, "grad_norm": 422.1126332182719, "learning_rate": 1.2610536070172815e-05, "loss": 359.9763, "step": 21930 }, { "epoch": 0.42195959265705685, "grad_norm": 483.16607311328096, "learning_rate": 1.2604644072457277e-05, "loss": 351.9336, "step": 21940 }, { "epoch": 0.4221519169928167, "grad_norm": 486.5645570245076, "learning_rate": 1.259875110455484e-05, "loss": 357.5309, "step": 21950 }, { "epoch": 0.4223442413285765, "grad_norm": 434.6644044157051, "learning_rate": 1.2592857168660535e-05, "loss": 356.7706, "step": 21960 }, { "epoch": 0.42253656566433634, "grad_norm": 475.64407494901366, "learning_rate": 1.2586962266969758e-05, "loss": 359.2397, "step": 21970 }, { "epoch": 0.42272889000009617, "grad_norm": 544.7920515048304, "learning_rate": 1.2581066401678261e-05, "loss": 371.0822, "step": 21980 }, { "epoch": 0.422921214335856, "grad_norm": 456.6653580156985, "learning_rate": 1.2575169574982158e-05, "loss": 353.69, "step": 21990 }, { "epoch": 0.42311353867161583, "grad_norm": 413.937897057499, "learning_rate": 1.2569271789077919e-05, "loss": 358.3366, "step": 22000 }, { "epoch": 0.42330586300737566, "grad_norm": 464.0643570938417, "learning_rate": 1.256337304616237e-05, "loss": 348.7437, "step": 22010 }, { "epoch": 0.4234981873431355, "grad_norm": 483.55449784929607, "learning_rate": 1.25574733484327e-05, "loss": 352.3562, "step": 22020 }, { "epoch": 0.42369051167889527, "grad_norm": 471.6017174578337, "learning_rate": 1.2551572698086446e-05, "loss": 361.4498, "step": 22030 }, { "epoch": 0.4238828360146551, "grad_norm": 452.1224267963627, "learning_rate": 1.2545671097321497e-05, "loss": 355.5773, "step": 22040 }, { "epoch": 0.4240751603504149, "grad_norm": 459.1790605332593, "learning_rate": 1.2539768548336112e-05, "loss": 356.8583, "step": 22050 }, { "epoch": 0.42426748468617476, "grad_norm": 510.35860960140946, "learning_rate": 1.2533865053328886e-05, "loss": 354.9296, "step": 22060 }, { "epoch": 0.4244598090219346, "grad_norm": 461.7441282465687, "learning_rate": 1.2527960614498778e-05, "loss": 361.6722, "step": 22070 }, { "epoch": 0.4246521333576944, "grad_norm": 474.9706255978627, "learning_rate": 1.252205523404509e-05, "loss": 362.0148, "step": 22080 }, { "epoch": 0.42484445769345425, "grad_norm": 454.50327474860177, "learning_rate": 1.2516148914167481e-05, "loss": 361.5063, "step": 22090 }, { "epoch": 0.4250367820292141, "grad_norm": 470.22588490796045, "learning_rate": 1.2510241657065958e-05, "loss": 346.1036, "step": 22100 }, { "epoch": 0.4252291063649739, "grad_norm": 450.4961956923022, "learning_rate": 1.2504333464940874e-05, "loss": 347.454, "step": 22110 }, { "epoch": 0.42542143070073374, "grad_norm": 1166.5237137262181, "learning_rate": 1.2498424339992934e-05, "loss": 353.2138, "step": 22120 }, { "epoch": 0.42561375503649357, "grad_norm": 489.17746331812305, "learning_rate": 1.2492514284423194e-05, "loss": 356.392, "step": 22130 }, { "epoch": 0.42580607937225334, "grad_norm": 477.7647586631091, "learning_rate": 1.2486603300433045e-05, "loss": 353.9992, "step": 22140 }, { "epoch": 0.4259984037080132, "grad_norm": 517.421499289549, "learning_rate": 1.2480691390224238e-05, "loss": 360.8063, "step": 22150 }, { "epoch": 0.426190728043773, "grad_norm": 459.9200542352227, "learning_rate": 1.2474778555998855e-05, "loss": 341.3725, "step": 22160 }, { "epoch": 0.42638305237953283, "grad_norm": 465.6041733316549, "learning_rate": 1.246886479995933e-05, "loss": 362.1571, "step": 22170 }, { "epoch": 0.42657537671529266, "grad_norm": 491.530544993616, "learning_rate": 1.2462950124308444e-05, "loss": 358.6839, "step": 22180 }, { "epoch": 0.4267677010510525, "grad_norm": 511.6491291065285, "learning_rate": 1.2457034531249313e-05, "loss": 352.3591, "step": 22190 }, { "epoch": 0.4269600253868123, "grad_norm": 464.55501962277793, "learning_rate": 1.2451118022985402e-05, "loss": 353.4329, "step": 22200 }, { "epoch": 0.42715234972257216, "grad_norm": 464.2570395726874, "learning_rate": 1.2445200601720504e-05, "loss": 346.6129, "step": 22210 }, { "epoch": 0.427344674058332, "grad_norm": 451.3595060238119, "learning_rate": 1.243928226965877e-05, "loss": 353.174, "step": 22220 }, { "epoch": 0.4275369983940918, "grad_norm": 428.8214651667687, "learning_rate": 1.2433363029004675e-05, "loss": 353.4309, "step": 22230 }, { "epoch": 0.42772932272985165, "grad_norm": 447.660059888906, "learning_rate": 1.2427442881963042e-05, "loss": 353.6233, "step": 22240 }, { "epoch": 0.4279216470656114, "grad_norm": 428.85548521373573, "learning_rate": 1.242152183073902e-05, "loss": 357.1601, "step": 22250 }, { "epoch": 0.42811397140137125, "grad_norm": 450.36372940478407, "learning_rate": 1.2415599877538111e-05, "loss": 345.0228, "step": 22260 }, { "epoch": 0.4283062957371311, "grad_norm": 458.84300925126934, "learning_rate": 1.2409677024566145e-05, "loss": 365.5294, "step": 22270 }, { "epoch": 0.4284986200728909, "grad_norm": 487.5794962862781, "learning_rate": 1.2403753274029281e-05, "loss": 359.2313, "step": 22280 }, { "epoch": 0.42869094440865074, "grad_norm": 434.0123600246192, "learning_rate": 1.2397828628134028e-05, "loss": 345.9444, "step": 22290 }, { "epoch": 0.4288832687444106, "grad_norm": 455.26191386780675, "learning_rate": 1.2391903089087208e-05, "loss": 353.5667, "step": 22300 }, { "epoch": 0.4290755930801704, "grad_norm": 478.61224251356435, "learning_rate": 1.2385976659095993e-05, "loss": 353.1015, "step": 22310 }, { "epoch": 0.42926791741593023, "grad_norm": 442.75037662501694, "learning_rate": 1.2380049340367876e-05, "loss": 347.4218, "step": 22320 }, { "epoch": 0.42946024175169006, "grad_norm": 478.25701038187225, "learning_rate": 1.2374121135110688e-05, "loss": 367.8834, "step": 22330 }, { "epoch": 0.4296525660874499, "grad_norm": 431.9551326904518, "learning_rate": 1.2368192045532586e-05, "loss": 354.8841, "step": 22340 }, { "epoch": 0.4298448904232097, "grad_norm": 433.5347825432444, "learning_rate": 1.236226207384206e-05, "loss": 351.6588, "step": 22350 }, { "epoch": 0.4300372147589695, "grad_norm": 474.89242289033746, "learning_rate": 1.2356331222247929e-05, "loss": 359.1791, "step": 22360 }, { "epoch": 0.43022953909472933, "grad_norm": 467.42679677909086, "learning_rate": 1.2350399492959328e-05, "loss": 362.0955, "step": 22370 }, { "epoch": 0.43042186343048916, "grad_norm": 469.85750400246616, "learning_rate": 1.234446688818574e-05, "loss": 356.8853, "step": 22380 }, { "epoch": 0.430614187766249, "grad_norm": 461.90255826053743, "learning_rate": 1.233853341013695e-05, "loss": 352.9148, "step": 22390 }, { "epoch": 0.4308065121020088, "grad_norm": 449.86036415667627, "learning_rate": 1.233259906102309e-05, "loss": 352.1118, "step": 22400 }, { "epoch": 0.43099883643776865, "grad_norm": 441.29320721249115, "learning_rate": 1.2326663843054603e-05, "loss": 350.8514, "step": 22410 }, { "epoch": 0.4311911607735285, "grad_norm": 443.4054153774575, "learning_rate": 1.2320727758442264e-05, "loss": 358.9528, "step": 22420 }, { "epoch": 0.4313834851092883, "grad_norm": 464.48785147118207, "learning_rate": 1.231479080939716e-05, "loss": 364.3797, "step": 22430 }, { "epoch": 0.43157580944504814, "grad_norm": 412.516768195798, "learning_rate": 1.230885299813071e-05, "loss": 363.6567, "step": 22440 }, { "epoch": 0.43176813378080797, "grad_norm": 495.99355667153515, "learning_rate": 1.2302914326854651e-05, "loss": 356.0047, "step": 22450 }, { "epoch": 0.4319604581165678, "grad_norm": 473.1165962262813, "learning_rate": 1.2296974797781036e-05, "loss": 364.2631, "step": 22460 }, { "epoch": 0.43215278245232763, "grad_norm": 470.9665452868922, "learning_rate": 1.2291034413122247e-05, "loss": 368.466, "step": 22470 }, { "epoch": 0.4323451067880874, "grad_norm": 477.7097465613152, "learning_rate": 1.2285093175090976e-05, "loss": 354.1781, "step": 22480 }, { "epoch": 0.43253743112384724, "grad_norm": 487.1634400935877, "learning_rate": 1.227915108590024e-05, "loss": 348.4954, "step": 22490 }, { "epoch": 0.43272975545960707, "grad_norm": 438.2995394209405, "learning_rate": 1.2273208147763363e-05, "loss": 342.7937, "step": 22500 }, { "epoch": 0.4329220797953669, "grad_norm": 483.84122120624323, "learning_rate": 1.2267264362893995e-05, "loss": 356.5782, "step": 22510 }, { "epoch": 0.43311440413112673, "grad_norm": 452.2892054335234, "learning_rate": 1.2261319733506096e-05, "loss": 355.2324, "step": 22520 }, { "epoch": 0.43330672846688656, "grad_norm": 470.97288871365726, "learning_rate": 1.2255374261813944e-05, "loss": 362.2226, "step": 22530 }, { "epoch": 0.4334990528026464, "grad_norm": 451.9701225379797, "learning_rate": 1.2249427950032127e-05, "loss": 358.8834, "step": 22540 }, { "epoch": 0.4336913771384062, "grad_norm": 527.6292859766575, "learning_rate": 1.224348080037555e-05, "loss": 367.1118, "step": 22550 }, { "epoch": 0.43388370147416605, "grad_norm": 481.7096752559559, "learning_rate": 1.2237532815059427e-05, "loss": 351.9961, "step": 22560 }, { "epoch": 0.4340760258099259, "grad_norm": 440.6525927364171, "learning_rate": 1.2231583996299285e-05, "loss": 351.348, "step": 22570 }, { "epoch": 0.4342683501456857, "grad_norm": 459.4464599866437, "learning_rate": 1.2225634346310962e-05, "loss": 349.3989, "step": 22580 }, { "epoch": 0.4344606744814455, "grad_norm": 434.8497012594701, "learning_rate": 1.22196838673106e-05, "loss": 359.4571, "step": 22590 }, { "epoch": 0.4346529988172053, "grad_norm": 473.9689727384777, "learning_rate": 1.2213732561514657e-05, "loss": 361.0863, "step": 22600 }, { "epoch": 0.43484532315296515, "grad_norm": 429.20142883517275, "learning_rate": 1.2207780431139894e-05, "loss": 358.7701, "step": 22610 }, { "epoch": 0.435037647488725, "grad_norm": 496.32285337930824, "learning_rate": 1.2201827478403385e-05, "loss": 352.4951, "step": 22620 }, { "epoch": 0.4352299718244848, "grad_norm": 409.46571043219234, "learning_rate": 1.2195873705522508e-05, "loss": 339.3425, "step": 22630 }, { "epoch": 0.43542229616024464, "grad_norm": 444.3838341019518, "learning_rate": 1.2189919114714936e-05, "loss": 341.6629, "step": 22640 }, { "epoch": 0.43561462049600447, "grad_norm": 464.2398396802652, "learning_rate": 1.2183963708198668e-05, "loss": 365.7007, "step": 22650 }, { "epoch": 0.4358069448317643, "grad_norm": 499.83234232424223, "learning_rate": 1.2178007488191983e-05, "loss": 350.4901, "step": 22660 }, { "epoch": 0.43599926916752413, "grad_norm": 442.55899736757885, "learning_rate": 1.2172050456913482e-05, "loss": 358.6078, "step": 22670 }, { "epoch": 0.43619159350328396, "grad_norm": 462.1902943855575, "learning_rate": 1.2166092616582055e-05, "loss": 342.5886, "step": 22680 }, { "epoch": 0.4363839178390438, "grad_norm": 549.4000100993889, "learning_rate": 1.2160133969416903e-05, "loss": 364.8587, "step": 22690 }, { "epoch": 0.43657624217480356, "grad_norm": 432.36243197849393, "learning_rate": 1.2154174517637526e-05, "loss": 350.1362, "step": 22700 }, { "epoch": 0.4367685665105634, "grad_norm": 458.9124051394236, "learning_rate": 1.2148214263463718e-05, "loss": 343.0685, "step": 22710 }, { "epoch": 0.4369608908463232, "grad_norm": 448.1825617633941, "learning_rate": 1.2142253209115577e-05, "loss": 352.7216, "step": 22720 }, { "epoch": 0.43715321518208305, "grad_norm": 439.7398588746751, "learning_rate": 1.2136291356813494e-05, "loss": 364.9622, "step": 22730 }, { "epoch": 0.4373455395178429, "grad_norm": 519.7291884971663, "learning_rate": 1.2130328708778162e-05, "loss": 352.5206, "step": 22740 }, { "epoch": 0.4375378638536027, "grad_norm": 444.6701436437316, "learning_rate": 1.2124365267230571e-05, "loss": 357.2035, "step": 22750 }, { "epoch": 0.43773018818936255, "grad_norm": 480.83404938359894, "learning_rate": 1.2118401034392003e-05, "loss": 357.8444, "step": 22760 }, { "epoch": 0.4379225125251224, "grad_norm": 431.38970168151576, "learning_rate": 1.2112436012484035e-05, "loss": 347.8432, "step": 22770 }, { "epoch": 0.4381148368608822, "grad_norm": 450.4498394827926, "learning_rate": 1.210647020372854e-05, "loss": 357.4706, "step": 22780 }, { "epoch": 0.43830716119664204, "grad_norm": 454.58896351006905, "learning_rate": 1.2100503610347686e-05, "loss": 351.4655, "step": 22790 }, { "epoch": 0.43849948553240187, "grad_norm": 464.49865169827825, "learning_rate": 1.2094536234563927e-05, "loss": 357.5448, "step": 22800 }, { "epoch": 0.43869180986816164, "grad_norm": 454.9531691770624, "learning_rate": 1.2088568078600013e-05, "loss": 353.4185, "step": 22810 }, { "epoch": 0.43888413420392147, "grad_norm": 491.9449169806258, "learning_rate": 1.2082599144678983e-05, "loss": 361.997, "step": 22820 }, { "epoch": 0.4390764585396813, "grad_norm": 459.65266714544276, "learning_rate": 1.2076629435024168e-05, "loss": 357.8603, "step": 22830 }, { "epoch": 0.43926878287544113, "grad_norm": 434.1950049394593, "learning_rate": 1.2070658951859183e-05, "loss": 367.5184, "step": 22840 }, { "epoch": 0.43946110721120096, "grad_norm": 444.2003240155268, "learning_rate": 1.2064687697407939e-05, "loss": 368.1171, "step": 22850 }, { "epoch": 0.4396534315469608, "grad_norm": 462.3118097966811, "learning_rate": 1.2058715673894625e-05, "loss": 347.1098, "step": 22860 }, { "epoch": 0.4398457558827206, "grad_norm": 479.1257367580937, "learning_rate": 1.2052742883543724e-05, "loss": 369.2424, "step": 22870 }, { "epoch": 0.44003808021848045, "grad_norm": 475.7591165528743, "learning_rate": 1.2046769328580004e-05, "loss": 346.5411, "step": 22880 }, { "epoch": 0.4402304045542403, "grad_norm": 439.72942995054444, "learning_rate": 1.204079501122851e-05, "loss": 354.0866, "step": 22890 }, { "epoch": 0.4404227288900001, "grad_norm": 464.5375736387472, "learning_rate": 1.2034819933714576e-05, "loss": 351.7987, "step": 22900 }, { "epoch": 0.44061505322575995, "grad_norm": 476.52778047454865, "learning_rate": 1.2028844098263827e-05, "loss": 357.7463, "step": 22910 }, { "epoch": 0.4408073775615197, "grad_norm": 490.0229149954796, "learning_rate": 1.2022867507102159e-05, "loss": 360.2738, "step": 22920 }, { "epoch": 0.44099970189727955, "grad_norm": 436.4063805346974, "learning_rate": 1.2016890162455752e-05, "loss": 352.7715, "step": 22930 }, { "epoch": 0.4411920262330394, "grad_norm": 435.8970063266725, "learning_rate": 1.2010912066551072e-05, "loss": 348.4483, "step": 22940 }, { "epoch": 0.4413843505687992, "grad_norm": 435.64221107343036, "learning_rate": 1.2004933221614854e-05, "loss": 346.5264, "step": 22950 }, { "epoch": 0.44157667490455904, "grad_norm": 516.5056519694289, "learning_rate": 1.1998953629874126e-05, "loss": 347.5064, "step": 22960 }, { "epoch": 0.44176899924031887, "grad_norm": 461.9243961115514, "learning_rate": 1.1992973293556184e-05, "loss": 363.5012, "step": 22970 }, { "epoch": 0.4419613235760787, "grad_norm": 473.5388989045999, "learning_rate": 1.1986992214888607e-05, "loss": 361.0567, "step": 22980 }, { "epoch": 0.44215364791183853, "grad_norm": 466.29663227564714, "learning_rate": 1.1981010396099244e-05, "loss": 353.8068, "step": 22990 }, { "epoch": 0.44234597224759836, "grad_norm": 505.03568454371253, "learning_rate": 1.1975027839416227e-05, "loss": 354.4532, "step": 23000 }, { "epoch": 0.4425382965833582, "grad_norm": 459.8080569670777, "learning_rate": 1.1969044547067961e-05, "loss": 350.0083, "step": 23010 }, { "epoch": 0.442730620919118, "grad_norm": 455.4227014655408, "learning_rate": 1.1963060521283117e-05, "loss": 346.3249, "step": 23020 }, { "epoch": 0.44292294525487785, "grad_norm": 474.5281833041822, "learning_rate": 1.1957075764290652e-05, "loss": 350.3306, "step": 23030 }, { "epoch": 0.44311526959063763, "grad_norm": 422.75569934678424, "learning_rate": 1.1951090278319784e-05, "loss": 363.0249, "step": 23040 }, { "epoch": 0.44330759392639746, "grad_norm": 443.50751408249164, "learning_rate": 1.1945104065600013e-05, "loss": 345.0779, "step": 23050 }, { "epoch": 0.4434999182621573, "grad_norm": 445.4630900307036, "learning_rate": 1.1939117128361101e-05, "loss": 352.458, "step": 23060 }, { "epoch": 0.4436922425979171, "grad_norm": 1081.5978111395164, "learning_rate": 1.1933129468833087e-05, "loss": 345.2083, "step": 23070 }, { "epoch": 0.44388456693367695, "grad_norm": 447.04742239637517, "learning_rate": 1.192714108924627e-05, "loss": 347.1263, "step": 23080 }, { "epoch": 0.4440768912694368, "grad_norm": 510.0560859588336, "learning_rate": 1.1921151991831225e-05, "loss": 356.1168, "step": 23090 }, { "epoch": 0.4442692156051966, "grad_norm": 449.24248333120875, "learning_rate": 1.1915162178818793e-05, "loss": 361.5072, "step": 23100 }, { "epoch": 0.44446153994095644, "grad_norm": 461.5912139116939, "learning_rate": 1.1909171652440079e-05, "loss": 355.7509, "step": 23110 }, { "epoch": 0.44465386427671627, "grad_norm": 448.03856233600294, "learning_rate": 1.1903180414926457e-05, "loss": 352.4296, "step": 23120 }, { "epoch": 0.4448461886124761, "grad_norm": 452.9595065196257, "learning_rate": 1.189718846850956e-05, "loss": 346.3238, "step": 23130 }, { "epoch": 0.44503851294823593, "grad_norm": 477.2244147339225, "learning_rate": 1.1891195815421293e-05, "loss": 336.5658, "step": 23140 }, { "epoch": 0.4452308372839957, "grad_norm": 478.1351204430414, "learning_rate": 1.1885202457893819e-05, "loss": 353.1867, "step": 23150 }, { "epoch": 0.44542316161975554, "grad_norm": 458.6464189651247, "learning_rate": 1.1879208398159563e-05, "loss": 354.6019, "step": 23160 }, { "epoch": 0.44561548595551537, "grad_norm": 421.5239837489341, "learning_rate": 1.1873213638451215e-05, "loss": 341.7047, "step": 23170 }, { "epoch": 0.4458078102912752, "grad_norm": 452.2073834229528, "learning_rate": 1.1867218181001725e-05, "loss": 355.6551, "step": 23180 }, { "epoch": 0.446000134627035, "grad_norm": 570.7736551151988, "learning_rate": 1.1861222028044301e-05, "loss": 362.1008, "step": 23190 }, { "epoch": 0.44619245896279486, "grad_norm": 458.37915504573186, "learning_rate": 1.1855225181812408e-05, "loss": 345.1014, "step": 23200 }, { "epoch": 0.4463847832985547, "grad_norm": 436.68572876331586, "learning_rate": 1.1849227644539781e-05, "loss": 346.1532, "step": 23210 }, { "epoch": 0.4465771076343145, "grad_norm": 440.556803127086, "learning_rate": 1.1843229418460391e-05, "loss": 345.0815, "step": 23220 }, { "epoch": 0.44676943197007435, "grad_norm": 432.7037947552592, "learning_rate": 1.1837230505808485e-05, "loss": 344.6313, "step": 23230 }, { "epoch": 0.4469617563058342, "grad_norm": 504.46737785188816, "learning_rate": 1.1831230908818563e-05, "loss": 345.6509, "step": 23240 }, { "epoch": 0.447154080641594, "grad_norm": 496.82318608976516, "learning_rate": 1.1825230629725366e-05, "loss": 358.9157, "step": 23250 }, { "epoch": 0.4473464049773538, "grad_norm": 438.07573626612026, "learning_rate": 1.1819229670763908e-05, "loss": 355.3396, "step": 23260 }, { "epoch": 0.4475387293131136, "grad_norm": 526.8128568267338, "learning_rate": 1.1813228034169442e-05, "loss": 363.1398, "step": 23270 }, { "epoch": 0.44773105364887344, "grad_norm": 438.2688938757383, "learning_rate": 1.180722572217748e-05, "loss": 354.8272, "step": 23280 }, { "epoch": 0.4479233779846333, "grad_norm": 446.2383070355474, "learning_rate": 1.1801222737023783e-05, "loss": 355.2811, "step": 23290 }, { "epoch": 0.4481157023203931, "grad_norm": 432.95901976838326, "learning_rate": 1.1795219080944366e-05, "loss": 351.8697, "step": 23300 }, { "epoch": 0.44830802665615294, "grad_norm": 426.69914460236123, "learning_rate": 1.178921475617549e-05, "loss": 352.2932, "step": 23310 }, { "epoch": 0.44850035099191277, "grad_norm": 542.3162635281931, "learning_rate": 1.1783209764953666e-05, "loss": 357.4332, "step": 23320 }, { "epoch": 0.4486926753276726, "grad_norm": 482.6538928496427, "learning_rate": 1.1777204109515653e-05, "loss": 362.0119, "step": 23330 }, { "epoch": 0.4488849996634324, "grad_norm": 474.0828419038547, "learning_rate": 1.1771197792098465e-05, "loss": 352.2589, "step": 23340 }, { "epoch": 0.44907732399919226, "grad_norm": 419.41728830651, "learning_rate": 1.1765190814939351e-05, "loss": 351.6807, "step": 23350 }, { "epoch": 0.4492696483349521, "grad_norm": 519.4215891522969, "learning_rate": 1.175918318027581e-05, "loss": 344.8265, "step": 23360 }, { "epoch": 0.44946197267071186, "grad_norm": 468.4725018251048, "learning_rate": 1.1753174890345591e-05, "loss": 352.2972, "step": 23370 }, { "epoch": 0.4496542970064717, "grad_norm": 479.12714410744263, "learning_rate": 1.174716594738668e-05, "loss": 355.5, "step": 23380 }, { "epoch": 0.4498466213422315, "grad_norm": 491.0914008434106, "learning_rate": 1.1741156353637304e-05, "loss": 343.2382, "step": 23390 }, { "epoch": 0.45003894567799135, "grad_norm": 429.42297383282795, "learning_rate": 1.1735146111335945e-05, "loss": 348.132, "step": 23400 }, { "epoch": 0.4502312700137512, "grad_norm": 421.7864502342012, "learning_rate": 1.1729135222721315e-05, "loss": 347.8242, "step": 23410 }, { "epoch": 0.450423594349511, "grad_norm": 447.68249484641063, "learning_rate": 1.1723123690032376e-05, "loss": 336.8539, "step": 23420 }, { "epoch": 0.45061591868527084, "grad_norm": 485.7670008683627, "learning_rate": 1.171711151550832e-05, "loss": 363.7877, "step": 23430 }, { "epoch": 0.4508082430210307, "grad_norm": 449.00491653264834, "learning_rate": 1.1711098701388581e-05, "loss": 350.8491, "step": 23440 }, { "epoch": 0.4510005673567905, "grad_norm": 454.2703523796391, "learning_rate": 1.1705085249912837e-05, "loss": 347.3355, "step": 23450 }, { "epoch": 0.45119289169255034, "grad_norm": 411.9916057270429, "learning_rate": 1.1699071163320997e-05, "loss": 353.2647, "step": 23460 }, { "epoch": 0.45138521602831017, "grad_norm": 452.8958070873488, "learning_rate": 1.169305644385321e-05, "loss": 345.1969, "step": 23470 }, { "epoch": 0.45157754036406994, "grad_norm": 494.2100717146746, "learning_rate": 1.168704109374986e-05, "loss": 353.7135, "step": 23480 }, { "epoch": 0.45176986469982977, "grad_norm": 449.68152600840335, "learning_rate": 1.1681025115251566e-05, "loss": 355.7168, "step": 23490 }, { "epoch": 0.4519621890355896, "grad_norm": 674.964144990722, "learning_rate": 1.1675008510599176e-05, "loss": 343.0748, "step": 23500 }, { "epoch": 0.45215451337134943, "grad_norm": 490.0261202930183, "learning_rate": 1.1668991282033784e-05, "loss": 348.6971, "step": 23510 }, { "epoch": 0.45234683770710926, "grad_norm": 455.8273639377138, "learning_rate": 1.16629734317967e-05, "loss": 358.1657, "step": 23520 }, { "epoch": 0.4525391620428691, "grad_norm": 419.0916901909572, "learning_rate": 1.165695496212948e-05, "loss": 362.4954, "step": 23530 }, { "epoch": 0.4527314863786289, "grad_norm": 454.6441146595929, "learning_rate": 1.1650935875273901e-05, "loss": 354.3914, "step": 23540 }, { "epoch": 0.45292381071438875, "grad_norm": 419.06496780810573, "learning_rate": 1.1644916173471976e-05, "loss": 349.879, "step": 23550 }, { "epoch": 0.4531161350501486, "grad_norm": 478.5405629393259, "learning_rate": 1.1638895858965942e-05, "loss": 349.3788, "step": 23560 }, { "epoch": 0.4533084593859084, "grad_norm": 410.9707440162878, "learning_rate": 1.1632874933998268e-05, "loss": 347.2258, "step": 23570 }, { "epoch": 0.45350078372166824, "grad_norm": 455.2224512740443, "learning_rate": 1.1626853400811649e-05, "loss": 352.5157, "step": 23580 }, { "epoch": 0.4536931080574281, "grad_norm": 385.77117263918154, "learning_rate": 1.1620831261649003e-05, "loss": 344.0658, "step": 23590 }, { "epoch": 0.45388543239318785, "grad_norm": 440.1833825299997, "learning_rate": 1.1614808518753485e-05, "loss": 350.3631, "step": 23600 }, { "epoch": 0.4540777567289477, "grad_norm": 480.10720954439904, "learning_rate": 1.1608785174368461e-05, "loss": 352.6639, "step": 23610 }, { "epoch": 0.4542700810647075, "grad_norm": 441.5500416075034, "learning_rate": 1.1602761230737531e-05, "loss": 366.6013, "step": 23620 }, { "epoch": 0.45446240540046734, "grad_norm": 482.1848272942076, "learning_rate": 1.1596736690104514e-05, "loss": 367.0901, "step": 23630 }, { "epoch": 0.45465472973622717, "grad_norm": 460.45210208397236, "learning_rate": 1.1590711554713452e-05, "loss": 346.6765, "step": 23640 }, { "epoch": 0.454847054071987, "grad_norm": 456.1787475203777, "learning_rate": 1.1584685826808604e-05, "loss": 363.1558, "step": 23650 }, { "epoch": 0.45503937840774683, "grad_norm": 499.47085897902195, "learning_rate": 1.157865950863446e-05, "loss": 353.6442, "step": 23660 }, { "epoch": 0.45523170274350666, "grad_norm": 536.0662737737993, "learning_rate": 1.1572632602435717e-05, "loss": 361.2149, "step": 23670 }, { "epoch": 0.4554240270792665, "grad_norm": 436.0792700804202, "learning_rate": 1.1566605110457305e-05, "loss": 347.6642, "step": 23680 }, { "epoch": 0.4556163514150263, "grad_norm": 505.564823658509, "learning_rate": 1.1560577034944364e-05, "loss": 353.9503, "step": 23690 }, { "epoch": 0.45580867575078615, "grad_norm": 465.59527106465043, "learning_rate": 1.1554548378142249e-05, "loss": 355.2777, "step": 23700 }, { "epoch": 0.4560010000865459, "grad_norm": 506.2045142510194, "learning_rate": 1.1548519142296541e-05, "loss": 353.3511, "step": 23710 }, { "epoch": 0.45619332442230576, "grad_norm": 440.91948793154995, "learning_rate": 1.1542489329653024e-05, "loss": 358.9648, "step": 23720 }, { "epoch": 0.4563856487580656, "grad_norm": 452.31877685288396, "learning_rate": 1.153645894245771e-05, "loss": 340.7169, "step": 23730 }, { "epoch": 0.4565779730938254, "grad_norm": 461.49590591788785, "learning_rate": 1.1530427982956813e-05, "loss": 360.2601, "step": 23740 }, { "epoch": 0.45677029742958525, "grad_norm": 453.1214623453998, "learning_rate": 1.1524396453396767e-05, "loss": 348.997, "step": 23750 }, { "epoch": 0.4569626217653451, "grad_norm": 479.4461904394601, "learning_rate": 1.1518364356024219e-05, "loss": 357.3673, "step": 23760 }, { "epoch": 0.4571549461011049, "grad_norm": 460.89689096615183, "learning_rate": 1.1512331693086025e-05, "loss": 342.6755, "step": 23770 }, { "epoch": 0.45734727043686474, "grad_norm": 462.8234758102437, "learning_rate": 1.1506298466829256e-05, "loss": 358.9926, "step": 23780 }, { "epoch": 0.45753959477262457, "grad_norm": 448.5863191433053, "learning_rate": 1.1500264679501181e-05, "loss": 348.5272, "step": 23790 }, { "epoch": 0.4577319191083844, "grad_norm": 460.53409831470395, "learning_rate": 1.1494230333349292e-05, "loss": 355.6848, "step": 23800 }, { "epoch": 0.45792424344414423, "grad_norm": 509.920675443593, "learning_rate": 1.1488195430621284e-05, "loss": 350.3394, "step": 23810 }, { "epoch": 0.458116567779904, "grad_norm": 439.49786410227006, "learning_rate": 1.1482159973565051e-05, "loss": 359.6057, "step": 23820 }, { "epoch": 0.45830889211566384, "grad_norm": 491.9157748010782, "learning_rate": 1.147612396442871e-05, "loss": 349.4035, "step": 23830 }, { "epoch": 0.45850121645142367, "grad_norm": 451.0924836074746, "learning_rate": 1.1470087405460572e-05, "loss": 343.7919, "step": 23840 }, { "epoch": 0.4586935407871835, "grad_norm": 457.8414577037548, "learning_rate": 1.1464050298909153e-05, "loss": 341.696, "step": 23850 }, { "epoch": 0.4588858651229433, "grad_norm": 412.019896624018, "learning_rate": 1.1458012647023178e-05, "loss": 331.7996, "step": 23860 }, { "epoch": 0.45907818945870316, "grad_norm": 482.92689908053995, "learning_rate": 1.1451974452051572e-05, "loss": 354.1885, "step": 23870 }, { "epoch": 0.459270513794463, "grad_norm": 495.59841852125925, "learning_rate": 1.1445935716243463e-05, "loss": 350.7379, "step": 23880 }, { "epoch": 0.4594628381302228, "grad_norm": 454.90738787300114, "learning_rate": 1.143989644184818e-05, "loss": 350.8042, "step": 23890 }, { "epoch": 0.45965516246598265, "grad_norm": 460.26627063019345, "learning_rate": 1.1433856631115252e-05, "loss": 360.6821, "step": 23900 }, { "epoch": 0.4598474868017425, "grad_norm": 402.56429958468243, "learning_rate": 1.142781628629441e-05, "loss": 345.0776, "step": 23910 }, { "epoch": 0.4600398111375023, "grad_norm": 440.5609777778486, "learning_rate": 1.1421775409635585e-05, "loss": 346.5627, "step": 23920 }, { "epoch": 0.4602321354732621, "grad_norm": 578.4568510348026, "learning_rate": 1.1415734003388899e-05, "loss": 357.2716, "step": 23930 }, { "epoch": 0.4604244598090219, "grad_norm": 462.71039126999233, "learning_rate": 1.1409692069804678e-05, "loss": 356.5922, "step": 23940 }, { "epoch": 0.46061678414478174, "grad_norm": 413.1532488354179, "learning_rate": 1.1403649611133444e-05, "loss": 339.4667, "step": 23950 }, { "epoch": 0.4608091084805416, "grad_norm": 488.3438516601459, "learning_rate": 1.1397606629625913e-05, "loss": 358.0177, "step": 23960 }, { "epoch": 0.4610014328163014, "grad_norm": 437.8413528757877, "learning_rate": 1.1391563127532992e-05, "loss": 347.4919, "step": 23970 }, { "epoch": 0.46119375715206123, "grad_norm": 431.40359132969706, "learning_rate": 1.1385519107105791e-05, "loss": 350.7815, "step": 23980 }, { "epoch": 0.46138608148782106, "grad_norm": 473.6656634299338, "learning_rate": 1.1379474570595604e-05, "loss": 352.9971, "step": 23990 }, { "epoch": 0.4615784058235809, "grad_norm": 449.7846065380691, "learning_rate": 1.1373429520253922e-05, "loss": 341.5583, "step": 24000 }, { "epoch": 0.4617707301593407, "grad_norm": 471.5925685796337, "learning_rate": 1.1367383958332427e-05, "loss": 343.8156, "step": 24010 }, { "epoch": 0.46196305449510056, "grad_norm": 473.61604947104604, "learning_rate": 1.1361337887082991e-05, "loss": 350.4561, "step": 24020 }, { "epoch": 0.4621553788308604, "grad_norm": 452.7277506604807, "learning_rate": 1.1355291308757672e-05, "loss": 352.6881, "step": 24030 }, { "epoch": 0.46234770316662016, "grad_norm": 433.32400679299263, "learning_rate": 1.1349244225608727e-05, "loss": 350.1903, "step": 24040 }, { "epoch": 0.46254002750238, "grad_norm": 418.122197251669, "learning_rate": 1.1343196639888591e-05, "loss": 354.7574, "step": 24050 }, { "epoch": 0.4627323518381398, "grad_norm": 446.06130360221, "learning_rate": 1.133714855384989e-05, "loss": 357.2964, "step": 24060 }, { "epoch": 0.46292467617389965, "grad_norm": 446.2298532602672, "learning_rate": 1.1331099969745439e-05, "loss": 349.6846, "step": 24070 }, { "epoch": 0.4631170005096595, "grad_norm": 517.8338606966761, "learning_rate": 1.132505088982823e-05, "loss": 365.5651, "step": 24080 }, { "epoch": 0.4633093248454193, "grad_norm": 435.9096581886154, "learning_rate": 1.131900131635145e-05, "loss": 344.9095, "step": 24090 }, { "epoch": 0.46350164918117914, "grad_norm": 502.3136728521285, "learning_rate": 1.1312951251568461e-05, "loss": 352.8324, "step": 24100 }, { "epoch": 0.463693973516939, "grad_norm": 444.2673845684395, "learning_rate": 1.1306900697732816e-05, "loss": 343.4255, "step": 24110 }, { "epoch": 0.4638862978526988, "grad_norm": 453.9107994548356, "learning_rate": 1.1300849657098248e-05, "loss": 350.1235, "step": 24120 }, { "epoch": 0.46407862218845863, "grad_norm": 474.68764861178875, "learning_rate": 1.1294798131918665e-05, "loss": 344.1036, "step": 24130 }, { "epoch": 0.46427094652421846, "grad_norm": 429.4393498052354, "learning_rate": 1.1288746124448164e-05, "loss": 346.141, "step": 24140 }, { "epoch": 0.4644632708599783, "grad_norm": 462.7185174091672, "learning_rate": 1.1282693636941013e-05, "loss": 340.0787, "step": 24150 }, { "epoch": 0.46465559519573807, "grad_norm": 416.5424380174432, "learning_rate": 1.1276640671651671e-05, "loss": 358.5521, "step": 24160 }, { "epoch": 0.4648479195314979, "grad_norm": 468.1693665282551, "learning_rate": 1.1270587230834757e-05, "loss": 355.7998, "step": 24170 }, { "epoch": 0.46504024386725773, "grad_norm": 440.98128068223815, "learning_rate": 1.1264533316745088e-05, "loss": 348.2278, "step": 24180 }, { "epoch": 0.46523256820301756, "grad_norm": 426.1283064895602, "learning_rate": 1.1258478931637641e-05, "loss": 346.072, "step": 24190 }, { "epoch": 0.4654248925387774, "grad_norm": 439.37164269656154, "learning_rate": 1.1252424077767577e-05, "loss": 332.2517, "step": 24200 }, { "epoch": 0.4656172168745372, "grad_norm": 579.5531226976567, "learning_rate": 1.1246368757390231e-05, "loss": 342.8532, "step": 24210 }, { "epoch": 0.46580954121029705, "grad_norm": 536.2588621443251, "learning_rate": 1.1240312972761105e-05, "loss": 342.9382, "step": 24220 }, { "epoch": 0.4660018655460569, "grad_norm": 469.8016300904265, "learning_rate": 1.1234256726135882e-05, "loss": 348.8735, "step": 24230 }, { "epoch": 0.4661941898818167, "grad_norm": 458.4639804768228, "learning_rate": 1.1228200019770412e-05, "loss": 333.2857, "step": 24240 }, { "epoch": 0.46638651421757654, "grad_norm": 539.0390267204716, "learning_rate": 1.1222142855920719e-05, "loss": 353.6003, "step": 24250 }, { "epoch": 0.4665788385533364, "grad_norm": 451.7144947091559, "learning_rate": 1.1216085236842997e-05, "loss": 344.9879, "step": 24260 }, { "epoch": 0.46677116288909615, "grad_norm": 462.0655486019301, "learning_rate": 1.1210027164793609e-05, "loss": 356.0899, "step": 24270 }, { "epoch": 0.466963487224856, "grad_norm": 430.84130865051253, "learning_rate": 1.1203968642029086e-05, "loss": 354.1064, "step": 24280 }, { "epoch": 0.4671558115606158, "grad_norm": 464.77943367322086, "learning_rate": 1.1197909670806126e-05, "loss": 347.4563, "step": 24290 }, { "epoch": 0.46734813589637564, "grad_norm": 461.17496182656316, "learning_rate": 1.1191850253381602e-05, "loss": 360.7592, "step": 24300 }, { "epoch": 0.46754046023213547, "grad_norm": 414.7179900777237, "learning_rate": 1.1185790392012538e-05, "loss": 348.1267, "step": 24310 }, { "epoch": 0.4677327845678953, "grad_norm": 514.2895881477375, "learning_rate": 1.117973008895614e-05, "loss": 348.7906, "step": 24320 }, { "epoch": 0.46792510890365513, "grad_norm": 437.8789944275789, "learning_rate": 1.1173669346469767e-05, "loss": 357.3282, "step": 24330 }, { "epoch": 0.46811743323941496, "grad_norm": 449.6214027720906, "learning_rate": 1.1167608166810948e-05, "loss": 345.5132, "step": 24340 }, { "epoch": 0.4683097575751748, "grad_norm": 456.3285175844381, "learning_rate": 1.1161546552237368e-05, "loss": 363.5194, "step": 24350 }, { "epoch": 0.4685020819109346, "grad_norm": 467.80311061048957, "learning_rate": 1.1155484505006884e-05, "loss": 349.5496, "step": 24360 }, { "epoch": 0.46869440624669445, "grad_norm": 466.4334589354807, "learning_rate": 1.1149422027377501e-05, "loss": 349.8633, "step": 24370 }, { "epoch": 0.4688867305824542, "grad_norm": 449.03343841662524, "learning_rate": 1.1143359121607397e-05, "loss": 346.7382, "step": 24380 }, { "epoch": 0.46907905491821406, "grad_norm": 427.20341280497115, "learning_rate": 1.1137295789954904e-05, "loss": 341.3352, "step": 24390 }, { "epoch": 0.4692713792539739, "grad_norm": 447.96260766478883, "learning_rate": 1.1131232034678513e-05, "loss": 345.6423, "step": 24400 }, { "epoch": 0.4694637035897337, "grad_norm": 482.5977086710106, "learning_rate": 1.1125167858036874e-05, "loss": 349.5774, "step": 24410 }, { "epoch": 0.46965602792549355, "grad_norm": 537.1234688920891, "learning_rate": 1.1119103262288788e-05, "loss": 344.8953, "step": 24420 }, { "epoch": 0.4698483522612534, "grad_norm": 481.41717467916874, "learning_rate": 1.1113038249693221e-05, "loss": 343.588, "step": 24430 }, { "epoch": 0.4700406765970132, "grad_norm": 438.699428985415, "learning_rate": 1.1106972822509287e-05, "loss": 347.0128, "step": 24440 }, { "epoch": 0.47023300093277304, "grad_norm": 561.2251274783999, "learning_rate": 1.1100906982996257e-05, "loss": 357.1581, "step": 24450 }, { "epoch": 0.47042532526853287, "grad_norm": 418.10743233194864, "learning_rate": 1.109484073341356e-05, "loss": 347.4563, "step": 24460 }, { "epoch": 0.4706176496042927, "grad_norm": 457.9113594480013, "learning_rate": 1.1088774076020772e-05, "loss": 335.8791, "step": 24470 }, { "epoch": 0.47080997394005253, "grad_norm": 448.84675583006396, "learning_rate": 1.1082707013077625e-05, "loss": 343.1197, "step": 24480 }, { "epoch": 0.4710022982758123, "grad_norm": 486.6496154504289, "learning_rate": 1.1076639546843993e-05, "loss": 350.1951, "step": 24490 }, { "epoch": 0.47119462261157213, "grad_norm": 434.43273951451687, "learning_rate": 1.1070571679579913e-05, "loss": 350.046, "step": 24500 }, { "epoch": 0.47138694694733196, "grad_norm": 424.2807672383882, "learning_rate": 1.1064503413545565e-05, "loss": 354.8194, "step": 24510 }, { "epoch": 0.4715792712830918, "grad_norm": 464.69086297043356, "learning_rate": 1.1058434751001272e-05, "loss": 349.3823, "step": 24520 }, { "epoch": 0.4717715956188516, "grad_norm": 546.2101556802386, "learning_rate": 1.1052365694207516e-05, "loss": 345.8188, "step": 24530 }, { "epoch": 0.47196391995461145, "grad_norm": 442.90774448197226, "learning_rate": 1.1046296245424922e-05, "loss": 340.8246, "step": 24540 }, { "epoch": 0.4721562442903713, "grad_norm": 460.72259924303825, "learning_rate": 1.1040226406914254e-05, "loss": 343.2918, "step": 24550 }, { "epoch": 0.4723485686261311, "grad_norm": 443.35966540667556, "learning_rate": 1.103415618093643e-05, "loss": 358.9477, "step": 24560 }, { "epoch": 0.47254089296189095, "grad_norm": 492.01720815670456, "learning_rate": 1.1028085569752512e-05, "loss": 345.4642, "step": 24570 }, { "epoch": 0.4727332172976508, "grad_norm": 412.1990961267601, "learning_rate": 1.1022014575623695e-05, "loss": 353.017, "step": 24580 }, { "epoch": 0.4729255416334106, "grad_norm": 540.5796744301151, "learning_rate": 1.1015943200811325e-05, "loss": 345.3408, "step": 24590 }, { "epoch": 0.4731178659691704, "grad_norm": 422.4793078979871, "learning_rate": 1.1009871447576894e-05, "loss": 338.4367, "step": 24600 }, { "epoch": 0.4733101903049302, "grad_norm": 451.29858440959424, "learning_rate": 1.100379931818203e-05, "loss": 350.746, "step": 24610 }, { "epoch": 0.47350251464069004, "grad_norm": 485.95574517008765, "learning_rate": 1.0997726814888497e-05, "loss": 364.0745, "step": 24620 }, { "epoch": 0.4736948389764499, "grad_norm": 418.0621755984242, "learning_rate": 1.0991653939958203e-05, "loss": 337.7672, "step": 24630 }, { "epoch": 0.4738871633122097, "grad_norm": 455.56264805557186, "learning_rate": 1.0985580695653193e-05, "loss": 353.3663, "step": 24640 }, { "epoch": 0.47407948764796953, "grad_norm": 418.28588400058135, "learning_rate": 1.0979507084235653e-05, "loss": 346.353, "step": 24650 }, { "epoch": 0.47427181198372936, "grad_norm": 409.83371591037053, "learning_rate": 1.0973433107967901e-05, "loss": 344.1408, "step": 24660 }, { "epoch": 0.4744641363194892, "grad_norm": 490.30897601421594, "learning_rate": 1.0967358769112389e-05, "loss": 350.0065, "step": 24670 }, { "epoch": 0.474656460655249, "grad_norm": 484.6349494526896, "learning_rate": 1.0961284069931717e-05, "loss": 341.357, "step": 24680 }, { "epoch": 0.47484878499100885, "grad_norm": 465.423951443859, "learning_rate": 1.0955209012688602e-05, "loss": 347.5928, "step": 24690 }, { "epoch": 0.4750411093267687, "grad_norm": 409.1567254486132, "learning_rate": 1.094913359964591e-05, "loss": 341.2007, "step": 24700 }, { "epoch": 0.4752334336625285, "grad_norm": 445.6050355718344, "learning_rate": 1.0943057833066622e-05, "loss": 350.1684, "step": 24710 }, { "epoch": 0.4754257579982883, "grad_norm": 458.31830053364575, "learning_rate": 1.093698171521387e-05, "loss": 347.2435, "step": 24720 }, { "epoch": 0.4756180823340481, "grad_norm": 442.15865168854026, "learning_rate": 1.0930905248350903e-05, "loss": 336.4149, "step": 24730 }, { "epoch": 0.47581040666980795, "grad_norm": 446.323305378914, "learning_rate": 1.0924828434741101e-05, "loss": 349.5124, "step": 24740 }, { "epoch": 0.4760027310055678, "grad_norm": 438.08495807593556, "learning_rate": 1.0918751276647988e-05, "loss": 350.5676, "step": 24750 }, { "epoch": 0.4761950553413276, "grad_norm": 445.1443369993769, "learning_rate": 1.0912673776335194e-05, "loss": 344.2197, "step": 24760 }, { "epoch": 0.47638737967708744, "grad_norm": 443.9768021761547, "learning_rate": 1.0906595936066496e-05, "loss": 347.4774, "step": 24770 }, { "epoch": 0.47657970401284727, "grad_norm": 425.05006112185043, "learning_rate": 1.090051775810578e-05, "loss": 345.046, "step": 24780 }, { "epoch": 0.4767720283486071, "grad_norm": 478.3464622247465, "learning_rate": 1.0894439244717075e-05, "loss": 354.5189, "step": 24790 }, { "epoch": 0.47696435268436693, "grad_norm": 443.74489667286645, "learning_rate": 1.0888360398164521e-05, "loss": 361.6882, "step": 24800 }, { "epoch": 0.47715667702012676, "grad_norm": 439.81802183197306, "learning_rate": 1.088228122071239e-05, "loss": 351.7037, "step": 24810 }, { "epoch": 0.4773490013558866, "grad_norm": 431.5731288746098, "learning_rate": 1.0876201714625076e-05, "loss": 349.9723, "step": 24820 }, { "epoch": 0.47754132569164637, "grad_norm": 462.3459775062199, "learning_rate": 1.0870121882167095e-05, "loss": 345.8595, "step": 24830 }, { "epoch": 0.4777336500274062, "grad_norm": 436.5308037709048, "learning_rate": 1.0864041725603085e-05, "loss": 344.7583, "step": 24840 }, { "epoch": 0.47792597436316603, "grad_norm": 460.26871532608396, "learning_rate": 1.08579612471978e-05, "loss": 339.4395, "step": 24850 }, { "epoch": 0.47811829869892586, "grad_norm": 431.9694158221801, "learning_rate": 1.0851880449216123e-05, "loss": 339.2742, "step": 24860 }, { "epoch": 0.4783106230346857, "grad_norm": 484.2571614779925, "learning_rate": 1.0845799333923045e-05, "loss": 351.7642, "step": 24870 }, { "epoch": 0.4785029473704455, "grad_norm": 444.5009495762413, "learning_rate": 1.0839717903583684e-05, "loss": 344.0675, "step": 24880 }, { "epoch": 0.47869527170620535, "grad_norm": 466.60153754361727, "learning_rate": 1.0833636160463273e-05, "loss": 345.1038, "step": 24890 }, { "epoch": 0.4788875960419652, "grad_norm": 454.12358645908995, "learning_rate": 1.0827554106827162e-05, "loss": 349.2369, "step": 24900 }, { "epoch": 0.479079920377725, "grad_norm": 458.23607739890093, "learning_rate": 1.0821471744940812e-05, "loss": 341.8702, "step": 24910 }, { "epoch": 0.47927224471348484, "grad_norm": 439.01173869618395, "learning_rate": 1.0815389077069805e-05, "loss": 340.1212, "step": 24920 }, { "epoch": 0.47946456904924467, "grad_norm": 506.9600893811272, "learning_rate": 1.0809306105479834e-05, "loss": 350.3729, "step": 24930 }, { "epoch": 0.47965689338500445, "grad_norm": 439.36878697300597, "learning_rate": 1.0803222832436701e-05, "loss": 338.7325, "step": 24940 }, { "epoch": 0.4798492177207643, "grad_norm": 494.8668702701581, "learning_rate": 1.0797139260206331e-05, "loss": 347.2341, "step": 24950 }, { "epoch": 0.4800415420565241, "grad_norm": 459.6139563612464, "learning_rate": 1.079105539105475e-05, "loss": 350.5526, "step": 24960 }, { "epoch": 0.48023386639228394, "grad_norm": 569.5012562851917, "learning_rate": 1.0784971227248104e-05, "loss": 352.9748, "step": 24970 }, { "epoch": 0.48042619072804377, "grad_norm": 424.89197083814355, "learning_rate": 1.077888677105264e-05, "loss": 352.4325, "step": 24980 }, { "epoch": 0.4806185150638036, "grad_norm": 446.49260866953017, "learning_rate": 1.0772802024734716e-05, "loss": 343.1698, "step": 24990 }, { "epoch": 0.48081083939956343, "grad_norm": 477.87822001902447, "learning_rate": 1.07667169905608e-05, "loss": 352.7179, "step": 25000 }, { "epoch": 0.48100316373532326, "grad_norm": 455.3227866533238, "learning_rate": 1.0760631670797468e-05, "loss": 343.0752, "step": 25010 }, { "epoch": 0.4811954880710831, "grad_norm": 494.27166505087615, "learning_rate": 1.07545460677114e-05, "loss": 337.4043, "step": 25020 }, { "epoch": 0.4813878124068429, "grad_norm": 503.07761611765466, "learning_rate": 1.0748460183569385e-05, "loss": 350.7558, "step": 25030 }, { "epoch": 0.48158013674260275, "grad_norm": 475.1194980159393, "learning_rate": 1.0742374020638315e-05, "loss": 346.2251, "step": 25040 }, { "epoch": 0.4817724610783625, "grad_norm": 438.784613352954, "learning_rate": 1.073628758118518e-05, "loss": 357.0386, "step": 25050 }, { "epoch": 0.48196478541412235, "grad_norm": 478.48266691770704, "learning_rate": 1.0730200867477083e-05, "loss": 342.8381, "step": 25060 }, { "epoch": 0.4821571097498822, "grad_norm": 471.0869564054891, "learning_rate": 1.0724113881781222e-05, "loss": 346.0839, "step": 25070 }, { "epoch": 0.482349434085642, "grad_norm": 513.9448779597403, "learning_rate": 1.0718026626364902e-05, "loss": 357.383, "step": 25080 }, { "epoch": 0.48254175842140185, "grad_norm": 431.4311270140063, "learning_rate": 1.0711939103495515e-05, "loss": 344.1433, "step": 25090 }, { "epoch": 0.4827340827571617, "grad_norm": 498.95380197289, "learning_rate": 1.0705851315440575e-05, "loss": 345.7734, "step": 25100 }, { "epoch": 0.4829264070929215, "grad_norm": 438.87532980026094, "learning_rate": 1.0699763264467675e-05, "loss": 345.4024, "step": 25110 }, { "epoch": 0.48311873142868134, "grad_norm": 472.51704784160995, "learning_rate": 1.069367495284452e-05, "loss": 343.9813, "step": 25120 }, { "epoch": 0.48331105576444117, "grad_norm": 422.5549587358019, "learning_rate": 1.06875863828389e-05, "loss": 334.9597, "step": 25130 }, { "epoch": 0.483503380100201, "grad_norm": 490.89726557182894, "learning_rate": 1.0681497556718706e-05, "loss": 331.7853, "step": 25140 }, { "epoch": 0.4836957044359608, "grad_norm": 475.2300081889788, "learning_rate": 1.067540847675193e-05, "loss": 348.329, "step": 25150 }, { "epoch": 0.48388802877172066, "grad_norm": 463.3089969042881, "learning_rate": 1.0669319145206646e-05, "loss": 349.3792, "step": 25160 }, { "epoch": 0.48408035310748043, "grad_norm": 455.0400266361233, "learning_rate": 1.066322956435104e-05, "loss": 348.8611, "step": 25170 }, { "epoch": 0.48427267744324026, "grad_norm": 458.31353774106776, "learning_rate": 1.0657139736453375e-05, "loss": 354.1627, "step": 25180 }, { "epoch": 0.4844650017790001, "grad_norm": 467.24138652429815, "learning_rate": 1.0651049663782007e-05, "loss": 356.1149, "step": 25190 }, { "epoch": 0.4846573261147599, "grad_norm": 449.78103138295876, "learning_rate": 1.0644959348605397e-05, "loss": 345.8042, "step": 25200 }, { "epoch": 0.48484965045051975, "grad_norm": 443.87809744652037, "learning_rate": 1.0638868793192079e-05, "loss": 340.828, "step": 25210 }, { "epoch": 0.4850419747862796, "grad_norm": 452.6505135387983, "learning_rate": 1.0632777999810685e-05, "loss": 338.9143, "step": 25220 }, { "epoch": 0.4852342991220394, "grad_norm": 439.7766405435959, "learning_rate": 1.062668697072994e-05, "loss": 345.1837, "step": 25230 }, { "epoch": 0.48542662345779924, "grad_norm": 448.4700508249548, "learning_rate": 1.0620595708218646e-05, "loss": 341.9236, "step": 25240 }, { "epoch": 0.4856189477935591, "grad_norm": 475.4474428490237, "learning_rate": 1.06145042145457e-05, "loss": 352.3563, "step": 25250 }, { "epoch": 0.4858112721293189, "grad_norm": 419.36365099043456, "learning_rate": 1.0608412491980084e-05, "loss": 336.5221, "step": 25260 }, { "epoch": 0.48600359646507874, "grad_norm": 444.23904760051914, "learning_rate": 1.0602320542790866e-05, "loss": 345.2479, "step": 25270 }, { "epoch": 0.4861959208008385, "grad_norm": 509.203731905521, "learning_rate": 1.0596228369247188e-05, "loss": 340.8519, "step": 25280 }, { "epoch": 0.48638824513659834, "grad_norm": 461.5633830418135, "learning_rate": 1.0590135973618294e-05, "loss": 341.8667, "step": 25290 }, { "epoch": 0.48658056947235817, "grad_norm": 492.74475147986857, "learning_rate": 1.0584043358173492e-05, "loss": 344.5896, "step": 25300 }, { "epoch": 0.486772893808118, "grad_norm": 441.82248779718276, "learning_rate": 1.0577950525182189e-05, "loss": 345.3792, "step": 25310 }, { "epoch": 0.48696521814387783, "grad_norm": 496.70645990489965, "learning_rate": 1.0571857476913856e-05, "loss": 334.5547, "step": 25320 }, { "epoch": 0.48715754247963766, "grad_norm": 438.4203165582315, "learning_rate": 1.056576421563806e-05, "loss": 347.8875, "step": 25330 }, { "epoch": 0.4873498668153975, "grad_norm": 485.23372980210036, "learning_rate": 1.0559670743624439e-05, "loss": 333.4783, "step": 25340 }, { "epoch": 0.4875421911511573, "grad_norm": 520.4504717812571, "learning_rate": 1.0553577063142705e-05, "loss": 355.2171, "step": 25350 }, { "epoch": 0.48773451548691715, "grad_norm": 456.3826035251919, "learning_rate": 1.0547483176462662e-05, "loss": 350.5201, "step": 25360 }, { "epoch": 0.487926839822677, "grad_norm": 604.3539239542757, "learning_rate": 1.0541389085854177e-05, "loss": 348.1062, "step": 25370 }, { "epoch": 0.4881191641584368, "grad_norm": 462.9639866093167, "learning_rate": 1.0535294793587197e-05, "loss": 347.1798, "step": 25380 }, { "epoch": 0.4883114884941966, "grad_norm": 459.1180620545904, "learning_rate": 1.0529200301931747e-05, "loss": 334.194, "step": 25390 }, { "epoch": 0.4885038128299564, "grad_norm": 466.0139643714818, "learning_rate": 1.052310561315793e-05, "loss": 344.5897, "step": 25400 }, { "epoch": 0.48869613716571625, "grad_norm": 411.7056966811325, "learning_rate": 1.0517010729535903e-05, "loss": 351.3513, "step": 25410 }, { "epoch": 0.4888884615014761, "grad_norm": 447.7577930113551, "learning_rate": 1.0510915653335925e-05, "loss": 339.0789, "step": 25420 }, { "epoch": 0.4890807858372359, "grad_norm": 462.4783046700875, "learning_rate": 1.05048203868283e-05, "loss": 354.5434, "step": 25430 }, { "epoch": 0.48927311017299574, "grad_norm": 484.2055523382258, "learning_rate": 1.0498724932283419e-05, "loss": 344.0051, "step": 25440 }, { "epoch": 0.48946543450875557, "grad_norm": 462.1476748192149, "learning_rate": 1.0492629291971738e-05, "loss": 349.671, "step": 25450 }, { "epoch": 0.4896577588445154, "grad_norm": 433.1666272164113, "learning_rate": 1.0486533468163782e-05, "loss": 341.0747, "step": 25460 }, { "epoch": 0.48985008318027523, "grad_norm": 427.28080531477684, "learning_rate": 1.0480437463130145e-05, "loss": 338.3895, "step": 25470 }, { "epoch": 0.49004240751603506, "grad_norm": 426.31975680146144, "learning_rate": 1.0474341279141486e-05, "loss": 353.6146, "step": 25480 }, { "epoch": 0.4902347318517949, "grad_norm": 443.26554421686126, "learning_rate": 1.0468244918468538e-05, "loss": 348.0944, "step": 25490 }, { "epoch": 0.49042705618755467, "grad_norm": 488.49539952721847, "learning_rate": 1.0462148383382086e-05, "loss": 341.5997, "step": 25500 }, { "epoch": 0.4906193805233145, "grad_norm": 475.67857010537426, "learning_rate": 1.0456051676152996e-05, "loss": 338.9557, "step": 25510 }, { "epoch": 0.4908117048590743, "grad_norm": 606.8907150405572, "learning_rate": 1.0449954799052189e-05, "loss": 349.8166, "step": 25520 }, { "epoch": 0.49100402919483416, "grad_norm": 504.9694016136632, "learning_rate": 1.044385775435065e-05, "loss": 352.5424, "step": 25530 }, { "epoch": 0.491196353530594, "grad_norm": 441.06303350524996, "learning_rate": 1.043776054431943e-05, "loss": 344.1763, "step": 25540 }, { "epoch": 0.4913886778663538, "grad_norm": 428.6514306100753, "learning_rate": 1.0431663171229636e-05, "loss": 344.2802, "step": 25550 }, { "epoch": 0.49158100220211365, "grad_norm": 465.9579446728845, "learning_rate": 1.0425565637352441e-05, "loss": 348.4694, "step": 25560 }, { "epoch": 0.4917733265378735, "grad_norm": 454.88875844656116, "learning_rate": 1.0419467944959072e-05, "loss": 349.0906, "step": 25570 }, { "epoch": 0.4919656508736333, "grad_norm": 446.72372455965024, "learning_rate": 1.0413370096320823e-05, "loss": 334.4174, "step": 25580 }, { "epoch": 0.49215797520939314, "grad_norm": 436.1817000916842, "learning_rate": 1.0407272093709038e-05, "loss": 344.6982, "step": 25590 }, { "epoch": 0.49235029954515297, "grad_norm": 454.19384015769054, "learning_rate": 1.0401173939395128e-05, "loss": 342.2402, "step": 25600 }, { "epoch": 0.49254262388091274, "grad_norm": 465.78700065151713, "learning_rate": 1.0395075635650549e-05, "loss": 339.2602, "step": 25610 }, { "epoch": 0.4927349482166726, "grad_norm": 423.61803979987485, "learning_rate": 1.038897718474682e-05, "loss": 341.5205, "step": 25620 }, { "epoch": 0.4929272725524324, "grad_norm": 446.3128603090774, "learning_rate": 1.0382878588955517e-05, "loss": 350.9064, "step": 25630 }, { "epoch": 0.49311959688819224, "grad_norm": 437.4944675601813, "learning_rate": 1.0376779850548257e-05, "loss": 345.0944, "step": 25640 }, { "epoch": 0.49331192122395207, "grad_norm": 434.73650695158506, "learning_rate": 1.0370680971796732e-05, "loss": 336.7031, "step": 25650 }, { "epoch": 0.4935042455597119, "grad_norm": 455.07444197146157, "learning_rate": 1.0364581954972662e-05, "loss": 337.445, "step": 25660 }, { "epoch": 0.4936965698954717, "grad_norm": 470.63817511194935, "learning_rate": 1.0358482802347838e-05, "loss": 347.9157, "step": 25670 }, { "epoch": 0.49388889423123156, "grad_norm": 615.3420191970537, "learning_rate": 1.0352383516194088e-05, "loss": 343.8957, "step": 25680 }, { "epoch": 0.4940812185669914, "grad_norm": 449.8024243375401, "learning_rate": 1.0346284098783304e-05, "loss": 344.9775, "step": 25690 }, { "epoch": 0.4942735429027512, "grad_norm": 439.79371865907626, "learning_rate": 1.0340184552387406e-05, "loss": 344.3167, "step": 25700 }, { "epoch": 0.49446586723851105, "grad_norm": 510.9504219105106, "learning_rate": 1.0334084879278381e-05, "loss": 344.9335, "step": 25710 }, { "epoch": 0.4946581915742709, "grad_norm": 453.71646428694226, "learning_rate": 1.032798508172826e-05, "loss": 345.6492, "step": 25720 }, { "epoch": 0.49485051591003065, "grad_norm": 417.9122891919555, "learning_rate": 1.0321885162009111e-05, "loss": 334.8714, "step": 25730 }, { "epoch": 0.4950428402457905, "grad_norm": 426.3977137430743, "learning_rate": 1.0315785122393053e-05, "loss": 339.017, "step": 25740 }, { "epoch": 0.4952351645815503, "grad_norm": 455.42934401603463, "learning_rate": 1.0309684965152254e-05, "loss": 346.5886, "step": 25750 }, { "epoch": 0.49542748891731014, "grad_norm": 427.4702055917018, "learning_rate": 1.030358469255892e-05, "loss": 349.8492, "step": 25760 }, { "epoch": 0.49561981325307, "grad_norm": 503.1026610452885, "learning_rate": 1.0297484306885304e-05, "loss": 342.484, "step": 25770 }, { "epoch": 0.4958121375888298, "grad_norm": 422.15219926086354, "learning_rate": 1.0291383810403697e-05, "loss": 343.0117, "step": 25780 }, { "epoch": 0.49600446192458963, "grad_norm": 426.2328515088724, "learning_rate": 1.028528320538643e-05, "loss": 345.1555, "step": 25790 }, { "epoch": 0.49619678626034947, "grad_norm": 420.70888404290093, "learning_rate": 1.0279182494105879e-05, "loss": 337.1838, "step": 25800 }, { "epoch": 0.4963891105961093, "grad_norm": 430.721565898764, "learning_rate": 1.0273081678834462e-05, "loss": 337.5997, "step": 25810 }, { "epoch": 0.4965814349318691, "grad_norm": 429.10717872706454, "learning_rate": 1.026698076184463e-05, "loss": 335.7756, "step": 25820 }, { "epoch": 0.49677375926762896, "grad_norm": 445.63623859890174, "learning_rate": 1.0260879745408876e-05, "loss": 344.804, "step": 25830 }, { "epoch": 0.49696608360338873, "grad_norm": 515.0421940610776, "learning_rate": 1.0254778631799722e-05, "loss": 332.7388, "step": 25840 }, { "epoch": 0.49715840793914856, "grad_norm": 547.9305516906084, "learning_rate": 1.024867742328974e-05, "loss": 349.1427, "step": 25850 }, { "epoch": 0.4973507322749084, "grad_norm": 457.29395189890954, "learning_rate": 1.024257612215152e-05, "loss": 347.0169, "step": 25860 }, { "epoch": 0.4975430566106682, "grad_norm": 436.5744785798286, "learning_rate": 1.02364747306577e-05, "loss": 347.5799, "step": 25870 }, { "epoch": 0.49773538094642805, "grad_norm": 437.3957172964602, "learning_rate": 1.023037325108095e-05, "loss": 344.1047, "step": 25880 }, { "epoch": 0.4979277052821879, "grad_norm": 451.2434663139427, "learning_rate": 1.022427168569397e-05, "loss": 336.6407, "step": 25890 }, { "epoch": 0.4981200296179477, "grad_norm": 429.12410838179204, "learning_rate": 1.021817003676949e-05, "loss": 346.0296, "step": 25900 }, { "epoch": 0.49831235395370754, "grad_norm": 540.7591793592441, "learning_rate": 1.0212068306580274e-05, "loss": 350.6603, "step": 25910 }, { "epoch": 0.4985046782894674, "grad_norm": 432.4797951328987, "learning_rate": 1.0205966497399118e-05, "loss": 331.3958, "step": 25920 }, { "epoch": 0.4986970026252272, "grad_norm": 475.8132467748236, "learning_rate": 1.0199864611498841e-05, "loss": 351.4599, "step": 25930 }, { "epoch": 0.49888932696098703, "grad_norm": 430.0734617528874, "learning_rate": 1.0193762651152299e-05, "loss": 351.4419, "step": 25940 }, { "epoch": 0.4990816512967468, "grad_norm": 472.1029956897937, "learning_rate": 1.018766061863237e-05, "loss": 340.3128, "step": 25950 }, { "epoch": 0.49927397563250664, "grad_norm": 465.6110802717735, "learning_rate": 1.018155851621196e-05, "loss": 343.5428, "step": 25960 }, { "epoch": 0.49946629996826647, "grad_norm": 430.92541766427337, "learning_rate": 1.0175456346164e-05, "loss": 339.8401, "step": 25970 }, { "epoch": 0.4996586243040263, "grad_norm": 422.5119090674598, "learning_rate": 1.0169354110761447e-05, "loss": 342.0407, "step": 25980 }, { "epoch": 0.49985094863978613, "grad_norm": 435.6800807681979, "learning_rate": 1.0163251812277289e-05, "loss": 338.9048, "step": 25990 }, { "epoch": 0.500043272975546, "grad_norm": 473.092265132417, "learning_rate": 1.0157149452984523e-05, "loss": 341.6524, "step": 26000 }, { "epoch": 0.5002355973113057, "grad_norm": 427.8281945237504, "learning_rate": 1.0151047035156182e-05, "loss": 343.6659, "step": 26010 }, { "epoch": 0.5004279216470656, "grad_norm": 478.5189418354036, "learning_rate": 1.0144944561065316e-05, "loss": 339.8255, "step": 26020 }, { "epoch": 0.5006202459828254, "grad_norm": 435.2745113248596, "learning_rate": 1.0138842032984996e-05, "loss": 356.8999, "step": 26030 }, { "epoch": 0.5008125703185853, "grad_norm": 405.26899659127605, "learning_rate": 1.013273945318831e-05, "loss": 349.762, "step": 26040 }, { "epoch": 0.5010048946543451, "grad_norm": 428.87900165739535, "learning_rate": 1.0126636823948373e-05, "loss": 338.0337, "step": 26050 }, { "epoch": 0.5011972189901049, "grad_norm": 455.91267921591265, "learning_rate": 1.0120534147538305e-05, "loss": 345.6226, "step": 26060 }, { "epoch": 0.5013895433258647, "grad_norm": 443.5850119841078, "learning_rate": 1.011443142623126e-05, "loss": 351.0039, "step": 26070 }, { "epoch": 0.5015818676616246, "grad_norm": 436.0291216771967, "learning_rate": 1.0108328662300399e-05, "loss": 340.9716, "step": 26080 }, { "epoch": 0.5017741919973844, "grad_norm": 446.3422677503715, "learning_rate": 1.0102225858018902e-05, "loss": 336.164, "step": 26090 }, { "epoch": 0.5019665163331443, "grad_norm": 438.47251662104185, "learning_rate": 1.009612301565996e-05, "loss": 342.6302, "step": 26100 }, { "epoch": 0.502158840668904, "grad_norm": 433.6151797711422, "learning_rate": 1.0090020137496783e-05, "loss": 338.885, "step": 26110 }, { "epoch": 0.5023511650046638, "grad_norm": 499.6250064975691, "learning_rate": 1.008391722580259e-05, "loss": 354.9817, "step": 26120 }, { "epoch": 0.5025434893404237, "grad_norm": 468.84803857442233, "learning_rate": 1.0077814282850617e-05, "loss": 358.8431, "step": 26130 }, { "epoch": 0.5027358136761835, "grad_norm": 439.65479580682444, "learning_rate": 1.0071711310914111e-05, "loss": 342.4324, "step": 26140 }, { "epoch": 0.5029281380119434, "grad_norm": 438.4205862312812, "learning_rate": 1.0065608312266324e-05, "loss": 346.871, "step": 26150 }, { "epoch": 0.5031204623477031, "grad_norm": 467.38154208423816, "learning_rate": 1.005950528918052e-05, "loss": 344.1982, "step": 26160 }, { "epoch": 0.503312786683463, "grad_norm": 434.153971787788, "learning_rate": 1.0053402243929986e-05, "loss": 348.1347, "step": 26170 }, { "epoch": 0.5035051110192228, "grad_norm": 464.53257993576017, "learning_rate": 1.0047299178787993e-05, "loss": 343.7879, "step": 26180 }, { "epoch": 0.5036974353549827, "grad_norm": 440.69561714345843, "learning_rate": 1.0041196096027841e-05, "loss": 340.0599, "step": 26190 }, { "epoch": 0.5038897596907425, "grad_norm": 431.1380537992603, "learning_rate": 1.003509299792282e-05, "loss": 340.2102, "step": 26200 }, { "epoch": 0.5040820840265023, "grad_norm": 530.5361816157434, "learning_rate": 1.0028989886746241e-05, "loss": 329.6086, "step": 26210 }, { "epoch": 0.5042744083622621, "grad_norm": 420.87720352454573, "learning_rate": 1.0022886764771405e-05, "loss": 339.4763, "step": 26220 }, { "epoch": 0.5044667326980219, "grad_norm": 451.4880905735666, "learning_rate": 1.0016783634271626e-05, "loss": 338.5879, "step": 26230 }, { "epoch": 0.5046590570337818, "grad_norm": 480.27448745078203, "learning_rate": 1.001068049752022e-05, "loss": 342.3657, "step": 26240 }, { "epoch": 0.5048513813695416, "grad_norm": 437.82798984123554, "learning_rate": 1.0004577356790506e-05, "loss": 334.7288, "step": 26250 }, { "epoch": 0.5050437057053014, "grad_norm": 489.6626013709203, "learning_rate": 9.998474214355805e-06, "loss": 341.5562, "step": 26260 }, { "epoch": 0.5052360300410612, "grad_norm": 465.29240718561823, "learning_rate": 9.992371072489434e-06, "loss": 351.4869, "step": 26270 }, { "epoch": 0.5054283543768211, "grad_norm": 442.5453968598927, "learning_rate": 9.986267933464707e-06, "loss": 354.0186, "step": 26280 }, { "epoch": 0.5056206787125809, "grad_norm": 449.96081343231003, "learning_rate": 9.980164799554953e-06, "loss": 347.4886, "step": 26290 }, { "epoch": 0.5058130030483408, "grad_norm": 465.16494406161587, "learning_rate": 9.97406167303348e-06, "loss": 343.1912, "step": 26300 }, { "epoch": 0.5060053273841005, "grad_norm": 536.5672845204716, "learning_rate": 9.967958556173612e-06, "loss": 345.5935, "step": 26310 }, { "epoch": 0.5061976517198604, "grad_norm": 484.91161652683945, "learning_rate": 9.961855451248645e-06, "loss": 336.4124, "step": 26320 }, { "epoch": 0.5063899760556202, "grad_norm": 414.40591062278037, "learning_rate": 9.955752360531896e-06, "loss": 351.0156, "step": 26330 }, { "epoch": 0.50658230039138, "grad_norm": 518.9984050344088, "learning_rate": 9.949649286296663e-06, "loss": 341.3864, "step": 26340 }, { "epoch": 0.5067746247271399, "grad_norm": 515.5651496992565, "learning_rate": 9.943546230816236e-06, "loss": 343.5138, "step": 26350 }, { "epoch": 0.5069669490628996, "grad_norm": 450.28204301907545, "learning_rate": 9.937443196363908e-06, "loss": 350.0704, "step": 26360 }, { "epoch": 0.5071592733986595, "grad_norm": 453.02053857871084, "learning_rate": 9.931340185212955e-06, "loss": 336.2348, "step": 26370 }, { "epoch": 0.5073515977344193, "grad_norm": 491.1923399841011, "learning_rate": 9.925237199636652e-06, "loss": 334.6663, "step": 26380 }, { "epoch": 0.5075439220701792, "grad_norm": 513.6348863780322, "learning_rate": 9.919134241908252e-06, "loss": 339.5493, "step": 26390 }, { "epoch": 0.507736246405939, "grad_norm": 468.4940037945204, "learning_rate": 9.913031314301016e-06, "loss": 351.3495, "step": 26400 }, { "epoch": 0.5079285707416988, "grad_norm": 468.49295621127857, "learning_rate": 9.906928419088178e-06, "loss": 336.9728, "step": 26410 }, { "epoch": 0.5081208950774586, "grad_norm": 462.0725195256504, "learning_rate": 9.900825558542965e-06, "loss": 347.8383, "step": 26420 }, { "epoch": 0.5083132194132185, "grad_norm": 459.97071544613885, "learning_rate": 9.894722734938595e-06, "loss": 352.6653, "step": 26430 }, { "epoch": 0.5085055437489783, "grad_norm": 445.8809364873831, "learning_rate": 9.888619950548267e-06, "loss": 338.8954, "step": 26440 }, { "epoch": 0.508697868084738, "grad_norm": 406.24969992272116, "learning_rate": 9.88251720764517e-06, "loss": 333.8568, "step": 26450 }, { "epoch": 0.5088901924204979, "grad_norm": 434.58611560041817, "learning_rate": 9.876414508502468e-06, "loss": 340.9845, "step": 26460 }, { "epoch": 0.5090825167562577, "grad_norm": 453.8723914764098, "learning_rate": 9.870311855393324e-06, "loss": 346.499, "step": 26470 }, { "epoch": 0.5092748410920176, "grad_norm": 449.97169361927297, "learning_rate": 9.864209250590875e-06, "loss": 332.3278, "step": 26480 }, { "epoch": 0.5094671654277774, "grad_norm": 401.3479610629499, "learning_rate": 9.858106696368235e-06, "loss": 345.8816, "step": 26490 }, { "epoch": 0.5096594897635373, "grad_norm": 438.93567781557203, "learning_rate": 9.852004194998503e-06, "loss": 338.5684, "step": 26500 }, { "epoch": 0.509851814099297, "grad_norm": 440.5472231891131, "learning_rate": 9.845901748754767e-06, "loss": 338.7455, "step": 26510 }, { "epoch": 0.5100441384350569, "grad_norm": 415.21322748978696, "learning_rate": 9.839799359910087e-06, "loss": 344.3596, "step": 26520 }, { "epoch": 0.5102364627708167, "grad_norm": 458.1084664626786, "learning_rate": 9.833697030737495e-06, "loss": 354.6294, "step": 26530 }, { "epoch": 0.5104287871065766, "grad_norm": 436.6787253675564, "learning_rate": 9.827594763510016e-06, "loss": 346.794, "step": 26540 }, { "epoch": 0.5106211114423364, "grad_norm": 448.1170740412977, "learning_rate": 9.821492560500641e-06, "loss": 337.7722, "step": 26550 }, { "epoch": 0.5108134357780962, "grad_norm": 396.6410997935971, "learning_rate": 9.815390423982339e-06, "loss": 341.1862, "step": 26560 }, { "epoch": 0.511005760113856, "grad_norm": 470.0484816188522, "learning_rate": 9.809288356228051e-06, "loss": 338.5166, "step": 26570 }, { "epoch": 0.5111980844496158, "grad_norm": 447.3056913642524, "learning_rate": 9.803186359510701e-06, "loss": 339.6327, "step": 26580 }, { "epoch": 0.5113904087853757, "grad_norm": 459.000569588546, "learning_rate": 9.797084436103186e-06, "loss": 342.616, "step": 26590 }, { "epoch": 0.5115827331211354, "grad_norm": 435.33280173405103, "learning_rate": 9.790982588278362e-06, "loss": 345.4626, "step": 26600 }, { "epoch": 0.5117750574568953, "grad_norm": 466.38512320023113, "learning_rate": 9.784880818309076e-06, "loss": 338.4864, "step": 26610 }, { "epoch": 0.5119673817926551, "grad_norm": 465.4942602488181, "learning_rate": 9.778779128468133e-06, "loss": 361.0277, "step": 26620 }, { "epoch": 0.512159706128415, "grad_norm": 455.13323019582765, "learning_rate": 9.77267752102831e-06, "loss": 341.2256, "step": 26630 }, { "epoch": 0.5123520304641748, "grad_norm": 471.91920957852267, "learning_rate": 9.766575998262353e-06, "loss": 337.2599, "step": 26640 }, { "epoch": 0.5125443547999347, "grad_norm": 449.80734038194635, "learning_rate": 9.760474562442984e-06, "loss": 358.0246, "step": 26650 }, { "epoch": 0.5127366791356944, "grad_norm": 521.8266663715146, "learning_rate": 9.754373215842884e-06, "loss": 343.768, "step": 26660 }, { "epoch": 0.5129290034714543, "grad_norm": 492.9214205865808, "learning_rate": 9.748271960734708e-06, "loss": 351.5626, "step": 26670 }, { "epoch": 0.5131213278072141, "grad_norm": 426.94099297025866, "learning_rate": 9.742170799391063e-06, "loss": 331.4579, "step": 26680 }, { "epoch": 0.5133136521429739, "grad_norm": 471.2429557142538, "learning_rate": 9.736069734084541e-06, "loss": 329.301, "step": 26690 }, { "epoch": 0.5135059764787337, "grad_norm": 428.84147913856174, "learning_rate": 9.729968767087685e-06, "loss": 338.2121, "step": 26700 }, { "epoch": 0.5136983008144935, "grad_norm": 479.6254456239305, "learning_rate": 9.723867900673e-06, "loss": 331.2304, "step": 26710 }, { "epoch": 0.5138906251502534, "grad_norm": 417.61116967195187, "learning_rate": 9.717767137112964e-06, "loss": 341.0746, "step": 26720 }, { "epoch": 0.5140829494860132, "grad_norm": 473.1536989765579, "learning_rate": 9.711666478680007e-06, "loss": 347.7481, "step": 26730 }, { "epoch": 0.5142752738217731, "grad_norm": 456.8626738321443, "learning_rate": 9.705565927646526e-06, "loss": 339.454, "step": 26740 }, { "epoch": 0.5144675981575328, "grad_norm": 481.5129464094531, "learning_rate": 9.699465486284871e-06, "loss": 339.195, "step": 26750 }, { "epoch": 0.5146599224932927, "grad_norm": 452.08185532759103, "learning_rate": 9.693365156867363e-06, "loss": 334.3235, "step": 26760 }, { "epoch": 0.5148522468290525, "grad_norm": 479.66554519802463, "learning_rate": 9.68726494166627e-06, "loss": 333.8176, "step": 26770 }, { "epoch": 0.5150445711648124, "grad_norm": 454.6945929340308, "learning_rate": 9.681164842953816e-06, "loss": 340.9728, "step": 26780 }, { "epoch": 0.5152368955005722, "grad_norm": 476.6845113981557, "learning_rate": 9.675064863002196e-06, "loss": 340.5218, "step": 26790 }, { "epoch": 0.5154292198363319, "grad_norm": 455.3659689305939, "learning_rate": 9.668965004083549e-06, "loss": 346.2917, "step": 26800 }, { "epoch": 0.5156215441720918, "grad_norm": 493.80965921803494, "learning_rate": 9.66286526846997e-06, "loss": 342.8252, "step": 26810 }, { "epoch": 0.5158138685078516, "grad_norm": 504.6513010726089, "learning_rate": 9.656765658433507e-06, "loss": 334.8217, "step": 26820 }, { "epoch": 0.5160061928436115, "grad_norm": 467.309410542043, "learning_rate": 9.650666176246171e-06, "loss": 339.3484, "step": 26830 }, { "epoch": 0.5161985171793713, "grad_norm": 476.0467276490168, "learning_rate": 9.644566824179916e-06, "loss": 331.1993, "step": 26840 }, { "epoch": 0.5163908415151311, "grad_norm": 441.6495571161134, "learning_rate": 9.638467604506648e-06, "loss": 343.9717, "step": 26850 }, { "epoch": 0.5165831658508909, "grad_norm": 417.00699745682414, "learning_rate": 9.632368519498224e-06, "loss": 337.1766, "step": 26860 }, { "epoch": 0.5167754901866508, "grad_norm": 437.5703591096981, "learning_rate": 9.626269571426456e-06, "loss": 348.3298, "step": 26870 }, { "epoch": 0.5169678145224106, "grad_norm": 456.85078227257094, "learning_rate": 9.620170762563103e-06, "loss": 332.2039, "step": 26880 }, { "epoch": 0.5171601388581705, "grad_norm": 429.5608359887313, "learning_rate": 9.614072095179862e-06, "loss": 338.3338, "step": 26890 }, { "epoch": 0.5173524631939302, "grad_norm": 463.06188230953273, "learning_rate": 9.607973571548396e-06, "loss": 348.9774, "step": 26900 }, { "epoch": 0.51754478752969, "grad_norm": 428.1768626436941, "learning_rate": 9.601875193940301e-06, "loss": 330.0414, "step": 26910 }, { "epoch": 0.5177371118654499, "grad_norm": 438.71866643416524, "learning_rate": 9.595776964627119e-06, "loss": 335.5818, "step": 26920 }, { "epoch": 0.5179294362012097, "grad_norm": 446.2781621492192, "learning_rate": 9.589678885880342e-06, "loss": 341.9717, "step": 26930 }, { "epoch": 0.5181217605369696, "grad_norm": 463.88599220541624, "learning_rate": 9.583580959971402e-06, "loss": 339.0089, "step": 26940 }, { "epoch": 0.5183140848727293, "grad_norm": 443.0918244425555, "learning_rate": 9.577483189171681e-06, "loss": 345.7707, "step": 26950 }, { "epoch": 0.5185064092084892, "grad_norm": 433.1425729845799, "learning_rate": 9.571385575752487e-06, "loss": 335.6523, "step": 26960 }, { "epoch": 0.518698733544249, "grad_norm": 498.50752877677576, "learning_rate": 9.565288121985093e-06, "loss": 348.2144, "step": 26970 }, { "epoch": 0.5188910578800089, "grad_norm": 443.5120851101149, "learning_rate": 9.559190830140695e-06, "loss": 345.7477, "step": 26980 }, { "epoch": 0.5190833822157687, "grad_norm": 446.2507898899773, "learning_rate": 9.553093702490433e-06, "loss": 334.2978, "step": 26990 }, { "epoch": 0.5192757065515285, "grad_norm": 437.6071370750127, "learning_rate": 9.54699674130538e-06, "loss": 338.2994, "step": 27000 }, { "epoch": 0.5194680308872883, "grad_norm": 422.12308663996794, "learning_rate": 9.540899948856561e-06, "loss": 328.964, "step": 27010 }, { "epoch": 0.5196603552230481, "grad_norm": 427.6474875637078, "learning_rate": 9.534803327414931e-06, "loss": 345.2805, "step": 27020 }, { "epoch": 0.519852679558808, "grad_norm": 444.04750253279036, "learning_rate": 9.52870687925138e-06, "loss": 341.8681, "step": 27030 }, { "epoch": 0.5200450038945678, "grad_norm": 465.5837361454199, "learning_rate": 9.522610606636728e-06, "loss": 340.9822, "step": 27040 }, { "epoch": 0.5202373282303276, "grad_norm": 440.35732478272877, "learning_rate": 9.516514511841745e-06, "loss": 343.4004, "step": 27050 }, { "epoch": 0.5204296525660874, "grad_norm": 426.34681471216334, "learning_rate": 9.51041859713712e-06, "loss": 329.5743, "step": 27060 }, { "epoch": 0.5206219769018473, "grad_norm": 433.8876551556778, "learning_rate": 9.50432286479348e-06, "loss": 337.3153, "step": 27070 }, { "epoch": 0.5208143012376071, "grad_norm": 440.3975265547607, "learning_rate": 9.498227317081387e-06, "loss": 341.7763, "step": 27080 }, { "epoch": 0.521006625573367, "grad_norm": 491.4696973561834, "learning_rate": 9.492131956271334e-06, "loss": 338.346, "step": 27090 }, { "epoch": 0.5211989499091267, "grad_norm": 478.4658942725379, "learning_rate": 9.486036784633738e-06, "loss": 341.305, "step": 27100 }, { "epoch": 0.5213912742448866, "grad_norm": 452.927882810684, "learning_rate": 9.47994180443895e-06, "loss": 332.9051, "step": 27110 }, { "epoch": 0.5215835985806464, "grad_norm": 444.6085391917634, "learning_rate": 9.473847017957254e-06, "loss": 339.4169, "step": 27120 }, { "epoch": 0.5217759229164062, "grad_norm": 450.68569710805474, "learning_rate": 9.467752427458851e-06, "loss": 342.388, "step": 27130 }, { "epoch": 0.5219682472521661, "grad_norm": 451.06512983722774, "learning_rate": 9.461658035213878e-06, "loss": 337.2664, "step": 27140 }, { "epoch": 0.5221605715879258, "grad_norm": 428.33853337240384, "learning_rate": 9.455563843492397e-06, "loss": 339.9756, "step": 27150 }, { "epoch": 0.5223528959236857, "grad_norm": 459.54159229164367, "learning_rate": 9.449469854564393e-06, "loss": 332.8475, "step": 27160 }, { "epoch": 0.5225452202594455, "grad_norm": 414.48512219442904, "learning_rate": 9.443376070699778e-06, "loss": 340.6676, "step": 27170 }, { "epoch": 0.5227375445952054, "grad_norm": 467.4786499837992, "learning_rate": 9.437282494168379e-06, "loss": 334.2827, "step": 27180 }, { "epoch": 0.5229298689309652, "grad_norm": 452.12718048029274, "learning_rate": 9.431189127239962e-06, "loss": 348.6354, "step": 27190 }, { "epoch": 0.523122193266725, "grad_norm": 429.63075900356364, "learning_rate": 9.4250959721842e-06, "loss": 334.5864, "step": 27200 }, { "epoch": 0.5233145176024848, "grad_norm": 481.2536545754605, "learning_rate": 9.419003031270692e-06, "loss": 339.6702, "step": 27210 }, { "epoch": 0.5235068419382447, "grad_norm": 403.6776440665063, "learning_rate": 9.412910306768959e-06, "loss": 331.6599, "step": 27220 }, { "epoch": 0.5236991662740045, "grad_norm": 433.9658409070433, "learning_rate": 9.40681780094844e-06, "loss": 329.3873, "step": 27230 }, { "epoch": 0.5238914906097643, "grad_norm": 453.95931032368793, "learning_rate": 9.400725516078496e-06, "loss": 336.5731, "step": 27240 }, { "epoch": 0.5240838149455241, "grad_norm": 442.5289560235294, "learning_rate": 9.394633454428396e-06, "loss": 345.3696, "step": 27250 }, { "epoch": 0.5242761392812839, "grad_norm": 392.6976284047434, "learning_rate": 9.388541618267341e-06, "loss": 340.6594, "step": 27260 }, { "epoch": 0.5244684636170438, "grad_norm": 424.63016958522695, "learning_rate": 9.382450009864434e-06, "loss": 346.652, "step": 27270 }, { "epoch": 0.5246607879528036, "grad_norm": 431.39515318511565, "learning_rate": 9.376358631488697e-06, "loss": 325.4798, "step": 27280 }, { "epoch": 0.5248531122885635, "grad_norm": 450.44302218789613, "learning_rate": 9.37026748540907e-06, "loss": 342.5619, "step": 27290 }, { "epoch": 0.5250454366243232, "grad_norm": 480.7471023848163, "learning_rate": 9.364176573894404e-06, "loss": 331.1925, "step": 27300 }, { "epoch": 0.5252377609600831, "grad_norm": 496.0676672748972, "learning_rate": 9.358085899213467e-06, "loss": 337.8682, "step": 27310 }, { "epoch": 0.5254300852958429, "grad_norm": 445.49214195866523, "learning_rate": 9.351995463634925e-06, "loss": 334.9769, "step": 27320 }, { "epoch": 0.5256224096316028, "grad_norm": 437.53513656220525, "learning_rate": 9.345905269427374e-06, "loss": 334.3453, "step": 27330 }, { "epoch": 0.5258147339673626, "grad_norm": 396.73516754663257, "learning_rate": 9.33981531885931e-06, "loss": 340.4432, "step": 27340 }, { "epoch": 0.5260070583031223, "grad_norm": 464.1043517284434, "learning_rate": 9.333725614199132e-06, "loss": 341.525, "step": 27350 }, { "epoch": 0.5261993826388822, "grad_norm": 472.77488696354254, "learning_rate": 9.32763615771516e-06, "loss": 341.5732, "step": 27360 }, { "epoch": 0.526391706974642, "grad_norm": 602.5340527676817, "learning_rate": 9.321546951675616e-06, "loss": 341.2068, "step": 27370 }, { "epoch": 0.5265840313104019, "grad_norm": 430.6197362546596, "learning_rate": 9.315457998348627e-06, "loss": 334.2109, "step": 27380 }, { "epoch": 0.5267763556461617, "grad_norm": 419.1736102297784, "learning_rate": 9.309369300002224e-06, "loss": 349.3417, "step": 27390 }, { "epoch": 0.5269686799819215, "grad_norm": 441.6157714767051, "learning_rate": 9.303280858904356e-06, "loss": 334.7883, "step": 27400 }, { "epoch": 0.5271610043176813, "grad_norm": 464.59223017060003, "learning_rate": 9.297192677322862e-06, "loss": 345.1679, "step": 27410 }, { "epoch": 0.5273533286534412, "grad_norm": 446.44145284009414, "learning_rate": 9.291104757525486e-06, "loss": 345.3021, "step": 27420 }, { "epoch": 0.527545652989201, "grad_norm": 468.80816541459, "learning_rate": 9.28501710177988e-06, "loss": 328.7764, "step": 27430 }, { "epoch": 0.5277379773249609, "grad_norm": 444.29357835953067, "learning_rate": 9.278929712353595e-06, "loss": 346.5714, "step": 27440 }, { "epoch": 0.5279303016607206, "grad_norm": 469.3491646014543, "learning_rate": 9.272842591514083e-06, "loss": 358.628, "step": 27450 }, { "epoch": 0.5281226259964804, "grad_norm": 428.40195476739024, "learning_rate": 9.2667557415287e-06, "loss": 342.0147, "step": 27460 }, { "epoch": 0.5283149503322403, "grad_norm": 476.3116740634766, "learning_rate": 9.260669164664687e-06, "loss": 344.0848, "step": 27470 }, { "epoch": 0.5285072746680001, "grad_norm": 446.56240586600103, "learning_rate": 9.254582863189205e-06, "loss": 347.999, "step": 27480 }, { "epoch": 0.52869959900376, "grad_norm": 504.0513679274186, "learning_rate": 9.248496839369293e-06, "loss": 331.0626, "step": 27490 }, { "epoch": 0.5288919233395197, "grad_norm": 436.8948134547532, "learning_rate": 9.242411095471897e-06, "loss": 360.6836, "step": 27500 }, { "epoch": 0.5290842476752796, "grad_norm": 424.060867543618, "learning_rate": 9.236325633763856e-06, "loss": 335.3242, "step": 27510 }, { "epoch": 0.5292765720110394, "grad_norm": 413.5356040210607, "learning_rate": 9.230240456511905e-06, "loss": 351.8245, "step": 27520 }, { "epoch": 0.5294688963467993, "grad_norm": 474.9002354952727, "learning_rate": 9.224155565982673e-06, "loss": 332.4841, "step": 27530 }, { "epoch": 0.529661220682559, "grad_norm": 448.1461839060164, "learning_rate": 9.218070964442673e-06, "loss": 339.3383, "step": 27540 }, { "epoch": 0.5298535450183189, "grad_norm": 412.1948335911245, "learning_rate": 9.21198665415833e-06, "loss": 338.0674, "step": 27550 }, { "epoch": 0.5300458693540787, "grad_norm": 438.62678980169545, "learning_rate": 9.205902637395943e-06, "loss": 341.4978, "step": 27560 }, { "epoch": 0.5302381936898385, "grad_norm": 473.17742750787113, "learning_rate": 9.199818916421706e-06, "loss": 332.2519, "step": 27570 }, { "epoch": 0.5304305180255984, "grad_norm": 446.67672469028355, "learning_rate": 9.193735493501707e-06, "loss": 338.8791, "step": 27580 }, { "epoch": 0.5306228423613581, "grad_norm": 435.28041816545954, "learning_rate": 9.187652370901925e-06, "loss": 345.6248, "step": 27590 }, { "epoch": 0.530815166697118, "grad_norm": 418.6282966369881, "learning_rate": 9.181569550888217e-06, "loss": 340.5824, "step": 27600 }, { "epoch": 0.5310074910328778, "grad_norm": 502.5156746744452, "learning_rate": 9.175487035726332e-06, "loss": 335.7792, "step": 27610 }, { "epoch": 0.5311998153686377, "grad_norm": 469.1641062599217, "learning_rate": 9.169404827681912e-06, "loss": 331.6646, "step": 27620 }, { "epoch": 0.5313921397043975, "grad_norm": 427.13753895045767, "learning_rate": 9.163322929020476e-06, "loss": 331.2961, "step": 27630 }, { "epoch": 0.5315844640401574, "grad_norm": 424.02002431219785, "learning_rate": 9.157241342007428e-06, "loss": 337.6597, "step": 27640 }, { "epoch": 0.5317767883759171, "grad_norm": 425.14340527355495, "learning_rate": 9.15116006890806e-06, "loss": 341.9425, "step": 27650 }, { "epoch": 0.531969112711677, "grad_norm": 470.85583914878305, "learning_rate": 9.145079111987552e-06, "loss": 341.8651, "step": 27660 }, { "epoch": 0.5321614370474368, "grad_norm": 425.60658910311287, "learning_rate": 9.138998473510953e-06, "loss": 334.6015, "step": 27670 }, { "epoch": 0.5323537613831967, "grad_norm": 423.49096563894926, "learning_rate": 9.1329181557432e-06, "loss": 341.0399, "step": 27680 }, { "epoch": 0.5325460857189565, "grad_norm": 438.7966182790001, "learning_rate": 9.126838160949119e-06, "loss": 338.1275, "step": 27690 }, { "epoch": 0.5327384100547162, "grad_norm": 436.2738776572443, "learning_rate": 9.120758491393402e-06, "loss": 345.8831, "step": 27700 }, { "epoch": 0.5329307343904761, "grad_norm": 490.6268343034542, "learning_rate": 9.114679149340623e-06, "loss": 347.0099, "step": 27710 }, { "epoch": 0.5331230587262359, "grad_norm": 441.4693682034415, "learning_rate": 9.10860013705524e-06, "loss": 337.0759, "step": 27720 }, { "epoch": 0.5333153830619958, "grad_norm": 414.15192782864995, "learning_rate": 9.102521456801582e-06, "loss": 330.4536, "step": 27730 }, { "epoch": 0.5335077073977555, "grad_norm": 434.794170602652, "learning_rate": 9.096443110843864e-06, "loss": 335.0601, "step": 27740 }, { "epoch": 0.5337000317335154, "grad_norm": 447.43551757004855, "learning_rate": 9.090365101446157e-06, "loss": 337.0183, "step": 27750 }, { "epoch": 0.5338923560692752, "grad_norm": 446.93597862565804, "learning_rate": 9.084287430872435e-06, "loss": 338.7575, "step": 27760 }, { "epoch": 0.5340846804050351, "grad_norm": 481.3634539599402, "learning_rate": 9.078210101386518e-06, "loss": 333.1981, "step": 27770 }, { "epoch": 0.5342770047407949, "grad_norm": 447.3149994843473, "learning_rate": 9.072133115252113e-06, "loss": 336.9669, "step": 27780 }, { "epoch": 0.5344693290765548, "grad_norm": 429.3612659469217, "learning_rate": 9.066056474732798e-06, "loss": 336.0402, "step": 27790 }, { "epoch": 0.5346616534123145, "grad_norm": 414.5918973974524, "learning_rate": 9.059980182092022e-06, "loss": 331.5065, "step": 27800 }, { "epoch": 0.5348539777480743, "grad_norm": 417.84237302182044, "learning_rate": 9.053904239593106e-06, "loss": 333.6915, "step": 27810 }, { "epoch": 0.5350463020838342, "grad_norm": 446.77193543813934, "learning_rate": 9.047828649499236e-06, "loss": 338.6586, "step": 27820 }, { "epoch": 0.535238626419594, "grad_norm": 444.4986007671738, "learning_rate": 9.041753414073463e-06, "loss": 334.6368, "step": 27830 }, { "epoch": 0.5354309507553539, "grad_norm": 485.7434297371959, "learning_rate": 9.035678535578723e-06, "loss": 334.8524, "step": 27840 }, { "epoch": 0.5356232750911136, "grad_norm": 500.19237962256324, "learning_rate": 9.029604016277798e-06, "loss": 338.1684, "step": 27850 }, { "epoch": 0.5358155994268735, "grad_norm": 461.7583356155207, "learning_rate": 9.02352985843335e-06, "loss": 345.0043, "step": 27860 }, { "epoch": 0.5360079237626333, "grad_norm": 437.61452419759644, "learning_rate": 9.017456064307904e-06, "loss": 338.4087, "step": 27870 }, { "epoch": 0.5362002480983932, "grad_norm": 438.8446878519873, "learning_rate": 9.01138263616385e-06, "loss": 339.3034, "step": 27880 }, { "epoch": 0.536392572434153, "grad_norm": 461.63840676842267, "learning_rate": 9.005309576263436e-06, "loss": 334.8043, "step": 27890 }, { "epoch": 0.5365848967699128, "grad_norm": 461.7534268183935, "learning_rate": 8.999236886868772e-06, "loss": 329.6729, "step": 27900 }, { "epoch": 0.5367772211056726, "grad_norm": 407.3645047384184, "learning_rate": 8.993164570241844e-06, "loss": 332.8074, "step": 27910 }, { "epoch": 0.5369695454414324, "grad_norm": 425.52965102653695, "learning_rate": 8.987092628644483e-06, "loss": 335.9697, "step": 27920 }, { "epoch": 0.5371618697771923, "grad_norm": 418.56227043833223, "learning_rate": 8.981021064338388e-06, "loss": 336.1161, "step": 27930 }, { "epoch": 0.537354194112952, "grad_norm": 461.23536225491625, "learning_rate": 8.974949879585118e-06, "loss": 339.2944, "step": 27940 }, { "epoch": 0.5375465184487119, "grad_norm": 485.3516253607373, "learning_rate": 8.968879076646093e-06, "loss": 334.7745, "step": 27950 }, { "epoch": 0.5377388427844717, "grad_norm": 421.53071117785976, "learning_rate": 8.96280865778258e-06, "loss": 335.2337, "step": 27960 }, { "epoch": 0.5379311671202316, "grad_norm": 430.1855333776133, "learning_rate": 8.956738625255709e-06, "loss": 342.5138, "step": 27970 }, { "epoch": 0.5381234914559914, "grad_norm": 475.16221216607164, "learning_rate": 8.950668981326473e-06, "loss": 333.5263, "step": 27980 }, { "epoch": 0.5383158157917513, "grad_norm": 511.645706997364, "learning_rate": 8.94459972825571e-06, "loss": 335.4495, "step": 27990 }, { "epoch": 0.538508140127511, "grad_norm": 410.51887273755403, "learning_rate": 8.938530868304121e-06, "loss": 332.1829, "step": 28000 }, { "epoch": 0.5387004644632709, "grad_norm": 437.6134356795408, "learning_rate": 8.932462403732248e-06, "loss": 334.7665, "step": 28010 }, { "epoch": 0.5388927887990307, "grad_norm": 428.4209467792851, "learning_rate": 8.926394336800502e-06, "loss": 343.598, "step": 28020 }, { "epoch": 0.5390851131347905, "grad_norm": 451.84650321673274, "learning_rate": 8.920326669769134e-06, "loss": 343.8959, "step": 28030 }, { "epoch": 0.5392774374705503, "grad_norm": 518.4843242374923, "learning_rate": 8.914259404898247e-06, "loss": 337.2625, "step": 28040 }, { "epoch": 0.5394697618063101, "grad_norm": 416.23432604720193, "learning_rate": 8.908192544447803e-06, "loss": 335.3577, "step": 28050 }, { "epoch": 0.53966208614207, "grad_norm": 426.30592660350817, "learning_rate": 8.902126090677605e-06, "loss": 324.2099, "step": 28060 }, { "epoch": 0.5398544104778298, "grad_norm": 402.30371872416225, "learning_rate": 8.896060045847305e-06, "loss": 337.4351, "step": 28070 }, { "epoch": 0.5400467348135897, "grad_norm": 456.36624915387966, "learning_rate": 8.889994412216403e-06, "loss": 333.0561, "step": 28080 }, { "epoch": 0.5402390591493494, "grad_norm": 412.24727874851567, "learning_rate": 8.883929192044254e-06, "loss": 331.1373, "step": 28090 }, { "epoch": 0.5404313834851093, "grad_norm": 380.62541716075714, "learning_rate": 8.877864387590049e-06, "loss": 340.2437, "step": 28100 }, { "epoch": 0.5406237078208691, "grad_norm": 440.1228835952414, "learning_rate": 8.871800001112822e-06, "loss": 338.534, "step": 28110 }, { "epoch": 0.540816032156629, "grad_norm": 480.08817067322906, "learning_rate": 8.865736034871468e-06, "loss": 345.7047, "step": 28120 }, { "epoch": 0.5410083564923888, "grad_norm": 471.61473402532386, "learning_rate": 8.859672491124706e-06, "loss": 338.4267, "step": 28130 }, { "epoch": 0.5412006808281485, "grad_norm": 432.3467408308856, "learning_rate": 8.853609372131105e-06, "loss": 341.381, "step": 28140 }, { "epoch": 0.5413930051639084, "grad_norm": 443.80785402538953, "learning_rate": 8.84754668014908e-06, "loss": 335.8323, "step": 28150 }, { "epoch": 0.5415853294996682, "grad_norm": 439.0447745842612, "learning_rate": 8.841484417436886e-06, "loss": 334.076, "step": 28160 }, { "epoch": 0.5417776538354281, "grad_norm": 422.7650893979843, "learning_rate": 8.835422586252613e-06, "loss": 347.1306, "step": 28170 }, { "epoch": 0.5419699781711879, "grad_norm": 421.6508770064547, "learning_rate": 8.829361188854194e-06, "loss": 321.6093, "step": 28180 }, { "epoch": 0.5421623025069477, "grad_norm": 427.31250226236494, "learning_rate": 8.823300227499393e-06, "loss": 333.0436, "step": 28190 }, { "epoch": 0.5423546268427075, "grad_norm": 468.52047744918286, "learning_rate": 8.817239704445827e-06, "loss": 340.8722, "step": 28200 }, { "epoch": 0.5425469511784674, "grad_norm": 504.0240027744351, "learning_rate": 8.811179621950937e-06, "loss": 330.1855, "step": 28210 }, { "epoch": 0.5427392755142272, "grad_norm": 478.89209220649013, "learning_rate": 8.805119982272001e-06, "loss": 337.9279, "step": 28220 }, { "epoch": 0.5429315998499871, "grad_norm": 423.3389409426329, "learning_rate": 8.799060787666142e-06, "loss": 340.6609, "step": 28230 }, { "epoch": 0.5431239241857468, "grad_norm": 393.1420448334663, "learning_rate": 8.793002040390304e-06, "loss": 339.8627, "step": 28240 }, { "epoch": 0.5433162485215066, "grad_norm": 414.86516448883225, "learning_rate": 8.786943742701273e-06, "loss": 334.7134, "step": 28250 }, { "epoch": 0.5435085728572665, "grad_norm": 412.89993580484315, "learning_rate": 8.780885896855659e-06, "loss": 329.9108, "step": 28260 }, { "epoch": 0.5437008971930263, "grad_norm": 481.13998132736816, "learning_rate": 8.774828505109918e-06, "loss": 345.4943, "step": 28270 }, { "epoch": 0.5438932215287862, "grad_norm": 500.99070497146204, "learning_rate": 8.768771569720324e-06, "loss": 336.8532, "step": 28280 }, { "epoch": 0.5440855458645459, "grad_norm": 453.980302741379, "learning_rate": 8.762715092942983e-06, "loss": 347.41, "step": 28290 }, { "epoch": 0.5442778702003058, "grad_norm": 409.65322457432677, "learning_rate": 8.756659077033838e-06, "loss": 335.5657, "step": 28300 }, { "epoch": 0.5444701945360656, "grad_norm": 414.8686596089426, "learning_rate": 8.750603524248653e-06, "loss": 347.6419, "step": 28310 }, { "epoch": 0.5446625188718255, "grad_norm": 421.86740367267794, "learning_rate": 8.744548436843021e-06, "loss": 340.6217, "step": 28320 }, { "epoch": 0.5448548432075853, "grad_norm": 482.2323568937171, "learning_rate": 8.738493817072359e-06, "loss": 338.5933, "step": 28330 }, { "epoch": 0.5450471675433451, "grad_norm": 467.3871180532147, "learning_rate": 8.73243966719192e-06, "loss": 339.4357, "step": 28340 }, { "epoch": 0.5452394918791049, "grad_norm": 401.4908893834571, "learning_rate": 8.726385989456764e-06, "loss": 334.9473, "step": 28350 }, { "epoch": 0.5454318162148647, "grad_norm": 433.4794396308252, "learning_rate": 8.7203327861218e-06, "loss": 339.971, "step": 28360 }, { "epoch": 0.5456241405506246, "grad_norm": 441.0387351790818, "learning_rate": 8.71428005944173e-06, "loss": 337.72, "step": 28370 }, { "epoch": 0.5458164648863844, "grad_norm": 428.13136384157383, "learning_rate": 8.708227811671112e-06, "loss": 333.3676, "step": 28380 }, { "epoch": 0.5460087892221442, "grad_norm": 442.5879824465687, "learning_rate": 8.702176045064296e-06, "loss": 336.1903, "step": 28390 }, { "epoch": 0.546201113557904, "grad_norm": 422.39172087490203, "learning_rate": 8.696124761875467e-06, "loss": 339.8283, "step": 28400 }, { "epoch": 0.5463934378936639, "grad_norm": 434.0395528927452, "learning_rate": 8.690073964358635e-06, "loss": 343.2911, "step": 28410 }, { "epoch": 0.5465857622294237, "grad_norm": 463.67618687858186, "learning_rate": 8.684023654767613e-06, "loss": 337.2746, "step": 28420 }, { "epoch": 0.5467780865651836, "grad_norm": 419.11976645542535, "learning_rate": 8.677973835356048e-06, "loss": 329.8752, "step": 28430 }, { "epoch": 0.5469704109009433, "grad_norm": 412.4573191019436, "learning_rate": 8.671924508377392e-06, "loss": 330.0221, "step": 28440 }, { "epoch": 0.5471627352367032, "grad_norm": 421.7986683894783, "learning_rate": 8.665875676084927e-06, "loss": 333.1989, "step": 28450 }, { "epoch": 0.547355059572463, "grad_norm": 454.27721239662213, "learning_rate": 8.659827340731738e-06, "loss": 340.4138, "step": 28460 }, { "epoch": 0.5475473839082228, "grad_norm": 448.56329087880783, "learning_rate": 8.653779504570728e-06, "loss": 337.8382, "step": 28470 }, { "epoch": 0.5477397082439827, "grad_norm": 455.7554211130853, "learning_rate": 8.647732169854622e-06, "loss": 338.9848, "step": 28480 }, { "epoch": 0.5479320325797424, "grad_norm": 425.8428844021126, "learning_rate": 8.641685338835947e-06, "loss": 335.3184, "step": 28490 }, { "epoch": 0.5481243569155023, "grad_norm": 418.6209147011269, "learning_rate": 8.635639013767053e-06, "loss": 338.6773, "step": 28500 }, { "epoch": 0.5483166812512621, "grad_norm": 454.39227518125404, "learning_rate": 8.629593196900088e-06, "loss": 334.4415, "step": 28510 }, { "epoch": 0.548509005587022, "grad_norm": 443.6295486310688, "learning_rate": 8.62354789048703e-06, "loss": 335.5656, "step": 28520 }, { "epoch": 0.5487013299227818, "grad_norm": 455.3809299234011, "learning_rate": 8.617503096779648e-06, "loss": 344.2566, "step": 28530 }, { "epoch": 0.5488936542585416, "grad_norm": 432.61163492997537, "learning_rate": 8.61145881802953e-06, "loss": 338.4444, "step": 28540 }, { "epoch": 0.5490859785943014, "grad_norm": 416.04342025552677, "learning_rate": 8.605415056488067e-06, "loss": 337.1958, "step": 28550 }, { "epoch": 0.5492783029300613, "grad_norm": 433.7150243152945, "learning_rate": 8.599371814406465e-06, "loss": 328.8556, "step": 28560 }, { "epoch": 0.5494706272658211, "grad_norm": 430.8442735669157, "learning_rate": 8.59332909403573e-06, "loss": 362.964, "step": 28570 }, { "epoch": 0.5496629516015809, "grad_norm": 465.53509833606705, "learning_rate": 8.587286897626672e-06, "loss": 335.1742, "step": 28580 }, { "epoch": 0.5498552759373407, "grad_norm": 427.74232433655965, "learning_rate": 8.581245227429918e-06, "loss": 340.6384, "step": 28590 }, { "epoch": 0.5500476002731005, "grad_norm": 436.3727526506871, "learning_rate": 8.575204085695887e-06, "loss": 327.1928, "step": 28600 }, { "epoch": 0.5502399246088604, "grad_norm": 423.15984783366486, "learning_rate": 8.5691634746748e-06, "loss": 341.7284, "step": 28610 }, { "epoch": 0.5504322489446202, "grad_norm": 418.5620886294004, "learning_rate": 8.563123396616683e-06, "loss": 328.5208, "step": 28620 }, { "epoch": 0.5506245732803801, "grad_norm": 434.9170721520363, "learning_rate": 8.557083853771377e-06, "loss": 333.2414, "step": 28630 }, { "epoch": 0.5508168976161398, "grad_norm": 470.2420065683713, "learning_rate": 8.551044848388502e-06, "loss": 338.3116, "step": 28640 }, { "epoch": 0.5510092219518997, "grad_norm": 431.9839568196808, "learning_rate": 8.545006382717487e-06, "loss": 336.1055, "step": 28650 }, { "epoch": 0.5512015462876595, "grad_norm": 439.70073126859603, "learning_rate": 8.538968459007569e-06, "loss": 346.576, "step": 28660 }, { "epoch": 0.5513938706234194, "grad_norm": 407.1652728810677, "learning_rate": 8.532931079507772e-06, "loss": 335.7871, "step": 28670 }, { "epoch": 0.5515861949591792, "grad_norm": 430.1923176470468, "learning_rate": 8.526894246466916e-06, "loss": 333.8787, "step": 28680 }, { "epoch": 0.551778519294939, "grad_norm": 405.54655513701874, "learning_rate": 8.520857962133623e-06, "loss": 332.1585, "step": 28690 }, { "epoch": 0.5519708436306988, "grad_norm": 448.57622451269, "learning_rate": 8.514822228756311e-06, "loss": 340.4178, "step": 28700 }, { "epoch": 0.5521631679664586, "grad_norm": 412.2554033897965, "learning_rate": 8.508787048583191e-06, "loss": 335.7281, "step": 28710 }, { "epoch": 0.5523554923022185, "grad_norm": 436.5841673392677, "learning_rate": 8.502752423862264e-06, "loss": 340.6044, "step": 28720 }, { "epoch": 0.5525478166379783, "grad_norm": 417.799958675462, "learning_rate": 8.496718356841335e-06, "loss": 329.6618, "step": 28730 }, { "epoch": 0.5527401409737381, "grad_norm": 418.7008634354597, "learning_rate": 8.49068484976799e-06, "loss": 336.898, "step": 28740 }, { "epoch": 0.5529324653094979, "grad_norm": 424.53243014044205, "learning_rate": 8.484651904889614e-06, "loss": 331.572, "step": 28750 }, { "epoch": 0.5531247896452578, "grad_norm": 470.03338812860176, "learning_rate": 8.478619524453369e-06, "loss": 332.2833, "step": 28760 }, { "epoch": 0.5533171139810176, "grad_norm": 446.2105367755079, "learning_rate": 8.472587710706232e-06, "loss": 329.3469, "step": 28770 }, { "epoch": 0.5535094383167775, "grad_norm": 417.8468134694729, "learning_rate": 8.466556465894942e-06, "loss": 337.4566, "step": 28780 }, { "epoch": 0.5537017626525372, "grad_norm": 454.18677070536796, "learning_rate": 8.460525792266046e-06, "loss": 342.5244, "step": 28790 }, { "epoch": 0.5538940869882971, "grad_norm": 433.468099090712, "learning_rate": 8.454495692065862e-06, "loss": 346.3313, "step": 28800 }, { "epoch": 0.5540864113240569, "grad_norm": 440.8969137114263, "learning_rate": 8.448466167540514e-06, "loss": 330.4774, "step": 28810 }, { "epoch": 0.5542787356598167, "grad_norm": 430.8910401230213, "learning_rate": 8.442437220935893e-06, "loss": 346.404, "step": 28820 }, { "epoch": 0.5544710599955766, "grad_norm": 420.68848721767137, "learning_rate": 8.436408854497679e-06, "loss": 338.6633, "step": 28830 }, { "epoch": 0.5546633843313363, "grad_norm": 447.5909695442307, "learning_rate": 8.430381070471348e-06, "loss": 339.4675, "step": 28840 }, { "epoch": 0.5548557086670962, "grad_norm": 458.24278042564714, "learning_rate": 8.424353871102144e-06, "loss": 330.7136, "step": 28850 }, { "epoch": 0.555048033002856, "grad_norm": 463.9529781575977, "learning_rate": 8.4183272586351e-06, "loss": 337.8066, "step": 28860 }, { "epoch": 0.5552403573386159, "grad_norm": 451.15439965981165, "learning_rate": 8.412301235315026e-06, "loss": 347.0402, "step": 28870 }, { "epoch": 0.5554326816743757, "grad_norm": 468.96241894049217, "learning_rate": 8.406275803386525e-06, "loss": 338.5047, "step": 28880 }, { "epoch": 0.5556250060101355, "grad_norm": 432.69849751099804, "learning_rate": 8.400250965093968e-06, "loss": 337.7101, "step": 28890 }, { "epoch": 0.5558173303458953, "grad_norm": 464.24731570275924, "learning_rate": 8.394226722681498e-06, "loss": 341.856, "step": 28900 }, { "epoch": 0.5560096546816552, "grad_norm": 474.9783446731229, "learning_rate": 8.38820307839306e-06, "loss": 334.5956, "step": 28910 }, { "epoch": 0.556201979017415, "grad_norm": 502.81647467693927, "learning_rate": 8.382180034472353e-06, "loss": 325.1009, "step": 28920 }, { "epoch": 0.5563943033531747, "grad_norm": 414.34753733509274, "learning_rate": 8.376157593162867e-06, "loss": 329.5131, "step": 28930 }, { "epoch": 0.5565866276889346, "grad_norm": 462.21592865856906, "learning_rate": 8.370135756707853e-06, "loss": 336.8699, "step": 28940 }, { "epoch": 0.5567789520246944, "grad_norm": 502.0915206721188, "learning_rate": 8.364114527350357e-06, "loss": 333.491, "step": 28950 }, { "epoch": 0.5569712763604543, "grad_norm": 434.6518119218972, "learning_rate": 8.358093907333182e-06, "loss": 329.48, "step": 28960 }, { "epoch": 0.5571636006962141, "grad_norm": 462.289336722964, "learning_rate": 8.35207389889891e-06, "loss": 339.1186, "step": 28970 }, { "epoch": 0.557355925031974, "grad_norm": 455.06554300250497, "learning_rate": 8.346054504289888e-06, "loss": 339.4001, "step": 28980 }, { "epoch": 0.5575482493677337, "grad_norm": 440.7847408197368, "learning_rate": 8.34003572574825e-06, "loss": 331.0541, "step": 28990 }, { "epoch": 0.5577405737034936, "grad_norm": 419.28611684688883, "learning_rate": 8.334017565515892e-06, "loss": 336.4699, "step": 29000 }, { "epoch": 0.5579328980392534, "grad_norm": 440.0149818815394, "learning_rate": 8.328000025834472e-06, "loss": 323.6228, "step": 29010 }, { "epoch": 0.5581252223750133, "grad_norm": 463.7826318548719, "learning_rate": 8.321983108945431e-06, "loss": 335.2554, "step": 29020 }, { "epoch": 0.558317546710773, "grad_norm": 467.4520094011459, "learning_rate": 8.315966817089972e-06, "loss": 344.2589, "step": 29030 }, { "epoch": 0.5585098710465328, "grad_norm": 442.478723335083, "learning_rate": 8.309951152509057e-06, "loss": 334.4404, "step": 29040 }, { "epoch": 0.5587021953822927, "grad_norm": 405.7022535682226, "learning_rate": 8.303936117443422e-06, "loss": 343.2435, "step": 29050 }, { "epoch": 0.5588945197180525, "grad_norm": 407.3883669865574, "learning_rate": 8.297921714133576e-06, "loss": 338.1101, "step": 29060 }, { "epoch": 0.5590868440538124, "grad_norm": 470.5200609320266, "learning_rate": 8.291907944819782e-06, "loss": 330.1531, "step": 29070 }, { "epoch": 0.5592791683895721, "grad_norm": 506.1059406661888, "learning_rate": 8.285894811742065e-06, "loss": 322.5621, "step": 29080 }, { "epoch": 0.559471492725332, "grad_norm": 431.05327168838556, "learning_rate": 8.279882317140224e-06, "loss": 328.4875, "step": 29090 }, { "epoch": 0.5596638170610918, "grad_norm": 424.87231380658955, "learning_rate": 8.273870463253813e-06, "loss": 335.0859, "step": 29100 }, { "epoch": 0.5598561413968517, "grad_norm": 459.6517685777888, "learning_rate": 8.267859252322144e-06, "loss": 339.4594, "step": 29110 }, { "epoch": 0.5600484657326115, "grad_norm": 462.0607954601287, "learning_rate": 8.261848686584293e-06, "loss": 329.8024, "step": 29120 }, { "epoch": 0.5602407900683714, "grad_norm": 452.9882794501342, "learning_rate": 8.255838768279106e-06, "loss": 332.184, "step": 29130 }, { "epoch": 0.5604331144041311, "grad_norm": 447.47040112793513, "learning_rate": 8.249829499645167e-06, "loss": 334.8988, "step": 29140 }, { "epoch": 0.5606254387398909, "grad_norm": 458.17452602336243, "learning_rate": 8.243820882920837e-06, "loss": 327.148, "step": 29150 }, { "epoch": 0.5608177630756508, "grad_norm": 487.41883874216603, "learning_rate": 8.23781292034422e-06, "loss": 333.2591, "step": 29160 }, { "epoch": 0.5610100874114106, "grad_norm": 423.82459507322125, "learning_rate": 8.231805614153192e-06, "loss": 331.0956, "step": 29170 }, { "epoch": 0.5612024117471704, "grad_norm": 442.5060143198106, "learning_rate": 8.22579896658537e-06, "loss": 330.2205, "step": 29180 }, { "epoch": 0.5613947360829302, "grad_norm": 426.49906324196724, "learning_rate": 8.219792979878126e-06, "loss": 327.4737, "step": 29190 }, { "epoch": 0.5615870604186901, "grad_norm": 422.81930245385394, "learning_rate": 8.213787656268599e-06, "loss": 338.3242, "step": 29200 }, { "epoch": 0.5617793847544499, "grad_norm": 462.3170358537627, "learning_rate": 8.20778299799367e-06, "loss": 334.1542, "step": 29210 }, { "epoch": 0.5619717090902098, "grad_norm": 437.1471185988838, "learning_rate": 8.201779007289975e-06, "loss": 331.3995, "step": 29220 }, { "epoch": 0.5621640334259695, "grad_norm": 487.7957088093334, "learning_rate": 8.195775686393898e-06, "loss": 344.2795, "step": 29230 }, { "epoch": 0.5623563577617294, "grad_norm": 478.33005089591705, "learning_rate": 8.189773037541585e-06, "loss": 336.6861, "step": 29240 }, { "epoch": 0.5625486820974892, "grad_norm": 422.10092133872394, "learning_rate": 8.183771062968917e-06, "loss": 340.3007, "step": 29250 }, { "epoch": 0.562741006433249, "grad_norm": 474.0324858154835, "learning_rate": 8.177769764911528e-06, "loss": 336.0106, "step": 29260 }, { "epoch": 0.5629333307690089, "grad_norm": 434.3888681914373, "learning_rate": 8.171769145604812e-06, "loss": 335.4993, "step": 29270 }, { "epoch": 0.5631256551047686, "grad_norm": 408.68330738599025, "learning_rate": 8.165769207283891e-06, "loss": 329.405, "step": 29280 }, { "epoch": 0.5633179794405285, "grad_norm": 467.13611665112876, "learning_rate": 8.15976995218365e-06, "loss": 334.0891, "step": 29290 }, { "epoch": 0.5635103037762883, "grad_norm": 409.14244844029383, "learning_rate": 8.153771382538706e-06, "loss": 324.47, "step": 29300 }, { "epoch": 0.5637026281120482, "grad_norm": 432.7894556210294, "learning_rate": 8.147773500583434e-06, "loss": 335.9282, "step": 29310 }, { "epoch": 0.563894952447808, "grad_norm": 417.8412144481969, "learning_rate": 8.141776308551942e-06, "loss": 332.9616, "step": 29320 }, { "epoch": 0.5640872767835678, "grad_norm": 454.71876988862135, "learning_rate": 8.135779808678084e-06, "loss": 337.533, "step": 29330 }, { "epoch": 0.5642796011193276, "grad_norm": 423.2581077297414, "learning_rate": 8.129784003195458e-06, "loss": 324.7285, "step": 29340 }, { "epoch": 0.5644719254550875, "grad_norm": 441.9834783817504, "learning_rate": 8.123788894337405e-06, "loss": 332.1972, "step": 29350 }, { "epoch": 0.5646642497908473, "grad_norm": 434.08298623716075, "learning_rate": 8.117794484337003e-06, "loss": 332.6274, "step": 29360 }, { "epoch": 0.5648565741266071, "grad_norm": 453.8837857720592, "learning_rate": 8.111800775427066e-06, "loss": 332.9875, "step": 29370 }, { "epoch": 0.5650488984623669, "grad_norm": 443.85682117737525, "learning_rate": 8.10580776984016e-06, "loss": 332.123, "step": 29380 }, { "epoch": 0.5652412227981267, "grad_norm": 400.4770174997833, "learning_rate": 8.099815469808573e-06, "loss": 332.0791, "step": 29390 }, { "epoch": 0.5654335471338866, "grad_norm": 421.0863326202375, "learning_rate": 8.093823877564343e-06, "loss": 332.9803, "step": 29400 }, { "epoch": 0.5656258714696464, "grad_norm": 460.39039016403785, "learning_rate": 8.087832995339236e-06, "loss": 331.3364, "step": 29410 }, { "epoch": 0.5658181958054063, "grad_norm": 436.94490182003267, "learning_rate": 8.081842825364756e-06, "loss": 325.6589, "step": 29420 }, { "epoch": 0.566010520141166, "grad_norm": 435.7743717292608, "learning_rate": 8.075853369872149e-06, "loss": 332.4927, "step": 29430 }, { "epoch": 0.5662028444769259, "grad_norm": 424.79688028392485, "learning_rate": 8.069864631092377e-06, "loss": 332.5448, "step": 29440 }, { "epoch": 0.5663951688126857, "grad_norm": 411.1753534629818, "learning_rate": 8.063876611256158e-06, "loss": 335.4941, "step": 29450 }, { "epoch": 0.5665874931484456, "grad_norm": 401.227188464867, "learning_rate": 8.057889312593924e-06, "loss": 340.2273, "step": 29460 }, { "epoch": 0.5667798174842054, "grad_norm": 3810.649817924996, "learning_rate": 8.051902737335847e-06, "loss": 347.7162, "step": 29470 }, { "epoch": 0.5669721418199651, "grad_norm": 438.02312995201396, "learning_rate": 8.045916887711822e-06, "loss": 337.5929, "step": 29480 }, { "epoch": 0.567164466155725, "grad_norm": 448.4780252859747, "learning_rate": 8.039931765951485e-06, "loss": 345.2213, "step": 29490 }, { "epoch": 0.5673567904914848, "grad_norm": 571.3342952328112, "learning_rate": 8.033947374284196e-06, "loss": 329.6985, "step": 29500 }, { "epoch": 0.5675491148272447, "grad_norm": 439.33405526116576, "learning_rate": 8.02796371493904e-06, "loss": 338.3913, "step": 29510 }, { "epoch": 0.5677414391630045, "grad_norm": 455.9108263313518, "learning_rate": 8.021980790144828e-06, "loss": 324.158, "step": 29520 }, { "epoch": 0.5679337634987643, "grad_norm": 412.031586350393, "learning_rate": 8.015998602130107e-06, "loss": 329.6155, "step": 29530 }, { "epoch": 0.5681260878345241, "grad_norm": 441.49365493333977, "learning_rate": 8.01001715312314e-06, "loss": 326.0366, "step": 29540 }, { "epoch": 0.568318412170284, "grad_norm": 420.2265928528216, "learning_rate": 8.00403644535191e-06, "loss": 331.1729, "step": 29550 }, { "epoch": 0.5685107365060438, "grad_norm": 427.4948133929633, "learning_rate": 7.998056481044146e-06, "loss": 336.6901, "step": 29560 }, { "epoch": 0.5687030608418037, "grad_norm": 506.5082940779469, "learning_rate": 7.992077262427282e-06, "loss": 343.8321, "step": 29570 }, { "epoch": 0.5688953851775634, "grad_norm": 434.22758575124624, "learning_rate": 7.986098791728475e-06, "loss": 330.4786, "step": 29580 }, { "epoch": 0.5690877095133232, "grad_norm": 421.47017215140454, "learning_rate": 7.9801210711746e-06, "loss": 337.844, "step": 29590 }, { "epoch": 0.5692800338490831, "grad_norm": 418.32667221583915, "learning_rate": 7.974144102992273e-06, "loss": 330.2382, "step": 29600 }, { "epoch": 0.5694723581848429, "grad_norm": 512.6053370121507, "learning_rate": 7.968167889407813e-06, "loss": 328.4573, "step": 29610 }, { "epoch": 0.5696646825206028, "grad_norm": 438.324798620883, "learning_rate": 7.96219243264725e-06, "loss": 335.7818, "step": 29620 }, { "epoch": 0.5698570068563625, "grad_norm": 455.7128143073738, "learning_rate": 7.956217734936353e-06, "loss": 330.2597, "step": 29630 }, { "epoch": 0.5700493311921224, "grad_norm": 430.46145668708493, "learning_rate": 7.950243798500593e-06, "loss": 326.9545, "step": 29640 }, { "epoch": 0.5702416555278822, "grad_norm": 441.55785457803626, "learning_rate": 7.94427062556517e-06, "loss": 336.7078, "step": 29650 }, { "epoch": 0.5704339798636421, "grad_norm": 407.74719322133365, "learning_rate": 7.938298218354985e-06, "loss": 329.5589, "step": 29660 }, { "epoch": 0.5706263041994019, "grad_norm": 456.31055397247604, "learning_rate": 7.932326579094665e-06, "loss": 336.0979, "step": 29670 }, { "epoch": 0.5708186285351617, "grad_norm": 450.69316985112437, "learning_rate": 7.926355710008545e-06, "loss": 332.5213, "step": 29680 }, { "epoch": 0.5710109528709215, "grad_norm": 422.9822101382185, "learning_rate": 7.920385613320675e-06, "loss": 326.7693, "step": 29690 }, { "epoch": 0.5712032772066813, "grad_norm": 472.8179066379729, "learning_rate": 7.914416291254817e-06, "loss": 322.3947, "step": 29700 }, { "epoch": 0.5713956015424412, "grad_norm": 420.4229825097595, "learning_rate": 7.908447746034447e-06, "loss": 325.7185, "step": 29710 }, { "epoch": 0.571587925878201, "grad_norm": 449.5150610677103, "learning_rate": 7.902479979882749e-06, "loss": 341.1461, "step": 29720 }, { "epoch": 0.5717802502139608, "grad_norm": 499.1224914926725, "learning_rate": 7.896512995022614e-06, "loss": 334.9708, "step": 29730 }, { "epoch": 0.5719725745497206, "grad_norm": 590.0282859279401, "learning_rate": 7.890546793676652e-06, "loss": 330.1338, "step": 29740 }, { "epoch": 0.5721648988854805, "grad_norm": 454.7010902208668, "learning_rate": 7.884581378067171e-06, "loss": 334.7914, "step": 29750 }, { "epoch": 0.5723572232212403, "grad_norm": 418.45804727090473, "learning_rate": 7.878616750416186e-06, "loss": 330.687, "step": 29760 }, { "epoch": 0.5725495475570002, "grad_norm": 458.1393066528645, "learning_rate": 7.872652912945426e-06, "loss": 330.6302, "step": 29770 }, { "epoch": 0.5727418718927599, "grad_norm": 495.0496315737, "learning_rate": 7.866689867876323e-06, "loss": 334.8875, "step": 29780 }, { "epoch": 0.5729341962285198, "grad_norm": 438.2565273464387, "learning_rate": 7.860727617430013e-06, "loss": 330.4333, "step": 29790 }, { "epoch": 0.5731265205642796, "grad_norm": 412.9805257332131, "learning_rate": 7.85476616382733e-06, "loss": 320.4414, "step": 29800 }, { "epoch": 0.5733188449000395, "grad_norm": 483.069705121499, "learning_rate": 7.848805509288824e-06, "loss": 341.8112, "step": 29810 }, { "epoch": 0.5735111692357993, "grad_norm": 398.53307287294416, "learning_rate": 7.84284565603474e-06, "loss": 328.7659, "step": 29820 }, { "epoch": 0.573703493571559, "grad_norm": 455.5459483468834, "learning_rate": 7.83688660628502e-06, "loss": 332.784, "step": 29830 }, { "epoch": 0.5738958179073189, "grad_norm": 490.62416658254017, "learning_rate": 7.830928362259313e-06, "loss": 332.3923, "step": 29840 }, { "epoch": 0.5740881422430787, "grad_norm": 429.79524937179906, "learning_rate": 7.824970926176967e-06, "loss": 334.61, "step": 29850 }, { "epoch": 0.5742804665788386, "grad_norm": 449.1893593883112, "learning_rate": 7.819014300257033e-06, "loss": 329.0212, "step": 29860 }, { "epoch": 0.5744727909145984, "grad_norm": 414.33619573207307, "learning_rate": 7.813058486718252e-06, "loss": 336.6774, "step": 29870 }, { "epoch": 0.5746651152503582, "grad_norm": 439.29259653225347, "learning_rate": 7.80710348777906e-06, "loss": 333.8371, "step": 29880 }, { "epoch": 0.574857439586118, "grad_norm": 428.49964835785215, "learning_rate": 7.801149305657609e-06, "loss": 331.8615, "step": 29890 }, { "epoch": 0.5750497639218779, "grad_norm": 427.2398552866976, "learning_rate": 7.795195942571722e-06, "loss": 342.7597, "step": 29900 }, { "epoch": 0.5752420882576377, "grad_norm": 425.80215562660857, "learning_rate": 7.789243400738934e-06, "loss": 330.5996, "step": 29910 }, { "epoch": 0.5754344125933976, "grad_norm": 406.52357920872674, "learning_rate": 7.783291682376465e-06, "loss": 339.7319, "step": 29920 }, { "epoch": 0.5756267369291573, "grad_norm": 463.2569794056569, "learning_rate": 7.77734078970124e-06, "loss": 340.5203, "step": 29930 }, { "epoch": 0.5758190612649171, "grad_norm": 444.6726578722199, "learning_rate": 7.77139072492986e-06, "loss": 330.1363, "step": 29940 }, { "epoch": 0.576011385600677, "grad_norm": 442.0133493623101, "learning_rate": 7.765441490278625e-06, "loss": 331.1776, "step": 29950 }, { "epoch": 0.5762037099364368, "grad_norm": 447.5851833126177, "learning_rate": 7.759493087963535e-06, "loss": 335.1232, "step": 29960 }, { "epoch": 0.5763960342721967, "grad_norm": 475.53567280781186, "learning_rate": 7.753545520200264e-06, "loss": 335.765, "step": 29970 }, { "epoch": 0.5765883586079564, "grad_norm": 421.54015641946467, "learning_rate": 7.747598789204183e-06, "loss": 329.2269, "step": 29980 }, { "epoch": 0.5767806829437163, "grad_norm": 426.1310802483252, "learning_rate": 7.741652897190355e-06, "loss": 336.0074, "step": 29990 }, { "epoch": 0.5769730072794761, "grad_norm": 427.86259920965443, "learning_rate": 7.735707846373527e-06, "loss": 328.8445, "step": 30000 }, { "epoch": 0.577165331615236, "grad_norm": 428.41995362601057, "learning_rate": 7.72976363896813e-06, "loss": 333.8077, "step": 30010 }, { "epoch": 0.5773576559509958, "grad_norm": 430.2802609969973, "learning_rate": 7.723820277188278e-06, "loss": 336.9656, "step": 30020 }, { "epoch": 0.5775499802867556, "grad_norm": 440.6500255127248, "learning_rate": 7.717877763247787e-06, "loss": 330.8104, "step": 30030 }, { "epoch": 0.5777423046225154, "grad_norm": 472.56819220412467, "learning_rate": 7.711936099360138e-06, "loss": 337.4611, "step": 30040 }, { "epoch": 0.5779346289582752, "grad_norm": 496.4691959227554, "learning_rate": 7.705995287738498e-06, "loss": 330.234, "step": 30050 }, { "epoch": 0.5781269532940351, "grad_norm": 461.17763073437783, "learning_rate": 7.70005533059573e-06, "loss": 334.2353, "step": 30060 }, { "epoch": 0.5783192776297948, "grad_norm": 449.07961400479104, "learning_rate": 7.694116230144368e-06, "loss": 330.7339, "step": 30070 }, { "epoch": 0.5785116019655547, "grad_norm": 442.34392009174167, "learning_rate": 7.688177988596628e-06, "loss": 333.8402, "step": 30080 }, { "epoch": 0.5787039263013145, "grad_norm": 461.90023247640244, "learning_rate": 7.682240608164401e-06, "loss": 324.8395, "step": 30090 }, { "epoch": 0.5788962506370744, "grad_norm": 401.98428729685395, "learning_rate": 7.676304091059273e-06, "loss": 329.8856, "step": 30100 }, { "epoch": 0.5790885749728342, "grad_norm": 442.8307087132358, "learning_rate": 7.670368439492495e-06, "loss": 334.4132, "step": 30110 }, { "epoch": 0.579280899308594, "grad_norm": 438.18728197992647, "learning_rate": 7.664433655674995e-06, "loss": 313.7942, "step": 30120 }, { "epoch": 0.5794732236443538, "grad_norm": 442.9409576515072, "learning_rate": 7.658499741817383e-06, "loss": 340.8581, "step": 30130 }, { "epoch": 0.5796655479801137, "grad_norm": 429.8861541271232, "learning_rate": 7.65256670012995e-06, "loss": 326.3264, "step": 30140 }, { "epoch": 0.5798578723158735, "grad_norm": 467.8462077252699, "learning_rate": 7.646634532822652e-06, "loss": 333.515, "step": 30150 }, { "epoch": 0.5800501966516333, "grad_norm": 459.2481688825721, "learning_rate": 7.64070324210512e-06, "loss": 336.135, "step": 30160 }, { "epoch": 0.5802425209873932, "grad_norm": 430.40511731683125, "learning_rate": 7.634772830186668e-06, "loss": 324.1047, "step": 30170 }, { "epoch": 0.5804348453231529, "grad_norm": 468.6415184680704, "learning_rate": 7.628843299276276e-06, "loss": 339.1803, "step": 30180 }, { "epoch": 0.5806271696589128, "grad_norm": 401.7519481676133, "learning_rate": 7.62291465158259e-06, "loss": 325.6971, "step": 30190 }, { "epoch": 0.5808194939946726, "grad_norm": 415.3591764446833, "learning_rate": 7.616986889313939e-06, "loss": 323.2868, "step": 30200 }, { "epoch": 0.5810118183304325, "grad_norm": 446.6236767599377, "learning_rate": 7.611060014678313e-06, "loss": 329.1341, "step": 30210 }, { "epoch": 0.5812041426661922, "grad_norm": 459.8590181303199, "learning_rate": 7.605134029883381e-06, "loss": 329.813, "step": 30220 }, { "epoch": 0.5813964670019521, "grad_norm": 445.0618593825332, "learning_rate": 7.599208937136465e-06, "loss": 329.2379, "step": 30230 }, { "epoch": 0.5815887913377119, "grad_norm": 485.4991220989977, "learning_rate": 7.593284738644574e-06, "loss": 338.6362, "step": 30240 }, { "epoch": 0.5817811156734718, "grad_norm": 408.2764466326972, "learning_rate": 7.58736143661437e-06, "loss": 330.9625, "step": 30250 }, { "epoch": 0.5819734400092316, "grad_norm": 411.6901003315401, "learning_rate": 7.5814390332521824e-06, "loss": 331.4792, "step": 30260 }, { "epoch": 0.5821657643449913, "grad_norm": 427.6664496008229, "learning_rate": 7.575517530764011e-06, "loss": 336.1827, "step": 30270 }, { "epoch": 0.5823580886807512, "grad_norm": 409.60128877955236, "learning_rate": 7.569596931355517e-06, "loss": 330.2883, "step": 30280 }, { "epoch": 0.582550413016511, "grad_norm": 411.2913629827442, "learning_rate": 7.56367723723203e-06, "loss": 328.3345, "step": 30290 }, { "epoch": 0.5827427373522709, "grad_norm": 441.0386184492986, "learning_rate": 7.557758450598534e-06, "loss": 336.6578, "step": 30300 }, { "epoch": 0.5829350616880307, "grad_norm": 444.7976225529548, "learning_rate": 7.551840573659677e-06, "loss": 333.3413, "step": 30310 }, { "epoch": 0.5831273860237906, "grad_norm": 434.18355514212993, "learning_rate": 7.5459236086197775e-06, "loss": 334.238, "step": 30320 }, { "epoch": 0.5833197103595503, "grad_norm": 412.83184305261153, "learning_rate": 7.5400075576828e-06, "loss": 329.6643, "step": 30330 }, { "epoch": 0.5835120346953102, "grad_norm": 436.73047040176095, "learning_rate": 7.534092423052382e-06, "loss": 329.2323, "step": 30340 }, { "epoch": 0.58370435903107, "grad_norm": 421.70849754147355, "learning_rate": 7.5281782069318075e-06, "loss": 335.3367, "step": 30350 }, { "epoch": 0.5838966833668299, "grad_norm": 417.6416592533216, "learning_rate": 7.522264911524031e-06, "loss": 325.445, "step": 30360 }, { "epoch": 0.5840890077025896, "grad_norm": 466.31279118258874, "learning_rate": 7.516352539031654e-06, "loss": 330.2999, "step": 30370 }, { "epoch": 0.5842813320383494, "grad_norm": 479.3594140106477, "learning_rate": 7.510441091656933e-06, "loss": 330.6483, "step": 30380 }, { "epoch": 0.5844736563741093, "grad_norm": 430.07724722480975, "learning_rate": 7.504530571601792e-06, "loss": 331.6849, "step": 30390 }, { "epoch": 0.5846659807098691, "grad_norm": 498.1720790483102, "learning_rate": 7.498620981067799e-06, "loss": 329.8774, "step": 30400 }, { "epoch": 0.584858305045629, "grad_norm": 461.2205110539114, "learning_rate": 7.492712322256177e-06, "loss": 336.4995, "step": 30410 }, { "epoch": 0.5850506293813887, "grad_norm": 414.40799409005166, "learning_rate": 7.486804597367807e-06, "loss": 326.0727, "step": 30420 }, { "epoch": 0.5852429537171486, "grad_norm": 439.59222443430093, "learning_rate": 7.480897808603219e-06, "loss": 325.2597, "step": 30430 }, { "epoch": 0.5854352780529084, "grad_norm": 419.77136939642673, "learning_rate": 7.474991958162594e-06, "loss": 328.0964, "step": 30440 }, { "epoch": 0.5856276023886683, "grad_norm": 410.85890223519806, "learning_rate": 7.469087048245758e-06, "loss": 326.8105, "step": 30450 }, { "epoch": 0.5858199267244281, "grad_norm": 479.17213907929494, "learning_rate": 7.463183081052201e-06, "loss": 340.601, "step": 30460 }, { "epoch": 0.586012251060188, "grad_norm": 475.3548038233099, "learning_rate": 7.457280058781049e-06, "loss": 335.7268, "step": 30470 }, { "epoch": 0.5862045753959477, "grad_norm": 420.529168022175, "learning_rate": 7.451377983631078e-06, "loss": 340.9229, "step": 30480 }, { "epoch": 0.5863968997317075, "grad_norm": 463.02557577749565, "learning_rate": 7.445476857800717e-06, "loss": 329.5683, "step": 30490 }, { "epoch": 0.5865892240674674, "grad_norm": 430.83495633025575, "learning_rate": 7.439576683488039e-06, "loss": 336.5015, "step": 30500 }, { "epoch": 0.5867815484032272, "grad_norm": 453.3661164215135, "learning_rate": 7.4336774628907604e-06, "loss": 334.8364, "step": 30510 }, { "epoch": 0.586973872738987, "grad_norm": 397.206072454155, "learning_rate": 7.427779198206238e-06, "loss": 324.6285, "step": 30520 }, { "epoch": 0.5871661970747468, "grad_norm": 428.51349209302424, "learning_rate": 7.421881891631487e-06, "loss": 331.0998, "step": 30530 }, { "epoch": 0.5873585214105067, "grad_norm": 428.6108243354417, "learning_rate": 7.415985545363152e-06, "loss": 325.7343, "step": 30540 }, { "epoch": 0.5875508457462665, "grad_norm": 421.9440933291155, "learning_rate": 7.410090161597523e-06, "loss": 328.0761, "step": 30550 }, { "epoch": 0.5877431700820264, "grad_norm": 446.0539710711119, "learning_rate": 7.404195742530533e-06, "loss": 326.7984, "step": 30560 }, { "epoch": 0.5879354944177861, "grad_norm": 464.76931857488046, "learning_rate": 7.398302290357763e-06, "loss": 328.7076, "step": 30570 }, { "epoch": 0.588127818753546, "grad_norm": 464.6798010441902, "learning_rate": 7.392409807274421e-06, "loss": 325.0724, "step": 30580 }, { "epoch": 0.5883201430893058, "grad_norm": 418.15569085958595, "learning_rate": 7.386518295475355e-06, "loss": 331.5551, "step": 30590 }, { "epoch": 0.5885124674250656, "grad_norm": 428.93737309888206, "learning_rate": 7.380627757155065e-06, "loss": 333.5645, "step": 30600 }, { "epoch": 0.5887047917608255, "grad_norm": 443.75068237274616, "learning_rate": 7.374738194507675e-06, "loss": 326.6464, "step": 30610 }, { "epoch": 0.5888971160965852, "grad_norm": 444.4822789658591, "learning_rate": 7.3688496097269494e-06, "loss": 328.3985, "step": 30620 }, { "epoch": 0.5890894404323451, "grad_norm": 484.85193210810803, "learning_rate": 7.362962005006286e-06, "loss": 342.7203, "step": 30630 }, { "epoch": 0.5892817647681049, "grad_norm": 452.9430165062033, "learning_rate": 7.3570753825387275e-06, "loss": 321.6017, "step": 30640 }, { "epoch": 0.5894740891038648, "grad_norm": 417.8218990676746, "learning_rate": 7.35118974451694e-06, "loss": 321.0145, "step": 30650 }, { "epoch": 0.5896664134396246, "grad_norm": 416.3957295283941, "learning_rate": 7.345305093133226e-06, "loss": 325.3449, "step": 30660 }, { "epoch": 0.5898587377753844, "grad_norm": 413.5927335537093, "learning_rate": 7.3394214305795175e-06, "loss": 332.4665, "step": 30670 }, { "epoch": 0.5900510621111442, "grad_norm": 433.5886633728642, "learning_rate": 7.33353875904739e-06, "loss": 327.536, "step": 30680 }, { "epoch": 0.5902433864469041, "grad_norm": 439.71164942072016, "learning_rate": 7.327657080728032e-06, "loss": 324.6934, "step": 30690 }, { "epoch": 0.5904357107826639, "grad_norm": 435.7450282510498, "learning_rate": 7.321776397812279e-06, "loss": 326.868, "step": 30700 }, { "epoch": 0.5906280351184237, "grad_norm": 452.5244934922471, "learning_rate": 7.315896712490584e-06, "loss": 332.5115, "step": 30710 }, { "epoch": 0.5908203594541835, "grad_norm": 447.61515435948786, "learning_rate": 7.310018026953036e-06, "loss": 331.0866, "step": 30720 }, { "epoch": 0.5910126837899433, "grad_norm": 430.70014246441565, "learning_rate": 7.304140343389348e-06, "loss": 329.881, "step": 30730 }, { "epoch": 0.5912050081257032, "grad_norm": 429.9615360622204, "learning_rate": 7.298263663988853e-06, "loss": 336.6074, "step": 30740 }, { "epoch": 0.591397332461463, "grad_norm": 481.5978794481741, "learning_rate": 7.292387990940526e-06, "loss": 318.0565, "step": 30750 }, { "epoch": 0.5915896567972229, "grad_norm": 415.670224469492, "learning_rate": 7.286513326432953e-06, "loss": 328.3108, "step": 30760 }, { "epoch": 0.5917819811329826, "grad_norm": 422.412987484641, "learning_rate": 7.2806396726543526e-06, "loss": 341.2282, "step": 30770 }, { "epoch": 0.5919743054687425, "grad_norm": 465.98049976748166, "learning_rate": 7.2747670317925625e-06, "loss": 333.1395, "step": 30780 }, { "epoch": 0.5921666298045023, "grad_norm": 477.49473669294923, "learning_rate": 7.268895406035046e-06, "loss": 326.3879, "step": 30790 }, { "epoch": 0.5923589541402622, "grad_norm": 456.04511489001106, "learning_rate": 7.263024797568884e-06, "loss": 337.6323, "step": 30800 }, { "epoch": 0.592551278476022, "grad_norm": 400.7959342469601, "learning_rate": 7.257155208580778e-06, "loss": 328.681, "step": 30810 }, { "epoch": 0.5927436028117817, "grad_norm": 419.09008314305464, "learning_rate": 7.251286641257062e-06, "loss": 333.4711, "step": 30820 }, { "epoch": 0.5929359271475416, "grad_norm": 431.7195446667333, "learning_rate": 7.245419097783674e-06, "loss": 339.5082, "step": 30830 }, { "epoch": 0.5931282514833014, "grad_norm": 420.27025602517847, "learning_rate": 7.239552580346181e-06, "loss": 319.8995, "step": 30840 }, { "epoch": 0.5933205758190613, "grad_norm": 440.9464286333631, "learning_rate": 7.233687091129757e-06, "loss": 344.2386, "step": 30850 }, { "epoch": 0.593512900154821, "grad_norm": 483.89962623035797, "learning_rate": 7.227822632319208e-06, "loss": 343.4308, "step": 30860 }, { "epoch": 0.5937052244905809, "grad_norm": 481.7308087127284, "learning_rate": 7.221959206098945e-06, "loss": 347.1303, "step": 30870 }, { "epoch": 0.5938975488263407, "grad_norm": 448.4888737988553, "learning_rate": 7.216096814652992e-06, "loss": 346.2199, "step": 30880 }, { "epoch": 0.5940898731621006, "grad_norm": 418.0307840420486, "learning_rate": 7.210235460165002e-06, "loss": 331.0121, "step": 30890 }, { "epoch": 0.5942821974978604, "grad_norm": 416.2280929286208, "learning_rate": 7.2043751448182275e-06, "loss": 328.2097, "step": 30900 }, { "epoch": 0.5944745218336203, "grad_norm": 396.2929183727845, "learning_rate": 7.198515870795542e-06, "loss": 326.6847, "step": 30910 }, { "epoch": 0.59466684616938, "grad_norm": 414.9848902286814, "learning_rate": 7.192657640279421e-06, "loss": 335.425, "step": 30920 }, { "epoch": 0.5948591705051399, "grad_norm": 457.7861496970338, "learning_rate": 7.18680045545197e-06, "loss": 335.1131, "step": 30930 }, { "epoch": 0.5950514948408997, "grad_norm": 494.03918143631216, "learning_rate": 7.180944318494888e-06, "loss": 334.7091, "step": 30940 }, { "epoch": 0.5952438191766595, "grad_norm": 455.3685004409763, "learning_rate": 7.175089231589485e-06, "loss": 334.6876, "step": 30950 }, { "epoch": 0.5954361435124194, "grad_norm": 439.6711012667096, "learning_rate": 7.1692351969166905e-06, "loss": 347.1738, "step": 30960 }, { "epoch": 0.5956284678481791, "grad_norm": 433.3878199910252, "learning_rate": 7.163382216657033e-06, "loss": 326.3027, "step": 30970 }, { "epoch": 0.595820792183939, "grad_norm": 409.99590740432757, "learning_rate": 7.157530292990654e-06, "loss": 329.2366, "step": 30980 }, { "epoch": 0.5960131165196988, "grad_norm": 431.52405336260904, "learning_rate": 7.151679428097291e-06, "loss": 319.1754, "step": 30990 }, { "epoch": 0.5962054408554587, "grad_norm": 455.1807812575659, "learning_rate": 7.145829624156304e-06, "loss": 333.7459, "step": 31000 }, { "epoch": 0.5963977651912185, "grad_norm": 493.3453544067815, "learning_rate": 7.1399808833466445e-06, "loss": 328.3524, "step": 31010 }, { "epoch": 0.5965900895269783, "grad_norm": 483.4519052459808, "learning_rate": 7.134133207846869e-06, "loss": 331.9075, "step": 31020 }, { "epoch": 0.5967824138627381, "grad_norm": 460.9509047632126, "learning_rate": 7.128286599835139e-06, "loss": 337.3895, "step": 31030 }, { "epoch": 0.596974738198498, "grad_norm": 424.24732218577697, "learning_rate": 7.122441061489228e-06, "loss": 318.9668, "step": 31040 }, { "epoch": 0.5971670625342578, "grad_norm": 457.798484478798, "learning_rate": 7.1165965949864934e-06, "loss": 330.7322, "step": 31050 }, { "epoch": 0.5973593868700176, "grad_norm": 468.46423289349167, "learning_rate": 7.110753202503906e-06, "loss": 329.8272, "step": 31060 }, { "epoch": 0.5975517112057774, "grad_norm": 452.47958392396356, "learning_rate": 7.104910886218036e-06, "loss": 332.745, "step": 31070 }, { "epoch": 0.5977440355415372, "grad_norm": 442.86853921388683, "learning_rate": 7.0990696483050466e-06, "loss": 330.2383, "step": 31080 }, { "epoch": 0.5979363598772971, "grad_norm": 418.626475384388, "learning_rate": 7.093229490940704e-06, "loss": 331.1363, "step": 31090 }, { "epoch": 0.5981286842130569, "grad_norm": 406.5862014101304, "learning_rate": 7.087390416300364e-06, "loss": 321.2443, "step": 31100 }, { "epoch": 0.5983210085488168, "grad_norm": 439.4052352894715, "learning_rate": 7.081552426558995e-06, "loss": 339.0512, "step": 31110 }, { "epoch": 0.5985133328845765, "grad_norm": 424.5968584083964, "learning_rate": 7.075715523891146e-06, "loss": 324.1058, "step": 31120 }, { "epoch": 0.5987056572203364, "grad_norm": 448.70573702347997, "learning_rate": 7.069879710470965e-06, "loss": 324.4255, "step": 31130 }, { "epoch": 0.5988979815560962, "grad_norm": 435.67281719470986, "learning_rate": 7.064044988472204e-06, "loss": 320.2699, "step": 31140 }, { "epoch": 0.5990903058918561, "grad_norm": 461.8817151359626, "learning_rate": 7.058211360068196e-06, "loss": 324.3273, "step": 31150 }, { "epoch": 0.5992826302276159, "grad_norm": 452.24181285786625, "learning_rate": 7.052378827431871e-06, "loss": 330.5285, "step": 31160 }, { "epoch": 0.5994749545633756, "grad_norm": 475.31707579050004, "learning_rate": 7.046547392735747e-06, "loss": 322.7347, "step": 31170 }, { "epoch": 0.5996672788991355, "grad_norm": 495.86005226998174, "learning_rate": 7.040717058151945e-06, "loss": 324.2601, "step": 31180 }, { "epoch": 0.5998596032348953, "grad_norm": 415.33022435390995, "learning_rate": 7.034887825852164e-06, "loss": 329.2483, "step": 31190 }, { "epoch": 0.6000519275706552, "grad_norm": 436.19998092436464, "learning_rate": 7.029059698007699e-06, "loss": 320.7651, "step": 31200 }, { "epoch": 0.600244251906415, "grad_norm": 471.59220586959736, "learning_rate": 7.023232676789424e-06, "loss": 341.8868, "step": 31210 }, { "epoch": 0.6004365762421748, "grad_norm": 476.8202834855007, "learning_rate": 7.01740676436782e-06, "loss": 328.4182, "step": 31220 }, { "epoch": 0.6006289005779346, "grad_norm": 428.9616607929938, "learning_rate": 7.011581962912936e-06, "loss": 332.7413, "step": 31230 }, { "epoch": 0.6008212249136945, "grad_norm": 463.69521784344965, "learning_rate": 7.005758274594412e-06, "loss": 331.1642, "step": 31240 }, { "epoch": 0.6010135492494543, "grad_norm": 423.72747756075296, "learning_rate": 6.999935701581482e-06, "loss": 327.5476, "step": 31250 }, { "epoch": 0.6012058735852142, "grad_norm": 423.9380522363376, "learning_rate": 6.9941142460429555e-06, "loss": 326.322, "step": 31260 }, { "epoch": 0.6013981979209739, "grad_norm": 430.2591285280863, "learning_rate": 6.988293910147229e-06, "loss": 328.2301, "step": 31270 }, { "epoch": 0.6015905222567337, "grad_norm": 419.2111748804502, "learning_rate": 6.982474696062278e-06, "loss": 331.0351, "step": 31280 }, { "epoch": 0.6017828465924936, "grad_norm": 420.13574881257034, "learning_rate": 6.97665660595567e-06, "loss": 329.3996, "step": 31290 }, { "epoch": 0.6019751709282534, "grad_norm": 423.05504045628146, "learning_rate": 6.970839641994545e-06, "loss": 325.3394, "step": 31300 }, { "epoch": 0.6021674952640133, "grad_norm": 409.03972788519604, "learning_rate": 6.965023806345619e-06, "loss": 326.1781, "step": 31310 }, { "epoch": 0.602359819599773, "grad_norm": 399.1468065778292, "learning_rate": 6.959209101175206e-06, "loss": 323.4126, "step": 31320 }, { "epoch": 0.6025521439355329, "grad_norm": 443.7106738892151, "learning_rate": 6.9533955286491805e-06, "loss": 333.6226, "step": 31330 }, { "epoch": 0.6027444682712927, "grad_norm": 390.7697000691092, "learning_rate": 6.947583090933008e-06, "loss": 320.8648, "step": 31340 }, { "epoch": 0.6029367926070526, "grad_norm": 406.5422917215672, "learning_rate": 6.941771790191716e-06, "loss": 322.8538, "step": 31350 }, { "epoch": 0.6031291169428123, "grad_norm": 418.194064783171, "learning_rate": 6.9359616285899266e-06, "loss": 328.5125, "step": 31360 }, { "epoch": 0.6033214412785722, "grad_norm": 445.7989038206677, "learning_rate": 6.930152608291829e-06, "loss": 337.901, "step": 31370 }, { "epoch": 0.603513765614332, "grad_norm": 423.173245012053, "learning_rate": 6.924344731461179e-06, "loss": 319.9932, "step": 31380 }, { "epoch": 0.6037060899500918, "grad_norm": 411.5581267282828, "learning_rate": 6.918538000261325e-06, "loss": 324.1152, "step": 31390 }, { "epoch": 0.6038984142858517, "grad_norm": 472.8986853644844, "learning_rate": 6.912732416855171e-06, "loss": 335.1744, "step": 31400 }, { "epoch": 0.6040907386216114, "grad_norm": 476.3325024728093, "learning_rate": 6.906927983405207e-06, "loss": 336.0696, "step": 31410 }, { "epoch": 0.6042830629573713, "grad_norm": 408.8631082548548, "learning_rate": 6.901124702073481e-06, "loss": 325.908, "step": 31420 }, { "epoch": 0.6044753872931311, "grad_norm": 430.6616833450127, "learning_rate": 6.895322575021628e-06, "loss": 333.273, "step": 31430 }, { "epoch": 0.604667711628891, "grad_norm": 434.6259514020785, "learning_rate": 6.88952160441084e-06, "loss": 334.1016, "step": 31440 }, { "epoch": 0.6048600359646508, "grad_norm": 436.98857614808065, "learning_rate": 6.8837217924018825e-06, "loss": 327.0335, "step": 31450 }, { "epoch": 0.6050523603004107, "grad_norm": 434.1359932462219, "learning_rate": 6.877923141155087e-06, "loss": 336.8191, "step": 31460 }, { "epoch": 0.6052446846361704, "grad_norm": 420.9197192491584, "learning_rate": 6.87212565283036e-06, "loss": 322.0309, "step": 31470 }, { "epoch": 0.6054370089719303, "grad_norm": 459.55648391653125, "learning_rate": 6.8663293295871715e-06, "loss": 322.854, "step": 31480 }, { "epoch": 0.6056293333076901, "grad_norm": 409.7728528591732, "learning_rate": 6.86053417358455e-06, "loss": 333.7287, "step": 31490 }, { "epoch": 0.6058216576434499, "grad_norm": 424.42545183647627, "learning_rate": 6.854740186981102e-06, "loss": 327.6027, "step": 31500 }, { "epoch": 0.6060139819792097, "grad_norm": 431.8624804371158, "learning_rate": 6.848947371934989e-06, "loss": 333.8613, "step": 31510 }, { "epoch": 0.6062063063149695, "grad_norm": 440.882114932241, "learning_rate": 6.843155730603939e-06, "loss": 335.2275, "step": 31520 }, { "epoch": 0.6063986306507294, "grad_norm": 410.87712071861705, "learning_rate": 6.837365265145237e-06, "loss": 323.1684, "step": 31530 }, { "epoch": 0.6065909549864892, "grad_norm": 441.7371065461392, "learning_rate": 6.831575977715745e-06, "loss": 332.0648, "step": 31540 }, { "epoch": 0.6067832793222491, "grad_norm": 418.2044710315922, "learning_rate": 6.825787870471873e-06, "loss": 325.2839, "step": 31550 }, { "epoch": 0.6069756036580088, "grad_norm": 477.9637089706895, "learning_rate": 6.820000945569592e-06, "loss": 325.3218, "step": 31560 }, { "epoch": 0.6071679279937687, "grad_norm": 478.63470381819985, "learning_rate": 6.814215205164444e-06, "loss": 337.3299, "step": 31570 }, { "epoch": 0.6073602523295285, "grad_norm": 431.2063649604528, "learning_rate": 6.808430651411518e-06, "loss": 326.4344, "step": 31580 }, { "epoch": 0.6075525766652884, "grad_norm": 437.5634034043336, "learning_rate": 6.802647286465461e-06, "loss": 328.3208, "step": 31590 }, { "epoch": 0.6077449010010482, "grad_norm": 443.68659553090095, "learning_rate": 6.796865112480482e-06, "loss": 336.2546, "step": 31600 }, { "epoch": 0.6079372253368079, "grad_norm": 410.68802734590344, "learning_rate": 6.79108413161035e-06, "loss": 331.4933, "step": 31610 }, { "epoch": 0.6081295496725678, "grad_norm": 450.82071981640553, "learning_rate": 6.785304346008381e-06, "loss": 331.3573, "step": 31620 }, { "epoch": 0.6083218740083276, "grad_norm": 413.8835795348315, "learning_rate": 6.779525757827452e-06, "loss": 321.4032, "step": 31630 }, { "epoch": 0.6085141983440875, "grad_norm": 407.97395421733364, "learning_rate": 6.773748369219986e-06, "loss": 331.3999, "step": 31640 }, { "epoch": 0.6087065226798473, "grad_norm": 446.3660351781709, "learning_rate": 6.767972182337974e-06, "loss": 338.6244, "step": 31650 }, { "epoch": 0.6088988470156071, "grad_norm": 412.42102182963106, "learning_rate": 6.762197199332945e-06, "loss": 326.3635, "step": 31660 }, { "epoch": 0.6090911713513669, "grad_norm": 417.5113755716868, "learning_rate": 6.756423422355981e-06, "loss": 320.6295, "step": 31670 }, { "epoch": 0.6092834956871268, "grad_norm": 396.98151134937456, "learning_rate": 6.750650853557728e-06, "loss": 321.2423, "step": 31680 }, { "epoch": 0.6094758200228866, "grad_norm": 426.87495631595505, "learning_rate": 6.744879495088364e-06, "loss": 331.0351, "step": 31690 }, { "epoch": 0.6096681443586465, "grad_norm": 429.4301663260041, "learning_rate": 6.7391093490976285e-06, "loss": 317.8627, "step": 31700 }, { "epoch": 0.6098604686944062, "grad_norm": 438.9367309224993, "learning_rate": 6.7333404177348036e-06, "loss": 318.1759, "step": 31710 }, { "epoch": 0.610052793030166, "grad_norm": 421.5808481922939, "learning_rate": 6.727572703148726e-06, "loss": 321.6035, "step": 31720 }, { "epoch": 0.6102451173659259, "grad_norm": 421.2256198499856, "learning_rate": 6.721806207487769e-06, "loss": 330.6125, "step": 31730 }, { "epoch": 0.6104374417016857, "grad_norm": 426.228325026148, "learning_rate": 6.716040932899857e-06, "loss": 337.3397, "step": 31740 }, { "epoch": 0.6106297660374456, "grad_norm": 436.38767737451286, "learning_rate": 6.710276881532463e-06, "loss": 328.5183, "step": 31750 }, { "epoch": 0.6108220903732053, "grad_norm": 410.4774005376651, "learning_rate": 6.704514055532597e-06, "loss": 331.1836, "step": 31760 }, { "epoch": 0.6110144147089652, "grad_norm": 448.99529407431874, "learning_rate": 6.698752457046822e-06, "loss": 320.2948, "step": 31770 }, { "epoch": 0.611206739044725, "grad_norm": 453.78535420099576, "learning_rate": 6.692992088221231e-06, "loss": 336.2243, "step": 31780 }, { "epoch": 0.6113990633804849, "grad_norm": 432.39546381511616, "learning_rate": 6.687232951201473e-06, "loss": 321.0264, "step": 31790 }, { "epoch": 0.6115913877162447, "grad_norm": 431.60681963290085, "learning_rate": 6.681475048132729e-06, "loss": 337.4963, "step": 31800 }, { "epoch": 0.6117837120520045, "grad_norm": 412.00998093735575, "learning_rate": 6.675718381159719e-06, "loss": 325.3441, "step": 31810 }, { "epoch": 0.6119760363877643, "grad_norm": 414.27886611671755, "learning_rate": 6.6699629524267114e-06, "loss": 321.3202, "step": 31820 }, { "epoch": 0.6121683607235241, "grad_norm": 429.11378557268085, "learning_rate": 6.664208764077507e-06, "loss": 335.3874, "step": 31830 }, { "epoch": 0.612360685059284, "grad_norm": 424.95879573324083, "learning_rate": 6.658455818255445e-06, "loss": 319.7482, "step": 31840 }, { "epoch": 0.6125530093950438, "grad_norm": 409.34126518998613, "learning_rate": 6.652704117103401e-06, "loss": 321.8787, "step": 31850 }, { "epoch": 0.6127453337308036, "grad_norm": 413.338977258228, "learning_rate": 6.646953662763796e-06, "loss": 338.9022, "step": 31860 }, { "epoch": 0.6129376580665634, "grad_norm": 432.3472601520748, "learning_rate": 6.6412044573785725e-06, "loss": 328.2723, "step": 31870 }, { "epoch": 0.6131299824023233, "grad_norm": 416.94271118525955, "learning_rate": 6.635456503089217e-06, "loss": 326.2529, "step": 31880 }, { "epoch": 0.6133223067380831, "grad_norm": 520.2741569123933, "learning_rate": 6.6297098020367435e-06, "loss": 344.4937, "step": 31890 }, { "epoch": 0.613514631073843, "grad_norm": 414.0172107168599, "learning_rate": 6.623964356361707e-06, "loss": 342.6946, "step": 31900 }, { "epoch": 0.6137069554096027, "grad_norm": 447.82016238858694, "learning_rate": 6.618220168204193e-06, "loss": 325.3615, "step": 31910 }, { "epoch": 0.6138992797453626, "grad_norm": 406.40458600547805, "learning_rate": 6.6124772397038115e-06, "loss": 328.425, "step": 31920 }, { "epoch": 0.6140916040811224, "grad_norm": 428.2966682996343, "learning_rate": 6.606735572999714e-06, "loss": 336.6029, "step": 31930 }, { "epoch": 0.6142839284168823, "grad_norm": 404.30106573083583, "learning_rate": 6.600995170230575e-06, "loss": 316.1977, "step": 31940 }, { "epoch": 0.6144762527526421, "grad_norm": 440.7107669034454, "learning_rate": 6.595256033534598e-06, "loss": 322.2435, "step": 31950 }, { "epoch": 0.6146685770884018, "grad_norm": 450.99281026713794, "learning_rate": 6.589518165049514e-06, "loss": 323.5361, "step": 31960 }, { "epoch": 0.6148609014241617, "grad_norm": 434.4686367799548, "learning_rate": 6.5837815669125906e-06, "loss": 327.8862, "step": 31970 }, { "epoch": 0.6150532257599215, "grad_norm": 406.74916648632023, "learning_rate": 6.5780462412606124e-06, "loss": 313.7675, "step": 31980 }, { "epoch": 0.6152455500956814, "grad_norm": 413.2782229599621, "learning_rate": 6.572312190229895e-06, "loss": 324.2452, "step": 31990 }, { "epoch": 0.6154378744314412, "grad_norm": 428.5324791770067, "learning_rate": 6.5665794159562734e-06, "loss": 322.2049, "step": 32000 }, { "epoch": 0.615630198767201, "grad_norm": 435.7705616014478, "learning_rate": 6.560847920575118e-06, "loss": 318.6313, "step": 32010 }, { "epoch": 0.6158225231029608, "grad_norm": 413.6048996748444, "learning_rate": 6.5551177062213126e-06, "loss": 333.2631, "step": 32020 }, { "epoch": 0.6160148474387207, "grad_norm": 410.4890441406439, "learning_rate": 6.5493887750292616e-06, "loss": 324.2066, "step": 32030 }, { "epoch": 0.6162071717744805, "grad_norm": 401.06790112767646, "learning_rate": 6.54366112913291e-06, "loss": 337.9793, "step": 32040 }, { "epoch": 0.6163994961102404, "grad_norm": 456.3108819641096, "learning_rate": 6.537934770665701e-06, "loss": 334.1323, "step": 32050 }, { "epoch": 0.6165918204460001, "grad_norm": 421.1389596570581, "learning_rate": 6.532209701760615e-06, "loss": 340.9781, "step": 32060 }, { "epoch": 0.6167841447817599, "grad_norm": 426.95528614319517, "learning_rate": 6.526485924550138e-06, "loss": 332.3481, "step": 32070 }, { "epoch": 0.6169764691175198, "grad_norm": 573.6889626483983, "learning_rate": 6.520763441166291e-06, "loss": 331.3861, "step": 32080 }, { "epoch": 0.6171687934532796, "grad_norm": 440.45618569058104, "learning_rate": 6.515042253740601e-06, "loss": 312.4993, "step": 32090 }, { "epoch": 0.6173611177890395, "grad_norm": 447.4479672089497, "learning_rate": 6.509322364404112e-06, "loss": 331.0521, "step": 32100 }, { "epoch": 0.6175534421247992, "grad_norm": 425.50743641148125, "learning_rate": 6.503603775287395e-06, "loss": 327.4585, "step": 32110 }, { "epoch": 0.6177457664605591, "grad_norm": 428.49746655448627, "learning_rate": 6.497886488520524e-06, "loss": 323.4842, "step": 32120 }, { "epoch": 0.6179380907963189, "grad_norm": 457.8434213004478, "learning_rate": 6.4921705062331e-06, "loss": 335.378, "step": 32130 }, { "epoch": 0.6181304151320788, "grad_norm": 442.70575999445475, "learning_rate": 6.486455830554224e-06, "loss": 320.6056, "step": 32140 }, { "epoch": 0.6183227394678386, "grad_norm": 594.1806569317398, "learning_rate": 6.4807424636125285e-06, "loss": 326.3652, "step": 32150 }, { "epoch": 0.6185150638035984, "grad_norm": 431.1665881155747, "learning_rate": 6.475030407536141e-06, "loss": 328.5355, "step": 32160 }, { "epoch": 0.6187073881393582, "grad_norm": 421.0350598890005, "learning_rate": 6.469319664452709e-06, "loss": 326.7443, "step": 32170 }, { "epoch": 0.618899712475118, "grad_norm": 409.23853737120083, "learning_rate": 6.463610236489391e-06, "loss": 319.5542, "step": 32180 }, { "epoch": 0.6190920368108779, "grad_norm": 456.83870606542223, "learning_rate": 6.457902125772854e-06, "loss": 327.7158, "step": 32190 }, { "epoch": 0.6192843611466377, "grad_norm": 453.99404556395046, "learning_rate": 6.452195334429277e-06, "loss": 329.5449, "step": 32200 }, { "epoch": 0.6194766854823975, "grad_norm": 475.0510378981282, "learning_rate": 6.446489864584341e-06, "loss": 322.3272, "step": 32210 }, { "epoch": 0.6196690098181573, "grad_norm": 425.11870277903165, "learning_rate": 6.440785718363245e-06, "loss": 321.6365, "step": 32220 }, { "epoch": 0.6198613341539172, "grad_norm": 433.46603621909895, "learning_rate": 6.435082897890688e-06, "loss": 324.7147, "step": 32230 }, { "epoch": 0.620053658489677, "grad_norm": 414.7375458168306, "learning_rate": 6.429381405290873e-06, "loss": 323.154, "step": 32240 }, { "epoch": 0.6202459828254369, "grad_norm": 452.5861336513786, "learning_rate": 6.4236812426875124e-06, "loss": 327.5643, "step": 32250 }, { "epoch": 0.6204383071611966, "grad_norm": 469.30850682635673, "learning_rate": 6.4179824122038244e-06, "loss": 327.8253, "step": 32260 }, { "epoch": 0.6206306314969565, "grad_norm": 451.42769607913056, "learning_rate": 6.412284915962532e-06, "loss": 330.3799, "step": 32270 }, { "epoch": 0.6208229558327163, "grad_norm": 410.43366971821615, "learning_rate": 6.406588756085849e-06, "loss": 320.7363, "step": 32280 }, { "epoch": 0.6210152801684761, "grad_norm": 418.3653168917956, "learning_rate": 6.400893934695514e-06, "loss": 323.9497, "step": 32290 }, { "epoch": 0.621207604504236, "grad_norm": 455.3732134667118, "learning_rate": 6.395200453912747e-06, "loss": 330.9363, "step": 32300 }, { "epoch": 0.6213999288399957, "grad_norm": 414.77459704481186, "learning_rate": 6.389508315858272e-06, "loss": 328.9256, "step": 32310 }, { "epoch": 0.6215922531757556, "grad_norm": 444.7560679248882, "learning_rate": 6.38381752265232e-06, "loss": 325.5793, "step": 32320 }, { "epoch": 0.6217845775115154, "grad_norm": 465.0531572072124, "learning_rate": 6.378128076414619e-06, "loss": 330.5696, "step": 32330 }, { "epoch": 0.6219769018472753, "grad_norm": 468.5882004724719, "learning_rate": 6.372439979264393e-06, "loss": 332.7551, "step": 32340 }, { "epoch": 0.622169226183035, "grad_norm": 409.0557983184617, "learning_rate": 6.3667532333203655e-06, "loss": 319.2977, "step": 32350 }, { "epoch": 0.6223615505187949, "grad_norm": 432.3491555331506, "learning_rate": 6.361067840700747e-06, "loss": 322.4629, "step": 32360 }, { "epoch": 0.6225538748545547, "grad_norm": 423.5721209957189, "learning_rate": 6.355383803523265e-06, "loss": 324.1534, "step": 32370 }, { "epoch": 0.6227461991903146, "grad_norm": 441.7958215177036, "learning_rate": 6.349701123905123e-06, "loss": 326.1752, "step": 32380 }, { "epoch": 0.6229385235260744, "grad_norm": 418.3116862715124, "learning_rate": 6.344019803963021e-06, "loss": 328.9536, "step": 32390 }, { "epoch": 0.6231308478618341, "grad_norm": 418.21228146984464, "learning_rate": 6.338339845813164e-06, "loss": 324.4175, "step": 32400 }, { "epoch": 0.623323172197594, "grad_norm": 424.9867330009795, "learning_rate": 6.332661251571241e-06, "loss": 321.1011, "step": 32410 }, { "epoch": 0.6235154965333538, "grad_norm": 429.0430428008001, "learning_rate": 6.326984023352435e-06, "loss": 330.4229, "step": 32420 }, { "epoch": 0.6237078208691137, "grad_norm": 393.81594969739876, "learning_rate": 6.321308163271413e-06, "loss": 324.0193, "step": 32430 }, { "epoch": 0.6239001452048735, "grad_norm": 456.6985511449763, "learning_rate": 6.315633673442349e-06, "loss": 324.1999, "step": 32440 }, { "epoch": 0.6240924695406334, "grad_norm": 419.92969770799095, "learning_rate": 6.309960555978894e-06, "loss": 320.1714, "step": 32450 }, { "epoch": 0.6242847938763931, "grad_norm": 417.7106530617901, "learning_rate": 6.304288812994183e-06, "loss": 321.9302, "step": 32460 }, { "epoch": 0.624477118212153, "grad_norm": 407.2339064814913, "learning_rate": 6.298618446600856e-06, "loss": 334.5645, "step": 32470 }, { "epoch": 0.6246694425479128, "grad_norm": 422.27652461561996, "learning_rate": 6.292949458911029e-06, "loss": 326.5539, "step": 32480 }, { "epoch": 0.6248617668836727, "grad_norm": 432.9597248352087, "learning_rate": 6.287281852036304e-06, "loss": 318.7616, "step": 32490 }, { "epoch": 0.6250540912194325, "grad_norm": 416.9766790941515, "learning_rate": 6.2816156280877675e-06, "loss": 322.7292, "step": 32500 }, { "epoch": 0.6252464155551922, "grad_norm": 408.8695050778491, "learning_rate": 6.275950789176002e-06, "loss": 328.5652, "step": 32510 }, { "epoch": 0.6254387398909521, "grad_norm": 406.750794698313, "learning_rate": 6.270287337411064e-06, "loss": 321.6231, "step": 32520 }, { "epoch": 0.6256310642267119, "grad_norm": 434.35189119278624, "learning_rate": 6.264625274902492e-06, "loss": 328.9104, "step": 32530 }, { "epoch": 0.6258233885624718, "grad_norm": 441.217565698764, "learning_rate": 6.258964603759311e-06, "loss": 332.9562, "step": 32540 }, { "epoch": 0.6260157128982315, "grad_norm": 426.77229867822825, "learning_rate": 6.2533053260900345e-06, "loss": 322.5945, "step": 32550 }, { "epoch": 0.6262080372339914, "grad_norm": 483.1264069683457, "learning_rate": 6.247647444002644e-06, "loss": 327.2905, "step": 32560 }, { "epoch": 0.6264003615697512, "grad_norm": 436.6549684532777, "learning_rate": 6.241990959604607e-06, "loss": 334.4606, "step": 32570 }, { "epoch": 0.6265926859055111, "grad_norm": 401.2974328382389, "learning_rate": 6.2363358750028745e-06, "loss": 325.6229, "step": 32580 }, { "epoch": 0.6267850102412709, "grad_norm": 406.44672587720225, "learning_rate": 6.23068219230387e-06, "loss": 318.5489, "step": 32590 }, { "epoch": 0.6269773345770308, "grad_norm": 455.20375356590705, "learning_rate": 6.2250299136134925e-06, "loss": 319.4099, "step": 32600 }, { "epoch": 0.6271696589127905, "grad_norm": 418.82334411779806, "learning_rate": 6.219379041037128e-06, "loss": 326.8413, "step": 32610 }, { "epoch": 0.6273619832485503, "grad_norm": 439.19329369503697, "learning_rate": 6.213729576679632e-06, "loss": 323.3758, "step": 32620 }, { "epoch": 0.6275543075843102, "grad_norm": 416.2625185128422, "learning_rate": 6.208081522645339e-06, "loss": 318.5643, "step": 32630 }, { "epoch": 0.62774663192007, "grad_norm": 445.4966848300152, "learning_rate": 6.202434881038048e-06, "loss": 323.1161, "step": 32640 }, { "epoch": 0.6279389562558298, "grad_norm": 403.7901714338289, "learning_rate": 6.196789653961048e-06, "loss": 321.4052, "step": 32650 }, { "epoch": 0.6281312805915896, "grad_norm": 402.4884676658384, "learning_rate": 6.191145843517093e-06, "loss": 322.4316, "step": 32660 }, { "epoch": 0.6283236049273495, "grad_norm": 471.4715054154106, "learning_rate": 6.185503451808401e-06, "loss": 325.7773, "step": 32670 }, { "epoch": 0.6285159292631093, "grad_norm": 424.61665334536804, "learning_rate": 6.1798624809366755e-06, "loss": 333.7152, "step": 32680 }, { "epoch": 0.6287082535988692, "grad_norm": 463.05504565767166, "learning_rate": 6.174222933003084e-06, "loss": 326.8595, "step": 32690 }, { "epoch": 0.628900577934629, "grad_norm": 447.63783470891326, "learning_rate": 6.168584810108269e-06, "loss": 328.4714, "step": 32700 }, { "epoch": 0.6290929022703888, "grad_norm": 473.41771899779656, "learning_rate": 6.162948114352328e-06, "loss": 335.0917, "step": 32710 }, { "epoch": 0.6292852266061486, "grad_norm": 465.20344187668286, "learning_rate": 6.157312847834848e-06, "loss": 324.3855, "step": 32720 }, { "epoch": 0.6294775509419084, "grad_norm": 419.9382018746983, "learning_rate": 6.1516790126548695e-06, "loss": 321.8709, "step": 32730 }, { "epoch": 0.6296698752776683, "grad_norm": 432.3435457974304, "learning_rate": 6.146046610910899e-06, "loss": 326.0685, "step": 32740 }, { "epoch": 0.629862199613428, "grad_norm": 410.18715744874953, "learning_rate": 6.140415644700915e-06, "loss": 339.1354, "step": 32750 }, { "epoch": 0.6300545239491879, "grad_norm": 440.33709101015756, "learning_rate": 6.13478611612236e-06, "loss": 328.9889, "step": 32760 }, { "epoch": 0.6302468482849477, "grad_norm": 402.57351663000355, "learning_rate": 6.129158027272144e-06, "loss": 323.6507, "step": 32770 }, { "epoch": 0.6304391726207076, "grad_norm": 452.5979517759669, "learning_rate": 6.123531380246632e-06, "loss": 320.9756, "step": 32780 }, { "epoch": 0.6306314969564674, "grad_norm": 404.4887640766371, "learning_rate": 6.117906177141657e-06, "loss": 328.8006, "step": 32790 }, { "epoch": 0.6308238212922272, "grad_norm": 462.76630626964396, "learning_rate": 6.112282420052518e-06, "loss": 321.5611, "step": 32800 }, { "epoch": 0.631016145627987, "grad_norm": 419.70725828797083, "learning_rate": 6.106660111073969e-06, "loss": 327.3932, "step": 32810 }, { "epoch": 0.6312084699637469, "grad_norm": 406.57270690324384, "learning_rate": 6.101039252300227e-06, "loss": 328.7421, "step": 32820 }, { "epoch": 0.6314007942995067, "grad_norm": 424.11730292183296, "learning_rate": 6.095419845824971e-06, "loss": 323.4531, "step": 32830 }, { "epoch": 0.6315931186352665, "grad_norm": 435.44789791149435, "learning_rate": 6.089801893741338e-06, "loss": 325.521, "step": 32840 }, { "epoch": 0.6317854429710263, "grad_norm": 406.92073548289306, "learning_rate": 6.084185398141921e-06, "loss": 324.5324, "step": 32850 }, { "epoch": 0.6319777673067861, "grad_norm": 445.37035709504715, "learning_rate": 6.078570361118768e-06, "loss": 328.6068, "step": 32860 }, { "epoch": 0.632170091642546, "grad_norm": 436.58632830997254, "learning_rate": 6.072956784763393e-06, "loss": 324.0492, "step": 32870 }, { "epoch": 0.6323624159783058, "grad_norm": 395.89775800273503, "learning_rate": 6.067344671166757e-06, "loss": 327.2833, "step": 32880 }, { "epoch": 0.6325547403140657, "grad_norm": 437.50506006442527, "learning_rate": 6.061734022419281e-06, "loss": 326.9905, "step": 32890 }, { "epoch": 0.6327470646498254, "grad_norm": 416.31786384346094, "learning_rate": 6.056124840610839e-06, "loss": 313.8626, "step": 32900 }, { "epoch": 0.6329393889855853, "grad_norm": 463.5627340464092, "learning_rate": 6.050517127830761e-06, "loss": 326.8129, "step": 32910 }, { "epoch": 0.6331317133213451, "grad_norm": 416.54406993323664, "learning_rate": 6.044910886167825e-06, "loss": 336.6158, "step": 32920 }, { "epoch": 0.633324037657105, "grad_norm": 439.75612499363956, "learning_rate": 6.03930611771026e-06, "loss": 321.1817, "step": 32930 }, { "epoch": 0.6335163619928648, "grad_norm": 462.1824301255701, "learning_rate": 6.033702824545755e-06, "loss": 316.3526, "step": 32940 }, { "epoch": 0.6337086863286245, "grad_norm": 437.57200648004044, "learning_rate": 6.028101008761445e-06, "loss": 312.8964, "step": 32950 }, { "epoch": 0.6339010106643844, "grad_norm": 417.85734741965825, "learning_rate": 6.022500672443907e-06, "loss": 321.7556, "step": 32960 }, { "epoch": 0.6340933350001442, "grad_norm": 458.4178721966177, "learning_rate": 6.016901817679177e-06, "loss": 331.59, "step": 32970 }, { "epoch": 0.6342856593359041, "grad_norm": 410.8474244278934, "learning_rate": 6.011304446552741e-06, "loss": 318.5654, "step": 32980 }, { "epoch": 0.6344779836716639, "grad_norm": 434.78701503041884, "learning_rate": 6.005708561149523e-06, "loss": 324.2937, "step": 32990 }, { "epoch": 0.6346703080074237, "grad_norm": 448.34652220499385, "learning_rate": 6.000114163553894e-06, "loss": 328.6474, "step": 33000 }, { "epoch": 0.6348626323431835, "grad_norm": 434.05873206618304, "learning_rate": 5.994521255849684e-06, "loss": 331.5147, "step": 33010 }, { "epoch": 0.6350549566789434, "grad_norm": 411.50099490053105, "learning_rate": 5.988929840120151e-06, "loss": 328.2983, "step": 33020 }, { "epoch": 0.6352472810147032, "grad_norm": 445.10534357339293, "learning_rate": 5.983339918448008e-06, "loss": 324.5035, "step": 33030 }, { "epoch": 0.6354396053504631, "grad_norm": 445.902211599106, "learning_rate": 5.977751492915404e-06, "loss": 318.4406, "step": 33040 }, { "epoch": 0.6356319296862228, "grad_norm": 414.1709800694302, "learning_rate": 5.972164565603944e-06, "loss": 324.7382, "step": 33050 }, { "epoch": 0.6358242540219827, "grad_norm": 407.91460852423523, "learning_rate": 5.966579138594661e-06, "loss": 333.8346, "step": 33060 }, { "epoch": 0.6360165783577425, "grad_norm": 425.83599914913617, "learning_rate": 5.960995213968033e-06, "loss": 326.7666, "step": 33070 }, { "epoch": 0.6362089026935023, "grad_norm": 429.2653214507619, "learning_rate": 5.955412793803983e-06, "loss": 318.4811, "step": 33080 }, { "epoch": 0.6364012270292622, "grad_norm": 514.921101360999, "learning_rate": 5.949831880181869e-06, "loss": 339.2025, "step": 33090 }, { "epoch": 0.6365935513650219, "grad_norm": 439.47238772651775, "learning_rate": 5.944252475180487e-06, "loss": 336.8574, "step": 33100 }, { "epoch": 0.6367858757007818, "grad_norm": 408.60479581193187, "learning_rate": 5.938674580878077e-06, "loss": 324.96, "step": 33110 }, { "epoch": 0.6369782000365416, "grad_norm": 440.49902995174443, "learning_rate": 5.933098199352307e-06, "loss": 329.8435, "step": 33120 }, { "epoch": 0.6371705243723015, "grad_norm": 430.0941986481869, "learning_rate": 5.927523332680296e-06, "loss": 330.0749, "step": 33130 }, { "epoch": 0.6373628487080613, "grad_norm": 426.120737751968, "learning_rate": 5.921949982938583e-06, "loss": 324.2136, "step": 33140 }, { "epoch": 0.6375551730438211, "grad_norm": 446.29464797735096, "learning_rate": 5.916378152203146e-06, "loss": 330.048, "step": 33150 }, { "epoch": 0.6377474973795809, "grad_norm": 389.33807674876357, "learning_rate": 5.910807842549409e-06, "loss": 323.6343, "step": 33160 }, { "epoch": 0.6379398217153408, "grad_norm": 439.3824082828931, "learning_rate": 5.905239056052212e-06, "loss": 321.4603, "step": 33170 }, { "epoch": 0.6381321460511006, "grad_norm": 420.43187384435345, "learning_rate": 5.8996717947858395e-06, "loss": 324.7336, "step": 33180 }, { "epoch": 0.6383244703868604, "grad_norm": 413.1979360709277, "learning_rate": 5.894106060824005e-06, "loss": 324.3086, "step": 33190 }, { "epoch": 0.6385167947226202, "grad_norm": 402.21970248440675, "learning_rate": 5.888541856239854e-06, "loss": 318.3952, "step": 33200 }, { "epoch": 0.63870911905838, "grad_norm": 416.30673265789903, "learning_rate": 5.882979183105959e-06, "loss": 323.8405, "step": 33210 }, { "epoch": 0.6389014433941399, "grad_norm": 388.146357708491, "learning_rate": 5.8774180434943184e-06, "loss": 320.0798, "step": 33220 }, { "epoch": 0.6390937677298997, "grad_norm": 441.4225023402244, "learning_rate": 5.871858439476374e-06, "loss": 323.9578, "step": 33230 }, { "epoch": 0.6392860920656596, "grad_norm": 478.43504781601206, "learning_rate": 5.86630037312298e-06, "loss": 321.9073, "step": 33240 }, { "epoch": 0.6394784164014193, "grad_norm": 483.573101210037, "learning_rate": 5.860743846504429e-06, "loss": 328.7776, "step": 33250 }, { "epoch": 0.6396707407371792, "grad_norm": 418.2961299977733, "learning_rate": 5.8551888616904305e-06, "loss": 325.4701, "step": 33260 }, { "epoch": 0.639863065072939, "grad_norm": 420.5245189112849, "learning_rate": 5.849635420750131e-06, "loss": 335.8387, "step": 33270 }, { "epoch": 0.6400553894086989, "grad_norm": 426.4050479463985, "learning_rate": 5.84408352575209e-06, "loss": 321.9759, "step": 33280 }, { "epoch": 0.6402477137444587, "grad_norm": 423.98399844741635, "learning_rate": 5.8385331787642956e-06, "loss": 315.5348, "step": 33290 }, { "epoch": 0.6404400380802184, "grad_norm": 435.67662918847105, "learning_rate": 5.8329843818541665e-06, "loss": 326.7342, "step": 33300 }, { "epoch": 0.6406323624159783, "grad_norm": 463.14679690394445, "learning_rate": 5.827437137088535e-06, "loss": 312.5115, "step": 33310 }, { "epoch": 0.6408246867517381, "grad_norm": 444.29042742403766, "learning_rate": 5.8218914465336585e-06, "loss": 321.6006, "step": 33320 }, { "epoch": 0.641017011087498, "grad_norm": 430.5522148589923, "learning_rate": 5.816347312255209e-06, "loss": 321.4361, "step": 33330 }, { "epoch": 0.6412093354232578, "grad_norm": 433.57878399989, "learning_rate": 5.810804736318295e-06, "loss": 315.8478, "step": 33340 }, { "epoch": 0.6414016597590176, "grad_norm": 410.5329670801972, "learning_rate": 5.805263720787426e-06, "loss": 313.4332, "step": 33350 }, { "epoch": 0.6415939840947774, "grad_norm": 413.8264513873521, "learning_rate": 5.799724267726547e-06, "loss": 331.2219, "step": 33360 }, { "epoch": 0.6417863084305373, "grad_norm": 434.498285955691, "learning_rate": 5.794186379199004e-06, "loss": 324.3416, "step": 33370 }, { "epoch": 0.6419786327662971, "grad_norm": 415.7685317419322, "learning_rate": 5.7886500572675774e-06, "loss": 331.7816, "step": 33380 }, { "epoch": 0.642170957102057, "grad_norm": 411.44326253029175, "learning_rate": 5.783115303994451e-06, "loss": 331.2956, "step": 33390 }, { "epoch": 0.6423632814378167, "grad_norm": 480.5991771259947, "learning_rate": 5.777582121441227e-06, "loss": 329.7617, "step": 33400 }, { "epoch": 0.6425556057735765, "grad_norm": 469.1966076991373, "learning_rate": 5.772050511668931e-06, "loss": 325.405, "step": 33410 }, { "epoch": 0.6427479301093364, "grad_norm": 430.2266558638055, "learning_rate": 5.766520476737993e-06, "loss": 329.6934, "step": 33420 }, { "epoch": 0.6429402544450962, "grad_norm": 417.9938475095403, "learning_rate": 5.760992018708253e-06, "loss": 321.3423, "step": 33430 }, { "epoch": 0.6431325787808561, "grad_norm": 445.7673609332607, "learning_rate": 5.755465139638983e-06, "loss": 308.9907, "step": 33440 }, { "epoch": 0.6433249031166158, "grad_norm": 432.2937425308705, "learning_rate": 5.749939841588846e-06, "loss": 333.224, "step": 33450 }, { "epoch": 0.6435172274523757, "grad_norm": 396.1072917640877, "learning_rate": 5.744416126615926e-06, "loss": 322.1843, "step": 33460 }, { "epoch": 0.6437095517881355, "grad_norm": 448.37514029819243, "learning_rate": 5.738893996777713e-06, "loss": 330.3824, "step": 33470 }, { "epoch": 0.6439018761238954, "grad_norm": 443.17029410102015, "learning_rate": 5.7333734541311144e-06, "loss": 319.5919, "step": 33480 }, { "epoch": 0.6440942004596552, "grad_norm": 612.602437334116, "learning_rate": 5.727854500732435e-06, "loss": 329.0085, "step": 33490 }, { "epoch": 0.644286524795415, "grad_norm": 395.1838202480761, "learning_rate": 5.722337138637402e-06, "loss": 317.8712, "step": 33500 }, { "epoch": 0.6444788491311748, "grad_norm": 408.5772804821038, "learning_rate": 5.716821369901131e-06, "loss": 322.3591, "step": 33510 }, { "epoch": 0.6446711734669346, "grad_norm": 437.4398299274796, "learning_rate": 5.711307196578166e-06, "loss": 318.0684, "step": 33520 }, { "epoch": 0.6448634978026945, "grad_norm": 400.2229432446763, "learning_rate": 5.70579462072244e-06, "loss": 319.5992, "step": 33530 }, { "epoch": 0.6450558221384542, "grad_norm": 427.599522981999, "learning_rate": 5.700283644387292e-06, "loss": 323.8386, "step": 33540 }, { "epoch": 0.6452481464742141, "grad_norm": 430.965032771086, "learning_rate": 5.6947742696254794e-06, "loss": 329.6379, "step": 33550 }, { "epoch": 0.6454404708099739, "grad_norm": 393.64273338958515, "learning_rate": 5.689266498489148e-06, "loss": 324.8614, "step": 33560 }, { "epoch": 0.6456327951457338, "grad_norm": 423.86460278823523, "learning_rate": 5.683760333029851e-06, "loss": 327.9987, "step": 33570 }, { "epoch": 0.6458251194814936, "grad_norm": 426.0800268488462, "learning_rate": 5.678255775298542e-06, "loss": 318.0062, "step": 33580 }, { "epoch": 0.6460174438172535, "grad_norm": 473.4428249994104, "learning_rate": 5.672752827345584e-06, "loss": 318.4216, "step": 33590 }, { "epoch": 0.6462097681530132, "grad_norm": 425.11221321286064, "learning_rate": 5.667251491220731e-06, "loss": 314.7529, "step": 33600 }, { "epoch": 0.6464020924887731, "grad_norm": 397.038250473068, "learning_rate": 5.661751768973136e-06, "loss": 318.8346, "step": 33610 }, { "epoch": 0.6465944168245329, "grad_norm": 470.6579011756659, "learning_rate": 5.656253662651362e-06, "loss": 328.6394, "step": 33620 }, { "epoch": 0.6467867411602927, "grad_norm": 434.3707844144886, "learning_rate": 5.650757174303356e-06, "loss": 313.9123, "step": 33630 }, { "epoch": 0.6469790654960526, "grad_norm": 454.15912114352625, "learning_rate": 5.645262305976476e-06, "loss": 325.0698, "step": 33640 }, { "epoch": 0.6471713898318123, "grad_norm": 394.343867723725, "learning_rate": 5.639769059717462e-06, "loss": 317.2573, "step": 33650 }, { "epoch": 0.6473637141675722, "grad_norm": 443.84290679732754, "learning_rate": 5.634277437572466e-06, "loss": 322.7038, "step": 33660 }, { "epoch": 0.647556038503332, "grad_norm": 426.55076482747074, "learning_rate": 5.6287874415870225e-06, "loss": 322.7959, "step": 33670 }, { "epoch": 0.6477483628390919, "grad_norm": 438.08260777866343, "learning_rate": 5.623299073806063e-06, "loss": 317.4958, "step": 33680 }, { "epoch": 0.6479406871748516, "grad_norm": 402.14329703250354, "learning_rate": 5.617812336273912e-06, "loss": 323.1414, "step": 33690 }, { "epoch": 0.6481330115106115, "grad_norm": 453.62004875708766, "learning_rate": 5.612327231034296e-06, "loss": 317.6323, "step": 33700 }, { "epoch": 0.6483253358463713, "grad_norm": 411.53354932360133, "learning_rate": 5.606843760130321e-06, "loss": 318.9939, "step": 33710 }, { "epoch": 0.6485176601821312, "grad_norm": 459.62900713929673, "learning_rate": 5.601361925604485e-06, "loss": 322.6615, "step": 33720 }, { "epoch": 0.648709984517891, "grad_norm": 429.6514691001418, "learning_rate": 5.595881729498691e-06, "loss": 316.6743, "step": 33730 }, { "epoch": 0.6489023088536507, "grad_norm": 392.2215857929651, "learning_rate": 5.590403173854215e-06, "loss": 326.778, "step": 33740 }, { "epoch": 0.6490946331894106, "grad_norm": 391.2446133210011, "learning_rate": 5.584926260711732e-06, "loss": 324.8535, "step": 33750 }, { "epoch": 0.6492869575251704, "grad_norm": 413.7997329191996, "learning_rate": 5.579450992111294e-06, "loss": 325.6451, "step": 33760 }, { "epoch": 0.6494792818609303, "grad_norm": 388.137565885144, "learning_rate": 5.573977370092358e-06, "loss": 314.3849, "step": 33770 }, { "epoch": 0.6496716061966901, "grad_norm": 442.3418880343542, "learning_rate": 5.568505396693749e-06, "loss": 325.2625, "step": 33780 }, { "epoch": 0.64986393053245, "grad_norm": 444.99577517429634, "learning_rate": 5.563035073953691e-06, "loss": 331.6391, "step": 33790 }, { "epoch": 0.6500562548682097, "grad_norm": 406.7391152014691, "learning_rate": 5.557566403909794e-06, "loss": 315.0927, "step": 33800 }, { "epoch": 0.6502485792039696, "grad_norm": 416.18513318889, "learning_rate": 5.552099388599042e-06, "loss": 327.7045, "step": 33810 }, { "epoch": 0.6504409035397294, "grad_norm": 435.6835734121358, "learning_rate": 5.5466340300578095e-06, "loss": 316.6872, "step": 33820 }, { "epoch": 0.6506332278754893, "grad_norm": 425.301982879258, "learning_rate": 5.541170330321845e-06, "loss": 321.9507, "step": 33830 }, { "epoch": 0.650825552211249, "grad_norm": 388.1779247762942, "learning_rate": 5.535708291426297e-06, "loss": 322.6335, "step": 33840 }, { "epoch": 0.6510178765470088, "grad_norm": 417.2170676311644, "learning_rate": 5.53024791540568e-06, "loss": 318.0769, "step": 33850 }, { "epoch": 0.6512102008827687, "grad_norm": 411.30815786731057, "learning_rate": 5.5247892042938944e-06, "loss": 314.6812, "step": 33860 }, { "epoch": 0.6514025252185285, "grad_norm": 426.40200860632825, "learning_rate": 5.519332160124215e-06, "loss": 321.7491, "step": 33870 }, { "epoch": 0.6515948495542884, "grad_norm": 461.55399752605183, "learning_rate": 5.513876784929311e-06, "loss": 314.5473, "step": 33880 }, { "epoch": 0.6517871738900481, "grad_norm": 412.7237247104228, "learning_rate": 5.5084230807412135e-06, "loss": 313.2229, "step": 33890 }, { "epoch": 0.651979498225808, "grad_norm": 471.67806684335244, "learning_rate": 5.502971049591332e-06, "loss": 328.9412, "step": 33900 }, { "epoch": 0.6521718225615678, "grad_norm": 407.6270918724697, "learning_rate": 5.497520693510469e-06, "loss": 321.9584, "step": 33910 }, { "epoch": 0.6523641468973277, "grad_norm": 391.1070096585722, "learning_rate": 5.492072014528783e-06, "loss": 326.1006, "step": 33920 }, { "epoch": 0.6525564712330875, "grad_norm": 422.2585049304015, "learning_rate": 5.4866250146758235e-06, "loss": 323.3586, "step": 33930 }, { "epoch": 0.6527487955688474, "grad_norm": 420.1783076752337, "learning_rate": 5.481179695980503e-06, "loss": 321.3275, "step": 33940 }, { "epoch": 0.6529411199046071, "grad_norm": 391.98385017473686, "learning_rate": 5.475736060471117e-06, "loss": 327.6176, "step": 33950 }, { "epoch": 0.6531334442403669, "grad_norm": 408.94302995629175, "learning_rate": 5.470294110175329e-06, "loss": 321.7637, "step": 33960 }, { "epoch": 0.6533257685761268, "grad_norm": 406.17111579506235, "learning_rate": 5.464853847120169e-06, "loss": 315.7161, "step": 33970 }, { "epoch": 0.6535180929118866, "grad_norm": 444.5009568534264, "learning_rate": 5.459415273332056e-06, "loss": 325.0126, "step": 33980 }, { "epoch": 0.6537104172476464, "grad_norm": 410.50299705956877, "learning_rate": 5.453978390836763e-06, "loss": 313.3642, "step": 33990 }, { "epoch": 0.6539027415834062, "grad_norm": 422.1007201084567, "learning_rate": 5.44854320165944e-06, "loss": 326.4321, "step": 34000 }, { "epoch": 0.6540950659191661, "grad_norm": 434.0990973610923, "learning_rate": 5.443109707824599e-06, "loss": 330.3254, "step": 34010 }, { "epoch": 0.6542873902549259, "grad_norm": 432.81951475895295, "learning_rate": 5.437677911356137e-06, "loss": 321.7455, "step": 34020 }, { "epoch": 0.6544797145906858, "grad_norm": 432.9774423501474, "learning_rate": 5.432247814277305e-06, "loss": 314.3346, "step": 34030 }, { "epoch": 0.6546720389264455, "grad_norm": 464.89339637834706, "learning_rate": 5.426819418610718e-06, "loss": 328.4793, "step": 34040 }, { "epoch": 0.6548643632622054, "grad_norm": 422.37295282356837, "learning_rate": 5.4213927263783725e-06, "loss": 313.1352, "step": 34050 }, { "epoch": 0.6550566875979652, "grad_norm": 406.7230782663906, "learning_rate": 5.415967739601616e-06, "loss": 312.4417, "step": 34060 }, { "epoch": 0.655249011933725, "grad_norm": 430.46783667465684, "learning_rate": 5.41054446030117e-06, "loss": 321.0382, "step": 34070 }, { "epoch": 0.6554413362694849, "grad_norm": 466.0152177198623, "learning_rate": 5.405122890497114e-06, "loss": 331.1604, "step": 34080 }, { "epoch": 0.6556336606052446, "grad_norm": 404.7552280434815, "learning_rate": 5.399703032208896e-06, "loss": 321.9568, "step": 34090 }, { "epoch": 0.6558259849410045, "grad_norm": 384.31666367720516, "learning_rate": 5.3942848874553235e-06, "loss": 319.4767, "step": 34100 }, { "epoch": 0.6560183092767643, "grad_norm": 403.78580996538915, "learning_rate": 5.388868458254565e-06, "loss": 310.9621, "step": 34110 }, { "epoch": 0.6562106336125242, "grad_norm": 412.74071685789914, "learning_rate": 5.3834537466241455e-06, "loss": 316.402, "step": 34120 }, { "epoch": 0.656402957948284, "grad_norm": 394.62577335859555, "learning_rate": 5.378040754580964e-06, "loss": 312.2696, "step": 34130 }, { "epoch": 0.6565952822840438, "grad_norm": 484.3278084268684, "learning_rate": 5.37262948414127e-06, "loss": 323.8389, "step": 34140 }, { "epoch": 0.6567876066198036, "grad_norm": 454.9515756714042, "learning_rate": 5.367219937320663e-06, "loss": 315.6518, "step": 34150 }, { "epoch": 0.6569799309555635, "grad_norm": 429.8524095947134, "learning_rate": 5.361812116134122e-06, "loss": 319.3386, "step": 34160 }, { "epoch": 0.6571722552913233, "grad_norm": 462.0170696979145, "learning_rate": 5.356406022595963e-06, "loss": 324.6877, "step": 34170 }, { "epoch": 0.6573645796270832, "grad_norm": 434.2684872638781, "learning_rate": 5.351001658719872e-06, "loss": 323.9905, "step": 34180 }, { "epoch": 0.6575569039628429, "grad_norm": 384.55361116744785, "learning_rate": 5.345599026518877e-06, "loss": 322.8377, "step": 34190 }, { "epoch": 0.6577492282986027, "grad_norm": 408.24448614574965, "learning_rate": 5.3401981280053745e-06, "loss": 329.8235, "step": 34200 }, { "epoch": 0.6579415526343626, "grad_norm": 412.46697449906543, "learning_rate": 5.334798965191115e-06, "loss": 319.9216, "step": 34210 }, { "epoch": 0.6581338769701224, "grad_norm": 453.2313210783639, "learning_rate": 5.329401540087188e-06, "loss": 316.3315, "step": 34220 }, { "epoch": 0.6583262013058823, "grad_norm": 410.4965679772034, "learning_rate": 5.3240058547040525e-06, "loss": 311.8159, "step": 34230 }, { "epoch": 0.658518525641642, "grad_norm": 418.2916279810732, "learning_rate": 5.318611911051512e-06, "loss": 317.0869, "step": 34240 }, { "epoch": 0.6587108499774019, "grad_norm": 398.0746003532323, "learning_rate": 5.313219711138717e-06, "loss": 321.4545, "step": 34250 }, { "epoch": 0.6589031743131617, "grad_norm": 435.32959353558465, "learning_rate": 5.307829256974173e-06, "loss": 314.1013, "step": 34260 }, { "epoch": 0.6590954986489216, "grad_norm": 412.71587127992666, "learning_rate": 5.302440550565739e-06, "loss": 307.7437, "step": 34270 }, { "epoch": 0.6592878229846814, "grad_norm": 417.5824587680408, "learning_rate": 5.297053593920618e-06, "loss": 310.0909, "step": 34280 }, { "epoch": 0.6594801473204412, "grad_norm": 427.7726785260718, "learning_rate": 5.291668389045362e-06, "loss": 318.7255, "step": 34290 }, { "epoch": 0.659672471656201, "grad_norm": 404.77648442538276, "learning_rate": 5.286284937945866e-06, "loss": 322.9675, "step": 34300 }, { "epoch": 0.6598647959919608, "grad_norm": 419.15125676750876, "learning_rate": 5.280903242627384e-06, "loss": 324.9051, "step": 34310 }, { "epoch": 0.6600571203277207, "grad_norm": 700.4601942372726, "learning_rate": 5.2755233050945076e-06, "loss": 323.9069, "step": 34320 }, { "epoch": 0.6602494446634805, "grad_norm": 440.5554254683184, "learning_rate": 5.270145127351168e-06, "loss": 329.7043, "step": 34330 }, { "epoch": 0.6604417689992403, "grad_norm": 452.4434197686861, "learning_rate": 5.264768711400656e-06, "loss": 319.6636, "step": 34340 }, { "epoch": 0.6606340933350001, "grad_norm": 439.113167807011, "learning_rate": 5.25939405924559e-06, "loss": 321.309, "step": 34350 }, { "epoch": 0.66082641767076, "grad_norm": 419.5085140287832, "learning_rate": 5.254021172887947e-06, "loss": 328.4319, "step": 34360 }, { "epoch": 0.6610187420065198, "grad_norm": 420.4206765441334, "learning_rate": 5.248650054329032e-06, "loss": 327.2183, "step": 34370 }, { "epoch": 0.6612110663422797, "grad_norm": 406.7490039045254, "learning_rate": 5.2432807055695035e-06, "loss": 328.0459, "step": 34380 }, { "epoch": 0.6614033906780394, "grad_norm": 424.36551611046895, "learning_rate": 5.237913128609352e-06, "loss": 323.0216, "step": 34390 }, { "epoch": 0.6615957150137993, "grad_norm": 437.52935730821406, "learning_rate": 5.232547325447908e-06, "loss": 325.0063, "step": 34400 }, { "epoch": 0.6617880393495591, "grad_norm": 437.01287618092044, "learning_rate": 5.227183298083854e-06, "loss": 318.581, "step": 34410 }, { "epoch": 0.6619803636853189, "grad_norm": 448.95864817232075, "learning_rate": 5.221821048515193e-06, "loss": 321.3659, "step": 34420 }, { "epoch": 0.6621726880210788, "grad_norm": 423.5284873839197, "learning_rate": 5.216460578739278e-06, "loss": 315.2833, "step": 34430 }, { "epoch": 0.6623650123568385, "grad_norm": 409.9628990423758, "learning_rate": 5.211101890752792e-06, "loss": 314.3752, "step": 34440 }, { "epoch": 0.6625573366925984, "grad_norm": 440.23287487525386, "learning_rate": 5.205744986551763e-06, "loss": 317.4021, "step": 34450 }, { "epoch": 0.6627496610283582, "grad_norm": 419.76651942099187, "learning_rate": 5.200389868131547e-06, "loss": 330.2907, "step": 34460 }, { "epoch": 0.6629419853641181, "grad_norm": 422.6388629850351, "learning_rate": 5.195036537486833e-06, "loss": 328.2027, "step": 34470 }, { "epoch": 0.6631343096998779, "grad_norm": 401.3370957544165, "learning_rate": 5.189684996611657e-06, "loss": 328.9475, "step": 34480 }, { "epoch": 0.6633266340356377, "grad_norm": 389.84335812195116, "learning_rate": 5.18433524749937e-06, "loss": 313.9279, "step": 34490 }, { "epoch": 0.6635189583713975, "grad_norm": 447.09148903429923, "learning_rate": 5.178987292142674e-06, "loss": 334.1997, "step": 34500 }, { "epoch": 0.6637112827071574, "grad_norm": 408.0765166723503, "learning_rate": 5.173641132533586e-06, "loss": 320.0878, "step": 34510 }, { "epoch": 0.6639036070429172, "grad_norm": 451.49801340447186, "learning_rate": 5.16829677066347e-06, "loss": 329.4619, "step": 34520 }, { "epoch": 0.664095931378677, "grad_norm": 463.31000007246547, "learning_rate": 5.16295420852301e-06, "loss": 317.359, "step": 34530 }, { "epoch": 0.6642882557144368, "grad_norm": 425.00419191410685, "learning_rate": 5.157613448102219e-06, "loss": 322.7488, "step": 34540 }, { "epoch": 0.6644805800501966, "grad_norm": 403.49033485417215, "learning_rate": 5.152274491390441e-06, "loss": 319.9063, "step": 34550 }, { "epoch": 0.6646729043859565, "grad_norm": 458.3740647040998, "learning_rate": 5.1469373403763555e-06, "loss": 326.2286, "step": 34560 }, { "epoch": 0.6648652287217163, "grad_norm": 406.11874601058656, "learning_rate": 5.14160199704796e-06, "loss": 320.0305, "step": 34570 }, { "epoch": 0.6650575530574762, "grad_norm": 448.023791067687, "learning_rate": 5.136268463392578e-06, "loss": 316.2592, "step": 34580 }, { "epoch": 0.6652498773932359, "grad_norm": 461.41336024953245, "learning_rate": 5.13093674139687e-06, "loss": 319.7057, "step": 34590 }, { "epoch": 0.6654422017289958, "grad_norm": 409.72632238289253, "learning_rate": 5.12560683304681e-06, "loss": 313.9694, "step": 34600 }, { "epoch": 0.6656345260647556, "grad_norm": 407.0339279205974, "learning_rate": 5.120278740327702e-06, "loss": 316.8297, "step": 34610 }, { "epoch": 0.6658268504005155, "grad_norm": 443.27103645300116, "learning_rate": 5.114952465224168e-06, "loss": 319.7591, "step": 34620 }, { "epoch": 0.6660191747362753, "grad_norm": 434.1902683370954, "learning_rate": 5.109628009720162e-06, "loss": 330.3795, "step": 34630 }, { "epoch": 0.666211499072035, "grad_norm": 457.3037224180103, "learning_rate": 5.104305375798958e-06, "loss": 323.4473, "step": 34640 }, { "epoch": 0.6664038234077949, "grad_norm": 434.7740603021931, "learning_rate": 5.098984565443148e-06, "loss": 327.71, "step": 34650 }, { "epoch": 0.6665961477435547, "grad_norm": 445.9017906778416, "learning_rate": 5.093665580634639e-06, "loss": 313.9246, "step": 34660 }, { "epoch": 0.6667884720793146, "grad_norm": 442.4596365556121, "learning_rate": 5.088348423354674e-06, "loss": 314.1189, "step": 34670 }, { "epoch": 0.6669807964150744, "grad_norm": 426.5848077492119, "learning_rate": 5.0830330955838045e-06, "loss": 324.6211, "step": 34680 }, { "epoch": 0.6671731207508342, "grad_norm": 414.6957175063593, "learning_rate": 5.077719599301895e-06, "loss": 320.5044, "step": 34690 }, { "epoch": 0.667365445086594, "grad_norm": 450.4426821588187, "learning_rate": 5.072407936488145e-06, "loss": 339.2428, "step": 34700 }, { "epoch": 0.6675577694223539, "grad_norm": 429.1245757343828, "learning_rate": 5.067098109121058e-06, "loss": 319.8875, "step": 34710 }, { "epoch": 0.6677500937581137, "grad_norm": 434.6340471275704, "learning_rate": 5.0617901191784536e-06, "loss": 321.0468, "step": 34720 }, { "epoch": 0.6679424180938736, "grad_norm": 414.75166287316534, "learning_rate": 5.05648396863747e-06, "loss": 318.6448, "step": 34730 }, { "epoch": 0.6681347424296333, "grad_norm": 441.0143680065906, "learning_rate": 5.051179659474568e-06, "loss": 322.9612, "step": 34740 }, { "epoch": 0.6683270667653931, "grad_norm": 422.7741616526519, "learning_rate": 5.045877193665508e-06, "loss": 321.4521, "step": 34750 }, { "epoch": 0.668519391101153, "grad_norm": 433.93211677484453, "learning_rate": 5.040576573185372e-06, "loss": 320.7336, "step": 34760 }, { "epoch": 0.6687117154369128, "grad_norm": 447.1348325928919, "learning_rate": 5.0352778000085565e-06, "loss": 319.1764, "step": 34770 }, { "epoch": 0.6689040397726727, "grad_norm": 420.30364596003307, "learning_rate": 5.029980876108762e-06, "loss": 325.4064, "step": 34780 }, { "epoch": 0.6690963641084324, "grad_norm": 379.8542432665582, "learning_rate": 5.024685803459013e-06, "loss": 314.0862, "step": 34790 }, { "epoch": 0.6692886884441923, "grad_norm": 399.0285316648089, "learning_rate": 5.019392584031628e-06, "loss": 310.9487, "step": 34800 }, { "epoch": 0.6694810127799521, "grad_norm": 447.18871457515337, "learning_rate": 5.0141012197982534e-06, "loss": 329.5792, "step": 34810 }, { "epoch": 0.669673337115712, "grad_norm": 429.254181904457, "learning_rate": 5.0088117127298285e-06, "loss": 317.2112, "step": 34820 }, { "epoch": 0.6698656614514717, "grad_norm": 412.40775690696927, "learning_rate": 5.003524064796608e-06, "loss": 321.0009, "step": 34830 }, { "epoch": 0.6700579857872316, "grad_norm": 428.9910498813792, "learning_rate": 4.998238277968149e-06, "loss": 322.7637, "step": 34840 }, { "epoch": 0.6702503101229914, "grad_norm": 443.3119990918627, "learning_rate": 4.992954354213331e-06, "loss": 314.5496, "step": 34850 }, { "epoch": 0.6704426344587512, "grad_norm": 497.6257895821623, "learning_rate": 4.98767229550032e-06, "loss": 327.0348, "step": 34860 }, { "epoch": 0.6706349587945111, "grad_norm": 436.69055454832454, "learning_rate": 4.982392103796595e-06, "loss": 315.5988, "step": 34870 }, { "epoch": 0.6708272831302708, "grad_norm": 427.79002311571855, "learning_rate": 4.977113781068945e-06, "loss": 326.8617, "step": 34880 }, { "epoch": 0.6710196074660307, "grad_norm": 405.54540379520216, "learning_rate": 4.971837329283458e-06, "loss": 323.7293, "step": 34890 }, { "epoch": 0.6712119318017905, "grad_norm": 415.8029035353139, "learning_rate": 4.966562750405517e-06, "loss": 330.4927, "step": 34900 }, { "epoch": 0.6714042561375504, "grad_norm": 461.77870002641004, "learning_rate": 4.9612900463998274e-06, "loss": 318.3586, "step": 34910 }, { "epoch": 0.6715965804733102, "grad_norm": 447.17270875950743, "learning_rate": 4.9560192192303735e-06, "loss": 330.4827, "step": 34920 }, { "epoch": 0.67178890480907, "grad_norm": 436.8488392769194, "learning_rate": 4.95075027086046e-06, "loss": 316.7468, "step": 34930 }, { "epoch": 0.6719812291448298, "grad_norm": 434.0188965839647, "learning_rate": 4.9454832032526755e-06, "loss": 328.8788, "step": 34940 }, { "epoch": 0.6721735534805897, "grad_norm": 415.7735931947023, "learning_rate": 4.940218018368924e-06, "loss": 323.4168, "step": 34950 }, { "epoch": 0.6723658778163495, "grad_norm": 413.5492003926558, "learning_rate": 4.934954718170396e-06, "loss": 318.2195, "step": 34960 }, { "epoch": 0.6725582021521093, "grad_norm": 418.6289685293497, "learning_rate": 4.9296933046175834e-06, "loss": 312.7955, "step": 34970 }, { "epoch": 0.6727505264878691, "grad_norm": 435.4569139195881, "learning_rate": 4.924433779670271e-06, "loss": 311.7696, "step": 34980 }, { "epoch": 0.6729428508236289, "grad_norm": 437.9520875898177, "learning_rate": 4.9191761452875554e-06, "loss": 328.6258, "step": 34990 }, { "epoch": 0.6731351751593888, "grad_norm": 433.1435276509337, "learning_rate": 4.913920403427812e-06, "loss": 317.9559, "step": 35000 }, { "epoch": 0.6733274994951486, "grad_norm": 445.6820626253493, "learning_rate": 4.908666556048719e-06, "loss": 314.5841, "step": 35010 }, { "epoch": 0.6735198238309085, "grad_norm": 384.0392196330658, "learning_rate": 4.903414605107244e-06, "loss": 317.4141, "step": 35020 }, { "epoch": 0.6737121481666682, "grad_norm": 416.9583123576296, "learning_rate": 4.89816455255966e-06, "loss": 317.8913, "step": 35030 }, { "epoch": 0.6739044725024281, "grad_norm": 430.47849034999916, "learning_rate": 4.892916400361516e-06, "loss": 321.4291, "step": 35040 }, { "epoch": 0.6740967968381879, "grad_norm": 422.74257258075477, "learning_rate": 4.887670150467671e-06, "loss": 320.9816, "step": 35050 }, { "epoch": 0.6742891211739478, "grad_norm": 415.55334900158067, "learning_rate": 4.882425804832258e-06, "loss": 320.2086, "step": 35060 }, { "epoch": 0.6744814455097076, "grad_norm": 1750.8438591972854, "learning_rate": 4.8771833654087165e-06, "loss": 318.8852, "step": 35070 }, { "epoch": 0.6746737698454673, "grad_norm": 452.511906008904, "learning_rate": 4.8719428341497665e-06, "loss": 320.3105, "step": 35080 }, { "epoch": 0.6748660941812272, "grad_norm": 414.2766381834859, "learning_rate": 4.866704213007413e-06, "loss": 324.9075, "step": 35090 }, { "epoch": 0.675058418516987, "grad_norm": 440.11892878794276, "learning_rate": 4.861467503932965e-06, "loss": 323.142, "step": 35100 }, { "epoch": 0.6752507428527469, "grad_norm": 386.32848106126545, "learning_rate": 4.856232708877007e-06, "loss": 312.7934, "step": 35110 }, { "epoch": 0.6754430671885067, "grad_norm": 463.09324815980216, "learning_rate": 4.8509998297894075e-06, "loss": 325.0859, "step": 35120 }, { "epoch": 0.6756353915242665, "grad_norm": 454.8062208909201, "learning_rate": 4.845768868619338e-06, "loss": 318.9287, "step": 35130 }, { "epoch": 0.6758277158600263, "grad_norm": 453.26999025394167, "learning_rate": 4.8405398273152405e-06, "loss": 327.3516, "step": 35140 }, { "epoch": 0.6760200401957862, "grad_norm": 434.3070018409594, "learning_rate": 4.8353127078248455e-06, "loss": 334.8855, "step": 35150 }, { "epoch": 0.676212364531546, "grad_norm": 427.8667220911036, "learning_rate": 4.830087512095164e-06, "loss": 315.9448, "step": 35160 }, { "epoch": 0.6764046888673059, "grad_norm": 424.11467993643754, "learning_rate": 4.824864242072506e-06, "loss": 317.7206, "step": 35170 }, { "epoch": 0.6765970132030656, "grad_norm": 468.14699644869796, "learning_rate": 4.819642899702447e-06, "loss": 320.1482, "step": 35180 }, { "epoch": 0.6767893375388255, "grad_norm": 416.84003049414696, "learning_rate": 4.814423486929846e-06, "loss": 318.9567, "step": 35190 }, { "epoch": 0.6769816618745853, "grad_norm": 450.1386803380564, "learning_rate": 4.809206005698856e-06, "loss": 315.3346, "step": 35200 }, { "epoch": 0.6771739862103451, "grad_norm": 408.23576990355565, "learning_rate": 4.803990457952903e-06, "loss": 332.8654, "step": 35210 }, { "epoch": 0.677366310546105, "grad_norm": 426.6508150301355, "learning_rate": 4.798776845634688e-06, "loss": 315.4395, "step": 35220 }, { "epoch": 0.6775586348818647, "grad_norm": 427.3549892373833, "learning_rate": 4.793565170686196e-06, "loss": 324.9042, "step": 35230 }, { "epoch": 0.6777509592176246, "grad_norm": 471.2052981869491, "learning_rate": 4.7883554350486925e-06, "loss": 327.6963, "step": 35240 }, { "epoch": 0.6779432835533844, "grad_norm": 408.1241504944268, "learning_rate": 4.783147640662716e-06, "loss": 313.6297, "step": 35250 }, { "epoch": 0.6781356078891443, "grad_norm": 484.14036270031346, "learning_rate": 4.777941789468086e-06, "loss": 318.3194, "step": 35260 }, { "epoch": 0.6783279322249041, "grad_norm": 436.8865340266188, "learning_rate": 4.772737883403889e-06, "loss": 329.4075, "step": 35270 }, { "epoch": 0.678520256560664, "grad_norm": 452.4959936775624, "learning_rate": 4.767535924408504e-06, "loss": 323.2735, "step": 35280 }, { "epoch": 0.6787125808964237, "grad_norm": 413.2911630874319, "learning_rate": 4.76233591441957e-06, "loss": 322.1622, "step": 35290 }, { "epoch": 0.6789049052321836, "grad_norm": 455.08512498410465, "learning_rate": 4.757137855374001e-06, "loss": 315.3964, "step": 35300 }, { "epoch": 0.6790972295679434, "grad_norm": 438.72567379585894, "learning_rate": 4.751941749207996e-06, "loss": 324.6634, "step": 35310 }, { "epoch": 0.6792895539037032, "grad_norm": 389.61972316237666, "learning_rate": 4.746747597857014e-06, "loss": 318.8967, "step": 35320 }, { "epoch": 0.679481878239463, "grad_norm": 439.1397239993643, "learning_rate": 4.74155540325579e-06, "loss": 321.9838, "step": 35330 }, { "epoch": 0.6796742025752228, "grad_norm": 459.8977558007983, "learning_rate": 4.736365167338333e-06, "loss": 331.2183, "step": 35340 }, { "epoch": 0.6798665269109827, "grad_norm": 426.0949228352632, "learning_rate": 4.731176892037918e-06, "loss": 315.3601, "step": 35350 }, { "epoch": 0.6800588512467425, "grad_norm": 390.0340840659755, "learning_rate": 4.725990579287097e-06, "loss": 313.4447, "step": 35360 }, { "epoch": 0.6802511755825024, "grad_norm": 456.2995428175703, "learning_rate": 4.720806231017676e-06, "loss": 322.5771, "step": 35370 }, { "epoch": 0.6804434999182621, "grad_norm": 441.44460948213367, "learning_rate": 4.7156238491607506e-06, "loss": 321.9458, "step": 35380 }, { "epoch": 0.680635824254022, "grad_norm": 430.9441742966411, "learning_rate": 4.710443435646666e-06, "loss": 316.4179, "step": 35390 }, { "epoch": 0.6808281485897818, "grad_norm": 435.88259915838825, "learning_rate": 4.705264992405043e-06, "loss": 316.0891, "step": 35400 }, { "epoch": 0.6810204729255417, "grad_norm": 416.05257552492986, "learning_rate": 4.700088521364761e-06, "loss": 316.9074, "step": 35410 }, { "epoch": 0.6812127972613015, "grad_norm": 428.28329795980795, "learning_rate": 4.694914024453977e-06, "loss": 322.7706, "step": 35420 }, { "epoch": 0.6814051215970612, "grad_norm": 459.55013655554416, "learning_rate": 4.689741503600103e-06, "loss": 321.8156, "step": 35430 }, { "epoch": 0.6815974459328211, "grad_norm": 479.46912568338354, "learning_rate": 4.684570960729818e-06, "loss": 320.3851, "step": 35440 }, { "epoch": 0.6817897702685809, "grad_norm": 422.8128373242532, "learning_rate": 4.679402397769057e-06, "loss": 314.1304, "step": 35450 }, { "epoch": 0.6819820946043408, "grad_norm": 441.5462664805623, "learning_rate": 4.674235816643035e-06, "loss": 332.4933, "step": 35460 }, { "epoch": 0.6821744189401006, "grad_norm": 384.86895803155187, "learning_rate": 4.66907121927621e-06, "loss": 317.1574, "step": 35470 }, { "epoch": 0.6823667432758604, "grad_norm": 410.05636297872826, "learning_rate": 4.663908607592316e-06, "loss": 314.3527, "step": 35480 }, { "epoch": 0.6825590676116202, "grad_norm": 425.6598640479102, "learning_rate": 4.658747983514334e-06, "loss": 308.4277, "step": 35490 }, { "epoch": 0.6827513919473801, "grad_norm": 452.20897191454713, "learning_rate": 4.653589348964517e-06, "loss": 318.5503, "step": 35500 }, { "epoch": 0.6829437162831399, "grad_norm": 427.8196403112476, "learning_rate": 4.648432705864369e-06, "loss": 342.3499, "step": 35510 }, { "epoch": 0.6831360406188998, "grad_norm": 442.14298183885467, "learning_rate": 4.64327805613465e-06, "loss": 328.952, "step": 35520 }, { "epoch": 0.6833283649546595, "grad_norm": 447.62615924007883, "learning_rate": 4.638125401695391e-06, "loss": 316.3195, "step": 35530 }, { "epoch": 0.6835206892904193, "grad_norm": 442.65206444983187, "learning_rate": 4.632974744465865e-06, "loss": 325.5538, "step": 35540 }, { "epoch": 0.6837130136261792, "grad_norm": 423.9748888459649, "learning_rate": 4.627826086364603e-06, "loss": 328.1231, "step": 35550 }, { "epoch": 0.683905337961939, "grad_norm": 448.42457965304294, "learning_rate": 4.622679429309404e-06, "loss": 321.4165, "step": 35560 }, { "epoch": 0.6840976622976989, "grad_norm": 410.47197474996636, "learning_rate": 4.617534775217307e-06, "loss": 309.5952, "step": 35570 }, { "epoch": 0.6842899866334586, "grad_norm": 413.0516274306436, "learning_rate": 4.6123921260046135e-06, "loss": 315.1094, "step": 35580 }, { "epoch": 0.6844823109692185, "grad_norm": 411.0255269052447, "learning_rate": 4.607251483586869e-06, "loss": 326.479, "step": 35590 }, { "epoch": 0.6846746353049783, "grad_norm": 392.1154093571456, "learning_rate": 4.6021128498788855e-06, "loss": 311.27, "step": 35600 }, { "epoch": 0.6848669596407382, "grad_norm": 394.2181861863902, "learning_rate": 4.596976226794718e-06, "loss": 312.2333, "step": 35610 }, { "epoch": 0.685059283976498, "grad_norm": 407.61710030899235, "learning_rate": 4.591841616247669e-06, "loss": 326.9352, "step": 35620 }, { "epoch": 0.6852516083122578, "grad_norm": 437.2303815803078, "learning_rate": 4.586709020150297e-06, "loss": 317.9281, "step": 35630 }, { "epoch": 0.6854439326480176, "grad_norm": 432.15993690773496, "learning_rate": 4.581578440414417e-06, "loss": 314.4608, "step": 35640 }, { "epoch": 0.6856362569837774, "grad_norm": 417.4586990853319, "learning_rate": 4.576449878951079e-06, "loss": 323.0189, "step": 35650 }, { "epoch": 0.6858285813195373, "grad_norm": 468.54385730553463, "learning_rate": 4.571323337670584e-06, "loss": 318.4184, "step": 35660 }, { "epoch": 0.686020905655297, "grad_norm": 382.41200637957206, "learning_rate": 4.566198818482494e-06, "loss": 322.2061, "step": 35670 }, { "epoch": 0.6862132299910569, "grad_norm": 400.2019477047444, "learning_rate": 4.5610763232956e-06, "loss": 322.0258, "step": 35680 }, { "epoch": 0.6864055543268167, "grad_norm": 441.2491479190202, "learning_rate": 4.55595585401795e-06, "loss": 330.1082, "step": 35690 }, { "epoch": 0.6865978786625766, "grad_norm": 470.1692026415292, "learning_rate": 4.550837412556828e-06, "loss": 332.463, "step": 35700 }, { "epoch": 0.6867902029983364, "grad_norm": 438.80469802375, "learning_rate": 4.545721000818778e-06, "loss": 330.1236, "step": 35710 }, { "epoch": 0.6869825273340963, "grad_norm": 421.0891993596618, "learning_rate": 4.540606620709575e-06, "loss": 336.9304, "step": 35720 }, { "epoch": 0.687174851669856, "grad_norm": 416.42238642172526, "learning_rate": 4.535494274134236e-06, "loss": 329.0043, "step": 35730 }, { "epoch": 0.6873671760056159, "grad_norm": 424.04608668078015, "learning_rate": 4.530383962997036e-06, "loss": 323.8193, "step": 35740 }, { "epoch": 0.6875595003413757, "grad_norm": 437.76803575672636, "learning_rate": 4.525275689201476e-06, "loss": 333.0717, "step": 35750 }, { "epoch": 0.6877518246771355, "grad_norm": 462.1188378558478, "learning_rate": 4.520169454650299e-06, "loss": 325.3063, "step": 35760 }, { "epoch": 0.6879441490128954, "grad_norm": 461.56490783765344, "learning_rate": 4.515065261245498e-06, "loss": 326.2405, "step": 35770 }, { "epoch": 0.6881364733486551, "grad_norm": 432.38445745970904, "learning_rate": 4.509963110888305e-06, "loss": 320.8102, "step": 35780 }, { "epoch": 0.688328797684415, "grad_norm": 422.6874764980561, "learning_rate": 4.504863005479182e-06, "loss": 312.7877, "step": 35790 }, { "epoch": 0.6885211220201748, "grad_norm": 456.0143171697234, "learning_rate": 4.499764946917834e-06, "loss": 325.3387, "step": 35800 }, { "epoch": 0.6887134463559347, "grad_norm": 417.593090752741, "learning_rate": 4.494668937103201e-06, "loss": 322.0784, "step": 35810 }, { "epoch": 0.6889057706916945, "grad_norm": 410.56786109087756, "learning_rate": 4.489574977933467e-06, "loss": 316.2785, "step": 35820 }, { "epoch": 0.6890980950274543, "grad_norm": 422.9725731446824, "learning_rate": 4.484483071306048e-06, "loss": 329.7869, "step": 35830 }, { "epoch": 0.6892904193632141, "grad_norm": 390.9590335764464, "learning_rate": 4.479393219117588e-06, "loss": 315.1229, "step": 35840 }, { "epoch": 0.689482743698974, "grad_norm": 389.16371732470805, "learning_rate": 4.474305423263984e-06, "loss": 318.954, "step": 35850 }, { "epoch": 0.6896750680347338, "grad_norm": 469.5618475205626, "learning_rate": 4.469219685640348e-06, "loss": 312.1851, "step": 35860 }, { "epoch": 0.6898673923704935, "grad_norm": 434.07403473881715, "learning_rate": 4.4641360081410356e-06, "loss": 321.7083, "step": 35870 }, { "epoch": 0.6900597167062534, "grad_norm": 452.0394084384201, "learning_rate": 4.4590543926596285e-06, "loss": 327.3819, "step": 35880 }, { "epoch": 0.6902520410420132, "grad_norm": 420.01793573626543, "learning_rate": 4.453974841088953e-06, "loss": 326.828, "step": 35890 }, { "epoch": 0.6904443653777731, "grad_norm": 432.57851679897203, "learning_rate": 4.448897355321049e-06, "loss": 320.2067, "step": 35900 }, { "epoch": 0.6906366897135329, "grad_norm": 400.5118038017992, "learning_rate": 4.443821937247205e-06, "loss": 319.4693, "step": 35910 }, { "epoch": 0.6908290140492928, "grad_norm": 453.0344500732784, "learning_rate": 4.438748588757922e-06, "loss": 320.6646, "step": 35920 }, { "epoch": 0.6910213383850525, "grad_norm": 402.8498358206257, "learning_rate": 4.433677311742945e-06, "loss": 317.5354, "step": 35930 }, { "epoch": 0.6912136627208124, "grad_norm": 421.0047258099239, "learning_rate": 4.428608108091241e-06, "loss": 316.8761, "step": 35940 }, { "epoch": 0.6914059870565722, "grad_norm": 434.6702319251514, "learning_rate": 4.4235409796909965e-06, "loss": 318.32, "step": 35950 }, { "epoch": 0.6915983113923321, "grad_norm": 453.69007174373684, "learning_rate": 4.418475928429644e-06, "loss": 318.7631, "step": 35960 }, { "epoch": 0.6917906357280919, "grad_norm": 427.19543539884825, "learning_rate": 4.413412956193826e-06, "loss": 331.0233, "step": 35970 }, { "epoch": 0.6919829600638516, "grad_norm": 462.50477600824416, "learning_rate": 4.4083520648694156e-06, "loss": 309.2859, "step": 35980 }, { "epoch": 0.6921752843996115, "grad_norm": 407.72338468544166, "learning_rate": 4.4032932563415075e-06, "loss": 317.1252, "step": 35990 }, { "epoch": 0.6923676087353713, "grad_norm": 410.47854713205356, "learning_rate": 4.398236532494434e-06, "loss": 318.1686, "step": 36000 }, { "epoch": 0.6925599330711312, "grad_norm": 426.3098330289822, "learning_rate": 4.393181895211735e-06, "loss": 330.8465, "step": 36010 }, { "epoch": 0.692752257406891, "grad_norm": 432.56987064272624, "learning_rate": 4.388129346376177e-06, "loss": 319.1718, "step": 36020 }, { "epoch": 0.6929445817426508, "grad_norm": 422.49937614367343, "learning_rate": 4.383078887869759e-06, "loss": 314.139, "step": 36030 }, { "epoch": 0.6931369060784106, "grad_norm": 393.68416366476873, "learning_rate": 4.378030521573683e-06, "loss": 312.853, "step": 36040 }, { "epoch": 0.6933292304141705, "grad_norm": 433.5787346913957, "learning_rate": 4.372984249368393e-06, "loss": 319.1204, "step": 36050 }, { "epoch": 0.6935215547499303, "grad_norm": 404.5171984953886, "learning_rate": 4.367940073133533e-06, "loss": 316.0725, "step": 36060 }, { "epoch": 0.6937138790856902, "grad_norm": 406.25670450214375, "learning_rate": 4.362897994747982e-06, "loss": 318.8176, "step": 36070 }, { "epoch": 0.6939062034214499, "grad_norm": 452.1534131155957, "learning_rate": 4.35785801608983e-06, "loss": 310.5954, "step": 36080 }, { "epoch": 0.6940985277572097, "grad_norm": 421.65515297007295, "learning_rate": 4.352820139036379e-06, "loss": 318.6872, "step": 36090 }, { "epoch": 0.6942908520929696, "grad_norm": 421.3804530036144, "learning_rate": 4.347784365464163e-06, "loss": 316.7017, "step": 36100 }, { "epoch": 0.6944831764287294, "grad_norm": 397.0646841525965, "learning_rate": 4.342750697248922e-06, "loss": 321.6771, "step": 36110 }, { "epoch": 0.6946755007644893, "grad_norm": 422.6723280240146, "learning_rate": 4.337719136265614e-06, "loss": 309.0128, "step": 36120 }, { "epoch": 0.694867825100249, "grad_norm": 415.55311502447444, "learning_rate": 4.332689684388408e-06, "loss": 313.8722, "step": 36130 }, { "epoch": 0.6950601494360089, "grad_norm": 423.4773718166429, "learning_rate": 4.327662343490701e-06, "loss": 317.5374, "step": 36140 }, { "epoch": 0.6952524737717687, "grad_norm": 452.1517974212016, "learning_rate": 4.322637115445088e-06, "loss": 328.0838, "step": 36150 }, { "epoch": 0.6954447981075286, "grad_norm": 426.1388858511226, "learning_rate": 4.3176140021233845e-06, "loss": 317.6973, "step": 36160 }, { "epoch": 0.6956371224432883, "grad_norm": 429.45168713059724, "learning_rate": 4.312593005396615e-06, "loss": 320.0352, "step": 36170 }, { "epoch": 0.6958294467790482, "grad_norm": 403.3143830262678, "learning_rate": 4.307574127135022e-06, "loss": 313.7953, "step": 36180 }, { "epoch": 0.696021771114808, "grad_norm": 436.50226966201575, "learning_rate": 4.3025573692080516e-06, "loss": 320.5324, "step": 36190 }, { "epoch": 0.6962140954505678, "grad_norm": 453.66195104880705, "learning_rate": 4.297542733484364e-06, "loss": 314.4829, "step": 36200 }, { "epoch": 0.6964064197863277, "grad_norm": 405.5417394794248, "learning_rate": 4.292530221831832e-06, "loss": 321.7942, "step": 36210 }, { "epoch": 0.6965987441220874, "grad_norm": 404.73616107383924, "learning_rate": 4.2875198361175305e-06, "loss": 318.7937, "step": 36220 }, { "epoch": 0.6967910684578473, "grad_norm": 399.56937900877875, "learning_rate": 4.282511578207746e-06, "loss": 314.5185, "step": 36230 }, { "epoch": 0.6969833927936071, "grad_norm": 406.6588756610703, "learning_rate": 4.277505449967967e-06, "loss": 322.8663, "step": 36240 }, { "epoch": 0.697175717129367, "grad_norm": 402.17404522228526, "learning_rate": 4.2725014532629015e-06, "loss": 319.3661, "step": 36250 }, { "epoch": 0.6973680414651268, "grad_norm": 452.1472078977409, "learning_rate": 4.267499589956453e-06, "loss": 319.5087, "step": 36260 }, { "epoch": 0.6975603658008867, "grad_norm": 403.30188694757953, "learning_rate": 4.262499861911727e-06, "loss": 326.616, "step": 36270 }, { "epoch": 0.6977526901366464, "grad_norm": 419.7092005627183, "learning_rate": 4.257502270991048e-06, "loss": 319.4938, "step": 36280 }, { "epoch": 0.6979450144724063, "grad_norm": 419.52135988075264, "learning_rate": 4.252506819055934e-06, "loss": 323.4875, "step": 36290 }, { "epoch": 0.6981373388081661, "grad_norm": 427.88695466954175, "learning_rate": 4.2475135079671045e-06, "loss": 319.5693, "step": 36300 }, { "epoch": 0.698329663143926, "grad_norm": 393.3490937110835, "learning_rate": 4.242522339584486e-06, "loss": 313.5424, "step": 36310 }, { "epoch": 0.6985219874796857, "grad_norm": 409.02350478239816, "learning_rate": 4.2375333157672114e-06, "loss": 319.2571, "step": 36320 }, { "epoch": 0.6987143118154455, "grad_norm": 423.795398104954, "learning_rate": 4.232546438373604e-06, "loss": 318.6126, "step": 36330 }, { "epoch": 0.6989066361512054, "grad_norm": 435.46200281753823, "learning_rate": 4.227561709261198e-06, "loss": 311.0569, "step": 36340 }, { "epoch": 0.6990989604869652, "grad_norm": 397.6911741704681, "learning_rate": 4.222579130286716e-06, "loss": 322.0474, "step": 36350 }, { "epoch": 0.6992912848227251, "grad_norm": 409.05316196009346, "learning_rate": 4.217598703306095e-06, "loss": 325.3665, "step": 36360 }, { "epoch": 0.6994836091584848, "grad_norm": 473.30051214422394, "learning_rate": 4.212620430174457e-06, "loss": 325.2845, "step": 36370 }, { "epoch": 0.6996759334942447, "grad_norm": 435.49511057850685, "learning_rate": 4.207644312746124e-06, "loss": 317.5285, "step": 36380 }, { "epoch": 0.6998682578300045, "grad_norm": 399.91260472400734, "learning_rate": 4.202670352874625e-06, "loss": 315.4795, "step": 36390 }, { "epoch": 0.7000605821657644, "grad_norm": 423.6605439545644, "learning_rate": 4.197698552412672e-06, "loss": 329.5323, "step": 36400 }, { "epoch": 0.7002529065015242, "grad_norm": 475.38245952731415, "learning_rate": 4.192728913212181e-06, "loss": 322.5399, "step": 36410 }, { "epoch": 0.700445230837284, "grad_norm": 390.3484468111378, "learning_rate": 4.187761437124256e-06, "loss": 309.5572, "step": 36420 }, { "epoch": 0.7006375551730438, "grad_norm": 475.8194702864543, "learning_rate": 4.182796125999207e-06, "loss": 327.1869, "step": 36430 }, { "epoch": 0.7008298795088036, "grad_norm": 509.108401621469, "learning_rate": 4.177832981686526e-06, "loss": 323.9206, "step": 36440 }, { "epoch": 0.7010222038445635, "grad_norm": 405.4920872724414, "learning_rate": 4.172872006034899e-06, "loss": 319.7336, "step": 36450 }, { "epoch": 0.7012145281803233, "grad_norm": 458.9281640547833, "learning_rate": 4.167913200892217e-06, "loss": 313.4159, "step": 36460 }, { "epoch": 0.7014068525160831, "grad_norm": 398.40214948296756, "learning_rate": 4.162956568105543e-06, "loss": 330.2787, "step": 36470 }, { "epoch": 0.7015991768518429, "grad_norm": 417.856516036245, "learning_rate": 4.158002109521149e-06, "loss": 314.8625, "step": 36480 }, { "epoch": 0.7017915011876028, "grad_norm": 445.42659397270586, "learning_rate": 4.153049826984482e-06, "loss": 319.1083, "step": 36490 }, { "epoch": 0.7019838255233626, "grad_norm": 418.46396241992085, "learning_rate": 4.148099722340192e-06, "loss": 330.4729, "step": 36500 }, { "epoch": 0.7021761498591225, "grad_norm": 424.9163536276126, "learning_rate": 4.143151797432109e-06, "loss": 316.9279, "step": 36510 }, { "epoch": 0.7023684741948822, "grad_norm": 411.56526316672574, "learning_rate": 4.1382060541032505e-06, "loss": 319.7038, "step": 36520 }, { "epoch": 0.7025607985306421, "grad_norm": 390.34747349856855, "learning_rate": 4.133262494195824e-06, "loss": 320.4265, "step": 36530 }, { "epoch": 0.7027531228664019, "grad_norm": 402.78935819454125, "learning_rate": 4.12832111955123e-06, "loss": 308.1256, "step": 36540 }, { "epoch": 0.7029454472021617, "grad_norm": 426.8640507669044, "learning_rate": 4.123381932010044e-06, "loss": 323.2139, "step": 36550 }, { "epoch": 0.7031377715379216, "grad_norm": 407.3328436129975, "learning_rate": 4.11844493341203e-06, "loss": 319.6881, "step": 36560 }, { "epoch": 0.7033300958736813, "grad_norm": 398.9993406988549, "learning_rate": 4.113510125596145e-06, "loss": 323.4684, "step": 36570 }, { "epoch": 0.7035224202094412, "grad_norm": 439.52176042845457, "learning_rate": 4.1085775104005186e-06, "loss": 308.9553, "step": 36580 }, { "epoch": 0.703714744545201, "grad_norm": 429.7479719974011, "learning_rate": 4.10364708966247e-06, "loss": 321.9735, "step": 36590 }, { "epoch": 0.7039070688809609, "grad_norm": 401.8833282337944, "learning_rate": 4.098718865218496e-06, "loss": 309.0076, "step": 36600 }, { "epoch": 0.7040993932167207, "grad_norm": 411.4078023684322, "learning_rate": 4.0937928389042815e-06, "loss": 327.422, "step": 36610 }, { "epoch": 0.7042917175524805, "grad_norm": 396.5296601982585, "learning_rate": 4.088869012554694e-06, "loss": 312.1202, "step": 36620 }, { "epoch": 0.7044840418882403, "grad_norm": 424.8644766208361, "learning_rate": 4.08394738800377e-06, "loss": 319.6057, "step": 36630 }, { "epoch": 0.7046763662240002, "grad_norm": 405.32483089107455, "learning_rate": 4.07902796708474e-06, "loss": 311.9721, "step": 36640 }, { "epoch": 0.70486869055976, "grad_norm": 435.80359522449277, "learning_rate": 4.074110751630005e-06, "loss": 326.5005, "step": 36650 }, { "epoch": 0.7050610148955198, "grad_norm": 424.77444273655794, "learning_rate": 4.0691957434711446e-06, "loss": 315.0795, "step": 36660 }, { "epoch": 0.7052533392312796, "grad_norm": 403.6574748576981, "learning_rate": 4.0642829444389165e-06, "loss": 315.7512, "step": 36670 }, { "epoch": 0.7054456635670394, "grad_norm": 390.903316881906, "learning_rate": 4.059372356363263e-06, "loss": 328.6013, "step": 36680 }, { "epoch": 0.7056379879027993, "grad_norm": 374.0867880608783, "learning_rate": 4.054463981073296e-06, "loss": 317.724, "step": 36690 }, { "epoch": 0.7058303122385591, "grad_norm": 407.68246192039663, "learning_rate": 4.049557820397297e-06, "loss": 322.0198, "step": 36700 }, { "epoch": 0.706022636574319, "grad_norm": 434.1481626400727, "learning_rate": 4.044653876162738e-06, "loss": 309.617, "step": 36710 }, { "epoch": 0.7062149609100787, "grad_norm": 412.56129671744196, "learning_rate": 4.039752150196257e-06, "loss": 327.7791, "step": 36720 }, { "epoch": 0.7064072852458386, "grad_norm": 381.78473739539356, "learning_rate": 4.034852644323661e-06, "loss": 320.3679, "step": 36730 }, { "epoch": 0.7065996095815984, "grad_norm": 392.7972631087244, "learning_rate": 4.029955360369935e-06, "loss": 312.0091, "step": 36740 }, { "epoch": 0.7067919339173583, "grad_norm": 432.1985367463212, "learning_rate": 4.0250603001592416e-06, "loss": 307.5055, "step": 36750 }, { "epoch": 0.7069842582531181, "grad_norm": 382.8019494597333, "learning_rate": 4.020167465514903e-06, "loss": 313.0305, "step": 36760 }, { "epoch": 0.7071765825888778, "grad_norm": 503.78838334506656, "learning_rate": 4.015276858259427e-06, "loss": 318.0343, "step": 36770 }, { "epoch": 0.7073689069246377, "grad_norm": 437.9965086349302, "learning_rate": 4.0103884802144775e-06, "loss": 320.7152, "step": 36780 }, { "epoch": 0.7075612312603975, "grad_norm": 408.0095816573821, "learning_rate": 4.0055023332009e-06, "loss": 319.4751, "step": 36790 }, { "epoch": 0.7077535555961574, "grad_norm": 396.7262535962733, "learning_rate": 4.000618419038702e-06, "loss": 302.0847, "step": 36800 }, { "epoch": 0.7079458799319172, "grad_norm": 407.658502782889, "learning_rate": 3.9957367395470555e-06, "loss": 323.6847, "step": 36810 }, { "epoch": 0.708138204267677, "grad_norm": 389.20227287430475, "learning_rate": 3.990857296544315e-06, "loss": 308.0567, "step": 36820 }, { "epoch": 0.7083305286034368, "grad_norm": 417.6877878225282, "learning_rate": 3.985980091847985e-06, "loss": 314.1982, "step": 36830 }, { "epoch": 0.7085228529391967, "grad_norm": 431.57888892217744, "learning_rate": 3.981105127274748e-06, "loss": 320.3472, "step": 36840 }, { "epoch": 0.7087151772749565, "grad_norm": 469.0253328628941, "learning_rate": 3.976232404640441e-06, "loss": 307.5793, "step": 36850 }, { "epoch": 0.7089075016107164, "grad_norm": 407.1340414933974, "learning_rate": 3.971361925760081e-06, "loss": 318.378, "step": 36860 }, { "epoch": 0.7090998259464761, "grad_norm": 418.751691105859, "learning_rate": 3.966493692447838e-06, "loss": 319.5382, "step": 36870 }, { "epoch": 0.7092921502822359, "grad_norm": 402.356303614616, "learning_rate": 3.961627706517044e-06, "loss": 314.2855, "step": 36880 }, { "epoch": 0.7094844746179958, "grad_norm": 425.57654010596576, "learning_rate": 3.956763969780206e-06, "loss": 314.7584, "step": 36890 }, { "epoch": 0.7096767989537556, "grad_norm": 401.22077890427266, "learning_rate": 3.951902484048978e-06, "loss": 311.5525, "step": 36900 }, { "epoch": 0.7098691232895155, "grad_norm": 469.4901116631315, "learning_rate": 3.94704325113419e-06, "loss": 314.7782, "step": 36910 }, { "epoch": 0.7100614476252752, "grad_norm": 392.1512097168587, "learning_rate": 3.942186272845821e-06, "loss": 313.5791, "step": 36920 }, { "epoch": 0.7102537719610351, "grad_norm": 399.68716512586286, "learning_rate": 3.937331550993021e-06, "loss": 316.7366, "step": 36930 }, { "epoch": 0.7104460962967949, "grad_norm": 462.0706652871905, "learning_rate": 3.932479087384089e-06, "loss": 323.077, "step": 36940 }, { "epoch": 0.7106384206325548, "grad_norm": 420.04291996058214, "learning_rate": 3.927628883826488e-06, "loss": 316.5559, "step": 36950 }, { "epoch": 0.7108307449683146, "grad_norm": 407.3918509059511, "learning_rate": 3.922780942126837e-06, "loss": 316.4484, "step": 36960 }, { "epoch": 0.7110230693040744, "grad_norm": 384.23940502453195, "learning_rate": 3.91793526409092e-06, "loss": 317.5919, "step": 36970 }, { "epoch": 0.7112153936398342, "grad_norm": 431.2796373970221, "learning_rate": 3.913091851523667e-06, "loss": 318.7198, "step": 36980 }, { "epoch": 0.711407717975594, "grad_norm": 404.95734120667794, "learning_rate": 3.908250706229168e-06, "loss": 310.8542, "step": 36990 }, { "epoch": 0.7116000423113539, "grad_norm": 391.65027125668917, "learning_rate": 3.903411830010676e-06, "loss": 326.9259, "step": 37000 }, { "epoch": 0.7117923666471137, "grad_norm": 409.17589512948126, "learning_rate": 3.8985752246705885e-06, "loss": 317.0762, "step": 37010 }, { "epoch": 0.7119846909828735, "grad_norm": 420.86180758733366, "learning_rate": 3.893740892010463e-06, "loss": 317.103, "step": 37020 }, { "epoch": 0.7121770153186333, "grad_norm": 404.1603006743833, "learning_rate": 3.888908833831002e-06, "loss": 320.6646, "step": 37030 }, { "epoch": 0.7123693396543932, "grad_norm": 407.91243268882334, "learning_rate": 3.884079051932073e-06, "loss": 318.9525, "step": 37040 }, { "epoch": 0.712561663990153, "grad_norm": 403.0385762888256, "learning_rate": 3.879251548112692e-06, "loss": 323.5288, "step": 37050 }, { "epoch": 0.7127539883259129, "grad_norm": 447.7552976398021, "learning_rate": 3.874426324171019e-06, "loss": 332.6827, "step": 37060 }, { "epoch": 0.7129463126616726, "grad_norm": 413.7088950370587, "learning_rate": 3.869603381904377e-06, "loss": 319.8625, "step": 37070 }, { "epoch": 0.7131386369974325, "grad_norm": 411.6214524011452, "learning_rate": 3.864782723109227e-06, "loss": 311.0306, "step": 37080 }, { "epoch": 0.7133309613331923, "grad_norm": 448.99238479862066, "learning_rate": 3.859964349581187e-06, "loss": 316.294, "step": 37090 }, { "epoch": 0.7135232856689521, "grad_norm": 428.94616260505717, "learning_rate": 3.855148263115017e-06, "loss": 316.1106, "step": 37100 }, { "epoch": 0.713715610004712, "grad_norm": 423.9711590581681, "learning_rate": 3.850334465504637e-06, "loss": 320.5208, "step": 37110 }, { "epoch": 0.7139079343404717, "grad_norm": 443.3688503581884, "learning_rate": 3.845522958543104e-06, "loss": 326.4777, "step": 37120 }, { "epoch": 0.7141002586762316, "grad_norm": 459.4295602855482, "learning_rate": 3.840713744022624e-06, "loss": 318.4702, "step": 37130 }, { "epoch": 0.7142925830119914, "grad_norm": 448.60210578785166, "learning_rate": 3.835906823734548e-06, "loss": 322.9629, "step": 37140 }, { "epoch": 0.7144849073477513, "grad_norm": 414.71344020079107, "learning_rate": 3.831102199469379e-06, "loss": 335.3739, "step": 37150 }, { "epoch": 0.714677231683511, "grad_norm": 408.4548346224963, "learning_rate": 3.826299873016758e-06, "loss": 320.6636, "step": 37160 }, { "epoch": 0.7148695560192709, "grad_norm": 396.12786440636427, "learning_rate": 3.821499846165468e-06, "loss": 307.3479, "step": 37170 }, { "epoch": 0.7150618803550307, "grad_norm": 440.68683484793706, "learning_rate": 3.816702120703449e-06, "loss": 317.8695, "step": 37180 }, { "epoch": 0.7152542046907906, "grad_norm": 400.4405466537782, "learning_rate": 3.8119066984177654e-06, "loss": 312.7973, "step": 37190 }, { "epoch": 0.7154465290265504, "grad_norm": 453.7587678612687, "learning_rate": 3.8071135810946415e-06, "loss": 318.936, "step": 37200 }, { "epoch": 0.7156388533623101, "grad_norm": 414.11514208287883, "learning_rate": 3.802322770519424e-06, "loss": 310.4042, "step": 37210 }, { "epoch": 0.71583117769807, "grad_norm": 402.54813440693385, "learning_rate": 3.7975342684766215e-06, "loss": 310.9606, "step": 37220 }, { "epoch": 0.7160235020338298, "grad_norm": 436.92638637071775, "learning_rate": 3.792748076749867e-06, "loss": 319.4578, "step": 37230 }, { "epoch": 0.7162158263695897, "grad_norm": 441.1669366932506, "learning_rate": 3.787964197121934e-06, "loss": 318.59, "step": 37240 }, { "epoch": 0.7164081507053495, "grad_norm": 407.8454849081635, "learning_rate": 3.7831826313747454e-06, "loss": 309.5386, "step": 37250 }, { "epoch": 0.7166004750411094, "grad_norm": 442.3110762810688, "learning_rate": 3.778403381289353e-06, "loss": 309.9025, "step": 37260 }, { "epoch": 0.7167927993768691, "grad_norm": 390.04546748849305, "learning_rate": 3.7736264486459486e-06, "loss": 312.0679, "step": 37270 }, { "epoch": 0.716985123712629, "grad_norm": 409.9691365520833, "learning_rate": 3.7688518352238555e-06, "loss": 313.9818, "step": 37280 }, { "epoch": 0.7171774480483888, "grad_norm": 427.13058434146336, "learning_rate": 3.7640795428015462e-06, "loss": 319.6796, "step": 37290 }, { "epoch": 0.7173697723841487, "grad_norm": 424.5965196432928, "learning_rate": 3.7593095731566186e-06, "loss": 314.6164, "step": 37300 }, { "epoch": 0.7175620967199084, "grad_norm": 425.14250060424666, "learning_rate": 3.7545419280658025e-06, "loss": 318.4672, "step": 37310 }, { "epoch": 0.7177544210556682, "grad_norm": 431.1156015438004, "learning_rate": 3.749776609304975e-06, "loss": 316.3071, "step": 37320 }, { "epoch": 0.7179467453914281, "grad_norm": 445.2991048054992, "learning_rate": 3.7450136186491315e-06, "loss": 320.987, "step": 37330 }, { "epoch": 0.7181390697271879, "grad_norm": 423.6614317551579, "learning_rate": 3.7402529578724134e-06, "loss": 315.6607, "step": 37340 }, { "epoch": 0.7183313940629478, "grad_norm": 403.65496163124425, "learning_rate": 3.735494628748082e-06, "loss": 315.3974, "step": 37350 }, { "epoch": 0.7185237183987075, "grad_norm": 417.0197634954238, "learning_rate": 3.730738633048543e-06, "loss": 312.5922, "step": 37360 }, { "epoch": 0.7187160427344674, "grad_norm": 401.36956774855327, "learning_rate": 3.7259849725453225e-06, "loss": 322.5538, "step": 37370 }, { "epoch": 0.7189083670702272, "grad_norm": 395.92819477555054, "learning_rate": 3.7212336490090815e-06, "loss": 314.0357, "step": 37380 }, { "epoch": 0.7191006914059871, "grad_norm": 416.44554550550464, "learning_rate": 3.7164846642096053e-06, "loss": 317.2763, "step": 37390 }, { "epoch": 0.7192930157417469, "grad_norm": 405.8560779783164, "learning_rate": 3.7117380199158204e-06, "loss": 324.1765, "step": 37400 }, { "epoch": 0.7194853400775068, "grad_norm": 405.44566321724454, "learning_rate": 3.706993717895768e-06, "loss": 312.6861, "step": 37410 }, { "epoch": 0.7196776644132665, "grad_norm": 443.2028923968177, "learning_rate": 3.70225175991662e-06, "loss": 317.0176, "step": 37420 }, { "epoch": 0.7198699887490264, "grad_norm": 447.2753215806813, "learning_rate": 3.697512147744684e-06, "loss": 323.2874, "step": 37430 }, { "epoch": 0.7200623130847862, "grad_norm": 427.65607213351524, "learning_rate": 3.6927748831453835e-06, "loss": 310.19, "step": 37440 }, { "epoch": 0.720254637420546, "grad_norm": 428.7363170292159, "learning_rate": 3.688039967883269e-06, "loss": 316.7893, "step": 37450 }, { "epoch": 0.7204469617563058, "grad_norm": 411.783774523239, "learning_rate": 3.683307403722025e-06, "loss": 312.1469, "step": 37460 }, { "epoch": 0.7206392860920656, "grad_norm": 406.5757628224676, "learning_rate": 3.678577192424445e-06, "loss": 312.4352, "step": 37470 }, { "epoch": 0.7208316104278255, "grad_norm": 469.7002525236319, "learning_rate": 3.6738493357524628e-06, "loss": 313.0454, "step": 37480 }, { "epoch": 0.7210239347635853, "grad_norm": 405.9544252843232, "learning_rate": 3.6691238354671233e-06, "loss": 317.7413, "step": 37490 }, { "epoch": 0.7212162590993452, "grad_norm": 450.3838190527834, "learning_rate": 3.664400693328595e-06, "loss": 317.8098, "step": 37500 }, { "epoch": 0.7214085834351049, "grad_norm": 397.37207343356437, "learning_rate": 3.6596799110961746e-06, "loss": 310.4092, "step": 37510 }, { "epoch": 0.7216009077708648, "grad_norm": 425.1938275623798, "learning_rate": 3.6549614905282724e-06, "loss": 322.1988, "step": 37520 }, { "epoch": 0.7217932321066246, "grad_norm": 409.6033057323231, "learning_rate": 3.6502454333824224e-06, "loss": 319.1281, "step": 37530 }, { "epoch": 0.7219855564423845, "grad_norm": 432.7548608601217, "learning_rate": 3.6455317414152803e-06, "loss": 317.6438, "step": 37540 }, { "epoch": 0.7221778807781443, "grad_norm": 429.2340631888676, "learning_rate": 3.640820416382618e-06, "loss": 318.313, "step": 37550 }, { "epoch": 0.722370205113904, "grad_norm": 388.91906563645216, "learning_rate": 3.6361114600393242e-06, "loss": 305.6224, "step": 37560 }, { "epoch": 0.7225625294496639, "grad_norm": 409.6265087580773, "learning_rate": 3.6314048741394057e-06, "loss": 322.2113, "step": 37570 }, { "epoch": 0.7227548537854237, "grad_norm": 405.2649128830983, "learning_rate": 3.6267006604359943e-06, "loss": 318.1316, "step": 37580 }, { "epoch": 0.7229471781211836, "grad_norm": 437.09071311299414, "learning_rate": 3.6219988206813285e-06, "loss": 325.6359, "step": 37590 }, { "epoch": 0.7231395024569434, "grad_norm": 401.24198030662205, "learning_rate": 3.6172993566267623e-06, "loss": 311.0217, "step": 37600 }, { "epoch": 0.7233318267927032, "grad_norm": 438.65354274660905, "learning_rate": 3.6126022700227715e-06, "loss": 316.6846, "step": 37610 }, { "epoch": 0.723524151128463, "grad_norm": 454.8033362099199, "learning_rate": 3.6079075626189476e-06, "loss": 319.8325, "step": 37620 }, { "epoch": 0.7237164754642229, "grad_norm": 412.55436477767176, "learning_rate": 3.603215236163987e-06, "loss": 309.7259, "step": 37630 }, { "epoch": 0.7239087997999827, "grad_norm": 407.03193529433855, "learning_rate": 3.5985252924057023e-06, "loss": 312.1112, "step": 37640 }, { "epoch": 0.7241011241357426, "grad_norm": 394.0981915137137, "learning_rate": 3.5938377330910245e-06, "loss": 315.1933, "step": 37650 }, { "epoch": 0.7242934484715023, "grad_norm": 422.2728382752941, "learning_rate": 3.5891525599659905e-06, "loss": 314.2316, "step": 37660 }, { "epoch": 0.7244857728072621, "grad_norm": 406.6467869133794, "learning_rate": 3.5844697747757496e-06, "loss": 310.8174, "step": 37670 }, { "epoch": 0.724678097143022, "grad_norm": 416.15640247974386, "learning_rate": 3.5797893792645577e-06, "loss": 316.6512, "step": 37680 }, { "epoch": 0.7248704214787818, "grad_norm": 468.2073568637038, "learning_rate": 3.5751113751757925e-06, "loss": 319.5047, "step": 37690 }, { "epoch": 0.7250627458145417, "grad_norm": 399.7030211088204, "learning_rate": 3.5704357642519295e-06, "loss": 311.1336, "step": 37700 }, { "epoch": 0.7252550701503014, "grad_norm": 466.3737316535616, "learning_rate": 3.5657625482345526e-06, "loss": 312.7548, "step": 37710 }, { "epoch": 0.7254473944860613, "grad_norm": 453.07729264993145, "learning_rate": 3.5610917288643655e-06, "loss": 313.9828, "step": 37720 }, { "epoch": 0.7256397188218211, "grad_norm": 442.5665134675431, "learning_rate": 3.556423307881167e-06, "loss": 318.8357, "step": 37730 }, { "epoch": 0.725832043157581, "grad_norm": 443.4626834334787, "learning_rate": 3.551757287023865e-06, "loss": 318.1125, "step": 37740 }, { "epoch": 0.7260243674933408, "grad_norm": 397.9692858351469, "learning_rate": 3.547093668030479e-06, "loss": 319.7514, "step": 37750 }, { "epoch": 0.7262166918291006, "grad_norm": 412.3175209156679, "learning_rate": 3.542432452638126e-06, "loss": 316.257, "step": 37760 }, { "epoch": 0.7264090161648604, "grad_norm": 416.65854698004955, "learning_rate": 3.5377736425830366e-06, "loss": 322.3939, "step": 37770 }, { "epoch": 0.7266013405006202, "grad_norm": 495.64094641424106, "learning_rate": 3.5331172396005354e-06, "loss": 310.4271, "step": 37780 }, { "epoch": 0.7267936648363801, "grad_norm": 414.5428819124079, "learning_rate": 3.528463245425062e-06, "loss": 322.1516, "step": 37790 }, { "epoch": 0.7269859891721399, "grad_norm": 405.47334307072526, "learning_rate": 3.5238116617901486e-06, "loss": 313.9452, "step": 37800 }, { "epoch": 0.7271783135078997, "grad_norm": 429.37216642645814, "learning_rate": 3.519162490428433e-06, "loss": 324.7638, "step": 37810 }, { "epoch": 0.7273706378436595, "grad_norm": 414.80287021366075, "learning_rate": 3.5145157330716516e-06, "loss": 310.1896, "step": 37820 }, { "epoch": 0.7275629621794194, "grad_norm": 404.3157837780895, "learning_rate": 3.509871391450652e-06, "loss": 324.8781, "step": 37830 }, { "epoch": 0.7277552865151792, "grad_norm": 388.671118795425, "learning_rate": 3.505229467295371e-06, "loss": 313.4352, "step": 37840 }, { "epoch": 0.7279476108509391, "grad_norm": 415.43047737031253, "learning_rate": 3.5005899623348493e-06, "loss": 327.8867, "step": 37850 }, { "epoch": 0.7281399351866988, "grad_norm": 403.89400308963826, "learning_rate": 3.495952878297221e-06, "loss": 319.3212, "step": 37860 }, { "epoch": 0.7283322595224587, "grad_norm": 403.2433499585081, "learning_rate": 3.4913182169097315e-06, "loss": 311.9184, "step": 37870 }, { "epoch": 0.7285245838582185, "grad_norm": 398.9078501818294, "learning_rate": 3.4866859798987084e-06, "loss": 314.8927, "step": 37880 }, { "epoch": 0.7287169081939783, "grad_norm": 443.4683761769683, "learning_rate": 3.4820561689895906e-06, "loss": 316.0644, "step": 37890 }, { "epoch": 0.7289092325297382, "grad_norm": 449.0231561169924, "learning_rate": 3.4774287859068988e-06, "loss": 310.4906, "step": 37900 }, { "epoch": 0.7291015568654979, "grad_norm": 452.6404643791422, "learning_rate": 3.472803832374263e-06, "loss": 306.8345, "step": 37910 }, { "epoch": 0.7292938812012578, "grad_norm": 406.7836696763572, "learning_rate": 3.4681813101144e-06, "loss": 318.8201, "step": 37920 }, { "epoch": 0.7294862055370176, "grad_norm": 389.4193236787394, "learning_rate": 3.4635612208491197e-06, "loss": 319.6306, "step": 37930 }, { "epoch": 0.7296785298727775, "grad_norm": 389.113814219266, "learning_rate": 3.458943566299334e-06, "loss": 304.6019, "step": 37940 }, { "epoch": 0.7298708542085373, "grad_norm": 451.71800765339873, "learning_rate": 3.454328348185042e-06, "loss": 304.9947, "step": 37950 }, { "epoch": 0.7300631785442971, "grad_norm": 406.58528268947873, "learning_rate": 3.4497155682253314e-06, "loss": 322.682, "step": 37960 }, { "epoch": 0.7302555028800569, "grad_norm": 387.11664039755044, "learning_rate": 3.4451052281383922e-06, "loss": 309.2688, "step": 37970 }, { "epoch": 0.7304478272158168, "grad_norm": 438.7083412236591, "learning_rate": 3.440497329641499e-06, "loss": 321.3788, "step": 37980 }, { "epoch": 0.7306401515515766, "grad_norm": 436.585959123442, "learning_rate": 3.435891874451017e-06, "loss": 312.4924, "step": 37990 }, { "epoch": 0.7308324758873364, "grad_norm": 408.98789655135624, "learning_rate": 3.431288864282398e-06, "loss": 312.9037, "step": 38000 }, { "epoch": 0.7310248002230962, "grad_norm": 417.8799339967779, "learning_rate": 3.4266883008501937e-06, "loss": 308.0152, "step": 38010 }, { "epoch": 0.731217124558856, "grad_norm": 443.45688469634183, "learning_rate": 3.4220901858680365e-06, "loss": 321.1792, "step": 38020 }, { "epoch": 0.7314094488946159, "grad_norm": 424.7660227997121, "learning_rate": 3.4174945210486445e-06, "loss": 319.3943, "step": 38030 }, { "epoch": 0.7316017732303757, "grad_norm": 384.3072078283791, "learning_rate": 3.4129013081038285e-06, "loss": 319.874, "step": 38040 }, { "epoch": 0.7317940975661356, "grad_norm": 412.9184162702637, "learning_rate": 3.40831054874449e-06, "loss": 311.6062, "step": 38050 }, { "epoch": 0.7319864219018953, "grad_norm": 406.7459799692194, "learning_rate": 3.403722244680606e-06, "loss": 311.9208, "step": 38060 }, { "epoch": 0.7321787462376552, "grad_norm": 434.1742568733442, "learning_rate": 3.3991363976212423e-06, "loss": 324.9531, "step": 38070 }, { "epoch": 0.732371070573415, "grad_norm": 425.61658690525115, "learning_rate": 3.394553009274556e-06, "loss": 310.1825, "step": 38080 }, { "epoch": 0.7325633949091749, "grad_norm": 405.2715800197643, "learning_rate": 3.389972081347782e-06, "loss": 311.7358, "step": 38090 }, { "epoch": 0.7327557192449347, "grad_norm": 430.6477516855534, "learning_rate": 3.385393615547239e-06, "loss": 323.1989, "step": 38100 }, { "epoch": 0.7329480435806944, "grad_norm": 417.5917856276371, "learning_rate": 3.3808176135783276e-06, "loss": 329.5747, "step": 38110 }, { "epoch": 0.7331403679164543, "grad_norm": 420.3486848807001, "learning_rate": 3.3762440771455386e-06, "loss": 320.8793, "step": 38120 }, { "epoch": 0.7333326922522141, "grad_norm": 402.3293222437787, "learning_rate": 3.371673007952435e-06, "loss": 313.1816, "step": 38130 }, { "epoch": 0.733525016587974, "grad_norm": 448.2393352823683, "learning_rate": 3.3671044077016634e-06, "loss": 328.3257, "step": 38140 }, { "epoch": 0.7337173409237338, "grad_norm": 385.64988460853056, "learning_rate": 3.3625382780949576e-06, "loss": 320.212, "step": 38150 }, { "epoch": 0.7339096652594936, "grad_norm": 413.66587294721336, "learning_rate": 3.357974620833121e-06, "loss": 319.9798, "step": 38160 }, { "epoch": 0.7341019895952534, "grad_norm": 438.22631973845125, "learning_rate": 3.353413437616039e-06, "loss": 320.5506, "step": 38170 }, { "epoch": 0.7342943139310133, "grad_norm": 448.269953081504, "learning_rate": 3.3488547301426786e-06, "loss": 301.8495, "step": 38180 }, { "epoch": 0.7344866382667731, "grad_norm": 405.8556961922498, "learning_rate": 3.344298500111087e-06, "loss": 314.2083, "step": 38190 }, { "epoch": 0.734678962602533, "grad_norm": 409.08674419414484, "learning_rate": 3.3397447492183833e-06, "loss": 315.1931, "step": 38200 }, { "epoch": 0.7348712869382927, "grad_norm": 427.04741052235056, "learning_rate": 3.3351934791607576e-06, "loss": 317.8736, "step": 38210 }, { "epoch": 0.7350636112740525, "grad_norm": 397.0244738739313, "learning_rate": 3.330644691633492e-06, "loss": 315.4427, "step": 38220 }, { "epoch": 0.7352559356098124, "grad_norm": 427.23375430807096, "learning_rate": 3.3260983883309306e-06, "loss": 324.4806, "step": 38230 }, { "epoch": 0.7354482599455722, "grad_norm": 403.6796958049927, "learning_rate": 3.321554570946497e-06, "loss": 310.0045, "step": 38240 }, { "epoch": 0.735640584281332, "grad_norm": 411.0996078047051, "learning_rate": 3.317013241172684e-06, "loss": 316.0688, "step": 38250 }, { "epoch": 0.7358329086170918, "grad_norm": 403.78661228846784, "learning_rate": 3.3124744007010688e-06, "loss": 312.0583, "step": 38260 }, { "epoch": 0.7360252329528517, "grad_norm": 430.2940958442948, "learning_rate": 3.3079380512222904e-06, "loss": 320.3145, "step": 38270 }, { "epoch": 0.7362175572886115, "grad_norm": 409.9006288150114, "learning_rate": 3.3034041944260654e-06, "loss": 318.8557, "step": 38280 }, { "epoch": 0.7364098816243714, "grad_norm": 424.47316890104634, "learning_rate": 3.2988728320011774e-06, "loss": 314.2572, "step": 38290 }, { "epoch": 0.7366022059601312, "grad_norm": 419.28056990895914, "learning_rate": 3.294343965635489e-06, "loss": 314.8191, "step": 38300 }, { "epoch": 0.736794530295891, "grad_norm": 373.9067611329959, "learning_rate": 3.289817597015923e-06, "loss": 305.4609, "step": 38310 }, { "epoch": 0.7369868546316508, "grad_norm": 422.6967809774257, "learning_rate": 3.2852937278284837e-06, "loss": 318.4763, "step": 38320 }, { "epoch": 0.7371791789674106, "grad_norm": 406.8098272778151, "learning_rate": 3.28077235975823e-06, "loss": 313.6993, "step": 38330 }, { "epoch": 0.7373715033031705, "grad_norm": 411.6673975567329, "learning_rate": 3.2762534944893033e-06, "loss": 319.8252, "step": 38340 }, { "epoch": 0.7375638276389302, "grad_norm": 395.8632164995816, "learning_rate": 3.271737133704904e-06, "loss": 308.1031, "step": 38350 }, { "epoch": 0.7377561519746901, "grad_norm": 398.6223668239311, "learning_rate": 3.2672232790872983e-06, "loss": 316.9474, "step": 38360 }, { "epoch": 0.7379484763104499, "grad_norm": 407.15574223548026, "learning_rate": 3.262711932317828e-06, "loss": 319.3968, "step": 38370 }, { "epoch": 0.7381408006462098, "grad_norm": 421.81286585835966, "learning_rate": 3.258203095076894e-06, "loss": 313.7264, "step": 38380 }, { "epoch": 0.7383331249819696, "grad_norm": 436.11465659176275, "learning_rate": 3.2536967690439592e-06, "loss": 326.133, "step": 38390 }, { "epoch": 0.7385254493177295, "grad_norm": 385.3559491294718, "learning_rate": 3.249192955897562e-06, "loss": 313.9984, "step": 38400 }, { "epoch": 0.7387177736534892, "grad_norm": 394.3691437351689, "learning_rate": 3.2446916573152955e-06, "loss": 318.277, "step": 38410 }, { "epoch": 0.7389100979892491, "grad_norm": 392.2305322079415, "learning_rate": 3.24019287497382e-06, "loss": 309.2865, "step": 38420 }, { "epoch": 0.7391024223250089, "grad_norm": 406.1065949033608, "learning_rate": 3.235696610548852e-06, "loss": 305.6187, "step": 38430 }, { "epoch": 0.7392947466607688, "grad_norm": 436.93456010581, "learning_rate": 3.231202865715184e-06, "loss": 317.0469, "step": 38440 }, { "epoch": 0.7394870709965286, "grad_norm": 380.5858387775878, "learning_rate": 3.226711642146655e-06, "loss": 325.8808, "step": 38450 }, { "epoch": 0.7396793953322883, "grad_norm": 412.50051100575945, "learning_rate": 3.222222941516179e-06, "loss": 313.3394, "step": 38460 }, { "epoch": 0.7398717196680482, "grad_norm": 379.09514848672194, "learning_rate": 3.2177367654957137e-06, "loss": 305.7161, "step": 38470 }, { "epoch": 0.740064044003808, "grad_norm": 444.2549819893377, "learning_rate": 3.213253115756295e-06, "loss": 315.6344, "step": 38480 }, { "epoch": 0.7402563683395679, "grad_norm": 428.0221820660699, "learning_rate": 3.208771993968003e-06, "loss": 315.8494, "step": 38490 }, { "epoch": 0.7404486926753276, "grad_norm": 472.9267878368491, "learning_rate": 3.2042934017999795e-06, "loss": 311.2093, "step": 38500 }, { "epoch": 0.7406410170110875, "grad_norm": 400.60202215807976, "learning_rate": 3.1998173409204326e-06, "loss": 313.0489, "step": 38510 }, { "epoch": 0.7408333413468473, "grad_norm": 413.8431248478503, "learning_rate": 3.1953438129966175e-06, "loss": 324.425, "step": 38520 }, { "epoch": 0.7410256656826072, "grad_norm": 430.22678825788074, "learning_rate": 3.190872819694849e-06, "loss": 323.9846, "step": 38530 }, { "epoch": 0.741217990018367, "grad_norm": 384.82680452873507, "learning_rate": 3.1864043626804953e-06, "loss": 307.6771, "step": 38540 }, { "epoch": 0.7414103143541269, "grad_norm": 394.34326680230373, "learning_rate": 3.18193844361799e-06, "loss": 305.6898, "step": 38550 }, { "epoch": 0.7416026386898866, "grad_norm": 398.7812262812955, "learning_rate": 3.1774750641708095e-06, "loss": 308.9979, "step": 38560 }, { "epoch": 0.7417949630256464, "grad_norm": 404.1360343295936, "learning_rate": 3.1730142260014875e-06, "loss": 315.4173, "step": 38570 }, { "epoch": 0.7419872873614063, "grad_norm": 436.04929807229973, "learning_rate": 3.1685559307716187e-06, "loss": 312.3365, "step": 38580 }, { "epoch": 0.7421796116971661, "grad_norm": 455.0903400495209, "learning_rate": 3.164100180141839e-06, "loss": 313.875, "step": 38590 }, { "epoch": 0.742371936032926, "grad_norm": 425.8210960824381, "learning_rate": 3.159646975771842e-06, "loss": 318.1196, "step": 38600 }, { "epoch": 0.7425642603686857, "grad_norm": 461.5249003242236, "learning_rate": 3.155196319320374e-06, "loss": 320.8121, "step": 38610 }, { "epoch": 0.7427565847044456, "grad_norm": 409.07309283229864, "learning_rate": 3.1507482124452337e-06, "loss": 319.5608, "step": 38620 }, { "epoch": 0.7429489090402054, "grad_norm": 400.91881172283905, "learning_rate": 3.146302656803266e-06, "loss": 311.4764, "step": 38630 }, { "epoch": 0.7431412333759653, "grad_norm": 410.3369833827823, "learning_rate": 3.1418596540503653e-06, "loss": 311.0298, "step": 38640 }, { "epoch": 0.743333557711725, "grad_norm": 415.79677935980015, "learning_rate": 3.1374192058414755e-06, "loss": 325.3141, "step": 38650 }, { "epoch": 0.7435258820474849, "grad_norm": 408.23200671095384, "learning_rate": 3.1329813138305944e-06, "loss": 319.9275, "step": 38660 }, { "epoch": 0.7437182063832447, "grad_norm": 406.28556278001923, "learning_rate": 3.128545979670762e-06, "loss": 305.9547, "step": 38670 }, { "epoch": 0.7439105307190045, "grad_norm": 392.1745443789781, "learning_rate": 3.124113205014063e-06, "loss": 309.163, "step": 38680 }, { "epoch": 0.7441028550547644, "grad_norm": 415.78221120051256, "learning_rate": 3.119682991511639e-06, "loss": 316.8025, "step": 38690 }, { "epoch": 0.7442951793905241, "grad_norm": 405.75088787988915, "learning_rate": 3.1152553408136686e-06, "loss": 311.4661, "step": 38700 }, { "epoch": 0.744487503726284, "grad_norm": 421.81033576565244, "learning_rate": 3.110830254569378e-06, "loss": 315.2426, "step": 38710 }, { "epoch": 0.7446798280620438, "grad_norm": 402.1520592952528, "learning_rate": 3.106407734427037e-06, "loss": 317.4716, "step": 38720 }, { "epoch": 0.7448721523978037, "grad_norm": 437.1015581421016, "learning_rate": 3.101987782033966e-06, "loss": 316.8594, "step": 38730 }, { "epoch": 0.7450644767335635, "grad_norm": 408.8132878412377, "learning_rate": 3.097570399036519e-06, "loss": 316.3783, "step": 38740 }, { "epoch": 0.7452568010693233, "grad_norm": 409.239612133476, "learning_rate": 3.0931555870801033e-06, "loss": 307.0697, "step": 38750 }, { "epoch": 0.7454491254050831, "grad_norm": 381.41884243184774, "learning_rate": 3.0887433478091587e-06, "loss": 311.733, "step": 38760 }, { "epoch": 0.745641449740843, "grad_norm": 391.3036778784672, "learning_rate": 3.0843336828671765e-06, "loss": 314.4483, "step": 38770 }, { "epoch": 0.7458337740766028, "grad_norm": 428.2361792481648, "learning_rate": 3.079926593896683e-06, "loss": 313.0411, "step": 38780 }, { "epoch": 0.7460260984123626, "grad_norm": 417.96610594513123, "learning_rate": 3.0755220825392397e-06, "loss": 309.2721, "step": 38790 }, { "epoch": 0.7462184227481224, "grad_norm": 429.42513687118833, "learning_rate": 3.0711201504354628e-06, "loss": 314.5482, "step": 38800 }, { "epoch": 0.7464107470838822, "grad_norm": 384.40194923734737, "learning_rate": 3.0667207992249948e-06, "loss": 317.0427, "step": 38810 }, { "epoch": 0.7466030714196421, "grad_norm": 415.2178957596277, "learning_rate": 3.062324030546523e-06, "loss": 316.397, "step": 38820 }, { "epoch": 0.7467953957554019, "grad_norm": 412.9864857331533, "learning_rate": 3.057929846037767e-06, "loss": 309.3145, "step": 38830 }, { "epoch": 0.7469877200911618, "grad_norm": 480.89559260098406, "learning_rate": 3.0535382473354945e-06, "loss": 317.5547, "step": 38840 }, { "epoch": 0.7471800444269215, "grad_norm": 436.69637992833316, "learning_rate": 3.0491492360755003e-06, "loss": 317.3891, "step": 38850 }, { "epoch": 0.7473723687626814, "grad_norm": 450.4614629079015, "learning_rate": 3.0447628138926153e-06, "loss": 305.826, "step": 38860 }, { "epoch": 0.7475646930984412, "grad_norm": 423.0453978926065, "learning_rate": 3.0403789824207165e-06, "loss": 305.9098, "step": 38870 }, { "epoch": 0.7477570174342011, "grad_norm": 406.5975703764876, "learning_rate": 3.0359977432927013e-06, "loss": 310.9963, "step": 38880 }, { "epoch": 0.7479493417699609, "grad_norm": 394.79896699951405, "learning_rate": 3.0316190981405147e-06, "loss": 309.3581, "step": 38890 }, { "epoch": 0.7481416661057206, "grad_norm": 400.8230855064271, "learning_rate": 3.0272430485951244e-06, "loss": 307.8479, "step": 38900 }, { "epoch": 0.7483339904414805, "grad_norm": 393.4205622519115, "learning_rate": 3.0228695962865438e-06, "loss": 316.2204, "step": 38910 }, { "epoch": 0.7485263147772403, "grad_norm": 398.46960597663616, "learning_rate": 3.018498742843806e-06, "loss": 318.3917, "step": 38920 }, { "epoch": 0.7487186391130002, "grad_norm": 425.43287271154475, "learning_rate": 3.014130489894982e-06, "loss": 314.5338, "step": 38930 }, { "epoch": 0.74891096344876, "grad_norm": 417.4891674980009, "learning_rate": 3.0097648390671765e-06, "loss": 306.0104, "step": 38940 }, { "epoch": 0.7491032877845198, "grad_norm": 413.84242900851444, "learning_rate": 3.005401791986522e-06, "loss": 322.1133, "step": 38950 }, { "epoch": 0.7492956121202796, "grad_norm": 411.02152390458764, "learning_rate": 3.00104135027818e-06, "loss": 321.4357, "step": 38960 }, { "epoch": 0.7494879364560395, "grad_norm": 415.3366665583693, "learning_rate": 2.99668351556634e-06, "loss": 316.4434, "step": 38970 }, { "epoch": 0.7496802607917993, "grad_norm": 409.21065674768164, "learning_rate": 2.99232828947423e-06, "loss": 317.5683, "step": 38980 }, { "epoch": 0.7498725851275592, "grad_norm": 428.6979813797801, "learning_rate": 2.987975673624096e-06, "loss": 307.8015, "step": 38990 }, { "epoch": 0.7500649094633189, "grad_norm": 420.5654456185048, "learning_rate": 2.9836256696372178e-06, "loss": 321.1392, "step": 39000 }, { "epoch": 0.7502572337990787, "grad_norm": 423.43887924544737, "learning_rate": 2.9792782791338936e-06, "loss": 316.6789, "step": 39010 }, { "epoch": 0.7504495581348386, "grad_norm": 403.8965121103728, "learning_rate": 2.9749335037334604e-06, "loss": 318.7557, "step": 39020 }, { "epoch": 0.7506418824705984, "grad_norm": 407.46914141929426, "learning_rate": 2.9705913450542777e-06, "loss": 308.4499, "step": 39030 }, { "epoch": 0.7508342068063583, "grad_norm": 400.33160146764527, "learning_rate": 2.9662518047137214e-06, "loss": 310.8097, "step": 39040 }, { "epoch": 0.751026531142118, "grad_norm": 427.741101676746, "learning_rate": 2.961914884328203e-06, "loss": 315.2042, "step": 39050 }, { "epoch": 0.7512188554778779, "grad_norm": 414.16283904309296, "learning_rate": 2.9575805855131546e-06, "loss": 313.6494, "step": 39060 }, { "epoch": 0.7514111798136377, "grad_norm": 397.01997779053977, "learning_rate": 2.9532489098830274e-06, "loss": 303.9551, "step": 39070 }, { "epoch": 0.7516035041493976, "grad_norm": 418.6923368309888, "learning_rate": 2.9489198590512967e-06, "loss": 312.293, "step": 39080 }, { "epoch": 0.7517958284851574, "grad_norm": 436.20305301883366, "learning_rate": 2.9445934346304706e-06, "loss": 315.7977, "step": 39090 }, { "epoch": 0.7519881528209172, "grad_norm": 399.2215920290596, "learning_rate": 2.940269638232065e-06, "loss": 313.1067, "step": 39100 }, { "epoch": 0.752180477156677, "grad_norm": 410.72525240529393, "learning_rate": 2.935948471466622e-06, "loss": 310.0512, "step": 39110 }, { "epoch": 0.7523728014924368, "grad_norm": 401.15164377495205, "learning_rate": 2.9316299359437085e-06, "loss": 311.5574, "step": 39120 }, { "epoch": 0.7525651258281967, "grad_norm": 435.2378524559211, "learning_rate": 2.9273140332719064e-06, "loss": 309.722, "step": 39130 }, { "epoch": 0.7527574501639565, "grad_norm": 447.7548916501179, "learning_rate": 2.923000765058818e-06, "loss": 312.8755, "step": 39140 }, { "epoch": 0.7529497744997163, "grad_norm": 425.27265018010866, "learning_rate": 2.9186901329110605e-06, "loss": 305.6466, "step": 39150 }, { "epoch": 0.7531420988354761, "grad_norm": 434.84847325270107, "learning_rate": 2.9143821384342808e-06, "loss": 308.9021, "step": 39160 }, { "epoch": 0.753334423171236, "grad_norm": 442.1234647641681, "learning_rate": 2.9100767832331277e-06, "loss": 310.0948, "step": 39170 }, { "epoch": 0.7535267475069958, "grad_norm": 408.522686489624, "learning_rate": 2.9057740689112822e-06, "loss": 319.8424, "step": 39180 }, { "epoch": 0.7537190718427557, "grad_norm": 412.93660270428484, "learning_rate": 2.901473997071428e-06, "loss": 312.1669, "step": 39190 }, { "epoch": 0.7539113961785154, "grad_norm": 398.20484252143274, "learning_rate": 2.8971765693152767e-06, "loss": 311.1424, "step": 39200 }, { "epoch": 0.7541037205142753, "grad_norm": 380.1655257161292, "learning_rate": 2.8928817872435465e-06, "loss": 308.9361, "step": 39210 }, { "epoch": 0.7542960448500351, "grad_norm": 393.81366191113125, "learning_rate": 2.8885896524559696e-06, "loss": 316.0979, "step": 39220 }, { "epoch": 0.7544883691857949, "grad_norm": 428.9532058511396, "learning_rate": 2.8843001665513016e-06, "loss": 312.6022, "step": 39230 }, { "epoch": 0.7546806935215548, "grad_norm": 403.03062248401363, "learning_rate": 2.8800133311273016e-06, "loss": 319.5132, "step": 39240 }, { "epoch": 0.7548730178573145, "grad_norm": 434.6596352874526, "learning_rate": 2.875729147780745e-06, "loss": 316.7485, "step": 39250 }, { "epoch": 0.7550653421930744, "grad_norm": 433.0055960376108, "learning_rate": 2.871447618107417e-06, "loss": 312.0756, "step": 39260 }, { "epoch": 0.7552576665288342, "grad_norm": 403.7441360507335, "learning_rate": 2.867168743702122e-06, "loss": 305.5635, "step": 39270 }, { "epoch": 0.7554499908645941, "grad_norm": 442.02211654745304, "learning_rate": 2.8628925261586683e-06, "loss": 317.6886, "step": 39280 }, { "epoch": 0.7556423152003539, "grad_norm": 452.1194678872526, "learning_rate": 2.8586189670698717e-06, "loss": 313.3257, "step": 39290 }, { "epoch": 0.7558346395361137, "grad_norm": 389.67226983157343, "learning_rate": 2.854348068027568e-06, "loss": 311.0332, "step": 39300 }, { "epoch": 0.7560269638718735, "grad_norm": 423.33268277708675, "learning_rate": 2.850079830622593e-06, "loss": 319.8657, "step": 39310 }, { "epoch": 0.7562192882076334, "grad_norm": 442.3294562210493, "learning_rate": 2.845814256444799e-06, "loss": 326.6072, "step": 39320 }, { "epoch": 0.7564116125433932, "grad_norm": 395.6122928340354, "learning_rate": 2.8415513470830357e-06, "loss": 320.7574, "step": 39330 }, { "epoch": 0.756603936879153, "grad_norm": 431.5769886032502, "learning_rate": 2.837291104125174e-06, "loss": 319.5021, "step": 39340 }, { "epoch": 0.7567962612149128, "grad_norm": 403.0776949615088, "learning_rate": 2.833033529158079e-06, "loss": 310.1233, "step": 39350 }, { "epoch": 0.7569885855506726, "grad_norm": 420.767776531208, "learning_rate": 2.8287786237676253e-06, "loss": 322.8885, "step": 39360 }, { "epoch": 0.7571809098864325, "grad_norm": 435.1093309648183, "learning_rate": 2.824526389538701e-06, "loss": 322.0231, "step": 39370 }, { "epoch": 0.7573732342221923, "grad_norm": 428.29271802338576, "learning_rate": 2.8202768280551894e-06, "loss": 308.8249, "step": 39380 }, { "epoch": 0.7575655585579522, "grad_norm": 420.9540628043602, "learning_rate": 2.8160299408999827e-06, "loss": 322.9439, "step": 39390 }, { "epoch": 0.7577578828937119, "grad_norm": 407.3357297867067, "learning_rate": 2.811785729654972e-06, "loss": 315.7543, "step": 39400 }, { "epoch": 0.7579502072294718, "grad_norm": 405.290836739935, "learning_rate": 2.8075441959010628e-06, "loss": 309.8894, "step": 39410 }, { "epoch": 0.7581425315652316, "grad_norm": 401.76609331373015, "learning_rate": 2.8033053412181543e-06, "loss": 311.5659, "step": 39420 }, { "epoch": 0.7583348559009915, "grad_norm": 375.5352116966792, "learning_rate": 2.799069167185148e-06, "loss": 297.6623, "step": 39430 }, { "epoch": 0.7585271802367513, "grad_norm": 412.79498904241206, "learning_rate": 2.7948356753799466e-06, "loss": 322.5181, "step": 39440 }, { "epoch": 0.758719504572511, "grad_norm": 407.22493665973343, "learning_rate": 2.7906048673794593e-06, "loss": 310.8787, "step": 39450 }, { "epoch": 0.7589118289082709, "grad_norm": 412.6630847413725, "learning_rate": 2.7863767447595946e-06, "loss": 318.2519, "step": 39460 }, { "epoch": 0.7591041532440307, "grad_norm": 384.441955634724, "learning_rate": 2.7821513090952523e-06, "loss": 308.0261, "step": 39470 }, { "epoch": 0.7592964775797906, "grad_norm": 406.67638619866375, "learning_rate": 2.7779285619603446e-06, "loss": 313.2911, "step": 39480 }, { "epoch": 0.7594888019155503, "grad_norm": 381.2990944588106, "learning_rate": 2.77370850492777e-06, "loss": 325.8067, "step": 39490 }, { "epoch": 0.7596811262513102, "grad_norm": 445.0230904938098, "learning_rate": 2.7694911395694324e-06, "loss": 314.7334, "step": 39500 }, { "epoch": 0.75987345058707, "grad_norm": 403.1905932745522, "learning_rate": 2.765276467456225e-06, "loss": 318.6229, "step": 39510 }, { "epoch": 0.7600657749228299, "grad_norm": 458.2073033713597, "learning_rate": 2.761064490158052e-06, "loss": 316.019, "step": 39520 }, { "epoch": 0.7602580992585897, "grad_norm": 408.56165535659346, "learning_rate": 2.7568552092438018e-06, "loss": 312.4274, "step": 39530 }, { "epoch": 0.7604504235943496, "grad_norm": 385.1376898888618, "learning_rate": 2.7526486262813578e-06, "loss": 311.3018, "step": 39540 }, { "epoch": 0.7606427479301093, "grad_norm": 440.24580985049084, "learning_rate": 2.7484447428376094e-06, "loss": 304.7419, "step": 39550 }, { "epoch": 0.7608350722658692, "grad_norm": 432.4582625779512, "learning_rate": 2.7442435604784313e-06, "loss": 316.1508, "step": 39560 }, { "epoch": 0.761027396601629, "grad_norm": 448.45357357743336, "learning_rate": 2.740045080768694e-06, "loss": 313.9462, "step": 39570 }, { "epoch": 0.7612197209373888, "grad_norm": 380.86378975493, "learning_rate": 2.7358493052722603e-06, "loss": 310.8535, "step": 39580 }, { "epoch": 0.7614120452731487, "grad_norm": 445.4903320462483, "learning_rate": 2.7316562355519904e-06, "loss": 311.0319, "step": 39590 }, { "epoch": 0.7616043696089084, "grad_norm": 445.3212157028574, "learning_rate": 2.727465873169729e-06, "loss": 313.6938, "step": 39600 }, { "epoch": 0.7617966939446683, "grad_norm": 404.25712049662417, "learning_rate": 2.723278219686324e-06, "loss": 313.5266, "step": 39610 }, { "epoch": 0.7619890182804281, "grad_norm": 421.3557108061471, "learning_rate": 2.7190932766615998e-06, "loss": 314.9394, "step": 39620 }, { "epoch": 0.762181342616188, "grad_norm": 386.97066855709954, "learning_rate": 2.714911045654385e-06, "loss": 306.5084, "step": 39630 }, { "epoch": 0.7623736669519477, "grad_norm": 460.82167527419216, "learning_rate": 2.7107315282224878e-06, "loss": 312.0479, "step": 39640 }, { "epoch": 0.7625659912877076, "grad_norm": 406.8182848455515, "learning_rate": 2.7065547259227078e-06, "loss": 316.5486, "step": 39650 }, { "epoch": 0.7627583156234674, "grad_norm": 413.99856626749084, "learning_rate": 2.7023806403108397e-06, "loss": 319.0753, "step": 39660 }, { "epoch": 0.7629506399592273, "grad_norm": 411.90051587927303, "learning_rate": 2.698209272941659e-06, "loss": 306.7832, "step": 39670 }, { "epoch": 0.7631429642949871, "grad_norm": 457.43580844036217, "learning_rate": 2.694040625368931e-06, "loss": 326.498, "step": 39680 }, { "epoch": 0.7633352886307468, "grad_norm": 411.30369848863705, "learning_rate": 2.689874699145405e-06, "loss": 308.9756, "step": 39690 }, { "epoch": 0.7635276129665067, "grad_norm": 398.3373121598249, "learning_rate": 2.685711495822827e-06, "loss": 311.1085, "step": 39700 }, { "epoch": 0.7637199373022665, "grad_norm": 428.94310525011616, "learning_rate": 2.6815510169519164e-06, "loss": 308.3243, "step": 39710 }, { "epoch": 0.7639122616380264, "grad_norm": 396.94977533954454, "learning_rate": 2.677393264082381e-06, "loss": 323.8346, "step": 39720 }, { "epoch": 0.7641045859737862, "grad_norm": 385.67101920801804, "learning_rate": 2.673238238762921e-06, "loss": 315.482, "step": 39730 }, { "epoch": 0.764296910309546, "grad_norm": 436.1851237957672, "learning_rate": 2.6690859425412075e-06, "loss": 315.1936, "step": 39740 }, { "epoch": 0.7644892346453058, "grad_norm": 448.8862210291068, "learning_rate": 2.6649363769639103e-06, "loss": 326.1787, "step": 39750 }, { "epoch": 0.7646815589810657, "grad_norm": 423.4744383861163, "learning_rate": 2.660789543576667e-06, "loss": 313.9057, "step": 39760 }, { "epoch": 0.7648738833168255, "grad_norm": 393.9053991402812, "learning_rate": 2.6566454439241107e-06, "loss": 317.3791, "step": 39770 }, { "epoch": 0.7650662076525854, "grad_norm": 428.47595962990806, "learning_rate": 2.652504079549848e-06, "loss": 323.639, "step": 39780 }, { "epoch": 0.7652585319883451, "grad_norm": 410.35898028314455, "learning_rate": 2.648365451996466e-06, "loss": 315.4431, "step": 39790 }, { "epoch": 0.7654508563241049, "grad_norm": 373.75404609373265, "learning_rate": 2.6442295628055346e-06, "loss": 305.6955, "step": 39800 }, { "epoch": 0.7656431806598648, "grad_norm": 417.54418082322036, "learning_rate": 2.64009641351761e-06, "loss": 310.0194, "step": 39810 }, { "epoch": 0.7658355049956246, "grad_norm": 385.0059852460938, "learning_rate": 2.635966005672218e-06, "loss": 314.499, "step": 39820 }, { "epoch": 0.7660278293313845, "grad_norm": 415.00045271537886, "learning_rate": 2.631838340807865e-06, "loss": 316.8466, "step": 39830 }, { "epoch": 0.7662201536671442, "grad_norm": 387.6144328066203, "learning_rate": 2.6277134204620436e-06, "loss": 312.4498, "step": 39840 }, { "epoch": 0.7664124780029041, "grad_norm": 447.39456456731784, "learning_rate": 2.6235912461712167e-06, "loss": 323.8219, "step": 39850 }, { "epoch": 0.7666048023386639, "grad_norm": 440.41725177903106, "learning_rate": 2.619471819470821e-06, "loss": 310.6018, "step": 39860 }, { "epoch": 0.7667971266744238, "grad_norm": 394.7354704432082, "learning_rate": 2.6153551418952827e-06, "loss": 304.8577, "step": 39870 }, { "epoch": 0.7669894510101836, "grad_norm": 403.09031460703915, "learning_rate": 2.6112412149779888e-06, "loss": 314.8679, "step": 39880 }, { "epoch": 0.7671817753459435, "grad_norm": 417.9209442539809, "learning_rate": 2.6071300402513165e-06, "loss": 315.9194, "step": 39890 }, { "epoch": 0.7673740996817032, "grad_norm": 420.64007850047153, "learning_rate": 2.603021619246604e-06, "loss": 318.3601, "step": 39900 }, { "epoch": 0.767566424017463, "grad_norm": 396.55820321757284, "learning_rate": 2.5989159534941768e-06, "loss": 309.9709, "step": 39910 }, { "epoch": 0.7677587483532229, "grad_norm": 367.8801136751504, "learning_rate": 2.5948130445233232e-06, "loss": 312.2183, "step": 39920 }, { "epoch": 0.7679510726889827, "grad_norm": 414.36640348418854, "learning_rate": 2.5907128938623093e-06, "loss": 313.2414, "step": 39930 }, { "epoch": 0.7681433970247425, "grad_norm": 430.1949195026494, "learning_rate": 2.5866155030383722e-06, "loss": 321.1011, "step": 39940 }, { "epoch": 0.7683357213605023, "grad_norm": 429.312936849423, "learning_rate": 2.582520873577726e-06, "loss": 307.8642, "step": 39950 }, { "epoch": 0.7685280456962622, "grad_norm": 393.2588040831892, "learning_rate": 2.578429007005552e-06, "loss": 311.7097, "step": 39960 }, { "epoch": 0.768720370032022, "grad_norm": 416.42721342845925, "learning_rate": 2.5743399048460004e-06, "loss": 317.4759, "step": 39970 }, { "epoch": 0.7689126943677819, "grad_norm": 400.9684695727884, "learning_rate": 2.570253568622193e-06, "loss": 314.3595, "step": 39980 }, { "epoch": 0.7691050187035416, "grad_norm": 427.113626606421, "learning_rate": 2.5661699998562286e-06, "loss": 315.2968, "step": 39990 }, { "epoch": 0.7692973430393015, "grad_norm": 423.94129070785107, "learning_rate": 2.5620892000691643e-06, "loss": 327.664, "step": 40000 }, { "epoch": 0.7694896673750613, "grad_norm": 401.10430676856475, "learning_rate": 2.5580111707810296e-06, "loss": 305.0914, "step": 40010 }, { "epoch": 0.7696819917108211, "grad_norm": 468.06844511648166, "learning_rate": 2.5539359135108244e-06, "loss": 315.7456, "step": 40020 }, { "epoch": 0.769874316046581, "grad_norm": 386.7072814924101, "learning_rate": 2.549863429776519e-06, "loss": 306.9337, "step": 40030 }, { "epoch": 0.7700666403823407, "grad_norm": 401.47087065572555, "learning_rate": 2.5457937210950433e-06, "loss": 313.9831, "step": 40040 }, { "epoch": 0.7702589647181006, "grad_norm": 431.8558423452183, "learning_rate": 2.541726788982294e-06, "loss": 312.0791, "step": 40050 }, { "epoch": 0.7704512890538604, "grad_norm": 390.3045247412946, "learning_rate": 2.5376626349531395e-06, "loss": 312.8002, "step": 40060 }, { "epoch": 0.7706436133896203, "grad_norm": 423.7457867952994, "learning_rate": 2.53360126052141e-06, "loss": 319.1218, "step": 40070 }, { "epoch": 0.7708359377253801, "grad_norm": 427.5206262482864, "learning_rate": 2.529542667199896e-06, "loss": 320.6042, "step": 40080 }, { "epoch": 0.77102826206114, "grad_norm": 401.83857228784666, "learning_rate": 2.525486856500363e-06, "loss": 309.5803, "step": 40090 }, { "epoch": 0.7712205863968997, "grad_norm": 371.0653425091557, "learning_rate": 2.5214338299335306e-06, "loss": 314.1878, "step": 40100 }, { "epoch": 0.7714129107326596, "grad_norm": 403.61409540579876, "learning_rate": 2.5173835890090826e-06, "loss": 315.9101, "step": 40110 }, { "epoch": 0.7716052350684194, "grad_norm": 432.4538903693824, "learning_rate": 2.5133361352356666e-06, "loss": 311.8193, "step": 40120 }, { "epoch": 0.7717975594041792, "grad_norm": 397.61404430746956, "learning_rate": 2.5092914701208958e-06, "loss": 302.5074, "step": 40130 }, { "epoch": 0.771989883739939, "grad_norm": 380.3919254318495, "learning_rate": 2.5052495951713406e-06, "loss": 313.205, "step": 40140 }, { "epoch": 0.7721822080756988, "grad_norm": 391.2171480059374, "learning_rate": 2.5012105118925267e-06, "loss": 309.1393, "step": 40150 }, { "epoch": 0.7723745324114587, "grad_norm": 437.88153051460057, "learning_rate": 2.497174221788955e-06, "loss": 309.5633, "step": 40160 }, { "epoch": 0.7725668567472185, "grad_norm": 417.6253970950156, "learning_rate": 2.4931407263640683e-06, "loss": 313.5437, "step": 40170 }, { "epoch": 0.7727591810829784, "grad_norm": 413.36382260639436, "learning_rate": 2.489110027120285e-06, "loss": 315.954, "step": 40180 }, { "epoch": 0.7729515054187381, "grad_norm": 411.60598692056806, "learning_rate": 2.4850821255589664e-06, "loss": 309.8239, "step": 40190 }, { "epoch": 0.773143829754498, "grad_norm": 400.3319595781661, "learning_rate": 2.4810570231804463e-06, "loss": 320.8611, "step": 40200 }, { "epoch": 0.7733361540902578, "grad_norm": 393.2527964700215, "learning_rate": 2.4770347214840063e-06, "loss": 309.9603, "step": 40210 }, { "epoch": 0.7735284784260177, "grad_norm": 378.5322457103529, "learning_rate": 2.473015221967886e-06, "loss": 309.0794, "step": 40220 }, { "epoch": 0.7737208027617775, "grad_norm": 443.02013964119135, "learning_rate": 2.4689985261292805e-06, "loss": 307.6548, "step": 40230 }, { "epoch": 0.7739131270975372, "grad_norm": 416.5870913965198, "learning_rate": 2.464984635464348e-06, "loss": 311.1075, "step": 40240 }, { "epoch": 0.7741054514332971, "grad_norm": 373.9705085984144, "learning_rate": 2.460973551468194e-06, "loss": 296.5506, "step": 40250 }, { "epoch": 0.7742977757690569, "grad_norm": 421.02398762512104, "learning_rate": 2.456965275634878e-06, "loss": 318.1376, "step": 40260 }, { "epoch": 0.7744901001048168, "grad_norm": 420.4784677538794, "learning_rate": 2.4529598094574226e-06, "loss": 317.219, "step": 40270 }, { "epoch": 0.7746824244405766, "grad_norm": 417.6769893268246, "learning_rate": 2.4489571544277944e-06, "loss": 309.7965, "step": 40280 }, { "epoch": 0.7748747487763364, "grad_norm": 391.3772824446553, "learning_rate": 2.444957312036914e-06, "loss": 319.464, "step": 40290 }, { "epoch": 0.7750670731120962, "grad_norm": 401.08784043456615, "learning_rate": 2.4409602837746625e-06, "loss": 306.3166, "step": 40300 }, { "epoch": 0.7752593974478561, "grad_norm": 381.8706334187539, "learning_rate": 2.4369660711298603e-06, "loss": 307.2619, "step": 40310 }, { "epoch": 0.7754517217836159, "grad_norm": 420.02826108132604, "learning_rate": 2.4329746755902917e-06, "loss": 320.832, "step": 40320 }, { "epoch": 0.7756440461193758, "grad_norm": 431.8368821581145, "learning_rate": 2.428986098642684e-06, "loss": 309.1702, "step": 40330 }, { "epoch": 0.7758363704551355, "grad_norm": 403.6901944052936, "learning_rate": 2.425000341772711e-06, "loss": 312.0167, "step": 40340 }, { "epoch": 0.7760286947908953, "grad_norm": 428.7012456511869, "learning_rate": 2.4210174064650084e-06, "loss": 302.6539, "step": 40350 }, { "epoch": 0.7762210191266552, "grad_norm": 437.6464942151356, "learning_rate": 2.4170372942031506e-06, "loss": 310.3356, "step": 40360 }, { "epoch": 0.776413343462415, "grad_norm": 442.77140645883435, "learning_rate": 2.4130600064696618e-06, "loss": 311.8165, "step": 40370 }, { "epoch": 0.7766056677981749, "grad_norm": 419.62815482563906, "learning_rate": 2.4090855447460205e-06, "loss": 316.7124, "step": 40380 }, { "epoch": 0.7767979921339346, "grad_norm": 423.4392566345319, "learning_rate": 2.4051139105126463e-06, "loss": 314.872, "step": 40390 }, { "epoch": 0.7769903164696945, "grad_norm": 448.4811597860652, "learning_rate": 2.4011451052489064e-06, "loss": 319.0694, "step": 40400 }, { "epoch": 0.7771826408054543, "grad_norm": 426.027252547883, "learning_rate": 2.3971791304331125e-06, "loss": 306.6793, "step": 40410 }, { "epoch": 0.7773749651412142, "grad_norm": 402.44634297955156, "learning_rate": 2.393215987542531e-06, "loss": 312.2662, "step": 40420 }, { "epoch": 0.777567289476974, "grad_norm": 410.22711626699464, "learning_rate": 2.3892556780533606e-06, "loss": 328.041, "step": 40430 }, { "epoch": 0.7777596138127338, "grad_norm": 423.5711571934931, "learning_rate": 2.385298203440758e-06, "loss": 309.2229, "step": 40440 }, { "epoch": 0.7779519381484936, "grad_norm": 405.83359144268087, "learning_rate": 2.3813435651788107e-06, "loss": 312.5401, "step": 40450 }, { "epoch": 0.7781442624842534, "grad_norm": 380.9433366245735, "learning_rate": 2.377391764740562e-06, "loss": 306.0173, "step": 40460 }, { "epoch": 0.7783365868200133, "grad_norm": 400.21931784841496, "learning_rate": 2.3734428035979883e-06, "loss": 301.3098, "step": 40470 }, { "epoch": 0.778528911155773, "grad_norm": 391.2951260508504, "learning_rate": 2.3694966832220123e-06, "loss": 312.3013, "step": 40480 }, { "epoch": 0.7787212354915329, "grad_norm": 422.0511609043098, "learning_rate": 2.365553405082501e-06, "loss": 315.2574, "step": 40490 }, { "epoch": 0.7789135598272927, "grad_norm": 381.0920268929168, "learning_rate": 2.3616129706482604e-06, "loss": 314.7387, "step": 40500 }, { "epoch": 0.7791058841630526, "grad_norm": 421.1473939104334, "learning_rate": 2.357675381387036e-06, "loss": 312.7013, "step": 40510 }, { "epoch": 0.7792982084988124, "grad_norm": 408.1076473581412, "learning_rate": 2.3537406387655114e-06, "loss": 308.5221, "step": 40520 }, { "epoch": 0.7794905328345723, "grad_norm": 401.4157039030911, "learning_rate": 2.349808744249321e-06, "loss": 312.1836, "step": 40530 }, { "epoch": 0.779682857170332, "grad_norm": 389.9347954939756, "learning_rate": 2.345879699303025e-06, "loss": 311.4137, "step": 40540 }, { "epoch": 0.7798751815060919, "grad_norm": 423.9304598170786, "learning_rate": 2.3419535053901264e-06, "loss": 310.1337, "step": 40550 }, { "epoch": 0.7800675058418517, "grad_norm": 477.8369375150172, "learning_rate": 2.338030163973073e-06, "loss": 327.3379, "step": 40560 }, { "epoch": 0.7802598301776115, "grad_norm": 466.7243912405653, "learning_rate": 2.334109676513242e-06, "loss": 312.5931, "step": 40570 }, { "epoch": 0.7804521545133714, "grad_norm": 394.05278841500524, "learning_rate": 2.330192044470948e-06, "loss": 308.7047, "step": 40580 }, { "epoch": 0.7806444788491311, "grad_norm": 408.1179151660319, "learning_rate": 2.3262772693054457e-06, "loss": 320.7805, "step": 40590 }, { "epoch": 0.780836803184891, "grad_norm": 435.99017207023576, "learning_rate": 2.322365352474928e-06, "loss": 310.3925, "step": 40600 }, { "epoch": 0.7810291275206508, "grad_norm": 430.64363398922904, "learning_rate": 2.3184562954365153e-06, "loss": 320.0543, "step": 40610 }, { "epoch": 0.7812214518564107, "grad_norm": 428.5405584811241, "learning_rate": 2.3145500996462656e-06, "loss": 318.8713, "step": 40620 }, { "epoch": 0.7814137761921705, "grad_norm": 442.9949011554765, "learning_rate": 2.310646766559177e-06, "loss": 321.6667, "step": 40630 }, { "epoch": 0.7816061005279303, "grad_norm": 394.32070730531797, "learning_rate": 2.3067462976291744e-06, "loss": 324.4715, "step": 40640 }, { "epoch": 0.7817984248636901, "grad_norm": 395.5026398048101, "learning_rate": 2.302848694309118e-06, "loss": 317.8809, "step": 40650 }, { "epoch": 0.78199074919945, "grad_norm": 411.3453331322445, "learning_rate": 2.2989539580507957e-06, "loss": 315.451, "step": 40660 }, { "epoch": 0.7821830735352098, "grad_norm": 390.90484889462147, "learning_rate": 2.2950620903049414e-06, "loss": 316.9897, "step": 40670 }, { "epoch": 0.7823753978709697, "grad_norm": 421.70828080504424, "learning_rate": 2.2911730925212073e-06, "loss": 316.1592, "step": 40680 }, { "epoch": 0.7825677222067294, "grad_norm": 451.22643446392846, "learning_rate": 2.2872869661481766e-06, "loss": 305.51, "step": 40690 }, { "epoch": 0.7827600465424892, "grad_norm": 414.62841862885125, "learning_rate": 2.283403712633375e-06, "loss": 306.5684, "step": 40700 }, { "epoch": 0.7829523708782491, "grad_norm": 430.6780913238953, "learning_rate": 2.279523333423247e-06, "loss": 322.1448, "step": 40710 }, { "epoch": 0.7831446952140089, "grad_norm": 409.51508280048375, "learning_rate": 2.2756458299631667e-06, "loss": 302.9692, "step": 40720 }, { "epoch": 0.7833370195497688, "grad_norm": 437.0953657669929, "learning_rate": 2.271771203697445e-06, "loss": 311.2765, "step": 40730 }, { "epoch": 0.7835293438855285, "grad_norm": 424.05630715867176, "learning_rate": 2.267899456069311e-06, "loss": 313.9041, "step": 40740 }, { "epoch": 0.7837216682212884, "grad_norm": 420.6614023581561, "learning_rate": 2.2640305885209336e-06, "loss": 320.6284, "step": 40750 }, { "epoch": 0.7839139925570482, "grad_norm": 415.27999943813995, "learning_rate": 2.2601646024933976e-06, "loss": 321.8764, "step": 40760 }, { "epoch": 0.7841063168928081, "grad_norm": 396.26756268775796, "learning_rate": 2.256301499426716e-06, "loss": 310.1289, "step": 40770 }, { "epoch": 0.7842986412285678, "grad_norm": 417.7440705776239, "learning_rate": 2.252441280759838e-06, "loss": 311.4468, "step": 40780 }, { "epoch": 0.7844909655643277, "grad_norm": 422.10711189813924, "learning_rate": 2.248583947930628e-06, "loss": 303.672, "step": 40790 }, { "epoch": 0.7846832899000875, "grad_norm": 392.77485211293487, "learning_rate": 2.2447295023758755e-06, "loss": 310.353, "step": 40800 }, { "epoch": 0.7848756142358473, "grad_norm": 398.8194065818253, "learning_rate": 2.2408779455313035e-06, "loss": 312.5769, "step": 40810 }, { "epoch": 0.7850679385716072, "grad_norm": 398.37553887579645, "learning_rate": 2.2370292788315505e-06, "loss": 314.5786, "step": 40820 }, { "epoch": 0.785260262907367, "grad_norm": 406.3424975986708, "learning_rate": 2.2331835037101825e-06, "loss": 304.1552, "step": 40830 }, { "epoch": 0.7854525872431268, "grad_norm": 408.10032093031566, "learning_rate": 2.2293406215996814e-06, "loss": 309.1903, "step": 40840 }, { "epoch": 0.7856449115788866, "grad_norm": 383.6693692367047, "learning_rate": 2.2255006339314667e-06, "loss": 312.4329, "step": 40850 }, { "epoch": 0.7858372359146465, "grad_norm": 413.7728525325508, "learning_rate": 2.2216635421358623e-06, "loss": 304.8318, "step": 40860 }, { "epoch": 0.7860295602504063, "grad_norm": 408.1206672763752, "learning_rate": 2.2178293476421276e-06, "loss": 307.9965, "step": 40870 }, { "epoch": 0.7862218845861662, "grad_norm": 513.5604920056612, "learning_rate": 2.213998051878431e-06, "loss": 309.4396, "step": 40880 }, { "epoch": 0.7864142089219259, "grad_norm": 429.4090592087335, "learning_rate": 2.2101696562718735e-06, "loss": 304.6626, "step": 40890 }, { "epoch": 0.7866065332576858, "grad_norm": 437.87832050621137, "learning_rate": 2.206344162248466e-06, "loss": 320.112, "step": 40900 }, { "epoch": 0.7867988575934456, "grad_norm": 397.4649874463456, "learning_rate": 2.2025215712331383e-06, "loss": 311.9292, "step": 40910 }, { "epoch": 0.7869911819292054, "grad_norm": 436.76947708713067, "learning_rate": 2.1987018846497487e-06, "loss": 307.7339, "step": 40920 }, { "epoch": 0.7871835062649652, "grad_norm": 394.1935927330882, "learning_rate": 2.194885103921064e-06, "loss": 312.8891, "step": 40930 }, { "epoch": 0.787375830600725, "grad_norm": 424.1501999635146, "learning_rate": 2.191071230468772e-06, "loss": 315.917, "step": 40940 }, { "epoch": 0.7875681549364849, "grad_norm": 431.6847064803192, "learning_rate": 2.1872602657134757e-06, "loss": 316.4769, "step": 40950 }, { "epoch": 0.7877604792722447, "grad_norm": 408.5040175877471, "learning_rate": 2.1834522110747014e-06, "loss": 312.625, "step": 40960 }, { "epoch": 0.7879528036080046, "grad_norm": 403.65461434670306, "learning_rate": 2.179647067970885e-06, "loss": 309.2533, "step": 40970 }, { "epoch": 0.7881451279437643, "grad_norm": 463.5459539500682, "learning_rate": 2.1758448378193743e-06, "loss": 318.881, "step": 40980 }, { "epoch": 0.7883374522795242, "grad_norm": 409.3855141503293, "learning_rate": 2.1720455220364443e-06, "loss": 310.3153, "step": 40990 }, { "epoch": 0.788529776615284, "grad_norm": 418.3573944071805, "learning_rate": 2.168249122037275e-06, "loss": 307.567, "step": 41000 }, { "epoch": 0.7887221009510439, "grad_norm": 413.41596894292144, "learning_rate": 2.1644556392359583e-06, "loss": 309.2228, "step": 41010 }, { "epoch": 0.7889144252868037, "grad_norm": 396.264812271915, "learning_rate": 2.160665075045508e-06, "loss": 300.119, "step": 41020 }, { "epoch": 0.7891067496225634, "grad_norm": 384.9750396955149, "learning_rate": 2.1568774308778494e-06, "loss": 314.1672, "step": 41030 }, { "epoch": 0.7892990739583233, "grad_norm": 432.08155574589415, "learning_rate": 2.1530927081438148e-06, "loss": 324.8616, "step": 41040 }, { "epoch": 0.7894913982940831, "grad_norm": 445.69000324077496, "learning_rate": 2.1493109082531473e-06, "loss": 300.0533, "step": 41050 }, { "epoch": 0.789683722629843, "grad_norm": 433.16143918617195, "learning_rate": 2.1455320326145103e-06, "loss": 304.4936, "step": 41060 }, { "epoch": 0.7898760469656028, "grad_norm": 402.76524694665744, "learning_rate": 2.141756082635471e-06, "loss": 308.7866, "step": 41070 }, { "epoch": 0.7900683713013626, "grad_norm": 419.62007628768663, "learning_rate": 2.137983059722507e-06, "loss": 301.3151, "step": 41080 }, { "epoch": 0.7902606956371224, "grad_norm": 413.6466504810756, "learning_rate": 2.1342129652810063e-06, "loss": 308.5127, "step": 41090 }, { "epoch": 0.7904530199728823, "grad_norm": 417.4033953263661, "learning_rate": 2.1304458007152694e-06, "loss": 305.7939, "step": 41100 }, { "epoch": 0.7906453443086421, "grad_norm": 420.63154716783026, "learning_rate": 2.1266815674285026e-06, "loss": 319.9178, "step": 41110 }, { "epoch": 0.790837668644402, "grad_norm": 426.74679328452436, "learning_rate": 2.1229202668228197e-06, "loss": 325.0511, "step": 41120 }, { "epoch": 0.7910299929801617, "grad_norm": 424.0738356287959, "learning_rate": 2.1191619002992405e-06, "loss": 300.2503, "step": 41130 }, { "epoch": 0.7912223173159215, "grad_norm": 460.0166816375219, "learning_rate": 2.1154064692577e-06, "loss": 308.9102, "step": 41140 }, { "epoch": 0.7914146416516814, "grad_norm": 401.33104977383886, "learning_rate": 2.111653975097029e-06, "loss": 306.6995, "step": 41150 }, { "epoch": 0.7916069659874412, "grad_norm": 426.6695098261237, "learning_rate": 2.1079044192149713e-06, "loss": 311.5517, "step": 41160 }, { "epoch": 0.7917992903232011, "grad_norm": 402.4697929884768, "learning_rate": 2.1041578030081777e-06, "loss": 309.8065, "step": 41170 }, { "epoch": 0.7919916146589608, "grad_norm": 398.7737487701014, "learning_rate": 2.100414127872198e-06, "loss": 303.9714, "step": 41180 }, { "epoch": 0.7921839389947207, "grad_norm": 399.56181410402843, "learning_rate": 2.0966733952014904e-06, "loss": 321.0781, "step": 41190 }, { "epoch": 0.7923762633304805, "grad_norm": 370.74344280725614, "learning_rate": 2.0929356063894125e-06, "loss": 310.3235, "step": 41200 }, { "epoch": 0.7925685876662404, "grad_norm": 433.22970589251315, "learning_rate": 2.089200762828234e-06, "loss": 312.2444, "step": 41210 }, { "epoch": 0.7927609120020002, "grad_norm": 388.8603080085678, "learning_rate": 2.0854688659091203e-06, "loss": 313.439, "step": 41220 }, { "epoch": 0.79295323633776, "grad_norm": 395.7810147279743, "learning_rate": 2.08173991702214e-06, "loss": 302.2224, "step": 41230 }, { "epoch": 0.7931455606735198, "grad_norm": 413.84484823595136, "learning_rate": 2.0780139175562675e-06, "loss": 311.3913, "step": 41240 }, { "epoch": 0.7933378850092796, "grad_norm": 414.3545464302663, "learning_rate": 2.0742908688993746e-06, "loss": 306.7947, "step": 41250 }, { "epoch": 0.7935302093450395, "grad_norm": 408.7211595559714, "learning_rate": 2.070570772438236e-06, "loss": 304.9645, "step": 41260 }, { "epoch": 0.7937225336807993, "grad_norm": 391.41070124901245, "learning_rate": 2.066853629558524e-06, "loss": 312.0633, "step": 41270 }, { "epoch": 0.7939148580165591, "grad_norm": 392.0298225013226, "learning_rate": 2.0631394416448157e-06, "loss": 300.5851, "step": 41280 }, { "epoch": 0.7941071823523189, "grad_norm": 394.33972644436676, "learning_rate": 2.059428210080583e-06, "loss": 302.7163, "step": 41290 }, { "epoch": 0.7942995066880788, "grad_norm": 464.0277523095237, "learning_rate": 2.0557199362482005e-06, "loss": 314.2422, "step": 41300 }, { "epoch": 0.7944918310238386, "grad_norm": 408.88500787597485, "learning_rate": 2.052014621528935e-06, "loss": 308.871, "step": 41310 }, { "epoch": 0.7946841553595985, "grad_norm": 393.6820137702138, "learning_rate": 2.048312267302961e-06, "loss": 318.9189, "step": 41320 }, { "epoch": 0.7948764796953582, "grad_norm": 389.13671333433956, "learning_rate": 2.044612874949341e-06, "loss": 304.0426, "step": 41330 }, { "epoch": 0.7950688040311181, "grad_norm": 402.4689379174277, "learning_rate": 2.040916445846034e-06, "loss": 318.7442, "step": 41340 }, { "epoch": 0.7952611283668779, "grad_norm": 534.4974650455013, "learning_rate": 2.037222981369905e-06, "loss": 305.6872, "step": 41350 }, { "epoch": 0.7954534527026377, "grad_norm": 403.443000767358, "learning_rate": 2.033532482896707e-06, "loss": 307.3848, "step": 41360 }, { "epoch": 0.7956457770383976, "grad_norm": 445.155693695892, "learning_rate": 2.0298449518010875e-06, "loss": 308.726, "step": 41370 }, { "epoch": 0.7958381013741573, "grad_norm": 427.4768329158586, "learning_rate": 2.0261603894565897e-06, "loss": 309.1523, "step": 41380 }, { "epoch": 0.7960304257099172, "grad_norm": 411.55110561014817, "learning_rate": 2.0224787972356574e-06, "loss": 312.8725, "step": 41390 }, { "epoch": 0.796222750045677, "grad_norm": 400.2907779344978, "learning_rate": 2.0188001765096198e-06, "loss": 313.9252, "step": 41400 }, { "epoch": 0.7964150743814369, "grad_norm": 412.6303180111913, "learning_rate": 2.0151245286486998e-06, "loss": 310.0146, "step": 41410 }, { "epoch": 0.7966073987171967, "grad_norm": 400.0735837232573, "learning_rate": 2.011451855022021e-06, "loss": 313.221, "step": 41420 }, { "epoch": 0.7967997230529565, "grad_norm": 404.70054361888714, "learning_rate": 2.0077821569975885e-06, "loss": 311.697, "step": 41430 }, { "epoch": 0.7969920473887163, "grad_norm": 430.3644867725602, "learning_rate": 2.0041154359423087e-06, "loss": 317.9353, "step": 41440 }, { "epoch": 0.7971843717244762, "grad_norm": 428.0505087686813, "learning_rate": 2.000451693221971e-06, "loss": 317.9721, "step": 41450 }, { "epoch": 0.797376696060236, "grad_norm": 410.3627598673023, "learning_rate": 1.9967909302012635e-06, "loss": 308.9829, "step": 41460 }, { "epoch": 0.7975690203959958, "grad_norm": 406.2366473799651, "learning_rate": 1.9931331482437553e-06, "loss": 309.653, "step": 41470 }, { "epoch": 0.7977613447317556, "grad_norm": 385.7246857268013, "learning_rate": 1.989478348711913e-06, "loss": 307.2674, "step": 41480 }, { "epoch": 0.7979536690675154, "grad_norm": 404.9555301471717, "learning_rate": 1.9858265329670844e-06, "loss": 307.7042, "step": 41490 }, { "epoch": 0.7981459934032753, "grad_norm": 409.2744055140272, "learning_rate": 1.9821777023695178e-06, "loss": 308.5514, "step": 41500 }, { "epoch": 0.7983383177390351, "grad_norm": 411.3852387421349, "learning_rate": 1.9785318582783375e-06, "loss": 308.0267, "step": 41510 }, { "epoch": 0.798530642074795, "grad_norm": 384.05684103370396, "learning_rate": 1.9748890020515577e-06, "loss": 312.3036, "step": 41520 }, { "epoch": 0.7987229664105547, "grad_norm": 385.8838582318483, "learning_rate": 1.9712491350460895e-06, "loss": 304.0441, "step": 41530 }, { "epoch": 0.7989152907463146, "grad_norm": 394.6691368185795, "learning_rate": 1.967612258617718e-06, "loss": 304.6756, "step": 41540 }, { "epoch": 0.7991076150820744, "grad_norm": 429.2317074748059, "learning_rate": 1.9639783741211218e-06, "loss": 311.8391, "step": 41550 }, { "epoch": 0.7992999394178343, "grad_norm": 397.7645364719865, "learning_rate": 1.960347482909859e-06, "loss": 309.7134, "step": 41560 }, { "epoch": 0.7994922637535941, "grad_norm": 452.2229297317847, "learning_rate": 1.956719586336382e-06, "loss": 316.8328, "step": 41570 }, { "epoch": 0.7996845880893538, "grad_norm": 410.56945513535027, "learning_rate": 1.953094685752017e-06, "loss": 310.9083, "step": 41580 }, { "epoch": 0.7998769124251137, "grad_norm": 378.70035488693077, "learning_rate": 1.949472782506984e-06, "loss": 313.304, "step": 41590 }, { "epoch": 0.8000692367608735, "grad_norm": 413.5076669215031, "learning_rate": 1.945853877950382e-06, "loss": 325.3368, "step": 41600 }, { "epoch": 0.8002615610966334, "grad_norm": 431.09325237040275, "learning_rate": 1.942237973430192e-06, "loss": 317.6258, "step": 41610 }, { "epoch": 0.8004538854323932, "grad_norm": 438.3780318282062, "learning_rate": 1.9386250702932784e-06, "loss": 314.4511, "step": 41620 }, { "epoch": 0.800646209768153, "grad_norm": 407.703498976155, "learning_rate": 1.9350151698853857e-06, "loss": 314.5751, "step": 41630 }, { "epoch": 0.8008385341039128, "grad_norm": 402.9200771757179, "learning_rate": 1.9314082735511475e-06, "loss": 309.926, "step": 41640 }, { "epoch": 0.8010308584396727, "grad_norm": 419.98269993257713, "learning_rate": 1.92780438263407e-06, "loss": 305.309, "step": 41650 }, { "epoch": 0.8012231827754325, "grad_norm": 387.0709198264562, "learning_rate": 1.9242034984765436e-06, "loss": 312.4188, "step": 41660 }, { "epoch": 0.8014155071111924, "grad_norm": 395.1476476816383, "learning_rate": 1.9206056224198346e-06, "loss": 305.0891, "step": 41670 }, { "epoch": 0.8016078314469521, "grad_norm": 431.28337069401505, "learning_rate": 1.9170107558040983e-06, "loss": 315.3144, "step": 41680 }, { "epoch": 0.801800155782712, "grad_norm": 408.65066544976474, "learning_rate": 1.9134188999683613e-06, "loss": 302.9139, "step": 41690 }, { "epoch": 0.8019924801184718, "grad_norm": 415.65819937197, "learning_rate": 1.9098300562505266e-06, "loss": 310.0961, "step": 41700 }, { "epoch": 0.8021848044542316, "grad_norm": 395.66579858118723, "learning_rate": 1.9062442259873847e-06, "loss": 313.4762, "step": 41710 }, { "epoch": 0.8023771287899915, "grad_norm": 421.56141913034077, "learning_rate": 1.9026614105145935e-06, "loss": 321.0154, "step": 41720 }, { "epoch": 0.8025694531257512, "grad_norm": 405.29192833926294, "learning_rate": 1.8990816111666976e-06, "loss": 309.3742, "step": 41730 }, { "epoch": 0.8027617774615111, "grad_norm": 416.2764926481287, "learning_rate": 1.8955048292771083e-06, "loss": 307.1205, "step": 41740 }, { "epoch": 0.8029541017972709, "grad_norm": 408.4451145932948, "learning_rate": 1.891931066178122e-06, "loss": 299.2861, "step": 41750 }, { "epoch": 0.8031464261330308, "grad_norm": 385.6296985539054, "learning_rate": 1.888360323200904e-06, "loss": 298.5853, "step": 41760 }, { "epoch": 0.8033387504687906, "grad_norm": 390.6209917945643, "learning_rate": 1.8847926016754947e-06, "loss": 307.6281, "step": 41770 }, { "epoch": 0.8035310748045504, "grad_norm": 414.7057062991525, "learning_rate": 1.8812279029308177e-06, "loss": 307.143, "step": 41780 }, { "epoch": 0.8037233991403102, "grad_norm": 412.90339155146097, "learning_rate": 1.87766622829466e-06, "loss": 311.2535, "step": 41790 }, { "epoch": 0.8039157234760701, "grad_norm": 413.0134697210266, "learning_rate": 1.874107579093688e-06, "loss": 324.2331, "step": 41800 }, { "epoch": 0.8041080478118299, "grad_norm": 410.15688941487105, "learning_rate": 1.870551956653437e-06, "loss": 317.0837, "step": 41810 }, { "epoch": 0.8043003721475896, "grad_norm": 405.4574073762015, "learning_rate": 1.8669993622983217e-06, "loss": 316.0981, "step": 41820 }, { "epoch": 0.8044926964833495, "grad_norm": 404.6597887685546, "learning_rate": 1.863449797351624e-06, "loss": 309.9964, "step": 41830 }, { "epoch": 0.8046850208191093, "grad_norm": 403.83109312868817, "learning_rate": 1.8599032631354963e-06, "loss": 313.5171, "step": 41840 }, { "epoch": 0.8048773451548692, "grad_norm": 439.15257366140855, "learning_rate": 1.8563597609709626e-06, "loss": 321.2541, "step": 41850 }, { "epoch": 0.805069669490629, "grad_norm": 408.9617193632844, "learning_rate": 1.852819292177922e-06, "loss": 312.5056, "step": 41860 }, { "epoch": 0.8052619938263889, "grad_norm": 444.4408849629124, "learning_rate": 1.8492818580751414e-06, "loss": 323.8703, "step": 41870 }, { "epoch": 0.8054543181621486, "grad_norm": 428.96391093624396, "learning_rate": 1.8457474599802527e-06, "loss": 304.4847, "step": 41880 }, { "epoch": 0.8056466424979085, "grad_norm": 421.5004817056809, "learning_rate": 1.842216099209767e-06, "loss": 316.7431, "step": 41890 }, { "epoch": 0.8058389668336683, "grad_norm": 388.40606039820835, "learning_rate": 1.8386877770790524e-06, "loss": 307.68, "step": 41900 }, { "epoch": 0.8060312911694282, "grad_norm": 453.65573103567783, "learning_rate": 1.8351624949023539e-06, "loss": 319.7826, "step": 41910 }, { "epoch": 0.806223615505188, "grad_norm": 413.60801458361624, "learning_rate": 1.8316402539927757e-06, "loss": 312.6104, "step": 41920 }, { "epoch": 0.8064159398409477, "grad_norm": 419.5876485086959, "learning_rate": 1.8281210556623007e-06, "loss": 304.2615, "step": 41930 }, { "epoch": 0.8066082641767076, "grad_norm": 413.97681046550025, "learning_rate": 1.8246049012217693e-06, "loss": 305.5498, "step": 41940 }, { "epoch": 0.8068005885124674, "grad_norm": 391.72215458436597, "learning_rate": 1.8210917919808891e-06, "loss": 315.7686, "step": 41950 }, { "epoch": 0.8069929128482273, "grad_norm": 401.94078560143623, "learning_rate": 1.817581729248239e-06, "loss": 309.178, "step": 41960 }, { "epoch": 0.807185237183987, "grad_norm": 463.05005440890915, "learning_rate": 1.8140747143312588e-06, "loss": 309.883, "step": 41970 }, { "epoch": 0.8073775615197469, "grad_norm": 433.2230208778572, "learning_rate": 1.8105707485362511e-06, "loss": 302.4812, "step": 41980 }, { "epoch": 0.8075698858555067, "grad_norm": 383.7629326672162, "learning_rate": 1.8070698331683844e-06, "loss": 308.8602, "step": 41990 }, { "epoch": 0.8077622101912666, "grad_norm": 401.09806979534505, "learning_rate": 1.8035719695316955e-06, "loss": 319.9707, "step": 42000 }, { "epoch": 0.8079545345270264, "grad_norm": 394.22092803620967, "learning_rate": 1.800077158929081e-06, "loss": 311.2034, "step": 42010 }, { "epoch": 0.8081468588627863, "grad_norm": 409.7361859692149, "learning_rate": 1.7965854026622953e-06, "loss": 306.2973, "step": 42020 }, { "epoch": 0.808339183198546, "grad_norm": 439.4300326694273, "learning_rate": 1.7930967020319667e-06, "loss": 314.2226, "step": 42030 }, { "epoch": 0.8085315075343058, "grad_norm": 402.80174441963084, "learning_rate": 1.7896110583375747e-06, "loss": 299.4835, "step": 42040 }, { "epoch": 0.8087238318700657, "grad_norm": 363.44072180640245, "learning_rate": 1.7861284728774652e-06, "loss": 309.756, "step": 42050 }, { "epoch": 0.8089161562058255, "grad_norm": 407.1417539220159, "learning_rate": 1.7826489469488395e-06, "loss": 305.5184, "step": 42060 }, { "epoch": 0.8091084805415854, "grad_norm": 423.7589204811408, "learning_rate": 1.7791724818477708e-06, "loss": 307.9455, "step": 42070 }, { "epoch": 0.8093008048773451, "grad_norm": 414.5921398814254, "learning_rate": 1.7756990788691797e-06, "loss": 303.6039, "step": 42080 }, { "epoch": 0.809493129213105, "grad_norm": 388.42061646518516, "learning_rate": 1.772228739306854e-06, "loss": 309.3741, "step": 42090 }, { "epoch": 0.8096854535488648, "grad_norm": 404.0016850680053, "learning_rate": 1.7687614644534333e-06, "loss": 304.8437, "step": 42100 }, { "epoch": 0.8098777778846247, "grad_norm": 442.6996275222591, "learning_rate": 1.7652972556004267e-06, "loss": 317.3699, "step": 42110 }, { "epoch": 0.8100701022203844, "grad_norm": 441.3560619649883, "learning_rate": 1.7618361140381922e-06, "loss": 322.6206, "step": 42120 }, { "epoch": 0.8102624265561443, "grad_norm": 420.88822753152056, "learning_rate": 1.7583780410559449e-06, "loss": 312.3599, "step": 42130 }, { "epoch": 0.8104547508919041, "grad_norm": 404.81850041085477, "learning_rate": 1.7549230379417636e-06, "loss": 312.039, "step": 42140 }, { "epoch": 0.8106470752276639, "grad_norm": 404.90281054148977, "learning_rate": 1.7514711059825773e-06, "loss": 304.1775, "step": 42150 }, { "epoch": 0.8108393995634238, "grad_norm": 396.2542202053286, "learning_rate": 1.7480222464641783e-06, "loss": 323.5015, "step": 42160 }, { "epoch": 0.8110317238991835, "grad_norm": 423.1607763548634, "learning_rate": 1.7445764606712024e-06, "loss": 305.1296, "step": 42170 }, { "epoch": 0.8112240482349434, "grad_norm": 418.2332118298525, "learning_rate": 1.7411337498871561e-06, "loss": 307.3521, "step": 42180 }, { "epoch": 0.8114163725707032, "grad_norm": 428.055348548745, "learning_rate": 1.737694115394387e-06, "loss": 314.606, "step": 42190 }, { "epoch": 0.8116086969064631, "grad_norm": 410.03672431689245, "learning_rate": 1.7342575584741018e-06, "loss": 308.3802, "step": 42200 }, { "epoch": 0.8118010212422229, "grad_norm": 426.633963182364, "learning_rate": 1.7308240804063648e-06, "loss": 308.3395, "step": 42210 }, { "epoch": 0.8119933455779828, "grad_norm": 407.0921867161722, "learning_rate": 1.7273936824700888e-06, "loss": 315.7191, "step": 42220 }, { "epoch": 0.8121856699137425, "grad_norm": 374.81437917432567, "learning_rate": 1.7239663659430384e-06, "loss": 310.0117, "step": 42230 }, { "epoch": 0.8123779942495024, "grad_norm": 409.10388015131576, "learning_rate": 1.7205421321018312e-06, "loss": 317.0021, "step": 42240 }, { "epoch": 0.8125703185852622, "grad_norm": 382.54398085650195, "learning_rate": 1.7171209822219427e-06, "loss": 319.6945, "step": 42250 }, { "epoch": 0.812762642921022, "grad_norm": 392.3959586105822, "learning_rate": 1.713702917577692e-06, "loss": 312.9114, "step": 42260 }, { "epoch": 0.8129549672567818, "grad_norm": 406.31131552509754, "learning_rate": 1.71028793944225e-06, "loss": 307.5117, "step": 42270 }, { "epoch": 0.8131472915925416, "grad_norm": 401.0170245517488, "learning_rate": 1.7068760490876425e-06, "loss": 308.5841, "step": 42280 }, { "epoch": 0.8133396159283015, "grad_norm": 394.7075119154168, "learning_rate": 1.7034672477847402e-06, "loss": 310.3857, "step": 42290 }, { "epoch": 0.8135319402640613, "grad_norm": 385.3946486335663, "learning_rate": 1.700061536803268e-06, "loss": 306.9722, "step": 42300 }, { "epoch": 0.8137242645998212, "grad_norm": 448.8436897946402, "learning_rate": 1.696658917411793e-06, "loss": 307.0883, "step": 42310 }, { "epoch": 0.8139165889355809, "grad_norm": 402.0297537593429, "learning_rate": 1.6932593908777394e-06, "loss": 312.4774, "step": 42320 }, { "epoch": 0.8141089132713408, "grad_norm": 401.9973192021659, "learning_rate": 1.689862958467372e-06, "loss": 306.6643, "step": 42330 }, { "epoch": 0.8143012376071006, "grad_norm": 412.21038921291057, "learning_rate": 1.6864696214458065e-06, "loss": 309.7551, "step": 42340 }, { "epoch": 0.8144935619428605, "grad_norm": 392.7780980269767, "learning_rate": 1.683079381077003e-06, "loss": 314.4754, "step": 42350 }, { "epoch": 0.8146858862786203, "grad_norm": 409.5128937394212, "learning_rate": 1.6796922386237724e-06, "loss": 318.146, "step": 42360 }, { "epoch": 0.81487821061438, "grad_norm": 412.30147847210947, "learning_rate": 1.67630819534777e-06, "loss": 313.1921, "step": 42370 }, { "epoch": 0.8150705349501399, "grad_norm": 440.11693125471027, "learning_rate": 1.6729272525094908e-06, "loss": 311.6013, "step": 42380 }, { "epoch": 0.8152628592858997, "grad_norm": 431.98961622993335, "learning_rate": 1.6695494113682874e-06, "loss": 314.7857, "step": 42390 }, { "epoch": 0.8154551836216596, "grad_norm": 411.42898088462056, "learning_rate": 1.6661746731823458e-06, "loss": 306.7567, "step": 42400 }, { "epoch": 0.8156475079574194, "grad_norm": 410.6316817635866, "learning_rate": 1.6628030392087001e-06, "loss": 311.9825, "step": 42410 }, { "epoch": 0.8158398322931792, "grad_norm": 409.6194112839262, "learning_rate": 1.6594345107032273e-06, "loss": 317.3002, "step": 42420 }, { "epoch": 0.816032156628939, "grad_norm": 400.9852784179425, "learning_rate": 1.6560690889206499e-06, "loss": 313.499, "step": 42430 }, { "epoch": 0.8162244809646989, "grad_norm": 401.8736664051403, "learning_rate": 1.6527067751145354e-06, "loss": 301.383, "step": 42440 }, { "epoch": 0.8164168053004587, "grad_norm": 398.26230356371184, "learning_rate": 1.6493475705372863e-06, "loss": 309.8659, "step": 42450 }, { "epoch": 0.8166091296362186, "grad_norm": 415.14565715216116, "learning_rate": 1.6459914764401497e-06, "loss": 317.2495, "step": 42460 }, { "epoch": 0.8168014539719783, "grad_norm": 432.73069664734874, "learning_rate": 1.64263849407322e-06, "loss": 310.5572, "step": 42470 }, { "epoch": 0.8169937783077381, "grad_norm": 379.21529022894157, "learning_rate": 1.6392886246854234e-06, "loss": 296.3163, "step": 42480 }, { "epoch": 0.817186102643498, "grad_norm": 416.17721523460403, "learning_rate": 1.6359418695245311e-06, "loss": 305.976, "step": 42490 }, { "epoch": 0.8173784269792578, "grad_norm": 418.29091803976485, "learning_rate": 1.632598229837158e-06, "loss": 310.422, "step": 42500 }, { "epoch": 0.8175707513150177, "grad_norm": 399.3470917060732, "learning_rate": 1.629257706868751e-06, "loss": 307.9481, "step": 42510 }, { "epoch": 0.8177630756507774, "grad_norm": 379.01930220379717, "learning_rate": 1.6259203018636016e-06, "loss": 306.1668, "step": 42520 }, { "epoch": 0.8179553999865373, "grad_norm": 425.2605228905251, "learning_rate": 1.6225860160648343e-06, "loss": 318.8865, "step": 42530 }, { "epoch": 0.8181477243222971, "grad_norm": 396.26121774724527, "learning_rate": 1.6192548507144213e-06, "loss": 313.6861, "step": 42540 }, { "epoch": 0.818340048658057, "grad_norm": 388.96172309999923, "learning_rate": 1.6159268070531642e-06, "loss": 303.8297, "step": 42550 }, { "epoch": 0.8185323729938168, "grad_norm": 445.15160080702293, "learning_rate": 1.6126018863207005e-06, "loss": 318.8677, "step": 42560 }, { "epoch": 0.8187246973295766, "grad_norm": 409.65713462434377, "learning_rate": 1.609280089755515e-06, "loss": 312.0928, "step": 42570 }, { "epoch": 0.8189170216653364, "grad_norm": 442.47644572618, "learning_rate": 1.6059614185949157e-06, "loss": 312.5717, "step": 42580 }, { "epoch": 0.8191093460010962, "grad_norm": 394.5766013685885, "learning_rate": 1.6026458740750584e-06, "loss": 297.5948, "step": 42590 }, { "epoch": 0.8193016703368561, "grad_norm": 387.29266393085965, "learning_rate": 1.5993334574309238e-06, "loss": 314.3256, "step": 42600 }, { "epoch": 0.8194939946726159, "grad_norm": 398.6022759364853, "learning_rate": 1.5960241698963374e-06, "loss": 302.7601, "step": 42610 }, { "epoch": 0.8196863190083757, "grad_norm": 412.5073786107496, "learning_rate": 1.592718012703951e-06, "loss": 309.0457, "step": 42620 }, { "epoch": 0.8198786433441355, "grad_norm": 402.668192180866, "learning_rate": 1.589414987085255e-06, "loss": 298.2543, "step": 42630 }, { "epoch": 0.8200709676798954, "grad_norm": 405.7025109899609, "learning_rate": 1.5861150942705672e-06, "loss": 306.7637, "step": 42640 }, { "epoch": 0.8202632920156552, "grad_norm": 382.3442127885861, "learning_rate": 1.5828183354890504e-06, "loss": 299.5104, "step": 42650 }, { "epoch": 0.8204556163514151, "grad_norm": 379.2150502137799, "learning_rate": 1.5795247119686885e-06, "loss": 299.6946, "step": 42660 }, { "epoch": 0.8206479406871748, "grad_norm": 435.7394531715828, "learning_rate": 1.5762342249363006e-06, "loss": 316.0116, "step": 42670 }, { "epoch": 0.8208402650229347, "grad_norm": 385.3948038543622, "learning_rate": 1.5729468756175426e-06, "loss": 299.1303, "step": 42680 }, { "epoch": 0.8210325893586945, "grad_norm": 411.6885845379898, "learning_rate": 1.5696626652368973e-06, "loss": 305.3687, "step": 42690 }, { "epoch": 0.8212249136944543, "grad_norm": 440.36084463891024, "learning_rate": 1.5663815950176742e-06, "loss": 311.0056, "step": 42700 }, { "epoch": 0.8214172380302142, "grad_norm": 410.38556544557576, "learning_rate": 1.5631036661820232e-06, "loss": 316.9325, "step": 42710 }, { "epoch": 0.8216095623659739, "grad_norm": 393.16886637814446, "learning_rate": 1.5598288799509153e-06, "loss": 304.1306, "step": 42720 }, { "epoch": 0.8218018867017338, "grad_norm": 420.4057562437857, "learning_rate": 1.5565572375441573e-06, "loss": 308.2313, "step": 42730 }, { "epoch": 0.8219942110374936, "grad_norm": 431.61374927043823, "learning_rate": 1.5532887401803787e-06, "loss": 309.4908, "step": 42740 }, { "epoch": 0.8221865353732535, "grad_norm": 405.95297521991796, "learning_rate": 1.5500233890770434e-06, "loss": 309.826, "step": 42750 }, { "epoch": 0.8223788597090133, "grad_norm": 404.76047493259057, "learning_rate": 1.5467611854504406e-06, "loss": 301.0147, "step": 42760 }, { "epoch": 0.8225711840447731, "grad_norm": 396.45738573903355, "learning_rate": 1.5435021305156862e-06, "loss": 309.4137, "step": 42770 }, { "epoch": 0.8227635083805329, "grad_norm": 401.13026869766855, "learning_rate": 1.5402462254867222e-06, "loss": 316.6085, "step": 42780 }, { "epoch": 0.8229558327162928, "grad_norm": 434.444044410929, "learning_rate": 1.5369934715763235e-06, "loss": 309.6957, "step": 42790 }, { "epoch": 0.8231481570520526, "grad_norm": 406.8286170554677, "learning_rate": 1.533743869996086e-06, "loss": 304.6467, "step": 42800 }, { "epoch": 0.8233404813878125, "grad_norm": 413.56825052349353, "learning_rate": 1.5304974219564318e-06, "loss": 309.2877, "step": 42810 }, { "epoch": 0.8235328057235722, "grad_norm": 411.52541558689006, "learning_rate": 1.5272541286666075e-06, "loss": 303.7095, "step": 42820 }, { "epoch": 0.823725130059332, "grad_norm": 428.39787018305475, "learning_rate": 1.5240139913346906e-06, "loss": 310.0144, "step": 42830 }, { "epoch": 0.8239174543950919, "grad_norm": 441.49099139823363, "learning_rate": 1.5207770111675735e-06, "loss": 315.4356, "step": 42840 }, { "epoch": 0.8241097787308517, "grad_norm": 371.96768192793553, "learning_rate": 1.5175431893709836e-06, "loss": 310.1807, "step": 42850 }, { "epoch": 0.8243021030666116, "grad_norm": 407.47598290847037, "learning_rate": 1.5143125271494607e-06, "loss": 304.953, "step": 42860 }, { "epoch": 0.8244944274023713, "grad_norm": 401.4889777858116, "learning_rate": 1.511085025706378e-06, "loss": 306.8126, "step": 42870 }, { "epoch": 0.8246867517381312, "grad_norm": 384.51772526809947, "learning_rate": 1.5078606862439248e-06, "loss": 304.6681, "step": 42880 }, { "epoch": 0.824879076073891, "grad_norm": 414.39931039407116, "learning_rate": 1.5046395099631106e-06, "loss": 309.4831, "step": 42890 }, { "epoch": 0.8250714004096509, "grad_norm": 432.4809190805487, "learning_rate": 1.5014214980637754e-06, "loss": 307.1895, "step": 42900 }, { "epoch": 0.8252637247454107, "grad_norm": 421.95882028972665, "learning_rate": 1.4982066517445748e-06, "loss": 304.3493, "step": 42910 }, { "epoch": 0.8254560490811705, "grad_norm": 432.7778176649635, "learning_rate": 1.4949949722029811e-06, "loss": 307.8679, "step": 42920 }, { "epoch": 0.8256483734169303, "grad_norm": 435.7686026497326, "learning_rate": 1.4917864606352983e-06, "loss": 321.3161, "step": 42930 }, { "epoch": 0.8258406977526901, "grad_norm": 403.7324293283603, "learning_rate": 1.4885811182366406e-06, "loss": 315.8647, "step": 42940 }, { "epoch": 0.82603302208845, "grad_norm": 392.8614771109709, "learning_rate": 1.485378946200946e-06, "loss": 308.2161, "step": 42950 }, { "epoch": 0.8262253464242098, "grad_norm": 377.5813441608248, "learning_rate": 1.4821799457209684e-06, "loss": 303.9158, "step": 42960 }, { "epoch": 0.8264176707599696, "grad_norm": 398.5752891718774, "learning_rate": 1.478984117988287e-06, "loss": 309.6031, "step": 42970 }, { "epoch": 0.8266099950957294, "grad_norm": 408.2175427744864, "learning_rate": 1.4757914641932924e-06, "loss": 304.9219, "step": 42980 }, { "epoch": 0.8268023194314893, "grad_norm": 377.3102620453641, "learning_rate": 1.4726019855251928e-06, "loss": 310.5686, "step": 42990 }, { "epoch": 0.8269946437672491, "grad_norm": 372.7674117475207, "learning_rate": 1.4694156831720185e-06, "loss": 307.1472, "step": 43000 }, { "epoch": 0.827186968103009, "grad_norm": 406.9858561650125, "learning_rate": 1.4662325583206172e-06, "loss": 315.3097, "step": 43010 }, { "epoch": 0.8273792924387687, "grad_norm": 407.5473076985246, "learning_rate": 1.463052612156649e-06, "loss": 314.7602, "step": 43020 }, { "epoch": 0.8275716167745286, "grad_norm": 460.82731087343296, "learning_rate": 1.4598758458645878e-06, "loss": 313.4908, "step": 43030 }, { "epoch": 0.8277639411102884, "grad_norm": 424.11726780910897, "learning_rate": 1.4567022606277314e-06, "loss": 308.193, "step": 43040 }, { "epoch": 0.8279562654460482, "grad_norm": 408.77580804971666, "learning_rate": 1.4535318576281854e-06, "loss": 302.9278, "step": 43050 }, { "epoch": 0.828148589781808, "grad_norm": 405.97204339467817, "learning_rate": 1.4503646380468729e-06, "loss": 302.8712, "step": 43060 }, { "epoch": 0.8283409141175678, "grad_norm": 446.90529183618355, "learning_rate": 1.4472006030635288e-06, "loss": 311.4265, "step": 43070 }, { "epoch": 0.8285332384533277, "grad_norm": 435.4659847610761, "learning_rate": 1.4440397538567086e-06, "loss": 313.404, "step": 43080 }, { "epoch": 0.8287255627890875, "grad_norm": 379.9167230649679, "learning_rate": 1.4408820916037735e-06, "loss": 306.1127, "step": 43090 }, { "epoch": 0.8289178871248474, "grad_norm": 435.5973177012765, "learning_rate": 1.4377276174808984e-06, "loss": 304.7473, "step": 43100 }, { "epoch": 0.8291102114606071, "grad_norm": 438.0031909485522, "learning_rate": 1.4345763326630768e-06, "loss": 308.3987, "step": 43110 }, { "epoch": 0.829302535796367, "grad_norm": 404.40719707800486, "learning_rate": 1.4314282383241097e-06, "loss": 304.1587, "step": 43120 }, { "epoch": 0.8294948601321268, "grad_norm": 403.00195421340226, "learning_rate": 1.4282833356366066e-06, "loss": 306.0333, "step": 43130 }, { "epoch": 0.8296871844678867, "grad_norm": 381.43647340158674, "learning_rate": 1.4251416257719962e-06, "loss": 303.9909, "step": 43140 }, { "epoch": 0.8298795088036465, "grad_norm": 441.02615502985316, "learning_rate": 1.4220031099005094e-06, "loss": 308.5014, "step": 43150 }, { "epoch": 0.8300718331394062, "grad_norm": 379.29968437602855, "learning_rate": 1.4188677891911961e-06, "loss": 297.6056, "step": 43160 }, { "epoch": 0.8302641574751661, "grad_norm": 398.65431769775125, "learning_rate": 1.4157356648119103e-06, "loss": 303.8602, "step": 43170 }, { "epoch": 0.8304564818109259, "grad_norm": 419.8705696419292, "learning_rate": 1.412606737929313e-06, "loss": 311.4084, "step": 43180 }, { "epoch": 0.8306488061466858, "grad_norm": 387.25796585656343, "learning_rate": 1.4094810097088817e-06, "loss": 309.5396, "step": 43190 }, { "epoch": 0.8308411304824456, "grad_norm": 395.52609984033694, "learning_rate": 1.4063584813148979e-06, "loss": 305.6008, "step": 43200 }, { "epoch": 0.8310334548182055, "grad_norm": 420.2690005893665, "learning_rate": 1.4032391539104484e-06, "loss": 312.0884, "step": 43210 }, { "epoch": 0.8312257791539652, "grad_norm": 388.7972106008941, "learning_rate": 1.4001230286574363e-06, "loss": 310.4407, "step": 43220 }, { "epoch": 0.8314181034897251, "grad_norm": 420.6261331747331, "learning_rate": 1.3970101067165642e-06, "loss": 306.8434, "step": 43230 }, { "epoch": 0.8316104278254849, "grad_norm": 399.7898065970122, "learning_rate": 1.3939003892473446e-06, "loss": 305.6393, "step": 43240 }, { "epoch": 0.8318027521612448, "grad_norm": 465.89894772511826, "learning_rate": 1.390793877408093e-06, "loss": 310.9626, "step": 43250 }, { "epoch": 0.8319950764970045, "grad_norm": 419.9633912750672, "learning_rate": 1.3876905723559397e-06, "loss": 306.179, "step": 43260 }, { "epoch": 0.8321874008327643, "grad_norm": 382.302731153885, "learning_rate": 1.3845904752468075e-06, "loss": 311.5814, "step": 43270 }, { "epoch": 0.8323797251685242, "grad_norm": 464.7376973882011, "learning_rate": 1.3814935872354385e-06, "loss": 303.6585, "step": 43280 }, { "epoch": 0.832572049504284, "grad_norm": 400.69595239377145, "learning_rate": 1.3783999094753653e-06, "loss": 307.3913, "step": 43290 }, { "epoch": 0.8327643738400439, "grad_norm": 420.1239247443546, "learning_rate": 1.3753094431189385e-06, "loss": 312.9189, "step": 43300 }, { "epoch": 0.8329566981758036, "grad_norm": 387.0049299290587, "learning_rate": 1.3722221893173027e-06, "loss": 300.6439, "step": 43310 }, { "epoch": 0.8331490225115635, "grad_norm": 389.1267103728879, "learning_rate": 1.369138149220407e-06, "loss": 307.6452, "step": 43320 }, { "epoch": 0.8333413468473233, "grad_norm": 428.0639024149446, "learning_rate": 1.3660573239770091e-06, "loss": 313.8387, "step": 43330 }, { "epoch": 0.8335336711830832, "grad_norm": 425.0378746736501, "learning_rate": 1.3629797147346635e-06, "loss": 317.9529, "step": 43340 }, { "epoch": 0.833725995518843, "grad_norm": 425.08552944771554, "learning_rate": 1.3599053226397275e-06, "loss": 312.3346, "step": 43350 }, { "epoch": 0.8339183198546029, "grad_norm": 457.2770452154439, "learning_rate": 1.3568341488373637e-06, "loss": 311.3716, "step": 43360 }, { "epoch": 0.8341106441903626, "grad_norm": 383.3616072731839, "learning_rate": 1.3537661944715342e-06, "loss": 305.7466, "step": 43370 }, { "epoch": 0.8343029685261224, "grad_norm": 378.19750986663985, "learning_rate": 1.350701460685e-06, "loss": 312.2435, "step": 43380 }, { "epoch": 0.8344952928618823, "grad_norm": 406.6171330880953, "learning_rate": 1.3476399486193214e-06, "loss": 304.5825, "step": 43390 }, { "epoch": 0.8346876171976421, "grad_norm": 434.06189110066236, "learning_rate": 1.3445816594148654e-06, "loss": 320.4958, "step": 43400 }, { "epoch": 0.834879941533402, "grad_norm": 439.5465007523655, "learning_rate": 1.3415265942107925e-06, "loss": 316.4296, "step": 43410 }, { "epoch": 0.8350722658691617, "grad_norm": 392.45425060422684, "learning_rate": 1.3384747541450615e-06, "loss": 300.5675, "step": 43420 }, { "epoch": 0.8352645902049216, "grad_norm": 436.9471421328807, "learning_rate": 1.3354261403544345e-06, "loss": 309.4496, "step": 43430 }, { "epoch": 0.8354569145406814, "grad_norm": 428.5568221156891, "learning_rate": 1.3323807539744726e-06, "loss": 320.4364, "step": 43440 }, { "epoch": 0.8356492388764413, "grad_norm": 406.89079248718434, "learning_rate": 1.32933859613953e-06, "loss": 311.7307, "step": 43450 }, { "epoch": 0.835841563212201, "grad_norm": 432.8153667171676, "learning_rate": 1.3262996679827567e-06, "loss": 310.3885, "step": 43460 }, { "epoch": 0.8360338875479609, "grad_norm": 426.9602538393154, "learning_rate": 1.3232639706361083e-06, "loss": 308.9982, "step": 43470 }, { "epoch": 0.8362262118837207, "grad_norm": 420.303895961567, "learning_rate": 1.3202315052303304e-06, "loss": 296.2161, "step": 43480 }, { "epoch": 0.8364185362194805, "grad_norm": 391.9618038875555, "learning_rate": 1.3172022728949651e-06, "loss": 307.7306, "step": 43490 }, { "epoch": 0.8366108605552404, "grad_norm": 418.06481889559166, "learning_rate": 1.3141762747583498e-06, "loss": 300.9838, "step": 43500 }, { "epoch": 0.8368031848910001, "grad_norm": 401.4725497286522, "learning_rate": 1.3111535119476237e-06, "loss": 309.4073, "step": 43510 }, { "epoch": 0.83699550922676, "grad_norm": 424.73035958519137, "learning_rate": 1.3081339855887133e-06, "loss": 316.1311, "step": 43520 }, { "epoch": 0.8371878335625198, "grad_norm": 420.0330412783158, "learning_rate": 1.3051176968063407e-06, "loss": 313.0947, "step": 43530 }, { "epoch": 0.8373801578982797, "grad_norm": 429.4214631178607, "learning_rate": 1.302104646724026e-06, "loss": 307.8752, "step": 43540 }, { "epoch": 0.8375724822340395, "grad_norm": 412.1896170880573, "learning_rate": 1.299094836464081e-06, "loss": 312.5322, "step": 43550 }, { "epoch": 0.8377648065697993, "grad_norm": 421.7241075889308, "learning_rate": 1.2960882671476062e-06, "loss": 304.3012, "step": 43560 }, { "epoch": 0.8379571309055591, "grad_norm": 363.007332446123, "learning_rate": 1.2930849398945033e-06, "loss": 303.4901, "step": 43570 }, { "epoch": 0.838149455241319, "grad_norm": 405.42988986184895, "learning_rate": 1.2900848558234625e-06, "loss": 309.2977, "step": 43580 }, { "epoch": 0.8383417795770788, "grad_norm": 403.78264280695635, "learning_rate": 1.2870880160519628e-06, "loss": 311.6537, "step": 43590 }, { "epoch": 0.8385341039128386, "grad_norm": 408.8079662781415, "learning_rate": 1.2840944216962802e-06, "loss": 311.8632, "step": 43600 }, { "epoch": 0.8387264282485984, "grad_norm": 405.3287525107537, "learning_rate": 1.2811040738714742e-06, "loss": 305.9968, "step": 43610 }, { "epoch": 0.8389187525843582, "grad_norm": 396.6957967160807, "learning_rate": 1.2781169736914067e-06, "loss": 303.5648, "step": 43620 }, { "epoch": 0.8391110769201181, "grad_norm": 401.18494693017317, "learning_rate": 1.275133122268719e-06, "loss": 300.0335, "step": 43630 }, { "epoch": 0.8393034012558779, "grad_norm": 402.0169973967491, "learning_rate": 1.2721525207148456e-06, "loss": 307.8517, "step": 43640 }, { "epoch": 0.8394957255916378, "grad_norm": 441.2071924621578, "learning_rate": 1.2691751701400145e-06, "loss": 309.8471, "step": 43650 }, { "epoch": 0.8396880499273975, "grad_norm": 416.63996687023854, "learning_rate": 1.2662010716532392e-06, "loss": 309.8868, "step": 43660 }, { "epoch": 0.8398803742631574, "grad_norm": 423.3957295128237, "learning_rate": 1.2632302263623198e-06, "loss": 300.3247, "step": 43670 }, { "epoch": 0.8400726985989172, "grad_norm": 385.76173705849806, "learning_rate": 1.2602626353738479e-06, "loss": 310.8522, "step": 43680 }, { "epoch": 0.8402650229346771, "grad_norm": 419.35014208195366, "learning_rate": 1.2572982997932037e-06, "loss": 314.829, "step": 43690 }, { "epoch": 0.8404573472704369, "grad_norm": 437.044226891431, "learning_rate": 1.2543372207245508e-06, "loss": 308.5821, "step": 43700 }, { "epoch": 0.8406496716061966, "grad_norm": 392.24831579143245, "learning_rate": 1.2513793992708467e-06, "loss": 314.5268, "step": 43710 }, { "epoch": 0.8408419959419565, "grad_norm": 415.8970967651331, "learning_rate": 1.2484248365338248e-06, "loss": 308.462, "step": 43720 }, { "epoch": 0.8410343202777163, "grad_norm": 396.3238155580709, "learning_rate": 1.2454735336140167e-06, "loss": 309.7371, "step": 43730 }, { "epoch": 0.8412266446134762, "grad_norm": 392.4687504825427, "learning_rate": 1.2425254916107321e-06, "loss": 308.669, "step": 43740 }, { "epoch": 0.841418968949236, "grad_norm": 399.34987357090506, "learning_rate": 1.2395807116220648e-06, "loss": 312.6412, "step": 43750 }, { "epoch": 0.8416112932849958, "grad_norm": 417.38855913189497, "learning_rate": 1.236639194744902e-06, "loss": 315.5179, "step": 43760 }, { "epoch": 0.8418036176207556, "grad_norm": 429.66975190352093, "learning_rate": 1.233700942074907e-06, "loss": 307.1796, "step": 43770 }, { "epoch": 0.8419959419565155, "grad_norm": 382.2906613329996, "learning_rate": 1.230765954706531e-06, "loss": 303.7333, "step": 43780 }, { "epoch": 0.8421882662922753, "grad_norm": 448.6587954719122, "learning_rate": 1.227834233733005e-06, "loss": 310.9136, "step": 43790 }, { "epoch": 0.8423805906280352, "grad_norm": 398.58347546090255, "learning_rate": 1.2249057802463527e-06, "loss": 309.7333, "step": 43800 }, { "epoch": 0.8425729149637949, "grad_norm": 382.80139291560556, "learning_rate": 1.221980595337372e-06, "loss": 305.871, "step": 43810 }, { "epoch": 0.8427652392995547, "grad_norm": 384.28099373040374, "learning_rate": 1.219058680095644e-06, "loss": 310.9953, "step": 43820 }, { "epoch": 0.8429575636353146, "grad_norm": 390.50239126599206, "learning_rate": 1.2161400356095376e-06, "loss": 300.5285, "step": 43830 }, { "epoch": 0.8431498879710744, "grad_norm": 394.7135930797511, "learning_rate": 1.2132246629661948e-06, "loss": 307.0489, "step": 43840 }, { "epoch": 0.8433422123068343, "grad_norm": 389.18594248152215, "learning_rate": 1.21031256325155e-06, "loss": 309.5433, "step": 43850 }, { "epoch": 0.843534536642594, "grad_norm": 410.64966859334305, "learning_rate": 1.2074037375503056e-06, "loss": 298.6748, "step": 43860 }, { "epoch": 0.8437268609783539, "grad_norm": 384.6362019726913, "learning_rate": 1.2044981869459571e-06, "loss": 313.5294, "step": 43870 }, { "epoch": 0.8439191853141137, "grad_norm": 404.3040948416852, "learning_rate": 1.201595912520771e-06, "loss": 310.4116, "step": 43880 }, { "epoch": 0.8441115096498736, "grad_norm": 399.8936341399085, "learning_rate": 1.198696915355796e-06, "loss": 321.4883, "step": 43890 }, { "epoch": 0.8443038339856334, "grad_norm": 414.61600652742237, "learning_rate": 1.1958011965308624e-06, "loss": 316.2538, "step": 43900 }, { "epoch": 0.8444961583213932, "grad_norm": 413.8168917707275, "learning_rate": 1.192908757124578e-06, "loss": 310.5925, "step": 43910 }, { "epoch": 0.844688482657153, "grad_norm": 381.0727520182692, "learning_rate": 1.190019598214327e-06, "loss": 311.7601, "step": 43920 }, { "epoch": 0.8448808069929129, "grad_norm": 434.9484055456739, "learning_rate": 1.1871337208762723e-06, "loss": 303.6549, "step": 43930 }, { "epoch": 0.8450731313286727, "grad_norm": 460.2362992462162, "learning_rate": 1.1842511261853596e-06, "loss": 316.0865, "step": 43940 }, { "epoch": 0.8452654556644325, "grad_norm": 386.9826462964263, "learning_rate": 1.1813718152153054e-06, "loss": 303.8425, "step": 43950 }, { "epoch": 0.8454577800001923, "grad_norm": 424.40337115020503, "learning_rate": 1.1784957890386051e-06, "loss": 305.3876, "step": 43960 }, { "epoch": 0.8456501043359521, "grad_norm": 404.1909632525495, "learning_rate": 1.1756230487265296e-06, "loss": 305.2239, "step": 43970 }, { "epoch": 0.845842428671712, "grad_norm": 398.4495033039118, "learning_rate": 1.1727535953491308e-06, "loss": 308.1323, "step": 43980 }, { "epoch": 0.8460347530074718, "grad_norm": 389.2480479244216, "learning_rate": 1.1698874299752293e-06, "loss": 299.6056, "step": 43990 }, { "epoch": 0.8462270773432317, "grad_norm": 394.36355835421574, "learning_rate": 1.1670245536724267e-06, "loss": 309.4105, "step": 44000 }, { "epoch": 0.8464194016789914, "grad_norm": 397.68585784214036, "learning_rate": 1.1641649675070975e-06, "loss": 312.691, "step": 44010 }, { "epoch": 0.8466117260147513, "grad_norm": 410.6147010479267, "learning_rate": 1.161308672544389e-06, "loss": 326.4685, "step": 44020 }, { "epoch": 0.8468040503505111, "grad_norm": 410.55330677161714, "learning_rate": 1.1584556698482252e-06, "loss": 305.0545, "step": 44030 }, { "epoch": 0.846996374686271, "grad_norm": 431.9844551976268, "learning_rate": 1.1556059604812985e-06, "loss": 306.52, "step": 44040 }, { "epoch": 0.8471886990220308, "grad_norm": 405.437695786694, "learning_rate": 1.1527595455050844e-06, "loss": 309.3423, "step": 44050 }, { "epoch": 0.8473810233577905, "grad_norm": 459.3400718099756, "learning_rate": 1.1499164259798223e-06, "loss": 310.3186, "step": 44060 }, { "epoch": 0.8475733476935504, "grad_norm": 385.3061173062442, "learning_rate": 1.1470766029645253e-06, "loss": 315.4323, "step": 44070 }, { "epoch": 0.8477656720293102, "grad_norm": 430.51500251146587, "learning_rate": 1.1442400775169849e-06, "loss": 310.8122, "step": 44080 }, { "epoch": 0.8479579963650701, "grad_norm": 382.0245139719309, "learning_rate": 1.141406850693757e-06, "loss": 299.5895, "step": 44090 }, { "epoch": 0.8481503207008299, "grad_norm": 393.55300023450496, "learning_rate": 1.1385769235501742e-06, "loss": 311.6865, "step": 44100 }, { "epoch": 0.8483426450365897, "grad_norm": 439.4190745537439, "learning_rate": 1.1357502971403335e-06, "loss": 313.2368, "step": 44110 }, { "epoch": 0.8485349693723495, "grad_norm": 409.10469866554496, "learning_rate": 1.132926972517111e-06, "loss": 306.4891, "step": 44120 }, { "epoch": 0.8487272937081094, "grad_norm": 424.9147398193525, "learning_rate": 1.130106950732145e-06, "loss": 302.083, "step": 44130 }, { "epoch": 0.8489196180438692, "grad_norm": 425.26982812103034, "learning_rate": 1.1272902328358514e-06, "loss": 309.5698, "step": 44140 }, { "epoch": 0.8491119423796291, "grad_norm": 474.2980147483282, "learning_rate": 1.1244768198774047e-06, "loss": 317.164, "step": 44150 }, { "epoch": 0.8493042667153888, "grad_norm": 443.4806721760526, "learning_rate": 1.121666712904762e-06, "loss": 329.2048, "step": 44160 }, { "epoch": 0.8494965910511486, "grad_norm": 376.2163407206971, "learning_rate": 1.1188599129646382e-06, "loss": 307.9291, "step": 44170 }, { "epoch": 0.8496889153869085, "grad_norm": 441.9483483619356, "learning_rate": 1.116056421102517e-06, "loss": 320.0675, "step": 44180 }, { "epoch": 0.8498812397226683, "grad_norm": 401.7978514444669, "learning_rate": 1.113256238362659e-06, "loss": 313.4776, "step": 44190 }, { "epoch": 0.8500735640584282, "grad_norm": 437.6590866201023, "learning_rate": 1.1104593657880812e-06, "loss": 306.1304, "step": 44200 }, { "epoch": 0.8502658883941879, "grad_norm": 417.4508570371858, "learning_rate": 1.1076658044205746e-06, "loss": 316.2343, "step": 44210 }, { "epoch": 0.8504582127299478, "grad_norm": 397.3498340990389, "learning_rate": 1.1048755553006928e-06, "loss": 313.7024, "step": 44220 }, { "epoch": 0.8506505370657076, "grad_norm": 413.2392266095835, "learning_rate": 1.1020886194677605e-06, "loss": 302.59, "step": 44230 }, { "epoch": 0.8508428614014675, "grad_norm": 400.93040562028807, "learning_rate": 1.0993049979598635e-06, "loss": 309.4014, "step": 44240 }, { "epoch": 0.8510351857372273, "grad_norm": 404.9059784641159, "learning_rate": 1.0965246918138529e-06, "loss": 317.5931, "step": 44250 }, { "epoch": 0.8512275100729871, "grad_norm": 397.77934194055143, "learning_rate": 1.093747702065351e-06, "loss": 306.4594, "step": 44260 }, { "epoch": 0.8514198344087469, "grad_norm": 400.8242068851222, "learning_rate": 1.090974029748736e-06, "loss": 312.0326, "step": 44270 }, { "epoch": 0.8516121587445067, "grad_norm": 399.29201610333854, "learning_rate": 1.0882036758971592e-06, "loss": 307.1808, "step": 44280 }, { "epoch": 0.8518044830802666, "grad_norm": 394.45936479744114, "learning_rate": 1.0854366415425289e-06, "loss": 319.9591, "step": 44290 }, { "epoch": 0.8519968074160263, "grad_norm": 366.56628147727963, "learning_rate": 1.0826729277155224e-06, "loss": 306.8888, "step": 44300 }, { "epoch": 0.8521891317517862, "grad_norm": 433.5542684729299, "learning_rate": 1.0799125354455752e-06, "loss": 319.3518, "step": 44310 }, { "epoch": 0.852381456087546, "grad_norm": 408.8071207439392, "learning_rate": 1.0771554657608896e-06, "loss": 308.4293, "step": 44320 }, { "epoch": 0.8525737804233059, "grad_norm": 411.1904044875333, "learning_rate": 1.0744017196884248e-06, "loss": 306.739, "step": 44330 }, { "epoch": 0.8527661047590657, "grad_norm": 403.2756905542512, "learning_rate": 1.0716512982539106e-06, "loss": 304.3856, "step": 44340 }, { "epoch": 0.8529584290948256, "grad_norm": 398.3733658713629, "learning_rate": 1.0689042024818307e-06, "loss": 308.7248, "step": 44350 }, { "epoch": 0.8531507534305853, "grad_norm": 456.1789084319189, "learning_rate": 1.0661604333954312e-06, "loss": 316.1972, "step": 44360 }, { "epoch": 0.8533430777663452, "grad_norm": 374.37026480962015, "learning_rate": 1.0634199920167255e-06, "loss": 303.3029, "step": 44370 }, { "epoch": 0.853535402102105, "grad_norm": 402.3953841877796, "learning_rate": 1.0606828793664804e-06, "loss": 307.8187, "step": 44380 }, { "epoch": 0.8537277264378648, "grad_norm": 416.9853259567134, "learning_rate": 1.0579490964642247e-06, "loss": 312.1465, "step": 44390 }, { "epoch": 0.8539200507736247, "grad_norm": 436.07586502455825, "learning_rate": 1.0552186443282464e-06, "loss": 310.3381, "step": 44400 }, { "epoch": 0.8541123751093844, "grad_norm": 385.18047741672416, "learning_rate": 1.0524915239755939e-06, "loss": 302.7083, "step": 44410 }, { "epoch": 0.8543046994451443, "grad_norm": 435.882890877593, "learning_rate": 1.0497677364220792e-06, "loss": 308.524, "step": 44420 }, { "epoch": 0.8544970237809041, "grad_norm": 381.2653958431397, "learning_rate": 1.047047282682262e-06, "loss": 313.531, "step": 44430 }, { "epoch": 0.854689348116664, "grad_norm": 412.582012402211, "learning_rate": 1.0443301637694713e-06, "loss": 317.9873, "step": 44440 }, { "epoch": 0.8548816724524237, "grad_norm": 416.6160209458399, "learning_rate": 1.0416163806957857e-06, "loss": 296.6295, "step": 44450 }, { "epoch": 0.8550739967881836, "grad_norm": 413.6593353856753, "learning_rate": 1.0389059344720475e-06, "loss": 307.4951, "step": 44460 }, { "epoch": 0.8552663211239434, "grad_norm": 379.9253461959537, "learning_rate": 1.0361988261078482e-06, "loss": 302.6177, "step": 44470 }, { "epoch": 0.8554586454597033, "grad_norm": 396.7851390931577, "learning_rate": 1.0334950566115466e-06, "loss": 299.1118, "step": 44480 }, { "epoch": 0.8556509697954631, "grad_norm": 409.273740356896, "learning_rate": 1.0307946269902492e-06, "loss": 310.5079, "step": 44490 }, { "epoch": 0.8558432941312228, "grad_norm": 380.53345495027446, "learning_rate": 1.0280975382498225e-06, "loss": 309.5644, "step": 44500 }, { "epoch": 0.8560356184669827, "grad_norm": 401.0114511136919, "learning_rate": 1.0254037913948845e-06, "loss": 295.9165, "step": 44510 }, { "epoch": 0.8562279428027425, "grad_norm": 417.74737228382287, "learning_rate": 1.0227133874288152e-06, "loss": 314.8125, "step": 44520 }, { "epoch": 0.8564202671385024, "grad_norm": 389.81975755023103, "learning_rate": 1.0200263273537458e-06, "loss": 307.6695, "step": 44530 }, { "epoch": 0.8566125914742622, "grad_norm": 407.9799030203983, "learning_rate": 1.0173426121705577e-06, "loss": 314.1563, "step": 44540 }, { "epoch": 0.856804915810022, "grad_norm": 413.7191571147601, "learning_rate": 1.0146622428788943e-06, "loss": 309.1433, "step": 44550 }, { "epoch": 0.8569972401457818, "grad_norm": 403.3886196539769, "learning_rate": 1.0119852204771463e-06, "loss": 317.8424, "step": 44560 }, { "epoch": 0.8571895644815417, "grad_norm": 393.5342442825214, "learning_rate": 1.0093115459624637e-06, "loss": 305.5662, "step": 44570 }, { "epoch": 0.8573818888173015, "grad_norm": 387.4852282953983, "learning_rate": 1.0066412203307419e-06, "loss": 314.7758, "step": 44580 }, { "epoch": 0.8575742131530614, "grad_norm": 409.5449582952906, "learning_rate": 1.0039742445766376e-06, "loss": 306.6252, "step": 44590 }, { "epoch": 0.8577665374888211, "grad_norm": 417.5166939829627, "learning_rate": 1.0013106196935528e-06, "loss": 313.7952, "step": 44600 }, { "epoch": 0.8579588618245809, "grad_norm": 423.1998638370606, "learning_rate": 9.986503466736419e-07, "loss": 303.5087, "step": 44610 }, { "epoch": 0.8581511861603408, "grad_norm": 421.73776360246177, "learning_rate": 9.959934265078176e-07, "loss": 317.2739, "step": 44620 }, { "epoch": 0.8583435104961006, "grad_norm": 433.1108423808941, "learning_rate": 9.933398601857347e-07, "loss": 304.4954, "step": 44630 }, { "epoch": 0.8585358348318605, "grad_norm": 396.3648053219482, "learning_rate": 9.90689648695804e-07, "loss": 305.1951, "step": 44640 }, { "epoch": 0.8587281591676202, "grad_norm": 407.0273369340253, "learning_rate": 9.880427930251834e-07, "loss": 305.0792, "step": 44650 }, { "epoch": 0.8589204835033801, "grad_norm": 385.9401191788157, "learning_rate": 9.853992941597878e-07, "loss": 305.2848, "step": 44660 }, { "epoch": 0.8591128078391399, "grad_norm": 384.6131563985859, "learning_rate": 9.827591530842729e-07, "loss": 302.1396, "step": 44670 }, { "epoch": 0.8593051321748998, "grad_norm": 379.82487906748423, "learning_rate": 9.801223707820484e-07, "loss": 305.8356, "step": 44680 }, { "epoch": 0.8594974565106596, "grad_norm": 386.42129594011146, "learning_rate": 9.774889482352735e-07, "loss": 300.0386, "step": 44690 }, { "epoch": 0.8596897808464194, "grad_norm": 376.1601710340815, "learning_rate": 9.74858886424852e-07, "loss": 303.9091, "step": 44700 }, { "epoch": 0.8598821051821792, "grad_norm": 403.6189853606279, "learning_rate": 9.722321863304418e-07, "loss": 311.3838, "step": 44710 }, { "epoch": 0.860074429517939, "grad_norm": 419.844702330754, "learning_rate": 9.696088489304412e-07, "loss": 312.1175, "step": 44720 }, { "epoch": 0.8602667538536989, "grad_norm": 405.32264576387394, "learning_rate": 9.669888752020061e-07, "loss": 306.867, "step": 44730 }, { "epoch": 0.8604590781894587, "grad_norm": 392.7020945862724, "learning_rate": 9.643722661210285e-07, "loss": 297.6295, "step": 44740 }, { "epoch": 0.8606514025252185, "grad_norm": 385.5000714377669, "learning_rate": 9.617590226621543e-07, "loss": 303.4644, "step": 44750 }, { "epoch": 0.8608437268609783, "grad_norm": 395.86705638238874, "learning_rate": 9.5914914579877e-07, "loss": 303.246, "step": 44760 }, { "epoch": 0.8610360511967382, "grad_norm": 384.34447705197493, "learning_rate": 9.565426365030172e-07, "loss": 309.0579, "step": 44770 }, { "epoch": 0.861228375532498, "grad_norm": 389.97989738581896, "learning_rate": 9.539394957457737e-07, "loss": 316.2567, "step": 44780 }, { "epoch": 0.8614206998682579, "grad_norm": 379.5702998136998, "learning_rate": 9.51339724496666e-07, "loss": 305.6684, "step": 44790 }, { "epoch": 0.8616130242040176, "grad_norm": 380.28903954875796, "learning_rate": 9.487433237240695e-07, "loss": 301.4612, "step": 44800 }, { "epoch": 0.8618053485397775, "grad_norm": 408.0930043897811, "learning_rate": 9.461502943950973e-07, "loss": 316.251, "step": 44810 }, { "epoch": 0.8619976728755373, "grad_norm": 359.93829401593445, "learning_rate": 9.435606374756123e-07, "loss": 296.3609, "step": 44820 }, { "epoch": 0.8621899972112971, "grad_norm": 393.5957222170627, "learning_rate": 9.409743539302152e-07, "loss": 298.2376, "step": 44830 }, { "epoch": 0.862382321547057, "grad_norm": 416.6577577424038, "learning_rate": 9.383914447222576e-07, "loss": 308.6023, "step": 44840 }, { "epoch": 0.8625746458828167, "grad_norm": 417.04512185974056, "learning_rate": 9.358119108138309e-07, "loss": 311.7073, "step": 44850 }, { "epoch": 0.8627669702185766, "grad_norm": 381.7221785189243, "learning_rate": 9.332357531657644e-07, "loss": 296.6584, "step": 44860 }, { "epoch": 0.8629592945543364, "grad_norm": 381.2964103584031, "learning_rate": 9.306629727376404e-07, "loss": 304.0799, "step": 44870 }, { "epoch": 0.8631516188900963, "grad_norm": 418.31063300944476, "learning_rate": 9.280935704877736e-07, "loss": 302.2871, "step": 44880 }, { "epoch": 0.8633439432258561, "grad_norm": 440.54529215087496, "learning_rate": 9.255275473732239e-07, "loss": 311.563, "step": 44890 }, { "epoch": 0.8635362675616159, "grad_norm": 414.4605870446404, "learning_rate": 9.229649043497924e-07, "loss": 309.905, "step": 44900 }, { "epoch": 0.8637285918973757, "grad_norm": 429.4819911621607, "learning_rate": 9.20405642372022e-07, "loss": 313.3387, "step": 44910 }, { "epoch": 0.8639209162331356, "grad_norm": 469.97345671395095, "learning_rate": 9.178497623931959e-07, "loss": 309.0582, "step": 44920 }, { "epoch": 0.8641132405688954, "grad_norm": 401.4636649157551, "learning_rate": 9.152972653653369e-07, "loss": 303.3662, "step": 44930 }, { "epoch": 0.8643055649046553, "grad_norm": 412.3965830265295, "learning_rate": 9.127481522392068e-07, "loss": 308.8958, "step": 44940 }, { "epoch": 0.864497889240415, "grad_norm": 386.2214286793937, "learning_rate": 9.102024239643092e-07, "loss": 307.6219, "step": 44950 }, { "epoch": 0.8646902135761748, "grad_norm": 441.73853293879324, "learning_rate": 9.076600814888869e-07, "loss": 308.5775, "step": 44960 }, { "epoch": 0.8648825379119347, "grad_norm": 389.30577518183196, "learning_rate": 9.051211257599169e-07, "loss": 310.0769, "step": 44970 }, { "epoch": 0.8650748622476945, "grad_norm": 368.22314029030315, "learning_rate": 9.025855577231224e-07, "loss": 310.5161, "step": 44980 }, { "epoch": 0.8652671865834544, "grad_norm": 462.5940260468949, "learning_rate": 9.000533783229581e-07, "loss": 309.9495, "step": 44990 }, { "epoch": 0.8654595109192141, "grad_norm": 383.42489132058324, "learning_rate": 8.975245885026207e-07, "loss": 314.3997, "step": 45000 }, { "epoch": 0.865651835254974, "grad_norm": 459.18740096829055, "learning_rate": 8.949991892040399e-07, "loss": 309.851, "step": 45010 }, { "epoch": 0.8658441595907338, "grad_norm": 431.3174954620526, "learning_rate": 8.92477181367889e-07, "loss": 303.2337, "step": 45020 }, { "epoch": 0.8660364839264937, "grad_norm": 399.99411197624624, "learning_rate": 8.899585659335719e-07, "loss": 305.1389, "step": 45030 }, { "epoch": 0.8662288082622535, "grad_norm": 421.8670178565545, "learning_rate": 8.874433438392305e-07, "loss": 328.1207, "step": 45040 }, { "epoch": 0.8664211325980133, "grad_norm": 405.9968790015555, "learning_rate": 8.849315160217465e-07, "loss": 308.3939, "step": 45050 }, { "epoch": 0.8666134569337731, "grad_norm": 422.5961644624331, "learning_rate": 8.824230834167325e-07, "loss": 306.0928, "step": 45060 }, { "epoch": 0.8668057812695329, "grad_norm": 390.32379695472224, "learning_rate": 8.799180469585378e-07, "loss": 322.4722, "step": 45070 }, { "epoch": 0.8669981056052928, "grad_norm": 399.9720048675275, "learning_rate": 8.77416407580246e-07, "loss": 307.3033, "step": 45080 }, { "epoch": 0.8671904299410526, "grad_norm": 414.4201419526888, "learning_rate": 8.749181662136785e-07, "loss": 316.3143, "step": 45090 }, { "epoch": 0.8673827542768124, "grad_norm": 427.3499862849529, "learning_rate": 8.724233237893897e-07, "loss": 306.0224, "step": 45100 }, { "epoch": 0.8675750786125722, "grad_norm": 402.6087166065143, "learning_rate": 8.699318812366641e-07, "loss": 309.4246, "step": 45110 }, { "epoch": 0.8677674029483321, "grad_norm": 393.55856381573716, "learning_rate": 8.67443839483526e-07, "loss": 313.1001, "step": 45120 }, { "epoch": 0.8679597272840919, "grad_norm": 409.6104681860515, "learning_rate": 8.649591994567275e-07, "loss": 305.6595, "step": 45130 }, { "epoch": 0.8681520516198518, "grad_norm": 376.10437758020055, "learning_rate": 8.62477962081758e-07, "loss": 307.8717, "step": 45140 }, { "epoch": 0.8683443759556115, "grad_norm": 434.1553607527366, "learning_rate": 8.600001282828341e-07, "loss": 316.7945, "step": 45150 }, { "epoch": 0.8685367002913714, "grad_norm": 436.2063378999651, "learning_rate": 8.57525698982914e-07, "loss": 311.1892, "step": 45160 }, { "epoch": 0.8687290246271312, "grad_norm": 398.01392891360763, "learning_rate": 8.550546751036759e-07, "loss": 315.2501, "step": 45170 }, { "epoch": 0.868921348962891, "grad_norm": 419.1874741231257, "learning_rate": 8.525870575655393e-07, "loss": 307.8161, "step": 45180 }, { "epoch": 0.8691136732986509, "grad_norm": 405.6690941889232, "learning_rate": 8.501228472876466e-07, "loss": 311.2014, "step": 45190 }, { "epoch": 0.8693059976344106, "grad_norm": 403.5564878829972, "learning_rate": 8.476620451878803e-07, "loss": 303.7543, "step": 45200 }, { "epoch": 0.8694983219701705, "grad_norm": 390.73081632872675, "learning_rate": 8.45204652182846e-07, "loss": 310.5041, "step": 45210 }, { "epoch": 0.8696906463059303, "grad_norm": 397.2903063208496, "learning_rate": 8.427506691878806e-07, "loss": 311.0627, "step": 45220 }, { "epoch": 0.8698829706416902, "grad_norm": 490.0805569838791, "learning_rate": 8.403000971170561e-07, "loss": 303.6106, "step": 45230 }, { "epoch": 0.87007529497745, "grad_norm": 400.1254759972835, "learning_rate": 8.378529368831667e-07, "loss": 307.972, "step": 45240 }, { "epoch": 0.8702676193132098, "grad_norm": 417.7594179144115, "learning_rate": 8.354091893977401e-07, "loss": 305.1977, "step": 45250 }, { "epoch": 0.8704599436489696, "grad_norm": 365.87429074246427, "learning_rate": 8.329688555710336e-07, "loss": 306.2153, "step": 45260 }, { "epoch": 0.8706522679847295, "grad_norm": 389.4945732877371, "learning_rate": 8.305319363120279e-07, "loss": 314.2629, "step": 45270 }, { "epoch": 0.8708445923204893, "grad_norm": 395.0934662531337, "learning_rate": 8.280984325284392e-07, "loss": 306.9449, "step": 45280 }, { "epoch": 0.871036916656249, "grad_norm": 395.28329275841406, "learning_rate": 8.256683451267044e-07, "loss": 303.7966, "step": 45290 }, { "epoch": 0.8712292409920089, "grad_norm": 424.28038815872304, "learning_rate": 8.232416750119921e-07, "loss": 307.4502, "step": 45300 }, { "epoch": 0.8714215653277687, "grad_norm": 416.23726587443855, "learning_rate": 8.208184230881966e-07, "loss": 307.9138, "step": 45310 }, { "epoch": 0.8716138896635286, "grad_norm": 420.02821998004293, "learning_rate": 8.183985902579405e-07, "loss": 303.6252, "step": 45320 }, { "epoch": 0.8718062139992884, "grad_norm": 381.04014653126745, "learning_rate": 8.159821774225685e-07, "loss": 297.8478, "step": 45330 }, { "epoch": 0.8719985383350483, "grad_norm": 414.5565269988919, "learning_rate": 8.13569185482157e-07, "loss": 302.6021, "step": 45340 }, { "epoch": 0.872190862670808, "grad_norm": 383.19898362444815, "learning_rate": 8.111596153355061e-07, "loss": 299.3064, "step": 45350 }, { "epoch": 0.8723831870065679, "grad_norm": 389.4528379518342, "learning_rate": 8.08753467880139e-07, "loss": 307.9255, "step": 45360 }, { "epoch": 0.8725755113423277, "grad_norm": 401.9641240797891, "learning_rate": 8.063507440123052e-07, "loss": 300.6217, "step": 45370 }, { "epoch": 0.8727678356780876, "grad_norm": 446.3358967244388, "learning_rate": 8.039514446269836e-07, "loss": 313.8597, "step": 45380 }, { "epoch": 0.8729601600138474, "grad_norm": 404.7477785466647, "learning_rate": 8.015555706178702e-07, "loss": 307.3693, "step": 45390 }, { "epoch": 0.8731524843496071, "grad_norm": 422.3906376953504, "learning_rate": 7.991631228773889e-07, "loss": 311.8219, "step": 45400 }, { "epoch": 0.873344808685367, "grad_norm": 391.46905166129295, "learning_rate": 7.967741022966857e-07, "loss": 300.5323, "step": 45410 }, { "epoch": 0.8735371330211268, "grad_norm": 402.91008021356276, "learning_rate": 7.943885097656356e-07, "loss": 297.8026, "step": 45420 }, { "epoch": 0.8737294573568867, "grad_norm": 391.83827296192266, "learning_rate": 7.920063461728311e-07, "loss": 303.6322, "step": 45430 }, { "epoch": 0.8739217816926464, "grad_norm": 401.53853187885346, "learning_rate": 7.896276124055846e-07, "loss": 306.0083, "step": 45440 }, { "epoch": 0.8741141060284063, "grad_norm": 404.838459447113, "learning_rate": 7.872523093499396e-07, "loss": 311.1368, "step": 45450 }, { "epoch": 0.8743064303641661, "grad_norm": 409.24741584776075, "learning_rate": 7.848804378906561e-07, "loss": 303.0447, "step": 45460 }, { "epoch": 0.874498754699926, "grad_norm": 365.37622364170477, "learning_rate": 7.825119989112173e-07, "loss": 297.1866, "step": 45470 }, { "epoch": 0.8746910790356858, "grad_norm": 405.4105845532776, "learning_rate": 7.801469932938255e-07, "loss": 307.8844, "step": 45480 }, { "epoch": 0.8748834033714457, "grad_norm": 374.1113914829126, "learning_rate": 7.777854219194092e-07, "loss": 301.2761, "step": 45490 }, { "epoch": 0.8750757277072054, "grad_norm": 396.8369103400041, "learning_rate": 7.754272856676126e-07, "loss": 311.6533, "step": 45500 }, { "epoch": 0.8752680520429652, "grad_norm": 436.8521525047514, "learning_rate": 7.73072585416802e-07, "loss": 313.9562, "step": 45510 }, { "epoch": 0.8754603763787251, "grad_norm": 405.09612251943463, "learning_rate": 7.707213220440679e-07, "loss": 297.5888, "step": 45520 }, { "epoch": 0.8756527007144849, "grad_norm": 391.916365289241, "learning_rate": 7.683734964252143e-07, "loss": 305.2545, "step": 45530 }, { "epoch": 0.8758450250502448, "grad_norm": 425.0453364247075, "learning_rate": 7.66029109434766e-07, "loss": 312.3987, "step": 45540 }, { "epoch": 0.8760373493860045, "grad_norm": 394.3628989612224, "learning_rate": 7.636881619459724e-07, "loss": 307.1669, "step": 45550 }, { "epoch": 0.8762296737217644, "grad_norm": 435.05202901875793, "learning_rate": 7.613506548307936e-07, "loss": 308.8373, "step": 45560 }, { "epoch": 0.8764219980575242, "grad_norm": 415.56905710505737, "learning_rate": 7.590165889599166e-07, "loss": 305.002, "step": 45570 }, { "epoch": 0.8766143223932841, "grad_norm": 376.6664542530555, "learning_rate": 7.566859652027381e-07, "loss": 302.9801, "step": 45580 }, { "epoch": 0.8768066467290438, "grad_norm": 448.8970396402597, "learning_rate": 7.543587844273814e-07, "loss": 306.5511, "step": 45590 }, { "epoch": 0.8769989710648037, "grad_norm": 405.98444269970327, "learning_rate": 7.52035047500681e-07, "loss": 300.8533, "step": 45600 }, { "epoch": 0.8771912954005635, "grad_norm": 432.5742276359327, "learning_rate": 7.497147552881901e-07, "loss": 308.6317, "step": 45610 }, { "epoch": 0.8773836197363233, "grad_norm": 410.1458948491779, "learning_rate": 7.473979086541772e-07, "loss": 308.2932, "step": 45620 }, { "epoch": 0.8775759440720832, "grad_norm": 408.73801461737315, "learning_rate": 7.450845084616332e-07, "loss": 303.7726, "step": 45630 }, { "epoch": 0.8777682684078429, "grad_norm": 390.61162966833723, "learning_rate": 7.427745555722598e-07, "loss": 315.3282, "step": 45640 }, { "epoch": 0.8779605927436028, "grad_norm": 410.695603424447, "learning_rate": 7.404680508464767e-07, "loss": 310.4774, "step": 45650 }, { "epoch": 0.8781529170793626, "grad_norm": 442.99437144817625, "learning_rate": 7.381649951434167e-07, "loss": 309.927, "step": 45660 }, { "epoch": 0.8783452414151225, "grad_norm": 406.4934380191147, "learning_rate": 7.358653893209333e-07, "loss": 316.9576, "step": 45670 }, { "epoch": 0.8785375657508823, "grad_norm": 376.22778044279886, "learning_rate": 7.335692342355882e-07, "loss": 306.7312, "step": 45680 }, { "epoch": 0.8787298900866422, "grad_norm": 376.9568684998429, "learning_rate": 7.312765307426662e-07, "loss": 300.716, "step": 45690 }, { "epoch": 0.8789222144224019, "grad_norm": 407.9861560067712, "learning_rate": 7.28987279696155e-07, "loss": 309.2125, "step": 45700 }, { "epoch": 0.8791145387581618, "grad_norm": 400.3655920357576, "learning_rate": 7.267014819487695e-07, "loss": 315.2592, "step": 45710 }, { "epoch": 0.8793068630939216, "grad_norm": 390.18770948619374, "learning_rate": 7.244191383519272e-07, "loss": 304.3402, "step": 45720 }, { "epoch": 0.8794991874296814, "grad_norm": 438.375235517891, "learning_rate": 7.221402497557629e-07, "loss": 301.9918, "step": 45730 }, { "epoch": 0.8796915117654412, "grad_norm": 396.5705019696801, "learning_rate": 7.198648170091294e-07, "loss": 299.4299, "step": 45740 }, { "epoch": 0.879883836101201, "grad_norm": 419.8434919393224, "learning_rate": 7.175928409595844e-07, "loss": 307.1673, "step": 45750 }, { "epoch": 0.8800761604369609, "grad_norm": 390.2152470925275, "learning_rate": 7.153243224534001e-07, "loss": 311.8675, "step": 45760 }, { "epoch": 0.8802684847727207, "grad_norm": 422.84121762723163, "learning_rate": 7.130592623355659e-07, "loss": 303.4092, "step": 45770 }, { "epoch": 0.8804608091084806, "grad_norm": 377.63453160700215, "learning_rate": 7.10797661449778e-07, "loss": 306.8756, "step": 45780 }, { "epoch": 0.8806531334442403, "grad_norm": 414.79211569357, "learning_rate": 7.085395206384449e-07, "loss": 307.6578, "step": 45790 }, { "epoch": 0.8808454577800002, "grad_norm": 381.620530340443, "learning_rate": 7.062848407426859e-07, "loss": 302.2135, "step": 45800 }, { "epoch": 0.88103778211576, "grad_norm": 397.56799132021905, "learning_rate": 7.040336226023336e-07, "loss": 299.7477, "step": 45810 }, { "epoch": 0.8812301064515199, "grad_norm": 385.3582867290881, "learning_rate": 7.017858670559274e-07, "loss": 309.8197, "step": 45820 }, { "epoch": 0.8814224307872797, "grad_norm": 422.29913694888336, "learning_rate": 6.99541574940722e-07, "loss": 313.8146, "step": 45830 }, { "epoch": 0.8816147551230394, "grad_norm": 395.110243612897, "learning_rate": 6.973007470926774e-07, "loss": 306.8336, "step": 45840 }, { "epoch": 0.8818070794587993, "grad_norm": 393.68255603138243, "learning_rate": 6.95063384346466e-07, "loss": 308.8002, "step": 45850 }, { "epoch": 0.8819994037945591, "grad_norm": 394.6390426831646, "learning_rate": 6.92829487535468e-07, "loss": 311.4544, "step": 45860 }, { "epoch": 0.882191728130319, "grad_norm": 404.18403133658063, "learning_rate": 6.905990574917709e-07, "loss": 308.3257, "step": 45870 }, { "epoch": 0.8823840524660788, "grad_norm": 382.3695178134656, "learning_rate": 6.88372095046177e-07, "loss": 300.5742, "step": 45880 }, { "epoch": 0.8825763768018386, "grad_norm": 403.9944582127581, "learning_rate": 6.861486010281915e-07, "loss": 314.6948, "step": 45890 }, { "epoch": 0.8827687011375984, "grad_norm": 391.0733677084212, "learning_rate": 6.839285762660275e-07, "loss": 297.6921, "step": 45900 }, { "epoch": 0.8829610254733583, "grad_norm": 397.42788057613114, "learning_rate": 6.81712021586608e-07, "loss": 306.5648, "step": 45910 }, { "epoch": 0.8831533498091181, "grad_norm": 382.44111398233173, "learning_rate": 6.794989378155659e-07, "loss": 309.9393, "step": 45920 }, { "epoch": 0.883345674144878, "grad_norm": 396.18882027417163, "learning_rate": 6.772893257772361e-07, "loss": 313.6461, "step": 45930 }, { "epoch": 0.8835379984806377, "grad_norm": 410.79810345289565, "learning_rate": 6.750831862946605e-07, "loss": 319.9223, "step": 45940 }, { "epoch": 0.8837303228163975, "grad_norm": 413.3058841324921, "learning_rate": 6.728805201895949e-07, "loss": 313.6198, "step": 45950 }, { "epoch": 0.8839226471521574, "grad_norm": 393.6352528130019, "learning_rate": 6.70681328282492e-07, "loss": 290.7339, "step": 45960 }, { "epoch": 0.8841149714879172, "grad_norm": 415.7383568075715, "learning_rate": 6.684856113925143e-07, "loss": 308.1027, "step": 45970 }, { "epoch": 0.8843072958236771, "grad_norm": 416.42752137420587, "learning_rate": 6.662933703375307e-07, "loss": 316.0137, "step": 45980 }, { "epoch": 0.8844996201594368, "grad_norm": 405.45697836834063, "learning_rate": 6.641046059341171e-07, "loss": 297.9852, "step": 45990 }, { "epoch": 0.8846919444951967, "grad_norm": 416.18297552140035, "learning_rate": 6.619193189975515e-07, "loss": 309.7994, "step": 46000 }, { "epoch": 0.8848842688309565, "grad_norm": 393.6140314846193, "learning_rate": 6.597375103418135e-07, "loss": 304.9628, "step": 46010 }, { "epoch": 0.8850765931667164, "grad_norm": 380.7352856643751, "learning_rate": 6.575591807795944e-07, "loss": 294.9117, "step": 46020 }, { "epoch": 0.8852689175024762, "grad_norm": 405.5867603469064, "learning_rate": 6.553843311222863e-07, "loss": 301.155, "step": 46030 }, { "epoch": 0.885461241838236, "grad_norm": 382.85821435645323, "learning_rate": 6.532129621799832e-07, "loss": 307.5864, "step": 46040 }, { "epoch": 0.8856535661739958, "grad_norm": 428.7914636275491, "learning_rate": 6.510450747614816e-07, "loss": 314.8727, "step": 46050 }, { "epoch": 0.8858458905097557, "grad_norm": 389.7594958058858, "learning_rate": 6.488806696742889e-07, "loss": 317.6137, "step": 46060 }, { "epoch": 0.8860382148455155, "grad_norm": 394.5978663407577, "learning_rate": 6.46719747724609e-07, "loss": 322.8763, "step": 46070 }, { "epoch": 0.8862305391812753, "grad_norm": 420.1356553267791, "learning_rate": 6.44562309717347e-07, "loss": 316.5685, "step": 46080 }, { "epoch": 0.8864228635170351, "grad_norm": 427.54016973685555, "learning_rate": 6.424083564561134e-07, "loss": 309.695, "step": 46090 }, { "epoch": 0.8866151878527949, "grad_norm": 392.8559528694308, "learning_rate": 6.402578887432232e-07, "loss": 301.731, "step": 46100 }, { "epoch": 0.8868075121885548, "grad_norm": 427.0669161675345, "learning_rate": 6.381109073796865e-07, "loss": 309.626, "step": 46110 }, { "epoch": 0.8869998365243146, "grad_norm": 391.30889067923704, "learning_rate": 6.359674131652204e-07, "loss": 299.6323, "step": 46120 }, { "epoch": 0.8871921608600745, "grad_norm": 442.34950434563586, "learning_rate": 6.338274068982408e-07, "loss": 309.3837, "step": 46130 }, { "epoch": 0.8873844851958342, "grad_norm": 407.20357961255, "learning_rate": 6.316908893758656e-07, "loss": 305.2207, "step": 46140 }, { "epoch": 0.8875768095315941, "grad_norm": 396.3075894298535, "learning_rate": 6.295578613939113e-07, "loss": 308.2404, "step": 46150 }, { "epoch": 0.8877691338673539, "grad_norm": 418.4500811248668, "learning_rate": 6.274283237468948e-07, "loss": 298.5642, "step": 46160 }, { "epoch": 0.8879614582031138, "grad_norm": 408.9619948815748, "learning_rate": 6.25302277228036e-07, "loss": 311.3323, "step": 46170 }, { "epoch": 0.8881537825388736, "grad_norm": 418.6031689027424, "learning_rate": 6.231797226292502e-07, "loss": 312.8917, "step": 46180 }, { "epoch": 0.8883461068746333, "grad_norm": 390.6458889010803, "learning_rate": 6.210606607411529e-07, "loss": 300.4923, "step": 46190 }, { "epoch": 0.8885384312103932, "grad_norm": 396.7457081422147, "learning_rate": 6.189450923530627e-07, "loss": 300.7349, "step": 46200 }, { "epoch": 0.888730755546153, "grad_norm": 395.79390788646, "learning_rate": 6.168330182529924e-07, "loss": 313.5702, "step": 46210 }, { "epoch": 0.8889230798819129, "grad_norm": 381.346933784914, "learning_rate": 6.147244392276541e-07, "loss": 308.3087, "step": 46220 }, { "epoch": 0.8891154042176727, "grad_norm": 374.7443409959245, "learning_rate": 6.126193560624583e-07, "loss": 311.5375, "step": 46230 }, { "epoch": 0.8893077285534325, "grad_norm": 391.1134842833984, "learning_rate": 6.105177695415165e-07, "loss": 304.2555, "step": 46240 }, { "epoch": 0.8895000528891923, "grad_norm": 399.0237920108718, "learning_rate": 6.084196804476317e-07, "loss": 305.1379, "step": 46250 }, { "epoch": 0.8896923772249522, "grad_norm": 387.0806488803156, "learning_rate": 6.063250895623096e-07, "loss": 302.4917, "step": 46260 }, { "epoch": 0.889884701560712, "grad_norm": 459.1373913912829, "learning_rate": 6.042339976657486e-07, "loss": 316.0654, "step": 46270 }, { "epoch": 0.8900770258964719, "grad_norm": 413.1743800559773, "learning_rate": 6.021464055368498e-07, "loss": 308.2002, "step": 46280 }, { "epoch": 0.8902693502322316, "grad_norm": 383.7871833378915, "learning_rate": 6.000623139532036e-07, "loss": 316.0735, "step": 46290 }, { "epoch": 0.8904616745679914, "grad_norm": 385.48950766825516, "learning_rate": 5.979817236910979e-07, "loss": 307.3239, "step": 46300 }, { "epoch": 0.8906539989037513, "grad_norm": 392.92931098911106, "learning_rate": 5.959046355255238e-07, "loss": 320.3373, "step": 46310 }, { "epoch": 0.8908463232395111, "grad_norm": 392.2716459066827, "learning_rate": 5.93831050230158e-07, "loss": 307.411, "step": 46320 }, { "epoch": 0.891038647575271, "grad_norm": 396.56704491173207, "learning_rate": 5.917609685773784e-07, "loss": 304.5664, "step": 46330 }, { "epoch": 0.8912309719110307, "grad_norm": 404.43538476368377, "learning_rate": 5.896943913382547e-07, "loss": 297.9583, "step": 46340 }, { "epoch": 0.8914232962467906, "grad_norm": 363.7866243581948, "learning_rate": 5.876313192825544e-07, "loss": 300.0678, "step": 46350 }, { "epoch": 0.8916156205825504, "grad_norm": 403.3344072931486, "learning_rate": 5.855717531787375e-07, "loss": 313.7008, "step": 46360 }, { "epoch": 0.8918079449183103, "grad_norm": 419.4885340202976, "learning_rate": 5.835156937939568e-07, "loss": 313.4582, "step": 46370 }, { "epoch": 0.89200026925407, "grad_norm": 411.7132493909285, "learning_rate": 5.814631418940641e-07, "loss": 304.3654, "step": 46380 }, { "epoch": 0.8921925935898299, "grad_norm": 394.67088583255133, "learning_rate": 5.794140982435981e-07, "loss": 305.937, "step": 46390 }, { "epoch": 0.8923849179255897, "grad_norm": 394.4623270911905, "learning_rate": 5.773685636057924e-07, "loss": 308.0931, "step": 46400 }, { "epoch": 0.8925772422613495, "grad_norm": 391.743733333567, "learning_rate": 5.753265387425777e-07, "loss": 318.6562, "step": 46410 }, { "epoch": 0.8927695665971094, "grad_norm": 439.9736607301654, "learning_rate": 5.732880244145744e-07, "loss": 301.2937, "step": 46420 }, { "epoch": 0.8929618909328692, "grad_norm": 373.2950884327214, "learning_rate": 5.712530213810951e-07, "loss": 308.4244, "step": 46430 }, { "epoch": 0.893154215268629, "grad_norm": 391.7507865793555, "learning_rate": 5.692215304001447e-07, "loss": 302.032, "step": 46440 }, { "epoch": 0.8933465396043888, "grad_norm": 389.7173195728798, "learning_rate": 5.671935522284177e-07, "loss": 312.5521, "step": 46450 }, { "epoch": 0.8935388639401487, "grad_norm": 412.1229719256818, "learning_rate": 5.651690876213067e-07, "loss": 314.3553, "step": 46460 }, { "epoch": 0.8937311882759085, "grad_norm": 415.673015676758, "learning_rate": 5.631481373328895e-07, "loss": 317.5739, "step": 46470 }, { "epoch": 0.8939235126116684, "grad_norm": 383.5959857472058, "learning_rate": 5.61130702115934e-07, "loss": 310.8006, "step": 46480 }, { "epoch": 0.8941158369474281, "grad_norm": 424.48620059147385, "learning_rate": 5.591167827219057e-07, "loss": 306.3784, "step": 46490 }, { "epoch": 0.894308161283188, "grad_norm": 432.72999307470707, "learning_rate": 5.571063799009546e-07, "loss": 310.2934, "step": 46500 }, { "epoch": 0.8945004856189478, "grad_norm": 385.8921818135607, "learning_rate": 5.550994944019216e-07, "loss": 309.3458, "step": 46510 }, { "epoch": 0.8946928099547076, "grad_norm": 393.38009021685053, "learning_rate": 5.53096126972339e-07, "loss": 310.8305, "step": 46520 }, { "epoch": 0.8948851342904675, "grad_norm": 403.06746748671895, "learning_rate": 5.510962783584295e-07, "loss": 309.2699, "step": 46530 }, { "epoch": 0.8950774586262272, "grad_norm": 418.46543849267914, "learning_rate": 5.490999493051008e-07, "loss": 304.8145, "step": 46540 }, { "epoch": 0.8952697829619871, "grad_norm": 402.55515222979574, "learning_rate": 5.471071405559547e-07, "loss": 307.2068, "step": 46550 }, { "epoch": 0.8954621072977469, "grad_norm": 399.46919542924513, "learning_rate": 5.451178528532786e-07, "loss": 309.3905, "step": 46560 }, { "epoch": 0.8956544316335068, "grad_norm": 393.45511438942214, "learning_rate": 5.431320869380519e-07, "loss": 295.8229, "step": 46570 }, { "epoch": 0.8958467559692666, "grad_norm": 385.21869754053773, "learning_rate": 5.411498435499363e-07, "loss": 314.3867, "step": 46580 }, { "epoch": 0.8960390803050264, "grad_norm": 401.35334405474435, "learning_rate": 5.391711234272856e-07, "loss": 302.6217, "step": 46590 }, { "epoch": 0.8962314046407862, "grad_norm": 371.52936120465677, "learning_rate": 5.371959273071414e-07, "loss": 309.0508, "step": 46600 }, { "epoch": 0.8964237289765461, "grad_norm": 368.6968738196107, "learning_rate": 5.352242559252308e-07, "loss": 296.9199, "step": 46610 }, { "epoch": 0.8966160533123059, "grad_norm": 391.51795468470993, "learning_rate": 5.332561100159683e-07, "loss": 310.5974, "step": 46620 }, { "epoch": 0.8968083776480656, "grad_norm": 404.8906658797306, "learning_rate": 5.312914903124566e-07, "loss": 303.6626, "step": 46630 }, { "epoch": 0.8970007019838255, "grad_norm": 398.56791648787976, "learning_rate": 5.293303975464836e-07, "loss": 306.0725, "step": 46640 }, { "epoch": 0.8971930263195853, "grad_norm": 409.44681130094625, "learning_rate": 5.273728324485261e-07, "loss": 309.3985, "step": 46650 }, { "epoch": 0.8973853506553452, "grad_norm": 404.49078265445223, "learning_rate": 5.254187957477397e-07, "loss": 321.8518, "step": 46660 }, { "epoch": 0.897577674991105, "grad_norm": 399.3909204247682, "learning_rate": 5.234682881719766e-07, "loss": 310.4104, "step": 46670 }, { "epoch": 0.8977699993268649, "grad_norm": 386.29112737800665, "learning_rate": 5.215213104477645e-07, "loss": 303.7211, "step": 46680 }, { "epoch": 0.8979623236626246, "grad_norm": 395.3838043275145, "learning_rate": 5.195778633003223e-07, "loss": 300.6631, "step": 46690 }, { "epoch": 0.8981546479983845, "grad_norm": 383.18928443229163, "learning_rate": 5.176379474535509e-07, "loss": 301.8354, "step": 46700 }, { "epoch": 0.8983469723341443, "grad_norm": 414.57286653272627, "learning_rate": 5.1570156363004e-07, "loss": 311.391, "step": 46710 }, { "epoch": 0.8985392966699042, "grad_norm": 388.16176822708604, "learning_rate": 5.13768712551057e-07, "loss": 315.8273, "step": 46720 }, { "epoch": 0.898731621005664, "grad_norm": 383.8670608578188, "learning_rate": 5.118393949365574e-07, "loss": 305.7346, "step": 46730 }, { "epoch": 0.8989239453414237, "grad_norm": 455.2550830240003, "learning_rate": 5.099136115051829e-07, "loss": 306.9304, "step": 46740 }, { "epoch": 0.8991162696771836, "grad_norm": 401.90538581807203, "learning_rate": 5.079913629742539e-07, "loss": 308.4706, "step": 46750 }, { "epoch": 0.8993085940129434, "grad_norm": 434.08363082390514, "learning_rate": 5.060726500597768e-07, "loss": 317.7397, "step": 46760 }, { "epoch": 0.8995009183487033, "grad_norm": 401.18885328075567, "learning_rate": 5.041574734764376e-07, "loss": 305.9172, "step": 46770 }, { "epoch": 0.899693242684463, "grad_norm": 408.8802718021376, "learning_rate": 5.022458339376124e-07, "loss": 304.0198, "step": 46780 }, { "epoch": 0.8998855670202229, "grad_norm": 409.6269404814617, "learning_rate": 5.003377321553538e-07, "loss": 312.0058, "step": 46790 }, { "epoch": 0.9000778913559827, "grad_norm": 388.8489158257935, "learning_rate": 4.984331688403976e-07, "loss": 312.4344, "step": 46800 }, { "epoch": 0.9002702156917426, "grad_norm": 407.2707005391624, "learning_rate": 4.96532144702162e-07, "loss": 304.1944, "step": 46810 }, { "epoch": 0.9004625400275024, "grad_norm": 397.98932769010725, "learning_rate": 4.946346604487462e-07, "loss": 321.0849, "step": 46820 }, { "epoch": 0.9006548643632623, "grad_norm": 403.93556181436435, "learning_rate": 4.927407167869346e-07, "loss": 305.4765, "step": 46830 }, { "epoch": 0.900847188699022, "grad_norm": 414.8852899985486, "learning_rate": 4.908503144221877e-07, "loss": 307.014, "step": 46840 }, { "epoch": 0.9010395130347818, "grad_norm": 393.6270191712424, "learning_rate": 4.889634540586518e-07, "loss": 307.8562, "step": 46850 }, { "epoch": 0.9012318373705417, "grad_norm": 411.5943234646785, "learning_rate": 4.870801363991484e-07, "loss": 316.6229, "step": 46860 }, { "epoch": 0.9014241617063015, "grad_norm": 392.1474518141912, "learning_rate": 4.852003621451829e-07, "loss": 301.0697, "step": 46870 }, { "epoch": 0.9016164860420613, "grad_norm": 409.64889906283616, "learning_rate": 4.833241319969395e-07, "loss": 299.2146, "step": 46880 }, { "epoch": 0.9018088103778211, "grad_norm": 389.6474963585249, "learning_rate": 4.814514466532849e-07, "loss": 307.1873, "step": 46890 }, { "epoch": 0.902001134713581, "grad_norm": 424.1156366381604, "learning_rate": 4.795823068117622e-07, "loss": 313.1375, "step": 46900 }, { "epoch": 0.9021934590493408, "grad_norm": 403.40640177961683, "learning_rate": 4.777167131685945e-07, "loss": 300.0229, "step": 46910 }, { "epoch": 0.9023857833851007, "grad_norm": 381.20234247532073, "learning_rate": 4.7585466641868696e-07, "loss": 315.0416, "step": 46920 }, { "epoch": 0.9025781077208604, "grad_norm": 388.3331273384083, "learning_rate": 4.7399616725561925e-07, "loss": 302.3271, "step": 46930 }, { "epoch": 0.9027704320566203, "grad_norm": 427.0967529725314, "learning_rate": 4.721412163716521e-07, "loss": 306.9506, "step": 46940 }, { "epoch": 0.9029627563923801, "grad_norm": 422.53308076831155, "learning_rate": 4.702898144577228e-07, "loss": 320.1698, "step": 46950 }, { "epoch": 0.9031550807281399, "grad_norm": 396.06198883312834, "learning_rate": 4.6844196220345086e-07, "loss": 301.159, "step": 46960 }, { "epoch": 0.9033474050638998, "grad_norm": 409.804926935526, "learning_rate": 4.665976602971278e-07, "loss": 313.4145, "step": 46970 }, { "epoch": 0.9035397293996595, "grad_norm": 387.72794018017765, "learning_rate": 4.647569094257276e-07, "loss": 315.9578, "step": 46980 }, { "epoch": 0.9037320537354194, "grad_norm": 383.42527174430387, "learning_rate": 4.629197102748984e-07, "loss": 299.4948, "step": 46990 }, { "epoch": 0.9039243780711792, "grad_norm": 407.20465709677194, "learning_rate": 4.610860635289671e-07, "loss": 308.1644, "step": 47000 }, { "epoch": 0.9041167024069391, "grad_norm": 392.6398786002841, "learning_rate": 4.592559698709387e-07, "loss": 306.0993, "step": 47010 }, { "epoch": 0.9043090267426989, "grad_norm": 386.98735841037086, "learning_rate": 4.5742942998248774e-07, "loss": 305.9105, "step": 47020 }, { "epoch": 0.9045013510784587, "grad_norm": 401.9132128681427, "learning_rate": 4.5560644454397563e-07, "loss": 307.4984, "step": 47030 }, { "epoch": 0.9046936754142185, "grad_norm": 390.8832774728099, "learning_rate": 4.537870142344314e-07, "loss": 308.8956, "step": 47040 }, { "epoch": 0.9048859997499784, "grad_norm": 388.3048246783133, "learning_rate": 4.5197113973156403e-07, "loss": 304.6942, "step": 47050 }, { "epoch": 0.9050783240857382, "grad_norm": 412.44373128855847, "learning_rate": 4.5015882171175476e-07, "loss": 313.6644, "step": 47060 }, { "epoch": 0.905270648421498, "grad_norm": 464.0814530708157, "learning_rate": 4.483500608500657e-07, "loss": 304.5468, "step": 47070 }, { "epoch": 0.9054629727572578, "grad_norm": 424.29406018528863, "learning_rate": 4.4654485782022697e-07, "loss": 306.5049, "step": 47080 }, { "epoch": 0.9056552970930176, "grad_norm": 413.4845756821773, "learning_rate": 4.447432132946472e-07, "loss": 313.5461, "step": 47090 }, { "epoch": 0.9058476214287775, "grad_norm": 429.2563096228966, "learning_rate": 4.429451279444119e-07, "loss": 308.5096, "step": 47100 }, { "epoch": 0.9060399457645373, "grad_norm": 396.6540074936668, "learning_rate": 4.411506024392753e-07, "loss": 312.3194, "step": 47110 }, { "epoch": 0.9062322701002972, "grad_norm": 445.98581476751997, "learning_rate": 4.393596374476705e-07, "loss": 299.0574, "step": 47120 }, { "epoch": 0.9064245944360569, "grad_norm": 377.0372439405971, "learning_rate": 4.3757223363670055e-07, "loss": 297.6503, "step": 47130 }, { "epoch": 0.9066169187718168, "grad_norm": 382.8075243290351, "learning_rate": 4.3578839167214505e-07, "loss": 312.4722, "step": 47140 }, { "epoch": 0.9068092431075766, "grad_norm": 372.54105592565537, "learning_rate": 4.3400811221845693e-07, "loss": 311.4449, "step": 47150 }, { "epoch": 0.9070015674433365, "grad_norm": 380.14881384319904, "learning_rate": 4.322313959387592e-07, "loss": 306.3151, "step": 47160 }, { "epoch": 0.9071938917790963, "grad_norm": 377.9214585066291, "learning_rate": 4.304582434948479e-07, "loss": 306.5595, "step": 47170 }, { "epoch": 0.9073862161148561, "grad_norm": 363.7041959721657, "learning_rate": 4.2868865554719583e-07, "loss": 308.2623, "step": 47180 }, { "epoch": 0.9075785404506159, "grad_norm": 359.8307204599658, "learning_rate": 4.269226327549447e-07, "loss": 310.797, "step": 47190 }, { "epoch": 0.9077708647863757, "grad_norm": 392.42271178670643, "learning_rate": 4.251601757759061e-07, "loss": 297.5345, "step": 47200 }, { "epoch": 0.9079631891221356, "grad_norm": 410.47760106090544, "learning_rate": 4.234012852665703e-07, "loss": 314.4087, "step": 47210 }, { "epoch": 0.9081555134578954, "grad_norm": 412.60923116712473, "learning_rate": 4.2164596188209226e-07, "loss": 312.397, "step": 47220 }, { "epoch": 0.9083478377936552, "grad_norm": 441.0582246059681, "learning_rate": 4.198942062763023e-07, "loss": 311.3601, "step": 47230 }, { "epoch": 0.908540162129415, "grad_norm": 412.8262065674468, "learning_rate": 4.181460191016984e-07, "loss": 303.7134, "step": 47240 }, { "epoch": 0.9087324864651749, "grad_norm": 385.8560553277431, "learning_rate": 4.1640140100945304e-07, "loss": 313.7107, "step": 47250 }, { "epoch": 0.9089248108009347, "grad_norm": 434.44396051005725, "learning_rate": 4.146603526494086e-07, "loss": 305.7453, "step": 47260 }, { "epoch": 0.9091171351366946, "grad_norm": 433.1057345524686, "learning_rate": 4.129228746700742e-07, "loss": 313.6865, "step": 47270 }, { "epoch": 0.9093094594724543, "grad_norm": 456.50581322726777, "learning_rate": 4.111889677186354e-07, "loss": 306.2014, "step": 47280 }, { "epoch": 0.9095017838082142, "grad_norm": 414.5498867449801, "learning_rate": 4.094586324409411e-07, "loss": 303.7974, "step": 47290 }, { "epoch": 0.909694108143974, "grad_norm": 377.9031480509657, "learning_rate": 4.0773186948151246e-07, "loss": 311.4135, "step": 47300 }, { "epoch": 0.9098864324797338, "grad_norm": 454.26882143084265, "learning_rate": 4.060086794835405e-07, "loss": 303.403, "step": 47310 }, { "epoch": 0.9100787568154937, "grad_norm": 391.93242191769303, "learning_rate": 4.042890630888863e-07, "loss": 304.2959, "step": 47320 }, { "epoch": 0.9102710811512534, "grad_norm": 372.6859946321374, "learning_rate": 4.025730209380774e-07, "loss": 296.9763, "step": 47330 }, { "epoch": 0.9104634054870133, "grad_norm": 416.9777548055224, "learning_rate": 4.0086055367031027e-07, "loss": 312.5712, "step": 47340 }, { "epoch": 0.9106557298227731, "grad_norm": 373.3456540045296, "learning_rate": 3.9915166192345365e-07, "loss": 306.0556, "step": 47350 }, { "epoch": 0.910848054158533, "grad_norm": 400.7407150167145, "learning_rate": 3.9744634633403944e-07, "loss": 303.4901, "step": 47360 }, { "epoch": 0.9110403784942928, "grad_norm": 394.11532067744247, "learning_rate": 3.957446075372706e-07, "loss": 298.7103, "step": 47370 }, { "epoch": 0.9112327028300526, "grad_norm": 404.2754907300138, "learning_rate": 3.940464461670135e-07, "loss": 312.5205, "step": 47380 }, { "epoch": 0.9114250271658124, "grad_norm": 411.70067068935276, "learning_rate": 3.923518628558087e-07, "loss": 304.3337, "step": 47390 }, { "epoch": 0.9116173515015723, "grad_norm": 385.19714376097517, "learning_rate": 3.9066085823485923e-07, "loss": 308.4251, "step": 47400 }, { "epoch": 0.9118096758373321, "grad_norm": 391.45339507284046, "learning_rate": 3.8897343293403777e-07, "loss": 314.9632, "step": 47410 }, { "epoch": 0.9120020001730919, "grad_norm": 394.73197519988577, "learning_rate": 3.872895875818794e-07, "loss": 305.2058, "step": 47420 }, { "epoch": 0.9121943245088517, "grad_norm": 419.10308006123546, "learning_rate": 3.856093228055924e-07, "loss": 302.65, "step": 47430 }, { "epoch": 0.9123866488446115, "grad_norm": 408.8303017265764, "learning_rate": 3.83932639231045e-07, "loss": 310.5522, "step": 47440 }, { "epoch": 0.9125789731803714, "grad_norm": 416.2099356486548, "learning_rate": 3.822595374827742e-07, "loss": 303.4325, "step": 47450 }, { "epoch": 0.9127712975161312, "grad_norm": 391.5307407301064, "learning_rate": 3.805900181839839e-07, "loss": 310.8053, "step": 47460 }, { "epoch": 0.9129636218518911, "grad_norm": 391.2676107436919, "learning_rate": 3.789240819565432e-07, "loss": 302.8504, "step": 47470 }, { "epoch": 0.9131559461876508, "grad_norm": 385.40043254337735, "learning_rate": 3.772617294209835e-07, "loss": 306.8966, "step": 47480 }, { "epoch": 0.9133482705234107, "grad_norm": 397.9634200225289, "learning_rate": 3.7560296119650396e-07, "loss": 307.583, "step": 47490 }, { "epoch": 0.9135405948591705, "grad_norm": 427.62903183076037, "learning_rate": 3.739477779009704e-07, "loss": 319.3813, "step": 47500 }, { "epoch": 0.9137329191949304, "grad_norm": 395.64708827553017, "learning_rate": 3.7229618015091065e-07, "loss": 311.9443, "step": 47510 }, { "epoch": 0.9139252435306902, "grad_norm": 369.4565438294151, "learning_rate": 3.7064816856151484e-07, "loss": 303.8605, "step": 47520 }, { "epoch": 0.9141175678664499, "grad_norm": 454.6926552513965, "learning_rate": 3.6900374374664425e-07, "loss": 314.5326, "step": 47530 }, { "epoch": 0.9143098922022098, "grad_norm": 391.4264203251334, "learning_rate": 3.6736290631881667e-07, "loss": 310.9554, "step": 47540 }, { "epoch": 0.9145022165379696, "grad_norm": 387.13696803554063, "learning_rate": 3.657256568892187e-07, "loss": 312.8435, "step": 47550 }, { "epoch": 0.9146945408737295, "grad_norm": 420.61641793334394, "learning_rate": 3.6409199606769806e-07, "loss": 297.8118, "step": 47560 }, { "epoch": 0.9148868652094893, "grad_norm": 386.45301945373654, "learning_rate": 3.6246192446276694e-07, "loss": 302.9943, "step": 47570 }, { "epoch": 0.9150791895452491, "grad_norm": 427.13290626235886, "learning_rate": 3.6083544268160077e-07, "loss": 310.9614, "step": 47580 }, { "epoch": 0.9152715138810089, "grad_norm": 397.3602625014417, "learning_rate": 3.5921255133003483e-07, "loss": 305.3927, "step": 47590 }, { "epoch": 0.9154638382167688, "grad_norm": 413.5541572634133, "learning_rate": 3.5759325101257013e-07, "loss": 309.5832, "step": 47600 }, { "epoch": 0.9156561625525286, "grad_norm": 417.4621322747239, "learning_rate": 3.559775423323708e-07, "loss": 305.1015, "step": 47610 }, { "epoch": 0.9158484868882885, "grad_norm": 405.6204122728192, "learning_rate": 3.54365425891261e-07, "loss": 300.9579, "step": 47620 }, { "epoch": 0.9160408112240482, "grad_norm": 391.38108303414936, "learning_rate": 3.527569022897259e-07, "loss": 316.3043, "step": 47630 }, { "epoch": 0.916233135559808, "grad_norm": 395.2986918578219, "learning_rate": 3.511519721269163e-07, "loss": 310.4572, "step": 47640 }, { "epoch": 0.9164254598955679, "grad_norm": 404.9160792814242, "learning_rate": 3.4955063600064177e-07, "loss": 300.7953, "step": 47650 }, { "epoch": 0.9166177842313277, "grad_norm": 397.29818292525414, "learning_rate": 3.479528945073707e-07, "loss": 305.4276, "step": 47660 }, { "epoch": 0.9168101085670876, "grad_norm": 410.26718070099497, "learning_rate": 3.4635874824223924e-07, "loss": 309.4091, "step": 47670 }, { "epoch": 0.9170024329028473, "grad_norm": 478.90730364080486, "learning_rate": 3.4476819779903694e-07, "loss": 305.3964, "step": 47680 }, { "epoch": 0.9171947572386072, "grad_norm": 434.3395458451395, "learning_rate": 3.43181243770222e-07, "loss": 303.0155, "step": 47690 }, { "epoch": 0.917387081574367, "grad_norm": 426.04525326116374, "learning_rate": 3.4159788674690386e-07, "loss": 302.1621, "step": 47700 }, { "epoch": 0.9175794059101269, "grad_norm": 385.0049710114499, "learning_rate": 3.4001812731886077e-07, "loss": 297.9396, "step": 47710 }, { "epoch": 0.9177717302458867, "grad_norm": 376.6403131061554, "learning_rate": 3.384419660745253e-07, "loss": 300.3971, "step": 47720 }, { "epoch": 0.9179640545816465, "grad_norm": 420.8032008218724, "learning_rate": 3.368694036009923e-07, "loss": 311.8968, "step": 47730 }, { "epoch": 0.9181563789174063, "grad_norm": 399.704339037201, "learning_rate": 3.353004404840121e-07, "loss": 296.0103, "step": 47740 }, { "epoch": 0.9183487032531661, "grad_norm": 427.13907898273413, "learning_rate": 3.3373507730800167e-07, "loss": 319.8219, "step": 47750 }, { "epoch": 0.918541027588926, "grad_norm": 434.9184272947517, "learning_rate": 3.321733146560324e-07, "loss": 306.2606, "step": 47760 }, { "epoch": 0.9187333519246857, "grad_norm": 427.44417930091885, "learning_rate": 3.306151531098323e-07, "loss": 312.6792, "step": 47770 }, { "epoch": 0.9189256762604456, "grad_norm": 416.03131125671723, "learning_rate": 3.2906059324979255e-07, "loss": 306.7176, "step": 47780 }, { "epoch": 0.9191180005962054, "grad_norm": 459.21913196223795, "learning_rate": 3.275096356549612e-07, "loss": 313.7354, "step": 47790 }, { "epoch": 0.9193103249319653, "grad_norm": 388.8252851182349, "learning_rate": 3.2596228090304496e-07, "loss": 296.0648, "step": 47800 }, { "epoch": 0.9195026492677251, "grad_norm": 389.0709254200922, "learning_rate": 3.2441852957040607e-07, "loss": 306.8889, "step": 47810 }, { "epoch": 0.919694973603485, "grad_norm": 363.8965786606022, "learning_rate": 3.228783822320669e-07, "loss": 306.5515, "step": 47820 }, { "epoch": 0.9198872979392447, "grad_norm": 373.08081727882825, "learning_rate": 3.213418394617085e-07, "loss": 307.475, "step": 47830 }, { "epoch": 0.9200796222750046, "grad_norm": 404.33700687831754, "learning_rate": 3.1980890183166633e-07, "loss": 305.8507, "step": 47840 }, { "epoch": 0.9202719466107644, "grad_norm": 399.776064460427, "learning_rate": 3.1827956991293374e-07, "loss": 307.8798, "step": 47850 }, { "epoch": 0.9204642709465242, "grad_norm": 390.0528466674345, "learning_rate": 3.167538442751639e-07, "loss": 309.2005, "step": 47860 }, { "epoch": 0.920656595282284, "grad_norm": 393.19543182039837, "learning_rate": 3.1523172548666215e-07, "loss": 304.9871, "step": 47870 }, { "epoch": 0.9208489196180438, "grad_norm": 397.9231074449624, "learning_rate": 3.1371321411439284e-07, "loss": 311.978, "step": 47880 }, { "epoch": 0.9210412439538037, "grad_norm": 399.7454165452534, "learning_rate": 3.1219831072397787e-07, "loss": 308.1011, "step": 47890 }, { "epoch": 0.9212335682895635, "grad_norm": 366.566005147306, "learning_rate": 3.1068701587969375e-07, "loss": 313.1039, "step": 47900 }, { "epoch": 0.9214258926253234, "grad_norm": 426.1127708948236, "learning_rate": 3.091793301444701e-07, "loss": 308.1743, "step": 47910 }, { "epoch": 0.9216182169610831, "grad_norm": 410.33449062514165, "learning_rate": 3.076752540798977e-07, "loss": 309.3588, "step": 47920 }, { "epoch": 0.921810541296843, "grad_norm": 395.76997745563017, "learning_rate": 3.061747882462185e-07, "loss": 310.8947, "step": 47930 }, { "epoch": 0.9220028656326028, "grad_norm": 384.98688699766063, "learning_rate": 3.0467793320233306e-07, "loss": 307.1477, "step": 47940 }, { "epoch": 0.9221951899683627, "grad_norm": 374.34915834955206, "learning_rate": 3.031846895057922e-07, "loss": 311.2257, "step": 47950 }, { "epoch": 0.9223875143041225, "grad_norm": 407.39256496700875, "learning_rate": 3.0169505771280747e-07, "loss": 306.5492, "step": 47960 }, { "epoch": 0.9225798386398822, "grad_norm": 392.1264654575426, "learning_rate": 3.002090383782408e-07, "loss": 302.2202, "step": 47970 }, { "epoch": 0.9227721629756421, "grad_norm": 421.66941636906466, "learning_rate": 2.9872663205561035e-07, "loss": 303.7292, "step": 47980 }, { "epoch": 0.9229644873114019, "grad_norm": 433.9303991403661, "learning_rate": 2.972478392970857e-07, "loss": 312.4748, "step": 47990 }, { "epoch": 0.9231568116471618, "grad_norm": 427.746277049765, "learning_rate": 2.9577266065349716e-07, "loss": 304.2794, "step": 48000 }, { "epoch": 0.9233491359829216, "grad_norm": 391.11532197073797, "learning_rate": 2.9430109667432096e-07, "loss": 305.4531, "step": 48010 }, { "epoch": 0.9235414603186815, "grad_norm": 434.51988344651664, "learning_rate": 2.9283314790769177e-07, "loss": 314.5872, "step": 48020 }, { "epoch": 0.9237337846544412, "grad_norm": 393.49782163556256, "learning_rate": 2.913688149003946e-07, "loss": 301.1704, "step": 48030 }, { "epoch": 0.9239261089902011, "grad_norm": 404.44809273378036, "learning_rate": 2.899080981978719e-07, "loss": 312.3442, "step": 48040 }, { "epoch": 0.9241184333259609, "grad_norm": 387.5157200282338, "learning_rate": 2.8845099834421517e-07, "loss": 301.7772, "step": 48050 }, { "epoch": 0.9243107576617208, "grad_norm": 411.80442132545346, "learning_rate": 2.869975158821681e-07, "loss": 304.2401, "step": 48060 }, { "epoch": 0.9245030819974805, "grad_norm": 418.6088207425718, "learning_rate": 2.8554765135313303e-07, "loss": 314.1443, "step": 48070 }, { "epoch": 0.9246954063332403, "grad_norm": 373.7045028644747, "learning_rate": 2.8410140529715803e-07, "loss": 298.7632, "step": 48080 }, { "epoch": 0.9248877306690002, "grad_norm": 419.7575110753599, "learning_rate": 2.826587782529444e-07, "loss": 296.541, "step": 48090 }, { "epoch": 0.92508005500476, "grad_norm": 415.29859382412235, "learning_rate": 2.812197707578501e-07, "loss": 311.2337, "step": 48100 }, { "epoch": 0.9252723793405199, "grad_norm": 418.7754859089795, "learning_rate": 2.797843833478797e-07, "loss": 306.2839, "step": 48110 }, { "epoch": 0.9254647036762796, "grad_norm": 416.093994573165, "learning_rate": 2.7835261655769217e-07, "loss": 300.157, "step": 48120 }, { "epoch": 0.9256570280120395, "grad_norm": 407.7696217736816, "learning_rate": 2.769244709205976e-07, "loss": 305.1632, "step": 48130 }, { "epoch": 0.9258493523477993, "grad_norm": 386.8632437854869, "learning_rate": 2.7549994696855376e-07, "loss": 299.6427, "step": 48140 }, { "epoch": 0.9260416766835592, "grad_norm": 388.3744927833977, "learning_rate": 2.740790452321751e-07, "loss": 298.3499, "step": 48150 }, { "epoch": 0.926234001019319, "grad_norm": 380.1212958145323, "learning_rate": 2.726617662407238e-07, "loss": 299.283, "step": 48160 }, { "epoch": 0.9264263253550789, "grad_norm": 404.928903643814, "learning_rate": 2.7124811052211097e-07, "loss": 311.721, "step": 48170 }, { "epoch": 0.9266186496908386, "grad_norm": 408.78550540664503, "learning_rate": 2.698380786029031e-07, "loss": 303.4837, "step": 48180 }, { "epoch": 0.9268109740265985, "grad_norm": 416.6285787389231, "learning_rate": 2.6843167100831125e-07, "loss": 306.3239, "step": 48190 }, { "epoch": 0.9270032983623583, "grad_norm": 399.5713125741659, "learning_rate": 2.6702888826219965e-07, "loss": 310.2678, "step": 48200 }, { "epoch": 0.9271956226981181, "grad_norm": 404.0405697824927, "learning_rate": 2.6562973088708146e-07, "loss": 303.4187, "step": 48210 }, { "epoch": 0.927387947033878, "grad_norm": 395.0537069860546, "learning_rate": 2.6423419940412086e-07, "loss": 301.2331, "step": 48220 }, { "epoch": 0.9275802713696377, "grad_norm": 398.3143279777804, "learning_rate": 2.628422943331288e-07, "loss": 310.1888, "step": 48230 }, { "epoch": 0.9277725957053976, "grad_norm": 382.9889266888244, "learning_rate": 2.614540161925683e-07, "loss": 312.7567, "step": 48240 }, { "epoch": 0.9279649200411574, "grad_norm": 394.12458619812935, "learning_rate": 2.6006936549954784e-07, "loss": 304.4813, "step": 48250 }, { "epoch": 0.9281572443769173, "grad_norm": 372.88057147235764, "learning_rate": 2.5868834276983057e-07, "loss": 310.6033, "step": 48260 }, { "epoch": 0.928349568712677, "grad_norm": 379.9207204739021, "learning_rate": 2.573109485178216e-07, "loss": 314.7656, "step": 48270 }, { "epoch": 0.9285418930484369, "grad_norm": 387.6038332399289, "learning_rate": 2.5593718325657713e-07, "loss": 313.9663, "step": 48280 }, { "epoch": 0.9287342173841967, "grad_norm": 432.3752052009221, "learning_rate": 2.545670474978057e-07, "loss": 298.0133, "step": 48290 }, { "epoch": 0.9289265417199566, "grad_norm": 400.46269006345216, "learning_rate": 2.532005417518568e-07, "loss": 301.4644, "step": 48300 }, { "epoch": 0.9291188660557164, "grad_norm": 396.8081183846114, "learning_rate": 2.5183766652773336e-07, "loss": 301.6045, "step": 48310 }, { "epoch": 0.9293111903914761, "grad_norm": 398.644412032208, "learning_rate": 2.504784223330814e-07, "loss": 303.1359, "step": 48320 }, { "epoch": 0.929503514727236, "grad_norm": 425.3370328785918, "learning_rate": 2.4912280967419934e-07, "loss": 309.1978, "step": 48330 }, { "epoch": 0.9296958390629958, "grad_norm": 408.3348327436733, "learning_rate": 2.477708290560299e-07, "loss": 317.0573, "step": 48340 }, { "epoch": 0.9298881633987557, "grad_norm": 390.7232295085053, "learning_rate": 2.464224809821614e-07, "loss": 317.6602, "step": 48350 }, { "epoch": 0.9300804877345155, "grad_norm": 400.2972888701405, "learning_rate": 2.450777659548353e-07, "loss": 310.7288, "step": 48360 }, { "epoch": 0.9302728120702753, "grad_norm": 417.79385169680745, "learning_rate": 2.4373668447493225e-07, "loss": 311.584, "step": 48370 }, { "epoch": 0.9304651364060351, "grad_norm": 359.57601381210014, "learning_rate": 2.4239923704198476e-07, "loss": 303.2301, "step": 48380 }, { "epoch": 0.930657460741795, "grad_norm": 385.84907036685, "learning_rate": 2.410654241541688e-07, "loss": 302.2753, "step": 48390 }, { "epoch": 0.9308497850775548, "grad_norm": 398.40108298436763, "learning_rate": 2.3973524630830804e-07, "loss": 306.8883, "step": 48400 }, { "epoch": 0.9310421094133147, "grad_norm": 401.04926996355346, "learning_rate": 2.3840870399987283e-07, "loss": 311.6254, "step": 48410 }, { "epoch": 0.9312344337490744, "grad_norm": 375.93244778795986, "learning_rate": 2.370857977229768e-07, "loss": 311.1448, "step": 48420 }, { "epoch": 0.9314267580848342, "grad_norm": 384.1908223565569, "learning_rate": 2.3576652797038247e-07, "loss": 309.7895, "step": 48430 }, { "epoch": 0.9316190824205941, "grad_norm": 392.4629389107022, "learning_rate": 2.344508952334934e-07, "loss": 304.0092, "step": 48440 }, { "epoch": 0.9318114067563539, "grad_norm": 391.3649359115568, "learning_rate": 2.3313890000236316e-07, "loss": 299.921, "step": 48450 }, { "epoch": 0.9320037310921138, "grad_norm": 388.21067740670526, "learning_rate": 2.3183054276568752e-07, "loss": 305.5152, "step": 48460 }, { "epoch": 0.9321960554278735, "grad_norm": 393.65427404561126, "learning_rate": 2.305258240108077e-07, "loss": 301.6203, "step": 48470 }, { "epoch": 0.9323883797636334, "grad_norm": 405.66105792409974, "learning_rate": 2.2922474422371166e-07, "loss": 306.842, "step": 48480 }, { "epoch": 0.9325807040993932, "grad_norm": 410.0883644699099, "learning_rate": 2.279273038890273e-07, "loss": 310.1019, "step": 48490 }, { "epoch": 0.9327730284351531, "grad_norm": 406.1702012289618, "learning_rate": 2.2663350349003134e-07, "loss": 306.3602, "step": 48500 }, { "epoch": 0.9329653527709129, "grad_norm": 386.66094171385373, "learning_rate": 2.2534334350864274e-07, "loss": 298.1951, "step": 48510 }, { "epoch": 0.9331576771066727, "grad_norm": 709.1092518689194, "learning_rate": 2.2405682442542487e-07, "loss": 308.5916, "step": 48520 }, { "epoch": 0.9333500014424325, "grad_norm": 372.2512476546447, "learning_rate": 2.2277394671958442e-07, "loss": 307.1227, "step": 48530 }, { "epoch": 0.9335423257781923, "grad_norm": 397.79894090076215, "learning_rate": 2.2149471086897355e-07, "loss": 306.5278, "step": 48540 }, { "epoch": 0.9337346501139522, "grad_norm": 459.51028813612584, "learning_rate": 2.202191173500845e-07, "loss": 312.0135, "step": 48550 }, { "epoch": 0.933926974449712, "grad_norm": 417.684765613212, "learning_rate": 2.1894716663805716e-07, "loss": 310.5669, "step": 48560 }, { "epoch": 0.9341192987854718, "grad_norm": 401.319909761339, "learning_rate": 2.176788592066692e-07, "loss": 319.8216, "step": 48570 }, { "epoch": 0.9343116231212316, "grad_norm": 412.5158792404211, "learning_rate": 2.164141955283472e-07, "loss": 300.1668, "step": 48580 }, { "epoch": 0.9345039474569915, "grad_norm": 384.07131377349486, "learning_rate": 2.1515317607415654e-07, "loss": 305.6861, "step": 48590 }, { "epoch": 0.9346962717927513, "grad_norm": 395.1357619209503, "learning_rate": 2.1389580131380373e-07, "loss": 326.2871, "step": 48600 }, { "epoch": 0.9348885961285112, "grad_norm": 424.3619104491749, "learning_rate": 2.126420717156441e-07, "loss": 320.5873, "step": 48610 }, { "epoch": 0.9350809204642709, "grad_norm": 416.9206505964984, "learning_rate": 2.113919877466686e-07, "loss": 301.9237, "step": 48620 }, { "epoch": 0.9352732448000308, "grad_norm": 451.18930335852923, "learning_rate": 2.1014554987251356e-07, "loss": 314.8972, "step": 48630 }, { "epoch": 0.9354655691357906, "grad_norm": 389.0993931771178, "learning_rate": 2.0890275855745546e-07, "loss": 304.6365, "step": 48640 }, { "epoch": 0.9356578934715504, "grad_norm": 394.0023278736881, "learning_rate": 2.0766361426441505e-07, "loss": 302.1834, "step": 48650 }, { "epoch": 0.9358502178073103, "grad_norm": 374.53596992770554, "learning_rate": 2.0642811745495206e-07, "loss": 314.5554, "step": 48660 }, { "epoch": 0.93604254214307, "grad_norm": 418.4275970323863, "learning_rate": 2.0519626858926944e-07, "loss": 306.7219, "step": 48670 }, { "epoch": 0.9362348664788299, "grad_norm": 382.5377869789409, "learning_rate": 2.0396806812621018e-07, "loss": 305.8214, "step": 48680 }, { "epoch": 0.9364271908145897, "grad_norm": 411.2371343496123, "learning_rate": 2.0274351652325942e-07, "loss": 315.2089, "step": 48690 }, { "epoch": 0.9366195151503496, "grad_norm": 403.2280829162947, "learning_rate": 2.0152261423654118e-07, "loss": 312.0557, "step": 48700 }, { "epoch": 0.9368118394861094, "grad_norm": 380.12067956481604, "learning_rate": 2.003053617208217e-07, "loss": 303.6725, "step": 48710 }, { "epoch": 0.9370041638218692, "grad_norm": 412.14216115147207, "learning_rate": 1.9909175942950832e-07, "loss": 318.0202, "step": 48720 }, { "epoch": 0.937196488157629, "grad_norm": 447.68326884775234, "learning_rate": 1.9788180781464716e-07, "loss": 310.3637, "step": 48730 }, { "epoch": 0.9373888124933889, "grad_norm": 409.3970588100632, "learning_rate": 1.9667550732692554e-07, "loss": 309.9213, "step": 48740 }, { "epoch": 0.9375811368291487, "grad_norm": 370.2104350869597, "learning_rate": 1.9547285841566843e-07, "loss": 300.6118, "step": 48750 }, { "epoch": 0.9377734611649085, "grad_norm": 398.5902324477153, "learning_rate": 1.9427386152884643e-07, "loss": 306.0105, "step": 48760 }, { "epoch": 0.9379657855006683, "grad_norm": 416.05418949302583, "learning_rate": 1.9307851711306336e-07, "loss": 308.0399, "step": 48770 }, { "epoch": 0.9381581098364281, "grad_norm": 390.8036209363838, "learning_rate": 1.918868256135653e-07, "loss": 313.6243, "step": 48780 }, { "epoch": 0.938350434172188, "grad_norm": 381.1551355579358, "learning_rate": 1.9069878747423943e-07, "loss": 306.6806, "step": 48790 }, { "epoch": 0.9385427585079478, "grad_norm": 438.6893297102449, "learning_rate": 1.8951440313760838e-07, "loss": 308.8067, "step": 48800 }, { "epoch": 0.9387350828437077, "grad_norm": 384.22215021989064, "learning_rate": 1.8833367304483708e-07, "loss": 310.653, "step": 48810 }, { "epoch": 0.9389274071794674, "grad_norm": 397.3750748611407, "learning_rate": 1.8715659763572703e-07, "loss": 307.2456, "step": 48820 }, { "epoch": 0.9391197315152273, "grad_norm": 396.56710352266924, "learning_rate": 1.8598317734872086e-07, "loss": 305.1906, "step": 48830 }, { "epoch": 0.9393120558509871, "grad_norm": 402.34455709251995, "learning_rate": 1.8481341262089668e-07, "loss": 309.2093, "step": 48840 }, { "epoch": 0.939504380186747, "grad_norm": 436.5600980760861, "learning_rate": 1.836473038879727e-07, "loss": 303.6096, "step": 48850 }, { "epoch": 0.9396967045225068, "grad_norm": 441.5939688861406, "learning_rate": 1.8248485158430696e-07, "loss": 305.9887, "step": 48860 }, { "epoch": 0.9398890288582665, "grad_norm": 416.38813922428216, "learning_rate": 1.813260561428909e-07, "loss": 305.2926, "step": 48870 }, { "epoch": 0.9400813531940264, "grad_norm": 412.8071228424195, "learning_rate": 1.8017091799535924e-07, "loss": 299.6569, "step": 48880 }, { "epoch": 0.9402736775297862, "grad_norm": 382.5826220256806, "learning_rate": 1.7901943757198003e-07, "loss": 300.0388, "step": 48890 }, { "epoch": 0.9404660018655461, "grad_norm": 384.9660267445075, "learning_rate": 1.7787161530166242e-07, "loss": 305.3296, "step": 48900 }, { "epoch": 0.9406583262013058, "grad_norm": 414.76878037652773, "learning_rate": 1.7672745161194992e-07, "loss": 309.6388, "step": 48910 }, { "epoch": 0.9408506505370657, "grad_norm": 394.44532063860356, "learning_rate": 1.75586946929025e-07, "loss": 305.7398, "step": 48920 }, { "epoch": 0.9410429748728255, "grad_norm": 394.61710030903174, "learning_rate": 1.7445010167770672e-07, "loss": 311.1875, "step": 48930 }, { "epoch": 0.9412352992085854, "grad_norm": 409.11691607984994, "learning_rate": 1.7331691628145076e-07, "loss": 310.648, "step": 48940 }, { "epoch": 0.9414276235443452, "grad_norm": 390.60901755971804, "learning_rate": 1.7218739116235061e-07, "loss": 306.5787, "step": 48950 }, { "epoch": 0.9416199478801051, "grad_norm": 388.3207986203266, "learning_rate": 1.710615267411353e-07, "loss": 299.6847, "step": 48960 }, { "epoch": 0.9418122722158648, "grad_norm": 430.40217576926017, "learning_rate": 1.6993932343717158e-07, "loss": 307.952, "step": 48970 }, { "epoch": 0.9420045965516246, "grad_norm": 389.1449077798511, "learning_rate": 1.6882078166846173e-07, "loss": 301.6381, "step": 48980 }, { "epoch": 0.9421969208873845, "grad_norm": 382.38945738082765, "learning_rate": 1.677059018516425e-07, "loss": 299.7711, "step": 48990 }, { "epoch": 0.9423892452231443, "grad_norm": 414.03371972323305, "learning_rate": 1.6659468440198835e-07, "loss": 310.2514, "step": 49000 }, { "epoch": 0.9425815695589042, "grad_norm": 367.14080485365145, "learning_rate": 1.6548712973341152e-07, "loss": 306.2677, "step": 49010 }, { "epoch": 0.9427738938946639, "grad_norm": 387.68516701771307, "learning_rate": 1.6438323825845647e-07, "loss": 301.7731, "step": 49020 }, { "epoch": 0.9429662182304238, "grad_norm": 396.13741559680955, "learning_rate": 1.6328301038830429e-07, "loss": 308.4436, "step": 49030 }, { "epoch": 0.9431585425661836, "grad_norm": 421.965799381177, "learning_rate": 1.621864465327716e-07, "loss": 297.3285, "step": 49040 }, { "epoch": 0.9433508669019435, "grad_norm": 421.77266319393664, "learning_rate": 1.6109354710031167e-07, "loss": 313.5459, "step": 49050 }, { "epoch": 0.9435431912377032, "grad_norm": 378.15834874652194, "learning_rate": 1.6000431249800995e-07, "loss": 293.139, "step": 49060 }, { "epoch": 0.9437355155734631, "grad_norm": 373.7622191531093, "learning_rate": 1.5891874313158862e-07, "loss": 294.0775, "step": 49070 }, { "epoch": 0.9439278399092229, "grad_norm": 403.74011461644466, "learning_rate": 1.5783683940540528e-07, "loss": 301.0671, "step": 49080 }, { "epoch": 0.9441201642449827, "grad_norm": 376.2667831523362, "learning_rate": 1.5675860172244982e-07, "loss": 299.9229, "step": 49090 }, { "epoch": 0.9443124885807426, "grad_norm": 401.3229630982269, "learning_rate": 1.5568403048434877e-07, "loss": 300.5737, "step": 49100 }, { "epoch": 0.9445048129165023, "grad_norm": 373.0953195404252, "learning_rate": 1.5461312609136192e-07, "loss": 302.8909, "step": 49110 }, { "epoch": 0.9446971372522622, "grad_norm": 405.59863465885144, "learning_rate": 1.5354588894238465e-07, "loss": 300.2997, "step": 49120 }, { "epoch": 0.944889461588022, "grad_norm": 379.78001345837527, "learning_rate": 1.524823194349434e-07, "loss": 308.7511, "step": 49130 }, { "epoch": 0.9450817859237819, "grad_norm": 398.4280543255053, "learning_rate": 1.514224179652013e-07, "loss": 311.1649, "step": 49140 }, { "epoch": 0.9452741102595417, "grad_norm": 422.2010323876551, "learning_rate": 1.5036618492795473e-07, "loss": 308.9265, "step": 49150 }, { "epoch": 0.9454664345953016, "grad_norm": 385.51112533019807, "learning_rate": 1.4931362071663125e-07, "loss": 305.2235, "step": 49160 }, { "epoch": 0.9456587589310613, "grad_norm": 375.55332556015094, "learning_rate": 1.4826472572329498e-07, "loss": 304.5089, "step": 49170 }, { "epoch": 0.9458510832668212, "grad_norm": 413.1389251388203, "learning_rate": 1.4721950033864118e-07, "loss": 298.3317, "step": 49180 }, { "epoch": 0.946043407602581, "grad_norm": 388.9181132545055, "learning_rate": 1.4617794495199956e-07, "loss": 308.9312, "step": 49190 }, { "epoch": 0.9462357319383408, "grad_norm": 425.9297379286984, "learning_rate": 1.4514005995133197e-07, "loss": 307.9106, "step": 49200 }, { "epoch": 0.9464280562741006, "grad_norm": 397.88801482406404, "learning_rate": 1.441058457232336e-07, "loss": 301.4593, "step": 49210 }, { "epoch": 0.9466203806098604, "grad_norm": 402.3691909098564, "learning_rate": 1.43075302652933e-07, "loss": 315.4089, "step": 49220 }, { "epoch": 0.9468127049456203, "grad_norm": 406.7789718816777, "learning_rate": 1.4204843112428867e-07, "loss": 306.0235, "step": 49230 }, { "epoch": 0.9470050292813801, "grad_norm": 392.7534964178116, "learning_rate": 1.4102523151979574e-07, "loss": 306.0012, "step": 49240 }, { "epoch": 0.94719735361714, "grad_norm": 427.2808460701671, "learning_rate": 1.40005704220576e-07, "loss": 310.0522, "step": 49250 }, { "epoch": 0.9473896779528997, "grad_norm": 392.41231814618135, "learning_rate": 1.3898984960638907e-07, "loss": 308.7167, "step": 49260 }, { "epoch": 0.9475820022886596, "grad_norm": 430.16492428588924, "learning_rate": 1.3797766805562328e-07, "loss": 312.7875, "step": 49270 }, { "epoch": 0.9477743266244194, "grad_norm": 449.19329225948957, "learning_rate": 1.3696915994530048e-07, "loss": 306.6118, "step": 49280 }, { "epoch": 0.9479666509601793, "grad_norm": 384.21682787348857, "learning_rate": 1.359643256510701e-07, "loss": 301.349, "step": 49290 }, { "epoch": 0.9481589752959391, "grad_norm": 384.98434269197816, "learning_rate": 1.3496316554722056e-07, "loss": 303.1915, "step": 49300 }, { "epoch": 0.948351299631699, "grad_norm": 417.5960335747955, "learning_rate": 1.3396568000666467e-07, "loss": 307.0324, "step": 49310 }, { "epoch": 0.9485436239674587, "grad_norm": 391.2034856262081, "learning_rate": 1.3297186940094853e-07, "loss": 311.3499, "step": 49320 }, { "epoch": 0.9487359483032185, "grad_norm": 434.08483463007167, "learning_rate": 1.3198173410025383e-07, "loss": 303.7476, "step": 49330 }, { "epoch": 0.9489282726389784, "grad_norm": 406.11912099371114, "learning_rate": 1.3099527447338668e-07, "loss": 310.9488, "step": 49340 }, { "epoch": 0.9491205969747382, "grad_norm": 399.76251666063945, "learning_rate": 1.3001249088778756e-07, "loss": 310.5564, "step": 49350 }, { "epoch": 0.949312921310498, "grad_norm": 447.8622510640455, "learning_rate": 1.29033383709527e-07, "loss": 310.721, "step": 49360 }, { "epoch": 0.9495052456462578, "grad_norm": 396.2974496831271, "learning_rate": 1.2805795330330774e-07, "loss": 311.9839, "step": 49370 }, { "epoch": 0.9496975699820177, "grad_norm": 385.20160765765115, "learning_rate": 1.2708620003245908e-07, "loss": 314.5979, "step": 49380 }, { "epoch": 0.9498898943177775, "grad_norm": 387.96535125530716, "learning_rate": 1.2611812425894487e-07, "loss": 296.1735, "step": 49390 }, { "epoch": 0.9500822186535374, "grad_norm": 398.5841305986151, "learning_rate": 1.2515372634335666e-07, "loss": 300.194, "step": 49400 }, { "epoch": 0.9502745429892971, "grad_norm": 403.5354337373987, "learning_rate": 1.2419300664491707e-07, "loss": 322.5583, "step": 49410 }, { "epoch": 0.950466867325057, "grad_norm": 361.2817104121931, "learning_rate": 1.2323596552147876e-07, "loss": 307.058, "step": 49420 }, { "epoch": 0.9506591916608168, "grad_norm": 400.2264206297339, "learning_rate": 1.2228260332952213e-07, "loss": 307.1072, "step": 49430 }, { "epoch": 0.9508515159965766, "grad_norm": 428.97362552393, "learning_rate": 1.213329204241609e-07, "loss": 301.4423, "step": 49440 }, { "epoch": 0.9510438403323365, "grad_norm": 404.18163144765833, "learning_rate": 1.203869171591343e-07, "loss": 300.4965, "step": 49450 }, { "epoch": 0.9512361646680962, "grad_norm": 430.2789604869439, "learning_rate": 1.1944459388681496e-07, "loss": 316.7147, "step": 49460 }, { "epoch": 0.9514284890038561, "grad_norm": 421.6363352017169, "learning_rate": 1.1850595095820095e-07, "loss": 310.342, "step": 49470 }, { "epoch": 0.9516208133396159, "grad_norm": 426.77375756749086, "learning_rate": 1.175709887229215e-07, "loss": 300.3431, "step": 49480 }, { "epoch": 0.9518131376753758, "grad_norm": 406.0751528898561, "learning_rate": 1.1663970752923581e-07, "loss": 305.0877, "step": 49490 }, { "epoch": 0.9520054620111356, "grad_norm": 367.432229710688, "learning_rate": 1.1571210772402975e-07, "loss": 304.2503, "step": 49500 }, { "epoch": 0.9521977863468954, "grad_norm": 403.0718464674492, "learning_rate": 1.1478818965281912e-07, "loss": 303.6261, "step": 49510 }, { "epoch": 0.9523901106826552, "grad_norm": 420.67460824295307, "learning_rate": 1.1386795365974757e-07, "loss": 312.8814, "step": 49520 }, { "epoch": 0.9525824350184151, "grad_norm": 423.5096050607596, "learning_rate": 1.1295140008758864e-07, "loss": 305.2243, "step": 49530 }, { "epoch": 0.9527747593541749, "grad_norm": 425.2835309463311, "learning_rate": 1.1203852927774372e-07, "loss": 308.5336, "step": 49540 }, { "epoch": 0.9529670836899347, "grad_norm": 423.538469079853, "learning_rate": 1.111293415702408e-07, "loss": 304.1133, "step": 49550 }, { "epoch": 0.9531594080256945, "grad_norm": 402.5345550285309, "learning_rate": 1.1022383730373897e-07, "loss": 311.5206, "step": 49560 }, { "epoch": 0.9533517323614543, "grad_norm": 381.8244980876525, "learning_rate": 1.093220168155218e-07, "loss": 305.0832, "step": 49570 }, { "epoch": 0.9535440566972142, "grad_norm": 404.30082496260746, "learning_rate": 1.0842388044150387e-07, "loss": 307.0992, "step": 49580 }, { "epoch": 0.953736381032974, "grad_norm": 414.38757151108206, "learning_rate": 1.0752942851622649e-07, "loss": 304.3737, "step": 49590 }, { "epoch": 0.9539287053687339, "grad_norm": 401.888699612354, "learning_rate": 1.066386613728565e-07, "loss": 303.2939, "step": 49600 }, { "epoch": 0.9541210297044936, "grad_norm": 410.2954464534775, "learning_rate": 1.0575157934319069e-07, "loss": 312.0517, "step": 49610 }, { "epoch": 0.9543133540402535, "grad_norm": 400.4149457331645, "learning_rate": 1.0486818275765364e-07, "loss": 305.2278, "step": 49620 }, { "epoch": 0.9545056783760133, "grad_norm": 404.71467144260447, "learning_rate": 1.0398847194529437e-07, "loss": 309.8971, "step": 49630 }, { "epoch": 0.9546980027117732, "grad_norm": 413.4985580261763, "learning_rate": 1.0311244723379188e-07, "loss": 305.9196, "step": 49640 }, { "epoch": 0.954890327047533, "grad_norm": 395.9696575498284, "learning_rate": 1.0224010894944958e-07, "loss": 313.5794, "step": 49650 }, { "epoch": 0.9550826513832927, "grad_norm": 382.596699608233, "learning_rate": 1.0137145741719867e-07, "loss": 322.5343, "step": 49660 }, { "epoch": 0.9552749757190526, "grad_norm": 419.6672520534782, "learning_rate": 1.0050649296060033e-07, "loss": 323.1996, "step": 49670 }, { "epoch": 0.9554673000548124, "grad_norm": 426.67888625184986, "learning_rate": 9.964521590183684e-08, "loss": 312.8907, "step": 49680 }, { "epoch": 0.9556596243905723, "grad_norm": 419.05212244028445, "learning_rate": 9.878762656172159e-08, "loss": 308.5332, "step": 49690 }, { "epoch": 0.9558519487263321, "grad_norm": 402.4653081142015, "learning_rate": 9.793372525969125e-08, "loss": 318.4444, "step": 49700 }, { "epoch": 0.9560442730620919, "grad_norm": 424.62233924339193, "learning_rate": 9.70835123138103e-08, "loss": 309.7108, "step": 49710 }, { "epoch": 0.9562365973978517, "grad_norm": 413.8927788632199, "learning_rate": 9.623698804076875e-08, "loss": 310.468, "step": 49720 }, { "epoch": 0.9564289217336116, "grad_norm": 415.85893275566184, "learning_rate": 9.539415275588326e-08, "loss": 318.2136, "step": 49730 }, { "epoch": 0.9566212460693714, "grad_norm": 393.99944606521944, "learning_rate": 9.455500677309603e-08, "loss": 328.1828, "step": 49740 }, { "epoch": 0.9568135704051313, "grad_norm": 408.8276224792121, "learning_rate": 9.371955040497371e-08, "loss": 304.5924, "step": 49750 }, { "epoch": 0.957005894740891, "grad_norm": 400.3012703666059, "learning_rate": 9.288778396271292e-08, "loss": 316.8154, "step": 49760 }, { "epoch": 0.9571982190766508, "grad_norm": 417.31839820868305, "learning_rate": 9.205970775613027e-08, "loss": 307.1784, "step": 49770 }, { "epoch": 0.9573905434124107, "grad_norm": 477.06723231406625, "learning_rate": 9.123532209367237e-08, "loss": 316.4184, "step": 49780 }, { "epoch": 0.9575828677481705, "grad_norm": 372.45949209567954, "learning_rate": 9.04146272824069e-08, "loss": 294.5549, "step": 49790 }, { "epoch": 0.9577751920839304, "grad_norm": 375.24122129543525, "learning_rate": 8.959762362803159e-08, "loss": 303.8367, "step": 49800 }, { "epoch": 0.9579675164196901, "grad_norm": 377.53739854846873, "learning_rate": 8.87843114348652e-08, "loss": 304.0116, "step": 49810 }, { "epoch": 0.95815984075545, "grad_norm": 406.4118666624434, "learning_rate": 8.797469100585432e-08, "loss": 312.4471, "step": 49820 }, { "epoch": 0.9583521650912098, "grad_norm": 459.15009359201656, "learning_rate": 8.716876264256768e-08, "loss": 321.9091, "step": 49830 }, { "epoch": 0.9585444894269697, "grad_norm": 408.4950976532906, "learning_rate": 8.636652664520184e-08, "loss": 308.0166, "step": 49840 }, { "epoch": 0.9587368137627295, "grad_norm": 395.5132455886173, "learning_rate": 8.556798331257555e-08, "loss": 310.2185, "step": 49850 }, { "epoch": 0.9589291380984893, "grad_norm": 415.90346541180594, "learning_rate": 8.477313294213307e-08, "loss": 307.0948, "step": 49860 }, { "epoch": 0.9591214624342491, "grad_norm": 391.7613206470682, "learning_rate": 8.398197582994316e-08, "loss": 297.7784, "step": 49870 }, { "epoch": 0.9593137867700089, "grad_norm": 397.5111267836956, "learning_rate": 8.319451227069897e-08, "loss": 307.3765, "step": 49880 }, { "epoch": 0.9595061111057688, "grad_norm": 385.8547203788916, "learning_rate": 8.241074255771808e-08, "loss": 301.4813, "step": 49890 }, { "epoch": 0.9596984354415286, "grad_norm": 382.45215401884656, "learning_rate": 8.163066698294031e-08, "loss": 306.9535, "step": 49900 }, { "epoch": 0.9598907597772884, "grad_norm": 388.8994936404778, "learning_rate": 8.085428583693211e-08, "loss": 304.2015, "step": 49910 }, { "epoch": 0.9600830841130482, "grad_norm": 392.0252979421027, "learning_rate": 8.008159940888216e-08, "loss": 305.9475, "step": 49920 }, { "epoch": 0.9602754084488081, "grad_norm": 392.7824997345464, "learning_rate": 7.931260798660356e-08, "loss": 310.9074, "step": 49930 }, { "epoch": 0.9604677327845679, "grad_norm": 377.2899378856775, "learning_rate": 7.854731185653386e-08, "loss": 300.7823, "step": 49940 }, { "epoch": 0.9606600571203278, "grad_norm": 399.7146138685817, "learning_rate": 7.778571130373059e-08, "loss": 305.756, "step": 49950 }, { "epoch": 0.9608523814560875, "grad_norm": 393.1386161906627, "learning_rate": 7.702780661188014e-08, "loss": 310.4812, "step": 49960 }, { "epoch": 0.9610447057918474, "grad_norm": 411.01353957222665, "learning_rate": 7.627359806328782e-08, "loss": 308.5993, "step": 49970 }, { "epoch": 0.9612370301276072, "grad_norm": 404.54551564524786, "learning_rate": 7.552308593888558e-08, "loss": 308.0479, "step": 49980 }, { "epoch": 0.961429354463367, "grad_norm": 397.9958138875089, "learning_rate": 7.477627051822534e-08, "loss": 312.099, "step": 49990 }, { "epoch": 0.9616216787991269, "grad_norm": 413.00497974495903, "learning_rate": 7.403315207948236e-08, "loss": 319.8621, "step": 50000 }, { "epoch": 0.9618140031348866, "grad_norm": 422.56634417496, "learning_rate": 7.329373089945968e-08, "loss": 299.7168, "step": 50010 }, { "epoch": 0.9620063274706465, "grad_norm": 394.99070959861695, "learning_rate": 7.255800725357586e-08, "loss": 310.4783, "step": 50020 }, { "epoch": 0.9621986518064063, "grad_norm": 407.5660028983763, "learning_rate": 7.182598141587838e-08, "loss": 309.7587, "step": 50030 }, { "epoch": 0.9623909761421662, "grad_norm": 395.4002094215585, "learning_rate": 7.109765365903243e-08, "loss": 312.1669, "step": 50040 }, { "epoch": 0.962583300477926, "grad_norm": 393.15639269285674, "learning_rate": 7.03730242543299e-08, "loss": 309.2928, "step": 50050 }, { "epoch": 0.9627756248136858, "grad_norm": 376.6052912892147, "learning_rate": 6.965209347168156e-08, "loss": 302.5852, "step": 50060 }, { "epoch": 0.9629679491494456, "grad_norm": 393.38766705849474, "learning_rate": 6.89348615796237e-08, "loss": 300.5664, "step": 50070 }, { "epoch": 0.9631602734852055, "grad_norm": 386.8942351982451, "learning_rate": 6.822132884531373e-08, "loss": 304.1994, "step": 50080 }, { "epoch": 0.9633525978209653, "grad_norm": 441.2890245819458, "learning_rate": 6.751149553452907e-08, "loss": 313.6467, "step": 50090 }, { "epoch": 0.963544922156725, "grad_norm": 375.4967465461503, "learning_rate": 6.680536191167263e-08, "loss": 302.0366, "step": 50100 }, { "epoch": 0.9637372464924849, "grad_norm": 390.45468352443703, "learning_rate": 6.610292823976628e-08, "loss": 301.4835, "step": 50110 }, { "epoch": 0.9639295708282447, "grad_norm": 385.14826788375, "learning_rate": 6.540419478045623e-08, "loss": 311.1793, "step": 50120 }, { "epoch": 0.9641218951640046, "grad_norm": 406.9672248852778, "learning_rate": 6.470916179400765e-08, "loss": 300.1742, "step": 50130 }, { "epoch": 0.9643142194997644, "grad_norm": 407.471481756683, "learning_rate": 6.401782953931013e-08, "loss": 302.3209, "step": 50140 }, { "epoch": 0.9645065438355243, "grad_norm": 415.3177685416555, "learning_rate": 6.33301982738721e-08, "loss": 304.1317, "step": 50150 }, { "epoch": 0.964698868171284, "grad_norm": 381.7151504954698, "learning_rate": 6.264626825382647e-08, "loss": 314.0569, "step": 50160 }, { "epoch": 0.9648911925070439, "grad_norm": 391.9590881332063, "learning_rate": 6.196603973392501e-08, "loss": 311.9383, "step": 50170 }, { "epoch": 0.9650835168428037, "grad_norm": 386.02426289811626, "learning_rate": 6.12895129675406e-08, "loss": 299.6151, "step": 50180 }, { "epoch": 0.9652758411785636, "grad_norm": 419.27527632559145, "learning_rate": 6.061668820667055e-08, "loss": 311.5686, "step": 50190 }, { "epoch": 0.9654681655143234, "grad_norm": 400.5806593390106, "learning_rate": 5.994756570192994e-08, "loss": 315.774, "step": 50200 }, { "epoch": 0.9656604898500831, "grad_norm": 389.998173460643, "learning_rate": 5.928214570255497e-08, "loss": 295.7583, "step": 50210 }, { "epoch": 0.965852814185843, "grad_norm": 410.2645306611366, "learning_rate": 5.862042845640403e-08, "loss": 303.524, "step": 50220 }, { "epoch": 0.9660451385216028, "grad_norm": 387.06745229011955, "learning_rate": 5.796241420995663e-08, "loss": 295.1914, "step": 50230 }, { "epoch": 0.9662374628573627, "grad_norm": 380.5509252063031, "learning_rate": 5.730810320831226e-08, "loss": 306.6095, "step": 50240 }, { "epoch": 0.9664297871931224, "grad_norm": 386.61970623577, "learning_rate": 5.66574956951893e-08, "loss": 309.1897, "step": 50250 }, { "epoch": 0.9666221115288823, "grad_norm": 422.5208012942953, "learning_rate": 5.6010591912930565e-08, "loss": 315.5407, "step": 50260 }, { "epoch": 0.9668144358646421, "grad_norm": 398.18764631578574, "learning_rate": 5.5367392102495534e-08, "loss": 307.9312, "step": 50270 }, { "epoch": 0.967006760200402, "grad_norm": 407.6783027921625, "learning_rate": 5.472789650346588e-08, "loss": 311.0954, "step": 50280 }, { "epoch": 0.9671990845361618, "grad_norm": 398.9438511540597, "learning_rate": 5.4092105354043304e-08, "loss": 309.1076, "step": 50290 }, { "epoch": 0.9673914088719217, "grad_norm": 394.09759904739724, "learning_rate": 5.346001889104946e-08, "loss": 297.5935, "step": 50300 }, { "epoch": 0.9675837332076814, "grad_norm": 430.35533188076937, "learning_rate": 5.2831637349926026e-08, "loss": 304.3507, "step": 50310 }, { "epoch": 0.9677760575434413, "grad_norm": 401.7287500696597, "learning_rate": 5.220696096473465e-08, "loss": 302.7906, "step": 50320 }, { "epoch": 0.9679683818792011, "grad_norm": 448.3982342545897, "learning_rate": 5.1585989968157e-08, "loss": 322.4165, "step": 50330 }, { "epoch": 0.9681607062149609, "grad_norm": 364.5716205433589, "learning_rate": 5.096872459149471e-08, "loss": 304.3308, "step": 50340 }, { "epoch": 0.9683530305507208, "grad_norm": 415.3120502310781, "learning_rate": 5.035516506466942e-08, "loss": 319.0656, "step": 50350 }, { "epoch": 0.9685453548864805, "grad_norm": 469.2289410725958, "learning_rate": 4.9745311616220535e-08, "loss": 320.0059, "step": 50360 }, { "epoch": 0.9687376792222404, "grad_norm": 375.84391029795586, "learning_rate": 4.913916447330858e-08, "loss": 306.8135, "step": 50370 }, { "epoch": 0.9689300035580002, "grad_norm": 394.38854920005514, "learning_rate": 4.853672386171515e-08, "loss": 312.7281, "step": 50380 }, { "epoch": 0.9691223278937601, "grad_norm": 393.5334830174472, "learning_rate": 4.793799000583743e-08, "loss": 304.467, "step": 50390 }, { "epoch": 0.9693146522295198, "grad_norm": 424.7752850602039, "learning_rate": 4.73429631286948e-08, "loss": 304.6041, "step": 50400 }, { "epoch": 0.9695069765652797, "grad_norm": 404.8066271402032, "learning_rate": 4.675164345192551e-08, "loss": 295.7238, "step": 50410 }, { "epoch": 0.9696993009010395, "grad_norm": 409.1511846227577, "learning_rate": 4.6164031195785606e-08, "loss": 307.2115, "step": 50420 }, { "epoch": 0.9698916252367994, "grad_norm": 374.41955508403254, "learning_rate": 4.558012657915112e-08, "loss": 308.1001, "step": 50430 }, { "epoch": 0.9700839495725592, "grad_norm": 396.90723459075224, "learning_rate": 4.4999929819515844e-08, "loss": 314.8178, "step": 50440 }, { "epoch": 0.9702762739083189, "grad_norm": 422.1570173600825, "learning_rate": 4.442344113299579e-08, "loss": 307.5798, "step": 50450 }, { "epoch": 0.9704685982440788, "grad_norm": 397.710503498998, "learning_rate": 4.385066073432143e-08, "loss": 299.9531, "step": 50460 }, { "epoch": 0.9706609225798386, "grad_norm": 429.1507150623487, "learning_rate": 4.3281588836844303e-08, "loss": 306.3626, "step": 50470 }, { "epoch": 0.9708532469155985, "grad_norm": 390.79850266021, "learning_rate": 4.271622565253486e-08, "loss": 298.7711, "step": 50480 }, { "epoch": 0.9710455712513583, "grad_norm": 399.88431374344094, "learning_rate": 4.2154571391982425e-08, "loss": 304.1253, "step": 50490 }, { "epoch": 0.9712378955871181, "grad_norm": 410.7327727315363, "learning_rate": 4.159662626439187e-08, "loss": 310.6277, "step": 50500 }, { "epoch": 0.9714302199228779, "grad_norm": 420.84145442808045, "learning_rate": 4.104239047758918e-08, "loss": 311.6168, "step": 50510 }, { "epoch": 0.9716225442586378, "grad_norm": 393.2143837689044, "learning_rate": 4.049186423801921e-08, "loss": 297.8025, "step": 50520 }, { "epoch": 0.9718148685943976, "grad_norm": 415.3830146922677, "learning_rate": 3.994504775074237e-08, "loss": 299.5151, "step": 50530 }, { "epoch": 0.9720071929301575, "grad_norm": 406.4663555662246, "learning_rate": 3.9401941219440186e-08, "loss": 312.5655, "step": 50540 }, { "epoch": 0.9721995172659172, "grad_norm": 401.13190759834015, "learning_rate": 3.8862544846409725e-08, "loss": 303.4033, "step": 50550 }, { "epoch": 0.972391841601677, "grad_norm": 429.3602747938986, "learning_rate": 3.832685883256915e-08, "loss": 309.5042, "step": 50560 }, { "epoch": 0.9725841659374369, "grad_norm": 393.8213768007156, "learning_rate": 3.7794883377449966e-08, "loss": 302.9365, "step": 50570 }, { "epoch": 0.9727764902731967, "grad_norm": 391.2815581564051, "learning_rate": 3.726661867920478e-08, "loss": 305.5504, "step": 50580 }, { "epoch": 0.9729688146089566, "grad_norm": 393.73702787501804, "learning_rate": 3.674206493460508e-08, "loss": 304.4101, "step": 50590 }, { "epoch": 0.9731611389447163, "grad_norm": 387.53807641553425, "learning_rate": 3.622122233903791e-08, "loss": 302.7742, "step": 50600 }, { "epoch": 0.9733534632804762, "grad_norm": 437.84801072834466, "learning_rate": 3.5704091086508076e-08, "loss": 311.2802, "step": 50610 }, { "epoch": 0.973545787616236, "grad_norm": 432.73723220634884, "learning_rate": 3.519067136963705e-08, "loss": 304.799, "step": 50620 }, { "epoch": 0.9737381119519959, "grad_norm": 421.13571721828066, "learning_rate": 3.468096337966853e-08, "loss": 307.158, "step": 50630 }, { "epoch": 0.9739304362877557, "grad_norm": 428.0337305552007, "learning_rate": 3.417496730645731e-08, "loss": 307.0303, "step": 50640 }, { "epoch": 0.9741227606235155, "grad_norm": 407.82962995120494, "learning_rate": 3.3672683338480396e-08, "loss": 309.2827, "step": 50650 }, { "epoch": 0.9743150849592753, "grad_norm": 406.9420647927319, "learning_rate": 3.317411166282813e-08, "loss": 304.6213, "step": 50660 }, { "epoch": 0.9745074092950351, "grad_norm": 375.8363907832895, "learning_rate": 3.2679252465213085e-08, "loss": 306.5873, "step": 50670 }, { "epoch": 0.974699733630795, "grad_norm": 421.29060346936956, "learning_rate": 3.218810592996113e-08, "loss": 310.1658, "step": 50680 }, { "epoch": 0.9748920579665548, "grad_norm": 455.6126356492348, "learning_rate": 3.170067224001483e-08, "loss": 321.2601, "step": 50690 }, { "epoch": 0.9750843823023146, "grad_norm": 378.29428729541024, "learning_rate": 3.1216951576936714e-08, "loss": 293.8388, "step": 50700 }, { "epoch": 0.9752767066380744, "grad_norm": 397.17722750018015, "learning_rate": 3.073694412090489e-08, "loss": 304.5924, "step": 50710 }, { "epoch": 0.9754690309738343, "grad_norm": 409.93933600121204, "learning_rate": 3.026065005071188e-08, "loss": 308.2416, "step": 50720 }, { "epoch": 0.9756613553095941, "grad_norm": 398.2808171212146, "learning_rate": 2.9788069543772445e-08, "loss": 313.6742, "step": 50730 }, { "epoch": 0.975853679645354, "grad_norm": 377.4287067372057, "learning_rate": 2.9319202776113553e-08, "loss": 301.8081, "step": 50740 }, { "epoch": 0.9760460039811137, "grad_norm": 389.89803132758874, "learning_rate": 2.8854049922379946e-08, "loss": 301.6007, "step": 50750 }, { "epoch": 0.9762383283168736, "grad_norm": 417.8896116356527, "learning_rate": 2.839261115583303e-08, "loss": 309.4142, "step": 50760 }, { "epoch": 0.9764306526526334, "grad_norm": 470.34866542704964, "learning_rate": 2.793488664835309e-08, "loss": 306.002, "step": 50770 }, { "epoch": 0.9766229769883932, "grad_norm": 407.1327735245423, "learning_rate": 2.7480876570433746e-08, "loss": 300.7665, "step": 50780 }, { "epoch": 0.9768153013241531, "grad_norm": 400.87714116694724, "learning_rate": 2.7030581091186393e-08, "loss": 314.4448, "step": 50790 }, { "epoch": 0.9770076256599128, "grad_norm": 410.89369144701084, "learning_rate": 2.658400037833686e-08, "loss": 302.8533, "step": 50800 }, { "epoch": 0.9771999499956727, "grad_norm": 382.7188830757354, "learning_rate": 2.6141134598233197e-08, "loss": 303.5628, "step": 50810 }, { "epoch": 0.9773922743314325, "grad_norm": 433.4141542239082, "learning_rate": 2.5701983915831232e-08, "loss": 310.3563, "step": 50820 }, { "epoch": 0.9775845986671924, "grad_norm": 373.79385501027184, "learning_rate": 2.5266548494710108e-08, "loss": 306.4014, "step": 50830 }, { "epoch": 0.9777769230029522, "grad_norm": 377.9372646502148, "learning_rate": 2.4834828497062315e-08, "loss": 300.3727, "step": 50840 }, { "epoch": 0.977969247338712, "grad_norm": 485.5862600261297, "learning_rate": 2.4406824083694768e-08, "loss": 312.0458, "step": 50850 }, { "epoch": 0.9781615716744718, "grad_norm": 394.00691307111975, "learning_rate": 2.398253541403217e-08, "loss": 317.0317, "step": 50860 }, { "epoch": 0.9783538960102317, "grad_norm": 386.2322868237349, "learning_rate": 2.3561962646116988e-08, "loss": 305.768, "step": 50870 }, { "epoch": 0.9785462203459915, "grad_norm": 399.529279335095, "learning_rate": 2.3145105936603906e-08, "loss": 296.2705, "step": 50880 }, { "epoch": 0.9787385446817513, "grad_norm": 425.15762829141664, "learning_rate": 2.273196544076539e-08, "loss": 314.122, "step": 50890 }, { "epoch": 0.9789308690175111, "grad_norm": 380.179433289847, "learning_rate": 2.2322541312490565e-08, "loss": 306.0842, "step": 50900 }, { "epoch": 0.9791231933532709, "grad_norm": 402.3194928844894, "learning_rate": 2.1916833704281882e-08, "loss": 319.9796, "step": 50910 }, { "epoch": 0.9793155176890308, "grad_norm": 416.90339187460955, "learning_rate": 2.1514842767258458e-08, "loss": 298.4591, "step": 50920 }, { "epoch": 0.9795078420247906, "grad_norm": 392.84490320889864, "learning_rate": 2.1116568651156076e-08, "loss": 312.3883, "step": 50930 }, { "epoch": 0.9797001663605505, "grad_norm": 393.7744491721409, "learning_rate": 2.0722011504326066e-08, "loss": 304.6168, "step": 50940 }, { "epoch": 0.9798924906963102, "grad_norm": 382.6497005103055, "learning_rate": 2.033117147373309e-08, "loss": 301.5205, "step": 50950 }, { "epoch": 0.9800848150320701, "grad_norm": 379.180241426101, "learning_rate": 1.9944048704959583e-08, "loss": 305.3807, "step": 50960 }, { "epoch": 0.9802771393678299, "grad_norm": 368.2175644298272, "learning_rate": 1.956064334220131e-08, "loss": 299.8416, "step": 50970 }, { "epoch": 0.9804694637035898, "grad_norm": 385.60592737707225, "learning_rate": 1.9180955528270706e-08, "loss": 312.7126, "step": 50980 }, { "epoch": 0.9806617880393496, "grad_norm": 427.2375132529489, "learning_rate": 1.8804985404595743e-08, "loss": 313.5155, "step": 50990 }, { "epoch": 0.9808541123751093, "grad_norm": 387.3286278280876, "learning_rate": 1.843273311121885e-08, "loss": 300.3423, "step": 51000 }, { "epoch": 0.9810464367108692, "grad_norm": 433.04233986949123, "learning_rate": 1.806419878679799e-08, "loss": 311.5273, "step": 51010 }, { "epoch": 0.981238761046629, "grad_norm": 397.1104184447403, "learning_rate": 1.7699382568605595e-08, "loss": 301.9248, "step": 51020 }, { "epoch": 0.9814310853823889, "grad_norm": 382.9905869992039, "learning_rate": 1.733828459253073e-08, "loss": 310.2416, "step": 51030 }, { "epoch": 0.9816234097181487, "grad_norm": 405.24995014246383, "learning_rate": 1.6980904993075808e-08, "loss": 304.8208, "step": 51040 }, { "epoch": 0.9818157340539085, "grad_norm": 388.39876378459087, "learning_rate": 1.6627243903357682e-08, "loss": 325.4466, "step": 51050 }, { "epoch": 0.9820080583896683, "grad_norm": 371.9757594796885, "learning_rate": 1.6277301455110972e-08, "loss": 306.0827, "step": 51060 }, { "epoch": 0.9822003827254282, "grad_norm": 404.00571243814244, "learning_rate": 1.5931077778682524e-08, "loss": 309.9252, "step": 51070 }, { "epoch": 0.982392707061188, "grad_norm": 406.902630266166, "learning_rate": 1.5588573003035847e-08, "loss": 309.5406, "step": 51080 }, { "epoch": 0.9825850313969479, "grad_norm": 468.74044916363795, "learning_rate": 1.5249787255747774e-08, "loss": 309.0986, "step": 51090 }, { "epoch": 0.9827773557327076, "grad_norm": 379.65768166783414, "learning_rate": 1.4914720663009585e-08, "loss": 302.6833, "step": 51100 }, { "epoch": 0.9829696800684674, "grad_norm": 413.8310771702398, "learning_rate": 1.4583373349629226e-08, "loss": 304.697, "step": 51110 }, { "epoch": 0.9831620044042273, "grad_norm": 389.0675171929064, "learning_rate": 1.4255745439027968e-08, "loss": 300.6031, "step": 51120 }, { "epoch": 0.9833543287399871, "grad_norm": 406.32247314575346, "learning_rate": 1.3931837053241526e-08, "loss": 311.4055, "step": 51130 }, { "epoch": 0.983546653075747, "grad_norm": 402.2565677137849, "learning_rate": 1.3611648312920057e-08, "loss": 312.1779, "step": 51140 }, { "epoch": 0.9837389774115067, "grad_norm": 399.8158105292107, "learning_rate": 1.3295179337329267e-08, "loss": 305.4676, "step": 51150 }, { "epoch": 0.9839313017472666, "grad_norm": 400.69074590236204, "learning_rate": 1.2982430244347088e-08, "loss": 315.5982, "step": 51160 }, { "epoch": 0.9841236260830264, "grad_norm": 419.02739384459056, "learning_rate": 1.2673401150470333e-08, "loss": 314.1231, "step": 51170 }, { "epoch": 0.9843159504187863, "grad_norm": 413.87051591035225, "learning_rate": 1.2368092170804702e-08, "loss": 308.4232, "step": 51180 }, { "epoch": 0.984508274754546, "grad_norm": 451.87349675268644, "learning_rate": 1.2066503419073672e-08, "loss": 307.2372, "step": 51190 }, { "epoch": 0.9847005990903059, "grad_norm": 381.44144011771385, "learning_rate": 1.1768635007614049e-08, "loss": 306.4786, "step": 51200 }, { "epoch": 0.9848929234260657, "grad_norm": 375.4641172892268, "learning_rate": 1.1474487047375971e-08, "loss": 306.0885, "step": 51210 }, { "epoch": 0.9850852477618255, "grad_norm": 466.5386189686091, "learning_rate": 1.1184059647926238e-08, "loss": 310.9882, "step": 51220 }, { "epoch": 0.9852775720975854, "grad_norm": 417.79839407018136, "learning_rate": 1.0897352917443871e-08, "loss": 313.2638, "step": 51230 }, { "epoch": 0.9854698964333451, "grad_norm": 404.86815546221015, "learning_rate": 1.0614366962721223e-08, "loss": 309.3784, "step": 51240 }, { "epoch": 0.985662220769105, "grad_norm": 357.5197060227325, "learning_rate": 1.033510188916731e-08, "loss": 294.0667, "step": 51250 }, { "epoch": 0.9858545451048648, "grad_norm": 406.77961151528575, "learning_rate": 1.0059557800802256e-08, "loss": 305.8481, "step": 51260 }, { "epoch": 0.9860468694406247, "grad_norm": 384.74437075020825, "learning_rate": 9.78773480026396e-09, "loss": 299.6159, "step": 51270 }, { "epoch": 0.9862391937763845, "grad_norm": 406.31745349212474, "learning_rate": 9.519632988800321e-09, "loss": 311.7801, "step": 51280 }, { "epoch": 0.9864315181121444, "grad_norm": 380.0166883704215, "learning_rate": 9.25525246627479e-09, "loss": 311.1568, "step": 51290 }, { "epoch": 0.9866238424479041, "grad_norm": 409.1457322928211, "learning_rate": 8.994593331165257e-09, "loss": 312.1888, "step": 51300 }, { "epoch": 0.986816166783664, "grad_norm": 386.033341229226, "learning_rate": 8.737655680562951e-09, "loss": 297.5578, "step": 51310 }, { "epoch": 0.9870084911194238, "grad_norm": 420.5638003223459, "learning_rate": 8.484439610172424e-09, "loss": 305.1758, "step": 51320 }, { "epoch": 0.9872008154551836, "grad_norm": 398.77872534682115, "learning_rate": 8.234945214312673e-09, "loss": 309.3612, "step": 51330 }, { "epoch": 0.9873931397909435, "grad_norm": 374.43751095335097, "learning_rate": 7.989172585917138e-09, "loss": 307.6127, "step": 51340 }, { "epoch": 0.9875854641267032, "grad_norm": 403.3645140835388, "learning_rate": 7.747121816530368e-09, "loss": 306.856, "step": 51350 }, { "epoch": 0.9877777884624631, "grad_norm": 389.4955670725102, "learning_rate": 7.508792996313573e-09, "loss": 310.2944, "step": 51360 }, { "epoch": 0.9879701127982229, "grad_norm": 385.192448654295, "learning_rate": 7.274186214040191e-09, "loss": 302.005, "step": 51370 }, { "epoch": 0.9881624371339828, "grad_norm": 403.6882028588385, "learning_rate": 7.043301557096982e-09, "loss": 303.7258, "step": 51380 }, { "epoch": 0.9883547614697425, "grad_norm": 400.42846357418983, "learning_rate": 6.816139111484044e-09, "loss": 303.64, "step": 51390 }, { "epoch": 0.9885470858055024, "grad_norm": 374.76557117762826, "learning_rate": 6.592698961818134e-09, "loss": 308.6927, "step": 51400 }, { "epoch": 0.9887394101412622, "grad_norm": 394.6435981372079, "learning_rate": 6.37298119132379e-09, "loss": 298.0467, "step": 51410 }, { "epoch": 0.9889317344770221, "grad_norm": 391.99315736533697, "learning_rate": 6.156985881844435e-09, "loss": 302.1709, "step": 51420 }, { "epoch": 0.9891240588127819, "grad_norm": 389.3439157971508, "learning_rate": 5.944713113833489e-09, "loss": 305.8769, "step": 51430 }, { "epoch": 0.9893163831485418, "grad_norm": 381.8496468000146, "learning_rate": 5.736162966359926e-09, "loss": 310.9098, "step": 51440 }, { "epoch": 0.9895087074843015, "grad_norm": 393.8951535453929, "learning_rate": 5.531335517104941e-09, "loss": 303.4278, "step": 51450 }, { "epoch": 0.9897010318200613, "grad_norm": 409.8679255772124, "learning_rate": 5.3302308423641704e-09, "loss": 305.5253, "step": 51460 }, { "epoch": 0.9898933561558212, "grad_norm": 406.33180432288617, "learning_rate": 5.132849017044361e-09, "loss": 311.3281, "step": 51470 }, { "epoch": 0.990085680491581, "grad_norm": 430.82599841616536, "learning_rate": 4.939190114666703e-09, "loss": 317.1013, "step": 51480 }, { "epoch": 0.9902780048273409, "grad_norm": 385.7016786372432, "learning_rate": 4.749254207367937e-09, "loss": 301.6066, "step": 51490 }, { "epoch": 0.9904703291631006, "grad_norm": 367.50235603668244, "learning_rate": 4.563041365894805e-09, "loss": 307.9614, "step": 51500 }, { "epoch": 0.9906626534988605, "grad_norm": 437.3992355238702, "learning_rate": 4.380551659608489e-09, "loss": 308.0566, "step": 51510 }, { "epoch": 0.9908549778346203, "grad_norm": 395.9166235781837, "learning_rate": 4.2017851564835065e-09, "loss": 305.9135, "step": 51520 }, { "epoch": 0.9910473021703802, "grad_norm": 390.4537718561425, "learning_rate": 4.026741923107702e-09, "loss": 300.276, "step": 51530 }, { "epoch": 0.99123962650614, "grad_norm": 445.79770036122716, "learning_rate": 3.855422024681144e-09, "loss": 312.3877, "step": 51540 }, { "epoch": 0.9914319508418998, "grad_norm": 361.8853476592868, "learning_rate": 3.6878255250183406e-09, "loss": 295.5108, "step": 51550 }, { "epoch": 0.9916242751776596, "grad_norm": 365.2106398601837, "learning_rate": 3.5239524865460227e-09, "loss": 301.0301, "step": 51560 }, { "epoch": 0.9918165995134194, "grad_norm": 412.5798797133672, "learning_rate": 3.363802970304253e-09, "loss": 316.894, "step": 51570 }, { "epoch": 0.9920089238491793, "grad_norm": 389.77594090198704, "learning_rate": 3.207377035946424e-09, "loss": 308.6997, "step": 51580 }, { "epoch": 0.992201248184939, "grad_norm": 384.3906314377048, "learning_rate": 3.05467474173704e-09, "loss": 308.2544, "step": 51590 }, { "epoch": 0.9923935725206989, "grad_norm": 416.19656893957654, "learning_rate": 2.9056961445572686e-09, "loss": 306.6549, "step": 51600 }, { "epoch": 0.9925858968564587, "grad_norm": 398.3854214826971, "learning_rate": 2.7604412998982754e-09, "loss": 307.95, "step": 51610 }, { "epoch": 0.9927782211922186, "grad_norm": 410.56161503258375, "learning_rate": 2.61891026186456e-09, "loss": 303.2625, "step": 51620 }, { "epoch": 0.9929705455279784, "grad_norm": 406.7332818404458, "learning_rate": 2.4811030831739525e-09, "loss": 318.5634, "step": 51630 }, { "epoch": 0.9931628698637383, "grad_norm": 415.37002368973316, "learning_rate": 2.347019815158724e-09, "loss": 312.0445, "step": 51640 }, { "epoch": 0.993355194199498, "grad_norm": 434.88001844916295, "learning_rate": 2.216660507762258e-09, "loss": 309.0632, "step": 51650 }, { "epoch": 0.9935475185352579, "grad_norm": 401.1180103953426, "learning_rate": 2.0900252095401583e-09, "loss": 302.0537, "step": 51660 }, { "epoch": 0.9937398428710177, "grad_norm": 424.51657310923054, "learning_rate": 1.9671139676624707e-09, "loss": 308.1321, "step": 51670 }, { "epoch": 0.9939321672067775, "grad_norm": 399.2239784555877, "learning_rate": 1.8479268279125717e-09, "loss": 308.0895, "step": 51680 }, { "epoch": 0.9941244915425373, "grad_norm": 398.4795944942946, "learning_rate": 1.73246383468495e-09, "loss": 300.3031, "step": 51690 }, { "epoch": 0.9943168158782971, "grad_norm": 377.56698551165994, "learning_rate": 1.6207250309874246e-09, "loss": 306.1907, "step": 51700 }, { "epoch": 0.994509140214057, "grad_norm": 448.83962938024837, "learning_rate": 1.512710458442257e-09, "loss": 312.7012, "step": 51710 }, { "epoch": 0.9947014645498168, "grad_norm": 400.69775770345666, "learning_rate": 1.408420157280599e-09, "loss": 315.2197, "step": 51720 }, { "epoch": 0.9948937888855767, "grad_norm": 376.03710073961975, "learning_rate": 1.3078541663502642e-09, "loss": 309.4352, "step": 51730 }, { "epoch": 0.9950861132213364, "grad_norm": 396.9308645386203, "learning_rate": 1.2110125231112879e-09, "loss": 311.7786, "step": 51740 }, { "epoch": 0.9952784375570963, "grad_norm": 415.3260137729683, "learning_rate": 1.117895263633706e-09, "loss": 307.5083, "step": 51750 }, { "epoch": 0.9954707618928561, "grad_norm": 408.5010771689888, "learning_rate": 1.0285024226042162e-09, "loss": 308.7961, "step": 51760 }, { "epoch": 0.995663086228616, "grad_norm": 376.73186518642893, "learning_rate": 9.428340333184072e-10, "loss": 300.4339, "step": 51770 }, { "epoch": 0.9958554105643758, "grad_norm": 407.29523918210987, "learning_rate": 8.608901276874193e-10, "loss": 320.3509, "step": 51780 }, { "epoch": 0.9960477349001355, "grad_norm": 415.36622235217936, "learning_rate": 7.826707362335039e-10, "loss": 314.0036, "step": 51790 }, { "epoch": 0.9962400592358954, "grad_norm": 399.2257955451147, "learning_rate": 7.081758880911338e-10, "loss": 328.2634, "step": 51800 }, { "epoch": 0.9964323835716552, "grad_norm": 393.6085898693909, "learning_rate": 6.374056110103332e-10, "loss": 299.7243, "step": 51810 }, { "epoch": 0.9966247079074151, "grad_norm": 405.0272867713518, "learning_rate": 5.703599313511277e-10, "loss": 305.4929, "step": 51820 }, { "epoch": 0.9968170322431749, "grad_norm": 397.6661898612281, "learning_rate": 5.070388740868737e-10, "loss": 308.362, "step": 51830 }, { "epoch": 0.9970093565789347, "grad_norm": 404.80826520029643, "learning_rate": 4.474424628031493e-10, "loss": 308.8702, "step": 51840 }, { "epoch": 0.9972016809146945, "grad_norm": 387.30946528310625, "learning_rate": 3.9157071969997407e-10, "loss": 297.5715, "step": 51850 }, { "epoch": 0.9973940052504544, "grad_norm": 428.2876429970752, "learning_rate": 3.394236655873684e-10, "loss": 310.9495, "step": 51860 }, { "epoch": 0.9975863295862142, "grad_norm": 379.36230039227706, "learning_rate": 2.910013198886841e-10, "loss": 308.731, "step": 51870 }, { "epoch": 0.9977786539219741, "grad_norm": 419.4352909245947, "learning_rate": 2.4630370064171463e-10, "loss": 311.907, "step": 51880 }, { "epoch": 0.9979709782577338, "grad_norm": 383.4827735798926, "learning_rate": 2.0533082449647467e-10, "loss": 305.1834, "step": 51890 }, { "epoch": 0.9981633025934936, "grad_norm": 393.88449878209974, "learning_rate": 1.6808270671186954e-10, "loss": 315.9007, "step": 51900 }, { "epoch": 0.9983556269292535, "grad_norm": 443.98924404030754, "learning_rate": 1.345593611645768e-10, "loss": 303.1187, "step": 51910 }, { "epoch": 0.9985479512650133, "grad_norm": 408.95890434584715, "learning_rate": 1.0476080034016456e-10, "loss": 316.9108, "step": 51920 }, { "epoch": 0.9987402756007732, "grad_norm": 400.75925737361706, "learning_rate": 7.868703533864264e-11, "loss": 307.9794, "step": 51930 }, { "epoch": 0.9989325999365329, "grad_norm": 426.5420963544196, "learning_rate": 5.633807587224205e-11, "loss": 311.6262, "step": 51940 }, { "epoch": 0.9991249242722928, "grad_norm": 407.1191060452394, "learning_rate": 3.771393026541503e-11, "loss": 304.6595, "step": 51950 }, { "epoch": 0.9993172486080526, "grad_norm": 391.9785877798069, "learning_rate": 2.281460545594527e-11, "loss": 301.8431, "step": 51960 }, { "epoch": 0.9995095729438125, "grad_norm": 400.3879591656393, "learning_rate": 1.164010699272744e-11, "loss": 305.2578, "step": 51970 }, { "epoch": 0.9997018972795723, "grad_norm": 381.6965270229846, "learning_rate": 4.190439037987659e-12, "loss": 299.185, "step": 51980 }, { "epoch": 0.9998942216153321, "grad_norm": 393.96873763468045, "learning_rate": 4.656043661732668e-13, "loss": 329.7069, "step": 51990 }, { "epoch": 0.999990383783212, "eval_loss": 375.7167663574219, "eval_runtime": 60.9941, "eval_samples_per_second": 11.411, "eval_steps_per_second": 0.361, "step": 51995 }, { "epoch": 0.999990383783212, "step": 51995, "total_flos": 4.760752445875814e+16, "train_loss": 199.8116063359082, "train_runtime": 218479.6594, "train_samples_per_second": 15.231, "train_steps_per_second": 0.238 } ], "logging_steps": 10, "max_steps": 51995, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.760752445875814e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }