{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 25000, "global_step": 61115, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00016362595107584062, "grad_norm": 9.953512504451753, "learning_rate": 1.636125654450262e-08, "loss": 0.8675, "step": 10 }, { "epoch": 0.00032725190215168124, "grad_norm": 9.553511923060478, "learning_rate": 3.272251308900524e-08, "loss": 0.8383, "step": 20 }, { "epoch": 0.0004908778532275219, "grad_norm": 11.131209724566652, "learning_rate": 4.908376963350786e-08, "loss": 0.8342, "step": 30 }, { "epoch": 0.0006545038043033625, "grad_norm": 7.5997281783840185, "learning_rate": 6.544502617801048e-08, "loss": 0.8489, "step": 40 }, { "epoch": 0.0008181297553792031, "grad_norm": 13.653268825185362, "learning_rate": 8.18062827225131e-08, "loss": 0.8987, "step": 50 }, { "epoch": 0.0009817557064550437, "grad_norm": 11.055120151769751, "learning_rate": 9.816753926701572e-08, "loss": 0.784, "step": 60 }, { "epoch": 0.0011453816575308843, "grad_norm": 9.500150545973115, "learning_rate": 1.1452879581151833e-07, "loss": 0.8406, "step": 70 }, { "epoch": 0.001309007608606725, "grad_norm": 8.78176130099345, "learning_rate": 1.3089005235602095e-07, "loss": 0.7883, "step": 80 }, { "epoch": 0.0014726335596825656, "grad_norm": 9.285787853972339, "learning_rate": 1.4725130890052356e-07, "loss": 0.7712, "step": 90 }, { "epoch": 0.0016362595107584062, "grad_norm": 10.351765898816028, "learning_rate": 1.636125654450262e-07, "loss": 0.7958, "step": 100 }, { "epoch": 0.0017998854618342468, "grad_norm": 8.451575407818002, "learning_rate": 1.7997382198952883e-07, "loss": 0.7156, "step": 110 }, { "epoch": 0.0019635114129100874, "grad_norm": 8.024542877925896, "learning_rate": 1.9633507853403144e-07, "loss": 0.7417, "step": 120 }, { "epoch": 0.002127137363985928, "grad_norm": 9.133613751637023, "learning_rate": 2.1269633507853407e-07, "loss": 0.6717, "step": 130 }, { "epoch": 0.0022907633150617687, "grad_norm": 5.968510512679798, "learning_rate": 2.2905759162303666e-07, "loss": 0.6115, "step": 140 }, { "epoch": 0.0024543892661376093, "grad_norm": 6.194628606572387, "learning_rate": 2.454188481675393e-07, "loss": 0.512, "step": 150 }, { "epoch": 0.00261801521721345, "grad_norm": 5.464433852845115, "learning_rate": 2.617801047120419e-07, "loss": 0.4875, "step": 160 }, { "epoch": 0.0027816411682892906, "grad_norm": 3.873490318989648, "learning_rate": 2.7814136125654456e-07, "loss": 0.4097, "step": 170 }, { "epoch": 0.002945267119365131, "grad_norm": 4.54004618500498, "learning_rate": 2.945026178010471e-07, "loss": 0.3477, "step": 180 }, { "epoch": 0.003108893070440972, "grad_norm": 3.330129557566636, "learning_rate": 3.108638743455498e-07, "loss": 0.2964, "step": 190 }, { "epoch": 0.0032725190215168124, "grad_norm": 3.5541678440135365, "learning_rate": 3.272251308900524e-07, "loss": 0.2507, "step": 200 }, { "epoch": 0.003436144972592653, "grad_norm": 3.2547986736994554, "learning_rate": 3.43586387434555e-07, "loss": 0.2122, "step": 210 }, { "epoch": 0.0035997709236684937, "grad_norm": 2.683447457033073, "learning_rate": 3.5994764397905766e-07, "loss": 0.2045, "step": 220 }, { "epoch": 0.0037633968747443343, "grad_norm": 3.0304753757748926, "learning_rate": 3.7630890052356027e-07, "loss": 0.1978, "step": 230 }, { "epoch": 0.003927022825820175, "grad_norm": 2.2690815575105, "learning_rate": 3.926701570680629e-07, "loss": 0.179, "step": 240 }, { "epoch": 0.004090648776896016, "grad_norm": 2.4354798410886325, "learning_rate": 4.0903141361256543e-07, "loss": 0.1471, "step": 250 }, { "epoch": 0.004254274727971856, "grad_norm": 2.8505689228875797, "learning_rate": 4.2539267015706815e-07, "loss": 0.1633, "step": 260 }, { "epoch": 0.004417900679047697, "grad_norm": 2.425772445148507, "learning_rate": 4.4175392670157076e-07, "loss": 0.1502, "step": 270 }, { "epoch": 0.004581526630123537, "grad_norm": 1.9156226126443814, "learning_rate": 4.581151832460733e-07, "loss": 0.1299, "step": 280 }, { "epoch": 0.004745152581199378, "grad_norm": 2.7553677242773933, "learning_rate": 4.744764397905759e-07, "loss": 0.1284, "step": 290 }, { "epoch": 0.004908778532275219, "grad_norm": 2.0747187707340426, "learning_rate": 4.908376963350786e-07, "loss": 0.1341, "step": 300 }, { "epoch": 0.00507240448335106, "grad_norm": 2.545379828991952, "learning_rate": 5.071989528795812e-07, "loss": 0.1278, "step": 310 }, { "epoch": 0.0052360304344269, "grad_norm": 1.6684754833198432, "learning_rate": 5.235602094240838e-07, "loss": 0.1134, "step": 320 }, { "epoch": 0.005399656385502741, "grad_norm": 2.5489345776008667, "learning_rate": 5.399214659685865e-07, "loss": 0.1041, "step": 330 }, { "epoch": 0.005563282336578581, "grad_norm": 2.083270341063134, "learning_rate": 5.562827225130891e-07, "loss": 0.083, "step": 340 }, { "epoch": 0.005726908287654422, "grad_norm": 2.561527976097819, "learning_rate": 5.726439790575917e-07, "loss": 0.1129, "step": 350 }, { "epoch": 0.005890534238730262, "grad_norm": 2.332969713459923, "learning_rate": 5.890052356020942e-07, "loss": 0.102, "step": 360 }, { "epoch": 0.006054160189806103, "grad_norm": 3.1385729593806215, "learning_rate": 6.053664921465969e-07, "loss": 0.1054, "step": 370 }, { "epoch": 0.006217786140881944, "grad_norm": 2.8824313153043963, "learning_rate": 6.217277486910996e-07, "loss": 0.0974, "step": 380 }, { "epoch": 0.006381412091957785, "grad_norm": 2.2879714326506653, "learning_rate": 6.380890052356021e-07, "loss": 0.0753, "step": 390 }, { "epoch": 0.006545038043033625, "grad_norm": 1.8575305027963693, "learning_rate": 6.544502617801048e-07, "loss": 0.1008, "step": 400 }, { "epoch": 0.006708663994109466, "grad_norm": 1.8820501103558993, "learning_rate": 6.708115183246073e-07, "loss": 0.0916, "step": 410 }, { "epoch": 0.006872289945185306, "grad_norm": 2.1069617625641377, "learning_rate": 6.8717277486911e-07, "loss": 0.0899, "step": 420 }, { "epoch": 0.007035915896261147, "grad_norm": 2.129401250846256, "learning_rate": 7.035340314136126e-07, "loss": 0.0752, "step": 430 }, { "epoch": 0.007199541847336987, "grad_norm": 2.082577066787224, "learning_rate": 7.198952879581153e-07, "loss": 0.0923, "step": 440 }, { "epoch": 0.007363167798412828, "grad_norm": 2.092195120658152, "learning_rate": 7.362565445026179e-07, "loss": 0.0877, "step": 450 }, { "epoch": 0.0075267937494886685, "grad_norm": 2.38358275909396, "learning_rate": 7.526178010471205e-07, "loss": 0.0846, "step": 460 }, { "epoch": 0.00769041970056451, "grad_norm": 2.046058464990723, "learning_rate": 7.689790575916231e-07, "loss": 0.0744, "step": 470 }, { "epoch": 0.00785404565164035, "grad_norm": 2.255986980690557, "learning_rate": 7.853403141361258e-07, "loss": 0.0866, "step": 480 }, { "epoch": 0.00801767160271619, "grad_norm": 2.2406121886717503, "learning_rate": 8.017015706806283e-07, "loss": 0.0694, "step": 490 }, { "epoch": 0.008181297553792032, "grad_norm": 1.843535855319224, "learning_rate": 8.180628272251309e-07, "loss": 0.0726, "step": 500 }, { "epoch": 0.008344923504867872, "grad_norm": 1.9415564319480298, "learning_rate": 8.344240837696335e-07, "loss": 0.0649, "step": 510 }, { "epoch": 0.008508549455943712, "grad_norm": 2.553942139990562, "learning_rate": 8.507853403141363e-07, "loss": 0.0846, "step": 520 }, { "epoch": 0.008672175407019552, "grad_norm": 2.9189597974150416, "learning_rate": 8.671465968586389e-07, "loss": 0.0734, "step": 530 }, { "epoch": 0.008835801358095394, "grad_norm": 1.4914881504164346, "learning_rate": 8.835078534031415e-07, "loss": 0.0654, "step": 540 }, { "epoch": 0.008999427309171235, "grad_norm": 2.0937713541818836, "learning_rate": 8.998691099476441e-07, "loss": 0.0584, "step": 550 }, { "epoch": 0.009163053260247075, "grad_norm": 2.0007188467312402, "learning_rate": 9.162303664921466e-07, "loss": 0.0634, "step": 560 }, { "epoch": 0.009326679211322917, "grad_norm": 2.5295589149279887, "learning_rate": 9.325916230366493e-07, "loss": 0.0583, "step": 570 }, { "epoch": 0.009490305162398757, "grad_norm": 1.7551442995086026, "learning_rate": 9.489528795811518e-07, "loss": 0.0715, "step": 580 }, { "epoch": 0.009653931113474597, "grad_norm": 2.092845868494887, "learning_rate": 9.653141361256545e-07, "loss": 0.0611, "step": 590 }, { "epoch": 0.009817557064550437, "grad_norm": 2.01934143420376, "learning_rate": 9.816753926701572e-07, "loss": 0.0541, "step": 600 }, { "epoch": 0.00998118301562628, "grad_norm": 1.8566512307485132, "learning_rate": 9.980366492146598e-07, "loss": 0.0571, "step": 610 }, { "epoch": 0.01014480896670212, "grad_norm": 2.5322222748702763, "learning_rate": 1.0143979057591625e-06, "loss": 0.0573, "step": 620 }, { "epoch": 0.01030843491777796, "grad_norm": 2.3213304916444684, "learning_rate": 1.030759162303665e-06, "loss": 0.0524, "step": 630 }, { "epoch": 0.0104720608688538, "grad_norm": 1.7847020518152077, "learning_rate": 1.0471204188481676e-06, "loss": 0.0693, "step": 640 }, { "epoch": 0.010635686819929642, "grad_norm": 1.7525613602373722, "learning_rate": 1.0634816753926703e-06, "loss": 0.0459, "step": 650 }, { "epoch": 0.010799312771005482, "grad_norm": 1.5243208103372825, "learning_rate": 1.079842931937173e-06, "loss": 0.0512, "step": 660 }, { "epoch": 0.010962938722081322, "grad_norm": 2.3214197206038096, "learning_rate": 1.0962041884816754e-06, "loss": 0.0579, "step": 670 }, { "epoch": 0.011126564673157162, "grad_norm": 1.711882984179115, "learning_rate": 1.1125654450261783e-06, "loss": 0.0593, "step": 680 }, { "epoch": 0.011290190624233004, "grad_norm": 1.363766563612011, "learning_rate": 1.1289267015706807e-06, "loss": 0.0518, "step": 690 }, { "epoch": 0.011453816575308844, "grad_norm": 1.6504973222238681, "learning_rate": 1.1452879581151834e-06, "loss": 0.0534, "step": 700 }, { "epoch": 0.011617442526384684, "grad_norm": 1.6642029241468472, "learning_rate": 1.161649214659686e-06, "loss": 0.0493, "step": 710 }, { "epoch": 0.011781068477460525, "grad_norm": 1.7256103586706497, "learning_rate": 1.1780104712041885e-06, "loss": 0.0556, "step": 720 }, { "epoch": 0.011944694428536367, "grad_norm": 1.9415628815918726, "learning_rate": 1.1943717277486911e-06, "loss": 0.0474, "step": 730 }, { "epoch": 0.012108320379612207, "grad_norm": 2.1484877444490977, "learning_rate": 1.2107329842931938e-06, "loss": 0.0537, "step": 740 }, { "epoch": 0.012271946330688047, "grad_norm": 1.5140596944350828, "learning_rate": 1.2270942408376965e-06, "loss": 0.0567, "step": 750 }, { "epoch": 0.012435572281763887, "grad_norm": 1.7353637859431932, "learning_rate": 1.2434554973821991e-06, "loss": 0.0511, "step": 760 }, { "epoch": 0.012599198232839729, "grad_norm": 1.7322299709323383, "learning_rate": 1.2598167539267018e-06, "loss": 0.0639, "step": 770 }, { "epoch": 0.01276282418391557, "grad_norm": 2.1990681288776117, "learning_rate": 1.2761780104712042e-06, "loss": 0.049, "step": 780 }, { "epoch": 0.01292645013499141, "grad_norm": 1.5454137661378569, "learning_rate": 1.292539267015707e-06, "loss": 0.0599, "step": 790 }, { "epoch": 0.01309007608606725, "grad_norm": 2.0334949781020826, "learning_rate": 1.3089005235602096e-06, "loss": 0.0532, "step": 800 }, { "epoch": 0.013253702037143092, "grad_norm": 1.3285573275282347, "learning_rate": 1.3252617801047122e-06, "loss": 0.0396, "step": 810 }, { "epoch": 0.013417327988218932, "grad_norm": 1.6130715610408146, "learning_rate": 1.3416230366492147e-06, "loss": 0.0558, "step": 820 }, { "epoch": 0.013580953939294772, "grad_norm": 2.7581875193798866, "learning_rate": 1.3579842931937173e-06, "loss": 0.0501, "step": 830 }, { "epoch": 0.013744579890370612, "grad_norm": 1.499495311158914, "learning_rate": 1.37434554973822e-06, "loss": 0.0502, "step": 840 }, { "epoch": 0.013908205841446454, "grad_norm": 2.3264096876684013, "learning_rate": 1.3907068062827224e-06, "loss": 0.043, "step": 850 }, { "epoch": 0.014071831792522294, "grad_norm": 1.6531908451109447, "learning_rate": 1.407068062827225e-06, "loss": 0.0449, "step": 860 }, { "epoch": 0.014235457743598134, "grad_norm": 1.788048347180086, "learning_rate": 1.4234293193717278e-06, "loss": 0.0427, "step": 870 }, { "epoch": 0.014399083694673975, "grad_norm": 2.3014140344107545, "learning_rate": 1.4397905759162306e-06, "loss": 0.0527, "step": 880 }, { "epoch": 0.014562709645749817, "grad_norm": 1.4825497780752035, "learning_rate": 1.4561518324607333e-06, "loss": 0.0354, "step": 890 }, { "epoch": 0.014726335596825657, "grad_norm": 1.8486967565995738, "learning_rate": 1.4725130890052358e-06, "loss": 0.0458, "step": 900 }, { "epoch": 0.014889961547901497, "grad_norm": 1.2359362604595727, "learning_rate": 1.4888743455497384e-06, "loss": 0.037, "step": 910 }, { "epoch": 0.015053587498977337, "grad_norm": 1.6318813705255526, "learning_rate": 1.505235602094241e-06, "loss": 0.0394, "step": 920 }, { "epoch": 0.015217213450053179, "grad_norm": 1.022241620000326, "learning_rate": 1.5215968586387435e-06, "loss": 0.0408, "step": 930 }, { "epoch": 0.01538083940112902, "grad_norm": 1.5334145926324694, "learning_rate": 1.5379581151832462e-06, "loss": 0.0403, "step": 940 }, { "epoch": 0.01554446535220486, "grad_norm": 1.4883351942486145, "learning_rate": 1.5543193717277488e-06, "loss": 0.0433, "step": 950 }, { "epoch": 0.0157080913032807, "grad_norm": 1.435001451856834, "learning_rate": 1.5706806282722515e-06, "loss": 0.0447, "step": 960 }, { "epoch": 0.01587171725435654, "grad_norm": 1.973302151367163, "learning_rate": 1.587041884816754e-06, "loss": 0.0295, "step": 970 }, { "epoch": 0.01603534320543238, "grad_norm": 1.133924636051525, "learning_rate": 1.6034031413612566e-06, "loss": 0.0331, "step": 980 }, { "epoch": 0.016198969156508224, "grad_norm": 1.4293649033089593, "learning_rate": 1.6197643979057593e-06, "loss": 0.0532, "step": 990 }, { "epoch": 0.016362595107584064, "grad_norm": 1.5182992486580937, "learning_rate": 1.6361256544502617e-06, "loss": 0.0296, "step": 1000 }, { "epoch": 0.016526221058659904, "grad_norm": 1.9783123474244562, "learning_rate": 1.6524869109947644e-06, "loss": 0.0378, "step": 1010 }, { "epoch": 0.016689847009735744, "grad_norm": 1.3249556318465352, "learning_rate": 1.668848167539267e-06, "loss": 0.0395, "step": 1020 }, { "epoch": 0.016853472960811584, "grad_norm": 1.5923718457847904, "learning_rate": 1.6852094240837697e-06, "loss": 0.042, "step": 1030 }, { "epoch": 0.017017098911887425, "grad_norm": 1.736176595423258, "learning_rate": 1.7015706806282726e-06, "loss": 0.0425, "step": 1040 }, { "epoch": 0.017180724862963265, "grad_norm": 1.6187886173240689, "learning_rate": 1.717931937172775e-06, "loss": 0.0463, "step": 1050 }, { "epoch": 0.017344350814039105, "grad_norm": 1.8876915670056627, "learning_rate": 1.7342931937172777e-06, "loss": 0.0363, "step": 1060 }, { "epoch": 0.01750797676511495, "grad_norm": 1.621982098427535, "learning_rate": 1.7506544502617804e-06, "loss": 0.0455, "step": 1070 }, { "epoch": 0.01767160271619079, "grad_norm": 1.7261554268835486, "learning_rate": 1.767015706806283e-06, "loss": 0.0377, "step": 1080 }, { "epoch": 0.01783522866726663, "grad_norm": 2.0942676313796595, "learning_rate": 1.7833769633507855e-06, "loss": 0.0369, "step": 1090 }, { "epoch": 0.01799885461834247, "grad_norm": 1.2448446537252458, "learning_rate": 1.7997382198952881e-06, "loss": 0.0471, "step": 1100 }, { "epoch": 0.01816248056941831, "grad_norm": 2.5211330945802812, "learning_rate": 1.8160994764397908e-06, "loss": 0.0316, "step": 1110 }, { "epoch": 0.01832610652049415, "grad_norm": 1.1116051684387043, "learning_rate": 1.8324607329842933e-06, "loss": 0.0301, "step": 1120 }, { "epoch": 0.01848973247156999, "grad_norm": 2.052012791543214, "learning_rate": 1.848821989528796e-06, "loss": 0.0411, "step": 1130 }, { "epoch": 0.018653358422645833, "grad_norm": 1.1851667145655387, "learning_rate": 1.8651832460732986e-06, "loss": 0.0292, "step": 1140 }, { "epoch": 0.018816984373721674, "grad_norm": 1.8324052281712595, "learning_rate": 1.8815445026178012e-06, "loss": 0.0402, "step": 1150 }, { "epoch": 0.018980610324797514, "grad_norm": 1.1427010839942016, "learning_rate": 1.8979057591623037e-06, "loss": 0.0293, "step": 1160 }, { "epoch": 0.019144236275873354, "grad_norm": 1.4379419888144533, "learning_rate": 1.914267015706806e-06, "loss": 0.0288, "step": 1170 }, { "epoch": 0.019307862226949194, "grad_norm": 1.2867665085952575, "learning_rate": 1.930628272251309e-06, "loss": 0.031, "step": 1180 }, { "epoch": 0.019471488178025034, "grad_norm": 1.375000512956942, "learning_rate": 1.9469895287958115e-06, "loss": 0.026, "step": 1190 }, { "epoch": 0.019635114129100874, "grad_norm": 1.5209358343311332, "learning_rate": 1.9633507853403143e-06, "loss": 0.0447, "step": 1200 }, { "epoch": 0.019798740080176715, "grad_norm": 1.6955535802183264, "learning_rate": 1.979712041884817e-06, "loss": 0.0357, "step": 1210 }, { "epoch": 0.01996236603125256, "grad_norm": 1.6039777164982636, "learning_rate": 1.9960732984293197e-06, "loss": 0.0313, "step": 1220 }, { "epoch": 0.0201259919823284, "grad_norm": 1.2200956375113978, "learning_rate": 2.012434554973822e-06, "loss": 0.0349, "step": 1230 }, { "epoch": 0.02028961793340424, "grad_norm": 1.6280353166583323, "learning_rate": 2.028795811518325e-06, "loss": 0.0337, "step": 1240 }, { "epoch": 0.02045324388448008, "grad_norm": 1.0386892459979415, "learning_rate": 2.0451570680628274e-06, "loss": 0.0336, "step": 1250 }, { "epoch": 0.02061686983555592, "grad_norm": 1.138067739041246, "learning_rate": 2.06151832460733e-06, "loss": 0.037, "step": 1260 }, { "epoch": 0.02078049578663176, "grad_norm": 2.140608023965435, "learning_rate": 2.0778795811518328e-06, "loss": 0.0335, "step": 1270 }, { "epoch": 0.0209441217377076, "grad_norm": 0.8186344301970785, "learning_rate": 2.094240837696335e-06, "loss": 0.0316, "step": 1280 }, { "epoch": 0.02110774768878344, "grad_norm": 1.2724399370759332, "learning_rate": 2.1106020942408377e-06, "loss": 0.0374, "step": 1290 }, { "epoch": 0.021271373639859283, "grad_norm": 1.873343507142681, "learning_rate": 2.1269633507853405e-06, "loss": 0.0341, "step": 1300 }, { "epoch": 0.021434999590935123, "grad_norm": 1.2812941090395522, "learning_rate": 2.143324607329843e-06, "loss": 0.0326, "step": 1310 }, { "epoch": 0.021598625542010964, "grad_norm": 1.822516644916563, "learning_rate": 2.159685863874346e-06, "loss": 0.0356, "step": 1320 }, { "epoch": 0.021762251493086804, "grad_norm": 1.287142695522933, "learning_rate": 2.1760471204188483e-06, "loss": 0.0254, "step": 1330 }, { "epoch": 0.021925877444162644, "grad_norm": 1.7091995874390806, "learning_rate": 2.1924083769633508e-06, "loss": 0.0285, "step": 1340 }, { "epoch": 0.022089503395238484, "grad_norm": 1.1059669021786156, "learning_rate": 2.2087696335078536e-06, "loss": 0.0354, "step": 1350 }, { "epoch": 0.022253129346314324, "grad_norm": 1.2809574737691118, "learning_rate": 2.2251308900523565e-06, "loss": 0.0338, "step": 1360 }, { "epoch": 0.022416755297390165, "grad_norm": 1.8488392242066378, "learning_rate": 2.241492146596859e-06, "loss": 0.04, "step": 1370 }, { "epoch": 0.022580381248466008, "grad_norm": 1.4938199951428712, "learning_rate": 2.2578534031413614e-06, "loss": 0.0364, "step": 1380 }, { "epoch": 0.02274400719954185, "grad_norm": 1.0027154187115255, "learning_rate": 2.2742146596858643e-06, "loss": 0.0297, "step": 1390 }, { "epoch": 0.02290763315061769, "grad_norm": 1.8665041620870224, "learning_rate": 2.2905759162303667e-06, "loss": 0.033, "step": 1400 }, { "epoch": 0.02307125910169353, "grad_norm": 1.322884025427935, "learning_rate": 2.306937172774869e-06, "loss": 0.0362, "step": 1410 }, { "epoch": 0.02323488505276937, "grad_norm": 1.2034845915472814, "learning_rate": 2.323298429319372e-06, "loss": 0.0353, "step": 1420 }, { "epoch": 0.02339851100384521, "grad_norm": 1.4191131020941135, "learning_rate": 2.3396596858638745e-06, "loss": 0.0319, "step": 1430 }, { "epoch": 0.02356213695492105, "grad_norm": 1.4678436302993285, "learning_rate": 2.356020942408377e-06, "loss": 0.037, "step": 1440 }, { "epoch": 0.02372576290599689, "grad_norm": 1.629487109182382, "learning_rate": 2.37238219895288e-06, "loss": 0.0261, "step": 1450 }, { "epoch": 0.023889388857072733, "grad_norm": 1.0508390211928016, "learning_rate": 2.3887434554973823e-06, "loss": 0.0333, "step": 1460 }, { "epoch": 0.024053014808148573, "grad_norm": 1.651355652791301, "learning_rate": 2.405104712041885e-06, "loss": 0.0234, "step": 1470 }, { "epoch": 0.024216640759224414, "grad_norm": 1.389819287624273, "learning_rate": 2.4214659685863876e-06, "loss": 0.0319, "step": 1480 }, { "epoch": 0.024380266710300254, "grad_norm": 1.031793512867473, "learning_rate": 2.43782722513089e-06, "loss": 0.026, "step": 1490 }, { "epoch": 0.024543892661376094, "grad_norm": 1.2337614675854987, "learning_rate": 2.454188481675393e-06, "loss": 0.0282, "step": 1500 }, { "epoch": 0.024707518612451934, "grad_norm": 2.3099320330721334, "learning_rate": 2.4705497382198954e-06, "loss": 0.0267, "step": 1510 }, { "epoch": 0.024871144563527774, "grad_norm": 1.6455559766834573, "learning_rate": 2.4869109947643982e-06, "loss": 0.0329, "step": 1520 }, { "epoch": 0.025034770514603615, "grad_norm": 1.3886755558928074, "learning_rate": 2.5032722513089007e-06, "loss": 0.0383, "step": 1530 }, { "epoch": 0.025198396465679458, "grad_norm": 0.9877668054313401, "learning_rate": 2.5196335078534036e-06, "loss": 0.039, "step": 1540 }, { "epoch": 0.0253620224167553, "grad_norm": 1.4201776283317427, "learning_rate": 2.535994764397906e-06, "loss": 0.0316, "step": 1550 }, { "epoch": 0.02552564836783114, "grad_norm": 1.938467779668822, "learning_rate": 2.5523560209424085e-06, "loss": 0.0278, "step": 1560 }, { "epoch": 0.02568927431890698, "grad_norm": 1.0006405696606735, "learning_rate": 2.5687172774869113e-06, "loss": 0.026, "step": 1570 }, { "epoch": 0.02585290026998282, "grad_norm": 1.3008512359014655, "learning_rate": 2.585078534031414e-06, "loss": 0.0259, "step": 1580 }, { "epoch": 0.02601652622105866, "grad_norm": 0.9483672511107769, "learning_rate": 2.6014397905759162e-06, "loss": 0.0329, "step": 1590 }, { "epoch": 0.0261801521721345, "grad_norm": 0.851663514207136, "learning_rate": 2.617801047120419e-06, "loss": 0.0315, "step": 1600 }, { "epoch": 0.02634377812321034, "grad_norm": 1.1998289382412684, "learning_rate": 2.6341623036649216e-06, "loss": 0.029, "step": 1610 }, { "epoch": 0.026507404074286183, "grad_norm": 0.9444444251959695, "learning_rate": 2.6505235602094244e-06, "loss": 0.0263, "step": 1620 }, { "epoch": 0.026671030025362023, "grad_norm": 1.388145696381342, "learning_rate": 2.666884816753927e-06, "loss": 0.0202, "step": 1630 }, { "epoch": 0.026834655976437864, "grad_norm": 1.2659599943687638, "learning_rate": 2.6832460732984293e-06, "loss": 0.027, "step": 1640 }, { "epoch": 0.026998281927513704, "grad_norm": 1.8988077488556223, "learning_rate": 2.6996073298429322e-06, "loss": 0.0328, "step": 1650 }, { "epoch": 0.027161907878589544, "grad_norm": 1.579560540759702, "learning_rate": 2.7159685863874347e-06, "loss": 0.0307, "step": 1660 }, { "epoch": 0.027325533829665384, "grad_norm": 1.2309956306342076, "learning_rate": 2.732329842931937e-06, "loss": 0.0263, "step": 1670 }, { "epoch": 0.027489159780741224, "grad_norm": 1.3690398995611774, "learning_rate": 2.74869109947644e-06, "loss": 0.0309, "step": 1680 }, { "epoch": 0.027652785731817068, "grad_norm": 1.252861999046074, "learning_rate": 2.7650523560209424e-06, "loss": 0.0283, "step": 1690 }, { "epoch": 0.027816411682892908, "grad_norm": 1.0414796664294799, "learning_rate": 2.781413612565445e-06, "loss": 0.0303, "step": 1700 }, { "epoch": 0.02798003763396875, "grad_norm": 1.32699206756568, "learning_rate": 2.7977748691099478e-06, "loss": 0.0303, "step": 1710 }, { "epoch": 0.02814366358504459, "grad_norm": 1.070063583830968, "learning_rate": 2.81413612565445e-06, "loss": 0.0347, "step": 1720 }, { "epoch": 0.02830728953612043, "grad_norm": 1.0632958492332283, "learning_rate": 2.8304973821989527e-06, "loss": 0.0234, "step": 1730 }, { "epoch": 0.02847091548719627, "grad_norm": 0.9307340835086102, "learning_rate": 2.8468586387434555e-06, "loss": 0.0224, "step": 1740 }, { "epoch": 0.02863454143827211, "grad_norm": 1.7396266633234245, "learning_rate": 2.863219895287958e-06, "loss": 0.0232, "step": 1750 }, { "epoch": 0.02879816738934795, "grad_norm": 1.108684751334364, "learning_rate": 2.8795811518324613e-06, "loss": 0.0282, "step": 1760 }, { "epoch": 0.028961793340423793, "grad_norm": 1.4441168573409573, "learning_rate": 2.8959424083769637e-06, "loss": 0.0356, "step": 1770 }, { "epoch": 0.029125419291499633, "grad_norm": 1.2174355622706727, "learning_rate": 2.9123036649214666e-06, "loss": 0.0289, "step": 1780 }, { "epoch": 0.029289045242575473, "grad_norm": 1.433182442333968, "learning_rate": 2.928664921465969e-06, "loss": 0.0254, "step": 1790 }, { "epoch": 0.029452671193651313, "grad_norm": 1.0075645389804508, "learning_rate": 2.9450261780104715e-06, "loss": 0.0275, "step": 1800 }, { "epoch": 0.029616297144727154, "grad_norm": 0.9231958449859682, "learning_rate": 2.9613874345549744e-06, "loss": 0.0157, "step": 1810 }, { "epoch": 0.029779923095802994, "grad_norm": 1.3921739289548163, "learning_rate": 2.977748691099477e-06, "loss": 0.019, "step": 1820 }, { "epoch": 0.029943549046878834, "grad_norm": 1.2248008683896874, "learning_rate": 2.9941099476439793e-06, "loss": 0.0329, "step": 1830 }, { "epoch": 0.030107174997954674, "grad_norm": 1.7717617713821525, "learning_rate": 3.010471204188482e-06, "loss": 0.0235, "step": 1840 }, { "epoch": 0.030270800949030518, "grad_norm": 1.0141322764986376, "learning_rate": 3.0268324607329846e-06, "loss": 0.0286, "step": 1850 }, { "epoch": 0.030434426900106358, "grad_norm": 1.477295340198986, "learning_rate": 3.043193717277487e-06, "loss": 0.0261, "step": 1860 }, { "epoch": 0.030598052851182198, "grad_norm": 0.9760848471935755, "learning_rate": 3.05955497382199e-06, "loss": 0.0359, "step": 1870 }, { "epoch": 0.03076167880225804, "grad_norm": 1.2699769943228976, "learning_rate": 3.0759162303664924e-06, "loss": 0.0323, "step": 1880 }, { "epoch": 0.03092530475333388, "grad_norm": 1.091415157395605, "learning_rate": 3.0922774869109952e-06, "loss": 0.0247, "step": 1890 }, { "epoch": 0.03108893070440972, "grad_norm": 1.1175742604918077, "learning_rate": 3.1086387434554977e-06, "loss": 0.0247, "step": 1900 }, { "epoch": 0.03125255665548556, "grad_norm": 1.194345090346912, "learning_rate": 3.125e-06, "loss": 0.0269, "step": 1910 }, { "epoch": 0.0314161826065614, "grad_norm": 1.2676547681009906, "learning_rate": 3.141361256544503e-06, "loss": 0.0244, "step": 1920 }, { "epoch": 0.03157980855763724, "grad_norm": 1.4613241327486248, "learning_rate": 3.1577225130890055e-06, "loss": 0.0244, "step": 1930 }, { "epoch": 0.03174343450871308, "grad_norm": 1.1286075738157584, "learning_rate": 3.174083769633508e-06, "loss": 0.0237, "step": 1940 }, { "epoch": 0.03190706045978892, "grad_norm": 1.212801782645968, "learning_rate": 3.190445026178011e-06, "loss": 0.019, "step": 1950 }, { "epoch": 0.03207068641086476, "grad_norm": 1.1515000735963563, "learning_rate": 3.2068062827225132e-06, "loss": 0.0276, "step": 1960 }, { "epoch": 0.032234312361940604, "grad_norm": 0.8907100707103518, "learning_rate": 3.2231675392670157e-06, "loss": 0.0198, "step": 1970 }, { "epoch": 0.03239793831301645, "grad_norm": 0.9052681867045012, "learning_rate": 3.2395287958115186e-06, "loss": 0.0292, "step": 1980 }, { "epoch": 0.032561564264092284, "grad_norm": 1.353266340697659, "learning_rate": 3.255890052356021e-06, "loss": 0.0262, "step": 1990 }, { "epoch": 0.03272519021516813, "grad_norm": 1.119087184019235, "learning_rate": 3.2722513089005235e-06, "loss": 0.0255, "step": 2000 }, { "epoch": 0.032888816166243964, "grad_norm": 1.371700303957014, "learning_rate": 3.2886125654450263e-06, "loss": 0.0293, "step": 2010 }, { "epoch": 0.03305244211731981, "grad_norm": 0.8600685242121404, "learning_rate": 3.304973821989529e-06, "loss": 0.029, "step": 2020 }, { "epoch": 0.033216068068395645, "grad_norm": 0.995883929435826, "learning_rate": 3.3213350785340317e-06, "loss": 0.0204, "step": 2030 }, { "epoch": 0.03337969401947149, "grad_norm": 1.2753040167006517, "learning_rate": 3.337696335078534e-06, "loss": 0.0217, "step": 2040 }, { "epoch": 0.03354331997054733, "grad_norm": 1.1745927493187636, "learning_rate": 3.3540575916230366e-06, "loss": 0.0281, "step": 2050 }, { "epoch": 0.03370694592162317, "grad_norm": 1.0801097577386816, "learning_rate": 3.3704188481675394e-06, "loss": 0.0203, "step": 2060 }, { "epoch": 0.03387057187269901, "grad_norm": 0.8639756669060888, "learning_rate": 3.3867801047120423e-06, "loss": 0.0221, "step": 2070 }, { "epoch": 0.03403419782377485, "grad_norm": 1.0688842375212482, "learning_rate": 3.403141361256545e-06, "loss": 0.0209, "step": 2080 }, { "epoch": 0.03419782377485069, "grad_norm": 0.9278469578122404, "learning_rate": 3.4195026178010476e-06, "loss": 0.0209, "step": 2090 }, { "epoch": 0.03436144972592653, "grad_norm": 1.028075410668248, "learning_rate": 3.43586387434555e-06, "loss": 0.0221, "step": 2100 }, { "epoch": 0.03452507567700237, "grad_norm": 0.89901911207049, "learning_rate": 3.452225130890053e-06, "loss": 0.0393, "step": 2110 }, { "epoch": 0.03468870162807821, "grad_norm": 1.2591295151007087, "learning_rate": 3.4685863874345554e-06, "loss": 0.0217, "step": 2120 }, { "epoch": 0.034852327579154053, "grad_norm": 1.0461311343194284, "learning_rate": 3.484947643979058e-06, "loss": 0.0265, "step": 2130 }, { "epoch": 0.0350159535302299, "grad_norm": 0.6788058785810165, "learning_rate": 3.5013089005235607e-06, "loss": 0.0187, "step": 2140 }, { "epoch": 0.035179579481305734, "grad_norm": 0.6544390150635634, "learning_rate": 3.517670157068063e-06, "loss": 0.0295, "step": 2150 }, { "epoch": 0.03534320543238158, "grad_norm": 0.8478595266630747, "learning_rate": 3.534031413612566e-06, "loss": 0.0228, "step": 2160 }, { "epoch": 0.035506831383457414, "grad_norm": 0.9519750502818294, "learning_rate": 3.5503926701570685e-06, "loss": 0.021, "step": 2170 }, { "epoch": 0.03567045733453326, "grad_norm": 1.0021700498009167, "learning_rate": 3.566753926701571e-06, "loss": 0.0247, "step": 2180 }, { "epoch": 0.035834083285609095, "grad_norm": 1.7287560435490075, "learning_rate": 3.583115183246074e-06, "loss": 0.023, "step": 2190 }, { "epoch": 0.03599770923668494, "grad_norm": 1.2258058905009668, "learning_rate": 3.5994764397905763e-06, "loss": 0.0208, "step": 2200 }, { "epoch": 0.03616133518776078, "grad_norm": 1.0962938745476274, "learning_rate": 3.6158376963350787e-06, "loss": 0.0276, "step": 2210 }, { "epoch": 0.03632496113883662, "grad_norm": 1.7774300996973011, "learning_rate": 3.6321989528795816e-06, "loss": 0.0227, "step": 2220 }, { "epoch": 0.03648858708991246, "grad_norm": 1.1875072319659314, "learning_rate": 3.648560209424084e-06, "loss": 0.0288, "step": 2230 }, { "epoch": 0.0366522130409883, "grad_norm": 1.301960747116591, "learning_rate": 3.6649214659685865e-06, "loss": 0.0191, "step": 2240 }, { "epoch": 0.03681583899206414, "grad_norm": 0.9252751995193765, "learning_rate": 3.6812827225130894e-06, "loss": 0.0224, "step": 2250 }, { "epoch": 0.03697946494313998, "grad_norm": 1.0411107735174379, "learning_rate": 3.697643979057592e-06, "loss": 0.02, "step": 2260 }, { "epoch": 0.03714309089421582, "grad_norm": 0.6979691382888245, "learning_rate": 3.7140052356020943e-06, "loss": 0.0209, "step": 2270 }, { "epoch": 0.03730671684529167, "grad_norm": 0.6002271976247916, "learning_rate": 3.730366492146597e-06, "loss": 0.0209, "step": 2280 }, { "epoch": 0.0374703427963675, "grad_norm": 1.1463934839259544, "learning_rate": 3.7467277486910996e-06, "loss": 0.0188, "step": 2290 }, { "epoch": 0.03763396874744335, "grad_norm": 1.1663084440395934, "learning_rate": 3.7630890052356025e-06, "loss": 0.0259, "step": 2300 }, { "epoch": 0.037797594698519184, "grad_norm": 0.780042784927533, "learning_rate": 3.779450261780105e-06, "loss": 0.0235, "step": 2310 }, { "epoch": 0.03796122064959503, "grad_norm": 0.7987987794113918, "learning_rate": 3.7958115183246074e-06, "loss": 0.0206, "step": 2320 }, { "epoch": 0.038124846600670864, "grad_norm": 1.2187625658449108, "learning_rate": 3.8121727748691103e-06, "loss": 0.0258, "step": 2330 }, { "epoch": 0.03828847255174671, "grad_norm": 0.978190971220372, "learning_rate": 3.828534031413612e-06, "loss": 0.0212, "step": 2340 }, { "epoch": 0.038452098502822545, "grad_norm": 1.1794199457130456, "learning_rate": 3.844895287958115e-06, "loss": 0.0221, "step": 2350 }, { "epoch": 0.03861572445389839, "grad_norm": 1.0122841297857827, "learning_rate": 3.861256544502618e-06, "loss": 0.0207, "step": 2360 }, { "epoch": 0.03877935040497423, "grad_norm": 0.4307882536507433, "learning_rate": 3.877617801047121e-06, "loss": 0.0195, "step": 2370 }, { "epoch": 0.03894297635605007, "grad_norm": 0.8577245239447127, "learning_rate": 3.893979057591623e-06, "loss": 0.0202, "step": 2380 }, { "epoch": 0.03910660230712591, "grad_norm": 1.285569904245558, "learning_rate": 3.910340314136127e-06, "loss": 0.018, "step": 2390 }, { "epoch": 0.03927022825820175, "grad_norm": 0.91926981178011, "learning_rate": 3.926701570680629e-06, "loss": 0.017, "step": 2400 }, { "epoch": 0.03943385420927759, "grad_norm": 0.9456854575392329, "learning_rate": 3.9430628272251315e-06, "loss": 0.0176, "step": 2410 }, { "epoch": 0.03959748016035343, "grad_norm": 0.9181370052112151, "learning_rate": 3.959424083769634e-06, "loss": 0.0185, "step": 2420 }, { "epoch": 0.03976110611142927, "grad_norm": 1.9430712253794025, "learning_rate": 3.9757853403141364e-06, "loss": 0.0223, "step": 2430 }, { "epoch": 0.03992473206250512, "grad_norm": 1.2290438101625654, "learning_rate": 3.992146596858639e-06, "loss": 0.0238, "step": 2440 }, { "epoch": 0.04008835801358095, "grad_norm": 1.3751276891527615, "learning_rate": 4.008507853403142e-06, "loss": 0.0247, "step": 2450 }, { "epoch": 0.0402519839646568, "grad_norm": 0.8705566094326588, "learning_rate": 4.024869109947644e-06, "loss": 0.0232, "step": 2460 }, { "epoch": 0.040415609915732634, "grad_norm": 1.1334238108261925, "learning_rate": 4.041230366492147e-06, "loss": 0.0165, "step": 2470 }, { "epoch": 0.04057923586680848, "grad_norm": 0.5273715131117723, "learning_rate": 4.05759162303665e-06, "loss": 0.0164, "step": 2480 }, { "epoch": 0.040742861817884314, "grad_norm": 1.2339543352831737, "learning_rate": 4.073952879581152e-06, "loss": 0.0234, "step": 2490 }, { "epoch": 0.04090648776896016, "grad_norm": 0.9815598904057271, "learning_rate": 4.090314136125655e-06, "loss": 0.0174, "step": 2500 }, { "epoch": 0.041070113720035994, "grad_norm": 0.6846770155227239, "learning_rate": 4.106675392670158e-06, "loss": 0.0228, "step": 2510 }, { "epoch": 0.04123373967111184, "grad_norm": 0.9392888212287435, "learning_rate": 4.12303664921466e-06, "loss": 0.0191, "step": 2520 }, { "epoch": 0.04139736562218768, "grad_norm": 0.93492176131368, "learning_rate": 4.139397905759163e-06, "loss": 0.0199, "step": 2530 }, { "epoch": 0.04156099157326352, "grad_norm": 0.9503545660768167, "learning_rate": 4.1557591623036655e-06, "loss": 0.0212, "step": 2540 }, { "epoch": 0.04172461752433936, "grad_norm": 0.9113455239717534, "learning_rate": 4.1721204188481675e-06, "loss": 0.0197, "step": 2550 }, { "epoch": 0.0418882434754152, "grad_norm": 1.004857597317951, "learning_rate": 4.18848167539267e-06, "loss": 0.0188, "step": 2560 }, { "epoch": 0.04205186942649104, "grad_norm": 0.9603709393996345, "learning_rate": 4.204842931937173e-06, "loss": 0.014, "step": 2570 }, { "epoch": 0.04221549537756688, "grad_norm": 1.04941880252467, "learning_rate": 4.221204188481675e-06, "loss": 0.013, "step": 2580 }, { "epoch": 0.04237912132864272, "grad_norm": 1.0081480317799187, "learning_rate": 4.237565445026178e-06, "loss": 0.0215, "step": 2590 }, { "epoch": 0.04254274727971857, "grad_norm": 0.3557005348366723, "learning_rate": 4.253926701570681e-06, "loss": 0.0154, "step": 2600 }, { "epoch": 0.0427063732307944, "grad_norm": 0.5848244053359664, "learning_rate": 4.270287958115183e-06, "loss": 0.0183, "step": 2610 }, { "epoch": 0.04286999918187025, "grad_norm": 1.7544872642593676, "learning_rate": 4.286649214659686e-06, "loss": 0.0157, "step": 2620 }, { "epoch": 0.043033625132946084, "grad_norm": 0.7859301770642318, "learning_rate": 4.303010471204189e-06, "loss": 0.017, "step": 2630 }, { "epoch": 0.04319725108402193, "grad_norm": 0.736964572960593, "learning_rate": 4.319371727748692e-06, "loss": 0.021, "step": 2640 }, { "epoch": 0.043360877035097764, "grad_norm": 1.002093025521673, "learning_rate": 4.335732984293194e-06, "loss": 0.0178, "step": 2650 }, { "epoch": 0.04352450298617361, "grad_norm": 0.6525711180647051, "learning_rate": 4.352094240837697e-06, "loss": 0.0224, "step": 2660 }, { "epoch": 0.043688128937249444, "grad_norm": 1.1254926151656834, "learning_rate": 4.3684554973821995e-06, "loss": 0.016, "step": 2670 }, { "epoch": 0.04385175488832529, "grad_norm": 1.0187827655949202, "learning_rate": 4.3848167539267015e-06, "loss": 0.0153, "step": 2680 }, { "epoch": 0.04401538083940113, "grad_norm": 0.678776487160841, "learning_rate": 4.401178010471204e-06, "loss": 0.0196, "step": 2690 }, { "epoch": 0.04417900679047697, "grad_norm": 0.7657909064193895, "learning_rate": 4.417539267015707e-06, "loss": 0.0193, "step": 2700 }, { "epoch": 0.04434263274155281, "grad_norm": 0.8317235520567553, "learning_rate": 4.43390052356021e-06, "loss": 0.0193, "step": 2710 }, { "epoch": 0.04450625869262865, "grad_norm": 0.7107851144867301, "learning_rate": 4.450261780104713e-06, "loss": 0.0178, "step": 2720 }, { "epoch": 0.04466988464370449, "grad_norm": 0.8717761490682288, "learning_rate": 4.466623036649215e-06, "loss": 0.0181, "step": 2730 }, { "epoch": 0.04483351059478033, "grad_norm": 0.7106786903981227, "learning_rate": 4.482984293193718e-06, "loss": 0.0164, "step": 2740 }, { "epoch": 0.04499713654585617, "grad_norm": 1.5117108534302517, "learning_rate": 4.499345549738221e-06, "loss": 0.0222, "step": 2750 }, { "epoch": 0.045160762496932016, "grad_norm": 0.8624823316930237, "learning_rate": 4.515706806282723e-06, "loss": 0.0147, "step": 2760 }, { "epoch": 0.04532438844800785, "grad_norm": 0.8444141579649685, "learning_rate": 4.532068062827226e-06, "loss": 0.0173, "step": 2770 }, { "epoch": 0.0454880143990837, "grad_norm": 1.470843747434806, "learning_rate": 4.5484293193717286e-06, "loss": 0.0262, "step": 2780 }, { "epoch": 0.045651640350159534, "grad_norm": 0.9429133659470207, "learning_rate": 4.564790575916231e-06, "loss": 0.0183, "step": 2790 }, { "epoch": 0.04581526630123538, "grad_norm": 0.6624127253568172, "learning_rate": 4.5811518324607335e-06, "loss": 0.0193, "step": 2800 }, { "epoch": 0.045978892252311214, "grad_norm": 0.640065560918503, "learning_rate": 4.597513089005236e-06, "loss": 0.0213, "step": 2810 }, { "epoch": 0.04614251820338706, "grad_norm": 1.391760039150221, "learning_rate": 4.613874345549738e-06, "loss": 0.0148, "step": 2820 }, { "epoch": 0.0463061441544629, "grad_norm": 0.956271615184497, "learning_rate": 4.630235602094241e-06, "loss": 0.0189, "step": 2830 }, { "epoch": 0.04646977010553874, "grad_norm": 0.9875201082203795, "learning_rate": 4.646596858638744e-06, "loss": 0.0187, "step": 2840 }, { "epoch": 0.04663339605661458, "grad_norm": 0.6684546748273962, "learning_rate": 4.662958115183246e-06, "loss": 0.0211, "step": 2850 }, { "epoch": 0.04679702200769042, "grad_norm": 0.5601253250399263, "learning_rate": 4.679319371727749e-06, "loss": 0.0184, "step": 2860 }, { "epoch": 0.04696064795876626, "grad_norm": 0.9026348378273591, "learning_rate": 4.695680628272252e-06, "loss": 0.0201, "step": 2870 }, { "epoch": 0.0471242739098421, "grad_norm": 1.2451921632432312, "learning_rate": 4.712041884816754e-06, "loss": 0.0247, "step": 2880 }, { "epoch": 0.04728789986091794, "grad_norm": 0.9593657838522912, "learning_rate": 4.728403141361257e-06, "loss": 0.0177, "step": 2890 }, { "epoch": 0.04745152581199378, "grad_norm": 0.5197250683014698, "learning_rate": 4.74476439790576e-06, "loss": 0.0206, "step": 2900 }, { "epoch": 0.04761515176306962, "grad_norm": 1.0204469510877177, "learning_rate": 4.761125654450262e-06, "loss": 0.0192, "step": 2910 }, { "epoch": 0.047778777714145466, "grad_norm": 1.324166237756431, "learning_rate": 4.7774869109947645e-06, "loss": 0.0219, "step": 2920 }, { "epoch": 0.0479424036652213, "grad_norm": 0.6491258940752783, "learning_rate": 4.793848167539267e-06, "loss": 0.0157, "step": 2930 }, { "epoch": 0.04810602961629715, "grad_norm": 0.5995781781748115, "learning_rate": 4.81020942408377e-06, "loss": 0.0204, "step": 2940 }, { "epoch": 0.048269655567372984, "grad_norm": 0.48262283939141637, "learning_rate": 4.826570680628272e-06, "loss": 0.0184, "step": 2950 }, { "epoch": 0.04843328151844883, "grad_norm": 0.7483504732182222, "learning_rate": 4.842931937172775e-06, "loss": 0.0163, "step": 2960 }, { "epoch": 0.048596907469524664, "grad_norm": 0.8894273497586248, "learning_rate": 4.859293193717278e-06, "loss": 0.0157, "step": 2970 }, { "epoch": 0.04876053342060051, "grad_norm": 0.9784640843077647, "learning_rate": 4.87565445026178e-06, "loss": 0.0175, "step": 2980 }, { "epoch": 0.04892415937167635, "grad_norm": 0.6643778201631815, "learning_rate": 4.892015706806283e-06, "loss": 0.0172, "step": 2990 }, { "epoch": 0.04908778532275219, "grad_norm": 1.0724792461767614, "learning_rate": 4.908376963350786e-06, "loss": 0.0158, "step": 3000 }, { "epoch": 0.04925141127382803, "grad_norm": 0.9780350713415569, "learning_rate": 4.924738219895288e-06, "loss": 0.0198, "step": 3010 }, { "epoch": 0.04941503722490387, "grad_norm": 0.6378674505205143, "learning_rate": 4.941099476439791e-06, "loss": 0.0164, "step": 3020 }, { "epoch": 0.04957866317597971, "grad_norm": 1.0658378022002295, "learning_rate": 4.957460732984294e-06, "loss": 0.0127, "step": 3030 }, { "epoch": 0.04974228912705555, "grad_norm": 0.4829379599897141, "learning_rate": 4.9738219895287965e-06, "loss": 0.0205, "step": 3040 }, { "epoch": 0.04990591507813139, "grad_norm": 0.9270058272193237, "learning_rate": 4.990183246073299e-06, "loss": 0.0189, "step": 3050 }, { "epoch": 0.05006954102920723, "grad_norm": 0.8175719574480865, "learning_rate": 5.006544502617801e-06, "loss": 0.0214, "step": 3060 }, { "epoch": 0.05023316698028307, "grad_norm": 0.7476158427134629, "learning_rate": 5.022905759162304e-06, "loss": 0.0145, "step": 3070 }, { "epoch": 0.050396792931358916, "grad_norm": 0.8211440572711844, "learning_rate": 5.039267015706807e-06, "loss": 0.0207, "step": 3080 }, { "epoch": 0.05056041888243475, "grad_norm": 1.237348947844268, "learning_rate": 5.055628272251309e-06, "loss": 0.0187, "step": 3090 }, { "epoch": 0.0507240448335106, "grad_norm": 0.6781940109031565, "learning_rate": 5.071989528795812e-06, "loss": 0.0162, "step": 3100 }, { "epoch": 0.05088767078458643, "grad_norm": 1.186412411200683, "learning_rate": 5.088350785340315e-06, "loss": 0.0154, "step": 3110 }, { "epoch": 0.05105129673566228, "grad_norm": 0.7883414029868824, "learning_rate": 5.104712041884817e-06, "loss": 0.0206, "step": 3120 }, { "epoch": 0.051214922686738114, "grad_norm": 0.7789990055792931, "learning_rate": 5.12107329842932e-06, "loss": 0.0165, "step": 3130 }, { "epoch": 0.05137854863781396, "grad_norm": 0.4692693724727117, "learning_rate": 5.137434554973823e-06, "loss": 0.0151, "step": 3140 }, { "epoch": 0.0515421745888898, "grad_norm": 0.9530164712691042, "learning_rate": 5.153795811518325e-06, "loss": 0.0167, "step": 3150 }, { "epoch": 0.05170580053996564, "grad_norm": 0.5835788141798162, "learning_rate": 5.170157068062828e-06, "loss": 0.0187, "step": 3160 }, { "epoch": 0.05186942649104148, "grad_norm": 0.6302619895215621, "learning_rate": 5.1865183246073305e-06, "loss": 0.0135, "step": 3170 }, { "epoch": 0.05203305244211732, "grad_norm": 0.6791271946923848, "learning_rate": 5.2028795811518325e-06, "loss": 0.0203, "step": 3180 }, { "epoch": 0.05219667839319316, "grad_norm": 0.781394547868449, "learning_rate": 5.219240837696335e-06, "loss": 0.0172, "step": 3190 }, { "epoch": 0.052360304344269, "grad_norm": 0.9222174635498976, "learning_rate": 5.235602094240838e-06, "loss": 0.0206, "step": 3200 }, { "epoch": 0.05252393029534484, "grad_norm": 0.6031369431049634, "learning_rate": 5.251963350785341e-06, "loss": 0.0183, "step": 3210 }, { "epoch": 0.05268755624642068, "grad_norm": 0.8242313809947889, "learning_rate": 5.268324607329843e-06, "loss": 0.0194, "step": 3220 }, { "epoch": 0.05285118219749652, "grad_norm": 0.9376408899698326, "learning_rate": 5.284685863874346e-06, "loss": 0.0178, "step": 3230 }, { "epoch": 0.053014808148572366, "grad_norm": 0.9334918594622488, "learning_rate": 5.301047120418849e-06, "loss": 0.0193, "step": 3240 }, { "epoch": 0.0531784340996482, "grad_norm": 0.9132822097471964, "learning_rate": 5.317408376963351e-06, "loss": 0.0163, "step": 3250 }, { "epoch": 0.05334206005072405, "grad_norm": 0.9594471109290825, "learning_rate": 5.333769633507854e-06, "loss": 0.018, "step": 3260 }, { "epoch": 0.05350568600179988, "grad_norm": 0.5053382889220102, "learning_rate": 5.350130890052357e-06, "loss": 0.0175, "step": 3270 }, { "epoch": 0.05366931195287573, "grad_norm": 0.36737552990579614, "learning_rate": 5.366492146596859e-06, "loss": 0.0168, "step": 3280 }, { "epoch": 0.053832937903951564, "grad_norm": 0.6303186079532742, "learning_rate": 5.3828534031413616e-06, "loss": 0.0214, "step": 3290 }, { "epoch": 0.05399656385502741, "grad_norm": 0.5951811317541013, "learning_rate": 5.3992146596858644e-06, "loss": 0.0174, "step": 3300 }, { "epoch": 0.05416018980610325, "grad_norm": 0.8921132202079008, "learning_rate": 5.4155759162303665e-06, "loss": 0.0144, "step": 3310 }, { "epoch": 0.05432381575717909, "grad_norm": 0.7604220871075399, "learning_rate": 5.431937172774869e-06, "loss": 0.0137, "step": 3320 }, { "epoch": 0.05448744170825493, "grad_norm": 0.5268241961569924, "learning_rate": 5.448298429319372e-06, "loss": 0.0141, "step": 3330 }, { "epoch": 0.05465106765933077, "grad_norm": 0.5298816285238115, "learning_rate": 5.464659685863874e-06, "loss": 0.0128, "step": 3340 }, { "epoch": 0.05481469361040661, "grad_norm": 0.4953869493948359, "learning_rate": 5.481020942408377e-06, "loss": 0.0149, "step": 3350 }, { "epoch": 0.05497831956148245, "grad_norm": 0.6835023394685612, "learning_rate": 5.49738219895288e-06, "loss": 0.0132, "step": 3360 }, { "epoch": 0.05514194551255829, "grad_norm": 0.7390804600196659, "learning_rate": 5.513743455497382e-06, "loss": 0.0135, "step": 3370 }, { "epoch": 0.055305571463634136, "grad_norm": 1.094216779550603, "learning_rate": 5.530104712041885e-06, "loss": 0.0247, "step": 3380 }, { "epoch": 0.05546919741470997, "grad_norm": 0.4765546557839919, "learning_rate": 5.546465968586388e-06, "loss": 0.0227, "step": 3390 }, { "epoch": 0.055632823365785816, "grad_norm": 0.4804789937460036, "learning_rate": 5.56282722513089e-06, "loss": 0.0126, "step": 3400 }, { "epoch": 0.05579644931686165, "grad_norm": 0.929847189596145, "learning_rate": 5.579188481675393e-06, "loss": 0.0176, "step": 3410 }, { "epoch": 0.0559600752679375, "grad_norm": 0.8139905504662587, "learning_rate": 5.5955497382198955e-06, "loss": 0.0182, "step": 3420 }, { "epoch": 0.05612370121901333, "grad_norm": 0.6658544045988827, "learning_rate": 5.6119109947643975e-06, "loss": 0.0144, "step": 3430 }, { "epoch": 0.05628732717008918, "grad_norm": 0.9705422411034668, "learning_rate": 5.6282722513089e-06, "loss": 0.0169, "step": 3440 }, { "epoch": 0.056450953121165014, "grad_norm": 1.1170219668224348, "learning_rate": 5.644633507853403e-06, "loss": 0.0185, "step": 3450 }, { "epoch": 0.05661457907224086, "grad_norm": 0.7846484169454016, "learning_rate": 5.660994764397905e-06, "loss": 0.0142, "step": 3460 }, { "epoch": 0.0567782050233167, "grad_norm": 0.6837852877664217, "learning_rate": 5.677356020942408e-06, "loss": 0.0184, "step": 3470 }, { "epoch": 0.05694183097439254, "grad_norm": 0.8774040343091403, "learning_rate": 5.693717277486911e-06, "loss": 0.0144, "step": 3480 }, { "epoch": 0.05710545692546838, "grad_norm": 0.9158868072008943, "learning_rate": 5.710078534031414e-06, "loss": 0.015, "step": 3490 }, { "epoch": 0.05726908287654422, "grad_norm": 1.1947732300157077, "learning_rate": 5.726439790575916e-06, "loss": 0.0135, "step": 3500 }, { "epoch": 0.05743270882762006, "grad_norm": 0.7019699246070533, "learning_rate": 5.74280104712042e-06, "loss": 0.0156, "step": 3510 }, { "epoch": 0.0575963347786959, "grad_norm": 0.6006783200064366, "learning_rate": 5.7591623036649226e-06, "loss": 0.0154, "step": 3520 }, { "epoch": 0.05775996072977174, "grad_norm": 0.866441297150528, "learning_rate": 5.7755235602094254e-06, "loss": 0.0175, "step": 3530 }, { "epoch": 0.057923586680847586, "grad_norm": 0.5628684828106116, "learning_rate": 5.7918848167539275e-06, "loss": 0.0191, "step": 3540 }, { "epoch": 0.05808721263192342, "grad_norm": 0.8675233050727733, "learning_rate": 5.80824607329843e-06, "loss": 0.0173, "step": 3550 }, { "epoch": 0.058250838582999266, "grad_norm": 0.9462581119266399, "learning_rate": 5.824607329842933e-06, "loss": 0.0151, "step": 3560 }, { "epoch": 0.0584144645340751, "grad_norm": 0.7358941798708127, "learning_rate": 5.840968586387435e-06, "loss": 0.0166, "step": 3570 }, { "epoch": 0.058578090485150947, "grad_norm": 0.8164273335486668, "learning_rate": 5.857329842931938e-06, "loss": 0.0171, "step": 3580 }, { "epoch": 0.05874171643622678, "grad_norm": 1.1188885583251384, "learning_rate": 5.873691099476441e-06, "loss": 0.0144, "step": 3590 }, { "epoch": 0.05890534238730263, "grad_norm": 0.8654726294460399, "learning_rate": 5.890052356020943e-06, "loss": 0.0167, "step": 3600 }, { "epoch": 0.059068968338378464, "grad_norm": 0.947715811080645, "learning_rate": 5.906413612565446e-06, "loss": 0.0175, "step": 3610 }, { "epoch": 0.05923259428945431, "grad_norm": 0.5313647974355867, "learning_rate": 5.922774869109949e-06, "loss": 0.0151, "step": 3620 }, { "epoch": 0.05939622024053015, "grad_norm": 1.4699055923617934, "learning_rate": 5.939136125654451e-06, "loss": 0.0213, "step": 3630 }, { "epoch": 0.05955984619160599, "grad_norm": 1.0686135179947462, "learning_rate": 5.955497382198954e-06, "loss": 0.018, "step": 3640 }, { "epoch": 0.05972347214268183, "grad_norm": 0.9300579713054162, "learning_rate": 5.9718586387434565e-06, "loss": 0.0146, "step": 3650 }, { "epoch": 0.05988709809375767, "grad_norm": 0.9990855688437372, "learning_rate": 5.9882198952879586e-06, "loss": 0.0191, "step": 3660 }, { "epoch": 0.06005072404483351, "grad_norm": 0.7334955017808976, "learning_rate": 6.0045811518324614e-06, "loss": 0.0176, "step": 3670 }, { "epoch": 0.06021434999590935, "grad_norm": 0.5378412672950716, "learning_rate": 6.020942408376964e-06, "loss": 0.0136, "step": 3680 }, { "epoch": 0.06037797594698519, "grad_norm": 0.773295481868631, "learning_rate": 6.037303664921466e-06, "loss": 0.0138, "step": 3690 }, { "epoch": 0.060541601898061036, "grad_norm": 0.785233922269194, "learning_rate": 6.053664921465969e-06, "loss": 0.0207, "step": 3700 }, { "epoch": 0.06070522784913687, "grad_norm": 0.8094438465472602, "learning_rate": 6.070026178010472e-06, "loss": 0.0104, "step": 3710 }, { "epoch": 0.060868853800212716, "grad_norm": 0.7507120237096889, "learning_rate": 6.086387434554974e-06, "loss": 0.0206, "step": 3720 }, { "epoch": 0.06103247975128855, "grad_norm": 0.6769979296618228, "learning_rate": 6.102748691099477e-06, "loss": 0.018, "step": 3730 }, { "epoch": 0.061196105702364396, "grad_norm": 0.8736448072676698, "learning_rate": 6.11910994764398e-06, "loss": 0.0161, "step": 3740 }, { "epoch": 0.06135973165344023, "grad_norm": 1.446812065317993, "learning_rate": 6.135471204188482e-06, "loss": 0.0185, "step": 3750 }, { "epoch": 0.06152335760451608, "grad_norm": 0.6501649418113474, "learning_rate": 6.151832460732985e-06, "loss": 0.0179, "step": 3760 }, { "epoch": 0.061686983555591914, "grad_norm": 0.553347905930525, "learning_rate": 6.168193717277488e-06, "loss": 0.0174, "step": 3770 }, { "epoch": 0.06185060950666776, "grad_norm": 0.8574760969414789, "learning_rate": 6.1845549738219905e-06, "loss": 0.0157, "step": 3780 }, { "epoch": 0.0620142354577436, "grad_norm": 0.6983145312300888, "learning_rate": 6.2009162303664925e-06, "loss": 0.0163, "step": 3790 }, { "epoch": 0.06217786140881944, "grad_norm": 0.5860764533748031, "learning_rate": 6.217277486910995e-06, "loss": 0.017, "step": 3800 }, { "epoch": 0.06234148735989528, "grad_norm": 0.7520156015999931, "learning_rate": 6.233638743455498e-06, "loss": 0.0152, "step": 3810 }, { "epoch": 0.06250511331097112, "grad_norm": 0.7303343865774456, "learning_rate": 6.25e-06, "loss": 0.0138, "step": 3820 }, { "epoch": 0.06266873926204695, "grad_norm": 0.6678914196391921, "learning_rate": 6.266361256544503e-06, "loss": 0.0229, "step": 3830 }, { "epoch": 0.0628323652131228, "grad_norm": 0.9001683883396626, "learning_rate": 6.282722513089006e-06, "loss": 0.0155, "step": 3840 }, { "epoch": 0.06299599116419864, "grad_norm": 0.65005219765986, "learning_rate": 6.299083769633508e-06, "loss": 0.0128, "step": 3850 }, { "epoch": 0.06315961711527449, "grad_norm": 0.7464912367018244, "learning_rate": 6.315445026178011e-06, "loss": 0.0202, "step": 3860 }, { "epoch": 0.06332324306635033, "grad_norm": 0.4116869081794819, "learning_rate": 6.331806282722514e-06, "loss": 0.014, "step": 3870 }, { "epoch": 0.06348686901742616, "grad_norm": 0.9017957509806717, "learning_rate": 6.348167539267016e-06, "loss": 0.0116, "step": 3880 }, { "epoch": 0.063650494968502, "grad_norm": 0.5816362276964531, "learning_rate": 6.364528795811519e-06, "loss": 0.0172, "step": 3890 }, { "epoch": 0.06381412091957785, "grad_norm": 0.6763658315873537, "learning_rate": 6.380890052356022e-06, "loss": 0.0106, "step": 3900 }, { "epoch": 0.06397774687065369, "grad_norm": 0.49573377401339364, "learning_rate": 6.397251308900524e-06, "loss": 0.0132, "step": 3910 }, { "epoch": 0.06414137282172952, "grad_norm": 0.9714425293283162, "learning_rate": 6.4136125654450265e-06, "loss": 0.015, "step": 3920 }, { "epoch": 0.06430499877280536, "grad_norm": 0.7840714851230691, "learning_rate": 6.429973821989529e-06, "loss": 0.0174, "step": 3930 }, { "epoch": 0.06446862472388121, "grad_norm": 0.8113175088623846, "learning_rate": 6.446335078534031e-06, "loss": 0.0111, "step": 3940 }, { "epoch": 0.06463225067495705, "grad_norm": 0.5547844836094219, "learning_rate": 6.462696335078534e-06, "loss": 0.0144, "step": 3950 }, { "epoch": 0.0647958766260329, "grad_norm": 0.3557549863210959, "learning_rate": 6.479057591623037e-06, "loss": 0.0136, "step": 3960 }, { "epoch": 0.06495950257710872, "grad_norm": 0.7447082306420533, "learning_rate": 6.495418848167539e-06, "loss": 0.0157, "step": 3970 }, { "epoch": 0.06512312852818457, "grad_norm": 0.7294771571097772, "learning_rate": 6.511780104712042e-06, "loss": 0.0164, "step": 3980 }, { "epoch": 0.06528675447926041, "grad_norm": 0.4547296312974022, "learning_rate": 6.528141361256545e-06, "loss": 0.0153, "step": 3990 }, { "epoch": 0.06545038043033626, "grad_norm": 0.9141209918521507, "learning_rate": 6.544502617801047e-06, "loss": 0.019, "step": 4000 }, { "epoch": 0.0656140063814121, "grad_norm": 1.131175695315703, "learning_rate": 6.56086387434555e-06, "loss": 0.014, "step": 4010 }, { "epoch": 0.06577763233248793, "grad_norm": 0.7657730203552764, "learning_rate": 6.577225130890053e-06, "loss": 0.0141, "step": 4020 }, { "epoch": 0.06594125828356377, "grad_norm": 0.5591370124695751, "learning_rate": 6.5935863874345556e-06, "loss": 0.019, "step": 4030 }, { "epoch": 0.06610488423463962, "grad_norm": 0.45728020236342815, "learning_rate": 6.609947643979058e-06, "loss": 0.0123, "step": 4040 }, { "epoch": 0.06626851018571546, "grad_norm": 0.4102907023022425, "learning_rate": 6.6263089005235605e-06, "loss": 0.0176, "step": 4050 }, { "epoch": 0.06643213613679129, "grad_norm": 0.47127324900756756, "learning_rate": 6.642670157068063e-06, "loss": 0.0128, "step": 4060 }, { "epoch": 0.06659576208786713, "grad_norm": 0.5508555023743599, "learning_rate": 6.659031413612565e-06, "loss": 0.0116, "step": 4070 }, { "epoch": 0.06675938803894298, "grad_norm": 0.7590976899691034, "learning_rate": 6.675392670157068e-06, "loss": 0.0131, "step": 4080 }, { "epoch": 0.06692301399001882, "grad_norm": 0.9544608583829555, "learning_rate": 6.691753926701571e-06, "loss": 0.0167, "step": 4090 }, { "epoch": 0.06708663994109466, "grad_norm": 0.3881567337817207, "learning_rate": 6.708115183246073e-06, "loss": 0.0109, "step": 4100 }, { "epoch": 0.0672502658921705, "grad_norm": 0.7505791407631902, "learning_rate": 6.724476439790576e-06, "loss": 0.0142, "step": 4110 }, { "epoch": 0.06741389184324634, "grad_norm": 0.6790782885784833, "learning_rate": 6.740837696335079e-06, "loss": 0.0198, "step": 4120 }, { "epoch": 0.06757751779432218, "grad_norm": 1.4392835399531998, "learning_rate": 6.757198952879581e-06, "loss": 0.0145, "step": 4130 }, { "epoch": 0.06774114374539802, "grad_norm": 0.7630079196631412, "learning_rate": 6.773560209424085e-06, "loss": 0.0124, "step": 4140 }, { "epoch": 0.06790476969647385, "grad_norm": 1.2648593920336735, "learning_rate": 6.7899214659685875e-06, "loss": 0.0159, "step": 4150 }, { "epoch": 0.0680683956475497, "grad_norm": 0.8827105338994375, "learning_rate": 6.80628272251309e-06, "loss": 0.0123, "step": 4160 }, { "epoch": 0.06823202159862554, "grad_norm": 0.3186105235515339, "learning_rate": 6.822643979057592e-06, "loss": 0.008, "step": 4170 }, { "epoch": 0.06839564754970139, "grad_norm": 0.45738580383678074, "learning_rate": 6.839005235602095e-06, "loss": 0.0162, "step": 4180 }, { "epoch": 0.06855927350077723, "grad_norm": 0.7887035488532753, "learning_rate": 6.855366492146598e-06, "loss": 0.0105, "step": 4190 }, { "epoch": 0.06872289945185306, "grad_norm": 0.8042971788596543, "learning_rate": 6.8717277486911e-06, "loss": 0.0176, "step": 4200 }, { "epoch": 0.0688865254029289, "grad_norm": 1.037092702448011, "learning_rate": 6.888089005235603e-06, "loss": 0.0138, "step": 4210 }, { "epoch": 0.06905015135400475, "grad_norm": 0.6317197586946248, "learning_rate": 6.904450261780106e-06, "loss": 0.0125, "step": 4220 }, { "epoch": 0.06921377730508059, "grad_norm": 0.6152060182718515, "learning_rate": 6.920811518324608e-06, "loss": 0.012, "step": 4230 }, { "epoch": 0.06937740325615642, "grad_norm": 0.9609770861603334, "learning_rate": 6.937172774869111e-06, "loss": 0.0153, "step": 4240 }, { "epoch": 0.06954102920723226, "grad_norm": 0.7142544100467115, "learning_rate": 6.953534031413614e-06, "loss": 0.0308, "step": 4250 }, { "epoch": 0.06970465515830811, "grad_norm": 0.4570493097615138, "learning_rate": 6.969895287958116e-06, "loss": 0.0175, "step": 4260 }, { "epoch": 0.06986828110938395, "grad_norm": 0.7537756532597911, "learning_rate": 6.986256544502619e-06, "loss": 0.0149, "step": 4270 }, { "epoch": 0.0700319070604598, "grad_norm": 0.5883813606397793, "learning_rate": 7.0026178010471215e-06, "loss": 0.0123, "step": 4280 }, { "epoch": 0.07019553301153562, "grad_norm": 0.8140805323424636, "learning_rate": 7.0189790575916235e-06, "loss": 0.0165, "step": 4290 }, { "epoch": 0.07035915896261147, "grad_norm": 0.6476029462192842, "learning_rate": 7.035340314136126e-06, "loss": 0.0121, "step": 4300 }, { "epoch": 0.07052278491368731, "grad_norm": 0.7199555226331041, "learning_rate": 7.051701570680629e-06, "loss": 0.0113, "step": 4310 }, { "epoch": 0.07068641086476316, "grad_norm": 0.5367680601761631, "learning_rate": 7.068062827225132e-06, "loss": 0.0106, "step": 4320 }, { "epoch": 0.070850036815839, "grad_norm": 0.8194904953760117, "learning_rate": 7.084424083769634e-06, "loss": 0.0212, "step": 4330 }, { "epoch": 0.07101366276691483, "grad_norm": 0.5779089065110249, "learning_rate": 7.100785340314137e-06, "loss": 0.0151, "step": 4340 }, { "epoch": 0.07117728871799067, "grad_norm": 1.1633273771495134, "learning_rate": 7.11714659685864e-06, "loss": 0.0105, "step": 4350 }, { "epoch": 0.07134091466906652, "grad_norm": 0.7461620935383845, "learning_rate": 7.133507853403142e-06, "loss": 0.0125, "step": 4360 }, { "epoch": 0.07150454062014236, "grad_norm": 0.49373157653633143, "learning_rate": 7.149869109947645e-06, "loss": 0.0201, "step": 4370 }, { "epoch": 0.07166816657121819, "grad_norm": 0.18989488446746042, "learning_rate": 7.166230366492148e-06, "loss": 0.021, "step": 4380 }, { "epoch": 0.07183179252229403, "grad_norm": 0.5059101308509552, "learning_rate": 7.18259162303665e-06, "loss": 0.0151, "step": 4390 }, { "epoch": 0.07199541847336988, "grad_norm": 0.8021459791545428, "learning_rate": 7.1989528795811526e-06, "loss": 0.015, "step": 4400 }, { "epoch": 0.07215904442444572, "grad_norm": 0.6956180294016454, "learning_rate": 7.2153141361256554e-06, "loss": 0.0094, "step": 4410 }, { "epoch": 0.07232267037552156, "grad_norm": 0.37293818447826366, "learning_rate": 7.2316753926701575e-06, "loss": 0.0143, "step": 4420 }, { "epoch": 0.0724862963265974, "grad_norm": 0.572876713233933, "learning_rate": 7.24803664921466e-06, "loss": 0.0155, "step": 4430 }, { "epoch": 0.07264992227767324, "grad_norm": 0.5632004757337942, "learning_rate": 7.264397905759163e-06, "loss": 0.0142, "step": 4440 }, { "epoch": 0.07281354822874908, "grad_norm": 0.9946656856088647, "learning_rate": 7.280759162303665e-06, "loss": 0.0189, "step": 4450 }, { "epoch": 0.07297717417982492, "grad_norm": 0.5564258242381411, "learning_rate": 7.297120418848168e-06, "loss": 0.0131, "step": 4460 }, { "epoch": 0.07314080013090075, "grad_norm": 0.35464786292046324, "learning_rate": 7.313481675392671e-06, "loss": 0.0131, "step": 4470 }, { "epoch": 0.0733044260819766, "grad_norm": 0.3899449351034589, "learning_rate": 7.329842931937173e-06, "loss": 0.0125, "step": 4480 }, { "epoch": 0.07346805203305244, "grad_norm": 0.6709709607965249, "learning_rate": 7.346204188481676e-06, "loss": 0.0109, "step": 4490 }, { "epoch": 0.07363167798412829, "grad_norm": 0.6645920101688736, "learning_rate": 7.362565445026179e-06, "loss": 0.0129, "step": 4500 }, { "epoch": 0.07379530393520413, "grad_norm": 0.6252601823839561, "learning_rate": 7.378926701570681e-06, "loss": 0.0183, "step": 4510 }, { "epoch": 0.07395892988627996, "grad_norm": 0.31433198781455196, "learning_rate": 7.395287958115184e-06, "loss": 0.0168, "step": 4520 }, { "epoch": 0.0741225558373558, "grad_norm": 0.8476864645641823, "learning_rate": 7.4116492146596865e-06, "loss": 0.0184, "step": 4530 }, { "epoch": 0.07428618178843165, "grad_norm": 0.5373989206838607, "learning_rate": 7.4280104712041886e-06, "loss": 0.0143, "step": 4540 }, { "epoch": 0.07444980773950749, "grad_norm": 0.7227427561336182, "learning_rate": 7.4443717277486914e-06, "loss": 0.013, "step": 4550 }, { "epoch": 0.07461343369058333, "grad_norm": 0.6169782553255874, "learning_rate": 7.460732984293194e-06, "loss": 0.0176, "step": 4560 }, { "epoch": 0.07477705964165916, "grad_norm": 0.45290180994966234, "learning_rate": 7.477094240837696e-06, "loss": 0.012, "step": 4570 }, { "epoch": 0.074940685592735, "grad_norm": 0.949344057584366, "learning_rate": 7.493455497382199e-06, "loss": 0.0123, "step": 4580 }, { "epoch": 0.07510431154381085, "grad_norm": 0.9694129051706206, "learning_rate": 7.509816753926702e-06, "loss": 0.0112, "step": 4590 }, { "epoch": 0.0752679374948867, "grad_norm": 0.6480674266712945, "learning_rate": 7.526178010471205e-06, "loss": 0.0147, "step": 4600 }, { "epoch": 0.07543156344596252, "grad_norm": 0.7306346436899508, "learning_rate": 7.542539267015707e-06, "loss": 0.0134, "step": 4610 }, { "epoch": 0.07559518939703837, "grad_norm": 0.570754536863107, "learning_rate": 7.55890052356021e-06, "loss": 0.012, "step": 4620 }, { "epoch": 0.07575881534811421, "grad_norm": 0.8623451210825357, "learning_rate": 7.575261780104713e-06, "loss": 0.0162, "step": 4630 }, { "epoch": 0.07592244129919005, "grad_norm": 0.9131007921259081, "learning_rate": 7.591623036649215e-06, "loss": 0.018, "step": 4640 }, { "epoch": 0.0760860672502659, "grad_norm": 0.45252256982047045, "learning_rate": 7.607984293193718e-06, "loss": 0.0162, "step": 4650 }, { "epoch": 0.07624969320134173, "grad_norm": 0.6699670107713176, "learning_rate": 7.6243455497382205e-06, "loss": 0.0151, "step": 4660 }, { "epoch": 0.07641331915241757, "grad_norm": 0.4494841647265516, "learning_rate": 7.640706806282723e-06, "loss": 0.0169, "step": 4670 }, { "epoch": 0.07657694510349342, "grad_norm": 0.6081696650031454, "learning_rate": 7.657068062827225e-06, "loss": 0.0137, "step": 4680 }, { "epoch": 0.07674057105456926, "grad_norm": 0.6040142336392571, "learning_rate": 7.673429319371727e-06, "loss": 0.0152, "step": 4690 }, { "epoch": 0.07690419700564509, "grad_norm": 0.9129215084422817, "learning_rate": 7.68979057591623e-06, "loss": 0.0147, "step": 4700 }, { "epoch": 0.07706782295672093, "grad_norm": 0.9280713638306605, "learning_rate": 7.706151832460733e-06, "loss": 0.015, "step": 4710 }, { "epoch": 0.07723144890779678, "grad_norm": 0.454699687086561, "learning_rate": 7.722513089005236e-06, "loss": 0.0115, "step": 4720 }, { "epoch": 0.07739507485887262, "grad_norm": 0.4774867966608041, "learning_rate": 7.738874345549739e-06, "loss": 0.0134, "step": 4730 }, { "epoch": 0.07755870080994846, "grad_norm": 0.5458183954795294, "learning_rate": 7.755235602094242e-06, "loss": 0.0151, "step": 4740 }, { "epoch": 0.0777223267610243, "grad_norm": 0.6469101291081161, "learning_rate": 7.771596858638743e-06, "loss": 0.0206, "step": 4750 }, { "epoch": 0.07788595271210014, "grad_norm": 0.42508130812038447, "learning_rate": 7.787958115183246e-06, "loss": 0.0094, "step": 4760 }, { "epoch": 0.07804957866317598, "grad_norm": 0.5617913129927786, "learning_rate": 7.804319371727749e-06, "loss": 0.0123, "step": 4770 }, { "epoch": 0.07821320461425182, "grad_norm": 0.5329401955860894, "learning_rate": 7.820680628272253e-06, "loss": 0.0101, "step": 4780 }, { "epoch": 0.07837683056532765, "grad_norm": 0.6395338737200763, "learning_rate": 7.837041884816754e-06, "loss": 0.0133, "step": 4790 }, { "epoch": 0.0785404565164035, "grad_norm": 0.666131372679514, "learning_rate": 7.853403141361257e-06, "loss": 0.0121, "step": 4800 }, { "epoch": 0.07870408246747934, "grad_norm": 0.8827681161226104, "learning_rate": 7.86976439790576e-06, "loss": 0.0124, "step": 4810 }, { "epoch": 0.07886770841855519, "grad_norm": 0.7232044563691765, "learning_rate": 7.886125654450263e-06, "loss": 0.0133, "step": 4820 }, { "epoch": 0.07903133436963103, "grad_norm": 1.167337803477484, "learning_rate": 7.902486910994766e-06, "loss": 0.014, "step": 4830 }, { "epoch": 0.07919496032070686, "grad_norm": 0.7248225558572633, "learning_rate": 7.918848167539269e-06, "loss": 0.013, "step": 4840 }, { "epoch": 0.0793585862717827, "grad_norm": 0.4863488074299087, "learning_rate": 7.93520942408377e-06, "loss": 0.0126, "step": 4850 }, { "epoch": 0.07952221222285855, "grad_norm": 0.503189764965063, "learning_rate": 7.951570680628273e-06, "loss": 0.015, "step": 4860 }, { "epoch": 0.07968583817393439, "grad_norm": 0.29074322984198236, "learning_rate": 7.967931937172776e-06, "loss": 0.0115, "step": 4870 }, { "epoch": 0.07984946412501023, "grad_norm": 0.4929143644026491, "learning_rate": 7.984293193717279e-06, "loss": 0.0128, "step": 4880 }, { "epoch": 0.08001309007608606, "grad_norm": 0.5715697905468171, "learning_rate": 8.000654450261782e-06, "loss": 0.0094, "step": 4890 }, { "epoch": 0.0801767160271619, "grad_norm": 0.8214363448815072, "learning_rate": 8.017015706806284e-06, "loss": 0.0144, "step": 4900 }, { "epoch": 0.08034034197823775, "grad_norm": 0.5477150888035965, "learning_rate": 8.033376963350786e-06, "loss": 0.0107, "step": 4910 }, { "epoch": 0.0805039679293136, "grad_norm": 0.5947624973163396, "learning_rate": 8.049738219895288e-06, "loss": 0.0093, "step": 4920 }, { "epoch": 0.08066759388038942, "grad_norm": 0.7621795357035333, "learning_rate": 8.066099476439791e-06, "loss": 0.0142, "step": 4930 }, { "epoch": 0.08083121983146527, "grad_norm": 1.1797658516379197, "learning_rate": 8.082460732984294e-06, "loss": 0.0122, "step": 4940 }, { "epoch": 0.08099484578254111, "grad_norm": 0.5267792543844083, "learning_rate": 8.098821989528797e-06, "loss": 0.012, "step": 4950 }, { "epoch": 0.08115847173361695, "grad_norm": 0.5671734433973993, "learning_rate": 8.1151832460733e-06, "loss": 0.0158, "step": 4960 }, { "epoch": 0.0813220976846928, "grad_norm": 0.21432780065511384, "learning_rate": 8.131544502617801e-06, "loss": 0.0121, "step": 4970 }, { "epoch": 0.08148572363576863, "grad_norm": 0.7443064258324583, "learning_rate": 8.147905759162304e-06, "loss": 0.0123, "step": 4980 }, { "epoch": 0.08164934958684447, "grad_norm": 0.563080575786992, "learning_rate": 8.164267015706807e-06, "loss": 0.0131, "step": 4990 }, { "epoch": 0.08181297553792032, "grad_norm": 0.638586722036964, "learning_rate": 8.18062827225131e-06, "loss": 0.0131, "step": 5000 }, { "epoch": 0.08197660148899616, "grad_norm": 0.8550330064027312, "learning_rate": 8.196989528795813e-06, "loss": 0.0133, "step": 5010 }, { "epoch": 0.08214022744007199, "grad_norm": 0.7372420826855306, "learning_rate": 8.213350785340315e-06, "loss": 0.0093, "step": 5020 }, { "epoch": 0.08230385339114783, "grad_norm": 0.6448125244894859, "learning_rate": 8.229712041884818e-06, "loss": 0.0131, "step": 5030 }, { "epoch": 0.08246747934222368, "grad_norm": 0.49065959460000513, "learning_rate": 8.24607329842932e-06, "loss": 0.014, "step": 5040 }, { "epoch": 0.08263110529329952, "grad_norm": 0.5152802313309615, "learning_rate": 8.262434554973822e-06, "loss": 0.0143, "step": 5050 }, { "epoch": 0.08279473124437536, "grad_norm": 0.558065566548443, "learning_rate": 8.278795811518325e-06, "loss": 0.0122, "step": 5060 }, { "epoch": 0.0829583571954512, "grad_norm": 0.5927835160140081, "learning_rate": 8.295157068062828e-06, "loss": 0.0135, "step": 5070 }, { "epoch": 0.08312198314652704, "grad_norm": 0.7597132931129545, "learning_rate": 8.311518324607331e-06, "loss": 0.012, "step": 5080 }, { "epoch": 0.08328560909760288, "grad_norm": 0.579973786729228, "learning_rate": 8.327879581151834e-06, "loss": 0.013, "step": 5090 }, { "epoch": 0.08344923504867872, "grad_norm": 0.504242159574087, "learning_rate": 8.344240837696335e-06, "loss": 0.0108, "step": 5100 }, { "epoch": 0.08361286099975457, "grad_norm": 0.6092329606623748, "learning_rate": 8.360602094240838e-06, "loss": 0.0118, "step": 5110 }, { "epoch": 0.0837764869508304, "grad_norm": 0.5741700793291044, "learning_rate": 8.37696335078534e-06, "loss": 0.0174, "step": 5120 }, { "epoch": 0.08394011290190624, "grad_norm": 0.9029617784371309, "learning_rate": 8.393324607329844e-06, "loss": 0.0112, "step": 5130 }, { "epoch": 0.08410373885298209, "grad_norm": 0.4234557555918461, "learning_rate": 8.409685863874347e-06, "loss": 0.0149, "step": 5140 }, { "epoch": 0.08426736480405793, "grad_norm": 0.5347592564711967, "learning_rate": 8.42604712041885e-06, "loss": 0.01, "step": 5150 }, { "epoch": 0.08443099075513376, "grad_norm": 0.6298103333681973, "learning_rate": 8.44240837696335e-06, "loss": 0.0169, "step": 5160 }, { "epoch": 0.0845946167062096, "grad_norm": 1.0130294513551699, "learning_rate": 8.458769633507854e-06, "loss": 0.0176, "step": 5170 }, { "epoch": 0.08475824265728545, "grad_norm": 0.4329106991631057, "learning_rate": 8.475130890052356e-06, "loss": 0.0129, "step": 5180 }, { "epoch": 0.08492186860836129, "grad_norm": 0.47608299492066497, "learning_rate": 8.49149214659686e-06, "loss": 0.0108, "step": 5190 }, { "epoch": 0.08508549455943713, "grad_norm": 0.5402699114804711, "learning_rate": 8.507853403141362e-06, "loss": 0.0188, "step": 5200 }, { "epoch": 0.08524912051051296, "grad_norm": 0.32003702749057233, "learning_rate": 8.524214659685865e-06, "loss": 0.0138, "step": 5210 }, { "epoch": 0.0854127464615888, "grad_norm": 0.46881971289152213, "learning_rate": 8.540575916230366e-06, "loss": 0.018, "step": 5220 }, { "epoch": 0.08557637241266465, "grad_norm": 0.45995438815927164, "learning_rate": 8.556937172774869e-06, "loss": 0.0124, "step": 5230 }, { "epoch": 0.0857399983637405, "grad_norm": 0.4389298534749428, "learning_rate": 8.573298429319372e-06, "loss": 0.0137, "step": 5240 }, { "epoch": 0.08590362431481632, "grad_norm": 0.4806428743928941, "learning_rate": 8.589659685863875e-06, "loss": 0.0141, "step": 5250 }, { "epoch": 0.08606725026589217, "grad_norm": 0.4536125253959356, "learning_rate": 8.606020942408378e-06, "loss": 0.0132, "step": 5260 }, { "epoch": 0.08623087621696801, "grad_norm": 0.6191264678357633, "learning_rate": 8.62238219895288e-06, "loss": 0.009, "step": 5270 }, { "epoch": 0.08639450216804385, "grad_norm": 0.4229759315390177, "learning_rate": 8.638743455497383e-06, "loss": 0.0109, "step": 5280 }, { "epoch": 0.0865581281191197, "grad_norm": 0.5097145885350656, "learning_rate": 8.655104712041885e-06, "loss": 0.0125, "step": 5290 }, { "epoch": 0.08672175407019553, "grad_norm": 0.5899817657490514, "learning_rate": 8.671465968586387e-06, "loss": 0.0137, "step": 5300 }, { "epoch": 0.08688538002127137, "grad_norm": 0.5235841408712382, "learning_rate": 8.68782722513089e-06, "loss": 0.0106, "step": 5310 }, { "epoch": 0.08704900597234722, "grad_norm": 0.38049642874794304, "learning_rate": 8.704188481675393e-06, "loss": 0.0152, "step": 5320 }, { "epoch": 0.08721263192342306, "grad_norm": 0.4897392377001293, "learning_rate": 8.720549738219896e-06, "loss": 0.0143, "step": 5330 }, { "epoch": 0.08737625787449889, "grad_norm": 0.8889274785827478, "learning_rate": 8.736910994764399e-06, "loss": 0.0121, "step": 5340 }, { "epoch": 0.08753988382557473, "grad_norm": 0.5224856735765664, "learning_rate": 8.7532722513089e-06, "loss": 0.0109, "step": 5350 }, { "epoch": 0.08770350977665058, "grad_norm": 0.45245700203002165, "learning_rate": 8.769633507853403e-06, "loss": 0.0092, "step": 5360 }, { "epoch": 0.08786713572772642, "grad_norm": 0.302319510756231, "learning_rate": 8.785994764397906e-06, "loss": 0.0121, "step": 5370 }, { "epoch": 0.08803076167880226, "grad_norm": 0.5473049093722626, "learning_rate": 8.802356020942409e-06, "loss": 0.0172, "step": 5380 }, { "epoch": 0.0881943876298781, "grad_norm": 0.576704199332457, "learning_rate": 8.818717277486912e-06, "loss": 0.0126, "step": 5390 }, { "epoch": 0.08835801358095394, "grad_norm": 0.5799164147565287, "learning_rate": 8.835078534031415e-06, "loss": 0.012, "step": 5400 }, { "epoch": 0.08852163953202978, "grad_norm": 0.6598336157256204, "learning_rate": 8.851439790575916e-06, "loss": 0.0107, "step": 5410 }, { "epoch": 0.08868526548310562, "grad_norm": 0.46511049748923067, "learning_rate": 8.86780104712042e-06, "loss": 0.0093, "step": 5420 }, { "epoch": 0.08884889143418147, "grad_norm": 0.3611482691432295, "learning_rate": 8.884162303664923e-06, "loss": 0.0092, "step": 5430 }, { "epoch": 0.0890125173852573, "grad_norm": 0.8314071856354412, "learning_rate": 8.900523560209426e-06, "loss": 0.011, "step": 5440 }, { "epoch": 0.08917614333633314, "grad_norm": 0.19772782566529837, "learning_rate": 8.916884816753927e-06, "loss": 0.0081, "step": 5450 }, { "epoch": 0.08933976928740898, "grad_norm": 0.9126046439618061, "learning_rate": 8.93324607329843e-06, "loss": 0.0139, "step": 5460 }, { "epoch": 0.08950339523848483, "grad_norm": 0.4519667591071422, "learning_rate": 8.949607329842933e-06, "loss": 0.0088, "step": 5470 }, { "epoch": 0.08966702118956066, "grad_norm": 0.8499820813009065, "learning_rate": 8.965968586387436e-06, "loss": 0.0157, "step": 5480 }, { "epoch": 0.0898306471406365, "grad_norm": 0.7261175593770764, "learning_rate": 8.982329842931939e-06, "loss": 0.0101, "step": 5490 }, { "epoch": 0.08999427309171235, "grad_norm": 0.43274976938724563, "learning_rate": 8.998691099476442e-06, "loss": 0.0106, "step": 5500 }, { "epoch": 0.09015789904278819, "grad_norm": 0.4231139241430943, "learning_rate": 9.015052356020943e-06, "loss": 0.0111, "step": 5510 }, { "epoch": 0.09032152499386403, "grad_norm": 0.6678215513867426, "learning_rate": 9.031413612565446e-06, "loss": 0.01, "step": 5520 }, { "epoch": 0.09048515094493986, "grad_norm": 0.4431004571020533, "learning_rate": 9.047774869109948e-06, "loss": 0.0133, "step": 5530 }, { "epoch": 0.0906487768960157, "grad_norm": 0.5043903920741631, "learning_rate": 9.064136125654451e-06, "loss": 0.012, "step": 5540 }, { "epoch": 0.09081240284709155, "grad_norm": 0.49879202981327303, "learning_rate": 9.080497382198954e-06, "loss": 0.0144, "step": 5550 }, { "epoch": 0.0909760287981674, "grad_norm": 0.5487611682303718, "learning_rate": 9.096858638743457e-06, "loss": 0.0109, "step": 5560 }, { "epoch": 0.09113965474924322, "grad_norm": 0.5251807533907431, "learning_rate": 9.11321989528796e-06, "loss": 0.0154, "step": 5570 }, { "epoch": 0.09130328070031907, "grad_norm": 0.4825502958915453, "learning_rate": 9.129581151832461e-06, "loss": 0.0126, "step": 5580 }, { "epoch": 0.09146690665139491, "grad_norm": 0.23661115386257375, "learning_rate": 9.145942408376964e-06, "loss": 0.0104, "step": 5590 }, { "epoch": 0.09163053260247075, "grad_norm": 0.5483570770830758, "learning_rate": 9.162303664921467e-06, "loss": 0.013, "step": 5600 }, { "epoch": 0.0917941585535466, "grad_norm": 0.5258580991583125, "learning_rate": 9.17866492146597e-06, "loss": 0.0103, "step": 5610 }, { "epoch": 0.09195778450462243, "grad_norm": 0.47654426598183103, "learning_rate": 9.195026178010473e-06, "loss": 0.0167, "step": 5620 }, { "epoch": 0.09212141045569827, "grad_norm": 0.6901057737770547, "learning_rate": 9.211387434554976e-06, "loss": 0.013, "step": 5630 }, { "epoch": 0.09228503640677412, "grad_norm": 0.5365234514536517, "learning_rate": 9.227748691099477e-06, "loss": 0.0174, "step": 5640 }, { "epoch": 0.09244866235784996, "grad_norm": 0.42164513528728886, "learning_rate": 9.24410994764398e-06, "loss": 0.0097, "step": 5650 }, { "epoch": 0.0926122883089258, "grad_norm": 0.5323929096467224, "learning_rate": 9.260471204188482e-06, "loss": 0.0092, "step": 5660 }, { "epoch": 0.09277591426000163, "grad_norm": 0.6021617778230087, "learning_rate": 9.276832460732985e-06, "loss": 0.0122, "step": 5670 }, { "epoch": 0.09293954021107748, "grad_norm": 0.6100655936588538, "learning_rate": 9.293193717277488e-06, "loss": 0.0125, "step": 5680 }, { "epoch": 0.09310316616215332, "grad_norm": 0.4264614283239258, "learning_rate": 9.309554973821991e-06, "loss": 0.0101, "step": 5690 }, { "epoch": 0.09326679211322916, "grad_norm": 0.3341489359941811, "learning_rate": 9.325916230366492e-06, "loss": 0.0124, "step": 5700 }, { "epoch": 0.093430418064305, "grad_norm": 0.8771706664828531, "learning_rate": 9.342277486910995e-06, "loss": 0.021, "step": 5710 }, { "epoch": 0.09359404401538084, "grad_norm": 0.4024944051729391, "learning_rate": 9.358638743455498e-06, "loss": 0.0107, "step": 5720 }, { "epoch": 0.09375766996645668, "grad_norm": 0.4664819254955415, "learning_rate": 9.375000000000001e-06, "loss": 0.0104, "step": 5730 }, { "epoch": 0.09392129591753252, "grad_norm": 0.9557743248862933, "learning_rate": 9.391361256544504e-06, "loss": 0.0123, "step": 5740 }, { "epoch": 0.09408492186860837, "grad_norm": 0.28965646571528225, "learning_rate": 9.407722513089007e-06, "loss": 0.0111, "step": 5750 }, { "epoch": 0.0942485478196842, "grad_norm": 0.534439643638679, "learning_rate": 9.424083769633508e-06, "loss": 0.0144, "step": 5760 }, { "epoch": 0.09441217377076004, "grad_norm": 0.4790844283951084, "learning_rate": 9.44044502617801e-06, "loss": 0.0124, "step": 5770 }, { "epoch": 0.09457579972183588, "grad_norm": 0.44606197288205657, "learning_rate": 9.456806282722514e-06, "loss": 0.0116, "step": 5780 }, { "epoch": 0.09473942567291173, "grad_norm": 0.48794495511419306, "learning_rate": 9.473167539267016e-06, "loss": 0.0119, "step": 5790 }, { "epoch": 0.09490305162398756, "grad_norm": 0.778767665805708, "learning_rate": 9.48952879581152e-06, "loss": 0.0129, "step": 5800 }, { "epoch": 0.0950666775750634, "grad_norm": 0.561248813747974, "learning_rate": 9.505890052356022e-06, "loss": 0.011, "step": 5810 }, { "epoch": 0.09523030352613925, "grad_norm": 0.47345817934506, "learning_rate": 9.522251308900523e-06, "loss": 0.0114, "step": 5820 }, { "epoch": 0.09539392947721509, "grad_norm": 0.33937833950057283, "learning_rate": 9.538612565445026e-06, "loss": 0.0155, "step": 5830 }, { "epoch": 0.09555755542829093, "grad_norm": 0.22233343462297936, "learning_rate": 9.554973821989529e-06, "loss": 0.01, "step": 5840 }, { "epoch": 0.09572118137936676, "grad_norm": 0.4651076170946273, "learning_rate": 9.571335078534032e-06, "loss": 0.0123, "step": 5850 }, { "epoch": 0.0958848073304426, "grad_norm": 0.41037077259507493, "learning_rate": 9.587696335078535e-06, "loss": 0.0136, "step": 5860 }, { "epoch": 0.09604843328151845, "grad_norm": 0.40390270942599565, "learning_rate": 9.604057591623038e-06, "loss": 0.0139, "step": 5870 }, { "epoch": 0.0962120592325943, "grad_norm": 0.3889564696048176, "learning_rate": 9.62041884816754e-06, "loss": 0.0074, "step": 5880 }, { "epoch": 0.09637568518367012, "grad_norm": 0.635535935685924, "learning_rate": 9.636780104712042e-06, "loss": 0.0096, "step": 5890 }, { "epoch": 0.09653931113474597, "grad_norm": 0.5847337118323772, "learning_rate": 9.653141361256545e-06, "loss": 0.0135, "step": 5900 }, { "epoch": 0.09670293708582181, "grad_norm": 0.313850819389411, "learning_rate": 9.669502617801048e-06, "loss": 0.0116, "step": 5910 }, { "epoch": 0.09686656303689765, "grad_norm": 0.478417121078612, "learning_rate": 9.68586387434555e-06, "loss": 0.0116, "step": 5920 }, { "epoch": 0.0970301889879735, "grad_norm": 0.46530486398417226, "learning_rate": 9.702225130890053e-06, "loss": 0.0085, "step": 5930 }, { "epoch": 0.09719381493904933, "grad_norm": 0.4103041985489078, "learning_rate": 9.718586387434556e-06, "loss": 0.0098, "step": 5940 }, { "epoch": 0.09735744089012517, "grad_norm": 0.5348833294739196, "learning_rate": 9.734947643979057e-06, "loss": 0.0151, "step": 5950 }, { "epoch": 0.09752106684120102, "grad_norm": 0.6883291943698158, "learning_rate": 9.75130890052356e-06, "loss": 0.0134, "step": 5960 }, { "epoch": 0.09768469279227686, "grad_norm": 0.6726455179837149, "learning_rate": 9.767670157068063e-06, "loss": 0.0134, "step": 5970 }, { "epoch": 0.0978483187433527, "grad_norm": 0.5053530352646637, "learning_rate": 9.784031413612566e-06, "loss": 0.0112, "step": 5980 }, { "epoch": 0.09801194469442853, "grad_norm": 0.6342174481202657, "learning_rate": 9.800392670157069e-06, "loss": 0.0088, "step": 5990 }, { "epoch": 0.09817557064550438, "grad_norm": 0.6054053449775758, "learning_rate": 9.816753926701572e-06, "loss": 0.0123, "step": 6000 }, { "epoch": 0.09833919659658022, "grad_norm": 0.3275187680578677, "learning_rate": 9.833115183246073e-06, "loss": 0.0141, "step": 6010 }, { "epoch": 0.09850282254765606, "grad_norm": 0.48429147082982177, "learning_rate": 9.849476439790576e-06, "loss": 0.0113, "step": 6020 }, { "epoch": 0.09866644849873189, "grad_norm": 0.24124561144188728, "learning_rate": 9.865837696335079e-06, "loss": 0.0085, "step": 6030 }, { "epoch": 0.09883007444980774, "grad_norm": 0.7436975583632291, "learning_rate": 9.882198952879581e-06, "loss": 0.0111, "step": 6040 }, { "epoch": 0.09899370040088358, "grad_norm": 0.2831754995725805, "learning_rate": 9.898560209424084e-06, "loss": 0.0103, "step": 6050 }, { "epoch": 0.09915732635195942, "grad_norm": 0.45401967463149967, "learning_rate": 9.914921465968587e-06, "loss": 0.0094, "step": 6060 }, { "epoch": 0.09932095230303527, "grad_norm": 0.5556129123150316, "learning_rate": 9.93128272251309e-06, "loss": 0.0101, "step": 6070 }, { "epoch": 0.0994845782541111, "grad_norm": 0.5305462025353201, "learning_rate": 9.947643979057593e-06, "loss": 0.0094, "step": 6080 }, { "epoch": 0.09964820420518694, "grad_norm": 0.4748836491801436, "learning_rate": 9.964005235602096e-06, "loss": 0.0143, "step": 6090 }, { "epoch": 0.09981183015626278, "grad_norm": 0.6552184020761955, "learning_rate": 9.980366492146599e-06, "loss": 0.0081, "step": 6100 }, { "epoch": 0.09997545610733863, "grad_norm": 0.2635466996072233, "learning_rate": 9.9967277486911e-06, "loss": 0.0103, "step": 6110 }, { "epoch": 0.10013908205841446, "grad_norm": 0.49095445238619656, "learning_rate": 9.99999947802829e-06, "loss": 0.0067, "step": 6120 }, { "epoch": 0.1003027080094903, "grad_norm": 0.29907236600780385, "learning_rate": 9.999997357518407e-06, "loss": 0.0149, "step": 6130 }, { "epoch": 0.10046633396056615, "grad_norm": 0.8397672481540065, "learning_rate": 9.99999360584781e-06, "loss": 0.0115, "step": 6140 }, { "epoch": 0.10062995991164199, "grad_norm": 0.9509560141068086, "learning_rate": 9.999988223017722e-06, "loss": 0.0158, "step": 6150 }, { "epoch": 0.10079358586271783, "grad_norm": 0.8010037045142904, "learning_rate": 9.9999812090299e-06, "loss": 0.0129, "step": 6160 }, { "epoch": 0.10095721181379366, "grad_norm": 0.5371395591512428, "learning_rate": 9.999972563886632e-06, "loss": 0.0136, "step": 6170 }, { "epoch": 0.1011208377648695, "grad_norm": 0.7400906954335196, "learning_rate": 9.999962287590739e-06, "loss": 0.0125, "step": 6180 }, { "epoch": 0.10128446371594535, "grad_norm": 0.4837697018772169, "learning_rate": 9.999950380145572e-06, "loss": 0.0123, "step": 6190 }, { "epoch": 0.1014480896670212, "grad_norm": 0.848980352162884, "learning_rate": 9.999936841555017e-06, "loss": 0.0116, "step": 6200 }, { "epoch": 0.10161171561809704, "grad_norm": 0.5772190766488138, "learning_rate": 9.999921671823487e-06, "loss": 0.0082, "step": 6210 }, { "epoch": 0.10177534156917287, "grad_norm": 0.38725082078719, "learning_rate": 9.999904870955936e-06, "loss": 0.0132, "step": 6220 }, { "epoch": 0.10193896752024871, "grad_norm": 0.7523447039597139, "learning_rate": 9.999886438957843e-06, "loss": 0.0105, "step": 6230 }, { "epoch": 0.10210259347132455, "grad_norm": 0.4450305963353815, "learning_rate": 9.999866375835221e-06, "loss": 0.0109, "step": 6240 }, { "epoch": 0.1022662194224004, "grad_norm": 0.4364063537617544, "learning_rate": 9.999844681594615e-06, "loss": 0.0088, "step": 6250 }, { "epoch": 0.10242984537347623, "grad_norm": 0.28014853889608476, "learning_rate": 9.9998213562431e-06, "loss": 0.0073, "step": 6260 }, { "epoch": 0.10259347132455207, "grad_norm": 0.6019459831370338, "learning_rate": 9.999796399788292e-06, "loss": 0.0102, "step": 6270 }, { "epoch": 0.10275709727562791, "grad_norm": 0.5433955101582122, "learning_rate": 9.999769812238327e-06, "loss": 0.0126, "step": 6280 }, { "epoch": 0.10292072322670376, "grad_norm": 0.7334546492569576, "learning_rate": 9.999741593601881e-06, "loss": 0.0115, "step": 6290 }, { "epoch": 0.1030843491777796, "grad_norm": 0.26043582338582244, "learning_rate": 9.999711743888158e-06, "loss": 0.0096, "step": 6300 }, { "epoch": 0.10324797512885543, "grad_norm": 0.4980737594958685, "learning_rate": 9.999680263106897e-06, "loss": 0.0101, "step": 6310 }, { "epoch": 0.10341160107993128, "grad_norm": 0.4178137912171756, "learning_rate": 9.99964715126837e-06, "loss": 0.0174, "step": 6320 }, { "epoch": 0.10357522703100712, "grad_norm": 0.7385962988512482, "learning_rate": 9.999612408383376e-06, "loss": 0.0137, "step": 6330 }, { "epoch": 0.10373885298208296, "grad_norm": 0.39826782711705155, "learning_rate": 9.99957603446325e-06, "loss": 0.0121, "step": 6340 }, { "epoch": 0.10390247893315879, "grad_norm": 0.35298963132412603, "learning_rate": 9.999538029519861e-06, "loss": 0.0104, "step": 6350 }, { "epoch": 0.10406610488423464, "grad_norm": 0.25144306119691906, "learning_rate": 9.999498393565603e-06, "loss": 0.0138, "step": 6360 }, { "epoch": 0.10422973083531048, "grad_norm": 0.5447426393131171, "learning_rate": 9.99945712661341e-06, "loss": 0.011, "step": 6370 }, { "epoch": 0.10439335678638632, "grad_norm": 0.5467705178447493, "learning_rate": 9.999414228676745e-06, "loss": 0.0088, "step": 6380 }, { "epoch": 0.10455698273746217, "grad_norm": 0.3368890437774478, "learning_rate": 9.9993696997696e-06, "loss": 0.011, "step": 6390 }, { "epoch": 0.104720608688538, "grad_norm": 0.7168666946485738, "learning_rate": 9.999323539906502e-06, "loss": 0.012, "step": 6400 }, { "epoch": 0.10488423463961384, "grad_norm": 0.845153784490695, "learning_rate": 9.99927574910251e-06, "loss": 0.0126, "step": 6410 }, { "epoch": 0.10504786059068968, "grad_norm": 0.6430680689317863, "learning_rate": 9.99922632737322e-06, "loss": 0.0111, "step": 6420 }, { "epoch": 0.10521148654176553, "grad_norm": 0.5778579407416131, "learning_rate": 9.999175274734748e-06, "loss": 0.0122, "step": 6430 }, { "epoch": 0.10537511249284136, "grad_norm": 0.3827239307022562, "learning_rate": 9.999122591203751e-06, "loss": 0.0084, "step": 6440 }, { "epoch": 0.1055387384439172, "grad_norm": 0.16237157664712298, "learning_rate": 9.999068276797417e-06, "loss": 0.0096, "step": 6450 }, { "epoch": 0.10570236439499305, "grad_norm": 0.2703578578777984, "learning_rate": 9.999012331533466e-06, "loss": 0.012, "step": 6460 }, { "epoch": 0.10586599034606889, "grad_norm": 0.5032517355744486, "learning_rate": 9.998954755430145e-06, "loss": 0.0125, "step": 6470 }, { "epoch": 0.10602961629714473, "grad_norm": 0.19448086417145694, "learning_rate": 9.998895548506244e-06, "loss": 0.0104, "step": 6480 }, { "epoch": 0.10619324224822056, "grad_norm": 1.1016969952549975, "learning_rate": 9.998834710781073e-06, "loss": 0.0143, "step": 6490 }, { "epoch": 0.1063568681992964, "grad_norm": 0.4227908717273277, "learning_rate": 9.99877224227448e-06, "loss": 0.008, "step": 6500 }, { "epoch": 0.10652049415037225, "grad_norm": 0.3763617186674828, "learning_rate": 9.998708143006847e-06, "loss": 0.0118, "step": 6510 }, { "epoch": 0.1066841201014481, "grad_norm": 0.6715878812180046, "learning_rate": 9.998642412999082e-06, "loss": 0.0121, "step": 6520 }, { "epoch": 0.10684774605252394, "grad_norm": 0.42395096022430195, "learning_rate": 9.998575052272629e-06, "loss": 0.0106, "step": 6530 }, { "epoch": 0.10701137200359977, "grad_norm": 0.6852581249690433, "learning_rate": 9.998506060849465e-06, "loss": 0.0124, "step": 6540 }, { "epoch": 0.10717499795467561, "grad_norm": 0.5030646364589404, "learning_rate": 9.998435438752094e-06, "loss": 0.0087, "step": 6550 }, { "epoch": 0.10733862390575145, "grad_norm": 0.8514707329056839, "learning_rate": 9.998363186003557e-06, "loss": 0.0092, "step": 6560 }, { "epoch": 0.1075022498568273, "grad_norm": 0.5904561981348231, "learning_rate": 9.998289302627427e-06, "loss": 0.0144, "step": 6570 }, { "epoch": 0.10766587580790313, "grad_norm": 0.4136912731015378, "learning_rate": 9.998213788647804e-06, "loss": 0.0089, "step": 6580 }, { "epoch": 0.10782950175897897, "grad_norm": 0.3640618035395279, "learning_rate": 9.998136644089325e-06, "loss": 0.0132, "step": 6590 }, { "epoch": 0.10799312771005481, "grad_norm": 0.41974650154869514, "learning_rate": 9.998057868977158e-06, "loss": 0.0115, "step": 6600 }, { "epoch": 0.10815675366113066, "grad_norm": 0.5997856513093407, "learning_rate": 9.997977463336999e-06, "loss": 0.0201, "step": 6610 }, { "epoch": 0.1083203796122065, "grad_norm": 0.6365756426066118, "learning_rate": 9.997895427195081e-06, "loss": 0.0103, "step": 6620 }, { "epoch": 0.10848400556328233, "grad_norm": 0.5737435619888953, "learning_rate": 9.997811760578166e-06, "loss": 0.0078, "step": 6630 }, { "epoch": 0.10864763151435818, "grad_norm": 0.5124106814131394, "learning_rate": 9.99772646351355e-06, "loss": 0.0146, "step": 6640 }, { "epoch": 0.10881125746543402, "grad_norm": 0.37502799696794586, "learning_rate": 9.997639536029057e-06, "loss": 0.0071, "step": 6650 }, { "epoch": 0.10897488341650986, "grad_norm": 0.3496365780914275, "learning_rate": 9.997550978153048e-06, "loss": 0.0096, "step": 6660 }, { "epoch": 0.10913850936758569, "grad_norm": 0.3543271745805264, "learning_rate": 9.997460789914413e-06, "loss": 0.0112, "step": 6670 }, { "epoch": 0.10930213531866154, "grad_norm": 0.3877645685162003, "learning_rate": 9.997368971342575e-06, "loss": 0.0097, "step": 6680 }, { "epoch": 0.10946576126973738, "grad_norm": 0.5647123121959107, "learning_rate": 9.997275522467486e-06, "loss": 0.0099, "step": 6690 }, { "epoch": 0.10962938722081322, "grad_norm": 0.3579110467335093, "learning_rate": 9.997180443319635e-06, "loss": 0.0158, "step": 6700 }, { "epoch": 0.10979301317188907, "grad_norm": 0.6510512594206854, "learning_rate": 9.997083733930035e-06, "loss": 0.0095, "step": 6710 }, { "epoch": 0.1099566391229649, "grad_norm": 0.7870841605181781, "learning_rate": 9.99698539433024e-06, "loss": 0.0171, "step": 6720 }, { "epoch": 0.11012026507404074, "grad_norm": 0.4305973730050311, "learning_rate": 9.996885424552332e-06, "loss": 0.0086, "step": 6730 }, { "epoch": 0.11028389102511658, "grad_norm": 0.5048125960477226, "learning_rate": 9.996783824628921e-06, "loss": 0.0087, "step": 6740 }, { "epoch": 0.11044751697619243, "grad_norm": 0.2471328786655937, "learning_rate": 9.996680594593157e-06, "loss": 0.0088, "step": 6750 }, { "epoch": 0.11061114292726827, "grad_norm": 0.525254154690201, "learning_rate": 9.996575734478711e-06, "loss": 0.0106, "step": 6760 }, { "epoch": 0.1107747688783441, "grad_norm": 0.3178604990623649, "learning_rate": 9.996469244319796e-06, "loss": 0.009, "step": 6770 }, { "epoch": 0.11093839482941995, "grad_norm": 0.5792632276416242, "learning_rate": 9.996361124151152e-06, "loss": 0.0095, "step": 6780 }, { "epoch": 0.11110202078049579, "grad_norm": 0.4412227804650962, "learning_rate": 9.996251374008049e-06, "loss": 0.0071, "step": 6790 }, { "epoch": 0.11126564673157163, "grad_norm": 0.1037156866228801, "learning_rate": 9.996139993926293e-06, "loss": 0.0065, "step": 6800 }, { "epoch": 0.11142927268264746, "grad_norm": 0.41941191126354493, "learning_rate": 9.99602698394222e-06, "loss": 0.0156, "step": 6810 }, { "epoch": 0.1115928986337233, "grad_norm": 0.4131215503007927, "learning_rate": 9.995912344092696e-06, "loss": 0.0141, "step": 6820 }, { "epoch": 0.11175652458479915, "grad_norm": 0.8765874065031145, "learning_rate": 9.995796074415123e-06, "loss": 0.013, "step": 6830 }, { "epoch": 0.111920150535875, "grad_norm": 0.19639102705439695, "learning_rate": 9.995678174947428e-06, "loss": 0.0113, "step": 6840 }, { "epoch": 0.11208377648695084, "grad_norm": 0.32128292673184494, "learning_rate": 9.995558645728076e-06, "loss": 0.0106, "step": 6850 }, { "epoch": 0.11224740243802667, "grad_norm": 0.48434215561962674, "learning_rate": 9.995437486796061e-06, "loss": 0.0125, "step": 6860 }, { "epoch": 0.11241102838910251, "grad_norm": 0.5729078299929122, "learning_rate": 9.99531469819091e-06, "loss": 0.0172, "step": 6870 }, { "epoch": 0.11257465434017835, "grad_norm": 1.3737458736241044, "learning_rate": 9.995190279952678e-06, "loss": 0.0124, "step": 6880 }, { "epoch": 0.1127382802912542, "grad_norm": 0.2564057594169095, "learning_rate": 9.995064232121958e-06, "loss": 0.0058, "step": 6890 }, { "epoch": 0.11290190624233003, "grad_norm": 0.1331854057011681, "learning_rate": 9.994936554739866e-06, "loss": 0.0157, "step": 6900 }, { "epoch": 0.11306553219340587, "grad_norm": 0.471743040707044, "learning_rate": 9.994807247848058e-06, "loss": 0.0104, "step": 6910 }, { "epoch": 0.11322915814448171, "grad_norm": 0.22887423957587652, "learning_rate": 9.994676311488718e-06, "loss": 0.0082, "step": 6920 }, { "epoch": 0.11339278409555756, "grad_norm": 0.5053753356765128, "learning_rate": 9.99454374570456e-06, "loss": 0.0106, "step": 6930 }, { "epoch": 0.1135564100466334, "grad_norm": 0.49521876621443284, "learning_rate": 9.994409550538832e-06, "loss": 0.0091, "step": 6940 }, { "epoch": 0.11372003599770923, "grad_norm": 0.5875042581289367, "learning_rate": 9.994273726035315e-06, "loss": 0.0074, "step": 6950 }, { "epoch": 0.11388366194878508, "grad_norm": 0.432155588278268, "learning_rate": 9.994136272238315e-06, "loss": 0.0103, "step": 6960 }, { "epoch": 0.11404728789986092, "grad_norm": 0.39031452179190923, "learning_rate": 9.993997189192677e-06, "loss": 0.0138, "step": 6970 }, { "epoch": 0.11421091385093676, "grad_norm": 0.2731926247935071, "learning_rate": 9.993856476943774e-06, "loss": 0.0122, "step": 6980 }, { "epoch": 0.11437453980201259, "grad_norm": 0.5313500920876125, "learning_rate": 9.99371413553751e-06, "loss": 0.0114, "step": 6990 }, { "epoch": 0.11453816575308844, "grad_norm": 0.43063310857539183, "learning_rate": 9.993570165020322e-06, "loss": 0.0103, "step": 7000 }, { "epoch": 0.11470179170416428, "grad_norm": 0.4672469112453224, "learning_rate": 9.993424565439179e-06, "loss": 0.0085, "step": 7010 }, { "epoch": 0.11486541765524012, "grad_norm": 0.526848647461658, "learning_rate": 9.993277336841576e-06, "loss": 0.0099, "step": 7020 }, { "epoch": 0.11502904360631597, "grad_norm": 0.5703599850373706, "learning_rate": 9.993128479275547e-06, "loss": 0.0113, "step": 7030 }, { "epoch": 0.1151926695573918, "grad_norm": 0.5586900064578564, "learning_rate": 9.992977992789655e-06, "loss": 0.0145, "step": 7040 }, { "epoch": 0.11535629550846764, "grad_norm": 0.966459270445536, "learning_rate": 9.992825877432992e-06, "loss": 0.0099, "step": 7050 }, { "epoch": 0.11551992145954348, "grad_norm": 0.20522298031196878, "learning_rate": 9.992672133255183e-06, "loss": 0.009, "step": 7060 }, { "epoch": 0.11568354741061933, "grad_norm": 0.25623169444005783, "learning_rate": 9.992516760306386e-06, "loss": 0.0123, "step": 7070 }, { "epoch": 0.11584717336169517, "grad_norm": 0.5474450895536691, "learning_rate": 9.992359758637287e-06, "loss": 0.0148, "step": 7080 }, { "epoch": 0.116010799312771, "grad_norm": 0.1622325043630987, "learning_rate": 9.992201128299103e-06, "loss": 0.0118, "step": 7090 }, { "epoch": 0.11617442526384684, "grad_norm": 0.5715358739137986, "learning_rate": 9.992040869343588e-06, "loss": 0.0111, "step": 7100 }, { "epoch": 0.11633805121492269, "grad_norm": 0.28223232876817195, "learning_rate": 9.991878981823025e-06, "loss": 0.0096, "step": 7110 }, { "epoch": 0.11650167716599853, "grad_norm": 0.4089288018832493, "learning_rate": 9.99171546579022e-06, "loss": 0.0109, "step": 7120 }, { "epoch": 0.11666530311707436, "grad_norm": 0.5723616032587838, "learning_rate": 9.991550321298524e-06, "loss": 0.0104, "step": 7130 }, { "epoch": 0.1168289290681502, "grad_norm": 0.2778648237647966, "learning_rate": 9.991383548401808e-06, "loss": 0.0068, "step": 7140 }, { "epoch": 0.11699255501922605, "grad_norm": 0.35871926200508614, "learning_rate": 9.991215147154483e-06, "loss": 0.012, "step": 7150 }, { "epoch": 0.11715618097030189, "grad_norm": 0.5135427271198006, "learning_rate": 9.991045117611481e-06, "loss": 0.0115, "step": 7160 }, { "epoch": 0.11731980692137774, "grad_norm": 0.6694865723399268, "learning_rate": 9.990873459828279e-06, "loss": 0.009, "step": 7170 }, { "epoch": 0.11748343287245357, "grad_norm": 0.5027976638256262, "learning_rate": 9.99070017386087e-06, "loss": 0.0107, "step": 7180 }, { "epoch": 0.11764705882352941, "grad_norm": 0.5759753205342629, "learning_rate": 9.990525259765791e-06, "loss": 0.0119, "step": 7190 }, { "epoch": 0.11781068477460525, "grad_norm": 0.47970539514906335, "learning_rate": 9.9903487176001e-06, "loss": 0.007, "step": 7200 }, { "epoch": 0.1179743107256811, "grad_norm": 0.5529153967992684, "learning_rate": 9.990170547421394e-06, "loss": 0.0101, "step": 7210 }, { "epoch": 0.11813793667675693, "grad_norm": 0.6756920906327544, "learning_rate": 9.989990749287795e-06, "loss": 0.0096, "step": 7220 }, { "epoch": 0.11830156262783277, "grad_norm": 0.5650664639773525, "learning_rate": 9.989809323257961e-06, "loss": 0.0072, "step": 7230 }, { "epoch": 0.11846518857890861, "grad_norm": 0.3517944147814383, "learning_rate": 9.989626269391081e-06, "loss": 0.012, "step": 7240 }, { "epoch": 0.11862881452998446, "grad_norm": 0.6821015905534964, "learning_rate": 9.98944158774687e-06, "loss": 0.0053, "step": 7250 }, { "epoch": 0.1187924404810603, "grad_norm": 0.2038384184246867, "learning_rate": 9.989255278385577e-06, "loss": 0.0144, "step": 7260 }, { "epoch": 0.11895606643213613, "grad_norm": 0.43442665684120113, "learning_rate": 9.989067341367985e-06, "loss": 0.0091, "step": 7270 }, { "epoch": 0.11911969238321198, "grad_norm": 0.3925250349094445, "learning_rate": 9.988877776755402e-06, "loss": 0.009, "step": 7280 }, { "epoch": 0.11928331833428782, "grad_norm": 0.7058911871375118, "learning_rate": 9.98868658460967e-06, "loss": 0.0116, "step": 7290 }, { "epoch": 0.11944694428536366, "grad_norm": 0.23763714170202263, "learning_rate": 9.988493764993163e-06, "loss": 0.0083, "step": 7300 }, { "epoch": 0.11961057023643949, "grad_norm": 0.4119670334210937, "learning_rate": 9.988299317968789e-06, "loss": 0.0104, "step": 7310 }, { "epoch": 0.11977419618751534, "grad_norm": 0.40369454531228166, "learning_rate": 9.988103243599976e-06, "loss": 0.0102, "step": 7320 }, { "epoch": 0.11993782213859118, "grad_norm": 0.37419035482457746, "learning_rate": 9.987905541950694e-06, "loss": 0.009, "step": 7330 }, { "epoch": 0.12010144808966702, "grad_norm": 0.6923309882836086, "learning_rate": 9.98770621308544e-06, "loss": 0.0088, "step": 7340 }, { "epoch": 0.12026507404074287, "grad_norm": 0.7772752064849809, "learning_rate": 9.987505257069239e-06, "loss": 0.0166, "step": 7350 }, { "epoch": 0.1204286999918187, "grad_norm": 0.40883090691405877, "learning_rate": 9.987302673967649e-06, "loss": 0.011, "step": 7360 }, { "epoch": 0.12059232594289454, "grad_norm": 0.6689714658315377, "learning_rate": 9.987098463846762e-06, "loss": 0.012, "step": 7370 }, { "epoch": 0.12075595189397038, "grad_norm": 0.5957023165630145, "learning_rate": 9.986892626773197e-06, "loss": 0.0089, "step": 7380 }, { "epoch": 0.12091957784504623, "grad_norm": 0.6202100805091941, "learning_rate": 9.986685162814106e-06, "loss": 0.0171, "step": 7390 }, { "epoch": 0.12108320379612207, "grad_norm": 0.6673315898986919, "learning_rate": 9.986476072037168e-06, "loss": 0.0065, "step": 7400 }, { "epoch": 0.1212468297471979, "grad_norm": 0.2529659115623473, "learning_rate": 9.986265354510594e-06, "loss": 0.0079, "step": 7410 }, { "epoch": 0.12141045569827374, "grad_norm": 0.3691320759505354, "learning_rate": 9.98605301030313e-06, "loss": 0.0128, "step": 7420 }, { "epoch": 0.12157408164934959, "grad_norm": 0.3451722725340001, "learning_rate": 9.985839039484048e-06, "loss": 0.0111, "step": 7430 }, { "epoch": 0.12173770760042543, "grad_norm": 0.1322355929776524, "learning_rate": 9.985623442123154e-06, "loss": 0.0074, "step": 7440 }, { "epoch": 0.12190133355150126, "grad_norm": 0.1302490846861022, "learning_rate": 9.985406218290781e-06, "loss": 0.01, "step": 7450 }, { "epoch": 0.1220649595025771, "grad_norm": 0.3880780292039577, "learning_rate": 9.985187368057794e-06, "loss": 0.0123, "step": 7460 }, { "epoch": 0.12222858545365295, "grad_norm": 0.21239876531689256, "learning_rate": 9.984966891495591e-06, "loss": 0.0061, "step": 7470 }, { "epoch": 0.12239221140472879, "grad_norm": 0.48722969858836135, "learning_rate": 9.984744788676097e-06, "loss": 0.0067, "step": 7480 }, { "epoch": 0.12255583735580464, "grad_norm": 0.43993669285663584, "learning_rate": 9.98452105967177e-06, "loss": 0.0085, "step": 7490 }, { "epoch": 0.12271946330688047, "grad_norm": 0.6023606851601276, "learning_rate": 9.984295704555595e-06, "loss": 0.0109, "step": 7500 }, { "epoch": 0.12288308925795631, "grad_norm": 0.5471127678796841, "learning_rate": 9.984068723401095e-06, "loss": 0.0152, "step": 7510 }, { "epoch": 0.12304671520903215, "grad_norm": 1.0448909116485974, "learning_rate": 9.983840116282315e-06, "loss": 0.011, "step": 7520 }, { "epoch": 0.123210341160108, "grad_norm": 0.6978748804873199, "learning_rate": 9.983609883273835e-06, "loss": 0.0072, "step": 7530 }, { "epoch": 0.12337396711118383, "grad_norm": 0.5155271110289986, "learning_rate": 9.983378024450765e-06, "loss": 0.0121, "step": 7540 }, { "epoch": 0.12353759306225967, "grad_norm": 0.33877927060760543, "learning_rate": 9.983144539888743e-06, "loss": 0.0115, "step": 7550 }, { "epoch": 0.12370121901333551, "grad_norm": 0.2542065258467828, "learning_rate": 9.98290942966394e-06, "loss": 0.0094, "step": 7560 }, { "epoch": 0.12386484496441136, "grad_norm": 0.5112176686821556, "learning_rate": 9.98267269385306e-06, "loss": 0.0092, "step": 7570 }, { "epoch": 0.1240284709154872, "grad_norm": 0.31840976149965144, "learning_rate": 9.982434332533328e-06, "loss": 0.0082, "step": 7580 }, { "epoch": 0.12419209686656303, "grad_norm": 0.4189409873171635, "learning_rate": 9.98219434578251e-06, "loss": 0.013, "step": 7590 }, { "epoch": 0.12435572281763888, "grad_norm": 0.6527638038825173, "learning_rate": 9.981952733678893e-06, "loss": 0.0101, "step": 7600 }, { "epoch": 0.12451934876871472, "grad_norm": 0.43006567211478064, "learning_rate": 9.981709496301303e-06, "loss": 0.0095, "step": 7610 }, { "epoch": 0.12468297471979056, "grad_norm": 0.6122561192216692, "learning_rate": 9.98146463372909e-06, "loss": 0.0111, "step": 7620 }, { "epoch": 0.1248466006708664, "grad_norm": 0.4927626535205494, "learning_rate": 9.981218146042135e-06, "loss": 0.0085, "step": 7630 }, { "epoch": 0.12501022662194225, "grad_norm": 0.674376503918369, "learning_rate": 9.98097003332085e-06, "loss": 0.0083, "step": 7640 }, { "epoch": 0.12517385257301808, "grad_norm": 0.7374741360877392, "learning_rate": 9.980720295646182e-06, "loss": 0.0079, "step": 7650 }, { "epoch": 0.1253374785240939, "grad_norm": 1.252409135462966, "learning_rate": 9.9804689330996e-06, "loss": 0.0093, "step": 7660 }, { "epoch": 0.12550110447516977, "grad_norm": 0.36368397724751045, "learning_rate": 9.980215945763105e-06, "loss": 0.0076, "step": 7670 }, { "epoch": 0.1256647304262456, "grad_norm": 0.38366978348564307, "learning_rate": 9.979961333719234e-06, "loss": 0.0118, "step": 7680 }, { "epoch": 0.12582835637732145, "grad_norm": 0.4499159108380496, "learning_rate": 9.979705097051045e-06, "loss": 0.0147, "step": 7690 }, { "epoch": 0.12599198232839728, "grad_norm": 0.38279883429941414, "learning_rate": 9.979447235842133e-06, "loss": 0.0085, "step": 7700 }, { "epoch": 0.1261556082794731, "grad_norm": 0.3327792071790146, "learning_rate": 9.979187750176622e-06, "loss": 0.0066, "step": 7710 }, { "epoch": 0.12631923423054897, "grad_norm": 0.3730433963890892, "learning_rate": 9.978926640139161e-06, "loss": 0.0082, "step": 7720 }, { "epoch": 0.1264828601816248, "grad_norm": 0.19140166994682384, "learning_rate": 9.978663905814935e-06, "loss": 0.0085, "step": 7730 }, { "epoch": 0.12664648613270066, "grad_norm": 0.5242504399307966, "learning_rate": 9.97839954728966e-06, "loss": 0.0104, "step": 7740 }, { "epoch": 0.1268101120837765, "grad_norm": 0.8910352378541504, "learning_rate": 9.97813356464957e-06, "loss": 0.0095, "step": 7750 }, { "epoch": 0.12697373803485232, "grad_norm": 0.5119339834424127, "learning_rate": 9.977865957981442e-06, "loss": 0.0115, "step": 7760 }, { "epoch": 0.12713736398592818, "grad_norm": 0.4549393064494094, "learning_rate": 9.977596727372579e-06, "loss": 0.0128, "step": 7770 }, { "epoch": 0.127300989937004, "grad_norm": 0.5718418789768672, "learning_rate": 9.97732587291081e-06, "loss": 0.0073, "step": 7780 }, { "epoch": 0.12746461588807986, "grad_norm": 0.5031150658063366, "learning_rate": 9.977053394684497e-06, "loss": 0.0112, "step": 7790 }, { "epoch": 0.1276282418391557, "grad_norm": 0.33779627638730514, "learning_rate": 9.976779292782534e-06, "loss": 0.0125, "step": 7800 }, { "epoch": 0.12779186779023152, "grad_norm": 0.5834386709489626, "learning_rate": 9.976503567294338e-06, "loss": 0.0128, "step": 7810 }, { "epoch": 0.12795549374130738, "grad_norm": 0.5065300223268314, "learning_rate": 9.976226218309861e-06, "loss": 0.0056, "step": 7820 }, { "epoch": 0.1281191196923832, "grad_norm": 0.39285655863263635, "learning_rate": 9.975947245919586e-06, "loss": 0.0087, "step": 7830 }, { "epoch": 0.12828274564345904, "grad_norm": 0.6351355020650205, "learning_rate": 9.975666650214519e-06, "loss": 0.0102, "step": 7840 }, { "epoch": 0.1284463715945349, "grad_norm": 0.2293904799000577, "learning_rate": 9.975384431286199e-06, "loss": 0.0061, "step": 7850 }, { "epoch": 0.12860999754561073, "grad_norm": 0.29463279243069634, "learning_rate": 9.975100589226698e-06, "loss": 0.0115, "step": 7860 }, { "epoch": 0.12877362349668658, "grad_norm": 0.3681066221815506, "learning_rate": 9.974815124128613e-06, "loss": 0.0082, "step": 7870 }, { "epoch": 0.12893724944776241, "grad_norm": 0.37929925136191706, "learning_rate": 9.974528036085073e-06, "loss": 0.01, "step": 7880 }, { "epoch": 0.12910087539883824, "grad_norm": 0.3984684069318568, "learning_rate": 9.974239325189733e-06, "loss": 0.0101, "step": 7890 }, { "epoch": 0.1292645013499141, "grad_norm": 0.4409635828897977, "learning_rate": 9.973948991536782e-06, "loss": 0.0115, "step": 7900 }, { "epoch": 0.12942812730098993, "grad_norm": 0.5819005272996403, "learning_rate": 9.973657035220936e-06, "loss": 0.0122, "step": 7910 }, { "epoch": 0.1295917532520658, "grad_norm": 0.2964183404331756, "learning_rate": 9.973363456337439e-06, "loss": 0.0067, "step": 7920 }, { "epoch": 0.12975537920314162, "grad_norm": 0.513040956545932, "learning_rate": 9.973068254982067e-06, "loss": 0.0087, "step": 7930 }, { "epoch": 0.12991900515421745, "grad_norm": 0.24495945014448808, "learning_rate": 9.972771431251126e-06, "loss": 0.0073, "step": 7940 }, { "epoch": 0.1300826311052933, "grad_norm": 0.2918552635809396, "learning_rate": 9.972472985241446e-06, "loss": 0.0101, "step": 7950 }, { "epoch": 0.13024625705636914, "grad_norm": 0.4772610801439484, "learning_rate": 9.97217291705039e-06, "loss": 0.008, "step": 7960 }, { "epoch": 0.130409883007445, "grad_norm": 0.28689235850898825, "learning_rate": 9.971871226775856e-06, "loss": 0.0084, "step": 7970 }, { "epoch": 0.13057350895852082, "grad_norm": 0.3211163110539691, "learning_rate": 9.971567914516256e-06, "loss": 0.0079, "step": 7980 }, { "epoch": 0.13073713490959665, "grad_norm": 0.6895975282977052, "learning_rate": 9.971262980370548e-06, "loss": 0.0107, "step": 7990 }, { "epoch": 0.1309007608606725, "grad_norm": 0.4930688444804981, "learning_rate": 9.970956424438206e-06, "loss": 0.0114, "step": 8000 }, { "epoch": 0.13106438681174834, "grad_norm": 0.6669313344247826, "learning_rate": 9.970648246819242e-06, "loss": 0.0106, "step": 8010 }, { "epoch": 0.1312280127628242, "grad_norm": 0.610351134090419, "learning_rate": 9.970338447614189e-06, "loss": 0.0075, "step": 8020 }, { "epoch": 0.13139163871390003, "grad_norm": 0.7731612332068099, "learning_rate": 9.97002702692412e-06, "loss": 0.0088, "step": 8030 }, { "epoch": 0.13155526466497586, "grad_norm": 0.2435463441612163, "learning_rate": 9.969713984850626e-06, "loss": 0.0082, "step": 8040 }, { "epoch": 0.13171889061605171, "grad_norm": 0.3702965455878276, "learning_rate": 9.969399321495831e-06, "loss": 0.0073, "step": 8050 }, { "epoch": 0.13188251656712754, "grad_norm": 0.8532321708797265, "learning_rate": 9.969083036962393e-06, "loss": 0.0055, "step": 8060 }, { "epoch": 0.13204614251820337, "grad_norm": 0.4440392363683305, "learning_rate": 9.968765131353488e-06, "loss": 0.0077, "step": 8070 }, { "epoch": 0.13220976846927923, "grad_norm": 0.47211443767887445, "learning_rate": 9.968445604772831e-06, "loss": 0.0093, "step": 8080 }, { "epoch": 0.13237339442035506, "grad_norm": 0.3864580530825567, "learning_rate": 9.968124457324663e-06, "loss": 0.0085, "step": 8090 }, { "epoch": 0.13253702037143092, "grad_norm": 0.9491863296092485, "learning_rate": 9.96780168911375e-06, "loss": 0.0069, "step": 8100 }, { "epoch": 0.13270064632250675, "grad_norm": 0.8790948819826223, "learning_rate": 9.967477300245388e-06, "loss": 0.0079, "step": 8110 }, { "epoch": 0.13286427227358258, "grad_norm": 0.4761717724864765, "learning_rate": 9.967151290825408e-06, "loss": 0.0079, "step": 8120 }, { "epoch": 0.13302789822465844, "grad_norm": 0.5055332139402398, "learning_rate": 9.966823660960162e-06, "loss": 0.0094, "step": 8130 }, { "epoch": 0.13319152417573427, "grad_norm": 0.5833829337591888, "learning_rate": 9.966494410756531e-06, "loss": 0.0065, "step": 8140 }, { "epoch": 0.13335515012681012, "grad_norm": 0.45025717390775377, "learning_rate": 9.96616354032193e-06, "loss": 0.0091, "step": 8150 }, { "epoch": 0.13351877607788595, "grad_norm": 0.9173263873215725, "learning_rate": 9.965831049764302e-06, "loss": 0.0124, "step": 8160 }, { "epoch": 0.13368240202896178, "grad_norm": 0.5106457676127525, "learning_rate": 9.965496939192113e-06, "loss": 0.0072, "step": 8170 }, { "epoch": 0.13384602798003764, "grad_norm": 0.4423149537287774, "learning_rate": 9.965161208714359e-06, "loss": 0.0107, "step": 8180 }, { "epoch": 0.13400965393111347, "grad_norm": 0.30649577869750927, "learning_rate": 9.96482385844057e-06, "loss": 0.0077, "step": 8190 }, { "epoch": 0.13417327988218933, "grad_norm": 0.41568687447353264, "learning_rate": 9.964484888480797e-06, "loss": 0.0143, "step": 8200 }, { "epoch": 0.13433690583326516, "grad_norm": 0.5147852339554367, "learning_rate": 9.964144298945625e-06, "loss": 0.0093, "step": 8210 }, { "epoch": 0.134500531784341, "grad_norm": 0.40569518941094834, "learning_rate": 9.963802089946166e-06, "loss": 0.009, "step": 8220 }, { "epoch": 0.13466415773541685, "grad_norm": 0.4371374936449431, "learning_rate": 9.96345826159406e-06, "loss": 0.0096, "step": 8230 }, { "epoch": 0.13482778368649267, "grad_norm": 0.3884647954271551, "learning_rate": 9.96311281400147e-06, "loss": 0.0098, "step": 8240 }, { "epoch": 0.1349914096375685, "grad_norm": 0.32999156031145854, "learning_rate": 9.962765747281097e-06, "loss": 0.0074, "step": 8250 }, { "epoch": 0.13515503558864436, "grad_norm": 0.5058323665158481, "learning_rate": 9.962417061546166e-06, "loss": 0.009, "step": 8260 }, { "epoch": 0.1353186615397202, "grad_norm": 0.3371150273031882, "learning_rate": 9.962066756910427e-06, "loss": 0.0075, "step": 8270 }, { "epoch": 0.13548228749079605, "grad_norm": 0.4022507568444807, "learning_rate": 9.96171483348816e-06, "loss": 0.0077, "step": 8280 }, { "epoch": 0.13564591344187188, "grad_norm": 0.6587497644861972, "learning_rate": 9.961361291394176e-06, "loss": 0.0106, "step": 8290 }, { "epoch": 0.1358095393929477, "grad_norm": 0.6405174898465515, "learning_rate": 9.961006130743811e-06, "loss": 0.0119, "step": 8300 }, { "epoch": 0.13597316534402357, "grad_norm": 0.37482383762293103, "learning_rate": 9.960649351652931e-06, "loss": 0.0138, "step": 8310 }, { "epoch": 0.1361367912950994, "grad_norm": 0.2964876803929516, "learning_rate": 9.960290954237927e-06, "loss": 0.0098, "step": 8320 }, { "epoch": 0.13630041724617525, "grad_norm": 0.39809389169501386, "learning_rate": 9.959930938615721e-06, "loss": 0.0084, "step": 8330 }, { "epoch": 0.13646404319725108, "grad_norm": 0.5289905951932483, "learning_rate": 9.95956930490376e-06, "loss": 0.0095, "step": 8340 }, { "epoch": 0.1366276691483269, "grad_norm": 0.24298209907434354, "learning_rate": 9.959206053220023e-06, "loss": 0.0075, "step": 8350 }, { "epoch": 0.13679129509940277, "grad_norm": 0.3546188098797019, "learning_rate": 9.958841183683014e-06, "loss": 0.0055, "step": 8360 }, { "epoch": 0.1369549210504786, "grad_norm": 0.2880727102093247, "learning_rate": 9.958474696411763e-06, "loss": 0.0064, "step": 8370 }, { "epoch": 0.13711854700155446, "grad_norm": 0.6116985426477467, "learning_rate": 9.958106591525833e-06, "loss": 0.0096, "step": 8380 }, { "epoch": 0.1372821729526303, "grad_norm": 0.5298539295490508, "learning_rate": 9.95773686914531e-06, "loss": 0.0081, "step": 8390 }, { "epoch": 0.13744579890370612, "grad_norm": 0.4365517299152412, "learning_rate": 9.95736552939081e-06, "loss": 0.0106, "step": 8400 }, { "epoch": 0.13760942485478198, "grad_norm": 0.47156272106008296, "learning_rate": 9.956992572383474e-06, "loss": 0.0073, "step": 8410 }, { "epoch": 0.1377730508058578, "grad_norm": 0.3090850942231373, "learning_rate": 9.956617998244975e-06, "loss": 0.0073, "step": 8420 }, { "epoch": 0.13793667675693366, "grad_norm": 0.5692173622575482, "learning_rate": 9.95624180709751e-06, "loss": 0.0108, "step": 8430 }, { "epoch": 0.1381003027080095, "grad_norm": 0.21130202639782922, "learning_rate": 9.955863999063809e-06, "loss": 0.009, "step": 8440 }, { "epoch": 0.13826392865908532, "grad_norm": 1.2382408730524779, "learning_rate": 9.955484574267118e-06, "loss": 0.0086, "step": 8450 }, { "epoch": 0.13842755461016118, "grad_norm": 0.4020863358384348, "learning_rate": 9.955103532831223e-06, "loss": 0.0085, "step": 8460 }, { "epoch": 0.138591180561237, "grad_norm": 0.1747274571972482, "learning_rate": 9.954720874880429e-06, "loss": 0.011, "step": 8470 }, { "epoch": 0.13875480651231284, "grad_norm": 0.617611697190206, "learning_rate": 9.954336600539572e-06, "loss": 0.0089, "step": 8480 }, { "epoch": 0.1389184324633887, "grad_norm": 0.5432507166102423, "learning_rate": 9.953950709934015e-06, "loss": 0.0122, "step": 8490 }, { "epoch": 0.13908205841446453, "grad_norm": 0.6456532716126964, "learning_rate": 9.95356320318965e-06, "loss": 0.008, "step": 8500 }, { "epoch": 0.13924568436554038, "grad_norm": 0.6509873850301885, "learning_rate": 9.95317408043289e-06, "loss": 0.0096, "step": 8510 }, { "epoch": 0.13940931031661621, "grad_norm": 0.3961443727664188, "learning_rate": 9.952783341790683e-06, "loss": 0.0068, "step": 8520 }, { "epoch": 0.13957293626769204, "grad_norm": 0.2451510181407701, "learning_rate": 9.952390987390499e-06, "loss": 0.011, "step": 8530 }, { "epoch": 0.1397365622187679, "grad_norm": 0.4924171652507646, "learning_rate": 9.951997017360337e-06, "loss": 0.0088, "step": 8540 }, { "epoch": 0.13990018816984373, "grad_norm": 0.2381077667610896, "learning_rate": 9.951601431828724e-06, "loss": 0.0089, "step": 8550 }, { "epoch": 0.1400638141209196, "grad_norm": 0.4134252017787631, "learning_rate": 9.951204230924712e-06, "loss": 0.0082, "step": 8560 }, { "epoch": 0.14022744007199542, "grad_norm": 0.2219257577304448, "learning_rate": 9.950805414777879e-06, "loss": 0.0093, "step": 8570 }, { "epoch": 0.14039106602307125, "grad_norm": 0.7723556396810369, "learning_rate": 9.950404983518334e-06, "loss": 0.0097, "step": 8580 }, { "epoch": 0.1405546919741471, "grad_norm": 0.7952814794637963, "learning_rate": 9.950002937276708e-06, "loss": 0.0121, "step": 8590 }, { "epoch": 0.14071831792522294, "grad_norm": 0.380027843914542, "learning_rate": 9.949599276184165e-06, "loss": 0.0109, "step": 8600 }, { "epoch": 0.1408819438762988, "grad_norm": 0.37471098615637444, "learning_rate": 9.949194000372391e-06, "loss": 0.0067, "step": 8610 }, { "epoch": 0.14104556982737462, "grad_norm": 0.3440514636109666, "learning_rate": 9.948787109973598e-06, "loss": 0.0078, "step": 8620 }, { "epoch": 0.14120919577845045, "grad_norm": 0.424943435511282, "learning_rate": 9.948378605120528e-06, "loss": 0.01, "step": 8630 }, { "epoch": 0.1413728217295263, "grad_norm": 0.16893986040428946, "learning_rate": 9.94796848594645e-06, "loss": 0.0125, "step": 8640 }, { "epoch": 0.14153644768060214, "grad_norm": 0.15839320773848795, "learning_rate": 9.947556752585158e-06, "loss": 0.0098, "step": 8650 }, { "epoch": 0.141700073631678, "grad_norm": 0.4259802341373496, "learning_rate": 9.94714340517097e-06, "loss": 0.0073, "step": 8660 }, { "epoch": 0.14186369958275383, "grad_norm": 0.6112867083304985, "learning_rate": 9.946728443838735e-06, "loss": 0.0115, "step": 8670 }, { "epoch": 0.14202732553382966, "grad_norm": 0.4635477358331691, "learning_rate": 9.946311868723826e-06, "loss": 0.0089, "step": 8680 }, { "epoch": 0.14219095148490551, "grad_norm": 0.3613685922466621, "learning_rate": 9.945893679962146e-06, "loss": 0.007, "step": 8690 }, { "epoch": 0.14235457743598134, "grad_norm": 0.29280344832715, "learning_rate": 9.94547387769012e-06, "loss": 0.0101, "step": 8700 }, { "epoch": 0.14251820338705717, "grad_norm": 0.3903999029193127, "learning_rate": 9.945052462044698e-06, "loss": 0.0093, "step": 8710 }, { "epoch": 0.14268182933813303, "grad_norm": 0.6670602607382556, "learning_rate": 9.944629433163365e-06, "loss": 0.0117, "step": 8720 }, { "epoch": 0.14284545528920886, "grad_norm": 0.2554563727979691, "learning_rate": 9.944204791184123e-06, "loss": 0.0146, "step": 8730 }, { "epoch": 0.14300908124028472, "grad_norm": 0.5172661304058259, "learning_rate": 9.943778536245505e-06, "loss": 0.009, "step": 8740 }, { "epoch": 0.14317270719136055, "grad_norm": 0.5401029861472235, "learning_rate": 9.943350668486568e-06, "loss": 0.0105, "step": 8750 }, { "epoch": 0.14333633314243638, "grad_norm": 0.35109286699254905, "learning_rate": 9.942921188046898e-06, "loss": 0.0083, "step": 8760 }, { "epoch": 0.14349995909351224, "grad_norm": 0.8117302205244186, "learning_rate": 9.942490095066603e-06, "loss": 0.0108, "step": 8770 }, { "epoch": 0.14366358504458807, "grad_norm": 0.18422880204362338, "learning_rate": 9.942057389686323e-06, "loss": 0.0067, "step": 8780 }, { "epoch": 0.14382721099566392, "grad_norm": 1.0442420844936615, "learning_rate": 9.94162307204722e-06, "loss": 0.0078, "step": 8790 }, { "epoch": 0.14399083694673975, "grad_norm": 0.5708013045768598, "learning_rate": 9.941187142290978e-06, "loss": 0.0081, "step": 8800 }, { "epoch": 0.14415446289781558, "grad_norm": 0.46354191840015313, "learning_rate": 9.940749600559816e-06, "loss": 0.0056, "step": 8810 }, { "epoch": 0.14431808884889144, "grad_norm": 0.19860903530881147, "learning_rate": 9.940310446996474e-06, "loss": 0.0078, "step": 8820 }, { "epoch": 0.14448171479996727, "grad_norm": 0.4625361162855827, "learning_rate": 9.939869681744215e-06, "loss": 0.0083, "step": 8830 }, { "epoch": 0.14464534075104313, "grad_norm": 0.043661821918211566, "learning_rate": 9.939427304946832e-06, "loss": 0.0053, "step": 8840 }, { "epoch": 0.14480896670211896, "grad_norm": 0.5937446254846646, "learning_rate": 9.938983316748643e-06, "loss": 0.0083, "step": 8850 }, { "epoch": 0.1449725926531948, "grad_norm": 0.14140342932585184, "learning_rate": 9.938537717294492e-06, "loss": 0.0098, "step": 8860 }, { "epoch": 0.14513621860427064, "grad_norm": 0.3285297438534549, "learning_rate": 9.93809050672975e-06, "loss": 0.0077, "step": 8870 }, { "epoch": 0.14529984455534647, "grad_norm": 0.16116408629552414, "learning_rate": 9.937641685200306e-06, "loss": 0.0096, "step": 8880 }, { "epoch": 0.14546347050642233, "grad_norm": 0.2635515471787533, "learning_rate": 9.937191252852583e-06, "loss": 0.0076, "step": 8890 }, { "epoch": 0.14562709645749816, "grad_norm": 0.45473968670725573, "learning_rate": 9.936739209833528e-06, "loss": 0.0072, "step": 8900 }, { "epoch": 0.145790722408574, "grad_norm": 0.3446584360047905, "learning_rate": 9.93628555629061e-06, "loss": 0.0067, "step": 8910 }, { "epoch": 0.14595434835964985, "grad_norm": 0.6062019616633063, "learning_rate": 9.935830292371827e-06, "loss": 0.0093, "step": 8920 }, { "epoch": 0.14611797431072568, "grad_norm": 0.21348975147000424, "learning_rate": 9.9353734182257e-06, "loss": 0.0093, "step": 8930 }, { "epoch": 0.1462816002618015, "grad_norm": 0.6169294096149263, "learning_rate": 9.934914934001274e-06, "loss": 0.0071, "step": 8940 }, { "epoch": 0.14644522621287737, "grad_norm": 0.308857607310664, "learning_rate": 9.934454839848126e-06, "loss": 0.0073, "step": 8950 }, { "epoch": 0.1466088521639532, "grad_norm": 0.42150804636863715, "learning_rate": 9.93399313591635e-06, "loss": 0.0063, "step": 8960 }, { "epoch": 0.14677247811502905, "grad_norm": 0.14321617806529321, "learning_rate": 9.933529822356568e-06, "loss": 0.0082, "step": 8970 }, { "epoch": 0.14693610406610488, "grad_norm": 0.4233447666638266, "learning_rate": 9.933064899319933e-06, "loss": 0.0081, "step": 8980 }, { "epoch": 0.1470997300171807, "grad_norm": 0.5036144380917476, "learning_rate": 9.932598366958113e-06, "loss": 0.0075, "step": 8990 }, { "epoch": 0.14726335596825657, "grad_norm": 0.3378704556119975, "learning_rate": 9.932130225423307e-06, "loss": 0.0067, "step": 9000 }, { "epoch": 0.1474269819193324, "grad_norm": 0.44832351842906576, "learning_rate": 9.931660474868238e-06, "loss": 0.0091, "step": 9010 }, { "epoch": 0.14759060787040826, "grad_norm": 0.3124134853489913, "learning_rate": 9.931189115446156e-06, "loss": 0.0079, "step": 9020 }, { "epoch": 0.1477542338214841, "grad_norm": 0.376129076267134, "learning_rate": 9.93071614731083e-06, "loss": 0.0081, "step": 9030 }, { "epoch": 0.14791785977255992, "grad_norm": 0.5592900785105428, "learning_rate": 9.930241570616561e-06, "loss": 0.0084, "step": 9040 }, { "epoch": 0.14808148572363578, "grad_norm": 0.3045366401986836, "learning_rate": 9.92976538551817e-06, "loss": 0.0087, "step": 9050 }, { "epoch": 0.1482451116747116, "grad_norm": 0.2669190363544682, "learning_rate": 9.929287592171002e-06, "loss": 0.0087, "step": 9060 }, { "epoch": 0.14840873762578746, "grad_norm": 0.36518718513976595, "learning_rate": 9.92880819073093e-06, "loss": 0.01, "step": 9070 }, { "epoch": 0.1485723635768633, "grad_norm": 0.588493426328603, "learning_rate": 9.928327181354352e-06, "loss": 0.0102, "step": 9080 }, { "epoch": 0.14873598952793912, "grad_norm": 0.5717443119906511, "learning_rate": 9.927844564198186e-06, "loss": 0.0062, "step": 9090 }, { "epoch": 0.14889961547901498, "grad_norm": 0.9134875475071091, "learning_rate": 9.927360339419878e-06, "loss": 0.0082, "step": 9100 }, { "epoch": 0.1490632414300908, "grad_norm": 0.2879624478961297, "learning_rate": 9.9268745071774e-06, "loss": 0.0073, "step": 9110 }, { "epoch": 0.14922686738116667, "grad_norm": 0.8029359234190678, "learning_rate": 9.926387067629243e-06, "loss": 0.014, "step": 9120 }, { "epoch": 0.1493904933322425, "grad_norm": 0.37907731222138397, "learning_rate": 9.925898020934428e-06, "loss": 0.0098, "step": 9130 }, { "epoch": 0.14955411928331833, "grad_norm": 0.2476944483508969, "learning_rate": 9.925407367252497e-06, "loss": 0.008, "step": 9140 }, { "epoch": 0.14971774523439418, "grad_norm": 0.4204750739847926, "learning_rate": 9.924915106743515e-06, "loss": 0.0074, "step": 9150 }, { "epoch": 0.14988137118547, "grad_norm": 0.7074599029391628, "learning_rate": 9.924421239568077e-06, "loss": 0.0117, "step": 9160 }, { "epoch": 0.15004499713654584, "grad_norm": 0.2677756013199778, "learning_rate": 9.923925765887296e-06, "loss": 0.0087, "step": 9170 }, { "epoch": 0.1502086230876217, "grad_norm": 0.5697800424731361, "learning_rate": 9.923428685862814e-06, "loss": 0.0079, "step": 9180 }, { "epoch": 0.15037224903869753, "grad_norm": 0.8009000925321772, "learning_rate": 9.92292999965679e-06, "loss": 0.0076, "step": 9190 }, { "epoch": 0.1505358749897734, "grad_norm": 0.49303097539862706, "learning_rate": 9.922429707431917e-06, "loss": 0.0088, "step": 9200 }, { "epoch": 0.15069950094084922, "grad_norm": 0.6974439715725193, "learning_rate": 9.921927809351402e-06, "loss": 0.0075, "step": 9210 }, { "epoch": 0.15086312689192505, "grad_norm": 0.19476468926689297, "learning_rate": 9.921424305578983e-06, "loss": 0.0053, "step": 9220 }, { "epoch": 0.1510267528430009, "grad_norm": 0.37303715697780476, "learning_rate": 9.920919196278917e-06, "loss": 0.0083, "step": 9230 }, { "epoch": 0.15119037879407674, "grad_norm": 0.2224756456525914, "learning_rate": 9.92041248161599e-06, "loss": 0.0065, "step": 9240 }, { "epoch": 0.1513540047451526, "grad_norm": 0.21778292086119141, "learning_rate": 9.919904161755507e-06, "loss": 0.0058, "step": 9250 }, { "epoch": 0.15151763069622842, "grad_norm": 0.2713447061718825, "learning_rate": 9.919394236863298e-06, "loss": 0.0077, "step": 9260 }, { "epoch": 0.15168125664730425, "grad_norm": 0.35129680145951364, "learning_rate": 9.918882707105717e-06, "loss": 0.007, "step": 9270 }, { "epoch": 0.1518448825983801, "grad_norm": 0.6536664463782678, "learning_rate": 9.918369572649643e-06, "loss": 0.006, "step": 9280 }, { "epoch": 0.15200850854945594, "grad_norm": 0.47511884991950143, "learning_rate": 9.917854833662474e-06, "loss": 0.0071, "step": 9290 }, { "epoch": 0.1521721345005318, "grad_norm": 0.5483570606410505, "learning_rate": 9.917338490312138e-06, "loss": 0.0072, "step": 9300 }, { "epoch": 0.15233576045160763, "grad_norm": 0.3916762090305378, "learning_rate": 9.916820542767082e-06, "loss": 0.0122, "step": 9310 }, { "epoch": 0.15249938640268346, "grad_norm": 0.4146095761940944, "learning_rate": 9.916300991196275e-06, "loss": 0.0095, "step": 9320 }, { "epoch": 0.15266301235375931, "grad_norm": 0.18649352333468477, "learning_rate": 9.915779835769215e-06, "loss": 0.0062, "step": 9330 }, { "epoch": 0.15282663830483514, "grad_norm": 0.4991207924409798, "learning_rate": 9.915257076655915e-06, "loss": 0.0104, "step": 9340 }, { "epoch": 0.15299026425591097, "grad_norm": 0.574098452266438, "learning_rate": 9.91473271402692e-06, "loss": 0.0146, "step": 9350 }, { "epoch": 0.15315389020698683, "grad_norm": 0.5249645618883285, "learning_rate": 9.914206748053294e-06, "loss": 0.0088, "step": 9360 }, { "epoch": 0.15331751615806266, "grad_norm": 0.550680337462673, "learning_rate": 9.913679178906622e-06, "loss": 0.0101, "step": 9370 }, { "epoch": 0.15348114210913852, "grad_norm": 0.3011147529976332, "learning_rate": 9.913150006759016e-06, "loss": 0.0089, "step": 9380 }, { "epoch": 0.15364476806021435, "grad_norm": 0.5461743807658227, "learning_rate": 9.912619231783108e-06, "loss": 0.0065, "step": 9390 }, { "epoch": 0.15380839401129018, "grad_norm": 0.4394247789462552, "learning_rate": 9.912086854152052e-06, "loss": 0.0082, "step": 9400 }, { "epoch": 0.15397201996236604, "grad_norm": 0.34739636345630137, "learning_rate": 9.91155287403953e-06, "loss": 0.0096, "step": 9410 }, { "epoch": 0.15413564591344187, "grad_norm": 0.5506210556273866, "learning_rate": 9.911017291619742e-06, "loss": 0.006, "step": 9420 }, { "epoch": 0.15429927186451772, "grad_norm": 0.6869432462297704, "learning_rate": 9.910480107067412e-06, "loss": 0.0092, "step": 9430 }, { "epoch": 0.15446289781559355, "grad_norm": 0.3028731878883146, "learning_rate": 9.90994132055779e-06, "loss": 0.0087, "step": 9440 }, { "epoch": 0.15462652376666938, "grad_norm": 0.5596070179186514, "learning_rate": 9.909400932266643e-06, "loss": 0.01, "step": 9450 }, { "epoch": 0.15479014971774524, "grad_norm": 0.6744786881582322, "learning_rate": 9.90885894237026e-06, "loss": 0.0097, "step": 9460 }, { "epoch": 0.15495377566882107, "grad_norm": 0.535044468377577, "learning_rate": 9.90831535104546e-06, "loss": 0.0091, "step": 9470 }, { "epoch": 0.15511740161989693, "grad_norm": 0.5124968937806973, "learning_rate": 9.907770158469581e-06, "loss": 0.0096, "step": 9480 }, { "epoch": 0.15528102757097276, "grad_norm": 0.211693740141867, "learning_rate": 9.90722336482048e-06, "loss": 0.0094, "step": 9490 }, { "epoch": 0.1554446535220486, "grad_norm": 0.56360984591575, "learning_rate": 9.906674970276538e-06, "loss": 0.0067, "step": 9500 }, { "epoch": 0.15560827947312444, "grad_norm": 0.40719023869943, "learning_rate": 9.90612497501666e-06, "loss": 0.0073, "step": 9510 }, { "epoch": 0.15577190542420027, "grad_norm": 0.226362732371626, "learning_rate": 9.905573379220274e-06, "loss": 0.0052, "step": 9520 }, { "epoch": 0.15593553137527613, "grad_norm": 0.16824055838680885, "learning_rate": 9.905020183067326e-06, "loss": 0.0104, "step": 9530 }, { "epoch": 0.15609915732635196, "grad_norm": 0.2439869592434656, "learning_rate": 9.904465386738287e-06, "loss": 0.0068, "step": 9540 }, { "epoch": 0.1562627832774278, "grad_norm": 0.3589889660316506, "learning_rate": 9.90390899041415e-06, "loss": 0.0079, "step": 9550 }, { "epoch": 0.15642640922850365, "grad_norm": 0.581687246316274, "learning_rate": 9.903350994276429e-06, "loss": 0.0073, "step": 9560 }, { "epoch": 0.15659003517957948, "grad_norm": 0.11769508423524805, "learning_rate": 9.902791398507162e-06, "loss": 0.0099, "step": 9570 }, { "epoch": 0.1567536611306553, "grad_norm": 0.2404613429511316, "learning_rate": 9.902230203288902e-06, "loss": 0.0074, "step": 9580 }, { "epoch": 0.15691728708173117, "grad_norm": 0.3787016102887237, "learning_rate": 9.901667408804736e-06, "loss": 0.01, "step": 9590 }, { "epoch": 0.157080913032807, "grad_norm": 0.48468770378930476, "learning_rate": 9.901103015238262e-06, "loss": 0.0112, "step": 9600 }, { "epoch": 0.15724453898388285, "grad_norm": 0.40627445634562653, "learning_rate": 9.900537022773605e-06, "loss": 0.0078, "step": 9610 }, { "epoch": 0.15740816493495868, "grad_norm": 0.25892633792073505, "learning_rate": 9.899969431595406e-06, "loss": 0.0067, "step": 9620 }, { "epoch": 0.1575717908860345, "grad_norm": 0.19838158906940834, "learning_rate": 9.899400241888837e-06, "loss": 0.0052, "step": 9630 }, { "epoch": 0.15773541683711037, "grad_norm": 0.5851929222273135, "learning_rate": 9.898829453839584e-06, "loss": 0.0117, "step": 9640 }, { "epoch": 0.1578990427881862, "grad_norm": 0.26023426338765165, "learning_rate": 9.898257067633853e-06, "loss": 0.0085, "step": 9650 }, { "epoch": 0.15806266873926206, "grad_norm": 0.35403532811040644, "learning_rate": 9.897683083458382e-06, "loss": 0.0078, "step": 9660 }, { "epoch": 0.1582262946903379, "grad_norm": 0.3324359511178061, "learning_rate": 9.897107501500418e-06, "loss": 0.0098, "step": 9670 }, { "epoch": 0.15838992064141372, "grad_norm": 0.48517397962770487, "learning_rate": 9.896530321947734e-06, "loss": 0.0095, "step": 9680 }, { "epoch": 0.15855354659248957, "grad_norm": 0.47146398077149093, "learning_rate": 9.895951544988629e-06, "loss": 0.0113, "step": 9690 }, { "epoch": 0.1587171725435654, "grad_norm": 0.3270636987118544, "learning_rate": 9.895371170811915e-06, "loss": 0.0077, "step": 9700 }, { "epoch": 0.15888079849464126, "grad_norm": 0.5261811419817289, "learning_rate": 9.89478919960693e-06, "loss": 0.0073, "step": 9710 }, { "epoch": 0.1590444244457171, "grad_norm": 0.18257772348583526, "learning_rate": 9.89420563156353e-06, "loss": 0.0055, "step": 9720 }, { "epoch": 0.15920805039679292, "grad_norm": 0.43962987638433737, "learning_rate": 9.893620466872097e-06, "loss": 0.008, "step": 9730 }, { "epoch": 0.15937167634786878, "grad_norm": 0.3663453261638167, "learning_rate": 9.893033705723529e-06, "loss": 0.0052, "step": 9740 }, { "epoch": 0.1595353022989446, "grad_norm": 0.859832464154158, "learning_rate": 9.892445348309245e-06, "loss": 0.0075, "step": 9750 }, { "epoch": 0.15969892825002047, "grad_norm": 0.4476234566964949, "learning_rate": 9.89185539482119e-06, "loss": 0.0063, "step": 9760 }, { "epoch": 0.1598625542010963, "grad_norm": 0.16726253059105453, "learning_rate": 9.891263845451823e-06, "loss": 0.0076, "step": 9770 }, { "epoch": 0.16002618015217213, "grad_norm": 0.1659356537933866, "learning_rate": 9.890670700394125e-06, "loss": 0.0069, "step": 9780 }, { "epoch": 0.16018980610324798, "grad_norm": 0.5108017461498804, "learning_rate": 9.890075959841604e-06, "loss": 0.0085, "step": 9790 }, { "epoch": 0.1603534320543238, "grad_norm": 0.4302016027300465, "learning_rate": 9.889479623988281e-06, "loss": 0.0095, "step": 9800 }, { "epoch": 0.16051705800539964, "grad_norm": 0.37388041313357845, "learning_rate": 9.888881693028697e-06, "loss": 0.0089, "step": 9810 }, { "epoch": 0.1606806839564755, "grad_norm": 0.3038370863097063, "learning_rate": 9.888282167157921e-06, "loss": 0.0075, "step": 9820 }, { "epoch": 0.16084430990755133, "grad_norm": 0.8798560569350088, "learning_rate": 9.887681046571535e-06, "loss": 0.0092, "step": 9830 }, { "epoch": 0.1610079358586272, "grad_norm": 0.39983846609676804, "learning_rate": 9.887078331465648e-06, "loss": 0.0073, "step": 9840 }, { "epoch": 0.16117156180970302, "grad_norm": 0.3288416908084513, "learning_rate": 9.886474022036878e-06, "loss": 0.0108, "step": 9850 }, { "epoch": 0.16133518776077885, "grad_norm": 0.21542943838620762, "learning_rate": 9.885868118482376e-06, "loss": 0.0097, "step": 9860 }, { "epoch": 0.1614988137118547, "grad_norm": 0.7118021771700417, "learning_rate": 9.885260620999805e-06, "loss": 0.0095, "step": 9870 }, { "epoch": 0.16166243966293053, "grad_norm": 0.39639184416249734, "learning_rate": 9.884651529787353e-06, "loss": 0.0087, "step": 9880 }, { "epoch": 0.1618260656140064, "grad_norm": 0.5077956662498805, "learning_rate": 9.884040845043721e-06, "loss": 0.0041, "step": 9890 }, { "epoch": 0.16198969156508222, "grad_norm": 0.4065907434295841, "learning_rate": 9.883428566968136e-06, "loss": 0.0091, "step": 9900 }, { "epoch": 0.16215331751615805, "grad_norm": 0.47204222186076167, "learning_rate": 9.882814695760345e-06, "loss": 0.0092, "step": 9910 }, { "epoch": 0.1623169434672339, "grad_norm": 0.1558916093685597, "learning_rate": 9.88219923162061e-06, "loss": 0.0068, "step": 9920 }, { "epoch": 0.16248056941830974, "grad_norm": 0.20919770550817593, "learning_rate": 9.881582174749715e-06, "loss": 0.008, "step": 9930 }, { "epoch": 0.1626441953693856, "grad_norm": 0.23932957273879332, "learning_rate": 9.880963525348966e-06, "loss": 0.0098, "step": 9940 }, { "epoch": 0.16280782132046143, "grad_norm": 0.2068389087463516, "learning_rate": 9.880343283620187e-06, "loss": 0.0094, "step": 9950 }, { "epoch": 0.16297144727153726, "grad_norm": 0.14699290218634523, "learning_rate": 9.879721449765715e-06, "loss": 0.0042, "step": 9960 }, { "epoch": 0.16313507322261311, "grad_norm": 0.36538710189305573, "learning_rate": 9.87909802398842e-06, "loss": 0.0091, "step": 9970 }, { "epoch": 0.16329869917368894, "grad_norm": 0.33013233856342156, "learning_rate": 9.87847300649168e-06, "loss": 0.0064, "step": 9980 }, { "epoch": 0.1634623251247648, "grad_norm": 0.32196683340121124, "learning_rate": 9.877846397479395e-06, "loss": 0.0093, "step": 9990 }, { "epoch": 0.16362595107584063, "grad_norm": 0.309836342011727, "learning_rate": 9.877218197155987e-06, "loss": 0.0058, "step": 10000 }, { "epoch": 0.16378957702691646, "grad_norm": 0.322799701026625, "learning_rate": 9.876588405726395e-06, "loss": 0.0099, "step": 10010 }, { "epoch": 0.16395320297799232, "grad_norm": 0.25984899724981264, "learning_rate": 9.875957023396077e-06, "loss": 0.0082, "step": 10020 }, { "epoch": 0.16411682892906815, "grad_norm": 0.4110625467661034, "learning_rate": 9.875324050371009e-06, "loss": 0.0095, "step": 10030 }, { "epoch": 0.16428045488014398, "grad_norm": 0.4428619115890564, "learning_rate": 9.874689486857688e-06, "loss": 0.0079, "step": 10040 }, { "epoch": 0.16444408083121984, "grad_norm": 0.2970819924248066, "learning_rate": 9.874053333063129e-06, "loss": 0.0074, "step": 10050 }, { "epoch": 0.16460770678229567, "grad_norm": 0.43446315284570247, "learning_rate": 9.873415589194868e-06, "loss": 0.0067, "step": 10060 }, { "epoch": 0.16477133273337152, "grad_norm": 0.5383267442342676, "learning_rate": 9.872776255460957e-06, "loss": 0.0089, "step": 10070 }, { "epoch": 0.16493495868444735, "grad_norm": 0.3985499659950599, "learning_rate": 9.872135332069964e-06, "loss": 0.0082, "step": 10080 }, { "epoch": 0.16509858463552318, "grad_norm": 0.4268874543393266, "learning_rate": 9.871492819230983e-06, "loss": 0.008, "step": 10090 }, { "epoch": 0.16526221058659904, "grad_norm": 0.6020027020977168, "learning_rate": 9.870848717153621e-06, "loss": 0.0082, "step": 10100 }, { "epoch": 0.16542583653767487, "grad_norm": 0.5374988523185453, "learning_rate": 9.870203026048004e-06, "loss": 0.0101, "step": 10110 }, { "epoch": 0.16558946248875073, "grad_norm": 0.4329202167058763, "learning_rate": 9.869555746124777e-06, "loss": 0.007, "step": 10120 }, { "epoch": 0.16575308843982656, "grad_norm": 0.4535929033520635, "learning_rate": 9.868906877595107e-06, "loss": 0.0091, "step": 10130 }, { "epoch": 0.1659167143909024, "grad_norm": 0.2632666687701405, "learning_rate": 9.86825642067067e-06, "loss": 0.0065, "step": 10140 }, { "epoch": 0.16608034034197824, "grad_norm": 0.36879767811462943, "learning_rate": 9.867604375563671e-06, "loss": 0.0097, "step": 10150 }, { "epoch": 0.16624396629305407, "grad_norm": 0.291099772187893, "learning_rate": 9.866950742486829e-06, "loss": 0.0087, "step": 10160 }, { "epoch": 0.16640759224412993, "grad_norm": 0.2844259989266916, "learning_rate": 9.866295521653376e-06, "loss": 0.0083, "step": 10170 }, { "epoch": 0.16657121819520576, "grad_norm": 0.5125757783765755, "learning_rate": 9.865638713277068e-06, "loss": 0.01, "step": 10180 }, { "epoch": 0.1667348441462816, "grad_norm": 0.46441647891567794, "learning_rate": 9.864980317572178e-06, "loss": 0.0041, "step": 10190 }, { "epoch": 0.16689847009735745, "grad_norm": 0.31501133107570845, "learning_rate": 9.864320334753495e-06, "loss": 0.0072, "step": 10200 }, { "epoch": 0.16706209604843328, "grad_norm": 0.38613445544791253, "learning_rate": 9.863658765036328e-06, "loss": 0.0075, "step": 10210 }, { "epoch": 0.16722572199950914, "grad_norm": 0.17437420262687167, "learning_rate": 9.862995608636502e-06, "loss": 0.0063, "step": 10220 }, { "epoch": 0.16738934795058497, "grad_norm": 0.5324733380918414, "learning_rate": 9.862330865770357e-06, "loss": 0.0073, "step": 10230 }, { "epoch": 0.1675529739016608, "grad_norm": 0.5242492073416941, "learning_rate": 9.861664536654757e-06, "loss": 0.01, "step": 10240 }, { "epoch": 0.16771659985273665, "grad_norm": 0.20473056698530814, "learning_rate": 9.86099662150708e-06, "loss": 0.0071, "step": 10250 }, { "epoch": 0.16788022580381248, "grad_norm": 0.27294945119052566, "learning_rate": 9.86032712054522e-06, "loss": 0.0074, "step": 10260 }, { "epoch": 0.1680438517548883, "grad_norm": 0.3483303015911527, "learning_rate": 9.85965603398759e-06, "loss": 0.0106, "step": 10270 }, { "epoch": 0.16820747770596417, "grad_norm": 0.2328859732347586, "learning_rate": 9.85898336205312e-06, "loss": 0.0093, "step": 10280 }, { "epoch": 0.16837110365704, "grad_norm": 0.3140498645292364, "learning_rate": 9.85830910496126e-06, "loss": 0.0071, "step": 10290 }, { "epoch": 0.16853472960811586, "grad_norm": 0.5199400245549783, "learning_rate": 9.857633262931972e-06, "loss": 0.0102, "step": 10300 }, { "epoch": 0.1686983555591917, "grad_norm": 0.4056932278604273, "learning_rate": 9.856955836185736e-06, "loss": 0.0083, "step": 10310 }, { "epoch": 0.16886198151026752, "grad_norm": 0.12662900147618225, "learning_rate": 9.856276824943553e-06, "loss": 0.0084, "step": 10320 }, { "epoch": 0.16902560746134337, "grad_norm": 0.25840266007922685, "learning_rate": 9.855596229426941e-06, "loss": 0.01, "step": 10330 }, { "epoch": 0.1691892334124192, "grad_norm": 0.4803474933005574, "learning_rate": 9.854914049857926e-06, "loss": 0.0068, "step": 10340 }, { "epoch": 0.16935285936349506, "grad_norm": 0.5893516603927258, "learning_rate": 9.854230286459062e-06, "loss": 0.0093, "step": 10350 }, { "epoch": 0.1695164853145709, "grad_norm": 0.41429833719847015, "learning_rate": 9.85354493945341e-06, "loss": 0.0084, "step": 10360 }, { "epoch": 0.16968011126564672, "grad_norm": 0.14762429963862791, "learning_rate": 9.852858009064557e-06, "loss": 0.0064, "step": 10370 }, { "epoch": 0.16984373721672258, "grad_norm": 0.25709373600296614, "learning_rate": 9.852169495516599e-06, "loss": 0.0063, "step": 10380 }, { "epoch": 0.1700073631677984, "grad_norm": 0.2652830553115959, "learning_rate": 9.851479399034153e-06, "loss": 0.0127, "step": 10390 }, { "epoch": 0.17017098911887427, "grad_norm": 0.8985472934503602, "learning_rate": 9.850787719842351e-06, "loss": 0.0071, "step": 10400 }, { "epoch": 0.1703346150699501, "grad_norm": 0.18685012772041115, "learning_rate": 9.850094458166841e-06, "loss": 0.0074, "step": 10410 }, { "epoch": 0.17049824102102593, "grad_norm": 0.1525854988689108, "learning_rate": 9.849399614233785e-06, "loss": 0.0064, "step": 10420 }, { "epoch": 0.17066186697210178, "grad_norm": 0.4601118642797104, "learning_rate": 9.848703188269865e-06, "loss": 0.0097, "step": 10430 }, { "epoch": 0.1708254929231776, "grad_norm": 0.38392863390009024, "learning_rate": 9.848005180502279e-06, "loss": 0.0056, "step": 10440 }, { "epoch": 0.17098911887425344, "grad_norm": 0.21364795629404754, "learning_rate": 9.847305591158738e-06, "loss": 0.0052, "step": 10450 }, { "epoch": 0.1711527448253293, "grad_norm": 0.1429460745271687, "learning_rate": 9.846604420467471e-06, "loss": 0.0089, "step": 10460 }, { "epoch": 0.17131637077640513, "grad_norm": 0.4801257314849713, "learning_rate": 9.845901668657224e-06, "loss": 0.0083, "step": 10470 }, { "epoch": 0.171479996727481, "grad_norm": 0.6128505151573106, "learning_rate": 9.845197335957255e-06, "loss": 0.0106, "step": 10480 }, { "epoch": 0.17164362267855682, "grad_norm": 0.4370845575132257, "learning_rate": 9.84449142259734e-06, "loss": 0.0119, "step": 10490 }, { "epoch": 0.17180724862963265, "grad_norm": 0.5414983436641663, "learning_rate": 9.843783928807774e-06, "loss": 0.0055, "step": 10500 }, { "epoch": 0.1719708745807085, "grad_norm": 0.3138193315040035, "learning_rate": 9.843074854819362e-06, "loss": 0.0093, "step": 10510 }, { "epoch": 0.17213450053178433, "grad_norm": 0.5963417187091092, "learning_rate": 9.842364200863425e-06, "loss": 0.009, "step": 10520 }, { "epoch": 0.1722981264828602, "grad_norm": 0.5131871421267759, "learning_rate": 9.841651967171806e-06, "loss": 0.0079, "step": 10530 }, { "epoch": 0.17246175243393602, "grad_norm": 0.2833442046353934, "learning_rate": 9.840938153976856e-06, "loss": 0.0096, "step": 10540 }, { "epoch": 0.17262537838501185, "grad_norm": 0.43053219249403707, "learning_rate": 9.840222761511442e-06, "loss": 0.0089, "step": 10550 }, { "epoch": 0.1727890043360877, "grad_norm": 0.18665372389328408, "learning_rate": 9.839505790008951e-06, "loss": 0.0088, "step": 10560 }, { "epoch": 0.17295263028716354, "grad_norm": 0.27656579583746305, "learning_rate": 9.838787239703281e-06, "loss": 0.0098, "step": 10570 }, { "epoch": 0.1731162562382394, "grad_norm": 0.1720743109569887, "learning_rate": 9.838067110828848e-06, "loss": 0.0068, "step": 10580 }, { "epoch": 0.17327988218931523, "grad_norm": 0.527159024174209, "learning_rate": 9.837345403620579e-06, "loss": 0.0087, "step": 10590 }, { "epoch": 0.17344350814039106, "grad_norm": 0.3281411502061594, "learning_rate": 9.836622118313918e-06, "loss": 0.0097, "step": 10600 }, { "epoch": 0.1736071340914669, "grad_norm": 0.26966717061620776, "learning_rate": 9.835897255144828e-06, "loss": 0.0049, "step": 10610 }, { "epoch": 0.17377076004254274, "grad_norm": 0.37903177565896995, "learning_rate": 9.83517081434978e-06, "loss": 0.0073, "step": 10620 }, { "epoch": 0.1739343859936186, "grad_norm": 0.3433705280406555, "learning_rate": 9.83444279616576e-06, "loss": 0.0057, "step": 10630 }, { "epoch": 0.17409801194469443, "grad_norm": 0.4002972917486334, "learning_rate": 9.833713200830274e-06, "loss": 0.0073, "step": 10640 }, { "epoch": 0.17426163789577026, "grad_norm": 0.38881258312647543, "learning_rate": 9.832982028581341e-06, "loss": 0.0055, "step": 10650 }, { "epoch": 0.17442526384684612, "grad_norm": 0.35746573325509196, "learning_rate": 9.83224927965749e-06, "loss": 0.006, "step": 10660 }, { "epoch": 0.17458888979792195, "grad_norm": 0.4625603782162722, "learning_rate": 9.83151495429777e-06, "loss": 0.0062, "step": 10670 }, { "epoch": 0.17475251574899778, "grad_norm": 0.522601737733597, "learning_rate": 9.83077905274174e-06, "loss": 0.0111, "step": 10680 }, { "epoch": 0.17491614170007364, "grad_norm": 0.2978727947406584, "learning_rate": 9.830041575229473e-06, "loss": 0.0056, "step": 10690 }, { "epoch": 0.17507976765114946, "grad_norm": 0.3216176564452935, "learning_rate": 9.82930252200156e-06, "loss": 0.0079, "step": 10700 }, { "epoch": 0.17524339360222532, "grad_norm": 0.2855433876944901, "learning_rate": 9.828561893299106e-06, "loss": 0.0059, "step": 10710 }, { "epoch": 0.17540701955330115, "grad_norm": 0.3647883468307729, "learning_rate": 9.827819689363725e-06, "loss": 0.0075, "step": 10720 }, { "epoch": 0.17557064550437698, "grad_norm": 0.10036363130058357, "learning_rate": 9.82707591043755e-06, "loss": 0.007, "step": 10730 }, { "epoch": 0.17573427145545284, "grad_norm": 0.43802238484688444, "learning_rate": 9.826330556763225e-06, "loss": 0.0067, "step": 10740 }, { "epoch": 0.17589789740652867, "grad_norm": 0.569053628296172, "learning_rate": 9.825583628583907e-06, "loss": 0.0042, "step": 10750 }, { "epoch": 0.17606152335760453, "grad_norm": 0.584931561283576, "learning_rate": 9.824835126143269e-06, "loss": 0.0085, "step": 10760 }, { "epoch": 0.17622514930868036, "grad_norm": 0.2941199978430101, "learning_rate": 9.8240850496855e-06, "loss": 0.0073, "step": 10770 }, { "epoch": 0.1763887752597562, "grad_norm": 0.6186404002665649, "learning_rate": 9.823333399455293e-06, "loss": 0.0065, "step": 10780 }, { "epoch": 0.17655240121083204, "grad_norm": 0.4555420816456537, "learning_rate": 9.822580175697864e-06, "loss": 0.0057, "step": 10790 }, { "epoch": 0.17671602716190787, "grad_norm": 0.14560680073374588, "learning_rate": 9.82182537865894e-06, "loss": 0.0054, "step": 10800 }, { "epoch": 0.17687965311298373, "grad_norm": 0.23037198840494935, "learning_rate": 9.821069008584759e-06, "loss": 0.0062, "step": 10810 }, { "epoch": 0.17704327906405956, "grad_norm": 0.09358854796123231, "learning_rate": 9.820311065722072e-06, "loss": 0.0088, "step": 10820 }, { "epoch": 0.1772069050151354, "grad_norm": 0.10902212768373744, "learning_rate": 9.819551550318147e-06, "loss": 0.0051, "step": 10830 }, { "epoch": 0.17737053096621125, "grad_norm": 0.29398243222901055, "learning_rate": 9.818790462620762e-06, "loss": 0.0048, "step": 10840 }, { "epoch": 0.17753415691728708, "grad_norm": 0.21289324190222603, "learning_rate": 9.818027802878204e-06, "loss": 0.0079, "step": 10850 }, { "epoch": 0.17769778286836294, "grad_norm": 0.2985632379121235, "learning_rate": 9.817263571339285e-06, "loss": 0.0066, "step": 10860 }, { "epoch": 0.17786140881943877, "grad_norm": 0.30960720423749627, "learning_rate": 9.816497768253317e-06, "loss": 0.0078, "step": 10870 }, { "epoch": 0.1780250347705146, "grad_norm": 0.32659478410833875, "learning_rate": 9.81573039387013e-06, "loss": 0.0066, "step": 10880 }, { "epoch": 0.17818866072159045, "grad_norm": 0.2689840558915546, "learning_rate": 9.814961448440066e-06, "loss": 0.0065, "step": 10890 }, { "epoch": 0.17835228667266628, "grad_norm": 0.3050141845385428, "learning_rate": 9.814190932213981e-06, "loss": 0.0071, "step": 10900 }, { "epoch": 0.1785159126237421, "grad_norm": 0.7256849726430792, "learning_rate": 9.813418845443242e-06, "loss": 0.0072, "step": 10910 }, { "epoch": 0.17867953857481797, "grad_norm": 0.27098188971047327, "learning_rate": 9.812645188379729e-06, "loss": 0.0051, "step": 10920 }, { "epoch": 0.1788431645258938, "grad_norm": 0.05073862591655996, "learning_rate": 9.811869961275834e-06, "loss": 0.0087, "step": 10930 }, { "epoch": 0.17900679047696966, "grad_norm": 0.16464891380734534, "learning_rate": 9.811093164384459e-06, "loss": 0.0057, "step": 10940 }, { "epoch": 0.1791704164280455, "grad_norm": 0.4692611579591489, "learning_rate": 9.810314797959024e-06, "loss": 0.0084, "step": 10950 }, { "epoch": 0.17933404237912132, "grad_norm": 0.36374923824547417, "learning_rate": 9.809534862253456e-06, "loss": 0.0087, "step": 10960 }, { "epoch": 0.17949766833019717, "grad_norm": 0.36037975071253286, "learning_rate": 9.808753357522193e-06, "loss": 0.0084, "step": 10970 }, { "epoch": 0.179661294281273, "grad_norm": 0.2744567048343302, "learning_rate": 9.80797028402019e-06, "loss": 0.0067, "step": 10980 }, { "epoch": 0.17982492023234886, "grad_norm": 0.3101395009251681, "learning_rate": 9.807185642002907e-06, "loss": 0.0076, "step": 10990 }, { "epoch": 0.1799885461834247, "grad_norm": 0.27151334583454845, "learning_rate": 9.806399431726326e-06, "loss": 0.0082, "step": 11000 }, { "epoch": 0.18015217213450052, "grad_norm": 0.3093504215518516, "learning_rate": 9.805611653446926e-06, "loss": 0.0077, "step": 11010 }, { "epoch": 0.18031579808557638, "grad_norm": 0.1842202760155649, "learning_rate": 9.804822307421713e-06, "loss": 0.0086, "step": 11020 }, { "epoch": 0.1804794240366522, "grad_norm": 0.17955579595185978, "learning_rate": 9.804031393908192e-06, "loss": 0.0097, "step": 11030 }, { "epoch": 0.18064304998772807, "grad_norm": 0.4727951966746553, "learning_rate": 9.80323891316439e-06, "loss": 0.0088, "step": 11040 }, { "epoch": 0.1808066759388039, "grad_norm": 0.304157099812122, "learning_rate": 9.802444865448834e-06, "loss": 0.0063, "step": 11050 }, { "epoch": 0.18097030188987973, "grad_norm": 0.48102946848628697, "learning_rate": 9.80164925102057e-06, "loss": 0.0087, "step": 11060 }, { "epoch": 0.18113392784095558, "grad_norm": 0.17445604439930787, "learning_rate": 9.800852070139154e-06, "loss": 0.0083, "step": 11070 }, { "epoch": 0.1812975537920314, "grad_norm": 0.2521407754152959, "learning_rate": 9.800053323064655e-06, "loss": 0.0108, "step": 11080 }, { "epoch": 0.18146117974310727, "grad_norm": 0.4284801552123415, "learning_rate": 9.799253010057645e-06, "loss": 0.0076, "step": 11090 }, { "epoch": 0.1816248056941831, "grad_norm": 0.3192531954836076, "learning_rate": 9.798451131379213e-06, "loss": 0.0048, "step": 11100 }, { "epoch": 0.18178843164525893, "grad_norm": 0.4739618682474299, "learning_rate": 9.797647687290959e-06, "loss": 0.0079, "step": 11110 }, { "epoch": 0.1819520575963348, "grad_norm": 0.43335921480928347, "learning_rate": 9.796842678054994e-06, "loss": 0.0063, "step": 11120 }, { "epoch": 0.18211568354741062, "grad_norm": 0.7066375515263355, "learning_rate": 9.796036103933935e-06, "loss": 0.0049, "step": 11130 }, { "epoch": 0.18227930949848645, "grad_norm": 0.4281251377031283, "learning_rate": 9.795227965190914e-06, "loss": 0.0071, "step": 11140 }, { "epoch": 0.1824429354495623, "grad_norm": 0.589647200424727, "learning_rate": 9.794418262089571e-06, "loss": 0.0051, "step": 11150 }, { "epoch": 0.18260656140063813, "grad_norm": 0.21634038640987657, "learning_rate": 9.793606994894058e-06, "loss": 0.0065, "step": 11160 }, { "epoch": 0.182770187351714, "grad_norm": 0.2466957868707691, "learning_rate": 9.792794163869038e-06, "loss": 0.0063, "step": 11170 }, { "epoch": 0.18293381330278982, "grad_norm": 0.2585836539038335, "learning_rate": 9.79197976927968e-06, "loss": 0.0065, "step": 11180 }, { "epoch": 0.18309743925386565, "grad_norm": 0.1315996827887025, "learning_rate": 9.791163811391667e-06, "loss": 0.0091, "step": 11190 }, { "epoch": 0.1832610652049415, "grad_norm": 0.18507857651942677, "learning_rate": 9.790346290471192e-06, "loss": 0.0041, "step": 11200 }, { "epoch": 0.18342469115601734, "grad_norm": 0.36226611268884884, "learning_rate": 9.789527206784955e-06, "loss": 0.0075, "step": 11210 }, { "epoch": 0.1835883171070932, "grad_norm": 0.6361997358448347, "learning_rate": 9.788706560600167e-06, "loss": 0.0068, "step": 11220 }, { "epoch": 0.18375194305816903, "grad_norm": 0.1864986627660527, "learning_rate": 9.787884352184552e-06, "loss": 0.0049, "step": 11230 }, { "epoch": 0.18391556900924486, "grad_norm": 0.42457477005701955, "learning_rate": 9.78706058180634e-06, "loss": 0.0067, "step": 11240 }, { "epoch": 0.1840791949603207, "grad_norm": 0.2995253922746208, "learning_rate": 9.78623524973427e-06, "loss": 0.0071, "step": 11250 }, { "epoch": 0.18424282091139654, "grad_norm": 0.1470295387337036, "learning_rate": 9.785408356237595e-06, "loss": 0.007, "step": 11260 }, { "epoch": 0.1844064468624724, "grad_norm": 0.3706219745456237, "learning_rate": 9.78457990158607e-06, "loss": 0.0073, "step": 11270 }, { "epoch": 0.18457007281354823, "grad_norm": 0.18642248818552987, "learning_rate": 9.783749886049968e-06, "loss": 0.0059, "step": 11280 }, { "epoch": 0.18473369876462406, "grad_norm": 0.3367756237327783, "learning_rate": 9.782918309900065e-06, "loss": 0.0069, "step": 11290 }, { "epoch": 0.18489732471569992, "grad_norm": 0.30037444777426003, "learning_rate": 9.782085173407648e-06, "loss": 0.0073, "step": 11300 }, { "epoch": 0.18506095066677575, "grad_norm": 0.2611671832619326, "learning_rate": 9.781250476844512e-06, "loss": 0.0109, "step": 11310 }, { "epoch": 0.1852245766178516, "grad_norm": 0.2763791821038597, "learning_rate": 9.780414220482963e-06, "loss": 0.005, "step": 11320 }, { "epoch": 0.18538820256892743, "grad_norm": 0.4920780237134297, "learning_rate": 9.779576404595816e-06, "loss": 0.0068, "step": 11330 }, { "epoch": 0.18555182852000326, "grad_norm": 0.34772583793447126, "learning_rate": 9.77873702945639e-06, "loss": 0.0081, "step": 11340 }, { "epoch": 0.18571545447107912, "grad_norm": 0.2402157466953789, "learning_rate": 9.777896095338523e-06, "loss": 0.0054, "step": 11350 }, { "epoch": 0.18587908042215495, "grad_norm": 0.2175962648848062, "learning_rate": 9.777053602516548e-06, "loss": 0.0074, "step": 11360 }, { "epoch": 0.18604270637323078, "grad_norm": 0.4080172542591644, "learning_rate": 9.776209551265316e-06, "loss": 0.0092, "step": 11370 }, { "epoch": 0.18620633232430664, "grad_norm": 0.47385268601911257, "learning_rate": 9.775363941860185e-06, "loss": 0.0092, "step": 11380 }, { "epoch": 0.18636995827538247, "grad_norm": 0.31266517862802007, "learning_rate": 9.774516774577019e-06, "loss": 0.0101, "step": 11390 }, { "epoch": 0.18653358422645833, "grad_norm": 0.3761774585841798, "learning_rate": 9.77366804969219e-06, "loss": 0.0079, "step": 11400 }, { "epoch": 0.18669721017753416, "grad_norm": 0.8707324719372528, "learning_rate": 9.772817767482583e-06, "loss": 0.0071, "step": 11410 }, { "epoch": 0.18686083612861, "grad_norm": 0.725546705686707, "learning_rate": 9.771965928225583e-06, "loss": 0.0089, "step": 11420 }, { "epoch": 0.18702446207968584, "grad_norm": 0.5032077513770665, "learning_rate": 9.771112532199091e-06, "loss": 0.0069, "step": 11430 }, { "epoch": 0.18718808803076167, "grad_norm": 0.5050081611524134, "learning_rate": 9.770257579681513e-06, "loss": 0.0066, "step": 11440 }, { "epoch": 0.18735171398183753, "grad_norm": 0.524208787867849, "learning_rate": 9.769401070951758e-06, "loss": 0.0104, "step": 11450 }, { "epoch": 0.18751533993291336, "grad_norm": 0.41466010769731676, "learning_rate": 9.76854300628925e-06, "loss": 0.0063, "step": 11460 }, { "epoch": 0.1876789658839892, "grad_norm": 0.1771967172885356, "learning_rate": 9.767683385973917e-06, "loss": 0.007, "step": 11470 }, { "epoch": 0.18784259183506505, "grad_norm": 0.1683073529292256, "learning_rate": 9.766822210286193e-06, "loss": 0.0079, "step": 11480 }, { "epoch": 0.18800621778614088, "grad_norm": 0.4341036637448293, "learning_rate": 9.765959479507024e-06, "loss": 0.0069, "step": 11490 }, { "epoch": 0.18816984373721674, "grad_norm": 0.337723694912821, "learning_rate": 9.76509519391786e-06, "loss": 0.006, "step": 11500 }, { "epoch": 0.18833346968829257, "grad_norm": 0.24703291992630738, "learning_rate": 9.764229353800658e-06, "loss": 0.0059, "step": 11510 }, { "epoch": 0.1884970956393684, "grad_norm": 0.44953337718332714, "learning_rate": 9.763361959437882e-06, "loss": 0.0107, "step": 11520 }, { "epoch": 0.18866072159044425, "grad_norm": 0.34415607589276814, "learning_rate": 9.762493011112507e-06, "loss": 0.0084, "step": 11530 }, { "epoch": 0.18882434754152008, "grad_norm": 0.24751949638319196, "learning_rate": 9.761622509108011e-06, "loss": 0.007, "step": 11540 }, { "epoch": 0.1889879734925959, "grad_norm": 0.3063008796272494, "learning_rate": 9.760750453708378e-06, "loss": 0.0077, "step": 11550 }, { "epoch": 0.18915159944367177, "grad_norm": 0.20160061922919376, "learning_rate": 9.759876845198103e-06, "loss": 0.0073, "step": 11560 }, { "epoch": 0.1893152253947476, "grad_norm": 0.5691982020473816, "learning_rate": 9.759001683862186e-06, "loss": 0.009, "step": 11570 }, { "epoch": 0.18947885134582346, "grad_norm": 0.4357055435169999, "learning_rate": 9.758124969986129e-06, "loss": 0.0075, "step": 11580 }, { "epoch": 0.1896424772968993, "grad_norm": 0.08464089722462388, "learning_rate": 9.757246703855947e-06, "loss": 0.0048, "step": 11590 }, { "epoch": 0.18980610324797512, "grad_norm": 0.28062969462398113, "learning_rate": 9.75636688575816e-06, "loss": 0.0095, "step": 11600 }, { "epoch": 0.18996972919905097, "grad_norm": 0.3264916594553321, "learning_rate": 9.75548551597979e-06, "loss": 0.0052, "step": 11610 }, { "epoch": 0.1901333551501268, "grad_norm": 0.17973620222674624, "learning_rate": 9.75460259480837e-06, "loss": 0.009, "step": 11620 }, { "epoch": 0.19029698110120266, "grad_norm": 0.2029561933904139, "learning_rate": 9.753718122531937e-06, "loss": 0.0052, "step": 11630 }, { "epoch": 0.1904606070522785, "grad_norm": 0.3988648618580588, "learning_rate": 9.752832099439035e-06, "loss": 0.0078, "step": 11640 }, { "epoch": 0.19062423300335432, "grad_norm": 0.24359260999830218, "learning_rate": 9.751944525818715e-06, "loss": 0.0057, "step": 11650 }, { "epoch": 0.19078785895443018, "grad_norm": 0.37338015527357743, "learning_rate": 9.75105540196053e-06, "loss": 0.0057, "step": 11660 }, { "epoch": 0.190951484905506, "grad_norm": 0.21174412550323646, "learning_rate": 9.750164728154538e-06, "loss": 0.0064, "step": 11670 }, { "epoch": 0.19111511085658187, "grad_norm": 0.14801117799063032, "learning_rate": 9.749272504691312e-06, "loss": 0.0055, "step": 11680 }, { "epoch": 0.1912787368076577, "grad_norm": 0.31000782565843377, "learning_rate": 9.748378731861921e-06, "loss": 0.0084, "step": 11690 }, { "epoch": 0.19144236275873353, "grad_norm": 0.14206174103190947, "learning_rate": 9.74748340995794e-06, "loss": 0.0049, "step": 11700 }, { "epoch": 0.19160598870980938, "grad_norm": 0.29401582988388164, "learning_rate": 9.746586539271457e-06, "loss": 0.0078, "step": 11710 }, { "epoch": 0.1917696146608852, "grad_norm": 0.4859946997593239, "learning_rate": 9.745688120095056e-06, "loss": 0.008, "step": 11720 }, { "epoch": 0.19193324061196107, "grad_norm": 0.34352281638395943, "learning_rate": 9.744788152721834e-06, "loss": 0.0056, "step": 11730 }, { "epoch": 0.1920968665630369, "grad_norm": 0.2952173557394536, "learning_rate": 9.743886637445385e-06, "loss": 0.0043, "step": 11740 }, { "epoch": 0.19226049251411273, "grad_norm": 0.03682881227578708, "learning_rate": 9.742983574559817e-06, "loss": 0.0068, "step": 11750 }, { "epoch": 0.1924241184651886, "grad_norm": 0.16629063839522457, "learning_rate": 9.742078964359736e-06, "loss": 0.0053, "step": 11760 }, { "epoch": 0.19258774441626442, "grad_norm": 0.7447714376553686, "learning_rate": 9.741172807140254e-06, "loss": 0.0083, "step": 11770 }, { "epoch": 0.19275137036734025, "grad_norm": 0.31920258692730474, "learning_rate": 9.74026510319699e-06, "loss": 0.0053, "step": 11780 }, { "epoch": 0.1929149963184161, "grad_norm": 0.5438699773052527, "learning_rate": 9.739355852826069e-06, "loss": 0.0051, "step": 11790 }, { "epoch": 0.19307862226949193, "grad_norm": 0.22706135112454567, "learning_rate": 9.738445056324113e-06, "loss": 0.0058, "step": 11800 }, { "epoch": 0.1932422482205678, "grad_norm": 0.32765932232844974, "learning_rate": 9.737532713988256e-06, "loss": 0.0046, "step": 11810 }, { "epoch": 0.19340587417164362, "grad_norm": 0.3894588915813887, "learning_rate": 9.736618826116133e-06, "loss": 0.0087, "step": 11820 }, { "epoch": 0.19356950012271945, "grad_norm": 0.20356218035756277, "learning_rate": 9.735703393005885e-06, "loss": 0.0061, "step": 11830 }, { "epoch": 0.1937331260737953, "grad_norm": 0.34566481300505325, "learning_rate": 9.734786414956152e-06, "loss": 0.0053, "step": 11840 }, { "epoch": 0.19389675202487114, "grad_norm": 0.5089015288453581, "learning_rate": 9.733867892266087e-06, "loss": 0.0066, "step": 11850 }, { "epoch": 0.194060377975947, "grad_norm": 0.31411316990389243, "learning_rate": 9.732947825235339e-06, "loss": 0.0077, "step": 11860 }, { "epoch": 0.19422400392702283, "grad_norm": 0.6256280330075266, "learning_rate": 9.732026214164063e-06, "loss": 0.01, "step": 11870 }, { "epoch": 0.19438762987809866, "grad_norm": 0.4237697237391104, "learning_rate": 9.73110305935292e-06, "loss": 0.008, "step": 11880 }, { "epoch": 0.1945512558291745, "grad_norm": 0.28542089516947944, "learning_rate": 9.730178361103071e-06, "loss": 0.0063, "step": 11890 }, { "epoch": 0.19471488178025034, "grad_norm": 0.6879265857677136, "learning_rate": 9.729252119716183e-06, "loss": 0.0093, "step": 11900 }, { "epoch": 0.1948785077313262, "grad_norm": 0.36321127849872575, "learning_rate": 9.72832433549443e-06, "loss": 0.0062, "step": 11910 }, { "epoch": 0.19504213368240203, "grad_norm": 0.16790806420191687, "learning_rate": 9.72739500874048e-06, "loss": 0.0062, "step": 11920 }, { "epoch": 0.19520575963347786, "grad_norm": 0.12708815413892316, "learning_rate": 9.72646413975751e-06, "loss": 0.0057, "step": 11930 }, { "epoch": 0.19536938558455372, "grad_norm": 0.2913581949467308, "learning_rate": 9.725531728849202e-06, "loss": 0.0062, "step": 11940 }, { "epoch": 0.19553301153562955, "grad_norm": 0.12793090218002592, "learning_rate": 9.724597776319737e-06, "loss": 0.0075, "step": 11950 }, { "epoch": 0.1956966374867054, "grad_norm": 0.29278360496416067, "learning_rate": 9.7236622824738e-06, "loss": 0.0056, "step": 11960 }, { "epoch": 0.19586026343778123, "grad_norm": 0.43463834151273584, "learning_rate": 9.72272524761658e-06, "loss": 0.0076, "step": 11970 }, { "epoch": 0.19602388938885706, "grad_norm": 0.40536106608832556, "learning_rate": 9.72178667205377e-06, "loss": 0.0075, "step": 11980 }, { "epoch": 0.19618751533993292, "grad_norm": 0.28355589944274456, "learning_rate": 9.72084655609156e-06, "loss": 0.0118, "step": 11990 }, { "epoch": 0.19635114129100875, "grad_norm": 0.2101001075035258, "learning_rate": 9.719904900036648e-06, "loss": 0.0067, "step": 12000 }, { "epoch": 0.19651476724208458, "grad_norm": 0.11458634054087051, "learning_rate": 9.718961704196233e-06, "loss": 0.0085, "step": 12010 }, { "epoch": 0.19667839319316044, "grad_norm": 0.33031898045802605, "learning_rate": 9.718016968878016e-06, "loss": 0.008, "step": 12020 }, { "epoch": 0.19684201914423627, "grad_norm": 0.19265538490576667, "learning_rate": 9.717070694390199e-06, "loss": 0.0075, "step": 12030 }, { "epoch": 0.19700564509531213, "grad_norm": 0.21289840457184517, "learning_rate": 9.716122881041488e-06, "loss": 0.0078, "step": 12040 }, { "epoch": 0.19716927104638796, "grad_norm": 0.4889128426999637, "learning_rate": 9.715173529141092e-06, "loss": 0.0085, "step": 12050 }, { "epoch": 0.19733289699746379, "grad_norm": 0.2621124078049699, "learning_rate": 9.714222638998717e-06, "loss": 0.0113, "step": 12060 }, { "epoch": 0.19749652294853964, "grad_norm": 0.3385495327414111, "learning_rate": 9.713270210924577e-06, "loss": 0.0076, "step": 12070 }, { "epoch": 0.19766014889961547, "grad_norm": 0.16629378462763933, "learning_rate": 9.712316245229383e-06, "loss": 0.0067, "step": 12080 }, { "epoch": 0.19782377485069133, "grad_norm": 0.4981478954399011, "learning_rate": 9.71136074222435e-06, "loss": 0.0076, "step": 12090 }, { "epoch": 0.19798740080176716, "grad_norm": 0.4360526731889721, "learning_rate": 9.710403702221194e-06, "loss": 0.0093, "step": 12100 }, { "epoch": 0.198151026752843, "grad_norm": 0.31864374836859155, "learning_rate": 9.709445125532133e-06, "loss": 0.0059, "step": 12110 }, { "epoch": 0.19831465270391885, "grad_norm": 0.3657849550095795, "learning_rate": 9.708485012469884e-06, "loss": 0.0062, "step": 12120 }, { "epoch": 0.19847827865499468, "grad_norm": 0.13541145412841896, "learning_rate": 9.707523363347669e-06, "loss": 0.0046, "step": 12130 }, { "epoch": 0.19864190460607054, "grad_norm": 0.32932502068846803, "learning_rate": 9.706560178479207e-06, "loss": 0.0081, "step": 12140 }, { "epoch": 0.19880553055714636, "grad_norm": 0.6144776783137986, "learning_rate": 9.705595458178722e-06, "loss": 0.0087, "step": 12150 }, { "epoch": 0.1989691565082222, "grad_norm": 0.2843430518113607, "learning_rate": 9.704629202760935e-06, "loss": 0.0059, "step": 12160 }, { "epoch": 0.19913278245929805, "grad_norm": 0.2364227673305673, "learning_rate": 9.703661412541073e-06, "loss": 0.0117, "step": 12170 }, { "epoch": 0.19929640841037388, "grad_norm": 5.346360808828941, "learning_rate": 9.702692087834856e-06, "loss": 0.0084, "step": 12180 }, { "epoch": 0.19946003436144974, "grad_norm": 0.175198840295415, "learning_rate": 9.701721228958511e-06, "loss": 0.0061, "step": 12190 }, { "epoch": 0.19962366031252557, "grad_norm": 0.5753284470503537, "learning_rate": 9.700748836228766e-06, "loss": 0.0074, "step": 12200 }, { "epoch": 0.1997872862636014, "grad_norm": 0.14650786177447164, "learning_rate": 9.699774909962842e-06, "loss": 0.0045, "step": 12210 }, { "epoch": 0.19995091221467726, "grad_norm": 1.0074347249256546, "learning_rate": 9.698799450478469e-06, "loss": 0.0074, "step": 12220 }, { "epoch": 0.2001145381657531, "grad_norm": 0.3469719396945667, "learning_rate": 9.697822458093872e-06, "loss": 0.0078, "step": 12230 }, { "epoch": 0.20027816411682892, "grad_norm": 0.5546327667459244, "learning_rate": 9.69684393312778e-06, "loss": 0.0084, "step": 12240 }, { "epoch": 0.20044179006790477, "grad_norm": 0.42755857876549763, "learning_rate": 9.695863875899415e-06, "loss": 0.0081, "step": 12250 }, { "epoch": 0.2006054160189806, "grad_norm": 0.5349918217490497, "learning_rate": 9.694882286728506e-06, "loss": 0.0047, "step": 12260 }, { "epoch": 0.20076904197005646, "grad_norm": 0.3678883635136783, "learning_rate": 9.693899165935278e-06, "loss": 0.0049, "step": 12270 }, { "epoch": 0.2009326679211323, "grad_norm": 0.3788620823741124, "learning_rate": 9.692914513840459e-06, "loss": 0.0063, "step": 12280 }, { "epoch": 0.20109629387220812, "grad_norm": 0.595153655535423, "learning_rate": 9.691928330765273e-06, "loss": 0.0093, "step": 12290 }, { "epoch": 0.20125991982328398, "grad_norm": 0.309871474654151, "learning_rate": 9.690940617031442e-06, "loss": 0.0057, "step": 12300 }, { "epoch": 0.2014235457743598, "grad_norm": 0.4458159092416787, "learning_rate": 9.689951372961195e-06, "loss": 0.0074, "step": 12310 }, { "epoch": 0.20158717172543567, "grad_norm": 0.26059267801767083, "learning_rate": 9.688960598877253e-06, "loss": 0.0064, "step": 12320 }, { "epoch": 0.2017507976765115, "grad_norm": 0.5035292099910589, "learning_rate": 9.687968295102837e-06, "loss": 0.0078, "step": 12330 }, { "epoch": 0.20191442362758732, "grad_norm": 0.31016519161160244, "learning_rate": 9.68697446196167e-06, "loss": 0.0057, "step": 12340 }, { "epoch": 0.20207804957866318, "grad_norm": 0.4823829055947124, "learning_rate": 9.685979099777975e-06, "loss": 0.0074, "step": 12350 }, { "epoch": 0.202241675529739, "grad_norm": 0.3607532471539726, "learning_rate": 9.684982208876468e-06, "loss": 0.0054, "step": 12360 }, { "epoch": 0.20240530148081487, "grad_norm": 0.307286937344267, "learning_rate": 9.683983789582368e-06, "loss": 0.006, "step": 12370 }, { "epoch": 0.2025689274318907, "grad_norm": 0.3021384956652315, "learning_rate": 9.682983842221392e-06, "loss": 0.0059, "step": 12380 }, { "epoch": 0.20273255338296653, "grad_norm": 0.14032912575562123, "learning_rate": 9.681982367119755e-06, "loss": 0.0128, "step": 12390 }, { "epoch": 0.2028961793340424, "grad_norm": 0.25494701953350013, "learning_rate": 9.68097936460417e-06, "loss": 0.0036, "step": 12400 }, { "epoch": 0.20305980528511822, "grad_norm": 0.7215056757533814, "learning_rate": 9.679974835001848e-06, "loss": 0.0072, "step": 12410 }, { "epoch": 0.20322343123619407, "grad_norm": 0.6874998132940427, "learning_rate": 9.678968778640501e-06, "loss": 0.0066, "step": 12420 }, { "epoch": 0.2033870571872699, "grad_norm": 0.6338288676066054, "learning_rate": 9.677961195848339e-06, "loss": 0.0073, "step": 12430 }, { "epoch": 0.20355068313834573, "grad_norm": 0.47786177716347256, "learning_rate": 9.67695208695406e-06, "loss": 0.0074, "step": 12440 }, { "epoch": 0.2037143090894216, "grad_norm": 0.4006828277497043, "learning_rate": 9.675941452286876e-06, "loss": 0.0057, "step": 12450 }, { "epoch": 0.20387793504049742, "grad_norm": 0.5486545919501874, "learning_rate": 9.674929292176487e-06, "loss": 0.0076, "step": 12460 }, { "epoch": 0.20404156099157325, "grad_norm": 0.43607037531822396, "learning_rate": 9.67391560695309e-06, "loss": 0.0073, "step": 12470 }, { "epoch": 0.2042051869426491, "grad_norm": 0.39146743131215267, "learning_rate": 9.672900396947385e-06, "loss": 0.0066, "step": 12480 }, { "epoch": 0.20436881289372494, "grad_norm": 0.35802275456821875, "learning_rate": 9.671883662490562e-06, "loss": 0.0062, "step": 12490 }, { "epoch": 0.2045324388448008, "grad_norm": 0.12251678948981015, "learning_rate": 9.670865403914317e-06, "loss": 0.0093, "step": 12500 }, { "epoch": 0.20469606479587663, "grad_norm": 0.3319458171569489, "learning_rate": 9.669845621550835e-06, "loss": 0.0078, "step": 12510 }, { "epoch": 0.20485969074695246, "grad_norm": 0.42948027025675256, "learning_rate": 9.668824315732805e-06, "loss": 0.0064, "step": 12520 }, { "epoch": 0.2050233166980283, "grad_norm": 0.47347068952592614, "learning_rate": 9.667801486793411e-06, "loss": 0.0087, "step": 12530 }, { "epoch": 0.20518694264910414, "grad_norm": 0.2905391598286487, "learning_rate": 9.666777135066328e-06, "loss": 0.0074, "step": 12540 }, { "epoch": 0.20535056860018, "grad_norm": 0.2237668634623418, "learning_rate": 9.665751260885737e-06, "loss": 0.0043, "step": 12550 }, { "epoch": 0.20551419455125583, "grad_norm": 0.48019144848456347, "learning_rate": 9.664723864586308e-06, "loss": 0.0066, "step": 12560 }, { "epoch": 0.20567782050233166, "grad_norm": 0.5002425281696792, "learning_rate": 9.663694946503213e-06, "loss": 0.0052, "step": 12570 }, { "epoch": 0.20584144645340752, "grad_norm": 0.3639612781704994, "learning_rate": 9.662664506972119e-06, "loss": 0.0091, "step": 12580 }, { "epoch": 0.20600507240448335, "grad_norm": 0.32170670076523167, "learning_rate": 9.661632546329187e-06, "loss": 0.0067, "step": 12590 }, { "epoch": 0.2061686983555592, "grad_norm": 0.431961869402456, "learning_rate": 9.660599064911077e-06, "loss": 0.0072, "step": 12600 }, { "epoch": 0.20633232430663503, "grad_norm": 0.36899834695392764, "learning_rate": 9.659564063054943e-06, "loss": 0.0061, "step": 12610 }, { "epoch": 0.20649595025771086, "grad_norm": 0.26563665919546947, "learning_rate": 9.658527541098438e-06, "loss": 0.0059, "step": 12620 }, { "epoch": 0.20665957620878672, "grad_norm": 0.11193708439377764, "learning_rate": 9.657489499379704e-06, "loss": 0.0077, "step": 12630 }, { "epoch": 0.20682320215986255, "grad_norm": 0.4780575751925386, "learning_rate": 9.656449938237391e-06, "loss": 0.0076, "step": 12640 }, { "epoch": 0.20698682811093838, "grad_norm": 0.23904755872916147, "learning_rate": 9.65540885801063e-06, "loss": 0.0088, "step": 12650 }, { "epoch": 0.20715045406201424, "grad_norm": 0.22872827533944465, "learning_rate": 9.654366259039063e-06, "loss": 0.0057, "step": 12660 }, { "epoch": 0.20731408001309007, "grad_norm": 0.25847687068755604, "learning_rate": 9.653322141662813e-06, "loss": 0.0092, "step": 12670 }, { "epoch": 0.20747770596416593, "grad_norm": 0.532797968329369, "learning_rate": 9.652276506222507e-06, "loss": 0.0102, "step": 12680 }, { "epoch": 0.20764133191524176, "grad_norm": 0.11560189216615746, "learning_rate": 9.651229353059266e-06, "loss": 0.0053, "step": 12690 }, { "epoch": 0.20780495786631759, "grad_norm": 0.3883805671309935, "learning_rate": 9.650180682514703e-06, "loss": 0.0071, "step": 12700 }, { "epoch": 0.20796858381739344, "grad_norm": 0.3029979787429645, "learning_rate": 9.64913049493093e-06, "loss": 0.0073, "step": 12710 }, { "epoch": 0.20813220976846927, "grad_norm": 0.1935482928563381, "learning_rate": 9.648078790650553e-06, "loss": 0.0041, "step": 12720 }, { "epoch": 0.20829583571954513, "grad_norm": 0.09819876469556359, "learning_rate": 9.647025570016667e-06, "loss": 0.0061, "step": 12730 }, { "epoch": 0.20845946167062096, "grad_norm": 0.4154960487791988, "learning_rate": 9.645970833372874e-06, "loss": 0.0099, "step": 12740 }, { "epoch": 0.2086230876216968, "grad_norm": 0.18485296738170212, "learning_rate": 9.644914581063258e-06, "loss": 0.0067, "step": 12750 }, { "epoch": 0.20878671357277265, "grad_norm": 0.5495450320419935, "learning_rate": 9.643856813432404e-06, "loss": 0.0083, "step": 12760 }, { "epoch": 0.20895033952384848, "grad_norm": 0.41755646701200866, "learning_rate": 9.64279753082539e-06, "loss": 0.0081, "step": 12770 }, { "epoch": 0.20911396547492433, "grad_norm": 0.10329182114654682, "learning_rate": 9.641736733587788e-06, "loss": 0.0051, "step": 12780 }, { "epoch": 0.20927759142600016, "grad_norm": 0.3841368710325325, "learning_rate": 9.640674422065665e-06, "loss": 0.0086, "step": 12790 }, { "epoch": 0.209441217377076, "grad_norm": 0.173328307705944, "learning_rate": 9.639610596605582e-06, "loss": 0.0097, "step": 12800 }, { "epoch": 0.20960484332815185, "grad_norm": 0.20769587644032075, "learning_rate": 9.63854525755459e-06, "loss": 0.0056, "step": 12810 }, { "epoch": 0.20976846927922768, "grad_norm": 0.46200802550431713, "learning_rate": 9.63747840526024e-06, "loss": 0.0064, "step": 12820 }, { "epoch": 0.20993209523030354, "grad_norm": 0.19081215819993722, "learning_rate": 9.636410040070573e-06, "loss": 0.0069, "step": 12830 }, { "epoch": 0.21009572118137937, "grad_norm": 0.049189931026139326, "learning_rate": 9.635340162334124e-06, "loss": 0.0067, "step": 12840 }, { "epoch": 0.2102593471324552, "grad_norm": 0.37550403481683076, "learning_rate": 9.634268772399922e-06, "loss": 0.0069, "step": 12850 }, { "epoch": 0.21042297308353106, "grad_norm": 0.1445532454869915, "learning_rate": 9.63319587061749e-06, "loss": 0.0058, "step": 12860 }, { "epoch": 0.21058659903460689, "grad_norm": 0.282937422846647, "learning_rate": 9.632121457336839e-06, "loss": 0.0037, "step": 12870 }, { "epoch": 0.21075022498568272, "grad_norm": 0.25010971169222856, "learning_rate": 9.631045532908483e-06, "loss": 0.0048, "step": 12880 }, { "epoch": 0.21091385093675857, "grad_norm": 0.2563170288396381, "learning_rate": 9.629968097683419e-06, "loss": 0.0049, "step": 12890 }, { "epoch": 0.2110774768878344, "grad_norm": 0.44567158893021547, "learning_rate": 9.628889152013144e-06, "loss": 0.0075, "step": 12900 }, { "epoch": 0.21124110283891026, "grad_norm": 0.5051481386519453, "learning_rate": 9.627808696249642e-06, "loss": 0.0075, "step": 12910 }, { "epoch": 0.2114047287899861, "grad_norm": 0.3005312142160377, "learning_rate": 9.626726730745396e-06, "loss": 0.0065, "step": 12920 }, { "epoch": 0.21156835474106192, "grad_norm": 0.32682279958165067, "learning_rate": 9.625643255853377e-06, "loss": 0.0045, "step": 12930 }, { "epoch": 0.21173198069213778, "grad_norm": 0.31638464574382064, "learning_rate": 9.624558271927049e-06, "loss": 0.0061, "step": 12940 }, { "epoch": 0.2118956066432136, "grad_norm": 0.5055635511008135, "learning_rate": 9.623471779320369e-06, "loss": 0.0045, "step": 12950 }, { "epoch": 0.21205923259428947, "grad_norm": 0.2435266490874863, "learning_rate": 9.622383778387784e-06, "loss": 0.0065, "step": 12960 }, { "epoch": 0.2122228585453653, "grad_norm": 0.3608265970919034, "learning_rate": 9.621294269484238e-06, "loss": 0.0063, "step": 12970 }, { "epoch": 0.21238648449644112, "grad_norm": 0.3488213751205232, "learning_rate": 9.620203252965163e-06, "loss": 0.0073, "step": 12980 }, { "epoch": 0.21255011044751698, "grad_norm": 0.2049900708808698, "learning_rate": 9.619110729186485e-06, "loss": 0.0087, "step": 12990 }, { "epoch": 0.2127137363985928, "grad_norm": 0.39186369361272927, "learning_rate": 9.618016698504618e-06, "loss": 0.0051, "step": 13000 }, { "epoch": 0.21287736234966867, "grad_norm": 0.24222160481711164, "learning_rate": 9.616921161276471e-06, "loss": 0.0034, "step": 13010 }, { "epoch": 0.2130409883007445, "grad_norm": 0.45191656399038793, "learning_rate": 9.615824117859445e-06, "loss": 0.0045, "step": 13020 }, { "epoch": 0.21320461425182033, "grad_norm": 0.2312858761680152, "learning_rate": 9.61472556861143e-06, "loss": 0.0071, "step": 13030 }, { "epoch": 0.2133682402028962, "grad_norm": 0.36388579459186143, "learning_rate": 9.61362551389081e-06, "loss": 0.007, "step": 13040 }, { "epoch": 0.21353186615397202, "grad_norm": 0.3086548382097394, "learning_rate": 9.612523954056454e-06, "loss": 0.0069, "step": 13050 }, { "epoch": 0.21369549210504787, "grad_norm": 0.14314722661602566, "learning_rate": 9.611420889467731e-06, "loss": 0.0046, "step": 13060 }, { "epoch": 0.2138591180561237, "grad_norm": 0.4373123616002356, "learning_rate": 9.610316320484494e-06, "loss": 0.006, "step": 13070 }, { "epoch": 0.21402274400719953, "grad_norm": 0.4871047676305095, "learning_rate": 9.60921024746709e-06, "loss": 0.0067, "step": 13080 }, { "epoch": 0.2141863699582754, "grad_norm": 0.3266083014988156, "learning_rate": 9.608102670776354e-06, "loss": 0.0051, "step": 13090 }, { "epoch": 0.21434999590935122, "grad_norm": 0.2731045691905199, "learning_rate": 9.606993590773616e-06, "loss": 0.0074, "step": 13100 }, { "epoch": 0.21451362186042705, "grad_norm": 0.2764307463807169, "learning_rate": 9.605883007820693e-06, "loss": 0.0057, "step": 13110 }, { "epoch": 0.2146772478115029, "grad_norm": 0.2368996995251964, "learning_rate": 9.604770922279893e-06, "loss": 0.0047, "step": 13120 }, { "epoch": 0.21484087376257874, "grad_norm": 0.24739123282299902, "learning_rate": 9.603657334514012e-06, "loss": 0.0066, "step": 13130 }, { "epoch": 0.2150044997136546, "grad_norm": 0.18433050559270203, "learning_rate": 9.60254224488634e-06, "loss": 0.0068, "step": 13140 }, { "epoch": 0.21516812566473043, "grad_norm": 0.20447111534033818, "learning_rate": 9.601425653760657e-06, "loss": 0.008, "step": 13150 }, { "epoch": 0.21533175161580626, "grad_norm": 0.17175156416557466, "learning_rate": 9.600307561501228e-06, "loss": 0.0071, "step": 13160 }, { "epoch": 0.2154953775668821, "grad_norm": 0.505279682563232, "learning_rate": 9.599187968472814e-06, "loss": 0.0076, "step": 13170 }, { "epoch": 0.21565900351795794, "grad_norm": 0.29735972258051, "learning_rate": 9.59806687504066e-06, "loss": 0.0083, "step": 13180 }, { "epoch": 0.2158226294690338, "grad_norm": 0.24681865111432016, "learning_rate": 9.5969442815705e-06, "loss": 0.0089, "step": 13190 }, { "epoch": 0.21598625542010963, "grad_norm": 0.32140709317686506, "learning_rate": 9.59582018842857e-06, "loss": 0.0062, "step": 13200 }, { "epoch": 0.21614988137118546, "grad_norm": 0.532337821477823, "learning_rate": 9.594694595981575e-06, "loss": 0.0079, "step": 13210 }, { "epoch": 0.21631350732226132, "grad_norm": 0.3852848127046301, "learning_rate": 9.593567504596726e-06, "loss": 0.006, "step": 13220 }, { "epoch": 0.21647713327333715, "grad_norm": 0.36133466685803317, "learning_rate": 9.592438914641715e-06, "loss": 0.0066, "step": 13230 }, { "epoch": 0.216640759224413, "grad_norm": 0.33291897366357015, "learning_rate": 9.591308826484722e-06, "loss": 0.0058, "step": 13240 }, { "epoch": 0.21680438517548883, "grad_norm": 0.32600592817160806, "learning_rate": 9.590177240494423e-06, "loss": 0.007, "step": 13250 }, { "epoch": 0.21696801112656466, "grad_norm": 0.2171981648055445, "learning_rate": 9.589044157039972e-06, "loss": 0.0074, "step": 13260 }, { "epoch": 0.21713163707764052, "grad_norm": 0.17825027067375412, "learning_rate": 9.587909576491025e-06, "loss": 0.0064, "step": 13270 }, { "epoch": 0.21729526302871635, "grad_norm": 0.37501316413056474, "learning_rate": 9.586773499217711e-06, "loss": 0.0053, "step": 13280 }, { "epoch": 0.2174588889797922, "grad_norm": 0.26287798643027416, "learning_rate": 9.585635925590661e-06, "loss": 0.0062, "step": 13290 }, { "epoch": 0.21762251493086804, "grad_norm": 0.18786313071231878, "learning_rate": 9.584496855980985e-06, "loss": 0.0052, "step": 13300 }, { "epoch": 0.21778614088194387, "grad_norm": 0.12355998891252588, "learning_rate": 9.583356290760287e-06, "loss": 0.0069, "step": 13310 }, { "epoch": 0.21794976683301973, "grad_norm": 0.2815342708333563, "learning_rate": 9.582214230300653e-06, "loss": 0.0037, "step": 13320 }, { "epoch": 0.21811339278409556, "grad_norm": 0.5776772098926313, "learning_rate": 9.581070674974662e-06, "loss": 0.0076, "step": 13330 }, { "epoch": 0.21827701873517139, "grad_norm": 0.4677421218474824, "learning_rate": 9.579925625155377e-06, "loss": 0.0056, "step": 13340 }, { "epoch": 0.21844064468624724, "grad_norm": 0.3025267025505492, "learning_rate": 9.578779081216355e-06, "loss": 0.0081, "step": 13350 }, { "epoch": 0.21860427063732307, "grad_norm": 0.37344902339633046, "learning_rate": 9.577631043531628e-06, "loss": 0.0114, "step": 13360 }, { "epoch": 0.21876789658839893, "grad_norm": 0.6941529396153875, "learning_rate": 9.57648151247573e-06, "loss": 0.0088, "step": 13370 }, { "epoch": 0.21893152253947476, "grad_norm": 0.24355546556371185, "learning_rate": 9.575330488423672e-06, "loss": 0.005, "step": 13380 }, { "epoch": 0.2190951484905506, "grad_norm": 0.2342937368232905, "learning_rate": 9.574177971750954e-06, "loss": 0.0064, "step": 13390 }, { "epoch": 0.21925877444162645, "grad_norm": 0.2740934207646199, "learning_rate": 9.573023962833567e-06, "loss": 0.0067, "step": 13400 }, { "epoch": 0.21942240039270228, "grad_norm": 0.5052348012143701, "learning_rate": 9.571868462047985e-06, "loss": 0.0083, "step": 13410 }, { "epoch": 0.21958602634377813, "grad_norm": 0.1356438839628698, "learning_rate": 9.57071146977117e-06, "loss": 0.0035, "step": 13420 }, { "epoch": 0.21974965229485396, "grad_norm": 0.8484093836547718, "learning_rate": 9.569552986380568e-06, "loss": 0.0101, "step": 13430 }, { "epoch": 0.2199132782459298, "grad_norm": 0.21377409613672566, "learning_rate": 9.568393012254118e-06, "loss": 0.0083, "step": 13440 }, { "epoch": 0.22007690419700565, "grad_norm": 0.231030705717587, "learning_rate": 9.567231547770237e-06, "loss": 0.0066, "step": 13450 }, { "epoch": 0.22024053014808148, "grad_norm": 0.2017966744892309, "learning_rate": 9.566068593307835e-06, "loss": 0.0042, "step": 13460 }, { "epoch": 0.22040415609915734, "grad_norm": 0.29284216348492254, "learning_rate": 9.564904149246302e-06, "loss": 0.007, "step": 13470 }, { "epoch": 0.22056778205023317, "grad_norm": 0.30560245920193674, "learning_rate": 9.56373821596552e-06, "loss": 0.0049, "step": 13480 }, { "epoch": 0.220731408001309, "grad_norm": 0.5001219378865481, "learning_rate": 9.562570793845856e-06, "loss": 0.0067, "step": 13490 }, { "epoch": 0.22089503395238486, "grad_norm": 0.3086288716310587, "learning_rate": 9.561401883268157e-06, "loss": 0.0077, "step": 13500 }, { "epoch": 0.22105865990346069, "grad_norm": 0.21121580296196205, "learning_rate": 9.56023148461376e-06, "loss": 0.0035, "step": 13510 }, { "epoch": 0.22122228585453654, "grad_norm": 0.16208485563364608, "learning_rate": 9.55905959826449e-06, "loss": 0.0077, "step": 13520 }, { "epoch": 0.22138591180561237, "grad_norm": 0.6129561213274362, "learning_rate": 9.557886224602648e-06, "loss": 0.0083, "step": 13530 }, { "epoch": 0.2215495377566882, "grad_norm": 0.32513100068890144, "learning_rate": 9.556711364011031e-06, "loss": 0.0059, "step": 13540 }, { "epoch": 0.22171316370776406, "grad_norm": 0.17938448655392583, "learning_rate": 9.555535016872918e-06, "loss": 0.0041, "step": 13550 }, { "epoch": 0.2218767896588399, "grad_norm": 0.17900725186625582, "learning_rate": 9.554357183572067e-06, "loss": 0.0058, "step": 13560 }, { "epoch": 0.22204041560991572, "grad_norm": 0.42399516304112894, "learning_rate": 9.553177864492728e-06, "loss": 0.0105, "step": 13570 }, { "epoch": 0.22220404156099158, "grad_norm": 0.3813361713911305, "learning_rate": 9.551997060019634e-06, "loss": 0.0053, "step": 13580 }, { "epoch": 0.2223676675120674, "grad_norm": 0.2597109251775523, "learning_rate": 9.550814770538e-06, "loss": 0.006, "step": 13590 }, { "epoch": 0.22253129346314326, "grad_norm": 0.42765706090153527, "learning_rate": 9.549630996433526e-06, "loss": 0.0066, "step": 13600 }, { "epoch": 0.2226949194142191, "grad_norm": 0.48884472589107786, "learning_rate": 9.548445738092397e-06, "loss": 0.0073, "step": 13610 }, { "epoch": 0.22285854536529492, "grad_norm": 0.15532669916419392, "learning_rate": 9.547258995901286e-06, "loss": 0.0075, "step": 13620 }, { "epoch": 0.22302217131637078, "grad_norm": 0.157113377991791, "learning_rate": 9.546070770247345e-06, "loss": 0.0068, "step": 13630 }, { "epoch": 0.2231857972674466, "grad_norm": 0.36220899808927415, "learning_rate": 9.54488106151821e-06, "loss": 0.0048, "step": 13640 }, { "epoch": 0.22334942321852247, "grad_norm": 0.28719435288237044, "learning_rate": 9.543689870102004e-06, "loss": 0.0042, "step": 13650 }, { "epoch": 0.2235130491695983, "grad_norm": 0.25210672502219006, "learning_rate": 9.542497196387332e-06, "loss": 0.0053, "step": 13660 }, { "epoch": 0.22367667512067413, "grad_norm": 0.42478007611712965, "learning_rate": 9.541303040763282e-06, "loss": 0.0057, "step": 13670 }, { "epoch": 0.22384030107175, "grad_norm": 0.21816506900661323, "learning_rate": 9.540107403619427e-06, "loss": 0.0068, "step": 13680 }, { "epoch": 0.22400392702282582, "grad_norm": 0.5742875054648906, "learning_rate": 9.538910285345823e-06, "loss": 0.0051, "step": 13690 }, { "epoch": 0.22416755297390167, "grad_norm": 0.2873033248646462, "learning_rate": 9.537711686333006e-06, "loss": 0.0072, "step": 13700 }, { "epoch": 0.2243311789249775, "grad_norm": 0.4977841781484335, "learning_rate": 9.536511606972002e-06, "loss": 0.0046, "step": 13710 }, { "epoch": 0.22449480487605333, "grad_norm": 0.42424990248621275, "learning_rate": 9.535310047654311e-06, "loss": 0.0051, "step": 13720 }, { "epoch": 0.2246584308271292, "grad_norm": 0.34056986370649706, "learning_rate": 9.534107008771924e-06, "loss": 0.0076, "step": 13730 }, { "epoch": 0.22482205677820502, "grad_norm": 0.2657361732948001, "learning_rate": 9.53290249071731e-06, "loss": 0.0066, "step": 13740 }, { "epoch": 0.22498568272928085, "grad_norm": 0.25114005605147427, "learning_rate": 9.53169649388342e-06, "loss": 0.0055, "step": 13750 }, { "epoch": 0.2251493086803567, "grad_norm": 0.2726808588141479, "learning_rate": 9.530489018663693e-06, "loss": 0.0057, "step": 13760 }, { "epoch": 0.22531293463143254, "grad_norm": 0.3530846299195506, "learning_rate": 9.529280065452042e-06, "loss": 0.0124, "step": 13770 }, { "epoch": 0.2254765605825084, "grad_norm": 0.2818722653891957, "learning_rate": 9.528069634642871e-06, "loss": 0.0049, "step": 13780 }, { "epoch": 0.22564018653358422, "grad_norm": 0.42331227165057544, "learning_rate": 9.526857726631059e-06, "loss": 0.0078, "step": 13790 }, { "epoch": 0.22580381248466005, "grad_norm": 0.4485415808800704, "learning_rate": 9.525644341811968e-06, "loss": 0.0071, "step": 13800 }, { "epoch": 0.2259674384357359, "grad_norm": 0.19258206594422, "learning_rate": 9.524429480581445e-06, "loss": 0.0055, "step": 13810 }, { "epoch": 0.22613106438681174, "grad_norm": 0.3897623178419447, "learning_rate": 9.52321314333582e-06, "loss": 0.0071, "step": 13820 }, { "epoch": 0.2262946903378876, "grad_norm": 0.2321282129526326, "learning_rate": 9.521995330471898e-06, "loss": 0.0065, "step": 13830 }, { "epoch": 0.22645831628896343, "grad_norm": 0.22893327276089045, "learning_rate": 9.520776042386968e-06, "loss": 0.0085, "step": 13840 }, { "epoch": 0.22662194224003926, "grad_norm": 0.6576099504685553, "learning_rate": 9.519555279478805e-06, "loss": 0.0073, "step": 13850 }, { "epoch": 0.22678556819111512, "grad_norm": 0.4418565426040817, "learning_rate": 9.518333042145658e-06, "loss": 0.0065, "step": 13860 }, { "epoch": 0.22694919414219095, "grad_norm": 0.3819352573777527, "learning_rate": 9.517109330786261e-06, "loss": 0.0088, "step": 13870 }, { "epoch": 0.2271128200932668, "grad_norm": 0.26155817292046624, "learning_rate": 9.51588414579983e-06, "loss": 0.0067, "step": 13880 }, { "epoch": 0.22727644604434263, "grad_norm": 0.12584201244957097, "learning_rate": 9.514657487586059e-06, "loss": 0.0059, "step": 13890 }, { "epoch": 0.22744007199541846, "grad_norm": 0.1909321313099596, "learning_rate": 9.513429356545121e-06, "loss": 0.0073, "step": 13900 }, { "epoch": 0.22760369794649432, "grad_norm": 0.2909084391114, "learning_rate": 9.512199753077677e-06, "loss": 0.0066, "step": 13910 }, { "epoch": 0.22776732389757015, "grad_norm": 0.2551113146650867, "learning_rate": 9.510968677584858e-06, "loss": 0.0067, "step": 13920 }, { "epoch": 0.227930949848646, "grad_norm": 0.17608195052899103, "learning_rate": 9.509736130468285e-06, "loss": 0.0076, "step": 13930 }, { "epoch": 0.22809457579972184, "grad_norm": 0.11151091951660022, "learning_rate": 9.508502112130053e-06, "loss": 0.0055, "step": 13940 }, { "epoch": 0.22825820175079767, "grad_norm": 0.39610436178992064, "learning_rate": 9.507266622972738e-06, "loss": 0.0049, "step": 13950 }, { "epoch": 0.22842182770187353, "grad_norm": 0.4088406968195816, "learning_rate": 9.506029663399397e-06, "loss": 0.005, "step": 13960 }, { "epoch": 0.22858545365294936, "grad_norm": 0.5556103262128795, "learning_rate": 9.504791233813567e-06, "loss": 0.0063, "step": 13970 }, { "epoch": 0.22874907960402519, "grad_norm": 0.3142193477238952, "learning_rate": 9.50355133461926e-06, "loss": 0.0057, "step": 13980 }, { "epoch": 0.22891270555510104, "grad_norm": 0.34651416136300606, "learning_rate": 9.502309966220978e-06, "loss": 0.0079, "step": 13990 }, { "epoch": 0.22907633150617687, "grad_norm": 0.2128523021083095, "learning_rate": 9.50106712902369e-06, "loss": 0.0076, "step": 14000 }, { "epoch": 0.22923995745725273, "grad_norm": 0.5650278412198942, "learning_rate": 9.499822823432851e-06, "loss": 0.0056, "step": 14010 }, { "epoch": 0.22940358340832856, "grad_norm": 0.4706166143954149, "learning_rate": 9.498577049854395e-06, "loss": 0.009, "step": 14020 }, { "epoch": 0.2295672093594044, "grad_norm": 0.262413434562026, "learning_rate": 9.497329808694732e-06, "loss": 0.004, "step": 14030 }, { "epoch": 0.22973083531048025, "grad_norm": 0.18732504958743518, "learning_rate": 9.496081100360753e-06, "loss": 0.0075, "step": 14040 }, { "epoch": 0.22989446126155608, "grad_norm": 0.34662797689048, "learning_rate": 9.494830925259828e-06, "loss": 0.006, "step": 14050 }, { "epoch": 0.23005808721263193, "grad_norm": 0.3453315017811865, "learning_rate": 9.493579283799803e-06, "loss": 0.0056, "step": 14060 }, { "epoch": 0.23022171316370776, "grad_norm": 0.26983668964754903, "learning_rate": 9.492326176389004e-06, "loss": 0.0052, "step": 14070 }, { "epoch": 0.2303853391147836, "grad_norm": 0.7037108382465352, "learning_rate": 9.491071603436235e-06, "loss": 0.0118, "step": 14080 }, { "epoch": 0.23054896506585945, "grad_norm": 0.2679989652015329, "learning_rate": 9.48981556535078e-06, "loss": 0.0052, "step": 14090 }, { "epoch": 0.23071259101693528, "grad_norm": 0.5462982707730084, "learning_rate": 9.488558062542396e-06, "loss": 0.0058, "step": 14100 }, { "epoch": 0.23087621696801114, "grad_norm": 0.14330988475015488, "learning_rate": 9.487299095421325e-06, "loss": 0.0035, "step": 14110 }, { "epoch": 0.23103984291908697, "grad_norm": 0.37262643538775314, "learning_rate": 9.48603866439828e-06, "loss": 0.0063, "step": 14120 }, { "epoch": 0.2312034688701628, "grad_norm": 0.34006463751875293, "learning_rate": 9.484776769884457e-06, "loss": 0.0079, "step": 14130 }, { "epoch": 0.23136709482123866, "grad_norm": 0.10388228446029518, "learning_rate": 9.483513412291522e-06, "loss": 0.0066, "step": 14140 }, { "epoch": 0.23153072077231449, "grad_norm": 0.09450310499555999, "learning_rate": 9.482248592031629e-06, "loss": 0.0079, "step": 14150 }, { "epoch": 0.23169434672339034, "grad_norm": 0.3040662061620897, "learning_rate": 9.480982309517398e-06, "loss": 0.005, "step": 14160 }, { "epoch": 0.23185797267446617, "grad_norm": 0.16827225121144465, "learning_rate": 9.479714565161934e-06, "loss": 0.0055, "step": 14170 }, { "epoch": 0.232021598625542, "grad_norm": 0.5489804910377039, "learning_rate": 9.478445359378816e-06, "loss": 0.0086, "step": 14180 }, { "epoch": 0.23218522457661786, "grad_norm": 0.5534358293896808, "learning_rate": 9.477174692582099e-06, "loss": 0.0071, "step": 14190 }, { "epoch": 0.2323488505276937, "grad_norm": 0.3129451956399419, "learning_rate": 9.475902565186316e-06, "loss": 0.0069, "step": 14200 }, { "epoch": 0.23251247647876952, "grad_norm": 0.39368717151298316, "learning_rate": 9.474628977606477e-06, "loss": 0.0083, "step": 14210 }, { "epoch": 0.23267610242984538, "grad_norm": 0.11459955028198908, "learning_rate": 9.473353930258067e-06, "loss": 0.0065, "step": 14220 }, { "epoch": 0.2328397283809212, "grad_norm": 0.44143179423929074, "learning_rate": 9.472077423557047e-06, "loss": 0.0076, "step": 14230 }, { "epoch": 0.23300335433199706, "grad_norm": 0.21452633381719527, "learning_rate": 9.470799457919855e-06, "loss": 0.0058, "step": 14240 }, { "epoch": 0.2331669802830729, "grad_norm": 0.29207166733800266, "learning_rate": 9.469520033763404e-06, "loss": 0.0076, "step": 14250 }, { "epoch": 0.23333060623414872, "grad_norm": 0.4625890655592877, "learning_rate": 9.468239151505085e-06, "loss": 0.0061, "step": 14260 }, { "epoch": 0.23349423218522458, "grad_norm": 0.13674762024956413, "learning_rate": 9.466956811562762e-06, "loss": 0.0068, "step": 14270 }, { "epoch": 0.2336578581363004, "grad_norm": 0.3334218910275108, "learning_rate": 9.465673014354777e-06, "loss": 0.0046, "step": 14280 }, { "epoch": 0.23382148408737627, "grad_norm": 0.209128114710864, "learning_rate": 9.464387760299946e-06, "loss": 0.0052, "step": 14290 }, { "epoch": 0.2339851100384521, "grad_norm": 0.5241121573280724, "learning_rate": 9.463101049817559e-06, "loss": 0.0081, "step": 14300 }, { "epoch": 0.23414873598952793, "grad_norm": 0.3870482538755865, "learning_rate": 9.461812883327382e-06, "loss": 0.0056, "step": 14310 }, { "epoch": 0.23431236194060379, "grad_norm": 0.6962889227049897, "learning_rate": 9.460523261249659e-06, "loss": 0.0103, "step": 14320 }, { "epoch": 0.23447598789167962, "grad_norm": 0.26858273092528284, "learning_rate": 9.459232184005104e-06, "loss": 0.0066, "step": 14330 }, { "epoch": 0.23463961384275547, "grad_norm": 0.3292942805736939, "learning_rate": 9.457939652014912e-06, "loss": 0.0066, "step": 14340 }, { "epoch": 0.2348032397938313, "grad_norm": 0.0901567977586052, "learning_rate": 9.456645665700744e-06, "loss": 0.0086, "step": 14350 }, { "epoch": 0.23496686574490713, "grad_norm": 0.30705374505818217, "learning_rate": 9.45535022548474e-06, "loss": 0.0081, "step": 14360 }, { "epoch": 0.235130491695983, "grad_norm": 0.15174008851125237, "learning_rate": 9.45405333178952e-06, "loss": 0.0061, "step": 14370 }, { "epoch": 0.23529411764705882, "grad_norm": 0.24054074207388504, "learning_rate": 9.452754985038168e-06, "loss": 0.0076, "step": 14380 }, { "epoch": 0.23545774359813468, "grad_norm": 0.20620360373956154, "learning_rate": 9.451455185654248e-06, "loss": 0.0096, "step": 14390 }, { "epoch": 0.2356213695492105, "grad_norm": 0.4700818658116053, "learning_rate": 9.450153934061796e-06, "loss": 0.0086, "step": 14400 }, { "epoch": 0.23578499550028634, "grad_norm": 0.36575437605963235, "learning_rate": 9.448851230685325e-06, "loss": 0.0068, "step": 14410 }, { "epoch": 0.2359486214513622, "grad_norm": 0.360341990905771, "learning_rate": 9.447547075949815e-06, "loss": 0.0052, "step": 14420 }, { "epoch": 0.23611224740243802, "grad_norm": 0.399218996701051, "learning_rate": 9.446241470280726e-06, "loss": 0.0068, "step": 14430 }, { "epoch": 0.23627587335351385, "grad_norm": 0.6245088876773593, "learning_rate": 9.444934414103986e-06, "loss": 0.0058, "step": 14440 }, { "epoch": 0.2364394993045897, "grad_norm": 0.30598324129481186, "learning_rate": 9.443625907846002e-06, "loss": 0.009, "step": 14450 }, { "epoch": 0.23660312525566554, "grad_norm": 0.14514530750061175, "learning_rate": 9.44231595193365e-06, "loss": 0.0047, "step": 14460 }, { "epoch": 0.2367667512067414, "grad_norm": 0.5858242717729784, "learning_rate": 9.44100454679428e-06, "loss": 0.0058, "step": 14470 }, { "epoch": 0.23693037715781723, "grad_norm": 0.36480864996203793, "learning_rate": 9.439691692855715e-06, "loss": 0.0048, "step": 14480 }, { "epoch": 0.23709400310889306, "grad_norm": 1.2661755140068547, "learning_rate": 9.43837739054625e-06, "loss": 0.0052, "step": 14490 }, { "epoch": 0.23725762905996892, "grad_norm": 0.09276635785854584, "learning_rate": 9.437061640294651e-06, "loss": 0.0034, "step": 14500 }, { "epoch": 0.23742125501104475, "grad_norm": 0.0925421417282902, "learning_rate": 9.435744442530162e-06, "loss": 0.0056, "step": 14510 }, { "epoch": 0.2375848809621206, "grad_norm": 0.310637867984842, "learning_rate": 9.434425797682493e-06, "loss": 0.0052, "step": 14520 }, { "epoch": 0.23774850691319643, "grad_norm": 0.3060726422201542, "learning_rate": 9.433105706181829e-06, "loss": 0.0077, "step": 14530 }, { "epoch": 0.23791213286427226, "grad_norm": 0.33467331135817335, "learning_rate": 9.431784168458827e-06, "loss": 0.0038, "step": 14540 }, { "epoch": 0.23807575881534812, "grad_norm": 0.6404301844845167, "learning_rate": 9.430461184944615e-06, "loss": 0.0054, "step": 14550 }, { "epoch": 0.23823938476642395, "grad_norm": 0.9709040983481245, "learning_rate": 9.429136756070793e-06, "loss": 0.0046, "step": 14560 }, { "epoch": 0.2384030107174998, "grad_norm": 0.24878807963288466, "learning_rate": 9.427810882269432e-06, "loss": 0.0049, "step": 14570 }, { "epoch": 0.23856663666857564, "grad_norm": 0.316034350819017, "learning_rate": 9.426483563973075e-06, "loss": 0.0057, "step": 14580 }, { "epoch": 0.23873026261965147, "grad_norm": 0.610862023166375, "learning_rate": 9.425154801614737e-06, "loss": 0.0089, "step": 14590 }, { "epoch": 0.23889388857072733, "grad_norm": 0.09416937810722693, "learning_rate": 9.423824595627904e-06, "loss": 0.0041, "step": 14600 }, { "epoch": 0.23905751452180315, "grad_norm": 0.47276794221891294, "learning_rate": 9.422492946446529e-06, "loss": 0.008, "step": 14610 }, { "epoch": 0.23922114047287898, "grad_norm": 0.23250562888546936, "learning_rate": 9.42115985450504e-06, "loss": 0.0056, "step": 14620 }, { "epoch": 0.23938476642395484, "grad_norm": 0.10945151238661889, "learning_rate": 9.419825320238337e-06, "loss": 0.0049, "step": 14630 }, { "epoch": 0.23954839237503067, "grad_norm": 0.37550509391419934, "learning_rate": 9.418489344081788e-06, "loss": 0.0055, "step": 14640 }, { "epoch": 0.23971201832610653, "grad_norm": 0.3096162291938593, "learning_rate": 9.417151926471227e-06, "loss": 0.0059, "step": 14650 }, { "epoch": 0.23987564427718236, "grad_norm": 0.17577959004882365, "learning_rate": 9.415813067842968e-06, "loss": 0.008, "step": 14660 }, { "epoch": 0.2400392702282582, "grad_norm": 0.19749149213743192, "learning_rate": 9.41447276863379e-06, "loss": 0.0069, "step": 14670 }, { "epoch": 0.24020289617933405, "grad_norm": 0.2311033362831667, "learning_rate": 9.413131029280937e-06, "loss": 0.0063, "step": 14680 }, { "epoch": 0.24036652213040988, "grad_norm": 0.4104589976868849, "learning_rate": 9.411787850222131e-06, "loss": 0.004, "step": 14690 }, { "epoch": 0.24053014808148573, "grad_norm": 0.11229179818877091, "learning_rate": 9.41044323189556e-06, "loss": 0.0046, "step": 14700 }, { "epoch": 0.24069377403256156, "grad_norm": 0.4742399403936828, "learning_rate": 9.409097174739885e-06, "loss": 0.0077, "step": 14710 }, { "epoch": 0.2408573999836374, "grad_norm": 0.34882810854198265, "learning_rate": 9.407749679194228e-06, "loss": 0.0042, "step": 14720 }, { "epoch": 0.24102102593471325, "grad_norm": 0.16425028064596917, "learning_rate": 9.406400745698188e-06, "loss": 0.0057, "step": 14730 }, { "epoch": 0.24118465188578908, "grad_norm": 0.22104259379452276, "learning_rate": 9.405050374691832e-06, "loss": 0.0051, "step": 14740 }, { "epoch": 0.24134827783686494, "grad_norm": 0.41523541428014976, "learning_rate": 9.403698566615693e-06, "loss": 0.0063, "step": 14750 }, { "epoch": 0.24151190378794077, "grad_norm": 0.3978342793878649, "learning_rate": 9.402345321910775e-06, "loss": 0.0072, "step": 14760 }, { "epoch": 0.2416755297390166, "grad_norm": 0.2687176646482882, "learning_rate": 9.400990641018549e-06, "loss": 0.0063, "step": 14770 }, { "epoch": 0.24183915569009246, "grad_norm": 0.26751074241939615, "learning_rate": 9.399634524380959e-06, "loss": 0.0046, "step": 14780 }, { "epoch": 0.24200278164116829, "grad_norm": 0.20211179922679004, "learning_rate": 9.398276972440409e-06, "loss": 0.0039, "step": 14790 }, { "epoch": 0.24216640759224414, "grad_norm": 0.1888648499768089, "learning_rate": 9.39691798563978e-06, "loss": 0.0072, "step": 14800 }, { "epoch": 0.24233003354331997, "grad_norm": 0.40591294721099713, "learning_rate": 9.395557564422418e-06, "loss": 0.0058, "step": 14810 }, { "epoch": 0.2424936594943958, "grad_norm": 0.3004720116766287, "learning_rate": 9.394195709232132e-06, "loss": 0.0066, "step": 14820 }, { "epoch": 0.24265728544547166, "grad_norm": 0.3008383073629911, "learning_rate": 9.392832420513208e-06, "loss": 0.0096, "step": 14830 }, { "epoch": 0.2428209113965475, "grad_norm": 0.15202838911585395, "learning_rate": 9.391467698710393e-06, "loss": 0.0061, "step": 14840 }, { "epoch": 0.24298453734762332, "grad_norm": 0.10839141651285336, "learning_rate": 9.390101544268902e-06, "loss": 0.0061, "step": 14850 }, { "epoch": 0.24314816329869918, "grad_norm": 0.19685018593162384, "learning_rate": 9.388733957634422e-06, "loss": 0.0072, "step": 14860 }, { "epoch": 0.243311789249775, "grad_norm": 0.2182779570417821, "learning_rate": 9.387364939253101e-06, "loss": 0.0049, "step": 14870 }, { "epoch": 0.24347541520085086, "grad_norm": 0.3135913264053835, "learning_rate": 9.385994489571557e-06, "loss": 0.0047, "step": 14880 }, { "epoch": 0.2436390411519267, "grad_norm": 0.17898868179664465, "learning_rate": 9.384622609036876e-06, "loss": 0.0043, "step": 14890 }, { "epoch": 0.24380266710300252, "grad_norm": 0.2480786729202796, "learning_rate": 9.383249298096611e-06, "loss": 0.004, "step": 14900 }, { "epoch": 0.24396629305407838, "grad_norm": 0.20302182619123238, "learning_rate": 9.381874557198777e-06, "loss": 0.0101, "step": 14910 }, { "epoch": 0.2441299190051542, "grad_norm": 0.20275127170617238, "learning_rate": 9.380498386791863e-06, "loss": 0.0037, "step": 14920 }, { "epoch": 0.24429354495623007, "grad_norm": 0.209137415493891, "learning_rate": 9.379120787324818e-06, "loss": 0.008, "step": 14930 }, { "epoch": 0.2444571709073059, "grad_norm": 0.11077655780576559, "learning_rate": 9.37774175924706e-06, "loss": 0.0048, "step": 14940 }, { "epoch": 0.24462079685838173, "grad_norm": 0.31295377068334534, "learning_rate": 9.376361303008472e-06, "loss": 0.0052, "step": 14950 }, { "epoch": 0.24478442280945759, "grad_norm": 0.290928570598236, "learning_rate": 9.374979419059402e-06, "loss": 0.0062, "step": 14960 }, { "epoch": 0.24494804876053342, "grad_norm": 0.2678787881437659, "learning_rate": 9.373596107850669e-06, "loss": 0.0075, "step": 14970 }, { "epoch": 0.24511167471160927, "grad_norm": 0.48207848539764514, "learning_rate": 9.37221136983355e-06, "loss": 0.0078, "step": 14980 }, { "epoch": 0.2452753006626851, "grad_norm": 0.110899440414048, "learning_rate": 9.370825205459795e-06, "loss": 0.0057, "step": 14990 }, { "epoch": 0.24543892661376093, "grad_norm": 0.5391182225649838, "learning_rate": 9.369437615181612e-06, "loss": 0.0068, "step": 15000 }, { "epoch": 0.2456025525648368, "grad_norm": 0.5818721569796923, "learning_rate": 9.36804859945168e-06, "loss": 0.0037, "step": 15010 }, { "epoch": 0.24576617851591262, "grad_norm": 0.3073631380529355, "learning_rate": 9.36665815872314e-06, "loss": 0.0065, "step": 15020 }, { "epoch": 0.24592980446698848, "grad_norm": 0.17585112163546068, "learning_rate": 9.3652662934496e-06, "loss": 0.0062, "step": 15030 }, { "epoch": 0.2460934304180643, "grad_norm": 0.4154698646351483, "learning_rate": 9.36387300408513e-06, "loss": 0.0076, "step": 15040 }, { "epoch": 0.24625705636914014, "grad_norm": 0.2045864347089957, "learning_rate": 9.362478291084264e-06, "loss": 0.0074, "step": 15050 }, { "epoch": 0.246420682320216, "grad_norm": 0.2602914395562712, "learning_rate": 9.361082154902006e-06, "loss": 0.0051, "step": 15060 }, { "epoch": 0.24658430827129182, "grad_norm": 0.1735322801753643, "learning_rate": 9.359684595993821e-06, "loss": 0.0049, "step": 15070 }, { "epoch": 0.24674793422236765, "grad_norm": 0.21999665214678835, "learning_rate": 9.358285614815635e-06, "loss": 0.0078, "step": 15080 }, { "epoch": 0.2469115601734435, "grad_norm": 0.14946872981848264, "learning_rate": 9.356885211823844e-06, "loss": 0.0041, "step": 15090 }, { "epoch": 0.24707518612451934, "grad_norm": 0.23823262693319208, "learning_rate": 9.3554833874753e-06, "loss": 0.0047, "step": 15100 }, { "epoch": 0.2472388120755952, "grad_norm": 0.15335509595211821, "learning_rate": 9.35408014222733e-06, "loss": 0.0075, "step": 15110 }, { "epoch": 0.24740243802667103, "grad_norm": 0.35151024609445214, "learning_rate": 9.352675476537712e-06, "loss": 0.0047, "step": 15120 }, { "epoch": 0.24756606397774686, "grad_norm": 0.3084663704142106, "learning_rate": 9.351269390864695e-06, "loss": 0.0049, "step": 15130 }, { "epoch": 0.24772968992882272, "grad_norm": 0.3124341271759404, "learning_rate": 9.34986188566699e-06, "loss": 0.0057, "step": 15140 }, { "epoch": 0.24789331587989855, "grad_norm": 0.18709997700104167, "learning_rate": 9.348452961403775e-06, "loss": 0.0044, "step": 15150 }, { "epoch": 0.2480569418309744, "grad_norm": 0.29356791694704526, "learning_rate": 9.34704261853468e-06, "loss": 0.0045, "step": 15160 }, { "epoch": 0.24822056778205023, "grad_norm": 0.17566762489209925, "learning_rate": 9.345630857519806e-06, "loss": 0.0054, "step": 15170 }, { "epoch": 0.24838419373312606, "grad_norm": 0.551096575642366, "learning_rate": 9.344217678819718e-06, "loss": 0.0068, "step": 15180 }, { "epoch": 0.24854781968420192, "grad_norm": 0.131767090722387, "learning_rate": 9.342803082895438e-06, "loss": 0.0064, "step": 15190 }, { "epoch": 0.24871144563527775, "grad_norm": 0.2143855045691361, "learning_rate": 9.341387070208451e-06, "loss": 0.0055, "step": 15200 }, { "epoch": 0.2488750715863536, "grad_norm": 0.0788060313310521, "learning_rate": 9.339969641220711e-06, "loss": 0.0027, "step": 15210 }, { "epoch": 0.24903869753742944, "grad_norm": 0.2840929533913191, "learning_rate": 9.338550796394628e-06, "loss": 0.0055, "step": 15220 }, { "epoch": 0.24920232348850527, "grad_norm": 0.17721593948973868, "learning_rate": 9.337130536193072e-06, "loss": 0.0081, "step": 15230 }, { "epoch": 0.24936594943958112, "grad_norm": 0.2716418811540453, "learning_rate": 9.33570886107938e-06, "loss": 0.005, "step": 15240 }, { "epoch": 0.24952957539065695, "grad_norm": 0.715536111658266, "learning_rate": 9.334285771517348e-06, "loss": 0.004, "step": 15250 }, { "epoch": 0.2496932013417328, "grad_norm": 0.371328530470032, "learning_rate": 9.332861267971234e-06, "loss": 0.0071, "step": 15260 }, { "epoch": 0.24985682729280864, "grad_norm": 0.12260695624886554, "learning_rate": 9.331435350905756e-06, "loss": 0.0048, "step": 15270 }, { "epoch": 0.2500204532438845, "grad_norm": 0.3878963374555747, "learning_rate": 9.330008020786096e-06, "loss": 0.0063, "step": 15280 }, { "epoch": 0.25018407919496033, "grad_norm": 0.30388806020351955, "learning_rate": 9.328579278077894e-06, "loss": 0.0049, "step": 15290 }, { "epoch": 0.25034770514603616, "grad_norm": 0.5285349510757692, "learning_rate": 9.327149123247253e-06, "loss": 0.0059, "step": 15300 }, { "epoch": 0.250511331097112, "grad_norm": 0.3340642760538013, "learning_rate": 9.325717556760732e-06, "loss": 0.005, "step": 15310 }, { "epoch": 0.2506749570481878, "grad_norm": 0.2821603859086629, "learning_rate": 9.32428457908536e-06, "loss": 0.0052, "step": 15320 }, { "epoch": 0.2508385829992637, "grad_norm": 0.13198793199892456, "learning_rate": 9.322850190688618e-06, "loss": 0.0056, "step": 15330 }, { "epoch": 0.25100220895033953, "grad_norm": 0.24696458097131938, "learning_rate": 9.321414392038448e-06, "loss": 0.0046, "step": 15340 }, { "epoch": 0.25116583490141536, "grad_norm": 0.6434908805094101, "learning_rate": 9.319977183603257e-06, "loss": 0.0064, "step": 15350 }, { "epoch": 0.2513294608524912, "grad_norm": 0.5082098354708341, "learning_rate": 9.318538565851907e-06, "loss": 0.009, "step": 15360 }, { "epoch": 0.251493086803567, "grad_norm": 0.39458619037577347, "learning_rate": 9.317098539253723e-06, "loss": 0.0041, "step": 15370 }, { "epoch": 0.2516567127546429, "grad_norm": 0.18893194481615558, "learning_rate": 9.315657104278486e-06, "loss": 0.0052, "step": 15380 }, { "epoch": 0.25182033870571874, "grad_norm": 0.3799928632754006, "learning_rate": 9.31421426139644e-06, "loss": 0.0064, "step": 15390 }, { "epoch": 0.25198396465679457, "grad_norm": 0.14133898556258437, "learning_rate": 9.31277001107829e-06, "loss": 0.0072, "step": 15400 }, { "epoch": 0.2521475906078704, "grad_norm": 0.09071703250620583, "learning_rate": 9.31132435379519e-06, "loss": 0.0044, "step": 15410 }, { "epoch": 0.2523112165589462, "grad_norm": 0.362404729237176, "learning_rate": 9.309877290018764e-06, "loss": 0.0059, "step": 15420 }, { "epoch": 0.2524748425100221, "grad_norm": 0.4409186255538609, "learning_rate": 9.308428820221096e-06, "loss": 0.0042, "step": 15430 }, { "epoch": 0.25263846846109794, "grad_norm": 0.2176099883496552, "learning_rate": 9.306978944874715e-06, "loss": 0.0067, "step": 15440 }, { "epoch": 0.2528020944121738, "grad_norm": 0.2963205444288595, "learning_rate": 9.305527664452622e-06, "loss": 0.0043, "step": 15450 }, { "epoch": 0.2529657203632496, "grad_norm": 0.27428931792980327, "learning_rate": 9.30407497942827e-06, "loss": 0.0097, "step": 15460 }, { "epoch": 0.25312934631432543, "grad_norm": 0.23401933485377163, "learning_rate": 9.302620890275573e-06, "loss": 0.0033, "step": 15470 }, { "epoch": 0.2532929722654013, "grad_norm": 0.38879621381988466, "learning_rate": 9.3011653974689e-06, "loss": 0.0078, "step": 15480 }, { "epoch": 0.25345659821647715, "grad_norm": 0.5825043747382485, "learning_rate": 9.299708501483082e-06, "loss": 0.0097, "step": 15490 }, { "epoch": 0.253620224167553, "grad_norm": 0.23484761617081673, "learning_rate": 9.298250202793406e-06, "loss": 0.0099, "step": 15500 }, { "epoch": 0.2537838501186288, "grad_norm": 0.0730527484970993, "learning_rate": 9.296790501875613e-06, "loss": 0.0071, "step": 15510 }, { "epoch": 0.25394747606970464, "grad_norm": 0.18310148817053432, "learning_rate": 9.295329399205906e-06, "loss": 0.0044, "step": 15520 }, { "epoch": 0.2541111020207805, "grad_norm": 0.43931072882735667, "learning_rate": 9.293866895260946e-06, "loss": 0.0052, "step": 15530 }, { "epoch": 0.25427472797185635, "grad_norm": 0.10917023870661761, "learning_rate": 9.292402990517847e-06, "loss": 0.005, "step": 15540 }, { "epoch": 0.2544383539229322, "grad_norm": 0.19653198374575204, "learning_rate": 9.290937685454181e-06, "loss": 0.0099, "step": 15550 }, { "epoch": 0.254601979874008, "grad_norm": 0.5526590209031602, "learning_rate": 9.28947098054798e-06, "loss": 0.006, "step": 15560 }, { "epoch": 0.25476560582508384, "grad_norm": 0.29235142637747696, "learning_rate": 9.28800287627773e-06, "loss": 0.0037, "step": 15570 }, { "epoch": 0.2549292317761597, "grad_norm": 0.22452855765418456, "learning_rate": 9.286533373122372e-06, "loss": 0.0055, "step": 15580 }, { "epoch": 0.25509285772723556, "grad_norm": 0.4555840956003422, "learning_rate": 9.28506247156131e-06, "loss": 0.0057, "step": 15590 }, { "epoch": 0.2552564836783114, "grad_norm": 0.6662472624032246, "learning_rate": 9.283590172074396e-06, "loss": 0.0055, "step": 15600 }, { "epoch": 0.2554201096293872, "grad_norm": 0.39903543442574063, "learning_rate": 9.282116475141942e-06, "loss": 0.0077, "step": 15610 }, { "epoch": 0.25558373558046305, "grad_norm": 0.2221318833007634, "learning_rate": 9.280641381244716e-06, "loss": 0.0062, "step": 15620 }, { "epoch": 0.2557473615315389, "grad_norm": 0.4451123972798245, "learning_rate": 9.279164890863942e-06, "loss": 0.0043, "step": 15630 }, { "epoch": 0.25591098748261476, "grad_norm": 0.27825984329538817, "learning_rate": 9.277687004481298e-06, "loss": 0.0035, "step": 15640 }, { "epoch": 0.2560746134336906, "grad_norm": 0.49818548435411075, "learning_rate": 9.276207722578918e-06, "loss": 0.0064, "step": 15650 }, { "epoch": 0.2562382393847664, "grad_norm": 0.2214312824588999, "learning_rate": 9.274727045639394e-06, "loss": 0.004, "step": 15660 }, { "epoch": 0.25640186533584225, "grad_norm": 0.4074232127254207, "learning_rate": 9.273244974145765e-06, "loss": 0.0073, "step": 15670 }, { "epoch": 0.2565654912869181, "grad_norm": 0.027917935290623194, "learning_rate": 9.271761508581538e-06, "loss": 0.0052, "step": 15680 }, { "epoch": 0.25672911723799396, "grad_norm": 0.08697256400383539, "learning_rate": 9.270276649430662e-06, "loss": 0.005, "step": 15690 }, { "epoch": 0.2568927431890698, "grad_norm": 0.2098381040473996, "learning_rate": 9.268790397177548e-06, "loss": 0.0046, "step": 15700 }, { "epoch": 0.2570563691401456, "grad_norm": 0.247217340379413, "learning_rate": 9.26730275230706e-06, "loss": 0.0068, "step": 15710 }, { "epoch": 0.25721999509122145, "grad_norm": 0.3295195766262875, "learning_rate": 9.265813715304515e-06, "loss": 0.0066, "step": 15720 }, { "epoch": 0.2573836210422973, "grad_norm": 0.44338163580725737, "learning_rate": 9.264323286655685e-06, "loss": 0.0075, "step": 15730 }, { "epoch": 0.25754724699337317, "grad_norm": 0.25224477187620925, "learning_rate": 9.262831466846795e-06, "loss": 0.0108, "step": 15740 }, { "epoch": 0.257710872944449, "grad_norm": 0.36637598598161486, "learning_rate": 9.261338256364528e-06, "loss": 0.0064, "step": 15750 }, { "epoch": 0.25787449889552483, "grad_norm": 0.18728206841760078, "learning_rate": 9.259843655696014e-06, "loss": 0.0046, "step": 15760 }, { "epoch": 0.25803812484660066, "grad_norm": 0.42248866487523834, "learning_rate": 9.258347665328843e-06, "loss": 0.0057, "step": 15770 }, { "epoch": 0.2582017507976765, "grad_norm": 0.19325300019421338, "learning_rate": 9.256850285751053e-06, "loss": 0.0048, "step": 15780 }, { "epoch": 0.2583653767487524, "grad_norm": 0.1890348137555446, "learning_rate": 9.255351517451139e-06, "loss": 0.0034, "step": 15790 }, { "epoch": 0.2585290026998282, "grad_norm": 0.31362968408301894, "learning_rate": 9.253851360918048e-06, "loss": 0.0047, "step": 15800 }, { "epoch": 0.25869262865090403, "grad_norm": 0.04517231788366573, "learning_rate": 9.252349816641175e-06, "loss": 0.0043, "step": 15810 }, { "epoch": 0.25885625460197986, "grad_norm": 0.2170688319977448, "learning_rate": 9.250846885110379e-06, "loss": 0.0044, "step": 15820 }, { "epoch": 0.2590198805530557, "grad_norm": 0.32973057615182727, "learning_rate": 9.249342566815962e-06, "loss": 0.0046, "step": 15830 }, { "epoch": 0.2591835065041316, "grad_norm": 0.2912919858561665, "learning_rate": 9.24783686224868e-06, "loss": 0.0049, "step": 15840 }, { "epoch": 0.2593471324552074, "grad_norm": 0.28108342588539087, "learning_rate": 9.246329771899745e-06, "loss": 0.0052, "step": 15850 }, { "epoch": 0.25951075840628324, "grad_norm": 0.14382390288727165, "learning_rate": 9.244821296260816e-06, "loss": 0.0047, "step": 15860 }, { "epoch": 0.25967438435735907, "grad_norm": 0.40137604861663184, "learning_rate": 9.243311435824007e-06, "loss": 0.0047, "step": 15870 }, { "epoch": 0.2598380103084349, "grad_norm": 0.2998970927302701, "learning_rate": 9.241800191081885e-06, "loss": 0.0069, "step": 15880 }, { "epoch": 0.2600016362595108, "grad_norm": 0.31822855662971483, "learning_rate": 9.240287562527465e-06, "loss": 0.0041, "step": 15890 }, { "epoch": 0.2601652622105866, "grad_norm": 0.39731230282537305, "learning_rate": 9.238773550654216e-06, "loss": 0.0042, "step": 15900 }, { "epoch": 0.26032888816166244, "grad_norm": 0.36608323151151295, "learning_rate": 9.23725815595606e-06, "loss": 0.0053, "step": 15910 }, { "epoch": 0.26049251411273827, "grad_norm": 0.6111761802997118, "learning_rate": 9.235741378927362e-06, "loss": 0.0102, "step": 15920 }, { "epoch": 0.2606561400638141, "grad_norm": 0.267587602821666, "learning_rate": 9.23422322006295e-06, "loss": 0.0071, "step": 15930 }, { "epoch": 0.26081976601489, "grad_norm": 0.24980430542382004, "learning_rate": 9.232703679858092e-06, "loss": 0.0039, "step": 15940 }, { "epoch": 0.2609833919659658, "grad_norm": 0.17696743983110425, "learning_rate": 9.231182758808513e-06, "loss": 0.0043, "step": 15950 }, { "epoch": 0.26114701791704165, "grad_norm": 0.749844863030081, "learning_rate": 9.229660457410385e-06, "loss": 0.0125, "step": 15960 }, { "epoch": 0.2613106438681175, "grad_norm": 0.31216184453003504, "learning_rate": 9.228136776160336e-06, "loss": 0.0069, "step": 15970 }, { "epoch": 0.2614742698191933, "grad_norm": 0.19592320314798167, "learning_rate": 9.226611715555435e-06, "loss": 0.0051, "step": 15980 }, { "epoch": 0.2616378957702692, "grad_norm": 0.3399569954269113, "learning_rate": 9.225085276093208e-06, "loss": 0.0071, "step": 15990 }, { "epoch": 0.261801521721345, "grad_norm": 0.09711338006471965, "learning_rate": 9.223557458271631e-06, "loss": 0.0065, "step": 16000 }, { "epoch": 0.26196514767242085, "grad_norm": 0.1624696169494459, "learning_rate": 9.222028262589125e-06, "loss": 0.0062, "step": 16010 }, { "epoch": 0.2621287736234967, "grad_norm": 0.1464514361768134, "learning_rate": 9.220497689544565e-06, "loss": 0.0049, "step": 16020 }, { "epoch": 0.2622923995745725, "grad_norm": 0.315712020167498, "learning_rate": 9.21896573963727e-06, "loss": 0.0073, "step": 16030 }, { "epoch": 0.2624560255256484, "grad_norm": 0.2340710221026737, "learning_rate": 9.217432413367015e-06, "loss": 0.0057, "step": 16040 }, { "epoch": 0.2626196514767242, "grad_norm": 0.4481334158908465, "learning_rate": 9.215897711234019e-06, "loss": 0.006, "step": 16050 }, { "epoch": 0.26278327742780005, "grad_norm": 0.3681200585180176, "learning_rate": 9.214361633738951e-06, "loss": 0.0055, "step": 16060 }, { "epoch": 0.2629469033788759, "grad_norm": 0.9873727888723629, "learning_rate": 9.21282418138293e-06, "loss": 0.0042, "step": 16070 }, { "epoch": 0.2631105293299517, "grad_norm": 0.2502418228336838, "learning_rate": 9.211285354667525e-06, "loss": 0.0038, "step": 16080 }, { "epoch": 0.26327415528102754, "grad_norm": 0.3852506178246809, "learning_rate": 9.209745154094745e-06, "loss": 0.0061, "step": 16090 }, { "epoch": 0.26343778123210343, "grad_norm": 0.21968144079947483, "learning_rate": 9.20820358016706e-06, "loss": 0.0076, "step": 16100 }, { "epoch": 0.26360140718317926, "grad_norm": 0.5682113116868072, "learning_rate": 9.206660633387374e-06, "loss": 0.0065, "step": 16110 }, { "epoch": 0.2637650331342551, "grad_norm": 0.12985185614621797, "learning_rate": 9.205116314259052e-06, "loss": 0.0061, "step": 16120 }, { "epoch": 0.2639286590853309, "grad_norm": 0.24235605912591063, "learning_rate": 9.2035706232859e-06, "loss": 0.0057, "step": 16130 }, { "epoch": 0.26409228503640675, "grad_norm": 0.3977661903999316, "learning_rate": 9.202023560972171e-06, "loss": 0.0046, "step": 16140 }, { "epoch": 0.26425591098748263, "grad_norm": 0.1100857998918415, "learning_rate": 9.200475127822565e-06, "loss": 0.0054, "step": 16150 }, { "epoch": 0.26441953693855846, "grad_norm": 0.22110769508196634, "learning_rate": 9.198925324342234e-06, "loss": 0.005, "step": 16160 }, { "epoch": 0.2645831628896343, "grad_norm": 0.47260215709791953, "learning_rate": 9.197374151036772e-06, "loss": 0.0037, "step": 16170 }, { "epoch": 0.2647467888407101, "grad_norm": 0.427479677142539, "learning_rate": 9.195821608412226e-06, "loss": 0.0087, "step": 16180 }, { "epoch": 0.26491041479178595, "grad_norm": 0.4068049439722783, "learning_rate": 9.194267696975078e-06, "loss": 0.0121, "step": 16190 }, { "epoch": 0.26507404074286184, "grad_norm": 0.15077933513095784, "learning_rate": 9.19271241723227e-06, "loss": 0.0029, "step": 16200 }, { "epoch": 0.26523766669393767, "grad_norm": 0.40972426930346395, "learning_rate": 9.191155769691183e-06, "loss": 0.0053, "step": 16210 }, { "epoch": 0.2654012926450135, "grad_norm": 0.3646243472243028, "learning_rate": 9.189597754859646e-06, "loss": 0.0047, "step": 16220 }, { "epoch": 0.2655649185960893, "grad_norm": 0.2558549733916791, "learning_rate": 9.188038373245934e-06, "loss": 0.0066, "step": 16230 }, { "epoch": 0.26572854454716516, "grad_norm": 0.18987672564540053, "learning_rate": 9.186477625358765e-06, "loss": 0.0051, "step": 16240 }, { "epoch": 0.26589217049824104, "grad_norm": 0.19000960438179892, "learning_rate": 9.184915511707308e-06, "loss": 0.0042, "step": 16250 }, { "epoch": 0.2660557964493169, "grad_norm": 0.276272599895, "learning_rate": 9.183352032801176e-06, "loss": 0.0034, "step": 16260 }, { "epoch": 0.2662194224003927, "grad_norm": 0.32598802032195595, "learning_rate": 9.181787189150425e-06, "loss": 0.0064, "step": 16270 }, { "epoch": 0.26638304835146853, "grad_norm": 1.1762093269456946, "learning_rate": 9.180220981265555e-06, "loss": 0.0043, "step": 16280 }, { "epoch": 0.26654667430254436, "grad_norm": 0.2927702563467667, "learning_rate": 9.178653409657516e-06, "loss": 0.0057, "step": 16290 }, { "epoch": 0.26671030025362025, "grad_norm": 0.15808257048176252, "learning_rate": 9.177084474837702e-06, "loss": 0.0051, "step": 16300 }, { "epoch": 0.2668739262046961, "grad_norm": 0.25449279001389224, "learning_rate": 9.175514177317947e-06, "loss": 0.0055, "step": 16310 }, { "epoch": 0.2670375521557719, "grad_norm": 0.2658931474874935, "learning_rate": 9.173942517610537e-06, "loss": 0.0068, "step": 16320 }, { "epoch": 0.26720117810684774, "grad_norm": 0.37278190959936935, "learning_rate": 9.172369496228195e-06, "loss": 0.0065, "step": 16330 }, { "epoch": 0.26736480405792357, "grad_norm": 0.139793344097985, "learning_rate": 9.170795113684092e-06, "loss": 0.0076, "step": 16340 }, { "epoch": 0.26752843000899945, "grad_norm": 0.12159953997731686, "learning_rate": 9.169219370491842e-06, "loss": 0.0043, "step": 16350 }, { "epoch": 0.2676920559600753, "grad_norm": 0.6851385185370912, "learning_rate": 9.167642267165503e-06, "loss": 0.0071, "step": 16360 }, { "epoch": 0.2678556819111511, "grad_norm": 0.21222172206097425, "learning_rate": 9.16606380421958e-06, "loss": 0.0061, "step": 16370 }, { "epoch": 0.26801930786222694, "grad_norm": 0.2515939141550328, "learning_rate": 9.164483982169014e-06, "loss": 0.0069, "step": 16380 }, { "epoch": 0.26818293381330277, "grad_norm": 0.2673037322690824, "learning_rate": 9.1629028015292e-06, "loss": 0.0097, "step": 16390 }, { "epoch": 0.26834655976437866, "grad_norm": 0.27115041179488925, "learning_rate": 9.161320262815964e-06, "loss": 0.0059, "step": 16400 }, { "epoch": 0.2685101857154545, "grad_norm": 0.27777013320319616, "learning_rate": 9.159736366545585e-06, "loss": 0.0045, "step": 16410 }, { "epoch": 0.2686738116665303, "grad_norm": 0.4373588297429033, "learning_rate": 9.15815111323478e-06, "loss": 0.0078, "step": 16420 }, { "epoch": 0.26883743761760615, "grad_norm": 0.36616496539841586, "learning_rate": 9.156564503400711e-06, "loss": 0.0053, "step": 16430 }, { "epoch": 0.269001063568682, "grad_norm": 0.3664139287368561, "learning_rate": 9.15497653756098e-06, "loss": 0.0062, "step": 16440 }, { "epoch": 0.26916468951975786, "grad_norm": 0.3282865865169168, "learning_rate": 9.153387216233634e-06, "loss": 0.0058, "step": 16450 }, { "epoch": 0.2693283154708337, "grad_norm": 1.123811052445781, "learning_rate": 9.15179653993716e-06, "loss": 0.0063, "step": 16460 }, { "epoch": 0.2694919414219095, "grad_norm": 0.4781598360290196, "learning_rate": 9.150204509190487e-06, "loss": 0.0082, "step": 16470 }, { "epoch": 0.26965556737298535, "grad_norm": 0.4861164234865872, "learning_rate": 9.148611124512989e-06, "loss": 0.0084, "step": 16480 }, { "epoch": 0.2698191933240612, "grad_norm": 0.13751985791004354, "learning_rate": 9.147016386424478e-06, "loss": 0.0053, "step": 16490 }, { "epoch": 0.269982819275137, "grad_norm": 0.2194043172380987, "learning_rate": 9.145420295445211e-06, "loss": 0.0058, "step": 16500 }, { "epoch": 0.2701464452262129, "grad_norm": 0.4043655161163458, "learning_rate": 9.143822852095882e-06, "loss": 0.0074, "step": 16510 }, { "epoch": 0.2703100711772887, "grad_norm": 0.12532343095695817, "learning_rate": 9.142224056897632e-06, "loss": 0.0065, "step": 16520 }, { "epoch": 0.27047369712836455, "grad_norm": 0.404539253864606, "learning_rate": 9.140623910372036e-06, "loss": 0.0053, "step": 16530 }, { "epoch": 0.2706373230794404, "grad_norm": 0.39385997659071464, "learning_rate": 9.139022413041116e-06, "loss": 0.004, "step": 16540 }, { "epoch": 0.2708009490305162, "grad_norm": 0.30488845326475833, "learning_rate": 9.13741956542733e-06, "loss": 0.0106, "step": 16550 }, { "epoch": 0.2709645749815921, "grad_norm": 0.28862961532336373, "learning_rate": 9.13581536805358e-06, "loss": 0.0071, "step": 16560 }, { "epoch": 0.27112820093266793, "grad_norm": 0.16617848849213243, "learning_rate": 9.134209821443207e-06, "loss": 0.0048, "step": 16570 }, { "epoch": 0.27129182688374376, "grad_norm": 0.30063968274649416, "learning_rate": 9.132602926119993e-06, "loss": 0.0057, "step": 16580 }, { "epoch": 0.2714554528348196, "grad_norm": 0.32286472707696584, "learning_rate": 9.130994682608158e-06, "loss": 0.0045, "step": 16590 }, { "epoch": 0.2716190787858954, "grad_norm": 0.5709931154407546, "learning_rate": 9.129385091432363e-06, "loss": 0.0056, "step": 16600 }, { "epoch": 0.2717827047369713, "grad_norm": 0.2842297296367366, "learning_rate": 9.12777415311771e-06, "loss": 0.007, "step": 16610 }, { "epoch": 0.27194633068804713, "grad_norm": 0.14358795667053467, "learning_rate": 9.126161868189737e-06, "loss": 0.0037, "step": 16620 }, { "epoch": 0.27210995663912296, "grad_norm": 0.061896592319254755, "learning_rate": 9.124548237174426e-06, "loss": 0.0035, "step": 16630 }, { "epoch": 0.2722735825901988, "grad_norm": 0.13172238433050146, "learning_rate": 9.122933260598193e-06, "loss": 0.0052, "step": 16640 }, { "epoch": 0.2724372085412746, "grad_norm": 0.28068527681033073, "learning_rate": 9.121316938987897e-06, "loss": 0.0053, "step": 16650 }, { "epoch": 0.2726008344923505, "grad_norm": 0.37718647786653287, "learning_rate": 9.119699272870835e-06, "loss": 0.0065, "step": 16660 }, { "epoch": 0.27276446044342634, "grad_norm": 0.25271195197285157, "learning_rate": 9.118080262774738e-06, "loss": 0.0055, "step": 16670 }, { "epoch": 0.27292808639450217, "grad_norm": 0.24992086261862945, "learning_rate": 9.116459909227786e-06, "loss": 0.0029, "step": 16680 }, { "epoch": 0.273091712345578, "grad_norm": 0.5070520608668672, "learning_rate": 9.114838212758586e-06, "loss": 0.0066, "step": 16690 }, { "epoch": 0.2732553382966538, "grad_norm": 0.2468771328638775, "learning_rate": 9.113215173896186e-06, "loss": 0.0069, "step": 16700 }, { "epoch": 0.2734189642477297, "grad_norm": 0.3124143941338982, "learning_rate": 9.11159079317008e-06, "loss": 0.0051, "step": 16710 }, { "epoch": 0.27358259019880554, "grad_norm": 0.12410613250248503, "learning_rate": 9.10996507111019e-06, "loss": 0.0042, "step": 16720 }, { "epoch": 0.27374621614988137, "grad_norm": 0.29237440352959504, "learning_rate": 9.108338008246878e-06, "loss": 0.0093, "step": 16730 }, { "epoch": 0.2739098421009572, "grad_norm": 0.23018482446137556, "learning_rate": 9.106709605110946e-06, "loss": 0.0051, "step": 16740 }, { "epoch": 0.27407346805203303, "grad_norm": 0.43116424608691356, "learning_rate": 9.10507986223363e-06, "loss": 0.0035, "step": 16750 }, { "epoch": 0.2742370940031089, "grad_norm": 0.7167975874742553, "learning_rate": 9.103448780146609e-06, "loss": 0.0047, "step": 16760 }, { "epoch": 0.27440071995418475, "grad_norm": 0.32556554865496257, "learning_rate": 9.10181635938199e-06, "loss": 0.0063, "step": 16770 }, { "epoch": 0.2745643459052606, "grad_norm": 0.20405694563286297, "learning_rate": 9.100182600472324e-06, "loss": 0.0048, "step": 16780 }, { "epoch": 0.2747279718563364, "grad_norm": 0.37260164160194664, "learning_rate": 9.098547503950595e-06, "loss": 0.0059, "step": 16790 }, { "epoch": 0.27489159780741224, "grad_norm": 0.29291779516377503, "learning_rate": 9.096911070350224e-06, "loss": 0.0044, "step": 16800 }, { "epoch": 0.2750552237584881, "grad_norm": 0.27726793544459266, "learning_rate": 9.09527330020507e-06, "loss": 0.0032, "step": 16810 }, { "epoch": 0.27521884970956395, "grad_norm": 0.14699445694045887, "learning_rate": 9.093634194049426e-06, "loss": 0.0041, "step": 16820 }, { "epoch": 0.2753824756606398, "grad_norm": 0.33292535913205984, "learning_rate": 9.091993752418021e-06, "loss": 0.0046, "step": 16830 }, { "epoch": 0.2755461016117156, "grad_norm": 0.37292553144646545, "learning_rate": 9.09035197584602e-06, "loss": 0.0076, "step": 16840 }, { "epoch": 0.27570972756279144, "grad_norm": 0.14270971549604855, "learning_rate": 9.088708864869022e-06, "loss": 0.0029, "step": 16850 }, { "epoch": 0.2758733535138673, "grad_norm": 0.3723486209471581, "learning_rate": 9.087064420023065e-06, "loss": 0.0089, "step": 16860 }, { "epoch": 0.27603697946494316, "grad_norm": 0.18615774717755265, "learning_rate": 9.085418641844622e-06, "loss": 0.0055, "step": 16870 }, { "epoch": 0.276200605416019, "grad_norm": 0.2707819121469773, "learning_rate": 9.083771530870595e-06, "loss": 0.0065, "step": 16880 }, { "epoch": 0.2763642313670948, "grad_norm": 0.4581421950572611, "learning_rate": 9.082123087638325e-06, "loss": 0.0036, "step": 16890 }, { "epoch": 0.27652785731817064, "grad_norm": 0.4380214449614211, "learning_rate": 9.080473312685592e-06, "loss": 0.0088, "step": 16900 }, { "epoch": 0.27669148326924653, "grad_norm": 0.42288449154578683, "learning_rate": 9.0788222065506e-06, "loss": 0.0061, "step": 16910 }, { "epoch": 0.27685510922032236, "grad_norm": 0.39832504292257903, "learning_rate": 9.077169769771998e-06, "loss": 0.0076, "step": 16920 }, { "epoch": 0.2770187351713982, "grad_norm": 0.33699388095297433, "learning_rate": 9.075516002888862e-06, "loss": 0.0042, "step": 16930 }, { "epoch": 0.277182361122474, "grad_norm": 0.24703033693729362, "learning_rate": 9.073860906440703e-06, "loss": 0.0066, "step": 16940 }, { "epoch": 0.27734598707354985, "grad_norm": 0.2996451015044408, "learning_rate": 9.07220448096747e-06, "loss": 0.0068, "step": 16950 }, { "epoch": 0.2775096130246257, "grad_norm": 0.35672567459573773, "learning_rate": 9.070546727009539e-06, "loss": 0.0062, "step": 16960 }, { "epoch": 0.27767323897570156, "grad_norm": 0.2796483859279378, "learning_rate": 9.068887645107726e-06, "loss": 0.0056, "step": 16970 }, { "epoch": 0.2778368649267774, "grad_norm": 0.023606022774942986, "learning_rate": 9.067227235803277e-06, "loss": 0.0076, "step": 16980 }, { "epoch": 0.2780004908778532, "grad_norm": 0.27584039758113854, "learning_rate": 9.065565499637869e-06, "loss": 0.0047, "step": 16990 }, { "epoch": 0.27816411682892905, "grad_norm": 0.4360064860040226, "learning_rate": 9.063902437153616e-06, "loss": 0.004, "step": 17000 }, { "epoch": 0.2783277427800049, "grad_norm": 0.21188956130508307, "learning_rate": 9.062238048893061e-06, "loss": 0.0071, "step": 17010 }, { "epoch": 0.27849136873108077, "grad_norm": 0.19395505055327497, "learning_rate": 9.060572335399183e-06, "loss": 0.0093, "step": 17020 }, { "epoch": 0.2786549946821566, "grad_norm": 0.3649470858688348, "learning_rate": 9.05890529721539e-06, "loss": 0.0033, "step": 17030 }, { "epoch": 0.27881862063323243, "grad_norm": 0.033497644451395296, "learning_rate": 9.057236934885524e-06, "loss": 0.0044, "step": 17040 }, { "epoch": 0.27898224658430826, "grad_norm": 0.33870093461295264, "learning_rate": 9.05556724895386e-06, "loss": 0.0046, "step": 17050 }, { "epoch": 0.2791458725353841, "grad_norm": 0.31853956692180563, "learning_rate": 9.053896239965104e-06, "loss": 0.0062, "step": 17060 }, { "epoch": 0.27930949848646, "grad_norm": 0.13327991231249448, "learning_rate": 9.05222390846439e-06, "loss": 0.0039, "step": 17070 }, { "epoch": 0.2794731244375358, "grad_norm": 0.24398948097927978, "learning_rate": 9.050550254997288e-06, "loss": 0.0057, "step": 17080 }, { "epoch": 0.27963675038861163, "grad_norm": 0.19730903980566172, "learning_rate": 9.048875280109798e-06, "loss": 0.0037, "step": 17090 }, { "epoch": 0.27980037633968746, "grad_norm": 0.18928273627153439, "learning_rate": 9.047198984348353e-06, "loss": 0.0068, "step": 17100 }, { "epoch": 0.2799640022907633, "grad_norm": 0.057966889915412885, "learning_rate": 9.045521368259813e-06, "loss": 0.0048, "step": 17110 }, { "epoch": 0.2801276282418392, "grad_norm": 0.39260989032748844, "learning_rate": 9.04384243239147e-06, "loss": 0.0078, "step": 17120 }, { "epoch": 0.280291254192915, "grad_norm": 0.10091180510684265, "learning_rate": 9.042162177291047e-06, "loss": 0.0044, "step": 17130 }, { "epoch": 0.28045488014399084, "grad_norm": 0.2489850493347078, "learning_rate": 9.040480603506699e-06, "loss": 0.0046, "step": 17140 }, { "epoch": 0.28061850609506667, "grad_norm": 0.36502851008245196, "learning_rate": 9.038797711587008e-06, "loss": 0.01, "step": 17150 }, { "epoch": 0.2807821320461425, "grad_norm": 0.28764326182728056, "learning_rate": 9.03711350208099e-06, "loss": 0.0066, "step": 17160 }, { "epoch": 0.2809457579972184, "grad_norm": 0.23908376733862932, "learning_rate": 9.035427975538085e-06, "loss": 0.0051, "step": 17170 }, { "epoch": 0.2811093839482942, "grad_norm": 0.48911992251028985, "learning_rate": 9.03374113250817e-06, "loss": 0.0058, "step": 17180 }, { "epoch": 0.28127300989937004, "grad_norm": 0.41348377410006, "learning_rate": 9.032052973541546e-06, "loss": 0.0051, "step": 17190 }, { "epoch": 0.28143663585044587, "grad_norm": 0.34645790235967133, "learning_rate": 9.030363499188945e-06, "loss": 0.0077, "step": 17200 }, { "epoch": 0.2816002618015217, "grad_norm": 0.4665938835557448, "learning_rate": 9.02867271000153e-06, "loss": 0.0057, "step": 17210 }, { "epoch": 0.2817638877525976, "grad_norm": 0.24234971544515563, "learning_rate": 9.026980606530887e-06, "loss": 0.0068, "step": 17220 }, { "epoch": 0.2819275137036734, "grad_norm": 0.6641114742761444, "learning_rate": 9.025287189329037e-06, "loss": 0.0046, "step": 17230 }, { "epoch": 0.28209113965474925, "grad_norm": 0.0869959670677447, "learning_rate": 9.023592458948429e-06, "loss": 0.0057, "step": 17240 }, { "epoch": 0.2822547656058251, "grad_norm": 0.39168519474225255, "learning_rate": 9.021896415941937e-06, "loss": 0.0047, "step": 17250 }, { "epoch": 0.2824183915569009, "grad_norm": 0.19561469148036173, "learning_rate": 9.020199060862866e-06, "loss": 0.0055, "step": 17260 }, { "epoch": 0.2825820175079768, "grad_norm": 0.1374125961497764, "learning_rate": 9.018500394264948e-06, "loss": 0.0058, "step": 17270 }, { "epoch": 0.2827456434590526, "grad_norm": 0.17806818971856353, "learning_rate": 9.01680041670234e-06, "loss": 0.0046, "step": 17280 }, { "epoch": 0.28290926941012845, "grad_norm": 0.1314765877410827, "learning_rate": 9.015099128729635e-06, "loss": 0.006, "step": 17290 }, { "epoch": 0.2830728953612043, "grad_norm": 0.17452536820361048, "learning_rate": 9.013396530901843e-06, "loss": 0.0043, "step": 17300 }, { "epoch": 0.2832365213122801, "grad_norm": 0.20534089478193973, "learning_rate": 9.01169262377441e-06, "loss": 0.0036, "step": 17310 }, { "epoch": 0.283400147263356, "grad_norm": 0.14910800353288656, "learning_rate": 9.009987407903205e-06, "loss": 0.0043, "step": 17320 }, { "epoch": 0.2835637732144318, "grad_norm": 0.10230165344511472, "learning_rate": 9.008280883844523e-06, "loss": 0.0044, "step": 17330 }, { "epoch": 0.28372739916550765, "grad_norm": 0.3414558865966233, "learning_rate": 9.006573052155087e-06, "loss": 0.0047, "step": 17340 }, { "epoch": 0.2838910251165835, "grad_norm": 0.24668255897968053, "learning_rate": 9.004863913392047e-06, "loss": 0.0036, "step": 17350 }, { "epoch": 0.2840546510676593, "grad_norm": 0.35223751902469175, "learning_rate": 9.003153468112982e-06, "loss": 0.0044, "step": 17360 }, { "epoch": 0.28421827701873514, "grad_norm": 0.40454125751972686, "learning_rate": 9.001441716875893e-06, "loss": 0.0037, "step": 17370 }, { "epoch": 0.28438190296981103, "grad_norm": 0.2614766062667816, "learning_rate": 8.999728660239207e-06, "loss": 0.0042, "step": 17380 }, { "epoch": 0.28454552892088686, "grad_norm": 0.11378411123274325, "learning_rate": 8.99801429876178e-06, "loss": 0.0061, "step": 17390 }, { "epoch": 0.2847091548719627, "grad_norm": 0.12174561884862016, "learning_rate": 8.996298633002894e-06, "loss": 0.0054, "step": 17400 }, { "epoch": 0.2848727808230385, "grad_norm": 0.1211912745790691, "learning_rate": 8.994581663522251e-06, "loss": 0.0037, "step": 17410 }, { "epoch": 0.28503640677411435, "grad_norm": 0.8700669669677659, "learning_rate": 8.992863390879983e-06, "loss": 0.0061, "step": 17420 }, { "epoch": 0.28520003272519023, "grad_norm": 0.46224671200495093, "learning_rate": 8.991143815636646e-06, "loss": 0.0057, "step": 17430 }, { "epoch": 0.28536365867626606, "grad_norm": 0.15800522754695734, "learning_rate": 8.989422938353224e-06, "loss": 0.0047, "step": 17440 }, { "epoch": 0.2855272846273419, "grad_norm": 0.3776115586561121, "learning_rate": 8.987700759591117e-06, "loss": 0.0067, "step": 17450 }, { "epoch": 0.2856909105784177, "grad_norm": 0.18240876229857578, "learning_rate": 8.98597727991216e-06, "loss": 0.007, "step": 17460 }, { "epoch": 0.28585453652949355, "grad_norm": 0.2870768986438059, "learning_rate": 8.984252499878605e-06, "loss": 0.0051, "step": 17470 }, { "epoch": 0.28601816248056944, "grad_norm": 0.29682256498002096, "learning_rate": 8.982526420053134e-06, "loss": 0.0032, "step": 17480 }, { "epoch": 0.28618178843164527, "grad_norm": 0.620859209387552, "learning_rate": 8.980799040998849e-06, "loss": 0.0045, "step": 17490 }, { "epoch": 0.2863454143827211, "grad_norm": 0.14908789786435275, "learning_rate": 8.979070363279275e-06, "loss": 0.0048, "step": 17500 }, { "epoch": 0.2865090403337969, "grad_norm": 0.12589853842307375, "learning_rate": 8.977340387458365e-06, "loss": 0.0031, "step": 17510 }, { "epoch": 0.28667266628487276, "grad_norm": 0.5752820747930553, "learning_rate": 8.97560911410049e-06, "loss": 0.0082, "step": 17520 }, { "epoch": 0.28683629223594864, "grad_norm": 0.2396749134215087, "learning_rate": 8.97387654377045e-06, "loss": 0.0048, "step": 17530 }, { "epoch": 0.28699991818702447, "grad_norm": 0.15013907966603993, "learning_rate": 8.972142677033463e-06, "loss": 0.0037, "step": 17540 }, { "epoch": 0.2871635441381003, "grad_norm": 0.3009543060005969, "learning_rate": 8.970407514455177e-06, "loss": 0.0052, "step": 17550 }, { "epoch": 0.28732717008917613, "grad_norm": 0.5092118824445776, "learning_rate": 8.968671056601654e-06, "loss": 0.0083, "step": 17560 }, { "epoch": 0.28749079604025196, "grad_norm": 0.3151423385490922, "learning_rate": 8.966933304039382e-06, "loss": 0.0061, "step": 17570 }, { "epoch": 0.28765442199132785, "grad_norm": 0.2757763705127762, "learning_rate": 8.965194257335278e-06, "loss": 0.0065, "step": 17580 }, { "epoch": 0.2878180479424037, "grad_norm": 0.31616577434600274, "learning_rate": 8.963453917056666e-06, "loss": 0.0047, "step": 17590 }, { "epoch": 0.2879816738934795, "grad_norm": 0.6927267992672811, "learning_rate": 8.961712283771308e-06, "loss": 0.0101, "step": 17600 }, { "epoch": 0.28814529984455534, "grad_norm": 0.06416194335391504, "learning_rate": 8.95996935804738e-06, "loss": 0.0043, "step": 17610 }, { "epoch": 0.28830892579563117, "grad_norm": 0.27266230434167743, "learning_rate": 8.958225140453479e-06, "loss": 0.0033, "step": 17620 }, { "epoch": 0.28847255174670705, "grad_norm": 0.45188789865407597, "learning_rate": 8.956479631558625e-06, "loss": 0.0087, "step": 17630 }, { "epoch": 0.2886361776977829, "grad_norm": 0.31034738480716423, "learning_rate": 8.954732831932262e-06, "loss": 0.0049, "step": 17640 }, { "epoch": 0.2887998036488587, "grad_norm": 0.3683349327627979, "learning_rate": 8.952984742144251e-06, "loss": 0.0044, "step": 17650 }, { "epoch": 0.28896342959993454, "grad_norm": 0.13618319303300255, "learning_rate": 8.951235362764876e-06, "loss": 0.0043, "step": 17660 }, { "epoch": 0.28912705555101037, "grad_norm": 0.1516569099587365, "learning_rate": 8.94948469436484e-06, "loss": 0.0094, "step": 17670 }, { "epoch": 0.28929068150208626, "grad_norm": 0.28981642376933275, "learning_rate": 8.947732737515267e-06, "loss": 0.005, "step": 17680 }, { "epoch": 0.2894543074531621, "grad_norm": 0.10721092407987674, "learning_rate": 8.945979492787704e-06, "loss": 0.0044, "step": 17690 }, { "epoch": 0.2896179334042379, "grad_norm": 0.09464912754881448, "learning_rate": 8.944224960754114e-06, "loss": 0.0038, "step": 17700 }, { "epoch": 0.28978155935531374, "grad_norm": 0.5280492576363877, "learning_rate": 8.942469141986884e-06, "loss": 0.0041, "step": 17710 }, { "epoch": 0.2899451853063896, "grad_norm": 0.40234931272393704, "learning_rate": 8.940712037058818e-06, "loss": 0.0046, "step": 17720 }, { "epoch": 0.29010881125746546, "grad_norm": 0.2918394805824406, "learning_rate": 8.93895364654314e-06, "loss": 0.006, "step": 17730 }, { "epoch": 0.2902724372085413, "grad_norm": 0.23525890639420918, "learning_rate": 8.937193971013494e-06, "loss": 0.0062, "step": 17740 }, { "epoch": 0.2904360631596171, "grad_norm": 0.19821305982012602, "learning_rate": 8.935433011043944e-06, "loss": 0.0068, "step": 17750 }, { "epoch": 0.29059968911069295, "grad_norm": 0.19061490060542657, "learning_rate": 8.93367076720897e-06, "loss": 0.0044, "step": 17760 }, { "epoch": 0.2907633150617688, "grad_norm": 0.14079871790277276, "learning_rate": 8.931907240083475e-06, "loss": 0.0059, "step": 17770 }, { "epoch": 0.29092694101284466, "grad_norm": 0.2893343061218463, "learning_rate": 8.930142430242779e-06, "loss": 0.0046, "step": 17780 }, { "epoch": 0.2910905669639205, "grad_norm": 0.14988275197951142, "learning_rate": 8.928376338262616e-06, "loss": 0.0051, "step": 17790 }, { "epoch": 0.2912541929149963, "grad_norm": 0.3125567749806149, "learning_rate": 8.926608964719146e-06, "loss": 0.0034, "step": 17800 }, { "epoch": 0.29141781886607215, "grad_norm": 0.16145206350483474, "learning_rate": 8.924840310188943e-06, "loss": 0.0045, "step": 17810 }, { "epoch": 0.291581444817148, "grad_norm": 0.19849447279373644, "learning_rate": 8.923070375249e-06, "loss": 0.0088, "step": 17820 }, { "epoch": 0.2917450707682238, "grad_norm": 0.05015417132354301, "learning_rate": 8.921299160476722e-06, "loss": 0.0038, "step": 17830 }, { "epoch": 0.2919086967192997, "grad_norm": 0.0923168847667189, "learning_rate": 8.919526666449942e-06, "loss": 0.0035, "step": 17840 }, { "epoch": 0.29207232267037553, "grad_norm": 0.3296136564968153, "learning_rate": 8.917752893746902e-06, "loss": 0.0042, "step": 17850 }, { "epoch": 0.29223594862145136, "grad_norm": 0.33119443016129474, "learning_rate": 8.915977842946265e-06, "loss": 0.0066, "step": 17860 }, { "epoch": 0.2923995745725272, "grad_norm": 0.3831678513747912, "learning_rate": 8.91420151462711e-06, "loss": 0.0039, "step": 17870 }, { "epoch": 0.292563200523603, "grad_norm": 0.4927639726226381, "learning_rate": 8.912423909368932e-06, "loss": 0.0049, "step": 17880 }, { "epoch": 0.2927268264746789, "grad_norm": 0.1330237465558743, "learning_rate": 8.910645027751646e-06, "loss": 0.0052, "step": 17890 }, { "epoch": 0.29289045242575473, "grad_norm": 0.2635275296728759, "learning_rate": 8.908864870355576e-06, "loss": 0.0049, "step": 17900 }, { "epoch": 0.29305407837683056, "grad_norm": 0.601864934306598, "learning_rate": 8.90708343776147e-06, "loss": 0.0077, "step": 17910 }, { "epoch": 0.2932177043279064, "grad_norm": 0.27432386746488, "learning_rate": 8.905300730550488e-06, "loss": 0.0049, "step": 17920 }, { "epoch": 0.2933813302789822, "grad_norm": 0.27032056982719216, "learning_rate": 8.903516749304206e-06, "loss": 0.004, "step": 17930 }, { "epoch": 0.2935449562300581, "grad_norm": 0.21771054757089187, "learning_rate": 8.90173149460462e-06, "loss": 0.0051, "step": 17940 }, { "epoch": 0.29370858218113394, "grad_norm": 0.1315058218148815, "learning_rate": 8.899944967034132e-06, "loss": 0.0052, "step": 17950 }, { "epoch": 0.29387220813220977, "grad_norm": 0.13910142824726318, "learning_rate": 8.89815716717557e-06, "loss": 0.0029, "step": 17960 }, { "epoch": 0.2940358340832856, "grad_norm": 0.01675202395006825, "learning_rate": 8.896368095612169e-06, "loss": 0.0082, "step": 17970 }, { "epoch": 0.2941994600343614, "grad_norm": 0.23263561522251786, "learning_rate": 8.894577752927582e-06, "loss": 0.0055, "step": 17980 }, { "epoch": 0.2943630859854373, "grad_norm": 0.22604968974701078, "learning_rate": 8.89278613970588e-06, "loss": 0.0069, "step": 17990 }, { "epoch": 0.29452671193651314, "grad_norm": 0.1791988325137757, "learning_rate": 8.890993256531542e-06, "loss": 0.0042, "step": 18000 }, { "epoch": 0.29469033788758897, "grad_norm": 0.11094865409149303, "learning_rate": 8.889199103989464e-06, "loss": 0.0042, "step": 18010 }, { "epoch": 0.2948539638386648, "grad_norm": 0.36740796795734953, "learning_rate": 8.88740368266496e-06, "loss": 0.0048, "step": 18020 }, { "epoch": 0.29501758978974063, "grad_norm": 0.28781287478582895, "learning_rate": 8.885606993143749e-06, "loss": 0.0075, "step": 18030 }, { "epoch": 0.2951812157408165, "grad_norm": 0.31541973982435617, "learning_rate": 8.883809036011976e-06, "loss": 0.0034, "step": 18040 }, { "epoch": 0.29534484169189235, "grad_norm": 0.32578378859721946, "learning_rate": 8.882009811856188e-06, "loss": 0.0041, "step": 18050 }, { "epoch": 0.2955084676429682, "grad_norm": 0.11586893555456125, "learning_rate": 8.88020932126335e-06, "loss": 0.0061, "step": 18060 }, { "epoch": 0.295672093594044, "grad_norm": 0.02074561200049313, "learning_rate": 8.878407564820843e-06, "loss": 0.0082, "step": 18070 }, { "epoch": 0.29583571954511984, "grad_norm": 0.38095183250575, "learning_rate": 8.876604543116455e-06, "loss": 0.0053, "step": 18080 }, { "epoch": 0.2959993454961957, "grad_norm": 0.36704127972134565, "learning_rate": 8.874800256738392e-06, "loss": 0.0061, "step": 18090 }, { "epoch": 0.29616297144727155, "grad_norm": 0.49207222032707215, "learning_rate": 8.87299470627527e-06, "loss": 0.0064, "step": 18100 }, { "epoch": 0.2963265973983474, "grad_norm": 0.29803018063380854, "learning_rate": 8.871187892316118e-06, "loss": 0.0066, "step": 18110 }, { "epoch": 0.2964902233494232, "grad_norm": 0.3317739189031181, "learning_rate": 8.869379815450378e-06, "loss": 0.0077, "step": 18120 }, { "epoch": 0.29665384930049904, "grad_norm": 0.8672938953239664, "learning_rate": 8.867570476267901e-06, "loss": 0.0047, "step": 18130 }, { "epoch": 0.2968174752515749, "grad_norm": 0.2585070653441631, "learning_rate": 8.865759875358954e-06, "loss": 0.0071, "step": 18140 }, { "epoch": 0.29698110120265075, "grad_norm": 0.24712967476682376, "learning_rate": 8.863948013314212e-06, "loss": 0.007, "step": 18150 }, { "epoch": 0.2971447271537266, "grad_norm": 0.20797324521727528, "learning_rate": 8.862134890724766e-06, "loss": 0.0061, "step": 18160 }, { "epoch": 0.2973083531048024, "grad_norm": 0.2654532173251771, "learning_rate": 8.86032050818211e-06, "loss": 0.0054, "step": 18170 }, { "epoch": 0.29747197905587824, "grad_norm": 0.3023944442755151, "learning_rate": 8.858504866278157e-06, "loss": 0.0059, "step": 18180 }, { "epoch": 0.29763560500695413, "grad_norm": 0.38632815570910545, "learning_rate": 8.85668796560523e-06, "loss": 0.0062, "step": 18190 }, { "epoch": 0.29779923095802996, "grad_norm": 0.1995985927407976, "learning_rate": 8.854869806756055e-06, "loss": 0.0051, "step": 18200 }, { "epoch": 0.2979628569091058, "grad_norm": 0.3106347940007905, "learning_rate": 8.853050390323782e-06, "loss": 0.0042, "step": 18210 }, { "epoch": 0.2981264828601816, "grad_norm": 0.2624950787994254, "learning_rate": 8.851229716901956e-06, "loss": 0.004, "step": 18220 }, { "epoch": 0.29829010881125745, "grad_norm": 0.14497805549842754, "learning_rate": 8.849407787084544e-06, "loss": 0.004, "step": 18230 }, { "epoch": 0.29845373476233333, "grad_norm": 0.24301509614623562, "learning_rate": 8.847584601465917e-06, "loss": 0.0054, "step": 18240 }, { "epoch": 0.29861736071340916, "grad_norm": 0.14482218962449522, "learning_rate": 8.845760160640859e-06, "loss": 0.0042, "step": 18250 }, { "epoch": 0.298780986664485, "grad_norm": 0.21618373291120715, "learning_rate": 8.843934465204559e-06, "loss": 0.0051, "step": 18260 }, { "epoch": 0.2989446126155608, "grad_norm": 0.3438290998089132, "learning_rate": 8.842107515752617e-06, "loss": 0.0027, "step": 18270 }, { "epoch": 0.29910823856663665, "grad_norm": 0.35272352998246265, "learning_rate": 8.840279312881047e-06, "loss": 0.0043, "step": 18280 }, { "epoch": 0.2992718645177125, "grad_norm": 0.438358446710203, "learning_rate": 8.838449857186264e-06, "loss": 0.0055, "step": 18290 }, { "epoch": 0.29943549046878837, "grad_norm": 0.3960303052744374, "learning_rate": 8.836619149265098e-06, "loss": 0.005, "step": 18300 }, { "epoch": 0.2995991164198642, "grad_norm": 0.152214604989409, "learning_rate": 8.834787189714782e-06, "loss": 0.0054, "step": 18310 }, { "epoch": 0.29976274237094, "grad_norm": 0.41348139589715827, "learning_rate": 8.832953979132965e-06, "loss": 0.0069, "step": 18320 }, { "epoch": 0.29992636832201586, "grad_norm": 0.23746624190110746, "learning_rate": 8.831119518117697e-06, "loss": 0.0057, "step": 18330 }, { "epoch": 0.3000899942730917, "grad_norm": 0.23850571342638413, "learning_rate": 8.829283807267438e-06, "loss": 0.0055, "step": 18340 }, { "epoch": 0.30025362022416757, "grad_norm": 0.3862587162446117, "learning_rate": 8.827446847181056e-06, "loss": 0.0036, "step": 18350 }, { "epoch": 0.3004172461752434, "grad_norm": 0.17553908364711773, "learning_rate": 8.825608638457827e-06, "loss": 0.0064, "step": 18360 }, { "epoch": 0.30058087212631923, "grad_norm": 0.11614959115650703, "learning_rate": 8.823769181697438e-06, "loss": 0.005, "step": 18370 }, { "epoch": 0.30074449807739506, "grad_norm": 0.23593007854225115, "learning_rate": 8.821928477499971e-06, "loss": 0.0054, "step": 18380 }, { "epoch": 0.3009081240284709, "grad_norm": 0.34429547443834035, "learning_rate": 8.820086526465931e-06, "loss": 0.0054, "step": 18390 }, { "epoch": 0.3010717499795468, "grad_norm": 0.23218797767641983, "learning_rate": 8.818243329196218e-06, "loss": 0.0039, "step": 18400 }, { "epoch": 0.3012353759306226, "grad_norm": 0.09291590616620059, "learning_rate": 8.816398886292144e-06, "loss": 0.004, "step": 18410 }, { "epoch": 0.30139900188169844, "grad_norm": 0.1762724824173207, "learning_rate": 8.814553198355426e-06, "loss": 0.0047, "step": 18420 }, { "epoch": 0.30156262783277427, "grad_norm": 0.0933406893231049, "learning_rate": 8.812706265988184e-06, "loss": 0.0077, "step": 18430 }, { "epoch": 0.3017262537838501, "grad_norm": 0.16745497530475859, "learning_rate": 8.810858089792954e-06, "loss": 0.0052, "step": 18440 }, { "epoch": 0.301889879734926, "grad_norm": 0.15011709878510496, "learning_rate": 8.80900867037266e-06, "loss": 0.0044, "step": 18450 }, { "epoch": 0.3020535056860018, "grad_norm": 0.41890521116933294, "learning_rate": 8.807158008330655e-06, "loss": 0.0052, "step": 18460 }, { "epoch": 0.30221713163707764, "grad_norm": 0.19639114275751726, "learning_rate": 8.805306104270674e-06, "loss": 0.0063, "step": 18470 }, { "epoch": 0.30238075758815347, "grad_norm": 0.21254538749148008, "learning_rate": 8.803452958796873e-06, "loss": 0.0037, "step": 18480 }, { "epoch": 0.3025443835392293, "grad_norm": 0.3223922441717158, "learning_rate": 8.801598572513808e-06, "loss": 0.0053, "step": 18490 }, { "epoch": 0.3027080094903052, "grad_norm": 0.1489398260998695, "learning_rate": 8.79974294602644e-06, "loss": 0.0076, "step": 18500 }, { "epoch": 0.302871635441381, "grad_norm": 0.20338335397090485, "learning_rate": 8.79788607994013e-06, "loss": 0.0048, "step": 18510 }, { "epoch": 0.30303526139245685, "grad_norm": 0.6816236833588541, "learning_rate": 8.796027974860653e-06, "loss": 0.0071, "step": 18520 }, { "epoch": 0.3031988873435327, "grad_norm": 0.18237243002855794, "learning_rate": 8.794168631394182e-06, "loss": 0.0043, "step": 18530 }, { "epoch": 0.3033625132946085, "grad_norm": 0.34741448150285914, "learning_rate": 8.792308050147294e-06, "loss": 0.0043, "step": 18540 }, { "epoch": 0.3035261392456844, "grad_norm": 1.6548022264429862, "learning_rate": 8.790446231726968e-06, "loss": 0.0062, "step": 18550 }, { "epoch": 0.3036897651967602, "grad_norm": 0.48609516012728515, "learning_rate": 8.788583176740593e-06, "loss": 0.0059, "step": 18560 }, { "epoch": 0.30385339114783605, "grad_norm": 0.7505138221548328, "learning_rate": 8.786718885795958e-06, "loss": 0.0087, "step": 18570 }, { "epoch": 0.3040170170989119, "grad_norm": 0.2441613267717992, "learning_rate": 8.784853359501253e-06, "loss": 0.005, "step": 18580 }, { "epoch": 0.3041806430499877, "grad_norm": 0.18304307885020182, "learning_rate": 8.782986598465073e-06, "loss": 0.0035, "step": 18590 }, { "epoch": 0.3043442690010636, "grad_norm": 0.13016361821385472, "learning_rate": 8.781118603296417e-06, "loss": 0.0049, "step": 18600 }, { "epoch": 0.3045078949521394, "grad_norm": 0.4125757277342999, "learning_rate": 8.779249374604684e-06, "loss": 0.0053, "step": 18610 }, { "epoch": 0.30467152090321525, "grad_norm": 0.3800863376069269, "learning_rate": 8.777378912999678e-06, "loss": 0.0063, "step": 18620 }, { "epoch": 0.3048351468542911, "grad_norm": 0.3328264659703371, "learning_rate": 8.775507219091604e-06, "loss": 0.007, "step": 18630 }, { "epoch": 0.3049987728053669, "grad_norm": 0.4556026664281397, "learning_rate": 8.773634293491068e-06, "loss": 0.0073, "step": 18640 }, { "epoch": 0.3051623987564428, "grad_norm": 0.07479442427530784, "learning_rate": 8.77176013680908e-06, "loss": 0.0047, "step": 18650 }, { "epoch": 0.30532602470751863, "grad_norm": 0.09658526922805287, "learning_rate": 8.769884749657049e-06, "loss": 0.0072, "step": 18660 }, { "epoch": 0.30548965065859446, "grad_norm": 0.14616953519103046, "learning_rate": 8.768008132646788e-06, "loss": 0.004, "step": 18670 }, { "epoch": 0.3056532766096703, "grad_norm": 0.266322465595293, "learning_rate": 8.766130286390512e-06, "loss": 0.004, "step": 18680 }, { "epoch": 0.3058169025607461, "grad_norm": 0.31851643094119897, "learning_rate": 8.76425121150083e-06, "loss": 0.0049, "step": 18690 }, { "epoch": 0.30598052851182195, "grad_norm": 0.12717876830935648, "learning_rate": 8.762370908590762e-06, "loss": 0.0077, "step": 18700 }, { "epoch": 0.30614415446289783, "grad_norm": 0.21868041852126677, "learning_rate": 8.760489378273722e-06, "loss": 0.0052, "step": 18710 }, { "epoch": 0.30630778041397366, "grad_norm": 0.2360320454864102, "learning_rate": 8.758606621163524e-06, "loss": 0.0047, "step": 18720 }, { "epoch": 0.3064714063650495, "grad_norm": 0.1205666298406094, "learning_rate": 8.756722637874386e-06, "loss": 0.0078, "step": 18730 }, { "epoch": 0.3066350323161253, "grad_norm": 0.12478090418601338, "learning_rate": 8.754837429020927e-06, "loss": 0.0048, "step": 18740 }, { "epoch": 0.30679865826720115, "grad_norm": 0.39898851214392134, "learning_rate": 8.752950995218157e-06, "loss": 0.0053, "step": 18750 }, { "epoch": 0.30696228421827704, "grad_norm": 0.07608921786317842, "learning_rate": 8.751063337081493e-06, "loss": 0.0047, "step": 18760 }, { "epoch": 0.30712591016935287, "grad_norm": 0.14223371610305074, "learning_rate": 8.749174455226756e-06, "loss": 0.0041, "step": 18770 }, { "epoch": 0.3072895361204287, "grad_norm": 0.2635544566552066, "learning_rate": 8.747284350270153e-06, "loss": 0.0053, "step": 18780 }, { "epoch": 0.3074531620715045, "grad_norm": 0.459749435014625, "learning_rate": 8.745393022828304e-06, "loss": 0.0057, "step": 18790 }, { "epoch": 0.30761678802258036, "grad_norm": 0.34504988668142356, "learning_rate": 8.743500473518214e-06, "loss": 0.0031, "step": 18800 }, { "epoch": 0.30778041397365624, "grad_norm": 0.4887865601252787, "learning_rate": 8.741606702957299e-06, "loss": 0.0073, "step": 18810 }, { "epoch": 0.30794403992473207, "grad_norm": 0.3289312426956407, "learning_rate": 8.739711711763367e-06, "loss": 0.0034, "step": 18820 }, { "epoch": 0.3081076658758079, "grad_norm": 0.18539507844966194, "learning_rate": 8.737815500554622e-06, "loss": 0.0047, "step": 18830 }, { "epoch": 0.30827129182688373, "grad_norm": 0.1782173454359978, "learning_rate": 8.735918069949676e-06, "loss": 0.0035, "step": 18840 }, { "epoch": 0.30843491777795956, "grad_norm": 0.1774023733353055, "learning_rate": 8.734019420567527e-06, "loss": 0.0047, "step": 18850 }, { "epoch": 0.30859854372903545, "grad_norm": 0.09002331746051315, "learning_rate": 8.732119553027577e-06, "loss": 0.0043, "step": 18860 }, { "epoch": 0.3087621696801113, "grad_norm": 0.34016142042348196, "learning_rate": 8.730218467949622e-06, "loss": 0.0064, "step": 18870 }, { "epoch": 0.3089257956311871, "grad_norm": 0.008464680878899428, "learning_rate": 8.728316165953862e-06, "loss": 0.0023, "step": 18880 }, { "epoch": 0.30908942158226294, "grad_norm": 0.23471549955090934, "learning_rate": 8.72641264766089e-06, "loss": 0.007, "step": 18890 }, { "epoch": 0.30925304753333877, "grad_norm": 0.29956059071569174, "learning_rate": 8.724507913691689e-06, "loss": 0.0064, "step": 18900 }, { "epoch": 0.30941667348441465, "grad_norm": 0.3164203407022074, "learning_rate": 8.722601964667648e-06, "loss": 0.0058, "step": 18910 }, { "epoch": 0.3095802994354905, "grad_norm": 0.29351027790549017, "learning_rate": 8.72069480121055e-06, "loss": 0.0042, "step": 18920 }, { "epoch": 0.3097439253865663, "grad_norm": 0.15280889313344445, "learning_rate": 8.71878642394257e-06, "loss": 0.0034, "step": 18930 }, { "epoch": 0.30990755133764214, "grad_norm": 0.04776895143099245, "learning_rate": 8.716876833486289e-06, "loss": 0.005, "step": 18940 }, { "epoch": 0.31007117728871797, "grad_norm": 0.257234757952196, "learning_rate": 8.714966030464672e-06, "loss": 0.0049, "step": 18950 }, { "epoch": 0.31023480323979385, "grad_norm": 0.5497336065675381, "learning_rate": 8.713054015501085e-06, "loss": 0.0066, "step": 18960 }, { "epoch": 0.3103984291908697, "grad_norm": 0.19806610101213112, "learning_rate": 8.711140789219291e-06, "loss": 0.0046, "step": 18970 }, { "epoch": 0.3105620551419455, "grad_norm": 0.3960724449055792, "learning_rate": 8.709226352243443e-06, "loss": 0.0057, "step": 18980 }, { "epoch": 0.31072568109302134, "grad_norm": 0.2678968686893664, "learning_rate": 8.707310705198095e-06, "loss": 0.0054, "step": 18990 }, { "epoch": 0.3108893070440972, "grad_norm": 0.11846703447912352, "learning_rate": 8.705393848708193e-06, "loss": 0.0045, "step": 19000 }, { "epoch": 0.31105293299517306, "grad_norm": 0.08551642425816144, "learning_rate": 8.703475783399076e-06, "loss": 0.0055, "step": 19010 }, { "epoch": 0.3112165589462489, "grad_norm": 0.3782112803704034, "learning_rate": 8.70155650989648e-06, "loss": 0.0056, "step": 19020 }, { "epoch": 0.3113801848973247, "grad_norm": 0.4696700330745282, "learning_rate": 8.699636028826534e-06, "loss": 0.0081, "step": 19030 }, { "epoch": 0.31154381084840055, "grad_norm": 0.23654459974427444, "learning_rate": 8.697714340815761e-06, "loss": 0.0038, "step": 19040 }, { "epoch": 0.3117074367994764, "grad_norm": 0.3455276061024164, "learning_rate": 8.695791446491074e-06, "loss": 0.0051, "step": 19050 }, { "epoch": 0.31187106275055226, "grad_norm": 0.15878385427065572, "learning_rate": 8.693867346479792e-06, "loss": 0.004, "step": 19060 }, { "epoch": 0.3120346887016281, "grad_norm": 0.5245370086519782, "learning_rate": 8.69194204140961e-06, "loss": 0.0065, "step": 19070 }, { "epoch": 0.3121983146527039, "grad_norm": 0.20725994649395352, "learning_rate": 8.69001553190863e-06, "loss": 0.0086, "step": 19080 }, { "epoch": 0.31236194060377975, "grad_norm": 0.220904064382517, "learning_rate": 8.688087818605338e-06, "loss": 0.0047, "step": 19090 }, { "epoch": 0.3125255665548556, "grad_norm": 0.5675570489871355, "learning_rate": 8.686158902128617e-06, "loss": 0.0032, "step": 19100 }, { "epoch": 0.31268919250593147, "grad_norm": 0.3071925490662874, "learning_rate": 8.684228783107744e-06, "loss": 0.004, "step": 19110 }, { "epoch": 0.3128528184570073, "grad_norm": 0.34065271841656136, "learning_rate": 8.682297462172386e-06, "loss": 0.0063, "step": 19120 }, { "epoch": 0.3130164444080831, "grad_norm": 0.2580444947796707, "learning_rate": 8.680364939952601e-06, "loss": 0.005, "step": 19130 }, { "epoch": 0.31318007035915896, "grad_norm": 0.21422867871123977, "learning_rate": 8.67843121707884e-06, "loss": 0.0037, "step": 19140 }, { "epoch": 0.3133436963102348, "grad_norm": 0.15631003055279277, "learning_rate": 8.676496294181947e-06, "loss": 0.0053, "step": 19150 }, { "epoch": 0.3135073222613106, "grad_norm": 0.21964014531629195, "learning_rate": 8.674560171893157e-06, "loss": 0.0049, "step": 19160 }, { "epoch": 0.3136709482123865, "grad_norm": 0.6720113623825645, "learning_rate": 8.672622850844093e-06, "loss": 0.0057, "step": 19170 }, { "epoch": 0.31383457416346233, "grad_norm": 0.21757784078471265, "learning_rate": 8.670684331666775e-06, "loss": 0.0048, "step": 19180 }, { "epoch": 0.31399820011453816, "grad_norm": 0.2986299670736392, "learning_rate": 8.668744614993607e-06, "loss": 0.0053, "step": 19190 }, { "epoch": 0.314161826065614, "grad_norm": 0.22721085839857583, "learning_rate": 8.666803701457389e-06, "loss": 0.0059, "step": 19200 }, { "epoch": 0.3143254520166898, "grad_norm": 0.402952908259772, "learning_rate": 8.66486159169131e-06, "loss": 0.0044, "step": 19210 }, { "epoch": 0.3144890779677657, "grad_norm": 0.39865063821389984, "learning_rate": 8.66291828632895e-06, "loss": 0.0067, "step": 19220 }, { "epoch": 0.31465270391884154, "grad_norm": 0.09967959660038472, "learning_rate": 8.660973786004276e-06, "loss": 0.0042, "step": 19230 }, { "epoch": 0.31481632986991737, "grad_norm": 0.272280700635327, "learning_rate": 8.659028091351647e-06, "loss": 0.004, "step": 19240 }, { "epoch": 0.3149799558209932, "grad_norm": 0.3624695830039843, "learning_rate": 8.657081203005813e-06, "loss": 0.005, "step": 19250 }, { "epoch": 0.315143581772069, "grad_norm": 0.268045555804144, "learning_rate": 8.655133121601911e-06, "loss": 0.0057, "step": 19260 }, { "epoch": 0.3153072077231449, "grad_norm": 0.2641219001949664, "learning_rate": 8.653183847775469e-06, "loss": 0.0051, "step": 19270 }, { "epoch": 0.31547083367422074, "grad_norm": 0.14383148069089588, "learning_rate": 8.651233382162402e-06, "loss": 0.0043, "step": 19280 }, { "epoch": 0.31563445962529657, "grad_norm": 0.31020297516258566, "learning_rate": 8.649281725399014e-06, "loss": 0.0035, "step": 19290 }, { "epoch": 0.3157980855763724, "grad_norm": 0.2045947405185043, "learning_rate": 8.647328878122e-06, "loss": 0.0051, "step": 19300 }, { "epoch": 0.31596171152744823, "grad_norm": 0.4171154923085513, "learning_rate": 8.645374840968443e-06, "loss": 0.0065, "step": 19310 }, { "epoch": 0.3161253374785241, "grad_norm": 0.24596706126786663, "learning_rate": 8.64341961457581e-06, "loss": 0.0035, "step": 19320 }, { "epoch": 0.31628896342959995, "grad_norm": 0.2999190922434427, "learning_rate": 8.641463199581961e-06, "loss": 0.0042, "step": 19330 }, { "epoch": 0.3164525893806758, "grad_norm": 0.16358232359248806, "learning_rate": 8.639505596625142e-06, "loss": 0.0062, "step": 19340 }, { "epoch": 0.3166162153317516, "grad_norm": 0.30403105146249765, "learning_rate": 8.637546806343984e-06, "loss": 0.0057, "step": 19350 }, { "epoch": 0.31677984128282743, "grad_norm": 0.27359087349567596, "learning_rate": 8.635586829377512e-06, "loss": 0.0047, "step": 19360 }, { "epoch": 0.3169434672339033, "grad_norm": 0.32371888070260113, "learning_rate": 8.63362566636513e-06, "loss": 0.0058, "step": 19370 }, { "epoch": 0.31710709318497915, "grad_norm": 0.493853117321146, "learning_rate": 8.631663317946632e-06, "loss": 0.0079, "step": 19380 }, { "epoch": 0.317270719136055, "grad_norm": 0.6479730582966409, "learning_rate": 8.629699784762206e-06, "loss": 0.006, "step": 19390 }, { "epoch": 0.3174343450871308, "grad_norm": 0.5048300821134012, "learning_rate": 8.627735067452411e-06, "loss": 0.0046, "step": 19400 }, { "epoch": 0.31759797103820664, "grad_norm": 0.40626100384568736, "learning_rate": 8.62576916665821e-06, "loss": 0.0045, "step": 19410 }, { "epoch": 0.3177615969892825, "grad_norm": 0.5322701370781279, "learning_rate": 8.623802083020936e-06, "loss": 0.005, "step": 19420 }, { "epoch": 0.31792522294035835, "grad_norm": 0.19070525098771587, "learning_rate": 8.621833817182318e-06, "loss": 0.0081, "step": 19430 }, { "epoch": 0.3180888488914342, "grad_norm": 0.3130745824220591, "learning_rate": 8.619864369784471e-06, "loss": 0.0057, "step": 19440 }, { "epoch": 0.31825247484251, "grad_norm": 0.3193189382039648, "learning_rate": 8.61789374146989e-06, "loss": 0.0057, "step": 19450 }, { "epoch": 0.31841610079358584, "grad_norm": 0.11979941934246198, "learning_rate": 8.615921932881455e-06, "loss": 0.0052, "step": 19460 }, { "epoch": 0.31857972674466173, "grad_norm": 0.6702571861452684, "learning_rate": 8.613948944662436e-06, "loss": 0.0058, "step": 19470 }, { "epoch": 0.31874335269573756, "grad_norm": 0.4302687150938046, "learning_rate": 8.611974777456487e-06, "loss": 0.0033, "step": 19480 }, { "epoch": 0.3189069786468134, "grad_norm": 0.17134553120860474, "learning_rate": 8.609999431907642e-06, "loss": 0.0063, "step": 19490 }, { "epoch": 0.3190706045978892, "grad_norm": 0.20772018201977024, "learning_rate": 8.608022908660327e-06, "loss": 0.0059, "step": 19500 }, { "epoch": 0.31923423054896505, "grad_norm": 0.2390205838824782, "learning_rate": 8.606045208359343e-06, "loss": 0.0046, "step": 19510 }, { "epoch": 0.31939785650004093, "grad_norm": 0.34693019130854325, "learning_rate": 8.604066331649882e-06, "loss": 0.0035, "step": 19520 }, { "epoch": 0.31956148245111676, "grad_norm": 0.1981509576409543, "learning_rate": 8.602086279177518e-06, "loss": 0.0052, "step": 19530 }, { "epoch": 0.3197251084021926, "grad_norm": 0.5549142472707205, "learning_rate": 8.600105051588206e-06, "loss": 0.0049, "step": 19540 }, { "epoch": 0.3198887343532684, "grad_norm": 0.33226097169147817, "learning_rate": 8.59812264952829e-06, "loss": 0.0042, "step": 19550 }, { "epoch": 0.32005236030434425, "grad_norm": 0.22547435144445466, "learning_rate": 8.596139073644487e-06, "loss": 0.0033, "step": 19560 }, { "epoch": 0.3202159862554201, "grad_norm": 0.06856239155902007, "learning_rate": 8.594154324583912e-06, "loss": 0.004, "step": 19570 }, { "epoch": 0.32037961220649597, "grad_norm": 0.316131174078988, "learning_rate": 8.592168402994049e-06, "loss": 0.0072, "step": 19580 }, { "epoch": 0.3205432381575718, "grad_norm": 0.21167962455686248, "learning_rate": 8.590181309522772e-06, "loss": 0.0045, "step": 19590 }, { "epoch": 0.3207068641086476, "grad_norm": 0.22424023825826722, "learning_rate": 8.588193044818332e-06, "loss": 0.0062, "step": 19600 }, { "epoch": 0.32087049005972346, "grad_norm": 0.19023740462406216, "learning_rate": 8.586203609529368e-06, "loss": 0.0057, "step": 19610 }, { "epoch": 0.3210341160107993, "grad_norm": 0.11670946787342265, "learning_rate": 8.5842130043049e-06, "loss": 0.0057, "step": 19620 }, { "epoch": 0.32119774196187517, "grad_norm": 0.20554451487363826, "learning_rate": 8.582221229794323e-06, "loss": 0.0071, "step": 19630 }, { "epoch": 0.321361367912951, "grad_norm": 0.1120270020156323, "learning_rate": 8.580228286647421e-06, "loss": 0.0044, "step": 19640 }, { "epoch": 0.32152499386402683, "grad_norm": 0.6032586100051742, "learning_rate": 8.578234175514354e-06, "loss": 0.0067, "step": 19650 }, { "epoch": 0.32168861981510266, "grad_norm": 0.4750675502427299, "learning_rate": 8.57623889704567e-06, "loss": 0.0046, "step": 19660 }, { "epoch": 0.3218522457661785, "grad_norm": 0.198568633458702, "learning_rate": 8.574242451892289e-06, "loss": 0.0047, "step": 19670 }, { "epoch": 0.3220158717172544, "grad_norm": 0.33894573873188527, "learning_rate": 8.572244840705519e-06, "loss": 0.0063, "step": 19680 }, { "epoch": 0.3221794976683302, "grad_norm": 0.6828594341380465, "learning_rate": 8.570246064137044e-06, "loss": 0.0061, "step": 19690 }, { "epoch": 0.32234312361940604, "grad_norm": 0.3174630565931213, "learning_rate": 8.568246122838928e-06, "loss": 0.004, "step": 19700 }, { "epoch": 0.32250674957048187, "grad_norm": 0.10652153541117633, "learning_rate": 8.56624501746362e-06, "loss": 0.0028, "step": 19710 }, { "epoch": 0.3226703755215577, "grad_norm": 0.11040291417424263, "learning_rate": 8.56424274866394e-06, "loss": 0.0045, "step": 19720 }, { "epoch": 0.3228340014726336, "grad_norm": 0.16528110684090927, "learning_rate": 8.562239317093097e-06, "loss": 0.0039, "step": 19730 }, { "epoch": 0.3229976274237094, "grad_norm": 0.27694170379314115, "learning_rate": 8.560234723404676e-06, "loss": 0.0056, "step": 19740 }, { "epoch": 0.32316125337478524, "grad_norm": 0.17723179084876442, "learning_rate": 8.558228968252636e-06, "loss": 0.0056, "step": 19750 }, { "epoch": 0.32332487932586107, "grad_norm": 0.16505081513972458, "learning_rate": 8.556222052291323e-06, "loss": 0.0036, "step": 19760 }, { "epoch": 0.3234885052769369, "grad_norm": 0.24193101889973934, "learning_rate": 8.554213976175455e-06, "loss": 0.0044, "step": 19770 }, { "epoch": 0.3236521312280128, "grad_norm": 0.1602473801930467, "learning_rate": 8.552204740560135e-06, "loss": 0.0025, "step": 19780 }, { "epoch": 0.3238157571790886, "grad_norm": 0.40680679208681314, "learning_rate": 8.550194346100835e-06, "loss": 0.005, "step": 19790 }, { "epoch": 0.32397938313016444, "grad_norm": 0.38883432839704213, "learning_rate": 8.548182793453417e-06, "loss": 0.0066, "step": 19800 }, { "epoch": 0.3241430090812403, "grad_norm": 0.7068000441083453, "learning_rate": 8.546170083274109e-06, "loss": 0.005, "step": 19810 }, { "epoch": 0.3243066350323161, "grad_norm": 0.08823285994921172, "learning_rate": 8.544156216219525e-06, "loss": 0.0047, "step": 19820 }, { "epoch": 0.324470260983392, "grad_norm": 0.35512735180526434, "learning_rate": 8.542141192946653e-06, "loss": 0.005, "step": 19830 }, { "epoch": 0.3246338869344678, "grad_norm": 0.4199746661521805, "learning_rate": 8.54012501411286e-06, "loss": 0.0044, "step": 19840 }, { "epoch": 0.32479751288554365, "grad_norm": 0.14131453131454122, "learning_rate": 8.538107680375884e-06, "loss": 0.0044, "step": 19850 }, { "epoch": 0.3249611388366195, "grad_norm": 0.17457541614631383, "learning_rate": 8.53608919239385e-06, "loss": 0.0061, "step": 19860 }, { "epoch": 0.3251247647876953, "grad_norm": 0.29185652303698856, "learning_rate": 8.534069550825252e-06, "loss": 0.0055, "step": 19870 }, { "epoch": 0.3252883907387712, "grad_norm": 0.14926993364488633, "learning_rate": 8.532048756328961e-06, "loss": 0.0073, "step": 19880 }, { "epoch": 0.325452016689847, "grad_norm": 0.1652374687052916, "learning_rate": 8.530026809564227e-06, "loss": 0.0057, "step": 19890 }, { "epoch": 0.32561564264092285, "grad_norm": 0.5973850833255425, "learning_rate": 8.528003711190674e-06, "loss": 0.0068, "step": 19900 }, { "epoch": 0.3257792685919987, "grad_norm": 0.253477523557912, "learning_rate": 8.525979461868303e-06, "loss": 0.0069, "step": 19910 }, { "epoch": 0.3259428945430745, "grad_norm": 0.23226886417651638, "learning_rate": 8.523954062257487e-06, "loss": 0.0042, "step": 19920 }, { "epoch": 0.3261065204941504, "grad_norm": 0.510517293281738, "learning_rate": 8.52192751301898e-06, "loss": 0.0046, "step": 19930 }, { "epoch": 0.32627014644522623, "grad_norm": 0.8339113112366476, "learning_rate": 8.519899814813906e-06, "loss": 0.0044, "step": 19940 }, { "epoch": 0.32643377239630206, "grad_norm": 0.2236550221867777, "learning_rate": 8.517870968303764e-06, "loss": 0.006, "step": 19950 }, { "epoch": 0.3265973983473779, "grad_norm": 0.3328234329723939, "learning_rate": 8.515840974150433e-06, "loss": 0.0033, "step": 19960 }, { "epoch": 0.3267610242984537, "grad_norm": 0.20888639646895898, "learning_rate": 8.51380983301616e-06, "loss": 0.0057, "step": 19970 }, { "epoch": 0.3269246502495296, "grad_norm": 0.2337675852333641, "learning_rate": 8.511777545563571e-06, "loss": 0.0037, "step": 19980 }, { "epoch": 0.32708827620060543, "grad_norm": 0.2739602446724029, "learning_rate": 8.509744112455663e-06, "loss": 0.0043, "step": 19990 }, { "epoch": 0.32725190215168126, "grad_norm": 0.3307902780303389, "learning_rate": 8.507709534355803e-06, "loss": 0.008, "step": 20000 }, { "epoch": 0.3274155281027571, "grad_norm": 0.18152554101054655, "learning_rate": 8.505673811927743e-06, "loss": 0.0046, "step": 20010 }, { "epoch": 0.3275791540538329, "grad_norm": 0.23342479047005812, "learning_rate": 8.503636945835601e-06, "loss": 0.005, "step": 20020 }, { "epoch": 0.32774278000490875, "grad_norm": 0.18481861964914134, "learning_rate": 8.501598936743865e-06, "loss": 0.0032, "step": 20030 }, { "epoch": 0.32790640595598464, "grad_norm": 0.09212070437751925, "learning_rate": 8.4995597853174e-06, "loss": 0.0058, "step": 20040 }, { "epoch": 0.32807003190706047, "grad_norm": 0.320105963998252, "learning_rate": 8.497519492221444e-06, "loss": 0.0054, "step": 20050 }, { "epoch": 0.3282336578581363, "grad_norm": 0.2962106544030525, "learning_rate": 8.495478058121605e-06, "loss": 0.007, "step": 20060 }, { "epoch": 0.3283972838092121, "grad_norm": 0.2129442456594696, "learning_rate": 8.493435483683868e-06, "loss": 0.0054, "step": 20070 }, { "epoch": 0.32856090976028796, "grad_norm": 0.4597597233273598, "learning_rate": 8.491391769574585e-06, "loss": 0.0027, "step": 20080 }, { "epoch": 0.32872453571136384, "grad_norm": 0.6222957106386409, "learning_rate": 8.48934691646048e-06, "loss": 0.0082, "step": 20090 }, { "epoch": 0.32888816166243967, "grad_norm": 0.15574223169742313, "learning_rate": 8.487300925008653e-06, "loss": 0.0038, "step": 20100 }, { "epoch": 0.3290517876135155, "grad_norm": 0.15719296280165462, "learning_rate": 8.485253795886572e-06, "loss": 0.0043, "step": 20110 }, { "epoch": 0.32921541356459133, "grad_norm": 0.1764110861981905, "learning_rate": 8.483205529762073e-06, "loss": 0.0078, "step": 20120 }, { "epoch": 0.32937903951566716, "grad_norm": 0.019104415685411685, "learning_rate": 8.48115612730337e-06, "loss": 0.007, "step": 20130 }, { "epoch": 0.32954266546674305, "grad_norm": 0.48701239465306084, "learning_rate": 8.479105589179047e-06, "loss": 0.0066, "step": 20140 }, { "epoch": 0.3297062914178189, "grad_norm": 0.3089457276742494, "learning_rate": 8.47705391605805e-06, "loss": 0.0042, "step": 20150 }, { "epoch": 0.3298699173688947, "grad_norm": 0.20750493030711897, "learning_rate": 8.475001108609706e-06, "loss": 0.0057, "step": 20160 }, { "epoch": 0.33003354331997053, "grad_norm": 0.4013216626815941, "learning_rate": 8.472947167503703e-06, "loss": 0.0095, "step": 20170 }, { "epoch": 0.33019716927104636, "grad_norm": 0.3473546672034808, "learning_rate": 8.470892093410104e-06, "loss": 0.0042, "step": 20180 }, { "epoch": 0.33036079522212225, "grad_norm": 0.09198941174776062, "learning_rate": 8.468835886999343e-06, "loss": 0.0052, "step": 20190 }, { "epoch": 0.3305244211731981, "grad_norm": 0.407109610862657, "learning_rate": 8.466778548942217e-06, "loss": 0.005, "step": 20200 }, { "epoch": 0.3306880471242739, "grad_norm": 0.2913748401125258, "learning_rate": 8.4647200799099e-06, "loss": 0.0078, "step": 20210 }, { "epoch": 0.33085167307534974, "grad_norm": 0.3484862369288203, "learning_rate": 8.46266048057393e-06, "loss": 0.0082, "step": 20220 }, { "epoch": 0.33101529902642557, "grad_norm": 0.20825568602552483, "learning_rate": 8.460599751606213e-06, "loss": 0.0039, "step": 20230 }, { "epoch": 0.33117892497750145, "grad_norm": 0.23156507017755068, "learning_rate": 8.458537893679028e-06, "loss": 0.0031, "step": 20240 }, { "epoch": 0.3313425509285773, "grad_norm": 0.3992925900372824, "learning_rate": 8.456474907465018e-06, "loss": 0.0062, "step": 20250 }, { "epoch": 0.3315061768796531, "grad_norm": 0.2470117489752518, "learning_rate": 8.454410793637197e-06, "loss": 0.005, "step": 20260 }, { "epoch": 0.33166980283072894, "grad_norm": 0.31271748039786057, "learning_rate": 8.452345552868945e-06, "loss": 0.003, "step": 20270 }, { "epoch": 0.3318334287818048, "grad_norm": 0.15655217538872404, "learning_rate": 8.45027918583401e-06, "loss": 0.0044, "step": 20280 }, { "epoch": 0.33199705473288066, "grad_norm": 0.2243992444036362, "learning_rate": 8.448211693206506e-06, "loss": 0.0043, "step": 20290 }, { "epoch": 0.3321606806839565, "grad_norm": 0.14825698883583144, "learning_rate": 8.44614307566092e-06, "loss": 0.0062, "step": 20300 }, { "epoch": 0.3323243066350323, "grad_norm": 0.31147961259846463, "learning_rate": 8.444073333872097e-06, "loss": 0.0045, "step": 20310 }, { "epoch": 0.33248793258610815, "grad_norm": 0.12602795235252198, "learning_rate": 8.44200246851526e-06, "loss": 0.0029, "step": 20320 }, { "epoch": 0.332651558537184, "grad_norm": 0.28592830211001063, "learning_rate": 8.439930480265988e-06, "loss": 0.0058, "step": 20330 }, { "epoch": 0.33281518448825986, "grad_norm": 0.2798266528724036, "learning_rate": 8.43785736980023e-06, "loss": 0.0056, "step": 20340 }, { "epoch": 0.3329788104393357, "grad_norm": 0.18360385571270163, "learning_rate": 8.435783137794302e-06, "loss": 0.0037, "step": 20350 }, { "epoch": 0.3331424363904115, "grad_norm": 0.2026247069598769, "learning_rate": 8.433707784924888e-06, "loss": 0.0048, "step": 20360 }, { "epoch": 0.33330606234148735, "grad_norm": 0.2030648715923798, "learning_rate": 8.431631311869031e-06, "loss": 0.004, "step": 20370 }, { "epoch": 0.3334696882925632, "grad_norm": 0.44733634321601373, "learning_rate": 8.429553719304146e-06, "loss": 0.004, "step": 20380 }, { "epoch": 0.33363331424363907, "grad_norm": 0.09426474148409458, "learning_rate": 8.427475007908013e-06, "loss": 0.0053, "step": 20390 }, { "epoch": 0.3337969401947149, "grad_norm": 0.18692655710602513, "learning_rate": 8.425395178358773e-06, "loss": 0.0042, "step": 20400 }, { "epoch": 0.3339605661457907, "grad_norm": 0.21142089709592796, "learning_rate": 8.423314231334931e-06, "loss": 0.0043, "step": 20410 }, { "epoch": 0.33412419209686656, "grad_norm": 0.25454891317813116, "learning_rate": 8.421232167515362e-06, "loss": 0.0074, "step": 20420 }, { "epoch": 0.3342878180479424, "grad_norm": 0.22867865250775674, "learning_rate": 8.419148987579301e-06, "loss": 0.0062, "step": 20430 }, { "epoch": 0.33445144399901827, "grad_norm": 0.20283707773724322, "learning_rate": 8.417064692206352e-06, "loss": 0.004, "step": 20440 }, { "epoch": 0.3346150699500941, "grad_norm": 0.35055917738817965, "learning_rate": 8.414979282076477e-06, "loss": 0.0043, "step": 20450 }, { "epoch": 0.33477869590116993, "grad_norm": 0.014783402893192547, "learning_rate": 8.412892757870002e-06, "loss": 0.0027, "step": 20460 }, { "epoch": 0.33494232185224576, "grad_norm": 0.27000256216174645, "learning_rate": 8.410805120267623e-06, "loss": 0.0035, "step": 20470 }, { "epoch": 0.3351059478033216, "grad_norm": 0.3514817262712476, "learning_rate": 8.408716369950391e-06, "loss": 0.0116, "step": 20480 }, { "epoch": 0.3352695737543974, "grad_norm": 0.4262466140975379, "learning_rate": 8.406626507599726e-06, "loss": 0.0044, "step": 20490 }, { "epoch": 0.3354331997054733, "grad_norm": 0.22328168335638882, "learning_rate": 8.40453553389741e-06, "loss": 0.0053, "step": 20500 }, { "epoch": 0.33559682565654914, "grad_norm": 0.3297965358100579, "learning_rate": 8.402443449525581e-06, "loss": 0.005, "step": 20510 }, { "epoch": 0.33576045160762497, "grad_norm": 0.11304855497402304, "learning_rate": 8.400350255166751e-06, "loss": 0.0051, "step": 20520 }, { "epoch": 0.3359240775587008, "grad_norm": 0.1606646568222215, "learning_rate": 8.398255951503784e-06, "loss": 0.0092, "step": 20530 }, { "epoch": 0.3360877035097766, "grad_norm": 0.38636759251863384, "learning_rate": 8.396160539219909e-06, "loss": 0.0061, "step": 20540 }, { "epoch": 0.3362513294608525, "grad_norm": 0.9041239948038072, "learning_rate": 8.394064018998718e-06, "loss": 0.0051, "step": 20550 }, { "epoch": 0.33641495541192834, "grad_norm": 0.2921845595559238, "learning_rate": 8.391966391524165e-06, "loss": 0.0042, "step": 20560 }, { "epoch": 0.33657858136300417, "grad_norm": 0.09000868605860639, "learning_rate": 8.389867657480562e-06, "loss": 0.0038, "step": 20570 }, { "epoch": 0.33674220731408, "grad_norm": 0.18527719679843574, "learning_rate": 8.387767817552585e-06, "loss": 0.0057, "step": 20580 }, { "epoch": 0.33690583326515583, "grad_norm": 0.24264593240171178, "learning_rate": 8.385666872425269e-06, "loss": 0.0047, "step": 20590 }, { "epoch": 0.3370694592162317, "grad_norm": 0.15137008291253395, "learning_rate": 8.383564822784012e-06, "loss": 0.004, "step": 20600 }, { "epoch": 0.33723308516730754, "grad_norm": 0.29583594710949596, "learning_rate": 8.381461669314568e-06, "loss": 0.0035, "step": 20610 }, { "epoch": 0.3373967111183834, "grad_norm": 0.08598990139889533, "learning_rate": 8.379357412703053e-06, "loss": 0.0034, "step": 20620 }, { "epoch": 0.3375603370694592, "grad_norm": 0.2737661261856486, "learning_rate": 8.377252053635946e-06, "loss": 0.0033, "step": 20630 }, { "epoch": 0.33772396302053503, "grad_norm": 0.1277352753469851, "learning_rate": 8.375145592800084e-06, "loss": 0.0039, "step": 20640 }, { "epoch": 0.3378875889716109, "grad_norm": 0.1590029401460674, "learning_rate": 8.37303803088266e-06, "loss": 0.0054, "step": 20650 }, { "epoch": 0.33805121492268675, "grad_norm": 0.21432959612476377, "learning_rate": 8.37092936857123e-06, "loss": 0.0034, "step": 20660 }, { "epoch": 0.3382148408737626, "grad_norm": 0.15784408626267274, "learning_rate": 8.368819606553706e-06, "loss": 0.0035, "step": 20670 }, { "epoch": 0.3383784668248384, "grad_norm": 0.30690303322233475, "learning_rate": 8.366708745518364e-06, "loss": 0.0063, "step": 20680 }, { "epoch": 0.33854209277591424, "grad_norm": 0.1694243694347292, "learning_rate": 8.364596786153832e-06, "loss": 0.0033, "step": 20690 }, { "epoch": 0.3387057187269901, "grad_norm": 0.32450174992590375, "learning_rate": 8.3624837291491e-06, "loss": 0.0059, "step": 20700 }, { "epoch": 0.33886934467806595, "grad_norm": 0.28827172445527954, "learning_rate": 8.360369575193517e-06, "loss": 0.0046, "step": 20710 }, { "epoch": 0.3390329706291418, "grad_norm": 0.0923260024145372, "learning_rate": 8.358254324976787e-06, "loss": 0.0036, "step": 20720 }, { "epoch": 0.3391965965802176, "grad_norm": 0.39116282443741507, "learning_rate": 8.356137979188975e-06, "loss": 0.0069, "step": 20730 }, { "epoch": 0.33936022253129344, "grad_norm": 0.129851264372882, "learning_rate": 8.354020538520498e-06, "loss": 0.0044, "step": 20740 }, { "epoch": 0.33952384848236933, "grad_norm": 0.29309004354245527, "learning_rate": 8.351902003662133e-06, "loss": 0.004, "step": 20750 }, { "epoch": 0.33968747443344516, "grad_norm": 0.15365493840123007, "learning_rate": 8.34978237530502e-06, "loss": 0.0037, "step": 20760 }, { "epoch": 0.339851100384521, "grad_norm": 0.1338658824062015, "learning_rate": 8.347661654140646e-06, "loss": 0.0038, "step": 20770 }, { "epoch": 0.3400147263355968, "grad_norm": 0.1811580203501847, "learning_rate": 8.34553984086086e-06, "loss": 0.0042, "step": 20780 }, { "epoch": 0.34017835228667265, "grad_norm": 0.4052405570666526, "learning_rate": 8.343416936157867e-06, "loss": 0.0043, "step": 20790 }, { "epoch": 0.34034197823774853, "grad_norm": 0.3666725662196123, "learning_rate": 8.341292940724224e-06, "loss": 0.0063, "step": 20800 }, { "epoch": 0.34050560418882436, "grad_norm": 0.13156444635342052, "learning_rate": 8.33916785525285e-06, "loss": 0.0033, "step": 20810 }, { "epoch": 0.3406692301399002, "grad_norm": 0.3843174070954451, "learning_rate": 8.337041680437015e-06, "loss": 0.0047, "step": 20820 }, { "epoch": 0.340832856090976, "grad_norm": 0.10171622277744272, "learning_rate": 8.334914416970348e-06, "loss": 0.0054, "step": 20830 }, { "epoch": 0.34099648204205185, "grad_norm": 0.1730790074187248, "learning_rate": 8.332786065546829e-06, "loss": 0.0044, "step": 20840 }, { "epoch": 0.34116010799312774, "grad_norm": 0.16304753805254601, "learning_rate": 8.330656626860794e-06, "loss": 0.007, "step": 20850 }, { "epoch": 0.34132373394420357, "grad_norm": 0.19238343508409989, "learning_rate": 8.328526101606938e-06, "loss": 0.0092, "step": 20860 }, { "epoch": 0.3414873598952794, "grad_norm": 0.06326117814238459, "learning_rate": 8.326394490480306e-06, "loss": 0.0043, "step": 20870 }, { "epoch": 0.3416509858463552, "grad_norm": 0.1865533200519949, "learning_rate": 8.324261794176295e-06, "loss": 0.0031, "step": 20880 }, { "epoch": 0.34181461179743106, "grad_norm": 0.23144322171024437, "learning_rate": 8.322128013390664e-06, "loss": 0.0043, "step": 20890 }, { "epoch": 0.3419782377485069, "grad_norm": 0.34188691278164557, "learning_rate": 8.319993148819519e-06, "loss": 0.0115, "step": 20900 }, { "epoch": 0.34214186369958277, "grad_norm": 0.5538022011342335, "learning_rate": 8.317857201159323e-06, "loss": 0.0074, "step": 20910 }, { "epoch": 0.3423054896506586, "grad_norm": 0.2117723034746883, "learning_rate": 8.315720171106892e-06, "loss": 0.0059, "step": 20920 }, { "epoch": 0.34246911560173443, "grad_norm": 0.2621165164570185, "learning_rate": 8.313582059359392e-06, "loss": 0.0054, "step": 20930 }, { "epoch": 0.34263274155281026, "grad_norm": 0.26643959795547384, "learning_rate": 8.311442866614343e-06, "loss": 0.0048, "step": 20940 }, { "epoch": 0.3427963675038861, "grad_norm": 0.0915329289616054, "learning_rate": 8.309302593569621e-06, "loss": 0.0034, "step": 20950 }, { "epoch": 0.342959993454962, "grad_norm": 0.13492827383335063, "learning_rate": 8.307161240923454e-06, "loss": 0.0046, "step": 20960 }, { "epoch": 0.3431236194060378, "grad_norm": 0.22177612129928959, "learning_rate": 8.305018809374417e-06, "loss": 0.0105, "step": 20970 }, { "epoch": 0.34328724535711364, "grad_norm": 0.12035781797990085, "learning_rate": 8.302875299621442e-06, "loss": 0.0046, "step": 20980 }, { "epoch": 0.34345087130818946, "grad_norm": 0.1513481217164888, "learning_rate": 8.300730712363811e-06, "loss": 0.0037, "step": 20990 }, { "epoch": 0.3436144972592653, "grad_norm": 0.13055074699320773, "learning_rate": 8.298585048301157e-06, "loss": 0.0028, "step": 21000 }, { "epoch": 0.3437781232103412, "grad_norm": 0.1525672182778529, "learning_rate": 8.296438308133467e-06, "loss": 0.0054, "step": 21010 }, { "epoch": 0.343941749161417, "grad_norm": 0.1143492611670526, "learning_rate": 8.294290492561074e-06, "loss": 0.0039, "step": 21020 }, { "epoch": 0.34410537511249284, "grad_norm": 0.3819506086264186, "learning_rate": 8.29214160228467e-06, "loss": 0.0037, "step": 21030 }, { "epoch": 0.34426900106356867, "grad_norm": 0.19945894008106016, "learning_rate": 8.289991638005286e-06, "loss": 0.0045, "step": 21040 }, { "epoch": 0.3444326270146445, "grad_norm": 0.16224774545919984, "learning_rate": 8.287840600424312e-06, "loss": 0.0035, "step": 21050 }, { "epoch": 0.3445962529657204, "grad_norm": 0.09074054355020106, "learning_rate": 8.28568849024349e-06, "loss": 0.0042, "step": 21060 }, { "epoch": 0.3447598789167962, "grad_norm": 0.3604183568907086, "learning_rate": 8.283535308164901e-06, "loss": 0.0052, "step": 21070 }, { "epoch": 0.34492350486787204, "grad_norm": 0.40051290464463984, "learning_rate": 8.281381054890987e-06, "loss": 0.0031, "step": 21080 }, { "epoch": 0.3450871308189479, "grad_norm": 0.12258653691971061, "learning_rate": 8.279225731124536e-06, "loss": 0.0029, "step": 21090 }, { "epoch": 0.3452507567700237, "grad_norm": 0.16018771266067622, "learning_rate": 8.27706933756868e-06, "loss": 0.0067, "step": 21100 }, { "epoch": 0.3454143827210996, "grad_norm": 0.2427836154846401, "learning_rate": 8.274911874926907e-06, "loss": 0.0051, "step": 21110 }, { "epoch": 0.3455780086721754, "grad_norm": 0.3156056092451682, "learning_rate": 8.272753343903052e-06, "loss": 0.0044, "step": 21120 }, { "epoch": 0.34574163462325125, "grad_norm": 0.5119172317602168, "learning_rate": 8.270593745201297e-06, "loss": 0.006, "step": 21130 }, { "epoch": 0.3459052605743271, "grad_norm": 0.20966712588108682, "learning_rate": 8.26843307952617e-06, "loss": 0.0037, "step": 21140 }, { "epoch": 0.3460688865254029, "grad_norm": 0.14993499489853723, "learning_rate": 8.266271347582555e-06, "loss": 0.0051, "step": 21150 }, { "epoch": 0.3462325124764788, "grad_norm": 0.31698723927179806, "learning_rate": 8.264108550075674e-06, "loss": 0.004, "step": 21160 }, { "epoch": 0.3463961384275546, "grad_norm": 0.136062319828816, "learning_rate": 8.261944687711103e-06, "loss": 0.006, "step": 21170 }, { "epoch": 0.34655976437863045, "grad_norm": 0.23746139087716034, "learning_rate": 8.259779761194767e-06, "loss": 0.0068, "step": 21180 }, { "epoch": 0.3467233903297063, "grad_norm": 0.14484767838760634, "learning_rate": 8.25761377123293e-06, "loss": 0.005, "step": 21190 }, { "epoch": 0.3468870162807821, "grad_norm": 0.2468755209895756, "learning_rate": 8.25544671853221e-06, "loss": 0.0041, "step": 21200 }, { "epoch": 0.347050642231858, "grad_norm": 0.3270319737142135, "learning_rate": 8.25327860379957e-06, "loss": 0.0062, "step": 21210 }, { "epoch": 0.3472142681829338, "grad_norm": 0.18038277250682985, "learning_rate": 8.251109427742319e-06, "loss": 0.0042, "step": 21220 }, { "epoch": 0.34737789413400966, "grad_norm": 0.10596646650280081, "learning_rate": 8.248939191068112e-06, "loss": 0.0044, "step": 21230 }, { "epoch": 0.3475415200850855, "grad_norm": 0.07531578079228332, "learning_rate": 8.246767894484951e-06, "loss": 0.0032, "step": 21240 }, { "epoch": 0.3477051460361613, "grad_norm": 0.17584144291785261, "learning_rate": 8.244595538701183e-06, "loss": 0.0068, "step": 21250 }, { "epoch": 0.3478687719872372, "grad_norm": 0.4641516827922733, "learning_rate": 8.2424221244255e-06, "loss": 0.0051, "step": 21260 }, { "epoch": 0.34803239793831303, "grad_norm": 0.354582958243507, "learning_rate": 8.24024765236694e-06, "loss": 0.0046, "step": 21270 }, { "epoch": 0.34819602388938886, "grad_norm": 0.03288577018364058, "learning_rate": 8.238072123234886e-06, "loss": 0.0028, "step": 21280 }, { "epoch": 0.3483596498404647, "grad_norm": 0.4160287206232713, "learning_rate": 8.235895537739068e-06, "loss": 0.0037, "step": 21290 }, { "epoch": 0.3485232757915405, "grad_norm": 0.3761054668078874, "learning_rate": 8.233717896589555e-06, "loss": 0.0038, "step": 21300 }, { "epoch": 0.3486869017426164, "grad_norm": 0.160046013028521, "learning_rate": 8.231539200496767e-06, "loss": 0.0049, "step": 21310 }, { "epoch": 0.34885052769369224, "grad_norm": 0.1447209997521521, "learning_rate": 8.229359450171463e-06, "loss": 0.0052, "step": 21320 }, { "epoch": 0.34901415364476807, "grad_norm": 0.04679969984870674, "learning_rate": 8.227178646324748e-06, "loss": 0.0038, "step": 21330 }, { "epoch": 0.3491777795958439, "grad_norm": 0.19300305492658276, "learning_rate": 8.224996789668072e-06, "loss": 0.006, "step": 21340 }, { "epoch": 0.3493414055469197, "grad_norm": 0.08702013120550157, "learning_rate": 8.222813880913228e-06, "loss": 0.0038, "step": 21350 }, { "epoch": 0.34950503149799556, "grad_norm": 0.05577931449954347, "learning_rate": 8.220629920772347e-06, "loss": 0.0059, "step": 21360 }, { "epoch": 0.34966865744907144, "grad_norm": 0.061480121994317856, "learning_rate": 8.218444909957911e-06, "loss": 0.0049, "step": 21370 }, { "epoch": 0.34983228340014727, "grad_norm": 0.12283513595437923, "learning_rate": 8.216258849182744e-06, "loss": 0.0036, "step": 21380 }, { "epoch": 0.3499959093512231, "grad_norm": 0.17095257122231972, "learning_rate": 8.214071739160004e-06, "loss": 0.0105, "step": 21390 }, { "epoch": 0.35015953530229893, "grad_norm": 0.284607632213737, "learning_rate": 8.211883580603198e-06, "loss": 0.0047, "step": 21400 }, { "epoch": 0.35032316125337476, "grad_norm": 0.5103060200311446, "learning_rate": 8.209694374226175e-06, "loss": 0.0053, "step": 21410 }, { "epoch": 0.35048678720445064, "grad_norm": 0.35377893651170256, "learning_rate": 8.207504120743127e-06, "loss": 0.0044, "step": 21420 }, { "epoch": 0.3506504131555265, "grad_norm": 0.13316833575020487, "learning_rate": 8.205312820868581e-06, "loss": 0.0035, "step": 21430 }, { "epoch": 0.3508140391066023, "grad_norm": 0.2322430465996936, "learning_rate": 8.203120475317415e-06, "loss": 0.0024, "step": 21440 }, { "epoch": 0.35097766505767813, "grad_norm": 0.23034998353660002, "learning_rate": 8.200927084804839e-06, "loss": 0.0073, "step": 21450 }, { "epoch": 0.35114129100875396, "grad_norm": 0.47361087732602575, "learning_rate": 8.198732650046409e-06, "loss": 0.0081, "step": 21460 }, { "epoch": 0.35130491695982985, "grad_norm": 0.5479816952160357, "learning_rate": 8.19653717175802e-06, "loss": 0.0055, "step": 21470 }, { "epoch": 0.3514685429109057, "grad_norm": 0.4068777016603905, "learning_rate": 8.19434065065591e-06, "loss": 0.0043, "step": 21480 }, { "epoch": 0.3516321688619815, "grad_norm": 0.34150722644298087, "learning_rate": 8.192143087456652e-06, "loss": 0.0066, "step": 21490 }, { "epoch": 0.35179579481305734, "grad_norm": 0.23758385634434, "learning_rate": 8.189944482877166e-06, "loss": 0.0058, "step": 21500 }, { "epoch": 0.35195942076413317, "grad_norm": 0.2786234188022552, "learning_rate": 8.187744837634703e-06, "loss": 0.0046, "step": 21510 }, { "epoch": 0.35212304671520905, "grad_norm": 0.1742955314541393, "learning_rate": 8.185544152446866e-06, "loss": 0.0032, "step": 21520 }, { "epoch": 0.3522866726662849, "grad_norm": 0.30605901548399084, "learning_rate": 8.18334242803158e-06, "loss": 0.005, "step": 21530 }, { "epoch": 0.3524502986173607, "grad_norm": 0.14296784515913666, "learning_rate": 8.181139665107125e-06, "loss": 0.0044, "step": 21540 }, { "epoch": 0.35261392456843654, "grad_norm": 0.08027658208682562, "learning_rate": 8.178935864392113e-06, "loss": 0.0032, "step": 21550 }, { "epoch": 0.3527775505195124, "grad_norm": 0.1932504302687466, "learning_rate": 8.176731026605492e-06, "loss": 0.004, "step": 21560 }, { "epoch": 0.35294117647058826, "grad_norm": 0.33930967881985696, "learning_rate": 8.174525152466555e-06, "loss": 0.0036, "step": 21570 }, { "epoch": 0.3531048024216641, "grad_norm": 0.45273479046442755, "learning_rate": 8.172318242694925e-06, "loss": 0.0056, "step": 21580 }, { "epoch": 0.3532684283727399, "grad_norm": 0.16908740662601207, "learning_rate": 8.170110298010572e-06, "loss": 0.0042, "step": 21590 }, { "epoch": 0.35343205432381575, "grad_norm": 0.26052789833320583, "learning_rate": 8.167901319133798e-06, "loss": 0.0039, "step": 21600 }, { "epoch": 0.3535956802748916, "grad_norm": 0.12491718638791037, "learning_rate": 8.16569130678524e-06, "loss": 0.0056, "step": 21610 }, { "epoch": 0.35375930622596746, "grad_norm": 0.299587139198521, "learning_rate": 8.163480261685878e-06, "loss": 0.0061, "step": 21620 }, { "epoch": 0.3539229321770433, "grad_norm": 0.1868248117022889, "learning_rate": 8.161268184557027e-06, "loss": 0.0047, "step": 21630 }, { "epoch": 0.3540865581281191, "grad_norm": 0.4087806482608733, "learning_rate": 8.159055076120335e-06, "loss": 0.0058, "step": 21640 }, { "epoch": 0.35425018407919495, "grad_norm": 0.2688705596798527, "learning_rate": 8.156840937097793e-06, "loss": 0.004, "step": 21650 }, { "epoch": 0.3544138100302708, "grad_norm": 0.2886753511100073, "learning_rate": 8.154625768211724e-06, "loss": 0.0085, "step": 21660 }, { "epoch": 0.35457743598134667, "grad_norm": 0.19786059982396528, "learning_rate": 8.152409570184785e-06, "loss": 0.0061, "step": 21670 }, { "epoch": 0.3547410619324225, "grad_norm": 0.38351927578523554, "learning_rate": 8.150192343739973e-06, "loss": 0.004, "step": 21680 }, { "epoch": 0.3549046878834983, "grad_norm": 0.268873636356188, "learning_rate": 8.14797408960062e-06, "loss": 0.0096, "step": 21690 }, { "epoch": 0.35506831383457416, "grad_norm": 0.05659051967088951, "learning_rate": 8.145754808490392e-06, "loss": 0.0038, "step": 21700 }, { "epoch": 0.35523193978565, "grad_norm": 0.1594146607796532, "learning_rate": 8.143534501133288e-06, "loss": 0.0047, "step": 21710 }, { "epoch": 0.35539556573672587, "grad_norm": 0.21505692245827976, "learning_rate": 8.141313168253649e-06, "loss": 0.0041, "step": 21720 }, { "epoch": 0.3555591916878017, "grad_norm": 0.25306539222798285, "learning_rate": 8.139090810576138e-06, "loss": 0.0045, "step": 21730 }, { "epoch": 0.35572281763887753, "grad_norm": 0.13680675289337932, "learning_rate": 8.136867428825766e-06, "loss": 0.0048, "step": 21740 }, { "epoch": 0.35588644358995336, "grad_norm": 0.17531384502122008, "learning_rate": 8.134643023727869e-06, "loss": 0.0048, "step": 21750 }, { "epoch": 0.3560500695410292, "grad_norm": 0.19604067591260385, "learning_rate": 8.132417596008121e-06, "loss": 0.0042, "step": 21760 }, { "epoch": 0.356213695492105, "grad_norm": 0.10671383357160226, "learning_rate": 8.130191146392529e-06, "loss": 0.0062, "step": 21770 }, { "epoch": 0.3563773214431809, "grad_norm": 0.6925741570314006, "learning_rate": 8.127963675607434e-06, "loss": 0.0056, "step": 21780 }, { "epoch": 0.35654094739425674, "grad_norm": 0.20657289278593421, "learning_rate": 8.125735184379504e-06, "loss": 0.0029, "step": 21790 }, { "epoch": 0.35670457334533257, "grad_norm": 0.34744837496155256, "learning_rate": 8.12350567343575e-06, "loss": 0.0081, "step": 21800 }, { "epoch": 0.3568681992964084, "grad_norm": 0.29106948130856475, "learning_rate": 8.121275143503507e-06, "loss": 0.0038, "step": 21810 }, { "epoch": 0.3570318252474842, "grad_norm": 0.2545472237161258, "learning_rate": 8.119043595310448e-06, "loss": 0.0047, "step": 21820 }, { "epoch": 0.3571954511985601, "grad_norm": 0.20660940929341418, "learning_rate": 8.116811029584576e-06, "loss": 0.0043, "step": 21830 }, { "epoch": 0.35735907714963594, "grad_norm": 0.13566472365068155, "learning_rate": 8.114577447054222e-06, "loss": 0.0041, "step": 21840 }, { "epoch": 0.35752270310071177, "grad_norm": 0.08089286338907706, "learning_rate": 8.11234284844806e-06, "loss": 0.0052, "step": 21850 }, { "epoch": 0.3576863290517876, "grad_norm": 0.1498472041518974, "learning_rate": 8.110107234495084e-06, "loss": 0.0031, "step": 21860 }, { "epoch": 0.35784995500286343, "grad_norm": 0.1073039911172813, "learning_rate": 8.107870605924624e-06, "loss": 0.0043, "step": 21870 }, { "epoch": 0.3580135809539393, "grad_norm": 0.2902065332125187, "learning_rate": 8.10563296346634e-06, "loss": 0.0033, "step": 21880 }, { "epoch": 0.35817720690501514, "grad_norm": 0.1879263704293451, "learning_rate": 8.103394307850225e-06, "loss": 0.0049, "step": 21890 }, { "epoch": 0.358340832856091, "grad_norm": 0.541430949456604, "learning_rate": 8.101154639806598e-06, "loss": 0.0068, "step": 21900 }, { "epoch": 0.3585044588071668, "grad_norm": 0.24423798519045195, "learning_rate": 8.098913960066115e-06, "loss": 0.0052, "step": 21910 }, { "epoch": 0.35866808475824263, "grad_norm": 0.27221078320296016, "learning_rate": 8.096672269359755e-06, "loss": 0.0057, "step": 21920 }, { "epoch": 0.3588317107093185, "grad_norm": 0.3056004130605978, "learning_rate": 8.094429568418831e-06, "loss": 0.0064, "step": 21930 }, { "epoch": 0.35899533666039435, "grad_norm": 0.12140122293182473, "learning_rate": 8.092185857974986e-06, "loss": 0.0036, "step": 21940 }, { "epoch": 0.3591589626114702, "grad_norm": 0.2383787650848925, "learning_rate": 8.089941138760186e-06, "loss": 0.0052, "step": 21950 }, { "epoch": 0.359322588562546, "grad_norm": 0.060690717801894466, "learning_rate": 8.087695411506738e-06, "loss": 0.0029, "step": 21960 }, { "epoch": 0.35948621451362184, "grad_norm": 0.13027267796747477, "learning_rate": 8.085448676947265e-06, "loss": 0.0038, "step": 21970 }, { "epoch": 0.3596498404646977, "grad_norm": 0.06777375110268201, "learning_rate": 8.083200935814725e-06, "loss": 0.0056, "step": 21980 }, { "epoch": 0.35981346641577355, "grad_norm": 0.26404782395114973, "learning_rate": 8.080952188842407e-06, "loss": 0.0049, "step": 21990 }, { "epoch": 0.3599770923668494, "grad_norm": 0.07344352482194981, "learning_rate": 8.078702436763925e-06, "loss": 0.0043, "step": 22000 }, { "epoch": 0.3601407183179252, "grad_norm": 0.20181901262248428, "learning_rate": 8.076451680313216e-06, "loss": 0.0036, "step": 22010 }, { "epoch": 0.36030434426900104, "grad_norm": 0.2687343206702619, "learning_rate": 8.074199920224555e-06, "loss": 0.0051, "step": 22020 }, { "epoch": 0.3604679702200769, "grad_norm": 0.12082289643392462, "learning_rate": 8.071947157232536e-06, "loss": 0.005, "step": 22030 }, { "epoch": 0.36063159617115276, "grad_norm": 0.20599436045430314, "learning_rate": 8.069693392072082e-06, "loss": 0.0053, "step": 22040 }, { "epoch": 0.3607952221222286, "grad_norm": 0.03827285256559599, "learning_rate": 8.067438625478445e-06, "loss": 0.0033, "step": 22050 }, { "epoch": 0.3609588480733044, "grad_norm": 0.2147216862900357, "learning_rate": 8.065182858187207e-06, "loss": 0.0041, "step": 22060 }, { "epoch": 0.36112247402438025, "grad_norm": 0.2671410455933201, "learning_rate": 8.062926090934264e-06, "loss": 0.0059, "step": 22070 }, { "epoch": 0.36128609997545613, "grad_norm": 0.45165716419902047, "learning_rate": 8.060668324455853e-06, "loss": 0.0053, "step": 22080 }, { "epoch": 0.36144972592653196, "grad_norm": 0.06592630615694613, "learning_rate": 8.05840955948853e-06, "loss": 0.004, "step": 22090 }, { "epoch": 0.3616133518776078, "grad_norm": 0.21117603204229507, "learning_rate": 8.056149796769173e-06, "loss": 0.0042, "step": 22100 }, { "epoch": 0.3617769778286836, "grad_norm": 0.2091945921456715, "learning_rate": 8.053889037034992e-06, "loss": 0.0047, "step": 22110 }, { "epoch": 0.36194060377975945, "grad_norm": 0.564376330231919, "learning_rate": 8.051627281023519e-06, "loss": 0.0036, "step": 22120 }, { "epoch": 0.36210422973083534, "grad_norm": 0.267186532798065, "learning_rate": 8.049364529472616e-06, "loss": 0.0061, "step": 22130 }, { "epoch": 0.36226785568191117, "grad_norm": 0.41323442866810006, "learning_rate": 8.04710078312046e-06, "loss": 0.0047, "step": 22140 }, { "epoch": 0.362431481632987, "grad_norm": 0.17939918237862795, "learning_rate": 8.044836042705561e-06, "loss": 0.004, "step": 22150 }, { "epoch": 0.3625951075840628, "grad_norm": 0.10576247212815698, "learning_rate": 8.042570308966751e-06, "loss": 0.0055, "step": 22160 }, { "epoch": 0.36275873353513866, "grad_norm": 0.35053850065871706, "learning_rate": 8.040303582643185e-06, "loss": 0.0039, "step": 22170 }, { "epoch": 0.36292235948621454, "grad_norm": 0.20925125664040806, "learning_rate": 8.03803586447434e-06, "loss": 0.0058, "step": 22180 }, { "epoch": 0.36308598543729037, "grad_norm": 0.18325287190555176, "learning_rate": 8.035767155200022e-06, "loss": 0.0062, "step": 22190 }, { "epoch": 0.3632496113883662, "grad_norm": 0.19090124460483998, "learning_rate": 8.033497455560359e-06, "loss": 0.0042, "step": 22200 }, { "epoch": 0.36341323733944203, "grad_norm": 0.1718975501360852, "learning_rate": 8.031226766295794e-06, "loss": 0.0054, "step": 22210 }, { "epoch": 0.36357686329051786, "grad_norm": 0.1990188426857915, "learning_rate": 8.028955088147106e-06, "loss": 0.0054, "step": 22220 }, { "epoch": 0.3637404892415937, "grad_norm": 0.41056218075581696, "learning_rate": 8.026682421855386e-06, "loss": 0.007, "step": 22230 }, { "epoch": 0.3639041151926696, "grad_norm": 0.12829855457467737, "learning_rate": 8.02440876816205e-06, "loss": 0.0039, "step": 22240 }, { "epoch": 0.3640677411437454, "grad_norm": 0.24589932467234477, "learning_rate": 8.02213412780884e-06, "loss": 0.0059, "step": 22250 }, { "epoch": 0.36423136709482123, "grad_norm": 0.08400456262599232, "learning_rate": 8.019858501537818e-06, "loss": 0.0044, "step": 22260 }, { "epoch": 0.36439499304589706, "grad_norm": 0.5791083382979467, "learning_rate": 8.017581890091363e-06, "loss": 0.0051, "step": 22270 }, { "epoch": 0.3645586189969729, "grad_norm": 0.3022679329800277, "learning_rate": 8.015304294212183e-06, "loss": 0.0041, "step": 22280 }, { "epoch": 0.3647222449480488, "grad_norm": 0.2651324712526449, "learning_rate": 8.013025714643303e-06, "loss": 0.0038, "step": 22290 }, { "epoch": 0.3648858708991246, "grad_norm": 0.6173237085122881, "learning_rate": 8.010746152128065e-06, "loss": 0.0088, "step": 22300 }, { "epoch": 0.36504949685020044, "grad_norm": 0.1620804500124975, "learning_rate": 8.008465607410143e-06, "loss": 0.0039, "step": 22310 }, { "epoch": 0.36521312280127627, "grad_norm": 0.09480572820867364, "learning_rate": 8.006184081233517e-06, "loss": 0.0035, "step": 22320 }, { "epoch": 0.3653767487523521, "grad_norm": 0.24982967973314837, "learning_rate": 8.003901574342498e-06, "loss": 0.0038, "step": 22330 }, { "epoch": 0.365540374703428, "grad_norm": 0.1420652940557556, "learning_rate": 8.001618087481716e-06, "loss": 0.003, "step": 22340 }, { "epoch": 0.3657040006545038, "grad_norm": 0.018918582899131352, "learning_rate": 7.999333621396115e-06, "loss": 0.0065, "step": 22350 }, { "epoch": 0.36586762660557964, "grad_norm": 0.20904676014770399, "learning_rate": 7.997048176830961e-06, "loss": 0.0029, "step": 22360 }, { "epoch": 0.3660312525566555, "grad_norm": 0.2919347511958678, "learning_rate": 7.994761754531844e-06, "loss": 0.0046, "step": 22370 }, { "epoch": 0.3661948785077313, "grad_norm": 0.1890912854510077, "learning_rate": 7.992474355244663e-06, "loss": 0.0048, "step": 22380 }, { "epoch": 0.3663585044588072, "grad_norm": 0.27444379879412956, "learning_rate": 7.990185979715648e-06, "loss": 0.0044, "step": 22390 }, { "epoch": 0.366522130409883, "grad_norm": 0.09103498353880408, "learning_rate": 7.987896628691336e-06, "loss": 0.0027, "step": 22400 }, { "epoch": 0.36668575636095885, "grad_norm": 0.11851401053287791, "learning_rate": 7.985606302918589e-06, "loss": 0.0047, "step": 22410 }, { "epoch": 0.3668493823120347, "grad_norm": 0.038468302837450334, "learning_rate": 7.983315003144586e-06, "loss": 0.0042, "step": 22420 }, { "epoch": 0.3670130082631105, "grad_norm": 0.43509016582558974, "learning_rate": 7.98102273011682e-06, "loss": 0.0038, "step": 22430 }, { "epoch": 0.3671766342141864, "grad_norm": 0.07581748607239056, "learning_rate": 7.97872948458311e-06, "loss": 0.0052, "step": 22440 }, { "epoch": 0.3673402601652622, "grad_norm": 0.20175306482536795, "learning_rate": 7.976435267291583e-06, "loss": 0.0035, "step": 22450 }, { "epoch": 0.36750388611633805, "grad_norm": 0.19902981340182438, "learning_rate": 7.974140078990686e-06, "loss": 0.004, "step": 22460 }, { "epoch": 0.3676675120674139, "grad_norm": 0.3350546693634301, "learning_rate": 7.971843920429187e-06, "loss": 0.0036, "step": 22470 }, { "epoch": 0.3678311380184897, "grad_norm": 0.1442479477327456, "learning_rate": 7.969546792356164e-06, "loss": 0.0062, "step": 22480 }, { "epoch": 0.3679947639695656, "grad_norm": 0.2218002598970167, "learning_rate": 7.967248695521017e-06, "loss": 0.0057, "step": 22490 }, { "epoch": 0.3681583899206414, "grad_norm": 0.13058529815057038, "learning_rate": 7.964949630673456e-06, "loss": 0.0033, "step": 22500 }, { "epoch": 0.36832201587171726, "grad_norm": 0.17822038345815083, "learning_rate": 7.962649598563513e-06, "loss": 0.0072, "step": 22510 }, { "epoch": 0.3684856418227931, "grad_norm": 0.07651427925511488, "learning_rate": 7.960348599941533e-06, "loss": 0.0047, "step": 22520 }, { "epoch": 0.3686492677738689, "grad_norm": 0.18947187168978472, "learning_rate": 7.958046635558175e-06, "loss": 0.0037, "step": 22530 }, { "epoch": 0.3688128937249448, "grad_norm": 0.33646478914927486, "learning_rate": 7.955743706164414e-06, "loss": 0.0043, "step": 22540 }, { "epoch": 0.36897651967602063, "grad_norm": 0.14466146040736594, "learning_rate": 7.953439812511541e-06, "loss": 0.0058, "step": 22550 }, { "epoch": 0.36914014562709646, "grad_norm": 0.20571336708909738, "learning_rate": 7.951134955351159e-06, "loss": 0.0046, "step": 22560 }, { "epoch": 0.3693037715781723, "grad_norm": 0.11899569063552054, "learning_rate": 7.948829135435188e-06, "loss": 0.0043, "step": 22570 }, { "epoch": 0.3694673975292481, "grad_norm": 0.09638828990684081, "learning_rate": 7.946522353515863e-06, "loss": 0.0023, "step": 22580 }, { "epoch": 0.369631023480324, "grad_norm": 0.15540463561014836, "learning_rate": 7.944214610345727e-06, "loss": 0.004, "step": 22590 }, { "epoch": 0.36979464943139984, "grad_norm": 0.2736904618129395, "learning_rate": 7.941905906677641e-06, "loss": 0.0047, "step": 22600 }, { "epoch": 0.36995827538247567, "grad_norm": 0.2631068626683752, "learning_rate": 7.93959624326478e-06, "loss": 0.0047, "step": 22610 }, { "epoch": 0.3701219013335515, "grad_norm": 0.4128766923777127, "learning_rate": 7.93728562086063e-06, "loss": 0.005, "step": 22620 }, { "epoch": 0.3702855272846273, "grad_norm": 0.5910514253286557, "learning_rate": 7.934974040218995e-06, "loss": 0.0074, "step": 22630 }, { "epoch": 0.3704491532357032, "grad_norm": 0.13597735266234007, "learning_rate": 7.93266150209398e-06, "loss": 0.0044, "step": 22640 }, { "epoch": 0.37061277918677904, "grad_norm": 0.27802477851363055, "learning_rate": 7.930348007240015e-06, "loss": 0.0058, "step": 22650 }, { "epoch": 0.37077640513785487, "grad_norm": 0.2644343739747211, "learning_rate": 7.928033556411835e-06, "loss": 0.0069, "step": 22660 }, { "epoch": 0.3709400310889307, "grad_norm": 0.2965258882218535, "learning_rate": 7.925718150364488e-06, "loss": 0.0084, "step": 22670 }, { "epoch": 0.37110365704000653, "grad_norm": 0.34343811577731503, "learning_rate": 7.923401789853335e-06, "loss": 0.0062, "step": 22680 }, { "epoch": 0.37126728299108236, "grad_norm": 0.08198107450571357, "learning_rate": 7.921084475634047e-06, "loss": 0.0038, "step": 22690 }, { "epoch": 0.37143090894215824, "grad_norm": 0.1598291298521683, "learning_rate": 7.91876620846261e-06, "loss": 0.0055, "step": 22700 }, { "epoch": 0.3715945348932341, "grad_norm": 0.1320846220285246, "learning_rate": 7.916446989095313e-06, "loss": 0.003, "step": 22710 }, { "epoch": 0.3717581608443099, "grad_norm": 0.34019235100729966, "learning_rate": 7.914126818288763e-06, "loss": 0.0028, "step": 22720 }, { "epoch": 0.37192178679538573, "grad_norm": 0.18809256027740193, "learning_rate": 7.911805696799873e-06, "loss": 0.0039, "step": 22730 }, { "epoch": 0.37208541274646156, "grad_norm": 0.3126583119581344, "learning_rate": 7.90948362538587e-06, "loss": 0.0026, "step": 22740 }, { "epoch": 0.37224903869753745, "grad_norm": 0.19810785043559126, "learning_rate": 7.907160604804288e-06, "loss": 0.0043, "step": 22750 }, { "epoch": 0.3724126646486133, "grad_norm": 0.39099521854285685, "learning_rate": 7.904836635812972e-06, "loss": 0.0043, "step": 22760 }, { "epoch": 0.3725762905996891, "grad_norm": 0.12100920360865144, "learning_rate": 7.902511719170072e-06, "loss": 0.0032, "step": 22770 }, { "epoch": 0.37273991655076494, "grad_norm": 0.37345056479715394, "learning_rate": 7.900185855634055e-06, "loss": 0.0035, "step": 22780 }, { "epoch": 0.37290354250184077, "grad_norm": 0.30403356425097144, "learning_rate": 7.897859045963691e-06, "loss": 0.0045, "step": 22790 }, { "epoch": 0.37306716845291665, "grad_norm": 0.365619122958088, "learning_rate": 7.895531290918062e-06, "loss": 0.006, "step": 22800 }, { "epoch": 0.3732307944039925, "grad_norm": 0.10193440457323168, "learning_rate": 7.893202591256558e-06, "loss": 0.0043, "step": 22810 }, { "epoch": 0.3733944203550683, "grad_norm": 0.18744634394516266, "learning_rate": 7.89087294773887e-06, "loss": 0.0043, "step": 22820 }, { "epoch": 0.37355804630614414, "grad_norm": 0.2933513423447109, "learning_rate": 7.888542361125012e-06, "loss": 0.0042, "step": 22830 }, { "epoch": 0.37372167225722, "grad_norm": 0.3244846918389883, "learning_rate": 7.886210832175288e-06, "loss": 0.0032, "step": 22840 }, { "epoch": 0.37388529820829586, "grad_norm": 0.5707253912789513, "learning_rate": 7.883878361650323e-06, "loss": 0.0037, "step": 22850 }, { "epoch": 0.3740489241593717, "grad_norm": 0.19542601265558684, "learning_rate": 7.881544950311042e-06, "loss": 0.0058, "step": 22860 }, { "epoch": 0.3742125501104475, "grad_norm": 0.14725526847070805, "learning_rate": 7.87921059891868e-06, "loss": 0.0045, "step": 22870 }, { "epoch": 0.37437617606152335, "grad_norm": 0.22371416217253562, "learning_rate": 7.876875308234782e-06, "loss": 0.0051, "step": 22880 }, { "epoch": 0.3745398020125992, "grad_norm": 0.3101014089284409, "learning_rate": 7.874539079021188e-06, "loss": 0.0031, "step": 22890 }, { "epoch": 0.37470342796367506, "grad_norm": 0.11512983161318277, "learning_rate": 7.872201912040056e-06, "loss": 0.0034, "step": 22900 }, { "epoch": 0.3748670539147509, "grad_norm": 0.12509936324613832, "learning_rate": 7.869863808053844e-06, "loss": 0.0035, "step": 22910 }, { "epoch": 0.3750306798658267, "grad_norm": 0.13445543336436533, "learning_rate": 7.867524767825319e-06, "loss": 0.0037, "step": 22920 }, { "epoch": 0.37519430581690255, "grad_norm": 0.19703069588882463, "learning_rate": 7.865184792117548e-06, "loss": 0.0029, "step": 22930 }, { "epoch": 0.3753579317679784, "grad_norm": 0.23687038294884713, "learning_rate": 7.86284388169391e-06, "loss": 0.0049, "step": 22940 }, { "epoch": 0.37552155771905427, "grad_norm": 0.19932318637696375, "learning_rate": 7.860502037318082e-06, "loss": 0.0034, "step": 22950 }, { "epoch": 0.3756851836701301, "grad_norm": 0.2854873014982349, "learning_rate": 7.858159259754053e-06, "loss": 0.0034, "step": 22960 }, { "epoch": 0.3758488096212059, "grad_norm": 0.1490873789145156, "learning_rate": 7.85581554976611e-06, "loss": 0.0042, "step": 22970 }, { "epoch": 0.37601243557228176, "grad_norm": 0.2664544290072381, "learning_rate": 7.853470908118849e-06, "loss": 0.0032, "step": 22980 }, { "epoch": 0.3761760615233576, "grad_norm": 0.40427861844606977, "learning_rate": 7.85112533557717e-06, "loss": 0.0056, "step": 22990 }, { "epoch": 0.37633968747443347, "grad_norm": 0.2796467438660062, "learning_rate": 7.848778832906267e-06, "loss": 0.0059, "step": 23000 }, { "epoch": 0.3765033134255093, "grad_norm": 0.09917683090435539, "learning_rate": 7.846431400871652e-06, "loss": 0.0043, "step": 23010 }, { "epoch": 0.37666693937658513, "grad_norm": 0.07909310318799044, "learning_rate": 7.844083040239132e-06, "loss": 0.0045, "step": 23020 }, { "epoch": 0.37683056532766096, "grad_norm": 0.2939838305605887, "learning_rate": 7.841733751774816e-06, "loss": 0.004, "step": 23030 }, { "epoch": 0.3769941912787368, "grad_norm": 0.1260181168443803, "learning_rate": 7.839383536245117e-06, "loss": 0.0059, "step": 23040 }, { "epoch": 0.3771578172298127, "grad_norm": 0.4634244806399275, "learning_rate": 7.837032394416756e-06, "loss": 0.005, "step": 23050 }, { "epoch": 0.3773214431808885, "grad_norm": 0.1738223323037068, "learning_rate": 7.834680327056748e-06, "loss": 0.0045, "step": 23060 }, { "epoch": 0.37748506913196433, "grad_norm": 0.10492700250068246, "learning_rate": 7.832327334932413e-06, "loss": 0.003, "step": 23070 }, { "epoch": 0.37764869508304016, "grad_norm": 0.05876576792953704, "learning_rate": 7.829973418811372e-06, "loss": 0.0039, "step": 23080 }, { "epoch": 0.377812321034116, "grad_norm": 0.3431615570779223, "learning_rate": 7.827618579461553e-06, "loss": 0.0055, "step": 23090 }, { "epoch": 0.3779759469851918, "grad_norm": 0.48790285877659084, "learning_rate": 7.825262817651177e-06, "loss": 0.0035, "step": 23100 }, { "epoch": 0.3781395729362677, "grad_norm": 0.18904968153564555, "learning_rate": 7.822906134148771e-06, "loss": 0.0037, "step": 23110 }, { "epoch": 0.37830319888734354, "grad_norm": 0.23568565868984, "learning_rate": 7.820548529723162e-06, "loss": 0.0032, "step": 23120 }, { "epoch": 0.37846682483841937, "grad_norm": 0.1474894882955067, "learning_rate": 7.818190005143476e-06, "loss": 0.0053, "step": 23130 }, { "epoch": 0.3786304507894952, "grad_norm": 0.3924760327712476, "learning_rate": 7.815830561179137e-06, "loss": 0.0037, "step": 23140 }, { "epoch": 0.37879407674057103, "grad_norm": 0.3869784810142888, "learning_rate": 7.813470198599877e-06, "loss": 0.0055, "step": 23150 }, { "epoch": 0.3789577026916469, "grad_norm": 0.14721768230352789, "learning_rate": 7.811108918175721e-06, "loss": 0.0042, "step": 23160 }, { "epoch": 0.37912132864272274, "grad_norm": 0.35248357123434165, "learning_rate": 7.808746720676991e-06, "loss": 0.0045, "step": 23170 }, { "epoch": 0.3792849545937986, "grad_norm": 0.36251417848818146, "learning_rate": 7.806383606874316e-06, "loss": 0.0041, "step": 23180 }, { "epoch": 0.3794485805448744, "grad_norm": 0.10912764567282592, "learning_rate": 7.804019577538623e-06, "loss": 0.0031, "step": 23190 }, { "epoch": 0.37961220649595023, "grad_norm": 0.20429003794117925, "learning_rate": 7.801654633441127e-06, "loss": 0.0036, "step": 23200 }, { "epoch": 0.3797758324470261, "grad_norm": 0.27048246653185415, "learning_rate": 7.799288775353356e-06, "loss": 0.0061, "step": 23210 }, { "epoch": 0.37993945839810195, "grad_norm": 0.3452122167508699, "learning_rate": 7.796922004047127e-06, "loss": 0.005, "step": 23220 }, { "epoch": 0.3801030843491778, "grad_norm": 0.15326631573756364, "learning_rate": 7.794554320294554e-06, "loss": 0.0036, "step": 23230 }, { "epoch": 0.3802667103002536, "grad_norm": 0.09747025014605211, "learning_rate": 7.792185724868057e-06, "loss": 0.0043, "step": 23240 }, { "epoch": 0.38043033625132944, "grad_norm": 0.08128891271270045, "learning_rate": 7.789816218540347e-06, "loss": 0.0031, "step": 23250 }, { "epoch": 0.3805939622024053, "grad_norm": 0.15003152295647687, "learning_rate": 7.787445802084431e-06, "loss": 0.0028, "step": 23260 }, { "epoch": 0.38075758815348115, "grad_norm": 0.44895299846778386, "learning_rate": 7.785074476273619e-06, "loss": 0.0044, "step": 23270 }, { "epoch": 0.380921214104557, "grad_norm": 0.32355572764142454, "learning_rate": 7.782702241881511e-06, "loss": 0.0033, "step": 23280 }, { "epoch": 0.3810848400556328, "grad_norm": 0.3883518724584549, "learning_rate": 7.780329099682009e-06, "loss": 0.0053, "step": 23290 }, { "epoch": 0.38124846600670864, "grad_norm": 0.3550676649078261, "learning_rate": 7.777955050449309e-06, "loss": 0.0039, "step": 23300 }, { "epoch": 0.3814120919577845, "grad_norm": 0.2290813706395646, "learning_rate": 7.775580094957898e-06, "loss": 0.0054, "step": 23310 }, { "epoch": 0.38157571790886036, "grad_norm": 0.23169392574836783, "learning_rate": 7.773204233982568e-06, "loss": 0.0036, "step": 23320 }, { "epoch": 0.3817393438599362, "grad_norm": 0.15030269521599784, "learning_rate": 7.7708274682984e-06, "loss": 0.0033, "step": 23330 }, { "epoch": 0.381902969811012, "grad_norm": 0.08322809389609011, "learning_rate": 7.76844979868077e-06, "loss": 0.0046, "step": 23340 }, { "epoch": 0.38206659576208785, "grad_norm": 0.11267620146852048, "learning_rate": 7.766071225905355e-06, "loss": 0.0043, "step": 23350 }, { "epoch": 0.38223022171316373, "grad_norm": 0.24255192026343259, "learning_rate": 7.763691750748119e-06, "loss": 0.0048, "step": 23360 }, { "epoch": 0.38239384766423956, "grad_norm": 0.09673091441195862, "learning_rate": 7.761311373985326e-06, "loss": 0.0036, "step": 23370 }, { "epoch": 0.3825574736153154, "grad_norm": 0.3279771898044794, "learning_rate": 7.758930096393527e-06, "loss": 0.0047, "step": 23380 }, { "epoch": 0.3827210995663912, "grad_norm": 0.1512847121251218, "learning_rate": 7.756547918749578e-06, "loss": 0.0041, "step": 23390 }, { "epoch": 0.38288472551746705, "grad_norm": 0.1024776299819252, "learning_rate": 7.754164841830618e-06, "loss": 0.0041, "step": 23400 }, { "epoch": 0.38304835146854294, "grad_norm": 0.3501307089138783, "learning_rate": 7.751780866414085e-06, "loss": 0.0038, "step": 23410 }, { "epoch": 0.38321197741961877, "grad_norm": 0.13892322813927876, "learning_rate": 7.749395993277709e-06, "loss": 0.0044, "step": 23420 }, { "epoch": 0.3833756033706946, "grad_norm": 0.3676640348418677, "learning_rate": 7.747010223199512e-06, "loss": 0.007, "step": 23430 }, { "epoch": 0.3835392293217704, "grad_norm": 0.13520775097314114, "learning_rate": 7.74462355695781e-06, "loss": 0.0032, "step": 23440 }, { "epoch": 0.38370285527284625, "grad_norm": 0.12508156707270715, "learning_rate": 7.74223599533121e-06, "loss": 0.006, "step": 23450 }, { "epoch": 0.38386648122392214, "grad_norm": 0.30234936687706865, "learning_rate": 7.739847539098614e-06, "loss": 0.0034, "step": 23460 }, { "epoch": 0.38403010717499797, "grad_norm": 0.2774033559512334, "learning_rate": 7.73745818903921e-06, "loss": 0.0043, "step": 23470 }, { "epoch": 0.3841937331260738, "grad_norm": 0.22625481227573882, "learning_rate": 7.735067945932485e-06, "loss": 0.0048, "step": 23480 }, { "epoch": 0.38435735907714963, "grad_norm": 0.14256361069685392, "learning_rate": 7.73267681055821e-06, "loss": 0.003, "step": 23490 }, { "epoch": 0.38452098502822546, "grad_norm": 0.25642228173895215, "learning_rate": 7.730284783696454e-06, "loss": 0.0038, "step": 23500 }, { "epoch": 0.38468461097930134, "grad_norm": 0.25134789739128394, "learning_rate": 7.72789186612757e-06, "loss": 0.0033, "step": 23510 }, { "epoch": 0.3848482369303772, "grad_norm": 0.18375583525134048, "learning_rate": 7.725498058632205e-06, "loss": 0.0086, "step": 23520 }, { "epoch": 0.385011862881453, "grad_norm": 0.32767712827444134, "learning_rate": 7.723103361991303e-06, "loss": 0.0039, "step": 23530 }, { "epoch": 0.38517548883252883, "grad_norm": 0.03978580268010153, "learning_rate": 7.720707776986083e-06, "loss": 0.0032, "step": 23540 }, { "epoch": 0.38533911478360466, "grad_norm": 0.1426115032113511, "learning_rate": 7.718311304398065e-06, "loss": 0.0056, "step": 23550 }, { "epoch": 0.3855027407346805, "grad_norm": 0.18433456232944362, "learning_rate": 7.71591394500906e-06, "loss": 0.0035, "step": 23560 }, { "epoch": 0.3856663666857564, "grad_norm": 0.23622257986749895, "learning_rate": 7.713515699601158e-06, "loss": 0.0052, "step": 23570 }, { "epoch": 0.3858299926368322, "grad_norm": 0.6796815732065643, "learning_rate": 7.711116568956747e-06, "loss": 0.0045, "step": 23580 }, { "epoch": 0.38599361858790804, "grad_norm": 0.37824039067660775, "learning_rate": 7.708716553858502e-06, "loss": 0.0036, "step": 23590 }, { "epoch": 0.38615724453898387, "grad_norm": 0.19477081148150735, "learning_rate": 7.706315655089383e-06, "loss": 0.0073, "step": 23600 }, { "epoch": 0.3863208704900597, "grad_norm": 0.17877146675746802, "learning_rate": 7.703913873432643e-06, "loss": 0.0047, "step": 23610 }, { "epoch": 0.3864844964411356, "grad_norm": 0.13598237158444526, "learning_rate": 7.70151120967182e-06, "loss": 0.0032, "step": 23620 }, { "epoch": 0.3866481223922114, "grad_norm": 0.12345505375641387, "learning_rate": 7.699107664590737e-06, "loss": 0.0028, "step": 23630 }, { "epoch": 0.38681174834328724, "grad_norm": 0.41988597366761515, "learning_rate": 7.696703238973516e-06, "loss": 0.0052, "step": 23640 }, { "epoch": 0.3869753742943631, "grad_norm": 0.1300826422769908, "learning_rate": 7.694297933604551e-06, "loss": 0.0035, "step": 23650 }, { "epoch": 0.3871390002454389, "grad_norm": 0.1656186336176813, "learning_rate": 7.691891749268534e-06, "loss": 0.0048, "step": 23660 }, { "epoch": 0.3873026261965148, "grad_norm": 0.0994984491806116, "learning_rate": 7.689484686750439e-06, "loss": 0.004, "step": 23670 }, { "epoch": 0.3874662521475906, "grad_norm": 0.056285104982103255, "learning_rate": 7.687076746835528e-06, "loss": 0.0067, "step": 23680 }, { "epoch": 0.38762987809866645, "grad_norm": 0.14547735025543546, "learning_rate": 7.684667930309348e-06, "loss": 0.0051, "step": 23690 }, { "epoch": 0.3877935040497423, "grad_norm": 0.05723893840771515, "learning_rate": 7.682258237957734e-06, "loss": 0.0035, "step": 23700 }, { "epoch": 0.3879571300008181, "grad_norm": 0.07706274094493967, "learning_rate": 7.679847670566805e-06, "loss": 0.0039, "step": 23710 }, { "epoch": 0.388120755951894, "grad_norm": 0.16908053425663697, "learning_rate": 7.677436228922965e-06, "loss": 0.0047, "step": 23720 }, { "epoch": 0.3882843819029698, "grad_norm": 0.20682704785446202, "learning_rate": 7.675023913812906e-06, "loss": 0.0037, "step": 23730 }, { "epoch": 0.38844800785404565, "grad_norm": 0.2105514257682979, "learning_rate": 7.672610726023602e-06, "loss": 0.0025, "step": 23740 }, { "epoch": 0.3886116338051215, "grad_norm": 0.08150280789864257, "learning_rate": 7.670196666342313e-06, "loss": 0.0033, "step": 23750 }, { "epoch": 0.3887752597561973, "grad_norm": 0.41261195221404373, "learning_rate": 7.667781735556584e-06, "loss": 0.0032, "step": 23760 }, { "epoch": 0.3889388857072732, "grad_norm": 0.1402550595408482, "learning_rate": 7.66536593445424e-06, "loss": 0.0058, "step": 23770 }, { "epoch": 0.389102511658349, "grad_norm": 0.4559601834077003, "learning_rate": 7.6629492638234e-06, "loss": 0.004, "step": 23780 }, { "epoch": 0.38926613760942486, "grad_norm": 0.2718020626946962, "learning_rate": 7.660531724452455e-06, "loss": 0.0046, "step": 23790 }, { "epoch": 0.3894297635605007, "grad_norm": 0.31167053727800204, "learning_rate": 7.658113317130085e-06, "loss": 0.006, "step": 23800 }, { "epoch": 0.3895933895115765, "grad_norm": 0.14162698070805388, "learning_rate": 7.655694042645253e-06, "loss": 0.0047, "step": 23810 }, { "epoch": 0.3897570154626524, "grad_norm": 0.20374436730966325, "learning_rate": 7.653273901787206e-06, "loss": 0.0073, "step": 23820 }, { "epoch": 0.38992064141372823, "grad_norm": 0.08987071896446418, "learning_rate": 7.650852895345469e-06, "loss": 0.0046, "step": 23830 }, { "epoch": 0.39008426736480406, "grad_norm": 0.17976110940763293, "learning_rate": 7.648431024109857e-06, "loss": 0.0032, "step": 23840 }, { "epoch": 0.3902478933158799, "grad_norm": 0.10488565209290589, "learning_rate": 7.646008288870459e-06, "loss": 0.0056, "step": 23850 }, { "epoch": 0.3904115192669557, "grad_norm": 0.09359887947841258, "learning_rate": 7.643584690417648e-06, "loss": 0.0041, "step": 23860 }, { "epoch": 0.3905751452180316, "grad_norm": 0.18393071032048405, "learning_rate": 7.641160229542088e-06, "loss": 0.0041, "step": 23870 }, { "epoch": 0.39073877116910744, "grad_norm": 0.49723905136469476, "learning_rate": 7.638734907034708e-06, "loss": 0.0066, "step": 23880 }, { "epoch": 0.39090239712018326, "grad_norm": 0.14714969173836634, "learning_rate": 7.636308723686732e-06, "loss": 0.0032, "step": 23890 }, { "epoch": 0.3910660230712591, "grad_norm": 0.1950743796665209, "learning_rate": 7.633881680289656e-06, "loss": 0.0037, "step": 23900 }, { "epoch": 0.3912296490223349, "grad_norm": 0.03623669447808912, "learning_rate": 7.631453777635261e-06, "loss": 0.0029, "step": 23910 }, { "epoch": 0.3913932749734108, "grad_norm": 0.21860182386702248, "learning_rate": 7.629025016515609e-06, "loss": 0.0024, "step": 23920 }, { "epoch": 0.39155690092448664, "grad_norm": 0.08400513348713573, "learning_rate": 7.626595397723037e-06, "loss": 0.0026, "step": 23930 }, { "epoch": 0.39172052687556247, "grad_norm": 0.17091518477496065, "learning_rate": 7.624164922050168e-06, "loss": 0.0048, "step": 23940 }, { "epoch": 0.3918841528266383, "grad_norm": 0.17808107860712336, "learning_rate": 7.6217335902899016e-06, "loss": 0.0052, "step": 23950 }, { "epoch": 0.39204777877771413, "grad_norm": 0.34108256477238896, "learning_rate": 7.619301403235415e-06, "loss": 0.0035, "step": 23960 }, { "epoch": 0.39221140472878996, "grad_norm": 0.7140888700959167, "learning_rate": 7.616868361680167e-06, "loss": 0.0054, "step": 23970 }, { "epoch": 0.39237503067986584, "grad_norm": 0.11399917840730914, "learning_rate": 7.614434466417897e-06, "loss": 0.0034, "step": 23980 }, { "epoch": 0.3925386566309417, "grad_norm": 0.6017156041415024, "learning_rate": 7.611999718242615e-06, "loss": 0.0056, "step": 23990 }, { "epoch": 0.3927022825820175, "grad_norm": 0.3419080837919293, "learning_rate": 7.6095641179486194e-06, "loss": 0.0038, "step": 24000 }, { "epoch": 0.39286590853309333, "grad_norm": 0.2749004027013854, "learning_rate": 7.607127666330478e-06, "loss": 0.0046, "step": 24010 }, { "epoch": 0.39302953448416916, "grad_norm": 0.28278145551575046, "learning_rate": 7.6046903641830425e-06, "loss": 0.0033, "step": 24020 }, { "epoch": 0.39319316043524505, "grad_norm": 0.14586971876666538, "learning_rate": 7.6022522123014395e-06, "loss": 0.0048, "step": 24030 }, { "epoch": 0.3933567863863209, "grad_norm": 0.1305475610924498, "learning_rate": 7.599813211481073e-06, "loss": 0.004, "step": 24040 }, { "epoch": 0.3935204123373967, "grad_norm": 0.26745455795232354, "learning_rate": 7.597373362517622e-06, "loss": 0.0047, "step": 24050 }, { "epoch": 0.39368403828847254, "grad_norm": 0.20922475271325178, "learning_rate": 7.594932666207047e-06, "loss": 0.0024, "step": 24060 }, { "epoch": 0.39384766423954837, "grad_norm": 0.18923570980855386, "learning_rate": 7.592491123345577e-06, "loss": 0.0031, "step": 24070 }, { "epoch": 0.39401129019062425, "grad_norm": 0.20786433011041838, "learning_rate": 7.590048734729728e-06, "loss": 0.0064, "step": 24080 }, { "epoch": 0.3941749161417001, "grad_norm": 0.11546911766867284, "learning_rate": 7.587605501156285e-06, "loss": 0.0054, "step": 24090 }, { "epoch": 0.3943385420927759, "grad_norm": 0.2562306657399679, "learning_rate": 7.5851614234223045e-06, "loss": 0.0023, "step": 24100 }, { "epoch": 0.39450216804385174, "grad_norm": 0.43551874151285025, "learning_rate": 7.5827165023251296e-06, "loss": 0.004, "step": 24110 }, { "epoch": 0.39466579399492757, "grad_norm": 0.17809073202210374, "learning_rate": 7.580270738662372e-06, "loss": 0.009, "step": 24120 }, { "epoch": 0.39482941994600346, "grad_norm": 0.2753493933784868, "learning_rate": 7.577824133231915e-06, "loss": 0.0032, "step": 24130 }, { "epoch": 0.3949930458970793, "grad_norm": 0.3611824089100605, "learning_rate": 7.575376686831923e-06, "loss": 0.003, "step": 24140 }, { "epoch": 0.3951566718481551, "grad_norm": 0.29567921942116165, "learning_rate": 7.572928400260832e-06, "loss": 0.0037, "step": 24150 }, { "epoch": 0.39532029779923095, "grad_norm": 0.2900818948408225, "learning_rate": 7.5704792743173525e-06, "loss": 0.0075, "step": 24160 }, { "epoch": 0.3954839237503068, "grad_norm": 0.46853705096539516, "learning_rate": 7.568029309800468e-06, "loss": 0.0033, "step": 24170 }, { "epoch": 0.39564754970138266, "grad_norm": 0.32214776195764494, "learning_rate": 7.565578507509437e-06, "loss": 0.003, "step": 24180 }, { "epoch": 0.3958111756524585, "grad_norm": 0.27679982521829827, "learning_rate": 7.563126868243787e-06, "loss": 0.0038, "step": 24190 }, { "epoch": 0.3959748016035343, "grad_norm": 0.24731296058713828, "learning_rate": 7.560674392803326e-06, "loss": 0.0035, "step": 24200 }, { "epoch": 0.39613842755461015, "grad_norm": 0.15660282467961975, "learning_rate": 7.55822108198813e-06, "loss": 0.0056, "step": 24210 }, { "epoch": 0.396302053505686, "grad_norm": 0.2343976464655349, "learning_rate": 7.555766936598546e-06, "loss": 0.0048, "step": 24220 }, { "epoch": 0.39646567945676187, "grad_norm": 0.4364698283022281, "learning_rate": 7.5533119574351965e-06, "loss": 0.0045, "step": 24230 }, { "epoch": 0.3966293054078377, "grad_norm": 0.13215060219049593, "learning_rate": 7.550856145298977e-06, "loss": 0.0041, "step": 24240 }, { "epoch": 0.3967929313589135, "grad_norm": 0.24503982076897654, "learning_rate": 7.5483995009910505e-06, "loss": 0.0033, "step": 24250 }, { "epoch": 0.39695655730998936, "grad_norm": 0.4873248907037443, "learning_rate": 7.545942025312855e-06, "loss": 0.0055, "step": 24260 }, { "epoch": 0.3971201832610652, "grad_norm": 0.17597594873880792, "learning_rate": 7.543483719066098e-06, "loss": 0.0041, "step": 24270 }, { "epoch": 0.39728380921214107, "grad_norm": 0.1718056894711159, "learning_rate": 7.5410245830527585e-06, "loss": 0.0034, "step": 24280 }, { "epoch": 0.3974474351632169, "grad_norm": 0.09169126859098309, "learning_rate": 7.538564618075086e-06, "loss": 0.0025, "step": 24290 }, { "epoch": 0.39761106111429273, "grad_norm": 0.2819883474338189, "learning_rate": 7.536103824935601e-06, "loss": 0.0038, "step": 24300 }, { "epoch": 0.39777468706536856, "grad_norm": 0.28658077709015933, "learning_rate": 7.533642204437093e-06, "loss": 0.0074, "step": 24310 }, { "epoch": 0.3979383130164444, "grad_norm": 0.4155216468552145, "learning_rate": 7.531179757382623e-06, "loss": 0.0044, "step": 24320 }, { "epoch": 0.3981019389675203, "grad_norm": 0.18270416196540462, "learning_rate": 7.5287164845755204e-06, "loss": 0.0025, "step": 24330 }, { "epoch": 0.3982655649185961, "grad_norm": 0.11653914065654808, "learning_rate": 7.5262523868193825e-06, "loss": 0.0044, "step": 24340 }, { "epoch": 0.39842919086967193, "grad_norm": 0.11482324604004897, "learning_rate": 7.523787464918082e-06, "loss": 0.0029, "step": 24350 }, { "epoch": 0.39859281682074776, "grad_norm": 0.26748661071605706, "learning_rate": 7.521321719675753e-06, "loss": 0.0051, "step": 24360 }, { "epoch": 0.3987564427718236, "grad_norm": 0.3298675199707605, "learning_rate": 7.5188551518968e-06, "loss": 0.0041, "step": 24370 }, { "epoch": 0.3989200687228995, "grad_norm": 0.14649169546178883, "learning_rate": 7.5163877623859015e-06, "loss": 0.0041, "step": 24380 }, { "epoch": 0.3990836946739753, "grad_norm": 0.23165195179763795, "learning_rate": 7.513919551947997e-06, "loss": 0.0039, "step": 24390 }, { "epoch": 0.39924732062505114, "grad_norm": 0.32030853317824903, "learning_rate": 7.511450521388295e-06, "loss": 0.0032, "step": 24400 }, { "epoch": 0.39941094657612697, "grad_norm": 0.13467004850465902, "learning_rate": 7.508980671512276e-06, "loss": 0.0022, "step": 24410 }, { "epoch": 0.3995745725272028, "grad_norm": 0.08361064432088058, "learning_rate": 7.506510003125682e-06, "loss": 0.0045, "step": 24420 }, { "epoch": 0.39973819847827863, "grad_norm": 0.2592533028169886, "learning_rate": 7.504038517034529e-06, "loss": 0.0045, "step": 24430 }, { "epoch": 0.3999018244293545, "grad_norm": 0.10463397804815186, "learning_rate": 7.501566214045091e-06, "loss": 0.0021, "step": 24440 }, { "epoch": 0.40006545038043034, "grad_norm": 0.09251492135273492, "learning_rate": 7.499093094963916e-06, "loss": 0.0041, "step": 24450 }, { "epoch": 0.4002290763315062, "grad_norm": 0.217362914402295, "learning_rate": 7.496619160597814e-06, "loss": 0.0036, "step": 24460 }, { "epoch": 0.400392702282582, "grad_norm": 0.17417262821399385, "learning_rate": 7.494144411753863e-06, "loss": 0.0029, "step": 24470 }, { "epoch": 0.40055632823365783, "grad_norm": 0.14374684981023111, "learning_rate": 7.491668849239408e-06, "loss": 0.0038, "step": 24480 }, { "epoch": 0.4007199541847337, "grad_norm": 0.1038876909313206, "learning_rate": 7.489192473862053e-06, "loss": 0.005, "step": 24490 }, { "epoch": 0.40088358013580955, "grad_norm": 0.20241202462472102, "learning_rate": 7.486715286429674e-06, "loss": 0.0036, "step": 24500 }, { "epoch": 0.4010472060868854, "grad_norm": 0.08564381258566638, "learning_rate": 7.484237287750411e-06, "loss": 0.0039, "step": 24510 }, { "epoch": 0.4012108320379612, "grad_norm": 0.1384282979778313, "learning_rate": 7.4817584786326655e-06, "loss": 0.0033, "step": 24520 }, { "epoch": 0.40137445798903704, "grad_norm": 0.14918851870267386, "learning_rate": 7.4792788598851056e-06, "loss": 0.0036, "step": 24530 }, { "epoch": 0.4015380839401129, "grad_norm": 0.3912841325922538, "learning_rate": 7.476798432316663e-06, "loss": 0.0071, "step": 24540 }, { "epoch": 0.40170170989118875, "grad_norm": 0.10517057131657231, "learning_rate": 7.474317196736534e-06, "loss": 0.0046, "step": 24550 }, { "epoch": 0.4018653358422646, "grad_norm": 0.438422896905194, "learning_rate": 7.471835153954176e-06, "loss": 0.0056, "step": 24560 }, { "epoch": 0.4020289617933404, "grad_norm": 0.9635172988054806, "learning_rate": 7.469352304779314e-06, "loss": 0.004, "step": 24570 }, { "epoch": 0.40219258774441624, "grad_norm": 0.250769087968856, "learning_rate": 7.466868650021932e-06, "loss": 0.0042, "step": 24580 }, { "epoch": 0.4023562136954921, "grad_norm": 0.42790683545976993, "learning_rate": 7.4643841904922774e-06, "loss": 0.0035, "step": 24590 }, { "epoch": 0.40251983964656796, "grad_norm": 0.5465562104007252, "learning_rate": 7.4618989270008645e-06, "loss": 0.005, "step": 24600 }, { "epoch": 0.4026834655976438, "grad_norm": 0.07521216493912247, "learning_rate": 7.459412860358465e-06, "loss": 0.0042, "step": 24610 }, { "epoch": 0.4028470915487196, "grad_norm": 0.3394244108728312, "learning_rate": 7.456925991376112e-06, "loss": 0.0016, "step": 24620 }, { "epoch": 0.40301071749979545, "grad_norm": 0.16124652601557224, "learning_rate": 7.454438320865105e-06, "loss": 0.0029, "step": 24630 }, { "epoch": 0.40317434345087133, "grad_norm": 0.3146371089502906, "learning_rate": 7.451949849637003e-06, "loss": 0.0062, "step": 24640 }, { "epoch": 0.40333796940194716, "grad_norm": 0.15917060775104497, "learning_rate": 7.449460578503623e-06, "loss": 0.0045, "step": 24650 }, { "epoch": 0.403501595353023, "grad_norm": 0.23178933729667187, "learning_rate": 7.446970508277049e-06, "loss": 0.0041, "step": 24660 }, { "epoch": 0.4036652213040988, "grad_norm": 0.12327364502342168, "learning_rate": 7.444479639769619e-06, "loss": 0.0059, "step": 24670 }, { "epoch": 0.40382884725517465, "grad_norm": 0.21301808310173742, "learning_rate": 7.441987973793938e-06, "loss": 0.0037, "step": 24680 }, { "epoch": 0.40399247320625054, "grad_norm": 0.17798293986555608, "learning_rate": 7.4394955111628675e-06, "loss": 0.0043, "step": 24690 }, { "epoch": 0.40415609915732637, "grad_norm": 0.1242754693210632, "learning_rate": 7.437002252689526e-06, "loss": 0.0042, "step": 24700 }, { "epoch": 0.4043197251084022, "grad_norm": 0.2822948963275418, "learning_rate": 7.434508199187297e-06, "loss": 0.0039, "step": 24710 }, { "epoch": 0.404483351059478, "grad_norm": 0.12324009229018375, "learning_rate": 7.432013351469825e-06, "loss": 0.0032, "step": 24720 }, { "epoch": 0.40464697701055385, "grad_norm": 0.27731210759245295, "learning_rate": 7.429517710351003e-06, "loss": 0.0036, "step": 24730 }, { "epoch": 0.40481060296162974, "grad_norm": 0.11191213090405355, "learning_rate": 7.427021276644994e-06, "loss": 0.0043, "step": 24740 }, { "epoch": 0.40497422891270557, "grad_norm": 0.2670990291241771, "learning_rate": 7.424524051166217e-06, "loss": 0.0046, "step": 24750 }, { "epoch": 0.4051378548637814, "grad_norm": 0.2289976824532421, "learning_rate": 7.4220260347293416e-06, "loss": 0.0043, "step": 24760 }, { "epoch": 0.40530148081485723, "grad_norm": 0.24994441863450337, "learning_rate": 7.419527228149308e-06, "loss": 0.0033, "step": 24770 }, { "epoch": 0.40546510676593306, "grad_norm": 0.1885125850723168, "learning_rate": 7.417027632241305e-06, "loss": 0.0042, "step": 24780 }, { "epoch": 0.40562873271700894, "grad_norm": 0.1365113928687538, "learning_rate": 7.414527247820779e-06, "loss": 0.0048, "step": 24790 }, { "epoch": 0.4057923586680848, "grad_norm": 0.31691579356917443, "learning_rate": 7.412026075703442e-06, "loss": 0.0036, "step": 24800 }, { "epoch": 0.4059559846191606, "grad_norm": 0.2988252530650168, "learning_rate": 7.409524116705252e-06, "loss": 0.0027, "step": 24810 }, { "epoch": 0.40611961057023643, "grad_norm": 0.21014786417115097, "learning_rate": 7.407021371642431e-06, "loss": 0.0033, "step": 24820 }, { "epoch": 0.40628323652131226, "grad_norm": 0.3467988559851425, "learning_rate": 7.404517841331456e-06, "loss": 0.0064, "step": 24830 }, { "epoch": 0.40644686247238815, "grad_norm": 0.21297141036119263, "learning_rate": 7.402013526589057e-06, "loss": 0.005, "step": 24840 }, { "epoch": 0.406610488423464, "grad_norm": 0.27899888130643274, "learning_rate": 7.399508428232224e-06, "loss": 0.0026, "step": 24850 }, { "epoch": 0.4067741143745398, "grad_norm": 0.10119414622072359, "learning_rate": 7.397002547078201e-06, "loss": 0.0031, "step": 24860 }, { "epoch": 0.40693774032561564, "grad_norm": 0.4926613697071245, "learning_rate": 7.394495883944488e-06, "loss": 0.0042, "step": 24870 }, { "epoch": 0.40710136627669147, "grad_norm": 0.09518737309021089, "learning_rate": 7.391988439648838e-06, "loss": 0.0037, "step": 24880 }, { "epoch": 0.4072649922277673, "grad_norm": 0.05776711379574887, "learning_rate": 7.389480215009262e-06, "loss": 0.0047, "step": 24890 }, { "epoch": 0.4074286181788432, "grad_norm": 0.19075215755369676, "learning_rate": 7.386971210844025e-06, "loss": 0.0028, "step": 24900 }, { "epoch": 0.407592244129919, "grad_norm": 0.18034706335636252, "learning_rate": 7.384461427971641e-06, "loss": 0.0026, "step": 24910 }, { "epoch": 0.40775587008099484, "grad_norm": 0.22716767124497111, "learning_rate": 7.381950867210885e-06, "loss": 0.0035, "step": 24920 }, { "epoch": 0.40791949603207067, "grad_norm": 0.1408005002183599, "learning_rate": 7.379439529380783e-06, "loss": 0.0048, "step": 24930 }, { "epoch": 0.4080831219831465, "grad_norm": 0.30906575390049795, "learning_rate": 7.376927415300614e-06, "loss": 0.0041, "step": 24940 }, { "epoch": 0.4082467479342224, "grad_norm": 0.21051147143743668, "learning_rate": 7.374414525789912e-06, "loss": 0.0049, "step": 24950 }, { "epoch": 0.4084103738852982, "grad_norm": 0.1676996881952094, "learning_rate": 7.371900861668461e-06, "loss": 0.0046, "step": 24960 }, { "epoch": 0.40857399983637405, "grad_norm": 0.14949044401428832, "learning_rate": 7.369386423756301e-06, "loss": 0.0059, "step": 24970 }, { "epoch": 0.4087376257874499, "grad_norm": 0.23799930117179977, "learning_rate": 7.366871212873721e-06, "loss": 0.0055, "step": 24980 }, { "epoch": 0.4089012517385257, "grad_norm": 0.15534322283996277, "learning_rate": 7.364355229841267e-06, "loss": 0.0035, "step": 24990 }, { "epoch": 0.4090648776896016, "grad_norm": 0.1295135965334056, "learning_rate": 7.361838475479731e-06, "loss": 0.0034, "step": 25000 }, { "epoch": 0.4090648776896016, "eval_loss": 0.0027813350316137075, "eval_runtime": 7.9267, "eval_samples_per_second": 25.231, "eval_steps_per_second": 6.308, "step": 25000 }, { "epoch": 0.4092285036406774, "grad_norm": 0.16366820602490878, "learning_rate": 7.35932095061016e-06, "loss": 0.0061, "step": 25010 }, { "epoch": 0.40939212959175325, "grad_norm": 0.11493003572652542, "learning_rate": 7.356802656053854e-06, "loss": 0.0037, "step": 25020 }, { "epoch": 0.4095557555428291, "grad_norm": 0.22172278090165354, "learning_rate": 7.354283592632362e-06, "loss": 0.0037, "step": 25030 }, { "epoch": 0.4097193814939049, "grad_norm": 0.09709497709916562, "learning_rate": 7.351763761167481e-06, "loss": 0.006, "step": 25040 }, { "epoch": 0.4098830074449808, "grad_norm": 0.28601669696342946, "learning_rate": 7.349243162481264e-06, "loss": 0.0045, "step": 25050 }, { "epoch": 0.4100466333960566, "grad_norm": 0.09213507453239303, "learning_rate": 7.346721797396011e-06, "loss": 0.0056, "step": 25060 }, { "epoch": 0.41021025934713246, "grad_norm": 0.022767298901006292, "learning_rate": 7.344199666734272e-06, "loss": 0.0026, "step": 25070 }, { "epoch": 0.4103738852982083, "grad_norm": 0.44932751718055636, "learning_rate": 7.3416767713188495e-06, "loss": 0.0046, "step": 25080 }, { "epoch": 0.4105375112492841, "grad_norm": 0.05508288098749318, "learning_rate": 7.339153111972792e-06, "loss": 0.0034, "step": 25090 }, { "epoch": 0.41070113720036, "grad_norm": 0.1993481578208381, "learning_rate": 7.3366286895193985e-06, "loss": 0.0041, "step": 25100 }, { "epoch": 0.41086476315143583, "grad_norm": 0.153656181469206, "learning_rate": 7.334103504782217e-06, "loss": 0.0041, "step": 25110 }, { "epoch": 0.41102838910251166, "grad_norm": 0.15019282274719914, "learning_rate": 7.331577558585047e-06, "loss": 0.0044, "step": 25120 }, { "epoch": 0.4111920150535875, "grad_norm": 0.25499043726316334, "learning_rate": 7.32905085175193e-06, "loss": 0.0027, "step": 25130 }, { "epoch": 0.4113556410046633, "grad_norm": 0.08621914453850096, "learning_rate": 7.326523385107163e-06, "loss": 0.0064, "step": 25140 }, { "epoch": 0.4115192669557392, "grad_norm": 0.26592603663902625, "learning_rate": 7.323995159475284e-06, "loss": 0.0037, "step": 25150 }, { "epoch": 0.41168289290681503, "grad_norm": 0.5372821929579726, "learning_rate": 7.321466175681085e-06, "loss": 0.0042, "step": 25160 }, { "epoch": 0.41184651885789086, "grad_norm": 0.3324393080286242, "learning_rate": 7.318936434549599e-06, "loss": 0.007, "step": 25170 }, { "epoch": 0.4120101448089667, "grad_norm": 0.22921505891319766, "learning_rate": 7.316405936906112e-06, "loss": 0.0046, "step": 25180 }, { "epoch": 0.4121737707600425, "grad_norm": 0.12725402319379356, "learning_rate": 7.313874683576152e-06, "loss": 0.0037, "step": 25190 }, { "epoch": 0.4123373967111184, "grad_norm": 0.09153486889125334, "learning_rate": 7.311342675385499e-06, "loss": 0.0067, "step": 25200 }, { "epoch": 0.41250102266219424, "grad_norm": 0.10998413633811534, "learning_rate": 7.308809913160172e-06, "loss": 0.006, "step": 25210 }, { "epoch": 0.41266464861327007, "grad_norm": 0.1389045403795814, "learning_rate": 7.306276397726441e-06, "loss": 0.0028, "step": 25220 }, { "epoch": 0.4128282745643459, "grad_norm": 0.3011361382987698, "learning_rate": 7.303742129910822e-06, "loss": 0.0057, "step": 25230 }, { "epoch": 0.41299190051542173, "grad_norm": 0.1288056882596583, "learning_rate": 7.301207110540075e-06, "loss": 0.0028, "step": 25240 }, { "epoch": 0.4131555264664976, "grad_norm": 0.07245430049306827, "learning_rate": 7.298671340441203e-06, "loss": 0.0039, "step": 25250 }, { "epoch": 0.41331915241757344, "grad_norm": 0.19863024664792595, "learning_rate": 7.296134820441458e-06, "loss": 0.0042, "step": 25260 }, { "epoch": 0.4134827783686493, "grad_norm": 0.32841361805311275, "learning_rate": 7.293597551368336e-06, "loss": 0.0027, "step": 25270 }, { "epoch": 0.4136464043197251, "grad_norm": 0.3205918095557655, "learning_rate": 7.291059534049573e-06, "loss": 0.0044, "step": 25280 }, { "epoch": 0.41381003027080093, "grad_norm": 0.2735797336259977, "learning_rate": 7.288520769313154e-06, "loss": 0.004, "step": 25290 }, { "epoch": 0.41397365622187676, "grad_norm": 0.2548705959056739, "learning_rate": 7.2859812579873044e-06, "loss": 0.0039, "step": 25300 }, { "epoch": 0.41413728217295265, "grad_norm": 0.23120724601638037, "learning_rate": 7.283441000900499e-06, "loss": 0.0043, "step": 25310 }, { "epoch": 0.4143009081240285, "grad_norm": 0.2588090059982368, "learning_rate": 7.280899998881448e-06, "loss": 0.0053, "step": 25320 }, { "epoch": 0.4144645340751043, "grad_norm": 0.11638425555277944, "learning_rate": 7.278358252759108e-06, "loss": 0.0065, "step": 25330 }, { "epoch": 0.41462816002618014, "grad_norm": 0.44621288665326675, "learning_rate": 7.275815763362681e-06, "loss": 0.0053, "step": 25340 }, { "epoch": 0.41479178597725597, "grad_norm": 0.056298070020892176, "learning_rate": 7.27327253152161e-06, "loss": 0.0036, "step": 25350 }, { "epoch": 0.41495541192833185, "grad_norm": 0.07306393984173162, "learning_rate": 7.270728558065577e-06, "loss": 0.0035, "step": 25360 }, { "epoch": 0.4151190378794077, "grad_norm": 0.08300442543002406, "learning_rate": 7.268183843824508e-06, "loss": 0.0046, "step": 25370 }, { "epoch": 0.4152826638304835, "grad_norm": 0.13002235276868931, "learning_rate": 7.265638389628573e-06, "loss": 0.0037, "step": 25380 }, { "epoch": 0.41544628978155934, "grad_norm": 0.3929847956837395, "learning_rate": 7.263092196308181e-06, "loss": 0.0042, "step": 25390 }, { "epoch": 0.41560991573263517, "grad_norm": 0.059800544730971614, "learning_rate": 7.260545264693981e-06, "loss": 0.003, "step": 25400 }, { "epoch": 0.41577354168371106, "grad_norm": 0.07178877133618652, "learning_rate": 7.257997595616868e-06, "loss": 0.0043, "step": 25410 }, { "epoch": 0.4159371676347869, "grad_norm": 0.1400546119060847, "learning_rate": 7.255449189907969e-06, "loss": 0.0041, "step": 25420 }, { "epoch": 0.4161007935858627, "grad_norm": 0.2590985389353461, "learning_rate": 7.252900048398659e-06, "loss": 0.0031, "step": 25430 }, { "epoch": 0.41626441953693855, "grad_norm": 0.2879370643738906, "learning_rate": 7.250350171920552e-06, "loss": 0.0049, "step": 25440 }, { "epoch": 0.4164280454880144, "grad_norm": 0.19251311708620675, "learning_rate": 7.2477995613054976e-06, "loss": 0.0048, "step": 25450 }, { "epoch": 0.41659167143909026, "grad_norm": 0.04774703019911711, "learning_rate": 7.245248217385587e-06, "loss": 0.0047, "step": 25460 }, { "epoch": 0.4167552973901661, "grad_norm": 0.15131050239706828, "learning_rate": 7.242696140993152e-06, "loss": 0.0039, "step": 25470 }, { "epoch": 0.4169189233412419, "grad_norm": 0.1304384226187665, "learning_rate": 7.240143332960763e-06, "loss": 0.0026, "step": 25480 }, { "epoch": 0.41708254929231775, "grad_norm": 0.3406655896680717, "learning_rate": 7.237589794121228e-06, "loss": 0.0045, "step": 25490 }, { "epoch": 0.4172461752433936, "grad_norm": 0.2754758331290669, "learning_rate": 7.235035525307594e-06, "loss": 0.0047, "step": 25500 }, { "epoch": 0.41740980119446947, "grad_norm": 0.12593022257383235, "learning_rate": 7.232480527353146e-06, "loss": 0.0035, "step": 25510 }, { "epoch": 0.4175734271455453, "grad_norm": 0.08595491025536435, "learning_rate": 7.2299248010914055e-06, "loss": 0.0036, "step": 25520 }, { "epoch": 0.4177370530966211, "grad_norm": 0.440752191744352, "learning_rate": 7.227368347356136e-06, "loss": 0.0036, "step": 25530 }, { "epoch": 0.41790067904769695, "grad_norm": 0.1741835725824583, "learning_rate": 7.224811166981333e-06, "loss": 0.004, "step": 25540 }, { "epoch": 0.4180643049987728, "grad_norm": 0.1183051500251292, "learning_rate": 7.222253260801232e-06, "loss": 0.0042, "step": 25550 }, { "epoch": 0.41822793094984867, "grad_norm": 0.29602314694956017, "learning_rate": 7.219694629650305e-06, "loss": 0.0038, "step": 25560 }, { "epoch": 0.4183915569009245, "grad_norm": 0.6777188826272532, "learning_rate": 7.217135274363259e-06, "loss": 0.0031, "step": 25570 }, { "epoch": 0.41855518285200033, "grad_norm": 0.07905566450227594, "learning_rate": 7.214575195775039e-06, "loss": 0.0041, "step": 25580 }, { "epoch": 0.41871880880307616, "grad_norm": 0.05497533982981633, "learning_rate": 7.212014394720827e-06, "loss": 0.0047, "step": 25590 }, { "epoch": 0.418882434754152, "grad_norm": 0.34419156132658113, "learning_rate": 7.209452872036036e-06, "loss": 0.0082, "step": 25600 }, { "epoch": 0.4190460607052279, "grad_norm": 0.12328705480874431, "learning_rate": 7.206890628556319e-06, "loss": 0.0029, "step": 25610 }, { "epoch": 0.4192096866563037, "grad_norm": 0.3380323392210668, "learning_rate": 7.2043276651175644e-06, "loss": 0.0067, "step": 25620 }, { "epoch": 0.41937331260737953, "grad_norm": 0.2995823746266645, "learning_rate": 7.201763982555892e-06, "loss": 0.0045, "step": 25630 }, { "epoch": 0.41953693855845536, "grad_norm": 0.10651008665162544, "learning_rate": 7.199199581707655e-06, "loss": 0.0039, "step": 25640 }, { "epoch": 0.4197005645095312, "grad_norm": 0.16919199590973988, "learning_rate": 7.196634463409449e-06, "loss": 0.0027, "step": 25650 }, { "epoch": 0.4198641904606071, "grad_norm": 0.18006726829413203, "learning_rate": 7.1940686284980964e-06, "loss": 0.0036, "step": 25660 }, { "epoch": 0.4200278164116829, "grad_norm": 0.13336002912538997, "learning_rate": 7.191502077810655e-06, "loss": 0.0051, "step": 25670 }, { "epoch": 0.42019144236275874, "grad_norm": 0.1484632223575381, "learning_rate": 7.188934812184416e-06, "loss": 0.0032, "step": 25680 }, { "epoch": 0.42035506831383457, "grad_norm": 0.13638726658727227, "learning_rate": 7.186366832456906e-06, "loss": 0.004, "step": 25690 }, { "epoch": 0.4205186942649104, "grad_norm": 0.0875012743478111, "learning_rate": 7.1837981394658815e-06, "loss": 0.0026, "step": 25700 }, { "epoch": 0.4206823202159863, "grad_norm": 0.10142134174183257, "learning_rate": 7.181228734049335e-06, "loss": 0.004, "step": 25710 }, { "epoch": 0.4208459461670621, "grad_norm": 0.23113478252167108, "learning_rate": 7.178658617045489e-06, "loss": 0.0028, "step": 25720 }, { "epoch": 0.42100957211813794, "grad_norm": 0.28072939840163075, "learning_rate": 7.1760877892927975e-06, "loss": 0.0043, "step": 25730 }, { "epoch": 0.42117319806921377, "grad_norm": 0.2424052598356967, "learning_rate": 7.173516251629946e-06, "loss": 0.0023, "step": 25740 }, { "epoch": 0.4213368240202896, "grad_norm": 0.23369629464094502, "learning_rate": 7.170944004895858e-06, "loss": 0.0031, "step": 25750 }, { "epoch": 0.42150044997136543, "grad_norm": 0.33863423999794695, "learning_rate": 7.168371049929681e-06, "loss": 0.0052, "step": 25760 }, { "epoch": 0.4216640759224413, "grad_norm": 0.3606761212599277, "learning_rate": 7.165797387570794e-06, "loss": 0.0037, "step": 25770 }, { "epoch": 0.42182770187351715, "grad_norm": 0.15759483747548642, "learning_rate": 7.1632230186588135e-06, "loss": 0.004, "step": 25780 }, { "epoch": 0.421991327824593, "grad_norm": 0.13523477786409308, "learning_rate": 7.160647944033579e-06, "loss": 0.004, "step": 25790 }, { "epoch": 0.4221549537756688, "grad_norm": 0.14815362536092092, "learning_rate": 7.15807216453516e-06, "loss": 0.0049, "step": 25800 }, { "epoch": 0.42231857972674464, "grad_norm": 0.2747708770203793, "learning_rate": 7.155495681003865e-06, "loss": 0.0037, "step": 25810 }, { "epoch": 0.4224822056778205, "grad_norm": 0.08811620193173504, "learning_rate": 7.152918494280223e-06, "loss": 0.0031, "step": 25820 }, { "epoch": 0.42264583162889635, "grad_norm": 0.2074250495338809, "learning_rate": 7.150340605204995e-06, "loss": 0.0034, "step": 25830 }, { "epoch": 0.4228094575799722, "grad_norm": 0.07747833596524517, "learning_rate": 7.147762014619175e-06, "loss": 0.0042, "step": 25840 }, { "epoch": 0.422973083531048, "grad_norm": 0.2780726539107247, "learning_rate": 7.145182723363979e-06, "loss": 0.005, "step": 25850 }, { "epoch": 0.42313670948212384, "grad_norm": 0.13958914640352982, "learning_rate": 7.142602732280855e-06, "loss": 0.003, "step": 25860 }, { "epoch": 0.4233003354331997, "grad_norm": 0.3234123600236006, "learning_rate": 7.140022042211483e-06, "loss": 0.0042, "step": 25870 }, { "epoch": 0.42346396138427556, "grad_norm": 0.15880185611290065, "learning_rate": 7.1374406539977655e-06, "loss": 0.0034, "step": 25880 }, { "epoch": 0.4236275873353514, "grad_norm": 0.11649412886238553, "learning_rate": 7.1348585684818336e-06, "loss": 0.0026, "step": 25890 }, { "epoch": 0.4237912132864272, "grad_norm": 0.26888195828160083, "learning_rate": 7.13227578650605e-06, "loss": 0.0039, "step": 25900 }, { "epoch": 0.42395483923750305, "grad_norm": 0.1690432687754525, "learning_rate": 7.1296923089129975e-06, "loss": 0.0042, "step": 25910 }, { "epoch": 0.42411846518857893, "grad_norm": 0.16282231202630046, "learning_rate": 7.127108136545493e-06, "loss": 0.0041, "step": 25920 }, { "epoch": 0.42428209113965476, "grad_norm": 0.31043797341011037, "learning_rate": 7.124523270246577e-06, "loss": 0.0043, "step": 25930 }, { "epoch": 0.4244457170907306, "grad_norm": 0.07987518740001981, "learning_rate": 7.121937710859514e-06, "loss": 0.0051, "step": 25940 }, { "epoch": 0.4246093430418064, "grad_norm": 0.03108273118593692, "learning_rate": 7.119351459227801e-06, "loss": 0.0021, "step": 25950 }, { "epoch": 0.42477296899288225, "grad_norm": 0.2657725790529966, "learning_rate": 7.116764516195154e-06, "loss": 0.0041, "step": 25960 }, { "epoch": 0.42493659494395813, "grad_norm": 0.25190590215514863, "learning_rate": 7.114176882605517e-06, "loss": 0.0034, "step": 25970 }, { "epoch": 0.42510022089503396, "grad_norm": 0.28143789275703346, "learning_rate": 7.111588559303062e-06, "loss": 0.0031, "step": 25980 }, { "epoch": 0.4252638468461098, "grad_norm": 0.14686164813988473, "learning_rate": 7.108999547132181e-06, "loss": 0.0026, "step": 25990 }, { "epoch": 0.4254274727971856, "grad_norm": 0.3997855765683396, "learning_rate": 7.106409846937495e-06, "loss": 0.0039, "step": 26000 }, { "epoch": 0.42559109874826145, "grad_norm": 0.12174768720184286, "learning_rate": 7.103819459563849e-06, "loss": 0.0059, "step": 26010 }, { "epoch": 0.42575472469933734, "grad_norm": 0.3043690654399745, "learning_rate": 7.1012283858563095e-06, "loss": 0.0054, "step": 26020 }, { "epoch": 0.42591835065041317, "grad_norm": 0.3010284748838983, "learning_rate": 7.098636626660167e-06, "loss": 0.004, "step": 26030 }, { "epoch": 0.426081976601489, "grad_norm": 0.1536985377324005, "learning_rate": 7.09604418282094e-06, "loss": 0.0026, "step": 26040 }, { "epoch": 0.42624560255256483, "grad_norm": 0.2598214488887838, "learning_rate": 7.0934510551843685e-06, "loss": 0.0047, "step": 26050 }, { "epoch": 0.42640922850364066, "grad_norm": 0.08222984498904659, "learning_rate": 7.09085724459641e-06, "loss": 0.0027, "step": 26060 }, { "epoch": 0.42657285445471654, "grad_norm": 0.2771049633768196, "learning_rate": 7.088262751903252e-06, "loss": 0.0034, "step": 26070 }, { "epoch": 0.4267364804057924, "grad_norm": 0.319512606286483, "learning_rate": 7.085667577951302e-06, "loss": 0.005, "step": 26080 }, { "epoch": 0.4269001063568682, "grad_norm": 0.07960582866559789, "learning_rate": 7.083071723587187e-06, "loss": 0.005, "step": 26090 }, { "epoch": 0.42706373230794403, "grad_norm": 0.11816912444471785, "learning_rate": 7.080475189657764e-06, "loss": 0.0041, "step": 26100 }, { "epoch": 0.42722735825901986, "grad_norm": 0.21320226479503496, "learning_rate": 7.077877977010101e-06, "loss": 0.0029, "step": 26110 }, { "epoch": 0.42739098421009575, "grad_norm": 0.21308824686181474, "learning_rate": 7.075280086491495e-06, "loss": 0.0056, "step": 26120 }, { "epoch": 0.4275546101611716, "grad_norm": 0.37611247773284, "learning_rate": 7.072681518949462e-06, "loss": 0.0045, "step": 26130 }, { "epoch": 0.4277182361122474, "grad_norm": 0.23094808106785786, "learning_rate": 7.0700822752317385e-06, "loss": 0.0047, "step": 26140 }, { "epoch": 0.42788186206332324, "grad_norm": 0.14097639312914642, "learning_rate": 7.067482356186282e-06, "loss": 0.002, "step": 26150 }, { "epoch": 0.42804548801439907, "grad_norm": 0.16980402129117889, "learning_rate": 7.064881762661269e-06, "loss": 0.0037, "step": 26160 }, { "epoch": 0.4282091139654749, "grad_norm": 0.2767072071028258, "learning_rate": 7.062280495505097e-06, "loss": 0.0059, "step": 26170 }, { "epoch": 0.4283727399165508, "grad_norm": 0.10576538767518015, "learning_rate": 7.059678555566385e-06, "loss": 0.0033, "step": 26180 }, { "epoch": 0.4285363658676266, "grad_norm": 0.7782760945480418, "learning_rate": 7.057075943693969e-06, "loss": 0.0036, "step": 26190 }, { "epoch": 0.42869999181870244, "grad_norm": 0.059320742372660176, "learning_rate": 7.054472660736906e-06, "loss": 0.0039, "step": 26200 }, { "epoch": 0.42886361776977827, "grad_norm": 0.3857469899120441, "learning_rate": 7.051868707544469e-06, "loss": 0.0036, "step": 26210 }, { "epoch": 0.4290272437208541, "grad_norm": 0.11383051411665328, "learning_rate": 7.049264084966153e-06, "loss": 0.0046, "step": 26220 }, { "epoch": 0.42919086967193, "grad_norm": 0.10196668250279828, "learning_rate": 7.046658793851671e-06, "loss": 0.0028, "step": 26230 }, { "epoch": 0.4293544956230058, "grad_norm": 0.2820621453676008, "learning_rate": 7.044052835050952e-06, "loss": 0.0029, "step": 26240 }, { "epoch": 0.42951812157408165, "grad_norm": 0.1523478188072128, "learning_rate": 7.041446209414143e-06, "loss": 0.0045, "step": 26250 }, { "epoch": 0.4296817475251575, "grad_norm": 0.3534594705835794, "learning_rate": 7.0388389177916115e-06, "loss": 0.0062, "step": 26260 }, { "epoch": 0.4298453734762333, "grad_norm": 0.2071102533757351, "learning_rate": 7.036230961033939e-06, "loss": 0.0024, "step": 26270 }, { "epoch": 0.4300089994273092, "grad_norm": 0.05559876762546286, "learning_rate": 7.033622339991925e-06, "loss": 0.0028, "step": 26280 }, { "epoch": 0.430172625378385, "grad_norm": 0.28636141542971366, "learning_rate": 7.031013055516589e-06, "loss": 0.0039, "step": 26290 }, { "epoch": 0.43033625132946085, "grad_norm": 0.16371011533088622, "learning_rate": 7.02840310845916e-06, "loss": 0.0033, "step": 26300 }, { "epoch": 0.4304998772805367, "grad_norm": 0.2454076254881016, "learning_rate": 7.025792499671088e-06, "loss": 0.0055, "step": 26310 }, { "epoch": 0.4306635032316125, "grad_norm": 0.059874613857367136, "learning_rate": 7.023181230004039e-06, "loss": 0.0036, "step": 26320 }, { "epoch": 0.4308271291826884, "grad_norm": 0.3551563966825225, "learning_rate": 7.020569300309894e-06, "loss": 0.0044, "step": 26330 }, { "epoch": 0.4309907551337642, "grad_norm": 0.1815498421283578, "learning_rate": 7.0179567114407466e-06, "loss": 0.0063, "step": 26340 }, { "epoch": 0.43115438108484005, "grad_norm": 0.300644638111522, "learning_rate": 7.0153434642489105e-06, "loss": 0.0027, "step": 26350 }, { "epoch": 0.4313180070359159, "grad_norm": 0.24561887504609559, "learning_rate": 7.012729559586909e-06, "loss": 0.0044, "step": 26360 }, { "epoch": 0.4314816329869917, "grad_norm": 0.14308150717914173, "learning_rate": 7.0101149983074835e-06, "loss": 0.0039, "step": 26370 }, { "epoch": 0.4316452589380676, "grad_norm": 0.08432327872081744, "learning_rate": 7.007499781263589e-06, "loss": 0.0049, "step": 26380 }, { "epoch": 0.43180888488914343, "grad_norm": 0.1319968120871778, "learning_rate": 7.0048839093083924e-06, "loss": 0.0025, "step": 26390 }, { "epoch": 0.43197251084021926, "grad_norm": 0.3925233589550464, "learning_rate": 7.0022673832952755e-06, "loss": 0.0039, "step": 26400 }, { "epoch": 0.4321361367912951, "grad_norm": 0.11656273847565152, "learning_rate": 6.999650204077836e-06, "loss": 0.0045, "step": 26410 }, { "epoch": 0.4322997627423709, "grad_norm": 0.11793715394093093, "learning_rate": 6.99703237250988e-06, "loss": 0.0071, "step": 26420 }, { "epoch": 0.4324633886934468, "grad_norm": 0.04020616294005387, "learning_rate": 6.9944138894454286e-06, "loss": 0.0121, "step": 26430 }, { "epoch": 0.43262701464452263, "grad_norm": 0.2896521614662805, "learning_rate": 6.991794755738718e-06, "loss": 0.0032, "step": 26440 }, { "epoch": 0.43279064059559846, "grad_norm": 0.28585578637671455, "learning_rate": 6.9891749722441905e-06, "loss": 0.0036, "step": 26450 }, { "epoch": 0.4329542665466743, "grad_norm": 0.08138209778883475, "learning_rate": 6.986554539816507e-06, "loss": 0.0035, "step": 26460 }, { "epoch": 0.4331178924977501, "grad_norm": 0.4606576564019319, "learning_rate": 6.983933459310538e-06, "loss": 0.0028, "step": 26470 }, { "epoch": 0.433281518448826, "grad_norm": 0.050669523940494635, "learning_rate": 6.981311731581362e-06, "loss": 0.0028, "step": 26480 }, { "epoch": 0.43344514439990184, "grad_norm": 0.3367169531022323, "learning_rate": 6.978689357484272e-06, "loss": 0.0043, "step": 26490 }, { "epoch": 0.43360877035097767, "grad_norm": 1.0214040298479903, "learning_rate": 6.976066337874774e-06, "loss": 0.01, "step": 26500 }, { "epoch": 0.4337723963020535, "grad_norm": 0.2398571509653539, "learning_rate": 6.973442673608577e-06, "loss": 0.0054, "step": 26510 }, { "epoch": 0.4339360222531293, "grad_norm": 0.35119390589056326, "learning_rate": 6.970818365541609e-06, "loss": 0.0032, "step": 26520 }, { "epoch": 0.4340996482042052, "grad_norm": 0.2546658645679215, "learning_rate": 6.968193414530003e-06, "loss": 0.0038, "step": 26530 }, { "epoch": 0.43426327415528104, "grad_norm": 0.08359499744380625, "learning_rate": 6.965567821430101e-06, "loss": 0.0024, "step": 26540 }, { "epoch": 0.4344269001063569, "grad_norm": 0.15206130028073248, "learning_rate": 6.962941587098459e-06, "loss": 0.002, "step": 26550 }, { "epoch": 0.4345905260574327, "grad_norm": 0.13538814682661104, "learning_rate": 6.960314712391838e-06, "loss": 0.0024, "step": 26560 }, { "epoch": 0.43475415200850853, "grad_norm": 0.3383678400522058, "learning_rate": 6.957687198167209e-06, "loss": 0.0037, "step": 26570 }, { "epoch": 0.4349177779595844, "grad_norm": 0.4931067122688483, "learning_rate": 6.955059045281754e-06, "loss": 0.003, "step": 26580 }, { "epoch": 0.43508140391066025, "grad_norm": 0.09294194993299032, "learning_rate": 6.95243025459286e-06, "loss": 0.0057, "step": 26590 }, { "epoch": 0.4352450298617361, "grad_norm": 0.21814255096405427, "learning_rate": 6.9498008269581215e-06, "loss": 0.0042, "step": 26600 }, { "epoch": 0.4354086558128119, "grad_norm": 0.0655241806742665, "learning_rate": 6.947170763235348e-06, "loss": 0.0037, "step": 26610 }, { "epoch": 0.43557228176388774, "grad_norm": 0.09426947713383059, "learning_rate": 6.944540064282546e-06, "loss": 0.0024, "step": 26620 }, { "epoch": 0.43573590771496357, "grad_norm": 0.47283577995391657, "learning_rate": 6.941908730957938e-06, "loss": 0.0096, "step": 26630 }, { "epoch": 0.43589953366603945, "grad_norm": 0.1333318790356309, "learning_rate": 6.939276764119949e-06, "loss": 0.0047, "step": 26640 }, { "epoch": 0.4360631596171153, "grad_norm": 0.2404295140637122, "learning_rate": 6.936644164627211e-06, "loss": 0.004, "step": 26650 }, { "epoch": 0.4362267855681911, "grad_norm": 0.24518198351447984, "learning_rate": 6.934010933338562e-06, "loss": 0.0038, "step": 26660 }, { "epoch": 0.43639041151926694, "grad_norm": 0.3185812874595286, "learning_rate": 6.931377071113049e-06, "loss": 0.0057, "step": 26670 }, { "epoch": 0.43655403747034277, "grad_norm": 0.37415758177021174, "learning_rate": 6.928742578809923e-06, "loss": 0.0037, "step": 26680 }, { "epoch": 0.43671766342141866, "grad_norm": 0.11597583755022083, "learning_rate": 6.926107457288639e-06, "loss": 0.0065, "step": 26690 }, { "epoch": 0.4368812893724945, "grad_norm": 0.18726594627044127, "learning_rate": 6.923471707408861e-06, "loss": 0.003, "step": 26700 }, { "epoch": 0.4370449153235703, "grad_norm": 0.11393662229777222, "learning_rate": 6.920835330030454e-06, "loss": 0.0056, "step": 26710 }, { "epoch": 0.43720854127464615, "grad_norm": 0.18781339406360215, "learning_rate": 6.91819832601349e-06, "loss": 0.0029, "step": 26720 }, { "epoch": 0.437372167225722, "grad_norm": 0.36467731302309664, "learning_rate": 6.915560696218244e-06, "loss": 0.0057, "step": 26730 }, { "epoch": 0.43753579317679786, "grad_norm": 0.07667592611856083, "learning_rate": 6.912922441505199e-06, "loss": 0.0036, "step": 26740 }, { "epoch": 0.4376994191278737, "grad_norm": 0.25489474028229125, "learning_rate": 6.9102835627350366e-06, "loss": 0.0051, "step": 26750 }, { "epoch": 0.4378630450789495, "grad_norm": 0.3282044914912616, "learning_rate": 6.907644060768643e-06, "loss": 0.0068, "step": 26760 }, { "epoch": 0.43802667103002535, "grad_norm": 0.45992873172913684, "learning_rate": 6.905003936467113e-06, "loss": 0.0034, "step": 26770 }, { "epoch": 0.4381902969811012, "grad_norm": 0.2532412518026153, "learning_rate": 6.902363190691737e-06, "loss": 0.004, "step": 26780 }, { "epoch": 0.43835392293217706, "grad_norm": 0.3905362384664212, "learning_rate": 6.899721824304013e-06, "loss": 0.0048, "step": 26790 }, { "epoch": 0.4385175488832529, "grad_norm": 0.20911831716236948, "learning_rate": 6.897079838165639e-06, "loss": 0.009, "step": 26800 }, { "epoch": 0.4386811748343287, "grad_norm": 0.1258964154101348, "learning_rate": 6.894437233138519e-06, "loss": 0.0026, "step": 26810 }, { "epoch": 0.43884480078540455, "grad_norm": 0.037943220304833394, "learning_rate": 6.891794010084752e-06, "loss": 0.004, "step": 26820 }, { "epoch": 0.4390084267364804, "grad_norm": 0.2798017272641533, "learning_rate": 6.889150169866648e-06, "loss": 0.0042, "step": 26830 }, { "epoch": 0.43917205268755627, "grad_norm": 0.10511146771166849, "learning_rate": 6.886505713346708e-06, "loss": 0.0027, "step": 26840 }, { "epoch": 0.4393356786386321, "grad_norm": 0.13947022131414216, "learning_rate": 6.883860641387642e-06, "loss": 0.0052, "step": 26850 }, { "epoch": 0.43949930458970793, "grad_norm": 0.08452819925743989, "learning_rate": 6.8812149548523575e-06, "loss": 0.0031, "step": 26860 }, { "epoch": 0.43966293054078376, "grad_norm": 0.1434245223487868, "learning_rate": 6.878568654603963e-06, "loss": 0.0033, "step": 26870 }, { "epoch": 0.4398265564918596, "grad_norm": 0.28453676144312634, "learning_rate": 6.8759217415057656e-06, "loss": 0.0053, "step": 26880 }, { "epoch": 0.4399901824429355, "grad_norm": 0.18478552034540832, "learning_rate": 6.873274216421277e-06, "loss": 0.0029, "step": 26890 }, { "epoch": 0.4401538083940113, "grad_norm": 0.2676192159492127, "learning_rate": 6.8706260802142024e-06, "loss": 0.0027, "step": 26900 }, { "epoch": 0.44031743434508713, "grad_norm": 0.07319288925996524, "learning_rate": 6.86797733374845e-06, "loss": 0.0035, "step": 26910 }, { "epoch": 0.44048106029616296, "grad_norm": 0.29009586763264417, "learning_rate": 6.86532797788813e-06, "loss": 0.003, "step": 26920 }, { "epoch": 0.4406446862472388, "grad_norm": 0.08665517937386834, "learning_rate": 6.862678013497542e-06, "loss": 0.0072, "step": 26930 }, { "epoch": 0.4408083121983147, "grad_norm": 0.2691543553758849, "learning_rate": 6.8600274414411935e-06, "loss": 0.0043, "step": 26940 }, { "epoch": 0.4409719381493905, "grad_norm": 0.10455772810240402, "learning_rate": 6.857376262583788e-06, "loss": 0.0037, "step": 26950 }, { "epoch": 0.44113556410046634, "grad_norm": 0.19673337511626976, "learning_rate": 6.854724477790224e-06, "loss": 0.0025, "step": 26960 }, { "epoch": 0.44129919005154217, "grad_norm": 0.3046458368383548, "learning_rate": 6.852072087925596e-06, "loss": 0.0056, "step": 26970 }, { "epoch": 0.441462816002618, "grad_norm": 0.41068400098969343, "learning_rate": 6.849419093855207e-06, "loss": 0.0039, "step": 26980 }, { "epoch": 0.4416264419536939, "grad_norm": 0.19644071604477897, "learning_rate": 6.846765496444543e-06, "loss": 0.0039, "step": 26990 }, { "epoch": 0.4417900679047697, "grad_norm": 0.441587072847681, "learning_rate": 6.844111296559295e-06, "loss": 0.0034, "step": 27000 }, { "epoch": 0.44195369385584554, "grad_norm": 0.049335780060254096, "learning_rate": 6.841456495065352e-06, "loss": 0.0026, "step": 27010 }, { "epoch": 0.44211731980692137, "grad_norm": 0.22555364982685128, "learning_rate": 6.838801092828789e-06, "loss": 0.0055, "step": 27020 }, { "epoch": 0.4422809457579972, "grad_norm": 0.3506820752376438, "learning_rate": 6.8361450907158914e-06, "loss": 0.0037, "step": 27030 }, { "epoch": 0.4424445717090731, "grad_norm": 0.37252589746462683, "learning_rate": 6.833488489593129e-06, "loss": 0.0048, "step": 27040 }, { "epoch": 0.4426081976601489, "grad_norm": 0.12596561076287582, "learning_rate": 6.83083129032717e-06, "loss": 0.0032, "step": 27050 }, { "epoch": 0.44277182361122475, "grad_norm": 0.36855003586646656, "learning_rate": 6.828173493784882e-06, "loss": 0.0034, "step": 27060 }, { "epoch": 0.4429354495623006, "grad_norm": 0.1187093229332145, "learning_rate": 6.825515100833322e-06, "loss": 0.0027, "step": 27070 }, { "epoch": 0.4430990755133764, "grad_norm": 0.21420625444207794, "learning_rate": 6.822856112339743e-06, "loss": 0.0044, "step": 27080 }, { "epoch": 0.44326270146445224, "grad_norm": 0.13212195982297378, "learning_rate": 6.820196529171595e-06, "loss": 0.0029, "step": 27090 }, { "epoch": 0.4434263274155281, "grad_norm": 0.16991669895329012, "learning_rate": 6.81753635219652e-06, "loss": 0.0055, "step": 27100 }, { "epoch": 0.44358995336660395, "grad_norm": 0.1301498199844634, "learning_rate": 6.814875582282351e-06, "loss": 0.0033, "step": 27110 }, { "epoch": 0.4437535793176798, "grad_norm": 0.27160532100392515, "learning_rate": 6.81221422029712e-06, "loss": 0.003, "step": 27120 }, { "epoch": 0.4439172052687556, "grad_norm": 0.09349616541647653, "learning_rate": 6.809552267109048e-06, "loss": 0.0039, "step": 27130 }, { "epoch": 0.44408083121983144, "grad_norm": 0.0619327774518488, "learning_rate": 6.806889723586549e-06, "loss": 0.0033, "step": 27140 }, { "epoch": 0.4442444571709073, "grad_norm": 0.06738360657410429, "learning_rate": 6.804226590598234e-06, "loss": 0.0033, "step": 27150 }, { "epoch": 0.44440808312198316, "grad_norm": 0.19304807966978316, "learning_rate": 6.801562869012901e-06, "loss": 0.0031, "step": 27160 }, { "epoch": 0.444571709073059, "grad_norm": 0.22817749270864807, "learning_rate": 6.7988985596995406e-06, "loss": 0.0035, "step": 27170 }, { "epoch": 0.4447353350241348, "grad_norm": 0.2501569833202537, "learning_rate": 6.796233663527338e-06, "loss": 0.0024, "step": 27180 }, { "epoch": 0.44489896097521064, "grad_norm": 0.2511496296375615, "learning_rate": 6.793568181365671e-06, "loss": 0.0037, "step": 27190 }, { "epoch": 0.44506258692628653, "grad_norm": 0.2057920450301289, "learning_rate": 6.790902114084102e-06, "loss": 0.0059, "step": 27200 }, { "epoch": 0.44522621287736236, "grad_norm": 0.30349328966832656, "learning_rate": 6.78823546255239e-06, "loss": 0.0041, "step": 27210 }, { "epoch": 0.4453898388284382, "grad_norm": 0.3238361132473224, "learning_rate": 6.785568227640482e-06, "loss": 0.0044, "step": 27220 }, { "epoch": 0.445553464779514, "grad_norm": 0.2539030981669684, "learning_rate": 6.7829004102185196e-06, "loss": 0.0028, "step": 27230 }, { "epoch": 0.44571709073058985, "grad_norm": 0.19106719969786964, "learning_rate": 6.780232011156826e-06, "loss": 0.0031, "step": 27240 }, { "epoch": 0.44588071668166573, "grad_norm": 0.15011373766246566, "learning_rate": 6.7775630313259226e-06, "loss": 0.0026, "step": 27250 }, { "epoch": 0.44604434263274156, "grad_norm": 0.6300576875990713, "learning_rate": 6.774893471596515e-06, "loss": 0.0062, "step": 27260 }, { "epoch": 0.4462079685838174, "grad_norm": 0.12215105585574247, "learning_rate": 6.772223332839502e-06, "loss": 0.0044, "step": 27270 }, { "epoch": 0.4463715945348932, "grad_norm": 0.14687192121190618, "learning_rate": 6.769552615925967e-06, "loss": 0.0034, "step": 27280 }, { "epoch": 0.44653522048596905, "grad_norm": 0.23091678634828572, "learning_rate": 6.766881321727187e-06, "loss": 0.0036, "step": 27290 }, { "epoch": 0.44669884643704494, "grad_norm": 0.12763779831920075, "learning_rate": 6.7642094511146206e-06, "loss": 0.0029, "step": 27300 }, { "epoch": 0.44686247238812077, "grad_norm": 0.1685973600876189, "learning_rate": 6.761537004959922e-06, "loss": 0.0029, "step": 27310 }, { "epoch": 0.4470260983391966, "grad_norm": 0.07115366140522682, "learning_rate": 6.758863984134927e-06, "loss": 0.0035, "step": 27320 }, { "epoch": 0.44718972429027243, "grad_norm": 0.20852679954973063, "learning_rate": 6.756190389511661e-06, "loss": 0.0022, "step": 27330 }, { "epoch": 0.44735335024134826, "grad_norm": 0.1620276853985165, "learning_rate": 6.75351622196234e-06, "loss": 0.0051, "step": 27340 }, { "epoch": 0.44751697619242414, "grad_norm": 0.10082416701852726, "learning_rate": 6.7508414823593615e-06, "loss": 0.0035, "step": 27350 }, { "epoch": 0.4476806021435, "grad_norm": 0.2683457363565609, "learning_rate": 6.748166171575312e-06, "loss": 0.0036, "step": 27360 }, { "epoch": 0.4478442280945758, "grad_norm": 0.08933184564006597, "learning_rate": 6.7454902904829665e-06, "loss": 0.0037, "step": 27370 }, { "epoch": 0.44800785404565163, "grad_norm": 0.3206275654916656, "learning_rate": 6.74281383995528e-06, "loss": 0.0034, "step": 27380 }, { "epoch": 0.44817147999672746, "grad_norm": 0.19322088702913504, "learning_rate": 6.740136820865399e-06, "loss": 0.0039, "step": 27390 }, { "epoch": 0.44833510594780335, "grad_norm": 0.21529193356299864, "learning_rate": 6.737459234086659e-06, "loss": 0.0035, "step": 27400 }, { "epoch": 0.4484987318988792, "grad_norm": 0.16180435444783095, "learning_rate": 6.734781080492565e-06, "loss": 0.003, "step": 27410 }, { "epoch": 0.448662357849955, "grad_norm": 0.1898129571454145, "learning_rate": 6.732102360956824e-06, "loss": 0.0033, "step": 27420 }, { "epoch": 0.44882598380103084, "grad_norm": 0.18386068824888088, "learning_rate": 6.729423076353321e-06, "loss": 0.002, "step": 27430 }, { "epoch": 0.44898960975210667, "grad_norm": 0.2817407603197924, "learning_rate": 6.7267432275561226e-06, "loss": 0.0018, "step": 27440 }, { "epoch": 0.44915323570318255, "grad_norm": 0.31293412475848786, "learning_rate": 6.724062815439483e-06, "loss": 0.0053, "step": 27450 }, { "epoch": 0.4493168616542584, "grad_norm": 0.15001263918443392, "learning_rate": 6.721381840877841e-06, "loss": 0.0067, "step": 27460 }, { "epoch": 0.4494804876053342, "grad_norm": 0.11503062692936089, "learning_rate": 6.718700304745813e-06, "loss": 0.0054, "step": 27470 }, { "epoch": 0.44964411355641004, "grad_norm": 0.011530787489426025, "learning_rate": 6.716018207918207e-06, "loss": 0.0052, "step": 27480 }, { "epoch": 0.44980773950748587, "grad_norm": 0.3394037368213191, "learning_rate": 6.713335551270008e-06, "loss": 0.0031, "step": 27490 }, { "epoch": 0.4499713654585617, "grad_norm": 0.4962577480068975, "learning_rate": 6.710652335676385e-06, "loss": 0.0029, "step": 27500 }, { "epoch": 0.4501349914096376, "grad_norm": 0.13910441443303384, "learning_rate": 6.70796856201269e-06, "loss": 0.0041, "step": 27510 }, { "epoch": 0.4502986173607134, "grad_norm": 0.1728998996073941, "learning_rate": 6.705284231154457e-06, "loss": 0.0032, "step": 27520 }, { "epoch": 0.45046224331178925, "grad_norm": 0.4107431421970128, "learning_rate": 6.7025993439774005e-06, "loss": 0.0048, "step": 27530 }, { "epoch": 0.4506258692628651, "grad_norm": 0.13430169359339686, "learning_rate": 6.699913901357419e-06, "loss": 0.0043, "step": 27540 }, { "epoch": 0.4507894952139409, "grad_norm": 0.28455499650246296, "learning_rate": 6.697227904170589e-06, "loss": 0.0028, "step": 27550 }, { "epoch": 0.4509531211650168, "grad_norm": 0.28013527746554107, "learning_rate": 6.6945413532931695e-06, "loss": 0.0029, "step": 27560 }, { "epoch": 0.4511167471160926, "grad_norm": 0.2546719535210105, "learning_rate": 6.6918542496016015e-06, "loss": 0.0025, "step": 27570 }, { "epoch": 0.45128037306716845, "grad_norm": 0.4091843429159266, "learning_rate": 6.689166593972506e-06, "loss": 0.0034, "step": 27580 }, { "epoch": 0.4514439990182443, "grad_norm": 0.29023836466149916, "learning_rate": 6.68647838728268e-06, "loss": 0.0106, "step": 27590 }, { "epoch": 0.4516076249693201, "grad_norm": 0.10220764048375632, "learning_rate": 6.683789630409105e-06, "loss": 0.0051, "step": 27600 }, { "epoch": 0.451771250920396, "grad_norm": 0.1853279600570272, "learning_rate": 6.681100324228942e-06, "loss": 0.0034, "step": 27610 }, { "epoch": 0.4519348768714718, "grad_norm": 0.29853930768537046, "learning_rate": 6.678410469619526e-06, "loss": 0.0047, "step": 27620 }, { "epoch": 0.45209850282254765, "grad_norm": 0.09633031660729302, "learning_rate": 6.675720067458378e-06, "loss": 0.0036, "step": 27630 }, { "epoch": 0.4522621287736235, "grad_norm": 0.15881005505147477, "learning_rate": 6.673029118623192e-06, "loss": 0.0041, "step": 27640 }, { "epoch": 0.4524257547246993, "grad_norm": 0.4448010176951898, "learning_rate": 6.670337623991843e-06, "loss": 0.0036, "step": 27650 }, { "epoch": 0.4525893806757752, "grad_norm": 0.21669927361220584, "learning_rate": 6.667645584442384e-06, "loss": 0.0038, "step": 27660 }, { "epoch": 0.45275300662685103, "grad_norm": 0.23087343395496432, "learning_rate": 6.664953000853045e-06, "loss": 0.0041, "step": 27670 }, { "epoch": 0.45291663257792686, "grad_norm": 0.11427074319863043, "learning_rate": 6.662259874102234e-06, "loss": 0.0041, "step": 27680 }, { "epoch": 0.4530802585290027, "grad_norm": 0.23530300356438355, "learning_rate": 6.659566205068535e-06, "loss": 0.0028, "step": 27690 }, { "epoch": 0.4532438844800785, "grad_norm": 0.21368281631611508, "learning_rate": 6.656871994630711e-06, "loss": 0.0038, "step": 27700 }, { "epoch": 0.4534075104311544, "grad_norm": 0.18810898599614018, "learning_rate": 6.654177243667701e-06, "loss": 0.0031, "step": 27710 }, { "epoch": 0.45357113638223023, "grad_norm": 0.22847075665782174, "learning_rate": 6.651481953058617e-06, "loss": 0.0023, "step": 27720 }, { "epoch": 0.45373476233330606, "grad_norm": 0.1006610330293393, "learning_rate": 6.648786123682753e-06, "loss": 0.0031, "step": 27730 }, { "epoch": 0.4538983882843819, "grad_norm": 0.15704252599502816, "learning_rate": 6.646089756419575e-06, "loss": 0.0039, "step": 27740 }, { "epoch": 0.4540620142354577, "grad_norm": 0.14882139131445798, "learning_rate": 6.6433928521487234e-06, "loss": 0.0035, "step": 27750 }, { "epoch": 0.4542256401865336, "grad_norm": 0.0617542920675112, "learning_rate": 6.640695411750017e-06, "loss": 0.0027, "step": 27760 }, { "epoch": 0.45438926613760944, "grad_norm": 0.23990166752639389, "learning_rate": 6.637997436103449e-06, "loss": 0.0049, "step": 27770 }, { "epoch": 0.45455289208868527, "grad_norm": 0.1513712189962754, "learning_rate": 6.635298926089183e-06, "loss": 0.003, "step": 27780 }, { "epoch": 0.4547165180397611, "grad_norm": 0.09116766049656864, "learning_rate": 6.632599882587564e-06, "loss": 0.003, "step": 27790 }, { "epoch": 0.4548801439908369, "grad_norm": 0.2635636914762161, "learning_rate": 6.629900306479105e-06, "loss": 0.0022, "step": 27800 }, { "epoch": 0.4550437699419128, "grad_norm": 0.14830682433501483, "learning_rate": 6.627200198644495e-06, "loss": 0.0041, "step": 27810 }, { "epoch": 0.45520739589298864, "grad_norm": 0.11682866365036997, "learning_rate": 6.6244995599645965e-06, "loss": 0.0039, "step": 27820 }, { "epoch": 0.45537102184406447, "grad_norm": 0.18068296123738037, "learning_rate": 6.621798391320447e-06, "loss": 0.0024, "step": 27830 }, { "epoch": 0.4555346477951403, "grad_norm": 0.06292653796356126, "learning_rate": 6.61909669359325e-06, "loss": 0.0028, "step": 27840 }, { "epoch": 0.45569827374621613, "grad_norm": 0.17913351015372944, "learning_rate": 6.616394467664393e-06, "loss": 0.0048, "step": 27850 }, { "epoch": 0.455861899697292, "grad_norm": 0.14321371585553475, "learning_rate": 6.613691714415426e-06, "loss": 0.0041, "step": 27860 }, { "epoch": 0.45602552564836785, "grad_norm": 0.25830690828249425, "learning_rate": 6.610988434728074e-06, "loss": 0.0037, "step": 27870 }, { "epoch": 0.4561891515994437, "grad_norm": 0.3233300514334663, "learning_rate": 6.608284629484236e-06, "loss": 0.0044, "step": 27880 }, { "epoch": 0.4563527775505195, "grad_norm": 0.17197116439186733, "learning_rate": 6.605580299565979e-06, "loss": 0.0039, "step": 27890 }, { "epoch": 0.45651640350159534, "grad_norm": 0.08076616052499506, "learning_rate": 6.602875445855544e-06, "loss": 0.0028, "step": 27900 }, { "epoch": 0.4566800294526712, "grad_norm": 0.22018222274379615, "learning_rate": 6.600170069235341e-06, "loss": 0.0044, "step": 27910 }, { "epoch": 0.45684365540374705, "grad_norm": 0.0943511793138976, "learning_rate": 6.597464170587952e-06, "loss": 0.0055, "step": 27920 }, { "epoch": 0.4570072813548229, "grad_norm": 0.10811601592329687, "learning_rate": 6.594757750796126e-06, "loss": 0.004, "step": 27930 }, { "epoch": 0.4571709073058987, "grad_norm": 0.345361207625253, "learning_rate": 6.5920508107427895e-06, "loss": 0.0063, "step": 27940 }, { "epoch": 0.45733453325697454, "grad_norm": 0.12460512429776673, "learning_rate": 6.58934335131103e-06, "loss": 0.0039, "step": 27950 }, { "epoch": 0.45749815920805037, "grad_norm": 0.4157533186199701, "learning_rate": 6.586635373384108e-06, "loss": 0.0027, "step": 27960 }, { "epoch": 0.45766178515912626, "grad_norm": 0.19605744477493942, "learning_rate": 6.583926877845457e-06, "loss": 0.0029, "step": 27970 }, { "epoch": 0.4578254111102021, "grad_norm": 0.31972212468392336, "learning_rate": 6.5812178655786706e-06, "loss": 0.0048, "step": 27980 }, { "epoch": 0.4579890370612779, "grad_norm": 0.11560751669539406, "learning_rate": 6.5785083374675205e-06, "loss": 0.0023, "step": 27990 }, { "epoch": 0.45815266301235374, "grad_norm": 0.2452682001892355, "learning_rate": 6.575798294395942e-06, "loss": 0.0028, "step": 28000 }, { "epoch": 0.4583162889634296, "grad_norm": 0.24777936951951843, "learning_rate": 6.573087737248036e-06, "loss": 0.0036, "step": 28010 }, { "epoch": 0.45847991491450546, "grad_norm": 0.13406181121718788, "learning_rate": 6.570376666908076e-06, "loss": 0.0056, "step": 28020 }, { "epoch": 0.4586435408655813, "grad_norm": 0.04274947482068868, "learning_rate": 6.5676650842605e-06, "loss": 0.0037, "step": 28030 }, { "epoch": 0.4588071668166571, "grad_norm": 0.23387800396372455, "learning_rate": 6.564952990189912e-06, "loss": 0.0041, "step": 28040 }, { "epoch": 0.45897079276773295, "grad_norm": 0.12753395391179928, "learning_rate": 6.56224038558109e-06, "loss": 0.0063, "step": 28050 }, { "epoch": 0.4591344187188088, "grad_norm": 0.04267521522810831, "learning_rate": 6.559527271318968e-06, "loss": 0.0027, "step": 28060 }, { "epoch": 0.45929804466988466, "grad_norm": 0.19258106104619244, "learning_rate": 6.556813648288654e-06, "loss": 0.0037, "step": 28070 }, { "epoch": 0.4594616706209605, "grad_norm": 0.10709265415044303, "learning_rate": 6.55409951737542e-06, "loss": 0.0031, "step": 28080 }, { "epoch": 0.4596252965720363, "grad_norm": 0.5781668050613947, "learning_rate": 6.551384879464701e-06, "loss": 0.0052, "step": 28090 }, { "epoch": 0.45978892252311215, "grad_norm": 0.05237590097146237, "learning_rate": 6.548669735442102e-06, "loss": 0.0044, "step": 28100 }, { "epoch": 0.459952548474188, "grad_norm": 0.10969787323608506, "learning_rate": 6.545954086193388e-06, "loss": 0.0043, "step": 28110 }, { "epoch": 0.46011617442526387, "grad_norm": 0.3622901438224775, "learning_rate": 6.543237932604495e-06, "loss": 0.0043, "step": 28120 }, { "epoch": 0.4602798003763397, "grad_norm": 0.157251485534472, "learning_rate": 6.5405212755615164e-06, "loss": 0.0029, "step": 28130 }, { "epoch": 0.46044342632741553, "grad_norm": 0.11641807471512758, "learning_rate": 6.537804115950717e-06, "loss": 0.0032, "step": 28140 }, { "epoch": 0.46060705227849136, "grad_norm": 0.08176782794943495, "learning_rate": 6.53508645465852e-06, "loss": 0.0028, "step": 28150 }, { "epoch": 0.4607706782295672, "grad_norm": 0.11294801996424937, "learning_rate": 6.5323682925715136e-06, "loss": 0.0045, "step": 28160 }, { "epoch": 0.4609343041806431, "grad_norm": 0.22385870160449708, "learning_rate": 6.5296496305764525e-06, "loss": 0.0069, "step": 28170 }, { "epoch": 0.4610979301317189, "grad_norm": 0.21869673012488364, "learning_rate": 6.526930469560251e-06, "loss": 0.0043, "step": 28180 }, { "epoch": 0.46126155608279473, "grad_norm": 0.1413104353645075, "learning_rate": 6.524210810409988e-06, "loss": 0.004, "step": 28190 }, { "epoch": 0.46142518203387056, "grad_norm": 0.2351052479553508, "learning_rate": 6.521490654012901e-06, "loss": 0.0031, "step": 28200 }, { "epoch": 0.4615888079849464, "grad_norm": 0.09478239102803084, "learning_rate": 6.518770001256396e-06, "loss": 0.0042, "step": 28210 }, { "epoch": 0.4617524339360223, "grad_norm": 0.1218401218070308, "learning_rate": 6.5160488530280385e-06, "loss": 0.0023, "step": 28220 }, { "epoch": 0.4619160598870981, "grad_norm": 0.06855636961544907, "learning_rate": 6.513327210215552e-06, "loss": 0.0055, "step": 28230 }, { "epoch": 0.46207968583817394, "grad_norm": 0.1261837964054517, "learning_rate": 6.510605073706826e-06, "loss": 0.0056, "step": 28240 }, { "epoch": 0.46224331178924977, "grad_norm": 0.2578633714803832, "learning_rate": 6.5078824443899115e-06, "loss": 0.0027, "step": 28250 }, { "epoch": 0.4624069377403256, "grad_norm": 0.14503415327099703, "learning_rate": 6.505159323153013e-06, "loss": 0.0032, "step": 28260 }, { "epoch": 0.4625705636914015, "grad_norm": 0.09252036412698689, "learning_rate": 6.502435710884506e-06, "loss": 0.0027, "step": 28270 }, { "epoch": 0.4627341896424773, "grad_norm": 0.15244492131846205, "learning_rate": 6.499711608472918e-06, "loss": 0.0051, "step": 28280 }, { "epoch": 0.46289781559355314, "grad_norm": 0.26100027524936276, "learning_rate": 6.496987016806938e-06, "loss": 0.0024, "step": 28290 }, { "epoch": 0.46306144154462897, "grad_norm": 0.1089780942651381, "learning_rate": 6.49426193677542e-06, "loss": 0.0039, "step": 28300 }, { "epoch": 0.4632250674957048, "grad_norm": 0.5848199749858723, "learning_rate": 6.491536369267371e-06, "loss": 0.0078, "step": 28310 }, { "epoch": 0.4633886934467807, "grad_norm": 0.2802170074961866, "learning_rate": 6.488810315171956e-06, "loss": 0.0035, "step": 28320 }, { "epoch": 0.4635523193978565, "grad_norm": 0.35529530886876903, "learning_rate": 6.4860837753785066e-06, "loss": 0.0047, "step": 28330 }, { "epoch": 0.46371594534893235, "grad_norm": 0.2406100141099862, "learning_rate": 6.483356750776507e-06, "loss": 0.0046, "step": 28340 }, { "epoch": 0.4638795713000082, "grad_norm": 0.10982221706496778, "learning_rate": 6.480629242255598e-06, "loss": 0.003, "step": 28350 }, { "epoch": 0.464043197251084, "grad_norm": 0.2851794022887627, "learning_rate": 6.4779012507055864e-06, "loss": 0.0049, "step": 28360 }, { "epoch": 0.46420682320215984, "grad_norm": 0.02129570408499468, "learning_rate": 6.4751727770164275e-06, "loss": 0.0029, "step": 28370 }, { "epoch": 0.4643704491532357, "grad_norm": 0.04854633804719744, "learning_rate": 6.472443822078236e-06, "loss": 0.0035, "step": 28380 }, { "epoch": 0.46453407510431155, "grad_norm": 0.20110606401675876, "learning_rate": 6.469714386781288e-06, "loss": 0.0026, "step": 28390 }, { "epoch": 0.4646977010553874, "grad_norm": 0.6136880304702995, "learning_rate": 6.4669844720160145e-06, "loss": 0.005, "step": 28400 }, { "epoch": 0.4648613270064632, "grad_norm": 0.2631771719340116, "learning_rate": 6.464254078672998e-06, "loss": 0.0049, "step": 28410 }, { "epoch": 0.46502495295753904, "grad_norm": 0.1278674156545175, "learning_rate": 6.461523207642986e-06, "loss": 0.0037, "step": 28420 }, { "epoch": 0.4651885789086149, "grad_norm": 0.1751038952152329, "learning_rate": 6.458791859816872e-06, "loss": 0.0031, "step": 28430 }, { "epoch": 0.46535220485969075, "grad_norm": 0.06645694751595674, "learning_rate": 6.4560600360857115e-06, "loss": 0.0025, "step": 28440 }, { "epoch": 0.4655158308107666, "grad_norm": 0.11945802561504729, "learning_rate": 6.4533277373407154e-06, "loss": 0.0032, "step": 28450 }, { "epoch": 0.4656794567618424, "grad_norm": 0.13495307293374847, "learning_rate": 6.450594964473248e-06, "loss": 0.003, "step": 28460 }, { "epoch": 0.46584308271291824, "grad_norm": 0.04360982806844561, "learning_rate": 6.447861718374825e-06, "loss": 0.0047, "step": 28470 }, { "epoch": 0.46600670866399413, "grad_norm": 0.1541740574734713, "learning_rate": 6.445127999937122e-06, "loss": 0.0032, "step": 28480 }, { "epoch": 0.46617033461506996, "grad_norm": 0.151070101830312, "learning_rate": 6.442393810051966e-06, "loss": 0.0033, "step": 28490 }, { "epoch": 0.4663339605661458, "grad_norm": 0.11840643569788492, "learning_rate": 6.439659149611337e-06, "loss": 0.0025, "step": 28500 }, { "epoch": 0.4664975865172216, "grad_norm": 0.21699485517620457, "learning_rate": 6.436924019507371e-06, "loss": 0.0051, "step": 28510 }, { "epoch": 0.46666121246829745, "grad_norm": 0.11668382574992901, "learning_rate": 6.434188420632356e-06, "loss": 0.0038, "step": 28520 }, { "epoch": 0.46682483841937333, "grad_norm": 0.15067176771456303, "learning_rate": 6.431452353878731e-06, "loss": 0.004, "step": 28530 }, { "epoch": 0.46698846437044916, "grad_norm": 0.034745285684074256, "learning_rate": 6.42871582013909e-06, "loss": 0.0026, "step": 28540 }, { "epoch": 0.467152090321525, "grad_norm": 0.286874910094359, "learning_rate": 6.42597882030618e-06, "loss": 0.0041, "step": 28550 }, { "epoch": 0.4673157162726008, "grad_norm": 0.07062296015363291, "learning_rate": 6.423241355272898e-06, "loss": 0.0046, "step": 28560 }, { "epoch": 0.46747934222367665, "grad_norm": 0.10331640685732608, "learning_rate": 6.420503425932292e-06, "loss": 0.0053, "step": 28570 }, { "epoch": 0.46764296817475254, "grad_norm": 0.45732462070496194, "learning_rate": 6.417765033177566e-06, "loss": 0.0063, "step": 28580 }, { "epoch": 0.46780659412582837, "grad_norm": 0.19433731930437192, "learning_rate": 6.415026177902071e-06, "loss": 0.0031, "step": 28590 }, { "epoch": 0.4679702200769042, "grad_norm": 0.30586951493956016, "learning_rate": 6.412286860999308e-06, "loss": 0.0027, "step": 28600 }, { "epoch": 0.46813384602798, "grad_norm": 0.1288625194013928, "learning_rate": 6.409547083362935e-06, "loss": 0.0032, "step": 28610 }, { "epoch": 0.46829747197905586, "grad_norm": 0.04114386864208688, "learning_rate": 6.406806845886752e-06, "loss": 0.0043, "step": 28620 }, { "epoch": 0.46846109793013174, "grad_norm": 0.0856279354914779, "learning_rate": 6.404066149464716e-06, "loss": 0.0026, "step": 28630 }, { "epoch": 0.46862472388120757, "grad_norm": 0.058458105618210764, "learning_rate": 6.401324994990928e-06, "loss": 0.0071, "step": 28640 }, { "epoch": 0.4687883498322834, "grad_norm": 0.1948822405731856, "learning_rate": 6.398583383359642e-06, "loss": 0.0051, "step": 28650 }, { "epoch": 0.46895197578335923, "grad_norm": 0.3138218844570942, "learning_rate": 6.395841315465261e-06, "loss": 0.0053, "step": 28660 }, { "epoch": 0.46911560173443506, "grad_norm": 0.060558873230134716, "learning_rate": 6.3930987922023355e-06, "loss": 0.0047, "step": 28670 }, { "epoch": 0.46927922768551095, "grad_norm": 0.1294892096926167, "learning_rate": 6.390355814465567e-06, "loss": 0.002, "step": 28680 }, { "epoch": 0.4694428536365868, "grad_norm": 0.31763248895548724, "learning_rate": 6.387612383149801e-06, "loss": 0.0035, "step": 28690 }, { "epoch": 0.4696064795876626, "grad_norm": 0.20096824516147963, "learning_rate": 6.3848684991500365e-06, "loss": 0.0061, "step": 28700 }, { "epoch": 0.46977010553873844, "grad_norm": 0.2515339739204591, "learning_rate": 6.382124163361413e-06, "loss": 0.0044, "step": 28710 }, { "epoch": 0.46993373148981427, "grad_norm": 0.12116647769642865, "learning_rate": 6.379379376679225e-06, "loss": 0.0044, "step": 28720 }, { "epoch": 0.47009735744089015, "grad_norm": 0.19943133060781973, "learning_rate": 6.376634139998909e-06, "loss": 0.004, "step": 28730 }, { "epoch": 0.470260983391966, "grad_norm": 0.12343505694302365, "learning_rate": 6.373888454216048e-06, "loss": 0.0018, "step": 28740 }, { "epoch": 0.4704246093430418, "grad_norm": 0.2743382043042762, "learning_rate": 6.371142320226378e-06, "loss": 0.006, "step": 28750 }, { "epoch": 0.47058823529411764, "grad_norm": 0.12485617833650155, "learning_rate": 6.368395738925774e-06, "loss": 0.003, "step": 28760 }, { "epoch": 0.47075186124519347, "grad_norm": 0.08022424403163103, "learning_rate": 6.36564871121026e-06, "loss": 0.0021, "step": 28770 }, { "epoch": 0.47091548719626936, "grad_norm": 0.30242117509979616, "learning_rate": 6.3629012379760045e-06, "loss": 0.0045, "step": 28780 }, { "epoch": 0.4710791131473452, "grad_norm": 0.41906582507647294, "learning_rate": 6.360153320119325e-06, "loss": 0.0045, "step": 28790 }, { "epoch": 0.471242739098421, "grad_norm": 0.24362118206744512, "learning_rate": 6.357404958536675e-06, "loss": 0.0052, "step": 28800 }, { "epoch": 0.47140636504949684, "grad_norm": 0.2430525258174969, "learning_rate": 6.354656154124664e-06, "loss": 0.0028, "step": 28810 }, { "epoch": 0.4715699910005727, "grad_norm": 0.5686912092833595, "learning_rate": 6.351906907780041e-06, "loss": 0.0048, "step": 28820 }, { "epoch": 0.4717336169516485, "grad_norm": 0.27074485600419673, "learning_rate": 6.349157220399696e-06, "loss": 0.004, "step": 28830 }, { "epoch": 0.4718972429027244, "grad_norm": 0.09511359161388898, "learning_rate": 6.346407092880668e-06, "loss": 0.0109, "step": 28840 }, { "epoch": 0.4720608688538002, "grad_norm": 0.2617628188305595, "learning_rate": 6.343656526120136e-06, "loss": 0.003, "step": 28850 }, { "epoch": 0.47222449480487605, "grad_norm": 0.2486964205057107, "learning_rate": 6.340905521015425e-06, "loss": 0.0037, "step": 28860 }, { "epoch": 0.4723881207559519, "grad_norm": 0.14914835295600168, "learning_rate": 6.3381540784640015e-06, "loss": 0.0031, "step": 28870 }, { "epoch": 0.4725517467070277, "grad_norm": 0.04547128775840444, "learning_rate": 6.335402199363475e-06, "loss": 0.0063, "step": 28880 }, { "epoch": 0.4727153726581036, "grad_norm": 0.04228099950194356, "learning_rate": 6.332649884611596e-06, "loss": 0.0037, "step": 28890 }, { "epoch": 0.4728789986091794, "grad_norm": 0.12810134150955807, "learning_rate": 6.329897135106261e-06, "loss": 0.0033, "step": 28900 }, { "epoch": 0.47304262456025525, "grad_norm": 0.08144098121994872, "learning_rate": 6.327143951745505e-06, "loss": 0.0031, "step": 28910 }, { "epoch": 0.4732062505113311, "grad_norm": 0.41958007681560294, "learning_rate": 6.324390335427503e-06, "loss": 0.0077, "step": 28920 }, { "epoch": 0.4733698764624069, "grad_norm": 0.20645399551116456, "learning_rate": 6.321636287050576e-06, "loss": 0.0036, "step": 28930 }, { "epoch": 0.4735335024134828, "grad_norm": 0.15725748055727215, "learning_rate": 6.318881807513184e-06, "loss": 0.0055, "step": 28940 }, { "epoch": 0.47369712836455863, "grad_norm": 0.2545139945740543, "learning_rate": 6.316126897713926e-06, "loss": 0.0054, "step": 28950 }, { "epoch": 0.47386075431563446, "grad_norm": 0.27174063945616145, "learning_rate": 6.313371558551543e-06, "loss": 0.0036, "step": 28960 }, { "epoch": 0.4740243802667103, "grad_norm": 0.05672784788987418, "learning_rate": 6.310615790924917e-06, "loss": 0.0019, "step": 28970 }, { "epoch": 0.4741880062177861, "grad_norm": 0.2603619501217855, "learning_rate": 6.307859595733065e-06, "loss": 0.0029, "step": 28980 }, { "epoch": 0.474351632168862, "grad_norm": 0.1939413865923206, "learning_rate": 6.30510297387515e-06, "loss": 0.005, "step": 28990 }, { "epoch": 0.47451525811993783, "grad_norm": 0.13642460594537312, "learning_rate": 6.302345926250471e-06, "loss": 0.0043, "step": 29000 }, { "epoch": 0.47467888407101366, "grad_norm": 0.10448644155649928, "learning_rate": 6.299588453758464e-06, "loss": 0.0037, "step": 29010 }, { "epoch": 0.4748425100220895, "grad_norm": 0.13191174318270216, "learning_rate": 6.296830557298706e-06, "loss": 0.0032, "step": 29020 }, { "epoch": 0.4750061359731653, "grad_norm": 0.09503486937519907, "learning_rate": 6.294072237770914e-06, "loss": 0.0041, "step": 29030 }, { "epoch": 0.4751697619242412, "grad_norm": 0.3307413030033592, "learning_rate": 6.29131349607494e-06, "loss": 0.0026, "step": 29040 }, { "epoch": 0.47533338787531704, "grad_norm": 0.28180421914480325, "learning_rate": 6.288554333110774e-06, "loss": 0.0043, "step": 29050 }, { "epoch": 0.47549701382639287, "grad_norm": 0.3900740015344946, "learning_rate": 6.285794749778544e-06, "loss": 0.0044, "step": 29060 }, { "epoch": 0.4756606397774687, "grad_norm": 0.45925117285410255, "learning_rate": 6.283034746978518e-06, "loss": 0.0032, "step": 29070 }, { "epoch": 0.4758242657285445, "grad_norm": 0.0756195075816101, "learning_rate": 6.280274325611093e-06, "loss": 0.0028, "step": 29080 }, { "epoch": 0.4759878916796204, "grad_norm": 0.278711814137114, "learning_rate": 6.277513486576813e-06, "loss": 0.0056, "step": 29090 }, { "epoch": 0.47615151763069624, "grad_norm": 0.29830645637032976, "learning_rate": 6.274752230776348e-06, "loss": 0.0028, "step": 29100 }, { "epoch": 0.47631514358177207, "grad_norm": 0.09497249015083563, "learning_rate": 6.271990559110514e-06, "loss": 0.0035, "step": 29110 }, { "epoch": 0.4764787695328479, "grad_norm": 0.1344227517782784, "learning_rate": 6.269228472480253e-06, "loss": 0.0041, "step": 29120 }, { "epoch": 0.47664239548392373, "grad_norm": 0.14281697068255028, "learning_rate": 6.266465971786651e-06, "loss": 0.0043, "step": 29130 }, { "epoch": 0.4768060214349996, "grad_norm": 0.37209888683235626, "learning_rate": 6.26370305793092e-06, "loss": 0.0052, "step": 29140 }, { "epoch": 0.47696964738607545, "grad_norm": 0.18622507040117436, "learning_rate": 6.260939731814416e-06, "loss": 0.0043, "step": 29150 }, { "epoch": 0.4771332733371513, "grad_norm": 0.07666704675208601, "learning_rate": 6.258175994338625e-06, "loss": 0.0027, "step": 29160 }, { "epoch": 0.4772968992882271, "grad_norm": 0.08999712346659572, "learning_rate": 6.255411846405164e-06, "loss": 0.003, "step": 29170 }, { "epoch": 0.47746052523930294, "grad_norm": 0.16231717123856423, "learning_rate": 6.252647288915792e-06, "loss": 0.0043, "step": 29180 }, { "epoch": 0.4776241511903788, "grad_norm": 0.1977245389799726, "learning_rate": 6.249882322772393e-06, "loss": 0.0031, "step": 29190 }, { "epoch": 0.47778777714145465, "grad_norm": 0.11919245973263586, "learning_rate": 6.24711694887699e-06, "loss": 0.0068, "step": 29200 }, { "epoch": 0.4779514030925305, "grad_norm": 0.08787363425062858, "learning_rate": 6.24435116813174e-06, "loss": 0.0029, "step": 29210 }, { "epoch": 0.4781150290436063, "grad_norm": 0.2766724736503148, "learning_rate": 6.241584981438924e-06, "loss": 0.0036, "step": 29220 }, { "epoch": 0.47827865499468214, "grad_norm": 0.357116634700747, "learning_rate": 6.2388183897009655e-06, "loss": 0.0027, "step": 29230 }, { "epoch": 0.47844228094575797, "grad_norm": 0.32954126294664965, "learning_rate": 6.236051393820418e-06, "loss": 0.004, "step": 29240 }, { "epoch": 0.47860590689683385, "grad_norm": 0.049986108641716494, "learning_rate": 6.233283994699961e-06, "loss": 0.0031, "step": 29250 }, { "epoch": 0.4787695328479097, "grad_norm": 0.09923681602857796, "learning_rate": 6.230516193242411e-06, "loss": 0.0054, "step": 29260 }, { "epoch": 0.4789331587989855, "grad_norm": 0.14121999737143903, "learning_rate": 6.227747990350714e-06, "loss": 0.0036, "step": 29270 }, { "epoch": 0.47909678475006134, "grad_norm": 0.45301113150038785, "learning_rate": 6.2249793869279476e-06, "loss": 0.0054, "step": 29280 }, { "epoch": 0.4792604107011372, "grad_norm": 0.060497857501768164, "learning_rate": 6.222210383877321e-06, "loss": 0.0027, "step": 29290 }, { "epoch": 0.47942403665221306, "grad_norm": 0.06584707053590898, "learning_rate": 6.21944098210217e-06, "loss": 0.0029, "step": 29300 }, { "epoch": 0.4795876626032889, "grad_norm": 0.11174683355609316, "learning_rate": 6.216671182505963e-06, "loss": 0.0026, "step": 29310 }, { "epoch": 0.4797512885543647, "grad_norm": 0.2155194570913787, "learning_rate": 6.2139009859923e-06, "loss": 0.0025, "step": 29320 }, { "epoch": 0.47991491450544055, "grad_norm": 0.16046648580711412, "learning_rate": 6.2111303934649094e-06, "loss": 0.0053, "step": 29330 }, { "epoch": 0.4800785404565164, "grad_norm": 0.11485991252632435, "learning_rate": 6.208359405827644e-06, "loss": 0.0035, "step": 29340 }, { "epoch": 0.48024216640759226, "grad_norm": 0.05619253121699922, "learning_rate": 6.2055880239844935e-06, "loss": 0.0051, "step": 29350 }, { "epoch": 0.4804057923586681, "grad_norm": 0.21659237643116297, "learning_rate": 6.20281624883957e-06, "loss": 0.0034, "step": 29360 }, { "epoch": 0.4805694183097439, "grad_norm": 0.23762841550220795, "learning_rate": 6.200044081297117e-06, "loss": 0.004, "step": 29370 }, { "epoch": 0.48073304426081975, "grad_norm": 0.21476950374828657, "learning_rate": 6.197271522261505e-06, "loss": 0.0031, "step": 29380 }, { "epoch": 0.4808966702118956, "grad_norm": 0.32604097466934767, "learning_rate": 6.194498572637233e-06, "loss": 0.0033, "step": 29390 }, { "epoch": 0.48106029616297147, "grad_norm": 0.11988467414905409, "learning_rate": 6.191725233328925e-06, "loss": 0.003, "step": 29400 }, { "epoch": 0.4812239221140473, "grad_norm": 0.03519817068283349, "learning_rate": 6.188951505241336e-06, "loss": 0.0023, "step": 29410 }, { "epoch": 0.4813875480651231, "grad_norm": 0.2672677710543047, "learning_rate": 6.186177389279344e-06, "loss": 0.0041, "step": 29420 }, { "epoch": 0.48155117401619896, "grad_norm": 0.09367708117426597, "learning_rate": 6.183402886347956e-06, "loss": 0.0035, "step": 29430 }, { "epoch": 0.4817147999672748, "grad_norm": 0.16038023134905086, "learning_rate": 6.180627997352304e-06, "loss": 0.0032, "step": 29440 }, { "epoch": 0.4818784259183507, "grad_norm": 0.1274330605509065, "learning_rate": 6.177852723197648e-06, "loss": 0.0025, "step": 29450 }, { "epoch": 0.4820420518694265, "grad_norm": 0.0997741782596983, "learning_rate": 6.17507706478937e-06, "loss": 0.0039, "step": 29460 }, { "epoch": 0.48220567782050233, "grad_norm": 0.1274370438987351, "learning_rate": 6.172301023032982e-06, "loss": 0.0046, "step": 29470 }, { "epoch": 0.48236930377157816, "grad_norm": 0.12090327294473836, "learning_rate": 6.169524598834115e-06, "loss": 0.0053, "step": 29480 }, { "epoch": 0.482532929722654, "grad_norm": 0.17800858058758556, "learning_rate": 6.166747793098532e-06, "loss": 0.0026, "step": 29490 }, { "epoch": 0.4826965556737299, "grad_norm": 0.3117835369535704, "learning_rate": 6.163970606732114e-06, "loss": 0.004, "step": 29500 }, { "epoch": 0.4828601816248057, "grad_norm": 0.22595653630859455, "learning_rate": 6.1611930406408725e-06, "loss": 0.0048, "step": 29510 }, { "epoch": 0.48302380757588154, "grad_norm": 0.2665941274469625, "learning_rate": 6.158415095730935e-06, "loss": 0.0044, "step": 29520 }, { "epoch": 0.48318743352695737, "grad_norm": 0.1297586895289029, "learning_rate": 6.155636772908559e-06, "loss": 0.0032, "step": 29530 }, { "epoch": 0.4833510594780332, "grad_norm": 0.1331237484173101, "learning_rate": 6.152858073080122e-06, "loss": 0.0032, "step": 29540 }, { "epoch": 0.4835146854291091, "grad_norm": 0.17594100183081174, "learning_rate": 6.150078997152129e-06, "loss": 0.0028, "step": 29550 }, { "epoch": 0.4836783113801849, "grad_norm": 0.18970555062186414, "learning_rate": 6.147299546031199e-06, "loss": 0.0045, "step": 29560 }, { "epoch": 0.48384193733126074, "grad_norm": 0.08830290239249784, "learning_rate": 6.1445197206240835e-06, "loss": 0.0038, "step": 29570 }, { "epoch": 0.48400556328233657, "grad_norm": 0.147673850374333, "learning_rate": 6.141739521837651e-06, "loss": 0.0042, "step": 29580 }, { "epoch": 0.4841691892334124, "grad_norm": 0.23240585086855328, "learning_rate": 6.138958950578888e-06, "loss": 0.0037, "step": 29590 }, { "epoch": 0.4843328151844883, "grad_norm": 0.13426945198825696, "learning_rate": 6.1361780077549115e-06, "loss": 0.0059, "step": 29600 }, { "epoch": 0.4844964411355641, "grad_norm": 0.3009426394403168, "learning_rate": 6.133396694272953e-06, "loss": 0.0025, "step": 29610 }, { "epoch": 0.48466006708663995, "grad_norm": 0.018990673250287287, "learning_rate": 6.130615011040365e-06, "loss": 0.0028, "step": 29620 }, { "epoch": 0.4848236930377158, "grad_norm": 0.2862320731774738, "learning_rate": 6.127832958964626e-06, "loss": 0.0037, "step": 29630 }, { "epoch": 0.4849873189887916, "grad_norm": 0.13973691724306697, "learning_rate": 6.12505053895333e-06, "loss": 0.004, "step": 29640 }, { "epoch": 0.4851509449398675, "grad_norm": 0.142199815006184, "learning_rate": 6.122267751914189e-06, "loss": 0.0031, "step": 29650 }, { "epoch": 0.4853145708909433, "grad_norm": 0.31804166612815354, "learning_rate": 6.119484598755043e-06, "loss": 0.0035, "step": 29660 }, { "epoch": 0.48547819684201915, "grad_norm": 0.15919738313465603, "learning_rate": 6.116701080383846e-06, "loss": 0.0035, "step": 29670 }, { "epoch": 0.485641822793095, "grad_norm": 0.19162922534280483, "learning_rate": 6.113917197708667e-06, "loss": 0.0036, "step": 29680 }, { "epoch": 0.4858054487441708, "grad_norm": 0.10584382125969954, "learning_rate": 6.111132951637705e-06, "loss": 0.0057, "step": 29690 }, { "epoch": 0.48596907469524664, "grad_norm": 0.13336140197536098, "learning_rate": 6.108348343079265e-06, "loss": 0.0028, "step": 29700 }, { "epoch": 0.4861327006463225, "grad_norm": 0.25763108446364763, "learning_rate": 6.105563372941779e-06, "loss": 0.0041, "step": 29710 }, { "epoch": 0.48629632659739835, "grad_norm": 0.08544780138585156, "learning_rate": 6.102778042133796e-06, "loss": 0.0044, "step": 29720 }, { "epoch": 0.4864599525484742, "grad_norm": 0.19120116610632867, "learning_rate": 6.099992351563977e-06, "loss": 0.0033, "step": 29730 }, { "epoch": 0.48662357849955, "grad_norm": 0.29861792181844093, "learning_rate": 6.097206302141106e-06, "loss": 0.0044, "step": 29740 }, { "epoch": 0.48678720445062584, "grad_norm": 0.9647579872544751, "learning_rate": 6.094419894774085e-06, "loss": 0.0039, "step": 29750 }, { "epoch": 0.48695083040170173, "grad_norm": 0.08814048699267185, "learning_rate": 6.091633130371927e-06, "loss": 0.0028, "step": 29760 }, { "epoch": 0.48711445635277756, "grad_norm": 0.1252353890662789, "learning_rate": 6.088846009843764e-06, "loss": 0.0028, "step": 29770 }, { "epoch": 0.4872780823038534, "grad_norm": 0.4355197354594601, "learning_rate": 6.0860585340988485e-06, "loss": 0.0054, "step": 29780 }, { "epoch": 0.4874417082549292, "grad_norm": 0.16955114705321353, "learning_rate": 6.083270704046542e-06, "loss": 0.0019, "step": 29790 }, { "epoch": 0.48760533420600505, "grad_norm": 0.2540289533131497, "learning_rate": 6.0804825205963245e-06, "loss": 0.0062, "step": 29800 }, { "epoch": 0.48776896015708093, "grad_norm": 0.09725313204975622, "learning_rate": 6.077693984657795e-06, "loss": 0.0026, "step": 29810 }, { "epoch": 0.48793258610815676, "grad_norm": 0.05517603762687097, "learning_rate": 6.074905097140659e-06, "loss": 0.0026, "step": 29820 }, { "epoch": 0.4880962120592326, "grad_norm": 0.13683282067049105, "learning_rate": 6.0721158589547455e-06, "loss": 0.003, "step": 29830 }, { "epoch": 0.4882598380103084, "grad_norm": 0.22452494900506506, "learning_rate": 6.069326271009996e-06, "loss": 0.0037, "step": 29840 }, { "epoch": 0.48842346396138425, "grad_norm": 0.2801635301143507, "learning_rate": 6.066536334216458e-06, "loss": 0.0034, "step": 29850 }, { "epoch": 0.48858708991246014, "grad_norm": 0.16596463883097873, "learning_rate": 6.0637460494843035e-06, "loss": 0.0035, "step": 29860 }, { "epoch": 0.48875071586353597, "grad_norm": 0.43837351174035766, "learning_rate": 6.060955417723813e-06, "loss": 0.0042, "step": 29870 }, { "epoch": 0.4889143418146118, "grad_norm": 0.11003383109654377, "learning_rate": 6.058164439845378e-06, "loss": 0.0039, "step": 29880 }, { "epoch": 0.4890779677656876, "grad_norm": 0.11727728463669576, "learning_rate": 6.05537311675951e-06, "loss": 0.0034, "step": 29890 }, { "epoch": 0.48924159371676346, "grad_norm": 0.13843011908947217, "learning_rate": 6.0525814493768266e-06, "loss": 0.0027, "step": 29900 }, { "epoch": 0.48940521966783934, "grad_norm": 0.15742073352125482, "learning_rate": 6.049789438608058e-06, "loss": 0.0031, "step": 29910 }, { "epoch": 0.48956884561891517, "grad_norm": 0.35526797173624325, "learning_rate": 6.0469970853640516e-06, "loss": 0.003, "step": 29920 }, { "epoch": 0.489732471569991, "grad_norm": 0.5083059073977578, "learning_rate": 6.044204390555763e-06, "loss": 0.003, "step": 29930 }, { "epoch": 0.48989609752106683, "grad_norm": 0.06707101445552398, "learning_rate": 6.041411355094255e-06, "loss": 0.0021, "step": 29940 }, { "epoch": 0.49005972347214266, "grad_norm": 0.19851638352856973, "learning_rate": 6.038617979890713e-06, "loss": 0.0033, "step": 29950 }, { "epoch": 0.49022334942321855, "grad_norm": 0.140088111498455, "learning_rate": 6.0358242658564224e-06, "loss": 0.002, "step": 29960 }, { "epoch": 0.4903869753742944, "grad_norm": 0.10289592596656784, "learning_rate": 6.033030213902782e-06, "loss": 0.004, "step": 29970 }, { "epoch": 0.4905506013253702, "grad_norm": 0.2299604978984176, "learning_rate": 6.030235824941304e-06, "loss": 0.0037, "step": 29980 }, { "epoch": 0.49071422727644604, "grad_norm": 0.3168382458986692, "learning_rate": 6.027441099883607e-06, "loss": 0.004, "step": 29990 }, { "epoch": 0.49087785322752187, "grad_norm": 0.23270788130372036, "learning_rate": 6.024646039641422e-06, "loss": 0.0021, "step": 30000 }, { "epoch": 0.49104147917859775, "grad_norm": 0.3560858473976975, "learning_rate": 6.021850645126586e-06, "loss": 0.0025, "step": 30010 }, { "epoch": 0.4912051051296736, "grad_norm": 0.40369975814106696, "learning_rate": 6.019054917251049e-06, "loss": 0.0042, "step": 30020 }, { "epoch": 0.4913687310807494, "grad_norm": 0.4963912779417439, "learning_rate": 6.016258856926868e-06, "loss": 0.0032, "step": 30030 }, { "epoch": 0.49153235703182524, "grad_norm": 0.23538670014973337, "learning_rate": 6.013462465066205e-06, "loss": 0.0049, "step": 30040 }, { "epoch": 0.49169598298290107, "grad_norm": 0.09055079974655157, "learning_rate": 6.010665742581336e-06, "loss": 0.0023, "step": 30050 }, { "epoch": 0.49185960893397696, "grad_norm": 0.17989733674634537, "learning_rate": 6.007868690384643e-06, "loss": 0.005, "step": 30060 }, { "epoch": 0.4920232348850528, "grad_norm": 0.16300437269371626, "learning_rate": 6.005071309388612e-06, "loss": 0.0035, "step": 30070 }, { "epoch": 0.4921868608361286, "grad_norm": 0.27451328919098483, "learning_rate": 6.002273600505842e-06, "loss": 0.0031, "step": 30080 }, { "epoch": 0.49235048678720444, "grad_norm": 0.09989386063104876, "learning_rate": 5.999475564649035e-06, "loss": 0.0038, "step": 30090 }, { "epoch": 0.4925141127382803, "grad_norm": 0.10637711464226826, "learning_rate": 5.996677202731e-06, "loss": 0.0022, "step": 30100 }, { "epoch": 0.49267773868935616, "grad_norm": 0.29259948733056984, "learning_rate": 5.993878515664654e-06, "loss": 0.0032, "step": 30110 }, { "epoch": 0.492841364640432, "grad_norm": 0.1245250294705668, "learning_rate": 5.991079504363019e-06, "loss": 0.0031, "step": 30120 }, { "epoch": 0.4930049905915078, "grad_norm": 0.16003737625109335, "learning_rate": 5.988280169739221e-06, "loss": 0.0029, "step": 30130 }, { "epoch": 0.49316861654258365, "grad_norm": 0.08849713684105148, "learning_rate": 5.985480512706496e-06, "loss": 0.0029, "step": 30140 }, { "epoch": 0.4933322424936595, "grad_norm": 0.3895293210767575, "learning_rate": 5.982680534178182e-06, "loss": 0.0046, "step": 30150 }, { "epoch": 0.4934958684447353, "grad_norm": 0.2956355961203383, "learning_rate": 5.979880235067722e-06, "loss": 0.004, "step": 30160 }, { "epoch": 0.4936594943958112, "grad_norm": 0.21900161855985223, "learning_rate": 5.977079616288666e-06, "loss": 0.0034, "step": 30170 }, { "epoch": 0.493823120346887, "grad_norm": 0.13293376446720137, "learning_rate": 5.974278678754661e-06, "loss": 0.0034, "step": 30180 }, { "epoch": 0.49398674629796285, "grad_norm": 0.1109432136093818, "learning_rate": 5.9714774233794685e-06, "loss": 0.0034, "step": 30190 }, { "epoch": 0.4941503722490387, "grad_norm": 0.1308311551348442, "learning_rate": 5.968675851076949e-06, "loss": 0.0038, "step": 30200 }, { "epoch": 0.4943139982001145, "grad_norm": 0.12716538908467298, "learning_rate": 5.965873962761061e-06, "loss": 0.0027, "step": 30210 }, { "epoch": 0.4944776241511904, "grad_norm": 0.10859512178270475, "learning_rate": 5.963071759345874e-06, "loss": 0.0041, "step": 30220 }, { "epoch": 0.49464125010226623, "grad_norm": 0.16979859564474095, "learning_rate": 5.96026924174556e-06, "loss": 0.0039, "step": 30230 }, { "epoch": 0.49480487605334206, "grad_norm": 0.10969800358731176, "learning_rate": 5.957466410874385e-06, "loss": 0.0035, "step": 30240 }, { "epoch": 0.4949685020044179, "grad_norm": 0.1816067515391278, "learning_rate": 5.954663267646726e-06, "loss": 0.003, "step": 30250 }, { "epoch": 0.4951321279554937, "grad_norm": 0.10358004337229164, "learning_rate": 5.951859812977062e-06, "loss": 0.0039, "step": 30260 }, { "epoch": 0.4952957539065696, "grad_norm": 0.14893742261748405, "learning_rate": 5.949056047779964e-06, "loss": 0.0027, "step": 30270 }, { "epoch": 0.49545937985764543, "grad_norm": 0.20663819327916472, "learning_rate": 5.946251972970115e-06, "loss": 0.002, "step": 30280 }, { "epoch": 0.49562300580872126, "grad_norm": 0.2512481243531907, "learning_rate": 5.9434475894622965e-06, "loss": 0.0032, "step": 30290 }, { "epoch": 0.4957866317597971, "grad_norm": 0.31103661899216584, "learning_rate": 5.9406428981713835e-06, "loss": 0.0024, "step": 30300 }, { "epoch": 0.4959502577108729, "grad_norm": 0.27810187298058087, "learning_rate": 5.937837900012361e-06, "loss": 0.0036, "step": 30310 }, { "epoch": 0.4961138836619488, "grad_norm": 0.12598607243994947, "learning_rate": 5.935032595900311e-06, "loss": 0.0032, "step": 30320 }, { "epoch": 0.49627750961302464, "grad_norm": 0.23324557797760348, "learning_rate": 5.932226986750409e-06, "loss": 0.0027, "step": 30330 }, { "epoch": 0.49644113556410047, "grad_norm": 0.25153654471727305, "learning_rate": 5.929421073477941e-06, "loss": 0.004, "step": 30340 }, { "epoch": 0.4966047615151763, "grad_norm": 0.16419486902282132, "learning_rate": 5.926614856998284e-06, "loss": 0.0042, "step": 30350 }, { "epoch": 0.4967683874662521, "grad_norm": 0.05139620099146174, "learning_rate": 5.923808338226916e-06, "loss": 0.0037, "step": 30360 }, { "epoch": 0.496932013417328, "grad_norm": 0.07125926653504847, "learning_rate": 5.921001518079416e-06, "loss": 0.002, "step": 30370 }, { "epoch": 0.49709563936840384, "grad_norm": 0.17962100163706446, "learning_rate": 5.918194397471459e-06, "loss": 0.0038, "step": 30380 }, { "epoch": 0.49725926531947967, "grad_norm": 0.10040518531269127, "learning_rate": 5.915386977318815e-06, "loss": 0.0056, "step": 30390 }, { "epoch": 0.4974228912705555, "grad_norm": 0.2686932315579384, "learning_rate": 5.912579258537361e-06, "loss": 0.004, "step": 30400 }, { "epoch": 0.49758651722163133, "grad_norm": 0.10463183117508539, "learning_rate": 5.909771242043061e-06, "loss": 0.0045, "step": 30410 }, { "epoch": 0.4977501431727072, "grad_norm": 0.3012263587092307, "learning_rate": 5.906962928751983e-06, "loss": 0.0062, "step": 30420 }, { "epoch": 0.49791376912378305, "grad_norm": 0.09366529301477908, "learning_rate": 5.904154319580289e-06, "loss": 0.0031, "step": 30430 }, { "epoch": 0.4980773950748589, "grad_norm": 0.28861923478639584, "learning_rate": 5.901345415444237e-06, "loss": 0.0019, "step": 30440 }, { "epoch": 0.4982410210259347, "grad_norm": 0.11662173066465188, "learning_rate": 5.898536217260183e-06, "loss": 0.0049, "step": 30450 }, { "epoch": 0.49840464697701053, "grad_norm": 0.27824799281949225, "learning_rate": 5.895726725944579e-06, "loss": 0.003, "step": 30460 }, { "epoch": 0.4985682729280864, "grad_norm": 0.11498238988639774, "learning_rate": 5.892916942413974e-06, "loss": 0.0032, "step": 30470 }, { "epoch": 0.49873189887916225, "grad_norm": 0.12476545299725358, "learning_rate": 5.8901068675850035e-06, "loss": 0.0034, "step": 30480 }, { "epoch": 0.4988955248302381, "grad_norm": 0.1434869372567432, "learning_rate": 5.887296502374411e-06, "loss": 0.0023, "step": 30490 }, { "epoch": 0.4990591507813139, "grad_norm": 0.1214203514478344, "learning_rate": 5.884485847699026e-06, "loss": 0.0046, "step": 30500 }, { "epoch": 0.49922277673238974, "grad_norm": 0.16798148312028227, "learning_rate": 5.8816749044757746e-06, "loss": 0.0043, "step": 30510 }, { "epoch": 0.4993864026834656, "grad_norm": 0.19686930918908155, "learning_rate": 5.878863673621678e-06, "loss": 0.0038, "step": 30520 }, { "epoch": 0.49955002863454145, "grad_norm": 0.1933806256148756, "learning_rate": 5.876052156053851e-06, "loss": 0.0025, "step": 30530 }, { "epoch": 0.4997136545856173, "grad_norm": 0.197958555043247, "learning_rate": 5.8732403526895e-06, "loss": 0.0036, "step": 30540 }, { "epoch": 0.4998772805366931, "grad_norm": 0.18543167326358576, "learning_rate": 5.870428264445926e-06, "loss": 0.0046, "step": 30550 }, { "epoch": 0.500040906487769, "grad_norm": 0.13615105113344544, "learning_rate": 5.867615892240526e-06, "loss": 0.0038, "step": 30560 }, { "epoch": 0.5002045324388448, "grad_norm": 0.06299064743082385, "learning_rate": 5.864803236990783e-06, "loss": 0.0046, "step": 30570 }, { "epoch": 0.5003681583899207, "grad_norm": 0.15832340558482352, "learning_rate": 5.86199029961428e-06, "loss": 0.0032, "step": 30580 }, { "epoch": 0.5005317843409964, "grad_norm": 0.1285976553180435, "learning_rate": 5.859177081028684e-06, "loss": 0.0032, "step": 30590 }, { "epoch": 0.5006954102920723, "grad_norm": 0.10136167967629879, "learning_rate": 5.856363582151761e-06, "loss": 0.0027, "step": 30600 }, { "epoch": 0.5008590362431482, "grad_norm": 0.150559013114819, "learning_rate": 5.853549803901362e-06, "loss": 0.0044, "step": 30610 }, { "epoch": 0.501022662194224, "grad_norm": 0.1888069140603963, "learning_rate": 5.850735747195435e-06, "loss": 0.0038, "step": 30620 }, { "epoch": 0.5011862881452999, "grad_norm": 0.15832518871632617, "learning_rate": 5.847921412952016e-06, "loss": 0.0031, "step": 30630 }, { "epoch": 0.5013499140963756, "grad_norm": 0.0670852691615687, "learning_rate": 5.845106802089231e-06, "loss": 0.0048, "step": 30640 }, { "epoch": 0.5015135400474515, "grad_norm": 0.2013968173486725, "learning_rate": 5.842291915525298e-06, "loss": 0.0034, "step": 30650 }, { "epoch": 0.5016771659985274, "grad_norm": 0.15507268193424312, "learning_rate": 5.839476754178522e-06, "loss": 0.0047, "step": 30660 }, { "epoch": 0.5018407919496032, "grad_norm": 0.04931863680466435, "learning_rate": 5.836661318967301e-06, "loss": 0.0039, "step": 30670 }, { "epoch": 0.5020044179006791, "grad_norm": 0.1579966332258695, "learning_rate": 5.833845610810123e-06, "loss": 0.0023, "step": 30680 }, { "epoch": 0.5021680438517548, "grad_norm": 0.351839065048057, "learning_rate": 5.831029630625557e-06, "loss": 0.0028, "step": 30690 }, { "epoch": 0.5023316698028307, "grad_norm": 0.1270213332537777, "learning_rate": 5.828213379332272e-06, "loss": 0.0028, "step": 30700 }, { "epoch": 0.5024952957539066, "grad_norm": 0.4442225510575845, "learning_rate": 5.825396857849022e-06, "loss": 0.0063, "step": 30710 }, { "epoch": 0.5026589217049824, "grad_norm": 0.02512476076602587, "learning_rate": 5.8225800670946404e-06, "loss": 0.0044, "step": 30720 }, { "epoch": 0.5028225476560583, "grad_norm": 0.1787766257453066, "learning_rate": 5.81976300798806e-06, "loss": 0.0029, "step": 30730 }, { "epoch": 0.502986173607134, "grad_norm": 0.27963491370934557, "learning_rate": 5.816945681448296e-06, "loss": 0.0025, "step": 30740 }, { "epoch": 0.5031497995582099, "grad_norm": 0.1245197589191199, "learning_rate": 5.814128088394451e-06, "loss": 0.0034, "step": 30750 }, { "epoch": 0.5033134255092858, "grad_norm": 0.044971505181116304, "learning_rate": 5.811310229745713e-06, "loss": 0.0025, "step": 30760 }, { "epoch": 0.5034770514603616, "grad_norm": 0.1287923144313871, "learning_rate": 5.808492106421363e-06, "loss": 0.0032, "step": 30770 }, { "epoch": 0.5036406774114375, "grad_norm": 0.08738496979239627, "learning_rate": 5.80567371934076e-06, "loss": 0.0024, "step": 30780 }, { "epoch": 0.5038043033625133, "grad_norm": 0.14446477682128556, "learning_rate": 5.8028550694233525e-06, "loss": 0.0033, "step": 30790 }, { "epoch": 0.5039679293135891, "grad_norm": 0.3124559208936593, "learning_rate": 5.8000361575886796e-06, "loss": 0.0034, "step": 30800 }, { "epoch": 0.504131555264665, "grad_norm": 0.2838342098305543, "learning_rate": 5.797216984756357e-06, "loss": 0.0022, "step": 30810 }, { "epoch": 0.5042951812157408, "grad_norm": 0.12793453552653467, "learning_rate": 5.794397551846092e-06, "loss": 0.0023, "step": 30820 }, { "epoch": 0.5044588071668167, "grad_norm": 0.09883559668496325, "learning_rate": 5.791577859777676e-06, "loss": 0.0037, "step": 30830 }, { "epoch": 0.5046224331178925, "grad_norm": 0.4943226875179745, "learning_rate": 5.78875790947098e-06, "loss": 0.0031, "step": 30840 }, { "epoch": 0.5047860590689683, "grad_norm": 0.1887230231511555, "learning_rate": 5.785937701845966e-06, "loss": 0.0027, "step": 30850 }, { "epoch": 0.5049496850200442, "grad_norm": 0.16701855268765975, "learning_rate": 5.783117237822676e-06, "loss": 0.0042, "step": 30860 }, { "epoch": 0.50511331097112, "grad_norm": 0.217177156739254, "learning_rate": 5.780296518321235e-06, "loss": 0.0032, "step": 30870 }, { "epoch": 0.5052769369221959, "grad_norm": 0.39187331233316197, "learning_rate": 5.777475544261857e-06, "loss": 0.0037, "step": 30880 }, { "epoch": 0.5054405628732717, "grad_norm": 0.14065489905819717, "learning_rate": 5.77465431656483e-06, "loss": 0.0027, "step": 30890 }, { "epoch": 0.5056041888243475, "grad_norm": 0.11569206773931066, "learning_rate": 5.771832836150532e-06, "loss": 0.0034, "step": 30900 }, { "epoch": 0.5057678147754234, "grad_norm": 0.28828213040861433, "learning_rate": 5.769011103939422e-06, "loss": 0.0032, "step": 30910 }, { "epoch": 0.5059314407264992, "grad_norm": 0.3013538155415872, "learning_rate": 5.766189120852037e-06, "loss": 0.0023, "step": 30920 }, { "epoch": 0.5060950666775751, "grad_norm": 0.13195666409540505, "learning_rate": 5.7633668878090024e-06, "loss": 0.0029, "step": 30930 }, { "epoch": 0.5062586926286509, "grad_norm": 0.069755959637198, "learning_rate": 5.760544405731021e-06, "loss": 0.0023, "step": 30940 }, { "epoch": 0.5064223185797267, "grad_norm": 0.31627300981273365, "learning_rate": 5.757721675538876e-06, "loss": 0.0031, "step": 30950 }, { "epoch": 0.5065859445308026, "grad_norm": 0.18569620792723815, "learning_rate": 5.754898698153435e-06, "loss": 0.0022, "step": 30960 }, { "epoch": 0.5067495704818784, "grad_norm": 0.22106776794500202, "learning_rate": 5.752075474495646e-06, "loss": 0.0025, "step": 30970 }, { "epoch": 0.5069131964329543, "grad_norm": 0.13399851166904211, "learning_rate": 5.749252005486531e-06, "loss": 0.0031, "step": 30980 }, { "epoch": 0.5070768223840301, "grad_norm": 0.1318008389377712, "learning_rate": 5.7464282920472e-06, "loss": 0.0045, "step": 30990 }, { "epoch": 0.507240448335106, "grad_norm": 0.12848614648887258, "learning_rate": 5.74360433509884e-06, "loss": 0.0036, "step": 31000 }, { "epoch": 0.5074040742861818, "grad_norm": 0.135060505859908, "learning_rate": 5.740780135562716e-06, "loss": 0.0022, "step": 31010 }, { "epoch": 0.5075677002372576, "grad_norm": 0.40443599113234835, "learning_rate": 5.737955694360173e-06, "loss": 0.005, "step": 31020 }, { "epoch": 0.5077313261883335, "grad_norm": 0.16996645715037323, "learning_rate": 5.7351310124126344e-06, "loss": 0.0031, "step": 31030 }, { "epoch": 0.5078949521394093, "grad_norm": 0.213626336256897, "learning_rate": 5.732306090641604e-06, "loss": 0.0059, "step": 31040 }, { "epoch": 0.5080585780904852, "grad_norm": 0.4174294523480365, "learning_rate": 5.729480929968664e-06, "loss": 0.0028, "step": 31050 }, { "epoch": 0.508222204041561, "grad_norm": 0.20896201209299195, "learning_rate": 5.726655531315468e-06, "loss": 0.0048, "step": 31060 }, { "epoch": 0.5083858299926368, "grad_norm": 0.21591243393165307, "learning_rate": 5.723829895603758e-06, "loss": 0.0052, "step": 31070 }, { "epoch": 0.5085494559437127, "grad_norm": 0.20850838065178728, "learning_rate": 5.721004023755346e-06, "loss": 0.0027, "step": 31080 }, { "epoch": 0.5087130818947885, "grad_norm": 0.09051915422380967, "learning_rate": 5.71817791669212e-06, "loss": 0.0036, "step": 31090 }, { "epoch": 0.5088767078458644, "grad_norm": 0.10766145060043082, "learning_rate": 5.71535157533605e-06, "loss": 0.0023, "step": 31100 }, { "epoch": 0.5090403337969402, "grad_norm": 0.2917857138845481, "learning_rate": 5.712525000609179e-06, "loss": 0.0048, "step": 31110 }, { "epoch": 0.509203959748016, "grad_norm": 0.0639142994402491, "learning_rate": 5.709698193433628e-06, "loss": 0.0061, "step": 31120 }, { "epoch": 0.5093675856990919, "grad_norm": 0.4365875913618581, "learning_rate": 5.706871154731592e-06, "loss": 0.0038, "step": 31130 }, { "epoch": 0.5095312116501677, "grad_norm": 0.14276256643303722, "learning_rate": 5.704043885425343e-06, "loss": 0.0038, "step": 31140 }, { "epoch": 0.5096948376012436, "grad_norm": 0.12471012400758794, "learning_rate": 5.701216386437227e-06, "loss": 0.002, "step": 31150 }, { "epoch": 0.5098584635523195, "grad_norm": 0.26905194375019686, "learning_rate": 5.698388658689667e-06, "loss": 0.0026, "step": 31160 }, { "epoch": 0.5100220895033952, "grad_norm": 0.13676870817226802, "learning_rate": 5.695560703105156e-06, "loss": 0.0035, "step": 31170 }, { "epoch": 0.5101857154544711, "grad_norm": 0.3097708675748771, "learning_rate": 5.6927325206062655e-06, "loss": 0.0021, "step": 31180 }, { "epoch": 0.5103493414055469, "grad_norm": 0.09127883981909614, "learning_rate": 5.689904112115643e-06, "loss": 0.0026, "step": 31190 }, { "epoch": 0.5105129673566228, "grad_norm": 0.1853852710904266, "learning_rate": 5.687075478556005e-06, "loss": 0.0039, "step": 31200 }, { "epoch": 0.5106765933076987, "grad_norm": 0.10094171984173658, "learning_rate": 5.684246620850141e-06, "loss": 0.0023, "step": 31210 }, { "epoch": 0.5108402192587744, "grad_norm": 0.09760976915300021, "learning_rate": 5.681417539920919e-06, "loss": 0.0028, "step": 31220 }, { "epoch": 0.5110038452098503, "grad_norm": 0.08920536898318436, "learning_rate": 5.678588236691276e-06, "loss": 0.0027, "step": 31230 }, { "epoch": 0.5111674711609261, "grad_norm": 0.17848121080390708, "learning_rate": 5.67575871208422e-06, "loss": 0.0036, "step": 31240 }, { "epoch": 0.511331097112002, "grad_norm": 0.09645307961795557, "learning_rate": 5.672928967022835e-06, "loss": 0.0025, "step": 31250 }, { "epoch": 0.5114947230630777, "grad_norm": 0.13369272767976886, "learning_rate": 5.670099002430276e-06, "loss": 0.0024, "step": 31260 }, { "epoch": 0.5116583490141536, "grad_norm": 0.13316919562347096, "learning_rate": 5.667268819229767e-06, "loss": 0.0037, "step": 31270 }, { "epoch": 0.5118219749652295, "grad_norm": 0.209351912947307, "learning_rate": 5.664438418344607e-06, "loss": 0.0027, "step": 31280 }, { "epoch": 0.5119856009163053, "grad_norm": 0.06621207763966933, "learning_rate": 5.661607800698162e-06, "loss": 0.0025, "step": 31290 }, { "epoch": 0.5121492268673812, "grad_norm": 0.21419253463821036, "learning_rate": 5.658776967213872e-06, "loss": 0.0038, "step": 31300 }, { "epoch": 0.512312852818457, "grad_norm": 0.16431342556027037, "learning_rate": 5.65594591881525e-06, "loss": 0.0033, "step": 31310 }, { "epoch": 0.5124764787695328, "grad_norm": 0.1560009045206467, "learning_rate": 5.6531146564258695e-06, "loss": 0.0039, "step": 31320 }, { "epoch": 0.5126401047206087, "grad_norm": 0.19634692794600694, "learning_rate": 5.650283180969384e-06, "loss": 0.0039, "step": 31330 }, { "epoch": 0.5128037306716845, "grad_norm": 0.2277691507722992, "learning_rate": 5.64745149336951e-06, "loss": 0.003, "step": 31340 }, { "epoch": 0.5129673566227604, "grad_norm": 0.2114546235807009, "learning_rate": 5.644619594550036e-06, "loss": 0.004, "step": 31350 }, { "epoch": 0.5131309825738362, "grad_norm": 0.1879014501585944, "learning_rate": 5.6417874854348194e-06, "loss": 0.006, "step": 31360 }, { "epoch": 0.513294608524912, "grad_norm": 0.20446875146640012, "learning_rate": 5.638955166947786e-06, "loss": 0.0054, "step": 31370 }, { "epoch": 0.5134582344759879, "grad_norm": 0.11165868664950125, "learning_rate": 5.636122640012928e-06, "loss": 0.0034, "step": 31380 }, { "epoch": 0.5136218604270637, "grad_norm": 0.1652247375133814, "learning_rate": 5.63328990555431e-06, "loss": 0.0021, "step": 31390 }, { "epoch": 0.5137854863781396, "grad_norm": 0.6646531052235036, "learning_rate": 5.63045696449606e-06, "loss": 0.0041, "step": 31400 }, { "epoch": 0.5139491123292154, "grad_norm": 0.12452814759001027, "learning_rate": 5.6276238177623724e-06, "loss": 0.0024, "step": 31410 }, { "epoch": 0.5141127382802912, "grad_norm": 0.07746065000798537, "learning_rate": 5.624790466277516e-06, "loss": 0.005, "step": 31420 }, { "epoch": 0.5142763642313671, "grad_norm": 0.0750660248586226, "learning_rate": 5.621956910965818e-06, "loss": 0.0035, "step": 31430 }, { "epoch": 0.5144399901824429, "grad_norm": 0.24576498868817262, "learning_rate": 5.619123152751677e-06, "loss": 0.0023, "step": 31440 }, { "epoch": 0.5146036161335188, "grad_norm": 0.1545274315863196, "learning_rate": 5.616289192559556e-06, "loss": 0.0044, "step": 31450 }, { "epoch": 0.5147672420845946, "grad_norm": 0.13503356061505511, "learning_rate": 5.613455031313987e-06, "loss": 0.0041, "step": 31460 }, { "epoch": 0.5149308680356705, "grad_norm": 0.2873909660629769, "learning_rate": 5.610620669939561e-06, "loss": 0.0036, "step": 31470 }, { "epoch": 0.5150944939867463, "grad_norm": 0.17793425866920332, "learning_rate": 5.60778610936094e-06, "loss": 0.0049, "step": 31480 }, { "epoch": 0.5152581199378221, "grad_norm": 0.08700013255323455, "learning_rate": 5.60495135050285e-06, "loss": 0.01, "step": 31490 }, { "epoch": 0.515421745888898, "grad_norm": 0.3682165078819258, "learning_rate": 5.60211639429008e-06, "loss": 0.0047, "step": 31500 }, { "epoch": 0.5155853718399738, "grad_norm": 0.12089238214760419, "learning_rate": 5.599281241647484e-06, "loss": 0.0031, "step": 31510 }, { "epoch": 0.5157489977910497, "grad_norm": 0.37900731566690254, "learning_rate": 5.596445893499982e-06, "loss": 0.0029, "step": 31520 }, { "epoch": 0.5159126237421255, "grad_norm": 0.44071421310508796, "learning_rate": 5.593610350772556e-06, "loss": 0.0035, "step": 31530 }, { "epoch": 0.5160762496932013, "grad_norm": 0.28480737892729513, "learning_rate": 5.590774614390249e-06, "loss": 0.0025, "step": 31540 }, { "epoch": 0.5162398756442772, "grad_norm": 0.2563751400558066, "learning_rate": 5.587938685278172e-06, "loss": 0.0043, "step": 31550 }, { "epoch": 0.516403501595353, "grad_norm": 0.11935051150569086, "learning_rate": 5.585102564361499e-06, "loss": 0.0065, "step": 31560 }, { "epoch": 0.5165671275464289, "grad_norm": 0.17922696191171508, "learning_rate": 5.582266252565458e-06, "loss": 0.0024, "step": 31570 }, { "epoch": 0.5167307534975047, "grad_norm": 0.08469500777436423, "learning_rate": 5.579429750815351e-06, "loss": 0.0057, "step": 31580 }, { "epoch": 0.5168943794485805, "grad_norm": 0.3688162466844152, "learning_rate": 5.576593060036537e-06, "loss": 0.0044, "step": 31590 }, { "epoch": 0.5170580053996564, "grad_norm": 0.1670144960303306, "learning_rate": 5.57375618115443e-06, "loss": 0.0043, "step": 31600 }, { "epoch": 0.5172216313507322, "grad_norm": 0.22742852578415487, "learning_rate": 5.570919115094518e-06, "loss": 0.0048, "step": 31610 }, { "epoch": 0.5173852573018081, "grad_norm": 0.40427429575846835, "learning_rate": 5.568081862782342e-06, "loss": 0.0033, "step": 31620 }, { "epoch": 0.517548883252884, "grad_norm": 0.06370511116338798, "learning_rate": 5.565244425143504e-06, "loss": 0.0039, "step": 31630 }, { "epoch": 0.5177125092039597, "grad_norm": 0.16427213215346223, "learning_rate": 5.562406803103668e-06, "loss": 0.0031, "step": 31640 }, { "epoch": 0.5178761351550356, "grad_norm": 0.09402786959017584, "learning_rate": 5.559568997588558e-06, "loss": 0.0028, "step": 31650 }, { "epoch": 0.5180397611061114, "grad_norm": 0.36332542976726234, "learning_rate": 5.556731009523959e-06, "loss": 0.0033, "step": 31660 }, { "epoch": 0.5182033870571873, "grad_norm": 0.19619561151328846, "learning_rate": 5.553892839835714e-06, "loss": 0.0031, "step": 31670 }, { "epoch": 0.5183670130082632, "grad_norm": 0.09720159035212074, "learning_rate": 5.551054489449725e-06, "loss": 0.0037, "step": 31680 }, { "epoch": 0.5185306389593389, "grad_norm": 0.3377310656945975, "learning_rate": 5.548215959291954e-06, "loss": 0.0027, "step": 31690 }, { "epoch": 0.5186942649104148, "grad_norm": 0.11641622347522938, "learning_rate": 5.545377250288422e-06, "loss": 0.0023, "step": 31700 }, { "epoch": 0.5188578908614906, "grad_norm": 0.06900096406129809, "learning_rate": 5.542538363365208e-06, "loss": 0.0037, "step": 31710 }, { "epoch": 0.5190215168125665, "grad_norm": 0.13304384519839824, "learning_rate": 5.539699299448444e-06, "loss": 0.0041, "step": 31720 }, { "epoch": 0.5191851427636424, "grad_norm": 0.06589612568895965, "learning_rate": 5.53686005946433e-06, "loss": 0.0024, "step": 31730 }, { "epoch": 0.5193487687147181, "grad_norm": 0.20920091198196153, "learning_rate": 5.534020644339118e-06, "loss": 0.0025, "step": 31740 }, { "epoch": 0.519512394665794, "grad_norm": 0.27992363186227776, "learning_rate": 5.5311810549991115e-06, "loss": 0.0038, "step": 31750 }, { "epoch": 0.5196760206168698, "grad_norm": 0.14871714870407046, "learning_rate": 5.5283412923706815e-06, "loss": 0.0029, "step": 31760 }, { "epoch": 0.5198396465679457, "grad_norm": 0.16493032172596309, "learning_rate": 5.525501357380247e-06, "loss": 0.0023, "step": 31770 }, { "epoch": 0.5200032725190216, "grad_norm": 0.16648262941400113, "learning_rate": 5.522661250954289e-06, "loss": 0.004, "step": 31780 }, { "epoch": 0.5201668984700973, "grad_norm": 0.3121100438063519, "learning_rate": 5.519820974019341e-06, "loss": 0.0055, "step": 31790 }, { "epoch": 0.5203305244211732, "grad_norm": 0.19435441150776234, "learning_rate": 5.516980527501994e-06, "loss": 0.0031, "step": 31800 }, { "epoch": 0.520494150372249, "grad_norm": 0.11350818341588174, "learning_rate": 5.51413991232889e-06, "loss": 0.0024, "step": 31810 }, { "epoch": 0.5206577763233249, "grad_norm": 0.1418992813321056, "learning_rate": 5.511299129426735e-06, "loss": 0.004, "step": 31820 }, { "epoch": 0.5208214022744008, "grad_norm": 0.09775767424980518, "learning_rate": 5.508458179722279e-06, "loss": 0.0033, "step": 31830 }, { "epoch": 0.5209850282254765, "grad_norm": 0.124817127282726, "learning_rate": 5.505617064142336e-06, "loss": 0.0027, "step": 31840 }, { "epoch": 0.5211486541765524, "grad_norm": 0.4482925886691343, "learning_rate": 5.502775783613765e-06, "loss": 0.0032, "step": 31850 }, { "epoch": 0.5213122801276282, "grad_norm": 0.18621437567168356, "learning_rate": 5.4999343390634875e-06, "loss": 0.0022, "step": 31860 }, { "epoch": 0.5214759060787041, "grad_norm": 0.23038293817958985, "learning_rate": 5.497092731418473e-06, "loss": 0.0044, "step": 31870 }, { "epoch": 0.52163953202978, "grad_norm": 0.23885597895309504, "learning_rate": 5.494250961605745e-06, "loss": 0.004, "step": 31880 }, { "epoch": 0.5218031579808557, "grad_norm": 0.13655926035999974, "learning_rate": 5.491409030552383e-06, "loss": 0.0028, "step": 31890 }, { "epoch": 0.5219667839319316, "grad_norm": 0.04216317177801863, "learning_rate": 5.488566939185514e-06, "loss": 0.0032, "step": 31900 }, { "epoch": 0.5221304098830074, "grad_norm": 0.2383644432125547, "learning_rate": 5.48572468843232e-06, "loss": 0.0036, "step": 31910 }, { "epoch": 0.5222940358340833, "grad_norm": 0.14568711990382036, "learning_rate": 5.482882279220039e-06, "loss": 0.0023, "step": 31920 }, { "epoch": 0.5224576617851592, "grad_norm": 0.07362329808097236, "learning_rate": 5.480039712475953e-06, "loss": 0.0024, "step": 31930 }, { "epoch": 0.522621287736235, "grad_norm": 0.1671837891244254, "learning_rate": 5.4771969891274e-06, "loss": 0.0022, "step": 31940 }, { "epoch": 0.5227849136873108, "grad_norm": 0.15838017118807296, "learning_rate": 5.474354110101769e-06, "loss": 0.0035, "step": 31950 }, { "epoch": 0.5229485396383866, "grad_norm": 0.38880547054207254, "learning_rate": 5.471511076326498e-06, "loss": 0.0032, "step": 31960 }, { "epoch": 0.5231121655894625, "grad_norm": 0.030920400618432743, "learning_rate": 5.468667888729077e-06, "loss": 0.0019, "step": 31970 }, { "epoch": 0.5232757915405384, "grad_norm": 0.31725380635774564, "learning_rate": 5.4658245482370455e-06, "loss": 0.003, "step": 31980 }, { "epoch": 0.5234394174916142, "grad_norm": 0.10378538611670388, "learning_rate": 5.462981055777992e-06, "loss": 0.0024, "step": 31990 }, { "epoch": 0.52360304344269, "grad_norm": 0.13202900436006346, "learning_rate": 5.460137412279558e-06, "loss": 0.0015, "step": 32000 }, { "epoch": 0.5237666693937658, "grad_norm": 0.18499624840034487, "learning_rate": 5.457293618669431e-06, "loss": 0.0029, "step": 32010 }, { "epoch": 0.5239302953448417, "grad_norm": 0.14631558094745362, "learning_rate": 5.454449675875346e-06, "loss": 0.0031, "step": 32020 }, { "epoch": 0.5240939212959176, "grad_norm": 0.23860297779401496, "learning_rate": 5.4516055848250935e-06, "loss": 0.0042, "step": 32030 }, { "epoch": 0.5242575472469934, "grad_norm": 0.34970092659224294, "learning_rate": 5.448761346446505e-06, "loss": 0.004, "step": 32040 }, { "epoch": 0.5244211731980692, "grad_norm": 0.16857497169223268, "learning_rate": 5.445916961667463e-06, "loss": 0.0027, "step": 32050 }, { "epoch": 0.524584799149145, "grad_norm": 0.24628985800494357, "learning_rate": 5.4430724314158975e-06, "loss": 0.0035, "step": 32060 }, { "epoch": 0.5247484251002209, "grad_norm": 0.3172309700172557, "learning_rate": 5.440227756619788e-06, "loss": 0.0045, "step": 32070 }, { "epoch": 0.5249120510512968, "grad_norm": 0.16941070640265618, "learning_rate": 5.437382938207157e-06, "loss": 0.0025, "step": 32080 }, { "epoch": 0.5250756770023726, "grad_norm": 0.3504327893132381, "learning_rate": 5.434537977106078e-06, "loss": 0.0021, "step": 32090 }, { "epoch": 0.5252393029534485, "grad_norm": 0.09708043060019855, "learning_rate": 5.43169287424467e-06, "loss": 0.0033, "step": 32100 }, { "epoch": 0.5254029289045242, "grad_norm": 0.11955537377432242, "learning_rate": 5.428847630551093e-06, "loss": 0.0021, "step": 32110 }, { "epoch": 0.5255665548556001, "grad_norm": 0.16419746435046614, "learning_rate": 5.4260022469535635e-06, "loss": 0.0026, "step": 32120 }, { "epoch": 0.5257301808066759, "grad_norm": 0.19862634391131348, "learning_rate": 5.423156724380334e-06, "loss": 0.0053, "step": 32130 }, { "epoch": 0.5258938067577518, "grad_norm": 0.24748169031056463, "learning_rate": 5.420311063759704e-06, "loss": 0.0033, "step": 32140 }, { "epoch": 0.5260574327088277, "grad_norm": 0.03293657902841861, "learning_rate": 5.417465266020027e-06, "loss": 0.0015, "step": 32150 }, { "epoch": 0.5262210586599034, "grad_norm": 0.22575355482585527, "learning_rate": 5.4146193320896864e-06, "loss": 0.0031, "step": 32160 }, { "epoch": 0.5263846846109793, "grad_norm": 0.33363219636371877, "learning_rate": 5.411773262897122e-06, "loss": 0.0035, "step": 32170 }, { "epoch": 0.5265483105620551, "grad_norm": 0.13360375267772587, "learning_rate": 5.408927059370813e-06, "loss": 0.0046, "step": 32180 }, { "epoch": 0.526711936513131, "grad_norm": 0.0537121976658464, "learning_rate": 5.406080722439283e-06, "loss": 0.0032, "step": 32190 }, { "epoch": 0.5268755624642069, "grad_norm": 0.24586689840729387, "learning_rate": 5.403234253031098e-06, "loss": 0.0046, "step": 32200 }, { "epoch": 0.5270391884152826, "grad_norm": 0.14156560396243956, "learning_rate": 5.40038765207487e-06, "loss": 0.0024, "step": 32210 }, { "epoch": 0.5272028143663585, "grad_norm": 0.24342417836894994, "learning_rate": 5.39754092049925e-06, "loss": 0.003, "step": 32220 }, { "epoch": 0.5273664403174343, "grad_norm": 0.31095122634654004, "learning_rate": 5.394694059232936e-06, "loss": 0.0036, "step": 32230 }, { "epoch": 0.5275300662685102, "grad_norm": 0.09999622296454513, "learning_rate": 5.3918470692046645e-06, "loss": 0.0017, "step": 32240 }, { "epoch": 0.5276936922195861, "grad_norm": 0.20965542748728558, "learning_rate": 5.388999951343217e-06, "loss": 0.0026, "step": 32250 }, { "epoch": 0.5278573181706618, "grad_norm": 0.27259757963727593, "learning_rate": 5.386152706577414e-06, "loss": 0.0032, "step": 32260 }, { "epoch": 0.5280209441217377, "grad_norm": 0.19015882541433954, "learning_rate": 5.383305335836121e-06, "loss": 0.0043, "step": 32270 }, { "epoch": 0.5281845700728135, "grad_norm": 0.047006709635032125, "learning_rate": 5.38045784004824e-06, "loss": 0.004, "step": 32280 }, { "epoch": 0.5283481960238894, "grad_norm": 0.1301786403627152, "learning_rate": 5.377610220142716e-06, "loss": 0.0028, "step": 32290 }, { "epoch": 0.5285118219749653, "grad_norm": 0.31751720324993454, "learning_rate": 5.374762477048537e-06, "loss": 0.004, "step": 32300 }, { "epoch": 0.528675447926041, "grad_norm": 0.3061096890655228, "learning_rate": 5.371914611694726e-06, "loss": 0.0042, "step": 32310 }, { "epoch": 0.5288390738771169, "grad_norm": 0.08582772403980092, "learning_rate": 5.369066625010352e-06, "loss": 0.0029, "step": 32320 }, { "epoch": 0.5290026998281927, "grad_norm": 0.15411337365355393, "learning_rate": 5.3662185179245175e-06, "loss": 0.0044, "step": 32330 }, { "epoch": 0.5291663257792686, "grad_norm": 0.11775395357718962, "learning_rate": 5.363370291366367e-06, "loss": 0.0046, "step": 32340 }, { "epoch": 0.5293299517303445, "grad_norm": 0.19408392170722583, "learning_rate": 5.360521946265087e-06, "loss": 0.0031, "step": 32350 }, { "epoch": 0.5294935776814202, "grad_norm": 0.022966091151615253, "learning_rate": 5.357673483549896e-06, "loss": 0.0017, "step": 32360 }, { "epoch": 0.5296572036324961, "grad_norm": 0.21200362027542247, "learning_rate": 5.3548249041500575e-06, "loss": 0.0032, "step": 32370 }, { "epoch": 0.5298208295835719, "grad_norm": 0.14168072438362506, "learning_rate": 5.351976208994869e-06, "loss": 0.0038, "step": 32380 }, { "epoch": 0.5299844555346478, "grad_norm": 0.09675837115436164, "learning_rate": 5.349127399013666e-06, "loss": 0.0043, "step": 32390 }, { "epoch": 0.5301480814857237, "grad_norm": 0.1832838640122271, "learning_rate": 5.346278475135824e-06, "loss": 0.0029, "step": 32400 }, { "epoch": 0.5303117074367995, "grad_norm": 0.21587723371828704, "learning_rate": 5.343429438290754e-06, "loss": 0.0024, "step": 32410 }, { "epoch": 0.5304753333878753, "grad_norm": 0.21866364162163882, "learning_rate": 5.3405802894079016e-06, "loss": 0.0041, "step": 32420 }, { "epoch": 0.5306389593389511, "grad_norm": 0.2158061676519111, "learning_rate": 5.337731029416754e-06, "loss": 0.002, "step": 32430 }, { "epoch": 0.530802585290027, "grad_norm": 0.06003821699663223, "learning_rate": 5.334881659246831e-06, "loss": 0.0038, "step": 32440 }, { "epoch": 0.5309662112411029, "grad_norm": 0.17315046602738104, "learning_rate": 5.3320321798276874e-06, "loss": 0.0039, "step": 32450 }, { "epoch": 0.5311298371921787, "grad_norm": 0.11690745806499225, "learning_rate": 5.329182592088918e-06, "loss": 0.0041, "step": 32460 }, { "epoch": 0.5312934631432545, "grad_norm": 0.38057251490459354, "learning_rate": 5.32633289696015e-06, "loss": 0.0039, "step": 32470 }, { "epoch": 0.5314570890943303, "grad_norm": 0.13089402350070317, "learning_rate": 5.323483095371045e-06, "loss": 0.0038, "step": 32480 }, { "epoch": 0.5316207150454062, "grad_norm": 0.11504406830203273, "learning_rate": 5.320633188251302e-06, "loss": 0.0025, "step": 32490 }, { "epoch": 0.5317843409964821, "grad_norm": 0.29247749249446015, "learning_rate": 5.3177831765306485e-06, "loss": 0.0042, "step": 32500 }, { "epoch": 0.5319479669475579, "grad_norm": 0.1486389625634579, "learning_rate": 5.314933061138855e-06, "loss": 0.003, "step": 32510 }, { "epoch": 0.5321115928986337, "grad_norm": 0.11432697436806108, "learning_rate": 5.312082843005723e-06, "loss": 0.004, "step": 32520 }, { "epoch": 0.5322752188497095, "grad_norm": 0.15569108730077688, "learning_rate": 5.309232523061079e-06, "loss": 0.0044, "step": 32530 }, { "epoch": 0.5324388448007854, "grad_norm": 0.5086813064898466, "learning_rate": 5.306382102234793e-06, "loss": 0.0041, "step": 32540 }, { "epoch": 0.5326024707518613, "grad_norm": 0.04862547931235565, "learning_rate": 5.303531581456767e-06, "loss": 0.0038, "step": 32550 }, { "epoch": 0.5327660967029371, "grad_norm": 0.2553986481414818, "learning_rate": 5.300680961656928e-06, "loss": 0.0036, "step": 32560 }, { "epoch": 0.532929722654013, "grad_norm": 0.05273455354864918, "learning_rate": 5.297830243765243e-06, "loss": 0.0021, "step": 32570 }, { "epoch": 0.5330933486050887, "grad_norm": 0.284042405125996, "learning_rate": 5.294979428711708e-06, "loss": 0.0027, "step": 32580 }, { "epoch": 0.5332569745561646, "grad_norm": 0.06671907906860931, "learning_rate": 5.292128517426351e-06, "loss": 0.0024, "step": 32590 }, { "epoch": 0.5334206005072405, "grad_norm": 0.1413664959204848, "learning_rate": 5.289277510839232e-06, "loss": 0.0021, "step": 32600 }, { "epoch": 0.5335842264583163, "grad_norm": 0.194005344841848, "learning_rate": 5.286426409880442e-06, "loss": 0.0031, "step": 32610 }, { "epoch": 0.5337478524093922, "grad_norm": 0.10959839605150984, "learning_rate": 5.283575215480099e-06, "loss": 0.0035, "step": 32620 }, { "epoch": 0.5339114783604679, "grad_norm": 0.13946966142624195, "learning_rate": 5.280723928568359e-06, "loss": 0.0027, "step": 32630 }, { "epoch": 0.5340751043115438, "grad_norm": 0.042340973387254746, "learning_rate": 5.2778725500754e-06, "loss": 0.002, "step": 32640 }, { "epoch": 0.5342387302626197, "grad_norm": 0.09736978612490262, "learning_rate": 5.275021080931436e-06, "loss": 0.0023, "step": 32650 }, { "epoch": 0.5344023562136955, "grad_norm": 0.17673280573270805, "learning_rate": 5.272169522066709e-06, "loss": 0.0022, "step": 32660 }, { "epoch": 0.5345659821647714, "grad_norm": 0.29721778957767603, "learning_rate": 5.2693178744114885e-06, "loss": 0.0019, "step": 32670 }, { "epoch": 0.5347296081158471, "grad_norm": 0.20032287859769843, "learning_rate": 5.266466138896073e-06, "loss": 0.0021, "step": 32680 }, { "epoch": 0.534893234066923, "grad_norm": 0.19603243451910135, "learning_rate": 5.263614316450793e-06, "loss": 0.0021, "step": 32690 }, { "epoch": 0.5350568600179989, "grad_norm": 0.26549289908160717, "learning_rate": 5.260762408006005e-06, "loss": 0.0024, "step": 32700 }, { "epoch": 0.5352204859690747, "grad_norm": 0.12324822462546113, "learning_rate": 5.257910414492091e-06, "loss": 0.0018, "step": 32710 }, { "epoch": 0.5353841119201506, "grad_norm": 0.34276891316117636, "learning_rate": 5.255058336839467e-06, "loss": 0.0025, "step": 32720 }, { "epoch": 0.5355477378712263, "grad_norm": 0.2577683840495486, "learning_rate": 5.25220617597857e-06, "loss": 0.0035, "step": 32730 }, { "epoch": 0.5357113638223022, "grad_norm": 0.2875364766500727, "learning_rate": 5.249353932839869e-06, "loss": 0.0027, "step": 32740 }, { "epoch": 0.5358749897733781, "grad_norm": 0.12128678196184674, "learning_rate": 5.246501608353857e-06, "loss": 0.0028, "step": 32750 }, { "epoch": 0.5360386157244539, "grad_norm": 0.13333318369324978, "learning_rate": 5.243649203451055e-06, "loss": 0.0036, "step": 32760 }, { "epoch": 0.5362022416755298, "grad_norm": 0.10917143885680675, "learning_rate": 5.240796719062008e-06, "loss": 0.0032, "step": 32770 }, { "epoch": 0.5363658676266055, "grad_norm": 0.12232298157681375, "learning_rate": 5.237944156117292e-06, "loss": 0.007, "step": 32780 }, { "epoch": 0.5365294935776814, "grad_norm": 0.056364958576616234, "learning_rate": 5.235091515547502e-06, "loss": 0.0018, "step": 32790 }, { "epoch": 0.5366931195287573, "grad_norm": 0.0502022622744375, "learning_rate": 5.232238798283261e-06, "loss": 0.0026, "step": 32800 }, { "epoch": 0.5368567454798331, "grad_norm": 0.15367439161022073, "learning_rate": 5.2293860052552206e-06, "loss": 0.005, "step": 32810 }, { "epoch": 0.537020371430909, "grad_norm": 0.0701227352220075, "learning_rate": 5.226533137394053e-06, "loss": 0.0028, "step": 32820 }, { "epoch": 0.5371839973819847, "grad_norm": 0.2674614758520082, "learning_rate": 5.223680195630455e-06, "loss": 0.0028, "step": 32830 }, { "epoch": 0.5373476233330606, "grad_norm": 0.07660975046700641, "learning_rate": 5.220827180895149e-06, "loss": 0.0033, "step": 32840 }, { "epoch": 0.5375112492841365, "grad_norm": 0.28556638746886626, "learning_rate": 5.21797409411888e-06, "loss": 0.0021, "step": 32850 }, { "epoch": 0.5376748752352123, "grad_norm": 0.10768165428977265, "learning_rate": 5.215120936232418e-06, "loss": 0.0023, "step": 32860 }, { "epoch": 0.5378385011862882, "grad_norm": 0.24504134439443845, "learning_rate": 5.212267708166555e-06, "loss": 0.004, "step": 32870 }, { "epoch": 0.538002127137364, "grad_norm": 0.18686983849963693, "learning_rate": 5.209414410852106e-06, "loss": 0.0031, "step": 32880 }, { "epoch": 0.5381657530884398, "grad_norm": 0.06830813591743791, "learning_rate": 5.20656104521991e-06, "loss": 0.0024, "step": 32890 }, { "epoch": 0.5383293790395157, "grad_norm": 0.187470766627642, "learning_rate": 5.203707612200824e-06, "loss": 0.0041, "step": 32900 }, { "epoch": 0.5384930049905915, "grad_norm": 0.17047553536809082, "learning_rate": 5.200854112725733e-06, "loss": 0.002, "step": 32910 }, { "epoch": 0.5386566309416674, "grad_norm": 0.24568116847062507, "learning_rate": 5.19800054772554e-06, "loss": 0.0025, "step": 32920 }, { "epoch": 0.5388202568927432, "grad_norm": 0.10954059333396925, "learning_rate": 5.195146918131169e-06, "loss": 0.0042, "step": 32930 }, { "epoch": 0.538983882843819, "grad_norm": 0.2941907908846616, "learning_rate": 5.192293224873568e-06, "loss": 0.0043, "step": 32940 }, { "epoch": 0.5391475087948949, "grad_norm": 0.1971298247797568, "learning_rate": 5.189439468883704e-06, "loss": 0.0025, "step": 32950 }, { "epoch": 0.5393111347459707, "grad_norm": 0.04797655456154884, "learning_rate": 5.1865856510925605e-06, "loss": 0.0019, "step": 32960 }, { "epoch": 0.5394747606970466, "grad_norm": 0.1638415827656726, "learning_rate": 5.183731772431151e-06, "loss": 0.0035, "step": 32970 }, { "epoch": 0.5396383866481224, "grad_norm": 0.22496829683090672, "learning_rate": 5.180877833830499e-06, "loss": 0.0025, "step": 32980 }, { "epoch": 0.5398020125991982, "grad_norm": 0.17983115314668138, "learning_rate": 5.178023836221651e-06, "loss": 0.0026, "step": 32990 }, { "epoch": 0.539965638550274, "grad_norm": 0.26245625000853895, "learning_rate": 5.175169780535677e-06, "loss": 0.0028, "step": 33000 }, { "epoch": 0.5401292645013499, "grad_norm": 0.6596782126225295, "learning_rate": 5.172315667703658e-06, "loss": 0.0048, "step": 33010 }, { "epoch": 0.5402928904524258, "grad_norm": 0.21311748822093782, "learning_rate": 5.1694614986567005e-06, "loss": 0.0025, "step": 33020 }, { "epoch": 0.5404565164035016, "grad_norm": 0.11242835083113199, "learning_rate": 5.166607274325928e-06, "loss": 0.0041, "step": 33030 }, { "epoch": 0.5406201423545774, "grad_norm": 0.10927477790466357, "learning_rate": 5.163752995642476e-06, "loss": 0.0028, "step": 33040 }, { "epoch": 0.5407837683056532, "grad_norm": 0.27271916460538836, "learning_rate": 5.160898663537505e-06, "loss": 0.0022, "step": 33050 }, { "epoch": 0.5409473942567291, "grad_norm": 0.15440376123770488, "learning_rate": 5.158044278942192e-06, "loss": 0.0042, "step": 33060 }, { "epoch": 0.541111020207805, "grad_norm": 0.04384963533770578, "learning_rate": 5.155189842787727e-06, "loss": 0.0024, "step": 33070 }, { "epoch": 0.5412746461588808, "grad_norm": 0.09458686641321318, "learning_rate": 5.15233535600532e-06, "loss": 0.0035, "step": 33080 }, { "epoch": 0.5414382721099567, "grad_norm": 0.35165263126856267, "learning_rate": 5.149480819526199e-06, "loss": 0.0022, "step": 33090 }, { "epoch": 0.5416018980610324, "grad_norm": 0.1772141395168747, "learning_rate": 5.146626234281602e-06, "loss": 0.0031, "step": 33100 }, { "epoch": 0.5417655240121083, "grad_norm": 0.4200715322836401, "learning_rate": 5.143771601202789e-06, "loss": 0.003, "step": 33110 }, { "epoch": 0.5419291499631842, "grad_norm": 0.109457101425537, "learning_rate": 5.140916921221037e-06, "loss": 0.0037, "step": 33120 }, { "epoch": 0.54209277591426, "grad_norm": 0.1791063740353525, "learning_rate": 5.138062195267628e-06, "loss": 0.004, "step": 33130 }, { "epoch": 0.5422564018653359, "grad_norm": 0.12639814295352572, "learning_rate": 5.1352074242738705e-06, "loss": 0.0033, "step": 33140 }, { "epoch": 0.5424200278164116, "grad_norm": 0.5381983057446634, "learning_rate": 5.1323526091710826e-06, "loss": 0.0029, "step": 33150 }, { "epoch": 0.5425836537674875, "grad_norm": 0.20999813192750963, "learning_rate": 5.129497750890595e-06, "loss": 0.0042, "step": 33160 }, { "epoch": 0.5427472797185634, "grad_norm": 0.1734021565056439, "learning_rate": 5.126642850363758e-06, "loss": 0.003, "step": 33170 }, { "epoch": 0.5429109056696392, "grad_norm": 0.09173405918512315, "learning_rate": 5.1237879085219296e-06, "loss": 0.0026, "step": 33180 }, { "epoch": 0.5430745316207151, "grad_norm": 0.1798198313788904, "learning_rate": 5.120932926296486e-06, "loss": 0.0033, "step": 33190 }, { "epoch": 0.5432381575717908, "grad_norm": 0.28003916371376075, "learning_rate": 5.118077904618813e-06, "loss": 0.0027, "step": 33200 }, { "epoch": 0.5434017835228667, "grad_norm": 0.09071919941396615, "learning_rate": 5.115222844420312e-06, "loss": 0.0031, "step": 33210 }, { "epoch": 0.5435654094739426, "grad_norm": 0.43652298650022087, "learning_rate": 5.1123677466323966e-06, "loss": 0.0039, "step": 33220 }, { "epoch": 0.5437290354250184, "grad_norm": 0.1137658708628742, "learning_rate": 5.10951261218649e-06, "loss": 0.0014, "step": 33230 }, { "epoch": 0.5438926613760943, "grad_norm": 0.23871677391112014, "learning_rate": 5.106657442014031e-06, "loss": 0.0085, "step": 33240 }, { "epoch": 0.54405628732717, "grad_norm": 0.10758978587401452, "learning_rate": 5.103802237046467e-06, "loss": 0.0032, "step": 33250 }, { "epoch": 0.5442199132782459, "grad_norm": 0.24119290786921255, "learning_rate": 5.100946998215259e-06, "loss": 0.0038, "step": 33260 }, { "epoch": 0.5443835392293218, "grad_norm": 0.27739026732622596, "learning_rate": 5.09809172645188e-06, "loss": 0.0032, "step": 33270 }, { "epoch": 0.5445471651803976, "grad_norm": 0.1463538400044755, "learning_rate": 5.095236422687807e-06, "loss": 0.0021, "step": 33280 }, { "epoch": 0.5447107911314735, "grad_norm": 0.13556738603928378, "learning_rate": 5.092381087854538e-06, "loss": 0.0032, "step": 33290 }, { "epoch": 0.5448744170825492, "grad_norm": 0.28503955281491294, "learning_rate": 5.0895257228835715e-06, "loss": 0.0028, "step": 33300 }, { "epoch": 0.5450380430336251, "grad_norm": 0.01337841405799922, "learning_rate": 5.086670328706421e-06, "loss": 0.0049, "step": 33310 }, { "epoch": 0.545201668984701, "grad_norm": 0.12692851218008064, "learning_rate": 5.083814906254608e-06, "loss": 0.0014, "step": 33320 }, { "epoch": 0.5453652949357768, "grad_norm": 0.4945647827718782, "learning_rate": 5.080959456459666e-06, "loss": 0.0034, "step": 33330 }, { "epoch": 0.5455289208868527, "grad_norm": 0.05670437061252929, "learning_rate": 5.078103980253132e-06, "loss": 0.0052, "step": 33340 }, { "epoch": 0.5456925468379284, "grad_norm": 0.1615537427401772, "learning_rate": 5.075248478566555e-06, "loss": 0.0023, "step": 33350 }, { "epoch": 0.5458561727890043, "grad_norm": 0.11367964380546852, "learning_rate": 5.072392952331493e-06, "loss": 0.0014, "step": 33360 }, { "epoch": 0.5460197987400802, "grad_norm": 0.2324827051522824, "learning_rate": 5.069537402479511e-06, "loss": 0.0035, "step": 33370 }, { "epoch": 0.546183424691156, "grad_norm": 0.20424525843626234, "learning_rate": 5.06668182994218e-06, "loss": 0.0029, "step": 33380 }, { "epoch": 0.5463470506422319, "grad_norm": 0.14397939122142903, "learning_rate": 5.063826235651082e-06, "loss": 0.0037, "step": 33390 }, { "epoch": 0.5465106765933077, "grad_norm": 0.09089785812272315, "learning_rate": 5.060970620537805e-06, "loss": 0.0032, "step": 33400 }, { "epoch": 0.5466743025443835, "grad_norm": 0.09960089827377018, "learning_rate": 5.058114985533939e-06, "loss": 0.0041, "step": 33410 }, { "epoch": 0.5468379284954594, "grad_norm": 0.10802924139868546, "learning_rate": 5.055259331571087e-06, "loss": 0.0029, "step": 33420 }, { "epoch": 0.5470015544465352, "grad_norm": 0.229487867972326, "learning_rate": 5.052403659580857e-06, "loss": 0.0024, "step": 33430 }, { "epoch": 0.5471651803976111, "grad_norm": 0.3389646502670947, "learning_rate": 5.049547970494858e-06, "loss": 0.003, "step": 33440 }, { "epoch": 0.5473288063486869, "grad_norm": 0.5227087776264103, "learning_rate": 5.04669226524471e-06, "loss": 0.0031, "step": 33450 }, { "epoch": 0.5474924322997627, "grad_norm": 0.2223338992273839, "learning_rate": 5.043836544762039e-06, "loss": 0.0023, "step": 33460 }, { "epoch": 0.5476560582508386, "grad_norm": 0.1574476280829336, "learning_rate": 5.040980809978466e-06, "loss": 0.002, "step": 33470 }, { "epoch": 0.5478196842019144, "grad_norm": 0.0616870356740467, "learning_rate": 5.038125061825633e-06, "loss": 0.0024, "step": 33480 }, { "epoch": 0.5479833101529903, "grad_norm": 0.23573832514411017, "learning_rate": 5.03526930123517e-06, "loss": 0.003, "step": 33490 }, { "epoch": 0.5481469361040661, "grad_norm": 0.11478678725584612, "learning_rate": 5.032413529138721e-06, "loss": 0.0031, "step": 33500 }, { "epoch": 0.548310562055142, "grad_norm": 0.1893585896738234, "learning_rate": 5.029557746467933e-06, "loss": 0.0029, "step": 33510 }, { "epoch": 0.5484741880062178, "grad_norm": 0.2559089757330635, "learning_rate": 5.026701954154451e-06, "loss": 0.0026, "step": 33520 }, { "epoch": 0.5486378139572936, "grad_norm": 0.13134963795175011, "learning_rate": 5.02384615312993e-06, "loss": 0.0057, "step": 33530 }, { "epoch": 0.5488014399083695, "grad_norm": 0.07364027139468214, "learning_rate": 5.020990344326023e-06, "loss": 0.0017, "step": 33540 }, { "epoch": 0.5489650658594453, "grad_norm": 0.27319302302887216, "learning_rate": 5.0181345286743875e-06, "loss": 0.0036, "step": 33550 }, { "epoch": 0.5491286918105212, "grad_norm": 0.3380040932206558, "learning_rate": 5.015278707106682e-06, "loss": 0.0027, "step": 33560 }, { "epoch": 0.549292317761597, "grad_norm": 0.19913843755402597, "learning_rate": 5.012422880554571e-06, "loss": 0.003, "step": 33570 }, { "epoch": 0.5494559437126728, "grad_norm": 0.13405105052743013, "learning_rate": 5.009567049949712e-06, "loss": 0.0035, "step": 33580 }, { "epoch": 0.5496195696637487, "grad_norm": 0.16442196281602223, "learning_rate": 5.006711216223772e-06, "loss": 0.0028, "step": 33590 }, { "epoch": 0.5497831956148245, "grad_norm": 0.15015553899152415, "learning_rate": 5.003855380308419e-06, "loss": 0.0025, "step": 33600 }, { "epoch": 0.5499468215659004, "grad_norm": 0.11737836370995329, "learning_rate": 5.000999543135314e-06, "loss": 0.0026, "step": 33610 }, { "epoch": 0.5501104475169762, "grad_norm": 0.13744438558998606, "learning_rate": 4.998143705636126e-06, "loss": 0.0019, "step": 33620 }, { "epoch": 0.550274073468052, "grad_norm": 0.127752365199122, "learning_rate": 4.995287868742523e-06, "loss": 0.0038, "step": 33630 }, { "epoch": 0.5504376994191279, "grad_norm": 0.04251525298122103, "learning_rate": 4.9924320333861686e-06, "loss": 0.0027, "step": 33640 }, { "epoch": 0.5506013253702037, "grad_norm": 0.28985697719302506, "learning_rate": 4.989576200498728e-06, "loss": 0.0037, "step": 33650 }, { "epoch": 0.5507649513212796, "grad_norm": 0.19167194301838225, "learning_rate": 4.98672037101187e-06, "loss": 0.0034, "step": 33660 }, { "epoch": 0.5509285772723554, "grad_norm": 0.19859354650862426, "learning_rate": 4.983864545857255e-06, "loss": 0.0022, "step": 33670 }, { "epoch": 0.5510922032234312, "grad_norm": 0.16589850815242665, "learning_rate": 4.981008725966545e-06, "loss": 0.0027, "step": 33680 }, { "epoch": 0.5512558291745071, "grad_norm": 0.08582570900098556, "learning_rate": 4.978152912271405e-06, "loss": 0.0025, "step": 33690 }, { "epoch": 0.5514194551255829, "grad_norm": 0.054666075775777624, "learning_rate": 4.97529710570349e-06, "loss": 0.0035, "step": 33700 }, { "epoch": 0.5515830810766588, "grad_norm": 0.08829880505141141, "learning_rate": 4.972441307194455e-06, "loss": 0.0016, "step": 33710 }, { "epoch": 0.5517467070277347, "grad_norm": 0.25669690909609333, "learning_rate": 4.96958551767596e-06, "loss": 0.0026, "step": 33720 }, { "epoch": 0.5519103329788104, "grad_norm": 0.10134768577943967, "learning_rate": 4.966729738079651e-06, "loss": 0.0026, "step": 33730 }, { "epoch": 0.5520739589298863, "grad_norm": 0.5135401637531692, "learning_rate": 4.963873969337174e-06, "loss": 0.0048, "step": 33740 }, { "epoch": 0.5522375848809621, "grad_norm": 0.09612637754984373, "learning_rate": 4.961018212380179e-06, "loss": 0.0018, "step": 33750 }, { "epoch": 0.552401210832038, "grad_norm": 0.07296451956170297, "learning_rate": 4.958162468140302e-06, "loss": 0.0021, "step": 33760 }, { "epoch": 0.5525648367831139, "grad_norm": 0.0690768341493563, "learning_rate": 4.955306737549177e-06, "loss": 0.0045, "step": 33770 }, { "epoch": 0.5527284627341896, "grad_norm": 0.06613254029036551, "learning_rate": 4.9524510215384424e-06, "loss": 0.0017, "step": 33780 }, { "epoch": 0.5528920886852655, "grad_norm": 0.1404849340239566, "learning_rate": 4.949595321039719e-06, "loss": 0.0016, "step": 33790 }, { "epoch": 0.5530557146363413, "grad_norm": 0.29227668471988766, "learning_rate": 4.94673963698463e-06, "loss": 0.0042, "step": 33800 }, { "epoch": 0.5532193405874172, "grad_norm": 0.07905115053964465, "learning_rate": 4.943883970304795e-06, "loss": 0.003, "step": 33810 }, { "epoch": 0.5533829665384931, "grad_norm": 0.35047513622471355, "learning_rate": 4.941028321931821e-06, "loss": 0.0036, "step": 33820 }, { "epoch": 0.5535465924895688, "grad_norm": 0.19227056667487888, "learning_rate": 4.938172692797313e-06, "loss": 0.0022, "step": 33830 }, { "epoch": 0.5537102184406447, "grad_norm": 0.15843649704867058, "learning_rate": 4.9353170838328725e-06, "loss": 0.0018, "step": 33840 }, { "epoch": 0.5538738443917205, "grad_norm": 0.40886755463128766, "learning_rate": 4.9324614959700894e-06, "loss": 0.0052, "step": 33850 }, { "epoch": 0.5540374703427964, "grad_norm": 0.09701874312030412, "learning_rate": 4.929605930140546e-06, "loss": 0.0023, "step": 33860 }, { "epoch": 0.5542010962938722, "grad_norm": 0.11219644676221913, "learning_rate": 4.9267503872758275e-06, "loss": 0.0028, "step": 33870 }, { "epoch": 0.554364722244948, "grad_norm": 0.20261954232756732, "learning_rate": 4.9238948683074975e-06, "loss": 0.0044, "step": 33880 }, { "epoch": 0.5545283481960239, "grad_norm": 0.10757315193541118, "learning_rate": 4.921039374167121e-06, "loss": 0.0022, "step": 33890 }, { "epoch": 0.5546919741470997, "grad_norm": 0.19342549766791128, "learning_rate": 4.918183905786251e-06, "loss": 0.0019, "step": 33900 }, { "epoch": 0.5548556000981756, "grad_norm": 0.24104349304967013, "learning_rate": 4.915328464096436e-06, "loss": 0.0029, "step": 33910 }, { "epoch": 0.5550192260492514, "grad_norm": 0.3863572505902604, "learning_rate": 4.912473050029212e-06, "loss": 0.0026, "step": 33920 }, { "epoch": 0.5551828520003272, "grad_norm": 0.3262174658297306, "learning_rate": 4.909617664516107e-06, "loss": 0.0034, "step": 33930 }, { "epoch": 0.5553464779514031, "grad_norm": 0.1800889387564932, "learning_rate": 4.906762308488641e-06, "loss": 0.0031, "step": 33940 }, { "epoch": 0.5555101039024789, "grad_norm": 0.09749444915029594, "learning_rate": 4.903906982878324e-06, "loss": 0.0045, "step": 33950 }, { "epoch": 0.5556737298535548, "grad_norm": 0.0666383914937107, "learning_rate": 4.901051688616652e-06, "loss": 0.0027, "step": 33960 }, { "epoch": 0.5558373558046306, "grad_norm": 0.05634549849460267, "learning_rate": 4.898196426635119e-06, "loss": 0.004, "step": 33970 }, { "epoch": 0.5560009817557064, "grad_norm": 0.12841463188721838, "learning_rate": 4.895341197865201e-06, "loss": 0.0023, "step": 33980 }, { "epoch": 0.5561646077067823, "grad_norm": 0.10917926943019311, "learning_rate": 4.8924860032383656e-06, "loss": 0.0037, "step": 33990 }, { "epoch": 0.5563282336578581, "grad_norm": 0.025895706420346245, "learning_rate": 4.88963084368607e-06, "loss": 0.0033, "step": 34000 }, { "epoch": 0.556491859608934, "grad_norm": 0.04506525106912019, "learning_rate": 4.886775720139762e-06, "loss": 0.003, "step": 34010 }, { "epoch": 0.5566554855600098, "grad_norm": 0.05898426144768243, "learning_rate": 4.88392063353087e-06, "loss": 0.0028, "step": 34020 }, { "epoch": 0.5568191115110857, "grad_norm": 0.15042547484089042, "learning_rate": 4.881065584790821e-06, "loss": 0.003, "step": 34030 }, { "epoch": 0.5569827374621615, "grad_norm": 0.1674247857008809, "learning_rate": 4.8782105748510205e-06, "loss": 0.0029, "step": 34040 }, { "epoch": 0.5571463634132373, "grad_norm": 0.23741060395347063, "learning_rate": 4.875355604642866e-06, "loss": 0.0034, "step": 34050 }, { "epoch": 0.5573099893643132, "grad_norm": 0.2556491577071466, "learning_rate": 4.872500675097741e-06, "loss": 0.002, "step": 34060 }, { "epoch": 0.557473615315389, "grad_norm": 0.18550731753118735, "learning_rate": 4.869645787147017e-06, "loss": 0.0025, "step": 34070 }, { "epoch": 0.5576372412664649, "grad_norm": 0.09432895896236418, "learning_rate": 4.866790941722048e-06, "loss": 0.0041, "step": 34080 }, { "epoch": 0.5578008672175407, "grad_norm": 0.1944323765104268, "learning_rate": 4.86393613975418e-06, "loss": 0.0035, "step": 34090 }, { "epoch": 0.5579644931686165, "grad_norm": 0.2422932611855969, "learning_rate": 4.861081382174741e-06, "loss": 0.0033, "step": 34100 }, { "epoch": 0.5581281191196924, "grad_norm": 0.11254669175779865, "learning_rate": 4.858226669915042e-06, "loss": 0.0031, "step": 34110 }, { "epoch": 0.5582917450707682, "grad_norm": 0.10623624942837201, "learning_rate": 4.855372003906386e-06, "loss": 0.0033, "step": 34120 }, { "epoch": 0.5584553710218441, "grad_norm": 0.07576238211488211, "learning_rate": 4.852517385080055e-06, "loss": 0.0019, "step": 34130 }, { "epoch": 0.55861899697292, "grad_norm": 0.20947958705101766, "learning_rate": 4.8496628143673196e-06, "loss": 0.0036, "step": 34140 }, { "epoch": 0.5587826229239957, "grad_norm": 0.08025112510771956, "learning_rate": 4.846808292699432e-06, "loss": 0.0023, "step": 34150 }, { "epoch": 0.5589462488750716, "grad_norm": 0.03933167879665618, "learning_rate": 4.843953821007631e-06, "loss": 0.002, "step": 34160 }, { "epoch": 0.5591098748261474, "grad_norm": 0.14153836070683692, "learning_rate": 4.841099400223135e-06, "loss": 0.0021, "step": 34170 }, { "epoch": 0.5592735007772233, "grad_norm": 0.2761771339525101, "learning_rate": 4.83824503127715e-06, "loss": 0.0032, "step": 34180 }, { "epoch": 0.5594371267282992, "grad_norm": 0.013084796565139185, "learning_rate": 4.835390715100864e-06, "loss": 0.0025, "step": 34190 }, { "epoch": 0.5596007526793749, "grad_norm": 0.17115944385719223, "learning_rate": 4.832536452625444e-06, "loss": 0.0036, "step": 34200 }, { "epoch": 0.5597643786304508, "grad_norm": 0.2139708289428106, "learning_rate": 4.829682244782046e-06, "loss": 0.0029, "step": 34210 }, { "epoch": 0.5599280045815266, "grad_norm": 0.1250146382281352, "learning_rate": 4.8268280925018034e-06, "loss": 0.0021, "step": 34220 }, { "epoch": 0.5600916305326025, "grad_norm": 0.08994884800786558, "learning_rate": 4.8239739967158335e-06, "loss": 0.0018, "step": 34230 }, { "epoch": 0.5602552564836784, "grad_norm": 0.18660576986633415, "learning_rate": 4.821119958355233e-06, "loss": 0.0029, "step": 34240 }, { "epoch": 0.5604188824347541, "grad_norm": 0.08006527165237609, "learning_rate": 4.818265978351084e-06, "loss": 0.0017, "step": 34250 }, { "epoch": 0.56058250838583, "grad_norm": 0.2476745725678214, "learning_rate": 4.815412057634445e-06, "loss": 0.0026, "step": 34260 }, { "epoch": 0.5607461343369058, "grad_norm": 0.060597485547975505, "learning_rate": 4.812558197136358e-06, "loss": 0.0025, "step": 34270 }, { "epoch": 0.5609097602879817, "grad_norm": 0.22124179620506648, "learning_rate": 4.8097043977878445e-06, "loss": 0.0027, "step": 34280 }, { "epoch": 0.5610733862390576, "grad_norm": 0.1700427490529885, "learning_rate": 4.806850660519906e-06, "loss": 0.0037, "step": 34290 }, { "epoch": 0.5612370121901333, "grad_norm": 0.5274311959724609, "learning_rate": 4.803996986263523e-06, "loss": 0.004, "step": 34300 }, { "epoch": 0.5614006381412092, "grad_norm": 0.3455460538436016, "learning_rate": 4.801143375949657e-06, "loss": 0.0026, "step": 34310 }, { "epoch": 0.561564264092285, "grad_norm": 0.38129445487355873, "learning_rate": 4.798289830509249e-06, "loss": 0.0043, "step": 34320 }, { "epoch": 0.5617278900433609, "grad_norm": 0.17586702686796832, "learning_rate": 4.7954363508732145e-06, "loss": 0.0029, "step": 34330 }, { "epoch": 0.5618915159944368, "grad_norm": 0.24283639700390494, "learning_rate": 4.792582937972455e-06, "loss": 0.005, "step": 34340 }, { "epoch": 0.5620551419455125, "grad_norm": 0.1870736059601488, "learning_rate": 4.7897295927378435e-06, "loss": 0.0041, "step": 34350 }, { "epoch": 0.5622187678965884, "grad_norm": 0.2168414949137978, "learning_rate": 4.786876316100233e-06, "loss": 0.0025, "step": 34360 }, { "epoch": 0.5623823938476642, "grad_norm": 0.03875193052343486, "learning_rate": 4.784023108990456e-06, "loss": 0.0018, "step": 34370 }, { "epoch": 0.5625460197987401, "grad_norm": 0.2533499606070049, "learning_rate": 4.7811699723393205e-06, "loss": 0.0039, "step": 34380 }, { "epoch": 0.562709645749816, "grad_norm": 0.15105636757304827, "learning_rate": 4.7783169070776095e-06, "loss": 0.0058, "step": 34390 }, { "epoch": 0.5628732717008917, "grad_norm": 0.04670597080851487, "learning_rate": 4.775463914136089e-06, "loss": 0.0016, "step": 34400 }, { "epoch": 0.5630368976519676, "grad_norm": 0.2940880891102453, "learning_rate": 4.772610994445496e-06, "loss": 0.0017, "step": 34410 }, { "epoch": 0.5632005236030434, "grad_norm": 0.17700238963468307, "learning_rate": 4.769758148936543e-06, "loss": 0.003, "step": 34420 }, { "epoch": 0.5633641495541193, "grad_norm": 0.18867232861400654, "learning_rate": 4.766905378539923e-06, "loss": 0.0029, "step": 34430 }, { "epoch": 0.5635277755051952, "grad_norm": 0.2760582445608577, "learning_rate": 4.764052684186302e-06, "loss": 0.0027, "step": 34440 }, { "epoch": 0.563691401456271, "grad_norm": 0.07304146055666083, "learning_rate": 4.761200066806319e-06, "loss": 0.0021, "step": 34450 }, { "epoch": 0.5638550274073468, "grad_norm": 0.4745721174776518, "learning_rate": 4.758347527330591e-06, "loss": 0.0053, "step": 34460 }, { "epoch": 0.5640186533584226, "grad_norm": 0.22246678722086954, "learning_rate": 4.7554950666897094e-06, "loss": 0.0032, "step": 34470 }, { "epoch": 0.5641822793094985, "grad_norm": 0.10964403914953515, "learning_rate": 4.752642685814238e-06, "loss": 0.0031, "step": 34480 }, { "epoch": 0.5643459052605744, "grad_norm": 0.18020922498121175, "learning_rate": 4.749790385634716e-06, "loss": 0.0046, "step": 34490 }, { "epoch": 0.5645095312116502, "grad_norm": 0.2612613727880996, "learning_rate": 4.746938167081656e-06, "loss": 0.004, "step": 34500 }, { "epoch": 0.564673157162726, "grad_norm": 0.34073258867079126, "learning_rate": 4.7440860310855426e-06, "loss": 0.0017, "step": 34510 }, { "epoch": 0.5648367831138018, "grad_norm": 0.15262172659323311, "learning_rate": 4.741233978576837e-06, "loss": 0.0031, "step": 34520 }, { "epoch": 0.5650004090648777, "grad_norm": 0.2001287798321962, "learning_rate": 4.738382010485969e-06, "loss": 0.0037, "step": 34530 }, { "epoch": 0.5651640350159536, "grad_norm": 0.04479679502364937, "learning_rate": 4.7355301277433455e-06, "loss": 0.0024, "step": 34540 }, { "epoch": 0.5653276609670294, "grad_norm": 0.0442003310591338, "learning_rate": 4.732678331279337e-06, "loss": 0.0023, "step": 34550 }, { "epoch": 0.5654912869181052, "grad_norm": 0.17239141213988352, "learning_rate": 4.729826622024297e-06, "loss": 0.0034, "step": 34560 }, { "epoch": 0.565654912869181, "grad_norm": 0.2931721128750487, "learning_rate": 4.726975000908546e-06, "loss": 0.0038, "step": 34570 }, { "epoch": 0.5658185388202569, "grad_norm": 0.147521649636593, "learning_rate": 4.724123468862368e-06, "loss": 0.0032, "step": 34580 }, { "epoch": 0.5659821647713328, "grad_norm": 0.19929045062751566, "learning_rate": 4.721272026816034e-06, "loss": 0.0023, "step": 34590 }, { "epoch": 0.5661457907224086, "grad_norm": 0.31199198775033304, "learning_rate": 4.7184206756997705e-06, "loss": 0.0032, "step": 34600 }, { "epoch": 0.5663094166734844, "grad_norm": 0.26629633213529075, "learning_rate": 4.7155694164437794e-06, "loss": 0.0027, "step": 34610 }, { "epoch": 0.5664730426245602, "grad_norm": 0.4314375273907548, "learning_rate": 4.7127182499782394e-06, "loss": 0.0056, "step": 34620 }, { "epoch": 0.5666366685756361, "grad_norm": 0.0823529941841386, "learning_rate": 4.709867177233288e-06, "loss": 0.0013, "step": 34630 }, { "epoch": 0.566800294526712, "grad_norm": 0.31904225921200446, "learning_rate": 4.707016199139039e-06, "loss": 0.0036, "step": 34640 }, { "epoch": 0.5669639204777878, "grad_norm": 0.065714048697883, "learning_rate": 4.704165316625575e-06, "loss": 0.0025, "step": 34650 }, { "epoch": 0.5671275464288636, "grad_norm": 0.019652022695815365, "learning_rate": 4.701314530622945e-06, "loss": 0.0032, "step": 34660 }, { "epoch": 0.5672911723799394, "grad_norm": 0.11393755230884864, "learning_rate": 4.698463842061164e-06, "loss": 0.0043, "step": 34670 }, { "epoch": 0.5674547983310153, "grad_norm": 0.23481955593949247, "learning_rate": 4.695613251870226e-06, "loss": 0.0035, "step": 34680 }, { "epoch": 0.5676184242820912, "grad_norm": 0.2430415671057859, "learning_rate": 4.69276276098008e-06, "loss": 0.0029, "step": 34690 }, { "epoch": 0.567782050233167, "grad_norm": 0.09372633377784939, "learning_rate": 4.689912370320647e-06, "loss": 0.0029, "step": 34700 }, { "epoch": 0.5679456761842429, "grad_norm": 0.1737665124954546, "learning_rate": 4.687062080821824e-06, "loss": 0.0024, "step": 34710 }, { "epoch": 0.5681093021353186, "grad_norm": 0.26333387980378437, "learning_rate": 4.68421189341346e-06, "loss": 0.0023, "step": 34720 }, { "epoch": 0.5682729280863945, "grad_norm": 0.15374022618927896, "learning_rate": 4.681361809025381e-06, "loss": 0.0019, "step": 34730 }, { "epoch": 0.5684365540374703, "grad_norm": 0.12217669515144067, "learning_rate": 4.6785118285873795e-06, "loss": 0.0027, "step": 34740 }, { "epoch": 0.5686001799885462, "grad_norm": 0.20773944903862368, "learning_rate": 4.675661953029207e-06, "loss": 0.002, "step": 34750 }, { "epoch": 0.5687638059396221, "grad_norm": 0.23256929227538928, "learning_rate": 4.672812183280584e-06, "loss": 0.0026, "step": 34760 }, { "epoch": 0.5689274318906978, "grad_norm": 0.19365959146065578, "learning_rate": 4.669962520271204e-06, "loss": 0.004, "step": 34770 }, { "epoch": 0.5690910578417737, "grad_norm": 0.23368407210628542, "learning_rate": 4.667112964930714e-06, "loss": 0.0042, "step": 34780 }, { "epoch": 0.5692546837928495, "grad_norm": 0.1469614490142267, "learning_rate": 4.6642635181887295e-06, "loss": 0.0045, "step": 34790 }, { "epoch": 0.5694183097439254, "grad_norm": 0.08581751202717164, "learning_rate": 4.6614141809748385e-06, "loss": 0.004, "step": 34800 }, { "epoch": 0.5695819356950013, "grad_norm": 0.27520429857050993, "learning_rate": 4.658564954218581e-06, "loss": 0.0031, "step": 34810 }, { "epoch": 0.569745561646077, "grad_norm": 0.2552820164942705, "learning_rate": 4.6557158388494676e-06, "loss": 0.0033, "step": 34820 }, { "epoch": 0.5699091875971529, "grad_norm": 0.3122888841833536, "learning_rate": 4.652866835796977e-06, "loss": 0.0036, "step": 34830 }, { "epoch": 0.5700728135482287, "grad_norm": 0.185488168629714, "learning_rate": 4.65001794599054e-06, "loss": 0.0035, "step": 34840 }, { "epoch": 0.5702364394993046, "grad_norm": 0.1091345426121472, "learning_rate": 4.647169170359558e-06, "loss": 0.0021, "step": 34850 }, { "epoch": 0.5704000654503805, "grad_norm": 0.198385665576042, "learning_rate": 4.644320509833397e-06, "loss": 0.002, "step": 34860 }, { "epoch": 0.5705636914014562, "grad_norm": 0.42822651029816045, "learning_rate": 4.641471965341378e-06, "loss": 0.0058, "step": 34870 }, { "epoch": 0.5707273173525321, "grad_norm": 0.18266343768138915, "learning_rate": 4.63862353781279e-06, "loss": 0.004, "step": 34880 }, { "epoch": 0.5708909433036079, "grad_norm": 0.04457514956056342, "learning_rate": 4.635775228176882e-06, "loss": 0.003, "step": 34890 }, { "epoch": 0.5710545692546838, "grad_norm": 0.3484718784970026, "learning_rate": 4.6329270373628645e-06, "loss": 0.0049, "step": 34900 }, { "epoch": 0.5712181952057597, "grad_norm": 0.0647244875524105, "learning_rate": 4.63007896629991e-06, "loss": 0.0041, "step": 34910 }, { "epoch": 0.5713818211568354, "grad_norm": 0.15067403917686212, "learning_rate": 4.627231015917149e-06, "loss": 0.0017, "step": 34920 }, { "epoch": 0.5715454471079113, "grad_norm": 0.138335357204624, "learning_rate": 4.624383187143678e-06, "loss": 0.0031, "step": 34930 }, { "epoch": 0.5717090730589871, "grad_norm": 0.3433662125079007, "learning_rate": 4.621535480908551e-06, "loss": 0.005, "step": 34940 }, { "epoch": 0.571872699010063, "grad_norm": 0.2781255736222882, "learning_rate": 4.618687898140777e-06, "loss": 0.0018, "step": 34950 }, { "epoch": 0.5720363249611389, "grad_norm": 0.16556140968733749, "learning_rate": 4.615840439769335e-06, "loss": 0.0035, "step": 34960 }, { "epoch": 0.5721999509122146, "grad_norm": 0.11192981865341041, "learning_rate": 4.612993106723156e-06, "loss": 0.0034, "step": 34970 }, { "epoch": 0.5723635768632905, "grad_norm": 0.18455268316402856, "learning_rate": 4.6101458999311305e-06, "loss": 0.0026, "step": 34980 }, { "epoch": 0.5725272028143663, "grad_norm": 0.09519673337402712, "learning_rate": 4.607298820322112e-06, "loss": 0.0022, "step": 34990 }, { "epoch": 0.5726908287654422, "grad_norm": 0.28592452873446594, "learning_rate": 4.604451868824908e-06, "loss": 0.0042, "step": 35000 }, { "epoch": 0.5728544547165181, "grad_norm": 0.06808254018368712, "learning_rate": 4.601605046368286e-06, "loss": 0.0027, "step": 35010 }, { "epoch": 0.5730180806675939, "grad_norm": 0.1961892191344398, "learning_rate": 4.598758353880973e-06, "loss": 0.0031, "step": 35020 }, { "epoch": 0.5731817066186697, "grad_norm": 0.004998944998617602, "learning_rate": 4.595911792291651e-06, "loss": 0.0023, "step": 35030 }, { "epoch": 0.5733453325697455, "grad_norm": 0.09886125808936169, "learning_rate": 4.59306536252896e-06, "loss": 0.0019, "step": 35040 }, { "epoch": 0.5735089585208214, "grad_norm": 0.12288144918918424, "learning_rate": 4.590219065521498e-06, "loss": 0.0028, "step": 35050 }, { "epoch": 0.5736725844718973, "grad_norm": 0.16628383535940017, "learning_rate": 4.58737290219782e-06, "loss": 0.0024, "step": 35060 }, { "epoch": 0.5738362104229731, "grad_norm": 0.23073372945036774, "learning_rate": 4.584526873486434e-06, "loss": 0.0043, "step": 35070 }, { "epoch": 0.5739998363740489, "grad_norm": 0.04255723519591567, "learning_rate": 4.58168098031581e-06, "loss": 0.0024, "step": 35080 }, { "epoch": 0.5741634623251247, "grad_norm": 0.29005401785023754, "learning_rate": 4.578835223614367e-06, "loss": 0.0035, "step": 35090 }, { "epoch": 0.5743270882762006, "grad_norm": 0.14690027524514798, "learning_rate": 4.575989604310483e-06, "loss": 0.0036, "step": 35100 }, { "epoch": 0.5744907142272765, "grad_norm": 0.12595642158838552, "learning_rate": 4.573144123332494e-06, "loss": 0.0026, "step": 35110 }, { "epoch": 0.5746543401783523, "grad_norm": 0.2763350987435719, "learning_rate": 4.570298781608686e-06, "loss": 0.0029, "step": 35120 }, { "epoch": 0.5748179661294281, "grad_norm": 0.35190145688776797, "learning_rate": 4.5674535800673e-06, "loss": 0.0024, "step": 35130 }, { "epoch": 0.5749815920805039, "grad_norm": 0.07416830453420056, "learning_rate": 4.564608519636535e-06, "loss": 0.0026, "step": 35140 }, { "epoch": 0.5751452180315798, "grad_norm": 0.4236063947368003, "learning_rate": 4.561763601244541e-06, "loss": 0.0027, "step": 35150 }, { "epoch": 0.5753088439826557, "grad_norm": 0.08428294166818885, "learning_rate": 4.55891882581942e-06, "loss": 0.0021, "step": 35160 }, { "epoch": 0.5754724699337315, "grad_norm": 0.19782619085405367, "learning_rate": 4.5560741942892335e-06, "loss": 0.003, "step": 35170 }, { "epoch": 0.5756360958848074, "grad_norm": 0.44167748705152654, "learning_rate": 4.553229707581991e-06, "loss": 0.0017, "step": 35180 }, { "epoch": 0.5757997218358831, "grad_norm": 0.3324364830142215, "learning_rate": 4.550385366625653e-06, "loss": 0.0055, "step": 35190 }, { "epoch": 0.575963347786959, "grad_norm": 0.3023508666240383, "learning_rate": 4.54754117234814e-06, "loss": 0.003, "step": 35200 }, { "epoch": 0.5761269737380349, "grad_norm": 0.22533043942256858, "learning_rate": 4.544697125677317e-06, "loss": 0.0047, "step": 35210 }, { "epoch": 0.5762905996891107, "grad_norm": 0.20307455718785172, "learning_rate": 4.5418532275410045e-06, "loss": 0.0031, "step": 35220 }, { "epoch": 0.5764542256401866, "grad_norm": 0.08921232291241027, "learning_rate": 4.539009478866975e-06, "loss": 0.005, "step": 35230 }, { "epoch": 0.5766178515912623, "grad_norm": 0.10398808734965202, "learning_rate": 4.53616588058295e-06, "loss": 0.0025, "step": 35240 }, { "epoch": 0.5767814775423382, "grad_norm": 0.06834456850097631, "learning_rate": 4.533322433616604e-06, "loss": 0.003, "step": 35250 }, { "epoch": 0.5769451034934141, "grad_norm": 0.1329335882340543, "learning_rate": 4.530479138895561e-06, "loss": 0.0018, "step": 35260 }, { "epoch": 0.5771087294444899, "grad_norm": 0.18961894567010026, "learning_rate": 4.527635997347395e-06, "loss": 0.0018, "step": 35270 }, { "epoch": 0.5772723553955658, "grad_norm": 0.12992920761495436, "learning_rate": 4.524793009899633e-06, "loss": 0.0023, "step": 35280 }, { "epoch": 0.5774359813466415, "grad_norm": 0.028874766206016186, "learning_rate": 4.521950177479745e-06, "loss": 0.0024, "step": 35290 }, { "epoch": 0.5775996072977174, "grad_norm": 0.11220881517690003, "learning_rate": 4.519107501015159e-06, "loss": 0.0019, "step": 35300 }, { "epoch": 0.5777632332487933, "grad_norm": 0.2345739368095933, "learning_rate": 4.516264981433246e-06, "loss": 0.0029, "step": 35310 }, { "epoch": 0.5779268591998691, "grad_norm": 0.16103023796128926, "learning_rate": 4.513422619661328e-06, "loss": 0.0031, "step": 35320 }, { "epoch": 0.578090485150945, "grad_norm": 0.1860806586145409, "learning_rate": 4.510580416626675e-06, "loss": 0.0034, "step": 35330 }, { "epoch": 0.5782541111020207, "grad_norm": 0.1850667541021402, "learning_rate": 4.507738373256507e-06, "loss": 0.0036, "step": 35340 }, { "epoch": 0.5784177370530966, "grad_norm": 0.10324916870975388, "learning_rate": 4.504896490477988e-06, "loss": 0.0035, "step": 35350 }, { "epoch": 0.5785813630041725, "grad_norm": 0.5119864671505694, "learning_rate": 4.502054769218234e-06, "loss": 0.0027, "step": 35360 }, { "epoch": 0.5787449889552483, "grad_norm": 0.32733033073044265, "learning_rate": 4.499213210404305e-06, "loss": 0.0034, "step": 35370 }, { "epoch": 0.5789086149063242, "grad_norm": 0.4681426877133344, "learning_rate": 4.49637181496321e-06, "loss": 0.0036, "step": 35380 }, { "epoch": 0.5790722408574, "grad_norm": 0.18864255296210705, "learning_rate": 4.493530583821904e-06, "loss": 0.0037, "step": 35390 }, { "epoch": 0.5792358668084758, "grad_norm": 0.21523847610998229, "learning_rate": 4.490689517907288e-06, "loss": 0.0051, "step": 35400 }, { "epoch": 0.5793994927595517, "grad_norm": 0.20821127391843855, "learning_rate": 4.487848618146209e-06, "loss": 0.0042, "step": 35410 }, { "epoch": 0.5795631187106275, "grad_norm": 0.016955066373417756, "learning_rate": 4.485007885465462e-06, "loss": 0.003, "step": 35420 }, { "epoch": 0.5797267446617034, "grad_norm": 0.13746991830458877, "learning_rate": 4.482167320791786e-06, "loss": 0.0024, "step": 35430 }, { "epoch": 0.5798903706127791, "grad_norm": 0.3522013700940443, "learning_rate": 4.479326925051862e-06, "loss": 0.0041, "step": 35440 }, { "epoch": 0.580053996563855, "grad_norm": 0.46507976524046846, "learning_rate": 4.476486699172322e-06, "loss": 0.0033, "step": 35450 }, { "epoch": 0.5802176225149309, "grad_norm": 0.6915992970846698, "learning_rate": 4.473646644079738e-06, "loss": 0.0029, "step": 35460 }, { "epoch": 0.5803812484660067, "grad_norm": 0.020799982935066054, "learning_rate": 4.470806760700627e-06, "loss": 0.0027, "step": 35470 }, { "epoch": 0.5805448744170826, "grad_norm": 0.27726665504849785, "learning_rate": 4.467967049961452e-06, "loss": 0.0029, "step": 35480 }, { "epoch": 0.5807085003681584, "grad_norm": 0.17385091629629307, "learning_rate": 4.465127512788619e-06, "loss": 0.0031, "step": 35490 }, { "epoch": 0.5808721263192342, "grad_norm": 0.07759368622562746, "learning_rate": 4.4622881501084745e-06, "loss": 0.0021, "step": 35500 }, { "epoch": 0.5810357522703101, "grad_norm": 0.015916280396538602, "learning_rate": 4.459448962847313e-06, "loss": 0.0025, "step": 35510 }, { "epoch": 0.5811993782213859, "grad_norm": 0.14307596543750917, "learning_rate": 4.456609951931367e-06, "loss": 0.0029, "step": 35520 }, { "epoch": 0.5813630041724618, "grad_norm": 0.045026270071096046, "learning_rate": 4.4537711182868164e-06, "loss": 0.0027, "step": 35530 }, { "epoch": 0.5815266301235376, "grad_norm": 0.1875311325041476, "learning_rate": 4.450932462839775e-06, "loss": 0.0029, "step": 35540 }, { "epoch": 0.5816902560746134, "grad_norm": 0.046246788565296154, "learning_rate": 4.44809398651631e-06, "loss": 0.0029, "step": 35550 }, { "epoch": 0.5818538820256893, "grad_norm": 0.20613248243906757, "learning_rate": 4.445255690242422e-06, "loss": 0.0021, "step": 35560 }, { "epoch": 0.5820175079767651, "grad_norm": 0.12109330242773472, "learning_rate": 4.442417574944052e-06, "loss": 0.003, "step": 35570 }, { "epoch": 0.582181133927841, "grad_norm": 0.07933825244372066, "learning_rate": 4.439579641547089e-06, "loss": 0.0037, "step": 35580 }, { "epoch": 0.5823447598789168, "grad_norm": 0.20272782117099777, "learning_rate": 4.4367418909773595e-06, "loss": 0.0026, "step": 35590 }, { "epoch": 0.5825083858299926, "grad_norm": 0.25216766311627675, "learning_rate": 4.433904324160622e-06, "loss": 0.0049, "step": 35600 }, { "epoch": 0.5826720117810685, "grad_norm": 0.129372277136597, "learning_rate": 4.431066942022591e-06, "loss": 0.0029, "step": 35610 }, { "epoch": 0.5828356377321443, "grad_norm": 0.03031690936312735, "learning_rate": 4.428229745488909e-06, "loss": 0.0033, "step": 35620 }, { "epoch": 0.5829992636832202, "grad_norm": 0.20997047578296665, "learning_rate": 4.425392735485158e-06, "loss": 0.005, "step": 35630 }, { "epoch": 0.583162889634296, "grad_norm": 0.19467087799413413, "learning_rate": 4.4225559129368675e-06, "loss": 0.0041, "step": 35640 }, { "epoch": 0.5833265155853719, "grad_norm": 0.23683170458369324, "learning_rate": 4.419719278769499e-06, "loss": 0.004, "step": 35650 }, { "epoch": 0.5834901415364476, "grad_norm": 0.0963018572373444, "learning_rate": 4.41688283390845e-06, "loss": 0.0038, "step": 35660 }, { "epoch": 0.5836537674875235, "grad_norm": 0.12351831580258514, "learning_rate": 4.414046579279068e-06, "loss": 0.0026, "step": 35670 }, { "epoch": 0.5838173934385994, "grad_norm": 0.2287933481164842, "learning_rate": 4.411210515806627e-06, "loss": 0.0026, "step": 35680 }, { "epoch": 0.5839810193896752, "grad_norm": 0.14680820117195276, "learning_rate": 4.40837464441634e-06, "loss": 0.0031, "step": 35690 }, { "epoch": 0.5841446453407511, "grad_norm": 0.04870131994647957, "learning_rate": 4.405538966033364e-06, "loss": 0.0009, "step": 35700 }, { "epoch": 0.5843082712918268, "grad_norm": 0.25407334832327355, "learning_rate": 4.40270348158279e-06, "loss": 0.0035, "step": 35710 }, { "epoch": 0.5844718972429027, "grad_norm": 0.30090296303199754, "learning_rate": 4.399868191989637e-06, "loss": 0.003, "step": 35720 }, { "epoch": 0.5846355231939786, "grad_norm": 0.3024970167537329, "learning_rate": 4.397033098178876e-06, "loss": 0.0027, "step": 35730 }, { "epoch": 0.5847991491450544, "grad_norm": 0.062468027634000146, "learning_rate": 4.394198201075405e-06, "loss": 0.003, "step": 35740 }, { "epoch": 0.5849627750961303, "grad_norm": 0.18311095051418277, "learning_rate": 4.391363501604054e-06, "loss": 0.0029, "step": 35750 }, { "epoch": 0.585126401047206, "grad_norm": 0.16001482253822655, "learning_rate": 4.388529000689599e-06, "loss": 0.0082, "step": 35760 }, { "epoch": 0.5852900269982819, "grad_norm": 0.1803767564390275, "learning_rate": 4.385694699256745e-06, "loss": 0.0037, "step": 35770 }, { "epoch": 0.5854536529493578, "grad_norm": 0.06806524486248523, "learning_rate": 4.382860598230128e-06, "loss": 0.0026, "step": 35780 }, { "epoch": 0.5856172789004336, "grad_norm": 0.0750015732753322, "learning_rate": 4.380026698534329e-06, "loss": 0.0029, "step": 35790 }, { "epoch": 0.5857809048515095, "grad_norm": 0.16755614572288596, "learning_rate": 4.377193001093858e-06, "loss": 0.0037, "step": 35800 }, { "epoch": 0.5859445308025852, "grad_norm": 0.10425387318683599, "learning_rate": 4.374359506833151e-06, "loss": 0.0026, "step": 35810 }, { "epoch": 0.5861081567536611, "grad_norm": 0.2508146305261222, "learning_rate": 4.371526216676594e-06, "loss": 0.0029, "step": 35820 }, { "epoch": 0.586271782704737, "grad_norm": 0.19149196682855552, "learning_rate": 4.3686931315484944e-06, "loss": 0.0016, "step": 35830 }, { "epoch": 0.5864354086558128, "grad_norm": 0.10075282915132526, "learning_rate": 4.3658602523730945e-06, "loss": 0.0023, "step": 35840 }, { "epoch": 0.5865990346068887, "grad_norm": 0.0323738266313214, "learning_rate": 4.363027580074574e-06, "loss": 0.0018, "step": 35850 }, { "epoch": 0.5867626605579644, "grad_norm": 0.030786385606715486, "learning_rate": 4.3601951155770416e-06, "loss": 0.005, "step": 35860 }, { "epoch": 0.5869262865090403, "grad_norm": 0.08847758291392628, "learning_rate": 4.357362859804537e-06, "loss": 0.0017, "step": 35870 }, { "epoch": 0.5870899124601162, "grad_norm": 0.06109116090231912, "learning_rate": 4.3545308136810325e-06, "loss": 0.0028, "step": 35880 }, { "epoch": 0.587253538411192, "grad_norm": 0.24084798163202856, "learning_rate": 4.3516989781304385e-06, "loss": 0.0035, "step": 35890 }, { "epoch": 0.5874171643622679, "grad_norm": 0.026981477878423255, "learning_rate": 4.348867354076587e-06, "loss": 0.0024, "step": 35900 }, { "epoch": 0.5875807903133436, "grad_norm": 0.13507826268212794, "learning_rate": 4.346035942443244e-06, "loss": 0.0021, "step": 35910 }, { "epoch": 0.5877444162644195, "grad_norm": 0.028145795378504878, "learning_rate": 4.343204744154111e-06, "loss": 0.003, "step": 35920 }, { "epoch": 0.5879080422154954, "grad_norm": 0.10039567198233354, "learning_rate": 4.340373760132816e-06, "loss": 0.0021, "step": 35930 }, { "epoch": 0.5880716681665712, "grad_norm": 0.18722086296414542, "learning_rate": 4.337542991302914e-06, "loss": 0.0027, "step": 35940 }, { "epoch": 0.5882352941176471, "grad_norm": 0.0838904958578955, "learning_rate": 4.334712438587898e-06, "loss": 0.0023, "step": 35950 }, { "epoch": 0.5883989200687229, "grad_norm": 0.1269674298723188, "learning_rate": 4.331882102911183e-06, "loss": 0.0022, "step": 35960 }, { "epoch": 0.5885625460197987, "grad_norm": 0.05790536181225277, "learning_rate": 4.329051985196116e-06, "loss": 0.0026, "step": 35970 }, { "epoch": 0.5887261719708746, "grad_norm": 0.13829500902628994, "learning_rate": 4.326222086365973e-06, "loss": 0.0032, "step": 35980 }, { "epoch": 0.5888897979219504, "grad_norm": 0.10219341941910584, "learning_rate": 4.323392407343959e-06, "loss": 0.0029, "step": 35990 }, { "epoch": 0.5890534238730263, "grad_norm": 0.09537531408532889, "learning_rate": 4.320562949053206e-06, "loss": 0.0022, "step": 36000 }, { "epoch": 0.5892170498241021, "grad_norm": 0.13231540796366972, "learning_rate": 4.317733712416776e-06, "loss": 0.003, "step": 36010 }, { "epoch": 0.5893806757751779, "grad_norm": 0.09675361578730217, "learning_rate": 4.314904698357657e-06, "loss": 0.0021, "step": 36020 }, { "epoch": 0.5895443017262538, "grad_norm": 0.17332796963974267, "learning_rate": 4.312075907798763e-06, "loss": 0.0032, "step": 36030 }, { "epoch": 0.5897079276773296, "grad_norm": 0.0934293702770415, "learning_rate": 4.3092473416629395e-06, "loss": 0.003, "step": 36040 }, { "epoch": 0.5898715536284055, "grad_norm": 0.0481067267652945, "learning_rate": 4.306419000872955e-06, "loss": 0.0035, "step": 36050 }, { "epoch": 0.5900351795794813, "grad_norm": 0.16582398036499876, "learning_rate": 4.303590886351505e-06, "loss": 0.0036, "step": 36060 }, { "epoch": 0.5901988055305571, "grad_norm": 0.15322998141371008, "learning_rate": 4.300762999021213e-06, "loss": 0.0026, "step": 36070 }, { "epoch": 0.590362431481633, "grad_norm": 0.10188242147606635, "learning_rate": 4.297935339804627e-06, "loss": 0.003, "step": 36080 }, { "epoch": 0.5905260574327088, "grad_norm": 0.13989161940417744, "learning_rate": 4.295107909624219e-06, "loss": 0.0017, "step": 36090 }, { "epoch": 0.5906896833837847, "grad_norm": 0.02070882726330858, "learning_rate": 4.292280709402391e-06, "loss": 0.0019, "step": 36100 }, { "epoch": 0.5908533093348605, "grad_norm": 0.2668412935401631, "learning_rate": 4.289453740061466e-06, "loss": 0.0033, "step": 36110 }, { "epoch": 0.5910169352859364, "grad_norm": 0.17066280042492676, "learning_rate": 4.286627002523691e-06, "loss": 0.0023, "step": 36120 }, { "epoch": 0.5911805612370122, "grad_norm": 0.1269775365479083, "learning_rate": 4.2838004977112425e-06, "loss": 0.002, "step": 36130 }, { "epoch": 0.591344187188088, "grad_norm": 0.2055963107680671, "learning_rate": 4.280974226546215e-06, "loss": 0.0035, "step": 36140 }, { "epoch": 0.5915078131391639, "grad_norm": 0.13044974868014672, "learning_rate": 4.278148189950629e-06, "loss": 0.0017, "step": 36150 }, { "epoch": 0.5916714390902397, "grad_norm": 0.0914530302327454, "learning_rate": 4.275322388846432e-06, "loss": 0.0028, "step": 36160 }, { "epoch": 0.5918350650413156, "grad_norm": 0.12257619100752548, "learning_rate": 4.2724968241554895e-06, "loss": 0.0029, "step": 36170 }, { "epoch": 0.5919986909923914, "grad_norm": 0.12296267011965546, "learning_rate": 4.269671496799592e-06, "loss": 0.0052, "step": 36180 }, { "epoch": 0.5921623169434672, "grad_norm": 0.2131324547467885, "learning_rate": 4.266846407700454e-06, "loss": 0.0033, "step": 36190 }, { "epoch": 0.5923259428945431, "grad_norm": 0.059827416722857116, "learning_rate": 4.264021557779709e-06, "loss": 0.0021, "step": 36200 }, { "epoch": 0.5924895688456189, "grad_norm": 0.20032620373688537, "learning_rate": 4.261196947958916e-06, "loss": 0.004, "step": 36210 }, { "epoch": 0.5926531947966948, "grad_norm": 0.31232870247444217, "learning_rate": 4.258372579159552e-06, "loss": 0.0039, "step": 36220 }, { "epoch": 0.5928168207477706, "grad_norm": 0.10748511261964865, "learning_rate": 4.2555484523030194e-06, "loss": 0.0023, "step": 36230 }, { "epoch": 0.5929804466988464, "grad_norm": 0.08386966760149098, "learning_rate": 4.2527245683106385e-06, "loss": 0.0052, "step": 36240 }, { "epoch": 0.5931440726499223, "grad_norm": 0.1438195469608127, "learning_rate": 4.249900928103652e-06, "loss": 0.0039, "step": 36250 }, { "epoch": 0.5933076986009981, "grad_norm": 0.08296149716315411, "learning_rate": 4.247077532603222e-06, "loss": 0.0021, "step": 36260 }, { "epoch": 0.593471324552074, "grad_norm": 0.16364012130807842, "learning_rate": 4.244254382730433e-06, "loss": 0.003, "step": 36270 }, { "epoch": 0.5936349505031498, "grad_norm": 0.18309654039073942, "learning_rate": 4.2414314794062845e-06, "loss": 0.0019, "step": 36280 }, { "epoch": 0.5937985764542256, "grad_norm": 0.0879103603041914, "learning_rate": 4.238608823551702e-06, "loss": 0.0018, "step": 36290 }, { "epoch": 0.5939622024053015, "grad_norm": 0.19810480649906168, "learning_rate": 4.2357864160875265e-06, "loss": 0.0025, "step": 36300 }, { "epoch": 0.5941258283563773, "grad_norm": 0.2783089017279225, "learning_rate": 4.2329642579345156e-06, "loss": 0.0044, "step": 36310 }, { "epoch": 0.5942894543074532, "grad_norm": 0.10276683428460374, "learning_rate": 4.230142350013353e-06, "loss": 0.0022, "step": 36320 }, { "epoch": 0.594453080258529, "grad_norm": 0.03892344742568337, "learning_rate": 4.227320693244633e-06, "loss": 0.0038, "step": 36330 }, { "epoch": 0.5946167062096048, "grad_norm": 0.21970769719073138, "learning_rate": 4.224499288548871e-06, "loss": 0.0041, "step": 36340 }, { "epoch": 0.5947803321606807, "grad_norm": 0.15486301289989388, "learning_rate": 4.221678136846503e-06, "loss": 0.0021, "step": 36350 }, { "epoch": 0.5949439581117565, "grad_norm": 0.31586739587149004, "learning_rate": 4.218857239057879e-06, "loss": 0.0025, "step": 36360 }, { "epoch": 0.5951075840628324, "grad_norm": 0.2612885259064541, "learning_rate": 4.216036596103265e-06, "loss": 0.0033, "step": 36370 }, { "epoch": 0.5952712100139083, "grad_norm": 0.1735913451421948, "learning_rate": 4.213216208902848e-06, "loss": 0.0023, "step": 36380 }, { "epoch": 0.595434835964984, "grad_norm": 0.23817832074767442, "learning_rate": 4.21039607837673e-06, "loss": 0.0023, "step": 36390 }, { "epoch": 0.5955984619160599, "grad_norm": 0.2035655389936723, "learning_rate": 4.207576205444925e-06, "loss": 0.0041, "step": 36400 }, { "epoch": 0.5957620878671357, "grad_norm": 0.35237767609710274, "learning_rate": 4.204756591027372e-06, "loss": 0.0034, "step": 36410 }, { "epoch": 0.5959257138182116, "grad_norm": 0.1554371718966407, "learning_rate": 4.201937236043916e-06, "loss": 0.0033, "step": 36420 }, { "epoch": 0.5960893397692875, "grad_norm": 0.03575689776280272, "learning_rate": 4.199118141414322e-06, "loss": 0.0036, "step": 36430 }, { "epoch": 0.5962529657203632, "grad_norm": 0.050627732821681336, "learning_rate": 4.196299308058273e-06, "loss": 0.0043, "step": 36440 }, { "epoch": 0.5964165916714391, "grad_norm": 0.06014265321114109, "learning_rate": 4.19348073689536e-06, "loss": 0.0038, "step": 36450 }, { "epoch": 0.5965802176225149, "grad_norm": 0.13389561860287294, "learning_rate": 4.190662428845093e-06, "loss": 0.0014, "step": 36460 }, { "epoch": 0.5967438435735908, "grad_norm": 0.07033387873776083, "learning_rate": 4.187844384826898e-06, "loss": 0.0015, "step": 36470 }, { "epoch": 0.5969074695246667, "grad_norm": 0.09645725433389785, "learning_rate": 4.185026605760109e-06, "loss": 0.0014, "step": 36480 }, { "epoch": 0.5970710954757424, "grad_norm": 0.1634236932678504, "learning_rate": 4.182209092563976e-06, "loss": 0.0029, "step": 36490 }, { "epoch": 0.5972347214268183, "grad_norm": 0.1833489140401746, "learning_rate": 4.1793918461576655e-06, "loss": 0.0017, "step": 36500 }, { "epoch": 0.5973983473778941, "grad_norm": 0.18765253298952278, "learning_rate": 4.176574867460253e-06, "loss": 0.0035, "step": 36510 }, { "epoch": 0.59756197332897, "grad_norm": 0.0994645623030929, "learning_rate": 4.173758157390727e-06, "loss": 0.0028, "step": 36520 }, { "epoch": 0.5977255992800458, "grad_norm": 0.25489759257490063, "learning_rate": 4.1709417168679905e-06, "loss": 0.0024, "step": 36530 }, { "epoch": 0.5978892252311216, "grad_norm": 0.04038336713327486, "learning_rate": 4.168125546810858e-06, "loss": 0.0032, "step": 36540 }, { "epoch": 0.5980528511821975, "grad_norm": 0.33041963522856893, "learning_rate": 4.165309648138054e-06, "loss": 0.0047, "step": 36550 }, { "epoch": 0.5982164771332733, "grad_norm": 0.09704289692522848, "learning_rate": 4.162494021768215e-06, "loss": 0.0033, "step": 36560 }, { "epoch": 0.5983801030843492, "grad_norm": 0.12082442249109245, "learning_rate": 4.159678668619891e-06, "loss": 0.0024, "step": 36570 }, { "epoch": 0.598543729035425, "grad_norm": 0.008410919028756136, "learning_rate": 4.156863589611541e-06, "loss": 0.0034, "step": 36580 }, { "epoch": 0.5987073549865009, "grad_norm": 0.1445664055030885, "learning_rate": 4.154048785661533e-06, "loss": 0.0019, "step": 36590 }, { "epoch": 0.5988709809375767, "grad_norm": 0.05458038788310649, "learning_rate": 4.151234257688149e-06, "loss": 0.004, "step": 36600 }, { "epoch": 0.5990346068886525, "grad_norm": 0.17373473755129382, "learning_rate": 4.148420006609581e-06, "loss": 0.0033, "step": 36610 }, { "epoch": 0.5991982328397284, "grad_norm": 0.09752425065598318, "learning_rate": 4.145606033343921e-06, "loss": 0.0021, "step": 36620 }, { "epoch": 0.5993618587908042, "grad_norm": 0.05326524387431546, "learning_rate": 4.142792338809185e-06, "loss": 0.0021, "step": 36630 }, { "epoch": 0.59952548474188, "grad_norm": 0.2774742155489998, "learning_rate": 4.1399789239232905e-06, "loss": 0.0034, "step": 36640 }, { "epoch": 0.5996891106929559, "grad_norm": 0.3661841911097322, "learning_rate": 4.137165789604059e-06, "loss": 0.0022, "step": 36650 }, { "epoch": 0.5998527366440317, "grad_norm": 0.3337166481242711, "learning_rate": 4.134352936769233e-06, "loss": 0.0028, "step": 36660 }, { "epoch": 0.6000163625951076, "grad_norm": 0.10019584515669817, "learning_rate": 4.131540366336453e-06, "loss": 0.0038, "step": 36670 }, { "epoch": 0.6001799885461834, "grad_norm": 0.23534840914400726, "learning_rate": 4.128728079223267e-06, "loss": 0.003, "step": 36680 }, { "epoch": 0.6003436144972593, "grad_norm": 0.1622811439425585, "learning_rate": 4.125916076347139e-06, "loss": 0.0025, "step": 36690 }, { "epoch": 0.6005072404483351, "grad_norm": 0.1966225087609585, "learning_rate": 4.123104358625434e-06, "loss": 0.0033, "step": 36700 }, { "epoch": 0.6006708663994109, "grad_norm": 0.0897234155943217, "learning_rate": 4.120292926975421e-06, "loss": 0.0032, "step": 36710 }, { "epoch": 0.6008344923504868, "grad_norm": 0.10885653523189812, "learning_rate": 4.117481782314286e-06, "loss": 0.002, "step": 36720 }, { "epoch": 0.6009981183015626, "grad_norm": 0.041460265582368684, "learning_rate": 4.114670925559113e-06, "loss": 0.0046, "step": 36730 }, { "epoch": 0.6011617442526385, "grad_norm": 0.06796192765697653, "learning_rate": 4.11186035762689e-06, "loss": 0.0018, "step": 36740 }, { "epoch": 0.6013253702037143, "grad_norm": 0.14463025606061014, "learning_rate": 4.109050079434522e-06, "loss": 0.003, "step": 36750 }, { "epoch": 0.6014889961547901, "grad_norm": 0.1994863267544617, "learning_rate": 4.106240091898811e-06, "loss": 0.0033, "step": 36760 }, { "epoch": 0.601652622105866, "grad_norm": 0.1297294055624739, "learning_rate": 4.10343039593646e-06, "loss": 0.0026, "step": 36770 }, { "epoch": 0.6018162480569418, "grad_norm": 0.20390609796758918, "learning_rate": 4.100620992464089e-06, "loss": 0.0036, "step": 36780 }, { "epoch": 0.6019798740080177, "grad_norm": 0.18485992041494434, "learning_rate": 4.0978118823982136e-06, "loss": 0.0025, "step": 36790 }, { "epoch": 0.6021434999590936, "grad_norm": 0.1038961414462875, "learning_rate": 4.095003066655255e-06, "loss": 0.0017, "step": 36800 }, { "epoch": 0.6023071259101693, "grad_norm": 0.4356324242393016, "learning_rate": 4.0921945461515425e-06, "loss": 0.0023, "step": 36810 }, { "epoch": 0.6024707518612452, "grad_norm": 0.07308170975634545, "learning_rate": 4.0893863218033066e-06, "loss": 0.0023, "step": 36820 }, { "epoch": 0.602634377812321, "grad_norm": 0.22754433366114282, "learning_rate": 4.086578394526676e-06, "loss": 0.003, "step": 36830 }, { "epoch": 0.6027980037633969, "grad_norm": 0.08012394365231984, "learning_rate": 4.083770765237693e-06, "loss": 0.0055, "step": 36840 }, { "epoch": 0.6029616297144728, "grad_norm": 0.21745660210956258, "learning_rate": 4.080963434852295e-06, "loss": 0.0012, "step": 36850 }, { "epoch": 0.6031252556655485, "grad_norm": 0.3736937339095875, "learning_rate": 4.078156404286323e-06, "loss": 0.0048, "step": 36860 }, { "epoch": 0.6032888816166244, "grad_norm": 0.07613640999537188, "learning_rate": 4.075349674455519e-06, "loss": 0.0038, "step": 36870 }, { "epoch": 0.6034525075677002, "grad_norm": 0.08246448875083312, "learning_rate": 4.0725432462755355e-06, "loss": 0.004, "step": 36880 }, { "epoch": 0.6036161335187761, "grad_norm": 0.13389691250146835, "learning_rate": 4.069737120661914e-06, "loss": 0.0035, "step": 36890 }, { "epoch": 0.603779759469852, "grad_norm": 0.0678422066898779, "learning_rate": 4.066931298530104e-06, "loss": 0.0037, "step": 36900 }, { "epoch": 0.6039433854209277, "grad_norm": 0.15672036820565613, "learning_rate": 4.06412578079546e-06, "loss": 0.0037, "step": 36910 }, { "epoch": 0.6041070113720036, "grad_norm": 0.20990148390882177, "learning_rate": 4.061320568373228e-06, "loss": 0.003, "step": 36920 }, { "epoch": 0.6042706373230794, "grad_norm": 0.2462615075851671, "learning_rate": 4.0585156621785576e-06, "loss": 0.0033, "step": 36930 }, { "epoch": 0.6044342632741553, "grad_norm": 0.20828173096515765, "learning_rate": 4.055711063126506e-06, "loss": 0.0035, "step": 36940 }, { "epoch": 0.6045978892252312, "grad_norm": 0.10782458570355521, "learning_rate": 4.0529067721320204e-06, "loss": 0.0031, "step": 36950 }, { "epoch": 0.6047615151763069, "grad_norm": 0.12215624783187451, "learning_rate": 4.050102790109948e-06, "loss": 0.0017, "step": 36960 }, { "epoch": 0.6049251411273828, "grad_norm": 0.1804569574945797, "learning_rate": 4.047299117975045e-06, "loss": 0.0044, "step": 36970 }, { "epoch": 0.6050887670784586, "grad_norm": 0.10299280275963474, "learning_rate": 4.044495756641956e-06, "loss": 0.0041, "step": 36980 }, { "epoch": 0.6052523930295345, "grad_norm": 0.2941023808095268, "learning_rate": 4.041692707025227e-06, "loss": 0.002, "step": 36990 }, { "epoch": 0.6054160189806104, "grad_norm": 0.16487873733420882, "learning_rate": 4.038889970039307e-06, "loss": 0.0027, "step": 37000 }, { "epoch": 0.6055796449316861, "grad_norm": 0.2517207590776455, "learning_rate": 4.036087546598536e-06, "loss": 0.003, "step": 37010 }, { "epoch": 0.605743270882762, "grad_norm": 0.07944757172323902, "learning_rate": 4.0332854376171546e-06, "loss": 0.0029, "step": 37020 }, { "epoch": 0.6059068968338378, "grad_norm": 0.1917589337769471, "learning_rate": 4.0304836440093065e-06, "loss": 0.0024, "step": 37030 }, { "epoch": 0.6060705227849137, "grad_norm": 0.1907877079757886, "learning_rate": 4.0276821666890235e-06, "loss": 0.002, "step": 37040 }, { "epoch": 0.6062341487359896, "grad_norm": 0.062424312543617085, "learning_rate": 4.0248810065702346e-06, "loss": 0.0079, "step": 37050 }, { "epoch": 0.6063977746870653, "grad_norm": 0.4235486738652975, "learning_rate": 4.022080164566777e-06, "loss": 0.0045, "step": 37060 }, { "epoch": 0.6065614006381412, "grad_norm": 0.12975289348897504, "learning_rate": 4.019279641592371e-06, "loss": 0.0029, "step": 37070 }, { "epoch": 0.606725026589217, "grad_norm": 0.2539890381047292, "learning_rate": 4.016479438560634e-06, "loss": 0.0031, "step": 37080 }, { "epoch": 0.6068886525402929, "grad_norm": 0.22943395179923234, "learning_rate": 4.013679556385092e-06, "loss": 0.0016, "step": 37090 }, { "epoch": 0.6070522784913688, "grad_norm": 0.08396111337592173, "learning_rate": 4.01087999597915e-06, "loss": 0.0021, "step": 37100 }, { "epoch": 0.6072159044424446, "grad_norm": 0.1399627198997756, "learning_rate": 4.0080807582561145e-06, "loss": 0.0037, "step": 37110 }, { "epoch": 0.6073795303935204, "grad_norm": 0.15299468351126108, "learning_rate": 4.005281844129194e-06, "loss": 0.0023, "step": 37120 }, { "epoch": 0.6075431563445962, "grad_norm": 0.15501451555039059, "learning_rate": 4.002483254511479e-06, "loss": 0.0023, "step": 37130 }, { "epoch": 0.6077067822956721, "grad_norm": 0.17285212226404453, "learning_rate": 3.9996849903159585e-06, "loss": 0.0021, "step": 37140 }, { "epoch": 0.607870408246748, "grad_norm": 0.09784911200727456, "learning_rate": 3.996887052455524e-06, "loss": 0.0045, "step": 37150 }, { "epoch": 0.6080340341978238, "grad_norm": 0.10470677315630754, "learning_rate": 3.9940894418429474e-06, "loss": 0.0023, "step": 37160 }, { "epoch": 0.6081976601488996, "grad_norm": 0.23220666798845344, "learning_rate": 3.991292159390898e-06, "loss": 0.0022, "step": 37170 }, { "epoch": 0.6083612860999754, "grad_norm": 0.5422134926419073, "learning_rate": 3.988495206011947e-06, "loss": 0.0044, "step": 37180 }, { "epoch": 0.6085249120510513, "grad_norm": 0.0967235690257041, "learning_rate": 3.985698582618547e-06, "loss": 0.0028, "step": 37190 }, { "epoch": 0.6086885380021272, "grad_norm": 0.20590916766387377, "learning_rate": 3.982902290123044e-06, "loss": 0.0027, "step": 37200 }, { "epoch": 0.608852163953203, "grad_norm": 0.06860356303644909, "learning_rate": 3.980106329437683e-06, "loss": 0.0032, "step": 37210 }, { "epoch": 0.6090157899042788, "grad_norm": 0.4353390584756546, "learning_rate": 3.977310701474596e-06, "loss": 0.0078, "step": 37220 }, { "epoch": 0.6091794158553546, "grad_norm": 0.22871093474962506, "learning_rate": 3.974515407145806e-06, "loss": 0.0022, "step": 37230 }, { "epoch": 0.6093430418064305, "grad_norm": 0.15099576724593036, "learning_rate": 3.971720447363228e-06, "loss": 0.0023, "step": 37240 }, { "epoch": 0.6095066677575064, "grad_norm": 0.1556828916147556, "learning_rate": 3.968925823038671e-06, "loss": 0.0023, "step": 37250 }, { "epoch": 0.6096702937085822, "grad_norm": 0.07833375696779173, "learning_rate": 3.966131535083829e-06, "loss": 0.002, "step": 37260 }, { "epoch": 0.609833919659658, "grad_norm": 0.39285409207073324, "learning_rate": 3.96333758441029e-06, "loss": 0.0045, "step": 37270 }, { "epoch": 0.6099975456107338, "grad_norm": 0.10784014870158588, "learning_rate": 3.9605439719295305e-06, "loss": 0.002, "step": 37280 }, { "epoch": 0.6101611715618097, "grad_norm": 0.15094745720172337, "learning_rate": 3.957750698552919e-06, "loss": 0.0037, "step": 37290 }, { "epoch": 0.6103247975128856, "grad_norm": 0.08645059213020569, "learning_rate": 3.954957765191708e-06, "loss": 0.0023, "step": 37300 }, { "epoch": 0.6104884234639614, "grad_norm": 0.4757451267718464, "learning_rate": 3.952165172757048e-06, "loss": 0.0025, "step": 37310 }, { "epoch": 0.6106520494150373, "grad_norm": 0.1824344222585368, "learning_rate": 3.949372922159968e-06, "loss": 0.0026, "step": 37320 }, { "epoch": 0.610815675366113, "grad_norm": 0.18145061440349983, "learning_rate": 3.946581014311392e-06, "loss": 0.002, "step": 37330 }, { "epoch": 0.6109793013171889, "grad_norm": 0.21078370551271883, "learning_rate": 3.94378945012213e-06, "loss": 0.0042, "step": 37340 }, { "epoch": 0.6111429272682648, "grad_norm": 0.09180734517427364, "learning_rate": 3.940998230502882e-06, "loss": 0.0028, "step": 37350 }, { "epoch": 0.6113065532193406, "grad_norm": 0.25692979150780904, "learning_rate": 3.9382073563642325e-06, "loss": 0.0035, "step": 37360 }, { "epoch": 0.6114701791704165, "grad_norm": 0.08201816363954675, "learning_rate": 3.935416828616656e-06, "loss": 0.0027, "step": 37370 }, { "epoch": 0.6116338051214922, "grad_norm": 0.23271137943738318, "learning_rate": 3.932626648170513e-06, "loss": 0.0035, "step": 37380 }, { "epoch": 0.6117974310725681, "grad_norm": 0.10211620419731243, "learning_rate": 3.929836815936048e-06, "loss": 0.0019, "step": 37390 }, { "epoch": 0.6119610570236439, "grad_norm": 0.15475262246981503, "learning_rate": 3.927047332823397e-06, "loss": 0.0013, "step": 37400 }, { "epoch": 0.6121246829747198, "grad_norm": 0.20688028125012914, "learning_rate": 3.92425819974258e-06, "loss": 0.0026, "step": 37410 }, { "epoch": 0.6122883089257957, "grad_norm": 0.077587263136665, "learning_rate": 3.9214694176035e-06, "loss": 0.0018, "step": 37420 }, { "epoch": 0.6124519348768714, "grad_norm": 0.09263516544069764, "learning_rate": 3.918680987315949e-06, "loss": 0.0036, "step": 37430 }, { "epoch": 0.6126155608279473, "grad_norm": 0.27478607281761475, "learning_rate": 3.915892909789604e-06, "loss": 0.0023, "step": 37440 }, { "epoch": 0.6127791867790231, "grad_norm": 0.10399753613880765, "learning_rate": 3.913105185934023e-06, "loss": 0.0016, "step": 37450 }, { "epoch": 0.612942812730099, "grad_norm": 0.1632163488952384, "learning_rate": 3.910317816658655e-06, "loss": 0.004, "step": 37460 }, { "epoch": 0.6131064386811749, "grad_norm": 0.166577690581626, "learning_rate": 3.907530802872829e-06, "loss": 0.0024, "step": 37470 }, { "epoch": 0.6132700646322506, "grad_norm": 0.17666681093128778, "learning_rate": 3.904744145485758e-06, "loss": 0.0024, "step": 37480 }, { "epoch": 0.6134336905833265, "grad_norm": 0.27118543042674303, "learning_rate": 3.90195784540654e-06, "loss": 0.0043, "step": 37490 }, { "epoch": 0.6135973165344023, "grad_norm": 0.11806123229635974, "learning_rate": 3.899171903544157e-06, "loss": 0.0036, "step": 37500 }, { "epoch": 0.6137609424854782, "grad_norm": 0.134944003566278, "learning_rate": 3.896386320807472e-06, "loss": 0.0036, "step": 37510 }, { "epoch": 0.6139245684365541, "grad_norm": 0.34464866186388027, "learning_rate": 3.893601098105233e-06, "loss": 0.0045, "step": 37520 }, { "epoch": 0.6140881943876298, "grad_norm": 0.15278754145298523, "learning_rate": 3.89081623634607e-06, "loss": 0.0017, "step": 37530 }, { "epoch": 0.6142518203387057, "grad_norm": 0.018011850841128607, "learning_rate": 3.8880317364384935e-06, "loss": 0.0041, "step": 37540 }, { "epoch": 0.6144154462897815, "grad_norm": 0.1805694074917229, "learning_rate": 3.8852475992908974e-06, "loss": 0.0019, "step": 37550 }, { "epoch": 0.6145790722408574, "grad_norm": 0.011590662245095196, "learning_rate": 3.882463825811558e-06, "loss": 0.0016, "step": 37560 }, { "epoch": 0.6147426981919333, "grad_norm": 0.4635811278447878, "learning_rate": 3.879680416908634e-06, "loss": 0.0031, "step": 37570 }, { "epoch": 0.614906324143009, "grad_norm": 0.1743790650992657, "learning_rate": 3.876897373490159e-06, "loss": 0.0037, "step": 37580 }, { "epoch": 0.6150699500940849, "grad_norm": 0.22982782593495038, "learning_rate": 3.874114696464056e-06, "loss": 0.0041, "step": 37590 }, { "epoch": 0.6152335760451607, "grad_norm": 0.08763199988950265, "learning_rate": 3.871332386738122e-06, "loss": 0.0068, "step": 37600 }, { "epoch": 0.6153972019962366, "grad_norm": 0.24790797675490406, "learning_rate": 3.868550445220036e-06, "loss": 0.0019, "step": 37610 }, { "epoch": 0.6155608279473125, "grad_norm": 0.16530750972333086, "learning_rate": 3.865768872817358e-06, "loss": 0.0029, "step": 37620 }, { "epoch": 0.6157244538983883, "grad_norm": 0.08996404129814316, "learning_rate": 3.862987670437528e-06, "loss": 0.0021, "step": 37630 }, { "epoch": 0.6158880798494641, "grad_norm": 0.33987167523188566, "learning_rate": 3.86020683898786e-06, "loss": 0.0037, "step": 37640 }, { "epoch": 0.6160517058005399, "grad_norm": 0.19902976122416022, "learning_rate": 3.857426379375556e-06, "loss": 0.0033, "step": 37650 }, { "epoch": 0.6162153317516158, "grad_norm": 0.3512830031517978, "learning_rate": 3.85464629250769e-06, "loss": 0.0029, "step": 37660 }, { "epoch": 0.6163789577026917, "grad_norm": 0.1077441435232946, "learning_rate": 3.851866579291214e-06, "loss": 0.0029, "step": 37670 }, { "epoch": 0.6165425836537675, "grad_norm": 0.1277194500869837, "learning_rate": 3.849087240632963e-06, "loss": 0.0031, "step": 37680 }, { "epoch": 0.6167062096048433, "grad_norm": 0.20216431905643192, "learning_rate": 3.846308277439647e-06, "loss": 0.0021, "step": 37690 }, { "epoch": 0.6168698355559191, "grad_norm": 0.13412720432296538, "learning_rate": 3.84352969061785e-06, "loss": 0.0019, "step": 37700 }, { "epoch": 0.617033461506995, "grad_norm": 0.11020329427162362, "learning_rate": 3.840751481074042e-06, "loss": 0.0034, "step": 37710 }, { "epoch": 0.6171970874580709, "grad_norm": 0.07540799013254593, "learning_rate": 3.837973649714562e-06, "loss": 0.0031, "step": 37720 }, { "epoch": 0.6173607134091467, "grad_norm": 0.24201166602564259, "learning_rate": 3.835196197445627e-06, "loss": 0.0037, "step": 37730 }, { "epoch": 0.6175243393602226, "grad_norm": 0.161167806069703, "learning_rate": 3.832419125173336e-06, "loss": 0.0044, "step": 37740 }, { "epoch": 0.6176879653112983, "grad_norm": 0.43729911749337025, "learning_rate": 3.829642433803656e-06, "loss": 0.0029, "step": 37750 }, { "epoch": 0.6178515912623742, "grad_norm": 0.0931949460377228, "learning_rate": 3.826866124242433e-06, "loss": 0.0023, "step": 37760 }, { "epoch": 0.6180152172134501, "grad_norm": 0.2932474484248866, "learning_rate": 3.824090197395393e-06, "loss": 0.0044, "step": 37770 }, { "epoch": 0.6181788431645259, "grad_norm": 0.21375748714345, "learning_rate": 3.821314654168129e-06, "loss": 0.0036, "step": 37780 }, { "epoch": 0.6183424691156018, "grad_norm": 0.06162195006286049, "learning_rate": 3.8185394954661135e-06, "loss": 0.0022, "step": 37790 }, { "epoch": 0.6185060950666775, "grad_norm": 0.009047925373010546, "learning_rate": 3.8157647221946944e-06, "loss": 0.003, "step": 37800 }, { "epoch": 0.6186697210177534, "grad_norm": 0.022962267625751605, "learning_rate": 3.812990335259093e-06, "loss": 0.0027, "step": 37810 }, { "epoch": 0.6188333469688293, "grad_norm": 0.2811568043788382, "learning_rate": 3.8102163355644014e-06, "loss": 0.0044, "step": 37820 }, { "epoch": 0.6189969729199051, "grad_norm": 0.5305987868241939, "learning_rate": 3.8074427240155898e-06, "loss": 0.003, "step": 37830 }, { "epoch": 0.619160598870981, "grad_norm": 0.6526296697879033, "learning_rate": 3.8046695015175e-06, "loss": 0.0041, "step": 37840 }, { "epoch": 0.6193242248220567, "grad_norm": 0.11485414593826405, "learning_rate": 3.801896668974847e-06, "loss": 0.004, "step": 37850 }, { "epoch": 0.6194878507731326, "grad_norm": 0.4085473348055722, "learning_rate": 3.7991242272922146e-06, "loss": 0.0038, "step": 37860 }, { "epoch": 0.6196514767242085, "grad_norm": 0.15413543882918404, "learning_rate": 3.7963521773740674e-06, "loss": 0.0027, "step": 37870 }, { "epoch": 0.6198151026752843, "grad_norm": 0.10574787854796569, "learning_rate": 3.793580520124738e-06, "loss": 0.0017, "step": 37880 }, { "epoch": 0.6199787286263602, "grad_norm": 0.28003193496730594, "learning_rate": 3.790809256448425e-06, "loss": 0.0039, "step": 37890 }, { "epoch": 0.6201423545774359, "grad_norm": 0.4820275381089115, "learning_rate": 3.7880383872492106e-06, "loss": 0.0024, "step": 37900 }, { "epoch": 0.6203059805285118, "grad_norm": 0.14164787902785522, "learning_rate": 3.7852679134310394e-06, "loss": 0.0036, "step": 37910 }, { "epoch": 0.6204696064795877, "grad_norm": 0.2926391412970169, "learning_rate": 3.7824978358977267e-06, "loss": 0.0025, "step": 37920 }, { "epoch": 0.6206332324306635, "grad_norm": 0.2222956132011905, "learning_rate": 3.779728155552966e-06, "loss": 0.003, "step": 37930 }, { "epoch": 0.6207968583817394, "grad_norm": 0.21625047244474455, "learning_rate": 3.776958873300315e-06, "loss": 0.0064, "step": 37940 }, { "epoch": 0.6209604843328151, "grad_norm": 0.08386944922895864, "learning_rate": 3.774189990043199e-06, "loss": 0.0025, "step": 37950 }, { "epoch": 0.621124110283891, "grad_norm": 0.18876617995396233, "learning_rate": 3.7714215066849243e-06, "loss": 0.0034, "step": 37960 }, { "epoch": 0.6212877362349669, "grad_norm": 0.361279451699795, "learning_rate": 3.768653424128654e-06, "loss": 0.0019, "step": 37970 }, { "epoch": 0.6214513621860427, "grad_norm": 0.04661353442884274, "learning_rate": 3.765885743277427e-06, "loss": 0.002, "step": 37980 }, { "epoch": 0.6216149881371186, "grad_norm": 0.14946435935451746, "learning_rate": 3.7631184650341545e-06, "loss": 0.0031, "step": 37990 }, { "epoch": 0.6217786140881943, "grad_norm": 0.15677448053994492, "learning_rate": 3.7603515903016077e-06, "loss": 0.0022, "step": 38000 }, { "epoch": 0.6219422400392702, "grad_norm": 0.14970270745471875, "learning_rate": 3.7575851199824288e-06, "loss": 0.0025, "step": 38010 }, { "epoch": 0.6221058659903461, "grad_norm": 0.10205677266152648, "learning_rate": 3.7548190549791365e-06, "loss": 0.0017, "step": 38020 }, { "epoch": 0.6222694919414219, "grad_norm": 0.08852553786046366, "learning_rate": 3.7520533961941056e-06, "loss": 0.0027, "step": 38030 }, { "epoch": 0.6224331178924978, "grad_norm": 0.12071588883982835, "learning_rate": 3.7492881445295824e-06, "loss": 0.0032, "step": 38040 }, { "epoch": 0.6225967438435736, "grad_norm": 0.09490543561382184, "learning_rate": 3.7465233008876874e-06, "loss": 0.0033, "step": 38050 }, { "epoch": 0.6227603697946494, "grad_norm": 0.07237853015358595, "learning_rate": 3.743758866170396e-06, "loss": 0.0019, "step": 38060 }, { "epoch": 0.6229239957457253, "grad_norm": 0.40755676204257185, "learning_rate": 3.740994841279557e-06, "loss": 0.0027, "step": 38070 }, { "epoch": 0.6230876216968011, "grad_norm": 0.16112462438342146, "learning_rate": 3.7382312271168884e-06, "loss": 0.0025, "step": 38080 }, { "epoch": 0.623251247647877, "grad_norm": 0.044961857739818256, "learning_rate": 3.735468024583967e-06, "loss": 0.0033, "step": 38090 }, { "epoch": 0.6234148735989528, "grad_norm": 0.12512456061487884, "learning_rate": 3.7327052345822377e-06, "loss": 0.0023, "step": 38100 }, { "epoch": 0.6235784995500286, "grad_norm": 0.3653751614848998, "learning_rate": 3.729942858013017e-06, "loss": 0.0056, "step": 38110 }, { "epoch": 0.6237421255011045, "grad_norm": 0.0888992719732976, "learning_rate": 3.7271808957774775e-06, "loss": 0.0018, "step": 38120 }, { "epoch": 0.6239057514521803, "grad_norm": 0.1705172512795876, "learning_rate": 3.724419348776659e-06, "loss": 0.0015, "step": 38130 }, { "epoch": 0.6240693774032562, "grad_norm": 0.01578039865196833, "learning_rate": 3.7216582179114725e-06, "loss": 0.0031, "step": 38140 }, { "epoch": 0.624233003354332, "grad_norm": 0.25284340632351615, "learning_rate": 3.7188975040826843e-06, "loss": 0.0038, "step": 38150 }, { "epoch": 0.6243966293054078, "grad_norm": 0.3067106693432949, "learning_rate": 3.7161372081909273e-06, "loss": 0.0044, "step": 38160 }, { "epoch": 0.6245602552564837, "grad_norm": 0.08615320886033166, "learning_rate": 3.7133773311367043e-06, "loss": 0.0024, "step": 38170 }, { "epoch": 0.6247238812075595, "grad_norm": 0.2344352622695516, "learning_rate": 3.710617873820373e-06, "loss": 0.0032, "step": 38180 }, { "epoch": 0.6248875071586354, "grad_norm": 0.010428522749680256, "learning_rate": 3.7078588371421577e-06, "loss": 0.0039, "step": 38190 }, { "epoch": 0.6250511331097112, "grad_norm": 0.13603768638902353, "learning_rate": 3.7051002220021448e-06, "loss": 0.002, "step": 38200 }, { "epoch": 0.625214759060787, "grad_norm": 0.17730493551653576, "learning_rate": 3.702342029300285e-06, "loss": 0.0024, "step": 38210 }, { "epoch": 0.6253783850118629, "grad_norm": 0.12409248306868036, "learning_rate": 3.6995842599363897e-06, "loss": 0.0024, "step": 38220 }, { "epoch": 0.6255420109629387, "grad_norm": 0.14838497015282662, "learning_rate": 3.696826914810131e-06, "loss": 0.0018, "step": 38230 }, { "epoch": 0.6257056369140146, "grad_norm": 0.05772436206324633, "learning_rate": 3.694069994821046e-06, "loss": 0.0019, "step": 38240 }, { "epoch": 0.6258692628650904, "grad_norm": 0.25101461980211764, "learning_rate": 3.6913135008685306e-06, "loss": 0.0023, "step": 38250 }, { "epoch": 0.6260328888161663, "grad_norm": 0.14784230056014358, "learning_rate": 3.6885574338518405e-06, "loss": 0.003, "step": 38260 }, { "epoch": 0.626196514767242, "grad_norm": 0.04744194891011138, "learning_rate": 3.6858017946700957e-06, "loss": 0.0022, "step": 38270 }, { "epoch": 0.6263601407183179, "grad_norm": 0.3992167168686326, "learning_rate": 3.6830465842222747e-06, "loss": 0.0034, "step": 38280 }, { "epoch": 0.6265237666693938, "grad_norm": 0.08340280859893437, "learning_rate": 3.6802918034072142e-06, "loss": 0.003, "step": 38290 }, { "epoch": 0.6266873926204696, "grad_norm": 0.14859698103047322, "learning_rate": 3.6775374531236153e-06, "loss": 0.002, "step": 38300 }, { "epoch": 0.6268510185715455, "grad_norm": 0.29810720883017816, "learning_rate": 3.6747835342700355e-06, "loss": 0.0025, "step": 38310 }, { "epoch": 0.6270146445226212, "grad_norm": 0.19357298751918414, "learning_rate": 3.67203004774489e-06, "loss": 0.0043, "step": 38320 }, { "epoch": 0.6271782704736971, "grad_norm": 0.13921365232679245, "learning_rate": 3.6692769944464575e-06, "loss": 0.0024, "step": 38330 }, { "epoch": 0.627341896424773, "grad_norm": 0.10467501382692902, "learning_rate": 3.666524375272872e-06, "loss": 0.0026, "step": 38340 }, { "epoch": 0.6275055223758488, "grad_norm": 0.0433689597337994, "learning_rate": 3.6637721911221265e-06, "loss": 0.0021, "step": 38350 }, { "epoch": 0.6276691483269247, "grad_norm": 0.017340680117201867, "learning_rate": 3.6610204428920735e-06, "loss": 0.0042, "step": 38360 }, { "epoch": 0.6278327742780004, "grad_norm": 0.03517518518574745, "learning_rate": 3.6582691314804215e-06, "loss": 0.0022, "step": 38370 }, { "epoch": 0.6279964002290763, "grad_norm": 0.3998061331245079, "learning_rate": 3.655518257784736e-06, "loss": 0.0025, "step": 38380 }, { "epoch": 0.6281600261801522, "grad_norm": 0.11417011733817845, "learning_rate": 3.652767822702443e-06, "loss": 0.0021, "step": 38390 }, { "epoch": 0.628323652131228, "grad_norm": 0.1384373064343689, "learning_rate": 3.6500178271308227e-06, "loss": 0.002, "step": 38400 }, { "epoch": 0.6284872780823039, "grad_norm": 0.08775581040639487, "learning_rate": 3.6472682719670118e-06, "loss": 0.0033, "step": 38410 }, { "epoch": 0.6286509040333796, "grad_norm": 0.10249673582007192, "learning_rate": 3.644519158108004e-06, "loss": 0.0039, "step": 38420 }, { "epoch": 0.6288145299844555, "grad_norm": 0.1935489375264137, "learning_rate": 3.641770486450651e-06, "loss": 0.003, "step": 38430 }, { "epoch": 0.6289781559355314, "grad_norm": 0.06760697358930276, "learning_rate": 3.639022257891655e-06, "loss": 0.0028, "step": 38440 }, { "epoch": 0.6291417818866072, "grad_norm": 0.10566765312791716, "learning_rate": 3.63627447332758e-06, "loss": 0.0035, "step": 38450 }, { "epoch": 0.6293054078376831, "grad_norm": 0.06728765716506759, "learning_rate": 3.6335271336548417e-06, "loss": 0.0024, "step": 38460 }, { "epoch": 0.6294690337887588, "grad_norm": 0.14316234630000466, "learning_rate": 3.6307802397697078e-06, "loss": 0.0029, "step": 38470 }, { "epoch": 0.6296326597398347, "grad_norm": 0.07815642999977915, "learning_rate": 3.6280337925683083e-06, "loss": 0.0029, "step": 38480 }, { "epoch": 0.6297962856909106, "grad_norm": 0.10653821487349018, "learning_rate": 3.625287792946621e-06, "loss": 0.002, "step": 38490 }, { "epoch": 0.6299599116419864, "grad_norm": 0.06881104670043096, "learning_rate": 3.6225422418004807e-06, "loss": 0.0022, "step": 38500 }, { "epoch": 0.6301235375930623, "grad_norm": 0.07557137509277463, "learning_rate": 3.619797140025572e-06, "loss": 0.0019, "step": 38510 }, { "epoch": 0.630287163544138, "grad_norm": 0.03591441450603031, "learning_rate": 3.617052488517439e-06, "loss": 0.0035, "step": 38520 }, { "epoch": 0.6304507894952139, "grad_norm": 0.25097639885870293, "learning_rate": 3.614308288171475e-06, "loss": 0.002, "step": 38530 }, { "epoch": 0.6306144154462898, "grad_norm": 0.1389971610613418, "learning_rate": 3.6115645398829248e-06, "loss": 0.0034, "step": 38540 }, { "epoch": 0.6307780413973656, "grad_norm": 0.17589646894936803, "learning_rate": 3.6088212445468907e-06, "loss": 0.0022, "step": 38550 }, { "epoch": 0.6309416673484415, "grad_norm": 0.08658727822755675, "learning_rate": 3.606078403058323e-06, "loss": 0.0038, "step": 38560 }, { "epoch": 0.6311052932995173, "grad_norm": 0.23045403396874473, "learning_rate": 3.6033360163120233e-06, "loss": 0.0024, "step": 38570 }, { "epoch": 0.6312689192505931, "grad_norm": 0.2830268893794633, "learning_rate": 3.6005940852026513e-06, "loss": 0.0055, "step": 38580 }, { "epoch": 0.631432545201669, "grad_norm": 0.24679752073941583, "learning_rate": 3.59785261062471e-06, "loss": 0.0041, "step": 38590 }, { "epoch": 0.6315961711527448, "grad_norm": 0.06928124901430129, "learning_rate": 3.595111593472557e-06, "loss": 0.0022, "step": 38600 }, { "epoch": 0.6317597971038207, "grad_norm": 0.06582509661013895, "learning_rate": 3.5923710346404017e-06, "loss": 0.002, "step": 38610 }, { "epoch": 0.6319234230548965, "grad_norm": 0.2349798041444064, "learning_rate": 3.5896309350223036e-06, "loss": 0.0021, "step": 38620 }, { "epoch": 0.6320870490059723, "grad_norm": 0.14511668992385693, "learning_rate": 3.5868912955121697e-06, "loss": 0.0028, "step": 38630 }, { "epoch": 0.6322506749570482, "grad_norm": 0.26415147374666026, "learning_rate": 3.5841521170037614e-06, "loss": 0.0028, "step": 38640 }, { "epoch": 0.632414300908124, "grad_norm": 0.15530303630261916, "learning_rate": 3.581413400390686e-06, "loss": 0.0036, "step": 38650 }, { "epoch": 0.6325779268591999, "grad_norm": 0.17375612971469037, "learning_rate": 3.5786751465664003e-06, "loss": 0.0024, "step": 38660 }, { "epoch": 0.6327415528102757, "grad_norm": 0.1385849198586867, "learning_rate": 3.5759373564242135e-06, "loss": 0.0019, "step": 38670 }, { "epoch": 0.6329051787613516, "grad_norm": 0.130304237375768, "learning_rate": 3.5732000308572793e-06, "loss": 0.0021, "step": 38680 }, { "epoch": 0.6330688047124274, "grad_norm": 0.20550082373084577, "learning_rate": 3.5704631707586023e-06, "loss": 0.0028, "step": 38690 }, { "epoch": 0.6332324306635032, "grad_norm": 0.08068544638794777, "learning_rate": 3.5677267770210354e-06, "loss": 0.0029, "step": 38700 }, { "epoch": 0.6333960566145791, "grad_norm": 0.0405596941365278, "learning_rate": 3.5649908505372787e-06, "loss": 0.0023, "step": 38710 }, { "epoch": 0.6335596825656549, "grad_norm": 0.0319238182833129, "learning_rate": 3.562255392199877e-06, "loss": 0.0025, "step": 38720 }, { "epoch": 0.6337233085167308, "grad_norm": 0.06638724826502856, "learning_rate": 3.55952040290123e-06, "loss": 0.0017, "step": 38730 }, { "epoch": 0.6338869344678066, "grad_norm": 0.055841150920700036, "learning_rate": 3.5567858835335762e-06, "loss": 0.0023, "step": 38740 }, { "epoch": 0.6340505604188824, "grad_norm": 0.2755851712666514, "learning_rate": 3.554051834989004e-06, "loss": 0.0024, "step": 38750 }, { "epoch": 0.6342141863699583, "grad_norm": 0.07089017127900447, "learning_rate": 3.5513182581594507e-06, "loss": 0.0019, "step": 38760 }, { "epoch": 0.6343778123210341, "grad_norm": 0.1665739743608836, "learning_rate": 3.5485851539366957e-06, "loss": 0.0029, "step": 38770 }, { "epoch": 0.63454143827211, "grad_norm": 0.33496896780129287, "learning_rate": 3.545852523212366e-06, "loss": 0.0034, "step": 38780 }, { "epoch": 0.6347050642231858, "grad_norm": 0.027694154292521325, "learning_rate": 3.5431203668779344e-06, "loss": 0.0017, "step": 38790 }, { "epoch": 0.6348686901742616, "grad_norm": 0.10418731321733239, "learning_rate": 3.5403886858247195e-06, "loss": 0.0033, "step": 38800 }, { "epoch": 0.6350323161253375, "grad_norm": 0.21186309953856952, "learning_rate": 3.537657480943882e-06, "loss": 0.0033, "step": 38810 }, { "epoch": 0.6351959420764133, "grad_norm": 0.15116136763187724, "learning_rate": 3.534926753126431e-06, "loss": 0.0035, "step": 38820 }, { "epoch": 0.6353595680274892, "grad_norm": 0.11003966879294831, "learning_rate": 3.532196503263217e-06, "loss": 0.0025, "step": 38830 }, { "epoch": 0.635523193978565, "grad_norm": 0.389729405112115, "learning_rate": 3.5294667322449376e-06, "loss": 0.0026, "step": 38840 }, { "epoch": 0.6356868199296408, "grad_norm": 0.06262217513342504, "learning_rate": 3.526737440962128e-06, "loss": 0.0029, "step": 38850 }, { "epoch": 0.6358504458807167, "grad_norm": 0.05695564873693938, "learning_rate": 3.524008630305175e-06, "loss": 0.0027, "step": 38860 }, { "epoch": 0.6360140718317925, "grad_norm": 0.031833906099472446, "learning_rate": 3.521280301164306e-06, "loss": 0.0026, "step": 38870 }, { "epoch": 0.6361776977828684, "grad_norm": 0.07692799187508191, "learning_rate": 3.518552454429585e-06, "loss": 0.0023, "step": 38880 }, { "epoch": 0.6363413237339443, "grad_norm": 0.28107378208070105, "learning_rate": 3.5158250909909274e-06, "loss": 0.0027, "step": 38890 }, { "epoch": 0.63650494968502, "grad_norm": 0.19079093013755902, "learning_rate": 3.5130982117380887e-06, "loss": 0.0024, "step": 38900 }, { "epoch": 0.6366685756360959, "grad_norm": 0.2284570473305802, "learning_rate": 3.510371817560659e-06, "loss": 0.0028, "step": 38910 }, { "epoch": 0.6368322015871717, "grad_norm": 0.41034425449583406, "learning_rate": 3.507645909348083e-06, "loss": 0.0026, "step": 38920 }, { "epoch": 0.6369958275382476, "grad_norm": 0.10690221900288284, "learning_rate": 3.504920487989638e-06, "loss": 0.0032, "step": 38930 }, { "epoch": 0.6371594534893235, "grad_norm": 0.32264827471237356, "learning_rate": 3.5021955543744414e-06, "loss": 0.0035, "step": 38940 }, { "epoch": 0.6373230794403992, "grad_norm": 0.1095029293673731, "learning_rate": 3.499471109391458e-06, "loss": 0.0029, "step": 38950 }, { "epoch": 0.6374867053914751, "grad_norm": 0.06859882506483665, "learning_rate": 3.4967471539294897e-06, "loss": 0.0027, "step": 38960 }, { "epoch": 0.6376503313425509, "grad_norm": 0.2360184927574292, "learning_rate": 3.494023688877175e-06, "loss": 0.0055, "step": 38970 }, { "epoch": 0.6378139572936268, "grad_norm": 0.15935277716643212, "learning_rate": 3.491300715123001e-06, "loss": 0.0026, "step": 38980 }, { "epoch": 0.6379775832447027, "grad_norm": 0.18765905096543423, "learning_rate": 3.4885782335552886e-06, "loss": 0.0029, "step": 38990 }, { "epoch": 0.6381412091957784, "grad_norm": 0.17056334573291837, "learning_rate": 3.485856245062196e-06, "loss": 0.0026, "step": 39000 }, { "epoch": 0.6383048351468543, "grad_norm": 0.12479206908974337, "learning_rate": 3.4831347505317277e-06, "loss": 0.0047, "step": 39010 }, { "epoch": 0.6384684610979301, "grad_norm": 0.05848187346061733, "learning_rate": 3.480413750851723e-06, "loss": 0.0025, "step": 39020 }, { "epoch": 0.638632087049006, "grad_norm": 0.061245253722046145, "learning_rate": 3.4776932469098553e-06, "loss": 0.0016, "step": 39030 }, { "epoch": 0.6387957130000819, "grad_norm": 0.2604992415635095, "learning_rate": 3.474973239593646e-06, "loss": 0.0039, "step": 39040 }, { "epoch": 0.6389593389511576, "grad_norm": 0.15942433710193663, "learning_rate": 3.472253729790449e-06, "loss": 0.0017, "step": 39050 }, { "epoch": 0.6391229649022335, "grad_norm": 0.22631456028260613, "learning_rate": 3.469534718387452e-06, "loss": 0.002, "step": 39060 }, { "epoch": 0.6392865908533093, "grad_norm": 0.15510403091064973, "learning_rate": 3.4668162062716893e-06, "loss": 0.002, "step": 39070 }, { "epoch": 0.6394502168043852, "grad_norm": 0.03259973575364838, "learning_rate": 3.4640981943300257e-06, "loss": 0.0018, "step": 39080 }, { "epoch": 0.6396138427554611, "grad_norm": 0.27997364757017107, "learning_rate": 3.4613806834491616e-06, "loss": 0.0032, "step": 39090 }, { "epoch": 0.6397774687065368, "grad_norm": 0.12568721517320217, "learning_rate": 3.458663674515641e-06, "loss": 0.0037, "step": 39100 }, { "epoch": 0.6399410946576127, "grad_norm": 0.21657601456856848, "learning_rate": 3.45594716841584e-06, "loss": 0.0031, "step": 39110 }, { "epoch": 0.6401047206086885, "grad_norm": 0.16483118280487852, "learning_rate": 3.4532311660359663e-06, "loss": 0.0014, "step": 39120 }, { "epoch": 0.6402683465597644, "grad_norm": 0.10833034929183712, "learning_rate": 3.4505156682620723e-06, "loss": 0.003, "step": 39130 }, { "epoch": 0.6404319725108402, "grad_norm": 0.053708298906984524, "learning_rate": 3.44780067598004e-06, "loss": 0.0026, "step": 39140 }, { "epoch": 0.640595598461916, "grad_norm": 0.08706665174363926, "learning_rate": 3.445086190075584e-06, "loss": 0.0022, "step": 39150 }, { "epoch": 0.6407592244129919, "grad_norm": 0.09663525873904123, "learning_rate": 3.4423722114342627e-06, "loss": 0.0022, "step": 39160 }, { "epoch": 0.6409228503640677, "grad_norm": 0.2617878128758688, "learning_rate": 3.4396587409414616e-06, "loss": 0.0026, "step": 39170 }, { "epoch": 0.6410864763151436, "grad_norm": 0.019585625635606414, "learning_rate": 3.436945779482401e-06, "loss": 0.0026, "step": 39180 }, { "epoch": 0.6412501022662194, "grad_norm": 0.482092772708202, "learning_rate": 3.4342333279421365e-06, "loss": 0.0023, "step": 39190 }, { "epoch": 0.6414137282172953, "grad_norm": 0.10950673329294372, "learning_rate": 3.4315213872055608e-06, "loss": 0.0025, "step": 39200 }, { "epoch": 0.6415773541683711, "grad_norm": 0.03607768986921398, "learning_rate": 3.428809958157394e-06, "loss": 0.0032, "step": 39210 }, { "epoch": 0.6417409801194469, "grad_norm": 0.12558146764801126, "learning_rate": 3.4260990416821905e-06, "loss": 0.0051, "step": 39220 }, { "epoch": 0.6419046060705228, "grad_norm": 0.06545152650964281, "learning_rate": 3.423388638664342e-06, "loss": 0.0046, "step": 39230 }, { "epoch": 0.6420682320215986, "grad_norm": 0.28751749668765947, "learning_rate": 3.420678749988069e-06, "loss": 0.0045, "step": 39240 }, { "epoch": 0.6422318579726745, "grad_norm": 0.09914673240841411, "learning_rate": 3.4179693765374217e-06, "loss": 0.004, "step": 39250 }, { "epoch": 0.6423954839237503, "grad_norm": 0.1624249882707871, "learning_rate": 3.415260519196289e-06, "loss": 0.0031, "step": 39260 }, { "epoch": 0.6425591098748261, "grad_norm": 0.06780680146887115, "learning_rate": 3.412552178848386e-06, "loss": 0.0016, "step": 39270 }, { "epoch": 0.642722735825902, "grad_norm": 0.08071851840400393, "learning_rate": 3.4098443563772603e-06, "loss": 0.0023, "step": 39280 }, { "epoch": 0.6428863617769778, "grad_norm": 0.0865581863579484, "learning_rate": 3.4071370526662927e-06, "loss": 0.0021, "step": 39290 }, { "epoch": 0.6430499877280537, "grad_norm": 0.1206850955827089, "learning_rate": 3.404430268598693e-06, "loss": 0.0025, "step": 39300 }, { "epoch": 0.6432136136791295, "grad_norm": 0.037373664251977204, "learning_rate": 3.401724005057499e-06, "loss": 0.0021, "step": 39310 }, { "epoch": 0.6433772396302053, "grad_norm": 0.2981050405267764, "learning_rate": 3.3990182629255846e-06, "loss": 0.0031, "step": 39320 }, { "epoch": 0.6435408655812812, "grad_norm": 0.2660153177096172, "learning_rate": 3.396313043085649e-06, "loss": 0.0043, "step": 39330 }, { "epoch": 0.643704491532357, "grad_norm": 0.11660320312791532, "learning_rate": 3.3936083464202215e-06, "loss": 0.0021, "step": 39340 }, { "epoch": 0.6438681174834329, "grad_norm": 0.09542051793649162, "learning_rate": 3.390904173811663e-06, "loss": 0.0025, "step": 39350 }, { "epoch": 0.6440317434345088, "grad_norm": 0.11962384436668982, "learning_rate": 3.3882005261421614e-06, "loss": 0.0014, "step": 39360 }, { "epoch": 0.6441953693855845, "grad_norm": 0.17334410337924983, "learning_rate": 3.3854974042937327e-06, "loss": 0.0027, "step": 39370 }, { "epoch": 0.6443589953366604, "grad_norm": 0.08640245323925387, "learning_rate": 3.3827948091482254e-06, "loss": 0.0027, "step": 39380 }, { "epoch": 0.6445226212877362, "grad_norm": 0.19022382429604776, "learning_rate": 3.3800927415873118e-06, "loss": 0.0034, "step": 39390 }, { "epoch": 0.6446862472388121, "grad_norm": 0.13862264139332806, "learning_rate": 3.3773912024924926e-06, "loss": 0.0015, "step": 39400 }, { "epoch": 0.644849873189888, "grad_norm": 0.2905314794592806, "learning_rate": 3.3746901927450985e-06, "loss": 0.0044, "step": 39410 }, { "epoch": 0.6450134991409637, "grad_norm": 0.0864932818181526, "learning_rate": 3.3719897132262865e-06, "loss": 0.0045, "step": 39420 }, { "epoch": 0.6451771250920396, "grad_norm": 0.10044096407664471, "learning_rate": 3.3692897648170385e-06, "loss": 0.0019, "step": 39430 }, { "epoch": 0.6453407510431154, "grad_norm": 0.33443881843169976, "learning_rate": 3.3665903483981667e-06, "loss": 0.0023, "step": 39440 }, { "epoch": 0.6455043769941913, "grad_norm": 0.17473955994237858, "learning_rate": 3.363891464850307e-06, "loss": 0.0023, "step": 39450 }, { "epoch": 0.6456680029452672, "grad_norm": 0.17463385678983154, "learning_rate": 3.3611931150539228e-06, "loss": 0.004, "step": 39460 }, { "epoch": 0.6458316288963429, "grad_norm": 0.21844348098830813, "learning_rate": 3.358495299889304e-06, "loss": 0.0063, "step": 39470 }, { "epoch": 0.6459952548474188, "grad_norm": 0.44237402085899674, "learning_rate": 3.355798020236563e-06, "loss": 0.0037, "step": 39480 }, { "epoch": 0.6461588807984946, "grad_norm": 0.05849516192366982, "learning_rate": 3.3531012769756398e-06, "loss": 0.002, "step": 39490 }, { "epoch": 0.6463225067495705, "grad_norm": 0.23793781468187503, "learning_rate": 3.350405070986302e-06, "loss": 0.0031, "step": 39500 }, { "epoch": 0.6464861327006464, "grad_norm": 0.26682048461196695, "learning_rate": 3.347709403148136e-06, "loss": 0.007, "step": 39510 }, { "epoch": 0.6466497586517221, "grad_norm": 0.16024914869492465, "learning_rate": 3.3450142743405577e-06, "loss": 0.0024, "step": 39520 }, { "epoch": 0.646813384602798, "grad_norm": 0.05431893310284545, "learning_rate": 3.342319685442803e-06, "loss": 0.0027, "step": 39530 }, { "epoch": 0.6469770105538738, "grad_norm": 0.07468792672795713, "learning_rate": 3.3396256373339363e-06, "loss": 0.0026, "step": 39540 }, { "epoch": 0.6471406365049497, "grad_norm": 0.19813218668722152, "learning_rate": 3.3369321308928427e-06, "loss": 0.0045, "step": 39550 }, { "epoch": 0.6473042624560256, "grad_norm": 0.28014089150395327, "learning_rate": 3.334239166998229e-06, "loss": 0.0025, "step": 39560 }, { "epoch": 0.6474678884071013, "grad_norm": 0.09728016720515517, "learning_rate": 3.3315467465286288e-06, "loss": 0.0036, "step": 39570 }, { "epoch": 0.6476315143581772, "grad_norm": 0.06672731755674141, "learning_rate": 3.3288548703623975e-06, "loss": 0.0022, "step": 39580 }, { "epoch": 0.647795140309253, "grad_norm": 0.1940579031224912, "learning_rate": 3.3261635393777094e-06, "loss": 0.0035, "step": 39590 }, { "epoch": 0.6479587662603289, "grad_norm": 0.022336458287932782, "learning_rate": 3.323472754452567e-06, "loss": 0.0017, "step": 39600 }, { "epoch": 0.6481223922114048, "grad_norm": 0.13438386381020884, "learning_rate": 3.3207825164647887e-06, "loss": 0.0024, "step": 39610 }, { "epoch": 0.6482860181624805, "grad_norm": 0.17504290350410256, "learning_rate": 3.3180928262920172e-06, "loss": 0.0041, "step": 39620 }, { "epoch": 0.6484496441135564, "grad_norm": 0.1386345402065505, "learning_rate": 3.315403684811718e-06, "loss": 0.003, "step": 39630 }, { "epoch": 0.6486132700646322, "grad_norm": 0.11579370961462987, "learning_rate": 3.3127150929011746e-06, "loss": 0.0017, "step": 39640 }, { "epoch": 0.6487768960157081, "grad_norm": 0.1202485041560297, "learning_rate": 3.310027051437492e-06, "loss": 0.0024, "step": 39650 }, { "epoch": 0.648940521966784, "grad_norm": 0.12061792889466585, "learning_rate": 3.307339561297599e-06, "loss": 0.0018, "step": 39660 }, { "epoch": 0.6491041479178598, "grad_norm": 0.11712073233399824, "learning_rate": 3.3046526233582395e-06, "loss": 0.0031, "step": 39670 }, { "epoch": 0.6492677738689356, "grad_norm": 0.14256584072034356, "learning_rate": 3.3019662384959785e-06, "loss": 0.0023, "step": 39680 }, { "epoch": 0.6494313998200114, "grad_norm": 0.1709936322373615, "learning_rate": 3.2992804075872044e-06, "loss": 0.002, "step": 39690 }, { "epoch": 0.6495950257710873, "grad_norm": 0.09806191266742119, "learning_rate": 3.29659513150812e-06, "loss": 0.0018, "step": 39700 }, { "epoch": 0.6497586517221632, "grad_norm": 0.12957565176309693, "learning_rate": 3.2939104111347486e-06, "loss": 0.0022, "step": 39710 }, { "epoch": 0.649922277673239, "grad_norm": 0.20161926415889336, "learning_rate": 3.2912262473429356e-06, "loss": 0.0017, "step": 39720 }, { "epoch": 0.6500859036243148, "grad_norm": 0.04588152739432965, "learning_rate": 3.28854264100834e-06, "loss": 0.0024, "step": 39730 }, { "epoch": 0.6502495295753906, "grad_norm": 0.09643820719669284, "learning_rate": 3.2858595930064396e-06, "loss": 0.0028, "step": 39740 }, { "epoch": 0.6504131555264665, "grad_norm": 0.05130347824026865, "learning_rate": 3.283177104212534e-06, "loss": 0.0043, "step": 39750 }, { "epoch": 0.6505767814775424, "grad_norm": 0.02360428084797385, "learning_rate": 3.2804951755017367e-06, "loss": 0.0025, "step": 39760 }, { "epoch": 0.6507404074286182, "grad_norm": 0.2494168643005269, "learning_rate": 3.2778138077489787e-06, "loss": 0.0023, "step": 39770 }, { "epoch": 0.650904033379694, "grad_norm": 0.028880625257212113, "learning_rate": 3.27513300182901e-06, "loss": 0.0022, "step": 39780 }, { "epoch": 0.6510676593307698, "grad_norm": 0.1292486455571358, "learning_rate": 3.2724527586163964e-06, "loss": 0.0031, "step": 39790 }, { "epoch": 0.6512312852818457, "grad_norm": 0.145923383876627, "learning_rate": 3.2697730789855177e-06, "loss": 0.0035, "step": 39800 }, { "epoch": 0.6513949112329216, "grad_norm": 0.2389334119865392, "learning_rate": 3.2670939638105735e-06, "loss": 0.0027, "step": 39810 }, { "epoch": 0.6515585371839974, "grad_norm": 0.05829126606389657, "learning_rate": 3.2644154139655785e-06, "loss": 0.004, "step": 39820 }, { "epoch": 0.6517221631350733, "grad_norm": 0.1287373268218403, "learning_rate": 3.2617374303243616e-06, "loss": 0.0032, "step": 39830 }, { "epoch": 0.651885789086149, "grad_norm": 0.12808499409673615, "learning_rate": 3.2590600137605657e-06, "loss": 0.002, "step": 39840 }, { "epoch": 0.6520494150372249, "grad_norm": 0.07306170449142108, "learning_rate": 3.2563831651476536e-06, "loss": 0.002, "step": 39850 }, { "epoch": 0.6522130409883008, "grad_norm": 0.07775133813934278, "learning_rate": 3.253706885358898e-06, "loss": 0.0031, "step": 39860 }, { "epoch": 0.6523766669393766, "grad_norm": 0.13357317538168761, "learning_rate": 3.2510311752673884e-06, "loss": 0.0055, "step": 39870 }, { "epoch": 0.6525402928904525, "grad_norm": 0.1521847707567769, "learning_rate": 3.2483560357460276e-06, "loss": 0.003, "step": 39880 }, { "epoch": 0.6527039188415282, "grad_norm": 0.4127917503806797, "learning_rate": 3.2456814676675334e-06, "loss": 0.0033, "step": 39890 }, { "epoch": 0.6528675447926041, "grad_norm": 0.18561189078890014, "learning_rate": 3.2430074719044345e-06, "loss": 0.002, "step": 39900 }, { "epoch": 0.65303117074368, "grad_norm": 0.12787073109409958, "learning_rate": 3.240334049329077e-06, "loss": 0.0044, "step": 39910 }, { "epoch": 0.6531947966947558, "grad_norm": 0.2866621415497609, "learning_rate": 3.2376612008136173e-06, "loss": 0.0034, "step": 39920 }, { "epoch": 0.6533584226458317, "grad_norm": 0.15739206094542157, "learning_rate": 3.234988927230023e-06, "loss": 0.002, "step": 39930 }, { "epoch": 0.6535220485969074, "grad_norm": 0.2795931432108628, "learning_rate": 3.232317229450078e-06, "loss": 0.0021, "step": 39940 }, { "epoch": 0.6536856745479833, "grad_norm": 0.09170065126543997, "learning_rate": 3.2296461083453768e-06, "loss": 0.003, "step": 39950 }, { "epoch": 0.6538493004990592, "grad_norm": 0.08503289135452773, "learning_rate": 3.226975564787322e-06, "loss": 0.0032, "step": 39960 }, { "epoch": 0.654012926450135, "grad_norm": 0.11842476542051038, "learning_rate": 3.224305599647135e-06, "loss": 0.0028, "step": 39970 }, { "epoch": 0.6541765524012109, "grad_norm": 0.20862033495901083, "learning_rate": 3.2216362137958456e-06, "loss": 0.0039, "step": 39980 }, { "epoch": 0.6543401783522866, "grad_norm": 0.12705672164595538, "learning_rate": 3.2189674081042875e-06, "loss": 0.0027, "step": 39990 }, { "epoch": 0.6545038043033625, "grad_norm": 0.2860372628199809, "learning_rate": 3.216299183443118e-06, "loss": 0.0017, "step": 40000 }, { "epoch": 0.6546674302544384, "grad_norm": 0.23223418361787682, "learning_rate": 3.2136315406827967e-06, "loss": 0.0023, "step": 40010 }, { "epoch": 0.6548310562055142, "grad_norm": 0.012887209491306267, "learning_rate": 3.21096448069359e-06, "loss": 0.0042, "step": 40020 }, { "epoch": 0.6549946821565901, "grad_norm": 0.08575492368848339, "learning_rate": 3.208298004345586e-06, "loss": 0.0032, "step": 40030 }, { "epoch": 0.6551583081076658, "grad_norm": 0.14995352057200062, "learning_rate": 3.2056321125086724e-06, "loss": 0.002, "step": 40040 }, { "epoch": 0.6553219340587417, "grad_norm": 0.06966696589240441, "learning_rate": 3.2029668060525467e-06, "loss": 0.0018, "step": 40050 }, { "epoch": 0.6554855600098175, "grad_norm": 0.05765247420159761, "learning_rate": 3.2003020858467224e-06, "loss": 0.0025, "step": 40060 }, { "epoch": 0.6556491859608934, "grad_norm": 0.2751262712319028, "learning_rate": 3.1976379527605163e-06, "loss": 0.0034, "step": 40070 }, { "epoch": 0.6558128119119693, "grad_norm": 0.08558690181995803, "learning_rate": 3.1949744076630505e-06, "loss": 0.0024, "step": 40080 }, { "epoch": 0.655976437863045, "grad_norm": 0.08569937119368176, "learning_rate": 3.192311451423266e-06, "loss": 0.0017, "step": 40090 }, { "epoch": 0.6561400638141209, "grad_norm": 0.28402650808015234, "learning_rate": 3.1896490849099015e-06, "loss": 0.0016, "step": 40100 }, { "epoch": 0.6563036897651967, "grad_norm": 0.2821033728842668, "learning_rate": 3.186987308991505e-06, "loss": 0.0019, "step": 40110 }, { "epoch": 0.6564673157162726, "grad_norm": 0.06455344900533107, "learning_rate": 3.1843261245364383e-06, "loss": 0.0021, "step": 40120 }, { "epoch": 0.6566309416673485, "grad_norm": 0.06232349812964358, "learning_rate": 3.181665532412865e-06, "loss": 0.0046, "step": 40130 }, { "epoch": 0.6567945676184243, "grad_norm": 0.1712542184772028, "learning_rate": 3.1790055334887516e-06, "loss": 0.0035, "step": 40140 }, { "epoch": 0.6569581935695001, "grad_norm": 0.4522706649109714, "learning_rate": 3.1763461286318815e-06, "loss": 0.0081, "step": 40150 }, { "epoch": 0.6571218195205759, "grad_norm": 0.10160296605985261, "learning_rate": 3.173687318709837e-06, "loss": 0.0022, "step": 40160 }, { "epoch": 0.6572854454716518, "grad_norm": 0.19452493654430852, "learning_rate": 3.1710291045900054e-06, "loss": 0.003, "step": 40170 }, { "epoch": 0.6574490714227277, "grad_norm": 0.08345665002389553, "learning_rate": 3.168371487139582e-06, "loss": 0.0028, "step": 40180 }, { "epoch": 0.6576126973738035, "grad_norm": 0.06317711366288563, "learning_rate": 3.1657144672255726e-06, "loss": 0.0017, "step": 40190 }, { "epoch": 0.6577763233248793, "grad_norm": 0.0568274186794387, "learning_rate": 3.1630580457147777e-06, "loss": 0.0044, "step": 40200 }, { "epoch": 0.6579399492759551, "grad_norm": 0.14384420536684045, "learning_rate": 3.1604022234738073e-06, "loss": 0.0031, "step": 40210 }, { "epoch": 0.658103575227031, "grad_norm": 0.15772566652543338, "learning_rate": 3.1577470013690823e-06, "loss": 0.0038, "step": 40220 }, { "epoch": 0.6582672011781069, "grad_norm": 0.10366514866569605, "learning_rate": 3.155092380266817e-06, "loss": 0.0021, "step": 40230 }, { "epoch": 0.6584308271291827, "grad_norm": 0.1699050099509039, "learning_rate": 3.1524383610330345e-06, "loss": 0.0026, "step": 40240 }, { "epoch": 0.6585944530802585, "grad_norm": 0.07912439335916925, "learning_rate": 3.1497849445335654e-06, "loss": 0.003, "step": 40250 }, { "epoch": 0.6587580790313343, "grad_norm": 0.1522661454787654, "learning_rate": 3.1471321316340364e-06, "loss": 0.0028, "step": 40260 }, { "epoch": 0.6589217049824102, "grad_norm": 0.04008214753582326, "learning_rate": 3.1444799231998805e-06, "loss": 0.0013, "step": 40270 }, { "epoch": 0.6590853309334861, "grad_norm": 0.12155508234919483, "learning_rate": 3.1418283200963385e-06, "loss": 0.0034, "step": 40280 }, { "epoch": 0.6592489568845619, "grad_norm": 0.2074379356953341, "learning_rate": 3.139177323188445e-06, "loss": 0.0045, "step": 40290 }, { "epoch": 0.6594125828356378, "grad_norm": 0.03942891942010023, "learning_rate": 3.1365269333410385e-06, "loss": 0.0032, "step": 40300 }, { "epoch": 0.6595762087867135, "grad_norm": 0.08922873729187883, "learning_rate": 3.133877151418769e-06, "loss": 0.0023, "step": 40310 }, { "epoch": 0.6597398347377894, "grad_norm": 0.07838412449617105, "learning_rate": 3.1312279782860755e-06, "loss": 0.003, "step": 40320 }, { "epoch": 0.6599034606888653, "grad_norm": 0.05000926522499979, "learning_rate": 3.1285794148072034e-06, "loss": 0.0048, "step": 40330 }, { "epoch": 0.6600670866399411, "grad_norm": 0.05765057889722899, "learning_rate": 3.1259314618462046e-06, "loss": 0.0016, "step": 40340 }, { "epoch": 0.660230712591017, "grad_norm": 0.2655843045276707, "learning_rate": 3.1232841202669228e-06, "loss": 0.0026, "step": 40350 }, { "epoch": 0.6603943385420927, "grad_norm": 0.30198475608781566, "learning_rate": 3.1206373909330057e-06, "loss": 0.0041, "step": 40360 }, { "epoch": 0.6605579644931686, "grad_norm": 0.12496489904880949, "learning_rate": 3.1179912747079065e-06, "loss": 0.0035, "step": 40370 }, { "epoch": 0.6607215904442445, "grad_norm": 0.02567933770593977, "learning_rate": 3.1153457724548707e-06, "loss": 0.0028, "step": 40380 }, { "epoch": 0.6608852163953203, "grad_norm": 0.23412548210268688, "learning_rate": 3.112700885036945e-06, "loss": 0.0029, "step": 40390 }, { "epoch": 0.6610488423463962, "grad_norm": 0.13946125132828505, "learning_rate": 3.110056613316982e-06, "loss": 0.002, "step": 40400 }, { "epoch": 0.6612124682974719, "grad_norm": 0.09678302262651918, "learning_rate": 3.107412958157625e-06, "loss": 0.0021, "step": 40410 }, { "epoch": 0.6613760942485478, "grad_norm": 0.19509377889889065, "learning_rate": 3.1047699204213176e-06, "loss": 0.0021, "step": 40420 }, { "epoch": 0.6615397201996237, "grad_norm": 0.21633787104740393, "learning_rate": 3.102127500970311e-06, "loss": 0.002, "step": 40430 }, { "epoch": 0.6617033461506995, "grad_norm": 0.09215322596108254, "learning_rate": 3.0994857006666424e-06, "loss": 0.0033, "step": 40440 }, { "epoch": 0.6618669721017754, "grad_norm": 0.14275816767678096, "learning_rate": 3.096844520372152e-06, "loss": 0.0013, "step": 40450 }, { "epoch": 0.6620305980528511, "grad_norm": 0.3473689006139139, "learning_rate": 3.0942039609484833e-06, "loss": 0.0025, "step": 40460 }, { "epoch": 0.662194224003927, "grad_norm": 0.09610750533745403, "learning_rate": 3.091564023257067e-06, "loss": 0.0025, "step": 40470 }, { "epoch": 0.6623578499550029, "grad_norm": 0.23561934528778736, "learning_rate": 3.0889247081591368e-06, "loss": 0.0011, "step": 40480 }, { "epoch": 0.6625214759060787, "grad_norm": 0.16203910305151026, "learning_rate": 3.086286016515726e-06, "loss": 0.0019, "step": 40490 }, { "epoch": 0.6626851018571546, "grad_norm": 0.19275699794863643, "learning_rate": 3.0836479491876582e-06, "loss": 0.0024, "step": 40500 }, { "epoch": 0.6628487278082303, "grad_norm": 0.32697829671129397, "learning_rate": 3.0810105070355567e-06, "loss": 0.0048, "step": 40510 }, { "epoch": 0.6630123537593062, "grad_norm": 0.36792904683152244, "learning_rate": 3.0783736909198393e-06, "loss": 0.0021, "step": 40520 }, { "epoch": 0.6631759797103821, "grad_norm": 0.21469863595469219, "learning_rate": 3.075737501700723e-06, "loss": 0.0031, "step": 40530 }, { "epoch": 0.6633396056614579, "grad_norm": 0.14359639461928936, "learning_rate": 3.0731019402382167e-06, "loss": 0.0019, "step": 40540 }, { "epoch": 0.6635032316125338, "grad_norm": 0.054395701590773886, "learning_rate": 3.070467007392125e-06, "loss": 0.0025, "step": 40550 }, { "epoch": 0.6636668575636095, "grad_norm": 0.24050371405752213, "learning_rate": 3.067832704022049e-06, "loss": 0.0031, "step": 40560 }, { "epoch": 0.6638304835146854, "grad_norm": 0.1290172505629635, "learning_rate": 3.0651990309873846e-06, "loss": 0.0039, "step": 40570 }, { "epoch": 0.6639941094657613, "grad_norm": 0.03202927829674529, "learning_rate": 3.062565989147318e-06, "loss": 0.0038, "step": 40580 }, { "epoch": 0.6641577354168371, "grad_norm": 0.18586337574070458, "learning_rate": 3.0599335793608353e-06, "loss": 0.0025, "step": 40590 }, { "epoch": 0.664321361367913, "grad_norm": 0.36856125265353057, "learning_rate": 3.057301802486714e-06, "loss": 0.003, "step": 40600 }, { "epoch": 0.6644849873189888, "grad_norm": 0.14686951370124568, "learning_rate": 3.0546706593835214e-06, "loss": 0.0033, "step": 40610 }, { "epoch": 0.6646486132700646, "grad_norm": 0.021412108609240532, "learning_rate": 3.0520401509096248e-06, "loss": 0.0019, "step": 40620 }, { "epoch": 0.6648122392211405, "grad_norm": 0.005325258307676092, "learning_rate": 3.04941027792318e-06, "loss": 0.0027, "step": 40630 }, { "epoch": 0.6649758651722163, "grad_norm": 0.11931541068795616, "learning_rate": 3.046781041282135e-06, "loss": 0.0023, "step": 40640 }, { "epoch": 0.6651394911232922, "grad_norm": 0.17313174219269734, "learning_rate": 3.044152441844235e-06, "loss": 0.0035, "step": 40650 }, { "epoch": 0.665303117074368, "grad_norm": 0.15497922283450521, "learning_rate": 3.0415244804670106e-06, "loss": 0.0029, "step": 40660 }, { "epoch": 0.6654667430254438, "grad_norm": 0.2689743844869192, "learning_rate": 3.0388971580077896e-06, "loss": 0.0023, "step": 40670 }, { "epoch": 0.6656303689765197, "grad_norm": 0.129486847360236, "learning_rate": 3.0362704753236905e-06, "loss": 0.0038, "step": 40680 }, { "epoch": 0.6657939949275955, "grad_norm": 0.11890158294012872, "learning_rate": 3.0336444332716195e-06, "loss": 0.0028, "step": 40690 }, { "epoch": 0.6659576208786714, "grad_norm": 0.16904242009082043, "learning_rate": 3.031019032708278e-06, "loss": 0.0023, "step": 40700 }, { "epoch": 0.6661212468297472, "grad_norm": 0.20878737715978546, "learning_rate": 3.0283942744901562e-06, "loss": 0.0039, "step": 40710 }, { "epoch": 0.666284872780823, "grad_norm": 0.2644785465065048, "learning_rate": 3.0257701594735356e-06, "loss": 0.003, "step": 40720 }, { "epoch": 0.6664484987318989, "grad_norm": 0.12457724055598955, "learning_rate": 3.0231466885144856e-06, "loss": 0.0024, "step": 40730 }, { "epoch": 0.6666121246829747, "grad_norm": 0.14195933205667619, "learning_rate": 3.0205238624688692e-06, "loss": 0.0025, "step": 40740 }, { "epoch": 0.6667757506340506, "grad_norm": 0.05734868462805376, "learning_rate": 3.0179016821923357e-06, "loss": 0.0014, "step": 40750 }, { "epoch": 0.6669393765851264, "grad_norm": 0.07968922096620716, "learning_rate": 3.0152801485403254e-06, "loss": 0.0016, "step": 40760 }, { "epoch": 0.6671030025362022, "grad_norm": 0.0863898649762563, "learning_rate": 3.0126592623680682e-06, "loss": 0.0017, "step": 40770 }, { "epoch": 0.6672666284872781, "grad_norm": 0.3219425469609863, "learning_rate": 3.010039024530581e-06, "loss": 0.003, "step": 40780 }, { "epoch": 0.6674302544383539, "grad_norm": 0.21590436910313154, "learning_rate": 3.0074194358826686e-06, "loss": 0.0018, "step": 40790 }, { "epoch": 0.6675938803894298, "grad_norm": 0.09149583124572988, "learning_rate": 3.0048004972789284e-06, "loss": 0.0016, "step": 40800 }, { "epoch": 0.6677575063405056, "grad_norm": 0.35200166106536224, "learning_rate": 3.002182209573741e-06, "loss": 0.003, "step": 40810 }, { "epoch": 0.6679211322915815, "grad_norm": 0.07196795209815954, "learning_rate": 2.9995645736212777e-06, "loss": 0.0007, "step": 40820 }, { "epoch": 0.6680847582426573, "grad_norm": 0.10749828898044372, "learning_rate": 2.9969475902754933e-06, "loss": 0.0018, "step": 40830 }, { "epoch": 0.6682483841937331, "grad_norm": 0.11516874932859363, "learning_rate": 2.9943312603901355e-06, "loss": 0.0021, "step": 40840 }, { "epoch": 0.668412010144809, "grad_norm": 0.29385288483058314, "learning_rate": 2.991715584818734e-06, "loss": 0.0071, "step": 40850 }, { "epoch": 0.6685756360958848, "grad_norm": 0.07950238268890146, "learning_rate": 2.9891005644146064e-06, "loss": 0.0033, "step": 40860 }, { "epoch": 0.6687392620469607, "grad_norm": 0.19094952208535834, "learning_rate": 2.9864862000308583e-06, "loss": 0.003, "step": 40870 }, { "epoch": 0.6689028879980365, "grad_norm": 0.10810314690855122, "learning_rate": 2.983872492520379e-06, "loss": 0.0025, "step": 40880 }, { "epoch": 0.6690665139491123, "grad_norm": 0.08182233997925421, "learning_rate": 2.981259442735843e-06, "loss": 0.0021, "step": 40890 }, { "epoch": 0.6692301399001882, "grad_norm": 0.01660795710215538, "learning_rate": 2.978647051529714e-06, "loss": 0.0025, "step": 40900 }, { "epoch": 0.669393765851264, "grad_norm": 0.0821574715269833, "learning_rate": 2.976035319754238e-06, "loss": 0.0019, "step": 40910 }, { "epoch": 0.6695573918023399, "grad_norm": 0.270674784852337, "learning_rate": 2.973424248261444e-06, "loss": 0.002, "step": 40920 }, { "epoch": 0.6697210177534156, "grad_norm": 0.13100342881669663, "learning_rate": 2.9708138379031503e-06, "loss": 0.002, "step": 40930 }, { "epoch": 0.6698846437044915, "grad_norm": 0.0820058590816915, "learning_rate": 2.9682040895309572e-06, "loss": 0.005, "step": 40940 }, { "epoch": 0.6700482696555674, "grad_norm": 0.040961428748845265, "learning_rate": 2.9655950039962465e-06, "loss": 0.0021, "step": 40950 }, { "epoch": 0.6702118956066432, "grad_norm": 0.06012620824639548, "learning_rate": 2.962986582150189e-06, "loss": 0.0074, "step": 40960 }, { "epoch": 0.6703755215577191, "grad_norm": 0.1834192949993341, "learning_rate": 2.960378824843735e-06, "loss": 0.0027, "step": 40970 }, { "epoch": 0.6705391475087948, "grad_norm": 0.028166952745558246, "learning_rate": 2.9577717329276186e-06, "loss": 0.0015, "step": 40980 }, { "epoch": 0.6707027734598707, "grad_norm": 0.6164414045027284, "learning_rate": 2.9551653072523584e-06, "loss": 0.0025, "step": 40990 }, { "epoch": 0.6708663994109466, "grad_norm": 0.10695079591084021, "learning_rate": 2.9525595486682545e-06, "loss": 0.0026, "step": 41000 }, { "epoch": 0.6710300253620224, "grad_norm": 0.07195091333837633, "learning_rate": 2.9499544580253893e-06, "loss": 0.0015, "step": 41010 }, { "epoch": 0.6711936513130983, "grad_norm": 0.1884421931087768, "learning_rate": 2.9473500361736273e-06, "loss": 0.0023, "step": 41020 }, { "epoch": 0.671357277264174, "grad_norm": 0.1028414966117629, "learning_rate": 2.9447462839626163e-06, "loss": 0.0024, "step": 41030 }, { "epoch": 0.6715209032152499, "grad_norm": 0.16820385801526425, "learning_rate": 2.942143202241782e-06, "loss": 0.0024, "step": 41040 }, { "epoch": 0.6716845291663258, "grad_norm": 0.2680442443312905, "learning_rate": 2.939540791860337e-06, "loss": 0.0038, "step": 41050 }, { "epoch": 0.6718481551174016, "grad_norm": 0.022807104573804717, "learning_rate": 2.936939053667269e-06, "loss": 0.0013, "step": 41060 }, { "epoch": 0.6720117810684775, "grad_norm": 0.007735201551940222, "learning_rate": 2.9343379885113497e-06, "loss": 0.0017, "step": 41070 }, { "epoch": 0.6721754070195533, "grad_norm": 0.06415022831170143, "learning_rate": 2.9317375972411322e-06, "loss": 0.0025, "step": 41080 }, { "epoch": 0.6723390329706291, "grad_norm": 0.06374971429116409, "learning_rate": 2.929137880704947e-06, "loss": 0.002, "step": 41090 }, { "epoch": 0.672502658921705, "grad_norm": 0.26021841945345336, "learning_rate": 2.9265388397509043e-06, "loss": 0.0023, "step": 41100 }, { "epoch": 0.6726662848727808, "grad_norm": 0.17526730558955822, "learning_rate": 2.9239404752268973e-06, "loss": 0.0034, "step": 41110 }, { "epoch": 0.6728299108238567, "grad_norm": 0.0962750531408298, "learning_rate": 2.9213427879805954e-06, "loss": 0.0026, "step": 41120 }, { "epoch": 0.6729935367749325, "grad_norm": 0.09437558482822837, "learning_rate": 2.91874577885945e-06, "loss": 0.0027, "step": 41130 }, { "epoch": 0.6731571627260083, "grad_norm": 0.010445814699426627, "learning_rate": 2.916149448710686e-06, "loss": 0.0024, "step": 41140 }, { "epoch": 0.6733207886770842, "grad_norm": 0.10779266810784531, "learning_rate": 2.913553798381313e-06, "loss": 0.0018, "step": 41150 }, { "epoch": 0.67348441462816, "grad_norm": 0.1617484228394904, "learning_rate": 2.9109588287181144e-06, "loss": 0.0029, "step": 41160 }, { "epoch": 0.6736480405792359, "grad_norm": 0.10506760160477985, "learning_rate": 2.908364540567652e-06, "loss": 0.0021, "step": 41170 }, { "epoch": 0.6738116665303117, "grad_norm": 0.2934059769692995, "learning_rate": 2.905770934776271e-06, "loss": 0.0025, "step": 41180 }, { "epoch": 0.6739752924813875, "grad_norm": 0.09724591524984555, "learning_rate": 2.9031780121900855e-06, "loss": 0.0029, "step": 41190 }, { "epoch": 0.6741389184324634, "grad_norm": 0.16370221246817585, "learning_rate": 2.9005857736549926e-06, "loss": 0.0031, "step": 41200 }, { "epoch": 0.6743025443835392, "grad_norm": 0.10688248175711239, "learning_rate": 2.8979942200166635e-06, "loss": 0.0015, "step": 41210 }, { "epoch": 0.6744661703346151, "grad_norm": 0.37216064439188923, "learning_rate": 2.895403352120546e-06, "loss": 0.0032, "step": 41220 }, { "epoch": 0.6746297962856909, "grad_norm": 0.289860475146755, "learning_rate": 2.8928131708118635e-06, "loss": 0.0035, "step": 41230 }, { "epoch": 0.6747934222367667, "grad_norm": 0.08497006426129707, "learning_rate": 2.890223676935622e-06, "loss": 0.0032, "step": 41240 }, { "epoch": 0.6749570481878426, "grad_norm": 0.2525358679804982, "learning_rate": 2.8876348713365944e-06, "loss": 0.0018, "step": 41250 }, { "epoch": 0.6751206741389184, "grad_norm": 0.4327332370270823, "learning_rate": 2.885046754859333e-06, "loss": 0.006, "step": 41260 }, { "epoch": 0.6752843000899943, "grad_norm": 0.12277440634157726, "learning_rate": 2.8824593283481663e-06, "loss": 0.0033, "step": 41270 }, { "epoch": 0.6754479260410701, "grad_norm": 0.24532649375838544, "learning_rate": 2.8798725926471953e-06, "loss": 0.002, "step": 41280 }, { "epoch": 0.675611551992146, "grad_norm": 0.11038957527955254, "learning_rate": 2.8772865486002955e-06, "loss": 0.0045, "step": 41290 }, { "epoch": 0.6757751779432218, "grad_norm": 0.11954932068585873, "learning_rate": 2.8747011970511207e-06, "loss": 0.0027, "step": 41300 }, { "epoch": 0.6759388038942976, "grad_norm": 0.09155624218907009, "learning_rate": 2.8721165388430956e-06, "loss": 0.003, "step": 41310 }, { "epoch": 0.6761024298453735, "grad_norm": 0.10862737127753798, "learning_rate": 2.869532574819418e-06, "loss": 0.0021, "step": 41320 }, { "epoch": 0.6762660557964493, "grad_norm": 0.031737075572473454, "learning_rate": 2.866949305823062e-06, "loss": 0.0031, "step": 41330 }, { "epoch": 0.6764296817475252, "grad_norm": 0.1627864680478158, "learning_rate": 2.8643667326967715e-06, "loss": 0.0048, "step": 41340 }, { "epoch": 0.676593307698601, "grad_norm": 0.13272210155579808, "learning_rate": 2.8617848562830653e-06, "loss": 0.0016, "step": 41350 }, { "epoch": 0.6767569336496768, "grad_norm": 0.1289969767177936, "learning_rate": 2.8592036774242383e-06, "loss": 0.0016, "step": 41360 }, { "epoch": 0.6769205596007527, "grad_norm": 0.08914399738044176, "learning_rate": 2.8566231969623507e-06, "loss": 0.0008, "step": 41370 }, { "epoch": 0.6770841855518285, "grad_norm": 0.35248995228486585, "learning_rate": 2.854043415739241e-06, "loss": 0.0032, "step": 41380 }, { "epoch": 0.6772478115029044, "grad_norm": 0.12126033527672908, "learning_rate": 2.8514643345965163e-06, "loss": 0.0013, "step": 41390 }, { "epoch": 0.6774114374539802, "grad_norm": 0.021444309320496187, "learning_rate": 2.848885954375556e-06, "loss": 0.0022, "step": 41400 }, { "epoch": 0.677575063405056, "grad_norm": 0.20079350632874507, "learning_rate": 2.846308275917509e-06, "loss": 0.0043, "step": 41410 }, { "epoch": 0.6777386893561319, "grad_norm": 0.2263365126980432, "learning_rate": 2.8437313000633018e-06, "loss": 0.0024, "step": 41420 }, { "epoch": 0.6779023153072077, "grad_norm": 0.16264446107881175, "learning_rate": 2.841155027653625e-06, "loss": 0.0018, "step": 41430 }, { "epoch": 0.6780659412582836, "grad_norm": 0.09150596021591757, "learning_rate": 2.8385794595289407e-06, "loss": 0.0018, "step": 41440 }, { "epoch": 0.6782295672093595, "grad_norm": 0.17786958743712, "learning_rate": 2.8360045965294847e-06, "loss": 0.0019, "step": 41450 }, { "epoch": 0.6783931931604352, "grad_norm": 0.1974893193267756, "learning_rate": 2.8334304394952596e-06, "loss": 0.0033, "step": 41460 }, { "epoch": 0.6785568191115111, "grad_norm": 0.14526319601973367, "learning_rate": 2.8308569892660353e-06, "loss": 0.0028, "step": 41470 }, { "epoch": 0.6787204450625869, "grad_norm": 0.15592358737235928, "learning_rate": 2.8282842466813604e-06, "loss": 0.0023, "step": 41480 }, { "epoch": 0.6788840710136628, "grad_norm": 0.09372493912025202, "learning_rate": 2.825712212580544e-06, "loss": 0.0031, "step": 41490 }, { "epoch": 0.6790476969647387, "grad_norm": 0.21160221563883158, "learning_rate": 2.8231408878026657e-06, "loss": 0.0041, "step": 41500 }, { "epoch": 0.6792113229158144, "grad_norm": 0.11999611124703345, "learning_rate": 2.8205702731865757e-06, "loss": 0.0016, "step": 41510 }, { "epoch": 0.6793749488668903, "grad_norm": 0.21433712641464028, "learning_rate": 2.8180003695708925e-06, "loss": 0.0017, "step": 41520 }, { "epoch": 0.6795385748179661, "grad_norm": 0.15433427510459735, "learning_rate": 2.8154311777939995e-06, "loss": 0.0017, "step": 41530 }, { "epoch": 0.679702200769042, "grad_norm": 0.18691691966755242, "learning_rate": 2.81286269869405e-06, "loss": 0.0032, "step": 41540 }, { "epoch": 0.6798658267201179, "grad_norm": 0.1360947914601187, "learning_rate": 2.8102949331089676e-06, "loss": 0.0025, "step": 41550 }, { "epoch": 0.6800294526711936, "grad_norm": 0.21745306544946064, "learning_rate": 2.8077278818764386e-06, "loss": 0.0041, "step": 41560 }, { "epoch": 0.6801930786222695, "grad_norm": 0.181486665325196, "learning_rate": 2.8051615458339188e-06, "loss": 0.0026, "step": 41570 }, { "epoch": 0.6803567045733453, "grad_norm": 0.10783998267729482, "learning_rate": 2.80259592581863e-06, "loss": 0.0017, "step": 41580 }, { "epoch": 0.6805203305244212, "grad_norm": 0.3006325954414733, "learning_rate": 2.8000310226675593e-06, "loss": 0.0022, "step": 41590 }, { "epoch": 0.6806839564754971, "grad_norm": 0.17007047018142366, "learning_rate": 2.7974668372174594e-06, "loss": 0.0025, "step": 41600 }, { "epoch": 0.6808475824265728, "grad_norm": 0.08376332811390634, "learning_rate": 2.7949033703048555e-06, "loss": 0.003, "step": 41610 }, { "epoch": 0.6810112083776487, "grad_norm": 0.30359373417007296, "learning_rate": 2.792340622766032e-06, "loss": 0.0027, "step": 41620 }, { "epoch": 0.6811748343287245, "grad_norm": 0.3445363412051261, "learning_rate": 2.789778595437034e-06, "loss": 0.0022, "step": 41630 }, { "epoch": 0.6813384602798004, "grad_norm": 0.07746944132159855, "learning_rate": 2.787217289153685e-06, "loss": 0.0045, "step": 41640 }, { "epoch": 0.6815020862308763, "grad_norm": 0.09365644305325534, "learning_rate": 2.784656704751562e-06, "loss": 0.0027, "step": 41650 }, { "epoch": 0.681665712181952, "grad_norm": 0.030013011260191038, "learning_rate": 2.7820968430660102e-06, "loss": 0.0022, "step": 41660 }, { "epoch": 0.6818293381330279, "grad_norm": 0.08126143794990909, "learning_rate": 2.7795377049321413e-06, "loss": 0.0025, "step": 41670 }, { "epoch": 0.6819929640841037, "grad_norm": 0.13863908921669518, "learning_rate": 2.7769792911848304e-06, "loss": 0.003, "step": 41680 }, { "epoch": 0.6821565900351796, "grad_norm": 0.21547303100062978, "learning_rate": 2.774421602658709e-06, "loss": 0.0014, "step": 41690 }, { "epoch": 0.6823202159862555, "grad_norm": 0.15936463847171142, "learning_rate": 2.771864640188182e-06, "loss": 0.0015, "step": 41700 }, { "epoch": 0.6824838419373312, "grad_norm": 0.1121288519069009, "learning_rate": 2.7693084046074127e-06, "loss": 0.0021, "step": 41710 }, { "epoch": 0.6826474678884071, "grad_norm": 0.08663973554616647, "learning_rate": 2.7667528967503244e-06, "loss": 0.0017, "step": 41720 }, { "epoch": 0.6828110938394829, "grad_norm": 0.033079284330341914, "learning_rate": 2.7641981174506118e-06, "loss": 0.0014, "step": 41730 }, { "epoch": 0.6829747197905588, "grad_norm": 0.30450504500214215, "learning_rate": 2.7616440675417257e-06, "loss": 0.0017, "step": 41740 }, { "epoch": 0.6831383457416347, "grad_norm": 0.12578510081389757, "learning_rate": 2.759090747856874e-06, "loss": 0.0055, "step": 41750 }, { "epoch": 0.6833019716927105, "grad_norm": 0.0365232978479947, "learning_rate": 2.7565381592290375e-06, "loss": 0.0014, "step": 41760 }, { "epoch": 0.6834655976437863, "grad_norm": 0.06815296567336027, "learning_rate": 2.753986302490952e-06, "loss": 0.0024, "step": 41770 }, { "epoch": 0.6836292235948621, "grad_norm": 0.11923673598337081, "learning_rate": 2.7514351784751126e-06, "loss": 0.0021, "step": 41780 }, { "epoch": 0.683792849545938, "grad_norm": 0.02716008884793376, "learning_rate": 2.7488847880137833e-06, "loss": 0.0043, "step": 41790 }, { "epoch": 0.6839564754970138, "grad_norm": 0.1538618945099668, "learning_rate": 2.746335131938983e-06, "loss": 0.0023, "step": 41800 }, { "epoch": 0.6841201014480897, "grad_norm": 0.013631361854169284, "learning_rate": 2.743786211082489e-06, "loss": 0.002, "step": 41810 }, { "epoch": 0.6842837273991655, "grad_norm": 0.07306195999373453, "learning_rate": 2.7412380262758407e-06, "loss": 0.0034, "step": 41820 }, { "epoch": 0.6844473533502413, "grad_norm": 0.22165486790302227, "learning_rate": 2.738690578350344e-06, "loss": 0.0031, "step": 41830 }, { "epoch": 0.6846109793013172, "grad_norm": 0.09818286395401093, "learning_rate": 2.7361438681370544e-06, "loss": 0.0026, "step": 41840 }, { "epoch": 0.684774605252393, "grad_norm": 0.10467488516708244, "learning_rate": 2.7335978964667897e-06, "loss": 0.0017, "step": 41850 }, { "epoch": 0.6849382312034689, "grad_norm": 0.039540836019590214, "learning_rate": 2.7310526641701353e-06, "loss": 0.0022, "step": 41860 }, { "epoch": 0.6851018571545447, "grad_norm": 0.10848181635045313, "learning_rate": 2.7285081720774207e-06, "loss": 0.0013, "step": 41870 }, { "epoch": 0.6852654831056205, "grad_norm": 0.09182993793651825, "learning_rate": 2.725964421018743e-06, "loss": 0.005, "step": 41880 }, { "epoch": 0.6854291090566964, "grad_norm": 0.2313383011637226, "learning_rate": 2.7234214118239592e-06, "loss": 0.0025, "step": 41890 }, { "epoch": 0.6855927350077722, "grad_norm": 0.44247973710128646, "learning_rate": 2.720879145322679e-06, "loss": 0.0037, "step": 41900 }, { "epoch": 0.6857563609588481, "grad_norm": 0.1468770738686299, "learning_rate": 2.7183376223442704e-06, "loss": 0.0021, "step": 41910 }, { "epoch": 0.685919986909924, "grad_norm": 0.06078744822822211, "learning_rate": 2.715796843717865e-06, "loss": 0.0025, "step": 41920 }, { "epoch": 0.6860836128609997, "grad_norm": 0.09079303704546728, "learning_rate": 2.7132568102723424e-06, "loss": 0.0029, "step": 41930 }, { "epoch": 0.6862472388120756, "grad_norm": 0.37809724214213103, "learning_rate": 2.7107175228363425e-06, "loss": 0.0028, "step": 41940 }, { "epoch": 0.6864108647631514, "grad_norm": 0.19551475233796622, "learning_rate": 2.708178982238268e-06, "loss": 0.002, "step": 41950 }, { "epoch": 0.6865744907142273, "grad_norm": 0.09981874090353988, "learning_rate": 2.70564118930627e-06, "loss": 0.003, "step": 41960 }, { "epoch": 0.6867381166653032, "grad_norm": 0.2986000350999538, "learning_rate": 2.7031041448682573e-06, "loss": 0.0026, "step": 41970 }, { "epoch": 0.6869017426163789, "grad_norm": 0.20915521672323764, "learning_rate": 2.7005678497519007e-06, "loss": 0.002, "step": 41980 }, { "epoch": 0.6870653685674548, "grad_norm": 0.2791850969433655, "learning_rate": 2.698032304784617e-06, "loss": 0.0039, "step": 41990 }, { "epoch": 0.6872289945185306, "grad_norm": 0.13210936726214428, "learning_rate": 2.695497510793581e-06, "loss": 0.0028, "step": 42000 }, { "epoch": 0.6873926204696065, "grad_norm": 0.2695609555567078, "learning_rate": 2.6929634686057305e-06, "loss": 0.0026, "step": 42010 }, { "epoch": 0.6875562464206824, "grad_norm": 0.10556648377407195, "learning_rate": 2.690430179047749e-06, "loss": 0.0017, "step": 42020 }, { "epoch": 0.6877198723717581, "grad_norm": 0.07840149888778411, "learning_rate": 2.687897642946075e-06, "loss": 0.0015, "step": 42030 }, { "epoch": 0.687883498322834, "grad_norm": 0.06528244167430895, "learning_rate": 2.685365861126911e-06, "loss": 0.0032, "step": 42040 }, { "epoch": 0.6880471242739098, "grad_norm": 0.24806960049129503, "learning_rate": 2.682834834416198e-06, "loss": 0.0032, "step": 42050 }, { "epoch": 0.6882107502249857, "grad_norm": 0.12819523596603435, "learning_rate": 2.68030456363964e-06, "loss": 0.0023, "step": 42060 }, { "epoch": 0.6883743761760616, "grad_norm": 0.12031307731792824, "learning_rate": 2.677775049622697e-06, "loss": 0.0031, "step": 42070 }, { "epoch": 0.6885380021271373, "grad_norm": 0.10054300019770546, "learning_rate": 2.6752462931905764e-06, "loss": 0.0039, "step": 42080 }, { "epoch": 0.6887016280782132, "grad_norm": 0.2599112448472979, "learning_rate": 2.6727182951682383e-06, "loss": 0.0039, "step": 42090 }, { "epoch": 0.688865254029289, "grad_norm": 0.05321191812424597, "learning_rate": 2.670191056380403e-06, "loss": 0.0014, "step": 42100 }, { "epoch": 0.6890288799803649, "grad_norm": 1.0787030677670457, "learning_rate": 2.667664577651531e-06, "loss": 0.004, "step": 42110 }, { "epoch": 0.6891925059314408, "grad_norm": 0.14782688536671787, "learning_rate": 2.665138859805842e-06, "loss": 0.0019, "step": 42120 }, { "epoch": 0.6893561318825165, "grad_norm": 0.18113953384072565, "learning_rate": 2.6626139036673106e-06, "loss": 0.0033, "step": 42130 }, { "epoch": 0.6895197578335924, "grad_norm": 0.37737101692411124, "learning_rate": 2.660089710059657e-06, "loss": 0.0038, "step": 42140 }, { "epoch": 0.6896833837846682, "grad_norm": 0.06192965118034621, "learning_rate": 2.657566279806355e-06, "loss": 0.0016, "step": 42150 }, { "epoch": 0.6898470097357441, "grad_norm": 0.2419755439107186, "learning_rate": 2.6550436137306283e-06, "loss": 0.0043, "step": 42160 }, { "epoch": 0.69001063568682, "grad_norm": 0.19497767055163226, "learning_rate": 2.6525217126554536e-06, "loss": 0.0026, "step": 42170 }, { "epoch": 0.6901742616378957, "grad_norm": 0.18707493061328978, "learning_rate": 2.650000577403555e-06, "loss": 0.0013, "step": 42180 }, { "epoch": 0.6903378875889716, "grad_norm": 0.22304668472140066, "learning_rate": 2.6474802087974072e-06, "loss": 0.0047, "step": 42190 }, { "epoch": 0.6905015135400474, "grad_norm": 0.1679943475615923, "learning_rate": 2.6449606076592394e-06, "loss": 0.0029, "step": 42200 }, { "epoch": 0.6906651394911233, "grad_norm": 0.15798290736110393, "learning_rate": 2.642441774811025e-06, "loss": 0.0021, "step": 42210 }, { "epoch": 0.6908287654421992, "grad_norm": 0.1559890354693163, "learning_rate": 2.639923711074489e-06, "loss": 0.0017, "step": 42220 }, { "epoch": 0.690992391393275, "grad_norm": 0.21856857513277864, "learning_rate": 2.6374064172711046e-06, "loss": 0.0032, "step": 42230 }, { "epoch": 0.6911560173443508, "grad_norm": 0.14487209534974688, "learning_rate": 2.6348898942220954e-06, "loss": 0.0013, "step": 42240 }, { "epoch": 0.6913196432954266, "grad_norm": 0.057600959260587864, "learning_rate": 2.6323741427484294e-06, "loss": 0.0011, "step": 42250 }, { "epoch": 0.6914832692465025, "grad_norm": 0.20340513087614323, "learning_rate": 2.6298591636708304e-06, "loss": 0.0027, "step": 42260 }, { "epoch": 0.6916468951975784, "grad_norm": 0.08370605370788999, "learning_rate": 2.627344957809764e-06, "loss": 0.0038, "step": 42270 }, { "epoch": 0.6918105211486542, "grad_norm": 0.19153242011097887, "learning_rate": 2.6248315259854455e-06, "loss": 0.0048, "step": 42280 }, { "epoch": 0.69197414709973, "grad_norm": 0.6059768206292153, "learning_rate": 2.6223188690178377e-06, "loss": 0.0057, "step": 42290 }, { "epoch": 0.6921377730508058, "grad_norm": 0.13463119945813823, "learning_rate": 2.6198069877266496e-06, "loss": 0.0028, "step": 42300 }, { "epoch": 0.6923013990018817, "grad_norm": 0.07834107458140202, "learning_rate": 2.6172958829313365e-06, "loss": 0.0024, "step": 42310 }, { "epoch": 0.6924650249529576, "grad_norm": 0.05727839177059226, "learning_rate": 2.614785555451107e-06, "loss": 0.0025, "step": 42320 }, { "epoch": 0.6926286509040334, "grad_norm": 0.25599497018985073, "learning_rate": 2.612276006104908e-06, "loss": 0.0026, "step": 42330 }, { "epoch": 0.6927922768551092, "grad_norm": 0.17929990937914794, "learning_rate": 2.609767235711435e-06, "loss": 0.0027, "step": 42340 }, { "epoch": 0.692955902806185, "grad_norm": 0.027929323260466855, "learning_rate": 2.607259245089131e-06, "loss": 0.0021, "step": 42350 }, { "epoch": 0.6931195287572609, "grad_norm": 0.033992221815899504, "learning_rate": 2.604752035056184e-06, "loss": 0.003, "step": 42360 }, { "epoch": 0.6932831547083368, "grad_norm": 0.10127130861645584, "learning_rate": 2.6022456064305234e-06, "loss": 0.0025, "step": 42370 }, { "epoch": 0.6934467806594126, "grad_norm": 0.21152248927512007, "learning_rate": 2.5997399600298325e-06, "loss": 0.0018, "step": 42380 }, { "epoch": 0.6936104066104885, "grad_norm": 0.046222316072202875, "learning_rate": 2.5972350966715317e-06, "loss": 0.0019, "step": 42390 }, { "epoch": 0.6937740325615642, "grad_norm": 0.17598986019860094, "learning_rate": 2.594731017172788e-06, "loss": 0.0028, "step": 42400 }, { "epoch": 0.6939376585126401, "grad_norm": 0.029921102258220013, "learning_rate": 2.592227722350513e-06, "loss": 0.0013, "step": 42410 }, { "epoch": 0.694101284463716, "grad_norm": 0.22065976748320415, "learning_rate": 2.5897252130213633e-06, "loss": 0.0022, "step": 42420 }, { "epoch": 0.6942649104147918, "grad_norm": 0.1778260313593099, "learning_rate": 2.5872234900017367e-06, "loss": 0.0022, "step": 42430 }, { "epoch": 0.6944285363658677, "grad_norm": 0.07997018527474191, "learning_rate": 2.584722554107778e-06, "loss": 0.0017, "step": 42440 }, { "epoch": 0.6945921623169434, "grad_norm": 0.11294514882758254, "learning_rate": 2.5822224061553723e-06, "loss": 0.0024, "step": 42450 }, { "epoch": 0.6947557882680193, "grad_norm": 0.1605883759224009, "learning_rate": 2.5797230469601495e-06, "loss": 0.0019, "step": 42460 }, { "epoch": 0.6949194142190952, "grad_norm": 0.1357951844986823, "learning_rate": 2.577224477337481e-06, "loss": 0.0037, "step": 42470 }, { "epoch": 0.695083040170171, "grad_norm": 0.09727077421465809, "learning_rate": 2.5747266981024797e-06, "loss": 0.0035, "step": 42480 }, { "epoch": 0.6952466661212469, "grad_norm": 0.07238823904652714, "learning_rate": 2.572229710070003e-06, "loss": 0.0037, "step": 42490 }, { "epoch": 0.6954102920723226, "grad_norm": 0.05850072874101029, "learning_rate": 2.5697335140546476e-06, "loss": 0.0015, "step": 42500 }, { "epoch": 0.6955739180233985, "grad_norm": 0.08006524619896124, "learning_rate": 2.5672381108707568e-06, "loss": 0.0026, "step": 42510 }, { "epoch": 0.6957375439744744, "grad_norm": 0.22582375911499258, "learning_rate": 2.5647435013324083e-06, "loss": 0.0015, "step": 42520 }, { "epoch": 0.6959011699255502, "grad_norm": 0.1593822373487001, "learning_rate": 2.562249686253426e-06, "loss": 0.0012, "step": 42530 }, { "epoch": 0.6960647958766261, "grad_norm": 0.14592473505468992, "learning_rate": 2.5597566664473733e-06, "loss": 0.0022, "step": 42540 }, { "epoch": 0.6962284218277018, "grad_norm": 0.13714616441540808, "learning_rate": 2.5572644427275526e-06, "loss": 0.0044, "step": 42550 }, { "epoch": 0.6963920477787777, "grad_norm": 0.07518848826728511, "learning_rate": 2.554773015907007e-06, "loss": 0.002, "step": 42560 }, { "epoch": 0.6965556737298536, "grad_norm": 0.39159404842972634, "learning_rate": 2.5522823867985225e-06, "loss": 0.0025, "step": 42570 }, { "epoch": 0.6967192996809294, "grad_norm": 0.1929937804922995, "learning_rate": 2.549792556214623e-06, "loss": 0.0036, "step": 42580 }, { "epoch": 0.6968829256320053, "grad_norm": 0.08372516332858088, "learning_rate": 2.54730352496757e-06, "loss": 0.0027, "step": 42590 }, { "epoch": 0.697046551583081, "grad_norm": 0.26972807598467047, "learning_rate": 2.544815293869368e-06, "loss": 0.0014, "step": 42600 }, { "epoch": 0.6972101775341569, "grad_norm": 0.06408195803365016, "learning_rate": 2.542327863731756e-06, "loss": 0.0031, "step": 42610 }, { "epoch": 0.6973738034852328, "grad_norm": 0.14703981151765072, "learning_rate": 2.539841235366214e-06, "loss": 0.0016, "step": 42620 }, { "epoch": 0.6975374294363086, "grad_norm": 0.3291114015317981, "learning_rate": 2.5373554095839627e-06, "loss": 0.0022, "step": 42630 }, { "epoch": 0.6977010553873845, "grad_norm": 0.0974522599479989, "learning_rate": 2.5348703871959584e-06, "loss": 0.0022, "step": 42640 }, { "epoch": 0.6978646813384602, "grad_norm": 0.08170836080619386, "learning_rate": 2.5323861690128946e-06, "loss": 0.0035, "step": 42650 }, { "epoch": 0.6980283072895361, "grad_norm": 0.3298263758887845, "learning_rate": 2.529902755845205e-06, "loss": 0.0041, "step": 42660 }, { "epoch": 0.6981919332406119, "grad_norm": 0.24360244605998008, "learning_rate": 2.527420148503058e-06, "loss": 0.0017, "step": 42670 }, { "epoch": 0.6983555591916878, "grad_norm": 0.12323794976042991, "learning_rate": 2.5249383477963584e-06, "loss": 0.0021, "step": 42680 }, { "epoch": 0.6985191851427637, "grad_norm": 0.08182833676286878, "learning_rate": 2.522457354534754e-06, "loss": 0.0023, "step": 42690 }, { "epoch": 0.6986828110938395, "grad_norm": 0.1596435859906205, "learning_rate": 2.5199771695276233e-06, "loss": 0.0014, "step": 42700 }, { "epoch": 0.6988464370449153, "grad_norm": 0.2531167413612979, "learning_rate": 2.5174977935840817e-06, "loss": 0.0032, "step": 42710 }, { "epoch": 0.6990100629959911, "grad_norm": 0.01499691021840432, "learning_rate": 2.515019227512983e-06, "loss": 0.0015, "step": 42720 }, { "epoch": 0.699173688947067, "grad_norm": 0.24050260370449672, "learning_rate": 2.5125414721229152e-06, "loss": 0.0026, "step": 42730 }, { "epoch": 0.6993373148981429, "grad_norm": 0.008165072452152035, "learning_rate": 2.5100645282222004e-06, "loss": 0.0024, "step": 42740 }, { "epoch": 0.6995009408492187, "grad_norm": 0.213110493539859, "learning_rate": 2.507588396618901e-06, "loss": 0.0019, "step": 42750 }, { "epoch": 0.6996645668002945, "grad_norm": 0.006691400971860885, "learning_rate": 2.5051130781208095e-06, "loss": 0.0017, "step": 42760 }, { "epoch": 0.6998281927513703, "grad_norm": 0.16603569730029816, "learning_rate": 2.5026385735354553e-06, "loss": 0.0025, "step": 42770 }, { "epoch": 0.6999918187024462, "grad_norm": 0.1240409447365724, "learning_rate": 2.500164883670101e-06, "loss": 0.0023, "step": 42780 }, { "epoch": 0.7001554446535221, "grad_norm": 0.14596170231879715, "learning_rate": 2.4976920093317442e-06, "loss": 0.0013, "step": 42790 }, { "epoch": 0.7003190706045979, "grad_norm": 0.06703826203191914, "learning_rate": 2.495219951327117e-06, "loss": 0.002, "step": 42800 }, { "epoch": 0.7004826965556737, "grad_norm": 0.30908993348752073, "learning_rate": 2.4927487104626826e-06, "loss": 0.0025, "step": 42810 }, { "epoch": 0.7006463225067495, "grad_norm": 0.03648786122586192, "learning_rate": 2.4902782875446424e-06, "loss": 0.0031, "step": 42820 }, { "epoch": 0.7008099484578254, "grad_norm": 0.11710588033916008, "learning_rate": 2.48780868337893e-06, "loss": 0.0018, "step": 42830 }, { "epoch": 0.7009735744089013, "grad_norm": 0.11180839243936214, "learning_rate": 2.485339898771203e-06, "loss": 0.0019, "step": 42840 }, { "epoch": 0.7011372003599771, "grad_norm": 0.12161946187742793, "learning_rate": 2.4828719345268647e-06, "loss": 0.0024, "step": 42850 }, { "epoch": 0.701300826311053, "grad_norm": 0.33568839966015046, "learning_rate": 2.480404791451043e-06, "loss": 0.0026, "step": 42860 }, { "epoch": 0.7014644522621287, "grad_norm": 0.06373395615617619, "learning_rate": 2.477938470348598e-06, "loss": 0.0027, "step": 42870 }, { "epoch": 0.7016280782132046, "grad_norm": 0.25675438810550766, "learning_rate": 2.4754729720241295e-06, "loss": 0.0033, "step": 42880 }, { "epoch": 0.7017917041642805, "grad_norm": 0.1859951100071559, "learning_rate": 2.473008297281955e-06, "loss": 0.0022, "step": 42890 }, { "epoch": 0.7019553301153563, "grad_norm": 0.13239680154073993, "learning_rate": 2.4705444469261335e-06, "loss": 0.0031, "step": 42900 }, { "epoch": 0.7021189560664322, "grad_norm": 0.13048337605727764, "learning_rate": 2.4680814217604547e-06, "loss": 0.0018, "step": 42910 }, { "epoch": 0.7022825820175079, "grad_norm": 0.04740025112416992, "learning_rate": 2.465619222588436e-06, "loss": 0.0025, "step": 42920 }, { "epoch": 0.7024462079685838, "grad_norm": 0.09142486136667569, "learning_rate": 2.4631578502133246e-06, "loss": 0.0015, "step": 42930 }, { "epoch": 0.7026098339196597, "grad_norm": 0.15076886995063912, "learning_rate": 2.460697305438105e-06, "loss": 0.0033, "step": 42940 }, { "epoch": 0.7027734598707355, "grad_norm": 0.07234335583239838, "learning_rate": 2.45823758906548e-06, "loss": 0.0024, "step": 42950 }, { "epoch": 0.7029370858218114, "grad_norm": 0.2102063393373935, "learning_rate": 2.4557787018978896e-06, "loss": 0.0022, "step": 42960 }, { "epoch": 0.7031007117728871, "grad_norm": 0.1252135179822643, "learning_rate": 2.453320644737506e-06, "loss": 0.0016, "step": 42970 }, { "epoch": 0.703264337723963, "grad_norm": 0.14203836993027255, "learning_rate": 2.4508634183862243e-06, "loss": 0.003, "step": 42980 }, { "epoch": 0.7034279636750389, "grad_norm": 0.12570227124008562, "learning_rate": 2.44840702364567e-06, "loss": 0.0044, "step": 42990 }, { "epoch": 0.7035915896261147, "grad_norm": 0.10927408205950763, "learning_rate": 2.4459514613172036e-06, "loss": 0.0011, "step": 43000 }, { "epoch": 0.7037552155771906, "grad_norm": 0.15512188159669973, "learning_rate": 2.443496732201903e-06, "loss": 0.0023, "step": 43010 }, { "epoch": 0.7039188415282663, "grad_norm": 0.08240699106798485, "learning_rate": 2.4410428371005812e-06, "loss": 0.0017, "step": 43020 }, { "epoch": 0.7040824674793422, "grad_norm": 0.06914727288266999, "learning_rate": 2.4385897768137806e-06, "loss": 0.0018, "step": 43030 }, { "epoch": 0.7042460934304181, "grad_norm": 0.10836406557590571, "learning_rate": 2.436137552141768e-06, "loss": 0.0048, "step": 43040 }, { "epoch": 0.7044097193814939, "grad_norm": 0.12569508329837245, "learning_rate": 2.4336861638845353e-06, "loss": 0.002, "step": 43050 }, { "epoch": 0.7045733453325698, "grad_norm": 0.09387843603584835, "learning_rate": 2.431235612841811e-06, "loss": 0.0041, "step": 43060 }, { "epoch": 0.7047369712836455, "grad_norm": 0.045913252266143366, "learning_rate": 2.4287858998130387e-06, "loss": 0.0023, "step": 43070 }, { "epoch": 0.7049005972347214, "grad_norm": 0.058679956327134374, "learning_rate": 2.426337025597393e-06, "loss": 0.002, "step": 43080 }, { "epoch": 0.7050642231857973, "grad_norm": 0.06851702085204948, "learning_rate": 2.423888990993781e-06, "loss": 0.0018, "step": 43090 }, { "epoch": 0.7052278491368731, "grad_norm": 0.12662128009270104, "learning_rate": 2.4214417968008263e-06, "loss": 0.0024, "step": 43100 }, { "epoch": 0.705391475087949, "grad_norm": 0.2661755365389091, "learning_rate": 2.4189954438168834e-06, "loss": 0.0028, "step": 43110 }, { "epoch": 0.7055551010390247, "grad_norm": 0.44854412174273567, "learning_rate": 2.416549932840036e-06, "loss": 0.0022, "step": 43120 }, { "epoch": 0.7057187269901006, "grad_norm": 0.013531746978043236, "learning_rate": 2.4141052646680836e-06, "loss": 0.0027, "step": 43130 }, { "epoch": 0.7058823529411765, "grad_norm": 0.3333458081072214, "learning_rate": 2.411661440098558e-06, "loss": 0.0014, "step": 43140 }, { "epoch": 0.7060459788922523, "grad_norm": 0.056146300361681764, "learning_rate": 2.409218459928711e-06, "loss": 0.0027, "step": 43150 }, { "epoch": 0.7062096048433282, "grad_norm": 0.1947461125292986, "learning_rate": 2.4067763249555265e-06, "loss": 0.002, "step": 43160 }, { "epoch": 0.706373230794404, "grad_norm": 0.12745218924730045, "learning_rate": 2.404335035975705e-06, "loss": 0.0021, "step": 43170 }, { "epoch": 0.7065368567454798, "grad_norm": 0.08093980793897779, "learning_rate": 2.401894593785675e-06, "loss": 0.0035, "step": 43180 }, { "epoch": 0.7067004826965557, "grad_norm": 0.05602852887606721, "learning_rate": 2.399454999181587e-06, "loss": 0.0021, "step": 43190 }, { "epoch": 0.7068641086476315, "grad_norm": 0.16011167735956833, "learning_rate": 2.397016252959315e-06, "loss": 0.0039, "step": 43200 }, { "epoch": 0.7070277345987074, "grad_norm": 0.23870543082287238, "learning_rate": 2.3945783559144558e-06, "loss": 0.0039, "step": 43210 }, { "epoch": 0.7071913605497832, "grad_norm": 0.17849197238636635, "learning_rate": 2.3921413088423335e-06, "loss": 0.0035, "step": 43220 }, { "epoch": 0.707354986500859, "grad_norm": 0.08108283362383914, "learning_rate": 2.38970511253799e-06, "loss": 0.0081, "step": 43230 }, { "epoch": 0.7075186124519349, "grad_norm": 0.10638935923987021, "learning_rate": 2.3872697677961916e-06, "loss": 0.0021, "step": 43240 }, { "epoch": 0.7076822384030107, "grad_norm": 0.1957088501451425, "learning_rate": 2.3848352754114257e-06, "loss": 0.0016, "step": 43250 }, { "epoch": 0.7078458643540866, "grad_norm": 0.04746579754730785, "learning_rate": 2.382401636177902e-06, "loss": 0.0029, "step": 43260 }, { "epoch": 0.7080094903051624, "grad_norm": 0.09525427884736594, "learning_rate": 2.3799688508895523e-06, "loss": 0.0021, "step": 43270 }, { "epoch": 0.7081731162562382, "grad_norm": 0.193376770408006, "learning_rate": 2.377536920340032e-06, "loss": 0.0017, "step": 43280 }, { "epoch": 0.7083367422073141, "grad_norm": 0.005833740717229194, "learning_rate": 2.3751058453227135e-06, "loss": 0.0019, "step": 43290 }, { "epoch": 0.7085003681583899, "grad_norm": 0.15113952709746847, "learning_rate": 2.3726756266306923e-06, "loss": 0.0018, "step": 43300 }, { "epoch": 0.7086639941094658, "grad_norm": 0.12522710726632713, "learning_rate": 2.370246265056785e-06, "loss": 0.0034, "step": 43310 }, { "epoch": 0.7088276200605416, "grad_norm": 0.15463162496647995, "learning_rate": 2.3678177613935264e-06, "loss": 0.0017, "step": 43320 }, { "epoch": 0.7089912460116174, "grad_norm": 0.14780219613726406, "learning_rate": 2.365390116433172e-06, "loss": 0.0027, "step": 43330 }, { "epoch": 0.7091548719626933, "grad_norm": 0.19317825983289047, "learning_rate": 2.3629633309677024e-06, "loss": 0.005, "step": 43340 }, { "epoch": 0.7093184979137691, "grad_norm": 0.22066408561235396, "learning_rate": 2.36053740578881e-06, "loss": 0.0045, "step": 43350 }, { "epoch": 0.709482123864845, "grad_norm": 0.04282996708322745, "learning_rate": 2.3581123416879107e-06, "loss": 0.0017, "step": 43360 }, { "epoch": 0.7096457498159208, "grad_norm": 0.0278570579881955, "learning_rate": 2.355688139456139e-06, "loss": 0.0017, "step": 43370 }, { "epoch": 0.7098093757669967, "grad_norm": 0.06375907651803764, "learning_rate": 2.3532647998843484e-06, "loss": 0.0021, "step": 43380 }, { "epoch": 0.7099730017180725, "grad_norm": 0.2699096096749135, "learning_rate": 2.350842323763108e-06, "loss": 0.0022, "step": 43390 }, { "epoch": 0.7101366276691483, "grad_norm": 0.3456814471074755, "learning_rate": 2.3484207118827113e-06, "loss": 0.004, "step": 43400 }, { "epoch": 0.7103002536202242, "grad_norm": 0.1759268920962095, "learning_rate": 2.345999965033165e-06, "loss": 0.0032, "step": 43410 }, { "epoch": 0.7104638795713, "grad_norm": 0.1529787854355532, "learning_rate": 2.343580084004195e-06, "loss": 0.0025, "step": 43420 }, { "epoch": 0.7106275055223759, "grad_norm": 0.05528188598290255, "learning_rate": 2.3411610695852445e-06, "loss": 0.0033, "step": 43430 }, { "epoch": 0.7107911314734517, "grad_norm": 0.06423704928730894, "learning_rate": 2.338742922565474e-06, "loss": 0.0014, "step": 43440 }, { "epoch": 0.7109547574245275, "grad_norm": 0.013826571404669113, "learning_rate": 2.3363256437337596e-06, "loss": 0.0018, "step": 43450 }, { "epoch": 0.7111183833756034, "grad_norm": 0.2217710459788391, "learning_rate": 2.333909233878699e-06, "loss": 0.0023, "step": 43460 }, { "epoch": 0.7112820093266792, "grad_norm": 0.2506381107981868, "learning_rate": 2.3314936937886024e-06, "loss": 0.0017, "step": 43470 }, { "epoch": 0.7114456352777551, "grad_norm": 0.17950313337282317, "learning_rate": 2.329079024251496e-06, "loss": 0.0041, "step": 43480 }, { "epoch": 0.711609261228831, "grad_norm": 0.15253353251779353, "learning_rate": 2.3266652260551236e-06, "loss": 0.0025, "step": 43490 }, { "epoch": 0.7117728871799067, "grad_norm": 0.20537571719409897, "learning_rate": 2.324252299986944e-06, "loss": 0.0015, "step": 43500 }, { "epoch": 0.7119365131309826, "grad_norm": 0.2387573361634284, "learning_rate": 2.321840246834132e-06, "loss": 0.0015, "step": 43510 }, { "epoch": 0.7121001390820584, "grad_norm": 0.20600349275903465, "learning_rate": 2.319429067383574e-06, "loss": 0.0029, "step": 43520 }, { "epoch": 0.7122637650331343, "grad_norm": 0.15537405002499002, "learning_rate": 2.3170187624218797e-06, "loss": 0.0028, "step": 43530 }, { "epoch": 0.71242739098421, "grad_norm": 0.09589913480007048, "learning_rate": 2.314609332735367e-06, "loss": 0.0024, "step": 43540 }, { "epoch": 0.7125910169352859, "grad_norm": 0.17927289651980752, "learning_rate": 2.3122007791100686e-06, "loss": 0.002, "step": 43550 }, { "epoch": 0.7127546428863618, "grad_norm": 0.10984114397872247, "learning_rate": 2.3097931023317333e-06, "loss": 0.0017, "step": 43560 }, { "epoch": 0.7129182688374376, "grad_norm": 0.08926713175592099, "learning_rate": 2.307386303185823e-06, "loss": 0.0023, "step": 43570 }, { "epoch": 0.7130818947885135, "grad_norm": 0.06438544351367055, "learning_rate": 2.3049803824575106e-06, "loss": 0.0022, "step": 43580 }, { "epoch": 0.7132455207395892, "grad_norm": 0.1881532308514187, "learning_rate": 2.3025753409316898e-06, "loss": 0.0027, "step": 43590 }, { "epoch": 0.7134091466906651, "grad_norm": 0.15189753840136275, "learning_rate": 2.300171179392961e-06, "loss": 0.0024, "step": 43600 }, { "epoch": 0.713572772641741, "grad_norm": 0.06675455958498402, "learning_rate": 2.297767898625639e-06, "loss": 0.004, "step": 43610 }, { "epoch": 0.7137363985928168, "grad_norm": 0.14670566170918492, "learning_rate": 2.295365499413752e-06, "loss": 0.0024, "step": 43620 }, { "epoch": 0.7139000245438927, "grad_norm": 0.0361588632529197, "learning_rate": 2.29296398254104e-06, "loss": 0.0042, "step": 43630 }, { "epoch": 0.7140636504949684, "grad_norm": 0.06195784555230843, "learning_rate": 2.2905633487909536e-06, "loss": 0.0024, "step": 43640 }, { "epoch": 0.7142272764460443, "grad_norm": 0.22346359704624974, "learning_rate": 2.2881635989466604e-06, "loss": 0.0034, "step": 43650 }, { "epoch": 0.7143909023971202, "grad_norm": 0.4799521223345969, "learning_rate": 2.2857647337910357e-06, "loss": 0.0022, "step": 43660 }, { "epoch": 0.714554528348196, "grad_norm": 0.1967031112667705, "learning_rate": 2.283366754106665e-06, "loss": 0.0019, "step": 43670 }, { "epoch": 0.7147181542992719, "grad_norm": 0.056401445992154894, "learning_rate": 2.280969660675849e-06, "loss": 0.0027, "step": 43680 }, { "epoch": 0.7148817802503477, "grad_norm": 0.09302699789301122, "learning_rate": 2.2785734542805955e-06, "loss": 0.0018, "step": 43690 }, { "epoch": 0.7150454062014235, "grad_norm": 0.1151691221586268, "learning_rate": 2.2761781357026226e-06, "loss": 0.0031, "step": 43700 }, { "epoch": 0.7152090321524994, "grad_norm": 0.13517416407079014, "learning_rate": 2.273783705723365e-06, "loss": 0.0017, "step": 43710 }, { "epoch": 0.7153726581035752, "grad_norm": 0.019787468378722174, "learning_rate": 2.2713901651239613e-06, "loss": 0.0021, "step": 43720 }, { "epoch": 0.7155362840546511, "grad_norm": 0.06635075822986672, "learning_rate": 2.268997514685261e-06, "loss": 0.0018, "step": 43730 }, { "epoch": 0.7156999100057269, "grad_norm": 0.13965861928611048, "learning_rate": 2.2666057551878247e-06, "loss": 0.0031, "step": 43740 }, { "epoch": 0.7158635359568027, "grad_norm": 0.14232147466305387, "learning_rate": 2.264214887411921e-06, "loss": 0.0017, "step": 43750 }, { "epoch": 0.7160271619078786, "grad_norm": 0.23181736420671092, "learning_rate": 2.2618249121375275e-06, "loss": 0.0024, "step": 43760 }, { "epoch": 0.7161907878589544, "grad_norm": 0.28469485245830134, "learning_rate": 2.2594358301443336e-06, "loss": 0.0014, "step": 43770 }, { "epoch": 0.7163544138100303, "grad_norm": 0.33087670695465904, "learning_rate": 2.257047642211735e-06, "loss": 0.0025, "step": 43780 }, { "epoch": 0.7165180397611061, "grad_norm": 0.19231458849622177, "learning_rate": 2.2546603491188356e-06, "loss": 0.0017, "step": 43790 }, { "epoch": 0.716681665712182, "grad_norm": 0.0997182112424718, "learning_rate": 2.2522739516444435e-06, "loss": 0.0028, "step": 43800 }, { "epoch": 0.7168452916632578, "grad_norm": 0.126665187207932, "learning_rate": 2.2498884505670836e-06, "loss": 0.0029, "step": 43810 }, { "epoch": 0.7170089176143336, "grad_norm": 0.01911342995733695, "learning_rate": 2.2475038466649817e-06, "loss": 0.0021, "step": 43820 }, { "epoch": 0.7171725435654095, "grad_norm": 0.21444305364064356, "learning_rate": 2.2451201407160703e-06, "loss": 0.0027, "step": 43830 }, { "epoch": 0.7173361695164853, "grad_norm": 0.26301214500054393, "learning_rate": 2.242737333497997e-06, "loss": 0.0039, "step": 43840 }, { "epoch": 0.7174997954675612, "grad_norm": 0.047847911797802664, "learning_rate": 2.2403554257881076e-06, "loss": 0.0018, "step": 43850 }, { "epoch": 0.717663421418637, "grad_norm": 0.24435002096423136, "learning_rate": 2.237974418363454e-06, "loss": 0.0017, "step": 43860 }, { "epoch": 0.7178270473697128, "grad_norm": 0.091333669138778, "learning_rate": 2.235594312000802e-06, "loss": 0.0017, "step": 43870 }, { "epoch": 0.7179906733207887, "grad_norm": 0.3037204446614908, "learning_rate": 2.2332151074766186e-06, "loss": 0.0027, "step": 43880 }, { "epoch": 0.7181542992718645, "grad_norm": 0.0567493362066839, "learning_rate": 2.2308368055670745e-06, "loss": 0.003, "step": 43890 }, { "epoch": 0.7183179252229404, "grad_norm": 0.08440151226310144, "learning_rate": 2.228459407048052e-06, "loss": 0.0027, "step": 43900 }, { "epoch": 0.7184815511740162, "grad_norm": 0.0063384282290316115, "learning_rate": 2.2260829126951362e-06, "loss": 0.0016, "step": 43910 }, { "epoch": 0.718645177125092, "grad_norm": 0.3099517528169145, "learning_rate": 2.2237073232836103e-06, "loss": 0.0016, "step": 43920 }, { "epoch": 0.7188088030761679, "grad_norm": 0.17749750184925078, "learning_rate": 2.221332639588474e-06, "loss": 0.0035, "step": 43930 }, { "epoch": 0.7189724290272437, "grad_norm": 0.05835565075511775, "learning_rate": 2.2189588623844245e-06, "loss": 0.0011, "step": 43940 }, { "epoch": 0.7191360549783196, "grad_norm": 0.10807162065007092, "learning_rate": 2.2165859924458617e-06, "loss": 0.002, "step": 43950 }, { "epoch": 0.7192996809293954, "grad_norm": 0.04893275895119157, "learning_rate": 2.214214030546897e-06, "loss": 0.0015, "step": 43960 }, { "epoch": 0.7194633068804712, "grad_norm": 0.19951019309564152, "learning_rate": 2.211842977461341e-06, "loss": 0.0026, "step": 43970 }, { "epoch": 0.7196269328315471, "grad_norm": 0.11636389971016477, "learning_rate": 2.209472833962702e-06, "loss": 0.0026, "step": 43980 }, { "epoch": 0.7197905587826229, "grad_norm": 0.09482053466583638, "learning_rate": 2.2071036008242032e-06, "loss": 0.0011, "step": 43990 }, { "epoch": 0.7199541847336988, "grad_norm": 0.14965693451034104, "learning_rate": 2.2047352788187625e-06, "loss": 0.0067, "step": 44000 }, { "epoch": 0.7201178106847747, "grad_norm": 0.18236496754725728, "learning_rate": 2.202367868719002e-06, "loss": 0.0019, "step": 44010 }, { "epoch": 0.7202814366358504, "grad_norm": 0.06773068688590426, "learning_rate": 2.200001371297251e-06, "loss": 0.002, "step": 44020 }, { "epoch": 0.7204450625869263, "grad_norm": 0.0680912365484517, "learning_rate": 2.197635787325537e-06, "loss": 0.01, "step": 44030 }, { "epoch": 0.7206086885380021, "grad_norm": 0.10737250671488516, "learning_rate": 2.195271117575585e-06, "loss": 0.0022, "step": 44040 }, { "epoch": 0.720772314489078, "grad_norm": 0.06894446287476105, "learning_rate": 2.1929073628188314e-06, "loss": 0.0028, "step": 44050 }, { "epoch": 0.7209359404401539, "grad_norm": 0.09155080887020478, "learning_rate": 2.190544523826408e-06, "loss": 0.0015, "step": 44060 }, { "epoch": 0.7210995663912296, "grad_norm": 0.08955388247423478, "learning_rate": 2.1881826013691475e-06, "loss": 0.0031, "step": 44070 }, { "epoch": 0.7212631923423055, "grad_norm": 0.08151558600167949, "learning_rate": 2.1858215962175887e-06, "loss": 0.0028, "step": 44080 }, { "epoch": 0.7214268182933813, "grad_norm": 0.12270750869330221, "learning_rate": 2.183461509141968e-06, "loss": 0.0017, "step": 44090 }, { "epoch": 0.7215904442444572, "grad_norm": 0.1086911873077073, "learning_rate": 2.1811023409122174e-06, "loss": 0.0029, "step": 44100 }, { "epoch": 0.7217540701955331, "grad_norm": 0.13031915620520185, "learning_rate": 2.1787440922979776e-06, "loss": 0.0025, "step": 44110 }, { "epoch": 0.7219176961466088, "grad_norm": 0.1918384283654667, "learning_rate": 2.176386764068584e-06, "loss": 0.0026, "step": 44120 }, { "epoch": 0.7220813220976847, "grad_norm": 0.08215371381681229, "learning_rate": 2.174030356993075e-06, "loss": 0.002, "step": 44130 }, { "epoch": 0.7222449480487605, "grad_norm": 0.022897038373005066, "learning_rate": 2.1716748718401833e-06, "loss": 0.002, "step": 44140 }, { "epoch": 0.7224085739998364, "grad_norm": 0.21844538237668543, "learning_rate": 2.1693203093783497e-06, "loss": 0.0018, "step": 44150 }, { "epoch": 0.7225721999509123, "grad_norm": 0.28596291092602744, "learning_rate": 2.1669666703757038e-06, "loss": 0.0034, "step": 44160 }, { "epoch": 0.722735825901988, "grad_norm": 0.06619562484565944, "learning_rate": 2.1646139556000783e-06, "loss": 0.0026, "step": 44170 }, { "epoch": 0.7228994518530639, "grad_norm": 0.0711077445365739, "learning_rate": 2.162262165819009e-06, "loss": 0.0017, "step": 44180 }, { "epoch": 0.7230630778041397, "grad_norm": 0.08008297807818518, "learning_rate": 2.1599113017997233e-06, "loss": 0.0024, "step": 44190 }, { "epoch": 0.7232267037552156, "grad_norm": 0.15186561774405344, "learning_rate": 2.1575613643091487e-06, "loss": 0.0017, "step": 44200 }, { "epoch": 0.7233903297062915, "grad_norm": 0.2122531406948828, "learning_rate": 2.1552123541139114e-06, "loss": 0.0021, "step": 44210 }, { "epoch": 0.7235539556573672, "grad_norm": 0.10312562754713221, "learning_rate": 2.1528642719803346e-06, "loss": 0.0012, "step": 44220 }, { "epoch": 0.7237175816084431, "grad_norm": 0.10026909434049311, "learning_rate": 2.150517118674436e-06, "loss": 0.0023, "step": 44230 }, { "epoch": 0.7238812075595189, "grad_norm": 0.0732554146837231, "learning_rate": 2.1481708949619373e-06, "loss": 0.0021, "step": 44240 }, { "epoch": 0.7240448335105948, "grad_norm": 0.051036274644266494, "learning_rate": 2.14582560160825e-06, "loss": 0.0029, "step": 44250 }, { "epoch": 0.7242084594616707, "grad_norm": 0.10978618056921247, "learning_rate": 2.143481239378485e-06, "loss": 0.0021, "step": 44260 }, { "epoch": 0.7243720854127464, "grad_norm": 0.04916092976035135, "learning_rate": 2.1411378090374486e-06, "loss": 0.0023, "step": 44270 }, { "epoch": 0.7245357113638223, "grad_norm": 0.11548845765242653, "learning_rate": 2.138795311349644e-06, "loss": 0.0023, "step": 44280 }, { "epoch": 0.7246993373148981, "grad_norm": 0.1926979081538654, "learning_rate": 2.1364537470792673e-06, "loss": 0.0024, "step": 44290 }, { "epoch": 0.724862963265974, "grad_norm": 0.24763817412954164, "learning_rate": 2.134113116990217e-06, "loss": 0.0016, "step": 44300 }, { "epoch": 0.7250265892170499, "grad_norm": 0.10005298246392416, "learning_rate": 2.1317734218460793e-06, "loss": 0.0019, "step": 44310 }, { "epoch": 0.7251902151681257, "grad_norm": 0.2012219429862326, "learning_rate": 2.1294346624101393e-06, "loss": 0.0022, "step": 44320 }, { "epoch": 0.7253538411192015, "grad_norm": 0.08353068461496911, "learning_rate": 2.127096839445375e-06, "loss": 0.0029, "step": 44330 }, { "epoch": 0.7255174670702773, "grad_norm": 0.11564180946661307, "learning_rate": 2.1247599537144607e-06, "loss": 0.0013, "step": 44340 }, { "epoch": 0.7256810930213532, "grad_norm": 0.1005973123267607, "learning_rate": 2.1224240059797617e-06, "loss": 0.0017, "step": 44350 }, { "epoch": 0.7258447189724291, "grad_norm": 0.12810613187539827, "learning_rate": 2.1200889970033433e-06, "loss": 0.0019, "step": 44360 }, { "epoch": 0.7260083449235049, "grad_norm": 0.22320887556695676, "learning_rate": 2.1177549275469596e-06, "loss": 0.0023, "step": 44370 }, { "epoch": 0.7261719708745807, "grad_norm": 0.03672780886869358, "learning_rate": 2.1154217983720586e-06, "loss": 0.0012, "step": 44380 }, { "epoch": 0.7263355968256565, "grad_norm": 0.15200116430435626, "learning_rate": 2.113089610239783e-06, "loss": 0.0022, "step": 44390 }, { "epoch": 0.7264992227767324, "grad_norm": 0.1773420716724589, "learning_rate": 2.110758363910968e-06, "loss": 0.0019, "step": 44400 }, { "epoch": 0.7266628487278082, "grad_norm": 0.14074458289167532, "learning_rate": 2.108428060146139e-06, "loss": 0.0034, "step": 44410 }, { "epoch": 0.7268264746788841, "grad_norm": 0.10200750779468538, "learning_rate": 2.106098699705521e-06, "loss": 0.0021, "step": 44420 }, { "epoch": 0.72699010062996, "grad_norm": 0.4328796941513089, "learning_rate": 2.103770283349024e-06, "loss": 0.0033, "step": 44430 }, { "epoch": 0.7271537265810357, "grad_norm": 0.22376031920537298, "learning_rate": 2.1014428118362535e-06, "loss": 0.0023, "step": 44440 }, { "epoch": 0.7273173525321116, "grad_norm": 0.05157085891916257, "learning_rate": 2.099116285926505e-06, "loss": 0.0025, "step": 44450 }, { "epoch": 0.7274809784831874, "grad_norm": 0.2098957254780201, "learning_rate": 2.096790706378767e-06, "loss": 0.002, "step": 44460 }, { "epoch": 0.7276446044342633, "grad_norm": 0.1524086509507011, "learning_rate": 2.0944660739517187e-06, "loss": 0.0048, "step": 44470 }, { "epoch": 0.7278082303853391, "grad_norm": 0.07275966527166236, "learning_rate": 2.092142389403728e-06, "loss": 0.0013, "step": 44480 }, { "epoch": 0.7279718563364149, "grad_norm": 0.14016817379157911, "learning_rate": 2.08981965349286e-06, "loss": 0.0015, "step": 44490 }, { "epoch": 0.7281354822874908, "grad_norm": 0.1047604176529723, "learning_rate": 2.087497866976864e-06, "loss": 0.0021, "step": 44500 }, { "epoch": 0.7282991082385666, "grad_norm": 0.06882833873036522, "learning_rate": 2.085177030613183e-06, "loss": 0.0026, "step": 44510 }, { "epoch": 0.7284627341896425, "grad_norm": 0.1991065278976609, "learning_rate": 2.082857145158947e-06, "loss": 0.0028, "step": 44520 }, { "epoch": 0.7286263601407184, "grad_norm": 0.07120367759215092, "learning_rate": 2.080538211370979e-06, "loss": 0.0015, "step": 44530 }, { "epoch": 0.7287899860917941, "grad_norm": 0.24189413174854393, "learning_rate": 2.0782202300057875e-06, "loss": 0.0043, "step": 44540 }, { "epoch": 0.72895361204287, "grad_norm": 0.13727730179264033, "learning_rate": 2.0759032018195768e-06, "loss": 0.0019, "step": 44550 }, { "epoch": 0.7291172379939458, "grad_norm": 0.08816536843889627, "learning_rate": 2.073587127568235e-06, "loss": 0.0026, "step": 44560 }, { "epoch": 0.7292808639450217, "grad_norm": 0.14618166300666374, "learning_rate": 2.07127200800734e-06, "loss": 0.0027, "step": 44570 }, { "epoch": 0.7294444898960976, "grad_norm": 0.06422692706143053, "learning_rate": 2.0689578438921583e-06, "loss": 0.0027, "step": 44580 }, { "epoch": 0.7296081158471733, "grad_norm": 0.5534561855697258, "learning_rate": 2.0666446359776455e-06, "loss": 0.003, "step": 44590 }, { "epoch": 0.7297717417982492, "grad_norm": 0.12322313180578576, "learning_rate": 2.064332385018443e-06, "loss": 0.0029, "step": 44600 }, { "epoch": 0.729935367749325, "grad_norm": 0.10289307393065446, "learning_rate": 2.0620210917688843e-06, "loss": 0.0024, "step": 44610 }, { "epoch": 0.7300989937004009, "grad_norm": 0.1773294797103362, "learning_rate": 2.0597107569829877e-06, "loss": 0.0027, "step": 44620 }, { "epoch": 0.7302626196514768, "grad_norm": 0.051749257568555446, "learning_rate": 2.057401381414459e-06, "loss": 0.0017, "step": 44630 }, { "epoch": 0.7304262456025525, "grad_norm": 0.08641623436531766, "learning_rate": 2.0550929658166895e-06, "loss": 0.0018, "step": 44640 }, { "epoch": 0.7305898715536284, "grad_norm": 0.16757332908417882, "learning_rate": 2.05278551094276e-06, "loss": 0.0039, "step": 44650 }, { "epoch": 0.7307534975047042, "grad_norm": 0.34034136118993524, "learning_rate": 2.050479017545436e-06, "loss": 0.0019, "step": 44660 }, { "epoch": 0.7309171234557801, "grad_norm": 0.20456944247652428, "learning_rate": 2.0481734863771722e-06, "loss": 0.0025, "step": 44670 }, { "epoch": 0.731080749406856, "grad_norm": 0.16420890629398852, "learning_rate": 2.0458689181901063e-06, "loss": 0.0017, "step": 44680 }, { "epoch": 0.7312443753579317, "grad_norm": 0.23586257079547152, "learning_rate": 2.043565313736063e-06, "loss": 0.0016, "step": 44690 }, { "epoch": 0.7314080013090076, "grad_norm": 0.3829713795652063, "learning_rate": 2.041262673766552e-06, "loss": 0.0036, "step": 44700 }, { "epoch": 0.7315716272600834, "grad_norm": 0.22532166000163395, "learning_rate": 2.038960999032769e-06, "loss": 0.0016, "step": 44710 }, { "epoch": 0.7317352532111593, "grad_norm": 0.04739444860449337, "learning_rate": 2.0366602902855937e-06, "loss": 0.0018, "step": 44720 }, { "epoch": 0.7318988791622352, "grad_norm": 0.20575924543160157, "learning_rate": 2.034360548275594e-06, "loss": 0.0026, "step": 44730 }, { "epoch": 0.732062505113311, "grad_norm": 0.15894139683413838, "learning_rate": 2.0320617737530196e-06, "loss": 0.0017, "step": 44740 }, { "epoch": 0.7322261310643868, "grad_norm": 0.13377660460013746, "learning_rate": 2.029763967467804e-06, "loss": 0.0032, "step": 44750 }, { "epoch": 0.7323897570154626, "grad_norm": 0.06717132701862027, "learning_rate": 2.027467130169566e-06, "loss": 0.0023, "step": 44760 }, { "epoch": 0.7325533829665385, "grad_norm": 0.20909526028426656, "learning_rate": 2.025171262607608e-06, "loss": 0.0031, "step": 44770 }, { "epoch": 0.7327170089176144, "grad_norm": 0.08374260391433279, "learning_rate": 2.0228763655309167e-06, "loss": 0.0019, "step": 44780 }, { "epoch": 0.7328806348686902, "grad_norm": 0.09072123468106098, "learning_rate": 2.02058243968816e-06, "loss": 0.002, "step": 44790 }, { "epoch": 0.733044260819766, "grad_norm": 0.18573295478430615, "learning_rate": 2.018289485827694e-06, "loss": 0.0019, "step": 44800 }, { "epoch": 0.7332078867708418, "grad_norm": 0.11616154655057644, "learning_rate": 2.015997504697553e-06, "loss": 0.0014, "step": 44810 }, { "epoch": 0.7333715127219177, "grad_norm": 0.40644715335569065, "learning_rate": 2.013706497045454e-06, "loss": 0.0015, "step": 44820 }, { "epoch": 0.7335351386729936, "grad_norm": 0.2240162872258777, "learning_rate": 2.0114164636187983e-06, "loss": 0.0029, "step": 44830 }, { "epoch": 0.7336987646240694, "grad_norm": 0.22131508711574477, "learning_rate": 2.0091274051646697e-06, "loss": 0.0024, "step": 44840 }, { "epoch": 0.7338623905751452, "grad_norm": 0.139439179612051, "learning_rate": 2.0068393224298304e-06, "loss": 0.0027, "step": 44850 }, { "epoch": 0.734026016526221, "grad_norm": 0.1280390475124704, "learning_rate": 2.0045522161607306e-06, "loss": 0.0019, "step": 44860 }, { "epoch": 0.7341896424772969, "grad_norm": 0.08854489695305247, "learning_rate": 2.0022660871034966e-06, "loss": 0.002, "step": 44870 }, { "epoch": 0.7343532684283728, "grad_norm": 0.347302081479223, "learning_rate": 1.9999809360039372e-06, "loss": 0.0042, "step": 44880 }, { "epoch": 0.7345168943794486, "grad_norm": 0.0907303455001772, "learning_rate": 1.9976967636075434e-06, "loss": 0.003, "step": 44890 }, { "epoch": 0.7346805203305244, "grad_norm": 0.14189535923614688, "learning_rate": 1.9954135706594853e-06, "loss": 0.0021, "step": 44900 }, { "epoch": 0.7348441462816002, "grad_norm": 0.16516134939959948, "learning_rate": 1.993131357904613e-06, "loss": 0.0021, "step": 44910 }, { "epoch": 0.7350077722326761, "grad_norm": 0.12287025419956622, "learning_rate": 1.990850126087461e-06, "loss": 0.0031, "step": 44920 }, { "epoch": 0.735171398183752, "grad_norm": 0.07019523587990566, "learning_rate": 1.9885698759522408e-06, "loss": 0.0032, "step": 44930 }, { "epoch": 0.7353350241348278, "grad_norm": 0.20799262453246364, "learning_rate": 1.986290608242839e-06, "loss": 0.0022, "step": 44940 }, { "epoch": 0.7354986500859036, "grad_norm": 0.03875321360011786, "learning_rate": 1.984012323702832e-06, "loss": 0.0018, "step": 44950 }, { "epoch": 0.7356622760369794, "grad_norm": 0.068044816275422, "learning_rate": 1.9817350230754665e-06, "loss": 0.0017, "step": 44960 }, { "epoch": 0.7358259019880553, "grad_norm": 0.26818669510729753, "learning_rate": 1.9794587071036714e-06, "loss": 0.0029, "step": 44970 }, { "epoch": 0.7359895279391312, "grad_norm": 0.07581770693558115, "learning_rate": 1.977183376530057e-06, "loss": 0.0022, "step": 44980 }, { "epoch": 0.736153153890207, "grad_norm": 0.07587232075385605, "learning_rate": 1.97490903209691e-06, "loss": 0.0013, "step": 44990 }, { "epoch": 0.7363167798412829, "grad_norm": 0.06944411015081073, "learning_rate": 1.97263567454619e-06, "loss": 0.0023, "step": 45000 }, { "epoch": 0.7364804057923586, "grad_norm": 0.02386021230886774, "learning_rate": 1.9703633046195446e-06, "loss": 0.0034, "step": 45010 }, { "epoch": 0.7366440317434345, "grad_norm": 0.1710096932848367, "learning_rate": 1.9680919230582927e-06, "loss": 0.0027, "step": 45020 }, { "epoch": 0.7368076576945104, "grad_norm": 0.0938919028809799, "learning_rate": 1.965821530603431e-06, "loss": 0.0025, "step": 45030 }, { "epoch": 0.7369712836455862, "grad_norm": 0.07001442061169248, "learning_rate": 1.9635521279956383e-06, "loss": 0.0018, "step": 45040 }, { "epoch": 0.7371349095966621, "grad_norm": 0.06993680248490873, "learning_rate": 1.961283715975266e-06, "loss": 0.0022, "step": 45050 }, { "epoch": 0.7372985355477378, "grad_norm": 0.1694704095026415, "learning_rate": 1.95901629528234e-06, "loss": 0.0016, "step": 45060 }, { "epoch": 0.7374621614988137, "grad_norm": 0.2005480930223308, "learning_rate": 1.95674986665657e-06, "loss": 0.0024, "step": 45070 }, { "epoch": 0.7376257874498896, "grad_norm": 0.1874518209816124, "learning_rate": 1.9544844308373367e-06, "loss": 0.0032, "step": 45080 }, { "epoch": 0.7377894134009654, "grad_norm": 0.16697680388886196, "learning_rate": 1.952219988563697e-06, "loss": 0.0031, "step": 45090 }, { "epoch": 0.7379530393520413, "grad_norm": 0.09093627501903218, "learning_rate": 1.9499565405743886e-06, "loss": 0.0028, "step": 45100 }, { "epoch": 0.738116665303117, "grad_norm": 0.24135580440753351, "learning_rate": 1.9476940876078214e-06, "loss": 0.0021, "step": 45110 }, { "epoch": 0.7382802912541929, "grad_norm": 0.18026667668544955, "learning_rate": 1.9454326304020773e-06, "loss": 0.0024, "step": 45120 }, { "epoch": 0.7384439172052688, "grad_norm": 0.3016966771453516, "learning_rate": 1.9431721696949165e-06, "loss": 0.003, "step": 45130 }, { "epoch": 0.7386075431563446, "grad_norm": 0.15173579693850883, "learning_rate": 1.9409127062237777e-06, "loss": 0.0018, "step": 45140 }, { "epoch": 0.7387711691074205, "grad_norm": 0.11620601530956795, "learning_rate": 1.938654240725769e-06, "loss": 0.0014, "step": 45150 }, { "epoch": 0.7389347950584962, "grad_norm": 0.10542180131383322, "learning_rate": 1.936396773937674e-06, "loss": 0.0012, "step": 45160 }, { "epoch": 0.7390984210095721, "grad_norm": 0.03722660016681537, "learning_rate": 1.9341403065959564e-06, "loss": 0.0024, "step": 45170 }, { "epoch": 0.739262046960648, "grad_norm": 0.18887618947431387, "learning_rate": 1.9318848394367423e-06, "loss": 0.0022, "step": 45180 }, { "epoch": 0.7394256729117238, "grad_norm": 0.15358224706067536, "learning_rate": 1.929630373195839e-06, "loss": 0.0018, "step": 45190 }, { "epoch": 0.7395892988627997, "grad_norm": 0.10649897725800517, "learning_rate": 1.9273769086087296e-06, "loss": 0.0028, "step": 45200 }, { "epoch": 0.7397529248138754, "grad_norm": 0.13976114201848927, "learning_rate": 1.9251244464105657e-06, "loss": 0.0048, "step": 45210 }, { "epoch": 0.7399165507649513, "grad_norm": 0.16145332507081675, "learning_rate": 1.9228729873361702e-06, "loss": 0.0011, "step": 45220 }, { "epoch": 0.7400801767160272, "grad_norm": 0.11590750512284433, "learning_rate": 1.920622532120048e-06, "loss": 0.0019, "step": 45230 }, { "epoch": 0.740243802667103, "grad_norm": 0.24927081755085978, "learning_rate": 1.9183730814963658e-06, "loss": 0.0022, "step": 45240 }, { "epoch": 0.7404074286181789, "grad_norm": 0.15121382289967583, "learning_rate": 1.9161246361989657e-06, "loss": 0.0022, "step": 45250 }, { "epoch": 0.7405710545692546, "grad_norm": 0.147385205806761, "learning_rate": 1.9138771969613674e-06, "loss": 0.0026, "step": 45260 }, { "epoch": 0.7407346805203305, "grad_norm": 0.24503940760113305, "learning_rate": 1.911630764516756e-06, "loss": 0.0024, "step": 45270 }, { "epoch": 0.7408983064714064, "grad_norm": 0.09040951479567234, "learning_rate": 1.9093853395979893e-06, "loss": 0.0012, "step": 45280 }, { "epoch": 0.7410619324224822, "grad_norm": 0.1755614111364217, "learning_rate": 1.907140922937602e-06, "loss": 0.0026, "step": 45290 }, { "epoch": 0.7412255583735581, "grad_norm": 0.08732935330092154, "learning_rate": 1.9048975152677907e-06, "loss": 0.0012, "step": 45300 }, { "epoch": 0.7413891843246339, "grad_norm": 0.3409927076977567, "learning_rate": 1.9026551173204272e-06, "loss": 0.0047, "step": 45310 }, { "epoch": 0.7415528102757097, "grad_norm": 0.36886882739189664, "learning_rate": 1.900413729827058e-06, "loss": 0.0033, "step": 45320 }, { "epoch": 0.7417164362267855, "grad_norm": 0.04912119302042821, "learning_rate": 1.898173353518894e-06, "loss": 0.0039, "step": 45330 }, { "epoch": 0.7418800621778614, "grad_norm": 0.2547902304322046, "learning_rate": 1.8959339891268164e-06, "loss": 0.0017, "step": 45340 }, { "epoch": 0.7420436881289373, "grad_norm": 0.02119320991032157, "learning_rate": 1.8936956373813837e-06, "loss": 0.002, "step": 45350 }, { "epoch": 0.7422073140800131, "grad_norm": 0.045534037443961166, "learning_rate": 1.8914582990128133e-06, "loss": 0.0017, "step": 45360 }, { "epoch": 0.7423709400310889, "grad_norm": 0.11198447139565858, "learning_rate": 1.8892219747509978e-06, "loss": 0.0035, "step": 45370 }, { "epoch": 0.7425345659821647, "grad_norm": 0.23907883237786615, "learning_rate": 1.8869866653255015e-06, "loss": 0.0025, "step": 45380 }, { "epoch": 0.7426981919332406, "grad_norm": 0.17118705542594306, "learning_rate": 1.8847523714655525e-06, "loss": 0.0023, "step": 45390 }, { "epoch": 0.7428618178843165, "grad_norm": 0.24736742017835792, "learning_rate": 1.8825190939000482e-06, "loss": 0.0029, "step": 45400 }, { "epoch": 0.7430254438353923, "grad_norm": 0.10767291834913967, "learning_rate": 1.8802868333575614e-06, "loss": 0.0013, "step": 45410 }, { "epoch": 0.7431890697864681, "grad_norm": 0.09482697240303418, "learning_rate": 1.8780555905663223e-06, "loss": 0.0016, "step": 45420 }, { "epoch": 0.7433526957375439, "grad_norm": 0.1840747034603889, "learning_rate": 1.8758253662542336e-06, "loss": 0.003, "step": 45430 }, { "epoch": 0.7435163216886198, "grad_norm": 0.4689505965538142, "learning_rate": 1.8735961611488712e-06, "loss": 0.0024, "step": 45440 }, { "epoch": 0.7436799476396957, "grad_norm": 0.11113900725758293, "learning_rate": 1.8713679759774718e-06, "loss": 0.0011, "step": 45450 }, { "epoch": 0.7438435735907715, "grad_norm": 0.19390647895564087, "learning_rate": 1.8691408114669406e-06, "loss": 0.0015, "step": 45460 }, { "epoch": 0.7440071995418474, "grad_norm": 0.17661179148210726, "learning_rate": 1.866914668343852e-06, "loss": 0.001, "step": 45470 }, { "epoch": 0.7441708254929231, "grad_norm": 0.24105734218861763, "learning_rate": 1.8646895473344446e-06, "loss": 0.0021, "step": 45480 }, { "epoch": 0.744334451443999, "grad_norm": 0.22231021008987104, "learning_rate": 1.8624654491646254e-06, "loss": 0.0025, "step": 45490 }, { "epoch": 0.7444980773950749, "grad_norm": 0.12027864243007136, "learning_rate": 1.8602423745599652e-06, "loss": 0.0028, "step": 45500 }, { "epoch": 0.7446617033461507, "grad_norm": 0.02764323081565079, "learning_rate": 1.8580203242457061e-06, "loss": 0.0017, "step": 45510 }, { "epoch": 0.7448253292972266, "grad_norm": 0.17601443336291742, "learning_rate": 1.8557992989467517e-06, "loss": 0.0018, "step": 45520 }, { "epoch": 0.7449889552483023, "grad_norm": 0.1671539370895811, "learning_rate": 1.8535792993876712e-06, "loss": 0.0012, "step": 45530 }, { "epoch": 0.7451525811993782, "grad_norm": 0.03842371507904231, "learning_rate": 1.8513603262927005e-06, "loss": 0.0009, "step": 45540 }, { "epoch": 0.7453162071504541, "grad_norm": 0.09876800924544066, "learning_rate": 1.8491423803857405e-06, "loss": 0.0017, "step": 45550 }, { "epoch": 0.7454798331015299, "grad_norm": 0.09453110457444099, "learning_rate": 1.8469254623903554e-06, "loss": 0.0017, "step": 45560 }, { "epoch": 0.7456434590526058, "grad_norm": 0.1488783985696372, "learning_rate": 1.844709573029778e-06, "loss": 0.003, "step": 45570 }, { "epoch": 0.7458070850036815, "grad_norm": 0.17183523010699714, "learning_rate": 1.8424947130269027e-06, "loss": 0.0022, "step": 45580 }, { "epoch": 0.7459707109547574, "grad_norm": 0.004651303189011751, "learning_rate": 1.8402808831042873e-06, "loss": 0.0043, "step": 45590 }, { "epoch": 0.7461343369058333, "grad_norm": 0.18327532512226255, "learning_rate": 1.8380680839841553e-06, "loss": 0.0016, "step": 45600 }, { "epoch": 0.7462979628569091, "grad_norm": 0.09656479761314256, "learning_rate": 1.8358563163883924e-06, "loss": 0.0024, "step": 45610 }, { "epoch": 0.746461588807985, "grad_norm": 0.36363930387145943, "learning_rate": 1.8336455810385478e-06, "loss": 0.0029, "step": 45620 }, { "epoch": 0.7466252147590607, "grad_norm": 0.17106124723869456, "learning_rate": 1.8314358786558384e-06, "loss": 0.0017, "step": 45630 }, { "epoch": 0.7467888407101366, "grad_norm": 0.01420596762759395, "learning_rate": 1.8292272099611375e-06, "loss": 0.0028, "step": 45640 }, { "epoch": 0.7469524666612125, "grad_norm": 0.17650140423031205, "learning_rate": 1.827019575674986e-06, "loss": 0.0032, "step": 45650 }, { "epoch": 0.7471160926122883, "grad_norm": 0.04427962288809695, "learning_rate": 1.8248129765175837e-06, "loss": 0.0019, "step": 45660 }, { "epoch": 0.7472797185633642, "grad_norm": 0.21856651721766107, "learning_rate": 1.8226074132087962e-06, "loss": 0.0013, "step": 45670 }, { "epoch": 0.74744334451444, "grad_norm": 0.11581839091157066, "learning_rate": 1.8204028864681467e-06, "loss": 0.0019, "step": 45680 }, { "epoch": 0.7476069704655158, "grad_norm": 0.04061043625629293, "learning_rate": 1.8181993970148272e-06, "loss": 0.0009, "step": 45690 }, { "epoch": 0.7477705964165917, "grad_norm": 0.10997613632037238, "learning_rate": 1.8159969455676845e-06, "loss": 0.0011, "step": 45700 }, { "epoch": 0.7479342223676675, "grad_norm": 0.4589938775349932, "learning_rate": 1.8137955328452307e-06, "loss": 0.0026, "step": 45710 }, { "epoch": 0.7480978483187434, "grad_norm": 0.18704913241387183, "learning_rate": 1.8115951595656366e-06, "loss": 0.0023, "step": 45720 }, { "epoch": 0.7482614742698191, "grad_norm": 0.1329641459361191, "learning_rate": 1.8093958264467359e-06, "loss": 0.0034, "step": 45730 }, { "epoch": 0.748425100220895, "grad_norm": 0.19816799287925482, "learning_rate": 1.8071975342060204e-06, "loss": 0.0025, "step": 45740 }, { "epoch": 0.7485887261719709, "grad_norm": 0.1267937946325208, "learning_rate": 1.8050002835606468e-06, "loss": 0.0028, "step": 45750 }, { "epoch": 0.7487523521230467, "grad_norm": 0.19350279082836166, "learning_rate": 1.8028040752274283e-06, "loss": 0.0021, "step": 45760 }, { "epoch": 0.7489159780741226, "grad_norm": 0.09875867404983932, "learning_rate": 1.8006089099228386e-06, "loss": 0.0024, "step": 45770 }, { "epoch": 0.7490796040251984, "grad_norm": 0.0848138155360765, "learning_rate": 1.7984147883630115e-06, "loss": 0.0015, "step": 45780 }, { "epoch": 0.7492432299762742, "grad_norm": 0.16643147003561135, "learning_rate": 1.7962217112637404e-06, "loss": 0.0014, "step": 45790 }, { "epoch": 0.7494068559273501, "grad_norm": 0.14499421686238045, "learning_rate": 1.7940296793404783e-06, "loss": 0.0027, "step": 45800 }, { "epoch": 0.7495704818784259, "grad_norm": 0.11939407827616952, "learning_rate": 1.7918386933083348e-06, "loss": 0.0019, "step": 45810 }, { "epoch": 0.7497341078295018, "grad_norm": 0.09886453832912799, "learning_rate": 1.7896487538820833e-06, "loss": 0.0012, "step": 45820 }, { "epoch": 0.7498977337805776, "grad_norm": 0.06467277177554229, "learning_rate": 1.7874598617761524e-06, "loss": 0.0009, "step": 45830 }, { "epoch": 0.7500613597316534, "grad_norm": 0.10456585610589049, "learning_rate": 1.7852720177046284e-06, "loss": 0.0029, "step": 45840 }, { "epoch": 0.7502249856827293, "grad_norm": 0.11273119911729533, "learning_rate": 1.7830852223812573e-06, "loss": 0.0017, "step": 45850 }, { "epoch": 0.7503886116338051, "grad_norm": 0.11300270318524014, "learning_rate": 1.7808994765194414e-06, "loss": 0.0017, "step": 45860 }, { "epoch": 0.750552237584881, "grad_norm": 0.1315058089385145, "learning_rate": 1.7787147808322413e-06, "loss": 0.0022, "step": 45870 }, { "epoch": 0.7507158635359568, "grad_norm": 0.15766203037906645, "learning_rate": 1.7765311360323778e-06, "loss": 0.0035, "step": 45880 }, { "epoch": 0.7508794894870326, "grad_norm": 0.03440059766839715, "learning_rate": 1.774348542832225e-06, "loss": 0.0019, "step": 45890 }, { "epoch": 0.7510431154381085, "grad_norm": 0.09004760283375655, "learning_rate": 1.7721670019438153e-06, "loss": 0.0027, "step": 45900 }, { "epoch": 0.7512067413891843, "grad_norm": 0.12460464571217722, "learning_rate": 1.769986514078838e-06, "loss": 0.0056, "step": 45910 }, { "epoch": 0.7513703673402602, "grad_norm": 0.3541459106115299, "learning_rate": 1.7678070799486386e-06, "loss": 0.0054, "step": 45920 }, { "epoch": 0.751533993291336, "grad_norm": 0.08881843583724262, "learning_rate": 1.765628700264217e-06, "loss": 0.0023, "step": 45930 }, { "epoch": 0.7516976192424119, "grad_norm": 0.04137372932640359, "learning_rate": 1.7634513757362343e-06, "loss": 0.0031, "step": 45940 }, { "epoch": 0.7518612451934877, "grad_norm": 0.029204014591625475, "learning_rate": 1.7612751070750033e-06, "loss": 0.0018, "step": 45950 }, { "epoch": 0.7520248711445635, "grad_norm": 0.1039995498049076, "learning_rate": 1.7590998949904924e-06, "loss": 0.003, "step": 45960 }, { "epoch": 0.7521884970956394, "grad_norm": 0.13248874400385757, "learning_rate": 1.7569257401923263e-06, "loss": 0.0017, "step": 45970 }, { "epoch": 0.7523521230467152, "grad_norm": 0.17715847502014476, "learning_rate": 1.7547526433897848e-06, "loss": 0.0024, "step": 45980 }, { "epoch": 0.7525157489977911, "grad_norm": 0.26151554617946166, "learning_rate": 1.7525806052918e-06, "loss": 0.0014, "step": 45990 }, { "epoch": 0.7526793749488669, "grad_norm": 0.046693960697858757, "learning_rate": 1.7504096266069647e-06, "loss": 0.0014, "step": 46000 }, { "epoch": 0.7528430008999427, "grad_norm": 0.08446428144037361, "learning_rate": 1.7482397080435209e-06, "loss": 0.0023, "step": 46010 }, { "epoch": 0.7530066268510186, "grad_norm": 0.18679126424579465, "learning_rate": 1.7460708503093655e-06, "loss": 0.0016, "step": 46020 }, { "epoch": 0.7531702528020944, "grad_norm": 0.15606888719379577, "learning_rate": 1.7439030541120506e-06, "loss": 0.0021, "step": 46030 }, { "epoch": 0.7533338787531703, "grad_norm": 0.06365374359783149, "learning_rate": 1.7417363201587812e-06, "loss": 0.0013, "step": 46040 }, { "epoch": 0.7534975047042461, "grad_norm": 0.16804757464299622, "learning_rate": 1.7395706491564135e-06, "loss": 0.0034, "step": 46050 }, { "epoch": 0.7536611306553219, "grad_norm": 0.07866634967892441, "learning_rate": 1.7374060418114642e-06, "loss": 0.0028, "step": 46060 }, { "epoch": 0.7538247566063978, "grad_norm": 0.1447043436267361, "learning_rate": 1.7352424988300948e-06, "loss": 0.0018, "step": 46070 }, { "epoch": 0.7539883825574736, "grad_norm": 0.17424757155382511, "learning_rate": 1.7330800209181241e-06, "loss": 0.0018, "step": 46080 }, { "epoch": 0.7541520085085495, "grad_norm": 0.05951055823103956, "learning_rate": 1.730918608781022e-06, "loss": 0.0012, "step": 46090 }, { "epoch": 0.7543156344596254, "grad_norm": 0.1066577868167116, "learning_rate": 1.7287582631239114e-06, "loss": 0.0018, "step": 46100 }, { "epoch": 0.7544792604107011, "grad_norm": 0.06897828734782331, "learning_rate": 1.7265989846515657e-06, "loss": 0.0015, "step": 46110 }, { "epoch": 0.754642886361777, "grad_norm": 0.21023559560074864, "learning_rate": 1.7244407740684105e-06, "loss": 0.0034, "step": 46120 }, { "epoch": 0.7548065123128528, "grad_norm": 0.17842566677001792, "learning_rate": 1.7222836320785275e-06, "loss": 0.0019, "step": 46130 }, { "epoch": 0.7549701382639287, "grad_norm": 0.20762704759038897, "learning_rate": 1.7201275593856454e-06, "loss": 0.0018, "step": 46140 }, { "epoch": 0.7551337642150046, "grad_norm": 0.07038335604206682, "learning_rate": 1.7179725566931399e-06, "loss": 0.0021, "step": 46150 }, { "epoch": 0.7552973901660803, "grad_norm": 0.26129034568728193, "learning_rate": 1.7158186247040476e-06, "loss": 0.0015, "step": 46160 }, { "epoch": 0.7554610161171562, "grad_norm": 0.18913411890710702, "learning_rate": 1.7136657641210492e-06, "loss": 0.002, "step": 46170 }, { "epoch": 0.755624642068232, "grad_norm": 0.0830088014143581, "learning_rate": 1.7115139756464761e-06, "loss": 0.0016, "step": 46180 }, { "epoch": 0.7557882680193079, "grad_norm": 0.1611380596120119, "learning_rate": 1.7093632599823136e-06, "loss": 0.0009, "step": 46190 }, { "epoch": 0.7559518939703836, "grad_norm": 0.07591516709823387, "learning_rate": 1.707213617830195e-06, "loss": 0.0023, "step": 46200 }, { "epoch": 0.7561155199214595, "grad_norm": 0.14892147848480908, "learning_rate": 1.7050650498913984e-06, "loss": 0.0014, "step": 46210 }, { "epoch": 0.7562791458725354, "grad_norm": 0.052792007782790265, "learning_rate": 1.702917556866861e-06, "loss": 0.0013, "step": 46220 }, { "epoch": 0.7564427718236112, "grad_norm": 0.010226413070532027, "learning_rate": 1.700771139457163e-06, "loss": 0.0013, "step": 46230 }, { "epoch": 0.7566063977746871, "grad_norm": 0.10869114769926211, "learning_rate": 1.6986257983625326e-06, "loss": 0.0028, "step": 46240 }, { "epoch": 0.7567700237257629, "grad_norm": 0.17238230178850342, "learning_rate": 1.696481534282855e-06, "loss": 0.0029, "step": 46250 }, { "epoch": 0.7569336496768387, "grad_norm": 0.1389662974472085, "learning_rate": 1.6943383479176534e-06, "loss": 0.0024, "step": 46260 }, { "epoch": 0.7570972756279146, "grad_norm": 0.11471205834050704, "learning_rate": 1.692196239966104e-06, "loss": 0.0041, "step": 46270 }, { "epoch": 0.7572609015789904, "grad_norm": 0.06957039215390441, "learning_rate": 1.6900552111270356e-06, "loss": 0.0019, "step": 46280 }, { "epoch": 0.7574245275300663, "grad_norm": 0.07234541738303123, "learning_rate": 1.6879152620989191e-06, "loss": 0.0025, "step": 46290 }, { "epoch": 0.7575881534811421, "grad_norm": 0.13107428634116425, "learning_rate": 1.6857763935798733e-06, "loss": 0.0025, "step": 46300 }, { "epoch": 0.7577517794322179, "grad_norm": 0.15651518690608868, "learning_rate": 1.6836386062676712e-06, "loss": 0.0028, "step": 46310 }, { "epoch": 0.7579154053832938, "grad_norm": 0.07394710865282514, "learning_rate": 1.6815019008597232e-06, "loss": 0.0047, "step": 46320 }, { "epoch": 0.7580790313343696, "grad_norm": 0.14916797442407503, "learning_rate": 1.6793662780530916e-06, "loss": 0.0015, "step": 46330 }, { "epoch": 0.7582426572854455, "grad_norm": 0.10545076104125851, "learning_rate": 1.6772317385444892e-06, "loss": 0.0024, "step": 46340 }, { "epoch": 0.7584062832365213, "grad_norm": 0.1853139494490722, "learning_rate": 1.6750982830302699e-06, "loss": 0.0021, "step": 46350 }, { "epoch": 0.7585699091875971, "grad_norm": 0.06326378627227601, "learning_rate": 1.672965912206434e-06, "loss": 0.0019, "step": 46360 }, { "epoch": 0.758733535138673, "grad_norm": 0.07604827670661202, "learning_rate": 1.6708346267686343e-06, "loss": 0.0014, "step": 46370 }, { "epoch": 0.7588971610897488, "grad_norm": 0.08526544901432646, "learning_rate": 1.668704427412161e-06, "loss": 0.0016, "step": 46380 }, { "epoch": 0.7590607870408247, "grad_norm": 0.1403100702221909, "learning_rate": 1.6665753148319535e-06, "loss": 0.0011, "step": 46390 }, { "epoch": 0.7592244129919005, "grad_norm": 0.0717180421433381, "learning_rate": 1.6644472897226004e-06, "loss": 0.0023, "step": 46400 }, { "epoch": 0.7593880389429764, "grad_norm": 0.13747307040847662, "learning_rate": 1.6623203527783304e-06, "loss": 0.003, "step": 46410 }, { "epoch": 0.7595516648940522, "grad_norm": 0.03655971584837516, "learning_rate": 1.6601945046930184e-06, "loss": 0.0023, "step": 46420 }, { "epoch": 0.759715290845128, "grad_norm": 0.27527597967681267, "learning_rate": 1.6580697461601885e-06, "loss": 0.0034, "step": 46430 }, { "epoch": 0.7598789167962039, "grad_norm": 0.09193592655360804, "learning_rate": 1.6559460778730007e-06, "loss": 0.0017, "step": 46440 }, { "epoch": 0.7600425427472797, "grad_norm": 0.1123773067565704, "learning_rate": 1.653823500524267e-06, "loss": 0.004, "step": 46450 }, { "epoch": 0.7602061686983556, "grad_norm": 0.1621251570260926, "learning_rate": 1.6517020148064384e-06, "loss": 0.0021, "step": 46460 }, { "epoch": 0.7603697946494314, "grad_norm": 0.04539460286656017, "learning_rate": 1.649581621411615e-06, "loss": 0.0021, "step": 46470 }, { "epoch": 0.7605334206005072, "grad_norm": 0.3136356898861863, "learning_rate": 1.647462321031536e-06, "loss": 0.0032, "step": 46480 }, { "epoch": 0.7606970465515831, "grad_norm": 0.1859689850362368, "learning_rate": 1.6453441143575865e-06, "loss": 0.0023, "step": 46490 }, { "epoch": 0.7608606725026589, "grad_norm": 0.06229952800958062, "learning_rate": 1.6432270020807933e-06, "loss": 0.0017, "step": 46500 }, { "epoch": 0.7610242984537348, "grad_norm": 0.026158627781626156, "learning_rate": 1.641110984891827e-06, "loss": 0.0018, "step": 46510 }, { "epoch": 0.7611879244048106, "grad_norm": 0.11928646256441852, "learning_rate": 1.6389960634809998e-06, "loss": 0.0017, "step": 46520 }, { "epoch": 0.7613515503558864, "grad_norm": 0.3741799477528657, "learning_rate": 1.6368822385382694e-06, "loss": 0.0036, "step": 46530 }, { "epoch": 0.7615151763069623, "grad_norm": 0.1437330767658559, "learning_rate": 1.6347695107532329e-06, "loss": 0.0023, "step": 46540 }, { "epoch": 0.7616788022580381, "grad_norm": 0.09280638125276448, "learning_rate": 1.6326578808151305e-06, "loss": 0.0015, "step": 46550 }, { "epoch": 0.761842428209114, "grad_norm": 0.1178824916932626, "learning_rate": 1.6305473494128437e-06, "loss": 0.002, "step": 46560 }, { "epoch": 0.7620060541601898, "grad_norm": 0.32909055714790286, "learning_rate": 1.6284379172348968e-06, "loss": 0.0022, "step": 46570 }, { "epoch": 0.7621696801112656, "grad_norm": 0.09575210698464756, "learning_rate": 1.6263295849694522e-06, "loss": 0.0014, "step": 46580 }, { "epoch": 0.7623333060623415, "grad_norm": 0.028458873881841106, "learning_rate": 1.6242223533043194e-06, "loss": 0.0027, "step": 46590 }, { "epoch": 0.7624969320134173, "grad_norm": 0.0848806430118753, "learning_rate": 1.6221162229269444e-06, "loss": 0.0021, "step": 46600 }, { "epoch": 0.7626605579644932, "grad_norm": 0.33646600663602283, "learning_rate": 1.6200111945244151e-06, "loss": 0.0044, "step": 46610 }, { "epoch": 0.762824183915569, "grad_norm": 0.21849747987078838, "learning_rate": 1.6179072687834586e-06, "loss": 0.0039, "step": 46620 }, { "epoch": 0.7629878098666448, "grad_norm": 0.12799221768823424, "learning_rate": 1.6158044463904448e-06, "loss": 0.002, "step": 46630 }, { "epoch": 0.7631514358177207, "grad_norm": 0.10123207359588249, "learning_rate": 1.6137027280313799e-06, "loss": 0.0022, "step": 46640 }, { "epoch": 0.7633150617687965, "grad_norm": 0.31255734553364206, "learning_rate": 1.6116021143919158e-06, "loss": 0.0014, "step": 46650 }, { "epoch": 0.7634786877198724, "grad_norm": 0.0796031598281909, "learning_rate": 1.6095026061573393e-06, "loss": 0.002, "step": 46660 }, { "epoch": 0.7636423136709483, "grad_norm": 0.40405296768151544, "learning_rate": 1.607404204012577e-06, "loss": 0.0016, "step": 46670 }, { "epoch": 0.763805939622024, "grad_norm": 0.5970723212634294, "learning_rate": 1.6053069086421962e-06, "loss": 0.0025, "step": 46680 }, { "epoch": 0.7639695655730999, "grad_norm": 0.14978991314871895, "learning_rate": 1.6032107207304021e-06, "loss": 0.0017, "step": 46690 }, { "epoch": 0.7641331915241757, "grad_norm": 0.1483389179165418, "learning_rate": 1.6011156409610379e-06, "loss": 0.0022, "step": 46700 }, { "epoch": 0.7642968174752516, "grad_norm": 0.20166498602157654, "learning_rate": 1.5990216700175887e-06, "loss": 0.0023, "step": 46710 }, { "epoch": 0.7644604434263275, "grad_norm": 0.4141697727184578, "learning_rate": 1.5969288085831741e-06, "loss": 0.0025, "step": 46720 }, { "epoch": 0.7646240693774032, "grad_norm": 0.11035183399177058, "learning_rate": 1.5948370573405536e-06, "loss": 0.0016, "step": 46730 }, { "epoch": 0.7647876953284791, "grad_norm": 0.2544156090677158, "learning_rate": 1.5927464169721234e-06, "loss": 0.0026, "step": 46740 }, { "epoch": 0.7649513212795549, "grad_norm": 0.06960523098979804, "learning_rate": 1.5906568881599187e-06, "loss": 0.0018, "step": 46750 }, { "epoch": 0.7651149472306308, "grad_norm": 0.14514292351443067, "learning_rate": 1.5885684715856108e-06, "loss": 0.0019, "step": 46760 }, { "epoch": 0.7652785731817067, "grad_norm": 0.2008801199666789, "learning_rate": 1.586481167930507e-06, "loss": 0.0025, "step": 46770 }, { "epoch": 0.7654421991327824, "grad_norm": 0.3152734410769143, "learning_rate": 1.584394977875557e-06, "loss": 0.0032, "step": 46780 }, { "epoch": 0.7656058250838583, "grad_norm": 0.11425898740560331, "learning_rate": 1.5823099021013411e-06, "loss": 0.0024, "step": 46790 }, { "epoch": 0.7657694510349341, "grad_norm": 0.2391848987670341, "learning_rate": 1.5802259412880783e-06, "loss": 0.0019, "step": 46800 }, { "epoch": 0.76593307698601, "grad_norm": 0.2137738232451391, "learning_rate": 1.5781430961156247e-06, "loss": 0.0016, "step": 46810 }, { "epoch": 0.7660967029370859, "grad_norm": 0.08676653787845492, "learning_rate": 1.5760613672634717e-06, "loss": 0.004, "step": 46820 }, { "epoch": 0.7662603288881616, "grad_norm": 0.1892549004346875, "learning_rate": 1.573980755410744e-06, "loss": 0.0019, "step": 46830 }, { "epoch": 0.7664239548392375, "grad_norm": 0.05132221080266076, "learning_rate": 1.5719012612362089e-06, "loss": 0.004, "step": 46840 }, { "epoch": 0.7665875807903133, "grad_norm": 0.07532403764829511, "learning_rate": 1.5698228854182618e-06, "loss": 0.0013, "step": 46850 }, { "epoch": 0.7667512067413892, "grad_norm": 0.12189073409820742, "learning_rate": 1.5677456286349363e-06, "loss": 0.0019, "step": 46860 }, { "epoch": 0.7669148326924651, "grad_norm": 0.176215294450804, "learning_rate": 1.5656694915639015e-06, "loss": 0.0023, "step": 46870 }, { "epoch": 0.7670784586435409, "grad_norm": 0.13490310675082046, "learning_rate": 1.56359447488246e-06, "loss": 0.0017, "step": 46880 }, { "epoch": 0.7672420845946167, "grad_norm": 0.07373989019568637, "learning_rate": 1.561520579267547e-06, "loss": 0.002, "step": 46890 }, { "epoch": 0.7674057105456925, "grad_norm": 0.37681341863303214, "learning_rate": 1.559447805395738e-06, "loss": 0.0028, "step": 46900 }, { "epoch": 0.7675693364967684, "grad_norm": 0.10340771957578078, "learning_rate": 1.5573761539432376e-06, "loss": 0.0019, "step": 46910 }, { "epoch": 0.7677329624478443, "grad_norm": 0.10695387173693185, "learning_rate": 1.555305625585885e-06, "loss": 0.0023, "step": 46920 }, { "epoch": 0.76789658839892, "grad_norm": 0.16248780928775, "learning_rate": 1.5532362209991536e-06, "loss": 0.0021, "step": 46930 }, { "epoch": 0.7680602143499959, "grad_norm": 0.24593994420605056, "learning_rate": 1.5511679408581499e-06, "loss": 0.0036, "step": 46940 }, { "epoch": 0.7682238403010717, "grad_norm": 0.09380877185707595, "learning_rate": 1.5491007858376122e-06, "loss": 0.003, "step": 46950 }, { "epoch": 0.7683874662521476, "grad_norm": 0.18750704195515064, "learning_rate": 1.5470347566119166e-06, "loss": 0.004, "step": 46960 }, { "epoch": 0.7685510922032235, "grad_norm": 0.13081269984348015, "learning_rate": 1.5449698538550667e-06, "loss": 0.0017, "step": 46970 }, { "epoch": 0.7687147181542993, "grad_norm": 0.1868569686816854, "learning_rate": 1.5429060782407006e-06, "loss": 0.0015, "step": 46980 }, { "epoch": 0.7688783441053751, "grad_norm": 0.2702879995099629, "learning_rate": 1.5408434304420889e-06, "loss": 0.0016, "step": 46990 }, { "epoch": 0.7690419700564509, "grad_norm": 0.054722269961225894, "learning_rate": 1.5387819111321334e-06, "loss": 0.0027, "step": 47000 }, { "epoch": 0.7692055960075268, "grad_norm": 0.03239142968600071, "learning_rate": 1.5367215209833675e-06, "loss": 0.0017, "step": 47010 }, { "epoch": 0.7693692219586027, "grad_norm": 0.05714288830972208, "learning_rate": 1.5346622606679596e-06, "loss": 0.0016, "step": 47020 }, { "epoch": 0.7695328479096785, "grad_norm": 0.09679205082249472, "learning_rate": 1.532604130857706e-06, "loss": 0.0027, "step": 47030 }, { "epoch": 0.7696964738607543, "grad_norm": 0.05487403771481312, "learning_rate": 1.5305471322240346e-06, "loss": 0.0015, "step": 47040 }, { "epoch": 0.7698600998118301, "grad_norm": 0.012955878998263351, "learning_rate": 1.5284912654380046e-06, "loss": 0.0058, "step": 47050 }, { "epoch": 0.770023725762906, "grad_norm": 0.0852189414542244, "learning_rate": 1.5264365311703067e-06, "loss": 0.0015, "step": 47060 }, { "epoch": 0.7701873517139818, "grad_norm": 0.07220694492826542, "learning_rate": 1.5243829300912594e-06, "loss": 0.0027, "step": 47070 }, { "epoch": 0.7703509776650577, "grad_norm": 0.0778743303377901, "learning_rate": 1.522330462870818e-06, "loss": 0.0016, "step": 47080 }, { "epoch": 0.7705146036161336, "grad_norm": 0.18949959868428612, "learning_rate": 1.5202791301785613e-06, "loss": 0.0021, "step": 47090 }, { "epoch": 0.7706782295672093, "grad_norm": 0.08432579281828237, "learning_rate": 1.518228932683702e-06, "loss": 0.003, "step": 47100 }, { "epoch": 0.7708418555182852, "grad_norm": 0.017073098747401177, "learning_rate": 1.5161798710550763e-06, "loss": 0.002, "step": 47110 }, { "epoch": 0.771005481469361, "grad_norm": 0.19474752412318322, "learning_rate": 1.5141319459611581e-06, "loss": 0.0018, "step": 47120 }, { "epoch": 0.7711691074204369, "grad_norm": 0.05104824485267002, "learning_rate": 1.5120851580700463e-06, "loss": 0.0034, "step": 47130 }, { "epoch": 0.7713327333715128, "grad_norm": 0.11746610436212994, "learning_rate": 1.5100395080494673e-06, "loss": 0.0023, "step": 47140 }, { "epoch": 0.7714963593225885, "grad_norm": 0.08425259258470946, "learning_rate": 1.5079949965667818e-06, "loss": 0.0023, "step": 47150 }, { "epoch": 0.7716599852736644, "grad_norm": 0.17879719345724795, "learning_rate": 1.5059516242889743e-06, "loss": 0.0015, "step": 47160 }, { "epoch": 0.7718236112247402, "grad_norm": 0.05076336929818043, "learning_rate": 1.5039093918826553e-06, "loss": 0.0014, "step": 47170 }, { "epoch": 0.7719872371758161, "grad_norm": 0.17239404813424308, "learning_rate": 1.501868300014071e-06, "loss": 0.0016, "step": 47180 }, { "epoch": 0.772150863126892, "grad_norm": 0.30236201269090357, "learning_rate": 1.4998283493490907e-06, "loss": 0.0028, "step": 47190 }, { "epoch": 0.7723144890779677, "grad_norm": 0.22153207285212329, "learning_rate": 1.49778954055321e-06, "loss": 0.003, "step": 47200 }, { "epoch": 0.7724781150290436, "grad_norm": 0.09080062709685986, "learning_rate": 1.4957518742915577e-06, "loss": 0.0014, "step": 47210 }, { "epoch": 0.7726417409801194, "grad_norm": 0.1146970518033467, "learning_rate": 1.4937153512288866e-06, "loss": 0.0019, "step": 47220 }, { "epoch": 0.7728053669311953, "grad_norm": 0.3137666781138699, "learning_rate": 1.4916799720295706e-06, "loss": 0.0024, "step": 47230 }, { "epoch": 0.7729689928822712, "grad_norm": 0.35709372668557193, "learning_rate": 1.4896457373576216e-06, "loss": 0.0022, "step": 47240 }, { "epoch": 0.7731326188333469, "grad_norm": 0.11045829800627718, "learning_rate": 1.4876126478766717e-06, "loss": 0.0025, "step": 47250 }, { "epoch": 0.7732962447844228, "grad_norm": 0.12589074670583394, "learning_rate": 1.4855807042499782e-06, "loss": 0.0011, "step": 47260 }, { "epoch": 0.7734598707354986, "grad_norm": 0.2564285886702629, "learning_rate": 1.4835499071404298e-06, "loss": 0.0033, "step": 47270 }, { "epoch": 0.7736234966865745, "grad_norm": 0.15687589364817492, "learning_rate": 1.4815202572105381e-06, "loss": 0.0016, "step": 47280 }, { "epoch": 0.7737871226376504, "grad_norm": 0.12472476213384284, "learning_rate": 1.479491755122436e-06, "loss": 0.0025, "step": 47290 }, { "epoch": 0.7739507485887261, "grad_norm": 0.055125679180532, "learning_rate": 1.4774644015378913e-06, "loss": 0.0021, "step": 47300 }, { "epoch": 0.774114374539802, "grad_norm": 0.04131063431432316, "learning_rate": 1.4754381971182901e-06, "loss": 0.0012, "step": 47310 }, { "epoch": 0.7742780004908778, "grad_norm": 0.3233626509152186, "learning_rate": 1.473413142524645e-06, "loss": 0.0019, "step": 47320 }, { "epoch": 0.7744416264419537, "grad_norm": 0.12489828881837006, "learning_rate": 1.4713892384175965e-06, "loss": 0.0021, "step": 47330 }, { "epoch": 0.7746052523930296, "grad_norm": 0.0837679681996852, "learning_rate": 1.4693664854574085e-06, "loss": 0.0025, "step": 47340 }, { "epoch": 0.7747688783441053, "grad_norm": 0.11027716071468342, "learning_rate": 1.4673448843039627e-06, "loss": 0.0019, "step": 47350 }, { "epoch": 0.7749325042951812, "grad_norm": 0.16096349194869602, "learning_rate": 1.4653244356167761e-06, "loss": 0.0015, "step": 47360 }, { "epoch": 0.775096130246257, "grad_norm": 0.06365189842850415, "learning_rate": 1.463305140054983e-06, "loss": 0.002, "step": 47370 }, { "epoch": 0.7752597561973329, "grad_norm": 0.029708642818202652, "learning_rate": 1.4612869982773403e-06, "loss": 0.0015, "step": 47380 }, { "epoch": 0.7754233821484088, "grad_norm": 0.3175061481642342, "learning_rate": 1.4592700109422352e-06, "loss": 0.0014, "step": 47390 }, { "epoch": 0.7755870080994846, "grad_norm": 0.10276834131282311, "learning_rate": 1.4572541787076732e-06, "loss": 0.0015, "step": 47400 }, { "epoch": 0.7757506340505604, "grad_norm": 0.0933544847800168, "learning_rate": 1.4552395022312804e-06, "loss": 0.0031, "step": 47410 }, { "epoch": 0.7759142600016362, "grad_norm": 0.09320693844665237, "learning_rate": 1.4532259821703133e-06, "loss": 0.0015, "step": 47420 }, { "epoch": 0.7760778859527121, "grad_norm": 0.09729233353527017, "learning_rate": 1.451213619181645e-06, "loss": 0.0014, "step": 47430 }, { "epoch": 0.776241511903788, "grad_norm": 0.38660681675956393, "learning_rate": 1.4492024139217747e-06, "loss": 0.0032, "step": 47440 }, { "epoch": 0.7764051378548638, "grad_norm": 0.17431782836530066, "learning_rate": 1.4471923670468202e-06, "loss": 0.0017, "step": 47450 }, { "epoch": 0.7765687638059396, "grad_norm": 0.2299255608305766, "learning_rate": 1.445183479212529e-06, "loss": 0.0029, "step": 47460 }, { "epoch": 0.7767323897570154, "grad_norm": 0.25449690777728545, "learning_rate": 1.44317575107426e-06, "loss": 0.003, "step": 47470 }, { "epoch": 0.7768960157080913, "grad_norm": 0.23810321308908192, "learning_rate": 1.4411691832869989e-06, "loss": 0.0018, "step": 47480 }, { "epoch": 0.7770596416591672, "grad_norm": 0.09106636597409241, "learning_rate": 1.4391637765053562e-06, "loss": 0.0023, "step": 47490 }, { "epoch": 0.777223267610243, "grad_norm": 0.14903263089934696, "learning_rate": 1.4371595313835596e-06, "loss": 0.0014, "step": 47500 }, { "epoch": 0.7773868935613188, "grad_norm": 0.1791936543731685, "learning_rate": 1.4351564485754577e-06, "loss": 0.0019, "step": 47510 }, { "epoch": 0.7775505195123946, "grad_norm": 0.14007063752594243, "learning_rate": 1.433154528734521e-06, "loss": 0.0016, "step": 47520 }, { "epoch": 0.7777141454634705, "grad_norm": 0.21569852442636941, "learning_rate": 1.4311537725138402e-06, "loss": 0.0021, "step": 47530 }, { "epoch": 0.7778777714145464, "grad_norm": 0.1477813097012733, "learning_rate": 1.4291541805661264e-06, "loss": 0.0044, "step": 47540 }, { "epoch": 0.7780413973656222, "grad_norm": 0.06717715660781988, "learning_rate": 1.4271557535437125e-06, "loss": 0.002, "step": 47550 }, { "epoch": 0.778205023316698, "grad_norm": 0.284568101737055, "learning_rate": 1.4251584920985494e-06, "loss": 0.0023, "step": 47560 }, { "epoch": 0.7783686492677738, "grad_norm": 0.14013774290816464, "learning_rate": 1.4231623968822083e-06, "loss": 0.0022, "step": 47570 }, { "epoch": 0.7785322752188497, "grad_norm": 0.038572173764087905, "learning_rate": 1.42116746854588e-06, "loss": 0.002, "step": 47580 }, { "epoch": 0.7786959011699256, "grad_norm": 0.17048032316101788, "learning_rate": 1.4191737077403738e-06, "loss": 0.0029, "step": 47590 }, { "epoch": 0.7788595271210014, "grad_norm": 0.17016912181734972, "learning_rate": 1.4171811151161186e-06, "loss": 0.0013, "step": 47600 }, { "epoch": 0.7790231530720773, "grad_norm": 0.09619362880440897, "learning_rate": 1.4151896913231645e-06, "loss": 0.0013, "step": 47610 }, { "epoch": 0.779186779023153, "grad_norm": 0.24352342912027405, "learning_rate": 1.4131994370111773e-06, "loss": 0.0026, "step": 47620 }, { "epoch": 0.7793504049742289, "grad_norm": 0.20584183996451946, "learning_rate": 1.4112103528294418e-06, "loss": 0.0016, "step": 47630 }, { "epoch": 0.7795140309253048, "grad_norm": 0.1416382089350118, "learning_rate": 1.4092224394268629e-06, "loss": 0.002, "step": 47640 }, { "epoch": 0.7796776568763806, "grad_norm": 0.103123160930211, "learning_rate": 1.4072356974519601e-06, "loss": 0.0023, "step": 47650 }, { "epoch": 0.7798412828274565, "grad_norm": 0.18341025861418078, "learning_rate": 1.405250127552873e-06, "loss": 0.0018, "step": 47660 }, { "epoch": 0.7800049087785322, "grad_norm": 0.10167835698104476, "learning_rate": 1.4032657303773606e-06, "loss": 0.0017, "step": 47670 }, { "epoch": 0.7801685347296081, "grad_norm": 0.19719538253535882, "learning_rate": 1.401282506572797e-06, "loss": 0.0031, "step": 47680 }, { "epoch": 0.780332160680684, "grad_norm": 0.23152140546286598, "learning_rate": 1.3993004567861728e-06, "loss": 0.0023, "step": 47690 }, { "epoch": 0.7804957866317598, "grad_norm": 0.07451915494433714, "learning_rate": 1.3973195816640973e-06, "loss": 0.0015, "step": 47700 }, { "epoch": 0.7806594125828357, "grad_norm": 0.30495971623377205, "learning_rate": 1.3953398818527959e-06, "loss": 0.0035, "step": 47710 }, { "epoch": 0.7808230385339114, "grad_norm": 0.22746142323368346, "learning_rate": 1.3933613579981093e-06, "loss": 0.0024, "step": 47720 }, { "epoch": 0.7809866644849873, "grad_norm": 0.22205970809478168, "learning_rate": 1.391384010745498e-06, "loss": 0.0026, "step": 47730 }, { "epoch": 0.7811502904360632, "grad_norm": 0.12180686757075486, "learning_rate": 1.3894078407400362e-06, "loss": 0.0037, "step": 47740 }, { "epoch": 0.781313916387139, "grad_norm": 0.05990027464909791, "learning_rate": 1.3874328486264143e-06, "loss": 0.0012, "step": 47750 }, { "epoch": 0.7814775423382149, "grad_norm": 0.21658144097064255, "learning_rate": 1.3854590350489378e-06, "loss": 0.0026, "step": 47760 }, { "epoch": 0.7816411682892906, "grad_norm": 0.11016702789422426, "learning_rate": 1.3834864006515292e-06, "loss": 0.0028, "step": 47770 }, { "epoch": 0.7818047942403665, "grad_norm": 0.20510262414746427, "learning_rate": 1.381514946077725e-06, "loss": 0.0048, "step": 47780 }, { "epoch": 0.7819684201914424, "grad_norm": 0.06465576354909051, "learning_rate": 1.3795446719706767e-06, "loss": 0.0013, "step": 47790 }, { "epoch": 0.7821320461425182, "grad_norm": 0.23943426033453444, "learning_rate": 1.3775755789731533e-06, "loss": 0.0037, "step": 47800 }, { "epoch": 0.7822956720935941, "grad_norm": 0.2811009319554033, "learning_rate": 1.375607667727536e-06, "loss": 0.0034, "step": 47810 }, { "epoch": 0.7824592980446698, "grad_norm": 0.15350533999288016, "learning_rate": 1.3736409388758203e-06, "loss": 0.0029, "step": 47820 }, { "epoch": 0.7826229239957457, "grad_norm": 0.11218334863159217, "learning_rate": 1.3716753930596177e-06, "loss": 0.0013, "step": 47830 }, { "epoch": 0.7827865499468216, "grad_norm": 0.06904977146678239, "learning_rate": 1.3697110309201518e-06, "loss": 0.0024, "step": 47840 }, { "epoch": 0.7829501758978974, "grad_norm": 0.06278450412118958, "learning_rate": 1.3677478530982597e-06, "loss": 0.0018, "step": 47850 }, { "epoch": 0.7831138018489733, "grad_norm": 0.09427927705302362, "learning_rate": 1.3657858602343972e-06, "loss": 0.002, "step": 47860 }, { "epoch": 0.783277427800049, "grad_norm": 0.14483117646688543, "learning_rate": 1.363825052968627e-06, "loss": 0.0023, "step": 47870 }, { "epoch": 0.7834410537511249, "grad_norm": 0.13039105429753506, "learning_rate": 1.3618654319406287e-06, "loss": 0.0026, "step": 47880 }, { "epoch": 0.7836046797022008, "grad_norm": 0.3842447508638256, "learning_rate": 1.3599069977896939e-06, "loss": 0.001, "step": 47890 }, { "epoch": 0.7837683056532766, "grad_norm": 0.18417246564249867, "learning_rate": 1.3579497511547268e-06, "loss": 0.0022, "step": 47900 }, { "epoch": 0.7839319316043525, "grad_norm": 0.06308406191247852, "learning_rate": 1.3559936926742433e-06, "loss": 0.0023, "step": 47910 }, { "epoch": 0.7840955575554283, "grad_norm": 0.05545072416061435, "learning_rate": 1.354038822986376e-06, "loss": 0.0022, "step": 47920 }, { "epoch": 0.7842591835065041, "grad_norm": 0.045346089711888325, "learning_rate": 1.3520851427288651e-06, "loss": 0.0023, "step": 47930 }, { "epoch": 0.7844228094575799, "grad_norm": 0.12886169849205031, "learning_rate": 1.3501326525390635e-06, "loss": 0.0056, "step": 47940 }, { "epoch": 0.7845864354086558, "grad_norm": 0.10413595122795785, "learning_rate": 1.3481813530539368e-06, "loss": 0.0035, "step": 47950 }, { "epoch": 0.7847500613597317, "grad_norm": 0.05225019331871401, "learning_rate": 1.3462312449100628e-06, "loss": 0.0017, "step": 47960 }, { "epoch": 0.7849136873108075, "grad_norm": 0.09285602950112436, "learning_rate": 1.3442823287436274e-06, "loss": 0.0025, "step": 47970 }, { "epoch": 0.7850773132618833, "grad_norm": 0.5464102477723739, "learning_rate": 1.3423346051904334e-06, "loss": 0.002, "step": 47980 }, { "epoch": 0.7852409392129591, "grad_norm": 0.3564787486186036, "learning_rate": 1.3403880748858894e-06, "loss": 0.0025, "step": 47990 }, { "epoch": 0.785404565164035, "grad_norm": 0.15288055382589755, "learning_rate": 1.3384427384650166e-06, "loss": 0.0033, "step": 48000 }, { "epoch": 0.7855681911151109, "grad_norm": 0.17280972223040875, "learning_rate": 1.3364985965624473e-06, "loss": 0.0024, "step": 48010 }, { "epoch": 0.7857318170661867, "grad_norm": 0.0591808163754419, "learning_rate": 1.3345556498124228e-06, "loss": 0.0015, "step": 48020 }, { "epoch": 0.7858954430172626, "grad_norm": 0.23056957798171168, "learning_rate": 1.332613898848793e-06, "loss": 0.0024, "step": 48030 }, { "epoch": 0.7860590689683383, "grad_norm": 0.38065396519306094, "learning_rate": 1.330673344305024e-06, "loss": 0.0032, "step": 48040 }, { "epoch": 0.7862226949194142, "grad_norm": 0.09354012381691237, "learning_rate": 1.3287339868141851e-06, "loss": 0.0013, "step": 48050 }, { "epoch": 0.7863863208704901, "grad_norm": 0.062426577917024884, "learning_rate": 1.3267958270089582e-06, "loss": 0.0008, "step": 48060 }, { "epoch": 0.7865499468215659, "grad_norm": 0.14164616216577655, "learning_rate": 1.324858865521632e-06, "loss": 0.0024, "step": 48070 }, { "epoch": 0.7867135727726418, "grad_norm": 0.1378638949127868, "learning_rate": 1.3229231029841077e-06, "loss": 0.0017, "step": 48080 }, { "epoch": 0.7868771987237175, "grad_norm": 0.10423390547344576, "learning_rate": 1.3209885400278933e-06, "loss": 0.0013, "step": 48090 }, { "epoch": 0.7870408246747934, "grad_norm": 0.11497434302093766, "learning_rate": 1.3190551772841032e-06, "loss": 0.0018, "step": 48100 }, { "epoch": 0.7872044506258693, "grad_norm": 0.14612236400511033, "learning_rate": 1.3171230153834669e-06, "loss": 0.0014, "step": 48110 }, { "epoch": 0.7873680765769451, "grad_norm": 0.0469320150030638, "learning_rate": 1.3151920549563158e-06, "loss": 0.0023, "step": 48120 }, { "epoch": 0.787531702528021, "grad_norm": 0.003913219145657117, "learning_rate": 1.313262296632592e-06, "loss": 0.0012, "step": 48130 }, { "epoch": 0.7876953284790967, "grad_norm": 0.08066569413358624, "learning_rate": 1.3113337410418453e-06, "loss": 0.0015, "step": 48140 }, { "epoch": 0.7878589544301726, "grad_norm": 0.13076089636497218, "learning_rate": 1.3094063888132319e-06, "loss": 0.0017, "step": 48150 }, { "epoch": 0.7880225803812485, "grad_norm": 0.06958654413189007, "learning_rate": 1.307480240575516e-06, "loss": 0.0019, "step": 48160 }, { "epoch": 0.7881862063323243, "grad_norm": 0.293656214552158, "learning_rate": 1.3055552969570717e-06, "loss": 0.0022, "step": 48170 }, { "epoch": 0.7883498322834002, "grad_norm": 0.017069204530643426, "learning_rate": 1.3036315585858766e-06, "loss": 0.0023, "step": 48180 }, { "epoch": 0.7885134582344759, "grad_norm": 0.13197021774189005, "learning_rate": 1.301709026089516e-06, "loss": 0.0017, "step": 48190 }, { "epoch": 0.7886770841855518, "grad_norm": 0.022928867645394774, "learning_rate": 1.2997877000951825e-06, "loss": 0.0034, "step": 48200 }, { "epoch": 0.7888407101366277, "grad_norm": 0.0703507963375967, "learning_rate": 1.2978675812296742e-06, "loss": 0.002, "step": 48210 }, { "epoch": 0.7890043360877035, "grad_norm": 0.18970392836297792, "learning_rate": 1.2959486701193951e-06, "loss": 0.0009, "step": 48220 }, { "epoch": 0.7891679620387794, "grad_norm": 0.04444627565394381, "learning_rate": 1.294030967390359e-06, "loss": 0.0025, "step": 48230 }, { "epoch": 0.7893315879898551, "grad_norm": 0.09752282742047552, "learning_rate": 1.2921144736681817e-06, "loss": 0.0019, "step": 48240 }, { "epoch": 0.789495213940931, "grad_norm": 0.5250058983449449, "learning_rate": 1.2901991895780818e-06, "loss": 0.0016, "step": 48250 }, { "epoch": 0.7896588398920069, "grad_norm": 0.017380514340823345, "learning_rate": 1.2882851157448906e-06, "loss": 0.0028, "step": 48260 }, { "epoch": 0.7898224658430827, "grad_norm": 0.08951283146517515, "learning_rate": 1.2863722527930401e-06, "loss": 0.0029, "step": 48270 }, { "epoch": 0.7899860917941586, "grad_norm": 0.15813523519123485, "learning_rate": 1.2844606013465655e-06, "loss": 0.0019, "step": 48280 }, { "epoch": 0.7901497177452343, "grad_norm": 0.07379427184668581, "learning_rate": 1.2825501620291136e-06, "loss": 0.0016, "step": 48290 }, { "epoch": 0.7903133436963102, "grad_norm": 0.23647016431558107, "learning_rate": 1.28064093546393e-06, "loss": 0.0034, "step": 48300 }, { "epoch": 0.7904769696473861, "grad_norm": 0.049473921908787875, "learning_rate": 1.2787329222738626e-06, "loss": 0.0015, "step": 48310 }, { "epoch": 0.7906405955984619, "grad_norm": 0.2953573060433273, "learning_rate": 1.2768261230813717e-06, "loss": 0.0031, "step": 48320 }, { "epoch": 0.7908042215495378, "grad_norm": 0.06211686656672406, "learning_rate": 1.274920538508514e-06, "loss": 0.002, "step": 48330 }, { "epoch": 0.7909678475006136, "grad_norm": 0.12325816364804497, "learning_rate": 1.2730161691769526e-06, "loss": 0.0025, "step": 48340 }, { "epoch": 0.7911314734516894, "grad_norm": 0.1411198920338921, "learning_rate": 1.2711130157079566e-06, "loss": 0.0034, "step": 48350 }, { "epoch": 0.7912950994027653, "grad_norm": 0.20799919714040022, "learning_rate": 1.2692110787223965e-06, "loss": 0.0035, "step": 48360 }, { "epoch": 0.7914587253538411, "grad_norm": 0.09009724020755089, "learning_rate": 1.267310358840741e-06, "loss": 0.003, "step": 48370 }, { "epoch": 0.791622351304917, "grad_norm": 0.2481705526184436, "learning_rate": 1.2654108566830702e-06, "loss": 0.0028, "step": 48380 }, { "epoch": 0.7917859772559928, "grad_norm": 0.07467848485961295, "learning_rate": 1.2635125728690627e-06, "loss": 0.0027, "step": 48390 }, { "epoch": 0.7919496032070686, "grad_norm": 0.05418448507075133, "learning_rate": 1.2616155080179982e-06, "loss": 0.0012, "step": 48400 }, { "epoch": 0.7921132291581445, "grad_norm": 0.2845613768330517, "learning_rate": 1.2597196627487629e-06, "loss": 0.0017, "step": 48410 }, { "epoch": 0.7922768551092203, "grad_norm": 0.11375271010381821, "learning_rate": 1.2578250376798434e-06, "loss": 0.0014, "step": 48420 }, { "epoch": 0.7924404810602962, "grad_norm": 0.1341817335122393, "learning_rate": 1.2559316334293248e-06, "loss": 0.0029, "step": 48430 }, { "epoch": 0.792604107011372, "grad_norm": 0.2924596770633388, "learning_rate": 1.254039450614896e-06, "loss": 0.0019, "step": 48440 }, { "epoch": 0.7927677329624478, "grad_norm": 0.04525819640855382, "learning_rate": 1.2521484898538523e-06, "loss": 0.0013, "step": 48450 }, { "epoch": 0.7929313589135237, "grad_norm": 0.0639154925905611, "learning_rate": 1.2502587517630832e-06, "loss": 0.0013, "step": 48460 }, { "epoch": 0.7930949848645995, "grad_norm": 0.16817728882628474, "learning_rate": 1.2483702369590823e-06, "loss": 0.0018, "step": 48470 }, { "epoch": 0.7932586108156754, "grad_norm": 0.26547309580554723, "learning_rate": 1.2464829460579475e-06, "loss": 0.0014, "step": 48480 }, { "epoch": 0.7934222367667512, "grad_norm": 0.04125464367940636, "learning_rate": 1.2445968796753704e-06, "loss": 0.0023, "step": 48490 }, { "epoch": 0.793585862717827, "grad_norm": 0.13973628683449885, "learning_rate": 1.242712038426646e-06, "loss": 0.0026, "step": 48500 }, { "epoch": 0.7937494886689029, "grad_norm": 0.13808481908338202, "learning_rate": 1.2408284229266748e-06, "loss": 0.0021, "step": 48510 }, { "epoch": 0.7939131146199787, "grad_norm": 0.25530876687539716, "learning_rate": 1.23894603378995e-06, "loss": 0.0056, "step": 48520 }, { "epoch": 0.7940767405710546, "grad_norm": 0.10519822627650355, "learning_rate": 1.2370648716305672e-06, "loss": 0.0014, "step": 48530 }, { "epoch": 0.7942403665221304, "grad_norm": 0.16921569586770094, "learning_rate": 1.2351849370622266e-06, "loss": 0.002, "step": 48540 }, { "epoch": 0.7944039924732063, "grad_norm": 0.27240336912427127, "learning_rate": 1.2333062306982186e-06, "loss": 0.0033, "step": 48550 }, { "epoch": 0.7945676184242821, "grad_norm": 0.2090694531639897, "learning_rate": 1.2314287531514385e-06, "loss": 0.0033, "step": 48560 }, { "epoch": 0.7947312443753579, "grad_norm": 0.15972070993977083, "learning_rate": 1.2295525050343832e-06, "loss": 0.0028, "step": 48570 }, { "epoch": 0.7948948703264338, "grad_norm": 0.12952037646266074, "learning_rate": 1.227677486959144e-06, "loss": 0.0018, "step": 48580 }, { "epoch": 0.7950584962775096, "grad_norm": 0.03591734762951992, "learning_rate": 1.2258036995374102e-06, "loss": 0.0015, "step": 48590 }, { "epoch": 0.7952221222285855, "grad_norm": 0.2014415389307618, "learning_rate": 1.2239311433804769e-06, "loss": 0.0016, "step": 48600 }, { "epoch": 0.7953857481796613, "grad_norm": 0.004998662143198341, "learning_rate": 1.2220598190992277e-06, "loss": 0.0013, "step": 48610 }, { "epoch": 0.7955493741307371, "grad_norm": 0.08218378151299863, "learning_rate": 1.220189727304149e-06, "loss": 0.0017, "step": 48620 }, { "epoch": 0.795713000081813, "grad_norm": 0.226071398313477, "learning_rate": 1.2183208686053289e-06, "loss": 0.0022, "step": 48630 }, { "epoch": 0.7958766260328888, "grad_norm": 0.2575719719662208, "learning_rate": 1.2164532436124472e-06, "loss": 0.0016, "step": 48640 }, { "epoch": 0.7960402519839647, "grad_norm": 0.5916270759039125, "learning_rate": 1.2145868529347821e-06, "loss": 0.0027, "step": 48650 }, { "epoch": 0.7962038779350405, "grad_norm": 0.1025646487770363, "learning_rate": 1.212721697181215e-06, "loss": 0.0028, "step": 48660 }, { "epoch": 0.7963675038861163, "grad_norm": 0.0919091202605032, "learning_rate": 1.2108577769602165e-06, "loss": 0.0018, "step": 48670 }, { "epoch": 0.7965311298371922, "grad_norm": 0.025338708012351826, "learning_rate": 1.208995092879856e-06, "loss": 0.003, "step": 48680 }, { "epoch": 0.796694755788268, "grad_norm": 0.04743675698561767, "learning_rate": 1.2071336455478055e-06, "loss": 0.0023, "step": 48690 }, { "epoch": 0.7968583817393439, "grad_norm": 0.372528354296269, "learning_rate": 1.205273435571327e-06, "loss": 0.0046, "step": 48700 }, { "epoch": 0.7970220076904198, "grad_norm": 0.5094668952787667, "learning_rate": 1.2034144635572797e-06, "loss": 0.0068, "step": 48710 }, { "epoch": 0.7971856336414955, "grad_norm": 0.1817852062929723, "learning_rate": 1.2015567301121256e-06, "loss": 0.0029, "step": 48720 }, { "epoch": 0.7973492595925714, "grad_norm": 0.08401039650203053, "learning_rate": 1.1997002358419118e-06, "loss": 0.002, "step": 48730 }, { "epoch": 0.7975128855436472, "grad_norm": 0.270742669156899, "learning_rate": 1.197844981352289e-06, "loss": 0.0028, "step": 48740 }, { "epoch": 0.7976765114947231, "grad_norm": 0.16051683474496112, "learning_rate": 1.1959909672484998e-06, "loss": 0.0025, "step": 48750 }, { "epoch": 0.797840137445799, "grad_norm": 0.13389212542110335, "learning_rate": 1.1941381941353857e-06, "loss": 0.0015, "step": 48760 }, { "epoch": 0.7980037633968747, "grad_norm": 0.09398602728654756, "learning_rate": 1.1922866626173802e-06, "loss": 0.0018, "step": 48770 }, { "epoch": 0.7981673893479506, "grad_norm": 0.09871255224743344, "learning_rate": 1.1904363732985124e-06, "loss": 0.003, "step": 48780 }, { "epoch": 0.7983310152990264, "grad_norm": 0.0492863277487948, "learning_rate": 1.1885873267824072e-06, "loss": 0.0018, "step": 48790 }, { "epoch": 0.7984946412501023, "grad_norm": 0.07554320419631094, "learning_rate": 1.1867395236722828e-06, "loss": 0.0018, "step": 48800 }, { "epoch": 0.798658267201178, "grad_norm": 0.18350981573709402, "learning_rate": 1.1848929645709506e-06, "loss": 0.0019, "step": 48810 }, { "epoch": 0.7988218931522539, "grad_norm": 0.13345806362801924, "learning_rate": 1.1830476500808207e-06, "loss": 0.0012, "step": 48820 }, { "epoch": 0.7989855191033298, "grad_norm": 0.15621868766177915, "learning_rate": 1.1812035808038934e-06, "loss": 0.0044, "step": 48830 }, { "epoch": 0.7991491450544056, "grad_norm": 0.23928409511538293, "learning_rate": 1.1793607573417638e-06, "loss": 0.0036, "step": 48840 }, { "epoch": 0.7993127710054815, "grad_norm": 0.18891972137459614, "learning_rate": 1.1775191802956199e-06, "loss": 0.0017, "step": 48850 }, { "epoch": 0.7994763969565573, "grad_norm": 0.1426560669229477, "learning_rate": 1.1756788502662436e-06, "loss": 0.0025, "step": 48860 }, { "epoch": 0.7996400229076331, "grad_norm": 0.14488672582204726, "learning_rate": 1.1738397678540091e-06, "loss": 0.002, "step": 48870 }, { "epoch": 0.799803648858709, "grad_norm": 0.11709875183911246, "learning_rate": 1.1720019336588873e-06, "loss": 0.0021, "step": 48880 }, { "epoch": 0.7999672748097848, "grad_norm": 0.10438392829273505, "learning_rate": 1.170165348280437e-06, "loss": 0.0024, "step": 48890 }, { "epoch": 0.8001309007608607, "grad_norm": 0.1089760491860878, "learning_rate": 1.1683300123178126e-06, "loss": 0.0022, "step": 48900 }, { "epoch": 0.8002945267119365, "grad_norm": 0.18184087226355755, "learning_rate": 1.16649592636976e-06, "loss": 0.0019, "step": 48910 }, { "epoch": 0.8004581526630123, "grad_norm": 0.09544216569412499, "learning_rate": 1.1646630910346162e-06, "loss": 0.003, "step": 48920 }, { "epoch": 0.8006217786140882, "grad_norm": 0.10098643535187125, "learning_rate": 1.1628315069103108e-06, "loss": 0.002, "step": 48930 }, { "epoch": 0.800785404565164, "grad_norm": 0.21537063001659107, "learning_rate": 1.161001174594369e-06, "loss": 0.0026, "step": 48940 }, { "epoch": 0.8009490305162399, "grad_norm": 0.04227623803558214, "learning_rate": 1.1591720946839014e-06, "loss": 0.0007, "step": 48950 }, { "epoch": 0.8011126564673157, "grad_norm": 0.15476867935214153, "learning_rate": 1.1573442677756142e-06, "loss": 0.0024, "step": 48960 }, { "epoch": 0.8012762824183915, "grad_norm": 0.29047802757149555, "learning_rate": 1.1555176944658035e-06, "loss": 0.0041, "step": 48970 }, { "epoch": 0.8014399083694674, "grad_norm": 0.018645180535673708, "learning_rate": 1.1536923753503566e-06, "loss": 0.0012, "step": 48980 }, { "epoch": 0.8016035343205432, "grad_norm": 0.0191645132658388, "learning_rate": 1.151868311024749e-06, "loss": 0.0013, "step": 48990 }, { "epoch": 0.8017671602716191, "grad_norm": 0.12958291259007618, "learning_rate": 1.150045502084054e-06, "loss": 0.0019, "step": 49000 }, { "epoch": 0.8019307862226949, "grad_norm": 0.020998683239434567, "learning_rate": 1.148223949122928e-06, "loss": 0.0032, "step": 49010 }, { "epoch": 0.8020944121737708, "grad_norm": 0.03771036832828451, "learning_rate": 1.1464036527356209e-06, "loss": 0.0019, "step": 49020 }, { "epoch": 0.8022580381248466, "grad_norm": 0.08186291505510673, "learning_rate": 1.1445846135159727e-06, "loss": 0.0035, "step": 49030 }, { "epoch": 0.8024216640759224, "grad_norm": 0.2142463727565218, "learning_rate": 1.142766832057412e-06, "loss": 0.0031, "step": 49040 }, { "epoch": 0.8025852900269983, "grad_norm": 0.13872718864960729, "learning_rate": 1.1409503089529573e-06, "loss": 0.0029, "step": 49050 }, { "epoch": 0.8027489159780741, "grad_norm": 0.18749714463965136, "learning_rate": 1.1391350447952188e-06, "loss": 0.0023, "step": 49060 }, { "epoch": 0.80291254192915, "grad_norm": 0.2547333633417284, "learning_rate": 1.137321040176394e-06, "loss": 0.0014, "step": 49070 }, { "epoch": 0.8030761678802258, "grad_norm": 0.42253488851951854, "learning_rate": 1.135508295688269e-06, "loss": 0.0024, "step": 49080 }, { "epoch": 0.8032397938313016, "grad_norm": 0.11363707593928521, "learning_rate": 1.1336968119222203e-06, "loss": 0.0013, "step": 49090 }, { "epoch": 0.8034034197823775, "grad_norm": 0.13872117607886797, "learning_rate": 1.131886589469212e-06, "loss": 0.003, "step": 49100 }, { "epoch": 0.8035670457334533, "grad_norm": 0.3812063477244664, "learning_rate": 1.1300776289197979e-06, "loss": 0.0013, "step": 49110 }, { "epoch": 0.8037306716845292, "grad_norm": 0.03595518080274262, "learning_rate": 1.1282699308641166e-06, "loss": 0.0017, "step": 49120 }, { "epoch": 0.803894297635605, "grad_norm": 0.09008680669170954, "learning_rate": 1.126463495891902e-06, "loss": 0.0027, "step": 49130 }, { "epoch": 0.8040579235866808, "grad_norm": 0.09142113329494993, "learning_rate": 1.1246583245924687e-06, "loss": 0.0012, "step": 49140 }, { "epoch": 0.8042215495377567, "grad_norm": 0.0911831511286142, "learning_rate": 1.122854417554723e-06, "loss": 0.0026, "step": 49150 }, { "epoch": 0.8043851754888325, "grad_norm": 0.15472805919059357, "learning_rate": 1.1210517753671568e-06, "loss": 0.0023, "step": 49160 }, { "epoch": 0.8045488014399084, "grad_norm": 0.13093212419936698, "learning_rate": 1.1192503986178515e-06, "loss": 0.0042, "step": 49170 }, { "epoch": 0.8047124273909843, "grad_norm": 0.10252447479831282, "learning_rate": 1.1174502878944715e-06, "loss": 0.0014, "step": 49180 }, { "epoch": 0.80487605334206, "grad_norm": 0.08114703827453858, "learning_rate": 1.1156514437842753e-06, "loss": 0.0041, "step": 49190 }, { "epoch": 0.8050396792931359, "grad_norm": 0.2855196029497082, "learning_rate": 1.1138538668741011e-06, "loss": 0.002, "step": 49200 }, { "epoch": 0.8052033052442117, "grad_norm": 0.18086581430494428, "learning_rate": 1.1120575577503778e-06, "loss": 0.0016, "step": 49210 }, { "epoch": 0.8053669311952876, "grad_norm": 0.1264583316951292, "learning_rate": 1.1102625169991194e-06, "loss": 0.002, "step": 49220 }, { "epoch": 0.8055305571463635, "grad_norm": 0.12753249715892995, "learning_rate": 1.1084687452059256e-06, "loss": 0.0021, "step": 49230 }, { "epoch": 0.8056941830974392, "grad_norm": 0.10987794054723932, "learning_rate": 1.1066762429559812e-06, "loss": 0.0018, "step": 49240 }, { "epoch": 0.8058578090485151, "grad_norm": 0.44109008749859413, "learning_rate": 1.104885010834062e-06, "loss": 0.0041, "step": 49250 }, { "epoch": 0.8060214349995909, "grad_norm": 0.09642172589835633, "learning_rate": 1.1030950494245234e-06, "loss": 0.0021, "step": 49260 }, { "epoch": 0.8061850609506668, "grad_norm": 0.03951203650870052, "learning_rate": 1.1013063593113098e-06, "loss": 0.0008, "step": 49270 }, { "epoch": 0.8063486869017427, "grad_norm": 0.041398264768328676, "learning_rate": 1.0995189410779489e-06, "loss": 0.0013, "step": 49280 }, { "epoch": 0.8065123128528184, "grad_norm": 0.327907268720022, "learning_rate": 1.0977327953075544e-06, "loss": 0.0019, "step": 49290 }, { "epoch": 0.8066759388038943, "grad_norm": 0.21949316599751556, "learning_rate": 1.0959479225828234e-06, "loss": 0.0022, "step": 49300 }, { "epoch": 0.8068395647549701, "grad_norm": 0.26978019970528955, "learning_rate": 1.0941643234860422e-06, "loss": 0.0046, "step": 49310 }, { "epoch": 0.807003190706046, "grad_norm": 0.07665381958896401, "learning_rate": 1.0923819985990768e-06, "loss": 0.0021, "step": 49320 }, { "epoch": 0.8071668166571219, "grad_norm": 0.2542194490507373, "learning_rate": 1.0906009485033786e-06, "loss": 0.0014, "step": 49330 }, { "epoch": 0.8073304426081976, "grad_norm": 0.05549457868175632, "learning_rate": 1.0888211737799847e-06, "loss": 0.0013, "step": 49340 }, { "epoch": 0.8074940685592735, "grad_norm": 0.166608547736739, "learning_rate": 1.0870426750095148e-06, "loss": 0.0018, "step": 49350 }, { "epoch": 0.8076576945103493, "grad_norm": 0.3178274859363724, "learning_rate": 1.0852654527721712e-06, "loss": 0.0026, "step": 49360 }, { "epoch": 0.8078213204614252, "grad_norm": 0.06130743603463829, "learning_rate": 1.0834895076477437e-06, "loss": 0.0015, "step": 49370 }, { "epoch": 0.8079849464125011, "grad_norm": 0.010331530406221326, "learning_rate": 1.0817148402156019e-06, "loss": 0.0012, "step": 49380 }, { "epoch": 0.8081485723635768, "grad_norm": 0.04341121348287595, "learning_rate": 1.0799414510547003e-06, "loss": 0.0018, "step": 49390 }, { "epoch": 0.8083121983146527, "grad_norm": 0.07123786874704853, "learning_rate": 1.0781693407435745e-06, "loss": 0.0021, "step": 49400 }, { "epoch": 0.8084758242657285, "grad_norm": 0.2624504874115645, "learning_rate": 1.0763985098603447e-06, "loss": 0.0025, "step": 49410 }, { "epoch": 0.8086394502168044, "grad_norm": 0.2501878316786769, "learning_rate": 1.074628958982713e-06, "loss": 0.0021, "step": 49420 }, { "epoch": 0.8088030761678803, "grad_norm": 0.14268568534069873, "learning_rate": 1.0728606886879634e-06, "loss": 0.0026, "step": 49430 }, { "epoch": 0.808966702118956, "grad_norm": 0.06297368975380918, "learning_rate": 1.0710936995529646e-06, "loss": 0.0007, "step": 49440 }, { "epoch": 0.8091303280700319, "grad_norm": 0.11489188480460669, "learning_rate": 1.0693279921541665e-06, "loss": 0.0022, "step": 49450 }, { "epoch": 0.8092939540211077, "grad_norm": 0.1829565295745005, "learning_rate": 1.0675635670675954e-06, "loss": 0.0018, "step": 49460 }, { "epoch": 0.8094575799721836, "grad_norm": 0.1268995703237754, "learning_rate": 1.0658004248688674e-06, "loss": 0.0014, "step": 49470 }, { "epoch": 0.8096212059232595, "grad_norm": 0.06750752138816665, "learning_rate": 1.0640385661331758e-06, "loss": 0.0015, "step": 49480 }, { "epoch": 0.8097848318743353, "grad_norm": 0.0590578051088296, "learning_rate": 1.0622779914352943e-06, "loss": 0.0014, "step": 49490 }, { "epoch": 0.8099484578254111, "grad_norm": 0.15650020542304577, "learning_rate": 1.0605187013495815e-06, "loss": 0.0029, "step": 49500 }, { "epoch": 0.8101120837764869, "grad_norm": 0.19558107522012827, "learning_rate": 1.0587606964499753e-06, "loss": 0.0024, "step": 49510 }, { "epoch": 0.8102757097275628, "grad_norm": 0.10335553112846346, "learning_rate": 1.0570039773099894e-06, "loss": 0.0016, "step": 49520 }, { "epoch": 0.8104393356786387, "grad_norm": 0.02449399370254586, "learning_rate": 1.0552485445027261e-06, "loss": 0.003, "step": 49530 }, { "epoch": 0.8106029616297145, "grad_norm": 0.1363349693400131, "learning_rate": 1.0534943986008633e-06, "loss": 0.0027, "step": 49540 }, { "epoch": 0.8107665875807903, "grad_norm": 0.10317328739452923, "learning_rate": 1.051741540176659e-06, "loss": 0.0018, "step": 49550 }, { "epoch": 0.8109302135318661, "grad_norm": 0.09270617389713492, "learning_rate": 1.049989969801956e-06, "loss": 0.0018, "step": 49560 }, { "epoch": 0.811093839482942, "grad_norm": 0.05002425077595298, "learning_rate": 1.0482396880481692e-06, "loss": 0.0018, "step": 49570 }, { "epoch": 0.8112574654340179, "grad_norm": 0.0860051333214948, "learning_rate": 1.046490695486297e-06, "loss": 0.0018, "step": 49580 }, { "epoch": 0.8114210913850937, "grad_norm": 0.21056914898429296, "learning_rate": 1.0447429926869196e-06, "loss": 0.0029, "step": 49590 }, { "epoch": 0.8115847173361695, "grad_norm": 0.05975768799229619, "learning_rate": 1.0429965802201935e-06, "loss": 0.001, "step": 49600 }, { "epoch": 0.8117483432872453, "grad_norm": 0.04347185584560614, "learning_rate": 1.0412514586558531e-06, "loss": 0.002, "step": 49610 }, { "epoch": 0.8119119692383212, "grad_norm": 0.04928992122041644, "learning_rate": 1.0395076285632177e-06, "loss": 0.0026, "step": 49620 }, { "epoch": 0.8120755951893971, "grad_norm": 0.03852364332709138, "learning_rate": 1.0377650905111764e-06, "loss": 0.0019, "step": 49630 }, { "epoch": 0.8122392211404729, "grad_norm": 0.02202195388414747, "learning_rate": 1.0360238450682026e-06, "loss": 0.0029, "step": 49640 }, { "epoch": 0.8124028470915488, "grad_norm": 0.07055682156013596, "learning_rate": 1.0342838928023486e-06, "loss": 0.0017, "step": 49650 }, { "epoch": 0.8125664730426245, "grad_norm": 0.041976855526547, "learning_rate": 1.0325452342812414e-06, "loss": 0.0028, "step": 49660 }, { "epoch": 0.8127300989937004, "grad_norm": 0.056436283705117885, "learning_rate": 1.0308078700720875e-06, "loss": 0.0015, "step": 49670 }, { "epoch": 0.8128937249447763, "grad_norm": 0.14382317879234516, "learning_rate": 1.029071800741674e-06, "loss": 0.0018, "step": 49680 }, { "epoch": 0.8130573508958521, "grad_norm": 0.11519219643693086, "learning_rate": 1.0273370268563598e-06, "loss": 0.0011, "step": 49690 }, { "epoch": 0.813220976846928, "grad_norm": 0.11587177410544204, "learning_rate": 1.0256035489820836e-06, "loss": 0.0017, "step": 49700 }, { "epoch": 0.8133846027980037, "grad_norm": 0.03465703032699276, "learning_rate": 1.0238713676843648e-06, "loss": 0.0019, "step": 49710 }, { "epoch": 0.8135482287490796, "grad_norm": 0.23581928375421, "learning_rate": 1.0221404835282956e-06, "loss": 0.0021, "step": 49720 }, { "epoch": 0.8137118547001554, "grad_norm": 0.13247542666398174, "learning_rate": 1.0204108970785458e-06, "loss": 0.0033, "step": 49730 }, { "epoch": 0.8138754806512313, "grad_norm": 0.0904144560927875, "learning_rate": 1.0186826088993634e-06, "loss": 0.0018, "step": 49740 }, { "epoch": 0.8140391066023072, "grad_norm": 0.11022979830028298, "learning_rate": 1.016955619554571e-06, "loss": 0.0014, "step": 49750 }, { "epoch": 0.8142027325533829, "grad_norm": 0.28995232148683214, "learning_rate": 1.0152299296075686e-06, "loss": 0.0031, "step": 49760 }, { "epoch": 0.8143663585044588, "grad_norm": 0.061515727976063445, "learning_rate": 1.0135055396213306e-06, "loss": 0.0012, "step": 49770 }, { "epoch": 0.8145299844555346, "grad_norm": 0.17838213642965908, "learning_rate": 1.0117824501584117e-06, "loss": 0.0027, "step": 49780 }, { "epoch": 0.8146936104066105, "grad_norm": 0.0865376384716027, "learning_rate": 1.0100606617809383e-06, "loss": 0.0024, "step": 49790 }, { "epoch": 0.8148572363576864, "grad_norm": 0.129675101660701, "learning_rate": 1.0083401750506122e-06, "loss": 0.0015, "step": 49800 }, { "epoch": 0.8150208623087621, "grad_norm": 0.04522226685261948, "learning_rate": 1.0066209905287132e-06, "loss": 0.0017, "step": 49810 }, { "epoch": 0.815184488259838, "grad_norm": 0.05308396513483676, "learning_rate": 1.004903108776094e-06, "loss": 0.004, "step": 49820 }, { "epoch": 0.8153481142109138, "grad_norm": 0.11263181848090847, "learning_rate": 1.0031865303531819e-06, "loss": 0.0023, "step": 49830 }, { "epoch": 0.8155117401619897, "grad_norm": 0.08759125851902075, "learning_rate": 1.001471255819983e-06, "loss": 0.0016, "step": 49840 }, { "epoch": 0.8156753661130656, "grad_norm": 0.1293976357907533, "learning_rate": 9.997572857360739e-07, "loss": 0.002, "step": 49850 }, { "epoch": 0.8158389920641413, "grad_norm": 0.17874225970660113, "learning_rate": 9.98044620660607e-07, "loss": 0.0048, "step": 49860 }, { "epoch": 0.8160026180152172, "grad_norm": 0.08490026081882147, "learning_rate": 9.963332611523096e-07, "loss": 0.0019, "step": 49870 }, { "epoch": 0.816166243966293, "grad_norm": 0.06825394309634786, "learning_rate": 9.946232077694818e-07, "loss": 0.0021, "step": 49880 }, { "epoch": 0.8163298699173689, "grad_norm": 0.08619646365586696, "learning_rate": 9.92914461069997e-07, "loss": 0.0025, "step": 49890 }, { "epoch": 0.8164934958684448, "grad_norm": 0.11573814133465485, "learning_rate": 9.912070216113063e-07, "loss": 0.0028, "step": 49900 }, { "epoch": 0.8166571218195205, "grad_norm": 0.08048732632430432, "learning_rate": 9.895008899504304e-07, "loss": 0.0032, "step": 49910 }, { "epoch": 0.8168207477705964, "grad_norm": 0.04886100114918426, "learning_rate": 9.877960666439646e-07, "loss": 0.0023, "step": 49920 }, { "epoch": 0.8169843737216722, "grad_norm": 0.06809259882783376, "learning_rate": 9.86092552248077e-07, "loss": 0.0015, "step": 49930 }, { "epoch": 0.8171479996727481, "grad_norm": 0.23727560381540982, "learning_rate": 9.843903473185097e-07, "loss": 0.0013, "step": 49940 }, { "epoch": 0.817311625623824, "grad_norm": 0.07050594527448661, "learning_rate": 9.82689452410575e-07, "loss": 0.0028, "step": 49950 }, { "epoch": 0.8174752515748998, "grad_norm": 0.11806638591375535, "learning_rate": 9.80989868079163e-07, "loss": 0.0019, "step": 49960 }, { "epoch": 0.8176388775259756, "grad_norm": 0.034993116396804566, "learning_rate": 9.792915948787313e-07, "loss": 0.0014, "step": 49970 }, { "epoch": 0.8178025034770514, "grad_norm": 0.2980401910651879, "learning_rate": 9.775946333633114e-07, "loss": 0.0012, "step": 49980 }, { "epoch": 0.8179661294281273, "grad_norm": 0.0583978725264821, "learning_rate": 9.758989840865074e-07, "loss": 0.0026, "step": 49990 }, { "epoch": 0.8181297553792032, "grad_norm": 0.05977471028561325, "learning_rate": 9.742046476014943e-07, "loss": 0.0017, "step": 50000 }, { "epoch": 0.8181297553792032, "eval_loss": 0.0015543417539447546, "eval_runtime": 7.5271, "eval_samples_per_second": 26.571, "eval_steps_per_second": 6.643, "step": 50000 }, { "epoch": 0.818293381330279, "grad_norm": 0.2413835624202922, "learning_rate": 9.725116244610189e-07, "loss": 0.0028, "step": 50010 }, { "epoch": 0.8184570072813548, "grad_norm": 0.1578326168667706, "learning_rate": 9.708199152174025e-07, "loss": 0.0024, "step": 50020 }, { "epoch": 0.8186206332324306, "grad_norm": 0.18755369335015012, "learning_rate": 9.691295204225332e-07, "loss": 0.0027, "step": 50030 }, { "epoch": 0.8187842591835065, "grad_norm": 0.09001391610301843, "learning_rate": 9.674404406278731e-07, "loss": 0.0023, "step": 50040 }, { "epoch": 0.8189478851345824, "grad_norm": 0.176816353102448, "learning_rate": 9.65752676384455e-07, "loss": 0.0019, "step": 50050 }, { "epoch": 0.8191115110856582, "grad_norm": 0.016845614263576194, "learning_rate": 9.640662282428815e-07, "loss": 0.002, "step": 50060 }, { "epoch": 0.819275137036734, "grad_norm": 0.07352834614778095, "learning_rate": 9.623810967533258e-07, "loss": 0.0018, "step": 50070 }, { "epoch": 0.8194387629878098, "grad_norm": 0.05401263593021629, "learning_rate": 9.606972824655325e-07, "loss": 0.0018, "step": 50080 }, { "epoch": 0.8196023889388857, "grad_norm": 0.17464752112973664, "learning_rate": 9.59014785928818e-07, "loss": 0.0028, "step": 50090 }, { "epoch": 0.8197660148899616, "grad_norm": 0.2209633555436409, "learning_rate": 9.57333607692066e-07, "loss": 0.0026, "step": 50100 }, { "epoch": 0.8199296408410374, "grad_norm": 0.1290585879361367, "learning_rate": 9.55653748303731e-07, "loss": 0.0024, "step": 50110 }, { "epoch": 0.8200932667921133, "grad_norm": 0.07405520690057404, "learning_rate": 9.53975208311837e-07, "loss": 0.0014, "step": 50120 }, { "epoch": 0.820256892743189, "grad_norm": 0.18147721133148767, "learning_rate": 9.522979882639787e-07, "loss": 0.0032, "step": 50130 }, { "epoch": 0.8204205186942649, "grad_norm": 0.1819848452900205, "learning_rate": 9.506220887073181e-07, "loss": 0.0026, "step": 50140 }, { "epoch": 0.8205841446453408, "grad_norm": 0.10807158000811144, "learning_rate": 9.489475101885898e-07, "loss": 0.0016, "step": 50150 }, { "epoch": 0.8207477705964166, "grad_norm": 0.14902551968406735, "learning_rate": 9.472742532540946e-07, "loss": 0.0018, "step": 50160 }, { "epoch": 0.8209113965474925, "grad_norm": 0.034174935035031044, "learning_rate": 9.456023184497026e-07, "loss": 0.0019, "step": 50170 }, { "epoch": 0.8210750224985682, "grad_norm": 0.10284340835244944, "learning_rate": 9.439317063208536e-07, "loss": 0.0023, "step": 50180 }, { "epoch": 0.8212386484496441, "grad_norm": 0.15983382150203546, "learning_rate": 9.42262417412555e-07, "loss": 0.0018, "step": 50190 }, { "epoch": 0.82140227440072, "grad_norm": 0.09381858035963471, "learning_rate": 9.405944522693816e-07, "loss": 0.003, "step": 50200 }, { "epoch": 0.8215659003517958, "grad_norm": 0.21904266266915087, "learning_rate": 9.389278114354794e-07, "loss": 0.0017, "step": 50210 }, { "epoch": 0.8217295263028717, "grad_norm": 0.14840739811293743, "learning_rate": 9.3726249545456e-07, "loss": 0.0019, "step": 50220 }, { "epoch": 0.8218931522539474, "grad_norm": 0.12881060104468983, "learning_rate": 9.355985048699029e-07, "loss": 0.0019, "step": 50230 }, { "epoch": 0.8220567782050233, "grad_norm": 0.13188153686497928, "learning_rate": 9.339358402243559e-07, "loss": 0.0019, "step": 50240 }, { "epoch": 0.8222204041560992, "grad_norm": 0.09848077177804467, "learning_rate": 9.32274502060334e-07, "loss": 0.0029, "step": 50250 }, { "epoch": 0.822384030107175, "grad_norm": 0.13853652774023031, "learning_rate": 9.306144909198173e-07, "loss": 0.0014, "step": 50260 }, { "epoch": 0.8225476560582509, "grad_norm": 0.06852128138828285, "learning_rate": 9.289558073443588e-07, "loss": 0.0019, "step": 50270 }, { "epoch": 0.8227112820093266, "grad_norm": 0.08092590263386222, "learning_rate": 9.272984518750722e-07, "loss": 0.0017, "step": 50280 }, { "epoch": 0.8228749079604025, "grad_norm": 0.03663578751872039, "learning_rate": 9.256424250526413e-07, "loss": 0.0014, "step": 50290 }, { "epoch": 0.8230385339114784, "grad_norm": 0.11615936047811047, "learning_rate": 9.239877274173154e-07, "loss": 0.0021, "step": 50300 }, { "epoch": 0.8232021598625542, "grad_norm": 0.05123251220931151, "learning_rate": 9.223343595089102e-07, "loss": 0.0017, "step": 50310 }, { "epoch": 0.8233657858136301, "grad_norm": 0.15836925225373494, "learning_rate": 9.206823218668065e-07, "loss": 0.0031, "step": 50320 }, { "epoch": 0.8235294117647058, "grad_norm": 0.14603176717438798, "learning_rate": 9.190316150299549e-07, "loss": 0.0021, "step": 50330 }, { "epoch": 0.8236930377157817, "grad_norm": 0.2801561075796078, "learning_rate": 9.173822395368682e-07, "loss": 0.0017, "step": 50340 }, { "epoch": 0.8238566636668576, "grad_norm": 0.1514760749592684, "learning_rate": 9.157341959256255e-07, "loss": 0.0047, "step": 50350 }, { "epoch": 0.8240202896179334, "grad_norm": 0.28785075763690343, "learning_rate": 9.140874847338726e-07, "loss": 0.002, "step": 50360 }, { "epoch": 0.8241839155690093, "grad_norm": 0.09648953641175018, "learning_rate": 9.124421064988193e-07, "loss": 0.0012, "step": 50370 }, { "epoch": 0.824347541520085, "grad_norm": 0.0986778610424153, "learning_rate": 9.107980617572398e-07, "loss": 0.0008, "step": 50380 }, { "epoch": 0.8245111674711609, "grad_norm": 0.052513689326658876, "learning_rate": 9.091553510454781e-07, "loss": 0.0015, "step": 50390 }, { "epoch": 0.8246747934222368, "grad_norm": 0.1567370836669226, "learning_rate": 9.075139748994371e-07, "loss": 0.0019, "step": 50400 }, { "epoch": 0.8248384193733126, "grad_norm": 0.2179903254338755, "learning_rate": 9.058739338545886e-07, "loss": 0.0019, "step": 50410 }, { "epoch": 0.8250020453243885, "grad_norm": 0.06885349749541883, "learning_rate": 9.042352284459632e-07, "loss": 0.002, "step": 50420 }, { "epoch": 0.8251656712754643, "grad_norm": 0.026752320460924166, "learning_rate": 9.025978592081635e-07, "loss": 0.0019, "step": 50430 }, { "epoch": 0.8253292972265401, "grad_norm": 0.22818447017191723, "learning_rate": 9.009618266753506e-07, "loss": 0.0022, "step": 50440 }, { "epoch": 0.825492923177616, "grad_norm": 0.06606107578756397, "learning_rate": 8.993271313812501e-07, "loss": 0.0012, "step": 50450 }, { "epoch": 0.8256565491286918, "grad_norm": 0.07415603453216045, "learning_rate": 8.976937738591551e-07, "loss": 0.0015, "step": 50460 }, { "epoch": 0.8258201750797677, "grad_norm": 0.1409778977604642, "learning_rate": 8.960617546419192e-07, "loss": 0.0032, "step": 50470 }, { "epoch": 0.8259838010308435, "grad_norm": 0.16307468584882612, "learning_rate": 8.944310742619561e-07, "loss": 0.0019, "step": 50480 }, { "epoch": 0.8261474269819193, "grad_norm": 0.35255933462664824, "learning_rate": 8.928017332512506e-07, "loss": 0.0022, "step": 50490 }, { "epoch": 0.8263110529329952, "grad_norm": 0.04905779175011822, "learning_rate": 8.91173732141345e-07, "loss": 0.003, "step": 50500 }, { "epoch": 0.826474678884071, "grad_norm": 0.21010202412904783, "learning_rate": 8.895470714633442e-07, "loss": 0.0025, "step": 50510 }, { "epoch": 0.8266383048351469, "grad_norm": 0.04929280082834834, "learning_rate": 8.87921751747921e-07, "loss": 0.002, "step": 50520 }, { "epoch": 0.8268019307862227, "grad_norm": 0.16293924627340495, "learning_rate": 8.862977735253059e-07, "loss": 0.002, "step": 50530 }, { "epoch": 0.8269655567372985, "grad_norm": 0.12614630476395672, "learning_rate": 8.846751373252899e-07, "loss": 0.002, "step": 50540 }, { "epoch": 0.8271291826883744, "grad_norm": 0.056843479618960586, "learning_rate": 8.830538436772335e-07, "loss": 0.0017, "step": 50550 }, { "epoch": 0.8272928086394502, "grad_norm": 0.10827381267991691, "learning_rate": 8.814338931100536e-07, "loss": 0.0022, "step": 50560 }, { "epoch": 0.8274564345905261, "grad_norm": 0.04587476460158164, "learning_rate": 8.798152861522291e-07, "loss": 0.0014, "step": 50570 }, { "epoch": 0.8276200605416019, "grad_norm": 0.09398186214182185, "learning_rate": 8.781980233318038e-07, "loss": 0.0013, "step": 50580 }, { "epoch": 0.8277836864926778, "grad_norm": 0.08724773974785589, "learning_rate": 8.765821051763823e-07, "loss": 0.0017, "step": 50590 }, { "epoch": 0.8279473124437535, "grad_norm": 0.05999215074593843, "learning_rate": 8.749675322131246e-07, "loss": 0.0014, "step": 50600 }, { "epoch": 0.8281109383948294, "grad_norm": 0.09769846021230107, "learning_rate": 8.733543049687604e-07, "loss": 0.0014, "step": 50610 }, { "epoch": 0.8282745643459053, "grad_norm": 0.13252293646277716, "learning_rate": 8.71742423969576e-07, "loss": 0.0022, "step": 50620 }, { "epoch": 0.8284381902969811, "grad_norm": 0.03554623628157254, "learning_rate": 8.701318897414174e-07, "loss": 0.0017, "step": 50630 }, { "epoch": 0.828601816248057, "grad_norm": 0.1463673548574833, "learning_rate": 8.685227028096954e-07, "loss": 0.0022, "step": 50640 }, { "epoch": 0.8287654421991327, "grad_norm": 0.03943670502028477, "learning_rate": 8.669148636993785e-07, "loss": 0.0019, "step": 50650 }, { "epoch": 0.8289290681502086, "grad_norm": 0.2869232369504291, "learning_rate": 8.653083729349926e-07, "loss": 0.0027, "step": 50660 }, { "epoch": 0.8290926941012845, "grad_norm": 0.04908878257764215, "learning_rate": 8.637032310406301e-07, "loss": 0.0017, "step": 50670 }, { "epoch": 0.8292563200523603, "grad_norm": 0.11119104493211474, "learning_rate": 8.62099438539939e-07, "loss": 0.0019, "step": 50680 }, { "epoch": 0.8294199460034362, "grad_norm": 0.31403290954339175, "learning_rate": 8.604969959561277e-07, "loss": 0.0016, "step": 50690 }, { "epoch": 0.8295835719545119, "grad_norm": 0.18905018602389537, "learning_rate": 8.588959038119665e-07, "loss": 0.0027, "step": 50700 }, { "epoch": 0.8297471979055878, "grad_norm": 0.06544412546012782, "learning_rate": 8.572961626297832e-07, "loss": 0.0018, "step": 50710 }, { "epoch": 0.8299108238566637, "grad_norm": 0.1552052366482621, "learning_rate": 8.556977729314636e-07, "loss": 0.0027, "step": 50720 }, { "epoch": 0.8300744498077395, "grad_norm": 0.034206068773142924, "learning_rate": 8.541007352384528e-07, "loss": 0.0009, "step": 50730 }, { "epoch": 0.8302380757588154, "grad_norm": 0.14659575755679302, "learning_rate": 8.525050500717597e-07, "loss": 0.0018, "step": 50740 }, { "epoch": 0.8304017017098911, "grad_norm": 0.40649534229681034, "learning_rate": 8.509107179519465e-07, "loss": 0.002, "step": 50750 }, { "epoch": 0.830565327660967, "grad_norm": 0.060005238814698304, "learning_rate": 8.493177393991353e-07, "loss": 0.0023, "step": 50760 }, { "epoch": 0.8307289536120429, "grad_norm": 0.3410742717349367, "learning_rate": 8.477261149330096e-07, "loss": 0.0019, "step": 50770 }, { "epoch": 0.8308925795631187, "grad_norm": 0.2636283248786855, "learning_rate": 8.461358450728063e-07, "loss": 0.0019, "step": 50780 }, { "epoch": 0.8310562055141946, "grad_norm": 0.16422577700578947, "learning_rate": 8.445469303373222e-07, "loss": 0.0027, "step": 50790 }, { "epoch": 0.8312198314652703, "grad_norm": 0.2892102437909206, "learning_rate": 8.429593712449153e-07, "loss": 0.0027, "step": 50800 }, { "epoch": 0.8313834574163462, "grad_norm": 0.021395587389908512, "learning_rate": 8.413731683134968e-07, "loss": 0.004, "step": 50810 }, { "epoch": 0.8315470833674221, "grad_norm": 0.14529101152450363, "learning_rate": 8.397883220605385e-07, "loss": 0.0034, "step": 50820 }, { "epoch": 0.8317107093184979, "grad_norm": 0.07154327415000333, "learning_rate": 8.382048330030679e-07, "loss": 0.0011, "step": 50830 }, { "epoch": 0.8318743352695738, "grad_norm": 0.12196579815944476, "learning_rate": 8.3662270165767e-07, "loss": 0.0028, "step": 50840 }, { "epoch": 0.8320379612206495, "grad_norm": 0.10066322728156621, "learning_rate": 8.350419285404865e-07, "loss": 0.0019, "step": 50850 }, { "epoch": 0.8322015871717254, "grad_norm": 0.22409745004352574, "learning_rate": 8.334625141672187e-07, "loss": 0.0017, "step": 50860 }, { "epoch": 0.8323652131228013, "grad_norm": 0.11860832310852683, "learning_rate": 8.318844590531217e-07, "loss": 0.0034, "step": 50870 }, { "epoch": 0.8325288390738771, "grad_norm": 0.1651348359192965, "learning_rate": 8.30307763713008e-07, "loss": 0.0023, "step": 50880 }, { "epoch": 0.832692465024953, "grad_norm": 0.4416311078822497, "learning_rate": 8.287324286612463e-07, "loss": 0.0019, "step": 50890 }, { "epoch": 0.8328560909760288, "grad_norm": 0.10979841570852097, "learning_rate": 8.271584544117617e-07, "loss": 0.0021, "step": 50900 }, { "epoch": 0.8330197169271046, "grad_norm": 0.06759634631166857, "learning_rate": 8.255858414780349e-07, "loss": 0.0029, "step": 50910 }, { "epoch": 0.8331833428781805, "grad_norm": 0.18493834740235032, "learning_rate": 8.24014590373105e-07, "loss": 0.0014, "step": 50920 }, { "epoch": 0.8333469688292563, "grad_norm": 0.08861719209062553, "learning_rate": 8.224447016095638e-07, "loss": 0.002, "step": 50930 }, { "epoch": 0.8335105947803322, "grad_norm": 0.04343136704379043, "learning_rate": 8.208761756995598e-07, "loss": 0.0025, "step": 50940 }, { "epoch": 0.833674220731408, "grad_norm": 0.29287910843299075, "learning_rate": 8.193090131547971e-07, "loss": 0.0029, "step": 50950 }, { "epoch": 0.8338378466824838, "grad_norm": 0.15949785879001482, "learning_rate": 8.177432144865338e-07, "loss": 0.0024, "step": 50960 }, { "epoch": 0.8340014726335597, "grad_norm": 0.03290814485614608, "learning_rate": 8.161787802055837e-07, "loss": 0.0027, "step": 50970 }, { "epoch": 0.8341650985846355, "grad_norm": 0.20128565805810664, "learning_rate": 8.146157108223179e-07, "loss": 0.0013, "step": 50980 }, { "epoch": 0.8343287245357114, "grad_norm": 0.2424816390645313, "learning_rate": 8.130540068466597e-07, "loss": 0.0034, "step": 50990 }, { "epoch": 0.8344923504867872, "grad_norm": 0.026862225545815525, "learning_rate": 8.114936687880859e-07, "loss": 0.0025, "step": 51000 }, { "epoch": 0.834655976437863, "grad_norm": 0.13147698999489818, "learning_rate": 8.099346971556305e-07, "loss": 0.0023, "step": 51010 }, { "epoch": 0.8348196023889389, "grad_norm": 0.27115844335841505, "learning_rate": 8.083770924578798e-07, "loss": 0.0041, "step": 51020 }, { "epoch": 0.8349832283400147, "grad_norm": 0.13099361045857733, "learning_rate": 8.068208552029739e-07, "loss": 0.0029, "step": 51030 }, { "epoch": 0.8351468542910906, "grad_norm": 0.09446867495792653, "learning_rate": 8.052659858986101e-07, "loss": 0.0011, "step": 51040 }, { "epoch": 0.8353104802421664, "grad_norm": 0.08345202968882982, "learning_rate": 8.03712485052035e-07, "loss": 0.0015, "step": 51050 }, { "epoch": 0.8354741061932422, "grad_norm": 0.16706391686782135, "learning_rate": 8.021603531700517e-07, "loss": 0.002, "step": 51060 }, { "epoch": 0.8356377321443181, "grad_norm": 0.4072252204280583, "learning_rate": 8.006095907590156e-07, "loss": 0.0024, "step": 51070 }, { "epoch": 0.8358013580953939, "grad_norm": 0.08938189021570003, "learning_rate": 7.990601983248353e-07, "loss": 0.0018, "step": 51080 }, { "epoch": 0.8359649840464698, "grad_norm": 0.07525252949969802, "learning_rate": 7.975121763729732e-07, "loss": 0.0021, "step": 51090 }, { "epoch": 0.8361286099975456, "grad_norm": 0.04587479196833858, "learning_rate": 7.95965525408442e-07, "loss": 0.0012, "step": 51100 }, { "epoch": 0.8362922359486215, "grad_norm": 0.24156113948710264, "learning_rate": 7.944202459358125e-07, "loss": 0.0031, "step": 51110 }, { "epoch": 0.8364558618996973, "grad_norm": 0.17386140419697935, "learning_rate": 7.928763384592025e-07, "loss": 0.002, "step": 51120 }, { "epoch": 0.8366194878507731, "grad_norm": 0.19379535484109214, "learning_rate": 7.913338034822859e-07, "loss": 0.0031, "step": 51130 }, { "epoch": 0.836783113801849, "grad_norm": 0.24590074715695087, "learning_rate": 7.897926415082863e-07, "loss": 0.0028, "step": 51140 }, { "epoch": 0.8369467397529248, "grad_norm": 0.09136406481245382, "learning_rate": 7.882528530399813e-07, "loss": 0.0026, "step": 51150 }, { "epoch": 0.8371103657040007, "grad_norm": 0.0816631799044623, "learning_rate": 7.867144385796982e-07, "loss": 0.001, "step": 51160 }, { "epoch": 0.8372739916550765, "grad_norm": 0.1345884689992293, "learning_rate": 7.851773986293198e-07, "loss": 0.002, "step": 51170 }, { "epoch": 0.8374376176061523, "grad_norm": 0.15480442618999135, "learning_rate": 7.836417336902769e-07, "loss": 0.003, "step": 51180 }, { "epoch": 0.8376012435572282, "grad_norm": 0.052260353636827746, "learning_rate": 7.821074442635535e-07, "loss": 0.003, "step": 51190 }, { "epoch": 0.837764869508304, "grad_norm": 0.03771533629645811, "learning_rate": 7.805745308496837e-07, "loss": 0.0019, "step": 51200 }, { "epoch": 0.8379284954593799, "grad_norm": 0.3151183532356463, "learning_rate": 7.790429939487543e-07, "loss": 0.0023, "step": 51210 }, { "epoch": 0.8380921214104557, "grad_norm": 0.16476186967728582, "learning_rate": 7.775128340604005e-07, "loss": 0.0014, "step": 51220 }, { "epoch": 0.8382557473615315, "grad_norm": 0.04805967136827938, "learning_rate": 7.759840516838113e-07, "loss": 0.002, "step": 51230 }, { "epoch": 0.8384193733126074, "grad_norm": 0.14837128760492765, "learning_rate": 7.744566473177256e-07, "loss": 0.0032, "step": 51240 }, { "epoch": 0.8385829992636832, "grad_norm": 0.08214173793318282, "learning_rate": 7.729306214604304e-07, "loss": 0.0019, "step": 51250 }, { "epoch": 0.8387466252147591, "grad_norm": 0.18382026524054587, "learning_rate": 7.714059746097658e-07, "loss": 0.0022, "step": 51260 }, { "epoch": 0.838910251165835, "grad_norm": 0.037459655610016435, "learning_rate": 7.698827072631204e-07, "loss": 0.0026, "step": 51270 }, { "epoch": 0.8390738771169107, "grad_norm": 0.1136281649777543, "learning_rate": 7.68360819917432e-07, "loss": 0.002, "step": 51280 }, { "epoch": 0.8392375030679866, "grad_norm": 0.058479074561873776, "learning_rate": 7.66840313069192e-07, "loss": 0.002, "step": 51290 }, { "epoch": 0.8394011290190624, "grad_norm": 0.2771937587096952, "learning_rate": 7.653211872144373e-07, "loss": 0.0018, "step": 51300 }, { "epoch": 0.8395647549701383, "grad_norm": 0.2433673238993186, "learning_rate": 7.638034428487562e-07, "loss": 0.0021, "step": 51310 }, { "epoch": 0.8397283809212142, "grad_norm": 0.09490281069297671, "learning_rate": 7.622870804672856e-07, "loss": 0.0015, "step": 51320 }, { "epoch": 0.8398920068722899, "grad_norm": 0.1417323080183743, "learning_rate": 7.607721005647123e-07, "loss": 0.0013, "step": 51330 }, { "epoch": 0.8400556328233658, "grad_norm": 0.15363848675551178, "learning_rate": 7.592585036352707e-07, "loss": 0.0021, "step": 51340 }, { "epoch": 0.8402192587744416, "grad_norm": 0.14441305539114285, "learning_rate": 7.577462901727467e-07, "loss": 0.0029, "step": 51350 }, { "epoch": 0.8403828847255175, "grad_norm": 0.1527856647553139, "learning_rate": 7.562354606704725e-07, "loss": 0.0017, "step": 51360 }, { "epoch": 0.8405465106765934, "grad_norm": 0.11497204405397646, "learning_rate": 7.547260156213299e-07, "loss": 0.0015, "step": 51370 }, { "epoch": 0.8407101366276691, "grad_norm": 0.17506635507093313, "learning_rate": 7.532179555177477e-07, "loss": 0.0026, "step": 51380 }, { "epoch": 0.840873762578745, "grad_norm": 0.015149470299458822, "learning_rate": 7.517112808517041e-07, "loss": 0.0013, "step": 51390 }, { "epoch": 0.8410373885298208, "grad_norm": 0.04639418222344294, "learning_rate": 7.502059921147254e-07, "loss": 0.002, "step": 51400 }, { "epoch": 0.8412010144808967, "grad_norm": 0.07515616992256932, "learning_rate": 7.487020897978842e-07, "loss": 0.0016, "step": 51410 }, { "epoch": 0.8413646404319726, "grad_norm": 0.07860643601847318, "learning_rate": 7.471995743918048e-07, "loss": 0.0016, "step": 51420 }, { "epoch": 0.8415282663830483, "grad_norm": 0.20559525098616413, "learning_rate": 7.45698446386654e-07, "loss": 0.0027, "step": 51430 }, { "epoch": 0.8416918923341242, "grad_norm": 0.07322614402814903, "learning_rate": 7.44198706272149e-07, "loss": 0.0016, "step": 51440 }, { "epoch": 0.8418555182852, "grad_norm": 0.09639931046323989, "learning_rate": 7.427003545375533e-07, "loss": 0.0014, "step": 51450 }, { "epoch": 0.8420191442362759, "grad_norm": 0.07688377279583636, "learning_rate": 7.412033916716777e-07, "loss": 0.0017, "step": 51460 }, { "epoch": 0.8421827701873517, "grad_norm": 0.1581010876398973, "learning_rate": 7.397078181628786e-07, "loss": 0.0026, "step": 51470 }, { "epoch": 0.8423463961384275, "grad_norm": 0.05258380599910729, "learning_rate": 7.382136344990631e-07, "loss": 0.0016, "step": 51480 }, { "epoch": 0.8425100220895034, "grad_norm": 0.25292190662449676, "learning_rate": 7.367208411676807e-07, "loss": 0.0016, "step": 51490 }, { "epoch": 0.8426736480405792, "grad_norm": 0.09837018809512779, "learning_rate": 7.35229438655729e-07, "loss": 0.0016, "step": 51500 }, { "epoch": 0.8428372739916551, "grad_norm": 0.0413775220456556, "learning_rate": 7.337394274497517e-07, "loss": 0.0023, "step": 51510 }, { "epoch": 0.8430008999427309, "grad_norm": 0.17312222856018422, "learning_rate": 7.322508080358381e-07, "loss": 0.0017, "step": 51520 }, { "epoch": 0.8431645258938067, "grad_norm": 0.14487331937591338, "learning_rate": 7.307635808996238e-07, "loss": 0.0014, "step": 51530 }, { "epoch": 0.8433281518448826, "grad_norm": 0.17394547510314537, "learning_rate": 7.292777465262918e-07, "loss": 0.0037, "step": 51540 }, { "epoch": 0.8434917777959584, "grad_norm": 0.1497988030582846, "learning_rate": 7.277933054005693e-07, "loss": 0.0019, "step": 51550 }, { "epoch": 0.8436554037470343, "grad_norm": 0.22204597193606948, "learning_rate": 7.263102580067261e-07, "loss": 0.002, "step": 51560 }, { "epoch": 0.8438190296981101, "grad_norm": 0.035145299111214376, "learning_rate": 7.24828604828583e-07, "loss": 0.0018, "step": 51570 }, { "epoch": 0.843982655649186, "grad_norm": 0.0839146223194527, "learning_rate": 7.233483463495023e-07, "loss": 0.0018, "step": 51580 }, { "epoch": 0.8441462816002618, "grad_norm": 0.1831161009926824, "learning_rate": 7.218694830523915e-07, "loss": 0.0019, "step": 51590 }, { "epoch": 0.8443099075513376, "grad_norm": 0.3867268494242546, "learning_rate": 7.203920154197047e-07, "loss": 0.003, "step": 51600 }, { "epoch": 0.8444735335024135, "grad_norm": 0.0550704573591412, "learning_rate": 7.189159439334409e-07, "loss": 0.0011, "step": 51610 }, { "epoch": 0.8446371594534893, "grad_norm": 0.14741913756905575, "learning_rate": 7.174412690751381e-07, "loss": 0.003, "step": 51620 }, { "epoch": 0.8448007854045652, "grad_norm": 0.14234583793212882, "learning_rate": 7.15967991325886e-07, "loss": 0.0016, "step": 51630 }, { "epoch": 0.844964411355641, "grad_norm": 0.03509612654027745, "learning_rate": 7.144961111663151e-07, "loss": 0.0022, "step": 51640 }, { "epoch": 0.8451280373067168, "grad_norm": 0.04156446712431083, "learning_rate": 7.130256290765991e-07, "loss": 0.0035, "step": 51650 }, { "epoch": 0.8452916632577927, "grad_norm": 0.09579251044275162, "learning_rate": 7.115565455364587e-07, "loss": 0.0025, "step": 51660 }, { "epoch": 0.8454552892088685, "grad_norm": 0.16665627384187398, "learning_rate": 7.100888610251561e-07, "loss": 0.0047, "step": 51670 }, { "epoch": 0.8456189151599444, "grad_norm": 0.12202959560555679, "learning_rate": 7.086225760214943e-07, "loss": 0.0022, "step": 51680 }, { "epoch": 0.8457825411110202, "grad_norm": 0.06183587377263898, "learning_rate": 7.071576910038269e-07, "loss": 0.0025, "step": 51690 }, { "epoch": 0.845946167062096, "grad_norm": 0.02878578287509975, "learning_rate": 7.056942064500449e-07, "loss": 0.002, "step": 51700 }, { "epoch": 0.8461097930131719, "grad_norm": 0.055006659917020535, "learning_rate": 7.042321228375842e-07, "loss": 0.0016, "step": 51710 }, { "epoch": 0.8462734189642477, "grad_norm": 0.1452951845271317, "learning_rate": 7.027714406434228e-07, "loss": 0.0016, "step": 51720 }, { "epoch": 0.8464370449153236, "grad_norm": 0.1035547797587581, "learning_rate": 7.013121603440853e-07, "loss": 0.0014, "step": 51730 }, { "epoch": 0.8466006708663995, "grad_norm": 0.35937694191909625, "learning_rate": 6.99854282415634e-07, "loss": 0.0033, "step": 51740 }, { "epoch": 0.8467642968174752, "grad_norm": 0.08032949091035617, "learning_rate": 6.983978073336745e-07, "loss": 0.0015, "step": 51750 }, { "epoch": 0.8469279227685511, "grad_norm": 0.13558943540384816, "learning_rate": 6.969427355733593e-07, "loss": 0.003, "step": 51760 }, { "epoch": 0.8470915487196269, "grad_norm": 0.18510281005243345, "learning_rate": 6.954890676093778e-07, "loss": 0.0021, "step": 51770 }, { "epoch": 0.8472551746707028, "grad_norm": 0.05198851668884105, "learning_rate": 6.940368039159628e-07, "loss": 0.0033, "step": 51780 }, { "epoch": 0.8474188006217787, "grad_norm": 0.16621986594732618, "learning_rate": 6.925859449668932e-07, "loss": 0.0016, "step": 51790 }, { "epoch": 0.8475824265728544, "grad_norm": 0.3858628864880291, "learning_rate": 6.911364912354818e-07, "loss": 0.0028, "step": 51800 }, { "epoch": 0.8477460525239303, "grad_norm": 0.10725221025586196, "learning_rate": 6.896884431945883e-07, "loss": 0.0028, "step": 51810 }, { "epoch": 0.8479096784750061, "grad_norm": 0.18871399736461378, "learning_rate": 6.882418013166148e-07, "loss": 0.0027, "step": 51820 }, { "epoch": 0.848073304426082, "grad_norm": 0.09924791660572244, "learning_rate": 6.867965660735009e-07, "loss": 0.0039, "step": 51830 }, { "epoch": 0.8482369303771579, "grad_norm": 0.23416178308576668, "learning_rate": 6.853527379367287e-07, "loss": 0.0017, "step": 51840 }, { "epoch": 0.8484005563282336, "grad_norm": 0.21107843270472454, "learning_rate": 6.839103173773249e-07, "loss": 0.0019, "step": 51850 }, { "epoch": 0.8485641822793095, "grad_norm": 0.07236215742971258, "learning_rate": 6.824693048658493e-07, "loss": 0.0022, "step": 51860 }, { "epoch": 0.8487278082303853, "grad_norm": 0.10434310910458901, "learning_rate": 6.810297008724076e-07, "loss": 0.0019, "step": 51870 }, { "epoch": 0.8488914341814612, "grad_norm": 0.31593886723942927, "learning_rate": 6.795915058666475e-07, "loss": 0.0018, "step": 51880 }, { "epoch": 0.8490550601325371, "grad_norm": 0.0794576665774269, "learning_rate": 6.781547203177535e-07, "loss": 0.002, "step": 51890 }, { "epoch": 0.8492186860836128, "grad_norm": 0.09240107521271751, "learning_rate": 6.767193446944498e-07, "loss": 0.0027, "step": 51900 }, { "epoch": 0.8493823120346887, "grad_norm": 0.014270476704582336, "learning_rate": 6.752853794650066e-07, "loss": 0.0013, "step": 51910 }, { "epoch": 0.8495459379857645, "grad_norm": 0.27294100711286085, "learning_rate": 6.738528250972254e-07, "loss": 0.003, "step": 51920 }, { "epoch": 0.8497095639368404, "grad_norm": 0.061180788937362555, "learning_rate": 6.724216820584523e-07, "loss": 0.0022, "step": 51930 }, { "epoch": 0.8498731898879163, "grad_norm": 0.04180615470625443, "learning_rate": 6.709919508155743e-07, "loss": 0.0017, "step": 51940 }, { "epoch": 0.850036815838992, "grad_norm": 0.15860955205243665, "learning_rate": 6.695636318350146e-07, "loss": 0.002, "step": 51950 }, { "epoch": 0.8502004417900679, "grad_norm": 0.17798542217976646, "learning_rate": 6.68136725582737e-07, "loss": 0.0074, "step": 51960 }, { "epoch": 0.8503640677411437, "grad_norm": 0.11193149267452511, "learning_rate": 6.66711232524247e-07, "loss": 0.0015, "step": 51970 }, { "epoch": 0.8505276936922196, "grad_norm": 0.21659725194057805, "learning_rate": 6.652871531245824e-07, "loss": 0.0025, "step": 51980 }, { "epoch": 0.8506913196432955, "grad_norm": 0.11564710038302363, "learning_rate": 6.638644878483258e-07, "loss": 0.0023, "step": 51990 }, { "epoch": 0.8508549455943712, "grad_norm": 0.07542550323447239, "learning_rate": 6.624432371595973e-07, "loss": 0.0018, "step": 52000 }, { "epoch": 0.8510185715454471, "grad_norm": 0.05280554518032804, "learning_rate": 6.61023401522054e-07, "loss": 0.0016, "step": 52010 }, { "epoch": 0.8511821974965229, "grad_norm": 0.06165095218360115, "learning_rate": 6.596049813988914e-07, "loss": 0.003, "step": 52020 }, { "epoch": 0.8513458234475988, "grad_norm": 0.13644096325430377, "learning_rate": 6.581879772528465e-07, "loss": 0.0017, "step": 52030 }, { "epoch": 0.8515094493986747, "grad_norm": 0.201970400449003, "learning_rate": 6.567723895461897e-07, "loss": 0.0027, "step": 52040 }, { "epoch": 0.8516730753497505, "grad_norm": 0.061274663988517754, "learning_rate": 6.553582187407321e-07, "loss": 0.0029, "step": 52050 }, { "epoch": 0.8518367013008263, "grad_norm": 0.10336651176071354, "learning_rate": 6.539454652978206e-07, "loss": 0.0012, "step": 52060 }, { "epoch": 0.8520003272519021, "grad_norm": 0.11743499209845157, "learning_rate": 6.525341296783433e-07, "loss": 0.0014, "step": 52070 }, { "epoch": 0.852163953202978, "grad_norm": 0.07829051609670336, "learning_rate": 6.51124212342722e-07, "loss": 0.0013, "step": 52080 }, { "epoch": 0.8523275791540539, "grad_norm": 0.05080957610540467, "learning_rate": 6.49715713750918e-07, "loss": 0.0015, "step": 52090 }, { "epoch": 0.8524912051051297, "grad_norm": 0.0908703857451172, "learning_rate": 6.48308634362429e-07, "loss": 0.0015, "step": 52100 }, { "epoch": 0.8526548310562055, "grad_norm": 0.23704966484065404, "learning_rate": 6.469029746362892e-07, "loss": 0.0014, "step": 52110 }, { "epoch": 0.8528184570072813, "grad_norm": 0.04828670876426694, "learning_rate": 6.454987350310699e-07, "loss": 0.0022, "step": 52120 }, { "epoch": 0.8529820829583572, "grad_norm": 0.10559746989920246, "learning_rate": 6.440959160048804e-07, "loss": 0.0024, "step": 52130 }, { "epoch": 0.8531457089094331, "grad_norm": 0.015323861212025834, "learning_rate": 6.426945180153659e-07, "loss": 0.0015, "step": 52140 }, { "epoch": 0.8533093348605089, "grad_norm": 0.06530436168572278, "learning_rate": 6.412945415197069e-07, "loss": 0.0022, "step": 52150 }, { "epoch": 0.8534729608115847, "grad_norm": 0.07343875600310258, "learning_rate": 6.398959869746213e-07, "loss": 0.0016, "step": 52160 }, { "epoch": 0.8536365867626605, "grad_norm": 0.21771313246539128, "learning_rate": 6.38498854836363e-07, "loss": 0.0015, "step": 52170 }, { "epoch": 0.8538002127137364, "grad_norm": 0.1402612424262812, "learning_rate": 6.371031455607196e-07, "loss": 0.0014, "step": 52180 }, { "epoch": 0.8539638386648123, "grad_norm": 0.04033303822465881, "learning_rate": 6.357088596030198e-07, "loss": 0.0011, "step": 52190 }, { "epoch": 0.8541274646158881, "grad_norm": 0.05997505408967628, "learning_rate": 6.343159974181229e-07, "loss": 0.0032, "step": 52200 }, { "epoch": 0.854291090566964, "grad_norm": 0.18372273530665725, "learning_rate": 6.329245594604261e-07, "loss": 0.0014, "step": 52210 }, { "epoch": 0.8544547165180397, "grad_norm": 0.28965353286235224, "learning_rate": 6.315345461838613e-07, "loss": 0.0031, "step": 52220 }, { "epoch": 0.8546183424691156, "grad_norm": 0.09484990909992451, "learning_rate": 6.301459580418956e-07, "loss": 0.0029, "step": 52230 }, { "epoch": 0.8547819684201915, "grad_norm": 0.016684374255821185, "learning_rate": 6.287587954875302e-07, "loss": 0.001, "step": 52240 }, { "epoch": 0.8549455943712673, "grad_norm": 0.17546523518277643, "learning_rate": 6.273730589733046e-07, "loss": 0.003, "step": 52250 }, { "epoch": 0.8551092203223432, "grad_norm": 0.08778242690724009, "learning_rate": 6.259887489512894e-07, "loss": 0.0015, "step": 52260 }, { "epoch": 0.8552728462734189, "grad_norm": 0.18069549889107744, "learning_rate": 6.246058658730919e-07, "loss": 0.0015, "step": 52270 }, { "epoch": 0.8554364722244948, "grad_norm": 0.19874591977062805, "learning_rate": 6.232244101898533e-07, "loss": 0.0016, "step": 52280 }, { "epoch": 0.8556000981755707, "grad_norm": 0.05776878507644744, "learning_rate": 6.218443823522486e-07, "loss": 0.0017, "step": 52290 }, { "epoch": 0.8557637241266465, "grad_norm": 0.02236539449108147, "learning_rate": 6.204657828104865e-07, "loss": 0.0033, "step": 52300 }, { "epoch": 0.8559273500777224, "grad_norm": 0.09153008946792006, "learning_rate": 6.190886120143131e-07, "loss": 0.0012, "step": 52310 }, { "epoch": 0.8560909760287981, "grad_norm": 0.1574011274374583, "learning_rate": 6.177128704130042e-07, "loss": 0.0016, "step": 52320 }, { "epoch": 0.856254601979874, "grad_norm": 0.05777198234322402, "learning_rate": 6.163385584553721e-07, "loss": 0.0013, "step": 52330 }, { "epoch": 0.8564182279309498, "grad_norm": 0.16200610508291988, "learning_rate": 6.149656765897616e-07, "loss": 0.0021, "step": 52340 }, { "epoch": 0.8565818538820257, "grad_norm": 0.11574745523289802, "learning_rate": 6.135942252640509e-07, "loss": 0.0014, "step": 52350 }, { "epoch": 0.8567454798331016, "grad_norm": 0.0717968683026434, "learning_rate": 6.122242049256505e-07, "loss": 0.0014, "step": 52360 }, { "epoch": 0.8569091057841773, "grad_norm": 0.15415360375355136, "learning_rate": 6.108556160215078e-07, "loss": 0.002, "step": 52370 }, { "epoch": 0.8570727317352532, "grad_norm": 0.09176524267552756, "learning_rate": 6.094884589980993e-07, "loss": 0.0027, "step": 52380 }, { "epoch": 0.857236357686329, "grad_norm": 0.07484066561072046, "learning_rate": 6.081227343014362e-07, "loss": 0.0014, "step": 52390 }, { "epoch": 0.8573999836374049, "grad_norm": 0.0938109351429162, "learning_rate": 6.067584423770623e-07, "loss": 0.0019, "step": 52400 }, { "epoch": 0.8575636095884808, "grad_norm": 0.0585635815222195, "learning_rate": 6.053955836700525e-07, "loss": 0.0021, "step": 52410 }, { "epoch": 0.8577272355395565, "grad_norm": 0.13194656612859215, "learning_rate": 6.040341586250165e-07, "loss": 0.0027, "step": 52420 }, { "epoch": 0.8578908614906324, "grad_norm": 0.09135163056146216, "learning_rate": 6.026741676860937e-07, "loss": 0.0024, "step": 52430 }, { "epoch": 0.8580544874417082, "grad_norm": 0.05650079473641278, "learning_rate": 6.013156112969592e-07, "loss": 0.0026, "step": 52440 }, { "epoch": 0.8582181133927841, "grad_norm": 0.09003152257225695, "learning_rate": 5.999584899008171e-07, "loss": 0.0018, "step": 52450 }, { "epoch": 0.85838173934386, "grad_norm": 0.06036638937432826, "learning_rate": 5.986028039404035e-07, "loss": 0.0016, "step": 52460 }, { "epoch": 0.8585453652949357, "grad_norm": 0.23854734828025737, "learning_rate": 5.972485538579881e-07, "loss": 0.0041, "step": 52470 }, { "epoch": 0.8587089912460116, "grad_norm": 0.1515366610704092, "learning_rate": 5.958957400953702e-07, "loss": 0.0017, "step": 52480 }, { "epoch": 0.8588726171970874, "grad_norm": 0.22480788600762538, "learning_rate": 5.945443630938808e-07, "loss": 0.0019, "step": 52490 }, { "epoch": 0.8590362431481633, "grad_norm": 0.2984675505008906, "learning_rate": 5.931944232943848e-07, "loss": 0.0021, "step": 52500 }, { "epoch": 0.8591998690992392, "grad_norm": 0.1165462403644148, "learning_rate": 5.918459211372752e-07, "loss": 0.003, "step": 52510 }, { "epoch": 0.859363495050315, "grad_norm": 0.20867340547815147, "learning_rate": 5.904988570624765e-07, "loss": 0.0021, "step": 52520 }, { "epoch": 0.8595271210013908, "grad_norm": 0.19202000377979206, "learning_rate": 5.891532315094444e-07, "loss": 0.0029, "step": 52530 }, { "epoch": 0.8596907469524666, "grad_norm": 0.117843679234529, "learning_rate": 5.878090449171664e-07, "loss": 0.0013, "step": 52540 }, { "epoch": 0.8598543729035425, "grad_norm": 0.14644827473227537, "learning_rate": 5.864662977241581e-07, "loss": 0.0023, "step": 52550 }, { "epoch": 0.8600179988546184, "grad_norm": 0.027084560741157386, "learning_rate": 5.851249903684686e-07, "loss": 0.0014, "step": 52560 }, { "epoch": 0.8601816248056942, "grad_norm": 0.11779208060748769, "learning_rate": 5.837851232876756e-07, "loss": 0.0021, "step": 52570 }, { "epoch": 0.86034525075677, "grad_norm": 0.2130106261048795, "learning_rate": 5.824466969188863e-07, "loss": 0.0027, "step": 52580 }, { "epoch": 0.8605088767078458, "grad_norm": 0.0961055460582622, "learning_rate": 5.811097116987386e-07, "loss": 0.0017, "step": 52590 }, { "epoch": 0.8606725026589217, "grad_norm": 0.19588721980560836, "learning_rate": 5.797741680634006e-07, "loss": 0.0015, "step": 52600 }, { "epoch": 0.8608361286099976, "grad_norm": 0.07420320083143492, "learning_rate": 5.78440066448569e-07, "loss": 0.0014, "step": 52610 }, { "epoch": 0.8609997545610734, "grad_norm": 0.31134719831976676, "learning_rate": 5.771074072894728e-07, "loss": 0.0056, "step": 52620 }, { "epoch": 0.8611633805121492, "grad_norm": 0.024279194574428942, "learning_rate": 5.757761910208671e-07, "loss": 0.0021, "step": 52630 }, { "epoch": 0.861327006463225, "grad_norm": 0.22763556971819193, "learning_rate": 5.744464180770376e-07, "loss": 0.0025, "step": 52640 }, { "epoch": 0.8614906324143009, "grad_norm": 0.07547680740189375, "learning_rate": 5.731180888917997e-07, "loss": 0.0034, "step": 52650 }, { "epoch": 0.8616542583653768, "grad_norm": 0.0724695126313348, "learning_rate": 5.717912038984968e-07, "loss": 0.0027, "step": 52660 }, { "epoch": 0.8618178843164526, "grad_norm": 0.2455345367765211, "learning_rate": 5.704657635300015e-07, "loss": 0.002, "step": 52670 }, { "epoch": 0.8619815102675285, "grad_norm": 0.09865935846515249, "learning_rate": 5.691417682187162e-07, "loss": 0.0013, "step": 52680 }, { "epoch": 0.8621451362186042, "grad_norm": 0.02524261990991333, "learning_rate": 5.678192183965708e-07, "loss": 0.0018, "step": 52690 }, { "epoch": 0.8623087621696801, "grad_norm": 0.22105627442863288, "learning_rate": 5.664981144950243e-07, "loss": 0.0018, "step": 52700 }, { "epoch": 0.862472388120756, "grad_norm": 0.20563804941198235, "learning_rate": 5.651784569450603e-07, "loss": 0.0019, "step": 52710 }, { "epoch": 0.8626360140718318, "grad_norm": 0.24014956448128946, "learning_rate": 5.638602461771975e-07, "loss": 0.0023, "step": 52720 }, { "epoch": 0.8627996400229077, "grad_norm": 0.020529044658343633, "learning_rate": 5.625434826214776e-07, "loss": 0.0018, "step": 52730 }, { "epoch": 0.8629632659739834, "grad_norm": 0.227136884496555, "learning_rate": 5.612281667074698e-07, "loss": 0.0017, "step": 52740 }, { "epoch": 0.8631268919250593, "grad_norm": 0.23116390876608647, "learning_rate": 5.599142988642753e-07, "loss": 0.0024, "step": 52750 }, { "epoch": 0.8632905178761352, "grad_norm": 0.11937413835087557, "learning_rate": 5.5860187952052e-07, "loss": 0.0017, "step": 52760 }, { "epoch": 0.863454143827211, "grad_norm": 0.10569138418165225, "learning_rate": 5.572909091043549e-07, "loss": 0.003, "step": 52770 }, { "epoch": 0.8636177697782869, "grad_norm": 0.15475087380870345, "learning_rate": 5.559813880434634e-07, "loss": 0.0015, "step": 52780 }, { "epoch": 0.8637813957293626, "grad_norm": 0.04565700061160876, "learning_rate": 5.546733167650526e-07, "loss": 0.0026, "step": 52790 }, { "epoch": 0.8639450216804385, "grad_norm": 0.07683103550785202, "learning_rate": 5.533666956958567e-07, "loss": 0.0016, "step": 52800 }, { "epoch": 0.8641086476315144, "grad_norm": 0.14157236883376392, "learning_rate": 5.520615252621397e-07, "loss": 0.003, "step": 52810 }, { "epoch": 0.8642722735825902, "grad_norm": 0.16044370082407694, "learning_rate": 5.507578058896906e-07, "loss": 0.0025, "step": 52820 }, { "epoch": 0.8644358995336661, "grad_norm": 0.08129999725963098, "learning_rate": 5.494555380038219e-07, "loss": 0.0033, "step": 52830 }, { "epoch": 0.8645995254847418, "grad_norm": 0.09973333399020222, "learning_rate": 5.481547220293781e-07, "loss": 0.0014, "step": 52840 }, { "epoch": 0.8647631514358177, "grad_norm": 0.018302630775638176, "learning_rate": 5.468553583907265e-07, "loss": 0.0023, "step": 52850 }, { "epoch": 0.8649267773868936, "grad_norm": 0.08830658747873654, "learning_rate": 5.455574475117609e-07, "loss": 0.0011, "step": 52860 }, { "epoch": 0.8650904033379694, "grad_norm": 0.10012930289047829, "learning_rate": 5.44260989815904e-07, "loss": 0.0024, "step": 52870 }, { "epoch": 0.8652540292890453, "grad_norm": 0.0860343061252671, "learning_rate": 5.429659857260994e-07, "loss": 0.0023, "step": 52880 }, { "epoch": 0.865417655240121, "grad_norm": 0.09569661808726233, "learning_rate": 5.416724356648201e-07, "loss": 0.0011, "step": 52890 }, { "epoch": 0.8655812811911969, "grad_norm": 0.23115574330581126, "learning_rate": 5.403803400540647e-07, "loss": 0.0019, "step": 52900 }, { "epoch": 0.8657449071422728, "grad_norm": 0.15052083013044068, "learning_rate": 5.390896993153566e-07, "loss": 0.0018, "step": 52910 }, { "epoch": 0.8659085330933486, "grad_norm": 0.32993503018569426, "learning_rate": 5.378005138697429e-07, "loss": 0.0016, "step": 52920 }, { "epoch": 0.8660721590444245, "grad_norm": 0.13224367959295272, "learning_rate": 5.365127841378009e-07, "loss": 0.0013, "step": 52930 }, { "epoch": 0.8662357849955002, "grad_norm": 0.06792313523280331, "learning_rate": 5.352265105396265e-07, "loss": 0.0009, "step": 52940 }, { "epoch": 0.8663994109465761, "grad_norm": 0.27876454923053867, "learning_rate": 5.339416934948432e-07, "loss": 0.003, "step": 52950 }, { "epoch": 0.866563036897652, "grad_norm": 0.2061583456972525, "learning_rate": 5.326583334226027e-07, "loss": 0.0017, "step": 52960 }, { "epoch": 0.8667266628487278, "grad_norm": 0.1333709945814969, "learning_rate": 5.313764307415775e-07, "loss": 0.002, "step": 52970 }, { "epoch": 0.8668902887998037, "grad_norm": 0.09200962338132083, "learning_rate": 5.300959858699639e-07, "loss": 0.0027, "step": 52980 }, { "epoch": 0.8670539147508795, "grad_norm": 0.28327725025852285, "learning_rate": 5.288169992254876e-07, "loss": 0.0028, "step": 52990 }, { "epoch": 0.8672175407019553, "grad_norm": 0.0716261702395288, "learning_rate": 5.275394712253934e-07, "loss": 0.0042, "step": 53000 }, { "epoch": 0.8673811666530312, "grad_norm": 0.17379223396592003, "learning_rate": 5.26263402286451e-07, "loss": 0.0028, "step": 53010 }, { "epoch": 0.867544792604107, "grad_norm": 0.09973840359683082, "learning_rate": 5.249887928249575e-07, "loss": 0.0026, "step": 53020 }, { "epoch": 0.8677084185551829, "grad_norm": 0.004321720996621527, "learning_rate": 5.237156432567314e-07, "loss": 0.0008, "step": 53030 }, { "epoch": 0.8678720445062587, "grad_norm": 0.12832616328743654, "learning_rate": 5.224439539971149e-07, "loss": 0.0025, "step": 53040 }, { "epoch": 0.8680356704573345, "grad_norm": 0.1292776939667015, "learning_rate": 5.211737254609733e-07, "loss": 0.0025, "step": 53050 }, { "epoch": 0.8681992964084104, "grad_norm": 0.15244731600139727, "learning_rate": 5.199049580626975e-07, "loss": 0.0012, "step": 53060 }, { "epoch": 0.8683629223594862, "grad_norm": 0.09142084590072416, "learning_rate": 5.186376522162001e-07, "loss": 0.0011, "step": 53070 }, { "epoch": 0.8685265483105621, "grad_norm": 0.04002642863934641, "learning_rate": 5.173718083349155e-07, "loss": 0.002, "step": 53080 }, { "epoch": 0.8686901742616379, "grad_norm": 0.019807730514134005, "learning_rate": 5.161074268318051e-07, "loss": 0.0027, "step": 53090 }, { "epoch": 0.8688538002127137, "grad_norm": 0.14325996360228122, "learning_rate": 5.148445081193503e-07, "loss": 0.0023, "step": 53100 }, { "epoch": 0.8690174261637896, "grad_norm": 0.12808615726841782, "learning_rate": 5.135830526095564e-07, "loss": 0.002, "step": 53110 }, { "epoch": 0.8691810521148654, "grad_norm": 0.02043394104307728, "learning_rate": 5.123230607139507e-07, "loss": 0.0014, "step": 53120 }, { "epoch": 0.8693446780659413, "grad_norm": 0.13202722662127603, "learning_rate": 5.110645328435826e-07, "loss": 0.0017, "step": 53130 }, { "epoch": 0.8695083040170171, "grad_norm": 0.10325424969490757, "learning_rate": 5.098074694090238e-07, "loss": 0.0015, "step": 53140 }, { "epoch": 0.869671929968093, "grad_norm": 0.15111091453422165, "learning_rate": 5.085518708203713e-07, "loss": 0.0018, "step": 53150 }, { "epoch": 0.8698355559191688, "grad_norm": 0.1971086113314265, "learning_rate": 5.072977374872412e-07, "loss": 0.0043, "step": 53160 }, { "epoch": 0.8699991818702446, "grad_norm": 0.16472595260359932, "learning_rate": 5.060450698187719e-07, "loss": 0.002, "step": 53170 }, { "epoch": 0.8701628078213205, "grad_norm": 0.31783381371282987, "learning_rate": 5.047938682236242e-07, "loss": 0.0016, "step": 53180 }, { "epoch": 0.8703264337723963, "grad_norm": 0.1423298218776779, "learning_rate": 5.035441331099805e-07, "loss": 0.0017, "step": 53190 }, { "epoch": 0.8704900597234722, "grad_norm": 0.04422393663621081, "learning_rate": 5.022958648855436e-07, "loss": 0.0026, "step": 53200 }, { "epoch": 0.8706536856745479, "grad_norm": 0.365739191864283, "learning_rate": 5.01049063957541e-07, "loss": 0.0034, "step": 53210 }, { "epoch": 0.8708173116256238, "grad_norm": 0.08494119388996696, "learning_rate": 4.998037307327186e-07, "loss": 0.0011, "step": 53220 }, { "epoch": 0.8709809375766997, "grad_norm": 0.11808998918537728, "learning_rate": 4.985598656173446e-07, "loss": 0.0053, "step": 53230 }, { "epoch": 0.8711445635277755, "grad_norm": 0.21010444009633525, "learning_rate": 4.973174690172072e-07, "loss": 0.0026, "step": 53240 }, { "epoch": 0.8713081894788514, "grad_norm": 0.019209233079003914, "learning_rate": 4.960765413376173e-07, "loss": 0.0007, "step": 53250 }, { "epoch": 0.8714718154299271, "grad_norm": 0.07876334253518731, "learning_rate": 4.948370829834037e-07, "loss": 0.0017, "step": 53260 }, { "epoch": 0.871635441381003, "grad_norm": 0.10374913829084749, "learning_rate": 4.935990943589203e-07, "loss": 0.0014, "step": 53270 }, { "epoch": 0.8717990673320789, "grad_norm": 0.06539122247159852, "learning_rate": 4.923625758680378e-07, "loss": 0.0018, "step": 53280 }, { "epoch": 0.8719626932831547, "grad_norm": 0.2039532097257218, "learning_rate": 4.911275279141486e-07, "loss": 0.0022, "step": 53290 }, { "epoch": 0.8721263192342306, "grad_norm": 0.09022899773557073, "learning_rate": 4.898939509001649e-07, "loss": 0.0019, "step": 53300 }, { "epoch": 0.8722899451853063, "grad_norm": 0.08054252312342947, "learning_rate": 4.886618452285202e-07, "loss": 0.0028, "step": 53310 }, { "epoch": 0.8724535711363822, "grad_norm": 0.3406458898516414, "learning_rate": 4.874312113011653e-07, "loss": 0.002, "step": 53320 }, { "epoch": 0.8726171970874581, "grad_norm": 0.0938125858484772, "learning_rate": 4.862020495195752e-07, "loss": 0.0036, "step": 53330 }, { "epoch": 0.8727808230385339, "grad_norm": 0.21807165308440107, "learning_rate": 4.849743602847412e-07, "loss": 0.0024, "step": 53340 }, { "epoch": 0.8729444489896098, "grad_norm": 0.1649710947303191, "learning_rate": 4.837481439971753e-07, "loss": 0.002, "step": 53350 }, { "epoch": 0.8731080749406855, "grad_norm": 0.2248221708402171, "learning_rate": 4.825234010569086e-07, "loss": 0.0011, "step": 53360 }, { "epoch": 0.8732717008917614, "grad_norm": 0.09772602427703993, "learning_rate": 4.813001318634919e-07, "loss": 0.0023, "step": 53370 }, { "epoch": 0.8734353268428373, "grad_norm": 0.17687873426667197, "learning_rate": 4.800783368159951e-07, "loss": 0.002, "step": 53380 }, { "epoch": 0.8735989527939131, "grad_norm": 0.2108073740248584, "learning_rate": 4.788580163130064e-07, "loss": 0.0064, "step": 53390 }, { "epoch": 0.873762578744989, "grad_norm": 0.04967326737688878, "learning_rate": 4.776391707526351e-07, "loss": 0.0017, "step": 53400 }, { "epoch": 0.8739262046960647, "grad_norm": 0.07946181666706736, "learning_rate": 4.764218005325083e-07, "loss": 0.0015, "step": 53410 }, { "epoch": 0.8740898306471406, "grad_norm": 0.06337516191278485, "learning_rate": 4.7520590604977044e-07, "loss": 0.0015, "step": 53420 }, { "epoch": 0.8742534565982165, "grad_norm": 0.1201558068281623, "learning_rate": 4.739914877010865e-07, "loss": 0.0012, "step": 53430 }, { "epoch": 0.8744170825492923, "grad_norm": 0.14414874058411617, "learning_rate": 4.727785458826378e-07, "loss": 0.0023, "step": 53440 }, { "epoch": 0.8745807085003682, "grad_norm": 0.09523403081110585, "learning_rate": 4.7156708099012516e-07, "loss": 0.0015, "step": 53450 }, { "epoch": 0.874744334451444, "grad_norm": 0.15767513204009692, "learning_rate": 4.70357093418769e-07, "loss": 0.0014, "step": 53460 }, { "epoch": 0.8749079604025198, "grad_norm": 0.14282526627084383, "learning_rate": 4.6914858356330585e-07, "loss": 0.0031, "step": 53470 }, { "epoch": 0.8750715863535957, "grad_norm": 0.19851638681558015, "learning_rate": 4.6794155181798973e-07, "loss": 0.0028, "step": 53480 }, { "epoch": 0.8752352123046715, "grad_norm": 0.13470833926114842, "learning_rate": 4.667359985765946e-07, "loss": 0.0012, "step": 53490 }, { "epoch": 0.8753988382557474, "grad_norm": 0.11728149889212863, "learning_rate": 4.6553192423241023e-07, "loss": 0.0023, "step": 53500 }, { "epoch": 0.8755624642068232, "grad_norm": 0.10738486319585844, "learning_rate": 4.6432932917824414e-07, "loss": 0.0024, "step": 53510 }, { "epoch": 0.875726090157899, "grad_norm": 0.129271038427575, "learning_rate": 4.6312821380642303e-07, "loss": 0.0014, "step": 53520 }, { "epoch": 0.8758897161089749, "grad_norm": 0.38420873380307424, "learning_rate": 4.619285785087885e-07, "loss": 0.0017, "step": 53530 }, { "epoch": 0.8760533420600507, "grad_norm": 0.07116971014817766, "learning_rate": 4.6073042367670073e-07, "loss": 0.0031, "step": 53540 }, { "epoch": 0.8762169680111266, "grad_norm": 0.1665876892809746, "learning_rate": 4.595337497010366e-07, "loss": 0.0017, "step": 53550 }, { "epoch": 0.8763805939622024, "grad_norm": 0.2621137902335014, "learning_rate": 4.5833855697218986e-07, "loss": 0.0023, "step": 53560 }, { "epoch": 0.8765442199132782, "grad_norm": 0.07771633953405434, "learning_rate": 4.571448458800698e-07, "loss": 0.001, "step": 53570 }, { "epoch": 0.8767078458643541, "grad_norm": 0.038610590107402844, "learning_rate": 4.559526168141054e-07, "loss": 0.0012, "step": 53580 }, { "epoch": 0.8768714718154299, "grad_norm": 0.31763014093488184, "learning_rate": 4.547618701632395e-07, "loss": 0.0015, "step": 53590 }, { "epoch": 0.8770350977665058, "grad_norm": 0.17053397122383354, "learning_rate": 4.535726063159318e-07, "loss": 0.002, "step": 53600 }, { "epoch": 0.8771987237175816, "grad_norm": 0.04642860460063473, "learning_rate": 4.523848256601593e-07, "loss": 0.0024, "step": 53610 }, { "epoch": 0.8773623496686574, "grad_norm": 0.2898670144995844, "learning_rate": 4.511985285834142e-07, "loss": 0.0015, "step": 53620 }, { "epoch": 0.8775259756197333, "grad_norm": 0.04486655775339955, "learning_rate": 4.5001371547270354e-07, "loss": 0.0019, "step": 53630 }, { "epoch": 0.8776896015708091, "grad_norm": 0.15206448318997695, "learning_rate": 4.488303867145538e-07, "loss": 0.0017, "step": 53640 }, { "epoch": 0.877853227521885, "grad_norm": 0.32105805752573213, "learning_rate": 4.476485426950045e-07, "loss": 0.0011, "step": 53650 }, { "epoch": 0.8780168534729608, "grad_norm": 0.021227266076589566, "learning_rate": 4.46468183799611e-07, "loss": 0.0016, "step": 53660 }, { "epoch": 0.8781804794240367, "grad_norm": 0.3212763073713288, "learning_rate": 4.4528931041344427e-07, "loss": 0.0028, "step": 53670 }, { "epoch": 0.8783441053751125, "grad_norm": 0.034399933806645556, "learning_rate": 4.4411192292109106e-07, "loss": 0.0028, "step": 53680 }, { "epoch": 0.8785077313261883, "grad_norm": 0.1969253891794196, "learning_rate": 4.4293602170665363e-07, "loss": 0.0025, "step": 53690 }, { "epoch": 0.8786713572772642, "grad_norm": 0.13290308509176238, "learning_rate": 4.417616071537478e-07, "loss": 0.0015, "step": 53700 }, { "epoch": 0.87883498322834, "grad_norm": 0.09727129804254891, "learning_rate": 4.405886796455072e-07, "loss": 0.0015, "step": 53710 }, { "epoch": 0.8789986091794159, "grad_norm": 0.0644514350780173, "learning_rate": 4.3941723956457847e-07, "loss": 0.0011, "step": 53720 }, { "epoch": 0.8791622351304917, "grad_norm": 0.10295240686542464, "learning_rate": 4.3824728729312147e-07, "loss": 0.0014, "step": 53730 }, { "epoch": 0.8793258610815675, "grad_norm": 0.0783082125628668, "learning_rate": 4.3707882321281423e-07, "loss": 0.0029, "step": 53740 }, { "epoch": 0.8794894870326434, "grad_norm": 0.3203794090684449, "learning_rate": 4.3591184770484684e-07, "loss": 0.002, "step": 53750 }, { "epoch": 0.8796531129837192, "grad_norm": 0.2802376506305925, "learning_rate": 4.3474636114992306e-07, "loss": 0.0034, "step": 53760 }, { "epoch": 0.8798167389347951, "grad_norm": 0.046880285925879714, "learning_rate": 4.3358236392826546e-07, "loss": 0.0023, "step": 53770 }, { "epoch": 0.879980364885871, "grad_norm": 0.06307601033764723, "learning_rate": 4.3241985641960584e-07, "loss": 0.002, "step": 53780 }, { "epoch": 0.8801439908369467, "grad_norm": 0.16857441040650556, "learning_rate": 4.312588390031902e-07, "loss": 0.0026, "step": 53790 }, { "epoch": 0.8803076167880226, "grad_norm": 0.01280808876797322, "learning_rate": 4.300993120577823e-07, "loss": 0.0013, "step": 53800 }, { "epoch": 0.8804712427390984, "grad_norm": 0.06734894634459568, "learning_rate": 4.2894127596165625e-07, "loss": 0.0018, "step": 53810 }, { "epoch": 0.8806348686901743, "grad_norm": 0.23419157677032917, "learning_rate": 4.2778473109260033e-07, "loss": 0.0023, "step": 53820 }, { "epoch": 0.8807984946412502, "grad_norm": 0.027255321763394916, "learning_rate": 4.2662967782791774e-07, "loss": 0.0018, "step": 53830 }, { "epoch": 0.8809621205923259, "grad_norm": 0.10411335262980473, "learning_rate": 4.2547611654442547e-07, "loss": 0.0032, "step": 53840 }, { "epoch": 0.8811257465434018, "grad_norm": 0.11376777266235329, "learning_rate": 4.2432404761844903e-07, "loss": 0.0011, "step": 53850 }, { "epoch": 0.8812893724944776, "grad_norm": 0.27601367047008346, "learning_rate": 4.231734714258329e-07, "loss": 0.0014, "step": 53860 }, { "epoch": 0.8814529984455535, "grad_norm": 0.06169260025959098, "learning_rate": 4.2202438834193227e-07, "loss": 0.0014, "step": 53870 }, { "epoch": 0.8816166243966294, "grad_norm": 0.14622464552539996, "learning_rate": 4.208767987416135e-07, "loss": 0.0019, "step": 53880 }, { "epoch": 0.8817802503477051, "grad_norm": 0.21130735399801473, "learning_rate": 4.1973070299925924e-07, "loss": 0.0023, "step": 53890 }, { "epoch": 0.881943876298781, "grad_norm": 0.08930553175493725, "learning_rate": 4.185861014887632e-07, "loss": 0.0018, "step": 53900 }, { "epoch": 0.8821075022498568, "grad_norm": 0.10020663364029199, "learning_rate": 4.17442994583529e-07, "loss": 0.0023, "step": 53910 }, { "epoch": 0.8822711282009327, "grad_norm": 0.05394851757821303, "learning_rate": 4.1630138265647766e-07, "loss": 0.0012, "step": 53920 }, { "epoch": 0.8824347541520086, "grad_norm": 0.029007281318480946, "learning_rate": 4.1516126608003806e-07, "loss": 0.0021, "step": 53930 }, { "epoch": 0.8825983801030843, "grad_norm": 0.18257748327548776, "learning_rate": 4.140226452261531e-07, "loss": 0.0032, "step": 53940 }, { "epoch": 0.8827620060541602, "grad_norm": 0.03882285326528979, "learning_rate": 4.128855204662796e-07, "loss": 0.0011, "step": 53950 }, { "epoch": 0.882925632005236, "grad_norm": 0.17172805248420886, "learning_rate": 4.117498921713836e-07, "loss": 0.0016, "step": 53960 }, { "epoch": 0.8830892579563119, "grad_norm": 0.05375737232576831, "learning_rate": 4.1061576071194155e-07, "loss": 0.0016, "step": 53970 }, { "epoch": 0.8832528839073878, "grad_norm": 0.15040914229733757, "learning_rate": 4.094831264579463e-07, "loss": 0.0023, "step": 53980 }, { "epoch": 0.8834165098584635, "grad_norm": 0.1544821322086737, "learning_rate": 4.0835198977889956e-07, "loss": 0.0015, "step": 53990 }, { "epoch": 0.8835801358095394, "grad_norm": 0.03111987932389926, "learning_rate": 4.072223510438128e-07, "loss": 0.0013, "step": 54000 }, { "epoch": 0.8837437617606152, "grad_norm": 0.08006658021123461, "learning_rate": 4.060942106212129e-07, "loss": 0.0021, "step": 54010 }, { "epoch": 0.8839073877116911, "grad_norm": 0.06874729993856908, "learning_rate": 4.0496756887913547e-07, "loss": 0.0011, "step": 54020 }, { "epoch": 0.884071013662767, "grad_norm": 0.06459473239526173, "learning_rate": 4.0384242618512594e-07, "loss": 0.0011, "step": 54030 }, { "epoch": 0.8842346396138427, "grad_norm": 0.03136468961192377, "learning_rate": 4.027187829062423e-07, "loss": 0.0026, "step": 54040 }, { "epoch": 0.8843982655649186, "grad_norm": 0.1977382234937298, "learning_rate": 4.015966394090548e-07, "loss": 0.0032, "step": 54050 }, { "epoch": 0.8845618915159944, "grad_norm": 0.5797273827607149, "learning_rate": 4.0047599605964216e-07, "loss": 0.0015, "step": 54060 }, { "epoch": 0.8847255174670703, "grad_norm": 0.2302733516883932, "learning_rate": 3.9935685322359363e-07, "loss": 0.0024, "step": 54070 }, { "epoch": 0.8848891434181462, "grad_norm": 0.36832712919217075, "learning_rate": 3.982392112660122e-07, "loss": 0.0015, "step": 54080 }, { "epoch": 0.885052769369222, "grad_norm": 0.24603542305961687, "learning_rate": 3.9712307055150666e-07, "loss": 0.0017, "step": 54090 }, { "epoch": 0.8852163953202978, "grad_norm": 0.05307829525712286, "learning_rate": 3.9600843144419756e-07, "loss": 0.0037, "step": 54100 }, { "epoch": 0.8853800212713736, "grad_norm": 0.17170822710118297, "learning_rate": 3.9489529430771833e-07, "loss": 0.001, "step": 54110 }, { "epoch": 0.8855436472224495, "grad_norm": 0.07291546121777329, "learning_rate": 3.9378365950520916e-07, "loss": 0.0017, "step": 54120 }, { "epoch": 0.8857072731735253, "grad_norm": 0.19535512247320605, "learning_rate": 3.926735273993215e-07, "loss": 0.0026, "step": 54130 }, { "epoch": 0.8858708991246012, "grad_norm": 0.22297715289383083, "learning_rate": 3.915648983522158e-07, "loss": 0.0013, "step": 54140 }, { "epoch": 0.886034525075677, "grad_norm": 0.43808295267869907, "learning_rate": 3.9045777272556316e-07, "loss": 0.0035, "step": 54150 }, { "epoch": 0.8861981510267528, "grad_norm": 0.1879227815058943, "learning_rate": 3.893521508805431e-07, "loss": 0.0018, "step": 54160 }, { "epoch": 0.8863617769778287, "grad_norm": 0.11198352830741005, "learning_rate": 3.8824803317784654e-07, "loss": 0.002, "step": 54170 }, { "epoch": 0.8865254029289045, "grad_norm": 0.05711237659510425, "learning_rate": 3.871454199776714e-07, "loss": 0.0021, "step": 54180 }, { "epoch": 0.8866890288799804, "grad_norm": 0.20370932821518398, "learning_rate": 3.8604431163972555e-07, "loss": 0.0031, "step": 54190 }, { "epoch": 0.8868526548310562, "grad_norm": 0.10589123024168287, "learning_rate": 3.8494470852322716e-07, "loss": 0.0027, "step": 54200 }, { "epoch": 0.887016280782132, "grad_norm": 0.06264924283497242, "learning_rate": 3.8384661098690103e-07, "loss": 0.0027, "step": 54210 }, { "epoch": 0.8871799067332079, "grad_norm": 0.014958898156792496, "learning_rate": 3.8275001938898217e-07, "loss": 0.0017, "step": 54220 }, { "epoch": 0.8873435326842837, "grad_norm": 0.16023502601202608, "learning_rate": 3.81654934087215e-07, "loss": 0.0016, "step": 54230 }, { "epoch": 0.8875071586353596, "grad_norm": 0.2150135482211736, "learning_rate": 3.8056135543885153e-07, "loss": 0.0018, "step": 54240 }, { "epoch": 0.8876707845864354, "grad_norm": 0.04338428722111253, "learning_rate": 3.7946928380065254e-07, "loss": 0.0017, "step": 54250 }, { "epoch": 0.8878344105375112, "grad_norm": 0.16542739333222103, "learning_rate": 3.7837871952888683e-07, "loss": 0.0028, "step": 54260 }, { "epoch": 0.8879980364885871, "grad_norm": 0.03559082868066973, "learning_rate": 3.7728966297933156e-07, "loss": 0.0027, "step": 54270 }, { "epoch": 0.8881616624396629, "grad_norm": 0.14000396759341385, "learning_rate": 3.76202114507272e-07, "loss": 0.0021, "step": 54280 }, { "epoch": 0.8883252883907388, "grad_norm": 0.1545033049940637, "learning_rate": 3.75116074467502e-07, "loss": 0.0031, "step": 54290 }, { "epoch": 0.8884889143418147, "grad_norm": 0.14906306387113252, "learning_rate": 3.740315432143232e-07, "loss": 0.0017, "step": 54300 }, { "epoch": 0.8886525402928904, "grad_norm": 0.08371583695343607, "learning_rate": 3.729485211015443e-07, "loss": 0.001, "step": 54310 }, { "epoch": 0.8888161662439663, "grad_norm": 0.13456061786399692, "learning_rate": 3.718670084824827e-07, "loss": 0.0033, "step": 54320 }, { "epoch": 0.8889797921950421, "grad_norm": 0.24772188772653891, "learning_rate": 3.707870057099616e-07, "loss": 0.0019, "step": 54330 }, { "epoch": 0.889143418146118, "grad_norm": 0.051093700685423474, "learning_rate": 3.6970851313631317e-07, "loss": 0.0018, "step": 54340 }, { "epoch": 0.8893070440971939, "grad_norm": 0.23808305643775388, "learning_rate": 3.6863153111337756e-07, "loss": 0.0022, "step": 54350 }, { "epoch": 0.8894706700482696, "grad_norm": 0.05669352609086232, "learning_rate": 3.675560599924999e-07, "loss": 0.0022, "step": 54360 }, { "epoch": 0.8896342959993455, "grad_norm": 0.24019804719382926, "learning_rate": 3.66482100124535e-07, "loss": 0.0026, "step": 54370 }, { "epoch": 0.8897979219504213, "grad_norm": 0.029697980901882793, "learning_rate": 3.6540965185984156e-07, "loss": 0.002, "step": 54380 }, { "epoch": 0.8899615479014972, "grad_norm": 0.47784508750364657, "learning_rate": 3.64338715548288e-07, "loss": 0.0035, "step": 54390 }, { "epoch": 0.8901251738525731, "grad_norm": 0.10419988491445924, "learning_rate": 3.632692915392483e-07, "loss": 0.0015, "step": 54400 }, { "epoch": 0.8902887998036488, "grad_norm": 0.05081034400484595, "learning_rate": 3.622013801816021e-07, "loss": 0.0019, "step": 54410 }, { "epoch": 0.8904524257547247, "grad_norm": 0.02749762117578119, "learning_rate": 3.611349818237375e-07, "loss": 0.003, "step": 54420 }, { "epoch": 0.8906160517058005, "grad_norm": 0.06531369305662299, "learning_rate": 3.60070096813549e-07, "loss": 0.0016, "step": 54430 }, { "epoch": 0.8907796776568764, "grad_norm": 0.05547720222133388, "learning_rate": 3.5900672549843475e-07, "loss": 0.0024, "step": 54440 }, { "epoch": 0.8909433036079523, "grad_norm": 0.09571061592254314, "learning_rate": 3.5794486822530173e-07, "loss": 0.0012, "step": 54450 }, { "epoch": 0.891106929559028, "grad_norm": 0.21127385161467688, "learning_rate": 3.568845253405617e-07, "loss": 0.0026, "step": 54460 }, { "epoch": 0.8912705555101039, "grad_norm": 0.05100508380499609, "learning_rate": 3.5582569719013247e-07, "loss": 0.0019, "step": 54470 }, { "epoch": 0.8914341814611797, "grad_norm": 0.22273146699160937, "learning_rate": 3.547683841194394e-07, "loss": 0.004, "step": 54480 }, { "epoch": 0.8915978074122556, "grad_norm": 0.1137759305141339, "learning_rate": 3.5371258647341145e-07, "loss": 0.0023, "step": 54490 }, { "epoch": 0.8917614333633315, "grad_norm": 0.13145345088630248, "learning_rate": 3.5265830459648375e-07, "loss": 0.0038, "step": 54500 }, { "epoch": 0.8919250593144072, "grad_norm": 0.26679332576026116, "learning_rate": 3.516055388325967e-07, "loss": 0.0018, "step": 54510 }, { "epoch": 0.8920886852654831, "grad_norm": 0.12013169027463527, "learning_rate": 3.505542895251979e-07, "loss": 0.0017, "step": 54520 }, { "epoch": 0.8922523112165589, "grad_norm": 0.14120997762289345, "learning_rate": 3.495045570172362e-07, "loss": 0.0015, "step": 54530 }, { "epoch": 0.8924159371676348, "grad_norm": 0.12922124181363964, "learning_rate": 3.4845634165117103e-07, "loss": 0.0016, "step": 54540 }, { "epoch": 0.8925795631187107, "grad_norm": 0.35322515244869157, "learning_rate": 3.4740964376896334e-07, "loss": 0.0034, "step": 54550 }, { "epoch": 0.8927431890697864, "grad_norm": 0.0980118241284525, "learning_rate": 3.4636446371207945e-07, "loss": 0.0026, "step": 54560 }, { "epoch": 0.8929068150208623, "grad_norm": 0.06271630709421423, "learning_rate": 3.4532080182149095e-07, "loss": 0.0018, "step": 54570 }, { "epoch": 0.8930704409719381, "grad_norm": 0.09583763064063557, "learning_rate": 3.4427865843767395e-07, "loss": 0.0011, "step": 54580 }, { "epoch": 0.893234066923014, "grad_norm": 0.1465297321123273, "learning_rate": 3.4323803390060916e-07, "loss": 0.0019, "step": 54590 }, { "epoch": 0.8933976928740899, "grad_norm": 0.2375373810117404, "learning_rate": 3.421989285497823e-07, "loss": 0.002, "step": 54600 }, { "epoch": 0.8935613188251657, "grad_norm": 0.18263315913439176, "learning_rate": 3.4116134272418333e-07, "loss": 0.0026, "step": 54610 }, { "epoch": 0.8937249447762415, "grad_norm": 0.05228682221906292, "learning_rate": 3.401252767623059e-07, "loss": 0.0022, "step": 54620 }, { "epoch": 0.8938885707273173, "grad_norm": 0.13450242690776676, "learning_rate": 3.390907310021485e-07, "loss": 0.0017, "step": 54630 }, { "epoch": 0.8940521966783932, "grad_norm": 0.01106225381209532, "learning_rate": 3.3805770578121286e-07, "loss": 0.0016, "step": 54640 }, { "epoch": 0.8942158226294691, "grad_norm": 0.11958272649717445, "learning_rate": 3.37026201436505e-07, "loss": 0.0041, "step": 54650 }, { "epoch": 0.8943794485805449, "grad_norm": 0.13981988791123232, "learning_rate": 3.3599621830453623e-07, "loss": 0.002, "step": 54660 }, { "epoch": 0.8945430745316207, "grad_norm": 0.1086043642402669, "learning_rate": 3.3496775672131944e-07, "loss": 0.0019, "step": 54670 }, { "epoch": 0.8947067004826965, "grad_norm": 0.10485065405472645, "learning_rate": 3.339408170223718e-07, "loss": 0.001, "step": 54680 }, { "epoch": 0.8948703264337724, "grad_norm": 0.047607194239284965, "learning_rate": 3.329153995427153e-07, "loss": 0.0017, "step": 54690 }, { "epoch": 0.8950339523848483, "grad_norm": 0.1282441154058051, "learning_rate": 3.31891504616873e-07, "loss": 0.0024, "step": 54700 }, { "epoch": 0.8951975783359241, "grad_norm": 0.1535722953131099, "learning_rate": 3.308691325788732e-07, "loss": 0.0029, "step": 54710 }, { "epoch": 0.895361204287, "grad_norm": 0.17968293282700348, "learning_rate": 3.2984828376224577e-07, "loss": 0.0022, "step": 54720 }, { "epoch": 0.8955248302380757, "grad_norm": 0.05500806083358326, "learning_rate": 3.288289585000265e-07, "loss": 0.0021, "step": 54730 }, { "epoch": 0.8956884561891516, "grad_norm": 0.1499708842516863, "learning_rate": 3.278111571247505e-07, "loss": 0.0016, "step": 54740 }, { "epoch": 0.8958520821402275, "grad_norm": 0.15846802856555633, "learning_rate": 3.267948799684584e-07, "loss": 0.0016, "step": 54750 }, { "epoch": 0.8960157080913033, "grad_norm": 0.06276388343098598, "learning_rate": 3.257801273626926e-07, "loss": 0.0017, "step": 54760 }, { "epoch": 0.8961793340423791, "grad_norm": 0.040890379777446126, "learning_rate": 3.247668996384973e-07, "loss": 0.0014, "step": 54770 }, { "epoch": 0.8963429599934549, "grad_norm": 0.0876444442901716, "learning_rate": 3.237551971264202e-07, "loss": 0.0024, "step": 54780 }, { "epoch": 0.8965065859445308, "grad_norm": 0.18767107501538152, "learning_rate": 3.22745020156513e-07, "loss": 0.0017, "step": 54790 }, { "epoch": 0.8966702118956067, "grad_norm": 0.19251069475252744, "learning_rate": 3.2173636905832693e-07, "loss": 0.0022, "step": 54800 }, { "epoch": 0.8968338378466825, "grad_norm": 0.14086856538327036, "learning_rate": 3.207292441609161e-07, "loss": 0.0019, "step": 54810 }, { "epoch": 0.8969974637977584, "grad_norm": 0.05757627548559175, "learning_rate": 3.197236457928382e-07, "loss": 0.0016, "step": 54820 }, { "epoch": 0.8971610897488341, "grad_norm": 0.030941850603707167, "learning_rate": 3.1871957428215083e-07, "loss": 0.0016, "step": 54830 }, { "epoch": 0.89732471569991, "grad_norm": 0.1727152217216821, "learning_rate": 3.1771702995641474e-07, "loss": 0.0018, "step": 54840 }, { "epoch": 0.8974883416509859, "grad_norm": 0.21024436962336948, "learning_rate": 3.1671601314269385e-07, "loss": 0.0032, "step": 54850 }, { "epoch": 0.8976519676020617, "grad_norm": 0.14867534929409998, "learning_rate": 3.157165241675514e-07, "loss": 0.0035, "step": 54860 }, { "epoch": 0.8978155935531376, "grad_norm": 0.04390733531272599, "learning_rate": 3.14718563357051e-07, "loss": 0.0016, "step": 54870 }, { "epoch": 0.8979792195042133, "grad_norm": 0.21032225030609492, "learning_rate": 3.1372213103676216e-07, "loss": 0.0036, "step": 54880 }, { "epoch": 0.8981428454552892, "grad_norm": 0.2205845380768619, "learning_rate": 3.127272275317522e-07, "loss": 0.0015, "step": 54890 }, { "epoch": 0.8983064714063651, "grad_norm": 0.10830606000662713, "learning_rate": 3.117338531665909e-07, "loss": 0.0014, "step": 54900 }, { "epoch": 0.8984700973574409, "grad_norm": 0.05644491812245202, "learning_rate": 3.107420082653495e-07, "loss": 0.0017, "step": 54910 }, { "epoch": 0.8986337233085168, "grad_norm": 0.10431068297762724, "learning_rate": 3.0975169315160093e-07, "loss": 0.0015, "step": 54920 }, { "epoch": 0.8987973492595925, "grad_norm": 0.16705474976151624, "learning_rate": 3.0876290814841503e-07, "loss": 0.0031, "step": 54930 }, { "epoch": 0.8989609752106684, "grad_norm": 0.37901055382188303, "learning_rate": 3.077756535783677e-07, "loss": 0.0024, "step": 54940 }, { "epoch": 0.8991246011617443, "grad_norm": 0.058447289944995116, "learning_rate": 3.067899297635324e-07, "loss": 0.0029, "step": 54950 }, { "epoch": 0.8992882271128201, "grad_norm": 0.14604681219932686, "learning_rate": 3.0580573702548357e-07, "loss": 0.0009, "step": 54960 }, { "epoch": 0.899451853063896, "grad_norm": 0.2025788601108559, "learning_rate": 3.0482307568529877e-07, "loss": 0.0023, "step": 54970 }, { "epoch": 0.8996154790149717, "grad_norm": 0.174359620200919, "learning_rate": 3.038419460635528e-07, "loss": 0.0024, "step": 54980 }, { "epoch": 0.8997791049660476, "grad_norm": 0.04537432838368517, "learning_rate": 3.028623484803206e-07, "loss": 0.0027, "step": 54990 }, { "epoch": 0.8999427309171234, "grad_norm": 0.0345357890341505, "learning_rate": 3.018842832551799e-07, "loss": 0.0018, "step": 55000 }, { "epoch": 0.9001063568681993, "grad_norm": 0.09801966779350116, "learning_rate": 3.0090775070720724e-07, "loss": 0.0025, "step": 55010 }, { "epoch": 0.9002699828192752, "grad_norm": 0.07649213164184805, "learning_rate": 2.999327511549782e-07, "loss": 0.0038, "step": 55020 }, { "epoch": 0.900433608770351, "grad_norm": 0.1606547782899063, "learning_rate": 2.9895928491656903e-07, "loss": 0.0019, "step": 55030 }, { "epoch": 0.9005972347214268, "grad_norm": 0.08315474526242514, "learning_rate": 2.979873523095583e-07, "loss": 0.0017, "step": 55040 }, { "epoch": 0.9007608606725026, "grad_norm": 0.1619305372853114, "learning_rate": 2.9701695365101926e-07, "loss": 0.0022, "step": 55050 }, { "epoch": 0.9009244866235785, "grad_norm": 0.21429890882802613, "learning_rate": 2.960480892575268e-07, "loss": 0.0026, "step": 55060 }, { "epoch": 0.9010881125746544, "grad_norm": 0.10352649985733839, "learning_rate": 2.9508075944515824e-07, "loss": 0.0019, "step": 55070 }, { "epoch": 0.9012517385257302, "grad_norm": 0.015935293654937756, "learning_rate": 2.9411496452948663e-07, "loss": 0.0048, "step": 55080 }, { "epoch": 0.901415364476806, "grad_norm": 0.15620151700533172, "learning_rate": 2.931507048255849e-07, "loss": 0.0019, "step": 55090 }, { "epoch": 0.9015789904278818, "grad_norm": 0.0705950916497526, "learning_rate": 2.921879806480277e-07, "loss": 0.002, "step": 55100 }, { "epoch": 0.9017426163789577, "grad_norm": 0.10795043150059214, "learning_rate": 2.912267923108847e-07, "loss": 0.0023, "step": 55110 }, { "epoch": 0.9019062423300336, "grad_norm": 0.17197534170231377, "learning_rate": 2.902671401277268e-07, "loss": 0.0024, "step": 55120 }, { "epoch": 0.9020698682811094, "grad_norm": 0.1261312565539737, "learning_rate": 2.8930902441162423e-07, "loss": 0.0021, "step": 55130 }, { "epoch": 0.9022334942321852, "grad_norm": 0.13054427755160755, "learning_rate": 2.883524454751457e-07, "loss": 0.0017, "step": 55140 }, { "epoch": 0.902397120183261, "grad_norm": 0.11592971792779985, "learning_rate": 2.873974036303562e-07, "loss": 0.0016, "step": 55150 }, { "epoch": 0.9025607461343369, "grad_norm": 0.1159608936726887, "learning_rate": 2.8644389918882463e-07, "loss": 0.0023, "step": 55160 }, { "epoch": 0.9027243720854128, "grad_norm": 0.15366642328978197, "learning_rate": 2.8549193246161233e-07, "loss": 0.0018, "step": 55170 }, { "epoch": 0.9028879980364886, "grad_norm": 0.06744822157575542, "learning_rate": 2.845415037592808e-07, "loss": 0.0017, "step": 55180 }, { "epoch": 0.9030516239875644, "grad_norm": 0.020999347694959196, "learning_rate": 2.835926133918937e-07, "loss": 0.0022, "step": 55190 }, { "epoch": 0.9032152499386402, "grad_norm": 0.11219823826524569, "learning_rate": 2.826452616690073e-07, "loss": 0.0021, "step": 55200 }, { "epoch": 0.9033788758897161, "grad_norm": 0.04788470037606798, "learning_rate": 2.816994488996788e-07, "loss": 0.0015, "step": 55210 }, { "epoch": 0.903542501840792, "grad_norm": 0.14119615719003709, "learning_rate": 2.807551753924642e-07, "loss": 0.002, "step": 55220 }, { "epoch": 0.9037061277918678, "grad_norm": 0.45189869322670817, "learning_rate": 2.7981244145541486e-07, "loss": 0.0032, "step": 55230 }, { "epoch": 0.9038697537429436, "grad_norm": 0.06539300542807684, "learning_rate": 2.788712473960803e-07, "loss": 0.0014, "step": 55240 }, { "epoch": 0.9040333796940194, "grad_norm": 0.08359094918823193, "learning_rate": 2.779315935215099e-07, "loss": 0.0029, "step": 55250 }, { "epoch": 0.9041970056450953, "grad_norm": 0.12224832662660219, "learning_rate": 2.7699348013824903e-07, "loss": 0.0026, "step": 55260 }, { "epoch": 0.9043606315961712, "grad_norm": 0.26172504280552056, "learning_rate": 2.7605690755233896e-07, "loss": 0.0024, "step": 55270 }, { "epoch": 0.904524257547247, "grad_norm": 0.09217145101803007, "learning_rate": 2.7512187606932305e-07, "loss": 0.0011, "step": 55280 }, { "epoch": 0.9046878834983229, "grad_norm": 0.026259947656491672, "learning_rate": 2.741883859942357e-07, "loss": 0.0011, "step": 55290 }, { "epoch": 0.9048515094493986, "grad_norm": 0.13618765604554886, "learning_rate": 2.7325643763161213e-07, "loss": 0.0012, "step": 55300 }, { "epoch": 0.9050151354004745, "grad_norm": 0.07239486736134429, "learning_rate": 2.723260312854853e-07, "loss": 0.0026, "step": 55310 }, { "epoch": 0.9051787613515504, "grad_norm": 0.005495350910934415, "learning_rate": 2.7139716725938293e-07, "loss": 0.0028, "step": 55320 }, { "epoch": 0.9053423873026262, "grad_norm": 0.2488829957549035, "learning_rate": 2.704698458563304e-07, "loss": 0.0021, "step": 55330 }, { "epoch": 0.9055060132537021, "grad_norm": 0.21766250197040005, "learning_rate": 2.695440673788513e-07, "loss": 0.0025, "step": 55340 }, { "epoch": 0.9056696392047778, "grad_norm": 0.10953824545046612, "learning_rate": 2.6861983212896236e-07, "loss": 0.0014, "step": 55350 }, { "epoch": 0.9058332651558537, "grad_norm": 0.031958995251045834, "learning_rate": 2.6769714040818073e-07, "loss": 0.001, "step": 55360 }, { "epoch": 0.9059968911069296, "grad_norm": 0.024051180391602693, "learning_rate": 2.667759925175162e-07, "loss": 0.001, "step": 55370 }, { "epoch": 0.9061605170580054, "grad_norm": 0.08799517109715474, "learning_rate": 2.6585638875747885e-07, "loss": 0.0026, "step": 55380 }, { "epoch": 0.9063241430090813, "grad_norm": 0.37663692924875547, "learning_rate": 2.649383294280733e-07, "loss": 0.0018, "step": 55390 }, { "epoch": 0.906487768960157, "grad_norm": 0.1733224251920871, "learning_rate": 2.6402181482879873e-07, "loss": 0.0026, "step": 55400 }, { "epoch": 0.9066513949112329, "grad_norm": 0.033847806687230954, "learning_rate": 2.631068452586533e-07, "loss": 0.0013, "step": 55410 }, { "epoch": 0.9068150208623088, "grad_norm": 0.07988489999548476, "learning_rate": 2.621934210161281e-07, "loss": 0.0009, "step": 55420 }, { "epoch": 0.9069786468133846, "grad_norm": 0.033828729523273586, "learning_rate": 2.612815423992121e-07, "loss": 0.0041, "step": 55430 }, { "epoch": 0.9071422727644605, "grad_norm": 0.14756675761151994, "learning_rate": 2.6037120970539065e-07, "loss": 0.0026, "step": 55440 }, { "epoch": 0.9073058987155362, "grad_norm": 0.012179204387660587, "learning_rate": 2.5946242323164336e-07, "loss": 0.0021, "step": 55450 }, { "epoch": 0.9074695246666121, "grad_norm": 0.05349848219179725, "learning_rate": 2.585551832744448e-07, "loss": 0.0013, "step": 55460 }, { "epoch": 0.907633150617688, "grad_norm": 0.1337655768884975, "learning_rate": 2.576494901297666e-07, "loss": 0.0015, "step": 55470 }, { "epoch": 0.9077967765687638, "grad_norm": 0.09731400949961659, "learning_rate": 2.5674534409307507e-07, "loss": 0.0016, "step": 55480 }, { "epoch": 0.9079604025198397, "grad_norm": 0.09452974825624491, "learning_rate": 2.55842745459331e-07, "loss": 0.0012, "step": 55490 }, { "epoch": 0.9081240284709154, "grad_norm": 0.021427705701072268, "learning_rate": 2.549416945229932e-07, "loss": 0.0024, "step": 55500 }, { "epoch": 0.9082876544219913, "grad_norm": 0.08505538446844334, "learning_rate": 2.5404219157801224e-07, "loss": 0.0033, "step": 55510 }, { "epoch": 0.9084512803730672, "grad_norm": 0.19727355048470685, "learning_rate": 2.5314423691783496e-07, "loss": 0.0015, "step": 55520 }, { "epoch": 0.908614906324143, "grad_norm": 0.20772209724049945, "learning_rate": 2.5224783083540373e-07, "loss": 0.0026, "step": 55530 }, { "epoch": 0.9087785322752189, "grad_norm": 0.24227787227924222, "learning_rate": 2.513529736231551e-07, "loss": 0.0029, "step": 55540 }, { "epoch": 0.9089421582262946, "grad_norm": 0.0835086290877973, "learning_rate": 2.5045966557301957e-07, "loss": 0.0018, "step": 55550 }, { "epoch": 0.9091057841773705, "grad_norm": 0.04785476658194691, "learning_rate": 2.4956790697642396e-07, "loss": 0.002, "step": 55560 }, { "epoch": 0.9092694101284464, "grad_norm": 0.07602260406299748, "learning_rate": 2.486776981242889e-07, "loss": 0.0012, "step": 55570 }, { "epoch": 0.9094330360795222, "grad_norm": 0.19717844371832224, "learning_rate": 2.4778903930702925e-07, "loss": 0.0019, "step": 55580 }, { "epoch": 0.9095966620305981, "grad_norm": 0.03427350533854744, "learning_rate": 2.469019308145537e-07, "loss": 0.0015, "step": 55590 }, { "epoch": 0.9097602879816739, "grad_norm": 0.19813913703631816, "learning_rate": 2.460163729362658e-07, "loss": 0.0028, "step": 55600 }, { "epoch": 0.9099239139327497, "grad_norm": 0.04017201311209813, "learning_rate": 2.451323659610627e-07, "loss": 0.0019, "step": 55610 }, { "epoch": 0.9100875398838256, "grad_norm": 0.05326045818163813, "learning_rate": 2.442499101773377e-07, "loss": 0.0015, "step": 55620 }, { "epoch": 0.9102511658349014, "grad_norm": 0.008843341391014631, "learning_rate": 2.4336900587297485e-07, "loss": 0.0018, "step": 55630 }, { "epoch": 0.9104147917859773, "grad_norm": 0.07799195820569942, "learning_rate": 2.424896533353538e-07, "loss": 0.0013, "step": 55640 }, { "epoch": 0.9105784177370531, "grad_norm": 0.053930016010225734, "learning_rate": 2.416118528513489e-07, "loss": 0.0019, "step": 55650 }, { "epoch": 0.9107420436881289, "grad_norm": 0.03646383683449795, "learning_rate": 2.407356047073256e-07, "loss": 0.0022, "step": 55660 }, { "epoch": 0.9109056696392048, "grad_norm": 0.1308189607527901, "learning_rate": 2.3986090918914515e-07, "loss": 0.0019, "step": 55670 }, { "epoch": 0.9110692955902806, "grad_norm": 0.05017846308077851, "learning_rate": 2.389877665821605e-07, "loss": 0.0018, "step": 55680 }, { "epoch": 0.9112329215413565, "grad_norm": 0.12846589249825804, "learning_rate": 2.3811617717122037e-07, "loss": 0.0033, "step": 55690 }, { "epoch": 0.9113965474924323, "grad_norm": 0.017885047868760363, "learning_rate": 2.3724614124066457e-07, "loss": 0.0009, "step": 55700 }, { "epoch": 0.9115601734435081, "grad_norm": 0.058179899984852686, "learning_rate": 2.3637765907432776e-07, "loss": 0.0012, "step": 55710 }, { "epoch": 0.911723799394584, "grad_norm": 0.08149246882345132, "learning_rate": 2.35510730955536e-07, "loss": 0.001, "step": 55720 }, { "epoch": 0.9118874253456598, "grad_norm": 0.15611638239376435, "learning_rate": 2.3464535716710924e-07, "loss": 0.0013, "step": 55730 }, { "epoch": 0.9120510512967357, "grad_norm": 0.15634849274687765, "learning_rate": 2.337815379913605e-07, "loss": 0.0022, "step": 55740 }, { "epoch": 0.9122146772478115, "grad_norm": 0.07734116577605984, "learning_rate": 2.3291927371009605e-07, "loss": 0.0014, "step": 55750 }, { "epoch": 0.9123783031988874, "grad_norm": 0.06718425911709938, "learning_rate": 2.3205856460461363e-07, "loss": 0.002, "step": 55760 }, { "epoch": 0.9125419291499632, "grad_norm": 0.02078459824712382, "learning_rate": 2.311994109557053e-07, "loss": 0.0011, "step": 55770 }, { "epoch": 0.912705555101039, "grad_norm": 0.15015139647321193, "learning_rate": 2.3034181304365411e-07, "loss": 0.0017, "step": 55780 }, { "epoch": 0.9128691810521149, "grad_norm": 0.034504031704151936, "learning_rate": 2.2948577114823623e-07, "loss": 0.0012, "step": 55790 }, { "epoch": 0.9130328070031907, "grad_norm": 0.18892323840720948, "learning_rate": 2.2863128554871938e-07, "loss": 0.0021, "step": 55800 }, { "epoch": 0.9131964329542666, "grad_norm": 0.05855581031692049, "learning_rate": 2.2777835652386615e-07, "loss": 0.0025, "step": 55810 }, { "epoch": 0.9133600589053424, "grad_norm": 0.06081122465622154, "learning_rate": 2.269269843519284e-07, "loss": 0.0025, "step": 55820 }, { "epoch": 0.9135236848564182, "grad_norm": 0.19891858434444634, "learning_rate": 2.2607716931065171e-07, "loss": 0.0019, "step": 55830 }, { "epoch": 0.9136873108074941, "grad_norm": 0.12957716396190783, "learning_rate": 2.2522891167727267e-07, "loss": 0.0018, "step": 55840 }, { "epoch": 0.9138509367585699, "grad_norm": 0.2873020674713747, "learning_rate": 2.2438221172852105e-07, "loss": 0.0033, "step": 55850 }, { "epoch": 0.9140145627096458, "grad_norm": 0.18801450198741834, "learning_rate": 2.235370697406164e-07, "loss": 0.0024, "step": 55860 }, { "epoch": 0.9141781886607215, "grad_norm": 0.655952568273636, "learning_rate": 2.2269348598927266e-07, "loss": 0.0018, "step": 55870 }, { "epoch": 0.9143418146117974, "grad_norm": 0.10980673319771161, "learning_rate": 2.218514607496941e-07, "loss": 0.002, "step": 55880 }, { "epoch": 0.9145054405628733, "grad_norm": 0.16711795285296266, "learning_rate": 2.21010994296576e-07, "loss": 0.0019, "step": 55890 }, { "epoch": 0.9146690665139491, "grad_norm": 0.10836437644539305, "learning_rate": 2.201720869041063e-07, "loss": 0.0017, "step": 55900 }, { "epoch": 0.914832692465025, "grad_norm": 0.031611988333894246, "learning_rate": 2.1933473884596267e-07, "loss": 0.0027, "step": 55910 }, { "epoch": 0.9149963184161007, "grad_norm": 0.13354100904584598, "learning_rate": 2.1849895039531555e-07, "loss": 0.0017, "step": 55920 }, { "epoch": 0.9151599443671766, "grad_norm": 0.1121473421304103, "learning_rate": 2.1766472182482624e-07, "loss": 0.0013, "step": 55930 }, { "epoch": 0.9153235703182525, "grad_norm": 0.07901031108362461, "learning_rate": 2.168320534066476e-07, "loss": 0.001, "step": 55940 }, { "epoch": 0.9154871962693283, "grad_norm": 0.12346570943725273, "learning_rate": 2.1600094541242234e-07, "loss": 0.0021, "step": 55950 }, { "epoch": 0.9156508222204042, "grad_norm": 0.07418633428329167, "learning_rate": 2.1517139811328523e-07, "loss": 0.0033, "step": 55960 }, { "epoch": 0.91581444817148, "grad_norm": 0.29340279984858275, "learning_rate": 2.1434341177986094e-07, "loss": 0.0026, "step": 55970 }, { "epoch": 0.9159780741225558, "grad_norm": 0.17036844895899028, "learning_rate": 2.135169866822645e-07, "loss": 0.0034, "step": 55980 }, { "epoch": 0.9161417000736317, "grad_norm": 0.12099077881816002, "learning_rate": 2.1269212309010411e-07, "loss": 0.0029, "step": 55990 }, { "epoch": 0.9163053260247075, "grad_norm": 0.05223029540291728, "learning_rate": 2.1186882127247678e-07, "loss": 0.0014, "step": 56000 }, { "epoch": 0.9164689519757834, "grad_norm": 0.31384412561880726, "learning_rate": 2.1104708149796982e-07, "loss": 0.003, "step": 56010 }, { "epoch": 0.9166325779268591, "grad_norm": 0.0747577346650278, "learning_rate": 2.1022690403465994e-07, "loss": 0.0028, "step": 56020 }, { "epoch": 0.916796203877935, "grad_norm": 0.0721854685751019, "learning_rate": 2.0940828915011757e-07, "loss": 0.0013, "step": 56030 }, { "epoch": 0.9169598298290109, "grad_norm": 0.11573605420316137, "learning_rate": 2.0859123711140072e-07, "loss": 0.0038, "step": 56040 }, { "epoch": 0.9171234557800867, "grad_norm": 0.14292415946337345, "learning_rate": 2.077757481850573e-07, "loss": 0.0022, "step": 56050 }, { "epoch": 0.9172870817311626, "grad_norm": 0.1182264424909099, "learning_rate": 2.0696182263712726e-07, "loss": 0.0024, "step": 56060 }, { "epoch": 0.9174507076822384, "grad_norm": 0.12415764963071088, "learning_rate": 2.0614946073313934e-07, "loss": 0.0025, "step": 56070 }, { "epoch": 0.9176143336333142, "grad_norm": 0.2281540260842942, "learning_rate": 2.0533866273811097e-07, "loss": 0.0017, "step": 56080 }, { "epoch": 0.9177779595843901, "grad_norm": 0.19525872256303528, "learning_rate": 2.0452942891655226e-07, "loss": 0.0021, "step": 56090 }, { "epoch": 0.9179415855354659, "grad_norm": 0.11312533920422914, "learning_rate": 2.0372175953246032e-07, "loss": 0.0023, "step": 56100 }, { "epoch": 0.9181052114865418, "grad_norm": 0.05861596001143664, "learning_rate": 2.029156548493233e-07, "loss": 0.003, "step": 56110 }, { "epoch": 0.9182688374376176, "grad_norm": 0.1453170418948343, "learning_rate": 2.0211111513011916e-07, "loss": 0.0018, "step": 56120 }, { "epoch": 0.9184324633886934, "grad_norm": 0.31667349994033256, "learning_rate": 2.0130814063731463e-07, "loss": 0.003, "step": 56130 }, { "epoch": 0.9185960893397693, "grad_norm": 0.04255089892916259, "learning_rate": 2.005067316328646e-07, "loss": 0.0016, "step": 56140 }, { "epoch": 0.9187597152908451, "grad_norm": 0.1025138155260419, "learning_rate": 1.9970688837821662e-07, "loss": 0.001, "step": 56150 }, { "epoch": 0.918923341241921, "grad_norm": 0.1664173343061674, "learning_rate": 1.989086111343036e-07, "loss": 0.0013, "step": 56160 }, { "epoch": 0.9190869671929968, "grad_norm": 0.0966479358239818, "learning_rate": 1.9811190016155002e-07, "loss": 0.0013, "step": 56170 }, { "epoch": 0.9192505931440726, "grad_norm": 0.16063982165806956, "learning_rate": 1.973167557198702e-07, "loss": 0.0024, "step": 56180 }, { "epoch": 0.9194142190951485, "grad_norm": 0.041822309286628036, "learning_rate": 1.9652317806866383e-07, "loss": 0.0013, "step": 56190 }, { "epoch": 0.9195778450462243, "grad_norm": 0.07067927088531724, "learning_rate": 1.957311674668222e-07, "loss": 0.0014, "step": 56200 }, { "epoch": 0.9197414709973002, "grad_norm": 0.032332041213308044, "learning_rate": 1.9494072417272524e-07, "loss": 0.0023, "step": 56210 }, { "epoch": 0.919905096948376, "grad_norm": 0.11884091322439831, "learning_rate": 1.9415184844424063e-07, "loss": 0.0026, "step": 56220 }, { "epoch": 0.9200687228994519, "grad_norm": 0.2499504673842196, "learning_rate": 1.9336454053872522e-07, "loss": 0.002, "step": 56230 }, { "epoch": 0.9202323488505277, "grad_norm": 0.05339050055431217, "learning_rate": 1.9257880071302525e-07, "loss": 0.0014, "step": 56240 }, { "epoch": 0.9203959748016035, "grad_norm": 0.13663361761044288, "learning_rate": 1.9179462922347292e-07, "loss": 0.0017, "step": 56250 }, { "epoch": 0.9205596007526794, "grad_norm": 0.07428785140305373, "learning_rate": 1.9101202632589077e-07, "loss": 0.0012, "step": 56260 }, { "epoch": 0.9207232267037552, "grad_norm": 0.3614251728057188, "learning_rate": 1.902309922755896e-07, "loss": 0.0027, "step": 56270 }, { "epoch": 0.9208868526548311, "grad_norm": 0.11004066929447891, "learning_rate": 1.8945152732736783e-07, "loss": 0.0017, "step": 56280 }, { "epoch": 0.9210504786059069, "grad_norm": 0.09062584213277752, "learning_rate": 1.886736317355109e-07, "loss": 0.002, "step": 56290 }, { "epoch": 0.9212141045569827, "grad_norm": 0.17483758213472744, "learning_rate": 1.8789730575379638e-07, "loss": 0.0019, "step": 56300 }, { "epoch": 0.9213777305080586, "grad_norm": 0.09132705281069708, "learning_rate": 1.871225496354845e-07, "loss": 0.0015, "step": 56310 }, { "epoch": 0.9215413564591344, "grad_norm": 0.15235146319019063, "learning_rate": 1.8634936363332523e-07, "loss": 0.0026, "step": 56320 }, { "epoch": 0.9217049824102103, "grad_norm": 0.07322562758897258, "learning_rate": 1.8557774799955907e-07, "loss": 0.0011, "step": 56330 }, { "epoch": 0.9218686083612861, "grad_norm": 0.30773583146583877, "learning_rate": 1.8480770298591078e-07, "loss": 0.002, "step": 56340 }, { "epoch": 0.9220322343123619, "grad_norm": 0.16329344491213882, "learning_rate": 1.840392288435938e-07, "loss": 0.0014, "step": 56350 }, { "epoch": 0.9221958602634378, "grad_norm": 0.3489466202091755, "learning_rate": 1.8327232582330934e-07, "loss": 0.0021, "step": 56360 }, { "epoch": 0.9223594862145136, "grad_norm": 0.08177529706484211, "learning_rate": 1.8250699417524666e-07, "loss": 0.0028, "step": 56370 }, { "epoch": 0.9225231121655895, "grad_norm": 0.18051426226661335, "learning_rate": 1.8174323414908046e-07, "loss": 0.0017, "step": 56380 }, { "epoch": 0.9226867381166654, "grad_norm": 0.15806451601488578, "learning_rate": 1.809810459939737e-07, "loss": 0.0021, "step": 56390 }, { "epoch": 0.9228503640677411, "grad_norm": 0.0872565287801616, "learning_rate": 1.8022042995857859e-07, "loss": 0.0017, "step": 56400 }, { "epoch": 0.923013990018817, "grad_norm": 0.12068942960599625, "learning_rate": 1.794613862910316e-07, "loss": 0.0012, "step": 56410 }, { "epoch": 0.9231776159698928, "grad_norm": 0.06756677647445036, "learning_rate": 1.7870391523895746e-07, "loss": 0.0013, "step": 56420 }, { "epoch": 0.9233412419209687, "grad_norm": 0.13204465865906662, "learning_rate": 1.7794801704946795e-07, "loss": 0.0012, "step": 56430 }, { "epoch": 0.9235048678720446, "grad_norm": 0.2603801495279046, "learning_rate": 1.7719369196916081e-07, "loss": 0.0027, "step": 56440 }, { "epoch": 0.9236684938231203, "grad_norm": 0.1140901751491441, "learning_rate": 1.7644094024412083e-07, "loss": 0.0014, "step": 56450 }, { "epoch": 0.9238321197741962, "grad_norm": 0.12178043751776504, "learning_rate": 1.7568976211992162e-07, "loss": 0.0014, "step": 56460 }, { "epoch": 0.923995745725272, "grad_norm": 0.051216740076593374, "learning_rate": 1.7494015784162045e-07, "loss": 0.0018, "step": 56470 }, { "epoch": 0.9241593716763479, "grad_norm": 0.13807739409114403, "learning_rate": 1.7419212765376336e-07, "loss": 0.003, "step": 56480 }, { "epoch": 0.9243229976274238, "grad_norm": 0.15391130662494193, "learning_rate": 1.734456718003813e-07, "loss": 0.0027, "step": 56490 }, { "epoch": 0.9244866235784995, "grad_norm": 0.03434975143523334, "learning_rate": 1.7270079052499278e-07, "loss": 0.0018, "step": 56500 }, { "epoch": 0.9246502495295754, "grad_norm": 0.00658772233653941, "learning_rate": 1.7195748407060064e-07, "loss": 0.0012, "step": 56510 }, { "epoch": 0.9248138754806512, "grad_norm": 0.2947282495327704, "learning_rate": 1.7121575267969758e-07, "loss": 0.0016, "step": 56520 }, { "epoch": 0.9249775014317271, "grad_norm": 0.14051321186154042, "learning_rate": 1.7047559659425948e-07, "loss": 0.0022, "step": 56530 }, { "epoch": 0.925141127382803, "grad_norm": 0.17219877969543573, "learning_rate": 1.6973701605574934e-07, "loss": 0.0015, "step": 56540 }, { "epoch": 0.9253047533338787, "grad_norm": 0.05393988161970387, "learning_rate": 1.6900001130511546e-07, "loss": 0.0022, "step": 56550 }, { "epoch": 0.9254683792849546, "grad_norm": 0.08722251094028792, "learning_rate": 1.6826458258279331e-07, "loss": 0.0013, "step": 56560 }, { "epoch": 0.9256320052360304, "grad_norm": 0.17524374493945982, "learning_rate": 1.6753073012870212e-07, "loss": 0.002, "step": 56570 }, { "epoch": 0.9257956311871063, "grad_norm": 0.11825250771653613, "learning_rate": 1.6679845418225038e-07, "loss": 0.0024, "step": 56580 }, { "epoch": 0.9259592571381822, "grad_norm": 0.1937679049022653, "learning_rate": 1.6606775498232863e-07, "loss": 0.0019, "step": 56590 }, { "epoch": 0.9261228830892579, "grad_norm": 0.3087846680899814, "learning_rate": 1.653386327673151e-07, "loss": 0.0027, "step": 56600 }, { "epoch": 0.9262865090403338, "grad_norm": 0.06636982167504778, "learning_rate": 1.6461108777507285e-07, "loss": 0.0022, "step": 56610 }, { "epoch": 0.9264501349914096, "grad_norm": 0.11071169781810984, "learning_rate": 1.638851202429509e-07, "loss": 0.002, "step": 56620 }, { "epoch": 0.9266137609424855, "grad_norm": 0.08030151167553365, "learning_rate": 1.6316073040778203e-07, "loss": 0.0014, "step": 56630 }, { "epoch": 0.9267773868935614, "grad_norm": 0.01085950884936691, "learning_rate": 1.6243791850588774e-07, "loss": 0.0027, "step": 56640 }, { "epoch": 0.9269410128446371, "grad_norm": 0.15374391783959676, "learning_rate": 1.6171668477307113e-07, "loss": 0.0025, "step": 56650 }, { "epoch": 0.927104638795713, "grad_norm": 0.14697337043700706, "learning_rate": 1.609970294446228e-07, "loss": 0.0016, "step": 56660 }, { "epoch": 0.9272682647467888, "grad_norm": 0.0920212191121231, "learning_rate": 1.6027895275531668e-07, "loss": 0.0015, "step": 56670 }, { "epoch": 0.9274318906978647, "grad_norm": 0.15788819854947958, "learning_rate": 1.5956245493941313e-07, "loss": 0.0016, "step": 56680 }, { "epoch": 0.9275955166489406, "grad_norm": 0.037872452092925085, "learning_rate": 1.5884753623065684e-07, "loss": 0.001, "step": 56690 }, { "epoch": 0.9277591426000164, "grad_norm": 0.1098913508461192, "learning_rate": 1.5813419686227682e-07, "loss": 0.0017, "step": 56700 }, { "epoch": 0.9279227685510922, "grad_norm": 0.10799060137985783, "learning_rate": 1.5742243706698857e-07, "loss": 0.0018, "step": 56710 }, { "epoch": 0.928086394502168, "grad_norm": 0.17065968094713618, "learning_rate": 1.5671225707699078e-07, "loss": 0.0024, "step": 56720 }, { "epoch": 0.9282500204532439, "grad_norm": 0.08845286044398096, "learning_rate": 1.5600365712396647e-07, "loss": 0.0031, "step": 56730 }, { "epoch": 0.9284136464043197, "grad_norm": 0.18631657208935526, "learning_rate": 1.5529663743908406e-07, "loss": 0.0015, "step": 56740 }, { "epoch": 0.9285772723553956, "grad_norm": 0.2756028334416573, "learning_rate": 1.5459119825299684e-07, "loss": 0.0015, "step": 56750 }, { "epoch": 0.9287408983064714, "grad_norm": 0.11596582185376654, "learning_rate": 1.538873397958407e-07, "loss": 0.0025, "step": 56760 }, { "epoch": 0.9289045242575472, "grad_norm": 0.2733233755138023, "learning_rate": 1.5318506229723862e-07, "loss": 0.0031, "step": 56770 }, { "epoch": 0.9290681502086231, "grad_norm": 0.06742790971744204, "learning_rate": 1.5248436598629457e-07, "loss": 0.0012, "step": 56780 }, { "epoch": 0.9292317761596989, "grad_norm": 0.05183414729130159, "learning_rate": 1.517852510915996e-07, "loss": 0.0018, "step": 56790 }, { "epoch": 0.9293954021107748, "grad_norm": 0.24072429414971452, "learning_rate": 1.5108771784122735e-07, "loss": 0.0022, "step": 56800 }, { "epoch": 0.9295590280618506, "grad_norm": 0.1832841627061076, "learning_rate": 1.5039176646273467e-07, "loss": 0.0019, "step": 56810 }, { "epoch": 0.9297226540129264, "grad_norm": 0.17816897464389503, "learning_rate": 1.4969739718316445e-07, "loss": 0.0017, "step": 56820 }, { "epoch": 0.9298862799640023, "grad_norm": 0.17091305679947827, "learning_rate": 1.4900461022904212e-07, "loss": 0.0013, "step": 56830 }, { "epoch": 0.9300499059150781, "grad_norm": 0.1245130229761237, "learning_rate": 1.4831340582637688e-07, "loss": 0.002, "step": 56840 }, { "epoch": 0.930213531866154, "grad_norm": 0.14531875019943838, "learning_rate": 1.476237842006628e-07, "loss": 0.0014, "step": 56850 }, { "epoch": 0.9303771578172298, "grad_norm": 0.12145816635339683, "learning_rate": 1.469357455768755e-07, "loss": 0.0028, "step": 56860 }, { "epoch": 0.9305407837683056, "grad_norm": 0.04004686636789763, "learning_rate": 1.4624929017947654e-07, "loss": 0.001, "step": 56870 }, { "epoch": 0.9307044097193815, "grad_norm": 0.029071426448493906, "learning_rate": 1.4556441823240895e-07, "loss": 0.0013, "step": 56880 }, { "epoch": 0.9308680356704573, "grad_norm": 0.06711716197325754, "learning_rate": 1.4488112995910076e-07, "loss": 0.0023, "step": 56890 }, { "epoch": 0.9310316616215332, "grad_norm": 0.035698899055916326, "learning_rate": 1.4419942558246248e-07, "loss": 0.0007, "step": 56900 }, { "epoch": 0.931195287572609, "grad_norm": 0.13272245030738503, "learning_rate": 1.435193053248879e-07, "loss": 0.0023, "step": 56910 }, { "epoch": 0.9313589135236848, "grad_norm": 0.078569201300077, "learning_rate": 1.4284076940825454e-07, "loss": 0.0026, "step": 56920 }, { "epoch": 0.9315225394747607, "grad_norm": 0.1980841259891301, "learning_rate": 1.4216381805392254e-07, "loss": 0.0016, "step": 56930 }, { "epoch": 0.9316861654258365, "grad_norm": 0.26067245295665536, "learning_rate": 1.4148845148273472e-07, "loss": 0.0023, "step": 56940 }, { "epoch": 0.9318497913769124, "grad_norm": 0.1065616297874259, "learning_rate": 1.408146699150187e-07, "loss": 0.0017, "step": 56950 }, { "epoch": 0.9320134173279883, "grad_norm": 0.11164504871003998, "learning_rate": 1.4014247357058363e-07, "loss": 0.0025, "step": 56960 }, { "epoch": 0.932177043279064, "grad_norm": 0.09350048848984986, "learning_rate": 1.394718626687208e-07, "loss": 0.0024, "step": 56970 }, { "epoch": 0.9323406692301399, "grad_norm": 0.03791663160604899, "learning_rate": 1.3880283742820577e-07, "loss": 0.0009, "step": 56980 }, { "epoch": 0.9325042951812157, "grad_norm": 0.10606463926942465, "learning_rate": 1.3813539806729558e-07, "loss": 0.0026, "step": 56990 }, { "epoch": 0.9326679211322916, "grad_norm": 0.06524063400901291, "learning_rate": 1.3746954480373166e-07, "loss": 0.0018, "step": 57000 }, { "epoch": 0.9328315470833675, "grad_norm": 0.035178293335618324, "learning_rate": 1.3680527785473574e-07, "loss": 0.0025, "step": 57010 }, { "epoch": 0.9329951730344432, "grad_norm": 0.07009348355695444, "learning_rate": 1.361425974370134e-07, "loss": 0.0016, "step": 57020 }, { "epoch": 0.9331587989855191, "grad_norm": 0.14075558565600693, "learning_rate": 1.3548150376675396e-07, "loss": 0.0015, "step": 57030 }, { "epoch": 0.9333224249365949, "grad_norm": 0.08762978664546711, "learning_rate": 1.348219970596243e-07, "loss": 0.0014, "step": 57040 }, { "epoch": 0.9334860508876708, "grad_norm": 0.06341772653663495, "learning_rate": 1.341640775307801e-07, "loss": 0.0032, "step": 57050 }, { "epoch": 0.9336496768387467, "grad_norm": 0.19012312932323627, "learning_rate": 1.335077453948541e-07, "loss": 0.0032, "step": 57060 }, { "epoch": 0.9338133027898224, "grad_norm": 0.20454370362838475, "learning_rate": 1.3285300086596287e-07, "loss": 0.0018, "step": 57070 }, { "epoch": 0.9339769287408983, "grad_norm": 0.015959309125952014, "learning_rate": 1.321998441577066e-07, "loss": 0.0019, "step": 57080 }, { "epoch": 0.9341405546919741, "grad_norm": 0.05012942195603012, "learning_rate": 1.3154827548316596e-07, "loss": 0.0024, "step": 57090 }, { "epoch": 0.93430418064305, "grad_norm": 0.0740420141950303, "learning_rate": 1.3089829505490258e-07, "loss": 0.0022, "step": 57100 }, { "epoch": 0.9344678065941259, "grad_norm": 0.1300506971368136, "learning_rate": 1.3024990308496133e-07, "loss": 0.0024, "step": 57110 }, { "epoch": 0.9346314325452016, "grad_norm": 0.1279669067745699, "learning_rate": 1.296030997848696e-07, "loss": 0.0028, "step": 57120 }, { "epoch": 0.9347950584962775, "grad_norm": 0.11357931591916387, "learning_rate": 1.2895788536563425e-07, "loss": 0.0022, "step": 57130 }, { "epoch": 0.9349586844473533, "grad_norm": 0.0730682195625526, "learning_rate": 1.2831426003774629e-07, "loss": 0.0015, "step": 57140 }, { "epoch": 0.9351223103984292, "grad_norm": 0.07298688772885675, "learning_rate": 1.2767222401117662e-07, "loss": 0.002, "step": 57150 }, { "epoch": 0.9352859363495051, "grad_norm": 0.43135653167278326, "learning_rate": 1.270317774953772e-07, "loss": 0.0027, "step": 57160 }, { "epoch": 0.9354495623005809, "grad_norm": 0.20469288391008236, "learning_rate": 1.263929206992842e-07, "loss": 0.002, "step": 57170 }, { "epoch": 0.9356131882516567, "grad_norm": 0.11269816826346526, "learning_rate": 1.2575565383131206e-07, "loss": 0.0014, "step": 57180 }, { "epoch": 0.9357768142027325, "grad_norm": 0.10954650216225208, "learning_rate": 1.251199770993572e-07, "loss": 0.0014, "step": 57190 }, { "epoch": 0.9359404401538084, "grad_norm": 0.0659310650420113, "learning_rate": 1.2448589071079987e-07, "loss": 0.0021, "step": 57200 }, { "epoch": 0.9361040661048843, "grad_norm": 0.07375968158490528, "learning_rate": 1.2385339487249904e-07, "loss": 0.0016, "step": 57210 }, { "epoch": 0.93626769205596, "grad_norm": 0.02882472831203947, "learning_rate": 1.2322248979079355e-07, "loss": 0.0015, "step": 57220 }, { "epoch": 0.9364313180070359, "grad_norm": 0.1522293991094588, "learning_rate": 1.2259317567150654e-07, "loss": 0.0019, "step": 57230 }, { "epoch": 0.9365949439581117, "grad_norm": 0.1616738125606185, "learning_rate": 1.21965452719941e-07, "loss": 0.002, "step": 57240 }, { "epoch": 0.9367585699091876, "grad_norm": 0.1369339319940151, "learning_rate": 1.2133932114087865e-07, "loss": 0.002, "step": 57250 }, { "epoch": 0.9369221958602635, "grad_norm": 0.2101787684253629, "learning_rate": 1.2071478113858558e-07, "loss": 0.0025, "step": 57260 }, { "epoch": 0.9370858218113393, "grad_norm": 0.3025341491122814, "learning_rate": 1.2009183291680714e-07, "loss": 0.0033, "step": 57270 }, { "epoch": 0.9372494477624151, "grad_norm": 0.19748354962634784, "learning_rate": 1.194704766787669e-07, "loss": 0.0016, "step": 57280 }, { "epoch": 0.9374130737134909, "grad_norm": 0.10334510332518691, "learning_rate": 1.1885071262717318e-07, "loss": 0.0013, "step": 57290 }, { "epoch": 0.9375766996645668, "grad_norm": 0.2849642905969645, "learning_rate": 1.1823254096421322e-07, "loss": 0.0015, "step": 57300 }, { "epoch": 0.9377403256156427, "grad_norm": 0.098187221295278, "learning_rate": 1.1761596189155288e-07, "loss": 0.0023, "step": 57310 }, { "epoch": 0.9379039515667185, "grad_norm": 0.08360113878202924, "learning_rate": 1.1700097561034241e-07, "loss": 0.0017, "step": 57320 }, { "epoch": 0.9380675775177943, "grad_norm": 0.14315025435316453, "learning_rate": 1.1638758232120906e-07, "loss": 0.0011, "step": 57330 }, { "epoch": 0.9382312034688701, "grad_norm": 0.06717658369038658, "learning_rate": 1.1577578222426167e-07, "loss": 0.0026, "step": 57340 }, { "epoch": 0.938394829419946, "grad_norm": 0.10961696660116207, "learning_rate": 1.151655755190878e-07, "loss": 0.0016, "step": 57350 }, { "epoch": 0.9385584553710219, "grad_norm": 0.061201656868999824, "learning_rate": 1.1455696240475933e-07, "loss": 0.0018, "step": 57360 }, { "epoch": 0.9387220813220977, "grad_norm": 0.15769008040150126, "learning_rate": 1.1394994307982354e-07, "loss": 0.0013, "step": 57370 }, { "epoch": 0.9388857072731736, "grad_norm": 0.11343356284083142, "learning_rate": 1.1334451774230981e-07, "loss": 0.0021, "step": 57380 }, { "epoch": 0.9390493332242493, "grad_norm": 0.01264929846501807, "learning_rate": 1.1274068658972904e-07, "loss": 0.0014, "step": 57390 }, { "epoch": 0.9392129591753252, "grad_norm": 0.059587544571843856, "learning_rate": 1.1213844981906918e-07, "loss": 0.0015, "step": 57400 }, { "epoch": 0.9393765851264011, "grad_norm": 0.15512595960570466, "learning_rate": 1.1153780762679867e-07, "loss": 0.0021, "step": 57410 }, { "epoch": 0.9395402110774769, "grad_norm": 0.3498625192855743, "learning_rate": 1.1093876020886796e-07, "loss": 0.0019, "step": 57420 }, { "epoch": 0.9397038370285528, "grad_norm": 0.26820861576025584, "learning_rate": 1.1034130776070517e-07, "loss": 0.0021, "step": 57430 }, { "epoch": 0.9398674629796285, "grad_norm": 0.26564742324063867, "learning_rate": 1.0974545047721774e-07, "loss": 0.0019, "step": 57440 }, { "epoch": 0.9400310889307044, "grad_norm": 0.14831202440308094, "learning_rate": 1.091511885527946e-07, "loss": 0.0027, "step": 57450 }, { "epoch": 0.9401947148817803, "grad_norm": 0.1467088339922101, "learning_rate": 1.0855852218130292e-07, "loss": 0.002, "step": 57460 }, { "epoch": 0.9403583408328561, "grad_norm": 0.22317499697435758, "learning_rate": 1.0796745155608912e-07, "loss": 0.0046, "step": 57470 }, { "epoch": 0.940521966783932, "grad_norm": 0.3008313420863906, "learning_rate": 1.0737797686998063e-07, "loss": 0.0033, "step": 57480 }, { "epoch": 0.9406855927350077, "grad_norm": 0.10023575467983976, "learning_rate": 1.0679009831528197e-07, "loss": 0.0012, "step": 57490 }, { "epoch": 0.9408492186860836, "grad_norm": 0.1560097083887483, "learning_rate": 1.0620381608377861e-07, "loss": 0.0024, "step": 57500 }, { "epoch": 0.9410128446371595, "grad_norm": 0.14623449655897894, "learning_rate": 1.0561913036673532e-07, "loss": 0.0016, "step": 57510 }, { "epoch": 0.9411764705882353, "grad_norm": 0.1746786265840518, "learning_rate": 1.0503604135489454e-07, "loss": 0.002, "step": 57520 }, { "epoch": 0.9413400965393112, "grad_norm": 0.19754127043718617, "learning_rate": 1.0445454923847854e-07, "loss": 0.0009, "step": 57530 }, { "epoch": 0.9415037224903869, "grad_norm": 0.10687056682983168, "learning_rate": 1.0387465420719e-07, "loss": 0.0041, "step": 57540 }, { "epoch": 0.9416673484414628, "grad_norm": 0.07158143132011872, "learning_rate": 1.0329635645020874e-07, "loss": 0.0022, "step": 57550 }, { "epoch": 0.9418309743925387, "grad_norm": 0.3643703200421384, "learning_rate": 1.0271965615619439e-07, "loss": 0.0024, "step": 57560 }, { "epoch": 0.9419946003436145, "grad_norm": 0.16318978961743827, "learning_rate": 1.0214455351328534e-07, "loss": 0.0021, "step": 57570 }, { "epoch": 0.9421582262946904, "grad_norm": 0.38479551199978956, "learning_rate": 1.0157104870909818e-07, "loss": 0.0027, "step": 57580 }, { "epoch": 0.9423218522457661, "grad_norm": 0.0439078938311911, "learning_rate": 1.009991419307288e-07, "loss": 0.0016, "step": 57590 }, { "epoch": 0.942485478196842, "grad_norm": 0.07117097248671124, "learning_rate": 1.0042883336475184e-07, "loss": 0.0021, "step": 57600 }, { "epoch": 0.9426491041479178, "grad_norm": 0.03976758142853129, "learning_rate": 9.986012319722072e-08, "loss": 0.0009, "step": 57610 }, { "epoch": 0.9428127300989937, "grad_norm": 0.21025729793978354, "learning_rate": 9.929301161366645e-08, "loss": 0.002, "step": 57620 }, { "epoch": 0.9429763560500696, "grad_norm": 0.08384339552263549, "learning_rate": 9.872749879909938e-08, "loss": 0.0021, "step": 57630 }, { "epoch": 0.9431399820011453, "grad_norm": 0.00995640040133507, "learning_rate": 9.816358493800859e-08, "loss": 0.0017, "step": 57640 }, { "epoch": 0.9433036079522212, "grad_norm": 0.16673134761115105, "learning_rate": 9.760127021436028e-08, "loss": 0.0018, "step": 57650 }, { "epoch": 0.943467233903297, "grad_norm": 0.15131448777040565, "learning_rate": 9.704055481159935e-08, "loss": 0.0019, "step": 57660 }, { "epoch": 0.9436308598543729, "grad_norm": 0.17648385860033064, "learning_rate": 9.648143891265061e-08, "loss": 0.0016, "step": 57670 }, { "epoch": 0.9437944858054488, "grad_norm": 0.11145648299233683, "learning_rate": 9.59239226999148e-08, "loss": 0.0092, "step": 57680 }, { "epoch": 0.9439581117565246, "grad_norm": 0.05464597136457868, "learning_rate": 9.536800635527199e-08, "loss": 0.0015, "step": 57690 }, { "epoch": 0.9441217377076004, "grad_norm": 0.0852903111123607, "learning_rate": 9.481369006007989e-08, "loss": 0.0017, "step": 57700 }, { "epoch": 0.9442853636586762, "grad_norm": 0.28332547458117857, "learning_rate": 9.426097399517442e-08, "loss": 0.0021, "step": 57710 }, { "epoch": 0.9444489896097521, "grad_norm": 0.07890118215253496, "learning_rate": 9.370985834086965e-08, "loss": 0.0033, "step": 57720 }, { "epoch": 0.944612615560828, "grad_norm": 0.11731707107158314, "learning_rate": 9.316034327695678e-08, "loss": 0.0018, "step": 57730 }, { "epoch": 0.9447762415119038, "grad_norm": 0.10439705564443469, "learning_rate": 9.261242898270628e-08, "loss": 0.0015, "step": 57740 }, { "epoch": 0.9449398674629796, "grad_norm": 0.2118576170435444, "learning_rate": 9.206611563686462e-08, "loss": 0.002, "step": 57750 }, { "epoch": 0.9451034934140554, "grad_norm": 0.23106391372005236, "learning_rate": 9.152140341765758e-08, "loss": 0.0032, "step": 57760 }, { "epoch": 0.9452671193651313, "grad_norm": 0.13841575980113488, "learning_rate": 9.097829250278745e-08, "loss": 0.0026, "step": 57770 }, { "epoch": 0.9454307453162072, "grad_norm": 0.24185666531752786, "learning_rate": 9.043678306943415e-08, "loss": 0.0021, "step": 57780 }, { "epoch": 0.945594371267283, "grad_norm": 0.04672540598081172, "learning_rate": 8.98968752942564e-08, "loss": 0.0012, "step": 57790 }, { "epoch": 0.9457579972183588, "grad_norm": 0.059538991817699075, "learning_rate": 8.935856935338938e-08, "loss": 0.0013, "step": 57800 }, { "epoch": 0.9459216231694346, "grad_norm": 0.3392578938019102, "learning_rate": 8.882186542244541e-08, "loss": 0.0031, "step": 57810 }, { "epoch": 0.9460852491205105, "grad_norm": 0.31083706253497007, "learning_rate": 8.828676367651557e-08, "loss": 0.0013, "step": 57820 }, { "epoch": 0.9462488750715864, "grad_norm": 0.1860213339422809, "learning_rate": 8.775326429016629e-08, "loss": 0.0016, "step": 57830 }, { "epoch": 0.9464125010226622, "grad_norm": 0.1705466382144579, "learning_rate": 8.722136743744226e-08, "loss": 0.0023, "step": 57840 }, { "epoch": 0.946576126973738, "grad_norm": 0.07463009332992492, "learning_rate": 8.669107329186632e-08, "loss": 0.002, "step": 57850 }, { "epoch": 0.9467397529248138, "grad_norm": 0.07838546247182297, "learning_rate": 8.616238202643734e-08, "loss": 0.0021, "step": 57860 }, { "epoch": 0.9469033788758897, "grad_norm": 0.17287996555879356, "learning_rate": 8.563529381363122e-08, "loss": 0.0019, "step": 57870 }, { "epoch": 0.9470670048269656, "grad_norm": 0.07361842165655666, "learning_rate": 8.510980882540154e-08, "loss": 0.0034, "step": 57880 }, { "epoch": 0.9472306307780414, "grad_norm": 0.11886692029617128, "learning_rate": 8.458592723317782e-08, "loss": 0.0017, "step": 57890 }, { "epoch": 0.9473942567291173, "grad_norm": 0.10832935220816493, "learning_rate": 8.406364920786725e-08, "loss": 0.001, "step": 57900 }, { "epoch": 0.947557882680193, "grad_norm": 0.13630495427533593, "learning_rate": 8.354297491985464e-08, "loss": 0.0035, "step": 57910 }, { "epoch": 0.9477215086312689, "grad_norm": 0.07950426101852105, "learning_rate": 8.302390453900022e-08, "loss": 0.0025, "step": 57920 }, { "epoch": 0.9478851345823448, "grad_norm": 0.1487496874647099, "learning_rate": 8.250643823464133e-08, "loss": 0.0019, "step": 57930 }, { "epoch": 0.9480487605334206, "grad_norm": 0.020843188758355054, "learning_rate": 8.199057617559236e-08, "loss": 0.001, "step": 57940 }, { "epoch": 0.9482123864844965, "grad_norm": 0.2952459486863922, "learning_rate": 8.147631853014482e-08, "loss": 0.0018, "step": 57950 }, { "epoch": 0.9483760124355722, "grad_norm": 0.08148179194821677, "learning_rate": 8.096366546606449e-08, "loss": 0.0025, "step": 57960 }, { "epoch": 0.9485396383866481, "grad_norm": 0.10041543337758739, "learning_rate": 8.045261715059704e-08, "loss": 0.0013, "step": 57970 }, { "epoch": 0.948703264337724, "grad_norm": 0.10200542727009591, "learning_rate": 7.994317375046245e-08, "loss": 0.0033, "step": 57980 }, { "epoch": 0.9488668902887998, "grad_norm": 0.1860606213210743, "learning_rate": 7.943533543185721e-08, "loss": 0.0016, "step": 57990 }, { "epoch": 0.9490305162398757, "grad_norm": 0.24175031837053038, "learning_rate": 7.892910236045548e-08, "loss": 0.0015, "step": 58000 }, { "epoch": 0.9491941421909514, "grad_norm": 0.20399133500850378, "learning_rate": 7.842447470140568e-08, "loss": 0.0019, "step": 58010 }, { "epoch": 0.9493577681420273, "grad_norm": 0.15817768432957002, "learning_rate": 7.792145261933448e-08, "loss": 0.0022, "step": 58020 }, { "epoch": 0.9495213940931032, "grad_norm": 0.16162145096613162, "learning_rate": 7.742003627834338e-08, "loss": 0.0014, "step": 58030 }, { "epoch": 0.949685020044179, "grad_norm": 0.18688942702686606, "learning_rate": 7.692022584201154e-08, "loss": 0.0021, "step": 58040 }, { "epoch": 0.9498486459952549, "grad_norm": 0.11563409014251906, "learning_rate": 7.64220214733924e-08, "loss": 0.001, "step": 58050 }, { "epoch": 0.9500122719463306, "grad_norm": 0.051110764280992774, "learning_rate": 7.592542333501651e-08, "loss": 0.0011, "step": 58060 }, { "epoch": 0.9501758978974065, "grad_norm": 0.03884345266024981, "learning_rate": 7.54304315888904e-08, "loss": 0.0013, "step": 58070 }, { "epoch": 0.9503395238484824, "grad_norm": 0.054317386873103, "learning_rate": 7.493704639649602e-08, "loss": 0.0023, "step": 58080 }, { "epoch": 0.9505031497995582, "grad_norm": 0.07201268639119436, "learning_rate": 7.444526791879181e-08, "loss": 0.0012, "step": 58090 }, { "epoch": 0.9506667757506341, "grad_norm": 0.21490043800639175, "learning_rate": 7.395509631621223e-08, "loss": 0.0034, "step": 58100 }, { "epoch": 0.9508304017017098, "grad_norm": 0.15654913306028245, "learning_rate": 7.346653174866603e-08, "loss": 0.0013, "step": 58110 }, { "epoch": 0.9509940276527857, "grad_norm": 0.14601754224007132, "learning_rate": 7.297957437554016e-08, "loss": 0.0018, "step": 58120 }, { "epoch": 0.9511576536038616, "grad_norm": 0.07126028025654964, "learning_rate": 7.249422435569475e-08, "loss": 0.0025, "step": 58130 }, { "epoch": 0.9513212795549374, "grad_norm": 0.0694428526235549, "learning_rate": 7.201048184746706e-08, "loss": 0.0022, "step": 58140 }, { "epoch": 0.9514849055060133, "grad_norm": 0.08929014996456064, "learning_rate": 7.152834700866918e-08, "loss": 0.0019, "step": 58150 }, { "epoch": 0.951648531457089, "grad_norm": 0.060434368732969986, "learning_rate": 7.104781999658972e-08, "loss": 0.0014, "step": 58160 }, { "epoch": 0.9518121574081649, "grad_norm": 0.15602636410976803, "learning_rate": 7.056890096799219e-08, "loss": 0.0011, "step": 58170 }, { "epoch": 0.9519757833592408, "grad_norm": 0.0996863302610937, "learning_rate": 7.009159007911437e-08, "loss": 0.0017, "step": 58180 }, { "epoch": 0.9521394093103166, "grad_norm": 0.12972064806547276, "learning_rate": 6.961588748567116e-08, "loss": 0.0021, "step": 58190 }, { "epoch": 0.9523030352613925, "grad_norm": 0.08658670534574064, "learning_rate": 6.914179334285231e-08, "loss": 0.0015, "step": 58200 }, { "epoch": 0.9524666612124683, "grad_norm": 0.15722632037480724, "learning_rate": 6.866930780532244e-08, "loss": 0.0018, "step": 58210 }, { "epoch": 0.9526302871635441, "grad_norm": 0.017855789143822184, "learning_rate": 6.819843102722156e-08, "loss": 0.0019, "step": 58220 }, { "epoch": 0.95279391311462, "grad_norm": 0.11670280063917686, "learning_rate": 6.772916316216515e-08, "loss": 0.0015, "step": 58230 }, { "epoch": 0.9529575390656958, "grad_norm": 0.07319547865123004, "learning_rate": 6.726150436324297e-08, "loss": 0.0016, "step": 58240 }, { "epoch": 0.9531211650167717, "grad_norm": 0.1718648617408185, "learning_rate": 6.67954547830213e-08, "loss": 0.0015, "step": 58250 }, { "epoch": 0.9532847909678475, "grad_norm": 0.1570285759968204, "learning_rate": 6.633101457353963e-08, "loss": 0.0021, "step": 58260 }, { "epoch": 0.9534484169189233, "grad_norm": 0.08621283209454726, "learning_rate": 6.5868183886314e-08, "loss": 0.0022, "step": 58270 }, { "epoch": 0.9536120428699992, "grad_norm": 0.1883249645522094, "learning_rate": 6.540696287233473e-08, "loss": 0.0014, "step": 58280 }, { "epoch": 0.953775668821075, "grad_norm": 0.11254000514423784, "learning_rate": 6.4947351682067e-08, "loss": 0.0014, "step": 58290 }, { "epoch": 0.9539392947721509, "grad_norm": 0.08483314696834378, "learning_rate": 6.448935046545035e-08, "loss": 0.0021, "step": 58300 }, { "epoch": 0.9541029207232267, "grad_norm": 0.09377186964172586, "learning_rate": 6.403295937190024e-08, "loss": 0.0014, "step": 58310 }, { "epoch": 0.9542665466743026, "grad_norm": 0.2818619264692372, "learning_rate": 6.357817855030535e-08, "loss": 0.0021, "step": 58320 }, { "epoch": 0.9544301726253784, "grad_norm": 0.1573992950822469, "learning_rate": 6.312500814903089e-08, "loss": 0.0027, "step": 58330 }, { "epoch": 0.9545937985764542, "grad_norm": 0.10990883119366637, "learning_rate": 6.267344831591471e-08, "loss": 0.0025, "step": 58340 }, { "epoch": 0.9547574245275301, "grad_norm": 0.05807698840918103, "learning_rate": 6.22234991982712e-08, "loss": 0.0031, "step": 58350 }, { "epoch": 0.9549210504786059, "grad_norm": 0.13845174000889665, "learning_rate": 6.177516094288794e-08, "loss": 0.0037, "step": 58360 }, { "epoch": 0.9550846764296818, "grad_norm": 0.2171505790129501, "learning_rate": 6.132843369602626e-08, "loss": 0.0024, "step": 58370 }, { "epoch": 0.9552483023807576, "grad_norm": 0.09716249160551478, "learning_rate": 6.088331760342514e-08, "loss": 0.0028, "step": 58380 }, { "epoch": 0.9554119283318334, "grad_norm": 0.1062334325527875, "learning_rate": 6.043981281029399e-08, "loss": 0.0024, "step": 58390 }, { "epoch": 0.9555755542829093, "grad_norm": 0.056018049459040584, "learning_rate": 5.999791946131872e-08, "loss": 0.0014, "step": 58400 }, { "epoch": 0.9557391802339851, "grad_norm": 0.11403456451433759, "learning_rate": 5.95576377006607e-08, "loss": 0.0009, "step": 58410 }, { "epoch": 0.955902806185061, "grad_norm": 0.036328271352262125, "learning_rate": 5.91189676719528e-08, "loss": 0.0027, "step": 58420 }, { "epoch": 0.9560664321361368, "grad_norm": 0.07385231077441959, "learning_rate": 5.868190951830277e-08, "loss": 0.0022, "step": 58430 }, { "epoch": 0.9562300580872126, "grad_norm": 0.09146875614916235, "learning_rate": 5.824646338229434e-08, "loss": 0.001, "step": 58440 }, { "epoch": 0.9563936840382885, "grad_norm": 0.06927950598409106, "learning_rate": 5.781262940598331e-08, "loss": 0.0025, "step": 58450 }, { "epoch": 0.9565573099893643, "grad_norm": 0.05861033656856944, "learning_rate": 5.7380407730900365e-08, "loss": 0.0014, "step": 58460 }, { "epoch": 0.9567209359404402, "grad_norm": 0.09198446793586258, "learning_rate": 5.69497984980516e-08, "loss": 0.0024, "step": 58470 }, { "epoch": 0.9568845618915159, "grad_norm": 0.08272597589262085, "learning_rate": 5.6520801847914086e-08, "loss": 0.0023, "step": 58480 }, { "epoch": 0.9570481878425918, "grad_norm": 0.49210044509054446, "learning_rate": 5.609341792044032e-08, "loss": 0.002, "step": 58490 }, { "epoch": 0.9572118137936677, "grad_norm": 0.1264082969257141, "learning_rate": 5.566764685505766e-08, "loss": 0.0018, "step": 58500 }, { "epoch": 0.9573754397447435, "grad_norm": 0.043639992297095156, "learning_rate": 5.52434887906661e-08, "loss": 0.0019, "step": 58510 }, { "epoch": 0.9575390656958194, "grad_norm": 0.06063293893603418, "learning_rate": 5.4820943865639406e-08, "loss": 0.0016, "step": 58520 }, { "epoch": 0.9577026916468951, "grad_norm": 0.10659193134897706, "learning_rate": 5.4400012217826203e-08, "loss": 0.0016, "step": 58530 }, { "epoch": 0.957866317597971, "grad_norm": 0.055529780267450785, "learning_rate": 5.398069398454664e-08, "loss": 0.0013, "step": 58540 }, { "epoch": 0.9580299435490469, "grad_norm": 0.0952217861096849, "learning_rate": 5.3562989302596844e-08, "loss": 0.0025, "step": 58550 }, { "epoch": 0.9581935695001227, "grad_norm": 0.05072254899464642, "learning_rate": 5.3146898308245044e-08, "loss": 0.0014, "step": 58560 }, { "epoch": 0.9583571954511986, "grad_norm": 0.020095364657897512, "learning_rate": 5.273242113723431e-08, "loss": 0.0021, "step": 58570 }, { "epoch": 0.9585208214022743, "grad_norm": 0.1907754411091576, "learning_rate": 5.231955792477983e-08, "loss": 0.0037, "step": 58580 }, { "epoch": 0.9586844473533502, "grad_norm": 0.045497601401816176, "learning_rate": 5.190830880557163e-08, "loss": 0.0009, "step": 58590 }, { "epoch": 0.9588480733044261, "grad_norm": 0.15124947919922588, "learning_rate": 5.149867391377183e-08, "loss": 0.001, "step": 58600 }, { "epoch": 0.9590116992555019, "grad_norm": 0.09432583288913221, "learning_rate": 5.109065338301633e-08, "loss": 0.0016, "step": 58610 }, { "epoch": 0.9591753252065778, "grad_norm": 0.09533512774151824, "learning_rate": 5.068424734641531e-08, "loss": 0.0014, "step": 58620 }, { "epoch": 0.9593389511576536, "grad_norm": 0.04971137859136583, "learning_rate": 5.027945593655159e-08, "loss": 0.0019, "step": 58630 }, { "epoch": 0.9595025771087294, "grad_norm": 0.14007880921430796, "learning_rate": 4.987627928548011e-08, "loss": 0.0013, "step": 58640 }, { "epoch": 0.9596662030598053, "grad_norm": 0.32399685364011604, "learning_rate": 4.9474717524731766e-08, "loss": 0.0019, "step": 58650 }, { "epoch": 0.9598298290108811, "grad_norm": 0.23621912378973192, "learning_rate": 4.9074770785307314e-08, "loss": 0.0025, "step": 58660 }, { "epoch": 0.959993454961957, "grad_norm": 0.07226511489232992, "learning_rate": 4.8676439197683503e-08, "loss": 0.002, "step": 58670 }, { "epoch": 0.9601570809130328, "grad_norm": 0.12753092311203978, "learning_rate": 4.8279722891808045e-08, "loss": 0.0028, "step": 58680 }, { "epoch": 0.9603207068641086, "grad_norm": 0.1713443720081189, "learning_rate": 4.788462199710409e-08, "loss": 0.0016, "step": 58690 }, { "epoch": 0.9604843328151845, "grad_norm": 0.1619404154763327, "learning_rate": 4.749113664246463e-08, "loss": 0.0017, "step": 58700 }, { "epoch": 0.9606479587662603, "grad_norm": 0.1762528319638496, "learning_rate": 4.7099266956258106e-08, "loss": 0.0013, "step": 58710 }, { "epoch": 0.9608115847173362, "grad_norm": 0.13254920026153588, "learning_rate": 4.6709013066325584e-08, "loss": 0.0019, "step": 58720 }, { "epoch": 0.960975210668412, "grad_norm": 0.1114083813222817, "learning_rate": 4.632037509997911e-08, "loss": 0.0018, "step": 58730 }, { "epoch": 0.9611388366194878, "grad_norm": 0.14792282514812852, "learning_rate": 4.593335318400616e-08, "loss": 0.002, "step": 58740 }, { "epoch": 0.9613024625705637, "grad_norm": 0.07469875358178601, "learning_rate": 4.554794744466518e-08, "loss": 0.0013, "step": 58750 }, { "epoch": 0.9614660885216395, "grad_norm": 0.1330607806646815, "learning_rate": 4.516415800768836e-08, "loss": 0.0026, "step": 58760 }, { "epoch": 0.9616297144727154, "grad_norm": 0.05893461980846049, "learning_rate": 4.4781984998280016e-08, "loss": 0.0017, "step": 58770 }, { "epoch": 0.9617933404237912, "grad_norm": 0.21197573672220962, "learning_rate": 4.4401428541117065e-08, "loss": 0.0018, "step": 58780 }, { "epoch": 0.961956966374867, "grad_norm": 0.035055742843520976, "learning_rate": 4.40224887603502e-08, "loss": 0.0018, "step": 58790 }, { "epoch": 0.9621205923259429, "grad_norm": 0.1767009529574022, "learning_rate": 4.364516577960054e-08, "loss": 0.0021, "step": 58800 }, { "epoch": 0.9622842182770187, "grad_norm": 0.03855386249913598, "learning_rate": 4.326945972196406e-08, "loss": 0.0011, "step": 58810 }, { "epoch": 0.9624478442280946, "grad_norm": 0.04992735238216703, "learning_rate": 4.289537071000827e-08, "loss": 0.0014, "step": 58820 }, { "epoch": 0.9626114701791704, "grad_norm": 0.29948722544256373, "learning_rate": 4.252289886577221e-08, "loss": 0.0021, "step": 58830 }, { "epoch": 0.9627750961302463, "grad_norm": 0.4223010527745787, "learning_rate": 4.215204431076869e-08, "loss": 0.0021, "step": 58840 }, { "epoch": 0.9629387220813221, "grad_norm": 0.5181825178333329, "learning_rate": 4.17828071659826e-08, "loss": 0.0033, "step": 58850 }, { "epoch": 0.9631023480323979, "grad_norm": 0.2576587417736698, "learning_rate": 4.1415187551870904e-08, "loss": 0.0026, "step": 58860 }, { "epoch": 0.9632659739834738, "grad_norm": 0.1288760711502654, "learning_rate": 4.1049185588363235e-08, "loss": 0.0013, "step": 58870 }, { "epoch": 0.9634295999345496, "grad_norm": 0.1157015071951014, "learning_rate": 4.068480139486075e-08, "loss": 0.003, "step": 58880 }, { "epoch": 0.9635932258856255, "grad_norm": 0.09418429479830841, "learning_rate": 4.03220350902378e-08, "loss": 0.0024, "step": 58890 }, { "epoch": 0.9637568518367013, "grad_norm": 0.15711689506567122, "learning_rate": 3.996088679284027e-08, "loss": 0.0012, "step": 58900 }, { "epoch": 0.9639204777877771, "grad_norm": 0.1982201105431582, "learning_rate": 3.96013566204867e-08, "loss": 0.0019, "step": 58910 }, { "epoch": 0.964084103738853, "grad_norm": 0.08307877563910385, "learning_rate": 3.9243444690466595e-08, "loss": 0.0014, "step": 58920 }, { "epoch": 0.9642477296899288, "grad_norm": 0.13529120702789255, "learning_rate": 3.8887151119543224e-08, "loss": 0.0034, "step": 58930 }, { "epoch": 0.9644113556410047, "grad_norm": 0.06915962588247587, "learning_rate": 3.8532476023950826e-08, "loss": 0.002, "step": 58940 }, { "epoch": 0.9645749815920805, "grad_norm": 0.163514832598477, "learning_rate": 3.81794195193963e-08, "loss": 0.0012, "step": 58950 }, { "epoch": 0.9647386075431563, "grad_norm": 0.1861182780364438, "learning_rate": 3.7827981721057506e-08, "loss": 0.0021, "step": 58960 }, { "epoch": 0.9649022334942322, "grad_norm": 0.08265726360160489, "learning_rate": 3.747816274358496e-08, "loss": 0.0035, "step": 58970 }, { "epoch": 0.965065859445308, "grad_norm": 0.09025027947093056, "learning_rate": 3.7129962701100695e-08, "loss": 0.0018, "step": 58980 }, { "epoch": 0.9652294853963839, "grad_norm": 0.026983059154927706, "learning_rate": 3.6783381707198864e-08, "loss": 0.0021, "step": 58990 }, { "epoch": 0.9653931113474598, "grad_norm": 0.10786923256664574, "learning_rate": 3.6438419874946226e-08, "loss": 0.0017, "step": 59000 }, { "epoch": 0.9655567372985355, "grad_norm": 0.18649585935766294, "learning_rate": 3.609507731687945e-08, "loss": 0.002, "step": 59010 }, { "epoch": 0.9657203632496114, "grad_norm": 0.1826757814165257, "learning_rate": 3.575335414500836e-08, "loss": 0.0019, "step": 59020 }, { "epoch": 0.9658839892006872, "grad_norm": 0.14165149828885565, "learning_rate": 3.5413250470814345e-08, "loss": 0.0016, "step": 59030 }, { "epoch": 0.9660476151517631, "grad_norm": 0.09198019882211068, "learning_rate": 3.507476640524976e-08, "loss": 0.0031, "step": 59040 }, { "epoch": 0.966211241102839, "grad_norm": 0.32003227795756034, "learning_rate": 3.473790205873906e-08, "loss": 0.002, "step": 59050 }, { "epoch": 0.9663748670539147, "grad_norm": 0.04296782108078748, "learning_rate": 3.4402657541178774e-08, "loss": 0.0016, "step": 59060 }, { "epoch": 0.9665384930049906, "grad_norm": 0.1058783659322161, "learning_rate": 3.406903296193587e-08, "loss": 0.0018, "step": 59070 }, { "epoch": 0.9667021189560664, "grad_norm": 0.0658202332603944, "learning_rate": 3.373702842984994e-08, "loss": 0.0017, "step": 59080 }, { "epoch": 0.9668657449071423, "grad_norm": 0.05841415716324278, "learning_rate": 3.3406644053231575e-08, "loss": 0.0014, "step": 59090 }, { "epoch": 0.9670293708582182, "grad_norm": 0.14747002710639198, "learning_rate": 3.307787993986289e-08, "loss": 0.0021, "step": 59100 }, { "epoch": 0.9671929968092939, "grad_norm": 0.07797708003476621, "learning_rate": 3.275073619699698e-08, "loss": 0.0011, "step": 59110 }, { "epoch": 0.9673566227603698, "grad_norm": 0.061814869828232005, "learning_rate": 3.2425212931358474e-08, "loss": 0.0031, "step": 59120 }, { "epoch": 0.9675202487114456, "grad_norm": 0.21167295534928415, "learning_rate": 3.210131024914465e-08, "loss": 0.0012, "step": 59130 }, { "epoch": 0.9676838746625215, "grad_norm": 0.19233268342782223, "learning_rate": 3.177902825602208e-08, "loss": 0.0016, "step": 59140 }, { "epoch": 0.9678475006135974, "grad_norm": 0.05341542310600103, "learning_rate": 3.1458367057130014e-08, "loss": 0.0019, "step": 59150 }, { "epoch": 0.9680111265646731, "grad_norm": 0.08800903468986607, "learning_rate": 3.113932675707809e-08, "loss": 0.0016, "step": 59160 }, { "epoch": 0.968174752515749, "grad_norm": 0.07436792341772674, "learning_rate": 3.0821907459948065e-08, "loss": 0.0015, "step": 59170 }, { "epoch": 0.9683383784668248, "grad_norm": 0.08332377943824636, "learning_rate": 3.0506109269292096e-08, "loss": 0.0014, "step": 59180 }, { "epoch": 0.9685020044179007, "grad_norm": 0.04003653261401171, "learning_rate": 3.019193228813388e-08, "loss": 0.0017, "step": 59190 }, { "epoch": 0.9686656303689766, "grad_norm": 0.11065206703349333, "learning_rate": 2.987937661896756e-08, "loss": 0.0022, "step": 59200 }, { "epoch": 0.9688292563200523, "grad_norm": 0.14535483657078666, "learning_rate": 2.9568442363759886e-08, "loss": 0.0026, "step": 59210 }, { "epoch": 0.9689928822711282, "grad_norm": 0.2643876476246675, "learning_rate": 2.9259129623946947e-08, "loss": 0.0026, "step": 59220 }, { "epoch": 0.969156508222204, "grad_norm": 0.3101119129184491, "learning_rate": 2.8951438500436356e-08, "loss": 0.0028, "step": 59230 }, { "epoch": 0.9693201341732799, "grad_norm": 0.1360174006912135, "learning_rate": 2.8645369093607822e-08, "loss": 0.0012, "step": 59240 }, { "epoch": 0.9694837601243558, "grad_norm": 0.08346650051480006, "learning_rate": 2.834092150331036e-08, "loss": 0.0015, "step": 59250 }, { "epoch": 0.9696473860754315, "grad_norm": 0.4930706098389054, "learning_rate": 2.8038095828865076e-08, "loss": 0.0029, "step": 59260 }, { "epoch": 0.9698110120265074, "grad_norm": 0.25214083220587347, "learning_rate": 2.7736892169062946e-08, "loss": 0.0016, "step": 59270 }, { "epoch": 0.9699746379775832, "grad_norm": 0.09960147036513034, "learning_rate": 2.743731062216648e-08, "loss": 0.0049, "step": 59280 }, { "epoch": 0.9701382639286591, "grad_norm": 0.21324623017451128, "learning_rate": 2.713935128590861e-08, "loss": 0.0016, "step": 59290 }, { "epoch": 0.970301889879735, "grad_norm": 0.27334612285223886, "learning_rate": 2.684301425749436e-08, "loss": 0.0008, "step": 59300 }, { "epoch": 0.9704655158308108, "grad_norm": 0.11816460247486872, "learning_rate": 2.654829963359751e-08, "loss": 0.0021, "step": 59310 }, { "epoch": 0.9706291417818866, "grad_norm": 0.04813337233950942, "learning_rate": 2.6255207510363922e-08, "loss": 0.0021, "step": 59320 }, { "epoch": 0.9707927677329624, "grad_norm": 0.17738386614394322, "learning_rate": 2.5963737983408788e-08, "loss": 0.0038, "step": 59330 }, { "epoch": 0.9709563936840383, "grad_norm": 0.24040021250330543, "learning_rate": 2.5673891147820485e-08, "loss": 0.002, "step": 59340 }, { "epoch": 0.9711200196351142, "grad_norm": 0.19466918878162232, "learning_rate": 2.538566709815504e-08, "loss": 0.0025, "step": 59350 }, { "epoch": 0.97128364558619, "grad_norm": 0.17773145149528702, "learning_rate": 2.5099065928441134e-08, "loss": 0.0034, "step": 59360 }, { "epoch": 0.9714472715372658, "grad_norm": 0.05459227072386862, "learning_rate": 2.4814087732177306e-08, "loss": 0.0012, "step": 59370 }, { "epoch": 0.9716108974883416, "grad_norm": 0.08172895539775601, "learning_rate": 2.4530732602332518e-08, "loss": 0.0021, "step": 59380 }, { "epoch": 0.9717745234394175, "grad_norm": 0.14318585848052304, "learning_rate": 2.4249000631346166e-08, "loss": 0.0017, "step": 59390 }, { "epoch": 0.9719381493904933, "grad_norm": 0.04051257625889486, "learning_rate": 2.396889191112861e-08, "loss": 0.0013, "step": 59400 }, { "epoch": 0.9721017753415692, "grad_norm": 0.107637063203466, "learning_rate": 2.3690406533060094e-08, "loss": 0.0029, "step": 59410 }, { "epoch": 0.972265401292645, "grad_norm": 0.11457841291240357, "learning_rate": 2.3413544587991833e-08, "loss": 0.0024, "step": 59420 }, { "epoch": 0.9724290272437208, "grad_norm": 0.2300968102212999, "learning_rate": 2.3138306166244907e-08, "loss": 0.0019, "step": 59430 }, { "epoch": 0.9725926531947967, "grad_norm": 0.18972001386808235, "learning_rate": 2.286469135761138e-08, "loss": 0.0016, "step": 59440 }, { "epoch": 0.9727562791458725, "grad_norm": 0.04985368895327254, "learning_rate": 2.2592700251353182e-08, "loss": 0.0023, "step": 59450 }, { "epoch": 0.9729199050969484, "grad_norm": 0.03357155345479672, "learning_rate": 2.2322332936202117e-08, "loss": 0.0012, "step": 59460 }, { "epoch": 0.9730835310480243, "grad_norm": 0.09113797402212406, "learning_rate": 2.2053589500360962e-08, "loss": 0.0022, "step": 59470 }, { "epoch": 0.9732471569991, "grad_norm": 0.21901168326204895, "learning_rate": 2.178647003150236e-08, "loss": 0.0022, "step": 59480 }, { "epoch": 0.9734107829501759, "grad_norm": 0.2551297975725779, "learning_rate": 2.1520974616769942e-08, "loss": 0.0025, "step": 59490 }, { "epoch": 0.9735744089012517, "grad_norm": 0.0618221506773981, "learning_rate": 2.125710334277664e-08, "loss": 0.002, "step": 59500 }, { "epoch": 0.9737380348523276, "grad_norm": 0.03967075894498542, "learning_rate": 2.0994856295604716e-08, "loss": 0.0023, "step": 59510 }, { "epoch": 0.9739016608034035, "grad_norm": 0.2032083535959931, "learning_rate": 2.0734233560809613e-08, "loss": 0.0019, "step": 59520 }, { "epoch": 0.9740652867544792, "grad_norm": 0.12680773828821879, "learning_rate": 2.0475235223413326e-08, "loss": 0.0021, "step": 59530 }, { "epoch": 0.9742289127055551, "grad_norm": 0.14590244319232062, "learning_rate": 2.0217861367909374e-08, "loss": 0.0023, "step": 59540 }, { "epoch": 0.9743925386566309, "grad_norm": 0.07087200504061775, "learning_rate": 1.996211207826282e-08, "loss": 0.0019, "step": 59550 }, { "epoch": 0.9745561646077068, "grad_norm": 0.12425254022015622, "learning_rate": 1.9707987437906918e-08, "loss": 0.002, "step": 59560 }, { "epoch": 0.9747197905587827, "grad_norm": 0.17336981494620313, "learning_rate": 1.9455487529744243e-08, "loss": 0.0028, "step": 59570 }, { "epoch": 0.9748834165098584, "grad_norm": 0.043014389223232675, "learning_rate": 1.9204612436148906e-08, "loss": 0.0023, "step": 59580 }, { "epoch": 0.9750470424609343, "grad_norm": 0.12651873851608017, "learning_rate": 1.8955362238965434e-08, "loss": 0.0015, "step": 59590 }, { "epoch": 0.9752106684120101, "grad_norm": 0.081356149594276, "learning_rate": 1.870773701950601e-08, "loss": 0.0011, "step": 59600 }, { "epoch": 0.975374294363086, "grad_norm": 0.11469312910784575, "learning_rate": 1.84617368585549e-08, "loss": 0.0019, "step": 59610 }, { "epoch": 0.9755379203141619, "grad_norm": 0.3527551942721497, "learning_rate": 1.8217361836365133e-08, "loss": 0.0035, "step": 59620 }, { "epoch": 0.9757015462652376, "grad_norm": 0.38648847626159505, "learning_rate": 1.7974612032659045e-08, "loss": 0.0021, "step": 59630 }, { "epoch": 0.9758651722163135, "grad_norm": 0.10299573418207243, "learning_rate": 1.773348752663051e-08, "loss": 0.0015, "step": 59640 }, { "epoch": 0.9760287981673893, "grad_norm": 0.10301574535942533, "learning_rate": 1.7493988396941054e-08, "loss": 0.0019, "step": 59650 }, { "epoch": 0.9761924241184652, "grad_norm": 0.11802126826548867, "learning_rate": 1.7256114721724282e-08, "loss": 0.002, "step": 59660 }, { "epoch": 0.9763560500695411, "grad_norm": 0.0030538399335342265, "learning_rate": 1.7019866578580903e-08, "loss": 0.0014, "step": 59670 }, { "epoch": 0.9765196760206168, "grad_norm": 0.136377095464529, "learning_rate": 1.6785244044583704e-08, "loss": 0.0016, "step": 59680 }, { "epoch": 0.9766833019716927, "grad_norm": 0.16419438578215229, "learning_rate": 1.6552247196273685e-08, "loss": 0.0041, "step": 59690 }, { "epoch": 0.9768469279227685, "grad_norm": 0.09723657990337446, "learning_rate": 1.6320876109662263e-08, "loss": 0.0023, "step": 59700 }, { "epoch": 0.9770105538738444, "grad_norm": 0.03841561328032325, "learning_rate": 1.609113086022962e-08, "loss": 0.0014, "step": 59710 }, { "epoch": 0.9771741798249203, "grad_norm": 0.2924383671696704, "learning_rate": 1.586301152292691e-08, "loss": 0.0028, "step": 59720 }, { "epoch": 0.977337805775996, "grad_norm": 0.07734073986322952, "learning_rate": 1.563651817217293e-08, "loss": 0.002, "step": 59730 }, { "epoch": 0.9775014317270719, "grad_norm": 0.16447490146375676, "learning_rate": 1.5411650881858032e-08, "loss": 0.0015, "step": 59740 }, { "epoch": 0.9776650576781477, "grad_norm": 0.011035378061128838, "learning_rate": 1.5188409725341303e-08, "loss": 0.0023, "step": 59750 }, { "epoch": 0.9778286836292236, "grad_norm": 0.22294155272790692, "learning_rate": 1.4966794775450045e-08, "loss": 0.0017, "step": 59760 }, { "epoch": 0.9779923095802995, "grad_norm": 0.0329407718034747, "learning_rate": 1.4746806104483647e-08, "loss": 0.0013, "step": 59770 }, { "epoch": 0.9781559355313753, "grad_norm": 0.19837866998813475, "learning_rate": 1.452844378420859e-08, "loss": 0.0021, "step": 59780 }, { "epoch": 0.9783195614824511, "grad_norm": 0.23418321617723242, "learning_rate": 1.4311707885862336e-08, "loss": 0.0024, "step": 59790 }, { "epoch": 0.9784831874335269, "grad_norm": 0.16701794287448482, "learning_rate": 1.4096598480150547e-08, "loss": 0.0012, "step": 59800 }, { "epoch": 0.9786468133846028, "grad_norm": 0.05403760177995624, "learning_rate": 1.3883115637249311e-08, "loss": 0.0019, "step": 59810 }, { "epoch": 0.9788104393356787, "grad_norm": 0.20010892685513446, "learning_rate": 1.3671259426802919e-08, "loss": 0.0019, "step": 59820 }, { "epoch": 0.9789740652867545, "grad_norm": 0.05099498487915146, "learning_rate": 1.3461029917926639e-08, "loss": 0.0023, "step": 59830 }, { "epoch": 0.9791376912378303, "grad_norm": 0.19569519818610545, "learning_rate": 1.3252427179203942e-08, "loss": 0.0025, "step": 59840 }, { "epoch": 0.9793013171889061, "grad_norm": 0.16812385832725257, "learning_rate": 1.304545127868706e-08, "loss": 0.0018, "step": 59850 }, { "epoch": 0.979464943139982, "grad_norm": 0.37850236999141923, "learning_rate": 1.2840102283899203e-08, "loss": 0.0019, "step": 59860 }, { "epoch": 0.9796285690910579, "grad_norm": 0.06222418048503737, "learning_rate": 1.2636380261831227e-08, "loss": 0.0017, "step": 59870 }, { "epoch": 0.9797921950421337, "grad_norm": 0.02610674765750475, "learning_rate": 1.2434285278943858e-08, "loss": 0.0013, "step": 59880 }, { "epoch": 0.9799558209932095, "grad_norm": 0.0649896475655067, "learning_rate": 1.2233817401167136e-08, "loss": 0.0023, "step": 59890 }, { "epoch": 0.9801194469442853, "grad_norm": 0.14589999646242188, "learning_rate": 1.2034976693900414e-08, "loss": 0.0017, "step": 59900 }, { "epoch": 0.9802830728953612, "grad_norm": 0.1447093633269783, "learning_rate": 1.1837763222011245e-08, "loss": 0.0022, "step": 59910 }, { "epoch": 0.9804466988464371, "grad_norm": 0.08877573551656542, "learning_rate": 1.1642177049837612e-08, "loss": 0.0014, "step": 59920 }, { "epoch": 0.9806103247975129, "grad_norm": 0.17573779159000005, "learning_rate": 1.1448218241186249e-08, "loss": 0.0019, "step": 59930 }, { "epoch": 0.9807739507485888, "grad_norm": 0.009034029920050495, "learning_rate": 1.1255886859332099e-08, "loss": 0.0022, "step": 59940 }, { "epoch": 0.9809375766996645, "grad_norm": 0.14292365039736996, "learning_rate": 1.1065182967020526e-08, "loss": 0.0016, "step": 59950 }, { "epoch": 0.9811012026507404, "grad_norm": 0.05376385954739454, "learning_rate": 1.08761066264651e-08, "loss": 0.0018, "step": 59960 }, { "epoch": 0.9812648286018163, "grad_norm": 0.11353706314926723, "learning_rate": 1.0688657899348699e-08, "loss": 0.0018, "step": 59970 }, { "epoch": 0.9814284545528921, "grad_norm": 0.10294632008534421, "learning_rate": 1.0502836846822405e-08, "loss": 0.0015, "step": 59980 }, { "epoch": 0.981592080503968, "grad_norm": 0.17648528721068663, "learning_rate": 1.0318643529508287e-08, "loss": 0.0031, "step": 59990 }, { "epoch": 0.9817557064550437, "grad_norm": 0.18783365905344576, "learning_rate": 1.0136078007494944e-08, "loss": 0.003, "step": 60000 }, { "epoch": 0.9819193324061196, "grad_norm": 0.48773854582930476, "learning_rate": 9.955140340341951e-09, "loss": 0.0027, "step": 60010 }, { "epoch": 0.9820829583571955, "grad_norm": 0.10260869997809423, "learning_rate": 9.775830587077095e-09, "loss": 0.0019, "step": 60020 }, { "epoch": 0.9822465843082713, "grad_norm": 0.04587447910006072, "learning_rate": 9.59814880619636e-09, "loss": 0.0021, "step": 60030 }, { "epoch": 0.9824102102593472, "grad_norm": 0.06979774753379442, "learning_rate": 9.422095055665603e-09, "loss": 0.0022, "step": 60040 }, { "epoch": 0.9825738362104229, "grad_norm": 0.08442468186483452, "learning_rate": 9.247669392919433e-09, "loss": 0.0015, "step": 60050 }, { "epoch": 0.9827374621614988, "grad_norm": 0.12147376585719137, "learning_rate": 9.074871874861224e-09, "loss": 0.0027, "step": 60060 }, { "epoch": 0.9829010881125747, "grad_norm": 0.04215229422578133, "learning_rate": 8.903702557862549e-09, "loss": 0.0024, "step": 60070 }, { "epoch": 0.9830647140636505, "grad_norm": 0.07337007150459161, "learning_rate": 8.734161497764848e-09, "loss": 0.001, "step": 60080 }, { "epoch": 0.9832283400147264, "grad_norm": 0.08379642415713438, "learning_rate": 8.56624874987777e-09, "loss": 0.0032, "step": 60090 }, { "epoch": 0.9833919659658021, "grad_norm": 0.15789040921661168, "learning_rate": 8.399964368979719e-09, "loss": 0.002, "step": 60100 }, { "epoch": 0.983555591916878, "grad_norm": 0.5568401126340339, "learning_rate": 8.23530840931841e-09, "loss": 0.0024, "step": 60110 }, { "epoch": 0.9837192178679539, "grad_norm": 0.0796814150994545, "learning_rate": 8.07228092460921e-09, "loss": 0.0023, "step": 60120 }, { "epoch": 0.9838828438190297, "grad_norm": 0.1087612451064515, "learning_rate": 7.910881968037909e-09, "loss": 0.0033, "step": 60130 }, { "epoch": 0.9840464697701056, "grad_norm": 0.11558822922658991, "learning_rate": 7.751111592257388e-09, "loss": 0.0019, "step": 60140 }, { "epoch": 0.9842100957211813, "grad_norm": 0.10832463574397937, "learning_rate": 7.592969849390398e-09, "loss": 0.0009, "step": 60150 }, { "epoch": 0.9843737216722572, "grad_norm": 0.19450215338490973, "learning_rate": 7.436456791027336e-09, "loss": 0.003, "step": 60160 }, { "epoch": 0.9845373476233331, "grad_norm": 0.37797237332341194, "learning_rate": 7.2815724682279154e-09, "loss": 0.0029, "step": 60170 }, { "epoch": 0.9847009735744089, "grad_norm": 0.014815793272699846, "learning_rate": 7.128316931521162e-09, "loss": 0.0012, "step": 60180 }, { "epoch": 0.9848645995254848, "grad_norm": 0.08314315539099056, "learning_rate": 6.976690230902639e-09, "loss": 0.0017, "step": 60190 }, { "epoch": 0.9850282254765605, "grad_norm": 0.006542973292016352, "learning_rate": 6.826692415838887e-09, "loss": 0.0012, "step": 60200 }, { "epoch": 0.9851918514276364, "grad_norm": 0.07894588516068819, "learning_rate": 6.6783235352640976e-09, "loss": 0.0011, "step": 60210 }, { "epoch": 0.9853554773787123, "grad_norm": 0.15213493909389453, "learning_rate": 6.531583637580663e-09, "loss": 0.001, "step": 60220 }, { "epoch": 0.9855191033297881, "grad_norm": 0.065571064615504, "learning_rate": 6.386472770659735e-09, "loss": 0.0022, "step": 60230 }, { "epoch": 0.985682729280864, "grad_norm": 0.12963975197460978, "learning_rate": 6.242990981841224e-09, "loss": 0.0021, "step": 60240 }, { "epoch": 0.9858463552319398, "grad_norm": 0.11514803523916374, "learning_rate": 6.1011383179337966e-09, "loss": 0.0018, "step": 60250 }, { "epoch": 0.9860099811830156, "grad_norm": 0.1571361272708824, "learning_rate": 5.960914825214326e-09, "loss": 0.0018, "step": 60260 }, { "epoch": 0.9861736071340914, "grad_norm": 0.12742103712251568, "learning_rate": 5.822320549427884e-09, "loss": 0.0016, "step": 60270 }, { "epoch": 0.9863372330851673, "grad_norm": 0.2458001491079989, "learning_rate": 5.685355535788861e-09, "loss": 0.0011, "step": 60280 }, { "epoch": 0.9865008590362432, "grad_norm": 0.11390066827535633, "learning_rate": 5.5500198289792915e-09, "loss": 0.004, "step": 60290 }, { "epoch": 0.986664484987319, "grad_norm": 0.19573018563892566, "learning_rate": 5.41631347314997e-09, "loss": 0.0025, "step": 60300 }, { "epoch": 0.9868281109383948, "grad_norm": 0.012970873493516008, "learning_rate": 5.284236511921004e-09, "loss": 0.0028, "step": 60310 }, { "epoch": 0.9869917368894706, "grad_norm": 0.055039228827746596, "learning_rate": 5.153788988379593e-09, "loss": 0.0032, "step": 60320 }, { "epoch": 0.9871553628405465, "grad_norm": 0.1409922209437654, "learning_rate": 5.0249709450816976e-09, "loss": 0.0016, "step": 60330 }, { "epoch": 0.9873189887916224, "grad_norm": 0.0653035032829552, "learning_rate": 4.897782424052589e-09, "loss": 0.0015, "step": 60340 }, { "epoch": 0.9874826147426982, "grad_norm": 0.10716871217137415, "learning_rate": 4.772223466784631e-09, "loss": 0.0019, "step": 60350 }, { "epoch": 0.987646240693774, "grad_norm": 0.26245431623881244, "learning_rate": 4.648294114239505e-09, "loss": 0.0044, "step": 60360 }, { "epoch": 0.9878098666448498, "grad_norm": 0.0721433153889858, "learning_rate": 4.5259944068470894e-09, "loss": 0.0018, "step": 60370 }, { "epoch": 0.9879734925959257, "grad_norm": 0.13675506840579402, "learning_rate": 4.4053243845049165e-09, "loss": 0.002, "step": 60380 }, { "epoch": 0.9881371185470016, "grad_norm": 0.0978644226734315, "learning_rate": 4.286284086580383e-09, "loss": 0.0013, "step": 60390 }, { "epoch": 0.9883007444980774, "grad_norm": 0.042673595689395584, "learning_rate": 4.168873551907982e-09, "loss": 0.0023, "step": 60400 }, { "epoch": 0.9884643704491533, "grad_norm": 0.12913790495598768, "learning_rate": 4.053092818790405e-09, "loss": 0.0019, "step": 60410 }, { "epoch": 0.988627996400229, "grad_norm": 0.24522300422463322, "learning_rate": 3.9389419249991065e-09, "loss": 0.0018, "step": 60420 }, { "epoch": 0.9887916223513049, "grad_norm": 0.0409492574790415, "learning_rate": 3.8264209077742975e-09, "loss": 0.0014, "step": 60430 }, { "epoch": 0.9889552483023808, "grad_norm": 0.050325729929773584, "learning_rate": 3.715529803823281e-09, "loss": 0.0017, "step": 60440 }, { "epoch": 0.9891188742534566, "grad_norm": 0.08215262686447887, "learning_rate": 3.606268649322675e-09, "loss": 0.0014, "step": 60450 }, { "epoch": 0.9892825002045325, "grad_norm": 0.16526693680903798, "learning_rate": 3.4986374799172995e-09, "loss": 0.0017, "step": 60460 }, { "epoch": 0.9894461261556082, "grad_norm": 0.1745853996568687, "learning_rate": 3.392636330719623e-09, "loss": 0.0016, "step": 60470 }, { "epoch": 0.9896097521066841, "grad_norm": 0.07733288679227483, "learning_rate": 3.2882652363108726e-09, "loss": 0.0007, "step": 60480 }, { "epoch": 0.98977337805776, "grad_norm": 0.1123718270286566, "learning_rate": 3.1855242307399226e-09, "loss": 0.0022, "step": 60490 }, { "epoch": 0.9899370040088358, "grad_norm": 0.1410498263659475, "learning_rate": 3.0844133475238513e-09, "loss": 0.0018, "step": 60500 }, { "epoch": 0.9901006299599117, "grad_norm": 0.16450532311059904, "learning_rate": 2.9849326196496055e-09, "loss": 0.0016, "step": 60510 }, { "epoch": 0.9902642559109874, "grad_norm": 0.09082114787697831, "learning_rate": 2.8870820795695585e-09, "loss": 0.0016, "step": 60520 }, { "epoch": 0.9904278818620633, "grad_norm": 0.013173648504481422, "learning_rate": 2.7908617592065092e-09, "loss": 0.0021, "step": 60530 }, { "epoch": 0.9905915078131392, "grad_norm": 0.021228948905903194, "learning_rate": 2.6962716899509024e-09, "loss": 0.0017, "step": 60540 }, { "epoch": 0.990755133764215, "grad_norm": 0.2829491886722695, "learning_rate": 2.603311902660277e-09, "loss": 0.0026, "step": 60550 }, { "epoch": 0.9909187597152909, "grad_norm": 0.10691415150689901, "learning_rate": 2.5119824276614857e-09, "loss": 0.0018, "step": 60560 }, { "epoch": 0.9910823856663666, "grad_norm": 0.14732791728354516, "learning_rate": 2.4222832947490284e-09, "loss": 0.0023, "step": 60570 }, { "epoch": 0.9912460116174425, "grad_norm": 0.09089862558597359, "learning_rate": 2.3342145331861633e-09, "loss": 0.0016, "step": 60580 }, { "epoch": 0.9914096375685184, "grad_norm": 0.06804651704965908, "learning_rate": 2.247776171703242e-09, "loss": 0.002, "step": 60590 }, { "epoch": 0.9915732635195942, "grad_norm": 0.5243362934115394, "learning_rate": 2.1629682384999295e-09, "loss": 0.0039, "step": 60600 }, { "epoch": 0.9917368894706701, "grad_norm": 0.039580329474393994, "learning_rate": 2.0797907612424286e-09, "loss": 0.002, "step": 60610 }, { "epoch": 0.9919005154217458, "grad_norm": 0.07388725810098937, "learning_rate": 1.9982437670662546e-09, "loss": 0.0018, "step": 60620 }, { "epoch": 0.9920641413728217, "grad_norm": 0.32566069699409705, "learning_rate": 1.9183272825745726e-09, "loss": 0.0017, "step": 60630 }, { "epoch": 0.9922277673238976, "grad_norm": 0.11709789684202111, "learning_rate": 1.840041333838749e-09, "loss": 0.0017, "step": 60640 }, { "epoch": 0.9923913932749734, "grad_norm": 0.2858933598390796, "learning_rate": 1.76338594639891e-09, "loss": 0.0017, "step": 60650 }, { "epoch": 0.9925550192260493, "grad_norm": 0.23445480320381554, "learning_rate": 1.6883611452611636e-09, "loss": 0.0031, "step": 60660 }, { "epoch": 0.992718645177125, "grad_norm": 0.19399728146755638, "learning_rate": 1.614966954902597e-09, "loss": 0.0018, "step": 60670 }, { "epoch": 0.9928822711282009, "grad_norm": 0.11307517802571326, "learning_rate": 1.5432033992651696e-09, "loss": 0.0016, "step": 60680 }, { "epoch": 0.9930458970792768, "grad_norm": 0.10675311099084006, "learning_rate": 1.4730705017618196e-09, "loss": 0.0032, "step": 60690 }, { "epoch": 0.9932095230303526, "grad_norm": 0.19889546965927152, "learning_rate": 1.4045682852714682e-09, "loss": 0.0019, "step": 60700 }, { "epoch": 0.9933731489814285, "grad_norm": 0.10604112466126486, "learning_rate": 1.3376967721417945e-09, "loss": 0.002, "step": 60710 }, { "epoch": 0.9935367749325043, "grad_norm": 0.06959550731607973, "learning_rate": 1.2724559841886808e-09, "loss": 0.0018, "step": 60720 }, { "epoch": 0.9937004008835801, "grad_norm": 0.10219405662664624, "learning_rate": 1.2088459426951026e-09, "loss": 0.0021, "step": 60730 }, { "epoch": 0.993864026834656, "grad_norm": 0.15125987983040315, "learning_rate": 1.146866668413904e-09, "loss": 0.0038, "step": 60740 }, { "epoch": 0.9940276527857318, "grad_norm": 0.05925288769258631, "learning_rate": 1.0865181815633563e-09, "loss": 0.0016, "step": 60750 }, { "epoch": 0.9941912787368077, "grad_norm": 0.16897204394201365, "learning_rate": 1.0278005018327097e-09, "loss": 0.0026, "step": 60760 }, { "epoch": 0.9943549046878835, "grad_norm": 0.07610001527467722, "learning_rate": 9.70713648376087e-10, "loss": 0.0024, "step": 60770 }, { "epoch": 0.9945185306389593, "grad_norm": 0.11757244247354288, "learning_rate": 9.152576398174795e-10, "loss": 0.001, "step": 60780 }, { "epoch": 0.9946821565900352, "grad_norm": 0.14934542054597172, "learning_rate": 8.614324942490815e-10, "loss": 0.0035, "step": 60790 }, { "epoch": 0.994845782541111, "grad_norm": 0.16757262928104008, "learning_rate": 8.092382292296253e-10, "loss": 0.003, "step": 60800 }, { "epoch": 0.9950094084921869, "grad_norm": 0.33882316409394125, "learning_rate": 7.586748617866013e-10, "loss": 0.0028, "step": 60810 }, { "epoch": 0.9951730344432627, "grad_norm": 0.0746601687953866, "learning_rate": 7.097424084157034e-10, "loss": 0.0007, "step": 60820 }, { "epoch": 0.9953366603943385, "grad_norm": 0.1824925243432742, "learning_rate": 6.624408850808284e-10, "loss": 0.0031, "step": 60830 }, { "epoch": 0.9955002863454144, "grad_norm": 0.09700631805414069, "learning_rate": 6.167703072118558e-10, "loss": 0.0015, "step": 60840 }, { "epoch": 0.9956639122964902, "grad_norm": 0.09849100566872825, "learning_rate": 5.72730689709089e-10, "loss": 0.0023, "step": 60850 }, { "epoch": 0.9958275382475661, "grad_norm": 0.19214481254598562, "learning_rate": 5.303220469399239e-10, "loss": 0.0035, "step": 60860 }, { "epoch": 0.9959911641986419, "grad_norm": 0.11120133144872563, "learning_rate": 4.895443927382948e-10, "loss": 0.0021, "step": 60870 }, { "epoch": 0.9961547901497178, "grad_norm": 0.155889349230826, "learning_rate": 4.5039774040800397e-10, "loss": 0.0017, "step": 60880 }, { "epoch": 0.9963184161007936, "grad_norm": 0.004320524512787298, "learning_rate": 4.128821027193919e-10, "loss": 0.0009, "step": 60890 }, { "epoch": 0.9964820420518694, "grad_norm": 0.4231217069702302, "learning_rate": 3.7699749191211223e-10, "loss": 0.0013, "step": 60900 }, { "epoch": 0.9966456680029453, "grad_norm": 0.012752050307830578, "learning_rate": 3.4274391969180143e-10, "loss": 0.0027, "step": 60910 }, { "epoch": 0.9968092939540211, "grad_norm": 0.00982818521532133, "learning_rate": 3.101213972345196e-10, "loss": 0.0018, "step": 60920 }, { "epoch": 0.996972919905097, "grad_norm": 0.2416885167538227, "learning_rate": 2.7912993518119936e-10, "loss": 0.003, "step": 60930 }, { "epoch": 0.9971365458561728, "grad_norm": 0.15182605977009, "learning_rate": 2.4976954364319683e-10, "loss": 0.0024, "step": 60940 }, { "epoch": 0.9973001718072486, "grad_norm": 0.14261567522183033, "learning_rate": 2.220402321989612e-10, "loss": 0.002, "step": 60950 }, { "epoch": 0.9974637977583245, "grad_norm": 0.11603194992930861, "learning_rate": 1.9594200989403455e-10, "loss": 0.0035, "step": 60960 }, { "epoch": 0.9976274237094003, "grad_norm": 0.25133548517888693, "learning_rate": 1.7147488524271728e-10, "loss": 0.0012, "step": 60970 }, { "epoch": 0.9977910496604762, "grad_norm": 0.06633368950966952, "learning_rate": 1.486388662275129e-10, "loss": 0.002, "step": 60980 }, { "epoch": 0.997954675611552, "grad_norm": 0.16637022913787614, "learning_rate": 1.274339602974628e-10, "loss": 0.0032, "step": 60990 }, { "epoch": 0.9981183015626278, "grad_norm": 0.09767478228395131, "learning_rate": 1.0786017437092177e-10, "loss": 0.0011, "step": 61000 }, { "epoch": 0.9982819275137037, "grad_norm": 0.12156838897465133, "learning_rate": 8.991751483278243e-11, "loss": 0.0011, "step": 61010 }, { "epoch": 0.9984455534647795, "grad_norm": 0.12433531178406149, "learning_rate": 7.360598753725079e-11, "loss": 0.0019, "step": 61020 }, { "epoch": 0.9986091794158554, "grad_norm": 0.13511939950557417, "learning_rate": 5.892559780562579e-11, "loss": 0.0014, "step": 61030 }, { "epoch": 0.9987728053669312, "grad_norm": 0.03609854062444235, "learning_rate": 4.587635042629934e-11, "loss": 0.0016, "step": 61040 }, { "epoch": 0.998936431318007, "grad_norm": 0.07079022175742873, "learning_rate": 3.445824965753186e-11, "loss": 0.0012, "step": 61050 }, { "epoch": 0.9991000572690829, "grad_norm": 0.035937299268860086, "learning_rate": 2.467129922356648e-11, "loss": 0.001, "step": 61060 }, { "epoch": 0.9992636832201587, "grad_norm": 0.18251185390265254, "learning_rate": 1.651550231740462e-11, "loss": 0.0013, "step": 61070 }, { "epoch": 0.9994273091712346, "grad_norm": 0.13268026673927336, "learning_rate": 9.990861599140644e-12, "loss": 0.002, "step": 61080 }, { "epoch": 0.9995909351223105, "grad_norm": 0.184941253110589, "learning_rate": 5.097379198737429e-12, "loss": 0.0022, "step": 61090 }, { "epoch": 0.9997545610733862, "grad_norm": 0.09222311635005549, "learning_rate": 1.8350567110303474e-12, "loss": 0.0011, "step": 61100 }, { "epoch": 0.9999181870244621, "grad_norm": 0.12849870174228717, "learning_rate": 2.0389520127839234e-13, "loss": 0.0016, "step": 61110 }, { "epoch": 1.0, "step": 61115, "total_flos": 389042235549696.0, "train_loss": 0.008355458706034012, "train_runtime": 62709.2593, "train_samples_per_second": 7.797, "train_steps_per_second": 0.975 } ], "logging_steps": 10, "max_steps": 61115, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 389042235549696.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }