{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9999553657774536, "eval_steps": 25000, "global_step": 100818, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0002975614836415574, "grad_norm": 9.453598976135254, "learning_rate": 9.918666931164452e-09, "loss": 0.811, "step": 10 }, { "epoch": 0.0005951229672831148, "grad_norm": 8.282325744628906, "learning_rate": 1.9837333862328904e-08, "loss": 0.8433, "step": 20 }, { "epoch": 0.0008926844509246723, "grad_norm": 8.99701976776123, "learning_rate": 2.9756000793493356e-08, "loss": 0.8854, "step": 30 }, { "epoch": 0.0011902459345662297, "grad_norm": 9.757439613342285, "learning_rate": 3.967466772465781e-08, "loss": 0.8087, "step": 40 }, { "epoch": 0.0014878074182077873, "grad_norm": 10.15872859954834, "learning_rate": 4.959333465582226e-08, "loss": 0.8832, "step": 50 }, { "epoch": 0.0017853689018493446, "grad_norm": 8.739439964294434, "learning_rate": 5.951200158698671e-08, "loss": 0.8431, "step": 60 }, { "epoch": 0.002082930385490902, "grad_norm": 8.063074111938477, "learning_rate": 6.943066851815117e-08, "loss": 0.7804, "step": 70 }, { "epoch": 0.0023804918691324593, "grad_norm": 8.873162269592285, "learning_rate": 7.934933544931562e-08, "loss": 0.86, "step": 80 }, { "epoch": 0.002678053352774017, "grad_norm": 8.487876892089844, "learning_rate": 8.926800238048008e-08, "loss": 0.8274, "step": 90 }, { "epoch": 0.0029756148364155745, "grad_norm": 11.248519897460938, "learning_rate": 9.918666931164452e-08, "loss": 0.7825, "step": 100 }, { "epoch": 0.003273176320057132, "grad_norm": 7.93142557144165, "learning_rate": 1.0910533624280897e-07, "loss": 0.7915, "step": 110 }, { "epoch": 0.0035707378036986892, "grad_norm": 7.886508464813232, "learning_rate": 1.1902400317397343e-07, "loss": 0.7885, "step": 120 }, { "epoch": 0.0038682992873402466, "grad_norm": 7.370453834533691, "learning_rate": 1.2894267010513787e-07, "loss": 0.7518, "step": 130 }, { "epoch": 0.004165860770981804, "grad_norm": 8.78321647644043, "learning_rate": 1.3886133703630234e-07, "loss": 0.7243, "step": 140 }, { "epoch": 0.004463422254623361, "grad_norm": 6.715193748474121, "learning_rate": 1.487800039674668e-07, "loss": 0.767, "step": 150 }, { "epoch": 0.004760983738264919, "grad_norm": 7.5613861083984375, "learning_rate": 1.5869867089863123e-07, "loss": 0.7067, "step": 160 }, { "epoch": 0.005058545221906476, "grad_norm": 6.366057395935059, "learning_rate": 1.6861733782979568e-07, "loss": 0.695, "step": 170 }, { "epoch": 0.005356106705548034, "grad_norm": 6.285976886749268, "learning_rate": 1.7853600476096015e-07, "loss": 0.6272, "step": 180 }, { "epoch": 0.005653668189189592, "grad_norm": 5.211022853851318, "learning_rate": 1.884546716921246e-07, "loss": 0.5703, "step": 190 }, { "epoch": 0.005951229672831149, "grad_norm": 4.982776641845703, "learning_rate": 1.9837333862328904e-07, "loss": 0.5263, "step": 200 }, { "epoch": 0.006248791156472706, "grad_norm": 5.922787666320801, "learning_rate": 2.0829200555445351e-07, "loss": 0.5109, "step": 210 }, { "epoch": 0.006546352640114264, "grad_norm": 4.555914402008057, "learning_rate": 2.1821067248561793e-07, "loss": 0.4608, "step": 220 }, { "epoch": 0.006843914123755821, "grad_norm": 3.3586277961730957, "learning_rate": 2.281293394167824e-07, "loss": 0.4237, "step": 230 }, { "epoch": 0.0071414756073973785, "grad_norm": 3.410503625869751, "learning_rate": 2.3804800634794685e-07, "loss": 0.3645, "step": 240 }, { "epoch": 0.007439037091038936, "grad_norm": 4.175695419311523, "learning_rate": 2.479666732791113e-07, "loss": 0.3277, "step": 250 }, { "epoch": 0.007736598574680493, "grad_norm": 2.744114398956299, "learning_rate": 2.5788534021027574e-07, "loss": 0.2974, "step": 260 }, { "epoch": 0.00803416005832205, "grad_norm": 2.248288631439209, "learning_rate": 2.678040071414402e-07, "loss": 0.2749, "step": 270 }, { "epoch": 0.008331721541963608, "grad_norm": 2.656511068344116, "learning_rate": 2.777226740726047e-07, "loss": 0.2372, "step": 280 }, { "epoch": 0.008629283025605165, "grad_norm": 4.739702224731445, "learning_rate": 2.8764134100376913e-07, "loss": 0.2279, "step": 290 }, { "epoch": 0.008926844509246723, "grad_norm": 2.498535394668579, "learning_rate": 2.975600079349336e-07, "loss": 0.2328, "step": 300 }, { "epoch": 0.00922440599288828, "grad_norm": 2.4777724742889404, "learning_rate": 3.07478674866098e-07, "loss": 0.1985, "step": 310 }, { "epoch": 0.009521967476529837, "grad_norm": 2.296701669692993, "learning_rate": 3.1739734179726247e-07, "loss": 0.1963, "step": 320 }, { "epoch": 0.009819528960171395, "grad_norm": 2.603011131286621, "learning_rate": 3.273160087284269e-07, "loss": 0.1918, "step": 330 }, { "epoch": 0.010117090443812952, "grad_norm": 1.633563756942749, "learning_rate": 3.3723467565959136e-07, "loss": 0.1962, "step": 340 }, { "epoch": 0.01041465192745451, "grad_norm": 2.339439868927002, "learning_rate": 3.4715334259075586e-07, "loss": 0.1689, "step": 350 }, { "epoch": 0.010712213411096069, "grad_norm": 2.5012876987457275, "learning_rate": 3.570720095219203e-07, "loss": 0.1683, "step": 360 }, { "epoch": 0.011009774894737626, "grad_norm": 2.697737455368042, "learning_rate": 3.669906764530847e-07, "loss": 0.1618, "step": 370 }, { "epoch": 0.011307336378379183, "grad_norm": 2.282151699066162, "learning_rate": 3.769093433842492e-07, "loss": 0.1678, "step": 380 }, { "epoch": 0.01160489786202074, "grad_norm": 2.265638828277588, "learning_rate": 3.8682801031541364e-07, "loss": 0.1565, "step": 390 }, { "epoch": 0.011902459345662298, "grad_norm": 2.333787202835083, "learning_rate": 3.967466772465781e-07, "loss": 0.1581, "step": 400 }, { "epoch": 0.012200020829303855, "grad_norm": 2.220900297164917, "learning_rate": 4.0666534417774253e-07, "loss": 0.1508, "step": 410 }, { "epoch": 0.012497582312945413, "grad_norm": 2.582387924194336, "learning_rate": 4.1658401110890703e-07, "loss": 0.1381, "step": 420 }, { "epoch": 0.01279514379658697, "grad_norm": 2.628873348236084, "learning_rate": 4.265026780400715e-07, "loss": 0.1327, "step": 430 }, { "epoch": 0.013092705280228528, "grad_norm": 2.8012752532958984, "learning_rate": 4.3642134497123587e-07, "loss": 0.1506, "step": 440 }, { "epoch": 0.013390266763870085, "grad_norm": 1.7567951679229736, "learning_rate": 4.4634001190240037e-07, "loss": 0.1498, "step": 450 }, { "epoch": 0.013687828247511642, "grad_norm": 2.082446813583374, "learning_rate": 4.562586788335648e-07, "loss": 0.1214, "step": 460 }, { "epoch": 0.0139853897311532, "grad_norm": 1.9990344047546387, "learning_rate": 4.661773457647292e-07, "loss": 0.1198, "step": 470 }, { "epoch": 0.014282951214794757, "grad_norm": 3.123953104019165, "learning_rate": 4.760960126958937e-07, "loss": 0.1205, "step": 480 }, { "epoch": 0.014580512698436314, "grad_norm": 2.655498504638672, "learning_rate": 4.860146796270581e-07, "loss": 0.1393, "step": 490 }, { "epoch": 0.014878074182077872, "grad_norm": 1.8887054920196533, "learning_rate": 4.959333465582226e-07, "loss": 0.1135, "step": 500 }, { "epoch": 0.015175635665719429, "grad_norm": 2.2586145401000977, "learning_rate": 5.05852013489387e-07, "loss": 0.1134, "step": 510 }, { "epoch": 0.015473197149360986, "grad_norm": 2.599902629852295, "learning_rate": 5.157706804205515e-07, "loss": 0.0995, "step": 520 }, { "epoch": 0.015770758633002546, "grad_norm": 2.5462872982025146, "learning_rate": 5.25689347351716e-07, "loss": 0.1036, "step": 530 }, { "epoch": 0.0160683201166441, "grad_norm": 1.8687094449996948, "learning_rate": 5.356080142828804e-07, "loss": 0.1081, "step": 540 }, { "epoch": 0.01636588160028566, "grad_norm": 2.344144105911255, "learning_rate": 5.455266812140448e-07, "loss": 0.0871, "step": 550 }, { "epoch": 0.016663443083927216, "grad_norm": 1.5861790180206299, "learning_rate": 5.554453481452094e-07, "loss": 0.1147, "step": 560 }, { "epoch": 0.016961004567568775, "grad_norm": 1.8777775764465332, "learning_rate": 5.653640150763738e-07, "loss": 0.1009, "step": 570 }, { "epoch": 0.01725856605121033, "grad_norm": 2.4832401275634766, "learning_rate": 5.752826820075383e-07, "loss": 0.0962, "step": 580 }, { "epoch": 0.01755612753485189, "grad_norm": 1.794165015220642, "learning_rate": 5.852013489387027e-07, "loss": 0.0949, "step": 590 }, { "epoch": 0.017853689018493445, "grad_norm": 2.478044033050537, "learning_rate": 5.951200158698672e-07, "loss": 0.0898, "step": 600 }, { "epoch": 0.018151250502135004, "grad_norm": 2.0921642780303955, "learning_rate": 6.050386828010316e-07, "loss": 0.0974, "step": 610 }, { "epoch": 0.01844881198577656, "grad_norm": 2.245126247406006, "learning_rate": 6.14957349732196e-07, "loss": 0.0962, "step": 620 }, { "epoch": 0.01874637346941812, "grad_norm": 2.2334938049316406, "learning_rate": 6.248760166633605e-07, "loss": 0.0913, "step": 630 }, { "epoch": 0.019043934953059675, "grad_norm": 2.047128200531006, "learning_rate": 6.347946835945249e-07, "loss": 0.1119, "step": 640 }, { "epoch": 0.019341496436701234, "grad_norm": 2.0140793323516846, "learning_rate": 6.447133505256895e-07, "loss": 0.08, "step": 650 }, { "epoch": 0.01963905792034279, "grad_norm": 2.4656386375427246, "learning_rate": 6.546320174568538e-07, "loss": 0.1053, "step": 660 }, { "epoch": 0.01993661940398435, "grad_norm": 1.714719295501709, "learning_rate": 6.645506843880183e-07, "loss": 0.0812, "step": 670 }, { "epoch": 0.020234180887625904, "grad_norm": 2.311635732650757, "learning_rate": 6.744693513191827e-07, "loss": 0.0838, "step": 680 }, { "epoch": 0.020531742371267463, "grad_norm": 1.9411611557006836, "learning_rate": 6.843880182503472e-07, "loss": 0.0996, "step": 690 }, { "epoch": 0.02082930385490902, "grad_norm": 2.055972099304199, "learning_rate": 6.943066851815117e-07, "loss": 0.0879, "step": 700 }, { "epoch": 0.021126865338550578, "grad_norm": 2.2464587688446045, "learning_rate": 7.042253521126762e-07, "loss": 0.0911, "step": 710 }, { "epoch": 0.021424426822192137, "grad_norm": 1.9784224033355713, "learning_rate": 7.141440190438406e-07, "loss": 0.0826, "step": 720 }, { "epoch": 0.021721988305833693, "grad_norm": 1.6096675395965576, "learning_rate": 7.240626859750049e-07, "loss": 0.073, "step": 730 }, { "epoch": 0.022019549789475252, "grad_norm": 1.7382457256317139, "learning_rate": 7.339813529061694e-07, "loss": 0.079, "step": 740 }, { "epoch": 0.022317111273116808, "grad_norm": 1.5027981996536255, "learning_rate": 7.439000198373339e-07, "loss": 0.075, "step": 750 }, { "epoch": 0.022614672756758367, "grad_norm": 1.345682144165039, "learning_rate": 7.538186867684984e-07, "loss": 0.0883, "step": 760 }, { "epoch": 0.022912234240399922, "grad_norm": 1.5278325080871582, "learning_rate": 7.637373536996628e-07, "loss": 0.0812, "step": 770 }, { "epoch": 0.02320979572404148, "grad_norm": 2.0530402660369873, "learning_rate": 7.736560206308273e-07, "loss": 0.0871, "step": 780 }, { "epoch": 0.023507357207683037, "grad_norm": 1.7554374933242798, "learning_rate": 7.835746875619918e-07, "loss": 0.0821, "step": 790 }, { "epoch": 0.023804918691324596, "grad_norm": 2.320605993270874, "learning_rate": 7.934933544931562e-07, "loss": 0.0771, "step": 800 }, { "epoch": 0.02410248017496615, "grad_norm": 1.8429840803146362, "learning_rate": 8.034120214243206e-07, "loss": 0.0664, "step": 810 }, { "epoch": 0.02440004165860771, "grad_norm": 1.9477684497833252, "learning_rate": 8.133306883554851e-07, "loss": 0.0681, "step": 820 }, { "epoch": 0.024697603142249266, "grad_norm": 1.653464913368225, "learning_rate": 8.232493552866495e-07, "loss": 0.0605, "step": 830 }, { "epoch": 0.024995164625890826, "grad_norm": 1.718320608139038, "learning_rate": 8.331680222178141e-07, "loss": 0.08, "step": 840 }, { "epoch": 0.02529272610953238, "grad_norm": 2.040410041809082, "learning_rate": 8.430866891489785e-07, "loss": 0.0599, "step": 850 }, { "epoch": 0.02559028759317394, "grad_norm": 2.0534141063690186, "learning_rate": 8.53005356080143e-07, "loss": 0.0823, "step": 860 }, { "epoch": 0.025887849076815496, "grad_norm": 2.0753307342529297, "learning_rate": 8.629240230113073e-07, "loss": 0.071, "step": 870 }, { "epoch": 0.026185410560457055, "grad_norm": 1.7911736965179443, "learning_rate": 8.728426899424717e-07, "loss": 0.0709, "step": 880 }, { "epoch": 0.02648297204409861, "grad_norm": 1.8306130170822144, "learning_rate": 8.827613568736362e-07, "loss": 0.0723, "step": 890 }, { "epoch": 0.02678053352774017, "grad_norm": 1.4623206853866577, "learning_rate": 8.926800238048007e-07, "loss": 0.0688, "step": 900 }, { "epoch": 0.027078095011381725, "grad_norm": 1.7800779342651367, "learning_rate": 9.025986907359652e-07, "loss": 0.0683, "step": 910 }, { "epoch": 0.027375656495023284, "grad_norm": 1.5156844854354858, "learning_rate": 9.125173576671296e-07, "loss": 0.0566, "step": 920 }, { "epoch": 0.02767321797866484, "grad_norm": 2.313584089279175, "learning_rate": 9.224360245982941e-07, "loss": 0.0663, "step": 930 }, { "epoch": 0.0279707794623064, "grad_norm": 2.052504539489746, "learning_rate": 9.323546915294584e-07, "loss": 0.0604, "step": 940 }, { "epoch": 0.028268340945947955, "grad_norm": 1.6100000143051147, "learning_rate": 9.42273358460623e-07, "loss": 0.0733, "step": 950 }, { "epoch": 0.028565902429589514, "grad_norm": 1.304732084274292, "learning_rate": 9.521920253917874e-07, "loss": 0.058, "step": 960 }, { "epoch": 0.028863463913231073, "grad_norm": 1.548081398010254, "learning_rate": 9.62110692322952e-07, "loss": 0.0591, "step": 970 }, { "epoch": 0.02916102539687263, "grad_norm": 1.7664852142333984, "learning_rate": 9.720293592541163e-07, "loss": 0.0684, "step": 980 }, { "epoch": 0.029458586880514188, "grad_norm": 2.01761531829834, "learning_rate": 9.819480261852808e-07, "loss": 0.055, "step": 990 }, { "epoch": 0.029756148364155743, "grad_norm": 2.187394142150879, "learning_rate": 9.918666931164452e-07, "loss": 0.071, "step": 1000 }, { "epoch": 0.030053709847797302, "grad_norm": 1.5267683267593384, "learning_rate": 1.0017853600476095e-06, "loss": 0.0555, "step": 1010 }, { "epoch": 0.030351271331438858, "grad_norm": 2.3282172679901123, "learning_rate": 1.011704026978774e-06, "loss": 0.0619, "step": 1020 }, { "epoch": 0.030648832815080417, "grad_norm": 1.842720627784729, "learning_rate": 1.0216226939099386e-06, "loss": 0.0659, "step": 1030 }, { "epoch": 0.030946394298721973, "grad_norm": 1.6442571878433228, "learning_rate": 1.031541360841103e-06, "loss": 0.0553, "step": 1040 }, { "epoch": 0.031243955782363532, "grad_norm": 1.3040857315063477, "learning_rate": 1.0414600277722675e-06, "loss": 0.0613, "step": 1050 }, { "epoch": 0.03154151726600509, "grad_norm": 2.04188871383667, "learning_rate": 1.051378694703432e-06, "loss": 0.073, "step": 1060 }, { "epoch": 0.03183907874964664, "grad_norm": 1.8380076885223389, "learning_rate": 1.0612973616345964e-06, "loss": 0.0659, "step": 1070 }, { "epoch": 0.0321366402332882, "grad_norm": 1.4512139558792114, "learning_rate": 1.0712160285657608e-06, "loss": 0.0548, "step": 1080 }, { "epoch": 0.03243420171692976, "grad_norm": 1.6880100965499878, "learning_rate": 1.0811346954969253e-06, "loss": 0.0638, "step": 1090 }, { "epoch": 0.03273176320057132, "grad_norm": 1.6145412921905518, "learning_rate": 1.0910533624280896e-06, "loss": 0.0556, "step": 1100 }, { "epoch": 0.03302932468421287, "grad_norm": 2.1953887939453125, "learning_rate": 1.1009720293592542e-06, "loss": 0.0538, "step": 1110 }, { "epoch": 0.03332688616785443, "grad_norm": 1.8898247480392456, "learning_rate": 1.1108906962904187e-06, "loss": 0.051, "step": 1120 }, { "epoch": 0.03362444765149599, "grad_norm": 2.22832989692688, "learning_rate": 1.120809363221583e-06, "loss": 0.0585, "step": 1130 }, { "epoch": 0.03392200913513755, "grad_norm": 1.345299243927002, "learning_rate": 1.1307280301527476e-06, "loss": 0.0594, "step": 1140 }, { "epoch": 0.0342195706187791, "grad_norm": 2.1186532974243164, "learning_rate": 1.140646697083912e-06, "loss": 0.0433, "step": 1150 }, { "epoch": 0.03451713210242066, "grad_norm": 1.4174141883850098, "learning_rate": 1.1505653640150765e-06, "loss": 0.0541, "step": 1160 }, { "epoch": 0.03481469358606222, "grad_norm": 1.4992884397506714, "learning_rate": 1.1604840309462409e-06, "loss": 0.058, "step": 1170 }, { "epoch": 0.03511225506970378, "grad_norm": 1.1359139680862427, "learning_rate": 1.1704026978774054e-06, "loss": 0.0571, "step": 1180 }, { "epoch": 0.03540981655334533, "grad_norm": 2.470154285430908, "learning_rate": 1.1803213648085698e-06, "loss": 0.0553, "step": 1190 }, { "epoch": 0.03570737803698689, "grad_norm": 2.284266233444214, "learning_rate": 1.1902400317397343e-06, "loss": 0.0575, "step": 1200 }, { "epoch": 0.03600493952062845, "grad_norm": 1.682321548461914, "learning_rate": 1.2001586986708989e-06, "loss": 0.0647, "step": 1210 }, { "epoch": 0.03630250100427001, "grad_norm": 1.5505924224853516, "learning_rate": 1.2100773656020632e-06, "loss": 0.054, "step": 1220 }, { "epoch": 0.03660006248791157, "grad_norm": 1.339859127998352, "learning_rate": 1.2199960325332275e-06, "loss": 0.0554, "step": 1230 }, { "epoch": 0.03689762397155312, "grad_norm": 1.8718786239624023, "learning_rate": 1.229914699464392e-06, "loss": 0.0538, "step": 1240 }, { "epoch": 0.03719518545519468, "grad_norm": 1.549979567527771, "learning_rate": 1.2398333663955566e-06, "loss": 0.0521, "step": 1250 }, { "epoch": 0.03749274693883624, "grad_norm": 2.289271354675293, "learning_rate": 1.249752033326721e-06, "loss": 0.0507, "step": 1260 }, { "epoch": 0.0377903084224778, "grad_norm": 1.4536746740341187, "learning_rate": 1.2596707002578853e-06, "loss": 0.0589, "step": 1270 }, { "epoch": 0.03808786990611935, "grad_norm": 2.2610924243927, "learning_rate": 1.2695893671890499e-06, "loss": 0.0506, "step": 1280 }, { "epoch": 0.03838543138976091, "grad_norm": 1.3701549768447876, "learning_rate": 1.2795080341202142e-06, "loss": 0.0526, "step": 1290 }, { "epoch": 0.03868299287340247, "grad_norm": 1.9586050510406494, "learning_rate": 1.289426701051379e-06, "loss": 0.0532, "step": 1300 }, { "epoch": 0.03898055435704403, "grad_norm": 1.4318004846572876, "learning_rate": 1.2993453679825433e-06, "loss": 0.0455, "step": 1310 }, { "epoch": 0.03927811584068558, "grad_norm": 1.4940145015716553, "learning_rate": 1.3092640349137077e-06, "loss": 0.0488, "step": 1320 }, { "epoch": 0.03957567732432714, "grad_norm": 1.4930942058563232, "learning_rate": 1.3191827018448722e-06, "loss": 0.0542, "step": 1330 }, { "epoch": 0.0398732388079687, "grad_norm": 1.4891045093536377, "learning_rate": 1.3291013687760365e-06, "loss": 0.0383, "step": 1340 }, { "epoch": 0.040170800291610256, "grad_norm": 1.5925698280334473, "learning_rate": 1.339020035707201e-06, "loss": 0.0449, "step": 1350 }, { "epoch": 0.04046836177525181, "grad_norm": 1.8717914819717407, "learning_rate": 1.3489387026383654e-06, "loss": 0.0583, "step": 1360 }, { "epoch": 0.04076592325889337, "grad_norm": 2.0292372703552246, "learning_rate": 1.35885736956953e-06, "loss": 0.0565, "step": 1370 }, { "epoch": 0.04106348474253493, "grad_norm": 1.286319613456726, "learning_rate": 1.3687760365006943e-06, "loss": 0.0538, "step": 1380 }, { "epoch": 0.041361046226176486, "grad_norm": 1.8364289999008179, "learning_rate": 1.3786947034318587e-06, "loss": 0.0563, "step": 1390 }, { "epoch": 0.04165860770981804, "grad_norm": 1.4975905418395996, "learning_rate": 1.3886133703630234e-06, "loss": 0.0505, "step": 1400 }, { "epoch": 0.0419561691934596, "grad_norm": 1.9601168632507324, "learning_rate": 1.3985320372941878e-06, "loss": 0.0469, "step": 1410 }, { "epoch": 0.042253730677101156, "grad_norm": 1.6901415586471558, "learning_rate": 1.4084507042253523e-06, "loss": 0.0429, "step": 1420 }, { "epoch": 0.042551292160742715, "grad_norm": 1.525531530380249, "learning_rate": 1.4183693711565167e-06, "loss": 0.0368, "step": 1430 }, { "epoch": 0.042848853644384274, "grad_norm": 1.9606751203536987, "learning_rate": 1.4282880380876812e-06, "loss": 0.0459, "step": 1440 }, { "epoch": 0.043146415128025827, "grad_norm": 1.4238057136535645, "learning_rate": 1.4382067050188456e-06, "loss": 0.0336, "step": 1450 }, { "epoch": 0.043443976611667386, "grad_norm": 1.6824605464935303, "learning_rate": 1.4481253719500099e-06, "loss": 0.0449, "step": 1460 }, { "epoch": 0.043741538095308945, "grad_norm": 1.4285272359848022, "learning_rate": 1.4580440388811744e-06, "loss": 0.0393, "step": 1470 }, { "epoch": 0.044039099578950504, "grad_norm": 1.50301992893219, "learning_rate": 1.4679627058123388e-06, "loss": 0.0561, "step": 1480 }, { "epoch": 0.044336661062592056, "grad_norm": 1.3862038850784302, "learning_rate": 1.4778813727435035e-06, "loss": 0.0385, "step": 1490 }, { "epoch": 0.044634222546233615, "grad_norm": 1.5643495321273804, "learning_rate": 1.4878000396746679e-06, "loss": 0.0386, "step": 1500 }, { "epoch": 0.044931784029875174, "grad_norm": 0.9693464040756226, "learning_rate": 1.4977187066058324e-06, "loss": 0.0584, "step": 1510 }, { "epoch": 0.04522934551351673, "grad_norm": 1.2208012342453003, "learning_rate": 1.5076373735369968e-06, "loss": 0.0361, "step": 1520 }, { "epoch": 0.045526906997158285, "grad_norm": 0.7623074054718018, "learning_rate": 1.5175560404681611e-06, "loss": 0.0537, "step": 1530 }, { "epoch": 0.045824468480799845, "grad_norm": 1.3594554662704468, "learning_rate": 1.5274747073993257e-06, "loss": 0.0316, "step": 1540 }, { "epoch": 0.046122029964441404, "grad_norm": 1.250177025794983, "learning_rate": 1.53739337433049e-06, "loss": 0.0368, "step": 1550 }, { "epoch": 0.04641959144808296, "grad_norm": 1.3692585229873657, "learning_rate": 1.5473120412616546e-06, "loss": 0.0405, "step": 1560 }, { "epoch": 0.046717152931724515, "grad_norm": 1.812532663345337, "learning_rate": 1.557230708192819e-06, "loss": 0.0469, "step": 1570 }, { "epoch": 0.047014714415366074, "grad_norm": 1.4229375123977661, "learning_rate": 1.5671493751239837e-06, "loss": 0.0399, "step": 1580 }, { "epoch": 0.04731227589900763, "grad_norm": 2.038996458053589, "learning_rate": 1.577068042055148e-06, "loss": 0.0417, "step": 1590 }, { "epoch": 0.04760983738264919, "grad_norm": 1.9195847511291504, "learning_rate": 1.5869867089863123e-06, "loss": 0.0359, "step": 1600 }, { "epoch": 0.047907398866290744, "grad_norm": 1.661428689956665, "learning_rate": 1.596905375917477e-06, "loss": 0.0426, "step": 1610 }, { "epoch": 0.0482049603499323, "grad_norm": 2.1504573822021484, "learning_rate": 1.6068240428486412e-06, "loss": 0.0507, "step": 1620 }, { "epoch": 0.04850252183357386, "grad_norm": 1.5875496864318848, "learning_rate": 1.6167427097798058e-06, "loss": 0.0438, "step": 1630 }, { "epoch": 0.04880008331721542, "grad_norm": 1.2543103694915771, "learning_rate": 1.6266613767109701e-06, "loss": 0.0397, "step": 1640 }, { "epoch": 0.049097644800856974, "grad_norm": 1.412137508392334, "learning_rate": 1.6365800436421347e-06, "loss": 0.0368, "step": 1650 }, { "epoch": 0.04939520628449853, "grad_norm": 1.3814455270767212, "learning_rate": 1.646498710573299e-06, "loss": 0.0483, "step": 1660 }, { "epoch": 0.04969276776814009, "grad_norm": 1.7136839628219604, "learning_rate": 1.6564173775044634e-06, "loss": 0.0366, "step": 1670 }, { "epoch": 0.04999032925178165, "grad_norm": 1.5712133646011353, "learning_rate": 1.6663360444356281e-06, "loss": 0.0429, "step": 1680 }, { "epoch": 0.05028789073542321, "grad_norm": 1.636375904083252, "learning_rate": 1.6762547113667925e-06, "loss": 0.0393, "step": 1690 }, { "epoch": 0.05058545221906476, "grad_norm": 1.2734482288360596, "learning_rate": 1.686173378297957e-06, "loss": 0.0355, "step": 1700 }, { "epoch": 0.05088301370270632, "grad_norm": 1.6326795816421509, "learning_rate": 1.6960920452291213e-06, "loss": 0.047, "step": 1710 }, { "epoch": 0.05118057518634788, "grad_norm": 1.7052007913589478, "learning_rate": 1.706010712160286e-06, "loss": 0.0457, "step": 1720 }, { "epoch": 0.05147813666998944, "grad_norm": 1.5658637285232544, "learning_rate": 1.7159293790914502e-06, "loss": 0.0437, "step": 1730 }, { "epoch": 0.05177569815363099, "grad_norm": 1.8295564651489258, "learning_rate": 1.7258480460226146e-06, "loss": 0.0426, "step": 1740 }, { "epoch": 0.05207325963727255, "grad_norm": 1.5543344020843506, "learning_rate": 1.7357667129537791e-06, "loss": 0.0392, "step": 1750 }, { "epoch": 0.05237082112091411, "grad_norm": 1.3178679943084717, "learning_rate": 1.7456853798849435e-06, "loss": 0.042, "step": 1760 }, { "epoch": 0.05266838260455567, "grad_norm": 1.4904403686523438, "learning_rate": 1.7556040468161082e-06, "loss": 0.0462, "step": 1770 }, { "epoch": 0.05296594408819722, "grad_norm": 2.041773796081543, "learning_rate": 1.7655227137472724e-06, "loss": 0.0351, "step": 1780 }, { "epoch": 0.05326350557183878, "grad_norm": 2.1884021759033203, "learning_rate": 1.7754413806784371e-06, "loss": 0.0529, "step": 1790 }, { "epoch": 0.05356106705548034, "grad_norm": 1.6917122602462769, "learning_rate": 1.7853600476096015e-06, "loss": 0.0424, "step": 1800 }, { "epoch": 0.0538586285391219, "grad_norm": 1.5323195457458496, "learning_rate": 1.7952787145407658e-06, "loss": 0.0389, "step": 1810 }, { "epoch": 0.05415619002276345, "grad_norm": 1.7048876285552979, "learning_rate": 1.8051973814719304e-06, "loss": 0.0361, "step": 1820 }, { "epoch": 0.05445375150640501, "grad_norm": 1.1966559886932373, "learning_rate": 1.8151160484030947e-06, "loss": 0.0319, "step": 1830 }, { "epoch": 0.05475131299004657, "grad_norm": 1.3997738361358643, "learning_rate": 1.8250347153342592e-06, "loss": 0.0311, "step": 1840 }, { "epoch": 0.05504887447368813, "grad_norm": 1.0833847522735596, "learning_rate": 1.8349533822654236e-06, "loss": 0.0437, "step": 1850 }, { "epoch": 0.05534643595732968, "grad_norm": 1.2470440864562988, "learning_rate": 1.8448720491965881e-06, "loss": 0.0368, "step": 1860 }, { "epoch": 0.05564399744097124, "grad_norm": 1.4647935628890991, "learning_rate": 1.8547907161277525e-06, "loss": 0.0386, "step": 1870 }, { "epoch": 0.0559415589246128, "grad_norm": 1.4403966665267944, "learning_rate": 1.8647093830589168e-06, "loss": 0.0418, "step": 1880 }, { "epoch": 0.05623912040825436, "grad_norm": 1.168269395828247, "learning_rate": 1.8746280499900816e-06, "loss": 0.0349, "step": 1890 }, { "epoch": 0.05653668189189591, "grad_norm": 1.5061393976211548, "learning_rate": 1.884546716921246e-06, "loss": 0.0353, "step": 1900 }, { "epoch": 0.05683424337553747, "grad_norm": 1.7922148704528809, "learning_rate": 1.8944653838524105e-06, "loss": 0.0523, "step": 1910 }, { "epoch": 0.05713180485917903, "grad_norm": 1.7412105798721313, "learning_rate": 1.9043840507835748e-06, "loss": 0.04, "step": 1920 }, { "epoch": 0.05742936634282059, "grad_norm": 1.2710986137390137, "learning_rate": 1.9143027177147396e-06, "loss": 0.0363, "step": 1930 }, { "epoch": 0.057726927826462146, "grad_norm": 1.4793649911880493, "learning_rate": 1.924221384645904e-06, "loss": 0.0355, "step": 1940 }, { "epoch": 0.0580244893101037, "grad_norm": 1.231336236000061, "learning_rate": 1.9341400515770683e-06, "loss": 0.0277, "step": 1950 }, { "epoch": 0.05832205079374526, "grad_norm": 1.2349121570587158, "learning_rate": 1.9440587185082326e-06, "loss": 0.0442, "step": 1960 }, { "epoch": 0.058619612277386816, "grad_norm": 1.366809606552124, "learning_rate": 1.953977385439397e-06, "loss": 0.0387, "step": 1970 }, { "epoch": 0.058917173761028376, "grad_norm": 1.728867530822754, "learning_rate": 1.9638960523705617e-06, "loss": 0.0409, "step": 1980 }, { "epoch": 0.05921473524466993, "grad_norm": 1.1726995706558228, "learning_rate": 1.973814719301726e-06, "loss": 0.045, "step": 1990 }, { "epoch": 0.05951229672831149, "grad_norm": 1.7858306169509888, "learning_rate": 1.9837333862328904e-06, "loss": 0.0484, "step": 2000 }, { "epoch": 0.059809858211953046, "grad_norm": 1.995993733406067, "learning_rate": 1.9936520531640547e-06, "loss": 0.0427, "step": 2010 }, { "epoch": 0.060107419695594605, "grad_norm": 1.403613567352295, "learning_rate": 2.003570720095219e-06, "loss": 0.0347, "step": 2020 }, { "epoch": 0.06040498117923616, "grad_norm": 1.9348969459533691, "learning_rate": 2.013489387026384e-06, "loss": 0.035, "step": 2030 }, { "epoch": 0.060702542662877716, "grad_norm": 1.2448413372039795, "learning_rate": 2.023408053957548e-06, "loss": 0.0376, "step": 2040 }, { "epoch": 0.061000104146519275, "grad_norm": 1.4409246444702148, "learning_rate": 2.033326720888713e-06, "loss": 0.0342, "step": 2050 }, { "epoch": 0.061297665630160834, "grad_norm": 1.586350917816162, "learning_rate": 2.0432453878198773e-06, "loss": 0.0482, "step": 2060 }, { "epoch": 0.06159522711380239, "grad_norm": 1.1175971031188965, "learning_rate": 2.0531640547510416e-06, "loss": 0.0276, "step": 2070 }, { "epoch": 0.061892788597443946, "grad_norm": 1.046417236328125, "learning_rate": 2.063082721682206e-06, "loss": 0.0341, "step": 2080 }, { "epoch": 0.062190350081085505, "grad_norm": 1.552367925643921, "learning_rate": 2.0730013886133703e-06, "loss": 0.0268, "step": 2090 }, { "epoch": 0.062487911564727064, "grad_norm": 1.899198055267334, "learning_rate": 2.082920055544535e-06, "loss": 0.04, "step": 2100 }, { "epoch": 0.06278547304836862, "grad_norm": 1.963585376739502, "learning_rate": 2.0928387224756994e-06, "loss": 0.0423, "step": 2110 }, { "epoch": 0.06308303453201018, "grad_norm": 1.6888213157653809, "learning_rate": 2.102757389406864e-06, "loss": 0.0293, "step": 2120 }, { "epoch": 0.06338059601565173, "grad_norm": 1.4797827005386353, "learning_rate": 2.1126760563380285e-06, "loss": 0.0392, "step": 2130 }, { "epoch": 0.06367815749929329, "grad_norm": 1.2732115983963013, "learning_rate": 2.122594723269193e-06, "loss": 0.0344, "step": 2140 }, { "epoch": 0.06397571898293485, "grad_norm": 1.0359984636306763, "learning_rate": 2.132513390200357e-06, "loss": 0.0341, "step": 2150 }, { "epoch": 0.0642732804665764, "grad_norm": 1.3509562015533447, "learning_rate": 2.1424320571315215e-06, "loss": 0.0305, "step": 2160 }, { "epoch": 0.06457084195021796, "grad_norm": 1.2556174993515015, "learning_rate": 2.1523507240626863e-06, "loss": 0.0305, "step": 2170 }, { "epoch": 0.06486840343385952, "grad_norm": 1.4635345935821533, "learning_rate": 2.1622693909938506e-06, "loss": 0.0353, "step": 2180 }, { "epoch": 0.06516596491750108, "grad_norm": 1.7996950149536133, "learning_rate": 2.172188057925015e-06, "loss": 0.0322, "step": 2190 }, { "epoch": 0.06546352640114264, "grad_norm": 1.03203284740448, "learning_rate": 2.1821067248561793e-06, "loss": 0.0297, "step": 2200 }, { "epoch": 0.0657610878847842, "grad_norm": 1.1466728448867798, "learning_rate": 2.192025391787344e-06, "loss": 0.0265, "step": 2210 }, { "epoch": 0.06605864936842575, "grad_norm": 1.6867567300796509, "learning_rate": 2.2019440587185084e-06, "loss": 0.0452, "step": 2220 }, { "epoch": 0.0663562108520673, "grad_norm": 1.5670320987701416, "learning_rate": 2.2118627256496727e-06, "loss": 0.0403, "step": 2230 }, { "epoch": 0.06665377233570886, "grad_norm": 1.4706109762191772, "learning_rate": 2.2217813925808375e-06, "loss": 0.0402, "step": 2240 }, { "epoch": 0.06695133381935042, "grad_norm": 2.9569878578186035, "learning_rate": 2.231700059512002e-06, "loss": 0.0319, "step": 2250 }, { "epoch": 0.06724889530299198, "grad_norm": 1.6580865383148193, "learning_rate": 2.241618726443166e-06, "loss": 0.0334, "step": 2260 }, { "epoch": 0.06754645678663354, "grad_norm": 1.1274923086166382, "learning_rate": 2.2515373933743305e-06, "loss": 0.0303, "step": 2270 }, { "epoch": 0.0678440182702751, "grad_norm": 1.012357473373413, "learning_rate": 2.2614560603054953e-06, "loss": 0.0359, "step": 2280 }, { "epoch": 0.06814157975391666, "grad_norm": 1.570279836654663, "learning_rate": 2.2713747272366596e-06, "loss": 0.0278, "step": 2290 }, { "epoch": 0.0684391412375582, "grad_norm": 1.4103542566299438, "learning_rate": 2.281293394167824e-06, "loss": 0.0386, "step": 2300 }, { "epoch": 0.06873670272119976, "grad_norm": 0.8713282942771912, "learning_rate": 2.2912120610989887e-06, "loss": 0.0332, "step": 2310 }, { "epoch": 0.06903426420484132, "grad_norm": 1.2854361534118652, "learning_rate": 2.301130728030153e-06, "loss": 0.0334, "step": 2320 }, { "epoch": 0.06933182568848288, "grad_norm": 0.9121401309967041, "learning_rate": 2.3110493949613174e-06, "loss": 0.0314, "step": 2330 }, { "epoch": 0.06962938717212444, "grad_norm": 0.8951564431190491, "learning_rate": 2.3209680618924817e-06, "loss": 0.0316, "step": 2340 }, { "epoch": 0.069926948655766, "grad_norm": 1.9168643951416016, "learning_rate": 2.3308867288236465e-06, "loss": 0.0298, "step": 2350 }, { "epoch": 0.07022451013940756, "grad_norm": 0.9461068511009216, "learning_rate": 2.340805395754811e-06, "loss": 0.0306, "step": 2360 }, { "epoch": 0.07052207162304912, "grad_norm": 1.2855700254440308, "learning_rate": 2.350724062685975e-06, "loss": 0.0323, "step": 2370 }, { "epoch": 0.07081963310669066, "grad_norm": 1.0572980642318726, "learning_rate": 2.3606427296171395e-06, "loss": 0.0295, "step": 2380 }, { "epoch": 0.07111719459033222, "grad_norm": 1.1030042171478271, "learning_rate": 2.370561396548304e-06, "loss": 0.0326, "step": 2390 }, { "epoch": 0.07141475607397378, "grad_norm": 1.2941950559616089, "learning_rate": 2.3804800634794686e-06, "loss": 0.0395, "step": 2400 }, { "epoch": 0.07171231755761534, "grad_norm": 1.8247507810592651, "learning_rate": 2.390398730410633e-06, "loss": 0.0249, "step": 2410 }, { "epoch": 0.0720098790412569, "grad_norm": 1.2691149711608887, "learning_rate": 2.4003173973417977e-06, "loss": 0.0297, "step": 2420 }, { "epoch": 0.07230744052489846, "grad_norm": 1.8713440895080566, "learning_rate": 2.410236064272962e-06, "loss": 0.0373, "step": 2430 }, { "epoch": 0.07260500200854002, "grad_norm": 1.6047874689102173, "learning_rate": 2.4201547312041264e-06, "loss": 0.036, "step": 2440 }, { "epoch": 0.07290256349218158, "grad_norm": 1.6088621616363525, "learning_rate": 2.4300733981352907e-06, "loss": 0.024, "step": 2450 }, { "epoch": 0.07320012497582314, "grad_norm": 1.1210105419158936, "learning_rate": 2.439992065066455e-06, "loss": 0.0306, "step": 2460 }, { "epoch": 0.07349768645946468, "grad_norm": 1.4742913246154785, "learning_rate": 2.44991073199762e-06, "loss": 0.029, "step": 2470 }, { "epoch": 0.07379524794310624, "grad_norm": 1.0146715641021729, "learning_rate": 2.459829398928784e-06, "loss": 0.0295, "step": 2480 }, { "epoch": 0.0740928094267478, "grad_norm": 1.2945916652679443, "learning_rate": 2.469748065859949e-06, "loss": 0.0384, "step": 2490 }, { "epoch": 0.07439037091038936, "grad_norm": 0.8717120885848999, "learning_rate": 2.4796667327911133e-06, "loss": 0.0285, "step": 2500 }, { "epoch": 0.07468793239403092, "grad_norm": 1.4267789125442505, "learning_rate": 2.4895853997222776e-06, "loss": 0.0289, "step": 2510 }, { "epoch": 0.07498549387767248, "grad_norm": 1.063924789428711, "learning_rate": 2.499504066653442e-06, "loss": 0.0314, "step": 2520 }, { "epoch": 0.07528305536131404, "grad_norm": 1.0943413972854614, "learning_rate": 2.5094227335846067e-06, "loss": 0.0258, "step": 2530 }, { "epoch": 0.0755806168449556, "grad_norm": 1.4480385780334473, "learning_rate": 2.5193414005157706e-06, "loss": 0.0285, "step": 2540 }, { "epoch": 0.07587817832859714, "grad_norm": 0.6008680462837219, "learning_rate": 2.5292600674469354e-06, "loss": 0.036, "step": 2550 }, { "epoch": 0.0761757398122387, "grad_norm": 1.8697504997253418, "learning_rate": 2.5391787343780997e-06, "loss": 0.0298, "step": 2560 }, { "epoch": 0.07647330129588026, "grad_norm": 0.9977149367332458, "learning_rate": 2.549097401309264e-06, "loss": 0.0337, "step": 2570 }, { "epoch": 0.07677086277952182, "grad_norm": 1.2778698205947876, "learning_rate": 2.5590160682404284e-06, "loss": 0.0374, "step": 2580 }, { "epoch": 0.07706842426316338, "grad_norm": 0.8288964629173279, "learning_rate": 2.568934735171593e-06, "loss": 0.0313, "step": 2590 }, { "epoch": 0.07736598574680494, "grad_norm": 1.349778652191162, "learning_rate": 2.578853402102758e-06, "loss": 0.0275, "step": 2600 }, { "epoch": 0.0776635472304465, "grad_norm": 1.4634191989898682, "learning_rate": 2.588772069033922e-06, "loss": 0.0356, "step": 2610 }, { "epoch": 0.07796110871408805, "grad_norm": 1.3152512311935425, "learning_rate": 2.5986907359650866e-06, "loss": 0.0317, "step": 2620 }, { "epoch": 0.0782586701977296, "grad_norm": 1.3050181865692139, "learning_rate": 2.608609402896251e-06, "loss": 0.0316, "step": 2630 }, { "epoch": 0.07855623168137116, "grad_norm": 1.0603924989700317, "learning_rate": 2.6185280698274153e-06, "loss": 0.0273, "step": 2640 }, { "epoch": 0.07885379316501272, "grad_norm": 1.2375917434692383, "learning_rate": 2.6284467367585797e-06, "loss": 0.0228, "step": 2650 }, { "epoch": 0.07915135464865428, "grad_norm": 0.9375885725021362, "learning_rate": 2.6383654036897444e-06, "loss": 0.0336, "step": 2660 }, { "epoch": 0.07944891613229584, "grad_norm": 0.6398669481277466, "learning_rate": 2.648284070620909e-06, "loss": 0.0408, "step": 2670 }, { "epoch": 0.0797464776159374, "grad_norm": 1.3067296743392944, "learning_rate": 2.658202737552073e-06, "loss": 0.0258, "step": 2680 }, { "epoch": 0.08004403909957895, "grad_norm": 1.0194523334503174, "learning_rate": 2.6681214044832374e-06, "loss": 0.0403, "step": 2690 }, { "epoch": 0.08034160058322051, "grad_norm": 1.0798542499542236, "learning_rate": 2.678040071414402e-06, "loss": 0.023, "step": 2700 }, { "epoch": 0.08063916206686207, "grad_norm": 1.7627888917922974, "learning_rate": 2.687958738345566e-06, "loss": 0.0312, "step": 2710 }, { "epoch": 0.08093672355050362, "grad_norm": 1.3444452285766602, "learning_rate": 2.697877405276731e-06, "loss": 0.0282, "step": 2720 }, { "epoch": 0.08123428503414518, "grad_norm": 1.092696189880371, "learning_rate": 2.7077960722078956e-06, "loss": 0.0319, "step": 2730 }, { "epoch": 0.08153184651778674, "grad_norm": 1.5263723134994507, "learning_rate": 2.71771473913906e-06, "loss": 0.0222, "step": 2740 }, { "epoch": 0.0818294080014283, "grad_norm": 0.9059678912162781, "learning_rate": 2.7276334060702243e-06, "loss": 0.0343, "step": 2750 }, { "epoch": 0.08212696948506985, "grad_norm": 1.4169903993606567, "learning_rate": 2.7375520730013887e-06, "loss": 0.0377, "step": 2760 }, { "epoch": 0.08242453096871141, "grad_norm": 0.9298326969146729, "learning_rate": 2.7474707399325534e-06, "loss": 0.0293, "step": 2770 }, { "epoch": 0.08272209245235297, "grad_norm": 0.847879946231842, "learning_rate": 2.7573894068637173e-06, "loss": 0.0273, "step": 2780 }, { "epoch": 0.08301965393599453, "grad_norm": 0.9041763544082642, "learning_rate": 2.767308073794882e-06, "loss": 0.0288, "step": 2790 }, { "epoch": 0.08331721541963608, "grad_norm": 1.043771505355835, "learning_rate": 2.777226740726047e-06, "loss": 0.0254, "step": 2800 }, { "epoch": 0.08361477690327764, "grad_norm": 0.9164698719978333, "learning_rate": 2.787145407657211e-06, "loss": 0.0194, "step": 2810 }, { "epoch": 0.0839123383869192, "grad_norm": 0.6525437235832214, "learning_rate": 2.7970640745883755e-06, "loss": 0.0267, "step": 2820 }, { "epoch": 0.08420989987056075, "grad_norm": 1.004866123199463, "learning_rate": 2.80698274151954e-06, "loss": 0.0248, "step": 2830 }, { "epoch": 0.08450746135420231, "grad_norm": 1.1188596487045288, "learning_rate": 2.8169014084507046e-06, "loss": 0.0324, "step": 2840 }, { "epoch": 0.08480502283784387, "grad_norm": 1.0804064273834229, "learning_rate": 2.8268200753818686e-06, "loss": 0.0216, "step": 2850 }, { "epoch": 0.08510258432148543, "grad_norm": 1.6219732761383057, "learning_rate": 2.8367387423130333e-06, "loss": 0.0252, "step": 2860 }, { "epoch": 0.08540014580512699, "grad_norm": 0.7085915207862854, "learning_rate": 2.8466574092441977e-06, "loss": 0.0297, "step": 2870 }, { "epoch": 0.08569770728876855, "grad_norm": 0.9946885108947754, "learning_rate": 2.8565760761753624e-06, "loss": 0.0312, "step": 2880 }, { "epoch": 0.0859952687724101, "grad_norm": 0.9689688682556152, "learning_rate": 2.8664947431065263e-06, "loss": 0.0284, "step": 2890 }, { "epoch": 0.08629283025605165, "grad_norm": 1.4144108295440674, "learning_rate": 2.876413410037691e-06, "loss": 0.0247, "step": 2900 }, { "epoch": 0.08659039173969321, "grad_norm": 1.29939603805542, "learning_rate": 2.886332076968856e-06, "loss": 0.0294, "step": 2910 }, { "epoch": 0.08688795322333477, "grad_norm": 0.789351224899292, "learning_rate": 2.8962507439000198e-06, "loss": 0.0266, "step": 2920 }, { "epoch": 0.08718551470697633, "grad_norm": 0.556095540523529, "learning_rate": 2.9061694108311846e-06, "loss": 0.024, "step": 2930 }, { "epoch": 0.08748307619061789, "grad_norm": 1.002523422241211, "learning_rate": 2.916088077762349e-06, "loss": 0.0259, "step": 2940 }, { "epoch": 0.08778063767425945, "grad_norm": 1.7148549556732178, "learning_rate": 2.9260067446935137e-06, "loss": 0.0348, "step": 2950 }, { "epoch": 0.08807819915790101, "grad_norm": 0.9882059693336487, "learning_rate": 2.9359254116246776e-06, "loss": 0.0256, "step": 2960 }, { "epoch": 0.08837576064154255, "grad_norm": 0.7615707516670227, "learning_rate": 2.9458440785558423e-06, "loss": 0.0349, "step": 2970 }, { "epoch": 0.08867332212518411, "grad_norm": 0.7712468504905701, "learning_rate": 2.955762745487007e-06, "loss": 0.0369, "step": 2980 }, { "epoch": 0.08897088360882567, "grad_norm": 1.0488815307617188, "learning_rate": 2.965681412418171e-06, "loss": 0.0318, "step": 2990 }, { "epoch": 0.08926844509246723, "grad_norm": 1.2389843463897705, "learning_rate": 2.9756000793493358e-06, "loss": 0.028, "step": 3000 }, { "epoch": 0.08956600657610879, "grad_norm": 0.8732922673225403, "learning_rate": 2.9855187462805e-06, "loss": 0.0237, "step": 3010 }, { "epoch": 0.08986356805975035, "grad_norm": 0.7037858963012695, "learning_rate": 2.995437413211665e-06, "loss": 0.0288, "step": 3020 }, { "epoch": 0.09016112954339191, "grad_norm": 1.1154979467391968, "learning_rate": 3.005356080142829e-06, "loss": 0.0287, "step": 3030 }, { "epoch": 0.09045869102703347, "grad_norm": 1.0033998489379883, "learning_rate": 3.0152747470739936e-06, "loss": 0.0308, "step": 3040 }, { "epoch": 0.09075625251067501, "grad_norm": 1.7315657138824463, "learning_rate": 3.025193414005158e-06, "loss": 0.0284, "step": 3050 }, { "epoch": 0.09105381399431657, "grad_norm": 1.0935368537902832, "learning_rate": 3.0351120809363222e-06, "loss": 0.0319, "step": 3060 }, { "epoch": 0.09135137547795813, "grad_norm": 1.085435152053833, "learning_rate": 3.0450307478674866e-06, "loss": 0.0208, "step": 3070 }, { "epoch": 0.09164893696159969, "grad_norm": 0.7938528060913086, "learning_rate": 3.0549494147986513e-06, "loss": 0.0203, "step": 3080 }, { "epoch": 0.09194649844524125, "grad_norm": 0.7968424558639526, "learning_rate": 3.064868081729816e-06, "loss": 0.0242, "step": 3090 }, { "epoch": 0.09224405992888281, "grad_norm": 1.1725728511810303, "learning_rate": 3.07478674866098e-06, "loss": 0.0328, "step": 3100 }, { "epoch": 0.09254162141252437, "grad_norm": 1.1346259117126465, "learning_rate": 3.0847054155921448e-06, "loss": 0.0268, "step": 3110 }, { "epoch": 0.09283918289616593, "grad_norm": 1.0226625204086304, "learning_rate": 3.094624082523309e-06, "loss": 0.0262, "step": 3120 }, { "epoch": 0.09313674437980748, "grad_norm": 1.1256163120269775, "learning_rate": 3.1045427494544735e-06, "loss": 0.0305, "step": 3130 }, { "epoch": 0.09343430586344903, "grad_norm": 1.4314672946929932, "learning_rate": 3.114461416385638e-06, "loss": 0.0257, "step": 3140 }, { "epoch": 0.09373186734709059, "grad_norm": 1.0869081020355225, "learning_rate": 3.1243800833168026e-06, "loss": 0.0296, "step": 3150 }, { "epoch": 0.09402942883073215, "grad_norm": 1.3689395189285278, "learning_rate": 3.1342987502479673e-06, "loss": 0.0271, "step": 3160 }, { "epoch": 0.09432699031437371, "grad_norm": 1.0685235261917114, "learning_rate": 3.1442174171791312e-06, "loss": 0.0255, "step": 3170 }, { "epoch": 0.09462455179801527, "grad_norm": 1.4802970886230469, "learning_rate": 3.154136084110296e-06, "loss": 0.0296, "step": 3180 }, { "epoch": 0.09492211328165683, "grad_norm": 1.299682855606079, "learning_rate": 3.1640547510414603e-06, "loss": 0.0298, "step": 3190 }, { "epoch": 0.09521967476529838, "grad_norm": 1.1908252239227295, "learning_rate": 3.1739734179726247e-06, "loss": 0.0306, "step": 3200 }, { "epoch": 0.09551723624893994, "grad_norm": 1.2756342887878418, "learning_rate": 3.183892084903789e-06, "loss": 0.0228, "step": 3210 }, { "epoch": 0.09581479773258149, "grad_norm": 0.96559739112854, "learning_rate": 3.193810751834954e-06, "loss": 0.0321, "step": 3220 }, { "epoch": 0.09611235921622305, "grad_norm": 1.059968113899231, "learning_rate": 3.203729418766118e-06, "loss": 0.0205, "step": 3230 }, { "epoch": 0.0964099206998646, "grad_norm": 1.158929467201233, "learning_rate": 3.2136480856972825e-06, "loss": 0.0217, "step": 3240 }, { "epoch": 0.09670748218350617, "grad_norm": 0.9821065068244934, "learning_rate": 3.223566752628447e-06, "loss": 0.0238, "step": 3250 }, { "epoch": 0.09700504366714773, "grad_norm": 0.8953707218170166, "learning_rate": 3.2334854195596116e-06, "loss": 0.0213, "step": 3260 }, { "epoch": 0.09730260515078928, "grad_norm": 1.1939125061035156, "learning_rate": 3.2434040864907755e-06, "loss": 0.0256, "step": 3270 }, { "epoch": 0.09760016663443084, "grad_norm": 1.2447983026504517, "learning_rate": 3.2533227534219402e-06, "loss": 0.0249, "step": 3280 }, { "epoch": 0.0978977281180724, "grad_norm": 1.369057297706604, "learning_rate": 3.263241420353105e-06, "loss": 0.0255, "step": 3290 }, { "epoch": 0.09819528960171395, "grad_norm": 0.6457459926605225, "learning_rate": 3.2731600872842694e-06, "loss": 0.0248, "step": 3300 }, { "epoch": 0.0984928510853555, "grad_norm": 1.558974027633667, "learning_rate": 3.2830787542154337e-06, "loss": 0.0265, "step": 3310 }, { "epoch": 0.09879041256899707, "grad_norm": 0.996368408203125, "learning_rate": 3.292997421146598e-06, "loss": 0.0247, "step": 3320 }, { "epoch": 0.09908797405263862, "grad_norm": 1.127586841583252, "learning_rate": 3.302916088077763e-06, "loss": 0.0299, "step": 3330 }, { "epoch": 0.09938553553628018, "grad_norm": 0.9186244606971741, "learning_rate": 3.3128347550089267e-06, "loss": 0.0285, "step": 3340 }, { "epoch": 0.09968309701992174, "grad_norm": 1.0221513509750366, "learning_rate": 3.3227534219400915e-06, "loss": 0.0251, "step": 3350 }, { "epoch": 0.0999806585035633, "grad_norm": 0.9414447546005249, "learning_rate": 3.3326720888712562e-06, "loss": 0.0221, "step": 3360 }, { "epoch": 0.10027821998720486, "grad_norm": 0.8065342307090759, "learning_rate": 3.3425907558024206e-06, "loss": 0.0272, "step": 3370 }, { "epoch": 0.10057578147084642, "grad_norm": 1.2787001132965088, "learning_rate": 3.352509422733585e-06, "loss": 0.0222, "step": 3380 }, { "epoch": 0.10087334295448797, "grad_norm": 1.4104317426681519, "learning_rate": 3.3624280896647493e-06, "loss": 0.0151, "step": 3390 }, { "epoch": 0.10117090443812952, "grad_norm": 0.8916610479354858, "learning_rate": 3.372346756595914e-06, "loss": 0.0296, "step": 3400 }, { "epoch": 0.10146846592177108, "grad_norm": 0.9556142091751099, "learning_rate": 3.382265423527078e-06, "loss": 0.0297, "step": 3410 }, { "epoch": 0.10176602740541264, "grad_norm": 0.9227050542831421, "learning_rate": 3.3921840904582427e-06, "loss": 0.0215, "step": 3420 }, { "epoch": 0.1020635888890542, "grad_norm": 0.9551855325698853, "learning_rate": 3.402102757389407e-06, "loss": 0.024, "step": 3430 }, { "epoch": 0.10236115037269576, "grad_norm": 1.14336359500885, "learning_rate": 3.412021424320572e-06, "loss": 0.0257, "step": 3440 }, { "epoch": 0.10265871185633732, "grad_norm": 0.745895266532898, "learning_rate": 3.4219400912517357e-06, "loss": 0.0223, "step": 3450 }, { "epoch": 0.10295627333997888, "grad_norm": 1.0911139249801636, "learning_rate": 3.4318587581829005e-06, "loss": 0.0216, "step": 3460 }, { "epoch": 0.10325383482362042, "grad_norm": 0.7685094475746155, "learning_rate": 3.4417774251140652e-06, "loss": 0.0282, "step": 3470 }, { "epoch": 0.10355139630726198, "grad_norm": 1.1197466850280762, "learning_rate": 3.451696092045229e-06, "loss": 0.0316, "step": 3480 }, { "epoch": 0.10384895779090354, "grad_norm": 1.5077733993530273, "learning_rate": 3.461614758976394e-06, "loss": 0.024, "step": 3490 }, { "epoch": 0.1041465192745451, "grad_norm": 0.7094936370849609, "learning_rate": 3.4715334259075583e-06, "loss": 0.0227, "step": 3500 }, { "epoch": 0.10444408075818666, "grad_norm": 0.7722456455230713, "learning_rate": 3.481452092838723e-06, "loss": 0.0235, "step": 3510 }, { "epoch": 0.10474164224182822, "grad_norm": 0.9431714415550232, "learning_rate": 3.491370759769887e-06, "loss": 0.0267, "step": 3520 }, { "epoch": 0.10503920372546978, "grad_norm": 0.8686506152153015, "learning_rate": 3.5012894267010517e-06, "loss": 0.0309, "step": 3530 }, { "epoch": 0.10533676520911134, "grad_norm": 1.111708641052246, "learning_rate": 3.5112080936322165e-06, "loss": 0.0323, "step": 3540 }, { "epoch": 0.10563432669275288, "grad_norm": 0.904381513595581, "learning_rate": 3.5211267605633804e-06, "loss": 0.029, "step": 3550 }, { "epoch": 0.10593188817639444, "grad_norm": 0.8216888308525085, "learning_rate": 3.5310454274945447e-06, "loss": 0.0316, "step": 3560 }, { "epoch": 0.106229449660036, "grad_norm": 1.356307029724121, "learning_rate": 3.5409640944257095e-06, "loss": 0.0247, "step": 3570 }, { "epoch": 0.10652701114367756, "grad_norm": 1.1694786548614502, "learning_rate": 3.5508827613568743e-06, "loss": 0.0274, "step": 3580 }, { "epoch": 0.10682457262731912, "grad_norm": 0.853155255317688, "learning_rate": 3.560801428288038e-06, "loss": 0.0198, "step": 3590 }, { "epoch": 0.10712213411096068, "grad_norm": 1.147273063659668, "learning_rate": 3.570720095219203e-06, "loss": 0.0237, "step": 3600 }, { "epoch": 0.10741969559460224, "grad_norm": 1.4984309673309326, "learning_rate": 3.5806387621503673e-06, "loss": 0.0243, "step": 3610 }, { "epoch": 0.1077172570782438, "grad_norm": 0.8236483931541443, "learning_rate": 3.5905574290815316e-06, "loss": 0.0227, "step": 3620 }, { "epoch": 0.10801481856188536, "grad_norm": 0.8420001268386841, "learning_rate": 3.600476096012696e-06, "loss": 0.0263, "step": 3630 }, { "epoch": 0.1083123800455269, "grad_norm": 0.7101237773895264, "learning_rate": 3.6103947629438607e-06, "loss": 0.0267, "step": 3640 }, { "epoch": 0.10860994152916846, "grad_norm": 0.8179559707641602, "learning_rate": 3.6203134298750255e-06, "loss": 0.0217, "step": 3650 }, { "epoch": 0.10890750301281002, "grad_norm": 0.7057069540023804, "learning_rate": 3.6302320968061894e-06, "loss": 0.0235, "step": 3660 }, { "epoch": 0.10920506449645158, "grad_norm": 0.45420727133750916, "learning_rate": 3.640150763737354e-06, "loss": 0.028, "step": 3670 }, { "epoch": 0.10950262598009314, "grad_norm": 0.5516594052314758, "learning_rate": 3.6500694306685185e-06, "loss": 0.0302, "step": 3680 }, { "epoch": 0.1098001874637347, "grad_norm": 1.3948936462402344, "learning_rate": 3.659988097599683e-06, "loss": 0.0295, "step": 3690 }, { "epoch": 0.11009774894737626, "grad_norm": 0.8102099299430847, "learning_rate": 3.669906764530847e-06, "loss": 0.0155, "step": 3700 }, { "epoch": 0.11039531043101782, "grad_norm": 0.7337104082107544, "learning_rate": 3.679825431462012e-06, "loss": 0.0212, "step": 3710 }, { "epoch": 0.11069287191465936, "grad_norm": 1.0427439212799072, "learning_rate": 3.6897440983931763e-06, "loss": 0.0263, "step": 3720 }, { "epoch": 0.11099043339830092, "grad_norm": 0.7480818033218384, "learning_rate": 3.6996627653243406e-06, "loss": 0.0241, "step": 3730 }, { "epoch": 0.11128799488194248, "grad_norm": 0.8698295950889587, "learning_rate": 3.709581432255505e-06, "loss": 0.0246, "step": 3740 }, { "epoch": 0.11158555636558404, "grad_norm": 0.9874035716056824, "learning_rate": 3.7195000991866697e-06, "loss": 0.0255, "step": 3750 }, { "epoch": 0.1118831178492256, "grad_norm": 0.8215842247009277, "learning_rate": 3.7294187661178336e-06, "loss": 0.028, "step": 3760 }, { "epoch": 0.11218067933286716, "grad_norm": 0.6368029713630676, "learning_rate": 3.7393374330489984e-06, "loss": 0.0247, "step": 3770 }, { "epoch": 0.11247824081650871, "grad_norm": 0.7725998759269714, "learning_rate": 3.749256099980163e-06, "loss": 0.0226, "step": 3780 }, { "epoch": 0.11277580230015027, "grad_norm": 1.1058201789855957, "learning_rate": 3.7591747669113275e-06, "loss": 0.0218, "step": 3790 }, { "epoch": 0.11307336378379182, "grad_norm": 1.85744047164917, "learning_rate": 3.769093433842492e-06, "loss": 0.0208, "step": 3800 }, { "epoch": 0.11337092526743338, "grad_norm": 1.0145779848098755, "learning_rate": 3.779012100773656e-06, "loss": 0.0206, "step": 3810 }, { "epoch": 0.11366848675107494, "grad_norm": 0.9520381689071655, "learning_rate": 3.788930767704821e-06, "loss": 0.018, "step": 3820 }, { "epoch": 0.1139660482347165, "grad_norm": 1.038387417793274, "learning_rate": 3.798849434635985e-06, "loss": 0.0271, "step": 3830 }, { "epoch": 0.11426360971835806, "grad_norm": 1.1109968423843384, "learning_rate": 3.8087681015671496e-06, "loss": 0.0253, "step": 3840 }, { "epoch": 0.11456117120199961, "grad_norm": 0.39903706312179565, "learning_rate": 3.818686768498314e-06, "loss": 0.0195, "step": 3850 }, { "epoch": 0.11485873268564117, "grad_norm": 0.9857504963874817, "learning_rate": 3.828605435429479e-06, "loss": 0.0231, "step": 3860 }, { "epoch": 0.11515629416928273, "grad_norm": 0.8757176399230957, "learning_rate": 3.838524102360643e-06, "loss": 0.038, "step": 3870 }, { "epoch": 0.11545385565292429, "grad_norm": 1.0845752954483032, "learning_rate": 3.848442769291808e-06, "loss": 0.0288, "step": 3880 }, { "epoch": 0.11575141713656584, "grad_norm": 0.5662214756011963, "learning_rate": 3.858361436222972e-06, "loss": 0.0247, "step": 3890 }, { "epoch": 0.1160489786202074, "grad_norm": 0.9469514489173889, "learning_rate": 3.8682801031541365e-06, "loss": 0.0159, "step": 3900 }, { "epoch": 0.11634654010384896, "grad_norm": 0.9942561388015747, "learning_rate": 3.8781987700853004e-06, "loss": 0.0237, "step": 3910 }, { "epoch": 0.11664410158749051, "grad_norm": 1.0120766162872314, "learning_rate": 3.888117437016465e-06, "loss": 0.0224, "step": 3920 }, { "epoch": 0.11694166307113207, "grad_norm": 1.3203915357589722, "learning_rate": 3.89803610394763e-06, "loss": 0.0277, "step": 3930 }, { "epoch": 0.11723922455477363, "grad_norm": 1.18076753616333, "learning_rate": 3.907954770878794e-06, "loss": 0.0196, "step": 3940 }, { "epoch": 0.11753678603841519, "grad_norm": 1.3022279739379883, "learning_rate": 3.917873437809959e-06, "loss": 0.0289, "step": 3950 }, { "epoch": 0.11783434752205675, "grad_norm": 1.060773253440857, "learning_rate": 3.927792104741123e-06, "loss": 0.0296, "step": 3960 }, { "epoch": 0.1181319090056983, "grad_norm": 1.0609890222549438, "learning_rate": 3.937710771672287e-06, "loss": 0.0252, "step": 3970 }, { "epoch": 0.11842947048933986, "grad_norm": 1.0547178983688354, "learning_rate": 3.947629438603452e-06, "loss": 0.0174, "step": 3980 }, { "epoch": 0.11872703197298141, "grad_norm": 0.8388320803642273, "learning_rate": 3.957548105534617e-06, "loss": 0.0199, "step": 3990 }, { "epoch": 0.11902459345662297, "grad_norm": 0.8977870345115662, "learning_rate": 3.967466772465781e-06, "loss": 0.0252, "step": 4000 }, { "epoch": 0.11932215494026453, "grad_norm": 0.9750604033470154, "learning_rate": 3.9773854393969455e-06, "loss": 0.0147, "step": 4010 }, { "epoch": 0.11961971642390609, "grad_norm": 0.7938702702522278, "learning_rate": 3.9873041063281094e-06, "loss": 0.0268, "step": 4020 }, { "epoch": 0.11991727790754765, "grad_norm": 0.7662438154220581, "learning_rate": 3.997222773259274e-06, "loss": 0.0228, "step": 4030 }, { "epoch": 0.12021483939118921, "grad_norm": 0.5876055955886841, "learning_rate": 4.007141440190438e-06, "loss": 0.021, "step": 4040 }, { "epoch": 0.12051240087483076, "grad_norm": 1.3427727222442627, "learning_rate": 4.017060107121603e-06, "loss": 0.0216, "step": 4050 }, { "epoch": 0.12080996235847231, "grad_norm": 1.1259666681289673, "learning_rate": 4.026978774052768e-06, "loss": 0.031, "step": 4060 }, { "epoch": 0.12110752384211387, "grad_norm": 1.4089915752410889, "learning_rate": 4.036897440983932e-06, "loss": 0.0273, "step": 4070 }, { "epoch": 0.12140508532575543, "grad_norm": 0.7183972597122192, "learning_rate": 4.046816107915096e-06, "loss": 0.0264, "step": 4080 }, { "epoch": 0.12170264680939699, "grad_norm": 0.8185482621192932, "learning_rate": 4.056734774846261e-06, "loss": 0.0241, "step": 4090 }, { "epoch": 0.12200020829303855, "grad_norm": 0.8545914888381958, "learning_rate": 4.066653441777426e-06, "loss": 0.0244, "step": 4100 }, { "epoch": 0.12229776977668011, "grad_norm": 1.3654264211654663, "learning_rate": 4.07657210870859e-06, "loss": 0.0214, "step": 4110 }, { "epoch": 0.12259533126032167, "grad_norm": 0.8737972974777222, "learning_rate": 4.0864907756397545e-06, "loss": 0.0191, "step": 4120 }, { "epoch": 0.12289289274396323, "grad_norm": 1.0690584182739258, "learning_rate": 4.096409442570919e-06, "loss": 0.024, "step": 4130 }, { "epoch": 0.12319045422760477, "grad_norm": 0.6160016059875488, "learning_rate": 4.106328109502083e-06, "loss": 0.0233, "step": 4140 }, { "epoch": 0.12348801571124633, "grad_norm": 1.241460919380188, "learning_rate": 4.116246776433248e-06, "loss": 0.0217, "step": 4150 }, { "epoch": 0.12378557719488789, "grad_norm": 0.7851904630661011, "learning_rate": 4.126165443364412e-06, "loss": 0.0232, "step": 4160 }, { "epoch": 0.12408313867852945, "grad_norm": 0.8023395538330078, "learning_rate": 4.136084110295577e-06, "loss": 0.0154, "step": 4170 }, { "epoch": 0.12438070016217101, "grad_norm": 0.8090947270393372, "learning_rate": 4.1460027772267406e-06, "loss": 0.0248, "step": 4180 }, { "epoch": 0.12467826164581257, "grad_norm": 1.2013516426086426, "learning_rate": 4.155921444157905e-06, "loss": 0.0255, "step": 4190 }, { "epoch": 0.12497582312945413, "grad_norm": 0.7371988892555237, "learning_rate": 4.16584011108907e-06, "loss": 0.0193, "step": 4200 }, { "epoch": 0.12527338461309567, "grad_norm": 0.9563808441162109, "learning_rate": 4.175758778020235e-06, "loss": 0.019, "step": 4210 }, { "epoch": 0.12557094609673725, "grad_norm": 0.8831337690353394, "learning_rate": 4.185677444951399e-06, "loss": 0.0213, "step": 4220 }, { "epoch": 0.1258685075803788, "grad_norm": 0.8568177819252014, "learning_rate": 4.1955961118825635e-06, "loss": 0.0206, "step": 4230 }, { "epoch": 0.12616606906402036, "grad_norm": 1.0133894681930542, "learning_rate": 4.205514778813728e-06, "loss": 0.0182, "step": 4240 }, { "epoch": 0.1264636305476619, "grad_norm": 0.8891164660453796, "learning_rate": 4.215433445744892e-06, "loss": 0.0237, "step": 4250 }, { "epoch": 0.12676119203130345, "grad_norm": 0.7363294959068298, "learning_rate": 4.225352112676057e-06, "loss": 0.0231, "step": 4260 }, { "epoch": 0.12705875351494503, "grad_norm": 0.9041340947151184, "learning_rate": 4.235270779607221e-06, "loss": 0.024, "step": 4270 }, { "epoch": 0.12735631499858657, "grad_norm": 0.8041317462921143, "learning_rate": 4.245189446538386e-06, "loss": 0.0228, "step": 4280 }, { "epoch": 0.12765387648222815, "grad_norm": 0.6687336564064026, "learning_rate": 4.2551081134695496e-06, "loss": 0.0185, "step": 4290 }, { "epoch": 0.1279514379658697, "grad_norm": 1.293837547302246, "learning_rate": 4.265026780400714e-06, "loss": 0.022, "step": 4300 }, { "epoch": 0.12824899944951126, "grad_norm": 1.1305828094482422, "learning_rate": 4.274945447331879e-06, "loss": 0.0236, "step": 4310 }, { "epoch": 0.1285465609331528, "grad_norm": 1.0490407943725586, "learning_rate": 4.284864114263043e-06, "loss": 0.0243, "step": 4320 }, { "epoch": 0.12884412241679438, "grad_norm": 0.884227991104126, "learning_rate": 4.294782781194208e-06, "loss": 0.0201, "step": 4330 }, { "epoch": 0.12914168390043593, "grad_norm": 1.138670563697815, "learning_rate": 4.3047014481253725e-06, "loss": 0.0224, "step": 4340 }, { "epoch": 0.12943924538407747, "grad_norm": 0.5384246706962585, "learning_rate": 4.314620115056537e-06, "loss": 0.0238, "step": 4350 }, { "epoch": 0.12973680686771905, "grad_norm": 1.078755497932434, "learning_rate": 4.324538781987701e-06, "loss": 0.0208, "step": 4360 }, { "epoch": 0.1300343683513606, "grad_norm": 0.6033352613449097, "learning_rate": 4.334457448918866e-06, "loss": 0.0241, "step": 4370 }, { "epoch": 0.13033192983500216, "grad_norm": 0.8568964004516602, "learning_rate": 4.34437611585003e-06, "loss": 0.0159, "step": 4380 }, { "epoch": 0.1306294913186437, "grad_norm": 0.8118696808815002, "learning_rate": 4.354294782781195e-06, "loss": 0.0227, "step": 4390 }, { "epoch": 0.13092705280228528, "grad_norm": 0.9359805583953857, "learning_rate": 4.3642134497123586e-06, "loss": 0.0249, "step": 4400 }, { "epoch": 0.13122461428592683, "grad_norm": 0.5428081750869751, "learning_rate": 4.374132116643523e-06, "loss": 0.0214, "step": 4410 }, { "epoch": 0.1315221757695684, "grad_norm": 1.0463958978652954, "learning_rate": 4.384050783574688e-06, "loss": 0.0178, "step": 4420 }, { "epoch": 0.13181973725320995, "grad_norm": 0.8976492285728455, "learning_rate": 4.393969450505852e-06, "loss": 0.0206, "step": 4430 }, { "epoch": 0.1321172987368515, "grad_norm": 1.0160496234893799, "learning_rate": 4.403888117437017e-06, "loss": 0.017, "step": 4440 }, { "epoch": 0.13241486022049306, "grad_norm": 0.6479525566101074, "learning_rate": 4.4138067843681815e-06, "loss": 0.0167, "step": 4450 }, { "epoch": 0.1327124217041346, "grad_norm": 0.752596378326416, "learning_rate": 4.4237254512993455e-06, "loss": 0.0224, "step": 4460 }, { "epoch": 0.13300998318777618, "grad_norm": 1.1788909435272217, "learning_rate": 4.43364411823051e-06, "loss": 0.0249, "step": 4470 }, { "epoch": 0.13330754467141773, "grad_norm": 1.3199071884155273, "learning_rate": 4.443562785161675e-06, "loss": 0.0195, "step": 4480 }, { "epoch": 0.1336051061550593, "grad_norm": 0.6868587136268616, "learning_rate": 4.45348145209284e-06, "loss": 0.0244, "step": 4490 }, { "epoch": 0.13390266763870085, "grad_norm": 0.8032627701759338, "learning_rate": 4.463400119024004e-06, "loss": 0.0249, "step": 4500 }, { "epoch": 0.1342002291223424, "grad_norm": 0.8796859383583069, "learning_rate": 4.473318785955168e-06, "loss": 0.0242, "step": 4510 }, { "epoch": 0.13449779060598396, "grad_norm": 0.7611225843429565, "learning_rate": 4.483237452886332e-06, "loss": 0.0192, "step": 4520 }, { "epoch": 0.1347953520896255, "grad_norm": 0.7379097938537598, "learning_rate": 4.493156119817496e-06, "loss": 0.017, "step": 4530 }, { "epoch": 0.13509291357326708, "grad_norm": 0.768040120601654, "learning_rate": 4.503074786748661e-06, "loss": 0.023, "step": 4540 }, { "epoch": 0.13539047505690863, "grad_norm": 1.1270999908447266, "learning_rate": 4.512993453679826e-06, "loss": 0.0206, "step": 4550 }, { "epoch": 0.1356880365405502, "grad_norm": 0.8380557298660278, "learning_rate": 4.5229121206109905e-06, "loss": 0.0203, "step": 4560 }, { "epoch": 0.13598559802419175, "grad_norm": 0.5051143169403076, "learning_rate": 4.5328307875421545e-06, "loss": 0.0206, "step": 4570 }, { "epoch": 0.13628315950783332, "grad_norm": 0.7948262691497803, "learning_rate": 4.542749454473319e-06, "loss": 0.0189, "step": 4580 }, { "epoch": 0.13658072099147486, "grad_norm": 0.751680850982666, "learning_rate": 4.552668121404484e-06, "loss": 0.0416, "step": 4590 }, { "epoch": 0.1368782824751164, "grad_norm": 0.6078497767448425, "learning_rate": 4.562586788335648e-06, "loss": 0.0187, "step": 4600 }, { "epoch": 0.13717584395875798, "grad_norm": 0.886657178401947, "learning_rate": 4.572505455266813e-06, "loss": 0.0276, "step": 4610 }, { "epoch": 0.13747340544239953, "grad_norm": 0.7378482818603516, "learning_rate": 4.5824241221979774e-06, "loss": 0.0216, "step": 4620 }, { "epoch": 0.1377709669260411, "grad_norm": 1.186688780784607, "learning_rate": 4.592342789129141e-06, "loss": 0.024, "step": 4630 }, { "epoch": 0.13806852840968264, "grad_norm": 0.6691302061080933, "learning_rate": 4.602261456060306e-06, "loss": 0.02, "step": 4640 }, { "epoch": 0.13836608989332422, "grad_norm": 0.6595141291618347, "learning_rate": 4.61218012299147e-06, "loss": 0.0144, "step": 4650 }, { "epoch": 0.13866365137696576, "grad_norm": 0.9201310873031616, "learning_rate": 4.622098789922635e-06, "loss": 0.0208, "step": 4660 }, { "epoch": 0.13896121286060734, "grad_norm": 1.0041778087615967, "learning_rate": 4.632017456853799e-06, "loss": 0.0202, "step": 4670 }, { "epoch": 0.13925877434424888, "grad_norm": 1.0548121929168701, "learning_rate": 4.6419361237849635e-06, "loss": 0.0267, "step": 4680 }, { "epoch": 0.13955633582789043, "grad_norm": 0.9084816575050354, "learning_rate": 4.651854790716128e-06, "loss": 0.0181, "step": 4690 }, { "epoch": 0.139853897311532, "grad_norm": 0.7562587857246399, "learning_rate": 4.661773457647293e-06, "loss": 0.016, "step": 4700 }, { "epoch": 0.14015145879517354, "grad_norm": 0.7055025100708008, "learning_rate": 4.671692124578457e-06, "loss": 0.0285, "step": 4710 }, { "epoch": 0.14044902027881512, "grad_norm": 0.8775312900543213, "learning_rate": 4.681610791509622e-06, "loss": 0.0173, "step": 4720 }, { "epoch": 0.14074658176245666, "grad_norm": 0.6354302167892456, "learning_rate": 4.6915294584407864e-06, "loss": 0.0207, "step": 4730 }, { "epoch": 0.14104414324609824, "grad_norm": 1.3053086996078491, "learning_rate": 4.70144812537195e-06, "loss": 0.024, "step": 4740 }, { "epoch": 0.14134170472973978, "grad_norm": 0.7879724502563477, "learning_rate": 4.711366792303115e-06, "loss": 0.0168, "step": 4750 }, { "epoch": 0.14163926621338133, "grad_norm": 0.8245661854743958, "learning_rate": 4.721285459234279e-06, "loss": 0.02, "step": 4760 }, { "epoch": 0.1419368276970229, "grad_norm": 0.7638753056526184, "learning_rate": 4.731204126165444e-06, "loss": 0.022, "step": 4770 }, { "epoch": 0.14223438918066444, "grad_norm": 0.8078051209449768, "learning_rate": 4.741122793096608e-06, "loss": 0.0157, "step": 4780 }, { "epoch": 0.14253195066430602, "grad_norm": 0.591114342212677, "learning_rate": 4.7510414600277725e-06, "loss": 0.0172, "step": 4790 }, { "epoch": 0.14282951214794756, "grad_norm": 0.848561704158783, "learning_rate": 4.760960126958937e-06, "loss": 0.0232, "step": 4800 }, { "epoch": 0.14312707363158914, "grad_norm": 0.4405738115310669, "learning_rate": 4.770878793890101e-06, "loss": 0.0156, "step": 4810 }, { "epoch": 0.14342463511523068, "grad_norm": 0.8114716410636902, "learning_rate": 4.780797460821266e-06, "loss": 0.0183, "step": 4820 }, { "epoch": 0.14372219659887225, "grad_norm": 0.6860371232032776, "learning_rate": 4.790716127752431e-06, "loss": 0.0179, "step": 4830 }, { "epoch": 0.1440197580825138, "grad_norm": 0.8545287847518921, "learning_rate": 4.8006347946835954e-06, "loss": 0.0191, "step": 4840 }, { "epoch": 0.14431731956615534, "grad_norm": 1.0109283924102783, "learning_rate": 4.810553461614759e-06, "loss": 0.0251, "step": 4850 }, { "epoch": 0.14461488104979692, "grad_norm": 1.1049234867095947, "learning_rate": 4.820472128545924e-06, "loss": 0.0179, "step": 4860 }, { "epoch": 0.14491244253343846, "grad_norm": 0.7811404466629028, "learning_rate": 4.830390795477088e-06, "loss": 0.0215, "step": 4870 }, { "epoch": 0.14521000401708004, "grad_norm": 0.8581144213676453, "learning_rate": 4.840309462408253e-06, "loss": 0.014, "step": 4880 }, { "epoch": 0.14550756550072158, "grad_norm": 0.645479679107666, "learning_rate": 4.850228129339417e-06, "loss": 0.033, "step": 4890 }, { "epoch": 0.14580512698436315, "grad_norm": 1.454235315322876, "learning_rate": 4.8601467962705815e-06, "loss": 0.0162, "step": 4900 }, { "epoch": 0.1461026884680047, "grad_norm": 1.0372810363769531, "learning_rate": 4.870065463201746e-06, "loss": 0.0301, "step": 4910 }, { "epoch": 0.14640024995164627, "grad_norm": 0.9281333088874817, "learning_rate": 4.87998413013291e-06, "loss": 0.0243, "step": 4920 }, { "epoch": 0.14669781143528782, "grad_norm": 0.7368916869163513, "learning_rate": 4.889902797064075e-06, "loss": 0.0179, "step": 4930 }, { "epoch": 0.14699537291892936, "grad_norm": 0.48965415358543396, "learning_rate": 4.89982146399524e-06, "loss": 0.0218, "step": 4940 }, { "epoch": 0.14729293440257094, "grad_norm": 0.58979731798172, "learning_rate": 4.909740130926404e-06, "loss": 0.0199, "step": 4950 }, { "epoch": 0.14759049588621248, "grad_norm": 0.6125749945640564, "learning_rate": 4.919658797857568e-06, "loss": 0.0234, "step": 4960 }, { "epoch": 0.14788805736985405, "grad_norm": 0.7615112066268921, "learning_rate": 4.929577464788733e-06, "loss": 0.0171, "step": 4970 }, { "epoch": 0.1481856188534956, "grad_norm": 0.4208472669124603, "learning_rate": 4.939496131719898e-06, "loss": 0.0137, "step": 4980 }, { "epoch": 0.14848318033713717, "grad_norm": 0.5210906267166138, "learning_rate": 4.949414798651062e-06, "loss": 0.0185, "step": 4990 }, { "epoch": 0.14878074182077872, "grad_norm": 0.9346924424171448, "learning_rate": 4.9593334655822266e-06, "loss": 0.0205, "step": 5000 }, { "epoch": 0.14907830330442026, "grad_norm": 0.5227291584014893, "learning_rate": 4.9692521325133905e-06, "loss": 0.0175, "step": 5010 }, { "epoch": 0.14937586478806184, "grad_norm": 1.1737834215164185, "learning_rate": 4.979170799444555e-06, "loss": 0.0195, "step": 5020 }, { "epoch": 0.14967342627170338, "grad_norm": 0.7716584801673889, "learning_rate": 4.989089466375719e-06, "loss": 0.0211, "step": 5030 }, { "epoch": 0.14997098775534495, "grad_norm": 1.1868743896484375, "learning_rate": 4.999008133306884e-06, "loss": 0.0245, "step": 5040 }, { "epoch": 0.1502685492389865, "grad_norm": 0.8582828044891357, "learning_rate": 5.008926800238049e-06, "loss": 0.0173, "step": 5050 }, { "epoch": 0.15056611072262807, "grad_norm": 0.6624183058738708, "learning_rate": 5.0188454671692135e-06, "loss": 0.0197, "step": 5060 }, { "epoch": 0.15086367220626962, "grad_norm": 0.8911364078521729, "learning_rate": 5.0287641341003765e-06, "loss": 0.018, "step": 5070 }, { "epoch": 0.1511612336899112, "grad_norm": 0.9480071663856506, "learning_rate": 5.038682801031541e-06, "loss": 0.0172, "step": 5080 }, { "epoch": 0.15145879517355273, "grad_norm": 0.6935778856277466, "learning_rate": 5.048601467962706e-06, "loss": 0.0219, "step": 5090 }, { "epoch": 0.15175635665719428, "grad_norm": 0.7279891967773438, "learning_rate": 5.058520134893871e-06, "loss": 0.0166, "step": 5100 }, { "epoch": 0.15205391814083585, "grad_norm": 0.9101371169090271, "learning_rate": 5.068438801825036e-06, "loss": 0.0258, "step": 5110 }, { "epoch": 0.1523514796244774, "grad_norm": 0.9158045053482056, "learning_rate": 5.0783574687561995e-06, "loss": 0.0266, "step": 5120 }, { "epoch": 0.15264904110811897, "grad_norm": 1.1610599756240845, "learning_rate": 5.088276135687364e-06, "loss": 0.019, "step": 5130 }, { "epoch": 0.15294660259176052, "grad_norm": 0.6802767515182495, "learning_rate": 5.098194802618528e-06, "loss": 0.0267, "step": 5140 }, { "epoch": 0.1532441640754021, "grad_norm": 0.6131333112716675, "learning_rate": 5.108113469549693e-06, "loss": 0.0205, "step": 5150 }, { "epoch": 0.15354172555904363, "grad_norm": 0.864658534526825, "learning_rate": 5.118032136480857e-06, "loss": 0.0177, "step": 5160 }, { "epoch": 0.1538392870426852, "grad_norm": 1.0043835639953613, "learning_rate": 5.127950803412022e-06, "loss": 0.0165, "step": 5170 }, { "epoch": 0.15413684852632675, "grad_norm": 0.578406810760498, "learning_rate": 5.137869470343186e-06, "loss": 0.0181, "step": 5180 }, { "epoch": 0.1544344100099683, "grad_norm": 0.5262647271156311, "learning_rate": 5.147788137274351e-06, "loss": 0.0153, "step": 5190 }, { "epoch": 0.15473197149360987, "grad_norm": 0.6592648029327393, "learning_rate": 5.157706804205516e-06, "loss": 0.0137, "step": 5200 }, { "epoch": 0.15502953297725142, "grad_norm": 0.6886563897132874, "learning_rate": 5.167625471136679e-06, "loss": 0.0233, "step": 5210 }, { "epoch": 0.155327094460893, "grad_norm": 1.165763020515442, "learning_rate": 5.177544138067844e-06, "loss": 0.0238, "step": 5220 }, { "epoch": 0.15562465594453453, "grad_norm": 0.9839634299278259, "learning_rate": 5.1874628049990085e-06, "loss": 0.0227, "step": 5230 }, { "epoch": 0.1559222174281761, "grad_norm": 0.7777414321899414, "learning_rate": 5.197381471930173e-06, "loss": 0.0231, "step": 5240 }, { "epoch": 0.15621977891181765, "grad_norm": 0.9021438360214233, "learning_rate": 5.207300138861337e-06, "loss": 0.0225, "step": 5250 }, { "epoch": 0.1565173403954592, "grad_norm": 0.7397061586380005, "learning_rate": 5.217218805792502e-06, "loss": 0.0221, "step": 5260 }, { "epoch": 0.15681490187910077, "grad_norm": 0.7931336760520935, "learning_rate": 5.227137472723667e-06, "loss": 0.0148, "step": 5270 }, { "epoch": 0.15711246336274232, "grad_norm": 0.6352207064628601, "learning_rate": 5.237056139654831e-06, "loss": 0.02, "step": 5280 }, { "epoch": 0.1574100248463839, "grad_norm": 0.8885530829429626, "learning_rate": 5.2469748065859945e-06, "loss": 0.015, "step": 5290 }, { "epoch": 0.15770758633002543, "grad_norm": 1.1638803482055664, "learning_rate": 5.256893473517159e-06, "loss": 0.0163, "step": 5300 }, { "epoch": 0.158005147813667, "grad_norm": 0.7104415893554688, "learning_rate": 5.266812140448324e-06, "loss": 0.0185, "step": 5310 }, { "epoch": 0.15830270929730855, "grad_norm": 0.79265958070755, "learning_rate": 5.276730807379489e-06, "loss": 0.0164, "step": 5320 }, { "epoch": 0.15860027078095013, "grad_norm": 0.7691792249679565, "learning_rate": 5.286649474310654e-06, "loss": 0.0182, "step": 5330 }, { "epoch": 0.15889783226459167, "grad_norm": 0.8810810446739197, "learning_rate": 5.296568141241818e-06, "loss": 0.0181, "step": 5340 }, { "epoch": 0.15919539374823322, "grad_norm": 0.5554110407829285, "learning_rate": 5.3064868081729814e-06, "loss": 0.0206, "step": 5350 }, { "epoch": 0.1594929552318748, "grad_norm": 0.6571983098983765, "learning_rate": 5.316405475104146e-06, "loss": 0.0177, "step": 5360 }, { "epoch": 0.15979051671551633, "grad_norm": 1.2546825408935547, "learning_rate": 5.326324142035311e-06, "loss": 0.0134, "step": 5370 }, { "epoch": 0.1600880781991579, "grad_norm": 0.9029501080513, "learning_rate": 5.336242808966475e-06, "loss": 0.0227, "step": 5380 }, { "epoch": 0.16038563968279945, "grad_norm": 1.2888411283493042, "learning_rate": 5.34616147589764e-06, "loss": 0.0238, "step": 5390 }, { "epoch": 0.16068320116644103, "grad_norm": 0.498780220746994, "learning_rate": 5.356080142828804e-06, "loss": 0.0172, "step": 5400 }, { "epoch": 0.16098076265008257, "grad_norm": 0.632793664932251, "learning_rate": 5.365998809759969e-06, "loss": 0.0181, "step": 5410 }, { "epoch": 0.16127832413372414, "grad_norm": 0.722195029258728, "learning_rate": 5.375917476691132e-06, "loss": 0.0182, "step": 5420 }, { "epoch": 0.1615758856173657, "grad_norm": 1.6111044883728027, "learning_rate": 5.385836143622297e-06, "loss": 0.0197, "step": 5430 }, { "epoch": 0.16187344710100723, "grad_norm": 0.6911579370498657, "learning_rate": 5.395754810553462e-06, "loss": 0.0178, "step": 5440 }, { "epoch": 0.1621710085846488, "grad_norm": 0.43977272510528564, "learning_rate": 5.4056734774846265e-06, "loss": 0.0163, "step": 5450 }, { "epoch": 0.16246857006829035, "grad_norm": 0.7671920657157898, "learning_rate": 5.415592144415791e-06, "loss": 0.0195, "step": 5460 }, { "epoch": 0.16276613155193193, "grad_norm": 1.569055438041687, "learning_rate": 5.425510811346956e-06, "loss": 0.02, "step": 5470 }, { "epoch": 0.16306369303557347, "grad_norm": 1.0007177591323853, "learning_rate": 5.43542947827812e-06, "loss": 0.0201, "step": 5480 }, { "epoch": 0.16336125451921504, "grad_norm": 0.9277750253677368, "learning_rate": 5.445348145209284e-06, "loss": 0.0152, "step": 5490 }, { "epoch": 0.1636588160028566, "grad_norm": 0.526936411857605, "learning_rate": 5.455266812140449e-06, "loss": 0.0187, "step": 5500 }, { "epoch": 0.16395637748649813, "grad_norm": 1.4058870077133179, "learning_rate": 5.465185479071613e-06, "loss": 0.0199, "step": 5510 }, { "epoch": 0.1642539389701397, "grad_norm": 0.3041781485080719, "learning_rate": 5.475104146002777e-06, "loss": 0.0191, "step": 5520 }, { "epoch": 0.16455150045378125, "grad_norm": 0.89137202501297, "learning_rate": 5.485022812933942e-06, "loss": 0.022, "step": 5530 }, { "epoch": 0.16484906193742282, "grad_norm": 0.8437548279762268, "learning_rate": 5.494941479865107e-06, "loss": 0.0183, "step": 5540 }, { "epoch": 0.16514662342106437, "grad_norm": 0.7703707814216614, "learning_rate": 5.504860146796272e-06, "loss": 0.017, "step": 5550 }, { "epoch": 0.16544418490470594, "grad_norm": 0.6376718282699585, "learning_rate": 5.514778813727435e-06, "loss": 0.0194, "step": 5560 }, { "epoch": 0.1657417463883475, "grad_norm": 0.8046645522117615, "learning_rate": 5.5246974806585994e-06, "loss": 0.0142, "step": 5570 }, { "epoch": 0.16603930787198906, "grad_norm": 0.8698367476463318, "learning_rate": 5.534616147589764e-06, "loss": 0.0195, "step": 5580 }, { "epoch": 0.1663368693556306, "grad_norm": 0.9055472016334534, "learning_rate": 5.544534814520929e-06, "loss": 0.018, "step": 5590 }, { "epoch": 0.16663443083927215, "grad_norm": 0.6962809562683105, "learning_rate": 5.554453481452094e-06, "loss": 0.0164, "step": 5600 }, { "epoch": 0.16693199232291372, "grad_norm": 0.6989609003067017, "learning_rate": 5.564372148383258e-06, "loss": 0.0217, "step": 5610 }, { "epoch": 0.16722955380655527, "grad_norm": 0.21388785541057587, "learning_rate": 5.574290815314422e-06, "loss": 0.0168, "step": 5620 }, { "epoch": 0.16752711529019684, "grad_norm": 0.5826854109764099, "learning_rate": 5.584209482245586e-06, "loss": 0.0178, "step": 5630 }, { "epoch": 0.1678246767738384, "grad_norm": 0.7379862070083618, "learning_rate": 5.594128149176751e-06, "loss": 0.0215, "step": 5640 }, { "epoch": 0.16812223825747996, "grad_norm": 0.4857451021671295, "learning_rate": 5.604046816107915e-06, "loss": 0.0203, "step": 5650 }, { "epoch": 0.1684197997411215, "grad_norm": 0.5936374068260193, "learning_rate": 5.61396548303908e-06, "loss": 0.0242, "step": 5660 }, { "epoch": 0.16871736122476308, "grad_norm": 0.8272753953933716, "learning_rate": 5.6238841499702445e-06, "loss": 0.0181, "step": 5670 }, { "epoch": 0.16901492270840462, "grad_norm": 0.8439192175865173, "learning_rate": 5.633802816901409e-06, "loss": 0.0222, "step": 5680 }, { "epoch": 0.16931248419204617, "grad_norm": 0.7865721583366394, "learning_rate": 5.643721483832574e-06, "loss": 0.0143, "step": 5690 }, { "epoch": 0.16961004567568774, "grad_norm": 0.8132695555686951, "learning_rate": 5.653640150763737e-06, "loss": 0.0203, "step": 5700 }, { "epoch": 0.1699076071593293, "grad_norm": 0.6086786389350891, "learning_rate": 5.663558817694902e-06, "loss": 0.0185, "step": 5710 }, { "epoch": 0.17020516864297086, "grad_norm": 0.604552149772644, "learning_rate": 5.673477484626067e-06, "loss": 0.0178, "step": 5720 }, { "epoch": 0.1705027301266124, "grad_norm": 0.9776806235313416, "learning_rate": 5.683396151557231e-06, "loss": 0.019, "step": 5730 }, { "epoch": 0.17080029161025398, "grad_norm": 0.4682866036891937, "learning_rate": 5.693314818488395e-06, "loss": 0.0134, "step": 5740 }, { "epoch": 0.17109785309389552, "grad_norm": 1.28706955909729, "learning_rate": 5.70323348541956e-06, "loss": 0.0196, "step": 5750 }, { "epoch": 0.1713954145775371, "grad_norm": 0.5669745206832886, "learning_rate": 5.713152152350725e-06, "loss": 0.0141, "step": 5760 }, { "epoch": 0.17169297606117864, "grad_norm": 0.577875554561615, "learning_rate": 5.723070819281889e-06, "loss": 0.0207, "step": 5770 }, { "epoch": 0.1719905375448202, "grad_norm": 0.7826892137527466, "learning_rate": 5.732989486213053e-06, "loss": 0.0209, "step": 5780 }, { "epoch": 0.17228809902846176, "grad_norm": 0.5729511976242065, "learning_rate": 5.7429081531442175e-06, "loss": 0.0172, "step": 5790 }, { "epoch": 0.1725856605121033, "grad_norm": 0.7617861032485962, "learning_rate": 5.752826820075382e-06, "loss": 0.0175, "step": 5800 }, { "epoch": 0.17288322199574488, "grad_norm": 0.909443199634552, "learning_rate": 5.762745487006547e-06, "loss": 0.0172, "step": 5810 }, { "epoch": 0.17318078347938642, "grad_norm": 0.6008189916610718, "learning_rate": 5.772664153937712e-06, "loss": 0.0194, "step": 5820 }, { "epoch": 0.173478344963028, "grad_norm": 0.441224604845047, "learning_rate": 5.7825828208688765e-06, "loss": 0.0145, "step": 5830 }, { "epoch": 0.17377590644666954, "grad_norm": 0.5400571823120117, "learning_rate": 5.7925014878000396e-06, "loss": 0.0126, "step": 5840 }, { "epoch": 0.1740734679303111, "grad_norm": 0.668343186378479, "learning_rate": 5.802420154731204e-06, "loss": 0.0155, "step": 5850 }, { "epoch": 0.17437102941395266, "grad_norm": 0.5859923362731934, "learning_rate": 5.812338821662369e-06, "loss": 0.0204, "step": 5860 }, { "epoch": 0.1746685908975942, "grad_norm": 0.6208030581474304, "learning_rate": 5.822257488593534e-06, "loss": 0.0179, "step": 5870 }, { "epoch": 0.17496615238123578, "grad_norm": 1.010622262954712, "learning_rate": 5.832176155524698e-06, "loss": 0.0197, "step": 5880 }, { "epoch": 0.17526371386487732, "grad_norm": 0.723790168762207, "learning_rate": 5.8420948224558625e-06, "loss": 0.017, "step": 5890 }, { "epoch": 0.1755612753485189, "grad_norm": 0.786346971988678, "learning_rate": 5.852013489387027e-06, "loss": 0.0167, "step": 5900 }, { "epoch": 0.17585883683216044, "grad_norm": 1.4446730613708496, "learning_rate": 5.86193215631819e-06, "loss": 0.0175, "step": 5910 }, { "epoch": 0.17615639831580202, "grad_norm": 0.4917304515838623, "learning_rate": 5.871850823249355e-06, "loss": 0.0146, "step": 5920 }, { "epoch": 0.17645395979944356, "grad_norm": 0.6675592660903931, "learning_rate": 5.88176949018052e-06, "loss": 0.0174, "step": 5930 }, { "epoch": 0.1767515212830851, "grad_norm": 0.48382967710494995, "learning_rate": 5.891688157111685e-06, "loss": 0.0183, "step": 5940 }, { "epoch": 0.17704908276672668, "grad_norm": 0.6168689727783203, "learning_rate": 5.9016068240428494e-06, "loss": 0.0182, "step": 5950 }, { "epoch": 0.17734664425036822, "grad_norm": 0.7473022937774658, "learning_rate": 5.911525490974014e-06, "loss": 0.0172, "step": 5960 }, { "epoch": 0.1776442057340098, "grad_norm": 0.8745890855789185, "learning_rate": 5.921444157905178e-06, "loss": 0.018, "step": 5970 }, { "epoch": 0.17794176721765134, "grad_norm": 1.5493327379226685, "learning_rate": 5.931362824836342e-06, "loss": 0.0212, "step": 5980 }, { "epoch": 0.17823932870129292, "grad_norm": 0.6132165193557739, "learning_rate": 5.941281491767507e-06, "loss": 0.0185, "step": 5990 }, { "epoch": 0.17853689018493446, "grad_norm": 1.4045782089233398, "learning_rate": 5.9512001586986716e-06, "loss": 0.0224, "step": 6000 }, { "epoch": 0.17883445166857603, "grad_norm": 0.6658136248588562, "learning_rate": 5.9611188256298355e-06, "loss": 0.0153, "step": 6010 }, { "epoch": 0.17913201315221758, "grad_norm": 1.114580750465393, "learning_rate": 5.971037492561e-06, "loss": 0.0181, "step": 6020 }, { "epoch": 0.17942957463585912, "grad_norm": 0.9523652791976929, "learning_rate": 5.980956159492165e-06, "loss": 0.0167, "step": 6030 }, { "epoch": 0.1797271361195007, "grad_norm": 0.8120092153549194, "learning_rate": 5.99087482642333e-06, "loss": 0.016, "step": 6040 }, { "epoch": 0.18002469760314224, "grad_norm": 0.8188436031341553, "learning_rate": 6.000793493354493e-06, "loss": 0.0203, "step": 6050 }, { "epoch": 0.18032225908678381, "grad_norm": 0.9809921979904175, "learning_rate": 6.010712160285658e-06, "loss": 0.0134, "step": 6060 }, { "epoch": 0.18061982057042536, "grad_norm": 0.6206534504890442, "learning_rate": 6.020630827216822e-06, "loss": 0.0225, "step": 6070 }, { "epoch": 0.18091738205406693, "grad_norm": 0.8130549192428589, "learning_rate": 6.030549494147987e-06, "loss": 0.0256, "step": 6080 }, { "epoch": 0.18121494353770848, "grad_norm": 0.47456061840057373, "learning_rate": 6.040468161079152e-06, "loss": 0.0159, "step": 6090 }, { "epoch": 0.18151250502135002, "grad_norm": 0.9311735033988953, "learning_rate": 6.050386828010316e-06, "loss": 0.019, "step": 6100 }, { "epoch": 0.1818100665049916, "grad_norm": 0.5348070859909058, "learning_rate": 6.0603054949414806e-06, "loss": 0.0156, "step": 6110 }, { "epoch": 0.18210762798863314, "grad_norm": 0.9379359483718872, "learning_rate": 6.0702241618726445e-06, "loss": 0.0142, "step": 6120 }, { "epoch": 0.18240518947227471, "grad_norm": 0.36227133870124817, "learning_rate": 6.080142828803809e-06, "loss": 0.0133, "step": 6130 }, { "epoch": 0.18270275095591626, "grad_norm": 0.8447180390357971, "learning_rate": 6.090061495734973e-06, "loss": 0.0188, "step": 6140 }, { "epoch": 0.18300031243955783, "grad_norm": 0.8023980259895325, "learning_rate": 6.099980162666138e-06, "loss": 0.0215, "step": 6150 }, { "epoch": 0.18329787392319938, "grad_norm": 0.5049998760223389, "learning_rate": 6.109898829597303e-06, "loss": 0.0124, "step": 6160 }, { "epoch": 0.18359543540684095, "grad_norm": 0.5275910496711731, "learning_rate": 6.1198174965284674e-06, "loss": 0.0254, "step": 6170 }, { "epoch": 0.1838929968904825, "grad_norm": 0.7937012314796448, "learning_rate": 6.129736163459632e-06, "loss": 0.0158, "step": 6180 }, { "epoch": 0.18419055837412404, "grad_norm": 0.5552460551261902, "learning_rate": 6.139654830390795e-06, "loss": 0.0223, "step": 6190 }, { "epoch": 0.18448811985776561, "grad_norm": 0.48848676681518555, "learning_rate": 6.14957349732196e-06, "loss": 0.0196, "step": 6200 }, { "epoch": 0.18478568134140716, "grad_norm": 0.33887386322021484, "learning_rate": 6.159492164253125e-06, "loss": 0.0138, "step": 6210 }, { "epoch": 0.18508324282504873, "grad_norm": 0.7016013264656067, "learning_rate": 6.1694108311842896e-06, "loss": 0.0146, "step": 6220 }, { "epoch": 0.18538080430869028, "grad_norm": 0.7078377604484558, "learning_rate": 6.179329498115454e-06, "loss": 0.0153, "step": 6230 }, { "epoch": 0.18567836579233185, "grad_norm": 0.5683500170707703, "learning_rate": 6.189248165046618e-06, "loss": 0.015, "step": 6240 }, { "epoch": 0.1859759272759734, "grad_norm": 0.7351623773574829, "learning_rate": 6.199166831977783e-06, "loss": 0.0193, "step": 6250 }, { "epoch": 0.18627348875961497, "grad_norm": 1.2081639766693115, "learning_rate": 6.209085498908947e-06, "loss": 0.0183, "step": 6260 }, { "epoch": 0.18657105024325651, "grad_norm": 0.5316331386566162, "learning_rate": 6.219004165840111e-06, "loss": 0.0188, "step": 6270 }, { "epoch": 0.18686861172689806, "grad_norm": 0.7363402843475342, "learning_rate": 6.228922832771276e-06, "loss": 0.019, "step": 6280 }, { "epoch": 0.18716617321053963, "grad_norm": 0.5929723381996155, "learning_rate": 6.23884149970244e-06, "loss": 0.0226, "step": 6290 }, { "epoch": 0.18746373469418118, "grad_norm": 0.8004336357116699, "learning_rate": 6.248760166633605e-06, "loss": 0.0169, "step": 6300 }, { "epoch": 0.18776129617782275, "grad_norm": 0.6105899214744568, "learning_rate": 6.25867883356477e-06, "loss": 0.0159, "step": 6310 }, { "epoch": 0.1880588576614643, "grad_norm": 0.7639376521110535, "learning_rate": 6.268597500495935e-06, "loss": 0.0183, "step": 6320 }, { "epoch": 0.18835641914510587, "grad_norm": 0.3886498808860779, "learning_rate": 6.278516167427098e-06, "loss": 0.0189, "step": 6330 }, { "epoch": 0.18865398062874741, "grad_norm": 1.2649954557418823, "learning_rate": 6.2884348343582625e-06, "loss": 0.0157, "step": 6340 }, { "epoch": 0.18895154211238896, "grad_norm": 0.8250831365585327, "learning_rate": 6.298353501289427e-06, "loss": 0.0172, "step": 6350 }, { "epoch": 0.18924910359603053, "grad_norm": 0.731031060218811, "learning_rate": 6.308272168220592e-06, "loss": 0.0149, "step": 6360 }, { "epoch": 0.18954666507967208, "grad_norm": 0.5846880078315735, "learning_rate": 6.318190835151756e-06, "loss": 0.0168, "step": 6370 }, { "epoch": 0.18984422656331365, "grad_norm": 0.5164114832878113, "learning_rate": 6.328109502082921e-06, "loss": 0.0203, "step": 6380 }, { "epoch": 0.1901417880469552, "grad_norm": 0.8243165016174316, "learning_rate": 6.3380281690140855e-06, "loss": 0.0201, "step": 6390 }, { "epoch": 0.19043934953059677, "grad_norm": 0.4044800400733948, "learning_rate": 6.347946835945249e-06, "loss": 0.017, "step": 6400 }, { "epoch": 0.19073691101423831, "grad_norm": 0.25009608268737793, "learning_rate": 6.357865502876413e-06, "loss": 0.0148, "step": 6410 }, { "epoch": 0.1910344724978799, "grad_norm": 0.8446623682975769, "learning_rate": 6.367784169807578e-06, "loss": 0.0138, "step": 6420 }, { "epoch": 0.19133203398152143, "grad_norm": 0.5882318019866943, "learning_rate": 6.377702836738743e-06, "loss": 0.0125, "step": 6430 }, { "epoch": 0.19162959546516298, "grad_norm": 0.4519135653972626, "learning_rate": 6.387621503669908e-06, "loss": 0.0185, "step": 6440 }, { "epoch": 0.19192715694880455, "grad_norm": 0.9911952018737793, "learning_rate": 6.397540170601072e-06, "loss": 0.0179, "step": 6450 }, { "epoch": 0.1922247184324461, "grad_norm": 0.6403932571411133, "learning_rate": 6.407458837532236e-06, "loss": 0.0176, "step": 6460 }, { "epoch": 0.19252227991608767, "grad_norm": 1.0003505945205688, "learning_rate": 6.4173775044634e-06, "loss": 0.0151, "step": 6470 }, { "epoch": 0.1928198413997292, "grad_norm": 0.7956016659736633, "learning_rate": 6.427296171394565e-06, "loss": 0.0162, "step": 6480 }, { "epoch": 0.1931174028833708, "grad_norm": 0.7249001860618591, "learning_rate": 6.43721483832573e-06, "loss": 0.0141, "step": 6490 }, { "epoch": 0.19341496436701233, "grad_norm": 0.7454989552497864, "learning_rate": 6.447133505256894e-06, "loss": 0.0156, "step": 6500 }, { "epoch": 0.1937125258506539, "grad_norm": 0.47531336545944214, "learning_rate": 6.457052172188058e-06, "loss": 0.0144, "step": 6510 }, { "epoch": 0.19401008733429545, "grad_norm": 0.6629605293273926, "learning_rate": 6.466970839119223e-06, "loss": 0.0158, "step": 6520 }, { "epoch": 0.194307648817937, "grad_norm": 0.8079919219017029, "learning_rate": 6.476889506050388e-06, "loss": 0.0193, "step": 6530 }, { "epoch": 0.19460521030157857, "grad_norm": 0.7602612972259521, "learning_rate": 6.486808172981551e-06, "loss": 0.0203, "step": 6540 }, { "epoch": 0.1949027717852201, "grad_norm": 0.5464874505996704, "learning_rate": 6.496726839912716e-06, "loss": 0.0155, "step": 6550 }, { "epoch": 0.1952003332688617, "grad_norm": 0.6592918038368225, "learning_rate": 6.5066455068438805e-06, "loss": 0.0134, "step": 6560 }, { "epoch": 0.19549789475250323, "grad_norm": 0.835834264755249, "learning_rate": 6.516564173775045e-06, "loss": 0.0125, "step": 6570 }, { "epoch": 0.1957954562361448, "grad_norm": 0.3692575693130493, "learning_rate": 6.52648284070621e-06, "loss": 0.0251, "step": 6580 }, { "epoch": 0.19609301771978635, "grad_norm": 0.5027411580085754, "learning_rate": 6.536401507637374e-06, "loss": 0.0165, "step": 6590 }, { "epoch": 0.1963905792034279, "grad_norm": 0.8743631839752197, "learning_rate": 6.546320174568539e-06, "loss": 0.0146, "step": 6600 }, { "epoch": 0.19668814068706947, "grad_norm": 0.4648347795009613, "learning_rate": 6.556238841499703e-06, "loss": 0.02, "step": 6610 }, { "epoch": 0.196985702170711, "grad_norm": 0.6392470002174377, "learning_rate": 6.566157508430867e-06, "loss": 0.0156, "step": 6620 }, { "epoch": 0.1972832636543526, "grad_norm": 0.4944845139980316, "learning_rate": 6.576076175362031e-06, "loss": 0.0109, "step": 6630 }, { "epoch": 0.19758082513799413, "grad_norm": 0.5273575186729431, "learning_rate": 6.585994842293196e-06, "loss": 0.0188, "step": 6640 }, { "epoch": 0.1978783866216357, "grad_norm": 0.7545974254608154, "learning_rate": 6.595913509224361e-06, "loss": 0.0127, "step": 6650 }, { "epoch": 0.19817594810527725, "grad_norm": 0.7707309722900391, "learning_rate": 6.605832176155526e-06, "loss": 0.0255, "step": 6660 }, { "epoch": 0.19847350958891882, "grad_norm": 1.006441593170166, "learning_rate": 6.61575084308669e-06, "loss": 0.0179, "step": 6670 }, { "epoch": 0.19877107107256037, "grad_norm": 0.7054467797279358, "learning_rate": 6.625669510017853e-06, "loss": 0.0264, "step": 6680 }, { "epoch": 0.1990686325562019, "grad_norm": 0.7726307511329651, "learning_rate": 6.635588176949018e-06, "loss": 0.0167, "step": 6690 }, { "epoch": 0.1993661940398435, "grad_norm": 0.6841588020324707, "learning_rate": 6.645506843880183e-06, "loss": 0.0179, "step": 6700 }, { "epoch": 0.19966375552348503, "grad_norm": 0.8845564126968384, "learning_rate": 6.655425510811348e-06, "loss": 0.0189, "step": 6710 }, { "epoch": 0.1999613170071266, "grad_norm": 0.42456939816474915, "learning_rate": 6.6653441777425125e-06, "loss": 0.0188, "step": 6720 }, { "epoch": 0.20025887849076815, "grad_norm": 0.8562147617340088, "learning_rate": 6.675262844673676e-06, "loss": 0.0164, "step": 6730 }, { "epoch": 0.20055643997440972, "grad_norm": 0.6481916904449463, "learning_rate": 6.685181511604841e-06, "loss": 0.0143, "step": 6740 }, { "epoch": 0.20085400145805127, "grad_norm": 0.81474369764328, "learning_rate": 6.695100178536005e-06, "loss": 0.0174, "step": 6750 }, { "epoch": 0.20115156294169284, "grad_norm": 0.2805514335632324, "learning_rate": 6.70501884546717e-06, "loss": 0.0157, "step": 6760 }, { "epoch": 0.20144912442533439, "grad_norm": 0.5997917652130127, "learning_rate": 6.714937512398334e-06, "loss": 0.0175, "step": 6770 }, { "epoch": 0.20174668590897593, "grad_norm": 0.6203995943069458, "learning_rate": 6.7248561793294985e-06, "loss": 0.0145, "step": 6780 }, { "epoch": 0.2020442473926175, "grad_norm": 0.6727122664451599, "learning_rate": 6.734774846260663e-06, "loss": 0.016, "step": 6790 }, { "epoch": 0.20234180887625905, "grad_norm": 0.5797989368438721, "learning_rate": 6.744693513191828e-06, "loss": 0.0236, "step": 6800 }, { "epoch": 0.20263937035990062, "grad_norm": 0.4259112477302551, "learning_rate": 6.754612180122993e-06, "loss": 0.0199, "step": 6810 }, { "epoch": 0.20293693184354217, "grad_norm": 0.38479194045066833, "learning_rate": 6.764530847054156e-06, "loss": 0.0177, "step": 6820 }, { "epoch": 0.20323449332718374, "grad_norm": 0.917850136756897, "learning_rate": 6.774449513985321e-06, "loss": 0.0167, "step": 6830 }, { "epoch": 0.20353205481082529, "grad_norm": 0.9241329431533813, "learning_rate": 6.784368180916485e-06, "loss": 0.0241, "step": 6840 }, { "epoch": 0.20382961629446683, "grad_norm": 0.5306258797645569, "learning_rate": 6.79428684784765e-06, "loss": 0.0168, "step": 6850 }, { "epoch": 0.2041271777781084, "grad_norm": 0.5961940884590149, "learning_rate": 6.804205514778814e-06, "loss": 0.0183, "step": 6860 }, { "epoch": 0.20442473926174995, "grad_norm": 0.4303014278411865, "learning_rate": 6.814124181709979e-06, "loss": 0.0143, "step": 6870 }, { "epoch": 0.20472230074539152, "grad_norm": 0.5473105311393738, "learning_rate": 6.824042848641144e-06, "loss": 0.015, "step": 6880 }, { "epoch": 0.20501986222903307, "grad_norm": 0.4311329424381256, "learning_rate": 6.8339615155723075e-06, "loss": 0.014, "step": 6890 }, { "epoch": 0.20531742371267464, "grad_norm": 0.8632122278213501, "learning_rate": 6.8438801825034714e-06, "loss": 0.015, "step": 6900 }, { "epoch": 0.20561498519631619, "grad_norm": 0.6494260430335999, "learning_rate": 6.853798849434636e-06, "loss": 0.018, "step": 6910 }, { "epoch": 0.20591254667995776, "grad_norm": 0.5689601898193359, "learning_rate": 6.863717516365801e-06, "loss": 0.0132, "step": 6920 }, { "epoch": 0.2062101081635993, "grad_norm": 0.6026427745819092, "learning_rate": 6.873636183296966e-06, "loss": 0.0213, "step": 6930 }, { "epoch": 0.20650766964724085, "grad_norm": 0.6805518865585327, "learning_rate": 6.8835548502281305e-06, "loss": 0.0183, "step": 6940 }, { "epoch": 0.20680523113088242, "grad_norm": 0.8876956105232239, "learning_rate": 6.893473517159294e-06, "loss": 0.0174, "step": 6950 }, { "epoch": 0.20710279261452397, "grad_norm": 0.8123579025268555, "learning_rate": 6.903392184090458e-06, "loss": 0.0184, "step": 6960 }, { "epoch": 0.20740035409816554, "grad_norm": 1.3177679777145386, "learning_rate": 6.913310851021623e-06, "loss": 0.0219, "step": 6970 }, { "epoch": 0.20769791558180709, "grad_norm": 0.5001023411750793, "learning_rate": 6.923229517952788e-06, "loss": 0.0166, "step": 6980 }, { "epoch": 0.20799547706544866, "grad_norm": 0.5131513476371765, "learning_rate": 6.933148184883952e-06, "loss": 0.0222, "step": 6990 }, { "epoch": 0.2082930385490902, "grad_norm": 0.6607728600502014, "learning_rate": 6.9430668518151165e-06, "loss": 0.0153, "step": 7000 }, { "epoch": 0.20859060003273178, "grad_norm": 0.851743221282959, "learning_rate": 6.952985518746281e-06, "loss": 0.0183, "step": 7010 }, { "epoch": 0.20888816151637332, "grad_norm": 0.5525038838386536, "learning_rate": 6.962904185677446e-06, "loss": 0.0192, "step": 7020 }, { "epoch": 0.20918572300001487, "grad_norm": 0.33146947622299194, "learning_rate": 6.972822852608609e-06, "loss": 0.0114, "step": 7030 }, { "epoch": 0.20948328448365644, "grad_norm": 0.801416277885437, "learning_rate": 6.982741519539774e-06, "loss": 0.0138, "step": 7040 }, { "epoch": 0.20978084596729799, "grad_norm": 0.7644625902175903, "learning_rate": 6.992660186470939e-06, "loss": 0.0161, "step": 7050 }, { "epoch": 0.21007840745093956, "grad_norm": 0.5371155142784119, "learning_rate": 7.002578853402103e-06, "loss": 0.0127, "step": 7060 }, { "epoch": 0.2103759689345811, "grad_norm": 0.6309472322463989, "learning_rate": 7.012497520333268e-06, "loss": 0.0149, "step": 7070 }, { "epoch": 0.21067353041822268, "grad_norm": 1.1386758089065552, "learning_rate": 7.022416187264433e-06, "loss": 0.0129, "step": 7080 }, { "epoch": 0.21097109190186422, "grad_norm": 0.7978598475456238, "learning_rate": 7.032334854195597e-06, "loss": 0.019, "step": 7090 }, { "epoch": 0.21126865338550577, "grad_norm": 0.3876984715461731, "learning_rate": 7.042253521126761e-06, "loss": 0.0165, "step": 7100 }, { "epoch": 0.21156621486914734, "grad_norm": 0.4466468095779419, "learning_rate": 7.0521721880579255e-06, "loss": 0.0146, "step": 7110 }, { "epoch": 0.21186377635278889, "grad_norm": 0.5730988383293152, "learning_rate": 7.0620908549890894e-06, "loss": 0.0129, "step": 7120 }, { "epoch": 0.21216133783643046, "grad_norm": 0.7743613719940186, "learning_rate": 7.072009521920254e-06, "loss": 0.0177, "step": 7130 }, { "epoch": 0.212458899320072, "grad_norm": 0.6237893104553223, "learning_rate": 7.081928188851419e-06, "loss": 0.0132, "step": 7140 }, { "epoch": 0.21275646080371358, "grad_norm": 0.7604868412017822, "learning_rate": 7.091846855782584e-06, "loss": 0.0162, "step": 7150 }, { "epoch": 0.21305402228735512, "grad_norm": 0.43441423773765564, "learning_rate": 7.1017655227137485e-06, "loss": 0.0153, "step": 7160 }, { "epoch": 0.2133515837709967, "grad_norm": 0.7605547904968262, "learning_rate": 7.1116841896449116e-06, "loss": 0.0234, "step": 7170 }, { "epoch": 0.21364914525463824, "grad_norm": 0.8699829578399658, "learning_rate": 7.121602856576076e-06, "loss": 0.0198, "step": 7180 }, { "epoch": 0.21394670673827978, "grad_norm": 0.6903035044670105, "learning_rate": 7.131521523507241e-06, "loss": 0.0204, "step": 7190 }, { "epoch": 0.21424426822192136, "grad_norm": 0.7946373224258423, "learning_rate": 7.141440190438406e-06, "loss": 0.0125, "step": 7200 }, { "epoch": 0.2145418297055629, "grad_norm": 0.4668390452861786, "learning_rate": 7.151358857369571e-06, "loss": 0.013, "step": 7210 }, { "epoch": 0.21483939118920448, "grad_norm": 0.6780369877815247, "learning_rate": 7.1612775243007345e-06, "loss": 0.0125, "step": 7220 }, { "epoch": 0.21513695267284602, "grad_norm": 0.6298226118087769, "learning_rate": 7.171196191231899e-06, "loss": 0.015, "step": 7230 }, { "epoch": 0.2154345141564876, "grad_norm": 0.742961585521698, "learning_rate": 7.181114858163063e-06, "loss": 0.0178, "step": 7240 }, { "epoch": 0.21573207564012914, "grad_norm": 0.6639779806137085, "learning_rate": 7.191033525094228e-06, "loss": 0.0186, "step": 7250 }, { "epoch": 0.2160296371237707, "grad_norm": 0.6462213397026062, "learning_rate": 7.200952192025392e-06, "loss": 0.0157, "step": 7260 }, { "epoch": 0.21632719860741226, "grad_norm": 0.7194512486457825, "learning_rate": 7.210870858956557e-06, "loss": 0.0207, "step": 7270 }, { "epoch": 0.2166247600910538, "grad_norm": 0.9251431822776794, "learning_rate": 7.2207895258877214e-06, "loss": 0.0208, "step": 7280 }, { "epoch": 0.21692232157469538, "grad_norm": 0.3771769106388092, "learning_rate": 7.230708192818886e-06, "loss": 0.0147, "step": 7290 }, { "epoch": 0.21721988305833692, "grad_norm": 0.7075599431991577, "learning_rate": 7.240626859750051e-06, "loss": 0.0189, "step": 7300 }, { "epoch": 0.2175174445419785, "grad_norm": 0.6475569009780884, "learning_rate": 7.250545526681214e-06, "loss": 0.0156, "step": 7310 }, { "epoch": 0.21781500602562004, "grad_norm": 0.4315061867237091, "learning_rate": 7.260464193612379e-06, "loss": 0.012, "step": 7320 }, { "epoch": 0.2181125675092616, "grad_norm": 0.35273051261901855, "learning_rate": 7.2703828605435435e-06, "loss": 0.0296, "step": 7330 }, { "epoch": 0.21841012899290316, "grad_norm": 0.8253064751625061, "learning_rate": 7.280301527474708e-06, "loss": 0.0199, "step": 7340 }, { "epoch": 0.2187076904765447, "grad_norm": 0.5713982582092285, "learning_rate": 7.290220194405872e-06, "loss": 0.0193, "step": 7350 }, { "epoch": 0.21900525196018628, "grad_norm": 0.562283456325531, "learning_rate": 7.300138861337037e-06, "loss": 0.0109, "step": 7360 }, { "epoch": 0.21930281344382782, "grad_norm": 0.608805239200592, "learning_rate": 7.310057528268202e-06, "loss": 0.0103, "step": 7370 }, { "epoch": 0.2196003749274694, "grad_norm": 0.5822507739067078, "learning_rate": 7.319976195199366e-06, "loss": 0.0156, "step": 7380 }, { "epoch": 0.21989793641111094, "grad_norm": 0.5982786417007446, "learning_rate": 7.32989486213053e-06, "loss": 0.017, "step": 7390 }, { "epoch": 0.2201954978947525, "grad_norm": 0.7703768014907837, "learning_rate": 7.339813529061694e-06, "loss": 0.0183, "step": 7400 }, { "epoch": 0.22049305937839406, "grad_norm": 0.49577417969703674, "learning_rate": 7.349732195992859e-06, "loss": 0.0146, "step": 7410 }, { "epoch": 0.22079062086203563, "grad_norm": 0.678773820400238, "learning_rate": 7.359650862924024e-06, "loss": 0.0163, "step": 7420 }, { "epoch": 0.22108818234567718, "grad_norm": 0.5761216878890991, "learning_rate": 7.369569529855189e-06, "loss": 0.0157, "step": 7430 }, { "epoch": 0.22138574382931872, "grad_norm": 0.400280237197876, "learning_rate": 7.3794881967863526e-06, "loss": 0.0154, "step": 7440 }, { "epoch": 0.2216833053129603, "grad_norm": 0.6218938827514648, "learning_rate": 7.3894068637175165e-06, "loss": 0.0181, "step": 7450 }, { "epoch": 0.22198086679660184, "grad_norm": 0.6725154519081116, "learning_rate": 7.399325530648681e-06, "loss": 0.0291, "step": 7460 }, { "epoch": 0.2222784282802434, "grad_norm": 0.423283189535141, "learning_rate": 7.409244197579846e-06, "loss": 0.0172, "step": 7470 }, { "epoch": 0.22257598976388496, "grad_norm": 0.35142800211906433, "learning_rate": 7.41916286451101e-06, "loss": 0.014, "step": 7480 }, { "epoch": 0.22287355124752653, "grad_norm": 0.5702025890350342, "learning_rate": 7.429081531442175e-06, "loss": 0.0177, "step": 7490 }, { "epoch": 0.22317111273116808, "grad_norm": 0.4592575132846832, "learning_rate": 7.4390001983733394e-06, "loss": 0.0185, "step": 7500 }, { "epoch": 0.22346867421480965, "grad_norm": 0.6007624864578247, "learning_rate": 7.448918865304504e-06, "loss": 0.0158, "step": 7510 }, { "epoch": 0.2237662356984512, "grad_norm": 0.9341857433319092, "learning_rate": 7.458837532235667e-06, "loss": 0.0177, "step": 7520 }, { "epoch": 0.22406379718209274, "grad_norm": 0.5943304300308228, "learning_rate": 7.468756199166832e-06, "loss": 0.0162, "step": 7530 }, { "epoch": 0.2243613586657343, "grad_norm": 0.6871801018714905, "learning_rate": 7.478674866097997e-06, "loss": 0.0183, "step": 7540 }, { "epoch": 0.22465892014937586, "grad_norm": 1.0524852275848389, "learning_rate": 7.4885935330291616e-06, "loss": 0.0135, "step": 7550 }, { "epoch": 0.22495648163301743, "grad_norm": 0.6039806008338928, "learning_rate": 7.498512199960326e-06, "loss": 0.0118, "step": 7560 }, { "epoch": 0.22525404311665898, "grad_norm": 0.5162005424499512, "learning_rate": 7.508430866891491e-06, "loss": 0.0116, "step": 7570 }, { "epoch": 0.22555160460030055, "grad_norm": 0.4040123224258423, "learning_rate": 7.518349533822655e-06, "loss": 0.0133, "step": 7580 }, { "epoch": 0.2258491660839421, "grad_norm": 0.792870283126831, "learning_rate": 7.528268200753819e-06, "loss": 0.0166, "step": 7590 }, { "epoch": 0.22614672756758364, "grad_norm": 0.6873290538787842, "learning_rate": 7.538186867684984e-06, "loss": 0.0157, "step": 7600 }, { "epoch": 0.2264442890512252, "grad_norm": 0.8748090267181396, "learning_rate": 7.5481055346161484e-06, "loss": 0.0181, "step": 7610 }, { "epoch": 0.22674185053486676, "grad_norm": 0.8150085806846619, "learning_rate": 7.558024201547312e-06, "loss": 0.0128, "step": 7620 }, { "epoch": 0.22703941201850833, "grad_norm": 0.5624592900276184, "learning_rate": 7.567942868478477e-06, "loss": 0.0247, "step": 7630 }, { "epoch": 0.22733697350214987, "grad_norm": 0.6027617454528809, "learning_rate": 7.577861535409642e-06, "loss": 0.0187, "step": 7640 }, { "epoch": 0.22763453498579145, "grad_norm": 0.6295135617256165, "learning_rate": 7.587780202340807e-06, "loss": 0.0159, "step": 7650 }, { "epoch": 0.227932096469433, "grad_norm": 0.6465359926223755, "learning_rate": 7.59769886927197e-06, "loss": 0.0123, "step": 7660 }, { "epoch": 0.22822965795307457, "grad_norm": 0.5965896248817444, "learning_rate": 7.6076175362031345e-06, "loss": 0.0169, "step": 7670 }, { "epoch": 0.2285272194367161, "grad_norm": 0.6079211831092834, "learning_rate": 7.617536203134299e-06, "loss": 0.0152, "step": 7680 }, { "epoch": 0.22882478092035766, "grad_norm": 0.517196536064148, "learning_rate": 7.627454870065464e-06, "loss": 0.0213, "step": 7690 }, { "epoch": 0.22912234240399923, "grad_norm": 0.7729844450950623, "learning_rate": 7.637373536996629e-06, "loss": 0.0201, "step": 7700 }, { "epoch": 0.22941990388764077, "grad_norm": 0.5407263040542603, "learning_rate": 7.647292203927794e-06, "loss": 0.0154, "step": 7710 }, { "epoch": 0.22971746537128235, "grad_norm": 0.4914335310459137, "learning_rate": 7.657210870858958e-06, "loss": 0.0178, "step": 7720 }, { "epoch": 0.2300150268549239, "grad_norm": 0.7944117784500122, "learning_rate": 7.667129537790121e-06, "loss": 0.0152, "step": 7730 }, { "epoch": 0.23031258833856547, "grad_norm": 0.5945983529090881, "learning_rate": 7.677048204721286e-06, "loss": 0.0148, "step": 7740 }, { "epoch": 0.230610149822207, "grad_norm": 0.40930965542793274, "learning_rate": 7.686966871652451e-06, "loss": 0.0121, "step": 7750 }, { "epoch": 0.23090771130584858, "grad_norm": 0.529886782169342, "learning_rate": 7.696885538583616e-06, "loss": 0.0153, "step": 7760 }, { "epoch": 0.23120527278949013, "grad_norm": 0.48753949999809265, "learning_rate": 7.70680420551478e-06, "loss": 0.0183, "step": 7770 }, { "epoch": 0.23150283427313167, "grad_norm": 0.8083294034004211, "learning_rate": 7.716722872445943e-06, "loss": 0.0126, "step": 7780 }, { "epoch": 0.23180039575677325, "grad_norm": 1.0227248668670654, "learning_rate": 7.726641539377108e-06, "loss": 0.0141, "step": 7790 }, { "epoch": 0.2320979572404148, "grad_norm": 0.3197932839393616, "learning_rate": 7.736560206308273e-06, "loss": 0.013, "step": 7800 }, { "epoch": 0.23239551872405637, "grad_norm": 0.3056640326976776, "learning_rate": 7.746478873239436e-06, "loss": 0.0115, "step": 7810 }, { "epoch": 0.2326930802076979, "grad_norm": 0.3884243369102478, "learning_rate": 7.756397540170601e-06, "loss": 0.0122, "step": 7820 }, { "epoch": 0.23299064169133948, "grad_norm": 1.0435750484466553, "learning_rate": 7.766316207101766e-06, "loss": 0.0124, "step": 7830 }, { "epoch": 0.23328820317498103, "grad_norm": 0.9092775583267212, "learning_rate": 7.77623487403293e-06, "loss": 0.0139, "step": 7840 }, { "epoch": 0.23358576465862257, "grad_norm": 0.3400535583496094, "learning_rate": 7.786153540964095e-06, "loss": 0.0168, "step": 7850 }, { "epoch": 0.23388332614226415, "grad_norm": 0.5030868053436279, "learning_rate": 7.79607220789526e-06, "loss": 0.0205, "step": 7860 }, { "epoch": 0.2341808876259057, "grad_norm": 0.3758860230445862, "learning_rate": 7.805990874826423e-06, "loss": 0.0169, "step": 7870 }, { "epoch": 0.23447844910954727, "grad_norm": 0.513222336769104, "learning_rate": 7.815909541757588e-06, "loss": 0.015, "step": 7880 }, { "epoch": 0.2347760105931888, "grad_norm": 0.4795960783958435, "learning_rate": 7.825828208688752e-06, "loss": 0.0118, "step": 7890 }, { "epoch": 0.23507357207683038, "grad_norm": 0.3774482309818268, "learning_rate": 7.835746875619917e-06, "loss": 0.012, "step": 7900 }, { "epoch": 0.23537113356047193, "grad_norm": 0.47883880138397217, "learning_rate": 7.845665542551082e-06, "loss": 0.0136, "step": 7910 }, { "epoch": 0.2356686950441135, "grad_norm": 0.9038412570953369, "learning_rate": 7.855584209482247e-06, "loss": 0.0157, "step": 7920 }, { "epoch": 0.23596625652775505, "grad_norm": 0.6576539874076843, "learning_rate": 7.865502876413412e-06, "loss": 0.0171, "step": 7930 }, { "epoch": 0.2362638180113966, "grad_norm": 0.7430427670478821, "learning_rate": 7.875421543344575e-06, "loss": 0.0181, "step": 7940 }, { "epoch": 0.23656137949503817, "grad_norm": 0.6196504831314087, "learning_rate": 7.88534021027574e-06, "loss": 0.0218, "step": 7950 }, { "epoch": 0.2368589409786797, "grad_norm": 1.2380245923995972, "learning_rate": 7.895258877206904e-06, "loss": 0.0226, "step": 7960 }, { "epoch": 0.23715650246232128, "grad_norm": 0.902662992477417, "learning_rate": 7.905177544138069e-06, "loss": 0.0154, "step": 7970 }, { "epoch": 0.23745406394596283, "grad_norm": 0.40743792057037354, "learning_rate": 7.915096211069234e-06, "loss": 0.014, "step": 7980 }, { "epoch": 0.2377516254296044, "grad_norm": 0.6482931971549988, "learning_rate": 7.925014878000398e-06, "loss": 0.015, "step": 7990 }, { "epoch": 0.23804918691324595, "grad_norm": 0.5129244327545166, "learning_rate": 7.934933544931562e-06, "loss": 0.013, "step": 8000 }, { "epoch": 0.23834674839688752, "grad_norm": 0.6703622937202454, "learning_rate": 7.944852211862726e-06, "loss": 0.0144, "step": 8010 }, { "epoch": 0.23864430988052907, "grad_norm": 0.5047257542610168, "learning_rate": 7.954770878793891e-06, "loss": 0.016, "step": 8020 }, { "epoch": 0.2389418713641706, "grad_norm": 0.6091992259025574, "learning_rate": 7.964689545725056e-06, "loss": 0.0173, "step": 8030 }, { "epoch": 0.23923943284781218, "grad_norm": 0.27071258425712585, "learning_rate": 7.974608212656219e-06, "loss": 0.0098, "step": 8040 }, { "epoch": 0.23953699433145373, "grad_norm": 0.3940989077091217, "learning_rate": 7.984526879587384e-06, "loss": 0.0162, "step": 8050 }, { "epoch": 0.2398345558150953, "grad_norm": 0.877709150314331, "learning_rate": 7.994445546518548e-06, "loss": 0.0153, "step": 8060 }, { "epoch": 0.24013211729873685, "grad_norm": 0.5596256852149963, "learning_rate": 8.004364213449713e-06, "loss": 0.0145, "step": 8070 }, { "epoch": 0.24042967878237842, "grad_norm": 0.46272578835487366, "learning_rate": 8.014282880380876e-06, "loss": 0.0177, "step": 8080 }, { "epoch": 0.24072724026601997, "grad_norm": 0.4761720299720764, "learning_rate": 8.024201547312041e-06, "loss": 0.0145, "step": 8090 }, { "epoch": 0.2410248017496615, "grad_norm": 0.5085681080818176, "learning_rate": 8.034120214243206e-06, "loss": 0.0173, "step": 8100 }, { "epoch": 0.24132236323330308, "grad_norm": 0.6724675893783569, "learning_rate": 8.04403888117437e-06, "loss": 0.0173, "step": 8110 }, { "epoch": 0.24161992471694463, "grad_norm": 0.62349933385849, "learning_rate": 8.053957548105535e-06, "loss": 0.0238, "step": 8120 }, { "epoch": 0.2419174862005862, "grad_norm": 0.5272676944732666, "learning_rate": 8.0638762150367e-06, "loss": 0.0174, "step": 8130 }, { "epoch": 0.24221504768422775, "grad_norm": 0.3092692792415619, "learning_rate": 8.073794881967865e-06, "loss": 0.0155, "step": 8140 }, { "epoch": 0.24251260916786932, "grad_norm": 0.6973236203193665, "learning_rate": 8.083713548899028e-06, "loss": 0.0129, "step": 8150 }, { "epoch": 0.24281017065151086, "grad_norm": 0.6124131679534912, "learning_rate": 8.093632215830193e-06, "loss": 0.0223, "step": 8160 }, { "epoch": 0.24310773213515244, "grad_norm": 0.3203859329223633, "learning_rate": 8.103550882761357e-06, "loss": 0.0142, "step": 8170 }, { "epoch": 0.24340529361879398, "grad_norm": 0.8167921900749207, "learning_rate": 8.113469549692522e-06, "loss": 0.0148, "step": 8180 }, { "epoch": 0.24370285510243553, "grad_norm": 0.3120350241661072, "learning_rate": 8.123388216623687e-06, "loss": 0.0178, "step": 8190 }, { "epoch": 0.2440004165860771, "grad_norm": 0.6836142539978027, "learning_rate": 8.133306883554852e-06, "loss": 0.0173, "step": 8200 }, { "epoch": 0.24429797806971865, "grad_norm": 0.5480268001556396, "learning_rate": 8.143225550486016e-06, "loss": 0.0235, "step": 8210 }, { "epoch": 0.24459553955336022, "grad_norm": 0.37977129220962524, "learning_rate": 8.15314421741718e-06, "loss": 0.0124, "step": 8220 }, { "epoch": 0.24489310103700176, "grad_norm": 0.5182702541351318, "learning_rate": 8.163062884348344e-06, "loss": 0.0156, "step": 8230 }, { "epoch": 0.24519066252064334, "grad_norm": 1.073556900024414, "learning_rate": 8.172981551279509e-06, "loss": 0.0179, "step": 8240 }, { "epoch": 0.24548822400428488, "grad_norm": 0.7981760501861572, "learning_rate": 8.182900218210674e-06, "loss": 0.016, "step": 8250 }, { "epoch": 0.24578578548792646, "grad_norm": 0.47664642333984375, "learning_rate": 8.192818885141839e-06, "loss": 0.0138, "step": 8260 }, { "epoch": 0.246083346971568, "grad_norm": 0.7849586606025696, "learning_rate": 8.202737552073002e-06, "loss": 0.0144, "step": 8270 }, { "epoch": 0.24638090845520955, "grad_norm": 0.46246087551116943, "learning_rate": 8.212656219004166e-06, "loss": 0.0124, "step": 8280 }, { "epoch": 0.24667846993885112, "grad_norm": 0.631933331489563, "learning_rate": 8.222574885935331e-06, "loss": 0.0163, "step": 8290 }, { "epoch": 0.24697603142249266, "grad_norm": 0.6868851184844971, "learning_rate": 8.232493552866496e-06, "loss": 0.0158, "step": 8300 }, { "epoch": 0.24727359290613424, "grad_norm": 0.5178279876708984, "learning_rate": 8.242412219797659e-06, "loss": 0.0156, "step": 8310 }, { "epoch": 0.24757115438977578, "grad_norm": 0.7490805387496948, "learning_rate": 8.252330886728824e-06, "loss": 0.0172, "step": 8320 }, { "epoch": 0.24786871587341736, "grad_norm": 0.5171768069267273, "learning_rate": 8.262249553659989e-06, "loss": 0.0138, "step": 8330 }, { "epoch": 0.2481662773570589, "grad_norm": 0.349791944026947, "learning_rate": 8.272168220591153e-06, "loss": 0.0184, "step": 8340 }, { "epoch": 0.24846383884070045, "grad_norm": 0.5190218091011047, "learning_rate": 8.282086887522318e-06, "loss": 0.0163, "step": 8350 }, { "epoch": 0.24876140032434202, "grad_norm": 0.654606282711029, "learning_rate": 8.292005554453481e-06, "loss": 0.0198, "step": 8360 }, { "epoch": 0.24905896180798356, "grad_norm": 0.7599221467971802, "learning_rate": 8.301924221384646e-06, "loss": 0.0163, "step": 8370 }, { "epoch": 0.24935652329162514, "grad_norm": 0.7671296000480652, "learning_rate": 8.31184288831581e-06, "loss": 0.0158, "step": 8380 }, { "epoch": 0.24965408477526668, "grad_norm": 0.4937571883201599, "learning_rate": 8.321761555246975e-06, "loss": 0.0131, "step": 8390 }, { "epoch": 0.24995164625890826, "grad_norm": 0.9294701218605042, "learning_rate": 8.33168022217814e-06, "loss": 0.0245, "step": 8400 }, { "epoch": 0.25024920774254983, "grad_norm": 0.40145254135131836, "learning_rate": 8.341598889109305e-06, "loss": 0.012, "step": 8410 }, { "epoch": 0.25054676922619135, "grad_norm": 0.40648406744003296, "learning_rate": 8.35151755604047e-06, "loss": 0.0201, "step": 8420 }, { "epoch": 0.2508443307098329, "grad_norm": 0.6091222763061523, "learning_rate": 8.361436222971633e-06, "loss": 0.0155, "step": 8430 }, { "epoch": 0.2511418921934745, "grad_norm": 0.2629440724849701, "learning_rate": 8.371354889902798e-06, "loss": 0.0107, "step": 8440 }, { "epoch": 0.251439453677116, "grad_norm": 0.6317318677902222, "learning_rate": 8.381273556833962e-06, "loss": 0.0155, "step": 8450 }, { "epoch": 0.2517370151607576, "grad_norm": 0.5993021726608276, "learning_rate": 8.391192223765127e-06, "loss": 0.0153, "step": 8460 }, { "epoch": 0.25203457664439916, "grad_norm": 0.471182256937027, "learning_rate": 8.401110890696292e-06, "loss": 0.0198, "step": 8470 }, { "epoch": 0.25233213812804073, "grad_norm": 0.5860681533813477, "learning_rate": 8.411029557627457e-06, "loss": 0.0178, "step": 8480 }, { "epoch": 0.25262969961168225, "grad_norm": 0.7553436160087585, "learning_rate": 8.42094822455862e-06, "loss": 0.0151, "step": 8490 }, { "epoch": 0.2529272610953238, "grad_norm": 0.48325806856155396, "learning_rate": 8.430866891489784e-06, "loss": 0.0219, "step": 8500 }, { "epoch": 0.2532248225789654, "grad_norm": 0.5173944234848022, "learning_rate": 8.44078555842095e-06, "loss": 0.0108, "step": 8510 }, { "epoch": 0.2535223840626069, "grad_norm": 1.1119730472564697, "learning_rate": 8.450704225352114e-06, "loss": 0.011, "step": 8520 }, { "epoch": 0.2538199455462485, "grad_norm": 0.4228108525276184, "learning_rate": 8.460622892283277e-06, "loss": 0.0114, "step": 8530 }, { "epoch": 0.25411750702989006, "grad_norm": 0.7204010486602783, "learning_rate": 8.470541559214442e-06, "loss": 0.0137, "step": 8540 }, { "epoch": 0.25441506851353163, "grad_norm": 0.8205647468566895, "learning_rate": 8.480460226145607e-06, "loss": 0.0123, "step": 8550 }, { "epoch": 0.25471262999717315, "grad_norm": 1.877057433128357, "learning_rate": 8.490378893076771e-06, "loss": 0.0149, "step": 8560 }, { "epoch": 0.2550101914808147, "grad_norm": 0.47041019797325134, "learning_rate": 8.500297560007934e-06, "loss": 0.0149, "step": 8570 }, { "epoch": 0.2553077529644563, "grad_norm": 0.52569180727005, "learning_rate": 8.510216226939099e-06, "loss": 0.0207, "step": 8580 }, { "epoch": 0.25560531444809786, "grad_norm": 0.2755090594291687, "learning_rate": 8.520134893870264e-06, "loss": 0.0173, "step": 8590 }, { "epoch": 0.2559028759317394, "grad_norm": 0.754173994064331, "learning_rate": 8.530053560801429e-06, "loss": 0.0181, "step": 8600 }, { "epoch": 0.25620043741538095, "grad_norm": 0.5330216884613037, "learning_rate": 8.539972227732593e-06, "loss": 0.015, "step": 8610 }, { "epoch": 0.25649799889902253, "grad_norm": 0.3382478356361389, "learning_rate": 8.549890894663758e-06, "loss": 0.0138, "step": 8620 }, { "epoch": 0.25679556038266405, "grad_norm": 0.6486343741416931, "learning_rate": 8.559809561594923e-06, "loss": 0.0167, "step": 8630 }, { "epoch": 0.2570931218663056, "grad_norm": 0.9703971743583679, "learning_rate": 8.569728228526086e-06, "loss": 0.0118, "step": 8640 }, { "epoch": 0.2573906833499472, "grad_norm": 0.6882675886154175, "learning_rate": 8.57964689545725e-06, "loss": 0.0116, "step": 8650 }, { "epoch": 0.25768824483358876, "grad_norm": 0.20261318981647491, "learning_rate": 8.589565562388416e-06, "loss": 0.0205, "step": 8660 }, { "epoch": 0.2579858063172303, "grad_norm": 0.4832499027252197, "learning_rate": 8.59948422931958e-06, "loss": 0.0126, "step": 8670 }, { "epoch": 0.25828336780087185, "grad_norm": 0.6393494606018066, "learning_rate": 8.609402896250745e-06, "loss": 0.0151, "step": 8680 }, { "epoch": 0.2585809292845134, "grad_norm": 0.3985641300678253, "learning_rate": 8.61932156318191e-06, "loss": 0.0124, "step": 8690 }, { "epoch": 0.25887849076815495, "grad_norm": 0.7247487306594849, "learning_rate": 8.629240230113075e-06, "loss": 0.0175, "step": 8700 }, { "epoch": 0.2591760522517965, "grad_norm": 0.5035407543182373, "learning_rate": 8.639158897044238e-06, "loss": 0.0152, "step": 8710 }, { "epoch": 0.2594736137354381, "grad_norm": 0.5998431444168091, "learning_rate": 8.649077563975402e-06, "loss": 0.0124, "step": 8720 }, { "epoch": 0.25977117521907966, "grad_norm": 0.3828756809234619, "learning_rate": 8.658996230906567e-06, "loss": 0.0153, "step": 8730 }, { "epoch": 0.2600687367027212, "grad_norm": 0.5215823650360107, "learning_rate": 8.668914897837732e-06, "loss": 0.0182, "step": 8740 }, { "epoch": 0.26036629818636275, "grad_norm": 0.42054322361946106, "learning_rate": 8.678833564768897e-06, "loss": 0.0168, "step": 8750 }, { "epoch": 0.2606638596700043, "grad_norm": 0.43942925333976746, "learning_rate": 8.68875223170006e-06, "loss": 0.0192, "step": 8760 }, { "epoch": 0.26096142115364585, "grad_norm": 0.45135870575904846, "learning_rate": 8.698670898631225e-06, "loss": 0.0171, "step": 8770 }, { "epoch": 0.2612589826372874, "grad_norm": 0.4152545630931854, "learning_rate": 8.70858956556239e-06, "loss": 0.0144, "step": 8780 }, { "epoch": 0.261556544120929, "grad_norm": 0.39877650141716003, "learning_rate": 8.718508232493554e-06, "loss": 0.014, "step": 8790 }, { "epoch": 0.26185410560457056, "grad_norm": 0.5102506279945374, "learning_rate": 8.728426899424717e-06, "loss": 0.0124, "step": 8800 }, { "epoch": 0.2621516670882121, "grad_norm": 0.2904888093471527, "learning_rate": 8.738345566355882e-06, "loss": 0.0123, "step": 8810 }, { "epoch": 0.26244922857185365, "grad_norm": 0.697994589805603, "learning_rate": 8.748264233287047e-06, "loss": 0.0121, "step": 8820 }, { "epoch": 0.2627467900554952, "grad_norm": 0.4626402258872986, "learning_rate": 8.758182900218211e-06, "loss": 0.02, "step": 8830 }, { "epoch": 0.2630443515391368, "grad_norm": 0.7324886918067932, "learning_rate": 8.768101567149376e-06, "loss": 0.0128, "step": 8840 }, { "epoch": 0.2633419130227783, "grad_norm": 0.5236402153968811, "learning_rate": 8.77802023408054e-06, "loss": 0.0157, "step": 8850 }, { "epoch": 0.2636394745064199, "grad_norm": 0.6287659406661987, "learning_rate": 8.787938901011704e-06, "loss": 0.0184, "step": 8860 }, { "epoch": 0.26393703599006146, "grad_norm": 0.5670686960220337, "learning_rate": 8.797857567942869e-06, "loss": 0.0119, "step": 8870 }, { "epoch": 0.264234597473703, "grad_norm": 0.5275831818580627, "learning_rate": 8.807776234874034e-06, "loss": 0.0154, "step": 8880 }, { "epoch": 0.26453215895734455, "grad_norm": 0.5244991183280945, "learning_rate": 8.817694901805198e-06, "loss": 0.0127, "step": 8890 }, { "epoch": 0.2648297204409861, "grad_norm": 0.712235689163208, "learning_rate": 8.827613568736363e-06, "loss": 0.0146, "step": 8900 }, { "epoch": 0.2651272819246277, "grad_norm": 0.7405701279640198, "learning_rate": 8.837532235667528e-06, "loss": 0.0167, "step": 8910 }, { "epoch": 0.2654248434082692, "grad_norm": 0.561721920967102, "learning_rate": 8.847450902598691e-06, "loss": 0.0095, "step": 8920 }, { "epoch": 0.2657224048919108, "grad_norm": 1.097970724105835, "learning_rate": 8.857369569529856e-06, "loss": 0.0139, "step": 8930 }, { "epoch": 0.26601996637555236, "grad_norm": 0.4682813882827759, "learning_rate": 8.86728823646102e-06, "loss": 0.0144, "step": 8940 }, { "epoch": 0.2663175278591939, "grad_norm": 1.7503995895385742, "learning_rate": 8.877206903392185e-06, "loss": 0.0123, "step": 8950 }, { "epoch": 0.26661508934283545, "grad_norm": 0.49588441848754883, "learning_rate": 8.88712557032335e-06, "loss": 0.0152, "step": 8960 }, { "epoch": 0.266912650826477, "grad_norm": 1.0617148876190186, "learning_rate": 8.897044237254515e-06, "loss": 0.0179, "step": 8970 }, { "epoch": 0.2672102123101186, "grad_norm": 0.3905886113643646, "learning_rate": 8.90696290418568e-06, "loss": 0.0165, "step": 8980 }, { "epoch": 0.2675077737937601, "grad_norm": 0.6094058156013489, "learning_rate": 8.916881571116843e-06, "loss": 0.0166, "step": 8990 }, { "epoch": 0.2678053352774017, "grad_norm": 0.769842803478241, "learning_rate": 8.926800238048007e-06, "loss": 0.0146, "step": 9000 }, { "epoch": 0.26810289676104326, "grad_norm": 0.9211681485176086, "learning_rate": 8.936718904979172e-06, "loss": 0.0176, "step": 9010 }, { "epoch": 0.2684004582446848, "grad_norm": 0.6569142937660217, "learning_rate": 8.946637571910335e-06, "loss": 0.012, "step": 9020 }, { "epoch": 0.26869801972832635, "grad_norm": 0.4752437174320221, "learning_rate": 8.9565562388415e-06, "loss": 0.014, "step": 9030 }, { "epoch": 0.2689955812119679, "grad_norm": 0.9269880056381226, "learning_rate": 8.966474905772665e-06, "loss": 0.0247, "step": 9040 }, { "epoch": 0.2692931426956095, "grad_norm": 0.49340465664863586, "learning_rate": 8.97639357270383e-06, "loss": 0.0263, "step": 9050 }, { "epoch": 0.269590704179251, "grad_norm": 0.33699995279312134, "learning_rate": 8.986312239634993e-06, "loss": 0.0123, "step": 9060 }, { "epoch": 0.2698882656628926, "grad_norm": 0.4913179278373718, "learning_rate": 8.996230906566157e-06, "loss": 0.0186, "step": 9070 }, { "epoch": 0.27018582714653416, "grad_norm": 0.8215954303741455, "learning_rate": 9.006149573497322e-06, "loss": 0.0174, "step": 9080 }, { "epoch": 0.27048338863017574, "grad_norm": 0.2961207926273346, "learning_rate": 9.016068240428487e-06, "loss": 0.0147, "step": 9090 }, { "epoch": 0.27078095011381725, "grad_norm": 0.7186858654022217, "learning_rate": 9.025986907359652e-06, "loss": 0.0138, "step": 9100 }, { "epoch": 0.2710785115974588, "grad_norm": 0.32624855637550354, "learning_rate": 9.035905574290816e-06, "loss": 0.0127, "step": 9110 }, { "epoch": 0.2713760730811004, "grad_norm": 0.41772228479385376, "learning_rate": 9.045824241221981e-06, "loss": 0.0153, "step": 9120 }, { "epoch": 0.2716736345647419, "grad_norm": 0.6125513315200806, "learning_rate": 9.055742908153144e-06, "loss": 0.0215, "step": 9130 }, { "epoch": 0.2719711960483835, "grad_norm": 0.27547451853752136, "learning_rate": 9.065661575084309e-06, "loss": 0.0219, "step": 9140 }, { "epoch": 0.27226875753202506, "grad_norm": 0.5044788718223572, "learning_rate": 9.075580242015474e-06, "loss": 0.0198, "step": 9150 }, { "epoch": 0.27256631901566664, "grad_norm": 1.1603400707244873, "learning_rate": 9.085498908946638e-06, "loss": 0.0145, "step": 9160 }, { "epoch": 0.27286388049930815, "grad_norm": 0.4668659269809723, "learning_rate": 9.095417575877803e-06, "loss": 0.0156, "step": 9170 }, { "epoch": 0.2731614419829497, "grad_norm": 0.5736173987388611, "learning_rate": 9.105336242808968e-06, "loss": 0.012, "step": 9180 }, { "epoch": 0.2734590034665913, "grad_norm": 0.24326801300048828, "learning_rate": 9.115254909740133e-06, "loss": 0.0095, "step": 9190 }, { "epoch": 0.2737565649502328, "grad_norm": 0.6012185215950012, "learning_rate": 9.125173576671296e-06, "loss": 0.0236, "step": 9200 }, { "epoch": 0.2740541264338744, "grad_norm": 0.6299578547477722, "learning_rate": 9.13509224360246e-06, "loss": 0.0155, "step": 9210 }, { "epoch": 0.27435168791751596, "grad_norm": 0.2617078125476837, "learning_rate": 9.145010910533625e-06, "loss": 0.013, "step": 9220 }, { "epoch": 0.27464924940115754, "grad_norm": 0.5772706270217896, "learning_rate": 9.15492957746479e-06, "loss": 0.0159, "step": 9230 }, { "epoch": 0.27494681088479905, "grad_norm": 0.5442424416542053, "learning_rate": 9.164848244395955e-06, "loss": 0.0095, "step": 9240 }, { "epoch": 0.2752443723684406, "grad_norm": 0.5458807945251465, "learning_rate": 9.174766911327118e-06, "loss": 0.0145, "step": 9250 }, { "epoch": 0.2755419338520822, "grad_norm": 0.41718369722366333, "learning_rate": 9.184685578258283e-06, "loss": 0.0147, "step": 9260 }, { "epoch": 0.2758394953357237, "grad_norm": 0.8602967262268066, "learning_rate": 9.194604245189447e-06, "loss": 0.013, "step": 9270 }, { "epoch": 0.2761370568193653, "grad_norm": 0.3270423710346222, "learning_rate": 9.204522912120612e-06, "loss": 0.0165, "step": 9280 }, { "epoch": 0.27643461830300686, "grad_norm": 0.46726343035697937, "learning_rate": 9.214441579051775e-06, "loss": 0.0225, "step": 9290 }, { "epoch": 0.27673217978664844, "grad_norm": 0.37679165601730347, "learning_rate": 9.22436024598294e-06, "loss": 0.0175, "step": 9300 }, { "epoch": 0.27702974127028995, "grad_norm": 0.5867924690246582, "learning_rate": 9.234278912914105e-06, "loss": 0.0122, "step": 9310 }, { "epoch": 0.2773273027539315, "grad_norm": 0.5271454453468323, "learning_rate": 9.24419757984527e-06, "loss": 0.0169, "step": 9320 }, { "epoch": 0.2776248642375731, "grad_norm": 1.6812784671783447, "learning_rate": 9.254116246776434e-06, "loss": 0.022, "step": 9330 }, { "epoch": 0.27792242572121467, "grad_norm": 0.15809622406959534, "learning_rate": 9.264034913707597e-06, "loss": 0.0125, "step": 9340 }, { "epoch": 0.2782199872048562, "grad_norm": 0.4805338978767395, "learning_rate": 9.273953580638762e-06, "loss": 0.0103, "step": 9350 }, { "epoch": 0.27851754868849776, "grad_norm": 0.4333735704421997, "learning_rate": 9.283872247569927e-06, "loss": 0.0167, "step": 9360 }, { "epoch": 0.27881511017213934, "grad_norm": 0.4997933506965637, "learning_rate": 9.293790914501092e-06, "loss": 0.0151, "step": 9370 }, { "epoch": 0.27911267165578085, "grad_norm": 0.4522557854652405, "learning_rate": 9.303709581432256e-06, "loss": 0.0138, "step": 9380 }, { "epoch": 0.2794102331394224, "grad_norm": 0.4808894097805023, "learning_rate": 9.313628248363421e-06, "loss": 0.0143, "step": 9390 }, { "epoch": 0.279707794623064, "grad_norm": 0.6110131144523621, "learning_rate": 9.323546915294586e-06, "loss": 0.0147, "step": 9400 }, { "epoch": 0.28000535610670557, "grad_norm": 0.6026374697685242, "learning_rate": 9.333465582225749e-06, "loss": 0.0166, "step": 9410 }, { "epoch": 0.2803029175903471, "grad_norm": 0.5437732934951782, "learning_rate": 9.343384249156914e-06, "loss": 0.013, "step": 9420 }, { "epoch": 0.28060047907398866, "grad_norm": 0.3459857106208801, "learning_rate": 9.353302916088079e-06, "loss": 0.0136, "step": 9430 }, { "epoch": 0.28089804055763024, "grad_norm": 0.8187248110771179, "learning_rate": 9.363221583019243e-06, "loss": 0.0134, "step": 9440 }, { "epoch": 0.28119560204127175, "grad_norm": 0.6045753359794617, "learning_rate": 9.373140249950408e-06, "loss": 0.0149, "step": 9450 }, { "epoch": 0.2814931635249133, "grad_norm": 0.40841859579086304, "learning_rate": 9.383058916881573e-06, "loss": 0.0109, "step": 9460 }, { "epoch": 0.2817907250085549, "grad_norm": 0.503578245639801, "learning_rate": 9.392977583812738e-06, "loss": 0.0224, "step": 9470 }, { "epoch": 0.28208828649219647, "grad_norm": 0.3741840422153473, "learning_rate": 9.4028962507439e-06, "loss": 0.0321, "step": 9480 }, { "epoch": 0.282385847975838, "grad_norm": 0.8149176836013794, "learning_rate": 9.412814917675065e-06, "loss": 0.0146, "step": 9490 }, { "epoch": 0.28268340945947956, "grad_norm": 0.31001806259155273, "learning_rate": 9.42273358460623e-06, "loss": 0.0161, "step": 9500 }, { "epoch": 0.28298097094312114, "grad_norm": 0.34264835715293884, "learning_rate": 9.432652251537395e-06, "loss": 0.0115, "step": 9510 }, { "epoch": 0.28327853242676265, "grad_norm": 0.6363386511802673, "learning_rate": 9.442570918468558e-06, "loss": 0.0119, "step": 9520 }, { "epoch": 0.2835760939104042, "grad_norm": 0.5027802586555481, "learning_rate": 9.452489585399723e-06, "loss": 0.0142, "step": 9530 }, { "epoch": 0.2838736553940458, "grad_norm": 0.6252486109733582, "learning_rate": 9.462408252330888e-06, "loss": 0.0091, "step": 9540 }, { "epoch": 0.28417121687768737, "grad_norm": 0.5386717319488525, "learning_rate": 9.47232691926205e-06, "loss": 0.0136, "step": 9550 }, { "epoch": 0.2844687783613289, "grad_norm": 0.42492401599884033, "learning_rate": 9.482245586193215e-06, "loss": 0.0148, "step": 9560 }, { "epoch": 0.28476633984497046, "grad_norm": 0.668133556842804, "learning_rate": 9.49216425312438e-06, "loss": 0.0151, "step": 9570 }, { "epoch": 0.28506390132861203, "grad_norm": 0.5564215779304504, "learning_rate": 9.502082920055545e-06, "loss": 0.0112, "step": 9580 }, { "epoch": 0.2853614628122536, "grad_norm": 0.46605604887008667, "learning_rate": 9.51200158698671e-06, "loss": 0.0124, "step": 9590 }, { "epoch": 0.2856590242958951, "grad_norm": 0.8876585960388184, "learning_rate": 9.521920253917874e-06, "loss": 0.0209, "step": 9600 }, { "epoch": 0.2859565857795367, "grad_norm": 0.42812424898147583, "learning_rate": 9.53183892084904e-06, "loss": 0.0097, "step": 9610 }, { "epoch": 0.28625414726317827, "grad_norm": 0.18297861516475677, "learning_rate": 9.541757587780202e-06, "loss": 0.0109, "step": 9620 }, { "epoch": 0.2865517087468198, "grad_norm": 0.47454601526260376, "learning_rate": 9.551676254711367e-06, "loss": 0.0128, "step": 9630 }, { "epoch": 0.28684927023046136, "grad_norm": 0.8019658327102661, "learning_rate": 9.561594921642532e-06, "loss": 0.0154, "step": 9640 }, { "epoch": 0.28714683171410293, "grad_norm": 0.43133658170700073, "learning_rate": 9.571513588573697e-06, "loss": 0.0174, "step": 9650 }, { "epoch": 0.2874443931977445, "grad_norm": 0.4747347831726074, "learning_rate": 9.581432255504861e-06, "loss": 0.0137, "step": 9660 }, { "epoch": 0.287741954681386, "grad_norm": 0.4304235279560089, "learning_rate": 9.591350922436026e-06, "loss": 0.0146, "step": 9670 }, { "epoch": 0.2880395161650276, "grad_norm": 0.38584864139556885, "learning_rate": 9.601269589367191e-06, "loss": 0.0067, "step": 9680 }, { "epoch": 0.28833707764866917, "grad_norm": 0.6923319697380066, "learning_rate": 9.611188256298354e-06, "loss": 0.019, "step": 9690 }, { "epoch": 0.2886346391323107, "grad_norm": 0.5236111283302307, "learning_rate": 9.621106923229519e-06, "loss": 0.0105, "step": 9700 }, { "epoch": 0.28893220061595226, "grad_norm": 0.4844159185886383, "learning_rate": 9.631025590160683e-06, "loss": 0.0184, "step": 9710 }, { "epoch": 0.28922976209959383, "grad_norm": 0.6302916407585144, "learning_rate": 9.640944257091848e-06, "loss": 0.0222, "step": 9720 }, { "epoch": 0.2895273235832354, "grad_norm": 0.3792654573917389, "learning_rate": 9.650862924023013e-06, "loss": 0.0116, "step": 9730 }, { "epoch": 0.2898248850668769, "grad_norm": 1.0487691164016724, "learning_rate": 9.660781590954176e-06, "loss": 0.0208, "step": 9740 }, { "epoch": 0.2901224465505185, "grad_norm": 0.4528041481971741, "learning_rate": 9.67070025788534e-06, "loss": 0.0126, "step": 9750 }, { "epoch": 0.29042000803416007, "grad_norm": 0.43467003107070923, "learning_rate": 9.680618924816506e-06, "loss": 0.0134, "step": 9760 }, { "epoch": 0.2907175695178016, "grad_norm": 0.39329612255096436, "learning_rate": 9.69053759174767e-06, "loss": 0.0126, "step": 9770 }, { "epoch": 0.29101513100144316, "grad_norm": 0.6529423594474792, "learning_rate": 9.700456258678833e-06, "loss": 0.0138, "step": 9780 }, { "epoch": 0.29131269248508473, "grad_norm": 0.3749392032623291, "learning_rate": 9.710374925609998e-06, "loss": 0.0164, "step": 9790 }, { "epoch": 0.2916102539687263, "grad_norm": 0.27115580439567566, "learning_rate": 9.720293592541163e-06, "loss": 0.0115, "step": 9800 }, { "epoch": 0.2919078154523678, "grad_norm": 0.6312379240989685, "learning_rate": 9.730212259472328e-06, "loss": 0.0118, "step": 9810 }, { "epoch": 0.2922053769360094, "grad_norm": 0.7533280849456787, "learning_rate": 9.740130926403492e-06, "loss": 0.0112, "step": 9820 }, { "epoch": 0.29250293841965097, "grad_norm": 0.308864951133728, "learning_rate": 9.750049593334656e-06, "loss": 0.0165, "step": 9830 }, { "epoch": 0.29280049990329254, "grad_norm": 0.6236409544944763, "learning_rate": 9.75996826026582e-06, "loss": 0.0156, "step": 9840 }, { "epoch": 0.29309806138693406, "grad_norm": 0.4364309310913086, "learning_rate": 9.769886927196985e-06, "loss": 0.0127, "step": 9850 }, { "epoch": 0.29339562287057563, "grad_norm": 0.5675424933433533, "learning_rate": 9.77980559412815e-06, "loss": 0.0151, "step": 9860 }, { "epoch": 0.2936931843542172, "grad_norm": 0.3826470971107483, "learning_rate": 9.789724261059315e-06, "loss": 0.0132, "step": 9870 }, { "epoch": 0.2939907458378587, "grad_norm": 0.31760427355766296, "learning_rate": 9.79964292799048e-06, "loss": 0.0149, "step": 9880 }, { "epoch": 0.2942883073215003, "grad_norm": 0.40251439809799194, "learning_rate": 9.809561594921644e-06, "loss": 0.0102, "step": 9890 }, { "epoch": 0.29458586880514187, "grad_norm": 0.4185325801372528, "learning_rate": 9.819480261852807e-06, "loss": 0.01, "step": 9900 }, { "epoch": 0.29488343028878344, "grad_norm": 0.3756747841835022, "learning_rate": 9.829398928783972e-06, "loss": 0.0132, "step": 9910 }, { "epoch": 0.29518099177242496, "grad_norm": 0.4514119029045105, "learning_rate": 9.839317595715137e-06, "loss": 0.0129, "step": 9920 }, { "epoch": 0.29547855325606653, "grad_norm": 0.5353307127952576, "learning_rate": 9.849236262646302e-06, "loss": 0.0148, "step": 9930 }, { "epoch": 0.2957761147397081, "grad_norm": 0.45527687668800354, "learning_rate": 9.859154929577466e-06, "loss": 0.0166, "step": 9940 }, { "epoch": 0.2960736762233496, "grad_norm": 0.24893710017204285, "learning_rate": 9.869073596508631e-06, "loss": 0.0129, "step": 9950 }, { "epoch": 0.2963712377069912, "grad_norm": 0.5629971027374268, "learning_rate": 9.878992263439796e-06, "loss": 0.0129, "step": 9960 }, { "epoch": 0.29666879919063277, "grad_norm": 0.3046913146972656, "learning_rate": 9.888910930370959e-06, "loss": 0.0139, "step": 9970 }, { "epoch": 0.29696636067427434, "grad_norm": 0.32616257667541504, "learning_rate": 9.898829597302124e-06, "loss": 0.0122, "step": 9980 }, { "epoch": 0.29726392215791586, "grad_norm": 0.5924601554870605, "learning_rate": 9.908748264233288e-06, "loss": 0.0127, "step": 9990 }, { "epoch": 0.29756148364155743, "grad_norm": 0.4889148473739624, "learning_rate": 9.918666931164453e-06, "loss": 0.0085, "step": 10000 }, { "epoch": 0.297859045125199, "grad_norm": 0.4413103759288788, "learning_rate": 9.928585598095616e-06, "loss": 0.0135, "step": 10010 }, { "epoch": 0.2981566066088405, "grad_norm": 0.782293438911438, "learning_rate": 9.938504265026781e-06, "loss": 0.0166, "step": 10020 }, { "epoch": 0.2984541680924821, "grad_norm": 0.3486711084842682, "learning_rate": 9.948422931957946e-06, "loss": 0.0154, "step": 10030 }, { "epoch": 0.29875172957612367, "grad_norm": 0.45415788888931274, "learning_rate": 9.95834159888911e-06, "loss": 0.0135, "step": 10040 }, { "epoch": 0.29904929105976524, "grad_norm": 0.2694961428642273, "learning_rate": 9.968260265820274e-06, "loss": 0.0117, "step": 10050 }, { "epoch": 0.29934685254340676, "grad_norm": 0.309845507144928, "learning_rate": 9.978178932751438e-06, "loss": 0.0128, "step": 10060 }, { "epoch": 0.29964441402704833, "grad_norm": 0.28835657238960266, "learning_rate": 9.988097599682603e-06, "loss": 0.011, "step": 10070 }, { "epoch": 0.2999419755106899, "grad_norm": 0.31709787249565125, "learning_rate": 9.998016266613768e-06, "loss": 0.0147, "step": 10080 }, { "epoch": 0.3002395369943315, "grad_norm": 0.6893587708473206, "learning_rate": 9.999999808194762e-06, "loss": 0.02, "step": 10090 }, { "epoch": 0.300537098477973, "grad_norm": 0.34410399198532104, "learning_rate": 9.999999028986004e-06, "loss": 0.0098, "step": 10100 }, { "epoch": 0.30083465996161457, "grad_norm": 0.4573063552379608, "learning_rate": 9.999997650385993e-06, "loss": 0.0124, "step": 10110 }, { "epoch": 0.30113222144525614, "grad_norm": 0.2321852296590805, "learning_rate": 9.999995672394893e-06, "loss": 0.0109, "step": 10120 }, { "epoch": 0.30142978292889766, "grad_norm": 0.9354647994041443, "learning_rate": 9.999993095012943e-06, "loss": 0.0147, "step": 10130 }, { "epoch": 0.30172734441253923, "grad_norm": 0.5227007865905762, "learning_rate": 9.99998991824045e-06, "loss": 0.0174, "step": 10140 }, { "epoch": 0.3020249058961808, "grad_norm": 0.721588134765625, "learning_rate": 9.999986142077796e-06, "loss": 0.0189, "step": 10150 }, { "epoch": 0.3023224673798224, "grad_norm": 0.6401588320732117, "learning_rate": 9.999981766525434e-06, "loss": 0.0116, "step": 10160 }, { "epoch": 0.3026200288634639, "grad_norm": 0.30370593070983887, "learning_rate": 9.999976791583887e-06, "loss": 0.0112, "step": 10170 }, { "epoch": 0.30291759034710547, "grad_norm": 0.5145534873008728, "learning_rate": 9.999971217253754e-06, "loss": 0.0121, "step": 10180 }, { "epoch": 0.30321515183074704, "grad_norm": 0.6251223087310791, "learning_rate": 9.999965043535701e-06, "loss": 0.0139, "step": 10190 }, { "epoch": 0.30351271331438856, "grad_norm": 0.16498425602912903, "learning_rate": 9.99995827043047e-06, "loss": 0.0102, "step": 10200 }, { "epoch": 0.30381027479803013, "grad_norm": 0.4519118666648865, "learning_rate": 9.99995089793887e-06, "loss": 0.0183, "step": 10210 }, { "epoch": 0.3041078362816717, "grad_norm": 0.7203572392463684, "learning_rate": 9.999942926061787e-06, "loss": 0.0123, "step": 10220 }, { "epoch": 0.3044053977653133, "grad_norm": 0.4548749625682831, "learning_rate": 9.999934354800179e-06, "loss": 0.0139, "step": 10230 }, { "epoch": 0.3047029592489548, "grad_norm": 0.410245418548584, "learning_rate": 9.999925184155067e-06, "loss": 0.0093, "step": 10240 }, { "epoch": 0.30500052073259637, "grad_norm": 0.8550477623939514, "learning_rate": 9.999915414127554e-06, "loss": 0.02, "step": 10250 }, { "epoch": 0.30529808221623794, "grad_norm": 0.2464868724346161, "learning_rate": 9.999905044718814e-06, "loss": 0.0111, "step": 10260 }, { "epoch": 0.30559564369987946, "grad_norm": 0.36310845613479614, "learning_rate": 9.999894075930086e-06, "loss": 0.011, "step": 10270 }, { "epoch": 0.30589320518352103, "grad_norm": 0.5293440818786621, "learning_rate": 9.999882507762685e-06, "loss": 0.0101, "step": 10280 }, { "epoch": 0.3061907666671626, "grad_norm": 0.19306829571723938, "learning_rate": 9.999870340218e-06, "loss": 0.0128, "step": 10290 }, { "epoch": 0.3064883281508042, "grad_norm": 0.20131340622901917, "learning_rate": 9.99985757329749e-06, "loss": 0.0116, "step": 10300 }, { "epoch": 0.3067858896344457, "grad_norm": 0.30778515338897705, "learning_rate": 9.999844207002685e-06, "loss": 0.0137, "step": 10310 }, { "epoch": 0.30708345111808727, "grad_norm": 0.42202016711235046, "learning_rate": 9.999830241335183e-06, "loss": 0.013, "step": 10320 }, { "epoch": 0.30738101260172884, "grad_norm": 0.35067078471183777, "learning_rate": 9.999815676296663e-06, "loss": 0.0165, "step": 10330 }, { "epoch": 0.3076785740853704, "grad_norm": 0.5194374918937683, "learning_rate": 9.999800511888872e-06, "loss": 0.0103, "step": 10340 }, { "epoch": 0.30797613556901193, "grad_norm": 0.4891358017921448, "learning_rate": 9.999784748113622e-06, "loss": 0.0154, "step": 10350 }, { "epoch": 0.3082736970526535, "grad_norm": 0.4496306777000427, "learning_rate": 9.999768384972808e-06, "loss": 0.011, "step": 10360 }, { "epoch": 0.3085712585362951, "grad_norm": 0.6419751644134521, "learning_rate": 9.99975142246839e-06, "loss": 0.0189, "step": 10370 }, { "epoch": 0.3088688200199366, "grad_norm": 0.5321207046508789, "learning_rate": 9.9997338606024e-06, "loss": 0.0145, "step": 10380 }, { "epoch": 0.30916638150357817, "grad_norm": 0.9949659705162048, "learning_rate": 9.999715699376947e-06, "loss": 0.0127, "step": 10390 }, { "epoch": 0.30946394298721974, "grad_norm": 0.5619217157363892, "learning_rate": 9.999696938794204e-06, "loss": 0.0169, "step": 10400 }, { "epoch": 0.3097615044708613, "grad_norm": 0.6778241395950317, "learning_rate": 9.999677578856423e-06, "loss": 0.0185, "step": 10410 }, { "epoch": 0.31005906595450283, "grad_norm": 0.5296579003334045, "learning_rate": 9.999657619565923e-06, "loss": 0.0204, "step": 10420 }, { "epoch": 0.3103566274381444, "grad_norm": 0.5335755944252014, "learning_rate": 9.999637060925097e-06, "loss": 0.0137, "step": 10430 }, { "epoch": 0.310654188921786, "grad_norm": 0.6682588458061218, "learning_rate": 9.99961590293641e-06, "loss": 0.0171, "step": 10440 }, { "epoch": 0.3109517504054275, "grad_norm": 0.5306206345558167, "learning_rate": 9.999594145602397e-06, "loss": 0.009, "step": 10450 }, { "epoch": 0.31124931188906907, "grad_norm": 0.5298035740852356, "learning_rate": 9.99957178892567e-06, "loss": 0.0108, "step": 10460 }, { "epoch": 0.31154687337271064, "grad_norm": 0.5030165910720825, "learning_rate": 9.999548832908904e-06, "loss": 0.0117, "step": 10470 }, { "epoch": 0.3118444348563522, "grad_norm": 0.38292428851127625, "learning_rate": 9.999525277554855e-06, "loss": 0.0182, "step": 10480 }, { "epoch": 0.31214199633999373, "grad_norm": 0.6320590972900391, "learning_rate": 9.999501122866345e-06, "loss": 0.0103, "step": 10490 }, { "epoch": 0.3124395578236353, "grad_norm": 0.28817427158355713, "learning_rate": 9.999476368846268e-06, "loss": 0.0117, "step": 10500 }, { "epoch": 0.3127371193072769, "grad_norm": 0.4794212281703949, "learning_rate": 9.999451015497595e-06, "loss": 0.0187, "step": 10510 }, { "epoch": 0.3130346807909184, "grad_norm": 0.49707821011543274, "learning_rate": 9.999425062823364e-06, "loss": 0.0157, "step": 10520 }, { "epoch": 0.31333224227455997, "grad_norm": 0.6833332180976868, "learning_rate": 9.999398510826684e-06, "loss": 0.0115, "step": 10530 }, { "epoch": 0.31362980375820154, "grad_norm": 0.4004535675048828, "learning_rate": 9.999371359510741e-06, "loss": 0.0099, "step": 10540 }, { "epoch": 0.3139273652418431, "grad_norm": 0.36124351620674133, "learning_rate": 9.999343608878789e-06, "loss": 0.0159, "step": 10550 }, { "epoch": 0.31422492672548463, "grad_norm": 0.16871695220470428, "learning_rate": 9.999315258934154e-06, "loss": 0.0109, "step": 10560 }, { "epoch": 0.3145224882091262, "grad_norm": 0.5227507948875427, "learning_rate": 9.999286309680234e-06, "loss": 0.0155, "step": 10570 }, { "epoch": 0.3148200496927678, "grad_norm": 0.7250481843948364, "learning_rate": 9.999256761120502e-06, "loss": 0.0118, "step": 10580 }, { "epoch": 0.31511761117640935, "grad_norm": 0.5330941081047058, "learning_rate": 9.999226613258496e-06, "loss": 0.0135, "step": 10590 }, { "epoch": 0.31541517266005087, "grad_norm": 0.545985221862793, "learning_rate": 9.999195866097835e-06, "loss": 0.0109, "step": 10600 }, { "epoch": 0.31571273414369244, "grad_norm": 0.5231500267982483, "learning_rate": 9.999164519642201e-06, "loss": 0.0145, "step": 10610 }, { "epoch": 0.316010295627334, "grad_norm": 0.487122118473053, "learning_rate": 9.999132573895354e-06, "loss": 0.0088, "step": 10620 }, { "epoch": 0.31630785711097553, "grad_norm": 0.4109269380569458, "learning_rate": 9.999100028861122e-06, "loss": 0.0112, "step": 10630 }, { "epoch": 0.3166054185946171, "grad_norm": 0.505040168762207, "learning_rate": 9.999066884543407e-06, "loss": 0.0063, "step": 10640 }, { "epoch": 0.3169029800782587, "grad_norm": 0.4869079291820526, "learning_rate": 9.999033140946183e-06, "loss": 0.0118, "step": 10650 }, { "epoch": 0.31720054156190025, "grad_norm": 0.28847432136535645, "learning_rate": 9.998998798073496e-06, "loss": 0.0132, "step": 10660 }, { "epoch": 0.31749810304554177, "grad_norm": 0.6261017322540283, "learning_rate": 9.99896385592946e-06, "loss": 0.0157, "step": 10670 }, { "epoch": 0.31779566452918334, "grad_norm": 0.527285635471344, "learning_rate": 9.998928314518267e-06, "loss": 0.0142, "step": 10680 }, { "epoch": 0.3180932260128249, "grad_norm": 0.3684939742088318, "learning_rate": 9.998892173844175e-06, "loss": 0.0175, "step": 10690 }, { "epoch": 0.31839078749646643, "grad_norm": 0.19684630632400513, "learning_rate": 9.998855433911518e-06, "loss": 0.0078, "step": 10700 }, { "epoch": 0.318688348980108, "grad_norm": 0.32339829206466675, "learning_rate": 9.9988180947247e-06, "loss": 0.0113, "step": 10710 }, { "epoch": 0.3189859104637496, "grad_norm": 0.70550936460495, "learning_rate": 9.998780156288196e-06, "loss": 0.0156, "step": 10720 }, { "epoch": 0.31928347194739115, "grad_norm": 0.3788110315799713, "learning_rate": 9.998741618606556e-06, "loss": 0.0127, "step": 10730 }, { "epoch": 0.31958103343103267, "grad_norm": 0.43445849418640137, "learning_rate": 9.998702481684398e-06, "loss": 0.0092, "step": 10740 }, { "epoch": 0.31987859491467424, "grad_norm": 0.45810580253601074, "learning_rate": 9.998662745526415e-06, "loss": 0.0133, "step": 10750 }, { "epoch": 0.3201761563983158, "grad_norm": 1.0193525552749634, "learning_rate": 9.998622410137371e-06, "loss": 0.019, "step": 10760 }, { "epoch": 0.32047371788195733, "grad_norm": 0.36909911036491394, "learning_rate": 9.998581475522099e-06, "loss": 0.011, "step": 10770 }, { "epoch": 0.3207712793655989, "grad_norm": 0.43537279963493347, "learning_rate": 9.998539941685507e-06, "loss": 0.011, "step": 10780 }, { "epoch": 0.3210688408492405, "grad_norm": 0.6145304441452026, "learning_rate": 9.998497808632577e-06, "loss": 0.012, "step": 10790 }, { "epoch": 0.32136640233288205, "grad_norm": 0.37349334359169006, "learning_rate": 9.998455076368356e-06, "loss": 0.0127, "step": 10800 }, { "epoch": 0.32166396381652357, "grad_norm": 0.4179721176624298, "learning_rate": 9.998411744897968e-06, "loss": 0.0127, "step": 10810 }, { "epoch": 0.32196152530016514, "grad_norm": 0.3754368722438812, "learning_rate": 9.998367814226608e-06, "loss": 0.0244, "step": 10820 }, { "epoch": 0.3222590867838067, "grad_norm": 0.6991063356399536, "learning_rate": 9.998323284359542e-06, "loss": 0.0116, "step": 10830 }, { "epoch": 0.3225566482674483, "grad_norm": 0.4626149535179138, "learning_rate": 9.998278155302107e-06, "loss": 0.0117, "step": 10840 }, { "epoch": 0.3228542097510898, "grad_norm": 0.5296069979667664, "learning_rate": 9.998232427059715e-06, "loss": 0.0121, "step": 10850 }, { "epoch": 0.3231517712347314, "grad_norm": 0.3757537007331848, "learning_rate": 9.998186099637846e-06, "loss": 0.0222, "step": 10860 }, { "epoch": 0.32344933271837295, "grad_norm": 0.19361945986747742, "learning_rate": 9.998139173042055e-06, "loss": 0.0121, "step": 10870 }, { "epoch": 0.32374689420201447, "grad_norm": 0.5471607446670532, "learning_rate": 9.998091647277967e-06, "loss": 0.0151, "step": 10880 }, { "epoch": 0.32404445568565604, "grad_norm": 0.530515730381012, "learning_rate": 9.998043522351279e-06, "loss": 0.0101, "step": 10890 }, { "epoch": 0.3243420171692976, "grad_norm": 0.491339772939682, "learning_rate": 9.997994798267761e-06, "loss": 0.0118, "step": 10900 }, { "epoch": 0.3246395786529392, "grad_norm": 0.7524451017379761, "learning_rate": 9.997945475033254e-06, "loss": 0.0146, "step": 10910 }, { "epoch": 0.3249371401365807, "grad_norm": 0.4578477442264557, "learning_rate": 9.99789555265367e-06, "loss": 0.0167, "step": 10920 }, { "epoch": 0.3252347016202223, "grad_norm": 0.6267381310462952, "learning_rate": 9.997845031134992e-06, "loss": 0.0119, "step": 10930 }, { "epoch": 0.32553226310386385, "grad_norm": 0.5719846487045288, "learning_rate": 9.99779391048328e-06, "loss": 0.0121, "step": 10940 }, { "epoch": 0.32582982458750537, "grad_norm": 0.3451966941356659, "learning_rate": 9.99774219070466e-06, "loss": 0.0142, "step": 10950 }, { "epoch": 0.32612738607114694, "grad_norm": 0.9896847605705261, "learning_rate": 9.997689871805333e-06, "loss": 0.0146, "step": 10960 }, { "epoch": 0.3264249475547885, "grad_norm": 0.7139310836791992, "learning_rate": 9.99763695379157e-06, "loss": 0.0188, "step": 10970 }, { "epoch": 0.3267225090384301, "grad_norm": 0.3984435200691223, "learning_rate": 9.997583436669715e-06, "loss": 0.0089, "step": 10980 }, { "epoch": 0.3270200705220716, "grad_norm": 0.5556191205978394, "learning_rate": 9.997529320446184e-06, "loss": 0.0106, "step": 10990 }, { "epoch": 0.3273176320057132, "grad_norm": 0.4745534360408783, "learning_rate": 9.997474605127464e-06, "loss": 0.0146, "step": 11000 }, { "epoch": 0.32761519348935475, "grad_norm": 0.4979180693626404, "learning_rate": 9.997419290720114e-06, "loss": 0.0127, "step": 11010 }, { "epoch": 0.32791275497299627, "grad_norm": 0.464169442653656, "learning_rate": 9.997363377230767e-06, "loss": 0.016, "step": 11020 }, { "epoch": 0.32821031645663784, "grad_norm": 0.5297141671180725, "learning_rate": 9.997306864666121e-06, "loss": 0.0151, "step": 11030 }, { "epoch": 0.3285078779402794, "grad_norm": 0.4801328182220459, "learning_rate": 9.997249753032956e-06, "loss": 0.0142, "step": 11040 }, { "epoch": 0.328805439423921, "grad_norm": 0.8632891774177551, "learning_rate": 9.997192042338115e-06, "loss": 0.0108, "step": 11050 }, { "epoch": 0.3291030009075625, "grad_norm": 0.2721772789955139, "learning_rate": 9.997133732588518e-06, "loss": 0.0111, "step": 11060 }, { "epoch": 0.3294005623912041, "grad_norm": 0.6422079801559448, "learning_rate": 9.997074823791154e-06, "loss": 0.0126, "step": 11070 }, { "epoch": 0.32969812387484565, "grad_norm": 0.46114832162857056, "learning_rate": 9.997015315953086e-06, "loss": 0.0339, "step": 11080 }, { "epoch": 0.3299956853584872, "grad_norm": 0.3420480489730835, "learning_rate": 9.996955209081446e-06, "loss": 0.013, "step": 11090 }, { "epoch": 0.33029324684212874, "grad_norm": 0.3080139756202698, "learning_rate": 9.996894503183444e-06, "loss": 0.0197, "step": 11100 }, { "epoch": 0.3305908083257703, "grad_norm": 0.35120755434036255, "learning_rate": 9.99683319826635e-06, "loss": 0.0159, "step": 11110 }, { "epoch": 0.3308883698094119, "grad_norm": 0.3545265793800354, "learning_rate": 9.996771294337518e-06, "loss": 0.0097, "step": 11120 }, { "epoch": 0.3311859312930534, "grad_norm": 0.4151776134967804, "learning_rate": 9.996708791404367e-06, "loss": 0.0089, "step": 11130 }, { "epoch": 0.331483492776695, "grad_norm": 0.3396027386188507, "learning_rate": 9.996645689474394e-06, "loss": 0.0136, "step": 11140 }, { "epoch": 0.33178105426033655, "grad_norm": 0.3764437437057495, "learning_rate": 9.996581988555158e-06, "loss": 0.017, "step": 11150 }, { "epoch": 0.3320786157439781, "grad_norm": 0.5085279941558838, "learning_rate": 9.996517688654296e-06, "loss": 0.0126, "step": 11160 }, { "epoch": 0.33237617722761964, "grad_norm": 0.70924973487854, "learning_rate": 9.99645278977952e-06, "loss": 0.0133, "step": 11170 }, { "epoch": 0.3326737387112612, "grad_norm": 0.6383556723594666, "learning_rate": 9.996387291938609e-06, "loss": 0.0096, "step": 11180 }, { "epoch": 0.3329713001949028, "grad_norm": 0.5856186747550964, "learning_rate": 9.996321195139412e-06, "loss": 0.0131, "step": 11190 }, { "epoch": 0.3332688616785443, "grad_norm": 0.8727948665618896, "learning_rate": 9.996254499389852e-06, "loss": 0.0094, "step": 11200 }, { "epoch": 0.3335664231621859, "grad_norm": 0.5351476073265076, "learning_rate": 9.99618720469793e-06, "loss": 0.0156, "step": 11210 }, { "epoch": 0.33386398464582745, "grad_norm": 0.3324931263923645, "learning_rate": 9.996119311071707e-06, "loss": 0.0123, "step": 11220 }, { "epoch": 0.334161546129469, "grad_norm": 0.7265337705612183, "learning_rate": 9.996050818519325e-06, "loss": 0.0113, "step": 11230 }, { "epoch": 0.33445910761311054, "grad_norm": 0.3643503785133362, "learning_rate": 9.995981727048996e-06, "loss": 0.0133, "step": 11240 }, { "epoch": 0.3347566690967521, "grad_norm": 0.5390623807907104, "learning_rate": 9.995912036668998e-06, "loss": 0.0187, "step": 11250 }, { "epoch": 0.3350542305803937, "grad_norm": 0.4956072270870209, "learning_rate": 9.99584174738769e-06, "loss": 0.0085, "step": 11260 }, { "epoch": 0.33535179206403526, "grad_norm": 0.4313375651836395, "learning_rate": 9.995770859213498e-06, "loss": 0.0214, "step": 11270 }, { "epoch": 0.3356493535476768, "grad_norm": 0.21204957365989685, "learning_rate": 9.995699372154915e-06, "loss": 0.0119, "step": 11280 }, { "epoch": 0.33594691503131835, "grad_norm": 0.6571580171585083, "learning_rate": 9.995627286220517e-06, "loss": 0.0154, "step": 11290 }, { "epoch": 0.3362444765149599, "grad_norm": 0.5846821069717407, "learning_rate": 9.995554601418941e-06, "loss": 0.0142, "step": 11300 }, { "epoch": 0.33654203799860144, "grad_norm": 0.5606188774108887, "learning_rate": 9.995481317758904e-06, "loss": 0.0171, "step": 11310 }, { "epoch": 0.336839599482243, "grad_norm": 1.1015520095825195, "learning_rate": 9.995407435249188e-06, "loss": 0.0148, "step": 11320 }, { "epoch": 0.3371371609658846, "grad_norm": 0.4062689244747162, "learning_rate": 9.995332953898649e-06, "loss": 0.0093, "step": 11330 }, { "epoch": 0.33743472244952616, "grad_norm": 0.5725558400154114, "learning_rate": 9.99525787371622e-06, "loss": 0.0115, "step": 11340 }, { "epoch": 0.3377322839331677, "grad_norm": 0.6322987079620361, "learning_rate": 9.995182194710897e-06, "loss": 0.0128, "step": 11350 }, { "epoch": 0.33802984541680925, "grad_norm": 0.34101173281669617, "learning_rate": 9.995105916891758e-06, "loss": 0.0131, "step": 11360 }, { "epoch": 0.3383274069004508, "grad_norm": 0.2764335870742798, "learning_rate": 9.99502904026794e-06, "loss": 0.0078, "step": 11370 }, { "epoch": 0.33862496838409234, "grad_norm": 0.3158474564552307, "learning_rate": 9.994951564848665e-06, "loss": 0.0205, "step": 11380 }, { "epoch": 0.3389225298677339, "grad_norm": 0.2657250463962555, "learning_rate": 9.994873490643217e-06, "loss": 0.0097, "step": 11390 }, { "epoch": 0.3392200913513755, "grad_norm": 0.40052691102027893, "learning_rate": 9.994794817660955e-06, "loss": 0.0073, "step": 11400 }, { "epoch": 0.33951765283501706, "grad_norm": 0.474507600069046, "learning_rate": 9.994715545911314e-06, "loss": 0.0157, "step": 11410 }, { "epoch": 0.3398152143186586, "grad_norm": 0.4408416450023651, "learning_rate": 9.994635675403792e-06, "loss": 0.0141, "step": 11420 }, { "epoch": 0.34011277580230015, "grad_norm": 0.41192421317100525, "learning_rate": 9.99455520614797e-06, "loss": 0.0118, "step": 11430 }, { "epoch": 0.3404103372859417, "grad_norm": 0.6706546545028687, "learning_rate": 9.994474138153487e-06, "loss": 0.0157, "step": 11440 }, { "epoch": 0.34070789876958324, "grad_norm": 0.43673962354660034, "learning_rate": 9.994392471430066e-06, "loss": 0.0096, "step": 11450 }, { "epoch": 0.3410054602532248, "grad_norm": 0.7992612719535828, "learning_rate": 9.994310205987497e-06, "loss": 0.012, "step": 11460 }, { "epoch": 0.3413030217368664, "grad_norm": 0.40185314416885376, "learning_rate": 9.99422734183564e-06, "loss": 0.0107, "step": 11470 }, { "epoch": 0.34160058322050796, "grad_norm": 0.3502514958381653, "learning_rate": 9.994143878984432e-06, "loss": 0.0151, "step": 11480 }, { "epoch": 0.3418981447041495, "grad_norm": 0.5338907837867737, "learning_rate": 9.994059817443873e-06, "loss": 0.0179, "step": 11490 }, { "epoch": 0.34219570618779105, "grad_norm": 0.46463507413864136, "learning_rate": 9.993975157224044e-06, "loss": 0.013, "step": 11500 }, { "epoch": 0.3424932676714326, "grad_norm": 0.7446004748344421, "learning_rate": 9.993889898335094e-06, "loss": 0.016, "step": 11510 }, { "epoch": 0.3427908291550742, "grad_norm": 0.4517410695552826, "learning_rate": 9.993804040787242e-06, "loss": 0.0162, "step": 11520 }, { "epoch": 0.3430883906387157, "grad_norm": 0.24098555743694305, "learning_rate": 9.99371758459078e-06, "loss": 0.0128, "step": 11530 }, { "epoch": 0.3433859521223573, "grad_norm": 0.1830569952726364, "learning_rate": 9.993630529756075e-06, "loss": 0.0149, "step": 11540 }, { "epoch": 0.34368351360599886, "grad_norm": 0.2962551712989807, "learning_rate": 9.993542876293563e-06, "loss": 0.0117, "step": 11550 }, { "epoch": 0.3439810750896404, "grad_norm": 0.48381999135017395, "learning_rate": 9.993454624213747e-06, "loss": 0.0092, "step": 11560 }, { "epoch": 0.34427863657328195, "grad_norm": 0.47515973448753357, "learning_rate": 9.99336577352721e-06, "loss": 0.0113, "step": 11570 }, { "epoch": 0.3445761980569235, "grad_norm": 0.20793002843856812, "learning_rate": 9.993276324244606e-06, "loss": 0.015, "step": 11580 }, { "epoch": 0.3448737595405651, "grad_norm": 0.5355597734451294, "learning_rate": 9.993186276376652e-06, "loss": 0.0097, "step": 11590 }, { "epoch": 0.3451713210242066, "grad_norm": 0.6109140515327454, "learning_rate": 9.993095629934145e-06, "loss": 0.0088, "step": 11600 }, { "epoch": 0.3454688825078482, "grad_norm": 0.2953310012817383, "learning_rate": 9.993004384927953e-06, "loss": 0.0137, "step": 11610 }, { "epoch": 0.34576644399148976, "grad_norm": 0.338090717792511, "learning_rate": 9.992912541369016e-06, "loss": 0.0133, "step": 11620 }, { "epoch": 0.3460640054751313, "grad_norm": 0.41956499218940735, "learning_rate": 9.99282009926834e-06, "loss": 0.008, "step": 11630 }, { "epoch": 0.34636156695877285, "grad_norm": 0.20898035168647766, "learning_rate": 9.992727058637007e-06, "loss": 0.0084, "step": 11640 }, { "epoch": 0.3466591284424144, "grad_norm": 0.3613472878932953, "learning_rate": 9.992633419486172e-06, "loss": 0.0129, "step": 11650 }, { "epoch": 0.346956689926056, "grad_norm": 0.3804510533809662, "learning_rate": 9.992539181827062e-06, "loss": 0.0115, "step": 11660 }, { "epoch": 0.3472542514096975, "grad_norm": 0.49209150671958923, "learning_rate": 9.992444345670973e-06, "loss": 0.0113, "step": 11670 }, { "epoch": 0.3475518128933391, "grad_norm": 1.100643515586853, "learning_rate": 9.99234891102927e-06, "loss": 0.0211, "step": 11680 }, { "epoch": 0.34784937437698066, "grad_norm": 0.49691182374954224, "learning_rate": 9.992252877913398e-06, "loss": 0.0117, "step": 11690 }, { "epoch": 0.3481469358606222, "grad_norm": 0.3078761398792267, "learning_rate": 9.992156246334869e-06, "loss": 0.01, "step": 11700 }, { "epoch": 0.34844449734426375, "grad_norm": 0.7168204188346863, "learning_rate": 9.992059016305264e-06, "loss": 0.0126, "step": 11710 }, { "epoch": 0.3487420588279053, "grad_norm": 0.43094345927238464, "learning_rate": 9.991961187836243e-06, "loss": 0.0126, "step": 11720 }, { "epoch": 0.3490396203115469, "grad_norm": 0.9009734988212585, "learning_rate": 9.99186276093953e-06, "loss": 0.0156, "step": 11730 }, { "epoch": 0.3493371817951884, "grad_norm": 0.23515698313713074, "learning_rate": 9.991763735626926e-06, "loss": 0.0102, "step": 11740 }, { "epoch": 0.34963474327883, "grad_norm": 0.2523135542869568, "learning_rate": 9.9916641119103e-06, "loss": 0.0135, "step": 11750 }, { "epoch": 0.34993230476247156, "grad_norm": 0.36473217606544495, "learning_rate": 9.991563889801597e-06, "loss": 0.0073, "step": 11760 }, { "epoch": 0.35022986624611313, "grad_norm": 1.0699262619018555, "learning_rate": 9.991463069312833e-06, "loss": 0.0173, "step": 11770 }, { "epoch": 0.35052742772975465, "grad_norm": 0.35589417815208435, "learning_rate": 9.991361650456088e-06, "loss": 0.016, "step": 11780 }, { "epoch": 0.3508249892133962, "grad_norm": 0.3014181852340698, "learning_rate": 9.991259633243525e-06, "loss": 0.0127, "step": 11790 }, { "epoch": 0.3511225506970378, "grad_norm": 0.6355476379394531, "learning_rate": 9.991157017687374e-06, "loss": 0.0115, "step": 11800 }, { "epoch": 0.3514201121806793, "grad_norm": 0.32808592915534973, "learning_rate": 9.991053803799933e-06, "loss": 0.0088, "step": 11810 }, { "epoch": 0.3517176736643209, "grad_norm": 0.30072587728500366, "learning_rate": 9.990949991593577e-06, "loss": 0.0156, "step": 11820 }, { "epoch": 0.35201523514796246, "grad_norm": 0.41168013215065, "learning_rate": 9.99084558108075e-06, "loss": 0.0125, "step": 11830 }, { "epoch": 0.35231279663160403, "grad_norm": 0.535986602306366, "learning_rate": 9.990740572273972e-06, "loss": 0.012, "step": 11840 }, { "epoch": 0.35261035811524555, "grad_norm": 0.5746467113494873, "learning_rate": 9.990634965185826e-06, "loss": 0.0147, "step": 11850 }, { "epoch": 0.3529079195988871, "grad_norm": 0.4167800545692444, "learning_rate": 9.990528759828974e-06, "loss": 0.014, "step": 11860 }, { "epoch": 0.3532054810825287, "grad_norm": 0.4015708267688751, "learning_rate": 9.990421956216149e-06, "loss": 0.0104, "step": 11870 }, { "epoch": 0.3535030425661702, "grad_norm": 0.5243723392486572, "learning_rate": 9.990314554360155e-06, "loss": 0.0122, "step": 11880 }, { "epoch": 0.3538006040498118, "grad_norm": 0.4575636684894562, "learning_rate": 9.990206554273864e-06, "loss": 0.009, "step": 11890 }, { "epoch": 0.35409816553345336, "grad_norm": 0.479749858379364, "learning_rate": 9.990097955970227e-06, "loss": 0.0135, "step": 11900 }, { "epoch": 0.35439572701709493, "grad_norm": 0.611494243144989, "learning_rate": 9.989988759462258e-06, "loss": 0.0139, "step": 11910 }, { "epoch": 0.35469328850073645, "grad_norm": 0.33073821663856506, "learning_rate": 9.989878964763048e-06, "loss": 0.0193, "step": 11920 }, { "epoch": 0.354990849984378, "grad_norm": 0.6168866157531738, "learning_rate": 9.989768571885764e-06, "loss": 0.0128, "step": 11930 }, { "epoch": 0.3552884114680196, "grad_norm": 0.26065129041671753, "learning_rate": 9.989657580843634e-06, "loss": 0.0109, "step": 11940 }, { "epoch": 0.3555859729516611, "grad_norm": 0.4767727553844452, "learning_rate": 9.989545991649967e-06, "loss": 0.0135, "step": 11950 }, { "epoch": 0.3558835344353027, "grad_norm": 0.3118370473384857, "learning_rate": 9.98943380431814e-06, "loss": 0.0147, "step": 11960 }, { "epoch": 0.35618109591894426, "grad_norm": 0.46178770065307617, "learning_rate": 9.989321018861598e-06, "loss": 0.0114, "step": 11970 }, { "epoch": 0.35647865740258583, "grad_norm": 0.47323542833328247, "learning_rate": 9.989207635293865e-06, "loss": 0.0096, "step": 11980 }, { "epoch": 0.35677621888622735, "grad_norm": 0.62230384349823, "learning_rate": 9.989093653628532e-06, "loss": 0.0163, "step": 11990 }, { "epoch": 0.3570737803698689, "grad_norm": 0.1572709083557129, "learning_rate": 9.988979073879264e-06, "loss": 0.0097, "step": 12000 }, { "epoch": 0.3573713418535105, "grad_norm": 0.2843494117259979, "learning_rate": 9.988863896059794e-06, "loss": 0.0114, "step": 12010 }, { "epoch": 0.35766890333715207, "grad_norm": 0.7474396824836731, "learning_rate": 9.988748120183934e-06, "loss": 0.0265, "step": 12020 }, { "epoch": 0.3579664648207936, "grad_norm": 0.5318084955215454, "learning_rate": 9.988631746265559e-06, "loss": 0.0102, "step": 12030 }, { "epoch": 0.35826402630443516, "grad_norm": 1.1838723421096802, "learning_rate": 9.98851477431862e-06, "loss": 0.0217, "step": 12040 }, { "epoch": 0.35856158778807673, "grad_norm": 0.47920531034469604, "learning_rate": 9.988397204357143e-06, "loss": 0.0165, "step": 12050 }, { "epoch": 0.35885914927171825, "grad_norm": 0.7636427879333496, "learning_rate": 9.988279036395217e-06, "loss": 0.0131, "step": 12060 }, { "epoch": 0.3591567107553598, "grad_norm": 0.490215539932251, "learning_rate": 9.988160270447011e-06, "loss": 0.0095, "step": 12070 }, { "epoch": 0.3594542722390014, "grad_norm": 0.6264039874076843, "learning_rate": 9.988040906526762e-06, "loss": 0.0132, "step": 12080 }, { "epoch": 0.35975183372264297, "grad_norm": 0.694599449634552, "learning_rate": 9.987920944648778e-06, "loss": 0.0141, "step": 12090 }, { "epoch": 0.3600493952062845, "grad_norm": 0.3815903067588806, "learning_rate": 9.98780038482744e-06, "loss": 0.0107, "step": 12100 }, { "epoch": 0.36034695668992606, "grad_norm": 0.1776367723941803, "learning_rate": 9.987679227077202e-06, "loss": 0.0099, "step": 12110 }, { "epoch": 0.36064451817356763, "grad_norm": 0.38806667923927307, "learning_rate": 9.987557471412587e-06, "loss": 0.0124, "step": 12120 }, { "epoch": 0.36094207965720915, "grad_norm": 0.34218117594718933, "learning_rate": 9.987435117848191e-06, "loss": 0.0118, "step": 12130 }, { "epoch": 0.3612396411408507, "grad_norm": 0.3631381094455719, "learning_rate": 9.987312166398682e-06, "loss": 0.0078, "step": 12140 }, { "epoch": 0.3615372026244923, "grad_norm": 0.6122136116027832, "learning_rate": 9.9871886170788e-06, "loss": 0.0104, "step": 12150 }, { "epoch": 0.36183476410813387, "grad_norm": 0.6244113445281982, "learning_rate": 9.987064469903353e-06, "loss": 0.0132, "step": 12160 }, { "epoch": 0.3621323255917754, "grad_norm": 0.22494854032993317, "learning_rate": 9.986939724887226e-06, "loss": 0.0099, "step": 12170 }, { "epoch": 0.36242988707541696, "grad_norm": 0.6752346754074097, "learning_rate": 9.986814382045372e-06, "loss": 0.0117, "step": 12180 }, { "epoch": 0.36272744855905853, "grad_norm": 0.41532403230667114, "learning_rate": 9.986688441392817e-06, "loss": 0.0149, "step": 12190 }, { "epoch": 0.36302501004270005, "grad_norm": 0.5930137038230896, "learning_rate": 9.98656190294466e-06, "loss": 0.0175, "step": 12200 }, { "epoch": 0.3633225715263416, "grad_norm": 0.6785852313041687, "learning_rate": 9.986434766716068e-06, "loss": 0.0166, "step": 12210 }, { "epoch": 0.3636201330099832, "grad_norm": 0.35947275161743164, "learning_rate": 9.986307032722286e-06, "loss": 0.0108, "step": 12220 }, { "epoch": 0.36391769449362477, "grad_norm": 0.48669207096099854, "learning_rate": 9.986178700978622e-06, "loss": 0.0082, "step": 12230 }, { "epoch": 0.3642152559772663, "grad_norm": 0.26437830924987793, "learning_rate": 9.98604977150046e-06, "loss": 0.0134, "step": 12240 }, { "epoch": 0.36451281746090786, "grad_norm": 0.6414132714271545, "learning_rate": 9.985920244303259e-06, "loss": 0.0139, "step": 12250 }, { "epoch": 0.36481037894454943, "grad_norm": 0.5688115358352661, "learning_rate": 9.985790119402546e-06, "loss": 0.0175, "step": 12260 }, { "epoch": 0.365107940428191, "grad_norm": 0.5013750195503235, "learning_rate": 9.985659396813917e-06, "loss": 0.0101, "step": 12270 }, { "epoch": 0.3654055019118325, "grad_norm": 0.5017697215080261, "learning_rate": 9.985528076553047e-06, "loss": 0.0122, "step": 12280 }, { "epoch": 0.3657030633954741, "grad_norm": 0.5444527864456177, "learning_rate": 9.985396158635674e-06, "loss": 0.0117, "step": 12290 }, { "epoch": 0.36600062487911567, "grad_norm": 0.233464777469635, "learning_rate": 9.985263643077618e-06, "loss": 0.0084, "step": 12300 }, { "epoch": 0.3662981863627572, "grad_norm": 0.3524467945098877, "learning_rate": 9.98513052989476e-06, "loss": 0.0191, "step": 12310 }, { "epoch": 0.36659574784639876, "grad_norm": 0.49815380573272705, "learning_rate": 9.984996819103058e-06, "loss": 0.0161, "step": 12320 }, { "epoch": 0.36689330933004033, "grad_norm": 0.21510310471057892, "learning_rate": 9.984862510718542e-06, "loss": 0.0098, "step": 12330 }, { "epoch": 0.3671908708136819, "grad_norm": 0.36183810234069824, "learning_rate": 9.984727604757311e-06, "loss": 0.011, "step": 12340 }, { "epoch": 0.3674884322973234, "grad_norm": 0.6122657060623169, "learning_rate": 9.984592101235542e-06, "loss": 0.0142, "step": 12350 }, { "epoch": 0.367785993780965, "grad_norm": 0.2843831181526184, "learning_rate": 9.984456000169473e-06, "loss": 0.0114, "step": 12360 }, { "epoch": 0.36808355526460657, "grad_norm": 0.8635808825492859, "learning_rate": 9.984319301575422e-06, "loss": 0.0133, "step": 12370 }, { "epoch": 0.3683811167482481, "grad_norm": 0.18546463549137115, "learning_rate": 9.984182005469779e-06, "loss": 0.0131, "step": 12380 }, { "epoch": 0.36867867823188966, "grad_norm": 0.4241634011268616, "learning_rate": 9.984044111868998e-06, "loss": 0.0125, "step": 12390 }, { "epoch": 0.36897623971553123, "grad_norm": 0.4608052968978882, "learning_rate": 9.983905620789612e-06, "loss": 0.0147, "step": 12400 }, { "epoch": 0.3692738011991728, "grad_norm": 0.5145608186721802, "learning_rate": 9.983766532248222e-06, "loss": 0.0111, "step": 12410 }, { "epoch": 0.3695713626828143, "grad_norm": 0.34598031640052795, "learning_rate": 9.983626846261503e-06, "loss": 0.0141, "step": 12420 }, { "epoch": 0.3698689241664559, "grad_norm": 0.27326539158821106, "learning_rate": 9.9834865628462e-06, "loss": 0.0132, "step": 12430 }, { "epoch": 0.37016648565009747, "grad_norm": 0.3918251395225525, "learning_rate": 9.98334568201913e-06, "loss": 0.0143, "step": 12440 }, { "epoch": 0.370464047133739, "grad_norm": 0.3389540910720825, "learning_rate": 9.98320420379718e-06, "loss": 0.0083, "step": 12450 }, { "epoch": 0.37076160861738056, "grad_norm": 0.34124046564102173, "learning_rate": 9.983062128197312e-06, "loss": 0.0086, "step": 12460 }, { "epoch": 0.37105917010102213, "grad_norm": 0.4714783728122711, "learning_rate": 9.982919455236557e-06, "loss": 0.0111, "step": 12470 }, { "epoch": 0.3713567315846637, "grad_norm": 0.44073882699012756, "learning_rate": 9.982776184932019e-06, "loss": 0.0154, "step": 12480 }, { "epoch": 0.3716542930683052, "grad_norm": 0.374673992395401, "learning_rate": 9.98263231730087e-06, "loss": 0.0178, "step": 12490 }, { "epoch": 0.3719518545519468, "grad_norm": 0.46899110078811646, "learning_rate": 9.982487852360364e-06, "loss": 0.0127, "step": 12500 }, { "epoch": 0.37224941603558837, "grad_norm": 0.4121083617210388, "learning_rate": 9.982342790127811e-06, "loss": 0.0097, "step": 12510 }, { "epoch": 0.37254697751922994, "grad_norm": 0.4429507255554199, "learning_rate": 9.982197130620603e-06, "loss": 0.0124, "step": 12520 }, { "epoch": 0.37284453900287146, "grad_norm": 0.4938521981239319, "learning_rate": 9.982050873856204e-06, "loss": 0.0126, "step": 12530 }, { "epoch": 0.37314210048651303, "grad_norm": 0.2996240258216858, "learning_rate": 9.981904019852146e-06, "loss": 0.0146, "step": 12540 }, { "epoch": 0.3734396619701546, "grad_norm": 0.28637632727622986, "learning_rate": 9.981756568626033e-06, "loss": 0.0142, "step": 12550 }, { "epoch": 0.3737372234537961, "grad_norm": 0.37545308470726013, "learning_rate": 9.981608520195543e-06, "loss": 0.0154, "step": 12560 }, { "epoch": 0.3740347849374377, "grad_norm": 0.4834034740924835, "learning_rate": 9.981459874578418e-06, "loss": 0.0124, "step": 12570 }, { "epoch": 0.37433234642107927, "grad_norm": 0.2664906978607178, "learning_rate": 9.981310631792485e-06, "loss": 0.0122, "step": 12580 }, { "epoch": 0.37462990790472084, "grad_norm": 0.5832645297050476, "learning_rate": 9.981160791855631e-06, "loss": 0.0131, "step": 12590 }, { "epoch": 0.37492746938836236, "grad_norm": 0.6332595348358154, "learning_rate": 9.981010354785819e-06, "loss": 0.0097, "step": 12600 }, { "epoch": 0.37522503087200393, "grad_norm": 0.48299506306648254, "learning_rate": 9.980859320601082e-06, "loss": 0.012, "step": 12610 }, { "epoch": 0.3755225923556455, "grad_norm": 0.3595734238624573, "learning_rate": 9.980707689319526e-06, "loss": 0.011, "step": 12620 }, { "epoch": 0.375820153839287, "grad_norm": 0.3304906189441681, "learning_rate": 9.980555460959332e-06, "loss": 0.0111, "step": 12630 }, { "epoch": 0.3761177153229286, "grad_norm": 0.3843144178390503, "learning_rate": 9.980402635538745e-06, "loss": 0.0106, "step": 12640 }, { "epoch": 0.37641527680657016, "grad_norm": 0.45222556591033936, "learning_rate": 9.980249213076085e-06, "loss": 0.0116, "step": 12650 }, { "epoch": 0.37671283829021174, "grad_norm": 0.8293495178222656, "learning_rate": 9.980095193589747e-06, "loss": 0.0157, "step": 12660 }, { "epoch": 0.37701039977385326, "grad_norm": 0.19565744698047638, "learning_rate": 9.979940577098192e-06, "loss": 0.0114, "step": 12670 }, { "epoch": 0.37730796125749483, "grad_norm": 0.3955139219760895, "learning_rate": 9.979785363619958e-06, "loss": 0.0085, "step": 12680 }, { "epoch": 0.3776055227411364, "grad_norm": 0.5225045084953308, "learning_rate": 9.97962955317365e-06, "loss": 0.0123, "step": 12690 }, { "epoch": 0.3779030842247779, "grad_norm": 0.4004742503166199, "learning_rate": 9.979473145777944e-06, "loss": 0.01, "step": 12700 }, { "epoch": 0.3782006457084195, "grad_norm": 0.3854118883609772, "learning_rate": 9.979316141451593e-06, "loss": 0.0143, "step": 12710 }, { "epoch": 0.37849820719206106, "grad_norm": 0.6673621535301208, "learning_rate": 9.97915854021342e-06, "loss": 0.0112, "step": 12720 }, { "epoch": 0.37879576867570264, "grad_norm": 0.5920084714889526, "learning_rate": 9.979000342082313e-06, "loss": 0.0138, "step": 12730 }, { "epoch": 0.37909333015934416, "grad_norm": 0.25866565108299255, "learning_rate": 9.978841547077241e-06, "loss": 0.0116, "step": 12740 }, { "epoch": 0.37939089164298573, "grad_norm": 0.3353945016860962, "learning_rate": 9.978682155217236e-06, "loss": 0.0136, "step": 12750 }, { "epoch": 0.3796884531266273, "grad_norm": 0.7299795150756836, "learning_rate": 9.978522166521408e-06, "loss": 0.0119, "step": 12760 }, { "epoch": 0.3799860146102689, "grad_norm": 0.48574379086494446, "learning_rate": 9.978361581008937e-06, "loss": 0.0144, "step": 12770 }, { "epoch": 0.3802835760939104, "grad_norm": 0.2015678435564041, "learning_rate": 9.978200398699071e-06, "loss": 0.0108, "step": 12780 }, { "epoch": 0.38058113757755196, "grad_norm": 0.2725028693675995, "learning_rate": 9.978038619611136e-06, "loss": 0.0202, "step": 12790 }, { "epoch": 0.38087869906119354, "grad_norm": 0.4275057315826416, "learning_rate": 9.977876243764521e-06, "loss": 0.0134, "step": 12800 }, { "epoch": 0.38117626054483505, "grad_norm": 0.3948391079902649, "learning_rate": 9.977713271178697e-06, "loss": 0.017, "step": 12810 }, { "epoch": 0.38147382202847663, "grad_norm": 0.4363970160484314, "learning_rate": 9.977549701873196e-06, "loss": 0.012, "step": 12820 }, { "epoch": 0.3817713835121182, "grad_norm": 0.39341139793395996, "learning_rate": 9.97738553586763e-06, "loss": 0.0156, "step": 12830 }, { "epoch": 0.3820689449957598, "grad_norm": 0.41050955653190613, "learning_rate": 9.977220773181676e-06, "loss": 0.0085, "step": 12840 }, { "epoch": 0.3823665064794013, "grad_norm": 0.19403250515460968, "learning_rate": 9.977055413835087e-06, "loss": 0.0104, "step": 12850 }, { "epoch": 0.38266406796304286, "grad_norm": 0.36044877767562866, "learning_rate": 9.976889457847684e-06, "loss": 0.0115, "step": 12860 }, { "epoch": 0.38296162944668444, "grad_norm": 0.37257763743400574, "learning_rate": 9.976722905239365e-06, "loss": 0.0085, "step": 12870 }, { "epoch": 0.38325919093032595, "grad_norm": 0.4866178035736084, "learning_rate": 9.976555756030093e-06, "loss": 0.0139, "step": 12880 }, { "epoch": 0.3835567524139675, "grad_norm": 0.1941790133714676, "learning_rate": 9.976388010239909e-06, "loss": 0.0088, "step": 12890 }, { "epoch": 0.3838543138976091, "grad_norm": 0.3777347505092621, "learning_rate": 9.976219667888916e-06, "loss": 0.011, "step": 12900 }, { "epoch": 0.3841518753812507, "grad_norm": 0.5330497026443481, "learning_rate": 9.976050728997303e-06, "loss": 0.0099, "step": 12910 }, { "epoch": 0.3844494368648922, "grad_norm": 0.9791615605354309, "learning_rate": 9.975881193585313e-06, "loss": 0.0138, "step": 12920 }, { "epoch": 0.38474699834853376, "grad_norm": 4.961268901824951, "learning_rate": 9.975711061673277e-06, "loss": 0.0126, "step": 12930 }, { "epoch": 0.38504455983217534, "grad_norm": 0.33928292989730835, "learning_rate": 9.975540333281587e-06, "loss": 0.0137, "step": 12940 }, { "epoch": 0.38534212131581685, "grad_norm": 0.43563738465309143, "learning_rate": 9.97536900843071e-06, "loss": 0.0117, "step": 12950 }, { "epoch": 0.3856396827994584, "grad_norm": 0.93022620677948, "learning_rate": 9.975197087141183e-06, "loss": 0.015, "step": 12960 }, { "epoch": 0.3859372442831, "grad_norm": 0.3089967668056488, "learning_rate": 9.975024569433617e-06, "loss": 0.0118, "step": 12970 }, { "epoch": 0.3862348057667416, "grad_norm": 0.3245333135128021, "learning_rate": 9.974851455328694e-06, "loss": 0.0111, "step": 12980 }, { "epoch": 0.3865323672503831, "grad_norm": 0.7858638167381287, "learning_rate": 9.974677744847165e-06, "loss": 0.0129, "step": 12990 }, { "epoch": 0.38682992873402466, "grad_norm": 0.4754137098789215, "learning_rate": 9.974503438009853e-06, "loss": 0.0133, "step": 13000 }, { "epoch": 0.38712749021766624, "grad_norm": 0.20670895278453827, "learning_rate": 9.974328534837657e-06, "loss": 0.011, "step": 13010 }, { "epoch": 0.3874250517013078, "grad_norm": 0.5455492734909058, "learning_rate": 9.974153035351542e-06, "loss": 0.0112, "step": 13020 }, { "epoch": 0.3877226131849493, "grad_norm": 0.33004260063171387, "learning_rate": 9.973976939572546e-06, "loss": 0.0097, "step": 13030 }, { "epoch": 0.3880201746685909, "grad_norm": 0.23980186879634857, "learning_rate": 9.97380024752178e-06, "loss": 0.0093, "step": 13040 }, { "epoch": 0.3883177361522325, "grad_norm": 0.21281233429908752, "learning_rate": 9.973622959220426e-06, "loss": 0.0102, "step": 13050 }, { "epoch": 0.388615297635874, "grad_norm": 0.2666653096675873, "learning_rate": 9.973445074689735e-06, "loss": 0.017, "step": 13060 }, { "epoch": 0.38891285911951556, "grad_norm": 0.6252766847610474, "learning_rate": 9.973266593951034e-06, "loss": 0.0183, "step": 13070 }, { "epoch": 0.38921042060315714, "grad_norm": 0.2799360752105713, "learning_rate": 9.973087517025718e-06, "loss": 0.0081, "step": 13080 }, { "epoch": 0.3895079820867987, "grad_norm": 0.5319117903709412, "learning_rate": 9.972907843935253e-06, "loss": 0.0125, "step": 13090 }, { "epoch": 0.3898055435704402, "grad_norm": 0.5421821475028992, "learning_rate": 9.972727574701181e-06, "loss": 0.0145, "step": 13100 }, { "epoch": 0.3901031050540818, "grad_norm": 0.5130824446678162, "learning_rate": 9.97254670934511e-06, "loss": 0.016, "step": 13110 }, { "epoch": 0.3904006665377234, "grad_norm": 0.37458181381225586, "learning_rate": 9.972365247888722e-06, "loss": 0.0146, "step": 13120 }, { "epoch": 0.3906982280213649, "grad_norm": 0.35058003664016724, "learning_rate": 9.97218319035377e-06, "loss": 0.0149, "step": 13130 }, { "epoch": 0.39099578950500646, "grad_norm": 0.5475626587867737, "learning_rate": 9.97200053676208e-06, "loss": 0.011, "step": 13140 }, { "epoch": 0.39129335098864804, "grad_norm": 0.21761499345302582, "learning_rate": 9.97181728713555e-06, "loss": 0.0097, "step": 13150 }, { "epoch": 0.3915909124722896, "grad_norm": 0.2041264921426773, "learning_rate": 9.971633441496142e-06, "loss": 0.0125, "step": 13160 }, { "epoch": 0.3918884739559311, "grad_norm": 0.5626574158668518, "learning_rate": 9.9714489998659e-06, "loss": 0.0132, "step": 13170 }, { "epoch": 0.3921860354395727, "grad_norm": 0.37816014885902405, "learning_rate": 9.971263962266932e-06, "loss": 0.0136, "step": 13180 }, { "epoch": 0.3924835969232143, "grad_norm": 0.4812019467353821, "learning_rate": 9.97107832872142e-06, "loss": 0.013, "step": 13190 }, { "epoch": 0.3927811584068558, "grad_norm": 0.4358269274234772, "learning_rate": 9.97089209925162e-06, "loss": 0.0103, "step": 13200 }, { "epoch": 0.39307871989049736, "grad_norm": 0.4463041126728058, "learning_rate": 9.970705273879855e-06, "loss": 0.009, "step": 13210 }, { "epoch": 0.39337628137413894, "grad_norm": 0.4813143014907837, "learning_rate": 9.970517852628522e-06, "loss": 0.0138, "step": 13220 }, { "epoch": 0.3936738428577805, "grad_norm": 0.2690597176551819, "learning_rate": 9.970329835520087e-06, "loss": 0.0078, "step": 13230 }, { "epoch": 0.393971404341422, "grad_norm": 0.559133768081665, "learning_rate": 9.97014122257709e-06, "loss": 0.0068, "step": 13240 }, { "epoch": 0.3942689658250636, "grad_norm": 0.5000692009925842, "learning_rate": 9.969952013822144e-06, "loss": 0.0108, "step": 13250 }, { "epoch": 0.3945665273087052, "grad_norm": 0.5271328091621399, "learning_rate": 9.969762209277926e-06, "loss": 0.0128, "step": 13260 }, { "epoch": 0.39486408879234675, "grad_norm": 0.4196057915687561, "learning_rate": 9.969571808967196e-06, "loss": 0.0138, "step": 13270 }, { "epoch": 0.39516165027598826, "grad_norm": 0.22591988742351532, "learning_rate": 9.969380812912773e-06, "loss": 0.0121, "step": 13280 }, { "epoch": 0.39545921175962984, "grad_norm": 0.41021302342414856, "learning_rate": 9.969189221137556e-06, "loss": 0.0095, "step": 13290 }, { "epoch": 0.3957567732432714, "grad_norm": 0.372424840927124, "learning_rate": 9.968997033664514e-06, "loss": 0.0078, "step": 13300 }, { "epoch": 0.3960543347269129, "grad_norm": 0.17642627656459808, "learning_rate": 9.968804250516682e-06, "loss": 0.0115, "step": 13310 }, { "epoch": 0.3963518962105545, "grad_norm": 0.36479422450065613, "learning_rate": 9.968610871717173e-06, "loss": 0.0093, "step": 13320 }, { "epoch": 0.3966494576941961, "grad_norm": 0.4988594949245453, "learning_rate": 9.96841689728917e-06, "loss": 0.0102, "step": 13330 }, { "epoch": 0.39694701917783765, "grad_norm": 0.4892342686653137, "learning_rate": 9.968222327255925e-06, "loss": 0.0164, "step": 13340 }, { "epoch": 0.39724458066147916, "grad_norm": 0.5367768406867981, "learning_rate": 9.968027161640762e-06, "loss": 0.0106, "step": 13350 }, { "epoch": 0.39754214214512074, "grad_norm": 0.4813811182975769, "learning_rate": 9.967831400467079e-06, "loss": 0.0108, "step": 13360 }, { "epoch": 0.3978397036287623, "grad_norm": 0.1354091614484787, "learning_rate": 9.967635043758342e-06, "loss": 0.0086, "step": 13370 }, { "epoch": 0.3981372651124038, "grad_norm": 0.7071147561073303, "learning_rate": 9.96743809153809e-06, "loss": 0.0089, "step": 13380 }, { "epoch": 0.3984348265960454, "grad_norm": 0.2834177613258362, "learning_rate": 9.967240543829934e-06, "loss": 0.0096, "step": 13390 }, { "epoch": 0.398732388079687, "grad_norm": 0.26339107751846313, "learning_rate": 9.967042400657554e-06, "loss": 0.0096, "step": 13400 }, { "epoch": 0.39902994956332855, "grad_norm": 0.4985535740852356, "learning_rate": 9.966843662044707e-06, "loss": 0.0169, "step": 13410 }, { "epoch": 0.39932751104697006, "grad_norm": 0.5375456809997559, "learning_rate": 9.966644328015214e-06, "loss": 0.0132, "step": 13420 }, { "epoch": 0.39962507253061164, "grad_norm": 0.3979247212409973, "learning_rate": 9.96644439859297e-06, "loss": 0.01, "step": 13430 }, { "epoch": 0.3999226340142532, "grad_norm": 0.3325600326061249, "learning_rate": 9.966243873801947e-06, "loss": 0.0248, "step": 13440 }, { "epoch": 0.4002201954978947, "grad_norm": 0.283286452293396, "learning_rate": 9.966042753666178e-06, "loss": 0.0115, "step": 13450 }, { "epoch": 0.4005177569815363, "grad_norm": 0.6627172827720642, "learning_rate": 9.965841038209776e-06, "loss": 0.0123, "step": 13460 }, { "epoch": 0.40081531846517787, "grad_norm": 0.376687228679657, "learning_rate": 9.96563872745692e-06, "loss": 0.0129, "step": 13470 }, { "epoch": 0.40111287994881945, "grad_norm": 0.3295801281929016, "learning_rate": 9.965435821431866e-06, "loss": 0.0077, "step": 13480 }, { "epoch": 0.40141044143246096, "grad_norm": 0.4338816702365875, "learning_rate": 9.965232320158937e-06, "loss": 0.0129, "step": 13490 }, { "epoch": 0.40170800291610254, "grad_norm": 0.19286389648914337, "learning_rate": 9.965028223662528e-06, "loss": 0.0116, "step": 13500 }, { "epoch": 0.4020055643997441, "grad_norm": 0.6141868233680725, "learning_rate": 9.964823531967102e-06, "loss": 0.0153, "step": 13510 }, { "epoch": 0.4023031258833857, "grad_norm": 0.41887956857681274, "learning_rate": 9.964618245097203e-06, "loss": 0.0108, "step": 13520 }, { "epoch": 0.4026006873670272, "grad_norm": 0.4379631280899048, "learning_rate": 9.964412363077439e-06, "loss": 0.0146, "step": 13530 }, { "epoch": 0.40289824885066877, "grad_norm": 0.5842875838279724, "learning_rate": 9.96420588593249e-06, "loss": 0.0124, "step": 13540 }, { "epoch": 0.40319581033431035, "grad_norm": 0.35329705476760864, "learning_rate": 9.963998813687106e-06, "loss": 0.0112, "step": 13550 }, { "epoch": 0.40349337181795186, "grad_norm": 0.22449864447116852, "learning_rate": 9.963791146366112e-06, "loss": 0.0091, "step": 13560 }, { "epoch": 0.40379093330159344, "grad_norm": 0.3904822766780853, "learning_rate": 9.963582883994405e-06, "loss": 0.007, "step": 13570 }, { "epoch": 0.404088494785235, "grad_norm": 0.26295366883277893, "learning_rate": 9.963374026596949e-06, "loss": 0.0088, "step": 13580 }, { "epoch": 0.4043860562688766, "grad_norm": 0.29655900597572327, "learning_rate": 9.963164574198781e-06, "loss": 0.015, "step": 13590 }, { "epoch": 0.4046836177525181, "grad_norm": 0.43268924951553345, "learning_rate": 9.96295452682501e-06, "loss": 0.0103, "step": 13600 }, { "epoch": 0.40498117923615967, "grad_norm": 0.24168244004249573, "learning_rate": 9.962743884500819e-06, "loss": 0.009, "step": 13610 }, { "epoch": 0.40527874071980124, "grad_norm": 0.2710997760295868, "learning_rate": 9.962532647251456e-06, "loss": 0.0106, "step": 13620 }, { "epoch": 0.40557630220344276, "grad_norm": 0.4304717183113098, "learning_rate": 9.962320815102244e-06, "loss": 0.0108, "step": 13630 }, { "epoch": 0.40587386368708434, "grad_norm": 0.3261704444885254, "learning_rate": 9.962108388078578e-06, "loss": 0.0116, "step": 13640 }, { "epoch": 0.4061714251707259, "grad_norm": 0.3184322714805603, "learning_rate": 9.961895366205923e-06, "loss": 0.0141, "step": 13650 }, { "epoch": 0.4064689866543675, "grad_norm": 0.3814280927181244, "learning_rate": 9.961681749509817e-06, "loss": 0.0111, "step": 13660 }, { "epoch": 0.406766548138009, "grad_norm": 0.28751710057258606, "learning_rate": 9.961467538015867e-06, "loss": 0.0128, "step": 13670 }, { "epoch": 0.40706410962165057, "grad_norm": 2.2058088779449463, "learning_rate": 9.961252731749752e-06, "loss": 0.0116, "step": 13680 }, { "epoch": 0.40736167110529214, "grad_norm": 0.4542764127254486, "learning_rate": 9.961037330737222e-06, "loss": 0.0125, "step": 13690 }, { "epoch": 0.40765923258893366, "grad_norm": 0.627312421798706, "learning_rate": 9.960821335004101e-06, "loss": 0.0208, "step": 13700 }, { "epoch": 0.40795679407257524, "grad_norm": 0.8324105739593506, "learning_rate": 9.96060474457628e-06, "loss": 0.0139, "step": 13710 }, { "epoch": 0.4082543555562168, "grad_norm": 0.2818623483181, "learning_rate": 9.960387559479727e-06, "loss": 0.0122, "step": 13720 }, { "epoch": 0.4085519170398584, "grad_norm": 0.11617530882358551, "learning_rate": 9.960169779740474e-06, "loss": 0.0082, "step": 13730 }, { "epoch": 0.4088494785234999, "grad_norm": 0.2859037220478058, "learning_rate": 9.95995140538463e-06, "loss": 0.0184, "step": 13740 }, { "epoch": 0.40914704000714147, "grad_norm": 0.327248752117157, "learning_rate": 9.95973243643837e-06, "loss": 0.0107, "step": 13750 }, { "epoch": 0.40944460149078304, "grad_norm": 0.4484041929244995, "learning_rate": 9.959512872927949e-06, "loss": 0.0112, "step": 13760 }, { "epoch": 0.4097421629744246, "grad_norm": 0.1875973343849182, "learning_rate": 9.959292714879683e-06, "loss": 0.0126, "step": 13770 }, { "epoch": 0.41003972445806613, "grad_norm": 0.15609611570835114, "learning_rate": 9.959071962319969e-06, "loss": 0.0108, "step": 13780 }, { "epoch": 0.4103372859417077, "grad_norm": 0.45991894602775574, "learning_rate": 9.958850615275265e-06, "loss": 0.0101, "step": 13790 }, { "epoch": 0.4106348474253493, "grad_norm": 0.411790132522583, "learning_rate": 9.95862867377211e-06, "loss": 0.0105, "step": 13800 }, { "epoch": 0.4109324089089908, "grad_norm": 0.24213536083698273, "learning_rate": 9.958406137837108e-06, "loss": 0.011, "step": 13810 }, { "epoch": 0.41122997039263237, "grad_norm": 0.4073928892612457, "learning_rate": 9.958183007496937e-06, "loss": 0.0108, "step": 13820 }, { "epoch": 0.41152753187627394, "grad_norm": 0.3304758667945862, "learning_rate": 9.957959282778344e-06, "loss": 0.0116, "step": 13830 }, { "epoch": 0.4118250933599155, "grad_norm": 0.32226428389549255, "learning_rate": 9.957734963708152e-06, "loss": 0.0108, "step": 13840 }, { "epoch": 0.41212265484355703, "grad_norm": 0.39870887994766235, "learning_rate": 9.957510050313249e-06, "loss": 0.0125, "step": 13850 }, { "epoch": 0.4124202163271986, "grad_norm": 0.32496142387390137, "learning_rate": 9.957284542620599e-06, "loss": 0.0098, "step": 13860 }, { "epoch": 0.4127177778108402, "grad_norm": 0.6421274542808533, "learning_rate": 9.957058440657232e-06, "loss": 0.0158, "step": 13870 }, { "epoch": 0.4130153392944817, "grad_norm": 0.37182608246803284, "learning_rate": 9.956831744450257e-06, "loss": 0.0078, "step": 13880 }, { "epoch": 0.41331290077812327, "grad_norm": 0.3418959379196167, "learning_rate": 9.95660445402685e-06, "loss": 0.0127, "step": 13890 }, { "epoch": 0.41361046226176484, "grad_norm": 0.3152831494808197, "learning_rate": 9.956376569414253e-06, "loss": 0.0099, "step": 13900 }, { "epoch": 0.4139080237454064, "grad_norm": 0.19724057614803314, "learning_rate": 9.95614809063979e-06, "loss": 0.0153, "step": 13910 }, { "epoch": 0.41420558522904793, "grad_norm": 0.416537880897522, "learning_rate": 9.955919017730849e-06, "loss": 0.0047, "step": 13920 }, { "epoch": 0.4145031467126895, "grad_norm": 0.6992807984352112, "learning_rate": 9.955689350714888e-06, "loss": 0.0164, "step": 13930 }, { "epoch": 0.4148007081963311, "grad_norm": 0.3733731508255005, "learning_rate": 9.955459089619444e-06, "loss": 0.0105, "step": 13940 }, { "epoch": 0.4150982696799726, "grad_norm": 0.32499945163726807, "learning_rate": 9.955228234472116e-06, "loss": 0.0068, "step": 13950 }, { "epoch": 0.41539583116361417, "grad_norm": 0.6097670197486877, "learning_rate": 9.95499678530058e-06, "loss": 0.0124, "step": 13960 }, { "epoch": 0.41569339264725574, "grad_norm": 0.354114830493927, "learning_rate": 9.954764742132584e-06, "loss": 0.0117, "step": 13970 }, { "epoch": 0.4159909541308973, "grad_norm": 0.12560781836509705, "learning_rate": 9.95453210499594e-06, "loss": 0.014, "step": 13980 }, { "epoch": 0.41628851561453883, "grad_norm": 0.3068927824497223, "learning_rate": 9.954298873918542e-06, "loss": 0.0119, "step": 13990 }, { "epoch": 0.4165860770981804, "grad_norm": 0.569267749786377, "learning_rate": 9.954065048928344e-06, "loss": 0.0102, "step": 14000 }, { "epoch": 0.416883638581822, "grad_norm": 0.5968879461288452, "learning_rate": 9.95383063005338e-06, "loss": 0.0085, "step": 14010 }, { "epoch": 0.41718120006546355, "grad_norm": 0.6662725806236267, "learning_rate": 9.953595617321753e-06, "loss": 0.0097, "step": 14020 }, { "epoch": 0.41747876154910507, "grad_norm": 0.5080601572990417, "learning_rate": 9.95336001076163e-06, "loss": 0.0325, "step": 14030 }, { "epoch": 0.41777632303274664, "grad_norm": 0.23072102665901184, "learning_rate": 9.95312381040126e-06, "loss": 0.013, "step": 14040 }, { "epoch": 0.4180738845163882, "grad_norm": 0.33480575680732727, "learning_rate": 9.952887016268955e-06, "loss": 0.01, "step": 14050 }, { "epoch": 0.41837144600002973, "grad_norm": 0.25577017664909363, "learning_rate": 9.952649628393107e-06, "loss": 0.0135, "step": 14060 }, { "epoch": 0.4186690074836713, "grad_norm": 0.3123877942562103, "learning_rate": 9.952411646802168e-06, "loss": 0.0108, "step": 14070 }, { "epoch": 0.4189665689673129, "grad_norm": 0.23080861568450928, "learning_rate": 9.95217307152467e-06, "loss": 0.0125, "step": 14080 }, { "epoch": 0.41926413045095445, "grad_norm": 0.2958548665046692, "learning_rate": 9.951933902589212e-06, "loss": 0.0086, "step": 14090 }, { "epoch": 0.41956169193459597, "grad_norm": 0.2862527370452881, "learning_rate": 9.951694140024463e-06, "loss": 0.0127, "step": 14100 }, { "epoch": 0.41985925341823754, "grad_norm": 0.2692274749279022, "learning_rate": 9.95145378385917e-06, "loss": 0.0131, "step": 14110 }, { "epoch": 0.4201568149018791, "grad_norm": 0.26180216670036316, "learning_rate": 9.95121283412214e-06, "loss": 0.007, "step": 14120 }, { "epoch": 0.42045437638552063, "grad_norm": 0.5726161599159241, "learning_rate": 9.950971290842267e-06, "loss": 0.0083, "step": 14130 }, { "epoch": 0.4207519378691622, "grad_norm": 0.4683322608470917, "learning_rate": 9.950729154048497e-06, "loss": 0.0143, "step": 14140 }, { "epoch": 0.4210494993528038, "grad_norm": 0.40163153409957886, "learning_rate": 9.950486423769864e-06, "loss": 0.0152, "step": 14150 }, { "epoch": 0.42134706083644535, "grad_norm": 0.4621299207210541, "learning_rate": 9.950243100035461e-06, "loss": 0.0129, "step": 14160 }, { "epoch": 0.42164462232008687, "grad_norm": 0.45565226674079895, "learning_rate": 9.949999182874461e-06, "loss": 0.0119, "step": 14170 }, { "epoch": 0.42194218380372844, "grad_norm": 0.3653741478919983, "learning_rate": 9.949754672316103e-06, "loss": 0.0085, "step": 14180 }, { "epoch": 0.42223974528737, "grad_norm": 0.2803938686847687, "learning_rate": 9.949509568389699e-06, "loss": 0.015, "step": 14190 }, { "epoch": 0.42253730677101153, "grad_norm": 0.34586480259895325, "learning_rate": 9.94926387112463e-06, "loss": 0.0167, "step": 14200 }, { "epoch": 0.4228348682546531, "grad_norm": 0.8639376163482666, "learning_rate": 9.94901758055035e-06, "loss": 0.0095, "step": 14210 }, { "epoch": 0.4231324297382947, "grad_norm": 0.33189132809638977, "learning_rate": 9.948770696696386e-06, "loss": 0.0117, "step": 14220 }, { "epoch": 0.42342999122193625, "grad_norm": 0.3894238770008087, "learning_rate": 9.948523219592334e-06, "loss": 0.0104, "step": 14230 }, { "epoch": 0.42372755270557777, "grad_norm": 0.4697173237800598, "learning_rate": 9.948275149267857e-06, "loss": 0.0081, "step": 14240 }, { "epoch": 0.42402511418921934, "grad_norm": 0.3821462392807007, "learning_rate": 9.948026485752697e-06, "loss": 0.009, "step": 14250 }, { "epoch": 0.4243226756728609, "grad_norm": 0.4429708421230316, "learning_rate": 9.947777229076664e-06, "loss": 0.0179, "step": 14260 }, { "epoch": 0.4246202371565025, "grad_norm": 0.4421488642692566, "learning_rate": 9.947527379269636e-06, "loss": 0.0112, "step": 14270 }, { "epoch": 0.424917798640144, "grad_norm": 0.49912646412849426, "learning_rate": 9.947276936361565e-06, "loss": 0.0113, "step": 14280 }, { "epoch": 0.4252153601237856, "grad_norm": 0.17592565715312958, "learning_rate": 9.947025900382474e-06, "loss": 0.0084, "step": 14290 }, { "epoch": 0.42551292160742715, "grad_norm": 0.318437397480011, "learning_rate": 9.946774271362458e-06, "loss": 0.0092, "step": 14300 }, { "epoch": 0.42581048309106867, "grad_norm": 0.2625085115432739, "learning_rate": 9.94652204933168e-06, "loss": 0.0093, "step": 14310 }, { "epoch": 0.42610804457471024, "grad_norm": 0.3026270270347595, "learning_rate": 9.946269234320377e-06, "loss": 0.0063, "step": 14320 }, { "epoch": 0.4264056060583518, "grad_norm": 0.6627457141876221, "learning_rate": 9.946015826358857e-06, "loss": 0.0082, "step": 14330 }, { "epoch": 0.4267031675419934, "grad_norm": 0.308194637298584, "learning_rate": 9.945761825477494e-06, "loss": 0.0092, "step": 14340 }, { "epoch": 0.4270007290256349, "grad_norm": 0.4754147231578827, "learning_rate": 9.945507231706743e-06, "loss": 0.0125, "step": 14350 }, { "epoch": 0.4272982905092765, "grad_norm": 8.94273567199707, "learning_rate": 9.945252045077118e-06, "loss": 0.0136, "step": 14360 }, { "epoch": 0.42759585199291805, "grad_norm": 0.6944133043289185, "learning_rate": 9.944996265619217e-06, "loss": 0.0113, "step": 14370 }, { "epoch": 0.42789341347655957, "grad_norm": 0.388967901468277, "learning_rate": 9.944739893363698e-06, "loss": 0.0136, "step": 14380 }, { "epoch": 0.42819097496020114, "grad_norm": 0.4612993896007538, "learning_rate": 9.944482928341295e-06, "loss": 0.0146, "step": 14390 }, { "epoch": 0.4284885364438427, "grad_norm": 0.4640811085700989, "learning_rate": 9.944225370582812e-06, "loss": 0.0091, "step": 14400 }, { "epoch": 0.4287860979274843, "grad_norm": 0.371633917093277, "learning_rate": 9.943967220119126e-06, "loss": 0.0102, "step": 14410 }, { "epoch": 0.4290836594111258, "grad_norm": 0.19461728632450104, "learning_rate": 9.943708476981184e-06, "loss": 0.0126, "step": 14420 }, { "epoch": 0.4293812208947674, "grad_norm": 0.5245858430862427, "learning_rate": 9.943449141200004e-06, "loss": 0.0106, "step": 14430 }, { "epoch": 0.42967878237840895, "grad_norm": 0.5388280153274536, "learning_rate": 9.943189212806672e-06, "loss": 0.0124, "step": 14440 }, { "epoch": 0.42997634386205047, "grad_norm": 0.4117432236671448, "learning_rate": 9.94292869183235e-06, "loss": 0.0119, "step": 14450 }, { "epoch": 0.43027390534569204, "grad_norm": 0.3954367935657501, "learning_rate": 9.942667578308267e-06, "loss": 0.0102, "step": 14460 }, { "epoch": 0.4305714668293336, "grad_norm": 0.4367576241493225, "learning_rate": 9.942405872265727e-06, "loss": 0.0101, "step": 14470 }, { "epoch": 0.4308690283129752, "grad_norm": 0.18531398475170135, "learning_rate": 9.942143573736102e-06, "loss": 0.0078, "step": 14480 }, { "epoch": 0.4311665897966167, "grad_norm": 0.2913738191127777, "learning_rate": 9.941880682750835e-06, "loss": 0.0079, "step": 14490 }, { "epoch": 0.4314641512802583, "grad_norm": 0.18112987279891968, "learning_rate": 9.941617199341442e-06, "loss": 0.008, "step": 14500 }, { "epoch": 0.43176171276389985, "grad_norm": 0.38558903336524963, "learning_rate": 9.94135312353951e-06, "loss": 0.015, "step": 14510 }, { "epoch": 0.4320592742475414, "grad_norm": 0.22630929946899414, "learning_rate": 9.941088455376692e-06, "loss": 0.0081, "step": 14520 }, { "epoch": 0.43235683573118294, "grad_norm": 0.25458836555480957, "learning_rate": 9.940823194884722e-06, "loss": 0.0111, "step": 14530 }, { "epoch": 0.4326543972148245, "grad_norm": 0.3874046802520752, "learning_rate": 9.940557342095394e-06, "loss": 0.0125, "step": 14540 }, { "epoch": 0.4329519586984661, "grad_norm": 0.1934567242860794, "learning_rate": 9.94029089704058e-06, "loss": 0.012, "step": 14550 }, { "epoch": 0.4332495201821076, "grad_norm": 0.2914760708808899, "learning_rate": 9.940023859752219e-06, "loss": 0.0106, "step": 14560 }, { "epoch": 0.4335470816657492, "grad_norm": 0.19172878563404083, "learning_rate": 9.939756230262324e-06, "loss": 0.0068, "step": 14570 }, { "epoch": 0.43384464314939075, "grad_norm": 0.4371519088745117, "learning_rate": 9.93948800860298e-06, "loss": 0.0103, "step": 14580 }, { "epoch": 0.4341422046330323, "grad_norm": 0.23462030291557312, "learning_rate": 9.93921919480634e-06, "loss": 0.0107, "step": 14590 }, { "epoch": 0.43443976611667384, "grad_norm": 0.9055469632148743, "learning_rate": 9.938949788904627e-06, "loss": 0.013, "step": 14600 }, { "epoch": 0.4347373276003154, "grad_norm": 0.3716064393520355, "learning_rate": 9.938679790930138e-06, "loss": 0.0094, "step": 14610 }, { "epoch": 0.435034889083957, "grad_norm": 0.3228563964366913, "learning_rate": 9.938409200915242e-06, "loss": 0.0097, "step": 14620 }, { "epoch": 0.4353324505675985, "grad_norm": 0.36255040764808655, "learning_rate": 9.938138018892373e-06, "loss": 0.013, "step": 14630 }, { "epoch": 0.4356300120512401, "grad_norm": 0.33600887656211853, "learning_rate": 9.937866244894041e-06, "loss": 0.0076, "step": 14640 }, { "epoch": 0.43592757353488165, "grad_norm": 0.3688001036643982, "learning_rate": 9.937593878952829e-06, "loss": 0.0093, "step": 14650 }, { "epoch": 0.4362251350185232, "grad_norm": 0.3248331546783447, "learning_rate": 9.937320921101384e-06, "loss": 0.0138, "step": 14660 }, { "epoch": 0.43652269650216474, "grad_norm": 0.40112927556037903, "learning_rate": 9.937047371372431e-06, "loss": 0.0092, "step": 14670 }, { "epoch": 0.4368202579858063, "grad_norm": 0.494870126247406, "learning_rate": 9.936773229798757e-06, "loss": 0.0118, "step": 14680 }, { "epoch": 0.4371178194694479, "grad_norm": 0.6891812682151794, "learning_rate": 9.936498496413232e-06, "loss": 0.0126, "step": 14690 }, { "epoch": 0.4374153809530894, "grad_norm": 0.265284925699234, "learning_rate": 9.936223171248787e-06, "loss": 0.0096, "step": 14700 }, { "epoch": 0.437712942436731, "grad_norm": 0.6545143723487854, "learning_rate": 9.93594725433843e-06, "loss": 0.0098, "step": 14710 }, { "epoch": 0.43801050392037255, "grad_norm": 0.24898208677768707, "learning_rate": 9.935670745715235e-06, "loss": 0.0119, "step": 14720 }, { "epoch": 0.4383080654040141, "grad_norm": 0.4835285246372223, "learning_rate": 9.93539364541235e-06, "loss": 0.0133, "step": 14730 }, { "epoch": 0.43860562688765564, "grad_norm": 0.3314194083213806, "learning_rate": 9.935115953462993e-06, "loss": 0.01, "step": 14740 }, { "epoch": 0.4389031883712972, "grad_norm": 0.4256010055541992, "learning_rate": 9.934837669900455e-06, "loss": 0.009, "step": 14750 }, { "epoch": 0.4392007498549388, "grad_norm": 0.40808436274528503, "learning_rate": 9.934558794758094e-06, "loss": 0.0108, "step": 14760 }, { "epoch": 0.43949831133858036, "grad_norm": 0.8132454752922058, "learning_rate": 9.934279328069344e-06, "loss": 0.0105, "step": 14770 }, { "epoch": 0.4397958728222219, "grad_norm": 0.5151378512382507, "learning_rate": 9.933999269867703e-06, "loss": 0.0086, "step": 14780 }, { "epoch": 0.44009343430586345, "grad_norm": 0.3103228807449341, "learning_rate": 9.933718620186745e-06, "loss": 0.0077, "step": 14790 }, { "epoch": 0.440390995789505, "grad_norm": 0.5545451045036316, "learning_rate": 9.933437379060115e-06, "loss": 0.0089, "step": 14800 }, { "epoch": 0.44068855727314654, "grad_norm": 0.17235225439071655, "learning_rate": 9.93315554652153e-06, "loss": 0.0089, "step": 14810 }, { "epoch": 0.4409861187567881, "grad_norm": 0.6439284682273865, "learning_rate": 9.932873122604772e-06, "loss": 0.0116, "step": 14820 }, { "epoch": 0.4412836802404297, "grad_norm": 0.44217076897621155, "learning_rate": 9.932590107343696e-06, "loss": 0.0121, "step": 14830 }, { "epoch": 0.44158124172407126, "grad_norm": 0.5888472199440002, "learning_rate": 9.932306500772234e-06, "loss": 0.0106, "step": 14840 }, { "epoch": 0.4418788032077128, "grad_norm": 0.3911009430885315, "learning_rate": 9.932022302924382e-06, "loss": 0.0142, "step": 14850 }, { "epoch": 0.44217636469135435, "grad_norm": 0.22024047374725342, "learning_rate": 9.931737513834209e-06, "loss": 0.0068, "step": 14860 }, { "epoch": 0.4424739261749959, "grad_norm": 0.41738250851631165, "learning_rate": 9.931452133535856e-06, "loss": 0.009, "step": 14870 }, { "epoch": 0.44277148765863744, "grad_norm": 0.3911879360675812, "learning_rate": 9.931166162063533e-06, "loss": 0.0113, "step": 14880 }, { "epoch": 0.443069049142279, "grad_norm": 0.28831684589385986, "learning_rate": 9.930879599451521e-06, "loss": 0.0095, "step": 14890 }, { "epoch": 0.4433666106259206, "grad_norm": 0.36234578490257263, "learning_rate": 9.930592445734174e-06, "loss": 0.015, "step": 14900 }, { "epoch": 0.44366417210956216, "grad_norm": 0.4068000912666321, "learning_rate": 9.930304700945915e-06, "loss": 0.0145, "step": 14910 }, { "epoch": 0.4439617335932037, "grad_norm": 0.7521135210990906, "learning_rate": 9.930016365121238e-06, "loss": 0.0098, "step": 14920 }, { "epoch": 0.44425929507684525, "grad_norm": 0.3604510426521301, "learning_rate": 9.92972743829471e-06, "loss": 0.0098, "step": 14930 }, { "epoch": 0.4445568565604868, "grad_norm": 0.35667893290519714, "learning_rate": 9.929437920500966e-06, "loss": 0.0085, "step": 14940 }, { "epoch": 0.44485441804412834, "grad_norm": 0.3064413368701935, "learning_rate": 9.929147811774711e-06, "loss": 0.0142, "step": 14950 }, { "epoch": 0.4451519795277699, "grad_norm": 0.4126516282558441, "learning_rate": 9.928857112150725e-06, "loss": 0.0096, "step": 14960 }, { "epoch": 0.4454495410114115, "grad_norm": 0.34690964221954346, "learning_rate": 9.928565821663856e-06, "loss": 0.0103, "step": 14970 }, { "epoch": 0.44574710249505306, "grad_norm": 0.39511242508888245, "learning_rate": 9.928273940349023e-06, "loss": 0.008, "step": 14980 }, { "epoch": 0.4460446639786946, "grad_norm": 0.2773914933204651, "learning_rate": 9.927981468241216e-06, "loss": 0.009, "step": 14990 }, { "epoch": 0.44634222546233615, "grad_norm": 0.41703012585639954, "learning_rate": 9.927688405375498e-06, "loss": 0.0091, "step": 15000 }, { "epoch": 0.4466397869459777, "grad_norm": 0.5645413994789124, "learning_rate": 9.927394751786999e-06, "loss": 0.0117, "step": 15010 }, { "epoch": 0.4469373484296193, "grad_norm": 0.28424957394599915, "learning_rate": 9.92710050751092e-06, "loss": 0.0092, "step": 15020 }, { "epoch": 0.4472349099132608, "grad_norm": 0.1482565850019455, "learning_rate": 9.92680567258254e-06, "loss": 0.0106, "step": 15030 }, { "epoch": 0.4475324713969024, "grad_norm": 0.39766550064086914, "learning_rate": 9.926510247037198e-06, "loss": 0.0099, "step": 15040 }, { "epoch": 0.44783003288054396, "grad_norm": 0.4845239520072937, "learning_rate": 9.926214230910312e-06, "loss": 0.0192, "step": 15050 }, { "epoch": 0.4481275943641855, "grad_norm": 0.2408638298511505, "learning_rate": 9.925917624237364e-06, "loss": 0.0088, "step": 15060 }, { "epoch": 0.44842515584782705, "grad_norm": 0.37144729495048523, "learning_rate": 9.925620427053917e-06, "loss": 0.0152, "step": 15070 }, { "epoch": 0.4487227173314686, "grad_norm": 0.5141400694847107, "learning_rate": 9.925322639395593e-06, "loss": 0.0172, "step": 15080 }, { "epoch": 0.4490202788151102, "grad_norm": 0.22145119309425354, "learning_rate": 9.925024261298093e-06, "loss": 0.0105, "step": 15090 }, { "epoch": 0.4493178402987517, "grad_norm": 0.37704771757125854, "learning_rate": 9.924725292797187e-06, "loss": 0.0109, "step": 15100 }, { "epoch": 0.4496154017823933, "grad_norm": 0.2653745710849762, "learning_rate": 9.924425733928712e-06, "loss": 0.0085, "step": 15110 }, { "epoch": 0.44991296326603486, "grad_norm": 0.4062599837779999, "learning_rate": 9.92412558472858e-06, "loss": 0.0118, "step": 15120 }, { "epoch": 0.4502105247496764, "grad_norm": 0.19882632791996002, "learning_rate": 9.923824845232772e-06, "loss": 0.0083, "step": 15130 }, { "epoch": 0.45050808623331795, "grad_norm": 0.22039034962654114, "learning_rate": 9.92352351547734e-06, "loss": 0.0112, "step": 15140 }, { "epoch": 0.4508056477169595, "grad_norm": 0.20613843202590942, "learning_rate": 9.923221595498408e-06, "loss": 0.0113, "step": 15150 }, { "epoch": 0.4511032092006011, "grad_norm": 0.7268940806388855, "learning_rate": 9.922919085332169e-06, "loss": 0.009, "step": 15160 }, { "epoch": 0.4514007706842426, "grad_norm": 0.30639609694480896, "learning_rate": 9.922615985014887e-06, "loss": 0.0053, "step": 15170 }, { "epoch": 0.4516983321678842, "grad_norm": 0.161318838596344, "learning_rate": 9.922312294582897e-06, "loss": 0.0096, "step": 15180 }, { "epoch": 0.45199589365152576, "grad_norm": 0.44330447912216187, "learning_rate": 9.922008014072606e-06, "loss": 0.0124, "step": 15190 }, { "epoch": 0.4522934551351673, "grad_norm": 0.30633988976478577, "learning_rate": 9.921703143520488e-06, "loss": 0.0077, "step": 15200 }, { "epoch": 0.45259101661880885, "grad_norm": 0.2753567397594452, "learning_rate": 9.921397682963095e-06, "loss": 0.0071, "step": 15210 }, { "epoch": 0.4528885781024504, "grad_norm": 0.1869964599609375, "learning_rate": 9.921091632437041e-06, "loss": 0.0119, "step": 15220 }, { "epoch": 0.453186139586092, "grad_norm": 0.41250964999198914, "learning_rate": 9.920784991979017e-06, "loss": 0.0066, "step": 15230 }, { "epoch": 0.4534837010697335, "grad_norm": 0.2770899832248688, "learning_rate": 9.920477761625781e-06, "loss": 0.0133, "step": 15240 }, { "epoch": 0.4537812625533751, "grad_norm": 0.5569055080413818, "learning_rate": 9.920169941414163e-06, "loss": 0.0104, "step": 15250 }, { "epoch": 0.45407882403701666, "grad_norm": 0.3658648133277893, "learning_rate": 9.919861531381067e-06, "loss": 0.0109, "step": 15260 }, { "epoch": 0.45437638552065823, "grad_norm": 0.6156923174858093, "learning_rate": 9.91955253156346e-06, "loss": 0.0123, "step": 15270 }, { "epoch": 0.45467394700429975, "grad_norm": 0.3327611982822418, "learning_rate": 9.91924294199839e-06, "loss": 0.0081, "step": 15280 }, { "epoch": 0.4549715084879413, "grad_norm": 0.34055933356285095, "learning_rate": 9.918932762722966e-06, "loss": 0.0137, "step": 15290 }, { "epoch": 0.4552690699715829, "grad_norm": 0.18490594625473022, "learning_rate": 9.918621993774373e-06, "loss": 0.0123, "step": 15300 }, { "epoch": 0.4555666314552244, "grad_norm": 0.6263028383255005, "learning_rate": 9.918310635189863e-06, "loss": 0.0118, "step": 15310 }, { "epoch": 0.455864192938866, "grad_norm": 0.515203058719635, "learning_rate": 9.917998687006765e-06, "loss": 0.0115, "step": 15320 }, { "epoch": 0.45616175442250756, "grad_norm": 0.35514259338378906, "learning_rate": 9.917686149262474e-06, "loss": 0.0112, "step": 15330 }, { "epoch": 0.45645931590614913, "grad_norm": 0.41714954376220703, "learning_rate": 9.917373021994454e-06, "loss": 0.0108, "step": 15340 }, { "epoch": 0.45675687738979065, "grad_norm": 0.49612709879875183, "learning_rate": 9.917059305240245e-06, "loss": 0.0117, "step": 15350 }, { "epoch": 0.4570544388734322, "grad_norm": 0.2793905735015869, "learning_rate": 9.916744999037453e-06, "loss": 0.0076, "step": 15360 }, { "epoch": 0.4573520003570738, "grad_norm": 0.19993245601654053, "learning_rate": 9.916430103423757e-06, "loss": 0.0082, "step": 15370 }, { "epoch": 0.4576495618407153, "grad_norm": 0.4554780125617981, "learning_rate": 9.916114618436906e-06, "loss": 0.0159, "step": 15380 }, { "epoch": 0.4579471233243569, "grad_norm": 0.38337448239326477, "learning_rate": 9.915798544114721e-06, "loss": 0.012, "step": 15390 }, { "epoch": 0.45824468480799846, "grad_norm": 0.6591759920120239, "learning_rate": 9.915481880495092e-06, "loss": 0.0101, "step": 15400 }, { "epoch": 0.45854224629164003, "grad_norm": 0.36740052700042725, "learning_rate": 9.915164627615979e-06, "loss": 0.023, "step": 15410 }, { "epoch": 0.45883980777528155, "grad_norm": 0.22504925727844238, "learning_rate": 9.914846785515414e-06, "loss": 0.0108, "step": 15420 }, { "epoch": 0.4591373692589231, "grad_norm": 0.21129809319972992, "learning_rate": 9.914528354231499e-06, "loss": 0.0163, "step": 15430 }, { "epoch": 0.4594349307425647, "grad_norm": 0.3865097165107727, "learning_rate": 9.914209333802407e-06, "loss": 0.0056, "step": 15440 }, { "epoch": 0.4597324922262062, "grad_norm": 0.35798072814941406, "learning_rate": 9.913889724266384e-06, "loss": 0.0126, "step": 15450 }, { "epoch": 0.4600300537098478, "grad_norm": 0.28991204500198364, "learning_rate": 9.913569525661742e-06, "loss": 0.0104, "step": 15460 }, { "epoch": 0.46032761519348936, "grad_norm": 0.15246698260307312, "learning_rate": 9.913248738026866e-06, "loss": 0.0093, "step": 15470 }, { "epoch": 0.46062517667713093, "grad_norm": 0.5281511545181274, "learning_rate": 9.912927361400212e-06, "loss": 0.009, "step": 15480 }, { "epoch": 0.46092273816077245, "grad_norm": 0.5015764832496643, "learning_rate": 9.912605395820306e-06, "loss": 0.0099, "step": 15490 }, { "epoch": 0.461220299644414, "grad_norm": 0.4667886197566986, "learning_rate": 9.912282841325745e-06, "loss": 0.01, "step": 15500 }, { "epoch": 0.4615178611280556, "grad_norm": 0.5790191292762756, "learning_rate": 9.911959697955196e-06, "loss": 0.0122, "step": 15510 }, { "epoch": 0.46181542261169717, "grad_norm": 0.46475738286972046, "learning_rate": 9.911635965747396e-06, "loss": 0.011, "step": 15520 }, { "epoch": 0.4621129840953387, "grad_norm": 0.4797046482563019, "learning_rate": 9.911311644741154e-06, "loss": 0.0124, "step": 15530 }, { "epoch": 0.46241054557898026, "grad_norm": 0.4487634003162384, "learning_rate": 9.910986734975349e-06, "loss": 0.0056, "step": 15540 }, { "epoch": 0.46270810706262183, "grad_norm": 0.17732428014278412, "learning_rate": 9.910661236488932e-06, "loss": 0.008, "step": 15550 }, { "epoch": 0.46300566854626335, "grad_norm": 0.6404876112937927, "learning_rate": 9.91033514932092e-06, "loss": 0.0182, "step": 15560 }, { "epoch": 0.4633032300299049, "grad_norm": 0.31776148080825806, "learning_rate": 9.910008473510408e-06, "loss": 0.0098, "step": 15570 }, { "epoch": 0.4636007915135465, "grad_norm": 0.3958609104156494, "learning_rate": 9.909681209096555e-06, "loss": 0.0119, "step": 15580 }, { "epoch": 0.46389835299718807, "grad_norm": 0.3381135165691376, "learning_rate": 9.909353356118592e-06, "loss": 0.0101, "step": 15590 }, { "epoch": 0.4641959144808296, "grad_norm": 0.5910330414772034, "learning_rate": 9.909024914615824e-06, "loss": 0.0127, "step": 15600 }, { "epoch": 0.46449347596447116, "grad_norm": 0.4812149107456207, "learning_rate": 9.908695884627622e-06, "loss": 0.0092, "step": 15610 }, { "epoch": 0.46479103744811273, "grad_norm": 0.33674052357673645, "learning_rate": 9.90836626619343e-06, "loss": 0.0075, "step": 15620 }, { "epoch": 0.46508859893175425, "grad_norm": 0.393716961145401, "learning_rate": 9.90803605935276e-06, "loss": 0.0077, "step": 15630 }, { "epoch": 0.4653861604153958, "grad_norm": 0.2319013625383377, "learning_rate": 9.9077052641452e-06, "loss": 0.0122, "step": 15640 }, { "epoch": 0.4656837218990374, "grad_norm": 0.31680572032928467, "learning_rate": 9.907373880610404e-06, "loss": 0.0084, "step": 15650 }, { "epoch": 0.46598128338267897, "grad_norm": 0.3668119013309479, "learning_rate": 9.907041908788099e-06, "loss": 0.0105, "step": 15660 }, { "epoch": 0.4662788448663205, "grad_norm": 0.585814356803894, "learning_rate": 9.90670934871808e-06, "loss": 0.0062, "step": 15670 }, { "epoch": 0.46657640634996206, "grad_norm": 0.09046407043933868, "learning_rate": 9.906376200440211e-06, "loss": 0.0089, "step": 15680 }, { "epoch": 0.46687396783360363, "grad_norm": 0.3825215697288513, "learning_rate": 9.906042463994435e-06, "loss": 0.0115, "step": 15690 }, { "epoch": 0.46717152931724515, "grad_norm": 0.32515689730644226, "learning_rate": 9.905708139420753e-06, "loss": 0.0104, "step": 15700 }, { "epoch": 0.4674690908008867, "grad_norm": 0.6399290561676025, "learning_rate": 9.905373226759249e-06, "loss": 0.0163, "step": 15710 }, { "epoch": 0.4677666522845283, "grad_norm": 0.4445931911468506, "learning_rate": 9.905037726050068e-06, "loss": 0.0119, "step": 15720 }, { "epoch": 0.46806421376816987, "grad_norm": 0.6850634813308716, "learning_rate": 9.904701637333432e-06, "loss": 0.013, "step": 15730 }, { "epoch": 0.4683617752518114, "grad_norm": 0.2807046175003052, "learning_rate": 9.90436496064963e-06, "loss": 0.0129, "step": 15740 }, { "epoch": 0.46865933673545296, "grad_norm": 0.14778712391853333, "learning_rate": 9.90402769603902e-06, "loss": 0.0199, "step": 15750 }, { "epoch": 0.46895689821909453, "grad_norm": 0.40536582469940186, "learning_rate": 9.903689843542036e-06, "loss": 0.0098, "step": 15760 }, { "epoch": 0.4692544597027361, "grad_norm": 0.4491485059261322, "learning_rate": 9.903351403199177e-06, "loss": 0.012, "step": 15770 }, { "epoch": 0.4695520211863776, "grad_norm": 0.22412531077861786, "learning_rate": 9.903012375051015e-06, "loss": 0.009, "step": 15780 }, { "epoch": 0.4698495826700192, "grad_norm": 0.37650322914123535, "learning_rate": 9.902672759138193e-06, "loss": 0.0099, "step": 15790 }, { "epoch": 0.47014714415366077, "grad_norm": 0.4753552973270416, "learning_rate": 9.902332555501421e-06, "loss": 0.0079, "step": 15800 }, { "epoch": 0.4704447056373023, "grad_norm": 0.4152197539806366, "learning_rate": 9.901991764181487e-06, "loss": 0.0088, "step": 15810 }, { "epoch": 0.47074226712094386, "grad_norm": 0.46766287088394165, "learning_rate": 9.90165038521924e-06, "loss": 0.0125, "step": 15820 }, { "epoch": 0.47103982860458543, "grad_norm": 0.4825969934463501, "learning_rate": 9.901308418655605e-06, "loss": 0.0109, "step": 15830 }, { "epoch": 0.471337390088227, "grad_norm": 0.5929585695266724, "learning_rate": 9.900965864531577e-06, "loss": 0.0134, "step": 15840 }, { "epoch": 0.4716349515718685, "grad_norm": 0.12306448072195053, "learning_rate": 9.90062272288822e-06, "loss": 0.0089, "step": 15850 }, { "epoch": 0.4719325130555101, "grad_norm": 0.509642481803894, "learning_rate": 9.90027899376667e-06, "loss": 0.0157, "step": 15860 }, { "epoch": 0.47223007453915167, "grad_norm": 0.42046812176704407, "learning_rate": 9.899934677208133e-06, "loss": 0.0114, "step": 15870 }, { "epoch": 0.4725276360227932, "grad_norm": 0.3912586271762848, "learning_rate": 9.899589773253883e-06, "loss": 0.0082, "step": 15880 }, { "epoch": 0.47282519750643476, "grad_norm": 0.4358160197734833, "learning_rate": 9.899244281945271e-06, "loss": 0.0097, "step": 15890 }, { "epoch": 0.47312275899007633, "grad_norm": 0.32412058115005493, "learning_rate": 9.898898203323709e-06, "loss": 0.0097, "step": 15900 }, { "epoch": 0.4734203204737179, "grad_norm": 0.3117215037345886, "learning_rate": 9.898551537430685e-06, "loss": 0.0105, "step": 15910 }, { "epoch": 0.4737178819573594, "grad_norm": 0.35613858699798584, "learning_rate": 9.89820428430776e-06, "loss": 0.0112, "step": 15920 }, { "epoch": 0.474015443441001, "grad_norm": 0.22153617441654205, "learning_rate": 9.897856443996558e-06, "loss": 0.0115, "step": 15930 }, { "epoch": 0.47431300492464257, "grad_norm": 0.5065445303916931, "learning_rate": 9.89750801653878e-06, "loss": 0.0091, "step": 15940 }, { "epoch": 0.4746105664082841, "grad_norm": 0.30268922448158264, "learning_rate": 9.897159001976196e-06, "loss": 0.0102, "step": 15950 }, { "epoch": 0.47490812789192566, "grad_norm": 0.5165348052978516, "learning_rate": 9.896809400350642e-06, "loss": 0.0068, "step": 15960 }, { "epoch": 0.47520568937556723, "grad_norm": 0.1541779339313507, "learning_rate": 9.896459211704029e-06, "loss": 0.0148, "step": 15970 }, { "epoch": 0.4755032508592088, "grad_norm": 0.42384544014930725, "learning_rate": 9.896108436078338e-06, "loss": 0.0085, "step": 15980 }, { "epoch": 0.4758008123428503, "grad_norm": 0.49739721417427063, "learning_rate": 9.89575707351562e-06, "loss": 0.0111, "step": 15990 }, { "epoch": 0.4760983738264919, "grad_norm": 0.3811550438404083, "learning_rate": 9.895405124057993e-06, "loss": 0.0087, "step": 16000 }, { "epoch": 0.47639593531013347, "grad_norm": 0.20822487771511078, "learning_rate": 9.89505258774765e-06, "loss": 0.0083, "step": 16010 }, { "epoch": 0.47669349679377504, "grad_norm": 0.2531471848487854, "learning_rate": 9.89469946462685e-06, "loss": 0.0077, "step": 16020 }, { "epoch": 0.47699105827741656, "grad_norm": 0.3513260781764984, "learning_rate": 9.894345754737928e-06, "loss": 0.0167, "step": 16030 }, { "epoch": 0.47728861976105813, "grad_norm": 0.5230937004089355, "learning_rate": 9.893991458123285e-06, "loss": 0.0147, "step": 16040 }, { "epoch": 0.4775861812446997, "grad_norm": 0.3198680579662323, "learning_rate": 9.893636574825392e-06, "loss": 0.011, "step": 16050 }, { "epoch": 0.4778837427283412, "grad_norm": 0.3954460620880127, "learning_rate": 9.893281104886794e-06, "loss": 0.0075, "step": 16060 }, { "epoch": 0.4781813042119828, "grad_norm": 0.4198935031890869, "learning_rate": 9.892925048350104e-06, "loss": 0.0134, "step": 16070 }, { "epoch": 0.47847886569562437, "grad_norm": 0.4059743583202362, "learning_rate": 9.892568405258006e-06, "loss": 0.0106, "step": 16080 }, { "epoch": 0.47877642717926594, "grad_norm": 0.5495319366455078, "learning_rate": 9.892211175653249e-06, "loss": 0.0081, "step": 16090 }, { "epoch": 0.47907398866290746, "grad_norm": 0.7143232822418213, "learning_rate": 9.891853359578663e-06, "loss": 0.0085, "step": 16100 }, { "epoch": 0.47937155014654903, "grad_norm": 0.433359295129776, "learning_rate": 9.891494957077139e-06, "loss": 0.0078, "step": 16110 }, { "epoch": 0.4796691116301906, "grad_norm": 0.27171531319618225, "learning_rate": 9.891135968191642e-06, "loss": 0.0086, "step": 16120 }, { "epoch": 0.4799666731138321, "grad_norm": 0.3850434720516205, "learning_rate": 9.890776392965208e-06, "loss": 0.0168, "step": 16130 }, { "epoch": 0.4802642345974737, "grad_norm": 0.23522961139678955, "learning_rate": 9.890416231440943e-06, "loss": 0.01, "step": 16140 }, { "epoch": 0.48056179608111527, "grad_norm": 0.32323113083839417, "learning_rate": 9.890055483662019e-06, "loss": 0.0117, "step": 16150 }, { "epoch": 0.48085935756475684, "grad_norm": 0.5106415748596191, "learning_rate": 9.889694149671685e-06, "loss": 0.0121, "step": 16160 }, { "epoch": 0.48115691904839836, "grad_norm": 0.49447697401046753, "learning_rate": 9.889332229513256e-06, "loss": 0.0137, "step": 16170 }, { "epoch": 0.48145448053203993, "grad_norm": 1.6096928119659424, "learning_rate": 9.88896972323012e-06, "loss": 0.0089, "step": 16180 }, { "epoch": 0.4817520420156815, "grad_norm": 0.30118173360824585, "learning_rate": 9.888606630865731e-06, "loss": 0.0094, "step": 16190 }, { "epoch": 0.482049603499323, "grad_norm": 0.26305505633354187, "learning_rate": 9.888242952463619e-06, "loss": 0.0095, "step": 16200 }, { "epoch": 0.4823471649829646, "grad_norm": 0.1816551685333252, "learning_rate": 9.887878688067376e-06, "loss": 0.0092, "step": 16210 }, { "epoch": 0.48264472646660617, "grad_norm": 0.40779218077659607, "learning_rate": 9.887513837720674e-06, "loss": 0.0102, "step": 16220 }, { "epoch": 0.48294228795024774, "grad_norm": 0.5531004667282104, "learning_rate": 9.88714840146725e-06, "loss": 0.0138, "step": 16230 }, { "epoch": 0.48323984943388926, "grad_norm": 0.23760952055454254, "learning_rate": 9.88678237935091e-06, "loss": 0.0135, "step": 16240 }, { "epoch": 0.48353741091753083, "grad_norm": 0.3348184823989868, "learning_rate": 9.886415771415535e-06, "loss": 0.0094, "step": 16250 }, { "epoch": 0.4838349724011724, "grad_norm": 0.3578963577747345, "learning_rate": 9.88604857770507e-06, "loss": 0.0128, "step": 16260 }, { "epoch": 0.484132533884814, "grad_norm": 0.18406470119953156, "learning_rate": 9.885680798263536e-06, "loss": 0.0096, "step": 16270 }, { "epoch": 0.4844300953684555, "grad_norm": 0.36512094736099243, "learning_rate": 9.88531243313502e-06, "loss": 0.0102, "step": 16280 }, { "epoch": 0.48472765685209707, "grad_norm": 0.23143281042575836, "learning_rate": 9.884943482363683e-06, "loss": 0.0058, "step": 16290 }, { "epoch": 0.48502521833573864, "grad_norm": 0.5086589455604553, "learning_rate": 9.884573945993751e-06, "loss": 0.015, "step": 16300 }, { "epoch": 0.48532277981938016, "grad_norm": 0.19087237119674683, "learning_rate": 9.884203824069527e-06, "loss": 0.0057, "step": 16310 }, { "epoch": 0.48562034130302173, "grad_norm": 0.3407427668571472, "learning_rate": 9.88383311663538e-06, "loss": 0.0096, "step": 16320 }, { "epoch": 0.4859179027866633, "grad_norm": 0.1939208060503006, "learning_rate": 9.883461823735748e-06, "loss": 0.0153, "step": 16330 }, { "epoch": 0.4862154642703049, "grad_norm": 0.2647266983985901, "learning_rate": 9.88308994541514e-06, "loss": 0.0095, "step": 16340 }, { "epoch": 0.4865130257539464, "grad_norm": 0.40510573983192444, "learning_rate": 9.882717481718138e-06, "loss": 0.0139, "step": 16350 }, { "epoch": 0.48681058723758797, "grad_norm": 0.30694857239723206, "learning_rate": 9.882344432689396e-06, "loss": 0.0071, "step": 16360 }, { "epoch": 0.48710814872122954, "grad_norm": 0.37804722785949707, "learning_rate": 9.881970798373626e-06, "loss": 0.0086, "step": 16370 }, { "epoch": 0.48740571020487106, "grad_norm": 0.5750669836997986, "learning_rate": 9.881596578815624e-06, "loss": 0.0122, "step": 16380 }, { "epoch": 0.48770327168851263, "grad_norm": 0.29901546239852905, "learning_rate": 9.881221774060253e-06, "loss": 0.0093, "step": 16390 }, { "epoch": 0.4880008331721542, "grad_norm": 0.5378114581108093, "learning_rate": 9.880846384152438e-06, "loss": 0.0089, "step": 16400 }, { "epoch": 0.4882983946557958, "grad_norm": 0.5947179794311523, "learning_rate": 9.880470409137184e-06, "loss": 0.0087, "step": 16410 }, { "epoch": 0.4885959561394373, "grad_norm": 0.32834798097610474, "learning_rate": 9.880093849059561e-06, "loss": 0.009, "step": 16420 }, { "epoch": 0.48889351762307887, "grad_norm": 0.22255700826644897, "learning_rate": 9.879716703964712e-06, "loss": 0.0126, "step": 16430 }, { "epoch": 0.48919107910672044, "grad_norm": 0.3120337426662445, "learning_rate": 9.879338973897847e-06, "loss": 0.0131, "step": 16440 }, { "epoch": 0.48948864059036196, "grad_norm": 0.5427978038787842, "learning_rate": 9.878960658904247e-06, "loss": 0.0094, "step": 16450 }, { "epoch": 0.48978620207400353, "grad_norm": 0.5811604857444763, "learning_rate": 9.878581759029263e-06, "loss": 0.0111, "step": 16460 }, { "epoch": 0.4900837635576451, "grad_norm": 0.5218693614006042, "learning_rate": 9.878202274318322e-06, "loss": 0.0331, "step": 16470 }, { "epoch": 0.4903813250412867, "grad_norm": 0.4273027181625366, "learning_rate": 9.877822204816911e-06, "loss": 0.0063, "step": 16480 }, { "epoch": 0.4906788865249282, "grad_norm": 0.5086688995361328, "learning_rate": 9.877441550570593e-06, "loss": 0.0113, "step": 16490 }, { "epoch": 0.49097644800856977, "grad_norm": 0.15361739695072174, "learning_rate": 9.877060311625e-06, "loss": 0.009, "step": 16500 }, { "epoch": 0.49127400949221134, "grad_norm": 0.3585146367549896, "learning_rate": 9.876678488025837e-06, "loss": 0.0126, "step": 16510 }, { "epoch": 0.4915715709758529, "grad_norm": 0.4341617226600647, "learning_rate": 9.876296079818872e-06, "loss": 0.0094, "step": 16520 }, { "epoch": 0.49186913245949443, "grad_norm": 0.40528151392936707, "learning_rate": 9.87591308704995e-06, "loss": 0.0096, "step": 16530 }, { "epoch": 0.492166693943136, "grad_norm": 0.19094815850257874, "learning_rate": 9.875529509764985e-06, "loss": 0.0132, "step": 16540 }, { "epoch": 0.4924642554267776, "grad_norm": 0.3982858657836914, "learning_rate": 9.875145348009955e-06, "loss": 0.0071, "step": 16550 }, { "epoch": 0.4927618169104191, "grad_norm": 0.3742443919181824, "learning_rate": 9.874760601830919e-06, "loss": 0.0092, "step": 16560 }, { "epoch": 0.49305937839406067, "grad_norm": 0.3514479994773865, "learning_rate": 9.874375271273994e-06, "loss": 0.0104, "step": 16570 }, { "epoch": 0.49335693987770224, "grad_norm": 0.1112990528345108, "learning_rate": 9.873989356385374e-06, "loss": 0.008, "step": 16580 }, { "epoch": 0.4936545013613438, "grad_norm": 0.16965501010417938, "learning_rate": 9.873602857211326e-06, "loss": 0.0094, "step": 16590 }, { "epoch": 0.49395206284498533, "grad_norm": 0.43621644377708435, "learning_rate": 9.873215773798177e-06, "loss": 0.0093, "step": 16600 }, { "epoch": 0.4942496243286269, "grad_norm": 0.2948925793170929, "learning_rate": 9.872828106192334e-06, "loss": 0.0075, "step": 16610 }, { "epoch": 0.4945471858122685, "grad_norm": 0.33876699209213257, "learning_rate": 9.872439854440266e-06, "loss": 0.0098, "step": 16620 }, { "epoch": 0.49484474729591, "grad_norm": 0.7011319398880005, "learning_rate": 9.87205101858852e-06, "loss": 0.0102, "step": 16630 }, { "epoch": 0.49514230877955157, "grad_norm": 0.29537996649742126, "learning_rate": 9.871661598683708e-06, "loss": 0.01, "step": 16640 }, { "epoch": 0.49543987026319314, "grad_norm": 0.3775414824485779, "learning_rate": 9.871271594772511e-06, "loss": 0.0115, "step": 16650 }, { "epoch": 0.4957374317468347, "grad_norm": 0.22878186404705048, "learning_rate": 9.870881006901684e-06, "loss": 0.0099, "step": 16660 }, { "epoch": 0.49603499323047623, "grad_norm": 0.1480221450328827, "learning_rate": 9.87048983511805e-06, "loss": 0.0116, "step": 16670 }, { "epoch": 0.4963325547141178, "grad_norm": 0.24332858622074127, "learning_rate": 9.870098079468499e-06, "loss": 0.0078, "step": 16680 }, { "epoch": 0.4966301161977594, "grad_norm": 0.4568721354007721, "learning_rate": 9.869705739999999e-06, "loss": 0.0114, "step": 16690 }, { "epoch": 0.4969276776814009, "grad_norm": 0.28629645705223083, "learning_rate": 9.869312816759578e-06, "loss": 0.0075, "step": 16700 }, { "epoch": 0.49722523916504247, "grad_norm": 0.15389588475227356, "learning_rate": 9.868919309794342e-06, "loss": 0.0092, "step": 16710 }, { "epoch": 0.49752280064868404, "grad_norm": 0.4502575099468231, "learning_rate": 9.868525219151464e-06, "loss": 0.0108, "step": 16720 }, { "epoch": 0.4978203621323256, "grad_norm": 0.36462831497192383, "learning_rate": 9.868130544878185e-06, "loss": 0.0111, "step": 16730 }, { "epoch": 0.49811792361596713, "grad_norm": 0.8492862582206726, "learning_rate": 9.86773528702182e-06, "loss": 0.0145, "step": 16740 }, { "epoch": 0.4984154850996087, "grad_norm": 0.24111278355121613, "learning_rate": 9.86733944562975e-06, "loss": 0.0068, "step": 16750 }, { "epoch": 0.4987130465832503, "grad_norm": 0.3008051812648773, "learning_rate": 9.86694302074943e-06, "loss": 0.0111, "step": 16760 }, { "epoch": 0.49901060806689185, "grad_norm": 0.29339149594306946, "learning_rate": 9.86654601242838e-06, "loss": 0.0085, "step": 16770 }, { "epoch": 0.49930816955053337, "grad_norm": 0.2383221536874771, "learning_rate": 9.866148420714194e-06, "loss": 0.0098, "step": 16780 }, { "epoch": 0.49960573103417494, "grad_norm": 0.16660915315151215, "learning_rate": 9.865750245654535e-06, "loss": 0.0109, "step": 16790 }, { "epoch": 0.4999032925178165, "grad_norm": 0.30240747332572937, "learning_rate": 9.865351487297134e-06, "loss": 0.008, "step": 16800 }, { "epoch": 0.5002008540014581, "grad_norm": 0.5309282541275024, "learning_rate": 9.864952145689796e-06, "loss": 0.0079, "step": 16810 }, { "epoch": 0.5004984154850997, "grad_norm": 0.34308505058288574, "learning_rate": 9.864552220880391e-06, "loss": 0.0135, "step": 16820 }, { "epoch": 0.5007959769687411, "grad_norm": 0.3343799412250519, "learning_rate": 9.864151712916863e-06, "loss": 0.0102, "step": 16830 }, { "epoch": 0.5010935384523827, "grad_norm": 0.2032383233308792, "learning_rate": 9.863750621847224e-06, "loss": 0.0101, "step": 16840 }, { "epoch": 0.5013910999360243, "grad_norm": 0.34355098009109497, "learning_rate": 9.863348947719553e-06, "loss": 0.0101, "step": 16850 }, { "epoch": 0.5016886614196658, "grad_norm": 0.42638900876045227, "learning_rate": 9.862946690582007e-06, "loss": 0.0083, "step": 16860 }, { "epoch": 0.5019862229033074, "grad_norm": 0.5423917174339294, "learning_rate": 9.862543850482804e-06, "loss": 0.0154, "step": 16870 }, { "epoch": 0.502283784386949, "grad_norm": 0.42213961482048035, "learning_rate": 9.862140427470238e-06, "loss": 0.0082, "step": 16880 }, { "epoch": 0.5025813458705906, "grad_norm": 0.2605835199356079, "learning_rate": 9.86173642159267e-06, "loss": 0.0129, "step": 16890 }, { "epoch": 0.502878907354232, "grad_norm": 0.3429403007030487, "learning_rate": 9.86133183289853e-06, "loss": 0.0076, "step": 16900 }, { "epoch": 0.5031764688378736, "grad_norm": 0.26401904225349426, "learning_rate": 9.860926661436323e-06, "loss": 0.0111, "step": 16910 }, { "epoch": 0.5034740303215152, "grad_norm": 0.19465874135494232, "learning_rate": 9.860520907254618e-06, "loss": 0.011, "step": 16920 }, { "epoch": 0.5037715918051567, "grad_norm": 0.19183872640132904, "learning_rate": 9.860114570402055e-06, "loss": 0.0101, "step": 16930 }, { "epoch": 0.5040691532887983, "grad_norm": 0.34558501839637756, "learning_rate": 9.859707650927346e-06, "loss": 0.009, "step": 16940 }, { "epoch": 0.5043667147724399, "grad_norm": 0.3030933737754822, "learning_rate": 9.859300148879272e-06, "loss": 0.0082, "step": 16950 }, { "epoch": 0.5046642762560815, "grad_norm": 0.44578641653060913, "learning_rate": 9.858892064306685e-06, "loss": 0.0141, "step": 16960 }, { "epoch": 0.5049618377397229, "grad_norm": 0.32481974363327026, "learning_rate": 9.858483397258502e-06, "loss": 0.0084, "step": 16970 }, { "epoch": 0.5052593992233645, "grad_norm": 0.2260517179965973, "learning_rate": 9.858074147783716e-06, "loss": 0.0057, "step": 16980 }, { "epoch": 0.5055569607070061, "grad_norm": 0.4822303354740143, "learning_rate": 9.857664315931387e-06, "loss": 0.011, "step": 16990 }, { "epoch": 0.5058545221906476, "grad_norm": 0.2831304669380188, "learning_rate": 9.857253901750645e-06, "loss": 0.0086, "step": 17000 }, { "epoch": 0.5061520836742892, "grad_norm": 0.5669935345649719, "learning_rate": 9.85684290529069e-06, "loss": 0.0141, "step": 17010 }, { "epoch": 0.5064496451579308, "grad_norm": 0.299774169921875, "learning_rate": 9.856431326600787e-06, "loss": 0.0084, "step": 17020 }, { "epoch": 0.5067472066415724, "grad_norm": 0.21632836759090424, "learning_rate": 9.856019165730282e-06, "loss": 0.0076, "step": 17030 }, { "epoch": 0.5070447681252138, "grad_norm": 0.3086680471897125, "learning_rate": 9.85560642272858e-06, "loss": 0.0085, "step": 17040 }, { "epoch": 0.5073423296088554, "grad_norm": 0.27579429745674133, "learning_rate": 9.855193097645161e-06, "loss": 0.0066, "step": 17050 }, { "epoch": 0.507639891092497, "grad_norm": 0.27826303243637085, "learning_rate": 9.854779190529574e-06, "loss": 0.0097, "step": 17060 }, { "epoch": 0.5079374525761385, "grad_norm": 0.2725343406200409, "learning_rate": 9.854364701431438e-06, "loss": 0.0115, "step": 17070 }, { "epoch": 0.5082350140597801, "grad_norm": 0.13295456767082214, "learning_rate": 9.853949630400439e-06, "loss": 0.0097, "step": 17080 }, { "epoch": 0.5085325755434217, "grad_norm": 0.6475685238838196, "learning_rate": 9.853533977486338e-06, "loss": 0.0087, "step": 17090 }, { "epoch": 0.5088301370270633, "grad_norm": 0.4189862906932831, "learning_rate": 9.85311774273896e-06, "loss": 0.0092, "step": 17100 }, { "epoch": 0.5091276985107048, "grad_norm": 0.3591824769973755, "learning_rate": 9.852700926208205e-06, "loss": 0.0132, "step": 17110 }, { "epoch": 0.5094252599943463, "grad_norm": 0.5589945316314697, "learning_rate": 9.852283527944039e-06, "loss": 0.0124, "step": 17120 }, { "epoch": 0.5097228214779879, "grad_norm": 0.2863469123840332, "learning_rate": 9.851865547996497e-06, "loss": 0.0163, "step": 17130 }, { "epoch": 0.5100203829616294, "grad_norm": 0.23327118158340454, "learning_rate": 9.851446986415691e-06, "loss": 0.0117, "step": 17140 }, { "epoch": 0.510317944445271, "grad_norm": 0.43031781911849976, "learning_rate": 9.851027843251792e-06, "loss": 0.0095, "step": 17150 }, { "epoch": 0.5106155059289126, "grad_norm": 0.4201945662498474, "learning_rate": 9.850608118555048e-06, "loss": 0.0102, "step": 17160 }, { "epoch": 0.5109130674125542, "grad_norm": 0.15807771682739258, "learning_rate": 9.850187812375775e-06, "loss": 0.0068, "step": 17170 }, { "epoch": 0.5112106288961957, "grad_norm": 0.41311147809028625, "learning_rate": 9.849766924764362e-06, "loss": 0.0129, "step": 17180 }, { "epoch": 0.5115081903798372, "grad_norm": 0.23932240903377533, "learning_rate": 9.849345455771258e-06, "loss": 0.0127, "step": 17190 }, { "epoch": 0.5118057518634788, "grad_norm": 0.3980821669101715, "learning_rate": 9.84892340544699e-06, "loss": 0.0134, "step": 17200 }, { "epoch": 0.5121033133471203, "grad_norm": 0.2334887683391571, "learning_rate": 9.848500773842156e-06, "loss": 0.0085, "step": 17210 }, { "epoch": 0.5124008748307619, "grad_norm": 0.620241641998291, "learning_rate": 9.848077561007417e-06, "loss": 0.0121, "step": 17220 }, { "epoch": 0.5126984363144035, "grad_norm": 0.398953378200531, "learning_rate": 9.84765376699351e-06, "loss": 0.0089, "step": 17230 }, { "epoch": 0.5129959977980451, "grad_norm": 0.4972367584705353, "learning_rate": 9.847229391851233e-06, "loss": 0.0076, "step": 17240 }, { "epoch": 0.5132935592816866, "grad_norm": 0.2033800631761551, "learning_rate": 9.846804435631467e-06, "loss": 0.0245, "step": 17250 }, { "epoch": 0.5135911207653281, "grad_norm": 0.29426684975624084, "learning_rate": 9.846378898385149e-06, "loss": 0.0079, "step": 17260 }, { "epoch": 0.5138886822489697, "grad_norm": 0.43340224027633667, "learning_rate": 9.845952780163293e-06, "loss": 0.0133, "step": 17270 }, { "epoch": 0.5141862437326112, "grad_norm": 0.3477778136730194, "learning_rate": 9.845526081016984e-06, "loss": 0.0066, "step": 17280 }, { "epoch": 0.5144838052162528, "grad_norm": 0.17168910801410675, "learning_rate": 9.84509880099737e-06, "loss": 0.0077, "step": 17290 }, { "epoch": 0.5147813666998944, "grad_norm": 0.30023500323295593, "learning_rate": 9.844670940155675e-06, "loss": 0.0071, "step": 17300 }, { "epoch": 0.515078928183536, "grad_norm": 0.3142600357532501, "learning_rate": 9.844242498543192e-06, "loss": 0.0091, "step": 17310 }, { "epoch": 0.5153764896671775, "grad_norm": 0.1524084210395813, "learning_rate": 9.843813476211279e-06, "loss": 0.0056, "step": 17320 }, { "epoch": 0.515674051150819, "grad_norm": 0.3698466420173645, "learning_rate": 9.843383873211368e-06, "loss": 0.0103, "step": 17330 }, { "epoch": 0.5159716126344606, "grad_norm": 0.3053874373435974, "learning_rate": 9.842953689594956e-06, "loss": 0.0126, "step": 17340 }, { "epoch": 0.5162691741181021, "grad_norm": 0.28431570529937744, "learning_rate": 9.842522925413615e-06, "loss": 0.0099, "step": 17350 }, { "epoch": 0.5165667356017437, "grad_norm": 0.30613845586776733, "learning_rate": 9.842091580718986e-06, "loss": 0.0064, "step": 17360 }, { "epoch": 0.5168642970853853, "grad_norm": 0.6339049339294434, "learning_rate": 9.841659655562775e-06, "loss": 0.0095, "step": 17370 }, { "epoch": 0.5171618585690269, "grad_norm": 0.45618292689323425, "learning_rate": 9.841227149996762e-06, "loss": 0.0111, "step": 17380 }, { "epoch": 0.5174594200526684, "grad_norm": 0.24333664774894714, "learning_rate": 9.840794064072794e-06, "loss": 0.013, "step": 17390 }, { "epoch": 0.5177569815363099, "grad_norm": 0.18740342557430267, "learning_rate": 9.840360397842789e-06, "loss": 0.0093, "step": 17400 }, { "epoch": 0.5180545430199515, "grad_norm": 0.17470628023147583, "learning_rate": 9.839926151358735e-06, "loss": 0.0095, "step": 17410 }, { "epoch": 0.518352104503593, "grad_norm": 0.47103187441825867, "learning_rate": 9.839491324672685e-06, "loss": 0.0096, "step": 17420 }, { "epoch": 0.5186496659872346, "grad_norm": 0.316097229719162, "learning_rate": 9.839055917836772e-06, "loss": 0.0125, "step": 17430 }, { "epoch": 0.5189472274708762, "grad_norm": 0.3975897431373596, "learning_rate": 9.838619930903186e-06, "loss": 0.0084, "step": 17440 }, { "epoch": 0.5192447889545178, "grad_norm": 0.311219722032547, "learning_rate": 9.838183363924196e-06, "loss": 0.0076, "step": 17450 }, { "epoch": 0.5195423504381593, "grad_norm": 0.6888940334320068, "learning_rate": 9.837746216952133e-06, "loss": 0.01, "step": 17460 }, { "epoch": 0.5198399119218008, "grad_norm": 0.3997732400894165, "learning_rate": 9.837308490039406e-06, "loss": 0.0107, "step": 17470 }, { "epoch": 0.5201374734054424, "grad_norm": 0.8854795098304749, "learning_rate": 9.836870183238485e-06, "loss": 0.0078, "step": 17480 }, { "epoch": 0.5204350348890839, "grad_norm": 0.29363390803337097, "learning_rate": 9.836431296601916e-06, "loss": 0.0128, "step": 17490 }, { "epoch": 0.5207325963727255, "grad_norm": 0.37385818362236023, "learning_rate": 9.835991830182312e-06, "loss": 0.009, "step": 17500 }, { "epoch": 0.5210301578563671, "grad_norm": 0.21324008703231812, "learning_rate": 9.835551784032353e-06, "loss": 0.006, "step": 17510 }, { "epoch": 0.5213277193400087, "grad_norm": 0.2883143424987793, "learning_rate": 9.835111158204792e-06, "loss": 0.0087, "step": 17520 }, { "epoch": 0.5216252808236502, "grad_norm": 0.1962890475988388, "learning_rate": 9.834669952752454e-06, "loss": 0.0076, "step": 17530 }, { "epoch": 0.5219228423072917, "grad_norm": 0.2106345146894455, "learning_rate": 9.834228167728224e-06, "loss": 0.0089, "step": 17540 }, { "epoch": 0.5222204037909333, "grad_norm": 0.23171290755271912, "learning_rate": 9.833785803185068e-06, "loss": 0.0081, "step": 17550 }, { "epoch": 0.5225179652745748, "grad_norm": 0.2908906638622284, "learning_rate": 9.833342859176011e-06, "loss": 0.0097, "step": 17560 }, { "epoch": 0.5228155267582164, "grad_norm": 0.3793322741985321, "learning_rate": 9.832899335754156e-06, "loss": 0.01, "step": 17570 }, { "epoch": 0.523113088241858, "grad_norm": 0.43927493691444397, "learning_rate": 9.83245523297267e-06, "loss": 0.0116, "step": 17580 }, { "epoch": 0.5234106497254996, "grad_norm": 0.3145454227924347, "learning_rate": 9.832010550884792e-06, "loss": 0.0136, "step": 17590 }, { "epoch": 0.5237082112091411, "grad_norm": 0.3051755726337433, "learning_rate": 9.831565289543829e-06, "loss": 0.0092, "step": 17600 }, { "epoch": 0.5240057726927827, "grad_norm": 0.24205709993839264, "learning_rate": 9.83111944900316e-06, "loss": 0.0072, "step": 17610 }, { "epoch": 0.5243033341764242, "grad_norm": 0.19629435241222382, "learning_rate": 9.830673029316227e-06, "loss": 0.0111, "step": 17620 }, { "epoch": 0.5246008956600657, "grad_norm": 0.3794834315776825, "learning_rate": 9.830226030536553e-06, "loss": 0.0081, "step": 17630 }, { "epoch": 0.5248984571437073, "grad_norm": 0.22548380494117737, "learning_rate": 9.829778452717718e-06, "loss": 0.0101, "step": 17640 }, { "epoch": 0.5251960186273489, "grad_norm": 0.23700353503227234, "learning_rate": 9.829330295913379e-06, "loss": 0.0133, "step": 17650 }, { "epoch": 0.5254935801109905, "grad_norm": 0.11910125613212585, "learning_rate": 9.828881560177259e-06, "loss": 0.0078, "step": 17660 }, { "epoch": 0.525791141594632, "grad_norm": 0.4400736391544342, "learning_rate": 9.828432245563154e-06, "loss": 0.0156, "step": 17670 }, { "epoch": 0.5260887030782736, "grad_norm": 0.3277437388896942, "learning_rate": 9.827982352124922e-06, "loss": 0.0121, "step": 17680 }, { "epoch": 0.5263862645619151, "grad_norm": 0.40062376856803894, "learning_rate": 9.827531879916503e-06, "loss": 0.0087, "step": 17690 }, { "epoch": 0.5266838260455566, "grad_norm": 0.7117422223091125, "learning_rate": 9.827080828991892e-06, "loss": 0.0102, "step": 17700 }, { "epoch": 0.5269813875291982, "grad_norm": 0.5787592530250549, "learning_rate": 9.826629199405167e-06, "loss": 0.0102, "step": 17710 }, { "epoch": 0.5272789490128398, "grad_norm": 0.20426927506923676, "learning_rate": 9.82617699121046e-06, "loss": 0.0089, "step": 17720 }, { "epoch": 0.5275765104964814, "grad_norm": 0.31551191210746765, "learning_rate": 9.825724204461989e-06, "loss": 0.0171, "step": 17730 }, { "epoch": 0.5278740719801229, "grad_norm": 0.2443612664937973, "learning_rate": 9.825270839214029e-06, "loss": 0.01, "step": 17740 }, { "epoch": 0.5281716334637645, "grad_norm": 0.301341712474823, "learning_rate": 9.824816895520928e-06, "loss": 0.0117, "step": 17750 }, { "epoch": 0.528469194947406, "grad_norm": 0.34943845868110657, "learning_rate": 9.824362373437106e-06, "loss": 0.0076, "step": 17760 }, { "epoch": 0.5287667564310475, "grad_norm": 0.3046448826789856, "learning_rate": 9.82390727301705e-06, "loss": 0.0179, "step": 17770 }, { "epoch": 0.5290643179146891, "grad_norm": 0.34099265933036804, "learning_rate": 9.823451594315319e-06, "loss": 0.0084, "step": 17780 }, { "epoch": 0.5293618793983307, "grad_norm": 0.5582991242408752, "learning_rate": 9.822995337386533e-06, "loss": 0.0145, "step": 17790 }, { "epoch": 0.5296594408819723, "grad_norm": 0.34325137734413147, "learning_rate": 9.822538502285393e-06, "loss": 0.0077, "step": 17800 }, { "epoch": 0.5299570023656138, "grad_norm": 0.374220073223114, "learning_rate": 9.82208108906666e-06, "loss": 0.0074, "step": 17810 }, { "epoch": 0.5302545638492554, "grad_norm": 0.4533454477787018, "learning_rate": 9.821623097785169e-06, "loss": 0.0077, "step": 17820 }, { "epoch": 0.5305521253328969, "grad_norm": 0.4279352128505707, "learning_rate": 9.821164528495823e-06, "loss": 0.0088, "step": 17830 }, { "epoch": 0.5308496868165384, "grad_norm": 0.495755136013031, "learning_rate": 9.820705381253596e-06, "loss": 0.0129, "step": 17840 }, { "epoch": 0.53114724830018, "grad_norm": 0.5633271932601929, "learning_rate": 9.820245656113528e-06, "loss": 0.0103, "step": 17850 }, { "epoch": 0.5314448097838216, "grad_norm": 0.3154788017272949, "learning_rate": 9.819785353130732e-06, "loss": 0.0104, "step": 17860 }, { "epoch": 0.5317423712674632, "grad_norm": 0.43883681297302246, "learning_rate": 9.819324472360386e-06, "loss": 0.0172, "step": 17870 }, { "epoch": 0.5320399327511047, "grad_norm": 0.41157346963882446, "learning_rate": 9.81886301385774e-06, "loss": 0.0084, "step": 17880 }, { "epoch": 0.5323374942347463, "grad_norm": 0.40532565116882324, "learning_rate": 9.818400977678115e-06, "loss": 0.0091, "step": 17890 }, { "epoch": 0.5326350557183878, "grad_norm": 0.6086928844451904, "learning_rate": 9.817938363876898e-06, "loss": 0.0141, "step": 17900 }, { "epoch": 0.5329326172020293, "grad_norm": 0.32646313309669495, "learning_rate": 9.817475172509546e-06, "loss": 0.0086, "step": 17910 }, { "epoch": 0.5332301786856709, "grad_norm": 0.24135702848434448, "learning_rate": 9.817011403631585e-06, "loss": 0.0109, "step": 17920 }, { "epoch": 0.5335277401693125, "grad_norm": 0.45208290219306946, "learning_rate": 9.81654705729861e-06, "loss": 0.0104, "step": 17930 }, { "epoch": 0.533825301652954, "grad_norm": 0.5375087857246399, "learning_rate": 9.81608213356629e-06, "loss": 0.0121, "step": 17940 }, { "epoch": 0.5341228631365956, "grad_norm": 0.28390511870384216, "learning_rate": 9.815616632490354e-06, "loss": 0.0115, "step": 17950 }, { "epoch": 0.5344204246202372, "grad_norm": 0.18905401229858398, "learning_rate": 9.815150554126608e-06, "loss": 0.0088, "step": 17960 }, { "epoch": 0.5347179861038787, "grad_norm": 0.1882820874452591, "learning_rate": 9.814683898530927e-06, "loss": 0.01, "step": 17970 }, { "epoch": 0.5350155475875202, "grad_norm": 0.3260379731655121, "learning_rate": 9.81421666575925e-06, "loss": 0.0086, "step": 17980 }, { "epoch": 0.5353131090711618, "grad_norm": 0.41162562370300293, "learning_rate": 9.81374885586759e-06, "loss": 0.0093, "step": 17990 }, { "epoch": 0.5356106705548034, "grad_norm": 0.2818771004676819, "learning_rate": 9.813280468912024e-06, "loss": 0.0067, "step": 18000 }, { "epoch": 0.535908232038445, "grad_norm": 0.3573954701423645, "learning_rate": 9.812811504948702e-06, "loss": 0.0105, "step": 18010 }, { "epoch": 0.5362057935220865, "grad_norm": 0.42851218581199646, "learning_rate": 9.812341964033847e-06, "loss": 0.0088, "step": 18020 }, { "epoch": 0.5365033550057281, "grad_norm": 0.34959572553634644, "learning_rate": 9.811871846223742e-06, "loss": 0.0087, "step": 18030 }, { "epoch": 0.5368009164893696, "grad_norm": 0.42453640699386597, "learning_rate": 9.811401151574747e-06, "loss": 0.0084, "step": 18040 }, { "epoch": 0.5370984779730111, "grad_norm": 0.29477035999298096, "learning_rate": 9.810929880143284e-06, "loss": 0.01, "step": 18050 }, { "epoch": 0.5373960394566527, "grad_norm": 0.334740549325943, "learning_rate": 9.810458031985855e-06, "loss": 0.0113, "step": 18060 }, { "epoch": 0.5376936009402943, "grad_norm": 0.30094075202941895, "learning_rate": 9.809985607159017e-06, "loss": 0.0059, "step": 18070 }, { "epoch": 0.5379911624239359, "grad_norm": 0.5186489820480347, "learning_rate": 9.809512605719406e-06, "loss": 0.0086, "step": 18080 }, { "epoch": 0.5382887239075774, "grad_norm": 0.22497965395450592, "learning_rate": 9.809039027723728e-06, "loss": 0.009, "step": 18090 }, { "epoch": 0.538586285391219, "grad_norm": 0.32855162024497986, "learning_rate": 9.808564873228752e-06, "loss": 0.0062, "step": 18100 }, { "epoch": 0.5388838468748606, "grad_norm": 0.31360548734664917, "learning_rate": 9.808090142291318e-06, "loss": 0.0084, "step": 18110 }, { "epoch": 0.539181408358502, "grad_norm": 0.27697470784187317, "learning_rate": 9.807614834968336e-06, "loss": 0.0099, "step": 18120 }, { "epoch": 0.5394789698421436, "grad_norm": 0.28922295570373535, "learning_rate": 9.807138951316786e-06, "loss": 0.0095, "step": 18130 }, { "epoch": 0.5397765313257852, "grad_norm": 0.20349626243114471, "learning_rate": 9.806662491393716e-06, "loss": 0.0117, "step": 18140 }, { "epoch": 0.5400740928094268, "grad_norm": 0.3767148554325104, "learning_rate": 9.806185455256245e-06, "loss": 0.0103, "step": 18150 }, { "epoch": 0.5403716542930683, "grad_norm": 0.5793460607528687, "learning_rate": 9.805707842961555e-06, "loss": 0.0083, "step": 18160 }, { "epoch": 0.5406692157767099, "grad_norm": 0.4675944745540619, "learning_rate": 9.805229654566906e-06, "loss": 0.0108, "step": 18170 }, { "epoch": 0.5409667772603515, "grad_norm": 0.2849412262439728, "learning_rate": 9.804750890129619e-06, "loss": 0.0096, "step": 18180 }, { "epoch": 0.5412643387439929, "grad_norm": 0.41249212622642517, "learning_rate": 9.804271549707087e-06, "loss": 0.0117, "step": 18190 }, { "epoch": 0.5415619002276345, "grad_norm": 0.274791955947876, "learning_rate": 9.803791633356777e-06, "loss": 0.01, "step": 18200 }, { "epoch": 0.5418594617112761, "grad_norm": 0.35703474283218384, "learning_rate": 9.803311141136216e-06, "loss": 0.0145, "step": 18210 }, { "epoch": 0.5421570231949177, "grad_norm": 0.22479106485843658, "learning_rate": 9.802830073103007e-06, "loss": 0.0086, "step": 18220 }, { "epoch": 0.5424545846785592, "grad_norm": 0.7990044355392456, "learning_rate": 9.802348429314816e-06, "loss": 0.0083, "step": 18230 }, { "epoch": 0.5427521461622008, "grad_norm": 0.3814101219177246, "learning_rate": 9.801866209829387e-06, "loss": 0.007, "step": 18240 }, { "epoch": 0.5430497076458424, "grad_norm": 0.33040323853492737, "learning_rate": 9.801383414704523e-06, "loss": 0.0079, "step": 18250 }, { "epoch": 0.5433472691294838, "grad_norm": 0.14185234904289246, "learning_rate": 9.800900043998105e-06, "loss": 0.0072, "step": 18260 }, { "epoch": 0.5436448306131254, "grad_norm": 0.7039906978607178, "learning_rate": 9.800416097768076e-06, "loss": 0.011, "step": 18270 }, { "epoch": 0.543942392096767, "grad_norm": 0.3958764374256134, "learning_rate": 9.79993157607245e-06, "loss": 0.0108, "step": 18280 }, { "epoch": 0.5442399535804086, "grad_norm": 0.35718777775764465, "learning_rate": 9.799446478969312e-06, "loss": 0.006, "step": 18290 }, { "epoch": 0.5445375150640501, "grad_norm": 0.3998216390609741, "learning_rate": 9.798960806516815e-06, "loss": 0.0079, "step": 18300 }, { "epoch": 0.5448350765476917, "grad_norm": 0.31234997510910034, "learning_rate": 9.798474558773178e-06, "loss": 0.0159, "step": 18310 }, { "epoch": 0.5451326380313333, "grad_norm": 0.182073175907135, "learning_rate": 9.797987735796697e-06, "loss": 0.011, "step": 18320 }, { "epoch": 0.5454301995149747, "grad_norm": 0.37146079540252686, "learning_rate": 9.797500337645724e-06, "loss": 0.0088, "step": 18330 }, { "epoch": 0.5457277609986163, "grad_norm": 0.169163778424263, "learning_rate": 9.797012364378692e-06, "loss": 0.0079, "step": 18340 }, { "epoch": 0.5460253224822579, "grad_norm": 0.3559076189994812, "learning_rate": 9.7965238160541e-06, "loss": 0.0082, "step": 18350 }, { "epoch": 0.5463228839658995, "grad_norm": 0.37516921758651733, "learning_rate": 9.796034692730509e-06, "loss": 0.0101, "step": 18360 }, { "epoch": 0.546620445449541, "grad_norm": 0.49190133810043335, "learning_rate": 9.795544994466558e-06, "loss": 0.0093, "step": 18370 }, { "epoch": 0.5469180069331826, "grad_norm": 0.2285008579492569, "learning_rate": 9.795054721320952e-06, "loss": 0.0074, "step": 18380 }, { "epoch": 0.5472155684168242, "grad_norm": 0.38018205761909485, "learning_rate": 9.794563873352463e-06, "loss": 0.009, "step": 18390 }, { "epoch": 0.5475131299004656, "grad_norm": 0.20655643939971924, "learning_rate": 9.79407245061993e-06, "loss": 0.0054, "step": 18400 }, { "epoch": 0.5478106913841072, "grad_norm": 0.3526633083820343, "learning_rate": 9.793580453182267e-06, "loss": 0.0075, "step": 18410 }, { "epoch": 0.5481082528677488, "grad_norm": 0.14569509029388428, "learning_rate": 9.793087881098455e-06, "loss": 0.0102, "step": 18420 }, { "epoch": 0.5484058143513904, "grad_norm": 0.3605203628540039, "learning_rate": 9.792594734427539e-06, "loss": 0.0059, "step": 18430 }, { "epoch": 0.5487033758350319, "grad_norm": 0.40087008476257324, "learning_rate": 9.79210101322864e-06, "loss": 0.0077, "step": 18440 }, { "epoch": 0.5490009373186735, "grad_norm": 0.30256757140159607, "learning_rate": 9.791606717560942e-06, "loss": 0.0094, "step": 18450 }, { "epoch": 0.5492984988023151, "grad_norm": 0.11982476711273193, "learning_rate": 9.791111847483701e-06, "loss": 0.0077, "step": 18460 }, { "epoch": 0.5495960602859565, "grad_norm": 0.4627365469932556, "learning_rate": 9.790616403056243e-06, "loss": 0.0092, "step": 18470 }, { "epoch": 0.5498936217695981, "grad_norm": 0.477647066116333, "learning_rate": 9.790120384337958e-06, "loss": 0.0087, "step": 18480 }, { "epoch": 0.5501911832532397, "grad_norm": 0.3885911703109741, "learning_rate": 9.78962379138831e-06, "loss": 0.0127, "step": 18490 }, { "epoch": 0.5504887447368813, "grad_norm": 0.3767966032028198, "learning_rate": 9.78912662426683e-06, "loss": 0.0061, "step": 18500 }, { "epoch": 0.5507863062205228, "grad_norm": 0.5880500078201294, "learning_rate": 9.788628883033116e-06, "loss": 0.0105, "step": 18510 }, { "epoch": 0.5510838677041644, "grad_norm": 0.47985830903053284, "learning_rate": 9.788130567746836e-06, "loss": 0.0076, "step": 18520 }, { "epoch": 0.551381429187806, "grad_norm": 0.7167308926582336, "learning_rate": 9.787631678467728e-06, "loss": 0.0069, "step": 18530 }, { "epoch": 0.5516789906714474, "grad_norm": 0.18020616471767426, "learning_rate": 9.7871322152556e-06, "loss": 0.0088, "step": 18540 }, { "epoch": 0.551976552155089, "grad_norm": 0.1475643515586853, "learning_rate": 9.786632178170322e-06, "loss": 0.0061, "step": 18550 }, { "epoch": 0.5522741136387306, "grad_norm": 0.2482248693704605, "learning_rate": 9.786131567271843e-06, "loss": 0.0063, "step": 18560 }, { "epoch": 0.5525716751223722, "grad_norm": 0.40509313344955444, "learning_rate": 9.785630382620172e-06, "loss": 0.0134, "step": 18570 }, { "epoch": 0.5528692366060137, "grad_norm": 0.2619132995605469, "learning_rate": 9.78512862427539e-06, "loss": 0.0107, "step": 18580 }, { "epoch": 0.5531667980896553, "grad_norm": 0.3420645296573639, "learning_rate": 9.78462629229765e-06, "loss": 0.0067, "step": 18590 }, { "epoch": 0.5534643595732969, "grad_norm": 0.3794693648815155, "learning_rate": 9.784123386747166e-06, "loss": 0.0116, "step": 18600 }, { "epoch": 0.5537619210569384, "grad_norm": 0.4464503228664398, "learning_rate": 9.783619907684229e-06, "loss": 0.0052, "step": 18610 }, { "epoch": 0.5540594825405799, "grad_norm": 0.5175393223762512, "learning_rate": 9.783115855169196e-06, "loss": 0.0134, "step": 18620 }, { "epoch": 0.5543570440242215, "grad_norm": 0.1288047879934311, "learning_rate": 9.782611229262488e-06, "loss": 0.0085, "step": 18630 }, { "epoch": 0.554654605507863, "grad_norm": 4.410498142242432, "learning_rate": 9.782106030024603e-06, "loss": 0.0142, "step": 18640 }, { "epoch": 0.5549521669915046, "grad_norm": 0.3523697853088379, "learning_rate": 9.781600257516101e-06, "loss": 0.0066, "step": 18650 }, { "epoch": 0.5552497284751462, "grad_norm": 0.11839048564434052, "learning_rate": 9.781093911797613e-06, "loss": 0.0092, "step": 18660 }, { "epoch": 0.5555472899587878, "grad_norm": 0.35573461651802063, "learning_rate": 9.78058699292984e-06, "loss": 0.0111, "step": 18670 }, { "epoch": 0.5558448514424293, "grad_norm": 0.43074047565460205, "learning_rate": 9.780079500973548e-06, "loss": 0.0079, "step": 18680 }, { "epoch": 0.5561424129260708, "grad_norm": 0.46997371315956116, "learning_rate": 9.779571435989577e-06, "loss": 0.0137, "step": 18690 }, { "epoch": 0.5564399744097124, "grad_norm": 0.14055246114730835, "learning_rate": 9.779062798038832e-06, "loss": 0.0094, "step": 18700 }, { "epoch": 0.556737535893354, "grad_norm": 0.5811620354652405, "learning_rate": 9.778553587182287e-06, "loss": 0.0128, "step": 18710 }, { "epoch": 0.5570350973769955, "grad_norm": 0.436049222946167, "learning_rate": 9.778043803480988e-06, "loss": 0.0108, "step": 18720 }, { "epoch": 0.5573326588606371, "grad_norm": 0.704390823841095, "learning_rate": 9.777533446996043e-06, "loss": 0.0078, "step": 18730 }, { "epoch": 0.5576302203442787, "grad_norm": 0.1658323109149933, "learning_rate": 9.777022517788635e-06, "loss": 0.0091, "step": 18740 }, { "epoch": 0.5579277818279202, "grad_norm": 0.4515634775161743, "learning_rate": 9.776511015920013e-06, "loss": 0.0104, "step": 18750 }, { "epoch": 0.5582253433115617, "grad_norm": 0.3666280508041382, "learning_rate": 9.775998941451497e-06, "loss": 0.0068, "step": 18760 }, { "epoch": 0.5585229047952033, "grad_norm": 0.6319342255592346, "learning_rate": 9.77548629444447e-06, "loss": 0.0142, "step": 18770 }, { "epoch": 0.5588204662788449, "grad_norm": 0.4001700282096863, "learning_rate": 9.774973074960388e-06, "loss": 0.0085, "step": 18780 }, { "epoch": 0.5591180277624864, "grad_norm": 0.42171555757522583, "learning_rate": 9.774459283060775e-06, "loss": 0.0096, "step": 18790 }, { "epoch": 0.559415589246128, "grad_norm": 0.35706186294555664, "learning_rate": 9.773944918807226e-06, "loss": 0.0078, "step": 18800 }, { "epoch": 0.5597131507297696, "grad_norm": 0.42081889510154724, "learning_rate": 9.7734299822614e-06, "loss": 0.0135, "step": 18810 }, { "epoch": 0.5600107122134111, "grad_norm": 0.2895030379295349, "learning_rate": 9.772914473485027e-06, "loss": 0.0096, "step": 18820 }, { "epoch": 0.5603082736970526, "grad_norm": 0.3523268401622772, "learning_rate": 9.772398392539905e-06, "loss": 0.0093, "step": 18830 }, { "epoch": 0.5606058351806942, "grad_norm": 0.47897782921791077, "learning_rate": 9.771881739487901e-06, "loss": 0.0116, "step": 18840 }, { "epoch": 0.5609033966643358, "grad_norm": 0.26677200198173523, "learning_rate": 9.771364514390951e-06, "loss": 0.0081, "step": 18850 }, { "epoch": 0.5612009581479773, "grad_norm": 0.42532044649124146, "learning_rate": 9.770846717311058e-06, "loss": 0.0076, "step": 18860 }, { "epoch": 0.5614985196316189, "grad_norm": 0.232378289103508, "learning_rate": 9.770328348310295e-06, "loss": 0.0076, "step": 18870 }, { "epoch": 0.5617960811152605, "grad_norm": 0.2437020242214203, "learning_rate": 9.769809407450805e-06, "loss": 0.01, "step": 18880 }, { "epoch": 0.562093642598902, "grad_norm": 0.39205247163772583, "learning_rate": 9.769289894794795e-06, "loss": 0.0188, "step": 18890 }, { "epoch": 0.5623912040825435, "grad_norm": 0.36873316764831543, "learning_rate": 9.768769810404545e-06, "loss": 0.011, "step": 18900 }, { "epoch": 0.5626887655661851, "grad_norm": 0.29022443294525146, "learning_rate": 9.768249154342401e-06, "loss": 0.0118, "step": 18910 }, { "epoch": 0.5629863270498267, "grad_norm": 0.6013668179512024, "learning_rate": 9.767727926670778e-06, "loss": 0.0087, "step": 18920 }, { "epoch": 0.5632838885334682, "grad_norm": 0.2863173484802246, "learning_rate": 9.767206127452162e-06, "loss": 0.0099, "step": 18930 }, { "epoch": 0.5635814500171098, "grad_norm": 0.3514541685581207, "learning_rate": 9.766683756749102e-06, "loss": 0.0107, "step": 18940 }, { "epoch": 0.5638790115007514, "grad_norm": 0.3932209610939026, "learning_rate": 9.766160814624224e-06, "loss": 0.0075, "step": 18950 }, { "epoch": 0.5641765729843929, "grad_norm": 0.20349472761154175, "learning_rate": 9.765637301140211e-06, "loss": 0.0092, "step": 18960 }, { "epoch": 0.5644741344680344, "grad_norm": 0.4690685272216797, "learning_rate": 9.765113216359824e-06, "loss": 0.008, "step": 18970 }, { "epoch": 0.564771695951676, "grad_norm": 0.21050713956356049, "learning_rate": 9.764588560345891e-06, "loss": 0.0055, "step": 18980 }, { "epoch": 0.5650692574353176, "grad_norm": 0.3438703119754791, "learning_rate": 9.764063333161307e-06, "loss": 0.0098, "step": 18990 }, { "epoch": 0.5653668189189591, "grad_norm": 0.4420584440231323, "learning_rate": 9.763537534869033e-06, "loss": 0.0095, "step": 19000 }, { "epoch": 0.5656643804026007, "grad_norm": 0.2245372235774994, "learning_rate": 9.7630111655321e-06, "loss": 0.0086, "step": 19010 }, { "epoch": 0.5659619418862423, "grad_norm": 0.40547966957092285, "learning_rate": 9.762484225213609e-06, "loss": 0.009, "step": 19020 }, { "epoch": 0.5662595033698838, "grad_norm": 0.10174775868654251, "learning_rate": 9.76195671397673e-06, "loss": 0.0076, "step": 19030 }, { "epoch": 0.5665570648535253, "grad_norm": 0.28030943870544434, "learning_rate": 9.7614286318847e-06, "loss": 0.0077, "step": 19040 }, { "epoch": 0.5668546263371669, "grad_norm": 0.24696755409240723, "learning_rate": 9.760899979000823e-06, "loss": 0.0068, "step": 19050 }, { "epoch": 0.5671521878208085, "grad_norm": 0.3508254289627075, "learning_rate": 9.760370755388475e-06, "loss": 0.0074, "step": 19060 }, { "epoch": 0.56744974930445, "grad_norm": 0.725328266620636, "learning_rate": 9.759840961111098e-06, "loss": 0.0101, "step": 19070 }, { "epoch": 0.5677473107880916, "grad_norm": 0.2775318920612335, "learning_rate": 9.759310596232203e-06, "loss": 0.007, "step": 19080 }, { "epoch": 0.5680448722717332, "grad_norm": 0.4016996920108795, "learning_rate": 9.758779660815367e-06, "loss": 0.0084, "step": 19090 }, { "epoch": 0.5683424337553747, "grad_norm": 0.48064807057380676, "learning_rate": 9.758248154924239e-06, "loss": 0.011, "step": 19100 }, { "epoch": 0.5686399952390163, "grad_norm": 0.37065157294273376, "learning_rate": 9.757716078622537e-06, "loss": 0.007, "step": 19110 }, { "epoch": 0.5689375567226578, "grad_norm": 0.3498515784740448, "learning_rate": 9.757183431974042e-06, "loss": 0.0069, "step": 19120 }, { "epoch": 0.5692351182062994, "grad_norm": 0.2734645903110504, "learning_rate": 9.75665021504261e-06, "loss": 0.0127, "step": 19130 }, { "epoch": 0.5695326796899409, "grad_norm": 0.32424721121788025, "learning_rate": 9.756116427892158e-06, "loss": 0.0075, "step": 19140 }, { "epoch": 0.5698302411735825, "grad_norm": 0.2833949625492096, "learning_rate": 9.75558207058668e-06, "loss": 0.0107, "step": 19150 }, { "epoch": 0.5701278026572241, "grad_norm": 0.5459156036376953, "learning_rate": 9.755047143190231e-06, "loss": 0.008, "step": 19160 }, { "epoch": 0.5704253641408656, "grad_norm": 0.4977879226207733, "learning_rate": 9.754511645766938e-06, "loss": 0.0098, "step": 19170 }, { "epoch": 0.5707229256245072, "grad_norm": 0.18857580423355103, "learning_rate": 9.753975578380995e-06, "loss": 0.0093, "step": 19180 }, { "epoch": 0.5710204871081487, "grad_norm": 0.31946924328804016, "learning_rate": 9.753438941096665e-06, "loss": 0.0099, "step": 19190 }, { "epoch": 0.5713180485917903, "grad_norm": 0.3564087152481079, "learning_rate": 9.752901733978281e-06, "loss": 0.01, "step": 19200 }, { "epoch": 0.5716156100754318, "grad_norm": 0.2585727572441101, "learning_rate": 9.752363957090239e-06, "loss": 0.0116, "step": 19210 }, { "epoch": 0.5719131715590734, "grad_norm": 0.42782023549079895, "learning_rate": 9.75182561049701e-06, "loss": 0.0111, "step": 19220 }, { "epoch": 0.572210733042715, "grad_norm": 0.31472164392471313, "learning_rate": 9.75128669426313e-06, "loss": 0.0125, "step": 19230 }, { "epoch": 0.5725082945263565, "grad_norm": 0.4309266209602356, "learning_rate": 9.750747208453199e-06, "loss": 0.0096, "step": 19240 }, { "epoch": 0.5728058560099981, "grad_norm": 0.2698543965816498, "learning_rate": 9.750207153131894e-06, "loss": 0.0096, "step": 19250 }, { "epoch": 0.5731034174936396, "grad_norm": 0.22043396532535553, "learning_rate": 9.749666528363956e-06, "loss": 0.0095, "step": 19260 }, { "epoch": 0.5734009789772812, "grad_norm": 0.2698490023612976, "learning_rate": 9.749125334214191e-06, "loss": 0.0126, "step": 19270 }, { "epoch": 0.5736985404609227, "grad_norm": 0.25075581669807434, "learning_rate": 9.74858357074748e-06, "loss": 0.0073, "step": 19280 }, { "epoch": 0.5739961019445643, "grad_norm": 0.30857568979263306, "learning_rate": 9.748041238028765e-06, "loss": 0.0073, "step": 19290 }, { "epoch": 0.5742936634282059, "grad_norm": 0.16387055814266205, "learning_rate": 9.747498336123062e-06, "loss": 0.0068, "step": 19300 }, { "epoch": 0.5745912249118474, "grad_norm": 0.23907671868801117, "learning_rate": 9.746954865095455e-06, "loss": 0.009, "step": 19310 }, { "epoch": 0.574888786395489, "grad_norm": 0.32952243089675903, "learning_rate": 9.74641082501109e-06, "loss": 0.0079, "step": 19320 }, { "epoch": 0.5751863478791305, "grad_norm": 0.3948637545108795, "learning_rate": 9.74586621593519e-06, "loss": 0.0094, "step": 19330 }, { "epoch": 0.575483909362772, "grad_norm": 0.346492737531662, "learning_rate": 9.745321037933038e-06, "loss": 0.007, "step": 19340 }, { "epoch": 0.5757814708464136, "grad_norm": 0.3969590961933136, "learning_rate": 9.74477529106999e-06, "loss": 0.0091, "step": 19350 }, { "epoch": 0.5760790323300552, "grad_norm": 0.1714908480644226, "learning_rate": 9.744228975411472e-06, "loss": 0.0099, "step": 19360 }, { "epoch": 0.5763765938136968, "grad_norm": 0.44606611132621765, "learning_rate": 9.74368209102297e-06, "loss": 0.0092, "step": 19370 }, { "epoch": 0.5766741552973383, "grad_norm": 0.21147790551185608, "learning_rate": 9.74313463797005e-06, "loss": 0.0098, "step": 19380 }, { "epoch": 0.5769717167809799, "grad_norm": 0.38268598914146423, "learning_rate": 9.742586616318334e-06, "loss": 0.0064, "step": 19390 }, { "epoch": 0.5772692782646214, "grad_norm": 0.21946796774864197, "learning_rate": 9.742038026133523e-06, "loss": 0.0118, "step": 19400 }, { "epoch": 0.577566839748263, "grad_norm": 0.09106426686048508, "learning_rate": 9.741488867481377e-06, "loss": 0.0066, "step": 19410 }, { "epoch": 0.5778644012319045, "grad_norm": 0.24885359406471252, "learning_rate": 9.740939140427729e-06, "loss": 0.0105, "step": 19420 }, { "epoch": 0.5781619627155461, "grad_norm": 0.49317261576652527, "learning_rate": 9.740388845038482e-06, "loss": 0.0085, "step": 19430 }, { "epoch": 0.5784595241991877, "grad_norm": 0.4182494878768921, "learning_rate": 9.7398379813796e-06, "loss": 0.0086, "step": 19440 }, { "epoch": 0.5787570856828292, "grad_norm": 0.603411078453064, "learning_rate": 9.739286549517124e-06, "loss": 0.0087, "step": 19450 }, { "epoch": 0.5790546471664708, "grad_norm": 0.38952842354774475, "learning_rate": 9.738734549517156e-06, "loss": 0.011, "step": 19460 }, { "epoch": 0.5793522086501123, "grad_norm": 0.20458391308784485, "learning_rate": 9.73818198144587e-06, "loss": 0.0075, "step": 19470 }, { "epoch": 0.5796497701337538, "grad_norm": 0.46291956305503845, "learning_rate": 9.737628845369506e-06, "loss": 0.0078, "step": 19480 }, { "epoch": 0.5799473316173954, "grad_norm": 0.38631561398506165, "learning_rate": 9.737075141354375e-06, "loss": 0.0084, "step": 19490 }, { "epoch": 0.580244893101037, "grad_norm": 0.33667513728141785, "learning_rate": 9.73652086946685e-06, "loss": 0.0105, "step": 19500 }, { "epoch": 0.5805424545846786, "grad_norm": 0.16204631328582764, "learning_rate": 9.73596602977338e-06, "loss": 0.025, "step": 19510 }, { "epoch": 0.5808400160683201, "grad_norm": 0.29840654134750366, "learning_rate": 9.735410622340476e-06, "loss": 0.0079, "step": 19520 }, { "epoch": 0.5811375775519617, "grad_norm": 0.30436304211616516, "learning_rate": 9.734854647234722e-06, "loss": 0.0085, "step": 19530 }, { "epoch": 0.5814351390356032, "grad_norm": 0.5843255519866943, "learning_rate": 9.734298104522763e-06, "loss": 0.0118, "step": 19540 }, { "epoch": 0.5817327005192447, "grad_norm": 0.15394501388072968, "learning_rate": 9.733740994271322e-06, "loss": 0.0082, "step": 19550 }, { "epoch": 0.5820302620028863, "grad_norm": 0.5043519139289856, "learning_rate": 9.733183316547178e-06, "loss": 0.0078, "step": 19560 }, { "epoch": 0.5823278234865279, "grad_norm": 0.47045478224754333, "learning_rate": 9.73262507141719e-06, "loss": 0.0084, "step": 19570 }, { "epoch": 0.5826253849701695, "grad_norm": 0.4484022557735443, "learning_rate": 9.732066258948277e-06, "loss": 0.0109, "step": 19580 }, { "epoch": 0.582922946453811, "grad_norm": 0.3244040906429291, "learning_rate": 9.731506879207426e-06, "loss": 0.0064, "step": 19590 }, { "epoch": 0.5832205079374526, "grad_norm": 0.6962573528289795, "learning_rate": 9.7309469322617e-06, "loss": 0.0091, "step": 19600 }, { "epoch": 0.5835180694210942, "grad_norm": 0.4777285158634186, "learning_rate": 9.730386418178222e-06, "loss": 0.0113, "step": 19610 }, { "epoch": 0.5838156309047356, "grad_norm": 0.1349014937877655, "learning_rate": 9.729825337024184e-06, "loss": 0.0131, "step": 19620 }, { "epoch": 0.5841131923883772, "grad_norm": 0.7153880000114441, "learning_rate": 9.729263688866848e-06, "loss": 0.0079, "step": 19630 }, { "epoch": 0.5844107538720188, "grad_norm": 0.2973940670490265, "learning_rate": 9.728701473773546e-06, "loss": 0.0077, "step": 19640 }, { "epoch": 0.5847083153556604, "grad_norm": 0.1751062422990799, "learning_rate": 9.728138691811671e-06, "loss": 0.0074, "step": 19650 }, { "epoch": 0.5850058768393019, "grad_norm": 0.2956047058105469, "learning_rate": 9.72757534304869e-06, "loss": 0.0072, "step": 19660 }, { "epoch": 0.5853034383229435, "grad_norm": 0.36738109588623047, "learning_rate": 9.72701142755214e-06, "loss": 0.0072, "step": 19670 }, { "epoch": 0.5856009998065851, "grad_norm": 0.24110059440135956, "learning_rate": 9.726446945389617e-06, "loss": 0.005, "step": 19680 }, { "epoch": 0.5858985612902265, "grad_norm": 0.8240504264831543, "learning_rate": 9.725881896628792e-06, "loss": 0.0165, "step": 19690 }, { "epoch": 0.5861961227738681, "grad_norm": 0.4159442186355591, "learning_rate": 9.725316281337405e-06, "loss": 0.0102, "step": 19700 }, { "epoch": 0.5864936842575097, "grad_norm": 0.3743553161621094, "learning_rate": 9.724750099583255e-06, "loss": 0.0143, "step": 19710 }, { "epoch": 0.5867912457411513, "grad_norm": 0.25946056842803955, "learning_rate": 9.724183351434222e-06, "loss": 0.0078, "step": 19720 }, { "epoch": 0.5870888072247928, "grad_norm": 0.3611993193626404, "learning_rate": 9.723616036958241e-06, "loss": 0.0072, "step": 19730 }, { "epoch": 0.5873863687084344, "grad_norm": 0.17470009624958038, "learning_rate": 9.723048156223322e-06, "loss": 0.0114, "step": 19740 }, { "epoch": 0.587683930192076, "grad_norm": 0.3151416778564453, "learning_rate": 9.722479709297543e-06, "loss": 0.0072, "step": 19750 }, { "epoch": 0.5879814916757174, "grad_norm": 0.3665282726287842, "learning_rate": 9.721910696249048e-06, "loss": 0.0077, "step": 19760 }, { "epoch": 0.588279053159359, "grad_norm": 0.4964962601661682, "learning_rate": 9.721341117146048e-06, "loss": 0.012, "step": 19770 }, { "epoch": 0.5885766146430006, "grad_norm": 0.33320432901382446, "learning_rate": 9.720770972056826e-06, "loss": 0.0083, "step": 19780 }, { "epoch": 0.5888741761266422, "grad_norm": 0.3292909860610962, "learning_rate": 9.720200261049726e-06, "loss": 0.0095, "step": 19790 }, { "epoch": 0.5891717376102837, "grad_norm": 0.3648908734321594, "learning_rate": 9.719628984193168e-06, "loss": 0.0089, "step": 19800 }, { "epoch": 0.5894692990939253, "grad_norm": 0.18651609122753143, "learning_rate": 9.719057141555632e-06, "loss": 0.0076, "step": 19810 }, { "epoch": 0.5897668605775669, "grad_norm": 0.2221132069826126, "learning_rate": 9.718484733205673e-06, "loss": 0.01, "step": 19820 }, { "epoch": 0.5900644220612083, "grad_norm": 0.2386142909526825, "learning_rate": 9.717911759211908e-06, "loss": 0.0045, "step": 19830 }, { "epoch": 0.5903619835448499, "grad_norm": 0.42269057035446167, "learning_rate": 9.717338219643024e-06, "loss": 0.0106, "step": 19840 }, { "epoch": 0.5906595450284915, "grad_norm": 0.5261791944503784, "learning_rate": 9.716764114567777e-06, "loss": 0.0071, "step": 19850 }, { "epoch": 0.5909571065121331, "grad_norm": 0.4540623724460602, "learning_rate": 9.716189444054988e-06, "loss": 0.0087, "step": 19860 }, { "epoch": 0.5912546679957746, "grad_norm": 0.19461588561534882, "learning_rate": 9.715614208173552e-06, "loss": 0.0088, "step": 19870 }, { "epoch": 0.5915522294794162, "grad_norm": 0.3174531161785126, "learning_rate": 9.715038406992423e-06, "loss": 0.0095, "step": 19880 }, { "epoch": 0.5918497909630578, "grad_norm": 0.14007142186164856, "learning_rate": 9.714462040580626e-06, "loss": 0.0109, "step": 19890 }, { "epoch": 0.5921473524466992, "grad_norm": 0.22633235156536102, "learning_rate": 9.71388510900726e-06, "loss": 0.0054, "step": 19900 }, { "epoch": 0.5924449139303408, "grad_norm": 0.3094829022884369, "learning_rate": 9.713307612341481e-06, "loss": 0.0078, "step": 19910 }, { "epoch": 0.5927424754139824, "grad_norm": 0.2239256650209427, "learning_rate": 9.712729550652523e-06, "loss": 0.006, "step": 19920 }, { "epoch": 0.593040036897624, "grad_norm": 0.37787526845932007, "learning_rate": 9.71215092400968e-06, "loss": 0.0135, "step": 19930 }, { "epoch": 0.5933375983812655, "grad_norm": 0.0891125276684761, "learning_rate": 9.711571732482317e-06, "loss": 0.0108, "step": 19940 }, { "epoch": 0.5936351598649071, "grad_norm": 0.3176509141921997, "learning_rate": 9.710991976139867e-06, "loss": 0.0073, "step": 19950 }, { "epoch": 0.5939327213485487, "grad_norm": 0.23617742955684662, "learning_rate": 9.710411655051833e-06, "loss": 0.0098, "step": 19960 }, { "epoch": 0.5942302828321901, "grad_norm": 0.3559308648109436, "learning_rate": 9.709830769287778e-06, "loss": 0.0094, "step": 19970 }, { "epoch": 0.5945278443158317, "grad_norm": 0.24487446248531342, "learning_rate": 9.70924931891734e-06, "loss": 0.0057, "step": 19980 }, { "epoch": 0.5948254057994733, "grad_norm": 0.28533875942230225, "learning_rate": 9.708667304010222e-06, "loss": 0.0104, "step": 19990 }, { "epoch": 0.5951229672831149, "grad_norm": 0.31303641200065613, "learning_rate": 9.708084724636196e-06, "loss": 0.0079, "step": 20000 }, { "epoch": 0.5954205287667564, "grad_norm": 0.31144100427627563, "learning_rate": 9.707501580865098e-06, "loss": 0.0074, "step": 20010 }, { "epoch": 0.595718090250398, "grad_norm": 0.32739266753196716, "learning_rate": 9.706917872766837e-06, "loss": 0.011, "step": 20020 }, { "epoch": 0.5960156517340396, "grad_norm": 0.40365031361579895, "learning_rate": 9.706333600411386e-06, "loss": 0.0115, "step": 20030 }, { "epoch": 0.596313213217681, "grad_norm": 0.5210962295532227, "learning_rate": 9.705748763868785e-06, "loss": 0.0117, "step": 20040 }, { "epoch": 0.5966107747013226, "grad_norm": 0.1402880996465683, "learning_rate": 9.705163363209146e-06, "loss": 0.0073, "step": 20050 }, { "epoch": 0.5969083361849642, "grad_norm": 0.38123974204063416, "learning_rate": 9.704577398502644e-06, "loss": 0.0089, "step": 20060 }, { "epoch": 0.5972058976686058, "grad_norm": 0.1209920346736908, "learning_rate": 9.703990869819524e-06, "loss": 0.0063, "step": 20070 }, { "epoch": 0.5975034591522473, "grad_norm": 0.4964916408061981, "learning_rate": 9.703403777230096e-06, "loss": 0.0148, "step": 20080 }, { "epoch": 0.5978010206358889, "grad_norm": 0.2696128487586975, "learning_rate": 9.702816120804744e-06, "loss": 0.0066, "step": 20090 }, { "epoch": 0.5980985821195305, "grad_norm": 0.24698171019554138, "learning_rate": 9.702227900613913e-06, "loss": 0.0092, "step": 20100 }, { "epoch": 0.5983961436031721, "grad_norm": 0.32431116700172424, "learning_rate": 9.701639116728117e-06, "loss": 0.0075, "step": 20110 }, { "epoch": 0.5986937050868135, "grad_norm": 0.46128708124160767, "learning_rate": 9.701049769217938e-06, "loss": 0.0108, "step": 20120 }, { "epoch": 0.5989912665704551, "grad_norm": 0.21205510199069977, "learning_rate": 9.700459858154028e-06, "loss": 0.006, "step": 20130 }, { "epoch": 0.5992888280540967, "grad_norm": 0.1495111584663391, "learning_rate": 9.6998693836071e-06, "loss": 0.0089, "step": 20140 }, { "epoch": 0.5995863895377382, "grad_norm": 0.637485146522522, "learning_rate": 9.699278345647947e-06, "loss": 0.009, "step": 20150 }, { "epoch": 0.5998839510213798, "grad_norm": 0.5811619758605957, "learning_rate": 9.698686744347417e-06, "loss": 0.0118, "step": 20160 }, { "epoch": 0.6001815125050214, "grad_norm": 0.4733251631259918, "learning_rate": 9.698094579776429e-06, "loss": 0.01, "step": 20170 }, { "epoch": 0.600479073988663, "grad_norm": 0.4572403132915497, "learning_rate": 9.69750185200597e-06, "loss": 0.0083, "step": 20180 }, { "epoch": 0.6007766354723044, "grad_norm": 0.1501239538192749, "learning_rate": 9.6969085611071e-06, "loss": 0.0054, "step": 20190 }, { "epoch": 0.601074196955946, "grad_norm": 0.19097349047660828, "learning_rate": 9.696314707150939e-06, "loss": 0.0102, "step": 20200 }, { "epoch": 0.6013717584395876, "grad_norm": 0.36627987027168274, "learning_rate": 9.695720290208674e-06, "loss": 0.0123, "step": 20210 }, { "epoch": 0.6016693199232291, "grad_norm": 0.20594565570354462, "learning_rate": 9.695125310351567e-06, "loss": 0.0081, "step": 20220 }, { "epoch": 0.6019668814068707, "grad_norm": 0.20183469355106354, "learning_rate": 9.694529767650945e-06, "loss": 0.0059, "step": 20230 }, { "epoch": 0.6022644428905123, "grad_norm": 0.3131386637687683, "learning_rate": 9.693933662178194e-06, "loss": 0.008, "step": 20240 }, { "epoch": 0.6025620043741539, "grad_norm": 0.35803091526031494, "learning_rate": 9.693336994004779e-06, "loss": 0.006, "step": 20250 }, { "epoch": 0.6028595658577953, "grad_norm": 0.2902534008026123, "learning_rate": 9.692739763202225e-06, "loss": 0.008, "step": 20260 }, { "epoch": 0.6031571273414369, "grad_norm": 0.2901231646537781, "learning_rate": 9.692141969842129e-06, "loss": 0.0081, "step": 20270 }, { "epoch": 0.6034546888250785, "grad_norm": 0.5102410912513733, "learning_rate": 9.691543613996151e-06, "loss": 0.0091, "step": 20280 }, { "epoch": 0.60375225030872, "grad_norm": 0.31243813037872314, "learning_rate": 9.690944695736025e-06, "loss": 0.0066, "step": 20290 }, { "epoch": 0.6040498117923616, "grad_norm": 0.26462438702583313, "learning_rate": 9.690345215133544e-06, "loss": 0.0093, "step": 20300 }, { "epoch": 0.6043473732760032, "grad_norm": 0.6457266211509705, "learning_rate": 9.689745172260575e-06, "loss": 0.0084, "step": 20310 }, { "epoch": 0.6046449347596448, "grad_norm": 0.21627622842788696, "learning_rate": 9.689144567189049e-06, "loss": 0.0079, "step": 20320 }, { "epoch": 0.6049424962432862, "grad_norm": 0.3928670585155487, "learning_rate": 9.688543399990967e-06, "loss": 0.0121, "step": 20330 }, { "epoch": 0.6052400577269278, "grad_norm": 0.1896641105413437, "learning_rate": 9.687941670738394e-06, "loss": 0.0116, "step": 20340 }, { "epoch": 0.6055376192105694, "grad_norm": 0.44095638394355774, "learning_rate": 9.687339379503466e-06, "loss": 0.0084, "step": 20350 }, { "epoch": 0.6058351806942109, "grad_norm": 0.4608731269836426, "learning_rate": 9.686736526358383e-06, "loss": 0.0116, "step": 20360 }, { "epoch": 0.6061327421778525, "grad_norm": 0.1595456302165985, "learning_rate": 9.686133111375416e-06, "loss": 0.0116, "step": 20370 }, { "epoch": 0.6064303036614941, "grad_norm": 0.2439194917678833, "learning_rate": 9.685529134626898e-06, "loss": 0.0102, "step": 20380 }, { "epoch": 0.6067278651451357, "grad_norm": 0.16395531594753265, "learning_rate": 9.684924596185236e-06, "loss": 0.0086, "step": 20390 }, { "epoch": 0.6070254266287771, "grad_norm": 0.33158227801322937, "learning_rate": 9.6843194961229e-06, "loss": 0.0124, "step": 20400 }, { "epoch": 0.6073229881124187, "grad_norm": 0.3328937590122223, "learning_rate": 9.683713834512429e-06, "loss": 0.0064, "step": 20410 }, { "epoch": 0.6076205495960603, "grad_norm": 0.2665422856807709, "learning_rate": 9.683107611426426e-06, "loss": 0.0066, "step": 20420 }, { "epoch": 0.6079181110797018, "grad_norm": 0.33467620611190796, "learning_rate": 9.682500826937566e-06, "loss": 0.0139, "step": 20430 }, { "epoch": 0.6082156725633434, "grad_norm": 0.1826905459165573, "learning_rate": 9.681893481118592e-06, "loss": 0.0091, "step": 20440 }, { "epoch": 0.608513234046985, "grad_norm": 0.45849841833114624, "learning_rate": 9.681285574042305e-06, "loss": 0.0118, "step": 20450 }, { "epoch": 0.6088107955306266, "grad_norm": 0.484697163105011, "learning_rate": 9.680677105781587e-06, "loss": 0.0084, "step": 20460 }, { "epoch": 0.609108357014268, "grad_norm": 0.5309630632400513, "learning_rate": 9.680068076409373e-06, "loss": 0.0074, "step": 20470 }, { "epoch": 0.6094059184979096, "grad_norm": 0.20552726089954376, "learning_rate": 9.679458485998678e-06, "loss": 0.0094, "step": 20480 }, { "epoch": 0.6097034799815512, "grad_norm": 0.5081194639205933, "learning_rate": 9.678848334622577e-06, "loss": 0.0093, "step": 20490 }, { "epoch": 0.6100010414651927, "grad_norm": 0.3578824996948242, "learning_rate": 9.678237622354213e-06, "loss": 0.0087, "step": 20500 }, { "epoch": 0.6102986029488343, "grad_norm": 0.197836771607399, "learning_rate": 9.677626349266799e-06, "loss": 0.0105, "step": 20510 }, { "epoch": 0.6105961644324759, "grad_norm": 0.1596057265996933, "learning_rate": 9.677014515433611e-06, "loss": 0.0097, "step": 20520 }, { "epoch": 0.6108937259161175, "grad_norm": 0.24943184852600098, "learning_rate": 9.676402120927995e-06, "loss": 0.0141, "step": 20530 }, { "epoch": 0.6111912873997589, "grad_norm": 0.10930389165878296, "learning_rate": 9.675789165823365e-06, "loss": 0.0104, "step": 20540 }, { "epoch": 0.6114888488834005, "grad_norm": 0.42206165194511414, "learning_rate": 9.675175650193202e-06, "loss": 0.0072, "step": 20550 }, { "epoch": 0.6117864103670421, "grad_norm": 0.4122210741043091, "learning_rate": 9.67456157411105e-06, "loss": 0.012, "step": 20560 }, { "epoch": 0.6120839718506836, "grad_norm": 0.23461632430553436, "learning_rate": 9.673946937650526e-06, "loss": 0.0081, "step": 20570 }, { "epoch": 0.6123815333343252, "grad_norm": 0.2375238984823227, "learning_rate": 9.67333174088531e-06, "loss": 0.0096, "step": 20580 }, { "epoch": 0.6126790948179668, "grad_norm": 0.23594804108142853, "learning_rate": 9.672715983889152e-06, "loss": 0.01, "step": 20590 }, { "epoch": 0.6129766563016084, "grad_norm": 0.2564846873283386, "learning_rate": 9.672099666735866e-06, "loss": 0.0096, "step": 20600 }, { "epoch": 0.6132742177852499, "grad_norm": 1.1396507024765015, "learning_rate": 9.671482789499337e-06, "loss": 0.0076, "step": 20610 }, { "epoch": 0.6135717792688914, "grad_norm": 0.08571207523345947, "learning_rate": 9.670865352253513e-06, "loss": 0.0085, "step": 20620 }, { "epoch": 0.613869340752533, "grad_norm": 0.28254443407058716, "learning_rate": 9.670247355072415e-06, "loss": 0.0094, "step": 20630 }, { "epoch": 0.6141669022361745, "grad_norm": 0.5764407515525818, "learning_rate": 9.669628798030124e-06, "loss": 0.0093, "step": 20640 }, { "epoch": 0.6144644637198161, "grad_norm": 0.4088795483112335, "learning_rate": 9.669009681200794e-06, "loss": 0.0092, "step": 20650 }, { "epoch": 0.6147620252034577, "grad_norm": 0.37828773260116577, "learning_rate": 9.66839000465864e-06, "loss": 0.0086, "step": 20660 }, { "epoch": 0.6150595866870993, "grad_norm": 0.2863672971725464, "learning_rate": 9.667769768477952e-06, "loss": 0.0088, "step": 20670 }, { "epoch": 0.6153571481707408, "grad_norm": 0.30622774362564087, "learning_rate": 9.667148972733081e-06, "loss": 0.0091, "step": 20680 }, { "epoch": 0.6156547096543823, "grad_norm": 0.22975565493106842, "learning_rate": 9.666527617498447e-06, "loss": 0.0059, "step": 20690 }, { "epoch": 0.6159522711380239, "grad_norm": 0.6922864317893982, "learning_rate": 9.665905702848536e-06, "loss": 0.0092, "step": 20700 }, { "epoch": 0.6162498326216654, "grad_norm": 0.33339622616767883, "learning_rate": 9.665283228857903e-06, "loss": 0.0078, "step": 20710 }, { "epoch": 0.616547394105307, "grad_norm": 0.1456335484981537, "learning_rate": 9.66466019560117e-06, "loss": 0.0105, "step": 20720 }, { "epoch": 0.6168449555889486, "grad_norm": 0.18313580751419067, "learning_rate": 9.664036603153025e-06, "loss": 0.0085, "step": 20730 }, { "epoch": 0.6171425170725902, "grad_norm": 0.23098346590995789, "learning_rate": 9.66341245158822e-06, "loss": 0.0078, "step": 20740 }, { "epoch": 0.6174400785562317, "grad_norm": 0.34413301944732666, "learning_rate": 9.66278774098158e-06, "loss": 0.0075, "step": 20750 }, { "epoch": 0.6177376400398732, "grad_norm": 0.11571083217859268, "learning_rate": 9.662162471407996e-06, "loss": 0.0066, "step": 20760 }, { "epoch": 0.6180352015235148, "grad_norm": 0.19548307359218597, "learning_rate": 9.66153664294242e-06, "loss": 0.0102, "step": 20770 }, { "epoch": 0.6183327630071563, "grad_norm": 0.35306763648986816, "learning_rate": 9.660910255659878e-06, "loss": 0.0088, "step": 20780 }, { "epoch": 0.6186303244907979, "grad_norm": 0.10592693835496902, "learning_rate": 9.660283309635458e-06, "loss": 0.0089, "step": 20790 }, { "epoch": 0.6189278859744395, "grad_norm": 0.30208534002304077, "learning_rate": 9.65965580494432e-06, "loss": 0.0076, "step": 20800 }, { "epoch": 0.6192254474580811, "grad_norm": 0.2669471502304077, "learning_rate": 9.659027741661688e-06, "loss": 0.0111, "step": 20810 }, { "epoch": 0.6195230089417226, "grad_norm": 0.2934175431728363, "learning_rate": 9.658399119862853e-06, "loss": 0.0158, "step": 20820 }, { "epoch": 0.6198205704253641, "grad_norm": 0.32349148392677307, "learning_rate": 9.65776993962317e-06, "loss": 0.0094, "step": 20830 }, { "epoch": 0.6201181319090057, "grad_norm": 0.38871628046035767, "learning_rate": 9.657140201018066e-06, "loss": 0.0093, "step": 20840 }, { "epoch": 0.6204156933926472, "grad_norm": 0.3585462272167206, "learning_rate": 9.656509904123035e-06, "loss": 0.0077, "step": 20850 }, { "epoch": 0.6207132548762888, "grad_norm": 0.1463146060705185, "learning_rate": 9.655879049013632e-06, "loss": 0.0091, "step": 20860 }, { "epoch": 0.6210108163599304, "grad_norm": 0.41897687315940857, "learning_rate": 9.655247635765485e-06, "loss": 0.0073, "step": 20870 }, { "epoch": 0.621308377843572, "grad_norm": 0.36188507080078125, "learning_rate": 9.654615664454289e-06, "loss": 0.0126, "step": 20880 }, { "epoch": 0.6216059393272135, "grad_norm": 0.1860654205083847, "learning_rate": 9.6539831351558e-06, "loss": 0.0092, "step": 20890 }, { "epoch": 0.621903500810855, "grad_norm": 0.31541064381599426, "learning_rate": 9.653350047945846e-06, "loss": 0.0111, "step": 20900 }, { "epoch": 0.6222010622944966, "grad_norm": 0.3347232937812805, "learning_rate": 9.652716402900319e-06, "loss": 0.0088, "step": 20910 }, { "epoch": 0.6224986237781381, "grad_norm": 0.34870022535324097, "learning_rate": 9.65208220009518e-06, "loss": 0.0076, "step": 20920 }, { "epoch": 0.6227961852617797, "grad_norm": 0.4340633451938629, "learning_rate": 9.651447439606458e-06, "loss": 0.0095, "step": 20930 }, { "epoch": 0.6230937467454213, "grad_norm": 0.6429239511489868, "learning_rate": 9.650812121510246e-06, "loss": 0.0115, "step": 20940 }, { "epoch": 0.6233913082290629, "grad_norm": 0.3936198353767395, "learning_rate": 9.650176245882705e-06, "loss": 0.0094, "step": 20950 }, { "epoch": 0.6236888697127044, "grad_norm": 0.26862603425979614, "learning_rate": 9.649539812800061e-06, "loss": 0.0082, "step": 20960 }, { "epoch": 0.6239864311963459, "grad_norm": 0.414657860994339, "learning_rate": 9.64890282233861e-06, "loss": 0.0064, "step": 20970 }, { "epoch": 0.6242839926799875, "grad_norm": 0.1813056915998459, "learning_rate": 9.648265274574711e-06, "loss": 0.0067, "step": 20980 }, { "epoch": 0.624581554163629, "grad_norm": 0.28888261318206787, "learning_rate": 9.647627169584796e-06, "loss": 0.0096, "step": 20990 }, { "epoch": 0.6248791156472706, "grad_norm": 0.2951388955116272, "learning_rate": 9.646988507445358e-06, "loss": 0.0054, "step": 21000 }, { "epoch": 0.6251766771309122, "grad_norm": 0.22177286446094513, "learning_rate": 9.646349288232959e-06, "loss": 0.0087, "step": 21010 }, { "epoch": 0.6254742386145538, "grad_norm": 0.4517826735973358, "learning_rate": 9.645709512024226e-06, "loss": 0.0105, "step": 21020 }, { "epoch": 0.6257718000981953, "grad_norm": 0.22815421223640442, "learning_rate": 9.645069178895856e-06, "loss": 0.0097, "step": 21030 }, { "epoch": 0.6260693615818368, "grad_norm": 0.12498200684785843, "learning_rate": 9.64442828892461e-06, "loss": 0.0073, "step": 21040 }, { "epoch": 0.6263669230654784, "grad_norm": 0.3343292474746704, "learning_rate": 9.643786842187318e-06, "loss": 0.0088, "step": 21050 }, { "epoch": 0.6266644845491199, "grad_norm": 0.3086986243724823, "learning_rate": 9.643144838760876e-06, "loss": 0.0069, "step": 21060 }, { "epoch": 0.6269620460327615, "grad_norm": 0.4040556848049164, "learning_rate": 9.642502278722243e-06, "loss": 0.0091, "step": 21070 }, { "epoch": 0.6272596075164031, "grad_norm": 0.63780277967453, "learning_rate": 9.64185916214845e-06, "loss": 0.0116, "step": 21080 }, { "epoch": 0.6275571690000447, "grad_norm": 0.12061174213886261, "learning_rate": 9.641215489116593e-06, "loss": 0.008, "step": 21090 }, { "epoch": 0.6278547304836862, "grad_norm": 0.18224942684173584, "learning_rate": 9.640571259703835e-06, "loss": 0.0087, "step": 21100 }, { "epoch": 0.6281522919673278, "grad_norm": 0.2348356544971466, "learning_rate": 9.639926473987405e-06, "loss": 0.0063, "step": 21110 }, { "epoch": 0.6284498534509693, "grad_norm": 0.13267946243286133, "learning_rate": 9.639281132044595e-06, "loss": 0.0063, "step": 21120 }, { "epoch": 0.6287474149346108, "grad_norm": 0.34961146116256714, "learning_rate": 9.638635233952774e-06, "loss": 0.0077, "step": 21130 }, { "epoch": 0.6290449764182524, "grad_norm": 0.2852359712123871, "learning_rate": 9.637988779789365e-06, "loss": 0.0081, "step": 21140 }, { "epoch": 0.629342537901894, "grad_norm": 0.44487065076828003, "learning_rate": 9.63734176963187e-06, "loss": 0.0078, "step": 21150 }, { "epoch": 0.6296400993855356, "grad_norm": 0.26479199528694153, "learning_rate": 9.636694203557844e-06, "loss": 0.0114, "step": 21160 }, { "epoch": 0.6299376608691771, "grad_norm": 0.24395358562469482, "learning_rate": 9.636046081644923e-06, "loss": 0.0088, "step": 21170 }, { "epoch": 0.6302352223528187, "grad_norm": 0.4134235978126526, "learning_rate": 9.635397403970798e-06, "loss": 0.0087, "step": 21180 }, { "epoch": 0.6305327838364602, "grad_norm": 0.19199387729167938, "learning_rate": 9.634748170613233e-06, "loss": 0.0071, "step": 21190 }, { "epoch": 0.6308303453201017, "grad_norm": 0.4637651741504669, "learning_rate": 9.634098381650059e-06, "loss": 0.008, "step": 21200 }, { "epoch": 0.6311279068037433, "grad_norm": 0.28808578848838806, "learning_rate": 9.633448037159167e-06, "loss": 0.007, "step": 21210 }, { "epoch": 0.6314254682873849, "grad_norm": 0.30716970562934875, "learning_rate": 9.632797137218525e-06, "loss": 0.0069, "step": 21220 }, { "epoch": 0.6317230297710265, "grad_norm": 0.23644445836544037, "learning_rate": 9.632145681906156e-06, "loss": 0.0076, "step": 21230 }, { "epoch": 0.632020591254668, "grad_norm": 0.5870102047920227, "learning_rate": 9.63149367130016e-06, "loss": 0.0106, "step": 21240 }, { "epoch": 0.6323181527383096, "grad_norm": 0.234112948179245, "learning_rate": 9.630841105478697e-06, "loss": 0.0083, "step": 21250 }, { "epoch": 0.6326157142219511, "grad_norm": 0.2912760078907013, "learning_rate": 9.630187984519994e-06, "loss": 0.01, "step": 21260 }, { "epoch": 0.6329132757055926, "grad_norm": 0.3197513222694397, "learning_rate": 9.629534308502347e-06, "loss": 0.0056, "step": 21270 }, { "epoch": 0.6332108371892342, "grad_norm": 0.28619375824928284, "learning_rate": 9.628880077504118e-06, "loss": 0.0103, "step": 21280 }, { "epoch": 0.6335083986728758, "grad_norm": 0.4211459457874298, "learning_rate": 9.628225291603737e-06, "loss": 0.0099, "step": 21290 }, { "epoch": 0.6338059601565174, "grad_norm": 0.43222931027412415, "learning_rate": 9.627569950879696e-06, "loss": 0.0116, "step": 21300 }, { "epoch": 0.6341035216401589, "grad_norm": 0.23865224421024323, "learning_rate": 9.626914055410555e-06, "loss": 0.0109, "step": 21310 }, { "epoch": 0.6344010831238005, "grad_norm": 0.45670321583747864, "learning_rate": 9.626257605274945e-06, "loss": 0.0113, "step": 21320 }, { "epoch": 0.634698644607442, "grad_norm": 0.4615451693534851, "learning_rate": 9.625600600551558e-06, "loss": 0.0086, "step": 21330 }, { "epoch": 0.6349962060910835, "grad_norm": 0.20511996746063232, "learning_rate": 9.624943041319154e-06, "loss": 0.0075, "step": 21340 }, { "epoch": 0.6352937675747251, "grad_norm": 0.6461846828460693, "learning_rate": 9.624284927656564e-06, "loss": 0.0091, "step": 21350 }, { "epoch": 0.6355913290583667, "grad_norm": 0.1326315850019455, "learning_rate": 9.623626259642676e-06, "loss": 0.0089, "step": 21360 }, { "epoch": 0.6358888905420083, "grad_norm": 0.3176882863044739, "learning_rate": 9.622967037356454e-06, "loss": 0.0079, "step": 21370 }, { "epoch": 0.6361864520256498, "grad_norm": 0.27960720658302307, "learning_rate": 9.622307260876922e-06, "loss": 0.0054, "step": 21380 }, { "epoch": 0.6364840135092914, "grad_norm": 0.5185760855674744, "learning_rate": 9.621646930283174e-06, "loss": 0.008, "step": 21390 }, { "epoch": 0.6367815749929329, "grad_norm": 0.24382399022579193, "learning_rate": 9.620986045654371e-06, "loss": 0.0068, "step": 21400 }, { "epoch": 0.6370791364765744, "grad_norm": 0.44401222467422485, "learning_rate": 9.620324607069735e-06, "loss": 0.0072, "step": 21410 }, { "epoch": 0.637376697960216, "grad_norm": 0.33774909377098083, "learning_rate": 9.61966261460856e-06, "loss": 0.0063, "step": 21420 }, { "epoch": 0.6376742594438576, "grad_norm": 0.5435693860054016, "learning_rate": 9.619000068350206e-06, "loss": 0.0065, "step": 21430 }, { "epoch": 0.6379718209274992, "grad_norm": 0.23031480610370636, "learning_rate": 9.618336968374096e-06, "loss": 0.0106, "step": 21440 }, { "epoch": 0.6382693824111407, "grad_norm": 0.36933112144470215, "learning_rate": 9.61767331475972e-06, "loss": 0.0065, "step": 21450 }, { "epoch": 0.6385669438947823, "grad_norm": 0.298225998878479, "learning_rate": 9.617009107586641e-06, "loss": 0.009, "step": 21460 }, { "epoch": 0.6388645053784238, "grad_norm": 0.30258864164352417, "learning_rate": 9.616344346934477e-06, "loss": 0.0137, "step": 21470 }, { "epoch": 0.6391620668620653, "grad_norm": 0.09598720818758011, "learning_rate": 9.615679032882921e-06, "loss": 0.0073, "step": 21480 }, { "epoch": 0.6394596283457069, "grad_norm": 0.4146534204483032, "learning_rate": 9.615013165511728e-06, "loss": 0.0075, "step": 21490 }, { "epoch": 0.6397571898293485, "grad_norm": 0.11309127509593964, "learning_rate": 9.614346744900726e-06, "loss": 0.0063, "step": 21500 }, { "epoch": 0.6400547513129901, "grad_norm": 0.1883792281150818, "learning_rate": 9.613679771129798e-06, "loss": 0.0042, "step": 21510 }, { "epoch": 0.6403523127966316, "grad_norm": 0.30848684906959534, "learning_rate": 9.613012244278903e-06, "loss": 0.0085, "step": 21520 }, { "epoch": 0.6406498742802732, "grad_norm": 0.5700306296348572, "learning_rate": 9.612344164428063e-06, "loss": 0.0124, "step": 21530 }, { "epoch": 0.6409474357639147, "grad_norm": 0.2251908928155899, "learning_rate": 9.611675531657365e-06, "loss": 0.0174, "step": 21540 }, { "epoch": 0.6412449972475562, "grad_norm": 0.357551246881485, "learning_rate": 9.611006346046965e-06, "loss": 0.0076, "step": 21550 }, { "epoch": 0.6415425587311978, "grad_norm": 0.457070529460907, "learning_rate": 9.610336607677085e-06, "loss": 0.0079, "step": 21560 }, { "epoch": 0.6418401202148394, "grad_norm": 0.3272172510623932, "learning_rate": 9.609666316628006e-06, "loss": 0.0083, "step": 21570 }, { "epoch": 0.642137681698481, "grad_norm": 0.10113327205181122, "learning_rate": 9.60899547298009e-06, "loss": 0.0068, "step": 21580 }, { "epoch": 0.6424352431821225, "grad_norm": 0.15780064463615417, "learning_rate": 9.608324076813748e-06, "loss": 0.0094, "step": 21590 }, { "epoch": 0.6427328046657641, "grad_norm": 0.05337774008512497, "learning_rate": 9.607652128209473e-06, "loss": 0.0075, "step": 21600 }, { "epoch": 0.6430303661494057, "grad_norm": 0.28100210428237915, "learning_rate": 9.606979627247812e-06, "loss": 0.0065, "step": 21610 }, { "epoch": 0.6433279276330471, "grad_norm": 0.3445228636264801, "learning_rate": 9.606306574009384e-06, "loss": 0.0069, "step": 21620 }, { "epoch": 0.6436254891166887, "grad_norm": 0.32166051864624023, "learning_rate": 9.605632968574878e-06, "loss": 0.0098, "step": 21630 }, { "epoch": 0.6439230506003303, "grad_norm": 0.14538532495498657, "learning_rate": 9.60495881102504e-06, "loss": 0.0056, "step": 21640 }, { "epoch": 0.6442206120839719, "grad_norm": 0.22736941277980804, "learning_rate": 9.604284101440688e-06, "loss": 0.0098, "step": 21650 }, { "epoch": 0.6445181735676134, "grad_norm": 0.32877472043037415, "learning_rate": 9.603608839902704e-06, "loss": 0.0146, "step": 21660 }, { "epoch": 0.644815735051255, "grad_norm": 0.2237085998058319, "learning_rate": 9.60293302649204e-06, "loss": 0.0067, "step": 21670 }, { "epoch": 0.6451132965348966, "grad_norm": 0.2241733968257904, "learning_rate": 9.602256661289709e-06, "loss": 0.0112, "step": 21680 }, { "epoch": 0.645410858018538, "grad_norm": 0.3213943839073181, "learning_rate": 9.601579744376793e-06, "loss": 0.0064, "step": 21690 }, { "epoch": 0.6457084195021796, "grad_norm": 0.3018629848957062, "learning_rate": 9.60090227583444e-06, "loss": 0.0069, "step": 21700 }, { "epoch": 0.6460059809858212, "grad_norm": 0.29098057746887207, "learning_rate": 9.600224255743864e-06, "loss": 0.0071, "step": 21710 }, { "epoch": 0.6463035424694628, "grad_norm": 0.25671830773353577, "learning_rate": 9.599545684186345e-06, "loss": 0.0073, "step": 21720 }, { "epoch": 0.6466011039531043, "grad_norm": 0.21436867117881775, "learning_rate": 9.598866561243227e-06, "loss": 0.0048, "step": 21730 }, { "epoch": 0.6468986654367459, "grad_norm": 0.4128243327140808, "learning_rate": 9.598186886995925e-06, "loss": 0.0094, "step": 21740 }, { "epoch": 0.6471962269203875, "grad_norm": 0.15078042447566986, "learning_rate": 9.597506661525915e-06, "loss": 0.0047, "step": 21750 }, { "epoch": 0.6474937884040289, "grad_norm": 0.3413553237915039, "learning_rate": 9.596825884914742e-06, "loss": 0.0087, "step": 21760 }, { "epoch": 0.6477913498876705, "grad_norm": 0.13259214162826538, "learning_rate": 9.596144557244016e-06, "loss": 0.0058, "step": 21770 }, { "epoch": 0.6480889113713121, "grad_norm": 0.4482950270175934, "learning_rate": 9.595462678595414e-06, "loss": 0.0081, "step": 21780 }, { "epoch": 0.6483864728549537, "grad_norm": 0.2383595108985901, "learning_rate": 9.594780249050679e-06, "loss": 0.006, "step": 21790 }, { "epoch": 0.6486840343385952, "grad_norm": 0.19863295555114746, "learning_rate": 9.594097268691616e-06, "loss": 0.0061, "step": 21800 }, { "epoch": 0.6489815958222368, "grad_norm": 0.16273027658462524, "learning_rate": 9.593413737600105e-06, "loss": 0.0082, "step": 21810 }, { "epoch": 0.6492791573058784, "grad_norm": 0.2969605326652527, "learning_rate": 9.592729655858082e-06, "loss": 0.0073, "step": 21820 }, { "epoch": 0.6495767187895198, "grad_norm": 0.4254649579524994, "learning_rate": 9.592045023547555e-06, "loss": 0.0093, "step": 21830 }, { "epoch": 0.6498742802731614, "grad_norm": 0.20889905095100403, "learning_rate": 9.591359840750598e-06, "loss": 0.0096, "step": 21840 }, { "epoch": 0.650171841756803, "grad_norm": 0.16703487932682037, "learning_rate": 9.590674107549347e-06, "loss": 0.0079, "step": 21850 }, { "epoch": 0.6504694032404446, "grad_norm": 0.20025762915611267, "learning_rate": 9.589987824026009e-06, "loss": 0.0053, "step": 21860 }, { "epoch": 0.6507669647240861, "grad_norm": 0.4702557921409607, "learning_rate": 9.589300990262852e-06, "loss": 0.0068, "step": 21870 }, { "epoch": 0.6510645262077277, "grad_norm": 0.421667218208313, "learning_rate": 9.588613606342214e-06, "loss": 0.0085, "step": 21880 }, { "epoch": 0.6513620876913693, "grad_norm": 1.1677082777023315, "learning_rate": 9.587925672346497e-06, "loss": 0.0171, "step": 21890 }, { "epoch": 0.6516596491750107, "grad_norm": 0.44909489154815674, "learning_rate": 9.587237188358169e-06, "loss": 0.0092, "step": 21900 }, { "epoch": 0.6519572106586523, "grad_norm": 0.19469541311264038, "learning_rate": 9.586548154459767e-06, "loss": 0.0087, "step": 21910 }, { "epoch": 0.6522547721422939, "grad_norm": 0.23165346682071686, "learning_rate": 9.585858570733887e-06, "loss": 0.006, "step": 21920 }, { "epoch": 0.6525523336259355, "grad_norm": 0.2730174660682678, "learning_rate": 9.585168437263197e-06, "loss": 0.0092, "step": 21930 }, { "epoch": 0.652849895109577, "grad_norm": 0.21853278577327728, "learning_rate": 9.58447775413043e-06, "loss": 0.0057, "step": 21940 }, { "epoch": 0.6531474565932186, "grad_norm": 0.6362423896789551, "learning_rate": 9.583786521418381e-06, "loss": 0.0069, "step": 21950 }, { "epoch": 0.6534450180768602, "grad_norm": 0.17269901931285858, "learning_rate": 9.583094739209917e-06, "loss": 0.0097, "step": 21960 }, { "epoch": 0.6537425795605016, "grad_norm": 0.37299844622612, "learning_rate": 9.582402407587967e-06, "loss": 0.0078, "step": 21970 }, { "epoch": 0.6540401410441432, "grad_norm": 0.1729481965303421, "learning_rate": 9.581709526635525e-06, "loss": 0.0062, "step": 21980 }, { "epoch": 0.6543377025277848, "grad_norm": 0.24562771618366241, "learning_rate": 9.581016096435654e-06, "loss": 0.006, "step": 21990 }, { "epoch": 0.6546352640114264, "grad_norm": 0.15976306796073914, "learning_rate": 9.580322117071479e-06, "loss": 0.0056, "step": 22000 }, { "epoch": 0.6549328254950679, "grad_norm": 0.353225439786911, "learning_rate": 9.579627588626195e-06, "loss": 0.0105, "step": 22010 }, { "epoch": 0.6552303869787095, "grad_norm": 0.14967751502990723, "learning_rate": 9.57893251118306e-06, "loss": 0.0069, "step": 22020 }, { "epoch": 0.6555279484623511, "grad_norm": 0.24114981293678284, "learning_rate": 9.5782368848254e-06, "loss": 0.008, "step": 22030 }, { "epoch": 0.6558255099459925, "grad_norm": 0.45362433791160583, "learning_rate": 9.577540709636604e-06, "loss": 0.0074, "step": 22040 }, { "epoch": 0.6561230714296341, "grad_norm": 0.15075178444385529, "learning_rate": 9.57684398570013e-06, "loss": 0.0071, "step": 22050 }, { "epoch": 0.6564206329132757, "grad_norm": 0.17812663316726685, "learning_rate": 9.576146713099498e-06, "loss": 0.01, "step": 22060 }, { "epoch": 0.6567181943969173, "grad_norm": 0.47693079710006714, "learning_rate": 9.575448891918298e-06, "loss": 0.0129, "step": 22070 }, { "epoch": 0.6570157558805588, "grad_norm": 0.40227851271629333, "learning_rate": 9.57475052224018e-06, "loss": 0.0072, "step": 22080 }, { "epoch": 0.6573133173642004, "grad_norm": 0.348410427570343, "learning_rate": 9.574051604148867e-06, "loss": 0.006, "step": 22090 }, { "epoch": 0.657610878847842, "grad_norm": 0.36138689517974854, "learning_rate": 9.573352137728143e-06, "loss": 0.0089, "step": 22100 }, { "epoch": 0.6579084403314835, "grad_norm": 0.312491238117218, "learning_rate": 9.572652123061857e-06, "loss": 0.0088, "step": 22110 }, { "epoch": 0.658206001815125, "grad_norm": 0.19053012132644653, "learning_rate": 9.571951560233927e-06, "loss": 0.0063, "step": 22120 }, { "epoch": 0.6585035632987666, "grad_norm": 0.4799821078777313, "learning_rate": 9.571250449328338e-06, "loss": 0.0074, "step": 22130 }, { "epoch": 0.6588011247824082, "grad_norm": 0.3039401173591614, "learning_rate": 9.570548790429134e-06, "loss": 0.008, "step": 22140 }, { "epoch": 0.6590986862660497, "grad_norm": 0.631786584854126, "learning_rate": 9.56984658362043e-06, "loss": 0.0112, "step": 22150 }, { "epoch": 0.6593962477496913, "grad_norm": 0.3115386962890625, "learning_rate": 9.569143828986404e-06, "loss": 0.0084, "step": 22160 }, { "epoch": 0.6596938092333329, "grad_norm": 0.39143404364585876, "learning_rate": 9.568440526611304e-06, "loss": 0.009, "step": 22170 }, { "epoch": 0.6599913707169744, "grad_norm": 0.22813495993614197, "learning_rate": 9.56773667657944e-06, "loss": 0.0058, "step": 22180 }, { "epoch": 0.6602889322006159, "grad_norm": 0.38943254947662354, "learning_rate": 9.567032278975186e-06, "loss": 0.0076, "step": 22190 }, { "epoch": 0.6605864936842575, "grad_norm": 0.22629569470882416, "learning_rate": 9.566327333882985e-06, "loss": 0.0096, "step": 22200 }, { "epoch": 0.660884055167899, "grad_norm": 0.3047274351119995, "learning_rate": 9.565621841387347e-06, "loss": 0.0067, "step": 22210 }, { "epoch": 0.6611816166515406, "grad_norm": 0.35689061880111694, "learning_rate": 9.564915801572841e-06, "loss": 0.0069, "step": 22220 }, { "epoch": 0.6614791781351822, "grad_norm": 0.3649801015853882, "learning_rate": 9.56420921452411e-06, "loss": 0.0077, "step": 22230 }, { "epoch": 0.6617767396188238, "grad_norm": 0.4313599169254303, "learning_rate": 9.563502080325853e-06, "loss": 0.0078, "step": 22240 }, { "epoch": 0.6620743011024653, "grad_norm": 0.2361501008272171, "learning_rate": 9.562794399062847e-06, "loss": 0.0085, "step": 22250 }, { "epoch": 0.6623718625861068, "grad_norm": 0.1943630576133728, "learning_rate": 9.562086170819923e-06, "loss": 0.0046, "step": 22260 }, { "epoch": 0.6626694240697484, "grad_norm": 0.20277877151966095, "learning_rate": 9.561377395681983e-06, "loss": 0.0083, "step": 22270 }, { "epoch": 0.66296698555339, "grad_norm": 0.2849730849266052, "learning_rate": 9.560668073733993e-06, "loss": 0.0174, "step": 22280 }, { "epoch": 0.6632645470370315, "grad_norm": 0.552423894405365, "learning_rate": 9.559958205060988e-06, "loss": 0.0159, "step": 22290 }, { "epoch": 0.6635621085206731, "grad_norm": 0.24630896747112274, "learning_rate": 9.559247789748063e-06, "loss": 0.0067, "step": 22300 }, { "epoch": 0.6638596700043147, "grad_norm": 0.3973037004470825, "learning_rate": 9.558536827880384e-06, "loss": 0.0089, "step": 22310 }, { "epoch": 0.6641572314879562, "grad_norm": 0.2766541838645935, "learning_rate": 9.557825319543176e-06, "loss": 0.0093, "step": 22320 }, { "epoch": 0.6644547929715977, "grad_norm": 0.19068224728107452, "learning_rate": 9.557113264821738e-06, "loss": 0.006, "step": 22330 }, { "epoch": 0.6647523544552393, "grad_norm": 0.6485040187835693, "learning_rate": 9.556400663801425e-06, "loss": 0.0099, "step": 22340 }, { "epoch": 0.6650499159388809, "grad_norm": 0.22854335606098175, "learning_rate": 9.55568751656767e-06, "loss": 0.0076, "step": 22350 }, { "epoch": 0.6653474774225224, "grad_norm": 0.2738165855407715, "learning_rate": 9.554973823205955e-06, "loss": 0.0128, "step": 22360 }, { "epoch": 0.665645038906164, "grad_norm": 0.21260246634483337, "learning_rate": 9.554259583801838e-06, "loss": 0.0065, "step": 22370 }, { "epoch": 0.6659426003898056, "grad_norm": 0.37643787264823914, "learning_rate": 9.553544798440949e-06, "loss": 0.009, "step": 22380 }, { "epoch": 0.6662401618734471, "grad_norm": 0.22013817727565765, "learning_rate": 9.552829467208964e-06, "loss": 0.0077, "step": 22390 }, { "epoch": 0.6665377233570886, "grad_norm": 0.233629509806633, "learning_rate": 9.552113590191645e-06, "loss": 0.0097, "step": 22400 }, { "epoch": 0.6668352848407302, "grad_norm": 0.46405789256095886, "learning_rate": 9.551397167474804e-06, "loss": 0.006, "step": 22410 }, { "epoch": 0.6671328463243718, "grad_norm": 0.17627671360969543, "learning_rate": 9.550680199144326e-06, "loss": 0.0077, "step": 22420 }, { "epoch": 0.6674304078080133, "grad_norm": 0.10101522505283356, "learning_rate": 9.549962685286161e-06, "loss": 0.0089, "step": 22430 }, { "epoch": 0.6677279692916549, "grad_norm": 0.17074647545814514, "learning_rate": 9.549244625986322e-06, "loss": 0.0082, "step": 22440 }, { "epoch": 0.6680255307752965, "grad_norm": 0.37613120675086975, "learning_rate": 9.548526021330891e-06, "loss": 0.0077, "step": 22450 }, { "epoch": 0.668323092258938, "grad_norm": 0.3334188759326935, "learning_rate": 9.547806871406012e-06, "loss": 0.0074, "step": 22460 }, { "epoch": 0.6686206537425795, "grad_norm": 0.42881113290786743, "learning_rate": 9.547087176297892e-06, "loss": 0.0106, "step": 22470 }, { "epoch": 0.6689182152262211, "grad_norm": 0.07939434051513672, "learning_rate": 9.546366936092813e-06, "loss": 0.0107, "step": 22480 }, { "epoch": 0.6692157767098627, "grad_norm": 0.276473730802536, "learning_rate": 9.54564615087711e-06, "loss": 0.0071, "step": 22490 }, { "epoch": 0.6695133381935042, "grad_norm": 0.4513223171234131, "learning_rate": 9.544924820737195e-06, "loss": 0.0095, "step": 22500 }, { "epoch": 0.6698108996771458, "grad_norm": 0.11915582418441772, "learning_rate": 9.544202945759538e-06, "loss": 0.0062, "step": 22510 }, { "epoch": 0.6701084611607874, "grad_norm": 0.08711552619934082, "learning_rate": 9.543480526030674e-06, "loss": 0.0051, "step": 22520 }, { "epoch": 0.670406022644429, "grad_norm": 0.11667946726083755, "learning_rate": 9.542757561637205e-06, "loss": 0.0068, "step": 22530 }, { "epoch": 0.6707035841280705, "grad_norm": 0.1809021234512329, "learning_rate": 9.542034052665802e-06, "loss": 0.0101, "step": 22540 }, { "epoch": 0.671001145611712, "grad_norm": 0.26264333724975586, "learning_rate": 9.541309999203198e-06, "loss": 0.006, "step": 22550 }, { "epoch": 0.6712987070953536, "grad_norm": 0.3822880685329437, "learning_rate": 9.54058540133619e-06, "loss": 0.0107, "step": 22560 }, { "epoch": 0.6715962685789951, "grad_norm": 0.35901138186454773, "learning_rate": 9.539860259151638e-06, "loss": 0.0107, "step": 22570 }, { "epoch": 0.6718938300626367, "grad_norm": 0.1708342730998993, "learning_rate": 9.539134572736477e-06, "loss": 0.0076, "step": 22580 }, { "epoch": 0.6721913915462783, "grad_norm": 0.29876214265823364, "learning_rate": 9.538408342177699e-06, "loss": 0.0085, "step": 22590 }, { "epoch": 0.6724889530299198, "grad_norm": 0.24748577177524567, "learning_rate": 9.53768156756236e-06, "loss": 0.007, "step": 22600 }, { "epoch": 0.6727865145135614, "grad_norm": 0.2364530712366104, "learning_rate": 9.536954248977587e-06, "loss": 0.0142, "step": 22610 }, { "epoch": 0.6730840759972029, "grad_norm": 0.18767204880714417, "learning_rate": 9.536226386510571e-06, "loss": 0.0147, "step": 22620 }, { "epoch": 0.6733816374808445, "grad_norm": 0.2735312283039093, "learning_rate": 9.535497980248567e-06, "loss": 0.0055, "step": 22630 }, { "epoch": 0.673679198964486, "grad_norm": 0.31246089935302734, "learning_rate": 9.534769030278893e-06, "loss": 0.0067, "step": 22640 }, { "epoch": 0.6739767604481276, "grad_norm": 0.3684634864330292, "learning_rate": 9.534039536688933e-06, "loss": 0.0112, "step": 22650 }, { "epoch": 0.6742743219317692, "grad_norm": 0.25751668214797974, "learning_rate": 9.53330949956614e-06, "loss": 0.0075, "step": 22660 }, { "epoch": 0.6745718834154107, "grad_norm": 0.11550670862197876, "learning_rate": 9.53257891899803e-06, "loss": 0.0078, "step": 22670 }, { "epoch": 0.6748694448990523, "grad_norm": 0.15926821529865265, "learning_rate": 9.53184779507218e-06, "loss": 0.0104, "step": 22680 }, { "epoch": 0.6751670063826938, "grad_norm": 0.4076858460903168, "learning_rate": 9.53111612787624e-06, "loss": 0.014, "step": 22690 }, { "epoch": 0.6754645678663354, "grad_norm": 0.3055570721626282, "learning_rate": 9.530383917497921e-06, "loss": 0.0082, "step": 22700 }, { "epoch": 0.6757621293499769, "grad_norm": 0.10764236003160477, "learning_rate": 9.529651164024997e-06, "loss": 0.0063, "step": 22710 }, { "epoch": 0.6760596908336185, "grad_norm": 0.27820727229118347, "learning_rate": 9.528917867545308e-06, "loss": 0.0058, "step": 22720 }, { "epoch": 0.6763572523172601, "grad_norm": 0.321627676486969, "learning_rate": 9.528184028146765e-06, "loss": 0.0072, "step": 22730 }, { "epoch": 0.6766548138009016, "grad_norm": 0.19322319328784943, "learning_rate": 9.527449645917334e-06, "loss": 0.0078, "step": 22740 }, { "epoch": 0.6769523752845432, "grad_norm": 0.23644495010375977, "learning_rate": 9.526714720945057e-06, "loss": 0.0066, "step": 22750 }, { "epoch": 0.6772499367681847, "grad_norm": 0.28218379616737366, "learning_rate": 9.525979253318031e-06, "loss": 0.0061, "step": 22760 }, { "epoch": 0.6775474982518263, "grad_norm": 0.3029440939426422, "learning_rate": 9.525243243124425e-06, "loss": 0.0061, "step": 22770 }, { "epoch": 0.6778450597354678, "grad_norm": 0.36275559663772583, "learning_rate": 9.52450669045247e-06, "loss": 0.0112, "step": 22780 }, { "epoch": 0.6781426212191094, "grad_norm": 0.2032415121793747, "learning_rate": 9.52376959539046e-06, "loss": 0.0052, "step": 22790 }, { "epoch": 0.678440182702751, "grad_norm": 0.43924838304519653, "learning_rate": 9.52303195802676e-06, "loss": 0.0082, "step": 22800 }, { "epoch": 0.6787377441863925, "grad_norm": 0.2728802263736725, "learning_rate": 9.522293778449799e-06, "loss": 0.0071, "step": 22810 }, { "epoch": 0.6790353056700341, "grad_norm": 0.33144205808639526, "learning_rate": 9.521555056748062e-06, "loss": 0.0084, "step": 22820 }, { "epoch": 0.6793328671536756, "grad_norm": 0.21254205703735352, "learning_rate": 9.52081579301011e-06, "loss": 0.0108, "step": 22830 }, { "epoch": 0.6796304286373172, "grad_norm": 0.22322341799736023, "learning_rate": 9.520075987324565e-06, "loss": 0.0079, "step": 22840 }, { "epoch": 0.6799279901209587, "grad_norm": 0.3435145914554596, "learning_rate": 9.519335639780112e-06, "loss": 0.0071, "step": 22850 }, { "epoch": 0.6802255516046003, "grad_norm": 0.20865249633789062, "learning_rate": 9.518594750465504e-06, "loss": 0.0084, "step": 22860 }, { "epoch": 0.6805231130882419, "grad_norm": 0.16214802861213684, "learning_rate": 9.517853319469556e-06, "loss": 0.0095, "step": 22870 }, { "epoch": 0.6808206745718834, "grad_norm": 0.28085339069366455, "learning_rate": 9.51711134688115e-06, "loss": 0.009, "step": 22880 }, { "epoch": 0.681118236055525, "grad_norm": 0.29939794540405273, "learning_rate": 9.516368832789231e-06, "loss": 0.0112, "step": 22890 }, { "epoch": 0.6814157975391665, "grad_norm": 0.18707236647605896, "learning_rate": 9.515625777282814e-06, "loss": 0.0054, "step": 22900 }, { "epoch": 0.681713359022808, "grad_norm": 0.2615492045879364, "learning_rate": 9.514882180450973e-06, "loss": 0.0108, "step": 22910 }, { "epoch": 0.6820109205064496, "grad_norm": 0.4332752823829651, "learning_rate": 9.514138042382849e-06, "loss": 0.0078, "step": 22920 }, { "epoch": 0.6823084819900912, "grad_norm": 0.15594765543937683, "learning_rate": 9.513393363167646e-06, "loss": 0.005, "step": 22930 }, { "epoch": 0.6826060434737328, "grad_norm": 0.4482266902923584, "learning_rate": 9.51264814289464e-06, "loss": 0.0101, "step": 22940 }, { "epoch": 0.6829036049573743, "grad_norm": 0.2924148142337799, "learning_rate": 9.511902381653163e-06, "loss": 0.0074, "step": 22950 }, { "epoch": 0.6832011664410159, "grad_norm": 0.3962571620941162, "learning_rate": 9.511156079532615e-06, "loss": 0.0073, "step": 22960 }, { "epoch": 0.6834987279246574, "grad_norm": 0.9830182790756226, "learning_rate": 9.510409236622463e-06, "loss": 0.0116, "step": 22970 }, { "epoch": 0.683796289408299, "grad_norm": 0.4424169361591339, "learning_rate": 9.509661853012237e-06, "loss": 0.0091, "step": 22980 }, { "epoch": 0.6840938508919405, "grad_norm": 0.35413146018981934, "learning_rate": 9.508913928791532e-06, "loss": 0.0075, "step": 22990 }, { "epoch": 0.6843914123755821, "grad_norm": 0.23701056838035583, "learning_rate": 9.508165464050008e-06, "loss": 0.0047, "step": 23000 }, { "epoch": 0.6846889738592237, "grad_norm": 0.2571433186531067, "learning_rate": 9.50741645887739e-06, "loss": 0.0123, "step": 23010 }, { "epoch": 0.6849865353428652, "grad_norm": 0.17078490555286407, "learning_rate": 9.506666913363467e-06, "loss": 0.0148, "step": 23020 }, { "epoch": 0.6852840968265068, "grad_norm": 0.19799865782260895, "learning_rate": 9.505916827598093e-06, "loss": 0.0082, "step": 23030 }, { "epoch": 0.6855816583101484, "grad_norm": 0.15861642360687256, "learning_rate": 9.505166201671188e-06, "loss": 0.0061, "step": 23040 }, { "epoch": 0.6858792197937899, "grad_norm": 0.3790207803249359, "learning_rate": 9.504415035672734e-06, "loss": 0.0079, "step": 23050 }, { "epoch": 0.6861767812774314, "grad_norm": 0.2717900276184082, "learning_rate": 9.503663329692781e-06, "loss": 0.0084, "step": 23060 }, { "epoch": 0.686474342761073, "grad_norm": 0.25889715552330017, "learning_rate": 9.50291108382144e-06, "loss": 0.0147, "step": 23070 }, { "epoch": 0.6867719042447146, "grad_norm": 0.3902670443058014, "learning_rate": 9.502158298148891e-06, "loss": 0.0093, "step": 23080 }, { "epoch": 0.6870694657283561, "grad_norm": 0.581488311290741, "learning_rate": 9.501404972765377e-06, "loss": 0.0054, "step": 23090 }, { "epoch": 0.6873670272119977, "grad_norm": 0.15847676992416382, "learning_rate": 9.500651107761203e-06, "loss": 0.0071, "step": 23100 }, { "epoch": 0.6876645886956393, "grad_norm": 0.3761844336986542, "learning_rate": 9.499896703226746e-06, "loss": 0.0075, "step": 23110 }, { "epoch": 0.6879621501792808, "grad_norm": 0.4010442793369293, "learning_rate": 9.499141759252437e-06, "loss": 0.0101, "step": 23120 }, { "epoch": 0.6882597116629223, "grad_norm": 0.28095513582229614, "learning_rate": 9.498386275928779e-06, "loss": 0.008, "step": 23130 }, { "epoch": 0.6885572731465639, "grad_norm": 0.33596283197402954, "learning_rate": 9.497630253346339e-06, "loss": 0.0126, "step": 23140 }, { "epoch": 0.6888548346302055, "grad_norm": 0.15468545258045197, "learning_rate": 9.496873691595749e-06, "loss": 0.0098, "step": 23150 }, { "epoch": 0.689152396113847, "grad_norm": 0.57514488697052, "learning_rate": 9.4961165907677e-06, "loss": 0.0135, "step": 23160 }, { "epoch": 0.6894499575974886, "grad_norm": 0.3120264708995819, "learning_rate": 9.495358950952957e-06, "loss": 0.007, "step": 23170 }, { "epoch": 0.6897475190811302, "grad_norm": 0.08490779995918274, "learning_rate": 9.494600772242342e-06, "loss": 0.01, "step": 23180 }, { "epoch": 0.6900450805647717, "grad_norm": 0.18416127562522888, "learning_rate": 9.493842054726743e-06, "loss": 0.0065, "step": 23190 }, { "epoch": 0.6903426420484132, "grad_norm": 0.32382795214653015, "learning_rate": 9.493082798497117e-06, "loss": 0.0085, "step": 23200 }, { "epoch": 0.6906402035320548, "grad_norm": 0.38022366166114807, "learning_rate": 9.49232300364448e-06, "loss": 0.0105, "step": 23210 }, { "epoch": 0.6909377650156964, "grad_norm": 0.24550127983093262, "learning_rate": 9.491562670259916e-06, "loss": 0.0095, "step": 23220 }, { "epoch": 0.6912353264993379, "grad_norm": 0.308030903339386, "learning_rate": 9.49080179843457e-06, "loss": 0.0086, "step": 23230 }, { "epoch": 0.6915328879829795, "grad_norm": 0.5341984629631042, "learning_rate": 9.490040388259656e-06, "loss": 0.0068, "step": 23240 }, { "epoch": 0.6918304494666211, "grad_norm": 0.13028036057949066, "learning_rate": 9.489278439826451e-06, "loss": 0.01, "step": 23250 }, { "epoch": 0.6921280109502626, "grad_norm": 0.5514881014823914, "learning_rate": 9.488515953226295e-06, "loss": 0.0106, "step": 23260 }, { "epoch": 0.6924255724339041, "grad_norm": 0.18416330218315125, "learning_rate": 9.487752928550593e-06, "loss": 0.0088, "step": 23270 }, { "epoch": 0.6927231339175457, "grad_norm": 0.25475823879241943, "learning_rate": 9.486989365890817e-06, "loss": 0.0072, "step": 23280 }, { "epoch": 0.6930206954011873, "grad_norm": 0.36381611227989197, "learning_rate": 9.4862252653385e-06, "loss": 0.0097, "step": 23290 }, { "epoch": 0.6933182568848288, "grad_norm": 0.25674140453338623, "learning_rate": 9.485460626985243e-06, "loss": 0.0053, "step": 23300 }, { "epoch": 0.6936158183684704, "grad_norm": 0.4083678424358368, "learning_rate": 9.484695450922708e-06, "loss": 0.0113, "step": 23310 }, { "epoch": 0.693913379852112, "grad_norm": 0.2804156541824341, "learning_rate": 9.483929737242624e-06, "loss": 0.0054, "step": 23320 }, { "epoch": 0.6942109413357535, "grad_norm": 0.2860485911369324, "learning_rate": 9.48316348603678e-06, "loss": 0.007, "step": 23330 }, { "epoch": 0.694508502819395, "grad_norm": 0.2607080340385437, "learning_rate": 9.482396697397037e-06, "loss": 0.0112, "step": 23340 }, { "epoch": 0.6948060643030366, "grad_norm": 0.23199696838855743, "learning_rate": 9.481629371415315e-06, "loss": 0.0064, "step": 23350 }, { "epoch": 0.6951036257866782, "grad_norm": 0.5124818086624146, "learning_rate": 9.4808615081836e-06, "loss": 0.0117, "step": 23360 }, { "epoch": 0.6954011872703197, "grad_norm": 0.21213658154010773, "learning_rate": 9.48009310779394e-06, "loss": 0.0099, "step": 23370 }, { "epoch": 0.6956987487539613, "grad_norm": 0.28792840242385864, "learning_rate": 9.479324170338451e-06, "loss": 0.0068, "step": 23380 }, { "epoch": 0.6959963102376029, "grad_norm": 0.1737380474805832, "learning_rate": 9.478554695909312e-06, "loss": 0.0051, "step": 23390 }, { "epoch": 0.6962938717212444, "grad_norm": 0.35679444670677185, "learning_rate": 9.477784684598768e-06, "loss": 0.0068, "step": 23400 }, { "epoch": 0.6965914332048859, "grad_norm": 0.12451303005218506, "learning_rate": 9.477014136499125e-06, "loss": 0.0102, "step": 23410 }, { "epoch": 0.6968889946885275, "grad_norm": 0.39306938648223877, "learning_rate": 9.476243051702754e-06, "loss": 0.0097, "step": 23420 }, { "epoch": 0.6971865561721691, "grad_norm": 0.29731640219688416, "learning_rate": 9.475471430302092e-06, "loss": 0.008, "step": 23430 }, { "epoch": 0.6974841176558106, "grad_norm": 0.3225642442703247, "learning_rate": 9.474699272389641e-06, "loss": 0.009, "step": 23440 }, { "epoch": 0.6977816791394522, "grad_norm": 0.16904444992542267, "learning_rate": 9.473926578057962e-06, "loss": 0.0088, "step": 23450 }, { "epoch": 0.6980792406230938, "grad_norm": 0.24002428352832794, "learning_rate": 9.47315334739969e-06, "loss": 0.0086, "step": 23460 }, { "epoch": 0.6983768021067353, "grad_norm": 0.18784688413143158, "learning_rate": 9.472379580507514e-06, "loss": 0.0067, "step": 23470 }, { "epoch": 0.6986743635903768, "grad_norm": 0.33852848410606384, "learning_rate": 9.471605277474193e-06, "loss": 0.007, "step": 23480 }, { "epoch": 0.6989719250740184, "grad_norm": 0.36606618762016296, "learning_rate": 9.470830438392552e-06, "loss": 0.009, "step": 23490 }, { "epoch": 0.69926948655766, "grad_norm": 0.1967165619134903, "learning_rate": 9.470055063355474e-06, "loss": 0.0085, "step": 23500 }, { "epoch": 0.6995670480413015, "grad_norm": 0.3895793557167053, "learning_rate": 9.46927915245591e-06, "loss": 0.0068, "step": 23510 }, { "epoch": 0.6998646095249431, "grad_norm": 0.3596702814102173, "learning_rate": 9.468502705786876e-06, "loss": 0.0064, "step": 23520 }, { "epoch": 0.7001621710085847, "grad_norm": 0.9739412069320679, "learning_rate": 9.46772572344145e-06, "loss": 0.0116, "step": 23530 }, { "epoch": 0.7004597324922263, "grad_norm": 0.2924022376537323, "learning_rate": 9.466948205512775e-06, "loss": 0.0052, "step": 23540 }, { "epoch": 0.7007572939758677, "grad_norm": 0.2599669098854065, "learning_rate": 9.466170152094062e-06, "loss": 0.0081, "step": 23550 }, { "epoch": 0.7010548554595093, "grad_norm": 0.7740808725357056, "learning_rate": 9.465391563278578e-06, "loss": 0.009, "step": 23560 }, { "epoch": 0.7013524169431509, "grad_norm": 0.3148338198661804, "learning_rate": 9.464612439159661e-06, "loss": 0.0096, "step": 23570 }, { "epoch": 0.7016499784267924, "grad_norm": 0.2855739891529083, "learning_rate": 9.46383277983071e-06, "loss": 0.0079, "step": 23580 }, { "epoch": 0.701947539910434, "grad_norm": 0.3664199709892273, "learning_rate": 9.463052585385193e-06, "loss": 0.0078, "step": 23590 }, { "epoch": 0.7022451013940756, "grad_norm": 0.13584080338478088, "learning_rate": 9.462271855916634e-06, "loss": 0.0071, "step": 23600 }, { "epoch": 0.7025426628777172, "grad_norm": 0.2758374512195587, "learning_rate": 9.461490591518627e-06, "loss": 0.0074, "step": 23610 }, { "epoch": 0.7028402243613586, "grad_norm": 0.23744319379329681, "learning_rate": 9.460708792284828e-06, "loss": 0.0061, "step": 23620 }, { "epoch": 0.7031377858450002, "grad_norm": 0.17862553894519806, "learning_rate": 9.459926458308959e-06, "loss": 0.0067, "step": 23630 }, { "epoch": 0.7034353473286418, "grad_norm": 0.2599029541015625, "learning_rate": 9.459143589684805e-06, "loss": 0.0048, "step": 23640 }, { "epoch": 0.7037329088122833, "grad_norm": 0.17413091659545898, "learning_rate": 9.458360186506212e-06, "loss": 0.0055, "step": 23650 }, { "epoch": 0.7040304702959249, "grad_norm": 0.23669977486133575, "learning_rate": 9.457576248867098e-06, "loss": 0.0077, "step": 23660 }, { "epoch": 0.7043280317795665, "grad_norm": 0.7091428637504578, "learning_rate": 9.456791776861436e-06, "loss": 0.0095, "step": 23670 }, { "epoch": 0.7046255932632081, "grad_norm": 0.4380301237106323, "learning_rate": 9.456006770583267e-06, "loss": 0.0069, "step": 23680 }, { "epoch": 0.7049231547468495, "grad_norm": 0.28348055481910706, "learning_rate": 9.455221230126702e-06, "loss": 0.0073, "step": 23690 }, { "epoch": 0.7052207162304911, "grad_norm": 0.16162382066249847, "learning_rate": 9.454435155585901e-06, "loss": 0.0081, "step": 23700 }, { "epoch": 0.7055182777141327, "grad_norm": 0.489422082901001, "learning_rate": 9.453648547055106e-06, "loss": 0.0079, "step": 23710 }, { "epoch": 0.7058158391977742, "grad_norm": 0.3616507649421692, "learning_rate": 9.452861404628608e-06, "loss": 0.0114, "step": 23720 }, { "epoch": 0.7061134006814158, "grad_norm": 0.5985490083694458, "learning_rate": 9.452073728400772e-06, "loss": 0.0076, "step": 23730 }, { "epoch": 0.7064109621650574, "grad_norm": 0.33024805784225464, "learning_rate": 9.451285518466022e-06, "loss": 0.0064, "step": 23740 }, { "epoch": 0.706708523648699, "grad_norm": 0.3317938446998596, "learning_rate": 9.450496774918845e-06, "loss": 0.0085, "step": 23750 }, { "epoch": 0.7070060851323404, "grad_norm": 0.4753468632698059, "learning_rate": 9.449707497853799e-06, "loss": 0.0091, "step": 23760 }, { "epoch": 0.707303646615982, "grad_norm": 0.1300479918718338, "learning_rate": 9.448917687365497e-06, "loss": 0.0065, "step": 23770 }, { "epoch": 0.7076012080996236, "grad_norm": 1.4134365320205688, "learning_rate": 9.448127343548622e-06, "loss": 0.0144, "step": 23780 }, { "epoch": 0.7078987695832651, "grad_norm": 0.22941601276397705, "learning_rate": 9.44733646649792e-06, "loss": 0.0056, "step": 23790 }, { "epoch": 0.7081963310669067, "grad_norm": 0.5055261254310608, "learning_rate": 9.446545056308199e-06, "loss": 0.0098, "step": 23800 }, { "epoch": 0.7084938925505483, "grad_norm": 0.2454095482826233, "learning_rate": 9.44575311307433e-06, "loss": 0.0045, "step": 23810 }, { "epoch": 0.7087914540341899, "grad_norm": 0.20497505366802216, "learning_rate": 9.444960636891252e-06, "loss": 0.0078, "step": 23820 }, { "epoch": 0.7090890155178313, "grad_norm": 0.39716285467147827, "learning_rate": 9.444167627853965e-06, "loss": 0.0104, "step": 23830 }, { "epoch": 0.7093865770014729, "grad_norm": 0.3986462354660034, "learning_rate": 9.443374086057533e-06, "loss": 0.0104, "step": 23840 }, { "epoch": 0.7096841384851145, "grad_norm": 0.3229111135005951, "learning_rate": 9.442580011597088e-06, "loss": 0.0067, "step": 23850 }, { "epoch": 0.709981699968756, "grad_norm": 0.35461294651031494, "learning_rate": 9.441785404567815e-06, "loss": 0.0061, "step": 23860 }, { "epoch": 0.7102792614523976, "grad_norm": 0.21694792807102203, "learning_rate": 9.440990265064977e-06, "loss": 0.01, "step": 23870 }, { "epoch": 0.7105768229360392, "grad_norm": 0.26987549662590027, "learning_rate": 9.440194593183892e-06, "loss": 0.0088, "step": 23880 }, { "epoch": 0.7108743844196808, "grad_norm": 0.2382144331932068, "learning_rate": 9.439398389019943e-06, "loss": 0.0081, "step": 23890 }, { "epoch": 0.7111719459033222, "grad_norm": 0.2915726900100708, "learning_rate": 9.438601652668577e-06, "loss": 0.0108, "step": 23900 }, { "epoch": 0.7114695073869638, "grad_norm": 0.18088024854660034, "learning_rate": 9.437804384225306e-06, "loss": 0.0052, "step": 23910 }, { "epoch": 0.7117670688706054, "grad_norm": 0.3602524399757385, "learning_rate": 9.437006583785706e-06, "loss": 0.0115, "step": 23920 }, { "epoch": 0.7120646303542469, "grad_norm": 0.3232496678829193, "learning_rate": 9.436208251445415e-06, "loss": 0.0078, "step": 23930 }, { "epoch": 0.7123621918378885, "grad_norm": 0.2802635431289673, "learning_rate": 9.435409387300136e-06, "loss": 0.0055, "step": 23940 }, { "epoch": 0.7126597533215301, "grad_norm": 0.18389847874641418, "learning_rate": 9.434609991445635e-06, "loss": 0.0054, "step": 23950 }, { "epoch": 0.7129573148051717, "grad_norm": 1.0408180952072144, "learning_rate": 9.433810063977742e-06, "loss": 0.0052, "step": 23960 }, { "epoch": 0.7132548762888131, "grad_norm": 0.25499075651168823, "learning_rate": 9.433009604992353e-06, "loss": 0.0084, "step": 23970 }, { "epoch": 0.7135524377724547, "grad_norm": 0.2884058356285095, "learning_rate": 9.432208614585424e-06, "loss": 0.0069, "step": 23980 }, { "epoch": 0.7138499992560963, "grad_norm": 0.39327412843704224, "learning_rate": 9.431407092852975e-06, "loss": 0.0077, "step": 23990 }, { "epoch": 0.7141475607397378, "grad_norm": 0.2176707237958908, "learning_rate": 9.430605039891094e-06, "loss": 0.0072, "step": 24000 }, { "epoch": 0.7144451222233794, "grad_norm": 0.24325436353683472, "learning_rate": 9.429802455795928e-06, "loss": 0.0087, "step": 24010 }, { "epoch": 0.714742683707021, "grad_norm": 0.27710163593292236, "learning_rate": 9.42899934066369e-06, "loss": 0.0069, "step": 24020 }, { "epoch": 0.7150402451906626, "grad_norm": 0.14953969419002533, "learning_rate": 9.428195694590654e-06, "loss": 0.0076, "step": 24030 }, { "epoch": 0.7153378066743041, "grad_norm": 0.3801277279853821, "learning_rate": 9.427391517673164e-06, "loss": 0.0075, "step": 24040 }, { "epoch": 0.7156353681579456, "grad_norm": 0.23560824990272522, "learning_rate": 9.426586810007621e-06, "loss": 0.0066, "step": 24050 }, { "epoch": 0.7159329296415872, "grad_norm": 0.39100053906440735, "learning_rate": 9.425781571690488e-06, "loss": 0.0111, "step": 24060 }, { "epoch": 0.7162304911252287, "grad_norm": 0.3030640184879303, "learning_rate": 9.424975802818304e-06, "loss": 0.0061, "step": 24070 }, { "epoch": 0.7165280526088703, "grad_norm": 0.2758542001247406, "learning_rate": 9.424169503487655e-06, "loss": 0.0068, "step": 24080 }, { "epoch": 0.7168256140925119, "grad_norm": 0.3129606246948242, "learning_rate": 9.423362673795204e-06, "loss": 0.01, "step": 24090 }, { "epoch": 0.7171231755761535, "grad_norm": 0.031234605237841606, "learning_rate": 9.422555313837671e-06, "loss": 0.0065, "step": 24100 }, { "epoch": 0.717420737059795, "grad_norm": 0.28471535444259644, "learning_rate": 9.421747423711839e-06, "loss": 0.0095, "step": 24110 }, { "epoch": 0.7177182985434365, "grad_norm": 0.3587903082370758, "learning_rate": 9.420939003514558e-06, "loss": 0.0071, "step": 24120 }, { "epoch": 0.7180158600270781, "grad_norm": 38.08343505859375, "learning_rate": 9.420130053342742e-06, "loss": 0.0668, "step": 24130 }, { "epoch": 0.7183134215107196, "grad_norm": 0.18899226188659668, "learning_rate": 9.419320573293364e-06, "loss": 0.0075, "step": 24140 }, { "epoch": 0.7186109829943612, "grad_norm": 0.1534000039100647, "learning_rate": 9.418510563463463e-06, "loss": 0.0072, "step": 24150 }, { "epoch": 0.7189085444780028, "grad_norm": 0.2289264053106308, "learning_rate": 9.417700023950143e-06, "loss": 0.0161, "step": 24160 }, { "epoch": 0.7192061059616444, "grad_norm": 0.12942136824131012, "learning_rate": 9.416888954850567e-06, "loss": 0.0087, "step": 24170 }, { "epoch": 0.7195036674452859, "grad_norm": 0.4599412977695465, "learning_rate": 9.41607735626197e-06, "loss": 0.0067, "step": 24180 }, { "epoch": 0.7198012289289274, "grad_norm": 0.5763114094734192, "learning_rate": 9.415265228281641e-06, "loss": 0.0106, "step": 24190 }, { "epoch": 0.720098790412569, "grad_norm": 0.24529683589935303, "learning_rate": 9.414452571006936e-06, "loss": 0.0109, "step": 24200 }, { "epoch": 0.7203963518962105, "grad_norm": 0.12278494238853455, "learning_rate": 9.413639384535278e-06, "loss": 0.0078, "step": 24210 }, { "epoch": 0.7206939133798521, "grad_norm": 0.24178126454353333, "learning_rate": 9.412825668964147e-06, "loss": 0.0091, "step": 24220 }, { "epoch": 0.7209914748634937, "grad_norm": 0.45152223110198975, "learning_rate": 9.412011424391094e-06, "loss": 0.0095, "step": 24230 }, { "epoch": 0.7212890363471353, "grad_norm": 0.22631224989891052, "learning_rate": 9.411196650913725e-06, "loss": 0.0075, "step": 24240 }, { "epoch": 0.7215865978307768, "grad_norm": 0.28993546962738037, "learning_rate": 9.410381348629715e-06, "loss": 0.0081, "step": 24250 }, { "epoch": 0.7218841593144183, "grad_norm": 0.19085030257701874, "learning_rate": 9.409565517636803e-06, "loss": 0.0056, "step": 24260 }, { "epoch": 0.7221817207980599, "grad_norm": 0.13904395699501038, "learning_rate": 9.408749158032788e-06, "loss": 0.0055, "step": 24270 }, { "epoch": 0.7224792822817014, "grad_norm": 0.25390148162841797, "learning_rate": 9.407932269915534e-06, "loss": 0.0105, "step": 24280 }, { "epoch": 0.722776843765343, "grad_norm": 0.23847977817058563, "learning_rate": 9.407114853382965e-06, "loss": 0.0083, "step": 24290 }, { "epoch": 0.7230744052489846, "grad_norm": 0.24267135560512543, "learning_rate": 9.406296908533076e-06, "loss": 0.0061, "step": 24300 }, { "epoch": 0.7233719667326262, "grad_norm": 0.28238075971603394, "learning_rate": 9.40547843546392e-06, "loss": 0.0083, "step": 24310 }, { "epoch": 0.7236695282162677, "grad_norm": 0.15659262239933014, "learning_rate": 9.404659434273614e-06, "loss": 0.0069, "step": 24320 }, { "epoch": 0.7239670896999092, "grad_norm": 0.25619012117385864, "learning_rate": 9.403839905060335e-06, "loss": 0.0073, "step": 24330 }, { "epoch": 0.7242646511835508, "grad_norm": 0.18978920578956604, "learning_rate": 9.403019847922332e-06, "loss": 0.0056, "step": 24340 }, { "epoch": 0.7245622126671923, "grad_norm": 0.24145711958408356, "learning_rate": 9.402199262957907e-06, "loss": 0.0083, "step": 24350 }, { "epoch": 0.7248597741508339, "grad_norm": 0.21738329529762268, "learning_rate": 9.401378150265435e-06, "loss": 0.0103, "step": 24360 }, { "epoch": 0.7251573356344755, "grad_norm": 0.23770861327648163, "learning_rate": 9.400556509943346e-06, "loss": 0.005, "step": 24370 }, { "epoch": 0.7254548971181171, "grad_norm": 0.10693898797035217, "learning_rate": 9.399734342090138e-06, "loss": 0.0074, "step": 24380 }, { "epoch": 0.7257524586017586, "grad_norm": 0.13548429310321808, "learning_rate": 9.398911646804375e-06, "loss": 0.0095, "step": 24390 }, { "epoch": 0.7260500200854001, "grad_norm": 0.20679333806037903, "learning_rate": 9.398088424184672e-06, "loss": 0.007, "step": 24400 }, { "epoch": 0.7263475815690417, "grad_norm": 0.1717689484357834, "learning_rate": 9.397264674329723e-06, "loss": 0.01, "step": 24410 }, { "epoch": 0.7266451430526832, "grad_norm": 0.2951616048812866, "learning_rate": 9.396440397338273e-06, "loss": 0.0081, "step": 24420 }, { "epoch": 0.7269427045363248, "grad_norm": 0.16658838093280792, "learning_rate": 9.395615593309137e-06, "loss": 0.0067, "step": 24430 }, { "epoch": 0.7272402660199664, "grad_norm": 0.2346036583185196, "learning_rate": 9.39479026234119e-06, "loss": 0.0089, "step": 24440 }, { "epoch": 0.727537827503608, "grad_norm": 0.32301050424575806, "learning_rate": 9.393964404533373e-06, "loss": 0.0056, "step": 24450 }, { "epoch": 0.7278353889872495, "grad_norm": 0.1629035621881485, "learning_rate": 9.393138019984686e-06, "loss": 0.0088, "step": 24460 }, { "epoch": 0.728132950470891, "grad_norm": 0.2092481404542923, "learning_rate": 9.3923111087942e-06, "loss": 0.0061, "step": 24470 }, { "epoch": 0.7284305119545326, "grad_norm": 0.5980663299560547, "learning_rate": 9.391483671061034e-06, "loss": 0.0104, "step": 24480 }, { "epoch": 0.7287280734381741, "grad_norm": 0.14121386408805847, "learning_rate": 9.39065570688439e-06, "loss": 0.0052, "step": 24490 }, { "epoch": 0.7290256349218157, "grad_norm": 0.21837611496448517, "learning_rate": 9.389827216363516e-06, "loss": 0.0081, "step": 24500 }, { "epoch": 0.7293231964054573, "grad_norm": 0.2847747802734375, "learning_rate": 9.388998199597733e-06, "loss": 0.0068, "step": 24510 }, { "epoch": 0.7296207578890989, "grad_norm": 0.4877472519874573, "learning_rate": 9.388168656686421e-06, "loss": 0.0121, "step": 24520 }, { "epoch": 0.7299183193727404, "grad_norm": 0.3271788954734802, "learning_rate": 9.387338587729026e-06, "loss": 0.0063, "step": 24530 }, { "epoch": 0.730215880856382, "grad_norm": 0.5421158671379089, "learning_rate": 9.386507992825054e-06, "loss": 0.0101, "step": 24540 }, { "epoch": 0.7305134423400235, "grad_norm": 0.24483659863471985, "learning_rate": 9.385676872074074e-06, "loss": 0.0065, "step": 24550 }, { "epoch": 0.730811003823665, "grad_norm": 0.2186552882194519, "learning_rate": 9.384845225575721e-06, "loss": 0.0076, "step": 24560 }, { "epoch": 0.7311085653073066, "grad_norm": 0.13936612010002136, "learning_rate": 9.38401305342969e-06, "loss": 0.0097, "step": 24570 }, { "epoch": 0.7314061267909482, "grad_norm": 0.15103520452976227, "learning_rate": 9.38318035573574e-06, "loss": 0.0099, "step": 24580 }, { "epoch": 0.7317036882745898, "grad_norm": 0.3790859580039978, "learning_rate": 9.3823471325937e-06, "loss": 0.0077, "step": 24590 }, { "epoch": 0.7320012497582313, "grad_norm": 0.1932523101568222, "learning_rate": 9.381513384103445e-06, "loss": 0.0088, "step": 24600 }, { "epoch": 0.7322988112418729, "grad_norm": 0.3599061667919159, "learning_rate": 9.38067911036493e-06, "loss": 0.0091, "step": 24610 }, { "epoch": 0.7325963727255144, "grad_norm": 0.05650612339377403, "learning_rate": 9.379844311478164e-06, "loss": 0.0063, "step": 24620 }, { "epoch": 0.7328939342091559, "grad_norm": 0.3488849699497223, "learning_rate": 9.379008987543223e-06, "loss": 0.0089, "step": 24630 }, { "epoch": 0.7331914956927975, "grad_norm": 0.221965491771698, "learning_rate": 9.378173138660241e-06, "loss": 0.0076, "step": 24640 }, { "epoch": 0.7334890571764391, "grad_norm": 0.1530165821313858, "learning_rate": 9.377336764929421e-06, "loss": 0.0073, "step": 24650 }, { "epoch": 0.7337866186600807, "grad_norm": 0.4217588007450104, "learning_rate": 9.376499866451025e-06, "loss": 0.0067, "step": 24660 }, { "epoch": 0.7340841801437222, "grad_norm": 0.3179064393043518, "learning_rate": 9.37566244332538e-06, "loss": 0.0064, "step": 24670 }, { "epoch": 0.7343817416273638, "grad_norm": 0.33117374777793884, "learning_rate": 9.374824495652875e-06, "loss": 0.0094, "step": 24680 }, { "epoch": 0.7346793031110053, "grad_norm": 0.08560477942228317, "learning_rate": 9.373986023533959e-06, "loss": 0.0075, "step": 24690 }, { "epoch": 0.7349768645946468, "grad_norm": 0.2944900095462799, "learning_rate": 9.373147027069147e-06, "loss": 0.0084, "step": 24700 }, { "epoch": 0.7352744260782884, "grad_norm": 0.12109352648258209, "learning_rate": 9.372307506359019e-06, "loss": 0.0054, "step": 24710 }, { "epoch": 0.73557198756193, "grad_norm": 0.24658538401126862, "learning_rate": 9.371467461504214e-06, "loss": 0.0076, "step": 24720 }, { "epoch": 0.7358695490455716, "grad_norm": 0.2043018490076065, "learning_rate": 9.370626892605435e-06, "loss": 0.0081, "step": 24730 }, { "epoch": 0.7361671105292131, "grad_norm": 0.1998809427022934, "learning_rate": 9.369785799763447e-06, "loss": 0.0071, "step": 24740 }, { "epoch": 0.7364646720128547, "grad_norm": 0.33251065015792847, "learning_rate": 9.368944183079083e-06, "loss": 0.0039, "step": 24750 }, { "epoch": 0.7367622334964962, "grad_norm": 0.40405189990997314, "learning_rate": 9.368102042653228e-06, "loss": 0.0043, "step": 24760 }, { "epoch": 0.7370597949801377, "grad_norm": 0.6122848987579346, "learning_rate": 9.367259378586841e-06, "loss": 0.0111, "step": 24770 }, { "epoch": 0.7373573564637793, "grad_norm": 0.1557818204164505, "learning_rate": 9.366416190980938e-06, "loss": 0.0086, "step": 24780 }, { "epoch": 0.7376549179474209, "grad_norm": 0.29371675848960876, "learning_rate": 9.365572479936599e-06, "loss": 0.0077, "step": 24790 }, { "epoch": 0.7379524794310625, "grad_norm": 0.034315526485443115, "learning_rate": 9.364728245554965e-06, "loss": 0.0049, "step": 24800 }, { "epoch": 0.738250040914704, "grad_norm": 0.20138177275657654, "learning_rate": 9.363883487937245e-06, "loss": 0.0063, "step": 24810 }, { "epoch": 0.7385476023983456, "grad_norm": 0.3874495327472687, "learning_rate": 9.363038207184703e-06, "loss": 0.0098, "step": 24820 }, { "epoch": 0.7388451638819871, "grad_norm": 0.35611599683761597, "learning_rate": 9.362192403398672e-06, "loss": 0.0076, "step": 24830 }, { "epoch": 0.7391427253656286, "grad_norm": 0.1407196968793869, "learning_rate": 9.361346076680545e-06, "loss": 0.0086, "step": 24840 }, { "epoch": 0.7394402868492702, "grad_norm": 0.18601584434509277, "learning_rate": 9.360499227131778e-06, "loss": 0.0066, "step": 24850 }, { "epoch": 0.7397378483329118, "grad_norm": 0.332838237285614, "learning_rate": 9.359651854853891e-06, "loss": 0.0097, "step": 24860 }, { "epoch": 0.7400354098165534, "grad_norm": 0.31915807723999023, "learning_rate": 9.358803959948464e-06, "loss": 0.0127, "step": 24870 }, { "epoch": 0.7403329713001949, "grad_norm": 0.2867189347743988, "learning_rate": 9.357955542517141e-06, "loss": 0.0071, "step": 24880 }, { "epoch": 0.7406305327838365, "grad_norm": 0.2957428991794586, "learning_rate": 9.357106602661632e-06, "loss": 0.0097, "step": 24890 }, { "epoch": 0.740928094267478, "grad_norm": 0.24487581849098206, "learning_rate": 9.356257140483702e-06, "loss": 0.0071, "step": 24900 }, { "epoch": 0.7412256557511195, "grad_norm": 0.37295249104499817, "learning_rate": 9.355407156085185e-06, "loss": 0.0062, "step": 24910 }, { "epoch": 0.7415232172347611, "grad_norm": 0.37994617223739624, "learning_rate": 9.354556649567978e-06, "loss": 0.0093, "step": 24920 }, { "epoch": 0.7418207787184027, "grad_norm": 0.24688604474067688, "learning_rate": 9.353705621034035e-06, "loss": 0.0065, "step": 24930 }, { "epoch": 0.7421183402020443, "grad_norm": 0.32836124300956726, "learning_rate": 9.352854070585375e-06, "loss": 0.0092, "step": 24940 }, { "epoch": 0.7424159016856858, "grad_norm": 0.1656997799873352, "learning_rate": 9.352001998324084e-06, "loss": 0.0078, "step": 24950 }, { "epoch": 0.7427134631693274, "grad_norm": 0.22223711013793945, "learning_rate": 9.351149404352306e-06, "loss": 0.0067, "step": 24960 }, { "epoch": 0.7430110246529689, "grad_norm": 0.14457765221595764, "learning_rate": 9.350296288772248e-06, "loss": 0.0067, "step": 24970 }, { "epoch": 0.7433085861366104, "grad_norm": 0.3593335449695587, "learning_rate": 9.349442651686178e-06, "loss": 0.0074, "step": 24980 }, { "epoch": 0.743606147620252, "grad_norm": 0.43400925397872925, "learning_rate": 9.34858849319643e-06, "loss": 0.0057, "step": 24990 }, { "epoch": 0.7439037091038936, "grad_norm": 0.4187566637992859, "learning_rate": 9.347733813405401e-06, "loss": 0.005, "step": 25000 }, { "epoch": 0.7439037091038936, "eval_loss": 0.006151492707431316, "eval_runtime": 3.9245, "eval_samples_per_second": 50.962, "eval_steps_per_second": 12.74, "step": 25000 }, { "epoch": 0.7442012705875352, "grad_norm": 0.2971975803375244, "learning_rate": 9.346878612415546e-06, "loss": 0.0059, "step": 25010 }, { "epoch": 0.7444988320711767, "grad_norm": 0.22507841885089874, "learning_rate": 9.346022890329385e-06, "loss": 0.0061, "step": 25020 }, { "epoch": 0.7447963935548183, "grad_norm": 0.28762075304985046, "learning_rate": 9.345166647249503e-06, "loss": 0.0089, "step": 25030 }, { "epoch": 0.7450939550384599, "grad_norm": 0.15973974764347076, "learning_rate": 9.344309883278543e-06, "loss": 0.0092, "step": 25040 }, { "epoch": 0.7453915165221013, "grad_norm": 0.1635669320821762, "learning_rate": 9.343452598519213e-06, "loss": 0.0047, "step": 25050 }, { "epoch": 0.7456890780057429, "grad_norm": 0.17085674405097961, "learning_rate": 9.342594793074282e-06, "loss": 0.0059, "step": 25060 }, { "epoch": 0.7459866394893845, "grad_norm": 0.3940717577934265, "learning_rate": 9.341736467046582e-06, "loss": 0.008, "step": 25070 }, { "epoch": 0.7462842009730261, "grad_norm": 0.27753356099128723, "learning_rate": 9.34087762053901e-06, "loss": 0.0061, "step": 25080 }, { "epoch": 0.7465817624566676, "grad_norm": 0.6083592772483826, "learning_rate": 9.34001825365452e-06, "loss": 0.0107, "step": 25090 }, { "epoch": 0.7468793239403092, "grad_norm": 0.20988331735134125, "learning_rate": 9.339158366496134e-06, "loss": 0.0043, "step": 25100 }, { "epoch": 0.7471768854239508, "grad_norm": 0.3113952875137329, "learning_rate": 9.338297959166931e-06, "loss": 0.0092, "step": 25110 }, { "epoch": 0.7474744469075922, "grad_norm": 0.28752684593200684, "learning_rate": 9.337437031770059e-06, "loss": 0.0077, "step": 25120 }, { "epoch": 0.7477720083912338, "grad_norm": 0.3443986475467682, "learning_rate": 9.336575584408721e-06, "loss": 0.0058, "step": 25130 }, { "epoch": 0.7480695698748754, "grad_norm": 0.35661739110946655, "learning_rate": 9.335713617186188e-06, "loss": 0.0086, "step": 25140 }, { "epoch": 0.748367131358517, "grad_norm": 0.23265135288238525, "learning_rate": 9.334851130205788e-06, "loss": 0.0091, "step": 25150 }, { "epoch": 0.7486646928421585, "grad_norm": 0.4129098057746887, "learning_rate": 9.33398812357092e-06, "loss": 0.0116, "step": 25160 }, { "epoch": 0.7489622543258001, "grad_norm": 0.29908666014671326, "learning_rate": 9.333124597385036e-06, "loss": 0.0118, "step": 25170 }, { "epoch": 0.7492598158094417, "grad_norm": 0.29034745693206787, "learning_rate": 9.332260551751652e-06, "loss": 0.0089, "step": 25180 }, { "epoch": 0.7495573772930831, "grad_norm": 0.2052055299282074, "learning_rate": 9.331395986774352e-06, "loss": 0.0072, "step": 25190 }, { "epoch": 0.7498549387767247, "grad_norm": 0.21511204540729523, "learning_rate": 9.330530902556779e-06, "loss": 0.0094, "step": 25200 }, { "epoch": 0.7501525002603663, "grad_norm": 0.24793770909309387, "learning_rate": 9.329665299202634e-06, "loss": 0.0078, "step": 25210 }, { "epoch": 0.7504500617440079, "grad_norm": 0.24100100994110107, "learning_rate": 9.328799176815687e-06, "loss": 0.0104, "step": 25220 }, { "epoch": 0.7507476232276494, "grad_norm": 0.2938843369483948, "learning_rate": 9.327932535499766e-06, "loss": 0.006, "step": 25230 }, { "epoch": 0.751045184711291, "grad_norm": 0.3578144907951355, "learning_rate": 9.327065375358763e-06, "loss": 0.0107, "step": 25240 }, { "epoch": 0.7513427461949326, "grad_norm": 0.23960252106189728, "learning_rate": 9.326197696496631e-06, "loss": 0.0091, "step": 25250 }, { "epoch": 0.751640307678574, "grad_norm": 0.4897559881210327, "learning_rate": 9.325329499017389e-06, "loss": 0.0069, "step": 25260 }, { "epoch": 0.7519378691622156, "grad_norm": 0.15696977078914642, "learning_rate": 9.32446078302511e-06, "loss": 0.0052, "step": 25270 }, { "epoch": 0.7522354306458572, "grad_norm": 0.27196988463401794, "learning_rate": 9.323591548623937e-06, "loss": 0.007, "step": 25280 }, { "epoch": 0.7525329921294988, "grad_norm": 0.3861430287361145, "learning_rate": 9.32272179591807e-06, "loss": 0.0133, "step": 25290 }, { "epoch": 0.7528305536131403, "grad_norm": 0.4447554349899292, "learning_rate": 9.321851525011779e-06, "loss": 0.0088, "step": 25300 }, { "epoch": 0.7531281150967819, "grad_norm": 0.3680817484855652, "learning_rate": 9.320980736009386e-06, "loss": 0.0066, "step": 25310 }, { "epoch": 0.7534256765804235, "grad_norm": 1.218264102935791, "learning_rate": 9.32010942901528e-06, "loss": 0.0084, "step": 25320 }, { "epoch": 0.7537232380640649, "grad_norm": 0.31144267320632935, "learning_rate": 9.319237604133912e-06, "loss": 0.0078, "step": 25330 }, { "epoch": 0.7540207995477065, "grad_norm": 0.28634756803512573, "learning_rate": 9.318365261469795e-06, "loss": 0.009, "step": 25340 }, { "epoch": 0.7543183610313481, "grad_norm": 0.2464950531721115, "learning_rate": 9.317492401127506e-06, "loss": 0.0086, "step": 25350 }, { "epoch": 0.7546159225149897, "grad_norm": 0.19851115345954895, "learning_rate": 9.316619023211678e-06, "loss": 0.0048, "step": 25360 }, { "epoch": 0.7549134839986312, "grad_norm": 0.3687644898891449, "learning_rate": 9.315745127827014e-06, "loss": 0.0067, "step": 25370 }, { "epoch": 0.7552110454822728, "grad_norm": 0.6362109184265137, "learning_rate": 9.314870715078273e-06, "loss": 0.0049, "step": 25380 }, { "epoch": 0.7555086069659144, "grad_norm": 0.23290172219276428, "learning_rate": 9.313995785070277e-06, "loss": 0.0082, "step": 25390 }, { "epoch": 0.7558061684495558, "grad_norm": 0.40099942684173584, "learning_rate": 9.313120337907915e-06, "loss": 0.0065, "step": 25400 }, { "epoch": 0.7561037299331974, "grad_norm": 0.2532956898212433, "learning_rate": 9.31224437369613e-06, "loss": 0.0087, "step": 25410 }, { "epoch": 0.756401291416839, "grad_norm": 0.2026778906583786, "learning_rate": 9.311367892539931e-06, "loss": 0.0091, "step": 25420 }, { "epoch": 0.7566988529004806, "grad_norm": 0.292764812707901, "learning_rate": 9.310490894544393e-06, "loss": 0.0101, "step": 25430 }, { "epoch": 0.7569964143841221, "grad_norm": 0.29953283071517944, "learning_rate": 9.309613379814646e-06, "loss": 0.0081, "step": 25440 }, { "epoch": 0.7572939758677637, "grad_norm": 0.28393271565437317, "learning_rate": 9.308735348455887e-06, "loss": 0.0077, "step": 25450 }, { "epoch": 0.7575915373514053, "grad_norm": 0.2568499445915222, "learning_rate": 9.307856800573369e-06, "loss": 0.0068, "step": 25460 }, { "epoch": 0.7578890988350467, "grad_norm": 0.5705145597457886, "learning_rate": 9.306977736272415e-06, "loss": 0.0087, "step": 25470 }, { "epoch": 0.7581866603186883, "grad_norm": 0.13480634987354279, "learning_rate": 9.306098155658405e-06, "loss": 0.0053, "step": 25480 }, { "epoch": 0.7584842218023299, "grad_norm": 0.3218764066696167, "learning_rate": 9.305218058836778e-06, "loss": 0.0063, "step": 25490 }, { "epoch": 0.7587817832859715, "grad_norm": 0.31835997104644775, "learning_rate": 9.304337445913042e-06, "loss": 0.0092, "step": 25500 }, { "epoch": 0.759079344769613, "grad_norm": 0.22228007018566132, "learning_rate": 9.303456316992764e-06, "loss": 0.0101, "step": 25510 }, { "epoch": 0.7593769062532546, "grad_norm": 0.287407249212265, "learning_rate": 9.302574672181571e-06, "loss": 0.008, "step": 25520 }, { "epoch": 0.7596744677368962, "grad_norm": 0.6019946336746216, "learning_rate": 9.301692511585151e-06, "loss": 0.008, "step": 25530 }, { "epoch": 0.7599720292205377, "grad_norm": 0.29678773880004883, "learning_rate": 9.300809835309258e-06, "loss": 0.0101, "step": 25540 }, { "epoch": 0.7602695907041792, "grad_norm": 0.3513493239879608, "learning_rate": 9.299926643459705e-06, "loss": 0.0075, "step": 25550 }, { "epoch": 0.7605671521878208, "grad_norm": 0.4046623110771179, "learning_rate": 9.29904293614237e-06, "loss": 0.0078, "step": 25560 }, { "epoch": 0.7608647136714624, "grad_norm": 0.29492485523223877, "learning_rate": 9.298158713463185e-06, "loss": 0.0085, "step": 25570 }, { "epoch": 0.7611622751551039, "grad_norm": 0.3480675518512726, "learning_rate": 9.297273975528156e-06, "loss": 0.0093, "step": 25580 }, { "epoch": 0.7614598366387455, "grad_norm": 0.20947346091270447, "learning_rate": 9.296388722443337e-06, "loss": 0.0083, "step": 25590 }, { "epoch": 0.7617573981223871, "grad_norm": 0.5494451522827148, "learning_rate": 9.295502954314855e-06, "loss": 0.0048, "step": 25600 }, { "epoch": 0.7620549596060286, "grad_norm": 0.3153597414493561, "learning_rate": 9.294616671248894e-06, "loss": 0.0061, "step": 25610 }, { "epoch": 0.7623525210896701, "grad_norm": 0.25926917791366577, "learning_rate": 9.293729873351697e-06, "loss": 0.0048, "step": 25620 }, { "epoch": 0.7626500825733117, "grad_norm": 0.19591015577316284, "learning_rate": 9.292842560729577e-06, "loss": 0.007, "step": 25630 }, { "epoch": 0.7629476440569533, "grad_norm": 0.2574273943901062, "learning_rate": 9.2919547334889e-06, "loss": 0.0068, "step": 25640 }, { "epoch": 0.7632452055405948, "grad_norm": 0.138124480843544, "learning_rate": 9.291066391736096e-06, "loss": 0.005, "step": 25650 }, { "epoch": 0.7635427670242364, "grad_norm": 0.22596673667430878, "learning_rate": 9.29017753557766e-06, "loss": 0.0088, "step": 25660 }, { "epoch": 0.763840328507878, "grad_norm": 0.133754163980484, "learning_rate": 9.289288165120147e-06, "loss": 0.0065, "step": 25670 }, { "epoch": 0.7641378899915195, "grad_norm": 0.19259409606456757, "learning_rate": 9.28839828047017e-06, "loss": 0.0105, "step": 25680 }, { "epoch": 0.764435451475161, "grad_norm": 0.2892868220806122, "learning_rate": 9.287507881734413e-06, "loss": 0.0077, "step": 25690 }, { "epoch": 0.7647330129588026, "grad_norm": 0.4283512234687805, "learning_rate": 9.286616969019609e-06, "loss": 0.0085, "step": 25700 }, { "epoch": 0.7650305744424442, "grad_norm": 0.10561180859804153, "learning_rate": 9.28572554243256e-06, "loss": 0.0064, "step": 25710 }, { "epoch": 0.7653281359260857, "grad_norm": 0.3852933943271637, "learning_rate": 9.284833602080133e-06, "loss": 0.0069, "step": 25720 }, { "epoch": 0.7656256974097273, "grad_norm": 0.28492826223373413, "learning_rate": 9.28394114806925e-06, "loss": 0.0046, "step": 25730 }, { "epoch": 0.7659232588933689, "grad_norm": 0.4761025905609131, "learning_rate": 9.283048180506896e-06, "loss": 0.0079, "step": 25740 }, { "epoch": 0.7662208203770104, "grad_norm": 0.41579335927963257, "learning_rate": 9.282154699500118e-06, "loss": 0.0069, "step": 25750 }, { "epoch": 0.7665183818606519, "grad_norm": 0.11670929938554764, "learning_rate": 9.281260705156027e-06, "loss": 0.0092, "step": 25760 }, { "epoch": 0.7668159433442935, "grad_norm": 0.25723448395729065, "learning_rate": 9.280366197581792e-06, "loss": 0.0089, "step": 25770 }, { "epoch": 0.767113504827935, "grad_norm": 0.24370495975017548, "learning_rate": 9.279471176884644e-06, "loss": 0.0104, "step": 25780 }, { "epoch": 0.7674110663115766, "grad_norm": 0.30800390243530273, "learning_rate": 9.27857564317188e-06, "loss": 0.007, "step": 25790 }, { "epoch": 0.7677086277952182, "grad_norm": 0.10047067701816559, "learning_rate": 9.277679596550851e-06, "loss": 0.0065, "step": 25800 }, { "epoch": 0.7680061892788598, "grad_norm": 0.24369177222251892, "learning_rate": 9.276783037128979e-06, "loss": 0.0083, "step": 25810 }, { "epoch": 0.7683037507625013, "grad_norm": 0.29610398411750793, "learning_rate": 9.275885965013735e-06, "loss": 0.0081, "step": 25820 }, { "epoch": 0.7686013122461428, "grad_norm": 0.2709450125694275, "learning_rate": 9.274988380312661e-06, "loss": 0.0065, "step": 25830 }, { "epoch": 0.7688988737297844, "grad_norm": 0.08142334222793579, "learning_rate": 9.274090283133362e-06, "loss": 0.0071, "step": 25840 }, { "epoch": 0.769196435213426, "grad_norm": 0.06089484691619873, "learning_rate": 9.273191673583497e-06, "loss": 0.0045, "step": 25850 }, { "epoch": 0.7694939966970675, "grad_norm": 0.2310083955526352, "learning_rate": 9.272292551770788e-06, "loss": 0.008, "step": 25860 }, { "epoch": 0.7697915581807091, "grad_norm": 0.10157692432403564, "learning_rate": 9.271392917803023e-06, "loss": 0.0095, "step": 25870 }, { "epoch": 0.7700891196643507, "grad_norm": 0.9444032907485962, "learning_rate": 9.270492771788048e-06, "loss": 0.0078, "step": 25880 }, { "epoch": 0.7703866811479922, "grad_norm": 0.32730990648269653, "learning_rate": 9.269592113833769e-06, "loss": 0.0067, "step": 25890 }, { "epoch": 0.7706842426316337, "grad_norm": 0.37342941761016846, "learning_rate": 9.26869094404816e-06, "loss": 0.0085, "step": 25900 }, { "epoch": 0.7709818041152753, "grad_norm": 0.08303911983966827, "learning_rate": 9.267789262539246e-06, "loss": 0.0053, "step": 25910 }, { "epoch": 0.7712793655989169, "grad_norm": 0.31357136368751526, "learning_rate": 9.266887069415123e-06, "loss": 0.0057, "step": 25920 }, { "epoch": 0.7715769270825584, "grad_norm": 0.2800579071044922, "learning_rate": 9.265984364783941e-06, "loss": 0.0076, "step": 25930 }, { "epoch": 0.7718744885662, "grad_norm": 0.27779054641723633, "learning_rate": 9.26508114875392e-06, "loss": 0.0067, "step": 25940 }, { "epoch": 0.7721720500498416, "grad_norm": 0.23697219789028168, "learning_rate": 9.26417742143333e-06, "loss": 0.0075, "step": 25950 }, { "epoch": 0.7724696115334831, "grad_norm": 0.5540744066238403, "learning_rate": 9.263273182930513e-06, "loss": 0.009, "step": 25960 }, { "epoch": 0.7727671730171246, "grad_norm": 0.33535894751548767, "learning_rate": 9.262368433353866e-06, "loss": 0.0101, "step": 25970 }, { "epoch": 0.7730647345007662, "grad_norm": 0.09650576114654541, "learning_rate": 9.261463172811846e-06, "loss": 0.0056, "step": 25980 }, { "epoch": 0.7733622959844078, "grad_norm": 0.45029687881469727, "learning_rate": 9.260557401412978e-06, "loss": 0.0063, "step": 25990 }, { "epoch": 0.7736598574680493, "grad_norm": 0.22214065492153168, "learning_rate": 9.25965111926584e-06, "loss": 0.0084, "step": 26000 }, { "epoch": 0.7739574189516909, "grad_norm": 0.2641727924346924, "learning_rate": 9.258744326479082e-06, "loss": 0.0057, "step": 26010 }, { "epoch": 0.7742549804353325, "grad_norm": 0.11543449014425278, "learning_rate": 9.257837023161402e-06, "loss": 0.0046, "step": 26020 }, { "epoch": 0.774552541918974, "grad_norm": 0.36525672674179077, "learning_rate": 9.25692920942157e-06, "loss": 0.0109, "step": 26030 }, { "epoch": 0.7748501034026156, "grad_norm": 0.307849645614624, "learning_rate": 9.25602088536841e-06, "loss": 0.0077, "step": 26040 }, { "epoch": 0.7751476648862571, "grad_norm": 0.3929269313812256, "learning_rate": 9.255112051110815e-06, "loss": 0.0068, "step": 26050 }, { "epoch": 0.7754452263698987, "grad_norm": 0.16805174946784973, "learning_rate": 9.25420270675773e-06, "loss": 0.0065, "step": 26060 }, { "epoch": 0.7757427878535402, "grad_norm": 0.22166313230991364, "learning_rate": 9.253292852418169e-06, "loss": 0.0057, "step": 26070 }, { "epoch": 0.7760403493371818, "grad_norm": 0.5445563197135925, "learning_rate": 9.252382488201204e-06, "loss": 0.0108, "step": 26080 }, { "epoch": 0.7763379108208234, "grad_norm": 0.18568114936351776, "learning_rate": 9.251471614215964e-06, "loss": 0.0073, "step": 26090 }, { "epoch": 0.776635472304465, "grad_norm": 0.21727369725704193, "learning_rate": 9.250560230571646e-06, "loss": 0.0067, "step": 26100 }, { "epoch": 0.7769330337881065, "grad_norm": 0.23062285780906677, "learning_rate": 9.249648337377504e-06, "loss": 0.0078, "step": 26110 }, { "epoch": 0.777230595271748, "grad_norm": 0.2825223207473755, "learning_rate": 9.248735934742852e-06, "loss": 0.008, "step": 26120 }, { "epoch": 0.7775281567553896, "grad_norm": 0.23265092074871063, "learning_rate": 9.247823022777073e-06, "loss": 0.0065, "step": 26130 }, { "epoch": 0.7778257182390311, "grad_norm": 0.42256656289100647, "learning_rate": 9.246909601589602e-06, "loss": 0.0075, "step": 26140 }, { "epoch": 0.7781232797226727, "grad_norm": 0.29673564434051514, "learning_rate": 9.245995671289935e-06, "loss": 0.0074, "step": 26150 }, { "epoch": 0.7784208412063143, "grad_norm": 0.19845159351825714, "learning_rate": 9.245081231987639e-06, "loss": 0.0094, "step": 26160 }, { "epoch": 0.7787184026899558, "grad_norm": 0.22081570327281952, "learning_rate": 9.24416628379233e-06, "loss": 0.0091, "step": 26170 }, { "epoch": 0.7790159641735974, "grad_norm": 0.3410279452800751, "learning_rate": 9.243250826813695e-06, "loss": 0.013, "step": 26180 }, { "epoch": 0.7793135256572389, "grad_norm": 0.3122926950454712, "learning_rate": 9.242334861161473e-06, "loss": 0.0067, "step": 26190 }, { "epoch": 0.7796110871408805, "grad_norm": 0.43131354451179504, "learning_rate": 9.24141838694547e-06, "loss": 0.0103, "step": 26200 }, { "epoch": 0.779908648624522, "grad_norm": 0.5382981300354004, "learning_rate": 9.240501404275555e-06, "loss": 0.008, "step": 26210 }, { "epoch": 0.7802062101081636, "grad_norm": 0.14161741733551025, "learning_rate": 9.239583913261648e-06, "loss": 0.0047, "step": 26220 }, { "epoch": 0.7805037715918052, "grad_norm": 0.12505972385406494, "learning_rate": 9.23866591401374e-06, "loss": 0.0053, "step": 26230 }, { "epoch": 0.7808013330754467, "grad_norm": 0.23787559568881989, "learning_rate": 9.237747406641878e-06, "loss": 0.0089, "step": 26240 }, { "epoch": 0.7810988945590883, "grad_norm": 0.6264654994010925, "learning_rate": 9.236828391256171e-06, "loss": 0.0081, "step": 26250 }, { "epoch": 0.7813964560427298, "grad_norm": 0.333666056394577, "learning_rate": 9.235908867966792e-06, "loss": 0.0073, "step": 26260 }, { "epoch": 0.7816940175263714, "grad_norm": 0.15725044906139374, "learning_rate": 9.234988836883967e-06, "loss": 0.0073, "step": 26270 }, { "epoch": 0.7819915790100129, "grad_norm": 0.21528689563274384, "learning_rate": 9.23406829811799e-06, "loss": 0.0072, "step": 26280 }, { "epoch": 0.7822891404936545, "grad_norm": 0.07696365565061569, "learning_rate": 9.233147251779215e-06, "loss": 0.0053, "step": 26290 }, { "epoch": 0.7825867019772961, "grad_norm": 0.26024380326271057, "learning_rate": 9.232225697978055e-06, "loss": 0.0068, "step": 26300 }, { "epoch": 0.7828842634609376, "grad_norm": 0.3645245134830475, "learning_rate": 9.231303636824981e-06, "loss": 0.0155, "step": 26310 }, { "epoch": 0.7831818249445792, "grad_norm": 0.1978723108768463, "learning_rate": 9.230381068430532e-06, "loss": 0.0116, "step": 26320 }, { "epoch": 0.7834793864282207, "grad_norm": 0.4240901470184326, "learning_rate": 9.229457992905302e-06, "loss": 0.006, "step": 26330 }, { "epoch": 0.7837769479118623, "grad_norm": 0.2108621746301651, "learning_rate": 9.228534410359947e-06, "loss": 0.0045, "step": 26340 }, { "epoch": 0.7840745093955038, "grad_norm": 0.36494579911231995, "learning_rate": 9.227610320905187e-06, "loss": 0.0069, "step": 26350 }, { "epoch": 0.7843720708791454, "grad_norm": 0.16783444583415985, "learning_rate": 9.226685724651799e-06, "loss": 0.0066, "step": 26360 }, { "epoch": 0.784669632362787, "grad_norm": 0.4481280744075775, "learning_rate": 9.22576062171062e-06, "loss": 0.011, "step": 26370 }, { "epoch": 0.7849671938464285, "grad_norm": 0.28448668122291565, "learning_rate": 9.224835012192552e-06, "loss": 0.004, "step": 26380 }, { "epoch": 0.7852647553300701, "grad_norm": 0.31856781244277954, "learning_rate": 9.223908896208556e-06, "loss": 0.009, "step": 26390 }, { "epoch": 0.7855623168137116, "grad_norm": 0.3495260179042816, "learning_rate": 9.222982273869652e-06, "loss": 0.0076, "step": 26400 }, { "epoch": 0.7858598782973532, "grad_norm": 0.32427677512168884, "learning_rate": 9.222055145286923e-06, "loss": 0.0074, "step": 26410 }, { "epoch": 0.7861574397809947, "grad_norm": 0.438852995634079, "learning_rate": 9.221127510571509e-06, "loss": 0.0042, "step": 26420 }, { "epoch": 0.7864550012646363, "grad_norm": 0.10880851000547409, "learning_rate": 9.220199369834616e-06, "loss": 0.0044, "step": 26430 }, { "epoch": 0.7867525627482779, "grad_norm": 0.14122198522090912, "learning_rate": 9.219270723187507e-06, "loss": 0.0063, "step": 26440 }, { "epoch": 0.7870501242319194, "grad_norm": 0.09640277922153473, "learning_rate": 9.218341570741506e-06, "loss": 0.0063, "step": 26450 }, { "epoch": 0.787347685715561, "grad_norm": 0.21726368367671967, "learning_rate": 9.217411912608e-06, "loss": 0.0056, "step": 26460 }, { "epoch": 0.7876452471992025, "grad_norm": 0.12204714864492416, "learning_rate": 9.216481748898431e-06, "loss": 0.008, "step": 26470 }, { "epoch": 0.787942808682844, "grad_norm": 0.44819939136505127, "learning_rate": 9.215551079724308e-06, "loss": 0.0046, "step": 26480 }, { "epoch": 0.7882403701664856, "grad_norm": 0.2753762900829315, "learning_rate": 9.214619905197199e-06, "loss": 0.01, "step": 26490 }, { "epoch": 0.7885379316501272, "grad_norm": 0.36476632952690125, "learning_rate": 9.21368822542873e-06, "loss": 0.0065, "step": 26500 }, { "epoch": 0.7888354931337688, "grad_norm": 0.2521599233150482, "learning_rate": 9.21275604053059e-06, "loss": 0.0187, "step": 26510 }, { "epoch": 0.7891330546174103, "grad_norm": 0.19981655478477478, "learning_rate": 9.211823350614526e-06, "loss": 0.0076, "step": 26520 }, { "epoch": 0.7894306161010519, "grad_norm": 0.1938696652650833, "learning_rate": 9.21089015579235e-06, "loss": 0.0073, "step": 26530 }, { "epoch": 0.7897281775846935, "grad_norm": 0.2828255891799927, "learning_rate": 9.20995645617593e-06, "loss": 0.0115, "step": 26540 }, { "epoch": 0.790025739068335, "grad_norm": 0.08668606728315353, "learning_rate": 9.209022251877198e-06, "loss": 0.0037, "step": 26550 }, { "epoch": 0.7903233005519765, "grad_norm": 0.22213222086429596, "learning_rate": 9.208087543008142e-06, "loss": 0.0073, "step": 26560 }, { "epoch": 0.7906208620356181, "grad_norm": 0.20423667132854462, "learning_rate": 9.207152329680813e-06, "loss": 0.0091, "step": 26570 }, { "epoch": 0.7909184235192597, "grad_norm": 0.0948043018579483, "learning_rate": 9.206216612007327e-06, "loss": 0.0073, "step": 26580 }, { "epoch": 0.7912159850029012, "grad_norm": 0.1732267588376999, "learning_rate": 9.205280390099853e-06, "loss": 0.0071, "step": 26590 }, { "epoch": 0.7915135464865428, "grad_norm": 0.17753909528255463, "learning_rate": 9.204343664070626e-06, "loss": 0.0052, "step": 26600 }, { "epoch": 0.7918111079701844, "grad_norm": 0.06437321752309799, "learning_rate": 9.203406434031934e-06, "loss": 0.0062, "step": 26610 }, { "epoch": 0.7921086694538259, "grad_norm": 0.20993466675281525, "learning_rate": 9.202468700096137e-06, "loss": 0.0064, "step": 26620 }, { "epoch": 0.7924062309374674, "grad_norm": 0.3090664744377136, "learning_rate": 9.201530462375645e-06, "loss": 0.0075, "step": 26630 }, { "epoch": 0.792703792421109, "grad_norm": 0.43022024631500244, "learning_rate": 9.200591720982932e-06, "loss": 0.0109, "step": 26640 }, { "epoch": 0.7930013539047506, "grad_norm": 0.2806295156478882, "learning_rate": 9.199652476030536e-06, "loss": 0.0079, "step": 26650 }, { "epoch": 0.7932989153883921, "grad_norm": 0.18562006950378418, "learning_rate": 9.19871272763105e-06, "loss": 0.007, "step": 26660 }, { "epoch": 0.7935964768720337, "grad_norm": 0.3208160996437073, "learning_rate": 9.197772475897127e-06, "loss": 0.0076, "step": 26670 }, { "epoch": 0.7938940383556753, "grad_norm": 0.38437655568122864, "learning_rate": 9.196831720941488e-06, "loss": 0.0121, "step": 26680 }, { "epoch": 0.7941915998393168, "grad_norm": 0.3044677674770355, "learning_rate": 9.195890462876904e-06, "loss": 0.0104, "step": 26690 }, { "epoch": 0.7944891613229583, "grad_norm": 0.4370233118534088, "learning_rate": 9.194948701816215e-06, "loss": 0.0114, "step": 26700 }, { "epoch": 0.7947867228065999, "grad_norm": 0.4343578517436981, "learning_rate": 9.194006437872315e-06, "loss": 0.0043, "step": 26710 }, { "epoch": 0.7950842842902415, "grad_norm": 0.1615247279405594, "learning_rate": 9.193063671158164e-06, "loss": 0.0088, "step": 26720 }, { "epoch": 0.795381845773883, "grad_norm": 0.20920662581920624, "learning_rate": 9.192120401786778e-06, "loss": 0.0084, "step": 26730 }, { "epoch": 0.7956794072575246, "grad_norm": 0.23515132069587708, "learning_rate": 9.191176629871232e-06, "loss": 0.0061, "step": 26740 }, { "epoch": 0.7959769687411662, "grad_norm": 0.4646921157836914, "learning_rate": 9.190232355524666e-06, "loss": 0.0106, "step": 26750 }, { "epoch": 0.7962745302248077, "grad_norm": 0.22135238349437714, "learning_rate": 9.18928757886028e-06, "loss": 0.0051, "step": 26760 }, { "epoch": 0.7965720917084492, "grad_norm": 0.1392134130001068, "learning_rate": 9.18834229999133e-06, "loss": 0.0084, "step": 26770 }, { "epoch": 0.7968696531920908, "grad_norm": 0.09081808477640152, "learning_rate": 9.187396519031131e-06, "loss": 0.006, "step": 26780 }, { "epoch": 0.7971672146757324, "grad_norm": 0.30086424946784973, "learning_rate": 9.186450236093067e-06, "loss": 0.0075, "step": 26790 }, { "epoch": 0.797464776159374, "grad_norm": 0.22906383872032166, "learning_rate": 9.185503451290574e-06, "loss": 0.0049, "step": 26800 }, { "epoch": 0.7977623376430155, "grad_norm": 0.1742074191570282, "learning_rate": 9.184556164737152e-06, "loss": 0.008, "step": 26810 }, { "epoch": 0.7980598991266571, "grad_norm": 0.2679184079170227, "learning_rate": 9.183608376546362e-06, "loss": 0.0075, "step": 26820 }, { "epoch": 0.7983574606102986, "grad_norm": 0.21567575633525848, "learning_rate": 9.182660086831819e-06, "loss": 0.009, "step": 26830 }, { "epoch": 0.7986550220939401, "grad_norm": 0.2514616847038269, "learning_rate": 9.181711295707204e-06, "loss": 0.0087, "step": 26840 }, { "epoch": 0.7989525835775817, "grad_norm": 0.03464846685528755, "learning_rate": 9.180762003286257e-06, "loss": 0.0063, "step": 26850 }, { "epoch": 0.7992501450612233, "grad_norm": 0.26108986139297485, "learning_rate": 9.179812209682777e-06, "loss": 0.0066, "step": 26860 }, { "epoch": 0.7995477065448648, "grad_norm": 0.25668954849243164, "learning_rate": 9.178861915010624e-06, "loss": 0.0074, "step": 26870 }, { "epoch": 0.7998452680285064, "grad_norm": 0.41659581661224365, "learning_rate": 9.177911119383718e-06, "loss": 0.0088, "step": 26880 }, { "epoch": 0.800142829512148, "grad_norm": 0.29900097846984863, "learning_rate": 9.176959822916039e-06, "loss": 0.0062, "step": 26890 }, { "epoch": 0.8004403909957895, "grad_norm": 0.16413816809654236, "learning_rate": 9.176008025721626e-06, "loss": 0.0087, "step": 26900 }, { "epoch": 0.800737952479431, "grad_norm": 0.18292182683944702, "learning_rate": 9.175055727914577e-06, "loss": 0.0054, "step": 26910 }, { "epoch": 0.8010355139630726, "grad_norm": 0.1066422313451767, "learning_rate": 9.174102929609056e-06, "loss": 0.0075, "step": 26920 }, { "epoch": 0.8013330754467142, "grad_norm": 0.438082754611969, "learning_rate": 9.17314963091928e-06, "loss": 0.0076, "step": 26930 }, { "epoch": 0.8016306369303557, "grad_norm": 0.21294192969799042, "learning_rate": 9.172195831959526e-06, "loss": 0.0084, "step": 26940 }, { "epoch": 0.8019281984139973, "grad_norm": 0.21755075454711914, "learning_rate": 9.171241532844141e-06, "loss": 0.0062, "step": 26950 }, { "epoch": 0.8022257598976389, "grad_norm": 0.47644978761672974, "learning_rate": 9.17028673368752e-06, "loss": 0.0066, "step": 26960 }, { "epoch": 0.8025233213812804, "grad_norm": 0.31169387698173523, "learning_rate": 9.169331434604123e-06, "loss": 0.0067, "step": 26970 }, { "epoch": 0.8028208828649219, "grad_norm": 0.2816583812236786, "learning_rate": 9.168375635708471e-06, "loss": 0.0069, "step": 26980 }, { "epoch": 0.8031184443485635, "grad_norm": 0.28073185682296753, "learning_rate": 9.167419337115142e-06, "loss": 0.0045, "step": 26990 }, { "epoch": 0.8034160058322051, "grad_norm": 0.3424496650695801, "learning_rate": 9.166462538938776e-06, "loss": 0.0088, "step": 27000 }, { "epoch": 0.8037135673158466, "grad_norm": 0.22061076760292053, "learning_rate": 9.165505241294073e-06, "loss": 0.006, "step": 27010 }, { "epoch": 0.8040111287994882, "grad_norm": 0.39630621671676636, "learning_rate": 9.164547444295791e-06, "loss": 0.0071, "step": 27020 }, { "epoch": 0.8043086902831298, "grad_norm": 0.16785828769207, "learning_rate": 9.16358914805875e-06, "loss": 0.0072, "step": 27030 }, { "epoch": 0.8046062517667714, "grad_norm": 0.7953236103057861, "learning_rate": 9.162630352697827e-06, "loss": 0.006, "step": 27040 }, { "epoch": 0.8049038132504128, "grad_norm": 0.20262648165225983, "learning_rate": 9.161671058327966e-06, "loss": 0.0097, "step": 27050 }, { "epoch": 0.8052013747340544, "grad_norm": 0.21680818498134613, "learning_rate": 9.160711265064158e-06, "loss": 0.0057, "step": 27060 }, { "epoch": 0.805498936217696, "grad_norm": 0.304169237613678, "learning_rate": 9.159750973021466e-06, "loss": 0.0067, "step": 27070 }, { "epoch": 0.8057964977013375, "grad_norm": 0.21786683797836304, "learning_rate": 9.158790182315008e-06, "loss": 0.004, "step": 27080 }, { "epoch": 0.8060940591849791, "grad_norm": 0.20017407834529877, "learning_rate": 9.157828893059961e-06, "loss": 0.0068, "step": 27090 }, { "epoch": 0.8063916206686207, "grad_norm": 0.25143733620643616, "learning_rate": 9.156867105371564e-06, "loss": 0.0076, "step": 27100 }, { "epoch": 0.8066891821522623, "grad_norm": 0.22567422688007355, "learning_rate": 9.155904819365112e-06, "loss": 0.0049, "step": 27110 }, { "epoch": 0.8069867436359037, "grad_norm": 0.2290969341993332, "learning_rate": 9.154942035155965e-06, "loss": 0.0053, "step": 27120 }, { "epoch": 0.8072843051195453, "grad_norm": 0.25396135449409485, "learning_rate": 9.153978752859537e-06, "loss": 0.0047, "step": 27130 }, { "epoch": 0.8075818666031869, "grad_norm": 0.3827689290046692, "learning_rate": 9.153014972591306e-06, "loss": 0.0065, "step": 27140 }, { "epoch": 0.8078794280868284, "grad_norm": 0.2946566939353943, "learning_rate": 9.15205069446681e-06, "loss": 0.009, "step": 27150 }, { "epoch": 0.80817698957047, "grad_norm": 0.17513243854045868, "learning_rate": 9.151085918601643e-06, "loss": 0.0105, "step": 27160 }, { "epoch": 0.8084745510541116, "grad_norm": 0.21358738839626312, "learning_rate": 9.15012064511146e-06, "loss": 0.0064, "step": 27170 }, { "epoch": 0.8087721125377532, "grad_norm": 0.21336480975151062, "learning_rate": 9.149154874111979e-06, "loss": 0.0046, "step": 27180 }, { "epoch": 0.8090696740213946, "grad_norm": 0.34857815504074097, "learning_rate": 9.14818860571897e-06, "loss": 0.0115, "step": 27190 }, { "epoch": 0.8093672355050362, "grad_norm": 0.2979324758052826, "learning_rate": 9.147221840048275e-06, "loss": 0.0067, "step": 27200 }, { "epoch": 0.8096647969886778, "grad_norm": 0.29113563895225525, "learning_rate": 9.146254577215782e-06, "loss": 0.007, "step": 27210 }, { "epoch": 0.8099623584723193, "grad_norm": 0.16711705923080444, "learning_rate": 9.145286817337446e-06, "loss": 0.007, "step": 27220 }, { "epoch": 0.8102599199559609, "grad_norm": 0.708496630191803, "learning_rate": 9.144318560529283e-06, "loss": 0.0075, "step": 27230 }, { "epoch": 0.8105574814396025, "grad_norm": 0.26885145902633667, "learning_rate": 9.143349806907365e-06, "loss": 0.0084, "step": 27240 }, { "epoch": 0.8108550429232441, "grad_norm": 0.22493876516819, "learning_rate": 9.142380556587822e-06, "loss": 0.0059, "step": 27250 }, { "epoch": 0.8111526044068855, "grad_norm": 0.2238633781671524, "learning_rate": 9.14141080968685e-06, "loss": 0.0055, "step": 27260 }, { "epoch": 0.8114501658905271, "grad_norm": 0.27513614296913147, "learning_rate": 9.140440566320697e-06, "loss": 0.0065, "step": 27270 }, { "epoch": 0.8117477273741687, "grad_norm": 0.18066798150539398, "learning_rate": 9.139469826605675e-06, "loss": 0.0044, "step": 27280 }, { "epoch": 0.8120452888578102, "grad_norm": 0.3814524710178375, "learning_rate": 9.138498590658156e-06, "loss": 0.0058, "step": 27290 }, { "epoch": 0.8123428503414518, "grad_norm": 0.38583213090896606, "learning_rate": 9.13752685859457e-06, "loss": 0.0103, "step": 27300 }, { "epoch": 0.8126404118250934, "grad_norm": 0.30201810598373413, "learning_rate": 9.136554630531405e-06, "loss": 0.0078, "step": 27310 }, { "epoch": 0.812937973308735, "grad_norm": 0.13107581436634064, "learning_rate": 9.13558190658521e-06, "loss": 0.0074, "step": 27320 }, { "epoch": 0.8132355347923764, "grad_norm": 0.22498634457588196, "learning_rate": 9.134608686872594e-06, "loss": 0.0073, "step": 27330 }, { "epoch": 0.813533096276018, "grad_norm": 0.18286745250225067, "learning_rate": 9.133634971510227e-06, "loss": 0.008, "step": 27340 }, { "epoch": 0.8138306577596596, "grad_norm": 0.15494213998317719, "learning_rate": 9.132660760614835e-06, "loss": 0.0046, "step": 27350 }, { "epoch": 0.8141282192433011, "grad_norm": 0.1893971860408783, "learning_rate": 9.131686054303202e-06, "loss": 0.0064, "step": 27360 }, { "epoch": 0.8144257807269427, "grad_norm": 0.15999996662139893, "learning_rate": 9.130710852692177e-06, "loss": 0.009, "step": 27370 }, { "epoch": 0.8147233422105843, "grad_norm": 0.4211752712726593, "learning_rate": 9.129735155898666e-06, "loss": 0.0092, "step": 27380 }, { "epoch": 0.8150209036942259, "grad_norm": 0.5878800749778748, "learning_rate": 9.12875896403963e-06, "loss": 0.008, "step": 27390 }, { "epoch": 0.8153184651778673, "grad_norm": 0.532942533493042, "learning_rate": 9.1277822772321e-06, "loss": 0.0092, "step": 27400 }, { "epoch": 0.8156160266615089, "grad_norm": 0.15745049715042114, "learning_rate": 9.126805095593153e-06, "loss": 0.007, "step": 27410 }, { "epoch": 0.8159135881451505, "grad_norm": 0.18377168476581573, "learning_rate": 9.125827419239933e-06, "loss": 0.0054, "step": 27420 }, { "epoch": 0.816211149628792, "grad_norm": 0.2819817364215851, "learning_rate": 9.124849248289645e-06, "loss": 0.0076, "step": 27430 }, { "epoch": 0.8165087111124336, "grad_norm": 0.1606207638978958, "learning_rate": 9.123870582859548e-06, "loss": 0.0074, "step": 27440 }, { "epoch": 0.8168062725960752, "grad_norm": 0.07448429614305496, "learning_rate": 9.122891423066964e-06, "loss": 0.0048, "step": 27450 }, { "epoch": 0.8171038340797168, "grad_norm": 0.16782432794570923, "learning_rate": 9.121911769029273e-06, "loss": 0.0082, "step": 27460 }, { "epoch": 0.8174013955633582, "grad_norm": 0.39760005474090576, "learning_rate": 9.120931620863913e-06, "loss": 0.0062, "step": 27470 }, { "epoch": 0.8176989570469998, "grad_norm": 0.2970113158226013, "learning_rate": 9.119950978688385e-06, "loss": 0.0082, "step": 27480 }, { "epoch": 0.8179965185306414, "grad_norm": 0.2240760624408722, "learning_rate": 9.118969842620242e-06, "loss": 0.0089, "step": 27490 }, { "epoch": 0.8182940800142829, "grad_norm": 0.2839045822620392, "learning_rate": 9.117988212777108e-06, "loss": 0.0075, "step": 27500 }, { "epoch": 0.8185916414979245, "grad_norm": 0.21372689306735992, "learning_rate": 9.117006089276652e-06, "loss": 0.0076, "step": 27510 }, { "epoch": 0.8188892029815661, "grad_norm": 0.33654075860977173, "learning_rate": 9.116023472236611e-06, "loss": 0.0091, "step": 27520 }, { "epoch": 0.8191867644652077, "grad_norm": 0.33575356006622314, "learning_rate": 9.115040361774783e-06, "loss": 0.0054, "step": 27530 }, { "epoch": 0.8194843259488492, "grad_norm": 0.18162426352500916, "learning_rate": 9.114056758009018e-06, "loss": 0.0061, "step": 27540 }, { "epoch": 0.8197818874324907, "grad_norm": 0.26625096797943115, "learning_rate": 9.11307266105723e-06, "loss": 0.0071, "step": 27550 }, { "epoch": 0.8200794489161323, "grad_norm": 0.2587176561355591, "learning_rate": 9.112088071037391e-06, "loss": 0.0085, "step": 27560 }, { "epoch": 0.8203770103997738, "grad_norm": 0.22836214303970337, "learning_rate": 9.111102988067533e-06, "loss": 0.0056, "step": 27570 }, { "epoch": 0.8206745718834154, "grad_norm": 0.10554267466068268, "learning_rate": 9.110117412265744e-06, "loss": 0.0063, "step": 27580 }, { "epoch": 0.820972133367057, "grad_norm": 0.1859339028596878, "learning_rate": 9.109131343750174e-06, "loss": 0.0062, "step": 27590 }, { "epoch": 0.8212696948506986, "grad_norm": 0.33819738030433655, "learning_rate": 9.10814478263903e-06, "loss": 0.0077, "step": 27600 }, { "epoch": 0.8215672563343401, "grad_norm": 0.18886475265026093, "learning_rate": 9.107157729050582e-06, "loss": 0.0069, "step": 27610 }, { "epoch": 0.8218648178179816, "grad_norm": 0.23861046135425568, "learning_rate": 9.106170183103153e-06, "loss": 0.0071, "step": 27620 }, { "epoch": 0.8221623793016232, "grad_norm": 0.51716148853302, "learning_rate": 9.10518214491513e-06, "loss": 0.0088, "step": 27630 }, { "epoch": 0.8224599407852647, "grad_norm": 0.322328120470047, "learning_rate": 9.10419361460496e-06, "loss": 0.0061, "step": 27640 }, { "epoch": 0.8227575022689063, "grad_norm": 0.23370040953159332, "learning_rate": 9.10320459229114e-06, "loss": 0.01, "step": 27650 }, { "epoch": 0.8230550637525479, "grad_norm": 0.32993611693382263, "learning_rate": 9.102215078092238e-06, "loss": 0.0066, "step": 27660 }, { "epoch": 0.8233526252361895, "grad_norm": 0.42932581901550293, "learning_rate": 9.101225072126872e-06, "loss": 0.0068, "step": 27670 }, { "epoch": 0.823650186719831, "grad_norm": 0.1421288698911667, "learning_rate": 9.100234574513723e-06, "loss": 0.0076, "step": 27680 }, { "epoch": 0.8239477482034725, "grad_norm": 0.12064357846975327, "learning_rate": 9.09924358537153e-06, "loss": 0.0044, "step": 27690 }, { "epoch": 0.8242453096871141, "grad_norm": 0.20950455963611603, "learning_rate": 9.098252104819095e-06, "loss": 0.0055, "step": 27700 }, { "epoch": 0.8245428711707556, "grad_norm": 0.3556748628616333, "learning_rate": 9.097260132975268e-06, "loss": 0.008, "step": 27710 }, { "epoch": 0.8248404326543972, "grad_norm": 0.4526337683200836, "learning_rate": 9.09626766995897e-06, "loss": 0.0108, "step": 27720 }, { "epoch": 0.8251379941380388, "grad_norm": 0.2346649318933487, "learning_rate": 9.095274715889174e-06, "loss": 0.0071, "step": 27730 }, { "epoch": 0.8254355556216804, "grad_norm": 0.1944381147623062, "learning_rate": 9.094281270884913e-06, "loss": 0.0085, "step": 27740 }, { "epoch": 0.8257331171053219, "grad_norm": 0.3206549882888794, "learning_rate": 9.093287335065281e-06, "loss": 0.0043, "step": 27750 }, { "epoch": 0.8260306785889634, "grad_norm": 0.22669686377048492, "learning_rate": 9.092292908549429e-06, "loss": 0.0049, "step": 27760 }, { "epoch": 0.826328240072605, "grad_norm": 0.4051969647407532, "learning_rate": 9.091297991456565e-06, "loss": 0.0101, "step": 27770 }, { "epoch": 0.8266258015562465, "grad_norm": 0.29600611329078674, "learning_rate": 9.09030258390596e-06, "loss": 0.0082, "step": 27780 }, { "epoch": 0.8269233630398881, "grad_norm": 0.1382584571838379, "learning_rate": 9.089306686016942e-06, "loss": 0.0093, "step": 27790 }, { "epoch": 0.8272209245235297, "grad_norm": 0.3553513288497925, "learning_rate": 9.088310297908895e-06, "loss": 0.0081, "step": 27800 }, { "epoch": 0.8275184860071713, "grad_norm": 0.30784493684768677, "learning_rate": 9.087313419701269e-06, "loss": 0.0104, "step": 27810 }, { "epoch": 0.8278160474908128, "grad_norm": 0.19845092296600342, "learning_rate": 9.086316051513564e-06, "loss": 0.0072, "step": 27820 }, { "epoch": 0.8281136089744543, "grad_norm": 0.21334193646907806, "learning_rate": 9.085318193465344e-06, "loss": 0.0053, "step": 27830 }, { "epoch": 0.8284111704580959, "grad_norm": 0.12769997119903564, "learning_rate": 9.08431984567623e-06, "loss": 0.007, "step": 27840 }, { "epoch": 0.8287087319417374, "grad_norm": 0.2822074294090271, "learning_rate": 9.083321008265902e-06, "loss": 0.0076, "step": 27850 }, { "epoch": 0.829006293425379, "grad_norm": 0.4504047930240631, "learning_rate": 9.082321681354101e-06, "loss": 0.0094, "step": 27860 }, { "epoch": 0.8293038549090206, "grad_norm": 0.27031099796295166, "learning_rate": 9.081321865060624e-06, "loss": 0.0094, "step": 27870 }, { "epoch": 0.8296014163926622, "grad_norm": 0.3738146424293518, "learning_rate": 9.080321559505326e-06, "loss": 0.0072, "step": 27880 }, { "epoch": 0.8298989778763037, "grad_norm": 0.32736650109291077, "learning_rate": 9.07932076480812e-06, "loss": 0.0088, "step": 27890 }, { "epoch": 0.8301965393599452, "grad_norm": 0.44481128454208374, "learning_rate": 9.078319481088984e-06, "loss": 0.0057, "step": 27900 }, { "epoch": 0.8304941008435868, "grad_norm": 0.24736547470092773, "learning_rate": 9.077317708467947e-06, "loss": 0.0061, "step": 27910 }, { "epoch": 0.8307916623272283, "grad_norm": 0.25642964243888855, "learning_rate": 9.0763154470651e-06, "loss": 0.0083, "step": 27920 }, { "epoch": 0.8310892238108699, "grad_norm": 0.1416681408882141, "learning_rate": 9.075312697000595e-06, "loss": 0.007, "step": 27930 }, { "epoch": 0.8313867852945115, "grad_norm": 0.1802503913640976, "learning_rate": 9.074309458394638e-06, "loss": 0.0107, "step": 27940 }, { "epoch": 0.8316843467781531, "grad_norm": 0.30045801401138306, "learning_rate": 9.073305731367495e-06, "loss": 0.0072, "step": 27950 }, { "epoch": 0.8319819082617946, "grad_norm": 0.28837481141090393, "learning_rate": 9.072301516039492e-06, "loss": 0.0056, "step": 27960 }, { "epoch": 0.8322794697454361, "grad_norm": 0.1537727564573288, "learning_rate": 9.071296812531011e-06, "loss": 0.0055, "step": 27970 }, { "epoch": 0.8325770312290777, "grad_norm": 0.37929612398147583, "learning_rate": 9.070291620962497e-06, "loss": 0.0044, "step": 27980 }, { "epoch": 0.8328745927127192, "grad_norm": 0.2687872648239136, "learning_rate": 9.06928594145445e-06, "loss": 0.0054, "step": 27990 }, { "epoch": 0.8331721541963608, "grad_norm": 0.25637903809547424, "learning_rate": 9.068279774127424e-06, "loss": 0.0076, "step": 28000 }, { "epoch": 0.8334697156800024, "grad_norm": 0.344940185546875, "learning_rate": 9.067273119102045e-06, "loss": 0.0074, "step": 28010 }, { "epoch": 0.833767277163644, "grad_norm": 0.12546406686306, "learning_rate": 9.066265976498983e-06, "loss": 0.006, "step": 28020 }, { "epoch": 0.8340648386472855, "grad_norm": 0.24366898834705353, "learning_rate": 9.065258346438974e-06, "loss": 0.0073, "step": 28030 }, { "epoch": 0.8343624001309271, "grad_norm": 0.2367687225341797, "learning_rate": 9.064250229042812e-06, "loss": 0.0054, "step": 28040 }, { "epoch": 0.8346599616145686, "grad_norm": 0.21558363735675812, "learning_rate": 9.063241624431347e-06, "loss": 0.0065, "step": 28050 }, { "epoch": 0.8349575230982101, "grad_norm": 0.29097822308540344, "learning_rate": 9.06223253272549e-06, "loss": 0.0069, "step": 28060 }, { "epoch": 0.8352550845818517, "grad_norm": 0.18332548439502716, "learning_rate": 9.06122295404621e-06, "loss": 0.0057, "step": 28070 }, { "epoch": 0.8355526460654933, "grad_norm": 0.22338086366653442, "learning_rate": 9.06021288851453e-06, "loss": 0.0078, "step": 28080 }, { "epoch": 0.8358502075491349, "grad_norm": 0.22344042360782623, "learning_rate": 9.059202336251538e-06, "loss": 0.0072, "step": 28090 }, { "epoch": 0.8361477690327764, "grad_norm": 0.3691119849681854, "learning_rate": 9.058191297378376e-06, "loss": 0.0057, "step": 28100 }, { "epoch": 0.836445330516418, "grad_norm": 0.36871984601020813, "learning_rate": 9.057179772016246e-06, "loss": 0.0065, "step": 28110 }, { "epoch": 0.8367428920000595, "grad_norm": 0.5113356113433838, "learning_rate": 9.056167760286407e-06, "loss": 0.0091, "step": 28120 }, { "epoch": 0.837040453483701, "grad_norm": 0.13768845796585083, "learning_rate": 9.05515526231018e-06, "loss": 0.008, "step": 28130 }, { "epoch": 0.8373380149673426, "grad_norm": 0.054866619408130646, "learning_rate": 9.054142278208937e-06, "loss": 0.0051, "step": 28140 }, { "epoch": 0.8376355764509842, "grad_norm": 0.1628129929304123, "learning_rate": 9.053128808104117e-06, "loss": 0.0081, "step": 28150 }, { "epoch": 0.8379331379346258, "grad_norm": 0.41051724553108215, "learning_rate": 9.052114852117213e-06, "loss": 0.0135, "step": 28160 }, { "epoch": 0.8382306994182673, "grad_norm": 0.3027917146682739, "learning_rate": 9.051100410369772e-06, "loss": 0.0071, "step": 28170 }, { "epoch": 0.8385282609019089, "grad_norm": 0.13623803853988647, "learning_rate": 9.050085482983408e-06, "loss": 0.0046, "step": 28180 }, { "epoch": 0.8388258223855504, "grad_norm": 0.27993443608283997, "learning_rate": 9.049070070079787e-06, "loss": 0.0084, "step": 28190 }, { "epoch": 0.8391233838691919, "grad_norm": 0.1710633635520935, "learning_rate": 9.048054171780633e-06, "loss": 0.0063, "step": 28200 }, { "epoch": 0.8394209453528335, "grad_norm": 0.1946341097354889, "learning_rate": 9.047037788207733e-06, "loss": 0.0056, "step": 28210 }, { "epoch": 0.8397185068364751, "grad_norm": 0.1906120926141739, "learning_rate": 9.04602091948293e-06, "loss": 0.0046, "step": 28220 }, { "epoch": 0.8400160683201167, "grad_norm": 0.37881532311439514, "learning_rate": 9.04500356572812e-06, "loss": 0.0046, "step": 28230 }, { "epoch": 0.8403136298037582, "grad_norm": 0.2054768204689026, "learning_rate": 9.043985727065268e-06, "loss": 0.0063, "step": 28240 }, { "epoch": 0.8406111912873998, "grad_norm": 0.21785834431648254, "learning_rate": 9.042967403616384e-06, "loss": 0.0065, "step": 28250 }, { "epoch": 0.8409087527710413, "grad_norm": 0.3201974034309387, "learning_rate": 9.041948595503548e-06, "loss": 0.0098, "step": 28260 }, { "epoch": 0.8412063142546828, "grad_norm": 0.28587809205055237, "learning_rate": 9.04092930284889e-06, "loss": 0.0081, "step": 28270 }, { "epoch": 0.8415038757383244, "grad_norm": 0.17791415750980377, "learning_rate": 9.039909525774602e-06, "loss": 0.0068, "step": 28280 }, { "epoch": 0.841801437221966, "grad_norm": 0.3492830991744995, "learning_rate": 9.038889264402934e-06, "loss": 0.0059, "step": 28290 }, { "epoch": 0.8420989987056076, "grad_norm": 0.37433624267578125, "learning_rate": 9.037868518856191e-06, "loss": 0.0064, "step": 28300 }, { "epoch": 0.8423965601892491, "grad_norm": 0.3284953236579895, "learning_rate": 9.03684728925674e-06, "loss": 0.0092, "step": 28310 }, { "epoch": 0.8426941216728907, "grad_norm": 0.21770507097244263, "learning_rate": 9.035825575727004e-06, "loss": 0.0057, "step": 28320 }, { "epoch": 0.8429916831565322, "grad_norm": 0.13525672256946564, "learning_rate": 9.034803378389465e-06, "loss": 0.0055, "step": 28330 }, { "epoch": 0.8432892446401737, "grad_norm": 0.06152542307972908, "learning_rate": 9.03378069736666e-06, "loss": 0.007, "step": 28340 }, { "epoch": 0.8435868061238153, "grad_norm": 0.17596100270748138, "learning_rate": 9.032757532781188e-06, "loss": 0.0106, "step": 28350 }, { "epoch": 0.8438843676074569, "grad_norm": 0.46355459094047546, "learning_rate": 9.031733884755704e-06, "loss": 0.0077, "step": 28360 }, { "epoch": 0.8441819290910985, "grad_norm": 0.2931249439716339, "learning_rate": 9.03070975341292e-06, "loss": 0.009, "step": 28370 }, { "epoch": 0.84447949057474, "grad_norm": 0.20413079857826233, "learning_rate": 9.029685138875609e-06, "loss": 0.0044, "step": 28380 }, { "epoch": 0.8447770520583816, "grad_norm": 0.309785932302475, "learning_rate": 9.0286600412666e-06, "loss": 0.0051, "step": 28390 }, { "epoch": 0.8450746135420231, "grad_norm": 0.28999483585357666, "learning_rate": 9.027634460708777e-06, "loss": 0.004, "step": 28400 }, { "epoch": 0.8453721750256646, "grad_norm": 0.45898374915122986, "learning_rate": 9.026608397325088e-06, "loss": 0.0049, "step": 28410 }, { "epoch": 0.8456697365093062, "grad_norm": 0.15946713089942932, "learning_rate": 9.025581851238534e-06, "loss": 0.0079, "step": 28420 }, { "epoch": 0.8459672979929478, "grad_norm": 0.2435189038515091, "learning_rate": 9.024554822572175e-06, "loss": 0.004, "step": 28430 }, { "epoch": 0.8462648594765894, "grad_norm": 0.12205982208251953, "learning_rate": 9.023527311449132e-06, "loss": 0.0066, "step": 28440 }, { "epoch": 0.8465624209602309, "grad_norm": 0.29152899980545044, "learning_rate": 9.02249931799258e-06, "loss": 0.0036, "step": 28450 }, { "epoch": 0.8468599824438725, "grad_norm": 0.38467690348625183, "learning_rate": 9.021470842325751e-06, "loss": 0.0082, "step": 28460 }, { "epoch": 0.847157543927514, "grad_norm": 0.47924795746803284, "learning_rate": 9.02044188457194e-06, "loss": 0.0063, "step": 28470 }, { "epoch": 0.8474551054111555, "grad_norm": 0.2311946302652359, "learning_rate": 9.019412444854494e-06, "loss": 0.0088, "step": 28480 }, { "epoch": 0.8477526668947971, "grad_norm": 0.24871113896369934, "learning_rate": 9.018382523296822e-06, "loss": 0.0066, "step": 28490 }, { "epoch": 0.8480502283784387, "grad_norm": 0.1125253364443779, "learning_rate": 9.017352120022392e-06, "loss": 0.0076, "step": 28500 }, { "epoch": 0.8483477898620803, "grad_norm": 0.6072919368743896, "learning_rate": 9.016321235154721e-06, "loss": 0.0106, "step": 28510 }, { "epoch": 0.8486453513457218, "grad_norm": 0.08939722180366516, "learning_rate": 9.015289868817394e-06, "loss": 0.006, "step": 28520 }, { "epoch": 0.8489429128293634, "grad_norm": 0.32767724990844727, "learning_rate": 9.014258021134048e-06, "loss": 0.0046, "step": 28530 }, { "epoch": 0.849240474313005, "grad_norm": 0.2654733657836914, "learning_rate": 9.01322569222838e-06, "loss": 0.006, "step": 28540 }, { "epoch": 0.8495380357966464, "grad_norm": 0.2708699107170105, "learning_rate": 9.012192882224142e-06, "loss": 0.0146, "step": 28550 }, { "epoch": 0.849835597280288, "grad_norm": 0.24544204771518707, "learning_rate": 9.011159591245147e-06, "loss": 0.0116, "step": 28560 }, { "epoch": 0.8501331587639296, "grad_norm": 0.315294474363327, "learning_rate": 9.010125819415262e-06, "loss": 0.0091, "step": 28570 }, { "epoch": 0.8504307202475712, "grad_norm": 0.2854279577732086, "learning_rate": 9.009091566858416e-06, "loss": 0.0083, "step": 28580 }, { "epoch": 0.8507282817312127, "grad_norm": 0.2828291058540344, "learning_rate": 9.008056833698594e-06, "loss": 0.0081, "step": 28590 }, { "epoch": 0.8510258432148543, "grad_norm": 0.16723757982254028, "learning_rate": 9.007021620059838e-06, "loss": 0.0045, "step": 28600 }, { "epoch": 0.8513234046984959, "grad_norm": 0.18453896045684814, "learning_rate": 9.005985926066245e-06, "loss": 0.0064, "step": 28610 }, { "epoch": 0.8516209661821373, "grad_norm": 0.14161868393421173, "learning_rate": 9.004949751841973e-06, "loss": 0.0096, "step": 28620 }, { "epoch": 0.8519185276657789, "grad_norm": 0.16944217681884766, "learning_rate": 9.00391309751124e-06, "loss": 0.0061, "step": 28630 }, { "epoch": 0.8522160891494205, "grad_norm": 0.3085317313671112, "learning_rate": 9.002875963198315e-06, "loss": 0.0059, "step": 28640 }, { "epoch": 0.8525136506330621, "grad_norm": 0.2983284890651703, "learning_rate": 9.001838349027527e-06, "loss": 0.0046, "step": 28650 }, { "epoch": 0.8528112121167036, "grad_norm": 0.05878368392586708, "learning_rate": 9.000800255123266e-06, "loss": 0.0042, "step": 28660 }, { "epoch": 0.8531087736003452, "grad_norm": 0.17312371730804443, "learning_rate": 8.999761681609977e-06, "loss": 0.0056, "step": 28670 }, { "epoch": 0.8534063350839868, "grad_norm": 0.2190232127904892, "learning_rate": 8.99872262861216e-06, "loss": 0.0041, "step": 28680 }, { "epoch": 0.8537038965676282, "grad_norm": 0.16112683713436127, "learning_rate": 8.997683096254377e-06, "loss": 0.0064, "step": 28690 }, { "epoch": 0.8540014580512698, "grad_norm": 0.28259605169296265, "learning_rate": 8.996643084661245e-06, "loss": 0.0066, "step": 28700 }, { "epoch": 0.8542990195349114, "grad_norm": 0.45713290572166443, "learning_rate": 8.995602593957437e-06, "loss": 0.0092, "step": 28710 }, { "epoch": 0.854596581018553, "grad_norm": 0.272676020860672, "learning_rate": 8.99456162426769e-06, "loss": 0.0062, "step": 28720 }, { "epoch": 0.8548941425021945, "grad_norm": 0.4634319841861725, "learning_rate": 8.993520175716788e-06, "loss": 0.0072, "step": 28730 }, { "epoch": 0.8551917039858361, "grad_norm": 0.25561758875846863, "learning_rate": 8.99247824842958e-06, "loss": 0.0085, "step": 28740 }, { "epoch": 0.8554892654694777, "grad_norm": 0.2520174980163574, "learning_rate": 8.991435842530971e-06, "loss": 0.0082, "step": 28750 }, { "epoch": 0.8557868269531191, "grad_norm": 0.3967890441417694, "learning_rate": 8.990392958145924e-06, "loss": 0.0112, "step": 28760 }, { "epoch": 0.8560843884367607, "grad_norm": 0.2334832102060318, "learning_rate": 8.989349595399455e-06, "loss": 0.0077, "step": 28770 }, { "epoch": 0.8563819499204023, "grad_norm": 0.6002263426780701, "learning_rate": 8.988305754416642e-06, "loss": 0.009, "step": 28780 }, { "epoch": 0.8566795114040439, "grad_norm": 0.20767828822135925, "learning_rate": 8.987261435322622e-06, "loss": 0.0089, "step": 28790 }, { "epoch": 0.8569770728876854, "grad_norm": 0.16828685998916626, "learning_rate": 8.986216638242583e-06, "loss": 0.007, "step": 28800 }, { "epoch": 0.857274634371327, "grad_norm": 0.26421448588371277, "learning_rate": 8.98517136330177e-06, "loss": 0.0058, "step": 28810 }, { "epoch": 0.8575721958549686, "grad_norm": 0.18690814077854156, "learning_rate": 8.984125610625495e-06, "loss": 0.0069, "step": 28820 }, { "epoch": 0.85786975733861, "grad_norm": 0.28863707184791565, "learning_rate": 8.98307938033912e-06, "loss": 0.0072, "step": 28830 }, { "epoch": 0.8581673188222516, "grad_norm": 0.22168952226638794, "learning_rate": 8.982032672568062e-06, "loss": 0.0094, "step": 28840 }, { "epoch": 0.8584648803058932, "grad_norm": 0.06600676476955414, "learning_rate": 8.980985487437801e-06, "loss": 0.0042, "step": 28850 }, { "epoch": 0.8587624417895348, "grad_norm": 0.17175506055355072, "learning_rate": 8.979937825073872e-06, "loss": 0.0061, "step": 28860 }, { "epoch": 0.8590600032731763, "grad_norm": 0.327666699886322, "learning_rate": 8.978889685601865e-06, "loss": 0.0049, "step": 28870 }, { "epoch": 0.8593575647568179, "grad_norm": 0.19009406864643097, "learning_rate": 8.97784106914743e-06, "loss": 0.0055, "step": 28880 }, { "epoch": 0.8596551262404595, "grad_norm": 0.14221641421318054, "learning_rate": 8.976791975836276e-06, "loss": 0.007, "step": 28890 }, { "epoch": 0.8599526877241009, "grad_norm": 0.26422029733657837, "learning_rate": 8.975742405794162e-06, "loss": 0.0096, "step": 28900 }, { "epoch": 0.8602502492077425, "grad_norm": 0.23243607580661774, "learning_rate": 8.97469235914691e-06, "loss": 0.0056, "step": 28910 }, { "epoch": 0.8605478106913841, "grad_norm": 0.13520483672618866, "learning_rate": 8.973641836020401e-06, "loss": 0.0087, "step": 28920 }, { "epoch": 0.8608453721750257, "grad_norm": 0.16091473400592804, "learning_rate": 8.972590836540566e-06, "loss": 0.0051, "step": 28930 }, { "epoch": 0.8611429336586672, "grad_norm": 0.06846651434898376, "learning_rate": 8.971539360833398e-06, "loss": 0.0067, "step": 28940 }, { "epoch": 0.8614404951423088, "grad_norm": 0.1973159909248352, "learning_rate": 8.970487409024949e-06, "loss": 0.0078, "step": 28950 }, { "epoch": 0.8617380566259504, "grad_norm": 0.3194645643234253, "learning_rate": 8.96943498124132e-06, "loss": 0.0078, "step": 28960 }, { "epoch": 0.8620356181095918, "grad_norm": 0.16308388113975525, "learning_rate": 8.968382077608677e-06, "loss": 0.0054, "step": 28970 }, { "epoch": 0.8623331795932334, "grad_norm": 0.1321457028388977, "learning_rate": 8.967328698253241e-06, "loss": 0.007, "step": 28980 }, { "epoch": 0.862630741076875, "grad_norm": 0.28729304671287537, "learning_rate": 8.966274843301287e-06, "loss": 0.0054, "step": 28990 }, { "epoch": 0.8629283025605166, "grad_norm": 0.28088244795799255, "learning_rate": 8.965220512879154e-06, "loss": 0.0067, "step": 29000 }, { "epoch": 0.8632258640441581, "grad_norm": 0.36097055673599243, "learning_rate": 8.964165707113229e-06, "loss": 0.0058, "step": 29010 }, { "epoch": 0.8635234255277997, "grad_norm": 0.613072395324707, "learning_rate": 8.96311042612996e-06, "loss": 0.0051, "step": 29020 }, { "epoch": 0.8638209870114413, "grad_norm": 0.21288810670375824, "learning_rate": 8.962054670055854e-06, "loss": 0.0082, "step": 29030 }, { "epoch": 0.8641185484950828, "grad_norm": 0.1816912740468979, "learning_rate": 8.960998439017474e-06, "loss": 0.005, "step": 29040 }, { "epoch": 0.8644161099787243, "grad_norm": 0.3651971220970154, "learning_rate": 8.959941733141437e-06, "loss": 0.0059, "step": 29050 }, { "epoch": 0.8647136714623659, "grad_norm": 0.17934206128120422, "learning_rate": 8.958884552554421e-06, "loss": 0.0052, "step": 29060 }, { "epoch": 0.8650112329460075, "grad_norm": 0.20283293724060059, "learning_rate": 8.957826897383158e-06, "loss": 0.0055, "step": 29070 }, { "epoch": 0.865308794429649, "grad_norm": 0.22539407014846802, "learning_rate": 8.956768767754437e-06, "loss": 0.0039, "step": 29080 }, { "epoch": 0.8656063559132906, "grad_norm": 0.3166949152946472, "learning_rate": 8.955710163795107e-06, "loss": 0.0077, "step": 29090 }, { "epoch": 0.8659039173969322, "grad_norm": 0.2730715572834015, "learning_rate": 8.954651085632068e-06, "loss": 0.0059, "step": 29100 }, { "epoch": 0.8662014788805737, "grad_norm": 0.17166641354560852, "learning_rate": 8.953591533392285e-06, "loss": 0.0074, "step": 29110 }, { "epoch": 0.8664990403642152, "grad_norm": 0.3127135634422302, "learning_rate": 8.952531507202772e-06, "loss": 0.0052, "step": 29120 }, { "epoch": 0.8667966018478568, "grad_norm": 0.17009027302265167, "learning_rate": 8.951471007190605e-06, "loss": 0.0047, "step": 29130 }, { "epoch": 0.8670941633314984, "grad_norm": 0.2773509621620178, "learning_rate": 8.950410033482915e-06, "loss": 0.0056, "step": 29140 }, { "epoch": 0.8673917248151399, "grad_norm": 0.4927901327610016, "learning_rate": 8.949348586206887e-06, "loss": 0.0128, "step": 29150 }, { "epoch": 0.8676892862987815, "grad_norm": 0.15916332602500916, "learning_rate": 8.948286665489767e-06, "loss": 0.0061, "step": 29160 }, { "epoch": 0.8679868477824231, "grad_norm": 0.11528217792510986, "learning_rate": 8.947224271458859e-06, "loss": 0.0092, "step": 29170 }, { "epoch": 0.8682844092660646, "grad_norm": 0.3863695561885834, "learning_rate": 8.946161404241516e-06, "loss": 0.0062, "step": 29180 }, { "epoch": 0.8685819707497061, "grad_norm": 0.45018550753593445, "learning_rate": 8.945098063965156e-06, "loss": 0.0064, "step": 29190 }, { "epoch": 0.8688795322333477, "grad_norm": 0.21028809249401093, "learning_rate": 8.944034250757249e-06, "loss": 0.0051, "step": 29200 }, { "epoch": 0.8691770937169893, "grad_norm": 0.15352915227413177, "learning_rate": 8.942969964745325e-06, "loss": 0.0066, "step": 29210 }, { "epoch": 0.8694746552006308, "grad_norm": 0.09383045881986618, "learning_rate": 8.941905206056967e-06, "loss": 0.0076, "step": 29220 }, { "epoch": 0.8697722166842724, "grad_norm": 0.27674397826194763, "learning_rate": 8.940839974819816e-06, "loss": 0.0046, "step": 29230 }, { "epoch": 0.870069778167914, "grad_norm": 0.1712418794631958, "learning_rate": 8.939774271161571e-06, "loss": 0.01, "step": 29240 }, { "epoch": 0.8703673396515555, "grad_norm": 0.22149688005447388, "learning_rate": 8.93870809520999e-06, "loss": 0.0103, "step": 29250 }, { "epoch": 0.870664901135197, "grad_norm": 0.3267630636692047, "learning_rate": 8.937641447092876e-06, "loss": 0.0064, "step": 29260 }, { "epoch": 0.8709624626188386, "grad_norm": 0.4262717068195343, "learning_rate": 8.936574326938105e-06, "loss": 0.0092, "step": 29270 }, { "epoch": 0.8712600241024802, "grad_norm": 0.14642277359962463, "learning_rate": 8.935506734873597e-06, "loss": 0.009, "step": 29280 }, { "epoch": 0.8715575855861217, "grad_norm": 0.2756298780441284, "learning_rate": 8.934438671027335e-06, "loss": 0.0059, "step": 29290 }, { "epoch": 0.8718551470697633, "grad_norm": 0.11626622080802917, "learning_rate": 8.933370135527356e-06, "loss": 0.0062, "step": 29300 }, { "epoch": 0.8721527085534049, "grad_norm": 0.21417130529880524, "learning_rate": 8.932301128501754e-06, "loss": 0.008, "step": 29310 }, { "epoch": 0.8724502700370464, "grad_norm": 0.48372432589530945, "learning_rate": 8.931231650078683e-06, "loss": 0.008, "step": 29320 }, { "epoch": 0.8727478315206879, "grad_norm": 0.1889890432357788, "learning_rate": 8.930161700386344e-06, "loss": 0.0053, "step": 29330 }, { "epoch": 0.8730453930043295, "grad_norm": 0.23724746704101562, "learning_rate": 8.929091279553003e-06, "loss": 0.0096, "step": 29340 }, { "epoch": 0.8733429544879711, "grad_norm": 0.12742450833320618, "learning_rate": 8.928020387706985e-06, "loss": 0.0045, "step": 29350 }, { "epoch": 0.8736405159716126, "grad_norm": 0.38300928473472595, "learning_rate": 8.92694902497666e-06, "loss": 0.0064, "step": 29360 }, { "epoch": 0.8739380774552542, "grad_norm": 0.3481825590133667, "learning_rate": 8.925877191490465e-06, "loss": 0.0044, "step": 29370 }, { "epoch": 0.8742356389388958, "grad_norm": 0.39635607600212097, "learning_rate": 8.92480488737689e-06, "loss": 0.0057, "step": 29380 }, { "epoch": 0.8745332004225373, "grad_norm": 0.4740910232067108, "learning_rate": 8.923732112764478e-06, "loss": 0.0104, "step": 29390 }, { "epoch": 0.8748307619061788, "grad_norm": 0.17019973695278168, "learning_rate": 8.922658867781834e-06, "loss": 0.008, "step": 29400 }, { "epoch": 0.8751283233898204, "grad_norm": 0.5216646790504456, "learning_rate": 8.921585152557615e-06, "loss": 0.0076, "step": 29410 }, { "epoch": 0.875425884873462, "grad_norm": 0.2736983001232147, "learning_rate": 8.920510967220537e-06, "loss": 0.0054, "step": 29420 }, { "epoch": 0.8757234463571035, "grad_norm": 0.6192612648010254, "learning_rate": 8.91943631189937e-06, "loss": 0.01, "step": 29430 }, { "epoch": 0.8760210078407451, "grad_norm": 0.3292745351791382, "learning_rate": 8.918361186722944e-06, "loss": 0.0094, "step": 29440 }, { "epoch": 0.8763185693243867, "grad_norm": 0.36095625162124634, "learning_rate": 8.917285591820142e-06, "loss": 0.0064, "step": 29450 }, { "epoch": 0.8766161308080282, "grad_norm": 0.42123332619667053, "learning_rate": 8.916209527319906e-06, "loss": 0.0041, "step": 29460 }, { "epoch": 0.8769136922916697, "grad_norm": 0.20529529452323914, "learning_rate": 8.915132993351229e-06, "loss": 0.0063, "step": 29470 }, { "epoch": 0.8772112537753113, "grad_norm": 0.29772210121154785, "learning_rate": 8.91405599004317e-06, "loss": 0.0067, "step": 29480 }, { "epoch": 0.8775088152589529, "grad_norm": 0.16450724005699158, "learning_rate": 8.912978517524833e-06, "loss": 0.0072, "step": 29490 }, { "epoch": 0.8778063767425944, "grad_norm": 0.0946357473731041, "learning_rate": 8.911900575925385e-06, "loss": 0.0041, "step": 29500 }, { "epoch": 0.878103938226236, "grad_norm": 0.22022101283073425, "learning_rate": 8.910822165374048e-06, "loss": 0.0106, "step": 29510 }, { "epoch": 0.8784014997098776, "grad_norm": 0.1426493376493454, "learning_rate": 8.9097432860001e-06, "loss": 0.0113, "step": 29520 }, { "epoch": 0.8786990611935191, "grad_norm": 0.14717453718185425, "learning_rate": 8.908663937932876e-06, "loss": 0.0104, "step": 29530 }, { "epoch": 0.8789966226771607, "grad_norm": 0.2548091411590576, "learning_rate": 8.907584121301766e-06, "loss": 0.0062, "step": 29540 }, { "epoch": 0.8792941841608022, "grad_norm": 0.11540672183036804, "learning_rate": 8.906503836236215e-06, "loss": 0.0054, "step": 29550 }, { "epoch": 0.8795917456444438, "grad_norm": 0.4704928696155548, "learning_rate": 8.905423082865727e-06, "loss": 0.0073, "step": 29560 }, { "epoch": 0.8798893071280853, "grad_norm": 0.2568216025829315, "learning_rate": 8.904341861319863e-06, "loss": 0.0079, "step": 29570 }, { "epoch": 0.8801868686117269, "grad_norm": 0.32385602593421936, "learning_rate": 8.903260171728234e-06, "loss": 0.0083, "step": 29580 }, { "epoch": 0.8804844300953685, "grad_norm": 0.1236925721168518, "learning_rate": 8.902178014220513e-06, "loss": 0.0085, "step": 29590 }, { "epoch": 0.88078199157901, "grad_norm": 0.5165863037109375, "learning_rate": 8.901095388926426e-06, "loss": 0.01, "step": 29600 }, { "epoch": 0.8810795530626516, "grad_norm": 0.3488670289516449, "learning_rate": 8.900012295975759e-06, "loss": 0.0096, "step": 29610 }, { "epoch": 0.8813771145462931, "grad_norm": 0.14591729640960693, "learning_rate": 8.89892873549835e-06, "loss": 0.0081, "step": 29620 }, { "epoch": 0.8816746760299347, "grad_norm": 0.14660485088825226, "learning_rate": 8.897844707624092e-06, "loss": 0.005, "step": 29630 }, { "epoch": 0.8819722375135762, "grad_norm": 0.3084566593170166, "learning_rate": 8.89676021248294e-06, "loss": 0.0073, "step": 29640 }, { "epoch": 0.8822697989972178, "grad_norm": 0.27290233969688416, "learning_rate": 8.895675250204898e-06, "loss": 0.0059, "step": 29650 }, { "epoch": 0.8825673604808594, "grad_norm": 0.2731853723526001, "learning_rate": 8.894589820920032e-06, "loss": 0.007, "step": 29660 }, { "epoch": 0.882864921964501, "grad_norm": 0.2847110629081726, "learning_rate": 8.893503924758463e-06, "loss": 0.0081, "step": 29670 }, { "epoch": 0.8831624834481425, "grad_norm": 0.29228660464286804, "learning_rate": 8.892417561850359e-06, "loss": 0.0104, "step": 29680 }, { "epoch": 0.883460044931784, "grad_norm": 0.07321317493915558, "learning_rate": 8.89133073232596e-06, "loss": 0.0082, "step": 29690 }, { "epoch": 0.8837576064154256, "grad_norm": 0.21116949617862701, "learning_rate": 8.890243436315549e-06, "loss": 0.0049, "step": 29700 }, { "epoch": 0.8840551678990671, "grad_norm": 0.18287278711795807, "learning_rate": 8.889155673949469e-06, "loss": 0.006, "step": 29710 }, { "epoch": 0.8843527293827087, "grad_norm": 0.1962156891822815, "learning_rate": 8.88806744535812e-06, "loss": 0.0045, "step": 29720 }, { "epoch": 0.8846502908663503, "grad_norm": 0.23840004205703735, "learning_rate": 8.886978750671955e-06, "loss": 0.0096, "step": 29730 }, { "epoch": 0.8849478523499918, "grad_norm": 0.30997100472450256, "learning_rate": 8.885889590021488e-06, "loss": 0.0081, "step": 29740 }, { "epoch": 0.8852454138336334, "grad_norm": 0.33951422572135925, "learning_rate": 8.884799963537285e-06, "loss": 0.0059, "step": 29750 }, { "epoch": 0.8855429753172749, "grad_norm": 0.2552538514137268, "learning_rate": 8.883709871349966e-06, "loss": 0.0055, "step": 29760 }, { "epoch": 0.8858405368009165, "grad_norm": 0.3076663315296173, "learning_rate": 8.882619313590212e-06, "loss": 0.007, "step": 29770 }, { "epoch": 0.886138098284558, "grad_norm": 0.2107084095478058, "learning_rate": 8.881528290388757e-06, "loss": 0.0068, "step": 29780 }, { "epoch": 0.8864356597681996, "grad_norm": 0.4745737612247467, "learning_rate": 8.88043680187639e-06, "loss": 0.0107, "step": 29790 }, { "epoch": 0.8867332212518412, "grad_norm": 0.27093684673309326, "learning_rate": 8.879344848183957e-06, "loss": 0.0048, "step": 29800 }, { "epoch": 0.8870307827354827, "grad_norm": 0.18241648375988007, "learning_rate": 8.878252429442358e-06, "loss": 0.0067, "step": 29810 }, { "epoch": 0.8873283442191243, "grad_norm": 0.2284761369228363, "learning_rate": 8.877159545782554e-06, "loss": 0.0067, "step": 29820 }, { "epoch": 0.8876259057027658, "grad_norm": 0.3717103898525238, "learning_rate": 8.876066197335556e-06, "loss": 0.0086, "step": 29830 }, { "epoch": 0.8879234671864074, "grad_norm": 0.12411895394325256, "learning_rate": 8.87497238423243e-06, "loss": 0.0062, "step": 29840 }, { "epoch": 0.8882210286700489, "grad_norm": 0.11342485249042511, "learning_rate": 8.873878106604305e-06, "loss": 0.005, "step": 29850 }, { "epoch": 0.8885185901536905, "grad_norm": 0.31595224142074585, "learning_rate": 8.87278336458236e-06, "loss": 0.0044, "step": 29860 }, { "epoch": 0.8888161516373321, "grad_norm": 0.18661363422870636, "learning_rate": 8.871688158297828e-06, "loss": 0.011, "step": 29870 }, { "epoch": 0.8891137131209736, "grad_norm": 0.06722011417150497, "learning_rate": 8.870592487882003e-06, "loss": 0.0079, "step": 29880 }, { "epoch": 0.8894112746046152, "grad_norm": 0.08093524724245071, "learning_rate": 8.869496353466232e-06, "loss": 0.0048, "step": 29890 }, { "epoch": 0.8897088360882567, "grad_norm": 0.11531519144773483, "learning_rate": 8.868399755181916e-06, "loss": 0.0059, "step": 29900 }, { "epoch": 0.8900063975718983, "grad_norm": 0.17846357822418213, "learning_rate": 8.867302693160513e-06, "loss": 0.0067, "step": 29910 }, { "epoch": 0.8903039590555398, "grad_norm": 0.20251035690307617, "learning_rate": 8.86620516753354e-06, "loss": 0.0069, "step": 29920 }, { "epoch": 0.8906015205391814, "grad_norm": 1.1831415891647339, "learning_rate": 8.865107178432566e-06, "loss": 0.0108, "step": 29930 }, { "epoch": 0.890899082022823, "grad_norm": 0.32990920543670654, "learning_rate": 8.864008725989214e-06, "loss": 0.008, "step": 29940 }, { "epoch": 0.8911966435064645, "grad_norm": 0.18953675031661987, "learning_rate": 8.862909810335166e-06, "loss": 0.0054, "step": 29950 }, { "epoch": 0.8914942049901061, "grad_norm": 0.18012267351150513, "learning_rate": 8.861810431602156e-06, "loss": 0.0069, "step": 29960 }, { "epoch": 0.8917917664737477, "grad_norm": 0.17179742455482483, "learning_rate": 8.860710589921978e-06, "loss": 0.0086, "step": 29970 }, { "epoch": 0.8920893279573892, "grad_norm": 0.2152920365333557, "learning_rate": 8.85961028542648e-06, "loss": 0.004, "step": 29980 }, { "epoch": 0.8923868894410307, "grad_norm": 0.20314864814281464, "learning_rate": 8.85850951824756e-06, "loss": 0.0045, "step": 29990 }, { "epoch": 0.8926844509246723, "grad_norm": 0.4023996591567993, "learning_rate": 8.85740828851718e-06, "loss": 0.0072, "step": 30000 }, { "epoch": 0.8929820124083139, "grad_norm": 0.22343002259731293, "learning_rate": 8.856306596367351e-06, "loss": 0.0064, "step": 30010 }, { "epoch": 0.8932795738919554, "grad_norm": 0.034607693552970886, "learning_rate": 8.855204441930147e-06, "loss": 0.0044, "step": 30020 }, { "epoch": 0.893577135375597, "grad_norm": 0.2793228328227997, "learning_rate": 8.854101825337687e-06, "loss": 0.0069, "step": 30030 }, { "epoch": 0.8938746968592386, "grad_norm": 0.11244816333055496, "learning_rate": 8.852998746722152e-06, "loss": 0.0056, "step": 30040 }, { "epoch": 0.89417225834288, "grad_norm": 0.28590208292007446, "learning_rate": 8.851895206215777e-06, "loss": 0.0068, "step": 30050 }, { "epoch": 0.8944698198265216, "grad_norm": 0.4163980484008789, "learning_rate": 8.850791203950855e-06, "loss": 0.0079, "step": 30060 }, { "epoch": 0.8947673813101632, "grad_norm": 0.23973391950130463, "learning_rate": 8.84968674005973e-06, "loss": 0.0046, "step": 30070 }, { "epoch": 0.8950649427938048, "grad_norm": 0.09772370010614395, "learning_rate": 8.848581814674803e-06, "loss": 0.0058, "step": 30080 }, { "epoch": 0.8953625042774463, "grad_norm": 0.1578727811574936, "learning_rate": 8.847476427928532e-06, "loss": 0.0059, "step": 30090 }, { "epoch": 0.8956600657610879, "grad_norm": 0.24211877584457397, "learning_rate": 8.846370579953425e-06, "loss": 0.0077, "step": 30100 }, { "epoch": 0.8959576272447295, "grad_norm": 0.2859814167022705, "learning_rate": 8.845264270882054e-06, "loss": 0.0077, "step": 30110 }, { "epoch": 0.896255188728371, "grad_norm": 0.18263372778892517, "learning_rate": 8.84415750084704e-06, "loss": 0.0041, "step": 30120 }, { "epoch": 0.8965527502120125, "grad_norm": 0.4272487163543701, "learning_rate": 8.843050269981057e-06, "loss": 0.0071, "step": 30130 }, { "epoch": 0.8968503116956541, "grad_norm": 0.3187211751937866, "learning_rate": 8.841942578416844e-06, "loss": 0.0077, "step": 30140 }, { "epoch": 0.8971478731792957, "grad_norm": 0.3817135989665985, "learning_rate": 8.840834426287184e-06, "loss": 0.0076, "step": 30150 }, { "epoch": 0.8974454346629372, "grad_norm": 0.3276672065258026, "learning_rate": 8.839725813724924e-06, "loss": 0.0052, "step": 30160 }, { "epoch": 0.8977429961465788, "grad_norm": 0.07894857972860336, "learning_rate": 8.838616740862962e-06, "loss": 0.007, "step": 30170 }, { "epoch": 0.8980405576302204, "grad_norm": 0.2566416263580322, "learning_rate": 8.837507207834249e-06, "loss": 0.0054, "step": 30180 }, { "epoch": 0.8983381191138619, "grad_norm": 0.44213899970054626, "learning_rate": 8.836397214771796e-06, "loss": 0.0077, "step": 30190 }, { "epoch": 0.8986356805975034, "grad_norm": 0.09986725449562073, "learning_rate": 8.835286761808666e-06, "loss": 0.0067, "step": 30200 }, { "epoch": 0.898933242081145, "grad_norm": 0.4276945888996124, "learning_rate": 8.834175849077979e-06, "loss": 0.0055, "step": 30210 }, { "epoch": 0.8992308035647866, "grad_norm": 0.22161594033241272, "learning_rate": 8.833064476712911e-06, "loss": 0.0054, "step": 30220 }, { "epoch": 0.8995283650484281, "grad_norm": 0.23008640110492706, "learning_rate": 8.831952644846689e-06, "loss": 0.0065, "step": 30230 }, { "epoch": 0.8998259265320697, "grad_norm": 0.4975872039794922, "learning_rate": 8.830840353612597e-06, "loss": 0.0076, "step": 30240 }, { "epoch": 0.9001234880157113, "grad_norm": 0.32673898339271545, "learning_rate": 8.829727603143976e-06, "loss": 0.005, "step": 30250 }, { "epoch": 0.9004210494993528, "grad_norm": 0.17232917249202728, "learning_rate": 8.82861439357422e-06, "loss": 0.0063, "step": 30260 }, { "epoch": 0.9007186109829943, "grad_norm": 0.13157449662685394, "learning_rate": 8.82750072503678e-06, "loss": 0.0071, "step": 30270 }, { "epoch": 0.9010161724666359, "grad_norm": 0.2477608025074005, "learning_rate": 8.82638659766516e-06, "loss": 0.0051, "step": 30280 }, { "epoch": 0.9013137339502775, "grad_norm": 0.11863505840301514, "learning_rate": 8.825272011592918e-06, "loss": 0.0053, "step": 30290 }, { "epoch": 0.901611295433919, "grad_norm": 0.2038070410490036, "learning_rate": 8.82415696695367e-06, "loss": 0.0056, "step": 30300 }, { "epoch": 0.9019088569175606, "grad_norm": 0.13485059142112732, "learning_rate": 8.823041463881086e-06, "loss": 0.0046, "step": 30310 }, { "epoch": 0.9022064184012022, "grad_norm": 0.21981684863567352, "learning_rate": 8.82192550250889e-06, "loss": 0.0044, "step": 30320 }, { "epoch": 0.9025039798848437, "grad_norm": 0.20173996686935425, "learning_rate": 8.82080908297086e-06, "loss": 0.0056, "step": 30330 }, { "epoch": 0.9028015413684852, "grad_norm": 0.38890907168388367, "learning_rate": 8.819692205400835e-06, "loss": 0.0051, "step": 30340 }, { "epoch": 0.9030991028521268, "grad_norm": 0.23006638884544373, "learning_rate": 8.8185748699327e-06, "loss": 0.0034, "step": 30350 }, { "epoch": 0.9033966643357684, "grad_norm": 0.2069072127342224, "learning_rate": 8.8174570767004e-06, "loss": 0.0093, "step": 30360 }, { "epoch": 0.90369422581941, "grad_norm": 0.317109078168869, "learning_rate": 8.816338825837936e-06, "loss": 0.0062, "step": 30370 }, { "epoch": 0.9039917873030515, "grad_norm": 0.09538739174604416, "learning_rate": 8.81522011747936e-06, "loss": 0.006, "step": 30380 }, { "epoch": 0.9042893487866931, "grad_norm": 0.3296467959880829, "learning_rate": 8.814100951758782e-06, "loss": 0.0048, "step": 30390 }, { "epoch": 0.9045869102703346, "grad_norm": 0.19902904331684113, "learning_rate": 8.812981328810363e-06, "loss": 0.0047, "step": 30400 }, { "epoch": 0.9048844717539761, "grad_norm": 0.29499122500419617, "learning_rate": 8.811861248768328e-06, "loss": 0.0098, "step": 30410 }, { "epoch": 0.9051820332376177, "grad_norm": 0.6816072463989258, "learning_rate": 8.810740711766944e-06, "loss": 0.0072, "step": 30420 }, { "epoch": 0.9054795947212593, "grad_norm": 0.21024425327777863, "learning_rate": 8.80961971794054e-06, "loss": 0.0059, "step": 30430 }, { "epoch": 0.9057771562049008, "grad_norm": 0.25863686203956604, "learning_rate": 8.8084982674235e-06, "loss": 0.0073, "step": 30440 }, { "epoch": 0.9060747176885424, "grad_norm": 0.1521761268377304, "learning_rate": 8.807376360350261e-06, "loss": 0.0071, "step": 30450 }, { "epoch": 0.906372279172184, "grad_norm": 0.2909493148326874, "learning_rate": 8.806253996855319e-06, "loss": 0.0073, "step": 30460 }, { "epoch": 0.9066698406558256, "grad_norm": 0.13068325817584991, "learning_rate": 8.805131177073215e-06, "loss": 0.0072, "step": 30470 }, { "epoch": 0.906967402139467, "grad_norm": 0.24423055350780487, "learning_rate": 8.804007901138552e-06, "loss": 0.0086, "step": 30480 }, { "epoch": 0.9072649636231086, "grad_norm": 0.14549824595451355, "learning_rate": 8.80288416918599e-06, "loss": 0.006, "step": 30490 }, { "epoch": 0.9075625251067502, "grad_norm": 0.15800677239894867, "learning_rate": 8.801759981350237e-06, "loss": 0.008, "step": 30500 }, { "epoch": 0.9078600865903917, "grad_norm": 0.42017221450805664, "learning_rate": 8.80063533776606e-06, "loss": 0.0101, "step": 30510 }, { "epoch": 0.9081576480740333, "grad_norm": 0.1910107284784317, "learning_rate": 8.799510238568278e-06, "loss": 0.0059, "step": 30520 }, { "epoch": 0.9084552095576749, "grad_norm": 0.1362983137369156, "learning_rate": 8.798384683891769e-06, "loss": 0.0044, "step": 30530 }, { "epoch": 0.9087527710413165, "grad_norm": 0.3959886133670807, "learning_rate": 8.797258673871459e-06, "loss": 0.0068, "step": 30540 }, { "epoch": 0.9090503325249579, "grad_norm": 0.16833244264125824, "learning_rate": 8.796132208642333e-06, "loss": 0.009, "step": 30550 }, { "epoch": 0.9093478940085995, "grad_norm": 0.35425499081611633, "learning_rate": 8.79500528833943e-06, "loss": 0.0081, "step": 30560 }, { "epoch": 0.9096454554922411, "grad_norm": 0.2030251920223236, "learning_rate": 8.793877913097844e-06, "loss": 0.0047, "step": 30570 }, { "epoch": 0.9099430169758826, "grad_norm": 0.30581122636795044, "learning_rate": 8.792750083052722e-06, "loss": 0.0073, "step": 30580 }, { "epoch": 0.9102405784595242, "grad_norm": 0.12587299942970276, "learning_rate": 8.791621798339264e-06, "loss": 0.0042, "step": 30590 }, { "epoch": 0.9105381399431658, "grad_norm": 0.16093602776527405, "learning_rate": 8.790493059092733e-06, "loss": 0.0054, "step": 30600 }, { "epoch": 0.9108357014268074, "grad_norm": 0.3862050175666809, "learning_rate": 8.789363865448434e-06, "loss": 0.0056, "step": 30610 }, { "epoch": 0.9111332629104488, "grad_norm": 0.1860198974609375, "learning_rate": 8.788234217541738e-06, "loss": 0.0056, "step": 30620 }, { "epoch": 0.9114308243940904, "grad_norm": 0.37934479117393494, "learning_rate": 8.787104115508062e-06, "loss": 0.0073, "step": 30630 }, { "epoch": 0.911728385877732, "grad_norm": 0.11370909959077835, "learning_rate": 8.785973559482881e-06, "loss": 0.0039, "step": 30640 }, { "epoch": 0.9120259473613735, "grad_norm": 0.1609254777431488, "learning_rate": 8.784842549601725e-06, "loss": 0.008, "step": 30650 }, { "epoch": 0.9123235088450151, "grad_norm": 0.1325770765542984, "learning_rate": 8.783711086000177e-06, "loss": 0.0037, "step": 30660 }, { "epoch": 0.9126210703286567, "grad_norm": 0.41811180114746094, "learning_rate": 8.782579168813874e-06, "loss": 0.005, "step": 30670 }, { "epoch": 0.9129186318122983, "grad_norm": 0.12654529511928558, "learning_rate": 8.78144679817851e-06, "loss": 0.0062, "step": 30680 }, { "epoch": 0.9132161932959397, "grad_norm": 0.15697965025901794, "learning_rate": 8.78031397422983e-06, "loss": 0.0066, "step": 30690 }, { "epoch": 0.9135137547795813, "grad_norm": 0.21093130111694336, "learning_rate": 8.779180697103637e-06, "loss": 0.0097, "step": 30700 }, { "epoch": 0.9138113162632229, "grad_norm": 0.19435888528823853, "learning_rate": 8.778046966935784e-06, "loss": 0.0071, "step": 30710 }, { "epoch": 0.9141088777468644, "grad_norm": 0.3701653480529785, "learning_rate": 8.776912783862182e-06, "loss": 0.0104, "step": 30720 }, { "epoch": 0.914406439230506, "grad_norm": 0.30063799023628235, "learning_rate": 8.775778148018795e-06, "loss": 0.0051, "step": 30730 }, { "epoch": 0.9147040007141476, "grad_norm": 0.12488551437854767, "learning_rate": 8.77464305954164e-06, "loss": 0.0051, "step": 30740 }, { "epoch": 0.9150015621977892, "grad_norm": 0.1413818597793579, "learning_rate": 8.773507518566792e-06, "loss": 0.0055, "step": 30750 }, { "epoch": 0.9152991236814306, "grad_norm": 0.38397082686424255, "learning_rate": 8.772371525230373e-06, "loss": 0.0077, "step": 30760 }, { "epoch": 0.9155966851650722, "grad_norm": 0.2430122196674347, "learning_rate": 8.771235079668569e-06, "loss": 0.0061, "step": 30770 }, { "epoch": 0.9158942466487138, "grad_norm": 0.389453262090683, "learning_rate": 8.770098182017615e-06, "loss": 0.0059, "step": 30780 }, { "epoch": 0.9161918081323553, "grad_norm": 0.2579396069049835, "learning_rate": 8.768960832413795e-06, "loss": 0.0072, "step": 30790 }, { "epoch": 0.9164893696159969, "grad_norm": 0.07543160766363144, "learning_rate": 8.767823030993458e-06, "loss": 0.0119, "step": 30800 }, { "epoch": 0.9167869310996385, "grad_norm": 0.10165770351886749, "learning_rate": 8.766684777892997e-06, "loss": 0.0101, "step": 30810 }, { "epoch": 0.9170844925832801, "grad_norm": 0.1080731451511383, "learning_rate": 8.76554607324887e-06, "loss": 0.0064, "step": 30820 }, { "epoch": 0.9173820540669215, "grad_norm": 0.12392587214708328, "learning_rate": 8.764406917197577e-06, "loss": 0.0061, "step": 30830 }, { "epoch": 0.9176796155505631, "grad_norm": 0.13170795142650604, "learning_rate": 8.76326730987568e-06, "loss": 0.0077, "step": 30840 }, { "epoch": 0.9179771770342047, "grad_norm": 0.16909849643707275, "learning_rate": 8.762127251419795e-06, "loss": 0.0039, "step": 30850 }, { "epoch": 0.9182747385178462, "grad_norm": 0.17278830707073212, "learning_rate": 8.760986741966585e-06, "loss": 0.0056, "step": 30860 }, { "epoch": 0.9185723000014878, "grad_norm": 0.4207233786582947, "learning_rate": 8.75984578165278e-06, "loss": 0.0042, "step": 30870 }, { "epoch": 0.9188698614851294, "grad_norm": 0.3086806833744049, "learning_rate": 8.758704370615152e-06, "loss": 0.0064, "step": 30880 }, { "epoch": 0.919167422968771, "grad_norm": 0.137636199593544, "learning_rate": 8.757562508990531e-06, "loss": 0.0052, "step": 30890 }, { "epoch": 0.9194649844524124, "grad_norm": 0.21224352717399597, "learning_rate": 8.756420196915802e-06, "loss": 0.0085, "step": 30900 }, { "epoch": 0.919762545936054, "grad_norm": 0.1718035787343979, "learning_rate": 8.755277434527903e-06, "loss": 0.008, "step": 30910 }, { "epoch": 0.9200601074196956, "grad_norm": 0.33285045623779297, "learning_rate": 8.754134221963828e-06, "loss": 0.0125, "step": 30920 }, { "epoch": 0.9203576689033371, "grad_norm": 0.120655357837677, "learning_rate": 8.752990559360621e-06, "loss": 0.0127, "step": 30930 }, { "epoch": 0.9206552303869787, "grad_norm": 0.26970887184143066, "learning_rate": 8.751846446855385e-06, "loss": 0.0029, "step": 30940 }, { "epoch": 0.9209527918706203, "grad_norm": 0.3083607256412506, "learning_rate": 8.750701884585274e-06, "loss": 0.0113, "step": 30950 }, { "epoch": 0.9212503533542619, "grad_norm": 0.08396840840578079, "learning_rate": 8.749556872687493e-06, "loss": 0.0045, "step": 30960 }, { "epoch": 0.9215479148379034, "grad_norm": 0.08159751445055008, "learning_rate": 8.748411411299307e-06, "loss": 0.006, "step": 30970 }, { "epoch": 0.9218454763215449, "grad_norm": 0.2398056834936142, "learning_rate": 8.74726550055803e-06, "loss": 0.0067, "step": 30980 }, { "epoch": 0.9221430378051865, "grad_norm": 0.22197653353214264, "learning_rate": 8.746119140601033e-06, "loss": 0.008, "step": 30990 }, { "epoch": 0.922440599288828, "grad_norm": 0.2796866297721863, "learning_rate": 8.74497233156574e-06, "loss": 0.0069, "step": 31000 }, { "epoch": 0.9227381607724696, "grad_norm": 0.2348196804523468, "learning_rate": 8.743825073589629e-06, "loss": 0.0057, "step": 31010 }, { "epoch": 0.9230357222561112, "grad_norm": 0.3439062237739563, "learning_rate": 8.742677366810229e-06, "loss": 0.009, "step": 31020 }, { "epoch": 0.9233332837397528, "grad_norm": 0.3568434417247772, "learning_rate": 8.741529211365126e-06, "loss": 0.006, "step": 31030 }, { "epoch": 0.9236308452233943, "grad_norm": 0.2099914848804474, "learning_rate": 8.740380607391958e-06, "loss": 0.0062, "step": 31040 }, { "epoch": 0.9239284067070358, "grad_norm": 0.226994127035141, "learning_rate": 8.73923155502842e-06, "loss": 0.0055, "step": 31050 }, { "epoch": 0.9242259681906774, "grad_norm": 0.2639252841472626, "learning_rate": 8.738082054412259e-06, "loss": 0.0068, "step": 31060 }, { "epoch": 0.924523529674319, "grad_norm": 0.11751439422369003, "learning_rate": 8.736932105681272e-06, "loss": 0.004, "step": 31070 }, { "epoch": 0.9248210911579605, "grad_norm": 0.42576295137405396, "learning_rate": 8.735781708973314e-06, "loss": 0.0063, "step": 31080 }, { "epoch": 0.9251186526416021, "grad_norm": 0.08569356054067612, "learning_rate": 8.734630864426294e-06, "loss": 0.0066, "step": 31090 }, { "epoch": 0.9254162141252437, "grad_norm": 0.4707294702529907, "learning_rate": 8.733479572178172e-06, "loss": 0.0086, "step": 31100 }, { "epoch": 0.9257137756088852, "grad_norm": 0.43066897988319397, "learning_rate": 8.732327832366963e-06, "loss": 0.0063, "step": 31110 }, { "epoch": 0.9260113370925267, "grad_norm": 0.13114717602729797, "learning_rate": 8.731175645130734e-06, "loss": 0.0051, "step": 31120 }, { "epoch": 0.9263088985761683, "grad_norm": 0.461001455783844, "learning_rate": 8.730023010607611e-06, "loss": 0.0055, "step": 31130 }, { "epoch": 0.9266064600598098, "grad_norm": 0.08847326785326004, "learning_rate": 8.728869928935767e-06, "loss": 0.0044, "step": 31140 }, { "epoch": 0.9269040215434514, "grad_norm": 0.25644686818122864, "learning_rate": 8.727716400253432e-06, "loss": 0.0065, "step": 31150 }, { "epoch": 0.927201583027093, "grad_norm": 0.2281002253293991, "learning_rate": 8.72656242469889e-06, "loss": 0.006, "step": 31160 }, { "epoch": 0.9274991445107346, "grad_norm": 0.2447938621044159, "learning_rate": 8.725408002410476e-06, "loss": 0.0044, "step": 31170 }, { "epoch": 0.9277967059943761, "grad_norm": 0.3928476572036743, "learning_rate": 8.72425313352658e-06, "loss": 0.0082, "step": 31180 }, { "epoch": 0.9280942674780176, "grad_norm": 0.15778273344039917, "learning_rate": 8.72309781818565e-06, "loss": 0.0041, "step": 31190 }, { "epoch": 0.9283918289616592, "grad_norm": 0.39751216769218445, "learning_rate": 8.721942056526176e-06, "loss": 0.008, "step": 31200 }, { "epoch": 0.9286893904453007, "grad_norm": 0.5365007519721985, "learning_rate": 8.720785848686715e-06, "loss": 0.0052, "step": 31210 }, { "epoch": 0.9289869519289423, "grad_norm": 0.12215153872966766, "learning_rate": 8.719629194805868e-06, "loss": 0.0064, "step": 31220 }, { "epoch": 0.9292845134125839, "grad_norm": 0.27414003014564514, "learning_rate": 8.718472095022292e-06, "loss": 0.0058, "step": 31230 }, { "epoch": 0.9295820748962255, "grad_norm": 0.09620242565870285, "learning_rate": 8.7173145494747e-06, "loss": 0.0072, "step": 31240 }, { "epoch": 0.929879636379867, "grad_norm": 0.1512899547815323, "learning_rate": 8.716156558301858e-06, "loss": 0.0059, "step": 31250 }, { "epoch": 0.9301771978635085, "grad_norm": 0.1811109483242035, "learning_rate": 8.714998121642581e-06, "loss": 0.0046, "step": 31260 }, { "epoch": 0.9304747593471501, "grad_norm": 0.4882468581199646, "learning_rate": 8.713839239635742e-06, "loss": 0.0081, "step": 31270 }, { "epoch": 0.9307723208307916, "grad_norm": 0.14308330416679382, "learning_rate": 8.712679912420263e-06, "loss": 0.0074, "step": 31280 }, { "epoch": 0.9310698823144332, "grad_norm": 0.27788764238357544, "learning_rate": 8.711520140135127e-06, "loss": 0.0102, "step": 31290 }, { "epoch": 0.9313674437980748, "grad_norm": 0.3653249144554138, "learning_rate": 8.710359922919363e-06, "loss": 0.0065, "step": 31300 }, { "epoch": 0.9316650052817164, "grad_norm": 0.3253200650215149, "learning_rate": 8.709199260912054e-06, "loss": 0.0045, "step": 31310 }, { "epoch": 0.9319625667653579, "grad_norm": 0.23188428580760956, "learning_rate": 8.708038154252341e-06, "loss": 0.0054, "step": 31320 }, { "epoch": 0.9322601282489994, "grad_norm": 0.3828508257865906, "learning_rate": 8.706876603079412e-06, "loss": 0.0078, "step": 31330 }, { "epoch": 0.932557689732641, "grad_norm": 0.5329213738441467, "learning_rate": 8.705714607532517e-06, "loss": 0.0064, "step": 31340 }, { "epoch": 0.9328552512162825, "grad_norm": 0.17800693213939667, "learning_rate": 8.70455216775095e-06, "loss": 0.0064, "step": 31350 }, { "epoch": 0.9331528126999241, "grad_norm": 0.7087182998657227, "learning_rate": 8.703389283874065e-06, "loss": 0.0158, "step": 31360 }, { "epoch": 0.9334503741835657, "grad_norm": 0.13367004692554474, "learning_rate": 8.702225956041264e-06, "loss": 0.0078, "step": 31370 }, { "epoch": 0.9337479356672073, "grad_norm": 0.2437422126531601, "learning_rate": 8.701062184392005e-06, "loss": 0.0055, "step": 31380 }, { "epoch": 0.9340454971508488, "grad_norm": 0.19674895703792572, "learning_rate": 8.699897969065798e-06, "loss": 0.0077, "step": 31390 }, { "epoch": 0.9343430586344903, "grad_norm": 0.1996590793132782, "learning_rate": 8.69873331020221e-06, "loss": 0.0054, "step": 31400 }, { "epoch": 0.9346406201181319, "grad_norm": 0.19985739886760712, "learning_rate": 8.697568207940858e-06, "loss": 0.0053, "step": 31410 }, { "epoch": 0.9349381816017734, "grad_norm": 0.17664404213428497, "learning_rate": 8.69640266242141e-06, "loss": 0.0059, "step": 31420 }, { "epoch": 0.935235743085415, "grad_norm": 0.2053299993276596, "learning_rate": 8.695236673783594e-06, "loss": 0.0066, "step": 31430 }, { "epoch": 0.9355333045690566, "grad_norm": 0.7533550262451172, "learning_rate": 8.69407024216718e-06, "loss": 0.0046, "step": 31440 }, { "epoch": 0.9358308660526982, "grad_norm": 0.18998464941978455, "learning_rate": 8.692903367712004e-06, "loss": 0.0052, "step": 31450 }, { "epoch": 0.9361284275363397, "grad_norm": 0.3079676628112793, "learning_rate": 8.691736050557947e-06, "loss": 0.0087, "step": 31460 }, { "epoch": 0.9364259890199813, "grad_norm": 0.23812110722064972, "learning_rate": 8.690568290844941e-06, "loss": 0.0078, "step": 31470 }, { "epoch": 0.9367235505036228, "grad_norm": 0.2916300594806671, "learning_rate": 8.689400088712981e-06, "loss": 0.0066, "step": 31480 }, { "epoch": 0.9370211119872643, "grad_norm": 0.09261263161897659, "learning_rate": 8.688231444302107e-06, "loss": 0.0083, "step": 31490 }, { "epoch": 0.9373186734709059, "grad_norm": 0.26914113759994507, "learning_rate": 8.687062357752412e-06, "loss": 0.0058, "step": 31500 }, { "epoch": 0.9376162349545475, "grad_norm": 0.33951300382614136, "learning_rate": 8.685892829204046e-06, "loss": 0.0073, "step": 31510 }, { "epoch": 0.9379137964381891, "grad_norm": 0.057058073580265045, "learning_rate": 8.68472285879721e-06, "loss": 0.0058, "step": 31520 }, { "epoch": 0.9382113579218306, "grad_norm": 0.31921690702438354, "learning_rate": 8.683552446672157e-06, "loss": 0.0091, "step": 31530 }, { "epoch": 0.9385089194054722, "grad_norm": 0.18024443089962006, "learning_rate": 8.682381592969196e-06, "loss": 0.0037, "step": 31540 }, { "epoch": 0.9388064808891137, "grad_norm": 0.05861928313970566, "learning_rate": 8.681210297828683e-06, "loss": 0.0074, "step": 31550 }, { "epoch": 0.9391040423727552, "grad_norm": 0.3277406692504883, "learning_rate": 8.680038561391037e-06, "loss": 0.0091, "step": 31560 }, { "epoch": 0.9394016038563968, "grad_norm": 0.0969482883810997, "learning_rate": 8.678866383796718e-06, "loss": 0.0084, "step": 31570 }, { "epoch": 0.9396991653400384, "grad_norm": 0.2473544478416443, "learning_rate": 8.677693765186248e-06, "loss": 0.0066, "step": 31580 }, { "epoch": 0.93999672682368, "grad_norm": 0.5428946614265442, "learning_rate": 8.676520705700198e-06, "loss": 0.0088, "step": 31590 }, { "epoch": 0.9402942883073215, "grad_norm": 0.2773357629776001, "learning_rate": 8.67534720547919e-06, "loss": 0.0072, "step": 31600 }, { "epoch": 0.9405918497909631, "grad_norm": 0.250612735748291, "learning_rate": 8.674173264663903e-06, "loss": 0.005, "step": 31610 }, { "epoch": 0.9408894112746046, "grad_norm": 0.192270427942276, "learning_rate": 8.672998883395068e-06, "loss": 0.0057, "step": 31620 }, { "epoch": 0.9411869727582461, "grad_norm": 0.2508808970451355, "learning_rate": 8.671824061813466e-06, "loss": 0.0041, "step": 31630 }, { "epoch": 0.9414845342418877, "grad_norm": 0.39888831973075867, "learning_rate": 8.670648800059931e-06, "loss": 0.0083, "step": 31640 }, { "epoch": 0.9417820957255293, "grad_norm": 0.25278300046920776, "learning_rate": 8.669473098275356e-06, "loss": 0.0063, "step": 31650 }, { "epoch": 0.9420796572091709, "grad_norm": 0.3798010051250458, "learning_rate": 8.668296956600681e-06, "loss": 0.0085, "step": 31660 }, { "epoch": 0.9423772186928124, "grad_norm": 0.48068705201148987, "learning_rate": 8.667120375176896e-06, "loss": 0.0063, "step": 31670 }, { "epoch": 0.942674780176454, "grad_norm": 0.1647948920726776, "learning_rate": 8.665943354145052e-06, "loss": 0.0061, "step": 31680 }, { "epoch": 0.9429723416600955, "grad_norm": 0.28191325068473816, "learning_rate": 8.664765893646246e-06, "loss": 0.0091, "step": 31690 }, { "epoch": 0.943269903143737, "grad_norm": 0.20541998744010925, "learning_rate": 8.663587993821628e-06, "loss": 0.007, "step": 31700 }, { "epoch": 0.9435674646273786, "grad_norm": 0.22964997589588165, "learning_rate": 8.662409654812408e-06, "loss": 0.0036, "step": 31710 }, { "epoch": 0.9438650261110202, "grad_norm": 0.16983141005039215, "learning_rate": 8.661230876759838e-06, "loss": 0.0068, "step": 31720 }, { "epoch": 0.9441625875946618, "grad_norm": 0.7295041084289551, "learning_rate": 8.660051659805231e-06, "loss": 0.0079, "step": 31730 }, { "epoch": 0.9444601490783033, "grad_norm": 0.19452053308486938, "learning_rate": 8.658872004089947e-06, "loss": 0.0075, "step": 31740 }, { "epoch": 0.9447577105619449, "grad_norm": 0.17408521473407745, "learning_rate": 8.657691909755404e-06, "loss": 0.006, "step": 31750 }, { "epoch": 0.9450552720455864, "grad_norm": 0.592324435710907, "learning_rate": 8.656511376943068e-06, "loss": 0.0085, "step": 31760 }, { "epoch": 0.9453528335292279, "grad_norm": 0.2682846784591675, "learning_rate": 8.65533040579446e-06, "loss": 0.0074, "step": 31770 }, { "epoch": 0.9456503950128695, "grad_norm": 0.22872520983219147, "learning_rate": 8.65414899645115e-06, "loss": 0.0088, "step": 31780 }, { "epoch": 0.9459479564965111, "grad_norm": 0.16838447749614716, "learning_rate": 8.652967149054768e-06, "loss": 0.0062, "step": 31790 }, { "epoch": 0.9462455179801527, "grad_norm": 0.3668053150177002, "learning_rate": 8.651784863746988e-06, "loss": 0.009, "step": 31800 }, { "epoch": 0.9465430794637942, "grad_norm": 0.35396069288253784, "learning_rate": 8.650602140669543e-06, "loss": 0.007, "step": 31810 }, { "epoch": 0.9468406409474358, "grad_norm": 0.2715469300746918, "learning_rate": 8.649418979964214e-06, "loss": 0.0063, "step": 31820 }, { "epoch": 0.9471382024310773, "grad_norm": 0.11585015803575516, "learning_rate": 8.648235381772835e-06, "loss": 0.0047, "step": 31830 }, { "epoch": 0.9474357639147188, "grad_norm": 0.3142949342727661, "learning_rate": 8.647051346237299e-06, "loss": 0.0053, "step": 31840 }, { "epoch": 0.9477333253983604, "grad_norm": 0.34262901544570923, "learning_rate": 8.64586687349954e-06, "loss": 0.0054, "step": 31850 }, { "epoch": 0.948030886882002, "grad_norm": 0.2346678227186203, "learning_rate": 8.644681963701556e-06, "loss": 0.0045, "step": 31860 }, { "epoch": 0.9483284483656436, "grad_norm": 0.11970458179712296, "learning_rate": 8.643496616985386e-06, "loss": 0.0107, "step": 31870 }, { "epoch": 0.9486260098492851, "grad_norm": 0.06369517743587494, "learning_rate": 8.64231083349313e-06, "loss": 0.0059, "step": 31880 }, { "epoch": 0.9489235713329267, "grad_norm": 0.09591595828533173, "learning_rate": 8.64112461336694e-06, "loss": 0.0056, "step": 31890 }, { "epoch": 0.9492211328165682, "grad_norm": 0.2613758146762848, "learning_rate": 8.639937956749016e-06, "loss": 0.0066, "step": 31900 }, { "epoch": 0.9495186943002097, "grad_norm": 0.3436281681060791, "learning_rate": 8.638750863781614e-06, "loss": 0.0098, "step": 31910 }, { "epoch": 0.9498162557838513, "grad_norm": 0.20736317336559296, "learning_rate": 8.637563334607036e-06, "loss": 0.0089, "step": 31920 }, { "epoch": 0.9501138172674929, "grad_norm": 0.2551878094673157, "learning_rate": 8.636375369367647e-06, "loss": 0.0061, "step": 31930 }, { "epoch": 0.9504113787511345, "grad_norm": 0.19914890825748444, "learning_rate": 8.635186968205854e-06, "loss": 0.0068, "step": 31940 }, { "epoch": 0.950708940234776, "grad_norm": 0.1860296130180359, "learning_rate": 8.63399813126412e-06, "loss": 0.0045, "step": 31950 }, { "epoch": 0.9510065017184176, "grad_norm": 0.22863496840000153, "learning_rate": 8.632808858684965e-06, "loss": 0.0055, "step": 31960 }, { "epoch": 0.9513040632020592, "grad_norm": 0.12173270434141159, "learning_rate": 8.631619150610953e-06, "loss": 0.0059, "step": 31970 }, { "epoch": 0.9516016246857006, "grad_norm": 0.19609349966049194, "learning_rate": 8.630429007184707e-06, "loss": 0.0066, "step": 31980 }, { "epoch": 0.9518991861693422, "grad_norm": 0.2818564176559448, "learning_rate": 8.629238428548896e-06, "loss": 0.0063, "step": 31990 }, { "epoch": 0.9521967476529838, "grad_norm": 0.3396468758583069, "learning_rate": 8.628047414846247e-06, "loss": 0.0076, "step": 32000 }, { "epoch": 0.9524943091366254, "grad_norm": 0.17251387238502502, "learning_rate": 8.626855966219538e-06, "loss": 0.0047, "step": 32010 }, { "epoch": 0.9527918706202669, "grad_norm": 0.4080025851726532, "learning_rate": 8.625664082811595e-06, "loss": 0.007, "step": 32020 }, { "epoch": 0.9530894321039085, "grad_norm": 0.1476832777261734, "learning_rate": 8.624471764765298e-06, "loss": 0.0054, "step": 32030 }, { "epoch": 0.9533869935875501, "grad_norm": 0.16585218906402588, "learning_rate": 8.623279012223585e-06, "loss": 0.005, "step": 32040 }, { "epoch": 0.9536845550711915, "grad_norm": 0.40213635563850403, "learning_rate": 8.622085825329436e-06, "loss": 0.006, "step": 32050 }, { "epoch": 0.9539821165548331, "grad_norm": 0.2050161063671112, "learning_rate": 8.62089220422589e-06, "loss": 0.0049, "step": 32060 }, { "epoch": 0.9542796780384747, "grad_norm": 0.40716391801834106, "learning_rate": 8.619698149056038e-06, "loss": 0.0083, "step": 32070 }, { "epoch": 0.9545772395221163, "grad_norm": 0.21336936950683594, "learning_rate": 8.618503659963018e-06, "loss": 0.0062, "step": 32080 }, { "epoch": 0.9548748010057578, "grad_norm": 0.11381100118160248, "learning_rate": 8.617308737090027e-06, "loss": 0.0083, "step": 32090 }, { "epoch": 0.9551723624893994, "grad_norm": 0.12182091921567917, "learning_rate": 8.616113380580305e-06, "loss": 0.0064, "step": 32100 }, { "epoch": 0.955469923973041, "grad_norm": 0.09381615370512009, "learning_rate": 8.614917590577155e-06, "loss": 0.0065, "step": 32110 }, { "epoch": 0.9557674854566824, "grad_norm": 0.30788886547088623, "learning_rate": 8.613721367223924e-06, "loss": 0.0071, "step": 32120 }, { "epoch": 0.956065046940324, "grad_norm": 0.26880699396133423, "learning_rate": 8.612524710664012e-06, "loss": 0.0053, "step": 32130 }, { "epoch": 0.9563626084239656, "grad_norm": 0.3838159739971161, "learning_rate": 8.611327621040873e-06, "loss": 0.0087, "step": 32140 }, { "epoch": 0.9566601699076072, "grad_norm": 0.14360743761062622, "learning_rate": 8.610130098498014e-06, "loss": 0.0062, "step": 32150 }, { "epoch": 0.9569577313912487, "grad_norm": 0.17338372766971588, "learning_rate": 8.608932143178987e-06, "loss": 0.0067, "step": 32160 }, { "epoch": 0.9572552928748903, "grad_norm": 0.25478917360305786, "learning_rate": 8.607733755227406e-06, "loss": 0.0052, "step": 32170 }, { "epoch": 0.9575528543585319, "grad_norm": 0.38297927379608154, "learning_rate": 8.60653493478693e-06, "loss": 0.0052, "step": 32180 }, { "epoch": 0.9578504158421733, "grad_norm": 0.15847206115722656, "learning_rate": 8.60533568200127e-06, "loss": 0.0052, "step": 32190 }, { "epoch": 0.9581479773258149, "grad_norm": 0.15528729557991028, "learning_rate": 8.604135997014192e-06, "loss": 0.0101, "step": 32200 }, { "epoch": 0.9584455388094565, "grad_norm": 0.2118617296218872, "learning_rate": 8.602935879969511e-06, "loss": 0.0055, "step": 32210 }, { "epoch": 0.9587431002930981, "grad_norm": 0.44637739658355713, "learning_rate": 8.601735331011096e-06, "loss": 0.0052, "step": 32220 }, { "epoch": 0.9590406617767396, "grad_norm": 0.21968892216682434, "learning_rate": 8.600534350282865e-06, "loss": 0.0061, "step": 32230 }, { "epoch": 0.9593382232603812, "grad_norm": 0.36245429515838623, "learning_rate": 8.599332937928793e-06, "loss": 0.0046, "step": 32240 }, { "epoch": 0.9596357847440228, "grad_norm": 0.21212175488471985, "learning_rate": 8.5981310940929e-06, "loss": 0.0071, "step": 32250 }, { "epoch": 0.9599333462276642, "grad_norm": 0.29311415553092957, "learning_rate": 8.596928818919262e-06, "loss": 0.0075, "step": 32260 }, { "epoch": 0.9602309077113058, "grad_norm": 0.20767457783222198, "learning_rate": 8.595726112552006e-06, "loss": 0.0055, "step": 32270 }, { "epoch": 0.9605284691949474, "grad_norm": 0.5494725704193115, "learning_rate": 8.59452297513531e-06, "loss": 0.0087, "step": 32280 }, { "epoch": 0.960826030678589, "grad_norm": 0.17203395068645477, "learning_rate": 8.593319406813402e-06, "loss": 0.0059, "step": 32290 }, { "epoch": 0.9611235921622305, "grad_norm": 0.15351292490959167, "learning_rate": 8.592115407730571e-06, "loss": 0.0067, "step": 32300 }, { "epoch": 0.9614211536458721, "grad_norm": 0.26875773072242737, "learning_rate": 8.590910978031141e-06, "loss": 0.0056, "step": 32310 }, { "epoch": 0.9617187151295137, "grad_norm": 0.1337185502052307, "learning_rate": 8.589706117859501e-06, "loss": 0.005, "step": 32320 }, { "epoch": 0.9620162766131551, "grad_norm": 0.22867664694786072, "learning_rate": 8.58850082736009e-06, "loss": 0.005, "step": 32330 }, { "epoch": 0.9623138380967967, "grad_norm": 0.44352656602859497, "learning_rate": 8.58729510667739e-06, "loss": 0.0077, "step": 32340 }, { "epoch": 0.9626113995804383, "grad_norm": 0.13575764000415802, "learning_rate": 8.58608895595595e-06, "loss": 0.0058, "step": 32350 }, { "epoch": 0.9629089610640799, "grad_norm": 0.24635207653045654, "learning_rate": 8.58488237534035e-06, "loss": 0.004, "step": 32360 }, { "epoch": 0.9632065225477214, "grad_norm": 0.2041739672422409, "learning_rate": 8.583675364975242e-06, "loss": 0.0059, "step": 32370 }, { "epoch": 0.963504084031363, "grad_norm": 0.2963026165962219, "learning_rate": 8.582467925005316e-06, "loss": 0.0076, "step": 32380 }, { "epoch": 0.9638016455150046, "grad_norm": 0.2542828321456909, "learning_rate": 8.581260055575318e-06, "loss": 0.0049, "step": 32390 }, { "epoch": 0.964099206998646, "grad_norm": 0.17244523763656616, "learning_rate": 8.580051756830048e-06, "loss": 0.0064, "step": 32400 }, { "epoch": 0.9643967684822876, "grad_norm": 0.5898179411888123, "learning_rate": 8.578843028914351e-06, "loss": 0.0102, "step": 32410 }, { "epoch": 0.9646943299659292, "grad_norm": 0.23324517905712128, "learning_rate": 8.57763387197313e-06, "loss": 0.008, "step": 32420 }, { "epoch": 0.9649918914495708, "grad_norm": 0.22617332637310028, "learning_rate": 8.576424286151335e-06, "loss": 0.0055, "step": 32430 }, { "epoch": 0.9652894529332123, "grad_norm": 0.4206101596355438, "learning_rate": 8.57521427159397e-06, "loss": 0.0055, "step": 32440 }, { "epoch": 0.9655870144168539, "grad_norm": 0.18297210335731506, "learning_rate": 8.574003828446088e-06, "loss": 0.0061, "step": 32450 }, { "epoch": 0.9658845759004955, "grad_norm": 0.29117077589035034, "learning_rate": 8.572792956852798e-06, "loss": 0.0104, "step": 32460 }, { "epoch": 0.966182137384137, "grad_norm": 0.13671395182609558, "learning_rate": 8.571581656959254e-06, "loss": 0.0054, "step": 32470 }, { "epoch": 0.9664796988677785, "grad_norm": 0.29470089077949524, "learning_rate": 8.570369928910664e-06, "loss": 0.0051, "step": 32480 }, { "epoch": 0.9667772603514201, "grad_norm": 0.3216587007045746, "learning_rate": 8.569157772852292e-06, "loss": 0.0056, "step": 32490 }, { "epoch": 0.9670748218350617, "grad_norm": 0.21015846729278564, "learning_rate": 8.567945188929448e-06, "loss": 0.0043, "step": 32500 }, { "epoch": 0.9673723833187032, "grad_norm": 0.17325034737586975, "learning_rate": 8.56673217728749e-06, "loss": 0.007, "step": 32510 }, { "epoch": 0.9676699448023448, "grad_norm": 0.16316112875938416, "learning_rate": 8.565518738071836e-06, "loss": 0.0052, "step": 32520 }, { "epoch": 0.9679675062859864, "grad_norm": 0.34236401319503784, "learning_rate": 8.564304871427951e-06, "loss": 0.0079, "step": 32530 }, { "epoch": 0.968265067769628, "grad_norm": 0.18375280499458313, "learning_rate": 8.56309057750135e-06, "loss": 0.0073, "step": 32540 }, { "epoch": 0.9685626292532694, "grad_norm": 0.11024104058742523, "learning_rate": 8.5618758564376e-06, "loss": 0.0046, "step": 32550 }, { "epoch": 0.968860190736911, "grad_norm": 0.49373602867126465, "learning_rate": 8.56066070838232e-06, "loss": 0.0107, "step": 32560 }, { "epoch": 0.9691577522205526, "grad_norm": 0.2166183590888977, "learning_rate": 8.55944513348118e-06, "loss": 0.0054, "step": 32570 }, { "epoch": 0.9694553137041941, "grad_norm": 0.12070154398679733, "learning_rate": 8.558229131879902e-06, "loss": 0.0083, "step": 32580 }, { "epoch": 0.9697528751878357, "grad_norm": 0.2003699392080307, "learning_rate": 8.557012703724257e-06, "loss": 0.0032, "step": 32590 }, { "epoch": 0.9700504366714773, "grad_norm": 0.33960145711898804, "learning_rate": 8.555795849160069e-06, "loss": 0.0057, "step": 32600 }, { "epoch": 0.9703479981551189, "grad_norm": 0.28268924355506897, "learning_rate": 8.554578568333213e-06, "loss": 0.005, "step": 32610 }, { "epoch": 0.9706455596387603, "grad_norm": 0.052511632442474365, "learning_rate": 8.553360861389613e-06, "loss": 0.0029, "step": 32620 }, { "epoch": 0.9709431211224019, "grad_norm": 0.1856713742017746, "learning_rate": 8.552142728475245e-06, "loss": 0.0076, "step": 32630 }, { "epoch": 0.9712406826060435, "grad_norm": 0.3454279601573944, "learning_rate": 8.550924169736138e-06, "loss": 0.0073, "step": 32640 }, { "epoch": 0.971538244089685, "grad_norm": 0.18809911608695984, "learning_rate": 8.549705185318374e-06, "loss": 0.0075, "step": 32650 }, { "epoch": 0.9718358055733266, "grad_norm": 0.1675788313150406, "learning_rate": 8.548485775368077e-06, "loss": 0.0171, "step": 32660 }, { "epoch": 0.9721333670569682, "grad_norm": 0.23099809885025024, "learning_rate": 8.547265940031428e-06, "loss": 0.0052, "step": 32670 }, { "epoch": 0.9724309285406098, "grad_norm": 0.19349491596221924, "learning_rate": 8.546045679454664e-06, "loss": 0.0082, "step": 32680 }, { "epoch": 0.9727284900242512, "grad_norm": 0.2761494517326355, "learning_rate": 8.544824993784064e-06, "loss": 0.006, "step": 32690 }, { "epoch": 0.9730260515078928, "grad_norm": 0.4844661355018616, "learning_rate": 8.54360388316596e-06, "loss": 0.0058, "step": 32700 }, { "epoch": 0.9733236129915344, "grad_norm": 0.19458279013633728, "learning_rate": 8.542382347746743e-06, "loss": 0.0062, "step": 32710 }, { "epoch": 0.9736211744751759, "grad_norm": 0.09141598641872406, "learning_rate": 8.54116038767284e-06, "loss": 0.0057, "step": 32720 }, { "epoch": 0.9739187359588175, "grad_norm": 0.33667245507240295, "learning_rate": 8.539938003090747e-06, "loss": 0.009, "step": 32730 }, { "epoch": 0.9742162974424591, "grad_norm": 0.06265678256750107, "learning_rate": 8.538715194146991e-06, "loss": 0.0076, "step": 32740 }, { "epoch": 0.9745138589261007, "grad_norm": 0.16449101269245148, "learning_rate": 8.537491960988169e-06, "loss": 0.0054, "step": 32750 }, { "epoch": 0.9748114204097421, "grad_norm": 0.28690433502197266, "learning_rate": 8.536268303760915e-06, "loss": 0.0045, "step": 32760 }, { "epoch": 0.9751089818933837, "grad_norm": 0.17409677803516388, "learning_rate": 8.535044222611923e-06, "loss": 0.0069, "step": 32770 }, { "epoch": 0.9754065433770253, "grad_norm": 0.30649492144584656, "learning_rate": 8.53381971768793e-06, "loss": 0.0095, "step": 32780 }, { "epoch": 0.9757041048606668, "grad_norm": 0.23914189636707306, "learning_rate": 8.532594789135727e-06, "loss": 0.0045, "step": 32790 }, { "epoch": 0.9760016663443084, "grad_norm": 0.21418872475624084, "learning_rate": 8.53136943710216e-06, "loss": 0.0075, "step": 32800 }, { "epoch": 0.97629922782795, "grad_norm": 0.15231697261333466, "learning_rate": 8.53014366173412e-06, "loss": 0.0081, "step": 32810 }, { "epoch": 0.9765967893115916, "grad_norm": 0.3571346700191498, "learning_rate": 8.528917463178551e-06, "loss": 0.0067, "step": 32820 }, { "epoch": 0.976894350795233, "grad_norm": 0.20626334846019745, "learning_rate": 8.527690841582448e-06, "loss": 0.0046, "step": 32830 }, { "epoch": 0.9771919122788746, "grad_norm": 0.18845871090888977, "learning_rate": 8.526463797092855e-06, "loss": 0.003, "step": 32840 }, { "epoch": 0.9774894737625162, "grad_norm": 0.17471756041049957, "learning_rate": 8.525236329856869e-06, "loss": 0.0057, "step": 32850 }, { "epoch": 0.9777870352461577, "grad_norm": 0.020081518217921257, "learning_rate": 8.524008440021636e-06, "loss": 0.0088, "step": 32860 }, { "epoch": 0.9780845967297993, "grad_norm": 0.29500168561935425, "learning_rate": 8.522780127734355e-06, "loss": 0.006, "step": 32870 }, { "epoch": 0.9783821582134409, "grad_norm": 0.692511260509491, "learning_rate": 8.521551393142273e-06, "loss": 0.0049, "step": 32880 }, { "epoch": 0.9786797196970825, "grad_norm": 0.29124370217323303, "learning_rate": 8.520322236392688e-06, "loss": 0.0043, "step": 32890 }, { "epoch": 0.9789772811807239, "grad_norm": 0.18834379315376282, "learning_rate": 8.519092657632949e-06, "loss": 0.0077, "step": 32900 }, { "epoch": 0.9792748426643655, "grad_norm": 0.43657341599464417, "learning_rate": 8.517862657010456e-06, "loss": 0.0097, "step": 32910 }, { "epoch": 0.9795724041480071, "grad_norm": 0.18376503884792328, "learning_rate": 8.516632234672662e-06, "loss": 0.0104, "step": 32920 }, { "epoch": 0.9798699656316486, "grad_norm": 0.19704918563365936, "learning_rate": 8.515401390767064e-06, "loss": 0.0053, "step": 32930 }, { "epoch": 0.9801675271152902, "grad_norm": 0.19118092954158783, "learning_rate": 8.514170125441214e-06, "loss": 0.006, "step": 32940 }, { "epoch": 0.9804650885989318, "grad_norm": 0.180504709482193, "learning_rate": 8.512938438842717e-06, "loss": 0.0068, "step": 32950 }, { "epoch": 0.9807626500825734, "grad_norm": 0.12227251380681992, "learning_rate": 8.511706331119223e-06, "loss": 0.0093, "step": 32960 }, { "epoch": 0.9810602115662149, "grad_norm": 0.329673171043396, "learning_rate": 8.510473802418435e-06, "loss": 0.0057, "step": 32970 }, { "epoch": 0.9813577730498564, "grad_norm": 0.29856279492378235, "learning_rate": 8.509240852888106e-06, "loss": 0.0044, "step": 32980 }, { "epoch": 0.981655334533498, "grad_norm": 0.2639288008213043, "learning_rate": 8.508007482676041e-06, "loss": 0.0053, "step": 32990 }, { "epoch": 0.9819528960171395, "grad_norm": 0.3681119382381439, "learning_rate": 8.506773691930096e-06, "loss": 0.0055, "step": 33000 }, { "epoch": 0.9822504575007811, "grad_norm": 0.627420961856842, "learning_rate": 8.505539480798173e-06, "loss": 0.0052, "step": 33010 }, { "epoch": 0.9825480189844227, "grad_norm": 0.19630788266658783, "learning_rate": 8.504304849428226e-06, "loss": 0.0064, "step": 33020 }, { "epoch": 0.9828455804680643, "grad_norm": 0.11886635422706604, "learning_rate": 8.503069797968264e-06, "loss": 0.0057, "step": 33030 }, { "epoch": 0.9831431419517058, "grad_norm": 0.38830140233039856, "learning_rate": 8.501834326566341e-06, "loss": 0.0067, "step": 33040 }, { "epoch": 0.9834407034353473, "grad_norm": 0.2674577534198761, "learning_rate": 8.500598435370563e-06, "loss": 0.01, "step": 33050 }, { "epoch": 0.9837382649189889, "grad_norm": 0.210089772939682, "learning_rate": 8.499362124529086e-06, "loss": 0.0119, "step": 33060 }, { "epoch": 0.9840358264026304, "grad_norm": 0.2053828239440918, "learning_rate": 8.498125394190119e-06, "loss": 0.0084, "step": 33070 }, { "epoch": 0.984333387886272, "grad_norm": 0.14498795568943024, "learning_rate": 8.496888244501917e-06, "loss": 0.0066, "step": 33080 }, { "epoch": 0.9846309493699136, "grad_norm": 0.11699993163347244, "learning_rate": 8.495650675612788e-06, "loss": 0.0095, "step": 33090 }, { "epoch": 0.9849285108535552, "grad_norm": 0.4401053786277771, "learning_rate": 8.49441268767109e-06, "loss": 0.0077, "step": 33100 }, { "epoch": 0.9852260723371967, "grad_norm": 0.186671182513237, "learning_rate": 8.49317428082523e-06, "loss": 0.0085, "step": 33110 }, { "epoch": 0.9855236338208382, "grad_norm": 0.13442657887935638, "learning_rate": 8.491935455223668e-06, "loss": 0.0057, "step": 33120 }, { "epoch": 0.9858211953044798, "grad_norm": 0.3159641921520233, "learning_rate": 8.490696211014908e-06, "loss": 0.0047, "step": 33130 }, { "epoch": 0.9861187567881213, "grad_norm": 0.21555715799331665, "learning_rate": 8.489456548347514e-06, "loss": 0.0085, "step": 33140 }, { "epoch": 0.9864163182717629, "grad_norm": 0.1397123634815216, "learning_rate": 8.488216467370092e-06, "loss": 0.0106, "step": 33150 }, { "epoch": 0.9867138797554045, "grad_norm": 0.17955940961837769, "learning_rate": 8.486975968231299e-06, "loss": 0.0075, "step": 33160 }, { "epoch": 0.987011441239046, "grad_norm": 0.2594596743583679, "learning_rate": 8.485735051079846e-06, "loss": 0.0082, "step": 33170 }, { "epoch": 0.9873090027226876, "grad_norm": 0.29369768500328064, "learning_rate": 8.484493716064492e-06, "loss": 0.0061, "step": 33180 }, { "epoch": 0.9876065642063291, "grad_norm": 0.2651720643043518, "learning_rate": 8.483251963334047e-06, "loss": 0.0047, "step": 33190 }, { "epoch": 0.9879041256899707, "grad_norm": 0.2903236150741577, "learning_rate": 8.482009793037369e-06, "loss": 0.0053, "step": 33200 }, { "epoch": 0.9882016871736122, "grad_norm": 0.5626542568206787, "learning_rate": 8.480767205323366e-06, "loss": 0.0069, "step": 33210 }, { "epoch": 0.9884992486572538, "grad_norm": 0.5609534978866577, "learning_rate": 8.479524200340998e-06, "loss": 0.0053, "step": 33220 }, { "epoch": 0.9887968101408954, "grad_norm": 0.4298420250415802, "learning_rate": 8.478280778239276e-06, "loss": 0.0066, "step": 33230 }, { "epoch": 0.989094371624537, "grad_norm": 0.27082738280296326, "learning_rate": 8.477036939167257e-06, "loss": 0.0071, "step": 33240 }, { "epoch": 0.9893919331081785, "grad_norm": 0.14414258301258087, "learning_rate": 8.47579268327405e-06, "loss": 0.0051, "step": 33250 }, { "epoch": 0.98968949459182, "grad_norm": 0.3582945466041565, "learning_rate": 8.474548010708819e-06, "loss": 0.0043, "step": 33260 }, { "epoch": 0.9899870560754616, "grad_norm": 0.08925255388021469, "learning_rate": 8.473302921620767e-06, "loss": 0.005, "step": 33270 }, { "epoch": 0.9902846175591031, "grad_norm": 0.10715233534574509, "learning_rate": 8.472057416159156e-06, "loss": 0.008, "step": 33280 }, { "epoch": 0.9905821790427447, "grad_norm": 0.19078585505485535, "learning_rate": 8.470811494473296e-06, "loss": 0.0077, "step": 33290 }, { "epoch": 0.9908797405263863, "grad_norm": 0.306915283203125, "learning_rate": 8.469565156712544e-06, "loss": 0.0058, "step": 33300 }, { "epoch": 0.9911773020100278, "grad_norm": 0.12197300791740417, "learning_rate": 8.468318403026312e-06, "loss": 0.0058, "step": 33310 }, { "epoch": 0.9914748634936694, "grad_norm": 0.13866490125656128, "learning_rate": 8.467071233564054e-06, "loss": 0.006, "step": 33320 }, { "epoch": 0.9917724249773109, "grad_norm": 1.286124587059021, "learning_rate": 8.465823648475283e-06, "loss": 0.0093, "step": 33330 }, { "epoch": 0.9920699864609525, "grad_norm": 0.3107824921607971, "learning_rate": 8.464575647909552e-06, "loss": 0.0071, "step": 33340 }, { "epoch": 0.992367547944594, "grad_norm": 0.14821989834308624, "learning_rate": 8.463327232016474e-06, "loss": 0.005, "step": 33350 }, { "epoch": 0.9926651094282356, "grad_norm": 0.2035806030035019, "learning_rate": 8.462078400945705e-06, "loss": 0.0098, "step": 33360 }, { "epoch": 0.9929626709118772, "grad_norm": 0.2054886668920517, "learning_rate": 8.460829154846955e-06, "loss": 0.004, "step": 33370 }, { "epoch": 0.9932602323955187, "grad_norm": 0.4715837836265564, "learning_rate": 8.459579493869979e-06, "loss": 0.0059, "step": 33380 }, { "epoch": 0.9935577938791603, "grad_norm": 0.2675012946128845, "learning_rate": 8.458329418164582e-06, "loss": 0.0065, "step": 33390 }, { "epoch": 0.9938553553628018, "grad_norm": 0.28235185146331787, "learning_rate": 8.457078927880625e-06, "loss": 0.0061, "step": 33400 }, { "epoch": 0.9941529168464434, "grad_norm": 0.23182542622089386, "learning_rate": 8.455828023168014e-06, "loss": 0.006, "step": 33410 }, { "epoch": 0.9944504783300849, "grad_norm": 0.4598105549812317, "learning_rate": 8.454576704176705e-06, "loss": 0.0078, "step": 33420 }, { "epoch": 0.9947480398137265, "grad_norm": 0.20012633502483368, "learning_rate": 8.453324971056702e-06, "loss": 0.0058, "step": 33430 }, { "epoch": 0.9950456012973681, "grad_norm": 0.34837400913238525, "learning_rate": 8.452072823958062e-06, "loss": 0.0088, "step": 33440 }, { "epoch": 0.9953431627810096, "grad_norm": 0.33997926115989685, "learning_rate": 8.450820263030891e-06, "loss": 0.0064, "step": 33450 }, { "epoch": 0.9956407242646512, "grad_norm": 0.05826393887400627, "learning_rate": 8.449567288425343e-06, "loss": 0.0041, "step": 33460 }, { "epoch": 0.9959382857482928, "grad_norm": 0.2095080018043518, "learning_rate": 8.448313900291623e-06, "loss": 0.0103, "step": 33470 }, { "epoch": 0.9962358472319343, "grad_norm": 0.27564695477485657, "learning_rate": 8.447060098779984e-06, "loss": 0.0055, "step": 33480 }, { "epoch": 0.9965334087155758, "grad_norm": 0.26288652420043945, "learning_rate": 8.445805884040732e-06, "loss": 0.0073, "step": 33490 }, { "epoch": 0.9968309701992174, "grad_norm": 0.09240790456533432, "learning_rate": 8.444551256224215e-06, "loss": 0.0045, "step": 33500 }, { "epoch": 0.997128531682859, "grad_norm": 0.20890288054943085, "learning_rate": 8.443296215480843e-06, "loss": 0.0039, "step": 33510 }, { "epoch": 0.9974260931665005, "grad_norm": 0.24579092860221863, "learning_rate": 8.442040761961062e-06, "loss": 0.006, "step": 33520 }, { "epoch": 0.9977236546501421, "grad_norm": 0.32049381732940674, "learning_rate": 8.440784895815376e-06, "loss": 0.0072, "step": 33530 }, { "epoch": 0.9980212161337837, "grad_norm": 0.3020797073841095, "learning_rate": 8.439528617194334e-06, "loss": 0.0035, "step": 33540 }, { "epoch": 0.9983187776174252, "grad_norm": 0.23337560892105103, "learning_rate": 8.438271926248538e-06, "loss": 0.0066, "step": 33550 }, { "epoch": 0.9986163391010667, "grad_norm": 0.40506142377853394, "learning_rate": 8.43701482312864e-06, "loss": 0.0094, "step": 33560 }, { "epoch": 0.9989139005847083, "grad_norm": 0.19769057631492615, "learning_rate": 8.435757307985337e-06, "loss": 0.0075, "step": 33570 }, { "epoch": 0.9992114620683499, "grad_norm": 0.24702464044094086, "learning_rate": 8.43449938096938e-06, "loss": 0.0059, "step": 33580 }, { "epoch": 0.9995090235519914, "grad_norm": 0.21207283437252045, "learning_rate": 8.433241042231561e-06, "loss": 0.0064, "step": 33590 }, { "epoch": 0.999806585035633, "grad_norm": 0.20754195749759674, "learning_rate": 8.431982291922735e-06, "loss": 0.0065, "step": 33600 }, { "epoch": 1.0001041465192746, "grad_norm": 0.2151709347963333, "learning_rate": 8.430723130193795e-06, "loss": 0.008, "step": 33610 }, { "epoch": 1.0004017080029162, "grad_norm": 0.08510072529315948, "learning_rate": 8.429463557195688e-06, "loss": 0.0036, "step": 33620 }, { "epoch": 1.0006992694865577, "grad_norm": 0.17240634560585022, "learning_rate": 8.42820357307941e-06, "loss": 0.0038, "step": 33630 }, { "epoch": 1.0009968309701993, "grad_norm": 0.27671703696250916, "learning_rate": 8.426943177996003e-06, "loss": 0.0044, "step": 33640 }, { "epoch": 1.0012943924538407, "grad_norm": 0.17512458562850952, "learning_rate": 8.425682372096565e-06, "loss": 0.0051, "step": 33650 }, { "epoch": 1.0015919539374822, "grad_norm": 0.15166346728801727, "learning_rate": 8.424421155532235e-06, "loss": 0.0051, "step": 33660 }, { "epoch": 1.0018895154211238, "grad_norm": 0.17399059236049652, "learning_rate": 8.423159528454209e-06, "loss": 0.0049, "step": 33670 }, { "epoch": 1.0021870769047654, "grad_norm": 0.2997557520866394, "learning_rate": 8.421897491013727e-06, "loss": 0.0054, "step": 33680 }, { "epoch": 1.002484638388407, "grad_norm": 0.2720637023448944, "learning_rate": 8.420635043362079e-06, "loss": 0.0057, "step": 33690 }, { "epoch": 1.0027821998720485, "grad_norm": 0.2781386375427246, "learning_rate": 8.419372185650607e-06, "loss": 0.0061, "step": 33700 }, { "epoch": 1.00307976135569, "grad_norm": 0.09107958525419235, "learning_rate": 8.4181089180307e-06, "loss": 0.0045, "step": 33710 }, { "epoch": 1.0033773228393317, "grad_norm": 0.2642669677734375, "learning_rate": 8.416845240653795e-06, "loss": 0.0043, "step": 33720 }, { "epoch": 1.0036748843229732, "grad_norm": 0.1333107054233551, "learning_rate": 8.415581153671381e-06, "loss": 0.0065, "step": 33730 }, { "epoch": 1.0039724458066148, "grad_norm": 0.41934841871261597, "learning_rate": 8.414316657234994e-06, "loss": 0.0059, "step": 33740 }, { "epoch": 1.0042700072902564, "grad_norm": 0.08344468474388123, "learning_rate": 8.413051751496218e-06, "loss": 0.0039, "step": 33750 }, { "epoch": 1.004567568773898, "grad_norm": 0.24196766316890717, "learning_rate": 8.41178643660669e-06, "loss": 0.004, "step": 33760 }, { "epoch": 1.0048651302575395, "grad_norm": 0.22668899595737457, "learning_rate": 8.410520712718094e-06, "loss": 0.0037, "step": 33770 }, { "epoch": 1.0051626917411811, "grad_norm": 0.32698819041252136, "learning_rate": 8.40925457998216e-06, "loss": 0.0069, "step": 33780 }, { "epoch": 1.0054602532248227, "grad_norm": 0.48044851422309875, "learning_rate": 8.407988038550674e-06, "loss": 0.0069, "step": 33790 }, { "epoch": 1.005757814708464, "grad_norm": 0.13809093832969666, "learning_rate": 8.406721088575463e-06, "loss": 0.0043, "step": 33800 }, { "epoch": 1.0060553761921056, "grad_norm": 0.10947513580322266, "learning_rate": 8.405453730208406e-06, "loss": 0.0062, "step": 33810 }, { "epoch": 1.0063529376757472, "grad_norm": 0.1698346734046936, "learning_rate": 8.404185963601437e-06, "loss": 0.0051, "step": 33820 }, { "epoch": 1.0066504991593888, "grad_norm": 0.07755757123231888, "learning_rate": 8.402917788906528e-06, "loss": 0.0032, "step": 33830 }, { "epoch": 1.0069480606430303, "grad_norm": 0.24400240182876587, "learning_rate": 8.401649206275708e-06, "loss": 0.0047, "step": 33840 }, { "epoch": 1.007245622126672, "grad_norm": 0.13685888051986694, "learning_rate": 8.400380215861053e-06, "loss": 0.0053, "step": 33850 }, { "epoch": 1.0075431836103135, "grad_norm": 0.48287126421928406, "learning_rate": 8.39911081781469e-06, "loss": 0.0054, "step": 33860 }, { "epoch": 1.007840745093955, "grad_norm": 0.30151599645614624, "learning_rate": 8.397841012288786e-06, "loss": 0.0042, "step": 33870 }, { "epoch": 1.0081383065775966, "grad_norm": 0.05425092577934265, "learning_rate": 8.396570799435565e-06, "loss": 0.0059, "step": 33880 }, { "epoch": 1.0084358680612382, "grad_norm": 0.2560768723487854, "learning_rate": 8.395300179407303e-06, "loss": 0.0057, "step": 33890 }, { "epoch": 1.0087334295448798, "grad_norm": 0.16490377485752106, "learning_rate": 8.394029152356315e-06, "loss": 0.0081, "step": 33900 }, { "epoch": 1.0090309910285213, "grad_norm": 0.403177946805954, "learning_rate": 8.39275771843497e-06, "loss": 0.0076, "step": 33910 }, { "epoch": 1.009328552512163, "grad_norm": 0.1443130373954773, "learning_rate": 8.391485877795686e-06, "loss": 0.0095, "step": 33920 }, { "epoch": 1.0096261139958045, "grad_norm": 0.3155766427516937, "learning_rate": 8.390213630590927e-06, "loss": 0.0082, "step": 33930 }, { "epoch": 1.0099236754794458, "grad_norm": 0.16163045167922974, "learning_rate": 8.388940976973212e-06, "loss": 0.007, "step": 33940 }, { "epoch": 1.0102212369630874, "grad_norm": 0.2791478633880615, "learning_rate": 8.387667917095102e-06, "loss": 0.0076, "step": 33950 }, { "epoch": 1.010518798446729, "grad_norm": 0.23041145503520966, "learning_rate": 8.386394451109209e-06, "loss": 0.0043, "step": 33960 }, { "epoch": 1.0108163599303706, "grad_norm": 0.1980172097682953, "learning_rate": 8.385120579168193e-06, "loss": 0.0059, "step": 33970 }, { "epoch": 1.0111139214140121, "grad_norm": 0.27693840861320496, "learning_rate": 8.383846301424767e-06, "loss": 0.0059, "step": 33980 }, { "epoch": 1.0114114828976537, "grad_norm": 0.5322197079658508, "learning_rate": 8.382571618031685e-06, "loss": 0.0077, "step": 33990 }, { "epoch": 1.0117090443812953, "grad_norm": 0.19035843014717102, "learning_rate": 8.381296529141758e-06, "loss": 0.0054, "step": 34000 }, { "epoch": 1.0120066058649368, "grad_norm": 0.061594028025865555, "learning_rate": 8.380021034907837e-06, "loss": 0.0039, "step": 34010 }, { "epoch": 1.0123041673485784, "grad_norm": 0.09884807467460632, "learning_rate": 8.378745135482832e-06, "loss": 0.0036, "step": 34020 }, { "epoch": 1.01260172883222, "grad_norm": 0.27732235193252563, "learning_rate": 8.377468831019688e-06, "loss": 0.0044, "step": 34030 }, { "epoch": 1.0128992903158616, "grad_norm": 0.20027108490467072, "learning_rate": 8.376192121671413e-06, "loss": 0.0046, "step": 34040 }, { "epoch": 1.0131968517995031, "grad_norm": 0.1748238503932953, "learning_rate": 8.374915007591053e-06, "loss": 0.0049, "step": 34050 }, { "epoch": 1.0134944132831447, "grad_norm": 0.13764385879039764, "learning_rate": 8.373637488931707e-06, "loss": 0.0053, "step": 34060 }, { "epoch": 1.0137919747667863, "grad_norm": 0.07607714086771011, "learning_rate": 8.372359565846522e-06, "loss": 0.0069, "step": 34070 }, { "epoch": 1.0140895362504276, "grad_norm": 0.4966714382171631, "learning_rate": 8.371081238488694e-06, "loss": 0.0059, "step": 34080 }, { "epoch": 1.0143870977340692, "grad_norm": 0.1688987761735916, "learning_rate": 8.369802507011467e-06, "loss": 0.0037, "step": 34090 }, { "epoch": 1.0146846592177108, "grad_norm": 0.3402809500694275, "learning_rate": 8.368523371568128e-06, "loss": 0.0069, "step": 34100 }, { "epoch": 1.0149822207013524, "grad_norm": 0.2529754638671875, "learning_rate": 8.367243832312024e-06, "loss": 0.0056, "step": 34110 }, { "epoch": 1.015279782184994, "grad_norm": 0.13482160866260529, "learning_rate": 8.36596388939654e-06, "loss": 0.0062, "step": 34120 }, { "epoch": 1.0155773436686355, "grad_norm": 0.04206480458378792, "learning_rate": 8.364683542975117e-06, "loss": 0.0045, "step": 34130 }, { "epoch": 1.015874905152277, "grad_norm": 0.485995352268219, "learning_rate": 8.363402793201238e-06, "loss": 0.0079, "step": 34140 }, { "epoch": 1.0161724666359186, "grad_norm": 0.27889329195022583, "learning_rate": 8.362121640228437e-06, "loss": 0.0066, "step": 34150 }, { "epoch": 1.0164700281195602, "grad_norm": 0.1396474391222, "learning_rate": 8.360840084210297e-06, "loss": 0.0047, "step": 34160 }, { "epoch": 1.0167675896032018, "grad_norm": 0.20427408814430237, "learning_rate": 8.35955812530045e-06, "loss": 0.0044, "step": 34170 }, { "epoch": 1.0170651510868434, "grad_norm": 0.2513078451156616, "learning_rate": 8.35827576365257e-06, "loss": 0.0029, "step": 34180 }, { "epoch": 1.017362712570485, "grad_norm": 0.40503206849098206, "learning_rate": 8.35699299942039e-06, "loss": 0.0075, "step": 34190 }, { "epoch": 1.0176602740541265, "grad_norm": 0.2173244208097458, "learning_rate": 8.355709832757686e-06, "loss": 0.0052, "step": 34200 }, { "epoch": 1.017957835537768, "grad_norm": 0.19843147695064545, "learning_rate": 8.354426263818276e-06, "loss": 0.0051, "step": 34210 }, { "epoch": 1.0182553970214094, "grad_norm": 0.2177095264196396, "learning_rate": 8.353142292756038e-06, "loss": 0.0044, "step": 34220 }, { "epoch": 1.018552958505051, "grad_norm": 0.3245178759098053, "learning_rate": 8.351857919724889e-06, "loss": 0.0052, "step": 34230 }, { "epoch": 1.0188505199886926, "grad_norm": 0.2603212893009186, "learning_rate": 8.350573144878796e-06, "loss": 0.0043, "step": 34240 }, { "epoch": 1.0191480814723342, "grad_norm": 0.3176768720149994, "learning_rate": 8.34928796837178e-06, "loss": 0.0051, "step": 34250 }, { "epoch": 1.0194456429559757, "grad_norm": 0.060194600373506546, "learning_rate": 8.348002390357901e-06, "loss": 0.0045, "step": 34260 }, { "epoch": 1.0197432044396173, "grad_norm": 0.45355358719825745, "learning_rate": 8.346716410991277e-06, "loss": 0.0041, "step": 34270 }, { "epoch": 1.0200407659232589, "grad_norm": 0.21934399008750916, "learning_rate": 8.345430030426066e-06, "loss": 0.0045, "step": 34280 }, { "epoch": 1.0203383274069004, "grad_norm": 0.2797807455062866, "learning_rate": 8.344143248816477e-06, "loss": 0.0066, "step": 34290 }, { "epoch": 1.020635888890542, "grad_norm": 0.16606368124485016, "learning_rate": 8.342856066316766e-06, "loss": 0.0057, "step": 34300 }, { "epoch": 1.0209334503741836, "grad_norm": 0.06948388367891312, "learning_rate": 8.34156848308124e-06, "loss": 0.0058, "step": 34310 }, { "epoch": 1.0212310118578252, "grad_norm": 0.22232261300086975, "learning_rate": 8.340280499264252e-06, "loss": 0.0054, "step": 34320 }, { "epoch": 1.0215285733414667, "grad_norm": 0.18707293272018433, "learning_rate": 8.338992115020206e-06, "loss": 0.0068, "step": 34330 }, { "epoch": 1.0218261348251083, "grad_norm": 0.3033280372619629, "learning_rate": 8.337703330503547e-06, "loss": 0.0076, "step": 34340 }, { "epoch": 1.0221236963087499, "grad_norm": 0.16062161326408386, "learning_rate": 8.336414145868772e-06, "loss": 0.0091, "step": 34350 }, { "epoch": 1.0224212577923915, "grad_norm": 0.11293874680995941, "learning_rate": 8.33512456127043e-06, "loss": 0.0039, "step": 34360 }, { "epoch": 1.0227188192760328, "grad_norm": 0.12124989181756973, "learning_rate": 8.333834576863111e-06, "loss": 0.0063, "step": 34370 }, { "epoch": 1.0230163807596744, "grad_norm": 0.11245889961719513, "learning_rate": 8.332544192801459e-06, "loss": 0.0061, "step": 34380 }, { "epoch": 1.023313942243316, "grad_norm": 0.28772199153900146, "learning_rate": 8.33125340924016e-06, "loss": 0.0055, "step": 34390 }, { "epoch": 1.0236115037269575, "grad_norm": 0.19529639184474945, "learning_rate": 8.329962226333952e-06, "loss": 0.0051, "step": 34400 }, { "epoch": 1.023909065210599, "grad_norm": 0.19357344508171082, "learning_rate": 8.32867064423762e-06, "loss": 0.0044, "step": 34410 }, { "epoch": 1.0242066266942407, "grad_norm": 0.18798021972179413, "learning_rate": 8.327378663105998e-06, "loss": 0.0051, "step": 34420 }, { "epoch": 1.0245041881778822, "grad_norm": 0.17475180327892303, "learning_rate": 8.326086283093964e-06, "loss": 0.0049, "step": 34430 }, { "epoch": 1.0248017496615238, "grad_norm": 0.34963658452033997, "learning_rate": 8.324793504356447e-06, "loss": 0.0056, "step": 34440 }, { "epoch": 1.0250993111451654, "grad_norm": 0.14211180806159973, "learning_rate": 8.323500327048426e-06, "loss": 0.0036, "step": 34450 }, { "epoch": 1.025396872628807, "grad_norm": 0.14487622678279877, "learning_rate": 8.322206751324919e-06, "loss": 0.0046, "step": 34460 }, { "epoch": 1.0256944341124485, "grad_norm": 0.10553353279829025, "learning_rate": 8.320912777341004e-06, "loss": 0.0051, "step": 34470 }, { "epoch": 1.0259919955960901, "grad_norm": 0.19492864608764648, "learning_rate": 8.319618405251795e-06, "loss": 0.0052, "step": 34480 }, { "epoch": 1.0262895570797317, "grad_norm": 0.4016003906726837, "learning_rate": 8.318323635212461e-06, "loss": 0.0054, "step": 34490 }, { "epoch": 1.0265871185633733, "grad_norm": 0.045963387936353683, "learning_rate": 8.31702846737822e-06, "loss": 0.004, "step": 34500 }, { "epoch": 1.0268846800470146, "grad_norm": 0.20379036664962769, "learning_rate": 8.31573290190433e-06, "loss": 0.0041, "step": 34510 }, { "epoch": 1.0271822415306562, "grad_norm": 0.16622182726860046, "learning_rate": 8.314436938946103e-06, "loss": 0.0062, "step": 34520 }, { "epoch": 1.0274798030142978, "grad_norm": 0.11879565566778183, "learning_rate": 8.313140578658897e-06, "loss": 0.0035, "step": 34530 }, { "epoch": 1.0277773644979393, "grad_norm": 0.1463734656572342, "learning_rate": 8.311843821198115e-06, "loss": 0.008, "step": 34540 }, { "epoch": 1.028074925981581, "grad_norm": 0.27429234981536865, "learning_rate": 8.310546666719215e-06, "loss": 0.0048, "step": 34550 }, { "epoch": 1.0283724874652225, "grad_norm": 0.34043845534324646, "learning_rate": 8.309249115377694e-06, "loss": 0.0057, "step": 34560 }, { "epoch": 1.028670048948864, "grad_norm": 0.07590330392122269, "learning_rate": 8.3079511673291e-06, "loss": 0.0051, "step": 34570 }, { "epoch": 1.0289676104325056, "grad_norm": 0.07998467981815338, "learning_rate": 8.306652822729031e-06, "loss": 0.004, "step": 34580 }, { "epoch": 1.0292651719161472, "grad_norm": 0.2074207067489624, "learning_rate": 8.305354081733129e-06, "loss": 0.0042, "step": 34590 }, { "epoch": 1.0295627333997888, "grad_norm": 0.32817214727401733, "learning_rate": 8.304054944497085e-06, "loss": 0.0041, "step": 34600 }, { "epoch": 1.0298602948834303, "grad_norm": 0.10991603881120682, "learning_rate": 8.302755411176637e-06, "loss": 0.006, "step": 34610 }, { "epoch": 1.030157856367072, "grad_norm": 0.07200415432453156, "learning_rate": 8.301455481927571e-06, "loss": 0.0056, "step": 34620 }, { "epoch": 1.0304554178507135, "grad_norm": 0.06278396397829056, "learning_rate": 8.30015515690572e-06, "loss": 0.0066, "step": 34630 }, { "epoch": 1.030752979334355, "grad_norm": 0.5180708765983582, "learning_rate": 8.298854436266966e-06, "loss": 0.006, "step": 34640 }, { "epoch": 1.0310505408179966, "grad_norm": 0.44347646832466125, "learning_rate": 8.297553320167234e-06, "loss": 0.0059, "step": 34650 }, { "epoch": 1.031348102301638, "grad_norm": 0.21992358565330505, "learning_rate": 8.296251808762503e-06, "loss": 0.0037, "step": 34660 }, { "epoch": 1.0316456637852796, "grad_norm": 0.11609631776809692, "learning_rate": 8.294949902208795e-06, "loss": 0.005, "step": 34670 }, { "epoch": 1.0319432252689211, "grad_norm": 0.24229736626148224, "learning_rate": 8.29364760066218e-06, "loss": 0.0064, "step": 34680 }, { "epoch": 1.0322407867525627, "grad_norm": 0.8596298098564148, "learning_rate": 8.292344904278775e-06, "loss": 0.0065, "step": 34690 }, { "epoch": 1.0325383482362043, "grad_norm": 0.11408000439405441, "learning_rate": 8.291041813214746e-06, "loss": 0.0041, "step": 34700 }, { "epoch": 1.0328359097198458, "grad_norm": 0.16701114177703857, "learning_rate": 8.289738327626305e-06, "loss": 0.0058, "step": 34710 }, { "epoch": 1.0331334712034874, "grad_norm": 0.3032881021499634, "learning_rate": 8.288434447669713e-06, "loss": 0.0052, "step": 34720 }, { "epoch": 1.033431032687129, "grad_norm": 0.37101536989212036, "learning_rate": 8.287130173501273e-06, "loss": 0.0057, "step": 34730 }, { "epoch": 1.0337285941707706, "grad_norm": 0.15355262160301208, "learning_rate": 8.285825505277343e-06, "loss": 0.0073, "step": 34740 }, { "epoch": 1.0340261556544121, "grad_norm": 0.17388322949409485, "learning_rate": 8.284520443154322e-06, "loss": 0.0052, "step": 34750 }, { "epoch": 1.0343237171380537, "grad_norm": 0.20400643348693848, "learning_rate": 8.283214987288661e-06, "loss": 0.0044, "step": 34760 }, { "epoch": 1.0346212786216953, "grad_norm": 0.2923274636268616, "learning_rate": 8.281909137836854e-06, "loss": 0.0071, "step": 34770 }, { "epoch": 1.0349188401053369, "grad_norm": 0.49186447262763977, "learning_rate": 8.280602894955443e-06, "loss": 0.0059, "step": 34780 }, { "epoch": 1.0352164015889784, "grad_norm": 0.2980741262435913, "learning_rate": 8.27929625880102e-06, "loss": 0.0067, "step": 34790 }, { "epoch": 1.0355139630726198, "grad_norm": 0.1860777586698532, "learning_rate": 8.277989229530224e-06, "loss": 0.0077, "step": 34800 }, { "epoch": 1.0358115245562614, "grad_norm": 0.1893516331911087, "learning_rate": 8.276681807299734e-06, "loss": 0.0054, "step": 34810 }, { "epoch": 1.036109086039903, "grad_norm": 0.30650582909584045, "learning_rate": 8.275373992266285e-06, "loss": 0.0042, "step": 34820 }, { "epoch": 1.0364066475235445, "grad_norm": 0.2937079966068268, "learning_rate": 8.274065784586656e-06, "loss": 0.009, "step": 34830 }, { "epoch": 1.036704209007186, "grad_norm": 0.1317378282546997, "learning_rate": 8.272757184417672e-06, "loss": 0.0043, "step": 34840 }, { "epoch": 1.0370017704908276, "grad_norm": 0.18695656955242157, "learning_rate": 8.271448191916205e-06, "loss": 0.0062, "step": 34850 }, { "epoch": 1.0372993319744692, "grad_norm": 0.37827765941619873, "learning_rate": 8.270138807239178e-06, "loss": 0.0065, "step": 34860 }, { "epoch": 1.0375968934581108, "grad_norm": 0.2514275312423706, "learning_rate": 8.268829030543553e-06, "loss": 0.0061, "step": 34870 }, { "epoch": 1.0378944549417524, "grad_norm": 0.23407457768917084, "learning_rate": 8.267518861986346e-06, "loss": 0.0039, "step": 34880 }, { "epoch": 1.038192016425394, "grad_norm": 0.2442561388015747, "learning_rate": 8.266208301724619e-06, "loss": 0.0059, "step": 34890 }, { "epoch": 1.0384895779090355, "grad_norm": 0.17650829255580902, "learning_rate": 8.264897349915478e-06, "loss": 0.0089, "step": 34900 }, { "epoch": 1.038787139392677, "grad_norm": 0.2366502583026886, "learning_rate": 8.263586006716078e-06, "loss": 0.0072, "step": 34910 }, { "epoch": 1.0390847008763187, "grad_norm": 0.11927567422389984, "learning_rate": 8.262274272283621e-06, "loss": 0.0052, "step": 34920 }, { "epoch": 1.0393822623599602, "grad_norm": 0.07356899231672287, "learning_rate": 8.260962146775354e-06, "loss": 0.0053, "step": 34930 }, { "epoch": 1.0396798238436016, "grad_norm": 0.2389124184846878, "learning_rate": 8.259649630348574e-06, "loss": 0.007, "step": 34940 }, { "epoch": 1.0399773853272432, "grad_norm": 0.30398714542388916, "learning_rate": 8.258336723160623e-06, "loss": 0.0088, "step": 34950 }, { "epoch": 1.0402749468108847, "grad_norm": 0.38746708631515503, "learning_rate": 8.25702342536889e-06, "loss": 0.0063, "step": 34960 }, { "epoch": 1.0405725082945263, "grad_norm": 0.2918686270713806, "learning_rate": 8.255709737130808e-06, "loss": 0.0054, "step": 34970 }, { "epoch": 1.0408700697781679, "grad_norm": 0.18947400152683258, "learning_rate": 8.254395658603864e-06, "loss": 0.0056, "step": 34980 }, { "epoch": 1.0411676312618094, "grad_norm": 0.20922231674194336, "learning_rate": 8.253081189945587e-06, "loss": 0.0051, "step": 34990 }, { "epoch": 1.041465192745451, "grad_norm": 0.36863788962364197, "learning_rate": 8.25176633131355e-06, "loss": 0.0045, "step": 35000 }, { "epoch": 1.0417627542290926, "grad_norm": 0.28445929288864136, "learning_rate": 8.25045108286538e-06, "loss": 0.0044, "step": 35010 }, { "epoch": 1.0420603157127342, "grad_norm": 0.23644281923770905, "learning_rate": 8.249135444758743e-06, "loss": 0.0048, "step": 35020 }, { "epoch": 1.0423578771963757, "grad_norm": 0.19216866791248322, "learning_rate": 8.247819417151358e-06, "loss": 0.0035, "step": 35030 }, { "epoch": 1.0426554386800173, "grad_norm": 0.2663336396217346, "learning_rate": 8.246503000200987e-06, "loss": 0.0039, "step": 35040 }, { "epoch": 1.0429530001636589, "grad_norm": 0.1587168574333191, "learning_rate": 8.24518619406544e-06, "loss": 0.0042, "step": 35050 }, { "epoch": 1.0432505616473005, "grad_norm": 0.6865988969802856, "learning_rate": 8.243868998902572e-06, "loss": 0.0056, "step": 35060 }, { "epoch": 1.043548123130942, "grad_norm": 0.2109050452709198, "learning_rate": 8.242551414870289e-06, "loss": 0.004, "step": 35070 }, { "epoch": 1.0438456846145834, "grad_norm": 0.33101755380630493, "learning_rate": 8.24123344212654e-06, "loss": 0.0046, "step": 35080 }, { "epoch": 1.044143246098225, "grad_norm": 0.3382909893989563, "learning_rate": 8.23991508082932e-06, "loss": 0.008, "step": 35090 }, { "epoch": 1.0444408075818665, "grad_norm": 0.11902140080928802, "learning_rate": 8.23859633113667e-06, "loss": 0.0035, "step": 35100 }, { "epoch": 1.044738369065508, "grad_norm": 0.13041387498378754, "learning_rate": 8.237277193206682e-06, "loss": 0.007, "step": 35110 }, { "epoch": 1.0450359305491497, "grad_norm": 0.2307671308517456, "learning_rate": 8.235957667197494e-06, "loss": 0.0031, "step": 35120 }, { "epoch": 1.0453334920327912, "grad_norm": 0.19818343222141266, "learning_rate": 8.234637753267288e-06, "loss": 0.0055, "step": 35130 }, { "epoch": 1.0456310535164328, "grad_norm": 0.1803029328584671, "learning_rate": 8.233317451574287e-06, "loss": 0.0084, "step": 35140 }, { "epoch": 1.0459286150000744, "grad_norm": 0.280544638633728, "learning_rate": 8.231996762276773e-06, "loss": 0.006, "step": 35150 }, { "epoch": 1.046226176483716, "grad_norm": 0.14635038375854492, "learning_rate": 8.230675685533067e-06, "loss": 0.0039, "step": 35160 }, { "epoch": 1.0465237379673575, "grad_norm": 0.23644496500492096, "learning_rate": 8.229354221501533e-06, "loss": 0.0061, "step": 35170 }, { "epoch": 1.046821299450999, "grad_norm": 0.2665797770023346, "learning_rate": 8.228032370340591e-06, "loss": 0.0048, "step": 35180 }, { "epoch": 1.0471188609346407, "grad_norm": 0.2772429287433624, "learning_rate": 8.226710132208702e-06, "loss": 0.0077, "step": 35190 }, { "epoch": 1.0474164224182823, "grad_norm": 0.20859664678573608, "learning_rate": 8.225387507264368e-06, "loss": 0.0053, "step": 35200 }, { "epoch": 1.0477139839019238, "grad_norm": 0.12349186092615128, "learning_rate": 8.224064495666148e-06, "loss": 0.0053, "step": 35210 }, { "epoch": 1.0480115453855654, "grad_norm": 0.34020665287971497, "learning_rate": 8.222741097572643e-06, "loss": 0.0083, "step": 35220 }, { "epoch": 1.0483091068692068, "grad_norm": 0.20579490065574646, "learning_rate": 8.221417313142497e-06, "loss": 0.0029, "step": 35230 }, { "epoch": 1.0486066683528483, "grad_norm": 0.16800165176391602, "learning_rate": 8.220093142534403e-06, "loss": 0.0059, "step": 35240 }, { "epoch": 1.04890422983649, "grad_norm": 0.30370303988456726, "learning_rate": 8.218768585907102e-06, "loss": 0.0046, "step": 35250 }, { "epoch": 1.0492017913201315, "grad_norm": 0.2161824256181717, "learning_rate": 8.217443643419379e-06, "loss": 0.0039, "step": 35260 }, { "epoch": 1.049499352803773, "grad_norm": 0.27986910939216614, "learning_rate": 8.216118315230064e-06, "loss": 0.0044, "step": 35270 }, { "epoch": 1.0497969142874146, "grad_norm": 0.3317975401878357, "learning_rate": 8.214792601498038e-06, "loss": 0.0053, "step": 35280 }, { "epoch": 1.0500944757710562, "grad_norm": 0.0011134631931781769, "learning_rate": 8.213466502382223e-06, "loss": 0.0042, "step": 35290 }, { "epoch": 1.0503920372546978, "grad_norm": 0.16154874861240387, "learning_rate": 8.21214001804159e-06, "loss": 0.0069, "step": 35300 }, { "epoch": 1.0506895987383393, "grad_norm": 0.10546211153268814, "learning_rate": 8.210813148635158e-06, "loss": 0.0043, "step": 35310 }, { "epoch": 1.050987160221981, "grad_norm": 0.24228498339653015, "learning_rate": 8.209485894321984e-06, "loss": 0.0069, "step": 35320 }, { "epoch": 1.0512847217056225, "grad_norm": 0.2784201502799988, "learning_rate": 8.208158255261185e-06, "loss": 0.0042, "step": 35330 }, { "epoch": 1.051582283189264, "grad_norm": 0.24154849350452423, "learning_rate": 8.20683023161191e-06, "loss": 0.005, "step": 35340 }, { "epoch": 1.0518798446729056, "grad_norm": 0.422711580991745, "learning_rate": 8.205501823533362e-06, "loss": 0.0065, "step": 35350 }, { "epoch": 1.0521774061565472, "grad_norm": 0.26589298248291016, "learning_rate": 8.204173031184788e-06, "loss": 0.0051, "step": 35360 }, { "epoch": 1.0524749676401886, "grad_norm": 0.03427087888121605, "learning_rate": 8.202843854725482e-06, "loss": 0.0051, "step": 35370 }, { "epoch": 1.0527725291238301, "grad_norm": 0.18340899050235748, "learning_rate": 8.201514294314783e-06, "loss": 0.0058, "step": 35380 }, { "epoch": 1.0530700906074717, "grad_norm": 0.26710450649261475, "learning_rate": 8.200184350112074e-06, "loss": 0.0064, "step": 35390 }, { "epoch": 1.0533676520911133, "grad_norm": 0.26319974660873413, "learning_rate": 8.198854022276792e-06, "loss": 0.0098, "step": 35400 }, { "epoch": 1.0536652135747548, "grad_norm": 0.27241361141204834, "learning_rate": 8.197523310968411e-06, "loss": 0.0044, "step": 35410 }, { "epoch": 1.0539627750583964, "grad_norm": 0.10374221950769424, "learning_rate": 8.196192216346454e-06, "loss": 0.0045, "step": 35420 }, { "epoch": 1.054260336542038, "grad_norm": 0.15832793712615967, "learning_rate": 8.194860738570491e-06, "loss": 0.0059, "step": 35430 }, { "epoch": 1.0545578980256796, "grad_norm": 0.29416415095329285, "learning_rate": 8.193528877800137e-06, "loss": 0.004, "step": 35440 }, { "epoch": 1.0548554595093211, "grad_norm": 0.3534478545188904, "learning_rate": 8.192196634195052e-06, "loss": 0.0067, "step": 35450 }, { "epoch": 1.0551530209929627, "grad_norm": 0.29813358187675476, "learning_rate": 8.190864007914947e-06, "loss": 0.0065, "step": 35460 }, { "epoch": 1.0554505824766043, "grad_norm": 0.3102301061153412, "learning_rate": 8.18953099911957e-06, "loss": 0.0048, "step": 35470 }, { "epoch": 1.0557481439602459, "grad_norm": 0.3101198375225067, "learning_rate": 8.188197607968724e-06, "loss": 0.007, "step": 35480 }, { "epoch": 1.0560457054438874, "grad_norm": 0.19670870900154114, "learning_rate": 8.18686383462225e-06, "loss": 0.0053, "step": 35490 }, { "epoch": 1.056343266927529, "grad_norm": 0.4326886832714081, "learning_rate": 8.185529679240041e-06, "loss": 0.0067, "step": 35500 }, { "epoch": 1.0566408284111704, "grad_norm": 0.31687626242637634, "learning_rate": 8.184195141982033e-06, "loss": 0.0083, "step": 35510 }, { "epoch": 1.056938389894812, "grad_norm": 0.14429043233394623, "learning_rate": 8.182860223008206e-06, "loss": 0.0062, "step": 35520 }, { "epoch": 1.0572359513784535, "grad_norm": 0.1816464513540268, "learning_rate": 8.181524922478592e-06, "loss": 0.0042, "step": 35530 }, { "epoch": 1.057533512862095, "grad_norm": 0.29810434579849243, "learning_rate": 8.180189240553259e-06, "loss": 0.0051, "step": 35540 }, { "epoch": 1.0578310743457366, "grad_norm": 0.18125487864017487, "learning_rate": 8.17885317739233e-06, "loss": 0.0072, "step": 35550 }, { "epoch": 1.0581286358293782, "grad_norm": 0.49802538752555847, "learning_rate": 8.17751673315597e-06, "loss": 0.0053, "step": 35560 }, { "epoch": 1.0584261973130198, "grad_norm": 0.38680902123451233, "learning_rate": 8.176179908004387e-06, "loss": 0.0053, "step": 35570 }, { "epoch": 1.0587237587966614, "grad_norm": 0.31175267696380615, "learning_rate": 8.174842702097841e-06, "loss": 0.0061, "step": 35580 }, { "epoch": 1.059021320280303, "grad_norm": 0.09256526827812195, "learning_rate": 8.173505115596629e-06, "loss": 0.0048, "step": 35590 }, { "epoch": 1.0593188817639445, "grad_norm": 0.2172652631998062, "learning_rate": 8.172167148661103e-06, "loss": 0.0042, "step": 35600 }, { "epoch": 1.059616443247586, "grad_norm": 0.24437986314296722, "learning_rate": 8.170828801451654e-06, "loss": 0.0079, "step": 35610 }, { "epoch": 1.0599140047312277, "grad_norm": 0.15093004703521729, "learning_rate": 8.169490074128723e-06, "loss": 0.0054, "step": 35620 }, { "epoch": 1.0602115662148692, "grad_norm": 0.31525781750679016, "learning_rate": 8.16815096685279e-06, "loss": 0.0034, "step": 35630 }, { "epoch": 1.0605091276985108, "grad_norm": 0.5016889572143555, "learning_rate": 8.16681147978439e-06, "loss": 0.0056, "step": 35640 }, { "epoch": 1.0608066891821522, "grad_norm": 0.19510523974895477, "learning_rate": 8.165471613084095e-06, "loss": 0.0044, "step": 35650 }, { "epoch": 1.0611042506657937, "grad_norm": 0.46244531869888306, "learning_rate": 8.164131366912526e-06, "loss": 0.006, "step": 35660 }, { "epoch": 1.0614018121494353, "grad_norm": 0.15607795119285583, "learning_rate": 8.162790741430351e-06, "loss": 0.0054, "step": 35670 }, { "epoch": 1.0616993736330769, "grad_norm": 0.15699243545532227, "learning_rate": 8.16144973679828e-06, "loss": 0.005, "step": 35680 }, { "epoch": 1.0619969351167184, "grad_norm": 0.39469990134239197, "learning_rate": 8.160108353177072e-06, "loss": 0.0038, "step": 35690 }, { "epoch": 1.06229449660036, "grad_norm": 0.1926303207874298, "learning_rate": 8.158766590727531e-06, "loss": 0.0048, "step": 35700 }, { "epoch": 1.0625920580840016, "grad_norm": 0.25406014919281006, "learning_rate": 8.1574244496105e-06, "loss": 0.0047, "step": 35710 }, { "epoch": 1.0628896195676432, "grad_norm": 0.21240031719207764, "learning_rate": 8.15608192998688e-06, "loss": 0.0066, "step": 35720 }, { "epoch": 1.0631871810512847, "grad_norm": 0.10862790048122406, "learning_rate": 8.154739032017602e-06, "loss": 0.0033, "step": 35730 }, { "epoch": 1.0634847425349263, "grad_norm": 0.025799395516514778, "learning_rate": 8.153395755863656e-06, "loss": 0.0043, "step": 35740 }, { "epoch": 1.0637823040185679, "grad_norm": 0.30568936467170715, "learning_rate": 8.152052101686068e-06, "loss": 0.0033, "step": 35750 }, { "epoch": 1.0640798655022095, "grad_norm": 0.357510507106781, "learning_rate": 8.150708069645917e-06, "loss": 0.0043, "step": 35760 }, { "epoch": 1.064377426985851, "grad_norm": 0.2112818956375122, "learning_rate": 8.149363659904318e-06, "loss": 0.0045, "step": 35770 }, { "epoch": 1.0646749884694926, "grad_norm": 0.21752916276454926, "learning_rate": 8.148018872622443e-06, "loss": 0.0049, "step": 35780 }, { "epoch": 1.0649725499531342, "grad_norm": 0.12615247070789337, "learning_rate": 8.146673707961498e-06, "loss": 0.0036, "step": 35790 }, { "epoch": 1.0652701114367755, "grad_norm": 0.06528332829475403, "learning_rate": 8.14532816608274e-06, "loss": 0.0052, "step": 35800 }, { "epoch": 1.065567672920417, "grad_norm": 0.11774212121963501, "learning_rate": 8.14398224714747e-06, "loss": 0.0054, "step": 35810 }, { "epoch": 1.0658652344040587, "grad_norm": 0.30175504088401794, "learning_rate": 8.142635951317035e-06, "loss": 0.0051, "step": 35820 }, { "epoch": 1.0661627958877002, "grad_norm": 0.15406236052513123, "learning_rate": 8.141289278752825e-06, "loss": 0.0062, "step": 35830 }, { "epoch": 1.0664603573713418, "grad_norm": 0.21761903166770935, "learning_rate": 8.13994222961628e-06, "loss": 0.0046, "step": 35840 }, { "epoch": 1.0667579188549834, "grad_norm": 0.2508879005908966, "learning_rate": 8.13859480406888e-06, "loss": 0.0051, "step": 35850 }, { "epoch": 1.067055480338625, "grad_norm": 0.14367863535881042, "learning_rate": 8.137247002272153e-06, "loss": 0.0026, "step": 35860 }, { "epoch": 1.0673530418222665, "grad_norm": 0.3887135684490204, "learning_rate": 8.13589882438767e-06, "loss": 0.0087, "step": 35870 }, { "epoch": 1.067650603305908, "grad_norm": 0.5286123752593994, "learning_rate": 8.134550270577048e-06, "loss": 0.0085, "step": 35880 }, { "epoch": 1.0679481647895497, "grad_norm": 0.30623680353164673, "learning_rate": 8.13320134100195e-06, "loss": 0.0103, "step": 35890 }, { "epoch": 1.0682457262731913, "grad_norm": 0.05271377041935921, "learning_rate": 8.131852035824086e-06, "loss": 0.0047, "step": 35900 }, { "epoch": 1.0685432877568328, "grad_norm": 0.27699166536331177, "learning_rate": 8.130502355205203e-06, "loss": 0.005, "step": 35910 }, { "epoch": 1.0688408492404744, "grad_norm": 0.11633244901895523, "learning_rate": 8.129152299307102e-06, "loss": 0.0048, "step": 35920 }, { "epoch": 1.069138410724116, "grad_norm": 0.1450781226158142, "learning_rate": 8.127801868291624e-06, "loss": 0.0058, "step": 35930 }, { "epoch": 1.0694359722077573, "grad_norm": 0.18460528552532196, "learning_rate": 8.126451062320658e-06, "loss": 0.0052, "step": 35940 }, { "epoch": 1.069733533691399, "grad_norm": 0.2579200267791748, "learning_rate": 8.125099881556132e-06, "loss": 0.0058, "step": 35950 }, { "epoch": 1.0700310951750405, "grad_norm": 0.37523725628852844, "learning_rate": 8.123748326160027e-06, "loss": 0.0049, "step": 35960 }, { "epoch": 1.070328656658682, "grad_norm": 0.2074570208787918, "learning_rate": 8.122396396294366e-06, "loss": 0.0041, "step": 35970 }, { "epoch": 1.0706262181423236, "grad_norm": 0.7657715678215027, "learning_rate": 8.121044092121216e-06, "loss": 0.006, "step": 35980 }, { "epoch": 1.0709237796259652, "grad_norm": 0.10356563329696655, "learning_rate": 8.119691413802684e-06, "loss": 0.0031, "step": 35990 }, { "epoch": 1.0712213411096068, "grad_norm": 0.10546636581420898, "learning_rate": 8.118338361500933e-06, "loss": 0.0028, "step": 36000 }, { "epoch": 1.0715189025932483, "grad_norm": 0.9305168986320496, "learning_rate": 8.116984935378158e-06, "loss": 0.0059, "step": 36010 }, { "epoch": 1.07181646407689, "grad_norm": 0.1278497576713562, "learning_rate": 8.115631135596613e-06, "loss": 0.0048, "step": 36020 }, { "epoch": 1.0721140255605315, "grad_norm": 0.1603877693414688, "learning_rate": 8.114276962318584e-06, "loss": 0.0037, "step": 36030 }, { "epoch": 1.072411587044173, "grad_norm": 0.34864139556884766, "learning_rate": 8.112922415706406e-06, "loss": 0.006, "step": 36040 }, { "epoch": 1.0727091485278146, "grad_norm": 0.2465546876192093, "learning_rate": 8.111567495922465e-06, "loss": 0.0094, "step": 36050 }, { "epoch": 1.0730067100114562, "grad_norm": 0.22383388876914978, "learning_rate": 8.110212203129182e-06, "loss": 0.0058, "step": 36060 }, { "epoch": 1.0733042714950978, "grad_norm": 0.09612047672271729, "learning_rate": 8.108856537489027e-06, "loss": 0.0097, "step": 36070 }, { "epoch": 1.0736018329787393, "grad_norm": 0.1571643054485321, "learning_rate": 8.107500499164518e-06, "loss": 0.0032, "step": 36080 }, { "epoch": 1.0738993944623807, "grad_norm": 0.5207036733627319, "learning_rate": 8.106144088318213e-06, "loss": 0.0069, "step": 36090 }, { "epoch": 1.0741969559460223, "grad_norm": 0.06265375018119812, "learning_rate": 8.104787305112716e-06, "loss": 0.0026, "step": 36100 }, { "epoch": 1.0744945174296638, "grad_norm": 0.4414049983024597, "learning_rate": 8.103430149710674e-06, "loss": 0.0048, "step": 36110 }, { "epoch": 1.0747920789133054, "grad_norm": 0.4693332016468048, "learning_rate": 8.102072622274784e-06, "loss": 0.0074, "step": 36120 }, { "epoch": 1.075089640396947, "grad_norm": 0.04474250227212906, "learning_rate": 8.100714722967783e-06, "loss": 0.0052, "step": 36130 }, { "epoch": 1.0753872018805886, "grad_norm": 0.08566275238990784, "learning_rate": 8.099356451952451e-06, "loss": 0.0044, "step": 36140 }, { "epoch": 1.0756847633642301, "grad_norm": 0.2959957718849182, "learning_rate": 8.09799780939162e-06, "loss": 0.0112, "step": 36150 }, { "epoch": 1.0759823248478717, "grad_norm": 0.3083747923374176, "learning_rate": 8.096638795448157e-06, "loss": 0.0076, "step": 36160 }, { "epoch": 1.0762798863315133, "grad_norm": 0.25482192635536194, "learning_rate": 8.095279410284982e-06, "loss": 0.005, "step": 36170 }, { "epoch": 1.0765774478151549, "grad_norm": 0.2291085124015808, "learning_rate": 8.093919654065053e-06, "loss": 0.0072, "step": 36180 }, { "epoch": 1.0768750092987964, "grad_norm": 0.26201021671295166, "learning_rate": 8.092559526951374e-06, "loss": 0.0055, "step": 36190 }, { "epoch": 1.077172570782438, "grad_norm": 0.2641172409057617, "learning_rate": 8.091199029107e-06, "loss": 0.0046, "step": 36200 }, { "epoch": 1.0774701322660796, "grad_norm": 0.12079257518053055, "learning_rate": 8.089838160695019e-06, "loss": 0.0046, "step": 36210 }, { "epoch": 1.077767693749721, "grad_norm": 0.1020803228020668, "learning_rate": 8.088476921878576e-06, "loss": 0.0039, "step": 36220 }, { "epoch": 1.0780652552333625, "grad_norm": 0.35045376420021057, "learning_rate": 8.087115312820848e-06, "loss": 0.0045, "step": 36230 }, { "epoch": 1.078362816717004, "grad_norm": 0.22630514204502106, "learning_rate": 8.085753333685065e-06, "loss": 0.005, "step": 36240 }, { "epoch": 1.0786603782006456, "grad_norm": 0.2152501344680786, "learning_rate": 8.084390984634498e-06, "loss": 0.0053, "step": 36250 }, { "epoch": 1.0789579396842872, "grad_norm": 0.1874002069234848, "learning_rate": 8.083028265832465e-06, "loss": 0.004, "step": 36260 }, { "epoch": 1.0792555011679288, "grad_norm": 0.24261367321014404, "learning_rate": 8.081665177442323e-06, "loss": 0.005, "step": 36270 }, { "epoch": 1.0795530626515704, "grad_norm": 0.28886711597442627, "learning_rate": 8.080301719627482e-06, "loss": 0.0055, "step": 36280 }, { "epoch": 1.079850624135212, "grad_norm": 0.15989325940608978, "learning_rate": 8.078937892551386e-06, "loss": 0.005, "step": 36290 }, { "epoch": 1.0801481856188535, "grad_norm": 0.5050478577613831, "learning_rate": 8.07757369637753e-06, "loss": 0.006, "step": 36300 }, { "epoch": 1.080445747102495, "grad_norm": 0.3394738733768463, "learning_rate": 8.07620913126945e-06, "loss": 0.006, "step": 36310 }, { "epoch": 1.0807433085861367, "grad_norm": 0.3928522765636444, "learning_rate": 8.07484419739073e-06, "loss": 0.0056, "step": 36320 }, { "epoch": 1.0810408700697782, "grad_norm": 0.1626632958650589, "learning_rate": 8.073478894904996e-06, "loss": 0.0065, "step": 36330 }, { "epoch": 1.0813384315534198, "grad_norm": 0.11257467418909073, "learning_rate": 8.072113223975915e-06, "loss": 0.0054, "step": 36340 }, { "epoch": 1.0816359930370614, "grad_norm": 0.1398744136095047, "learning_rate": 8.070747184767204e-06, "loss": 0.005, "step": 36350 }, { "epoch": 1.081933554520703, "grad_norm": 0.5586754083633423, "learning_rate": 8.069380777442622e-06, "loss": 0.0101, "step": 36360 }, { "epoch": 1.0822311160043443, "grad_norm": 0.06257274746894836, "learning_rate": 8.06801400216597e-06, "loss": 0.0039, "step": 36370 }, { "epoch": 1.0825286774879859, "grad_norm": 0.14742286503314972, "learning_rate": 8.066646859101096e-06, "loss": 0.0067, "step": 36380 }, { "epoch": 1.0828262389716274, "grad_norm": 0.2215903103351593, "learning_rate": 8.065279348411888e-06, "loss": 0.005, "step": 36390 }, { "epoch": 1.083123800455269, "grad_norm": 0.17675384879112244, "learning_rate": 8.063911470262282e-06, "loss": 0.0046, "step": 36400 }, { "epoch": 1.0834213619389106, "grad_norm": 0.1412411779165268, "learning_rate": 8.062543224816258e-06, "loss": 0.0046, "step": 36410 }, { "epoch": 1.0837189234225522, "grad_norm": 0.22964218258857727, "learning_rate": 8.061174612237839e-06, "loss": 0.0036, "step": 36420 }, { "epoch": 1.0840164849061937, "grad_norm": 0.09596525877714157, "learning_rate": 8.059805632691091e-06, "loss": 0.0064, "step": 36430 }, { "epoch": 1.0843140463898353, "grad_norm": 0.10067009925842285, "learning_rate": 8.058436286340126e-06, "loss": 0.0045, "step": 36440 }, { "epoch": 1.0846116078734769, "grad_norm": 0.2553558349609375, "learning_rate": 8.057066573349095e-06, "loss": 0.0048, "step": 36450 }, { "epoch": 1.0849091693571185, "grad_norm": 0.15721295773983002, "learning_rate": 8.055696493882203e-06, "loss": 0.0051, "step": 36460 }, { "epoch": 1.08520673084076, "grad_norm": 0.18279381096363068, "learning_rate": 8.054326048103688e-06, "loss": 0.005, "step": 36470 }, { "epoch": 1.0855042923244016, "grad_norm": 0.16732339560985565, "learning_rate": 8.05295523617784e-06, "loss": 0.0039, "step": 36480 }, { "epoch": 1.0858018538080432, "grad_norm": 0.24435916543006897, "learning_rate": 8.051584058268986e-06, "loss": 0.0042, "step": 36490 }, { "epoch": 1.0860994152916847, "grad_norm": 0.2976796627044678, "learning_rate": 8.050212514541503e-06, "loss": 0.0049, "step": 36500 }, { "epoch": 1.086396976775326, "grad_norm": 0.10688941180706024, "learning_rate": 8.048840605159809e-06, "loss": 0.0056, "step": 36510 }, { "epoch": 1.0866945382589677, "grad_norm": 0.14037473499774933, "learning_rate": 8.047468330288364e-06, "loss": 0.0057, "step": 36520 }, { "epoch": 1.0869920997426092, "grad_norm": 0.12654639780521393, "learning_rate": 8.046095690091678e-06, "loss": 0.0046, "step": 36530 }, { "epoch": 1.0872896612262508, "grad_norm": 0.22911600768566132, "learning_rate": 8.044722684734297e-06, "loss": 0.0043, "step": 36540 }, { "epoch": 1.0875872227098924, "grad_norm": 0.18039162456989288, "learning_rate": 8.043349314380817e-06, "loss": 0.0061, "step": 36550 }, { "epoch": 1.087884784193534, "grad_norm": 0.21809260547161102, "learning_rate": 8.041975579195873e-06, "loss": 0.0043, "step": 36560 }, { "epoch": 1.0881823456771755, "grad_norm": 0.11534997075796127, "learning_rate": 8.040601479344147e-06, "loss": 0.0064, "step": 36570 }, { "epoch": 1.088479907160817, "grad_norm": 0.21852877736091614, "learning_rate": 8.039227014990365e-06, "loss": 0.0042, "step": 36580 }, { "epoch": 1.0887774686444587, "grad_norm": 0.13286885619163513, "learning_rate": 8.037852186299295e-06, "loss": 0.0034, "step": 36590 }, { "epoch": 1.0890750301281003, "grad_norm": 0.3149300515651703, "learning_rate": 8.036476993435748e-06, "loss": 0.0051, "step": 36600 }, { "epoch": 1.0893725916117418, "grad_norm": 0.4068578779697418, "learning_rate": 8.035101436564579e-06, "loss": 0.0081, "step": 36610 }, { "epoch": 1.0896701530953834, "grad_norm": 0.3311266303062439, "learning_rate": 8.033725515850689e-06, "loss": 0.006, "step": 36620 }, { "epoch": 1.089967714579025, "grad_norm": 0.2118474692106247, "learning_rate": 8.03234923145902e-06, "loss": 0.0083, "step": 36630 }, { "epoch": 1.0902652760626665, "grad_norm": 0.40483561158180237, "learning_rate": 8.03097258355456e-06, "loss": 0.0053, "step": 36640 }, { "epoch": 1.0905628375463081, "grad_norm": 0.06976279616355896, "learning_rate": 8.029595572302337e-06, "loss": 0.0045, "step": 36650 }, { "epoch": 1.0908603990299495, "grad_norm": 0.19185040891170502, "learning_rate": 8.028218197867428e-06, "loss": 0.0047, "step": 36660 }, { "epoch": 1.091157960513591, "grad_norm": 0.115873321890831, "learning_rate": 8.026840460414947e-06, "loss": 0.0046, "step": 36670 }, { "epoch": 1.0914555219972326, "grad_norm": 0.22210821509361267, "learning_rate": 8.025462360110056e-06, "loss": 0.0047, "step": 36680 }, { "epoch": 1.0917530834808742, "grad_norm": 0.15453936159610748, "learning_rate": 8.02408389711796e-06, "loss": 0.0045, "step": 36690 }, { "epoch": 1.0920506449645158, "grad_norm": 0.26148122549057007, "learning_rate": 8.022705071603907e-06, "loss": 0.0062, "step": 36700 }, { "epoch": 1.0923482064481573, "grad_norm": 0.1862264722585678, "learning_rate": 8.021325883733186e-06, "loss": 0.0038, "step": 36710 }, { "epoch": 1.092645767931799, "grad_norm": 0.22240903973579407, "learning_rate": 8.019946333671133e-06, "loss": 0.0054, "step": 36720 }, { "epoch": 1.0929433294154405, "grad_norm": 0.2822146415710449, "learning_rate": 8.018566421583128e-06, "loss": 0.0058, "step": 36730 }, { "epoch": 1.093240890899082, "grad_norm": 0.06039225310087204, "learning_rate": 8.017186147634589e-06, "loss": 0.004, "step": 36740 }, { "epoch": 1.0935384523827236, "grad_norm": 0.21525712311267853, "learning_rate": 8.015805511990983e-06, "loss": 0.0042, "step": 36750 }, { "epoch": 1.0938360138663652, "grad_norm": 0.16305288672447205, "learning_rate": 8.014424514817818e-06, "loss": 0.0052, "step": 36760 }, { "epoch": 1.0941335753500068, "grad_norm": 1.0044692754745483, "learning_rate": 8.013043156280644e-06, "loss": 0.0055, "step": 36770 }, { "epoch": 1.0944311368336483, "grad_norm": 0.17358848452568054, "learning_rate": 8.011661436545057e-06, "loss": 0.0049, "step": 36780 }, { "epoch": 1.0947286983172897, "grad_norm": 0.07006779313087463, "learning_rate": 8.010279355776697e-06, "loss": 0.0056, "step": 36790 }, { "epoch": 1.0950262598009313, "grad_norm": 0.30041566491127014, "learning_rate": 8.008896914141243e-06, "loss": 0.0041, "step": 36800 }, { "epoch": 1.0953238212845728, "grad_norm": 0.33878976106643677, "learning_rate": 8.007514111804418e-06, "loss": 0.0061, "step": 36810 }, { "epoch": 1.0956213827682144, "grad_norm": 0.12148628383874893, "learning_rate": 8.006130948931997e-06, "loss": 0.0066, "step": 36820 }, { "epoch": 1.095918944251856, "grad_norm": 0.1503962129354477, "learning_rate": 8.004747425689782e-06, "loss": 0.0056, "step": 36830 }, { "epoch": 1.0962165057354976, "grad_norm": 0.11726119369268417, "learning_rate": 8.003363542243636e-06, "loss": 0.0062, "step": 36840 }, { "epoch": 1.0965140672191391, "grad_norm": 0.0927407443523407, "learning_rate": 8.00197929875945e-06, "loss": 0.0042, "step": 36850 }, { "epoch": 1.0968116287027807, "grad_norm": 0.17215222120285034, "learning_rate": 8.000594695403168e-06, "loss": 0.0048, "step": 36860 }, { "epoch": 1.0971091901864223, "grad_norm": 0.11511161923408508, "learning_rate": 7.999209732340774e-06, "loss": 0.0049, "step": 36870 }, { "epoch": 1.0974067516700639, "grad_norm": 0.17079457640647888, "learning_rate": 7.997824409738292e-06, "loss": 0.0085, "step": 36880 }, { "epoch": 1.0977043131537054, "grad_norm": 0.19632604718208313, "learning_rate": 7.996438727761795e-06, "loss": 0.0048, "step": 36890 }, { "epoch": 1.098001874637347, "grad_norm": 0.26801592111587524, "learning_rate": 7.995052686577397e-06, "loss": 0.0061, "step": 36900 }, { "epoch": 1.0982994361209886, "grad_norm": 0.19358986616134644, "learning_rate": 7.993666286351251e-06, "loss": 0.0052, "step": 36910 }, { "epoch": 1.0985969976046301, "grad_norm": 0.19990110397338867, "learning_rate": 7.992279527249558e-06, "loss": 0.004, "step": 36920 }, { "epoch": 1.0988945590882717, "grad_norm": 0.24834959208965302, "learning_rate": 7.99089240943856e-06, "loss": 0.0086, "step": 36930 }, { "epoch": 1.0991921205719133, "grad_norm": 0.49446436762809753, "learning_rate": 7.989504933084544e-06, "loss": 0.0028, "step": 36940 }, { "epoch": 1.0994896820555546, "grad_norm": 0.19114567339420319, "learning_rate": 7.988117098353834e-06, "loss": 0.0037, "step": 36950 }, { "epoch": 1.0997872435391962, "grad_norm": 0.11927981674671173, "learning_rate": 7.986728905412803e-06, "loss": 0.0038, "step": 36960 }, { "epoch": 1.1000848050228378, "grad_norm": 0.45552411675453186, "learning_rate": 7.985340354427871e-06, "loss": 0.0057, "step": 36970 }, { "epoch": 1.1003823665064794, "grad_norm": 0.25206053256988525, "learning_rate": 7.983951445565486e-06, "loss": 0.0078, "step": 36980 }, { "epoch": 1.100679927990121, "grad_norm": 0.23772047460079193, "learning_rate": 7.982562178992155e-06, "loss": 0.0042, "step": 36990 }, { "epoch": 1.1009774894737625, "grad_norm": 0.15697287023067474, "learning_rate": 7.981172554874417e-06, "loss": 0.0036, "step": 37000 }, { "epoch": 1.101275050957404, "grad_norm": 0.09397466480731964, "learning_rate": 7.979782573378859e-06, "loss": 0.004, "step": 37010 }, { "epoch": 1.1015726124410457, "grad_norm": 0.1437751203775406, "learning_rate": 7.978392234672107e-06, "loss": 0.0048, "step": 37020 }, { "epoch": 1.1018701739246872, "grad_norm": 0.07015292346477509, "learning_rate": 7.977001538920836e-06, "loss": 0.0045, "step": 37030 }, { "epoch": 1.1021677354083288, "grad_norm": 0.2476423680782318, "learning_rate": 7.97561048629176e-06, "loss": 0.004, "step": 37040 }, { "epoch": 1.1024652968919704, "grad_norm": 0.5669156908988953, "learning_rate": 7.974219076951636e-06, "loss": 0.0071, "step": 37050 }, { "epoch": 1.102762858375612, "grad_norm": 0.21395717561244965, "learning_rate": 7.972827311067261e-06, "loss": 0.0037, "step": 37060 }, { "epoch": 1.1030604198592535, "grad_norm": 0.14700213074684143, "learning_rate": 7.971435188805479e-06, "loss": 0.0064, "step": 37070 }, { "epoch": 1.1033579813428949, "grad_norm": 0.21789291501045227, "learning_rate": 7.970042710333176e-06, "loss": 0.004, "step": 37080 }, { "epoch": 1.1036555428265364, "grad_norm": 0.2607952654361725, "learning_rate": 7.968649875817281e-06, "loss": 0.0041, "step": 37090 }, { "epoch": 1.103953104310178, "grad_norm": 0.26965028047561646, "learning_rate": 7.96725668542476e-06, "loss": 0.0042, "step": 37100 }, { "epoch": 1.1042506657938196, "grad_norm": 0.10693632066249847, "learning_rate": 7.96586313932263e-06, "loss": 0.0041, "step": 37110 }, { "epoch": 1.1045482272774612, "grad_norm": 0.3327144980430603, "learning_rate": 7.964469237677948e-06, "loss": 0.0054, "step": 37120 }, { "epoch": 1.1048457887611027, "grad_norm": 0.5157318115234375, "learning_rate": 7.963074980657808e-06, "loss": 0.0061, "step": 37130 }, { "epoch": 1.1051433502447443, "grad_norm": 0.20944488048553467, "learning_rate": 7.961680368429355e-06, "loss": 0.0054, "step": 37140 }, { "epoch": 1.1054409117283859, "grad_norm": 0.21805498003959656, "learning_rate": 7.960285401159772e-06, "loss": 0.0096, "step": 37150 }, { "epoch": 1.1057384732120275, "grad_norm": 0.20086801052093506, "learning_rate": 7.958890079016284e-06, "loss": 0.0052, "step": 37160 }, { "epoch": 1.106036034695669, "grad_norm": 0.27332228422164917, "learning_rate": 7.957494402166161e-06, "loss": 0.004, "step": 37170 }, { "epoch": 1.1063335961793106, "grad_norm": 0.016881072893738747, "learning_rate": 7.956098370776714e-06, "loss": 0.0052, "step": 37180 }, { "epoch": 1.1066311576629522, "grad_norm": 0.10219903290271759, "learning_rate": 7.954701985015297e-06, "loss": 0.0044, "step": 37190 }, { "epoch": 1.1069287191465937, "grad_norm": 0.23093368113040924, "learning_rate": 7.953305245049306e-06, "loss": 0.0044, "step": 37200 }, { "epoch": 1.1072262806302353, "grad_norm": 0.1933908462524414, "learning_rate": 7.951908151046177e-06, "loss": 0.0044, "step": 37210 }, { "epoch": 1.1075238421138769, "grad_norm": 0.2857968211174011, "learning_rate": 7.950510703173397e-06, "loss": 0.0057, "step": 37220 }, { "epoch": 1.1078214035975182, "grad_norm": 0.37547722458839417, "learning_rate": 7.949112901598485e-06, "loss": 0.0051, "step": 37230 }, { "epoch": 1.1081189650811598, "grad_norm": 0.16794553399085999, "learning_rate": 7.94771474648901e-06, "loss": 0.0027, "step": 37240 }, { "epoch": 1.1084165265648014, "grad_norm": 0.3537865877151489, "learning_rate": 7.946316238012578e-06, "loss": 0.0064, "step": 37250 }, { "epoch": 1.108714088048443, "grad_norm": 0.11744102090597153, "learning_rate": 7.94491737633684e-06, "loss": 0.004, "step": 37260 }, { "epoch": 1.1090116495320845, "grad_norm": 0.18522244691848755, "learning_rate": 7.943518161629492e-06, "loss": 0.0049, "step": 37270 }, { "epoch": 1.109309211015726, "grad_norm": 0.1353406459093094, "learning_rate": 7.942118594058267e-06, "loss": 0.0039, "step": 37280 }, { "epoch": 1.1096067724993677, "grad_norm": 0.34991154074668884, "learning_rate": 7.940718673790944e-06, "loss": 0.008, "step": 37290 }, { "epoch": 1.1099043339830093, "grad_norm": 0.21491464972496033, "learning_rate": 7.939318400995341e-06, "loss": 0.0036, "step": 37300 }, { "epoch": 1.1102018954666508, "grad_norm": 0.08472764492034912, "learning_rate": 7.937917775839324e-06, "loss": 0.0052, "step": 37310 }, { "epoch": 1.1104994569502924, "grad_norm": 0.20861493051052094, "learning_rate": 7.936516798490792e-06, "loss": 0.0037, "step": 37320 }, { "epoch": 1.110797018433934, "grad_norm": 0.1992202252149582, "learning_rate": 7.935115469117696e-06, "loss": 0.0055, "step": 37330 }, { "epoch": 1.1110945799175755, "grad_norm": 0.229314923286438, "learning_rate": 7.933713787888025e-06, "loss": 0.0065, "step": 37340 }, { "epoch": 1.1113921414012171, "grad_norm": 0.13664865493774414, "learning_rate": 7.932311754969806e-06, "loss": 0.0048, "step": 37350 }, { "epoch": 1.1116897028848585, "grad_norm": 0.3178742527961731, "learning_rate": 7.930909370531117e-06, "loss": 0.0028, "step": 37360 }, { "epoch": 1.1119872643685, "grad_norm": 0.26180461049079895, "learning_rate": 7.929506634740072e-06, "loss": 0.0058, "step": 37370 }, { "epoch": 1.1122848258521416, "grad_norm": 0.202724426984787, "learning_rate": 7.928103547764826e-06, "loss": 0.0049, "step": 37380 }, { "epoch": 1.1125823873357832, "grad_norm": 0.3376179039478302, "learning_rate": 7.926700109773583e-06, "loss": 0.0037, "step": 37390 }, { "epoch": 1.1128799488194248, "grad_norm": 0.3026023805141449, "learning_rate": 7.925296320934581e-06, "loss": 0.0055, "step": 37400 }, { "epoch": 1.1131775103030663, "grad_norm": 0.24754267930984497, "learning_rate": 7.923892181416106e-06, "loss": 0.0036, "step": 37410 }, { "epoch": 1.113475071786708, "grad_norm": 0.21928319334983826, "learning_rate": 7.922487691386483e-06, "loss": 0.0047, "step": 37420 }, { "epoch": 1.1137726332703495, "grad_norm": 0.137909397482872, "learning_rate": 7.921082851014079e-06, "loss": 0.0078, "step": 37430 }, { "epoch": 1.114070194753991, "grad_norm": 0.1671326756477356, "learning_rate": 7.919677660467307e-06, "loss": 0.0087, "step": 37440 }, { "epoch": 1.1143677562376326, "grad_norm": 0.16486795246601105, "learning_rate": 7.918272119914614e-06, "loss": 0.0076, "step": 37450 }, { "epoch": 1.1146653177212742, "grad_norm": 0.13767270743846893, "learning_rate": 7.916866229524498e-06, "loss": 0.0059, "step": 37460 }, { "epoch": 1.1149628792049158, "grad_norm": 0.200306236743927, "learning_rate": 7.915459989465493e-06, "loss": 0.0031, "step": 37470 }, { "epoch": 1.1152604406885573, "grad_norm": 0.41105926036834717, "learning_rate": 7.914053399906176e-06, "loss": 0.0113, "step": 37480 }, { "epoch": 1.115558002172199, "grad_norm": 0.2743836045265198, "learning_rate": 7.912646461015168e-06, "loss": 0.0048, "step": 37490 }, { "epoch": 1.1158555636558405, "grad_norm": 0.2375636100769043, "learning_rate": 7.911239172961129e-06, "loss": 0.0072, "step": 37500 }, { "epoch": 1.116153125139482, "grad_norm": 0.2221359759569168, "learning_rate": 7.909831535912763e-06, "loss": 0.0038, "step": 37510 }, { "epoch": 1.1164506866231234, "grad_norm": 0.11830755323171616, "learning_rate": 7.908423550038815e-06, "loss": 0.0052, "step": 37520 }, { "epoch": 1.116748248106765, "grad_norm": 0.04590034857392311, "learning_rate": 7.907015215508072e-06, "loss": 0.0069, "step": 37530 }, { "epoch": 1.1170458095904066, "grad_norm": 0.2935585677623749, "learning_rate": 7.905606532489362e-06, "loss": 0.0059, "step": 37540 }, { "epoch": 1.1173433710740481, "grad_norm": 0.37153345346450806, "learning_rate": 7.904197501151558e-06, "loss": 0.0057, "step": 37550 }, { "epoch": 1.1176409325576897, "grad_norm": 0.5265730619430542, "learning_rate": 7.90278812166357e-06, "loss": 0.0057, "step": 37560 }, { "epoch": 1.1179384940413313, "grad_norm": 0.07657347619533539, "learning_rate": 7.90137839419435e-06, "loss": 0.0036, "step": 37570 }, { "epoch": 1.1182360555249728, "grad_norm": 0.21591249108314514, "learning_rate": 7.899968318912898e-06, "loss": 0.004, "step": 37580 }, { "epoch": 1.1185336170086144, "grad_norm": 0.20905038714408875, "learning_rate": 7.89855789598825e-06, "loss": 0.0129, "step": 37590 }, { "epoch": 1.118831178492256, "grad_norm": 0.23013347387313843, "learning_rate": 7.897147125589483e-06, "loss": 0.0063, "step": 37600 }, { "epoch": 1.1191287399758976, "grad_norm": 0.23463265597820282, "learning_rate": 7.895736007885719e-06, "loss": 0.0059, "step": 37610 }, { "epoch": 1.1194263014595391, "grad_norm": 0.2259024977684021, "learning_rate": 7.894324543046123e-06, "loss": 0.0079, "step": 37620 }, { "epoch": 1.1197238629431807, "grad_norm": 0.7742536664009094, "learning_rate": 7.892912731239894e-06, "loss": 0.0044, "step": 37630 }, { "epoch": 1.1200214244268223, "grad_norm": 0.2011597603559494, "learning_rate": 7.89150057263628e-06, "loss": 0.0047, "step": 37640 }, { "epoch": 1.1203189859104636, "grad_norm": 0.0884658545255661, "learning_rate": 7.890088067404571e-06, "loss": 0.0041, "step": 37650 }, { "epoch": 1.1206165473941052, "grad_norm": 0.27550891041755676, "learning_rate": 7.88867521571409e-06, "loss": 0.0049, "step": 37660 }, { "epoch": 1.1209141088777468, "grad_norm": 0.2403724491596222, "learning_rate": 7.887262017734211e-06, "loss": 0.0055, "step": 37670 }, { "epoch": 1.1212116703613884, "grad_norm": 0.8967224359512329, "learning_rate": 7.885848473634347e-06, "loss": 0.0055, "step": 37680 }, { "epoch": 1.12150923184503, "grad_norm": 0.1266971230506897, "learning_rate": 7.884434583583947e-06, "loss": 0.006, "step": 37690 }, { "epoch": 1.1218067933286715, "grad_norm": 0.07659218460321426, "learning_rate": 7.883020347752507e-06, "loss": 0.0048, "step": 37700 }, { "epoch": 1.122104354812313, "grad_norm": 0.20170970261096954, "learning_rate": 7.881605766309565e-06, "loss": 0.0086, "step": 37710 }, { "epoch": 1.1224019162959546, "grad_norm": 0.14603695273399353, "learning_rate": 7.8801908394247e-06, "loss": 0.0043, "step": 37720 }, { "epoch": 1.1226994777795962, "grad_norm": 0.37691226601600647, "learning_rate": 7.878775567267524e-06, "loss": 0.0051, "step": 37730 }, { "epoch": 1.1229970392632378, "grad_norm": 0.13617341220378876, "learning_rate": 7.877359950007706e-06, "loss": 0.0043, "step": 37740 }, { "epoch": 1.1232946007468794, "grad_norm": 0.28745681047439575, "learning_rate": 7.875943987814942e-06, "loss": 0.0153, "step": 37750 }, { "epoch": 1.123592162230521, "grad_norm": 0.1927502453327179, "learning_rate": 7.874527680858977e-06, "loss": 0.0051, "step": 37760 }, { "epoch": 1.1238897237141625, "grad_norm": 0.20215511322021484, "learning_rate": 7.873111029309594e-06, "loss": 0.0057, "step": 37770 }, { "epoch": 1.124187285197804, "grad_norm": 0.5868127942085266, "learning_rate": 7.871694033336621e-06, "loss": 0.0069, "step": 37780 }, { "epoch": 1.1244848466814457, "grad_norm": 0.38496270775794983, "learning_rate": 7.870276693109924e-06, "loss": 0.0048, "step": 37790 }, { "epoch": 1.124782408165087, "grad_norm": 0.16452911496162415, "learning_rate": 7.868859008799412e-06, "loss": 0.0042, "step": 37800 }, { "epoch": 1.1250799696487286, "grad_norm": 0.18861784040927887, "learning_rate": 7.867440980575031e-06, "loss": 0.0045, "step": 37810 }, { "epoch": 1.1253775311323702, "grad_norm": 0.12059766054153442, "learning_rate": 7.866022608606778e-06, "loss": 0.0046, "step": 37820 }, { "epoch": 1.1256750926160117, "grad_norm": 0.21328167617321014, "learning_rate": 7.864603893064679e-06, "loss": 0.0032, "step": 37830 }, { "epoch": 1.1259726540996533, "grad_norm": 0.2977180778980255, "learning_rate": 7.863184834118813e-06, "loss": 0.0063, "step": 37840 }, { "epoch": 1.1262702155832949, "grad_norm": 0.3386954367160797, "learning_rate": 7.861765431939287e-06, "loss": 0.0051, "step": 37850 }, { "epoch": 1.1265677770669364, "grad_norm": 0.1005447655916214, "learning_rate": 7.860345686696264e-06, "loss": 0.0038, "step": 37860 }, { "epoch": 1.126865338550578, "grad_norm": 0.19955959916114807, "learning_rate": 7.858925598559937e-06, "loss": 0.0059, "step": 37870 }, { "epoch": 1.1271629000342196, "grad_norm": 0.11441675573587418, "learning_rate": 7.857505167700542e-06, "loss": 0.0054, "step": 37880 }, { "epoch": 1.1274604615178612, "grad_norm": 0.3529955744743347, "learning_rate": 7.85608439428836e-06, "loss": 0.0049, "step": 37890 }, { "epoch": 1.1277580230015027, "grad_norm": 0.12256096303462982, "learning_rate": 7.854663278493711e-06, "loss": 0.005, "step": 37900 }, { "epoch": 1.1280555844851443, "grad_norm": 0.382980078458786, "learning_rate": 7.853241820486957e-06, "loss": 0.0048, "step": 37910 }, { "epoch": 1.1283531459687859, "grad_norm": 0.23362997174263, "learning_rate": 7.851820020438496e-06, "loss": 0.0055, "step": 37920 }, { "epoch": 1.1286507074524272, "grad_norm": 0.5184745192527771, "learning_rate": 7.850397878518773e-06, "loss": 0.0106, "step": 37930 }, { "epoch": 1.1289482689360688, "grad_norm": 0.14857156574726105, "learning_rate": 7.848975394898275e-06, "loss": 0.0039, "step": 37940 }, { "epoch": 1.1292458304197104, "grad_norm": 0.2986246347427368, "learning_rate": 7.847552569747523e-06, "loss": 0.0055, "step": 37950 }, { "epoch": 1.129543391903352, "grad_norm": 0.2293798327445984, "learning_rate": 7.846129403237084e-06, "loss": 0.0045, "step": 37960 }, { "epoch": 1.1298409533869935, "grad_norm": 0.2848927676677704, "learning_rate": 7.844705895537564e-06, "loss": 0.0049, "step": 37970 }, { "epoch": 1.130138514870635, "grad_norm": 0.37016335129737854, "learning_rate": 7.843282046819611e-06, "loss": 0.0034, "step": 37980 }, { "epoch": 1.1304360763542767, "grad_norm": 0.3020412027835846, "learning_rate": 7.841857857253917e-06, "loss": 0.0106, "step": 37990 }, { "epoch": 1.1307336378379182, "grad_norm": 0.21311232447624207, "learning_rate": 7.840433327011205e-06, "loss": 0.0048, "step": 38000 }, { "epoch": 1.1310311993215598, "grad_norm": 0.17482230067253113, "learning_rate": 7.83900845626225e-06, "loss": 0.0053, "step": 38010 }, { "epoch": 1.1313287608052014, "grad_norm": 0.2972821593284607, "learning_rate": 7.83758324517786e-06, "loss": 0.0048, "step": 38020 }, { "epoch": 1.131626322288843, "grad_norm": 0.22739289700984955, "learning_rate": 7.83615769392889e-06, "loss": 0.0066, "step": 38030 }, { "epoch": 1.1319238837724845, "grad_norm": 0.18896552920341492, "learning_rate": 7.83473180268623e-06, "loss": 0.0064, "step": 38040 }, { "epoch": 1.1322214452561261, "grad_norm": 0.09533222764730453, "learning_rate": 7.833305571620815e-06, "loss": 0.0112, "step": 38050 }, { "epoch": 1.1325190067397677, "grad_norm": 0.20254145562648773, "learning_rate": 7.83187900090362e-06, "loss": 0.0041, "step": 38060 }, { "epoch": 1.1328165682234093, "grad_norm": 0.1741844266653061, "learning_rate": 7.830452090705656e-06, "loss": 0.0026, "step": 38070 }, { "epoch": 1.1331141297070508, "grad_norm": 0.2481360137462616, "learning_rate": 7.829024841197983e-06, "loss": 0.0046, "step": 38080 }, { "epoch": 1.1334116911906922, "grad_norm": 0.24838459491729736, "learning_rate": 7.82759725255169e-06, "loss": 0.0036, "step": 38090 }, { "epoch": 1.1337092526743338, "grad_norm": 0.2607726752758026, "learning_rate": 7.826169324937924e-06, "loss": 0.0034, "step": 38100 }, { "epoch": 1.1340068141579753, "grad_norm": 0.12987343966960907, "learning_rate": 7.824741058527856e-06, "loss": 0.0073, "step": 38110 }, { "epoch": 1.134304375641617, "grad_norm": 0.38301968574523926, "learning_rate": 7.823312453492707e-06, "loss": 0.005, "step": 38120 }, { "epoch": 1.1346019371252585, "grad_norm": 0.12953625619411469, "learning_rate": 7.821883510003732e-06, "loss": 0.0043, "step": 38130 }, { "epoch": 1.1348994986089, "grad_norm": 0.35258767008781433, "learning_rate": 7.820454228232235e-06, "loss": 0.0089, "step": 38140 }, { "epoch": 1.1351970600925416, "grad_norm": 0.12751422822475433, "learning_rate": 7.819024608349552e-06, "loss": 0.0032, "step": 38150 }, { "epoch": 1.1354946215761832, "grad_norm": 0.19822458922863007, "learning_rate": 7.817594650527065e-06, "loss": 0.0059, "step": 38160 }, { "epoch": 1.1357921830598248, "grad_norm": 0.2601734399795532, "learning_rate": 7.816164354936193e-06, "loss": 0.0048, "step": 38170 }, { "epoch": 1.1360897445434663, "grad_norm": 0.18279270827770233, "learning_rate": 7.8147337217484e-06, "loss": 0.0048, "step": 38180 }, { "epoch": 1.136387306027108, "grad_norm": 0.31113994121551514, "learning_rate": 7.813302751135188e-06, "loss": 0.0095, "step": 38190 }, { "epoch": 1.1366848675107495, "grad_norm": 0.2781110405921936, "learning_rate": 7.811871443268097e-06, "loss": 0.0042, "step": 38200 }, { "epoch": 1.136982428994391, "grad_norm": 0.42570847272872925, "learning_rate": 7.810439798318713e-06, "loss": 0.0063, "step": 38210 }, { "epoch": 1.1372799904780324, "grad_norm": 0.5922281742095947, "learning_rate": 7.809007816458653e-06, "loss": 0.0041, "step": 38220 }, { "epoch": 1.137577551961674, "grad_norm": 0.24014440178871155, "learning_rate": 7.807575497859588e-06, "loss": 0.0056, "step": 38230 }, { "epoch": 1.1378751134453156, "grad_norm": 0.3150610625743866, "learning_rate": 7.806142842693216e-06, "loss": 0.0044, "step": 38240 }, { "epoch": 1.1381726749289571, "grad_norm": 0.2517816126346588, "learning_rate": 7.804709851131285e-06, "loss": 0.0047, "step": 38250 }, { "epoch": 1.1384702364125987, "grad_norm": 0.21039940416812897, "learning_rate": 7.803276523345578e-06, "loss": 0.0081, "step": 38260 }, { "epoch": 1.1387677978962403, "grad_norm": 0.26107057929039, "learning_rate": 7.801842859507918e-06, "loss": 0.0058, "step": 38270 }, { "epoch": 1.1390653593798818, "grad_norm": 0.1961282342672348, "learning_rate": 7.800408859790174e-06, "loss": 0.0039, "step": 38280 }, { "epoch": 1.1393629208635234, "grad_norm": 0.301940381526947, "learning_rate": 7.798974524364248e-06, "loss": 0.0044, "step": 38290 }, { "epoch": 1.139660482347165, "grad_norm": 0.20891156792640686, "learning_rate": 7.797539853402089e-06, "loss": 0.0054, "step": 38300 }, { "epoch": 1.1399580438308066, "grad_norm": 0.22943522036075592, "learning_rate": 7.796104847075682e-06, "loss": 0.0042, "step": 38310 }, { "epoch": 1.1402556053144481, "grad_norm": 0.4334520399570465, "learning_rate": 7.794669505557051e-06, "loss": 0.0065, "step": 38320 }, { "epoch": 1.1405531667980897, "grad_norm": 0.15041577816009521, "learning_rate": 7.793233829018263e-06, "loss": 0.0046, "step": 38330 }, { "epoch": 1.1408507282817313, "grad_norm": 0.10903327912092209, "learning_rate": 7.791797817631426e-06, "loss": 0.0048, "step": 38340 }, { "epoch": 1.1411482897653729, "grad_norm": 0.1836884617805481, "learning_rate": 7.790361471568684e-06, "loss": 0.0039, "step": 38350 }, { "epoch": 1.1414458512490144, "grad_norm": 0.28249120712280273, "learning_rate": 7.788924791002226e-06, "loss": 0.0045, "step": 38360 }, { "epoch": 1.141743412732656, "grad_norm": 0.37553033232688904, "learning_rate": 7.787487776104277e-06, "loss": 0.0043, "step": 38370 }, { "epoch": 1.1420409742162974, "grad_norm": 0.3589397072792053, "learning_rate": 7.786050427047105e-06, "loss": 0.0052, "step": 38380 }, { "epoch": 1.142338535699939, "grad_norm": 0.16067181527614594, "learning_rate": 7.784612744003017e-06, "loss": 0.0039, "step": 38390 }, { "epoch": 1.1426360971835805, "grad_norm": 0.1890009641647339, "learning_rate": 7.783174727144359e-06, "loss": 0.0054, "step": 38400 }, { "epoch": 1.142933658667222, "grad_norm": 0.08955230563879013, "learning_rate": 7.781736376643519e-06, "loss": 0.0048, "step": 38410 }, { "epoch": 1.1432312201508636, "grad_norm": 0.3940265476703644, "learning_rate": 7.780297692672925e-06, "loss": 0.0067, "step": 38420 }, { "epoch": 1.1435287816345052, "grad_norm": 0.0962168350815773, "learning_rate": 7.77885867540504e-06, "loss": 0.0044, "step": 38430 }, { "epoch": 1.1438263431181468, "grad_norm": 0.0667802169919014, "learning_rate": 7.777419325012376e-06, "loss": 0.0038, "step": 38440 }, { "epoch": 1.1441239046017884, "grad_norm": 0.13895787298679352, "learning_rate": 7.775979641667475e-06, "loss": 0.0037, "step": 38450 }, { "epoch": 1.14442146608543, "grad_norm": 0.12647370994091034, "learning_rate": 7.774539625542929e-06, "loss": 0.0031, "step": 38460 }, { "epoch": 1.1447190275690715, "grad_norm": 0.1524469405412674, "learning_rate": 7.77309927681136e-06, "loss": 0.0044, "step": 38470 }, { "epoch": 1.145016589052713, "grad_norm": 0.150900736451149, "learning_rate": 7.771658595645434e-06, "loss": 0.0025, "step": 38480 }, { "epoch": 1.1453141505363547, "grad_norm": 0.37213897705078125, "learning_rate": 7.770217582217863e-06, "loss": 0.0045, "step": 38490 }, { "epoch": 1.145611712019996, "grad_norm": 0.38992226123809814, "learning_rate": 7.76877623670139e-06, "loss": 0.0059, "step": 38500 }, { "epoch": 1.1459092735036376, "grad_norm": 0.3491656184196472, "learning_rate": 7.767334559268798e-06, "loss": 0.0038, "step": 38510 }, { "epoch": 1.1462068349872792, "grad_norm": 0.05611683428287506, "learning_rate": 7.765892550092918e-06, "loss": 0.003, "step": 38520 }, { "epoch": 1.1465043964709207, "grad_norm": 0.3123020827770233, "learning_rate": 7.764450209346612e-06, "loss": 0.0036, "step": 38530 }, { "epoch": 1.1468019579545623, "grad_norm": 0.13676969707012177, "learning_rate": 7.763007537202785e-06, "loss": 0.0043, "step": 38540 }, { "epoch": 1.1470995194382039, "grad_norm": 0.19314216077327728, "learning_rate": 7.761564533834385e-06, "loss": 0.0038, "step": 38550 }, { "epoch": 1.1473970809218454, "grad_norm": 0.08769934624433517, "learning_rate": 7.760121199414396e-06, "loss": 0.0054, "step": 38560 }, { "epoch": 1.147694642405487, "grad_norm": 0.18455035984516144, "learning_rate": 7.758677534115842e-06, "loss": 0.0049, "step": 38570 }, { "epoch": 1.1479922038891286, "grad_norm": 0.31115835905075073, "learning_rate": 7.757233538111786e-06, "loss": 0.0057, "step": 38580 }, { "epoch": 1.1482897653727702, "grad_norm": 0.12293124943971634, "learning_rate": 7.755789211575333e-06, "loss": 0.0056, "step": 38590 }, { "epoch": 1.1485873268564117, "grad_norm": 0.20392511785030365, "learning_rate": 7.754344554679625e-06, "loss": 0.0065, "step": 38600 }, { "epoch": 1.1488848883400533, "grad_norm": 0.06454680114984512, "learning_rate": 7.752899567597847e-06, "loss": 0.0078, "step": 38610 }, { "epoch": 1.1491824498236949, "grad_norm": 0.11001012474298477, "learning_rate": 7.75145425050322e-06, "loss": 0.0101, "step": 38620 }, { "epoch": 1.1494800113073365, "grad_norm": 0.10441993176937103, "learning_rate": 7.750008603569007e-06, "loss": 0.0046, "step": 38630 }, { "epoch": 1.149777572790978, "grad_norm": 0.16692005097866058, "learning_rate": 7.74856262696851e-06, "loss": 0.0093, "step": 38640 }, { "epoch": 1.1500751342746196, "grad_norm": 0.263753205537796, "learning_rate": 7.747116320875067e-06, "loss": 0.0058, "step": 38650 }, { "epoch": 1.150372695758261, "grad_norm": 0.2971910536289215, "learning_rate": 7.745669685462063e-06, "loss": 0.0053, "step": 38660 }, { "epoch": 1.1506702572419025, "grad_norm": 0.43720194697380066, "learning_rate": 7.744222720902916e-06, "loss": 0.0071, "step": 38670 }, { "epoch": 1.150967818725544, "grad_norm": 0.25712594389915466, "learning_rate": 7.742775427371085e-06, "loss": 0.0053, "step": 38680 }, { "epoch": 1.1512653802091857, "grad_norm": 0.18942569196224213, "learning_rate": 7.741327805040071e-06, "loss": 0.005, "step": 38690 }, { "epoch": 1.1515629416928272, "grad_norm": 0.17280858755111694, "learning_rate": 7.739879854083412e-06, "loss": 0.0024, "step": 38700 }, { "epoch": 1.1518605031764688, "grad_norm": 0.2542707920074463, "learning_rate": 7.738431574674684e-06, "loss": 0.0029, "step": 38710 }, { "epoch": 1.1521580646601104, "grad_norm": 0.22877372801303864, "learning_rate": 7.736982966987505e-06, "loss": 0.0038, "step": 38720 }, { "epoch": 1.152455626143752, "grad_norm": 0.35841378569602966, "learning_rate": 7.735534031195534e-06, "loss": 0.004, "step": 38730 }, { "epoch": 1.1527531876273935, "grad_norm": 0.1277097910642624, "learning_rate": 7.734084767472464e-06, "loss": 0.0037, "step": 38740 }, { "epoch": 1.1530507491110351, "grad_norm": 0.1710493415594101, "learning_rate": 7.732635175992032e-06, "loss": 0.0052, "step": 38750 }, { "epoch": 1.1533483105946767, "grad_norm": 0.20496119558811188, "learning_rate": 7.73118525692801e-06, "loss": 0.0081, "step": 38760 }, { "epoch": 1.1536458720783183, "grad_norm": 0.15798161923885345, "learning_rate": 7.729735010454215e-06, "loss": 0.0035, "step": 38770 }, { "epoch": 1.1539434335619598, "grad_norm": 0.11460746079683304, "learning_rate": 7.728284436744499e-06, "loss": 0.0049, "step": 38780 }, { "epoch": 1.1542409950456012, "grad_norm": 0.21108797192573547, "learning_rate": 7.726833535972753e-06, "loss": 0.005, "step": 38790 }, { "epoch": 1.1545385565292428, "grad_norm": 0.4048301875591278, "learning_rate": 7.72538230831291e-06, "loss": 0.0064, "step": 38800 }, { "epoch": 1.1548361180128843, "grad_norm": 0.1285344958305359, "learning_rate": 7.72393075393894e-06, "loss": 0.0068, "step": 38810 }, { "epoch": 1.155133679496526, "grad_norm": 0.4056020677089691, "learning_rate": 7.722478873024852e-06, "loss": 0.0047, "step": 38820 }, { "epoch": 1.1554312409801675, "grad_norm": 0.08836977928876877, "learning_rate": 7.721026665744695e-06, "loss": 0.0034, "step": 38830 }, { "epoch": 1.155728802463809, "grad_norm": 0.48355409502983093, "learning_rate": 7.71957413227256e-06, "loss": 0.0086, "step": 38840 }, { "epoch": 1.1560263639474506, "grad_norm": 0.4438818097114563, "learning_rate": 7.718121272782572e-06, "loss": 0.0091, "step": 38850 }, { "epoch": 1.1563239254310922, "grad_norm": 0.1843576431274414, "learning_rate": 7.716668087448897e-06, "loss": 0.0062, "step": 38860 }, { "epoch": 1.1566214869147338, "grad_norm": 0.34994229674339294, "learning_rate": 7.715214576445742e-06, "loss": 0.0062, "step": 38870 }, { "epoch": 1.1569190483983753, "grad_norm": 0.11947517842054367, "learning_rate": 7.713760739947348e-06, "loss": 0.0031, "step": 38880 }, { "epoch": 1.157216609882017, "grad_norm": 0.14863114058971405, "learning_rate": 7.712306578128002e-06, "loss": 0.0032, "step": 38890 }, { "epoch": 1.1575141713656585, "grad_norm": 0.17913152277469635, "learning_rate": 7.710852091162024e-06, "loss": 0.0038, "step": 38900 }, { "epoch": 1.1578117328493, "grad_norm": 0.16784453392028809, "learning_rate": 7.709397279223777e-06, "loss": 0.004, "step": 38910 }, { "epoch": 1.1581092943329416, "grad_norm": 0.1515560746192932, "learning_rate": 7.707942142487662e-06, "loss": 0.0045, "step": 38920 }, { "epoch": 1.1584068558165832, "grad_norm": 0.18049252033233643, "learning_rate": 7.706486681128116e-06, "loss": 0.0033, "step": 38930 }, { "epoch": 1.1587044173002248, "grad_norm": 0.32026684284210205, "learning_rate": 7.705030895319616e-06, "loss": 0.0051, "step": 38940 }, { "epoch": 1.1590019787838661, "grad_norm": 0.2644953727722168, "learning_rate": 7.703574785236684e-06, "loss": 0.005, "step": 38950 }, { "epoch": 1.1592995402675077, "grad_norm": 0.2179393470287323, "learning_rate": 7.702118351053872e-06, "loss": 0.0063, "step": 38960 }, { "epoch": 1.1595971017511493, "grad_norm": 0.24644441902637482, "learning_rate": 7.700661592945776e-06, "loss": 0.0053, "step": 38970 }, { "epoch": 1.1598946632347908, "grad_norm": 0.13316033780574799, "learning_rate": 7.69920451108703e-06, "loss": 0.0039, "step": 38980 }, { "epoch": 1.1601922247184324, "grad_norm": 0.13369321823120117, "learning_rate": 7.697747105652306e-06, "loss": 0.0034, "step": 38990 }, { "epoch": 1.160489786202074, "grad_norm": 0.1828380525112152, "learning_rate": 7.696289376816318e-06, "loss": 0.0038, "step": 39000 }, { "epoch": 1.1607873476857156, "grad_norm": 0.40652576088905334, "learning_rate": 7.69483132475381e-06, "loss": 0.0047, "step": 39010 }, { "epoch": 1.1610849091693571, "grad_norm": 0.07334025949239731, "learning_rate": 7.693372949639578e-06, "loss": 0.0048, "step": 39020 }, { "epoch": 1.1613824706529987, "grad_norm": 0.08045769482851028, "learning_rate": 7.691914251648442e-06, "loss": 0.0052, "step": 39030 }, { "epoch": 1.1616800321366403, "grad_norm": 0.12634271383285522, "learning_rate": 7.690455230955273e-06, "loss": 0.0049, "step": 39040 }, { "epoch": 1.1619775936202819, "grad_norm": 0.37489795684814453, "learning_rate": 7.688995887734976e-06, "loss": 0.0047, "step": 39050 }, { "epoch": 1.1622751551039234, "grad_norm": 0.25918301939964294, "learning_rate": 7.687536222162492e-06, "loss": 0.0047, "step": 39060 }, { "epoch": 1.162572716587565, "grad_norm": 0.4367339313030243, "learning_rate": 7.686076234412804e-06, "loss": 0.0058, "step": 39070 }, { "epoch": 1.1628702780712064, "grad_norm": 0.1658891886472702, "learning_rate": 7.684615924660934e-06, "loss": 0.003, "step": 39080 }, { "epoch": 1.163167839554848, "grad_norm": 0.08773937821388245, "learning_rate": 7.683155293081943e-06, "loss": 0.0062, "step": 39090 }, { "epoch": 1.1634654010384895, "grad_norm": 0.10344374924898148, "learning_rate": 7.681694339850923e-06, "loss": 0.006, "step": 39100 }, { "epoch": 1.163762962522131, "grad_norm": 0.046332452446222305, "learning_rate": 7.680233065143017e-06, "loss": 0.0043, "step": 39110 }, { "epoch": 1.1640605240057726, "grad_norm": 0.08468601852655411, "learning_rate": 7.678771469133396e-06, "loss": 0.0039, "step": 39120 }, { "epoch": 1.1643580854894142, "grad_norm": 0.27732667326927185, "learning_rate": 7.677309551997277e-06, "loss": 0.0067, "step": 39130 }, { "epoch": 1.1646556469730558, "grad_norm": 0.0894612967967987, "learning_rate": 7.675847313909908e-06, "loss": 0.0038, "step": 39140 }, { "epoch": 1.1649532084566974, "grad_norm": 0.3282499611377716, "learning_rate": 7.674384755046582e-06, "loss": 0.0065, "step": 39150 }, { "epoch": 1.165250769940339, "grad_norm": 0.09701038151979446, "learning_rate": 7.672921875582628e-06, "loss": 0.0086, "step": 39160 }, { "epoch": 1.1655483314239805, "grad_norm": 0.31390464305877686, "learning_rate": 7.671458675693413e-06, "loss": 0.0048, "step": 39170 }, { "epoch": 1.165845892907622, "grad_norm": 0.9043893814086914, "learning_rate": 7.669995155554342e-06, "loss": 0.0069, "step": 39180 }, { "epoch": 1.1661434543912637, "grad_norm": 0.1090615838766098, "learning_rate": 7.668531315340864e-06, "loss": 0.0054, "step": 39190 }, { "epoch": 1.1664410158749052, "grad_norm": 0.07861022651195526, "learning_rate": 7.667067155228453e-06, "loss": 0.006, "step": 39200 }, { "epoch": 1.1667385773585468, "grad_norm": 0.21887730062007904, "learning_rate": 7.665602675392637e-06, "loss": 0.0075, "step": 39210 }, { "epoch": 1.1670361388421884, "grad_norm": 0.27279752492904663, "learning_rate": 7.664137876008975e-06, "loss": 0.0047, "step": 39220 }, { "epoch": 1.16733370032583, "grad_norm": 0.136804461479187, "learning_rate": 7.662672757253061e-06, "loss": 0.0047, "step": 39230 }, { "epoch": 1.1676312618094713, "grad_norm": 0.1589263677597046, "learning_rate": 7.661207319300534e-06, "loss": 0.0051, "step": 39240 }, { "epoch": 1.1679288232931129, "grad_norm": 0.04237343370914459, "learning_rate": 7.659741562327065e-06, "loss": 0.0047, "step": 39250 }, { "epoch": 1.1682263847767544, "grad_norm": 0.31654879450798035, "learning_rate": 7.658275486508372e-06, "loss": 0.0045, "step": 39260 }, { "epoch": 1.168523946260396, "grad_norm": 0.41194653511047363, "learning_rate": 7.656809092020198e-06, "loss": 0.0057, "step": 39270 }, { "epoch": 1.1688215077440376, "grad_norm": 0.2834669053554535, "learning_rate": 7.655342379038339e-06, "loss": 0.0032, "step": 39280 }, { "epoch": 1.1691190692276792, "grad_norm": 0.2630418837070465, "learning_rate": 7.653875347738617e-06, "loss": 0.0036, "step": 39290 }, { "epoch": 1.1694166307113207, "grad_norm": 0.07256636768579483, "learning_rate": 7.6524079982969e-06, "loss": 0.0047, "step": 39300 }, { "epoch": 1.1697141921949623, "grad_norm": 0.1841672658920288, "learning_rate": 7.650940330889088e-06, "loss": 0.0056, "step": 39310 }, { "epoch": 1.1700117536786039, "grad_norm": 0.17615348100662231, "learning_rate": 7.649472345691127e-06, "loss": 0.004, "step": 39320 }, { "epoch": 1.1703093151622455, "grad_norm": 0.33828607201576233, "learning_rate": 7.648004042878994e-06, "loss": 0.0054, "step": 39330 }, { "epoch": 1.170606876645887, "grad_norm": 0.17584407329559326, "learning_rate": 7.646535422628706e-06, "loss": 0.0049, "step": 39340 }, { "epoch": 1.1709044381295286, "grad_norm": 0.20110255479812622, "learning_rate": 7.64506648511632e-06, "loss": 0.0055, "step": 39350 }, { "epoch": 1.17120199961317, "grad_norm": 0.1706783026456833, "learning_rate": 7.643597230517932e-06, "loss": 0.0045, "step": 39360 }, { "epoch": 1.1714995610968115, "grad_norm": 0.11532605439424515, "learning_rate": 7.642127659009668e-06, "loss": 0.0046, "step": 39370 }, { "epoch": 1.171797122580453, "grad_norm": 0.41321343183517456, "learning_rate": 7.640657770767702e-06, "loss": 0.0041, "step": 39380 }, { "epoch": 1.1720946840640947, "grad_norm": 0.10722044110298157, "learning_rate": 7.639187565968241e-06, "loss": 0.0044, "step": 39390 }, { "epoch": 1.1723922455477362, "grad_norm": 0.22178412973880768, "learning_rate": 7.637717044787527e-06, "loss": 0.0054, "step": 39400 }, { "epoch": 1.1726898070313778, "grad_norm": 0.22146865725517273, "learning_rate": 7.636246207401849e-06, "loss": 0.0075, "step": 39410 }, { "epoch": 1.1729873685150194, "grad_norm": 0.1674329936504364, "learning_rate": 7.634775053987524e-06, "loss": 0.0047, "step": 39420 }, { "epoch": 1.173284929998661, "grad_norm": 0.07830386608839035, "learning_rate": 7.633303584720916e-06, "loss": 0.0058, "step": 39430 }, { "epoch": 1.1735824914823025, "grad_norm": 0.05705832317471504, "learning_rate": 7.631831799778418e-06, "loss": 0.0032, "step": 39440 }, { "epoch": 1.173880052965944, "grad_norm": 0.047576695680618286, "learning_rate": 7.630359699336466e-06, "loss": 0.0037, "step": 39450 }, { "epoch": 1.1741776144495857, "grad_norm": 0.1678965985774994, "learning_rate": 7.628887283571535e-06, "loss": 0.0063, "step": 39460 }, { "epoch": 1.1744751759332273, "grad_norm": 0.4061940014362335, "learning_rate": 7.627414552660133e-06, "loss": 0.0045, "step": 39470 }, { "epoch": 1.1747727374168688, "grad_norm": 0.31552764773368835, "learning_rate": 7.625941506778809e-06, "loss": 0.0065, "step": 39480 }, { "epoch": 1.1750702989005104, "grad_norm": 0.12706446647644043, "learning_rate": 7.624468146104151e-06, "loss": 0.0027, "step": 39490 }, { "epoch": 1.175367860384152, "grad_norm": 0.22949156165122986, "learning_rate": 7.622994470812781e-06, "loss": 0.0035, "step": 39500 }, { "epoch": 1.1756654218677935, "grad_norm": 0.05417832359671593, "learning_rate": 7.621520481081362e-06, "loss": 0.0033, "step": 39510 }, { "epoch": 1.175962983351435, "grad_norm": 0.3763035237789154, "learning_rate": 7.6200461770865905e-06, "loss": 0.0058, "step": 39520 }, { "epoch": 1.1762605448350765, "grad_norm": 0.41170716285705566, "learning_rate": 7.618571559005208e-06, "loss": 0.0048, "step": 39530 }, { "epoch": 1.176558106318718, "grad_norm": 0.39473244547843933, "learning_rate": 7.617096627013985e-06, "loss": 0.0051, "step": 39540 }, { "epoch": 1.1768556678023596, "grad_norm": 0.1482718288898468, "learning_rate": 7.615621381289737e-06, "loss": 0.0025, "step": 39550 }, { "epoch": 1.1771532292860012, "grad_norm": 0.15607264637947083, "learning_rate": 7.6141458220093115e-06, "loss": 0.0043, "step": 39560 }, { "epoch": 1.1774507907696428, "grad_norm": 0.08975973725318909, "learning_rate": 7.612669949349598e-06, "loss": 0.0035, "step": 39570 }, { "epoch": 1.1777483522532843, "grad_norm": 0.18841107189655304, "learning_rate": 7.6111937634875196e-06, "loss": 0.0072, "step": 39580 }, { "epoch": 1.178045913736926, "grad_norm": 0.12627394497394562, "learning_rate": 7.609717264600041e-06, "loss": 0.0053, "step": 39590 }, { "epoch": 1.1783434752205675, "grad_norm": 0.11255042999982834, "learning_rate": 7.608240452864161e-06, "loss": 0.0055, "step": 39600 }, { "epoch": 1.178641036704209, "grad_norm": 0.1834598183631897, "learning_rate": 7.606763328456916e-06, "loss": 0.008, "step": 39610 }, { "epoch": 1.1789385981878506, "grad_norm": 0.41292059421539307, "learning_rate": 7.605285891555385e-06, "loss": 0.0056, "step": 39620 }, { "epoch": 1.1792361596714922, "grad_norm": 0.276438444852829, "learning_rate": 7.603808142336675e-06, "loss": 0.0032, "step": 39630 }, { "epoch": 1.1795337211551338, "grad_norm": 0.30623218417167664, "learning_rate": 7.602330080977942e-06, "loss": 0.0073, "step": 39640 }, { "epoch": 1.1798312826387751, "grad_norm": 0.3322868049144745, "learning_rate": 7.600851707656369e-06, "loss": 0.0036, "step": 39650 }, { "epoch": 1.1801288441224167, "grad_norm": 0.026358788833022118, "learning_rate": 7.599373022549183e-06, "loss": 0.0048, "step": 39660 }, { "epoch": 1.1804264056060583, "grad_norm": 0.2409738004207611, "learning_rate": 7.597894025833648e-06, "loss": 0.0055, "step": 39670 }, { "epoch": 1.1807239670896998, "grad_norm": 0.08338677138090134, "learning_rate": 7.596414717687057e-06, "loss": 0.008, "step": 39680 }, { "epoch": 1.1810215285733414, "grad_norm": 0.1400279849767685, "learning_rate": 7.594935098286755e-06, "loss": 0.0031, "step": 39690 }, { "epoch": 1.181319090056983, "grad_norm": 0.18119579553604126, "learning_rate": 7.593455167810111e-06, "loss": 0.0063, "step": 39700 }, { "epoch": 1.1816166515406246, "grad_norm": 0.2218523770570755, "learning_rate": 7.5919749264345374e-06, "loss": 0.0042, "step": 39710 }, { "epoch": 1.1819142130242661, "grad_norm": 0.11210118234157562, "learning_rate": 7.590494374337484e-06, "loss": 0.0048, "step": 39720 }, { "epoch": 1.1822117745079077, "grad_norm": 0.25764742493629456, "learning_rate": 7.589013511696437e-06, "loss": 0.0053, "step": 39730 }, { "epoch": 1.1825093359915493, "grad_norm": 0.34702762961387634, "learning_rate": 7.587532338688917e-06, "loss": 0.004, "step": 39740 }, { "epoch": 1.1828068974751909, "grad_norm": 0.06840833276510239, "learning_rate": 7.586050855492488e-06, "loss": 0.0026, "step": 39750 }, { "epoch": 1.1831044589588324, "grad_norm": 0.13773977756500244, "learning_rate": 7.584569062284745e-06, "loss": 0.0095, "step": 39760 }, { "epoch": 1.183402020442474, "grad_norm": 0.10273871570825577, "learning_rate": 7.583086959243326e-06, "loss": 0.0037, "step": 39770 }, { "epoch": 1.1836995819261156, "grad_norm": 0.1274704486131668, "learning_rate": 7.5816045465459e-06, "loss": 0.0076, "step": 39780 }, { "epoch": 1.1839971434097571, "grad_norm": 0.14791467785835266, "learning_rate": 7.580121824370177e-06, "loss": 0.0051, "step": 39790 }, { "epoch": 1.1842947048933987, "grad_norm": 0.11206796765327454, "learning_rate": 7.578638792893902e-06, "loss": 0.0042, "step": 39800 }, { "epoch": 1.18459226637704, "grad_norm": 0.44724300503730774, "learning_rate": 7.57715545229486e-06, "loss": 0.0055, "step": 39810 }, { "epoch": 1.1848898278606816, "grad_norm": 0.14534977078437805, "learning_rate": 7.57567180275087e-06, "loss": 0.0046, "step": 39820 }, { "epoch": 1.1851873893443232, "grad_norm": 0.297842413187027, "learning_rate": 7.574187844439792e-06, "loss": 0.0061, "step": 39830 }, { "epoch": 1.1854849508279648, "grad_norm": 0.16653230786323547, "learning_rate": 7.572703577539518e-06, "loss": 0.0062, "step": 39840 }, { "epoch": 1.1857825123116064, "grad_norm": 0.2958117723464966, "learning_rate": 7.571219002227979e-06, "loss": 0.0045, "step": 39850 }, { "epoch": 1.186080073795248, "grad_norm": 0.1837678700685501, "learning_rate": 7.569734118683144e-06, "loss": 0.0058, "step": 39860 }, { "epoch": 1.1863776352788895, "grad_norm": 0.3296162784099579, "learning_rate": 7.5682489270830185e-06, "loss": 0.007, "step": 39870 }, { "epoch": 1.186675196762531, "grad_norm": 0.08071581274271011, "learning_rate": 7.566763427605646e-06, "loss": 0.0042, "step": 39880 }, { "epoch": 1.1869727582461727, "grad_norm": 0.11354678124189377, "learning_rate": 7.565277620429101e-06, "loss": 0.0051, "step": 39890 }, { "epoch": 1.1872703197298142, "grad_norm": 0.23892951011657715, "learning_rate": 7.563791505731506e-06, "loss": 0.0037, "step": 39900 }, { "epoch": 1.1875678812134558, "grad_norm": 0.5132049918174744, "learning_rate": 7.562305083691008e-06, "loss": 0.0032, "step": 39910 }, { "epoch": 1.1878654426970974, "grad_norm": 1.0007297992706299, "learning_rate": 7.5608183544858005e-06, "loss": 0.0082, "step": 39920 }, { "epoch": 1.1881630041807387, "grad_norm": 0.47809070348739624, "learning_rate": 7.559331318294108e-06, "loss": 0.0071, "step": 39930 }, { "epoch": 1.1884605656643803, "grad_norm": 0.12177085131406784, "learning_rate": 7.557843975294195e-06, "loss": 0.0047, "step": 39940 }, { "epoch": 1.1887581271480219, "grad_norm": 0.2103278934955597, "learning_rate": 7.55635632566436e-06, "loss": 0.0055, "step": 39950 }, { "epoch": 1.1890556886316634, "grad_norm": 0.4362269937992096, "learning_rate": 7.5548683695829435e-06, "loss": 0.0042, "step": 39960 }, { "epoch": 1.189353250115305, "grad_norm": 0.3429122567176819, "learning_rate": 7.553380107228316e-06, "loss": 0.0053, "step": 39970 }, { "epoch": 1.1896508115989466, "grad_norm": 0.05592240393161774, "learning_rate": 7.551891538778888e-06, "loss": 0.0038, "step": 39980 }, { "epoch": 1.1899483730825882, "grad_norm": 0.30959761142730713, "learning_rate": 7.550402664413108e-06, "loss": 0.0036, "step": 39990 }, { "epoch": 1.1902459345662297, "grad_norm": 0.11768820881843567, "learning_rate": 7.548913484309457e-06, "loss": 0.0039, "step": 40000 }, { "epoch": 1.1905434960498713, "grad_norm": 0.18318066000938416, "learning_rate": 7.547423998646458e-06, "loss": 0.004, "step": 40010 }, { "epoch": 1.1908410575335129, "grad_norm": 0.11529634892940521, "learning_rate": 7.5459342076026675e-06, "loss": 0.0036, "step": 40020 }, { "epoch": 1.1911386190171545, "grad_norm": 0.4085333049297333, "learning_rate": 7.544444111356679e-06, "loss": 0.0059, "step": 40030 }, { "epoch": 1.191436180500796, "grad_norm": 0.1418701410293579, "learning_rate": 7.542953710087121e-06, "loss": 0.0048, "step": 40040 }, { "epoch": 1.1917337419844376, "grad_norm": 0.22703386843204498, "learning_rate": 7.541463003972662e-06, "loss": 0.0041, "step": 40050 }, { "epoch": 1.1920313034680792, "grad_norm": 0.3652152419090271, "learning_rate": 7.539971993192006e-06, "loss": 0.0062, "step": 40060 }, { "epoch": 1.1923288649517207, "grad_norm": 0.1447669267654419, "learning_rate": 7.538480677923889e-06, "loss": 0.0051, "step": 40070 }, { "epoch": 1.1926264264353623, "grad_norm": 0.0891769751906395, "learning_rate": 7.53698905834709e-06, "loss": 0.0042, "step": 40080 }, { "epoch": 1.1929239879190037, "grad_norm": 0.16192609071731567, "learning_rate": 7.535497134640424e-06, "loss": 0.0105, "step": 40090 }, { "epoch": 1.1932215494026452, "grad_norm": 0.0787472203373909, "learning_rate": 7.534004906982737e-06, "loss": 0.0044, "step": 40100 }, { "epoch": 1.1935191108862868, "grad_norm": 0.05919736623764038, "learning_rate": 7.532512375552916e-06, "loss": 0.0061, "step": 40110 }, { "epoch": 1.1938166723699284, "grad_norm": 0.046917036175727844, "learning_rate": 7.5310195405298815e-06, "loss": 0.0036, "step": 40120 }, { "epoch": 1.19411423385357, "grad_norm": 0.33993542194366455, "learning_rate": 7.5295264020925954e-06, "loss": 0.0075, "step": 40130 }, { "epoch": 1.1944117953372115, "grad_norm": 0.20456883311271667, "learning_rate": 7.528032960420046e-06, "loss": 0.0057, "step": 40140 }, { "epoch": 1.194709356820853, "grad_norm": 0.0961185097694397, "learning_rate": 7.526539215691273e-06, "loss": 0.0062, "step": 40150 }, { "epoch": 1.1950069183044947, "grad_norm": 0.18132489919662476, "learning_rate": 7.525045168085339e-06, "loss": 0.0062, "step": 40160 }, { "epoch": 1.1953044797881363, "grad_norm": 0.12387867271900177, "learning_rate": 7.523550817781347e-06, "loss": 0.0036, "step": 40170 }, { "epoch": 1.1956020412717778, "grad_norm": 0.07616773247718811, "learning_rate": 7.52205616495844e-06, "loss": 0.0048, "step": 40180 }, { "epoch": 1.1958996027554194, "grad_norm": 0.24596333503723145, "learning_rate": 7.520561209795793e-06, "loss": 0.0031, "step": 40190 }, { "epoch": 1.196197164239061, "grad_norm": 0.43978071212768555, "learning_rate": 7.519065952472618e-06, "loss": 0.0071, "step": 40200 }, { "epoch": 1.1964947257227025, "grad_norm": 0.14563417434692383, "learning_rate": 7.517570393168165e-06, "loss": 0.0047, "step": 40210 }, { "epoch": 1.196792287206344, "grad_norm": 0.28460782766342163, "learning_rate": 7.51607453206172e-06, "loss": 0.0053, "step": 40220 }, { "epoch": 1.1970898486899855, "grad_norm": 0.12221617251634598, "learning_rate": 7.514578369332602e-06, "loss": 0.0048, "step": 40230 }, { "epoch": 1.197387410173627, "grad_norm": 0.23972348868846893, "learning_rate": 7.51308190516017e-06, "loss": 0.0058, "step": 40240 }, { "epoch": 1.1976849716572686, "grad_norm": 0.3207867443561554, "learning_rate": 7.511585139723816e-06, "loss": 0.0048, "step": 40250 }, { "epoch": 1.1979825331409102, "grad_norm": 0.0846567377448082, "learning_rate": 7.510088073202972e-06, "loss": 0.0036, "step": 40260 }, { "epoch": 1.1982800946245518, "grad_norm": 0.3137308955192566, "learning_rate": 7.5085907057771e-06, "loss": 0.0053, "step": 40270 }, { "epoch": 1.1985776561081933, "grad_norm": 0.1246844232082367, "learning_rate": 7.5070930376257055e-06, "loss": 0.0048, "step": 40280 }, { "epoch": 1.198875217591835, "grad_norm": 0.27824652194976807, "learning_rate": 7.505595068928325e-06, "loss": 0.0045, "step": 40290 }, { "epoch": 1.1991727790754765, "grad_norm": 0.22683706879615784, "learning_rate": 7.504096799864533e-06, "loss": 0.0033, "step": 40300 }, { "epoch": 1.199470340559118, "grad_norm": 0.40669146180152893, "learning_rate": 7.502598230613938e-06, "loss": 0.0056, "step": 40310 }, { "epoch": 1.1997679020427596, "grad_norm": 0.3226563632488251, "learning_rate": 7.501099361356188e-06, "loss": 0.0038, "step": 40320 }, { "epoch": 1.2000654635264012, "grad_norm": 0.06980795413255692, "learning_rate": 7.499600192270962e-06, "loss": 0.0049, "step": 40330 }, { "epoch": 1.2003630250100428, "grad_norm": 0.22629974782466888, "learning_rate": 7.498100723537981e-06, "loss": 0.0054, "step": 40340 }, { "epoch": 1.2006605864936843, "grad_norm": 0.3675746023654938, "learning_rate": 7.496600955336996e-06, "loss": 0.008, "step": 40350 }, { "epoch": 1.200958147977326, "grad_norm": 0.21958982944488525, "learning_rate": 7.495100887847797e-06, "loss": 0.0046, "step": 40360 }, { "epoch": 1.2012557094609675, "grad_norm": 0.3082886040210724, "learning_rate": 7.493600521250212e-06, "loss": 0.0079, "step": 40370 }, { "epoch": 1.2015532709446088, "grad_norm": 0.28944459557533264, "learning_rate": 7.492099855724099e-06, "loss": 0.0046, "step": 40380 }, { "epoch": 1.2018508324282504, "grad_norm": 0.21484962105751038, "learning_rate": 7.490598891449359e-06, "loss": 0.0049, "step": 40390 }, { "epoch": 1.202148393911892, "grad_norm": 0.16274872422218323, "learning_rate": 7.48909762860592e-06, "loss": 0.0045, "step": 40400 }, { "epoch": 1.2024459553955336, "grad_norm": 0.17758524417877197, "learning_rate": 7.487596067373756e-06, "loss": 0.0076, "step": 40410 }, { "epoch": 1.2027435168791751, "grad_norm": 0.23830415308475494, "learning_rate": 7.486094207932868e-06, "loss": 0.0069, "step": 40420 }, { "epoch": 1.2030410783628167, "grad_norm": 0.2875100076198578, "learning_rate": 7.4845920504632974e-06, "loss": 0.0082, "step": 40430 }, { "epoch": 1.2033386398464583, "grad_norm": 0.04215987026691437, "learning_rate": 7.483089595145121e-06, "loss": 0.0042, "step": 40440 }, { "epoch": 1.2036362013300999, "grad_norm": 0.31964728236198425, "learning_rate": 7.481586842158449e-06, "loss": 0.0058, "step": 40450 }, { "epoch": 1.2039337628137414, "grad_norm": 0.3378388285636902, "learning_rate": 7.48008379168343e-06, "loss": 0.0052, "step": 40460 }, { "epoch": 1.204231324297383, "grad_norm": 0.31314989924430847, "learning_rate": 7.478580443900247e-06, "loss": 0.0035, "step": 40470 }, { "epoch": 1.2045288857810246, "grad_norm": 0.19904080033302307, "learning_rate": 7.477076798989119e-06, "loss": 0.0035, "step": 40480 }, { "epoch": 1.2048264472646661, "grad_norm": 0.10766661912202835, "learning_rate": 7.4755728571303e-06, "loss": 0.0037, "step": 40490 }, { "epoch": 1.2051240087483075, "grad_norm": 0.18422292172908783, "learning_rate": 7.474068618504079e-06, "loss": 0.0059, "step": 40500 }, { "epoch": 1.205421570231949, "grad_norm": 0.2238704413175583, "learning_rate": 7.472564083290783e-06, "loss": 0.0051, "step": 40510 }, { "epoch": 1.2057191317155906, "grad_norm": 0.07701525837182999, "learning_rate": 7.471059251670772e-06, "loss": 0.0067, "step": 40520 }, { "epoch": 1.2060166931992322, "grad_norm": 0.11307139694690704, "learning_rate": 7.469554123824443e-06, "loss": 0.0046, "step": 40530 }, { "epoch": 1.2063142546828738, "grad_norm": 0.24446982145309448, "learning_rate": 7.4680486999322284e-06, "loss": 0.01, "step": 40540 }, { "epoch": 1.2066118161665154, "grad_norm": 0.40913137793540955, "learning_rate": 7.4665429801745956e-06, "loss": 0.006, "step": 40550 }, { "epoch": 1.206909377650157, "grad_norm": 0.2653348743915558, "learning_rate": 7.465036964732049e-06, "loss": 0.0054, "step": 40560 }, { "epoch": 1.2072069391337985, "grad_norm": 0.2709723114967346, "learning_rate": 7.463530653785125e-06, "loss": 0.0029, "step": 40570 }, { "epoch": 1.20750450061744, "grad_norm": 0.25761425495147705, "learning_rate": 7.4620240475143976e-06, "loss": 0.0096, "step": 40580 }, { "epoch": 1.2078020621010817, "grad_norm": 0.15061978995800018, "learning_rate": 7.460517146100478e-06, "loss": 0.0031, "step": 40590 }, { "epoch": 1.2080996235847232, "grad_norm": 0.3289538621902466, "learning_rate": 7.4590099497240095e-06, "loss": 0.0053, "step": 40600 }, { "epoch": 1.2083971850683648, "grad_norm": 0.15506567060947418, "learning_rate": 7.457502458565673e-06, "loss": 0.0069, "step": 40610 }, { "epoch": 1.2086947465520064, "grad_norm": 0.5546056032180786, "learning_rate": 7.4559946728061835e-06, "loss": 0.0047, "step": 40620 }, { "epoch": 1.208992308035648, "grad_norm": 1.268202304840088, "learning_rate": 7.454486592626291e-06, "loss": 0.0076, "step": 40630 }, { "epoch": 1.2092898695192895, "grad_norm": 0.2305755615234375, "learning_rate": 7.452978218206784e-06, "loss": 0.0057, "step": 40640 }, { "epoch": 1.209587431002931, "grad_norm": 0.22885559499263763, "learning_rate": 7.451469549728481e-06, "loss": 0.0047, "step": 40650 }, { "epoch": 1.2098849924865724, "grad_norm": 0.26083964109420776, "learning_rate": 7.449960587372239e-06, "loss": 0.0068, "step": 40660 }, { "epoch": 1.210182553970214, "grad_norm": 0.22636821866035461, "learning_rate": 7.4484513313189525e-06, "loss": 0.0051, "step": 40670 }, { "epoch": 1.2104801154538556, "grad_norm": 0.11216633021831512, "learning_rate": 7.446941781749543e-06, "loss": 0.0048, "step": 40680 }, { "epoch": 1.2107776769374972, "grad_norm": 0.08310729265213013, "learning_rate": 7.44543193884498e-06, "loss": 0.0039, "step": 40690 }, { "epoch": 1.2110752384211387, "grad_norm": 0.14348100125789642, "learning_rate": 7.443921802786256e-06, "loss": 0.0052, "step": 40700 }, { "epoch": 1.2113727999047803, "grad_norm": 0.6049520373344421, "learning_rate": 7.442411373754405e-06, "loss": 0.0082, "step": 40710 }, { "epoch": 1.2116703613884219, "grad_norm": 0.3196507692337036, "learning_rate": 7.440900651930494e-06, "loss": 0.0048, "step": 40720 }, { "epoch": 1.2119679228720635, "grad_norm": 0.1163681223988533, "learning_rate": 7.439389637495625e-06, "loss": 0.005, "step": 40730 }, { "epoch": 1.212265484355705, "grad_norm": 0.21122142672538757, "learning_rate": 7.437878330630937e-06, "loss": 0.0037, "step": 40740 }, { "epoch": 1.2125630458393466, "grad_norm": 0.13849639892578125, "learning_rate": 7.436366731517605e-06, "loss": 0.0042, "step": 40750 }, { "epoch": 1.2128606073229882, "grad_norm": 0.4474945068359375, "learning_rate": 7.434854840336831e-06, "loss": 0.007, "step": 40760 }, { "epoch": 1.2131581688066297, "grad_norm": 0.08791899681091309, "learning_rate": 7.433342657269864e-06, "loss": 0.0055, "step": 40770 }, { "epoch": 1.2134557302902713, "grad_norm": 0.07304602861404419, "learning_rate": 7.431830182497978e-06, "loss": 0.0037, "step": 40780 }, { "epoch": 1.2137532917739127, "grad_norm": 0.1506364643573761, "learning_rate": 7.430317416202489e-06, "loss": 0.0059, "step": 40790 }, { "epoch": 1.2140508532575542, "grad_norm": 0.12443239986896515, "learning_rate": 7.42880435856474e-06, "loss": 0.0045, "step": 40800 }, { "epoch": 1.2143484147411958, "grad_norm": 0.22064447402954102, "learning_rate": 7.427291009766119e-06, "loss": 0.0059, "step": 40810 }, { "epoch": 1.2146459762248374, "grad_norm": 0.08512920141220093, "learning_rate": 7.425777369988041e-06, "loss": 0.0069, "step": 40820 }, { "epoch": 1.214943537708479, "grad_norm": 0.22777247428894043, "learning_rate": 7.424263439411958e-06, "loss": 0.0022, "step": 40830 }, { "epoch": 1.2152410991921205, "grad_norm": 0.1640978455543518, "learning_rate": 7.422749218219361e-06, "loss": 0.0034, "step": 40840 }, { "epoch": 1.215538660675762, "grad_norm": 0.12456946820020676, "learning_rate": 7.4212347065917665e-06, "loss": 0.0037, "step": 40850 }, { "epoch": 1.2158362221594037, "grad_norm": 0.13784539699554443, "learning_rate": 7.419719904710736e-06, "loss": 0.0042, "step": 40860 }, { "epoch": 1.2161337836430453, "grad_norm": 0.3285706043243408, "learning_rate": 7.418204812757859e-06, "loss": 0.004, "step": 40870 }, { "epoch": 1.2164313451266868, "grad_norm": 0.18311786651611328, "learning_rate": 7.416689430914764e-06, "loss": 0.0038, "step": 40880 }, { "epoch": 1.2167289066103284, "grad_norm": 0.6029544472694397, "learning_rate": 7.41517375936311e-06, "loss": 0.0053, "step": 40890 }, { "epoch": 1.21702646809397, "grad_norm": 0.09066896140575409, "learning_rate": 7.413657798284597e-06, "loss": 0.0042, "step": 40900 }, { "epoch": 1.2173240295776115, "grad_norm": 0.23066182434558868, "learning_rate": 7.41214154786095e-06, "loss": 0.0058, "step": 40910 }, { "epoch": 1.2176215910612531, "grad_norm": 0.08713747560977936, "learning_rate": 7.41062500827394e-06, "loss": 0.0059, "step": 40920 }, { "epoch": 1.2179191525448947, "grad_norm": 0.369974821805954, "learning_rate": 7.409108179705363e-06, "loss": 0.0047, "step": 40930 }, { "epoch": 1.2182167140285363, "grad_norm": 0.21550381183624268, "learning_rate": 7.407591062337058e-06, "loss": 0.0037, "step": 40940 }, { "epoch": 1.2185142755121776, "grad_norm": 0.028485296294093132, "learning_rate": 7.4060736563508905e-06, "loss": 0.0066, "step": 40950 }, { "epoch": 1.2188118369958192, "grad_norm": 0.20891417562961578, "learning_rate": 7.404555961928767e-06, "loss": 0.0062, "step": 40960 }, { "epoch": 1.2191093984794608, "grad_norm": 0.33460843563079834, "learning_rate": 7.403037979252624e-06, "loss": 0.0047, "step": 40970 }, { "epoch": 1.2194069599631023, "grad_norm": 0.24367941915988922, "learning_rate": 7.401519708504438e-06, "loss": 0.0052, "step": 40980 }, { "epoch": 1.219704521446744, "grad_norm": 0.42017146944999695, "learning_rate": 7.400001149866214e-06, "loss": 0.0052, "step": 40990 }, { "epoch": 1.2200020829303855, "grad_norm": 0.08943326771259308, "learning_rate": 7.398482303519993e-06, "loss": 0.0045, "step": 41000 }, { "epoch": 1.220299644414027, "grad_norm": 0.2032879889011383, "learning_rate": 7.396963169647854e-06, "loss": 0.0068, "step": 41010 }, { "epoch": 1.2205972058976686, "grad_norm": 0.17275771498680115, "learning_rate": 7.3954437484319074e-06, "loss": 0.0033, "step": 41020 }, { "epoch": 1.2208947673813102, "grad_norm": 0.2879769504070282, "learning_rate": 7.3939240400543e-06, "loss": 0.006, "step": 41030 }, { "epoch": 1.2211923288649518, "grad_norm": 0.2796042859554291, "learning_rate": 7.39240404469721e-06, "loss": 0.0051, "step": 41040 }, { "epoch": 1.2214898903485933, "grad_norm": 0.11834987998008728, "learning_rate": 7.390883762542852e-06, "loss": 0.007, "step": 41050 }, { "epoch": 1.221787451832235, "grad_norm": 0.1251363903284073, "learning_rate": 7.389363193773475e-06, "loss": 0.0049, "step": 41060 }, { "epoch": 1.2220850133158765, "grad_norm": 0.29538843035697937, "learning_rate": 7.387842338571363e-06, "loss": 0.0063, "step": 41070 }, { "epoch": 1.2223825747995178, "grad_norm": 0.008890393190085888, "learning_rate": 7.386321197118833e-06, "loss": 0.0034, "step": 41080 }, { "epoch": 1.2226801362831594, "grad_norm": 0.46231040358543396, "learning_rate": 7.3847997695982385e-06, "loss": 0.0073, "step": 41090 }, { "epoch": 1.222977697766801, "grad_norm": 0.1430261880159378, "learning_rate": 7.383278056191962e-06, "loss": 0.0027, "step": 41100 }, { "epoch": 1.2232752592504426, "grad_norm": 0.25001460313796997, "learning_rate": 7.381756057082427e-06, "loss": 0.0045, "step": 41110 }, { "epoch": 1.2235728207340841, "grad_norm": 0.05955687537789345, "learning_rate": 7.380233772452085e-06, "loss": 0.0047, "step": 41120 }, { "epoch": 1.2238703822177257, "grad_norm": 0.048272039741277695, "learning_rate": 7.37871120248343e-06, "loss": 0.0054, "step": 41130 }, { "epoch": 1.2241679437013673, "grad_norm": 0.09256336838006973, "learning_rate": 7.377188347358979e-06, "loss": 0.005, "step": 41140 }, { "epoch": 1.2244655051850089, "grad_norm": 0.3235575258731842, "learning_rate": 7.375665207261294e-06, "loss": 0.0031, "step": 41150 }, { "epoch": 1.2247630666686504, "grad_norm": 0.14371071755886078, "learning_rate": 7.3741417823729626e-06, "loss": 0.0038, "step": 41160 }, { "epoch": 1.225060628152292, "grad_norm": 0.15069977939128876, "learning_rate": 7.372618072876614e-06, "loss": 0.0038, "step": 41170 }, { "epoch": 1.2253581896359336, "grad_norm": 0.08267609030008316, "learning_rate": 7.3710940789549055e-06, "loss": 0.0047, "step": 41180 }, { "epoch": 1.2256557511195751, "grad_norm": 0.049222737550735474, "learning_rate": 7.36956980079053e-06, "loss": 0.0041, "step": 41190 }, { "epoch": 1.2259533126032167, "grad_norm": 0.1891029179096222, "learning_rate": 7.368045238566219e-06, "loss": 0.005, "step": 41200 }, { "epoch": 1.2262508740868583, "grad_norm": 0.1884700059890747, "learning_rate": 7.36652039246473e-06, "loss": 0.0032, "step": 41210 }, { "epoch": 1.2265484355704999, "grad_norm": 0.3554322421550751, "learning_rate": 7.364995262668863e-06, "loss": 0.0064, "step": 41220 }, { "epoch": 1.2268459970541414, "grad_norm": 0.15107682347297668, "learning_rate": 7.363469849361444e-06, "loss": 0.0046, "step": 41230 }, { "epoch": 1.2271435585377828, "grad_norm": 0.17003101110458374, "learning_rate": 7.361944152725341e-06, "loss": 0.0063, "step": 41240 }, { "epoch": 1.2274411200214244, "grad_norm": 0.06668277829885483, "learning_rate": 7.360418172943449e-06, "loss": 0.0068, "step": 41250 }, { "epoch": 1.227738681505066, "grad_norm": 0.17049269378185272, "learning_rate": 7.358891910198701e-06, "loss": 0.0041, "step": 41260 }, { "epoch": 1.2280362429887075, "grad_norm": 0.22064736485481262, "learning_rate": 7.357365364674061e-06, "loss": 0.0049, "step": 41270 }, { "epoch": 1.228333804472349, "grad_norm": 0.25606635212898254, "learning_rate": 7.355838536552533e-06, "loss": 0.0037, "step": 41280 }, { "epoch": 1.2286313659559907, "grad_norm": 0.3899454176425934, "learning_rate": 7.3543114260171446e-06, "loss": 0.0044, "step": 41290 }, { "epoch": 1.2289289274396322, "grad_norm": 0.2914743721485138, "learning_rate": 7.352784033250968e-06, "loss": 0.006, "step": 41300 }, { "epoch": 1.2292264889232738, "grad_norm": 0.19779019057750702, "learning_rate": 7.351256358437101e-06, "loss": 0.0051, "step": 41310 }, { "epoch": 1.2295240504069154, "grad_norm": 0.13488224148750305, "learning_rate": 7.349728401758682e-06, "loss": 0.0066, "step": 41320 }, { "epoch": 1.229821611890557, "grad_norm": 0.25355854630470276, "learning_rate": 7.3482001633988755e-06, "loss": 0.006, "step": 41330 }, { "epoch": 1.2301191733741985, "grad_norm": 0.3020761013031006, "learning_rate": 7.346671643540889e-06, "loss": 0.0051, "step": 41340 }, { "epoch": 1.23041673485784, "grad_norm": 0.25094568729400635, "learning_rate": 7.345142842367955e-06, "loss": 0.006, "step": 41350 }, { "epoch": 1.2307142963414814, "grad_norm": 0.28199148178100586, "learning_rate": 7.343613760063344e-06, "loss": 0.0049, "step": 41360 }, { "epoch": 1.231011857825123, "grad_norm": 0.3106856346130371, "learning_rate": 7.3420843968103625e-06, "loss": 0.0059, "step": 41370 }, { "epoch": 1.2313094193087646, "grad_norm": 0.09841188788414001, "learning_rate": 7.340554752792345e-06, "loss": 0.0044, "step": 41380 }, { "epoch": 1.2316069807924062, "grad_norm": 0.3052965998649597, "learning_rate": 7.339024828192665e-06, "loss": 0.0042, "step": 41390 }, { "epoch": 1.2319045422760477, "grad_norm": 0.16525867581367493, "learning_rate": 7.3374946231947245e-06, "loss": 0.0037, "step": 41400 }, { "epoch": 1.2322021037596893, "grad_norm": 0.1911003142595291, "learning_rate": 7.335964137981963e-06, "loss": 0.0046, "step": 41410 }, { "epoch": 1.2324996652433309, "grad_norm": 0.1299329400062561, "learning_rate": 7.334433372737854e-06, "loss": 0.0056, "step": 41420 }, { "epoch": 1.2327972267269725, "grad_norm": 0.4225741922855377, "learning_rate": 7.332902327645901e-06, "loss": 0.0051, "step": 41430 }, { "epoch": 1.233094788210614, "grad_norm": 0.15057532489299774, "learning_rate": 7.331371002889643e-06, "loss": 0.0049, "step": 41440 }, { "epoch": 1.2333923496942556, "grad_norm": 0.4168549180030823, "learning_rate": 7.329839398652655e-06, "loss": 0.0081, "step": 41450 }, { "epoch": 1.2336899111778972, "grad_norm": 0.30807602405548096, "learning_rate": 7.32830751511854e-06, "loss": 0.0051, "step": 41460 }, { "epoch": 1.2339874726615387, "grad_norm": 0.12276328355073929, "learning_rate": 7.3267753524709396e-06, "loss": 0.0026, "step": 41470 }, { "epoch": 1.2342850341451803, "grad_norm": 0.4196929335594177, "learning_rate": 7.3252429108935255e-06, "loss": 0.0079, "step": 41480 }, { "epoch": 1.2345825956288219, "grad_norm": 0.36328786611557007, "learning_rate": 7.323710190570004e-06, "loss": 0.0041, "step": 41490 }, { "epoch": 1.2348801571124635, "grad_norm": 0.12480828166007996, "learning_rate": 7.322177191684117e-06, "loss": 0.0054, "step": 41500 }, { "epoch": 1.235177718596105, "grad_norm": 0.1668044924736023, "learning_rate": 7.320643914419636e-06, "loss": 0.007, "step": 41510 }, { "epoch": 1.2354752800797464, "grad_norm": 0.18145012855529785, "learning_rate": 7.319110358960368e-06, "loss": 0.0043, "step": 41520 }, { "epoch": 1.235772841563388, "grad_norm": 0.14208802580833435, "learning_rate": 7.317576525490153e-06, "loss": 0.0049, "step": 41530 }, { "epoch": 1.2360704030470295, "grad_norm": 0.22433380782604218, "learning_rate": 7.316042414192866e-06, "loss": 0.0036, "step": 41540 }, { "epoch": 1.236367964530671, "grad_norm": 0.2676447331905365, "learning_rate": 7.314508025252409e-06, "loss": 0.0053, "step": 41550 }, { "epoch": 1.2366655260143127, "grad_norm": 0.12006411701440811, "learning_rate": 7.312973358852728e-06, "loss": 0.0033, "step": 41560 }, { "epoch": 1.2369630874979543, "grad_norm": 0.09026730060577393, "learning_rate": 7.311438415177791e-06, "loss": 0.0048, "step": 41570 }, { "epoch": 1.2372606489815958, "grad_norm": 0.1462932676076889, "learning_rate": 7.309903194411608e-06, "loss": 0.0076, "step": 41580 }, { "epoch": 1.2375582104652374, "grad_norm": 0.2649594247341156, "learning_rate": 7.3083676967382145e-06, "loss": 0.0083, "step": 41590 }, { "epoch": 1.237855771948879, "grad_norm": 0.12113962322473526, "learning_rate": 7.3068319223416885e-06, "loss": 0.0052, "step": 41600 }, { "epoch": 1.2381533334325205, "grad_norm": 0.16241857409477234, "learning_rate": 7.305295871406132e-06, "loss": 0.0048, "step": 41610 }, { "epoch": 1.2384508949161621, "grad_norm": 0.1603901982307434, "learning_rate": 7.303759544115686e-06, "loss": 0.005, "step": 41620 }, { "epoch": 1.2387484563998037, "grad_norm": 0.08982294797897339, "learning_rate": 7.302222940654522e-06, "loss": 0.0031, "step": 41630 }, { "epoch": 1.2390460178834453, "grad_norm": 0.18223434686660767, "learning_rate": 7.300686061206845e-06, "loss": 0.0048, "step": 41640 }, { "epoch": 1.2393435793670866, "grad_norm": 0.17349855601787567, "learning_rate": 7.299148905956896e-06, "loss": 0.0043, "step": 41650 }, { "epoch": 1.2396411408507282, "grad_norm": 0.179341122508049, "learning_rate": 7.297611475088942e-06, "loss": 0.0033, "step": 41660 }, { "epoch": 1.2399387023343698, "grad_norm": 0.15744562447071075, "learning_rate": 7.296073768787293e-06, "loss": 0.0045, "step": 41670 }, { "epoch": 1.2402362638180113, "grad_norm": 0.1860685646533966, "learning_rate": 7.294535787236282e-06, "loss": 0.0034, "step": 41680 }, { "epoch": 1.240533825301653, "grad_norm": 0.1986382156610489, "learning_rate": 7.292997530620283e-06, "loss": 0.0061, "step": 41690 }, { "epoch": 1.2408313867852945, "grad_norm": 0.11487630754709244, "learning_rate": 7.291458999123695e-06, "loss": 0.0056, "step": 41700 }, { "epoch": 1.241128948268936, "grad_norm": 0.2306968867778778, "learning_rate": 7.28992019293096e-06, "loss": 0.0042, "step": 41710 }, { "epoch": 1.2414265097525776, "grad_norm": 0.479615181684494, "learning_rate": 7.288381112226544e-06, "loss": 0.0074, "step": 41720 }, { "epoch": 1.2417240712362192, "grad_norm": 0.09606308490037918, "learning_rate": 7.28684175719495e-06, "loss": 0.0048, "step": 41730 }, { "epoch": 1.2420216327198608, "grad_norm": 0.14566878974437714, "learning_rate": 7.285302128020713e-06, "loss": 0.003, "step": 41740 }, { "epoch": 1.2423191942035023, "grad_norm": 0.31412166357040405, "learning_rate": 7.283762224888402e-06, "loss": 0.005, "step": 41750 }, { "epoch": 1.242616755687144, "grad_norm": 0.11711433529853821, "learning_rate": 7.282222047982617e-06, "loss": 0.0049, "step": 41760 }, { "epoch": 1.2429143171707855, "grad_norm": 0.06605412065982819, "learning_rate": 7.280681597487992e-06, "loss": 0.007, "step": 41770 }, { "epoch": 1.243211878654427, "grad_norm": 0.2684715688228607, "learning_rate": 7.279140873589194e-06, "loss": 0.0051, "step": 41780 }, { "epoch": 1.2435094401380686, "grad_norm": 0.317203551530838, "learning_rate": 7.277599876470922e-06, "loss": 0.0054, "step": 41790 }, { "epoch": 1.2438070016217102, "grad_norm": 0.1544216126203537, "learning_rate": 7.276058606317908e-06, "loss": 0.0044, "step": 41800 }, { "epoch": 1.2441045631053516, "grad_norm": 0.3078829348087311, "learning_rate": 7.274517063314918e-06, "loss": 0.0036, "step": 41810 }, { "epoch": 1.2444021245889931, "grad_norm": 0.2595576345920563, "learning_rate": 7.272975247646747e-06, "loss": 0.0052, "step": 41820 }, { "epoch": 1.2446996860726347, "grad_norm": 0.15998753905296326, "learning_rate": 7.2714331594982265e-06, "loss": 0.0047, "step": 41830 }, { "epoch": 1.2449972475562763, "grad_norm": 0.03975589945912361, "learning_rate": 7.269890799054221e-06, "loss": 0.0032, "step": 41840 }, { "epoch": 1.2452948090399178, "grad_norm": 0.07110457867383957, "learning_rate": 7.268348166499622e-06, "loss": 0.0038, "step": 41850 }, { "epoch": 1.2455923705235594, "grad_norm": 0.10612030327320099, "learning_rate": 7.266805262019363e-06, "loss": 0.0065, "step": 41860 }, { "epoch": 1.245889932007201, "grad_norm": 0.17125116288661957, "learning_rate": 7.265262085798399e-06, "loss": 0.0033, "step": 41870 }, { "epoch": 1.2461874934908426, "grad_norm": 0.10528320074081421, "learning_rate": 7.263718638021729e-06, "loss": 0.0043, "step": 41880 }, { "epoch": 1.2464850549744841, "grad_norm": 0.17181378602981567, "learning_rate": 7.262174918874374e-06, "loss": 0.002, "step": 41890 }, { "epoch": 1.2467826164581257, "grad_norm": 0.20320649445056915, "learning_rate": 7.260630928541395e-06, "loss": 0.0044, "step": 41900 }, { "epoch": 1.2470801779417673, "grad_norm": 0.22272686660289764, "learning_rate": 7.259086667207882e-06, "loss": 0.0052, "step": 41910 }, { "epoch": 1.2473777394254089, "grad_norm": 0.19635337591171265, "learning_rate": 7.257542135058958e-06, "loss": 0.0048, "step": 41920 }, { "epoch": 1.2476753009090502, "grad_norm": 0.08629731088876724, "learning_rate": 7.25599733227978e-06, "loss": 0.0047, "step": 41930 }, { "epoch": 1.2479728623926918, "grad_norm": 0.3522944450378418, "learning_rate": 7.254452259055535e-06, "loss": 0.0043, "step": 41940 }, { "epoch": 1.2482704238763334, "grad_norm": 0.11904551833868027, "learning_rate": 7.252906915571447e-06, "loss": 0.0047, "step": 41950 }, { "epoch": 1.248567985359975, "grad_norm": 0.20641595125198364, "learning_rate": 7.251361302012765e-06, "loss": 0.0042, "step": 41960 }, { "epoch": 1.2488655468436165, "grad_norm": 0.27306902408599854, "learning_rate": 7.249815418564776e-06, "loss": 0.0036, "step": 41970 }, { "epoch": 1.249163108327258, "grad_norm": 0.19366928935050964, "learning_rate": 7.2482692654127985e-06, "loss": 0.0055, "step": 41980 }, { "epoch": 1.2494606698108996, "grad_norm": 0.08743863552808762, "learning_rate": 7.246722842742182e-06, "loss": 0.0037, "step": 41990 }, { "epoch": 1.2497582312945412, "grad_norm": 0.1993524581193924, "learning_rate": 7.245176150738309e-06, "loss": 0.0033, "step": 42000 }, { "epoch": 1.2500557927781828, "grad_norm": 0.32202884554862976, "learning_rate": 7.243629189586594e-06, "loss": 0.0049, "step": 42010 }, { "epoch": 1.2503533542618244, "grad_norm": 0.31348803639411926, "learning_rate": 7.242081959472484e-06, "loss": 0.0058, "step": 42020 }, { "epoch": 1.250650915745466, "grad_norm": 0.22226813435554504, "learning_rate": 7.24053446058146e-06, "loss": 0.0047, "step": 42030 }, { "epoch": 1.2509484772291075, "grad_norm": 0.272251158952713, "learning_rate": 7.23898669309903e-06, "loss": 0.003, "step": 42040 }, { "epoch": 1.251246038712749, "grad_norm": 0.18643733859062195, "learning_rate": 7.237438657210742e-06, "loss": 0.0058, "step": 42050 }, { "epoch": 1.2515436001963907, "grad_norm": 0.17588354647159576, "learning_rate": 7.235890353102169e-06, "loss": 0.0041, "step": 42060 }, { "epoch": 1.2518411616800322, "grad_norm": 0.1567162424325943, "learning_rate": 7.234341780958919e-06, "loss": 0.0032, "step": 42070 }, { "epoch": 1.2521387231636738, "grad_norm": 0.1783338040113449, "learning_rate": 7.232792940966633e-06, "loss": 0.0034, "step": 42080 }, { "epoch": 1.2524362846473154, "grad_norm": 0.25727927684783936, "learning_rate": 7.231243833310984e-06, "loss": 0.0051, "step": 42090 }, { "epoch": 1.2527338461309567, "grad_norm": 0.0675860345363617, "learning_rate": 7.229694458177675e-06, "loss": 0.004, "step": 42100 }, { "epoch": 1.2530314076145983, "grad_norm": 0.12086424231529236, "learning_rate": 7.228144815752445e-06, "loss": 0.0035, "step": 42110 }, { "epoch": 1.2533289690982399, "grad_norm": 0.17616894841194153, "learning_rate": 7.226594906221057e-06, "loss": 0.0049, "step": 42120 }, { "epoch": 1.2536265305818814, "grad_norm": 0.16170090436935425, "learning_rate": 7.225044729769317e-06, "loss": 0.0288, "step": 42130 }, { "epoch": 1.253924092065523, "grad_norm": 0.18808045983314514, "learning_rate": 7.223494286583056e-06, "loss": 0.0047, "step": 42140 }, { "epoch": 1.2542216535491646, "grad_norm": 0.06263689696788788, "learning_rate": 7.221943576848136e-06, "loss": 0.0059, "step": 42150 }, { "epoch": 1.2545192150328062, "grad_norm": 0.26250773668289185, "learning_rate": 7.2203926007504566e-06, "loss": 0.0069, "step": 42160 }, { "epoch": 1.2548167765164477, "grad_norm": 0.34336256980895996, "learning_rate": 7.218841358475943e-06, "loss": 0.0064, "step": 42170 }, { "epoch": 1.2551143380000893, "grad_norm": 0.13508307933807373, "learning_rate": 7.217289850210559e-06, "loss": 0.0065, "step": 42180 }, { "epoch": 1.2554118994837309, "grad_norm": 0.2261788249015808, "learning_rate": 7.215738076140294e-06, "loss": 0.0052, "step": 42190 }, { "epoch": 1.2557094609673725, "grad_norm": 0.17885218560695648, "learning_rate": 7.214186036451172e-06, "loss": 0.004, "step": 42200 }, { "epoch": 1.2560070224510138, "grad_norm": 0.2593097984790802, "learning_rate": 7.212633731329251e-06, "loss": 0.0049, "step": 42210 }, { "epoch": 1.2563045839346554, "grad_norm": 0.2657735049724579, "learning_rate": 7.211081160960618e-06, "loss": 0.0047, "step": 42220 }, { "epoch": 1.256602145418297, "grad_norm": 0.14357924461364746, "learning_rate": 7.20952832553139e-06, "loss": 0.0068, "step": 42230 }, { "epoch": 1.2568997069019385, "grad_norm": 0.12606781721115112, "learning_rate": 7.207975225227721e-06, "loss": 0.0063, "step": 42240 }, { "epoch": 1.25719726838558, "grad_norm": 0.048442039638757706, "learning_rate": 7.206421860235794e-06, "loss": 0.0071, "step": 42250 }, { "epoch": 1.2574948298692217, "grad_norm": 0.19813258945941925, "learning_rate": 7.204868230741822e-06, "loss": 0.0035, "step": 42260 }, { "epoch": 1.2577923913528632, "grad_norm": 0.12249619513750076, "learning_rate": 7.203314336932053e-06, "loss": 0.0029, "step": 42270 }, { "epoch": 1.2580899528365048, "grad_norm": 0.22235140204429626, "learning_rate": 7.201760178992763e-06, "loss": 0.0053, "step": 42280 }, { "epoch": 1.2583875143201464, "grad_norm": 0.3532123565673828, "learning_rate": 7.200205757110263e-06, "loss": 0.0051, "step": 42290 }, { "epoch": 1.258685075803788, "grad_norm": 0.21224644780158997, "learning_rate": 7.198651071470896e-06, "loss": 0.0038, "step": 42300 }, { "epoch": 1.2589826372874295, "grad_norm": 0.2197572886943817, "learning_rate": 7.197096122261033e-06, "loss": 0.0043, "step": 42310 }, { "epoch": 1.2592801987710711, "grad_norm": 0.4106564521789551, "learning_rate": 7.195540909667079e-06, "loss": 0.0062, "step": 42320 }, { "epoch": 1.2595777602547127, "grad_norm": 0.3168265223503113, "learning_rate": 7.193985433875471e-06, "loss": 0.0032, "step": 42330 }, { "epoch": 1.2598753217383543, "grad_norm": 0.26110416650772095, "learning_rate": 7.1924296950726735e-06, "loss": 0.0033, "step": 42340 }, { "epoch": 1.2601728832219958, "grad_norm": 0.1886802613735199, "learning_rate": 7.1908736934451914e-06, "loss": 0.0031, "step": 42350 }, { "epoch": 1.2604704447056374, "grad_norm": 0.18137632310390472, "learning_rate": 7.189317429179552e-06, "loss": 0.0038, "step": 42360 }, { "epoch": 1.260768006189279, "grad_norm": 0.20039954781532288, "learning_rate": 7.187760902462317e-06, "loss": 0.0036, "step": 42370 }, { "epoch": 1.2610655676729206, "grad_norm": 0.09444184601306915, "learning_rate": 7.186204113480082e-06, "loss": 0.0047, "step": 42380 }, { "epoch": 1.261363129156562, "grad_norm": 0.1425677090883255, "learning_rate": 7.184647062419472e-06, "loss": 0.0045, "step": 42390 }, { "epoch": 1.2616606906402035, "grad_norm": 0.1277499943971634, "learning_rate": 7.183089749467141e-06, "loss": 0.0042, "step": 42400 }, { "epoch": 1.261958252123845, "grad_norm": 0.24251525104045868, "learning_rate": 7.181532174809781e-06, "loss": 0.005, "step": 42410 }, { "epoch": 1.2622558136074866, "grad_norm": 0.3688044250011444, "learning_rate": 7.179974338634108e-06, "loss": 0.0072, "step": 42420 }, { "epoch": 1.2625533750911282, "grad_norm": 0.14831320941448212, "learning_rate": 7.178416241126875e-06, "loss": 0.0054, "step": 42430 }, { "epoch": 1.2628509365747698, "grad_norm": 0.2425801008939743, "learning_rate": 7.176857882474862e-06, "loss": 0.0072, "step": 42440 }, { "epoch": 1.2631484980584113, "grad_norm": 0.2673693597316742, "learning_rate": 7.175299262864886e-06, "loss": 0.0062, "step": 42450 }, { "epoch": 1.263446059542053, "grad_norm": 0.2829046845436096, "learning_rate": 7.173740382483786e-06, "loss": 0.005, "step": 42460 }, { "epoch": 1.2637436210256945, "grad_norm": 0.11164994537830353, "learning_rate": 7.172181241518442e-06, "loss": 0.0039, "step": 42470 }, { "epoch": 1.264041182509336, "grad_norm": 0.3936760127544403, "learning_rate": 7.17062184015576e-06, "loss": 0.0071, "step": 42480 }, { "epoch": 1.2643387439929776, "grad_norm": 0.1130245253443718, "learning_rate": 7.169062178582678e-06, "loss": 0.0066, "step": 42490 }, { "epoch": 1.264636305476619, "grad_norm": 0.14427965879440308, "learning_rate": 7.1675022569861675e-06, "loss": 0.003, "step": 42500 }, { "epoch": 1.2649338669602606, "grad_norm": 0.17235808074474335, "learning_rate": 7.165942075553226e-06, "loss": 0.004, "step": 42510 }, { "epoch": 1.2652314284439021, "grad_norm": 0.10321405529975891, "learning_rate": 7.164381634470888e-06, "loss": 0.0039, "step": 42520 }, { "epoch": 1.2655289899275437, "grad_norm": 0.1590615212917328, "learning_rate": 7.162820933926214e-06, "loss": 0.0043, "step": 42530 }, { "epoch": 1.2658265514111853, "grad_norm": 0.08646810799837112, "learning_rate": 7.161259974106303e-06, "loss": 0.0051, "step": 42540 }, { "epoch": 1.2661241128948268, "grad_norm": 0.17817452549934387, "learning_rate": 7.1596987551982735e-06, "loss": 0.0038, "step": 42550 }, { "epoch": 1.2664216743784684, "grad_norm": 0.2557559013366699, "learning_rate": 7.158137277389287e-06, "loss": 0.0066, "step": 42560 }, { "epoch": 1.26671923586211, "grad_norm": 0.18711312115192413, "learning_rate": 7.156575540866526e-06, "loss": 0.0038, "step": 42570 }, { "epoch": 1.2670167973457516, "grad_norm": 0.8098559975624084, "learning_rate": 7.1550135458172144e-06, "loss": 0.0039, "step": 42580 }, { "epoch": 1.2673143588293931, "grad_norm": 0.21437378227710724, "learning_rate": 7.153451292428597e-06, "loss": 0.0051, "step": 42590 }, { "epoch": 1.2676119203130347, "grad_norm": 0.173285111784935, "learning_rate": 7.151888780887957e-06, "loss": 0.0033, "step": 42600 }, { "epoch": 1.2679094817966763, "grad_norm": 0.22855183482170105, "learning_rate": 7.1503260113826035e-06, "loss": 0.0032, "step": 42610 }, { "epoch": 1.2682070432803179, "grad_norm": 0.3192727565765381, "learning_rate": 7.148762984099878e-06, "loss": 0.0062, "step": 42620 }, { "epoch": 1.2685046047639594, "grad_norm": 0.18517740070819855, "learning_rate": 7.147199699227156e-06, "loss": 0.0046, "step": 42630 }, { "epoch": 1.268802166247601, "grad_norm": 0.08170625567436218, "learning_rate": 7.145636156951841e-06, "loss": 0.0047, "step": 42640 }, { "epoch": 1.2690997277312426, "grad_norm": 0.15345874428749084, "learning_rate": 7.1440723574613665e-06, "loss": 0.0036, "step": 42650 }, { "epoch": 1.2693972892148842, "grad_norm": 0.6950653195381165, "learning_rate": 7.142508300943198e-06, "loss": 0.0095, "step": 42660 }, { "epoch": 1.2696948506985255, "grad_norm": 0.1305381953716278, "learning_rate": 7.140943987584834e-06, "loss": 0.0042, "step": 42670 }, { "epoch": 1.269992412182167, "grad_norm": 0.27622291445732117, "learning_rate": 7.139379417573799e-06, "loss": 0.0056, "step": 42680 }, { "epoch": 1.2702899736658086, "grad_norm": 0.43281203508377075, "learning_rate": 7.137814591097652e-06, "loss": 0.0076, "step": 42690 }, { "epoch": 1.2705875351494502, "grad_norm": 0.1273132711648941, "learning_rate": 7.136249508343982e-06, "loss": 0.003, "step": 42700 }, { "epoch": 1.2708850966330918, "grad_norm": 0.5441837310791016, "learning_rate": 7.1346841695004086e-06, "loss": 0.0074, "step": 42710 }, { "epoch": 1.2711826581167334, "grad_norm": 0.503292441368103, "learning_rate": 7.13311857475458e-06, "loss": 0.004, "step": 42720 }, { "epoch": 1.271480219600375, "grad_norm": 0.1163734719157219, "learning_rate": 7.131552724294181e-06, "loss": 0.0041, "step": 42730 }, { "epoch": 1.2717777810840165, "grad_norm": 0.09355857223272324, "learning_rate": 7.129986618306919e-06, "loss": 0.0031, "step": 42740 }, { "epoch": 1.272075342567658, "grad_norm": 0.47626519203186035, "learning_rate": 7.128420256980538e-06, "loss": 0.0054, "step": 42750 }, { "epoch": 1.2723729040512997, "grad_norm": 0.10064128041267395, "learning_rate": 7.12685364050281e-06, "loss": 0.0044, "step": 42760 }, { "epoch": 1.2726704655349412, "grad_norm": 0.07508806139230728, "learning_rate": 7.1252867690615394e-06, "loss": 0.0053, "step": 42770 }, { "epoch": 1.2729680270185826, "grad_norm": 0.1394253373146057, "learning_rate": 7.123719642844559e-06, "loss": 0.0096, "step": 42780 }, { "epoch": 1.2732655885022242, "grad_norm": 0.1626645177602768, "learning_rate": 7.122152262039732e-06, "loss": 0.0055, "step": 42790 }, { "epoch": 1.2735631499858657, "grad_norm": 0.07909149676561356, "learning_rate": 7.120584626834958e-06, "loss": 0.0034, "step": 42800 }, { "epoch": 1.2738607114695073, "grad_norm": 0.1827545315027237, "learning_rate": 7.1190167374181564e-06, "loss": 0.0034, "step": 42810 }, { "epoch": 1.2741582729531489, "grad_norm": 0.4129802882671356, "learning_rate": 7.117448593977287e-06, "loss": 0.0042, "step": 42820 }, { "epoch": 1.2744558344367904, "grad_norm": 0.1109338328242302, "learning_rate": 7.115880196700334e-06, "loss": 0.0056, "step": 42830 }, { "epoch": 1.274753395920432, "grad_norm": 0.18288420140743256, "learning_rate": 7.114311545775317e-06, "loss": 0.0059, "step": 42840 }, { "epoch": 1.2750509574040736, "grad_norm": 0.3464890718460083, "learning_rate": 7.11274264139028e-06, "loss": 0.0046, "step": 42850 }, { "epoch": 1.2753485188877152, "grad_norm": 0.12035682797431946, "learning_rate": 7.111173483733302e-06, "loss": 0.0043, "step": 42860 }, { "epoch": 1.2756460803713567, "grad_norm": 0.2314075231552124, "learning_rate": 7.1096040729924906e-06, "loss": 0.0071, "step": 42870 }, { "epoch": 1.2759436418549983, "grad_norm": 0.14316782355308533, "learning_rate": 7.108034409355986e-06, "loss": 0.0038, "step": 42880 }, { "epoch": 1.2762412033386399, "grad_norm": 0.09451515972614288, "learning_rate": 7.106464493011953e-06, "loss": 0.003, "step": 42890 }, { "epoch": 1.2765387648222815, "grad_norm": 0.3487802743911743, "learning_rate": 7.104894324148594e-06, "loss": 0.0059, "step": 42900 }, { "epoch": 1.276836326305923, "grad_norm": 0.3732992708683014, "learning_rate": 7.103323902954134e-06, "loss": 0.0066, "step": 42910 }, { "epoch": 1.2771338877895646, "grad_norm": 0.2451336681842804, "learning_rate": 7.1017532296168365e-06, "loss": 0.0044, "step": 42920 }, { "epoch": 1.2774314492732062, "grad_norm": 0.24786515533924103, "learning_rate": 7.100182304324989e-06, "loss": 0.005, "step": 42930 }, { "epoch": 1.2777290107568477, "grad_norm": 0.1164993867278099, "learning_rate": 7.098611127266913e-06, "loss": 0.0057, "step": 42940 }, { "epoch": 1.2780265722404893, "grad_norm": 0.1014406830072403, "learning_rate": 7.097039698630957e-06, "loss": 0.0046, "step": 42950 }, { "epoch": 1.2783241337241307, "grad_norm": 0.3074647784233093, "learning_rate": 7.095468018605501e-06, "loss": 0.0046, "step": 42960 }, { "epoch": 1.2786216952077722, "grad_norm": 0.2290530949831009, "learning_rate": 7.093896087378957e-06, "loss": 0.0054, "step": 42970 }, { "epoch": 1.2789192566914138, "grad_norm": 0.10800440609455109, "learning_rate": 7.092323905139763e-06, "loss": 0.0036, "step": 42980 }, { "epoch": 1.2792168181750554, "grad_norm": 0.23549361526966095, "learning_rate": 7.0907514720763914e-06, "loss": 0.0063, "step": 42990 }, { "epoch": 1.279514379658697, "grad_norm": 0.10373973101377487, "learning_rate": 7.0891787883773425e-06, "loss": 0.0051, "step": 43000 }, { "epoch": 1.2798119411423385, "grad_norm": 0.08998510241508484, "learning_rate": 7.087605854231145e-06, "loss": 0.0049, "step": 43010 }, { "epoch": 1.2801095026259801, "grad_norm": 0.2780854403972626, "learning_rate": 7.086032669826362e-06, "loss": 0.0051, "step": 43020 }, { "epoch": 1.2804070641096217, "grad_norm": 0.10581074655056, "learning_rate": 7.084459235351584e-06, "loss": 0.0111, "step": 43030 }, { "epoch": 1.2807046255932633, "grad_norm": 0.22833186388015747, "learning_rate": 7.08288555099543e-06, "loss": 0.0031, "step": 43040 }, { "epoch": 1.2810021870769048, "grad_norm": 0.11976990848779678, "learning_rate": 7.08131161694655e-06, "loss": 0.0044, "step": 43050 }, { "epoch": 1.2812997485605464, "grad_norm": 0.15220782160758972, "learning_rate": 7.079737433393626e-06, "loss": 0.0042, "step": 43060 }, { "epoch": 1.2815973100441878, "grad_norm": 0.2739187180995941, "learning_rate": 7.078163000525371e-06, "loss": 0.0041, "step": 43070 }, { "epoch": 1.2818948715278293, "grad_norm": 0.377913236618042, "learning_rate": 7.07658831853052e-06, "loss": 0.0042, "step": 43080 }, { "epoch": 1.282192433011471, "grad_norm": 0.18990208208560944, "learning_rate": 7.0750133875978464e-06, "loss": 0.0064, "step": 43090 }, { "epoch": 1.2824899944951125, "grad_norm": 0.22556614875793457, "learning_rate": 7.07343820791615e-06, "loss": 0.0046, "step": 43100 }, { "epoch": 1.282787555978754, "grad_norm": 0.16628412902355194, "learning_rate": 7.071862779674259e-06, "loss": 0.0034, "step": 43110 }, { "epoch": 1.2830851174623956, "grad_norm": 0.28242969512939453, "learning_rate": 7.070287103061036e-06, "loss": 0.0048, "step": 43120 }, { "epoch": 1.2833826789460372, "grad_norm": 0.14403744041919708, "learning_rate": 7.068711178265366e-06, "loss": 0.0033, "step": 43130 }, { "epoch": 1.2836802404296788, "grad_norm": 0.47324129939079285, "learning_rate": 7.0671350054761734e-06, "loss": 0.0062, "step": 43140 }, { "epoch": 1.2839778019133203, "grad_norm": 0.19959813356399536, "learning_rate": 7.065558584882402e-06, "loss": 0.0035, "step": 43150 }, { "epoch": 1.284275363396962, "grad_norm": 0.05407213792204857, "learning_rate": 7.063981916673035e-06, "loss": 0.0048, "step": 43160 }, { "epoch": 1.2845729248806035, "grad_norm": 0.1597970724105835, "learning_rate": 7.062405001037076e-06, "loss": 0.0035, "step": 43170 }, { "epoch": 1.284870486364245, "grad_norm": 0.29347288608551025, "learning_rate": 7.060827838163566e-06, "loss": 0.0026, "step": 43180 }, { "epoch": 1.2851680478478866, "grad_norm": 0.24747921526432037, "learning_rate": 7.059250428241571e-06, "loss": 0.005, "step": 43190 }, { "epoch": 1.2854656093315282, "grad_norm": 0.171899676322937, "learning_rate": 7.057672771460191e-06, "loss": 0.0073, "step": 43200 }, { "epoch": 1.2857631708151698, "grad_norm": 0.20703467726707458, "learning_rate": 7.05609486800855e-06, "loss": 0.0032, "step": 43210 }, { "epoch": 1.2860607322988113, "grad_norm": 0.09702810645103455, "learning_rate": 7.054516718075806e-06, "loss": 0.0071, "step": 43220 }, { "epoch": 1.286358293782453, "grad_norm": 0.28160205483436584, "learning_rate": 7.052938321851141e-06, "loss": 0.0065, "step": 43230 }, { "epoch": 1.2866558552660945, "grad_norm": 0.0540291890501976, "learning_rate": 7.051359679523777e-06, "loss": 0.0034, "step": 43240 }, { "epoch": 1.2869534167497358, "grad_norm": 0.20446357131004333, "learning_rate": 7.049780791282954e-06, "loss": 0.0049, "step": 43250 }, { "epoch": 1.2872509782333774, "grad_norm": 0.18263770639896393, "learning_rate": 7.048201657317948e-06, "loss": 0.0032, "step": 43260 }, { "epoch": 1.287548539717019, "grad_norm": 0.3186996281147003, "learning_rate": 7.046622277818063e-06, "loss": 0.0046, "step": 43270 }, { "epoch": 1.2878461012006606, "grad_norm": 0.07237003743648529, "learning_rate": 7.0450426529726314e-06, "loss": 0.0048, "step": 43280 }, { "epoch": 1.2881436626843021, "grad_norm": 0.1084967777132988, "learning_rate": 7.043462782971017e-06, "loss": 0.0039, "step": 43290 }, { "epoch": 1.2884412241679437, "grad_norm": 0.09261152148246765, "learning_rate": 7.041882668002609e-06, "loss": 0.0043, "step": 43300 }, { "epoch": 1.2887387856515853, "grad_norm": 0.2845255136489868, "learning_rate": 7.0403023082568344e-06, "loss": 0.005, "step": 43310 }, { "epoch": 1.2890363471352269, "grad_norm": 0.34454482793807983, "learning_rate": 7.038721703923139e-06, "loss": 0.0051, "step": 43320 }, { "epoch": 1.2893339086188684, "grad_norm": 0.174835205078125, "learning_rate": 7.037140855191005e-06, "loss": 0.0034, "step": 43330 }, { "epoch": 1.28963147010251, "grad_norm": 0.2599528729915619, "learning_rate": 7.03555976224994e-06, "loss": 0.0041, "step": 43340 }, { "epoch": 1.2899290315861514, "grad_norm": 0.08810850977897644, "learning_rate": 7.033978425289487e-06, "loss": 0.0062, "step": 43350 }, { "epoch": 1.290226593069793, "grad_norm": 0.08057490736246109, "learning_rate": 7.032396844499209e-06, "loss": 0.0039, "step": 43360 }, { "epoch": 1.2905241545534345, "grad_norm": 0.06009955331683159, "learning_rate": 7.030815020068707e-06, "loss": 0.0033, "step": 43370 }, { "epoch": 1.290821716037076, "grad_norm": 0.14891237020492554, "learning_rate": 7.029232952187604e-06, "loss": 0.0037, "step": 43380 }, { "epoch": 1.2911192775207176, "grad_norm": 0.16333262622356415, "learning_rate": 7.027650641045558e-06, "loss": 0.0064, "step": 43390 }, { "epoch": 1.2914168390043592, "grad_norm": 0.08697754889726639, "learning_rate": 7.026068086832253e-06, "loss": 0.0049, "step": 43400 }, { "epoch": 1.2917144004880008, "grad_norm": 0.2003713846206665, "learning_rate": 7.024485289737404e-06, "loss": 0.0049, "step": 43410 }, { "epoch": 1.2920119619716424, "grad_norm": 0.2402167022228241, "learning_rate": 7.022902249950751e-06, "loss": 0.0034, "step": 43420 }, { "epoch": 1.292309523455284, "grad_norm": 0.0626913532614708, "learning_rate": 7.02131896766207e-06, "loss": 0.003, "step": 43430 }, { "epoch": 1.2926070849389255, "grad_norm": 0.5506872534751892, "learning_rate": 7.019735443061157e-06, "loss": 0.0062, "step": 43440 }, { "epoch": 1.292904646422567, "grad_norm": 0.2710551917552948, "learning_rate": 7.018151676337848e-06, "loss": 0.0044, "step": 43450 }, { "epoch": 1.2932022079062087, "grad_norm": 0.2624976634979248, "learning_rate": 7.016567667681999e-06, "loss": 0.0051, "step": 43460 }, { "epoch": 1.2934997693898502, "grad_norm": 0.13403107225894928, "learning_rate": 7.014983417283498e-06, "loss": 0.0034, "step": 43470 }, { "epoch": 1.2937973308734918, "grad_norm": 0.10376057773828506, "learning_rate": 7.013398925332262e-06, "loss": 0.0182, "step": 43480 }, { "epoch": 1.2940948923571334, "grad_norm": 0.2981589734554291, "learning_rate": 7.011814192018239e-06, "loss": 0.0051, "step": 43490 }, { "epoch": 1.294392453840775, "grad_norm": 0.25446030497550964, "learning_rate": 7.010229217531404e-06, "loss": 0.0028, "step": 43500 }, { "epoch": 1.2946900153244165, "grad_norm": 0.1778384894132614, "learning_rate": 7.00864400206176e-06, "loss": 0.0038, "step": 43510 }, { "epoch": 1.294987576808058, "grad_norm": 0.17051315307617188, "learning_rate": 7.007058545799339e-06, "loss": 0.0021, "step": 43520 }, { "epoch": 1.2952851382916994, "grad_norm": 0.2855662405490875, "learning_rate": 7.005472848934205e-06, "loss": 0.004, "step": 43530 }, { "epoch": 1.295582699775341, "grad_norm": 0.028804879635572433, "learning_rate": 7.003886911656447e-06, "loss": 0.0037, "step": 43540 }, { "epoch": 1.2958802612589826, "grad_norm": 0.2736136317253113, "learning_rate": 7.002300734156185e-06, "loss": 0.004, "step": 43550 }, { "epoch": 1.2961778227426242, "grad_norm": 0.25004842877388, "learning_rate": 7.000714316623567e-06, "loss": 0.0039, "step": 43560 }, { "epoch": 1.2964753842262657, "grad_norm": 0.19326151907444, "learning_rate": 6.999127659248769e-06, "loss": 0.0041, "step": 43570 }, { "epoch": 1.2967729457099073, "grad_norm": 0.14954209327697754, "learning_rate": 6.9975407622219995e-06, "loss": 0.0043, "step": 43580 }, { "epoch": 1.2970705071935489, "grad_norm": 0.1960446685552597, "learning_rate": 6.995953625733489e-06, "loss": 0.0044, "step": 43590 }, { "epoch": 1.2973680686771905, "grad_norm": 0.07813401520252228, "learning_rate": 6.994366249973505e-06, "loss": 0.0036, "step": 43600 }, { "epoch": 1.297665630160832, "grad_norm": 0.22487851977348328, "learning_rate": 6.992778635132337e-06, "loss": 0.0029, "step": 43610 }, { "epoch": 1.2979631916444736, "grad_norm": 0.30009979009628296, "learning_rate": 6.991190781400306e-06, "loss": 0.0057, "step": 43620 }, { "epoch": 1.2982607531281152, "grad_norm": 0.5553666949272156, "learning_rate": 6.98960268896776e-06, "loss": 0.0075, "step": 43630 }, { "epoch": 1.2985583146117565, "grad_norm": 0.11506466567516327, "learning_rate": 6.988014358025078e-06, "loss": 0.0037, "step": 43640 }, { "epoch": 1.298855876095398, "grad_norm": 0.3391457796096802, "learning_rate": 6.986425788762667e-06, "loss": 0.0045, "step": 43650 }, { "epoch": 1.2991534375790397, "grad_norm": 0.10340087860822678, "learning_rate": 6.984836981370961e-06, "loss": 0.0032, "step": 43660 }, { "epoch": 1.2994509990626812, "grad_norm": 0.1548883467912674, "learning_rate": 6.9832479360404235e-06, "loss": 0.0033, "step": 43670 }, { "epoch": 1.2997485605463228, "grad_norm": 0.0609808973968029, "learning_rate": 6.9816586529615475e-06, "loss": 0.0043, "step": 43680 }, { "epoch": 1.3000461220299644, "grad_norm": 0.26948919892311096, "learning_rate": 6.980069132324851e-06, "loss": 0.007, "step": 43690 }, { "epoch": 1.300343683513606, "grad_norm": 0.2288283258676529, "learning_rate": 6.978479374320886e-06, "loss": 0.0039, "step": 43700 }, { "epoch": 1.3006412449972475, "grad_norm": 0.1601840704679489, "learning_rate": 6.976889379140228e-06, "loss": 0.0057, "step": 43710 }, { "epoch": 1.300938806480889, "grad_norm": 0.1930752545595169, "learning_rate": 6.975299146973482e-06, "loss": 0.0051, "step": 43720 }, { "epoch": 1.3012363679645307, "grad_norm": 0.26581135392189026, "learning_rate": 6.973708678011286e-06, "loss": 0.0037, "step": 43730 }, { "epoch": 1.3015339294481723, "grad_norm": 0.057241324335336685, "learning_rate": 6.9721179724442985e-06, "loss": 0.0037, "step": 43740 }, { "epoch": 1.3018314909318138, "grad_norm": 0.05109138786792755, "learning_rate": 6.970527030463215e-06, "loss": 0.0028, "step": 43750 }, { "epoch": 1.3021290524154554, "grad_norm": 0.22397227585315704, "learning_rate": 6.96893585225875e-06, "loss": 0.0031, "step": 43760 }, { "epoch": 1.302426613899097, "grad_norm": 0.2693644165992737, "learning_rate": 6.967344438021653e-06, "loss": 0.0053, "step": 43770 }, { "epoch": 1.3027241753827385, "grad_norm": 0.3095777928829193, "learning_rate": 6.965752787942703e-06, "loss": 0.0037, "step": 43780 }, { "epoch": 1.3030217368663801, "grad_norm": 0.3732251226902008, "learning_rate": 6.9641609022127e-06, "loss": 0.0069, "step": 43790 }, { "epoch": 1.3033192983500217, "grad_norm": 0.15693853795528412, "learning_rate": 6.9625687810224805e-06, "loss": 0.0042, "step": 43800 }, { "epoch": 1.3036168598336633, "grad_norm": 0.08740872889757156, "learning_rate": 6.960976424562901e-06, "loss": 0.0062, "step": 43810 }, { "epoch": 1.3039144213173046, "grad_norm": 0.16926273703575134, "learning_rate": 6.959383833024853e-06, "loss": 0.0059, "step": 43820 }, { "epoch": 1.3042119828009462, "grad_norm": 0.03194435313344002, "learning_rate": 6.957791006599252e-06, "loss": 0.0052, "step": 43830 }, { "epoch": 1.3045095442845878, "grad_norm": 0.14512096345424652, "learning_rate": 6.9561979454770465e-06, "loss": 0.003, "step": 43840 }, { "epoch": 1.3048071057682293, "grad_norm": 0.040487922728061676, "learning_rate": 6.954604649849206e-06, "loss": 0.003, "step": 43850 }, { "epoch": 1.305104667251871, "grad_norm": 0.38237446546554565, "learning_rate": 6.9530111199067364e-06, "loss": 0.0032, "step": 43860 }, { "epoch": 1.3054022287355125, "grad_norm": 0.1500868797302246, "learning_rate": 6.951417355840662e-06, "loss": 0.0064, "step": 43870 }, { "epoch": 1.305699790219154, "grad_norm": 0.08543647080659866, "learning_rate": 6.949823357842045e-06, "loss": 0.0045, "step": 43880 }, { "epoch": 1.3059973517027956, "grad_norm": 0.1766434907913208, "learning_rate": 6.948229126101967e-06, "loss": 0.0055, "step": 43890 }, { "epoch": 1.3062949131864372, "grad_norm": 0.039250362664461136, "learning_rate": 6.946634660811545e-06, "loss": 0.0033, "step": 43900 }, { "epoch": 1.3065924746700788, "grad_norm": 0.112212635576725, "learning_rate": 6.945039962161922e-06, "loss": 0.0037, "step": 43910 }, { "epoch": 1.3068900361537201, "grad_norm": 0.0822845846414566, "learning_rate": 6.943445030344263e-06, "loss": 0.0038, "step": 43920 }, { "epoch": 1.3071875976373617, "grad_norm": 0.2254086583852768, "learning_rate": 6.94184986554977e-06, "loss": 0.0036, "step": 43930 }, { "epoch": 1.3074851591210033, "grad_norm": 0.25379887223243713, "learning_rate": 6.940254467969666e-06, "loss": 0.0047, "step": 43940 }, { "epoch": 1.3077827206046448, "grad_norm": 0.3737500309944153, "learning_rate": 6.938658837795207e-06, "loss": 0.0048, "step": 43950 }, { "epoch": 1.3080802820882864, "grad_norm": 0.2723862826824188, "learning_rate": 6.93706297521767e-06, "loss": 0.0046, "step": 43960 }, { "epoch": 1.308377843571928, "grad_norm": 0.03197691589593887, "learning_rate": 6.93546688042837e-06, "loss": 0.0045, "step": 43970 }, { "epoch": 1.3086754050555696, "grad_norm": 0.18352121114730835, "learning_rate": 6.933870553618638e-06, "loss": 0.0041, "step": 43980 }, { "epoch": 1.3089729665392111, "grad_norm": 0.15113820135593414, "learning_rate": 6.932273994979844e-06, "loss": 0.0043, "step": 43990 }, { "epoch": 1.3092705280228527, "grad_norm": 0.17869089543819427, "learning_rate": 6.930677204703377e-06, "loss": 0.0044, "step": 44000 }, { "epoch": 1.3095680895064943, "grad_norm": 0.11356021463871002, "learning_rate": 6.929080182980662e-06, "loss": 0.0049, "step": 44010 }, { "epoch": 1.3098656509901359, "grad_norm": 0.3541228771209717, "learning_rate": 6.927482930003142e-06, "loss": 0.0051, "step": 44020 }, { "epoch": 1.3101632124737774, "grad_norm": 0.12503136694431305, "learning_rate": 6.925885445962297e-06, "loss": 0.0057, "step": 44030 }, { "epoch": 1.310460773957419, "grad_norm": 0.13414236903190613, "learning_rate": 6.9242877310496255e-06, "loss": 0.0054, "step": 44040 }, { "epoch": 1.3107583354410606, "grad_norm": 0.1669098287820816, "learning_rate": 6.9226897854566654e-06, "loss": 0.0045, "step": 44050 }, { "epoch": 1.3110558969247021, "grad_norm": 0.23181560635566711, "learning_rate": 6.921091609374972e-06, "loss": 0.0046, "step": 44060 }, { "epoch": 1.3113534584083437, "grad_norm": 0.13527590036392212, "learning_rate": 6.919493202996133e-06, "loss": 0.0034, "step": 44070 }, { "epoch": 1.3116510198919853, "grad_norm": 0.22984492778778076, "learning_rate": 6.9178945665117615e-06, "loss": 0.0068, "step": 44080 }, { "epoch": 1.3119485813756269, "grad_norm": 0.19753497838974, "learning_rate": 6.916295700113499e-06, "loss": 0.0062, "step": 44090 }, { "epoch": 1.3122461428592682, "grad_norm": 0.09006530791521072, "learning_rate": 6.914696603993016e-06, "loss": 0.0048, "step": 44100 }, { "epoch": 1.3125437043429098, "grad_norm": 0.29515549540519714, "learning_rate": 6.9130972783420095e-06, "loss": 0.0051, "step": 44110 }, { "epoch": 1.3128412658265514, "grad_norm": 0.03803921118378639, "learning_rate": 6.911497723352203e-06, "loss": 0.0046, "step": 44120 }, { "epoch": 1.313138827310193, "grad_norm": 0.16674020886421204, "learning_rate": 6.909897939215348e-06, "loss": 0.0043, "step": 44130 }, { "epoch": 1.3134363887938345, "grad_norm": 0.06714217364788055, "learning_rate": 6.908297926123225e-06, "loss": 0.0041, "step": 44140 }, { "epoch": 1.313733950277476, "grad_norm": 0.1198076605796814, "learning_rate": 6.90669768426764e-06, "loss": 0.0035, "step": 44150 }, { "epoch": 1.3140315117611177, "grad_norm": 0.09709063172340393, "learning_rate": 6.9050972138404305e-06, "loss": 0.0048, "step": 44160 }, { "epoch": 1.3143290732447592, "grad_norm": 0.17598919570446014, "learning_rate": 6.903496515033452e-06, "loss": 0.0044, "step": 44170 }, { "epoch": 1.3146266347284008, "grad_norm": 0.34183597564697266, "learning_rate": 6.901895588038597e-06, "loss": 0.0067, "step": 44180 }, { "epoch": 1.3149241962120424, "grad_norm": 0.20552180707454681, "learning_rate": 6.900294433047783e-06, "loss": 0.0045, "step": 44190 }, { "epoch": 1.315221757695684, "grad_norm": 0.2309122234582901, "learning_rate": 6.8986930502529525e-06, "loss": 0.0036, "step": 44200 }, { "epoch": 1.3155193191793253, "grad_norm": 0.22394075989723206, "learning_rate": 6.897091439846075e-06, "loss": 0.0036, "step": 44210 }, { "epoch": 1.3158168806629669, "grad_norm": 0.11748769134283066, "learning_rate": 6.895489602019152e-06, "loss": 0.004, "step": 44220 }, { "epoch": 1.3161144421466084, "grad_norm": 0.2940877676010132, "learning_rate": 6.893887536964208e-06, "loss": 0.0049, "step": 44230 }, { "epoch": 1.31641200363025, "grad_norm": 0.10637671500444412, "learning_rate": 6.892285244873292e-06, "loss": 0.0038, "step": 44240 }, { "epoch": 1.3167095651138916, "grad_norm": 0.11999882012605667, "learning_rate": 6.890682725938489e-06, "loss": 0.0029, "step": 44250 }, { "epoch": 1.3170071265975332, "grad_norm": 0.042547114193439484, "learning_rate": 6.889079980351904e-06, "loss": 0.0075, "step": 44260 }, { "epoch": 1.3173046880811747, "grad_norm": 0.2854611575603485, "learning_rate": 6.8874770083056715e-06, "loss": 0.0058, "step": 44270 }, { "epoch": 1.3176022495648163, "grad_norm": 0.24534787237644196, "learning_rate": 6.8858738099919516e-06, "loss": 0.004, "step": 44280 }, { "epoch": 1.3178998110484579, "grad_norm": 0.18701784312725067, "learning_rate": 6.884270385602938e-06, "loss": 0.0039, "step": 44290 }, { "epoch": 1.3181973725320995, "grad_norm": 0.3190430998802185, "learning_rate": 6.8826667353308395e-06, "loss": 0.0057, "step": 44300 }, { "epoch": 1.318494934015741, "grad_norm": 0.412569135427475, "learning_rate": 6.881062859367904e-06, "loss": 0.0076, "step": 44310 }, { "epoch": 1.3187924954993826, "grad_norm": 0.10238606482744217, "learning_rate": 6.879458757906399e-06, "loss": 0.0054, "step": 44320 }, { "epoch": 1.3190900569830242, "grad_norm": 0.22645780444145203, "learning_rate": 6.877854431138622e-06, "loss": 0.004, "step": 44330 }, { "epoch": 1.3193876184666657, "grad_norm": 0.15842638909816742, "learning_rate": 6.876249879256898e-06, "loss": 0.0042, "step": 44340 }, { "epoch": 1.3196851799503073, "grad_norm": 0.24742719531059265, "learning_rate": 6.8746451024535765e-06, "loss": 0.0039, "step": 44350 }, { "epoch": 1.319982741433949, "grad_norm": 0.21266955137252808, "learning_rate": 6.873040100921036e-06, "loss": 0.006, "step": 44360 }, { "epoch": 1.3202803029175905, "grad_norm": 0.12002682685852051, "learning_rate": 6.8714348748516815e-06, "loss": 0.0053, "step": 44370 }, { "epoch": 1.320577864401232, "grad_norm": 0.40565648674964905, "learning_rate": 6.869829424437944e-06, "loss": 0.0067, "step": 44380 }, { "epoch": 1.3208754258848734, "grad_norm": 0.1819848269224167, "learning_rate": 6.868223749872282e-06, "loss": 0.0032, "step": 44390 }, { "epoch": 1.321172987368515, "grad_norm": 0.19040508568286896, "learning_rate": 6.866617851347183e-06, "loss": 0.004, "step": 44400 }, { "epoch": 1.3214705488521565, "grad_norm": 0.24850225448608398, "learning_rate": 6.865011729055158e-06, "loss": 0.0043, "step": 44410 }, { "epoch": 1.321768110335798, "grad_norm": 0.1584082543849945, "learning_rate": 6.863405383188745e-06, "loss": 0.0059, "step": 44420 }, { "epoch": 1.3220656718194397, "grad_norm": 0.18493995070457458, "learning_rate": 6.86179881394051e-06, "loss": 0.0066, "step": 44430 }, { "epoch": 1.3223632333030813, "grad_norm": 0.23556281626224518, "learning_rate": 6.860192021503048e-06, "loss": 0.0041, "step": 44440 }, { "epoch": 1.3226607947867228, "grad_norm": 0.14297747611999512, "learning_rate": 6.858585006068977e-06, "loss": 0.0069, "step": 44450 }, { "epoch": 1.3229583562703644, "grad_norm": 0.20799559354782104, "learning_rate": 6.8569777678309445e-06, "loss": 0.0031, "step": 44460 }, { "epoch": 1.323255917754006, "grad_norm": 0.10810793936252594, "learning_rate": 6.855370306981619e-06, "loss": 0.0053, "step": 44470 }, { "epoch": 1.3235534792376475, "grad_norm": 0.6088873744010925, "learning_rate": 6.853762623713708e-06, "loss": 0.0065, "step": 44480 }, { "epoch": 1.3238510407212891, "grad_norm": 0.26967787742614746, "learning_rate": 6.85215471821993e-06, "loss": 0.0065, "step": 44490 }, { "epoch": 1.3241486022049305, "grad_norm": 0.1055825799703598, "learning_rate": 6.850546590693044e-06, "loss": 0.0028, "step": 44500 }, { "epoch": 1.324446163688572, "grad_norm": 0.2192617654800415, "learning_rate": 6.848938241325825e-06, "loss": 0.0041, "step": 44510 }, { "epoch": 1.3247437251722136, "grad_norm": 0.2667882740497589, "learning_rate": 6.847329670311083e-06, "loss": 0.0053, "step": 44520 }, { "epoch": 1.3250412866558552, "grad_norm": 0.18059088289737701, "learning_rate": 6.8457208778416464e-06, "loss": 0.0055, "step": 44530 }, { "epoch": 1.3253388481394968, "grad_norm": 0.28450438380241394, "learning_rate": 6.844111864110378e-06, "loss": 0.0053, "step": 44540 }, { "epoch": 1.3256364096231383, "grad_norm": 0.10225208848714828, "learning_rate": 6.842502629310162e-06, "loss": 0.0029, "step": 44550 }, { "epoch": 1.32593397110678, "grad_norm": 0.06403900682926178, "learning_rate": 6.8408931736339115e-06, "loss": 0.0066, "step": 44560 }, { "epoch": 1.3262315325904215, "grad_norm": 0.13568773865699768, "learning_rate": 6.839283497274564e-06, "loss": 0.0091, "step": 44570 }, { "epoch": 1.326529094074063, "grad_norm": 0.09794487804174423, "learning_rate": 6.837673600425087e-06, "loss": 0.0035, "step": 44580 }, { "epoch": 1.3268266555577046, "grad_norm": 0.3054143488407135, "learning_rate": 6.8360634832784674e-06, "loss": 0.0051, "step": 44590 }, { "epoch": 1.3271242170413462, "grad_norm": 0.20784178376197815, "learning_rate": 6.834453146027731e-06, "loss": 0.0058, "step": 44600 }, { "epoch": 1.3274217785249878, "grad_norm": 0.30065494775772095, "learning_rate": 6.832842588865914e-06, "loss": 0.0054, "step": 44610 }, { "epoch": 1.3277193400086293, "grad_norm": 0.35856297612190247, "learning_rate": 6.831231811986093e-06, "loss": 0.0055, "step": 44620 }, { "epoch": 1.328016901492271, "grad_norm": 0.20140211284160614, "learning_rate": 6.829620815581364e-06, "loss": 0.0068, "step": 44630 }, { "epoch": 1.3283144629759125, "grad_norm": 0.12871019542217255, "learning_rate": 6.828009599844848e-06, "loss": 0.004, "step": 44640 }, { "epoch": 1.328612024459554, "grad_norm": 0.20128533244132996, "learning_rate": 6.826398164969698e-06, "loss": 0.0049, "step": 44650 }, { "epoch": 1.3289095859431956, "grad_norm": 0.40706226229667664, "learning_rate": 6.8247865111490864e-06, "loss": 0.0043, "step": 44660 }, { "epoch": 1.329207147426837, "grad_norm": 0.2892058193683624, "learning_rate": 6.82317463857622e-06, "loss": 0.0049, "step": 44670 }, { "epoch": 1.3295047089104786, "grad_norm": 0.12615054845809937, "learning_rate": 6.821562547444323e-06, "loss": 0.0047, "step": 44680 }, { "epoch": 1.3298022703941201, "grad_norm": 0.2565649449825287, "learning_rate": 6.819950237946652e-06, "loss": 0.0046, "step": 44690 }, { "epoch": 1.3300998318777617, "grad_norm": 0.12228656560182571, "learning_rate": 6.818337710276488e-06, "loss": 0.0034, "step": 44700 }, { "epoch": 1.3303973933614033, "grad_norm": 0.20030316710472107, "learning_rate": 6.816724964627138e-06, "loss": 0.0043, "step": 44710 }, { "epoch": 1.3306949548450449, "grad_norm": 0.4089352488517761, "learning_rate": 6.815112001191935e-06, "loss": 0.0054, "step": 44720 }, { "epoch": 1.3309925163286864, "grad_norm": 0.10634308308362961, "learning_rate": 6.813498820164239e-06, "loss": 0.0054, "step": 44730 }, { "epoch": 1.331290077812328, "grad_norm": 0.022627057507634163, "learning_rate": 6.811885421737434e-06, "loss": 0.0031, "step": 44740 }, { "epoch": 1.3315876392959696, "grad_norm": 0.18489287793636322, "learning_rate": 6.810271806104931e-06, "loss": 0.0062, "step": 44750 }, { "epoch": 1.3318852007796111, "grad_norm": 0.07632123678922653, "learning_rate": 6.808657973460171e-06, "loss": 0.0037, "step": 44760 }, { "epoch": 1.3321827622632527, "grad_norm": 0.11130078136920929, "learning_rate": 6.807043923996613e-06, "loss": 0.0061, "step": 44770 }, { "epoch": 1.332480323746894, "grad_norm": 0.0824674442410469, "learning_rate": 6.8054296579077495e-06, "loss": 0.0045, "step": 44780 }, { "epoch": 1.3327778852305356, "grad_norm": 0.23480159044265747, "learning_rate": 6.803815175387094e-06, "loss": 0.0047, "step": 44790 }, { "epoch": 1.3330754467141772, "grad_norm": 0.12161692976951599, "learning_rate": 6.802200476628189e-06, "loss": 0.0026, "step": 44800 }, { "epoch": 1.3333730081978188, "grad_norm": 0.22629812359809875, "learning_rate": 6.800585561824602e-06, "loss": 0.0036, "step": 44810 }, { "epoch": 1.3336705696814604, "grad_norm": 0.26233550906181335, "learning_rate": 6.7989704311699254e-06, "loss": 0.0055, "step": 44820 }, { "epoch": 1.333968131165102, "grad_norm": 0.6676262021064758, "learning_rate": 6.797355084857779e-06, "loss": 0.0054, "step": 44830 }, { "epoch": 1.3342656926487435, "grad_norm": 0.2028358280658722, "learning_rate": 6.795739523081807e-06, "loss": 0.0036, "step": 44840 }, { "epoch": 1.334563254132385, "grad_norm": 0.21023871004581451, "learning_rate": 6.79412374603568e-06, "loss": 0.0044, "step": 44850 }, { "epoch": 1.3348608156160267, "grad_norm": 0.30323633551597595, "learning_rate": 6.792507753913097e-06, "loss": 0.0046, "step": 44860 }, { "epoch": 1.3351583770996682, "grad_norm": 0.09435626119375229, "learning_rate": 6.790891546907776e-06, "loss": 0.0029, "step": 44870 }, { "epoch": 1.3354559385833098, "grad_norm": 0.4615434408187866, "learning_rate": 6.789275125213469e-06, "loss": 0.0043, "step": 44880 }, { "epoch": 1.3357535000669514, "grad_norm": 0.1416270136833191, "learning_rate": 6.787658489023948e-06, "loss": 0.0042, "step": 44890 }, { "epoch": 1.336051061550593, "grad_norm": 0.15834452211856842, "learning_rate": 6.786041638533012e-06, "loss": 0.0057, "step": 44900 }, { "epoch": 1.3363486230342345, "grad_norm": 0.2109888195991516, "learning_rate": 6.784424573934489e-06, "loss": 0.0043, "step": 44910 }, { "epoch": 1.336646184517876, "grad_norm": 0.44045013189315796, "learning_rate": 6.782807295422227e-06, "loss": 0.0036, "step": 44920 }, { "epoch": 1.3369437460015177, "grad_norm": 0.0765523836016655, "learning_rate": 6.781189803190105e-06, "loss": 0.003, "step": 44930 }, { "epoch": 1.3372413074851592, "grad_norm": 0.19521218538284302, "learning_rate": 6.779572097432021e-06, "loss": 0.005, "step": 44940 }, { "epoch": 1.3375388689688008, "grad_norm": 0.1380213350057602, "learning_rate": 6.777954178341909e-06, "loss": 0.0024, "step": 44950 }, { "epoch": 1.3378364304524422, "grad_norm": 0.15561793744564056, "learning_rate": 6.776336046113716e-06, "loss": 0.0024, "step": 44960 }, { "epoch": 1.3381339919360837, "grad_norm": 0.14130869507789612, "learning_rate": 6.774717700941426e-06, "loss": 0.0041, "step": 44970 }, { "epoch": 1.3384315534197253, "grad_norm": 0.12597666680812836, "learning_rate": 6.77309914301904e-06, "loss": 0.0038, "step": 44980 }, { "epoch": 1.3387291149033669, "grad_norm": 0.1950959414243698, "learning_rate": 6.771480372540591e-06, "loss": 0.0051, "step": 44990 }, { "epoch": 1.3390266763870085, "grad_norm": 0.17725682258605957, "learning_rate": 6.76986138970013e-06, "loss": 0.0045, "step": 45000 }, { "epoch": 1.33932423787065, "grad_norm": 0.2639728784561157, "learning_rate": 6.768242194691742e-06, "loss": 0.0051, "step": 45010 }, { "epoch": 1.3396217993542916, "grad_norm": 0.26818522810935974, "learning_rate": 6.766622787709531e-06, "loss": 0.0052, "step": 45020 }, { "epoch": 1.3399193608379332, "grad_norm": 0.05064502730965614, "learning_rate": 6.76500316894763e-06, "loss": 0.0049, "step": 45030 }, { "epoch": 1.3402169223215747, "grad_norm": 0.19289517402648926, "learning_rate": 6.7633833386001955e-06, "loss": 0.0076, "step": 45040 }, { "epoch": 1.3405144838052163, "grad_norm": 0.09579426050186157, "learning_rate": 6.76176329686141e-06, "loss": 0.0049, "step": 45050 }, { "epoch": 1.340812045288858, "grad_norm": 0.15759071707725525, "learning_rate": 6.760143043925482e-06, "loss": 0.0031, "step": 45060 }, { "epoch": 1.3411096067724992, "grad_norm": 0.08691520988941193, "learning_rate": 6.758522579986644e-06, "loss": 0.0049, "step": 45070 }, { "epoch": 1.3414071682561408, "grad_norm": 0.1748286485671997, "learning_rate": 6.756901905239155e-06, "loss": 0.0051, "step": 45080 }, { "epoch": 1.3417047297397824, "grad_norm": 0.05064921826124191, "learning_rate": 6.755281019877296e-06, "loss": 0.0033, "step": 45090 }, { "epoch": 1.342002291223424, "grad_norm": 0.11318766325712204, "learning_rate": 6.7536599240953794e-06, "loss": 0.0059, "step": 45100 }, { "epoch": 1.3422998527070655, "grad_norm": 0.15616267919540405, "learning_rate": 6.752038618087737e-06, "loss": 0.0035, "step": 45110 }, { "epoch": 1.342597414190707, "grad_norm": 0.21384939551353455, "learning_rate": 6.750417102048731e-06, "loss": 0.0055, "step": 45120 }, { "epoch": 1.3428949756743487, "grad_norm": 0.31922072172164917, "learning_rate": 6.748795376172743e-06, "loss": 0.0038, "step": 45130 }, { "epoch": 1.3431925371579903, "grad_norm": 0.2191956490278244, "learning_rate": 6.7471734406541845e-06, "loss": 0.006, "step": 45140 }, { "epoch": 1.3434900986416318, "grad_norm": 0.2907726764678955, "learning_rate": 6.745551295687488e-06, "loss": 0.0048, "step": 45150 }, { "epoch": 1.3437876601252734, "grad_norm": 0.19224511086940765, "learning_rate": 6.743928941467117e-06, "loss": 0.0027, "step": 45160 }, { "epoch": 1.344085221608915, "grad_norm": 0.1441614180803299, "learning_rate": 6.742306378187553e-06, "loss": 0.0035, "step": 45170 }, { "epoch": 1.3443827830925565, "grad_norm": 0.12443768978118896, "learning_rate": 6.740683606043308e-06, "loss": 0.0036, "step": 45180 }, { "epoch": 1.3446803445761981, "grad_norm": 0.23014073073863983, "learning_rate": 6.7390606252289185e-06, "loss": 0.0037, "step": 45190 }, { "epoch": 1.3449779060598397, "grad_norm": 0.1988321989774704, "learning_rate": 6.737437435938941e-06, "loss": 0.006, "step": 45200 }, { "epoch": 1.3452754675434813, "grad_norm": 0.17428013682365417, "learning_rate": 6.735814038367964e-06, "loss": 0.0054, "step": 45210 }, { "epoch": 1.3455730290271228, "grad_norm": 0.14755640923976898, "learning_rate": 6.7341904327105944e-06, "loss": 0.0055, "step": 45220 }, { "epoch": 1.3458705905107644, "grad_norm": 0.15017573535442352, "learning_rate": 6.7325666191614714e-06, "loss": 0.004, "step": 45230 }, { "epoch": 1.346168151994406, "grad_norm": 0.2735426127910614, "learning_rate": 6.730942597915251e-06, "loss": 0.0044, "step": 45240 }, { "epoch": 1.3464657134780473, "grad_norm": 0.17679961025714874, "learning_rate": 6.729318369166621e-06, "loss": 0.0044, "step": 45250 }, { "epoch": 1.346763274961689, "grad_norm": 0.20537219941616058, "learning_rate": 6.727693933110289e-06, "loss": 0.0078, "step": 45260 }, { "epoch": 1.3470608364453305, "grad_norm": 0.15770044922828674, "learning_rate": 6.72606928994099e-06, "loss": 0.0063, "step": 45270 }, { "epoch": 1.347358397928972, "grad_norm": 0.04690723866224289, "learning_rate": 6.724444439853483e-06, "loss": 0.0035, "step": 45280 }, { "epoch": 1.3476559594126136, "grad_norm": 0.15606656670570374, "learning_rate": 6.722819383042555e-06, "loss": 0.0057, "step": 45290 }, { "epoch": 1.3479535208962552, "grad_norm": 0.583426833152771, "learning_rate": 6.721194119703012e-06, "loss": 0.0109, "step": 45300 }, { "epoch": 1.3482510823798968, "grad_norm": 0.1806102693080902, "learning_rate": 6.71956865002969e-06, "loss": 0.0043, "step": 45310 }, { "epoch": 1.3485486438635383, "grad_norm": 0.06619546562433243, "learning_rate": 6.717942974217443e-06, "loss": 0.0041, "step": 45320 }, { "epoch": 1.34884620534718, "grad_norm": 0.08798535168170929, "learning_rate": 6.716317092461161e-06, "loss": 0.003, "step": 45330 }, { "epoch": 1.3491437668308215, "grad_norm": 0.058361101895570755, "learning_rate": 6.7146910049557465e-06, "loss": 0.004, "step": 45340 }, { "epoch": 1.3494413283144628, "grad_norm": 0.18064993619918823, "learning_rate": 6.713064711896136e-06, "loss": 0.007, "step": 45350 }, { "epoch": 1.3497388897981044, "grad_norm": 0.2629123032093048, "learning_rate": 6.711438213477281e-06, "loss": 0.0047, "step": 45360 }, { "epoch": 1.350036451281746, "grad_norm": 0.10744410753250122, "learning_rate": 6.709811509894171e-06, "loss": 0.0037, "step": 45370 }, { "epoch": 1.3503340127653876, "grad_norm": 0.34057432413101196, "learning_rate": 6.708184601341807e-06, "loss": 0.004, "step": 45380 }, { "epoch": 1.3506315742490291, "grad_norm": 0.1488313525915146, "learning_rate": 6.7065574880152214e-06, "loss": 0.0025, "step": 45390 }, { "epoch": 1.3509291357326707, "grad_norm": 0.08364728838205338, "learning_rate": 6.7049301701094705e-06, "loss": 0.0044, "step": 45400 }, { "epoch": 1.3512266972163123, "grad_norm": 0.27651447057724, "learning_rate": 6.703302647819632e-06, "loss": 0.0057, "step": 45410 }, { "epoch": 1.3515242586999539, "grad_norm": 0.15041512250900269, "learning_rate": 6.701674921340813e-06, "loss": 0.0046, "step": 45420 }, { "epoch": 1.3518218201835954, "grad_norm": 0.4340156614780426, "learning_rate": 6.700046990868141e-06, "loss": 0.0056, "step": 45430 }, { "epoch": 1.352119381667237, "grad_norm": 0.09821001440286636, "learning_rate": 6.698418856596771e-06, "loss": 0.0085, "step": 45440 }, { "epoch": 1.3524169431508786, "grad_norm": 0.14395315945148468, "learning_rate": 6.696790518721879e-06, "loss": 0.0041, "step": 45450 }, { "epoch": 1.3527145046345201, "grad_norm": 0.4369836151599884, "learning_rate": 6.6951619774386685e-06, "loss": 0.005, "step": 45460 }, { "epoch": 1.3530120661181617, "grad_norm": 0.4929656386375427, "learning_rate": 6.693533232942366e-06, "loss": 0.006, "step": 45470 }, { "epoch": 1.3533096276018033, "grad_norm": 0.8332598209381104, "learning_rate": 6.691904285428225e-06, "loss": 0.0061, "step": 45480 }, { "epoch": 1.3536071890854449, "grad_norm": 0.06114549562335014, "learning_rate": 6.690275135091515e-06, "loss": 0.003, "step": 45490 }, { "epoch": 1.3539047505690864, "grad_norm": 0.2988637387752533, "learning_rate": 6.688645782127542e-06, "loss": 0.0059, "step": 45500 }, { "epoch": 1.354202312052728, "grad_norm": 0.21104051172733307, "learning_rate": 6.687016226731626e-06, "loss": 0.0045, "step": 45510 }, { "epoch": 1.3544998735363696, "grad_norm": 0.2782413959503174, "learning_rate": 6.685386469099118e-06, "loss": 0.007, "step": 45520 }, { "epoch": 1.354797435020011, "grad_norm": 0.38002756237983704, "learning_rate": 6.6837565094253875e-06, "loss": 0.0067, "step": 45530 }, { "epoch": 1.3550949965036525, "grad_norm": 0.1714724600315094, "learning_rate": 6.682126347905834e-06, "loss": 0.0062, "step": 45540 }, { "epoch": 1.355392557987294, "grad_norm": 0.22442682087421417, "learning_rate": 6.680495984735877e-06, "loss": 0.0044, "step": 45550 }, { "epoch": 1.3556901194709357, "grad_norm": 0.3122345209121704, "learning_rate": 6.678865420110962e-06, "loss": 0.0058, "step": 45560 }, { "epoch": 1.3559876809545772, "grad_norm": 0.31091850996017456, "learning_rate": 6.677234654226558e-06, "loss": 0.0039, "step": 45570 }, { "epoch": 1.3562852424382188, "grad_norm": 0.18206661939620972, "learning_rate": 6.675603687278159e-06, "loss": 0.0046, "step": 45580 }, { "epoch": 1.3565828039218604, "grad_norm": 0.047578997910022736, "learning_rate": 6.673972519461282e-06, "loss": 0.0041, "step": 45590 }, { "epoch": 1.356880365405502, "grad_norm": 0.11031416803598404, "learning_rate": 6.672341150971469e-06, "loss": 0.0029, "step": 45600 }, { "epoch": 1.3571779268891435, "grad_norm": 0.17183589935302734, "learning_rate": 6.670709582004286e-06, "loss": 0.0045, "step": 45610 }, { "epoch": 1.357475488372785, "grad_norm": 0.09444374591112137, "learning_rate": 6.66907781275532e-06, "loss": 0.0044, "step": 45620 }, { "epoch": 1.3577730498564267, "grad_norm": 0.2490471601486206, "learning_rate": 6.667445843420188e-06, "loss": 0.0065, "step": 45630 }, { "epoch": 1.358070611340068, "grad_norm": 0.07647771388292313, "learning_rate": 6.665813674194527e-06, "loss": 0.0034, "step": 45640 }, { "epoch": 1.3583681728237096, "grad_norm": 0.5592878460884094, "learning_rate": 6.664181305273998e-06, "loss": 0.0054, "step": 45650 }, { "epoch": 1.3586657343073512, "grad_norm": 0.0853208675980568, "learning_rate": 6.662548736854285e-06, "loss": 0.0032, "step": 45660 }, { "epoch": 1.3589632957909927, "grad_norm": 0.2879674434661865, "learning_rate": 6.660915969131101e-06, "loss": 0.0059, "step": 45670 }, { "epoch": 1.3592608572746343, "grad_norm": 0.12274270504713058, "learning_rate": 6.659283002300176e-06, "loss": 0.006, "step": 45680 }, { "epoch": 1.3595584187582759, "grad_norm": 0.09739149361848831, "learning_rate": 6.657649836557269e-06, "loss": 0.0067, "step": 45690 }, { "epoch": 1.3598559802419175, "grad_norm": 0.07635568827390671, "learning_rate": 6.656016472098161e-06, "loss": 0.005, "step": 45700 }, { "epoch": 1.360153541725559, "grad_norm": 0.18460014462471008, "learning_rate": 6.654382909118657e-06, "loss": 0.0029, "step": 45710 }, { "epoch": 1.3604511032092006, "grad_norm": 0.17865025997161865, "learning_rate": 6.652749147814584e-06, "loss": 0.0033, "step": 45720 }, { "epoch": 1.3607486646928422, "grad_norm": 0.013704461045563221, "learning_rate": 6.651115188381798e-06, "loss": 0.0043, "step": 45730 }, { "epoch": 1.3610462261764837, "grad_norm": 0.11874528974294662, "learning_rate": 6.6494810310161715e-06, "loss": 0.002, "step": 45740 }, { "epoch": 1.3613437876601253, "grad_norm": 0.13784068822860718, "learning_rate": 6.647846675913607e-06, "loss": 0.0049, "step": 45750 }, { "epoch": 1.3616413491437669, "grad_norm": 0.12976540625095367, "learning_rate": 6.646212123270027e-06, "loss": 0.0052, "step": 45760 }, { "epoch": 1.3619389106274085, "grad_norm": 0.47896212339401245, "learning_rate": 6.644577373281379e-06, "loss": 0.0078, "step": 45770 }, { "epoch": 1.36223647211105, "grad_norm": 0.16204290091991425, "learning_rate": 6.642942426143635e-06, "loss": 0.0065, "step": 45780 }, { "epoch": 1.3625340335946916, "grad_norm": 0.1734137088060379, "learning_rate": 6.6413072820527865e-06, "loss": 0.0039, "step": 45790 }, { "epoch": 1.3628315950783332, "grad_norm": 0.13773603737354279, "learning_rate": 6.639671941204857e-06, "loss": 0.0028, "step": 45800 }, { "epoch": 1.3631291565619748, "grad_norm": 0.13016869127750397, "learning_rate": 6.638036403795883e-06, "loss": 0.004, "step": 45810 }, { "epoch": 1.363426718045616, "grad_norm": 0.1488340049982071, "learning_rate": 6.636400670021934e-06, "loss": 0.0043, "step": 45820 }, { "epoch": 1.3637242795292577, "grad_norm": 0.043306753039360046, "learning_rate": 6.634764740079096e-06, "loss": 0.0039, "step": 45830 }, { "epoch": 1.3640218410128992, "grad_norm": 0.06717687100172043, "learning_rate": 6.633128614163484e-06, "loss": 0.0063, "step": 45840 }, { "epoch": 1.3643194024965408, "grad_norm": 0.13445942103862762, "learning_rate": 6.631492292471231e-06, "loss": 0.0038, "step": 45850 }, { "epoch": 1.3646169639801824, "grad_norm": 0.13111862540245056, "learning_rate": 6.6298557751985e-06, "loss": 0.0049, "step": 45860 }, { "epoch": 1.364914525463824, "grad_norm": 0.12348075211048126, "learning_rate": 6.628219062541469e-06, "loss": 0.0098, "step": 45870 }, { "epoch": 1.3652120869474655, "grad_norm": 0.08666863292455673, "learning_rate": 6.62658215469635e-06, "loss": 0.0041, "step": 45880 }, { "epoch": 1.3655096484311071, "grad_norm": 0.142225444316864, "learning_rate": 6.624945051859368e-06, "loss": 0.0057, "step": 45890 }, { "epoch": 1.3658072099147487, "grad_norm": 0.22795598208904266, "learning_rate": 6.6233077542267775e-06, "loss": 0.0059, "step": 45900 }, { "epoch": 1.3661047713983903, "grad_norm": 0.16375888884067535, "learning_rate": 6.621670261994857e-06, "loss": 0.0047, "step": 45910 }, { "epoch": 1.3664023328820316, "grad_norm": 0.33989062905311584, "learning_rate": 6.620032575359904e-06, "loss": 0.0044, "step": 45920 }, { "epoch": 1.3666998943656732, "grad_norm": 0.30635637044906616, "learning_rate": 6.618394694518242e-06, "loss": 0.0052, "step": 45930 }, { "epoch": 1.3669974558493148, "grad_norm": 0.18396461009979248, "learning_rate": 6.616756619666218e-06, "loss": 0.005, "step": 45940 }, { "epoch": 1.3672950173329563, "grad_norm": 0.12613536417484283, "learning_rate": 6.6151183510002005e-06, "loss": 0.004, "step": 45950 }, { "epoch": 1.367592578816598, "grad_norm": 0.5862231850624084, "learning_rate": 6.613479888716583e-06, "loss": 0.0079, "step": 45960 }, { "epoch": 1.3678901403002395, "grad_norm": 0.25660091638565063, "learning_rate": 6.611841233011782e-06, "loss": 0.0059, "step": 45970 }, { "epoch": 1.368187701783881, "grad_norm": 0.3262876570224762, "learning_rate": 6.610202384082235e-06, "loss": 0.0094, "step": 45980 }, { "epoch": 1.3684852632675226, "grad_norm": 0.10569852590560913, "learning_rate": 6.608563342124407e-06, "loss": 0.0033, "step": 45990 }, { "epoch": 1.3687828247511642, "grad_norm": 0.23036019504070282, "learning_rate": 6.60692410733478e-06, "loss": 0.0028, "step": 46000 }, { "epoch": 1.3690803862348058, "grad_norm": 0.11196374893188477, "learning_rate": 6.605284679909866e-06, "loss": 0.0045, "step": 46010 }, { "epoch": 1.3693779477184473, "grad_norm": 0.14925411343574524, "learning_rate": 6.603645060046194e-06, "loss": 0.0042, "step": 46020 }, { "epoch": 1.369675509202089, "grad_norm": 0.09151490777730942, "learning_rate": 6.6020052479403215e-06, "loss": 0.0043, "step": 46030 }, { "epoch": 1.3699730706857305, "grad_norm": 0.1244431585073471, "learning_rate": 6.6003652437888254e-06, "loss": 0.0041, "step": 46040 }, { "epoch": 1.370270632169372, "grad_norm": 0.29015204310417175, "learning_rate": 6.598725047788305e-06, "loss": 0.0158, "step": 46050 }, { "epoch": 1.3705681936530136, "grad_norm": 0.29251137375831604, "learning_rate": 6.597084660135387e-06, "loss": 0.0073, "step": 46060 }, { "epoch": 1.3708657551366552, "grad_norm": 0.0978371649980545, "learning_rate": 6.595444081026714e-06, "loss": 0.0057, "step": 46070 }, { "epoch": 1.3711633166202968, "grad_norm": 0.09859690815210342, "learning_rate": 6.593803310658962e-06, "loss": 0.0047, "step": 46080 }, { "epoch": 1.3714608781039384, "grad_norm": 0.18351373076438904, "learning_rate": 6.5921623492288165e-06, "loss": 0.0059, "step": 46090 }, { "epoch": 1.3717584395875797, "grad_norm": 0.2519473433494568, "learning_rate": 6.590521196932998e-06, "loss": 0.0046, "step": 46100 }, { "epoch": 1.3720560010712213, "grad_norm": 0.18845121562480927, "learning_rate": 6.5888798539682444e-06, "loss": 0.0059, "step": 46110 }, { "epoch": 1.3723535625548628, "grad_norm": 0.28577128052711487, "learning_rate": 6.587238320531317e-06, "loss": 0.0043, "step": 46120 }, { "epoch": 1.3726511240385044, "grad_norm": 0.3943924605846405, "learning_rate": 6.585596596818997e-06, "loss": 0.0047, "step": 46130 }, { "epoch": 1.372948685522146, "grad_norm": 0.2322455495595932, "learning_rate": 6.583954683028096e-06, "loss": 0.0044, "step": 46140 }, { "epoch": 1.3732462470057876, "grad_norm": 0.3055024743080139, "learning_rate": 6.582312579355441e-06, "loss": 0.004, "step": 46150 }, { "epoch": 1.3735438084894291, "grad_norm": 0.2003074586391449, "learning_rate": 6.580670285997885e-06, "loss": 0.0042, "step": 46160 }, { "epoch": 1.3738413699730707, "grad_norm": 0.10518622398376465, "learning_rate": 6.5790278031523035e-06, "loss": 0.0041, "step": 46170 }, { "epoch": 1.3741389314567123, "grad_norm": 0.21633115410804749, "learning_rate": 6.577385131015593e-06, "loss": 0.0035, "step": 46180 }, { "epoch": 1.3744364929403539, "grad_norm": 0.27503833174705505, "learning_rate": 6.575742269784678e-06, "loss": 0.0032, "step": 46190 }, { "epoch": 1.3747340544239954, "grad_norm": 0.739810049533844, "learning_rate": 6.574099219656498e-06, "loss": 0.0062, "step": 46200 }, { "epoch": 1.3750316159076368, "grad_norm": 0.2666730284690857, "learning_rate": 6.572455980828022e-06, "loss": 0.0047, "step": 46210 }, { "epoch": 1.3753291773912784, "grad_norm": 0.15293923020362854, "learning_rate": 6.570812553496235e-06, "loss": 0.0039, "step": 46220 }, { "epoch": 1.37562673887492, "grad_norm": 0.2764635384082794, "learning_rate": 6.569168937858153e-06, "loss": 0.0061, "step": 46230 }, { "epoch": 1.3759243003585615, "grad_norm": 0.3706328570842743, "learning_rate": 6.567525134110806e-06, "loss": 0.0053, "step": 46240 }, { "epoch": 1.376221861842203, "grad_norm": 0.2612864673137665, "learning_rate": 6.565881142451251e-06, "loss": 0.003, "step": 46250 }, { "epoch": 1.3765194233258446, "grad_norm": 0.12587837874889374, "learning_rate": 6.564236963076568e-06, "loss": 0.0033, "step": 46260 }, { "epoch": 1.3768169848094862, "grad_norm": 0.13454140722751617, "learning_rate": 6.562592596183857e-06, "loss": 0.0031, "step": 46270 }, { "epoch": 1.3771145462931278, "grad_norm": 0.21324068307876587, "learning_rate": 6.5609480419702435e-06, "loss": 0.0045, "step": 46280 }, { "epoch": 1.3774121077767694, "grad_norm": 0.4961898624897003, "learning_rate": 6.559303300632873e-06, "loss": 0.0088, "step": 46290 }, { "epoch": 1.377709669260411, "grad_norm": 0.2448924481868744, "learning_rate": 6.557658372368914e-06, "loss": 0.003, "step": 46300 }, { "epoch": 1.3780072307440525, "grad_norm": 0.16990260779857635, "learning_rate": 6.556013257375557e-06, "loss": 0.0028, "step": 46310 }, { "epoch": 1.378304792227694, "grad_norm": 0.07976949959993362, "learning_rate": 6.554367955850018e-06, "loss": 0.0037, "step": 46320 }, { "epoch": 1.3786023537113357, "grad_norm": 0.2369716614484787, "learning_rate": 6.552722467989532e-06, "loss": 0.0037, "step": 46330 }, { "epoch": 1.3788999151949772, "grad_norm": 0.06910865753889084, "learning_rate": 6.551076793991355e-06, "loss": 0.0038, "step": 46340 }, { "epoch": 1.3791974766786188, "grad_norm": 0.27038854360580444, "learning_rate": 6.549430934052769e-06, "loss": 0.0059, "step": 46350 }, { "epoch": 1.3794950381622604, "grad_norm": 0.1630563586950302, "learning_rate": 6.54778488837108e-06, "loss": 0.0042, "step": 46360 }, { "epoch": 1.379792599645902, "grad_norm": 0.04098542034626007, "learning_rate": 6.546138657143607e-06, "loss": 0.0032, "step": 46370 }, { "epoch": 1.3800901611295435, "grad_norm": 0.2508062422275543, "learning_rate": 6.544492240567701e-06, "loss": 0.004, "step": 46380 }, { "epoch": 1.3803877226131849, "grad_norm": 0.2515852153301239, "learning_rate": 6.542845638840731e-06, "loss": 0.0045, "step": 46390 }, { "epoch": 1.3806852840968264, "grad_norm": 0.08731213957071304, "learning_rate": 6.541198852160091e-06, "loss": 0.0044, "step": 46400 }, { "epoch": 1.380982845580468, "grad_norm": 0.24388577044010162, "learning_rate": 6.539551880723191e-06, "loss": 0.0051, "step": 46410 }, { "epoch": 1.3812804070641096, "grad_norm": 0.2269730269908905, "learning_rate": 6.537904724727469e-06, "loss": 0.0066, "step": 46420 }, { "epoch": 1.3815779685477512, "grad_norm": 0.06272085756063461, "learning_rate": 6.536257384370382e-06, "loss": 0.0041, "step": 46430 }, { "epoch": 1.3818755300313927, "grad_norm": 0.2843303382396698, "learning_rate": 6.534609859849414e-06, "loss": 0.004, "step": 46440 }, { "epoch": 1.3821730915150343, "grad_norm": 0.06935053318738937, "learning_rate": 6.532962151362062e-06, "loss": 0.0276, "step": 46450 }, { "epoch": 1.3824706529986759, "grad_norm": 0.2963219881057739, "learning_rate": 6.531314259105855e-06, "loss": 0.0036, "step": 46460 }, { "epoch": 1.3827682144823175, "grad_norm": 0.3425544202327728, "learning_rate": 6.529666183278337e-06, "loss": 0.004, "step": 46470 }, { "epoch": 1.383065775965959, "grad_norm": 0.33271679282188416, "learning_rate": 6.528017924077079e-06, "loss": 0.0039, "step": 46480 }, { "epoch": 1.3833633374496006, "grad_norm": 1.3466403484344482, "learning_rate": 6.526369481699668e-06, "loss": 0.0166, "step": 46490 }, { "epoch": 1.383660898933242, "grad_norm": 0.10539926588535309, "learning_rate": 6.524720856343718e-06, "loss": 0.008, "step": 46500 }, { "epoch": 1.3839584604168835, "grad_norm": 0.10643478482961655, "learning_rate": 6.5230720482068655e-06, "loss": 0.0057, "step": 46510 }, { "epoch": 1.384256021900525, "grad_norm": 0.1664225459098816, "learning_rate": 6.521423057486762e-06, "loss": 0.0059, "step": 46520 }, { "epoch": 1.3845535833841667, "grad_norm": 0.24476851522922516, "learning_rate": 6.519773884381091e-06, "loss": 0.0055, "step": 46530 }, { "epoch": 1.3848511448678082, "grad_norm": 0.0859290063381195, "learning_rate": 6.518124529087548e-06, "loss": 0.004, "step": 46540 }, { "epoch": 1.3851487063514498, "grad_norm": 0.1653107851743698, "learning_rate": 6.516474991803859e-06, "loss": 0.0039, "step": 46550 }, { "epoch": 1.3854462678350914, "grad_norm": 0.40993788838386536, "learning_rate": 6.514825272727763e-06, "loss": 0.006, "step": 46560 }, { "epoch": 1.385743829318733, "grad_norm": 0.12736515700817108, "learning_rate": 6.513175372057029e-06, "loss": 0.0052, "step": 46570 }, { "epoch": 1.3860413908023745, "grad_norm": 0.27571389079093933, "learning_rate": 6.511525289989443e-06, "loss": 0.0045, "step": 46580 }, { "epoch": 1.3863389522860161, "grad_norm": 0.13496685028076172, "learning_rate": 6.509875026722814e-06, "loss": 0.0029, "step": 46590 }, { "epoch": 1.3866365137696577, "grad_norm": 0.09915434569120407, "learning_rate": 6.508224582454973e-06, "loss": 0.0039, "step": 46600 }, { "epoch": 1.3869340752532993, "grad_norm": 0.030542155727744102, "learning_rate": 6.506573957383773e-06, "loss": 0.0045, "step": 46610 }, { "epoch": 1.3872316367369408, "grad_norm": 0.2608891725540161, "learning_rate": 6.504923151707086e-06, "loss": 0.0037, "step": 46620 }, { "epoch": 1.3875291982205824, "grad_norm": 0.5458545684814453, "learning_rate": 6.503272165622811e-06, "loss": 0.0055, "step": 46630 }, { "epoch": 1.387826759704224, "grad_norm": 0.07400092482566833, "learning_rate": 6.501620999328864e-06, "loss": 0.0034, "step": 46640 }, { "epoch": 1.3881243211878656, "grad_norm": 0.34948456287384033, "learning_rate": 6.499969653023181e-06, "loss": 0.0074, "step": 46650 }, { "epoch": 1.3884218826715071, "grad_norm": 0.14727850258350372, "learning_rate": 6.4983181269037266e-06, "loss": 0.0073, "step": 46660 }, { "epoch": 1.3887194441551485, "grad_norm": 0.29593953490257263, "learning_rate": 6.496666421168482e-06, "loss": 0.0071, "step": 46670 }, { "epoch": 1.38901700563879, "grad_norm": 0.2388804852962494, "learning_rate": 6.495014536015448e-06, "loss": 0.0053, "step": 46680 }, { "epoch": 1.3893145671224316, "grad_norm": 0.2981007695198059, "learning_rate": 6.493362471642654e-06, "loss": 0.0057, "step": 46690 }, { "epoch": 1.3896121286060732, "grad_norm": 0.10819593071937561, "learning_rate": 6.4917102282481445e-06, "loss": 0.0034, "step": 46700 }, { "epoch": 1.3899096900897148, "grad_norm": 0.13940750062465668, "learning_rate": 6.490057806029985e-06, "loss": 0.0056, "step": 46710 }, { "epoch": 1.3902072515733563, "grad_norm": 0.08936002105474472, "learning_rate": 6.488405205186271e-06, "loss": 0.0033, "step": 46720 }, { "epoch": 1.390504813056998, "grad_norm": 0.03552425652742386, "learning_rate": 6.486752425915108e-06, "loss": 0.0046, "step": 46730 }, { "epoch": 1.3908023745406395, "grad_norm": 0.22489099204540253, "learning_rate": 6.48509946841463e-06, "loss": 0.0071, "step": 46740 }, { "epoch": 1.391099936024281, "grad_norm": 0.21307453513145447, "learning_rate": 6.4834463328829924e-06, "loss": 0.0044, "step": 46750 }, { "epoch": 1.3913974975079226, "grad_norm": 0.3482118844985962, "learning_rate": 6.481793019518369e-06, "loss": 0.0049, "step": 46760 }, { "epoch": 1.3916950589915642, "grad_norm": 0.2240402102470398, "learning_rate": 6.480139528518955e-06, "loss": 0.0038, "step": 46770 }, { "epoch": 1.3919926204752056, "grad_norm": 0.2067977786064148, "learning_rate": 6.47848586008297e-06, "loss": 0.0065, "step": 46780 }, { "epoch": 1.3922901819588471, "grad_norm": 0.13322584331035614, "learning_rate": 6.476832014408652e-06, "loss": 0.0038, "step": 46790 }, { "epoch": 1.3925877434424887, "grad_norm": 0.36572378873825073, "learning_rate": 6.475177991694262e-06, "loss": 0.0025, "step": 46800 }, { "epoch": 1.3928853049261303, "grad_norm": 0.21226289868354797, "learning_rate": 6.473523792138078e-06, "loss": 0.0056, "step": 46810 }, { "epoch": 1.3931828664097718, "grad_norm": 0.10216303169727325, "learning_rate": 6.471869415938408e-06, "loss": 0.0037, "step": 46820 }, { "epoch": 1.3934804278934134, "grad_norm": 0.2993444502353668, "learning_rate": 6.47021486329357e-06, "loss": 0.0076, "step": 46830 }, { "epoch": 1.393777989377055, "grad_norm": 0.27656662464141846, "learning_rate": 6.468560134401914e-06, "loss": 0.0044, "step": 46840 }, { "epoch": 1.3940755508606966, "grad_norm": 0.38332968950271606, "learning_rate": 6.466905229461804e-06, "loss": 0.0048, "step": 46850 }, { "epoch": 1.3943731123443381, "grad_norm": 0.17284847795963287, "learning_rate": 6.4652501486716266e-06, "loss": 0.0038, "step": 46860 }, { "epoch": 1.3946706738279797, "grad_norm": 0.2951538562774658, "learning_rate": 6.46359489222979e-06, "loss": 0.0049, "step": 46870 }, { "epoch": 1.3949682353116213, "grad_norm": 0.34112364053726196, "learning_rate": 6.461939460334724e-06, "loss": 0.0057, "step": 46880 }, { "epoch": 1.3952657967952629, "grad_norm": 0.020351318642497063, "learning_rate": 6.46028385318488e-06, "loss": 0.0028, "step": 46890 }, { "epoch": 1.3955633582789044, "grad_norm": 0.148218035697937, "learning_rate": 6.458628070978728e-06, "loss": 0.0026, "step": 46900 }, { "epoch": 1.395860919762546, "grad_norm": 0.22073793411254883, "learning_rate": 6.456972113914759e-06, "loss": 0.0036, "step": 46910 }, { "epoch": 1.3961584812461876, "grad_norm": 0.26132941246032715, "learning_rate": 6.455315982191489e-06, "loss": 0.0082, "step": 46920 }, { "epoch": 1.3964560427298292, "grad_norm": 0.1049269586801529, "learning_rate": 6.453659676007451e-06, "loss": 0.0036, "step": 46930 }, { "epoch": 1.3967536042134707, "grad_norm": 0.14631153643131256, "learning_rate": 6.452003195561199e-06, "loss": 0.0033, "step": 46940 }, { "epoch": 1.3970511656971123, "grad_norm": 0.06739485263824463, "learning_rate": 6.4503465410513125e-06, "loss": 0.0038, "step": 46950 }, { "epoch": 1.3973487271807536, "grad_norm": 0.047377150505781174, "learning_rate": 6.4486897126763835e-06, "loss": 0.0042, "step": 46960 }, { "epoch": 1.3976462886643952, "grad_norm": 0.4220438301563263, "learning_rate": 6.447032710635035e-06, "loss": 0.0042, "step": 46970 }, { "epoch": 1.3979438501480368, "grad_norm": 0.18382306396961212, "learning_rate": 6.4453755351259e-06, "loss": 0.0031, "step": 46980 }, { "epoch": 1.3982414116316784, "grad_norm": 0.16945204138755798, "learning_rate": 6.443718186347643e-06, "loss": 0.0064, "step": 46990 }, { "epoch": 1.39853897311532, "grad_norm": 0.15346848964691162, "learning_rate": 6.442060664498939e-06, "loss": 0.0034, "step": 47000 }, { "epoch": 1.3988365345989615, "grad_norm": 0.38989031314849854, "learning_rate": 6.440402969778494e-06, "loss": 0.006, "step": 47010 }, { "epoch": 1.399134096082603, "grad_norm": 0.16876496374607086, "learning_rate": 6.438745102385027e-06, "loss": 0.0052, "step": 47020 }, { "epoch": 1.3994316575662447, "grad_norm": 0.38030603528022766, "learning_rate": 6.437087062517278e-06, "loss": 0.0047, "step": 47030 }, { "epoch": 1.3997292190498862, "grad_norm": 0.5159138441085815, "learning_rate": 6.4354288503740155e-06, "loss": 0.004, "step": 47040 }, { "epoch": 1.4000267805335278, "grad_norm": 0.04880242794752121, "learning_rate": 6.433770466154019e-06, "loss": 0.0055, "step": 47050 }, { "epoch": 1.4003243420171694, "grad_norm": 0.21572202444076538, "learning_rate": 6.432111910056094e-06, "loss": 0.005, "step": 47060 }, { "epoch": 1.4006219035008107, "grad_norm": 0.22936226427555084, "learning_rate": 6.430453182279064e-06, "loss": 0.0088, "step": 47070 }, { "epoch": 1.4009194649844523, "grad_norm": 0.20314723253250122, "learning_rate": 6.428794283021778e-06, "loss": 0.0039, "step": 47080 }, { "epoch": 1.4012170264680939, "grad_norm": 0.20242741703987122, "learning_rate": 6.427135212483097e-06, "loss": 0.0029, "step": 47090 }, { "epoch": 1.4015145879517354, "grad_norm": 0.21841204166412354, "learning_rate": 6.4254759708619115e-06, "loss": 0.0032, "step": 47100 }, { "epoch": 1.401812149435377, "grad_norm": 0.2399841547012329, "learning_rate": 6.4238165583571255e-06, "loss": 0.0037, "step": 47110 }, { "epoch": 1.4021097109190186, "grad_norm": 0.5772267580032349, "learning_rate": 6.422156975167671e-06, "loss": 0.0045, "step": 47120 }, { "epoch": 1.4024072724026602, "grad_norm": 0.1744539439678192, "learning_rate": 6.42049722149249e-06, "loss": 0.0035, "step": 47130 }, { "epoch": 1.4027048338863017, "grad_norm": 0.09828586876392365, "learning_rate": 6.4188372975305555e-06, "loss": 0.0055, "step": 47140 }, { "epoch": 1.4030023953699433, "grad_norm": 0.10344573855400085, "learning_rate": 6.417177203480854e-06, "loss": 0.0057, "step": 47150 }, { "epoch": 1.4032999568535849, "grad_norm": 0.1748245656490326, "learning_rate": 6.415516939542395e-06, "loss": 0.0075, "step": 47160 }, { "epoch": 1.4035975183372265, "grad_norm": 0.03307868912816048, "learning_rate": 6.41385650591421e-06, "loss": 0.0046, "step": 47170 }, { "epoch": 1.403895079820868, "grad_norm": 0.04834744334220886, "learning_rate": 6.412195902795346e-06, "loss": 0.005, "step": 47180 }, { "epoch": 1.4041926413045096, "grad_norm": 0.21419429779052734, "learning_rate": 6.4105351303848766e-06, "loss": 0.006, "step": 47190 }, { "epoch": 1.4044902027881512, "grad_norm": 0.1649186909198761, "learning_rate": 6.40887418888189e-06, "loss": 0.0059, "step": 47200 }, { "epoch": 1.4047877642717927, "grad_norm": 0.2854464650154114, "learning_rate": 6.407213078485497e-06, "loss": 0.0051, "step": 47210 }, { "epoch": 1.4050853257554343, "grad_norm": 0.36818286776542664, "learning_rate": 6.405551799394828e-06, "loss": 0.0035, "step": 47220 }, { "epoch": 1.405382887239076, "grad_norm": 0.13215894997119904, "learning_rate": 6.403890351809037e-06, "loss": 0.005, "step": 47230 }, { "epoch": 1.4056804487227175, "grad_norm": 0.1430586576461792, "learning_rate": 6.402228735927293e-06, "loss": 0.0064, "step": 47240 }, { "epoch": 1.4059780102063588, "grad_norm": 0.3719353675842285, "learning_rate": 6.400566951948789e-06, "loss": 0.0023, "step": 47250 }, { "epoch": 1.4062755716900004, "grad_norm": 0.038647498935461044, "learning_rate": 6.398905000072735e-06, "loss": 0.0044, "step": 47260 }, { "epoch": 1.406573133173642, "grad_norm": 0.1464531421661377, "learning_rate": 6.3972428804983654e-06, "loss": 0.003, "step": 47270 }, { "epoch": 1.4068706946572835, "grad_norm": 0.043898578733205795, "learning_rate": 6.395580593424931e-06, "loss": 0.005, "step": 47280 }, { "epoch": 1.4071682561409251, "grad_norm": 0.0179314985871315, "learning_rate": 6.3939181390517035e-06, "loss": 0.003, "step": 47290 }, { "epoch": 1.4074658176245667, "grad_norm": 0.3677171468734741, "learning_rate": 6.392255517577975e-06, "loss": 0.0043, "step": 47300 }, { "epoch": 1.4077633791082083, "grad_norm": 0.27348199486732483, "learning_rate": 6.390592729203058e-06, "loss": 0.0034, "step": 47310 }, { "epoch": 1.4080609405918498, "grad_norm": 0.1559198945760727, "learning_rate": 6.3889297741262856e-06, "loss": 0.0059, "step": 47320 }, { "epoch": 1.4083585020754914, "grad_norm": 0.054123472422361374, "learning_rate": 6.387266652547009e-06, "loss": 0.01, "step": 47330 }, { "epoch": 1.408656063559133, "grad_norm": 0.13664944469928741, "learning_rate": 6.385603364664601e-06, "loss": 0.0049, "step": 47340 }, { "epoch": 1.4089536250427743, "grad_norm": 0.14760427176952362, "learning_rate": 6.383939910678452e-06, "loss": 0.0039, "step": 47350 }, { "epoch": 1.409251186526416, "grad_norm": 0.28984618186950684, "learning_rate": 6.382276290787978e-06, "loss": 0.006, "step": 47360 }, { "epoch": 1.4095487480100575, "grad_norm": 0.13900679349899292, "learning_rate": 6.380612505192606e-06, "loss": 0.0031, "step": 47370 }, { "epoch": 1.409846309493699, "grad_norm": 0.42676305770874023, "learning_rate": 6.37894855409179e-06, "loss": 0.0041, "step": 47380 }, { "epoch": 1.4101438709773406, "grad_norm": 0.1352849304676056, "learning_rate": 6.3772844376850016e-06, "loss": 0.0052, "step": 47390 }, { "epoch": 1.4104414324609822, "grad_norm": 0.5402880311012268, "learning_rate": 6.375620156171734e-06, "loss": 0.006, "step": 47400 }, { "epoch": 1.4107389939446238, "grad_norm": 0.0751517117023468, "learning_rate": 6.373955709751494e-06, "loss": 0.0044, "step": 47410 }, { "epoch": 1.4110365554282653, "grad_norm": 0.16978509724140167, "learning_rate": 6.3722910986238165e-06, "loss": 0.0037, "step": 47420 }, { "epoch": 1.411334116911907, "grad_norm": 0.31112900376319885, "learning_rate": 6.3706263229882495e-06, "loss": 0.0063, "step": 47430 }, { "epoch": 1.4116316783955485, "grad_norm": 0.2366117537021637, "learning_rate": 6.368961383044364e-06, "loss": 0.0034, "step": 47440 }, { "epoch": 1.41192923987919, "grad_norm": 0.10448846220970154, "learning_rate": 6.367296278991752e-06, "loss": 0.003, "step": 47450 }, { "epoch": 1.4122268013628316, "grad_norm": 0.1380787044763565, "learning_rate": 6.365631011030023e-06, "loss": 0.0048, "step": 47460 }, { "epoch": 1.4125243628464732, "grad_norm": 0.08605523407459259, "learning_rate": 6.363965579358805e-06, "loss": 0.0026, "step": 47470 }, { "epoch": 1.4128219243301148, "grad_norm": 0.08809246122837067, "learning_rate": 6.3622999841777465e-06, "loss": 0.0045, "step": 47480 }, { "epoch": 1.4131194858137563, "grad_norm": 0.014447089284658432, "learning_rate": 6.360634225686519e-06, "loss": 0.0041, "step": 47490 }, { "epoch": 1.413417047297398, "grad_norm": 0.20037499070167542, "learning_rate": 6.358968304084808e-06, "loss": 0.0076, "step": 47500 }, { "epoch": 1.4137146087810395, "grad_norm": 0.2450481355190277, "learning_rate": 6.357302219572323e-06, "loss": 0.0031, "step": 47510 }, { "epoch": 1.414012170264681, "grad_norm": 0.08149167895317078, "learning_rate": 6.355635972348791e-06, "loss": 0.0032, "step": 47520 }, { "epoch": 1.4143097317483224, "grad_norm": 0.13969853520393372, "learning_rate": 6.353969562613957e-06, "loss": 0.0064, "step": 47530 }, { "epoch": 1.414607293231964, "grad_norm": 0.37472695112228394, "learning_rate": 6.352302990567589e-06, "loss": 0.0058, "step": 47540 }, { "epoch": 1.4149048547156056, "grad_norm": 0.3326396942138672, "learning_rate": 6.350636256409474e-06, "loss": 0.006, "step": 47550 }, { "epoch": 1.4152024161992471, "grad_norm": 0.21999888122081757, "learning_rate": 6.348969360339416e-06, "loss": 0.0036, "step": 47560 }, { "epoch": 1.4154999776828887, "grad_norm": 0.20639584958553314, "learning_rate": 6.347302302557239e-06, "loss": 0.0039, "step": 47570 }, { "epoch": 1.4157975391665303, "grad_norm": 0.23449543118476868, "learning_rate": 6.345635083262787e-06, "loss": 0.0053, "step": 47580 }, { "epoch": 1.4160951006501719, "grad_norm": 0.1451433151960373, "learning_rate": 6.3439677026559225e-06, "loss": 0.0082, "step": 47590 }, { "epoch": 1.4163926621338134, "grad_norm": 0.09098321944475174, "learning_rate": 6.342300160936531e-06, "loss": 0.0044, "step": 47600 }, { "epoch": 1.416690223617455, "grad_norm": 0.2708550691604614, "learning_rate": 6.340632458304513e-06, "loss": 0.004, "step": 47610 }, { "epoch": 1.4169877851010966, "grad_norm": 0.05131784453988075, "learning_rate": 6.33896459495979e-06, "loss": 0.0051, "step": 47620 }, { "epoch": 1.4172853465847381, "grad_norm": 0.2731799781322479, "learning_rate": 6.337296571102303e-06, "loss": 0.0043, "step": 47630 }, { "epoch": 1.4175829080683795, "grad_norm": 0.46183526515960693, "learning_rate": 6.33562838693201e-06, "loss": 0.004, "step": 47640 }, { "epoch": 1.417880469552021, "grad_norm": 0.23588162660598755, "learning_rate": 6.333960042648894e-06, "loss": 0.0047, "step": 47650 }, { "epoch": 1.4181780310356626, "grad_norm": 0.2673051059246063, "learning_rate": 6.332291538452948e-06, "loss": 0.0048, "step": 47660 }, { "epoch": 1.4184755925193042, "grad_norm": 0.11092909425497055, "learning_rate": 6.330622874544193e-06, "loss": 0.0056, "step": 47670 }, { "epoch": 1.4187731540029458, "grad_norm": 0.11257754266262054, "learning_rate": 6.328954051122664e-06, "loss": 0.0039, "step": 47680 }, { "epoch": 1.4190707154865874, "grad_norm": 0.2683485150337219, "learning_rate": 6.327285068388418e-06, "loss": 0.0045, "step": 47690 }, { "epoch": 1.419368276970229, "grad_norm": 0.21860145032405853, "learning_rate": 6.325615926541528e-06, "loss": 0.0058, "step": 47700 }, { "epoch": 1.4196658384538705, "grad_norm": 0.12454312294721603, "learning_rate": 6.323946625782089e-06, "loss": 0.004, "step": 47710 }, { "epoch": 1.419963399937512, "grad_norm": 0.32802680134773254, "learning_rate": 6.322277166310216e-06, "loss": 0.0035, "step": 47720 }, { "epoch": 1.4202609614211537, "grad_norm": 0.1168881505727768, "learning_rate": 6.320607548326034e-06, "loss": 0.0065, "step": 47730 }, { "epoch": 1.4205585229047952, "grad_norm": 0.2538186311721802, "learning_rate": 6.318937772029703e-06, "loss": 0.0052, "step": 47740 }, { "epoch": 1.4208560843884368, "grad_norm": 0.13343816995620728, "learning_rate": 6.317267837621386e-06, "loss": 0.0056, "step": 47750 }, { "epoch": 1.4211536458720784, "grad_norm": 0.10499176383018494, "learning_rate": 6.315597745301275e-06, "loss": 0.0052, "step": 47760 }, { "epoch": 1.42145120735572, "grad_norm": 0.11019008606672287, "learning_rate": 6.313927495269576e-06, "loss": 0.0056, "step": 47770 }, { "epoch": 1.4217487688393615, "grad_norm": 0.28688111901283264, "learning_rate": 6.312257087726519e-06, "loss": 0.0037, "step": 47780 }, { "epoch": 1.422046330323003, "grad_norm": 0.12255258113145828, "learning_rate": 6.310586522872346e-06, "loss": 0.0032, "step": 47790 }, { "epoch": 1.4223438918066447, "grad_norm": 0.19215627014636993, "learning_rate": 6.308915800907323e-06, "loss": 0.0054, "step": 47800 }, { "epoch": 1.4226414532902862, "grad_norm": 0.24106380343437195, "learning_rate": 6.307244922031732e-06, "loss": 0.0045, "step": 47810 }, { "epoch": 1.4229390147739276, "grad_norm": 0.3486521244049072, "learning_rate": 6.305573886445878e-06, "loss": 0.0043, "step": 47820 }, { "epoch": 1.4232365762575692, "grad_norm": 0.05739132687449455, "learning_rate": 6.303902694350079e-06, "loss": 0.0048, "step": 47830 }, { "epoch": 1.4235341377412107, "grad_norm": 0.0709555447101593, "learning_rate": 6.302231345944676e-06, "loss": 0.0026, "step": 47840 }, { "epoch": 1.4238316992248523, "grad_norm": 0.42084163427352905, "learning_rate": 6.300559841430027e-06, "loss": 0.005, "step": 47850 }, { "epoch": 1.4241292607084939, "grad_norm": 0.23262856900691986, "learning_rate": 6.298888181006508e-06, "loss": 0.0038, "step": 47860 }, { "epoch": 1.4244268221921355, "grad_norm": 0.187661275267601, "learning_rate": 6.297216364874517e-06, "loss": 0.0041, "step": 47870 }, { "epoch": 1.424724383675777, "grad_norm": 0.26093700528144836, "learning_rate": 6.295544393234466e-06, "loss": 0.0071, "step": 47880 }, { "epoch": 1.4250219451594186, "grad_norm": 0.24912923574447632, "learning_rate": 6.293872266286791e-06, "loss": 0.0053, "step": 47890 }, { "epoch": 1.4253195066430602, "grad_norm": 0.16342823207378387, "learning_rate": 6.292199984231942e-06, "loss": 0.0023, "step": 47900 }, { "epoch": 1.4256170681267017, "grad_norm": 0.1551217883825302, "learning_rate": 6.2905275472703876e-06, "loss": 0.0041, "step": 47910 }, { "epoch": 1.425914629610343, "grad_norm": 0.09171873331069946, "learning_rate": 6.288854955602619e-06, "loss": 0.0033, "step": 47920 }, { "epoch": 1.4262121910939847, "grad_norm": 0.34169918298721313, "learning_rate": 6.287182209429143e-06, "loss": 0.0026, "step": 47930 }, { "epoch": 1.4265097525776262, "grad_norm": 0.16513007879257202, "learning_rate": 6.285509308950486e-06, "loss": 0.0042, "step": 47940 }, { "epoch": 1.4268073140612678, "grad_norm": 0.0998222678899765, "learning_rate": 6.2838362543671925e-06, "loss": 0.0048, "step": 47950 }, { "epoch": 1.4271048755449094, "grad_norm": 0.15953369438648224, "learning_rate": 6.2821630458798236e-06, "loss": 0.0041, "step": 47960 }, { "epoch": 1.427402437028551, "grad_norm": 0.2881554067134857, "learning_rate": 6.2804896836889616e-06, "loss": 0.0059, "step": 47970 }, { "epoch": 1.4276999985121925, "grad_norm": 0.18420298397541046, "learning_rate": 6.278816167995208e-06, "loss": 0.0034, "step": 47980 }, { "epoch": 1.427997559995834, "grad_norm": 0.1362832486629486, "learning_rate": 6.277142498999178e-06, "loss": 0.0057, "step": 47990 }, { "epoch": 1.4282951214794757, "grad_norm": 0.5902379751205444, "learning_rate": 6.27546867690151e-06, "loss": 0.0036, "step": 48000 }, { "epoch": 1.4285926829631173, "grad_norm": 0.15185679495334625, "learning_rate": 6.273794701902858e-06, "loss": 0.0039, "step": 48010 }, { "epoch": 1.4288902444467588, "grad_norm": 0.19465257227420807, "learning_rate": 6.272120574203897e-06, "loss": 0.0047, "step": 48020 }, { "epoch": 1.4291878059304004, "grad_norm": 0.09958235174417496, "learning_rate": 6.2704462940053165e-06, "loss": 0.0029, "step": 48030 }, { "epoch": 1.429485367414042, "grad_norm": 0.0389677956700325, "learning_rate": 6.268771861507827e-06, "loss": 0.0025, "step": 48040 }, { "epoch": 1.4297829288976835, "grad_norm": 0.1172918826341629, "learning_rate": 6.2670972769121575e-06, "loss": 0.0038, "step": 48050 }, { "epoch": 1.4300804903813251, "grad_norm": 0.19482772052288055, "learning_rate": 6.265422540419054e-06, "loss": 0.0061, "step": 48060 }, { "epoch": 1.4303780518649667, "grad_norm": 0.19169116020202637, "learning_rate": 6.2637476522292794e-06, "loss": 0.0037, "step": 48070 }, { "epoch": 1.4306756133486083, "grad_norm": 0.09817734360694885, "learning_rate": 6.262072612543618e-06, "loss": 0.0053, "step": 48080 }, { "epoch": 1.4309731748322498, "grad_norm": 0.14575468003749847, "learning_rate": 6.26039742156287e-06, "loss": 0.0044, "step": 48090 }, { "epoch": 1.4312707363158912, "grad_norm": 0.0991915613412857, "learning_rate": 6.2587220794878554e-06, "loss": 0.0075, "step": 48100 }, { "epoch": 1.4315682977995328, "grad_norm": 0.1472131311893463, "learning_rate": 6.257046586519409e-06, "loss": 0.0067, "step": 48110 }, { "epoch": 1.4318658592831743, "grad_norm": 0.13127391040325165, "learning_rate": 6.25537094285839e-06, "loss": 0.0064, "step": 48120 }, { "epoch": 1.432163420766816, "grad_norm": 0.14418117702007294, "learning_rate": 6.253695148705666e-06, "loss": 0.0073, "step": 48130 }, { "epoch": 1.4324609822504575, "grad_norm": 0.1744680106639862, "learning_rate": 6.252019204262134e-06, "loss": 0.0037, "step": 48140 }, { "epoch": 1.432758543734099, "grad_norm": 0.26532918214797974, "learning_rate": 6.2503431097287e-06, "loss": 0.0048, "step": 48150 }, { "epoch": 1.4330561052177406, "grad_norm": 0.18892599642276764, "learning_rate": 6.248666865306291e-06, "loss": 0.0033, "step": 48160 }, { "epoch": 1.4333536667013822, "grad_norm": 0.1969974935054779, "learning_rate": 6.246990471195855e-06, "loss": 0.006, "step": 48170 }, { "epoch": 1.4336512281850238, "grad_norm": 0.31229451298713684, "learning_rate": 6.245313927598353e-06, "loss": 0.0025, "step": 48180 }, { "epoch": 1.4339487896686653, "grad_norm": 0.2568083703517914, "learning_rate": 6.243637234714768e-06, "loss": 0.0055, "step": 48190 }, { "epoch": 1.434246351152307, "grad_norm": 0.1763681173324585, "learning_rate": 6.241960392746097e-06, "loss": 0.0035, "step": 48200 }, { "epoch": 1.4345439126359483, "grad_norm": 0.06803237646818161, "learning_rate": 6.240283401893358e-06, "loss": 0.003, "step": 48210 }, { "epoch": 1.4348414741195898, "grad_norm": 0.23671692609786987, "learning_rate": 6.238606262357585e-06, "loss": 0.0065, "step": 48220 }, { "epoch": 1.4351390356032314, "grad_norm": 0.1654801368713379, "learning_rate": 6.236928974339832e-06, "loss": 0.0038, "step": 48230 }, { "epoch": 1.435436597086873, "grad_norm": 0.22678475081920624, "learning_rate": 6.235251538041168e-06, "loss": 0.0055, "step": 48240 }, { "epoch": 1.4357341585705146, "grad_norm": 0.06631319224834442, "learning_rate": 6.233573953662683e-06, "loss": 0.0064, "step": 48250 }, { "epoch": 1.4360317200541561, "grad_norm": 0.21417106688022614, "learning_rate": 6.231896221405479e-06, "loss": 0.0034, "step": 48260 }, { "epoch": 1.4363292815377977, "grad_norm": 0.2812708914279938, "learning_rate": 6.230218341470685e-06, "loss": 0.0067, "step": 48270 }, { "epoch": 1.4366268430214393, "grad_norm": 0.4210301637649536, "learning_rate": 6.228540314059438e-06, "loss": 0.0057, "step": 48280 }, { "epoch": 1.4369244045050809, "grad_norm": 0.32728341221809387, "learning_rate": 6.2268621393729e-06, "loss": 0.0054, "step": 48290 }, { "epoch": 1.4372219659887224, "grad_norm": 0.16855452954769135, "learning_rate": 6.225183817612245e-06, "loss": 0.0052, "step": 48300 }, { "epoch": 1.437519527472364, "grad_norm": 0.17945969104766846, "learning_rate": 6.223505348978669e-06, "loss": 0.0056, "step": 48310 }, { "epoch": 1.4378170889560056, "grad_norm": 0.13114213943481445, "learning_rate": 6.221826733673384e-06, "loss": 0.0032, "step": 48320 }, { "epoch": 1.4381146504396471, "grad_norm": 0.13813315331935883, "learning_rate": 6.220147971897619e-06, "loss": 0.0085, "step": 48330 }, { "epoch": 1.4384122119232887, "grad_norm": 0.10214205831289291, "learning_rate": 6.218469063852622e-06, "loss": 0.0041, "step": 48340 }, { "epoch": 1.4387097734069303, "grad_norm": 0.3111129403114319, "learning_rate": 6.216790009739655e-06, "loss": 0.0086, "step": 48350 }, { "epoch": 1.4390073348905719, "grad_norm": 0.08329875022172928, "learning_rate": 6.2151108097600034e-06, "loss": 0.0083, "step": 48360 }, { "epoch": 1.4393048963742134, "grad_norm": 0.29692187905311584, "learning_rate": 6.213431464114964e-06, "loss": 0.0041, "step": 48370 }, { "epoch": 1.439602457857855, "grad_norm": 0.07617787271738052, "learning_rate": 6.211751973005856e-06, "loss": 0.0043, "step": 48380 }, { "epoch": 1.4399000193414964, "grad_norm": 0.3721470534801483, "learning_rate": 6.210072336634012e-06, "loss": 0.006, "step": 48390 }, { "epoch": 1.440197580825138, "grad_norm": 0.07923649996519089, "learning_rate": 6.208392555200787e-06, "loss": 0.0056, "step": 48400 }, { "epoch": 1.4404951423087795, "grad_norm": 0.31953975558280945, "learning_rate": 6.2067126289075465e-06, "loss": 0.0039, "step": 48410 }, { "epoch": 1.440792703792421, "grad_norm": 0.06561809778213501, "learning_rate": 6.20503255795568e-06, "loss": 0.0032, "step": 48420 }, { "epoch": 1.4410902652760627, "grad_norm": 0.5026379823684692, "learning_rate": 6.203352342546589e-06, "loss": 0.004, "step": 48430 }, { "epoch": 1.4413878267597042, "grad_norm": 0.3310597538948059, "learning_rate": 6.2016719828816966e-06, "loss": 0.0037, "step": 48440 }, { "epoch": 1.4416853882433458, "grad_norm": 0.23450516164302826, "learning_rate": 6.199991479162442e-06, "loss": 0.0034, "step": 48450 }, { "epoch": 1.4419829497269874, "grad_norm": 0.31186389923095703, "learning_rate": 6.19831083159028e-06, "loss": 0.0034, "step": 48460 }, { "epoch": 1.442280511210629, "grad_norm": 0.16149097681045532, "learning_rate": 6.196630040366684e-06, "loss": 0.0032, "step": 48470 }, { "epoch": 1.4425780726942705, "grad_norm": 0.19097134470939636, "learning_rate": 6.194949105693142e-06, "loss": 0.0054, "step": 48480 }, { "epoch": 1.442875634177912, "grad_norm": 0.1266334056854248, "learning_rate": 6.1932680277711666e-06, "loss": 0.0029, "step": 48490 }, { "epoch": 1.4431731956615534, "grad_norm": 0.25055405497550964, "learning_rate": 6.191586806802277e-06, "loss": 0.0055, "step": 48500 }, { "epoch": 1.443470757145195, "grad_norm": 0.6395346522331238, "learning_rate": 6.1899054429880214e-06, "loss": 0.0043, "step": 48510 }, { "epoch": 1.4437683186288366, "grad_norm": 0.3219303786754608, "learning_rate": 6.188223936529952e-06, "loss": 0.007, "step": 48520 }, { "epoch": 1.4440658801124782, "grad_norm": 0.3137677311897278, "learning_rate": 6.18654228762965e-06, "loss": 0.0039, "step": 48530 }, { "epoch": 1.4443634415961197, "grad_norm": 0.09710780531167984, "learning_rate": 6.184860496488705e-06, "loss": 0.0028, "step": 48540 }, { "epoch": 1.4446610030797613, "grad_norm": 0.16972564160823822, "learning_rate": 6.183178563308731e-06, "loss": 0.0029, "step": 48550 }, { "epoch": 1.4449585645634029, "grad_norm": 0.31116703152656555, "learning_rate": 6.18149648829135e-06, "loss": 0.0046, "step": 48560 }, { "epoch": 1.4452561260470445, "grad_norm": 0.16455107927322388, "learning_rate": 6.179814271638212e-06, "loss": 0.0065, "step": 48570 }, { "epoch": 1.445553687530686, "grad_norm": 0.03675078973174095, "learning_rate": 6.178131913550972e-06, "loss": 0.0035, "step": 48580 }, { "epoch": 1.4458512490143276, "grad_norm": 0.031090758740901947, "learning_rate": 6.176449414231315e-06, "loss": 0.003, "step": 48590 }, { "epoch": 1.4461488104979692, "grad_norm": 0.27938929200172424, "learning_rate": 6.1747667738809314e-06, "loss": 0.0055, "step": 48600 }, { "epoch": 1.4464463719816107, "grad_norm": 0.2611536383628845, "learning_rate": 6.173083992701536e-06, "loss": 0.0045, "step": 48610 }, { "epoch": 1.4467439334652523, "grad_norm": 0.3601819574832916, "learning_rate": 6.171401070894855e-06, "loss": 0.0043, "step": 48620 }, { "epoch": 1.447041494948894, "grad_norm": 0.23718290030956268, "learning_rate": 6.169718008662638e-06, "loss": 0.0038, "step": 48630 }, { "epoch": 1.4473390564325355, "grad_norm": 0.07924366742372513, "learning_rate": 6.168034806206643e-06, "loss": 0.0028, "step": 48640 }, { "epoch": 1.447636617916177, "grad_norm": 0.1710876077413559, "learning_rate": 6.166351463728651e-06, "loss": 0.0051, "step": 48650 }, { "epoch": 1.4479341793998186, "grad_norm": 0.28919684886932373, "learning_rate": 6.16466798143046e-06, "loss": 0.0047, "step": 48660 }, { "epoch": 1.4482317408834602, "grad_norm": 0.14124077558517456, "learning_rate": 6.16298435951388e-06, "loss": 0.0044, "step": 48670 }, { "epoch": 1.4485293023671015, "grad_norm": 0.2761612832546234, "learning_rate": 6.161300598180745e-06, "loss": 0.0045, "step": 48680 }, { "epoch": 1.448826863850743, "grad_norm": 0.5809240341186523, "learning_rate": 6.1596166976328964e-06, "loss": 0.0046, "step": 48690 }, { "epoch": 1.4491244253343847, "grad_norm": 0.05817806348204613, "learning_rate": 6.157932658072201e-06, "loss": 0.0053, "step": 48700 }, { "epoch": 1.4494219868180263, "grad_norm": 0.1908450424671173, "learning_rate": 6.156248479700536e-06, "loss": 0.003, "step": 48710 }, { "epoch": 1.4497195483016678, "grad_norm": 0.19600456953048706, "learning_rate": 6.154564162719799e-06, "loss": 0.0065, "step": 48720 }, { "epoch": 1.4500171097853094, "grad_norm": 0.2280735820531845, "learning_rate": 6.152879707331905e-06, "loss": 0.0055, "step": 48730 }, { "epoch": 1.450314671268951, "grad_norm": 0.17194055020809174, "learning_rate": 6.1511951137387805e-06, "loss": 0.0043, "step": 48740 }, { "epoch": 1.4506122327525925, "grad_norm": 0.27164316177368164, "learning_rate": 6.149510382142372e-06, "loss": 0.0043, "step": 48750 }, { "epoch": 1.4509097942362341, "grad_norm": 0.08214712142944336, "learning_rate": 6.147825512744644e-06, "loss": 0.0043, "step": 48760 }, { "epoch": 1.4512073557198757, "grad_norm": 0.17753048241138458, "learning_rate": 6.1461405057475745e-06, "loss": 0.0036, "step": 48770 }, { "epoch": 1.451504917203517, "grad_norm": 0.2898969054222107, "learning_rate": 6.14445536135316e-06, "loss": 0.0046, "step": 48780 }, { "epoch": 1.4518024786871586, "grad_norm": 0.05331593006849289, "learning_rate": 6.142770079763412e-06, "loss": 0.0025, "step": 48790 }, { "epoch": 1.4521000401708002, "grad_norm": 0.34693795442581177, "learning_rate": 6.1410846611803576e-06, "loss": 0.0034, "step": 48800 }, { "epoch": 1.4523976016544418, "grad_norm": 0.05700208246707916, "learning_rate": 6.1393991058060455e-06, "loss": 0.0033, "step": 48810 }, { "epoch": 1.4526951631380833, "grad_norm": 0.05060836300253868, "learning_rate": 6.137713413842534e-06, "loss": 0.0033, "step": 48820 }, { "epoch": 1.452992724621725, "grad_norm": 0.2942027747631073, "learning_rate": 6.136027585491904e-06, "loss": 0.0044, "step": 48830 }, { "epoch": 1.4532902861053665, "grad_norm": 0.33531415462493896, "learning_rate": 6.1343416209562465e-06, "loss": 0.007, "step": 48840 }, { "epoch": 1.453587847589008, "grad_norm": 0.1845424920320511, "learning_rate": 6.132655520437674e-06, "loss": 0.0043, "step": 48850 }, { "epoch": 1.4538854090726496, "grad_norm": 0.35381484031677246, "learning_rate": 6.1309692841383105e-06, "loss": 0.0037, "step": 48860 }, { "epoch": 1.4541829705562912, "grad_norm": 0.1855231076478958, "learning_rate": 6.129282912260305e-06, "loss": 0.0033, "step": 48870 }, { "epoch": 1.4544805320399328, "grad_norm": 0.19103747606277466, "learning_rate": 6.12759640500581e-06, "loss": 0.0042, "step": 48880 }, { "epoch": 1.4547780935235743, "grad_norm": 0.11118438839912415, "learning_rate": 6.125909762577007e-06, "loss": 0.0033, "step": 48890 }, { "epoch": 1.455075655007216, "grad_norm": 0.022404802963137627, "learning_rate": 6.1242229851760825e-06, "loss": 0.004, "step": 48900 }, { "epoch": 1.4553732164908575, "grad_norm": 0.1381920725107193, "learning_rate": 6.12253607300525e-06, "loss": 0.0047, "step": 48910 }, { "epoch": 1.455670777974499, "grad_norm": 0.05663410201668739, "learning_rate": 6.120849026266728e-06, "loss": 0.0031, "step": 48920 }, { "epoch": 1.4559683394581406, "grad_norm": 0.4665980935096741, "learning_rate": 6.119161845162761e-06, "loss": 0.0054, "step": 48930 }, { "epoch": 1.4562659009417822, "grad_norm": 0.18903768062591553, "learning_rate": 6.1174745298956015e-06, "loss": 0.0055, "step": 48940 }, { "epoch": 1.4565634624254238, "grad_norm": 0.1295923888683319, "learning_rate": 6.115787080667527e-06, "loss": 0.006, "step": 48950 }, { "epoch": 1.4568610239090651, "grad_norm": 0.19667726755142212, "learning_rate": 6.114099497680821e-06, "loss": 0.0034, "step": 48960 }, { "epoch": 1.4571585853927067, "grad_norm": 0.18900389969348907, "learning_rate": 6.112411781137792e-06, "loss": 0.0037, "step": 48970 }, { "epoch": 1.4574561468763483, "grad_norm": 0.04961364343762398, "learning_rate": 6.110723931240758e-06, "loss": 0.0043, "step": 48980 }, { "epoch": 1.4577537083599899, "grad_norm": 0.2532874345779419, "learning_rate": 6.109035948192056e-06, "loss": 0.0037, "step": 48990 }, { "epoch": 1.4580512698436314, "grad_norm": 0.26777389645576477, "learning_rate": 6.107347832194038e-06, "loss": 0.0079, "step": 49000 }, { "epoch": 1.458348831327273, "grad_norm": 0.103674978017807, "learning_rate": 6.105659583449074e-06, "loss": 0.0036, "step": 49010 }, { "epoch": 1.4586463928109146, "grad_norm": 0.17097461223602295, "learning_rate": 6.103971202159549e-06, "loss": 0.0062, "step": 49020 }, { "epoch": 1.4589439542945561, "grad_norm": 0.14668309688568115, "learning_rate": 6.10228268852786e-06, "loss": 0.0059, "step": 49030 }, { "epoch": 1.4592415157781977, "grad_norm": 0.21018825471401215, "learning_rate": 6.100594042756426e-06, "loss": 0.0035, "step": 49040 }, { "epoch": 1.4595390772618393, "grad_norm": 0.27930039167404175, "learning_rate": 6.098905265047676e-06, "loss": 0.0052, "step": 49050 }, { "epoch": 1.4598366387454809, "grad_norm": 0.35884493589401245, "learning_rate": 6.097216355604062e-06, "loss": 0.0032, "step": 49060 }, { "epoch": 1.4601342002291222, "grad_norm": 0.12207792699337006, "learning_rate": 6.095527314628044e-06, "loss": 0.0031, "step": 49070 }, { "epoch": 1.4604317617127638, "grad_norm": 0.10741178691387177, "learning_rate": 6.0938381423221026e-06, "loss": 0.0033, "step": 49080 }, { "epoch": 1.4607293231964054, "grad_norm": 0.12932686507701874, "learning_rate": 6.0921488388887315e-06, "loss": 0.005, "step": 49090 }, { "epoch": 1.461026884680047, "grad_norm": 0.17340537905693054, "learning_rate": 6.090459404530445e-06, "loss": 0.003, "step": 49100 }, { "epoch": 1.4613244461636885, "grad_norm": 0.09776054322719574, "learning_rate": 6.0887698394497665e-06, "loss": 0.0143, "step": 49110 }, { "epoch": 1.46162200764733, "grad_norm": 0.05732797831296921, "learning_rate": 6.087080143849238e-06, "loss": 0.0027, "step": 49120 }, { "epoch": 1.4619195691309717, "grad_norm": 0.0686463788151741, "learning_rate": 6.085390317931419e-06, "loss": 0.0041, "step": 49130 }, { "epoch": 1.4622171306146132, "grad_norm": 0.25813889503479004, "learning_rate": 6.083700361898882e-06, "loss": 0.0033, "step": 49140 }, { "epoch": 1.4625146920982548, "grad_norm": 0.3134388029575348, "learning_rate": 6.082010275954215e-06, "loss": 0.0062, "step": 49150 }, { "epoch": 1.4628122535818964, "grad_norm": 0.1404409259557724, "learning_rate": 6.0803200603000245e-06, "loss": 0.0038, "step": 49160 }, { "epoch": 1.463109815065538, "grad_norm": 0.08737575262784958, "learning_rate": 6.07862971513893e-06, "loss": 0.0032, "step": 49170 }, { "epoch": 1.4634073765491795, "grad_norm": 0.24938873946666718, "learning_rate": 6.076939240673568e-06, "loss": 0.0072, "step": 49180 }, { "epoch": 1.463704938032821, "grad_norm": 0.16829030215740204, "learning_rate": 6.075248637106588e-06, "loss": 0.0051, "step": 49190 }, { "epoch": 1.4640024995164627, "grad_norm": 0.15085352957248688, "learning_rate": 6.073557904640657e-06, "loss": 0.0041, "step": 49200 }, { "epoch": 1.4643000610001042, "grad_norm": 0.4795553982257843, "learning_rate": 6.071867043478458e-06, "loss": 0.006, "step": 49210 }, { "epoch": 1.4645976224837458, "grad_norm": 0.18942245841026306, "learning_rate": 6.070176053822687e-06, "loss": 0.0039, "step": 49220 }, { "epoch": 1.4648951839673874, "grad_norm": 0.14126892387866974, "learning_rate": 6.068484935876059e-06, "loss": 0.0035, "step": 49230 }, { "epoch": 1.465192745451029, "grad_norm": 0.12291204184293747, "learning_rate": 6.066793689841301e-06, "loss": 0.0048, "step": 49240 }, { "epoch": 1.4654903069346703, "grad_norm": 0.12055239826440811, "learning_rate": 6.065102315921157e-06, "loss": 0.0045, "step": 49250 }, { "epoch": 1.4657878684183119, "grad_norm": 0.21448035538196564, "learning_rate": 6.0634108143183835e-06, "loss": 0.0052, "step": 49260 }, { "epoch": 1.4660854299019535, "grad_norm": 0.08057031780481339, "learning_rate": 6.061719185235759e-06, "loss": 0.006, "step": 49270 }, { "epoch": 1.466382991385595, "grad_norm": 0.13882304728031158, "learning_rate": 6.0600274288760705e-06, "loss": 0.0043, "step": 49280 }, { "epoch": 1.4666805528692366, "grad_norm": 0.3207781910896301, "learning_rate": 6.058335545442123e-06, "loss": 0.0036, "step": 49290 }, { "epoch": 1.4669781143528782, "grad_norm": 0.06759500503540039, "learning_rate": 6.056643535136739e-06, "loss": 0.0049, "step": 49300 }, { "epoch": 1.4672756758365197, "grad_norm": 0.20114627480506897, "learning_rate": 6.05495139816275e-06, "loss": 0.0038, "step": 49310 }, { "epoch": 1.4675732373201613, "grad_norm": 0.09368225187063217, "learning_rate": 6.053259134723011e-06, "loss": 0.0026, "step": 49320 }, { "epoch": 1.467870798803803, "grad_norm": 0.1816040277481079, "learning_rate": 6.051566745020382e-06, "loss": 0.0055, "step": 49330 }, { "epoch": 1.4681683602874445, "grad_norm": 0.09374180436134338, "learning_rate": 6.049874229257749e-06, "loss": 0.0031, "step": 49340 }, { "epoch": 1.4684659217710858, "grad_norm": 0.21813732385635376, "learning_rate": 6.048181587638002e-06, "loss": 0.0045, "step": 49350 }, { "epoch": 1.4687634832547274, "grad_norm": 0.08967921137809753, "learning_rate": 6.046488820364059e-06, "loss": 0.0027, "step": 49360 }, { "epoch": 1.469061044738369, "grad_norm": 0.1271364390850067, "learning_rate": 6.04479592763884e-06, "loss": 0.0043, "step": 49370 }, { "epoch": 1.4693586062220105, "grad_norm": 0.2945977747440338, "learning_rate": 6.04310290966529e-06, "loss": 0.0055, "step": 49380 }, { "epoch": 1.469656167705652, "grad_norm": 0.3013438582420349, "learning_rate": 6.041409766646364e-06, "loss": 0.0053, "step": 49390 }, { "epoch": 1.4699537291892937, "grad_norm": 0.23834170401096344, "learning_rate": 6.039716498785032e-06, "loss": 0.0049, "step": 49400 }, { "epoch": 1.4702512906729353, "grad_norm": 0.09874951094388962, "learning_rate": 6.0380231062842806e-06, "loss": 0.0026, "step": 49410 }, { "epoch": 1.4705488521565768, "grad_norm": 0.18028371036052704, "learning_rate": 6.036329589347112e-06, "loss": 0.0044, "step": 49420 }, { "epoch": 1.4708464136402184, "grad_norm": 0.4009816646575928, "learning_rate": 6.03463594817654e-06, "loss": 0.0041, "step": 49430 }, { "epoch": 1.47114397512386, "grad_norm": 0.15772970020771027, "learning_rate": 6.032942182975597e-06, "loss": 0.0043, "step": 49440 }, { "epoch": 1.4714415366075015, "grad_norm": 0.23155473172664642, "learning_rate": 6.031248293947327e-06, "loss": 0.0029, "step": 49450 }, { "epoch": 1.4717390980911431, "grad_norm": 0.2989043593406677, "learning_rate": 6.0295542812947915e-06, "loss": 0.0075, "step": 49460 }, { "epoch": 1.4720366595747847, "grad_norm": 0.13166525959968567, "learning_rate": 6.027860145221067e-06, "loss": 0.0056, "step": 49470 }, { "epoch": 1.4723342210584263, "grad_norm": 0.2503198981285095, "learning_rate": 6.0261658859292415e-06, "loss": 0.0046, "step": 49480 }, { "epoch": 1.4726317825420678, "grad_norm": 0.11772693693637848, "learning_rate": 6.024471503622422e-06, "loss": 0.0047, "step": 49490 }, { "epoch": 1.4729293440257094, "grad_norm": 0.16071945428848267, "learning_rate": 6.022776998503725e-06, "loss": 0.0046, "step": 49500 }, { "epoch": 1.473226905509351, "grad_norm": 0.46126505732536316, "learning_rate": 6.021082370776289e-06, "loss": 0.0042, "step": 49510 }, { "epoch": 1.4735244669929926, "grad_norm": 0.1361508071422577, "learning_rate": 6.019387620643259e-06, "loss": 0.0031, "step": 49520 }, { "epoch": 1.473822028476634, "grad_norm": 0.5279197096824646, "learning_rate": 6.017692748307802e-06, "loss": 0.0067, "step": 49530 }, { "epoch": 1.4741195899602755, "grad_norm": 0.14672808349132538, "learning_rate": 6.015997753973093e-06, "loss": 0.0033, "step": 49540 }, { "epoch": 1.474417151443917, "grad_norm": 0.11429179459810257, "learning_rate": 6.014302637842329e-06, "loss": 0.0062, "step": 49550 }, { "epoch": 1.4747147129275586, "grad_norm": 0.08217284083366394, "learning_rate": 6.012607400118714e-06, "loss": 0.0036, "step": 49560 }, { "epoch": 1.4750122744112002, "grad_norm": 0.2519383430480957, "learning_rate": 6.010912041005472e-06, "loss": 0.0028, "step": 49570 }, { "epoch": 1.4753098358948418, "grad_norm": 0.26206493377685547, "learning_rate": 6.009216560705841e-06, "loss": 0.0035, "step": 49580 }, { "epoch": 1.4756073973784833, "grad_norm": 0.3074917495250702, "learning_rate": 6.007520959423068e-06, "loss": 0.0033, "step": 49590 }, { "epoch": 1.475904958862125, "grad_norm": 0.2714962363243103, "learning_rate": 6.0058252373604245e-06, "loss": 0.004, "step": 49600 }, { "epoch": 1.4762025203457665, "grad_norm": 0.10979459434747696, "learning_rate": 6.004129394721185e-06, "loss": 0.0037, "step": 49610 }, { "epoch": 1.476500081829408, "grad_norm": 0.1563170850276947, "learning_rate": 6.002433431708648e-06, "loss": 0.0061, "step": 49620 }, { "epoch": 1.4767976433130496, "grad_norm": 0.10654307901859283, "learning_rate": 6.000737348526121e-06, "loss": 0.0081, "step": 49630 }, { "epoch": 1.477095204796691, "grad_norm": 0.09814513474702835, "learning_rate": 5.999041145376929e-06, "loss": 0.0038, "step": 49640 }, { "epoch": 1.4773927662803326, "grad_norm": 0.359336793422699, "learning_rate": 5.997344822464408e-06, "loss": 0.0053, "step": 49650 }, { "epoch": 1.4776903277639741, "grad_norm": 0.18103483319282532, "learning_rate": 5.995648379991912e-06, "loss": 0.0051, "step": 49660 }, { "epoch": 1.4779878892476157, "grad_norm": 0.4783105254173279, "learning_rate": 5.993951818162806e-06, "loss": 0.0056, "step": 49670 }, { "epoch": 1.4782854507312573, "grad_norm": 0.06045113876461983, "learning_rate": 5.992255137180472e-06, "loss": 0.0029, "step": 49680 }, { "epoch": 1.4785830122148989, "grad_norm": 0.054252129048109055, "learning_rate": 5.990558337248303e-06, "loss": 0.0058, "step": 49690 }, { "epoch": 1.4788805736985404, "grad_norm": 0.15868154168128967, "learning_rate": 5.9888614185697135e-06, "loss": 0.0093, "step": 49700 }, { "epoch": 1.479178135182182, "grad_norm": 0.058791596442461014, "learning_rate": 5.987164381348121e-06, "loss": 0.0028, "step": 49710 }, { "epoch": 1.4794756966658236, "grad_norm": 0.1948658674955368, "learning_rate": 5.985467225786969e-06, "loss": 0.0029, "step": 49720 }, { "epoch": 1.4797732581494651, "grad_norm": 0.08288059383630753, "learning_rate": 5.983769952089704e-06, "loss": 0.0037, "step": 49730 }, { "epoch": 1.4800708196331067, "grad_norm": 0.3540458083152771, "learning_rate": 5.982072560459798e-06, "loss": 0.0066, "step": 49740 }, { "epoch": 1.4803683811167483, "grad_norm": 0.21332131326198578, "learning_rate": 5.980375051100727e-06, "loss": 0.0035, "step": 49750 }, { "epoch": 1.4806659426003899, "grad_norm": 0.11538995802402496, "learning_rate": 5.978677424215987e-06, "loss": 0.0045, "step": 49760 }, { "epoch": 1.4809635040840314, "grad_norm": 0.16307908296585083, "learning_rate": 5.976979680009088e-06, "loss": 0.0027, "step": 49770 }, { "epoch": 1.481261065567673, "grad_norm": 0.15618926286697388, "learning_rate": 5.975281818683551e-06, "loss": 0.004, "step": 49780 }, { "epoch": 1.4815586270513146, "grad_norm": 0.06751128286123276, "learning_rate": 5.973583840442912e-06, "loss": 0.0148, "step": 49790 }, { "epoch": 1.4818561885349562, "grad_norm": 0.29363593459129333, "learning_rate": 5.971885745490723e-06, "loss": 0.0064, "step": 49800 }, { "epoch": 1.4821537500185977, "grad_norm": 0.1908939778804779, "learning_rate": 5.970187534030549e-06, "loss": 0.0026, "step": 49810 }, { "epoch": 1.482451311502239, "grad_norm": 0.010413913056254387, "learning_rate": 5.968489206265967e-06, "loss": 0.0033, "step": 49820 }, { "epoch": 1.4827488729858807, "grad_norm": 0.1194826290011406, "learning_rate": 5.9667907624005716e-06, "loss": 0.0033, "step": 49830 }, { "epoch": 1.4830464344695222, "grad_norm": 0.3088236451148987, "learning_rate": 5.965092202637968e-06, "loss": 0.0056, "step": 49840 }, { "epoch": 1.4833439959531638, "grad_norm": 0.16464851796627045, "learning_rate": 5.963393527181775e-06, "loss": 0.0053, "step": 49850 }, { "epoch": 1.4836415574368054, "grad_norm": 0.32615670561790466, "learning_rate": 5.961694736235631e-06, "loss": 0.0064, "step": 49860 }, { "epoch": 1.483939118920447, "grad_norm": 0.21547943353652954, "learning_rate": 5.9599958300031815e-06, "loss": 0.005, "step": 49870 }, { "epoch": 1.4842366804040885, "grad_norm": 0.06413677334785461, "learning_rate": 5.9582968086880885e-06, "loss": 0.0032, "step": 49880 }, { "epoch": 1.48453424188773, "grad_norm": 0.10422787815332413, "learning_rate": 5.9565976724940285e-06, "loss": 0.0043, "step": 49890 }, { "epoch": 1.4848318033713717, "grad_norm": 0.19322383403778076, "learning_rate": 5.954898421624691e-06, "loss": 0.0037, "step": 49900 }, { "epoch": 1.4851293648550132, "grad_norm": 0.20465072989463806, "learning_rate": 5.95319905628378e-06, "loss": 0.0047, "step": 49910 }, { "epoch": 1.4854269263386548, "grad_norm": 0.0906219556927681, "learning_rate": 5.951499576675008e-06, "loss": 0.0047, "step": 49920 }, { "epoch": 1.4857244878222962, "grad_norm": 0.23700156807899475, "learning_rate": 5.949799983002112e-06, "loss": 0.0104, "step": 49930 }, { "epoch": 1.4860220493059377, "grad_norm": 0.12667696177959442, "learning_rate": 5.9481002754688345e-06, "loss": 0.0039, "step": 49940 }, { "epoch": 1.4863196107895793, "grad_norm": 0.1102672815322876, "learning_rate": 5.94640045427893e-06, "loss": 0.0051, "step": 49950 }, { "epoch": 1.4866171722732209, "grad_norm": 0.15366458892822266, "learning_rate": 5.944700519636173e-06, "loss": 0.0053, "step": 49960 }, { "epoch": 1.4869147337568625, "grad_norm": 0.4416714012622833, "learning_rate": 5.943000471744348e-06, "loss": 0.0072, "step": 49970 }, { "epoch": 1.487212295240504, "grad_norm": 0.13284343481063843, "learning_rate": 5.941300310807255e-06, "loss": 0.0037, "step": 49980 }, { "epoch": 1.4875098567241456, "grad_norm": 0.19346125423908234, "learning_rate": 5.939600037028706e-06, "loss": 0.0046, "step": 49990 }, { "epoch": 1.4878074182077872, "grad_norm": 0.33618247509002686, "learning_rate": 5.937899650612524e-06, "loss": 0.0059, "step": 50000 }, { "epoch": 1.4878074182077872, "eval_loss": 0.003639652393758297, "eval_runtime": 3.9212, "eval_samples_per_second": 51.005, "eval_steps_per_second": 12.751, "step": 50000 }, { "epoch": 1.4881049796914287, "grad_norm": 0.12152113765478134, "learning_rate": 5.936199151762552e-06, "loss": 0.0037, "step": 50010 }, { "epoch": 1.4884025411750703, "grad_norm": 0.1371675282716751, "learning_rate": 5.9344985406826415e-06, "loss": 0.0103, "step": 50020 }, { "epoch": 1.4887001026587119, "grad_norm": 0.14642685651779175, "learning_rate": 5.93279781757666e-06, "loss": 0.0052, "step": 50030 }, { "epoch": 1.4889976641423535, "grad_norm": 0.2007470726966858, "learning_rate": 5.931096982648485e-06, "loss": 0.0049, "step": 50040 }, { "epoch": 1.489295225625995, "grad_norm": 0.19101084768772125, "learning_rate": 5.929396036102011e-06, "loss": 0.0031, "step": 50050 }, { "epoch": 1.4895927871096366, "grad_norm": 0.25893107056617737, "learning_rate": 5.9276949781411456e-06, "loss": 0.0053, "step": 50060 }, { "epoch": 1.4898903485932782, "grad_norm": 0.13887573778629303, "learning_rate": 5.925993808969805e-06, "loss": 0.0045, "step": 50070 }, { "epoch": 1.4901879100769198, "grad_norm": 0.12901993095874786, "learning_rate": 5.9242925287919286e-06, "loss": 0.0064, "step": 50080 }, { "epoch": 1.4904854715605613, "grad_norm": 0.3558916449546814, "learning_rate": 5.922591137811456e-06, "loss": 0.0044, "step": 50090 }, { "epoch": 1.4907830330442027, "grad_norm": 0.069380983710289, "learning_rate": 5.920889636232352e-06, "loss": 0.0031, "step": 50100 }, { "epoch": 1.4910805945278442, "grad_norm": 0.10453063249588013, "learning_rate": 5.919188024258586e-06, "loss": 0.0042, "step": 50110 }, { "epoch": 1.4913781560114858, "grad_norm": 0.2262493520975113, "learning_rate": 5.917486302094149e-06, "loss": 0.0036, "step": 50120 }, { "epoch": 1.4916757174951274, "grad_norm": 0.10258586704730988, "learning_rate": 5.915784469943037e-06, "loss": 0.0016, "step": 50130 }, { "epoch": 1.491973278978769, "grad_norm": 0.7102752923965454, "learning_rate": 5.914082528009261e-06, "loss": 0.0031, "step": 50140 }, { "epoch": 1.4922708404624105, "grad_norm": 0.16674792766571045, "learning_rate": 5.91238047649685e-06, "loss": 0.0059, "step": 50150 }, { "epoch": 1.4925684019460521, "grad_norm": 0.07898742705583572, "learning_rate": 5.910678315609844e-06, "loss": 0.0032, "step": 50160 }, { "epoch": 1.4928659634296937, "grad_norm": 0.21384473145008087, "learning_rate": 5.9089760455522925e-06, "loss": 0.0037, "step": 50170 }, { "epoch": 1.4931635249133353, "grad_norm": 0.08274625241756439, "learning_rate": 5.9072736665282616e-06, "loss": 0.0054, "step": 50180 }, { "epoch": 1.4934610863969768, "grad_norm": 0.7939322590827942, "learning_rate": 5.9055711787418316e-06, "loss": 0.0039, "step": 50190 }, { "epoch": 1.4937586478806184, "grad_norm": 0.08904176950454712, "learning_rate": 5.903868582397088e-06, "loss": 0.0046, "step": 50200 }, { "epoch": 1.4940562093642598, "grad_norm": 0.07248519361019135, "learning_rate": 5.902165877698141e-06, "loss": 0.0061, "step": 50210 }, { "epoch": 1.4943537708479013, "grad_norm": 0.5712917447090149, "learning_rate": 5.900463064849106e-06, "loss": 0.0033, "step": 50220 }, { "epoch": 1.494651332331543, "grad_norm": 0.15567952394485474, "learning_rate": 5.898760144054113e-06, "loss": 0.0036, "step": 50230 }, { "epoch": 1.4949488938151845, "grad_norm": 0.5165019035339355, "learning_rate": 5.897057115517304e-06, "loss": 0.0042, "step": 50240 }, { "epoch": 1.495246455298826, "grad_norm": 0.1305946558713913, "learning_rate": 5.8953539794428385e-06, "loss": 0.0038, "step": 50250 }, { "epoch": 1.4955440167824676, "grad_norm": 0.1569116860628128, "learning_rate": 5.893650736034882e-06, "loss": 0.0042, "step": 50260 }, { "epoch": 1.4958415782661092, "grad_norm": 0.5233315229415894, "learning_rate": 5.891947385497617e-06, "loss": 0.004, "step": 50270 }, { "epoch": 1.4961391397497508, "grad_norm": 0.13975922763347626, "learning_rate": 5.89024392803524e-06, "loss": 0.0063, "step": 50280 }, { "epoch": 1.4964367012333923, "grad_norm": 0.30691349506378174, "learning_rate": 5.888540363851956e-06, "loss": 0.0067, "step": 50290 }, { "epoch": 1.496734262717034, "grad_norm": 0.17468902468681335, "learning_rate": 5.886836693151989e-06, "loss": 0.0042, "step": 50300 }, { "epoch": 1.4970318242006755, "grad_norm": 0.18004870414733887, "learning_rate": 5.885132916139568e-06, "loss": 0.0031, "step": 50310 }, { "epoch": 1.497329385684317, "grad_norm": 0.20087267458438873, "learning_rate": 5.883429033018944e-06, "loss": 0.0039, "step": 50320 }, { "epoch": 1.4976269471679586, "grad_norm": 0.1658814549446106, "learning_rate": 5.8817250439943695e-06, "loss": 0.0052, "step": 50330 }, { "epoch": 1.4979245086516002, "grad_norm": 0.10822619497776031, "learning_rate": 5.88002094927012e-06, "loss": 0.0085, "step": 50340 }, { "epoch": 1.4982220701352418, "grad_norm": 0.11288507282733917, "learning_rate": 5.878316749050477e-06, "loss": 0.0043, "step": 50350 }, { "epoch": 1.4985196316188834, "grad_norm": 0.19884401559829712, "learning_rate": 5.876612443539739e-06, "loss": 0.0048, "step": 50360 }, { "epoch": 1.498817193102525, "grad_norm": 0.10863658040761948, "learning_rate": 5.874908032942214e-06, "loss": 0.0024, "step": 50370 }, { "epoch": 1.4991147545861665, "grad_norm": 0.28060582280158997, "learning_rate": 5.873203517462225e-06, "loss": 0.0032, "step": 50380 }, { "epoch": 1.4994123160698078, "grad_norm": 0.24486330151557922, "learning_rate": 5.871498897304103e-06, "loss": 0.0036, "step": 50390 }, { "epoch": 1.4997098775534494, "grad_norm": 0.1121179386973381, "learning_rate": 5.8697941726722e-06, "loss": 0.0091, "step": 50400 }, { "epoch": 1.500007439037091, "grad_norm": 0.24617184698581696, "learning_rate": 5.868089343770872e-06, "loss": 0.003, "step": 50410 }, { "epoch": 1.5003050005207326, "grad_norm": 0.15140999853610992, "learning_rate": 5.866384410804491e-06, "loss": 0.0069, "step": 50420 }, { "epoch": 1.5006025620043741, "grad_norm": 0.2610926926136017, "learning_rate": 5.864679373977443e-06, "loss": 0.0049, "step": 50430 }, { "epoch": 1.5009001234880157, "grad_norm": 0.07294842600822449, "learning_rate": 5.862974233494124e-06, "loss": 0.004, "step": 50440 }, { "epoch": 1.5011976849716573, "grad_norm": 0.14680978655815125, "learning_rate": 5.8612689895589445e-06, "loss": 0.0062, "step": 50450 }, { "epoch": 1.5014952464552989, "grad_norm": 0.3394315242767334, "learning_rate": 5.859563642376324e-06, "loss": 0.0034, "step": 50460 }, { "epoch": 1.5017928079389404, "grad_norm": 0.12803536653518677, "learning_rate": 5.857858192150698e-06, "loss": 0.0029, "step": 50470 }, { "epoch": 1.5020903694225818, "grad_norm": 0.3972744643688202, "learning_rate": 5.856152639086513e-06, "loss": 0.0052, "step": 50480 }, { "epoch": 1.5023879309062234, "grad_norm": 0.034096892923116684, "learning_rate": 5.8544469833882275e-06, "loss": 0.0056, "step": 50490 }, { "epoch": 1.502685492389865, "grad_norm": 0.20221100747585297, "learning_rate": 5.852741225260311e-06, "loss": 0.0045, "step": 50500 }, { "epoch": 1.5029830538735065, "grad_norm": 0.12006990611553192, "learning_rate": 5.8510353649072506e-06, "loss": 0.0036, "step": 50510 }, { "epoch": 1.503280615357148, "grad_norm": 0.07104434072971344, "learning_rate": 5.849329402533538e-06, "loss": 0.0058, "step": 50520 }, { "epoch": 1.5035781768407896, "grad_norm": 0.19052724540233612, "learning_rate": 5.847623338343683e-06, "loss": 0.0034, "step": 50530 }, { "epoch": 1.5038757383244312, "grad_norm": 0.3915010094642639, "learning_rate": 5.845917172542205e-06, "loss": 0.0036, "step": 50540 }, { "epoch": 1.5041732998080728, "grad_norm": 0.15829625725746155, "learning_rate": 5.844210905333637e-06, "loss": 0.0057, "step": 50550 }, { "epoch": 1.5044708612917144, "grad_norm": 0.04093565419316292, "learning_rate": 5.842504536922522e-06, "loss": 0.0054, "step": 50560 }, { "epoch": 1.504768422775356, "grad_norm": 0.24700655043125153, "learning_rate": 5.840798067513417e-06, "loss": 0.0029, "step": 50570 }, { "epoch": 1.5050659842589975, "grad_norm": 0.2798939347267151, "learning_rate": 5.839091497310892e-06, "loss": 0.0044, "step": 50580 }, { "epoch": 1.505363545742639, "grad_norm": 0.19739782810211182, "learning_rate": 5.837384826519527e-06, "loss": 0.0058, "step": 50590 }, { "epoch": 1.5056611072262807, "grad_norm": 0.10266823321580887, "learning_rate": 5.835678055343913e-06, "loss": 0.0033, "step": 50600 }, { "epoch": 1.5059586687099222, "grad_norm": 0.1466410756111145, "learning_rate": 5.833971183988657e-06, "loss": 0.0025, "step": 50610 }, { "epoch": 1.5062562301935638, "grad_norm": 0.21635082364082336, "learning_rate": 5.832264212658375e-06, "loss": 0.0055, "step": 50620 }, { "epoch": 1.5065537916772054, "grad_norm": 0.22265851497650146, "learning_rate": 5.830557141557696e-06, "loss": 0.0038, "step": 50630 }, { "epoch": 1.506851353160847, "grad_norm": 0.09767378866672516, "learning_rate": 5.82884997089126e-06, "loss": 0.0046, "step": 50640 }, { "epoch": 1.5071489146444885, "grad_norm": 0.17147234082221985, "learning_rate": 5.8271427008637195e-06, "loss": 0.0051, "step": 50650 }, { "epoch": 1.50744647612813, "grad_norm": 0.14313076436519623, "learning_rate": 5.825435331679741e-06, "loss": 0.0031, "step": 50660 }, { "epoch": 1.5077440376117717, "grad_norm": 0.12577661871910095, "learning_rate": 5.823727863543999e-06, "loss": 0.0034, "step": 50670 }, { "epoch": 1.5080415990954132, "grad_norm": 0.019219323992729187, "learning_rate": 5.822020296661182e-06, "loss": 0.0018, "step": 50680 }, { "epoch": 1.5083391605790546, "grad_norm": 0.22143130004405975, "learning_rate": 5.820312631235992e-06, "loss": 0.0034, "step": 50690 }, { "epoch": 1.5086367220626962, "grad_norm": 0.21042951941490173, "learning_rate": 5.818604867473139e-06, "loss": 0.0024, "step": 50700 }, { "epoch": 1.5089342835463377, "grad_norm": 0.04891940951347351, "learning_rate": 5.816897005577346e-06, "loss": 0.0026, "step": 50710 }, { "epoch": 1.5092318450299793, "grad_norm": 0.09657123684883118, "learning_rate": 5.815189045753351e-06, "loss": 0.0033, "step": 50720 }, { "epoch": 1.5095294065136209, "grad_norm": 0.15600889921188354, "learning_rate": 5.813480988205901e-06, "loss": 0.0026, "step": 50730 }, { "epoch": 1.5098269679972625, "grad_norm": 0.03915322199463844, "learning_rate": 5.811772833139754e-06, "loss": 0.0034, "step": 50740 }, { "epoch": 1.510124529480904, "grad_norm": 0.36136671900749207, "learning_rate": 5.810064580759681e-06, "loss": 0.0036, "step": 50750 }, { "epoch": 1.5104220909645454, "grad_norm": 0.39177361130714417, "learning_rate": 5.808356231270463e-06, "loss": 0.0073, "step": 50760 }, { "epoch": 1.510719652448187, "grad_norm": 0.12210454791784286, "learning_rate": 5.806647784876898e-06, "loss": 0.0037, "step": 50770 }, { "epoch": 1.5110172139318285, "grad_norm": 0.2496972233057022, "learning_rate": 5.8049392417837855e-06, "loss": 0.0039, "step": 50780 }, { "epoch": 1.51131477541547, "grad_norm": 0.17027316987514496, "learning_rate": 5.803230602195948e-06, "loss": 0.0028, "step": 50790 }, { "epoch": 1.5116123368991117, "grad_norm": 0.32548660039901733, "learning_rate": 5.801521866318213e-06, "loss": 0.0045, "step": 50800 }, { "epoch": 1.5119098983827532, "grad_norm": 0.19142359495162964, "learning_rate": 5.79981303435542e-06, "loss": 0.0031, "step": 50810 }, { "epoch": 1.5122074598663948, "grad_norm": 0.33260759711265564, "learning_rate": 5.798104106512419e-06, "loss": 0.0041, "step": 50820 }, { "epoch": 1.5125050213500364, "grad_norm": 0.057211730629205704, "learning_rate": 5.796395082994078e-06, "loss": 0.0025, "step": 50830 }, { "epoch": 1.512802582833678, "grad_norm": 0.18822340667247772, "learning_rate": 5.794685964005265e-06, "loss": 0.003, "step": 50840 }, { "epoch": 1.5131001443173195, "grad_norm": 0.16691260039806366, "learning_rate": 5.792976749750874e-06, "loss": 0.0056, "step": 50850 }, { "epoch": 1.5133977058009611, "grad_norm": 0.29186442494392395, "learning_rate": 5.791267440435797e-06, "loss": 0.0039, "step": 50860 }, { "epoch": 1.5136952672846027, "grad_norm": 0.010320492088794708, "learning_rate": 5.789558036264946e-06, "loss": 0.0035, "step": 50870 }, { "epoch": 1.5139928287682443, "grad_norm": 0.38235464692115784, "learning_rate": 5.787848537443239e-06, "loss": 0.0026, "step": 50880 }, { "epoch": 1.5142903902518858, "grad_norm": 0.11023290455341339, "learning_rate": 5.786138944175612e-06, "loss": 0.0092, "step": 50890 }, { "epoch": 1.5145879517355274, "grad_norm": 0.10104721039533615, "learning_rate": 5.784429256667003e-06, "loss": 0.0029, "step": 50900 }, { "epoch": 1.514885513219169, "grad_norm": 0.0751640796661377, "learning_rate": 5.78271947512237e-06, "loss": 0.004, "step": 50910 }, { "epoch": 1.5151830747028106, "grad_norm": 0.12327061593532562, "learning_rate": 5.781009599746677e-06, "loss": 0.0056, "step": 50920 }, { "epoch": 1.5154806361864521, "grad_norm": 0.15857523679733276, "learning_rate": 5.7792996307449015e-06, "loss": 0.003, "step": 50930 }, { "epoch": 1.5157781976700937, "grad_norm": 0.12778493762016296, "learning_rate": 5.777589568322031e-06, "loss": 0.0029, "step": 50940 }, { "epoch": 1.5160757591537353, "grad_norm": 0.4234083592891693, "learning_rate": 5.775879412683064e-06, "loss": 0.0042, "step": 50950 }, { "epoch": 1.5163733206373768, "grad_norm": 0.07862228900194168, "learning_rate": 5.774169164033014e-06, "loss": 0.0035, "step": 50960 }, { "epoch": 1.5166708821210184, "grad_norm": 0.1559690535068512, "learning_rate": 5.7724588225769e-06, "loss": 0.0069, "step": 50970 }, { "epoch": 1.5169684436046598, "grad_norm": 0.1740228533744812, "learning_rate": 5.770748388519757e-06, "loss": 0.0029, "step": 50980 }, { "epoch": 1.5172660050883013, "grad_norm": 0.23032434284687042, "learning_rate": 5.769037862066625e-06, "loss": 0.0038, "step": 50990 }, { "epoch": 1.517563566571943, "grad_norm": 0.24779780209064484, "learning_rate": 5.767327243422562e-06, "loss": 0.004, "step": 51000 }, { "epoch": 1.5178611280555845, "grad_norm": 0.21559113264083862, "learning_rate": 5.765616532792635e-06, "loss": 0.0041, "step": 51010 }, { "epoch": 1.518158689539226, "grad_norm": 0.0618782602250576, "learning_rate": 5.763905730381919e-06, "loss": 0.0034, "step": 51020 }, { "epoch": 1.5184562510228676, "grad_norm": 0.3642655313014984, "learning_rate": 5.7621948363955025e-06, "loss": 0.0052, "step": 51030 }, { "epoch": 1.5187538125065092, "grad_norm": 0.06381016969680786, "learning_rate": 5.760483851038485e-06, "loss": 0.0031, "step": 51040 }, { "epoch": 1.5190513739901506, "grad_norm": 0.17340096831321716, "learning_rate": 5.758772774515975e-06, "loss": 0.0077, "step": 51050 }, { "epoch": 1.5193489354737921, "grad_norm": 0.10655340552330017, "learning_rate": 5.757061607033097e-06, "loss": 0.0042, "step": 51060 }, { "epoch": 1.5196464969574337, "grad_norm": 0.07854633033275604, "learning_rate": 5.755350348794977e-06, "loss": 0.0049, "step": 51070 }, { "epoch": 1.5199440584410753, "grad_norm": 0.20664307475090027, "learning_rate": 5.753639000006763e-06, "loss": 0.0067, "step": 51080 }, { "epoch": 1.5202416199247168, "grad_norm": 0.3978140354156494, "learning_rate": 5.751927560873607e-06, "loss": 0.006, "step": 51090 }, { "epoch": 1.5205391814083584, "grad_norm": 0.23858818411827087, "learning_rate": 5.750216031600671e-06, "loss": 0.0055, "step": 51100 }, { "epoch": 1.520836742892, "grad_norm": 0.14702439308166504, "learning_rate": 5.748504412393133e-06, "loss": 0.004, "step": 51110 }, { "epoch": 1.5211343043756416, "grad_norm": 0.22397007048130035, "learning_rate": 5.746792703456177e-06, "loss": 0.0027, "step": 51120 }, { "epoch": 1.5214318658592831, "grad_norm": 0.21407632529735565, "learning_rate": 5.745080904995002e-06, "loss": 0.0024, "step": 51130 }, { "epoch": 1.5217294273429247, "grad_norm": 0.3989519774913788, "learning_rate": 5.743369017214812e-06, "loss": 0.0057, "step": 51140 }, { "epoch": 1.5220269888265663, "grad_norm": 0.18545490503311157, "learning_rate": 5.74165704032083e-06, "loss": 0.0065, "step": 51150 }, { "epoch": 1.5223245503102079, "grad_norm": 0.1691645234823227, "learning_rate": 5.7399449745182814e-06, "loss": 0.003, "step": 51160 }, { "epoch": 1.5226221117938494, "grad_norm": 0.30511632561683655, "learning_rate": 5.738232820012407e-06, "loss": 0.0038, "step": 51170 }, { "epoch": 1.522919673277491, "grad_norm": 0.16793791949748993, "learning_rate": 5.7365205770084556e-06, "loss": 0.0036, "step": 51180 }, { "epoch": 1.5232172347611326, "grad_norm": 0.21574869751930237, "learning_rate": 5.734808245711688e-06, "loss": 0.0038, "step": 51190 }, { "epoch": 1.5235147962447741, "grad_norm": 0.37076690793037415, "learning_rate": 5.7330958263273775e-06, "loss": 0.0037, "step": 51200 }, { "epoch": 1.5238123577284157, "grad_norm": 0.2058427929878235, "learning_rate": 5.731383319060805e-06, "loss": 0.0038, "step": 51210 }, { "epoch": 1.5241099192120573, "grad_norm": 0.12513470649719238, "learning_rate": 5.729670724117261e-06, "loss": 0.0054, "step": 51220 }, { "epoch": 1.5244074806956989, "grad_norm": 0.22531920671463013, "learning_rate": 5.727958041702051e-06, "loss": 0.0034, "step": 51230 }, { "epoch": 1.5247050421793404, "grad_norm": 0.12217914313077927, "learning_rate": 5.7262452720204866e-06, "loss": 0.0031, "step": 51240 }, { "epoch": 1.525002603662982, "grad_norm": 0.19161392748355865, "learning_rate": 5.724532415277895e-06, "loss": 0.005, "step": 51250 }, { "epoch": 1.5253001651466234, "grad_norm": 0.08564086258411407, "learning_rate": 5.7228194716796055e-06, "loss": 0.0036, "step": 51260 }, { "epoch": 1.525597726630265, "grad_norm": 0.17505796253681183, "learning_rate": 5.7211064414309646e-06, "loss": 0.0054, "step": 51270 }, { "epoch": 1.5258952881139065, "grad_norm": 0.3338027894496918, "learning_rate": 5.71939332473733e-06, "loss": 0.0042, "step": 51280 }, { "epoch": 1.526192849597548, "grad_norm": 0.21286742389202118, "learning_rate": 5.717680121804066e-06, "loss": 0.0037, "step": 51290 }, { "epoch": 1.5264904110811897, "grad_norm": 0.12392473220825195, "learning_rate": 5.715966832836547e-06, "loss": 0.0038, "step": 51300 }, { "epoch": 1.5267879725648312, "grad_norm": 0.20262670516967773, "learning_rate": 5.714253458040159e-06, "loss": 0.0046, "step": 51310 }, { "epoch": 1.5270855340484728, "grad_norm": 0.2029346376657486, "learning_rate": 5.7125399976203e-06, "loss": 0.0059, "step": 51320 }, { "epoch": 1.5273830955321144, "grad_norm": 0.106532983481884, "learning_rate": 5.7108264517823755e-06, "loss": 0.0031, "step": 51330 }, { "epoch": 1.5276806570157557, "grad_norm": 0.10211092233657837, "learning_rate": 5.709112820731804e-06, "loss": 0.006, "step": 51340 }, { "epoch": 1.5279782184993973, "grad_norm": 0.1310439109802246, "learning_rate": 5.70739910467401e-06, "loss": 0.004, "step": 51350 }, { "epoch": 1.5282757799830389, "grad_norm": 0.41871336102485657, "learning_rate": 5.705685303814434e-06, "loss": 0.0044, "step": 51360 }, { "epoch": 1.5285733414666804, "grad_norm": 0.23480679094791412, "learning_rate": 5.7039714183585205e-06, "loss": 0.004, "step": 51370 }, { "epoch": 1.528870902950322, "grad_norm": 0.13996422290802002, "learning_rate": 5.70225744851173e-06, "loss": 0.0041, "step": 51380 }, { "epoch": 1.5291684644339636, "grad_norm": 0.09949488937854767, "learning_rate": 5.700543394479528e-06, "loss": 0.0051, "step": 51390 }, { "epoch": 1.5294660259176052, "grad_norm": 0.2870221734046936, "learning_rate": 5.698829256467392e-06, "loss": 0.0044, "step": 51400 }, { "epoch": 1.5297635874012467, "grad_norm": 0.1595589518547058, "learning_rate": 5.697115034680811e-06, "loss": 0.0028, "step": 51410 }, { "epoch": 1.5300611488848883, "grad_norm": 0.35708603262901306, "learning_rate": 5.695400729325284e-06, "loss": 0.0059, "step": 51420 }, { "epoch": 1.5303587103685299, "grad_norm": 0.20823068916797638, "learning_rate": 5.693686340606318e-06, "loss": 0.0053, "step": 51430 }, { "epoch": 1.5306562718521715, "grad_norm": 0.4563087821006775, "learning_rate": 5.691971868729429e-06, "loss": 0.0045, "step": 51440 }, { "epoch": 1.530953833335813, "grad_norm": 0.18630509078502655, "learning_rate": 5.6902573139001496e-06, "loss": 0.0034, "step": 51450 }, { "epoch": 1.5312513948194546, "grad_norm": 0.18652044236660004, "learning_rate": 5.688542676324012e-06, "loss": 0.0039, "step": 51460 }, { "epoch": 1.5315489563030962, "grad_norm": 0.13742564618587494, "learning_rate": 5.686827956206569e-06, "loss": 0.0079, "step": 51470 }, { "epoch": 1.5318465177867377, "grad_norm": 0.272531121969223, "learning_rate": 5.6851131537533746e-06, "loss": 0.0025, "step": 51480 }, { "epoch": 1.5321440792703793, "grad_norm": 0.12200309336185455, "learning_rate": 5.6833982691699996e-06, "loss": 0.0055, "step": 51490 }, { "epoch": 1.532441640754021, "grad_norm": 0.10669661313295364, "learning_rate": 5.681683302662018e-06, "loss": 0.0041, "step": 51500 }, { "epoch": 1.5327392022376625, "grad_norm": 0.13879650831222534, "learning_rate": 5.67996825443502e-06, "loss": 0.0043, "step": 51510 }, { "epoch": 1.533036763721304, "grad_norm": 0.19435499608516693, "learning_rate": 5.678253124694599e-06, "loss": 0.0035, "step": 51520 }, { "epoch": 1.5333343252049456, "grad_norm": 0.09592799842357635, "learning_rate": 5.676537913646367e-06, "loss": 0.0031, "step": 51530 }, { "epoch": 1.5336318866885872, "grad_norm": 0.4074072241783142, "learning_rate": 5.674822621495935e-06, "loss": 0.0041, "step": 51540 }, { "epoch": 1.5339294481722285, "grad_norm": 0.16502636671066284, "learning_rate": 5.673107248448935e-06, "loss": 0.0033, "step": 51550 }, { "epoch": 1.53422700965587, "grad_norm": 0.2799740135669708, "learning_rate": 5.671391794710998e-06, "loss": 0.0045, "step": 51560 }, { "epoch": 1.5345245711395117, "grad_norm": 0.2596401870250702, "learning_rate": 5.669676260487771e-06, "loss": 0.0042, "step": 51570 }, { "epoch": 1.5348221326231533, "grad_norm": 0.12022741883993149, "learning_rate": 5.6679606459849105e-06, "loss": 0.0083, "step": 51580 }, { "epoch": 1.5351196941067948, "grad_norm": 0.08394128829240799, "learning_rate": 5.6662449514080795e-06, "loss": 0.0031, "step": 51590 }, { "epoch": 1.5354172555904364, "grad_norm": 0.12568779289722443, "learning_rate": 5.664529176962956e-06, "loss": 0.0037, "step": 51600 }, { "epoch": 1.535714817074078, "grad_norm": 0.19480633735656738, "learning_rate": 5.6628133228552204e-06, "loss": 0.0031, "step": 51610 }, { "epoch": 1.5360123785577193, "grad_norm": 0.11319985240697861, "learning_rate": 5.661097389290568e-06, "loss": 0.0025, "step": 51620 }, { "epoch": 1.536309940041361, "grad_norm": 0.13011614978313446, "learning_rate": 5.659381376474702e-06, "loss": 0.0046, "step": 51630 }, { "epoch": 1.5366075015250025, "grad_norm": 0.12054784595966339, "learning_rate": 5.657665284613334e-06, "loss": 0.0032, "step": 51640 }, { "epoch": 1.536905063008644, "grad_norm": 0.19208014011383057, "learning_rate": 5.655949113912187e-06, "loss": 0.0044, "step": 51650 }, { "epoch": 1.5372026244922856, "grad_norm": 0.19706737995147705, "learning_rate": 5.6542328645769936e-06, "loss": 0.0044, "step": 51660 }, { "epoch": 1.5375001859759272, "grad_norm": 0.18838195502758026, "learning_rate": 5.652516536813492e-06, "loss": 0.0042, "step": 51670 }, { "epoch": 1.5377977474595688, "grad_norm": 0.16395263373851776, "learning_rate": 5.650800130827436e-06, "loss": 0.0053, "step": 51680 }, { "epoch": 1.5380953089432103, "grad_norm": 0.13723447918891907, "learning_rate": 5.6490836468245815e-06, "loss": 0.0042, "step": 51690 }, { "epoch": 1.538392870426852, "grad_norm": 0.1412908434867859, "learning_rate": 5.6473670850107e-06, "loss": 0.0043, "step": 51700 }, { "epoch": 1.5386904319104935, "grad_norm": 0.12751729786396027, "learning_rate": 5.645650445591572e-06, "loss": 0.0043, "step": 51710 }, { "epoch": 1.538987993394135, "grad_norm": 0.06463716924190521, "learning_rate": 5.64393372877298e-06, "loss": 0.002, "step": 51720 }, { "epoch": 1.5392855548777766, "grad_norm": 0.32314780354499817, "learning_rate": 5.642216934760727e-06, "loss": 0.0057, "step": 51730 }, { "epoch": 1.5395831163614182, "grad_norm": 0.4292568564414978, "learning_rate": 5.640500063760615e-06, "loss": 0.0037, "step": 51740 }, { "epoch": 1.5398806778450598, "grad_norm": 0.18835465610027313, "learning_rate": 5.638783115978461e-06, "loss": 0.0046, "step": 51750 }, { "epoch": 1.5401782393287013, "grad_norm": 0.2029944658279419, "learning_rate": 5.637066091620089e-06, "loss": 0.0042, "step": 51760 }, { "epoch": 1.540475800812343, "grad_norm": 0.13935215771198273, "learning_rate": 5.635348990891334e-06, "loss": 0.0055, "step": 51770 }, { "epoch": 1.5407733622959845, "grad_norm": 0.007159961853176355, "learning_rate": 5.633631813998038e-06, "loss": 0.0018, "step": 51780 }, { "epoch": 1.541070923779626, "grad_norm": 0.2739007771015167, "learning_rate": 5.631914561146056e-06, "loss": 0.0035, "step": 51790 }, { "epoch": 1.5413684852632676, "grad_norm": 0.11505091935396194, "learning_rate": 5.630197232541245e-06, "loss": 0.0041, "step": 51800 }, { "epoch": 1.5416660467469092, "grad_norm": 0.21093231439590454, "learning_rate": 5.628479828389478e-06, "loss": 0.005, "step": 51810 }, { "epoch": 1.5419636082305508, "grad_norm": 0.3413921296596527, "learning_rate": 5.626762348896634e-06, "loss": 0.0034, "step": 51820 }, { "epoch": 1.5422611697141924, "grad_norm": 0.23547232151031494, "learning_rate": 5.625044794268603e-06, "loss": 0.0109, "step": 51830 }, { "epoch": 1.5425587311978337, "grad_norm": 0.1197640672326088, "learning_rate": 5.623327164711277e-06, "loss": 0.0037, "step": 51840 }, { "epoch": 1.5428562926814753, "grad_norm": 0.11505675315856934, "learning_rate": 5.621609460430569e-06, "loss": 0.0082, "step": 51850 }, { "epoch": 1.5431538541651169, "grad_norm": 0.12175139784812927, "learning_rate": 5.619891681632393e-06, "loss": 0.004, "step": 51860 }, { "epoch": 1.5434514156487584, "grad_norm": 0.0958792045712471, "learning_rate": 5.61817382852267e-06, "loss": 0.0034, "step": 51870 }, { "epoch": 1.5437489771324, "grad_norm": 0.15798503160476685, "learning_rate": 5.616455901307337e-06, "loss": 0.0046, "step": 51880 }, { "epoch": 1.5440465386160416, "grad_norm": 0.644140899181366, "learning_rate": 5.614737900192334e-06, "loss": 0.0033, "step": 51890 }, { "epoch": 1.5443441000996831, "grad_norm": 0.2750042974948883, "learning_rate": 5.613019825383613e-06, "loss": 0.004, "step": 51900 }, { "epoch": 1.5446416615833245, "grad_norm": 0.19288796186447144, "learning_rate": 5.611301677087132e-06, "loss": 0.0042, "step": 51910 }, { "epoch": 1.544939223066966, "grad_norm": 0.1275346875190735, "learning_rate": 5.609583455508862e-06, "loss": 0.0049, "step": 51920 }, { "epoch": 1.5452367845506076, "grad_norm": 0.17280012369155884, "learning_rate": 5.60786516085478e-06, "loss": 0.0048, "step": 51930 }, { "epoch": 1.5455343460342492, "grad_norm": 0.26886504888534546, "learning_rate": 5.6061467933308725e-06, "loss": 0.0044, "step": 51940 }, { "epoch": 1.5458319075178908, "grad_norm": 0.2344149500131607, "learning_rate": 5.604428353143131e-06, "loss": 0.0035, "step": 51950 }, { "epoch": 1.5461294690015324, "grad_norm": 0.24758194386959076, "learning_rate": 5.602709840497565e-06, "loss": 0.0057, "step": 51960 }, { "epoch": 1.546427030485174, "grad_norm": 0.06813642382621765, "learning_rate": 5.600991255600181e-06, "loss": 0.0043, "step": 51970 }, { "epoch": 1.5467245919688155, "grad_norm": 0.12155942618846893, "learning_rate": 5.599272598657003e-06, "loss": 0.0036, "step": 51980 }, { "epoch": 1.547022153452457, "grad_norm": 0.18144774436950684, "learning_rate": 5.597553869874059e-06, "loss": 0.0046, "step": 51990 }, { "epoch": 1.5473197149360987, "grad_norm": 0.2405378818511963, "learning_rate": 5.595835069457392e-06, "loss": 0.0053, "step": 52000 }, { "epoch": 1.5476172764197402, "grad_norm": 0.12346470355987549, "learning_rate": 5.594116197613043e-06, "loss": 0.0023, "step": 52010 }, { "epoch": 1.5479148379033818, "grad_norm": 0.2555863857269287, "learning_rate": 5.59239725454707e-06, "loss": 0.003, "step": 52020 }, { "epoch": 1.5482123993870234, "grad_norm": 0.17515641450881958, "learning_rate": 5.590678240465538e-06, "loss": 0.0054, "step": 52030 }, { "epoch": 1.548509960870665, "grad_norm": 0.09215941280126572, "learning_rate": 5.588959155574517e-06, "loss": 0.0029, "step": 52040 }, { "epoch": 1.5488075223543065, "grad_norm": 0.11238055676221848, "learning_rate": 5.587240000080089e-06, "loss": 0.0061, "step": 52050 }, { "epoch": 1.549105083837948, "grad_norm": 0.11073725670576096, "learning_rate": 5.585520774188343e-06, "loss": 0.0027, "step": 52060 }, { "epoch": 1.5494026453215897, "grad_norm": 0.16576126217842102, "learning_rate": 5.583801478105379e-06, "loss": 0.0063, "step": 52070 }, { "epoch": 1.5497002068052312, "grad_norm": 0.1482265144586563, "learning_rate": 5.582082112037299e-06, "loss": 0.0045, "step": 52080 }, { "epoch": 1.5499977682888728, "grad_norm": 0.25969141721725464, "learning_rate": 5.580362676190222e-06, "loss": 0.005, "step": 52090 }, { "epoch": 1.5502953297725144, "grad_norm": 0.016254879534244537, "learning_rate": 5.578643170770268e-06, "loss": 0.0032, "step": 52100 }, { "epoch": 1.550592891256156, "grad_norm": 0.31188374757766724, "learning_rate": 5.57692359598357e-06, "loss": 0.0057, "step": 52110 }, { "epoch": 1.5508904527397973, "grad_norm": 0.10908123105764389, "learning_rate": 5.575203952036265e-06, "loss": 0.0029, "step": 52120 }, { "epoch": 1.5511880142234389, "grad_norm": 0.3131710886955261, "learning_rate": 5.573484239134506e-06, "loss": 0.0052, "step": 52130 }, { "epoch": 1.5514855757070805, "grad_norm": 0.2532637417316437, "learning_rate": 5.571764457484445e-06, "loss": 0.0046, "step": 52140 }, { "epoch": 1.551783137190722, "grad_norm": 0.16852715611457825, "learning_rate": 5.570044607292249e-06, "loss": 0.0043, "step": 52150 }, { "epoch": 1.5520806986743636, "grad_norm": 0.3297599256038666, "learning_rate": 5.5683246887640876e-06, "loss": 0.0035, "step": 52160 }, { "epoch": 1.5523782601580052, "grad_norm": 0.1374940574169159, "learning_rate": 5.566604702106144e-06, "loss": 0.0049, "step": 52170 }, { "epoch": 1.5526758216416467, "grad_norm": 0.08557562530040741, "learning_rate": 5.564884647524606e-06, "loss": 0.0031, "step": 52180 }, { "epoch": 1.552973383125288, "grad_norm": 0.04409829154610634, "learning_rate": 5.563164525225674e-06, "loss": 0.0044, "step": 52190 }, { "epoch": 1.5532709446089297, "grad_norm": 0.2027062028646469, "learning_rate": 5.561444335415549e-06, "loss": 0.0033, "step": 52200 }, { "epoch": 1.5535685060925712, "grad_norm": 0.17904148995876312, "learning_rate": 5.559724078300447e-06, "loss": 0.0043, "step": 52210 }, { "epoch": 1.5538660675762128, "grad_norm": 0.3664449155330658, "learning_rate": 5.558003754086589e-06, "loss": 0.0106, "step": 52220 }, { "epoch": 1.5541636290598544, "grad_norm": 0.11558844149112701, "learning_rate": 5.556283362980202e-06, "loss": 0.004, "step": 52230 }, { "epoch": 1.554461190543496, "grad_norm": 0.25841018557548523, "learning_rate": 5.554562905187527e-06, "loss": 0.0054, "step": 52240 }, { "epoch": 1.5547587520271375, "grad_norm": 0.22660785913467407, "learning_rate": 5.552842380914808e-06, "loss": 0.0048, "step": 52250 }, { "epoch": 1.555056313510779, "grad_norm": 0.45250752568244934, "learning_rate": 5.551121790368299e-06, "loss": 0.0033, "step": 52260 }, { "epoch": 1.5553538749944207, "grad_norm": 0.4597504436969757, "learning_rate": 5.549401133754259e-06, "loss": 0.005, "step": 52270 }, { "epoch": 1.5556514364780623, "grad_norm": 0.19115950167179108, "learning_rate": 5.5476804112789614e-06, "loss": 0.0034, "step": 52280 }, { "epoch": 1.5559489979617038, "grad_norm": 0.16858404874801636, "learning_rate": 5.54595962314868e-06, "loss": 0.0055, "step": 52290 }, { "epoch": 1.5562465594453454, "grad_norm": 0.16073252260684967, "learning_rate": 5.544238769569703e-06, "loss": 0.0028, "step": 52300 }, { "epoch": 1.556544120928987, "grad_norm": 0.21469701826572418, "learning_rate": 5.54251785074832e-06, "loss": 0.0049, "step": 52310 }, { "epoch": 1.5568416824126285, "grad_norm": 0.20305249094963074, "learning_rate": 5.540796866890835e-06, "loss": 0.0035, "step": 52320 }, { "epoch": 1.5571392438962701, "grad_norm": 0.18429990112781525, "learning_rate": 5.539075818203554e-06, "loss": 0.0029, "step": 52330 }, { "epoch": 1.5574368053799117, "grad_norm": 0.08326006680727005, "learning_rate": 5.537354704892795e-06, "loss": 0.0022, "step": 52340 }, { "epoch": 1.5577343668635533, "grad_norm": 0.25891900062561035, "learning_rate": 5.535633527164882e-06, "loss": 0.0028, "step": 52350 }, { "epoch": 1.5580319283471948, "grad_norm": 0.1855863332748413, "learning_rate": 5.533912285226146e-06, "loss": 0.003, "step": 52360 }, { "epoch": 1.5583294898308364, "grad_norm": 0.3019920587539673, "learning_rate": 5.5321909792829265e-06, "loss": 0.0047, "step": 52370 }, { "epoch": 1.558627051314478, "grad_norm": 0.16655023396015167, "learning_rate": 5.530469609541571e-06, "loss": 0.0037, "step": 52380 }, { "epoch": 1.5589246127981196, "grad_norm": 0.10433715581893921, "learning_rate": 5.528748176208434e-06, "loss": 0.0051, "step": 52390 }, { "epoch": 1.5592221742817611, "grad_norm": 0.07334385812282562, "learning_rate": 5.527026679489879e-06, "loss": 0.0018, "step": 52400 }, { "epoch": 1.5595197357654025, "grad_norm": 0.128122940659523, "learning_rate": 5.525305119592275e-06, "loss": 0.0093, "step": 52410 }, { "epoch": 1.559817297249044, "grad_norm": 0.06357456743717194, "learning_rate": 5.523583496721999e-06, "loss": 0.0034, "step": 52420 }, { "epoch": 1.5601148587326856, "grad_norm": 0.24748685956001282, "learning_rate": 5.521861811085439e-06, "loss": 0.0053, "step": 52430 }, { "epoch": 1.5604124202163272, "grad_norm": 0.28751271963119507, "learning_rate": 5.520140062888984e-06, "loss": 0.0048, "step": 52440 }, { "epoch": 1.5607099816999688, "grad_norm": 0.09996926784515381, "learning_rate": 5.518418252339037e-06, "loss": 0.0045, "step": 52450 }, { "epoch": 1.5610075431836103, "grad_norm": 0.15420080721378326, "learning_rate": 5.516696379642005e-06, "loss": 0.0026, "step": 52460 }, { "epoch": 1.561305104667252, "grad_norm": 0.1327109932899475, "learning_rate": 5.514974445004303e-06, "loss": 0.0052, "step": 52470 }, { "epoch": 1.5616026661508933, "grad_norm": 0.017716532573103905, "learning_rate": 5.513252448632351e-06, "loss": 0.0034, "step": 52480 }, { "epoch": 1.5619002276345348, "grad_norm": 0.045708466321229935, "learning_rate": 5.511530390732584e-06, "loss": 0.0052, "step": 52490 }, { "epoch": 1.5621977891181764, "grad_norm": 0.18159669637680054, "learning_rate": 5.509808271511436e-06, "loss": 0.0041, "step": 52500 }, { "epoch": 1.562495350601818, "grad_norm": 0.36913955211639404, "learning_rate": 5.508086091175353e-06, "loss": 0.0028, "step": 52510 }, { "epoch": 1.5627929120854596, "grad_norm": 0.08171941339969635, "learning_rate": 5.506363849930785e-06, "loss": 0.0037, "step": 52520 }, { "epoch": 1.5630904735691011, "grad_norm": 0.187372624874115, "learning_rate": 5.5046415479841945e-06, "loss": 0.0051, "step": 52530 }, { "epoch": 1.5633880350527427, "grad_norm": 0.12811169028282166, "learning_rate": 5.502919185542045e-06, "loss": 0.0052, "step": 52540 }, { "epoch": 1.5636855965363843, "grad_norm": 0.01954549551010132, "learning_rate": 5.501196762810811e-06, "loss": 0.0028, "step": 52550 }, { "epoch": 1.5639831580200259, "grad_norm": 0.08423304557800293, "learning_rate": 5.499474279996975e-06, "loss": 0.0023, "step": 52560 }, { "epoch": 1.5642807195036674, "grad_norm": 0.16507185995578766, "learning_rate": 5.497751737307025e-06, "loss": 0.0034, "step": 52570 }, { "epoch": 1.564578280987309, "grad_norm": 0.1920015513896942, "learning_rate": 5.496029134947455e-06, "loss": 0.0043, "step": 52580 }, { "epoch": 1.5648758424709506, "grad_norm": 0.1029643565416336, "learning_rate": 5.494306473124769e-06, "loss": 0.0037, "step": 52590 }, { "epoch": 1.5651734039545921, "grad_norm": 0.2579663097858429, "learning_rate": 5.4925837520454764e-06, "loss": 0.004, "step": 52600 }, { "epoch": 1.5654709654382337, "grad_norm": 0.33057230710983276, "learning_rate": 5.490860971916092e-06, "loss": 0.0043, "step": 52610 }, { "epoch": 1.5657685269218753, "grad_norm": 0.39605140686035156, "learning_rate": 5.489138132943144e-06, "loss": 0.0053, "step": 52620 }, { "epoch": 1.5660660884055169, "grad_norm": 0.3416631817817688, "learning_rate": 5.487415235333159e-06, "loss": 0.0041, "step": 52630 }, { "epoch": 1.5663636498891584, "grad_norm": 0.125257208943367, "learning_rate": 5.485692279292678e-06, "loss": 0.0031, "step": 52640 }, { "epoch": 1.5666612113728, "grad_norm": 0.06401625275611877, "learning_rate": 5.483969265028244e-06, "loss": 0.0051, "step": 52650 }, { "epoch": 1.5669587728564416, "grad_norm": 0.1043553352355957, "learning_rate": 5.482246192746411e-06, "loss": 0.0035, "step": 52660 }, { "epoch": 1.5672563343400832, "grad_norm": 0.13200078904628754, "learning_rate": 5.480523062653736e-06, "loss": 0.0066, "step": 52670 }, { "epoch": 1.5675538958237247, "grad_norm": 0.10748230665922165, "learning_rate": 5.4787998749567865e-06, "loss": 0.0042, "step": 52680 }, { "epoch": 1.567851457307366, "grad_norm": 0.1711321622133255, "learning_rate": 5.477076629862133e-06, "loss": 0.0057, "step": 52690 }, { "epoch": 1.5681490187910077, "grad_norm": 0.11902665346860886, "learning_rate": 5.475353327576357e-06, "loss": 0.0043, "step": 52700 }, { "epoch": 1.5684465802746492, "grad_norm": 0.15818239748477936, "learning_rate": 5.4736299683060456e-06, "loss": 0.004, "step": 52710 }, { "epoch": 1.5687441417582908, "grad_norm": 0.09047261625528336, "learning_rate": 5.47190655225779e-06, "loss": 0.0035, "step": 52720 }, { "epoch": 1.5690417032419324, "grad_norm": 0.22821930050849915, "learning_rate": 5.470183079638193e-06, "loss": 0.0037, "step": 52730 }, { "epoch": 1.569339264725574, "grad_norm": 0.20110437273979187, "learning_rate": 5.468459550653859e-06, "loss": 0.0069, "step": 52740 }, { "epoch": 1.5696368262092155, "grad_norm": 0.048644278198480606, "learning_rate": 5.4667359655114025e-06, "loss": 0.0039, "step": 52750 }, { "epoch": 1.569934387692857, "grad_norm": 0.21800705790519714, "learning_rate": 5.465012324417444e-06, "loss": 0.0042, "step": 52760 }, { "epoch": 1.5702319491764984, "grad_norm": 0.2623956799507141, "learning_rate": 5.463288627578611e-06, "loss": 0.0029, "step": 52770 }, { "epoch": 1.57052951066014, "grad_norm": 0.19876588881015778, "learning_rate": 5.461564875201536e-06, "loss": 0.0034, "step": 52780 }, { "epoch": 1.5708270721437816, "grad_norm": 0.22084693610668182, "learning_rate": 5.459841067492863e-06, "loss": 0.0038, "step": 52790 }, { "epoch": 1.5711246336274232, "grad_norm": 0.525748074054718, "learning_rate": 5.4581172046592335e-06, "loss": 0.0055, "step": 52800 }, { "epoch": 1.5714221951110647, "grad_norm": 0.1087263822555542, "learning_rate": 5.4563932869073065e-06, "loss": 0.004, "step": 52810 }, { "epoch": 1.5717197565947063, "grad_norm": 0.18581563234329224, "learning_rate": 5.454669314443738e-06, "loss": 0.0036, "step": 52820 }, { "epoch": 1.5720173180783479, "grad_norm": 0.2190624177455902, "learning_rate": 5.452945287475198e-06, "loss": 0.0035, "step": 52830 }, { "epoch": 1.5723148795619895, "grad_norm": 0.32986539602279663, "learning_rate": 5.451221206208359e-06, "loss": 0.0041, "step": 52840 }, { "epoch": 1.572612441045631, "grad_norm": 0.21906119585037231, "learning_rate": 5.449497070849899e-06, "loss": 0.0035, "step": 52850 }, { "epoch": 1.5729100025292726, "grad_norm": 0.1591358780860901, "learning_rate": 5.4477728816065086e-06, "loss": 0.0054, "step": 52860 }, { "epoch": 1.5732075640129142, "grad_norm": 0.13361699879169464, "learning_rate": 5.446048638684877e-06, "loss": 0.0027, "step": 52870 }, { "epoch": 1.5735051254965557, "grad_norm": 0.14949487149715424, "learning_rate": 5.444324342291705e-06, "loss": 0.0055, "step": 52880 }, { "epoch": 1.5738026869801973, "grad_norm": 0.14086447656154633, "learning_rate": 5.442599992633696e-06, "loss": 0.0038, "step": 52890 }, { "epoch": 1.574100248463839, "grad_norm": 0.3163907527923584, "learning_rate": 5.4408755899175655e-06, "loss": 0.0052, "step": 52900 }, { "epoch": 1.5743978099474805, "grad_norm": 0.17905177175998688, "learning_rate": 5.439151134350029e-06, "loss": 0.0041, "step": 52910 }, { "epoch": 1.574695371431122, "grad_norm": 0.1862129271030426, "learning_rate": 5.437426626137814e-06, "loss": 0.0037, "step": 52920 }, { "epoch": 1.5749929329147636, "grad_norm": 0.10525983572006226, "learning_rate": 5.435702065487649e-06, "loss": 0.0035, "step": 52930 }, { "epoch": 1.5752904943984052, "grad_norm": 0.16664299368858337, "learning_rate": 5.433977452606273e-06, "loss": 0.0044, "step": 52940 }, { "epoch": 1.5755880558820468, "grad_norm": 0.28604739904403687, "learning_rate": 5.432252787700428e-06, "loss": 0.0053, "step": 52950 }, { "epoch": 1.5758856173656883, "grad_norm": 0.19537876546382904, "learning_rate": 5.430528070976866e-06, "loss": 0.0037, "step": 52960 }, { "epoch": 1.57618317884933, "grad_norm": 0.05481833592057228, "learning_rate": 5.428803302642341e-06, "loss": 0.004, "step": 52970 }, { "epoch": 1.5764807403329713, "grad_norm": 0.18336288630962372, "learning_rate": 5.427078482903617e-06, "loss": 0.0035, "step": 52980 }, { "epoch": 1.5767783018166128, "grad_norm": 0.14803721010684967, "learning_rate": 5.4253536119674625e-06, "loss": 0.0049, "step": 52990 }, { "epoch": 1.5770758633002544, "grad_norm": 0.2612758278846741, "learning_rate": 5.423628690040649e-06, "loss": 0.0042, "step": 53000 }, { "epoch": 1.577373424783896, "grad_norm": 0.07520970702171326, "learning_rate": 5.421903717329961e-06, "loss": 0.0023, "step": 53010 }, { "epoch": 1.5776709862675375, "grad_norm": 0.09684912860393524, "learning_rate": 5.420178694042183e-06, "loss": 0.0024, "step": 53020 }, { "epoch": 1.5779685477511791, "grad_norm": 0.382891446352005, "learning_rate": 5.418453620384109e-06, "loss": 0.0036, "step": 53030 }, { "epoch": 1.5782661092348207, "grad_norm": 0.029482122510671616, "learning_rate": 5.416728496562535e-06, "loss": 0.0026, "step": 53040 }, { "epoch": 1.578563670718462, "grad_norm": 0.2231556475162506, "learning_rate": 5.415003322784271e-06, "loss": 0.0044, "step": 53050 }, { "epoch": 1.5788612322021036, "grad_norm": 0.5330602526664734, "learning_rate": 5.413278099256124e-06, "loss": 0.0044, "step": 53060 }, { "epoch": 1.5791587936857452, "grad_norm": 0.12915806472301483, "learning_rate": 5.411552826184911e-06, "loss": 0.0025, "step": 53070 }, { "epoch": 1.5794563551693868, "grad_norm": 0.12725596129894257, "learning_rate": 5.4098275037774574e-06, "loss": 0.0034, "step": 53080 }, { "epoch": 1.5797539166530283, "grad_norm": 0.11827491968870163, "learning_rate": 5.408102132240589e-06, "loss": 0.0043, "step": 53090 }, { "epoch": 1.58005147813667, "grad_norm": 0.04778238758444786, "learning_rate": 5.406376711781141e-06, "loss": 0.0042, "step": 53100 }, { "epoch": 1.5803490396203115, "grad_norm": 0.02566893957555294, "learning_rate": 5.404651242605955e-06, "loss": 0.0025, "step": 53110 }, { "epoch": 1.580646601103953, "grad_norm": 0.12945601344108582, "learning_rate": 5.4029257249218735e-06, "loss": 0.0045, "step": 53120 }, { "epoch": 1.5809441625875946, "grad_norm": 0.257060170173645, "learning_rate": 5.401200158935755e-06, "loss": 0.0049, "step": 53130 }, { "epoch": 1.5812417240712362, "grad_norm": 0.07616224884986877, "learning_rate": 5.399474544854453e-06, "loss": 0.0057, "step": 53140 }, { "epoch": 1.5815392855548778, "grad_norm": 0.22153443098068237, "learning_rate": 5.397748882884833e-06, "loss": 0.0045, "step": 53150 }, { "epoch": 1.5818368470385193, "grad_norm": 0.0798792764544487, "learning_rate": 5.3960231732337605e-06, "loss": 0.0033, "step": 53160 }, { "epoch": 1.582134408522161, "grad_norm": 0.283390074968338, "learning_rate": 5.394297416108116e-06, "loss": 0.0048, "step": 53170 }, { "epoch": 1.5824319700058025, "grad_norm": 0.1712367832660675, "learning_rate": 5.392571611714778e-06, "loss": 0.0034, "step": 53180 }, { "epoch": 1.582729531489444, "grad_norm": 0.15640860795974731, "learning_rate": 5.390845760260632e-06, "loss": 0.0043, "step": 53190 }, { "epoch": 1.5830270929730856, "grad_norm": 0.25683489441871643, "learning_rate": 5.389119861952572e-06, "loss": 0.0032, "step": 53200 }, { "epoch": 1.5833246544567272, "grad_norm": 0.23071417212486267, "learning_rate": 5.387393916997493e-06, "loss": 0.0029, "step": 53210 }, { "epoch": 1.5836222159403688, "grad_norm": 0.10062980651855469, "learning_rate": 5.385667925602302e-06, "loss": 0.0034, "step": 53220 }, { "epoch": 1.5839197774240104, "grad_norm": 0.6282534003257751, "learning_rate": 5.3839418879739035e-06, "loss": 0.0056, "step": 53230 }, { "epoch": 1.584217338907652, "grad_norm": 0.16872094571590424, "learning_rate": 5.3822158043192154e-06, "loss": 0.0038, "step": 53240 }, { "epoch": 1.5845149003912935, "grad_norm": 0.1438043862581253, "learning_rate": 5.380489674845155e-06, "loss": 0.0057, "step": 53250 }, { "epoch": 1.584812461874935, "grad_norm": 0.3027016222476959, "learning_rate": 5.37876349975865e-06, "loss": 0.0039, "step": 53260 }, { "epoch": 1.5851100233585764, "grad_norm": 0.5858364701271057, "learning_rate": 5.3770372792666305e-06, "loss": 0.0035, "step": 53270 }, { "epoch": 1.585407584842218, "grad_norm": 0.40278229117393494, "learning_rate": 5.375311013576033e-06, "loss": 0.0034, "step": 53280 }, { "epoch": 1.5857051463258596, "grad_norm": 0.09729357808828354, "learning_rate": 5.373584702893798e-06, "loss": 0.0031, "step": 53290 }, { "epoch": 1.5860027078095011, "grad_norm": 0.1329095959663391, "learning_rate": 5.371858347426875e-06, "loss": 0.0041, "step": 53300 }, { "epoch": 1.5863002692931427, "grad_norm": 0.14770789444446564, "learning_rate": 5.370131947382215e-06, "loss": 0.003, "step": 53310 }, { "epoch": 1.5865978307767843, "grad_norm": 0.14915505051612854, "learning_rate": 5.368405502966776e-06, "loss": 0.0037, "step": 53320 }, { "epoch": 1.5868953922604259, "grad_norm": 0.29437196254730225, "learning_rate": 5.36667901438752e-06, "loss": 0.0053, "step": 53330 }, { "epoch": 1.5871929537440672, "grad_norm": 0.1870180368423462, "learning_rate": 5.364952481851417e-06, "loss": 0.0038, "step": 53340 }, { "epoch": 1.5874905152277088, "grad_norm": 0.125754714012146, "learning_rate": 5.363225905565441e-06, "loss": 0.0045, "step": 53350 }, { "epoch": 1.5877880767113504, "grad_norm": 0.14928536117076874, "learning_rate": 5.3614992857365715e-06, "loss": 0.0032, "step": 53360 }, { "epoch": 1.588085638194992, "grad_norm": 0.3197043240070343, "learning_rate": 5.35977262257179e-06, "loss": 0.0065, "step": 53370 }, { "epoch": 1.5883831996786335, "grad_norm": 0.17599105834960938, "learning_rate": 5.358045916278089e-06, "loss": 0.0038, "step": 53380 }, { "epoch": 1.588680761162275, "grad_norm": 0.21403929591178894, "learning_rate": 5.356319167062461e-06, "loss": 0.0042, "step": 53390 }, { "epoch": 1.5889783226459167, "grad_norm": 0.3552076518535614, "learning_rate": 5.354592375131903e-06, "loss": 0.0042, "step": 53400 }, { "epoch": 1.5892758841295582, "grad_norm": 0.09860966354608536, "learning_rate": 5.352865540693427e-06, "loss": 0.0043, "step": 53410 }, { "epoch": 1.5895734456131998, "grad_norm": 0.16874374449253082, "learning_rate": 5.351138663954037e-06, "loss": 0.0022, "step": 53420 }, { "epoch": 1.5898710070968414, "grad_norm": 0.14823654294013977, "learning_rate": 5.349411745120752e-06, "loss": 0.0051, "step": 53430 }, { "epoch": 1.590168568580483, "grad_norm": 0.43657007813453674, "learning_rate": 5.347684784400588e-06, "loss": 0.0041, "step": 53440 }, { "epoch": 1.5904661300641245, "grad_norm": 0.22688347101211548, "learning_rate": 5.345957782000574e-06, "loss": 0.0058, "step": 53450 }, { "epoch": 1.590763691547766, "grad_norm": 0.24959884583950043, "learning_rate": 5.344230738127737e-06, "loss": 0.0055, "step": 53460 }, { "epoch": 1.5910612530314077, "grad_norm": 0.2679601013660431, "learning_rate": 5.342503652989114e-06, "loss": 0.0049, "step": 53470 }, { "epoch": 1.5913588145150492, "grad_norm": 0.12044385075569153, "learning_rate": 5.340776526791743e-06, "loss": 0.0049, "step": 53480 }, { "epoch": 1.5916563759986908, "grad_norm": 0.37852105498313904, "learning_rate": 5.339049359742672e-06, "loss": 0.0043, "step": 53490 }, { "epoch": 1.5919539374823324, "grad_norm": 0.1264672726392746, "learning_rate": 5.337322152048948e-06, "loss": 0.0036, "step": 53500 }, { "epoch": 1.592251498965974, "grad_norm": 0.2664845287799835, "learning_rate": 5.335594903917627e-06, "loss": 0.0042, "step": 53510 }, { "epoch": 1.5925490604496155, "grad_norm": 0.16292670369148254, "learning_rate": 5.333867615555768e-06, "loss": 0.0028, "step": 53520 }, { "epoch": 1.592846621933257, "grad_norm": 0.08810397237539291, "learning_rate": 5.332140287170435e-06, "loss": 0.0033, "step": 53530 }, { "epoch": 1.5931441834168987, "grad_norm": 0.16050243377685547, "learning_rate": 5.3304129189686985e-06, "loss": 0.0029, "step": 53540 }, { "epoch": 1.59344174490054, "grad_norm": 0.3595369756221771, "learning_rate": 5.328685511157631e-06, "loss": 0.0052, "step": 53550 }, { "epoch": 1.5937393063841816, "grad_norm": 0.11354702711105347, "learning_rate": 5.326958063944312e-06, "loss": 0.0031, "step": 53560 }, { "epoch": 1.5940368678678232, "grad_norm": 0.09522596746683121, "learning_rate": 5.3252305775358234e-06, "loss": 0.0036, "step": 53570 }, { "epoch": 1.5943344293514647, "grad_norm": 0.20832979679107666, "learning_rate": 5.323503052139257e-06, "loss": 0.0036, "step": 53580 }, { "epoch": 1.5946319908351063, "grad_norm": 0.0633404329419136, "learning_rate": 5.321775487961701e-06, "loss": 0.0038, "step": 53590 }, { "epoch": 1.594929552318748, "grad_norm": 0.2513751983642578, "learning_rate": 5.320047885210256e-06, "loss": 0.0053, "step": 53600 }, { "epoch": 1.5952271138023895, "grad_norm": 0.1200585663318634, "learning_rate": 5.318320244092022e-06, "loss": 0.0049, "step": 53610 }, { "epoch": 1.5955246752860308, "grad_norm": 0.11576662957668304, "learning_rate": 5.3165925648141085e-06, "loss": 0.0057, "step": 53620 }, { "epoch": 1.5958222367696724, "grad_norm": 0.20734903216362, "learning_rate": 5.314864847583623e-06, "loss": 0.0058, "step": 53630 }, { "epoch": 1.596119798253314, "grad_norm": 0.10557138919830322, "learning_rate": 5.313137092607684e-06, "loss": 0.0035, "step": 53640 }, { "epoch": 1.5964173597369555, "grad_norm": 0.30818647146224976, "learning_rate": 5.31140930009341e-06, "loss": 0.0038, "step": 53650 }, { "epoch": 1.596714921220597, "grad_norm": 0.11462659388780594, "learning_rate": 5.3096814702479275e-06, "loss": 0.0051, "step": 53660 }, { "epoch": 1.5970124827042387, "grad_norm": 0.11719115823507309, "learning_rate": 5.307953603278364e-06, "loss": 0.0049, "step": 53670 }, { "epoch": 1.5973100441878803, "grad_norm": 0.2543674111366272, "learning_rate": 5.306225699391854e-06, "loss": 0.0062, "step": 53680 }, { "epoch": 1.5976076056715218, "grad_norm": 0.13652537763118744, "learning_rate": 5.304497758795537e-06, "loss": 0.004, "step": 53690 }, { "epoch": 1.5979051671551634, "grad_norm": 0.12522168457508087, "learning_rate": 5.302769781696553e-06, "loss": 0.0037, "step": 53700 }, { "epoch": 1.598202728638805, "grad_norm": 0.12446336448192596, "learning_rate": 5.301041768302051e-06, "loss": 0.004, "step": 53710 }, { "epoch": 1.5985002901224465, "grad_norm": 0.20247945189476013, "learning_rate": 5.29931371881918e-06, "loss": 0.0036, "step": 53720 }, { "epoch": 1.5987978516060881, "grad_norm": 0.15384745597839355, "learning_rate": 5.297585633455099e-06, "loss": 0.0037, "step": 53730 }, { "epoch": 1.5990954130897297, "grad_norm": 0.22791127860546112, "learning_rate": 5.295857512416964e-06, "loss": 0.0032, "step": 53740 }, { "epoch": 1.5993929745733713, "grad_norm": 0.31267693638801575, "learning_rate": 5.294129355911943e-06, "loss": 0.0045, "step": 53750 }, { "epoch": 1.5996905360570128, "grad_norm": 0.29096853733062744, "learning_rate": 5.2924011641472015e-06, "loss": 0.004, "step": 53760 }, { "epoch": 1.5999880975406544, "grad_norm": 0.14021089673042297, "learning_rate": 5.290672937329914e-06, "loss": 0.0029, "step": 53770 }, { "epoch": 1.600285659024296, "grad_norm": 0.21604220569133759, "learning_rate": 5.288944675667254e-06, "loss": 0.0045, "step": 53780 }, { "epoch": 1.6005832205079376, "grad_norm": 0.27852872014045715, "learning_rate": 5.287216379366408e-06, "loss": 0.005, "step": 53790 }, { "epoch": 1.6008807819915791, "grad_norm": 0.10256298631429672, "learning_rate": 5.285488048634556e-06, "loss": 0.0025, "step": 53800 }, { "epoch": 1.6011783434752207, "grad_norm": 0.16622553765773773, "learning_rate": 5.2837596836788915e-06, "loss": 0.0034, "step": 53810 }, { "epoch": 1.6014759049588623, "grad_norm": 0.292953759431839, "learning_rate": 5.282031284706605e-06, "loss": 0.0045, "step": 53820 }, { "epoch": 1.6017734664425038, "grad_norm": 0.050137318670749664, "learning_rate": 5.2803028519248934e-06, "loss": 0.0057, "step": 53830 }, { "epoch": 1.6020710279261452, "grad_norm": 0.1306525319814682, "learning_rate": 5.278574385540962e-06, "loss": 0.0032, "step": 53840 }, { "epoch": 1.6023685894097868, "grad_norm": 0.2027091085910797, "learning_rate": 5.276845885762012e-06, "loss": 0.0038, "step": 53850 }, { "epoch": 1.6026661508934283, "grad_norm": 0.07612771540880203, "learning_rate": 5.275117352795257e-06, "loss": 0.0025, "step": 53860 }, { "epoch": 1.60296371237707, "grad_norm": 0.06513451039791107, "learning_rate": 5.273388786847907e-06, "loss": 0.0029, "step": 53870 }, { "epoch": 1.6032612738607115, "grad_norm": 0.24009107053279877, "learning_rate": 5.2716601881271835e-06, "loss": 0.006, "step": 53880 }, { "epoch": 1.603558835344353, "grad_norm": 0.12500673532485962, "learning_rate": 5.269931556840303e-06, "loss": 0.004, "step": 53890 }, { "epoch": 1.6038563968279946, "grad_norm": 0.29670384526252747, "learning_rate": 5.268202893194495e-06, "loss": 0.0038, "step": 53900 }, { "epoch": 1.604153958311636, "grad_norm": 0.16271668672561646, "learning_rate": 5.2664741973969855e-06, "loss": 0.0028, "step": 53910 }, { "epoch": 1.6044515197952776, "grad_norm": 0.31837621331214905, "learning_rate": 5.264745469655012e-06, "loss": 0.003, "step": 53920 }, { "epoch": 1.6047490812789191, "grad_norm": 0.08728347718715668, "learning_rate": 5.263016710175806e-06, "loss": 0.0028, "step": 53930 }, { "epoch": 1.6050466427625607, "grad_norm": 0.13634958863258362, "learning_rate": 5.261287919166611e-06, "loss": 0.0026, "step": 53940 }, { "epoch": 1.6053442042462023, "grad_norm": 0.07956057786941528, "learning_rate": 5.2595590968346725e-06, "loss": 0.0036, "step": 53950 }, { "epoch": 1.6056417657298439, "grad_norm": 0.19011525809764862, "learning_rate": 5.257830243387237e-06, "loss": 0.0049, "step": 53960 }, { "epoch": 1.6059393272134854, "grad_norm": 0.18282918632030487, "learning_rate": 5.256101359031556e-06, "loss": 0.0064, "step": 53970 }, { "epoch": 1.606236888697127, "grad_norm": 0.12205227464437485, "learning_rate": 5.254372443974886e-06, "loss": 0.0072, "step": 53980 }, { "epoch": 1.6065344501807686, "grad_norm": 0.10156282782554626, "learning_rate": 5.2526434984244875e-06, "loss": 0.0031, "step": 53990 }, { "epoch": 1.6068320116644101, "grad_norm": 0.2307613492012024, "learning_rate": 5.250914522587622e-06, "loss": 0.0028, "step": 54000 }, { "epoch": 1.6071295731480517, "grad_norm": 0.5733463168144226, "learning_rate": 5.249185516671557e-06, "loss": 0.0032, "step": 54010 }, { "epoch": 1.6074271346316933, "grad_norm": 0.21900580823421478, "learning_rate": 5.247456480883562e-06, "loss": 0.0037, "step": 54020 }, { "epoch": 1.6077246961153349, "grad_norm": 0.15131776034832, "learning_rate": 5.245727415430912e-06, "loss": 0.0035, "step": 54030 }, { "epoch": 1.6080222575989764, "grad_norm": 0.20158709585666656, "learning_rate": 5.243998320520882e-06, "loss": 0.0047, "step": 54040 }, { "epoch": 1.608319819082618, "grad_norm": 0.12437711656093597, "learning_rate": 5.242269196360757e-06, "loss": 0.0043, "step": 54050 }, { "epoch": 1.6086173805662596, "grad_norm": 0.28669121861457825, "learning_rate": 5.240540043157817e-06, "loss": 0.004, "step": 54060 }, { "epoch": 1.6089149420499012, "grad_norm": 0.1329382359981537, "learning_rate": 5.238810861119352e-06, "loss": 0.0024, "step": 54070 }, { "epoch": 1.6092125035335427, "grad_norm": 0.18510988354682922, "learning_rate": 5.237081650452653e-06, "loss": 0.0047, "step": 54080 }, { "epoch": 1.6095100650171843, "grad_norm": 0.10133805125951767, "learning_rate": 5.2353524113650155e-06, "loss": 0.0055, "step": 54090 }, { "epoch": 1.6098076265008259, "grad_norm": 0.4148884117603302, "learning_rate": 5.233623144063738e-06, "loss": 0.0054, "step": 54100 }, { "epoch": 1.6101051879844674, "grad_norm": 0.2611326575279236, "learning_rate": 5.231893848756119e-06, "loss": 0.0042, "step": 54110 }, { "epoch": 1.6104027494681088, "grad_norm": 0.24620507657527924, "learning_rate": 5.230164525649466e-06, "loss": 0.0046, "step": 54120 }, { "epoch": 1.6107003109517504, "grad_norm": 0.0845666229724884, "learning_rate": 5.228435174951089e-06, "loss": 0.0034, "step": 54130 }, { "epoch": 1.610997872435392, "grad_norm": 0.22454430162906647, "learning_rate": 5.2267057968682966e-06, "loss": 0.004, "step": 54140 }, { "epoch": 1.6112954339190335, "grad_norm": 0.12159029394388199, "learning_rate": 5.224976391608404e-06, "loss": 0.0036, "step": 54150 }, { "epoch": 1.611592995402675, "grad_norm": 0.23431038856506348, "learning_rate": 5.2232469593787305e-06, "loss": 0.0033, "step": 54160 }, { "epoch": 1.6118905568863167, "grad_norm": 0.09201110899448395, "learning_rate": 5.221517500386596e-06, "loss": 0.0028, "step": 54170 }, { "epoch": 1.6121881183699582, "grad_norm": 0.03620251640677452, "learning_rate": 5.219788014839328e-06, "loss": 0.0044, "step": 54180 }, { "epoch": 1.6124856798535996, "grad_norm": 0.18032167851924896, "learning_rate": 5.21805850294425e-06, "loss": 0.0033, "step": 54190 }, { "epoch": 1.6127832413372412, "grad_norm": 0.08187224715948105, "learning_rate": 5.216328964908696e-06, "loss": 0.0051, "step": 54200 }, { "epoch": 1.6130808028208827, "grad_norm": 0.2125226855278015, "learning_rate": 5.214599400939998e-06, "loss": 0.004, "step": 54210 }, { "epoch": 1.6133783643045243, "grad_norm": 0.247432678937912, "learning_rate": 5.212869811245496e-06, "loss": 0.0035, "step": 54220 }, { "epoch": 1.6136759257881659, "grad_norm": 0.19665084779262543, "learning_rate": 5.2111401960325266e-06, "loss": 0.004, "step": 54230 }, { "epoch": 1.6139734872718074, "grad_norm": 0.19234898686408997, "learning_rate": 5.2094105555084365e-06, "loss": 0.0048, "step": 54240 }, { "epoch": 1.614271048755449, "grad_norm": 0.09655778110027313, "learning_rate": 5.2076808898805676e-06, "loss": 0.0038, "step": 54250 }, { "epoch": 1.6145686102390906, "grad_norm": 0.2950292229652405, "learning_rate": 5.205951199356275e-06, "loss": 0.0039, "step": 54260 }, { "epoch": 1.6148661717227322, "grad_norm": 0.18171389400959015, "learning_rate": 5.204221484142908e-06, "loss": 0.0035, "step": 54270 }, { "epoch": 1.6151637332063737, "grad_norm": 0.12211652845144272, "learning_rate": 5.202491744447821e-06, "loss": 0.0046, "step": 54280 }, { "epoch": 1.6154612946900153, "grad_norm": 0.288983553647995, "learning_rate": 5.200761980478373e-06, "loss": 0.0052, "step": 54290 }, { "epoch": 1.6157588561736569, "grad_norm": 0.5373141169548035, "learning_rate": 5.199032192441926e-06, "loss": 0.0063, "step": 54300 }, { "epoch": 1.6160564176572985, "grad_norm": 0.2969058156013489, "learning_rate": 5.197302380545844e-06, "loss": 0.0054, "step": 54310 }, { "epoch": 1.61635397914094, "grad_norm": 0.23565252125263214, "learning_rate": 5.1955725449974925e-06, "loss": 0.0033, "step": 54320 }, { "epoch": 1.6166515406245816, "grad_norm": 0.23942561447620392, "learning_rate": 5.193842686004242e-06, "loss": 0.0044, "step": 54330 }, { "epoch": 1.6169491021082232, "grad_norm": 0.0759957805275917, "learning_rate": 5.192112803773464e-06, "loss": 0.0034, "step": 54340 }, { "epoch": 1.6172466635918648, "grad_norm": 0.26456013321876526, "learning_rate": 5.190382898512536e-06, "loss": 0.0034, "step": 54350 }, { "epoch": 1.6175442250755063, "grad_norm": 0.1400832235813141, "learning_rate": 5.1886529704288336e-06, "loss": 0.0039, "step": 54360 }, { "epoch": 1.617841786559148, "grad_norm": 0.0728176012635231, "learning_rate": 5.186923019729739e-06, "loss": 0.0047, "step": 54370 }, { "epoch": 1.6181393480427895, "grad_norm": 0.11012638360261917, "learning_rate": 5.185193046622636e-06, "loss": 0.004, "step": 54380 }, { "epoch": 1.618436909526431, "grad_norm": 0.2626281678676605, "learning_rate": 5.1834630513149086e-06, "loss": 0.0033, "step": 54390 }, { "epoch": 1.6187344710100726, "grad_norm": 0.3719075620174408, "learning_rate": 5.181733034013948e-06, "loss": 0.0032, "step": 54400 }, { "epoch": 1.619032032493714, "grad_norm": 0.14670738577842712, "learning_rate": 5.180002994927145e-06, "loss": 0.0034, "step": 54410 }, { "epoch": 1.6193295939773555, "grad_norm": 0.20680734515190125, "learning_rate": 5.1782729342618944e-06, "loss": 0.0056, "step": 54420 }, { "epoch": 1.6196271554609971, "grad_norm": 0.05999872460961342, "learning_rate": 5.176542852225592e-06, "loss": 0.0023, "step": 54430 }, { "epoch": 1.6199247169446387, "grad_norm": 0.2505283057689667, "learning_rate": 5.1748127490256375e-06, "loss": 0.0025, "step": 54440 }, { "epoch": 1.6202222784282803, "grad_norm": 0.13939571380615234, "learning_rate": 5.173082624869432e-06, "loss": 0.003, "step": 54450 }, { "epoch": 1.6205198399119218, "grad_norm": 0.039016712456941605, "learning_rate": 5.17135247996438e-06, "loss": 0.0028, "step": 54460 }, { "epoch": 1.6208174013955634, "grad_norm": 0.06809639185667038, "learning_rate": 5.169622314517889e-06, "loss": 0.0014, "step": 54470 }, { "epoch": 1.6211149628792048, "grad_norm": 0.1400861144065857, "learning_rate": 5.167892128737366e-06, "loss": 0.0041, "step": 54480 }, { "epoch": 1.6214125243628463, "grad_norm": 0.13635386526584625, "learning_rate": 5.166161922830227e-06, "loss": 0.0053, "step": 54490 }, { "epoch": 1.621710085846488, "grad_norm": 0.28978899121284485, "learning_rate": 5.164431697003882e-06, "loss": 0.0068, "step": 54500 }, { "epoch": 1.6220076473301295, "grad_norm": 0.13913671672344208, "learning_rate": 5.1627014514657495e-06, "loss": 0.0024, "step": 54510 }, { "epoch": 1.622305208813771, "grad_norm": 0.31124943494796753, "learning_rate": 5.1609711864232485e-06, "loss": 0.0061, "step": 54520 }, { "epoch": 1.6226027702974126, "grad_norm": 0.09816445410251617, "learning_rate": 5.159240902083796e-06, "loss": 0.0058, "step": 54530 }, { "epoch": 1.6229003317810542, "grad_norm": 0.1267884522676468, "learning_rate": 5.157510598654822e-06, "loss": 0.0034, "step": 54540 }, { "epoch": 1.6231978932646958, "grad_norm": 0.13892203569412231, "learning_rate": 5.1557802763437495e-06, "loss": 0.0033, "step": 54550 }, { "epoch": 1.6234954547483373, "grad_norm": 0.6114488244056702, "learning_rate": 5.154049935358006e-06, "loss": 0.0052, "step": 54560 }, { "epoch": 1.623793016231979, "grad_norm": 0.1554958075284958, "learning_rate": 5.15231957590502e-06, "loss": 0.0052, "step": 54570 }, { "epoch": 1.6240905777156205, "grad_norm": 0.15745165944099426, "learning_rate": 5.150589198192229e-06, "loss": 0.0032, "step": 54580 }, { "epoch": 1.624388139199262, "grad_norm": 0.1393144577741623, "learning_rate": 5.148858802427063e-06, "loss": 0.0054, "step": 54590 }, { "epoch": 1.6246857006829036, "grad_norm": 0.14353463053703308, "learning_rate": 5.147128388816964e-06, "loss": 0.0039, "step": 54600 }, { "epoch": 1.6249832621665452, "grad_norm": 0.28064900636672974, "learning_rate": 5.145397957569364e-06, "loss": 0.004, "step": 54610 }, { "epoch": 1.6252808236501868, "grad_norm": 0.1263394057750702, "learning_rate": 5.143667508891709e-06, "loss": 0.0042, "step": 54620 }, { "epoch": 1.6255783851338284, "grad_norm": 0.1917559653520584, "learning_rate": 5.14193704299144e-06, "loss": 0.0025, "step": 54630 }, { "epoch": 1.62587594661747, "grad_norm": 0.11432643234729767, "learning_rate": 5.1402065600760055e-06, "loss": 0.0025, "step": 54640 }, { "epoch": 1.6261735081011115, "grad_norm": 0.14332151412963867, "learning_rate": 5.138476060352849e-06, "loss": 0.003, "step": 54650 }, { "epoch": 1.626471069584753, "grad_norm": 0.05560740455985069, "learning_rate": 5.136745544029422e-06, "loss": 0.0054, "step": 54660 }, { "epoch": 1.6267686310683946, "grad_norm": 0.10986346751451492, "learning_rate": 5.135015011313175e-06, "loss": 0.0044, "step": 54670 }, { "epoch": 1.6270661925520362, "grad_norm": 0.12579016387462616, "learning_rate": 5.133284462411561e-06, "loss": 0.0058, "step": 54680 }, { "epoch": 1.6273637540356776, "grad_norm": 0.23057612776756287, "learning_rate": 5.131553897532037e-06, "loss": 0.0038, "step": 54690 }, { "epoch": 1.6276613155193191, "grad_norm": 0.2726815938949585, "learning_rate": 5.129823316882058e-06, "loss": 0.0045, "step": 54700 }, { "epoch": 1.6279588770029607, "grad_norm": 0.20868046581745148, "learning_rate": 5.128092720669085e-06, "loss": 0.0045, "step": 54710 }, { "epoch": 1.6282564384866023, "grad_norm": 0.1450687050819397, "learning_rate": 5.126362109100575e-06, "loss": 0.0033, "step": 54720 }, { "epoch": 1.6285539999702439, "grad_norm": 0.13043200969696045, "learning_rate": 5.124631482383996e-06, "loss": 0.0058, "step": 54730 }, { "epoch": 1.6288515614538854, "grad_norm": 0.22039654850959778, "learning_rate": 5.122900840726809e-06, "loss": 0.0045, "step": 54740 }, { "epoch": 1.629149122937527, "grad_norm": 0.18916930258274078, "learning_rate": 5.121170184336482e-06, "loss": 0.0027, "step": 54750 }, { "epoch": 1.6294466844211686, "grad_norm": 0.14723040163516998, "learning_rate": 5.119439513420481e-06, "loss": 0.0026, "step": 54760 }, { "epoch": 1.62974424590481, "grad_norm": 0.099269799888134, "learning_rate": 5.117708828186278e-06, "loss": 0.0057, "step": 54770 }, { "epoch": 1.6300418073884515, "grad_norm": 0.10111155360937119, "learning_rate": 5.115978128841344e-06, "loss": 0.0042, "step": 54780 }, { "epoch": 1.630339368872093, "grad_norm": 0.2885419726371765, "learning_rate": 5.114247415593153e-06, "loss": 0.0041, "step": 54790 }, { "epoch": 1.6306369303557346, "grad_norm": 0.3937731981277466, "learning_rate": 5.112516688649177e-06, "loss": 0.004, "step": 54800 }, { "epoch": 1.6309344918393762, "grad_norm": 0.20909184217453003, "learning_rate": 5.110785948216898e-06, "loss": 0.0041, "step": 54810 }, { "epoch": 1.6312320533230178, "grad_norm": 0.10926925390958786, "learning_rate": 5.109055194503787e-06, "loss": 0.0033, "step": 54820 }, { "epoch": 1.6315296148066594, "grad_norm": 0.16758236289024353, "learning_rate": 5.1073244277173285e-06, "loss": 0.0045, "step": 54830 }, { "epoch": 1.631827176290301, "grad_norm": 0.3279787302017212, "learning_rate": 5.1055936480650035e-06, "loss": 0.0052, "step": 54840 }, { "epoch": 1.6321247377739425, "grad_norm": 0.22118115425109863, "learning_rate": 5.103862855754295e-06, "loss": 0.004, "step": 54850 }, { "epoch": 1.632422299257584, "grad_norm": 0.4436344504356384, "learning_rate": 5.102132050992685e-06, "loss": 0.0065, "step": 54860 }, { "epoch": 1.6327198607412257, "grad_norm": 0.18536227941513062, "learning_rate": 5.100401233987662e-06, "loss": 0.0042, "step": 54870 }, { "epoch": 1.6330174222248672, "grad_norm": 0.20459330081939697, "learning_rate": 5.098670404946713e-06, "loss": 0.0041, "step": 54880 }, { "epoch": 1.6333149837085088, "grad_norm": 0.22469201683998108, "learning_rate": 5.096939564077324e-06, "loss": 0.0022, "step": 54890 }, { "epoch": 1.6336125451921504, "grad_norm": 0.27409347891807556, "learning_rate": 5.0952087115869895e-06, "loss": 0.0034, "step": 54900 }, { "epoch": 1.633910106675792, "grad_norm": 0.13553258776664734, "learning_rate": 5.093477847683198e-06, "loss": 0.0042, "step": 54910 }, { "epoch": 1.6342076681594335, "grad_norm": 0.16283895075321198, "learning_rate": 5.0917469725734434e-06, "loss": 0.0036, "step": 54920 }, { "epoch": 1.634505229643075, "grad_norm": 0.07208496332168579, "learning_rate": 5.090016086465221e-06, "loss": 0.0039, "step": 54930 }, { "epoch": 1.6348027911267167, "grad_norm": 0.17964860796928406, "learning_rate": 5.088285189566024e-06, "loss": 0.003, "step": 54940 }, { "epoch": 1.6351003526103582, "grad_norm": 0.21796870231628418, "learning_rate": 5.086554282083353e-06, "loss": 0.0028, "step": 54950 }, { "epoch": 1.6353979140939998, "grad_norm": 0.12400040775537491, "learning_rate": 5.084823364224701e-06, "loss": 0.0076, "step": 54960 }, { "epoch": 1.6356954755776414, "grad_norm": 0.08043141663074493, "learning_rate": 5.083092436197573e-06, "loss": 0.0044, "step": 54970 }, { "epoch": 1.6359930370612827, "grad_norm": 0.2278386950492859, "learning_rate": 5.081361498209466e-06, "loss": 0.0035, "step": 54980 }, { "epoch": 1.6362905985449243, "grad_norm": 0.19533710181713104, "learning_rate": 5.079630550467885e-06, "loss": 0.0026, "step": 54990 }, { "epoch": 1.6365881600285659, "grad_norm": 0.18474355340003967, "learning_rate": 5.077899593180328e-06, "loss": 0.0038, "step": 55000 }, { "epoch": 1.6368857215122075, "grad_norm": 0.2664247751235962, "learning_rate": 5.076168626554304e-06, "loss": 0.004, "step": 55010 }, { "epoch": 1.637183282995849, "grad_norm": 0.059164173901081085, "learning_rate": 5.0744376507973156e-06, "loss": 0.0032, "step": 55020 }, { "epoch": 1.6374808444794906, "grad_norm": 0.0985894650220871, "learning_rate": 5.072706666116872e-06, "loss": 0.0035, "step": 55030 }, { "epoch": 1.6377784059631322, "grad_norm": 0.12262377142906189, "learning_rate": 5.070975672720477e-06, "loss": 0.0029, "step": 55040 }, { "epoch": 1.6380759674467735, "grad_norm": 0.12567780911922455, "learning_rate": 5.069244670815642e-06, "loss": 0.0032, "step": 55050 }, { "epoch": 1.638373528930415, "grad_norm": 0.15668818354606628, "learning_rate": 5.067513660609874e-06, "loss": 0.004, "step": 55060 }, { "epoch": 1.6386710904140567, "grad_norm": 0.1600985825061798, "learning_rate": 5.065782642310687e-06, "loss": 0.0034, "step": 55070 }, { "epoch": 1.6389686518976982, "grad_norm": 0.20737439393997192, "learning_rate": 5.064051616125588e-06, "loss": 0.0034, "step": 55080 }, { "epoch": 1.6392662133813398, "grad_norm": 0.3326326906681061, "learning_rate": 5.062320582262093e-06, "loss": 0.003, "step": 55090 }, { "epoch": 1.6395637748649814, "grad_norm": 0.3013279139995575, "learning_rate": 5.060589540927714e-06, "loss": 0.0046, "step": 55100 }, { "epoch": 1.639861336348623, "grad_norm": 0.3270677328109741, "learning_rate": 5.058858492329966e-06, "loss": 0.0042, "step": 55110 }, { "epoch": 1.6401588978322645, "grad_norm": 0.11920859664678574, "learning_rate": 5.057127436676362e-06, "loss": 0.0034, "step": 55120 }, { "epoch": 1.6404564593159061, "grad_norm": 0.2006688117980957, "learning_rate": 5.055396374174421e-06, "loss": 0.0029, "step": 55130 }, { "epoch": 1.6407540207995477, "grad_norm": 0.12867598235607147, "learning_rate": 5.053665305031659e-06, "loss": 0.0034, "step": 55140 }, { "epoch": 1.6410515822831893, "grad_norm": 0.12425421178340912, "learning_rate": 5.0519342294555905e-06, "loss": 0.0028, "step": 55150 }, { "epoch": 1.6413491437668308, "grad_norm": 0.16368703544139862, "learning_rate": 5.0502031476537375e-06, "loss": 0.0044, "step": 55160 }, { "epoch": 1.6416467052504724, "grad_norm": 0.13092859089374542, "learning_rate": 5.048472059833618e-06, "loss": 0.002, "step": 55170 }, { "epoch": 1.641944266734114, "grad_norm": 0.22406408190727234, "learning_rate": 5.046740966202751e-06, "loss": 0.0045, "step": 55180 }, { "epoch": 1.6422418282177556, "grad_norm": 0.2216290533542633, "learning_rate": 5.0450098669686575e-06, "loss": 0.0052, "step": 55190 }, { "epoch": 1.6425393897013971, "grad_norm": 0.09718355536460876, "learning_rate": 5.04327876233886e-06, "loss": 0.0062, "step": 55200 }, { "epoch": 1.6428369511850387, "grad_norm": 0.1779540479183197, "learning_rate": 5.041547652520878e-06, "loss": 0.0032, "step": 55210 }, { "epoch": 1.6431345126686803, "grad_norm": 0.14144796133041382, "learning_rate": 5.039816537722236e-06, "loss": 0.0031, "step": 55220 }, { "epoch": 1.6434320741523218, "grad_norm": 0.09228905290365219, "learning_rate": 5.0380854181504554e-06, "loss": 0.0029, "step": 55230 }, { "epoch": 1.6437296356359634, "grad_norm": 0.21910135447978973, "learning_rate": 5.036354294013061e-06, "loss": 0.0034, "step": 55240 }, { "epoch": 1.644027197119605, "grad_norm": 0.14857469499111176, "learning_rate": 5.034623165517576e-06, "loss": 0.0063, "step": 55250 }, { "epoch": 1.6443247586032466, "grad_norm": 0.10656417161226273, "learning_rate": 5.032892032871527e-06, "loss": 0.0053, "step": 55260 }, { "epoch": 1.644622320086888, "grad_norm": 0.099132239818573, "learning_rate": 5.031160896282438e-06, "loss": 0.0048, "step": 55270 }, { "epoch": 1.6449198815705295, "grad_norm": 0.42100051045417786, "learning_rate": 5.029429755957832e-06, "loss": 0.0039, "step": 55280 }, { "epoch": 1.645217443054171, "grad_norm": 0.18766707181930542, "learning_rate": 5.027698612105241e-06, "loss": 0.003, "step": 55290 }, { "epoch": 1.6455150045378126, "grad_norm": 0.09528530389070511, "learning_rate": 5.0259674649321856e-06, "loss": 0.0032, "step": 55300 }, { "epoch": 1.6458125660214542, "grad_norm": 0.1566033661365509, "learning_rate": 5.024236314646197e-06, "loss": 0.0029, "step": 55310 }, { "epoch": 1.6461101275050958, "grad_norm": 0.14703330397605896, "learning_rate": 5.022505161454799e-06, "loss": 0.0032, "step": 55320 }, { "epoch": 1.6464076889887374, "grad_norm": 0.09615440666675568, "learning_rate": 5.020774005565523e-06, "loss": 0.0037, "step": 55330 }, { "epoch": 1.6467052504723787, "grad_norm": 0.18737339973449707, "learning_rate": 5.019042847185893e-06, "loss": 0.0051, "step": 55340 }, { "epoch": 1.6470028119560203, "grad_norm": 0.2624579966068268, "learning_rate": 5.0173116865234414e-06, "loss": 0.0043, "step": 55350 }, { "epoch": 1.6473003734396618, "grad_norm": 0.1044483557343483, "learning_rate": 5.015580523785694e-06, "loss": 0.0028, "step": 55360 }, { "epoch": 1.6475979349233034, "grad_norm": 0.12447598576545715, "learning_rate": 5.013849359180181e-06, "loss": 0.0037, "step": 55370 }, { "epoch": 1.647895496406945, "grad_norm": 0.16661718487739563, "learning_rate": 5.012118192914428e-06, "loss": 0.0058, "step": 55380 }, { "epoch": 1.6481930578905866, "grad_norm": 0.03534471243619919, "learning_rate": 5.010387025195969e-06, "loss": 0.0035, "step": 55390 }, { "epoch": 1.6484906193742281, "grad_norm": 0.10323190689086914, "learning_rate": 5.008655856232331e-06, "loss": 0.0033, "step": 55400 }, { "epoch": 1.6487881808578697, "grad_norm": 0.26897096633911133, "learning_rate": 5.006924686231045e-06, "loss": 0.003, "step": 55410 }, { "epoch": 1.6490857423415113, "grad_norm": 0.12555263936519623, "learning_rate": 5.005193515399638e-06, "loss": 0.0034, "step": 55420 }, { "epoch": 1.6493833038251529, "grad_norm": 0.13420341908931732, "learning_rate": 5.003462343945642e-06, "loss": 0.0037, "step": 55430 }, { "epoch": 1.6496808653087944, "grad_norm": 0.07930833846330643, "learning_rate": 5.001731172076586e-06, "loss": 0.0037, "step": 55440 }, { "epoch": 1.649978426792436, "grad_norm": 0.24981750547885895, "learning_rate": 5e-06, "loss": 0.0036, "step": 55450 }, { "epoch": 1.6502759882760776, "grad_norm": 0.2054806500673294, "learning_rate": 4.998268827923416e-06, "loss": 0.0031, "step": 55460 }, { "epoch": 1.6505735497597191, "grad_norm": 0.3379395604133606, "learning_rate": 4.99653765605436e-06, "loss": 0.0066, "step": 55470 }, { "epoch": 1.6508711112433607, "grad_norm": 0.20931532979011536, "learning_rate": 4.9948064846003645e-06, "loss": 0.0025, "step": 55480 }, { "epoch": 1.6511686727270023, "grad_norm": 0.2933846414089203, "learning_rate": 4.993075313768957e-06, "loss": 0.0039, "step": 55490 }, { "epoch": 1.6514662342106439, "grad_norm": 0.18392445147037506, "learning_rate": 4.991344143767671e-06, "loss": 0.0046, "step": 55500 }, { "epoch": 1.6517637956942854, "grad_norm": 0.25956520438194275, "learning_rate": 4.989612974804032e-06, "loss": 0.0085, "step": 55510 }, { "epoch": 1.652061357177927, "grad_norm": 0.48596298694610596, "learning_rate": 4.987881807085574e-06, "loss": 0.0045, "step": 55520 }, { "epoch": 1.6523589186615686, "grad_norm": 0.16974377632141113, "learning_rate": 4.986150640819822e-06, "loss": 0.0047, "step": 55530 }, { "epoch": 1.6526564801452102, "grad_norm": 0.07342176884412766, "learning_rate": 4.984419476214309e-06, "loss": 0.0024, "step": 55540 }, { "epoch": 1.6529540416288515, "grad_norm": 0.16973388195037842, "learning_rate": 4.982688313476561e-06, "loss": 0.0031, "step": 55550 }, { "epoch": 1.653251603112493, "grad_norm": 0.14145155251026154, "learning_rate": 4.9809571528141095e-06, "loss": 0.0049, "step": 55560 }, { "epoch": 1.6535491645961347, "grad_norm": 0.12539812922477722, "learning_rate": 4.97922599443448e-06, "loss": 0.0051, "step": 55570 }, { "epoch": 1.6538467260797762, "grad_norm": 0.28829535841941833, "learning_rate": 4.977494838545204e-06, "loss": 0.004, "step": 55580 }, { "epoch": 1.6541442875634178, "grad_norm": 0.051484838128089905, "learning_rate": 4.975763685353806e-06, "loss": 0.0047, "step": 55590 }, { "epoch": 1.6544418490470594, "grad_norm": 0.2202942669391632, "learning_rate": 4.974032535067815e-06, "loss": 0.0033, "step": 55600 }, { "epoch": 1.654739410530701, "grad_norm": 0.028530240058898926, "learning_rate": 4.9723013878947614e-06, "loss": 0.0057, "step": 55610 }, { "epoch": 1.6550369720143423, "grad_norm": 0.05518585443496704, "learning_rate": 4.970570244042168e-06, "loss": 0.0055, "step": 55620 }, { "epoch": 1.6553345334979839, "grad_norm": 0.11716295033693314, "learning_rate": 4.968839103717565e-06, "loss": 0.0029, "step": 55630 }, { "epoch": 1.6556320949816254, "grad_norm": 0.02873067371547222, "learning_rate": 4.967107967128474e-06, "loss": 0.0038, "step": 55640 }, { "epoch": 1.655929656465267, "grad_norm": 0.13462120294570923, "learning_rate": 4.965376834482425e-06, "loss": 0.0035, "step": 55650 }, { "epoch": 1.6562272179489086, "grad_norm": 0.14086854457855225, "learning_rate": 4.963645705986939e-06, "loss": 0.0036, "step": 55660 }, { "epoch": 1.6565247794325502, "grad_norm": 0.23343370854854584, "learning_rate": 4.961914581849546e-06, "loss": 0.005, "step": 55670 }, { "epoch": 1.6568223409161917, "grad_norm": 0.303772509098053, "learning_rate": 4.960183462277765e-06, "loss": 0.0069, "step": 55680 }, { "epoch": 1.6571199023998333, "grad_norm": 0.1350133866071701, "learning_rate": 4.958452347479123e-06, "loss": 0.0029, "step": 55690 }, { "epoch": 1.6574174638834749, "grad_norm": 0.1743207424879074, "learning_rate": 4.956721237661142e-06, "loss": 0.0055, "step": 55700 }, { "epoch": 1.6577150253671165, "grad_norm": 0.24805948138237, "learning_rate": 4.954990133031344e-06, "loss": 0.0036, "step": 55710 }, { "epoch": 1.658012586850758, "grad_norm": 0.1748303920030594, "learning_rate": 4.95325903379725e-06, "loss": 0.0043, "step": 55720 }, { "epoch": 1.6583101483343996, "grad_norm": 0.04535311460494995, "learning_rate": 4.9515279401663844e-06, "loss": 0.0026, "step": 55730 }, { "epoch": 1.6586077098180412, "grad_norm": 0.10655993968248367, "learning_rate": 4.949796852346263e-06, "loss": 0.0045, "step": 55740 }, { "epoch": 1.6589052713016827, "grad_norm": 0.16218110918998718, "learning_rate": 4.948065770544411e-06, "loss": 0.0032, "step": 55750 }, { "epoch": 1.6592028327853243, "grad_norm": 0.14854881167411804, "learning_rate": 4.9463346949683436e-06, "loss": 0.0045, "step": 55760 }, { "epoch": 1.659500394268966, "grad_norm": 0.09468226879835129, "learning_rate": 4.944603625825581e-06, "loss": 0.0028, "step": 55770 }, { "epoch": 1.6597979557526075, "grad_norm": 0.2202305793762207, "learning_rate": 4.942872563323639e-06, "loss": 0.0049, "step": 55780 }, { "epoch": 1.660095517236249, "grad_norm": 0.14321589469909668, "learning_rate": 4.941141507670036e-06, "loss": 0.0036, "step": 55790 }, { "epoch": 1.6603930787198906, "grad_norm": 0.17714138329029083, "learning_rate": 4.939410459072287e-06, "loss": 0.0043, "step": 55800 }, { "epoch": 1.6606906402035322, "grad_norm": 0.09543219953775406, "learning_rate": 4.937679417737909e-06, "loss": 0.0034, "step": 55810 }, { "epoch": 1.6609882016871738, "grad_norm": 0.07835899293422699, "learning_rate": 4.9359483838744145e-06, "loss": 0.004, "step": 55820 }, { "epoch": 1.6612857631708153, "grad_norm": 0.21526913344860077, "learning_rate": 4.934217357689316e-06, "loss": 0.0041, "step": 55830 }, { "epoch": 1.6615833246544567, "grad_norm": 0.3057086169719696, "learning_rate": 4.9324863393901284e-06, "loss": 0.0045, "step": 55840 }, { "epoch": 1.6618808861380983, "grad_norm": 0.11818304657936096, "learning_rate": 4.9307553291843605e-06, "loss": 0.0029, "step": 55850 }, { "epoch": 1.6621784476217398, "grad_norm": 0.13633370399475098, "learning_rate": 4.9290243272795255e-06, "loss": 0.0025, "step": 55860 }, { "epoch": 1.6624760091053814, "grad_norm": 0.19310986995697021, "learning_rate": 4.927293333883131e-06, "loss": 0.0034, "step": 55870 }, { "epoch": 1.662773570589023, "grad_norm": 0.10228358209133148, "learning_rate": 4.9255623492026836e-06, "loss": 0.0049, "step": 55880 }, { "epoch": 1.6630711320726645, "grad_norm": 0.1925053596496582, "learning_rate": 4.923831373445697e-06, "loss": 0.0024, "step": 55890 }, { "epoch": 1.6633686935563061, "grad_norm": 0.16343078017234802, "learning_rate": 4.922100406819672e-06, "loss": 0.0041, "step": 55900 }, { "epoch": 1.6636662550399475, "grad_norm": 0.16587327420711517, "learning_rate": 4.920369449532117e-06, "loss": 0.0043, "step": 55910 }, { "epoch": 1.663963816523589, "grad_norm": 0.3021373152732849, "learning_rate": 4.918638501790534e-06, "loss": 0.004, "step": 55920 }, { "epoch": 1.6642613780072306, "grad_norm": 0.2846515476703644, "learning_rate": 4.916907563802428e-06, "loss": 0.0024, "step": 55930 }, { "epoch": 1.6645589394908722, "grad_norm": 0.26256731152534485, "learning_rate": 4.915176635775299e-06, "loss": 0.0053, "step": 55940 }, { "epoch": 1.6648565009745138, "grad_norm": 0.2392265349626541, "learning_rate": 4.91344571791665e-06, "loss": 0.0034, "step": 55950 }, { "epoch": 1.6651540624581553, "grad_norm": 0.19955122470855713, "learning_rate": 4.9117148104339765e-06, "loss": 0.0043, "step": 55960 }, { "epoch": 1.665451623941797, "grad_norm": 0.009682849980890751, "learning_rate": 4.909983913534781e-06, "loss": 0.003, "step": 55970 }, { "epoch": 1.6657491854254385, "grad_norm": 0.11790639162063599, "learning_rate": 4.908253027426557e-06, "loss": 0.0033, "step": 55980 }, { "epoch": 1.66604674690908, "grad_norm": 0.26129263639450073, "learning_rate": 4.906522152316804e-06, "loss": 0.0046, "step": 55990 }, { "epoch": 1.6663443083927216, "grad_norm": 0.3894478976726532, "learning_rate": 4.904791288413011e-06, "loss": 0.0043, "step": 56000 }, { "epoch": 1.6666418698763632, "grad_norm": 0.20467694103717804, "learning_rate": 4.903060435922677e-06, "loss": 0.0025, "step": 56010 }, { "epoch": 1.6669394313600048, "grad_norm": 0.11582999676465988, "learning_rate": 4.901329595053289e-06, "loss": 0.0032, "step": 56020 }, { "epoch": 1.6672369928436463, "grad_norm": 0.14877274632453918, "learning_rate": 4.899598766012339e-06, "loss": 0.0049, "step": 56030 }, { "epoch": 1.667534554327288, "grad_norm": 0.08623144030570984, "learning_rate": 4.8978679490073165e-06, "loss": 0.0032, "step": 56040 }, { "epoch": 1.6678321158109295, "grad_norm": 0.11653853952884674, "learning_rate": 4.896137144245707e-06, "loss": 0.0021, "step": 56050 }, { "epoch": 1.668129677294571, "grad_norm": 0.1539393812417984, "learning_rate": 4.894406351934997e-06, "loss": 0.0109, "step": 56060 }, { "epoch": 1.6684272387782126, "grad_norm": 0.014497143216431141, "learning_rate": 4.892675572282673e-06, "loss": 0.0031, "step": 56070 }, { "epoch": 1.6687248002618542, "grad_norm": 0.11516312509775162, "learning_rate": 4.890944805496215e-06, "loss": 0.0035, "step": 56080 }, { "epoch": 1.6690223617454958, "grad_norm": 0.19675350189208984, "learning_rate": 4.889214051783106e-06, "loss": 0.003, "step": 56090 }, { "epoch": 1.6693199232291374, "grad_norm": 0.04715947434306145, "learning_rate": 4.887483311350824e-06, "loss": 0.0027, "step": 56100 }, { "epoch": 1.669617484712779, "grad_norm": 0.2835570275783539, "learning_rate": 4.885752584406849e-06, "loss": 0.0026, "step": 56110 }, { "epoch": 1.6699150461964203, "grad_norm": 0.1410444974899292, "learning_rate": 4.884021871158657e-06, "loss": 0.0022, "step": 56120 }, { "epoch": 1.6702126076800619, "grad_norm": 0.18401648104190826, "learning_rate": 4.882291171813724e-06, "loss": 0.005, "step": 56130 }, { "epoch": 1.6705101691637034, "grad_norm": 0.06366870552301407, "learning_rate": 4.88056048657952e-06, "loss": 0.0038, "step": 56140 }, { "epoch": 1.670807730647345, "grad_norm": 0.1875448226928711, "learning_rate": 4.878829815663521e-06, "loss": 0.0029, "step": 56150 }, { "epoch": 1.6711052921309866, "grad_norm": 0.14915186166763306, "learning_rate": 4.877099159273192e-06, "loss": 0.0032, "step": 56160 }, { "epoch": 1.6714028536146281, "grad_norm": 0.07840016484260559, "learning_rate": 4.875368517616005e-06, "loss": 0.0054, "step": 56170 }, { "epoch": 1.6717004150982697, "grad_norm": 0.08204205334186554, "learning_rate": 4.8736378908994256e-06, "loss": 0.0036, "step": 56180 }, { "epoch": 1.671997976581911, "grad_norm": 0.10206648707389832, "learning_rate": 4.871907279330916e-06, "loss": 0.0028, "step": 56190 }, { "epoch": 1.6722955380655526, "grad_norm": 0.09489230811595917, "learning_rate": 4.870176683117943e-06, "loss": 0.0029, "step": 56200 }, { "epoch": 1.6725930995491942, "grad_norm": 0.19411079585552216, "learning_rate": 4.868446102467963e-06, "loss": 0.0064, "step": 56210 }, { "epoch": 1.6728906610328358, "grad_norm": 0.2568354606628418, "learning_rate": 4.866715537588439e-06, "loss": 0.004, "step": 56220 }, { "epoch": 1.6731882225164774, "grad_norm": 0.11238200962543488, "learning_rate": 4.864984988686826e-06, "loss": 0.0076, "step": 56230 }, { "epoch": 1.673485784000119, "grad_norm": 0.08156625926494598, "learning_rate": 4.863254455970579e-06, "loss": 0.0023, "step": 56240 }, { "epoch": 1.6737833454837605, "grad_norm": 0.20975807309150696, "learning_rate": 4.861523939647152e-06, "loss": 0.0029, "step": 56250 }, { "epoch": 1.674080906967402, "grad_norm": 0.3189634382724762, "learning_rate": 4.859793439923996e-06, "loss": 0.0055, "step": 56260 }, { "epoch": 1.6743784684510437, "grad_norm": 0.11717194318771362, "learning_rate": 4.8580629570085604e-06, "loss": 0.0024, "step": 56270 }, { "epoch": 1.6746760299346852, "grad_norm": 0.20558616518974304, "learning_rate": 4.856332491108292e-06, "loss": 0.0038, "step": 56280 }, { "epoch": 1.6749735914183268, "grad_norm": 0.13458704948425293, "learning_rate": 4.854602042430638e-06, "loss": 0.0048, "step": 56290 }, { "epoch": 1.6752711529019684, "grad_norm": 0.08589571714401245, "learning_rate": 4.852871611183039e-06, "loss": 0.0041, "step": 56300 }, { "epoch": 1.67556871438561, "grad_norm": 0.17966203391551971, "learning_rate": 4.851141197572938e-06, "loss": 0.0043, "step": 56310 }, { "epoch": 1.6758662758692515, "grad_norm": 0.16052743792533875, "learning_rate": 4.8494108018077715e-06, "loss": 0.0037, "step": 56320 }, { "epoch": 1.676163837352893, "grad_norm": 0.16170631349086761, "learning_rate": 4.847680424094981e-06, "loss": 0.0064, "step": 56330 }, { "epoch": 1.6764613988365347, "grad_norm": 0.15063852071762085, "learning_rate": 4.845950064641996e-06, "loss": 0.0029, "step": 56340 }, { "epoch": 1.6767589603201762, "grad_norm": 0.3153272271156311, "learning_rate": 4.844219723656253e-06, "loss": 0.0044, "step": 56350 }, { "epoch": 1.6770565218038178, "grad_norm": 0.23320835828781128, "learning_rate": 4.842489401345179e-06, "loss": 0.0048, "step": 56360 }, { "epoch": 1.6773540832874594, "grad_norm": 0.20228900015354156, "learning_rate": 4.840759097916205e-06, "loss": 0.0024, "step": 56370 }, { "epoch": 1.677651644771101, "grad_norm": 0.08169365674257278, "learning_rate": 4.839028813576755e-06, "loss": 0.0038, "step": 56380 }, { "epoch": 1.6779492062547425, "grad_norm": 0.1452522873878479, "learning_rate": 4.837298548534253e-06, "loss": 0.0024, "step": 56390 }, { "epoch": 1.678246767738384, "grad_norm": 0.13801787793636322, "learning_rate": 4.8355683029961196e-06, "loss": 0.0027, "step": 56400 }, { "epoch": 1.6785443292220255, "grad_norm": 0.12615299224853516, "learning_rate": 4.833838077169776e-06, "loss": 0.0057, "step": 56410 }, { "epoch": 1.678841890705667, "grad_norm": 0.2591046988964081, "learning_rate": 4.8321078712626346e-06, "loss": 0.0032, "step": 56420 }, { "epoch": 1.6791394521893086, "grad_norm": 0.185095876455307, "learning_rate": 4.830377685482114e-06, "loss": 0.0022, "step": 56430 }, { "epoch": 1.6794370136729502, "grad_norm": 0.23058263957500458, "learning_rate": 4.828647520035622e-06, "loss": 0.0032, "step": 56440 }, { "epoch": 1.6797345751565917, "grad_norm": 0.10794201493263245, "learning_rate": 4.826917375130568e-06, "loss": 0.0033, "step": 56450 }, { "epoch": 1.6800321366402333, "grad_norm": 0.1734289526939392, "learning_rate": 4.825187250974363e-06, "loss": 0.004, "step": 56460 }, { "epoch": 1.680329698123875, "grad_norm": 0.04709816351532936, "learning_rate": 4.823457147774408e-06, "loss": 0.0034, "step": 56470 }, { "epoch": 1.6806272596075162, "grad_norm": 0.1356913149356842, "learning_rate": 4.821727065738106e-06, "loss": 0.0042, "step": 56480 }, { "epoch": 1.6809248210911578, "grad_norm": 0.1518930047750473, "learning_rate": 4.819997005072855e-06, "loss": 0.0036, "step": 56490 }, { "epoch": 1.6812223825747994, "grad_norm": 0.2429969161748886, "learning_rate": 4.818266965986053e-06, "loss": 0.0056, "step": 56500 }, { "epoch": 1.681519944058441, "grad_norm": 0.1948884129524231, "learning_rate": 4.816536948685091e-06, "loss": 0.0031, "step": 56510 }, { "epoch": 1.6818175055420825, "grad_norm": 0.47138628363609314, "learning_rate": 4.814806953377366e-06, "loss": 0.0077, "step": 56520 }, { "epoch": 1.682115067025724, "grad_norm": 0.032420817762613297, "learning_rate": 4.8130769802702624e-06, "loss": 0.0035, "step": 56530 }, { "epoch": 1.6824126285093657, "grad_norm": 0.1576841026544571, "learning_rate": 4.811347029571168e-06, "loss": 0.0034, "step": 56540 }, { "epoch": 1.6827101899930073, "grad_norm": 0.05432259663939476, "learning_rate": 4.809617101487466e-06, "loss": 0.0039, "step": 56550 }, { "epoch": 1.6830077514766488, "grad_norm": 0.1394440084695816, "learning_rate": 4.807887196226538e-06, "loss": 0.0038, "step": 56560 }, { "epoch": 1.6833053129602904, "grad_norm": 0.226559579372406, "learning_rate": 4.8061573139957595e-06, "loss": 0.0025, "step": 56570 }, { "epoch": 1.683602874443932, "grad_norm": 0.08510715514421463, "learning_rate": 4.804427455002509e-06, "loss": 0.0041, "step": 56580 }, { "epoch": 1.6839004359275735, "grad_norm": 0.20885685086250305, "learning_rate": 4.802697619454158e-06, "loss": 0.0028, "step": 56590 }, { "epoch": 1.6841979974112151, "grad_norm": 0.4100850224494934, "learning_rate": 4.800967807558075e-06, "loss": 0.0038, "step": 56600 }, { "epoch": 1.6844955588948567, "grad_norm": 0.16078534722328186, "learning_rate": 4.799238019521628e-06, "loss": 0.0033, "step": 56610 }, { "epoch": 1.6847931203784983, "grad_norm": 0.16416892409324646, "learning_rate": 4.79750825555218e-06, "loss": 0.0016, "step": 56620 }, { "epoch": 1.6850906818621398, "grad_norm": 0.06953554600477219, "learning_rate": 4.795778515857094e-06, "loss": 0.0034, "step": 56630 }, { "epoch": 1.6853882433457814, "grad_norm": 0.10239098966121674, "learning_rate": 4.7940488006437266e-06, "loss": 0.0029, "step": 56640 }, { "epoch": 1.685685804829423, "grad_norm": 0.0762387216091156, "learning_rate": 4.792319110119433e-06, "loss": 0.01, "step": 56650 }, { "epoch": 1.6859833663130646, "grad_norm": 0.29422590136528015, "learning_rate": 4.790589444491566e-06, "loss": 0.0043, "step": 56660 }, { "epoch": 1.6862809277967061, "grad_norm": 0.08651722967624664, "learning_rate": 4.788859803967476e-06, "loss": 0.0047, "step": 56670 }, { "epoch": 1.6865784892803477, "grad_norm": 0.10867974162101746, "learning_rate": 4.7871301887545066e-06, "loss": 0.003, "step": 56680 }, { "epoch": 1.686876050763989, "grad_norm": 0.13351969420909882, "learning_rate": 4.785400599060005e-06, "loss": 0.0044, "step": 56690 }, { "epoch": 1.6871736122476306, "grad_norm": 0.05518138408660889, "learning_rate": 4.7836710350913064e-06, "loss": 0.0026, "step": 56700 }, { "epoch": 1.6874711737312722, "grad_norm": 0.0641494169831276, "learning_rate": 4.781941497055753e-06, "loss": 0.0056, "step": 56710 }, { "epoch": 1.6877687352149138, "grad_norm": 0.22122174501419067, "learning_rate": 4.7802119851606755e-06, "loss": 0.0045, "step": 56720 }, { "epoch": 1.6880662966985553, "grad_norm": 0.12050977349281311, "learning_rate": 4.778482499613404e-06, "loss": 0.0033, "step": 56730 }, { "epoch": 1.688363858182197, "grad_norm": 0.18478740751743317, "learning_rate": 4.776753040621271e-06, "loss": 0.0028, "step": 56740 }, { "epoch": 1.6886614196658385, "grad_norm": 0.06819164752960205, "learning_rate": 4.775023608391596e-06, "loss": 0.0024, "step": 56750 }, { "epoch": 1.68895898114948, "grad_norm": 0.32657650113105774, "learning_rate": 4.773294203131705e-06, "loss": 0.0044, "step": 56760 }, { "epoch": 1.6892565426331214, "grad_norm": 0.09737192839384079, "learning_rate": 4.771564825048912e-06, "loss": 0.0055, "step": 56770 }, { "epoch": 1.689554104116763, "grad_norm": 0.037984658032655716, "learning_rate": 4.769835474350535e-06, "loss": 0.002, "step": 56780 }, { "epoch": 1.6898516656004046, "grad_norm": 0.07556426525115967, "learning_rate": 4.768106151243881e-06, "loss": 0.0032, "step": 56790 }, { "epoch": 1.6901492270840461, "grad_norm": 0.1032811775803566, "learning_rate": 4.766376855936265e-06, "loss": 0.0058, "step": 56800 }, { "epoch": 1.6904467885676877, "grad_norm": 0.23715433478355408, "learning_rate": 4.764647588634985e-06, "loss": 0.0072, "step": 56810 }, { "epoch": 1.6907443500513293, "grad_norm": 0.12932096421718597, "learning_rate": 4.762918349547349e-06, "loss": 0.0048, "step": 56820 }, { "epoch": 1.6910419115349709, "grad_norm": 0.12354619055986404, "learning_rate": 4.761189138880649e-06, "loss": 0.0042, "step": 56830 }, { "epoch": 1.6913394730186124, "grad_norm": 0.08872750401496887, "learning_rate": 4.759459956842185e-06, "loss": 0.0014, "step": 56840 }, { "epoch": 1.691637034502254, "grad_norm": 0.18491555750370026, "learning_rate": 4.757730803639245e-06, "loss": 0.0034, "step": 56850 }, { "epoch": 1.6919345959858956, "grad_norm": 0.2067139595746994, "learning_rate": 4.756001679479119e-06, "loss": 0.0041, "step": 56860 }, { "epoch": 1.6922321574695371, "grad_norm": 0.1435679793357849, "learning_rate": 4.754272584569089e-06, "loss": 0.0052, "step": 56870 }, { "epoch": 1.6925297189531787, "grad_norm": 0.0892552062869072, "learning_rate": 4.752543519116439e-06, "loss": 0.004, "step": 56880 }, { "epoch": 1.6928272804368203, "grad_norm": 0.006167230661958456, "learning_rate": 4.750814483328444e-06, "loss": 0.003, "step": 56890 }, { "epoch": 1.6931248419204619, "grad_norm": 0.14628039300441742, "learning_rate": 4.749085477412379e-06, "loss": 0.0029, "step": 56900 }, { "epoch": 1.6934224034041034, "grad_norm": 0.15291334688663483, "learning_rate": 4.747356501575514e-06, "loss": 0.0062, "step": 56910 }, { "epoch": 1.693719964887745, "grad_norm": 0.17629167437553406, "learning_rate": 4.745627556025116e-06, "loss": 0.0052, "step": 56920 }, { "epoch": 1.6940175263713866, "grad_norm": 0.06122083216905594, "learning_rate": 4.743898640968446e-06, "loss": 0.004, "step": 56930 }, { "epoch": 1.6943150878550282, "grad_norm": 0.5103716254234314, "learning_rate": 4.742169756612766e-06, "loss": 0.0076, "step": 56940 }, { "epoch": 1.6946126493386697, "grad_norm": 0.4265620708465576, "learning_rate": 4.74044090316533e-06, "loss": 0.0061, "step": 56950 }, { "epoch": 1.6949102108223113, "grad_norm": 0.10037688165903091, "learning_rate": 4.7387120808333905e-06, "loss": 0.0031, "step": 56960 }, { "epoch": 1.6952077723059529, "grad_norm": 0.29837802052497864, "learning_rate": 4.736983289824197e-06, "loss": 0.0039, "step": 56970 }, { "epoch": 1.6955053337895942, "grad_norm": 0.22506049275398254, "learning_rate": 4.735254530344992e-06, "loss": 0.0046, "step": 56980 }, { "epoch": 1.6958028952732358, "grad_norm": 0.3941097855567932, "learning_rate": 4.733525802603017e-06, "loss": 0.0059, "step": 56990 }, { "epoch": 1.6961004567568774, "grad_norm": 0.08296374976634979, "learning_rate": 4.731797106805508e-06, "loss": 0.0058, "step": 57000 }, { "epoch": 1.696398018240519, "grad_norm": 0.1400693953037262, "learning_rate": 4.730068443159697e-06, "loss": 0.0038, "step": 57010 }, { "epoch": 1.6966955797241605, "grad_norm": 0.1531883180141449, "learning_rate": 4.728339811872819e-06, "loss": 0.0046, "step": 57020 }, { "epoch": 1.696993141207802, "grad_norm": 0.241998553276062, "learning_rate": 4.726611213152092e-06, "loss": 0.0036, "step": 57030 }, { "epoch": 1.6972907026914437, "grad_norm": 0.16987302899360657, "learning_rate": 4.7248826472047445e-06, "loss": 0.0029, "step": 57040 }, { "epoch": 1.697588264175085, "grad_norm": 0.11169639229774475, "learning_rate": 4.723154114237988e-06, "loss": 0.0059, "step": 57050 }, { "epoch": 1.6978858256587266, "grad_norm": 0.1901177316904068, "learning_rate": 4.7214256144590395e-06, "loss": 0.0037, "step": 57060 }, { "epoch": 1.6981833871423682, "grad_norm": 0.13636434078216553, "learning_rate": 4.7196971480751065e-06, "loss": 0.007, "step": 57070 }, { "epoch": 1.6984809486260097, "grad_norm": 0.12377487123012543, "learning_rate": 4.7179687152933975e-06, "loss": 0.0035, "step": 57080 }, { "epoch": 1.6987785101096513, "grad_norm": 0.2209007740020752, "learning_rate": 4.716240316321109e-06, "loss": 0.0038, "step": 57090 }, { "epoch": 1.6990760715932929, "grad_norm": 0.09414348751306534, "learning_rate": 4.714511951365445e-06, "loss": 0.004, "step": 57100 }, { "epoch": 1.6993736330769345, "grad_norm": 0.10274631530046463, "learning_rate": 4.712783620633593e-06, "loss": 0.006, "step": 57110 }, { "epoch": 1.699671194560576, "grad_norm": 0.17512325942516327, "learning_rate": 4.7110553243327465e-06, "loss": 0.0027, "step": 57120 }, { "epoch": 1.6999687560442176, "grad_norm": 0.14717093110084534, "learning_rate": 4.709327062670088e-06, "loss": 0.003, "step": 57130 }, { "epoch": 1.7002663175278592, "grad_norm": 0.14727899432182312, "learning_rate": 4.7075988358528e-06, "loss": 0.0045, "step": 57140 }, { "epoch": 1.7005638790115007, "grad_norm": 0.08432206511497498, "learning_rate": 4.705870644088058e-06, "loss": 0.005, "step": 57150 }, { "epoch": 1.7008614404951423, "grad_norm": 0.6536521315574646, "learning_rate": 4.7041424875830366e-06, "loss": 0.0068, "step": 57160 }, { "epoch": 1.701159001978784, "grad_norm": 0.13799165189266205, "learning_rate": 4.702414366544902e-06, "loss": 0.0046, "step": 57170 }, { "epoch": 1.7014565634624255, "grad_norm": 0.11153461039066315, "learning_rate": 4.700686281180821e-06, "loss": 0.0049, "step": 57180 }, { "epoch": 1.701754124946067, "grad_norm": 0.17598849534988403, "learning_rate": 4.69895823169795e-06, "loss": 0.0059, "step": 57190 }, { "epoch": 1.7020516864297086, "grad_norm": 0.18435035645961761, "learning_rate": 4.6972302183034485e-06, "loss": 0.0043, "step": 57200 }, { "epoch": 1.7023492479133502, "grad_norm": 0.08550169318914413, "learning_rate": 4.695502241204465e-06, "loss": 0.0038, "step": 57210 }, { "epoch": 1.7026468093969918, "grad_norm": 0.30766114592552185, "learning_rate": 4.693774300608148e-06, "loss": 0.0035, "step": 57220 }, { "epoch": 1.7029443708806333, "grad_norm": 0.14507806301116943, "learning_rate": 4.692046396721638e-06, "loss": 0.0031, "step": 57230 }, { "epoch": 1.703241932364275, "grad_norm": 0.0758257806301117, "learning_rate": 4.690318529752075e-06, "loss": 0.003, "step": 57240 }, { "epoch": 1.7035394938479165, "grad_norm": 0.2628871500492096, "learning_rate": 4.688590699906591e-06, "loss": 0.0061, "step": 57250 }, { "epoch": 1.703837055331558, "grad_norm": 0.14006197452545166, "learning_rate": 4.686862907392318e-06, "loss": 0.0024, "step": 57260 }, { "epoch": 1.7041346168151994, "grad_norm": 0.022878773510456085, "learning_rate": 4.685135152416378e-06, "loss": 0.0043, "step": 57270 }, { "epoch": 1.704432178298841, "grad_norm": 0.24138213694095612, "learning_rate": 4.683407435185894e-06, "loss": 0.0056, "step": 57280 }, { "epoch": 1.7047297397824825, "grad_norm": 0.060854751616716385, "learning_rate": 4.681679755907979e-06, "loss": 0.004, "step": 57290 }, { "epoch": 1.7050273012661241, "grad_norm": 0.11837515980005264, "learning_rate": 4.679952114789744e-06, "loss": 0.0034, "step": 57300 }, { "epoch": 1.7053248627497657, "grad_norm": 0.2811118960380554, "learning_rate": 4.678224512038299e-06, "loss": 0.0036, "step": 57310 }, { "epoch": 1.7056224242334073, "grad_norm": 0.2203330546617508, "learning_rate": 4.676496947860744e-06, "loss": 0.0052, "step": 57320 }, { "epoch": 1.7059199857170488, "grad_norm": 0.1543380320072174, "learning_rate": 4.6747694224641765e-06, "loss": 0.0028, "step": 57330 }, { "epoch": 1.7062175472006902, "grad_norm": 0.105213962495327, "learning_rate": 4.673041936055689e-06, "loss": 0.0056, "step": 57340 }, { "epoch": 1.7065151086843318, "grad_norm": 0.04868678003549576, "learning_rate": 4.67131448884237e-06, "loss": 0.0113, "step": 57350 }, { "epoch": 1.7068126701679733, "grad_norm": 0.12519285082817078, "learning_rate": 4.669587081031302e-06, "loss": 0.0027, "step": 57360 }, { "epoch": 1.707110231651615, "grad_norm": 0.22179298102855682, "learning_rate": 4.6678597128295665e-06, "loss": 0.0063, "step": 57370 }, { "epoch": 1.7074077931352565, "grad_norm": 0.23009146749973297, "learning_rate": 4.666132384444234e-06, "loss": 0.004, "step": 57380 }, { "epoch": 1.707705354618898, "grad_norm": 0.257159024477005, "learning_rate": 4.664405096082374e-06, "loss": 0.0023, "step": 57390 }, { "epoch": 1.7080029161025396, "grad_norm": 0.11335336416959763, "learning_rate": 4.662677847951054e-06, "loss": 0.0029, "step": 57400 }, { "epoch": 1.7083004775861812, "grad_norm": 0.19137506186962128, "learning_rate": 4.660950640257329e-06, "loss": 0.0038, "step": 57410 }, { "epoch": 1.7085980390698228, "grad_norm": 0.42624637484550476, "learning_rate": 4.6592234732082584e-06, "loss": 0.0045, "step": 57420 }, { "epoch": 1.7088956005534643, "grad_norm": 0.09640103578567505, "learning_rate": 4.657496347010887e-06, "loss": 0.0045, "step": 57430 }, { "epoch": 1.709193162037106, "grad_norm": 0.44437482953071594, "learning_rate": 4.655769261872265e-06, "loss": 0.0027, "step": 57440 }, { "epoch": 1.7094907235207475, "grad_norm": 0.17154592275619507, "learning_rate": 4.654042217999428e-06, "loss": 0.0051, "step": 57450 }, { "epoch": 1.709788285004389, "grad_norm": 0.07253222167491913, "learning_rate": 4.652315215599413e-06, "loss": 0.0036, "step": 57460 }, { "epoch": 1.7100858464880306, "grad_norm": 0.27808713912963867, "learning_rate": 4.650588254879251e-06, "loss": 0.0043, "step": 57470 }, { "epoch": 1.7103834079716722, "grad_norm": 0.18484190106391907, "learning_rate": 4.648861336045964e-06, "loss": 0.0031, "step": 57480 }, { "epoch": 1.7106809694553138, "grad_norm": 0.3340548276901245, "learning_rate": 4.647134459306575e-06, "loss": 0.0032, "step": 57490 }, { "epoch": 1.7109785309389554, "grad_norm": 0.19408941268920898, "learning_rate": 4.645407624868098e-06, "loss": 0.0033, "step": 57500 }, { "epoch": 1.711276092422597, "grad_norm": 0.12697705626487732, "learning_rate": 4.643680832937543e-06, "loss": 0.0028, "step": 57510 }, { "epoch": 1.7115736539062385, "grad_norm": 0.2857551872730255, "learning_rate": 4.6419540837219145e-06, "loss": 0.004, "step": 57520 }, { "epoch": 1.71187121538988, "grad_norm": 0.15182197093963623, "learning_rate": 4.640227377428212e-06, "loss": 0.0047, "step": 57530 }, { "epoch": 1.7121687768735216, "grad_norm": 0.2575604021549225, "learning_rate": 4.638500714263431e-06, "loss": 0.0058, "step": 57540 }, { "epoch": 1.712466338357163, "grad_norm": 0.46290943026542664, "learning_rate": 4.6367740944345606e-06, "loss": 0.0035, "step": 57550 }, { "epoch": 1.7127638998408046, "grad_norm": 0.08948703855276108, "learning_rate": 4.635047518148584e-06, "loss": 0.0034, "step": 57560 }, { "epoch": 1.7130614613244461, "grad_norm": 0.15794400870800018, "learning_rate": 4.6333209856124814e-06, "loss": 0.0047, "step": 57570 }, { "epoch": 1.7133590228080877, "grad_norm": 0.10142537951469421, "learning_rate": 4.6315944970332245e-06, "loss": 0.0024, "step": 57580 }, { "epoch": 1.7136565842917293, "grad_norm": 0.0338590070605278, "learning_rate": 4.629868052617786e-06, "loss": 0.006, "step": 57590 }, { "epoch": 1.7139541457753709, "grad_norm": 0.1194421797990799, "learning_rate": 4.628141652573125e-06, "loss": 0.0022, "step": 57600 }, { "epoch": 1.7142517072590124, "grad_norm": 0.14001011848449707, "learning_rate": 4.626415297106202e-06, "loss": 0.0047, "step": 57610 }, { "epoch": 1.7145492687426538, "grad_norm": 0.12338123470544815, "learning_rate": 4.6246889864239675e-06, "loss": 0.0038, "step": 57620 }, { "epoch": 1.7148468302262954, "grad_norm": 0.2978852391242981, "learning_rate": 4.62296272073337e-06, "loss": 0.0035, "step": 57630 }, { "epoch": 1.715144391709937, "grad_norm": 0.23872986435890198, "learning_rate": 4.62123650024135e-06, "loss": 0.0026, "step": 57640 }, { "epoch": 1.7154419531935785, "grad_norm": 0.36476629972457886, "learning_rate": 4.619510325154846e-06, "loss": 0.0028, "step": 57650 }, { "epoch": 1.71573951467722, "grad_norm": 0.5726450681686401, "learning_rate": 4.617784195680786e-06, "loss": 0.0091, "step": 57660 }, { "epoch": 1.7160370761608617, "grad_norm": 0.49819573760032654, "learning_rate": 4.616058112026098e-06, "loss": 0.0031, "step": 57670 }, { "epoch": 1.7163346376445032, "grad_norm": 0.1211138442158699, "learning_rate": 4.6143320743977e-06, "loss": 0.0032, "step": 57680 }, { "epoch": 1.7166321991281448, "grad_norm": 0.26862263679504395, "learning_rate": 4.612606083002508e-06, "loss": 0.004, "step": 57690 }, { "epoch": 1.7169297606117864, "grad_norm": 0.32235726714134216, "learning_rate": 4.6108801380474295e-06, "loss": 0.0063, "step": 57700 }, { "epoch": 1.717227322095428, "grad_norm": 0.054952993988990784, "learning_rate": 4.609154239739369e-06, "loss": 0.005, "step": 57710 }, { "epoch": 1.7175248835790695, "grad_norm": 0.11827582120895386, "learning_rate": 4.6074283882852225e-06, "loss": 0.0035, "step": 57720 }, { "epoch": 1.717822445062711, "grad_norm": 0.058588314801454544, "learning_rate": 4.6057025838918846e-06, "loss": 0.0057, "step": 57730 }, { "epoch": 1.7181200065463527, "grad_norm": 0.12395431101322174, "learning_rate": 4.60397682676624e-06, "loss": 0.0034, "step": 57740 }, { "epoch": 1.7184175680299942, "grad_norm": 0.27230674028396606, "learning_rate": 4.602251117115169e-06, "loss": 0.0039, "step": 57750 }, { "epoch": 1.7187151295136358, "grad_norm": 0.13207682967185974, "learning_rate": 4.600525455145549e-06, "loss": 0.0051, "step": 57760 }, { "epoch": 1.7190126909972774, "grad_norm": 0.22895202040672302, "learning_rate": 4.598799841064246e-06, "loss": 0.0029, "step": 57770 }, { "epoch": 1.719310252480919, "grad_norm": 0.18187876045703888, "learning_rate": 4.597074275078127e-06, "loss": 0.0044, "step": 57780 }, { "epoch": 1.7196078139645605, "grad_norm": 0.4242703914642334, "learning_rate": 4.595348757394048e-06, "loss": 0.0024, "step": 57790 }, { "epoch": 1.719905375448202, "grad_norm": 0.13892397284507751, "learning_rate": 4.593623288218862e-06, "loss": 0.0039, "step": 57800 }, { "epoch": 1.7202029369318437, "grad_norm": 0.1244172677397728, "learning_rate": 4.591897867759414e-06, "loss": 0.0028, "step": 57810 }, { "epoch": 1.7205004984154852, "grad_norm": 0.18270345032215118, "learning_rate": 4.590172496222546e-06, "loss": 0.0039, "step": 57820 }, { "epoch": 1.7207980598991268, "grad_norm": 0.10218393057584763, "learning_rate": 4.58844717381509e-06, "loss": 0.004, "step": 57830 }, { "epoch": 1.7210956213827682, "grad_norm": 0.05630042031407356, "learning_rate": 4.586721900743879e-06, "loss": 0.0036, "step": 57840 }, { "epoch": 1.7213931828664097, "grad_norm": 0.3620792329311371, "learning_rate": 4.5849966772157315e-06, "loss": 0.0044, "step": 57850 }, { "epoch": 1.7216907443500513, "grad_norm": 0.4700883626937866, "learning_rate": 4.5832715034374666e-06, "loss": 0.0046, "step": 57860 }, { "epoch": 1.721988305833693, "grad_norm": 0.12390957027673721, "learning_rate": 4.581546379615893e-06, "loss": 0.0026, "step": 57870 }, { "epoch": 1.7222858673173345, "grad_norm": 0.19831934571266174, "learning_rate": 4.5798213059578175e-06, "loss": 0.0031, "step": 57880 }, { "epoch": 1.722583428800976, "grad_norm": 0.3751290440559387, "learning_rate": 4.5780962826700404e-06, "loss": 0.0035, "step": 57890 }, { "epoch": 1.7228809902846176, "grad_norm": 0.2000410407781601, "learning_rate": 4.576371309959351e-06, "loss": 0.0027, "step": 57900 }, { "epoch": 1.723178551768259, "grad_norm": 0.17989160120487213, "learning_rate": 4.57464638803254e-06, "loss": 0.0035, "step": 57910 }, { "epoch": 1.7234761132519005, "grad_norm": 0.1709069162607193, "learning_rate": 4.572921517096384e-06, "loss": 0.0055, "step": 57920 }, { "epoch": 1.723773674735542, "grad_norm": 0.08179005235433578, "learning_rate": 4.57119669735766e-06, "loss": 0.0042, "step": 57930 }, { "epoch": 1.7240712362191837, "grad_norm": 0.06522354483604431, "learning_rate": 4.569471929023135e-06, "loss": 0.0069, "step": 57940 }, { "epoch": 1.7243687977028253, "grad_norm": 0.7061606049537659, "learning_rate": 4.567747212299573e-06, "loss": 0.0026, "step": 57950 }, { "epoch": 1.7246663591864668, "grad_norm": 0.13596992194652557, "learning_rate": 4.566022547393729e-06, "loss": 0.0024, "step": 57960 }, { "epoch": 1.7249639206701084, "grad_norm": 0.4093843996524811, "learning_rate": 4.5642979345123524e-06, "loss": 0.0046, "step": 57970 }, { "epoch": 1.72526148215375, "grad_norm": 0.1515970379114151, "learning_rate": 4.562573373862187e-06, "loss": 0.0027, "step": 57980 }, { "epoch": 1.7255590436373915, "grad_norm": 0.14557774364948273, "learning_rate": 4.5608488656499726e-06, "loss": 0.0015, "step": 57990 }, { "epoch": 1.7258566051210331, "grad_norm": 0.2107568383216858, "learning_rate": 4.559124410082436e-06, "loss": 0.0024, "step": 58000 }, { "epoch": 1.7261541666046747, "grad_norm": 0.1981072574853897, "learning_rate": 4.557400007366306e-06, "loss": 0.0053, "step": 58010 }, { "epoch": 1.7264517280883163, "grad_norm": 0.17651374638080597, "learning_rate": 4.555675657708297e-06, "loss": 0.0046, "step": 58020 }, { "epoch": 1.7267492895719578, "grad_norm": 0.03154000639915466, "learning_rate": 4.553951361315125e-06, "loss": 0.0042, "step": 58030 }, { "epoch": 1.7270468510555994, "grad_norm": 0.04366311430931091, "learning_rate": 4.552227118393492e-06, "loss": 0.002, "step": 58040 }, { "epoch": 1.727344412539241, "grad_norm": 0.23651005327701569, "learning_rate": 4.550502929150102e-06, "loss": 0.0033, "step": 58050 }, { "epoch": 1.7276419740228826, "grad_norm": 0.1084604412317276, "learning_rate": 4.548778793791642e-06, "loss": 0.011, "step": 58060 }, { "epoch": 1.7279395355065241, "grad_norm": 0.6315945982933044, "learning_rate": 4.547054712524804e-06, "loss": 0.0044, "step": 58070 }, { "epoch": 1.7282370969901657, "grad_norm": 0.2047223299741745, "learning_rate": 4.545330685556263e-06, "loss": 0.0076, "step": 58080 }, { "epoch": 1.7285346584738073, "grad_norm": 0.23586206138134003, "learning_rate": 4.543606713092696e-06, "loss": 0.0045, "step": 58090 }, { "epoch": 1.7288322199574488, "grad_norm": 0.17055033147335052, "learning_rate": 4.541882795340769e-06, "loss": 0.0049, "step": 58100 }, { "epoch": 1.7291297814410904, "grad_norm": 0.12650497257709503, "learning_rate": 4.5401589325071405e-06, "loss": 0.0048, "step": 58110 }, { "epoch": 1.7294273429247318, "grad_norm": 0.2904629409313202, "learning_rate": 4.538435124798466e-06, "loss": 0.0033, "step": 58120 }, { "epoch": 1.7297249044083733, "grad_norm": 0.22906328737735748, "learning_rate": 4.5367113724213916e-06, "loss": 0.0046, "step": 58130 }, { "epoch": 1.730022465892015, "grad_norm": 0.20099471509456635, "learning_rate": 4.534987675582559e-06, "loss": 0.0056, "step": 58140 }, { "epoch": 1.7303200273756565, "grad_norm": 0.20377492904663086, "learning_rate": 4.533264034488598e-06, "loss": 0.0049, "step": 58150 }, { "epoch": 1.730617588859298, "grad_norm": 0.11982836574316025, "learning_rate": 4.531540449346142e-06, "loss": 0.0049, "step": 58160 }, { "epoch": 1.7309151503429396, "grad_norm": 0.1373097449541092, "learning_rate": 4.529816920361809e-06, "loss": 0.0025, "step": 58170 }, { "epoch": 1.7312127118265812, "grad_norm": 0.5545032024383545, "learning_rate": 4.5280934477422105e-06, "loss": 0.0053, "step": 58180 }, { "epoch": 1.7315102733102226, "grad_norm": 0.1815694123506546, "learning_rate": 4.526370031693955e-06, "loss": 0.0048, "step": 58190 }, { "epoch": 1.7318078347938641, "grad_norm": 0.010286667384207249, "learning_rate": 4.524646672423642e-06, "loss": 0.0031, "step": 58200 }, { "epoch": 1.7321053962775057, "grad_norm": 0.1411251276731491, "learning_rate": 4.5229233701378675e-06, "loss": 0.0019, "step": 58210 }, { "epoch": 1.7324029577611473, "grad_norm": 0.13409115374088287, "learning_rate": 4.521200125043214e-06, "loss": 0.0021, "step": 58220 }, { "epoch": 1.7327005192447889, "grad_norm": 0.22966809570789337, "learning_rate": 4.5194769373462656e-06, "loss": 0.004, "step": 58230 }, { "epoch": 1.7329980807284304, "grad_norm": 0.1821107566356659, "learning_rate": 4.51775380725359e-06, "loss": 0.0036, "step": 58240 }, { "epoch": 1.733295642212072, "grad_norm": 0.14796103537082672, "learning_rate": 4.516030734971757e-06, "loss": 0.0031, "step": 58250 }, { "epoch": 1.7335932036957136, "grad_norm": 0.07116673141717911, "learning_rate": 4.5143077207073235e-06, "loss": 0.0027, "step": 58260 }, { "epoch": 1.7338907651793551, "grad_norm": 0.3670378029346466, "learning_rate": 4.512584764666843e-06, "loss": 0.0031, "step": 58270 }, { "epoch": 1.7341883266629967, "grad_norm": 0.054625604301691055, "learning_rate": 4.510861867056858e-06, "loss": 0.0032, "step": 58280 }, { "epoch": 1.7344858881466383, "grad_norm": 0.05057597532868385, "learning_rate": 4.509139028083909e-06, "loss": 0.0037, "step": 58290 }, { "epoch": 1.7347834496302799, "grad_norm": 0.4805360436439514, "learning_rate": 4.507416247954526e-06, "loss": 0.0046, "step": 58300 }, { "epoch": 1.7350810111139214, "grad_norm": 0.0459190309047699, "learning_rate": 4.5056935268752336e-06, "loss": 0.0043, "step": 58310 }, { "epoch": 1.735378572597563, "grad_norm": 0.06920605897903442, "learning_rate": 4.503970865052547e-06, "loss": 0.0029, "step": 58320 }, { "epoch": 1.7356761340812046, "grad_norm": 0.09484326839447021, "learning_rate": 4.502248262692978e-06, "loss": 0.0023, "step": 58330 }, { "epoch": 1.7359736955648462, "grad_norm": 0.09747408330440521, "learning_rate": 4.500525720003026e-06, "loss": 0.0027, "step": 58340 }, { "epoch": 1.7362712570484877, "grad_norm": 0.03419435769319534, "learning_rate": 4.4988032371891905e-06, "loss": 0.0024, "step": 58350 }, { "epoch": 1.7365688185321293, "grad_norm": 0.11089003831148148, "learning_rate": 4.497080814457957e-06, "loss": 0.0034, "step": 58360 }, { "epoch": 1.7368663800157709, "grad_norm": 0.06882565468549728, "learning_rate": 4.495358452015808e-06, "loss": 0.0028, "step": 58370 }, { "epoch": 1.7371639414994124, "grad_norm": 0.0897381603717804, "learning_rate": 4.493636150069216e-06, "loss": 0.0032, "step": 58380 }, { "epoch": 1.737461502983054, "grad_norm": 0.05803757533431053, "learning_rate": 4.4919139088246494e-06, "loss": 0.0027, "step": 58390 }, { "epoch": 1.7377590644666956, "grad_norm": 0.18822385370731354, "learning_rate": 4.4901917284885655e-06, "loss": 0.005, "step": 58400 }, { "epoch": 1.738056625950337, "grad_norm": 0.07976222783327103, "learning_rate": 4.488469609267418e-06, "loss": 0.0034, "step": 58410 }, { "epoch": 1.7383541874339785, "grad_norm": 0.03587505966424942, "learning_rate": 4.4867475513676495e-06, "loss": 0.006, "step": 58420 }, { "epoch": 1.73865174891762, "grad_norm": 0.0996566116809845, "learning_rate": 4.485025554995698e-06, "loss": 0.0035, "step": 58430 }, { "epoch": 1.7389493104012617, "grad_norm": 0.28411662578582764, "learning_rate": 4.483303620357996e-06, "loss": 0.0038, "step": 58440 }, { "epoch": 1.7392468718849032, "grad_norm": 0.14146137237548828, "learning_rate": 4.481581747660963e-06, "loss": 0.0067, "step": 58450 }, { "epoch": 1.7395444333685448, "grad_norm": 0.07894323021173477, "learning_rate": 4.479859937111016e-06, "loss": 0.0028, "step": 58460 }, { "epoch": 1.7398419948521864, "grad_norm": 0.05584269389510155, "learning_rate": 4.478138188914562e-06, "loss": 0.0026, "step": 58470 }, { "epoch": 1.7401395563358277, "grad_norm": 0.44900083541870117, "learning_rate": 4.476416503278002e-06, "loss": 0.0092, "step": 58480 }, { "epoch": 1.7404371178194693, "grad_norm": 0.14746123552322388, "learning_rate": 4.474694880407726e-06, "loss": 0.0048, "step": 58490 }, { "epoch": 1.7407346793031109, "grad_norm": 0.07167379558086395, "learning_rate": 4.472973320510122e-06, "loss": 0.0024, "step": 58500 }, { "epoch": 1.7410322407867524, "grad_norm": 0.04728834331035614, "learning_rate": 4.471251823791568e-06, "loss": 0.0033, "step": 58510 }, { "epoch": 1.741329802270394, "grad_norm": 0.12255142629146576, "learning_rate": 4.46953039045843e-06, "loss": 0.0021, "step": 58520 }, { "epoch": 1.7416273637540356, "grad_norm": 0.37315136194229126, "learning_rate": 4.467809020717075e-06, "loss": 0.0054, "step": 58530 }, { "epoch": 1.7419249252376772, "grad_norm": 0.2982412874698639, "learning_rate": 4.466087714773855e-06, "loss": 0.003, "step": 58540 }, { "epoch": 1.7422224867213187, "grad_norm": 0.01749425195157528, "learning_rate": 4.46436647283512e-06, "loss": 0.0034, "step": 58550 }, { "epoch": 1.7425200482049603, "grad_norm": 0.19698764383792877, "learning_rate": 4.4626452951072055e-06, "loss": 0.003, "step": 58560 }, { "epoch": 1.7428176096886019, "grad_norm": 0.3823617696762085, "learning_rate": 4.460924181796447e-06, "loss": 0.003, "step": 58570 }, { "epoch": 1.7431151711722435, "grad_norm": 0.16568246483802795, "learning_rate": 4.4592031331091654e-06, "loss": 0.0031, "step": 58580 }, { "epoch": 1.743412732655885, "grad_norm": 0.08374922722578049, "learning_rate": 4.457482149251681e-06, "loss": 0.0026, "step": 58590 }, { "epoch": 1.7437102941395266, "grad_norm": 0.2149055302143097, "learning_rate": 4.455761230430299e-06, "loss": 0.0036, "step": 58600 }, { "epoch": 1.7440078556231682, "grad_norm": 0.06057882308959961, "learning_rate": 4.4540403768513214e-06, "loss": 0.0061, "step": 58610 }, { "epoch": 1.7443054171068098, "grad_norm": 0.06362989544868469, "learning_rate": 4.45231958872104e-06, "loss": 0.0028, "step": 58620 }, { "epoch": 1.7446029785904513, "grad_norm": 0.09097333252429962, "learning_rate": 4.450598866245743e-06, "loss": 0.0031, "step": 58630 }, { "epoch": 1.744900540074093, "grad_norm": 0.22219261527061462, "learning_rate": 4.4488782096317035e-06, "loss": 0.0071, "step": 58640 }, { "epoch": 1.7451981015577345, "grad_norm": 0.2583455741405487, "learning_rate": 4.447157619085195e-06, "loss": 0.0026, "step": 58650 }, { "epoch": 1.745495663041376, "grad_norm": 0.13823316991329193, "learning_rate": 4.445437094812476e-06, "loss": 0.0058, "step": 58660 }, { "epoch": 1.7457932245250176, "grad_norm": 0.02240706980228424, "learning_rate": 4.443716637019801e-06, "loss": 0.0026, "step": 58670 }, { "epoch": 1.7460907860086592, "grad_norm": 0.08647230267524719, "learning_rate": 4.441996245913414e-06, "loss": 0.0028, "step": 58680 }, { "epoch": 1.7463883474923005, "grad_norm": 0.13565903902053833, "learning_rate": 4.440275921699556e-06, "loss": 0.0038, "step": 58690 }, { "epoch": 1.7466859089759421, "grad_norm": 0.3845917880535126, "learning_rate": 4.438555664584452e-06, "loss": 0.0048, "step": 58700 }, { "epoch": 1.7469834704595837, "grad_norm": 0.10550200939178467, "learning_rate": 4.436835474774328e-06, "loss": 0.0025, "step": 58710 }, { "epoch": 1.7472810319432253, "grad_norm": 0.135442852973938, "learning_rate": 4.435115352475393e-06, "loss": 0.0054, "step": 58720 }, { "epoch": 1.7475785934268668, "grad_norm": 0.14762316644191742, "learning_rate": 4.433395297893855e-06, "loss": 0.0046, "step": 58730 }, { "epoch": 1.7478761549105084, "grad_norm": 0.26945924758911133, "learning_rate": 4.431675311235913e-06, "loss": 0.0037, "step": 58740 }, { "epoch": 1.74817371639415, "grad_norm": 0.10835565626621246, "learning_rate": 4.429955392707752e-06, "loss": 0.0025, "step": 58750 }, { "epoch": 1.7484712778777916, "grad_norm": 0.301167756319046, "learning_rate": 4.428235542515556e-06, "loss": 0.0048, "step": 58760 }, { "epoch": 1.748768839361433, "grad_norm": 0.3427819609642029, "learning_rate": 4.426515760865494e-06, "loss": 0.004, "step": 58770 }, { "epoch": 1.7490664008450745, "grad_norm": 0.14980295300483704, "learning_rate": 4.424796047963735e-06, "loss": 0.0032, "step": 58780 }, { "epoch": 1.749363962328716, "grad_norm": 0.13379232585430145, "learning_rate": 4.423076404016431e-06, "loss": 0.0032, "step": 58790 }, { "epoch": 1.7496615238123576, "grad_norm": 0.13427114486694336, "learning_rate": 4.421356829229734e-06, "loss": 0.0047, "step": 58800 }, { "epoch": 1.7499590852959992, "grad_norm": 0.16577625274658203, "learning_rate": 4.4196373238097796e-06, "loss": 0.0043, "step": 58810 }, { "epoch": 1.7502566467796408, "grad_norm": 0.15873858332633972, "learning_rate": 4.417917887962702e-06, "loss": 0.0037, "step": 58820 }, { "epoch": 1.7505542082632823, "grad_norm": 0.17492254078388214, "learning_rate": 4.416198521894623e-06, "loss": 0.0036, "step": 58830 }, { "epoch": 1.750851769746924, "grad_norm": 0.13544809818267822, "learning_rate": 4.4144792258116585e-06, "loss": 0.0026, "step": 58840 }, { "epoch": 1.7511493312305655, "grad_norm": 0.19219934940338135, "learning_rate": 4.412759999919912e-06, "loss": 0.0036, "step": 58850 }, { "epoch": 1.751446892714207, "grad_norm": 0.18866491317749023, "learning_rate": 4.411040844425485e-06, "loss": 0.0046, "step": 58860 }, { "epoch": 1.7517444541978486, "grad_norm": 0.20845335721969604, "learning_rate": 4.4093217595344635e-06, "loss": 0.0025, "step": 58870 }, { "epoch": 1.7520420156814902, "grad_norm": 0.3403685688972473, "learning_rate": 4.407602745452931e-06, "loss": 0.0198, "step": 58880 }, { "epoch": 1.7523395771651318, "grad_norm": 0.28197842836380005, "learning_rate": 4.4058838023869585e-06, "loss": 0.0109, "step": 58890 }, { "epoch": 1.7526371386487734, "grad_norm": 0.07741391658782959, "learning_rate": 4.40416493054261e-06, "loss": 0.0054, "step": 58900 }, { "epoch": 1.752934700132415, "grad_norm": 0.11159694939851761, "learning_rate": 4.402446130125942e-06, "loss": 0.0054, "step": 58910 }, { "epoch": 1.7532322616160565, "grad_norm": 0.3209034502506256, "learning_rate": 4.400727401342999e-06, "loss": 0.0043, "step": 58920 }, { "epoch": 1.753529823099698, "grad_norm": 0.12235207110643387, "learning_rate": 4.399008744399822e-06, "loss": 0.0027, "step": 58930 }, { "epoch": 1.7538273845833396, "grad_norm": 0.1661280244588852, "learning_rate": 4.3972901595024386e-06, "loss": 0.004, "step": 58940 }, { "epoch": 1.7541249460669812, "grad_norm": 0.23687505722045898, "learning_rate": 4.395571646856871e-06, "loss": 0.0043, "step": 58950 }, { "epoch": 1.7544225075506228, "grad_norm": 0.16833434998989105, "learning_rate": 4.393853206669131e-06, "loss": 0.0051, "step": 58960 }, { "epoch": 1.7547200690342644, "grad_norm": 0.09586189687252045, "learning_rate": 4.392134839145223e-06, "loss": 0.0043, "step": 58970 }, { "epoch": 1.7550176305179057, "grad_norm": 0.26606813073158264, "learning_rate": 4.3904165444911396e-06, "loss": 0.0057, "step": 58980 }, { "epoch": 1.7553151920015473, "grad_norm": 0.0410802848637104, "learning_rate": 4.388698322912871e-06, "loss": 0.0032, "step": 58990 }, { "epoch": 1.7556127534851889, "grad_norm": 0.24482442438602448, "learning_rate": 4.386980174616388e-06, "loss": 0.0044, "step": 59000 }, { "epoch": 1.7559103149688304, "grad_norm": 0.08542297780513763, "learning_rate": 4.385262099807667e-06, "loss": 0.0026, "step": 59010 }, { "epoch": 1.756207876452472, "grad_norm": 0.0522574856877327, "learning_rate": 4.383544098692664e-06, "loss": 0.0034, "step": 59020 }, { "epoch": 1.7565054379361136, "grad_norm": 0.10967102646827698, "learning_rate": 4.38182617147733e-06, "loss": 0.0039, "step": 59030 }, { "epoch": 1.7568029994197552, "grad_norm": 0.0984114408493042, "learning_rate": 4.380108318367609e-06, "loss": 0.003, "step": 59040 }, { "epoch": 1.7571005609033965, "grad_norm": 0.13824550807476044, "learning_rate": 4.3783905395694306e-06, "loss": 0.003, "step": 59050 }, { "epoch": 1.757398122387038, "grad_norm": 0.0925411581993103, "learning_rate": 4.376672835288724e-06, "loss": 0.0054, "step": 59060 }, { "epoch": 1.7576956838706796, "grad_norm": 0.12535983324050903, "learning_rate": 4.374955205731399e-06, "loss": 0.0044, "step": 59070 }, { "epoch": 1.7579932453543212, "grad_norm": 0.16545717418193817, "learning_rate": 4.373237651103367e-06, "loss": 0.0034, "step": 59080 }, { "epoch": 1.7582908068379628, "grad_norm": 0.04162881150841713, "learning_rate": 4.371520171610523e-06, "loss": 0.0026, "step": 59090 }, { "epoch": 1.7585883683216044, "grad_norm": 0.1848730742931366, "learning_rate": 4.369802767458757e-06, "loss": 0.0023, "step": 59100 }, { "epoch": 1.758885929805246, "grad_norm": 0.13021881878376007, "learning_rate": 4.368085438853946e-06, "loss": 0.0043, "step": 59110 }, { "epoch": 1.7591834912888875, "grad_norm": 0.17169417440891266, "learning_rate": 4.366368186001963e-06, "loss": 0.0045, "step": 59120 }, { "epoch": 1.759481052772529, "grad_norm": 0.11284234374761581, "learning_rate": 4.364651009108667e-06, "loss": 0.0046, "step": 59130 }, { "epoch": 1.7597786142561707, "grad_norm": 0.142589271068573, "learning_rate": 4.362933908379913e-06, "loss": 0.0035, "step": 59140 }, { "epoch": 1.7600761757398122, "grad_norm": 0.1317693293094635, "learning_rate": 4.361216884021541e-06, "loss": 0.0014, "step": 59150 }, { "epoch": 1.7603737372234538, "grad_norm": 0.16505900025367737, "learning_rate": 4.359499936239388e-06, "loss": 0.0032, "step": 59160 }, { "epoch": 1.7606712987070954, "grad_norm": 0.18854503333568573, "learning_rate": 4.357783065239275e-06, "loss": 0.0062, "step": 59170 }, { "epoch": 1.760968860190737, "grad_norm": 0.28398409485816956, "learning_rate": 4.356066271227021e-06, "loss": 0.0052, "step": 59180 }, { "epoch": 1.7612664216743785, "grad_norm": 0.4124407172203064, "learning_rate": 4.35434955440843e-06, "loss": 0.0043, "step": 59190 }, { "epoch": 1.76156398315802, "grad_norm": 0.1303769052028656, "learning_rate": 4.352632914989301e-06, "loss": 0.0046, "step": 59200 }, { "epoch": 1.7618615446416617, "grad_norm": 0.20405800640583038, "learning_rate": 4.35091635317542e-06, "loss": 0.0051, "step": 59210 }, { "epoch": 1.7621591061253032, "grad_norm": 0.08928980678319931, "learning_rate": 4.349199869172567e-06, "loss": 0.0045, "step": 59220 }, { "epoch": 1.7624566676089448, "grad_norm": 0.018659578636288643, "learning_rate": 4.347483463186509e-06, "loss": 0.0023, "step": 59230 }, { "epoch": 1.7627542290925864, "grad_norm": 0.12115078419446945, "learning_rate": 4.345767135423009e-06, "loss": 0.0019, "step": 59240 }, { "epoch": 1.763051790576228, "grad_norm": 0.12792378664016724, "learning_rate": 4.344050886087815e-06, "loss": 0.0033, "step": 59250 }, { "epoch": 1.7633493520598695, "grad_norm": 0.05102691054344177, "learning_rate": 4.342334715386668e-06, "loss": 0.0063, "step": 59260 }, { "epoch": 1.7636469135435109, "grad_norm": 0.09313671290874481, "learning_rate": 4.340618623525301e-06, "loss": 0.0027, "step": 59270 }, { "epoch": 1.7639444750271525, "grad_norm": 0.21808746457099915, "learning_rate": 4.338902610709432e-06, "loss": 0.002, "step": 59280 }, { "epoch": 1.764242036510794, "grad_norm": 0.14522825181484222, "learning_rate": 4.33718667714478e-06, "loss": 0.0029, "step": 59290 }, { "epoch": 1.7645395979944356, "grad_norm": 0.2647239565849304, "learning_rate": 4.335470823037045e-06, "loss": 0.0043, "step": 59300 }, { "epoch": 1.7648371594780772, "grad_norm": 0.013003646396100521, "learning_rate": 4.33375504859192e-06, "loss": 0.0021, "step": 59310 }, { "epoch": 1.7651347209617188, "grad_norm": 0.16209827363491058, "learning_rate": 4.33203935401509e-06, "loss": 0.0034, "step": 59320 }, { "epoch": 1.7654322824453603, "grad_norm": 0.28844738006591797, "learning_rate": 4.33032373951223e-06, "loss": 0.0034, "step": 59330 }, { "epoch": 1.7657298439290017, "grad_norm": 0.4774056673049927, "learning_rate": 4.328608205289005e-06, "loss": 0.0041, "step": 59340 }, { "epoch": 1.7660274054126432, "grad_norm": 0.27941563725471497, "learning_rate": 4.326892751551067e-06, "loss": 0.0064, "step": 59350 }, { "epoch": 1.7663249668962848, "grad_norm": 0.2823008894920349, "learning_rate": 4.3251773785040655e-06, "loss": 0.0035, "step": 59360 }, { "epoch": 1.7666225283799264, "grad_norm": 0.21375931799411774, "learning_rate": 4.323462086353635e-06, "loss": 0.0034, "step": 59370 }, { "epoch": 1.766920089863568, "grad_norm": 0.07028742879629135, "learning_rate": 4.3217468753054015e-06, "loss": 0.0049, "step": 59380 }, { "epoch": 1.7672176513472095, "grad_norm": 0.2566876709461212, "learning_rate": 4.320031745564982e-06, "loss": 0.0023, "step": 59390 }, { "epoch": 1.7675152128308511, "grad_norm": 0.2779959738254547, "learning_rate": 4.318316697337984e-06, "loss": 0.0046, "step": 59400 }, { "epoch": 1.7678127743144927, "grad_norm": 0.08830417692661285, "learning_rate": 4.316601730830002e-06, "loss": 0.0029, "step": 59410 }, { "epoch": 1.7681103357981343, "grad_norm": 0.22162900865077972, "learning_rate": 4.314886846246626e-06, "loss": 0.0028, "step": 59420 }, { "epoch": 1.7684078972817758, "grad_norm": 0.16010640561580658, "learning_rate": 4.313172043793432e-06, "loss": 0.0032, "step": 59430 }, { "epoch": 1.7687054587654174, "grad_norm": 0.0772886872291565, "learning_rate": 4.3114573236759895e-06, "loss": 0.003, "step": 59440 }, { "epoch": 1.769003020249059, "grad_norm": 0.1840716004371643, "learning_rate": 4.309742686099853e-06, "loss": 0.0037, "step": 59450 }, { "epoch": 1.7693005817327006, "grad_norm": 0.16739241778850555, "learning_rate": 4.308028131270572e-06, "loss": 0.0044, "step": 59460 }, { "epoch": 1.7695981432163421, "grad_norm": 0.1711047738790512, "learning_rate": 4.306313659393684e-06, "loss": 0.0022, "step": 59470 }, { "epoch": 1.7698957046999837, "grad_norm": 0.09875835478305817, "learning_rate": 4.304599270674717e-06, "loss": 0.0043, "step": 59480 }, { "epoch": 1.7701932661836253, "grad_norm": 0.04998476430773735, "learning_rate": 4.30288496531919e-06, "loss": 0.0043, "step": 59490 }, { "epoch": 1.7704908276672668, "grad_norm": 0.6292191743850708, "learning_rate": 4.30117074353261e-06, "loss": 0.0074, "step": 59500 }, { "epoch": 1.7707883891509084, "grad_norm": 0.24083620309829712, "learning_rate": 4.299456605520475e-06, "loss": 0.0044, "step": 59510 }, { "epoch": 1.77108595063455, "grad_norm": 0.10606259107589722, "learning_rate": 4.297742551488272e-06, "loss": 0.0051, "step": 59520 }, { "epoch": 1.7713835121181916, "grad_norm": 0.07944841682910919, "learning_rate": 4.29602858164148e-06, "loss": 0.0024, "step": 59530 }, { "epoch": 1.7716810736018331, "grad_norm": 0.23235106468200684, "learning_rate": 4.2943146961855685e-06, "loss": 0.004, "step": 59540 }, { "epoch": 1.7719786350854745, "grad_norm": 0.1889398843050003, "learning_rate": 4.292600895325991e-06, "loss": 0.0055, "step": 59550 }, { "epoch": 1.772276196569116, "grad_norm": 0.39480751752853394, "learning_rate": 4.290887179268199e-06, "loss": 0.0029, "step": 59560 }, { "epoch": 1.7725737580527576, "grad_norm": 0.2250487208366394, "learning_rate": 4.2891735482176245e-06, "loss": 0.0044, "step": 59570 }, { "epoch": 1.7728713195363992, "grad_norm": 0.18607962131500244, "learning_rate": 4.2874600023797e-06, "loss": 0.004, "step": 59580 }, { "epoch": 1.7731688810200408, "grad_norm": 0.1133129671216011, "learning_rate": 4.285746541959841e-06, "loss": 0.0027, "step": 59590 }, { "epoch": 1.7734664425036823, "grad_norm": 0.15398560464382172, "learning_rate": 4.284033167163454e-06, "loss": 0.0053, "step": 59600 }, { "epoch": 1.773764003987324, "grad_norm": 0.14459380507469177, "learning_rate": 4.282319878195935e-06, "loss": 0.0021, "step": 59610 }, { "epoch": 1.7740615654709653, "grad_norm": 0.023698637261986732, "learning_rate": 4.28060667526267e-06, "loss": 0.002, "step": 59620 }, { "epoch": 1.7743591269546068, "grad_norm": 0.26446524262428284, "learning_rate": 4.278893558569035e-06, "loss": 0.0027, "step": 59630 }, { "epoch": 1.7746566884382484, "grad_norm": 0.13003037869930267, "learning_rate": 4.277180528320395e-06, "loss": 0.0042, "step": 59640 }, { "epoch": 1.77495424992189, "grad_norm": 0.1603408306837082, "learning_rate": 4.2754675847221076e-06, "loss": 0.0032, "step": 59650 }, { "epoch": 1.7752518114055316, "grad_norm": 0.23321335017681122, "learning_rate": 4.273754727979514e-06, "loss": 0.0059, "step": 59660 }, { "epoch": 1.7755493728891731, "grad_norm": 0.1283569186925888, "learning_rate": 4.27204195829795e-06, "loss": 0.0032, "step": 59670 }, { "epoch": 1.7758469343728147, "grad_norm": 0.15385371446609497, "learning_rate": 4.2703292758827405e-06, "loss": 0.0033, "step": 59680 }, { "epoch": 1.7761444958564563, "grad_norm": 0.2976973354816437, "learning_rate": 4.268616680939197e-06, "loss": 0.0053, "step": 59690 }, { "epoch": 1.7764420573400979, "grad_norm": 0.14218388497829437, "learning_rate": 4.266904173672624e-06, "loss": 0.0033, "step": 59700 }, { "epoch": 1.7767396188237394, "grad_norm": 0.13458047807216644, "learning_rate": 4.265191754288313e-06, "loss": 0.0043, "step": 59710 }, { "epoch": 1.777037180307381, "grad_norm": 0.13722111284732819, "learning_rate": 4.263479422991547e-06, "loss": 0.0028, "step": 59720 }, { "epoch": 1.7773347417910226, "grad_norm": 0.1294391006231308, "learning_rate": 4.261767179987595e-06, "loss": 0.0026, "step": 59730 }, { "epoch": 1.7776323032746641, "grad_norm": 0.26115676760673523, "learning_rate": 4.260055025481719e-06, "loss": 0.0031, "step": 59740 }, { "epoch": 1.7779298647583057, "grad_norm": 0.2893914580345154, "learning_rate": 4.2583429596791705e-06, "loss": 0.0032, "step": 59750 }, { "epoch": 1.7782274262419473, "grad_norm": 0.14236336946487427, "learning_rate": 4.2566309827851885e-06, "loss": 0.0046, "step": 59760 }, { "epoch": 1.7785249877255889, "grad_norm": 0.27302607893943787, "learning_rate": 4.254919095004999e-06, "loss": 0.0038, "step": 59770 }, { "epoch": 1.7788225492092304, "grad_norm": 0.09066522121429443, "learning_rate": 4.253207296543825e-06, "loss": 0.0036, "step": 59780 }, { "epoch": 1.779120110692872, "grad_norm": 0.3741607069969177, "learning_rate": 4.2514955876068695e-06, "loss": 0.0042, "step": 59790 }, { "epoch": 1.7794176721765136, "grad_norm": 0.2243090122938156, "learning_rate": 4.249783968399332e-06, "loss": 0.006, "step": 59800 }, { "epoch": 1.7797152336601552, "grad_norm": 0.2669447958469391, "learning_rate": 4.2480724391263965e-06, "loss": 0.0048, "step": 59810 }, { "epoch": 1.7800127951437967, "grad_norm": 0.2150588035583496, "learning_rate": 4.2463609999932395e-06, "loss": 0.0044, "step": 59820 }, { "epoch": 1.7803103566274383, "grad_norm": 0.3085065186023712, "learning_rate": 4.244649651205024e-06, "loss": 0.0035, "step": 59830 }, { "epoch": 1.7806079181110797, "grad_norm": 0.2496219277381897, "learning_rate": 4.242938392966907e-06, "loss": 0.0044, "step": 59840 }, { "epoch": 1.7809054795947212, "grad_norm": 0.21284788846969604, "learning_rate": 4.241227225484024e-06, "loss": 0.003, "step": 59850 }, { "epoch": 1.7812030410783628, "grad_norm": 0.23427557945251465, "learning_rate": 4.239516148961516e-06, "loss": 0.0042, "step": 59860 }, { "epoch": 1.7815006025620044, "grad_norm": 0.1832893341779709, "learning_rate": 4.237805163604498e-06, "loss": 0.0032, "step": 59870 }, { "epoch": 1.781798164045646, "grad_norm": 0.21066181361675262, "learning_rate": 4.236094269618081e-06, "loss": 0.0057, "step": 59880 }, { "epoch": 1.7820957255292875, "grad_norm": 0.13067002594470978, "learning_rate": 4.2343834672073654e-06, "loss": 0.0025, "step": 59890 }, { "epoch": 1.782393287012929, "grad_norm": 0.17211145162582397, "learning_rate": 4.232672756577438e-06, "loss": 0.0039, "step": 59900 }, { "epoch": 1.7826908484965704, "grad_norm": 0.07441029697656631, "learning_rate": 4.230962137933376e-06, "loss": 0.003, "step": 59910 }, { "epoch": 1.782988409980212, "grad_norm": 0.21162831783294678, "learning_rate": 4.229251611480245e-06, "loss": 0.0049, "step": 59920 }, { "epoch": 1.7832859714638536, "grad_norm": 0.10592871904373169, "learning_rate": 4.227541177423101e-06, "loss": 0.0019, "step": 59930 }, { "epoch": 1.7835835329474952, "grad_norm": 0.05399838462471962, "learning_rate": 4.225830835966987e-06, "loss": 0.0028, "step": 59940 }, { "epoch": 1.7838810944311367, "grad_norm": 0.08065246045589447, "learning_rate": 4.224120587316937e-06, "loss": 0.0026, "step": 59950 }, { "epoch": 1.7841786559147783, "grad_norm": 0.4175005555152893, "learning_rate": 4.222410431677971e-06, "loss": 0.0042, "step": 59960 }, { "epoch": 1.7844762173984199, "grad_norm": 0.04798411205410957, "learning_rate": 4.220700369255101e-06, "loss": 0.0034, "step": 59970 }, { "epoch": 1.7847737788820615, "grad_norm": 0.1527806669473648, "learning_rate": 4.218990400253324e-06, "loss": 0.0035, "step": 59980 }, { "epoch": 1.785071340365703, "grad_norm": 0.2023981809616089, "learning_rate": 4.2172805248776315e-06, "loss": 0.0025, "step": 59990 }, { "epoch": 1.7853689018493446, "grad_norm": 0.03155557066202164, "learning_rate": 4.215570743332998e-06, "loss": 0.001, "step": 60000 }, { "epoch": 1.7856664633329862, "grad_norm": 0.05871666967868805, "learning_rate": 4.2138610558243895e-06, "loss": 0.003, "step": 60010 }, { "epoch": 1.7859640248166277, "grad_norm": 0.14041371643543243, "learning_rate": 4.212151462556762e-06, "loss": 0.0024, "step": 60020 }, { "epoch": 1.7862615863002693, "grad_norm": 0.19887074828147888, "learning_rate": 4.2104419637350554e-06, "loss": 0.0032, "step": 60030 }, { "epoch": 1.786559147783911, "grad_norm": 0.1913859248161316, "learning_rate": 4.2087325595642045e-06, "loss": 0.0028, "step": 60040 }, { "epoch": 1.7868567092675525, "grad_norm": 0.05635171756148338, "learning_rate": 4.2070232502491285e-06, "loss": 0.0046, "step": 60050 }, { "epoch": 1.787154270751194, "grad_norm": 0.14692553877830505, "learning_rate": 4.205314035994736e-06, "loss": 0.0051, "step": 60060 }, { "epoch": 1.7874518322348356, "grad_norm": 0.10237886756658554, "learning_rate": 4.203604917005926e-06, "loss": 0.0043, "step": 60070 }, { "epoch": 1.7877493937184772, "grad_norm": 0.11751807481050491, "learning_rate": 4.201895893487583e-06, "loss": 0.0039, "step": 60080 }, { "epoch": 1.7880469552021188, "grad_norm": 0.04392252117395401, "learning_rate": 4.200186965644583e-06, "loss": 0.0029, "step": 60090 }, { "epoch": 1.7883445166857603, "grad_norm": 0.18987269699573517, "learning_rate": 4.19847813368179e-06, "loss": 0.0043, "step": 60100 }, { "epoch": 1.788642078169402, "grad_norm": 0.15680524706840515, "learning_rate": 4.196769397804054e-06, "loss": 0.0029, "step": 60110 }, { "epoch": 1.7889396396530433, "grad_norm": 0.6128782629966736, "learning_rate": 4.195060758216216e-06, "loss": 0.006, "step": 60120 }, { "epoch": 1.7892372011366848, "grad_norm": 0.3493412733078003, "learning_rate": 4.193352215123104e-06, "loss": 0.0032, "step": 60130 }, { "epoch": 1.7895347626203264, "grad_norm": 0.3182022273540497, "learning_rate": 4.191643768729537e-06, "loss": 0.0035, "step": 60140 }, { "epoch": 1.789832324103968, "grad_norm": 0.178323894739151, "learning_rate": 4.18993541924032e-06, "loss": 0.0033, "step": 60150 }, { "epoch": 1.7901298855876095, "grad_norm": 0.1388312578201294, "learning_rate": 4.188227166860247e-06, "loss": 0.0039, "step": 60160 }, { "epoch": 1.7904274470712511, "grad_norm": 0.15190380811691284, "learning_rate": 4.1865190117941e-06, "loss": 0.0025, "step": 60170 }, { "epoch": 1.7907250085548927, "grad_norm": 0.1189471036195755, "learning_rate": 4.184810954246649e-06, "loss": 0.0094, "step": 60180 }, { "epoch": 1.7910225700385343, "grad_norm": 0.15668362379074097, "learning_rate": 4.183102994422655e-06, "loss": 0.0025, "step": 60190 }, { "epoch": 1.7913201315221756, "grad_norm": 0.194076806306839, "learning_rate": 4.181395132526863e-06, "loss": 0.0037, "step": 60200 }, { "epoch": 1.7916176930058172, "grad_norm": 0.12817227840423584, "learning_rate": 4.179687368764011e-06, "loss": 0.0022, "step": 60210 }, { "epoch": 1.7919152544894588, "grad_norm": 0.03305977210402489, "learning_rate": 4.177979703338819e-06, "loss": 0.0028, "step": 60220 }, { "epoch": 1.7922128159731003, "grad_norm": 0.19751790165901184, "learning_rate": 4.1762721364560034e-06, "loss": 0.0035, "step": 60230 }, { "epoch": 1.792510377456742, "grad_norm": 0.10934039950370789, "learning_rate": 4.174564668320261e-06, "loss": 0.0027, "step": 60240 }, { "epoch": 1.7928079389403835, "grad_norm": 0.36694401502609253, "learning_rate": 4.172857299136282e-06, "loss": 0.0027, "step": 60250 }, { "epoch": 1.793105500424025, "grad_norm": 0.03900937736034393, "learning_rate": 4.171150029108741e-06, "loss": 0.0017, "step": 60260 }, { "epoch": 1.7934030619076666, "grad_norm": 0.07991581410169601, "learning_rate": 4.169442858442307e-06, "loss": 0.0021, "step": 60270 }, { "epoch": 1.7937006233913082, "grad_norm": 0.15295745432376862, "learning_rate": 4.1677357873416264e-06, "loss": 0.0039, "step": 60280 }, { "epoch": 1.7939981848749498, "grad_norm": 0.10401701927185059, "learning_rate": 4.166028816011345e-06, "loss": 0.0039, "step": 60290 }, { "epoch": 1.7942957463585913, "grad_norm": 0.13686133921146393, "learning_rate": 4.164321944656088e-06, "loss": 0.0034, "step": 60300 }, { "epoch": 1.794593307842233, "grad_norm": 0.03339244797825813, "learning_rate": 4.162615173480476e-06, "loss": 0.0036, "step": 60310 }, { "epoch": 1.7948908693258745, "grad_norm": 0.2542177438735962, "learning_rate": 4.1609085026891096e-06, "loss": 0.0038, "step": 60320 }, { "epoch": 1.795188430809516, "grad_norm": 0.3804916441440582, "learning_rate": 4.159201932486585e-06, "loss": 0.0032, "step": 60330 }, { "epoch": 1.7954859922931576, "grad_norm": 0.1834777444601059, "learning_rate": 4.15749546307748e-06, "loss": 0.0048, "step": 60340 }, { "epoch": 1.7957835537767992, "grad_norm": 0.07453861832618713, "learning_rate": 4.155789094666366e-06, "loss": 0.0072, "step": 60350 }, { "epoch": 1.7960811152604408, "grad_norm": 0.4322255253791809, "learning_rate": 4.154082827457797e-06, "loss": 0.0034, "step": 60360 }, { "epoch": 1.7963786767440824, "grad_norm": 0.2687593698501587, "learning_rate": 4.1523766616563194e-06, "loss": 0.0029, "step": 60370 }, { "epoch": 1.796676238227724, "grad_norm": 0.3320940434932709, "learning_rate": 4.150670597466465e-06, "loss": 0.0029, "step": 60380 }, { "epoch": 1.7969737997113655, "grad_norm": 0.21085669100284576, "learning_rate": 4.148964635092752e-06, "loss": 0.0027, "step": 60390 }, { "epoch": 1.797271361195007, "grad_norm": 0.37531977891921997, "learning_rate": 4.147258774739691e-06, "loss": 0.0046, "step": 60400 }, { "epoch": 1.7975689226786484, "grad_norm": 0.3127638101577759, "learning_rate": 4.145553016611775e-06, "loss": 0.0049, "step": 60410 }, { "epoch": 1.79786648416229, "grad_norm": 0.22742924094200134, "learning_rate": 4.143847360913488e-06, "loss": 0.0044, "step": 60420 }, { "epoch": 1.7981640456459316, "grad_norm": 0.1845839023590088, "learning_rate": 4.142141807849302e-06, "loss": 0.0039, "step": 60430 }, { "epoch": 1.7984616071295731, "grad_norm": 0.10098514705896378, "learning_rate": 4.140436357623676e-06, "loss": 0.0047, "step": 60440 }, { "epoch": 1.7987591686132147, "grad_norm": 0.1932334154844284, "learning_rate": 4.138731010441056e-06, "loss": 0.0034, "step": 60450 }, { "epoch": 1.7990567300968563, "grad_norm": 0.3165946304798126, "learning_rate": 4.137025766505876e-06, "loss": 0.0034, "step": 60460 }, { "epoch": 1.7993542915804979, "grad_norm": 0.07083055377006531, "learning_rate": 4.1353206260225574e-06, "loss": 0.0134, "step": 60470 }, { "epoch": 1.7996518530641392, "grad_norm": 0.10104898363351822, "learning_rate": 4.133615589195509e-06, "loss": 0.0043, "step": 60480 }, { "epoch": 1.7999494145477808, "grad_norm": 0.19890828430652618, "learning_rate": 4.13191065622913e-06, "loss": 0.002, "step": 60490 }, { "epoch": 1.8002469760314224, "grad_norm": 0.1514749377965927, "learning_rate": 4.130205827327801e-06, "loss": 0.0036, "step": 60500 }, { "epoch": 1.800544537515064, "grad_norm": 0.07669971883296967, "learning_rate": 4.128501102695898e-06, "loss": 0.003, "step": 60510 }, { "epoch": 1.8008420989987055, "grad_norm": 0.2120971977710724, "learning_rate": 4.126796482537777e-06, "loss": 0.0027, "step": 60520 }, { "epoch": 1.801139660482347, "grad_norm": 0.046672042459249496, "learning_rate": 4.125091967057788e-06, "loss": 0.0043, "step": 60530 }, { "epoch": 1.8014372219659887, "grad_norm": 0.3047417104244232, "learning_rate": 4.123387556460262e-06, "loss": 0.0031, "step": 60540 }, { "epoch": 1.8017347834496302, "grad_norm": 0.16671910881996155, "learning_rate": 4.121683250949525e-06, "loss": 0.003, "step": 60550 }, { "epoch": 1.8020323449332718, "grad_norm": 0.13499632477760315, "learning_rate": 4.119979050729881e-06, "loss": 0.0019, "step": 60560 }, { "epoch": 1.8023299064169134, "grad_norm": 0.1723843365907669, "learning_rate": 4.118274956005632e-06, "loss": 0.0039, "step": 60570 }, { "epoch": 1.802627467900555, "grad_norm": 0.252005398273468, "learning_rate": 4.116570966981059e-06, "loss": 0.0045, "step": 60580 }, { "epoch": 1.8029250293841965, "grad_norm": 0.10572943091392517, "learning_rate": 4.1148670838604325e-06, "loss": 0.0049, "step": 60590 }, { "epoch": 1.803222590867838, "grad_norm": 0.20988447964191437, "learning_rate": 4.113163306848012e-06, "loss": 0.0025, "step": 60600 }, { "epoch": 1.8035201523514797, "grad_norm": 0.20656105875968933, "learning_rate": 4.1114596361480445e-06, "loss": 0.0041, "step": 60610 }, { "epoch": 1.8038177138351212, "grad_norm": 0.12895932793617249, "learning_rate": 4.109756071964762e-06, "loss": 0.0032, "step": 60620 }, { "epoch": 1.8041152753187628, "grad_norm": 0.23713721334934235, "learning_rate": 4.1080526145023845e-06, "loss": 0.0051, "step": 60630 }, { "epoch": 1.8044128368024044, "grad_norm": 0.28590917587280273, "learning_rate": 4.10634926396512e-06, "loss": 0.0034, "step": 60640 }, { "epoch": 1.804710398286046, "grad_norm": 0.12196196615695953, "learning_rate": 4.104646020557165e-06, "loss": 0.003, "step": 60650 }, { "epoch": 1.8050079597696875, "grad_norm": 0.09562021493911743, "learning_rate": 4.102942884482697e-06, "loss": 0.0054, "step": 60660 }, { "epoch": 1.805305521253329, "grad_norm": 0.12450490146875381, "learning_rate": 4.101239855945889e-06, "loss": 0.0036, "step": 60670 }, { "epoch": 1.8056030827369707, "grad_norm": 0.22367964684963226, "learning_rate": 4.099536935150896e-06, "loss": 0.0034, "step": 60680 }, { "epoch": 1.8059006442206123, "grad_norm": 0.2795344591140747, "learning_rate": 4.097834122301861e-06, "loss": 0.0038, "step": 60690 }, { "epoch": 1.8061982057042536, "grad_norm": 0.1152261346578598, "learning_rate": 4.0961314176029125e-06, "loss": 0.0047, "step": 60700 }, { "epoch": 1.8064957671878952, "grad_norm": 0.4091964662075043, "learning_rate": 4.094428821258169e-06, "loss": 0.0031, "step": 60710 }, { "epoch": 1.8067933286715367, "grad_norm": 0.19460248947143555, "learning_rate": 4.092726333471738e-06, "loss": 0.002, "step": 60720 }, { "epoch": 1.8070908901551783, "grad_norm": 0.17656734585762024, "learning_rate": 4.0910239544477075e-06, "loss": 0.0028, "step": 60730 }, { "epoch": 1.80738845163882, "grad_norm": 0.045310746878385544, "learning_rate": 4.089321684390156e-06, "loss": 0.0076, "step": 60740 }, { "epoch": 1.8076860131224615, "grad_norm": 0.42076945304870605, "learning_rate": 4.087619523503149e-06, "loss": 0.0026, "step": 60750 }, { "epoch": 1.807983574606103, "grad_norm": 0.29896360635757446, "learning_rate": 4.085917471990739e-06, "loss": 0.0022, "step": 60760 }, { "epoch": 1.8082811360897444, "grad_norm": 0.2457967847585678, "learning_rate": 4.084215530056965e-06, "loss": 0.0032, "step": 60770 }, { "epoch": 1.808578697573386, "grad_norm": 0.14462575316429138, "learning_rate": 4.082513697905853e-06, "loss": 0.0036, "step": 60780 }, { "epoch": 1.8088762590570275, "grad_norm": 0.13513493537902832, "learning_rate": 4.080811975741415e-06, "loss": 0.0026, "step": 60790 }, { "epoch": 1.809173820540669, "grad_norm": 0.09777632355690002, "learning_rate": 4.079110363767649e-06, "loss": 0.0028, "step": 60800 }, { "epoch": 1.8094713820243107, "grad_norm": 0.14065834879875183, "learning_rate": 4.077408862188546e-06, "loss": 0.0016, "step": 60810 }, { "epoch": 1.8097689435079523, "grad_norm": 0.06553452461957932, "learning_rate": 4.075707471208073e-06, "loss": 0.0036, "step": 60820 }, { "epoch": 1.8100665049915938, "grad_norm": 0.11089284718036652, "learning_rate": 4.0740061910301954e-06, "loss": 0.0028, "step": 60830 }, { "epoch": 1.8103640664752354, "grad_norm": 0.20251736044883728, "learning_rate": 4.072305021858855e-06, "loss": 0.0028, "step": 60840 }, { "epoch": 1.810661627958877, "grad_norm": 0.12769071757793427, "learning_rate": 4.07060396389799e-06, "loss": 0.0021, "step": 60850 }, { "epoch": 1.8109591894425185, "grad_norm": 0.04569568485021591, "learning_rate": 4.068903017351516e-06, "loss": 0.0022, "step": 60860 }, { "epoch": 1.8112567509261601, "grad_norm": 0.26835379004478455, "learning_rate": 4.067202182423342e-06, "loss": 0.0024, "step": 60870 }, { "epoch": 1.8115543124098017, "grad_norm": 0.12602435052394867, "learning_rate": 4.065501459317359e-06, "loss": 0.0035, "step": 60880 }, { "epoch": 1.8118518738934433, "grad_norm": 0.1064789667725563, "learning_rate": 4.06380084823745e-06, "loss": 0.0023, "step": 60890 }, { "epoch": 1.8121494353770848, "grad_norm": 0.22879090905189514, "learning_rate": 4.062100349387477e-06, "loss": 0.003, "step": 60900 }, { "epoch": 1.8124469968607264, "grad_norm": 0.3441191613674164, "learning_rate": 4.0603999629712975e-06, "loss": 0.004, "step": 60910 }, { "epoch": 1.812744558344368, "grad_norm": 0.21592013537883759, "learning_rate": 4.058699689192747e-06, "loss": 0.0042, "step": 60920 }, { "epoch": 1.8130421198280096, "grad_norm": 0.32475605607032776, "learning_rate": 4.056999528255654e-06, "loss": 0.0021, "step": 60930 }, { "epoch": 1.8133396813116511, "grad_norm": 0.36363983154296875, "learning_rate": 4.055299480363829e-06, "loss": 0.0047, "step": 60940 }, { "epoch": 1.8136372427952927, "grad_norm": 0.07088652998209, "learning_rate": 4.053599545721073e-06, "loss": 0.0026, "step": 60950 }, { "epoch": 1.8139348042789343, "grad_norm": 0.11151469498872757, "learning_rate": 4.051899724531169e-06, "loss": 0.0044, "step": 60960 }, { "epoch": 1.8142323657625758, "grad_norm": 0.16067427396774292, "learning_rate": 4.05020001699789e-06, "loss": 0.0028, "step": 60970 }, { "epoch": 1.8145299272462172, "grad_norm": 0.13760609924793243, "learning_rate": 4.048500423324991e-06, "loss": 0.0023, "step": 60980 }, { "epoch": 1.8148274887298588, "grad_norm": 0.19688917696475983, "learning_rate": 4.046800943716221e-06, "loss": 0.004, "step": 60990 }, { "epoch": 1.8151250502135003, "grad_norm": 0.0490310974419117, "learning_rate": 4.045101578375309e-06, "loss": 0.0038, "step": 61000 }, { "epoch": 1.815422611697142, "grad_norm": 0.10348455607891083, "learning_rate": 4.043402327505971e-06, "loss": 0.0023, "step": 61010 }, { "epoch": 1.8157201731807835, "grad_norm": 0.12045618146657944, "learning_rate": 4.041703191311912e-06, "loss": 0.0034, "step": 61020 }, { "epoch": 1.816017734664425, "grad_norm": 0.07784474641084671, "learning_rate": 4.0400041699968185e-06, "loss": 0.004, "step": 61030 }, { "epoch": 1.8163152961480666, "grad_norm": 0.17091263830661774, "learning_rate": 4.03830526376437e-06, "loss": 0.0024, "step": 61040 }, { "epoch": 1.816612857631708, "grad_norm": 0.2811773419380188, "learning_rate": 4.0366064728182256e-06, "loss": 0.0035, "step": 61050 }, { "epoch": 1.8169104191153496, "grad_norm": 0.03532293438911438, "learning_rate": 4.034907797362034e-06, "loss": 0.0022, "step": 61060 }, { "epoch": 1.8172079805989911, "grad_norm": 0.05007955804467201, "learning_rate": 4.03320923759943e-06, "loss": 0.002, "step": 61070 }, { "epoch": 1.8175055420826327, "grad_norm": 0.4715263843536377, "learning_rate": 4.0315107937340345e-06, "loss": 0.0043, "step": 61080 }, { "epoch": 1.8178031035662743, "grad_norm": 0.15127207338809967, "learning_rate": 4.029812465969453e-06, "loss": 0.0029, "step": 61090 }, { "epoch": 1.8181006650499159, "grad_norm": 0.05960272625088692, "learning_rate": 4.028114254509279e-06, "loss": 0.0033, "step": 61100 }, { "epoch": 1.8183982265335574, "grad_norm": 0.1058664470911026, "learning_rate": 4.026416159557089e-06, "loss": 0.004, "step": 61110 }, { "epoch": 1.818695788017199, "grad_norm": 0.1238047182559967, "learning_rate": 4.024718181316451e-06, "loss": 0.0024, "step": 61120 }, { "epoch": 1.8189933495008406, "grad_norm": 0.20497535169124603, "learning_rate": 4.023020319990913e-06, "loss": 0.0037, "step": 61130 }, { "epoch": 1.8192909109844821, "grad_norm": 0.1558791548013687, "learning_rate": 4.021322575784014e-06, "loss": 0.0026, "step": 61140 }, { "epoch": 1.8195884724681237, "grad_norm": 0.17280544340610504, "learning_rate": 4.019624948899274e-06, "loss": 0.0032, "step": 61150 }, { "epoch": 1.8198860339517653, "grad_norm": 0.19937923550605774, "learning_rate": 4.017927439540204e-06, "loss": 0.0048, "step": 61160 }, { "epoch": 1.8201835954354069, "grad_norm": 0.0876844972372055, "learning_rate": 4.016230047910297e-06, "loss": 0.0032, "step": 61170 }, { "epoch": 1.8204811569190484, "grad_norm": 0.19779057800769806, "learning_rate": 4.014532774213034e-06, "loss": 0.0029, "step": 61180 }, { "epoch": 1.82077871840269, "grad_norm": 0.2069396674633026, "learning_rate": 4.012835618651881e-06, "loss": 0.0029, "step": 61190 }, { "epoch": 1.8210762798863316, "grad_norm": 0.1806468367576599, "learning_rate": 4.011138581430289e-06, "loss": 0.0029, "step": 61200 }, { "epoch": 1.8213738413699732, "grad_norm": 0.02415909245610237, "learning_rate": 4.009441662751698e-06, "loss": 0.0043, "step": 61210 }, { "epoch": 1.8216714028536147, "grad_norm": 0.15320563316345215, "learning_rate": 4.007744862819531e-06, "loss": 0.0048, "step": 61220 }, { "epoch": 1.8219689643372563, "grad_norm": 0.09059479832649231, "learning_rate": 4.006048181837197e-06, "loss": 0.0016, "step": 61230 }, { "epoch": 1.8222665258208979, "grad_norm": 0.20013675093650818, "learning_rate": 4.00435162000809e-06, "loss": 0.0046, "step": 61240 }, { "epoch": 1.8225640873045394, "grad_norm": 0.144164577126503, "learning_rate": 4.002655177535595e-06, "loss": 0.0087, "step": 61250 }, { "epoch": 1.822861648788181, "grad_norm": 0.1692691296339035, "learning_rate": 4.0009588546230736e-06, "loss": 0.0035, "step": 61260 }, { "epoch": 1.8231592102718224, "grad_norm": 0.1667293757200241, "learning_rate": 3.9992626514738795e-06, "loss": 0.0053, "step": 61270 }, { "epoch": 1.823456771755464, "grad_norm": 0.18780720233917236, "learning_rate": 3.997566568291353e-06, "loss": 0.0057, "step": 61280 }, { "epoch": 1.8237543332391055, "grad_norm": 0.3649841845035553, "learning_rate": 3.9958706052788154e-06, "loss": 0.0047, "step": 61290 }, { "epoch": 1.824051894722747, "grad_norm": 0.437669575214386, "learning_rate": 3.994174762639578e-06, "loss": 0.0038, "step": 61300 }, { "epoch": 1.8243494562063887, "grad_norm": 0.1037796139717102, "learning_rate": 3.9924790405769324e-06, "loss": 0.0023, "step": 61310 }, { "epoch": 1.8246470176900302, "grad_norm": 0.13676634430885315, "learning_rate": 3.990783439294161e-06, "loss": 0.0028, "step": 61320 }, { "epoch": 1.8249445791736718, "grad_norm": 0.2251702845096588, "learning_rate": 3.989087958994528e-06, "loss": 0.0028, "step": 61330 }, { "epoch": 1.8252421406573132, "grad_norm": 0.19949090480804443, "learning_rate": 3.987392599881287e-06, "loss": 0.0036, "step": 61340 }, { "epoch": 1.8255397021409547, "grad_norm": 0.32878950238227844, "learning_rate": 3.985697362157672e-06, "loss": 0.0039, "step": 61350 }, { "epoch": 1.8258372636245963, "grad_norm": 0.14414361119270325, "learning_rate": 3.984002246026908e-06, "loss": 0.0042, "step": 61360 }, { "epoch": 1.8261348251082379, "grad_norm": 0.11088303476572037, "learning_rate": 3.9823072516922e-06, "loss": 0.0038, "step": 61370 }, { "epoch": 1.8264323865918795, "grad_norm": 0.19632893800735474, "learning_rate": 3.980612379356742e-06, "loss": 0.0041, "step": 61380 }, { "epoch": 1.826729948075521, "grad_norm": 0.15279115736484528, "learning_rate": 3.978917629223713e-06, "loss": 0.0024, "step": 61390 }, { "epoch": 1.8270275095591626, "grad_norm": 0.11371898651123047, "learning_rate": 3.977223001496276e-06, "loss": 0.0053, "step": 61400 }, { "epoch": 1.8273250710428042, "grad_norm": 0.18423821032047272, "learning_rate": 3.97552849637758e-06, "loss": 0.004, "step": 61410 }, { "epoch": 1.8276226325264457, "grad_norm": 0.212240070104599, "learning_rate": 3.97383411407076e-06, "loss": 0.0047, "step": 61420 }, { "epoch": 1.8279201940100873, "grad_norm": 0.2470191866159439, "learning_rate": 3.9721398547789345e-06, "loss": 0.0019, "step": 61430 }, { "epoch": 1.828217755493729, "grad_norm": 0.04947181046009064, "learning_rate": 3.97044571870521e-06, "loss": 0.002, "step": 61440 }, { "epoch": 1.8285153169773705, "grad_norm": 0.20298218727111816, "learning_rate": 3.968751706052674e-06, "loss": 0.0028, "step": 61450 }, { "epoch": 1.828812878461012, "grad_norm": 0.20592738687992096, "learning_rate": 3.967057817024406e-06, "loss": 0.0029, "step": 61460 }, { "epoch": 1.8291104399446536, "grad_norm": 0.2561618387699127, "learning_rate": 3.965364051823461e-06, "loss": 0.0039, "step": 61470 }, { "epoch": 1.8294080014282952, "grad_norm": 0.10710955411195755, "learning_rate": 3.96367041065289e-06, "loss": 0.0064, "step": 61480 }, { "epoch": 1.8297055629119368, "grad_norm": 0.2356201559305191, "learning_rate": 3.96197689371572e-06, "loss": 0.0028, "step": 61490 }, { "epoch": 1.8300031243955783, "grad_norm": 0.05795900151133537, "learning_rate": 3.9602835012149695e-06, "loss": 0.0033, "step": 61500 }, { "epoch": 1.83030068587922, "grad_norm": 0.23986534774303436, "learning_rate": 3.958590233353637e-06, "loss": 0.0045, "step": 61510 }, { "epoch": 1.8305982473628615, "grad_norm": 0.4779277741909027, "learning_rate": 3.956897090334711e-06, "loss": 0.004, "step": 61520 }, { "epoch": 1.830895808846503, "grad_norm": 0.2109486311674118, "learning_rate": 3.9552040723611615e-06, "loss": 0.0058, "step": 61530 }, { "epoch": 1.8311933703301446, "grad_norm": 0.12684282660484314, "learning_rate": 3.953511179635943e-06, "loss": 0.0044, "step": 61540 }, { "epoch": 1.831490931813786, "grad_norm": 0.2841905355453491, "learning_rate": 3.951818412361998e-06, "loss": 0.0044, "step": 61550 }, { "epoch": 1.8317884932974275, "grad_norm": 0.15390801429748535, "learning_rate": 3.950125770742252e-06, "loss": 0.0038, "step": 61560 }, { "epoch": 1.8320860547810691, "grad_norm": 0.08508235961198807, "learning_rate": 3.948433254979619e-06, "loss": 0.0042, "step": 61570 }, { "epoch": 1.8323836162647107, "grad_norm": 0.1096944659948349, "learning_rate": 3.946740865276991e-06, "loss": 0.0027, "step": 61580 }, { "epoch": 1.8326811777483523, "grad_norm": 0.08180341869592667, "learning_rate": 3.94504860183725e-06, "loss": 0.0031, "step": 61590 }, { "epoch": 1.8329787392319938, "grad_norm": 0.6176539659500122, "learning_rate": 3.943356464863262e-06, "loss": 0.0046, "step": 61600 }, { "epoch": 1.8332763007156354, "grad_norm": 0.17520810663700104, "learning_rate": 3.941664454557876e-06, "loss": 0.0027, "step": 61610 }, { "epoch": 1.8335738621992768, "grad_norm": 0.4591572880744934, "learning_rate": 3.93997257112393e-06, "loss": 0.0028, "step": 61620 }, { "epoch": 1.8338714236829183, "grad_norm": 0.15974433720111847, "learning_rate": 3.938280814764242e-06, "loss": 0.0023, "step": 61630 }, { "epoch": 1.83416898516656, "grad_norm": 0.19585728645324707, "learning_rate": 3.936589185681618e-06, "loss": 0.0048, "step": 61640 }, { "epoch": 1.8344665466502015, "grad_norm": 0.08118084818124771, "learning_rate": 3.934897684078846e-06, "loss": 0.0038, "step": 61650 }, { "epoch": 1.834764108133843, "grad_norm": 0.2643837630748749, "learning_rate": 3.933206310158701e-06, "loss": 0.0058, "step": 61660 }, { "epoch": 1.8350616696174846, "grad_norm": 0.08157075196504593, "learning_rate": 3.931515064123942e-06, "loss": 0.0049, "step": 61670 }, { "epoch": 1.8353592311011262, "grad_norm": 0.28627529740333557, "learning_rate": 3.929823946177314e-06, "loss": 0.0049, "step": 61680 }, { "epoch": 1.8356567925847678, "grad_norm": 0.0840844139456749, "learning_rate": 3.928132956521543e-06, "loss": 0.0043, "step": 61690 }, { "epoch": 1.8359543540684093, "grad_norm": 0.06985701620578766, "learning_rate": 3.9264420953593445e-06, "loss": 0.0028, "step": 61700 }, { "epoch": 1.836251915552051, "grad_norm": 0.32700058817863464, "learning_rate": 3.924751362893413e-06, "loss": 0.0038, "step": 61710 }, { "epoch": 1.8365494770356925, "grad_norm": 0.21030591428279877, "learning_rate": 3.923060759326433e-06, "loss": 0.0036, "step": 61720 }, { "epoch": 1.836847038519334, "grad_norm": 0.12482701241970062, "learning_rate": 3.9213702848610704e-06, "loss": 0.0021, "step": 61730 }, { "epoch": 1.8371446000029756, "grad_norm": 0.19756077229976654, "learning_rate": 3.919679939699976e-06, "loss": 0.002, "step": 61740 }, { "epoch": 1.8374421614866172, "grad_norm": 0.0794423520565033, "learning_rate": 3.9179897240457855e-06, "loss": 0.004, "step": 61750 }, { "epoch": 1.8377397229702588, "grad_norm": 0.48522526025772095, "learning_rate": 3.91629963810112e-06, "loss": 0.0039, "step": 61760 }, { "epoch": 1.8380372844539004, "grad_norm": 0.055471207946538925, "learning_rate": 3.914609682068583e-06, "loss": 0.0042, "step": 61770 }, { "epoch": 1.838334845937542, "grad_norm": 0.2323472797870636, "learning_rate": 3.9129198561507645e-06, "loss": 0.0042, "step": 61780 }, { "epoch": 1.8386324074211835, "grad_norm": 0.17320315539836884, "learning_rate": 3.911230160550236e-06, "loss": 0.0032, "step": 61790 }, { "epoch": 1.838929968904825, "grad_norm": 0.08595261722803116, "learning_rate": 3.909540595469557e-06, "loss": 0.0019, "step": 61800 }, { "epoch": 1.8392275303884666, "grad_norm": 0.1206427812576294, "learning_rate": 3.907851161111269e-06, "loss": 0.0025, "step": 61810 }, { "epoch": 1.8395250918721082, "grad_norm": 0.15549927949905396, "learning_rate": 3.9061618576779e-06, "loss": 0.0046, "step": 61820 }, { "epoch": 1.8398226533557498, "grad_norm": 0.08543804287910461, "learning_rate": 3.904472685371956e-06, "loss": 0.0026, "step": 61830 }, { "epoch": 1.8401202148393911, "grad_norm": 0.40483057498931885, "learning_rate": 3.9027836443959385e-06, "loss": 0.0028, "step": 61840 }, { "epoch": 1.8404177763230327, "grad_norm": 0.20451508462429047, "learning_rate": 3.901094734952324e-06, "loss": 0.0025, "step": 61850 }, { "epoch": 1.8407153378066743, "grad_norm": 0.14110225439071655, "learning_rate": 3.899405957243575e-06, "loss": 0.004, "step": 61860 }, { "epoch": 1.8410128992903159, "grad_norm": 0.11278752237558365, "learning_rate": 3.897717311472141e-06, "loss": 0.0031, "step": 61870 }, { "epoch": 1.8413104607739574, "grad_norm": 0.19395725429058075, "learning_rate": 3.896028797840452e-06, "loss": 0.0066, "step": 61880 }, { "epoch": 1.841608022257599, "grad_norm": 0.25860658288002014, "learning_rate": 3.894340416550926e-06, "loss": 0.003, "step": 61890 }, { "epoch": 1.8419055837412406, "grad_norm": 0.35059282183647156, "learning_rate": 3.892652167805962e-06, "loss": 0.0027, "step": 61900 }, { "epoch": 1.842203145224882, "grad_norm": 0.15293343365192413, "learning_rate": 3.890964051807945e-06, "loss": 0.0035, "step": 61910 }, { "epoch": 1.8425007067085235, "grad_norm": 0.06141253933310509, "learning_rate": 3.889276068759243e-06, "loss": 0.0033, "step": 61920 }, { "epoch": 1.842798268192165, "grad_norm": 0.047736600041389465, "learning_rate": 3.887588218862209e-06, "loss": 0.002, "step": 61930 }, { "epoch": 1.8430958296758067, "grad_norm": 0.28799793124198914, "learning_rate": 3.88590050231918e-06, "loss": 0.0042, "step": 61940 }, { "epoch": 1.8433933911594482, "grad_norm": 0.11892658472061157, "learning_rate": 3.884212919332475e-06, "loss": 0.0054, "step": 61950 }, { "epoch": 1.8436909526430898, "grad_norm": 0.25773885846138, "learning_rate": 3.882525470104399e-06, "loss": 0.004, "step": 61960 }, { "epoch": 1.8439885141267314, "grad_norm": 0.14044569432735443, "learning_rate": 3.880838154837241e-06, "loss": 0.0041, "step": 61970 }, { "epoch": 1.844286075610373, "grad_norm": 0.3473464846611023, "learning_rate": 3.879150973733274e-06, "loss": 0.0021, "step": 61980 }, { "epoch": 1.8445836370940145, "grad_norm": 0.23571428656578064, "learning_rate": 3.8774639269947525e-06, "loss": 0.0031, "step": 61990 }, { "epoch": 1.844881198577656, "grad_norm": 0.0951542779803276, "learning_rate": 3.875777014823918e-06, "loss": 0.0035, "step": 62000 }, { "epoch": 1.8451787600612977, "grad_norm": 0.14728744328022003, "learning_rate": 3.874090237422995e-06, "loss": 0.004, "step": 62010 }, { "epoch": 1.8454763215449392, "grad_norm": 0.1343916952610016, "learning_rate": 3.872403594994191e-06, "loss": 0.0034, "step": 62020 }, { "epoch": 1.8457738830285808, "grad_norm": 0.2268752008676529, "learning_rate": 3.870717087739697e-06, "loss": 0.0044, "step": 62030 }, { "epoch": 1.8460714445122224, "grad_norm": 0.17533434927463531, "learning_rate": 3.86903071586169e-06, "loss": 0.0023, "step": 62040 }, { "epoch": 1.846369005995864, "grad_norm": 0.14261198043823242, "learning_rate": 3.8673444795623286e-06, "loss": 0.0022, "step": 62050 }, { "epoch": 1.8466665674795055, "grad_norm": 0.08646510541439056, "learning_rate": 3.865658379043756e-06, "loss": 0.0026, "step": 62060 }, { "epoch": 1.846964128963147, "grad_norm": 0.21233713626861572, "learning_rate": 3.8639724145080985e-06, "loss": 0.0046, "step": 62070 }, { "epoch": 1.8472616904467887, "grad_norm": 0.1907278448343277, "learning_rate": 3.862286586157467e-06, "loss": 0.0025, "step": 62080 }, { "epoch": 1.8475592519304302, "grad_norm": 0.10160430520772934, "learning_rate": 3.860600894193956e-06, "loss": 0.0033, "step": 62090 }, { "epoch": 1.8478568134140718, "grad_norm": 0.23830357193946838, "learning_rate": 3.858915338819644e-06, "loss": 0.0032, "step": 62100 }, { "epoch": 1.8481543748977134, "grad_norm": 0.16256321966648102, "learning_rate": 3.857229920236591e-06, "loss": 0.0048, "step": 62110 }, { "epoch": 1.8484519363813547, "grad_norm": 0.225909024477005, "learning_rate": 3.855544638646841e-06, "loss": 0.0036, "step": 62120 }, { "epoch": 1.8487494978649963, "grad_norm": 0.17988182604312897, "learning_rate": 3.853859494252426e-06, "loss": 0.0057, "step": 62130 }, { "epoch": 1.849047059348638, "grad_norm": 0.2475946992635727, "learning_rate": 3.852174487255356e-06, "loss": 0.0031, "step": 62140 }, { "epoch": 1.8493446208322795, "grad_norm": 0.3584599792957306, "learning_rate": 3.850489617857628e-06, "loss": 0.0042, "step": 62150 }, { "epoch": 1.849642182315921, "grad_norm": 0.1896471381187439, "learning_rate": 3.84880488626122e-06, "loss": 0.004, "step": 62160 }, { "epoch": 1.8499397437995626, "grad_norm": 0.08052068948745728, "learning_rate": 3.847120292668096e-06, "loss": 0.0015, "step": 62170 }, { "epoch": 1.8502373052832042, "grad_norm": 0.06865082681179047, "learning_rate": 3.8454358372802e-06, "loss": 0.0044, "step": 62180 }, { "epoch": 1.8505348667668458, "grad_norm": 0.15211156010627747, "learning_rate": 3.843751520299465e-06, "loss": 0.004, "step": 62190 }, { "epoch": 1.850832428250487, "grad_norm": 0.2643163800239563, "learning_rate": 3.8420673419278e-06, "loss": 0.0031, "step": 62200 }, { "epoch": 1.8511299897341287, "grad_norm": 0.31574031710624695, "learning_rate": 3.840383302367105e-06, "loss": 0.0044, "step": 62210 }, { "epoch": 1.8514275512177703, "grad_norm": 0.12502078711986542, "learning_rate": 3.838699401819257e-06, "loss": 0.0035, "step": 62220 }, { "epoch": 1.8517251127014118, "grad_norm": 0.06305534392595291, "learning_rate": 3.8370156404861205e-06, "loss": 0.004, "step": 62230 }, { "epoch": 1.8520226741850534, "grad_norm": 0.07327855378389359, "learning_rate": 3.835332018569542e-06, "loss": 0.0025, "step": 62240 }, { "epoch": 1.852320235668695, "grad_norm": 0.18932458758354187, "learning_rate": 3.83364853627135e-06, "loss": 0.0052, "step": 62250 }, { "epoch": 1.8526177971523365, "grad_norm": 0.1320052146911621, "learning_rate": 3.831965193793359e-06, "loss": 0.0034, "step": 62260 }, { "epoch": 1.8529153586359781, "grad_norm": 0.13690434396266937, "learning_rate": 3.830281991337365e-06, "loss": 0.0037, "step": 62270 }, { "epoch": 1.8532129201196197, "grad_norm": 0.1701221764087677, "learning_rate": 3.8285989291051455e-06, "loss": 0.0028, "step": 62280 }, { "epoch": 1.8535104816032613, "grad_norm": 0.23923765122890472, "learning_rate": 3.8269160072984655e-06, "loss": 0.0026, "step": 62290 }, { "epoch": 1.8538080430869028, "grad_norm": 0.22104638814926147, "learning_rate": 3.82523322611907e-06, "loss": 0.0036, "step": 62300 }, { "epoch": 1.8541056045705444, "grad_norm": 0.019512126222252846, "learning_rate": 3.823550585768686e-06, "loss": 0.0036, "step": 62310 }, { "epoch": 1.854403166054186, "grad_norm": 0.09817004203796387, "learning_rate": 3.8218680864490285e-06, "loss": 0.0027, "step": 62320 }, { "epoch": 1.8547007275378276, "grad_norm": 0.15335778892040253, "learning_rate": 3.820185728361791e-06, "loss": 0.0031, "step": 62330 }, { "epoch": 1.8549982890214691, "grad_norm": 0.07420454174280167, "learning_rate": 3.818503511708652e-06, "loss": 0.0019, "step": 62340 }, { "epoch": 1.8552958505051107, "grad_norm": 0.18688087165355682, "learning_rate": 3.816821436691273e-06, "loss": 0.0037, "step": 62350 }, { "epoch": 1.8555934119887523, "grad_norm": 0.10636722296476364, "learning_rate": 3.815139503511298e-06, "loss": 0.0035, "step": 62360 }, { "epoch": 1.8558909734723938, "grad_norm": 0.14767147600650787, "learning_rate": 3.8134577123703524e-06, "loss": 0.0031, "step": 62370 }, { "epoch": 1.8561885349560354, "grad_norm": 0.26748916506767273, "learning_rate": 3.81177606347005e-06, "loss": 0.006, "step": 62380 }, { "epoch": 1.856486096439677, "grad_norm": 0.031158318743109703, "learning_rate": 3.810094557011981e-06, "loss": 0.0024, "step": 62390 }, { "epoch": 1.8567836579233186, "grad_norm": 0.1549183428287506, "learning_rate": 3.8084131931977218e-06, "loss": 0.0029, "step": 62400 }, { "epoch": 1.85708121940696, "grad_norm": 0.07303127646446228, "learning_rate": 3.8067319722288347e-06, "loss": 0.0034, "step": 62410 }, { "epoch": 1.8573787808906015, "grad_norm": 0.07180318236351013, "learning_rate": 3.8050508943068578e-06, "loss": 0.0034, "step": 62420 }, { "epoch": 1.857676342374243, "grad_norm": 0.18287815153598785, "learning_rate": 3.803369959633318e-06, "loss": 0.0046, "step": 62430 }, { "epoch": 1.8579739038578846, "grad_norm": 0.3566124141216278, "learning_rate": 3.8016891684097213e-06, "loss": 0.0024, "step": 62440 }, { "epoch": 1.8582714653415262, "grad_norm": 0.28406697511672974, "learning_rate": 3.8000085208375593e-06, "loss": 0.0045, "step": 62450 }, { "epoch": 1.8585690268251678, "grad_norm": 0.32157397270202637, "learning_rate": 3.7983280171183034e-06, "loss": 0.0029, "step": 62460 }, { "epoch": 1.8588665883088094, "grad_norm": 0.10812000930309296, "learning_rate": 3.7966476574534123e-06, "loss": 0.0027, "step": 62470 }, { "epoch": 1.8591641497924507, "grad_norm": 0.303230881690979, "learning_rate": 3.7949674420443218e-06, "loss": 0.0031, "step": 62480 }, { "epoch": 1.8594617112760923, "grad_norm": 0.19570504128932953, "learning_rate": 3.793287371092455e-06, "loss": 0.0031, "step": 62490 }, { "epoch": 1.8597592727597339, "grad_norm": 0.0746304839849472, "learning_rate": 3.7916074447992147e-06, "loss": 0.0054, "step": 62500 }, { "epoch": 1.8600568342433754, "grad_norm": 0.12048687040805817, "learning_rate": 3.7899276633659885e-06, "loss": 0.0026, "step": 62510 }, { "epoch": 1.860354395727017, "grad_norm": 0.08690017461776733, "learning_rate": 3.788248026994145e-06, "loss": 0.003, "step": 62520 }, { "epoch": 1.8606519572106586, "grad_norm": 0.09930261969566345, "learning_rate": 3.786568535885038e-06, "loss": 0.0051, "step": 62530 }, { "epoch": 1.8609495186943001, "grad_norm": 0.3744939863681793, "learning_rate": 3.7848891902399987e-06, "loss": 0.0031, "step": 62540 }, { "epoch": 1.8612470801779417, "grad_norm": 0.11705373972654343, "learning_rate": 3.783209990260347e-06, "loss": 0.0031, "step": 62550 }, { "epoch": 1.8615446416615833, "grad_norm": 0.18191111087799072, "learning_rate": 3.78153093614738e-06, "loss": 0.0037, "step": 62560 }, { "epoch": 1.8618422031452249, "grad_norm": 0.18304407596588135, "learning_rate": 3.779852028102383e-06, "loss": 0.0058, "step": 62570 }, { "epoch": 1.8621397646288664, "grad_norm": 0.20020602643489838, "learning_rate": 3.7781732663266175e-06, "loss": 0.0021, "step": 62580 }, { "epoch": 1.862437326112508, "grad_norm": 0.08048901706933975, "learning_rate": 3.776494651021333e-06, "loss": 0.003, "step": 62590 }, { "epoch": 1.8627348875961496, "grad_norm": 0.10529300570487976, "learning_rate": 3.7748161823877567e-06, "loss": 0.0028, "step": 62600 }, { "epoch": 1.8630324490797912, "grad_norm": 0.03269254416227341, "learning_rate": 3.773137860627103e-06, "loss": 0.0039, "step": 62610 }, { "epoch": 1.8633300105634327, "grad_norm": 0.19077140092849731, "learning_rate": 3.771459685940563e-06, "loss": 0.0022, "step": 62620 }, { "epoch": 1.8636275720470743, "grad_norm": 0.34318745136260986, "learning_rate": 3.7697816585293177e-06, "loss": 0.005, "step": 62630 }, { "epoch": 1.8639251335307159, "grad_norm": 0.3280849754810333, "learning_rate": 3.7681037785945218e-06, "loss": 0.004, "step": 62640 }, { "epoch": 1.8642226950143574, "grad_norm": 0.17648126184940338, "learning_rate": 3.76642604633732e-06, "loss": 0.007, "step": 62650 }, { "epoch": 1.864520256497999, "grad_norm": 0.03439875692129135, "learning_rate": 3.764748461958835e-06, "loss": 0.0031, "step": 62660 }, { "epoch": 1.8648178179816406, "grad_norm": 0.10345843434333801, "learning_rate": 3.7630710256601705e-06, "loss": 0.0031, "step": 62670 }, { "epoch": 1.8651153794652822, "grad_norm": 0.160631000995636, "learning_rate": 3.7613937376424175e-06, "loss": 0.0031, "step": 62680 }, { "epoch": 1.8654129409489237, "grad_norm": 0.14729546010494232, "learning_rate": 3.7597165981066423e-06, "loss": 0.0025, "step": 62690 }, { "epoch": 1.865710502432565, "grad_norm": 0.1816486418247223, "learning_rate": 3.7580396072539037e-06, "loss": 0.003, "step": 62700 }, { "epoch": 1.8660080639162067, "grad_norm": 0.3719574809074402, "learning_rate": 3.7563627652852338e-06, "loss": 0.0039, "step": 62710 }, { "epoch": 1.8663056253998482, "grad_norm": 0.14888349175453186, "learning_rate": 3.754686072401647e-06, "loss": 0.0032, "step": 62720 }, { "epoch": 1.8666031868834898, "grad_norm": 0.07251955568790436, "learning_rate": 3.7530095288041457e-06, "loss": 0.0025, "step": 62730 }, { "epoch": 1.8669007483671314, "grad_norm": 0.12783847749233246, "learning_rate": 3.7513331346937095e-06, "loss": 0.0038, "step": 62740 }, { "epoch": 1.867198309850773, "grad_norm": 0.31315791606903076, "learning_rate": 3.7496568902713017e-06, "loss": 0.0041, "step": 62750 }, { "epoch": 1.8674958713344145, "grad_norm": 0.17367085814476013, "learning_rate": 3.7479807957378676e-06, "loss": 0.0046, "step": 62760 }, { "epoch": 1.8677934328180559, "grad_norm": 0.1046079769730568, "learning_rate": 3.746304851294335e-06, "loss": 0.0038, "step": 62770 }, { "epoch": 1.8680909943016974, "grad_norm": 0.09643679112195969, "learning_rate": 3.744629057141612e-06, "loss": 0.0036, "step": 62780 }, { "epoch": 1.868388555785339, "grad_norm": 0.13410398364067078, "learning_rate": 3.7429534134805924e-06, "loss": 0.003, "step": 62790 }, { "epoch": 1.8686861172689806, "grad_norm": 0.12113101780414581, "learning_rate": 3.7412779205121462e-06, "loss": 0.0023, "step": 62800 }, { "epoch": 1.8689836787526222, "grad_norm": 0.16009795665740967, "learning_rate": 3.7396025784371317e-06, "loss": 0.0033, "step": 62810 }, { "epoch": 1.8692812402362637, "grad_norm": 0.10602611303329468, "learning_rate": 3.7379273874563832e-06, "loss": 0.0033, "step": 62820 }, { "epoch": 1.8695788017199053, "grad_norm": 0.18189987540245056, "learning_rate": 3.7362523477707226e-06, "loss": 0.0026, "step": 62830 }, { "epoch": 1.8698763632035469, "grad_norm": 0.11946534365415573, "learning_rate": 3.7345774595809476e-06, "loss": 0.0031, "step": 62840 }, { "epoch": 1.8701739246871885, "grad_norm": 0.07998088002204895, "learning_rate": 3.7329027230878434e-06, "loss": 0.0034, "step": 62850 }, { "epoch": 1.87047148617083, "grad_norm": 0.14906392991542816, "learning_rate": 3.7312281384921733e-06, "loss": 0.0038, "step": 62860 }, { "epoch": 1.8707690476544716, "grad_norm": 0.2152678221464157, "learning_rate": 3.729553705994685e-06, "loss": 0.0038, "step": 62870 }, { "epoch": 1.8710666091381132, "grad_norm": 0.17325453460216522, "learning_rate": 3.7278794257961044e-06, "loss": 0.0046, "step": 62880 }, { "epoch": 1.8713641706217548, "grad_norm": 0.15971973538398743, "learning_rate": 3.726205298097143e-06, "loss": 0.0103, "step": 62890 }, { "epoch": 1.8716617321053963, "grad_norm": 0.14830555021762848, "learning_rate": 3.7245313230984916e-06, "loss": 0.0046, "step": 62900 }, { "epoch": 1.871959293589038, "grad_norm": 0.12684395909309387, "learning_rate": 3.7228575010008244e-06, "loss": 0.0031, "step": 62910 }, { "epoch": 1.8722568550726795, "grad_norm": 0.1552734524011612, "learning_rate": 3.7211838320047945e-06, "loss": 0.0036, "step": 62920 }, { "epoch": 1.872554416556321, "grad_norm": 0.1519192010164261, "learning_rate": 3.71951031631104e-06, "loss": 0.0019, "step": 62930 }, { "epoch": 1.8728519780399626, "grad_norm": 0.099063441157341, "learning_rate": 3.7178369541201785e-06, "loss": 0.0045, "step": 62940 }, { "epoch": 1.8731495395236042, "grad_norm": 0.1343136578798294, "learning_rate": 3.7161637456328105e-06, "loss": 0.0026, "step": 62950 }, { "epoch": 1.8734471010072458, "grad_norm": 0.14997799694538116, "learning_rate": 3.7144906910495154e-06, "loss": 0.0026, "step": 62960 }, { "epoch": 1.8737446624908873, "grad_norm": 0.16516345739364624, "learning_rate": 3.7128177905708564e-06, "loss": 0.0034, "step": 62970 }, { "epoch": 1.8740422239745287, "grad_norm": 0.22578059136867523, "learning_rate": 3.711145044397382e-06, "loss": 0.0051, "step": 62980 }, { "epoch": 1.8743397854581703, "grad_norm": 0.17783355712890625, "learning_rate": 3.7094724527296124e-06, "loss": 0.0032, "step": 62990 }, { "epoch": 1.8746373469418118, "grad_norm": 0.15661726891994476, "learning_rate": 3.7078000157680595e-06, "loss": 0.0038, "step": 63000 }, { "epoch": 1.8749349084254534, "grad_norm": 0.07643376290798187, "learning_rate": 3.7061277337132095e-06, "loss": 0.0027, "step": 63010 }, { "epoch": 1.875232469909095, "grad_norm": 0.13826142251491547, "learning_rate": 3.704455606765534e-06, "loss": 0.0032, "step": 63020 }, { "epoch": 1.8755300313927366, "grad_norm": 0.05660485848784447, "learning_rate": 3.7027836351254832e-06, "loss": 0.0033, "step": 63030 }, { "epoch": 1.8758275928763781, "grad_norm": 0.1643419861793518, "learning_rate": 3.7011118189934925e-06, "loss": 0.0041, "step": 63040 }, { "epoch": 1.8761251543600195, "grad_norm": 0.299072802066803, "learning_rate": 3.6994401585699734e-06, "loss": 0.0027, "step": 63050 }, { "epoch": 1.876422715843661, "grad_norm": 0.1744183450937271, "learning_rate": 3.697768654055325e-06, "loss": 0.0043, "step": 63060 }, { "epoch": 1.8767202773273026, "grad_norm": 0.06762836128473282, "learning_rate": 3.696097305649923e-06, "loss": 0.0026, "step": 63070 }, { "epoch": 1.8770178388109442, "grad_norm": 0.10840698331594467, "learning_rate": 3.6944261135541225e-06, "loss": 0.0038, "step": 63080 }, { "epoch": 1.8773154002945858, "grad_norm": 0.09963106364011765, "learning_rate": 3.692755077968269e-06, "loss": 0.0031, "step": 63090 }, { "epoch": 1.8776129617782273, "grad_norm": 0.044376861304044724, "learning_rate": 3.691084199092678e-06, "loss": 0.0035, "step": 63100 }, { "epoch": 1.877910523261869, "grad_norm": 0.09454961866140366, "learning_rate": 3.6894134771276558e-06, "loss": 0.0039, "step": 63110 }, { "epoch": 1.8782080847455105, "grad_norm": 0.14835239946842194, "learning_rate": 3.6877429122734825e-06, "loss": 0.0032, "step": 63120 }, { "epoch": 1.878505646229152, "grad_norm": 0.2431076318025589, "learning_rate": 3.686072504730425e-06, "loss": 0.003, "step": 63130 }, { "epoch": 1.8788032077127936, "grad_norm": 0.14330759644508362, "learning_rate": 3.684402254698727e-06, "loss": 0.0017, "step": 63140 }, { "epoch": 1.8791007691964352, "grad_norm": 0.19072264432907104, "learning_rate": 3.682732162378616e-06, "loss": 0.0027, "step": 63150 }, { "epoch": 1.8793983306800768, "grad_norm": 0.12888284027576447, "learning_rate": 3.681062227970299e-06, "loss": 0.0049, "step": 63160 }, { "epoch": 1.8796958921637184, "grad_norm": 0.14887043833732605, "learning_rate": 3.6793924516739665e-06, "loss": 0.0046, "step": 63170 }, { "epoch": 1.87999345364736, "grad_norm": 0.13419808447360992, "learning_rate": 3.6777228336897873e-06, "loss": 0.0059, "step": 63180 }, { "epoch": 1.8802910151310015, "grad_norm": 0.23777168989181519, "learning_rate": 3.6760533742179127e-06, "loss": 0.0059, "step": 63190 }, { "epoch": 1.880588576614643, "grad_norm": 0.10129818320274353, "learning_rate": 3.6743840734584736e-06, "loss": 0.0032, "step": 63200 }, { "epoch": 1.8808861380982846, "grad_norm": 0.059075113385915756, "learning_rate": 3.6727149316115845e-06, "loss": 0.0039, "step": 63210 }, { "epoch": 1.8811836995819262, "grad_norm": 0.14474456012248993, "learning_rate": 3.6710459488773376e-06, "loss": 0.0014, "step": 63220 }, { "epoch": 1.8814812610655678, "grad_norm": 0.1795448213815689, "learning_rate": 3.6693771254558095e-06, "loss": 0.0032, "step": 63230 }, { "epoch": 1.8817788225492094, "grad_norm": 0.2228907346725464, "learning_rate": 3.667708461547054e-06, "loss": 0.0048, "step": 63240 }, { "epoch": 1.882076384032851, "grad_norm": 0.15872396528720856, "learning_rate": 3.6660399573511076e-06, "loss": 0.0034, "step": 63250 }, { "epoch": 1.8823739455164925, "grad_norm": 0.26472046971321106, "learning_rate": 3.6643716130679895e-06, "loss": 0.0038, "step": 63260 }, { "epoch": 1.8826715070001339, "grad_norm": 0.2628442049026489, "learning_rate": 3.662703428897697e-06, "loss": 0.003, "step": 63270 }, { "epoch": 1.8829690684837754, "grad_norm": 0.06639840453863144, "learning_rate": 3.66103540504021e-06, "loss": 0.0038, "step": 63280 }, { "epoch": 1.883266629967417, "grad_norm": 0.1371804177761078, "learning_rate": 3.6593675416954864e-06, "loss": 0.0049, "step": 63290 }, { "epoch": 1.8835641914510586, "grad_norm": 0.13181068003177643, "learning_rate": 3.65769983906347e-06, "loss": 0.0045, "step": 63300 }, { "epoch": 1.8838617529347002, "grad_norm": 0.03761698305606842, "learning_rate": 3.6560322973440775e-06, "loss": 0.003, "step": 63310 }, { "epoch": 1.8841593144183417, "grad_norm": 0.07772354781627655, "learning_rate": 3.6543649167372154e-06, "loss": 0.0029, "step": 63320 }, { "epoch": 1.8844568759019833, "grad_norm": 0.16026587784290314, "learning_rate": 3.652697697442763e-06, "loss": 0.0026, "step": 63330 }, { "epoch": 1.8847544373856246, "grad_norm": 0.13032850623130798, "learning_rate": 3.6510306396605867e-06, "loss": 0.0018, "step": 63340 }, { "epoch": 1.8850519988692662, "grad_norm": 0.3043981194496155, "learning_rate": 3.6493637435905273e-06, "loss": 0.0031, "step": 63350 }, { "epoch": 1.8853495603529078, "grad_norm": 0.08690773695707321, "learning_rate": 3.6476970094324115e-06, "loss": 0.0014, "step": 63360 }, { "epoch": 1.8856471218365494, "grad_norm": 0.14140179753303528, "learning_rate": 3.6460304373860443e-06, "loss": 0.003, "step": 63370 }, { "epoch": 1.885944683320191, "grad_norm": 0.21717539429664612, "learning_rate": 3.6443640276512115e-06, "loss": 0.0036, "step": 63380 }, { "epoch": 1.8862422448038325, "grad_norm": 0.09934718906879425, "learning_rate": 3.6426977804276787e-06, "loss": 0.0043, "step": 63390 }, { "epoch": 1.886539806287474, "grad_norm": 0.2113167941570282, "learning_rate": 3.6410316959151935e-06, "loss": 0.0037, "step": 63400 }, { "epoch": 1.8868373677711157, "grad_norm": 0.24619966745376587, "learning_rate": 3.6393657743134826e-06, "loss": 0.0035, "step": 63410 }, { "epoch": 1.8871349292547572, "grad_norm": 0.08758868277072906, "learning_rate": 3.6377000158222543e-06, "loss": 0.0026, "step": 63420 }, { "epoch": 1.8874324907383988, "grad_norm": 0.2873440980911255, "learning_rate": 3.6360344206411962e-06, "loss": 0.0021, "step": 63430 }, { "epoch": 1.8877300522220404, "grad_norm": 0.19373007118701935, "learning_rate": 3.6343689889699785e-06, "loss": 0.0035, "step": 63440 }, { "epoch": 1.888027613705682, "grad_norm": 0.11475121229887009, "learning_rate": 3.6327037210082495e-06, "loss": 0.021, "step": 63450 }, { "epoch": 1.8883251751893235, "grad_norm": 0.1333136409521103, "learning_rate": 3.6310386169556365e-06, "loss": 0.0023, "step": 63460 }, { "epoch": 1.888622736672965, "grad_norm": 0.28757745027542114, "learning_rate": 3.629373677011753e-06, "loss": 0.0033, "step": 63470 }, { "epoch": 1.8889202981566067, "grad_norm": 0.10428688675165176, "learning_rate": 3.6277089013761864e-06, "loss": 0.0022, "step": 63480 }, { "epoch": 1.8892178596402482, "grad_norm": 0.1745736002922058, "learning_rate": 3.626044290248509e-06, "loss": 0.0038, "step": 63490 }, { "epoch": 1.8895154211238898, "grad_norm": 0.2784712314605713, "learning_rate": 3.624379843828269e-06, "loss": 0.0039, "step": 63500 }, { "epoch": 1.8898129826075314, "grad_norm": 0.25857114791870117, "learning_rate": 3.622715562315e-06, "loss": 0.0041, "step": 63510 }, { "epoch": 1.890110544091173, "grad_norm": 0.13875141739845276, "learning_rate": 3.6210514459082115e-06, "loss": 0.0028, "step": 63520 }, { "epoch": 1.8904081055748145, "grad_norm": 0.17732258141040802, "learning_rate": 3.6193874948073967e-06, "loss": 0.0056, "step": 63530 }, { "epoch": 1.890705667058456, "grad_norm": 0.13649620115756989, "learning_rate": 3.6177237092120234e-06, "loss": 0.0052, "step": 63540 }, { "epoch": 1.8910032285420975, "grad_norm": 0.2660873532295227, "learning_rate": 3.6160600893215468e-06, "loss": 0.0029, "step": 63550 }, { "epoch": 1.891300790025739, "grad_norm": 0.22440263628959656, "learning_rate": 3.6143966353354e-06, "loss": 0.0046, "step": 63560 }, { "epoch": 1.8915983515093806, "grad_norm": 0.105350062251091, "learning_rate": 3.612733347452991e-06, "loss": 0.0035, "step": 63570 }, { "epoch": 1.8918959129930222, "grad_norm": 0.1791437864303589, "learning_rate": 3.6110702258737153e-06, "loss": 0.0041, "step": 63580 }, { "epoch": 1.8921934744766638, "grad_norm": 0.17950445413589478, "learning_rate": 3.6094072707969424e-06, "loss": 0.0032, "step": 63590 }, { "epoch": 1.8924910359603053, "grad_norm": 0.11313648521900177, "learning_rate": 3.6077444824220265e-06, "loss": 0.0029, "step": 63600 }, { "epoch": 1.892788597443947, "grad_norm": 0.12504681944847107, "learning_rate": 3.606081860948298e-06, "loss": 0.0034, "step": 63610 }, { "epoch": 1.8930861589275882, "grad_norm": 0.15171071887016296, "learning_rate": 3.6044194065750715e-06, "loss": 0.0048, "step": 63620 }, { "epoch": 1.8933837204112298, "grad_norm": 0.12397170066833496, "learning_rate": 3.602757119501636e-06, "loss": 0.0033, "step": 63630 }, { "epoch": 1.8936812818948714, "grad_norm": 0.01944008283317089, "learning_rate": 3.6010949999272663e-06, "loss": 0.0032, "step": 63640 }, { "epoch": 1.893978843378513, "grad_norm": 0.3012726902961731, "learning_rate": 3.599433048051213e-06, "loss": 0.0045, "step": 63650 }, { "epoch": 1.8942764048621545, "grad_norm": 0.2263510376214981, "learning_rate": 3.5977712640727093e-06, "loss": 0.0066, "step": 63660 }, { "epoch": 1.8945739663457961, "grad_norm": 0.14368867874145508, "learning_rate": 3.5961096481909645e-06, "loss": 0.0039, "step": 63670 }, { "epoch": 1.8948715278294377, "grad_norm": 0.08660726249217987, "learning_rate": 3.5944482006051735e-06, "loss": 0.002, "step": 63680 }, { "epoch": 1.8951690893130793, "grad_norm": 0.14813783764839172, "learning_rate": 3.592786921514505e-06, "loss": 0.004, "step": 63690 }, { "epoch": 1.8954666507967208, "grad_norm": 0.007485981099307537, "learning_rate": 3.5911258111181123e-06, "loss": 0.003, "step": 63700 }, { "epoch": 1.8957642122803624, "grad_norm": 0.11172205954790115, "learning_rate": 3.5894648696151247e-06, "loss": 0.0039, "step": 63710 }, { "epoch": 1.896061773764004, "grad_norm": 0.1302298754453659, "learning_rate": 3.5878040972046547e-06, "loss": 0.0028, "step": 63720 }, { "epoch": 1.8963593352476456, "grad_norm": 0.1509874165058136, "learning_rate": 3.5861434940857908e-06, "loss": 0.0016, "step": 63730 }, { "epoch": 1.8966568967312871, "grad_norm": 0.1748548001050949, "learning_rate": 3.5844830604576063e-06, "loss": 0.0052, "step": 63740 }, { "epoch": 1.8969544582149287, "grad_norm": 0.1155058965086937, "learning_rate": 3.5828227965191477e-06, "loss": 0.0024, "step": 63750 }, { "epoch": 1.8972520196985703, "grad_norm": 0.17133685946464539, "learning_rate": 3.5811627024694465e-06, "loss": 0.004, "step": 63760 }, { "epoch": 1.8975495811822118, "grad_norm": 0.13975001871585846, "learning_rate": 3.5795027785075116e-06, "loss": 0.0057, "step": 63770 }, { "epoch": 1.8978471426658534, "grad_norm": 0.26404237747192383, "learning_rate": 3.5778430248323328e-06, "loss": 0.004, "step": 63780 }, { "epoch": 1.898144704149495, "grad_norm": 0.1813775599002838, "learning_rate": 3.5761834416428753e-06, "loss": 0.0042, "step": 63790 }, { "epoch": 1.8984422656331366, "grad_norm": 0.1469057947397232, "learning_rate": 3.574524029138091e-06, "loss": 0.004, "step": 63800 }, { "epoch": 1.8987398271167781, "grad_norm": 0.1927468627691269, "learning_rate": 3.5728647875169052e-06, "loss": 0.0023, "step": 63810 }, { "epoch": 1.8990373886004197, "grad_norm": 0.07018833607435226, "learning_rate": 3.571205716978223e-06, "loss": 0.0027, "step": 63820 }, { "epoch": 1.8993349500840613, "grad_norm": 0.16761969029903412, "learning_rate": 3.5695468177209358e-06, "loss": 0.002, "step": 63830 }, { "epoch": 1.8996325115677026, "grad_norm": 0.150807186961174, "learning_rate": 3.567888089943906e-06, "loss": 0.0055, "step": 63840 }, { "epoch": 1.8999300730513442, "grad_norm": 0.11184757947921753, "learning_rate": 3.566229533845982e-06, "loss": 0.0026, "step": 63850 }, { "epoch": 1.9002276345349858, "grad_norm": 0.12204489856958389, "learning_rate": 3.5645711496259853e-06, "loss": 0.0022, "step": 63860 }, { "epoch": 1.9005251960186273, "grad_norm": 0.0685146152973175, "learning_rate": 3.5629129374827216e-06, "loss": 0.005, "step": 63870 }, { "epoch": 1.900822757502269, "grad_norm": 0.1424563080072403, "learning_rate": 3.5612548976149754e-06, "loss": 0.0041, "step": 63880 }, { "epoch": 1.9011203189859105, "grad_norm": 0.3075107932090759, "learning_rate": 3.5595970302215065e-06, "loss": 0.0039, "step": 63890 }, { "epoch": 1.901417880469552, "grad_norm": 0.003948858007788658, "learning_rate": 3.557939335501062e-06, "loss": 0.0031, "step": 63900 }, { "epoch": 1.9017154419531934, "grad_norm": 0.15612901747226715, "learning_rate": 3.556281813652359e-06, "loss": 0.0032, "step": 63910 }, { "epoch": 1.902013003436835, "grad_norm": 0.21629156172275543, "learning_rate": 3.554624464874101e-06, "loss": 0.0036, "step": 63920 }, { "epoch": 1.9023105649204766, "grad_norm": 0.24703676998615265, "learning_rate": 3.552967289364967e-06, "loss": 0.0025, "step": 63930 }, { "epoch": 1.9026081264041181, "grad_norm": 0.184947669506073, "learning_rate": 3.5513102873236173e-06, "loss": 0.0038, "step": 63940 }, { "epoch": 1.9029056878877597, "grad_norm": 0.11382344365119934, "learning_rate": 3.549653458948689e-06, "loss": 0.0027, "step": 63950 }, { "epoch": 1.9032032493714013, "grad_norm": 0.22215984761714935, "learning_rate": 3.547996804438802e-06, "loss": 0.0016, "step": 63960 }, { "epoch": 1.9035008108550429, "grad_norm": 0.13335727155208588, "learning_rate": 3.5463403239925503e-06, "loss": 0.0029, "step": 63970 }, { "epoch": 1.9037983723386844, "grad_norm": 0.18757958710193634, "learning_rate": 3.544684017808513e-06, "loss": 0.003, "step": 63980 }, { "epoch": 1.904095933822326, "grad_norm": 0.06309522688388824, "learning_rate": 3.543027886085242e-06, "loss": 0.0022, "step": 63990 }, { "epoch": 1.9043934953059676, "grad_norm": 0.1581459641456604, "learning_rate": 3.5413719290212743e-06, "loss": 0.0033, "step": 64000 }, { "epoch": 1.9046910567896091, "grad_norm": 0.18774926662445068, "learning_rate": 3.539716146815122e-06, "loss": 0.0038, "step": 64010 }, { "epoch": 1.9049886182732507, "grad_norm": 0.11317618191242218, "learning_rate": 3.538060539665277e-06, "loss": 0.0086, "step": 64020 }, { "epoch": 1.9052861797568923, "grad_norm": 0.1712944358587265, "learning_rate": 3.536405107770211e-06, "loss": 0.0046, "step": 64030 }, { "epoch": 1.9055837412405339, "grad_norm": 0.13867108523845673, "learning_rate": 3.5347498513283755e-06, "loss": 0.003, "step": 64040 }, { "epoch": 1.9058813027241754, "grad_norm": 0.14249446988105774, "learning_rate": 3.533094770538198e-06, "loss": 0.0025, "step": 64050 }, { "epoch": 1.906178864207817, "grad_norm": 0.09027145802974701, "learning_rate": 3.5314398655980877e-06, "loss": 0.0062, "step": 64060 }, { "epoch": 1.9064764256914586, "grad_norm": 0.2158060073852539, "learning_rate": 3.5297851367064305e-06, "loss": 0.0029, "step": 64070 }, { "epoch": 1.9067739871751002, "grad_norm": 0.1299428939819336, "learning_rate": 3.5281305840615955e-06, "loss": 0.0036, "step": 64080 }, { "epoch": 1.9070715486587417, "grad_norm": 0.19248424470424652, "learning_rate": 3.526476207861923e-06, "loss": 0.0033, "step": 64090 }, { "epoch": 1.9073691101423833, "grad_norm": 0.16564784944057465, "learning_rate": 3.5248220083057394e-06, "loss": 0.0025, "step": 64100 }, { "epoch": 1.9076666716260249, "grad_norm": 0.26580530405044556, "learning_rate": 3.523167985591349e-06, "loss": 0.0036, "step": 64110 }, { "epoch": 1.9079642331096662, "grad_norm": 0.282554566860199, "learning_rate": 3.5215141399170307e-06, "loss": 0.0023, "step": 64120 }, { "epoch": 1.9082617945933078, "grad_norm": 0.15025024116039276, "learning_rate": 3.519860471481046e-06, "loss": 0.0028, "step": 64130 }, { "epoch": 1.9085593560769494, "grad_norm": 0.0188928060233593, "learning_rate": 3.5182069804816314e-06, "loss": 0.0032, "step": 64140 }, { "epoch": 1.908856917560591, "grad_norm": 0.14081330597400665, "learning_rate": 3.516553667117009e-06, "loss": 0.0037, "step": 64150 }, { "epoch": 1.9091544790442325, "grad_norm": 0.19720658659934998, "learning_rate": 3.5149005315853696e-06, "loss": 0.0025, "step": 64160 }, { "epoch": 1.909452040527874, "grad_norm": 0.1973940134048462, "learning_rate": 3.513247574084893e-06, "loss": 0.0028, "step": 64170 }, { "epoch": 1.9097496020115157, "grad_norm": 0.1895960420370102, "learning_rate": 3.51159479481373e-06, "loss": 0.0023, "step": 64180 }, { "epoch": 1.9100471634951572, "grad_norm": 0.5886694192886353, "learning_rate": 3.509942193970015e-06, "loss": 0.0047, "step": 64190 }, { "epoch": 1.9103447249787986, "grad_norm": 0.09915195405483246, "learning_rate": 3.508289771751857e-06, "loss": 0.0032, "step": 64200 }, { "epoch": 1.9106422864624402, "grad_norm": 0.5966812968254089, "learning_rate": 3.5066375283573473e-06, "loss": 0.0045, "step": 64210 }, { "epoch": 1.9109398479460817, "grad_norm": 0.2935655117034912, "learning_rate": 3.5049854639845528e-06, "loss": 0.0072, "step": 64220 }, { "epoch": 1.9112374094297233, "grad_norm": 0.21759407222270966, "learning_rate": 3.503333578831519e-06, "loss": 0.0052, "step": 64230 }, { "epoch": 1.9115349709133649, "grad_norm": 0.13646553456783295, "learning_rate": 3.5016818730962743e-06, "loss": 0.0021, "step": 64240 }, { "epoch": 1.9118325323970065, "grad_norm": 0.13629798591136932, "learning_rate": 3.5000303469768193e-06, "loss": 0.0034, "step": 64250 }, { "epoch": 1.912130093880648, "grad_norm": 0.07720404863357544, "learning_rate": 3.4983790006711384e-06, "loss": 0.0022, "step": 64260 }, { "epoch": 1.9124276553642896, "grad_norm": 0.2541934847831726, "learning_rate": 3.496727834377189e-06, "loss": 0.004, "step": 64270 }, { "epoch": 1.9127252168479312, "grad_norm": 0.1280573606491089, "learning_rate": 3.4950768482929142e-06, "loss": 0.0034, "step": 64280 }, { "epoch": 1.9130227783315727, "grad_norm": 0.2320873737335205, "learning_rate": 3.493426042616228e-06, "loss": 0.0033, "step": 64290 }, { "epoch": 1.9133203398152143, "grad_norm": 0.21581466495990753, "learning_rate": 3.4917754175450287e-06, "loss": 0.0032, "step": 64300 }, { "epoch": 1.913617901298856, "grad_norm": 0.09213075041770935, "learning_rate": 3.4901249732771874e-06, "loss": 0.0041, "step": 64310 }, { "epoch": 1.9139154627824975, "grad_norm": 0.335722953081131, "learning_rate": 3.4884747100105596e-06, "loss": 0.0093, "step": 64320 }, { "epoch": 1.914213024266139, "grad_norm": 0.2911849617958069, "learning_rate": 3.486824627942973e-06, "loss": 0.0038, "step": 64330 }, { "epoch": 1.9145105857497806, "grad_norm": 0.18944630026817322, "learning_rate": 3.48517472727224e-06, "loss": 0.0033, "step": 64340 }, { "epoch": 1.9148081472334222, "grad_norm": 0.13130581378936768, "learning_rate": 3.4835250081961447e-06, "loss": 0.0026, "step": 64350 }, { "epoch": 1.9151057087170638, "grad_norm": 0.2720853388309479, "learning_rate": 3.4818754709124546e-06, "loss": 0.0031, "step": 64360 }, { "epoch": 1.9154032702007053, "grad_norm": 0.07437887787818909, "learning_rate": 3.4802261156189114e-06, "loss": 0.0043, "step": 64370 }, { "epoch": 1.915700831684347, "grad_norm": 0.27257239818573, "learning_rate": 3.4785769425132397e-06, "loss": 0.0029, "step": 64380 }, { "epoch": 1.9159983931679885, "grad_norm": 0.02014477178454399, "learning_rate": 3.4769279517931366e-06, "loss": 0.0031, "step": 64390 }, { "epoch": 1.91629595465163, "grad_norm": 0.15118341147899628, "learning_rate": 3.475279143656282e-06, "loss": 0.0092, "step": 64400 }, { "epoch": 1.9165935161352714, "grad_norm": 0.1380588859319687, "learning_rate": 3.4736305183003325e-06, "loss": 0.0026, "step": 64410 }, { "epoch": 1.916891077618913, "grad_norm": 0.24044235050678253, "learning_rate": 3.471982075922922e-06, "loss": 0.0035, "step": 64420 }, { "epoch": 1.9171886391025545, "grad_norm": 0.08953666687011719, "learning_rate": 3.470333816721663e-06, "loss": 0.002, "step": 64430 }, { "epoch": 1.9174862005861961, "grad_norm": 0.21398170292377472, "learning_rate": 3.4686857408941456e-06, "loss": 0.0039, "step": 64440 }, { "epoch": 1.9177837620698377, "grad_norm": 0.4232318103313446, "learning_rate": 3.4670378486379387e-06, "loss": 0.0033, "step": 64450 }, { "epoch": 1.9180813235534793, "grad_norm": 0.25176671147346497, "learning_rate": 3.4653901401505873e-06, "loss": 0.004, "step": 64460 }, { "epoch": 1.9183788850371208, "grad_norm": 0.18471390008926392, "learning_rate": 3.4637426156296183e-06, "loss": 0.0055, "step": 64470 }, { "epoch": 1.9186764465207622, "grad_norm": 0.07082601636648178, "learning_rate": 3.462095275272532e-06, "loss": 0.0027, "step": 64480 }, { "epoch": 1.9189740080044038, "grad_norm": 0.16690057516098022, "learning_rate": 3.460448119276811e-06, "loss": 0.0035, "step": 64490 }, { "epoch": 1.9192715694880453, "grad_norm": 0.08546163886785507, "learning_rate": 3.458801147839911e-06, "loss": 0.0045, "step": 64500 }, { "epoch": 1.919569130971687, "grad_norm": 0.08435381948947906, "learning_rate": 3.4571543611592694e-06, "loss": 0.0031, "step": 64510 }, { "epoch": 1.9198666924553285, "grad_norm": 0.1732661873102188, "learning_rate": 3.4555077594322994e-06, "loss": 0.0041, "step": 64520 }, { "epoch": 1.92016425393897, "grad_norm": 0.12341666221618652, "learning_rate": 3.4538613428563946e-06, "loss": 0.0019, "step": 64530 }, { "epoch": 1.9204618154226116, "grad_norm": 0.1341898888349533, "learning_rate": 3.452215111628922e-06, "loss": 0.0027, "step": 64540 }, { "epoch": 1.9207593769062532, "grad_norm": 0.2022707611322403, "learning_rate": 3.4505690659472314e-06, "loss": 0.0028, "step": 64550 }, { "epoch": 1.9210569383898948, "grad_norm": 0.3165748417377472, "learning_rate": 3.4489232060086455e-06, "loss": 0.0041, "step": 64560 }, { "epoch": 1.9213544998735363, "grad_norm": 0.035789549350738525, "learning_rate": 3.44727753201047e-06, "loss": 0.0029, "step": 64570 }, { "epoch": 1.921652061357178, "grad_norm": 0.06921941041946411, "learning_rate": 3.4456320441499836e-06, "loss": 0.0039, "step": 64580 }, { "epoch": 1.9219496228408195, "grad_norm": 0.21278806030750275, "learning_rate": 3.4439867426244432e-06, "loss": 0.0062, "step": 64590 }, { "epoch": 1.922247184324461, "grad_norm": 0.1361958086490631, "learning_rate": 3.4423416276310882e-06, "loss": 0.0036, "step": 64600 }, { "epoch": 1.9225447458081026, "grad_norm": 0.08282997459173203, "learning_rate": 3.4406966993671286e-06, "loss": 0.0025, "step": 64610 }, { "epoch": 1.9228423072917442, "grad_norm": 0.1881653219461441, "learning_rate": 3.4390519580297586e-06, "loss": 0.0035, "step": 64620 }, { "epoch": 1.9231398687753858, "grad_norm": 0.16289351880550385, "learning_rate": 3.4374074038161444e-06, "loss": 0.0033, "step": 64630 }, { "epoch": 1.9234374302590274, "grad_norm": 0.10970364511013031, "learning_rate": 3.4357630369234353e-06, "loss": 0.0037, "step": 64640 }, { "epoch": 1.923734991742669, "grad_norm": 0.12400943040847778, "learning_rate": 3.434118857548751e-06, "loss": 0.0052, "step": 64650 }, { "epoch": 1.9240325532263105, "grad_norm": 0.1790844351053238, "learning_rate": 3.432474865889197e-06, "loss": 0.003, "step": 64660 }, { "epoch": 1.924330114709952, "grad_norm": 0.2500283718109131, "learning_rate": 3.4308310621418484e-06, "loss": 0.003, "step": 64670 }, { "epoch": 1.9246276761935937, "grad_norm": 0.11742576211690903, "learning_rate": 3.4291874465037645e-06, "loss": 0.0029, "step": 64680 }, { "epoch": 1.9249252376772352, "grad_norm": 0.1005745530128479, "learning_rate": 3.4275440191719794e-06, "loss": 0.0029, "step": 64690 }, { "epoch": 1.9252227991608766, "grad_norm": 0.15502473711967468, "learning_rate": 3.425900780343502e-06, "loss": 0.005, "step": 64700 }, { "epoch": 1.9255203606445181, "grad_norm": 0.08337725698947906, "learning_rate": 3.4242577302153236e-06, "loss": 0.0021, "step": 64710 }, { "epoch": 1.9258179221281597, "grad_norm": 0.2797592282295227, "learning_rate": 3.4226148689844062e-06, "loss": 0.0042, "step": 64720 }, { "epoch": 1.9261154836118013, "grad_norm": 0.12563453614711761, "learning_rate": 3.420972196847698e-06, "loss": 0.0038, "step": 64730 }, { "epoch": 1.9264130450954429, "grad_norm": 0.04388715326786041, "learning_rate": 3.4193297140021163e-06, "loss": 0.0022, "step": 64740 }, { "epoch": 1.9267106065790844, "grad_norm": 0.26410576701164246, "learning_rate": 3.4176874206445607e-06, "loss": 0.0048, "step": 64750 }, { "epoch": 1.927008168062726, "grad_norm": 0.1541188508272171, "learning_rate": 3.4160453169719047e-06, "loss": 0.0028, "step": 64760 }, { "epoch": 1.9273057295463674, "grad_norm": 0.1312715709209442, "learning_rate": 3.4144034031810038e-06, "loss": 0.0045, "step": 64770 }, { "epoch": 1.927603291030009, "grad_norm": 0.5570580363273621, "learning_rate": 3.4127616794686847e-06, "loss": 0.0021, "step": 64780 }, { "epoch": 1.9279008525136505, "grad_norm": 0.25418612360954285, "learning_rate": 3.411120146031757e-06, "loss": 0.0039, "step": 64790 }, { "epoch": 1.928198413997292, "grad_norm": 0.3612608313560486, "learning_rate": 3.4094788030670024e-06, "loss": 0.0059, "step": 64800 }, { "epoch": 1.9284959754809337, "grad_norm": 0.04051775112748146, "learning_rate": 3.407837650771185e-06, "loss": 0.0022, "step": 64810 }, { "epoch": 1.9287935369645752, "grad_norm": 0.1388051062822342, "learning_rate": 3.4061966893410403e-06, "loss": 0.0012, "step": 64820 }, { "epoch": 1.9290910984482168, "grad_norm": 0.07781250774860382, "learning_rate": 3.404555918973287e-06, "loss": 0.0021, "step": 64830 }, { "epoch": 1.9293886599318584, "grad_norm": 0.03550117835402489, "learning_rate": 3.402915339864615e-06, "loss": 0.0026, "step": 64840 }, { "epoch": 1.9296862214155, "grad_norm": 0.12632395327091217, "learning_rate": 3.401274952211696e-06, "loss": 0.004, "step": 64850 }, { "epoch": 1.9299837828991415, "grad_norm": 0.21243931353092194, "learning_rate": 3.3996347562111766e-06, "loss": 0.0032, "step": 64860 }, { "epoch": 1.930281344382783, "grad_norm": 0.16230139136314392, "learning_rate": 3.39799475205968e-06, "loss": 0.0031, "step": 64870 }, { "epoch": 1.9305789058664247, "grad_norm": 0.18035146594047546, "learning_rate": 3.3963549399538064e-06, "loss": 0.0025, "step": 64880 }, { "epoch": 1.9308764673500662, "grad_norm": 0.2104034572839737, "learning_rate": 3.394715320090136e-06, "loss": 0.0044, "step": 64890 }, { "epoch": 1.9311740288337078, "grad_norm": 0.5484296679496765, "learning_rate": 3.393075892665222e-06, "loss": 0.0042, "step": 64900 }, { "epoch": 1.9314715903173494, "grad_norm": 0.10364191979169846, "learning_rate": 3.3914366578755963e-06, "loss": 0.0025, "step": 64910 }, { "epoch": 1.931769151800991, "grad_norm": 0.13395044207572937, "learning_rate": 3.389797615917767e-06, "loss": 0.0027, "step": 64920 }, { "epoch": 1.9320667132846325, "grad_norm": 0.3581051230430603, "learning_rate": 3.388158766988221e-06, "loss": 0.0052, "step": 64930 }, { "epoch": 1.932364274768274, "grad_norm": 0.18469034135341644, "learning_rate": 3.38652011128342e-06, "loss": 0.0026, "step": 64940 }, { "epoch": 1.9326618362519157, "grad_norm": 0.14870627224445343, "learning_rate": 3.3848816489997995e-06, "loss": 0.0019, "step": 64950 }, { "epoch": 1.9329593977355572, "grad_norm": 0.1526825875043869, "learning_rate": 3.383243380333783e-06, "loss": 0.0023, "step": 64960 }, { "epoch": 1.9332569592191988, "grad_norm": 0.22532737255096436, "learning_rate": 3.3816053054817576e-06, "loss": 0.0066, "step": 64970 }, { "epoch": 1.9335545207028402, "grad_norm": 0.1409001499414444, "learning_rate": 3.3799674246400967e-06, "loss": 0.0027, "step": 64980 }, { "epoch": 1.9338520821864817, "grad_norm": 0.15589892864227295, "learning_rate": 3.378329738005144e-06, "loss": 0.002, "step": 64990 }, { "epoch": 1.9341496436701233, "grad_norm": 0.23426303267478943, "learning_rate": 3.3766922457732224e-06, "loss": 0.0042, "step": 65000 }, { "epoch": 1.934447205153765, "grad_norm": 0.16055844724178314, "learning_rate": 3.3750549481406334e-06, "loss": 0.002, "step": 65010 }, { "epoch": 1.9347447666374065, "grad_norm": 0.06282003223896027, "learning_rate": 3.3734178453036516e-06, "loss": 0.0026, "step": 65020 }, { "epoch": 1.935042328121048, "grad_norm": 0.17394550144672394, "learning_rate": 3.371780937458532e-06, "loss": 0.0049, "step": 65030 }, { "epoch": 1.9353398896046896, "grad_norm": 0.13490216434001923, "learning_rate": 3.3701442248015024e-06, "loss": 0.003, "step": 65040 }, { "epoch": 1.935637451088331, "grad_norm": 0.12178920209407806, "learning_rate": 3.36850770752877e-06, "loss": 0.003, "step": 65050 }, { "epoch": 1.9359350125719725, "grad_norm": 0.31934016942977905, "learning_rate": 3.3668713858365176e-06, "loss": 0.0043, "step": 65060 }, { "epoch": 1.936232574055614, "grad_norm": 0.18356233835220337, "learning_rate": 3.365235259920905e-06, "loss": 0.0026, "step": 65070 }, { "epoch": 1.9365301355392557, "grad_norm": 0.13813860714435577, "learning_rate": 3.3635993299780667e-06, "loss": 0.0045, "step": 65080 }, { "epoch": 1.9368276970228973, "grad_norm": 0.4731975495815277, "learning_rate": 3.3619635962041174e-06, "loss": 0.0032, "step": 65090 }, { "epoch": 1.9371252585065388, "grad_norm": 0.562843382358551, "learning_rate": 3.3603280587951443e-06, "loss": 0.0052, "step": 65100 }, { "epoch": 1.9374228199901804, "grad_norm": 0.21508777141571045, "learning_rate": 3.358692717947214e-06, "loss": 0.0046, "step": 65110 }, { "epoch": 1.937720381473822, "grad_norm": 0.23948898911476135, "learning_rate": 3.3570575738563666e-06, "loss": 0.0037, "step": 65120 }, { "epoch": 1.9380179429574635, "grad_norm": 0.14942912757396698, "learning_rate": 3.3554226267186225e-06, "loss": 0.0028, "step": 65130 }, { "epoch": 1.9383155044411051, "grad_norm": 0.5763621926307678, "learning_rate": 3.3537878767299735e-06, "loss": 0.0053, "step": 65140 }, { "epoch": 1.9386130659247467, "grad_norm": 0.1815742701292038, "learning_rate": 3.3521533240863948e-06, "loss": 0.0029, "step": 65150 }, { "epoch": 1.9389106274083883, "grad_norm": 0.2725496292114258, "learning_rate": 3.350518968983829e-06, "loss": 0.0033, "step": 65160 }, { "epoch": 1.9392081888920298, "grad_norm": 0.23019720613956451, "learning_rate": 3.348884811618204e-06, "loss": 0.0029, "step": 65170 }, { "epoch": 1.9395057503756714, "grad_norm": 0.16405841708183289, "learning_rate": 3.3472508521854165e-06, "loss": 0.0039, "step": 65180 }, { "epoch": 1.939803311859313, "grad_norm": 0.08226541429758072, "learning_rate": 3.3456170908813456e-06, "loss": 0.0028, "step": 65190 }, { "epoch": 1.9401008733429546, "grad_norm": 0.12772265076637268, "learning_rate": 3.343983527901841e-06, "loss": 0.0048, "step": 65200 }, { "epoch": 1.9403984348265961, "grad_norm": 0.13572289049625397, "learning_rate": 3.3423501634427337e-06, "loss": 0.0023, "step": 65210 }, { "epoch": 1.9406959963102377, "grad_norm": 0.2506246268749237, "learning_rate": 3.3407169976998256e-06, "loss": 0.003, "step": 65220 }, { "epoch": 1.9409935577938793, "grad_norm": 0.10632840543985367, "learning_rate": 3.3390840308689023e-06, "loss": 0.0041, "step": 65230 }, { "epoch": 1.9412911192775208, "grad_norm": 0.2443077266216278, "learning_rate": 3.3374512631457157e-06, "loss": 0.0034, "step": 65240 }, { "epoch": 1.9415886807611624, "grad_norm": 0.27174896001815796, "learning_rate": 3.335818694726003e-06, "loss": 0.0032, "step": 65250 }, { "epoch": 1.941886242244804, "grad_norm": 0.0008674417622387409, "learning_rate": 3.334186325805474e-06, "loss": 0.0025, "step": 65260 }, { "epoch": 1.9421838037284453, "grad_norm": 0.17870144546031952, "learning_rate": 3.332554156579812e-06, "loss": 0.0027, "step": 65270 }, { "epoch": 1.942481365212087, "grad_norm": 0.19078665971755981, "learning_rate": 3.3309221872446806e-06, "loss": 0.0019, "step": 65280 }, { "epoch": 1.9427789266957285, "grad_norm": 0.1516694873571396, "learning_rate": 3.329290417995715e-06, "loss": 0.0018, "step": 65290 }, { "epoch": 1.94307648817937, "grad_norm": 0.07981838285923004, "learning_rate": 3.3276588490285323e-06, "loss": 0.0026, "step": 65300 }, { "epoch": 1.9433740496630116, "grad_norm": 0.15301594138145447, "learning_rate": 3.326027480538718e-06, "loss": 0.0027, "step": 65310 }, { "epoch": 1.9436716111466532, "grad_norm": 0.3131013810634613, "learning_rate": 3.3243963127218415e-06, "loss": 0.0032, "step": 65320 }, { "epoch": 1.9439691726302948, "grad_norm": 0.1437738835811615, "learning_rate": 3.322765345773442e-06, "loss": 0.0043, "step": 65330 }, { "epoch": 1.9442667341139361, "grad_norm": 0.3307953476905823, "learning_rate": 3.3211345798890395e-06, "loss": 0.003, "step": 65340 }, { "epoch": 1.9445642955975777, "grad_norm": 0.12960462272167206, "learning_rate": 3.3195040152641235e-06, "loss": 0.0032, "step": 65350 }, { "epoch": 1.9448618570812193, "grad_norm": 0.11389567703008652, "learning_rate": 3.317873652094167e-06, "loss": 0.0027, "step": 65360 }, { "epoch": 1.9451594185648609, "grad_norm": 0.08971346914768219, "learning_rate": 3.3162434905746137e-06, "loss": 0.0039, "step": 65370 }, { "epoch": 1.9454569800485024, "grad_norm": 0.21354441344738007, "learning_rate": 3.3146135309008836e-06, "loss": 0.0033, "step": 65380 }, { "epoch": 1.945754541532144, "grad_norm": 0.3041872978210449, "learning_rate": 3.312983773268376e-06, "loss": 0.0025, "step": 65390 }, { "epoch": 1.9460521030157856, "grad_norm": 0.04059400409460068, "learning_rate": 3.3113542178724588e-06, "loss": 0.0024, "step": 65400 }, { "epoch": 1.9463496644994271, "grad_norm": 0.40319401025772095, "learning_rate": 3.309724864908486e-06, "loss": 0.0047, "step": 65410 }, { "epoch": 1.9466472259830687, "grad_norm": 0.1217033788561821, "learning_rate": 3.3080957145717774e-06, "loss": 0.0027, "step": 65420 }, { "epoch": 1.9469447874667103, "grad_norm": 0.07322411239147186, "learning_rate": 3.306466767057635e-06, "loss": 0.002, "step": 65430 }, { "epoch": 1.9472423489503519, "grad_norm": 0.11474622040987015, "learning_rate": 3.3048380225613323e-06, "loss": 0.0053, "step": 65440 }, { "epoch": 1.9475399104339934, "grad_norm": 0.15891017019748688, "learning_rate": 3.3032094812781237e-06, "loss": 0.0045, "step": 65450 }, { "epoch": 1.947837471917635, "grad_norm": 0.25340813398361206, "learning_rate": 3.3015811434032306e-06, "loss": 0.0035, "step": 65460 }, { "epoch": 1.9481350334012766, "grad_norm": 0.1517062485218048, "learning_rate": 3.299953009131861e-06, "loss": 0.0027, "step": 65470 }, { "epoch": 1.9484325948849182, "grad_norm": 0.10363306105136871, "learning_rate": 3.2983250786591894e-06, "loss": 0.0074, "step": 65480 }, { "epoch": 1.9487301563685597, "grad_norm": 0.03471417352557182, "learning_rate": 3.2966973521803704e-06, "loss": 0.002, "step": 65490 }, { "epoch": 1.9490277178522013, "grad_norm": 0.059747204184532166, "learning_rate": 3.2950698298905325e-06, "loss": 0.0039, "step": 65500 }, { "epoch": 1.9493252793358429, "grad_norm": 0.1080157607793808, "learning_rate": 3.293442511984781e-06, "loss": 0.0029, "step": 65510 }, { "epoch": 1.9496228408194844, "grad_norm": 0.19809462130069733, "learning_rate": 3.2918153986581934e-06, "loss": 0.0042, "step": 65520 }, { "epoch": 1.949920402303126, "grad_norm": 0.1681515872478485, "learning_rate": 3.290188490105829e-06, "loss": 0.0021, "step": 65530 }, { "epoch": 1.9502179637867676, "grad_norm": 0.2432352602481842, "learning_rate": 3.2885617865227182e-06, "loss": 0.0045, "step": 65540 }, { "epoch": 1.950515525270409, "grad_norm": 0.2500898540019989, "learning_rate": 3.2869352881038652e-06, "loss": 0.005, "step": 65550 }, { "epoch": 1.9508130867540505, "grad_norm": 0.11660853773355484, "learning_rate": 3.285308995044254e-06, "loss": 0.0038, "step": 65560 }, { "epoch": 1.951110648237692, "grad_norm": 0.15537862479686737, "learning_rate": 3.28368290753884e-06, "loss": 0.0024, "step": 65570 }, { "epoch": 1.9514082097213337, "grad_norm": 0.1579972207546234, "learning_rate": 3.282057025782557e-06, "loss": 0.003, "step": 65580 }, { "epoch": 1.9517057712049752, "grad_norm": 0.06156368553638458, "learning_rate": 3.2804313499703123e-06, "loss": 0.0029, "step": 65590 }, { "epoch": 1.9520033326886168, "grad_norm": 0.18672433495521545, "learning_rate": 3.2788058802969892e-06, "loss": 0.0031, "step": 65600 }, { "epoch": 1.9523008941722584, "grad_norm": 0.38948720693588257, "learning_rate": 3.277180616957446e-06, "loss": 0.0039, "step": 65610 }, { "epoch": 1.9525984556558997, "grad_norm": 0.019234655424952507, "learning_rate": 3.2755555601465182e-06, "loss": 0.0021, "step": 65620 }, { "epoch": 1.9528960171395413, "grad_norm": 0.049171701073646545, "learning_rate": 3.273930710059011e-06, "loss": 0.0036, "step": 65630 }, { "epoch": 1.9531935786231829, "grad_norm": 0.1508958786725998, "learning_rate": 3.2723060668897134e-06, "loss": 0.0034, "step": 65640 }, { "epoch": 1.9534911401068245, "grad_norm": 0.11717914044857025, "learning_rate": 3.2706816308333813e-06, "loss": 0.0026, "step": 65650 }, { "epoch": 1.953788701590466, "grad_norm": 0.2519245743751526, "learning_rate": 3.2690574020847503e-06, "loss": 0.0046, "step": 65660 }, { "epoch": 1.9540862630741076, "grad_norm": 0.18776540458202362, "learning_rate": 3.26743338083853e-06, "loss": 0.0037, "step": 65670 }, { "epoch": 1.9543838245577492, "grad_norm": 0.2566046118736267, "learning_rate": 3.2658095672894064e-06, "loss": 0.0038, "step": 65680 }, { "epoch": 1.9546813860413907, "grad_norm": 0.11634059250354767, "learning_rate": 3.2641859616320376e-06, "loss": 0.0025, "step": 65690 }, { "epoch": 1.9549789475250323, "grad_norm": 0.03503111004829407, "learning_rate": 3.26256256406106e-06, "loss": 0.0022, "step": 65700 }, { "epoch": 1.955276509008674, "grad_norm": 0.26449307799339294, "learning_rate": 3.260939374771083e-06, "loss": 0.0034, "step": 65710 }, { "epoch": 1.9555740704923155, "grad_norm": 0.1423102170228958, "learning_rate": 3.259316393956693e-06, "loss": 0.0047, "step": 65720 }, { "epoch": 1.955871631975957, "grad_norm": 0.11854678392410278, "learning_rate": 3.257693621812449e-06, "loss": 0.0053, "step": 65730 }, { "epoch": 1.9561691934595986, "grad_norm": 0.11817432940006256, "learning_rate": 3.2560710585328857e-06, "loss": 0.0046, "step": 65740 }, { "epoch": 1.9564667549432402, "grad_norm": 0.08668260276317596, "learning_rate": 3.254448704312514e-06, "loss": 0.0032, "step": 65750 }, { "epoch": 1.9567643164268818, "grad_norm": 0.2127193659543991, "learning_rate": 3.2528265593458185e-06, "loss": 0.0032, "step": 65760 }, { "epoch": 1.9570618779105233, "grad_norm": 0.19194482266902924, "learning_rate": 3.25120462382726e-06, "loss": 0.0022, "step": 65770 }, { "epoch": 1.957359439394165, "grad_norm": 0.1878659874200821, "learning_rate": 3.2495828979512712e-06, "loss": 0.0021, "step": 65780 }, { "epoch": 1.9576570008778065, "grad_norm": 0.3670613467693329, "learning_rate": 3.2479613819122647e-06, "loss": 0.0026, "step": 65790 }, { "epoch": 1.957954562361448, "grad_norm": 0.11232104897499084, "learning_rate": 3.2463400759046214e-06, "loss": 0.0022, "step": 65800 }, { "epoch": 1.9582521238450896, "grad_norm": 0.1155325248837471, "learning_rate": 3.244718980122704e-06, "loss": 0.0029, "step": 65810 }, { "epoch": 1.9585496853287312, "grad_norm": 0.22787640988826752, "learning_rate": 3.243098094760847e-06, "loss": 0.0047, "step": 65820 }, { "epoch": 1.9588472468123728, "grad_norm": 0.17156322300434113, "learning_rate": 3.241477420013357e-06, "loss": 0.0045, "step": 65830 }, { "epoch": 1.9591448082960141, "grad_norm": 0.22683216631412506, "learning_rate": 3.2398569560745184e-06, "loss": 0.0028, "step": 65840 }, { "epoch": 1.9594423697796557, "grad_norm": 0.15904837846755981, "learning_rate": 3.23823670313859e-06, "loss": 0.0038, "step": 65850 }, { "epoch": 1.9597399312632973, "grad_norm": 0.16501949727535248, "learning_rate": 3.2366166613998053e-06, "loss": 0.0034, "step": 65860 }, { "epoch": 1.9600374927469388, "grad_norm": 0.10780677199363708, "learning_rate": 3.2349968310523703e-06, "loss": 0.0033, "step": 65870 }, { "epoch": 1.9603350542305804, "grad_norm": 0.2576780915260315, "learning_rate": 3.23337721229047e-06, "loss": 0.0054, "step": 65880 }, { "epoch": 1.960632615714222, "grad_norm": 0.32644757628440857, "learning_rate": 3.231757805308259e-06, "loss": 0.0039, "step": 65890 }, { "epoch": 1.9609301771978636, "grad_norm": 0.08902768790721893, "learning_rate": 3.230138610299871e-06, "loss": 0.0031, "step": 65900 }, { "epoch": 1.961227738681505, "grad_norm": 0.158184215426445, "learning_rate": 3.228519627459411e-06, "loss": 0.0041, "step": 65910 }, { "epoch": 1.9615253001651465, "grad_norm": 0.053056132048368454, "learning_rate": 3.2269008569809612e-06, "loss": 0.0013, "step": 65920 }, { "epoch": 1.961822861648788, "grad_norm": 0.16192135214805603, "learning_rate": 3.2252822990585754e-06, "loss": 0.0038, "step": 65930 }, { "epoch": 1.9621204231324296, "grad_norm": 0.07480678707361221, "learning_rate": 3.2236639538862846e-06, "loss": 0.0022, "step": 65940 }, { "epoch": 1.9624179846160712, "grad_norm": 0.4047397971153259, "learning_rate": 3.222045821658093e-06, "loss": 0.0049, "step": 65950 }, { "epoch": 1.9627155460997128, "grad_norm": 0.20656752586364746, "learning_rate": 3.2204279025679795e-06, "loss": 0.0049, "step": 65960 }, { "epoch": 1.9630131075833543, "grad_norm": 0.18926803767681122, "learning_rate": 3.2188101968098975e-06, "loss": 0.0022, "step": 65970 }, { "epoch": 1.963310669066996, "grad_norm": 0.14812755584716797, "learning_rate": 3.2171927045777744e-06, "loss": 0.0034, "step": 65980 }, { "epoch": 1.9636082305506375, "grad_norm": 0.15972991287708282, "learning_rate": 3.2155754260655127e-06, "loss": 0.0032, "step": 65990 }, { "epoch": 1.963905792034279, "grad_norm": 0.08816230297088623, "learning_rate": 3.213958361466989e-06, "loss": 0.0018, "step": 66000 }, { "epoch": 1.9642033535179206, "grad_norm": 0.23123803734779358, "learning_rate": 3.2123415109760536e-06, "loss": 0.0027, "step": 66010 }, { "epoch": 1.9645009150015622, "grad_norm": 0.13610009849071503, "learning_rate": 3.2107248747865326e-06, "loss": 0.0029, "step": 66020 }, { "epoch": 1.9647984764852038, "grad_norm": 0.10276427119970322, "learning_rate": 3.2091084530922258e-06, "loss": 0.0028, "step": 66030 }, { "epoch": 1.9650960379688454, "grad_norm": 0.04479750990867615, "learning_rate": 3.2074922460869062e-06, "loss": 0.0048, "step": 66040 }, { "epoch": 1.965393599452487, "grad_norm": 0.09748155623674393, "learning_rate": 3.2058762539643214e-06, "loss": 0.0024, "step": 66050 }, { "epoch": 1.9656911609361285, "grad_norm": 0.28130418062210083, "learning_rate": 3.204260476918195e-06, "loss": 0.0029, "step": 66060 }, { "epoch": 1.96598872241977, "grad_norm": 0.09222200512886047, "learning_rate": 3.2026449151422232e-06, "loss": 0.0024, "step": 66070 }, { "epoch": 1.9662862839034116, "grad_norm": 0.13839732110500336, "learning_rate": 3.201029568830077e-06, "loss": 0.0034, "step": 66080 }, { "epoch": 1.9665838453870532, "grad_norm": 0.05269123613834381, "learning_rate": 3.199414438175399e-06, "loss": 0.0024, "step": 66090 }, { "epoch": 1.9668814068706948, "grad_norm": 0.31799396872520447, "learning_rate": 3.1977995233718112e-06, "loss": 0.003, "step": 66100 }, { "epoch": 1.9671789683543364, "grad_norm": 0.13388767838478088, "learning_rate": 3.1961848246129067e-06, "loss": 0.0042, "step": 66110 }, { "epoch": 1.9674765298379777, "grad_norm": 0.08376501500606537, "learning_rate": 3.1945703420922513e-06, "loss": 0.0046, "step": 66120 }, { "epoch": 1.9677740913216193, "grad_norm": 0.13072825968265533, "learning_rate": 3.192956076003388e-06, "loss": 0.0036, "step": 66130 }, { "epoch": 1.9680716528052609, "grad_norm": 0.016579193994402885, "learning_rate": 3.191342026539831e-06, "loss": 0.0021, "step": 66140 }, { "epoch": 1.9683692142889024, "grad_norm": 0.21657013893127441, "learning_rate": 3.1897281938950693e-06, "loss": 0.0057, "step": 66150 }, { "epoch": 1.968666775772544, "grad_norm": 0.1436736285686493, "learning_rate": 3.1881145782625677e-06, "loss": 0.0029, "step": 66160 }, { "epoch": 1.9689643372561856, "grad_norm": 0.00664038909599185, "learning_rate": 3.1865011798357624e-06, "loss": 0.0022, "step": 66170 }, { "epoch": 1.9692618987398272, "grad_norm": 0.13113702833652496, "learning_rate": 3.184887998808066e-06, "loss": 0.0028, "step": 66180 }, { "epoch": 1.9695594602234687, "grad_norm": 0.11612388491630554, "learning_rate": 3.1832750353728625e-06, "loss": 0.003, "step": 66190 }, { "epoch": 1.96985702170711, "grad_norm": 0.2667911648750305, "learning_rate": 3.1816622897235134e-06, "loss": 0.0017, "step": 66200 }, { "epoch": 1.9701545831907517, "grad_norm": 0.04594181105494499, "learning_rate": 3.1800497620533486e-06, "loss": 0.0016, "step": 66210 }, { "epoch": 1.9704521446743932, "grad_norm": 0.0718151330947876, "learning_rate": 3.1784374525556793e-06, "loss": 0.0023, "step": 66220 }, { "epoch": 1.9707497061580348, "grad_norm": 0.2517518997192383, "learning_rate": 3.1768253614237822e-06, "loss": 0.0036, "step": 66230 }, { "epoch": 1.9710472676416764, "grad_norm": 0.35035744309425354, "learning_rate": 3.1752134888509144e-06, "loss": 0.0031, "step": 66240 }, { "epoch": 1.971344829125318, "grad_norm": 0.05689878761768341, "learning_rate": 3.173601835030304e-06, "loss": 0.0059, "step": 66250 }, { "epoch": 1.9716423906089595, "grad_norm": 0.1328294277191162, "learning_rate": 3.1719904001551538e-06, "loss": 0.0024, "step": 66260 }, { "epoch": 1.971939952092601, "grad_norm": 0.15214411914348602, "learning_rate": 3.1703791844186376e-06, "loss": 0.0027, "step": 66270 }, { "epoch": 1.9722375135762427, "grad_norm": 0.0544043630361557, "learning_rate": 3.168768188013908e-06, "loss": 0.0021, "step": 66280 }, { "epoch": 1.9725350750598842, "grad_norm": 0.06346217542886734, "learning_rate": 3.1671574111340863e-06, "loss": 0.003, "step": 66290 }, { "epoch": 1.9728326365435258, "grad_norm": 0.35645154118537903, "learning_rate": 3.1655468539722723e-06, "loss": 0.004, "step": 66300 }, { "epoch": 1.9731301980271674, "grad_norm": 0.310089111328125, "learning_rate": 3.163936516721533e-06, "loss": 0.0029, "step": 66310 }, { "epoch": 1.973427759510809, "grad_norm": 0.11750493943691254, "learning_rate": 3.1623263995749163e-06, "loss": 0.0029, "step": 66320 }, { "epoch": 1.9737253209944505, "grad_norm": 0.042982302606105804, "learning_rate": 3.1607165027254376e-06, "loss": 0.0033, "step": 66330 }, { "epoch": 1.974022882478092, "grad_norm": 0.20374362170696259, "learning_rate": 3.159106826366092e-06, "loss": 0.0043, "step": 66340 }, { "epoch": 1.9743204439617337, "grad_norm": 0.16063956916332245, "learning_rate": 3.1574973706898404e-06, "loss": 0.0017, "step": 66350 }, { "epoch": 1.9746180054453752, "grad_norm": 0.08938051015138626, "learning_rate": 3.1558881358896243e-06, "loss": 0.0037, "step": 66360 }, { "epoch": 1.9749155669290168, "grad_norm": 0.0859452337026596, "learning_rate": 3.154279122158355e-06, "loss": 0.0033, "step": 66370 }, { "epoch": 1.9752131284126584, "grad_norm": 0.18723942339420319, "learning_rate": 3.1526703296889183e-06, "loss": 0.0017, "step": 66380 }, { "epoch": 1.9755106898963, "grad_norm": 0.08442175388336182, "learning_rate": 3.1510617586741753e-06, "loss": 0.0035, "step": 66390 }, { "epoch": 1.9758082513799415, "grad_norm": 0.14901961386203766, "learning_rate": 3.1494534093069566e-06, "loss": 0.0023, "step": 66400 }, { "epoch": 1.976105812863583, "grad_norm": 0.19557073712348938, "learning_rate": 3.14784528178007e-06, "loss": 0.0032, "step": 66410 }, { "epoch": 1.9764033743472245, "grad_norm": 0.10378628224134445, "learning_rate": 3.146237376286293e-06, "loss": 0.002, "step": 66420 }, { "epoch": 1.976700935830866, "grad_norm": 0.15619076788425446, "learning_rate": 3.1446296930183806e-06, "loss": 0.003, "step": 66430 }, { "epoch": 1.9769984973145076, "grad_norm": 0.23933160305023193, "learning_rate": 3.143022232169057e-06, "loss": 0.0048, "step": 66440 }, { "epoch": 1.9772960587981492, "grad_norm": 0.15096783638000488, "learning_rate": 3.1414149939310245e-06, "loss": 0.0015, "step": 66450 }, { "epoch": 1.9775936202817908, "grad_norm": 0.31683552265167236, "learning_rate": 3.1398079784969525e-06, "loss": 0.0031, "step": 66460 }, { "epoch": 1.9778911817654323, "grad_norm": 0.6239792108535767, "learning_rate": 3.1382011860594915e-06, "loss": 0.0073, "step": 66470 }, { "epoch": 1.9781887432490737, "grad_norm": 0.17706507444381714, "learning_rate": 3.136594616811257e-06, "loss": 0.0035, "step": 66480 }, { "epoch": 1.9784863047327153, "grad_norm": 0.1956394910812378, "learning_rate": 3.134988270944844e-06, "loss": 0.0026, "step": 66490 }, { "epoch": 1.9787838662163568, "grad_norm": 0.121432363986969, "learning_rate": 3.1333821486528193e-06, "loss": 0.003, "step": 66500 }, { "epoch": 1.9790814276999984, "grad_norm": 0.02633187361061573, "learning_rate": 3.1317762501277184e-06, "loss": 0.0035, "step": 66510 }, { "epoch": 1.97937898918364, "grad_norm": 0.0837102085351944, "learning_rate": 3.130170575562058e-06, "loss": 0.0028, "step": 66520 }, { "epoch": 1.9796765506672815, "grad_norm": 0.0376165471971035, "learning_rate": 3.1285651251483197e-06, "loss": 0.0026, "step": 66530 }, { "epoch": 1.9799741121509231, "grad_norm": 0.5007882118225098, "learning_rate": 3.1269598990789653e-06, "loss": 0.0076, "step": 66540 }, { "epoch": 1.9802716736345647, "grad_norm": 0.2027585804462433, "learning_rate": 3.1253548975464243e-06, "loss": 0.0033, "step": 66550 }, { "epoch": 1.9805692351182063, "grad_norm": 0.03867080435156822, "learning_rate": 3.1237501207431037e-06, "loss": 0.0048, "step": 66560 }, { "epoch": 1.9808667966018478, "grad_norm": 0.11433116346597672, "learning_rate": 3.122145568861379e-06, "loss": 0.0051, "step": 66570 }, { "epoch": 1.9811643580854894, "grad_norm": 0.10495565086603165, "learning_rate": 3.1205412420936034e-06, "loss": 0.0024, "step": 66580 }, { "epoch": 1.981461919569131, "grad_norm": 0.09819754958152771, "learning_rate": 3.118937140632098e-06, "loss": 0.0023, "step": 66590 }, { "epoch": 1.9817594810527726, "grad_norm": 0.19902904331684113, "learning_rate": 3.117333264669162e-06, "loss": 0.0021, "step": 66600 }, { "epoch": 1.9820570425364141, "grad_norm": 0.11200964450836182, "learning_rate": 3.115729614397065e-06, "loss": 0.003, "step": 66610 }, { "epoch": 1.9823546040200557, "grad_norm": 0.1430576741695404, "learning_rate": 3.11412619000805e-06, "loss": 0.0031, "step": 66620 }, { "epoch": 1.9826521655036973, "grad_norm": 0.3336178958415985, "learning_rate": 3.112522991694331e-06, "loss": 0.0035, "step": 66630 }, { "epoch": 1.9829497269873388, "grad_norm": 0.17579065263271332, "learning_rate": 3.110920019648099e-06, "loss": 0.0051, "step": 66640 }, { "epoch": 1.9832472884709804, "grad_norm": 0.17227254807949066, "learning_rate": 3.109317274061513e-06, "loss": 0.0038, "step": 66650 }, { "epoch": 1.983544849954622, "grad_norm": 0.09645722061395645, "learning_rate": 3.1077147551267083e-06, "loss": 0.0017, "step": 66660 }, { "epoch": 1.9838424114382636, "grad_norm": 0.07719019800424576, "learning_rate": 3.106112463035794e-06, "loss": 0.004, "step": 66670 }, { "epoch": 1.9841399729219051, "grad_norm": 0.3346986174583435, "learning_rate": 3.1045103979808476e-06, "loss": 0.0056, "step": 66680 }, { "epoch": 1.9844375344055467, "grad_norm": 0.28334295749664307, "learning_rate": 3.102908560153924e-06, "loss": 0.0036, "step": 66690 }, { "epoch": 1.984735095889188, "grad_norm": 0.1434866338968277, "learning_rate": 3.101306949747048e-06, "loss": 0.002, "step": 66700 }, { "epoch": 1.9850326573728296, "grad_norm": 0.029239898547530174, "learning_rate": 3.0997055669522173e-06, "loss": 0.0028, "step": 66710 }, { "epoch": 1.9853302188564712, "grad_norm": 0.0980883538722992, "learning_rate": 3.0981044119614023e-06, "loss": 0.0032, "step": 66720 }, { "epoch": 1.9856277803401128, "grad_norm": 0.17508795857429504, "learning_rate": 3.096503484966549e-06, "loss": 0.0031, "step": 66730 }, { "epoch": 1.9859253418237544, "grad_norm": 0.17243118584156036, "learning_rate": 3.0949027861595716e-06, "loss": 0.0042, "step": 66740 }, { "epoch": 1.986222903307396, "grad_norm": 0.2807927131652832, "learning_rate": 3.0933023157323605e-06, "loss": 0.0043, "step": 66750 }, { "epoch": 1.9865204647910375, "grad_norm": 0.16233739256858826, "learning_rate": 3.0917020738767754e-06, "loss": 0.004, "step": 66760 }, { "epoch": 1.9868180262746789, "grad_norm": 0.1130705326795578, "learning_rate": 3.0901020607846538e-06, "loss": 0.0033, "step": 66770 }, { "epoch": 1.9871155877583204, "grad_norm": 0.09603173285722733, "learning_rate": 3.088502276647799e-06, "loss": 0.0018, "step": 66780 }, { "epoch": 1.987413149241962, "grad_norm": 0.2461952418088913, "learning_rate": 3.0869027216579926e-06, "loss": 0.004, "step": 66790 }, { "epoch": 1.9877107107256036, "grad_norm": 0.20211763679981232, "learning_rate": 3.0853033960069854e-06, "loss": 0.0023, "step": 66800 }, { "epoch": 1.9880082722092451, "grad_norm": 0.07317893952131271, "learning_rate": 3.083704299886503e-06, "loss": 0.0022, "step": 66810 }, { "epoch": 1.9883058336928867, "grad_norm": 0.13899248838424683, "learning_rate": 3.0821054334882406e-06, "loss": 0.0022, "step": 66820 }, { "epoch": 1.9886033951765283, "grad_norm": 0.09865190088748932, "learning_rate": 3.0805067970038692e-06, "loss": 0.0018, "step": 66830 }, { "epoch": 1.9889009566601699, "grad_norm": 0.11219321936368942, "learning_rate": 3.078908390625028e-06, "loss": 0.0021, "step": 66840 }, { "epoch": 1.9891985181438114, "grad_norm": 0.20658057928085327, "learning_rate": 3.0773102145433354e-06, "loss": 0.003, "step": 66850 }, { "epoch": 1.989496079627453, "grad_norm": 0.18705439567565918, "learning_rate": 3.075712268950375e-06, "loss": 0.0028, "step": 66860 }, { "epoch": 1.9897936411110946, "grad_norm": 0.0872679129242897, "learning_rate": 3.074114554037706e-06, "loss": 0.0029, "step": 66870 }, { "epoch": 1.9900912025947362, "grad_norm": 0.9231155514717102, "learning_rate": 3.0725170699968606e-06, "loss": 0.0067, "step": 66880 }, { "epoch": 1.9903887640783777, "grad_norm": 0.1268206536769867, "learning_rate": 3.0709198170193406e-06, "loss": 0.0031, "step": 66890 }, { "epoch": 1.9906863255620193, "grad_norm": 0.2885369062423706, "learning_rate": 3.0693227952966244e-06, "loss": 0.0031, "step": 66900 }, { "epoch": 1.9909838870456609, "grad_norm": 0.17801694571971893, "learning_rate": 3.0677260050201586e-06, "loss": 0.003, "step": 66910 }, { "epoch": 1.9912814485293024, "grad_norm": 0.06319552659988403, "learning_rate": 3.066129446381364e-06, "loss": 0.0023, "step": 66920 }, { "epoch": 1.991579010012944, "grad_norm": 0.1104382649064064, "learning_rate": 3.0645331195716333e-06, "loss": 0.0035, "step": 66930 }, { "epoch": 1.9918765714965856, "grad_norm": 0.12031802535057068, "learning_rate": 3.0629370247823298e-06, "loss": 0.0024, "step": 66940 }, { "epoch": 1.9921741329802272, "grad_norm": 0.20627255737781525, "learning_rate": 3.061341162204795e-06, "loss": 0.0051, "step": 66950 }, { "epoch": 1.9924716944638687, "grad_norm": 0.15871840715408325, "learning_rate": 3.0597455320303345e-06, "loss": 0.005, "step": 66960 }, { "epoch": 1.9927692559475103, "grad_norm": 0.3613821864128113, "learning_rate": 3.0581501344502306e-06, "loss": 0.0088, "step": 66970 }, { "epoch": 1.9930668174311517, "grad_norm": 0.04128049314022064, "learning_rate": 3.0565549696557374e-06, "loss": 0.0021, "step": 66980 }, { "epoch": 1.9933643789147932, "grad_norm": 0.04586728289723396, "learning_rate": 3.0549600378380796e-06, "loss": 0.0033, "step": 66990 }, { "epoch": 1.9936619403984348, "grad_norm": 0.23763184249401093, "learning_rate": 3.0533653391884543e-06, "loss": 0.0048, "step": 67000 }, { "epoch": 1.9939595018820764, "grad_norm": 0.10935325175523758, "learning_rate": 3.051770873898034e-06, "loss": 0.0026, "step": 67010 }, { "epoch": 1.994257063365718, "grad_norm": 0.13957367837429047, "learning_rate": 3.050176642157957e-06, "loss": 0.0024, "step": 67020 }, { "epoch": 1.9945546248493595, "grad_norm": 0.26212385296821594, "learning_rate": 3.0485826441593397e-06, "loss": 0.0035, "step": 67030 }, { "epoch": 1.994852186333001, "grad_norm": 0.25032293796539307, "learning_rate": 3.0469888800932656e-06, "loss": 0.0039, "step": 67040 }, { "epoch": 1.9951497478166424, "grad_norm": 0.03904839605093002, "learning_rate": 3.0453953501507947e-06, "loss": 0.0024, "step": 67050 }, { "epoch": 1.995447309300284, "grad_norm": 0.17948663234710693, "learning_rate": 3.0438020545229548e-06, "loss": 0.0028, "step": 67060 }, { "epoch": 1.9957448707839256, "grad_norm": 0.07662297785282135, "learning_rate": 3.042208993400749e-06, "loss": 0.0027, "step": 67070 }, { "epoch": 1.9960424322675672, "grad_norm": 0.09851087629795074, "learning_rate": 3.040616166975148e-06, "loss": 0.0024, "step": 67080 }, { "epoch": 1.9963399937512087, "grad_norm": 0.2252672165632248, "learning_rate": 3.0390235754371013e-06, "loss": 0.0021, "step": 67090 }, { "epoch": 1.9966375552348503, "grad_norm": 0.13810084760189056, "learning_rate": 3.037431218977521e-06, "loss": 0.0038, "step": 67100 }, { "epoch": 1.9969351167184919, "grad_norm": 0.2616886794567108, "learning_rate": 3.035839097787301e-06, "loss": 0.0056, "step": 67110 }, { "epoch": 1.9972326782021335, "grad_norm": 0.17775900661945343, "learning_rate": 3.034247212057298e-06, "loss": 0.0034, "step": 67120 }, { "epoch": 1.997530239685775, "grad_norm": 0.1711944192647934, "learning_rate": 3.032655561978347e-06, "loss": 0.0043, "step": 67130 }, { "epoch": 1.9978278011694166, "grad_norm": 0.2862749695777893, "learning_rate": 3.031064147741251e-06, "loss": 0.0055, "step": 67140 }, { "epoch": 1.9981253626530582, "grad_norm": 0.4043147563934326, "learning_rate": 3.0294729695367873e-06, "loss": 0.0061, "step": 67150 }, { "epoch": 1.9984229241366998, "grad_norm": 0.07647206634283066, "learning_rate": 3.027882027555702e-06, "loss": 0.0032, "step": 67160 }, { "epoch": 1.9987204856203413, "grad_norm": 0.19599425792694092, "learning_rate": 3.0262913219887165e-06, "loss": 0.004, "step": 67170 }, { "epoch": 1.999018047103983, "grad_norm": 0.0974399521946907, "learning_rate": 3.0247008530265185e-06, "loss": 0.004, "step": 67180 }, { "epoch": 1.9993156085876245, "grad_norm": 0.09396938979625702, "learning_rate": 3.0231106208597748e-06, "loss": 0.0019, "step": 67190 }, { "epoch": 1.999613170071266, "grad_norm": 0.07127540558576584, "learning_rate": 3.0215206256791162e-06, "loss": 0.004, "step": 67200 }, { "epoch": 1.9999107315549076, "grad_norm": 0.38330817222595215, "learning_rate": 3.0199308676751517e-06, "loss": 0.0023, "step": 67210 }, { "epoch": 2.000208293038549, "grad_norm": 0.24097955226898193, "learning_rate": 3.0183413470384537e-06, "loss": 0.0024, "step": 67220 }, { "epoch": 2.0005058545221908, "grad_norm": 0.13911205530166626, "learning_rate": 3.0167520639595764e-06, "loss": 0.0019, "step": 67230 }, { "epoch": 2.0008034160058323, "grad_norm": 0.1509261131286621, "learning_rate": 3.015163018629039e-06, "loss": 0.0029, "step": 67240 }, { "epoch": 2.001100977489474, "grad_norm": 0.11098672449588776, "learning_rate": 3.0135742112373327e-06, "loss": 0.002, "step": 67250 }, { "epoch": 2.0013985389731155, "grad_norm": 0.1207282766699791, "learning_rate": 3.0119856419749222e-06, "loss": 0.0019, "step": 67260 }, { "epoch": 2.001696100456757, "grad_norm": 0.10733959078788757, "learning_rate": 3.0103973110322416e-06, "loss": 0.0026, "step": 67270 }, { "epoch": 2.0019936619403986, "grad_norm": 0.11381950974464417, "learning_rate": 3.008809218599695e-06, "loss": 0.0023, "step": 67280 }, { "epoch": 2.00229122342404, "grad_norm": 0.3363015949726105, "learning_rate": 3.0072213648676647e-06, "loss": 0.0022, "step": 67290 }, { "epoch": 2.0025887849076813, "grad_norm": 0.11746982485055923, "learning_rate": 3.005633750026496e-06, "loss": 0.0026, "step": 67300 }, { "epoch": 2.002886346391323, "grad_norm": 0.23526230454444885, "learning_rate": 3.0040463742665115e-06, "loss": 0.0028, "step": 67310 }, { "epoch": 2.0031839078749645, "grad_norm": 0.2324444055557251, "learning_rate": 3.0024592377780026e-06, "loss": 0.003, "step": 67320 }, { "epoch": 2.003481469358606, "grad_norm": 0.04564545303583145, "learning_rate": 3.000872340751233e-06, "loss": 0.0024, "step": 67330 }, { "epoch": 2.0037790308422476, "grad_norm": 0.16202658414840698, "learning_rate": 2.999285683376435e-06, "loss": 0.0019, "step": 67340 }, { "epoch": 2.004076592325889, "grad_norm": 0.40928617119789124, "learning_rate": 2.9976992658438175e-06, "loss": 0.0019, "step": 67350 }, { "epoch": 2.0043741538095308, "grad_norm": 0.19213253259658813, "learning_rate": 2.9961130883435545e-06, "loss": 0.0016, "step": 67360 }, { "epoch": 2.0046717152931723, "grad_norm": 0.08828794211149216, "learning_rate": 2.994527151065797e-06, "loss": 0.0014, "step": 67370 }, { "epoch": 2.004969276776814, "grad_norm": 0.24895934760570526, "learning_rate": 2.9929414542006617e-06, "loss": 0.0018, "step": 67380 }, { "epoch": 2.0052668382604555, "grad_norm": 0.12182758748531342, "learning_rate": 2.991355997938242e-06, "loss": 0.0019, "step": 67390 }, { "epoch": 2.005564399744097, "grad_norm": 0.07983175665140152, "learning_rate": 2.9897707824685975e-06, "loss": 0.0023, "step": 67400 }, { "epoch": 2.0058619612277386, "grad_norm": 0.45903268456459045, "learning_rate": 2.9881858079817623e-06, "loss": 0.0019, "step": 67410 }, { "epoch": 2.00615952271138, "grad_norm": 0.11050841212272644, "learning_rate": 2.9866010746677387e-06, "loss": 0.002, "step": 67420 }, { "epoch": 2.006457084195022, "grad_norm": 0.1022806391119957, "learning_rate": 2.9850165827165043e-06, "loss": 0.0019, "step": 67430 }, { "epoch": 2.0067546456786634, "grad_norm": 0.21873529255390167, "learning_rate": 2.9834323323180038e-06, "loss": 0.0016, "step": 67440 }, { "epoch": 2.007052207162305, "grad_norm": 0.3166119158267975, "learning_rate": 2.981848323662154e-06, "loss": 0.002, "step": 67450 }, { "epoch": 2.0073497686459465, "grad_norm": 0.1717544049024582, "learning_rate": 2.980264556938844e-06, "loss": 0.0023, "step": 67460 }, { "epoch": 2.007647330129588, "grad_norm": 0.19410137832164764, "learning_rate": 2.9786810323379333e-06, "loss": 0.0025, "step": 67470 }, { "epoch": 2.0079448916132296, "grad_norm": 0.21027947962284088, "learning_rate": 2.9770977500492502e-06, "loss": 0.0018, "step": 67480 }, { "epoch": 2.008242453096871, "grad_norm": 0.012736375443637371, "learning_rate": 2.9755147102625988e-06, "loss": 0.0021, "step": 67490 }, { "epoch": 2.008540014580513, "grad_norm": 0.09159889817237854, "learning_rate": 2.973931913167748e-06, "loss": 0.0015, "step": 67500 }, { "epoch": 2.0088375760641544, "grad_norm": 0.18313691020011902, "learning_rate": 2.972349358954442e-06, "loss": 0.0041, "step": 67510 }, { "epoch": 2.009135137547796, "grad_norm": 0.2661741077899933, "learning_rate": 2.970767047812396e-06, "loss": 0.0043, "step": 67520 }, { "epoch": 2.0094326990314375, "grad_norm": 0.22641947865486145, "learning_rate": 2.969184979931294e-06, "loss": 0.0028, "step": 67530 }, { "epoch": 2.009730260515079, "grad_norm": 0.15709540247917175, "learning_rate": 2.967603155500791e-06, "loss": 0.002, "step": 67540 }, { "epoch": 2.0100278219987207, "grad_norm": 0.3599766194820404, "learning_rate": 2.966021574710514e-06, "loss": 0.0034, "step": 67550 }, { "epoch": 2.0103253834823622, "grad_norm": 0.05021360516548157, "learning_rate": 2.9644402377500605e-06, "loss": 0.0038, "step": 67560 }, { "epoch": 2.010622944966004, "grad_norm": 0.10513319820165634, "learning_rate": 2.962859144808996e-06, "loss": 0.0027, "step": 67570 }, { "epoch": 2.0109205064496454, "grad_norm": 0.1776251196861267, "learning_rate": 2.9612782960768627e-06, "loss": 0.003, "step": 67580 }, { "epoch": 2.0112180679332865, "grad_norm": 0.10886587202548981, "learning_rate": 2.959697691743167e-06, "loss": 0.0031, "step": 67590 }, { "epoch": 2.011515629416928, "grad_norm": 0.1254890412092209, "learning_rate": 2.958117331997391e-06, "loss": 0.0034, "step": 67600 }, { "epoch": 2.0118131909005696, "grad_norm": 0.22060056030750275, "learning_rate": 2.956537217028984e-06, "loss": 0.0044, "step": 67610 }, { "epoch": 2.012110752384211, "grad_norm": 0.07659187912940979, "learning_rate": 2.9549573470273702e-06, "loss": 0.0017, "step": 67620 }, { "epoch": 2.012408313867853, "grad_norm": 0.21092012524604797, "learning_rate": 2.9533777221819383e-06, "loss": 0.0014, "step": 67630 }, { "epoch": 2.0127058753514944, "grad_norm": 0.1865253448486328, "learning_rate": 2.951798342682053e-06, "loss": 0.0022, "step": 67640 }, { "epoch": 2.013003436835136, "grad_norm": 0.056656528264284134, "learning_rate": 2.9502192087170476e-06, "loss": 0.0018, "step": 67650 }, { "epoch": 2.0133009983187775, "grad_norm": 0.17597568035125732, "learning_rate": 2.9486403204762237e-06, "loss": 0.0019, "step": 67660 }, { "epoch": 2.013598559802419, "grad_norm": 0.18668627738952637, "learning_rate": 2.947061678148859e-06, "loss": 0.0022, "step": 67670 }, { "epoch": 2.0138961212860607, "grad_norm": 0.08996588736772537, "learning_rate": 2.945483281924196e-06, "loss": 0.0027, "step": 67680 }, { "epoch": 2.0141936827697022, "grad_norm": 0.18505102396011353, "learning_rate": 2.943905131991452e-06, "loss": 0.0016, "step": 67690 }, { "epoch": 2.014491244253344, "grad_norm": 0.15449002385139465, "learning_rate": 2.94232722853981e-06, "loss": 0.0027, "step": 67700 }, { "epoch": 2.0147888057369854, "grad_norm": 0.1428719311952591, "learning_rate": 2.94074957175843e-06, "loss": 0.0028, "step": 67710 }, { "epoch": 2.015086367220627, "grad_norm": 0.2523656189441681, "learning_rate": 2.9391721618364356e-06, "loss": 0.0035, "step": 67720 }, { "epoch": 2.0153839287042685, "grad_norm": 0.04825945571064949, "learning_rate": 2.9375949989629264e-06, "loss": 0.0018, "step": 67730 }, { "epoch": 2.01568149018791, "grad_norm": 0.15540830790996552, "learning_rate": 2.936018083326968e-06, "loss": 0.0015, "step": 67740 }, { "epoch": 2.0159790516715517, "grad_norm": 0.260257363319397, "learning_rate": 2.9344414151176006e-06, "loss": 0.0015, "step": 67750 }, { "epoch": 2.0162766131551932, "grad_norm": 0.24108555912971497, "learning_rate": 2.932864994523829e-06, "loss": 0.0024, "step": 67760 }, { "epoch": 2.016574174638835, "grad_norm": 0.14707204699516296, "learning_rate": 2.9312888217346357e-06, "loss": 0.002, "step": 67770 }, { "epoch": 2.0168717361224764, "grad_norm": 0.09476503729820251, "learning_rate": 2.9297128969389664e-06, "loss": 0.0041, "step": 67780 }, { "epoch": 2.017169297606118, "grad_norm": 0.1140279546380043, "learning_rate": 2.928137220325741e-06, "loss": 0.0027, "step": 67790 }, { "epoch": 2.0174668590897595, "grad_norm": 0.04899099841713905, "learning_rate": 2.9265617920838507e-06, "loss": 0.0037, "step": 67800 }, { "epoch": 2.017764420573401, "grad_norm": 0.0659671500325203, "learning_rate": 2.9249866124021535e-06, "loss": 0.0019, "step": 67810 }, { "epoch": 2.0180619820570427, "grad_norm": 0.07510065287351608, "learning_rate": 2.92341168146948e-06, "loss": 0.0019, "step": 67820 }, { "epoch": 2.0183595435406843, "grad_norm": 0.05622656270861626, "learning_rate": 2.92183699947463e-06, "loss": 0.0011, "step": 67830 }, { "epoch": 2.018657105024326, "grad_norm": 0.3428152799606323, "learning_rate": 2.9202625666063733e-06, "loss": 0.0037, "step": 67840 }, { "epoch": 2.0189546665079674, "grad_norm": 0.15133148431777954, "learning_rate": 2.9186883830534503e-06, "loss": 0.0025, "step": 67850 }, { "epoch": 2.019252227991609, "grad_norm": 0.22547438740730286, "learning_rate": 2.9171144490045712e-06, "loss": 0.0013, "step": 67860 }, { "epoch": 2.01954978947525, "grad_norm": 0.04236824810504913, "learning_rate": 2.915540764648418e-06, "loss": 0.0036, "step": 67870 }, { "epoch": 2.0198473509588917, "grad_norm": 0.13642500340938568, "learning_rate": 2.9139673301736386e-06, "loss": 0.0029, "step": 67880 }, { "epoch": 2.0201449124425332, "grad_norm": 0.07945706695318222, "learning_rate": 2.9123941457688553e-06, "loss": 0.0026, "step": 67890 }, { "epoch": 2.020442473926175, "grad_norm": 0.22805769741535187, "learning_rate": 2.9108212116226587e-06, "loss": 0.0045, "step": 67900 }, { "epoch": 2.0207400354098164, "grad_norm": 0.1994607001543045, "learning_rate": 2.9092485279236106e-06, "loss": 0.0014, "step": 67910 }, { "epoch": 2.021037596893458, "grad_norm": 0.22570711374282837, "learning_rate": 2.9076760948602378e-06, "loss": 0.0015, "step": 67920 }, { "epoch": 2.0213351583770995, "grad_norm": 0.06238841265439987, "learning_rate": 2.9061039126210444e-06, "loss": 0.0012, "step": 67930 }, { "epoch": 2.021632719860741, "grad_norm": 0.21947547793388367, "learning_rate": 2.904531981394499e-06, "loss": 0.0081, "step": 67940 }, { "epoch": 2.0219302813443827, "grad_norm": 0.2640892565250397, "learning_rate": 2.902960301369045e-06, "loss": 0.0022, "step": 67950 }, { "epoch": 2.0222278428280243, "grad_norm": 0.27721962332725525, "learning_rate": 2.9013888727330885e-06, "loss": 0.0025, "step": 67960 }, { "epoch": 2.022525404311666, "grad_norm": 0.23686860501766205, "learning_rate": 2.8998176956750112e-06, "loss": 0.0062, "step": 67970 }, { "epoch": 2.0228229657953074, "grad_norm": 0.09591443836688995, "learning_rate": 2.898246770383164e-06, "loss": 0.0011, "step": 67980 }, { "epoch": 2.023120527278949, "grad_norm": 0.06768433004617691, "learning_rate": 2.8966760970458687e-06, "loss": 0.0025, "step": 67990 }, { "epoch": 2.0234180887625905, "grad_norm": 0.14402033388614655, "learning_rate": 2.8951056758514095e-06, "loss": 0.0009, "step": 68000 }, { "epoch": 2.023715650246232, "grad_norm": 0.11436938494443893, "learning_rate": 2.893535506988049e-06, "loss": 0.0026, "step": 68010 }, { "epoch": 2.0240132117298737, "grad_norm": 0.05264541879296303, "learning_rate": 2.891965590644018e-06, "loss": 0.0021, "step": 68020 }, { "epoch": 2.0243107732135153, "grad_norm": 0.1658182442188263, "learning_rate": 2.890395927007511e-06, "loss": 0.0012, "step": 68030 }, { "epoch": 2.024608334697157, "grad_norm": 0.24694892764091492, "learning_rate": 2.8888265162666996e-06, "loss": 0.0061, "step": 68040 }, { "epoch": 2.0249058961807984, "grad_norm": 0.37514519691467285, "learning_rate": 2.887257358609722e-06, "loss": 0.0015, "step": 68050 }, { "epoch": 2.02520345766444, "grad_norm": 0.27679726481437683, "learning_rate": 2.8856884542246865e-06, "loss": 0.0027, "step": 68060 }, { "epoch": 2.0255010191480816, "grad_norm": 0.11484973877668381, "learning_rate": 2.884119803299666e-06, "loss": 0.0017, "step": 68070 }, { "epoch": 2.025798580631723, "grad_norm": 0.20378215610980988, "learning_rate": 2.8825514060227135e-06, "loss": 0.0027, "step": 68080 }, { "epoch": 2.0260961421153647, "grad_norm": 0.10806283354759216, "learning_rate": 2.8809832625818452e-06, "loss": 0.0018, "step": 68090 }, { "epoch": 2.0263937035990063, "grad_norm": 0.19095273315906525, "learning_rate": 2.8794153731650445e-06, "loss": 0.0019, "step": 68100 }, { "epoch": 2.026691265082648, "grad_norm": 0.2341500222682953, "learning_rate": 2.8778477379602678e-06, "loss": 0.0014, "step": 68110 }, { "epoch": 2.0269888265662894, "grad_norm": 0.11661071330308914, "learning_rate": 2.8762803571554417e-06, "loss": 0.0028, "step": 68120 }, { "epoch": 2.027286388049931, "grad_norm": 0.17830774188041687, "learning_rate": 2.874713230938463e-06, "loss": 0.0028, "step": 68130 }, { "epoch": 2.0275839495335726, "grad_norm": 0.1006244495511055, "learning_rate": 2.8731463594971908e-06, "loss": 0.002, "step": 68140 }, { "epoch": 2.027881511017214, "grad_norm": 0.1161428838968277, "learning_rate": 2.871579743019463e-06, "loss": 0.0024, "step": 68150 }, { "epoch": 2.0281790725008553, "grad_norm": 0.17462624609470367, "learning_rate": 2.870013381693082e-06, "loss": 0.0016, "step": 68160 }, { "epoch": 2.028476633984497, "grad_norm": 0.24534960091114044, "learning_rate": 2.868447275705821e-06, "loss": 0.0015, "step": 68170 }, { "epoch": 2.0287741954681384, "grad_norm": 0.18117909133434296, "learning_rate": 2.866881425245421e-06, "loss": 0.0022, "step": 68180 }, { "epoch": 2.02907175695178, "grad_norm": 0.0417327806353569, "learning_rate": 2.865315830499593e-06, "loss": 0.0017, "step": 68190 }, { "epoch": 2.0293693184354216, "grad_norm": 0.09770677238702774, "learning_rate": 2.8637504916560186e-06, "loss": 0.0024, "step": 68200 }, { "epoch": 2.029666879919063, "grad_norm": 0.08431197702884674, "learning_rate": 2.8621854089023504e-06, "loss": 0.0022, "step": 68210 }, { "epoch": 2.0299644414027047, "grad_norm": 0.22103917598724365, "learning_rate": 2.860620582426203e-06, "loss": 0.0028, "step": 68220 }, { "epoch": 2.0302620028863463, "grad_norm": 0.021424174308776855, "learning_rate": 2.8590560124151677e-06, "loss": 0.0013, "step": 68230 }, { "epoch": 2.030559564369988, "grad_norm": 0.09888292849063873, "learning_rate": 2.8574916990568023e-06, "loss": 0.0018, "step": 68240 }, { "epoch": 2.0308571258536294, "grad_norm": 0.14918625354766846, "learning_rate": 2.8559276425386356e-06, "loss": 0.0025, "step": 68250 }, { "epoch": 2.031154687337271, "grad_norm": 0.1304997205734253, "learning_rate": 2.85436384304816e-06, "loss": 0.0068, "step": 68260 }, { "epoch": 2.0314522488209126, "grad_norm": 0.3453100323677063, "learning_rate": 2.8528003007728446e-06, "loss": 0.0021, "step": 68270 }, { "epoch": 2.031749810304554, "grad_norm": 0.14751897752285004, "learning_rate": 2.8512370159001225e-06, "loss": 0.0018, "step": 68280 }, { "epoch": 2.0320473717881957, "grad_norm": 0.1631859391927719, "learning_rate": 2.8496739886173994e-06, "loss": 0.0011, "step": 68290 }, { "epoch": 2.0323449332718373, "grad_norm": 0.15234889090061188, "learning_rate": 2.848111219112045e-06, "loss": 0.003, "step": 68300 }, { "epoch": 2.032642494755479, "grad_norm": 0.17739182710647583, "learning_rate": 2.8465487075714037e-06, "loss": 0.0023, "step": 68310 }, { "epoch": 2.0329400562391204, "grad_norm": 0.2059408575296402, "learning_rate": 2.8449864541827864e-06, "loss": 0.0019, "step": 68320 }, { "epoch": 2.033237617722762, "grad_norm": 0.09400585293769836, "learning_rate": 2.843424459133476e-06, "loss": 0.0011, "step": 68330 }, { "epoch": 2.0335351792064036, "grad_norm": 0.12973831593990326, "learning_rate": 2.8418627226107158e-06, "loss": 0.0019, "step": 68340 }, { "epoch": 2.033832740690045, "grad_norm": 0.10135617852210999, "learning_rate": 2.840301244801728e-06, "loss": 0.002, "step": 68350 }, { "epoch": 2.0341303021736867, "grad_norm": 0.16281335055828094, "learning_rate": 2.8387400258936992e-06, "loss": 0.0021, "step": 68360 }, { "epoch": 2.0344278636573283, "grad_norm": 0.17999763786792755, "learning_rate": 2.8371790660737853e-06, "loss": 0.0033, "step": 68370 }, { "epoch": 2.03472542514097, "grad_norm": 0.1777259111404419, "learning_rate": 2.8356183655291127e-06, "loss": 0.0021, "step": 68380 }, { "epoch": 2.0350229866246115, "grad_norm": 0.11414293944835663, "learning_rate": 2.834057924446774e-06, "loss": 0.0025, "step": 68390 }, { "epoch": 2.035320548108253, "grad_norm": 0.2728908956050873, "learning_rate": 2.8324977430138345e-06, "loss": 0.0021, "step": 68400 }, { "epoch": 2.0356181095918946, "grad_norm": 0.1646318882703781, "learning_rate": 2.8309378214173223e-06, "loss": 0.0018, "step": 68410 }, { "epoch": 2.035915671075536, "grad_norm": 0.022408341988921165, "learning_rate": 2.8293781598442403e-06, "loss": 0.001, "step": 68420 }, { "epoch": 2.0362132325591777, "grad_norm": 0.14861935377120972, "learning_rate": 2.82781875848156e-06, "loss": 0.0021, "step": 68430 }, { "epoch": 2.036510794042819, "grad_norm": 0.06475865840911865, "learning_rate": 2.826259617516216e-06, "loss": 0.0014, "step": 68440 }, { "epoch": 2.0368083555264604, "grad_norm": 0.2744089663028717, "learning_rate": 2.8247007371351164e-06, "loss": 0.0031, "step": 68450 }, { "epoch": 2.037105917010102, "grad_norm": 0.032732125371694565, "learning_rate": 2.8231421175251383e-06, "loss": 0.0016, "step": 68460 }, { "epoch": 2.0374034784937436, "grad_norm": 0.13278305530548096, "learning_rate": 2.821583758873127e-06, "loss": 0.0016, "step": 68470 }, { "epoch": 2.037701039977385, "grad_norm": 0.15442584455013275, "learning_rate": 2.820025661365893e-06, "loss": 0.0027, "step": 68480 }, { "epoch": 2.0379986014610267, "grad_norm": 0.16370879113674164, "learning_rate": 2.81846782519022e-06, "loss": 0.0011, "step": 68490 }, { "epoch": 2.0382961629446683, "grad_norm": 0.08437436819076538, "learning_rate": 2.8169102505328593e-06, "loss": 0.0031, "step": 68500 }, { "epoch": 2.03859372442831, "grad_norm": 0.3023795187473297, "learning_rate": 2.815352937580531e-06, "loss": 0.0021, "step": 68510 }, { "epoch": 2.0388912859119515, "grad_norm": 0.29436194896698, "learning_rate": 2.8137958865199196e-06, "loss": 0.0023, "step": 68520 }, { "epoch": 2.039188847395593, "grad_norm": 0.31938937306404114, "learning_rate": 2.812239097537684e-06, "loss": 0.0015, "step": 68530 }, { "epoch": 2.0394864088792346, "grad_norm": 0.15465769171714783, "learning_rate": 2.810682570820449e-06, "loss": 0.0012, "step": 68540 }, { "epoch": 2.039783970362876, "grad_norm": 0.5155795812606812, "learning_rate": 2.8091263065548107e-06, "loss": 0.0033, "step": 68550 }, { "epoch": 2.0400815318465177, "grad_norm": 0.11213299632072449, "learning_rate": 2.8075703049273273e-06, "loss": 0.0018, "step": 68560 }, { "epoch": 2.0403790933301593, "grad_norm": 0.07376249879598618, "learning_rate": 2.8060145661245312e-06, "loss": 0.0012, "step": 68570 }, { "epoch": 2.040676654813801, "grad_norm": 0.5675896406173706, "learning_rate": 2.804459090332922e-06, "loss": 0.005, "step": 68580 }, { "epoch": 2.0409742162974425, "grad_norm": 0.11707816272974014, "learning_rate": 2.8029038777389696e-06, "loss": 0.0025, "step": 68590 }, { "epoch": 2.041271777781084, "grad_norm": 0.32944586873054504, "learning_rate": 2.801348928529106e-06, "loss": 0.003, "step": 68600 }, { "epoch": 2.0415693392647256, "grad_norm": 0.09227193146944046, "learning_rate": 2.7997942428897377e-06, "loss": 0.0028, "step": 68610 }, { "epoch": 2.041866900748367, "grad_norm": 0.11329903453588486, "learning_rate": 2.798239821007238e-06, "loss": 0.0019, "step": 68620 }, { "epoch": 2.0421644622320088, "grad_norm": 0.0475873164832592, "learning_rate": 2.7966856630679507e-06, "loss": 0.0028, "step": 68630 }, { "epoch": 2.0424620237156503, "grad_norm": 0.1338883340358734, "learning_rate": 2.7951317692581783e-06, "loss": 0.0021, "step": 68640 }, { "epoch": 2.042759585199292, "grad_norm": 0.12794189155101776, "learning_rate": 2.793578139764206e-06, "loss": 0.0022, "step": 68650 }, { "epoch": 2.0430571466829335, "grad_norm": 0.2055107206106186, "learning_rate": 2.7920247747722793e-06, "loss": 0.0016, "step": 68660 }, { "epoch": 2.043354708166575, "grad_norm": 0.1539345383644104, "learning_rate": 2.7904716744686106e-06, "loss": 0.0018, "step": 68670 }, { "epoch": 2.0436522696502166, "grad_norm": 0.16790969669818878, "learning_rate": 2.7889188390393827e-06, "loss": 0.0023, "step": 68680 }, { "epoch": 2.043949831133858, "grad_norm": 0.25478270649909973, "learning_rate": 2.787366268670749e-06, "loss": 0.0018, "step": 68690 }, { "epoch": 2.0442473926174998, "grad_norm": 0.06697983294725418, "learning_rate": 2.785813963548829e-06, "loss": 0.0031, "step": 68700 }, { "epoch": 2.0445449541011413, "grad_norm": 0.3496052324771881, "learning_rate": 2.7842619238597077e-06, "loss": 0.0033, "step": 68710 }, { "epoch": 2.044842515584783, "grad_norm": 0.11085032671689987, "learning_rate": 2.7827101497894427e-06, "loss": 0.0022, "step": 68720 }, { "epoch": 2.045140077068424, "grad_norm": 0.09644269943237305, "learning_rate": 2.781158641524057e-06, "loss": 0.003, "step": 68730 }, { "epoch": 2.0454376385520656, "grad_norm": 0.15527409315109253, "learning_rate": 2.779607399249546e-06, "loss": 0.0024, "step": 68740 }, { "epoch": 2.045735200035707, "grad_norm": 0.10241962969303131, "learning_rate": 2.778056423151865e-06, "loss": 0.002, "step": 68750 }, { "epoch": 2.0460327615193488, "grad_norm": 0.09060598164796829, "learning_rate": 2.776505713416946e-06, "loss": 0.0028, "step": 68760 }, { "epoch": 2.0463303230029903, "grad_norm": 0.15852215886116028, "learning_rate": 2.7749552702306825e-06, "loss": 0.0023, "step": 68770 }, { "epoch": 2.046627884486632, "grad_norm": 0.13143277168273926, "learning_rate": 2.7734050937789435e-06, "loss": 0.0023, "step": 68780 }, { "epoch": 2.0469254459702735, "grad_norm": 0.059120792895555496, "learning_rate": 2.771855184247557e-06, "loss": 0.0017, "step": 68790 }, { "epoch": 2.047223007453915, "grad_norm": 0.14508560299873352, "learning_rate": 2.7703055418223246e-06, "loss": 0.0028, "step": 68800 }, { "epoch": 2.0475205689375566, "grad_norm": 0.02814040333032608, "learning_rate": 2.7687561666890174e-06, "loss": 0.0015, "step": 68810 }, { "epoch": 2.047818130421198, "grad_norm": 0.25095629692077637, "learning_rate": 2.767207059033368e-06, "loss": 0.0017, "step": 68820 }, { "epoch": 2.0481156919048398, "grad_norm": 0.22318288683891296, "learning_rate": 2.7656582190410817e-06, "loss": 0.0022, "step": 68830 }, { "epoch": 2.0484132533884813, "grad_norm": 0.28525978326797485, "learning_rate": 2.7641096468978325e-06, "loss": 0.0042, "step": 68840 }, { "epoch": 2.048710814872123, "grad_norm": 0.028446948155760765, "learning_rate": 2.7625613427892604e-06, "loss": 0.0019, "step": 68850 }, { "epoch": 2.0490083763557645, "grad_norm": 0.10467670857906342, "learning_rate": 2.7610133069009713e-06, "loss": 0.0017, "step": 68860 }, { "epoch": 2.049305937839406, "grad_norm": 0.07755240052938461, "learning_rate": 2.7594655394185423e-06, "loss": 0.0014, "step": 68870 }, { "epoch": 2.0496034993230476, "grad_norm": 0.07935823500156403, "learning_rate": 2.7579180405275174e-06, "loss": 0.003, "step": 68880 }, { "epoch": 2.049901060806689, "grad_norm": 0.08490156382322311, "learning_rate": 2.756370810413409e-06, "loss": 0.0028, "step": 68890 }, { "epoch": 2.050198622290331, "grad_norm": 0.1795886904001236, "learning_rate": 2.754823849261694e-06, "loss": 0.0014, "step": 68900 }, { "epoch": 2.0504961837739724, "grad_norm": 0.4701538383960724, "learning_rate": 2.7532771572578203e-06, "loss": 0.0031, "step": 68910 }, { "epoch": 2.050793745257614, "grad_norm": 0.10972090065479279, "learning_rate": 2.7517307345872036e-06, "loss": 0.0023, "step": 68920 }, { "epoch": 2.0510913067412555, "grad_norm": 0.14740502834320068, "learning_rate": 2.7501845814352244e-06, "loss": 0.0021, "step": 68930 }, { "epoch": 2.051388868224897, "grad_norm": 0.140150785446167, "learning_rate": 2.748638697987236e-06, "loss": 0.0032, "step": 68940 }, { "epoch": 2.0516864297085387, "grad_norm": 0.12735147774219513, "learning_rate": 2.747093084428553e-06, "loss": 0.0014, "step": 68950 }, { "epoch": 2.0519839911921802, "grad_norm": 0.23093220591545105, "learning_rate": 2.7455477409444652e-06, "loss": 0.0024, "step": 68960 }, { "epoch": 2.052281552675822, "grad_norm": 0.2514031231403351, "learning_rate": 2.744002667720221e-06, "loss": 0.0026, "step": 68970 }, { "epoch": 2.0525791141594634, "grad_norm": 0.017630748450756073, "learning_rate": 2.7424578649410426e-06, "loss": 0.002, "step": 68980 }, { "epoch": 2.052876675643105, "grad_norm": 0.08281395584344864, "learning_rate": 2.740913332792119e-06, "loss": 0.0014, "step": 68990 }, { "epoch": 2.0531742371267465, "grad_norm": 0.2657527029514313, "learning_rate": 2.7393690714586075e-06, "loss": 0.0024, "step": 69000 }, { "epoch": 2.053471798610388, "grad_norm": 0.22200903296470642, "learning_rate": 2.737825081125628e-06, "loss": 0.0016, "step": 69010 }, { "epoch": 2.053769360094029, "grad_norm": 0.14680863916873932, "learning_rate": 2.7362813619782722e-06, "loss": 0.0025, "step": 69020 }, { "epoch": 2.054066921577671, "grad_norm": 0.19412533938884735, "learning_rate": 2.7347379142016006e-06, "loss": 0.0025, "step": 69030 }, { "epoch": 2.0543644830613124, "grad_norm": 0.06586852669715881, "learning_rate": 2.7331947379806395e-06, "loss": 0.0016, "step": 69040 }, { "epoch": 2.054662044544954, "grad_norm": 0.154938206076622, "learning_rate": 2.7316518335003785e-06, "loss": 0.0016, "step": 69050 }, { "epoch": 2.0549596060285955, "grad_norm": 0.2003840208053589, "learning_rate": 2.7301092009457807e-06, "loss": 0.0013, "step": 69060 }, { "epoch": 2.055257167512237, "grad_norm": 0.10390406847000122, "learning_rate": 2.728566840501774e-06, "loss": 0.0027, "step": 69070 }, { "epoch": 2.0555547289958787, "grad_norm": 0.016496673226356506, "learning_rate": 2.727024752353255e-06, "loss": 0.0035, "step": 69080 }, { "epoch": 2.0558522904795202, "grad_norm": 0.12227664887905121, "learning_rate": 2.7254829366850844e-06, "loss": 0.0026, "step": 69090 }, { "epoch": 2.056149851963162, "grad_norm": 0.0946195051074028, "learning_rate": 2.7239413936820923e-06, "loss": 0.0021, "step": 69100 }, { "epoch": 2.0564474134468034, "grad_norm": 0.3046627938747406, "learning_rate": 2.722400123529078e-06, "loss": 0.0029, "step": 69110 }, { "epoch": 2.056744974930445, "grad_norm": 0.049896884709596634, "learning_rate": 2.7208591264108074e-06, "loss": 0.002, "step": 69120 }, { "epoch": 2.0570425364140865, "grad_norm": 0.07339248806238174, "learning_rate": 2.719318402512009e-06, "loss": 0.0021, "step": 69130 }, { "epoch": 2.057340097897728, "grad_norm": 0.20904971659183502, "learning_rate": 2.717777952017384e-06, "loss": 0.001, "step": 69140 }, { "epoch": 2.0576376593813697, "grad_norm": 0.3822682201862335, "learning_rate": 2.716237775111601e-06, "loss": 0.0026, "step": 69150 }, { "epoch": 2.0579352208650112, "grad_norm": 0.18672215938568115, "learning_rate": 2.7146978719792896e-06, "loss": 0.0016, "step": 69160 }, { "epoch": 2.058232782348653, "grad_norm": 0.18132080137729645, "learning_rate": 2.713158242805052e-06, "loss": 0.0034, "step": 69170 }, { "epoch": 2.0585303438322944, "grad_norm": 0.22376976907253265, "learning_rate": 2.711618887773457e-06, "loss": 0.0027, "step": 69180 }, { "epoch": 2.058827905315936, "grad_norm": 0.10770092904567719, "learning_rate": 2.710079807069043e-06, "loss": 0.002, "step": 69190 }, { "epoch": 2.0591254667995775, "grad_norm": 0.09851497411727905, "learning_rate": 2.708541000876307e-06, "loss": 0.002, "step": 69200 }, { "epoch": 2.059423028283219, "grad_norm": 0.25755923986434937, "learning_rate": 2.707002469379718e-06, "loss": 0.0025, "step": 69210 }, { "epoch": 2.0597205897668607, "grad_norm": 0.2463395744562149, "learning_rate": 2.705464212763719e-06, "loss": 0.0038, "step": 69220 }, { "epoch": 2.0600181512505022, "grad_norm": 0.13461795449256897, "learning_rate": 2.703926231212708e-06, "loss": 0.0027, "step": 69230 }, { "epoch": 2.060315712734144, "grad_norm": 0.30648374557495117, "learning_rate": 2.7023885249110572e-06, "loss": 0.003, "step": 69240 }, { "epoch": 2.0606132742177854, "grad_norm": 0.16671587526798248, "learning_rate": 2.7008510940431043e-06, "loss": 0.0024, "step": 69250 }, { "epoch": 2.060910835701427, "grad_norm": 0.493255615234375, "learning_rate": 2.6993139387931562e-06, "loss": 0.0048, "step": 69260 }, { "epoch": 2.0612083971850685, "grad_norm": 0.10777465999126434, "learning_rate": 2.6977770593454795e-06, "loss": 0.0019, "step": 69270 }, { "epoch": 2.06150595866871, "grad_norm": 0.10900656133890152, "learning_rate": 2.6962404558843147e-06, "loss": 0.0019, "step": 69280 }, { "epoch": 2.0618035201523517, "grad_norm": 0.5614162683486938, "learning_rate": 2.6947041285938687e-06, "loss": 0.0034, "step": 69290 }, { "epoch": 2.0621010816359933, "grad_norm": 0.1264542043209076, "learning_rate": 2.693168077658314e-06, "loss": 0.0019, "step": 69300 }, { "epoch": 2.0623986431196344, "grad_norm": 0.19790495932102203, "learning_rate": 2.6916323032617863e-06, "loss": 0.0022, "step": 69310 }, { "epoch": 2.062696204603276, "grad_norm": 0.16378840804100037, "learning_rate": 2.690096805588394e-06, "loss": 0.0023, "step": 69320 }, { "epoch": 2.0629937660869175, "grad_norm": 0.14990700781345367, "learning_rate": 2.68856158482221e-06, "loss": 0.0019, "step": 69330 }, { "epoch": 2.063291327570559, "grad_norm": 0.1368410289287567, "learning_rate": 2.687026641147275e-06, "loss": 0.0037, "step": 69340 }, { "epoch": 2.0635888890542007, "grad_norm": 0.08137552440166473, "learning_rate": 2.685491974747592e-06, "loss": 0.0025, "step": 69350 }, { "epoch": 2.0638864505378423, "grad_norm": 0.16707704961299896, "learning_rate": 2.6839575858071367e-06, "loss": 0.003, "step": 69360 }, { "epoch": 2.064184012021484, "grad_norm": 0.15741533041000366, "learning_rate": 2.6824234745098475e-06, "loss": 0.0021, "step": 69370 }, { "epoch": 2.0644815735051254, "grad_norm": 0.0746377483010292, "learning_rate": 2.680889641039634e-06, "loss": 0.0022, "step": 69380 }, { "epoch": 2.064779134988767, "grad_norm": 0.19334590435028076, "learning_rate": 2.6793560855803658e-06, "loss": 0.0031, "step": 69390 }, { "epoch": 2.0650766964724085, "grad_norm": 0.1078888401389122, "learning_rate": 2.677822808315884e-06, "loss": 0.002, "step": 69400 }, { "epoch": 2.06537425795605, "grad_norm": 0.05882728099822998, "learning_rate": 2.6762898094299966e-06, "loss": 0.0023, "step": 69410 }, { "epoch": 2.0656718194396917, "grad_norm": 0.19173242151737213, "learning_rate": 2.6747570891064774e-06, "loss": 0.0017, "step": 69420 }, { "epoch": 2.0659693809233333, "grad_norm": 0.09246265888214111, "learning_rate": 2.673224647529063e-06, "loss": 0.0024, "step": 69430 }, { "epoch": 2.066266942406975, "grad_norm": 0.17360468208789825, "learning_rate": 2.671692484881462e-06, "loss": 0.0031, "step": 69440 }, { "epoch": 2.0665645038906164, "grad_norm": 0.20674221217632294, "learning_rate": 2.670160601347346e-06, "loss": 0.0022, "step": 69450 }, { "epoch": 2.066862065374258, "grad_norm": 0.2606029212474823, "learning_rate": 2.6686289971103584e-06, "loss": 0.0019, "step": 69460 }, { "epoch": 2.0671596268578996, "grad_norm": 0.12709274888038635, "learning_rate": 2.6670976723541007e-06, "loss": 0.0024, "step": 69470 }, { "epoch": 2.067457188341541, "grad_norm": 0.2789107859134674, "learning_rate": 2.665566627262147e-06, "loss": 0.0019, "step": 69480 }, { "epoch": 2.0677547498251827, "grad_norm": 0.04063566029071808, "learning_rate": 2.6640358620180372e-06, "loss": 0.0021, "step": 69490 }, { "epoch": 2.0680523113088243, "grad_norm": 0.12401846796274185, "learning_rate": 2.6625053768052763e-06, "loss": 0.0024, "step": 69500 }, { "epoch": 2.068349872792466, "grad_norm": 0.1204252690076828, "learning_rate": 2.6609751718073357e-06, "loss": 0.0026, "step": 69510 }, { "epoch": 2.0686474342761074, "grad_norm": 0.13312944769859314, "learning_rate": 2.659445247207654e-06, "loss": 0.0016, "step": 69520 }, { "epoch": 2.068944995759749, "grad_norm": 0.14765195548534393, "learning_rate": 2.6579156031896388e-06, "loss": 0.0026, "step": 69530 }, { "epoch": 2.0692425572433906, "grad_norm": 0.09254869818687439, "learning_rate": 2.6563862399366556e-06, "loss": 0.0024, "step": 69540 }, { "epoch": 2.069540118727032, "grad_norm": 0.10353519022464752, "learning_rate": 2.654857157632046e-06, "loss": 0.0009, "step": 69550 }, { "epoch": 2.0698376802106737, "grad_norm": 0.04752335324883461, "learning_rate": 2.653328356459114e-06, "loss": 0.0016, "step": 69560 }, { "epoch": 2.0701352416943153, "grad_norm": 0.002802687231451273, "learning_rate": 2.6517998366011253e-06, "loss": 0.0017, "step": 69570 }, { "epoch": 2.070432803177957, "grad_norm": 0.2489190399646759, "learning_rate": 2.6502715982413196e-06, "loss": 0.0025, "step": 69580 }, { "epoch": 2.070730364661598, "grad_norm": 0.41068416833877563, "learning_rate": 2.648743641562899e-06, "loss": 0.0022, "step": 69590 }, { "epoch": 2.0710279261452396, "grad_norm": 0.1255166083574295, "learning_rate": 2.647215966749035e-06, "loss": 0.0028, "step": 69600 }, { "epoch": 2.071325487628881, "grad_norm": 0.1479691118001938, "learning_rate": 2.6456885739828562e-06, "loss": 0.0039, "step": 69610 }, { "epoch": 2.0716230491125227, "grad_norm": 0.15579432249069214, "learning_rate": 2.644161463447469e-06, "loss": 0.0018, "step": 69620 }, { "epoch": 2.0719206105961643, "grad_norm": 0.1937588006258011, "learning_rate": 2.6426346353259386e-06, "loss": 0.0026, "step": 69630 }, { "epoch": 2.072218172079806, "grad_norm": 0.06679121404886246, "learning_rate": 2.641108089801301e-06, "loss": 0.0034, "step": 69640 }, { "epoch": 2.0725157335634474, "grad_norm": 0.18767021596431732, "learning_rate": 2.639581827056552e-06, "loss": 0.0015, "step": 69650 }, { "epoch": 2.072813295047089, "grad_norm": 0.18044111132621765, "learning_rate": 2.63805584727466e-06, "loss": 0.0016, "step": 69660 }, { "epoch": 2.0731108565307306, "grad_norm": 0.4472300708293915, "learning_rate": 2.636530150638556e-06, "loss": 0.0026, "step": 69670 }, { "epoch": 2.073408418014372, "grad_norm": 0.17587412893772125, "learning_rate": 2.6350047373311394e-06, "loss": 0.0029, "step": 69680 }, { "epoch": 2.0737059794980137, "grad_norm": 0.12978839874267578, "learning_rate": 2.6334796075352716e-06, "loss": 0.0045, "step": 69690 }, { "epoch": 2.0740035409816553, "grad_norm": 0.09529003500938416, "learning_rate": 2.631954761433783e-06, "loss": 0.0028, "step": 69700 }, { "epoch": 2.074301102465297, "grad_norm": 0.11053188890218735, "learning_rate": 2.6304301992094706e-06, "loss": 0.0017, "step": 69710 }, { "epoch": 2.0745986639489384, "grad_norm": 0.07481317967176437, "learning_rate": 2.628905921045098e-06, "loss": 0.0025, "step": 69720 }, { "epoch": 2.07489622543258, "grad_norm": 0.23115263879299164, "learning_rate": 2.627381927123389e-06, "loss": 0.0023, "step": 69730 }, { "epoch": 2.0751937869162216, "grad_norm": 0.22295334935188293, "learning_rate": 2.6258582176270387e-06, "loss": 0.0027, "step": 69740 }, { "epoch": 2.075491348399863, "grad_norm": 0.15922653675079346, "learning_rate": 2.624334792738708e-06, "loss": 0.002, "step": 69750 }, { "epoch": 2.0757889098835047, "grad_norm": 0.30183538794517517, "learning_rate": 2.6228116526410236e-06, "loss": 0.0025, "step": 69760 }, { "epoch": 2.0760864713671463, "grad_norm": 0.32075241208076477, "learning_rate": 2.6212887975165712e-06, "loss": 0.0039, "step": 69770 }, { "epoch": 2.076384032850788, "grad_norm": 0.1816563457250595, "learning_rate": 2.6197662275479137e-06, "loss": 0.002, "step": 69780 }, { "epoch": 2.0766815943344294, "grad_norm": 0.22364267706871033, "learning_rate": 2.6182439429175744e-06, "loss": 0.0041, "step": 69790 }, { "epoch": 2.076979155818071, "grad_norm": 0.126728817820549, "learning_rate": 2.616721943808038e-06, "loss": 0.0016, "step": 69800 }, { "epoch": 2.0772767173017126, "grad_norm": 0.09560132771730423, "learning_rate": 2.615200230401762e-06, "loss": 0.0017, "step": 69810 }, { "epoch": 2.077574278785354, "grad_norm": 0.14647641777992249, "learning_rate": 2.6136788028811654e-06, "loss": 0.0018, "step": 69820 }, { "epoch": 2.0778718402689957, "grad_norm": 0.06878605484962463, "learning_rate": 2.6121576614286374e-06, "loss": 0.002, "step": 69830 }, { "epoch": 2.0781694017526373, "grad_norm": 0.2364015132188797, "learning_rate": 2.6106368062265253e-06, "loss": 0.004, "step": 69840 }, { "epoch": 2.078466963236279, "grad_norm": 0.2013169676065445, "learning_rate": 2.609116237457149e-06, "loss": 0.003, "step": 69850 }, { "epoch": 2.0787645247199205, "grad_norm": 0.07628139853477478, "learning_rate": 2.6075959553027906e-06, "loss": 0.0035, "step": 69860 }, { "epoch": 2.079062086203562, "grad_norm": 0.1428423523902893, "learning_rate": 2.6060759599457026e-06, "loss": 0.0021, "step": 69870 }, { "epoch": 2.079359647687203, "grad_norm": 0.05702202022075653, "learning_rate": 2.604556251568094e-06, "loss": 0.0008, "step": 69880 }, { "epoch": 2.0796572091708447, "grad_norm": 0.35141563415527344, "learning_rate": 2.603036830352147e-06, "loss": 0.0022, "step": 69890 }, { "epoch": 2.0799547706544863, "grad_norm": 0.35781756043434143, "learning_rate": 2.6015176964800083e-06, "loss": 0.0023, "step": 69900 }, { "epoch": 2.080252332138128, "grad_norm": 0.11782762408256531, "learning_rate": 2.5999988501337892e-06, "loss": 0.0023, "step": 69910 }, { "epoch": 2.0805498936217695, "grad_norm": 0.03948216885328293, "learning_rate": 2.5984802914955638e-06, "loss": 0.0016, "step": 69920 }, { "epoch": 2.080847455105411, "grad_norm": 0.33450356125831604, "learning_rate": 2.5969620207473754e-06, "loss": 0.0018, "step": 69930 }, { "epoch": 2.0811450165890526, "grad_norm": 0.4071536958217621, "learning_rate": 2.5954440380712355e-06, "loss": 0.0044, "step": 69940 }, { "epoch": 2.081442578072694, "grad_norm": 0.2495996505022049, "learning_rate": 2.5939263436491112e-06, "loss": 0.0018, "step": 69950 }, { "epoch": 2.0817401395563357, "grad_norm": 0.5429041981697083, "learning_rate": 2.5924089376629435e-06, "loss": 0.0016, "step": 69960 }, { "epoch": 2.0820377010399773, "grad_norm": 0.12773871421813965, "learning_rate": 2.590891820294637e-06, "loss": 0.0013, "step": 69970 }, { "epoch": 2.082335262523619, "grad_norm": 0.06121331453323364, "learning_rate": 2.5893749917260624e-06, "loss": 0.0014, "step": 69980 }, { "epoch": 2.0826328240072605, "grad_norm": 0.13725383579730988, "learning_rate": 2.5878584521390513e-06, "loss": 0.0019, "step": 69990 }, { "epoch": 2.082930385490902, "grad_norm": 0.13115333020687103, "learning_rate": 2.586342201715406e-06, "loss": 0.0016, "step": 70000 }, { "epoch": 2.0832279469745436, "grad_norm": 0.08803735673427582, "learning_rate": 2.5848262406368905e-06, "loss": 0.0028, "step": 70010 }, { "epoch": 2.083525508458185, "grad_norm": 0.25397375226020813, "learning_rate": 2.583310569085239e-06, "loss": 0.0015, "step": 70020 }, { "epoch": 2.0838230699418268, "grad_norm": 0.18023709952831268, "learning_rate": 2.581795187242143e-06, "loss": 0.0028, "step": 70030 }, { "epoch": 2.0841206314254683, "grad_norm": 0.126946821808815, "learning_rate": 2.580280095289266e-06, "loss": 0.0023, "step": 70040 }, { "epoch": 2.08441819290911, "grad_norm": 0.1098899096250534, "learning_rate": 2.578765293408235e-06, "loss": 0.0021, "step": 70050 }, { "epoch": 2.0847157543927515, "grad_norm": 0.32706308364868164, "learning_rate": 2.577250781780641e-06, "loss": 0.0029, "step": 70060 }, { "epoch": 2.085013315876393, "grad_norm": 0.08048133552074432, "learning_rate": 2.575736560588042e-06, "loss": 0.0012, "step": 70070 }, { "epoch": 2.0853108773600346, "grad_norm": 0.14153389632701874, "learning_rate": 2.574222630011959e-06, "loss": 0.0017, "step": 70080 }, { "epoch": 2.085608438843676, "grad_norm": 0.12919113039970398, "learning_rate": 2.5727089902338825e-06, "loss": 0.0014, "step": 70090 }, { "epoch": 2.0859060003273178, "grad_norm": 0.20390647649765015, "learning_rate": 2.57119564143526e-06, "loss": 0.0023, "step": 70100 }, { "epoch": 2.0862035618109593, "grad_norm": 0.2436024397611618, "learning_rate": 2.5696825837975126e-06, "loss": 0.0019, "step": 70110 }, { "epoch": 2.086501123294601, "grad_norm": 0.08732740581035614, "learning_rate": 2.568169817502022e-06, "loss": 0.0023, "step": 70120 }, { "epoch": 2.0867986847782425, "grad_norm": 0.16931216418743134, "learning_rate": 2.5666573427301377e-06, "loss": 0.0014, "step": 70130 }, { "epoch": 2.087096246261884, "grad_norm": 0.11834289878606796, "learning_rate": 2.5651451596631694e-06, "loss": 0.0024, "step": 70140 }, { "epoch": 2.0873938077455256, "grad_norm": 0.12521854043006897, "learning_rate": 2.5636332684823974e-06, "loss": 0.0013, "step": 70150 }, { "epoch": 2.0876913692291668, "grad_norm": 0.010576393455266953, "learning_rate": 2.5621216693690626e-06, "loss": 0.0013, "step": 70160 }, { "epoch": 2.0879889307128083, "grad_norm": 0.22591401636600494, "learning_rate": 2.5606103625043766e-06, "loss": 0.0028, "step": 70170 }, { "epoch": 2.08828649219645, "grad_norm": 0.16132138669490814, "learning_rate": 2.559099348069508e-06, "loss": 0.002, "step": 70180 }, { "epoch": 2.0885840536800915, "grad_norm": 0.142334446310997, "learning_rate": 2.5575886262455963e-06, "loss": 0.0019, "step": 70190 }, { "epoch": 2.088881615163733, "grad_norm": 0.1461077630519867, "learning_rate": 2.5560781972137445e-06, "loss": 0.0011, "step": 70200 }, { "epoch": 2.0891791766473746, "grad_norm": 0.2017914205789566, "learning_rate": 2.5545680611550216e-06, "loss": 0.0016, "step": 70210 }, { "epoch": 2.089476738131016, "grad_norm": 0.21349506080150604, "learning_rate": 2.553058218250457e-06, "loss": 0.0021, "step": 70220 }, { "epoch": 2.0897742996146578, "grad_norm": 0.11421357095241547, "learning_rate": 2.5515486686810496e-06, "loss": 0.0035, "step": 70230 }, { "epoch": 2.0900718610982993, "grad_norm": 0.1132815033197403, "learning_rate": 2.5500394126277615e-06, "loss": 0.0024, "step": 70240 }, { "epoch": 2.090369422581941, "grad_norm": 0.13080862164497375, "learning_rate": 2.548530450271522e-06, "loss": 0.002, "step": 70250 }, { "epoch": 2.0906669840655825, "grad_norm": 0.1252777874469757, "learning_rate": 2.5470217817932185e-06, "loss": 0.0028, "step": 70260 }, { "epoch": 2.090964545549224, "grad_norm": 0.2941845953464508, "learning_rate": 2.5455134073737097e-06, "loss": 0.0019, "step": 70270 }, { "epoch": 2.0912621070328656, "grad_norm": 0.21404610574245453, "learning_rate": 2.544005327193819e-06, "loss": 0.0026, "step": 70280 }, { "epoch": 2.091559668516507, "grad_norm": 0.1405680924654007, "learning_rate": 2.542497541434329e-06, "loss": 0.0022, "step": 70290 }, { "epoch": 2.091857230000149, "grad_norm": 0.058028142899274826, "learning_rate": 2.540990050275992e-06, "loss": 0.0016, "step": 70300 }, { "epoch": 2.0921547914837904, "grad_norm": 0.19393350183963776, "learning_rate": 2.539482853899523e-06, "loss": 0.0016, "step": 70310 }, { "epoch": 2.092452352967432, "grad_norm": 0.15697833895683289, "learning_rate": 2.537975952485605e-06, "loss": 0.0025, "step": 70320 }, { "epoch": 2.0927499144510735, "grad_norm": 0.40615320205688477, "learning_rate": 2.5364693462148776e-06, "loss": 0.0032, "step": 70330 }, { "epoch": 2.093047475934715, "grad_norm": 0.15950308740139008, "learning_rate": 2.534963035267951e-06, "loss": 0.0021, "step": 70340 }, { "epoch": 2.0933450374183566, "grad_norm": 0.11567568778991699, "learning_rate": 2.533457019825405e-06, "loss": 0.0017, "step": 70350 }, { "epoch": 2.093642598901998, "grad_norm": 0.13869452476501465, "learning_rate": 2.531951300067772e-06, "loss": 0.0018, "step": 70360 }, { "epoch": 2.09394016038564, "grad_norm": 0.14877445995807648, "learning_rate": 2.530445876175557e-06, "loss": 0.0025, "step": 70370 }, { "epoch": 2.0942377218692814, "grad_norm": 0.12807321548461914, "learning_rate": 2.528940748329228e-06, "loss": 0.0036, "step": 70380 }, { "epoch": 2.094535283352923, "grad_norm": 0.10172680765390396, "learning_rate": 2.527435916709219e-06, "loss": 0.001, "step": 70390 }, { "epoch": 2.0948328448365645, "grad_norm": 0.22169266641139984, "learning_rate": 2.5259313814959217e-06, "loss": 0.0025, "step": 70400 }, { "epoch": 2.095130406320206, "grad_norm": 0.22445134818553925, "learning_rate": 2.5244271428697007e-06, "loss": 0.0015, "step": 70410 }, { "epoch": 2.0954279678038477, "grad_norm": 0.31469160318374634, "learning_rate": 2.522923201010881e-06, "loss": 0.0032, "step": 70420 }, { "epoch": 2.0957255292874892, "grad_norm": 0.08873651921749115, "learning_rate": 2.5214195560997546e-06, "loss": 0.0014, "step": 70430 }, { "epoch": 2.096023090771131, "grad_norm": 0.2032884657382965, "learning_rate": 2.5199162083165707e-06, "loss": 0.0024, "step": 70440 }, { "epoch": 2.096320652254772, "grad_norm": 0.04989038035273552, "learning_rate": 2.5184131578415515e-06, "loss": 0.0016, "step": 70450 }, { "epoch": 2.0966182137384135, "grad_norm": 0.18493102490901947, "learning_rate": 2.51691040485488e-06, "loss": 0.0026, "step": 70460 }, { "epoch": 2.096915775222055, "grad_norm": 0.14255140721797943, "learning_rate": 2.5154079495367047e-06, "loss": 0.0034, "step": 70470 }, { "epoch": 2.0972133367056967, "grad_norm": 0.11221984773874283, "learning_rate": 2.5139057920671337e-06, "loss": 0.0014, "step": 70480 }, { "epoch": 2.0975108981893382, "grad_norm": 0.20907637476921082, "learning_rate": 2.5124039326262455e-06, "loss": 0.0021, "step": 70490 }, { "epoch": 2.09780845967298, "grad_norm": 0.09078315645456314, "learning_rate": 2.5109023713940806e-06, "loss": 0.004, "step": 70500 }, { "epoch": 2.0981060211566214, "grad_norm": 0.008863041177392006, "learning_rate": 2.509401108550644e-06, "loss": 0.0023, "step": 70510 }, { "epoch": 2.098403582640263, "grad_norm": 0.08183114230632782, "learning_rate": 2.507900144275902e-06, "loss": 0.0021, "step": 70520 }, { "epoch": 2.0987011441239045, "grad_norm": 0.12607212364673615, "learning_rate": 2.5063994787497896e-06, "loss": 0.0019, "step": 70530 }, { "epoch": 2.098998705607546, "grad_norm": 0.1376790702342987, "learning_rate": 2.504899112152204e-06, "loss": 0.002, "step": 70540 }, { "epoch": 2.0992962670911877, "grad_norm": 0.21578571200370789, "learning_rate": 2.503399044663007e-06, "loss": 0.0029, "step": 70550 }, { "epoch": 2.0995938285748292, "grad_norm": 0.1417628973722458, "learning_rate": 2.5018992764620222e-06, "loss": 0.0018, "step": 70560 }, { "epoch": 2.099891390058471, "grad_norm": 0.257898211479187, "learning_rate": 2.500399807729039e-06, "loss": 0.0016, "step": 70570 }, { "epoch": 2.1001889515421124, "grad_norm": 0.25664016604423523, "learning_rate": 2.4989006386438135e-06, "loss": 0.005, "step": 70580 }, { "epoch": 2.100486513025754, "grad_norm": 0.03255152329802513, "learning_rate": 2.497401769386064e-06, "loss": 0.0017, "step": 70590 }, { "epoch": 2.1007840745093955, "grad_norm": 0.04434340447187424, "learning_rate": 2.495903200135469e-06, "loss": 0.0015, "step": 70600 }, { "epoch": 2.101081635993037, "grad_norm": 0.041416559368371964, "learning_rate": 2.4944049310716763e-06, "loss": 0.0013, "step": 70610 }, { "epoch": 2.1013791974766787, "grad_norm": 0.1210818812251091, "learning_rate": 2.4929069623742953e-06, "loss": 0.0035, "step": 70620 }, { "epoch": 2.1016767589603202, "grad_norm": 0.08395014703273773, "learning_rate": 2.4914092942229e-06, "loss": 0.0012, "step": 70630 }, { "epoch": 2.101974320443962, "grad_norm": 0.13485923409461975, "learning_rate": 2.489911926797029e-06, "loss": 0.002, "step": 70640 }, { "epoch": 2.1022718819276034, "grad_norm": 0.10985607653856277, "learning_rate": 2.488414860276184e-06, "loss": 0.0015, "step": 70650 }, { "epoch": 2.102569443411245, "grad_norm": 0.04710479453206062, "learning_rate": 2.4869180948398315e-06, "loss": 0.0022, "step": 70660 }, { "epoch": 2.1028670048948865, "grad_norm": 0.1461871862411499, "learning_rate": 2.4854216306673983e-06, "loss": 0.0011, "step": 70670 }, { "epoch": 2.103164566378528, "grad_norm": 0.06281663477420807, "learning_rate": 2.48392546793828e-06, "loss": 0.0017, "step": 70680 }, { "epoch": 2.1034621278621697, "grad_norm": 0.17133651673793793, "learning_rate": 2.482429606831836e-06, "loss": 0.0023, "step": 70690 }, { "epoch": 2.1037596893458113, "grad_norm": 0.10272181779146194, "learning_rate": 2.4809340475273824e-06, "loss": 0.0013, "step": 70700 }, { "epoch": 2.104057250829453, "grad_norm": 0.4044262170791626, "learning_rate": 2.4794387902042076e-06, "loss": 0.003, "step": 70710 }, { "epoch": 2.1043548123130944, "grad_norm": 0.21864309906959534, "learning_rate": 2.4779438350415604e-06, "loss": 0.0036, "step": 70720 }, { "epoch": 2.1046523737967355, "grad_norm": 0.1273031383752823, "learning_rate": 2.476449182218655e-06, "loss": 0.0019, "step": 70730 }, { "epoch": 2.104949935280377, "grad_norm": 0.0774899497628212, "learning_rate": 2.474954831914663e-06, "loss": 0.0013, "step": 70740 }, { "epoch": 2.1052474967640187, "grad_norm": 0.2817961275577545, "learning_rate": 2.4734607843087284e-06, "loss": 0.002, "step": 70750 }, { "epoch": 2.1055450582476603, "grad_norm": 0.20804303884506226, "learning_rate": 2.4719670395799535e-06, "loss": 0.0025, "step": 70760 }, { "epoch": 2.105842619731302, "grad_norm": 0.1587083637714386, "learning_rate": 2.470473597907408e-06, "loss": 0.0027, "step": 70770 }, { "epoch": 2.1061401812149434, "grad_norm": 0.15478110313415527, "learning_rate": 2.46898045947012e-06, "loss": 0.0019, "step": 70780 }, { "epoch": 2.106437742698585, "grad_norm": 0.40452226996421814, "learning_rate": 2.4674876244470857e-06, "loss": 0.003, "step": 70790 }, { "epoch": 2.1067353041822265, "grad_norm": 0.07333457469940186, "learning_rate": 2.465995093017264e-06, "loss": 0.002, "step": 70800 }, { "epoch": 2.107032865665868, "grad_norm": 0.47140246629714966, "learning_rate": 2.464502865359578e-06, "loss": 0.0021, "step": 70810 }, { "epoch": 2.1073304271495097, "grad_norm": 0.08013502508401871, "learning_rate": 2.463010941652911e-06, "loss": 0.002, "step": 70820 }, { "epoch": 2.1076279886331513, "grad_norm": 0.08075553923845291, "learning_rate": 2.4615193220761123e-06, "loss": 0.0018, "step": 70830 }, { "epoch": 2.107925550116793, "grad_norm": 0.11137010902166367, "learning_rate": 2.460028006807996e-06, "loss": 0.0022, "step": 70840 }, { "epoch": 2.1082231116004344, "grad_norm": 0.04658208414912224, "learning_rate": 2.4585369960273407e-06, "loss": 0.0021, "step": 70850 }, { "epoch": 2.108520673084076, "grad_norm": 0.2696504294872284, "learning_rate": 2.457046289912881e-06, "loss": 0.002, "step": 70860 }, { "epoch": 2.1088182345677176, "grad_norm": 0.301297128200531, "learning_rate": 2.4555558886433235e-06, "loss": 0.0014, "step": 70870 }, { "epoch": 2.109115796051359, "grad_norm": 0.1909172236919403, "learning_rate": 2.4540657923973338e-06, "loss": 0.0013, "step": 70880 }, { "epoch": 2.1094133575350007, "grad_norm": 0.4860917925834656, "learning_rate": 2.4525760013535445e-06, "loss": 0.002, "step": 70890 }, { "epoch": 2.1097109190186423, "grad_norm": 0.1571756899356842, "learning_rate": 2.4510865156905445e-06, "loss": 0.0025, "step": 70900 }, { "epoch": 2.110008480502284, "grad_norm": 0.14188532531261444, "learning_rate": 2.4495973355868926e-06, "loss": 0.0016, "step": 70910 }, { "epoch": 2.1103060419859254, "grad_norm": 0.06126716360449791, "learning_rate": 2.4481084612211133e-06, "loss": 0.0012, "step": 70920 }, { "epoch": 2.110603603469567, "grad_norm": 0.06600295752286911, "learning_rate": 2.446619892771685e-06, "loss": 0.0012, "step": 70930 }, { "epoch": 2.1109011649532086, "grad_norm": 0.11805884540081024, "learning_rate": 2.4451316304170564e-06, "loss": 0.0035, "step": 70940 }, { "epoch": 2.11119872643685, "grad_norm": 0.12573592364788055, "learning_rate": 2.4436436743356385e-06, "loss": 0.0018, "step": 70950 }, { "epoch": 2.1114962879204917, "grad_norm": 0.08699245005846024, "learning_rate": 2.442156024705806e-06, "loss": 0.0015, "step": 70960 }, { "epoch": 2.1117938494041333, "grad_norm": 0.11336460709571838, "learning_rate": 2.4406686817058935e-06, "loss": 0.0013, "step": 70970 }, { "epoch": 2.112091410887775, "grad_norm": 0.029660169035196304, "learning_rate": 2.4391816455142008e-06, "loss": 0.0017, "step": 70980 }, { "epoch": 2.1123889723714164, "grad_norm": 0.17467963695526123, "learning_rate": 2.4376949163089923e-06, "loss": 0.0023, "step": 70990 }, { "epoch": 2.112686533855058, "grad_norm": 0.07780828326940536, "learning_rate": 2.4362084942684973e-06, "loss": 0.0018, "step": 71000 }, { "epoch": 2.1129840953386996, "grad_norm": 0.09837993234395981, "learning_rate": 2.4347223795709003e-06, "loss": 0.0018, "step": 71010 }, { "epoch": 2.1132816568223407, "grad_norm": 0.20902852714061737, "learning_rate": 2.4332365723943565e-06, "loss": 0.0031, "step": 71020 }, { "epoch": 2.1135792183059823, "grad_norm": 0.20385432243347168, "learning_rate": 2.431751072916982e-06, "loss": 0.0024, "step": 71030 }, { "epoch": 2.113876779789624, "grad_norm": 0.038745980709791183, "learning_rate": 2.4302658813168577e-06, "loss": 0.0008, "step": 71040 }, { "epoch": 2.1141743412732654, "grad_norm": 0.3145582973957062, "learning_rate": 2.4287809977720226e-06, "loss": 0.0018, "step": 71050 }, { "epoch": 2.114471902756907, "grad_norm": 0.10662675648927689, "learning_rate": 2.4272964224604835e-06, "loss": 0.0015, "step": 71060 }, { "epoch": 2.1147694642405486, "grad_norm": 0.05648425221443176, "learning_rate": 2.4258121555602105e-06, "loss": 0.002, "step": 71070 }, { "epoch": 2.11506702572419, "grad_norm": 0.09144975990056992, "learning_rate": 2.4243281972491305e-06, "loss": 0.0023, "step": 71080 }, { "epoch": 2.1153645872078317, "grad_norm": 0.20502969622612, "learning_rate": 2.422844547705141e-06, "loss": 0.0015, "step": 71090 }, { "epoch": 2.1156621486914733, "grad_norm": 0.13259898126125336, "learning_rate": 2.4213612071060987e-06, "loss": 0.0021, "step": 71100 }, { "epoch": 2.115959710175115, "grad_norm": 0.12236855179071426, "learning_rate": 2.419878175629826e-06, "loss": 0.0029, "step": 71110 }, { "epoch": 2.1162572716587564, "grad_norm": 0.12968118488788605, "learning_rate": 2.4183954534541028e-06, "loss": 0.0022, "step": 71120 }, { "epoch": 2.116554833142398, "grad_norm": 0.04544657841324806, "learning_rate": 2.4169130407566753e-06, "loss": 0.002, "step": 71130 }, { "epoch": 2.1168523946260396, "grad_norm": 0.2867118716239929, "learning_rate": 2.415430937715255e-06, "loss": 0.0017, "step": 71140 }, { "epoch": 2.117149956109681, "grad_norm": 0.03275275230407715, "learning_rate": 2.413949144507514e-06, "loss": 0.0006, "step": 71150 }, { "epoch": 2.1174475175933227, "grad_norm": 0.8575523495674133, "learning_rate": 2.412467661311085e-06, "loss": 0.0027, "step": 71160 }, { "epoch": 2.1177450790769643, "grad_norm": 0.13389483094215393, "learning_rate": 2.4109864883035656e-06, "loss": 0.0026, "step": 71170 }, { "epoch": 2.118042640560606, "grad_norm": 0.14594930410385132, "learning_rate": 2.4095056256625178e-06, "loss": 0.0013, "step": 71180 }, { "epoch": 2.1183402020442474, "grad_norm": 0.00929952785372734, "learning_rate": 2.408025073565464e-06, "loss": 0.0007, "step": 71190 }, { "epoch": 2.118637763527889, "grad_norm": 0.09131769835948944, "learning_rate": 2.4065448321898897e-06, "loss": 0.0023, "step": 71200 }, { "epoch": 2.1189353250115306, "grad_norm": 0.21544674038887024, "learning_rate": 2.4050649017132455e-06, "loss": 0.0015, "step": 71210 }, { "epoch": 2.119232886495172, "grad_norm": 0.11301103234291077, "learning_rate": 2.4035852823129434e-06, "loss": 0.0025, "step": 71220 }, { "epoch": 2.1195304479788137, "grad_norm": 0.06154705956578255, "learning_rate": 2.4021059741663544e-06, "loss": 0.0014, "step": 71230 }, { "epoch": 2.1198280094624553, "grad_norm": 0.0800170972943306, "learning_rate": 2.4006269774508166e-06, "loss": 0.0008, "step": 71240 }, { "epoch": 2.120125570946097, "grad_norm": 0.17214815318584442, "learning_rate": 2.399148292343631e-06, "loss": 0.0021, "step": 71250 }, { "epoch": 2.1204231324297385, "grad_norm": 0.24949553608894348, "learning_rate": 2.39766991902206e-06, "loss": 0.0051, "step": 71260 }, { "epoch": 2.12072069391338, "grad_norm": 0.06540338695049286, "learning_rate": 2.396191857663326e-06, "loss": 0.0022, "step": 71270 }, { "epoch": 2.1210182553970216, "grad_norm": 0.11204742640256882, "learning_rate": 2.394714108444617e-06, "loss": 0.0013, "step": 71280 }, { "epoch": 2.121315816880663, "grad_norm": 0.2570456564426422, "learning_rate": 2.3932366715430844e-06, "loss": 0.0014, "step": 71290 }, { "epoch": 2.1216133783643043, "grad_norm": 0.12431660294532776, "learning_rate": 2.3917595471358417e-06, "loss": 0.0018, "step": 71300 }, { "epoch": 2.121910939847946, "grad_norm": 0.01963246613740921, "learning_rate": 2.390282735399961e-06, "loss": 0.0026, "step": 71310 }, { "epoch": 2.1222085013315874, "grad_norm": 0.3804585635662079, "learning_rate": 2.3888062365124813e-06, "loss": 0.0024, "step": 71320 }, { "epoch": 2.122506062815229, "grad_norm": 0.12321975082159042, "learning_rate": 2.3873300506504027e-06, "loss": 0.0012, "step": 71330 }, { "epoch": 2.1228036242988706, "grad_norm": 0.04598237946629524, "learning_rate": 2.38585417799069e-06, "loss": 0.0012, "step": 71340 }, { "epoch": 2.123101185782512, "grad_norm": 0.2420305609703064, "learning_rate": 2.384378618710265e-06, "loss": 0.0022, "step": 71350 }, { "epoch": 2.1233987472661537, "grad_norm": 0.05846022069454193, "learning_rate": 2.382903372986016e-06, "loss": 0.0034, "step": 71360 }, { "epoch": 2.1236963087497953, "grad_norm": 0.10872279107570648, "learning_rate": 2.3814284409947928e-06, "loss": 0.0022, "step": 71370 }, { "epoch": 2.123993870233437, "grad_norm": 0.161161407828331, "learning_rate": 2.379953822913411e-06, "loss": 0.0015, "step": 71380 }, { "epoch": 2.1242914317170785, "grad_norm": 0.28789299726486206, "learning_rate": 2.3784795189186405e-06, "loss": 0.0032, "step": 71390 }, { "epoch": 2.12458899320072, "grad_norm": 0.08809221535921097, "learning_rate": 2.37700552918722e-06, "loss": 0.002, "step": 71400 }, { "epoch": 2.1248865546843616, "grad_norm": 0.12912002205848694, "learning_rate": 2.3755318538958498e-06, "loss": 0.0038, "step": 71410 }, { "epoch": 2.125184116168003, "grad_norm": 0.09880244731903076, "learning_rate": 2.3740584932211924e-06, "loss": 0.0021, "step": 71420 }, { "epoch": 2.1254816776516448, "grad_norm": 0.24920009076595306, "learning_rate": 2.372585447339869e-06, "loss": 0.0024, "step": 71430 }, { "epoch": 2.1257792391352863, "grad_norm": 0.1019720807671547, "learning_rate": 2.3711127164284665e-06, "loss": 0.002, "step": 71440 }, { "epoch": 2.126076800618928, "grad_norm": 0.011566855944693089, "learning_rate": 2.369640300663536e-06, "loss": 0.0044, "step": 71450 }, { "epoch": 2.1263743621025695, "grad_norm": 0.03402305021882057, "learning_rate": 2.3681682002215846e-06, "loss": 0.0024, "step": 71460 }, { "epoch": 2.126671923586211, "grad_norm": 0.10381757467985153, "learning_rate": 2.3666964152790863e-06, "loss": 0.0038, "step": 71470 }, { "epoch": 2.1269694850698526, "grad_norm": 0.0456082820892334, "learning_rate": 2.365224946012476e-06, "loss": 0.0044, "step": 71480 }, { "epoch": 2.127267046553494, "grad_norm": 0.09669435024261475, "learning_rate": 2.3637537925981525e-06, "loss": 0.0018, "step": 71490 }, { "epoch": 2.1275646080371358, "grad_norm": 0.20255780220031738, "learning_rate": 2.362282955212473e-06, "loss": 0.0035, "step": 71500 }, { "epoch": 2.1278621695207773, "grad_norm": 0.18959926068782806, "learning_rate": 2.3608124340317607e-06, "loss": 0.0025, "step": 71510 }, { "epoch": 2.128159731004419, "grad_norm": 0.29505833983421326, "learning_rate": 2.3593422292323e-06, "loss": 0.0039, "step": 71520 }, { "epoch": 2.1284572924880605, "grad_norm": 0.05729852616786957, "learning_rate": 2.357872340990333e-06, "loss": 0.0041, "step": 71530 }, { "epoch": 2.128754853971702, "grad_norm": 0.12626519799232483, "learning_rate": 2.3564027694820695e-06, "loss": 0.0028, "step": 71540 }, { "epoch": 2.1290524154553436, "grad_norm": 0.02678295038640499, "learning_rate": 2.354933514883679e-06, "loss": 0.0009, "step": 71550 }, { "epoch": 2.129349976938985, "grad_norm": 0.08041242510080338, "learning_rate": 2.3534645773712945e-06, "loss": 0.0013, "step": 71560 }, { "epoch": 2.1296475384226268, "grad_norm": 0.08647506684064865, "learning_rate": 2.3519959571210073e-06, "loss": 0.0019, "step": 71570 }, { "epoch": 2.1299450999062683, "grad_norm": 0.11735594272613525, "learning_rate": 2.3505276543088735e-06, "loss": 0.0012, "step": 71580 }, { "epoch": 2.13024266138991, "grad_norm": 0.13627390563488007, "learning_rate": 2.349059669110912e-06, "loss": 0.0026, "step": 71590 }, { "epoch": 2.130540222873551, "grad_norm": 0.07279716432094574, "learning_rate": 2.347592001703103e-06, "loss": 0.0017, "step": 71600 }, { "epoch": 2.1308377843571926, "grad_norm": 0.06152171269059181, "learning_rate": 2.346124652261385e-06, "loss": 0.0014, "step": 71610 }, { "epoch": 2.131135345840834, "grad_norm": 0.10717885196208954, "learning_rate": 2.344657620961663e-06, "loss": 0.0028, "step": 71620 }, { "epoch": 2.1314329073244758, "grad_norm": 0.12847690284252167, "learning_rate": 2.343190907979802e-06, "loss": 0.0023, "step": 71630 }, { "epoch": 2.1317304688081173, "grad_norm": 0.18422691524028778, "learning_rate": 2.3417245134916313e-06, "loss": 0.0032, "step": 71640 }, { "epoch": 2.132028030291759, "grad_norm": 0.03136000782251358, "learning_rate": 2.3402584376729352e-06, "loss": 0.0008, "step": 71650 }, { "epoch": 2.1323255917754005, "grad_norm": 0.11815138161182404, "learning_rate": 2.338792680699467e-06, "loss": 0.0017, "step": 71660 }, { "epoch": 2.132623153259042, "grad_norm": 0.1263553947210312, "learning_rate": 2.337327242746939e-06, "loss": 0.0014, "step": 71670 }, { "epoch": 2.1329207147426836, "grad_norm": 0.2550528347492218, "learning_rate": 2.335862123991027e-06, "loss": 0.0019, "step": 71680 }, { "epoch": 2.133218276226325, "grad_norm": 0.11369255930185318, "learning_rate": 2.334397324607363e-06, "loss": 0.0019, "step": 71690 }, { "epoch": 2.133515837709967, "grad_norm": 0.13541685044765472, "learning_rate": 2.3329328447715473e-06, "loss": 0.0019, "step": 71700 }, { "epoch": 2.1338133991936084, "grad_norm": 0.10182439535856247, "learning_rate": 2.3314686846591385e-06, "loss": 0.0034, "step": 71710 }, { "epoch": 2.13411096067725, "grad_norm": 0.1463930457830429, "learning_rate": 2.330004844445659e-06, "loss": 0.0015, "step": 71720 }, { "epoch": 2.1344085221608915, "grad_norm": 0.09750929474830627, "learning_rate": 2.328541324306589e-06, "loss": 0.0029, "step": 71730 }, { "epoch": 2.134706083644533, "grad_norm": 0.1428549587726593, "learning_rate": 2.3270781244173733e-06, "loss": 0.0017, "step": 71740 }, { "epoch": 2.1350036451281746, "grad_norm": 0.24100042879581451, "learning_rate": 2.325615244953419e-06, "loss": 0.0019, "step": 71750 }, { "epoch": 2.135301206611816, "grad_norm": 0.059010718017816544, "learning_rate": 2.324152686090093e-06, "loss": 0.002, "step": 71760 }, { "epoch": 2.135598768095458, "grad_norm": 0.1957663744688034, "learning_rate": 2.322690448002724e-06, "loss": 0.0031, "step": 71770 }, { "epoch": 2.1358963295790994, "grad_norm": 0.20539207756519318, "learning_rate": 2.321228530866603e-06, "loss": 0.0019, "step": 71780 }, { "epoch": 2.136193891062741, "grad_norm": 0.06410354375839233, "learning_rate": 2.319766934856984e-06, "loss": 0.0018, "step": 71790 }, { "epoch": 2.1364914525463825, "grad_norm": 0.09822258353233337, "learning_rate": 2.3183056601490766e-06, "loss": 0.0022, "step": 71800 }, { "epoch": 2.136789014030024, "grad_norm": 0.2421475350856781, "learning_rate": 2.316844706918058e-06, "loss": 0.0012, "step": 71810 }, { "epoch": 2.1370865755136657, "grad_norm": 0.08511409163475037, "learning_rate": 2.315384075339065e-06, "loss": 0.0025, "step": 71820 }, { "epoch": 2.1373841369973072, "grad_norm": 0.16938674449920654, "learning_rate": 2.3139237655871964e-06, "loss": 0.0015, "step": 71830 }, { "epoch": 2.137681698480949, "grad_norm": 0.06217126175761223, "learning_rate": 2.312463777837509e-06, "loss": 0.0018, "step": 71840 }, { "epoch": 2.1379792599645904, "grad_norm": 0.07337382435798645, "learning_rate": 2.3110041122650252e-06, "loss": 0.0026, "step": 71850 }, { "epoch": 2.138276821448232, "grad_norm": 0.11494053900241852, "learning_rate": 2.3095447690447286e-06, "loss": 0.0025, "step": 71860 }, { "epoch": 2.138574382931873, "grad_norm": 0.07374129444360733, "learning_rate": 2.3080857483515597e-06, "loss": 0.0022, "step": 71870 }, { "epoch": 2.1388719444155146, "grad_norm": 0.07794597744941711, "learning_rate": 2.3066270503604245e-06, "loss": 0.0031, "step": 71880 }, { "epoch": 2.139169505899156, "grad_norm": 0.26762861013412476, "learning_rate": 2.3051686752461892e-06, "loss": 0.0024, "step": 71890 }, { "epoch": 2.139467067382798, "grad_norm": 0.09400548040866852, "learning_rate": 2.3037106231836843e-06, "loss": 0.0015, "step": 71900 }, { "epoch": 2.1397646288664394, "grad_norm": 0.21084530651569366, "learning_rate": 2.302252894347694e-06, "loss": 0.0025, "step": 71910 }, { "epoch": 2.140062190350081, "grad_norm": 0.1583118885755539, "learning_rate": 2.30079548891297e-06, "loss": 0.0018, "step": 71920 }, { "epoch": 2.1403597518337225, "grad_norm": 0.1128762736916542, "learning_rate": 2.299338407054224e-06, "loss": 0.0015, "step": 71930 }, { "epoch": 2.140657313317364, "grad_norm": 0.14388637244701385, "learning_rate": 2.2978816489461303e-06, "loss": 0.0019, "step": 71940 }, { "epoch": 2.1409548748010057, "grad_norm": 0.04586634412407875, "learning_rate": 2.296425214763318e-06, "loss": 0.0012, "step": 71950 }, { "epoch": 2.1412524362846472, "grad_norm": 0.22195085883140564, "learning_rate": 2.2949691046803854e-06, "loss": 0.003, "step": 71960 }, { "epoch": 2.141549997768289, "grad_norm": 0.059630636125802994, "learning_rate": 2.2935133188718866e-06, "loss": 0.0024, "step": 71970 }, { "epoch": 2.1418475592519304, "grad_norm": 0.12730465829372406, "learning_rate": 2.292057857512342e-06, "loss": 0.0013, "step": 71980 }, { "epoch": 2.142145120735572, "grad_norm": 0.3222000300884247, "learning_rate": 2.290602720776225e-06, "loss": 0.0021, "step": 71990 }, { "epoch": 2.1424426822192135, "grad_norm": 0.11851789057254791, "learning_rate": 2.2891479088379777e-06, "loss": 0.0033, "step": 72000 }, { "epoch": 2.142740243702855, "grad_norm": 0.08438145369291306, "learning_rate": 2.287693421872e-06, "loss": 0.0038, "step": 72010 }, { "epoch": 2.1430378051864967, "grad_norm": 0.10338035970926285, "learning_rate": 2.286239260052655e-06, "loss": 0.0012, "step": 72020 }, { "epoch": 2.1433353666701382, "grad_norm": 0.06139656528830528, "learning_rate": 2.2847854235542614e-06, "loss": 0.002, "step": 72030 }, { "epoch": 2.14363292815378, "grad_norm": 0.23048582673072815, "learning_rate": 2.283331912551103e-06, "loss": 0.0019, "step": 72040 }, { "epoch": 2.1439304896374214, "grad_norm": 0.12955385446548462, "learning_rate": 2.2818787272174293e-06, "loss": 0.0021, "step": 72050 }, { "epoch": 2.144228051121063, "grad_norm": 0.25716549158096313, "learning_rate": 2.2804258677274405e-06, "loss": 0.0016, "step": 72060 }, { "epoch": 2.1445256126047045, "grad_norm": 0.05691149830818176, "learning_rate": 2.2789733342553044e-06, "loss": 0.0016, "step": 72070 }, { "epoch": 2.144823174088346, "grad_norm": 0.06446006149053574, "learning_rate": 2.2775211269751484e-06, "loss": 0.0009, "step": 72080 }, { "epoch": 2.1451207355719877, "grad_norm": 0.11386701464653015, "learning_rate": 2.2760692460610623e-06, "loss": 0.0018, "step": 72090 }, { "epoch": 2.1454182970556293, "grad_norm": 0.1479395180940628, "learning_rate": 2.2746176916870916e-06, "loss": 0.0028, "step": 72100 }, { "epoch": 2.145715858539271, "grad_norm": 0.2153681367635727, "learning_rate": 2.2731664640272478e-06, "loss": 0.0018, "step": 72110 }, { "epoch": 2.1460134200229124, "grad_norm": 0.21211783587932587, "learning_rate": 2.2717155632555016e-06, "loss": 0.0036, "step": 72120 }, { "epoch": 2.146310981506554, "grad_norm": 0.22971580922603607, "learning_rate": 2.270264989545786e-06, "loss": 0.002, "step": 72130 }, { "epoch": 2.1466085429901955, "grad_norm": 0.3888782262802124, "learning_rate": 2.26881474307199e-06, "loss": 0.0025, "step": 72140 }, { "epoch": 2.146906104473837, "grad_norm": 0.14177149534225464, "learning_rate": 2.2673648240079693e-06, "loss": 0.0017, "step": 72150 }, { "epoch": 2.1472036659574787, "grad_norm": 0.021743327379226685, "learning_rate": 2.2659152325275363e-06, "loss": 0.0018, "step": 72160 }, { "epoch": 2.14750122744112, "grad_norm": 0.22355757653713226, "learning_rate": 2.2644659688044677e-06, "loss": 0.0029, "step": 72170 }, { "epoch": 2.1477987889247614, "grad_norm": 0.06500549614429474, "learning_rate": 2.2630170330124955e-06, "loss": 0.0012, "step": 72180 }, { "epoch": 2.148096350408403, "grad_norm": 0.16625525057315826, "learning_rate": 2.2615684253253168e-06, "loss": 0.0027, "step": 72190 }, { "epoch": 2.1483939118920445, "grad_norm": 0.15124638378620148, "learning_rate": 2.2601201459165908e-06, "loss": 0.002, "step": 72200 }, { "epoch": 2.148691473375686, "grad_norm": 0.039943814277648926, "learning_rate": 2.2586721949599304e-06, "loss": 0.0016, "step": 72210 }, { "epoch": 2.1489890348593277, "grad_norm": 0.04567728191614151, "learning_rate": 2.257224572628916e-06, "loss": 0.002, "step": 72220 }, { "epoch": 2.1492865963429693, "grad_norm": 0.022834649309515953, "learning_rate": 2.255777279097085e-06, "loss": 0.002, "step": 72230 }, { "epoch": 2.149584157826611, "grad_norm": 0.1693122833967209, "learning_rate": 2.25433031453794e-06, "loss": 0.0028, "step": 72240 }, { "epoch": 2.1498817193102524, "grad_norm": 0.4467769265174866, "learning_rate": 2.2528836791249347e-06, "loss": 0.0016, "step": 72250 }, { "epoch": 2.150179280793894, "grad_norm": 0.060398854315280914, "learning_rate": 2.2514373730314924e-06, "loss": 0.0029, "step": 72260 }, { "epoch": 2.1504768422775355, "grad_norm": 0.12137751281261444, "learning_rate": 2.2499913964309943e-06, "loss": 0.0022, "step": 72270 }, { "epoch": 2.150774403761177, "grad_norm": 0.062293924391269684, "learning_rate": 2.2485457494967823e-06, "loss": 0.0029, "step": 72280 }, { "epoch": 2.1510719652448187, "grad_norm": 0.0926937386393547, "learning_rate": 2.247100432402155e-06, "loss": 0.0015, "step": 72290 }, { "epoch": 2.1513695267284603, "grad_norm": 0.058997686952352524, "learning_rate": 2.2456554453203762e-06, "loss": 0.0021, "step": 72300 }, { "epoch": 2.151667088212102, "grad_norm": 0.358768492937088, "learning_rate": 2.2442107884246682e-06, "loss": 0.0026, "step": 72310 }, { "epoch": 2.1519646496957434, "grad_norm": 0.12147624790668488, "learning_rate": 2.2427664618882167e-06, "loss": 0.0019, "step": 72320 }, { "epoch": 2.152262211179385, "grad_norm": 0.06956779211759567, "learning_rate": 2.2413224658841585e-06, "loss": 0.0015, "step": 72330 }, { "epoch": 2.1525597726630266, "grad_norm": 0.10867258906364441, "learning_rate": 2.239878800585603e-06, "loss": 0.0014, "step": 72340 }, { "epoch": 2.152857334146668, "grad_norm": 0.1524481624364853, "learning_rate": 2.2384354661656154e-06, "loss": 0.0028, "step": 72350 }, { "epoch": 2.1531548956303097, "grad_norm": 0.11233357340097427, "learning_rate": 2.236992462797215e-06, "loss": 0.0026, "step": 72360 }, { "epoch": 2.1534524571139513, "grad_norm": 0.26402661204338074, "learning_rate": 2.235549790653389e-06, "loss": 0.002, "step": 72370 }, { "epoch": 2.153750018597593, "grad_norm": 0.023130711168050766, "learning_rate": 2.234107449907083e-06, "loss": 0.0008, "step": 72380 }, { "epoch": 2.1540475800812344, "grad_norm": 0.13210751116275787, "learning_rate": 2.232665440731203e-06, "loss": 0.0016, "step": 72390 }, { "epoch": 2.154345141564876, "grad_norm": 0.3731189966201782, "learning_rate": 2.2312237632986124e-06, "loss": 0.002, "step": 72400 }, { "epoch": 2.1546427030485176, "grad_norm": 0.17537762224674225, "learning_rate": 2.2297824177821374e-06, "loss": 0.0021, "step": 72410 }, { "epoch": 2.154940264532159, "grad_norm": 0.25791415572166443, "learning_rate": 2.2283414043545648e-06, "loss": 0.0031, "step": 72420 }, { "epoch": 2.1552378260158007, "grad_norm": 0.11267334222793579, "learning_rate": 2.2269007231886423e-06, "loss": 0.0018, "step": 72430 }, { "epoch": 2.155535387499442, "grad_norm": 0.20652683079242706, "learning_rate": 2.225460374457073e-06, "loss": 0.0024, "step": 72440 }, { "epoch": 2.1558329489830834, "grad_norm": 0.2021132856607437, "learning_rate": 2.224020358332525e-06, "loss": 0.0018, "step": 72450 }, { "epoch": 2.156130510466725, "grad_norm": 0.155339315533638, "learning_rate": 2.2225806749876244e-06, "loss": 0.0017, "step": 72460 }, { "epoch": 2.1564280719503666, "grad_norm": 0.49266695976257324, "learning_rate": 2.221141324594961e-06, "loss": 0.002, "step": 72470 }, { "epoch": 2.156725633434008, "grad_norm": 0.07615714520215988, "learning_rate": 2.219702307327077e-06, "loss": 0.002, "step": 72480 }, { "epoch": 2.1570231949176497, "grad_norm": 0.13280285894870758, "learning_rate": 2.2182636233564813e-06, "loss": 0.0017, "step": 72490 }, { "epoch": 2.1573207564012913, "grad_norm": 0.10597635805606842, "learning_rate": 2.2168252728556412e-06, "loss": 0.0017, "step": 72500 }, { "epoch": 2.157618317884933, "grad_norm": 0.09093216806650162, "learning_rate": 2.2153872559969853e-06, "loss": 0.0018, "step": 72510 }, { "epoch": 2.1579158793685744, "grad_norm": 0.22151882946491241, "learning_rate": 2.2139495729528963e-06, "loss": 0.0015, "step": 72520 }, { "epoch": 2.158213440852216, "grad_norm": 0.09930059313774109, "learning_rate": 2.212512223895724e-06, "loss": 0.0022, "step": 72530 }, { "epoch": 2.1585110023358576, "grad_norm": 0.03167188912630081, "learning_rate": 2.2110752089977753e-06, "loss": 0.0025, "step": 72540 }, { "epoch": 2.158808563819499, "grad_norm": 0.08434481173753738, "learning_rate": 2.209638528431319e-06, "loss": 0.0012, "step": 72550 }, { "epoch": 2.1591061253031407, "grad_norm": 0.19823111593723297, "learning_rate": 2.2082021823685766e-06, "loss": 0.0023, "step": 72560 }, { "epoch": 2.1594036867867823, "grad_norm": 0.2248901128768921, "learning_rate": 2.2067661709817384e-06, "loss": 0.0018, "step": 72570 }, { "epoch": 2.159701248270424, "grad_norm": 0.17187754809856415, "learning_rate": 2.2053304944429522e-06, "loss": 0.0015, "step": 72580 }, { "epoch": 2.1599988097540654, "grad_norm": 0.19712285697460175, "learning_rate": 2.20389515292432e-06, "loss": 0.0016, "step": 72590 }, { "epoch": 2.160296371237707, "grad_norm": 0.1602768748998642, "learning_rate": 2.202460146597912e-06, "loss": 0.0011, "step": 72600 }, { "epoch": 2.1605939327213486, "grad_norm": 0.17935006320476532, "learning_rate": 2.201025475635752e-06, "loss": 0.0018, "step": 72610 }, { "epoch": 2.16089149420499, "grad_norm": 0.019471919164061546, "learning_rate": 2.1995911402098263e-06, "loss": 0.0011, "step": 72620 }, { "epoch": 2.1611890556886317, "grad_norm": 0.12987017631530762, "learning_rate": 2.198157140492082e-06, "loss": 0.0016, "step": 72630 }, { "epoch": 2.1614866171722733, "grad_norm": 0.04966457933187485, "learning_rate": 2.1967234766544233e-06, "loss": 0.0018, "step": 72640 }, { "epoch": 2.161784178655915, "grad_norm": 0.1385808289051056, "learning_rate": 2.1952901488687172e-06, "loss": 0.0022, "step": 72650 }, { "epoch": 2.1620817401395565, "grad_norm": 0.16234154999256134, "learning_rate": 2.1938571573067854e-06, "loss": 0.0023, "step": 72660 }, { "epoch": 2.162379301623198, "grad_norm": 0.23816394805908203, "learning_rate": 2.192424502140414e-06, "loss": 0.0034, "step": 72670 }, { "epoch": 2.1626768631068396, "grad_norm": 0.36864370107650757, "learning_rate": 2.190992183541347e-06, "loss": 0.002, "step": 72680 }, { "epoch": 2.162974424590481, "grad_norm": 0.08344817906618118, "learning_rate": 2.1895602016812907e-06, "loss": 0.0028, "step": 72690 }, { "epoch": 2.1632719860741227, "grad_norm": 0.16105203330516815, "learning_rate": 2.188128556731904e-06, "loss": 0.0025, "step": 72700 }, { "epoch": 2.1635695475577643, "grad_norm": 0.01565641164779663, "learning_rate": 2.186697248864813e-06, "loss": 0.0012, "step": 72710 }, { "epoch": 2.163867109041406, "grad_norm": 0.11504616588354111, "learning_rate": 2.1852662782516e-06, "loss": 0.0016, "step": 72720 }, { "epoch": 2.1641646705250475, "grad_norm": 0.10258670151233673, "learning_rate": 2.183835645063808e-06, "loss": 0.0019, "step": 72730 }, { "epoch": 2.1644622320086886, "grad_norm": 0.016821444034576416, "learning_rate": 2.1824053494729373e-06, "loss": 0.0025, "step": 72740 }, { "epoch": 2.16475979349233, "grad_norm": 0.20789511501789093, "learning_rate": 2.1809753916504495e-06, "loss": 0.0018, "step": 72750 }, { "epoch": 2.1650573549759717, "grad_norm": 0.1783284991979599, "learning_rate": 2.1795457717677665e-06, "loss": 0.0016, "step": 72760 }, { "epoch": 2.1653549164596133, "grad_norm": 0.23147356510162354, "learning_rate": 2.178116489996269e-06, "loss": 0.0014, "step": 72770 }, { "epoch": 2.165652477943255, "grad_norm": 0.053444795310497284, "learning_rate": 2.176687546507295e-06, "loss": 0.0014, "step": 72780 }, { "epoch": 2.1659500394268965, "grad_norm": 0.12640850245952606, "learning_rate": 2.1752589414721444e-06, "loss": 0.0029, "step": 72790 }, { "epoch": 2.166247600910538, "grad_norm": 0.7859107255935669, "learning_rate": 2.173830675062076e-06, "loss": 0.0037, "step": 72800 }, { "epoch": 2.1665451623941796, "grad_norm": 0.07811474800109863, "learning_rate": 2.17240274744831e-06, "loss": 0.0013, "step": 72810 }, { "epoch": 2.166842723877821, "grad_norm": 0.17721031606197357, "learning_rate": 2.1709751588020207e-06, "loss": 0.0017, "step": 72820 }, { "epoch": 2.1671402853614627, "grad_norm": 0.26745617389678955, "learning_rate": 2.1695479092943454e-06, "loss": 0.002, "step": 72830 }, { "epoch": 2.1674378468451043, "grad_norm": 0.07614991068840027, "learning_rate": 2.168120999096382e-06, "loss": 0.0014, "step": 72840 }, { "epoch": 2.167735408328746, "grad_norm": 0.28683897852897644, "learning_rate": 2.166694428379187e-06, "loss": 0.0018, "step": 72850 }, { "epoch": 2.1680329698123875, "grad_norm": 0.2226329892873764, "learning_rate": 2.165268197313772e-06, "loss": 0.0015, "step": 72860 }, { "epoch": 2.168330531296029, "grad_norm": 0.1910264790058136, "learning_rate": 2.1638423060711117e-06, "loss": 0.0023, "step": 72870 }, { "epoch": 2.1686280927796706, "grad_norm": 0.18591511249542236, "learning_rate": 2.162416754822141e-06, "loss": 0.0026, "step": 72880 }, { "epoch": 2.168925654263312, "grad_norm": 0.16797243058681488, "learning_rate": 2.1609915437377514e-06, "loss": 0.0041, "step": 72890 }, { "epoch": 2.1692232157469538, "grad_norm": 0.09227193146944046, "learning_rate": 2.1595666729887956e-06, "loss": 0.0013, "step": 72900 }, { "epoch": 2.1695207772305953, "grad_norm": 0.1693660467863083, "learning_rate": 2.1581421427460843e-06, "loss": 0.0017, "step": 72910 }, { "epoch": 2.169818338714237, "grad_norm": 0.11777400970458984, "learning_rate": 2.1567179531803895e-06, "loss": 0.0018, "step": 72920 }, { "epoch": 2.1701159001978785, "grad_norm": 0.24848103523254395, "learning_rate": 2.155294104462437e-06, "loss": 0.0019, "step": 72930 }, { "epoch": 2.17041346168152, "grad_norm": 0.080144003033638, "learning_rate": 2.153870596762917e-06, "loss": 0.0019, "step": 72940 }, { "epoch": 2.1707110231651616, "grad_norm": 0.16296657919883728, "learning_rate": 2.1524474302524775e-06, "loss": 0.0017, "step": 72950 }, { "epoch": 2.171008584648803, "grad_norm": 0.21580535173416138, "learning_rate": 2.1510246051017266e-06, "loss": 0.0014, "step": 72960 }, { "epoch": 2.1713061461324448, "grad_norm": 0.06565450131893158, "learning_rate": 2.1496021214812266e-06, "loss": 0.0021, "step": 72970 }, { "epoch": 2.1716037076160863, "grad_norm": 0.09673257172107697, "learning_rate": 2.1481799795615044e-06, "loss": 0.0026, "step": 72980 }, { "epoch": 2.171901269099728, "grad_norm": 0.19168105721473694, "learning_rate": 2.146758179513046e-06, "loss": 0.0028, "step": 72990 }, { "epoch": 2.1721988305833695, "grad_norm": 0.006711750756949186, "learning_rate": 2.1453367215062904e-06, "loss": 0.0026, "step": 73000 }, { "epoch": 2.1724963920670106, "grad_norm": 0.3697473704814911, "learning_rate": 2.1439156057116408e-06, "loss": 0.0039, "step": 73010 }, { "epoch": 2.172793953550652, "grad_norm": 0.497135728597641, "learning_rate": 2.142494832299459e-06, "loss": 0.0015, "step": 73020 }, { "epoch": 2.1730915150342938, "grad_norm": 0.1417645961046219, "learning_rate": 2.1410744014400666e-06, "loss": 0.0026, "step": 73030 }, { "epoch": 2.1733890765179353, "grad_norm": 0.19563114643096924, "learning_rate": 2.1396543133037378e-06, "loss": 0.0025, "step": 73040 }, { "epoch": 2.173686638001577, "grad_norm": 0.20663906633853912, "learning_rate": 2.138234568060713e-06, "loss": 0.0027, "step": 73050 }, { "epoch": 2.1739841994852185, "grad_norm": 0.12382400035858154, "learning_rate": 2.1368151658811886e-06, "loss": 0.0016, "step": 73060 }, { "epoch": 2.17428176096886, "grad_norm": 0.24823765456676483, "learning_rate": 2.1353961069353217e-06, "loss": 0.0017, "step": 73070 }, { "epoch": 2.1745793224525016, "grad_norm": 0.2248261719942093, "learning_rate": 2.1339773913932237e-06, "loss": 0.0013, "step": 73080 }, { "epoch": 2.174876883936143, "grad_norm": 0.26785847544670105, "learning_rate": 2.132559019424969e-06, "loss": 0.0029, "step": 73090 }, { "epoch": 2.1751744454197848, "grad_norm": 0.11939061433076859, "learning_rate": 2.1311409912005896e-06, "loss": 0.0011, "step": 73100 }, { "epoch": 2.1754720069034263, "grad_norm": 0.26720762252807617, "learning_rate": 2.129723306890078e-06, "loss": 0.0032, "step": 73110 }, { "epoch": 2.175769568387068, "grad_norm": 0.09206350147724152, "learning_rate": 2.128305966663381e-06, "loss": 0.0028, "step": 73120 }, { "epoch": 2.1760671298707095, "grad_norm": 0.07914130389690399, "learning_rate": 2.126888970690407e-06, "loss": 0.0021, "step": 73130 }, { "epoch": 2.176364691354351, "grad_norm": 0.037551406770944595, "learning_rate": 2.125472319141025e-06, "loss": 0.0013, "step": 73140 }, { "epoch": 2.1766622528379926, "grad_norm": 0.15294666588306427, "learning_rate": 2.124056012185061e-06, "loss": 0.0034, "step": 73150 }, { "epoch": 2.176959814321634, "grad_norm": 0.09359082579612732, "learning_rate": 2.1226400499922963e-06, "loss": 0.0016, "step": 73160 }, { "epoch": 2.177257375805276, "grad_norm": 0.14488337934017181, "learning_rate": 2.121224432732476e-06, "loss": 0.0039, "step": 73170 }, { "epoch": 2.1775549372889174, "grad_norm": 0.09389737993478775, "learning_rate": 2.119809160575302e-06, "loss": 0.0013, "step": 73180 }, { "epoch": 2.177852498772559, "grad_norm": 0.12035888433456421, "learning_rate": 2.1183942336904344e-06, "loss": 0.0071, "step": 73190 }, { "epoch": 2.1781500602562005, "grad_norm": 0.30501240491867065, "learning_rate": 2.116979652247492e-06, "loss": 0.0022, "step": 73200 }, { "epoch": 2.178447621739842, "grad_norm": 0.06366831809282303, "learning_rate": 2.1155654164160533e-06, "loss": 0.0016, "step": 73210 }, { "epoch": 2.1787451832234837, "grad_norm": 0.07159434258937836, "learning_rate": 2.114151526365655e-06, "loss": 0.0009, "step": 73220 }, { "epoch": 2.1790427447071252, "grad_norm": 0.07032366096973419, "learning_rate": 2.112737982265789e-06, "loss": 0.0017, "step": 73230 }, { "epoch": 2.179340306190767, "grad_norm": 0.22914889454841614, "learning_rate": 2.1113247842859097e-06, "loss": 0.0023, "step": 73240 }, { "epoch": 2.1796378676744084, "grad_norm": 0.15336447954177856, "learning_rate": 2.10991193259543e-06, "loss": 0.0019, "step": 73250 }, { "epoch": 2.17993542915805, "grad_norm": 0.09380238503217697, "learning_rate": 2.1084994273637206e-06, "loss": 0.0015, "step": 73260 }, { "epoch": 2.1802329906416915, "grad_norm": 0.08355842530727386, "learning_rate": 2.1070872687601067e-06, "loss": 0.0034, "step": 73270 }, { "epoch": 2.180530552125333, "grad_norm": 0.06923265010118484, "learning_rate": 2.1056754569538785e-06, "loss": 0.002, "step": 73280 }, { "epoch": 2.1808281136089747, "grad_norm": 0.013179504312574863, "learning_rate": 2.1042639921142806e-06, "loss": 0.0015, "step": 73290 }, { "epoch": 2.1811256750926162, "grad_norm": 0.23049449920654297, "learning_rate": 2.1028528744105185e-06, "loss": 0.0021, "step": 73300 }, { "epoch": 2.1814232365762574, "grad_norm": 0.2140941619873047, "learning_rate": 2.1014421040117523e-06, "loss": 0.0025, "step": 73310 }, { "epoch": 2.181720798059899, "grad_norm": 0.12161050736904144, "learning_rate": 2.1000316810871028e-06, "loss": 0.0015, "step": 73320 }, { "epoch": 2.1820183595435405, "grad_norm": 0.08292955160140991, "learning_rate": 2.0986216058056503e-06, "loss": 0.0016, "step": 73330 }, { "epoch": 2.182315921027182, "grad_norm": 0.19780440628528595, "learning_rate": 2.097211878336433e-06, "loss": 0.0013, "step": 73340 }, { "epoch": 2.1826134825108237, "grad_norm": 0.04934258386492729, "learning_rate": 2.095802498848444e-06, "loss": 0.0023, "step": 73350 }, { "epoch": 2.1829110439944652, "grad_norm": 0.43420687317848206, "learning_rate": 2.094393467510638e-06, "loss": 0.0014, "step": 73360 }, { "epoch": 2.183208605478107, "grad_norm": 0.060120489448308945, "learning_rate": 2.0929847844919305e-06, "loss": 0.001, "step": 73370 }, { "epoch": 2.1835061669617484, "grad_norm": 0.022151503711938858, "learning_rate": 2.091576449961187e-06, "loss": 0.0022, "step": 73380 }, { "epoch": 2.18380372844539, "grad_norm": 0.1206122562289238, "learning_rate": 2.0901684640872388e-06, "loss": 0.0018, "step": 73390 }, { "epoch": 2.1841012899290315, "grad_norm": 0.4353398084640503, "learning_rate": 2.0887608270388726e-06, "loss": 0.0022, "step": 73400 }, { "epoch": 2.184398851412673, "grad_norm": 0.06342300772666931, "learning_rate": 2.0873535389848354e-06, "loss": 0.0016, "step": 73410 }, { "epoch": 2.1846964128963147, "grad_norm": 0.23230275511741638, "learning_rate": 2.0859466000938265e-06, "loss": 0.0025, "step": 73420 }, { "epoch": 2.1849939743799562, "grad_norm": 0.19985152781009674, "learning_rate": 2.0845400105345093e-06, "loss": 0.003, "step": 73430 }, { "epoch": 2.185291535863598, "grad_norm": 0.11675234138965607, "learning_rate": 2.083133770475504e-06, "loss": 0.0014, "step": 73440 }, { "epoch": 2.1855890973472394, "grad_norm": 0.1784067153930664, "learning_rate": 2.0817278800853884e-06, "loss": 0.0014, "step": 73450 }, { "epoch": 2.185886658830881, "grad_norm": 0.27499911189079285, "learning_rate": 2.080322339532695e-06, "loss": 0.0015, "step": 73460 }, { "epoch": 2.1861842203145225, "grad_norm": 0.14275650680065155, "learning_rate": 2.078917148985921e-06, "loss": 0.0015, "step": 73470 }, { "epoch": 2.186481781798164, "grad_norm": 0.17167815566062927, "learning_rate": 2.077512308613519e-06, "loss": 0.0008, "step": 73480 }, { "epoch": 2.1867793432818057, "grad_norm": 0.046177834272384644, "learning_rate": 2.0761078185838955e-06, "loss": 0.0022, "step": 73490 }, { "epoch": 2.1870769047654472, "grad_norm": 0.016918424516916275, "learning_rate": 2.0747036790654203e-06, "loss": 0.0014, "step": 73500 }, { "epoch": 2.187374466249089, "grad_norm": 0.005027801729738712, "learning_rate": 2.0732998902264186e-06, "loss": 0.002, "step": 73510 }, { "epoch": 2.1876720277327304, "grad_norm": 0.17946085333824158, "learning_rate": 2.071896452235176e-06, "loss": 0.0026, "step": 73520 }, { "epoch": 2.187969589216372, "grad_norm": 0.13196945190429688, "learning_rate": 2.0704933652599306e-06, "loss": 0.0013, "step": 73530 }, { "epoch": 2.1882671507000135, "grad_norm": 0.19927629828453064, "learning_rate": 2.069090629468884e-06, "loss": 0.002, "step": 73540 }, { "epoch": 2.188564712183655, "grad_norm": 0.22878073155879974, "learning_rate": 2.0676882450301943e-06, "loss": 0.0038, "step": 73550 }, { "epoch": 2.1888622736672967, "grad_norm": 0.2593282461166382, "learning_rate": 2.066286212111978e-06, "loss": 0.0023, "step": 73560 }, { "epoch": 2.1891598351509383, "grad_norm": 0.06813109666109085, "learning_rate": 2.064884530882305e-06, "loss": 0.0019, "step": 73570 }, { "epoch": 2.1894573966345794, "grad_norm": 0.26694855093955994, "learning_rate": 2.063483201509209e-06, "loss": 0.0032, "step": 73580 }, { "epoch": 2.189754958118221, "grad_norm": 0.16622482240200043, "learning_rate": 2.0620822241606775e-06, "loss": 0.0017, "step": 73590 }, { "epoch": 2.1900525196018625, "grad_norm": 0.10289173573255539, "learning_rate": 2.06068159900466e-06, "loss": 0.0026, "step": 73600 }, { "epoch": 2.190350081085504, "grad_norm": 0.16035036742687225, "learning_rate": 2.0592813262090568e-06, "loss": 0.0018, "step": 73610 }, { "epoch": 2.1906476425691457, "grad_norm": 0.12708206474781036, "learning_rate": 2.0578814059417325e-06, "loss": 0.0029, "step": 73620 }, { "epoch": 2.1909452040527873, "grad_norm": 0.5443694591522217, "learning_rate": 2.056481838370507e-06, "loss": 0.0023, "step": 73630 }, { "epoch": 2.191242765536429, "grad_norm": 0.2627047300338745, "learning_rate": 2.05508262366316e-06, "loss": 0.002, "step": 73640 }, { "epoch": 2.1915403270200704, "grad_norm": 0.14532621204853058, "learning_rate": 2.0536837619874234e-06, "loss": 0.0015, "step": 73650 }, { "epoch": 2.191837888503712, "grad_norm": 0.14612224698066711, "learning_rate": 2.0522852535109917e-06, "loss": 0.0024, "step": 73660 }, { "epoch": 2.1921354499873535, "grad_norm": 0.13270443677902222, "learning_rate": 2.0508870984015158e-06, "loss": 0.0022, "step": 73670 }, { "epoch": 2.192433011470995, "grad_norm": 0.06088544800877571, "learning_rate": 2.0494892968266056e-06, "loss": 0.0009, "step": 73680 }, { "epoch": 2.1927305729546367, "grad_norm": 0.23428234457969666, "learning_rate": 2.0480918489538243e-06, "loss": 0.0029, "step": 73690 }, { "epoch": 2.1930281344382783, "grad_norm": 0.22971811890602112, "learning_rate": 2.046694754950697e-06, "loss": 0.0029, "step": 73700 }, { "epoch": 2.19332569592192, "grad_norm": 0.04197489470243454, "learning_rate": 2.0452980149847066e-06, "loss": 0.0029, "step": 73710 }, { "epoch": 2.1936232574055614, "grad_norm": 0.22228386998176575, "learning_rate": 2.043901629223289e-06, "loss": 0.0014, "step": 73720 }, { "epoch": 2.193920818889203, "grad_norm": 0.05469539389014244, "learning_rate": 2.042505597833841e-06, "loss": 0.0012, "step": 73730 }, { "epoch": 2.1942183803728446, "grad_norm": 0.039561428129673004, "learning_rate": 2.0411099209837155e-06, "loss": 0.0023, "step": 73740 }, { "epoch": 2.194515941856486, "grad_norm": 0.2475365251302719, "learning_rate": 2.039714598840229e-06, "loss": 0.0032, "step": 73750 }, { "epoch": 2.1948135033401277, "grad_norm": 0.17504450678825378, "learning_rate": 2.038319631570645e-06, "loss": 0.0023, "step": 73760 }, { "epoch": 2.1951110648237693, "grad_norm": 0.24224665760993958, "learning_rate": 2.0369250193421917e-06, "loss": 0.0015, "step": 73770 }, { "epoch": 2.195408626307411, "grad_norm": 0.10305789113044739, "learning_rate": 2.0355307623220544e-06, "loss": 0.0013, "step": 73780 }, { "epoch": 2.1957061877910524, "grad_norm": 0.3425736725330353, "learning_rate": 2.0341368606773706e-06, "loss": 0.0026, "step": 73790 }, { "epoch": 2.196003749274694, "grad_norm": 0.267366886138916, "learning_rate": 2.0327433145752406e-06, "loss": 0.0034, "step": 73800 }, { "epoch": 2.1963013107583356, "grad_norm": 0.0741255134344101, "learning_rate": 2.0313501241827203e-06, "loss": 0.0017, "step": 73810 }, { "epoch": 2.196598872241977, "grad_norm": 0.15970994532108307, "learning_rate": 2.029957289666825e-06, "loss": 0.0012, "step": 73820 }, { "epoch": 2.1968964337256187, "grad_norm": 0.05123691260814667, "learning_rate": 2.0285648111945223e-06, "loss": 0.0031, "step": 73830 }, { "epoch": 2.1971939952092603, "grad_norm": 0.1945321261882782, "learning_rate": 2.0271726889327403e-06, "loss": 0.0035, "step": 73840 }, { "epoch": 2.197491556692902, "grad_norm": 0.09515146166086197, "learning_rate": 2.025780923048365e-06, "loss": 0.0016, "step": 73850 }, { "epoch": 2.1977891181765434, "grad_norm": 0.08135776966810226, "learning_rate": 2.0243895137082407e-06, "loss": 0.0014, "step": 73860 }, { "epoch": 2.198086679660185, "grad_norm": 0.2278532087802887, "learning_rate": 2.0229984610791643e-06, "loss": 0.0015, "step": 73870 }, { "epoch": 2.1983842411438266, "grad_norm": 0.1473880559206009, "learning_rate": 2.0216077653278936e-06, "loss": 0.0019, "step": 73880 }, { "epoch": 2.1986818026274677, "grad_norm": 0.0735311433672905, "learning_rate": 2.0202174266211433e-06, "loss": 0.0026, "step": 73890 }, { "epoch": 2.1989793641111093, "grad_norm": 0.354280948638916, "learning_rate": 2.0188274451255857e-06, "loss": 0.0021, "step": 73900 }, { "epoch": 2.199276925594751, "grad_norm": 0.09440688043832779, "learning_rate": 2.017437821007847e-06, "loss": 0.0011, "step": 73910 }, { "epoch": 2.1995744870783924, "grad_norm": 0.12455067038536072, "learning_rate": 2.0160485544345143e-06, "loss": 0.0017, "step": 73920 }, { "epoch": 2.199872048562034, "grad_norm": 0.11231030523777008, "learning_rate": 2.0146596455721305e-06, "loss": 0.0025, "step": 73930 }, { "epoch": 2.2001696100456756, "grad_norm": 0.24854671955108643, "learning_rate": 2.013271094587197e-06, "loss": 0.0023, "step": 73940 }, { "epoch": 2.200467171529317, "grad_norm": 0.2303117960691452, "learning_rate": 2.0118829016461683e-06, "loss": 0.0026, "step": 73950 }, { "epoch": 2.2007647330129587, "grad_norm": 0.17066551744937897, "learning_rate": 2.010495066915459e-06, "loss": 0.0019, "step": 73960 }, { "epoch": 2.2010622944966003, "grad_norm": 0.11271560937166214, "learning_rate": 2.009107590561441e-06, "loss": 0.0022, "step": 73970 }, { "epoch": 2.201359855980242, "grad_norm": 0.23930275440216064, "learning_rate": 2.0077204727504445e-06, "loss": 0.0018, "step": 73980 }, { "epoch": 2.2016574174638834, "grad_norm": 0.11134964972734451, "learning_rate": 2.0063337136487514e-06, "loss": 0.0022, "step": 73990 }, { "epoch": 2.201954978947525, "grad_norm": 0.42012643814086914, "learning_rate": 2.0049473134226056e-06, "loss": 0.0015, "step": 74000 }, { "epoch": 2.2022525404311666, "grad_norm": 0.25862768292427063, "learning_rate": 2.0035612722382054e-06, "loss": 0.0027, "step": 74010 }, { "epoch": 2.202550101914808, "grad_norm": 0.0813271552324295, "learning_rate": 2.00217559026171e-06, "loss": 0.001, "step": 74020 }, { "epoch": 2.2028476633984497, "grad_norm": 0.07284726947546005, "learning_rate": 2.000790267659227e-06, "loss": 0.0014, "step": 74030 }, { "epoch": 2.2031452248820913, "grad_norm": 0.2716369926929474, "learning_rate": 1.999405304596832e-06, "loss": 0.0022, "step": 74040 }, { "epoch": 2.203442786365733, "grad_norm": 0.22842472791671753, "learning_rate": 1.998020701240551e-06, "loss": 0.0019, "step": 74050 }, { "epoch": 2.2037403478493744, "grad_norm": 0.11145670711994171, "learning_rate": 1.9966364577563658e-06, "loss": 0.0029, "step": 74060 }, { "epoch": 2.204037909333016, "grad_norm": 0.40544334053993225, "learning_rate": 1.9952525743102177e-06, "loss": 0.0035, "step": 74070 }, { "epoch": 2.2043354708166576, "grad_norm": 0.059241678565740585, "learning_rate": 1.993869051068004e-06, "loss": 0.0026, "step": 74080 }, { "epoch": 2.204633032300299, "grad_norm": 0.20241227746009827, "learning_rate": 1.992485888195582e-06, "loss": 0.0018, "step": 74090 }, { "epoch": 2.2049305937839407, "grad_norm": 0.1987234354019165, "learning_rate": 1.991103085858759e-06, "loss": 0.0048, "step": 74100 }, { "epoch": 2.2052281552675823, "grad_norm": 0.16146627068519592, "learning_rate": 1.9897206442233043e-06, "loss": 0.0017, "step": 74110 }, { "epoch": 2.205525716751224, "grad_norm": 0.003556240815669298, "learning_rate": 1.9883385634549447e-06, "loss": 0.0013, "step": 74120 }, { "epoch": 2.2058232782348655, "grad_norm": 0.06987028568983078, "learning_rate": 1.986956843719358e-06, "loss": 0.0023, "step": 74130 }, { "epoch": 2.206120839718507, "grad_norm": 0.18013864755630493, "learning_rate": 1.985575485182184e-06, "loss": 0.0018, "step": 74140 }, { "epoch": 2.206418401202148, "grad_norm": 0.10515886545181274, "learning_rate": 1.984194488009018e-06, "loss": 0.0015, "step": 74150 }, { "epoch": 2.2067159626857897, "grad_norm": 0.19766968488693237, "learning_rate": 1.982813852365413e-06, "loss": 0.0013, "step": 74160 }, { "epoch": 2.2070135241694313, "grad_norm": 0.07473185658454895, "learning_rate": 1.981433578416874e-06, "loss": 0.0012, "step": 74170 }, { "epoch": 2.207311085653073, "grad_norm": 0.12669312953948975, "learning_rate": 1.980053666328867e-06, "loss": 0.0031, "step": 74180 }, { "epoch": 2.2076086471367145, "grad_norm": 0.030141374096274376, "learning_rate": 1.9786741162668144e-06, "loss": 0.0021, "step": 74190 }, { "epoch": 2.207906208620356, "grad_norm": 0.2580394148826599, "learning_rate": 1.977294928396095e-06, "loss": 0.0024, "step": 74200 }, { "epoch": 2.2082037701039976, "grad_norm": 0.19976504147052765, "learning_rate": 1.9759161028820406e-06, "loss": 0.0015, "step": 74210 }, { "epoch": 2.208501331587639, "grad_norm": 0.12399730831384659, "learning_rate": 1.9745376398899442e-06, "loss": 0.002, "step": 74220 }, { "epoch": 2.2087988930712807, "grad_norm": 0.1506022810935974, "learning_rate": 1.973159539585053e-06, "loss": 0.0015, "step": 74230 }, { "epoch": 2.2090964545549223, "grad_norm": 0.14298109710216522, "learning_rate": 1.971781802132574e-06, "loss": 0.0019, "step": 74240 }, { "epoch": 2.209394016038564, "grad_norm": 0.062069982290267944, "learning_rate": 1.970404427697664e-06, "loss": 0.0019, "step": 74250 }, { "epoch": 2.2096915775222055, "grad_norm": 0.021016521379351616, "learning_rate": 1.9690274164454415e-06, "loss": 0.0023, "step": 74260 }, { "epoch": 2.209989139005847, "grad_norm": 0.18085111677646637, "learning_rate": 1.967650768540981e-06, "loss": 0.0029, "step": 74270 }, { "epoch": 2.2102867004894886, "grad_norm": 0.11128978431224823, "learning_rate": 1.966274484149314e-06, "loss": 0.0027, "step": 74280 }, { "epoch": 2.21058426197313, "grad_norm": 0.18276862800121307, "learning_rate": 1.9648985634354232e-06, "loss": 0.0022, "step": 74290 }, { "epoch": 2.2108818234567718, "grad_norm": 0.25690793991088867, "learning_rate": 1.9635230065642545e-06, "loss": 0.0017, "step": 74300 }, { "epoch": 2.2111793849404133, "grad_norm": 0.11193709820508957, "learning_rate": 1.9621478137007067e-06, "loss": 0.0021, "step": 74310 }, { "epoch": 2.211476946424055, "grad_norm": 0.05730084329843521, "learning_rate": 1.960772985009635e-06, "loss": 0.0023, "step": 74320 }, { "epoch": 2.2117745079076965, "grad_norm": 0.02722768858075142, "learning_rate": 1.9593985206558525e-06, "loss": 0.0016, "step": 74330 }, { "epoch": 2.212072069391338, "grad_norm": 0.11118297278881073, "learning_rate": 1.9580244208041267e-06, "loss": 0.0025, "step": 74340 }, { "epoch": 2.2123696308749796, "grad_norm": 0.1339164823293686, "learning_rate": 1.956650685619185e-06, "loss": 0.0026, "step": 74350 }, { "epoch": 2.212667192358621, "grad_norm": 0.14270587265491486, "learning_rate": 1.9552773152657034e-06, "loss": 0.0025, "step": 74360 }, { "epoch": 2.2129647538422628, "grad_norm": 0.01443462073802948, "learning_rate": 1.953904309908323e-06, "loss": 0.001, "step": 74370 }, { "epoch": 2.2132623153259043, "grad_norm": 0.14953769743442535, "learning_rate": 1.9525316697116355e-06, "loss": 0.002, "step": 74380 }, { "epoch": 2.213559876809546, "grad_norm": 0.05097312480211258, "learning_rate": 1.951159394840193e-06, "loss": 0.0021, "step": 74390 }, { "epoch": 2.2138574382931875, "grad_norm": 0.0431298203766346, "learning_rate": 1.949787485458498e-06, "loss": 0.0018, "step": 74400 }, { "epoch": 2.214154999776829, "grad_norm": 0.13141106069087982, "learning_rate": 1.948415941731015e-06, "loss": 0.0022, "step": 74410 }, { "epoch": 2.2144525612604706, "grad_norm": 0.09552007913589478, "learning_rate": 1.947044763822161e-06, "loss": 0.0021, "step": 74420 }, { "epoch": 2.214750122744112, "grad_norm": 0.057840850204229355, "learning_rate": 1.9456739518963132e-06, "loss": 0.0014, "step": 74430 }, { "epoch": 2.2150476842277538, "grad_norm": 0.1409980207681656, "learning_rate": 1.9443035061177985e-06, "loss": 0.0022, "step": 74440 }, { "epoch": 2.2153452457113953, "grad_norm": 0.05597146600484848, "learning_rate": 1.942933426650905e-06, "loss": 0.002, "step": 74450 }, { "epoch": 2.2156428071950365, "grad_norm": 0.1969568282365799, "learning_rate": 1.9415637136598757e-06, "loss": 0.0027, "step": 74460 }, { "epoch": 2.215940368678678, "grad_norm": 0.26814407110214233, "learning_rate": 1.9401943673089113e-06, "loss": 0.0015, "step": 74470 }, { "epoch": 2.2162379301623196, "grad_norm": 0.11754309386014938, "learning_rate": 1.9388253877621626e-06, "loss": 0.0012, "step": 74480 }, { "epoch": 2.216535491645961, "grad_norm": 0.2761748731136322, "learning_rate": 1.937456775183743e-06, "loss": 0.0018, "step": 74490 }, { "epoch": 2.2168330531296028, "grad_norm": 0.08687826991081238, "learning_rate": 1.93608852973772e-06, "loss": 0.0012, "step": 74500 }, { "epoch": 2.2171306146132443, "grad_norm": 0.19010527431964874, "learning_rate": 1.9347206515881152e-06, "loss": 0.0017, "step": 74510 }, { "epoch": 2.217428176096886, "grad_norm": 0.2650545537471771, "learning_rate": 1.933353140898907e-06, "loss": 0.0022, "step": 74520 }, { "epoch": 2.2177257375805275, "grad_norm": 0.22505535185337067, "learning_rate": 1.9319859978340312e-06, "loss": 0.0018, "step": 74530 }, { "epoch": 2.218023299064169, "grad_norm": 0.24341613054275513, "learning_rate": 1.93061922255738e-06, "loss": 0.0015, "step": 74540 }, { "epoch": 2.2183208605478106, "grad_norm": 0.10893918573856354, "learning_rate": 1.9292528152327968e-06, "loss": 0.0021, "step": 74550 }, { "epoch": 2.218618422031452, "grad_norm": 0.34550026059150696, "learning_rate": 1.927886776024086e-06, "loss": 0.0027, "step": 74560 }, { "epoch": 2.218915983515094, "grad_norm": 0.49780523777008057, "learning_rate": 1.9265211050950057e-06, "loss": 0.0034, "step": 74570 }, { "epoch": 2.2192135449987354, "grad_norm": 0.08730453252792358, "learning_rate": 1.925155802609272e-06, "loss": 0.0017, "step": 74580 }, { "epoch": 2.219511106482377, "grad_norm": 0.07161729782819748, "learning_rate": 1.9237908687305503e-06, "loss": 0.0017, "step": 74590 }, { "epoch": 2.2198086679660185, "grad_norm": 0.17729800939559937, "learning_rate": 1.9224263036224704e-06, "loss": 0.0023, "step": 74600 }, { "epoch": 2.22010622944966, "grad_norm": 0.16356562077999115, "learning_rate": 1.9210621074486157e-06, "loss": 0.0025, "step": 74610 }, { "epoch": 2.2204037909333016, "grad_norm": 0.18735243380069733, "learning_rate": 1.919698280372519e-06, "loss": 0.0024, "step": 74620 }, { "epoch": 2.220701352416943, "grad_norm": 0.11182649433612823, "learning_rate": 1.9183348225576757e-06, "loss": 0.0015, "step": 74630 }, { "epoch": 2.220998913900585, "grad_norm": 0.09722056239843369, "learning_rate": 1.9169717341675353e-06, "loss": 0.0028, "step": 74640 }, { "epoch": 2.2212964753842264, "grad_norm": 0.07003509998321533, "learning_rate": 1.915609015365503e-06, "loss": 0.0013, "step": 74650 }, { "epoch": 2.221594036867868, "grad_norm": 0.08169811218976974, "learning_rate": 1.914246666314937e-06, "loss": 0.0011, "step": 74660 }, { "epoch": 2.2218915983515095, "grad_norm": 0.11252117156982422, "learning_rate": 1.9128846871791534e-06, "loss": 0.0017, "step": 74670 }, { "epoch": 2.222189159835151, "grad_norm": 0.11360890418291092, "learning_rate": 1.9115230781214255e-06, "loss": 0.0015, "step": 74680 }, { "epoch": 2.2224867213187927, "grad_norm": 0.05265529826283455, "learning_rate": 1.910161839304982e-06, "loss": 0.0022, "step": 74690 }, { "epoch": 2.2227842828024342, "grad_norm": 0.1241556853055954, "learning_rate": 1.908800970893002e-06, "loss": 0.0028, "step": 74700 }, { "epoch": 2.223081844286076, "grad_norm": 0.09973372519016266, "learning_rate": 1.9074404730486264e-06, "loss": 0.0039, "step": 74710 }, { "epoch": 2.223379405769717, "grad_norm": 0.1705745905637741, "learning_rate": 1.9060803459349486e-06, "loss": 0.0024, "step": 74720 }, { "epoch": 2.2236769672533585, "grad_norm": 0.26856154203414917, "learning_rate": 1.9047205897150205e-06, "loss": 0.0035, "step": 74730 }, { "epoch": 2.223974528737, "grad_norm": 0.05403034761548042, "learning_rate": 1.903361204551844e-06, "loss": 0.0024, "step": 74740 }, { "epoch": 2.2242720902206417, "grad_norm": 0.02798468805849552, "learning_rate": 1.9020021906083813e-06, "loss": 0.0024, "step": 74750 }, { "epoch": 2.2245696517042832, "grad_norm": 0.11443773657083511, "learning_rate": 1.9006435480475483e-06, "loss": 0.0019, "step": 74760 }, { "epoch": 2.224867213187925, "grad_norm": 0.14011318981647491, "learning_rate": 1.899285277032219e-06, "loss": 0.0031, "step": 74770 }, { "epoch": 2.2251647746715664, "grad_norm": 0.13119202852249146, "learning_rate": 1.8979273777252167e-06, "loss": 0.0021, "step": 74780 }, { "epoch": 2.225462336155208, "grad_norm": 0.14571134746074677, "learning_rate": 1.8965698502893264e-06, "loss": 0.0012, "step": 74790 }, { "epoch": 2.2257598976388495, "grad_norm": 0.12506967782974243, "learning_rate": 1.8952126948872856e-06, "loss": 0.0032, "step": 74800 }, { "epoch": 2.226057459122491, "grad_norm": 0.15283630788326263, "learning_rate": 1.8938559116817896e-06, "loss": 0.0019, "step": 74810 }, { "epoch": 2.2263550206061327, "grad_norm": 0.10805390775203705, "learning_rate": 1.8924995008354834e-06, "loss": 0.0014, "step": 74820 }, { "epoch": 2.2266525820897742, "grad_norm": 0.16228343546390533, "learning_rate": 1.8911434625109738e-06, "loss": 0.0014, "step": 74830 }, { "epoch": 2.226950143573416, "grad_norm": 0.08313263207674026, "learning_rate": 1.88978779687082e-06, "loss": 0.0015, "step": 74840 }, { "epoch": 2.2272477050570574, "grad_norm": 0.09963475912809372, "learning_rate": 1.888432504077538e-06, "loss": 0.0015, "step": 74850 }, { "epoch": 2.227545266540699, "grad_norm": 0.06807316839694977, "learning_rate": 1.887077584293595e-06, "loss": 0.0022, "step": 74860 }, { "epoch": 2.2278428280243405, "grad_norm": 0.126937597990036, "learning_rate": 1.8857230376814183e-06, "loss": 0.0019, "step": 74870 }, { "epoch": 2.228140389507982, "grad_norm": 0.22179758548736572, "learning_rate": 1.8843688644033887e-06, "loss": 0.0021, "step": 74880 }, { "epoch": 2.2284379509916237, "grad_norm": 0.06459198147058487, "learning_rate": 1.8830150646218414e-06, "loss": 0.0015, "step": 74890 }, { "epoch": 2.2287355124752652, "grad_norm": 0.1994173377752304, "learning_rate": 1.8816616384990683e-06, "loss": 0.0028, "step": 74900 }, { "epoch": 2.229033073958907, "grad_norm": 0.06650221347808838, "learning_rate": 1.880308586197317e-06, "loss": 0.0022, "step": 74910 }, { "epoch": 2.2293306354425484, "grad_norm": 0.05336726829409599, "learning_rate": 1.878955907878786e-06, "loss": 0.0012, "step": 74920 }, { "epoch": 2.22962819692619, "grad_norm": 0.11614072322845459, "learning_rate": 1.8776036037056338e-06, "loss": 0.0018, "step": 74930 }, { "epoch": 2.2299257584098315, "grad_norm": 0.11232282221317291, "learning_rate": 1.8762516738399721e-06, "loss": 0.0019, "step": 74940 }, { "epoch": 2.230223319893473, "grad_norm": 0.20578870177268982, "learning_rate": 1.8749001184438697e-06, "loss": 0.0025, "step": 74950 }, { "epoch": 2.2305208813771147, "grad_norm": 0.21478943526744843, "learning_rate": 1.8735489376793453e-06, "loss": 0.0024, "step": 74960 }, { "epoch": 2.2308184428607563, "grad_norm": 0.18928857147693634, "learning_rate": 1.8721981317083776e-06, "loss": 0.0011, "step": 74970 }, { "epoch": 2.231116004344398, "grad_norm": 0.14404189586639404, "learning_rate": 1.8708477006928994e-06, "loss": 0.0019, "step": 74980 }, { "epoch": 2.2314135658280394, "grad_norm": 0.251014769077301, "learning_rate": 1.8694976447947994e-06, "loss": 0.0025, "step": 74990 }, { "epoch": 2.231711127311681, "grad_norm": 0.03555477038025856, "learning_rate": 1.8681479641759165e-06, "loss": 0.0016, "step": 75000 }, { "epoch": 2.231711127311681, "eval_loss": 0.002681402489542961, "eval_runtime": 3.9202, "eval_samples_per_second": 51.018, "eval_steps_per_second": 12.755, "step": 75000 }, { "epoch": 2.2320086887953225, "grad_norm": 0.08968699723482132, "learning_rate": 1.8667986589980503e-06, "loss": 0.0136, "step": 75010 }, { "epoch": 2.232306250278964, "grad_norm": 0.20943287014961243, "learning_rate": 1.8654497294229524e-06, "loss": 0.0034, "step": 75020 }, { "epoch": 2.2326038117626053, "grad_norm": 0.0964532345533371, "learning_rate": 1.8641011756123318e-06, "loss": 0.0032, "step": 75030 }, { "epoch": 2.232901373246247, "grad_norm": 0.19685350358486176, "learning_rate": 1.8627529977278485e-06, "loss": 0.0019, "step": 75040 }, { "epoch": 2.2331989347298884, "grad_norm": 0.19598443806171417, "learning_rate": 1.8614051959311202e-06, "loss": 0.0027, "step": 75050 }, { "epoch": 2.23349649621353, "grad_norm": 0.16643451154232025, "learning_rate": 1.8600577703837202e-06, "loss": 0.0012, "step": 75060 }, { "epoch": 2.2337940576971715, "grad_norm": 0.06236805394291878, "learning_rate": 1.8587107212471766e-06, "loss": 0.0015, "step": 75070 }, { "epoch": 2.234091619180813, "grad_norm": 0.15104836225509644, "learning_rate": 1.8573640486829676e-06, "loss": 0.0015, "step": 75080 }, { "epoch": 2.2343891806644547, "grad_norm": 0.017934806644916534, "learning_rate": 1.8560177528525324e-06, "loss": 0.0015, "step": 75090 }, { "epoch": 2.2346867421480963, "grad_norm": 0.09617044776678085, "learning_rate": 1.854671833917262e-06, "loss": 0.0018, "step": 75100 }, { "epoch": 2.234984303631738, "grad_norm": 0.4231770932674408, "learning_rate": 1.8533262920385053e-06, "loss": 0.0022, "step": 75110 }, { "epoch": 2.2352818651153794, "grad_norm": 0.33803680539131165, "learning_rate": 1.851981127377559e-06, "loss": 0.0029, "step": 75120 }, { "epoch": 2.235579426599021, "grad_norm": 0.1365022510290146, "learning_rate": 1.8506363400956823e-06, "loss": 0.0025, "step": 75130 }, { "epoch": 2.2358769880826626, "grad_norm": 0.15926645696163177, "learning_rate": 1.8492919303540845e-06, "loss": 0.0022, "step": 75140 }, { "epoch": 2.236174549566304, "grad_norm": 0.15677042305469513, "learning_rate": 1.8479478983139337e-06, "loss": 0.0014, "step": 75150 }, { "epoch": 2.2364721110499457, "grad_norm": 0.2309095710515976, "learning_rate": 1.8466042441363447e-06, "loss": 0.002, "step": 75160 }, { "epoch": 2.2367696725335873, "grad_norm": 0.19837692379951477, "learning_rate": 1.845260967982398e-06, "loss": 0.0029, "step": 75170 }, { "epoch": 2.237067234017229, "grad_norm": 0.15329545736312866, "learning_rate": 1.8439180700131226e-06, "loss": 0.0021, "step": 75180 }, { "epoch": 2.2373647955008704, "grad_norm": 0.08755836635828018, "learning_rate": 1.8425755503894998e-06, "loss": 0.0027, "step": 75190 }, { "epoch": 2.237662356984512, "grad_norm": 0.11956997215747833, "learning_rate": 1.8412334092724704e-06, "loss": 0.0015, "step": 75200 }, { "epoch": 2.2379599184681536, "grad_norm": 0.03581898659467697, "learning_rate": 1.8398916468229272e-06, "loss": 0.0017, "step": 75210 }, { "epoch": 2.238257479951795, "grad_norm": 0.09706024825572968, "learning_rate": 1.838550263201721e-06, "loss": 0.0017, "step": 75220 }, { "epoch": 2.2385550414354367, "grad_norm": 0.08221906423568726, "learning_rate": 1.8372092585696505e-06, "loss": 0.0023, "step": 75230 }, { "epoch": 2.2388526029190783, "grad_norm": 0.06034356728196144, "learning_rate": 1.8358686330874747e-06, "loss": 0.0015, "step": 75240 }, { "epoch": 2.23915016440272, "grad_norm": 0.05830631032586098, "learning_rate": 1.8345283869159059e-06, "loss": 0.0021, "step": 75250 }, { "epoch": 2.2394477258863614, "grad_norm": 0.06054285541176796, "learning_rate": 1.833188520215612e-06, "loss": 0.002, "step": 75260 }, { "epoch": 2.239745287370003, "grad_norm": 0.1027204617857933, "learning_rate": 1.8318490331472106e-06, "loss": 0.0022, "step": 75270 }, { "epoch": 2.2400428488536446, "grad_norm": 0.02272678352892399, "learning_rate": 1.8305099258712782e-06, "loss": 0.0013, "step": 75280 }, { "epoch": 2.240340410337286, "grad_norm": 0.1129058375954628, "learning_rate": 1.8291711985483474e-06, "loss": 0.0012, "step": 75290 }, { "epoch": 2.2406379718209273, "grad_norm": 0.04335472360253334, "learning_rate": 1.8278328513388982e-06, "loss": 0.0016, "step": 75300 }, { "epoch": 2.240935533304569, "grad_norm": 0.19915582239627838, "learning_rate": 1.826494884403372e-06, "loss": 0.0013, "step": 75310 }, { "epoch": 2.2412330947882104, "grad_norm": 0.2120581418275833, "learning_rate": 1.825157297902161e-06, "loss": 0.0018, "step": 75320 }, { "epoch": 2.241530656271852, "grad_norm": 0.2623820900917053, "learning_rate": 1.823820091995615e-06, "loss": 0.0026, "step": 75330 }, { "epoch": 2.2418282177554936, "grad_norm": 0.24031366407871246, "learning_rate": 1.8224832668440323e-06, "loss": 0.0024, "step": 75340 }, { "epoch": 2.242125779239135, "grad_norm": 0.11579011380672455, "learning_rate": 1.8211468226076712e-06, "loss": 0.0026, "step": 75350 }, { "epoch": 2.2424233407227767, "grad_norm": 0.08314807713031769, "learning_rate": 1.8198107594467418e-06, "loss": 0.0017, "step": 75360 }, { "epoch": 2.2427209022064183, "grad_norm": 0.2793198823928833, "learning_rate": 1.818475077521411e-06, "loss": 0.0032, "step": 75370 }, { "epoch": 2.24301846369006, "grad_norm": 0.3271040916442871, "learning_rate": 1.8171397769917953e-06, "loss": 0.0019, "step": 75380 }, { "epoch": 2.2433160251737014, "grad_norm": 0.31612256169319153, "learning_rate": 1.8158048580179688e-06, "loss": 0.0016, "step": 75390 }, { "epoch": 2.243613586657343, "grad_norm": 0.0708538144826889, "learning_rate": 1.8144703207599601e-06, "loss": 0.0013, "step": 75400 }, { "epoch": 2.2439111481409846, "grad_norm": 0.3882058560848236, "learning_rate": 1.8131361653777524e-06, "loss": 0.0041, "step": 75410 }, { "epoch": 2.244208709624626, "grad_norm": 0.05609118938446045, "learning_rate": 1.8118023920312788e-06, "loss": 0.0015, "step": 75420 }, { "epoch": 2.2445062711082677, "grad_norm": 0.09725430607795715, "learning_rate": 1.8104690008804315e-06, "loss": 0.0011, "step": 75430 }, { "epoch": 2.2448038325919093, "grad_norm": 0.05101439356803894, "learning_rate": 1.8091359920850553e-06, "loss": 0.0028, "step": 75440 }, { "epoch": 2.245101394075551, "grad_norm": 0.04002397879958153, "learning_rate": 1.8078033658049487e-06, "loss": 0.0022, "step": 75450 }, { "epoch": 2.2453989555591924, "grad_norm": 0.17982614040374756, "learning_rate": 1.8064711221998642e-06, "loss": 0.0026, "step": 75460 }, { "epoch": 2.245696517042834, "grad_norm": 0.09859908372163773, "learning_rate": 1.80513926142951e-06, "loss": 0.0026, "step": 75470 }, { "epoch": 2.2459940785264756, "grad_norm": 0.1863155961036682, "learning_rate": 1.8038077836535477e-06, "loss": 0.002, "step": 75480 }, { "epoch": 2.246291640010117, "grad_norm": 0.028897330164909363, "learning_rate": 1.8024766890315904e-06, "loss": 0.002, "step": 75490 }, { "epoch": 2.2465892014937587, "grad_norm": 0.11741150170564651, "learning_rate": 1.8011459777232082e-06, "loss": 0.0018, "step": 75500 }, { "epoch": 2.2468867629774003, "grad_norm": 0.17235897481441498, "learning_rate": 1.799815649887925e-06, "loss": 0.0014, "step": 75510 }, { "epoch": 2.247184324461042, "grad_norm": 0.08443871885538101, "learning_rate": 1.7984857056852195e-06, "loss": 0.0021, "step": 75520 }, { "epoch": 2.2474818859446835, "grad_norm": 0.09309356659650803, "learning_rate": 1.7971561452745195e-06, "loss": 0.0015, "step": 75530 }, { "epoch": 2.247779447428325, "grad_norm": 0.06205713376402855, "learning_rate": 1.7958269688152131e-06, "loss": 0.0023, "step": 75540 }, { "epoch": 2.2480770089119666, "grad_norm": 0.15916527807712555, "learning_rate": 1.7944981764666392e-06, "loss": 0.0013, "step": 75550 }, { "epoch": 2.248374570395608, "grad_norm": 0.18361982703208923, "learning_rate": 1.7931697683880921e-06, "loss": 0.0017, "step": 75560 }, { "epoch": 2.2486721318792497, "grad_norm": 0.04448292404413223, "learning_rate": 1.791841744738817e-06, "loss": 0.0013, "step": 75570 }, { "epoch": 2.2489696933628913, "grad_norm": 0.9362258315086365, "learning_rate": 1.7905141056780156e-06, "loss": 0.0017, "step": 75580 }, { "epoch": 2.249267254846533, "grad_norm": 0.05818391591310501, "learning_rate": 1.7891868513648436e-06, "loss": 0.0027, "step": 75590 }, { "epoch": 2.249564816330174, "grad_norm": 0.1609441339969635, "learning_rate": 1.7878599819584113e-06, "loss": 0.0033, "step": 75600 }, { "epoch": 2.2498623778138156, "grad_norm": 0.32539257407188416, "learning_rate": 1.7865334976177784e-06, "loss": 0.0026, "step": 75610 }, { "epoch": 2.250159939297457, "grad_norm": 0.13723835349082947, "learning_rate": 1.7852073985019636e-06, "loss": 0.0015, "step": 75620 }, { "epoch": 2.2504575007810987, "grad_norm": 0.0974673181772232, "learning_rate": 1.7838816847699382e-06, "loss": 0.0022, "step": 75630 }, { "epoch": 2.2507550622647403, "grad_norm": 0.35705241560935974, "learning_rate": 1.7825563565806237e-06, "loss": 0.0017, "step": 75640 }, { "epoch": 2.251052623748382, "grad_norm": 0.2999112904071808, "learning_rate": 1.7812314140928998e-06, "loss": 0.0022, "step": 75650 }, { "epoch": 2.2513501852320235, "grad_norm": 0.07257802039384842, "learning_rate": 1.779906857465598e-06, "loss": 0.0015, "step": 75660 }, { "epoch": 2.251647746715665, "grad_norm": 0.09187492728233337, "learning_rate": 1.7785826868575058e-06, "loss": 0.0023, "step": 75670 }, { "epoch": 2.2519453081993066, "grad_norm": 0.24708886444568634, "learning_rate": 1.777258902427359e-06, "loss": 0.003, "step": 75680 }, { "epoch": 2.252242869682948, "grad_norm": 0.31311970949172974, "learning_rate": 1.7759355043338522e-06, "loss": 0.0021, "step": 75690 }, { "epoch": 2.2525404311665898, "grad_norm": 0.22852174937725067, "learning_rate": 1.7746124927356324e-06, "loss": 0.0016, "step": 75700 }, { "epoch": 2.2528379926502313, "grad_norm": 0.15165328979492188, "learning_rate": 1.7732898677913018e-06, "loss": 0.0178, "step": 75710 }, { "epoch": 2.253135554133873, "grad_norm": 0.03617962822318077, "learning_rate": 1.7719676296594102e-06, "loss": 0.0013, "step": 75720 }, { "epoch": 2.2534331156175145, "grad_norm": 0.44748109579086304, "learning_rate": 1.770645778498466e-06, "loss": 0.0022, "step": 75730 }, { "epoch": 2.253730677101156, "grad_norm": 0.22819004952907562, "learning_rate": 1.7693243144669348e-06, "loss": 0.0018, "step": 75740 }, { "epoch": 2.2540282385847976, "grad_norm": 0.21790772676467896, "learning_rate": 1.768003237723227e-06, "loss": 0.0015, "step": 75750 }, { "epoch": 2.254325800068439, "grad_norm": 0.10044650733470917, "learning_rate": 1.7666825484257128e-06, "loss": 0.0017, "step": 75760 }, { "epoch": 2.2546233615520808, "grad_norm": 0.11334045231342316, "learning_rate": 1.765362246732713e-06, "loss": 0.0031, "step": 75770 }, { "epoch": 2.2549209230357223, "grad_norm": 0.09351358562707901, "learning_rate": 1.7640423328025063e-06, "loss": 0.0032, "step": 75780 }, { "epoch": 2.255218484519364, "grad_norm": 0.17459964752197266, "learning_rate": 1.762722806793317e-06, "loss": 0.0015, "step": 75790 }, { "epoch": 2.2555160460030055, "grad_norm": 0.17320318520069122, "learning_rate": 1.7614036688633307e-06, "loss": 0.0019, "step": 75800 }, { "epoch": 2.255813607486647, "grad_norm": 0.06272780895233154, "learning_rate": 1.760084919170682e-06, "loss": 0.0017, "step": 75810 }, { "epoch": 2.2561111689702886, "grad_norm": 0.04578143358230591, "learning_rate": 1.7587665578734624e-06, "loss": 0.0025, "step": 75820 }, { "epoch": 2.25640873045393, "grad_norm": 0.17302580177783966, "learning_rate": 1.7574485851297124e-06, "loss": 0.0033, "step": 75830 }, { "epoch": 2.2567062919375718, "grad_norm": 0.097349151968956, "learning_rate": 1.7561310010974286e-06, "loss": 0.0016, "step": 75840 }, { "epoch": 2.2570038534212133, "grad_norm": 0.11972316354513168, "learning_rate": 1.7548138059345614e-06, "loss": 0.001, "step": 75850 }, { "epoch": 2.2573014149048545, "grad_norm": 0.06792303174734116, "learning_rate": 1.7534969997990153e-06, "loss": 0.002, "step": 75860 }, { "epoch": 2.257598976388496, "grad_norm": 0.030948206782341003, "learning_rate": 1.7521805828486438e-06, "loss": 0.0015, "step": 75870 }, { "epoch": 2.2578965378721376, "grad_norm": 0.15912961959838867, "learning_rate": 1.750864555241258e-06, "loss": 0.0025, "step": 75880 }, { "epoch": 2.258194099355779, "grad_norm": 0.21016757190227509, "learning_rate": 1.7495489171346209e-06, "loss": 0.0016, "step": 75890 }, { "epoch": 2.2584916608394208, "grad_norm": 0.02985403873026371, "learning_rate": 1.748233668686451e-06, "loss": 0.0017, "step": 75900 }, { "epoch": 2.2587892223230623, "grad_norm": 0.07901979982852936, "learning_rate": 1.7469188100544149e-06, "loss": 0.0015, "step": 75910 }, { "epoch": 2.259086783806704, "grad_norm": 0.09216658025979996, "learning_rate": 1.745604341396136e-06, "loss": 0.0022, "step": 75920 }, { "epoch": 2.2593843452903455, "grad_norm": 0.10761165618896484, "learning_rate": 1.7442902628691922e-06, "loss": 0.0007, "step": 75930 }, { "epoch": 2.259681906773987, "grad_norm": 0.13095314800739288, "learning_rate": 1.7429765746311133e-06, "loss": 0.0021, "step": 75940 }, { "epoch": 2.2599794682576286, "grad_norm": 0.18543948233127594, "learning_rate": 1.7416632768393793e-06, "loss": 0.0025, "step": 75950 }, { "epoch": 2.26027702974127, "grad_norm": 0.1440240740776062, "learning_rate": 1.7403503696514274e-06, "loss": 0.0013, "step": 75960 }, { "epoch": 2.2605745912249118, "grad_norm": 0.10598735511302948, "learning_rate": 1.739037853224647e-06, "loss": 0.0013, "step": 75970 }, { "epoch": 2.2608721527085534, "grad_norm": 0.15005655586719513, "learning_rate": 1.7377257277163823e-06, "loss": 0.0022, "step": 75980 }, { "epoch": 2.261169714192195, "grad_norm": 0.11682875454425812, "learning_rate": 1.7364139932839242e-06, "loss": 0.0041, "step": 75990 }, { "epoch": 2.2614672756758365, "grad_norm": 0.035079214721918106, "learning_rate": 1.7351026500845235e-06, "loss": 0.0018, "step": 76000 }, { "epoch": 2.261764837159478, "grad_norm": 0.27697983384132385, "learning_rate": 1.7337916982753823e-06, "loss": 0.0027, "step": 76010 }, { "epoch": 2.2620623986431196, "grad_norm": 0.1331588178873062, "learning_rate": 1.732481138013654e-06, "loss": 0.0022, "step": 76020 }, { "epoch": 2.262359960126761, "grad_norm": 0.08433149009943008, "learning_rate": 1.731170969456447e-06, "loss": 0.0019, "step": 76030 }, { "epoch": 2.262657521610403, "grad_norm": 0.1865754872560501, "learning_rate": 1.7298611927608238e-06, "loss": 0.0021, "step": 76040 }, { "epoch": 2.2629550830940444, "grad_norm": 0.26963233947753906, "learning_rate": 1.7285518080837948e-06, "loss": 0.0014, "step": 76050 }, { "epoch": 2.263252644577686, "grad_norm": 0.264518141746521, "learning_rate": 1.7272428155823285e-06, "loss": 0.0027, "step": 76060 }, { "epoch": 2.2635502060613275, "grad_norm": 0.050208836793899536, "learning_rate": 1.725934215413344e-06, "loss": 0.0022, "step": 76070 }, { "epoch": 2.263847767544969, "grad_norm": 0.14320270717144012, "learning_rate": 1.7246260077337162e-06, "loss": 0.0014, "step": 76080 }, { "epoch": 2.2641453290286107, "grad_norm": 0.10151875764131546, "learning_rate": 1.723318192700268e-06, "loss": 0.0012, "step": 76090 }, { "epoch": 2.2644428905122522, "grad_norm": 0.4111122488975525, "learning_rate": 1.7220107704697785e-06, "loss": 0.0024, "step": 76100 }, { "epoch": 2.264740451995894, "grad_norm": 0.07731053233146667, "learning_rate": 1.7207037411989801e-06, "loss": 0.0024, "step": 76110 }, { "epoch": 2.2650380134795354, "grad_norm": 0.23231002688407898, "learning_rate": 1.719397105044559e-06, "loss": 0.0028, "step": 76120 }, { "epoch": 2.265335574963177, "grad_norm": 0.13760893046855927, "learning_rate": 1.718090862163148e-06, "loss": 0.0026, "step": 76130 }, { "epoch": 2.2656331364468185, "grad_norm": 0.0856417566537857, "learning_rate": 1.7167850127113407e-06, "loss": 0.0013, "step": 76140 }, { "epoch": 2.26593069793046, "grad_norm": 0.12739042937755585, "learning_rate": 1.7154795568456784e-06, "loss": 0.0021, "step": 76150 }, { "epoch": 2.2662282594141017, "grad_norm": 0.10919009149074554, "learning_rate": 1.7141744947226591e-06, "loss": 0.0015, "step": 76160 }, { "epoch": 2.2665258208977432, "grad_norm": 0.05476522073149681, "learning_rate": 1.712869826498728e-06, "loss": 0.0012, "step": 76170 }, { "epoch": 2.2668233823813844, "grad_norm": 0.12565453350543976, "learning_rate": 1.7115655523302893e-06, "loss": 0.0011, "step": 76180 }, { "epoch": 2.267120943865026, "grad_norm": 0.09911998361349106, "learning_rate": 1.7102616723736953e-06, "loss": 0.0021, "step": 76190 }, { "epoch": 2.2674185053486675, "grad_norm": 0.06813137978315353, "learning_rate": 1.7089581867852557e-06, "loss": 0.0016, "step": 76200 }, { "epoch": 2.267716066832309, "grad_norm": 0.133895605802536, "learning_rate": 1.7076550957212261e-06, "loss": 0.0016, "step": 76210 }, { "epoch": 2.2680136283159507, "grad_norm": 0.21155096590518951, "learning_rate": 1.7063523993378216e-06, "loss": 0.0022, "step": 76220 }, { "epoch": 2.2683111897995922, "grad_norm": 0.18694667518138885, "learning_rate": 1.705050097791206e-06, "loss": 0.0019, "step": 76230 }, { "epoch": 2.268608751283234, "grad_norm": 0.11131993681192398, "learning_rate": 1.7037481912374992e-06, "loss": 0.0011, "step": 76240 }, { "epoch": 2.2689063127668754, "grad_norm": 0.2034481167793274, "learning_rate": 1.7024466798327677e-06, "loss": 0.0019, "step": 76250 }, { "epoch": 2.269203874250517, "grad_norm": 0.07685686647891998, "learning_rate": 1.701145563733037e-06, "loss": 0.001, "step": 76260 }, { "epoch": 2.2695014357341585, "grad_norm": 0.04702649265527725, "learning_rate": 1.6998448430942815e-06, "loss": 0.0013, "step": 76270 }, { "epoch": 2.2697989972178, "grad_norm": 0.13864600658416748, "learning_rate": 1.698544518072432e-06, "loss": 0.0025, "step": 76280 }, { "epoch": 2.2700965587014417, "grad_norm": 0.1351611465215683, "learning_rate": 1.6972445888233658e-06, "loss": 0.0024, "step": 76290 }, { "epoch": 2.2703941201850832, "grad_norm": 0.11009644716978073, "learning_rate": 1.6959450555029156e-06, "loss": 0.0017, "step": 76300 }, { "epoch": 2.270691681668725, "grad_norm": 0.11958473175764084, "learning_rate": 1.6946459182668723e-06, "loss": 0.0012, "step": 76310 }, { "epoch": 2.2709892431523664, "grad_norm": 0.13060908019542694, "learning_rate": 1.693347177270969e-06, "loss": 0.0024, "step": 76320 }, { "epoch": 2.271286804636008, "grad_norm": 0.06502187997102737, "learning_rate": 1.6920488326708994e-06, "loss": 0.001, "step": 76330 }, { "epoch": 2.2715843661196495, "grad_norm": 0.053856197744607925, "learning_rate": 1.690750884622306e-06, "loss": 0.0017, "step": 76340 }, { "epoch": 2.271881927603291, "grad_norm": 0.07086911052465439, "learning_rate": 1.689453333280786e-06, "loss": 0.0015, "step": 76350 }, { "epoch": 2.2721794890869327, "grad_norm": 0.06500443816184998, "learning_rate": 1.6881561788018847e-06, "loss": 0.0012, "step": 76360 }, { "epoch": 2.2724770505705743, "grad_norm": 0.16803273558616638, "learning_rate": 1.6868594213411037e-06, "loss": 0.0019, "step": 76370 }, { "epoch": 2.272774612054216, "grad_norm": 0.2175268530845642, "learning_rate": 1.6855630610538975e-06, "loss": 0.0019, "step": 76380 }, { "epoch": 2.2730721735378574, "grad_norm": 0.24650542438030243, "learning_rate": 1.6842670980956715e-06, "loss": 0.003, "step": 76390 }, { "epoch": 2.273369735021499, "grad_norm": 0.22806717455387115, "learning_rate": 1.6829715326217816e-06, "loss": 0.0018, "step": 76400 }, { "epoch": 2.2736672965051405, "grad_norm": 0.09067627042531967, "learning_rate": 1.6816763647875384e-06, "loss": 0.0018, "step": 76410 }, { "epoch": 2.273964857988782, "grad_norm": 0.13400088250637054, "learning_rate": 1.6803815947482073e-06, "loss": 0.0017, "step": 76420 }, { "epoch": 2.2742624194724232, "grad_norm": 0.16359303891658783, "learning_rate": 1.6790872226589982e-06, "loss": 0.0016, "step": 76430 }, { "epoch": 2.274559980956065, "grad_norm": 0.13269490003585815, "learning_rate": 1.6777932486750814e-06, "loss": 0.0028, "step": 76440 }, { "epoch": 2.2748575424397064, "grad_norm": 0.04357670992612839, "learning_rate": 1.676499672951576e-06, "loss": 0.0021, "step": 76450 }, { "epoch": 2.275155103923348, "grad_norm": 0.08437605947256088, "learning_rate": 1.6752064956435543e-06, "loss": 0.0012, "step": 76460 }, { "epoch": 2.2754526654069895, "grad_norm": 0.07818885147571564, "learning_rate": 1.6739137169060377e-06, "loss": 0.0016, "step": 76470 }, { "epoch": 2.275750226890631, "grad_norm": 0.12016736716032028, "learning_rate": 1.6726213368940037e-06, "loss": 0.0015, "step": 76480 }, { "epoch": 2.2760477883742727, "grad_norm": 0.3248153030872345, "learning_rate": 1.67132935576238e-06, "loss": 0.0053, "step": 76490 }, { "epoch": 2.2763453498579143, "grad_norm": 0.1374814212322235, "learning_rate": 1.67003777366605e-06, "loss": 0.0028, "step": 76500 }, { "epoch": 2.276642911341556, "grad_norm": 0.1393168866634369, "learning_rate": 1.6687465907598422e-06, "loss": 0.0016, "step": 76510 }, { "epoch": 2.2769404728251974, "grad_norm": 0.17654840648174286, "learning_rate": 1.667455807198543e-06, "loss": 0.0024, "step": 76520 }, { "epoch": 2.277238034308839, "grad_norm": 0.08466428518295288, "learning_rate": 1.6661654231368895e-06, "loss": 0.0019, "step": 76530 }, { "epoch": 2.2775355957924805, "grad_norm": 0.056147873401641846, "learning_rate": 1.6648754387295723e-06, "loss": 0.0017, "step": 76540 }, { "epoch": 2.277833157276122, "grad_norm": 0.0896276906132698, "learning_rate": 1.6635858541312295e-06, "loss": 0.002, "step": 76550 }, { "epoch": 2.2781307187597637, "grad_norm": 0.15743094682693481, "learning_rate": 1.6622966694964554e-06, "loss": 0.0013, "step": 76560 }, { "epoch": 2.2784282802434053, "grad_norm": 0.151471808552742, "learning_rate": 1.6610078849797957e-06, "loss": 0.0012, "step": 76570 }, { "epoch": 2.278725841727047, "grad_norm": 0.08531653881072998, "learning_rate": 1.6597195007357474e-06, "loss": 0.002, "step": 76580 }, { "epoch": 2.2790234032106884, "grad_norm": 0.09449660032987595, "learning_rate": 1.6584315169187598e-06, "loss": 0.0018, "step": 76590 }, { "epoch": 2.27932096469433, "grad_norm": 0.12230333685874939, "learning_rate": 1.6571439336832346e-06, "loss": 0.0017, "step": 76600 }, { "epoch": 2.2796185261779716, "grad_norm": 0.038548074662685394, "learning_rate": 1.6558567511835256e-06, "loss": 0.0011, "step": 76610 }, { "epoch": 2.279916087661613, "grad_norm": 0.27873513102531433, "learning_rate": 1.654569969573936e-06, "loss": 0.0032, "step": 76620 }, { "epoch": 2.2802136491452547, "grad_norm": 0.11317985504865646, "learning_rate": 1.6532835890087234e-06, "loss": 0.002, "step": 76630 }, { "epoch": 2.2805112106288963, "grad_norm": 0.17428487539291382, "learning_rate": 1.6519976096420981e-06, "loss": 0.0029, "step": 76640 }, { "epoch": 2.280808772112538, "grad_norm": 0.21761314570903778, "learning_rate": 1.6507120316282222e-06, "loss": 0.0019, "step": 76650 }, { "epoch": 2.2811063335961794, "grad_norm": 0.13933216035366058, "learning_rate": 1.649426855121205e-06, "loss": 0.0014, "step": 76660 }, { "epoch": 2.281403895079821, "grad_norm": 0.11831668019294739, "learning_rate": 1.648142080275113e-06, "loss": 0.0025, "step": 76670 }, { "epoch": 2.2817014565634626, "grad_norm": 0.17320021986961365, "learning_rate": 1.6468577072439633e-06, "loss": 0.002, "step": 76680 }, { "epoch": 2.281999018047104, "grad_norm": 0.11712735146284103, "learning_rate": 1.6455737361817254e-06, "loss": 0.0026, "step": 76690 }, { "epoch": 2.2822965795307457, "grad_norm": 0.10015492141246796, "learning_rate": 1.644290167242316e-06, "loss": 0.0007, "step": 76700 }, { "epoch": 2.2825941410143873, "grad_norm": 0.1037965938448906, "learning_rate": 1.64300700057961e-06, "loss": 0.0009, "step": 76710 }, { "epoch": 2.282891702498029, "grad_norm": 0.26032641530036926, "learning_rate": 1.64172423634743e-06, "loss": 0.0021, "step": 76720 }, { "epoch": 2.2831892639816704, "grad_norm": 0.11621541529893875, "learning_rate": 1.6404418746995538e-06, "loss": 0.0015, "step": 76730 }, { "epoch": 2.283486825465312, "grad_norm": 0.18092204630374908, "learning_rate": 1.6391599157897048e-06, "loss": 0.0019, "step": 76740 }, { "epoch": 2.283784386948953, "grad_norm": 0.2020348161458969, "learning_rate": 1.6378783597715642e-06, "loss": 0.0022, "step": 76750 }, { "epoch": 2.2840819484325947, "grad_norm": 0.2321901172399521, "learning_rate": 1.6365972067987645e-06, "loss": 0.0029, "step": 76760 }, { "epoch": 2.2843795099162363, "grad_norm": 0.021967999637126923, "learning_rate": 1.6353164570248847e-06, "loss": 0.0018, "step": 76770 }, { "epoch": 2.284677071399878, "grad_norm": 0.3142329752445221, "learning_rate": 1.6340361106034596e-06, "loss": 0.0014, "step": 76780 }, { "epoch": 2.2849746328835194, "grad_norm": 0.11353432387113571, "learning_rate": 1.6327561676879767e-06, "loss": 0.0017, "step": 76790 }, { "epoch": 2.285272194367161, "grad_norm": 0.16544774174690247, "learning_rate": 1.6314766284318734e-06, "loss": 0.0014, "step": 76800 }, { "epoch": 2.2855697558508026, "grad_norm": 0.12197695672512054, "learning_rate": 1.630197492988536e-06, "loss": 0.0016, "step": 76810 }, { "epoch": 2.285867317334444, "grad_norm": 0.11479779332876205, "learning_rate": 1.6289187615113072e-06, "loss": 0.0016, "step": 76820 }, { "epoch": 2.2861648788180857, "grad_norm": 0.13098354637622833, "learning_rate": 1.627640434153478e-06, "loss": 0.0017, "step": 76830 }, { "epoch": 2.2864624403017273, "grad_norm": 0.02250433899462223, "learning_rate": 1.6263625110682946e-06, "loss": 0.0007, "step": 76840 }, { "epoch": 2.286760001785369, "grad_norm": 0.04276151582598686, "learning_rate": 1.6250849924089485e-06, "loss": 0.0021, "step": 76850 }, { "epoch": 2.2870575632690104, "grad_norm": 0.20783551037311554, "learning_rate": 1.6238078783285872e-06, "loss": 0.0022, "step": 76860 }, { "epoch": 2.287355124752652, "grad_norm": 0.07588344812393188, "learning_rate": 1.6225311689803124e-06, "loss": 0.0019, "step": 76870 }, { "epoch": 2.2876526862362936, "grad_norm": 0.037496551871299744, "learning_rate": 1.62125486451717e-06, "loss": 0.0024, "step": 76880 }, { "epoch": 2.287950247719935, "grad_norm": 0.13106702268123627, "learning_rate": 1.6199789650921627e-06, "loss": 0.0011, "step": 76890 }, { "epoch": 2.2882478092035767, "grad_norm": 0.23241516947746277, "learning_rate": 1.6187034708582427e-06, "loss": 0.0023, "step": 76900 }, { "epoch": 2.2885453706872183, "grad_norm": 0.24807164072990417, "learning_rate": 1.6174283819683157e-06, "loss": 0.0018, "step": 76910 }, { "epoch": 2.28884293217086, "grad_norm": 0.12345651537179947, "learning_rate": 1.6161536985752347e-06, "loss": 0.0026, "step": 76920 }, { "epoch": 2.2891404936545015, "grad_norm": 0.23861870169639587, "learning_rate": 1.6148794208318074e-06, "loss": 0.0024, "step": 76930 }, { "epoch": 2.289438055138143, "grad_norm": 0.1244572326540947, "learning_rate": 1.613605548890792e-06, "loss": 0.0018, "step": 76940 }, { "epoch": 2.2897356166217846, "grad_norm": 0.08750960230827332, "learning_rate": 1.6123320829049e-06, "loss": 0.0012, "step": 76950 }, { "epoch": 2.290033178105426, "grad_norm": 0.1255902200937271, "learning_rate": 1.6110590230267891e-06, "loss": 0.0011, "step": 76960 }, { "epoch": 2.2903307395890677, "grad_norm": 0.17289744317531586, "learning_rate": 1.6097863694090731e-06, "loss": 0.0012, "step": 76970 }, { "epoch": 2.2906283010727093, "grad_norm": 0.3265642821788788, "learning_rate": 1.6085141222043155e-06, "loss": 0.0024, "step": 76980 }, { "epoch": 2.290925862556351, "grad_norm": 0.27999189496040344, "learning_rate": 1.6072422815650324e-06, "loss": 0.0022, "step": 76990 }, { "epoch": 2.291223424039992, "grad_norm": 0.022979967296123505, "learning_rate": 1.605970847643687e-06, "loss": 0.0014, "step": 77000 }, { "epoch": 2.2915209855236336, "grad_norm": 0.025603333488106728, "learning_rate": 1.6046998205926978e-06, "loss": 0.0013, "step": 77010 }, { "epoch": 2.291818547007275, "grad_norm": 0.3146318197250366, "learning_rate": 1.6034292005644335e-06, "loss": 0.002, "step": 77020 }, { "epoch": 2.2921161084909167, "grad_norm": 0.08066949248313904, "learning_rate": 1.6021589877112164e-06, "loss": 0.0022, "step": 77030 }, { "epoch": 2.2924136699745583, "grad_norm": 0.09142283350229263, "learning_rate": 1.600889182185313e-06, "loss": 0.0013, "step": 77040 }, { "epoch": 2.2927112314582, "grad_norm": 0.18727776408195496, "learning_rate": 1.5996197841389472e-06, "loss": 0.0028, "step": 77050 }, { "epoch": 2.2930087929418415, "grad_norm": 0.1660618633031845, "learning_rate": 1.598350793724292e-06, "loss": 0.0015, "step": 77060 }, { "epoch": 2.293306354425483, "grad_norm": 0.1636677086353302, "learning_rate": 1.5970822110934746e-06, "loss": 0.0017, "step": 77070 }, { "epoch": 2.2936039159091246, "grad_norm": 0.05687107890844345, "learning_rate": 1.5958140363985664e-06, "loss": 0.0014, "step": 77080 }, { "epoch": 2.293901477392766, "grad_norm": 0.31789693236351013, "learning_rate": 1.5945462697915952e-06, "loss": 0.0023, "step": 77090 }, { "epoch": 2.2941990388764077, "grad_norm": 0.23732681572437286, "learning_rate": 1.5932789114245396e-06, "loss": 0.0023, "step": 77100 }, { "epoch": 2.2944966003600493, "grad_norm": 0.00530592305585742, "learning_rate": 1.5920119614493296e-06, "loss": 0.001, "step": 77110 }, { "epoch": 2.294794161843691, "grad_norm": 0.05716796964406967, "learning_rate": 1.5907454200178414e-06, "loss": 0.0013, "step": 77120 }, { "epoch": 2.2950917233273325, "grad_norm": 0.19050009548664093, "learning_rate": 1.5894792872819075e-06, "loss": 0.0028, "step": 77130 }, { "epoch": 2.295389284810974, "grad_norm": 0.21203337609767914, "learning_rate": 1.5882135633933116e-06, "loss": 0.0012, "step": 77140 }, { "epoch": 2.2956868462946156, "grad_norm": 0.1687835454940796, "learning_rate": 1.5869482485037818e-06, "loss": 0.0026, "step": 77150 }, { "epoch": 2.295984407778257, "grad_norm": 0.06682820618152618, "learning_rate": 1.5856833427650065e-06, "loss": 0.0012, "step": 77160 }, { "epoch": 2.2962819692618988, "grad_norm": 0.04166530445218086, "learning_rate": 1.5844188463286204e-06, "loss": 0.0014, "step": 77170 }, { "epoch": 2.2965795307455403, "grad_norm": 0.060836005955934525, "learning_rate": 1.5831547593462055e-06, "loss": 0.0021, "step": 77180 }, { "epoch": 2.296877092229182, "grad_norm": 0.20723670721054077, "learning_rate": 1.5818910819693007e-06, "loss": 0.002, "step": 77190 }, { "epoch": 2.2971746537128235, "grad_norm": 0.11514217406511307, "learning_rate": 1.580627814349393e-06, "loss": 0.0022, "step": 77200 }, { "epoch": 2.297472215196465, "grad_norm": 0.26011359691619873, "learning_rate": 1.5793649566379228e-06, "loss": 0.0015, "step": 77210 }, { "epoch": 2.2977697766801066, "grad_norm": 0.25990673899650574, "learning_rate": 1.578102508986275e-06, "loss": 0.0015, "step": 77220 }, { "epoch": 2.298067338163748, "grad_norm": 0.11730432510375977, "learning_rate": 1.5768404715457925e-06, "loss": 0.0025, "step": 77230 }, { "epoch": 2.2983648996473898, "grad_norm": 0.11309405416250229, "learning_rate": 1.575578844467766e-06, "loss": 0.0024, "step": 77240 }, { "epoch": 2.2986624611310313, "grad_norm": 0.37601757049560547, "learning_rate": 1.5743176279034384e-06, "loss": 0.0018, "step": 77250 }, { "epoch": 2.298960022614673, "grad_norm": 0.13111920654773712, "learning_rate": 1.5730568220039987e-06, "loss": 0.0012, "step": 77260 }, { "epoch": 2.2992575840983145, "grad_norm": 0.1359630525112152, "learning_rate": 1.5717964269205921e-06, "loss": 0.002, "step": 77270 }, { "epoch": 2.299555145581956, "grad_norm": 0.1862412393093109, "learning_rate": 1.570536442804313e-06, "loss": 0.0016, "step": 77280 }, { "epoch": 2.2998527070655976, "grad_norm": 0.2861948311328888, "learning_rate": 1.5692768698062066e-06, "loss": 0.0026, "step": 77290 }, { "epoch": 2.300150268549239, "grad_norm": 0.10841163992881775, "learning_rate": 1.5680177080772663e-06, "loss": 0.0019, "step": 77300 }, { "epoch": 2.300447830032881, "grad_norm": 0.13391424715518951, "learning_rate": 1.5667589577684394e-06, "loss": 0.0025, "step": 77310 }, { "epoch": 2.300745391516522, "grad_norm": 0.268708199262619, "learning_rate": 1.5655006190306227e-06, "loss": 0.0019, "step": 77320 }, { "epoch": 2.3010429530001635, "grad_norm": 0.22870181500911713, "learning_rate": 1.5642426920146647e-06, "loss": 0.0021, "step": 77330 }, { "epoch": 2.301340514483805, "grad_norm": 0.11592711508274078, "learning_rate": 1.5629851768713616e-06, "loss": 0.0018, "step": 77340 }, { "epoch": 2.3016380759674466, "grad_norm": 0.24590203166007996, "learning_rate": 1.5617280737514627e-06, "loss": 0.002, "step": 77350 }, { "epoch": 2.301935637451088, "grad_norm": 0.059278469532728195, "learning_rate": 1.5604713828056673e-06, "loss": 0.0034, "step": 77360 }, { "epoch": 2.3022331989347298, "grad_norm": 0.12419579923152924, "learning_rate": 1.5592151041846275e-06, "loss": 0.0029, "step": 77370 }, { "epoch": 2.3025307604183713, "grad_norm": 0.17343780398368835, "learning_rate": 1.5579592380389407e-06, "loss": 0.0013, "step": 77380 }, { "epoch": 2.302828321902013, "grad_norm": 0.21101225912570953, "learning_rate": 1.5567037845191596e-06, "loss": 0.0026, "step": 77390 }, { "epoch": 2.3031258833856545, "grad_norm": 0.08946412056684494, "learning_rate": 1.5554487437757848e-06, "loss": 0.0019, "step": 77400 }, { "epoch": 2.303423444869296, "grad_norm": 0.04531869664788246, "learning_rate": 1.554194115959271e-06, "loss": 0.0013, "step": 77410 }, { "epoch": 2.3037210063529376, "grad_norm": 0.10214966535568237, "learning_rate": 1.5529399012200174e-06, "loss": 0.001, "step": 77420 }, { "epoch": 2.304018567836579, "grad_norm": 0.1425730288028717, "learning_rate": 1.5516860997083766e-06, "loss": 0.0012, "step": 77430 }, { "epoch": 2.304316129320221, "grad_norm": 0.21193839609622955, "learning_rate": 1.5504327115746576e-06, "loss": 0.0013, "step": 77440 }, { "epoch": 2.3046136908038624, "grad_norm": 0.07557108998298645, "learning_rate": 1.5491797369691098e-06, "loss": 0.0022, "step": 77450 }, { "epoch": 2.304911252287504, "grad_norm": 0.11258986592292786, "learning_rate": 1.5479271760419384e-06, "loss": 0.0016, "step": 77460 }, { "epoch": 2.3052088137711455, "grad_norm": 0.15867844223976135, "learning_rate": 1.5466750289432987e-06, "loss": 0.0016, "step": 77470 }, { "epoch": 2.305506375254787, "grad_norm": 0.16464515030384064, "learning_rate": 1.5454232958232974e-06, "loss": 0.002, "step": 77480 }, { "epoch": 2.3058039367384286, "grad_norm": 0.35020485520362854, "learning_rate": 1.5441719768319875e-06, "loss": 0.0028, "step": 77490 }, { "epoch": 2.3061014982220702, "grad_norm": 0.34583571553230286, "learning_rate": 1.5429210721193754e-06, "loss": 0.0032, "step": 77500 }, { "epoch": 2.306399059705712, "grad_norm": 0.28058603405952454, "learning_rate": 1.5416705818354183e-06, "loss": 0.0022, "step": 77510 }, { "epoch": 2.3066966211893534, "grad_norm": 0.09686379134654999, "learning_rate": 1.5404205061300242e-06, "loss": 0.0025, "step": 77520 }, { "epoch": 2.306994182672995, "grad_norm": 0.07107793539762497, "learning_rate": 1.5391708451530468e-06, "loss": 0.0014, "step": 77530 }, { "epoch": 2.3072917441566365, "grad_norm": 0.2695772349834442, "learning_rate": 1.537921599054295e-06, "loss": 0.0029, "step": 77540 }, { "epoch": 2.307589305640278, "grad_norm": 0.08275149017572403, "learning_rate": 1.5366727679835275e-06, "loss": 0.002, "step": 77550 }, { "epoch": 2.3078868671239197, "grad_norm": 0.16307507455348969, "learning_rate": 1.5354243520904493e-06, "loss": 0.0056, "step": 77560 }, { "epoch": 2.308184428607561, "grad_norm": 0.182031512260437, "learning_rate": 1.5341763515247193e-06, "loss": 0.002, "step": 77570 }, { "epoch": 2.3084819900912024, "grad_norm": 0.16973240673542023, "learning_rate": 1.5329287664359466e-06, "loss": 0.0013, "step": 77580 }, { "epoch": 2.308779551574844, "grad_norm": 0.1280364990234375, "learning_rate": 1.5316815969736902e-06, "loss": 0.0013, "step": 77590 }, { "epoch": 2.3090771130584855, "grad_norm": 0.2272191196680069, "learning_rate": 1.5304348432874566e-06, "loss": 0.0017, "step": 77600 }, { "epoch": 2.309374674542127, "grad_norm": 0.07713200896978378, "learning_rate": 1.5291885055267048e-06, "loss": 0.0011, "step": 77610 }, { "epoch": 2.3096722360257687, "grad_norm": 0.23303309082984924, "learning_rate": 1.5279425838408441e-06, "loss": 0.0019, "step": 77620 }, { "epoch": 2.3099697975094102, "grad_norm": 0.1561150848865509, "learning_rate": 1.526697078379235e-06, "loss": 0.0014, "step": 77630 }, { "epoch": 2.310267358993052, "grad_norm": 0.16683143377304077, "learning_rate": 1.5254519892911835e-06, "loss": 0.0026, "step": 77640 }, { "epoch": 2.3105649204766934, "grad_norm": 0.14171555638313293, "learning_rate": 1.5242073167259502e-06, "loss": 0.0019, "step": 77650 }, { "epoch": 2.310862481960335, "grad_norm": 0.08490313589572906, "learning_rate": 1.5229630608327445e-06, "loss": 0.001, "step": 77660 }, { "epoch": 2.3111600434439765, "grad_norm": 0.0823393166065216, "learning_rate": 1.5217192217607268e-06, "loss": 0.0014, "step": 77670 }, { "epoch": 2.311457604927618, "grad_norm": 0.24662956595420837, "learning_rate": 1.5204757996590042e-06, "loss": 0.0012, "step": 77680 }, { "epoch": 2.3117551664112597, "grad_norm": 0.16372603178024292, "learning_rate": 1.5192327946766361e-06, "loss": 0.0019, "step": 77690 }, { "epoch": 2.3120527278949012, "grad_norm": 0.06043723225593567, "learning_rate": 1.5179902069626328e-06, "loss": 0.0015, "step": 77700 }, { "epoch": 2.312350289378543, "grad_norm": 0.25829607248306274, "learning_rate": 1.5167480366659538e-06, "loss": 0.0016, "step": 77710 }, { "epoch": 2.3126478508621844, "grad_norm": 0.20689578354358673, "learning_rate": 1.5155062839355072e-06, "loss": 0.0018, "step": 77720 }, { "epoch": 2.312945412345826, "grad_norm": 0.08484106510877609, "learning_rate": 1.5142649489201532e-06, "loss": 0.0022, "step": 77730 }, { "epoch": 2.3132429738294675, "grad_norm": 0.16318650543689728, "learning_rate": 1.5130240317687017e-06, "loss": 0.0023, "step": 77740 }, { "epoch": 2.313540535313109, "grad_norm": 0.11932779848575592, "learning_rate": 1.5117835326299097e-06, "loss": 0.0016, "step": 77750 }, { "epoch": 2.3138380967967507, "grad_norm": 0.158966064453125, "learning_rate": 1.5105434516524864e-06, "loss": 0.0024, "step": 77760 }, { "epoch": 2.3141356582803922, "grad_norm": 0.18854466080665588, "learning_rate": 1.5093037889850913e-06, "loss": 0.0016, "step": 77770 }, { "epoch": 2.314433219764034, "grad_norm": 0.07801081240177155, "learning_rate": 1.5080645447763342e-06, "loss": 0.0015, "step": 77780 }, { "epoch": 2.3147307812476754, "grad_norm": 0.10853176563978195, "learning_rate": 1.506825719174771e-06, "loss": 0.0007, "step": 77790 }, { "epoch": 2.315028342731317, "grad_norm": 0.10766758024692535, "learning_rate": 1.5055873123289111e-06, "loss": 0.0013, "step": 77800 }, { "epoch": 2.3153259042149585, "grad_norm": 0.03330691531300545, "learning_rate": 1.5043493243872126e-06, "loss": 0.0018, "step": 77810 }, { "epoch": 2.3156234656986, "grad_norm": 0.17238283157348633, "learning_rate": 1.503111755498085e-06, "loss": 0.0017, "step": 77820 }, { "epoch": 2.3159210271822417, "grad_norm": 0.15235908329486847, "learning_rate": 1.5018746058098826e-06, "loss": 0.0013, "step": 77830 }, { "epoch": 2.3162185886658833, "grad_norm": 0.18332625925540924, "learning_rate": 1.5006378754709145e-06, "loss": 0.0016, "step": 77840 }, { "epoch": 2.316516150149525, "grad_norm": 0.13703565299510956, "learning_rate": 1.499401564629438e-06, "loss": 0.0013, "step": 77850 }, { "epoch": 2.3168137116331664, "grad_norm": 0.05814899131655693, "learning_rate": 1.4981656734336608e-06, "loss": 0.002, "step": 77860 }, { "epoch": 2.317111273116808, "grad_norm": 0.1292216032743454, "learning_rate": 1.4969302020317371e-06, "loss": 0.0024, "step": 77870 }, { "epoch": 2.3174088346004496, "grad_norm": 0.10219831019639969, "learning_rate": 1.4956951505717743e-06, "loss": 0.0019, "step": 77880 }, { "epoch": 2.317706396084091, "grad_norm": 0.16173478960990906, "learning_rate": 1.494460519201828e-06, "loss": 0.0021, "step": 77890 }, { "epoch": 2.3180039575677323, "grad_norm": 0.03831200301647186, "learning_rate": 1.4932263080699055e-06, "loss": 0.0027, "step": 77900 }, { "epoch": 2.318301519051374, "grad_norm": 0.19487647712230682, "learning_rate": 1.4919925173239591e-06, "loss": 0.0016, "step": 77910 }, { "epoch": 2.3185990805350154, "grad_norm": 0.1778334528207779, "learning_rate": 1.4907591471118942e-06, "loss": 0.0015, "step": 77920 }, { "epoch": 2.318896642018657, "grad_norm": 0.0343363992869854, "learning_rate": 1.4895261975815673e-06, "loss": 0.0015, "step": 77930 }, { "epoch": 2.3191942035022985, "grad_norm": 0.06936893612146378, "learning_rate": 1.4882936688807793e-06, "loss": 0.0021, "step": 77940 }, { "epoch": 2.31949176498594, "grad_norm": 0.06553588062524796, "learning_rate": 1.4870615611572846e-06, "loss": 0.0018, "step": 77950 }, { "epoch": 2.3197893264695817, "grad_norm": 0.12018115073442459, "learning_rate": 1.4858298745587863e-06, "loss": 0.0015, "step": 77960 }, { "epoch": 2.3200868879532233, "grad_norm": 0.17415368556976318, "learning_rate": 1.484598609232939e-06, "loss": 0.0013, "step": 77970 }, { "epoch": 2.320384449436865, "grad_norm": 0.5651643872261047, "learning_rate": 1.4833677653273405e-06, "loss": 0.0034, "step": 77980 }, { "epoch": 2.3206820109205064, "grad_norm": 0.17430338263511658, "learning_rate": 1.4821373429895447e-06, "loss": 0.0024, "step": 77990 }, { "epoch": 2.320979572404148, "grad_norm": 0.0296524278819561, "learning_rate": 1.4809073423670522e-06, "loss": 0.0017, "step": 78000 }, { "epoch": 2.3212771338877896, "grad_norm": 0.43806543946266174, "learning_rate": 1.4796777636073134e-06, "loss": 0.0019, "step": 78010 }, { "epoch": 2.321574695371431, "grad_norm": 0.211792454123497, "learning_rate": 1.4784486068577274e-06, "loss": 0.0015, "step": 78020 }, { "epoch": 2.3218722568550727, "grad_norm": 0.24913324415683746, "learning_rate": 1.477219872265645e-06, "loss": 0.0024, "step": 78030 }, { "epoch": 2.3221698183387143, "grad_norm": 0.16565269231796265, "learning_rate": 1.4759915599783647e-06, "loss": 0.0022, "step": 78040 }, { "epoch": 2.322467379822356, "grad_norm": 0.019018253311514854, "learning_rate": 1.4747636701431322e-06, "loss": 0.0019, "step": 78050 }, { "epoch": 2.3227649413059974, "grad_norm": 0.10338173806667328, "learning_rate": 1.473536202907146e-06, "loss": 0.0019, "step": 78060 }, { "epoch": 2.323062502789639, "grad_norm": 0.03643672168254852, "learning_rate": 1.472309158417553e-06, "loss": 0.0013, "step": 78070 }, { "epoch": 2.3233600642732806, "grad_norm": 0.194087952375412, "learning_rate": 1.471082536821451e-06, "loss": 0.0022, "step": 78080 }, { "epoch": 2.323657625756922, "grad_norm": 0.19887185096740723, "learning_rate": 1.4698563382658814e-06, "loss": 0.0017, "step": 78090 }, { "epoch": 2.3239551872405637, "grad_norm": 0.05667733773589134, "learning_rate": 1.4686305628978409e-06, "loss": 0.0025, "step": 78100 }, { "epoch": 2.3242527487242053, "grad_norm": 0.146173357963562, "learning_rate": 1.4674052108642734e-06, "loss": 0.0023, "step": 78110 }, { "epoch": 2.324550310207847, "grad_norm": 0.18078067898750305, "learning_rate": 1.466180282312073e-06, "loss": 0.0018, "step": 78120 }, { "epoch": 2.3248478716914884, "grad_norm": 0.044963132590055466, "learning_rate": 1.464955777388079e-06, "loss": 0.0014, "step": 78130 }, { "epoch": 2.32514543317513, "grad_norm": 0.060075368732213974, "learning_rate": 1.4637316962390853e-06, "loss": 0.0011, "step": 78140 }, { "epoch": 2.325442994658771, "grad_norm": 0.2576334774494171, "learning_rate": 1.4625080390118312e-06, "loss": 0.0018, "step": 78150 }, { "epoch": 2.3257405561424127, "grad_norm": 0.038645025342702866, "learning_rate": 1.46128480585301e-06, "loss": 0.0012, "step": 78160 }, { "epoch": 2.3260381176260543, "grad_norm": 0.1301213949918747, "learning_rate": 1.4600619969092562e-06, "loss": 0.0024, "step": 78170 }, { "epoch": 2.326335679109696, "grad_norm": 0.11991569399833679, "learning_rate": 1.4588396123271597e-06, "loss": 0.0024, "step": 78180 }, { "epoch": 2.3266332405933374, "grad_norm": 0.0936146080493927, "learning_rate": 1.4576176522532587e-06, "loss": 0.0017, "step": 78190 }, { "epoch": 2.326930802076979, "grad_norm": 0.1288813203573227, "learning_rate": 1.4563961168340408e-06, "loss": 0.0016, "step": 78200 }, { "epoch": 2.3272283635606206, "grad_norm": 0.05318891629576683, "learning_rate": 1.4551750062159386e-06, "loss": 0.0011, "step": 78210 }, { "epoch": 2.327525925044262, "grad_norm": 0.16434942185878754, "learning_rate": 1.4539543205453377e-06, "loss": 0.0019, "step": 78220 }, { "epoch": 2.3278234865279037, "grad_norm": 0.19444531202316284, "learning_rate": 1.4527340599685725e-06, "loss": 0.0033, "step": 78230 }, { "epoch": 2.3281210480115453, "grad_norm": 0.07424703240394592, "learning_rate": 1.4515142246319263e-06, "loss": 0.0018, "step": 78240 }, { "epoch": 2.328418609495187, "grad_norm": 0.07774963229894638, "learning_rate": 1.4502948146816287e-06, "loss": 0.0019, "step": 78250 }, { "epoch": 2.3287161709788284, "grad_norm": 0.2050045132637024, "learning_rate": 1.4490758302638618e-06, "loss": 0.002, "step": 78260 }, { "epoch": 2.32901373246247, "grad_norm": 0.009053467772901058, "learning_rate": 1.4478572715247575e-06, "loss": 0.0044, "step": 78270 }, { "epoch": 2.3293112939461116, "grad_norm": 0.10672732442617416, "learning_rate": 1.4466391386103884e-06, "loss": 0.0021, "step": 78280 }, { "epoch": 2.329608855429753, "grad_norm": 0.38642045855522156, "learning_rate": 1.4454214316667876e-06, "loss": 0.002, "step": 78290 }, { "epoch": 2.3299064169133947, "grad_norm": 0.15970778465270996, "learning_rate": 1.4442041508399303e-06, "loss": 0.0031, "step": 78300 }, { "epoch": 2.3302039783970363, "grad_norm": 0.12437552213668823, "learning_rate": 1.4429872962757436e-06, "loss": 0.0016, "step": 78310 }, { "epoch": 2.330501539880678, "grad_norm": 0.11180129647254944, "learning_rate": 1.4417708681200988e-06, "loss": 0.0031, "step": 78320 }, { "epoch": 2.3307991013643194, "grad_norm": 0.09183803200721741, "learning_rate": 1.4405548665188202e-06, "loss": 0.0018, "step": 78330 }, { "epoch": 2.331096662847961, "grad_norm": 0.18324585258960724, "learning_rate": 1.4393392916176819e-06, "loss": 0.0021, "step": 78340 }, { "epoch": 2.3313942243316026, "grad_norm": 0.18955180048942566, "learning_rate": 1.4381241435624022e-06, "loss": 0.0024, "step": 78350 }, { "epoch": 2.331691785815244, "grad_norm": 0.059566523879766464, "learning_rate": 1.436909422498652e-06, "loss": 0.0016, "step": 78360 }, { "epoch": 2.3319893472988857, "grad_norm": 0.06737086176872253, "learning_rate": 1.43569512857205e-06, "loss": 0.0016, "step": 78370 }, { "epoch": 2.3322869087825273, "grad_norm": 0.2716391086578369, "learning_rate": 1.4344812619281657e-06, "loss": 0.0017, "step": 78380 }, { "epoch": 2.332584470266169, "grad_norm": 0.05274061858654022, "learning_rate": 1.433267822712512e-06, "loss": 0.0015, "step": 78390 }, { "epoch": 2.3328820317498105, "grad_norm": 0.052147991955280304, "learning_rate": 1.4320548110705546e-06, "loss": 0.0018, "step": 78400 }, { "epoch": 2.333179593233452, "grad_norm": 0.15730233490467072, "learning_rate": 1.4308422271477085e-06, "loss": 0.0018, "step": 78410 }, { "epoch": 2.3334771547170936, "grad_norm": 0.09422080218791962, "learning_rate": 1.4296300710893367e-06, "loss": 0.0015, "step": 78420 }, { "epoch": 2.333774716200735, "grad_norm": 0.16308026015758514, "learning_rate": 1.4284183430407483e-06, "loss": 0.0015, "step": 78430 }, { "epoch": 2.3340722776843768, "grad_norm": 0.30836424231529236, "learning_rate": 1.427207043147204e-06, "loss": 0.0023, "step": 78440 }, { "epoch": 2.3343698391680183, "grad_norm": 0.25403937697410583, "learning_rate": 1.4259961715539127e-06, "loss": 0.002, "step": 78450 }, { "epoch": 2.33466740065166, "grad_norm": 0.07690567523241043, "learning_rate": 1.4247857284060329e-06, "loss": 0.0012, "step": 78460 }, { "epoch": 2.334964962135301, "grad_norm": 0.33645710349082947, "learning_rate": 1.4235757138486672e-06, "loss": 0.0029, "step": 78470 }, { "epoch": 2.3352625236189426, "grad_norm": 0.18640002608299255, "learning_rate": 1.422366128026872e-06, "loss": 0.0014, "step": 78480 }, { "epoch": 2.335560085102584, "grad_norm": 0.14989447593688965, "learning_rate": 1.4211569710856498e-06, "loss": 0.0018, "step": 78490 }, { "epoch": 2.3358576465862257, "grad_norm": 0.28063735365867615, "learning_rate": 1.4199482431699546e-06, "loss": 0.0017, "step": 78500 }, { "epoch": 2.3361552080698673, "grad_norm": 0.12437724322080612, "learning_rate": 1.418739944424683e-06, "loss": 0.0012, "step": 78510 }, { "epoch": 2.336452769553509, "grad_norm": 0.21539852023124695, "learning_rate": 1.4175320749946853e-06, "loss": 0.0019, "step": 78520 }, { "epoch": 2.3367503310371505, "grad_norm": 0.2297709584236145, "learning_rate": 1.4163246350247594e-06, "loss": 0.0021, "step": 78530 }, { "epoch": 2.337047892520792, "grad_norm": 0.1823553740978241, "learning_rate": 1.4151176246596515e-06, "loss": 0.0027, "step": 78540 }, { "epoch": 2.3373454540044336, "grad_norm": 0.061790477484464645, "learning_rate": 1.413911044044054e-06, "loss": 0.0022, "step": 78550 }, { "epoch": 2.337643015488075, "grad_norm": 0.02582767978310585, "learning_rate": 1.412704893322609e-06, "loss": 0.0014, "step": 78560 }, { "epoch": 2.3379405769717168, "grad_norm": 0.19104555249214172, "learning_rate": 1.4114991726399123e-06, "loss": 0.0015, "step": 78570 }, { "epoch": 2.3382381384553583, "grad_norm": 0.026318687945604324, "learning_rate": 1.4102938821404994e-06, "loss": 0.0016, "step": 78580 }, { "epoch": 2.338535699939, "grad_norm": 0.16980494558811188, "learning_rate": 1.4090890219688603e-06, "loss": 0.003, "step": 78590 }, { "epoch": 2.3388332614226415, "grad_norm": 0.07260610163211823, "learning_rate": 1.4078845922694307e-06, "loss": 0.0024, "step": 78600 }, { "epoch": 2.339130822906283, "grad_norm": 0.1394941508769989, "learning_rate": 1.4066805931865974e-06, "loss": 0.0017, "step": 78610 }, { "epoch": 2.3394283843899246, "grad_norm": 0.10430920869112015, "learning_rate": 1.405477024864691e-06, "loss": 0.0017, "step": 78620 }, { "epoch": 2.339725945873566, "grad_norm": 0.31473496556282043, "learning_rate": 1.4042738874479943e-06, "loss": 0.002, "step": 78630 }, { "epoch": 2.3400235073572078, "grad_norm": 0.1447741687297821, "learning_rate": 1.4030711810807378e-06, "loss": 0.0017, "step": 78640 }, { "epoch": 2.3403210688408493, "grad_norm": 0.08261802792549133, "learning_rate": 1.4018689059071016e-06, "loss": 0.0021, "step": 78650 }, { "epoch": 2.340618630324491, "grad_norm": 0.09648752212524414, "learning_rate": 1.400667062071208e-06, "loss": 0.0021, "step": 78660 }, { "epoch": 2.3409161918081325, "grad_norm": 0.0794181227684021, "learning_rate": 1.3994656497171348e-06, "loss": 0.0032, "step": 78670 }, { "epoch": 2.341213753291774, "grad_norm": 0.11978431791067123, "learning_rate": 1.3982646689889063e-06, "loss": 0.0029, "step": 78680 }, { "epoch": 2.3415113147754156, "grad_norm": 0.6468332409858704, "learning_rate": 1.3970641200304907e-06, "loss": 0.0017, "step": 78690 }, { "epoch": 2.341808876259057, "grad_norm": 0.29216092824935913, "learning_rate": 1.39586400298581e-06, "loss": 0.0024, "step": 78700 }, { "epoch": 2.3421064377426988, "grad_norm": 0.02094195783138275, "learning_rate": 1.3946643179987313e-06, "loss": 0.002, "step": 78710 }, { "epoch": 2.34240399922634, "grad_norm": 0.11562145501375198, "learning_rate": 1.3934650652130727e-06, "loss": 0.0046, "step": 78720 }, { "epoch": 2.3427015607099815, "grad_norm": 0.03005025163292885, "learning_rate": 1.3922662447725954e-06, "loss": 0.0013, "step": 78730 }, { "epoch": 2.342999122193623, "grad_norm": 0.14966492354869843, "learning_rate": 1.3910678568210133e-06, "loss": 0.002, "step": 78740 }, { "epoch": 2.3432966836772646, "grad_norm": 0.1805545687675476, "learning_rate": 1.389869901501988e-06, "loss": 0.002, "step": 78750 }, { "epoch": 2.343594245160906, "grad_norm": 0.02186119556427002, "learning_rate": 1.388672378959129e-06, "loss": 0.0019, "step": 78760 }, { "epoch": 2.3438918066445478, "grad_norm": 0.1390029639005661, "learning_rate": 1.38747528933599e-06, "loss": 0.0046, "step": 78770 }, { "epoch": 2.3441893681281893, "grad_norm": 0.18450050055980682, "learning_rate": 1.386278632776078e-06, "loss": 0.0023, "step": 78780 }, { "epoch": 2.344486929611831, "grad_norm": 0.13861986994743347, "learning_rate": 1.385082409422846e-06, "loss": 0.0016, "step": 78790 }, { "epoch": 2.3447844910954725, "grad_norm": 0.10771354287862778, "learning_rate": 1.3838866194196964e-06, "loss": 0.0012, "step": 78800 }, { "epoch": 2.345082052579114, "grad_norm": 0.2018684297800064, "learning_rate": 1.382691262909976e-06, "loss": 0.0017, "step": 78810 }, { "epoch": 2.3453796140627556, "grad_norm": 0.039086222648620605, "learning_rate": 1.3814963400369835e-06, "loss": 0.0031, "step": 78820 }, { "epoch": 2.345677175546397, "grad_norm": 0.17771504819393158, "learning_rate": 1.3803018509439637e-06, "loss": 0.0021, "step": 78830 }, { "epoch": 2.345974737030039, "grad_norm": 0.1967097967863083, "learning_rate": 1.3791077957741122e-06, "loss": 0.0019, "step": 78840 }, { "epoch": 2.3462722985136804, "grad_norm": 0.11314856261014938, "learning_rate": 1.3779141746705648e-06, "loss": 0.0026, "step": 78850 }, { "epoch": 2.346569859997322, "grad_norm": 0.23395679891109467, "learning_rate": 1.376720987776416e-06, "loss": 0.0029, "step": 78860 }, { "epoch": 2.3468674214809635, "grad_norm": 0.2693058252334595, "learning_rate": 1.375528235234702e-06, "loss": 0.0016, "step": 78870 }, { "epoch": 2.347164982964605, "grad_norm": 0.2412046492099762, "learning_rate": 1.3743359171884062e-06, "loss": 0.0022, "step": 78880 }, { "epoch": 2.3474625444482466, "grad_norm": 0.08111574500799179, "learning_rate": 1.373144033780463e-06, "loss": 0.001, "step": 78890 }, { "epoch": 2.347760105931888, "grad_norm": 0.17398753762245178, "learning_rate": 1.3719525851537524e-06, "loss": 0.0029, "step": 78900 }, { "epoch": 2.34805766741553, "grad_norm": 0.1917182356119156, "learning_rate": 1.3707615714511053e-06, "loss": 0.0024, "step": 78910 }, { "epoch": 2.3483552288991714, "grad_norm": 0.1290646344423294, "learning_rate": 1.369570992815295e-06, "loss": 0.0021, "step": 78920 }, { "epoch": 2.348652790382813, "grad_norm": 0.21747168898582458, "learning_rate": 1.3683808493890477e-06, "loss": 0.0011, "step": 78930 }, { "epoch": 2.3489503518664545, "grad_norm": 0.1757008582353592, "learning_rate": 1.3671911413150357e-06, "loss": 0.0024, "step": 78940 }, { "epoch": 2.349247913350096, "grad_norm": 0.242671400308609, "learning_rate": 1.3660018687358812e-06, "loss": 0.0022, "step": 78950 }, { "epoch": 2.3495454748337377, "grad_norm": 0.29317113757133484, "learning_rate": 1.3648130317941482e-06, "loss": 0.0021, "step": 78960 }, { "epoch": 2.3498430363173792, "grad_norm": 0.08653269708156586, "learning_rate": 1.3636246306323547e-06, "loss": 0.002, "step": 78970 }, { "epoch": 2.350140597801021, "grad_norm": 0.1358657330274582, "learning_rate": 1.3624366653929644e-06, "loss": 0.0022, "step": 78980 }, { "epoch": 2.3504381592846624, "grad_norm": 0.07577149569988251, "learning_rate": 1.3612491362183887e-06, "loss": 0.0022, "step": 78990 }, { "epoch": 2.350735720768304, "grad_norm": 0.11211477220058441, "learning_rate": 1.3600620432509847e-06, "loss": 0.0014, "step": 79000 }, { "epoch": 2.3510332822519455, "grad_norm": 0.3243756890296936, "learning_rate": 1.35887538663306e-06, "loss": 0.003, "step": 79010 }, { "epoch": 2.351330843735587, "grad_norm": 0.07557716220617294, "learning_rate": 1.3576891665068692e-06, "loss": 0.0014, "step": 79020 }, { "epoch": 2.3516284052192287, "grad_norm": 0.06981632858514786, "learning_rate": 1.3565033830146163e-06, "loss": 0.0012, "step": 79030 }, { "epoch": 2.35192596670287, "grad_norm": 0.05962896719574928, "learning_rate": 1.355318036298447e-06, "loss": 0.0017, "step": 79040 }, { "epoch": 2.3522235281865114, "grad_norm": 0.13038688898086548, "learning_rate": 1.3541331265004603e-06, "loss": 0.0015, "step": 79050 }, { "epoch": 2.352521089670153, "grad_norm": 0.15596942603588104, "learning_rate": 1.3529486537627034e-06, "loss": 0.0038, "step": 79060 }, { "epoch": 2.3528186511537945, "grad_norm": 0.08614444732666016, "learning_rate": 1.3517646182271654e-06, "loss": 0.0015, "step": 79070 }, { "epoch": 2.353116212637436, "grad_norm": 0.4577798843383789, "learning_rate": 1.3505810200357878e-06, "loss": 0.0023, "step": 79080 }, { "epoch": 2.3534137741210777, "grad_norm": 0.18688790500164032, "learning_rate": 1.349397859330458e-06, "loss": 0.0024, "step": 79090 }, { "epoch": 2.3537113356047192, "grad_norm": 0.3006858229637146, "learning_rate": 1.3482151362530137e-06, "loss": 0.001, "step": 79100 }, { "epoch": 2.354008897088361, "grad_norm": 0.3002493381500244, "learning_rate": 1.347032850945234e-06, "loss": 0.0022, "step": 79110 }, { "epoch": 2.3543064585720024, "grad_norm": 0.09585200250148773, "learning_rate": 1.3458510035488508e-06, "loss": 0.0024, "step": 79120 }, { "epoch": 2.354604020055644, "grad_norm": 0.4242255389690399, "learning_rate": 1.3446695942055421e-06, "loss": 0.0035, "step": 79130 }, { "epoch": 2.3549015815392855, "grad_norm": 0.10410235077142715, "learning_rate": 1.3434886230569334e-06, "loss": 0.0027, "step": 79140 }, { "epoch": 2.355199143022927, "grad_norm": 0.07936882227659225, "learning_rate": 1.3423080902445968e-06, "loss": 0.0013, "step": 79150 }, { "epoch": 2.3554967045065687, "grad_norm": 0.03316224738955498, "learning_rate": 1.3411279959100532e-06, "loss": 0.0015, "step": 79160 }, { "epoch": 2.3557942659902102, "grad_norm": 0.0707930251955986, "learning_rate": 1.3399483401947715e-06, "loss": 0.0012, "step": 79170 }, { "epoch": 2.356091827473852, "grad_norm": 0.21605467796325684, "learning_rate": 1.3387691232401634e-06, "loss": 0.0021, "step": 79180 }, { "epoch": 2.3563893889574934, "grad_norm": 0.08901124447584152, "learning_rate": 1.3375903451875938e-06, "loss": 0.0019, "step": 79190 }, { "epoch": 2.356686950441135, "grad_norm": 0.14605380594730377, "learning_rate": 1.336412006178372e-06, "loss": 0.0018, "step": 79200 }, { "epoch": 2.3569845119247765, "grad_norm": 0.13629941642284393, "learning_rate": 1.3352341063537572e-06, "loss": 0.0013, "step": 79210 }, { "epoch": 2.357282073408418, "grad_norm": 0.06827249377965927, "learning_rate": 1.3340566458549498e-06, "loss": 0.0013, "step": 79220 }, { "epoch": 2.3575796348920597, "grad_norm": 0.26123881340026855, "learning_rate": 1.3328796248231047e-06, "loss": 0.0007, "step": 79230 }, { "epoch": 2.3578771963757013, "grad_norm": 0.19252412021160126, "learning_rate": 1.3317030433993206e-06, "loss": 0.0019, "step": 79240 }, { "epoch": 2.358174757859343, "grad_norm": 0.12225823849439621, "learning_rate": 1.3305269017246448e-06, "loss": 0.0009, "step": 79250 }, { "epoch": 2.3584723193429844, "grad_norm": 0.12089332938194275, "learning_rate": 1.3293511999400694e-06, "loss": 0.0027, "step": 79260 }, { "epoch": 2.358769880826626, "grad_norm": 0.04114624112844467, "learning_rate": 1.3281759381865361e-06, "loss": 0.0014, "step": 79270 }, { "epoch": 2.3590674423102675, "grad_norm": 0.18737922608852386, "learning_rate": 1.3270011166049335e-06, "loss": 0.0016, "step": 79280 }, { "epoch": 2.3593650037939087, "grad_norm": 0.192830890417099, "learning_rate": 1.3258267353360993e-06, "loss": 0.0017, "step": 79290 }, { "epoch": 2.3596625652775503, "grad_norm": 0.05235246941447258, "learning_rate": 1.324652794520812e-06, "loss": 0.0018, "step": 79300 }, { "epoch": 2.359960126761192, "grad_norm": 0.108503058552742, "learning_rate": 1.3234792942998043e-06, "loss": 0.003, "step": 79310 }, { "epoch": 2.3602576882448334, "grad_norm": 0.060257021337747574, "learning_rate": 1.3223062348137528e-06, "loss": 0.004, "step": 79320 }, { "epoch": 2.360555249728475, "grad_norm": 0.0926232859492302, "learning_rate": 1.3211336162032834e-06, "loss": 0.0008, "step": 79330 }, { "epoch": 2.3608528112121165, "grad_norm": 0.05838558450341225, "learning_rate": 1.3199614386089643e-06, "loss": 0.0016, "step": 79340 }, { "epoch": 2.361150372695758, "grad_norm": 0.05757472291588783, "learning_rate": 1.3187897021713163e-06, "loss": 0.0022, "step": 79350 }, { "epoch": 2.3614479341793997, "grad_norm": 0.15237560868263245, "learning_rate": 1.3176184070308052e-06, "loss": 0.0013, "step": 79360 }, { "epoch": 2.3617454956630413, "grad_norm": 0.09239230304956436, "learning_rate": 1.3164475533278448e-06, "loss": 0.0008, "step": 79370 }, { "epoch": 2.362043057146683, "grad_norm": 0.0860324278473854, "learning_rate": 1.315277141202792e-06, "loss": 0.0012, "step": 79380 }, { "epoch": 2.3623406186303244, "grad_norm": 0.20080658793449402, "learning_rate": 1.3141071707959552e-06, "loss": 0.0015, "step": 79390 }, { "epoch": 2.362638180113966, "grad_norm": 0.11206545680761337, "learning_rate": 1.3129376422475893e-06, "loss": 0.0025, "step": 79400 }, { "epoch": 2.3629357415976076, "grad_norm": 0.11306702345609665, "learning_rate": 1.3117685556978942e-06, "loss": 0.0007, "step": 79410 }, { "epoch": 2.363233303081249, "grad_norm": 0.15568460524082184, "learning_rate": 1.3105999112870188e-06, "loss": 0.0015, "step": 79420 }, { "epoch": 2.3635308645648907, "grad_norm": 0.10349654406309128, "learning_rate": 1.309431709155058e-06, "loss": 0.0012, "step": 79430 }, { "epoch": 2.3638284260485323, "grad_norm": 0.11685828864574432, "learning_rate": 1.308263949442055e-06, "loss": 0.0015, "step": 79440 }, { "epoch": 2.364125987532174, "grad_norm": 0.20412208139896393, "learning_rate": 1.307096632287997e-06, "loss": 0.0016, "step": 79450 }, { "epoch": 2.3644235490158154, "grad_norm": 0.2422437220811844, "learning_rate": 1.3059297578328194e-06, "loss": 0.002, "step": 79460 }, { "epoch": 2.364721110499457, "grad_norm": 0.03144177421927452, "learning_rate": 1.3047633262164084e-06, "loss": 0.0015, "step": 79470 }, { "epoch": 2.3650186719830986, "grad_norm": 0.23553767800331116, "learning_rate": 1.3035973375785899e-06, "loss": 0.0024, "step": 79480 }, { "epoch": 2.36531623346674, "grad_norm": 0.24114114046096802, "learning_rate": 1.3024317920591423e-06, "loss": 0.0018, "step": 79490 }, { "epoch": 2.3656137949503817, "grad_norm": 0.07186047732830048, "learning_rate": 1.3012666897977893e-06, "loss": 0.002, "step": 79500 }, { "epoch": 2.3659113564340233, "grad_norm": 0.1284850388765335, "learning_rate": 1.3001020309342032e-06, "loss": 0.0021, "step": 79510 }, { "epoch": 2.366208917917665, "grad_norm": 0.07459261268377304, "learning_rate": 1.2989378156079973e-06, "loss": 0.0014, "step": 79520 }, { "epoch": 2.3665064794013064, "grad_norm": 0.20921136438846588, "learning_rate": 1.2977740439587382e-06, "loss": 0.0022, "step": 79530 }, { "epoch": 2.366804040884948, "grad_norm": 0.15835048258304596, "learning_rate": 1.2966107161259368e-06, "loss": 0.0017, "step": 79540 }, { "epoch": 2.3671016023685896, "grad_norm": 0.17431388795375824, "learning_rate": 1.295447832249051e-06, "loss": 0.0015, "step": 79550 }, { "epoch": 2.367399163852231, "grad_norm": 0.07114063203334808, "learning_rate": 1.2942853924674843e-06, "loss": 0.0025, "step": 79560 }, { "epoch": 2.3676967253358727, "grad_norm": 0.12856270372867584, "learning_rate": 1.2931233969205881e-06, "loss": 0.0022, "step": 79570 }, { "epoch": 2.3679942868195143, "grad_norm": 0.12979261577129364, "learning_rate": 1.2919618457476608e-06, "loss": 0.0021, "step": 79580 }, { "epoch": 2.368291848303156, "grad_norm": 0.537708580493927, "learning_rate": 1.2908007390879485e-06, "loss": 0.0034, "step": 79590 }, { "epoch": 2.3685894097867974, "grad_norm": 0.13404421508312225, "learning_rate": 1.2896400770806394e-06, "loss": 0.0014, "step": 79600 }, { "epoch": 2.3688869712704386, "grad_norm": 0.05592932179570198, "learning_rate": 1.2884798598648746e-06, "loss": 0.0017, "step": 79610 }, { "epoch": 2.36918453275408, "grad_norm": 0.07968320697546005, "learning_rate": 1.287320087579737e-06, "loss": 0.0016, "step": 79620 }, { "epoch": 2.3694820942377217, "grad_norm": 0.08089584857225418, "learning_rate": 1.2861607603642613e-06, "loss": 0.0019, "step": 79630 }, { "epoch": 2.3697796557213633, "grad_norm": 0.045870598405599594, "learning_rate": 1.2850018783574208e-06, "loss": 0.0011, "step": 79640 }, { "epoch": 2.370077217205005, "grad_norm": 0.3509431481361389, "learning_rate": 1.2838434416981439e-06, "loss": 0.0027, "step": 79650 }, { "epoch": 2.3703747786886464, "grad_norm": 0.15279734134674072, "learning_rate": 1.2826854505253e-06, "loss": 0.0045, "step": 79660 }, { "epoch": 2.370672340172288, "grad_norm": 0.2863384485244751, "learning_rate": 1.2815279049777102e-06, "loss": 0.0027, "step": 79670 }, { "epoch": 2.3709699016559296, "grad_norm": 0.17448818683624268, "learning_rate": 1.280370805194135e-06, "loss": 0.0016, "step": 79680 }, { "epoch": 2.371267463139571, "grad_norm": 0.08999145030975342, "learning_rate": 1.2792141513132878e-06, "loss": 0.0007, "step": 79690 }, { "epoch": 2.3715650246232127, "grad_norm": 0.16471898555755615, "learning_rate": 1.2780579434738255e-06, "loss": 0.0017, "step": 79700 }, { "epoch": 2.3718625861068543, "grad_norm": 0.0590752474963665, "learning_rate": 1.2769021818143523e-06, "loss": 0.0016, "step": 79710 }, { "epoch": 2.372160147590496, "grad_norm": 0.3804307281970978, "learning_rate": 1.2757468664734195e-06, "loss": 0.0013, "step": 79720 }, { "epoch": 2.3724577090741374, "grad_norm": 0.09638778865337372, "learning_rate": 1.2745919975895243e-06, "loss": 0.0021, "step": 79730 }, { "epoch": 2.372755270557779, "grad_norm": 0.06957319378852844, "learning_rate": 1.273437575301112e-06, "loss": 0.0021, "step": 79740 }, { "epoch": 2.3730528320414206, "grad_norm": 0.03365917503833771, "learning_rate": 1.2722835997465687e-06, "loss": 0.0031, "step": 79750 }, { "epoch": 2.373350393525062, "grad_norm": 0.22649340331554413, "learning_rate": 1.2711300710642332e-06, "loss": 0.0012, "step": 79760 }, { "epoch": 2.3736479550087037, "grad_norm": 0.13883745670318604, "learning_rate": 1.269976989392389e-06, "loss": 0.0022, "step": 79770 }, { "epoch": 2.3739455164923453, "grad_norm": 0.2496783286333084, "learning_rate": 1.2688243548692664e-06, "loss": 0.0022, "step": 79780 }, { "epoch": 2.374243077975987, "grad_norm": 0.38858291506767273, "learning_rate": 1.2676721676330384e-06, "loss": 0.0036, "step": 79790 }, { "epoch": 2.3745406394596285, "grad_norm": 0.15273265540599823, "learning_rate": 1.266520427821829e-06, "loss": 0.0021, "step": 79800 }, { "epoch": 2.37483820094327, "grad_norm": 0.09705692529678345, "learning_rate": 1.2653691355737064e-06, "loss": 0.0014, "step": 79810 }, { "epoch": 2.3751357624269116, "grad_norm": 0.2380732297897339, "learning_rate": 1.2642182910266871e-06, "loss": 0.0024, "step": 79820 }, { "epoch": 2.375433323910553, "grad_norm": 0.1642705202102661, "learning_rate": 1.2630678943187292e-06, "loss": 0.002, "step": 79830 }, { "epoch": 2.3757308853941947, "grad_norm": 0.07481199502944946, "learning_rate": 1.2619179455877417e-06, "loss": 0.001, "step": 79840 }, { "epoch": 2.3760284468778363, "grad_norm": 0.2049952745437622, "learning_rate": 1.2607684449715806e-06, "loss": 0.0023, "step": 79850 }, { "epoch": 2.3763260083614774, "grad_norm": 0.3782515525817871, "learning_rate": 1.259619392608043e-06, "loss": 0.0019, "step": 79860 }, { "epoch": 2.376623569845119, "grad_norm": 0.06566481292247772, "learning_rate": 1.2584707886348762e-06, "loss": 0.0014, "step": 79870 }, { "epoch": 2.3769211313287606, "grad_norm": 0.133945494890213, "learning_rate": 1.2573226331897731e-06, "loss": 0.0024, "step": 79880 }, { "epoch": 2.377218692812402, "grad_norm": 0.010702036321163177, "learning_rate": 1.256174926410374e-06, "loss": 0.0008, "step": 79890 }, { "epoch": 2.3775162542960437, "grad_norm": 0.06733833253383636, "learning_rate": 1.2550276684342616e-06, "loss": 0.0017, "step": 79900 }, { "epoch": 2.3778138157796853, "grad_norm": 0.26100635528564453, "learning_rate": 1.2538808593989683e-06, "loss": 0.0012, "step": 79910 }, { "epoch": 2.378111377263327, "grad_norm": 0.13879986107349396, "learning_rate": 1.252734499441971e-06, "loss": 0.0012, "step": 79920 }, { "epoch": 2.3784089387469685, "grad_norm": 0.2002292275428772, "learning_rate": 1.2515885887006956e-06, "loss": 0.0022, "step": 79930 }, { "epoch": 2.37870650023061, "grad_norm": 0.10755319893360138, "learning_rate": 1.250443127312509e-06, "loss": 0.0018, "step": 79940 }, { "epoch": 2.3790040617142516, "grad_norm": 0.06783580034971237, "learning_rate": 1.2492981154147282e-06, "loss": 0.0012, "step": 79950 }, { "epoch": 2.379301623197893, "grad_norm": 0.08342286944389343, "learning_rate": 1.2481535531446154e-06, "loss": 0.0016, "step": 79960 }, { "epoch": 2.3795991846815348, "grad_norm": 0.14558789134025574, "learning_rate": 1.24700944063938e-06, "loss": 0.0017, "step": 79970 }, { "epoch": 2.3798967461651763, "grad_norm": 0.08268177509307861, "learning_rate": 1.2458657780361727e-06, "loss": 0.0015, "step": 79980 }, { "epoch": 2.380194307648818, "grad_norm": 0.10009416937828064, "learning_rate": 1.2447225654720967e-06, "loss": 0.0013, "step": 79990 }, { "epoch": 2.3804918691324595, "grad_norm": 0.25503095984458923, "learning_rate": 1.2435798030842e-06, "loss": 0.0017, "step": 80000 }, { "epoch": 2.380789430616101, "grad_norm": 0.10754884034395218, "learning_rate": 1.2424374910094706e-06, "loss": 0.0027, "step": 80010 }, { "epoch": 2.3810869920997426, "grad_norm": 0.1427757889032364, "learning_rate": 1.2412956293848494e-06, "loss": 0.0044, "step": 80020 }, { "epoch": 2.381384553583384, "grad_norm": 0.0019366216147318482, "learning_rate": 1.2401542183472204e-06, "loss": 0.0014, "step": 80030 }, { "epoch": 2.3816821150670258, "grad_norm": 0.08602585643529892, "learning_rate": 1.2390132580334152e-06, "loss": 0.0016, "step": 80040 }, { "epoch": 2.3819796765506673, "grad_norm": 0.05843695253133774, "learning_rate": 1.2378727485802077e-06, "loss": 0.0017, "step": 80050 }, { "epoch": 2.382277238034309, "grad_norm": 0.074531190097332, "learning_rate": 1.2367326901243215e-06, "loss": 0.0013, "step": 80060 }, { "epoch": 2.3825747995179505, "grad_norm": 0.308278888463974, "learning_rate": 1.2355930828024244e-06, "loss": 0.0017, "step": 80070 }, { "epoch": 2.382872361001592, "grad_norm": 0.10444122552871704, "learning_rate": 1.2344539267511325e-06, "loss": 0.0015, "step": 80080 }, { "epoch": 2.3831699224852336, "grad_norm": 0.21239732205867767, "learning_rate": 1.2333152221070032e-06, "loss": 0.002, "step": 80090 }, { "epoch": 2.383467483968875, "grad_norm": 0.2132117599248886, "learning_rate": 1.2321769690065434e-06, "loss": 0.0016, "step": 80100 }, { "epoch": 2.3837650454525168, "grad_norm": 0.31533023715019226, "learning_rate": 1.2310391675862049e-06, "loss": 0.0016, "step": 80110 }, { "epoch": 2.3840626069361583, "grad_norm": 0.0962909460067749, "learning_rate": 1.2299018179823873e-06, "loss": 0.0018, "step": 80120 }, { "epoch": 2.3843601684198, "grad_norm": 0.25509360432624817, "learning_rate": 1.2287649203314307e-06, "loss": 0.0012, "step": 80130 }, { "epoch": 2.3846577299034415, "grad_norm": 0.04478392377495766, "learning_rate": 1.227628474769626e-06, "loss": 0.0019, "step": 80140 }, { "epoch": 2.384955291387083, "grad_norm": 0.48366254568099976, "learning_rate": 1.2264924814332086e-06, "loss": 0.0028, "step": 80150 }, { "epoch": 2.3852528528707246, "grad_norm": 0.16511757671833038, "learning_rate": 1.2253569404583609e-06, "loss": 0.0024, "step": 80160 }, { "epoch": 2.385550414354366, "grad_norm": 0.09772171080112457, "learning_rate": 1.224221851981206e-06, "loss": 0.0013, "step": 80170 }, { "epoch": 2.3858479758380073, "grad_norm": 0.1306779682636261, "learning_rate": 1.2230872161378188e-06, "loss": 0.0014, "step": 80180 }, { "epoch": 2.386145537321649, "grad_norm": 0.047985758632421494, "learning_rate": 1.2219530330642181e-06, "loss": 0.0012, "step": 80190 }, { "epoch": 2.3864430988052905, "grad_norm": 0.13858625292778015, "learning_rate": 1.220819302896365e-06, "loss": 0.0013, "step": 80200 }, { "epoch": 2.386740660288932, "grad_norm": 0.20236732065677643, "learning_rate": 1.219686025770171e-06, "loss": 0.0022, "step": 80210 }, { "epoch": 2.3870382217725736, "grad_norm": 0.05924579128623009, "learning_rate": 1.2185532018214913e-06, "loss": 0.0021, "step": 80220 }, { "epoch": 2.387335783256215, "grad_norm": 0.08040152490139008, "learning_rate": 1.217420831186128e-06, "loss": 0.0018, "step": 80230 }, { "epoch": 2.3876333447398568, "grad_norm": 0.13585834205150604, "learning_rate": 1.2162889139998252e-06, "loss": 0.0016, "step": 80240 }, { "epoch": 2.3879309062234984, "grad_norm": 0.17890393733978271, "learning_rate": 1.2151574503982766e-06, "loss": 0.0019, "step": 80250 }, { "epoch": 2.38822846770714, "grad_norm": 0.09540416300296783, "learning_rate": 1.2140264405171199e-06, "loss": 0.0019, "step": 80260 }, { "epoch": 2.3885260291907815, "grad_norm": 0.1906415820121765, "learning_rate": 1.2128958844919387e-06, "loss": 0.0012, "step": 80270 }, { "epoch": 2.388823590674423, "grad_norm": 0.08061400055885315, "learning_rate": 1.2117657824582617e-06, "loss": 0.0018, "step": 80280 }, { "epoch": 2.3891211521580646, "grad_norm": 0.1017705574631691, "learning_rate": 1.2106361345515649e-06, "loss": 0.0027, "step": 80290 }, { "epoch": 2.389418713641706, "grad_norm": 0.07486497610807419, "learning_rate": 1.2095069409072684e-06, "loss": 0.0017, "step": 80300 }, { "epoch": 2.389716275125348, "grad_norm": 0.294131875038147, "learning_rate": 1.2083782016607358e-06, "loss": 0.0022, "step": 80310 }, { "epoch": 2.3900138366089894, "grad_norm": 0.11954790353775024, "learning_rate": 1.20724991694728e-06, "loss": 0.0011, "step": 80320 }, { "epoch": 2.390311398092631, "grad_norm": 0.09407919645309448, "learning_rate": 1.206122086902158e-06, "loss": 0.0022, "step": 80330 }, { "epoch": 2.3906089595762725, "grad_norm": 0.23240645229816437, "learning_rate": 1.2049947116605725e-06, "loss": 0.002, "step": 80340 }, { "epoch": 2.390906521059914, "grad_norm": 0.1000114232301712, "learning_rate": 1.2038677913576696e-06, "loss": 0.0008, "step": 80350 }, { "epoch": 2.3912040825435557, "grad_norm": 0.11851683259010315, "learning_rate": 1.202741326128543e-06, "loss": 0.0021, "step": 80360 }, { "epoch": 2.3915016440271972, "grad_norm": 0.07669328153133392, "learning_rate": 1.2016153161082323e-06, "loss": 0.0006, "step": 80370 }, { "epoch": 2.391799205510839, "grad_norm": 0.25382521748542786, "learning_rate": 1.200489761431723e-06, "loss": 0.0014, "step": 80380 }, { "epoch": 2.3920967669944804, "grad_norm": 0.1290682703256607, "learning_rate": 1.199364662233941e-06, "loss": 0.0022, "step": 80390 }, { "epoch": 2.392394328478122, "grad_norm": 0.20260237157344818, "learning_rate": 1.1982400186497633e-06, "loss": 0.0015, "step": 80400 }, { "epoch": 2.3926918899617635, "grad_norm": 0.11642672121524811, "learning_rate": 1.19711583081401e-06, "loss": 0.0019, "step": 80410 }, { "epoch": 2.392989451445405, "grad_norm": 0.08364050090312958, "learning_rate": 1.1959920988614488e-06, "loss": 0.0015, "step": 80420 }, { "epoch": 2.393287012929046, "grad_norm": 0.11582900583744049, "learning_rate": 1.1948688229267874e-06, "loss": 0.0022, "step": 80430 }, { "epoch": 2.393584574412688, "grad_norm": 0.07347125560045242, "learning_rate": 1.1937460031446834e-06, "loss": 0.0012, "step": 80440 }, { "epoch": 2.3938821358963294, "grad_norm": 0.01771160028874874, "learning_rate": 1.1926236396497388e-06, "loss": 0.0015, "step": 80450 }, { "epoch": 2.394179697379971, "grad_norm": 0.10658017545938492, "learning_rate": 1.1915017325765015e-06, "loss": 0.0013, "step": 80460 }, { "epoch": 2.3944772588636125, "grad_norm": 0.1362650841474533, "learning_rate": 1.1903802820594618e-06, "loss": 0.002, "step": 80470 }, { "epoch": 2.394774820347254, "grad_norm": 0.10466623306274414, "learning_rate": 1.1892592882330583e-06, "loss": 0.001, "step": 80480 }, { "epoch": 2.3950723818308957, "grad_norm": 0.10787133127450943, "learning_rate": 1.1881387512316739e-06, "loss": 0.0009, "step": 80490 }, { "epoch": 2.3953699433145372, "grad_norm": 0.09555435925722122, "learning_rate": 1.1870186711896374e-06, "loss": 0.0017, "step": 80500 }, { "epoch": 2.395667504798179, "grad_norm": 0.08449187874794006, "learning_rate": 1.1858990482412203e-06, "loss": 0.0014, "step": 80510 }, { "epoch": 2.3959650662818204, "grad_norm": 0.1756664365530014, "learning_rate": 1.1847798825206419e-06, "loss": 0.001, "step": 80520 }, { "epoch": 2.396262627765462, "grad_norm": 0.2843761742115021, "learning_rate": 1.1836611741620657e-06, "loss": 0.0023, "step": 80530 }, { "epoch": 2.3965601892491035, "grad_norm": 0.23126743733882904, "learning_rate": 1.1825429232996022e-06, "loss": 0.0018, "step": 80540 }, { "epoch": 2.396857750732745, "grad_norm": 0.21520133316516876, "learning_rate": 1.1814251300673007e-06, "loss": 0.0016, "step": 80550 }, { "epoch": 2.3971553122163867, "grad_norm": 0.11661139130592346, "learning_rate": 1.1803077945991653e-06, "loss": 0.0014, "step": 80560 }, { "epoch": 2.3974528737000282, "grad_norm": 0.1031121164560318, "learning_rate": 1.17919091702914e-06, "loss": 0.0013, "step": 80570 }, { "epoch": 2.39775043518367, "grad_norm": 0.049715805798769, "learning_rate": 1.1780744974911113e-06, "loss": 0.0012, "step": 80580 }, { "epoch": 2.3980479966673114, "grad_norm": 0.1877640038728714, "learning_rate": 1.1769585361189146e-06, "loss": 0.0037, "step": 80590 }, { "epoch": 2.398345558150953, "grad_norm": 0.15447771549224854, "learning_rate": 1.1758430330463316e-06, "loss": 0.0015, "step": 80600 }, { "epoch": 2.3986431196345945, "grad_norm": 0.05984366312623024, "learning_rate": 1.1747279884070834e-06, "loss": 0.0034, "step": 80610 }, { "epoch": 2.398940681118236, "grad_norm": 0.022344263270497322, "learning_rate": 1.173613402334841e-06, "loss": 0.002, "step": 80620 }, { "epoch": 2.3992382426018777, "grad_norm": 0.4392752945423126, "learning_rate": 1.1724992749632197e-06, "loss": 0.0014, "step": 80630 }, { "epoch": 2.3995358040855193, "grad_norm": 0.08401394635438919, "learning_rate": 1.1713856064257806e-06, "loss": 0.0016, "step": 80640 }, { "epoch": 2.399833365569161, "grad_norm": 0.05958753824234009, "learning_rate": 1.1702723968560247e-06, "loss": 0.0014, "step": 80650 }, { "epoch": 2.4001309270528024, "grad_norm": 0.11736603826284409, "learning_rate": 1.169159646387404e-06, "loss": 0.0021, "step": 80660 }, { "epoch": 2.400428488536444, "grad_norm": 0.21298900246620178, "learning_rate": 1.1680473551533123e-06, "loss": 0.0013, "step": 80670 }, { "epoch": 2.4007260500200855, "grad_norm": 0.12695688009262085, "learning_rate": 1.1669355232870904e-06, "loss": 0.0023, "step": 80680 }, { "epoch": 2.401023611503727, "grad_norm": 0.19343475997447968, "learning_rate": 1.1658241509220215e-06, "loss": 0.0026, "step": 80690 }, { "epoch": 2.4013211729873687, "grad_norm": 0.22327330708503723, "learning_rate": 1.1647132381913346e-06, "loss": 0.002, "step": 80700 }, { "epoch": 2.4016187344710103, "grad_norm": 0.07465279847383499, "learning_rate": 1.163602785228205e-06, "loss": 0.0019, "step": 80710 }, { "epoch": 2.401916295954652, "grad_norm": 0.10047201812267303, "learning_rate": 1.1624927921657531e-06, "loss": 0.0012, "step": 80720 }, { "epoch": 2.4022138574382934, "grad_norm": 0.058710478246212006, "learning_rate": 1.1613832591370406e-06, "loss": 0.0017, "step": 80730 }, { "epoch": 2.402511418921935, "grad_norm": 0.30788707733154297, "learning_rate": 1.1602741862750766e-06, "loss": 0.0018, "step": 80740 }, { "epoch": 2.402808980405576, "grad_norm": 0.21680587530136108, "learning_rate": 1.159165573712816e-06, "loss": 0.0027, "step": 80750 }, { "epoch": 2.4031065418892177, "grad_norm": 0.21385349333286285, "learning_rate": 1.158057421583158e-06, "loss": 0.002, "step": 80760 }, { "epoch": 2.4034041033728593, "grad_norm": 0.11257559061050415, "learning_rate": 1.1569497300189435e-06, "loss": 0.0013, "step": 80770 }, { "epoch": 2.403701664856501, "grad_norm": 0.11395145207643509, "learning_rate": 1.1558424991529626e-06, "loss": 0.0025, "step": 80780 }, { "epoch": 2.4039992263401424, "grad_norm": 0.35693806409835815, "learning_rate": 1.1547357291179473e-06, "loss": 0.002, "step": 80790 }, { "epoch": 2.404296787823784, "grad_norm": 0.06417224556207657, "learning_rate": 1.153629420046577e-06, "loss": 0.0025, "step": 80800 }, { "epoch": 2.4045943493074255, "grad_norm": 0.12744098901748657, "learning_rate": 1.152523572071471e-06, "loss": 0.0016, "step": 80810 }, { "epoch": 2.404891910791067, "grad_norm": 0.19510164856910706, "learning_rate": 1.1514181853251987e-06, "loss": 0.0017, "step": 80820 }, { "epoch": 2.4051894722747087, "grad_norm": 0.07164165377616882, "learning_rate": 1.150313259940271e-06, "loss": 0.0009, "step": 80830 }, { "epoch": 2.4054870337583503, "grad_norm": 0.3626319169998169, "learning_rate": 1.1492087960491456e-06, "loss": 0.0025, "step": 80840 }, { "epoch": 2.405784595241992, "grad_norm": 0.20530909299850464, "learning_rate": 1.1481047937842227e-06, "loss": 0.0012, "step": 80850 }, { "epoch": 2.4060821567256334, "grad_norm": 0.06083446368575096, "learning_rate": 1.1470012532778486e-06, "loss": 0.0015, "step": 80860 }, { "epoch": 2.406379718209275, "grad_norm": 0.17418363690376282, "learning_rate": 1.1458981746623148e-06, "loss": 0.0015, "step": 80870 }, { "epoch": 2.4066772796929166, "grad_norm": 0.25318384170532227, "learning_rate": 1.1447955580698544e-06, "loss": 0.0017, "step": 80880 }, { "epoch": 2.406974841176558, "grad_norm": 0.12373798340559006, "learning_rate": 1.1436934036326485e-06, "loss": 0.0029, "step": 80890 }, { "epoch": 2.4072724026601997, "grad_norm": 0.3272477984428406, "learning_rate": 1.1425917114828206e-06, "loss": 0.0016, "step": 80900 }, { "epoch": 2.4075699641438413, "grad_norm": 0.13171041011810303, "learning_rate": 1.141490481752442e-06, "loss": 0.0026, "step": 80910 }, { "epoch": 2.407867525627483, "grad_norm": 0.13095127046108246, "learning_rate": 1.1403897145735227e-06, "loss": 0.0019, "step": 80920 }, { "epoch": 2.4081650871111244, "grad_norm": 0.1321275383234024, "learning_rate": 1.1392894100780227e-06, "loss": 0.0037, "step": 80930 }, { "epoch": 2.408462648594766, "grad_norm": 0.10337258130311966, "learning_rate": 1.1381895683978444e-06, "loss": 0.002, "step": 80940 }, { "epoch": 2.4087602100784076, "grad_norm": 0.16496242582798004, "learning_rate": 1.1370901896648361e-06, "loss": 0.0018, "step": 80950 }, { "epoch": 2.409057771562049, "grad_norm": 0.18556393682956696, "learning_rate": 1.135991274010787e-06, "loss": 0.002, "step": 80960 }, { "epoch": 2.4093553330456907, "grad_norm": 0.35806509852409363, "learning_rate": 1.1348928215674348e-06, "loss": 0.0016, "step": 80970 }, { "epoch": 2.4096528945293323, "grad_norm": 0.0824815183877945, "learning_rate": 1.133794832466461e-06, "loss": 0.0019, "step": 80980 }, { "epoch": 2.409950456012974, "grad_norm": 0.12254559993743896, "learning_rate": 1.1326973068394875e-06, "loss": 0.0023, "step": 80990 }, { "epoch": 2.410248017496615, "grad_norm": 0.13719698786735535, "learning_rate": 1.1316002448180864e-06, "loss": 0.0019, "step": 81000 }, { "epoch": 2.4105455789802566, "grad_norm": 0.1299685537815094, "learning_rate": 1.1305036465337704e-06, "loss": 0.0016, "step": 81010 }, { "epoch": 2.410843140463898, "grad_norm": 0.10858908295631409, "learning_rate": 1.1294075121179998e-06, "loss": 0.0019, "step": 81020 }, { "epoch": 2.4111407019475397, "grad_norm": 0.06451736390590668, "learning_rate": 1.1283118417021744e-06, "loss": 0.0013, "step": 81030 }, { "epoch": 2.4114382634311813, "grad_norm": 0.06695980578660965, "learning_rate": 1.1272166354176428e-06, "loss": 0.0019, "step": 81040 }, { "epoch": 2.411735824914823, "grad_norm": 0.256144255399704, "learning_rate": 1.1261218933956957e-06, "loss": 0.0015, "step": 81050 }, { "epoch": 2.4120333863984644, "grad_norm": 0.10011918097734451, "learning_rate": 1.1250276157675717e-06, "loss": 0.0015, "step": 81060 }, { "epoch": 2.412330947882106, "grad_norm": 0.12053478509187698, "learning_rate": 1.123933802664447e-06, "loss": 0.0014, "step": 81070 }, { "epoch": 2.4126285093657476, "grad_norm": 0.11118832975625992, "learning_rate": 1.1228404542174476e-06, "loss": 0.0014, "step": 81080 }, { "epoch": 2.412926070849389, "grad_norm": 0.012210218235850334, "learning_rate": 1.1217475705576424e-06, "loss": 0.0015, "step": 81090 }, { "epoch": 2.4132236323330307, "grad_norm": 0.06811833381652832, "learning_rate": 1.1206551518160457e-06, "loss": 0.0016, "step": 81100 }, { "epoch": 2.4135211938166723, "grad_norm": 0.20308561623096466, "learning_rate": 1.1195631981236121e-06, "loss": 0.0036, "step": 81110 }, { "epoch": 2.413818755300314, "grad_norm": 0.13972820341587067, "learning_rate": 1.118471709611243e-06, "loss": 0.0021, "step": 81120 }, { "epoch": 2.4141163167839554, "grad_norm": 0.2900877296924591, "learning_rate": 1.1173806864097885e-06, "loss": 0.0019, "step": 81130 }, { "epoch": 2.414413878267597, "grad_norm": 0.20524364709854126, "learning_rate": 1.1162901286500345e-06, "loss": 0.0019, "step": 81140 }, { "epoch": 2.4147114397512386, "grad_norm": 0.15522322058677673, "learning_rate": 1.1152000364627157e-06, "loss": 0.0023, "step": 81150 }, { "epoch": 2.41500900123488, "grad_norm": 0.1749245822429657, "learning_rate": 1.1141104099785115e-06, "loss": 0.001, "step": 81160 }, { "epoch": 2.4153065627185217, "grad_norm": 0.23235592246055603, "learning_rate": 1.1130212493280458e-06, "loss": 0.0026, "step": 81170 }, { "epoch": 2.4156041242021633, "grad_norm": 0.11863445490598679, "learning_rate": 1.1119325546418818e-06, "loss": 0.0021, "step": 81180 }, { "epoch": 2.415901685685805, "grad_norm": 0.008308246731758118, "learning_rate": 1.110844326050532e-06, "loss": 0.0012, "step": 81190 }, { "epoch": 2.4161992471694465, "grad_norm": 0.1391133964061737, "learning_rate": 1.109756563684452e-06, "loss": 0.0014, "step": 81200 }, { "epoch": 2.416496808653088, "grad_norm": 0.19363227486610413, "learning_rate": 1.1086692676740412e-06, "loss": 0.0017, "step": 81210 }, { "epoch": 2.4167943701367296, "grad_norm": 0.13586270809173584, "learning_rate": 1.1075824381496409e-06, "loss": 0.0015, "step": 81220 }, { "epoch": 2.417091931620371, "grad_norm": 0.08334766328334808, "learning_rate": 1.1064960752415392e-06, "loss": 0.0039, "step": 81230 }, { "epoch": 2.4173894931040127, "grad_norm": 0.06609788537025452, "learning_rate": 1.1054101790799682e-06, "loss": 0.0015, "step": 81240 }, { "epoch": 2.4176870545876543, "grad_norm": 0.14872317016124725, "learning_rate": 1.104324749795103e-06, "loss": 0.0019, "step": 81250 }, { "epoch": 2.417984616071296, "grad_norm": 0.06502218544483185, "learning_rate": 1.103239787517062e-06, "loss": 0.001, "step": 81260 }, { "epoch": 2.4182821775549375, "grad_norm": 0.19156765937805176, "learning_rate": 1.1021552923759088e-06, "loss": 0.0024, "step": 81270 }, { "epoch": 2.418579739038579, "grad_norm": 0.17001424729824066, "learning_rate": 1.101071264501652e-06, "loss": 0.0025, "step": 81280 }, { "epoch": 2.4188773005222206, "grad_norm": 0.16104674339294434, "learning_rate": 1.0999877040242429e-06, "loss": 0.0014, "step": 81290 }, { "epoch": 2.419174862005862, "grad_norm": 0.11098869889974594, "learning_rate": 1.098904611073575e-06, "loss": 0.0023, "step": 81300 }, { "epoch": 2.4194724234895038, "grad_norm": 0.05391760170459747, "learning_rate": 1.0978219857794885e-06, "loss": 0.0015, "step": 81310 }, { "epoch": 2.419769984973145, "grad_norm": 0.18205155432224274, "learning_rate": 1.096739828271769e-06, "loss": 0.0018, "step": 81320 }, { "epoch": 2.4200675464567865, "grad_norm": 0.2039976567029953, "learning_rate": 1.0956581386801396e-06, "loss": 0.0034, "step": 81330 }, { "epoch": 2.420365107940428, "grad_norm": 0.13606637716293335, "learning_rate": 1.094576917134274e-06, "loss": 0.0019, "step": 81340 }, { "epoch": 2.4206626694240696, "grad_norm": 0.13398326933383942, "learning_rate": 1.0934961637637865e-06, "loss": 0.0012, "step": 81350 }, { "epoch": 2.420960230907711, "grad_norm": 0.21467581391334534, "learning_rate": 1.092415878698237e-06, "loss": 0.0009, "step": 81360 }, { "epoch": 2.4212577923913527, "grad_norm": 0.06836928427219391, "learning_rate": 1.0913360620671259e-06, "loss": 0.0015, "step": 81370 }, { "epoch": 2.4215553538749943, "grad_norm": 0.2440355122089386, "learning_rate": 1.0902567139999016e-06, "loss": 0.0014, "step": 81380 }, { "epoch": 2.421852915358636, "grad_norm": 0.20705564320087433, "learning_rate": 1.0891778346259535e-06, "loss": 0.0017, "step": 81390 }, { "epoch": 2.4221504768422775, "grad_norm": 0.2881676256656647, "learning_rate": 1.0880994240746169e-06, "loss": 0.0028, "step": 81400 }, { "epoch": 2.422448038325919, "grad_norm": 0.07005678862333298, "learning_rate": 1.0870214824751685e-06, "loss": 0.0013, "step": 81410 }, { "epoch": 2.4227455998095606, "grad_norm": 0.1660422831773758, "learning_rate": 1.0859440099568308e-06, "loss": 0.0017, "step": 81420 }, { "epoch": 2.423043161293202, "grad_norm": 0.09991127252578735, "learning_rate": 1.084867006648771e-06, "loss": 0.0021, "step": 81430 }, { "epoch": 2.4233407227768438, "grad_norm": 0.03732703998684883, "learning_rate": 1.083790472680095e-06, "loss": 0.0013, "step": 81440 }, { "epoch": 2.4236382842604853, "grad_norm": 0.1793857365846634, "learning_rate": 1.082714408179858e-06, "loss": 0.0016, "step": 81450 }, { "epoch": 2.423935845744127, "grad_norm": 0.15290716290473938, "learning_rate": 1.081638813277056e-06, "loss": 0.0018, "step": 81460 }, { "epoch": 2.4242334072277685, "grad_norm": 0.05076024308800697, "learning_rate": 1.080563688100631e-06, "loss": 0.0012, "step": 81470 }, { "epoch": 2.42453096871141, "grad_norm": 0.06496599316596985, "learning_rate": 1.079489032779465e-06, "loss": 0.001, "step": 81480 }, { "epoch": 2.4248285301950516, "grad_norm": 0.06645169109106064, "learning_rate": 1.0784148474423861e-06, "loss": 0.0011, "step": 81490 }, { "epoch": 2.425126091678693, "grad_norm": 0.12849833071231842, "learning_rate": 1.0773411322181664e-06, "loss": 0.0013, "step": 81500 }, { "epoch": 2.4254236531623348, "grad_norm": 0.11361825466156006, "learning_rate": 1.076267887235523e-06, "loss": 0.0016, "step": 81510 }, { "epoch": 2.4257212146459763, "grad_norm": 0.02139037474989891, "learning_rate": 1.0751951126231115e-06, "loss": 0.0016, "step": 81520 }, { "epoch": 2.426018776129618, "grad_norm": 0.12415823340415955, "learning_rate": 1.0741228085095345e-06, "loss": 0.0014, "step": 81530 }, { "epoch": 2.4263163376132595, "grad_norm": 0.11009659618139267, "learning_rate": 1.0730509750233399e-06, "loss": 0.0016, "step": 81540 }, { "epoch": 2.426613899096901, "grad_norm": 0.16691839694976807, "learning_rate": 1.071979612293017e-06, "loss": 0.0012, "step": 81550 }, { "epoch": 2.4269114605805426, "grad_norm": 0.19480456411838531, "learning_rate": 1.070908720446997e-06, "loss": 0.0018, "step": 81560 }, { "epoch": 2.4272090220641838, "grad_norm": 0.19968143105506897, "learning_rate": 1.0698382996136574e-06, "loss": 0.0017, "step": 81570 }, { "epoch": 2.4275065835478253, "grad_norm": 0.13775014877319336, "learning_rate": 1.0687683499213191e-06, "loss": 0.0029, "step": 81580 }, { "epoch": 2.427804145031467, "grad_norm": 0.05042344704270363, "learning_rate": 1.0676988714982472e-06, "loss": 0.0016, "step": 81590 }, { "epoch": 2.4281017065151085, "grad_norm": 0.06802557408809662, "learning_rate": 1.0666298644726452e-06, "loss": 0.0015, "step": 81600 }, { "epoch": 2.42839926799875, "grad_norm": 0.2652274966239929, "learning_rate": 1.0655613289726663e-06, "loss": 0.0017, "step": 81610 }, { "epoch": 2.4286968294823916, "grad_norm": 0.20390503108501434, "learning_rate": 1.064493265126404e-06, "loss": 0.0017, "step": 81620 }, { "epoch": 2.428994390966033, "grad_norm": 0.122568778693676, "learning_rate": 1.0634256730618975e-06, "loss": 0.0015, "step": 81630 }, { "epoch": 2.4292919524496748, "grad_norm": 0.3172075152397156, "learning_rate": 1.062358552907125e-06, "loss": 0.0014, "step": 81640 }, { "epoch": 2.4295895139333163, "grad_norm": 0.44002124667167664, "learning_rate": 1.0612919047900134e-06, "loss": 0.0015, "step": 81650 }, { "epoch": 2.429887075416958, "grad_norm": 0.06419536471366882, "learning_rate": 1.0602257288384288e-06, "loss": 0.0029, "step": 81660 }, { "epoch": 2.4301846369005995, "grad_norm": 0.04050827771425247, "learning_rate": 1.0591600251801854e-06, "loss": 0.0013, "step": 81670 }, { "epoch": 2.430482198384241, "grad_norm": 0.13480623066425323, "learning_rate": 1.0580947939430337e-06, "loss": 0.0014, "step": 81680 }, { "epoch": 2.4307797598678826, "grad_norm": 0.20169708132743835, "learning_rate": 1.0570300352546746e-06, "loss": 0.0037, "step": 81690 }, { "epoch": 2.431077321351524, "grad_norm": 0.10331247746944427, "learning_rate": 1.055965749242751e-06, "loss": 0.002, "step": 81700 }, { "epoch": 2.431374882835166, "grad_norm": 0.04824266955256462, "learning_rate": 1.0549019360348445e-06, "loss": 0.0013, "step": 81710 }, { "epoch": 2.4316724443188074, "grad_norm": 0.2635667622089386, "learning_rate": 1.0538385957584847e-06, "loss": 0.0015, "step": 81720 }, { "epoch": 2.431970005802449, "grad_norm": 0.09643707424402237, "learning_rate": 1.0527757285411432e-06, "loss": 0.0014, "step": 81730 }, { "epoch": 2.4322675672860905, "grad_norm": 0.05972824990749359, "learning_rate": 1.0517133345102337e-06, "loss": 0.0027, "step": 81740 }, { "epoch": 2.432565128769732, "grad_norm": 0.07442526519298553, "learning_rate": 1.050651413793114e-06, "loss": 0.0013, "step": 81750 }, { "epoch": 2.4328626902533736, "grad_norm": 0.3556220233440399, "learning_rate": 1.0495899665170866e-06, "loss": 0.0015, "step": 81760 }, { "epoch": 2.4331602517370152, "grad_norm": 0.09900432080030441, "learning_rate": 1.0485289928093967e-06, "loss": 0.001, "step": 81770 }, { "epoch": 2.433457813220657, "grad_norm": 0.21671618521213531, "learning_rate": 1.0474684927972289e-06, "loss": 0.0009, "step": 81780 }, { "epoch": 2.4337553747042984, "grad_norm": 0.08796472102403641, "learning_rate": 1.0464084666077161e-06, "loss": 0.0019, "step": 81790 }, { "epoch": 2.43405293618794, "grad_norm": 0.28261229395866394, "learning_rate": 1.0453489143679324e-06, "loss": 0.0016, "step": 81800 }, { "epoch": 2.4343504976715815, "grad_norm": 0.166823610663414, "learning_rate": 1.0442898362048959e-06, "loss": 0.0015, "step": 81810 }, { "epoch": 2.434648059155223, "grad_norm": 0.11564987152814865, "learning_rate": 1.043231232245565e-06, "loss": 0.0022, "step": 81820 }, { "epoch": 2.4349456206388647, "grad_norm": 0.08905300498008728, "learning_rate": 1.0421731026168441e-06, "loss": 0.0013, "step": 81830 }, { "epoch": 2.4352431821225062, "grad_norm": 0.18692484498023987, "learning_rate": 1.0411154474455803e-06, "loss": 0.0013, "step": 81840 }, { "epoch": 2.435540743606148, "grad_norm": 0.38447344303131104, "learning_rate": 1.0400582668585647e-06, "loss": 0.0022, "step": 81850 }, { "epoch": 2.4358383050897894, "grad_norm": 0.1982906460762024, "learning_rate": 1.0390015609825276e-06, "loss": 0.0019, "step": 81860 }, { "epoch": 2.436135866573431, "grad_norm": 0.07122012972831726, "learning_rate": 1.0379453299441466e-06, "loss": 0.0015, "step": 81870 }, { "epoch": 2.4364334280570725, "grad_norm": 0.006425183732062578, "learning_rate": 1.0368895738700406e-06, "loss": 0.0012, "step": 81880 }, { "epoch": 2.436730989540714, "grad_norm": 0.12071794271469116, "learning_rate": 1.0358342928867738e-06, "loss": 0.0011, "step": 81890 }, { "epoch": 2.4370285510243552, "grad_norm": 0.16764529049396515, "learning_rate": 1.0347794871208477e-06, "loss": 0.0027, "step": 81900 }, { "epoch": 2.437326112507997, "grad_norm": 0.22712260484695435, "learning_rate": 1.0337251566987128e-06, "loss": 0.0022, "step": 81910 }, { "epoch": 2.4376236739916384, "grad_norm": 0.26608577370643616, "learning_rate": 1.03267130174676e-06, "loss": 0.0026, "step": 81920 }, { "epoch": 2.43792123547528, "grad_norm": 0.0568610318005085, "learning_rate": 1.0316179223913248e-06, "loss": 0.0019, "step": 81930 }, { "epoch": 2.4382187969589215, "grad_norm": 0.0726676881313324, "learning_rate": 1.0305650187586824e-06, "loss": 0.001, "step": 81940 }, { "epoch": 2.438516358442563, "grad_norm": 0.004335223231464624, "learning_rate": 1.0295125909750537e-06, "loss": 0.0024, "step": 81950 }, { "epoch": 2.4388139199262047, "grad_norm": 0.12811878323554993, "learning_rate": 1.0284606391666025e-06, "loss": 0.0014, "step": 81960 }, { "epoch": 2.4391114814098462, "grad_norm": 0.12587586045265198, "learning_rate": 1.0274091634594347e-06, "loss": 0.0019, "step": 81970 }, { "epoch": 2.439409042893488, "grad_norm": 0.06144661083817482, "learning_rate": 1.0263581639795993e-06, "loss": 0.0018, "step": 81980 }, { "epoch": 2.4397066043771294, "grad_norm": 0.12343920022249222, "learning_rate": 1.0253076408530894e-06, "loss": 0.0017, "step": 81990 }, { "epoch": 2.440004165860771, "grad_norm": 0.05909286439418793, "learning_rate": 1.0242575942058392e-06, "loss": 0.0014, "step": 82000 }, { "epoch": 2.4403017273444125, "grad_norm": 0.11361812055110931, "learning_rate": 1.0232080241637254e-06, "loss": 0.0023, "step": 82010 }, { "epoch": 2.440599288828054, "grad_norm": 0.3157668709754944, "learning_rate": 1.0221589308525692e-06, "loss": 0.0018, "step": 82020 }, { "epoch": 2.4408968503116957, "grad_norm": 0.15551285445690155, "learning_rate": 1.021110314398135e-06, "loss": 0.0016, "step": 82030 }, { "epoch": 2.4411944117953372, "grad_norm": 0.11636938899755478, "learning_rate": 1.020062174926129e-06, "loss": 0.0015, "step": 82040 }, { "epoch": 2.441491973278979, "grad_norm": 0.13764159381389618, "learning_rate": 1.019014512562199e-06, "loss": 0.0023, "step": 82050 }, { "epoch": 2.4417895347626204, "grad_norm": 0.14683187007904053, "learning_rate": 1.0179673274319379e-06, "loss": 0.001, "step": 82060 }, { "epoch": 2.442087096246262, "grad_norm": 0.20585426688194275, "learning_rate": 1.0169206196608804e-06, "loss": 0.0018, "step": 82070 }, { "epoch": 2.4423846577299035, "grad_norm": 0.08093340694904327, "learning_rate": 1.0158743893745055e-06, "loss": 0.0013, "step": 82080 }, { "epoch": 2.442682219213545, "grad_norm": 0.20913517475128174, "learning_rate": 1.0148286366982301e-06, "loss": 0.0018, "step": 82090 }, { "epoch": 2.4429797806971867, "grad_norm": 0.08661477267742157, "learning_rate": 1.0137833617574194e-06, "loss": 0.0014, "step": 82100 }, { "epoch": 2.4432773421808283, "grad_norm": 0.06227046251296997, "learning_rate": 1.0127385646773803e-06, "loss": 0.0014, "step": 82110 }, { "epoch": 2.44357490366447, "grad_norm": 0.24181769788265228, "learning_rate": 1.0116942455833583e-06, "loss": 0.0028, "step": 82120 }, { "epoch": 2.4438724651481114, "grad_norm": 0.12841564416885376, "learning_rate": 1.0106504046005461e-06, "loss": 0.0014, "step": 82130 }, { "epoch": 2.444170026631753, "grad_norm": 0.26246920228004456, "learning_rate": 1.0096070418540776e-06, "loss": 0.0012, "step": 82140 }, { "epoch": 2.444467588115394, "grad_norm": 0.04091201350092888, "learning_rate": 1.0085641574690308e-06, "loss": 0.0012, "step": 82150 }, { "epoch": 2.4447651495990357, "grad_norm": 0.020574403926730156, "learning_rate": 1.0075217515704217e-06, "loss": 0.0016, "step": 82160 }, { "epoch": 2.4450627110826773, "grad_norm": 0.17763662338256836, "learning_rate": 1.006479824283214e-06, "loss": 0.0011, "step": 82170 }, { "epoch": 2.445360272566319, "grad_norm": 0.1881595402956009, "learning_rate": 1.0054383757323116e-06, "loss": 0.0018, "step": 82180 }, { "epoch": 2.4456578340499604, "grad_norm": 0.2067231833934784, "learning_rate": 1.0043974060425638e-06, "loss": 0.0031, "step": 82190 }, { "epoch": 2.445955395533602, "grad_norm": 0.46493688225746155, "learning_rate": 1.0033569153387563e-06, "loss": 0.0022, "step": 82200 }, { "epoch": 2.4462529570172435, "grad_norm": 0.060964133590459824, "learning_rate": 1.0023169037456238e-06, "loss": 0.0015, "step": 82210 }, { "epoch": 2.446550518500885, "grad_norm": 0.12402986735105515, "learning_rate": 1.0012773713878405e-06, "loss": 0.0018, "step": 82220 }, { "epoch": 2.4468480799845267, "grad_norm": 0.13385741412639618, "learning_rate": 1.0002383183900248e-06, "loss": 0.0016, "step": 82230 }, { "epoch": 2.4471456414681683, "grad_norm": 0.11126977950334549, "learning_rate": 9.991997448767348e-07, "loss": 0.001, "step": 82240 }, { "epoch": 2.44744320295181, "grad_norm": 0.23105749487876892, "learning_rate": 9.981616509724729e-07, "loss": 0.0014, "step": 82250 }, { "epoch": 2.4477407644354514, "grad_norm": 0.06364058703184128, "learning_rate": 9.971240368016871e-07, "loss": 0.0011, "step": 82260 }, { "epoch": 2.448038325919093, "grad_norm": 0.1402783840894699, "learning_rate": 9.960869024887609e-07, "loss": 0.0029, "step": 82270 }, { "epoch": 2.4483358874027346, "grad_norm": 0.12414803355932236, "learning_rate": 9.950502481580264e-07, "loss": 0.0053, "step": 82280 }, { "epoch": 2.448633448886376, "grad_norm": 0.08692953735589981, "learning_rate": 9.94014073933755e-07, "loss": 0.0018, "step": 82290 }, { "epoch": 2.4489310103700177, "grad_norm": 0.013024711981415749, "learning_rate": 9.929783799401633e-07, "loss": 0.0011, "step": 82300 }, { "epoch": 2.4492285718536593, "grad_norm": 0.11462526023387909, "learning_rate": 9.919431663014057e-07, "loss": 0.0009, "step": 82310 }, { "epoch": 2.449526133337301, "grad_norm": 0.13364896178245544, "learning_rate": 9.909084331415836e-07, "loss": 0.0032, "step": 82320 }, { "epoch": 2.4498236948209424, "grad_norm": 0.13085711002349854, "learning_rate": 9.898741805847377e-07, "loss": 0.0012, "step": 82330 }, { "epoch": 2.450121256304584, "grad_norm": 0.19744457304477692, "learning_rate": 9.888404087548552e-07, "loss": 0.0017, "step": 82340 }, { "epoch": 2.4504188177882256, "grad_norm": 0.22861666977405548, "learning_rate": 9.878071177758603e-07, "loss": 0.0017, "step": 82350 }, { "epoch": 2.450716379271867, "grad_norm": 0.02302589826285839, "learning_rate": 9.86774307771622e-07, "loss": 0.0013, "step": 82360 }, { "epoch": 2.4510139407555087, "grad_norm": 0.12268797308206558, "learning_rate": 9.857419788659529e-07, "loss": 0.0014, "step": 82370 }, { "epoch": 2.4513115022391503, "grad_norm": 0.040421467274427414, "learning_rate": 9.84710131182608e-07, "loss": 0.0009, "step": 82380 }, { "epoch": 2.451609063722792, "grad_norm": 0.16759178042411804, "learning_rate": 9.8367876484528e-07, "loss": 0.0016, "step": 82390 }, { "epoch": 2.4519066252064334, "grad_norm": 0.12284303456544876, "learning_rate": 9.826478799776101e-07, "loss": 0.0011, "step": 82400 }, { "epoch": 2.452204186690075, "grad_norm": 0.11847283691167831, "learning_rate": 9.816174767031777e-07, "loss": 0.0012, "step": 82410 }, { "epoch": 2.4525017481737166, "grad_norm": 0.19316492974758148, "learning_rate": 9.805875551455074e-07, "loss": 0.0016, "step": 82420 }, { "epoch": 2.452799309657358, "grad_norm": 0.15168094635009766, "learning_rate": 9.795581154280626e-07, "loss": 0.0017, "step": 82430 }, { "epoch": 2.4530968711409997, "grad_norm": 0.22815251350402832, "learning_rate": 9.785291576742505e-07, "loss": 0.0012, "step": 82440 }, { "epoch": 2.4533944326246413, "grad_norm": 0.22064919769763947, "learning_rate": 9.775006820074222e-07, "loss": 0.0019, "step": 82450 }, { "epoch": 2.453691994108283, "grad_norm": 0.1345268040895462, "learning_rate": 9.764726885508708e-07, "loss": 0.0015, "step": 82460 }, { "epoch": 2.453989555591924, "grad_norm": 0.29557228088378906, "learning_rate": 9.754451774278267e-07, "loss": 0.0045, "step": 82470 }, { "epoch": 2.4542871170755656, "grad_norm": 0.23235556483268738, "learning_rate": 9.744181487614686e-07, "loss": 0.0024, "step": 82480 }, { "epoch": 2.454584678559207, "grad_norm": 0.14124876260757446, "learning_rate": 9.733916026749151e-07, "loss": 0.0015, "step": 82490 }, { "epoch": 2.4548822400428487, "grad_norm": 0.027786334976553917, "learning_rate": 9.723655392912252e-07, "loss": 0.0012, "step": 82500 }, { "epoch": 2.4551798015264903, "grad_norm": 0.1430066078901291, "learning_rate": 9.713399587334027e-07, "loss": 0.0018, "step": 82510 }, { "epoch": 2.455477363010132, "grad_norm": 0.1731441617012024, "learning_rate": 9.703148611243917e-07, "loss": 0.0014, "step": 82520 }, { "epoch": 2.4557749244937734, "grad_norm": 0.06193884089589119, "learning_rate": 9.692902465870802e-07, "loss": 0.0012, "step": 82530 }, { "epoch": 2.456072485977415, "grad_norm": 0.2026660442352295, "learning_rate": 9.682661152442967e-07, "loss": 0.0022, "step": 82540 }, { "epoch": 2.4563700474610566, "grad_norm": 0.2826606035232544, "learning_rate": 9.67242467218812e-07, "loss": 0.0029, "step": 82550 }, { "epoch": 2.456667608944698, "grad_norm": 0.038162779062986374, "learning_rate": 9.66219302633341e-07, "loss": 0.0009, "step": 82560 }, { "epoch": 2.4569651704283397, "grad_norm": 0.1888112872838974, "learning_rate": 9.651966216105363e-07, "loss": 0.0019, "step": 82570 }, { "epoch": 2.4572627319119813, "grad_norm": 0.06177322939038277, "learning_rate": 9.641744242729962e-07, "loss": 0.0018, "step": 82580 }, { "epoch": 2.457560293395623, "grad_norm": 0.16768895089626312, "learning_rate": 9.631527107432597e-07, "loss": 0.002, "step": 82590 }, { "epoch": 2.4578578548792644, "grad_norm": 0.24963806569576263, "learning_rate": 9.621314811438103e-07, "loss": 0.0024, "step": 82600 }, { "epoch": 2.458155416362906, "grad_norm": 0.27244052290916443, "learning_rate": 9.611107355970679e-07, "loss": 0.0021, "step": 82610 }, { "epoch": 2.4584529778465476, "grad_norm": 0.09208868443965912, "learning_rate": 9.600904742253992e-07, "loss": 0.0016, "step": 82620 }, { "epoch": 2.458750539330189, "grad_norm": 0.0860876813530922, "learning_rate": 9.59070697151111e-07, "loss": 0.0015, "step": 82630 }, { "epoch": 2.4590481008138307, "grad_norm": 0.11312831938266754, "learning_rate": 9.58051404496454e-07, "loss": 0.0028, "step": 82640 }, { "epoch": 2.4593456622974723, "grad_norm": 0.04251435771584511, "learning_rate": 9.57032596383617e-07, "loss": 0.0017, "step": 82650 }, { "epoch": 2.459643223781114, "grad_norm": 0.11335427314043045, "learning_rate": 9.560142729347338e-07, "loss": 0.0018, "step": 82660 }, { "epoch": 2.4599407852647555, "grad_norm": 0.2607780396938324, "learning_rate": 9.549964342718797e-07, "loss": 0.0027, "step": 82670 }, { "epoch": 2.460238346748397, "grad_norm": 0.16499671339988708, "learning_rate": 9.539790805170723e-07, "loss": 0.0017, "step": 82680 }, { "epoch": 2.4605359082320386, "grad_norm": 0.07435350865125656, "learning_rate": 9.529622117922677e-07, "loss": 0.0027, "step": 82690 }, { "epoch": 2.46083346971568, "grad_norm": 0.3710000216960907, "learning_rate": 9.519458282193677e-07, "loss": 0.0028, "step": 82700 }, { "epoch": 2.4611310311993218, "grad_norm": 0.2116236537694931, "learning_rate": 9.509299299202146e-07, "loss": 0.001, "step": 82710 }, { "epoch": 2.461428592682963, "grad_norm": 0.12976424396038055, "learning_rate": 9.49914517016594e-07, "loss": 0.0012, "step": 82720 }, { "epoch": 2.4617261541666045, "grad_norm": 0.2380620837211609, "learning_rate": 9.488995896302289e-07, "loss": 0.0024, "step": 82730 }, { "epoch": 2.462023715650246, "grad_norm": 0.06023375317454338, "learning_rate": 9.478851478827888e-07, "loss": 0.0036, "step": 82740 }, { "epoch": 2.4623212771338876, "grad_norm": 0.04111620783805847, "learning_rate": 9.468711918958828e-07, "loss": 0.0025, "step": 82750 }, { "epoch": 2.462618838617529, "grad_norm": 0.08573348820209503, "learning_rate": 9.458577217910636e-07, "loss": 0.0012, "step": 82760 }, { "epoch": 2.4629164001011707, "grad_norm": 0.07704737037420273, "learning_rate": 9.448447376898224e-07, "loss": 0.001, "step": 82770 }, { "epoch": 2.4632139615848123, "grad_norm": 0.13110840320587158, "learning_rate": 9.43832239713594e-07, "loss": 0.0013, "step": 82780 }, { "epoch": 2.463511523068454, "grad_norm": 0.23302718997001648, "learning_rate": 9.428202279837556e-07, "loss": 0.0024, "step": 82790 }, { "epoch": 2.4638090845520955, "grad_norm": 0.16028112173080444, "learning_rate": 9.418087026216266e-07, "loss": 0.0022, "step": 82800 }, { "epoch": 2.464106646035737, "grad_norm": 0.2051655501127243, "learning_rate": 9.407976637484645e-07, "loss": 0.0014, "step": 82810 }, { "epoch": 2.4644042075193786, "grad_norm": 0.4258629381656647, "learning_rate": 9.397871114854706e-07, "loss": 0.0015, "step": 82820 }, { "epoch": 2.46470176900302, "grad_norm": 0.1334611177444458, "learning_rate": 9.387770459537926e-07, "loss": 0.0015, "step": 82830 }, { "epoch": 2.4649993304866618, "grad_norm": 0.3617301881313324, "learning_rate": 9.377674672745108e-07, "loss": 0.004, "step": 82840 }, { "epoch": 2.4652968919703033, "grad_norm": 0.14820341765880585, "learning_rate": 9.367583755686532e-07, "loss": 0.0019, "step": 82850 }, { "epoch": 2.465594453453945, "grad_norm": 0.12336914241313934, "learning_rate": 9.357497709571883e-07, "loss": 0.0016, "step": 82860 }, { "epoch": 2.4658920149375865, "grad_norm": 0.15819263458251953, "learning_rate": 9.347416535610276e-07, "loss": 0.0018, "step": 82870 }, { "epoch": 2.466189576421228, "grad_norm": 0.14232705533504486, "learning_rate": 9.337340235010184e-07, "loss": 0.0025, "step": 82880 }, { "epoch": 2.4664871379048696, "grad_norm": 0.11448069661855698, "learning_rate": 9.327268808979568e-07, "loss": 0.0021, "step": 82890 }, { "epoch": 2.466784699388511, "grad_norm": 0.18253271281719208, "learning_rate": 9.317202258725766e-07, "loss": 0.0015, "step": 82900 }, { "epoch": 2.4670822608721528, "grad_norm": 0.17740246653556824, "learning_rate": 9.307140585455532e-07, "loss": 0.0017, "step": 82910 }, { "epoch": 2.4673798223557943, "grad_norm": 0.15461739897727966, "learning_rate": 9.297083790375045e-07, "loss": 0.0022, "step": 82920 }, { "epoch": 2.467677383839436, "grad_norm": 0.15617692470550537, "learning_rate": 9.287031874689895e-07, "loss": 0.0029, "step": 82930 }, { "epoch": 2.4679749453230775, "grad_norm": 0.02915300242602825, "learning_rate": 9.276984839605102e-07, "loss": 0.0014, "step": 82940 }, { "epoch": 2.468272506806719, "grad_norm": 0.19907356798648834, "learning_rate": 9.26694268632507e-07, "loss": 0.0022, "step": 82950 }, { "epoch": 2.4685700682903606, "grad_norm": 0.04110497608780861, "learning_rate": 9.256905416053635e-07, "loss": 0.002, "step": 82960 }, { "epoch": 2.468867629774002, "grad_norm": 0.14793238043785095, "learning_rate": 9.246873029994058e-07, "loss": 0.0012, "step": 82970 }, { "epoch": 2.4691651912576438, "grad_norm": 0.10818131268024445, "learning_rate": 9.236845529349009e-07, "loss": 0.0018, "step": 82980 }, { "epoch": 2.4694627527412853, "grad_norm": 0.08893702924251556, "learning_rate": 9.22682291532055e-07, "loss": 0.0018, "step": 82990 }, { "epoch": 2.469760314224927, "grad_norm": 0.027204986661672592, "learning_rate": 9.216805189110179e-07, "loss": 0.0017, "step": 83000 }, { "epoch": 2.4700578757085685, "grad_norm": 0.001471874420531094, "learning_rate": 9.206792351918809e-07, "loss": 0.0027, "step": 83010 }, { "epoch": 2.47035543719221, "grad_norm": 0.10898618400096893, "learning_rate": 9.196784404946774e-07, "loss": 0.0024, "step": 83020 }, { "epoch": 2.4706529986758516, "grad_norm": 0.1841520369052887, "learning_rate": 9.18678134939378e-07, "loss": 0.0021, "step": 83030 }, { "epoch": 2.4709505601594928, "grad_norm": 0.06456571817398071, "learning_rate": 9.176783186458993e-07, "loss": 0.0016, "step": 83040 }, { "epoch": 2.4712481216431343, "grad_norm": 0.10224849730730057, "learning_rate": 9.166789917340979e-07, "loss": 0.0043, "step": 83050 }, { "epoch": 2.471545683126776, "grad_norm": 0.13251690566539764, "learning_rate": 9.15680154323772e-07, "loss": 0.0014, "step": 83060 }, { "epoch": 2.4718432446104175, "grad_norm": 0.34501171112060547, "learning_rate": 9.14681806534658e-07, "loss": 0.0172, "step": 83070 }, { "epoch": 2.472140806094059, "grad_norm": 0.07612895965576172, "learning_rate": 9.136839484864374e-07, "loss": 0.002, "step": 83080 }, { "epoch": 2.4724383675777006, "grad_norm": 0.13398200273513794, "learning_rate": 9.126865802987322e-07, "loss": 0.0009, "step": 83090 }, { "epoch": 2.472735929061342, "grad_norm": 0.07163939625024796, "learning_rate": 9.116897020911048e-07, "loss": 0.0014, "step": 83100 }, { "epoch": 2.473033490544984, "grad_norm": 0.1783483475446701, "learning_rate": 9.106933139830587e-07, "loss": 0.0013, "step": 83110 }, { "epoch": 2.4733310520286254, "grad_norm": 0.13676419854164124, "learning_rate": 9.096974160940402e-07, "loss": 0.0031, "step": 83120 }, { "epoch": 2.473628613512267, "grad_norm": 0.33443695306777954, "learning_rate": 9.087020085434367e-07, "loss": 0.0017, "step": 83130 }, { "epoch": 2.4739261749959085, "grad_norm": 0.22048591077327728, "learning_rate": 9.077070914505731e-07, "loss": 0.0017, "step": 83140 }, { "epoch": 2.47422373647955, "grad_norm": 0.09105762839317322, "learning_rate": 9.067126649347203e-07, "loss": 0.0022, "step": 83150 }, { "epoch": 2.4745212979631916, "grad_norm": 0.4312446415424347, "learning_rate": 9.05718729115087e-07, "loss": 0.0024, "step": 83160 }, { "epoch": 2.474818859446833, "grad_norm": 0.22236484289169312, "learning_rate": 9.047252841108278e-07, "loss": 0.0023, "step": 83170 }, { "epoch": 2.475116420930475, "grad_norm": 0.07830175757408142, "learning_rate": 9.037323300410311e-07, "loss": 0.001, "step": 83180 }, { "epoch": 2.4754139824141164, "grad_norm": 0.09877955168485641, "learning_rate": 9.027398670247322e-07, "loss": 0.0014, "step": 83190 }, { "epoch": 2.475711543897758, "grad_norm": 0.09406454861164093, "learning_rate": 9.017478951809061e-07, "loss": 0.0021, "step": 83200 }, { "epoch": 2.4760091053813995, "grad_norm": 0.2733277380466461, "learning_rate": 9.007564146284697e-07, "loss": 0.0031, "step": 83210 }, { "epoch": 2.476306666865041, "grad_norm": 0.10035368800163269, "learning_rate": 8.997654254862776e-07, "loss": 0.0021, "step": 83220 }, { "epoch": 2.4766042283486827, "grad_norm": 0.08479449152946472, "learning_rate": 8.987749278731289e-07, "loss": 0.0021, "step": 83230 }, { "epoch": 2.4769017898323242, "grad_norm": 0.18483710289001465, "learning_rate": 8.977849219077644e-07, "loss": 0.0013, "step": 83240 }, { "epoch": 2.477199351315966, "grad_norm": 0.1270071417093277, "learning_rate": 8.967954077088614e-07, "loss": 0.0031, "step": 83250 }, { "epoch": 2.4774969127996074, "grad_norm": 0.15884292125701904, "learning_rate": 8.958063853950428e-07, "loss": 0.0014, "step": 83260 }, { "epoch": 2.477794474283249, "grad_norm": 0.08323301374912262, "learning_rate": 8.948178550848702e-07, "loss": 0.0012, "step": 83270 }, { "epoch": 2.4780920357668905, "grad_norm": 0.1195078194141388, "learning_rate": 8.93829816896849e-07, "loss": 0.002, "step": 83280 }, { "epoch": 2.4783895972505317, "grad_norm": 0.0687708631157875, "learning_rate": 8.928422709494206e-07, "loss": 0.0013, "step": 83290 }, { "epoch": 2.4786871587341732, "grad_norm": 0.13346020877361298, "learning_rate": 8.918552173609712e-07, "loss": 0.0019, "step": 83300 }, { "epoch": 2.478984720217815, "grad_norm": 0.07595393061637878, "learning_rate": 8.908686562498281e-07, "loss": 0.0012, "step": 83310 }, { "epoch": 2.4792822817014564, "grad_norm": 0.032073039561510086, "learning_rate": 8.898825877342588e-07, "loss": 0.0011, "step": 83320 }, { "epoch": 2.479579843185098, "grad_norm": 0.3111954629421234, "learning_rate": 8.888970119324692e-07, "loss": 0.0017, "step": 83330 }, { "epoch": 2.4798774046687395, "grad_norm": 0.10028122365474701, "learning_rate": 8.879119289626098e-07, "loss": 0.0038, "step": 83340 }, { "epoch": 2.480174966152381, "grad_norm": 0.14585907757282257, "learning_rate": 8.869273389427707e-07, "loss": 0.0014, "step": 83350 }, { "epoch": 2.4804725276360227, "grad_norm": 0.14934350550174713, "learning_rate": 8.859432419909841e-07, "loss": 0.0016, "step": 83360 }, { "epoch": 2.4807700891196642, "grad_norm": 0.33487439155578613, "learning_rate": 8.84959638225219e-07, "loss": 0.0018, "step": 83370 }, { "epoch": 2.481067650603306, "grad_norm": 0.01783393882215023, "learning_rate": 8.839765277633889e-07, "loss": 0.0027, "step": 83380 }, { "epoch": 2.4813652120869474, "grad_norm": 0.0726412981748581, "learning_rate": 8.829939107233504e-07, "loss": 0.0011, "step": 83390 }, { "epoch": 2.481662773570589, "grad_norm": 0.08484529703855515, "learning_rate": 8.820117872228945e-07, "loss": 0.0014, "step": 83400 }, { "epoch": 2.4819603350542305, "grad_norm": 0.08460123091936111, "learning_rate": 8.810301573797575e-07, "loss": 0.0012, "step": 83410 }, { "epoch": 2.482257896537872, "grad_norm": 0.07037770748138428, "learning_rate": 8.800490213116159e-07, "loss": 0.0013, "step": 83420 }, { "epoch": 2.4825554580215137, "grad_norm": 0.027766622602939606, "learning_rate": 8.790683791360876e-07, "loss": 0.0013, "step": 83430 }, { "epoch": 2.4828530195051552, "grad_norm": 0.34796473383903503, "learning_rate": 8.780882309707278e-07, "loss": 0.003, "step": 83440 }, { "epoch": 2.483150580988797, "grad_norm": 0.11342430859804153, "learning_rate": 8.771085769330362e-07, "loss": 0.0026, "step": 83450 }, { "epoch": 2.4834481424724384, "grad_norm": 0.16188682615756989, "learning_rate": 8.761294171404522e-07, "loss": 0.0022, "step": 83460 }, { "epoch": 2.48374570395608, "grad_norm": 0.028091954067349434, "learning_rate": 8.751507517103563e-07, "loss": 0.0017, "step": 83470 }, { "epoch": 2.4840432654397215, "grad_norm": 0.07582606375217438, "learning_rate": 8.741725807600676e-07, "loss": 0.0011, "step": 83480 }, { "epoch": 2.484340826923363, "grad_norm": 0.08545002341270447, "learning_rate": 8.731949044068489e-07, "loss": 0.0013, "step": 83490 }, { "epoch": 2.4846383884070047, "grad_norm": 0.2316117137670517, "learning_rate": 8.722177227679013e-07, "loss": 0.0019, "step": 83500 }, { "epoch": 2.4849359498906463, "grad_norm": 0.03551388531923294, "learning_rate": 8.712410359603701e-07, "loss": 0.0011, "step": 83510 }, { "epoch": 2.485233511374288, "grad_norm": 0.35930493474006653, "learning_rate": 8.702648441013356e-07, "loss": 0.0032, "step": 83520 }, { "epoch": 2.4855310728579294, "grad_norm": 0.23059788346290588, "learning_rate": 8.692891473078235e-07, "loss": 0.002, "step": 83530 }, { "epoch": 2.485828634341571, "grad_norm": 0.0877174660563469, "learning_rate": 8.683139456967987e-07, "loss": 0.0013, "step": 83540 }, { "epoch": 2.4861261958252125, "grad_norm": 0.11376381665468216, "learning_rate": 8.673392393851677e-07, "loss": 0.0021, "step": 83550 }, { "epoch": 2.486423757308854, "grad_norm": 0.15071013569831848, "learning_rate": 8.66365028489774e-07, "loss": 0.0014, "step": 83560 }, { "epoch": 2.4867213187924957, "grad_norm": 0.18392720818519592, "learning_rate": 8.653913131274056e-07, "loss": 0.0019, "step": 83570 }, { "epoch": 2.4870188802761373, "grad_norm": 0.08887138217687607, "learning_rate": 8.644180934147906e-07, "loss": 0.0022, "step": 83580 }, { "epoch": 2.487316441759779, "grad_norm": 0.0673796534538269, "learning_rate": 8.634453694685974e-07, "loss": 0.0015, "step": 83590 }, { "epoch": 2.4876140032434204, "grad_norm": 0.14333753287792206, "learning_rate": 8.624731414054316e-07, "loss": 0.0022, "step": 83600 }, { "epoch": 2.4879115647270615, "grad_norm": 0.23516716063022614, "learning_rate": 8.615014093418445e-07, "loss": 0.0029, "step": 83610 }, { "epoch": 2.488209126210703, "grad_norm": 0.10609514266252518, "learning_rate": 8.605301733943267e-07, "loss": 0.0021, "step": 83620 }, { "epoch": 2.4885066876943447, "grad_norm": 0.13580752909183502, "learning_rate": 8.595594336793051e-07, "loss": 0.0018, "step": 83630 }, { "epoch": 2.4888042491779863, "grad_norm": 0.0538485161960125, "learning_rate": 8.585891903131521e-07, "loss": 0.0033, "step": 83640 }, { "epoch": 2.489101810661628, "grad_norm": 0.31329694390296936, "learning_rate": 8.576194434121782e-07, "loss": 0.0015, "step": 83650 }, { "epoch": 2.4893993721452694, "grad_norm": 0.17000237107276917, "learning_rate": 8.566501930926374e-07, "loss": 0.0013, "step": 83660 }, { "epoch": 2.489696933628911, "grad_norm": 0.18936435878276825, "learning_rate": 8.556814394707169e-07, "loss": 0.0015, "step": 83670 }, { "epoch": 2.4899944951125526, "grad_norm": 0.18672135472297668, "learning_rate": 8.547131826625532e-07, "loss": 0.0012, "step": 83680 }, { "epoch": 2.490292056596194, "grad_norm": 0.16253991425037384, "learning_rate": 8.537454227842195e-07, "loss": 0.0016, "step": 83690 }, { "epoch": 2.4905896180798357, "grad_norm": 0.12675690650939941, "learning_rate": 8.527781599517265e-07, "loss": 0.0008, "step": 83700 }, { "epoch": 2.4908871795634773, "grad_norm": 0.14714978635311127, "learning_rate": 8.518113942810296e-07, "loss": 0.0015, "step": 83710 }, { "epoch": 2.491184741047119, "grad_norm": 0.1512841135263443, "learning_rate": 8.50845125888023e-07, "loss": 0.002, "step": 83720 }, { "epoch": 2.4914823025307604, "grad_norm": 0.21115458011627197, "learning_rate": 8.49879354888542e-07, "loss": 0.0019, "step": 83730 }, { "epoch": 2.491779864014402, "grad_norm": 0.09807037562131882, "learning_rate": 8.489140813983593e-07, "loss": 0.001, "step": 83740 }, { "epoch": 2.4920774254980436, "grad_norm": 0.157206192612648, "learning_rate": 8.479493055331911e-07, "loss": 0.0013, "step": 83750 }, { "epoch": 2.492374986981685, "grad_norm": 0.15221461653709412, "learning_rate": 8.469850274086944e-07, "loss": 0.0012, "step": 83760 }, { "epoch": 2.4926725484653267, "grad_norm": 0.18640120327472687, "learning_rate": 8.46021247140465e-07, "loss": 0.0017, "step": 83770 }, { "epoch": 2.4929701099489683, "grad_norm": 0.15845203399658203, "learning_rate": 8.450579648440371e-07, "loss": 0.0026, "step": 83780 }, { "epoch": 2.49326767143261, "grad_norm": 0.2659626603126526, "learning_rate": 8.440951806348884e-07, "loss": 0.0031, "step": 83790 }, { "epoch": 2.4935652329162514, "grad_norm": 0.1132315993309021, "learning_rate": 8.431328946284367e-07, "loss": 0.0013, "step": 83800 }, { "epoch": 2.493862794399893, "grad_norm": 0.09357477724552155, "learning_rate": 8.421711069400401e-07, "loss": 0.0019, "step": 83810 }, { "epoch": 2.4941603558835346, "grad_norm": 0.08372166752815247, "learning_rate": 8.412098176849925e-07, "loss": 0.001, "step": 83820 }, { "epoch": 2.494457917367176, "grad_norm": 0.14011110365390778, "learning_rate": 8.402490269785341e-07, "loss": 0.0013, "step": 83830 }, { "epoch": 2.4947554788508177, "grad_norm": 0.16239069402217865, "learning_rate": 8.392887349358425e-07, "loss": 0.0019, "step": 83840 }, { "epoch": 2.4950530403344593, "grad_norm": 0.36984387040138245, "learning_rate": 8.383289416720369e-07, "loss": 0.0029, "step": 83850 }, { "epoch": 2.4953506018181004, "grad_norm": 0.31095248460769653, "learning_rate": 8.373696473021731e-07, "loss": 0.002, "step": 83860 }, { "epoch": 2.495648163301742, "grad_norm": 0.10663866996765137, "learning_rate": 8.364108519412512e-07, "loss": 0.002, "step": 83870 }, { "epoch": 2.4959457247853836, "grad_norm": 0.08734210580587387, "learning_rate": 8.354525557042093e-07, "loss": 0.002, "step": 83880 }, { "epoch": 2.496243286269025, "grad_norm": 0.09086950868368149, "learning_rate": 8.344947587059288e-07, "loss": 0.0024, "step": 83890 }, { "epoch": 2.4965408477526667, "grad_norm": 0.1833035945892334, "learning_rate": 8.335374610612251e-07, "loss": 0.0014, "step": 83900 }, { "epoch": 2.4968384092363083, "grad_norm": 0.1403687447309494, "learning_rate": 8.325806628848587e-07, "loss": 0.0022, "step": 83910 }, { "epoch": 2.49713597071995, "grad_norm": 0.06529015302658081, "learning_rate": 8.316243642915295e-07, "loss": 0.0016, "step": 83920 }, { "epoch": 2.4974335322035914, "grad_norm": 0.15106147527694702, "learning_rate": 8.306685653958779e-07, "loss": 0.0038, "step": 83930 }, { "epoch": 2.497731093687233, "grad_norm": 0.12492132931947708, "learning_rate": 8.297132663124813e-07, "loss": 0.0024, "step": 83940 }, { "epoch": 2.4980286551708746, "grad_norm": 0.043245553970336914, "learning_rate": 8.287584671558585e-07, "loss": 0.0015, "step": 83950 }, { "epoch": 2.498326216654516, "grad_norm": 0.08521884679794312, "learning_rate": 8.278041680404731e-07, "loss": 0.0007, "step": 83960 }, { "epoch": 2.4986237781381577, "grad_norm": 0.15620018541812897, "learning_rate": 8.268503690807216e-07, "loss": 0.0016, "step": 83970 }, { "epoch": 2.4989213396217993, "grad_norm": 0.27048951387405396, "learning_rate": 8.258970703909453e-07, "loss": 0.0018, "step": 83980 }, { "epoch": 2.499218901105441, "grad_norm": 0.010479722172021866, "learning_rate": 8.249442720854229e-07, "loss": 0.0022, "step": 83990 }, { "epoch": 2.4995164625890824, "grad_norm": 0.5172877907752991, "learning_rate": 8.239919742783759e-07, "loss": 0.0045, "step": 84000 }, { "epoch": 2.499814024072724, "grad_norm": 0.14501217007637024, "learning_rate": 8.230401770839619e-07, "loss": 0.002, "step": 84010 }, { "epoch": 2.5001115855563656, "grad_norm": 0.2441728413105011, "learning_rate": 8.220888806162819e-07, "loss": 0.0016, "step": 84020 }, { "epoch": 2.500409147040007, "grad_norm": 0.16719426214694977, "learning_rate": 8.211380849893769e-07, "loss": 0.0016, "step": 84030 }, { "epoch": 2.5007067085236487, "grad_norm": 0.14076794683933258, "learning_rate": 8.20187790317224e-07, "loss": 0.0013, "step": 84040 }, { "epoch": 2.5010042700072903, "grad_norm": 0.11522910743951797, "learning_rate": 8.192379967137443e-07, "loss": 0.0013, "step": 84050 }, { "epoch": 2.501301831490932, "grad_norm": 0.20933757722377777, "learning_rate": 8.182887042927967e-07, "loss": 0.0025, "step": 84060 }, { "epoch": 2.5015993929745735, "grad_norm": 0.155850350856781, "learning_rate": 8.173399131681831e-07, "loss": 0.0025, "step": 84070 }, { "epoch": 2.501896954458215, "grad_norm": 0.4161965548992157, "learning_rate": 8.163916234536401e-07, "loss": 0.0025, "step": 84080 }, { "epoch": 2.5021945159418566, "grad_norm": 0.14091888070106506, "learning_rate": 8.15443835262848e-07, "loss": 0.0016, "step": 84090 }, { "epoch": 2.502492077425498, "grad_norm": 0.0797833502292633, "learning_rate": 8.144965487094258e-07, "loss": 0.0032, "step": 84100 }, { "epoch": 2.5027896389091397, "grad_norm": 0.11844668537378311, "learning_rate": 8.135497639069345e-07, "loss": 0.0023, "step": 84110 }, { "epoch": 2.5030872003927813, "grad_norm": 0.04170688986778259, "learning_rate": 8.126034809688705e-07, "loss": 0.0019, "step": 84120 }, { "epoch": 2.503384761876423, "grad_norm": 0.13939137756824493, "learning_rate": 8.116577000086734e-07, "loss": 0.001, "step": 84130 }, { "epoch": 2.5036823233600645, "grad_norm": 0.2349604219198227, "learning_rate": 8.107124211397216e-07, "loss": 0.0016, "step": 84140 }, { "epoch": 2.503979884843706, "grad_norm": 0.24614217877388, "learning_rate": 8.09767644475335e-07, "loss": 0.0018, "step": 84150 }, { "epoch": 2.5042774463273476, "grad_norm": 0.2305685430765152, "learning_rate": 8.088233701287695e-07, "loss": 0.0018, "step": 84160 }, { "epoch": 2.504575007810989, "grad_norm": 0.12800459563732147, "learning_rate": 8.078795982132243e-07, "loss": 0.0013, "step": 84170 }, { "epoch": 2.5048725692946308, "grad_norm": 0.10603974759578705, "learning_rate": 8.06936328841837e-07, "loss": 0.0026, "step": 84180 }, { "epoch": 2.5051701307782723, "grad_norm": 0.012975791469216347, "learning_rate": 8.05993562127686e-07, "loss": 0.002, "step": 84190 }, { "epoch": 2.5054676922619135, "grad_norm": 0.15681856870651245, "learning_rate": 8.050512981837865e-07, "loss": 0.0012, "step": 84200 }, { "epoch": 2.505765253745555, "grad_norm": 0.15240266919136047, "learning_rate": 8.041095371230972e-07, "loss": 0.0018, "step": 84210 }, { "epoch": 2.5060628152291966, "grad_norm": 0.18700827658176422, "learning_rate": 8.031682790585138e-07, "loss": 0.0015, "step": 84220 }, { "epoch": 2.506360376712838, "grad_norm": 0.10838412493467331, "learning_rate": 8.022275241028743e-07, "loss": 0.0011, "step": 84230 }, { "epoch": 2.5066579381964798, "grad_norm": 0.17745435237884521, "learning_rate": 8.012872723689513e-07, "loss": 0.0016, "step": 84240 }, { "epoch": 2.5069554996801213, "grad_norm": 0.1486906260251999, "learning_rate": 8.003475239694642e-07, "loss": 0.0013, "step": 84250 }, { "epoch": 2.507253061163763, "grad_norm": 0.0792713463306427, "learning_rate": 7.994082790170682e-07, "loss": 0.0024, "step": 84260 }, { "epoch": 2.5075506226474045, "grad_norm": 0.10446702688932419, "learning_rate": 7.984695376243562e-07, "loss": 0.0021, "step": 84270 }, { "epoch": 2.507848184131046, "grad_norm": 0.15521179139614105, "learning_rate": 7.975312999038637e-07, "loss": 0.0031, "step": 84280 }, { "epoch": 2.5081457456146876, "grad_norm": 0.2043500691652298, "learning_rate": 7.965935659680657e-07, "loss": 0.0015, "step": 84290 }, { "epoch": 2.508443307098329, "grad_norm": 0.17921492457389832, "learning_rate": 7.956563359293768e-07, "loss": 0.0014, "step": 84300 }, { "epoch": 2.5087408685819708, "grad_norm": 0.09636131674051285, "learning_rate": 7.947196099001475e-07, "loss": 0.0012, "step": 84310 }, { "epoch": 2.5090384300656123, "grad_norm": 0.08763816207647324, "learning_rate": 7.937833879926737e-07, "loss": 0.0022, "step": 84320 }, { "epoch": 2.509335991549254, "grad_norm": 0.06204119697213173, "learning_rate": 7.928476703191868e-07, "loss": 0.0016, "step": 84330 }, { "epoch": 2.5096335530328955, "grad_norm": 0.11892390996217728, "learning_rate": 7.919124569918602e-07, "loss": 0.0012, "step": 84340 }, { "epoch": 2.509931114516537, "grad_norm": 0.1930481642484665, "learning_rate": 7.909777481228043e-07, "loss": 0.0013, "step": 84350 }, { "epoch": 2.5102286760001786, "grad_norm": 0.016540631651878357, "learning_rate": 7.900435438240706e-07, "loss": 0.0013, "step": 84360 }, { "epoch": 2.51052623748382, "grad_norm": 0.176622673869133, "learning_rate": 7.8910984420765e-07, "loss": 0.0021, "step": 84370 }, { "epoch": 2.5108237989674618, "grad_norm": 0.09348252415657043, "learning_rate": 7.881766493854748e-07, "loss": 0.0024, "step": 84380 }, { "epoch": 2.5111213604511033, "grad_norm": 0.06145642325282097, "learning_rate": 7.872439594694114e-07, "loss": 0.0012, "step": 84390 }, { "epoch": 2.511418921934745, "grad_norm": 0.2104034423828125, "learning_rate": 7.863117745712711e-07, "loss": 0.0018, "step": 84400 }, { "epoch": 2.511716483418386, "grad_norm": 0.15488044917583466, "learning_rate": 7.853800948028028e-07, "loss": 0.0016, "step": 84410 }, { "epoch": 2.5120140449020276, "grad_norm": 0.0024331288877874613, "learning_rate": 7.844489202756933e-07, "loss": 0.0023, "step": 84420 }, { "epoch": 2.512311606385669, "grad_norm": 0.18036770820617676, "learning_rate": 7.83518251101571e-07, "loss": 0.0017, "step": 84430 }, { "epoch": 2.5126091678693108, "grad_norm": 0.056045323610305786, "learning_rate": 7.825880873920027e-07, "loss": 0.0041, "step": 84440 }, { "epoch": 2.5129067293529523, "grad_norm": 0.08332463353872299, "learning_rate": 7.816584292584961e-07, "loss": 0.0016, "step": 84450 }, { "epoch": 2.513204290836594, "grad_norm": 0.10640275478363037, "learning_rate": 7.807292768124952e-07, "loss": 0.0023, "step": 84460 }, { "epoch": 2.5135018523202355, "grad_norm": 0.18524041771888733, "learning_rate": 7.798006301653854e-07, "loss": 0.0028, "step": 84470 }, { "epoch": 2.513799413803877, "grad_norm": 0.02227136865258217, "learning_rate": 7.788724894284915e-07, "loss": 0.0031, "step": 84480 }, { "epoch": 2.5140969752875186, "grad_norm": 0.15781216323375702, "learning_rate": 7.779448547130796e-07, "loss": 0.0019, "step": 84490 }, { "epoch": 2.51439453677116, "grad_norm": 0.1729394793510437, "learning_rate": 7.770177261303491e-07, "loss": 0.0019, "step": 84500 }, { "epoch": 2.5146920982548018, "grad_norm": 0.07044053822755814, "learning_rate": 7.760911037914448e-07, "loss": 0.0018, "step": 84510 }, { "epoch": 2.5149896597384434, "grad_norm": 0.21374903619289398, "learning_rate": 7.751649878074485e-07, "loss": 0.0021, "step": 84520 }, { "epoch": 2.515287221222085, "grad_norm": 0.04357220232486725, "learning_rate": 7.742393782893809e-07, "loss": 0.0019, "step": 84530 }, { "epoch": 2.5155847827057265, "grad_norm": 0.14348924160003662, "learning_rate": 7.733142753482026e-07, "loss": 0.0018, "step": 84540 }, { "epoch": 2.515882344189368, "grad_norm": 0.18360933661460876, "learning_rate": 7.723896790948137e-07, "loss": 0.0028, "step": 84550 }, { "epoch": 2.5161799056730096, "grad_norm": 0.18239331245422363, "learning_rate": 7.714655896400541e-07, "loss": 0.0011, "step": 84560 }, { "epoch": 2.516477467156651, "grad_norm": 0.3220920264720917, "learning_rate": 7.705420070947001e-07, "loss": 0.0027, "step": 84570 }, { "epoch": 2.516775028640293, "grad_norm": 0.15054728090763092, "learning_rate": 7.696189315694691e-07, "loss": 0.0023, "step": 84580 }, { "epoch": 2.5170725901239344, "grad_norm": 0.3545466959476471, "learning_rate": 7.686963631750194e-07, "loss": 0.002, "step": 84590 }, { "epoch": 2.517370151607576, "grad_norm": 0.12914644181728363, "learning_rate": 7.677743020219475e-07, "loss": 0.0021, "step": 84600 }, { "epoch": 2.5176677130912175, "grad_norm": 0.1388668268918991, "learning_rate": 7.668527482207855e-07, "loss": 0.0008, "step": 84610 }, { "epoch": 2.517965274574859, "grad_norm": 0.0805504098534584, "learning_rate": 7.659317018820095e-07, "loss": 0.0014, "step": 84620 }, { "epoch": 2.5182628360585007, "grad_norm": 0.13180720806121826, "learning_rate": 7.650111631160334e-07, "loss": 0.0021, "step": 84630 }, { "epoch": 2.5185603975421422, "grad_norm": 0.12629052996635437, "learning_rate": 7.640911320332096e-07, "loss": 0.0013, "step": 84640 }, { "epoch": 2.518857959025784, "grad_norm": 0.028379814699292183, "learning_rate": 7.631716087438291e-07, "loss": 0.0009, "step": 84650 }, { "epoch": 2.5191555205094254, "grad_norm": 0.12602680921554565, "learning_rate": 7.622525933581226e-07, "loss": 0.0013, "step": 84660 }, { "epoch": 2.519453081993067, "grad_norm": 0.22071713209152222, "learning_rate": 7.613340859862606e-07, "loss": 0.0016, "step": 84670 }, { "epoch": 2.5197506434767085, "grad_norm": 0.20713907480239868, "learning_rate": 7.604160867383536e-07, "loss": 0.0016, "step": 84680 }, { "epoch": 2.52004820496035, "grad_norm": 0.05007457733154297, "learning_rate": 7.594985957244471e-07, "loss": 0.0018, "step": 84690 }, { "epoch": 2.5203457664439917, "grad_norm": 0.11232750117778778, "learning_rate": 7.585816130545293e-07, "loss": 0.0015, "step": 84700 }, { "epoch": 2.5206433279276332, "grad_norm": 0.07483852654695511, "learning_rate": 7.57665138838527e-07, "loss": 0.0018, "step": 84710 }, { "epoch": 2.520940889411275, "grad_norm": 0.14059844613075256, "learning_rate": 7.567491731863069e-07, "loss": 0.0023, "step": 84720 }, { "epoch": 2.5212384508949164, "grad_norm": 0.13504739105701447, "learning_rate": 7.558337162076701e-07, "loss": 0.0018, "step": 84730 }, { "epoch": 2.521536012378558, "grad_norm": 0.12977753579616547, "learning_rate": 7.549187680123621e-07, "loss": 0.0007, "step": 84740 }, { "epoch": 2.5218335738621995, "grad_norm": 0.0910632386803627, "learning_rate": 7.540043287100663e-07, "loss": 0.0013, "step": 84750 }, { "epoch": 2.522131135345841, "grad_norm": 0.07059083133935928, "learning_rate": 7.53090398410401e-07, "loss": 0.0013, "step": 84760 }, { "epoch": 2.5224286968294822, "grad_norm": 0.21039141714572906, "learning_rate": 7.521769772229287e-07, "loss": 0.0036, "step": 84770 }, { "epoch": 2.522726258313124, "grad_norm": 0.06476185470819473, "learning_rate": 7.512640652571479e-07, "loss": 0.0036, "step": 84780 }, { "epoch": 2.5230238197967654, "grad_norm": 0.6700965762138367, "learning_rate": 7.503516626224988e-07, "loss": 0.0032, "step": 84790 }, { "epoch": 2.523321381280407, "grad_norm": 0.08735446631908417, "learning_rate": 7.494397694283551e-07, "loss": 0.001, "step": 84800 }, { "epoch": 2.5236189427640485, "grad_norm": 0.09159500896930695, "learning_rate": 7.485283857840364e-07, "loss": 0.0011, "step": 84810 }, { "epoch": 2.52391650424769, "grad_norm": 0.11738384515047073, "learning_rate": 7.476175117987977e-07, "loss": 0.0029, "step": 84820 }, { "epoch": 2.5242140657313317, "grad_norm": 0.36689651012420654, "learning_rate": 7.467071475818305e-07, "loss": 0.0017, "step": 84830 }, { "epoch": 2.5245116272149732, "grad_norm": 0.12388021498918533, "learning_rate": 7.457972932422692e-07, "loss": 0.0015, "step": 84840 }, { "epoch": 2.524809188698615, "grad_norm": 0.037367623299360275, "learning_rate": 7.448879488891847e-07, "loss": 0.0018, "step": 84850 }, { "epoch": 2.5251067501822564, "grad_norm": 0.10496822744607925, "learning_rate": 7.4397911463159e-07, "loss": 0.0013, "step": 84860 }, { "epoch": 2.525404311665898, "grad_norm": 0.0708766058087349, "learning_rate": 7.430707905784318e-07, "loss": 0.0013, "step": 84870 }, { "epoch": 2.5257018731495395, "grad_norm": 0.07531922310590744, "learning_rate": 7.421629768385996e-07, "loss": 0.0012, "step": 84880 }, { "epoch": 2.525999434633181, "grad_norm": 0.16063429415225983, "learning_rate": 7.412556735209203e-07, "loss": 0.0011, "step": 84890 }, { "epoch": 2.5262969961168227, "grad_norm": 0.15380293130874634, "learning_rate": 7.403488807341614e-07, "loss": 0.0014, "step": 84900 }, { "epoch": 2.5265945576004643, "grad_norm": 0.34891900420188904, "learning_rate": 7.394425985870247e-07, "loss": 0.0014, "step": 84910 }, { "epoch": 2.526892119084106, "grad_norm": 0.18186675012111664, "learning_rate": 7.385368271881554e-07, "loss": 0.0018, "step": 84920 }, { "epoch": 2.5271896805677474, "grad_norm": 0.10254267603158951, "learning_rate": 7.376315666461359e-07, "loss": 0.0013, "step": 84930 }, { "epoch": 2.527487242051389, "grad_norm": 0.15265029668807983, "learning_rate": 7.36726817069488e-07, "loss": 0.0026, "step": 84940 }, { "epoch": 2.5277848035350305, "grad_norm": 0.2761033773422241, "learning_rate": 7.358225785666701e-07, "loss": 0.0017, "step": 84950 }, { "epoch": 2.528082365018672, "grad_norm": 0.09311091154813766, "learning_rate": 7.349188512460809e-07, "loss": 0.0026, "step": 84960 }, { "epoch": 2.5283799265023137, "grad_norm": 0.1883450746536255, "learning_rate": 7.340156352160577e-07, "loss": 0.0022, "step": 84970 }, { "epoch": 2.5286774879859553, "grad_norm": 0.05473480373620987, "learning_rate": 7.331129305848783e-07, "loss": 0.0018, "step": 84980 }, { "epoch": 2.5289750494695964, "grad_norm": 0.045567840337753296, "learning_rate": 7.322107374607545e-07, "loss": 0.0015, "step": 84990 }, { "epoch": 2.529272610953238, "grad_norm": 0.23596972227096558, "learning_rate": 7.313090559518415e-07, "loss": 0.0014, "step": 85000 }, { "epoch": 2.5295701724368795, "grad_norm": 0.11556901782751083, "learning_rate": 7.304078861662306e-07, "loss": 0.0031, "step": 85010 }, { "epoch": 2.529867733920521, "grad_norm": 0.17129205167293549, "learning_rate": 7.295072282119536e-07, "loss": 0.0018, "step": 85020 }, { "epoch": 2.5301652954041627, "grad_norm": 0.1503099799156189, "learning_rate": 7.286070821969782e-07, "loss": 0.0013, "step": 85030 }, { "epoch": 2.5304628568878043, "grad_norm": 0.07864295691251755, "learning_rate": 7.277074482292129e-07, "loss": 0.0016, "step": 85040 }, { "epoch": 2.530760418371446, "grad_norm": 0.1889609694480896, "learning_rate": 7.268083264165049e-07, "loss": 0.0023, "step": 85050 }, { "epoch": 2.5310579798550874, "grad_norm": 0.06908413767814636, "learning_rate": 7.259097168666395e-07, "loss": 0.0014, "step": 85060 }, { "epoch": 2.531355541338729, "grad_norm": 0.19309397041797638, "learning_rate": 7.250116196873391e-07, "loss": 0.0011, "step": 85070 }, { "epoch": 2.5316531028223705, "grad_norm": 0.0777381956577301, "learning_rate": 7.241140349862669e-07, "loss": 0.0032, "step": 85080 }, { "epoch": 2.531950664306012, "grad_norm": 0.09537563472986221, "learning_rate": 7.232169628710239e-07, "loss": 0.0013, "step": 85090 }, { "epoch": 2.5322482257896537, "grad_norm": 0.11560455709695816, "learning_rate": 7.223204034491493e-07, "loss": 0.0011, "step": 85100 }, { "epoch": 2.5325457872732953, "grad_norm": 0.020694026723504066, "learning_rate": 7.21424356828121e-07, "loss": 0.0011, "step": 85110 }, { "epoch": 2.532843348756937, "grad_norm": 0.09806274622678757, "learning_rate": 7.205288231153557e-07, "loss": 0.0013, "step": 85120 }, { "epoch": 2.5331409102405784, "grad_norm": 0.062465012073516846, "learning_rate": 7.196338024182098e-07, "loss": 0.002, "step": 85130 }, { "epoch": 2.53343847172422, "grad_norm": 0.1651921272277832, "learning_rate": 7.187392948439742e-07, "loss": 0.0019, "step": 85140 }, { "epoch": 2.5337360332078616, "grad_norm": 0.07167626172304153, "learning_rate": 7.178453004998826e-07, "loss": 0.0015, "step": 85150 }, { "epoch": 2.534033594691503, "grad_norm": 0.09477028250694275, "learning_rate": 7.169518194931064e-07, "loss": 0.0014, "step": 85160 }, { "epoch": 2.5343311561751447, "grad_norm": 0.2243860363960266, "learning_rate": 7.160588519307515e-07, "loss": 0.0024, "step": 85170 }, { "epoch": 2.5346287176587863, "grad_norm": 0.08994671702384949, "learning_rate": 7.151663979198675e-07, "loss": 0.0016, "step": 85180 }, { "epoch": 2.534926279142428, "grad_norm": 0.5537408590316772, "learning_rate": 7.142744575674398e-07, "loss": 0.0033, "step": 85190 }, { "epoch": 2.5352238406260694, "grad_norm": 0.050582025200128555, "learning_rate": 7.133830309803935e-07, "loss": 0.0014, "step": 85200 }, { "epoch": 2.535521402109711, "grad_norm": 0.29249125719070435, "learning_rate": 7.124921182655898e-07, "loss": 0.001, "step": 85210 }, { "epoch": 2.5358189635933526, "grad_norm": 0.05581142008304596, "learning_rate": 7.116017195298302e-07, "loss": 0.0012, "step": 85220 }, { "epoch": 2.536116525076994, "grad_norm": 0.21760860085487366, "learning_rate": 7.107118348798547e-07, "loss": 0.002, "step": 85230 }, { "epoch": 2.5364140865606357, "grad_norm": 0.028023596853017807, "learning_rate": 7.098224644223423e-07, "loss": 0.001, "step": 85240 }, { "epoch": 2.5367116480442773, "grad_norm": 0.12711378931999207, "learning_rate": 7.089336082639059e-07, "loss": 0.0013, "step": 85250 }, { "epoch": 2.537009209527919, "grad_norm": 0.06391216814517975, "learning_rate": 7.080452665111026e-07, "loss": 0.0021, "step": 85260 }, { "epoch": 2.5373067710115604, "grad_norm": 0.16043907403945923, "learning_rate": 7.071574392704239e-07, "loss": 0.0021, "step": 85270 }, { "epoch": 2.537604332495202, "grad_norm": 0.024527588859200478, "learning_rate": 7.062701266483035e-07, "loss": 0.0022, "step": 85280 }, { "epoch": 2.5379018939788436, "grad_norm": 0.27265849709510803, "learning_rate": 7.053833287511075e-07, "loss": 0.0026, "step": 85290 }, { "epoch": 2.538199455462485, "grad_norm": 0.12341437488794327, "learning_rate": 7.044970456851452e-07, "loss": 0.0011, "step": 85300 }, { "epoch": 2.5384970169461267, "grad_norm": 0.13196910917758942, "learning_rate": 7.036112775566633e-07, "loss": 0.0016, "step": 85310 }, { "epoch": 2.5387945784297683, "grad_norm": 0.20175902545452118, "learning_rate": 7.027260244718465e-07, "loss": 0.0026, "step": 85320 }, { "epoch": 2.53909213991341, "grad_norm": 0.13736899197101593, "learning_rate": 7.018412865368151e-07, "loss": 0.0046, "step": 85330 }, { "epoch": 2.539389701397051, "grad_norm": 0.11013500392436981, "learning_rate": 7.009570638576318e-07, "loss": 0.0019, "step": 85340 }, { "epoch": 2.5396872628806926, "grad_norm": 0.37814757227897644, "learning_rate": 7.000733565402951e-07, "loss": 0.003, "step": 85350 }, { "epoch": 2.539984824364334, "grad_norm": 0.06624636799097061, "learning_rate": 6.99190164690744e-07, "loss": 0.005, "step": 85360 }, { "epoch": 2.5402823858479757, "grad_norm": 0.06360214203596115, "learning_rate": 6.9830748841485e-07, "loss": 0.0017, "step": 85370 }, { "epoch": 2.5405799473316173, "grad_norm": 0.11062685400247574, "learning_rate": 6.974253278184301e-07, "loss": 0.0015, "step": 85380 }, { "epoch": 2.540877508815259, "grad_norm": 0.22183740139007568, "learning_rate": 6.965436830072369e-07, "loss": 0.0022, "step": 85390 }, { "epoch": 2.5411750702989004, "grad_norm": 0.1039256602525711, "learning_rate": 6.956625540869577e-07, "loss": 0.0019, "step": 85400 }, { "epoch": 2.541472631782542, "grad_norm": 0.1296435445547104, "learning_rate": 6.947819411632223e-07, "loss": 0.0022, "step": 85410 }, { "epoch": 2.5417701932661836, "grad_norm": 0.12551632523536682, "learning_rate": 6.939018443415963e-07, "loss": 0.001, "step": 85420 }, { "epoch": 2.542067754749825, "grad_norm": 0.38824263215065, "learning_rate": 6.930222637275863e-07, "loss": 0.0009, "step": 85430 }, { "epoch": 2.5423653162334667, "grad_norm": 0.22993431985378265, "learning_rate": 6.921431994266315e-07, "loss": 0.0016, "step": 85440 }, { "epoch": 2.5426628777171083, "grad_norm": 0.11568475514650345, "learning_rate": 6.912646515441146e-07, "loss": 0.0018, "step": 85450 }, { "epoch": 2.54296043920075, "grad_norm": 0.3286111056804657, "learning_rate": 6.903866201853543e-07, "loss": 0.0013, "step": 85460 }, { "epoch": 2.5432580006843915, "grad_norm": 0.06575090438127518, "learning_rate": 6.895091054556086e-07, "loss": 0.0014, "step": 85470 }, { "epoch": 2.543555562168033, "grad_norm": 0.11643770337104797, "learning_rate": 6.886321074600694e-07, "loss": 0.0025, "step": 85480 }, { "epoch": 2.5438531236516746, "grad_norm": 0.5174107551574707, "learning_rate": 6.877556263038721e-07, "loss": 0.0028, "step": 85490 }, { "epoch": 2.544150685135316, "grad_norm": 0.035589609295129776, "learning_rate": 6.86879662092087e-07, "loss": 0.0024, "step": 85500 }, { "epoch": 2.5444482466189577, "grad_norm": 0.15077483654022217, "learning_rate": 6.860042149297241e-07, "loss": 0.0016, "step": 85510 }, { "epoch": 2.5447458081025993, "grad_norm": 0.045615680515766144, "learning_rate": 6.851292849217289e-07, "loss": 0.0014, "step": 85520 }, { "epoch": 2.545043369586241, "grad_norm": 0.061115022748708725, "learning_rate": 6.842548721729875e-07, "loss": 0.0015, "step": 85530 }, { "epoch": 2.5453409310698825, "grad_norm": 0.3657134473323822, "learning_rate": 6.833809767883237e-07, "loss": 0.0021, "step": 85540 }, { "epoch": 2.545638492553524, "grad_norm": 0.0063766781240701675, "learning_rate": 6.825075988724966e-07, "loss": 0.0017, "step": 85550 }, { "epoch": 2.545936054037165, "grad_norm": 0.12885528802871704, "learning_rate": 6.816347385302058e-07, "loss": 0.0025, "step": 85560 }, { "epoch": 2.5462336155208067, "grad_norm": 0.10810854285955429, "learning_rate": 6.807623958660891e-07, "loss": 0.0025, "step": 85570 }, { "epoch": 2.5465311770044483, "grad_norm": 0.180577352643013, "learning_rate": 6.798905709847225e-07, "loss": 0.0018, "step": 85580 }, { "epoch": 2.54682873848809, "grad_norm": 0.13183151185512543, "learning_rate": 6.790192639906162e-07, "loss": 0.0018, "step": 85590 }, { "epoch": 2.5471262999717315, "grad_norm": 0.07787249982357025, "learning_rate": 6.781484749882227e-07, "loss": 0.0015, "step": 85600 }, { "epoch": 2.547423861455373, "grad_norm": 0.5006569027900696, "learning_rate": 6.772782040819298e-07, "loss": 0.0057, "step": 85610 }, { "epoch": 2.5477214229390146, "grad_norm": 0.23690755665302277, "learning_rate": 6.764084513760655e-07, "loss": 0.002, "step": 85620 }, { "epoch": 2.548018984422656, "grad_norm": 0.2791837155818939, "learning_rate": 6.755392169748926e-07, "loss": 0.0027, "step": 85630 }, { "epoch": 2.5483165459062977, "grad_norm": 0.19675305485725403, "learning_rate": 6.746705009826138e-07, "loss": 0.0015, "step": 85640 }, { "epoch": 2.5486141073899393, "grad_norm": 0.26991158723831177, "learning_rate": 6.738023035033698e-07, "loss": 0.002, "step": 85650 }, { "epoch": 2.548911668873581, "grad_norm": 0.046487219631671906, "learning_rate": 6.729346246412382e-07, "loss": 0.0013, "step": 85660 }, { "epoch": 2.5492092303572225, "grad_norm": 0.14992299675941467, "learning_rate": 6.720674645002351e-07, "loss": 0.0022, "step": 85670 }, { "epoch": 2.549506791840864, "grad_norm": 0.05632961913943291, "learning_rate": 6.712008231843142e-07, "loss": 0.0013, "step": 85680 }, { "epoch": 2.5498043533245056, "grad_norm": 0.02155362069606781, "learning_rate": 6.703347007973676e-07, "loss": 0.0015, "step": 85690 }, { "epoch": 2.550101914808147, "grad_norm": 0.19541651010513306, "learning_rate": 6.694690974432233e-07, "loss": 0.0019, "step": 85700 }, { "epoch": 2.5503994762917888, "grad_norm": 0.29825130105018616, "learning_rate": 6.686040132256483e-07, "loss": 0.0021, "step": 85710 }, { "epoch": 2.5506970377754303, "grad_norm": 0.05944976210594177, "learning_rate": 6.677394482483485e-07, "loss": 0.0015, "step": 85720 }, { "epoch": 2.550994599259072, "grad_norm": 0.2268209308385849, "learning_rate": 6.668754026149665e-07, "loss": 0.0019, "step": 85730 }, { "epoch": 2.5512921607427135, "grad_norm": 0.04087097570300102, "learning_rate": 6.660118764290813e-07, "loss": 0.0036, "step": 85740 }, { "epoch": 2.551589722226355, "grad_norm": 0.06055469810962677, "learning_rate": 6.651488697942116e-07, "loss": 0.0015, "step": 85750 }, { "epoch": 2.5518872837099966, "grad_norm": 0.4996849298477173, "learning_rate": 6.642863828138129e-07, "loss": 0.0014, "step": 85760 }, { "epoch": 2.552184845193638, "grad_norm": 0.11006999760866165, "learning_rate": 6.6342441559128e-07, "loss": 0.0013, "step": 85770 }, { "epoch": 2.5524824066772798, "grad_norm": 0.06530971825122833, "learning_rate": 6.625629682299423e-07, "loss": 0.0017, "step": 85780 }, { "epoch": 2.5527799681609213, "grad_norm": 0.057522453367710114, "learning_rate": 6.617020408330688e-07, "loss": 0.0028, "step": 85790 }, { "epoch": 2.553077529644563, "grad_norm": 0.1354954093694687, "learning_rate": 6.60841633503867e-07, "loss": 0.0017, "step": 85800 }, { "epoch": 2.5533750911282045, "grad_norm": 0.16221210360527039, "learning_rate": 6.599817463454816e-07, "loss": 0.001, "step": 85810 }, { "epoch": 2.553672652611846, "grad_norm": 0.26001405715942383, "learning_rate": 6.591223794609919e-07, "loss": 0.0016, "step": 85820 }, { "epoch": 2.5539702140954876, "grad_norm": 0.30431243777275085, "learning_rate": 6.582635329534187e-07, "loss": 0.0017, "step": 85830 }, { "epoch": 2.554267775579129, "grad_norm": 0.07099366188049316, "learning_rate": 6.574052069257192e-07, "loss": 0.0017, "step": 85840 }, { "epoch": 2.554565337062771, "grad_norm": 0.14487062394618988, "learning_rate": 6.565474014807893e-07, "loss": 0.0012, "step": 85850 }, { "epoch": 2.5548628985464124, "grad_norm": 0.1331743746995926, "learning_rate": 6.556901167214586e-07, "loss": 0.0021, "step": 85860 }, { "epoch": 2.555160460030054, "grad_norm": 0.1246364563703537, "learning_rate": 6.54833352750498e-07, "loss": 0.0008, "step": 85870 }, { "epoch": 2.5554580215136955, "grad_norm": 0.24250063300132751, "learning_rate": 6.539771096706154e-07, "loss": 0.0016, "step": 85880 }, { "epoch": 2.555755582997337, "grad_norm": 0.17641600966453552, "learning_rate": 6.531213875844561e-07, "loss": 0.0017, "step": 85890 }, { "epoch": 2.5560531444809786, "grad_norm": 0.11765206605195999, "learning_rate": 6.522661865946012e-07, "loss": 0.0018, "step": 85900 }, { "epoch": 2.5563507059646198, "grad_norm": 0.10999824851751328, "learning_rate": 6.514115068035709e-07, "loss": 0.0014, "step": 85910 }, { "epoch": 2.5566482674482613, "grad_norm": 0.3821732699871063, "learning_rate": 6.50557348313825e-07, "loss": 0.0058, "step": 85920 }, { "epoch": 2.556945828931903, "grad_norm": 0.22790439426898956, "learning_rate": 6.497037112277554e-07, "loss": 0.0015, "step": 85930 }, { "epoch": 2.5572433904155445, "grad_norm": 0.07854900509119034, "learning_rate": 6.488505956476948e-07, "loss": 0.0014, "step": 85940 }, { "epoch": 2.557540951899186, "grad_norm": 0.04390083998441696, "learning_rate": 6.479980016759169e-07, "loss": 0.0009, "step": 85950 }, { "epoch": 2.5578385133828276, "grad_norm": 0.07556241005659103, "learning_rate": 6.471459294146254e-07, "loss": 0.0013, "step": 85960 }, { "epoch": 2.558136074866469, "grad_norm": 0.17442640662193298, "learning_rate": 6.462943789659664e-07, "loss": 0.0015, "step": 85970 }, { "epoch": 2.558433636350111, "grad_norm": 0.10743873566389084, "learning_rate": 6.454433504320229e-07, "loss": 0.0013, "step": 85980 }, { "epoch": 2.5587311978337524, "grad_norm": 0.30517005920410156, "learning_rate": 6.445928439148153e-07, "loss": 0.0013, "step": 85990 }, { "epoch": 2.559028759317394, "grad_norm": 0.08533962815999985, "learning_rate": 6.437428595162992e-07, "loss": 0.0019, "step": 86000 }, { "epoch": 2.5593263208010355, "grad_norm": 0.10395773500204086, "learning_rate": 6.428933973383694e-07, "loss": 0.0018, "step": 86010 }, { "epoch": 2.559623882284677, "grad_norm": 0.1531161665916443, "learning_rate": 6.420444574828588e-07, "loss": 0.002, "step": 86020 }, { "epoch": 2.5599214437683186, "grad_norm": 0.1381063163280487, "learning_rate": 6.411960400515376e-07, "loss": 0.0027, "step": 86030 }, { "epoch": 2.5602190052519602, "grad_norm": 0.14077425003051758, "learning_rate": 6.403481451461102e-07, "loss": 0.0017, "step": 86040 }, { "epoch": 2.560516566735602, "grad_norm": 0.08077560365200043, "learning_rate": 6.395007728682229e-07, "loss": 0.0011, "step": 86050 }, { "epoch": 2.5608141282192434, "grad_norm": 0.131329745054245, "learning_rate": 6.386539233194556e-07, "loss": 0.003, "step": 86060 }, { "epoch": 2.561111689702885, "grad_norm": 0.205880805850029, "learning_rate": 6.378075966013297e-07, "loss": 0.0008, "step": 86070 }, { "epoch": 2.5614092511865265, "grad_norm": 0.11159474402666092, "learning_rate": 6.369617928152982e-07, "loss": 0.0017, "step": 86080 }, { "epoch": 2.561706812670168, "grad_norm": 0.4071677625179291, "learning_rate": 6.361165120627566e-07, "loss": 0.0016, "step": 86090 }, { "epoch": 2.5620043741538097, "grad_norm": 0.15043480694293976, "learning_rate": 6.352717544450348e-07, "loss": 0.0016, "step": 86100 }, { "epoch": 2.5623019356374512, "grad_norm": 0.070681132376194, "learning_rate": 6.344275200634025e-07, "loss": 0.0016, "step": 86110 }, { "epoch": 2.562599497121093, "grad_norm": 0.12388376146554947, "learning_rate": 6.335838090190632e-07, "loss": 0.0012, "step": 86120 }, { "epoch": 2.562897058604734, "grad_norm": 0.1227572038769722, "learning_rate": 6.327406214131599e-07, "loss": 0.0015, "step": 86130 }, { "epoch": 2.5631946200883755, "grad_norm": 0.0735340341925621, "learning_rate": 6.318979573467726e-07, "loss": 0.0018, "step": 86140 }, { "epoch": 2.563492181572017, "grad_norm": 0.03366772085428238, "learning_rate": 6.310558169209196e-07, "loss": 0.001, "step": 86150 }, { "epoch": 2.5637897430556587, "grad_norm": 0.2290770262479782, "learning_rate": 6.302142002365535e-07, "loss": 0.0011, "step": 86160 }, { "epoch": 2.5640873045393002, "grad_norm": 0.022386502474546432, "learning_rate": 6.293731073945664e-07, "loss": 0.0015, "step": 86170 }, { "epoch": 2.564384866022942, "grad_norm": 0.11394746601581573, "learning_rate": 6.285325384957869e-07, "loss": 0.0021, "step": 86180 }, { "epoch": 2.5646824275065834, "grad_norm": 0.2188534289598465, "learning_rate": 6.276924936409829e-07, "loss": 0.0017, "step": 86190 }, { "epoch": 2.564979988990225, "grad_norm": 0.31080350279808044, "learning_rate": 6.268529729308548e-07, "loss": 0.0041, "step": 86200 }, { "epoch": 2.5652775504738665, "grad_norm": 0.16781242191791534, "learning_rate": 6.260139764660434e-07, "loss": 0.0012, "step": 86210 }, { "epoch": 2.565575111957508, "grad_norm": 0.16185785830020905, "learning_rate": 6.251755043471275e-07, "loss": 0.0011, "step": 86220 }, { "epoch": 2.5658726734411497, "grad_norm": 0.008115661330521107, "learning_rate": 6.243375566746207e-07, "loss": 0.0018, "step": 86230 }, { "epoch": 2.5661702349247912, "grad_norm": 0.1386583149433136, "learning_rate": 6.235001335489749e-07, "loss": 0.0015, "step": 86240 }, { "epoch": 2.566467796408433, "grad_norm": 0.10630746930837631, "learning_rate": 6.226632350705786e-07, "loss": 0.0019, "step": 86250 }, { "epoch": 2.5667653578920744, "grad_norm": 0.058414943516254425, "learning_rate": 6.218268613397599e-07, "loss": 0.0019, "step": 86260 }, { "epoch": 2.567062919375716, "grad_norm": 0.04083052650094032, "learning_rate": 6.209910124567786e-07, "loss": 0.003, "step": 86270 }, { "epoch": 2.5673604808593575, "grad_norm": 0.06425534933805466, "learning_rate": 6.201556885218362e-07, "loss": 0.0011, "step": 86280 }, { "epoch": 2.567658042342999, "grad_norm": 0.22498063743114471, "learning_rate": 6.193208896350705e-07, "loss": 0.0021, "step": 86290 }, { "epoch": 2.5679556038266407, "grad_norm": 0.14688995480537415, "learning_rate": 6.184866158965558e-07, "loss": 0.0014, "step": 86300 }, { "epoch": 2.5682531653102822, "grad_norm": 0.10748781263828278, "learning_rate": 6.176528674063021e-07, "loss": 0.0012, "step": 86310 }, { "epoch": 2.568550726793924, "grad_norm": 0.27514997124671936, "learning_rate": 6.168196442642583e-07, "loss": 0.0022, "step": 86320 }, { "epoch": 2.5688482882775654, "grad_norm": 0.10639782249927521, "learning_rate": 6.159869465703111e-07, "loss": 0.0023, "step": 86330 }, { "epoch": 2.569145849761207, "grad_norm": 0.057701487094163895, "learning_rate": 6.151547744242808e-07, "loss": 0.0013, "step": 86340 }, { "epoch": 2.5694434112448485, "grad_norm": 0.06471840292215347, "learning_rate": 6.143231279259271e-07, "loss": 0.0011, "step": 86350 }, { "epoch": 2.56974097272849, "grad_norm": 0.002189767314121127, "learning_rate": 6.134920071749473e-07, "loss": 0.0015, "step": 86360 }, { "epoch": 2.5700385342121317, "grad_norm": 0.12715581059455872, "learning_rate": 6.126614122709751e-07, "loss": 0.0023, "step": 86370 }, { "epoch": 2.5703360956957733, "grad_norm": 0.0680786594748497, "learning_rate": 6.118313433135792e-07, "loss": 0.0017, "step": 86380 }, { "epoch": 2.570633657179415, "grad_norm": 0.06500887870788574, "learning_rate": 6.110018004022677e-07, "loss": 0.0018, "step": 86390 }, { "epoch": 2.5709312186630564, "grad_norm": 0.1861283928155899, "learning_rate": 6.101727836364845e-07, "loss": 0.0028, "step": 86400 }, { "epoch": 2.571228780146698, "grad_norm": 0.11654015630483627, "learning_rate": 6.093442931156124e-07, "loss": 0.0016, "step": 86410 }, { "epoch": 2.5715263416303396, "grad_norm": 0.13424085080623627, "learning_rate": 6.085163289389662e-07, "loss": 0.0024, "step": 86420 }, { "epoch": 2.571823903113981, "grad_norm": 0.09788771718740463, "learning_rate": 6.076888912058032e-07, "loss": 0.002, "step": 86430 }, { "epoch": 2.5721214645976227, "grad_norm": 0.03388849273324013, "learning_rate": 6.068619800153141e-07, "loss": 0.002, "step": 86440 }, { "epoch": 2.5724190260812643, "grad_norm": 0.19572144746780396, "learning_rate": 6.060355954666291e-07, "loss": 0.0018, "step": 86450 }, { "epoch": 2.572716587564906, "grad_norm": 0.04664748162031174, "learning_rate": 6.052097376588117e-07, "loss": 0.001, "step": 86460 }, { "epoch": 2.5730141490485474, "grad_norm": 0.1589146852493286, "learning_rate": 6.043844066908649e-07, "loss": 0.0012, "step": 86470 }, { "epoch": 2.573311710532189, "grad_norm": 0.06698007881641388, "learning_rate": 6.035596026617291e-07, "loss": 0.0009, "step": 86480 }, { "epoch": 2.57360927201583, "grad_norm": 0.2633987367153168, "learning_rate": 6.0273532567028e-07, "loss": 0.0014, "step": 86490 }, { "epoch": 2.5739068334994717, "grad_norm": 0.06898129731416702, "learning_rate": 6.019115758153288e-07, "loss": 0.0017, "step": 86500 }, { "epoch": 2.5742043949831133, "grad_norm": 0.08113077282905579, "learning_rate": 6.010883531956269e-07, "loss": 0.0015, "step": 86510 }, { "epoch": 2.574501956466755, "grad_norm": 0.15303057432174683, "learning_rate": 6.002656579098614e-07, "loss": 0.002, "step": 86520 }, { "epoch": 2.5747995179503964, "grad_norm": 0.06816112995147705, "learning_rate": 5.994434900566543e-07, "loss": 0.0027, "step": 86530 }, { "epoch": 2.575097079434038, "grad_norm": 0.2481839656829834, "learning_rate": 5.986218497345653e-07, "loss": 0.0021, "step": 86540 }, { "epoch": 2.5753946409176796, "grad_norm": 0.21784217655658722, "learning_rate": 5.978007370420924e-07, "loss": 0.0027, "step": 86550 }, { "epoch": 2.575692202401321, "grad_norm": 0.3820114731788635, "learning_rate": 5.969801520776697e-07, "loss": 0.0029, "step": 86560 }, { "epoch": 2.5759897638849627, "grad_norm": 0.20434847474098206, "learning_rate": 5.961600949396651e-07, "loss": 0.0021, "step": 86570 }, { "epoch": 2.5762873253686043, "grad_norm": 0.17070604860782623, "learning_rate": 5.953405657263872e-07, "loss": 0.0015, "step": 86580 }, { "epoch": 2.576584886852246, "grad_norm": 0.15013477206230164, "learning_rate": 5.945215645360797e-07, "loss": 0.001, "step": 86590 }, { "epoch": 2.5768824483358874, "grad_norm": 0.2945239543914795, "learning_rate": 5.93703091466924e-07, "loss": 0.002, "step": 86600 }, { "epoch": 2.577180009819529, "grad_norm": 0.19887706637382507, "learning_rate": 5.928851466170349e-07, "loss": 0.0014, "step": 86610 }, { "epoch": 2.5774775713031706, "grad_norm": 0.2854805886745453, "learning_rate": 5.92067730084468e-07, "loss": 0.0019, "step": 86620 }, { "epoch": 2.577775132786812, "grad_norm": 0.09476769715547562, "learning_rate": 5.912508419672125e-07, "loss": 0.0012, "step": 86630 }, { "epoch": 2.5780726942704537, "grad_norm": 0.18429802358150482, "learning_rate": 5.90434482363198e-07, "loss": 0.0017, "step": 86640 }, { "epoch": 2.5783702557540953, "grad_norm": 0.015361560508608818, "learning_rate": 5.89618651370285e-07, "loss": 0.0014, "step": 86650 }, { "epoch": 2.578667817237737, "grad_norm": 0.032149843871593475, "learning_rate": 5.88803349086276e-07, "loss": 0.0016, "step": 86660 }, { "epoch": 2.5789653787213784, "grad_norm": 0.14125406742095947, "learning_rate": 5.879885756089088e-07, "loss": 0.0028, "step": 86670 }, { "epoch": 2.57926294020502, "grad_norm": 0.08247046917676926, "learning_rate": 5.871743310358541e-07, "loss": 0.0019, "step": 86680 }, { "epoch": 2.5795605016886616, "grad_norm": 0.20440536737442017, "learning_rate": 5.863606154647239e-07, "loss": 0.0021, "step": 86690 }, { "epoch": 2.5798580631723027, "grad_norm": 0.05444343388080597, "learning_rate": 5.855474289930652e-07, "loss": 0.0014, "step": 86700 }, { "epoch": 2.5801556246559443, "grad_norm": 0.18122035264968872, "learning_rate": 5.847347717183621e-07, "loss": 0.0024, "step": 86710 }, { "epoch": 2.580453186139586, "grad_norm": 0.20416948199272156, "learning_rate": 5.839226437380324e-07, "loss": 0.0031, "step": 86720 }, { "epoch": 2.5807507476232274, "grad_norm": 0.09072582423686981, "learning_rate": 5.831110451494338e-07, "loss": 0.0016, "step": 86730 }, { "epoch": 2.581048309106869, "grad_norm": 0.1274694949388504, "learning_rate": 5.822999760498594e-07, "loss": 0.0026, "step": 86740 }, { "epoch": 2.5813458705905106, "grad_norm": 0.022252157330513, "learning_rate": 5.814894365365392e-07, "loss": 0.0011, "step": 86750 }, { "epoch": 2.581643432074152, "grad_norm": 0.1345180720090866, "learning_rate": 5.806794267066384e-07, "loss": 0.0024, "step": 86760 }, { "epoch": 2.5819409935577937, "grad_norm": 0.09446199238300323, "learning_rate": 5.798699466572593e-07, "loss": 0.0008, "step": 86770 }, { "epoch": 2.5822385550414353, "grad_norm": 0.1052284687757492, "learning_rate": 5.790609964854422e-07, "loss": 0.0011, "step": 86780 }, { "epoch": 2.582536116525077, "grad_norm": 0.02064688317477703, "learning_rate": 5.782525762881619e-07, "loss": 0.001, "step": 86790 }, { "epoch": 2.5828336780087184, "grad_norm": 0.14063695073127747, "learning_rate": 5.774446861623307e-07, "loss": 0.0034, "step": 86800 }, { "epoch": 2.58313123949236, "grad_norm": 0.1362980455160141, "learning_rate": 5.766373262047964e-07, "loss": 0.0017, "step": 86810 }, { "epoch": 2.5834288009760016, "grad_norm": 0.07082849740982056, "learning_rate": 5.758304965123457e-07, "loss": 0.0014, "step": 86820 }, { "epoch": 2.583726362459643, "grad_norm": 0.024793388321995735, "learning_rate": 5.750241971816978e-07, "loss": 0.0012, "step": 86830 }, { "epoch": 2.5840239239432847, "grad_norm": 0.08607838302850723, "learning_rate": 5.742184283095114e-07, "loss": 0.0012, "step": 86840 }, { "epoch": 2.5843214854269263, "grad_norm": 0.24041247367858887, "learning_rate": 5.734131899923806e-07, "loss": 0.0022, "step": 86850 }, { "epoch": 2.584619046910568, "grad_norm": 0.07347743958234787, "learning_rate": 5.726084823268369e-07, "loss": 0.0019, "step": 86860 }, { "epoch": 2.5849166083942094, "grad_norm": 0.08926497399806976, "learning_rate": 5.718043054093458e-07, "loss": 0.0022, "step": 86870 }, { "epoch": 2.585214169877851, "grad_norm": 0.026509955525398254, "learning_rate": 5.71000659336311e-07, "loss": 0.0011, "step": 86880 }, { "epoch": 2.5855117313614926, "grad_norm": 0.06735239923000336, "learning_rate": 5.701975442040725e-07, "loss": 0.0018, "step": 86890 }, { "epoch": 2.585809292845134, "grad_norm": 0.21618793904781342, "learning_rate": 5.693949601089072e-07, "loss": 0.0021, "step": 86900 }, { "epoch": 2.5861068543287757, "grad_norm": 0.21124546229839325, "learning_rate": 5.685929071470259e-07, "loss": 0.0013, "step": 86910 }, { "epoch": 2.5864044158124173, "grad_norm": 0.10355006903409958, "learning_rate": 5.677913854145778e-07, "loss": 0.0026, "step": 86920 }, { "epoch": 2.586701977296059, "grad_norm": 0.15309873223304749, "learning_rate": 5.669903950076478e-07, "loss": 0.0021, "step": 86930 }, { "epoch": 2.5869995387797005, "grad_norm": 0.0337529219686985, "learning_rate": 5.66189936022259e-07, "loss": 0.0015, "step": 86940 }, { "epoch": 2.587297100263342, "grad_norm": 0.08116519451141357, "learning_rate": 5.653900085543667e-07, "loss": 0.003, "step": 86950 }, { "epoch": 2.5875946617469836, "grad_norm": 0.10466064512729645, "learning_rate": 5.645906126998657e-07, "loss": 0.0021, "step": 86960 }, { "epoch": 2.587892223230625, "grad_norm": 0.1636323481798172, "learning_rate": 5.637917485545863e-07, "loss": 0.0014, "step": 86970 }, { "epoch": 2.5881897847142668, "grad_norm": 0.09093991667032242, "learning_rate": 5.629934162142958e-07, "loss": 0.0014, "step": 86980 }, { "epoch": 2.5884873461979083, "grad_norm": 0.13186009228229523, "learning_rate": 5.621956157746955e-07, "loss": 0.0017, "step": 86990 }, { "epoch": 2.58878490768155, "grad_norm": 0.03569871559739113, "learning_rate": 5.613983473314244e-07, "loss": 0.0024, "step": 87000 }, { "epoch": 2.5890824691651915, "grad_norm": 0.123180091381073, "learning_rate": 5.606016109800583e-07, "loss": 0.0009, "step": 87010 }, { "epoch": 2.589380030648833, "grad_norm": 0.21491388976573944, "learning_rate": 5.598054068161096e-07, "loss": 0.0021, "step": 87020 }, { "epoch": 2.5896775921324746, "grad_norm": 0.4823688268661499, "learning_rate": 5.590097349350232e-07, "loss": 0.0022, "step": 87030 }, { "epoch": 2.589975153616116, "grad_norm": 0.07545768469572067, "learning_rate": 5.582145954321849e-07, "loss": 0.0011, "step": 87040 }, { "epoch": 2.5902727150997578, "grad_norm": 0.05792247876524925, "learning_rate": 5.574199884029152e-07, "loss": 0.0018, "step": 87050 }, { "epoch": 2.590570276583399, "grad_norm": 0.1044515073299408, "learning_rate": 5.566259139424679e-07, "loss": 0.0021, "step": 87060 }, { "epoch": 2.5908678380670405, "grad_norm": 0.08932589739561081, "learning_rate": 5.558323721460351e-07, "loss": 0.0012, "step": 87070 }, { "epoch": 2.591165399550682, "grad_norm": 0.13069270551204681, "learning_rate": 5.550393631087491e-07, "loss": 0.0017, "step": 87080 }, { "epoch": 2.5914629610343236, "grad_norm": 0.09393124282360077, "learning_rate": 5.542468869256712e-07, "loss": 0.001, "step": 87090 }, { "epoch": 2.591760522517965, "grad_norm": 0.2710496187210083, "learning_rate": 5.534549436918024e-07, "loss": 0.0022, "step": 87100 }, { "epoch": 2.5920580840016068, "grad_norm": 0.28474026918411255, "learning_rate": 5.526635335020802e-07, "loss": 0.0023, "step": 87110 }, { "epoch": 2.5923556454852483, "grad_norm": 0.17916761338710785, "learning_rate": 5.518726564513782e-07, "loss": 0.0018, "step": 87120 }, { "epoch": 2.59265320696889, "grad_norm": 0.3345049321651459, "learning_rate": 5.510823126345033e-07, "loss": 0.0015, "step": 87130 }, { "epoch": 2.5929507684525315, "grad_norm": 0.10865429043769836, "learning_rate": 5.502925021462019e-07, "loss": 0.001, "step": 87140 }, { "epoch": 2.593248329936173, "grad_norm": 0.12255773693323135, "learning_rate": 5.495032250811544e-07, "loss": 0.0018, "step": 87150 }, { "epoch": 2.5935458914198146, "grad_norm": 0.10755458474159241, "learning_rate": 5.487144815339801e-07, "loss": 0.0022, "step": 87160 }, { "epoch": 2.593843452903456, "grad_norm": 0.33581826090812683, "learning_rate": 5.479262715992295e-07, "loss": 0.0017, "step": 87170 }, { "epoch": 2.5941410143870978, "grad_norm": 0.08595646917819977, "learning_rate": 5.471385953713926e-07, "loss": 0.0012, "step": 87180 }, { "epoch": 2.5944385758707393, "grad_norm": 0.1305539757013321, "learning_rate": 5.463514529448948e-07, "loss": 0.0016, "step": 87190 }, { "epoch": 2.594736137354381, "grad_norm": 0.35923856496810913, "learning_rate": 5.455648444140993e-07, "loss": 0.0022, "step": 87200 }, { "epoch": 2.5950336988380225, "grad_norm": 0.04720763489603996, "learning_rate": 5.447787698733004e-07, "loss": 0.0014, "step": 87210 }, { "epoch": 2.595331260321664, "grad_norm": 0.13396984338760376, "learning_rate": 5.439932294167322e-07, "loss": 0.002, "step": 87220 }, { "epoch": 2.5956288218053056, "grad_norm": 0.07716517895460129, "learning_rate": 5.432082231385643e-07, "loss": 0.0018, "step": 87230 }, { "epoch": 2.595926383288947, "grad_norm": 0.12361256778240204, "learning_rate": 5.424237511329033e-07, "loss": 0.0026, "step": 87240 }, { "epoch": 2.5962239447725888, "grad_norm": 0.13347862660884857, "learning_rate": 5.416398134937878e-07, "loss": 0.0027, "step": 87250 }, { "epoch": 2.5965215062562303, "grad_norm": 0.15762554109096527, "learning_rate": 5.408564103151965e-07, "loss": 0.0022, "step": 87260 }, { "epoch": 2.5968190677398715, "grad_norm": 0.12734746932983398, "learning_rate": 5.400735416910418e-07, "loss": 0.0014, "step": 87270 }, { "epoch": 2.597116629223513, "grad_norm": 0.14609310030937195, "learning_rate": 5.392912077151735e-07, "loss": 0.002, "step": 87280 }, { "epoch": 2.5974141907071546, "grad_norm": 0.12530452013015747, "learning_rate": 5.385094084813752e-07, "loss": 0.0017, "step": 87290 }, { "epoch": 2.597711752190796, "grad_norm": 0.1912049949169159, "learning_rate": 5.377281440833676e-07, "loss": 0.0017, "step": 87300 }, { "epoch": 2.5980093136744378, "grad_norm": 0.28180187940597534, "learning_rate": 5.369474146148085e-07, "loss": 0.0015, "step": 87310 }, { "epoch": 2.5983068751580793, "grad_norm": 0.07692042738199234, "learning_rate": 5.361672201692908e-07, "loss": 0.0022, "step": 87320 }, { "epoch": 2.598604436641721, "grad_norm": 0.1552152931690216, "learning_rate": 5.353875608403408e-07, "loss": 0.0017, "step": 87330 }, { "epoch": 2.5989019981253625, "grad_norm": 0.22274722158908844, "learning_rate": 5.346084367214238e-07, "loss": 0.0011, "step": 87340 }, { "epoch": 2.599199559609004, "grad_norm": 0.20220793783664703, "learning_rate": 5.338298479059401e-07, "loss": 0.0026, "step": 87350 }, { "epoch": 2.5994971210926456, "grad_norm": 0.0629652738571167, "learning_rate": 5.330517944872249e-07, "loss": 0.0018, "step": 87360 }, { "epoch": 2.599794682576287, "grad_norm": 0.10527926683425903, "learning_rate": 5.322742765585509e-07, "loss": 0.0019, "step": 87370 }, { "epoch": 2.600092244059929, "grad_norm": 0.11506091058254242, "learning_rate": 5.314972942131247e-07, "loss": 0.0014, "step": 87380 }, { "epoch": 2.6003898055435704, "grad_norm": 0.04851981997489929, "learning_rate": 5.307208475440912e-07, "loss": 0.0017, "step": 87390 }, { "epoch": 2.600687367027212, "grad_norm": 0.051890481263399124, "learning_rate": 5.299449366445269e-07, "loss": 0.0013, "step": 87400 }, { "epoch": 2.6009849285108535, "grad_norm": 0.16578561067581177, "learning_rate": 5.291695616074488e-07, "loss": 0.0009, "step": 87410 }, { "epoch": 2.601282489994495, "grad_norm": 0.07566890865564346, "learning_rate": 5.283947225258063e-07, "loss": 0.002, "step": 87420 }, { "epoch": 2.6015800514781366, "grad_norm": 0.25028544664382935, "learning_rate": 5.276204194924867e-07, "loss": 0.0023, "step": 87430 }, { "epoch": 2.601877612961778, "grad_norm": 0.2617402672767639, "learning_rate": 5.268466526003113e-07, "loss": 0.0022, "step": 87440 }, { "epoch": 2.60217517444542, "grad_norm": 0.25533145666122437, "learning_rate": 5.260734219420382e-07, "loss": 0.0022, "step": 87450 }, { "epoch": 2.6024727359290614, "grad_norm": 0.131639763712883, "learning_rate": 5.253007276103616e-07, "loss": 0.0022, "step": 87460 }, { "epoch": 2.602770297412703, "grad_norm": 0.1694771945476532, "learning_rate": 5.245285696979096e-07, "loss": 0.0016, "step": 87470 }, { "epoch": 2.6030678588963445, "grad_norm": 0.049203936010599136, "learning_rate": 5.237569482972477e-07, "loss": 0.0017, "step": 87480 }, { "epoch": 2.603365420379986, "grad_norm": 0.07668527215719223, "learning_rate": 5.229858635008767e-07, "loss": 0.0015, "step": 87490 }, { "epoch": 2.6036629818636277, "grad_norm": 0.3358568847179413, "learning_rate": 5.222153154012333e-07, "loss": 0.0025, "step": 87500 }, { "epoch": 2.6039605433472692, "grad_norm": 0.15399892628192902, "learning_rate": 5.214453040906887e-07, "loss": 0.0011, "step": 87510 }, { "epoch": 2.604258104830911, "grad_norm": 0.002127908868715167, "learning_rate": 5.206758296615499e-07, "loss": 0.002, "step": 87520 }, { "epoch": 2.6045556663145524, "grad_norm": 0.07476610690355301, "learning_rate": 5.199068922060618e-07, "loss": 0.0025, "step": 87530 }, { "epoch": 2.604853227798194, "grad_norm": 0.0586712621152401, "learning_rate": 5.191384918164033e-07, "loss": 0.0021, "step": 87540 }, { "epoch": 2.6051507892818355, "grad_norm": 0.18022486567497253, "learning_rate": 5.183706285846873e-07, "loss": 0.0019, "step": 87550 }, { "epoch": 2.605448350765477, "grad_norm": 0.07180443406105042, "learning_rate": 5.176033026029648e-07, "loss": 0.0015, "step": 87560 }, { "epoch": 2.6057459122491187, "grad_norm": 0.07255467772483826, "learning_rate": 5.168365139632215e-07, "loss": 0.0016, "step": 87570 }, { "epoch": 2.6060434737327602, "grad_norm": 0.2347337305545807, "learning_rate": 5.160702627573794e-07, "loss": 0.0018, "step": 87580 }, { "epoch": 2.606341035216402, "grad_norm": 0.06176060438156128, "learning_rate": 5.15304549077294e-07, "loss": 0.0023, "step": 87590 }, { "epoch": 2.6066385967000434, "grad_norm": 0.07979094982147217, "learning_rate": 5.14539373014758e-07, "loss": 0.0019, "step": 87600 }, { "epoch": 2.606936158183685, "grad_norm": 0.1387995034456253, "learning_rate": 5.137747346615002e-07, "loss": 0.0016, "step": 87610 }, { "epoch": 2.6072337196673265, "grad_norm": 0.4295539855957031, "learning_rate": 5.130106341091845e-07, "loss": 0.0021, "step": 87620 }, { "epoch": 2.6075312811509677, "grad_norm": 0.027751589193940163, "learning_rate": 5.122470714494088e-07, "loss": 0.001, "step": 87630 }, { "epoch": 2.6078288426346092, "grad_norm": 0.1347593367099762, "learning_rate": 5.114840467737064e-07, "loss": 0.0015, "step": 87640 }, { "epoch": 2.608126404118251, "grad_norm": 0.26969295740127563, "learning_rate": 5.10721560173551e-07, "loss": 0.0024, "step": 87650 }, { "epoch": 2.6084239656018924, "grad_norm": 0.12429459393024445, "learning_rate": 5.099596117403455e-07, "loss": 0.0018, "step": 87660 }, { "epoch": 2.608721527085534, "grad_norm": 0.296793669462204, "learning_rate": 5.091982015654317e-07, "loss": 0.0018, "step": 87670 }, { "epoch": 2.6090190885691755, "grad_norm": 0.10349340736865997, "learning_rate": 5.084373297400858e-07, "loss": 0.0015, "step": 87680 }, { "epoch": 2.609316650052817, "grad_norm": 0.20374947786331177, "learning_rate": 5.076769963555217e-07, "loss": 0.0014, "step": 87690 }, { "epoch": 2.6096142115364587, "grad_norm": 0.23948872089385986, "learning_rate": 5.069172015028839e-07, "loss": 0.0019, "step": 87700 }, { "epoch": 2.6099117730201002, "grad_norm": 0.14894717931747437, "learning_rate": 5.061579452732568e-07, "loss": 0.0021, "step": 87710 }, { "epoch": 2.610209334503742, "grad_norm": 0.1197907030582428, "learning_rate": 5.053992277576581e-07, "loss": 0.0009, "step": 87720 }, { "epoch": 2.6105068959873834, "grad_norm": 0.04557259380817413, "learning_rate": 5.046410490470433e-07, "loss": 0.002, "step": 87730 }, { "epoch": 2.610804457471025, "grad_norm": 0.1768551915884018, "learning_rate": 5.038834092322998e-07, "loss": 0.0014, "step": 87740 }, { "epoch": 2.6111020189546665, "grad_norm": 0.2965865731239319, "learning_rate": 5.03126308404252e-07, "loss": 0.002, "step": 87750 }, { "epoch": 2.611399580438308, "grad_norm": 0.048615969717502594, "learning_rate": 5.023697466536609e-07, "loss": 0.0007, "step": 87760 }, { "epoch": 2.6116971419219497, "grad_norm": 0.08155537396669388, "learning_rate": 5.016137240712221e-07, "loss": 0.0019, "step": 87770 }, { "epoch": 2.6119947034055913, "grad_norm": 0.1192486509680748, "learning_rate": 5.008582407475649e-07, "loss": 0.0013, "step": 87780 }, { "epoch": 2.612292264889233, "grad_norm": 0.30469340085983276, "learning_rate": 5.001032967732555e-07, "loss": 0.0016, "step": 87790 }, { "epoch": 2.6125898263728744, "grad_norm": 0.22629132866859436, "learning_rate": 4.993488922387974e-07, "loss": 0.0016, "step": 87800 }, { "epoch": 2.612887387856516, "grad_norm": 0.04394190385937691, "learning_rate": 4.985950272346246e-07, "loss": 0.0029, "step": 87810 }, { "epoch": 2.6131849493401575, "grad_norm": 0.04932721331715584, "learning_rate": 4.9784170185111e-07, "loss": 0.001, "step": 87820 }, { "epoch": 2.613482510823799, "grad_norm": 0.4167259931564331, "learning_rate": 4.970889161785613e-07, "loss": 0.0028, "step": 87830 }, { "epoch": 2.6137800723074402, "grad_norm": 0.17933830618858337, "learning_rate": 4.96336670307222e-07, "loss": 0.0016, "step": 87840 }, { "epoch": 2.614077633791082, "grad_norm": 0.42408034205436707, "learning_rate": 4.955849643272681e-07, "loss": 0.0013, "step": 87850 }, { "epoch": 2.6143751952747234, "grad_norm": 0.065432108938694, "learning_rate": 4.948337983288137e-07, "loss": 0.0025, "step": 87860 }, { "epoch": 2.614672756758365, "grad_norm": 0.21654075384140015, "learning_rate": 4.940831724019079e-07, "loss": 0.0025, "step": 87870 }, { "epoch": 2.6149703182420065, "grad_norm": 0.19750340282917023, "learning_rate": 4.933330866365343e-07, "loss": 0.0014, "step": 87880 }, { "epoch": 2.615267879725648, "grad_norm": 0.07165148854255676, "learning_rate": 4.925835411226109e-07, "loss": 0.0013, "step": 87890 }, { "epoch": 2.6155654412092897, "grad_norm": 0.2228710651397705, "learning_rate": 4.91834535949992e-07, "loss": 0.0026, "step": 87900 }, { "epoch": 2.6158630026929313, "grad_norm": 0.08230559527873993, "learning_rate": 4.910860712084681e-07, "loss": 0.0022, "step": 87910 }, { "epoch": 2.616160564176573, "grad_norm": 0.18877936899662018, "learning_rate": 4.903381469877633e-07, "loss": 0.0014, "step": 87920 }, { "epoch": 2.6164581256602144, "grad_norm": 0.16133412718772888, "learning_rate": 4.895907633775376e-07, "loss": 0.0016, "step": 87930 }, { "epoch": 2.616755687143856, "grad_norm": 0.08065028488636017, "learning_rate": 4.888439204673856e-07, "loss": 0.0015, "step": 87940 }, { "epoch": 2.6170532486274976, "grad_norm": 0.21486221253871918, "learning_rate": 4.880976183468389e-07, "loss": 0.0024, "step": 87950 }, { "epoch": 2.617350810111139, "grad_norm": 0.0833170935511589, "learning_rate": 4.873518571053615e-07, "loss": 0.0017, "step": 87960 }, { "epoch": 2.6176483715947807, "grad_norm": 0.15512126684188843, "learning_rate": 4.866066368323535e-07, "loss": 0.0022, "step": 87970 }, { "epoch": 2.6179459330784223, "grad_norm": 0.2634359896183014, "learning_rate": 4.858619576171525e-07, "loss": 0.0015, "step": 87980 }, { "epoch": 2.618243494562064, "grad_norm": 0.014889874495565891, "learning_rate": 4.851178195490286e-07, "loss": 0.0008, "step": 87990 }, { "epoch": 2.6185410560457054, "grad_norm": 0.12998615205287933, "learning_rate": 4.843742227171866e-07, "loss": 0.0021, "step": 88000 }, { "epoch": 2.618838617529347, "grad_norm": 0.047536883503198624, "learning_rate": 4.836311672107691e-07, "loss": 0.0011, "step": 88010 }, { "epoch": 2.6191361790129886, "grad_norm": 0.13497406244277954, "learning_rate": 4.828886531188514e-07, "loss": 0.0031, "step": 88020 }, { "epoch": 2.61943374049663, "grad_norm": 0.12897393107414246, "learning_rate": 4.821466805304458e-07, "loss": 0.0017, "step": 88030 }, { "epoch": 2.6197313019802717, "grad_norm": 0.22697140276432037, "learning_rate": 4.814052495344973e-07, "loss": 0.0012, "step": 88040 }, { "epoch": 2.6200288634639133, "grad_norm": 0.23707647621631622, "learning_rate": 4.806643602198885e-07, "loss": 0.0013, "step": 88050 }, { "epoch": 2.620326424947555, "grad_norm": 0.007834058254957199, "learning_rate": 4.799240126754351e-07, "loss": 0.0014, "step": 88060 }, { "epoch": 2.6206239864311964, "grad_norm": 0.03500175103545189, "learning_rate": 4.791842069898906e-07, "loss": 0.0011, "step": 88070 }, { "epoch": 2.620921547914838, "grad_norm": 0.2131171077489853, "learning_rate": 4.784449432519389e-07, "loss": 0.0019, "step": 88080 }, { "epoch": 2.6212191093984796, "grad_norm": 0.07190199196338654, "learning_rate": 4.777062215502032e-07, "loss": 0.0015, "step": 88090 }, { "epoch": 2.621516670882121, "grad_norm": 0.11545801907777786, "learning_rate": 4.769680419732397e-07, "loss": 0.0032, "step": 88100 }, { "epoch": 2.6218142323657627, "grad_norm": 0.18085451424121857, "learning_rate": 4.7623040460954117e-07, "loss": 0.0021, "step": 88110 }, { "epoch": 2.6221117938494043, "grad_norm": 0.1787811666727066, "learning_rate": 4.754933095475328e-07, "loss": 0.0012, "step": 88120 }, { "epoch": 2.622409355333046, "grad_norm": 0.16073161363601685, "learning_rate": 4.747567568755768e-07, "loss": 0.0024, "step": 88130 }, { "epoch": 2.6227069168166874, "grad_norm": 0.04938603192567825, "learning_rate": 4.7402074668196964e-07, "loss": 0.0019, "step": 88140 }, { "epoch": 2.623004478300329, "grad_norm": 0.12187141180038452, "learning_rate": 4.732852790549447e-07, "loss": 0.0014, "step": 88150 }, { "epoch": 2.6233020397839706, "grad_norm": 0.1438310593366623, "learning_rate": 4.725503540826659e-07, "loss": 0.0014, "step": 88160 }, { "epoch": 2.623599601267612, "grad_norm": 0.3338467478752136, "learning_rate": 4.7181597185323614e-07, "loss": 0.0023, "step": 88170 }, { "epoch": 2.6238971627512537, "grad_norm": 0.27948954701423645, "learning_rate": 4.710821324546927e-07, "loss": 0.0023, "step": 88180 }, { "epoch": 2.6241947242348953, "grad_norm": 0.14799189567565918, "learning_rate": 4.703488359750047e-07, "loss": 0.0014, "step": 88190 }, { "epoch": 2.6244922857185364, "grad_norm": 0.10498949885368347, "learning_rate": 4.696160825020801e-07, "loss": 0.0015, "step": 88200 }, { "epoch": 2.624789847202178, "grad_norm": 0.20822304487228394, "learning_rate": 4.6888387212375984e-07, "loss": 0.0015, "step": 88210 }, { "epoch": 2.6250874086858196, "grad_norm": 0.0702395886182785, "learning_rate": 4.681522049278203e-07, "loss": 0.0022, "step": 88220 }, { "epoch": 2.625384970169461, "grad_norm": 0.08083317428827286, "learning_rate": 4.6742108100197194e-07, "loss": 0.0013, "step": 88230 }, { "epoch": 2.6256825316531027, "grad_norm": 0.07663536816835403, "learning_rate": 4.666905004338612e-07, "loss": 0.0022, "step": 88240 }, { "epoch": 2.6259800931367443, "grad_norm": 0.15692280232906342, "learning_rate": 4.6596046331106926e-07, "loss": 0.002, "step": 88250 }, { "epoch": 2.626277654620386, "grad_norm": 0.27277305722236633, "learning_rate": 4.652309697211099e-07, "loss": 0.0033, "step": 88260 }, { "epoch": 2.6265752161040274, "grad_norm": 0.12154385447502136, "learning_rate": 4.645020197514344e-07, "loss": 0.003, "step": 88270 }, { "epoch": 2.626872777587669, "grad_norm": 0.09359772503376007, "learning_rate": 4.637736134894288e-07, "loss": 0.001, "step": 88280 }, { "epoch": 2.6271703390713106, "grad_norm": 0.15348488092422485, "learning_rate": 4.630457510224129e-07, "loss": 0.001, "step": 88290 }, { "epoch": 2.627467900554952, "grad_norm": 0.06924702972173691, "learning_rate": 4.623184324376412e-07, "loss": 0.0016, "step": 88300 }, { "epoch": 2.6277654620385937, "grad_norm": 0.15400993824005127, "learning_rate": 4.615916578223029e-07, "loss": 0.0032, "step": 88310 }, { "epoch": 2.6280630235222353, "grad_norm": 0.06885576248168945, "learning_rate": 4.6086542726352323e-07, "loss": 0.0015, "step": 88320 }, { "epoch": 2.628360585005877, "grad_norm": 0.02313496544957161, "learning_rate": 4.601397408483627e-07, "loss": 0.0014, "step": 88330 }, { "epoch": 2.6286581464895185, "grad_norm": 0.21782542765140533, "learning_rate": 4.594145986638121e-07, "loss": 0.0014, "step": 88340 }, { "epoch": 2.62895570797316, "grad_norm": 0.06527897715568542, "learning_rate": 4.5869000079680314e-07, "loss": 0.0019, "step": 88350 }, { "epoch": 2.6292532694568016, "grad_norm": 0.12019579112529755, "learning_rate": 4.57965947334198e-07, "loss": 0.0015, "step": 88360 }, { "epoch": 2.629550830940443, "grad_norm": 0.22648245096206665, "learning_rate": 4.57242438362796e-07, "loss": 0.0042, "step": 88370 }, { "epoch": 2.6298483924240847, "grad_norm": 0.04650719091296196, "learning_rate": 4.5651947396932847e-07, "loss": 0.0022, "step": 88380 }, { "epoch": 2.6301459539077263, "grad_norm": 0.2837770879268646, "learning_rate": 4.557970542404644e-07, "loss": 0.0014, "step": 88390 }, { "epoch": 2.630443515391368, "grad_norm": 0.04689571633934975, "learning_rate": 4.5507517926280553e-07, "loss": 0.0027, "step": 88400 }, { "epoch": 2.630741076875009, "grad_norm": 0.2577452063560486, "learning_rate": 4.543538491228905e-07, "loss": 0.0031, "step": 88410 }, { "epoch": 2.6310386383586506, "grad_norm": 0.06700675189495087, "learning_rate": 4.53633063907189e-07, "loss": 0.0011, "step": 88420 }, { "epoch": 2.631336199842292, "grad_norm": 0.1539647877216339, "learning_rate": 4.5291282370210854e-07, "loss": 0.0031, "step": 88430 }, { "epoch": 2.6316337613259337, "grad_norm": 0.13948561251163483, "learning_rate": 4.521931285939901e-07, "loss": 0.0033, "step": 88440 }, { "epoch": 2.6319313228095753, "grad_norm": 0.022032996639609337, "learning_rate": 4.514739786691108e-07, "loss": 0.0009, "step": 88450 }, { "epoch": 2.632228884293217, "grad_norm": 0.15165546536445618, "learning_rate": 4.5075537401367884e-07, "loss": 0.0026, "step": 88460 }, { "epoch": 2.6325264457768585, "grad_norm": 0.025764035061001778, "learning_rate": 4.500373147138404e-07, "loss": 0.0015, "step": 88470 }, { "epoch": 2.6328240072605, "grad_norm": 0.16171583533287048, "learning_rate": 4.493198008556754e-07, "loss": 0.0017, "step": 88480 }, { "epoch": 2.6331215687441416, "grad_norm": 0.3158692419528961, "learning_rate": 4.486028325251979e-07, "loss": 0.0022, "step": 88490 }, { "epoch": 2.633419130227783, "grad_norm": 0.09863147884607315, "learning_rate": 4.4788640980835686e-07, "loss": 0.0014, "step": 88500 }, { "epoch": 2.6337166917114248, "grad_norm": 0.1606905460357666, "learning_rate": 4.471705327910353e-07, "loss": 0.0021, "step": 88510 }, { "epoch": 2.6340142531950663, "grad_norm": 0.17733018100261688, "learning_rate": 4.4645520155905344e-07, "loss": 0.004, "step": 88520 }, { "epoch": 2.634311814678708, "grad_norm": 0.15717388689517975, "learning_rate": 4.45740416198161e-07, "loss": 0.0016, "step": 88530 }, { "epoch": 2.6346093761623495, "grad_norm": 0.0705530047416687, "learning_rate": 4.450261767940467e-07, "loss": 0.0032, "step": 88540 }, { "epoch": 2.634906937645991, "grad_norm": 0.25286665558815, "learning_rate": 4.44312483432332e-07, "loss": 0.0015, "step": 88550 }, { "epoch": 2.6352044991296326, "grad_norm": 0.0730268806219101, "learning_rate": 4.4359933619857465e-07, "loss": 0.0018, "step": 88560 }, { "epoch": 2.635502060613274, "grad_norm": 0.05265246704220772, "learning_rate": 4.428867351782629e-07, "loss": 0.0018, "step": 88570 }, { "epoch": 2.6357996220969158, "grad_norm": 0.2189284712076187, "learning_rate": 4.421746804568239e-07, "loss": 0.0017, "step": 88580 }, { "epoch": 2.6360971835805573, "grad_norm": 0.07347927242517471, "learning_rate": 4.4146317211961773e-07, "loss": 0.002, "step": 88590 }, { "epoch": 2.636394745064199, "grad_norm": 0.1890689581632614, "learning_rate": 4.4075221025193793e-07, "loss": 0.002, "step": 88600 }, { "epoch": 2.6366923065478405, "grad_norm": 0.17137411236763, "learning_rate": 4.400417949390129e-07, "loss": 0.0015, "step": 88610 }, { "epoch": 2.636989868031482, "grad_norm": 0.15841352939605713, "learning_rate": 4.393319262660073e-07, "loss": 0.0023, "step": 88620 }, { "epoch": 2.6372874295151236, "grad_norm": 0.07140062004327774, "learning_rate": 4.3862260431801864e-07, "loss": 0.0014, "step": 88630 }, { "epoch": 2.637584990998765, "grad_norm": 0.14514654874801636, "learning_rate": 4.3791382918007884e-07, "loss": 0.0009, "step": 88640 }, { "epoch": 2.6378825524824068, "grad_norm": 0.3653450906276703, "learning_rate": 4.3720560093715393e-07, "loss": 0.003, "step": 88650 }, { "epoch": 2.6381801139660483, "grad_norm": 1.162616491317749, "learning_rate": 4.364979196741465e-07, "loss": 0.0087, "step": 88660 }, { "epoch": 2.63847767544969, "grad_norm": 0.04285696893930435, "learning_rate": 4.357907854758925e-07, "loss": 0.001, "step": 88670 }, { "epoch": 2.6387752369333315, "grad_norm": 0.15852148830890656, "learning_rate": 4.3508419842716034e-07, "loss": 0.0033, "step": 88680 }, { "epoch": 2.639072798416973, "grad_norm": 0.10026542842388153, "learning_rate": 4.343781586126544e-07, "loss": 0.0013, "step": 88690 }, { "epoch": 2.6393703599006146, "grad_norm": 0.1709703505039215, "learning_rate": 4.336726661170149e-07, "loss": 0.0013, "step": 88700 }, { "epoch": 2.639667921384256, "grad_norm": 0.04550018534064293, "learning_rate": 4.329677210248151e-07, "loss": 0.0014, "step": 88710 }, { "epoch": 2.639965482867898, "grad_norm": 0.11885301023721695, "learning_rate": 4.322633234205614e-07, "loss": 0.0018, "step": 88720 }, { "epoch": 2.6402630443515394, "grad_norm": 0.0957607850432396, "learning_rate": 4.3155947338869577e-07, "loss": 0.0025, "step": 88730 }, { "epoch": 2.640560605835181, "grad_norm": 0.07720737904310226, "learning_rate": 4.308561710135956e-07, "loss": 0.0044, "step": 88740 }, { "epoch": 2.6408581673188225, "grad_norm": 0.12038187682628632, "learning_rate": 4.3015341637957174e-07, "loss": 0.0022, "step": 88750 }, { "epoch": 2.641155728802464, "grad_norm": 0.18191765248775482, "learning_rate": 4.294512095708675e-07, "loss": 0.0021, "step": 88760 }, { "epoch": 2.641453290286105, "grad_norm": 0.07096713781356812, "learning_rate": 4.287495506716627e-07, "loss": 0.0021, "step": 88770 }, { "epoch": 2.6417508517697468, "grad_norm": 0.23743005096912384, "learning_rate": 4.280484397660728e-07, "loss": 0.0012, "step": 88780 }, { "epoch": 2.6420484132533884, "grad_norm": 0.10272495448589325, "learning_rate": 4.2734787693814394e-07, "loss": 0.0012, "step": 88790 }, { "epoch": 2.64234597473703, "grad_norm": 0.05639730021357536, "learning_rate": 4.2664786227185893e-07, "loss": 0.0016, "step": 88800 }, { "epoch": 2.6426435362206715, "grad_norm": 0.08514880388975143, "learning_rate": 4.25948395851134e-07, "loss": 0.0021, "step": 88810 }, { "epoch": 2.642941097704313, "grad_norm": 0.10542551428079605, "learning_rate": 4.252494777598215e-07, "loss": 0.0019, "step": 88820 }, { "epoch": 2.6432386591879546, "grad_norm": 0.012277936562895775, "learning_rate": 4.245511080817044e-07, "loss": 0.0013, "step": 88830 }, { "epoch": 2.643536220671596, "grad_norm": 0.01774376817047596, "learning_rate": 4.23853286900503e-07, "loss": 0.0015, "step": 88840 }, { "epoch": 2.643833782155238, "grad_norm": 0.04988881200551987, "learning_rate": 4.231560142998703e-07, "loss": 0.0017, "step": 88850 }, { "epoch": 2.6441313436388794, "grad_norm": 0.04371656849980354, "learning_rate": 4.2245929036339614e-07, "loss": 0.0008, "step": 88860 }, { "epoch": 2.644428905122521, "grad_norm": 0.11907413601875305, "learning_rate": 4.2176311517460045e-07, "loss": 0.0014, "step": 88870 }, { "epoch": 2.6447264666061625, "grad_norm": 0.04185330122709274, "learning_rate": 4.210674888169397e-07, "loss": 0.0018, "step": 88880 }, { "epoch": 2.645024028089804, "grad_norm": 0.1442464292049408, "learning_rate": 4.203724113738056e-07, "loss": 0.0028, "step": 88890 }, { "epoch": 2.6453215895734457, "grad_norm": 0.04650915786623955, "learning_rate": 4.196778829285225e-07, "loss": 0.0012, "step": 88900 }, { "epoch": 2.6456191510570872, "grad_norm": 0.16019606590270996, "learning_rate": 4.189839035643478e-07, "loss": 0.0018, "step": 88910 }, { "epoch": 2.645916712540729, "grad_norm": 0.054871540516614914, "learning_rate": 4.1829047336447604e-07, "loss": 0.0014, "step": 88920 }, { "epoch": 2.6462142740243704, "grad_norm": 0.05724899098277092, "learning_rate": 4.17597592412034e-07, "loss": 0.0016, "step": 88930 }, { "epoch": 2.646511835508012, "grad_norm": 0.10314596444368362, "learning_rate": 4.1690526079008364e-07, "loss": 0.0012, "step": 88940 }, { "epoch": 2.6468093969916535, "grad_norm": 0.004814485553652048, "learning_rate": 4.1621347858161963e-07, "loss": 0.0011, "step": 88950 }, { "epoch": 2.647106958475295, "grad_norm": 0.06281737238168716, "learning_rate": 4.155222458695718e-07, "loss": 0.0012, "step": 88960 }, { "epoch": 2.6474045199589367, "grad_norm": 0.341917484998703, "learning_rate": 4.148315627368044e-07, "loss": 0.0017, "step": 88970 }, { "epoch": 2.6477020814425782, "grad_norm": 0.21435579657554626, "learning_rate": 4.1414142926611443e-07, "loss": 0.0023, "step": 88980 }, { "epoch": 2.6479996429262194, "grad_norm": 0.3020158112049103, "learning_rate": 4.1345184554023476e-07, "loss": 0.0014, "step": 88990 }, { "epoch": 2.648297204409861, "grad_norm": 0.2212565690279007, "learning_rate": 4.1276281164183083e-07, "loss": 0.0013, "step": 89000 }, { "epoch": 2.6485947658935025, "grad_norm": 0.13694660365581512, "learning_rate": 4.1207432765350443e-07, "loss": 0.0014, "step": 89010 }, { "epoch": 2.648892327377144, "grad_norm": 0.05586409941315651, "learning_rate": 4.1138639365778723e-07, "loss": 0.0014, "step": 89020 }, { "epoch": 2.6491898888607857, "grad_norm": 0.0662209764122963, "learning_rate": 4.1069900973714937e-07, "loss": 0.0019, "step": 89030 }, { "epoch": 2.6494874503444272, "grad_norm": 0.20779001712799072, "learning_rate": 4.1001217597399276e-07, "loss": 0.0016, "step": 89040 }, { "epoch": 2.649785011828069, "grad_norm": 0.06425918638706207, "learning_rate": 4.093258924506549e-07, "loss": 0.0014, "step": 89050 }, { "epoch": 2.6500825733117104, "grad_norm": 0.040390580892562866, "learning_rate": 4.0864015924940327e-07, "loss": 0.0021, "step": 89060 }, { "epoch": 2.650380134795352, "grad_norm": 0.12444215267896652, "learning_rate": 4.079549764524454e-07, "loss": 0.0022, "step": 89070 }, { "epoch": 2.6506776962789935, "grad_norm": 0.22142112255096436, "learning_rate": 4.0727034414191957e-07, "loss": 0.0013, "step": 89080 }, { "epoch": 2.650975257762635, "grad_norm": 0.16405725479125977, "learning_rate": 4.0658626239989673e-07, "loss": 0.0013, "step": 89090 }, { "epoch": 2.6512728192462767, "grad_norm": 0.07565584033727646, "learning_rate": 4.059027313083841e-07, "loss": 0.0013, "step": 89100 }, { "epoch": 2.6515703807299182, "grad_norm": 0.3077361285686493, "learning_rate": 4.0521975094932273e-07, "loss": 0.0013, "step": 89110 }, { "epoch": 2.65186794221356, "grad_norm": 0.07331537455320358, "learning_rate": 4.045373214045872e-07, "loss": 0.0017, "step": 89120 }, { "epoch": 2.6521655036972014, "grad_norm": 0.02724888175725937, "learning_rate": 4.0385544275598543e-07, "loss": 0.0017, "step": 89130 }, { "epoch": 2.652463065180843, "grad_norm": 0.05422127619385719, "learning_rate": 4.031741150852592e-07, "loss": 0.0015, "step": 89140 }, { "epoch": 2.6527606266644845, "grad_norm": 0.12010443210601807, "learning_rate": 4.024933384740859e-07, "loss": 0.0017, "step": 89150 }, { "epoch": 2.653058188148126, "grad_norm": 0.22662383317947388, "learning_rate": 4.0181311300407644e-07, "loss": 0.0024, "step": 89160 }, { "epoch": 2.6533557496317677, "grad_norm": 0.019739961251616478, "learning_rate": 4.0113343875677393e-07, "loss": 0.0013, "step": 89170 }, { "epoch": 2.6536533111154093, "grad_norm": 0.13270579278469086, "learning_rate": 4.0045431581365646e-07, "loss": 0.0011, "step": 89180 }, { "epoch": 2.653950872599051, "grad_norm": 0.10148410499095917, "learning_rate": 3.9977574425613616e-07, "loss": 0.002, "step": 89190 }, { "epoch": 2.6542484340826924, "grad_norm": 0.16258655488491058, "learning_rate": 3.990977241655608e-07, "loss": 0.0021, "step": 89200 }, { "epoch": 2.654545995566334, "grad_norm": 0.09192904084920883, "learning_rate": 3.984202556232075e-07, "loss": 0.0025, "step": 89210 }, { "epoch": 2.6548435570499755, "grad_norm": 0.06052850931882858, "learning_rate": 3.977433387102919e-07, "loss": 0.0021, "step": 89220 }, { "epoch": 2.655141118533617, "grad_norm": 0.1990288943052292, "learning_rate": 3.9706697350796076e-07, "loss": 0.0019, "step": 89230 }, { "epoch": 2.6554386800172587, "grad_norm": 0.07591754198074341, "learning_rate": 3.9639116009729697e-07, "loss": 0.002, "step": 89240 }, { "epoch": 2.6557362415009003, "grad_norm": 0.27560123801231384, "learning_rate": 3.95715898559314e-07, "loss": 0.0015, "step": 89250 }, { "epoch": 2.656033802984542, "grad_norm": 0.043172627687454224, "learning_rate": 3.950411889749617e-07, "loss": 0.0011, "step": 89260 }, { "epoch": 2.6563313644681834, "grad_norm": 0.2940182387828827, "learning_rate": 3.94367031425123e-07, "loss": 0.0015, "step": 89270 }, { "epoch": 2.656628925951825, "grad_norm": 0.18445663154125214, "learning_rate": 3.936934259906161e-07, "loss": 0.0013, "step": 89280 }, { "epoch": 2.6569264874354666, "grad_norm": 0.4142896234989166, "learning_rate": 3.9302037275218975e-07, "loss": 0.0022, "step": 89290 }, { "epoch": 2.657224048919108, "grad_norm": 0.18948814272880554, "learning_rate": 3.9234787179052933e-07, "loss": 0.0021, "step": 89300 }, { "epoch": 2.6575216104027497, "grad_norm": 0.01613146811723709, "learning_rate": 3.9167592318625323e-07, "loss": 0.0012, "step": 89310 }, { "epoch": 2.6578191718863913, "grad_norm": 0.2285057008266449, "learning_rate": 3.910045270199131e-07, "loss": 0.0019, "step": 89320 }, { "epoch": 2.658116733370033, "grad_norm": 0.021482186391949654, "learning_rate": 3.903336833719945e-07, "loss": 0.0009, "step": 89330 }, { "epoch": 2.658414294853674, "grad_norm": 0.16589127480983734, "learning_rate": 3.896633923229165e-07, "loss": 0.0011, "step": 89340 }, { "epoch": 2.6587118563373155, "grad_norm": 0.2553938031196594, "learning_rate": 3.8899365395303537e-07, "loss": 0.0013, "step": 89350 }, { "epoch": 2.659009417820957, "grad_norm": 0.1133800521492958, "learning_rate": 3.883244683426346e-07, "loss": 0.0018, "step": 89360 }, { "epoch": 2.6593069793045987, "grad_norm": 0.05368742346763611, "learning_rate": 3.876558355719373e-07, "loss": 0.0011, "step": 89370 }, { "epoch": 2.6596045407882403, "grad_norm": 0.04879339039325714, "learning_rate": 3.869877557210977e-07, "loss": 0.002, "step": 89380 }, { "epoch": 2.659902102271882, "grad_norm": 0.016033807769417763, "learning_rate": 3.863202288702028e-07, "loss": 0.002, "step": 89390 }, { "epoch": 2.6601996637555234, "grad_norm": 0.1353967785835266, "learning_rate": 3.856532550992753e-07, "loss": 0.0011, "step": 89400 }, { "epoch": 2.660497225239165, "grad_norm": 0.21122173964977264, "learning_rate": 3.8498683448827125e-07, "loss": 0.0028, "step": 89410 }, { "epoch": 2.6607947867228066, "grad_norm": 0.1192823201417923, "learning_rate": 3.8432096711708055e-07, "loss": 0.0015, "step": 89420 }, { "epoch": 2.661092348206448, "grad_norm": 0.06117371842265129, "learning_rate": 3.8365565306552445e-07, "loss": 0.0007, "step": 89430 }, { "epoch": 2.6613899096900897, "grad_norm": 0.1297995150089264, "learning_rate": 3.829908924133602e-07, "loss": 0.0016, "step": 89440 }, { "epoch": 2.6616874711737313, "grad_norm": 0.0583140030503273, "learning_rate": 3.8232668524027906e-07, "loss": 0.0016, "step": 89450 }, { "epoch": 2.661985032657373, "grad_norm": 0.3414428234100342, "learning_rate": 3.816630316259051e-07, "loss": 0.0018, "step": 89460 }, { "epoch": 2.6622825941410144, "grad_norm": 0.13189184665679932, "learning_rate": 3.8099993164979476e-07, "loss": 0.0022, "step": 89470 }, { "epoch": 2.662580155624656, "grad_norm": 0.23576264083385468, "learning_rate": 3.8033738539143994e-07, "loss": 0.0015, "step": 89480 }, { "epoch": 2.6628777171082976, "grad_norm": 0.2480040192604065, "learning_rate": 3.7967539293026546e-07, "loss": 0.0034, "step": 89490 }, { "epoch": 2.663175278591939, "grad_norm": 0.24226857721805573, "learning_rate": 3.790139543456306e-07, "loss": 0.0021, "step": 89500 }, { "epoch": 2.6634728400755807, "grad_norm": 0.11474794149398804, "learning_rate": 3.783530697168264e-07, "loss": 0.0015, "step": 89510 }, { "epoch": 2.6637704015592223, "grad_norm": 0.7469912171363831, "learning_rate": 3.7769273912307836e-07, "loss": 0.0043, "step": 89520 }, { "epoch": 2.664067963042864, "grad_norm": 0.07615300267934799, "learning_rate": 3.77032962643547e-07, "loss": 0.0015, "step": 89530 }, { "epoch": 2.6643655245265054, "grad_norm": 0.13982436060905457, "learning_rate": 3.7637374035732476e-07, "loss": 0.0024, "step": 89540 }, { "epoch": 2.664663086010147, "grad_norm": 0.16261723637580872, "learning_rate": 3.757150723434377e-07, "loss": 0.0007, "step": 89550 }, { "epoch": 2.664960647493788, "grad_norm": 0.08443643152713776, "learning_rate": 3.7505695868084556e-07, "loss": 0.0016, "step": 89560 }, { "epoch": 2.6652582089774297, "grad_norm": 0.16058249771595, "learning_rate": 3.743993994484424e-07, "loss": 0.0019, "step": 89570 }, { "epoch": 2.6655557704610713, "grad_norm": 0.287646621465683, "learning_rate": 3.7374239472505624e-07, "loss": 0.0015, "step": 89580 }, { "epoch": 2.665853331944713, "grad_norm": 0.11469753086566925, "learning_rate": 3.730859445894458e-07, "loss": 0.0021, "step": 89590 }, { "epoch": 2.6661508934283544, "grad_norm": 0.1450541615486145, "learning_rate": 3.7243004912030533e-07, "loss": 0.0013, "step": 89600 }, { "epoch": 2.666448454911996, "grad_norm": 0.1652379035949707, "learning_rate": 3.7177470839626364e-07, "loss": 0.0016, "step": 89610 }, { "epoch": 2.6667460163956376, "grad_norm": 0.12102992087602615, "learning_rate": 3.711199224958817e-07, "loss": 0.0009, "step": 89620 }, { "epoch": 2.667043577879279, "grad_norm": 0.06944859027862549, "learning_rate": 3.7046569149765287e-07, "loss": 0.0019, "step": 89630 }, { "epoch": 2.6673411393629207, "grad_norm": 0.06707505881786346, "learning_rate": 3.698120154800067e-07, "loss": 0.001, "step": 89640 }, { "epoch": 2.6676387008465623, "grad_norm": 0.1383298635482788, "learning_rate": 3.691588945213048e-07, "loss": 0.0014, "step": 89650 }, { "epoch": 2.667936262330204, "grad_norm": 0.07819757610559464, "learning_rate": 3.6850632869984083e-07, "loss": 0.0007, "step": 89660 }, { "epoch": 2.6682338238138454, "grad_norm": 0.16177281737327576, "learning_rate": 3.6785431809384373e-07, "loss": 0.0029, "step": 89670 }, { "epoch": 2.668531385297487, "grad_norm": 0.5801134705543518, "learning_rate": 3.672028627814761e-07, "loss": 0.0019, "step": 89680 }, { "epoch": 2.6688289467811286, "grad_norm": 0.0298706516623497, "learning_rate": 3.665519628408332e-07, "loss": 0.0014, "step": 89690 }, { "epoch": 2.66912650826477, "grad_norm": 0.042503323405981064, "learning_rate": 3.659016183499425e-07, "loss": 0.0013, "step": 89700 }, { "epoch": 2.6694240697484117, "grad_norm": 0.03465418145060539, "learning_rate": 3.652518293867674e-07, "loss": 0.0014, "step": 89710 }, { "epoch": 2.6697216312320533, "grad_norm": 0.21236884593963623, "learning_rate": 3.646025960292038e-07, "loss": 0.0033, "step": 89720 }, { "epoch": 2.670019192715695, "grad_norm": 0.09493669867515564, "learning_rate": 3.6395391835507886e-07, "loss": 0.0017, "step": 89730 }, { "epoch": 2.6703167541993365, "grad_norm": 0.22138823568820953, "learning_rate": 3.633057964421566e-07, "loss": 0.0011, "step": 89740 }, { "epoch": 2.670614315682978, "grad_norm": 0.08020924031734467, "learning_rate": 3.62658230368132e-07, "loss": 0.0018, "step": 89750 }, { "epoch": 2.6709118771666196, "grad_norm": 0.2047462910413742, "learning_rate": 3.6201122021063574e-07, "loss": 0.0013, "step": 89760 }, { "epoch": 2.671209438650261, "grad_norm": 0.10680070519447327, "learning_rate": 3.6136476604722747e-07, "loss": 0.0013, "step": 89770 }, { "epoch": 2.6715070001339027, "grad_norm": 0.14026713371276855, "learning_rate": 3.6071886795540465e-07, "loss": 0.0009, "step": 89780 }, { "epoch": 2.6718045616175443, "grad_norm": 0.05243617668747902, "learning_rate": 3.600735260125965e-07, "loss": 0.0026, "step": 89790 }, { "epoch": 2.672102123101186, "grad_norm": 0.14445067942142487, "learning_rate": 3.5942874029616604e-07, "loss": 0.0015, "step": 89800 }, { "epoch": 2.6723996845848275, "grad_norm": 0.12273793667554855, "learning_rate": 3.587845108834076e-07, "loss": 0.0021, "step": 89810 }, { "epoch": 2.672697246068469, "grad_norm": 0.06900041550397873, "learning_rate": 3.581408378515511e-07, "loss": 0.0009, "step": 89820 }, { "epoch": 2.6729948075521106, "grad_norm": 0.20429903268814087, "learning_rate": 3.5749772127775863e-07, "loss": 0.0014, "step": 89830 }, { "epoch": 2.673292369035752, "grad_norm": 0.1882164627313614, "learning_rate": 3.568551612391269e-07, "loss": 0.0022, "step": 89840 }, { "epoch": 2.6735899305193938, "grad_norm": 0.1965710073709488, "learning_rate": 3.5621315781268317e-07, "loss": 0.0014, "step": 89850 }, { "epoch": 2.6738874920030353, "grad_norm": 0.06183407083153725, "learning_rate": 3.5557171107539034e-07, "loss": 0.0021, "step": 89860 }, { "epoch": 2.674185053486677, "grad_norm": 0.27598899602890015, "learning_rate": 3.549308211041447e-07, "loss": 0.0022, "step": 89870 }, { "epoch": 2.6744826149703185, "grad_norm": 0.15699096024036407, "learning_rate": 3.5429048797577523e-07, "loss": 0.002, "step": 89880 }, { "epoch": 2.67478017645396, "grad_norm": 0.15073572099208832, "learning_rate": 3.536507117670429e-07, "loss": 0.0017, "step": 89890 }, { "epoch": 2.6750777379376016, "grad_norm": 0.205142542719841, "learning_rate": 3.5301149255464297e-07, "loss": 0.0028, "step": 89900 }, { "epoch": 2.675375299421243, "grad_norm": 0.23582597076892853, "learning_rate": 3.5237283041520467e-07, "loss": 0.0016, "step": 89910 }, { "epoch": 2.6756728609048843, "grad_norm": 0.49132663011550903, "learning_rate": 3.5173472542528897e-07, "loss": 0.0013, "step": 89920 }, { "epoch": 2.675970422388526, "grad_norm": 0.08777806907892227, "learning_rate": 3.510971776613914e-07, "loss": 0.0023, "step": 89930 }, { "epoch": 2.6762679838721675, "grad_norm": 0.11855260282754898, "learning_rate": 3.5046018719993966e-07, "loss": 0.002, "step": 89940 }, { "epoch": 2.676565545355809, "grad_norm": 0.11877062171697617, "learning_rate": 3.4982375411729665e-07, "loss": 0.0017, "step": 89950 }, { "epoch": 2.6768631068394506, "grad_norm": 0.03165682405233383, "learning_rate": 3.4918787848975455e-07, "loss": 0.0019, "step": 89960 }, { "epoch": 2.677160668323092, "grad_norm": 0.14649605751037598, "learning_rate": 3.485525603935419e-07, "loss": 0.0009, "step": 89970 }, { "epoch": 2.6774582298067338, "grad_norm": 0.00311701069585979, "learning_rate": 3.4791779990481946e-07, "loss": 0.0018, "step": 89980 }, { "epoch": 2.6777557912903753, "grad_norm": 0.06363462656736374, "learning_rate": 3.472835970996824e-07, "loss": 0.0043, "step": 89990 }, { "epoch": 2.678053352774017, "grad_norm": 0.06384151428937912, "learning_rate": 3.466499520541561e-07, "loss": 0.0013, "step": 90000 }, { "epoch": 2.6783509142576585, "grad_norm": 0.09201622009277344, "learning_rate": 3.460168648442014e-07, "loss": 0.0014, "step": 90010 }, { "epoch": 2.6786484757413, "grad_norm": 0.5770373344421387, "learning_rate": 3.4538433554571203e-07, "loss": 0.0024, "step": 90020 }, { "epoch": 2.6789460372249416, "grad_norm": 0.12366057932376862, "learning_rate": 3.4475236423451517e-07, "loss": 0.0011, "step": 90030 }, { "epoch": 2.679243598708583, "grad_norm": 0.2073453962802887, "learning_rate": 3.441209509863691e-07, "loss": 0.0008, "step": 90040 }, { "epoch": 2.6795411601922248, "grad_norm": 0.11867786198854446, "learning_rate": 3.434900958769666e-07, "loss": 0.0018, "step": 90050 }, { "epoch": 2.6798387216758663, "grad_norm": 0.12222570180892944, "learning_rate": 3.428597989819343e-07, "loss": 0.0012, "step": 90060 }, { "epoch": 2.680136283159508, "grad_norm": 0.06798062473535538, "learning_rate": 3.4223006037683196e-07, "loss": 0.0008, "step": 90070 }, { "epoch": 2.6804338446431495, "grad_norm": 0.34485432505607605, "learning_rate": 3.41600880137149e-07, "loss": 0.0018, "step": 90080 }, { "epoch": 2.680731406126791, "grad_norm": 0.07234935462474823, "learning_rate": 3.4097225833831237e-07, "loss": 0.001, "step": 90090 }, { "epoch": 2.6810289676104326, "grad_norm": 0.13764357566833496, "learning_rate": 3.4034419505568015e-07, "loss": 0.0017, "step": 90100 }, { "epoch": 2.681326529094074, "grad_norm": 0.05503737926483154, "learning_rate": 3.3971669036454216e-07, "loss": 0.0054, "step": 90110 }, { "epoch": 2.681624090577716, "grad_norm": 0.03467754274606705, "learning_rate": 3.3908974434012367e-07, "loss": 0.0012, "step": 90120 }, { "epoch": 2.681921652061357, "grad_norm": 0.051287028938531876, "learning_rate": 3.384633570575813e-07, "loss": 0.0012, "step": 90130 }, { "epoch": 2.6822192135449985, "grad_norm": 0.09391168504953384, "learning_rate": 3.3783752859200657e-07, "loss": 0.0008, "step": 90140 }, { "epoch": 2.68251677502864, "grad_norm": 0.5832599997520447, "learning_rate": 3.372122590184207e-07, "loss": 0.0027, "step": 90150 }, { "epoch": 2.6828143365122816, "grad_norm": 0.19626332819461823, "learning_rate": 3.3658754841178144e-07, "loss": 0.0016, "step": 90160 }, { "epoch": 2.683111897995923, "grad_norm": 0.05110549181699753, "learning_rate": 3.359633968469772e-07, "loss": 0.0008, "step": 90170 }, { "epoch": 2.6834094594795648, "grad_norm": 0.07524935156106949, "learning_rate": 3.353398043988315e-07, "loss": 0.001, "step": 90180 }, { "epoch": 2.6837070209632063, "grad_norm": 0.09262223541736603, "learning_rate": 3.3471677114209736e-07, "loss": 0.0022, "step": 90190 }, { "epoch": 2.684004582446848, "grad_norm": 0.10616732388734818, "learning_rate": 3.3409429715146433e-07, "loss": 0.0022, "step": 90200 }, { "epoch": 2.6843021439304895, "grad_norm": 0.1378091424703598, "learning_rate": 3.3347238250155456e-07, "loss": 0.0012, "step": 90210 }, { "epoch": 2.684599705414131, "grad_norm": 0.0844728872179985, "learning_rate": 3.3285102726691944e-07, "loss": 0.0008, "step": 90220 }, { "epoch": 2.6848972668977726, "grad_norm": 0.06441358476877213, "learning_rate": 3.3223023152204826e-07, "loss": 0.0007, "step": 90230 }, { "epoch": 2.685194828381414, "grad_norm": 0.35532346367836, "learning_rate": 3.316099953413593e-07, "loss": 0.0016, "step": 90240 }, { "epoch": 2.685492389865056, "grad_norm": 0.02823931910097599, "learning_rate": 3.309903187992075e-07, "loss": 0.0012, "step": 90250 }, { "epoch": 2.6857899513486974, "grad_norm": 0.16255076229572296, "learning_rate": 3.3037120196987626e-07, "loss": 0.0014, "step": 90260 }, { "epoch": 2.686087512832339, "grad_norm": 0.18614999949932098, "learning_rate": 3.2975264492758563e-07, "loss": 0.0009, "step": 90270 }, { "epoch": 2.6863850743159805, "grad_norm": 0.08132601529359818, "learning_rate": 3.2913464774648686e-07, "loss": 0.0016, "step": 90280 }, { "epoch": 2.686682635799622, "grad_norm": 0.36231279373168945, "learning_rate": 3.285172105006645e-07, "loss": 0.002, "step": 90290 }, { "epoch": 2.6869801972832636, "grad_norm": 0.1712462455034256, "learning_rate": 3.2790033326413506e-07, "loss": 0.0013, "step": 90300 }, { "epoch": 2.6872777587669052, "grad_norm": 0.13510167598724365, "learning_rate": 3.272840161108498e-07, "loss": 0.002, "step": 90310 }, { "epoch": 2.687575320250547, "grad_norm": 0.11720403283834457, "learning_rate": 3.2666825911469137e-07, "loss": 0.0017, "step": 90320 }, { "epoch": 2.6878728817341884, "grad_norm": 0.052759747952222824, "learning_rate": 3.2605306234947566e-07, "loss": 0.0015, "step": 90330 }, { "epoch": 2.68817044321783, "grad_norm": 0.1203342005610466, "learning_rate": 3.254384258889509e-07, "loss": 0.0015, "step": 90340 }, { "epoch": 2.6884680047014715, "grad_norm": 0.48773524165153503, "learning_rate": 3.248243498067993e-07, "loss": 0.0015, "step": 90350 }, { "epoch": 2.688765566185113, "grad_norm": 0.19303546845912933, "learning_rate": 3.2421083417663477e-07, "loss": 0.0029, "step": 90360 }, { "epoch": 2.6890631276687547, "grad_norm": 0.1552712321281433, "learning_rate": 3.235978790720057e-07, "loss": 0.0022, "step": 90370 }, { "epoch": 2.6893606891523962, "grad_norm": 0.05464976280927658, "learning_rate": 3.2298548456639043e-07, "loss": 0.0014, "step": 90380 }, { "epoch": 2.689658250636038, "grad_norm": 0.0879359096288681, "learning_rate": 3.2237365073320257e-07, "loss": 0.0014, "step": 90390 }, { "epoch": 2.6899558121196794, "grad_norm": 0.1155148297548294, "learning_rate": 3.217623776457873e-07, "loss": 0.0011, "step": 90400 }, { "epoch": 2.690253373603321, "grad_norm": 0.16773061454296112, "learning_rate": 3.211516653774244e-07, "loss": 0.0027, "step": 90410 }, { "epoch": 2.6905509350869625, "grad_norm": 0.07152741402387619, "learning_rate": 3.2054151400132293e-07, "loss": 0.001, "step": 90420 }, { "epoch": 2.690848496570604, "grad_norm": 0.2408590465784073, "learning_rate": 3.199319235906273e-07, "loss": 0.0028, "step": 90430 }, { "epoch": 2.6911460580542457, "grad_norm": 0.047428831458091736, "learning_rate": 3.193228942184151e-07, "loss": 0.0017, "step": 90440 }, { "epoch": 2.6914436195378872, "grad_norm": 0.038250140845775604, "learning_rate": 3.187144259576958e-07, "loss": 0.0017, "step": 90450 }, { "epoch": 2.691741181021529, "grad_norm": 0.07799490541219711, "learning_rate": 3.181065188814103e-07, "loss": 0.001, "step": 90460 }, { "epoch": 2.6920387425051704, "grad_norm": 0.08420887589454651, "learning_rate": 3.1749917306243326e-07, "loss": 0.002, "step": 90470 }, { "epoch": 2.692336303988812, "grad_norm": 0.08652516454458237, "learning_rate": 3.168923885735747e-07, "loss": 0.003, "step": 90480 }, { "epoch": 2.692633865472453, "grad_norm": 0.12405139952898026, "learning_rate": 3.1628616548757265e-07, "loss": 0.002, "step": 90490 }, { "epoch": 2.6929314269560947, "grad_norm": 0.21798336505889893, "learning_rate": 3.1568050387710045e-07, "loss": 0.0021, "step": 90500 }, { "epoch": 2.6932289884397362, "grad_norm": 0.14627817273139954, "learning_rate": 3.150754038147652e-07, "loss": 0.002, "step": 90510 }, { "epoch": 2.693526549923378, "grad_norm": 0.052526745945215225, "learning_rate": 3.1447086537310324e-07, "loss": 0.0014, "step": 90520 }, { "epoch": 2.6938241114070194, "grad_norm": 0.589447557926178, "learning_rate": 3.1386688862458603e-07, "loss": 0.001, "step": 90530 }, { "epoch": 2.694121672890661, "grad_norm": 0.054658565670251846, "learning_rate": 3.132634736416179e-07, "loss": 0.001, "step": 90540 }, { "epoch": 2.6944192343743025, "grad_norm": 0.0903889536857605, "learning_rate": 3.1266062049653603e-07, "loss": 0.0016, "step": 90550 }, { "epoch": 2.694716795857944, "grad_norm": 0.013510443270206451, "learning_rate": 3.1205832926160695e-07, "loss": 0.0033, "step": 90560 }, { "epoch": 2.6950143573415857, "grad_norm": 0.04662886634469032, "learning_rate": 3.114566000090341e-07, "loss": 0.0033, "step": 90570 }, { "epoch": 2.6953119188252272, "grad_norm": 0.11767656356096268, "learning_rate": 3.1085543281095143e-07, "loss": 0.0011, "step": 90580 }, { "epoch": 2.695609480308869, "grad_norm": 0.0884653627872467, "learning_rate": 3.1025482773942625e-07, "loss": 0.0012, "step": 90590 }, { "epoch": 2.6959070417925104, "grad_norm": 0.06786837428808212, "learning_rate": 3.0965478486645717e-07, "loss": 0.0021, "step": 90600 }, { "epoch": 2.696204603276152, "grad_norm": 0.4605443775653839, "learning_rate": 3.0905530426397655e-07, "loss": 0.0013, "step": 90610 }, { "epoch": 2.6965021647597935, "grad_norm": 0.17997053265571594, "learning_rate": 3.084563860038492e-07, "loss": 0.002, "step": 90620 }, { "epoch": 2.696799726243435, "grad_norm": 0.13709692656993866, "learning_rate": 3.0785803015787265e-07, "loss": 0.0018, "step": 90630 }, { "epoch": 2.6970972877270767, "grad_norm": 0.17867842316627502, "learning_rate": 3.0726023679777615e-07, "loss": 0.0021, "step": 90640 }, { "epoch": 2.6973948492107183, "grad_norm": 0.11264634132385254, "learning_rate": 3.06663005995223e-07, "loss": 0.0032, "step": 90650 }, { "epoch": 2.69769241069436, "grad_norm": 0.17145656049251556, "learning_rate": 3.060663378218071e-07, "loss": 0.0016, "step": 90660 }, { "epoch": 2.6979899721780014, "grad_norm": 0.19490878283977509, "learning_rate": 3.054702323490577e-07, "loss": 0.0021, "step": 90670 }, { "epoch": 2.698287533661643, "grad_norm": 0.02629152312874794, "learning_rate": 3.048746896484328e-07, "loss": 0.0009, "step": 90680 }, { "epoch": 2.6985850951452846, "grad_norm": 0.04179084673523903, "learning_rate": 3.0427970979132637e-07, "loss": 0.0014, "step": 90690 }, { "epoch": 2.6988826566289257, "grad_norm": 0.0214373841881752, "learning_rate": 3.0368529284906303e-07, "loss": 0.0014, "step": 90700 }, { "epoch": 2.6991802181125673, "grad_norm": 0.08954319357872009, "learning_rate": 3.030914388929018e-07, "loss": 0.0016, "step": 90710 }, { "epoch": 2.699477779596209, "grad_norm": 0.17295172810554504, "learning_rate": 3.0249814799403034e-07, "loss": 0.0016, "step": 90720 }, { "epoch": 2.6997753410798504, "grad_norm": 0.10910692065954208, "learning_rate": 3.019054202235733e-07, "loss": 0.0021, "step": 90730 }, { "epoch": 2.700072902563492, "grad_norm": 0.024782516062259674, "learning_rate": 3.0131325565258493e-07, "loss": 0.0015, "step": 90740 }, { "epoch": 2.7003704640471335, "grad_norm": 0.06398949027061462, "learning_rate": 3.0072165435205413e-07, "loss": 0.0009, "step": 90750 }, { "epoch": 2.700668025530775, "grad_norm": 0.18243710696697235, "learning_rate": 3.0013061639289855e-07, "loss": 0.0015, "step": 90760 }, { "epoch": 2.7009655870144167, "grad_norm": 0.07613977044820786, "learning_rate": 2.995401418459737e-07, "loss": 0.0026, "step": 90770 }, { "epoch": 2.7012631484980583, "grad_norm": 0.16736862063407898, "learning_rate": 2.989502307820635e-07, "loss": 0.0015, "step": 90780 }, { "epoch": 2.7015607099817, "grad_norm": 0.14883548021316528, "learning_rate": 2.983608832718848e-07, "loss": 0.0022, "step": 90790 }, { "epoch": 2.7018582714653414, "grad_norm": 0.1855705827474594, "learning_rate": 2.977720993860883e-07, "loss": 0.0012, "step": 90800 }, { "epoch": 2.702155832948983, "grad_norm": 0.32760217785835266, "learning_rate": 2.971838791952564e-07, "loss": 0.002, "step": 90810 }, { "epoch": 2.7024533944326246, "grad_norm": 0.05952529236674309, "learning_rate": 2.965962227699043e-07, "loss": 0.0023, "step": 90820 }, { "epoch": 2.702750955916266, "grad_norm": 0.09374060481786728, "learning_rate": 2.960091301804774e-07, "loss": 0.0008, "step": 90830 }, { "epoch": 2.7030485173999077, "grad_norm": 0.07456159591674805, "learning_rate": 2.9542260149735715e-07, "loss": 0.0012, "step": 90840 }, { "epoch": 2.7033460788835493, "grad_norm": 0.10334672778844833, "learning_rate": 2.9483663679085506e-07, "loss": 0.0012, "step": 90850 }, { "epoch": 2.703643640367191, "grad_norm": 0.10831055045127869, "learning_rate": 2.942512361312161e-07, "loss": 0.0016, "step": 90860 }, { "epoch": 2.7039412018508324, "grad_norm": 0.2532060742378235, "learning_rate": 2.9366639958861576e-07, "loss": 0.002, "step": 90870 }, { "epoch": 2.704238763334474, "grad_norm": 0.19481340050697327, "learning_rate": 2.9308212723316463e-07, "loss": 0.0015, "step": 90880 }, { "epoch": 2.7045363248181156, "grad_norm": 0.04094380512833595, "learning_rate": 2.9249841913490396e-07, "loss": 0.0022, "step": 90890 }, { "epoch": 2.704833886301757, "grad_norm": 0.15168343484401703, "learning_rate": 2.919152753638066e-07, "loss": 0.0012, "step": 90900 }, { "epoch": 2.7051314477853987, "grad_norm": 0.13567586243152618, "learning_rate": 2.913326959897794e-07, "loss": 0.0017, "step": 90910 }, { "epoch": 2.7054290092690403, "grad_norm": 0.1561703234910965, "learning_rate": 2.907506810826616e-07, "loss": 0.0018, "step": 90920 }, { "epoch": 2.705726570752682, "grad_norm": 0.17042098939418793, "learning_rate": 2.9016923071222445e-07, "loss": 0.0025, "step": 90930 }, { "epoch": 2.7060241322363234, "grad_norm": 0.12474806606769562, "learning_rate": 2.8958834494816954e-07, "loss": 0.0013, "step": 90940 }, { "epoch": 2.706321693719965, "grad_norm": 0.25223982334136963, "learning_rate": 2.8900802386013336e-07, "loss": 0.0016, "step": 90950 }, { "epoch": 2.7066192552036066, "grad_norm": 0.0981292575597763, "learning_rate": 2.8842826751768415e-07, "loss": 0.0015, "step": 90960 }, { "epoch": 2.706916816687248, "grad_norm": 0.40324053168296814, "learning_rate": 2.878490759903224e-07, "loss": 0.0019, "step": 90970 }, { "epoch": 2.7072143781708897, "grad_norm": 0.24298451840877533, "learning_rate": 2.8727044934747874e-07, "loss": 0.0017, "step": 90980 }, { "epoch": 2.7075119396545313, "grad_norm": 0.4250267744064331, "learning_rate": 2.866923876585198e-07, "loss": 0.003, "step": 90990 }, { "epoch": 2.707809501138173, "grad_norm": 0.2733965814113617, "learning_rate": 2.861148909927419e-07, "loss": 0.0025, "step": 91000 }, { "epoch": 2.7081070626218144, "grad_norm": 0.21183553338050842, "learning_rate": 2.855379594193747e-07, "loss": 0.0014, "step": 91010 }, { "epoch": 2.708404624105456, "grad_norm": 0.2802691161632538, "learning_rate": 2.849615930075794e-07, "loss": 0.0012, "step": 91020 }, { "epoch": 2.7087021855890976, "grad_norm": 0.15092432498931885, "learning_rate": 2.843857918264498e-07, "loss": 0.0055, "step": 91030 }, { "epoch": 2.708999747072739, "grad_norm": 0.09964615106582642, "learning_rate": 2.8381055594501163e-07, "loss": 0.0019, "step": 91040 }, { "epoch": 2.7092973085563807, "grad_norm": 0.12208440899848938, "learning_rate": 2.832358854322237e-07, "loss": 0.0012, "step": 91050 }, { "epoch": 2.709594870040022, "grad_norm": 0.19154028594493866, "learning_rate": 2.8266178035697644e-07, "loss": 0.0017, "step": 91060 }, { "epoch": 2.7098924315236634, "grad_norm": 0.15838398039340973, "learning_rate": 2.820882407880926e-07, "loss": 0.0023, "step": 91070 }, { "epoch": 2.710189993007305, "grad_norm": 0.19089651107788086, "learning_rate": 2.8151526679432826e-07, "loss": 0.0019, "step": 91080 }, { "epoch": 2.7104875544909466, "grad_norm": 0.20499537885189056, "learning_rate": 2.8094285844436795e-07, "loss": 0.0012, "step": 91090 }, { "epoch": 2.710785115974588, "grad_norm": 0.04871018975973129, "learning_rate": 2.803710158068329e-07, "loss": 0.0016, "step": 91100 }, { "epoch": 2.7110826774582297, "grad_norm": 0.21090167760849, "learning_rate": 2.797997389502738e-07, "loss": 0.0012, "step": 91110 }, { "epoch": 2.7113802389418713, "grad_norm": 0.06505565345287323, "learning_rate": 2.792290279431753e-07, "loss": 0.0015, "step": 91120 }, { "epoch": 2.711677800425513, "grad_norm": 0.051570579409599304, "learning_rate": 2.7865888285395215e-07, "loss": 0.0011, "step": 91130 }, { "epoch": 2.7119753619091544, "grad_norm": 0.09330438077449799, "learning_rate": 2.78089303750953e-07, "loss": 0.002, "step": 91140 }, { "epoch": 2.712272923392796, "grad_norm": 0.02643033303320408, "learning_rate": 2.7752029070245766e-07, "loss": 0.0012, "step": 91150 }, { "epoch": 2.7125704848764376, "grad_norm": 0.2756384015083313, "learning_rate": 2.7695184377667936e-07, "loss": 0.002, "step": 91160 }, { "epoch": 2.712868046360079, "grad_norm": 0.21518748998641968, "learning_rate": 2.7638396304176086e-07, "loss": 0.0015, "step": 91170 }, { "epoch": 2.7131656078437207, "grad_norm": 0.2822663486003876, "learning_rate": 2.7581664856577985e-07, "loss": 0.002, "step": 91180 }, { "epoch": 2.7134631693273623, "grad_norm": 0.12527714669704437, "learning_rate": 2.7524990041674417e-07, "loss": 0.0061, "step": 91190 }, { "epoch": 2.713760730811004, "grad_norm": 0.34695687890052795, "learning_rate": 2.7468371866259667e-07, "loss": 0.0012, "step": 91200 }, { "epoch": 2.7140582922946455, "grad_norm": 0.01846652664244175, "learning_rate": 2.7411810337120814e-07, "loss": 0.0018, "step": 91210 }, { "epoch": 2.714355853778287, "grad_norm": 0.10888081043958664, "learning_rate": 2.735530546103837e-07, "loss": 0.0008, "step": 91220 }, { "epoch": 2.7146534152619286, "grad_norm": 0.22395388782024384, "learning_rate": 2.72988572447862e-07, "loss": 0.0023, "step": 91230 }, { "epoch": 2.71495097674557, "grad_norm": 0.09270567446947098, "learning_rate": 2.7242465695131006e-07, "loss": 0.002, "step": 91240 }, { "epoch": 2.7152485382292117, "grad_norm": 0.10779116302728653, "learning_rate": 2.718613081883309e-07, "loss": 0.0028, "step": 91250 }, { "epoch": 2.7155460997128533, "grad_norm": 0.15736210346221924, "learning_rate": 2.712985262264567e-07, "loss": 0.0015, "step": 91260 }, { "epoch": 2.7158436611964945, "grad_norm": 0.2665179967880249, "learning_rate": 2.707363111331535e-07, "loss": 0.0024, "step": 91270 }, { "epoch": 2.716141222680136, "grad_norm": 0.11135521531105042, "learning_rate": 2.7017466297581786e-07, "loss": 0.0015, "step": 91280 }, { "epoch": 2.7164387841637776, "grad_norm": 0.3406475782394409, "learning_rate": 2.696135818217799e-07, "loss": 0.0026, "step": 91290 }, { "epoch": 2.716736345647419, "grad_norm": 0.14718739688396454, "learning_rate": 2.6905306773830076e-07, "loss": 0.0019, "step": 91300 }, { "epoch": 2.7170339071310607, "grad_norm": 0.055394601076841354, "learning_rate": 2.684931207925745e-07, "loss": 0.0014, "step": 91310 }, { "epoch": 2.7173314686147023, "grad_norm": 0.054241497069597244, "learning_rate": 2.679337410517252e-07, "loss": 0.0012, "step": 91320 }, { "epoch": 2.717629030098344, "grad_norm": 0.07289931178092957, "learning_rate": 2.673749285828114e-07, "loss": 0.0017, "step": 91330 }, { "epoch": 2.7179265915819855, "grad_norm": 0.22605855762958527, "learning_rate": 2.6681668345282295e-07, "loss": 0.0029, "step": 91340 }, { "epoch": 2.718224153065627, "grad_norm": 0.04013246297836304, "learning_rate": 2.662590057286807e-07, "loss": 0.0016, "step": 91350 }, { "epoch": 2.7185217145492686, "grad_norm": 0.03327200934290886, "learning_rate": 2.6570189547723726e-07, "loss": 0.0029, "step": 91360 }, { "epoch": 2.71881927603291, "grad_norm": 0.28294461965560913, "learning_rate": 2.6514535276527975e-07, "loss": 0.0018, "step": 91370 }, { "epoch": 2.7191168375165518, "grad_norm": 0.1305229514837265, "learning_rate": 2.6458937765952487e-07, "loss": 0.0019, "step": 91380 }, { "epoch": 2.7194143990001933, "grad_norm": 0.07300736010074615, "learning_rate": 2.6403397022662146e-07, "loss": 0.0019, "step": 91390 }, { "epoch": 2.719711960483835, "grad_norm": 0.07740612328052521, "learning_rate": 2.6347913053315075e-07, "loss": 0.0019, "step": 91400 }, { "epoch": 2.7200095219674765, "grad_norm": 0.1258796602487564, "learning_rate": 2.629248586456268e-07, "loss": 0.0028, "step": 91410 }, { "epoch": 2.720307083451118, "grad_norm": 0.012694724835455418, "learning_rate": 2.623711546304947e-07, "loss": 0.0015, "step": 91420 }, { "epoch": 2.7206046449347596, "grad_norm": 0.0051194424740970135, "learning_rate": 2.618180185541308e-07, "loss": 0.0027, "step": 91430 }, { "epoch": 2.720902206418401, "grad_norm": 0.21563777327537537, "learning_rate": 2.612654504828438e-07, "loss": 0.0018, "step": 91440 }, { "epoch": 2.7211997679020428, "grad_norm": 0.2529109716415405, "learning_rate": 2.6071345048287567e-07, "loss": 0.0016, "step": 91450 }, { "epoch": 2.7214973293856843, "grad_norm": 0.018338214606046677, "learning_rate": 2.601620186203996e-07, "loss": 0.0018, "step": 91460 }, { "epoch": 2.721794890869326, "grad_norm": 0.040212538093328476, "learning_rate": 2.596111549615188e-07, "loss": 0.0016, "step": 91470 }, { "epoch": 2.7220924523529675, "grad_norm": 0.06485063582658768, "learning_rate": 2.590608595722704e-07, "loss": 0.0016, "step": 91480 }, { "epoch": 2.722390013836609, "grad_norm": 0.20027907192707062, "learning_rate": 2.585111325186235e-07, "loss": 0.0032, "step": 91490 }, { "epoch": 2.7226875753202506, "grad_norm": 0.3584764003753662, "learning_rate": 2.5796197386647847e-07, "loss": 0.0033, "step": 91500 }, { "epoch": 2.722985136803892, "grad_norm": 0.11833669245243073, "learning_rate": 2.5741338368166614e-07, "loss": 0.0013, "step": 91510 }, { "epoch": 2.7232826982875338, "grad_norm": 0.06300477683544159, "learning_rate": 2.5686536202995104e-07, "loss": 0.002, "step": 91520 }, { "epoch": 2.7235802597711753, "grad_norm": 0.4924125075340271, "learning_rate": 2.5631790897702957e-07, "loss": 0.0018, "step": 91530 }, { "epoch": 2.723877821254817, "grad_norm": 0.14737829566001892, "learning_rate": 2.557710245885303e-07, "loss": 0.0014, "step": 91540 }, { "epoch": 2.7241753827384585, "grad_norm": 0.027234656736254692, "learning_rate": 2.552247089300114e-07, "loss": 0.0034, "step": 91550 }, { "epoch": 2.7244729442221, "grad_norm": 0.17472104728221893, "learning_rate": 2.546789620669637e-07, "loss": 0.0033, "step": 91560 }, { "epoch": 2.7247705057057416, "grad_norm": 0.07401484251022339, "learning_rate": 2.5413378406481224e-07, "loss": 0.0018, "step": 91570 }, { "epoch": 2.725068067189383, "grad_norm": 0.11199340224266052, "learning_rate": 2.5358917498891134e-07, "loss": 0.0019, "step": 91580 }, { "epoch": 2.725365628673025, "grad_norm": 0.005868916865438223, "learning_rate": 2.5304513490454705e-07, "loss": 0.0012, "step": 91590 }, { "epoch": 2.7256631901566664, "grad_norm": 0.2326471358537674, "learning_rate": 2.525016638769384e-07, "loss": 0.0032, "step": 91600 }, { "epoch": 2.725960751640308, "grad_norm": 0.160324364900589, "learning_rate": 2.519587619712355e-07, "loss": 0.0028, "step": 91610 }, { "epoch": 2.7262583131239495, "grad_norm": 0.06556698679924011, "learning_rate": 2.514164292525212e-07, "loss": 0.0012, "step": 91620 }, { "epoch": 2.7265558746075906, "grad_norm": 0.0835488885641098, "learning_rate": 2.508746657858091e-07, "loss": 0.0007, "step": 91630 }, { "epoch": 2.726853436091232, "grad_norm": 0.16535885632038116, "learning_rate": 2.5033347163604507e-07, "loss": 0.0011, "step": 91640 }, { "epoch": 2.727150997574874, "grad_norm": 0.31112346053123474, "learning_rate": 2.4979284686810603e-07, "loss": 0.0015, "step": 91650 }, { "epoch": 2.7274485590585154, "grad_norm": 0.2771632969379425, "learning_rate": 2.4925279154680127e-07, "loss": 0.0034, "step": 91660 }, { "epoch": 2.727746120542157, "grad_norm": 0.028838245198130608, "learning_rate": 2.487133057368718e-07, "loss": 0.0011, "step": 91670 }, { "epoch": 2.7280436820257985, "grad_norm": 0.20589925348758698, "learning_rate": 2.481743895029909e-07, "loss": 0.0014, "step": 91680 }, { "epoch": 2.72834124350944, "grad_norm": 0.03855183720588684, "learning_rate": 2.476360429097613e-07, "loss": 0.0012, "step": 91690 }, { "epoch": 2.7286388049930816, "grad_norm": 0.12192713469266891, "learning_rate": 2.4709826602172025e-07, "loss": 0.0016, "step": 91700 }, { "epoch": 2.728936366476723, "grad_norm": 0.032156214118003845, "learning_rate": 2.4656105890333515e-07, "loss": 0.0026, "step": 91710 }, { "epoch": 2.729233927960365, "grad_norm": 0.2775346040725708, "learning_rate": 2.460244216190066e-07, "loss": 0.0021, "step": 91720 }, { "epoch": 2.7295314894440064, "grad_norm": 0.04733794555068016, "learning_rate": 2.4548835423306384e-07, "loss": 0.0009, "step": 91730 }, { "epoch": 2.729829050927648, "grad_norm": 0.24538660049438477, "learning_rate": 2.4495285680977087e-07, "loss": 0.0018, "step": 91740 }, { "epoch": 2.7301266124112895, "grad_norm": 0.1123773604631424, "learning_rate": 2.4441792941332145e-07, "loss": 0.0016, "step": 91750 }, { "epoch": 2.730424173894931, "grad_norm": 0.15541322529315948, "learning_rate": 2.438835721078436e-07, "loss": 0.001, "step": 91760 }, { "epoch": 2.7307217353785727, "grad_norm": 0.11132878065109253, "learning_rate": 2.4334978495739237e-07, "loss": 0.0013, "step": 91770 }, { "epoch": 2.7310192968622142, "grad_norm": 0.21734093129634857, "learning_rate": 2.428165680259592e-07, "loss": 0.0012, "step": 91780 }, { "epoch": 2.731316858345856, "grad_norm": 0.19453589618206024, "learning_rate": 2.4228392137746473e-07, "loss": 0.0017, "step": 91790 }, { "epoch": 2.7316144198294974, "grad_norm": 0.17852400243282318, "learning_rate": 2.4175184507576167e-07, "loss": 0.0019, "step": 91800 }, { "epoch": 2.731911981313139, "grad_norm": 0.08850789815187454, "learning_rate": 2.412203391846346e-07, "loss": 0.0015, "step": 91810 }, { "epoch": 2.7322095427967805, "grad_norm": 0.04663350433111191, "learning_rate": 2.406894037677987e-07, "loss": 0.0018, "step": 91820 }, { "epoch": 2.732507104280422, "grad_norm": 0.10252046585083008, "learning_rate": 2.401590388889025e-07, "loss": 0.002, "step": 91830 }, { "epoch": 2.7328046657640632, "grad_norm": 0.13528171181678772, "learning_rate": 2.396292446115256e-07, "loss": 0.0014, "step": 91840 }, { "epoch": 2.733102227247705, "grad_norm": 0.12481982260942459, "learning_rate": 2.3910002099917784e-07, "loss": 0.0025, "step": 91850 }, { "epoch": 2.7333997887313464, "grad_norm": 0.09831671416759491, "learning_rate": 2.3857136811530123e-07, "loss": 0.0013, "step": 91860 }, { "epoch": 2.733697350214988, "grad_norm": 0.13751940429210663, "learning_rate": 2.380432860232712e-07, "loss": 0.0012, "step": 91870 }, { "epoch": 2.7339949116986295, "grad_norm": 0.1112586110830307, "learning_rate": 2.3751577478639322e-07, "loss": 0.0025, "step": 91880 }, { "epoch": 2.734292473182271, "grad_norm": 0.18462631106376648, "learning_rate": 2.3698883446790223e-07, "loss": 0.0014, "step": 91890 }, { "epoch": 2.7345900346659127, "grad_norm": 0.15984682738780975, "learning_rate": 2.3646246513096938e-07, "loss": 0.0012, "step": 91900 }, { "epoch": 2.7348875961495542, "grad_norm": 0.2298903614282608, "learning_rate": 2.3593666683869476e-07, "loss": 0.0018, "step": 91910 }, { "epoch": 2.735185157633196, "grad_norm": 0.012163287959992886, "learning_rate": 2.354114396541085e-07, "loss": 0.0012, "step": 91920 }, { "epoch": 2.7354827191168374, "grad_norm": 0.17602455615997314, "learning_rate": 2.3488678364017526e-07, "loss": 0.0013, "step": 91930 }, { "epoch": 2.735780280600479, "grad_norm": 0.26926642656326294, "learning_rate": 2.3436269885978914e-07, "loss": 0.0024, "step": 91940 }, { "epoch": 2.7360778420841205, "grad_norm": 0.11928063631057739, "learning_rate": 2.3383918537577766e-07, "loss": 0.0013, "step": 91950 }, { "epoch": 2.736375403567762, "grad_norm": 0.2263949066400528, "learning_rate": 2.3331624325089786e-07, "loss": 0.0104, "step": 91960 }, { "epoch": 2.7366729650514037, "grad_norm": 0.0719568058848381, "learning_rate": 2.3279387254783846e-07, "loss": 0.0012, "step": 91970 }, { "epoch": 2.7369705265350452, "grad_norm": 0.11762560158967972, "learning_rate": 2.3227207332922165e-07, "loss": 0.0012, "step": 91980 }, { "epoch": 2.737268088018687, "grad_norm": 0.023874623700976372, "learning_rate": 2.3175084565760008e-07, "loss": 0.0008, "step": 91990 }, { "epoch": 2.7375656495023284, "grad_norm": 0.08811167627573013, "learning_rate": 2.3123018959545607e-07, "loss": 0.0021, "step": 92000 }, { "epoch": 2.73786321098597, "grad_norm": 0.17530278861522675, "learning_rate": 2.3071010520520576e-07, "loss": 0.0026, "step": 92010 }, { "epoch": 2.7381607724696115, "grad_norm": 0.2835545241832733, "learning_rate": 2.3019059254919652e-07, "loss": 0.002, "step": 92020 }, { "epoch": 2.738458333953253, "grad_norm": 0.07809442281723022, "learning_rate": 2.2967165168970518e-07, "loss": 0.0012, "step": 92030 }, { "epoch": 2.7387558954368947, "grad_norm": 0.04031847417354584, "learning_rate": 2.291532826889431e-07, "loss": 0.0017, "step": 92040 }, { "epoch": 2.7390534569205363, "grad_norm": 0.12006296962499619, "learning_rate": 2.2863548560905004e-07, "loss": 0.0017, "step": 92050 }, { "epoch": 2.739351018404178, "grad_norm": 0.052799422293901443, "learning_rate": 2.2811826051210018e-07, "loss": 0.0018, "step": 92060 }, { "epoch": 2.7396485798878194, "grad_norm": 0.2218218445777893, "learning_rate": 2.2760160746009618e-07, "loss": 0.0018, "step": 92070 }, { "epoch": 2.739946141371461, "grad_norm": 0.13259823620319366, "learning_rate": 2.2708552651497406e-07, "loss": 0.0015, "step": 92080 }, { "epoch": 2.7402437028551025, "grad_norm": 0.1258178949356079, "learning_rate": 2.2657001773860043e-07, "loss": 0.0016, "step": 92090 }, { "epoch": 2.740541264338744, "grad_norm": 0.12751705944538116, "learning_rate": 2.2605508119277474e-07, "loss": 0.0018, "step": 92100 }, { "epoch": 2.7408388258223857, "grad_norm": 0.12295825034379959, "learning_rate": 2.255407169392254e-07, "loss": 0.0012, "step": 92110 }, { "epoch": 2.7411363873060273, "grad_norm": 0.13879582285881042, "learning_rate": 2.2502692503961365e-07, "loss": 0.0015, "step": 92120 }, { "epoch": 2.741433948789669, "grad_norm": 0.10153467953205109, "learning_rate": 2.2451370555553186e-07, "loss": 0.001, "step": 92130 }, { "epoch": 2.7417315102733104, "grad_norm": 0.07298433780670166, "learning_rate": 2.2400105854850528e-07, "loss": 0.0012, "step": 92140 }, { "epoch": 2.742029071756952, "grad_norm": 0.1542220562696457, "learning_rate": 2.2348898407998754e-07, "loss": 0.0012, "step": 92150 }, { "epoch": 2.7423266332405936, "grad_norm": 0.28475141525268555, "learning_rate": 2.2297748221136617e-07, "loss": 0.0014, "step": 92160 }, { "epoch": 2.742624194724235, "grad_norm": 0.46094968914985657, "learning_rate": 2.2246655300395826e-07, "loss": 0.0025, "step": 92170 }, { "epoch": 2.7429217562078767, "grad_norm": 0.065712571144104, "learning_rate": 2.2195619651901423e-07, "loss": 0.0016, "step": 92180 }, { "epoch": 2.7432193176915183, "grad_norm": 0.2350897192955017, "learning_rate": 2.214464128177135e-07, "loss": 0.002, "step": 92190 }, { "epoch": 2.7435168791751594, "grad_norm": 0.14944398403167725, "learning_rate": 2.2093720196116942e-07, "loss": 0.0023, "step": 92200 }, { "epoch": 2.743814440658801, "grad_norm": 0.14857217669487, "learning_rate": 2.2042856401042478e-07, "loss": 0.0015, "step": 92210 }, { "epoch": 2.7441120021424426, "grad_norm": 0.09371553361415863, "learning_rate": 2.1992049902645362e-07, "loss": 0.0011, "step": 92220 }, { "epoch": 2.744409563626084, "grad_norm": 0.037329357117414474, "learning_rate": 2.1941300707016223e-07, "loss": 0.0014, "step": 92230 }, { "epoch": 2.7447071251097257, "grad_norm": 0.15155450999736786, "learning_rate": 2.18906088202388e-07, "loss": 0.0018, "step": 92240 }, { "epoch": 2.7450046865933673, "grad_norm": 0.1821558177471161, "learning_rate": 2.1839974248390073e-07, "loss": 0.0018, "step": 92250 }, { "epoch": 2.745302248077009, "grad_norm": 0.2345699816942215, "learning_rate": 2.1789396997539735e-07, "loss": 0.002, "step": 92260 }, { "epoch": 2.7455998095606504, "grad_norm": 0.24087688326835632, "learning_rate": 2.1738877073751164e-07, "loss": 0.002, "step": 92270 }, { "epoch": 2.745897371044292, "grad_norm": 0.1179414913058281, "learning_rate": 2.1688414483080455e-07, "loss": 0.0031, "step": 92280 }, { "epoch": 2.7461949325279336, "grad_norm": 0.14693142473697662, "learning_rate": 2.1638009231577052e-07, "loss": 0.0007, "step": 92290 }, { "epoch": 2.746492494011575, "grad_norm": 0.08880078047513962, "learning_rate": 2.1587661325283393e-07, "loss": 0.0015, "step": 92300 }, { "epoch": 2.7467900554952167, "grad_norm": 0.3710150122642517, "learning_rate": 2.1537370770235154e-07, "loss": 0.0021, "step": 92310 }, { "epoch": 2.7470876169788583, "grad_norm": 0.13527938723564148, "learning_rate": 2.1487137572461014e-07, "loss": 0.0014, "step": 92320 }, { "epoch": 2.7473851784625, "grad_norm": 0.09967972338199615, "learning_rate": 2.1436961737982987e-07, "loss": 0.0014, "step": 92330 }, { "epoch": 2.7476827399461414, "grad_norm": 0.24404367804527283, "learning_rate": 2.1386843272815873e-07, "loss": 0.0014, "step": 92340 }, { "epoch": 2.747980301429783, "grad_norm": 0.19062365591526031, "learning_rate": 2.1336782182967864e-07, "loss": 0.0045, "step": 92350 }, { "epoch": 2.7482778629134246, "grad_norm": 0.19868268072605133, "learning_rate": 2.1286778474440274e-07, "loss": 0.0022, "step": 92360 }, { "epoch": 2.748575424397066, "grad_norm": 0.06507251411676407, "learning_rate": 2.1236832153227305e-07, "loss": 0.0014, "step": 92370 }, { "epoch": 2.7488729858807077, "grad_norm": 0.4033534824848175, "learning_rate": 2.1186943225316558e-07, "loss": 0.0025, "step": 92380 }, { "epoch": 2.7491705473643493, "grad_norm": 0.0827503651380539, "learning_rate": 2.1137111696688582e-07, "loss": 0.0019, "step": 92390 }, { "epoch": 2.749468108847991, "grad_norm": 0.10013526678085327, "learning_rate": 2.1087337573317213e-07, "loss": 0.0016, "step": 92400 }, { "epoch": 2.7497656703316324, "grad_norm": 0.011814177967607975, "learning_rate": 2.1037620861169116e-07, "loss": 0.0013, "step": 92410 }, { "epoch": 2.7500632318152736, "grad_norm": 0.12783190608024597, "learning_rate": 2.0987961566204252e-07, "loss": 0.0013, "step": 92420 }, { "epoch": 2.750360793298915, "grad_norm": 0.0935429185628891, "learning_rate": 2.0938359694375798e-07, "loss": 0.002, "step": 92430 }, { "epoch": 2.7506583547825567, "grad_norm": 0.09234260022640228, "learning_rate": 2.0888815251629946e-07, "loss": 0.002, "step": 92440 }, { "epoch": 2.7509559162661983, "grad_norm": 0.1141735091805458, "learning_rate": 2.0839328243905942e-07, "loss": 0.0009, "step": 92450 }, { "epoch": 2.75125347774984, "grad_norm": 0.10887755453586578, "learning_rate": 2.07898986771361e-07, "loss": 0.0015, "step": 92460 }, { "epoch": 2.7515510392334814, "grad_norm": 0.14012236893177032, "learning_rate": 2.0740526557246177e-07, "loss": 0.0016, "step": 92470 }, { "epoch": 2.751848600717123, "grad_norm": 0.02864709496498108, "learning_rate": 2.0691211890154605e-07, "loss": 0.0021, "step": 92480 }, { "epoch": 2.7521461622007646, "grad_norm": 0.04228159040212631, "learning_rate": 2.0641954681773324e-07, "loss": 0.0009, "step": 92490 }, { "epoch": 2.752443723684406, "grad_norm": 0.02321985922753811, "learning_rate": 2.0592754938007053e-07, "loss": 0.0009, "step": 92500 }, { "epoch": 2.7527412851680477, "grad_norm": 0.13307906687259674, "learning_rate": 2.0543612664753908e-07, "loss": 0.0022, "step": 92510 }, { "epoch": 2.7530388466516893, "grad_norm": 0.11576446145772934, "learning_rate": 2.0494527867904846e-07, "loss": 0.0023, "step": 92520 }, { "epoch": 2.753336408135331, "grad_norm": 0.2563819885253906, "learning_rate": 2.0445500553344156e-07, "loss": 0.0013, "step": 92530 }, { "epoch": 2.7536339696189724, "grad_norm": 0.12507477402687073, "learning_rate": 2.0396530726949083e-07, "loss": 0.0011, "step": 92540 }, { "epoch": 2.753931531102614, "grad_norm": 0.22138428688049316, "learning_rate": 2.034761839459015e-07, "loss": 0.0014, "step": 92550 }, { "epoch": 2.7542290925862556, "grad_norm": 0.1018744707107544, "learning_rate": 2.0298763562130784e-07, "loss": 0.0019, "step": 92560 }, { "epoch": 2.754526654069897, "grad_norm": 0.02289537340402603, "learning_rate": 2.024996623542763e-07, "loss": 0.0016, "step": 92570 }, { "epoch": 2.7548242155535387, "grad_norm": 0.11352957785129547, "learning_rate": 2.0201226420330456e-07, "loss": 0.0017, "step": 92580 }, { "epoch": 2.7551217770371803, "grad_norm": 0.16872790455818176, "learning_rate": 2.0152544122682138e-07, "loss": 0.0028, "step": 92590 }, { "epoch": 2.755419338520822, "grad_norm": 0.14576810598373413, "learning_rate": 2.0103919348318567e-07, "loss": 0.001, "step": 92600 }, { "epoch": 2.7557169000044635, "grad_norm": 0.10355550795793533, "learning_rate": 2.0055352103068802e-07, "loss": 0.0016, "step": 92610 }, { "epoch": 2.756014461488105, "grad_norm": 0.17149460315704346, "learning_rate": 2.0006842392754966e-07, "loss": 0.0034, "step": 92620 }, { "epoch": 2.7563120229717466, "grad_norm": 0.07389186322689056, "learning_rate": 1.9958390223192515e-07, "loss": 0.0017, "step": 92630 }, { "epoch": 2.756609584455388, "grad_norm": 0.12562580406665802, "learning_rate": 1.9909995600189524e-07, "loss": 0.0023, "step": 92640 }, { "epoch": 2.7569071459390297, "grad_norm": 0.09600019454956055, "learning_rate": 1.9861658529547634e-07, "loss": 0.0021, "step": 92650 }, { "epoch": 2.7572047074226713, "grad_norm": 0.3228932023048401, "learning_rate": 1.9813379017061374e-07, "loss": 0.0023, "step": 92660 }, { "epoch": 2.757502268906313, "grad_norm": 0.10943718999624252, "learning_rate": 1.976515706851845e-07, "loss": 0.0014, "step": 92670 }, { "epoch": 2.7577998303899545, "grad_norm": 0.0864000916481018, "learning_rate": 1.9716992689699564e-07, "loss": 0.0017, "step": 92680 }, { "epoch": 2.758097391873596, "grad_norm": 0.3394947052001953, "learning_rate": 1.9668885886378552e-07, "loss": 0.0037, "step": 92690 }, { "epoch": 2.7583949533572376, "grad_norm": 0.11560536175966263, "learning_rate": 1.9620836664322463e-07, "loss": 0.0015, "step": 92700 }, { "epoch": 2.758692514840879, "grad_norm": 0.06268498301506042, "learning_rate": 1.9572845029291353e-07, "loss": 0.0021, "step": 92710 }, { "epoch": 2.7589900763245208, "grad_norm": 0.3438456058502197, "learning_rate": 1.9524910987038237e-07, "loss": 0.002, "step": 92720 }, { "epoch": 2.7592876378081623, "grad_norm": 0.0423436239361763, "learning_rate": 1.9477034543309514e-07, "loss": 0.001, "step": 92730 }, { "epoch": 2.759585199291804, "grad_norm": 0.06059803068637848, "learning_rate": 1.9429215703844483e-07, "loss": 0.0013, "step": 92740 }, { "epoch": 2.7598827607754455, "grad_norm": 0.1311369240283966, "learning_rate": 1.938145447437556e-07, "loss": 0.0016, "step": 92750 }, { "epoch": 2.760180322259087, "grad_norm": 0.1350327581167221, "learning_rate": 1.933375086062833e-07, "loss": 0.0016, "step": 92760 }, { "epoch": 2.760477883742728, "grad_norm": 0.1994101107120514, "learning_rate": 1.9286104868321387e-07, "loss": 0.0022, "step": 92770 }, { "epoch": 2.7607754452263698, "grad_norm": 0.0639861524105072, "learning_rate": 1.9238516503166437e-07, "loss": 0.0016, "step": 92780 }, { "epoch": 2.7610730067100113, "grad_norm": 0.14206872880458832, "learning_rate": 1.919098577086831e-07, "loss": 0.0013, "step": 92790 }, { "epoch": 2.761370568193653, "grad_norm": 0.21101608872413635, "learning_rate": 1.9143512677124886e-07, "loss": 0.0013, "step": 92800 }, { "epoch": 2.7616681296772945, "grad_norm": 0.1461125761270523, "learning_rate": 1.909609722762723e-07, "loss": 0.0017, "step": 92810 }, { "epoch": 2.761965691160936, "grad_norm": 0.025080766528844833, "learning_rate": 1.9048739428059348e-07, "loss": 0.0053, "step": 92820 }, { "epoch": 2.7622632526445776, "grad_norm": 0.2090928852558136, "learning_rate": 1.9001439284098422e-07, "loss": 0.0021, "step": 92830 }, { "epoch": 2.762560814128219, "grad_norm": 0.07112585008144379, "learning_rate": 1.8954196801414694e-07, "loss": 0.0012, "step": 92840 }, { "epoch": 2.7628583756118608, "grad_norm": 0.11889955401420593, "learning_rate": 1.890701198567163e-07, "loss": 0.0016, "step": 92850 }, { "epoch": 2.7631559370955023, "grad_norm": 0.19703267514705658, "learning_rate": 1.885988484252549e-07, "loss": 0.0013, "step": 92860 }, { "epoch": 2.763453498579144, "grad_norm": 0.1269800066947937, "learning_rate": 1.8812815377625914e-07, "loss": 0.0015, "step": 92870 }, { "epoch": 2.7637510600627855, "grad_norm": 0.062122877687215805, "learning_rate": 1.8765803596615452e-07, "loss": 0.002, "step": 92880 }, { "epoch": 2.764048621546427, "grad_norm": 0.10018249601125717, "learning_rate": 1.8718849505129866e-07, "loss": 0.002, "step": 92890 }, { "epoch": 2.7643461830300686, "grad_norm": 0.045351557433605194, "learning_rate": 1.8671953108797825e-07, "loss": 0.0019, "step": 92900 }, { "epoch": 2.76464374451371, "grad_norm": 0.10569143295288086, "learning_rate": 1.8625114413241275e-07, "loss": 0.0015, "step": 92910 }, { "epoch": 2.7649413059973518, "grad_norm": 0.15368874371051788, "learning_rate": 1.8578333424075113e-07, "loss": 0.0014, "step": 92920 }, { "epoch": 2.7652388674809933, "grad_norm": 0.10194048285484314, "learning_rate": 1.8531610146907407e-07, "loss": 0.0011, "step": 92930 }, { "epoch": 2.765536428964635, "grad_norm": 0.10403253138065338, "learning_rate": 1.8484944587339236e-07, "loss": 0.0016, "step": 92940 }, { "epoch": 2.7658339904482765, "grad_norm": 0.030366264283657074, "learning_rate": 1.8438336750964736e-07, "loss": 0.0014, "step": 92950 }, { "epoch": 2.766131551931918, "grad_norm": 0.4500304162502289, "learning_rate": 1.839178664337121e-07, "loss": 0.0037, "step": 92960 }, { "epoch": 2.7664291134155596, "grad_norm": 0.14382414519786835, "learning_rate": 1.8345294270139148e-07, "loss": 0.0014, "step": 92970 }, { "epoch": 2.766726674899201, "grad_norm": 0.12400878965854645, "learning_rate": 1.8298859636841747e-07, "loss": 0.0012, "step": 92980 }, { "epoch": 2.7670242363828423, "grad_norm": 0.08696789294481277, "learning_rate": 1.8252482749045564e-07, "loss": 0.0015, "step": 92990 }, { "epoch": 2.767321797866484, "grad_norm": 0.09236936271190643, "learning_rate": 1.8206163612310312e-07, "loss": 0.0015, "step": 93000 }, { "epoch": 2.7676193593501255, "grad_norm": 0.07251813262701035, "learning_rate": 1.8159902232188553e-07, "loss": 0.0019, "step": 93010 }, { "epoch": 2.767916920833767, "grad_norm": 0.14323274791240692, "learning_rate": 1.8113698614226015e-07, "loss": 0.0005, "step": 93020 }, { "epoch": 2.7682144823174086, "grad_norm": 0.254336416721344, "learning_rate": 1.8067552763961438e-07, "loss": 0.0013, "step": 93030 }, { "epoch": 2.76851204380105, "grad_norm": 0.2788267731666565, "learning_rate": 1.8021464686926893e-07, "loss": 0.0026, "step": 93040 }, { "epoch": 2.7688096052846918, "grad_norm": 0.11970333755016327, "learning_rate": 1.797543438864724e-07, "loss": 0.0022, "step": 93050 }, { "epoch": 2.7691071667683334, "grad_norm": 0.1408693492412567, "learning_rate": 1.7929461874640452e-07, "loss": 0.0011, "step": 93060 }, { "epoch": 2.769404728251975, "grad_norm": 0.09242043644189835, "learning_rate": 1.7883547150417734e-07, "loss": 0.0008, "step": 93070 }, { "epoch": 2.7697022897356165, "grad_norm": 0.24431829154491425, "learning_rate": 1.7837690221483238e-07, "loss": 0.0025, "step": 93080 }, { "epoch": 2.769999851219258, "grad_norm": 0.036551620811223984, "learning_rate": 1.779189109333418e-07, "loss": 0.0018, "step": 93090 }, { "epoch": 2.7702974127028996, "grad_norm": 0.0665254145860672, "learning_rate": 1.7746149771460885e-07, "loss": 0.0016, "step": 93100 }, { "epoch": 2.770594974186541, "grad_norm": 0.22327470779418945, "learning_rate": 1.7700466261346748e-07, "loss": 0.0015, "step": 93110 }, { "epoch": 2.770892535670183, "grad_norm": 0.275913268327713, "learning_rate": 1.7654840568468335e-07, "loss": 0.0018, "step": 93120 }, { "epoch": 2.7711900971538244, "grad_norm": 0.21931393444538116, "learning_rate": 1.760927269829499e-07, "loss": 0.0015, "step": 93130 }, { "epoch": 2.771487658637466, "grad_norm": 0.10758709907531738, "learning_rate": 1.7563762656289397e-07, "loss": 0.001, "step": 93140 }, { "epoch": 2.7717852201211075, "grad_norm": 0.232185497879982, "learning_rate": 1.7518310447907304e-07, "loss": 0.0022, "step": 93150 }, { "epoch": 2.772082781604749, "grad_norm": 0.16612127423286438, "learning_rate": 1.7472916078597246e-07, "loss": 0.0027, "step": 93160 }, { "epoch": 2.7723803430883907, "grad_norm": 0.09684522449970245, "learning_rate": 1.74275795538012e-07, "loss": 0.0011, "step": 93170 }, { "epoch": 2.7726779045720322, "grad_norm": 0.12807205319404602, "learning_rate": 1.7382300878953983e-07, "loss": 0.0015, "step": 93180 }, { "epoch": 2.772975466055674, "grad_norm": 0.32821428775787354, "learning_rate": 1.7337080059483536e-07, "loss": 0.0021, "step": 93190 }, { "epoch": 2.7732730275393154, "grad_norm": 0.07305265963077545, "learning_rate": 1.729191710081074e-07, "loss": 0.0016, "step": 93200 }, { "epoch": 2.773570589022957, "grad_norm": 0.2615760564804077, "learning_rate": 1.7246812008349767e-07, "loss": 0.0023, "step": 93210 }, { "epoch": 2.7738681505065985, "grad_norm": 0.029593151062726974, "learning_rate": 1.7201764787507735e-07, "loss": 0.0015, "step": 93220 }, { "epoch": 2.77416571199024, "grad_norm": 0.17197102308273315, "learning_rate": 1.7156775443684824e-07, "loss": 0.0023, "step": 93230 }, { "epoch": 2.7744632734738817, "grad_norm": 0.04576312005519867, "learning_rate": 1.7111843982274223e-07, "loss": 0.0039, "step": 93240 }, { "epoch": 2.7747608349575232, "grad_norm": 0.01681853085756302, "learning_rate": 1.7066970408662287e-07, "loss": 0.001, "step": 93250 }, { "epoch": 2.775058396441165, "grad_norm": 0.10810018330812454, "learning_rate": 1.7022154728228323e-07, "loss": 0.002, "step": 93260 }, { "epoch": 2.7753559579248064, "grad_norm": 0.09914533793926239, "learning_rate": 1.6977396946344872e-07, "loss": 0.0016, "step": 93270 }, { "epoch": 2.775653519408448, "grad_norm": 0.03695652261376381, "learning_rate": 1.6932697068377358e-07, "loss": 0.0008, "step": 93280 }, { "epoch": 2.7759510808920895, "grad_norm": 0.09965166449546814, "learning_rate": 1.688805509968422e-07, "loss": 0.0019, "step": 93290 }, { "epoch": 2.776248642375731, "grad_norm": 0.07971812039613724, "learning_rate": 1.684347104561723e-07, "loss": 0.0017, "step": 93300 }, { "epoch": 2.7765462038593727, "grad_norm": 0.3147484362125397, "learning_rate": 1.6798944911520953e-07, "loss": 0.0012, "step": 93310 }, { "epoch": 2.7768437653430142, "grad_norm": 0.21071377396583557, "learning_rate": 1.6754476702733114e-07, "loss": 0.0009, "step": 93320 }, { "epoch": 2.777141326826656, "grad_norm": 0.22412095963954926, "learning_rate": 1.6710066424584505e-07, "loss": 0.0013, "step": 93330 }, { "epoch": 2.777438888310297, "grad_norm": 0.24463625252246857, "learning_rate": 1.666571408239903e-07, "loss": 0.0014, "step": 93340 }, { "epoch": 2.7777364497939385, "grad_norm": 0.0613744892179966, "learning_rate": 1.662141968149339e-07, "loss": 0.0021, "step": 93350 }, { "epoch": 2.77803401127758, "grad_norm": 0.24871273338794708, "learning_rate": 1.6577183227177662e-07, "loss": 0.0025, "step": 93360 }, { "epoch": 2.7783315727612217, "grad_norm": 0.17414379119873047, "learning_rate": 1.653300472475472e-07, "loss": 0.0011, "step": 93370 }, { "epoch": 2.7786291342448632, "grad_norm": 0.051605820655822754, "learning_rate": 1.6488884179520826e-07, "loss": 0.0021, "step": 93380 }, { "epoch": 2.778926695728505, "grad_norm": 0.06874902546405792, "learning_rate": 1.6444821596764803e-07, "loss": 0.0016, "step": 93390 }, { "epoch": 2.7792242572121464, "grad_norm": 0.11176943778991699, "learning_rate": 1.6400816981768985e-07, "loss": 0.0013, "step": 93400 }, { "epoch": 2.779521818695788, "grad_norm": 0.2831631004810333, "learning_rate": 1.635687033980843e-07, "loss": 0.0012, "step": 93410 }, { "epoch": 2.7798193801794295, "grad_norm": 0.17257727682590485, "learning_rate": 1.6312981676151584e-07, "loss": 0.0024, "step": 93420 }, { "epoch": 2.780116941663071, "grad_norm": 0.11031429469585419, "learning_rate": 1.6269150996059524e-07, "loss": 0.0012, "step": 93430 }, { "epoch": 2.7804145031467127, "grad_norm": 0.0351610966026783, "learning_rate": 1.6225378304786766e-07, "loss": 0.0013, "step": 93440 }, { "epoch": 2.7807120646303543, "grad_norm": 0.15021932125091553, "learning_rate": 1.6181663607580556e-07, "loss": 0.001, "step": 93450 }, { "epoch": 2.781009626113996, "grad_norm": 0.039877478033304214, "learning_rate": 1.613800690968148e-07, "loss": 0.0016, "step": 93460 }, { "epoch": 2.7813071875976374, "grad_norm": 0.2557045519351959, "learning_rate": 1.6094408216322965e-07, "loss": 0.0018, "step": 93470 }, { "epoch": 2.781604749081279, "grad_norm": 0.09465055912733078, "learning_rate": 1.6050867532731496e-07, "loss": 0.001, "step": 93480 }, { "epoch": 2.7819023105649205, "grad_norm": 0.13585786521434784, "learning_rate": 1.600738486412673e-07, "loss": 0.0011, "step": 93490 }, { "epoch": 2.782199872048562, "grad_norm": 0.19349806010723114, "learning_rate": 1.5963960215721276e-07, "loss": 0.0017, "step": 93500 }, { "epoch": 2.7824974335322037, "grad_norm": 0.13009898364543915, "learning_rate": 1.59205935927208e-07, "loss": 0.001, "step": 93510 }, { "epoch": 2.7827949950158453, "grad_norm": 0.09729001671075821, "learning_rate": 1.5877285000323982e-07, "loss": 0.0015, "step": 93520 }, { "epoch": 2.783092556499487, "grad_norm": 0.13076774775981903, "learning_rate": 1.5834034443722669e-07, "loss": 0.0014, "step": 93530 }, { "epoch": 2.7833901179831284, "grad_norm": 0.07489734143018723, "learning_rate": 1.579084192810154e-07, "loss": 0.0017, "step": 93540 }, { "epoch": 2.78368767946677, "grad_norm": 0.08063627034425735, "learning_rate": 1.5747707458638516e-07, "loss": 0.0014, "step": 93550 }, { "epoch": 2.783985240950411, "grad_norm": 0.20669345557689667, "learning_rate": 1.5704631040504515e-07, "loss": 0.002, "step": 93560 }, { "epoch": 2.7842828024340527, "grad_norm": 0.3838927745819092, "learning_rate": 1.566161267886346e-07, "loss": 0.0027, "step": 93570 }, { "epoch": 2.7845803639176943, "grad_norm": 0.27536988258361816, "learning_rate": 1.5618652378872223e-07, "loss": 0.0012, "step": 93580 }, { "epoch": 2.784877925401336, "grad_norm": 0.21918900310993195, "learning_rate": 1.5575750145680802e-07, "loss": 0.0024, "step": 93590 }, { "epoch": 2.7851754868849774, "grad_norm": 0.17089533805847168, "learning_rate": 1.553290598443241e-07, "loss": 0.0016, "step": 93600 }, { "epoch": 2.785473048368619, "grad_norm": 0.14384815096855164, "learning_rate": 1.5490119900263e-07, "loss": 0.0015, "step": 93610 }, { "epoch": 2.7857706098522605, "grad_norm": 0.0838296115398407, "learning_rate": 1.544739189830169e-07, "loss": 0.0015, "step": 93620 }, { "epoch": 2.786068171335902, "grad_norm": 0.22044284641742706, "learning_rate": 1.5404721983670712e-07, "loss": 0.0011, "step": 93630 }, { "epoch": 2.7863657328195437, "grad_norm": 0.03624308109283447, "learning_rate": 1.5362110161485255e-07, "loss": 0.0014, "step": 93640 }, { "epoch": 2.7866632943031853, "grad_norm": 0.09767948091030121, "learning_rate": 1.5319556436853456e-07, "loss": 0.0017, "step": 93650 }, { "epoch": 2.786960855786827, "grad_norm": 0.2911722660064697, "learning_rate": 1.5277060814876677e-07, "loss": 0.0032, "step": 93660 }, { "epoch": 2.7872584172704684, "grad_norm": 0.11244449764490128, "learning_rate": 1.5234623300649177e-07, "loss": 0.0018, "step": 93670 }, { "epoch": 2.78755597875411, "grad_norm": 0.06848479062318802, "learning_rate": 1.519224389925833e-07, "loss": 0.0017, "step": 93680 }, { "epoch": 2.7878535402377516, "grad_norm": 0.0977097749710083, "learning_rate": 1.5149922615784461e-07, "loss": 0.0017, "step": 93690 }, { "epoch": 2.788151101721393, "grad_norm": 0.009604335762560368, "learning_rate": 1.5107659455301016e-07, "loss": 0.0013, "step": 93700 }, { "epoch": 2.7884486632050347, "grad_norm": 0.1254822164773941, "learning_rate": 1.5065454422874327e-07, "loss": 0.0018, "step": 93710 }, { "epoch": 2.7887462246886763, "grad_norm": 0.06904809921979904, "learning_rate": 1.5023307523564012e-07, "loss": 0.0014, "step": 93720 }, { "epoch": 2.789043786172318, "grad_norm": 0.24371559917926788, "learning_rate": 1.4981218762422478e-07, "loss": 0.0021, "step": 93730 }, { "epoch": 2.7893413476559594, "grad_norm": 0.11428379267454147, "learning_rate": 1.4939188144495243e-07, "loss": 0.0013, "step": 93740 }, { "epoch": 2.789638909139601, "grad_norm": 0.07270479202270508, "learning_rate": 1.4897215674820887e-07, "loss": 0.0013, "step": 93750 }, { "epoch": 2.7899364706232426, "grad_norm": 0.009932298213243484, "learning_rate": 1.4855301358431053e-07, "loss": 0.001, "step": 93760 }, { "epoch": 2.790234032106884, "grad_norm": 0.04943379387259483, "learning_rate": 1.4813445200350274e-07, "loss": 0.001, "step": 93770 }, { "epoch": 2.7905315935905257, "grad_norm": 0.21727924048900604, "learning_rate": 1.4771647205596208e-07, "loss": 0.0016, "step": 93780 }, { "epoch": 2.7908291550741673, "grad_norm": 0.26787522435188293, "learning_rate": 1.4729907379179565e-07, "loss": 0.0023, "step": 93790 }, { "epoch": 2.791126716557809, "grad_norm": 0.10575583577156067, "learning_rate": 1.4688225726104066e-07, "loss": 0.0015, "step": 93800 }, { "epoch": 2.7914242780414504, "grad_norm": 0.07648095488548279, "learning_rate": 1.4646602251366326e-07, "loss": 0.0011, "step": 93810 }, { "epoch": 2.791721839525092, "grad_norm": 0.12544170022010803, "learning_rate": 1.4605036959956132e-07, "loss": 0.0032, "step": 93820 }, { "epoch": 2.7920194010087336, "grad_norm": 0.0746292844414711, "learning_rate": 1.4563529856856328e-07, "loss": 0.0012, "step": 93830 }, { "epoch": 2.792316962492375, "grad_norm": 0.2054356187582016, "learning_rate": 1.4522080947042717e-07, "loss": 0.002, "step": 93840 }, { "epoch": 2.7926145239760167, "grad_norm": 0.08910191059112549, "learning_rate": 1.4480690235484041e-07, "loss": 0.0023, "step": 93850 }, { "epoch": 2.7929120854596583, "grad_norm": 0.09126259386539459, "learning_rate": 1.4439357727142167e-07, "loss": 0.0078, "step": 93860 }, { "epoch": 2.7932096469433, "grad_norm": 0.14598502218723297, "learning_rate": 1.439808342697202e-07, "loss": 0.0023, "step": 93870 }, { "epoch": 2.7935072084269414, "grad_norm": 0.10946004837751389, "learning_rate": 1.4356867339921365e-07, "loss": 0.0007, "step": 93880 }, { "epoch": 2.793804769910583, "grad_norm": 0.1339552253484726, "learning_rate": 1.431570947093125e-07, "loss": 0.0016, "step": 93890 }, { "epoch": 2.7941023313942246, "grad_norm": 0.009549723006784916, "learning_rate": 1.4274609824935558e-07, "loss": 0.0029, "step": 93900 }, { "epoch": 2.794399892877866, "grad_norm": 0.0472363717854023, "learning_rate": 1.4233568406861354e-07, "loss": 0.0025, "step": 93910 }, { "epoch": 2.7946974543615073, "grad_norm": 0.18040403723716736, "learning_rate": 1.419258522162842e-07, "loss": 0.0012, "step": 93920 }, { "epoch": 2.794995015845149, "grad_norm": 0.08603351563215256, "learning_rate": 1.4151660274149825e-07, "loss": 0.0015, "step": 93930 }, { "epoch": 2.7952925773287904, "grad_norm": 0.053910091519355774, "learning_rate": 1.4110793569331648e-07, "loss": 0.0073, "step": 93940 }, { "epoch": 2.795590138812432, "grad_norm": 0.13176266849040985, "learning_rate": 1.4069985112072858e-07, "loss": 0.0012, "step": 93950 }, { "epoch": 2.7958877002960736, "grad_norm": 0.13506631553173065, "learning_rate": 1.4029234907265488e-07, "loss": 0.0015, "step": 93960 }, { "epoch": 2.796185261779715, "grad_norm": 0.106285959482193, "learning_rate": 1.3988542959794627e-07, "loss": 0.0014, "step": 93970 }, { "epoch": 2.7964828232633567, "grad_norm": 0.02200057916343212, "learning_rate": 1.3947909274538374e-07, "loss": 0.001, "step": 93980 }, { "epoch": 2.7967803847469983, "grad_norm": 0.02587658166885376, "learning_rate": 1.3907333856367777e-07, "loss": 0.0028, "step": 93990 }, { "epoch": 2.79707794623064, "grad_norm": 0.1575751155614853, "learning_rate": 1.3866816710146947e-07, "loss": 0.0017, "step": 94000 }, { "epoch": 2.7973755077142815, "grad_norm": 0.2659681737422943, "learning_rate": 1.382635784073305e-07, "loss": 0.0015, "step": 94010 }, { "epoch": 2.797673069197923, "grad_norm": 0.02443564683198929, "learning_rate": 1.3785957252976268e-07, "loss": 0.0007, "step": 94020 }, { "epoch": 2.7979706306815646, "grad_norm": 0.2206445336341858, "learning_rate": 1.3745614951719664e-07, "loss": 0.0018, "step": 94030 }, { "epoch": 2.798268192165206, "grad_norm": 0.2418651133775711, "learning_rate": 1.3705330941799433e-07, "loss": 0.0018, "step": 94040 }, { "epoch": 2.7985657536488477, "grad_norm": 0.006039755418896675, "learning_rate": 1.3665105228044705e-07, "loss": 0.0019, "step": 94050 }, { "epoch": 2.7988633151324893, "grad_norm": 0.2702276110649109, "learning_rate": 1.362493781527785e-07, "loss": 0.001, "step": 94060 }, { "epoch": 2.799160876616131, "grad_norm": 0.038783274590969086, "learning_rate": 1.3584828708313848e-07, "loss": 0.0015, "step": 94070 }, { "epoch": 2.7994584380997725, "grad_norm": 0.07716487348079681, "learning_rate": 1.354477791196096e-07, "loss": 0.0013, "step": 94080 }, { "epoch": 2.799755999583414, "grad_norm": 0.114692322909832, "learning_rate": 1.3504785431020518e-07, "loss": 0.0011, "step": 94090 }, { "epoch": 2.8000535610670556, "grad_norm": 0.34637218713760376, "learning_rate": 1.3464851270286683e-07, "loss": 0.0027, "step": 94100 }, { "epoch": 2.800351122550697, "grad_norm": 0.24664270877838135, "learning_rate": 1.342497543454663e-07, "loss": 0.0012, "step": 94110 }, { "epoch": 2.8006486840343388, "grad_norm": 0.10560561716556549, "learning_rate": 1.3385157928580695e-07, "loss": 0.0019, "step": 94120 }, { "epoch": 2.80094624551798, "grad_norm": 0.2407151162624359, "learning_rate": 1.334539875716212e-07, "loss": 0.0021, "step": 94130 }, { "epoch": 2.8012438070016215, "grad_norm": 0.14080287516117096, "learning_rate": 1.3305697925057148e-07, "loss": 0.0009, "step": 94140 }, { "epoch": 2.801541368485263, "grad_norm": 0.07964939624071121, "learning_rate": 1.3266055437025027e-07, "loss": 0.0015, "step": 94150 }, { "epoch": 2.8018389299689046, "grad_norm": 0.05336417257785797, "learning_rate": 1.322647129781801e-07, "loss": 0.0009, "step": 94160 }, { "epoch": 2.802136491452546, "grad_norm": 0.06302489340305328, "learning_rate": 1.318694551218147e-07, "loss": 0.0016, "step": 94170 }, { "epoch": 2.8024340529361877, "grad_norm": 0.13723066449165344, "learning_rate": 1.3147478084853617e-07, "loss": 0.0019, "step": 94180 }, { "epoch": 2.8027316144198293, "grad_norm": 0.09141972661018372, "learning_rate": 1.3108069020565772e-07, "loss": 0.0006, "step": 94190 }, { "epoch": 2.803029175903471, "grad_norm": 0.06118553504347801, "learning_rate": 1.306871832404222e-07, "loss": 0.0015, "step": 94200 }, { "epoch": 2.8033267373871125, "grad_norm": 0.21606144309043884, "learning_rate": 1.3029426000000233e-07, "loss": 0.0017, "step": 94210 }, { "epoch": 2.803624298870754, "grad_norm": 0.08130314946174622, "learning_rate": 1.2990192053150107e-07, "loss": 0.0016, "step": 94220 }, { "epoch": 2.8039218603543956, "grad_norm": 0.06769701838493347, "learning_rate": 1.2951016488195134e-07, "loss": 0.0006, "step": 94230 }, { "epoch": 2.804219421838037, "grad_norm": 0.23055711388587952, "learning_rate": 1.2911899309831666e-07, "loss": 0.0016, "step": 94240 }, { "epoch": 2.8045169833216788, "grad_norm": 0.16181008517742157, "learning_rate": 1.2872840522749007e-07, "loss": 0.0013, "step": 94250 }, { "epoch": 2.8048145448053203, "grad_norm": 0.3650515377521515, "learning_rate": 1.2833840131629305e-07, "loss": 0.0035, "step": 94260 }, { "epoch": 2.805112106288962, "grad_norm": 0.13310596346855164, "learning_rate": 1.279489814114804e-07, "loss": 0.0014, "step": 94270 }, { "epoch": 2.8054096677726035, "grad_norm": 0.24677859246730804, "learning_rate": 1.2756014555973473e-07, "loss": 0.0033, "step": 94280 }, { "epoch": 2.805707229256245, "grad_norm": 0.26387882232666016, "learning_rate": 1.2717189380766826e-07, "loss": 0.0021, "step": 94290 }, { "epoch": 2.8060047907398866, "grad_norm": 0.20950475335121155, "learning_rate": 1.267842262018243e-07, "loss": 0.0013, "step": 94300 }, { "epoch": 2.806302352223528, "grad_norm": 0.04513464868068695, "learning_rate": 1.2639714278867566e-07, "loss": 0.0008, "step": 94310 }, { "epoch": 2.8065999137071698, "grad_norm": 0.11591077595949173, "learning_rate": 1.2601064361462633e-07, "loss": 0.0012, "step": 94320 }, { "epoch": 2.8068974751908113, "grad_norm": 0.06950988620519638, "learning_rate": 1.2562472872600706e-07, "loss": 0.0014, "step": 94330 }, { "epoch": 2.807195036674453, "grad_norm": 0.056017983704805374, "learning_rate": 1.2523939816908248e-07, "loss": 0.0011, "step": 94340 }, { "epoch": 2.8074925981580945, "grad_norm": 0.4769270122051239, "learning_rate": 1.2485465199004453e-07, "loss": 0.0034, "step": 94350 }, { "epoch": 2.807790159641736, "grad_norm": 0.046500492841005325, "learning_rate": 1.244704902350169e-07, "loss": 0.0009, "step": 94360 }, { "epoch": 2.8080877211253776, "grad_norm": 0.10589776933193207, "learning_rate": 1.240869129500505e-07, "loss": 0.0014, "step": 94370 }, { "epoch": 2.808385282609019, "grad_norm": 0.2696363925933838, "learning_rate": 1.2370392018112908e-07, "loss": 0.002, "step": 94380 }, { "epoch": 2.808682844092661, "grad_norm": 0.2408321499824524, "learning_rate": 1.2332151197416486e-07, "loss": 0.0011, "step": 94390 }, { "epoch": 2.8089804055763024, "grad_norm": 0.11262450367212296, "learning_rate": 1.2293968837500115e-07, "loss": 0.0021, "step": 94400 }, { "epoch": 2.809277967059944, "grad_norm": 0.08407482504844666, "learning_rate": 1.225584494294091e-07, "loss": 0.0013, "step": 94410 }, { "epoch": 2.8095755285435855, "grad_norm": 0.09818500280380249, "learning_rate": 1.2217779518309104e-07, "loss": 0.0021, "step": 94420 }, { "epoch": 2.809873090027227, "grad_norm": 0.4338296949863434, "learning_rate": 1.2179772568167936e-07, "loss": 0.0023, "step": 94430 }, { "epoch": 2.8101706515108686, "grad_norm": 0.0666644498705864, "learning_rate": 1.214182409707365e-07, "loss": 0.0008, "step": 94440 }, { "epoch": 2.81046821299451, "grad_norm": 0.11211755871772766, "learning_rate": 1.2103934109575445e-07, "loss": 0.0013, "step": 94450 }, { "epoch": 2.810765774478152, "grad_norm": 0.13246563076972961, "learning_rate": 1.20661026102154e-07, "loss": 0.0011, "step": 94460 }, { "epoch": 2.8110633359617934, "grad_norm": 0.06363343447446823, "learning_rate": 1.20283296035289e-07, "loss": 0.0019, "step": 94470 }, { "epoch": 2.811360897445435, "grad_norm": 0.08867818117141724, "learning_rate": 1.1990615094043922e-07, "loss": 0.001, "step": 94480 }, { "epoch": 2.811658458929076, "grad_norm": 0.19181042909622192, "learning_rate": 1.1952959086281635e-07, "loss": 0.0018, "step": 94490 }, { "epoch": 2.8119560204127176, "grad_norm": 0.12374851107597351, "learning_rate": 1.1915361584756257e-07, "loss": 0.0015, "step": 94500 }, { "epoch": 2.812253581896359, "grad_norm": 0.14745187759399414, "learning_rate": 1.1877822593974908e-07, "loss": 0.001, "step": 94510 }, { "epoch": 2.812551143380001, "grad_norm": 0.0034382292069494724, "learning_rate": 1.1840342118437597e-07, "loss": 0.0021, "step": 94520 }, { "epoch": 2.8128487048636424, "grad_norm": 0.08683836460113525, "learning_rate": 1.180292016263751e-07, "loss": 0.0022, "step": 94530 }, { "epoch": 2.813146266347284, "grad_norm": 0.21773287653923035, "learning_rate": 1.1765556731060667e-07, "loss": 0.0019, "step": 94540 }, { "epoch": 2.8134438278309255, "grad_norm": 0.281141459941864, "learning_rate": 1.172825182818621e-07, "loss": 0.0018, "step": 94550 }, { "epoch": 2.813741389314567, "grad_norm": 0.06309189647436142, "learning_rate": 1.1691005458486115e-07, "loss": 0.0017, "step": 94560 }, { "epoch": 2.8140389507982086, "grad_norm": 0.054955191910266876, "learning_rate": 1.1653817626425423e-07, "loss": 0.0024, "step": 94570 }, { "epoch": 2.8143365122818502, "grad_norm": 0.3149804472923279, "learning_rate": 1.1616688336462122e-07, "loss": 0.0015, "step": 94580 }, { "epoch": 2.814634073765492, "grad_norm": 0.27363136410713196, "learning_rate": 1.1579617593047377e-07, "loss": 0.0018, "step": 94590 }, { "epoch": 2.8149316352491334, "grad_norm": 0.17154628038406372, "learning_rate": 1.1542605400624907e-07, "loss": 0.001, "step": 94600 }, { "epoch": 2.815229196732775, "grad_norm": 0.0737161710858345, "learning_rate": 1.1505651763631831e-07, "loss": 0.0016, "step": 94610 }, { "epoch": 2.8155267582164165, "grad_norm": 0.01949889399111271, "learning_rate": 1.146875668649805e-07, "loss": 0.0009, "step": 94620 }, { "epoch": 2.815824319700058, "grad_norm": 0.1728953868150711, "learning_rate": 1.1431920173646527e-07, "loss": 0.0015, "step": 94630 }, { "epoch": 2.8161218811836997, "grad_norm": 0.18487229943275452, "learning_rate": 1.1395142229493116e-07, "loss": 0.0028, "step": 94640 }, { "epoch": 2.8164194426673412, "grad_norm": 0.15504615008831024, "learning_rate": 1.1358422858446627e-07, "loss": 0.0021, "step": 94650 }, { "epoch": 2.816717004150983, "grad_norm": 0.10289376229047775, "learning_rate": 1.1321762064909036e-07, "loss": 0.0027, "step": 94660 }, { "epoch": 2.8170145656346244, "grad_norm": 0.17134937644004822, "learning_rate": 1.1285159853275107e-07, "loss": 0.002, "step": 94670 }, { "epoch": 2.817312127118266, "grad_norm": 0.061630237847566605, "learning_rate": 1.1248616227932662e-07, "loss": 0.0016, "step": 94680 }, { "epoch": 2.8176096886019075, "grad_norm": 0.19465766847133636, "learning_rate": 1.1212131193262477e-07, "loss": 0.0029, "step": 94690 }, { "epoch": 2.8179072500855487, "grad_norm": 0.08325131237506866, "learning_rate": 1.1175704753638384e-07, "loss": 0.0013, "step": 94700 }, { "epoch": 2.8182048115691902, "grad_norm": 0.01810467429459095, "learning_rate": 1.1139336913427001e-07, "loss": 0.0014, "step": 94710 }, { "epoch": 2.818502373052832, "grad_norm": 0.200151264667511, "learning_rate": 1.1103027676988121e-07, "loss": 0.002, "step": 94720 }, { "epoch": 2.8187999345364734, "grad_norm": 0.04163987934589386, "learning_rate": 1.1066777048674426e-07, "loss": 0.0018, "step": 94730 }, { "epoch": 2.819097496020115, "grad_norm": 0.11063625663518906, "learning_rate": 1.1030585032831609e-07, "loss": 0.0018, "step": 94740 }, { "epoch": 2.8193950575037565, "grad_norm": 0.06948627531528473, "learning_rate": 1.0994451633798198e-07, "loss": 0.0016, "step": 94750 }, { "epoch": 2.819692618987398, "grad_norm": 0.11427869647741318, "learning_rate": 1.0958376855905894e-07, "loss": 0.0013, "step": 94760 }, { "epoch": 2.8199901804710397, "grad_norm": 0.0977742150425911, "learning_rate": 1.0922360703479295e-07, "loss": 0.0019, "step": 94770 }, { "epoch": 2.8202877419546812, "grad_norm": 0.09136021882295609, "learning_rate": 1.0886403180835891e-07, "loss": 0.001, "step": 94780 }, { "epoch": 2.820585303438323, "grad_norm": 0.26846951246261597, "learning_rate": 1.0850504292286235e-07, "loss": 0.0018, "step": 94790 }, { "epoch": 2.8208828649219644, "grad_norm": 0.17914794385433197, "learning_rate": 1.0814664042133826e-07, "loss": 0.0016, "step": 94800 }, { "epoch": 2.821180426405606, "grad_norm": 0.014109738171100616, "learning_rate": 1.0778882434675175e-07, "loss": 0.003, "step": 94810 }, { "epoch": 2.8214779878892475, "grad_norm": 0.09868408739566803, "learning_rate": 1.0743159474199627e-07, "loss": 0.0019, "step": 94820 }, { "epoch": 2.821775549372889, "grad_norm": 0.19794736802577972, "learning_rate": 1.0707495164989645e-07, "loss": 0.0035, "step": 94830 }, { "epoch": 2.8220731108565307, "grad_norm": 0.3414178490638733, "learning_rate": 1.0671889511320588e-07, "loss": 0.0024, "step": 94840 }, { "epoch": 2.8223706723401722, "grad_norm": 0.21031221747398376, "learning_rate": 1.0636342517460874e-07, "loss": 0.002, "step": 94850 }, { "epoch": 2.822668233823814, "grad_norm": 0.13462577760219574, "learning_rate": 1.060085418767165e-07, "loss": 0.0016, "step": 94860 }, { "epoch": 2.8229657953074554, "grad_norm": 0.09652023762464523, "learning_rate": 1.0565424526207347e-07, "loss": 0.0017, "step": 94870 }, { "epoch": 2.823263356791097, "grad_norm": 0.13287173211574554, "learning_rate": 1.0530053537315121e-07, "loss": 0.0036, "step": 94880 }, { "epoch": 2.8235609182747385, "grad_norm": 0.25089383125305176, "learning_rate": 1.0494741225235305e-07, "loss": 0.0015, "step": 94890 }, { "epoch": 2.82385847975838, "grad_norm": 0.0775090903043747, "learning_rate": 1.0459487594200956e-07, "loss": 0.001, "step": 94900 }, { "epoch": 2.8241560412420217, "grad_norm": 0.09513486176729202, "learning_rate": 1.0424292648438194e-07, "loss": 0.0015, "step": 94910 }, { "epoch": 2.8244536027256633, "grad_norm": 0.10120712965726852, "learning_rate": 1.0389156392166255e-07, "loss": 0.0018, "step": 94920 }, { "epoch": 2.824751164209305, "grad_norm": 0.10437998920679092, "learning_rate": 1.0354078829597214e-07, "loss": 0.0019, "step": 94930 }, { "epoch": 2.8250487256929464, "grad_norm": 0.11732812225818634, "learning_rate": 1.031905996493593e-07, "loss": 0.0015, "step": 94940 }, { "epoch": 2.825346287176588, "grad_norm": 0.11977526545524597, "learning_rate": 1.0284099802380543e-07, "loss": 0.0016, "step": 94950 }, { "epoch": 2.8256438486602296, "grad_norm": 0.1237301155924797, "learning_rate": 1.0249198346122035e-07, "loss": 0.002, "step": 94960 }, { "epoch": 2.825941410143871, "grad_norm": 0.08712632209062576, "learning_rate": 1.0214355600344284e-07, "loss": 0.0033, "step": 94970 }, { "epoch": 2.8262389716275127, "grad_norm": 0.17498460412025452, "learning_rate": 1.0179571569224167e-07, "loss": 0.0021, "step": 94980 }, { "epoch": 2.8265365331111543, "grad_norm": 0.127183735370636, "learning_rate": 1.0144846256931573e-07, "loss": 0.0022, "step": 94990 }, { "epoch": 2.826834094594796, "grad_norm": 0.0848400890827179, "learning_rate": 1.0110179667629283e-07, "loss": 0.0012, "step": 95000 }, { "epoch": 2.8271316560784374, "grad_norm": 0.12030021101236343, "learning_rate": 1.0075571805473028e-07, "loss": 0.0015, "step": 95010 }, { "epoch": 2.827429217562079, "grad_norm": 0.6767396926879883, "learning_rate": 1.0041022674611655e-07, "loss": 0.0024, "step": 95020 }, { "epoch": 2.8277267790457206, "grad_norm": 0.11252082884311676, "learning_rate": 1.0006532279186743e-07, "loss": 0.0018, "step": 95030 }, { "epoch": 2.828024340529362, "grad_norm": 0.03339564427733421, "learning_rate": 9.972100623333036e-08, "loss": 0.0018, "step": 95040 }, { "epoch": 2.8283219020130037, "grad_norm": 0.05061882734298706, "learning_rate": 9.937727711178013e-08, "loss": 0.002, "step": 95050 }, { "epoch": 2.828619463496645, "grad_norm": 0.057182252407073975, "learning_rate": 9.903413546842378e-08, "loss": 0.001, "step": 95060 }, { "epoch": 2.8289170249802864, "grad_norm": 0.13245800137519836, "learning_rate": 9.869158134439616e-08, "loss": 0.0015, "step": 95070 }, { "epoch": 2.829214586463928, "grad_norm": 0.09114214032888412, "learning_rate": 9.834961478076111e-08, "loss": 0.0015, "step": 95080 }, { "epoch": 2.8295121479475696, "grad_norm": 0.1281479448080063, "learning_rate": 9.800823581851415e-08, "loss": 0.0027, "step": 95090 }, { "epoch": 2.829809709431211, "grad_norm": 0.011059718206524849, "learning_rate": 9.766744449857867e-08, "loss": 0.0081, "step": 95100 }, { "epoch": 2.8301072709148527, "grad_norm": 0.036946047097444534, "learning_rate": 9.732724086180812e-08, "loss": 0.0013, "step": 95110 }, { "epoch": 2.8304048323984943, "grad_norm": 0.307676762342453, "learning_rate": 9.698762494898594e-08, "loss": 0.0016, "step": 95120 }, { "epoch": 2.830702393882136, "grad_norm": 0.13353942334651947, "learning_rate": 9.664859680082406e-08, "loss": 0.0013, "step": 95130 }, { "epoch": 2.8309999553657774, "grad_norm": 0.06616052985191345, "learning_rate": 9.631015645796494e-08, "loss": 0.0016, "step": 95140 }, { "epoch": 2.831297516849419, "grad_norm": 0.19417725503444672, "learning_rate": 9.597230396098057e-08, "loss": 0.0015, "step": 95150 }, { "epoch": 2.8315950783330606, "grad_norm": 0.09410964697599411, "learning_rate": 9.563503935037133e-08, "loss": 0.0018, "step": 95160 }, { "epoch": 2.831892639816702, "grad_norm": 0.21570657193660736, "learning_rate": 9.529836266656878e-08, "loss": 0.0018, "step": 95170 }, { "epoch": 2.8321902013003437, "grad_norm": 0.06448985636234283, "learning_rate": 9.49622739499323e-08, "loss": 0.0006, "step": 95180 }, { "epoch": 2.8324877627839853, "grad_norm": 0.0682416632771492, "learning_rate": 9.462677324075243e-08, "loss": 0.001, "step": 95190 }, { "epoch": 2.832785324267627, "grad_norm": 0.07341783493757248, "learning_rate": 9.42918605792481e-08, "loss": 0.0016, "step": 95200 }, { "epoch": 2.8330828857512684, "grad_norm": 0.07927963137626648, "learning_rate": 9.395753600556723e-08, "loss": 0.001, "step": 95210 }, { "epoch": 2.83338044723491, "grad_norm": 0.17377923429012299, "learning_rate": 9.36237995597894e-08, "loss": 0.0015, "step": 95220 }, { "epoch": 2.8336780087185516, "grad_norm": 0.18912048637866974, "learning_rate": 9.329065128192205e-08, "loss": 0.0014, "step": 95230 }, { "epoch": 2.833975570202193, "grad_norm": 0.14280495047569275, "learning_rate": 9.295809121190157e-08, "loss": 0.001, "step": 95240 }, { "epoch": 2.8342731316858347, "grad_norm": 0.06308969110250473, "learning_rate": 9.262611938959554e-08, "loss": 0.0022, "step": 95250 }, { "epoch": 2.8345706931694763, "grad_norm": 0.09972751885652542, "learning_rate": 9.229473585479987e-08, "loss": 0.0028, "step": 95260 }, { "epoch": 2.8348682546531174, "grad_norm": 0.16676801443099976, "learning_rate": 9.196394064724057e-08, "loss": 0.0028, "step": 95270 }, { "epoch": 2.835165816136759, "grad_norm": 0.1062258705496788, "learning_rate": 9.163373380657259e-08, "loss": 0.0013, "step": 95280 }, { "epoch": 2.8354633776204006, "grad_norm": 0.10831230133771896, "learning_rate": 9.130411537237927e-08, "loss": 0.0015, "step": 95290 }, { "epoch": 2.835760939104042, "grad_norm": 0.30773013830184937, "learning_rate": 9.097508538417732e-08, "loss": 0.0038, "step": 95300 }, { "epoch": 2.8360585005876837, "grad_norm": 0.17355670034885406, "learning_rate": 9.064664388140799e-08, "loss": 0.0015, "step": 95310 }, { "epoch": 2.8363560620713253, "grad_norm": 0.10352009534835815, "learning_rate": 9.031879090344587e-08, "loss": 0.0018, "step": 95320 }, { "epoch": 2.836653623554967, "grad_norm": 0.16099783778190613, "learning_rate": 8.999152648959231e-08, "loss": 0.0024, "step": 95330 }, { "epoch": 2.8369511850386084, "grad_norm": 0.1453939527273178, "learning_rate": 8.966485067907981e-08, "loss": 0.0026, "step": 95340 }, { "epoch": 2.83724874652225, "grad_norm": 0.014670480042696, "learning_rate": 8.93387635110693e-08, "loss": 0.0011, "step": 95350 }, { "epoch": 2.8375463080058916, "grad_norm": 0.18811233341693878, "learning_rate": 8.901326502465169e-08, "loss": 0.0015, "step": 95360 }, { "epoch": 2.837843869489533, "grad_norm": 0.18690313398838043, "learning_rate": 8.868835525884689e-08, "loss": 0.0015, "step": 95370 }, { "epoch": 2.8381414309731747, "grad_norm": 0.1411653608083725, "learning_rate": 8.836403425260598e-08, "loss": 0.0017, "step": 95380 }, { "epoch": 2.8384389924568163, "grad_norm": 0.1165364533662796, "learning_rate": 8.804030204480619e-08, "loss": 0.0016, "step": 95390 }, { "epoch": 2.838736553940458, "grad_norm": 0.10791807621717453, "learning_rate": 8.771715867425646e-08, "loss": 0.0017, "step": 95400 }, { "epoch": 2.8390341154240994, "grad_norm": 0.3738373816013336, "learning_rate": 8.73946041796947e-08, "loss": 0.0014, "step": 95410 }, { "epoch": 2.839331676907741, "grad_norm": 0.04916995018720627, "learning_rate": 8.707263859978943e-08, "loss": 0.0006, "step": 95420 }, { "epoch": 2.8396292383913826, "grad_norm": 0.045286938548088074, "learning_rate": 8.675126197313532e-08, "loss": 0.0009, "step": 95430 }, { "epoch": 2.839926799875024, "grad_norm": 0.037529636174440384, "learning_rate": 8.643047433825935e-08, "loss": 0.0012, "step": 95440 }, { "epoch": 2.8402243613586657, "grad_norm": 0.04628941789269447, "learning_rate": 8.61102757336174e-08, "loss": 0.0008, "step": 95450 }, { "epoch": 2.8405219228423073, "grad_norm": 0.07636100053787231, "learning_rate": 8.579066619759379e-08, "loss": 0.0012, "step": 95460 }, { "epoch": 2.840819484325949, "grad_norm": 0.22305908799171448, "learning_rate": 8.547164576850286e-08, "loss": 0.0017, "step": 95470 }, { "epoch": 2.8411170458095905, "grad_norm": 0.17275409400463104, "learning_rate": 8.515321448458847e-08, "loss": 0.0015, "step": 95480 }, { "epoch": 2.841414607293232, "grad_norm": 0.13327153027057648, "learning_rate": 8.48353723840234e-08, "loss": 0.0028, "step": 95490 }, { "epoch": 2.8417121687768736, "grad_norm": 0.1430431306362152, "learning_rate": 8.451811950490996e-08, "loss": 0.0012, "step": 95500 }, { "epoch": 2.842009730260515, "grad_norm": 0.08795560151338577, "learning_rate": 8.420145588527995e-08, "loss": 0.0029, "step": 95510 }, { "epoch": 2.8423072917441567, "grad_norm": 0.10131940990686417, "learning_rate": 8.388538156309412e-08, "loss": 0.0012, "step": 95520 }, { "epoch": 2.8426048532277983, "grad_norm": 0.055009983479976654, "learning_rate": 8.356989657624381e-08, "loss": 0.0014, "step": 95530 }, { "epoch": 2.84290241471144, "grad_norm": 0.022479301318526268, "learning_rate": 8.325500096254769e-08, "loss": 0.0012, "step": 95540 }, { "epoch": 2.8431999761950815, "grad_norm": 0.1979130655527115, "learning_rate": 8.294069475975608e-08, "loss": 0.001, "step": 95550 }, { "epoch": 2.843497537678723, "grad_norm": 0.04335121810436249, "learning_rate": 8.262697800554608e-08, "loss": 0.0029, "step": 95560 }, { "epoch": 2.8437950991623646, "grad_norm": 0.11863929033279419, "learning_rate": 8.231385073752706e-08, "loss": 0.0016, "step": 95570 }, { "epoch": 2.844092660646006, "grad_norm": 0.14690953493118286, "learning_rate": 8.200131299323511e-08, "loss": 0.0022, "step": 95580 }, { "epoch": 2.8443902221296478, "grad_norm": 0.08210832625627518, "learning_rate": 8.16893648101369e-08, "loss": 0.0019, "step": 95590 }, { "epoch": 2.8446877836132893, "grad_norm": 0.14745409786701202, "learning_rate": 8.137800622562864e-08, "loss": 0.0014, "step": 95600 }, { "epoch": 2.844985345096931, "grad_norm": 0.06448569893836975, "learning_rate": 8.106723727703492e-08, "loss": 0.0009, "step": 95610 }, { "epoch": 2.8452829065805725, "grad_norm": 0.10593189299106598, "learning_rate": 8.075705800161038e-08, "loss": 0.0015, "step": 95620 }, { "epoch": 2.8455804680642136, "grad_norm": 0.10588351637125015, "learning_rate": 8.044746843653861e-08, "loss": 0.0016, "step": 95630 }, { "epoch": 2.845878029547855, "grad_norm": 0.0653546079993248, "learning_rate": 8.013846861893382e-08, "loss": 0.002, "step": 95640 }, { "epoch": 2.8461755910314968, "grad_norm": 0.15224091708660126, "learning_rate": 7.983005858583692e-08, "loss": 0.0013, "step": 95650 }, { "epoch": 2.8464731525151383, "grad_norm": 0.05778252333402634, "learning_rate": 7.952223837422002e-08, "loss": 0.0017, "step": 95660 }, { "epoch": 2.84677071399878, "grad_norm": 0.15009482204914093, "learning_rate": 7.921500802098359e-08, "loss": 0.0017, "step": 95670 }, { "epoch": 2.8470682754824215, "grad_norm": 0.11066112667322159, "learning_rate": 7.890836756295983e-08, "loss": 0.0011, "step": 95680 }, { "epoch": 2.847365836966063, "grad_norm": 0.11006706953048706, "learning_rate": 7.860231703690602e-08, "loss": 0.0014, "step": 95690 }, { "epoch": 2.8476633984497046, "grad_norm": 0.0629972591996193, "learning_rate": 7.829685647951169e-08, "loss": 0.0021, "step": 95700 }, { "epoch": 2.847960959933346, "grad_norm": 0.004179887939244509, "learning_rate": 7.799198592739532e-08, "loss": 0.0023, "step": 95710 }, { "epoch": 2.8482585214169878, "grad_norm": 0.1377209722995758, "learning_rate": 7.76877054171038e-08, "loss": 0.0013, "step": 95720 }, { "epoch": 2.8485560829006293, "grad_norm": 0.20401746034622192, "learning_rate": 7.738401498511406e-08, "loss": 0.0019, "step": 95730 }, { "epoch": 2.848853644384271, "grad_norm": 0.14176341891288757, "learning_rate": 7.708091466783196e-08, "loss": 0.0034, "step": 95740 }, { "epoch": 2.8491512058679125, "grad_norm": 0.4493321180343628, "learning_rate": 7.677840450159236e-08, "loss": 0.0014, "step": 95750 }, { "epoch": 2.849448767351554, "grad_norm": 0.2032921016216278, "learning_rate": 7.647648452266066e-08, "loss": 0.0018, "step": 95760 }, { "epoch": 2.8497463288351956, "grad_norm": 0.1510729193687439, "learning_rate": 7.617515476722903e-08, "loss": 0.0015, "step": 95770 }, { "epoch": 2.850043890318837, "grad_norm": 0.09646894782781601, "learning_rate": 7.587441527142081e-08, "loss": 0.0018, "step": 95780 }, { "epoch": 2.8503414518024788, "grad_norm": 0.4693347215652466, "learning_rate": 7.557426607128937e-08, "loss": 0.0023, "step": 95790 }, { "epoch": 2.8506390132861203, "grad_norm": 0.11439769715070724, "learning_rate": 7.527470720281427e-08, "loss": 0.0012, "step": 95800 }, { "epoch": 2.850936574769762, "grad_norm": 0.046229489147663116, "learning_rate": 7.497573870190678e-08, "loss": 0.0038, "step": 95810 }, { "epoch": 2.8512341362534035, "grad_norm": 0.08242973685264587, "learning_rate": 7.46773606044071e-08, "loss": 0.0011, "step": 95820 }, { "epoch": 2.851531697737045, "grad_norm": 0.03925176337361336, "learning_rate": 7.437957294608444e-08, "loss": 0.0011, "step": 95830 }, { "epoch": 2.851829259220686, "grad_norm": 0.020163126289844513, "learning_rate": 7.408237576263577e-08, "loss": 0.0005, "step": 95840 }, { "epoch": 2.8521268207043278, "grad_norm": 0.1122429370880127, "learning_rate": 7.378576908969038e-08, "loss": 0.0009, "step": 95850 }, { "epoch": 2.8524243821879693, "grad_norm": 0.09144096076488495, "learning_rate": 7.348975296280314e-08, "loss": 0.0013, "step": 95860 }, { "epoch": 2.852721943671611, "grad_norm": 0.12644700706005096, "learning_rate": 7.31943274174618e-08, "loss": 0.0023, "step": 95870 }, { "epoch": 2.8530195051552525, "grad_norm": 0.29861658811569214, "learning_rate": 7.289949248907968e-08, "loss": 0.0013, "step": 95880 }, { "epoch": 2.853317066638894, "grad_norm": 0.043172866106033325, "learning_rate": 7.260524821300242e-08, "loss": 0.0012, "step": 95890 }, { "epoch": 2.8536146281225356, "grad_norm": 0.24046361446380615, "learning_rate": 7.231159462450343e-08, "loss": 0.0016, "step": 95900 }, { "epoch": 2.853912189606177, "grad_norm": 0.0653475821018219, "learning_rate": 7.201853175878459e-08, "loss": 0.0016, "step": 95910 }, { "epoch": 2.854209751089819, "grad_norm": 0.11637582629919052, "learning_rate": 7.172605965097778e-08, "loss": 0.0011, "step": 95920 }, { "epoch": 2.8545073125734604, "grad_norm": 0.0377851240336895, "learning_rate": 7.143417833614497e-08, "loss": 0.001, "step": 95930 }, { "epoch": 2.854804874057102, "grad_norm": 0.13413850963115692, "learning_rate": 7.114288784927592e-08, "loss": 0.0017, "step": 95940 }, { "epoch": 2.8551024355407435, "grad_norm": 0.17096766829490662, "learning_rate": 7.085218822528994e-08, "loss": 0.0018, "step": 95950 }, { "epoch": 2.855399997024385, "grad_norm": 0.05898747593164444, "learning_rate": 7.056207949903526e-08, "loss": 0.0008, "step": 95960 }, { "epoch": 2.8556975585080266, "grad_norm": 0.2732159495353699, "learning_rate": 7.027256170529018e-08, "loss": 0.0011, "step": 95970 }, { "epoch": 2.855995119991668, "grad_norm": 0.23741382360458374, "learning_rate": 6.998363487876192e-08, "loss": 0.0023, "step": 95980 }, { "epoch": 2.85629268147531, "grad_norm": 0.40121719241142273, "learning_rate": 6.969529905408556e-08, "loss": 0.0017, "step": 95990 }, { "epoch": 2.8565902429589514, "grad_norm": 0.19010350108146667, "learning_rate": 6.940755426582679e-08, "loss": 0.0019, "step": 96000 }, { "epoch": 2.856887804442593, "grad_norm": 0.0426737442612648, "learning_rate": 6.912040054847969e-08, "loss": 0.0011, "step": 96010 }, { "epoch": 2.8571853659262345, "grad_norm": 0.08469738811254501, "learning_rate": 6.883383793646892e-08, "loss": 0.0028, "step": 96020 }, { "epoch": 2.857482927409876, "grad_norm": 0.011589807458221912, "learning_rate": 6.854786646414535e-08, "loss": 0.0013, "step": 96030 }, { "epoch": 2.8577804888935177, "grad_norm": 0.17275939881801605, "learning_rate": 6.826248616579157e-08, "loss": 0.0015, "step": 96040 }, { "epoch": 2.8580780503771592, "grad_norm": 0.023867245763540268, "learning_rate": 6.797769707561852e-08, "loss": 0.0013, "step": 96050 }, { "epoch": 2.858375611860801, "grad_norm": 0.05892999842762947, "learning_rate": 6.769349922776669e-08, "loss": 0.001, "step": 96060 }, { "epoch": 2.8586731733444424, "grad_norm": 0.09526010602712631, "learning_rate": 6.740989265630438e-08, "loss": 0.0021, "step": 96070 }, { "epoch": 2.858970734828084, "grad_norm": 0.10098382830619812, "learning_rate": 6.712687739522993e-08, "loss": 0.0012, "step": 96080 }, { "epoch": 2.8592682963117255, "grad_norm": 0.0033923140726983547, "learning_rate": 6.684445347847124e-08, "loss": 0.0008, "step": 96090 }, { "epoch": 2.859565857795367, "grad_norm": 0.11888960748910904, "learning_rate": 6.656262093988508e-08, "loss": 0.0015, "step": 96100 }, { "epoch": 2.8598634192790087, "grad_norm": 0.3197610676288605, "learning_rate": 6.628137981325611e-08, "loss": 0.002, "step": 96110 }, { "epoch": 2.8601609807626502, "grad_norm": 0.288808673620224, "learning_rate": 6.600073013229901e-08, "loss": 0.0021, "step": 96120 }, { "epoch": 2.860458542246292, "grad_norm": 0.24732176959514618, "learning_rate": 6.572067193065856e-08, "loss": 0.0014, "step": 96130 }, { "epoch": 2.8607561037299334, "grad_norm": 0.09877205640077591, "learning_rate": 6.544120524190679e-08, "loss": 0.0009, "step": 96140 }, { "epoch": 2.861053665213575, "grad_norm": 0.18040505051612854, "learning_rate": 6.516233009954575e-08, "loss": 0.0011, "step": 96150 }, { "epoch": 2.8613512266972165, "grad_norm": 0.11076395958662033, "learning_rate": 6.488404653700708e-08, "loss": 0.0012, "step": 96160 }, { "epoch": 2.861648788180858, "grad_norm": 0.18845407664775848, "learning_rate": 6.46063545876513e-08, "loss": 0.0016, "step": 96170 }, { "epoch": 2.8619463496644997, "grad_norm": 0.06481442600488663, "learning_rate": 6.432925428476622e-08, "loss": 0.003, "step": 96180 }, { "epoch": 2.8622439111481413, "grad_norm": 0.09286779165267944, "learning_rate": 6.405274566157082e-08, "loss": 0.0019, "step": 96190 }, { "epoch": 2.8625414726317824, "grad_norm": 0.06064079329371452, "learning_rate": 6.377682875121305e-08, "loss": 0.0009, "step": 96200 }, { "epoch": 2.862839034115424, "grad_norm": 0.047834184020757675, "learning_rate": 6.350150358676866e-08, "loss": 0.0011, "step": 96210 }, { "epoch": 2.8631365955990655, "grad_norm": 0.10631348937749863, "learning_rate": 6.32267702012429e-08, "loss": 0.0008, "step": 96220 }, { "epoch": 2.863434157082707, "grad_norm": 0.014849983155727386, "learning_rate": 6.295262862757113e-08, "loss": 0.0017, "step": 96230 }, { "epoch": 2.8637317185663487, "grad_norm": 0.20596745610237122, "learning_rate": 6.267907889861702e-08, "loss": 0.001, "step": 96240 }, { "epoch": 2.8640292800499902, "grad_norm": 0.12966403365135193, "learning_rate": 6.240612104717214e-08, "loss": 0.0028, "step": 96250 }, { "epoch": 2.864326841533632, "grad_norm": 0.3025074899196625, "learning_rate": 6.21337551059592e-08, "loss": 0.0016, "step": 96260 }, { "epoch": 2.8646244030172734, "grad_norm": 0.13609318435192108, "learning_rate": 6.18619811076282e-08, "loss": 0.0025, "step": 96270 }, { "epoch": 2.864921964500915, "grad_norm": 0.14691688120365143, "learning_rate": 6.15907990847603e-08, "loss": 0.0009, "step": 96280 }, { "epoch": 2.8652195259845565, "grad_norm": 0.1882709413766861, "learning_rate": 6.13202090698628e-08, "loss": 0.0014, "step": 96290 }, { "epoch": 2.865517087468198, "grad_norm": 0.1428140550851822, "learning_rate": 6.105021109537423e-08, "loss": 0.0007, "step": 96300 }, { "epoch": 2.8658146489518397, "grad_norm": 0.006645062007009983, "learning_rate": 6.078080519366147e-08, "loss": 0.0012, "step": 96310 }, { "epoch": 2.8661122104354813, "grad_norm": 0.09816500544548035, "learning_rate": 6.051199139702091e-08, "loss": 0.002, "step": 96320 }, { "epoch": 2.866409771919123, "grad_norm": 0.05209871754050255, "learning_rate": 6.02437697376762e-08, "loss": 0.0018, "step": 96330 }, { "epoch": 2.8667073334027644, "grad_norm": 0.1627541333436966, "learning_rate": 5.99761402477822e-08, "loss": 0.0028, "step": 96340 }, { "epoch": 2.867004894886406, "grad_norm": 0.14803175628185272, "learning_rate": 5.970910295942212e-08, "loss": 0.0011, "step": 96350 }, { "epoch": 2.8673024563700475, "grad_norm": 0.026715030893683434, "learning_rate": 5.944265790460757e-08, "loss": 0.0014, "step": 96360 }, { "epoch": 2.867600017853689, "grad_norm": 0.1314639300107956, "learning_rate": 5.917680511527912e-08, "loss": 0.0018, "step": 96370 }, { "epoch": 2.8678975793373307, "grad_norm": 0.24621835350990295, "learning_rate": 5.891154462330739e-08, "loss": 0.0026, "step": 96380 }, { "epoch": 2.8681951408209723, "grad_norm": 0.02310147136449814, "learning_rate": 5.8646876460490834e-08, "loss": 0.0049, "step": 96390 }, { "epoch": 2.868492702304614, "grad_norm": 0.12763714790344238, "learning_rate": 5.8382800658558505e-08, "loss": 0.0013, "step": 96400 }, { "epoch": 2.8687902637882554, "grad_norm": 0.07882096618413925, "learning_rate": 5.811931724916564e-08, "loss": 0.0011, "step": 96410 }, { "epoch": 2.8690878252718965, "grad_norm": 0.2008449286222458, "learning_rate": 5.78564262638992e-08, "loss": 0.003, "step": 96420 }, { "epoch": 2.869385386755538, "grad_norm": 0.24957334995269775, "learning_rate": 5.759412773427397e-08, "loss": 0.0017, "step": 96430 }, { "epoch": 2.8696829482391797, "grad_norm": 0.04553299769759178, "learning_rate": 5.73324216917337e-08, "loss": 0.0013, "step": 96440 }, { "epoch": 2.8699805097228213, "grad_norm": 0.5290699601173401, "learning_rate": 5.7071308167651604e-08, "loss": 0.0028, "step": 96450 }, { "epoch": 2.870278071206463, "grad_norm": 0.06617314368486404, "learning_rate": 5.681078719332933e-08, "loss": 0.0013, "step": 96460 }, { "epoch": 2.8705756326901044, "grad_norm": 0.1359957903623581, "learning_rate": 5.6550858799998e-08, "loss": 0.0013, "step": 96470 }, { "epoch": 2.870873194173746, "grad_norm": 0.05338580533862114, "learning_rate": 5.6291523018816574e-08, "loss": 0.0017, "step": 96480 }, { "epoch": 2.8711707556573876, "grad_norm": 0.1373506486415863, "learning_rate": 5.603277988087408e-08, "loss": 0.001, "step": 96490 }, { "epoch": 2.871468317141029, "grad_norm": 0.10943996906280518, "learning_rate": 5.577462941718903e-08, "loss": 0.0019, "step": 96500 }, { "epoch": 2.8717658786246707, "grad_norm": 0.07487110793590546, "learning_rate": 5.5517071658707235e-08, "loss": 0.0017, "step": 96510 }, { "epoch": 2.8720634401083123, "grad_norm": 0.09931217133998871, "learning_rate": 5.526010663630399e-08, "loss": 0.0011, "step": 96520 }, { "epoch": 2.872361001591954, "grad_norm": 0.22266411781311035, "learning_rate": 5.50037343807841e-08, "loss": 0.0019, "step": 96530 }, { "epoch": 2.8726585630755954, "grad_norm": 0.10117208957672119, "learning_rate": 5.4747954922881876e-08, "loss": 0.0017, "step": 96540 }, { "epoch": 2.872956124559237, "grad_norm": 0.15774568915367126, "learning_rate": 5.449276829325889e-08, "loss": 0.0021, "step": 96550 }, { "epoch": 2.8732536860428786, "grad_norm": 0.15361979603767395, "learning_rate": 5.423817452250624e-08, "loss": 0.0022, "step": 96560 }, { "epoch": 2.87355124752652, "grad_norm": 0.13497968018054962, "learning_rate": 5.39841736411445e-08, "loss": 0.002, "step": 96570 }, { "epoch": 2.8738488090101617, "grad_norm": 0.16694574058055878, "learning_rate": 5.373076567962376e-08, "loss": 0.0011, "step": 96580 }, { "epoch": 2.8741463704938033, "grad_norm": 0.1831883192062378, "learning_rate": 5.3477950668320286e-08, "loss": 0.002, "step": 96590 }, { "epoch": 2.874443931977445, "grad_norm": 0.13084127008914948, "learning_rate": 5.3225728637542604e-08, "loss": 0.002, "step": 96600 }, { "epoch": 2.8747414934610864, "grad_norm": 0.006150885485112667, "learning_rate": 5.297409961752598e-08, "loss": 0.0011, "step": 96610 }, { "epoch": 2.875039054944728, "grad_norm": 0.15010693669319153, "learning_rate": 5.272306363843571e-08, "loss": 0.0014, "step": 96620 }, { "epoch": 2.8753366164283696, "grad_norm": 0.11023297905921936, "learning_rate": 5.2472620730364966e-08, "loss": 0.0011, "step": 96630 }, { "epoch": 2.875634177912011, "grad_norm": 0.10837667435407639, "learning_rate": 5.222277092333694e-08, "loss": 0.0016, "step": 96640 }, { "epoch": 2.8759317393956527, "grad_norm": 0.1059407889842987, "learning_rate": 5.197351424730268e-08, "loss": 0.0019, "step": 96650 }, { "epoch": 2.8762293008792943, "grad_norm": 0.040393926203250885, "learning_rate": 5.1724850732143284e-08, "loss": 0.0015, "step": 96660 }, { "epoch": 2.876526862362936, "grad_norm": 0.17111843824386597, "learning_rate": 5.147678040766768e-08, "loss": 0.001, "step": 96670 }, { "epoch": 2.8768244238465774, "grad_norm": 0.09516644477844238, "learning_rate": 5.122930330361431e-08, "loss": 0.0025, "step": 96680 }, { "epoch": 2.877121985330219, "grad_norm": 0.1125691831111908, "learning_rate": 5.098241944964999e-08, "loss": 0.0016, "step": 96690 }, { "epoch": 2.8774195468138606, "grad_norm": 0.07547212392091751, "learning_rate": 5.073612887537105e-08, "loss": 0.0018, "step": 96700 }, { "epoch": 2.877717108297502, "grad_norm": 0.2043825089931488, "learning_rate": 5.0490431610302206e-08, "loss": 0.0025, "step": 96710 }, { "epoch": 2.8780146697811437, "grad_norm": 0.07119346410036087, "learning_rate": 5.0245327683897114e-08, "loss": 0.002, "step": 96720 }, { "epoch": 2.8783122312647853, "grad_norm": 0.033341340720653534, "learning_rate": 5.000081712553895e-08, "loss": 0.0009, "step": 96730 }, { "epoch": 2.878609792748427, "grad_norm": 0.08438748866319656, "learning_rate": 4.9756899964538696e-08, "loss": 0.0017, "step": 96740 }, { "epoch": 2.8789073542320684, "grad_norm": 0.03916320577263832, "learning_rate": 4.951357623013686e-08, "loss": 0.002, "step": 96750 }, { "epoch": 2.87920491571571, "grad_norm": 0.15944436192512512, "learning_rate": 4.9270845951502886e-08, "loss": 0.0015, "step": 96760 }, { "epoch": 2.879502477199351, "grad_norm": 0.05662901699542999, "learning_rate": 4.902870915773461e-08, "loss": 0.0016, "step": 96770 }, { "epoch": 2.8798000386829927, "grad_norm": 0.07401732355356216, "learning_rate": 4.8787165877858814e-08, "loss": 0.0011, "step": 96780 }, { "epoch": 2.8800976001666343, "grad_norm": 0.13170796632766724, "learning_rate": 4.854621614083122e-08, "loss": 0.0019, "step": 96790 }, { "epoch": 2.880395161650276, "grad_norm": 0.05629592388868332, "learning_rate": 4.830585997553705e-08, "loss": 0.0004, "step": 96800 }, { "epoch": 2.8806927231339174, "grad_norm": 0.15250740945339203, "learning_rate": 4.806609741078994e-08, "loss": 0.0027, "step": 96810 }, { "epoch": 2.880990284617559, "grad_norm": 0.10179638117551804, "learning_rate": 4.7826928475331324e-08, "loss": 0.0016, "step": 96820 }, { "epoch": 2.8812878461012006, "grad_norm": 0.24430644512176514, "learning_rate": 4.758835319783272e-08, "loss": 0.0017, "step": 96830 }, { "epoch": 2.881585407584842, "grad_norm": 0.0017189511563628912, "learning_rate": 4.7350371606894044e-08, "loss": 0.0008, "step": 96840 }, { "epoch": 2.8818829690684837, "grad_norm": 0.15204165875911713, "learning_rate": 4.7112983731044674e-08, "loss": 0.0015, "step": 96850 }, { "epoch": 2.8821805305521253, "grad_norm": 0.1014152467250824, "learning_rate": 4.687618959874129e-08, "loss": 0.0011, "step": 96860 }, { "epoch": 2.882478092035767, "grad_norm": 0.06394778192043304, "learning_rate": 4.663998923837121e-08, "loss": 0.0014, "step": 96870 }, { "epoch": 2.8827756535194085, "grad_norm": 0.10682135075330734, "learning_rate": 4.6404382678249535e-08, "loss": 0.002, "step": 96880 }, { "epoch": 2.88307321500305, "grad_norm": 0.1090942770242691, "learning_rate": 4.6169369946620354e-08, "loss": 0.0009, "step": 96890 }, { "epoch": 2.8833707764866916, "grad_norm": 0.00708494009450078, "learning_rate": 4.5934951071656686e-08, "loss": 0.0013, "step": 96900 }, { "epoch": 2.883668337970333, "grad_norm": 0.17087404429912567, "learning_rate": 4.570112608145938e-08, "loss": 0.0029, "step": 96910 }, { "epoch": 2.8839658994539747, "grad_norm": 0.11814387887716293, "learning_rate": 4.546789500406046e-08, "loss": 0.0018, "step": 96920 }, { "epoch": 2.8842634609376163, "grad_norm": 0.12825940549373627, "learning_rate": 4.523525786741811e-08, "loss": 0.0033, "step": 96930 }, { "epoch": 2.884561022421258, "grad_norm": 0.0925392284989357, "learning_rate": 4.500321469942059e-08, "loss": 0.0013, "step": 96940 }, { "epoch": 2.8848585839048995, "grad_norm": 0.07224273681640625, "learning_rate": 4.477176552788565e-08, "loss": 0.0015, "step": 96950 }, { "epoch": 2.885156145388541, "grad_norm": 0.10820776224136353, "learning_rate": 4.45409103805583e-08, "loss": 0.0018, "step": 96960 }, { "epoch": 2.8854537068721826, "grad_norm": 0.17869319021701813, "learning_rate": 4.4310649285113086e-08, "loss": 0.0019, "step": 96970 }, { "epoch": 2.885751268355824, "grad_norm": 0.06829095631837845, "learning_rate": 4.4080982269152915e-08, "loss": 0.0017, "step": 96980 }, { "epoch": 2.8860488298394653, "grad_norm": 0.18359823524951935, "learning_rate": 4.3851909360210775e-08, "loss": 0.002, "step": 96990 }, { "epoch": 2.886346391323107, "grad_norm": 0.0292226392775774, "learning_rate": 4.3623430585747474e-08, "loss": 0.005, "step": 97000 }, { "epoch": 2.8866439528067485, "grad_norm": 0.18981990218162537, "learning_rate": 4.339554597315165e-08, "loss": 0.0013, "step": 97010 }, { "epoch": 2.88694151429039, "grad_norm": 0.06751496344804764, "learning_rate": 4.3168255549743135e-08, "loss": 0.0019, "step": 97020 }, { "epoch": 2.8872390757740316, "grad_norm": 0.21023891866207123, "learning_rate": 4.294155934276789e-08, "loss": 0.0025, "step": 97030 }, { "epoch": 2.887536637257673, "grad_norm": 0.19507750868797302, "learning_rate": 4.271545737940253e-08, "loss": 0.001, "step": 97040 }, { "epoch": 2.8878341987413148, "grad_norm": 0.03401175141334534, "learning_rate": 4.248994968675146e-08, "loss": 0.0017, "step": 97050 }, { "epoch": 2.8881317602249563, "grad_norm": 0.06856099516153336, "learning_rate": 4.2265036291848636e-08, "loss": 0.0013, "step": 97060 }, { "epoch": 2.888429321708598, "grad_norm": 0.3152144253253937, "learning_rate": 4.2040717221655815e-08, "loss": 0.0023, "step": 97070 }, { "epoch": 2.8887268831922395, "grad_norm": 0.053585708141326904, "learning_rate": 4.18169925030637e-08, "loss": 0.002, "step": 97080 }, { "epoch": 2.889024444675881, "grad_norm": 0.1822996288537979, "learning_rate": 4.159386216289252e-08, "loss": 0.0014, "step": 97090 }, { "epoch": 2.8893220061595226, "grad_norm": 0.05132764205336571, "learning_rate": 4.1371326227890864e-08, "loss": 0.0013, "step": 97100 }, { "epoch": 2.889619567643164, "grad_norm": 0.08143649995326996, "learning_rate": 4.114938472473573e-08, "loss": 0.0027, "step": 97110 }, { "epoch": 2.8899171291268058, "grad_norm": 0.1933506578207016, "learning_rate": 4.092803768003306e-08, "loss": 0.0009, "step": 97120 }, { "epoch": 2.8902146906104473, "grad_norm": 0.17691725492477417, "learning_rate": 4.070728512031774e-08, "loss": 0.0016, "step": 97130 }, { "epoch": 2.890512252094089, "grad_norm": 0.15147797763347626, "learning_rate": 4.048712707205249e-08, "loss": 0.001, "step": 97140 }, { "epoch": 2.8908098135777305, "grad_norm": 0.4021368622779846, "learning_rate": 4.026756356163064e-08, "loss": 0.0038, "step": 97150 }, { "epoch": 2.891107375061372, "grad_norm": 0.0664418563246727, "learning_rate": 4.004859461537225e-08, "loss": 0.0037, "step": 97160 }, { "epoch": 2.8914049365450136, "grad_norm": 0.09747952222824097, "learning_rate": 3.983022025952743e-08, "loss": 0.0012, "step": 97170 }, { "epoch": 2.891702498028655, "grad_norm": 0.07952824234962463, "learning_rate": 3.961244052027413e-08, "loss": 0.0023, "step": 97180 }, { "epoch": 2.8920000595122968, "grad_norm": 0.19123618304729462, "learning_rate": 3.939525542371925e-08, "loss": 0.002, "step": 97190 }, { "epoch": 2.8922976209959383, "grad_norm": 0.7751671075820923, "learning_rate": 3.9178664995899175e-08, "loss": 0.0104, "step": 97200 }, { "epoch": 2.89259518247958, "grad_norm": 0.16887407004833221, "learning_rate": 3.896266926277814e-08, "loss": 0.0012, "step": 97210 }, { "epoch": 2.8928927439632215, "grad_norm": 0.2056533694267273, "learning_rate": 3.874726825024877e-08, "loss": 0.0014, "step": 97220 }, { "epoch": 2.893190305446863, "grad_norm": 0.2700706720352173, "learning_rate": 3.85324619841343e-08, "loss": 0.0024, "step": 97230 }, { "epoch": 2.8934878669305046, "grad_norm": 0.20375272631645203, "learning_rate": 3.8318250490184114e-08, "loss": 0.0013, "step": 97240 }, { "epoch": 2.893785428414146, "grad_norm": 0.13148368895053864, "learning_rate": 3.810463379407769e-08, "loss": 0.001, "step": 97250 }, { "epoch": 2.894082989897788, "grad_norm": 0.013902963139116764, "learning_rate": 3.7891611921423435e-08, "loss": 0.001, "step": 97260 }, { "epoch": 2.8943805513814294, "grad_norm": 0.08017733693122864, "learning_rate": 3.767918489775757e-08, "loss": 0.0022, "step": 97270 }, { "epoch": 2.894678112865071, "grad_norm": 0.15338028967380524, "learning_rate": 3.7467352748545846e-08, "loss": 0.0011, "step": 97280 }, { "epoch": 2.8949756743487125, "grad_norm": 0.17943178117275238, "learning_rate": 3.7256115499182396e-08, "loss": 0.0014, "step": 97290 }, { "epoch": 2.895273235832354, "grad_norm": 0.14320626854896545, "learning_rate": 3.70454731749903e-08, "loss": 0.0012, "step": 97300 }, { "epoch": 2.8955707973159956, "grad_norm": 0.07157859206199646, "learning_rate": 3.6835425801219905e-08, "loss": 0.0014, "step": 97310 }, { "epoch": 2.895868358799637, "grad_norm": 0.10067899525165558, "learning_rate": 3.6625973403052185e-08, "loss": 0.001, "step": 97320 }, { "epoch": 2.896165920283279, "grad_norm": 0.14475804567337036, "learning_rate": 3.6417116005595386e-08, "loss": 0.0015, "step": 97330 }, { "epoch": 2.8964634817669204, "grad_norm": 0.19627811014652252, "learning_rate": 3.620885363388782e-08, "loss": 0.0026, "step": 97340 }, { "epoch": 2.8967610432505615, "grad_norm": 0.08274762332439423, "learning_rate": 3.600118631289506e-08, "loss": 0.0016, "step": 97350 }, { "epoch": 2.897058604734203, "grad_norm": 0.026452219113707542, "learning_rate": 3.5794114067511655e-08, "loss": 0.0024, "step": 97360 }, { "epoch": 2.8973561662178446, "grad_norm": 0.21918423473834991, "learning_rate": 3.558763692256162e-08, "loss": 0.0069, "step": 97370 }, { "epoch": 2.897653727701486, "grad_norm": 0.07256818562746048, "learning_rate": 3.5381754902796276e-08, "loss": 0.001, "step": 97380 }, { "epoch": 2.897951289185128, "grad_norm": 0.05760493129491806, "learning_rate": 3.5176468032897536e-08, "loss": 0.002, "step": 97390 }, { "epoch": 2.8982488506687694, "grad_norm": 0.17537881433963776, "learning_rate": 3.4971776337474065e-08, "loss": 0.0012, "step": 97400 }, { "epoch": 2.898546412152411, "grad_norm": 0.09533611685037613, "learning_rate": 3.4767679841063996e-08, "loss": 0.0015, "step": 97410 }, { "epoch": 2.8988439736360525, "grad_norm": 0.13006535172462463, "learning_rate": 3.4564178568134434e-08, "loss": 0.0031, "step": 97420 }, { "epoch": 2.899141535119694, "grad_norm": 0.11693303287029266, "learning_rate": 3.43612725430803e-08, "loss": 0.0031, "step": 97430 }, { "epoch": 2.8994390966033357, "grad_norm": 0.1078273132443428, "learning_rate": 3.4158961790225484e-08, "loss": 0.0016, "step": 97440 }, { "epoch": 2.8997366580869772, "grad_norm": 0.05492309853434563, "learning_rate": 3.395724633382391e-08, "loss": 0.0015, "step": 97450 }, { "epoch": 2.900034219570619, "grad_norm": 0.10948706418275833, "learning_rate": 3.3756126198055125e-08, "loss": 0.0022, "step": 97460 }, { "epoch": 2.9003317810542604, "grad_norm": 0.09473346918821335, "learning_rate": 3.35556014070304e-08, "loss": 0.0024, "step": 97470 }, { "epoch": 2.900629342537902, "grad_norm": 0.13636071979999542, "learning_rate": 3.3355671984787175e-08, "loss": 0.0012, "step": 97480 }, { "epoch": 2.9009269040215435, "grad_norm": 0.18266849219799042, "learning_rate": 3.315633795529405e-08, "loss": 0.0023, "step": 97490 }, { "epoch": 2.901224465505185, "grad_norm": 0.09499125182628632, "learning_rate": 3.29575993424458e-08, "loss": 0.0013, "step": 97500 }, { "epoch": 2.9015220269888267, "grad_norm": 0.11790359765291214, "learning_rate": 3.2759456170067264e-08, "loss": 0.0011, "step": 97510 }, { "epoch": 2.9018195884724682, "grad_norm": 0.23878642916679382, "learning_rate": 3.2561908461911095e-08, "loss": 0.0009, "step": 97520 }, { "epoch": 2.90211714995611, "grad_norm": 0.06985606998205185, "learning_rate": 3.236495624165947e-08, "loss": 0.0023, "step": 97530 }, { "epoch": 2.9024147114397514, "grad_norm": 0.13167804479599, "learning_rate": 3.2168599532922395e-08, "loss": 0.0017, "step": 97540 }, { "epoch": 2.902712272923393, "grad_norm": 0.03633500635623932, "learning_rate": 3.197283835923881e-08, "loss": 0.0023, "step": 97550 }, { "epoch": 2.903009834407034, "grad_norm": 0.11316288262605667, "learning_rate": 3.177767274407606e-08, "loss": 0.002, "step": 97560 }, { "epoch": 2.9033073958906757, "grad_norm": 0.12928925454616547, "learning_rate": 3.158310271083098e-08, "loss": 0.0014, "step": 97570 }, { "epoch": 2.9036049573743172, "grad_norm": 0.06482360512018204, "learning_rate": 3.138912828282714e-08, "loss": 0.0014, "step": 97580 }, { "epoch": 2.903902518857959, "grad_norm": 0.06335671991109848, "learning_rate": 3.119574948331871e-08, "loss": 0.0015, "step": 97590 }, { "epoch": 2.9042000803416004, "grad_norm": 0.21780996024608612, "learning_rate": 3.100296633548771e-08, "loss": 0.0009, "step": 97600 }, { "epoch": 2.904497641825242, "grad_norm": 0.13606838881969452, "learning_rate": 3.0810778862444545e-08, "loss": 0.0017, "step": 97610 }, { "epoch": 2.9047952033088835, "grad_norm": 0.014149731956422329, "learning_rate": 3.061918708722744e-08, "loss": 0.0015, "step": 97620 }, { "epoch": 2.905092764792525, "grad_norm": 0.17885862290859222, "learning_rate": 3.04281910328047e-08, "loss": 0.0009, "step": 97630 }, { "epoch": 2.9053903262761667, "grad_norm": 0.2562674880027771, "learning_rate": 3.023779072207356e-08, "loss": 0.0101, "step": 97640 }, { "epoch": 2.9056878877598082, "grad_norm": 0.09200231730937958, "learning_rate": 3.0047986177857425e-08, "loss": 0.0029, "step": 97650 }, { "epoch": 2.90598544924345, "grad_norm": 0.035785820335149765, "learning_rate": 2.985877742290977e-08, "loss": 0.0009, "step": 97660 }, { "epoch": 2.9062830107270914, "grad_norm": 0.24280785024166107, "learning_rate": 2.9670164479913555e-08, "loss": 0.0019, "step": 97670 }, { "epoch": 2.906580572210733, "grad_norm": 0.12201259285211563, "learning_rate": 2.9482147371479586e-08, "loss": 0.0013, "step": 97680 }, { "epoch": 2.9068781336943745, "grad_norm": 0.0787852481007576, "learning_rate": 2.929472612014539e-08, "loss": 0.0015, "step": 97690 }, { "epoch": 2.907175695178016, "grad_norm": 0.006601736415177584, "learning_rate": 2.9107900748380214e-08, "loss": 0.0027, "step": 97700 }, { "epoch": 2.9074732566616577, "grad_norm": 0.16208216547966003, "learning_rate": 2.892167127858003e-08, "loss": 0.0016, "step": 97710 }, { "epoch": 2.9077708181452993, "grad_norm": 0.07368838787078857, "learning_rate": 2.8736037733069212e-08, "loss": 0.0013, "step": 97720 }, { "epoch": 2.908068379628941, "grad_norm": 0.051641833037137985, "learning_rate": 2.8551000134101613e-08, "loss": 0.0017, "step": 97730 }, { "epoch": 2.9083659411125824, "grad_norm": 0.0972307100892067, "learning_rate": 2.836655850385894e-08, "loss": 0.0021, "step": 97740 }, { "epoch": 2.908663502596224, "grad_norm": 0.1477171778678894, "learning_rate": 2.81827128644524e-08, "loss": 0.0025, "step": 97750 }, { "epoch": 2.9089610640798655, "grad_norm": 0.11839946359395981, "learning_rate": 2.799946323791991e-08, "loss": 0.0013, "step": 97760 }, { "epoch": 2.909258625563507, "grad_norm": 0.24172614514827728, "learning_rate": 2.7816809646230015e-08, "loss": 0.0019, "step": 97770 }, { "epoch": 2.9095561870471487, "grad_norm": 0.2745976150035858, "learning_rate": 2.763475211127853e-08, "loss": 0.0026, "step": 97780 }, { "epoch": 2.9098537485307903, "grad_norm": 0.10018973797559738, "learning_rate": 2.745329065489133e-08, "loss": 0.0006, "step": 97790 }, { "epoch": 2.910151310014432, "grad_norm": 0.150485560297966, "learning_rate": 2.7272425298819904e-08, "loss": 0.0007, "step": 97800 }, { "epoch": 2.9104488714980734, "grad_norm": 0.1286747008562088, "learning_rate": 2.709215606474691e-08, "loss": 0.0018, "step": 97810 }, { "epoch": 2.910746432981715, "grad_norm": 0.05688776448369026, "learning_rate": 2.6912482974282837e-08, "loss": 0.0009, "step": 97820 }, { "epoch": 2.9110439944653566, "grad_norm": 0.17637000977993011, "learning_rate": 2.673340604896657e-08, "loss": 0.0013, "step": 97830 }, { "epoch": 2.911341555948998, "grad_norm": 0.027028853073716164, "learning_rate": 2.6554925310265377e-08, "loss": 0.0014, "step": 97840 }, { "epoch": 2.9116391174326397, "grad_norm": 0.05016682296991348, "learning_rate": 2.6377040779574924e-08, "loss": 0.0032, "step": 97850 }, { "epoch": 2.9119366789162813, "grad_norm": 0.15762187540531158, "learning_rate": 2.619975247822093e-08, "loss": 0.0026, "step": 97860 }, { "epoch": 2.912234240399923, "grad_norm": 0.2054906040430069, "learning_rate": 2.6023060427454727e-08, "loss": 0.0012, "step": 97870 }, { "epoch": 2.9125318018835644, "grad_norm": 0.0618167445063591, "learning_rate": 2.5846964648459372e-08, "loss": 0.0011, "step": 97880 }, { "epoch": 2.912829363367206, "grad_norm": 0.05178723856806755, "learning_rate": 2.5671465162343534e-08, "loss": 0.0007, "step": 97890 }, { "epoch": 2.9131269248508476, "grad_norm": 0.1269194483757019, "learning_rate": 2.549656199014705e-08, "loss": 0.0014, "step": 97900 }, { "epoch": 2.913424486334489, "grad_norm": 0.04127560555934906, "learning_rate": 2.5322255152836483e-08, "loss": 0.0012, "step": 97910 }, { "epoch": 2.9137220478181303, "grad_norm": 0.05611894652247429, "learning_rate": 2.5148544671306783e-08, "loss": 0.0014, "step": 97920 }, { "epoch": 2.914019609301772, "grad_norm": 0.11946576088666916, "learning_rate": 2.4975430566382965e-08, "loss": 0.0014, "step": 97930 }, { "epoch": 2.9143171707854134, "grad_norm": 0.09078061580657959, "learning_rate": 2.480291285881731e-08, "loss": 0.0019, "step": 97940 }, { "epoch": 2.914614732269055, "grad_norm": 0.058488305658102036, "learning_rate": 2.463099156929105e-08, "loss": 0.0014, "step": 97950 }, { "epoch": 2.9149122937526966, "grad_norm": 0.023277858272194862, "learning_rate": 2.445966671841382e-08, "loss": 0.0013, "step": 97960 }, { "epoch": 2.915209855236338, "grad_norm": 0.027120670303702354, "learning_rate": 2.4288938326723633e-08, "loss": 0.0014, "step": 97970 }, { "epoch": 2.9155074167199797, "grad_norm": 0.07162638008594513, "learning_rate": 2.4118806414687446e-08, "loss": 0.0012, "step": 97980 }, { "epoch": 2.9158049782036213, "grad_norm": 0.12102320045232773, "learning_rate": 2.3949271002699503e-08, "loss": 0.0031, "step": 97990 }, { "epoch": 2.916102539687263, "grad_norm": 0.07580208778381348, "learning_rate": 2.3780332111084105e-08, "loss": 0.0009, "step": 98000 }, { "epoch": 2.9164001011709044, "grad_norm": 0.1628655642271042, "learning_rate": 2.361198976009338e-08, "loss": 0.0018, "step": 98010 }, { "epoch": 2.916697662654546, "grad_norm": 0.142686128616333, "learning_rate": 2.344424396990841e-08, "loss": 0.0013, "step": 98020 }, { "epoch": 2.9169952241381876, "grad_norm": 0.3571213185787201, "learning_rate": 2.3277094760636997e-08, "loss": 0.0032, "step": 98030 }, { "epoch": 2.917292785621829, "grad_norm": 0.058971114456653595, "learning_rate": 2.3110542152317005e-08, "loss": 0.0013, "step": 98040 }, { "epoch": 2.9175903471054707, "grad_norm": 0.06655819714069366, "learning_rate": 2.294458616491524e-08, "loss": 0.0008, "step": 98050 }, { "epoch": 2.9178879085891123, "grad_norm": 0.07225093990564346, "learning_rate": 2.277922681832634e-08, "loss": 0.0012, "step": 98060 }, { "epoch": 2.918185470072754, "grad_norm": 0.10835190117359161, "learning_rate": 2.2614464132372227e-08, "loss": 0.0011, "step": 98070 }, { "epoch": 2.9184830315563954, "grad_norm": 0.17156971991062164, "learning_rate": 2.245029812680488e-08, "loss": 0.0037, "step": 98080 }, { "epoch": 2.918780593040037, "grad_norm": 0.12644757330417633, "learning_rate": 2.2286728821304115e-08, "loss": 0.0018, "step": 98090 }, { "epoch": 2.9190781545236786, "grad_norm": 0.198081374168396, "learning_rate": 2.2123756235478688e-08, "loss": 0.0016, "step": 98100 }, { "epoch": 2.91937571600732, "grad_norm": 0.1446094959974289, "learning_rate": 2.1961380388865195e-08, "loss": 0.0015, "step": 98110 }, { "epoch": 2.9196732774909617, "grad_norm": 0.08252212405204773, "learning_rate": 2.1799601300929176e-08, "loss": 0.0014, "step": 98120 }, { "epoch": 2.919970838974603, "grad_norm": 0.22045618295669556, "learning_rate": 2.1638418991064004e-08, "loss": 0.0017, "step": 98130 }, { "epoch": 2.9202684004582444, "grad_norm": 0.07084215432405472, "learning_rate": 2.147783347859256e-08, "loss": 0.0018, "step": 98140 }, { "epoch": 2.920565961941886, "grad_norm": 0.10338116437196732, "learning_rate": 2.1317844782764997e-08, "loss": 0.0044, "step": 98150 }, { "epoch": 2.9208635234255276, "grad_norm": 0.19178172945976257, "learning_rate": 2.1158452922760973e-08, "loss": 0.0016, "step": 98160 }, { "epoch": 2.921161084909169, "grad_norm": 0.10443393141031265, "learning_rate": 2.0999657917687432e-08, "loss": 0.0017, "step": 98170 }, { "epoch": 2.9214586463928107, "grad_norm": 0.04028214141726494, "learning_rate": 2.0841459786581362e-08, "loss": 0.0014, "step": 98180 }, { "epoch": 2.9217562078764523, "grad_norm": 0.109098419547081, "learning_rate": 2.068385854840649e-08, "loss": 0.0021, "step": 98190 }, { "epoch": 2.922053769360094, "grad_norm": 0.07106673717498779, "learning_rate": 2.0526854222056026e-08, "loss": 0.0011, "step": 98200 }, { "epoch": 2.9223513308437354, "grad_norm": 0.12290938943624496, "learning_rate": 2.0370446826351585e-08, "loss": 0.0028, "step": 98210 }, { "epoch": 2.922648892327377, "grad_norm": 0.202427938580513, "learning_rate": 2.021463638004262e-08, "loss": 0.0025, "step": 98220 }, { "epoch": 2.9229464538110186, "grad_norm": 0.013671942055225372, "learning_rate": 2.0059422901807514e-08, "loss": 0.0118, "step": 98230 }, { "epoch": 2.92324401529466, "grad_norm": 0.04854222387075424, "learning_rate": 1.9904806410253608e-08, "loss": 0.0014, "step": 98240 }, { "epoch": 2.9235415767783017, "grad_norm": 0.22886958718299866, "learning_rate": 1.975078692391552e-08, "loss": 0.0016, "step": 98250 }, { "epoch": 2.9238391382619433, "grad_norm": 0.04136678948998451, "learning_rate": 1.9597364461256818e-08, "loss": 0.0007, "step": 98260 }, { "epoch": 2.924136699745585, "grad_norm": 0.2074524164199829, "learning_rate": 1.944453904066945e-08, "loss": 0.0023, "step": 98270 }, { "epoch": 2.9244342612292265, "grad_norm": 0.1932595670223236, "learning_rate": 1.9292310680474325e-08, "loss": 0.002, "step": 98280 }, { "epoch": 2.924731822712868, "grad_norm": 0.06760778278112411, "learning_rate": 1.9140679398919616e-08, "loss": 0.0022, "step": 98290 }, { "epoch": 2.9250293841965096, "grad_norm": 0.175139918923378, "learning_rate": 1.8989645214183007e-08, "loss": 0.002, "step": 98300 }, { "epoch": 2.925326945680151, "grad_norm": 0.11044890433549881, "learning_rate": 1.883920814437057e-08, "loss": 0.001, "step": 98310 }, { "epoch": 2.9256245071637927, "grad_norm": 0.11826159060001373, "learning_rate": 1.868936820751621e-08, "loss": 0.0011, "step": 98320 }, { "epoch": 2.9259220686474343, "grad_norm": 0.043036848306655884, "learning_rate": 1.8540125421581678e-08, "loss": 0.0023, "step": 98330 }, { "epoch": 2.926219630131076, "grad_norm": 0.22899478673934937, "learning_rate": 1.8391479804459324e-08, "loss": 0.0034, "step": 98340 }, { "epoch": 2.9265171916147175, "grad_norm": 0.33624395728111267, "learning_rate": 1.8243431373967113e-08, "loss": 0.0022, "step": 98350 }, { "epoch": 2.926814753098359, "grad_norm": 0.30938291549682617, "learning_rate": 1.8095980147854186e-08, "loss": 0.0024, "step": 98360 }, { "epoch": 2.9271123145820006, "grad_norm": 0.028781216591596603, "learning_rate": 1.7949126143795848e-08, "loss": 0.0013, "step": 98370 }, { "epoch": 2.927409876065642, "grad_norm": 0.16512691974639893, "learning_rate": 1.7802869379396902e-08, "loss": 0.001, "step": 98380 }, { "epoch": 2.9277074375492838, "grad_norm": 0.027881862595677376, "learning_rate": 1.765720987219055e-08, "loss": 0.0014, "step": 98390 }, { "epoch": 2.9280049990329253, "grad_norm": 0.07711463421583176, "learning_rate": 1.7512147639638378e-08, "loss": 0.0035, "step": 98400 }, { "epoch": 2.928302560516567, "grad_norm": 0.11199495941400528, "learning_rate": 1.736768269912925e-08, "loss": 0.0018, "step": 98410 }, { "epoch": 2.9286001220002085, "grad_norm": 0.029151758179068565, "learning_rate": 1.7223815067982097e-08, "loss": 0.002, "step": 98420 }, { "epoch": 2.92889768348385, "grad_norm": 0.5215436816215515, "learning_rate": 1.7080544763443672e-08, "loss": 0.0016, "step": 98430 }, { "epoch": 2.9291952449674916, "grad_norm": 0.15410098433494568, "learning_rate": 1.6937871802689132e-08, "loss": 0.0016, "step": 98440 }, { "epoch": 2.929492806451133, "grad_norm": 0.1531086415052414, "learning_rate": 1.6795796202820902e-08, "loss": 0.0014, "step": 98450 }, { "epoch": 2.9297903679347748, "grad_norm": 0.17206108570098877, "learning_rate": 1.6654317980871472e-08, "loss": 0.0016, "step": 98460 }, { "epoch": 2.9300879294184163, "grad_norm": 0.0662432461977005, "learning_rate": 1.6513437153800604e-08, "loss": 0.0028, "step": 98470 }, { "epoch": 2.930385490902058, "grad_norm": 0.13012579083442688, "learning_rate": 1.6373153738497573e-08, "loss": 0.0013, "step": 98480 }, { "epoch": 2.930683052385699, "grad_norm": 0.20708119869232178, "learning_rate": 1.6233467751778365e-08, "loss": 0.0034, "step": 98490 }, { "epoch": 2.9309806138693406, "grad_norm": 0.18885968625545502, "learning_rate": 1.6094379210389587e-08, "loss": 0.001, "step": 98500 }, { "epoch": 2.931278175352982, "grad_norm": 0.14975124597549438, "learning_rate": 1.595588813100346e-08, "loss": 0.0019, "step": 98510 }, { "epoch": 2.9315757368366238, "grad_norm": 0.1217365488409996, "learning_rate": 1.5817994530222814e-08, "loss": 0.0011, "step": 98520 }, { "epoch": 2.9318732983202653, "grad_norm": 0.22243435680866241, "learning_rate": 1.5680698424577756e-08, "loss": 0.0018, "step": 98530 }, { "epoch": 2.932170859803907, "grad_norm": 0.0643528625369072, "learning_rate": 1.55439998305279e-08, "loss": 0.0014, "step": 98540 }, { "epoch": 2.9324684212875485, "grad_norm": 0.23725996911525726, "learning_rate": 1.5407898764459585e-08, "loss": 0.0014, "step": 98550 }, { "epoch": 2.93276598277119, "grad_norm": 0.12767858803272247, "learning_rate": 1.5272395242688644e-08, "loss": 0.001, "step": 98560 }, { "epoch": 2.9330635442548316, "grad_norm": 0.08807382732629776, "learning_rate": 1.5137489281459306e-08, "loss": 0.0013, "step": 98570 }, { "epoch": 2.933361105738473, "grad_norm": 0.07857507467269897, "learning_rate": 1.5003180896943083e-08, "loss": 0.0014, "step": 98580 }, { "epoch": 2.9336586672221148, "grad_norm": 0.17196352779865265, "learning_rate": 1.4869470105241534e-08, "loss": 0.0016, "step": 98590 }, { "epoch": 2.9339562287057563, "grad_norm": 0.11699820309877396, "learning_rate": 1.473635692238351e-08, "loss": 0.0016, "step": 98600 }, { "epoch": 2.934253790189398, "grad_norm": 0.5466817617416382, "learning_rate": 1.4603841364325687e-08, "loss": 0.0018, "step": 98610 }, { "epoch": 2.9345513516730395, "grad_norm": 0.2484281361103058, "learning_rate": 1.447192344695425e-08, "loss": 0.0089, "step": 98620 }, { "epoch": 2.934848913156681, "grad_norm": 0.06328313797712326, "learning_rate": 1.4340603186083768e-08, "loss": 0.002, "step": 98630 }, { "epoch": 2.9351464746403226, "grad_norm": 0.2016623616218567, "learning_rate": 1.4209880597456093e-08, "loss": 0.0022, "step": 98640 }, { "epoch": 2.935444036123964, "grad_norm": 0.15165673196315765, "learning_rate": 1.4079755696742026e-08, "loss": 0.0014, "step": 98650 }, { "epoch": 2.935741597607606, "grad_norm": 0.07789548486471176, "learning_rate": 1.3950228499540752e-08, "loss": 0.0019, "step": 98660 }, { "epoch": 2.9360391590912474, "grad_norm": 0.0913916528224945, "learning_rate": 1.3821299021380408e-08, "loss": 0.001, "step": 98670 }, { "epoch": 2.936336720574889, "grad_norm": 0.06011403352022171, "learning_rate": 1.3692967277715852e-08, "loss": 0.0015, "step": 98680 }, { "epoch": 2.9366342820585305, "grad_norm": 0.113546222448349, "learning_rate": 1.3565233283931445e-08, "loss": 0.0012, "step": 98690 }, { "epoch": 2.9369318435421716, "grad_norm": 0.015302871353924274, "learning_rate": 1.3438097055340493e-08, "loss": 0.0016, "step": 98700 }, { "epoch": 2.937229405025813, "grad_norm": 0.13971586525440216, "learning_rate": 1.3311558607183028e-08, "loss": 0.0021, "step": 98710 }, { "epoch": 2.9375269665094548, "grad_norm": 0.22362998127937317, "learning_rate": 1.3185617954629137e-08, "loss": 0.004, "step": 98720 }, { "epoch": 2.9378245279930963, "grad_norm": 0.31403255462646484, "learning_rate": 1.3060275112775633e-08, "loss": 0.0033, "step": 98730 }, { "epoch": 2.938122089476738, "grad_norm": 0.10535971075296402, "learning_rate": 1.2935530096648274e-08, "loss": 0.002, "step": 98740 }, { "epoch": 2.9384196509603795, "grad_norm": 0.18137651681900024, "learning_rate": 1.2811382921201765e-08, "loss": 0.0018, "step": 98750 }, { "epoch": 2.938717212444021, "grad_norm": 0.07782983034849167, "learning_rate": 1.2687833601318644e-08, "loss": 0.0019, "step": 98760 }, { "epoch": 2.9390147739276626, "grad_norm": 0.25661200284957886, "learning_rate": 1.2564882151809843e-08, "loss": 0.0017, "step": 98770 }, { "epoch": 2.939312335411304, "grad_norm": 0.23217064142227173, "learning_rate": 1.2442528587414127e-08, "loss": 0.0012, "step": 98780 }, { "epoch": 2.939609896894946, "grad_norm": 0.10221350193023682, "learning_rate": 1.2320772922799207e-08, "loss": 0.0025, "step": 98790 }, { "epoch": 2.9399074583785874, "grad_norm": 0.04871182516217232, "learning_rate": 1.219961517256063e-08, "loss": 0.0017, "step": 98800 }, { "epoch": 2.940205019862229, "grad_norm": 0.14475572109222412, "learning_rate": 1.2079055351223445e-08, "loss": 0.0036, "step": 98810 }, { "epoch": 2.9405025813458705, "grad_norm": 0.17879977822303772, "learning_rate": 1.195909347323998e-08, "loss": 0.002, "step": 98820 }, { "epoch": 2.940800142829512, "grad_norm": 0.26540207862854004, "learning_rate": 1.1839729552990398e-08, "loss": 0.0018, "step": 98830 }, { "epoch": 2.9410977043131536, "grad_norm": 0.099706269800663, "learning_rate": 1.1720963604783808e-08, "loss": 0.0013, "step": 98840 }, { "epoch": 2.941395265796795, "grad_norm": 0.019234852865338326, "learning_rate": 1.1602795642858822e-08, "loss": 0.0017, "step": 98850 }, { "epoch": 2.941692827280437, "grad_norm": 0.3275686800479889, "learning_rate": 1.1485225681379664e-08, "loss": 0.002, "step": 98860 }, { "epoch": 2.9419903887640784, "grad_norm": 0.12647956609725952, "learning_rate": 1.1368253734441726e-08, "loss": 0.0015, "step": 98870 }, { "epoch": 2.94228795024772, "grad_norm": 0.14778561890125275, "learning_rate": 1.1251879816066569e-08, "loss": 0.0024, "step": 98880 }, { "epoch": 2.9425855117313615, "grad_norm": 0.16944807767868042, "learning_rate": 1.1136103940205256e-08, "loss": 0.002, "step": 98890 }, { "epoch": 2.942883073215003, "grad_norm": 0.055514201521873474, "learning_rate": 1.1020926120736686e-08, "loss": 0.0012, "step": 98900 }, { "epoch": 2.9431806346986447, "grad_norm": 0.03377271816134453, "learning_rate": 1.0906346371468147e-08, "loss": 0.0014, "step": 98910 }, { "epoch": 2.9434781961822862, "grad_norm": 0.06532648205757141, "learning_rate": 1.0792364706135872e-08, "loss": 0.0026, "step": 98920 }, { "epoch": 2.943775757665928, "grad_norm": 0.24280600249767303, "learning_rate": 1.0678981138402823e-08, "loss": 0.0018, "step": 98930 }, { "epoch": 2.9440733191495694, "grad_norm": 0.11821755766868591, "learning_rate": 1.0566195681861457e-08, "loss": 0.0022, "step": 98940 }, { "epoch": 2.944370880633211, "grad_norm": 0.2158825397491455, "learning_rate": 1.0454008350033184e-08, "loss": 0.0021, "step": 98950 }, { "epoch": 2.9446684421168525, "grad_norm": 0.06971796602010727, "learning_rate": 1.034241915636558e-08, "loss": 0.0009, "step": 98960 }, { "epoch": 2.944966003600494, "grad_norm": 0.12687760591506958, "learning_rate": 1.023142811423683e-08, "loss": 0.0013, "step": 98970 }, { "epoch": 2.9452635650841357, "grad_norm": 0.050277289003133774, "learning_rate": 1.0121035236951293e-08, "loss": 0.0009, "step": 98980 }, { "epoch": 2.9455611265677772, "grad_norm": 0.06354131549596786, "learning_rate": 1.0011240537743383e-08, "loss": 0.0017, "step": 98990 }, { "epoch": 2.945858688051419, "grad_norm": 0.12920822203159332, "learning_rate": 9.902044029774794e-09, "loss": 0.0014, "step": 99000 }, { "epoch": 2.9461562495350604, "grad_norm": 0.32943207025527954, "learning_rate": 9.793445726136163e-09, "loss": 0.0015, "step": 99010 }, { "epoch": 2.946453811018702, "grad_norm": 0.03888345882296562, "learning_rate": 9.68544563984597e-09, "loss": 0.0013, "step": 99020 }, { "epoch": 2.9467513725023435, "grad_norm": 0.030024329200387, "learning_rate": 9.578043783851076e-09, "loss": 0.0008, "step": 99030 }, { "epoch": 2.947048933985985, "grad_norm": 0.04894943907856941, "learning_rate": 9.471240171026741e-09, "loss": 0.0014, "step": 99040 }, { "epoch": 2.9473464954696267, "grad_norm": 0.107479028403759, "learning_rate": 9.3650348141755e-09, "loss": 0.0021, "step": 99050 }, { "epoch": 2.947644056953268, "grad_norm": 0.015472464263439178, "learning_rate": 9.259427726029946e-09, "loss": 0.0014, "step": 99060 }, { "epoch": 2.9479416184369094, "grad_norm": 0.17840047180652618, "learning_rate": 9.154418919250508e-09, "loss": 0.0011, "step": 99070 }, { "epoch": 2.948239179920551, "grad_norm": 0.17466013133525848, "learning_rate": 9.050008406424337e-09, "loss": 0.0013, "step": 99080 }, { "epoch": 2.9485367414041925, "grad_norm": 0.22101210057735443, "learning_rate": 8.946196200068647e-09, "loss": 0.0016, "step": 99090 }, { "epoch": 2.948834302887834, "grad_norm": 0.13187846541404724, "learning_rate": 8.842982312627924e-09, "loss": 0.001, "step": 99100 }, { "epoch": 2.9491318643714757, "grad_norm": 0.08820786327123642, "learning_rate": 8.740366756475604e-09, "loss": 0.0009, "step": 99110 }, { "epoch": 2.9494294258551172, "grad_norm": 0.13330228626728058, "learning_rate": 8.63834954391296e-09, "loss": 0.0018, "step": 99120 }, { "epoch": 2.949726987338759, "grad_norm": 0.08682655543088913, "learning_rate": 8.536930687169099e-09, "loss": 0.0019, "step": 99130 }, { "epoch": 2.9500245488224004, "grad_norm": 0.037597790360450745, "learning_rate": 8.43611019840318e-09, "loss": 0.001, "step": 99140 }, { "epoch": 2.950322110306042, "grad_norm": 0.0638313740491867, "learning_rate": 8.335888089699983e-09, "loss": 0.0031, "step": 99150 }, { "epoch": 2.9506196717896835, "grad_norm": 0.33696573972702026, "learning_rate": 8.236264373075453e-09, "loss": 0.0017, "step": 99160 }, { "epoch": 2.950917233273325, "grad_norm": 0.09288745373487473, "learning_rate": 8.137239060470592e-09, "loss": 0.0011, "step": 99170 }, { "epoch": 2.9512147947569667, "grad_norm": 0.055945273488759995, "learning_rate": 8.03881216375757e-09, "loss": 0.0016, "step": 99180 }, { "epoch": 2.9515123562406083, "grad_norm": 0.08095069974660873, "learning_rate": 7.940983694735837e-09, "loss": 0.0014, "step": 99190 }, { "epoch": 2.95180991772425, "grad_norm": 0.02760450355708599, "learning_rate": 7.843753665131571e-09, "loss": 0.0017, "step": 99200 }, { "epoch": 2.9521074792078914, "grad_norm": 0.10811564326286316, "learning_rate": 7.74712208660211e-09, "loss": 0.0022, "step": 99210 }, { "epoch": 2.952405040691533, "grad_norm": 0.0633944645524025, "learning_rate": 7.651088970730413e-09, "loss": 0.0019, "step": 99220 }, { "epoch": 2.9527026021751746, "grad_norm": 0.03450648859143257, "learning_rate": 7.555654329028938e-09, "loss": 0.0047, "step": 99230 }, { "epoch": 2.953000163658816, "grad_norm": 0.07696820795536041, "learning_rate": 7.460818172938533e-09, "loss": 0.0014, "step": 99240 }, { "epoch": 2.9532977251424577, "grad_norm": 0.03668638691306114, "learning_rate": 7.366580513827881e-09, "loss": 0.0013, "step": 99250 }, { "epoch": 2.9535952866260993, "grad_norm": 0.046520184725522995, "learning_rate": 7.272941362994057e-09, "loss": 0.0006, "step": 99260 }, { "epoch": 2.9538928481097404, "grad_norm": 0.31224575638771057, "learning_rate": 7.179900731661971e-09, "loss": 0.0016, "step": 99270 }, { "epoch": 2.954190409593382, "grad_norm": 0.08934484422206879, "learning_rate": 7.0874586309854775e-09, "loss": 0.0013, "step": 99280 }, { "epoch": 2.9544879710770235, "grad_norm": 0.18490110337734222, "learning_rate": 6.995615072046824e-09, "loss": 0.0017, "step": 99290 }, { "epoch": 2.954785532560665, "grad_norm": 0.1746569573879242, "learning_rate": 6.904370065855537e-09, "loss": 0.001, "step": 99300 }, { "epoch": 2.9550830940443067, "grad_norm": 0.09827090054750443, "learning_rate": 6.813723623349533e-09, "loss": 0.0017, "step": 99310 }, { "epoch": 2.9553806555279483, "grad_norm": 0.20778492093086243, "learning_rate": 6.7236757553962306e-09, "loss": 0.0014, "step": 99320 }, { "epoch": 2.95567821701159, "grad_norm": 0.03287726640701294, "learning_rate": 6.634226472789773e-09, "loss": 0.0009, "step": 99330 }, { "epoch": 2.9559757784952314, "grad_norm": 0.11648154258728027, "learning_rate": 6.545375786253805e-09, "loss": 0.0007, "step": 99340 }, { "epoch": 2.956273339978873, "grad_norm": 0.4303368926048279, "learning_rate": 6.4571237064386946e-09, "loss": 0.002, "step": 99350 }, { "epoch": 2.9565709014625146, "grad_norm": 0.09860081970691681, "learning_rate": 6.369470243924869e-09, "loss": 0.0011, "step": 99360 }, { "epoch": 2.956868462946156, "grad_norm": 0.06474345922470093, "learning_rate": 6.282415409219478e-09, "loss": 0.0015, "step": 99370 }, { "epoch": 2.9571660244297977, "grad_norm": 0.05372850224375725, "learning_rate": 6.195959212758617e-09, "loss": 0.0014, "step": 99380 }, { "epoch": 2.9574635859134393, "grad_norm": 0.04296031594276428, "learning_rate": 6.110101664906776e-09, "loss": 0.0015, "step": 99390 }, { "epoch": 2.957761147397081, "grad_norm": 0.028889738023281097, "learning_rate": 6.024842775956275e-09, "loss": 0.0019, "step": 99400 }, { "epoch": 2.9580587088807224, "grad_norm": 0.10655377805233002, "learning_rate": 5.940182556127272e-09, "loss": 0.0019, "step": 99410 }, { "epoch": 2.958356270364364, "grad_norm": 0.009068929590284824, "learning_rate": 5.856121015569427e-09, "loss": 0.0013, "step": 99420 }, { "epoch": 2.9586538318480056, "grad_norm": 0.14649568498134613, "learning_rate": 5.772658164359679e-09, "loss": 0.0018, "step": 99430 }, { "epoch": 2.958951393331647, "grad_norm": 0.4043935537338257, "learning_rate": 5.689794012503358e-09, "loss": 0.0029, "step": 99440 }, { "epoch": 2.9592489548152887, "grad_norm": 0.12997300922870636, "learning_rate": 5.607528569934184e-09, "loss": 0.0027, "step": 99450 }, { "epoch": 2.9595465162989303, "grad_norm": 0.11779427528381348, "learning_rate": 5.525861846513714e-09, "loss": 0.0013, "step": 99460 }, { "epoch": 2.959844077782572, "grad_norm": 0.1450837254524231, "learning_rate": 5.444793852031894e-09, "loss": 0.0017, "step": 99470 }, { "epoch": 2.9601416392662134, "grad_norm": 0.11961575597524643, "learning_rate": 5.364324596207615e-09, "loss": 0.0016, "step": 99480 }, { "epoch": 2.960439200749855, "grad_norm": 3.8980629444122314, "learning_rate": 5.284454088687052e-09, "loss": 0.0103, "step": 99490 }, { "epoch": 2.9607367622334966, "grad_norm": 0.1482389122247696, "learning_rate": 5.205182339044767e-09, "loss": 0.0043, "step": 99500 }, { "epoch": 2.961034323717138, "grad_norm": 0.22226296365261078, "learning_rate": 5.126509356784271e-09, "loss": 0.0014, "step": 99510 }, { "epoch": 2.9613318852007797, "grad_norm": 0.13450782001018524, "learning_rate": 5.0484351513357955e-09, "loss": 0.0015, "step": 99520 }, { "epoch": 2.9616294466844213, "grad_norm": 0.14471183717250824, "learning_rate": 4.970959732060188e-09, "loss": 0.0023, "step": 99530 }, { "epoch": 2.961927008168063, "grad_norm": 0.4637986421585083, "learning_rate": 4.894083108243908e-09, "loss": 0.0025, "step": 99540 }, { "epoch": 2.9622245696517044, "grad_norm": 0.15234613418579102, "learning_rate": 4.817805289102362e-09, "loss": 0.0025, "step": 99550 }, { "epoch": 2.962522131135346, "grad_norm": 0.0770464688539505, "learning_rate": 4.742126283781012e-09, "loss": 0.0016, "step": 99560 }, { "epoch": 2.9628196926189876, "grad_norm": 0.30984944105148315, "learning_rate": 4.667046101351491e-09, "loss": 0.0031, "step": 99570 }, { "epoch": 2.963117254102629, "grad_norm": 0.07062900066375732, "learning_rate": 4.592564750814377e-09, "loss": 0.0014, "step": 99580 }, { "epoch": 2.9634148155862707, "grad_norm": 0.08481699973344803, "learning_rate": 4.518682241098083e-09, "loss": 0.0014, "step": 99590 }, { "epoch": 2.9637123770699123, "grad_norm": 0.08022064715623856, "learning_rate": 4.445398581059413e-09, "loss": 0.0015, "step": 99600 }, { "epoch": 2.964009938553554, "grad_norm": 0.09771647304296494, "learning_rate": 4.372713779484117e-09, "loss": 0.0017, "step": 99610 }, { "epoch": 2.9643075000371955, "grad_norm": 0.20842471718788147, "learning_rate": 4.300627845085226e-09, "loss": 0.0039, "step": 99620 }, { "epoch": 2.9646050615208366, "grad_norm": 0.06884737312793732, "learning_rate": 4.229140786503605e-09, "loss": 0.0021, "step": 99630 }, { "epoch": 2.964902623004478, "grad_norm": 0.03812003135681152, "learning_rate": 4.1582526123101765e-09, "loss": 0.0021, "step": 99640 }, { "epoch": 2.9652001844881197, "grad_norm": 0.3951622247695923, "learning_rate": 4.087963331002032e-09, "loss": 0.0022, "step": 99650 }, { "epoch": 2.9654977459717613, "grad_norm": 0.15656234323978424, "learning_rate": 4.018272951005764e-09, "loss": 0.002, "step": 99660 }, { "epoch": 2.965795307455403, "grad_norm": 0.14484860002994537, "learning_rate": 3.949181480675801e-09, "loss": 0.0019, "step": 99670 }, { "epoch": 2.9660928689390444, "grad_norm": 0.3223671317100525, "learning_rate": 3.880688928293852e-09, "loss": 0.0024, "step": 99680 }, { "epoch": 2.966390430422686, "grad_norm": 0.02635754458606243, "learning_rate": 3.812795302071681e-09, "loss": 0.0011, "step": 99690 }, { "epoch": 2.9666879919063276, "grad_norm": 0.17583703994750977, "learning_rate": 3.745500610147779e-09, "loss": 0.0017, "step": 99700 }, { "epoch": 2.966985553389969, "grad_norm": 0.19182221591472626, "learning_rate": 3.678804860590135e-09, "loss": 0.002, "step": 99710 }, { "epoch": 2.9672831148736107, "grad_norm": 0.004495508968830109, "learning_rate": 3.612708061392356e-09, "loss": 0.0023, "step": 99720 }, { "epoch": 2.9675806763572523, "grad_norm": 0.1857752799987793, "learning_rate": 3.547210220479769e-09, "loss": 0.0023, "step": 99730 }, { "epoch": 2.967878237840894, "grad_norm": 0.05352848768234253, "learning_rate": 3.482311345703315e-09, "loss": 0.0017, "step": 99740 }, { "epoch": 2.9681757993245355, "grad_norm": 0.24504142999649048, "learning_rate": 3.4180114448428837e-09, "loss": 0.0014, "step": 99750 }, { "epoch": 2.968473360808177, "grad_norm": 0.045339182019233704, "learning_rate": 3.3543105256073073e-09, "loss": 0.0012, "step": 99760 }, { "epoch": 2.9687709222918186, "grad_norm": 0.29515373706817627, "learning_rate": 3.291208595632145e-09, "loss": 0.002, "step": 99770 }, { "epoch": 2.96906848377546, "grad_norm": 0.05005528777837753, "learning_rate": 3.2287056624824565e-09, "loss": 0.0016, "step": 99780 }, { "epoch": 2.9693660452591017, "grad_norm": 0.05452429875731468, "learning_rate": 3.166801733650582e-09, "loss": 0.0034, "step": 99790 }, { "epoch": 2.9696636067427433, "grad_norm": 0.12369000911712646, "learning_rate": 3.105496816557807e-09, "loss": 0.0029, "step": 99800 }, { "epoch": 2.969961168226385, "grad_norm": 0.03528565913438797, "learning_rate": 3.044790918553253e-09, "loss": 0.002, "step": 99810 }, { "epoch": 2.9702587297100265, "grad_norm": 0.08041874319314957, "learning_rate": 2.984684046913877e-09, "loss": 0.0021, "step": 99820 }, { "epoch": 2.970556291193668, "grad_norm": 0.11186698824167252, "learning_rate": 2.9251762088455817e-09, "loss": 0.0051, "step": 99830 }, { "epoch": 2.9708538526773096, "grad_norm": 0.07472174614667892, "learning_rate": 2.8662674114821043e-09, "loss": 0.001, "step": 99840 }, { "epoch": 2.9711514141609507, "grad_norm": 0.19589850306510925, "learning_rate": 2.807957661885019e-09, "loss": 0.0048, "step": 99850 }, { "epoch": 2.9714489756445923, "grad_norm": 0.0438813716173172, "learning_rate": 2.750246967044845e-09, "loss": 0.002, "step": 99860 }, { "epoch": 2.971746537128234, "grad_norm": 0.230778768658638, "learning_rate": 2.693135333878827e-09, "loss": 0.0017, "step": 99870 }, { "epoch": 2.9720440986118755, "grad_norm": 0.15083064138889313, "learning_rate": 2.63662276923482e-09, "loss": 0.0012, "step": 99880 }, { "epoch": 2.972341660095517, "grad_norm": 0.04051138460636139, "learning_rate": 2.5807092798862953e-09, "loss": 0.0031, "step": 99890 }, { "epoch": 2.9726392215791586, "grad_norm": 0.15253905951976776, "learning_rate": 2.5253948725367794e-09, "loss": 0.002, "step": 99900 }, { "epoch": 2.9729367830628, "grad_norm": 0.09901861101388931, "learning_rate": 2.470679553816524e-09, "loss": 0.003, "step": 99910 }, { "epoch": 2.9732343445464418, "grad_norm": 0.09098390489816666, "learning_rate": 2.416563330285282e-09, "loss": 0.0012, "step": 99920 }, { "epoch": 2.9735319060300833, "grad_norm": 0.08671852201223373, "learning_rate": 2.3630462084306414e-09, "loss": 0.0008, "step": 99930 }, { "epoch": 2.973829467513725, "grad_norm": 0.09084725379943848, "learning_rate": 2.3101281946680265e-09, "loss": 0.0019, "step": 99940 }, { "epoch": 2.9741270289973665, "grad_norm": 0.16026386618614197, "learning_rate": 2.257809295340696e-09, "loss": 0.0011, "step": 99950 }, { "epoch": 2.974424590481008, "grad_norm": 0.2240859568119049, "learning_rate": 2.2060895167203e-09, "loss": 0.0024, "step": 99960 }, { "epoch": 2.9747221519646496, "grad_norm": 0.062324054539203644, "learning_rate": 2.154968865007989e-09, "loss": 0.002, "step": 99970 }, { "epoch": 2.975019713448291, "grad_norm": 0.08334305137395859, "learning_rate": 2.1044473463310843e-09, "loss": 0.0019, "step": 99980 }, { "epoch": 2.9753172749319328, "grad_norm": 0.2364354282617569, "learning_rate": 2.0545249667469626e-09, "loss": 0.0014, "step": 99990 }, { "epoch": 2.9756148364155743, "grad_norm": 0.05835684388875961, "learning_rate": 2.00520173223917e-09, "loss": 0.0011, "step": 100000 }, { "epoch": 2.9756148364155743, "eval_loss": 0.0024060173891484737, "eval_runtime": 3.9248, "eval_samples_per_second": 50.958, "eval_steps_per_second": 12.739, "step": 100000 }, { "epoch": 2.975912397899216, "grad_norm": 0.06908328086137772, "learning_rate": 1.956477648721311e-09, "loss": 0.0026, "step": 100010 }, { "epoch": 2.9762099593828575, "grad_norm": 0.19701267778873444, "learning_rate": 1.908352722033713e-09, "loss": 0.0014, "step": 100020 }, { "epoch": 2.976507520866499, "grad_norm": 0.22373199462890625, "learning_rate": 1.8608269579456496e-09, "loss": 0.0021, "step": 100030 }, { "epoch": 2.9768050823501406, "grad_norm": 0.33650654554367065, "learning_rate": 1.8139003621547857e-09, "loss": 0.0012, "step": 100040 }, { "epoch": 2.977102643833782, "grad_norm": 0.10684695094823837, "learning_rate": 1.7675729402866216e-09, "loss": 0.0012, "step": 100050 }, { "epoch": 2.9774002053174238, "grad_norm": 0.05942963436245918, "learning_rate": 1.7218446978939373e-09, "loss": 0.0013, "step": 100060 }, { "epoch": 2.9776977668010653, "grad_norm": 0.0639771819114685, "learning_rate": 1.6767156404595698e-09, "loss": 0.0019, "step": 100070 }, { "epoch": 2.977995328284707, "grad_norm": 0.0989387035369873, "learning_rate": 1.63218577339308e-09, "loss": 0.0025, "step": 100080 }, { "epoch": 2.9782928897683485, "grad_norm": 0.1317049264907837, "learning_rate": 1.5882551020329762e-09, "loss": 0.0022, "step": 100090 }, { "epoch": 2.97859045125199, "grad_norm": 0.15160050988197327, "learning_rate": 1.5449236316450456e-09, "loss": 0.0017, "step": 100100 }, { "epoch": 2.9788880127356316, "grad_norm": 0.0788276195526123, "learning_rate": 1.502191367424022e-09, "loss": 0.001, "step": 100110 }, { "epoch": 2.979185574219273, "grad_norm": 0.09414684772491455, "learning_rate": 1.4600583144930291e-09, "loss": 0.0017, "step": 100120 }, { "epoch": 2.979483135702915, "grad_norm": 0.2738887071609497, "learning_rate": 1.4185244779019168e-09, "loss": 0.0025, "step": 100130 }, { "epoch": 2.9797806971865564, "grad_norm": 0.3390924036502838, "learning_rate": 1.3775898626305907e-09, "loss": 0.0049, "step": 100140 }, { "epoch": 2.980078258670198, "grad_norm": 0.1719864457845688, "learning_rate": 1.3372544735856807e-09, "loss": 0.0029, "step": 100150 }, { "epoch": 2.9803758201538395, "grad_norm": 0.16229727864265442, "learning_rate": 1.2975183156027637e-09, "loss": 0.0016, "step": 100160 }, { "epoch": 2.980673381637481, "grad_norm": 0.25142931938171387, "learning_rate": 1.2583813934452517e-09, "loss": 0.0009, "step": 100170 }, { "epoch": 2.9809709431211227, "grad_norm": 0.10114864259958267, "learning_rate": 1.2198437118049467e-09, "loss": 0.0017, "step": 100180 }, { "epoch": 2.9812685046047642, "grad_norm": 0.40707284212112427, "learning_rate": 1.1819052753014871e-09, "loss": 0.0044, "step": 100190 }, { "epoch": 2.9815660660884054, "grad_norm": 0.12076224386692047, "learning_rate": 1.1445660884834564e-09, "loss": 0.0015, "step": 100200 }, { "epoch": 2.981863627572047, "grad_norm": 0.06668150424957275, "learning_rate": 1.1078261558261638e-09, "loss": 0.0014, "step": 100210 }, { "epoch": 2.9821611890556885, "grad_norm": 0.15145918726921082, "learning_rate": 1.071685481734419e-09, "loss": 0.0015, "step": 100220 }, { "epoch": 2.98245875053933, "grad_norm": 0.1184655949473381, "learning_rate": 1.0361440705403126e-09, "loss": 0.0015, "step": 100230 }, { "epoch": 2.9827563120229716, "grad_norm": 0.06573411822319031, "learning_rate": 1.00120192650488e-09, "loss": 0.0013, "step": 100240 }, { "epoch": 2.983053873506613, "grad_norm": 0.1264464259147644, "learning_rate": 9.668590538169931e-10, "loss": 0.0013, "step": 100250 }, { "epoch": 2.983351434990255, "grad_norm": 0.05205975100398064, "learning_rate": 9.331154565933586e-10, "loss": 0.0021, "step": 100260 }, { "epoch": 2.9836489964738964, "grad_norm": 0.20050235092639923, "learning_rate": 8.999711388790744e-10, "loss": 0.0015, "step": 100270 }, { "epoch": 2.983946557957538, "grad_norm": 0.04731978476047516, "learning_rate": 8.674261046470734e-10, "loss": 0.002, "step": 100280 }, { "epoch": 2.9842441194411795, "grad_norm": 0.04146779701113701, "learning_rate": 8.354803577997895e-10, "loss": 0.0016, "step": 100290 }, { "epoch": 2.984541680924821, "grad_norm": 0.14333608746528625, "learning_rate": 8.041339021663819e-10, "loss": 0.0009, "step": 100300 }, { "epoch": 2.9848392424084627, "grad_norm": 0.11625108867883682, "learning_rate": 7.733867415044005e-10, "loss": 0.0017, "step": 100310 }, { "epoch": 2.9851368038921042, "grad_norm": 0.24088510870933533, "learning_rate": 7.432388794997858e-10, "loss": 0.0023, "step": 100320 }, { "epoch": 2.985434365375746, "grad_norm": 0.059265270829200745, "learning_rate": 7.136903197663136e-10, "loss": 0.0024, "step": 100330 }, { "epoch": 2.9857319268593874, "grad_norm": 0.1748286336660385, "learning_rate": 6.847410658472609e-10, "loss": 0.0023, "step": 100340 }, { "epoch": 2.986029488343029, "grad_norm": 0.09689918905496597, "learning_rate": 6.563911212120743e-10, "loss": 0.0013, "step": 100350 }, { "epoch": 2.9863270498266705, "grad_norm": 0.10409583151340485, "learning_rate": 6.286404892591469e-10, "loss": 0.0018, "step": 100360 }, { "epoch": 2.986624611310312, "grad_norm": 0.17739613354206085, "learning_rate": 6.014891733163719e-10, "loss": 0.0016, "step": 100370 }, { "epoch": 2.9869221727939537, "grad_norm": 0.1083010733127594, "learning_rate": 5.74937176637258e-10, "loss": 0.0016, "step": 100380 }, { "epoch": 2.9872197342775952, "grad_norm": 0.04649268463253975, "learning_rate": 5.489845024053698e-10, "loss": 0.0017, "step": 100390 }, { "epoch": 2.987517295761237, "grad_norm": 0.21801742911338806, "learning_rate": 5.23631153732107e-10, "loss": 0.0022, "step": 100400 }, { "epoch": 2.9878148572448784, "grad_norm": 0.14845862984657288, "learning_rate": 4.988771336567055e-10, "loss": 0.0013, "step": 100410 }, { "epoch": 2.9881124187285195, "grad_norm": 0.12807448208332062, "learning_rate": 4.747224451462363e-10, "loss": 0.0029, "step": 100420 }, { "epoch": 2.988409980212161, "grad_norm": 0.17327085137367249, "learning_rate": 4.511670910967159e-10, "loss": 0.0013, "step": 100430 }, { "epoch": 2.9887075416958027, "grad_norm": 0.20214004814624786, "learning_rate": 4.282110743314416e-10, "loss": 0.0008, "step": 100440 }, { "epoch": 2.9890051031794442, "grad_norm": 0.08559747785329819, "learning_rate": 4.0585439760321146e-10, "loss": 0.0025, "step": 100450 }, { "epoch": 2.989302664663086, "grad_norm": 0.08872087299823761, "learning_rate": 3.840970635909935e-10, "loss": 0.002, "step": 100460 }, { "epoch": 2.9896002261467274, "grad_norm": 0.13537606596946716, "learning_rate": 3.62939074903812e-10, "loss": 0.0017, "step": 100470 }, { "epoch": 2.989897787630369, "grad_norm": 0.013887898996472359, "learning_rate": 3.423804340779713e-10, "loss": 0.0008, "step": 100480 }, { "epoch": 2.9901953491140105, "grad_norm": 0.04831043630838394, "learning_rate": 3.224211435781666e-10, "loss": 0.0009, "step": 100490 }, { "epoch": 2.990492910597652, "grad_norm": 0.20497876405715942, "learning_rate": 3.0306120579637333e-10, "loss": 0.0014, "step": 100500 }, { "epoch": 2.9907904720812937, "grad_norm": 0.15626516938209534, "learning_rate": 2.843006230535128e-10, "loss": 0.0019, "step": 100510 }, { "epoch": 2.9910880335649352, "grad_norm": 0.035183146595954895, "learning_rate": 2.6613939759945195e-10, "loss": 0.0038, "step": 100520 }, { "epoch": 2.991385595048577, "grad_norm": 0.0359540656208992, "learning_rate": 2.48577531610783e-10, "loss": 0.0017, "step": 100530 }, { "epoch": 2.9916831565322184, "grad_norm": 0.08988898992538452, "learning_rate": 2.3161502719248886e-10, "loss": 0.0015, "step": 100540 }, { "epoch": 2.99198071801586, "grad_norm": 0.12391964346170425, "learning_rate": 2.1525188637849804e-10, "loss": 0.001, "step": 100550 }, { "epoch": 2.9922782794995015, "grad_norm": 0.025525571778416634, "learning_rate": 1.9948811113001953e-10, "loss": 0.0009, "step": 100560 }, { "epoch": 2.992575840983143, "grad_norm": 0.026190025731921196, "learning_rate": 1.8432370333720805e-10, "loss": 0.0017, "step": 100570 }, { "epoch": 2.9928734024667847, "grad_norm": 0.16149313747882843, "learning_rate": 1.697586648174987e-10, "loss": 0.0012, "step": 100580 }, { "epoch": 2.9931709639504263, "grad_norm": 0.049432504922151566, "learning_rate": 1.5579299731671715e-10, "loss": 0.0011, "step": 100590 }, { "epoch": 2.993468525434068, "grad_norm": 0.17412994801998138, "learning_rate": 1.4242670251018997e-10, "loss": 0.0024, "step": 100600 }, { "epoch": 2.9937660869177094, "grad_norm": 0.07911474257707596, "learning_rate": 1.2965978199941388e-10, "loss": 0.0025, "step": 100610 }, { "epoch": 2.994063648401351, "grad_norm": 0.09256621450185776, "learning_rate": 1.174922373148313e-10, "loss": 0.0028, "step": 100620 }, { "epoch": 2.9943612098849925, "grad_norm": 0.038505781441926956, "learning_rate": 1.0592406991472015e-10, "loss": 0.0012, "step": 100630 }, { "epoch": 2.994658771368634, "grad_norm": 0.1429326981306076, "learning_rate": 9.495528118685926e-11, "loss": 0.002, "step": 100640 }, { "epoch": 2.9949563328522757, "grad_norm": 0.09649589657783508, "learning_rate": 8.458587244575268e-11, "loss": 0.002, "step": 100650 }, { "epoch": 2.9952538943359173, "grad_norm": 0.1050792708992958, "learning_rate": 7.481584493373994e-11, "loss": 0.0016, "step": 100660 }, { "epoch": 2.995551455819559, "grad_norm": 0.24642041325569153, "learning_rate": 6.56451998232166e-11, "loss": 0.0013, "step": 100670 }, { "epoch": 2.9958490173032004, "grad_norm": 0.10798586159944534, "learning_rate": 5.7073938212748315e-11, "loss": 0.0017, "step": 100680 }, { "epoch": 2.996146578786842, "grad_norm": 0.09033340215682983, "learning_rate": 4.9102061130401616e-11, "loss": 0.001, "step": 100690 }, { "epoch": 2.9964441402704836, "grad_norm": 0.10000459849834442, "learning_rate": 4.172956953096829e-11, "loss": 0.0012, "step": 100700 }, { "epoch": 2.996741701754125, "grad_norm": 0.005388373509049416, "learning_rate": 3.4956464298741e-11, "loss": 0.0025, "step": 100710 }, { "epoch": 2.9970392632377667, "grad_norm": 0.2167316973209381, "learning_rate": 2.8782746246402983e-11, "loss": 0.0018, "step": 100720 }, { "epoch": 2.9973368247214083, "grad_norm": 0.07696081697940826, "learning_rate": 2.3208416112807664e-11, "loss": 0.0012, "step": 100730 }, { "epoch": 2.99763438620505, "grad_norm": 0.1424436867237091, "learning_rate": 1.8233474566864418e-11, "loss": 0.0013, "step": 100740 }, { "epoch": 2.9979319476886914, "grad_norm": 0.051672857254743576, "learning_rate": 1.3857922204763008e-11, "loss": 0.0024, "step": 100750 }, { "epoch": 2.998229509172333, "grad_norm": 0.351142555475235, "learning_rate": 1.0081759551083814e-11, "loss": 0.0026, "step": 100760 }, { "epoch": 2.998527070655974, "grad_norm": 0.1769547164440155, "learning_rate": 6.90498705879783e-12, "loss": 0.0015, "step": 100770 }, { "epoch": 2.9988246321396157, "grad_norm": 0.22977161407470703, "learning_rate": 4.327605108156441e-12, "loss": 0.0017, "step": 100780 }, { "epoch": 2.9991221936232573, "grad_norm": 0.11162953078746796, "learning_rate": 2.3496140089118715e-12, "loss": 0.002, "step": 100790 }, { "epoch": 2.999419755106899, "grad_norm": 0.31060510873794556, "learning_rate": 9.71013996986514e-13, "loss": 0.0019, "step": 100800 }, { "epoch": 2.9997173165905404, "grad_norm": 0.09186103940010071, "learning_rate": 1.918052389138225e-13, "loss": 0.0024, "step": 100810 }, { "epoch": 2.9999553657774536, "step": 100818, "total_flos": 3.9696050120357315e+18, "train_loss": 0.008893722523577549, "train_runtime": 76794.855, "train_samples_per_second": 10.503, "train_steps_per_second": 1.313 } ], "logging_steps": 10, "max_steps": 100818, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.9696050120357315e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }