{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 8139, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00012286521685710776, "grad_norm": 5.513866090258891, "learning_rate": 3.685503685503685e-08, "loss": 0.9282, "step": 1 }, { "epoch": 0.0002457304337142155, "grad_norm": 4.9554528632421695, "learning_rate": 7.37100737100737e-08, "loss": 0.9808, "step": 2 }, { "epoch": 0.00036859565057132326, "grad_norm": 4.462062461574065, "learning_rate": 1.1056511056511058e-07, "loss": 0.9555, "step": 3 }, { "epoch": 0.000491460867428431, "grad_norm": 4.656553225564516, "learning_rate": 1.474201474201474e-07, "loss": 0.923, "step": 4 }, { "epoch": 0.0006143260842855387, "grad_norm": 4.3910806878746405, "learning_rate": 1.8427518427518426e-07, "loss": 0.9445, "step": 5 }, { "epoch": 0.0007371913011426465, "grad_norm": 5.394970800217896, "learning_rate": 2.2113022113022115e-07, "loss": 1.0469, "step": 6 }, { "epoch": 0.0008600565179997543, "grad_norm": 5.357960943161164, "learning_rate": 2.57985257985258e-07, "loss": 0.9159, "step": 7 }, { "epoch": 0.000982921734856862, "grad_norm": 5.023035223958168, "learning_rate": 2.948402948402948e-07, "loss": 0.9649, "step": 8 }, { "epoch": 0.0011057869517139699, "grad_norm": 5.6997600638412225, "learning_rate": 3.3169533169533167e-07, "loss": 0.9858, "step": 9 }, { "epoch": 0.0012286521685710775, "grad_norm": 4.822320421005481, "learning_rate": 3.685503685503685e-07, "loss": 0.9486, "step": 10 }, { "epoch": 0.0013515173854281852, "grad_norm": 6.200074438855475, "learning_rate": 4.0540540540540546e-07, "loss": 1.083, "step": 11 }, { "epoch": 0.001474382602285293, "grad_norm": 5.925483423081176, "learning_rate": 4.422604422604423e-07, "loss": 0.9326, "step": 12 }, { "epoch": 0.0015972478191424008, "grad_norm": 5.36625544305861, "learning_rate": 4.791154791154791e-07, "loss": 1.0109, "step": 13 }, { "epoch": 0.0017201130359995086, "grad_norm": 4.321108070196379, "learning_rate": 5.15970515970516e-07, "loss": 0.8756, "step": 14 }, { "epoch": 0.0018429782528566164, "grad_norm": 4.939830306048706, "learning_rate": 5.528255528255528e-07, "loss": 1.0655, "step": 15 }, { "epoch": 0.001965843469713724, "grad_norm": 4.799782704105224, "learning_rate": 5.896805896805896e-07, "loss": 1.093, "step": 16 }, { "epoch": 0.0020887086865708318, "grad_norm": 4.308163412038154, "learning_rate": 6.265356265356265e-07, "loss": 0.9188, "step": 17 }, { "epoch": 0.0022115739034279398, "grad_norm": 5.033423055391164, "learning_rate": 6.633906633906633e-07, "loss": 0.9692, "step": 18 }, { "epoch": 0.0023344391202850473, "grad_norm": 4.393822453744325, "learning_rate": 7.002457002457002e-07, "loss": 1.0138, "step": 19 }, { "epoch": 0.002457304337142155, "grad_norm": 4.2097510698903, "learning_rate": 7.37100737100737e-07, "loss": 0.9279, "step": 20 }, { "epoch": 0.002580169553999263, "grad_norm": 3.9117326521215965, "learning_rate": 7.73955773955774e-07, "loss": 1.0043, "step": 21 }, { "epoch": 0.0027030347708563705, "grad_norm": 4.3969351698596295, "learning_rate": 8.108108108108109e-07, "loss": 0.9832, "step": 22 }, { "epoch": 0.0028258999877134785, "grad_norm": 4.003253123142959, "learning_rate": 8.476658476658478e-07, "loss": 0.9429, "step": 23 }, { "epoch": 0.002948765204570586, "grad_norm": 3.4494151704322524, "learning_rate": 8.845208845208846e-07, "loss": 0.9876, "step": 24 }, { "epoch": 0.0030716304214276936, "grad_norm": 3.6501311378231653, "learning_rate": 9.213759213759215e-07, "loss": 0.867, "step": 25 }, { "epoch": 0.0031944956382848016, "grad_norm": 3.3634474475404894, "learning_rate": 9.582309582309582e-07, "loss": 0.8617, "step": 26 }, { "epoch": 0.003317360855141909, "grad_norm": 4.117736000713901, "learning_rate": 9.95085995085995e-07, "loss": 1.0234, "step": 27 }, { "epoch": 0.003440226071999017, "grad_norm": 3.5647953413926996, "learning_rate": 1.031941031941032e-06, "loss": 0.9908, "step": 28 }, { "epoch": 0.003563091288856125, "grad_norm": 3.866749586946385, "learning_rate": 1.0687960687960688e-06, "loss": 0.9723, "step": 29 }, { "epoch": 0.003685956505713233, "grad_norm": 4.06338403512261, "learning_rate": 1.1056511056511056e-06, "loss": 0.9832, "step": 30 }, { "epoch": 0.0038088217225703404, "grad_norm": 4.605165542662463, "learning_rate": 1.1425061425061425e-06, "loss": 0.9528, "step": 31 }, { "epoch": 0.003931686939427448, "grad_norm": 3.4567611655777437, "learning_rate": 1.1793611793611793e-06, "loss": 0.8588, "step": 32 }, { "epoch": 0.004054552156284556, "grad_norm": 3.4951438002510287, "learning_rate": 1.2162162162162162e-06, "loss": 0.9919, "step": 33 }, { "epoch": 0.0041774173731416635, "grad_norm": 3.035179925207047, "learning_rate": 1.253071253071253e-06, "loss": 0.9158, "step": 34 }, { "epoch": 0.004300282589998771, "grad_norm": 3.071876797860922, "learning_rate": 1.28992628992629e-06, "loss": 0.9149, "step": 35 }, { "epoch": 0.0044231478068558795, "grad_norm": 4.111005566745022, "learning_rate": 1.3267813267813267e-06, "loss": 0.95, "step": 36 }, { "epoch": 0.004546013023712987, "grad_norm": 3.8168902975192087, "learning_rate": 1.3636363636363636e-06, "loss": 0.871, "step": 37 }, { "epoch": 0.004668878240570095, "grad_norm": 3.8555521925282537, "learning_rate": 1.4004914004914004e-06, "loss": 0.9557, "step": 38 }, { "epoch": 0.004791743457427202, "grad_norm": 2.939536985558811, "learning_rate": 1.4373464373464373e-06, "loss": 0.8727, "step": 39 }, { "epoch": 0.00491460867428431, "grad_norm": 3.034058598134351, "learning_rate": 1.474201474201474e-06, "loss": 0.9908, "step": 40 }, { "epoch": 0.005037473891141418, "grad_norm": 2.845652611291133, "learning_rate": 1.5110565110565112e-06, "loss": 0.8475, "step": 41 }, { "epoch": 0.005160339107998526, "grad_norm": 2.2463314144132607, "learning_rate": 1.547911547911548e-06, "loss": 0.8375, "step": 42 }, { "epoch": 0.005283204324855633, "grad_norm": 2.8150265768403635, "learning_rate": 1.584766584766585e-06, "loss": 0.8842, "step": 43 }, { "epoch": 0.005406069541712741, "grad_norm": 3.400436755110308, "learning_rate": 1.6216216216216219e-06, "loss": 0.9731, "step": 44 }, { "epoch": 0.0055289347585698485, "grad_norm": 2.5875092640144626, "learning_rate": 1.6584766584766586e-06, "loss": 0.8952, "step": 45 }, { "epoch": 0.005651799975426957, "grad_norm": 2.9967583649325187, "learning_rate": 1.6953316953316955e-06, "loss": 0.8758, "step": 46 }, { "epoch": 0.0057746651922840645, "grad_norm": 3.726508092699981, "learning_rate": 1.7321867321867323e-06, "loss": 0.8588, "step": 47 }, { "epoch": 0.005897530409141172, "grad_norm": 2.4185906853044563, "learning_rate": 1.7690417690417692e-06, "loss": 0.9433, "step": 48 }, { "epoch": 0.00602039562599828, "grad_norm": 2.1300107100086225, "learning_rate": 1.805896805896806e-06, "loss": 0.8694, "step": 49 }, { "epoch": 0.006143260842855387, "grad_norm": 1.9725055247468293, "learning_rate": 1.842751842751843e-06, "loss": 0.9715, "step": 50 }, { "epoch": 0.006266126059712496, "grad_norm": 2.424151075834017, "learning_rate": 1.8796068796068799e-06, "loss": 0.8716, "step": 51 }, { "epoch": 0.006388991276569603, "grad_norm": 2.47301723561335, "learning_rate": 1.9164619164619164e-06, "loss": 0.8766, "step": 52 }, { "epoch": 0.006511856493426711, "grad_norm": 2.306221114798729, "learning_rate": 1.9533169533169534e-06, "loss": 0.9108, "step": 53 }, { "epoch": 0.006634721710283818, "grad_norm": 2.7998718379596172, "learning_rate": 1.99017199017199e-06, "loss": 0.8301, "step": 54 }, { "epoch": 0.006757586927140926, "grad_norm": 1.9316957290525076, "learning_rate": 2.0270270270270273e-06, "loss": 0.8245, "step": 55 }, { "epoch": 0.006880452143998034, "grad_norm": 2.3225785839846447, "learning_rate": 2.063882063882064e-06, "loss": 0.7795, "step": 56 }, { "epoch": 0.007003317360855142, "grad_norm": 2.1506352887629903, "learning_rate": 2.1007371007371007e-06, "loss": 0.8121, "step": 57 }, { "epoch": 0.00712618257771225, "grad_norm": 2.2414978938850094, "learning_rate": 2.1375921375921377e-06, "loss": 0.8411, "step": 58 }, { "epoch": 0.007249047794569357, "grad_norm": 2.217372090144132, "learning_rate": 2.1744471744471746e-06, "loss": 0.8757, "step": 59 }, { "epoch": 0.007371913011426466, "grad_norm": 2.2817429591369165, "learning_rate": 2.211302211302211e-06, "loss": 0.7842, "step": 60 }, { "epoch": 0.007494778228283573, "grad_norm": 2.5386942795509246, "learning_rate": 2.248157248157248e-06, "loss": 0.9066, "step": 61 }, { "epoch": 0.007617643445140681, "grad_norm": 2.324567330178371, "learning_rate": 2.285012285012285e-06, "loss": 0.8358, "step": 62 }, { "epoch": 0.007740508661997788, "grad_norm": 2.4843663767121793, "learning_rate": 2.321867321867322e-06, "loss": 0.8161, "step": 63 }, { "epoch": 0.007863373878854897, "grad_norm": 2.2400952081124554, "learning_rate": 2.3587223587223586e-06, "loss": 0.8063, "step": 64 }, { "epoch": 0.007986239095712003, "grad_norm": 2.4470283289037416, "learning_rate": 2.3955773955773955e-06, "loss": 0.8059, "step": 65 }, { "epoch": 0.008109104312569112, "grad_norm": 2.0603183792683124, "learning_rate": 2.4324324324324325e-06, "loss": 0.8019, "step": 66 }, { "epoch": 0.008231969529426219, "grad_norm": 2.2266532809288075, "learning_rate": 2.4692874692874694e-06, "loss": 0.7921, "step": 67 }, { "epoch": 0.008354834746283327, "grad_norm": 2.1533768123746198, "learning_rate": 2.506142506142506e-06, "loss": 0.7185, "step": 68 }, { "epoch": 0.008477699963140435, "grad_norm": 2.2697446351636352, "learning_rate": 2.542997542997543e-06, "loss": 0.8926, "step": 69 }, { "epoch": 0.008600565179997542, "grad_norm": 1.9547412434546492, "learning_rate": 2.57985257985258e-06, "loss": 0.7148, "step": 70 }, { "epoch": 0.00872343039685465, "grad_norm": 1.9371145887095345, "learning_rate": 2.616707616707617e-06, "loss": 0.7846, "step": 71 }, { "epoch": 0.008846295613711759, "grad_norm": 2.537280666949752, "learning_rate": 2.6535626535626533e-06, "loss": 0.7594, "step": 72 }, { "epoch": 0.008969160830568866, "grad_norm": 2.0134445444127267, "learning_rate": 2.6904176904176907e-06, "loss": 0.8376, "step": 73 }, { "epoch": 0.009092026047425974, "grad_norm": 2.375681185480166, "learning_rate": 2.7272727272727272e-06, "loss": 0.8542, "step": 74 }, { "epoch": 0.009214891264283081, "grad_norm": 2.2284015663099996, "learning_rate": 2.764127764127764e-06, "loss": 0.7682, "step": 75 }, { "epoch": 0.00933775648114019, "grad_norm": 2.1223572049006183, "learning_rate": 2.8009828009828007e-06, "loss": 0.8151, "step": 76 }, { "epoch": 0.009460621697997298, "grad_norm": 1.9498992886735789, "learning_rate": 2.837837837837838e-06, "loss": 0.8871, "step": 77 }, { "epoch": 0.009583486914854404, "grad_norm": 1.761698645327371, "learning_rate": 2.8746928746928746e-06, "loss": 0.7584, "step": 78 }, { "epoch": 0.009706352131711513, "grad_norm": 2.124914597987514, "learning_rate": 2.9115479115479116e-06, "loss": 0.8169, "step": 79 }, { "epoch": 0.00982921734856862, "grad_norm": 2.0919909269488186, "learning_rate": 2.948402948402948e-06, "loss": 0.6499, "step": 80 }, { "epoch": 0.009952082565425728, "grad_norm": 2.099324623037599, "learning_rate": 2.9852579852579855e-06, "loss": 0.6526, "step": 81 }, { "epoch": 0.010074947782282836, "grad_norm": 1.92075738433415, "learning_rate": 3.0221130221130224e-06, "loss": 0.9446, "step": 82 }, { "epoch": 0.010197812999139943, "grad_norm": 2.4867794775340966, "learning_rate": 3.058968058968059e-06, "loss": 0.744, "step": 83 }, { "epoch": 0.010320678215997052, "grad_norm": 2.151973857031448, "learning_rate": 3.095823095823096e-06, "loss": 0.7708, "step": 84 }, { "epoch": 0.010443543432854158, "grad_norm": 2.2078944058232524, "learning_rate": 3.132678132678133e-06, "loss": 0.7984, "step": 85 }, { "epoch": 0.010566408649711267, "grad_norm": 2.112322210243022, "learning_rate": 3.16953316953317e-06, "loss": 0.9349, "step": 86 }, { "epoch": 0.010689273866568375, "grad_norm": 1.8966018599814107, "learning_rate": 3.2063882063882063e-06, "loss": 0.9199, "step": 87 }, { "epoch": 0.010812139083425482, "grad_norm": 2.3004075590576076, "learning_rate": 3.2432432432432437e-06, "loss": 0.8567, "step": 88 }, { "epoch": 0.01093500430028259, "grad_norm": 2.0541443243309727, "learning_rate": 3.2800982800982802e-06, "loss": 0.8415, "step": 89 }, { "epoch": 0.011057869517139697, "grad_norm": 1.7361116289703336, "learning_rate": 3.316953316953317e-06, "loss": 0.7144, "step": 90 }, { "epoch": 0.011180734733996806, "grad_norm": 1.989445636753907, "learning_rate": 3.3538083538083537e-06, "loss": 0.691, "step": 91 }, { "epoch": 0.011303599950853914, "grad_norm": 1.9607075864202708, "learning_rate": 3.390663390663391e-06, "loss": 0.709, "step": 92 }, { "epoch": 0.01142646516771102, "grad_norm": 1.9180637242155694, "learning_rate": 3.4275184275184276e-06, "loss": 0.8674, "step": 93 }, { "epoch": 0.011549330384568129, "grad_norm": 2.176961099785198, "learning_rate": 3.4643734643734646e-06, "loss": 0.7773, "step": 94 }, { "epoch": 0.011672195601425236, "grad_norm": 1.8711385480214902, "learning_rate": 3.501228501228501e-06, "loss": 0.8228, "step": 95 }, { "epoch": 0.011795060818282344, "grad_norm": 2.7093646366375186, "learning_rate": 3.5380835380835385e-06, "loss": 0.8478, "step": 96 }, { "epoch": 0.011917926035139453, "grad_norm": 1.9521329019033107, "learning_rate": 3.574938574938575e-06, "loss": 0.8348, "step": 97 }, { "epoch": 0.01204079125199656, "grad_norm": 2.7549800721483524, "learning_rate": 3.611793611793612e-06, "loss": 0.7688, "step": 98 }, { "epoch": 0.012163656468853668, "grad_norm": 2.123924485994918, "learning_rate": 3.648648648648649e-06, "loss": 0.7667, "step": 99 }, { "epoch": 0.012286521685710775, "grad_norm": 1.7943724330319255, "learning_rate": 3.685503685503686e-06, "loss": 0.7188, "step": 100 }, { "epoch": 0.012409386902567883, "grad_norm": 1.7062001924272912, "learning_rate": 3.7223587223587224e-06, "loss": 0.7607, "step": 101 }, { "epoch": 0.012532252119424991, "grad_norm": 2.096044564947959, "learning_rate": 3.7592137592137598e-06, "loss": 0.7247, "step": 102 }, { "epoch": 0.012655117336282098, "grad_norm": 1.9271775214159095, "learning_rate": 3.796068796068796e-06, "loss": 0.7519, "step": 103 }, { "epoch": 0.012777982553139207, "grad_norm": 1.755264391118941, "learning_rate": 3.832923832923833e-06, "loss": 0.8325, "step": 104 }, { "epoch": 0.012900847769996313, "grad_norm": 1.9989465666278592, "learning_rate": 3.869778869778871e-06, "loss": 0.689, "step": 105 }, { "epoch": 0.013023712986853422, "grad_norm": 2.0000267967175565, "learning_rate": 3.906633906633907e-06, "loss": 0.6583, "step": 106 }, { "epoch": 0.01314657820371053, "grad_norm": 1.9893807661366398, "learning_rate": 3.943488943488944e-06, "loss": 0.7455, "step": 107 }, { "epoch": 0.013269443420567637, "grad_norm": 2.3835493881692096, "learning_rate": 3.98034398034398e-06, "loss": 0.7274, "step": 108 }, { "epoch": 0.013392308637424745, "grad_norm": 1.7164620683578935, "learning_rate": 4.0171990171990176e-06, "loss": 0.7556, "step": 109 }, { "epoch": 0.013515173854281852, "grad_norm": 2.2908311887870694, "learning_rate": 4.0540540540540545e-06, "loss": 0.835, "step": 110 }, { "epoch": 0.01363803907113896, "grad_norm": 2.039544661558442, "learning_rate": 4.090909090909091e-06, "loss": 0.8806, "step": 111 }, { "epoch": 0.013760904287996069, "grad_norm": 1.9367676780592882, "learning_rate": 4.127764127764128e-06, "loss": 0.8225, "step": 112 }, { "epoch": 0.013883769504853176, "grad_norm": 2.1901025979950868, "learning_rate": 4.164619164619165e-06, "loss": 0.827, "step": 113 }, { "epoch": 0.014006634721710284, "grad_norm": 1.9334515380670565, "learning_rate": 4.2014742014742015e-06, "loss": 0.7683, "step": 114 }, { "epoch": 0.014129499938567392, "grad_norm": 1.7940119923486164, "learning_rate": 4.2383292383292384e-06, "loss": 0.8543, "step": 115 }, { "epoch": 0.0142523651554245, "grad_norm": 1.646323482780624, "learning_rate": 4.275184275184275e-06, "loss": 0.8729, "step": 116 }, { "epoch": 0.014375230372281608, "grad_norm": 1.9306814941290429, "learning_rate": 4.312039312039312e-06, "loss": 0.7837, "step": 117 }, { "epoch": 0.014498095589138714, "grad_norm": 1.6747792227115856, "learning_rate": 4.348894348894349e-06, "loss": 0.7657, "step": 118 }, { "epoch": 0.014620960805995823, "grad_norm": 2.5116743414061493, "learning_rate": 4.385749385749385e-06, "loss": 0.7912, "step": 119 }, { "epoch": 0.014743826022852931, "grad_norm": 2.2543920257744796, "learning_rate": 4.422604422604422e-06, "loss": 0.7871, "step": 120 }, { "epoch": 0.014866691239710038, "grad_norm": 1.9945810171170197, "learning_rate": 4.45945945945946e-06, "loss": 0.7342, "step": 121 }, { "epoch": 0.014989556456567146, "grad_norm": 2.2204859893567943, "learning_rate": 4.496314496314496e-06, "loss": 0.8786, "step": 122 }, { "epoch": 0.015112421673424253, "grad_norm": 2.002919416848398, "learning_rate": 4.533169533169533e-06, "loss": 0.7228, "step": 123 }, { "epoch": 0.015235286890281361, "grad_norm": 1.868216079800274, "learning_rate": 4.57002457002457e-06, "loss": 0.742, "step": 124 }, { "epoch": 0.01535815210713847, "grad_norm": 1.8005633082126062, "learning_rate": 4.606879606879607e-06, "loss": 0.8712, "step": 125 }, { "epoch": 0.015481017323995577, "grad_norm": 1.8166686764177278, "learning_rate": 4.643734643734644e-06, "loss": 0.9068, "step": 126 }, { "epoch": 0.015603882540852685, "grad_norm": 2.0515222597254805, "learning_rate": 4.680589680589681e-06, "loss": 0.8018, "step": 127 }, { "epoch": 0.015726747757709793, "grad_norm": 2.279636341044896, "learning_rate": 4.717444717444717e-06, "loss": 0.7718, "step": 128 }, { "epoch": 0.0158496129745669, "grad_norm": 1.8378117347640532, "learning_rate": 4.754299754299755e-06, "loss": 0.6841, "step": 129 }, { "epoch": 0.015972478191424007, "grad_norm": 2.218430722039516, "learning_rate": 4.791154791154791e-06, "loss": 0.8657, "step": 130 }, { "epoch": 0.016095343408281115, "grad_norm": 1.916287061479979, "learning_rate": 4.828009828009828e-06, "loss": 0.6776, "step": 131 }, { "epoch": 0.016218208625138224, "grad_norm": 1.7620338730219163, "learning_rate": 4.864864864864865e-06, "loss": 0.7414, "step": 132 }, { "epoch": 0.016341073841995332, "grad_norm": 1.7543931040893224, "learning_rate": 4.901719901719902e-06, "loss": 0.7906, "step": 133 }, { "epoch": 0.016463939058852437, "grad_norm": 1.5496112007083318, "learning_rate": 4.938574938574939e-06, "loss": 0.8203, "step": 134 }, { "epoch": 0.016586804275709546, "grad_norm": 1.7438331967702376, "learning_rate": 4.975429975429976e-06, "loss": 0.8933, "step": 135 }, { "epoch": 0.016709669492566654, "grad_norm": 1.8605660015672187, "learning_rate": 5.012285012285012e-06, "loss": 0.7005, "step": 136 }, { "epoch": 0.016832534709423762, "grad_norm": 2.2229713433830343, "learning_rate": 5.04914004914005e-06, "loss": 0.7777, "step": 137 }, { "epoch": 0.01695539992628087, "grad_norm": 1.9301500618954412, "learning_rate": 5.085995085995086e-06, "loss": 0.7667, "step": 138 }, { "epoch": 0.01707826514313798, "grad_norm": 1.9473626434903029, "learning_rate": 5.122850122850123e-06, "loss": 0.7669, "step": 139 }, { "epoch": 0.017201130359995084, "grad_norm": 1.9268872807220647, "learning_rate": 5.15970515970516e-06, "loss": 0.8654, "step": 140 }, { "epoch": 0.017323995576852193, "grad_norm": 2.0841169041870757, "learning_rate": 5.196560196560197e-06, "loss": 0.8258, "step": 141 }, { "epoch": 0.0174468607937093, "grad_norm": 1.8341245823766783, "learning_rate": 5.233415233415234e-06, "loss": 0.9055, "step": 142 }, { "epoch": 0.01756972601056641, "grad_norm": 1.988196597099631, "learning_rate": 5.2702702702702705e-06, "loss": 0.7418, "step": 143 }, { "epoch": 0.017692591227423518, "grad_norm": 2.163121152614218, "learning_rate": 5.307125307125307e-06, "loss": 0.9437, "step": 144 }, { "epoch": 0.017815456444280623, "grad_norm": 2.2416333125988923, "learning_rate": 5.3439803439803444e-06, "loss": 0.7348, "step": 145 }, { "epoch": 0.01793832166113773, "grad_norm": 2.1561442382301426, "learning_rate": 5.380835380835381e-06, "loss": 0.7212, "step": 146 }, { "epoch": 0.01806118687799484, "grad_norm": 2.2981656479629393, "learning_rate": 5.4176904176904175e-06, "loss": 0.7829, "step": 147 }, { "epoch": 0.01818405209485195, "grad_norm": 2.0617711334227113, "learning_rate": 5.4545454545454545e-06, "loss": 0.7847, "step": 148 }, { "epoch": 0.018306917311709057, "grad_norm": 1.7819338394255597, "learning_rate": 5.491400491400491e-06, "loss": 0.6362, "step": 149 }, { "epoch": 0.018429782528566162, "grad_norm": 1.8454195227172059, "learning_rate": 5.528255528255528e-06, "loss": 0.7743, "step": 150 }, { "epoch": 0.01855264774542327, "grad_norm": 1.9648565931926607, "learning_rate": 5.565110565110565e-06, "loss": 0.6764, "step": 151 }, { "epoch": 0.01867551296228038, "grad_norm": 2.0118392830540253, "learning_rate": 5.601965601965601e-06, "loss": 0.6951, "step": 152 }, { "epoch": 0.018798378179137487, "grad_norm": 1.803135424324761, "learning_rate": 5.638820638820639e-06, "loss": 0.818, "step": 153 }, { "epoch": 0.018921243395994596, "grad_norm": 1.9795101473740746, "learning_rate": 5.675675675675676e-06, "loss": 0.8511, "step": 154 }, { "epoch": 0.0190441086128517, "grad_norm": 1.7322842434464771, "learning_rate": 5.712530712530712e-06, "loss": 0.7128, "step": 155 }, { "epoch": 0.01916697382970881, "grad_norm": 1.9785431181483861, "learning_rate": 5.749385749385749e-06, "loss": 0.732, "step": 156 }, { "epoch": 0.019289839046565917, "grad_norm": 2.193855981127751, "learning_rate": 5.786240786240787e-06, "loss": 0.8811, "step": 157 }, { "epoch": 0.019412704263423026, "grad_norm": 2.1286480959222667, "learning_rate": 5.823095823095823e-06, "loss": 0.7895, "step": 158 }, { "epoch": 0.019535569480280134, "grad_norm": 2.1859847721366954, "learning_rate": 5.85995085995086e-06, "loss": 0.8027, "step": 159 }, { "epoch": 0.01965843469713724, "grad_norm": 1.9324074142237098, "learning_rate": 5.896805896805896e-06, "loss": 0.7395, "step": 160 }, { "epoch": 0.019781299913994348, "grad_norm": 1.800633659071209, "learning_rate": 5.933660933660934e-06, "loss": 0.7415, "step": 161 }, { "epoch": 0.019904165130851456, "grad_norm": 2.643996006904747, "learning_rate": 5.970515970515971e-06, "loss": 0.8222, "step": 162 }, { "epoch": 0.020027030347708565, "grad_norm": 1.967104685269245, "learning_rate": 6.007371007371007e-06, "loss": 0.6736, "step": 163 }, { "epoch": 0.020149895564565673, "grad_norm": 2.3088778995166312, "learning_rate": 6.044226044226045e-06, "loss": 0.7391, "step": 164 }, { "epoch": 0.020272760781422778, "grad_norm": 1.8578933501590726, "learning_rate": 6.081081081081082e-06, "loss": 0.6349, "step": 165 }, { "epoch": 0.020395625998279886, "grad_norm": 1.5977353959892007, "learning_rate": 6.117936117936118e-06, "loss": 0.856, "step": 166 }, { "epoch": 0.020518491215136995, "grad_norm": 2.41916844601463, "learning_rate": 6.154791154791155e-06, "loss": 0.7527, "step": 167 }, { "epoch": 0.020641356431994103, "grad_norm": 2.0022315718215435, "learning_rate": 6.191646191646192e-06, "loss": 0.7005, "step": 168 }, { "epoch": 0.02076422164885121, "grad_norm": 1.9040703311352212, "learning_rate": 6.228501228501229e-06, "loss": 0.6852, "step": 169 }, { "epoch": 0.020887086865708317, "grad_norm": 2.176320105062913, "learning_rate": 6.265356265356266e-06, "loss": 0.8448, "step": 170 }, { "epoch": 0.021009952082565425, "grad_norm": 1.9266691571891912, "learning_rate": 6.302211302211302e-06, "loss": 0.7955, "step": 171 }, { "epoch": 0.021132817299422534, "grad_norm": 1.8114027476423795, "learning_rate": 6.33906633906634e-06, "loss": 0.7434, "step": 172 }, { "epoch": 0.021255682516279642, "grad_norm": 1.9515491664797833, "learning_rate": 6.3759213759213766e-06, "loss": 0.7566, "step": 173 }, { "epoch": 0.02137854773313675, "grad_norm": 2.025956196474469, "learning_rate": 6.412776412776413e-06, "loss": 0.9448, "step": 174 }, { "epoch": 0.021501412949993855, "grad_norm": 1.7634395032917143, "learning_rate": 6.44963144963145e-06, "loss": 0.7636, "step": 175 }, { "epoch": 0.021624278166850964, "grad_norm": 2.070407439272014, "learning_rate": 6.486486486486487e-06, "loss": 0.8912, "step": 176 }, { "epoch": 0.021747143383708072, "grad_norm": 2.2960900402022246, "learning_rate": 6.5233415233415235e-06, "loss": 0.7653, "step": 177 }, { "epoch": 0.02187000860056518, "grad_norm": 2.1754850194688036, "learning_rate": 6.5601965601965605e-06, "loss": 0.9228, "step": 178 }, { "epoch": 0.02199287381742229, "grad_norm": 2.0374135782794585, "learning_rate": 6.5970515970515966e-06, "loss": 0.6933, "step": 179 }, { "epoch": 0.022115739034279394, "grad_norm": 2.0661917296130095, "learning_rate": 6.633906633906634e-06, "loss": 0.6648, "step": 180 }, { "epoch": 0.022238604251136503, "grad_norm": 1.8722756101140505, "learning_rate": 6.670761670761671e-06, "loss": 0.7699, "step": 181 }, { "epoch": 0.02236146946799361, "grad_norm": 2.1776684326905933, "learning_rate": 6.707616707616707e-06, "loss": 0.7335, "step": 182 }, { "epoch": 0.02248433468485072, "grad_norm": 1.6129926525625968, "learning_rate": 6.744471744471744e-06, "loss": 0.7357, "step": 183 }, { "epoch": 0.022607199901707828, "grad_norm": 2.1277756112782567, "learning_rate": 6.781326781326782e-06, "loss": 0.7046, "step": 184 }, { "epoch": 0.022730065118564933, "grad_norm": 2.3391912703074724, "learning_rate": 6.818181818181818e-06, "loss": 0.7216, "step": 185 }, { "epoch": 0.02285293033542204, "grad_norm": 2.1511290166475003, "learning_rate": 6.855036855036855e-06, "loss": 0.8211, "step": 186 }, { "epoch": 0.02297579555227915, "grad_norm": 1.6719111505737243, "learning_rate": 6.891891891891892e-06, "loss": 0.7054, "step": 187 }, { "epoch": 0.023098660769136258, "grad_norm": 2.5515281191195154, "learning_rate": 6.928746928746929e-06, "loss": 0.8233, "step": 188 }, { "epoch": 0.023221525985993367, "grad_norm": 1.900669341574493, "learning_rate": 6.965601965601966e-06, "loss": 0.7284, "step": 189 }, { "epoch": 0.02334439120285047, "grad_norm": 1.8850453862694156, "learning_rate": 7.002457002457002e-06, "loss": 0.7119, "step": 190 }, { "epoch": 0.02346725641970758, "grad_norm": 1.9640177846783462, "learning_rate": 7.039312039312039e-06, "loss": 0.6226, "step": 191 }, { "epoch": 0.02359012163656469, "grad_norm": 1.78285590490415, "learning_rate": 7.076167076167077e-06, "loss": 0.7527, "step": 192 }, { "epoch": 0.023712986853421797, "grad_norm": 1.909904047542188, "learning_rate": 7.113022113022113e-06, "loss": 0.6273, "step": 193 }, { "epoch": 0.023835852070278905, "grad_norm": 1.5774860471993846, "learning_rate": 7.14987714987715e-06, "loss": 0.7097, "step": 194 }, { "epoch": 0.02395871728713601, "grad_norm": 1.7822894593923615, "learning_rate": 7.186732186732187e-06, "loss": 0.7927, "step": 195 }, { "epoch": 0.02408158250399312, "grad_norm": 1.9467034435413708, "learning_rate": 7.223587223587224e-06, "loss": 0.8011, "step": 196 }, { "epoch": 0.024204447720850227, "grad_norm": 2.198717952759409, "learning_rate": 7.260442260442261e-06, "loss": 0.772, "step": 197 }, { "epoch": 0.024327312937707336, "grad_norm": 2.183689896839222, "learning_rate": 7.297297297297298e-06, "loss": 0.783, "step": 198 }, { "epoch": 0.024450178154564444, "grad_norm": 1.8304193568098985, "learning_rate": 7.334152334152334e-06, "loss": 0.6804, "step": 199 }, { "epoch": 0.02457304337142155, "grad_norm": 2.1180414532189467, "learning_rate": 7.371007371007372e-06, "loss": 0.6441, "step": 200 }, { "epoch": 0.024695908588278657, "grad_norm": 2.22447091842928, "learning_rate": 7.407862407862408e-06, "loss": 0.8026, "step": 201 }, { "epoch": 0.024818773805135766, "grad_norm": 1.9875736683941647, "learning_rate": 7.444717444717445e-06, "loss": 0.7405, "step": 202 }, { "epoch": 0.024941639021992874, "grad_norm": 1.988612777537409, "learning_rate": 7.481572481572482e-06, "loss": 0.7623, "step": 203 }, { "epoch": 0.025064504238849983, "grad_norm": 1.983215440853281, "learning_rate": 7.5184275184275195e-06, "loss": 0.7147, "step": 204 }, { "epoch": 0.025187369455707088, "grad_norm": 2.0591468153698536, "learning_rate": 7.555282555282556e-06, "loss": 0.8068, "step": 205 }, { "epoch": 0.025310234672564196, "grad_norm": 2.424275565078959, "learning_rate": 7.592137592137592e-06, "loss": 0.6661, "step": 206 }, { "epoch": 0.025433099889421305, "grad_norm": 1.8556122946270948, "learning_rate": 7.6289926289926295e-06, "loss": 0.7364, "step": 207 }, { "epoch": 0.025555965106278413, "grad_norm": 2.0226528495706426, "learning_rate": 7.665847665847666e-06, "loss": 0.7476, "step": 208 }, { "epoch": 0.02567883032313552, "grad_norm": 2.0839462629230394, "learning_rate": 7.702702702702703e-06, "loss": 0.7402, "step": 209 }, { "epoch": 0.025801695539992626, "grad_norm": 1.7157059567245307, "learning_rate": 7.739557739557741e-06, "loss": 0.6697, "step": 210 }, { "epoch": 0.025924560756849735, "grad_norm": 2.221003192137898, "learning_rate": 7.776412776412776e-06, "loss": 0.7494, "step": 211 }, { "epoch": 0.026047425973706843, "grad_norm": 2.0068010262632767, "learning_rate": 7.813267813267813e-06, "loss": 0.6485, "step": 212 }, { "epoch": 0.026170291190563952, "grad_norm": 1.9835837288566343, "learning_rate": 7.85012285012285e-06, "loss": 0.7184, "step": 213 }, { "epoch": 0.02629315640742106, "grad_norm": 1.81943468759955, "learning_rate": 7.886977886977887e-06, "loss": 0.7361, "step": 214 }, { "epoch": 0.026416021624278165, "grad_norm": 2.196400858375408, "learning_rate": 7.923832923832924e-06, "loss": 0.7529, "step": 215 }, { "epoch": 0.026538886841135274, "grad_norm": 2.278266713871535, "learning_rate": 7.96068796068796e-06, "loss": 0.6606, "step": 216 }, { "epoch": 0.026661752057992382, "grad_norm": 2.7429671536295697, "learning_rate": 7.997542997542998e-06, "loss": 0.7663, "step": 217 }, { "epoch": 0.02678461727484949, "grad_norm": 1.948658312851503, "learning_rate": 8.034398034398035e-06, "loss": 0.6791, "step": 218 }, { "epoch": 0.0269074824917066, "grad_norm": 2.1016293365734637, "learning_rate": 8.07125307125307e-06, "loss": 0.6664, "step": 219 }, { "epoch": 0.027030347708563704, "grad_norm": 2.165590672020245, "learning_rate": 8.108108108108109e-06, "loss": 0.8209, "step": 220 }, { "epoch": 0.027153212925420812, "grad_norm": 1.8489369609520914, "learning_rate": 8.144963144963144e-06, "loss": 0.7198, "step": 221 }, { "epoch": 0.02727607814227792, "grad_norm": 1.8035798718548126, "learning_rate": 8.181818181818181e-06, "loss": 0.7851, "step": 222 }, { "epoch": 0.02739894335913503, "grad_norm": 1.9709884836797689, "learning_rate": 8.21867321867322e-06, "loss": 0.7337, "step": 223 }, { "epoch": 0.027521808575992138, "grad_norm": 2.0840307607391977, "learning_rate": 8.255528255528255e-06, "loss": 0.7496, "step": 224 }, { "epoch": 0.027644673792849243, "grad_norm": 2.355604284748557, "learning_rate": 8.292383292383292e-06, "loss": 0.6245, "step": 225 }, { "epoch": 0.02776753900970635, "grad_norm": 2.0079503059460353, "learning_rate": 8.32923832923833e-06, "loss": 0.7352, "step": 226 }, { "epoch": 0.02789040422656346, "grad_norm": 2.0985545780787294, "learning_rate": 8.366093366093366e-06, "loss": 0.6448, "step": 227 }, { "epoch": 0.028013269443420568, "grad_norm": 1.7189616249257684, "learning_rate": 8.402948402948403e-06, "loss": 0.6683, "step": 228 }, { "epoch": 0.028136134660277676, "grad_norm": 2.010324383403565, "learning_rate": 8.43980343980344e-06, "loss": 0.6502, "step": 229 }, { "epoch": 0.028258999877134785, "grad_norm": 2.093256992408434, "learning_rate": 8.476658476658477e-06, "loss": 0.8893, "step": 230 }, { "epoch": 0.02838186509399189, "grad_norm": 1.9484291029299183, "learning_rate": 8.513513513513514e-06, "loss": 0.7864, "step": 231 }, { "epoch": 0.028504730310849, "grad_norm": 2.071568161786432, "learning_rate": 8.55036855036855e-06, "loss": 0.7149, "step": 232 }, { "epoch": 0.028627595527706107, "grad_norm": 2.0271669833556056, "learning_rate": 8.587223587223588e-06, "loss": 0.6132, "step": 233 }, { "epoch": 0.028750460744563215, "grad_norm": 1.9102956251761058, "learning_rate": 8.624078624078625e-06, "loss": 0.712, "step": 234 }, { "epoch": 0.028873325961420324, "grad_norm": 1.6778675992552967, "learning_rate": 8.66093366093366e-06, "loss": 0.7827, "step": 235 }, { "epoch": 0.02899619117827743, "grad_norm": 2.0948610150941485, "learning_rate": 8.697788697788699e-06, "loss": 0.7089, "step": 236 }, { "epoch": 0.029119056395134537, "grad_norm": 1.8346975899689884, "learning_rate": 8.734643734643734e-06, "loss": 0.727, "step": 237 }, { "epoch": 0.029241921611991645, "grad_norm": 1.8247128431766366, "learning_rate": 8.77149877149877e-06, "loss": 0.6293, "step": 238 }, { "epoch": 0.029364786828848754, "grad_norm": 1.7470054252402831, "learning_rate": 8.80835380835381e-06, "loss": 0.6507, "step": 239 }, { "epoch": 0.029487652045705862, "grad_norm": 2.1064269646877927, "learning_rate": 8.845208845208845e-06, "loss": 0.707, "step": 240 }, { "epoch": 0.029610517262562967, "grad_norm": 1.859638512263217, "learning_rate": 8.882063882063882e-06, "loss": 0.7611, "step": 241 }, { "epoch": 0.029733382479420076, "grad_norm": 1.9257602100834517, "learning_rate": 8.91891891891892e-06, "loss": 0.8156, "step": 242 }, { "epoch": 0.029856247696277184, "grad_norm": 2.026208368297337, "learning_rate": 8.955773955773956e-06, "loss": 0.7262, "step": 243 }, { "epoch": 0.029979112913134293, "grad_norm": 1.9594383548407612, "learning_rate": 8.992628992628992e-06, "loss": 0.7067, "step": 244 }, { "epoch": 0.0301019781299914, "grad_norm": 2.048343815790902, "learning_rate": 9.02948402948403e-06, "loss": 0.7218, "step": 245 }, { "epoch": 0.030224843346848506, "grad_norm": 2.312213736657857, "learning_rate": 9.066339066339066e-06, "loss": 0.7519, "step": 246 }, { "epoch": 0.030347708563705614, "grad_norm": 2.1483915077471387, "learning_rate": 9.103194103194103e-06, "loss": 0.6721, "step": 247 }, { "epoch": 0.030470573780562723, "grad_norm": 1.7973362119561997, "learning_rate": 9.14004914004914e-06, "loss": 0.6936, "step": 248 }, { "epoch": 0.03059343899741983, "grad_norm": 1.810351343394265, "learning_rate": 9.176904176904177e-06, "loss": 0.6041, "step": 249 }, { "epoch": 0.03071630421427694, "grad_norm": 2.110068365426576, "learning_rate": 9.213759213759214e-06, "loss": 0.6788, "step": 250 }, { "epoch": 0.030839169431134045, "grad_norm": 2.122011315946288, "learning_rate": 9.250614250614251e-06, "loss": 0.6693, "step": 251 }, { "epoch": 0.030962034647991153, "grad_norm": 1.9509953443662824, "learning_rate": 9.287469287469288e-06, "loss": 0.8107, "step": 252 }, { "epoch": 0.03108489986484826, "grad_norm": 2.1759954795663727, "learning_rate": 9.324324324324325e-06, "loss": 0.6855, "step": 253 }, { "epoch": 0.03120776508170537, "grad_norm": 1.968246155991691, "learning_rate": 9.361179361179362e-06, "loss": 0.6639, "step": 254 }, { "epoch": 0.03133063029856248, "grad_norm": 1.9567072014668756, "learning_rate": 9.398034398034399e-06, "loss": 0.735, "step": 255 }, { "epoch": 0.03145349551541959, "grad_norm": 1.902831366116418, "learning_rate": 9.434889434889434e-06, "loss": 0.6908, "step": 256 }, { "epoch": 0.031576360732276695, "grad_norm": 2.289894777988885, "learning_rate": 9.471744471744471e-06, "loss": 0.8442, "step": 257 }, { "epoch": 0.0316992259491338, "grad_norm": 2.500635044381565, "learning_rate": 9.50859950859951e-06, "loss": 0.7555, "step": 258 }, { "epoch": 0.031822091165990905, "grad_norm": 1.8382057052032, "learning_rate": 9.545454545454545e-06, "loss": 0.7777, "step": 259 }, { "epoch": 0.031944956382848014, "grad_norm": 1.6402173157463737, "learning_rate": 9.582309582309582e-06, "loss": 0.7489, "step": 260 }, { "epoch": 0.03206782159970512, "grad_norm": 1.9348524563006024, "learning_rate": 9.61916461916462e-06, "loss": 0.6943, "step": 261 }, { "epoch": 0.03219068681656223, "grad_norm": 2.47299864257624, "learning_rate": 9.656019656019656e-06, "loss": 0.6839, "step": 262 }, { "epoch": 0.03231355203341934, "grad_norm": 2.1400603176111836, "learning_rate": 9.692874692874693e-06, "loss": 0.8084, "step": 263 }, { "epoch": 0.03243641725027645, "grad_norm": 1.9102694201056012, "learning_rate": 9.72972972972973e-06, "loss": 0.7101, "step": 264 }, { "epoch": 0.032559282467133556, "grad_norm": 1.8135314598207293, "learning_rate": 9.766584766584767e-06, "loss": 0.8603, "step": 265 }, { "epoch": 0.032682147683990664, "grad_norm": 1.780865286156098, "learning_rate": 9.803439803439804e-06, "loss": 0.7228, "step": 266 }, { "epoch": 0.03280501290084777, "grad_norm": 2.0720944619287947, "learning_rate": 9.84029484029484e-06, "loss": 0.6952, "step": 267 }, { "epoch": 0.032927878117704874, "grad_norm": 2.2665325688511726, "learning_rate": 9.877149877149878e-06, "loss": 0.864, "step": 268 }, { "epoch": 0.03305074333456198, "grad_norm": 1.8809441165375158, "learning_rate": 9.914004914004915e-06, "loss": 0.7674, "step": 269 }, { "epoch": 0.03317360855141909, "grad_norm": 1.7822626965815573, "learning_rate": 9.950859950859952e-06, "loss": 0.7876, "step": 270 }, { "epoch": 0.0332964737682762, "grad_norm": 2.1108938104666266, "learning_rate": 9.987714987714989e-06, "loss": 0.7215, "step": 271 }, { "epoch": 0.03341933898513331, "grad_norm": 1.9966211299175396, "learning_rate": 1.0024570024570024e-05, "loss": 0.7674, "step": 272 }, { "epoch": 0.033542204201990417, "grad_norm": 2.2680348042610348, "learning_rate": 1.0061425061425062e-05, "loss": 0.8111, "step": 273 }, { "epoch": 0.033665069418847525, "grad_norm": 1.9324203050010555, "learning_rate": 1.00982800982801e-05, "loss": 0.6647, "step": 274 }, { "epoch": 0.03378793463570463, "grad_norm": 2.155792456819831, "learning_rate": 1.0135135135135135e-05, "loss": 0.6672, "step": 275 }, { "epoch": 0.03391079985256174, "grad_norm": 2.232518991826889, "learning_rate": 1.0171990171990172e-05, "loss": 0.7007, "step": 276 }, { "epoch": 0.03403366506941885, "grad_norm": 2.316018402937038, "learning_rate": 1.020884520884521e-05, "loss": 0.7158, "step": 277 }, { "epoch": 0.03415653028627596, "grad_norm": 2.10917506496764, "learning_rate": 1.0245700245700245e-05, "loss": 0.7441, "step": 278 }, { "epoch": 0.03427939550313306, "grad_norm": 2.121907339930492, "learning_rate": 1.0282555282555282e-05, "loss": 0.7577, "step": 279 }, { "epoch": 0.03440226071999017, "grad_norm": 1.963810644603433, "learning_rate": 1.031941031941032e-05, "loss": 0.7163, "step": 280 }, { "epoch": 0.03452512593684728, "grad_norm": 1.9330640317734664, "learning_rate": 1.0356265356265356e-05, "loss": 0.7213, "step": 281 }, { "epoch": 0.034647991153704386, "grad_norm": 2.516811453904035, "learning_rate": 1.0393120393120393e-05, "loss": 0.8406, "step": 282 }, { "epoch": 0.034770856370561494, "grad_norm": 1.9277517959623651, "learning_rate": 1.042997542997543e-05, "loss": 0.7377, "step": 283 }, { "epoch": 0.0348937215874186, "grad_norm": 2.0099179877124667, "learning_rate": 1.0466830466830467e-05, "loss": 0.7708, "step": 284 }, { "epoch": 0.03501658680427571, "grad_norm": 8.185672833271616, "learning_rate": 1.0503685503685504e-05, "loss": 0.7543, "step": 285 }, { "epoch": 0.03513945202113282, "grad_norm": 2.2633720920972746, "learning_rate": 1.0540540540540541e-05, "loss": 0.7156, "step": 286 }, { "epoch": 0.03526231723798993, "grad_norm": 1.8016495469574831, "learning_rate": 1.0577395577395578e-05, "loss": 0.7324, "step": 287 }, { "epoch": 0.035385182454847036, "grad_norm": 1.955712121244304, "learning_rate": 1.0614250614250613e-05, "loss": 0.7083, "step": 288 }, { "epoch": 0.03550804767170414, "grad_norm": 2.043471854335718, "learning_rate": 1.0651105651105652e-05, "loss": 0.7764, "step": 289 }, { "epoch": 0.035630912888561246, "grad_norm": 2.0961118593081283, "learning_rate": 1.0687960687960689e-05, "loss": 0.6612, "step": 290 }, { "epoch": 0.035753778105418355, "grad_norm": 2.1177177146109747, "learning_rate": 1.0724815724815724e-05, "loss": 0.6344, "step": 291 }, { "epoch": 0.03587664332227546, "grad_norm": 1.9609345218613277, "learning_rate": 1.0761670761670763e-05, "loss": 0.7421, "step": 292 }, { "epoch": 0.03599950853913257, "grad_norm": 1.7653847079983935, "learning_rate": 1.07985257985258e-05, "loss": 0.7348, "step": 293 }, { "epoch": 0.03612237375598968, "grad_norm": 2.285989107622271, "learning_rate": 1.0835380835380835e-05, "loss": 0.63, "step": 294 }, { "epoch": 0.03624523897284679, "grad_norm": 1.8304219610778436, "learning_rate": 1.0872235872235874e-05, "loss": 0.6845, "step": 295 }, { "epoch": 0.0363681041897039, "grad_norm": 2.060934670201375, "learning_rate": 1.0909090909090909e-05, "loss": 0.793, "step": 296 }, { "epoch": 0.036490969406561005, "grad_norm": 2.1625614978969936, "learning_rate": 1.0945945945945946e-05, "loss": 0.7189, "step": 297 }, { "epoch": 0.036613834623418114, "grad_norm": 1.8167775026327464, "learning_rate": 1.0982800982800983e-05, "loss": 0.7288, "step": 298 }, { "epoch": 0.036736699840275215, "grad_norm": 1.8268824852907397, "learning_rate": 1.101965601965602e-05, "loss": 0.7162, "step": 299 }, { "epoch": 0.036859565057132324, "grad_norm": 1.8856787707561768, "learning_rate": 1.1056511056511057e-05, "loss": 0.7354, "step": 300 }, { "epoch": 0.03698243027398943, "grad_norm": 1.6989978896187465, "learning_rate": 1.1093366093366094e-05, "loss": 0.6707, "step": 301 }, { "epoch": 0.03710529549084654, "grad_norm": 2.1523214233302137, "learning_rate": 1.113022113022113e-05, "loss": 0.7256, "step": 302 }, { "epoch": 0.03722816070770365, "grad_norm": 1.7887421119223106, "learning_rate": 1.1167076167076168e-05, "loss": 0.7572, "step": 303 }, { "epoch": 0.03735102592456076, "grad_norm": 1.9200546760873909, "learning_rate": 1.1203931203931203e-05, "loss": 0.7215, "step": 304 }, { "epoch": 0.037473891141417866, "grad_norm": 2.004864587313459, "learning_rate": 1.1240786240786241e-05, "loss": 0.8387, "step": 305 }, { "epoch": 0.037596756358274974, "grad_norm": 2.3253393869500907, "learning_rate": 1.1277641277641278e-05, "loss": 0.6206, "step": 306 }, { "epoch": 0.03771962157513208, "grad_norm": 2.116835304787679, "learning_rate": 1.1314496314496314e-05, "loss": 0.6894, "step": 307 }, { "epoch": 0.03784248679198919, "grad_norm": 2.0780156430341252, "learning_rate": 1.1351351351351352e-05, "loss": 0.8229, "step": 308 }, { "epoch": 0.03796535200884629, "grad_norm": 1.9603628702683005, "learning_rate": 1.138820638820639e-05, "loss": 0.6491, "step": 309 }, { "epoch": 0.0380882172257034, "grad_norm": 2.11419079720498, "learning_rate": 1.1425061425061425e-05, "loss": 0.7261, "step": 310 }, { "epoch": 0.03821108244256051, "grad_norm": 2.1494880122991376, "learning_rate": 1.1461916461916463e-05, "loss": 0.7398, "step": 311 }, { "epoch": 0.03833394765941762, "grad_norm": 1.8312491638881796, "learning_rate": 1.1498771498771498e-05, "loss": 0.7131, "step": 312 }, { "epoch": 0.038456812876274726, "grad_norm": 1.8121074511932032, "learning_rate": 1.1535626535626535e-05, "loss": 0.5967, "step": 313 }, { "epoch": 0.038579678093131835, "grad_norm": 2.262581966738921, "learning_rate": 1.1572481572481574e-05, "loss": 0.6846, "step": 314 }, { "epoch": 0.03870254330998894, "grad_norm": 1.7831974722387802, "learning_rate": 1.160933660933661e-05, "loss": 0.6213, "step": 315 }, { "epoch": 0.03882540852684605, "grad_norm": 1.961908226643096, "learning_rate": 1.1646191646191646e-05, "loss": 0.6749, "step": 316 }, { "epoch": 0.03894827374370316, "grad_norm": 2.0666673725512044, "learning_rate": 1.1683046683046683e-05, "loss": 0.7427, "step": 317 }, { "epoch": 0.03907113896056027, "grad_norm": 2.134738474981189, "learning_rate": 1.171990171990172e-05, "loss": 0.7518, "step": 318 }, { "epoch": 0.03919400417741737, "grad_norm": 1.908240612441246, "learning_rate": 1.1756756756756757e-05, "loss": 0.7798, "step": 319 }, { "epoch": 0.03931686939427448, "grad_norm": 1.9630236970895545, "learning_rate": 1.1793611793611792e-05, "loss": 0.7478, "step": 320 }, { "epoch": 0.03943973461113159, "grad_norm": 2.20652863885511, "learning_rate": 1.1830466830466831e-05, "loss": 0.7791, "step": 321 }, { "epoch": 0.039562599827988695, "grad_norm": 1.6540791915803679, "learning_rate": 1.1867321867321868e-05, "loss": 0.648, "step": 322 }, { "epoch": 0.039685465044845804, "grad_norm": 2.2870473710600394, "learning_rate": 1.1904176904176903e-05, "loss": 0.8005, "step": 323 }, { "epoch": 0.03980833026170291, "grad_norm": 1.924694801217767, "learning_rate": 1.1941031941031942e-05, "loss": 0.7746, "step": 324 }, { "epoch": 0.03993119547856002, "grad_norm": 2.693310438219356, "learning_rate": 1.1977886977886979e-05, "loss": 0.828, "step": 325 }, { "epoch": 0.04005406069541713, "grad_norm": 2.2592708719945174, "learning_rate": 1.2014742014742014e-05, "loss": 0.6775, "step": 326 }, { "epoch": 0.04017692591227424, "grad_norm": 1.9458228737821566, "learning_rate": 1.2051597051597053e-05, "loss": 0.8071, "step": 327 }, { "epoch": 0.040299791129131346, "grad_norm": 2.3588528991691162, "learning_rate": 1.208845208845209e-05, "loss": 0.7265, "step": 328 }, { "epoch": 0.04042265634598845, "grad_norm": 1.7752142204479076, "learning_rate": 1.2125307125307125e-05, "loss": 0.7747, "step": 329 }, { "epoch": 0.040545521562845556, "grad_norm": 1.760637762857694, "learning_rate": 1.2162162162162164e-05, "loss": 0.6457, "step": 330 }, { "epoch": 0.040668386779702664, "grad_norm": 1.897303196191167, "learning_rate": 1.2199017199017199e-05, "loss": 0.7169, "step": 331 }, { "epoch": 0.04079125199655977, "grad_norm": 2.123365641908746, "learning_rate": 1.2235872235872236e-05, "loss": 0.7304, "step": 332 }, { "epoch": 0.04091411721341688, "grad_norm": 2.1201082942021316, "learning_rate": 1.2272727272727274e-05, "loss": 0.7206, "step": 333 }, { "epoch": 0.04103698243027399, "grad_norm": 1.8607948454585348, "learning_rate": 1.230958230958231e-05, "loss": 0.7294, "step": 334 }, { "epoch": 0.0411598476471311, "grad_norm": 2.0653303507632246, "learning_rate": 1.2346437346437347e-05, "loss": 0.8098, "step": 335 }, { "epoch": 0.04128271286398821, "grad_norm": 2.039458635676661, "learning_rate": 1.2383292383292384e-05, "loss": 0.8171, "step": 336 }, { "epoch": 0.041405578080845315, "grad_norm": 2.2263423318760807, "learning_rate": 1.242014742014742e-05, "loss": 0.7632, "step": 337 }, { "epoch": 0.04152844329770242, "grad_norm": 2.0263655977164428, "learning_rate": 1.2457002457002457e-05, "loss": 0.7282, "step": 338 }, { "epoch": 0.041651308514559525, "grad_norm": 2.188768178536143, "learning_rate": 1.2493857493857493e-05, "loss": 0.6465, "step": 339 }, { "epoch": 0.04177417373141663, "grad_norm": 2.3008615084097888, "learning_rate": 1.2530712530712531e-05, "loss": 0.8001, "step": 340 }, { "epoch": 0.04189703894827374, "grad_norm": 1.8088028647021088, "learning_rate": 1.2567567567567568e-05, "loss": 0.772, "step": 341 }, { "epoch": 0.04201990416513085, "grad_norm": 2.0507123754388115, "learning_rate": 1.2604422604422604e-05, "loss": 0.7904, "step": 342 }, { "epoch": 0.04214276938198796, "grad_norm": 2.0910637741314133, "learning_rate": 1.2641277641277642e-05, "loss": 0.7083, "step": 343 }, { "epoch": 0.04226563459884507, "grad_norm": 2.287615313324481, "learning_rate": 1.267813267813268e-05, "loss": 0.7979, "step": 344 }, { "epoch": 0.042388499815702176, "grad_norm": 2.41383107129278, "learning_rate": 1.2714987714987714e-05, "loss": 0.6396, "step": 345 }, { "epoch": 0.042511365032559284, "grad_norm": 2.1888168792339955, "learning_rate": 1.2751842751842753e-05, "loss": 0.7382, "step": 346 }, { "epoch": 0.04263423024941639, "grad_norm": 2.0593995683781956, "learning_rate": 1.2788697788697788e-05, "loss": 0.642, "step": 347 }, { "epoch": 0.0427570954662735, "grad_norm": 2.3467540239291114, "learning_rate": 1.2825552825552825e-05, "loss": 0.79, "step": 348 }, { "epoch": 0.0428799606831306, "grad_norm": 1.8723984130808378, "learning_rate": 1.2862407862407864e-05, "loss": 0.8359, "step": 349 }, { "epoch": 0.04300282589998771, "grad_norm": 2.0442233859536754, "learning_rate": 1.28992628992629e-05, "loss": 0.6335, "step": 350 }, { "epoch": 0.04312569111684482, "grad_norm": 2.0757855428971785, "learning_rate": 1.2936117936117936e-05, "loss": 0.6809, "step": 351 }, { "epoch": 0.04324855633370193, "grad_norm": 1.9886508996214847, "learning_rate": 1.2972972972972975e-05, "loss": 0.6729, "step": 352 }, { "epoch": 0.043371421550559036, "grad_norm": 1.8690157255382642, "learning_rate": 1.300982800982801e-05, "loss": 0.9208, "step": 353 }, { "epoch": 0.043494286767416145, "grad_norm": 2.3663309259170067, "learning_rate": 1.3046683046683047e-05, "loss": 0.5853, "step": 354 }, { "epoch": 0.04361715198427325, "grad_norm": 2.1603833441367897, "learning_rate": 1.3083538083538084e-05, "loss": 0.6757, "step": 355 }, { "epoch": 0.04374001720113036, "grad_norm": 2.593079436867142, "learning_rate": 1.3120393120393121e-05, "loss": 0.8396, "step": 356 }, { "epoch": 0.04386288241798747, "grad_norm": 2.1071705131020746, "learning_rate": 1.3157248157248158e-05, "loss": 0.6493, "step": 357 }, { "epoch": 0.04398574763484458, "grad_norm": 1.7589144967274257, "learning_rate": 1.3194103194103193e-05, "loss": 0.7332, "step": 358 }, { "epoch": 0.04410861285170168, "grad_norm": 2.079967163095207, "learning_rate": 1.3230958230958232e-05, "loss": 0.6664, "step": 359 }, { "epoch": 0.04423147806855879, "grad_norm": 2.4070186884460325, "learning_rate": 1.3267813267813269e-05, "loss": 0.8518, "step": 360 }, { "epoch": 0.0443543432854159, "grad_norm": 2.2911434092266294, "learning_rate": 1.3304668304668304e-05, "loss": 0.8158, "step": 361 }, { "epoch": 0.044477208502273005, "grad_norm": 2.0789129806858426, "learning_rate": 1.3341523341523343e-05, "loss": 0.6302, "step": 362 }, { "epoch": 0.044600073719130114, "grad_norm": 1.8567447276652316, "learning_rate": 1.3378378378378378e-05, "loss": 0.8159, "step": 363 }, { "epoch": 0.04472293893598722, "grad_norm": 1.9129061577035584, "learning_rate": 1.3415233415233415e-05, "loss": 0.6508, "step": 364 }, { "epoch": 0.04484580415284433, "grad_norm": 1.9061631707224485, "learning_rate": 1.3452088452088453e-05, "loss": 0.8036, "step": 365 }, { "epoch": 0.04496866936970144, "grad_norm": 1.9077811837263354, "learning_rate": 1.3488943488943489e-05, "loss": 0.7048, "step": 366 }, { "epoch": 0.04509153458655855, "grad_norm": 1.8799413471907032, "learning_rate": 1.3525798525798526e-05, "loss": 0.6892, "step": 367 }, { "epoch": 0.045214399803415656, "grad_norm": 1.9104689599607951, "learning_rate": 1.3562653562653564e-05, "loss": 0.6501, "step": 368 }, { "epoch": 0.045337265020272764, "grad_norm": 1.7561402484592374, "learning_rate": 1.35995085995086e-05, "loss": 0.5894, "step": 369 }, { "epoch": 0.045460130237129866, "grad_norm": 2.0214912839976273, "learning_rate": 1.3636363636363637e-05, "loss": 0.6932, "step": 370 }, { "epoch": 0.045582995453986974, "grad_norm": 2.2514776078535133, "learning_rate": 1.3673218673218674e-05, "loss": 0.7486, "step": 371 }, { "epoch": 0.04570586067084408, "grad_norm": 1.832245316182979, "learning_rate": 1.371007371007371e-05, "loss": 0.7485, "step": 372 }, { "epoch": 0.04582872588770119, "grad_norm": 1.865490268319536, "learning_rate": 1.3746928746928747e-05, "loss": 0.6679, "step": 373 }, { "epoch": 0.0459515911045583, "grad_norm": 1.7623318009343716, "learning_rate": 1.3783783783783784e-05, "loss": 0.6638, "step": 374 }, { "epoch": 0.04607445632141541, "grad_norm": 2.1447377121825575, "learning_rate": 1.3820638820638821e-05, "loss": 0.8204, "step": 375 }, { "epoch": 0.046197321538272516, "grad_norm": 1.8155383502416222, "learning_rate": 1.3857493857493858e-05, "loss": 0.5779, "step": 376 }, { "epoch": 0.046320186755129625, "grad_norm": 1.8188662077903164, "learning_rate": 1.3894348894348894e-05, "loss": 0.745, "step": 377 }, { "epoch": 0.04644305197198673, "grad_norm": 1.7094151744991495, "learning_rate": 1.3931203931203932e-05, "loss": 0.7748, "step": 378 }, { "epoch": 0.04656591718884384, "grad_norm": 2.115932179131684, "learning_rate": 1.3968058968058967e-05, "loss": 0.7132, "step": 379 }, { "epoch": 0.04668878240570094, "grad_norm": 1.967624065092432, "learning_rate": 1.4004914004914004e-05, "loss": 0.6183, "step": 380 }, { "epoch": 0.04681164762255805, "grad_norm": 2.7078433565670705, "learning_rate": 1.4041769041769043e-05, "loss": 0.6663, "step": 381 }, { "epoch": 0.04693451283941516, "grad_norm": 2.383019734241531, "learning_rate": 1.4078624078624078e-05, "loss": 0.7197, "step": 382 }, { "epoch": 0.04705737805627227, "grad_norm": 1.9527342939061616, "learning_rate": 1.4115479115479115e-05, "loss": 0.7659, "step": 383 }, { "epoch": 0.04718024327312938, "grad_norm": 1.8797841661896242, "learning_rate": 1.4152334152334154e-05, "loss": 0.6423, "step": 384 }, { "epoch": 0.047303108489986485, "grad_norm": 2.125027825620537, "learning_rate": 1.4189189189189189e-05, "loss": 0.6688, "step": 385 }, { "epoch": 0.047425973706843594, "grad_norm": 1.9643590322280289, "learning_rate": 1.4226044226044226e-05, "loss": 0.7397, "step": 386 }, { "epoch": 0.0475488389237007, "grad_norm": 1.927905866289698, "learning_rate": 1.4262899262899263e-05, "loss": 0.7264, "step": 387 }, { "epoch": 0.04767170414055781, "grad_norm": 2.0437524067850372, "learning_rate": 1.42997542997543e-05, "loss": 0.7254, "step": 388 }, { "epoch": 0.04779456935741492, "grad_norm": 2.132315872365481, "learning_rate": 1.4336609336609337e-05, "loss": 0.7217, "step": 389 }, { "epoch": 0.04791743457427202, "grad_norm": 1.7906031709295303, "learning_rate": 1.4373464373464374e-05, "loss": 0.7334, "step": 390 }, { "epoch": 0.04804029979112913, "grad_norm": 1.9015139910966756, "learning_rate": 1.441031941031941e-05, "loss": 0.7007, "step": 391 }, { "epoch": 0.04816316500798624, "grad_norm": 2.5100527179759458, "learning_rate": 1.4447174447174448e-05, "loss": 0.7695, "step": 392 }, { "epoch": 0.048286030224843346, "grad_norm": 2.5869255787425933, "learning_rate": 1.4484029484029485e-05, "loss": 0.696, "step": 393 }, { "epoch": 0.048408895441700454, "grad_norm": 1.9841743033252863, "learning_rate": 1.4520884520884522e-05, "loss": 0.6753, "step": 394 }, { "epoch": 0.04853176065855756, "grad_norm": 2.305836010745212, "learning_rate": 1.4557739557739557e-05, "loss": 0.721, "step": 395 }, { "epoch": 0.04865462587541467, "grad_norm": 1.8166300558576498, "learning_rate": 1.4594594594594596e-05, "loss": 0.5741, "step": 396 }, { "epoch": 0.04877749109227178, "grad_norm": 1.9774537134246464, "learning_rate": 1.4631449631449633e-05, "loss": 0.7166, "step": 397 }, { "epoch": 0.04890035630912889, "grad_norm": 2.1403413408485625, "learning_rate": 1.4668304668304668e-05, "loss": 0.6792, "step": 398 }, { "epoch": 0.049023221525986, "grad_norm": 1.7427602430512399, "learning_rate": 1.4705159705159705e-05, "loss": 0.616, "step": 399 }, { "epoch": 0.0491460867428431, "grad_norm": 1.7606644580173563, "learning_rate": 1.4742014742014743e-05, "loss": 0.6295, "step": 400 }, { "epoch": 0.049268951959700207, "grad_norm": 2.0917837745880767, "learning_rate": 1.4778869778869779e-05, "loss": 0.664, "step": 401 }, { "epoch": 0.049391817176557315, "grad_norm": 1.5686558691995127, "learning_rate": 1.4815724815724816e-05, "loss": 0.7115, "step": 402 }, { "epoch": 0.04951468239341442, "grad_norm": 2.0325770006891517, "learning_rate": 1.4852579852579853e-05, "loss": 0.6965, "step": 403 }, { "epoch": 0.04963754761027153, "grad_norm": 2.1122779300572403, "learning_rate": 1.488943488943489e-05, "loss": 0.7723, "step": 404 }, { "epoch": 0.04976041282712864, "grad_norm": 2.024207450354362, "learning_rate": 1.4926289926289926e-05, "loss": 0.7147, "step": 405 }, { "epoch": 0.04988327804398575, "grad_norm": 1.9402270982175134, "learning_rate": 1.4963144963144963e-05, "loss": 0.7117, "step": 406 }, { "epoch": 0.05000614326084286, "grad_norm": 2.0760149527073546, "learning_rate": 1.5e-05, "loss": 0.6795, "step": 407 }, { "epoch": 0.050129008477699966, "grad_norm": 1.7639706254743934, "learning_rate": 1.5036855036855039e-05, "loss": 0.6619, "step": 408 }, { "epoch": 0.050251873694557074, "grad_norm": 2.01214156299943, "learning_rate": 1.5073710073710073e-05, "loss": 0.6675, "step": 409 }, { "epoch": 0.050374738911414176, "grad_norm": 2.1951937491314624, "learning_rate": 1.5110565110565111e-05, "loss": 0.6106, "step": 410 }, { "epoch": 0.050497604128271284, "grad_norm": 2.137537365681502, "learning_rate": 1.5147420147420148e-05, "loss": 0.6583, "step": 411 }, { "epoch": 0.05062046934512839, "grad_norm": 1.6279763403578074, "learning_rate": 1.5184275184275183e-05, "loss": 0.6736, "step": 412 }, { "epoch": 0.0507433345619855, "grad_norm": 2.0399234379080937, "learning_rate": 1.5221130221130222e-05, "loss": 0.657, "step": 413 }, { "epoch": 0.05086619977884261, "grad_norm": 1.762652647762199, "learning_rate": 1.5257985257985259e-05, "loss": 0.7263, "step": 414 }, { "epoch": 0.05098906499569972, "grad_norm": 2.1585960742636083, "learning_rate": 1.5294840294840294e-05, "loss": 0.7437, "step": 415 }, { "epoch": 0.051111930212556826, "grad_norm": 2.1321093878006363, "learning_rate": 1.533169533169533e-05, "loss": 0.7344, "step": 416 }, { "epoch": 0.051234795429413935, "grad_norm": 2.3576870722535332, "learning_rate": 1.536855036855037e-05, "loss": 0.6276, "step": 417 }, { "epoch": 0.05135766064627104, "grad_norm": 2.045082346913435, "learning_rate": 1.5405405405405405e-05, "loss": 0.628, "step": 418 }, { "epoch": 0.05148052586312815, "grad_norm": 2.0759652553732977, "learning_rate": 1.5442260442260442e-05, "loss": 0.7324, "step": 419 }, { "epoch": 0.05160339107998525, "grad_norm": 2.0412784329789875, "learning_rate": 1.5479115479115482e-05, "loss": 0.7996, "step": 420 }, { "epoch": 0.05172625629684236, "grad_norm": 2.0913478248770536, "learning_rate": 1.5515970515970516e-05, "loss": 0.8609, "step": 421 }, { "epoch": 0.05184912151369947, "grad_norm": 2.1371852509139626, "learning_rate": 1.5552825552825553e-05, "loss": 0.7313, "step": 422 }, { "epoch": 0.05197198673055658, "grad_norm": 1.9442557050208458, "learning_rate": 1.5589680589680593e-05, "loss": 0.7469, "step": 423 }, { "epoch": 0.05209485194741369, "grad_norm": 2.0892299790540068, "learning_rate": 1.5626535626535627e-05, "loss": 0.8119, "step": 424 }, { "epoch": 0.052217717164270795, "grad_norm": 1.894552826160603, "learning_rate": 1.5663390663390664e-05, "loss": 0.6871, "step": 425 }, { "epoch": 0.052340582381127904, "grad_norm": 1.8760943944084383, "learning_rate": 1.57002457002457e-05, "loss": 0.6326, "step": 426 }, { "epoch": 0.05246344759798501, "grad_norm": 1.9440464350855404, "learning_rate": 1.5737100737100738e-05, "loss": 0.691, "step": 427 }, { "epoch": 0.05258631281484212, "grad_norm": 2.3512172709301478, "learning_rate": 1.5773955773955775e-05, "loss": 0.7162, "step": 428 }, { "epoch": 0.05270917803169923, "grad_norm": 2.2186890420809044, "learning_rate": 1.5810810810810808e-05, "loss": 0.6602, "step": 429 }, { "epoch": 0.05283204324855633, "grad_norm": 1.7273211263309867, "learning_rate": 1.584766584766585e-05, "loss": 0.611, "step": 430 }, { "epoch": 0.05295490846541344, "grad_norm": 1.7475577087129792, "learning_rate": 1.5884520884520886e-05, "loss": 0.6745, "step": 431 }, { "epoch": 0.05307777368227055, "grad_norm": 2.0917120051655895, "learning_rate": 1.592137592137592e-05, "loss": 0.7538, "step": 432 }, { "epoch": 0.053200638899127656, "grad_norm": 1.9707603548464119, "learning_rate": 1.595823095823096e-05, "loss": 0.7631, "step": 433 }, { "epoch": 0.053323504115984764, "grad_norm": 1.8886966765714708, "learning_rate": 1.5995085995085996e-05, "loss": 0.8089, "step": 434 }, { "epoch": 0.05344636933284187, "grad_norm": 2.1497817469515303, "learning_rate": 1.603194103194103e-05, "loss": 0.7558, "step": 435 }, { "epoch": 0.05356923454969898, "grad_norm": 1.753946917007456, "learning_rate": 1.606879606879607e-05, "loss": 0.7652, "step": 436 }, { "epoch": 0.05369209976655609, "grad_norm": 2.3976013266198444, "learning_rate": 1.6105651105651107e-05, "loss": 0.829, "step": 437 }, { "epoch": 0.0538149649834132, "grad_norm": 2.094305826968303, "learning_rate": 1.614250614250614e-05, "loss": 0.7637, "step": 438 }, { "epoch": 0.053937830200270306, "grad_norm": 1.999097283816911, "learning_rate": 1.617936117936118e-05, "loss": 0.6849, "step": 439 }, { "epoch": 0.05406069541712741, "grad_norm": 1.9289062579404344, "learning_rate": 1.6216216216216218e-05, "loss": 0.6984, "step": 440 }, { "epoch": 0.054183560633984516, "grad_norm": 1.9180958313981633, "learning_rate": 1.625307125307125e-05, "loss": 0.7515, "step": 441 }, { "epoch": 0.054306425850841625, "grad_norm": 2.342090669604325, "learning_rate": 1.628992628992629e-05, "loss": 0.9044, "step": 442 }, { "epoch": 0.05442929106769873, "grad_norm": 1.9260463456889936, "learning_rate": 1.632678132678133e-05, "loss": 0.7339, "step": 443 }, { "epoch": 0.05455215628455584, "grad_norm": 2.264591673688307, "learning_rate": 1.6363636363636363e-05, "loss": 0.6785, "step": 444 }, { "epoch": 0.05467502150141295, "grad_norm": 1.8200201074063862, "learning_rate": 1.64004914004914e-05, "loss": 0.6844, "step": 445 }, { "epoch": 0.05479788671827006, "grad_norm": 1.9228555398635145, "learning_rate": 1.643734643734644e-05, "loss": 0.6536, "step": 446 }, { "epoch": 0.05492075193512717, "grad_norm": 1.9566323134338655, "learning_rate": 1.6474201474201473e-05, "loss": 0.6743, "step": 447 }, { "epoch": 0.055043617151984275, "grad_norm": 1.951599429963938, "learning_rate": 1.651105651105651e-05, "loss": 0.7161, "step": 448 }, { "epoch": 0.055166482368841384, "grad_norm": 2.765309959326327, "learning_rate": 1.654791154791155e-05, "loss": 0.8251, "step": 449 }, { "epoch": 0.055289347585698485, "grad_norm": 2.000189428560847, "learning_rate": 1.6584766584766584e-05, "loss": 0.6055, "step": 450 }, { "epoch": 0.055412212802555594, "grad_norm": 2.1388224918442487, "learning_rate": 1.662162162162162e-05, "loss": 0.6275, "step": 451 }, { "epoch": 0.0555350780194127, "grad_norm": 2.1359160568077433, "learning_rate": 1.665847665847666e-05, "loss": 0.7841, "step": 452 }, { "epoch": 0.05565794323626981, "grad_norm": 1.7137154663549354, "learning_rate": 1.6695331695331695e-05, "loss": 0.7752, "step": 453 }, { "epoch": 0.05578080845312692, "grad_norm": 2.058366152637004, "learning_rate": 1.6732186732186732e-05, "loss": 0.7866, "step": 454 }, { "epoch": 0.05590367366998403, "grad_norm": 1.6755049597023677, "learning_rate": 1.6769041769041772e-05, "loss": 0.6936, "step": 455 }, { "epoch": 0.056026538886841136, "grad_norm": 1.8901714723589986, "learning_rate": 1.6805896805896806e-05, "loss": 0.8164, "step": 456 }, { "epoch": 0.056149404103698244, "grad_norm": 2.436826233660747, "learning_rate": 1.6842751842751843e-05, "loss": 0.8378, "step": 457 }, { "epoch": 0.05627226932055535, "grad_norm": 2.041833619280879, "learning_rate": 1.687960687960688e-05, "loss": 0.6302, "step": 458 }, { "epoch": 0.05639513453741246, "grad_norm": 2.0964824886227826, "learning_rate": 1.6916461916461917e-05, "loss": 0.7129, "step": 459 }, { "epoch": 0.05651799975426957, "grad_norm": 2.270597388688162, "learning_rate": 1.6953316953316954e-05, "loss": 0.7128, "step": 460 }, { "epoch": 0.05664086497112667, "grad_norm": 1.8660246822262052, "learning_rate": 1.699017199017199e-05, "loss": 0.777, "step": 461 }, { "epoch": 0.05676373018798378, "grad_norm": 2.3782733506498053, "learning_rate": 1.7027027027027028e-05, "loss": 0.6615, "step": 462 }, { "epoch": 0.05688659540484089, "grad_norm": 2.1442928870900735, "learning_rate": 1.7063882063882065e-05, "loss": 0.6766, "step": 463 }, { "epoch": 0.057009460621698, "grad_norm": 2.071838223594134, "learning_rate": 1.71007371007371e-05, "loss": 0.6852, "step": 464 }, { "epoch": 0.057132325838555105, "grad_norm": 1.8461221994993846, "learning_rate": 1.713759213759214e-05, "loss": 0.7903, "step": 465 }, { "epoch": 0.05725519105541221, "grad_norm": 2.087416006304511, "learning_rate": 1.7174447174447175e-05, "loss": 0.5906, "step": 466 }, { "epoch": 0.05737805627226932, "grad_norm": 1.907362834207086, "learning_rate": 1.7211302211302212e-05, "loss": 0.6855, "step": 467 }, { "epoch": 0.05750092148912643, "grad_norm": 1.724123373800819, "learning_rate": 1.724815724815725e-05, "loss": 0.6707, "step": 468 }, { "epoch": 0.05762378670598354, "grad_norm": 2.3223462639745933, "learning_rate": 1.7285012285012286e-05, "loss": 0.8163, "step": 469 }, { "epoch": 0.05774665192284065, "grad_norm": 2.0821310428379243, "learning_rate": 1.732186732186732e-05, "loss": 0.812, "step": 470 }, { "epoch": 0.05786951713969775, "grad_norm": 1.9926787966090476, "learning_rate": 1.735872235872236e-05, "loss": 0.743, "step": 471 }, { "epoch": 0.05799238235655486, "grad_norm": 2.4976507545630655, "learning_rate": 1.7395577395577397e-05, "loss": 0.7529, "step": 472 }, { "epoch": 0.058115247573411966, "grad_norm": 1.861992054403512, "learning_rate": 1.743243243243243e-05, "loss": 0.6764, "step": 473 }, { "epoch": 0.058238112790269074, "grad_norm": 1.9025046576544518, "learning_rate": 1.7469287469287468e-05, "loss": 0.7604, "step": 474 }, { "epoch": 0.05836097800712618, "grad_norm": 1.940748792513558, "learning_rate": 1.7506142506142508e-05, "loss": 0.6984, "step": 475 }, { "epoch": 0.05848384322398329, "grad_norm": 1.9135095767707437, "learning_rate": 1.754299754299754e-05, "loss": 0.7756, "step": 476 }, { "epoch": 0.0586067084408404, "grad_norm": 1.7726204957651555, "learning_rate": 1.757985257985258e-05, "loss": 0.6183, "step": 477 }, { "epoch": 0.05872957365769751, "grad_norm": 1.8367505216838214, "learning_rate": 1.761670761670762e-05, "loss": 0.7494, "step": 478 }, { "epoch": 0.058852438874554616, "grad_norm": 1.9270152495767996, "learning_rate": 1.7653562653562652e-05, "loss": 0.7208, "step": 479 }, { "epoch": 0.058975304091411725, "grad_norm": 1.8548260381135355, "learning_rate": 1.769041769041769e-05, "loss": 0.651, "step": 480 }, { "epoch": 0.059098169308268826, "grad_norm": 1.8625274834460512, "learning_rate": 1.772727272727273e-05, "loss": 0.7101, "step": 481 }, { "epoch": 0.059221034525125935, "grad_norm": 1.8664744836340494, "learning_rate": 1.7764127764127763e-05, "loss": 0.6675, "step": 482 }, { "epoch": 0.05934389974198304, "grad_norm": 2.2907973493667972, "learning_rate": 1.78009828009828e-05, "loss": 0.6579, "step": 483 }, { "epoch": 0.05946676495884015, "grad_norm": 2.2143074581627595, "learning_rate": 1.783783783783784e-05, "loss": 0.7164, "step": 484 }, { "epoch": 0.05958963017569726, "grad_norm": 2.020856206527743, "learning_rate": 1.7874692874692874e-05, "loss": 0.8205, "step": 485 }, { "epoch": 0.05971249539255437, "grad_norm": 2.071605012177533, "learning_rate": 1.791154791154791e-05, "loss": 0.8685, "step": 486 }, { "epoch": 0.05983536060941148, "grad_norm": 2.0374807606539322, "learning_rate": 1.794840294840295e-05, "loss": 0.7829, "step": 487 }, { "epoch": 0.059958225826268585, "grad_norm": 2.173190255603572, "learning_rate": 1.7985257985257985e-05, "loss": 0.6843, "step": 488 }, { "epoch": 0.060081091043125694, "grad_norm": 1.8273034307139202, "learning_rate": 1.8022113022113022e-05, "loss": 0.5978, "step": 489 }, { "epoch": 0.0602039562599828, "grad_norm": 2.3089326187934947, "learning_rate": 1.805896805896806e-05, "loss": 0.7468, "step": 490 }, { "epoch": 0.060326821476839904, "grad_norm": 2.4203853051172737, "learning_rate": 1.8095823095823096e-05, "loss": 0.7678, "step": 491 }, { "epoch": 0.06044968669369701, "grad_norm": 2.373258334919009, "learning_rate": 1.8132678132678133e-05, "loss": 0.6623, "step": 492 }, { "epoch": 0.06057255191055412, "grad_norm": 2.3414591829037317, "learning_rate": 1.816953316953317e-05, "loss": 0.873, "step": 493 }, { "epoch": 0.06069541712741123, "grad_norm": 2.2740576417694474, "learning_rate": 1.8206388206388207e-05, "loss": 0.7079, "step": 494 }, { "epoch": 0.06081828234426834, "grad_norm": 1.9565234393312163, "learning_rate": 1.8243243243243244e-05, "loss": 0.7624, "step": 495 }, { "epoch": 0.060941147561125446, "grad_norm": 1.911531059391597, "learning_rate": 1.828009828009828e-05, "loss": 0.7939, "step": 496 }, { "epoch": 0.061064012777982554, "grad_norm": 2.342224818221533, "learning_rate": 1.8316953316953318e-05, "loss": 0.7048, "step": 497 }, { "epoch": 0.06118687799483966, "grad_norm": 2.171000784560076, "learning_rate": 1.8353808353808355e-05, "loss": 0.7169, "step": 498 }, { "epoch": 0.06130974321169677, "grad_norm": 1.9958230958011038, "learning_rate": 1.839066339066339e-05, "loss": 0.6634, "step": 499 }, { "epoch": 0.06143260842855388, "grad_norm": 2.1508795768885918, "learning_rate": 1.842751842751843e-05, "loss": 0.7765, "step": 500 }, { "epoch": 0.06155547364541098, "grad_norm": 1.7283308921706135, "learning_rate": 1.8464373464373465e-05, "loss": 0.6258, "step": 501 }, { "epoch": 0.06167833886226809, "grad_norm": 2.128066794176502, "learning_rate": 1.8501228501228502e-05, "loss": 0.5821, "step": 502 }, { "epoch": 0.0618012040791252, "grad_norm": 2.3709256991990073, "learning_rate": 1.853808353808354e-05, "loss": 0.7189, "step": 503 }, { "epoch": 0.061924069295982306, "grad_norm": 1.9808177177514754, "learning_rate": 1.8574938574938576e-05, "loss": 0.6448, "step": 504 }, { "epoch": 0.062046934512839415, "grad_norm": 1.8464843298364157, "learning_rate": 1.8611793611793613e-05, "loss": 0.7335, "step": 505 }, { "epoch": 0.06216979972969652, "grad_norm": 2.435547905533497, "learning_rate": 1.864864864864865e-05, "loss": 0.6962, "step": 506 }, { "epoch": 0.06229266494655363, "grad_norm": 2.1373853240562974, "learning_rate": 1.8685503685503687e-05, "loss": 0.689, "step": 507 }, { "epoch": 0.06241553016341074, "grad_norm": 2.0645155829860347, "learning_rate": 1.8722358722358724e-05, "loss": 0.6639, "step": 508 }, { "epoch": 0.06253839538026784, "grad_norm": 1.7982090061191802, "learning_rate": 1.8759213759213758e-05, "loss": 0.6843, "step": 509 }, { "epoch": 0.06266126059712496, "grad_norm": 1.9194479081781124, "learning_rate": 1.8796068796068798e-05, "loss": 0.7175, "step": 510 }, { "epoch": 0.06278412581398206, "grad_norm": 2.1185243464470807, "learning_rate": 1.883292383292383e-05, "loss": 0.6509, "step": 511 }, { "epoch": 0.06290699103083917, "grad_norm": 1.997360129445677, "learning_rate": 1.886977886977887e-05, "loss": 0.6824, "step": 512 }, { "epoch": 0.06302985624769628, "grad_norm": 1.9097457072771375, "learning_rate": 1.890663390663391e-05, "loss": 0.7307, "step": 513 }, { "epoch": 0.06315272146455339, "grad_norm": 1.7515420351212456, "learning_rate": 1.8943488943488942e-05, "loss": 0.7153, "step": 514 }, { "epoch": 0.06327558668141049, "grad_norm": 2.1644615273116723, "learning_rate": 1.898034398034398e-05, "loss": 0.7089, "step": 515 }, { "epoch": 0.0633984518982676, "grad_norm": 2.2608971652179526, "learning_rate": 1.901719901719902e-05, "loss": 0.6137, "step": 516 }, { "epoch": 0.06352131711512471, "grad_norm": 2.077349988760777, "learning_rate": 1.9054054054054053e-05, "loss": 0.662, "step": 517 }, { "epoch": 0.06364418233198181, "grad_norm": 1.7576926947213254, "learning_rate": 1.909090909090909e-05, "loss": 0.8567, "step": 518 }, { "epoch": 0.06376704754883893, "grad_norm": 1.9833695154140059, "learning_rate": 1.912776412776413e-05, "loss": 0.7427, "step": 519 }, { "epoch": 0.06388991276569603, "grad_norm": 2.3638498545522872, "learning_rate": 1.9164619164619164e-05, "loss": 0.7018, "step": 520 }, { "epoch": 0.06401277798255314, "grad_norm": 2.1441800542079155, "learning_rate": 1.92014742014742e-05, "loss": 0.791, "step": 521 }, { "epoch": 0.06413564319941024, "grad_norm": 2.1206188217070996, "learning_rate": 1.923832923832924e-05, "loss": 0.7062, "step": 522 }, { "epoch": 0.06425850841626736, "grad_norm": 2.51121776362489, "learning_rate": 1.9275184275184275e-05, "loss": 0.7154, "step": 523 }, { "epoch": 0.06438137363312446, "grad_norm": 1.897917368444447, "learning_rate": 1.9312039312039312e-05, "loss": 0.754, "step": 524 }, { "epoch": 0.06450423884998158, "grad_norm": 1.9121804922393901, "learning_rate": 1.934889434889435e-05, "loss": 0.7304, "step": 525 }, { "epoch": 0.06462710406683868, "grad_norm": 2.159825258338104, "learning_rate": 1.9385749385749386e-05, "loss": 0.7749, "step": 526 }, { "epoch": 0.06474996928369578, "grad_norm": 2.19956708736866, "learning_rate": 1.9422604422604423e-05, "loss": 0.6687, "step": 527 }, { "epoch": 0.0648728345005529, "grad_norm": 1.9694374157804415, "learning_rate": 1.945945945945946e-05, "loss": 0.6652, "step": 528 }, { "epoch": 0.06499569971741, "grad_norm": 1.853260335430941, "learning_rate": 1.9496314496314497e-05, "loss": 0.5907, "step": 529 }, { "epoch": 0.06511856493426711, "grad_norm": 1.8819944016719397, "learning_rate": 1.9533169533169534e-05, "loss": 0.5258, "step": 530 }, { "epoch": 0.06524143015112421, "grad_norm": 1.9802728909732878, "learning_rate": 1.957002457002457e-05, "loss": 0.7492, "step": 531 }, { "epoch": 0.06536429536798133, "grad_norm": 2.022593593600987, "learning_rate": 1.9606879606879607e-05, "loss": 0.6469, "step": 532 }, { "epoch": 0.06548716058483843, "grad_norm": 1.7771327141137787, "learning_rate": 1.9643734643734644e-05, "loss": 0.7068, "step": 533 }, { "epoch": 0.06561002580169555, "grad_norm": 1.852581214762114, "learning_rate": 1.968058968058968e-05, "loss": 0.8058, "step": 534 }, { "epoch": 0.06573289101855265, "grad_norm": 1.6376101438135378, "learning_rate": 1.971744471744472e-05, "loss": 0.6492, "step": 535 }, { "epoch": 0.06585575623540975, "grad_norm": 1.9951664331083137, "learning_rate": 1.9754299754299755e-05, "loss": 0.6434, "step": 536 }, { "epoch": 0.06597862145226686, "grad_norm": 1.9022173503469637, "learning_rate": 1.9791154791154792e-05, "loss": 0.6672, "step": 537 }, { "epoch": 0.06610148666912397, "grad_norm": 1.903943702161234, "learning_rate": 1.982800982800983e-05, "loss": 0.7301, "step": 538 }, { "epoch": 0.06622435188598108, "grad_norm": 2.0375471489032475, "learning_rate": 1.9864864864864866e-05, "loss": 0.8768, "step": 539 }, { "epoch": 0.06634721710283818, "grad_norm": 2.1440402634584235, "learning_rate": 1.9901719901719903e-05, "loss": 0.7016, "step": 540 }, { "epoch": 0.0664700823196953, "grad_norm": 2.1344561936114506, "learning_rate": 1.9938574938574937e-05, "loss": 0.6984, "step": 541 }, { "epoch": 0.0665929475365524, "grad_norm": 1.9382366230567107, "learning_rate": 1.9975429975429977e-05, "loss": 0.6456, "step": 542 }, { "epoch": 0.06671581275340951, "grad_norm": 2.008008369586257, "learning_rate": 2.0012285012285014e-05, "loss": 0.6301, "step": 543 }, { "epoch": 0.06683867797026662, "grad_norm": 2.6740831380919077, "learning_rate": 2.0049140049140048e-05, "loss": 0.689, "step": 544 }, { "epoch": 0.06696154318712373, "grad_norm": 2.339770686935505, "learning_rate": 2.0085995085995088e-05, "loss": 0.8015, "step": 545 }, { "epoch": 0.06708440840398083, "grad_norm": 3.202080304866746, "learning_rate": 2.0122850122850125e-05, "loss": 0.7726, "step": 546 }, { "epoch": 0.06720727362083793, "grad_norm": 1.9620041088047253, "learning_rate": 2.015970515970516e-05, "loss": 0.7309, "step": 547 }, { "epoch": 0.06733013883769505, "grad_norm": 1.8631079564208417, "learning_rate": 2.01965601965602e-05, "loss": 0.6778, "step": 548 }, { "epoch": 0.06745300405455215, "grad_norm": 1.654514401179753, "learning_rate": 2.0233415233415236e-05, "loss": 0.6988, "step": 549 }, { "epoch": 0.06757586927140927, "grad_norm": 2.030227879798917, "learning_rate": 2.027027027027027e-05, "loss": 0.6121, "step": 550 }, { "epoch": 0.06769873448826637, "grad_norm": 1.9600033086491733, "learning_rate": 2.030712530712531e-05, "loss": 0.6675, "step": 551 }, { "epoch": 0.06782159970512348, "grad_norm": 2.1051414421299723, "learning_rate": 2.0343980343980343e-05, "loss": 0.6527, "step": 552 }, { "epoch": 0.06794446492198059, "grad_norm": 2.0667395553548493, "learning_rate": 2.038083538083538e-05, "loss": 0.7225, "step": 553 }, { "epoch": 0.0680673301388377, "grad_norm": 1.7322573949240014, "learning_rate": 2.041769041769042e-05, "loss": 0.701, "step": 554 }, { "epoch": 0.0681901953556948, "grad_norm": 2.1567206093813076, "learning_rate": 2.0454545454545454e-05, "loss": 0.8044, "step": 555 }, { "epoch": 0.06831306057255192, "grad_norm": 2.1658034347403947, "learning_rate": 2.049140049140049e-05, "loss": 0.8096, "step": 556 }, { "epoch": 0.06843592578940902, "grad_norm": 2.2849138053891163, "learning_rate": 2.0528255528255528e-05, "loss": 0.5901, "step": 557 }, { "epoch": 0.06855879100626612, "grad_norm": 1.8841753784162607, "learning_rate": 2.0565110565110565e-05, "loss": 0.7117, "step": 558 }, { "epoch": 0.06868165622312324, "grad_norm": 1.936132153787478, "learning_rate": 2.0601965601965602e-05, "loss": 0.6644, "step": 559 }, { "epoch": 0.06880452143998034, "grad_norm": 1.9891220416109536, "learning_rate": 2.063882063882064e-05, "loss": 0.6325, "step": 560 }, { "epoch": 0.06892738665683745, "grad_norm": 1.8962534309087307, "learning_rate": 2.0675675675675676e-05, "loss": 0.6545, "step": 561 }, { "epoch": 0.06905025187369455, "grad_norm": 2.204048657624188, "learning_rate": 2.0712530712530713e-05, "loss": 0.684, "step": 562 }, { "epoch": 0.06917311709055167, "grad_norm": 1.9916543130785984, "learning_rate": 2.074938574938575e-05, "loss": 0.7059, "step": 563 }, { "epoch": 0.06929598230740877, "grad_norm": 2.0544967941324197, "learning_rate": 2.0786240786240787e-05, "loss": 0.7746, "step": 564 }, { "epoch": 0.06941884752426589, "grad_norm": 1.8693663999584553, "learning_rate": 2.0823095823095824e-05, "loss": 0.7438, "step": 565 }, { "epoch": 0.06954171274112299, "grad_norm": 2.1101554923166606, "learning_rate": 2.085995085995086e-05, "loss": 0.6561, "step": 566 }, { "epoch": 0.06966457795798009, "grad_norm": 2.1169254339568515, "learning_rate": 2.0896805896805897e-05, "loss": 0.7129, "step": 567 }, { "epoch": 0.0697874431748372, "grad_norm": 1.8470221909127316, "learning_rate": 2.0933660933660934e-05, "loss": 0.7379, "step": 568 }, { "epoch": 0.0699103083916943, "grad_norm": 1.7109189527029212, "learning_rate": 2.097051597051597e-05, "loss": 0.7486, "step": 569 }, { "epoch": 0.07003317360855142, "grad_norm": 1.9112601420959396, "learning_rate": 2.1007371007371008e-05, "loss": 0.5895, "step": 570 }, { "epoch": 0.07015603882540852, "grad_norm": 2.192992153351891, "learning_rate": 2.1044226044226045e-05, "loss": 0.667, "step": 571 }, { "epoch": 0.07027890404226564, "grad_norm": 1.9278428221726676, "learning_rate": 2.1081081081081082e-05, "loss": 0.6867, "step": 572 }, { "epoch": 0.07040176925912274, "grad_norm": 1.9372487668278928, "learning_rate": 2.111793611793612e-05, "loss": 0.8377, "step": 573 }, { "epoch": 0.07052463447597986, "grad_norm": 1.8201532239171367, "learning_rate": 2.1154791154791156e-05, "loss": 0.6086, "step": 574 }, { "epoch": 0.07064749969283696, "grad_norm": 1.9507494045829414, "learning_rate": 2.1191646191646193e-05, "loss": 0.8002, "step": 575 }, { "epoch": 0.07077036490969407, "grad_norm": 1.7880748889421596, "learning_rate": 2.1228501228501227e-05, "loss": 0.703, "step": 576 }, { "epoch": 0.07089323012655117, "grad_norm": 1.9125399549392166, "learning_rate": 2.1265356265356267e-05, "loss": 0.6774, "step": 577 }, { "epoch": 0.07101609534340828, "grad_norm": 2.3411935773084944, "learning_rate": 2.1302211302211304e-05, "loss": 0.7504, "step": 578 }, { "epoch": 0.07113896056026539, "grad_norm": 1.8469575071452744, "learning_rate": 2.1339066339066337e-05, "loss": 0.5581, "step": 579 }, { "epoch": 0.07126182577712249, "grad_norm": 1.980992909925409, "learning_rate": 2.1375921375921378e-05, "loss": 0.7456, "step": 580 }, { "epoch": 0.07138469099397961, "grad_norm": 1.656736701128086, "learning_rate": 2.1412776412776415e-05, "loss": 0.6771, "step": 581 }, { "epoch": 0.07150755621083671, "grad_norm": 1.8793876805389826, "learning_rate": 2.1449631449631448e-05, "loss": 0.6047, "step": 582 }, { "epoch": 0.07163042142769382, "grad_norm": 1.8252532617280828, "learning_rate": 2.148648648648649e-05, "loss": 0.6572, "step": 583 }, { "epoch": 0.07175328664455093, "grad_norm": 2.7235407375076077, "learning_rate": 2.1523341523341526e-05, "loss": 0.7116, "step": 584 }, { "epoch": 0.07187615186140804, "grad_norm": 2.0549695427326244, "learning_rate": 2.156019656019656e-05, "loss": 0.8645, "step": 585 }, { "epoch": 0.07199901707826514, "grad_norm": 1.9662709890153582, "learning_rate": 2.15970515970516e-05, "loss": 0.6612, "step": 586 }, { "epoch": 0.07212188229512224, "grad_norm": 2.072374302791573, "learning_rate": 2.1633906633906636e-05, "loss": 0.82, "step": 587 }, { "epoch": 0.07224474751197936, "grad_norm": 1.9232320305518285, "learning_rate": 2.167076167076167e-05, "loss": 0.7297, "step": 588 }, { "epoch": 0.07236761272883646, "grad_norm": 1.8937276450105898, "learning_rate": 2.170761670761671e-05, "loss": 0.6319, "step": 589 }, { "epoch": 0.07249047794569358, "grad_norm": 2.5708160186337676, "learning_rate": 2.1744471744471747e-05, "loss": 0.7812, "step": 590 }, { "epoch": 0.07261334316255068, "grad_norm": 2.252257641288314, "learning_rate": 2.178132678132678e-05, "loss": 0.6408, "step": 591 }, { "epoch": 0.0727362083794078, "grad_norm": 1.9186663630717087, "learning_rate": 2.1818181818181818e-05, "loss": 0.6203, "step": 592 }, { "epoch": 0.0728590735962649, "grad_norm": 2.070433886871464, "learning_rate": 2.1855036855036855e-05, "loss": 0.6898, "step": 593 }, { "epoch": 0.07298193881312201, "grad_norm": 1.9948235298812942, "learning_rate": 2.1891891891891892e-05, "loss": 0.7608, "step": 594 }, { "epoch": 0.07310480402997911, "grad_norm": 1.6439195401470175, "learning_rate": 2.192874692874693e-05, "loss": 0.7041, "step": 595 }, { "epoch": 0.07322766924683623, "grad_norm": 2.059761775744195, "learning_rate": 2.1965601965601966e-05, "loss": 0.7671, "step": 596 }, { "epoch": 0.07335053446369333, "grad_norm": 1.9714717947903029, "learning_rate": 2.2002457002457003e-05, "loss": 0.704, "step": 597 }, { "epoch": 0.07347339968055043, "grad_norm": 2.0066872105060676, "learning_rate": 2.203931203931204e-05, "loss": 0.6947, "step": 598 }, { "epoch": 0.07359626489740755, "grad_norm": 1.990341662718597, "learning_rate": 2.2076167076167076e-05, "loss": 0.736, "step": 599 }, { "epoch": 0.07371913011426465, "grad_norm": 2.188100481231796, "learning_rate": 2.2113022113022113e-05, "loss": 0.6427, "step": 600 }, { "epoch": 0.07384199533112176, "grad_norm": 2.342597054727318, "learning_rate": 2.214987714987715e-05, "loss": 0.7819, "step": 601 }, { "epoch": 0.07396486054797886, "grad_norm": 2.549170714779742, "learning_rate": 2.2186732186732187e-05, "loss": 0.8905, "step": 602 }, { "epoch": 0.07408772576483598, "grad_norm": 1.825207964177122, "learning_rate": 2.2223587223587224e-05, "loss": 0.7032, "step": 603 }, { "epoch": 0.07421059098169308, "grad_norm": 2.0318987454219934, "learning_rate": 2.226044226044226e-05, "loss": 0.7042, "step": 604 }, { "epoch": 0.0743334561985502, "grad_norm": 1.833453897454606, "learning_rate": 2.2297297297297298e-05, "loss": 0.7917, "step": 605 }, { "epoch": 0.0744563214154073, "grad_norm": 2.0796188323911218, "learning_rate": 2.2334152334152335e-05, "loss": 0.7021, "step": 606 }, { "epoch": 0.0745791866322644, "grad_norm": 2.065534963203004, "learning_rate": 2.2371007371007372e-05, "loss": 0.7327, "step": 607 }, { "epoch": 0.07470205184912151, "grad_norm": 2.1378459006523642, "learning_rate": 2.2407862407862406e-05, "loss": 0.6251, "step": 608 }, { "epoch": 0.07482491706597862, "grad_norm": 2.0892609805261193, "learning_rate": 2.2444717444717446e-05, "loss": 0.7242, "step": 609 }, { "epoch": 0.07494778228283573, "grad_norm": 2.1045805079981244, "learning_rate": 2.2481572481572483e-05, "loss": 0.7377, "step": 610 }, { "epoch": 0.07507064749969283, "grad_norm": 1.8423494101097295, "learning_rate": 2.2518427518427517e-05, "loss": 0.7431, "step": 611 }, { "epoch": 0.07519351271654995, "grad_norm": 1.816169539963417, "learning_rate": 2.2555282555282557e-05, "loss": 0.6797, "step": 612 }, { "epoch": 0.07531637793340705, "grad_norm": 1.786998407621002, "learning_rate": 2.2592137592137594e-05, "loss": 0.6533, "step": 613 }, { "epoch": 0.07543924315026417, "grad_norm": 1.801343383153808, "learning_rate": 2.2628992628992627e-05, "loss": 0.6911, "step": 614 }, { "epoch": 0.07556210836712127, "grad_norm": 1.8839246656393371, "learning_rate": 2.2665847665847668e-05, "loss": 0.5506, "step": 615 }, { "epoch": 0.07568497358397838, "grad_norm": 1.7016074446134384, "learning_rate": 2.2702702702702705e-05, "loss": 0.6732, "step": 616 }, { "epoch": 0.07580783880083548, "grad_norm": 1.8609990934574652, "learning_rate": 2.2739557739557738e-05, "loss": 0.7229, "step": 617 }, { "epoch": 0.07593070401769259, "grad_norm": 1.8504435578850045, "learning_rate": 2.277641277641278e-05, "loss": 0.7272, "step": 618 }, { "epoch": 0.0760535692345497, "grad_norm": 1.9052358697588765, "learning_rate": 2.2813267813267816e-05, "loss": 0.6563, "step": 619 }, { "epoch": 0.0761764344514068, "grad_norm": 1.7824462202745566, "learning_rate": 2.285012285012285e-05, "loss": 0.6875, "step": 620 }, { "epoch": 0.07629929966826392, "grad_norm": 1.8378148405768098, "learning_rate": 2.288697788697789e-05, "loss": 0.7189, "step": 621 }, { "epoch": 0.07642216488512102, "grad_norm": 1.9607908846539674, "learning_rate": 2.2923832923832926e-05, "loss": 0.6669, "step": 622 }, { "epoch": 0.07654503010197813, "grad_norm": 2.1216298379633556, "learning_rate": 2.296068796068796e-05, "loss": 0.7276, "step": 623 }, { "epoch": 0.07666789531883524, "grad_norm": 1.5484644418224525, "learning_rate": 2.2997542997542997e-05, "loss": 0.7356, "step": 624 }, { "epoch": 0.07679076053569235, "grad_norm": 1.888460188688903, "learning_rate": 2.3034398034398037e-05, "loss": 0.7807, "step": 625 }, { "epoch": 0.07691362575254945, "grad_norm": 2.0666182163393154, "learning_rate": 2.307125307125307e-05, "loss": 0.8242, "step": 626 }, { "epoch": 0.07703649096940655, "grad_norm": 2.0564807526433806, "learning_rate": 2.3108108108108108e-05, "loss": 0.7612, "step": 627 }, { "epoch": 0.07715935618626367, "grad_norm": 1.8509144985170571, "learning_rate": 2.3144963144963148e-05, "loss": 0.6884, "step": 628 }, { "epoch": 0.07728222140312077, "grad_norm": 2.0066058367990136, "learning_rate": 2.318181818181818e-05, "loss": 0.6909, "step": 629 }, { "epoch": 0.07740508661997789, "grad_norm": 2.286763811516986, "learning_rate": 2.321867321867322e-05, "loss": 0.637, "step": 630 }, { "epoch": 0.07752795183683499, "grad_norm": 2.0288506621844644, "learning_rate": 2.3255528255528256e-05, "loss": 0.8078, "step": 631 }, { "epoch": 0.0776508170536921, "grad_norm": 1.8383604308531454, "learning_rate": 2.3292383292383292e-05, "loss": 0.7482, "step": 632 }, { "epoch": 0.0777736822705492, "grad_norm": 1.7085955089707194, "learning_rate": 2.332923832923833e-05, "loss": 0.6243, "step": 633 }, { "epoch": 0.07789654748740632, "grad_norm": 1.884760375078464, "learning_rate": 2.3366093366093366e-05, "loss": 0.6904, "step": 634 }, { "epoch": 0.07801941270426342, "grad_norm": 1.925715746962403, "learning_rate": 2.3402948402948403e-05, "loss": 0.6704, "step": 635 }, { "epoch": 0.07814227792112054, "grad_norm": 1.9922559251170244, "learning_rate": 2.343980343980344e-05, "loss": 0.8221, "step": 636 }, { "epoch": 0.07826514313797764, "grad_norm": 1.766099361963721, "learning_rate": 2.3476658476658477e-05, "loss": 0.7942, "step": 637 }, { "epoch": 0.07838800835483474, "grad_norm": 3.0823455442009005, "learning_rate": 2.3513513513513514e-05, "loss": 0.8466, "step": 638 }, { "epoch": 0.07851087357169186, "grad_norm": 1.6894818105936766, "learning_rate": 2.355036855036855e-05, "loss": 0.8231, "step": 639 }, { "epoch": 0.07863373878854896, "grad_norm": 2.068864290870519, "learning_rate": 2.3587223587223585e-05, "loss": 0.6031, "step": 640 }, { "epoch": 0.07875660400540607, "grad_norm": 2.2146348425574134, "learning_rate": 2.3624078624078625e-05, "loss": 0.741, "step": 641 }, { "epoch": 0.07887946922226317, "grad_norm": 1.8544829266401204, "learning_rate": 2.3660933660933662e-05, "loss": 0.8268, "step": 642 }, { "epoch": 0.07900233443912029, "grad_norm": 2.002310166888022, "learning_rate": 2.3697788697788696e-05, "loss": 0.7727, "step": 643 }, { "epoch": 0.07912519965597739, "grad_norm": 1.727559049895937, "learning_rate": 2.3734643734643736e-05, "loss": 0.7225, "step": 644 }, { "epoch": 0.0792480648728345, "grad_norm": 1.7987185503825505, "learning_rate": 2.3771498771498773e-05, "loss": 0.7349, "step": 645 }, { "epoch": 0.07937093008969161, "grad_norm": 1.6203868054475823, "learning_rate": 2.3808353808353806e-05, "loss": 0.7179, "step": 646 }, { "epoch": 0.07949379530654872, "grad_norm": 1.7771052096605318, "learning_rate": 2.3845208845208847e-05, "loss": 0.7225, "step": 647 }, { "epoch": 0.07961666052340582, "grad_norm": 1.8249108319701817, "learning_rate": 2.3882063882063884e-05, "loss": 0.7042, "step": 648 }, { "epoch": 0.07973952574026293, "grad_norm": 2.3213947728402857, "learning_rate": 2.3918918918918917e-05, "loss": 0.6781, "step": 649 }, { "epoch": 0.07986239095712004, "grad_norm": 1.661493825137898, "learning_rate": 2.3955773955773958e-05, "loss": 0.6693, "step": 650 }, { "epoch": 0.07998525617397714, "grad_norm": 1.877934173817582, "learning_rate": 2.3992628992628995e-05, "loss": 0.8023, "step": 651 }, { "epoch": 0.08010812139083426, "grad_norm": 2.2717249502090158, "learning_rate": 2.4029484029484028e-05, "loss": 0.7029, "step": 652 }, { "epoch": 0.08023098660769136, "grad_norm": 1.9749875260656726, "learning_rate": 2.406633906633907e-05, "loss": 0.7231, "step": 653 }, { "epoch": 0.08035385182454848, "grad_norm": 2.0235208059091145, "learning_rate": 2.4103194103194105e-05, "loss": 0.6019, "step": 654 }, { "epoch": 0.08047671704140558, "grad_norm": 1.9488758119253824, "learning_rate": 2.414004914004914e-05, "loss": 0.7002, "step": 655 }, { "epoch": 0.08059958225826269, "grad_norm": 2.08201930886419, "learning_rate": 2.417690417690418e-05, "loss": 0.7334, "step": 656 }, { "epoch": 0.0807224474751198, "grad_norm": 2.3386777455430448, "learning_rate": 2.4213759213759216e-05, "loss": 0.7283, "step": 657 }, { "epoch": 0.0808453126919769, "grad_norm": 1.9614452408707026, "learning_rate": 2.425061425061425e-05, "loss": 0.738, "step": 658 }, { "epoch": 0.08096817790883401, "grad_norm": 1.979862302207417, "learning_rate": 2.4287469287469287e-05, "loss": 0.5718, "step": 659 }, { "epoch": 0.08109104312569111, "grad_norm": 2.368414824043649, "learning_rate": 2.4324324324324327e-05, "loss": 0.7385, "step": 660 }, { "epoch": 0.08121390834254823, "grad_norm": 2.0636374038579532, "learning_rate": 2.436117936117936e-05, "loss": 0.6717, "step": 661 }, { "epoch": 0.08133677355940533, "grad_norm": 1.952024538904434, "learning_rate": 2.4398034398034398e-05, "loss": 0.6759, "step": 662 }, { "epoch": 0.08145963877626244, "grad_norm": 2.100027352994839, "learning_rate": 2.4434889434889438e-05, "loss": 0.6236, "step": 663 }, { "epoch": 0.08158250399311955, "grad_norm": 1.7057289166671867, "learning_rate": 2.447174447174447e-05, "loss": 0.8087, "step": 664 }, { "epoch": 0.08170536920997666, "grad_norm": 1.6877249122975946, "learning_rate": 2.450859950859951e-05, "loss": 0.7088, "step": 665 }, { "epoch": 0.08182823442683376, "grad_norm": 1.7851300649246822, "learning_rate": 2.454545454545455e-05, "loss": 0.6561, "step": 666 }, { "epoch": 0.08195109964369088, "grad_norm": 1.9128002833242168, "learning_rate": 2.4582309582309582e-05, "loss": 0.7204, "step": 667 }, { "epoch": 0.08207396486054798, "grad_norm": 2.0009437988322913, "learning_rate": 2.461916461916462e-05, "loss": 0.7596, "step": 668 }, { "epoch": 0.08219683007740508, "grad_norm": 1.761803061763879, "learning_rate": 2.465601965601966e-05, "loss": 0.7069, "step": 669 }, { "epoch": 0.0823196952942622, "grad_norm": 1.7775113283965929, "learning_rate": 2.4692874692874693e-05, "loss": 0.8083, "step": 670 }, { "epoch": 0.0824425605111193, "grad_norm": 1.9434199576965636, "learning_rate": 2.472972972972973e-05, "loss": 0.7583, "step": 671 }, { "epoch": 0.08256542572797641, "grad_norm": 1.931002366681942, "learning_rate": 2.4766584766584767e-05, "loss": 0.6696, "step": 672 }, { "epoch": 0.08268829094483351, "grad_norm": 1.801609016878661, "learning_rate": 2.4803439803439804e-05, "loss": 0.8318, "step": 673 }, { "epoch": 0.08281115616169063, "grad_norm": 2.37412428611428, "learning_rate": 2.484029484029484e-05, "loss": 0.6725, "step": 674 }, { "epoch": 0.08293402137854773, "grad_norm": 2.191037019069383, "learning_rate": 2.4877149877149875e-05, "loss": 0.8105, "step": 675 }, { "epoch": 0.08305688659540485, "grad_norm": 1.9750275927718735, "learning_rate": 2.4914004914004915e-05, "loss": 0.7909, "step": 676 }, { "epoch": 0.08317975181226195, "grad_norm": 1.7551641645196254, "learning_rate": 2.4950859950859952e-05, "loss": 0.7162, "step": 677 }, { "epoch": 0.08330261702911905, "grad_norm": 2.257952607382702, "learning_rate": 2.4987714987714985e-05, "loss": 0.8187, "step": 678 }, { "epoch": 0.08342548224597617, "grad_norm": 1.720034328567471, "learning_rate": 2.5024570024570026e-05, "loss": 0.7066, "step": 679 }, { "epoch": 0.08354834746283327, "grad_norm": 1.8773052678784248, "learning_rate": 2.5061425061425063e-05, "loss": 0.7023, "step": 680 }, { "epoch": 0.08367121267969038, "grad_norm": 1.6375229187289637, "learning_rate": 2.5098280098280096e-05, "loss": 0.7372, "step": 681 }, { "epoch": 0.08379407789654748, "grad_norm": 2.1653577302506095, "learning_rate": 2.5135135135135137e-05, "loss": 0.8151, "step": 682 }, { "epoch": 0.0839169431134046, "grad_norm": 2.0128394665439453, "learning_rate": 2.5171990171990174e-05, "loss": 0.8008, "step": 683 }, { "epoch": 0.0840398083302617, "grad_norm": 2.199399863364831, "learning_rate": 2.5208845208845207e-05, "loss": 0.6506, "step": 684 }, { "epoch": 0.08416267354711882, "grad_norm": 1.9290279569809399, "learning_rate": 2.5245700245700248e-05, "loss": 0.7927, "step": 685 }, { "epoch": 0.08428553876397592, "grad_norm": 2.0440955486621015, "learning_rate": 2.5282555282555284e-05, "loss": 0.7259, "step": 686 }, { "epoch": 0.08440840398083303, "grad_norm": 1.5633787830392318, "learning_rate": 2.5319410319410318e-05, "loss": 0.7643, "step": 687 }, { "epoch": 0.08453126919769013, "grad_norm": 1.9141925847656822, "learning_rate": 2.535626535626536e-05, "loss": 0.7375, "step": 688 }, { "epoch": 0.08465413441454724, "grad_norm": 1.9354248054621102, "learning_rate": 2.5393120393120395e-05, "loss": 0.6888, "step": 689 }, { "epoch": 0.08477699963140435, "grad_norm": 1.9509715097857399, "learning_rate": 2.542997542997543e-05, "loss": 0.6953, "step": 690 }, { "epoch": 0.08489986484826145, "grad_norm": 1.7122612796926369, "learning_rate": 2.5466830466830466e-05, "loss": 0.7513, "step": 691 }, { "epoch": 0.08502273006511857, "grad_norm": 1.878370508749116, "learning_rate": 2.5503685503685506e-05, "loss": 0.7067, "step": 692 }, { "epoch": 0.08514559528197567, "grad_norm": 2.133843265423579, "learning_rate": 2.554054054054054e-05, "loss": 0.7008, "step": 693 }, { "epoch": 0.08526846049883278, "grad_norm": 2.039041027140724, "learning_rate": 2.5577395577395577e-05, "loss": 0.7189, "step": 694 }, { "epoch": 0.08539132571568989, "grad_norm": 1.6886245257068158, "learning_rate": 2.5614250614250617e-05, "loss": 0.7496, "step": 695 }, { "epoch": 0.085514190932547, "grad_norm": 1.9994119629392992, "learning_rate": 2.565110565110565e-05, "loss": 0.6969, "step": 696 }, { "epoch": 0.0856370561494041, "grad_norm": 2.1815283157048175, "learning_rate": 2.5687960687960688e-05, "loss": 0.733, "step": 697 }, { "epoch": 0.0857599213662612, "grad_norm": 1.790726850633851, "learning_rate": 2.5724815724815728e-05, "loss": 0.6345, "step": 698 }, { "epoch": 0.08588278658311832, "grad_norm": 1.8136074284309402, "learning_rate": 2.576167076167076e-05, "loss": 0.8141, "step": 699 }, { "epoch": 0.08600565179997542, "grad_norm": 1.8552977846941467, "learning_rate": 2.57985257985258e-05, "loss": 0.7642, "step": 700 }, { "epoch": 0.08612851701683254, "grad_norm": 2.1836400493024777, "learning_rate": 2.583538083538084e-05, "loss": 0.7579, "step": 701 }, { "epoch": 0.08625138223368964, "grad_norm": 1.6784475462964883, "learning_rate": 2.5872235872235872e-05, "loss": 0.643, "step": 702 }, { "epoch": 0.08637424745054675, "grad_norm": 1.9106768723067558, "learning_rate": 2.590909090909091e-05, "loss": 0.7653, "step": 703 }, { "epoch": 0.08649711266740386, "grad_norm": 1.8164383360368452, "learning_rate": 2.594594594594595e-05, "loss": 0.6836, "step": 704 }, { "epoch": 0.08661997788426097, "grad_norm": 2.1218487241891184, "learning_rate": 2.5982800982800983e-05, "loss": 0.8274, "step": 705 }, { "epoch": 0.08674284310111807, "grad_norm": 1.9557454965061531, "learning_rate": 2.601965601965602e-05, "loss": 0.7209, "step": 706 }, { "epoch": 0.08686570831797519, "grad_norm": 1.830276620205532, "learning_rate": 2.6056511056511057e-05, "loss": 0.7363, "step": 707 }, { "epoch": 0.08698857353483229, "grad_norm": 1.76407245777333, "learning_rate": 2.6093366093366094e-05, "loss": 0.6053, "step": 708 }, { "epoch": 0.08711143875168939, "grad_norm": 1.897899819347947, "learning_rate": 2.613022113022113e-05, "loss": 0.7802, "step": 709 }, { "epoch": 0.0872343039685465, "grad_norm": 1.919438263381118, "learning_rate": 2.6167076167076168e-05, "loss": 0.7213, "step": 710 }, { "epoch": 0.08735716918540361, "grad_norm": 1.7251714178748108, "learning_rate": 2.6203931203931205e-05, "loss": 0.7453, "step": 711 }, { "epoch": 0.08748003440226072, "grad_norm": 1.898155873271899, "learning_rate": 2.6240786240786242e-05, "loss": 0.7317, "step": 712 }, { "epoch": 0.08760289961911782, "grad_norm": 1.793396232243243, "learning_rate": 2.6277641277641275e-05, "loss": 0.7684, "step": 713 }, { "epoch": 0.08772576483597494, "grad_norm": 1.8199359460694664, "learning_rate": 2.6314496314496316e-05, "loss": 0.7441, "step": 714 }, { "epoch": 0.08784863005283204, "grad_norm": 1.7570280510112468, "learning_rate": 2.6351351351351353e-05, "loss": 0.6544, "step": 715 }, { "epoch": 0.08797149526968916, "grad_norm": 2.0289327165183506, "learning_rate": 2.6388206388206386e-05, "loss": 0.7242, "step": 716 }, { "epoch": 0.08809436048654626, "grad_norm": 2.39705719088832, "learning_rate": 2.6425061425061427e-05, "loss": 0.7362, "step": 717 }, { "epoch": 0.08821722570340336, "grad_norm": 2.048719459912594, "learning_rate": 2.6461916461916464e-05, "loss": 0.661, "step": 718 }, { "epoch": 0.08834009092026048, "grad_norm": 1.9934833821034068, "learning_rate": 2.6498771498771497e-05, "loss": 0.6636, "step": 719 }, { "epoch": 0.08846295613711758, "grad_norm": 1.6935359207095917, "learning_rate": 2.6535626535626537e-05, "loss": 0.6872, "step": 720 }, { "epoch": 0.08858582135397469, "grad_norm": 1.6920369655935517, "learning_rate": 2.6572481572481574e-05, "loss": 0.6154, "step": 721 }, { "epoch": 0.0887086865708318, "grad_norm": 2.0760174108611733, "learning_rate": 2.6609336609336608e-05, "loss": 0.7297, "step": 722 }, { "epoch": 0.08883155178768891, "grad_norm": 2.077259433286552, "learning_rate": 2.6646191646191645e-05, "loss": 0.6294, "step": 723 }, { "epoch": 0.08895441700454601, "grad_norm": 2.410904790531113, "learning_rate": 2.6683046683046685e-05, "loss": 0.7154, "step": 724 }, { "epoch": 0.08907728222140313, "grad_norm": 1.9130010748639648, "learning_rate": 2.671990171990172e-05, "loss": 0.8182, "step": 725 }, { "epoch": 0.08920014743826023, "grad_norm": 1.7996594405288788, "learning_rate": 2.6756756756756756e-05, "loss": 0.6928, "step": 726 }, { "epoch": 0.08932301265511734, "grad_norm": 2.4717604219950795, "learning_rate": 2.6793611793611796e-05, "loss": 0.7318, "step": 727 }, { "epoch": 0.08944587787197444, "grad_norm": 1.7841440740342938, "learning_rate": 2.683046683046683e-05, "loss": 0.6427, "step": 728 }, { "epoch": 0.08956874308883155, "grad_norm": 2.122793042974321, "learning_rate": 2.6867321867321867e-05, "loss": 0.8501, "step": 729 }, { "epoch": 0.08969160830568866, "grad_norm": 1.8641386492434562, "learning_rate": 2.6904176904176907e-05, "loss": 0.6758, "step": 730 }, { "epoch": 0.08981447352254576, "grad_norm": 1.8529403818197823, "learning_rate": 2.694103194103194e-05, "loss": 0.6809, "step": 731 }, { "epoch": 0.08993733873940288, "grad_norm": 1.8844533664095007, "learning_rate": 2.6977886977886977e-05, "loss": 0.8055, "step": 732 }, { "epoch": 0.09006020395625998, "grad_norm": 2.205152780988247, "learning_rate": 2.7014742014742018e-05, "loss": 0.6133, "step": 733 }, { "epoch": 0.0901830691731171, "grad_norm": 1.8738690379163438, "learning_rate": 2.705159705159705e-05, "loss": 0.721, "step": 734 }, { "epoch": 0.0903059343899742, "grad_norm": 1.9276824342763887, "learning_rate": 2.708845208845209e-05, "loss": 0.7057, "step": 735 }, { "epoch": 0.09042879960683131, "grad_norm": 1.8151668937020267, "learning_rate": 2.712530712530713e-05, "loss": 0.6927, "step": 736 }, { "epoch": 0.09055166482368841, "grad_norm": 2.0410520180397618, "learning_rate": 2.7162162162162162e-05, "loss": 0.7571, "step": 737 }, { "epoch": 0.09067453004054553, "grad_norm": 1.6490851547190442, "learning_rate": 2.71990171990172e-05, "loss": 0.6468, "step": 738 }, { "epoch": 0.09079739525740263, "grad_norm": 1.7687358648071572, "learning_rate": 2.7235872235872236e-05, "loss": 0.6225, "step": 739 }, { "epoch": 0.09092026047425973, "grad_norm": 2.021049230200209, "learning_rate": 2.7272727272727273e-05, "loss": 0.7445, "step": 740 }, { "epoch": 0.09104312569111685, "grad_norm": 2.01425907255038, "learning_rate": 2.730958230958231e-05, "loss": 0.7138, "step": 741 }, { "epoch": 0.09116599090797395, "grad_norm": 2.199930315619395, "learning_rate": 2.7346437346437347e-05, "loss": 0.7787, "step": 742 }, { "epoch": 0.09128885612483106, "grad_norm": 1.744512084045899, "learning_rate": 2.7383292383292384e-05, "loss": 0.6105, "step": 743 }, { "epoch": 0.09141172134168817, "grad_norm": 1.7937909360538287, "learning_rate": 2.742014742014742e-05, "loss": 0.7318, "step": 744 }, { "epoch": 0.09153458655854528, "grad_norm": 1.742012851978804, "learning_rate": 2.7457002457002458e-05, "loss": 0.6268, "step": 745 }, { "epoch": 0.09165745177540238, "grad_norm": 1.7848459168400745, "learning_rate": 2.7493857493857495e-05, "loss": 0.5972, "step": 746 }, { "epoch": 0.0917803169922595, "grad_norm": 1.7476609982271176, "learning_rate": 2.7530712530712532e-05, "loss": 0.6487, "step": 747 }, { "epoch": 0.0919031822091166, "grad_norm": 1.814878806880183, "learning_rate": 2.756756756756757e-05, "loss": 0.7957, "step": 748 }, { "epoch": 0.0920260474259737, "grad_norm": 1.9976664762703271, "learning_rate": 2.7604422604422606e-05, "loss": 0.801, "step": 749 }, { "epoch": 0.09214891264283082, "grad_norm": 1.876846877310892, "learning_rate": 2.7641277641277643e-05, "loss": 0.8528, "step": 750 }, { "epoch": 0.09227177785968792, "grad_norm": 1.7545647846128416, "learning_rate": 2.767813267813268e-05, "loss": 0.847, "step": 751 }, { "epoch": 0.09239464307654503, "grad_norm": 1.8031723387185685, "learning_rate": 2.7714987714987717e-05, "loss": 0.754, "step": 752 }, { "epoch": 0.09251750829340213, "grad_norm": 1.797273438904846, "learning_rate": 2.7751842751842753e-05, "loss": 0.5849, "step": 753 }, { "epoch": 0.09264037351025925, "grad_norm": 1.750646620722912, "learning_rate": 2.7788697788697787e-05, "loss": 0.7259, "step": 754 }, { "epoch": 0.09276323872711635, "grad_norm": 1.8294502122465757, "learning_rate": 2.7825552825552827e-05, "loss": 0.714, "step": 755 }, { "epoch": 0.09288610394397347, "grad_norm": 2.3523571492091633, "learning_rate": 2.7862407862407864e-05, "loss": 0.7442, "step": 756 }, { "epoch": 0.09300896916083057, "grad_norm": 1.9997820418717616, "learning_rate": 2.7899262899262898e-05, "loss": 0.684, "step": 757 }, { "epoch": 0.09313183437768768, "grad_norm": 1.9949969304756525, "learning_rate": 2.7936117936117935e-05, "loss": 0.6808, "step": 758 }, { "epoch": 0.09325469959454478, "grad_norm": 1.7925039543647958, "learning_rate": 2.7972972972972975e-05, "loss": 0.8131, "step": 759 }, { "epoch": 0.09337756481140189, "grad_norm": 1.8136271698608502, "learning_rate": 2.800982800982801e-05, "loss": 0.7519, "step": 760 }, { "epoch": 0.093500430028259, "grad_norm": 2.152903615096727, "learning_rate": 2.8046683046683046e-05, "loss": 0.7839, "step": 761 }, { "epoch": 0.0936232952451161, "grad_norm": 2.019726412734734, "learning_rate": 2.8083538083538086e-05, "loss": 0.6159, "step": 762 }, { "epoch": 0.09374616046197322, "grad_norm": 1.7123898376173292, "learning_rate": 2.812039312039312e-05, "loss": 0.6471, "step": 763 }, { "epoch": 0.09386902567883032, "grad_norm": 1.7456603706753775, "learning_rate": 2.8157248157248157e-05, "loss": 0.7607, "step": 764 }, { "epoch": 0.09399189089568744, "grad_norm": 2.0557589121048774, "learning_rate": 2.8194103194103197e-05, "loss": 0.6497, "step": 765 }, { "epoch": 0.09411475611254454, "grad_norm": 1.8273794204935183, "learning_rate": 2.823095823095823e-05, "loss": 0.7724, "step": 766 }, { "epoch": 0.09423762132940165, "grad_norm": 1.7920851693299429, "learning_rate": 2.8267813267813267e-05, "loss": 0.7578, "step": 767 }, { "epoch": 0.09436048654625875, "grad_norm": 1.7897970751219825, "learning_rate": 2.8304668304668308e-05, "loss": 0.929, "step": 768 }, { "epoch": 0.09448335176311586, "grad_norm": 2.2052655684371123, "learning_rate": 2.834152334152334e-05, "loss": 0.7738, "step": 769 }, { "epoch": 0.09460621697997297, "grad_norm": 1.6941956657393724, "learning_rate": 2.8378378378378378e-05, "loss": 0.7234, "step": 770 }, { "epoch": 0.09472908219683007, "grad_norm": 1.6576057753979319, "learning_rate": 2.841523341523342e-05, "loss": 0.7708, "step": 771 }, { "epoch": 0.09485194741368719, "grad_norm": 1.6485550791361792, "learning_rate": 2.8452088452088452e-05, "loss": 0.6292, "step": 772 }, { "epoch": 0.09497481263054429, "grad_norm": 1.7515084019243152, "learning_rate": 2.848894348894349e-05, "loss": 0.6632, "step": 773 }, { "epoch": 0.0950976778474014, "grad_norm": 2.015025921159922, "learning_rate": 2.8525798525798526e-05, "loss": 0.6529, "step": 774 }, { "epoch": 0.0952205430642585, "grad_norm": 1.6490860331979154, "learning_rate": 2.8562653562653563e-05, "loss": 0.6888, "step": 775 }, { "epoch": 0.09534340828111562, "grad_norm": 1.723817595711864, "learning_rate": 2.85995085995086e-05, "loss": 0.7561, "step": 776 }, { "epoch": 0.09546627349797272, "grad_norm": 1.7727870879813197, "learning_rate": 2.8636363636363637e-05, "loss": 0.7156, "step": 777 }, { "epoch": 0.09558913871482984, "grad_norm": 1.767167770446803, "learning_rate": 2.8673218673218674e-05, "loss": 0.6408, "step": 778 }, { "epoch": 0.09571200393168694, "grad_norm": 1.7485732361363484, "learning_rate": 2.871007371007371e-05, "loss": 0.67, "step": 779 }, { "epoch": 0.09583486914854404, "grad_norm": 1.8606602335017313, "learning_rate": 2.8746928746928748e-05, "loss": 0.6906, "step": 780 }, { "epoch": 0.09595773436540116, "grad_norm": 1.534330702737851, "learning_rate": 2.8783783783783785e-05, "loss": 0.6914, "step": 781 }, { "epoch": 0.09608059958225826, "grad_norm": 1.602667890550802, "learning_rate": 2.882063882063882e-05, "loss": 0.7187, "step": 782 }, { "epoch": 0.09620346479911537, "grad_norm": 1.839623507005776, "learning_rate": 2.885749385749386e-05, "loss": 0.6185, "step": 783 }, { "epoch": 0.09632633001597248, "grad_norm": 1.9440183623512557, "learning_rate": 2.8894348894348896e-05, "loss": 0.6713, "step": 784 }, { "epoch": 0.09644919523282959, "grad_norm": 1.8177161917277385, "learning_rate": 2.8931203931203933e-05, "loss": 0.6296, "step": 785 }, { "epoch": 0.09657206044968669, "grad_norm": 1.7068180447676822, "learning_rate": 2.896805896805897e-05, "loss": 0.6355, "step": 786 }, { "epoch": 0.09669492566654381, "grad_norm": 2.1379981086390845, "learning_rate": 2.9004914004914006e-05, "loss": 0.7311, "step": 787 }, { "epoch": 0.09681779088340091, "grad_norm": 2.0726508968495483, "learning_rate": 2.9041769041769043e-05, "loss": 0.6938, "step": 788 }, { "epoch": 0.09694065610025801, "grad_norm": 1.8011049088612894, "learning_rate": 2.907862407862408e-05, "loss": 0.7179, "step": 789 }, { "epoch": 0.09706352131711513, "grad_norm": 1.8610689684907293, "learning_rate": 2.9115479115479114e-05, "loss": 0.683, "step": 790 }, { "epoch": 0.09718638653397223, "grad_norm": 1.936700246687565, "learning_rate": 2.9152334152334154e-05, "loss": 0.7167, "step": 791 }, { "epoch": 0.09730925175082934, "grad_norm": 1.979351549352123, "learning_rate": 2.918918918918919e-05, "loss": 0.6801, "step": 792 }, { "epoch": 0.09743211696768644, "grad_norm": 1.8232765587262365, "learning_rate": 2.9226044226044225e-05, "loss": 0.753, "step": 793 }, { "epoch": 0.09755498218454356, "grad_norm": 1.7183954512651585, "learning_rate": 2.9262899262899265e-05, "loss": 0.6029, "step": 794 }, { "epoch": 0.09767784740140066, "grad_norm": 1.5055830780147417, "learning_rate": 2.92997542997543e-05, "loss": 0.7741, "step": 795 }, { "epoch": 0.09780071261825778, "grad_norm": 1.7353704153614988, "learning_rate": 2.9336609336609336e-05, "loss": 0.6684, "step": 796 }, { "epoch": 0.09792357783511488, "grad_norm": 1.9806485034265893, "learning_rate": 2.9373464373464376e-05, "loss": 0.8293, "step": 797 }, { "epoch": 0.098046443051972, "grad_norm": 1.6642103299188327, "learning_rate": 2.941031941031941e-05, "loss": 0.6977, "step": 798 }, { "epoch": 0.0981693082688291, "grad_norm": 1.7666901647672828, "learning_rate": 2.9447174447174446e-05, "loss": 0.7435, "step": 799 }, { "epoch": 0.0982921734856862, "grad_norm": 1.9059673713649457, "learning_rate": 2.9484029484029487e-05, "loss": 0.6279, "step": 800 }, { "epoch": 0.09841503870254331, "grad_norm": 1.6331573677058462, "learning_rate": 2.952088452088452e-05, "loss": 0.7214, "step": 801 }, { "epoch": 0.09853790391940041, "grad_norm": 2.2368254305739637, "learning_rate": 2.9557739557739557e-05, "loss": 0.6907, "step": 802 }, { "epoch": 0.09866076913625753, "grad_norm": 1.8620822521949996, "learning_rate": 2.9594594594594598e-05, "loss": 0.5667, "step": 803 }, { "epoch": 0.09878363435311463, "grad_norm": 1.8405431999482347, "learning_rate": 2.963144963144963e-05, "loss": 0.6636, "step": 804 }, { "epoch": 0.09890649956997175, "grad_norm": 2.1524082198062513, "learning_rate": 2.9668304668304668e-05, "loss": 0.6005, "step": 805 }, { "epoch": 0.09902936478682885, "grad_norm": 1.961454296800343, "learning_rate": 2.9705159705159705e-05, "loss": 0.7756, "step": 806 }, { "epoch": 0.09915223000368596, "grad_norm": 2.296145582468531, "learning_rate": 2.9742014742014742e-05, "loss": 0.7254, "step": 807 }, { "epoch": 0.09927509522054306, "grad_norm": 1.636090386743313, "learning_rate": 2.977886977886978e-05, "loss": 0.646, "step": 808 }, { "epoch": 0.09939796043740017, "grad_norm": 1.7278840839888492, "learning_rate": 2.9815724815724816e-05, "loss": 0.7, "step": 809 }, { "epoch": 0.09952082565425728, "grad_norm": 1.7718539064940368, "learning_rate": 2.9852579852579853e-05, "loss": 0.5874, "step": 810 }, { "epoch": 0.09964369087111438, "grad_norm": 1.7569748035378938, "learning_rate": 2.988943488943489e-05, "loss": 0.6211, "step": 811 }, { "epoch": 0.0997665560879715, "grad_norm": 1.783174233042772, "learning_rate": 2.9926289926289927e-05, "loss": 0.6519, "step": 812 }, { "epoch": 0.0998894213048286, "grad_norm": 1.8715097862635965, "learning_rate": 2.9963144963144964e-05, "loss": 0.8062, "step": 813 }, { "epoch": 0.10001228652168571, "grad_norm": 1.8322324042239617, "learning_rate": 3e-05, "loss": 0.6869, "step": 814 }, { "epoch": 0.10013515173854282, "grad_norm": 1.9110877211728614, "learning_rate": 2.999999862042364e-05, "loss": 0.6044, "step": 815 }, { "epoch": 0.10025801695539993, "grad_norm": 1.920788411540279, "learning_rate": 2.999999448169481e-05, "loss": 0.736, "step": 816 }, { "epoch": 0.10038088217225703, "grad_norm": 2.0113341101147726, "learning_rate": 2.9999987583814276e-05, "loss": 0.6973, "step": 817 }, { "epoch": 0.10050374738911415, "grad_norm": 2.198214750382668, "learning_rate": 2.9999977926783303e-05, "loss": 0.7296, "step": 818 }, { "epoch": 0.10062661260597125, "grad_norm": 1.9945483589847353, "learning_rate": 2.999996551060367e-05, "loss": 0.6734, "step": 819 }, { "epoch": 0.10074947782282835, "grad_norm": 1.8418330843375452, "learning_rate": 2.999995033527766e-05, "loss": 0.6548, "step": 820 }, { "epoch": 0.10087234303968547, "grad_norm": 1.7601094012802614, "learning_rate": 2.999993240080806e-05, "loss": 0.7416, "step": 821 }, { "epoch": 0.10099520825654257, "grad_norm": 1.726841900632902, "learning_rate": 2.9999911707198176e-05, "loss": 0.7876, "step": 822 }, { "epoch": 0.10111807347339968, "grad_norm": 1.7548151254345847, "learning_rate": 2.999988825445181e-05, "loss": 0.7137, "step": 823 }, { "epoch": 0.10124093869025678, "grad_norm": 1.8549727989339913, "learning_rate": 2.999986204257328e-05, "loss": 0.849, "step": 824 }, { "epoch": 0.1013638039071139, "grad_norm": 1.793482004941933, "learning_rate": 2.9999833071567397e-05, "loss": 0.7766, "step": 825 }, { "epoch": 0.101486669123971, "grad_norm": 2.0761075922881984, "learning_rate": 2.9999801341439506e-05, "loss": 0.7363, "step": 826 }, { "epoch": 0.10160953434082812, "grad_norm": 1.7202202444656944, "learning_rate": 2.999976685219543e-05, "loss": 0.714, "step": 827 }, { "epoch": 0.10173239955768522, "grad_norm": 2.059747992382795, "learning_rate": 2.9999729603841524e-05, "loss": 0.8224, "step": 828 }, { "epoch": 0.10185526477454233, "grad_norm": 1.6925348112811556, "learning_rate": 2.999968959638463e-05, "loss": 0.6544, "step": 829 }, { "epoch": 0.10197812999139944, "grad_norm": 2.0761179667686593, "learning_rate": 2.999964682983211e-05, "loss": 0.6533, "step": 830 }, { "epoch": 0.10210099520825654, "grad_norm": 1.7975177001510814, "learning_rate": 2.9999601304191835e-05, "loss": 0.7377, "step": 831 }, { "epoch": 0.10222386042511365, "grad_norm": 2.2815351114743216, "learning_rate": 2.9999553019472177e-05, "loss": 0.7976, "step": 832 }, { "epoch": 0.10234672564197075, "grad_norm": 2.160673375146874, "learning_rate": 2.9999501975682015e-05, "loss": 0.6293, "step": 833 }, { "epoch": 0.10246959085882787, "grad_norm": 1.9060779401388181, "learning_rate": 2.9999448172830738e-05, "loss": 0.6915, "step": 834 }, { "epoch": 0.10259245607568497, "grad_norm": 1.7016832348007465, "learning_rate": 2.9999391610928247e-05, "loss": 0.7251, "step": 835 }, { "epoch": 0.10271532129254209, "grad_norm": 1.7298757262256934, "learning_rate": 2.999933228998494e-05, "loss": 0.6793, "step": 836 }, { "epoch": 0.10283818650939919, "grad_norm": 1.6895229958826614, "learning_rate": 2.9999270210011737e-05, "loss": 0.7324, "step": 837 }, { "epoch": 0.1029610517262563, "grad_norm": 1.7987936410199106, "learning_rate": 2.999920537102005e-05, "loss": 0.6969, "step": 838 }, { "epoch": 0.1030839169431134, "grad_norm": 1.8137962721660554, "learning_rate": 2.9999137773021807e-05, "loss": 0.6411, "step": 839 }, { "epoch": 0.1032067821599705, "grad_norm": 1.8779149770775954, "learning_rate": 2.9999067416029446e-05, "loss": 0.7585, "step": 840 }, { "epoch": 0.10332964737682762, "grad_norm": 1.7359599431781119, "learning_rate": 2.9998994300055905e-05, "loss": 0.7239, "step": 841 }, { "epoch": 0.10345251259368472, "grad_norm": 1.6836336979403232, "learning_rate": 2.9998918425114633e-05, "loss": 0.6164, "step": 842 }, { "epoch": 0.10357537781054184, "grad_norm": 1.8202252092638256, "learning_rate": 2.9998839791219593e-05, "loss": 0.7304, "step": 843 }, { "epoch": 0.10369824302739894, "grad_norm": 1.7102109385460997, "learning_rate": 2.999875839838524e-05, "loss": 0.6972, "step": 844 }, { "epoch": 0.10382110824425606, "grad_norm": 1.681122626293071, "learning_rate": 2.999867424662655e-05, "loss": 0.7113, "step": 845 }, { "epoch": 0.10394397346111316, "grad_norm": 1.7979892047752892, "learning_rate": 2.9998587335959002e-05, "loss": 0.7173, "step": 846 }, { "epoch": 0.10406683867797027, "grad_norm": 1.7933612570861321, "learning_rate": 2.9998497666398586e-05, "loss": 0.678, "step": 847 }, { "epoch": 0.10418970389482737, "grad_norm": 1.6856806772157973, "learning_rate": 2.999840523796179e-05, "loss": 0.6144, "step": 848 }, { "epoch": 0.10431256911168449, "grad_norm": 1.5769254666206316, "learning_rate": 2.9998310050665622e-05, "loss": 0.7458, "step": 849 }, { "epoch": 0.10443543432854159, "grad_norm": 1.582085693581304, "learning_rate": 2.9998212104527582e-05, "loss": 0.6019, "step": 850 }, { "epoch": 0.10455829954539869, "grad_norm": 1.993783243125877, "learning_rate": 2.9998111399565696e-05, "loss": 0.7048, "step": 851 }, { "epoch": 0.10468116476225581, "grad_norm": 1.6749058902865683, "learning_rate": 2.9998007935798486e-05, "loss": 0.5928, "step": 852 }, { "epoch": 0.10480402997911291, "grad_norm": 1.922134106252314, "learning_rate": 2.999790171324498e-05, "loss": 0.6484, "step": 853 }, { "epoch": 0.10492689519597002, "grad_norm": 1.647214266136117, "learning_rate": 2.9997792731924718e-05, "loss": 0.7465, "step": 854 }, { "epoch": 0.10504976041282713, "grad_norm": 1.746357877381484, "learning_rate": 2.9997680991857744e-05, "loss": 0.5806, "step": 855 }, { "epoch": 0.10517262562968424, "grad_norm": 1.7681333929045144, "learning_rate": 2.999756649306462e-05, "loss": 0.6909, "step": 856 }, { "epoch": 0.10529549084654134, "grad_norm": 1.8109413092672964, "learning_rate": 2.99974492355664e-05, "loss": 0.7324, "step": 857 }, { "epoch": 0.10541835606339846, "grad_norm": 1.9006284353907568, "learning_rate": 2.9997329219384655e-05, "loss": 0.6463, "step": 858 }, { "epoch": 0.10554122128025556, "grad_norm": 1.6675558117732143, "learning_rate": 2.999720644454146e-05, "loss": 0.6952, "step": 859 }, { "epoch": 0.10566408649711266, "grad_norm": 1.6467466447444385, "learning_rate": 2.9997080911059402e-05, "loss": 0.6968, "step": 860 }, { "epoch": 0.10578695171396978, "grad_norm": 1.5481236354096752, "learning_rate": 2.9996952618961567e-05, "loss": 0.6396, "step": 861 }, { "epoch": 0.10590981693082688, "grad_norm": 1.6424131996879718, "learning_rate": 2.9996821568271563e-05, "loss": 0.7588, "step": 862 }, { "epoch": 0.106032682147684, "grad_norm": 1.710068046896545, "learning_rate": 2.9996687759013483e-05, "loss": 0.7198, "step": 863 }, { "epoch": 0.1061555473645411, "grad_norm": 1.9602026551495622, "learning_rate": 2.9996551191211948e-05, "loss": 0.8309, "step": 864 }, { "epoch": 0.10627841258139821, "grad_norm": 1.655791176740972, "learning_rate": 2.9996411864892078e-05, "loss": 0.8019, "step": 865 }, { "epoch": 0.10640127779825531, "grad_norm": 1.7152286993695074, "learning_rate": 2.9996269780079497e-05, "loss": 0.7309, "step": 866 }, { "epoch": 0.10652414301511243, "grad_norm": 2.647214705126715, "learning_rate": 2.999612493680035e-05, "loss": 0.7219, "step": 867 }, { "epoch": 0.10664700823196953, "grad_norm": 1.7121488236087108, "learning_rate": 2.9995977335081273e-05, "loss": 0.7539, "step": 868 }, { "epoch": 0.10676987344882664, "grad_norm": 1.73865527002815, "learning_rate": 2.9995826974949413e-05, "loss": 0.6458, "step": 869 }, { "epoch": 0.10689273866568375, "grad_norm": 1.9440280497344198, "learning_rate": 2.9995673856432436e-05, "loss": 0.8574, "step": 870 }, { "epoch": 0.10701560388254085, "grad_norm": 2.0024570955194614, "learning_rate": 2.9995517979558503e-05, "loss": 0.6999, "step": 871 }, { "epoch": 0.10713846909939796, "grad_norm": 1.8420198719280292, "learning_rate": 2.9995359344356287e-05, "loss": 0.6556, "step": 872 }, { "epoch": 0.10726133431625506, "grad_norm": 1.8729907704120885, "learning_rate": 2.999519795085497e-05, "loss": 0.6831, "step": 873 }, { "epoch": 0.10738419953311218, "grad_norm": 1.8281572248235232, "learning_rate": 2.9995033799084232e-05, "loss": 0.7561, "step": 874 }, { "epoch": 0.10750706474996928, "grad_norm": 1.5748095458832954, "learning_rate": 2.999486688907428e-05, "loss": 0.7263, "step": 875 }, { "epoch": 0.1076299299668264, "grad_norm": 1.6073602615538956, "learning_rate": 2.9994697220855805e-05, "loss": 0.6249, "step": 876 }, { "epoch": 0.1077527951836835, "grad_norm": 1.6200979185301414, "learning_rate": 2.9994524794460016e-05, "loss": 0.7218, "step": 877 }, { "epoch": 0.10787566040054061, "grad_norm": 1.9287448787643673, "learning_rate": 2.9994349609918643e-05, "loss": 0.6491, "step": 878 }, { "epoch": 0.10799852561739771, "grad_norm": 1.881379236330819, "learning_rate": 2.999417166726389e-05, "loss": 0.7482, "step": 879 }, { "epoch": 0.10812139083425482, "grad_norm": 1.739984279366319, "learning_rate": 2.999399096652851e-05, "loss": 0.6835, "step": 880 }, { "epoch": 0.10824425605111193, "grad_norm": 1.9472959743074811, "learning_rate": 2.9993807507745725e-05, "loss": 0.6825, "step": 881 }, { "epoch": 0.10836712126796903, "grad_norm": 1.7678619999001417, "learning_rate": 2.999362129094929e-05, "loss": 0.7111, "step": 882 }, { "epoch": 0.10848998648482615, "grad_norm": 2.0151904741787026, "learning_rate": 2.9993432316173456e-05, "loss": 0.8369, "step": 883 }, { "epoch": 0.10861285170168325, "grad_norm": 1.7492221387370035, "learning_rate": 2.999324058345298e-05, "loss": 0.725, "step": 884 }, { "epoch": 0.10873571691854036, "grad_norm": 2.083925099957001, "learning_rate": 2.9993046092823137e-05, "loss": 0.8388, "step": 885 }, { "epoch": 0.10885858213539747, "grad_norm": 1.9666780029736954, "learning_rate": 2.9992848844319697e-05, "loss": 0.6891, "step": 886 }, { "epoch": 0.10898144735225458, "grad_norm": 1.922153256619731, "learning_rate": 2.9992648837978944e-05, "loss": 0.7401, "step": 887 }, { "epoch": 0.10910431256911168, "grad_norm": 2.0004469454196374, "learning_rate": 2.9992446073837665e-05, "loss": 0.6496, "step": 888 }, { "epoch": 0.1092271777859688, "grad_norm": 1.821791088923992, "learning_rate": 2.9992240551933163e-05, "loss": 0.65, "step": 889 }, { "epoch": 0.1093500430028259, "grad_norm": 1.8714780627487186, "learning_rate": 2.9992032272303238e-05, "loss": 0.7548, "step": 890 }, { "epoch": 0.109472908219683, "grad_norm": 1.9173877653152953, "learning_rate": 2.9991821234986205e-05, "loss": 0.7176, "step": 891 }, { "epoch": 0.10959577343654012, "grad_norm": 1.8324634784926979, "learning_rate": 2.9991607440020885e-05, "loss": 0.7643, "step": 892 }, { "epoch": 0.10971863865339722, "grad_norm": 1.7626335958280535, "learning_rate": 2.999139088744659e-05, "loss": 0.7257, "step": 893 }, { "epoch": 0.10984150387025433, "grad_norm": 2.180450643285706, "learning_rate": 2.9991171577303175e-05, "loss": 0.741, "step": 894 }, { "epoch": 0.10996436908711144, "grad_norm": 2.241402523102751, "learning_rate": 2.9990949509630964e-05, "loss": 0.6719, "step": 895 }, { "epoch": 0.11008723430396855, "grad_norm": 1.9645271423030781, "learning_rate": 2.9990724684470814e-05, "loss": 0.7951, "step": 896 }, { "epoch": 0.11021009952082565, "grad_norm": 2.040076681445501, "learning_rate": 2.999049710186407e-05, "loss": 0.7832, "step": 897 }, { "epoch": 0.11033296473768277, "grad_norm": 1.8356450729469074, "learning_rate": 2.9990266761852607e-05, "loss": 0.6955, "step": 898 }, { "epoch": 0.11045582995453987, "grad_norm": 1.8876600941006187, "learning_rate": 2.9990033664478786e-05, "loss": 0.5818, "step": 899 }, { "epoch": 0.11057869517139697, "grad_norm": 1.6512466669666943, "learning_rate": 2.9989797809785484e-05, "loss": 0.7221, "step": 900 }, { "epoch": 0.11070156038825409, "grad_norm": 1.9776948762561855, "learning_rate": 2.998955919781609e-05, "loss": 0.7435, "step": 901 }, { "epoch": 0.11082442560511119, "grad_norm": 1.8931780238287874, "learning_rate": 2.998931782861449e-05, "loss": 0.7505, "step": 902 }, { "epoch": 0.1109472908219683, "grad_norm": 1.9746881903566404, "learning_rate": 2.998907370222509e-05, "loss": 0.7514, "step": 903 }, { "epoch": 0.1110701560388254, "grad_norm": 2.2017779563417035, "learning_rate": 2.9988826818692784e-05, "loss": 0.8303, "step": 904 }, { "epoch": 0.11119302125568252, "grad_norm": 1.640662795406327, "learning_rate": 2.998857717806299e-05, "loss": 0.6454, "step": 905 }, { "epoch": 0.11131588647253962, "grad_norm": 1.9777465233969052, "learning_rate": 2.9988324780381633e-05, "loss": 0.6456, "step": 906 }, { "epoch": 0.11143875168939674, "grad_norm": 1.6911123225790752, "learning_rate": 2.9988069625695134e-05, "loss": 0.7055, "step": 907 }, { "epoch": 0.11156161690625384, "grad_norm": 1.7602750841449315, "learning_rate": 2.998781171405043e-05, "loss": 0.6935, "step": 908 }, { "epoch": 0.11168448212311095, "grad_norm": 1.6464657669920808, "learning_rate": 2.9987551045494956e-05, "loss": 0.6387, "step": 909 }, { "epoch": 0.11180734733996806, "grad_norm": 1.7492388462265454, "learning_rate": 2.998728762007667e-05, "loss": 0.6234, "step": 910 }, { "epoch": 0.11193021255682516, "grad_norm": 1.984834571097876, "learning_rate": 2.998702143784402e-05, "loss": 0.7274, "step": 911 }, { "epoch": 0.11205307777368227, "grad_norm": 1.786139055153458, "learning_rate": 2.998675249884597e-05, "loss": 0.7535, "step": 912 }, { "epoch": 0.11217594299053937, "grad_norm": 1.6828445805325252, "learning_rate": 2.998648080313199e-05, "loss": 0.6985, "step": 913 }, { "epoch": 0.11229880820739649, "grad_norm": 1.924082825152558, "learning_rate": 2.9986206350752058e-05, "loss": 0.7258, "step": 914 }, { "epoch": 0.11242167342425359, "grad_norm": 1.640439725352474, "learning_rate": 2.9985929141756655e-05, "loss": 0.6569, "step": 915 }, { "epoch": 0.1125445386411107, "grad_norm": 1.7739376402655125, "learning_rate": 2.998564917619678e-05, "loss": 0.6889, "step": 916 }, { "epoch": 0.11266740385796781, "grad_norm": 1.9022090529373108, "learning_rate": 2.9985366454123914e-05, "loss": 0.7098, "step": 917 }, { "epoch": 0.11279026907482492, "grad_norm": 1.7708336598267937, "learning_rate": 2.9985080975590083e-05, "loss": 0.6411, "step": 918 }, { "epoch": 0.11291313429168202, "grad_norm": 1.7044777167977707, "learning_rate": 2.998479274064778e-05, "loss": 0.6384, "step": 919 }, { "epoch": 0.11303599950853914, "grad_norm": 1.6527131598758114, "learning_rate": 2.9984501749350038e-05, "loss": 0.6573, "step": 920 }, { "epoch": 0.11315886472539624, "grad_norm": 1.6143084602550728, "learning_rate": 2.9984208001750372e-05, "loss": 0.7337, "step": 921 }, { "epoch": 0.11328172994225334, "grad_norm": 1.945995805855703, "learning_rate": 2.9983911497902822e-05, "loss": 0.721, "step": 922 }, { "epoch": 0.11340459515911046, "grad_norm": 1.8120202690090428, "learning_rate": 2.9983612237861927e-05, "loss": 0.7042, "step": 923 }, { "epoch": 0.11352746037596756, "grad_norm": 1.697666466704746, "learning_rate": 2.998331022168273e-05, "loss": 0.722, "step": 924 }, { "epoch": 0.11365032559282467, "grad_norm": 1.7204181890016, "learning_rate": 2.9983005449420792e-05, "loss": 0.7334, "step": 925 }, { "epoch": 0.11377319080968178, "grad_norm": 2.034375749946619, "learning_rate": 2.998269792113217e-05, "loss": 0.7825, "step": 926 }, { "epoch": 0.11389605602653889, "grad_norm": 2.0174048596851946, "learning_rate": 2.9982387636873428e-05, "loss": 0.6811, "step": 927 }, { "epoch": 0.114018921243396, "grad_norm": 1.831695854404119, "learning_rate": 2.9982074596701644e-05, "loss": 0.711, "step": 928 }, { "epoch": 0.11414178646025311, "grad_norm": 1.4595843529376773, "learning_rate": 2.9981758800674404e-05, "loss": 0.6742, "step": 929 }, { "epoch": 0.11426465167711021, "grad_norm": 1.990850231365332, "learning_rate": 2.9981440248849793e-05, "loss": 0.7988, "step": 930 }, { "epoch": 0.11438751689396731, "grad_norm": 1.65411997569785, "learning_rate": 2.9981118941286402e-05, "loss": 0.6542, "step": 931 }, { "epoch": 0.11451038211082443, "grad_norm": 2.0241758170345987, "learning_rate": 2.9980794878043338e-05, "loss": 0.7369, "step": 932 }, { "epoch": 0.11463324732768153, "grad_norm": 2.033522992646661, "learning_rate": 2.9980468059180215e-05, "loss": 0.7993, "step": 933 }, { "epoch": 0.11475611254453864, "grad_norm": 1.6687471032166814, "learning_rate": 2.9980138484757137e-05, "loss": 0.6607, "step": 934 }, { "epoch": 0.11487897776139575, "grad_norm": 1.800387535371971, "learning_rate": 2.9979806154834743e-05, "loss": 0.688, "step": 935 }, { "epoch": 0.11500184297825286, "grad_norm": 1.5239242450621517, "learning_rate": 2.9979471069474148e-05, "loss": 0.5969, "step": 936 }, { "epoch": 0.11512470819510996, "grad_norm": 2.0392172473699626, "learning_rate": 2.9979133228736998e-05, "loss": 0.7098, "step": 937 }, { "epoch": 0.11524757341196708, "grad_norm": 1.6093785620592955, "learning_rate": 2.997879263268543e-05, "loss": 0.6024, "step": 938 }, { "epoch": 0.11537043862882418, "grad_norm": 1.623260960923209, "learning_rate": 2.99784492813821e-05, "loss": 0.6335, "step": 939 }, { "epoch": 0.1154933038456813, "grad_norm": 1.9313604260050343, "learning_rate": 2.9978103174890167e-05, "loss": 0.7909, "step": 940 }, { "epoch": 0.1156161690625384, "grad_norm": 1.9559459867999844, "learning_rate": 2.9977754313273286e-05, "loss": 0.6451, "step": 941 }, { "epoch": 0.1157390342793955, "grad_norm": 1.8356358611081436, "learning_rate": 2.9977402696595638e-05, "loss": 0.6963, "step": 942 }, { "epoch": 0.11586189949625261, "grad_norm": 1.7880031501323828, "learning_rate": 2.9977048324921895e-05, "loss": 0.7626, "step": 943 }, { "epoch": 0.11598476471310971, "grad_norm": 2.2681437508666114, "learning_rate": 2.997669119831724e-05, "loss": 0.7904, "step": 944 }, { "epoch": 0.11610762992996683, "grad_norm": 1.715961806485397, "learning_rate": 2.9976331316847366e-05, "loss": 0.6347, "step": 945 }, { "epoch": 0.11623049514682393, "grad_norm": 1.730114264100273, "learning_rate": 2.9975968680578472e-05, "loss": 0.6577, "step": 946 }, { "epoch": 0.11635336036368105, "grad_norm": 1.6734098550637142, "learning_rate": 2.997560328957726e-05, "loss": 0.9078, "step": 947 }, { "epoch": 0.11647622558053815, "grad_norm": 1.6747659062551452, "learning_rate": 2.9975235143910945e-05, "loss": 0.6541, "step": 948 }, { "epoch": 0.11659909079739526, "grad_norm": 1.6665144517418158, "learning_rate": 2.9974864243647243e-05, "loss": 0.666, "step": 949 }, { "epoch": 0.11672195601425236, "grad_norm": 2.2224752994394987, "learning_rate": 2.997449058885438e-05, "loss": 0.7344, "step": 950 }, { "epoch": 0.11684482123110947, "grad_norm": 1.7848051308519954, "learning_rate": 2.9974114179601085e-05, "loss": 0.792, "step": 951 }, { "epoch": 0.11696768644796658, "grad_norm": 1.7045958162640003, "learning_rate": 2.9973735015956596e-05, "loss": 0.628, "step": 952 }, { "epoch": 0.11709055166482368, "grad_norm": 1.4962538422161107, "learning_rate": 2.9973353097990665e-05, "loss": 0.7212, "step": 953 }, { "epoch": 0.1172134168816808, "grad_norm": 1.7421943900398063, "learning_rate": 2.997296842577353e-05, "loss": 0.6899, "step": 954 }, { "epoch": 0.1173362820985379, "grad_norm": 1.666832408102903, "learning_rate": 2.9972580999375957e-05, "loss": 0.8394, "step": 955 }, { "epoch": 0.11745914731539502, "grad_norm": 1.660293458017513, "learning_rate": 2.997219081886921e-05, "loss": 0.704, "step": 956 }, { "epoch": 0.11758201253225212, "grad_norm": 1.500870963294999, "learning_rate": 2.9971797884325062e-05, "loss": 0.6691, "step": 957 }, { "epoch": 0.11770487774910923, "grad_norm": 1.6090890054970115, "learning_rate": 2.997140219581579e-05, "loss": 0.7163, "step": 958 }, { "epoch": 0.11782774296596633, "grad_norm": 1.9504343035537988, "learning_rate": 2.9971003753414173e-05, "loss": 0.7751, "step": 959 }, { "epoch": 0.11795060818282345, "grad_norm": 1.7718814833191252, "learning_rate": 2.997060255719351e-05, "loss": 0.727, "step": 960 }, { "epoch": 0.11807347339968055, "grad_norm": 1.7819396884821375, "learning_rate": 2.997019860722759e-05, "loss": 0.8704, "step": 961 }, { "epoch": 0.11819633861653765, "grad_norm": 1.7552512381599663, "learning_rate": 2.9969791903590727e-05, "loss": 0.8173, "step": 962 }, { "epoch": 0.11831920383339477, "grad_norm": 1.6195587659864026, "learning_rate": 2.996938244635772e-05, "loss": 0.6907, "step": 963 }, { "epoch": 0.11844206905025187, "grad_norm": 1.6399874256573652, "learning_rate": 2.9968970235603897e-05, "loss": 0.674, "step": 964 }, { "epoch": 0.11856493426710898, "grad_norm": 1.5807111758291847, "learning_rate": 2.996855527140507e-05, "loss": 0.7625, "step": 965 }, { "epoch": 0.11868779948396609, "grad_norm": 1.6518682404897127, "learning_rate": 2.9968137553837578e-05, "loss": 0.6313, "step": 966 }, { "epoch": 0.1188106647008232, "grad_norm": 1.5393445640587344, "learning_rate": 2.996771708297826e-05, "loss": 0.6983, "step": 967 }, { "epoch": 0.1189335299176803, "grad_norm": 1.641498477331313, "learning_rate": 2.9967293858904447e-05, "loss": 0.7421, "step": 968 }, { "epoch": 0.11905639513453742, "grad_norm": 1.5795100070765273, "learning_rate": 2.9966867881693995e-05, "loss": 0.7488, "step": 969 }, { "epoch": 0.11917926035139452, "grad_norm": 1.535289105314938, "learning_rate": 2.996643915142526e-05, "loss": 0.6455, "step": 970 }, { "epoch": 0.11930212556825162, "grad_norm": 1.5398875949981876, "learning_rate": 2.9966007668177112e-05, "loss": 0.7271, "step": 971 }, { "epoch": 0.11942499078510874, "grad_norm": 1.597661943186351, "learning_rate": 2.9965573432028907e-05, "loss": 0.8128, "step": 972 }, { "epoch": 0.11954785600196584, "grad_norm": 1.874274188184879, "learning_rate": 2.9965136443060523e-05, "loss": 0.7607, "step": 973 }, { "epoch": 0.11967072121882295, "grad_norm": 2.2633525574357822, "learning_rate": 2.9964696701352337e-05, "loss": 0.7755, "step": 974 }, { "epoch": 0.11979358643568006, "grad_norm": 1.9163759652229604, "learning_rate": 2.9964254206985248e-05, "loss": 0.6826, "step": 975 }, { "epoch": 0.11991645165253717, "grad_norm": 1.5511171572089466, "learning_rate": 2.9963808960040645e-05, "loss": 0.6637, "step": 976 }, { "epoch": 0.12003931686939427, "grad_norm": 2.034460886112864, "learning_rate": 2.9963360960600427e-05, "loss": 0.662, "step": 977 }, { "epoch": 0.12016218208625139, "grad_norm": 1.4904389641958855, "learning_rate": 2.9962910208747e-05, "loss": 0.6058, "step": 978 }, { "epoch": 0.12028504730310849, "grad_norm": 2.086325559344193, "learning_rate": 2.996245670456328e-05, "loss": 0.7161, "step": 979 }, { "epoch": 0.1204079125199656, "grad_norm": 1.8940147833476066, "learning_rate": 2.996200044813268e-05, "loss": 0.6574, "step": 980 }, { "epoch": 0.1205307777368227, "grad_norm": 1.710189084168831, "learning_rate": 2.996154143953913e-05, "loss": 0.5656, "step": 981 }, { "epoch": 0.12065364295367981, "grad_norm": 1.5260725063145748, "learning_rate": 2.9961079678867064e-05, "loss": 0.7211, "step": 982 }, { "epoch": 0.12077650817053692, "grad_norm": 1.5371099229531677, "learning_rate": 2.9960615166201417e-05, "loss": 0.6258, "step": 983 }, { "epoch": 0.12089937338739402, "grad_norm": 1.6815215739851217, "learning_rate": 2.996014790162763e-05, "loss": 0.5639, "step": 984 }, { "epoch": 0.12102223860425114, "grad_norm": 1.6069953840523878, "learning_rate": 2.995967788523166e-05, "loss": 0.7785, "step": 985 }, { "epoch": 0.12114510382110824, "grad_norm": 1.729038782250289, "learning_rate": 2.995920511709996e-05, "loss": 0.6434, "step": 986 }, { "epoch": 0.12126796903796536, "grad_norm": 1.9795436135216768, "learning_rate": 2.995872959731949e-05, "loss": 0.7228, "step": 987 }, { "epoch": 0.12139083425482246, "grad_norm": 1.7353020741796659, "learning_rate": 2.9958251325977726e-05, "loss": 0.646, "step": 988 }, { "epoch": 0.12151369947167957, "grad_norm": 1.69620004842621, "learning_rate": 2.9957770303162634e-05, "loss": 0.6902, "step": 989 }, { "epoch": 0.12163656468853667, "grad_norm": 1.471739502127223, "learning_rate": 2.99572865289627e-05, "loss": 0.6312, "step": 990 }, { "epoch": 0.12175942990539378, "grad_norm": 1.937543293883123, "learning_rate": 2.995680000346691e-05, "loss": 0.6524, "step": 991 }, { "epoch": 0.12188229512225089, "grad_norm": 1.7123279740455752, "learning_rate": 2.995631072676476e-05, "loss": 0.7177, "step": 992 }, { "epoch": 0.122005160339108, "grad_norm": 1.5703044853222123, "learning_rate": 2.9955818698946243e-05, "loss": 0.7582, "step": 993 }, { "epoch": 0.12212802555596511, "grad_norm": 1.6270738289526687, "learning_rate": 2.9955323920101872e-05, "loss": 0.6279, "step": 994 }, { "epoch": 0.12225089077282221, "grad_norm": 1.7028831799280446, "learning_rate": 2.9954826390322653e-05, "loss": 0.6748, "step": 995 }, { "epoch": 0.12237375598967933, "grad_norm": 1.5939771887037102, "learning_rate": 2.9954326109700104e-05, "loss": 0.632, "step": 996 }, { "epoch": 0.12249662120653643, "grad_norm": 1.6619543156824201, "learning_rate": 2.9953823078326252e-05, "loss": 0.5534, "step": 997 }, { "epoch": 0.12261948642339354, "grad_norm": 1.6257406992276335, "learning_rate": 2.995331729629362e-05, "loss": 0.7374, "step": 998 }, { "epoch": 0.12274235164025064, "grad_norm": 1.9853691962915836, "learning_rate": 2.9952808763695247e-05, "loss": 0.7009, "step": 999 }, { "epoch": 0.12286521685710776, "grad_norm": 1.6249542899389606, "learning_rate": 2.9952297480624678e-05, "loss": 0.7002, "step": 1000 }, { "epoch": 0.12298808207396486, "grad_norm": 1.5778414431899186, "learning_rate": 2.9951783447175954e-05, "loss": 0.6402, "step": 1001 }, { "epoch": 0.12311094729082196, "grad_norm": 2.0874593486187107, "learning_rate": 2.9951266663443634e-05, "loss": 0.7527, "step": 1002 }, { "epoch": 0.12323381250767908, "grad_norm": 1.9236355685092414, "learning_rate": 2.995074712952277e-05, "loss": 0.6869, "step": 1003 }, { "epoch": 0.12335667772453618, "grad_norm": 2.1163180732001603, "learning_rate": 2.995022484550893e-05, "loss": 0.7183, "step": 1004 }, { "epoch": 0.1234795429413933, "grad_norm": 1.5879735405788065, "learning_rate": 2.994969981149819e-05, "loss": 0.7409, "step": 1005 }, { "epoch": 0.1236024081582504, "grad_norm": 1.6235846789859358, "learning_rate": 2.9949172027587116e-05, "loss": 0.6656, "step": 1006 }, { "epoch": 0.12372527337510751, "grad_norm": 1.6187951425034859, "learning_rate": 2.99486414938728e-05, "loss": 0.8049, "step": 1007 }, { "epoch": 0.12384813859196461, "grad_norm": 2.090461465668652, "learning_rate": 2.9948108210452824e-05, "loss": 0.656, "step": 1008 }, { "epoch": 0.12397100380882173, "grad_norm": 1.8696158686109474, "learning_rate": 2.9947572177425285e-05, "loss": 0.6037, "step": 1009 }, { "epoch": 0.12409386902567883, "grad_norm": 1.6716225834339644, "learning_rate": 2.9947033394888786e-05, "loss": 0.7694, "step": 1010 }, { "epoch": 0.12421673424253594, "grad_norm": 1.868196850506318, "learning_rate": 2.9946491862942426e-05, "loss": 0.677, "step": 1011 }, { "epoch": 0.12433959945939305, "grad_norm": 1.6943817245924655, "learning_rate": 2.994594758168582e-05, "loss": 0.6645, "step": 1012 }, { "epoch": 0.12446246467625015, "grad_norm": 2.3596853645259968, "learning_rate": 2.9945400551219084e-05, "loss": 0.769, "step": 1013 }, { "epoch": 0.12458532989310726, "grad_norm": 1.7647279657083175, "learning_rate": 2.9944850771642843e-05, "loss": 0.6843, "step": 1014 }, { "epoch": 0.12470819510996436, "grad_norm": 2.001597647305168, "learning_rate": 2.9944298243058217e-05, "loss": 0.8216, "step": 1015 }, { "epoch": 0.12483106032682148, "grad_norm": 2.4293765027399092, "learning_rate": 2.9943742965566854e-05, "loss": 0.7272, "step": 1016 }, { "epoch": 0.12495392554367858, "grad_norm": 2.0057682712893836, "learning_rate": 2.9943184939270882e-05, "loss": 0.7424, "step": 1017 }, { "epoch": 0.12507679076053568, "grad_norm": 1.8114123374825029, "learning_rate": 2.994262416427295e-05, "loss": 0.7571, "step": 1018 }, { "epoch": 0.1251996559773928, "grad_norm": 1.7857596790370338, "learning_rate": 2.994206064067621e-05, "loss": 0.6938, "step": 1019 }, { "epoch": 0.12532252119424991, "grad_norm": 1.8504140600909118, "learning_rate": 2.994149436858432e-05, "loss": 0.7713, "step": 1020 }, { "epoch": 0.12544538641110703, "grad_norm": 1.7834403693247516, "learning_rate": 2.994092534810144e-05, "loss": 0.65, "step": 1021 }, { "epoch": 0.12556825162796412, "grad_norm": 1.7229019313668377, "learning_rate": 2.9940353579332233e-05, "loss": 0.6635, "step": 1022 }, { "epoch": 0.12569111684482123, "grad_norm": 1.8409393907020477, "learning_rate": 2.9939779062381876e-05, "loss": 0.7243, "step": 1023 }, { "epoch": 0.12581398206167835, "grad_norm": 1.5963612283430768, "learning_rate": 2.9939201797356053e-05, "loss": 0.6832, "step": 1024 }, { "epoch": 0.12593684727853544, "grad_norm": 1.6393220706638125, "learning_rate": 2.993862178436094e-05, "loss": 0.6668, "step": 1025 }, { "epoch": 0.12605971249539255, "grad_norm": 1.6078984246283192, "learning_rate": 2.9938039023503233e-05, "loss": 0.7442, "step": 1026 }, { "epoch": 0.12618257771224967, "grad_norm": 1.7665842136358443, "learning_rate": 2.9937453514890123e-05, "loss": 0.7841, "step": 1027 }, { "epoch": 0.12630544292910678, "grad_norm": 1.4864408591818952, "learning_rate": 2.9936865258629312e-05, "loss": 0.6271, "step": 1028 }, { "epoch": 0.12642830814596387, "grad_norm": 1.5184504569936368, "learning_rate": 2.9936274254829007e-05, "loss": 0.6183, "step": 1029 }, { "epoch": 0.12655117336282098, "grad_norm": 1.7832266201503009, "learning_rate": 2.9935680503597917e-05, "loss": 0.651, "step": 1030 }, { "epoch": 0.1266740385796781, "grad_norm": 1.9398792647983572, "learning_rate": 2.993508400504526e-05, "loss": 0.7072, "step": 1031 }, { "epoch": 0.1267969037965352, "grad_norm": 1.5983712281493048, "learning_rate": 2.9934484759280756e-05, "loss": 0.7195, "step": 1032 }, { "epoch": 0.1269197690133923, "grad_norm": 1.591340307152886, "learning_rate": 2.9933882766414634e-05, "loss": 0.6022, "step": 1033 }, { "epoch": 0.12704263423024942, "grad_norm": 1.8092691577786109, "learning_rate": 2.9933278026557627e-05, "loss": 0.8369, "step": 1034 }, { "epoch": 0.12716549944710653, "grad_norm": 1.639598330700463, "learning_rate": 2.9932670539820975e-05, "loss": 0.6061, "step": 1035 }, { "epoch": 0.12728836466396362, "grad_norm": 1.736962784004616, "learning_rate": 2.9932060306316416e-05, "loss": 0.7568, "step": 1036 }, { "epoch": 0.12741122988082074, "grad_norm": 1.6565201366898092, "learning_rate": 2.9931447326156204e-05, "loss": 0.594, "step": 1037 }, { "epoch": 0.12753409509767785, "grad_norm": 1.7033835861610587, "learning_rate": 2.9930831599453087e-05, "loss": 0.7553, "step": 1038 }, { "epoch": 0.12765696031453497, "grad_norm": 1.79384836041842, "learning_rate": 2.9930213126320333e-05, "loss": 0.659, "step": 1039 }, { "epoch": 0.12777982553139206, "grad_norm": 1.7769733738955777, "learning_rate": 2.9929591906871696e-05, "loss": 0.7507, "step": 1040 }, { "epoch": 0.12790269074824917, "grad_norm": 1.995675685753367, "learning_rate": 2.992896794122145e-05, "loss": 0.75, "step": 1041 }, { "epoch": 0.12802555596510629, "grad_norm": 1.6647032382231328, "learning_rate": 2.992834122948437e-05, "loss": 0.7719, "step": 1042 }, { "epoch": 0.12814842118196337, "grad_norm": 1.785718702120485, "learning_rate": 2.9927711771775735e-05, "loss": 0.7239, "step": 1043 }, { "epoch": 0.1282712863988205, "grad_norm": 1.655665154994619, "learning_rate": 2.9927079568211327e-05, "loss": 0.708, "step": 1044 }, { "epoch": 0.1283941516156776, "grad_norm": 1.5490913307762182, "learning_rate": 2.9926444618907438e-05, "loss": 0.6317, "step": 1045 }, { "epoch": 0.12851701683253472, "grad_norm": 1.900604893061953, "learning_rate": 2.9925806923980863e-05, "loss": 0.8069, "step": 1046 }, { "epoch": 0.1286398820493918, "grad_norm": 1.8049158843087787, "learning_rate": 2.9925166483548903e-05, "loss": 0.7032, "step": 1047 }, { "epoch": 0.12876274726624892, "grad_norm": 1.3282479359829484, "learning_rate": 2.9924523297729358e-05, "loss": 0.6359, "step": 1048 }, { "epoch": 0.12888561248310604, "grad_norm": 1.7681151167012679, "learning_rate": 2.9923877366640543e-05, "loss": 0.6992, "step": 1049 }, { "epoch": 0.12900847769996315, "grad_norm": 2.2170000762704545, "learning_rate": 2.9923228690401273e-05, "loss": 0.7693, "step": 1050 }, { "epoch": 0.12913134291682024, "grad_norm": 1.8092401809576655, "learning_rate": 2.9922577269130866e-05, "loss": 0.7706, "step": 1051 }, { "epoch": 0.12925420813367736, "grad_norm": 1.7624374533037472, "learning_rate": 2.9921923102949142e-05, "loss": 0.6787, "step": 1052 }, { "epoch": 0.12937707335053447, "grad_norm": 1.8843453181944527, "learning_rate": 2.992126619197644e-05, "loss": 0.7445, "step": 1053 }, { "epoch": 0.12949993856739156, "grad_norm": 1.6128247249951075, "learning_rate": 2.9920606536333587e-05, "loss": 0.6856, "step": 1054 }, { "epoch": 0.12962280378424867, "grad_norm": 1.5574081506399267, "learning_rate": 2.991994413614193e-05, "loss": 0.7239, "step": 1055 }, { "epoch": 0.1297456690011058, "grad_norm": 1.5450313028733766, "learning_rate": 2.9919278991523307e-05, "loss": 0.7535, "step": 1056 }, { "epoch": 0.1298685342179629, "grad_norm": 1.7127214724111612, "learning_rate": 2.9918611102600066e-05, "loss": 0.6009, "step": 1057 }, { "epoch": 0.12999139943482, "grad_norm": 1.6257758853019078, "learning_rate": 2.9917940469495065e-05, "loss": 0.7182, "step": 1058 }, { "epoch": 0.1301142646516771, "grad_norm": 1.583824872117485, "learning_rate": 2.9917267092331655e-05, "loss": 0.5854, "step": 1059 }, { "epoch": 0.13023712986853422, "grad_norm": 1.469559497269179, "learning_rate": 2.9916590971233714e-05, "loss": 0.6542, "step": 1060 }, { "epoch": 0.13035999508539134, "grad_norm": 1.5862365184085885, "learning_rate": 2.99159121063256e-05, "loss": 0.729, "step": 1061 }, { "epoch": 0.13048286030224843, "grad_norm": 1.5545005513403674, "learning_rate": 2.991523049773218e-05, "loss": 0.6382, "step": 1062 }, { "epoch": 0.13060572551910554, "grad_norm": 1.503068933204754, "learning_rate": 2.9914546145578843e-05, "loss": 0.7352, "step": 1063 }, { "epoch": 0.13072859073596266, "grad_norm": 1.9106083474133921, "learning_rate": 2.9913859049991464e-05, "loss": 0.7455, "step": 1064 }, { "epoch": 0.13085145595281975, "grad_norm": 1.8363870822732362, "learning_rate": 2.991316921109644e-05, "loss": 0.7338, "step": 1065 }, { "epoch": 0.13097432116967686, "grad_norm": 1.5685336737116835, "learning_rate": 2.991247662902065e-05, "loss": 0.6975, "step": 1066 }, { "epoch": 0.13109718638653398, "grad_norm": 1.6261019595969362, "learning_rate": 2.9911781303891493e-05, "loss": 0.6532, "step": 1067 }, { "epoch": 0.1312200516033911, "grad_norm": 1.6649994992870834, "learning_rate": 2.9911083235836872e-05, "loss": 0.7355, "step": 1068 }, { "epoch": 0.13134291682024818, "grad_norm": 1.7134890650926717, "learning_rate": 2.9910382424985196e-05, "loss": 0.758, "step": 1069 }, { "epoch": 0.1314657820371053, "grad_norm": 1.5529677249782339, "learning_rate": 2.9909678871465368e-05, "loss": 0.7866, "step": 1070 }, { "epoch": 0.1315886472539624, "grad_norm": 2.1087056801190935, "learning_rate": 2.9908972575406802e-05, "loss": 0.6364, "step": 1071 }, { "epoch": 0.1317115124708195, "grad_norm": 1.6490759630849179, "learning_rate": 2.990826353693942e-05, "loss": 0.7244, "step": 1072 }, { "epoch": 0.1318343776876766, "grad_norm": 1.5253849343391495, "learning_rate": 2.9907551756193645e-05, "loss": 0.662, "step": 1073 }, { "epoch": 0.13195724290453373, "grad_norm": 1.5997971327964589, "learning_rate": 2.9906837233300403e-05, "loss": 0.7599, "step": 1074 }, { "epoch": 0.13208010812139084, "grad_norm": 1.685469445928351, "learning_rate": 2.9906119968391125e-05, "loss": 0.7296, "step": 1075 }, { "epoch": 0.13220297333824793, "grad_norm": 1.4826514386477891, "learning_rate": 2.990539996159775e-05, "loss": 0.6762, "step": 1076 }, { "epoch": 0.13232583855510505, "grad_norm": 1.6491923179873533, "learning_rate": 2.9904677213052712e-05, "loss": 0.6171, "step": 1077 }, { "epoch": 0.13244870377196216, "grad_norm": 1.7221441488332943, "learning_rate": 2.990395172288897e-05, "loss": 0.7139, "step": 1078 }, { "epoch": 0.13257156898881928, "grad_norm": 1.75224878860636, "learning_rate": 2.9903223491239958e-05, "loss": 0.734, "step": 1079 }, { "epoch": 0.13269443420567636, "grad_norm": 1.5175572099141303, "learning_rate": 2.9902492518239638e-05, "loss": 0.6105, "step": 1080 }, { "epoch": 0.13281729942253348, "grad_norm": 1.7041409133501713, "learning_rate": 2.990175880402246e-05, "loss": 0.7258, "step": 1081 }, { "epoch": 0.1329401646393906, "grad_norm": 1.5253051442756447, "learning_rate": 2.9901022348723397e-05, "loss": 0.718, "step": 1082 }, { "epoch": 0.13306302985624768, "grad_norm": 1.520831403060298, "learning_rate": 2.990028315247791e-05, "loss": 0.6726, "step": 1083 }, { "epoch": 0.1331858950731048, "grad_norm": 1.7374415573273694, "learning_rate": 2.9899541215421965e-05, "loss": 0.6715, "step": 1084 }, { "epoch": 0.13330876028996191, "grad_norm": 1.774969031330596, "learning_rate": 2.9898796537692044e-05, "loss": 0.7461, "step": 1085 }, { "epoch": 0.13343162550681903, "grad_norm": 1.6893695017599883, "learning_rate": 2.9898049119425118e-05, "loss": 0.7804, "step": 1086 }, { "epoch": 0.13355449072367612, "grad_norm": 1.8280899379724442, "learning_rate": 2.9897298960758674e-05, "loss": 0.7282, "step": 1087 }, { "epoch": 0.13367735594053323, "grad_norm": 1.5469141291530257, "learning_rate": 2.98965460618307e-05, "loss": 0.6232, "step": 1088 }, { "epoch": 0.13380022115739035, "grad_norm": 1.6239630377852645, "learning_rate": 2.989579042277969e-05, "loss": 0.6922, "step": 1089 }, { "epoch": 0.13392308637424746, "grad_norm": 1.5582302830631414, "learning_rate": 2.9895032043744625e-05, "loss": 0.7025, "step": 1090 }, { "epoch": 0.13404595159110455, "grad_norm": 1.7151813415907224, "learning_rate": 2.989427092486502e-05, "loss": 0.8021, "step": 1091 }, { "epoch": 0.13416881680796167, "grad_norm": 1.6671315355968632, "learning_rate": 2.989350706628087e-05, "loss": 0.619, "step": 1092 }, { "epoch": 0.13429168202481878, "grad_norm": 1.5897395470727977, "learning_rate": 2.9892740468132683e-05, "loss": 0.7023, "step": 1093 }, { "epoch": 0.13441454724167587, "grad_norm": 1.5005597620455335, "learning_rate": 2.9891971130561467e-05, "loss": 0.6672, "step": 1094 }, { "epoch": 0.13453741245853298, "grad_norm": 1.5826884769046605, "learning_rate": 2.9891199053708743e-05, "loss": 0.7526, "step": 1095 }, { "epoch": 0.1346602776753901, "grad_norm": 1.6741467416650784, "learning_rate": 2.9890424237716524e-05, "loss": 0.652, "step": 1096 }, { "epoch": 0.13478314289224722, "grad_norm": 1.614720866942373, "learning_rate": 2.9889646682727334e-05, "loss": 0.7233, "step": 1097 }, { "epoch": 0.1349060081091043, "grad_norm": 1.852966897322659, "learning_rate": 2.98888663888842e-05, "loss": 0.6917, "step": 1098 }, { "epoch": 0.13502887332596142, "grad_norm": 1.395848435555885, "learning_rate": 2.988808335633065e-05, "loss": 0.6428, "step": 1099 }, { "epoch": 0.13515173854281853, "grad_norm": 1.727699178066572, "learning_rate": 2.988729758521072e-05, "loss": 0.5772, "step": 1100 }, { "epoch": 0.13527460375967565, "grad_norm": 2.360574451364533, "learning_rate": 2.9886509075668947e-05, "loss": 0.8123, "step": 1101 }, { "epoch": 0.13539746897653274, "grad_norm": 1.7034318099687635, "learning_rate": 2.9885717827850372e-05, "loss": 0.8034, "step": 1102 }, { "epoch": 0.13552033419338985, "grad_norm": 1.6964938823875, "learning_rate": 2.988492384190054e-05, "loss": 0.648, "step": 1103 }, { "epoch": 0.13564319941024697, "grad_norm": 1.357080599585336, "learning_rate": 2.98841271179655e-05, "loss": 0.7694, "step": 1104 }, { "epoch": 0.13576606462710405, "grad_norm": 1.6051874443433634, "learning_rate": 2.9883327656191806e-05, "loss": 0.5676, "step": 1105 }, { "epoch": 0.13588892984396117, "grad_norm": 1.5996527238503389, "learning_rate": 2.9882525456726507e-05, "loss": 0.6148, "step": 1106 }, { "epoch": 0.13601179506081829, "grad_norm": 1.7527813375420025, "learning_rate": 2.988172051971717e-05, "loss": 0.6786, "step": 1107 }, { "epoch": 0.1361346602776754, "grad_norm": 2.1797407390282446, "learning_rate": 2.988091284531185e-05, "loss": 0.7661, "step": 1108 }, { "epoch": 0.1362575254945325, "grad_norm": 1.6281417280686319, "learning_rate": 2.988010243365912e-05, "loss": 0.6377, "step": 1109 }, { "epoch": 0.1363803907113896, "grad_norm": 1.628826675952316, "learning_rate": 2.9879289284908053e-05, "loss": 0.7115, "step": 1110 }, { "epoch": 0.13650325592824672, "grad_norm": 1.575995587052852, "learning_rate": 2.9878473399208215e-05, "loss": 0.761, "step": 1111 }, { "epoch": 0.13662612114510383, "grad_norm": 1.5557246073582405, "learning_rate": 2.9877654776709685e-05, "loss": 0.7143, "step": 1112 }, { "epoch": 0.13674898636196092, "grad_norm": 1.68306350535184, "learning_rate": 2.9876833417563044e-05, "loss": 0.7616, "step": 1113 }, { "epoch": 0.13687185157881804, "grad_norm": 1.5120456943854803, "learning_rate": 2.9876009321919372e-05, "loss": 0.7334, "step": 1114 }, { "epoch": 0.13699471679567515, "grad_norm": 2.0993213715121914, "learning_rate": 2.9875182489930263e-05, "loss": 0.6747, "step": 1115 }, { "epoch": 0.13711758201253224, "grad_norm": 1.5984791951395687, "learning_rate": 2.98743529217478e-05, "loss": 0.6757, "step": 1116 }, { "epoch": 0.13724044722938936, "grad_norm": 1.6232016639873426, "learning_rate": 2.9873520617524585e-05, "loss": 0.7204, "step": 1117 }, { "epoch": 0.13736331244624647, "grad_norm": 1.4248520190304972, "learning_rate": 2.9872685577413712e-05, "loss": 0.6354, "step": 1118 }, { "epoch": 0.1374861776631036, "grad_norm": 1.6957929170662709, "learning_rate": 2.987184780156878e-05, "loss": 0.7979, "step": 1119 }, { "epoch": 0.13760904287996067, "grad_norm": 1.4160038707855611, "learning_rate": 2.9871007290143884e-05, "loss": 0.7727, "step": 1120 }, { "epoch": 0.1377319080968178, "grad_norm": 1.714684305341049, "learning_rate": 2.9870164043293645e-05, "loss": 0.7741, "step": 1121 }, { "epoch": 0.1378547733136749, "grad_norm": 1.7006929161823827, "learning_rate": 2.9869318061173168e-05, "loss": 0.6791, "step": 1122 }, { "epoch": 0.137977638530532, "grad_norm": 1.607830749630221, "learning_rate": 2.9868469343938063e-05, "loss": 0.7489, "step": 1123 }, { "epoch": 0.1381005037473891, "grad_norm": 1.6238723271731592, "learning_rate": 2.9867617891744447e-05, "loss": 0.5973, "step": 1124 }, { "epoch": 0.13822336896424622, "grad_norm": 1.6223529364307367, "learning_rate": 2.9866763704748938e-05, "loss": 0.6676, "step": 1125 }, { "epoch": 0.13834623418110334, "grad_norm": 1.4303665214802679, "learning_rate": 2.9865906783108663e-05, "loss": 0.6366, "step": 1126 }, { "epoch": 0.13846909939796043, "grad_norm": 1.5903653314170936, "learning_rate": 2.9865047126981238e-05, "loss": 0.6407, "step": 1127 }, { "epoch": 0.13859196461481754, "grad_norm": 1.6142191370840326, "learning_rate": 2.9864184736524808e-05, "loss": 0.6473, "step": 1128 }, { "epoch": 0.13871482983167466, "grad_norm": 1.3842105458389793, "learning_rate": 2.9863319611897985e-05, "loss": 0.7453, "step": 1129 }, { "epoch": 0.13883769504853177, "grad_norm": 1.5572047301595482, "learning_rate": 2.9862451753259912e-05, "loss": 0.5556, "step": 1130 }, { "epoch": 0.13896056026538886, "grad_norm": 1.5155458938049342, "learning_rate": 2.986158116077023e-05, "loss": 0.6837, "step": 1131 }, { "epoch": 0.13908342548224598, "grad_norm": 1.5818650898706743, "learning_rate": 2.986070783458907e-05, "loss": 0.5668, "step": 1132 }, { "epoch": 0.1392062906991031, "grad_norm": 1.7291002308753405, "learning_rate": 2.9859831774877077e-05, "loss": 0.7332, "step": 1133 }, { "epoch": 0.13932915591596018, "grad_norm": 1.3800473069173909, "learning_rate": 2.9858952981795407e-05, "loss": 0.7414, "step": 1134 }, { "epoch": 0.1394520211328173, "grad_norm": 1.5772211023209295, "learning_rate": 2.985807145550569e-05, "loss": 0.6733, "step": 1135 }, { "epoch": 0.1395748863496744, "grad_norm": 1.7771990478139945, "learning_rate": 2.9857187196170093e-05, "loss": 0.7348, "step": 1136 }, { "epoch": 0.13969775156653153, "grad_norm": 1.531975844586977, "learning_rate": 2.985630020395126e-05, "loss": 0.6783, "step": 1137 }, { "epoch": 0.1398206167833886, "grad_norm": 2.113447093229907, "learning_rate": 2.9855410479012354e-05, "loss": 0.7505, "step": 1138 }, { "epoch": 0.13994348200024573, "grad_norm": 1.5142104459046328, "learning_rate": 2.985451802151703e-05, "loss": 0.6454, "step": 1139 }, { "epoch": 0.14006634721710284, "grad_norm": 1.5959559270845487, "learning_rate": 2.9853622831629448e-05, "loss": 0.6463, "step": 1140 }, { "epoch": 0.14018921243395996, "grad_norm": 1.4290711790573352, "learning_rate": 2.985272490951428e-05, "loss": 0.6776, "step": 1141 }, { "epoch": 0.14031207765081705, "grad_norm": 1.5117342303363002, "learning_rate": 2.9851824255336686e-05, "loss": 0.6445, "step": 1142 }, { "epoch": 0.14043494286767416, "grad_norm": 1.741812081110336, "learning_rate": 2.9850920869262338e-05, "loss": 0.7343, "step": 1143 }, { "epoch": 0.14055780808453128, "grad_norm": 1.7573076206717237, "learning_rate": 2.9850014751457407e-05, "loss": 0.7436, "step": 1144 }, { "epoch": 0.14068067330138836, "grad_norm": 1.6571594013571993, "learning_rate": 2.984910590208857e-05, "loss": 0.7109, "step": 1145 }, { "epoch": 0.14080353851824548, "grad_norm": 1.5100474084765798, "learning_rate": 2.9848194321322996e-05, "loss": 0.7391, "step": 1146 }, { "epoch": 0.1409264037351026, "grad_norm": 1.5865471745601147, "learning_rate": 2.9847280009328377e-05, "loss": 0.6929, "step": 1147 }, { "epoch": 0.1410492689519597, "grad_norm": 1.7695306423384076, "learning_rate": 2.9846362966272888e-05, "loss": 0.7631, "step": 1148 }, { "epoch": 0.1411721341688168, "grad_norm": 1.5094838137769047, "learning_rate": 2.984544319232521e-05, "loss": 0.7425, "step": 1149 }, { "epoch": 0.14129499938567391, "grad_norm": 1.405896089121248, "learning_rate": 2.9844520687654537e-05, "loss": 0.6925, "step": 1150 }, { "epoch": 0.14141786460253103, "grad_norm": 1.8775933743033086, "learning_rate": 2.984359545243055e-05, "loss": 0.8296, "step": 1151 }, { "epoch": 0.14154072981938814, "grad_norm": 1.5284700945190077, "learning_rate": 2.9842667486823446e-05, "loss": 0.7085, "step": 1152 }, { "epoch": 0.14166359503624523, "grad_norm": 1.6149581262704114, "learning_rate": 2.9841736791003914e-05, "loss": 0.6629, "step": 1153 }, { "epoch": 0.14178646025310235, "grad_norm": 1.6705926070544859, "learning_rate": 2.9840803365143153e-05, "loss": 0.6295, "step": 1154 }, { "epoch": 0.14190932546995946, "grad_norm": 1.6265851552636075, "learning_rate": 2.983986720941286e-05, "loss": 0.5882, "step": 1155 }, { "epoch": 0.14203219068681655, "grad_norm": 1.8213003661624614, "learning_rate": 2.983892832398523e-05, "loss": 0.7028, "step": 1156 }, { "epoch": 0.14215505590367367, "grad_norm": 1.8238674991249615, "learning_rate": 2.983798670903297e-05, "loss": 0.8217, "step": 1157 }, { "epoch": 0.14227792112053078, "grad_norm": 1.4786832757343902, "learning_rate": 2.9837042364729284e-05, "loss": 0.6697, "step": 1158 }, { "epoch": 0.1424007863373879, "grad_norm": 1.5583943344798705, "learning_rate": 2.9836095291247875e-05, "loss": 0.8287, "step": 1159 }, { "epoch": 0.14252365155424498, "grad_norm": 1.700219130998528, "learning_rate": 2.9835145488762952e-05, "loss": 0.6588, "step": 1160 }, { "epoch": 0.1426465167711021, "grad_norm": 1.6867726267692873, "learning_rate": 2.983419295744923e-05, "loss": 0.8276, "step": 1161 }, { "epoch": 0.14276938198795922, "grad_norm": 1.5464045937771351, "learning_rate": 2.983323769748191e-05, "loss": 0.7312, "step": 1162 }, { "epoch": 0.1428922472048163, "grad_norm": 1.4725598950961054, "learning_rate": 2.983227970903672e-05, "loss": 0.7272, "step": 1163 }, { "epoch": 0.14301511242167342, "grad_norm": 1.5764167115941043, "learning_rate": 2.983131899228986e-05, "loss": 0.7458, "step": 1164 }, { "epoch": 0.14313797763853053, "grad_norm": 1.7073788727927754, "learning_rate": 2.983035554741806e-05, "loss": 0.7216, "step": 1165 }, { "epoch": 0.14326084285538765, "grad_norm": 1.369893090073144, "learning_rate": 2.9829389374598538e-05, "loss": 0.5365, "step": 1166 }, { "epoch": 0.14338370807224474, "grad_norm": 1.753218224005704, "learning_rate": 2.982842047400901e-05, "loss": 0.6553, "step": 1167 }, { "epoch": 0.14350657328910185, "grad_norm": 1.6290826751220913, "learning_rate": 2.9827448845827697e-05, "loss": 0.7021, "step": 1168 }, { "epoch": 0.14362943850595897, "grad_norm": 1.8613743971801529, "learning_rate": 2.9826474490233337e-05, "loss": 0.7541, "step": 1169 }, { "epoch": 0.14375230372281608, "grad_norm": 1.5388950279651519, "learning_rate": 2.9825497407405144e-05, "loss": 0.6148, "step": 1170 }, { "epoch": 0.14387516893967317, "grad_norm": 1.8247844825561343, "learning_rate": 2.982451759752285e-05, "loss": 0.6611, "step": 1171 }, { "epoch": 0.14399803415653029, "grad_norm": 1.8459463799301494, "learning_rate": 2.982353506076668e-05, "loss": 0.7517, "step": 1172 }, { "epoch": 0.1441208993733874, "grad_norm": 1.7305820071085602, "learning_rate": 2.9822549797317374e-05, "loss": 0.6252, "step": 1173 }, { "epoch": 0.1442437645902445, "grad_norm": 1.6306274681626463, "learning_rate": 2.9821561807356158e-05, "loss": 0.6901, "step": 1174 }, { "epoch": 0.1443666298071016, "grad_norm": 1.6475188417039162, "learning_rate": 2.9820571091064767e-05, "loss": 0.7643, "step": 1175 }, { "epoch": 0.14448949502395872, "grad_norm": 2.157485210174474, "learning_rate": 2.9819577648625442e-05, "loss": 0.6939, "step": 1176 }, { "epoch": 0.14461236024081583, "grad_norm": 1.7119502689604285, "learning_rate": 2.981858148022092e-05, "loss": 0.6844, "step": 1177 }, { "epoch": 0.14473522545767292, "grad_norm": 1.8389559260002328, "learning_rate": 2.9817582586034433e-05, "loss": 0.5993, "step": 1178 }, { "epoch": 0.14485809067453004, "grad_norm": 1.7853063116985455, "learning_rate": 2.981658096624972e-05, "loss": 0.6719, "step": 1179 }, { "epoch": 0.14498095589138715, "grad_norm": 1.5264089205767941, "learning_rate": 2.9815576621051036e-05, "loss": 0.7524, "step": 1180 }, { "epoch": 0.14510382110824427, "grad_norm": 1.6589414063727597, "learning_rate": 2.9814569550623108e-05, "loss": 0.6786, "step": 1181 }, { "epoch": 0.14522668632510136, "grad_norm": 1.583312604020123, "learning_rate": 2.981355975515119e-05, "loss": 0.6543, "step": 1182 }, { "epoch": 0.14534955154195847, "grad_norm": 1.753303253691593, "learning_rate": 2.9812547234821024e-05, "loss": 0.7135, "step": 1183 }, { "epoch": 0.1454724167588156, "grad_norm": 1.4360977477057137, "learning_rate": 2.981153198981886e-05, "loss": 0.604, "step": 1184 }, { "epoch": 0.14559528197567267, "grad_norm": 1.6191223919459055, "learning_rate": 2.9810514020331437e-05, "loss": 0.7221, "step": 1185 }, { "epoch": 0.1457181471925298, "grad_norm": 1.9682130471230237, "learning_rate": 2.980949332654601e-05, "loss": 0.7947, "step": 1186 }, { "epoch": 0.1458410124093869, "grad_norm": 1.872401214267614, "learning_rate": 2.9808469908650335e-05, "loss": 0.6328, "step": 1187 }, { "epoch": 0.14596387762624402, "grad_norm": 1.5490625447687678, "learning_rate": 2.980744376683265e-05, "loss": 0.7051, "step": 1188 }, { "epoch": 0.1460867428431011, "grad_norm": 1.7426227546212294, "learning_rate": 2.9806414901281716e-05, "loss": 0.6305, "step": 1189 }, { "epoch": 0.14620960805995822, "grad_norm": 1.602279188139357, "learning_rate": 2.9805383312186784e-05, "loss": 0.6566, "step": 1190 }, { "epoch": 0.14633247327681534, "grad_norm": 1.6625325543909981, "learning_rate": 2.980434899973761e-05, "loss": 0.7038, "step": 1191 }, { "epoch": 0.14645533849367245, "grad_norm": 1.422983208073982, "learning_rate": 2.9803311964124444e-05, "loss": 0.606, "step": 1192 }, { "epoch": 0.14657820371052954, "grad_norm": 1.2448112809501521, "learning_rate": 2.9802272205538045e-05, "loss": 0.6758, "step": 1193 }, { "epoch": 0.14670106892738666, "grad_norm": 1.633985917320782, "learning_rate": 2.980122972416967e-05, "loss": 0.7712, "step": 1194 }, { "epoch": 0.14682393414424377, "grad_norm": 1.9262031182157078, "learning_rate": 2.980018452021108e-05, "loss": 0.8191, "step": 1195 }, { "epoch": 0.14694679936110086, "grad_norm": 1.8125483833149785, "learning_rate": 2.9799136593854524e-05, "loss": 0.5848, "step": 1196 }, { "epoch": 0.14706966457795798, "grad_norm": 1.955113153097074, "learning_rate": 2.979808594529277e-05, "loss": 0.7724, "step": 1197 }, { "epoch": 0.1471925297948151, "grad_norm": 1.5764574389032477, "learning_rate": 2.979703257471908e-05, "loss": 0.653, "step": 1198 }, { "epoch": 0.1473153950116722, "grad_norm": 1.4581251083745166, "learning_rate": 2.9795976482327206e-05, "loss": 0.6443, "step": 1199 }, { "epoch": 0.1474382602285293, "grad_norm": 1.5907438929782913, "learning_rate": 2.979491766831141e-05, "loss": 0.5955, "step": 1200 }, { "epoch": 0.1475611254453864, "grad_norm": 1.8345042417039938, "learning_rate": 2.9793856132866465e-05, "loss": 0.7026, "step": 1201 }, { "epoch": 0.14768399066224353, "grad_norm": 1.8327276685472635, "learning_rate": 2.979279187618762e-05, "loss": 0.7877, "step": 1202 }, { "epoch": 0.14780685587910064, "grad_norm": 1.655776278183064, "learning_rate": 2.9791724898470646e-05, "loss": 0.7082, "step": 1203 }, { "epoch": 0.14792972109595773, "grad_norm": 1.8710857964958132, "learning_rate": 2.9790655199911803e-05, "loss": 0.6639, "step": 1204 }, { "epoch": 0.14805258631281484, "grad_norm": 1.5729160823184503, "learning_rate": 2.978958278070786e-05, "loss": 0.7187, "step": 1205 }, { "epoch": 0.14817545152967196, "grad_norm": 1.7692842183867792, "learning_rate": 2.9788507641056077e-05, "loss": 0.6397, "step": 1206 }, { "epoch": 0.14829831674652905, "grad_norm": 1.5225726857010475, "learning_rate": 2.9787429781154216e-05, "loss": 0.6324, "step": 1207 }, { "epoch": 0.14842118196338616, "grad_norm": 1.574101701200964, "learning_rate": 2.9786349201200554e-05, "loss": 0.6848, "step": 1208 }, { "epoch": 0.14854404718024328, "grad_norm": 1.7350174605080917, "learning_rate": 2.9785265901393843e-05, "loss": 0.623, "step": 1209 }, { "epoch": 0.1486669123971004, "grad_norm": 1.519916208235255, "learning_rate": 2.978417988193336e-05, "loss": 0.6394, "step": 1210 }, { "epoch": 0.14878977761395748, "grad_norm": 1.4064763057858765, "learning_rate": 2.9783091143018862e-05, "loss": 0.5975, "step": 1211 }, { "epoch": 0.1489126428308146, "grad_norm": 1.8710996494560364, "learning_rate": 2.9781999684850625e-05, "loss": 0.6911, "step": 1212 }, { "epoch": 0.1490355080476717, "grad_norm": 1.5262959561241536, "learning_rate": 2.9780905507629405e-05, "loss": 0.6146, "step": 1213 }, { "epoch": 0.1491583732645288, "grad_norm": 1.8150959374948743, "learning_rate": 2.9779808611556478e-05, "loss": 0.6508, "step": 1214 }, { "epoch": 0.14928123848138591, "grad_norm": 1.6406309209669037, "learning_rate": 2.977870899683361e-05, "loss": 0.6877, "step": 1215 }, { "epoch": 0.14940410369824303, "grad_norm": 1.5686184682656354, "learning_rate": 2.9777606663663058e-05, "loss": 0.5543, "step": 1216 }, { "epoch": 0.14952696891510014, "grad_norm": 1.440127246322775, "learning_rate": 2.9776501612247603e-05, "loss": 0.7569, "step": 1217 }, { "epoch": 0.14964983413195723, "grad_norm": 2.027850025331084, "learning_rate": 2.97753938427905e-05, "loss": 0.7018, "step": 1218 }, { "epoch": 0.14977269934881435, "grad_norm": 1.8733522399494582, "learning_rate": 2.9774283355495527e-05, "loss": 0.6699, "step": 1219 }, { "epoch": 0.14989556456567146, "grad_norm": 1.6544268863803946, "learning_rate": 2.9773170150566943e-05, "loss": 0.7353, "step": 1220 }, { "epoch": 0.15001842978252858, "grad_norm": 1.5992461680689807, "learning_rate": 2.9772054228209514e-05, "loss": 0.7086, "step": 1221 }, { "epoch": 0.15014129499938567, "grad_norm": 1.7465601249987257, "learning_rate": 2.9770935588628513e-05, "loss": 0.7106, "step": 1222 }, { "epoch": 0.15026416021624278, "grad_norm": 1.4179863590448907, "learning_rate": 2.9769814232029703e-05, "loss": 0.739, "step": 1223 }, { "epoch": 0.1503870254330999, "grad_norm": 1.5766732914177883, "learning_rate": 2.976869015861935e-05, "loss": 0.7044, "step": 1224 }, { "epoch": 0.15050989064995698, "grad_norm": 1.5193364312620863, "learning_rate": 2.976756336860422e-05, "loss": 0.6617, "step": 1225 }, { "epoch": 0.1506327558668141, "grad_norm": 1.8576393134305684, "learning_rate": 2.976643386219158e-05, "loss": 0.7441, "step": 1226 }, { "epoch": 0.15075562108367122, "grad_norm": 1.6802947420540455, "learning_rate": 2.97653016395892e-05, "loss": 0.5402, "step": 1227 }, { "epoch": 0.15087848630052833, "grad_norm": 1.465944010393479, "learning_rate": 2.9764166701005334e-05, "loss": 0.6746, "step": 1228 }, { "epoch": 0.15100135151738542, "grad_norm": 1.5726405831910284, "learning_rate": 2.9763029046648753e-05, "loss": 0.5916, "step": 1229 }, { "epoch": 0.15112421673424253, "grad_norm": 1.5494649880831801, "learning_rate": 2.976188867672872e-05, "loss": 0.5809, "step": 1230 }, { "epoch": 0.15124708195109965, "grad_norm": 1.812806697714595, "learning_rate": 2.9760745591455e-05, "loss": 0.7011, "step": 1231 }, { "epoch": 0.15136994716795676, "grad_norm": 1.6085733910007172, "learning_rate": 2.9759599791037855e-05, "loss": 0.6369, "step": 1232 }, { "epoch": 0.15149281238481385, "grad_norm": 1.6843380470488998, "learning_rate": 2.9758451275688044e-05, "loss": 0.6993, "step": 1233 }, { "epoch": 0.15161567760167097, "grad_norm": 1.685937370061377, "learning_rate": 2.975730004561684e-05, "loss": 0.7194, "step": 1234 }, { "epoch": 0.15173854281852808, "grad_norm": 1.8206571296468042, "learning_rate": 2.9756146101035995e-05, "loss": 0.7139, "step": 1235 }, { "epoch": 0.15186140803538517, "grad_norm": 1.8174694587224753, "learning_rate": 2.9754989442157767e-05, "loss": 0.7365, "step": 1236 }, { "epoch": 0.15198427325224229, "grad_norm": 2.0524678961573075, "learning_rate": 2.9753830069194926e-05, "loss": 0.7248, "step": 1237 }, { "epoch": 0.1521071384690994, "grad_norm": 1.6973357879228583, "learning_rate": 2.9752667982360725e-05, "loss": 0.6457, "step": 1238 }, { "epoch": 0.15223000368595652, "grad_norm": 1.8660744581845183, "learning_rate": 2.975150318186892e-05, "loss": 0.6815, "step": 1239 }, { "epoch": 0.1523528689028136, "grad_norm": 1.8230493721011525, "learning_rate": 2.9750335667933775e-05, "loss": 0.7618, "step": 1240 }, { "epoch": 0.15247573411967072, "grad_norm": 1.7556107481799574, "learning_rate": 2.9749165440770037e-05, "loss": 0.7147, "step": 1241 }, { "epoch": 0.15259859933652783, "grad_norm": 1.6465963824374081, "learning_rate": 2.9747992500592977e-05, "loss": 0.6343, "step": 1242 }, { "epoch": 0.15272146455338495, "grad_norm": 1.5166542104554002, "learning_rate": 2.9746816847618333e-05, "loss": 0.6261, "step": 1243 }, { "epoch": 0.15284432977024204, "grad_norm": 1.5597973401527465, "learning_rate": 2.974563848206237e-05, "loss": 0.6809, "step": 1244 }, { "epoch": 0.15296719498709915, "grad_norm": 1.7195669806365428, "learning_rate": 2.9744457404141837e-05, "loss": 0.6773, "step": 1245 }, { "epoch": 0.15309006020395627, "grad_norm": 1.6324555019739144, "learning_rate": 2.9743273614073987e-05, "loss": 0.6696, "step": 1246 }, { "epoch": 0.15321292542081336, "grad_norm": 1.6042525455540455, "learning_rate": 2.974208711207657e-05, "loss": 0.6072, "step": 1247 }, { "epoch": 0.15333579063767047, "grad_norm": 1.3472970887532731, "learning_rate": 2.9740897898367827e-05, "loss": 0.6372, "step": 1248 }, { "epoch": 0.1534586558545276, "grad_norm": 1.6303594470143339, "learning_rate": 2.973970597316652e-05, "loss": 0.6902, "step": 1249 }, { "epoch": 0.1535815210713847, "grad_norm": 1.8559133738857785, "learning_rate": 2.9738511336691887e-05, "loss": 0.7459, "step": 1250 }, { "epoch": 0.1537043862882418, "grad_norm": 1.5022320996995606, "learning_rate": 2.973731398916368e-05, "loss": 0.6402, "step": 1251 }, { "epoch": 0.1538272515050989, "grad_norm": 1.5781780439044837, "learning_rate": 2.9736113930802134e-05, "loss": 0.7471, "step": 1252 }, { "epoch": 0.15395011672195602, "grad_norm": 1.919848691796749, "learning_rate": 2.9734911161828e-05, "loss": 0.7187, "step": 1253 }, { "epoch": 0.1540729819388131, "grad_norm": 1.4624158100168778, "learning_rate": 2.973370568246252e-05, "loss": 0.7944, "step": 1254 }, { "epoch": 0.15419584715567022, "grad_norm": 1.586576339664053, "learning_rate": 2.9732497492927424e-05, "loss": 0.6188, "step": 1255 }, { "epoch": 0.15431871237252734, "grad_norm": 1.599093378098706, "learning_rate": 2.9731286593444967e-05, "loss": 0.6992, "step": 1256 }, { "epoch": 0.15444157758938445, "grad_norm": 1.6118384022378216, "learning_rate": 2.973007298423787e-05, "loss": 0.7319, "step": 1257 }, { "epoch": 0.15456444280624154, "grad_norm": 1.5112759376211, "learning_rate": 2.9728856665529378e-05, "loss": 0.6735, "step": 1258 }, { "epoch": 0.15468730802309866, "grad_norm": 1.6017720437177714, "learning_rate": 2.9727637637543225e-05, "loss": 0.7303, "step": 1259 }, { "epoch": 0.15481017323995577, "grad_norm": 1.4653649692502642, "learning_rate": 2.9726415900503635e-05, "loss": 0.6752, "step": 1260 }, { "epoch": 0.1549330384568129, "grad_norm": 1.6244094135079614, "learning_rate": 2.9725191454635346e-05, "loss": 0.5937, "step": 1261 }, { "epoch": 0.15505590367366998, "grad_norm": 1.7797051934797476, "learning_rate": 2.9723964300163584e-05, "loss": 0.7147, "step": 1262 }, { "epoch": 0.1551787688905271, "grad_norm": 1.4272109958895642, "learning_rate": 2.9722734437314084e-05, "loss": 0.734, "step": 1263 }, { "epoch": 0.1553016341073842, "grad_norm": 1.5027857866874148, "learning_rate": 2.972150186631306e-05, "loss": 0.741, "step": 1264 }, { "epoch": 0.1554244993242413, "grad_norm": 1.5861847813482135, "learning_rate": 2.9720266587387236e-05, "loss": 0.6582, "step": 1265 }, { "epoch": 0.1555473645410984, "grad_norm": 1.7374219966008766, "learning_rate": 2.971902860076384e-05, "loss": 0.6234, "step": 1266 }, { "epoch": 0.15567022975795552, "grad_norm": 1.4613946582290924, "learning_rate": 2.9717787906670592e-05, "loss": 0.6831, "step": 1267 }, { "epoch": 0.15579309497481264, "grad_norm": 1.6033703076904535, "learning_rate": 2.9716544505335705e-05, "loss": 0.6142, "step": 1268 }, { "epoch": 0.15591596019166973, "grad_norm": 1.5013388546449573, "learning_rate": 2.9715298396987898e-05, "loss": 0.7599, "step": 1269 }, { "epoch": 0.15603882540852684, "grad_norm": 1.7325398793553692, "learning_rate": 2.971404958185638e-05, "loss": 0.67, "step": 1270 }, { "epoch": 0.15616169062538396, "grad_norm": 1.6531963699380625, "learning_rate": 2.9712798060170868e-05, "loss": 0.628, "step": 1271 }, { "epoch": 0.15628455584224107, "grad_norm": 1.8100984749784255, "learning_rate": 2.9711543832161565e-05, "loss": 0.6465, "step": 1272 }, { "epoch": 0.15640742105909816, "grad_norm": 1.7743883132036649, "learning_rate": 2.9710286898059185e-05, "loss": 0.7905, "step": 1273 }, { "epoch": 0.15653028627595528, "grad_norm": 1.758571612609507, "learning_rate": 2.970902725809493e-05, "loss": 0.8187, "step": 1274 }, { "epoch": 0.1566531514928124, "grad_norm": 1.8136552327253288, "learning_rate": 2.97077649125005e-05, "loss": 0.6677, "step": 1275 }, { "epoch": 0.15677601670966948, "grad_norm": 1.6582225453232893, "learning_rate": 2.9706499861508098e-05, "loss": 0.586, "step": 1276 }, { "epoch": 0.1568988819265266, "grad_norm": 1.5538353615202256, "learning_rate": 2.9705232105350427e-05, "loss": 0.6086, "step": 1277 }, { "epoch": 0.1570217471433837, "grad_norm": 1.6881453621732438, "learning_rate": 2.970396164426067e-05, "loss": 0.6459, "step": 1278 }, { "epoch": 0.15714461236024083, "grad_norm": 1.7824457932803317, "learning_rate": 2.970268847847253e-05, "loss": 0.758, "step": 1279 }, { "epoch": 0.1572674775770979, "grad_norm": 1.6059541957193617, "learning_rate": 2.9701412608220193e-05, "loss": 0.5885, "step": 1280 }, { "epoch": 0.15739034279395503, "grad_norm": 1.6319281442140379, "learning_rate": 2.970013403373835e-05, "loss": 0.6287, "step": 1281 }, { "epoch": 0.15751320801081214, "grad_norm": 1.4178645942500518, "learning_rate": 2.9698852755262186e-05, "loss": 0.5757, "step": 1282 }, { "epoch": 0.15763607322766926, "grad_norm": 1.6505530635654917, "learning_rate": 2.9697568773027385e-05, "loss": 0.7155, "step": 1283 }, { "epoch": 0.15775893844452635, "grad_norm": 1.5030189715633817, "learning_rate": 2.9696282087270116e-05, "loss": 0.8029, "step": 1284 }, { "epoch": 0.15788180366138346, "grad_norm": 1.7418034144745902, "learning_rate": 2.9694992698227074e-05, "loss": 0.6499, "step": 1285 }, { "epoch": 0.15800466887824058, "grad_norm": 1.346544920923115, "learning_rate": 2.9693700606135425e-05, "loss": 0.6607, "step": 1286 }, { "epoch": 0.15812753409509767, "grad_norm": 1.621410126077198, "learning_rate": 2.969240581123284e-05, "loss": 0.8047, "step": 1287 }, { "epoch": 0.15825039931195478, "grad_norm": 1.6409232256203494, "learning_rate": 2.969110831375749e-05, "loss": 0.6086, "step": 1288 }, { "epoch": 0.1583732645288119, "grad_norm": 1.8995747747087122, "learning_rate": 2.968980811394804e-05, "loss": 0.6762, "step": 1289 }, { "epoch": 0.158496129745669, "grad_norm": 1.7754159948899968, "learning_rate": 2.9688505212043656e-05, "loss": 0.7541, "step": 1290 }, { "epoch": 0.1586189949625261, "grad_norm": 1.570811799089022, "learning_rate": 2.9687199608283992e-05, "loss": 0.6511, "step": 1291 }, { "epoch": 0.15874186017938322, "grad_norm": 1.6469236055916405, "learning_rate": 2.9685891302909213e-05, "loss": 0.7096, "step": 1292 }, { "epoch": 0.15886472539624033, "grad_norm": 1.5254124244048641, "learning_rate": 2.9684580296159973e-05, "loss": 0.6033, "step": 1293 }, { "epoch": 0.15898759061309745, "grad_norm": 1.4985237821231112, "learning_rate": 2.9683266588277417e-05, "loss": 0.7394, "step": 1294 }, { "epoch": 0.15911045582995453, "grad_norm": 1.4449490425959275, "learning_rate": 2.9681950179503196e-05, "loss": 0.7565, "step": 1295 }, { "epoch": 0.15923332104681165, "grad_norm": 1.591382564047471, "learning_rate": 2.968063107007946e-05, "loss": 0.7137, "step": 1296 }, { "epoch": 0.15935618626366876, "grad_norm": 1.7880322015403431, "learning_rate": 2.967930926024884e-05, "loss": 0.6009, "step": 1297 }, { "epoch": 0.15947905148052585, "grad_norm": 1.411917167878625, "learning_rate": 2.9677984750254482e-05, "loss": 0.6514, "step": 1298 }, { "epoch": 0.15960191669738297, "grad_norm": 1.6356712102624247, "learning_rate": 2.967665754034002e-05, "loss": 0.6135, "step": 1299 }, { "epoch": 0.15972478191424008, "grad_norm": 1.4553546760529097, "learning_rate": 2.9675327630749587e-05, "loss": 0.7229, "step": 1300 }, { "epoch": 0.1598476471310972, "grad_norm": 1.3834559534024546, "learning_rate": 2.967399502172781e-05, "loss": 0.6244, "step": 1301 }, { "epoch": 0.15997051234795429, "grad_norm": 1.680602345451252, "learning_rate": 2.9672659713519805e-05, "loss": 0.7073, "step": 1302 }, { "epoch": 0.1600933775648114, "grad_norm": 1.3865309773500987, "learning_rate": 2.9671321706371206e-05, "loss": 0.7573, "step": 1303 }, { "epoch": 0.16021624278166852, "grad_norm": 1.3975713757750239, "learning_rate": 2.966998100052813e-05, "loss": 0.6678, "step": 1304 }, { "epoch": 0.1603391079985256, "grad_norm": 1.6903514129699546, "learning_rate": 2.966863759623718e-05, "loss": 0.673, "step": 1305 }, { "epoch": 0.16046197321538272, "grad_norm": 1.6165966700953591, "learning_rate": 2.9667291493745478e-05, "loss": 0.7919, "step": 1306 }, { "epoch": 0.16058483843223983, "grad_norm": 1.499330263769428, "learning_rate": 2.9665942693300626e-05, "loss": 0.5896, "step": 1307 }, { "epoch": 0.16070770364909695, "grad_norm": 1.490506323217619, "learning_rate": 2.9664591195150725e-05, "loss": 0.6456, "step": 1308 }, { "epoch": 0.16083056886595404, "grad_norm": 1.532839452705662, "learning_rate": 2.966323699954438e-05, "loss": 0.6691, "step": 1309 }, { "epoch": 0.16095343408281115, "grad_norm": 1.7803524181408446, "learning_rate": 2.966188010673068e-05, "loss": 0.7608, "step": 1310 }, { "epoch": 0.16107629929966827, "grad_norm": 1.873214918687934, "learning_rate": 2.9660520516959227e-05, "loss": 0.6906, "step": 1311 }, { "epoch": 0.16119916451652538, "grad_norm": 1.6498083580532625, "learning_rate": 2.9659158230480098e-05, "loss": 0.686, "step": 1312 }, { "epoch": 0.16132202973338247, "grad_norm": 1.4210685906984497, "learning_rate": 2.9657793247543875e-05, "loss": 0.5709, "step": 1313 }, { "epoch": 0.1614448949502396, "grad_norm": 1.255774445567483, "learning_rate": 2.965642556840165e-05, "loss": 0.7376, "step": 1314 }, { "epoch": 0.1615677601670967, "grad_norm": 1.7211587252446188, "learning_rate": 2.9655055193304987e-05, "loss": 0.6596, "step": 1315 }, { "epoch": 0.1616906253839538, "grad_norm": 1.4468243973662143, "learning_rate": 2.9653682122505966e-05, "loss": 0.7649, "step": 1316 }, { "epoch": 0.1618134906008109, "grad_norm": 1.5855813203330258, "learning_rate": 2.965230635625715e-05, "loss": 0.7753, "step": 1317 }, { "epoch": 0.16193635581766802, "grad_norm": 1.3841659030117763, "learning_rate": 2.9650927894811607e-05, "loss": 0.7618, "step": 1318 }, { "epoch": 0.16205922103452514, "grad_norm": 1.3952000715802364, "learning_rate": 2.9649546738422887e-05, "loss": 0.6983, "step": 1319 }, { "epoch": 0.16218208625138222, "grad_norm": 1.4968054536422764, "learning_rate": 2.9648162887345052e-05, "loss": 0.7055, "step": 1320 }, { "epoch": 0.16230495146823934, "grad_norm": 1.7636479235532885, "learning_rate": 2.9646776341832648e-05, "loss": 0.5898, "step": 1321 }, { "epoch": 0.16242781668509645, "grad_norm": 1.4590600780346026, "learning_rate": 2.964538710214073e-05, "loss": 0.6796, "step": 1322 }, { "epoch": 0.16255068190195357, "grad_norm": 1.5737879130112384, "learning_rate": 2.9643995168524827e-05, "loss": 0.6996, "step": 1323 }, { "epoch": 0.16267354711881066, "grad_norm": 1.6146752771745299, "learning_rate": 2.964260054124098e-05, "loss": 0.714, "step": 1324 }, { "epoch": 0.16279641233566777, "grad_norm": 1.349611935150245, "learning_rate": 2.964120322054573e-05, "loss": 0.6752, "step": 1325 }, { "epoch": 0.1629192775525249, "grad_norm": 1.5678753556915583, "learning_rate": 2.9639803206696102e-05, "loss": 0.7888, "step": 1326 }, { "epoch": 0.16304214276938198, "grad_norm": 1.5541977646372498, "learning_rate": 2.963840049994961e-05, "loss": 0.596, "step": 1327 }, { "epoch": 0.1631650079862391, "grad_norm": 1.5600434465473811, "learning_rate": 2.9636995100564282e-05, "loss": 0.6501, "step": 1328 }, { "epoch": 0.1632878732030962, "grad_norm": 1.6575221247506704, "learning_rate": 2.9635587008798632e-05, "loss": 0.7925, "step": 1329 }, { "epoch": 0.16341073841995332, "grad_norm": 1.6385754037099134, "learning_rate": 2.9634176224911665e-05, "loss": 0.6807, "step": 1330 }, { "epoch": 0.1635336036368104, "grad_norm": 1.4899317906270981, "learning_rate": 2.9632762749162886e-05, "loss": 0.6111, "step": 1331 }, { "epoch": 0.16365646885366752, "grad_norm": 1.5247825681808087, "learning_rate": 2.9631346581812293e-05, "loss": 0.7122, "step": 1332 }, { "epoch": 0.16377933407052464, "grad_norm": 1.7219166436024107, "learning_rate": 2.962992772312039e-05, "loss": 0.6558, "step": 1333 }, { "epoch": 0.16390219928738176, "grad_norm": 1.7597285588803662, "learning_rate": 2.9628506173348158e-05, "loss": 0.6724, "step": 1334 }, { "epoch": 0.16402506450423884, "grad_norm": 1.4997216536780074, "learning_rate": 2.9627081932757084e-05, "loss": 0.6164, "step": 1335 }, { "epoch": 0.16414792972109596, "grad_norm": 1.805240604993113, "learning_rate": 2.962565500160915e-05, "loss": 0.6678, "step": 1336 }, { "epoch": 0.16427079493795307, "grad_norm": 1.6460684168207265, "learning_rate": 2.9624225380166827e-05, "loss": 0.569, "step": 1337 }, { "epoch": 0.16439366015481016, "grad_norm": 1.6348581199735779, "learning_rate": 2.962279306869309e-05, "loss": 0.6278, "step": 1338 }, { "epoch": 0.16451652537166728, "grad_norm": 1.5927112744789222, "learning_rate": 2.9621358067451398e-05, "loss": 0.7032, "step": 1339 }, { "epoch": 0.1646393905885244, "grad_norm": 1.4603178121679485, "learning_rate": 2.961992037670571e-05, "loss": 0.6792, "step": 1340 }, { "epoch": 0.1647622558053815, "grad_norm": 1.5939427509466404, "learning_rate": 2.9618479996720488e-05, "loss": 0.7612, "step": 1341 }, { "epoch": 0.1648851210222386, "grad_norm": 1.6256083327326802, "learning_rate": 2.9617036927760672e-05, "loss": 0.7003, "step": 1342 }, { "epoch": 0.1650079862390957, "grad_norm": 1.5485164643724154, "learning_rate": 2.9615591170091707e-05, "loss": 0.6028, "step": 1343 }, { "epoch": 0.16513085145595283, "grad_norm": 1.5306703195503275, "learning_rate": 2.961414272397953e-05, "loss": 0.6234, "step": 1344 }, { "epoch": 0.1652537166728099, "grad_norm": 1.696072386920944, "learning_rate": 2.961269158969058e-05, "loss": 0.6911, "step": 1345 }, { "epoch": 0.16537658188966703, "grad_norm": 1.6372621029860812, "learning_rate": 2.9611237767491776e-05, "loss": 0.6443, "step": 1346 }, { "epoch": 0.16549944710652414, "grad_norm": 1.6747976764920425, "learning_rate": 2.9609781257650543e-05, "loss": 0.666, "step": 1347 }, { "epoch": 0.16562231232338126, "grad_norm": 1.6346543852671003, "learning_rate": 2.960832206043479e-05, "loss": 0.6588, "step": 1348 }, { "epoch": 0.16574517754023835, "grad_norm": 1.5856390012333887, "learning_rate": 2.960686017611294e-05, "loss": 0.6731, "step": 1349 }, { "epoch": 0.16586804275709546, "grad_norm": 1.6043550671761344, "learning_rate": 2.9605395604953888e-05, "loss": 0.6289, "step": 1350 }, { "epoch": 0.16599090797395258, "grad_norm": 1.6062708046937797, "learning_rate": 2.960392834722703e-05, "loss": 0.7362, "step": 1351 }, { "epoch": 0.1661137731908097, "grad_norm": 1.446375309058786, "learning_rate": 2.960245840320226e-05, "loss": 0.6967, "step": 1352 }, { "epoch": 0.16623663840766678, "grad_norm": 1.332971322015202, "learning_rate": 2.9600985773149972e-05, "loss": 0.5825, "step": 1353 }, { "epoch": 0.1663595036245239, "grad_norm": 1.6718494779313504, "learning_rate": 2.959951045734104e-05, "loss": 0.6575, "step": 1354 }, { "epoch": 0.166482368841381, "grad_norm": 1.5499521091339896, "learning_rate": 2.9598032456046846e-05, "loss": 0.7077, "step": 1355 }, { "epoch": 0.1666052340582381, "grad_norm": 1.5810052948723623, "learning_rate": 2.9596551769539248e-05, "loss": 0.6392, "step": 1356 }, { "epoch": 0.16672809927509522, "grad_norm": 1.420343980049429, "learning_rate": 2.9595068398090614e-05, "loss": 0.6015, "step": 1357 }, { "epoch": 0.16685096449195233, "grad_norm": 1.717622148076989, "learning_rate": 2.9593582341973803e-05, "loss": 0.7126, "step": 1358 }, { "epoch": 0.16697382970880945, "grad_norm": 1.453619714683216, "learning_rate": 2.959209360146216e-05, "loss": 0.6235, "step": 1359 }, { "epoch": 0.16709669492566653, "grad_norm": 1.5989363714193026, "learning_rate": 2.9590602176829532e-05, "loss": 0.7054, "step": 1360 }, { "epoch": 0.16721956014252365, "grad_norm": 1.4836542734074705, "learning_rate": 2.958910806835026e-05, "loss": 0.6497, "step": 1361 }, { "epoch": 0.16734242535938076, "grad_norm": 1.5246243668553445, "learning_rate": 2.958761127629917e-05, "loss": 0.8059, "step": 1362 }, { "epoch": 0.16746529057623788, "grad_norm": 1.376133113221235, "learning_rate": 2.9586111800951588e-05, "loss": 0.6262, "step": 1363 }, { "epoch": 0.16758815579309497, "grad_norm": 1.666019599416163, "learning_rate": 2.9584609642583337e-05, "loss": 0.7523, "step": 1364 }, { "epoch": 0.16771102100995208, "grad_norm": 1.4734539052591753, "learning_rate": 2.958310480147073e-05, "loss": 0.6735, "step": 1365 }, { "epoch": 0.1678338862268092, "grad_norm": 1.2513640610651222, "learning_rate": 2.9581597277890565e-05, "loss": 0.5961, "step": 1366 }, { "epoch": 0.16795675144366629, "grad_norm": 1.6134625411775323, "learning_rate": 2.958008707212015e-05, "loss": 0.7341, "step": 1367 }, { "epoch": 0.1680796166605234, "grad_norm": 1.5643703066161072, "learning_rate": 2.9578574184437264e-05, "loss": 0.5905, "step": 1368 }, { "epoch": 0.16820248187738052, "grad_norm": 1.4285380841745852, "learning_rate": 2.9577058615120212e-05, "loss": 0.628, "step": 1369 }, { "epoch": 0.16832534709423763, "grad_norm": 1.6855752487774136, "learning_rate": 2.9575540364447755e-05, "loss": 0.6257, "step": 1370 }, { "epoch": 0.16844821231109472, "grad_norm": 1.5499039797191638, "learning_rate": 2.9574019432699182e-05, "loss": 0.6711, "step": 1371 }, { "epoch": 0.16857107752795183, "grad_norm": 1.644987364159324, "learning_rate": 2.9572495820154245e-05, "loss": 0.6704, "step": 1372 }, { "epoch": 0.16869394274480895, "grad_norm": 1.3552428419139861, "learning_rate": 2.957096952709321e-05, "loss": 0.6244, "step": 1373 }, { "epoch": 0.16881680796166607, "grad_norm": 1.440282391337794, "learning_rate": 2.9569440553796824e-05, "loss": 0.6276, "step": 1374 }, { "epoch": 0.16893967317852315, "grad_norm": 1.6692397841242128, "learning_rate": 2.9567908900546335e-05, "loss": 0.6916, "step": 1375 }, { "epoch": 0.16906253839538027, "grad_norm": 1.7035407511741656, "learning_rate": 2.956637456762348e-05, "loss": 0.6653, "step": 1376 }, { "epoch": 0.16918540361223738, "grad_norm": 1.5031875118929463, "learning_rate": 2.9564837555310494e-05, "loss": 0.6172, "step": 1377 }, { "epoch": 0.16930826882909447, "grad_norm": 1.5259437077998224, "learning_rate": 2.9563297863890093e-05, "loss": 0.6944, "step": 1378 }, { "epoch": 0.1694311340459516, "grad_norm": 1.4713393645513817, "learning_rate": 2.956175549364549e-05, "loss": 0.6732, "step": 1379 }, { "epoch": 0.1695539992628087, "grad_norm": 1.415962149721047, "learning_rate": 2.956021044486041e-05, "loss": 0.7165, "step": 1380 }, { "epoch": 0.16967686447966582, "grad_norm": 1.6407949715626922, "learning_rate": 2.9558662717819038e-05, "loss": 0.7315, "step": 1381 }, { "epoch": 0.1697997296965229, "grad_norm": 1.4748050161701602, "learning_rate": 2.955711231280608e-05, "loss": 0.701, "step": 1382 }, { "epoch": 0.16992259491338002, "grad_norm": 1.859427635359362, "learning_rate": 2.955555923010672e-05, "loss": 0.7703, "step": 1383 }, { "epoch": 0.17004546013023714, "grad_norm": 1.6152908335690546, "learning_rate": 2.9554003470006633e-05, "loss": 0.7203, "step": 1384 }, { "epoch": 0.17016832534709425, "grad_norm": 1.614601380070933, "learning_rate": 2.9552445032791988e-05, "loss": 0.7042, "step": 1385 }, { "epoch": 0.17029119056395134, "grad_norm": 1.6173387658269949, "learning_rate": 2.955088391874946e-05, "loss": 0.6326, "step": 1386 }, { "epoch": 0.17041405578080845, "grad_norm": 1.660984461504832, "learning_rate": 2.9549320128166202e-05, "loss": 0.7155, "step": 1387 }, { "epoch": 0.17053692099766557, "grad_norm": 1.7657798049176725, "learning_rate": 2.954775366132986e-05, "loss": 0.7123, "step": 1388 }, { "epoch": 0.17065978621452266, "grad_norm": 1.7585763719882277, "learning_rate": 2.954618451852858e-05, "loss": 0.7122, "step": 1389 }, { "epoch": 0.17078265143137977, "grad_norm": 1.4947594902784727, "learning_rate": 2.9544612700050994e-05, "loss": 0.577, "step": 1390 }, { "epoch": 0.1709055166482369, "grad_norm": 1.726186252410145, "learning_rate": 2.9543038206186223e-05, "loss": 0.6054, "step": 1391 }, { "epoch": 0.171028381865094, "grad_norm": 1.3504585798542077, "learning_rate": 2.9541461037223888e-05, "loss": 0.6494, "step": 1392 }, { "epoch": 0.1711512470819511, "grad_norm": 1.5204637765791489, "learning_rate": 2.9539881193454105e-05, "loss": 0.738, "step": 1393 }, { "epoch": 0.1712741122988082, "grad_norm": 1.4712842003096895, "learning_rate": 2.953829867516747e-05, "loss": 0.6545, "step": 1394 }, { "epoch": 0.17139697751566532, "grad_norm": 1.7624811357671435, "learning_rate": 2.9536713482655074e-05, "loss": 0.7616, "step": 1395 }, { "epoch": 0.1715198427325224, "grad_norm": 1.5793939489056135, "learning_rate": 2.9535125616208507e-05, "loss": 0.5927, "step": 1396 }, { "epoch": 0.17164270794937952, "grad_norm": 1.5413068428272383, "learning_rate": 2.953353507611985e-05, "loss": 0.7394, "step": 1397 }, { "epoch": 0.17176557316623664, "grad_norm": 1.534149029927234, "learning_rate": 2.9531941862681667e-05, "loss": 0.5921, "step": 1398 }, { "epoch": 0.17188843838309376, "grad_norm": 1.6345962934802634, "learning_rate": 2.953034597618702e-05, "loss": 0.735, "step": 1399 }, { "epoch": 0.17201130359995084, "grad_norm": 1.646560670364865, "learning_rate": 2.9528747416929467e-05, "loss": 0.8204, "step": 1400 }, { "epoch": 0.17213416881680796, "grad_norm": 1.716818319300227, "learning_rate": 2.952714618520305e-05, "loss": 0.6774, "step": 1401 }, { "epoch": 0.17225703403366507, "grad_norm": 1.5693943569446105, "learning_rate": 2.95255422813023e-05, "loss": 0.6844, "step": 1402 }, { "epoch": 0.1723798992505222, "grad_norm": 1.5280648593827988, "learning_rate": 2.952393570552225e-05, "loss": 0.5662, "step": 1403 }, { "epoch": 0.17250276446737928, "grad_norm": 1.6995423785868131, "learning_rate": 2.9522326458158415e-05, "loss": 0.6673, "step": 1404 }, { "epoch": 0.1726256296842364, "grad_norm": 1.3420049178959992, "learning_rate": 2.9520714539506812e-05, "loss": 0.7075, "step": 1405 }, { "epoch": 0.1727484949010935, "grad_norm": 1.7184323189753032, "learning_rate": 2.951909994986394e-05, "loss": 0.7002, "step": 1406 }, { "epoch": 0.1728713601179506, "grad_norm": 1.4573732306749252, "learning_rate": 2.951748268952679e-05, "loss": 0.6235, "step": 1407 }, { "epoch": 0.1729942253348077, "grad_norm": 1.4106934683650214, "learning_rate": 2.951586275879285e-05, "loss": 0.8082, "step": 1408 }, { "epoch": 0.17311709055166483, "grad_norm": 1.4971725182764106, "learning_rate": 2.9514240157960093e-05, "loss": 0.7283, "step": 1409 }, { "epoch": 0.17323995576852194, "grad_norm": 1.3416199512123435, "learning_rate": 2.951261488732699e-05, "loss": 0.6836, "step": 1410 }, { "epoch": 0.17336282098537903, "grad_norm": 1.532201756127604, "learning_rate": 2.9510986947192494e-05, "loss": 0.6073, "step": 1411 }, { "epoch": 0.17348568620223614, "grad_norm": 1.6605921112596864, "learning_rate": 2.9509356337856054e-05, "loss": 0.7326, "step": 1412 }, { "epoch": 0.17360855141909326, "grad_norm": 1.6224598165853281, "learning_rate": 2.9507723059617616e-05, "loss": 0.7144, "step": 1413 }, { "epoch": 0.17373141663595038, "grad_norm": 2.043972299822935, "learning_rate": 2.9506087112777602e-05, "loss": 0.7368, "step": 1414 }, { "epoch": 0.17385428185280746, "grad_norm": 2.0465721933321923, "learning_rate": 2.9504448497636945e-05, "loss": 0.6609, "step": 1415 }, { "epoch": 0.17397714706966458, "grad_norm": 1.5030607083624699, "learning_rate": 2.9502807214497047e-05, "loss": 0.6917, "step": 1416 }, { "epoch": 0.1741000122865217, "grad_norm": 1.536416025960106, "learning_rate": 2.9501163263659818e-05, "loss": 0.6456, "step": 1417 }, { "epoch": 0.17422287750337878, "grad_norm": 1.6335548826329411, "learning_rate": 2.949951664542765e-05, "loss": 0.6347, "step": 1418 }, { "epoch": 0.1743457427202359, "grad_norm": 1.3684693715493643, "learning_rate": 2.9497867360103427e-05, "loss": 0.6867, "step": 1419 }, { "epoch": 0.174468607937093, "grad_norm": 1.4759374721908372, "learning_rate": 2.9496215407990524e-05, "loss": 0.6684, "step": 1420 }, { "epoch": 0.17459147315395013, "grad_norm": 1.5291765687504206, "learning_rate": 2.949456078939281e-05, "loss": 0.5793, "step": 1421 }, { "epoch": 0.17471433837080722, "grad_norm": 1.4873522430689772, "learning_rate": 2.949290350461464e-05, "loss": 0.5832, "step": 1422 }, { "epoch": 0.17483720358766433, "grad_norm": 2.096656429728606, "learning_rate": 2.9491243553960856e-05, "loss": 0.8136, "step": 1423 }, { "epoch": 0.17496006880452145, "grad_norm": 1.9679070349029695, "learning_rate": 2.9489580937736805e-05, "loss": 0.7235, "step": 1424 }, { "epoch": 0.17508293402137856, "grad_norm": 1.6044879880142335, "learning_rate": 2.94879156562483e-05, "loss": 0.6913, "step": 1425 }, { "epoch": 0.17520579923823565, "grad_norm": 1.5466630601582532, "learning_rate": 2.9486247709801674e-05, "loss": 0.5825, "step": 1426 }, { "epoch": 0.17532866445509276, "grad_norm": 2.0799826717479486, "learning_rate": 2.948457709870373e-05, "loss": 0.747, "step": 1427 }, { "epoch": 0.17545152967194988, "grad_norm": 2.0136744683524577, "learning_rate": 2.948290382326176e-05, "loss": 0.7633, "step": 1428 }, { "epoch": 0.17557439488880697, "grad_norm": 2.063264482844403, "learning_rate": 2.948122788378356e-05, "loss": 0.7632, "step": 1429 }, { "epoch": 0.17569726010566408, "grad_norm": 1.6538766663957383, "learning_rate": 2.9479549280577402e-05, "loss": 0.6584, "step": 1430 }, { "epoch": 0.1758201253225212, "grad_norm": 1.564633156023667, "learning_rate": 2.947786801395206e-05, "loss": 0.6585, "step": 1431 }, { "epoch": 0.1759429905393783, "grad_norm": 1.9106723945180943, "learning_rate": 2.947618408421679e-05, "loss": 0.7811, "step": 1432 }, { "epoch": 0.1760658557562354, "grad_norm": 1.4642936704120249, "learning_rate": 2.9474497491681337e-05, "loss": 0.5989, "step": 1433 }, { "epoch": 0.17618872097309252, "grad_norm": 2.0912469796637296, "learning_rate": 2.947280823665594e-05, "loss": 0.7146, "step": 1434 }, { "epoch": 0.17631158618994963, "grad_norm": 1.3978127297662788, "learning_rate": 2.9471116319451324e-05, "loss": 0.6926, "step": 1435 }, { "epoch": 0.17643445140680672, "grad_norm": 1.4606903653395344, "learning_rate": 2.9469421740378713e-05, "loss": 0.6045, "step": 1436 }, { "epoch": 0.17655731662366383, "grad_norm": 1.4623692323121391, "learning_rate": 2.9467724499749813e-05, "loss": 0.629, "step": 1437 }, { "epoch": 0.17668018184052095, "grad_norm": 1.4507574393995153, "learning_rate": 2.9466024597876814e-05, "loss": 0.6791, "step": 1438 }, { "epoch": 0.17680304705737807, "grad_norm": 1.569372059007248, "learning_rate": 2.9464322035072407e-05, "loss": 0.6276, "step": 1439 }, { "epoch": 0.17692591227423515, "grad_norm": 1.4384239425415848, "learning_rate": 2.9462616811649767e-05, "loss": 0.5997, "step": 1440 }, { "epoch": 0.17704877749109227, "grad_norm": 1.6405405258624208, "learning_rate": 2.9460908927922557e-05, "loss": 0.7524, "step": 1441 }, { "epoch": 0.17717164270794938, "grad_norm": 1.5348827652034749, "learning_rate": 2.945919838420493e-05, "loss": 0.5959, "step": 1442 }, { "epoch": 0.1772945079248065, "grad_norm": 1.3066900207645296, "learning_rate": 2.9457485180811535e-05, "loss": 0.6312, "step": 1443 }, { "epoch": 0.1774173731416636, "grad_norm": 1.5978762084970735, "learning_rate": 2.94557693180575e-05, "loss": 0.6885, "step": 1444 }, { "epoch": 0.1775402383585207, "grad_norm": 1.709069614002222, "learning_rate": 2.9454050796258448e-05, "loss": 0.6845, "step": 1445 }, { "epoch": 0.17766310357537782, "grad_norm": 1.545706411211335, "learning_rate": 2.9452329615730488e-05, "loss": 0.6297, "step": 1446 }, { "epoch": 0.1777859687922349, "grad_norm": 1.5730727277601777, "learning_rate": 2.9450605776790225e-05, "loss": 0.7458, "step": 1447 }, { "epoch": 0.17790883400909202, "grad_norm": 1.4097299448069807, "learning_rate": 2.9448879279754743e-05, "loss": 0.7984, "step": 1448 }, { "epoch": 0.17803169922594914, "grad_norm": 1.567873443564787, "learning_rate": 2.944715012494162e-05, "loss": 0.6279, "step": 1449 }, { "epoch": 0.17815456444280625, "grad_norm": 1.3985384103621674, "learning_rate": 2.9445418312668924e-05, "loss": 0.5952, "step": 1450 }, { "epoch": 0.17827742965966334, "grad_norm": 1.493695719527005, "learning_rate": 2.944368384325522e-05, "loss": 0.6776, "step": 1451 }, { "epoch": 0.17840029487652045, "grad_norm": 1.62654721001496, "learning_rate": 2.9441946717019535e-05, "loss": 0.5808, "step": 1452 }, { "epoch": 0.17852316009337757, "grad_norm": 1.972363365551275, "learning_rate": 2.9440206934281413e-05, "loss": 0.7768, "step": 1453 }, { "epoch": 0.17864602531023469, "grad_norm": 1.585423769802939, "learning_rate": 2.943846449536087e-05, "loss": 0.6339, "step": 1454 }, { "epoch": 0.17876889052709177, "grad_norm": 1.4837724119117175, "learning_rate": 2.9436719400578426e-05, "loss": 0.599, "step": 1455 }, { "epoch": 0.1788917557439489, "grad_norm": 1.5058697390196099, "learning_rate": 2.9434971650255067e-05, "loss": 0.637, "step": 1456 }, { "epoch": 0.179014620960806, "grad_norm": 1.610007553136736, "learning_rate": 2.9433221244712293e-05, "loss": 0.6487, "step": 1457 }, { "epoch": 0.1791374861776631, "grad_norm": 1.4929631736720275, "learning_rate": 2.9431468184272072e-05, "loss": 0.6298, "step": 1458 }, { "epoch": 0.1792603513945202, "grad_norm": 1.5586690893882018, "learning_rate": 2.942971246925687e-05, "loss": 0.6707, "step": 1459 }, { "epoch": 0.17938321661137732, "grad_norm": 1.513648086155852, "learning_rate": 2.942795409998964e-05, "loss": 0.5753, "step": 1460 }, { "epoch": 0.17950608182823444, "grad_norm": 1.7002616785187883, "learning_rate": 2.9426193076793817e-05, "loss": 0.6875, "step": 1461 }, { "epoch": 0.17962894704509152, "grad_norm": 1.4660620005206715, "learning_rate": 2.942442939999334e-05, "loss": 0.5373, "step": 1462 }, { "epoch": 0.17975181226194864, "grad_norm": 1.4872749341512093, "learning_rate": 2.9422663069912616e-05, "loss": 0.6818, "step": 1463 }, { "epoch": 0.17987467747880576, "grad_norm": 1.6770990236996794, "learning_rate": 2.942089408687656e-05, "loss": 0.7107, "step": 1464 }, { "epoch": 0.17999754269566287, "grad_norm": 1.4447846004930602, "learning_rate": 2.9419122451210556e-05, "loss": 0.8194, "step": 1465 }, { "epoch": 0.18012040791251996, "grad_norm": 1.2955118740243192, "learning_rate": 2.941734816324049e-05, "loss": 0.7268, "step": 1466 }, { "epoch": 0.18024327312937707, "grad_norm": 1.4048086415234264, "learning_rate": 2.9415571223292726e-05, "loss": 0.5934, "step": 1467 }, { "epoch": 0.1803661383462342, "grad_norm": 2.1032243579165075, "learning_rate": 2.9413791631694128e-05, "loss": 0.7375, "step": 1468 }, { "epoch": 0.18048900356309128, "grad_norm": 1.3718159405342165, "learning_rate": 2.9412009388772033e-05, "loss": 0.636, "step": 1469 }, { "epoch": 0.1806118687799484, "grad_norm": 1.5341274586113056, "learning_rate": 2.941022449485428e-05, "loss": 0.7376, "step": 1470 }, { "epoch": 0.1807347339968055, "grad_norm": 1.692185668962046, "learning_rate": 2.940843695026918e-05, "loss": 0.732, "step": 1471 }, { "epoch": 0.18085759921366262, "grad_norm": 1.5734363321314713, "learning_rate": 2.9406646755345544e-05, "loss": 0.6489, "step": 1472 }, { "epoch": 0.1809804644305197, "grad_norm": 1.2534477778005653, "learning_rate": 2.9404853910412674e-05, "loss": 0.6126, "step": 1473 }, { "epoch": 0.18110332964737683, "grad_norm": 1.6140586630188694, "learning_rate": 2.9403058415800344e-05, "loss": 0.6208, "step": 1474 }, { "epoch": 0.18122619486423394, "grad_norm": 1.6799085476886144, "learning_rate": 2.9401260271838822e-05, "loss": 0.6901, "step": 1475 }, { "epoch": 0.18134906008109106, "grad_norm": 2.434275639498713, "learning_rate": 2.9399459478858872e-05, "loss": 0.7896, "step": 1476 }, { "epoch": 0.18147192529794814, "grad_norm": 1.4462437780478459, "learning_rate": 2.939765603719173e-05, "loss": 0.6225, "step": 1477 }, { "epoch": 0.18159479051480526, "grad_norm": 1.4544564306575352, "learning_rate": 2.9395849947169136e-05, "loss": 0.6707, "step": 1478 }, { "epoch": 0.18171765573166238, "grad_norm": 1.4806034262109453, "learning_rate": 2.939404120912331e-05, "loss": 0.5727, "step": 1479 }, { "epoch": 0.18184052094851946, "grad_norm": 1.7589791823827159, "learning_rate": 2.9392229823386944e-05, "loss": 0.6641, "step": 1480 }, { "epoch": 0.18196338616537658, "grad_norm": 1.7478586267721083, "learning_rate": 2.9390415790293236e-05, "loss": 0.9111, "step": 1481 }, { "epoch": 0.1820862513822337, "grad_norm": 1.5863274778027279, "learning_rate": 2.938859911017588e-05, "loss": 0.6995, "step": 1482 }, { "epoch": 0.1822091165990908, "grad_norm": 1.5006370865097798, "learning_rate": 2.938677978336902e-05, "loss": 0.6935, "step": 1483 }, { "epoch": 0.1823319818159479, "grad_norm": 1.7905091415311891, "learning_rate": 2.9384957810207326e-05, "loss": 0.6946, "step": 1484 }, { "epoch": 0.182454847032805, "grad_norm": 1.6044825728879877, "learning_rate": 2.938313319102593e-05, "loss": 0.728, "step": 1485 }, { "epoch": 0.18257771224966213, "grad_norm": 1.4864421671004941, "learning_rate": 2.9381305926160464e-05, "loss": 0.6502, "step": 1486 }, { "epoch": 0.18270057746651922, "grad_norm": 1.484714343444669, "learning_rate": 2.9379476015947035e-05, "loss": 0.5789, "step": 1487 }, { "epoch": 0.18282344268337633, "grad_norm": 1.6513371986804872, "learning_rate": 2.9377643460722256e-05, "loss": 0.7086, "step": 1488 }, { "epoch": 0.18294630790023345, "grad_norm": 1.4165022328757289, "learning_rate": 2.9375808260823192e-05, "loss": 0.619, "step": 1489 }, { "epoch": 0.18306917311709056, "grad_norm": 1.6931364091071508, "learning_rate": 2.9373970416587437e-05, "loss": 0.7376, "step": 1490 }, { "epoch": 0.18319203833394765, "grad_norm": 1.6716148326451667, "learning_rate": 2.9372129928353042e-05, "loss": 0.7297, "step": 1491 }, { "epoch": 0.18331490355080476, "grad_norm": 1.667771272677289, "learning_rate": 2.9370286796458552e-05, "loss": 0.7033, "step": 1492 }, { "epoch": 0.18343776876766188, "grad_norm": 1.7326509464381088, "learning_rate": 2.9368441021243e-05, "loss": 0.7811, "step": 1493 }, { "epoch": 0.183560633984519, "grad_norm": 1.4292263527245446, "learning_rate": 2.9366592603045906e-05, "loss": 0.6343, "step": 1494 }, { "epoch": 0.18368349920137608, "grad_norm": 1.663022711438851, "learning_rate": 2.936474154220727e-05, "loss": 0.7252, "step": 1495 }, { "epoch": 0.1838063644182332, "grad_norm": 1.6162570598946928, "learning_rate": 2.936288783906759e-05, "loss": 0.7427, "step": 1496 }, { "epoch": 0.1839292296350903, "grad_norm": 1.496257304840906, "learning_rate": 2.936103149396784e-05, "loss": 0.5657, "step": 1497 }, { "epoch": 0.1840520948519474, "grad_norm": 1.6335991716099814, "learning_rate": 2.9359172507249477e-05, "loss": 0.6262, "step": 1498 }, { "epoch": 0.18417496006880452, "grad_norm": 1.7260012366680195, "learning_rate": 2.935731087925445e-05, "loss": 0.8514, "step": 1499 }, { "epoch": 0.18429782528566163, "grad_norm": 1.513128308073426, "learning_rate": 2.935544661032521e-05, "loss": 0.5813, "step": 1500 }, { "epoch": 0.18442069050251875, "grad_norm": 1.4795557148068579, "learning_rate": 2.935357970080465e-05, "loss": 0.7416, "step": 1501 }, { "epoch": 0.18454355571937583, "grad_norm": 1.6926262296767476, "learning_rate": 2.93517101510362e-05, "loss": 0.6584, "step": 1502 }, { "epoch": 0.18466642093623295, "grad_norm": 1.5687106587158242, "learning_rate": 2.9349837961363736e-05, "loss": 0.7823, "step": 1503 }, { "epoch": 0.18478928615309007, "grad_norm": 1.3364470843203262, "learning_rate": 2.9347963132131644e-05, "loss": 0.6914, "step": 1504 }, { "epoch": 0.18491215136994718, "grad_norm": 1.7139684852781043, "learning_rate": 2.9346085663684784e-05, "loss": 0.7796, "step": 1505 }, { "epoch": 0.18503501658680427, "grad_norm": 1.5063708524916688, "learning_rate": 2.9344205556368502e-05, "loss": 0.7022, "step": 1506 }, { "epoch": 0.18515788180366138, "grad_norm": 1.5356429623950476, "learning_rate": 2.9342322810528635e-05, "loss": 0.7742, "step": 1507 }, { "epoch": 0.1852807470205185, "grad_norm": 1.9736472888985805, "learning_rate": 2.93404374265115e-05, "loss": 0.8254, "step": 1508 }, { "epoch": 0.1854036122373756, "grad_norm": 1.869106464888736, "learning_rate": 2.93385494046639e-05, "loss": 0.605, "step": 1509 }, { "epoch": 0.1855264774542327, "grad_norm": 1.6469977417990107, "learning_rate": 2.933665874533313e-05, "loss": 0.6525, "step": 1510 }, { "epoch": 0.18564934267108982, "grad_norm": 1.6422489834952425, "learning_rate": 2.9334765448866953e-05, "loss": 0.6857, "step": 1511 }, { "epoch": 0.18577220788794693, "grad_norm": 1.62996031733984, "learning_rate": 2.933286951561364e-05, "loss": 0.6388, "step": 1512 }, { "epoch": 0.18589507310480402, "grad_norm": 1.4418348971842323, "learning_rate": 2.9330970945921932e-05, "loss": 0.6961, "step": 1513 }, { "epoch": 0.18601793832166114, "grad_norm": 1.6620861807216256, "learning_rate": 2.9329069740141057e-05, "loss": 0.7901, "step": 1514 }, { "epoch": 0.18614080353851825, "grad_norm": 1.4551387470193682, "learning_rate": 2.9327165898620734e-05, "loss": 0.7056, "step": 1515 }, { "epoch": 0.18626366875537537, "grad_norm": 1.6602896132356881, "learning_rate": 2.9325259421711155e-05, "loss": 0.6555, "step": 1516 }, { "epoch": 0.18638653397223245, "grad_norm": 1.3818113457285126, "learning_rate": 2.9323350309763006e-05, "loss": 0.6939, "step": 1517 }, { "epoch": 0.18650939918908957, "grad_norm": 1.549184757941032, "learning_rate": 2.9321438563127464e-05, "loss": 0.7145, "step": 1518 }, { "epoch": 0.18663226440594669, "grad_norm": 1.379294041488474, "learning_rate": 2.931952418215617e-05, "loss": 0.5793, "step": 1519 }, { "epoch": 0.18675512962280377, "grad_norm": 1.9104707622454937, "learning_rate": 2.9317607167201273e-05, "loss": 0.7943, "step": 1520 }, { "epoch": 0.1868779948396609, "grad_norm": 1.7545533044454762, "learning_rate": 2.931568751861539e-05, "loss": 0.7043, "step": 1521 }, { "epoch": 0.187000860056518, "grad_norm": 1.5472200050991374, "learning_rate": 2.9313765236751626e-05, "loss": 0.6722, "step": 1522 }, { "epoch": 0.18712372527337512, "grad_norm": 1.623862825971186, "learning_rate": 2.9311840321963578e-05, "loss": 0.7201, "step": 1523 }, { "epoch": 0.1872465904902322, "grad_norm": 1.431466881205994, "learning_rate": 2.9309912774605313e-05, "loss": 0.7115, "step": 1524 }, { "epoch": 0.18736945570708932, "grad_norm": 1.6504068544430703, "learning_rate": 2.9307982595031398e-05, "loss": 0.7444, "step": 1525 }, { "epoch": 0.18749232092394644, "grad_norm": 1.6238139865404828, "learning_rate": 2.9306049783596875e-05, "loss": 0.7516, "step": 1526 }, { "epoch": 0.18761518614080352, "grad_norm": 1.5858565823399937, "learning_rate": 2.9304114340657272e-05, "loss": 0.6484, "step": 1527 }, { "epoch": 0.18773805135766064, "grad_norm": 1.5517009774000043, "learning_rate": 2.9302176266568607e-05, "loss": 0.6721, "step": 1528 }, { "epoch": 0.18786091657451776, "grad_norm": 1.588523876048512, "learning_rate": 2.9300235561687368e-05, "loss": 0.7541, "step": 1529 }, { "epoch": 0.18798378179137487, "grad_norm": 1.58050264717144, "learning_rate": 2.9298292226370533e-05, "loss": 0.5834, "step": 1530 }, { "epoch": 0.18810664700823196, "grad_norm": 1.2484084357824006, "learning_rate": 2.9296346260975576e-05, "loss": 0.701, "step": 1531 }, { "epoch": 0.18822951222508907, "grad_norm": 1.3173928950782057, "learning_rate": 2.9294397665860437e-05, "loss": 0.6236, "step": 1532 }, { "epoch": 0.1883523774419462, "grad_norm": 1.574290578535979, "learning_rate": 2.929244644138355e-05, "loss": 0.7638, "step": 1533 }, { "epoch": 0.1884752426588033, "grad_norm": 1.659760237307229, "learning_rate": 2.929049258790383e-05, "loss": 0.5903, "step": 1534 }, { "epoch": 0.1885981078756604, "grad_norm": 1.5940384340066525, "learning_rate": 2.9288536105780674e-05, "loss": 0.6189, "step": 1535 }, { "epoch": 0.1887209730925175, "grad_norm": 1.7517293387038757, "learning_rate": 2.9286576995373966e-05, "loss": 0.795, "step": 1536 }, { "epoch": 0.18884383830937462, "grad_norm": 1.3511009081519965, "learning_rate": 2.9284615257044076e-05, "loss": 0.6965, "step": 1537 }, { "epoch": 0.1889667035262317, "grad_norm": 1.829616935600136, "learning_rate": 2.9282650891151844e-05, "loss": 0.7593, "step": 1538 }, { "epoch": 0.18908956874308883, "grad_norm": 1.520531927883508, "learning_rate": 2.9280683898058608e-05, "loss": 0.7871, "step": 1539 }, { "epoch": 0.18921243395994594, "grad_norm": 1.619066654880713, "learning_rate": 2.9278714278126182e-05, "loss": 0.6959, "step": 1540 }, { "epoch": 0.18933529917680306, "grad_norm": 1.4416485848040632, "learning_rate": 2.9276742031716866e-05, "loss": 0.6978, "step": 1541 }, { "epoch": 0.18945816439366014, "grad_norm": 1.5184159629377585, "learning_rate": 2.9274767159193438e-05, "loss": 0.7382, "step": 1542 }, { "epoch": 0.18958102961051726, "grad_norm": 1.4090159140056246, "learning_rate": 2.927278966091917e-05, "loss": 0.6666, "step": 1543 }, { "epoch": 0.18970389482737438, "grad_norm": 1.3951599685413352, "learning_rate": 2.9270809537257805e-05, "loss": 0.6848, "step": 1544 }, { "epoch": 0.1898267600442315, "grad_norm": 1.36127163797329, "learning_rate": 2.926882678857358e-05, "loss": 0.6937, "step": 1545 }, { "epoch": 0.18994962526108858, "grad_norm": 1.5991557482790169, "learning_rate": 2.92668414152312e-05, "loss": 0.7124, "step": 1546 }, { "epoch": 0.1900724904779457, "grad_norm": 1.3625166879810149, "learning_rate": 2.926485341759586e-05, "loss": 0.6658, "step": 1547 }, { "epoch": 0.1901953556948028, "grad_norm": 1.5706104814715964, "learning_rate": 2.926286279603325e-05, "loss": 0.6403, "step": 1548 }, { "epoch": 0.1903182209116599, "grad_norm": 1.6422892295033031, "learning_rate": 2.9260869550909526e-05, "loss": 0.5654, "step": 1549 }, { "epoch": 0.190441086128517, "grad_norm": 1.5222071453972204, "learning_rate": 2.9258873682591334e-05, "loss": 0.5574, "step": 1550 }, { "epoch": 0.19056395134537413, "grad_norm": 1.6156389559199866, "learning_rate": 2.9256875191445797e-05, "loss": 0.7303, "step": 1551 }, { "epoch": 0.19068681656223124, "grad_norm": 1.5947713682173652, "learning_rate": 2.925487407784053e-05, "loss": 0.6444, "step": 1552 }, { "epoch": 0.19080968177908833, "grad_norm": 1.6707565948408543, "learning_rate": 2.925287034214362e-05, "loss": 0.6571, "step": 1553 }, { "epoch": 0.19093254699594545, "grad_norm": 1.4505669503151828, "learning_rate": 2.925086398472365e-05, "loss": 0.6501, "step": 1554 }, { "epoch": 0.19105541221280256, "grad_norm": 1.7156793882199235, "learning_rate": 2.9248855005949665e-05, "loss": 0.6766, "step": 1555 }, { "epoch": 0.19117827742965968, "grad_norm": 1.4264438589909567, "learning_rate": 2.924684340619121e-05, "loss": 0.6399, "step": 1556 }, { "epoch": 0.19130114264651676, "grad_norm": 1.5491430630933203, "learning_rate": 2.92448291858183e-05, "loss": 0.6539, "step": 1557 }, { "epoch": 0.19142400786337388, "grad_norm": 1.4042897266093388, "learning_rate": 2.924281234520145e-05, "loss": 0.7379, "step": 1558 }, { "epoch": 0.191546873080231, "grad_norm": 1.367344640663153, "learning_rate": 2.924079288471163e-05, "loss": 0.7131, "step": 1559 }, { "epoch": 0.19166973829708808, "grad_norm": 1.6183571897181392, "learning_rate": 2.9238770804720318e-05, "loss": 0.6521, "step": 1560 }, { "epoch": 0.1917926035139452, "grad_norm": 1.4065130121967313, "learning_rate": 2.923674610559946e-05, "loss": 0.5831, "step": 1561 }, { "epoch": 0.1919154687308023, "grad_norm": 1.4576924300441743, "learning_rate": 2.9234718787721477e-05, "loss": 0.7174, "step": 1562 }, { "epoch": 0.19203833394765943, "grad_norm": 1.6217778913744338, "learning_rate": 2.9232688851459293e-05, "loss": 0.665, "step": 1563 }, { "epoch": 0.19216119916451652, "grad_norm": 1.3824628027346642, "learning_rate": 2.9230656297186298e-05, "loss": 0.6806, "step": 1564 }, { "epoch": 0.19228406438137363, "grad_norm": 1.5832668683858522, "learning_rate": 2.9228621125276363e-05, "loss": 0.6586, "step": 1565 }, { "epoch": 0.19240692959823075, "grad_norm": 1.8482722821301765, "learning_rate": 2.9226583336103855e-05, "loss": 0.7928, "step": 1566 }, { "epoch": 0.19252979481508786, "grad_norm": 1.4563549945831427, "learning_rate": 2.9224542930043595e-05, "loss": 0.6073, "step": 1567 }, { "epoch": 0.19265266003194495, "grad_norm": 1.5434520062285009, "learning_rate": 2.9222499907470917e-05, "loss": 0.6018, "step": 1568 }, { "epoch": 0.19277552524880207, "grad_norm": 1.379709637897164, "learning_rate": 2.922045426876162e-05, "loss": 0.7192, "step": 1569 }, { "epoch": 0.19289839046565918, "grad_norm": 1.5353816038853487, "learning_rate": 2.921840601429198e-05, "loss": 0.5746, "step": 1570 }, { "epoch": 0.19302125568251627, "grad_norm": 1.531225158293345, "learning_rate": 2.9216355144438766e-05, "loss": 0.651, "step": 1571 }, { "epoch": 0.19314412089937338, "grad_norm": 1.49018278226954, "learning_rate": 2.9214301659579218e-05, "loss": 0.7497, "step": 1572 }, { "epoch": 0.1932669861162305, "grad_norm": 1.3912687312973346, "learning_rate": 2.921224556009106e-05, "loss": 0.7307, "step": 1573 }, { "epoch": 0.19338985133308761, "grad_norm": 1.454294378763509, "learning_rate": 2.9210186846352504e-05, "loss": 0.6316, "step": 1574 }, { "epoch": 0.1935127165499447, "grad_norm": 1.4811443958039792, "learning_rate": 2.9208125518742232e-05, "loss": 0.6694, "step": 1575 }, { "epoch": 0.19363558176680182, "grad_norm": 1.313593747041495, "learning_rate": 2.9206061577639415e-05, "loss": 0.6968, "step": 1576 }, { "epoch": 0.19375844698365893, "grad_norm": 1.297190236562062, "learning_rate": 2.9203995023423697e-05, "loss": 0.7406, "step": 1577 }, { "epoch": 0.19388131220051602, "grad_norm": 1.336160829884966, "learning_rate": 2.9201925856475214e-05, "loss": 0.6216, "step": 1578 }, { "epoch": 0.19400417741737314, "grad_norm": 1.196270908681761, "learning_rate": 2.9199854077174573e-05, "loss": 0.6768, "step": 1579 }, { "epoch": 0.19412704263423025, "grad_norm": 1.5209644363959263, "learning_rate": 2.9197779685902862e-05, "loss": 0.7167, "step": 1580 }, { "epoch": 0.19424990785108737, "grad_norm": 1.5330637943422534, "learning_rate": 2.9195702683041657e-05, "loss": 0.8041, "step": 1581 }, { "epoch": 0.19437277306794445, "grad_norm": 1.545754629473385, "learning_rate": 2.9193623068973003e-05, "loss": 0.745, "step": 1582 }, { "epoch": 0.19449563828480157, "grad_norm": 1.514294344831554, "learning_rate": 2.919154084407943e-05, "loss": 0.6673, "step": 1583 }, { "epoch": 0.19461850350165869, "grad_norm": 1.4557884006798436, "learning_rate": 2.9189456008743964e-05, "loss": 0.6302, "step": 1584 }, { "epoch": 0.1947413687185158, "grad_norm": 1.554529455103597, "learning_rate": 2.918736856335008e-05, "loss": 0.6605, "step": 1585 }, { "epoch": 0.1948642339353729, "grad_norm": 1.4549208583768032, "learning_rate": 2.9185278508281757e-05, "loss": 0.6651, "step": 1586 }, { "epoch": 0.19498709915223, "grad_norm": 1.3396714158847536, "learning_rate": 2.9183185843923446e-05, "loss": 0.6812, "step": 1587 }, { "epoch": 0.19510996436908712, "grad_norm": 1.4205516359365997, "learning_rate": 2.9181090570660086e-05, "loss": 0.6601, "step": 1588 }, { "epoch": 0.1952328295859442, "grad_norm": 1.535752433115311, "learning_rate": 2.917899268887708e-05, "loss": 0.7288, "step": 1589 }, { "epoch": 0.19535569480280132, "grad_norm": 1.4555939672481808, "learning_rate": 2.9176892198960324e-05, "loss": 0.616, "step": 1590 }, { "epoch": 0.19547856001965844, "grad_norm": 1.6822131490240755, "learning_rate": 2.9174789101296186e-05, "loss": 0.6132, "step": 1591 }, { "epoch": 0.19560142523651555, "grad_norm": 1.5449470743658773, "learning_rate": 2.9172683396271523e-05, "loss": 0.6089, "step": 1592 }, { "epoch": 0.19572429045337264, "grad_norm": 1.3445564784788413, "learning_rate": 2.917057508427366e-05, "loss": 0.6428, "step": 1593 }, { "epoch": 0.19584715567022976, "grad_norm": 1.5500281690320337, "learning_rate": 2.916846416569041e-05, "loss": 0.6773, "step": 1594 }, { "epoch": 0.19597002088708687, "grad_norm": 1.4891458988894621, "learning_rate": 2.916635064091006e-05, "loss": 0.6301, "step": 1595 }, { "epoch": 0.196092886103944, "grad_norm": 1.593426660361374, "learning_rate": 2.9164234510321387e-05, "loss": 0.7132, "step": 1596 }, { "epoch": 0.19621575132080107, "grad_norm": 1.6631861828897245, "learning_rate": 2.9162115774313628e-05, "loss": 0.6467, "step": 1597 }, { "epoch": 0.1963386165376582, "grad_norm": 1.5597530037909644, "learning_rate": 2.9159994433276525e-05, "loss": 0.7454, "step": 1598 }, { "epoch": 0.1964614817545153, "grad_norm": 1.6225931338152804, "learning_rate": 2.9157870487600268e-05, "loss": 0.6913, "step": 1599 }, { "epoch": 0.1965843469713724, "grad_norm": 1.4108804250856053, "learning_rate": 2.9155743937675556e-05, "loss": 0.7428, "step": 1600 }, { "epoch": 0.1967072121882295, "grad_norm": 1.5127448153613658, "learning_rate": 2.915361478389355e-05, "loss": 0.701, "step": 1601 }, { "epoch": 0.19683007740508662, "grad_norm": 1.8286886070251531, "learning_rate": 2.9151483026645895e-05, "loss": 0.6531, "step": 1602 }, { "epoch": 0.19695294262194374, "grad_norm": 1.5240621458511348, "learning_rate": 2.914934866632471e-05, "loss": 0.6708, "step": 1603 }, { "epoch": 0.19707580783880083, "grad_norm": 1.3688855262444584, "learning_rate": 2.91472117033226e-05, "loss": 0.7017, "step": 1604 }, { "epoch": 0.19719867305565794, "grad_norm": 2.1590638973241187, "learning_rate": 2.9145072138032648e-05, "loss": 0.7431, "step": 1605 }, { "epoch": 0.19732153827251506, "grad_norm": 1.8660765371331238, "learning_rate": 2.9142929970848406e-05, "loss": 0.706, "step": 1606 }, { "epoch": 0.19744440348937217, "grad_norm": 1.5193081379209106, "learning_rate": 2.9140785202163918e-05, "loss": 0.6693, "step": 1607 }, { "epoch": 0.19756726870622926, "grad_norm": 1.3984267071154053, "learning_rate": 2.91386378323737e-05, "loss": 0.5885, "step": 1608 }, { "epoch": 0.19769013392308638, "grad_norm": 1.730707603972452, "learning_rate": 2.9136487861872743e-05, "loss": 0.6696, "step": 1609 }, { "epoch": 0.1978129991399435, "grad_norm": 1.794023298084845, "learning_rate": 2.9134335291056522e-05, "loss": 0.7202, "step": 1610 }, { "epoch": 0.19793586435680058, "grad_norm": 1.6472503209427598, "learning_rate": 2.9132180120320987e-05, "loss": 0.757, "step": 1611 }, { "epoch": 0.1980587295736577, "grad_norm": 1.6638515515381833, "learning_rate": 2.9130022350062573e-05, "loss": 0.6791, "step": 1612 }, { "epoch": 0.1981815947905148, "grad_norm": 1.7607942925792015, "learning_rate": 2.9127861980678185e-05, "loss": 0.7019, "step": 1613 }, { "epoch": 0.19830446000737192, "grad_norm": 1.4053922742222311, "learning_rate": 2.9125699012565204e-05, "loss": 0.7324, "step": 1614 }, { "epoch": 0.198427325224229, "grad_norm": 1.4855802277836725, "learning_rate": 2.91235334461215e-05, "loss": 0.7791, "step": 1615 }, { "epoch": 0.19855019044108613, "grad_norm": 1.601259286498872, "learning_rate": 2.912136528174541e-05, "loss": 0.7129, "step": 1616 }, { "epoch": 0.19867305565794324, "grad_norm": 1.3942407270162824, "learning_rate": 2.9119194519835762e-05, "loss": 0.6473, "step": 1617 }, { "epoch": 0.19879592087480033, "grad_norm": 1.4677964964170078, "learning_rate": 2.9117021160791844e-05, "loss": 0.6139, "step": 1618 }, { "epoch": 0.19891878609165745, "grad_norm": 1.4957720312259364, "learning_rate": 2.9114845205013436e-05, "loss": 0.7522, "step": 1619 }, { "epoch": 0.19904165130851456, "grad_norm": 1.4341689364412344, "learning_rate": 2.9112666652900796e-05, "loss": 0.7148, "step": 1620 }, { "epoch": 0.19916451652537168, "grad_norm": 1.7839646142469772, "learning_rate": 2.9110485504854643e-05, "loss": 0.8059, "step": 1621 }, { "epoch": 0.19928738174222876, "grad_norm": 1.5799980431649903, "learning_rate": 2.9108301761276194e-05, "loss": 0.6312, "step": 1622 }, { "epoch": 0.19941024695908588, "grad_norm": 1.5074495787454885, "learning_rate": 2.910611542256713e-05, "loss": 0.7504, "step": 1623 }, { "epoch": 0.199533112175943, "grad_norm": 1.3902197503306994, "learning_rate": 2.9103926489129616e-05, "loss": 0.7071, "step": 1624 }, { "epoch": 0.1996559773928001, "grad_norm": 1.389046690626052, "learning_rate": 2.910173496136629e-05, "loss": 0.6806, "step": 1625 }, { "epoch": 0.1997788426096572, "grad_norm": 1.7108880435381788, "learning_rate": 2.9099540839680272e-05, "loss": 0.7894, "step": 1626 }, { "epoch": 0.1999017078265143, "grad_norm": 1.921771248252148, "learning_rate": 2.9097344124475155e-05, "loss": 0.8078, "step": 1627 }, { "epoch": 0.20002457304337143, "grad_norm": 1.8101104696472938, "learning_rate": 2.909514481615501e-05, "loss": 0.7091, "step": 1628 }, { "epoch": 0.20014743826022852, "grad_norm": 1.8057931076620009, "learning_rate": 2.9092942915124386e-05, "loss": 0.7527, "step": 1629 }, { "epoch": 0.20027030347708563, "grad_norm": 1.4369971982417507, "learning_rate": 2.909073842178831e-05, "loss": 0.7754, "step": 1630 }, { "epoch": 0.20039316869394275, "grad_norm": 1.6744242000126663, "learning_rate": 2.9088531336552285e-05, "loss": 0.7873, "step": 1631 }, { "epoch": 0.20051603391079986, "grad_norm": 1.5133157883872566, "learning_rate": 2.9086321659822285e-05, "loss": 0.6318, "step": 1632 }, { "epoch": 0.20063889912765695, "grad_norm": 1.6404257409183727, "learning_rate": 2.908410939200477e-05, "loss": 0.6416, "step": 1633 }, { "epoch": 0.20076176434451407, "grad_norm": 1.6374959631074553, "learning_rate": 2.908189453350667e-05, "loss": 0.7259, "step": 1634 }, { "epoch": 0.20088462956137118, "grad_norm": 1.4474412863344543, "learning_rate": 2.9079677084735396e-05, "loss": 0.6849, "step": 1635 }, { "epoch": 0.2010074947782283, "grad_norm": 1.4721500781791519, "learning_rate": 2.9077457046098833e-05, "loss": 0.7043, "step": 1636 }, { "epoch": 0.20113035999508538, "grad_norm": 1.3680991355322443, "learning_rate": 2.9075234418005344e-05, "loss": 0.7373, "step": 1637 }, { "epoch": 0.2012532252119425, "grad_norm": 1.487452804266133, "learning_rate": 2.907300920086376e-05, "loss": 0.617, "step": 1638 }, { "epoch": 0.20137609042879961, "grad_norm": 1.309122641263006, "learning_rate": 2.90707813950834e-05, "loss": 0.7302, "step": 1639 }, { "epoch": 0.2014989556456567, "grad_norm": 1.6229428895960165, "learning_rate": 2.906855100107406e-05, "loss": 0.745, "step": 1640 }, { "epoch": 0.20162182086251382, "grad_norm": 1.3879452780343724, "learning_rate": 2.9066318019245994e-05, "loss": 0.7327, "step": 1641 }, { "epoch": 0.20174468607937093, "grad_norm": 1.3463820755968419, "learning_rate": 2.9064082450009956e-05, "loss": 0.6063, "step": 1642 }, { "epoch": 0.20186755129622805, "grad_norm": 1.7694458731699079, "learning_rate": 2.9061844293777156e-05, "loss": 0.8044, "step": 1643 }, { "epoch": 0.20199041651308514, "grad_norm": 1.3381725169774552, "learning_rate": 2.9059603550959296e-05, "loss": 0.6198, "step": 1644 }, { "epoch": 0.20211328172994225, "grad_norm": 1.583569908139625, "learning_rate": 2.9057360221968546e-05, "loss": 0.7266, "step": 1645 }, { "epoch": 0.20223614694679937, "grad_norm": 1.5902436387506211, "learning_rate": 2.9055114307217543e-05, "loss": 0.6147, "step": 1646 }, { "epoch": 0.20235901216365648, "grad_norm": 1.2384204233225307, "learning_rate": 2.9052865807119415e-05, "loss": 0.6601, "step": 1647 }, { "epoch": 0.20248187738051357, "grad_norm": 1.283595881513416, "learning_rate": 2.905061472208776e-05, "loss": 0.6981, "step": 1648 }, { "epoch": 0.20260474259737069, "grad_norm": 1.6211949007645106, "learning_rate": 2.9048361052536644e-05, "loss": 0.7928, "step": 1649 }, { "epoch": 0.2027276078142278, "grad_norm": 1.7813330700334447, "learning_rate": 2.904610479888062e-05, "loss": 0.6544, "step": 1650 }, { "epoch": 0.2028504730310849, "grad_norm": 1.4553175051762477, "learning_rate": 2.9043845961534713e-05, "loss": 0.6717, "step": 1651 }, { "epoch": 0.202973338247942, "grad_norm": 1.4379750392426947, "learning_rate": 2.904158454091442e-05, "loss": 0.6424, "step": 1652 }, { "epoch": 0.20309620346479912, "grad_norm": 1.3874089102540252, "learning_rate": 2.9039320537435706e-05, "loss": 0.6969, "step": 1653 }, { "epoch": 0.20321906868165623, "grad_norm": 1.4189976041542967, "learning_rate": 2.9037053951515036e-05, "loss": 0.6535, "step": 1654 }, { "epoch": 0.20334193389851332, "grad_norm": 1.3806059748873656, "learning_rate": 2.9034784783569324e-05, "loss": 0.7327, "step": 1655 }, { "epoch": 0.20346479911537044, "grad_norm": 1.4983034351050566, "learning_rate": 2.9032513034015965e-05, "loss": 0.7382, "step": 1656 }, { "epoch": 0.20358766433222755, "grad_norm": 1.5704116166851976, "learning_rate": 2.903023870327284e-05, "loss": 0.73, "step": 1657 }, { "epoch": 0.20371052954908467, "grad_norm": 1.6151523676181114, "learning_rate": 2.90279617917583e-05, "loss": 0.7412, "step": 1658 }, { "epoch": 0.20383339476594176, "grad_norm": 1.4303070173605275, "learning_rate": 2.9025682299891154e-05, "loss": 0.6355, "step": 1659 }, { "epoch": 0.20395625998279887, "grad_norm": 1.5155897888626866, "learning_rate": 2.902340022809071e-05, "loss": 0.7118, "step": 1660 }, { "epoch": 0.204079125199656, "grad_norm": 1.1877163078645256, "learning_rate": 2.9021115576776745e-05, "loss": 0.7047, "step": 1661 }, { "epoch": 0.20420199041651307, "grad_norm": 1.6650373555078317, "learning_rate": 2.9018828346369496e-05, "loss": 0.6151, "step": 1662 }, { "epoch": 0.2043248556333702, "grad_norm": 1.6152373801219506, "learning_rate": 2.9016538537289688e-05, "loss": 0.7038, "step": 1663 }, { "epoch": 0.2044477208502273, "grad_norm": 1.648680790409684, "learning_rate": 2.901424614995852e-05, "loss": 0.6862, "step": 1664 }, { "epoch": 0.20457058606708442, "grad_norm": 1.5298108437265414, "learning_rate": 2.901195118479765e-05, "loss": 0.7646, "step": 1665 }, { "epoch": 0.2046934512839415, "grad_norm": 1.3332001841817516, "learning_rate": 2.900965364222924e-05, "loss": 0.7028, "step": 1666 }, { "epoch": 0.20481631650079862, "grad_norm": 1.520768631073332, "learning_rate": 2.900735352267589e-05, "loss": 0.7101, "step": 1667 }, { "epoch": 0.20493918171765574, "grad_norm": 1.3152825982927783, "learning_rate": 2.9005050826560704e-05, "loss": 0.6148, "step": 1668 }, { "epoch": 0.20506204693451283, "grad_norm": 1.5678536764861069, "learning_rate": 2.9002745554307247e-05, "loss": 0.6781, "step": 1669 }, { "epoch": 0.20518491215136994, "grad_norm": 1.3813906838666152, "learning_rate": 2.900043770633955e-05, "loss": 0.6862, "step": 1670 }, { "epoch": 0.20530777736822706, "grad_norm": 1.3079929022763197, "learning_rate": 2.8998127283082138e-05, "loss": 0.6354, "step": 1671 }, { "epoch": 0.20543064258508417, "grad_norm": 1.4832568552529857, "learning_rate": 2.8995814284959992e-05, "loss": 0.6236, "step": 1672 }, { "epoch": 0.20555350780194126, "grad_norm": 1.6295987795734126, "learning_rate": 2.8993498712398575e-05, "loss": 0.6912, "step": 1673 }, { "epoch": 0.20567637301879838, "grad_norm": 1.4125495836035356, "learning_rate": 2.8991180565823823e-05, "loss": 0.6692, "step": 1674 }, { "epoch": 0.2057992382356555, "grad_norm": 1.3545420821753, "learning_rate": 2.8988859845662137e-05, "loss": 0.6491, "step": 1675 }, { "epoch": 0.2059221034525126, "grad_norm": 1.5546704341036024, "learning_rate": 2.8986536552340406e-05, "loss": 0.6654, "step": 1676 }, { "epoch": 0.2060449686693697, "grad_norm": 1.492660865361205, "learning_rate": 2.8984210686285982e-05, "loss": 0.784, "step": 1677 }, { "epoch": 0.2061678338862268, "grad_norm": 1.5339453008430162, "learning_rate": 2.8981882247926695e-05, "loss": 0.6454, "step": 1678 }, { "epoch": 0.20629069910308392, "grad_norm": 1.6822679833947132, "learning_rate": 2.897955123769084e-05, "loss": 0.6576, "step": 1679 }, { "epoch": 0.206413564319941, "grad_norm": 1.4328013377571083, "learning_rate": 2.8977217656007198e-05, "loss": 0.708, "step": 1680 }, { "epoch": 0.20653642953679813, "grad_norm": 1.5427939691784718, "learning_rate": 2.897488150330501e-05, "loss": 0.5728, "step": 1681 }, { "epoch": 0.20665929475365524, "grad_norm": 1.6820652315736742, "learning_rate": 2.8972542780014002e-05, "loss": 0.7099, "step": 1682 }, { "epoch": 0.20678215997051236, "grad_norm": 1.6925280981348614, "learning_rate": 2.8970201486564367e-05, "loss": 0.7588, "step": 1683 }, { "epoch": 0.20690502518736945, "grad_norm": 1.6081883107423713, "learning_rate": 2.896785762338677e-05, "loss": 0.6657, "step": 1684 }, { "epoch": 0.20702789040422656, "grad_norm": 1.3206277129869408, "learning_rate": 2.8965511190912342e-05, "loss": 0.6172, "step": 1685 }, { "epoch": 0.20715075562108368, "grad_norm": 1.6377462720759686, "learning_rate": 2.89631621895727e-05, "loss": 0.7675, "step": 1686 }, { "epoch": 0.2072736208379408, "grad_norm": 1.3512720596998788, "learning_rate": 2.8960810619799933e-05, "loss": 0.8258, "step": 1687 }, { "epoch": 0.20739648605479788, "grad_norm": 1.3218751814536698, "learning_rate": 2.8958456482026586e-05, "loss": 0.6191, "step": 1688 }, { "epoch": 0.207519351271655, "grad_norm": 1.8032349722544163, "learning_rate": 2.8956099776685695e-05, "loss": 0.7192, "step": 1689 }, { "epoch": 0.2076422164885121, "grad_norm": 1.4750747974460565, "learning_rate": 2.895374050421076e-05, "loss": 0.6765, "step": 1690 }, { "epoch": 0.2077650817053692, "grad_norm": 1.5287990582693514, "learning_rate": 2.8951378665035754e-05, "loss": 0.6421, "step": 1691 }, { "epoch": 0.2078879469222263, "grad_norm": 1.2977201443061555, "learning_rate": 2.894901425959512e-05, "loss": 0.5847, "step": 1692 }, { "epoch": 0.20801081213908343, "grad_norm": 1.7088501750159508, "learning_rate": 2.894664728832377e-05, "loss": 0.7014, "step": 1693 }, { "epoch": 0.20813367735594054, "grad_norm": 1.6431184004192918, "learning_rate": 2.8944277751657106e-05, "loss": 0.5736, "step": 1694 }, { "epoch": 0.20825654257279763, "grad_norm": 1.2948597164135192, "learning_rate": 2.894190565003097e-05, "loss": 0.6285, "step": 1695 }, { "epoch": 0.20837940778965475, "grad_norm": 1.4769031727677895, "learning_rate": 2.893953098388172e-05, "loss": 0.6046, "step": 1696 }, { "epoch": 0.20850227300651186, "grad_norm": 1.4780181675878536, "learning_rate": 2.8937153753646138e-05, "loss": 0.6234, "step": 1697 }, { "epoch": 0.20862513822336898, "grad_norm": 1.3846827088364708, "learning_rate": 2.8934773959761512e-05, "loss": 0.6055, "step": 1698 }, { "epoch": 0.20874800344022607, "grad_norm": 1.411126940256544, "learning_rate": 2.8932391602665585e-05, "loss": 0.8787, "step": 1699 }, { "epoch": 0.20887086865708318, "grad_norm": 1.3921815278284932, "learning_rate": 2.8930006682796578e-05, "loss": 0.6707, "step": 1700 }, { "epoch": 0.2089937338739403, "grad_norm": 1.5844157918780364, "learning_rate": 2.892761920059318e-05, "loss": 0.6061, "step": 1701 }, { "epoch": 0.20911659909079738, "grad_norm": 1.4600790001930777, "learning_rate": 2.8925229156494553e-05, "loss": 0.6653, "step": 1702 }, { "epoch": 0.2092394643076545, "grad_norm": 1.6150312724381297, "learning_rate": 2.892283655094033e-05, "loss": 0.731, "step": 1703 }, { "epoch": 0.20936232952451161, "grad_norm": 1.4590384021555642, "learning_rate": 2.892044138437062e-05, "loss": 0.6231, "step": 1704 }, { "epoch": 0.20948519474136873, "grad_norm": 1.4312947639080624, "learning_rate": 2.8918043657225994e-05, "loss": 0.7578, "step": 1705 }, { "epoch": 0.20960805995822582, "grad_norm": 1.615633210017766, "learning_rate": 2.8915643369947497e-05, "loss": 0.6215, "step": 1706 }, { "epoch": 0.20973092517508293, "grad_norm": 1.653993153982696, "learning_rate": 2.891324052297665e-05, "loss": 0.6563, "step": 1707 }, { "epoch": 0.20985379039194005, "grad_norm": 1.3667664419908012, "learning_rate": 2.891083511675544e-05, "loss": 0.6844, "step": 1708 }, { "epoch": 0.20997665560879714, "grad_norm": 1.6737442186169018, "learning_rate": 2.8908427151726325e-05, "loss": 0.7301, "step": 1709 }, { "epoch": 0.21009952082565425, "grad_norm": 1.4057509236606869, "learning_rate": 2.8906016628332233e-05, "loss": 0.6721, "step": 1710 }, { "epoch": 0.21022238604251137, "grad_norm": 1.507623090861582, "learning_rate": 2.8903603547016565e-05, "loss": 0.6401, "step": 1711 }, { "epoch": 0.21034525125936848, "grad_norm": 1.5914086023911689, "learning_rate": 2.8901187908223195e-05, "loss": 0.6029, "step": 1712 }, { "epoch": 0.21046811647622557, "grad_norm": 1.6610035575577686, "learning_rate": 2.8898769712396458e-05, "loss": 0.8083, "step": 1713 }, { "epoch": 0.21059098169308269, "grad_norm": 1.6287325315481873, "learning_rate": 2.8896348959981173e-05, "loss": 0.6621, "step": 1714 }, { "epoch": 0.2107138469099398, "grad_norm": 1.3682318121893053, "learning_rate": 2.8893925651422614e-05, "loss": 0.6496, "step": 1715 }, { "epoch": 0.21083671212679692, "grad_norm": 1.3551051674257304, "learning_rate": 2.8891499787166542e-05, "loss": 0.7096, "step": 1716 }, { "epoch": 0.210959577343654, "grad_norm": 1.4646539105025689, "learning_rate": 2.8889071367659172e-05, "loss": 0.6574, "step": 1717 }, { "epoch": 0.21108244256051112, "grad_norm": 1.5297321968854536, "learning_rate": 2.8886640393347195e-05, "loss": 0.6508, "step": 1718 }, { "epoch": 0.21120530777736823, "grad_norm": 1.444121528999513, "learning_rate": 2.888420686467778e-05, "loss": 0.7345, "step": 1719 }, { "epoch": 0.21132817299422532, "grad_norm": 1.359755292370642, "learning_rate": 2.8881770782098547e-05, "loss": 0.6974, "step": 1720 }, { "epoch": 0.21145103821108244, "grad_norm": 1.390170937914581, "learning_rate": 2.8879332146057612e-05, "loss": 0.6322, "step": 1721 }, { "epoch": 0.21157390342793955, "grad_norm": 1.386463572232355, "learning_rate": 2.887689095700354e-05, "loss": 0.696, "step": 1722 }, { "epoch": 0.21169676864479667, "grad_norm": 1.38407009288635, "learning_rate": 2.8874447215385365e-05, "loss": 0.7194, "step": 1723 }, { "epoch": 0.21181963386165376, "grad_norm": 1.2854769170260811, "learning_rate": 2.8872000921652607e-05, "loss": 0.5832, "step": 1724 }, { "epoch": 0.21194249907851087, "grad_norm": 1.2962323461402037, "learning_rate": 2.8869552076255243e-05, "loss": 0.6037, "step": 1725 }, { "epoch": 0.212065364295368, "grad_norm": 1.5511157855954367, "learning_rate": 2.886710067964372e-05, "loss": 0.6841, "step": 1726 }, { "epoch": 0.2121882295122251, "grad_norm": 1.6900615186355474, "learning_rate": 2.8864646732268962e-05, "loss": 0.7129, "step": 1727 }, { "epoch": 0.2123110947290822, "grad_norm": 1.470829333081114, "learning_rate": 2.8862190234582348e-05, "loss": 0.7278, "step": 1728 }, { "epoch": 0.2124339599459393, "grad_norm": 1.5819966706250743, "learning_rate": 2.8859731187035746e-05, "loss": 0.8149, "step": 1729 }, { "epoch": 0.21255682516279642, "grad_norm": 1.4206511652040512, "learning_rate": 2.8857269590081472e-05, "loss": 0.5662, "step": 1730 }, { "epoch": 0.2126796903796535, "grad_norm": 1.4799409179640088, "learning_rate": 2.885480544417232e-05, "loss": 0.6105, "step": 1731 }, { "epoch": 0.21280255559651062, "grad_norm": 1.5124468947449239, "learning_rate": 2.8852338749761566e-05, "loss": 0.7432, "step": 1732 }, { "epoch": 0.21292542081336774, "grad_norm": 1.6110618134312789, "learning_rate": 2.884986950730293e-05, "loss": 0.7073, "step": 1733 }, { "epoch": 0.21304828603022485, "grad_norm": 1.494918466398621, "learning_rate": 2.884739771725062e-05, "loss": 0.701, "step": 1734 }, { "epoch": 0.21317115124708194, "grad_norm": 1.5218926694626034, "learning_rate": 2.88449233800593e-05, "loss": 0.6231, "step": 1735 }, { "epoch": 0.21329401646393906, "grad_norm": 1.653000812316923, "learning_rate": 2.8842446496184114e-05, "loss": 0.7462, "step": 1736 }, { "epoch": 0.21341688168079617, "grad_norm": 1.3626794496716919, "learning_rate": 2.883996706608067e-05, "loss": 0.6156, "step": 1737 }, { "epoch": 0.2135397468976533, "grad_norm": 1.4132252004912156, "learning_rate": 2.8837485090205033e-05, "loss": 0.6293, "step": 1738 }, { "epoch": 0.21366261211451038, "grad_norm": 1.644794069283531, "learning_rate": 2.883500056901376e-05, "loss": 0.7024, "step": 1739 }, { "epoch": 0.2137854773313675, "grad_norm": 1.408983802114771, "learning_rate": 2.883251350296385e-05, "loss": 0.717, "step": 1740 }, { "epoch": 0.2139083425482246, "grad_norm": 1.4665305687447852, "learning_rate": 2.8830023892512792e-05, "loss": 0.7488, "step": 1741 }, { "epoch": 0.2140312077650817, "grad_norm": 1.3830222138380757, "learning_rate": 2.8827531738118526e-05, "loss": 0.6517, "step": 1742 }, { "epoch": 0.2141540729819388, "grad_norm": 1.4663141184833655, "learning_rate": 2.8825037040239473e-05, "loss": 0.6005, "step": 1743 }, { "epoch": 0.21427693819879592, "grad_norm": 1.6544962013607898, "learning_rate": 2.8822539799334513e-05, "loss": 0.6614, "step": 1744 }, { "epoch": 0.21439980341565304, "grad_norm": 1.7495929272576076, "learning_rate": 2.8820040015863e-05, "loss": 0.7829, "step": 1745 }, { "epoch": 0.21452266863251013, "grad_norm": 1.4634542962181936, "learning_rate": 2.8817537690284755e-05, "loss": 0.5953, "step": 1746 }, { "epoch": 0.21464553384936724, "grad_norm": 1.5928780033757222, "learning_rate": 2.881503282306006e-05, "loss": 0.7012, "step": 1747 }, { "epoch": 0.21476839906622436, "grad_norm": 1.7188477888120064, "learning_rate": 2.881252541464967e-05, "loss": 0.6554, "step": 1748 }, { "epoch": 0.21489126428308147, "grad_norm": 1.6118413090895642, "learning_rate": 2.8810015465514808e-05, "loss": 0.7479, "step": 1749 }, { "epoch": 0.21501412949993856, "grad_norm": 1.7470599753442921, "learning_rate": 2.880750297611716e-05, "loss": 0.6362, "step": 1750 }, { "epoch": 0.21513699471679568, "grad_norm": 1.4151079133492444, "learning_rate": 2.8804987946918888e-05, "loss": 0.6841, "step": 1751 }, { "epoch": 0.2152598599336528, "grad_norm": 1.6180307610219546, "learning_rate": 2.880247037838261e-05, "loss": 0.6621, "step": 1752 }, { "epoch": 0.21538272515050988, "grad_norm": 1.4359963924404517, "learning_rate": 2.879995027097142e-05, "loss": 0.6427, "step": 1753 }, { "epoch": 0.215505590367367, "grad_norm": 1.3586896565007869, "learning_rate": 2.8797427625148872e-05, "loss": 0.6621, "step": 1754 }, { "epoch": 0.2156284555842241, "grad_norm": 1.6493736411929218, "learning_rate": 2.8794902441378992e-05, "loss": 0.6716, "step": 1755 }, { "epoch": 0.21575132080108123, "grad_norm": 1.6452055558898349, "learning_rate": 2.8792374720126268e-05, "loss": 0.6743, "step": 1756 }, { "epoch": 0.2158741860179383, "grad_norm": 1.8291232347196271, "learning_rate": 2.8789844461855665e-05, "loss": 0.7119, "step": 1757 }, { "epoch": 0.21599705123479543, "grad_norm": 1.5352373294254873, "learning_rate": 2.8787311667032603e-05, "loss": 0.7149, "step": 1758 }, { "epoch": 0.21611991645165254, "grad_norm": 1.5047466091176325, "learning_rate": 2.8784776336122975e-05, "loss": 0.7167, "step": 1759 }, { "epoch": 0.21624278166850963, "grad_norm": 1.4292318707631428, "learning_rate": 2.8782238469593136e-05, "loss": 0.7608, "step": 1760 }, { "epoch": 0.21636564688536675, "grad_norm": 1.4982783802237056, "learning_rate": 2.8779698067909907e-05, "loss": 0.6563, "step": 1761 }, { "epoch": 0.21648851210222386, "grad_norm": 1.6649616664077795, "learning_rate": 2.8777155131540588e-05, "loss": 0.601, "step": 1762 }, { "epoch": 0.21661137731908098, "grad_norm": 1.2412184310057395, "learning_rate": 2.877460966095293e-05, "loss": 0.6617, "step": 1763 }, { "epoch": 0.21673424253593807, "grad_norm": 1.2961334045033739, "learning_rate": 2.8772061656615155e-05, "loss": 0.6643, "step": 1764 }, { "epoch": 0.21685710775279518, "grad_norm": 1.3738017224088097, "learning_rate": 2.8769511118995955e-05, "loss": 0.6603, "step": 1765 }, { "epoch": 0.2169799729696523, "grad_norm": 1.4864887850673314, "learning_rate": 2.8766958048564477e-05, "loss": 0.6948, "step": 1766 }, { "epoch": 0.2171028381865094, "grad_norm": 1.3171959692020287, "learning_rate": 2.8764402445790358e-05, "loss": 0.7002, "step": 1767 }, { "epoch": 0.2172257034033665, "grad_norm": 1.3897416607890123, "learning_rate": 2.8761844311143665e-05, "loss": 0.7061, "step": 1768 }, { "epoch": 0.21734856862022361, "grad_norm": 1.7659557145356548, "learning_rate": 2.8759283645094964e-05, "loss": 0.7633, "step": 1769 }, { "epoch": 0.21747143383708073, "grad_norm": 1.5495038265499104, "learning_rate": 2.875672044811527e-05, "loss": 0.6362, "step": 1770 }, { "epoch": 0.21759429905393782, "grad_norm": 1.4035436606901, "learning_rate": 2.8754154720676063e-05, "loss": 0.63, "step": 1771 }, { "epoch": 0.21771716427079493, "grad_norm": 1.4889221156055759, "learning_rate": 2.8751586463249294e-05, "loss": 0.6905, "step": 1772 }, { "epoch": 0.21784002948765205, "grad_norm": 1.696148356290709, "learning_rate": 2.8749015676307378e-05, "loss": 0.6379, "step": 1773 }, { "epoch": 0.21796289470450916, "grad_norm": 1.2936723184721173, "learning_rate": 2.8746442360323192e-05, "loss": 0.6278, "step": 1774 }, { "epoch": 0.21808575992136625, "grad_norm": 1.9392152732515384, "learning_rate": 2.8743866515770083e-05, "loss": 0.7102, "step": 1775 }, { "epoch": 0.21820862513822337, "grad_norm": 2.063666802902862, "learning_rate": 2.8741288143121862e-05, "loss": 0.7141, "step": 1776 }, { "epoch": 0.21833149035508048, "grad_norm": 1.456365443379661, "learning_rate": 2.8738707242852802e-05, "loss": 0.5908, "step": 1777 }, { "epoch": 0.2184543555719376, "grad_norm": 1.7810228877686733, "learning_rate": 2.8736123815437646e-05, "loss": 0.6452, "step": 1778 }, { "epoch": 0.21857722078879468, "grad_norm": 1.7681707080122373, "learning_rate": 2.8733537861351592e-05, "loss": 0.6617, "step": 1779 }, { "epoch": 0.2187000860056518, "grad_norm": 1.612447661769684, "learning_rate": 2.8730949381070315e-05, "loss": 0.5422, "step": 1780 }, { "epoch": 0.21882295122250892, "grad_norm": 1.6082381294024288, "learning_rate": 2.8728358375069946e-05, "loss": 0.5493, "step": 1781 }, { "epoch": 0.218945816439366, "grad_norm": 1.622571206700607, "learning_rate": 2.8725764843827087e-05, "loss": 0.7665, "step": 1782 }, { "epoch": 0.21906868165622312, "grad_norm": 1.4748839329593193, "learning_rate": 2.8723168787818804e-05, "loss": 0.6619, "step": 1783 }, { "epoch": 0.21919154687308023, "grad_norm": 1.4807385238567103, "learning_rate": 2.8720570207522613e-05, "loss": 0.6154, "step": 1784 }, { "epoch": 0.21931441208993735, "grad_norm": 1.3288500790531856, "learning_rate": 2.8717969103416516e-05, "loss": 0.6564, "step": 1785 }, { "epoch": 0.21943727730679444, "grad_norm": 1.2675067733686276, "learning_rate": 2.871536547597897e-05, "loss": 0.7659, "step": 1786 }, { "epoch": 0.21956014252365155, "grad_norm": 1.4956245316524286, "learning_rate": 2.871275932568889e-05, "loss": 0.6505, "step": 1787 }, { "epoch": 0.21968300774050867, "grad_norm": 1.4780631890590037, "learning_rate": 2.8710150653025656e-05, "loss": 0.6954, "step": 1788 }, { "epoch": 0.21980587295736578, "grad_norm": 1.3506483466718562, "learning_rate": 2.870753945846913e-05, "loss": 0.7045, "step": 1789 }, { "epoch": 0.21992873817422287, "grad_norm": 1.4965637481819267, "learning_rate": 2.8704925742499614e-05, "loss": 0.6832, "step": 1790 }, { "epoch": 0.22005160339108, "grad_norm": 1.4411701957827523, "learning_rate": 2.870230950559789e-05, "loss": 0.7041, "step": 1791 }, { "epoch": 0.2201744686079371, "grad_norm": 1.5752583301358818, "learning_rate": 2.8699690748245194e-05, "loss": 0.5988, "step": 1792 }, { "epoch": 0.2202973338247942, "grad_norm": 1.8518471474217646, "learning_rate": 2.8697069470923233e-05, "loss": 0.617, "step": 1793 }, { "epoch": 0.2204201990416513, "grad_norm": 1.5954333430800083, "learning_rate": 2.8694445674114163e-05, "loss": 0.6451, "step": 1794 }, { "epoch": 0.22054306425850842, "grad_norm": 1.340741659774446, "learning_rate": 2.8691819358300633e-05, "loss": 0.617, "step": 1795 }, { "epoch": 0.22066592947536554, "grad_norm": 1.6268783840507084, "learning_rate": 2.868919052396572e-05, "loss": 0.6857, "step": 1796 }, { "epoch": 0.22078879469222262, "grad_norm": 1.5431271528733617, "learning_rate": 2.8686559171592987e-05, "loss": 0.7879, "step": 1797 }, { "epoch": 0.22091165990907974, "grad_norm": 1.3544678277513882, "learning_rate": 2.868392530166646e-05, "loss": 0.7673, "step": 1798 }, { "epoch": 0.22103452512593685, "grad_norm": 1.6520752541366406, "learning_rate": 2.8681288914670615e-05, "loss": 0.6755, "step": 1799 }, { "epoch": 0.22115739034279394, "grad_norm": 1.3953205395508201, "learning_rate": 2.86786500110904e-05, "loss": 0.6975, "step": 1800 }, { "epoch": 0.22128025555965106, "grad_norm": 1.4240899850580566, "learning_rate": 2.867600859141122e-05, "loss": 0.7133, "step": 1801 }, { "epoch": 0.22140312077650817, "grad_norm": 1.4606511771391453, "learning_rate": 2.8673364656118962e-05, "loss": 0.6106, "step": 1802 }, { "epoch": 0.2215259859933653, "grad_norm": 1.507556908392169, "learning_rate": 2.8670718205699944e-05, "loss": 0.6209, "step": 1803 }, { "epoch": 0.22164885121022238, "grad_norm": 1.560981927890189, "learning_rate": 2.866806924064097e-05, "loss": 0.6045, "step": 1804 }, { "epoch": 0.2217717164270795, "grad_norm": 1.273827898829418, "learning_rate": 2.86654177614293e-05, "loss": 0.6268, "step": 1805 }, { "epoch": 0.2218945816439366, "grad_norm": 1.3727638870448748, "learning_rate": 2.8662763768552656e-05, "loss": 0.6789, "step": 1806 }, { "epoch": 0.22201744686079372, "grad_norm": 2.965088387699349, "learning_rate": 2.8660107262499223e-05, "loss": 0.6154, "step": 1807 }, { "epoch": 0.2221403120776508, "grad_norm": 1.5320266932756215, "learning_rate": 2.8657448243757646e-05, "loss": 0.6275, "step": 1808 }, { "epoch": 0.22226317729450792, "grad_norm": 1.4076417079411998, "learning_rate": 2.865478671281704e-05, "loss": 0.7576, "step": 1809 }, { "epoch": 0.22238604251136504, "grad_norm": 1.3196260480798208, "learning_rate": 2.865212267016697e-05, "loss": 0.6821, "step": 1810 }, { "epoch": 0.22250890772822213, "grad_norm": 1.4942395573439717, "learning_rate": 2.8649456116297475e-05, "loss": 0.6465, "step": 1811 }, { "epoch": 0.22263177294507924, "grad_norm": 1.4094611287058796, "learning_rate": 2.8646787051699045e-05, "loss": 0.6689, "step": 1812 }, { "epoch": 0.22275463816193636, "grad_norm": 1.2904676310116452, "learning_rate": 2.8644115476862636e-05, "loss": 0.7223, "step": 1813 }, { "epoch": 0.22287750337879347, "grad_norm": 1.651307371565112, "learning_rate": 2.8641441392279676e-05, "loss": 0.7286, "step": 1814 }, { "epoch": 0.22300036859565056, "grad_norm": 1.5714536274111897, "learning_rate": 2.8638764798442037e-05, "loss": 0.6185, "step": 1815 }, { "epoch": 0.22312323381250768, "grad_norm": 1.5031820102894207, "learning_rate": 2.8636085695842063e-05, "loss": 0.7403, "step": 1816 }, { "epoch": 0.2232460990293648, "grad_norm": 1.399536515435283, "learning_rate": 2.863340408497256e-05, "loss": 0.6991, "step": 1817 }, { "epoch": 0.2233689642462219, "grad_norm": 1.3094755651613494, "learning_rate": 2.8630719966326793e-05, "loss": 0.7518, "step": 1818 }, { "epoch": 0.223491829463079, "grad_norm": 1.453795310909441, "learning_rate": 2.8628033340398484e-05, "loss": 0.6327, "step": 1819 }, { "epoch": 0.2236146946799361, "grad_norm": 1.5432237232195412, "learning_rate": 2.8625344207681822e-05, "loss": 0.6831, "step": 1820 }, { "epoch": 0.22373755989679323, "grad_norm": 2.335319782247208, "learning_rate": 2.8622652568671458e-05, "loss": 0.6936, "step": 1821 }, { "epoch": 0.2238604251136503, "grad_norm": 1.4490215956993102, "learning_rate": 2.86199584238625e-05, "loss": 0.627, "step": 1822 }, { "epoch": 0.22398329033050743, "grad_norm": 1.4626782776332612, "learning_rate": 2.861726177375052e-05, "loss": 0.7695, "step": 1823 }, { "epoch": 0.22410615554736454, "grad_norm": 1.6923764823402485, "learning_rate": 2.8614562618831543e-05, "loss": 0.632, "step": 1824 }, { "epoch": 0.22422902076422166, "grad_norm": 1.3686704539437728, "learning_rate": 2.861186095960207e-05, "loss": 0.6189, "step": 1825 }, { "epoch": 0.22435188598107875, "grad_norm": 1.4344424061312906, "learning_rate": 2.860915679655905e-05, "loss": 0.5722, "step": 1826 }, { "epoch": 0.22447475119793586, "grad_norm": 1.5240052673499629, "learning_rate": 2.8606450130199895e-05, "loss": 0.7051, "step": 1827 }, { "epoch": 0.22459761641479298, "grad_norm": 1.3927712366216236, "learning_rate": 2.8603740961022475e-05, "loss": 0.6596, "step": 1828 }, { "epoch": 0.2247204816316501, "grad_norm": 1.5471873600885089, "learning_rate": 2.8601029289525133e-05, "loss": 0.6439, "step": 1829 }, { "epoch": 0.22484334684850718, "grad_norm": 1.4924486996742614, "learning_rate": 2.8598315116206657e-05, "loss": 0.7835, "step": 1830 }, { "epoch": 0.2249662120653643, "grad_norm": 1.506633874583061, "learning_rate": 2.8595598441566304e-05, "loss": 0.6586, "step": 1831 }, { "epoch": 0.2250890772822214, "grad_norm": 1.4699631897961438, "learning_rate": 2.859287926610379e-05, "loss": 0.6096, "step": 1832 }, { "epoch": 0.2252119424990785, "grad_norm": 1.6905102808049715, "learning_rate": 2.859015759031929e-05, "loss": 0.6395, "step": 1833 }, { "epoch": 0.22533480771593561, "grad_norm": 1.6741314167365013, "learning_rate": 2.8587433414713433e-05, "loss": 0.6452, "step": 1834 }, { "epoch": 0.22545767293279273, "grad_norm": 1.397473365256371, "learning_rate": 2.8584706739787315e-05, "loss": 0.7386, "step": 1835 }, { "epoch": 0.22558053814964985, "grad_norm": 1.6106582101490519, "learning_rate": 2.8581977566042495e-05, "loss": 0.5945, "step": 1836 }, { "epoch": 0.22570340336650693, "grad_norm": 1.5356480867436941, "learning_rate": 2.8579245893980984e-05, "loss": 0.6676, "step": 1837 }, { "epoch": 0.22582626858336405, "grad_norm": 1.3130595771816067, "learning_rate": 2.8576511724105255e-05, "loss": 0.711, "step": 1838 }, { "epoch": 0.22594913380022116, "grad_norm": 1.5168601136604052, "learning_rate": 2.857377505691824e-05, "loss": 0.6571, "step": 1839 }, { "epoch": 0.22607199901707828, "grad_norm": 1.6261293011707636, "learning_rate": 2.8571035892923333e-05, "loss": 0.6324, "step": 1840 }, { "epoch": 0.22619486423393537, "grad_norm": 1.5098840129023414, "learning_rate": 2.8568294232624384e-05, "loss": 0.5987, "step": 1841 }, { "epoch": 0.22631772945079248, "grad_norm": 1.4585375332168942, "learning_rate": 2.856555007652571e-05, "loss": 0.7053, "step": 1842 }, { "epoch": 0.2264405946676496, "grad_norm": 1.6290144906544173, "learning_rate": 2.856280342513207e-05, "loss": 0.6671, "step": 1843 }, { "epoch": 0.22656345988450668, "grad_norm": 1.5708948869055401, "learning_rate": 2.8560054278948694e-05, "loss": 0.7438, "step": 1844 }, { "epoch": 0.2266863251013638, "grad_norm": 1.5181812268096109, "learning_rate": 2.855730263848128e-05, "loss": 0.7283, "step": 1845 }, { "epoch": 0.22680919031822092, "grad_norm": 1.5222937630607194, "learning_rate": 2.8554548504235963e-05, "loss": 0.6848, "step": 1846 }, { "epoch": 0.22693205553507803, "grad_norm": 1.63717186695256, "learning_rate": 2.8551791876719357e-05, "loss": 0.6274, "step": 1847 }, { "epoch": 0.22705492075193512, "grad_norm": 1.4794082949391054, "learning_rate": 2.8549032756438523e-05, "loss": 0.7562, "step": 1848 }, { "epoch": 0.22717778596879223, "grad_norm": 1.3567159547460232, "learning_rate": 2.8546271143900976e-05, "loss": 0.6093, "step": 1849 }, { "epoch": 0.22730065118564935, "grad_norm": 1.4085643840451179, "learning_rate": 2.8543507039614706e-05, "loss": 0.6321, "step": 1850 }, { "epoch": 0.22742351640250644, "grad_norm": 1.2530998619493356, "learning_rate": 2.8540740444088148e-05, "loss": 0.6336, "step": 1851 }, { "epoch": 0.22754638161936355, "grad_norm": 1.8706096307005218, "learning_rate": 2.8537971357830198e-05, "loss": 0.7, "step": 1852 }, { "epoch": 0.22766924683622067, "grad_norm": 1.4380390698268792, "learning_rate": 2.853519978135022e-05, "loss": 0.7924, "step": 1853 }, { "epoch": 0.22779211205307778, "grad_norm": 1.4996711325471876, "learning_rate": 2.8532425715158018e-05, "loss": 0.665, "step": 1854 }, { "epoch": 0.22791497726993487, "grad_norm": 1.5077874358423065, "learning_rate": 2.8529649159763868e-05, "loss": 0.5998, "step": 1855 }, { "epoch": 0.228037842486792, "grad_norm": 1.4531640965162798, "learning_rate": 2.852687011567849e-05, "loss": 0.6146, "step": 1856 }, { "epoch": 0.2281607077036491, "grad_norm": 1.659888400424189, "learning_rate": 2.852408858341309e-05, "loss": 0.774, "step": 1857 }, { "epoch": 0.22828357292050622, "grad_norm": 1.4286494053277208, "learning_rate": 2.8521304563479295e-05, "loss": 0.6878, "step": 1858 }, { "epoch": 0.2284064381373633, "grad_norm": 1.4015496633660385, "learning_rate": 2.8518518056389217e-05, "loss": 0.562, "step": 1859 }, { "epoch": 0.22852930335422042, "grad_norm": 1.4366999349835166, "learning_rate": 2.851572906265541e-05, "loss": 0.7344, "step": 1860 }, { "epoch": 0.22865216857107754, "grad_norm": 1.2157170599344538, "learning_rate": 2.8512937582790896e-05, "loss": 0.6858, "step": 1861 }, { "epoch": 0.22877503378793462, "grad_norm": 1.5036358484362105, "learning_rate": 2.851014361730915e-05, "loss": 0.6228, "step": 1862 }, { "epoch": 0.22889789900479174, "grad_norm": 1.3508153229437634, "learning_rate": 2.85073471667241e-05, "loss": 0.7308, "step": 1863 }, { "epoch": 0.22902076422164885, "grad_norm": 2.001244753692986, "learning_rate": 2.8504548231550143e-05, "loss": 0.8058, "step": 1864 }, { "epoch": 0.22914362943850597, "grad_norm": 1.2961830146593696, "learning_rate": 2.850174681230211e-05, "loss": 0.6452, "step": 1865 }, { "epoch": 0.22926649465536306, "grad_norm": 1.516515852490975, "learning_rate": 2.8498942909495316e-05, "loss": 0.5926, "step": 1866 }, { "epoch": 0.22938935987222017, "grad_norm": 2.5403673589903, "learning_rate": 2.849613652364552e-05, "loss": 0.8644, "step": 1867 }, { "epoch": 0.2295122250890773, "grad_norm": 1.2829369270450584, "learning_rate": 2.8493327655268934e-05, "loss": 0.6448, "step": 1868 }, { "epoch": 0.2296350903059344, "grad_norm": 1.6178841583904429, "learning_rate": 2.8490516304882233e-05, "loss": 0.809, "step": 1869 }, { "epoch": 0.2297579555227915, "grad_norm": 1.510433625584689, "learning_rate": 2.8487702473002548e-05, "loss": 0.755, "step": 1870 }, { "epoch": 0.2298808207396486, "grad_norm": 4.370268235241678, "learning_rate": 2.8484886160147463e-05, "loss": 0.7278, "step": 1871 }, { "epoch": 0.23000368595650572, "grad_norm": 1.4465108565121152, "learning_rate": 2.8482067366835017e-05, "loss": 0.6684, "step": 1872 }, { "epoch": 0.2301265511733628, "grad_norm": 1.887210192886744, "learning_rate": 2.847924609358372e-05, "loss": 0.5999, "step": 1873 }, { "epoch": 0.23024941639021992, "grad_norm": 1.4057346451767663, "learning_rate": 2.8476422340912517e-05, "loss": 0.6011, "step": 1874 }, { "epoch": 0.23037228160707704, "grad_norm": 1.7446034416496725, "learning_rate": 2.8473596109340824e-05, "loss": 0.7953, "step": 1875 }, { "epoch": 0.23049514682393415, "grad_norm": 1.3633189839077127, "learning_rate": 2.8470767399388505e-05, "loss": 0.7228, "step": 1876 }, { "epoch": 0.23061801204079124, "grad_norm": 1.7754713428117912, "learning_rate": 2.846793621157588e-05, "loss": 0.8221, "step": 1877 }, { "epoch": 0.23074087725764836, "grad_norm": 2.2716535844899033, "learning_rate": 2.8465102546423734e-05, "loss": 0.6776, "step": 1878 }, { "epoch": 0.23086374247450547, "grad_norm": 1.7960038709897752, "learning_rate": 2.84622664044533e-05, "loss": 0.6547, "step": 1879 }, { "epoch": 0.2309866076913626, "grad_norm": 1.496169058963142, "learning_rate": 2.845942778618627e-05, "loss": 0.5122, "step": 1880 }, { "epoch": 0.23110947290821968, "grad_norm": 1.287323081903452, "learning_rate": 2.8456586692144783e-05, "loss": 0.6124, "step": 1881 }, { "epoch": 0.2312323381250768, "grad_norm": 1.4827077860582607, "learning_rate": 2.845374312285144e-05, "loss": 0.7361, "step": 1882 }, { "epoch": 0.2313552033419339, "grad_norm": 1.6143459011730577, "learning_rate": 2.8450897078829305e-05, "loss": 0.7147, "step": 1883 }, { "epoch": 0.231478068558791, "grad_norm": 1.6004899107365949, "learning_rate": 2.8448048560601882e-05, "loss": 0.6763, "step": 1884 }, { "epoch": 0.2316009337756481, "grad_norm": 1.2148936782904822, "learning_rate": 2.844519756869314e-05, "loss": 0.6314, "step": 1885 }, { "epoch": 0.23172379899250523, "grad_norm": 1.329826147481339, "learning_rate": 2.8442344103627502e-05, "loss": 0.6754, "step": 1886 }, { "epoch": 0.23184666420936234, "grad_norm": 1.3399048439164958, "learning_rate": 2.843948816592984e-05, "loss": 0.5972, "step": 1887 }, { "epoch": 0.23196952942621943, "grad_norm": 1.7135085008999384, "learning_rate": 2.8436629756125486e-05, "loss": 0.6374, "step": 1888 }, { "epoch": 0.23209239464307654, "grad_norm": 1.258186454586413, "learning_rate": 2.8433768874740236e-05, "loss": 0.6523, "step": 1889 }, { "epoch": 0.23221525985993366, "grad_norm": 1.5234352936862412, "learning_rate": 2.843090552230032e-05, "loss": 0.6393, "step": 1890 }, { "epoch": 0.23233812507679075, "grad_norm": 1.4465023307084601, "learning_rate": 2.8428039699332427e-05, "loss": 0.6297, "step": 1891 }, { "epoch": 0.23246099029364786, "grad_norm": 1.2853270720512924, "learning_rate": 2.8425171406363722e-05, "loss": 0.5533, "step": 1892 }, { "epoch": 0.23258385551050498, "grad_norm": 1.8448448951737586, "learning_rate": 2.8422300643921806e-05, "loss": 0.7058, "step": 1893 }, { "epoch": 0.2327067207273621, "grad_norm": 1.4982710264026378, "learning_rate": 2.8419427412534727e-05, "loss": 0.6527, "step": 1894 }, { "epoch": 0.23282958594421918, "grad_norm": 1.2485865223947104, "learning_rate": 2.841655171273101e-05, "loss": 0.6914, "step": 1895 }, { "epoch": 0.2329524511610763, "grad_norm": 1.3923614941702425, "learning_rate": 2.8413673545039608e-05, "loss": 0.6072, "step": 1896 }, { "epoch": 0.2330753163779334, "grad_norm": 1.404476769690423, "learning_rate": 2.841079290998995e-05, "loss": 0.7326, "step": 1897 }, { "epoch": 0.23319818159479053, "grad_norm": 1.2852237261488053, "learning_rate": 2.840790980811191e-05, "loss": 0.6571, "step": 1898 }, { "epoch": 0.23332104681164761, "grad_norm": 1.3972692087599041, "learning_rate": 2.8405024239935813e-05, "loss": 0.729, "step": 1899 }, { "epoch": 0.23344391202850473, "grad_norm": 1.4864621357061747, "learning_rate": 2.840213620599244e-05, "loss": 0.5903, "step": 1900 }, { "epoch": 0.23356677724536185, "grad_norm": 1.3152534089536692, "learning_rate": 2.839924570681303e-05, "loss": 0.5965, "step": 1901 }, { "epoch": 0.23368964246221893, "grad_norm": 1.485620835511369, "learning_rate": 2.839635274292927e-05, "loss": 0.7124, "step": 1902 }, { "epoch": 0.23381250767907605, "grad_norm": 1.551200356633622, "learning_rate": 2.83934573148733e-05, "loss": 0.665, "step": 1903 }, { "epoch": 0.23393537289593316, "grad_norm": 1.4198346625122482, "learning_rate": 2.8390559423177718e-05, "loss": 0.6506, "step": 1904 }, { "epoch": 0.23405823811279028, "grad_norm": 1.4235842540004153, "learning_rate": 2.8387659068375566e-05, "loss": 0.7081, "step": 1905 }, { "epoch": 0.23418110332964737, "grad_norm": 1.4344891158873314, "learning_rate": 2.8384756251000354e-05, "loss": 0.6662, "step": 1906 }, { "epoch": 0.23430396854650448, "grad_norm": 1.5332555586173227, "learning_rate": 2.8381850971586035e-05, "loss": 0.66, "step": 1907 }, { "epoch": 0.2344268337633616, "grad_norm": 1.2967297594161404, "learning_rate": 2.8378943230667012e-05, "loss": 0.6624, "step": 1908 }, { "epoch": 0.2345496989802187, "grad_norm": 1.359915295372671, "learning_rate": 2.8376033028778145e-05, "loss": 0.6995, "step": 1909 }, { "epoch": 0.2346725641970758, "grad_norm": 1.3942555135273065, "learning_rate": 2.8373120366454753e-05, "loss": 0.6181, "step": 1910 }, { "epoch": 0.23479542941393292, "grad_norm": 1.269484709785803, "learning_rate": 2.8370205244232598e-05, "loss": 0.5707, "step": 1911 }, { "epoch": 0.23491829463079003, "grad_norm": 1.3514178140455484, "learning_rate": 2.8367287662647894e-05, "loss": 0.6359, "step": 1912 }, { "epoch": 0.23504115984764712, "grad_norm": 1.3466151119197496, "learning_rate": 2.8364367622237314e-05, "loss": 0.6952, "step": 1913 }, { "epoch": 0.23516402506450423, "grad_norm": 1.4843714452836088, "learning_rate": 2.8361445123537982e-05, "loss": 0.5425, "step": 1914 }, { "epoch": 0.23528689028136135, "grad_norm": 1.4956555371305023, "learning_rate": 2.8358520167087472e-05, "loss": 0.646, "step": 1915 }, { "epoch": 0.23540975549821846, "grad_norm": 1.4511761513252135, "learning_rate": 2.835559275342381e-05, "loss": 0.6671, "step": 1916 }, { "epoch": 0.23553262071507555, "grad_norm": 1.4228126773926266, "learning_rate": 2.8352662883085475e-05, "loss": 0.7286, "step": 1917 }, { "epoch": 0.23565548593193267, "grad_norm": 1.4820952365994307, "learning_rate": 2.8349730556611394e-05, "loss": 0.6744, "step": 1918 }, { "epoch": 0.23577835114878978, "grad_norm": 1.5351919469200723, "learning_rate": 2.8346795774540958e-05, "loss": 0.6479, "step": 1919 }, { "epoch": 0.2359012163656469, "grad_norm": 1.37057691539937, "learning_rate": 2.8343858537414e-05, "loss": 0.6084, "step": 1920 }, { "epoch": 0.236024081582504, "grad_norm": 1.3661348280550993, "learning_rate": 2.8340918845770795e-05, "loss": 0.7809, "step": 1921 }, { "epoch": 0.2361469467993611, "grad_norm": 1.322116260135962, "learning_rate": 2.8337976700152087e-05, "loss": 0.6246, "step": 1922 }, { "epoch": 0.23626981201621822, "grad_norm": 1.381105141645205, "learning_rate": 2.833503210109907e-05, "loss": 0.6137, "step": 1923 }, { "epoch": 0.2363926772330753, "grad_norm": 1.208097543212404, "learning_rate": 2.8332085049153374e-05, "loss": 0.6599, "step": 1924 }, { "epoch": 0.23651554244993242, "grad_norm": 1.5428124159588716, "learning_rate": 2.8329135544857096e-05, "loss": 0.6494, "step": 1925 }, { "epoch": 0.23663840766678954, "grad_norm": 1.313583034498013, "learning_rate": 2.8326183588752778e-05, "loss": 0.582, "step": 1926 }, { "epoch": 0.23676127288364665, "grad_norm": 1.5473375659027344, "learning_rate": 2.832322918138341e-05, "loss": 0.598, "step": 1927 }, { "epoch": 0.23688413810050374, "grad_norm": 1.7003842564348794, "learning_rate": 2.8320272323292443e-05, "loss": 0.6766, "step": 1928 }, { "epoch": 0.23700700331736085, "grad_norm": 1.4806989926966707, "learning_rate": 2.8317313015023762e-05, "loss": 0.6477, "step": 1929 }, { "epoch": 0.23712986853421797, "grad_norm": 1.853546081859132, "learning_rate": 2.8314351257121724e-05, "loss": 0.7146, "step": 1930 }, { "epoch": 0.23725273375107508, "grad_norm": 1.558019557252877, "learning_rate": 2.8311387050131112e-05, "loss": 0.6598, "step": 1931 }, { "epoch": 0.23737559896793217, "grad_norm": 1.4023278675571245, "learning_rate": 2.830842039459718e-05, "loss": 0.7657, "step": 1932 }, { "epoch": 0.2374984641847893, "grad_norm": 1.468190300270811, "learning_rate": 2.830545129106563e-05, "loss": 0.6496, "step": 1933 }, { "epoch": 0.2376213294016464, "grad_norm": 1.379582875261381, "learning_rate": 2.83024797400826e-05, "loss": 0.5525, "step": 1934 }, { "epoch": 0.2377441946185035, "grad_norm": 1.7511789748946138, "learning_rate": 2.8299505742194693e-05, "loss": 0.6082, "step": 1935 }, { "epoch": 0.2378670598353606, "grad_norm": 1.5702784966862657, "learning_rate": 2.829652929794895e-05, "loss": 0.7082, "step": 1936 }, { "epoch": 0.23798992505221772, "grad_norm": 1.4967315420014118, "learning_rate": 2.829355040789288e-05, "loss": 0.7402, "step": 1937 }, { "epoch": 0.23811279026907484, "grad_norm": 1.437117788317812, "learning_rate": 2.829056907257442e-05, "loss": 0.778, "step": 1938 }, { "epoch": 0.23823565548593192, "grad_norm": 1.2239769140368264, "learning_rate": 2.8287585292541977e-05, "loss": 0.6213, "step": 1939 }, { "epoch": 0.23835852070278904, "grad_norm": 1.4129269706960363, "learning_rate": 2.828459906834439e-05, "loss": 0.607, "step": 1940 }, { "epoch": 0.23848138591964615, "grad_norm": 1.4626347584386912, "learning_rate": 2.828161040053096e-05, "loss": 0.7302, "step": 1941 }, { "epoch": 0.23860425113650324, "grad_norm": 1.3770035681366322, "learning_rate": 2.8278619289651423e-05, "loss": 0.7243, "step": 1942 }, { "epoch": 0.23872711635336036, "grad_norm": 1.3957130521888303, "learning_rate": 2.827562573625599e-05, "loss": 0.677, "step": 1943 }, { "epoch": 0.23884998157021747, "grad_norm": 1.5511831974565842, "learning_rate": 2.8272629740895294e-05, "loss": 0.7573, "step": 1944 }, { "epoch": 0.2389728467870746, "grad_norm": 1.519656144361857, "learning_rate": 2.8269631304120433e-05, "loss": 0.7558, "step": 1945 }, { "epoch": 0.23909571200393168, "grad_norm": 1.2579960576983111, "learning_rate": 2.8266630426482956e-05, "loss": 0.6597, "step": 1946 }, { "epoch": 0.2392185772207888, "grad_norm": 1.7786790968784365, "learning_rate": 2.8263627108534843e-05, "loss": 0.6336, "step": 1947 }, { "epoch": 0.2393414424376459, "grad_norm": 1.4270768300179948, "learning_rate": 2.826062135082854e-05, "loss": 0.6777, "step": 1948 }, { "epoch": 0.23946430765450302, "grad_norm": 1.466380685865264, "learning_rate": 2.8257613153916946e-05, "loss": 0.6341, "step": 1949 }, { "epoch": 0.2395871728713601, "grad_norm": 1.3061907239609516, "learning_rate": 2.8254602518353384e-05, "loss": 0.66, "step": 1950 }, { "epoch": 0.23971003808821723, "grad_norm": 1.2442035326962946, "learning_rate": 2.825158944469165e-05, "loss": 0.6808, "step": 1951 }, { "epoch": 0.23983290330507434, "grad_norm": 1.6694452677742735, "learning_rate": 2.8248573933485977e-05, "loss": 0.6226, "step": 1952 }, { "epoch": 0.23995576852193143, "grad_norm": 1.6442217807427004, "learning_rate": 2.824555598529105e-05, "loss": 0.6075, "step": 1953 }, { "epoch": 0.24007863373878854, "grad_norm": 1.4546350857553723, "learning_rate": 2.8242535600662e-05, "loss": 0.5795, "step": 1954 }, { "epoch": 0.24020149895564566, "grad_norm": 1.2840767694102855, "learning_rate": 2.8239512780154406e-05, "loss": 0.6792, "step": 1955 }, { "epoch": 0.24032436417250277, "grad_norm": 1.277044929046929, "learning_rate": 2.8236487524324298e-05, "loss": 0.6443, "step": 1956 }, { "epoch": 0.24044722938935986, "grad_norm": 1.6442892528511088, "learning_rate": 2.823345983372815e-05, "loss": 0.8002, "step": 1957 }, { "epoch": 0.24057009460621698, "grad_norm": 1.3335844209295504, "learning_rate": 2.8230429708922886e-05, "loss": 0.7032, "step": 1958 }, { "epoch": 0.2406929598230741, "grad_norm": 1.5505960780571655, "learning_rate": 2.8227397150465884e-05, "loss": 0.6724, "step": 1959 }, { "epoch": 0.2408158250399312, "grad_norm": 1.498695084337685, "learning_rate": 2.8224362158914958e-05, "loss": 0.7534, "step": 1960 }, { "epoch": 0.2409386902567883, "grad_norm": 1.1470341279046108, "learning_rate": 2.8221324734828376e-05, "loss": 0.7165, "step": 1961 }, { "epoch": 0.2410615554736454, "grad_norm": 1.475911707098852, "learning_rate": 2.8218284878764848e-05, "loss": 0.6775, "step": 1962 }, { "epoch": 0.24118442069050253, "grad_norm": 1.3115133290906498, "learning_rate": 2.8215242591283545e-05, "loss": 0.676, "step": 1963 }, { "epoch": 0.24130728590735961, "grad_norm": 1.2209383075535614, "learning_rate": 2.8212197872944067e-05, "loss": 0.7392, "step": 1964 }, { "epoch": 0.24143015112421673, "grad_norm": 1.6089871437004832, "learning_rate": 2.820915072430648e-05, "loss": 0.7093, "step": 1965 }, { "epoch": 0.24155301634107385, "grad_norm": 1.6515281515423277, "learning_rate": 2.8206101145931275e-05, "loss": 0.6113, "step": 1966 }, { "epoch": 0.24167588155793096, "grad_norm": 1.4162345428042113, "learning_rate": 2.8203049138379415e-05, "loss": 0.5544, "step": 1967 }, { "epoch": 0.24179874677478805, "grad_norm": 1.3971084905694233, "learning_rate": 2.8199994702212287e-05, "loss": 0.7639, "step": 1968 }, { "epoch": 0.24192161199164516, "grad_norm": 1.3855702449642475, "learning_rate": 2.819693783799174e-05, "loss": 0.6513, "step": 1969 }, { "epoch": 0.24204447720850228, "grad_norm": 2.190686437100372, "learning_rate": 2.8193878546280067e-05, "loss": 0.7788, "step": 1970 }, { "epoch": 0.2421673424253594, "grad_norm": 1.5240940782536632, "learning_rate": 2.8190816827639994e-05, "loss": 0.7127, "step": 1971 }, { "epoch": 0.24229020764221648, "grad_norm": 1.438662761978373, "learning_rate": 2.8187752682634715e-05, "loss": 0.6844, "step": 1972 }, { "epoch": 0.2424130728590736, "grad_norm": 1.5707932477258382, "learning_rate": 2.818468611182785e-05, "loss": 0.7183, "step": 1973 }, { "epoch": 0.2425359380759307, "grad_norm": 1.3053956466734793, "learning_rate": 2.8181617115783483e-05, "loss": 0.5957, "step": 1974 }, { "epoch": 0.2426588032927878, "grad_norm": 1.4710345098565927, "learning_rate": 2.8178545695066137e-05, "loss": 0.6912, "step": 1975 }, { "epoch": 0.24278166850964492, "grad_norm": 1.7154406904606465, "learning_rate": 2.817547185024077e-05, "loss": 0.6374, "step": 1976 }, { "epoch": 0.24290453372650203, "grad_norm": 1.5493212875778228, "learning_rate": 2.8172395581872808e-05, "loss": 0.6356, "step": 1977 }, { "epoch": 0.24302739894335915, "grad_norm": 1.6100947857834758, "learning_rate": 2.81693168905281e-05, "loss": 0.8758, "step": 1978 }, { "epoch": 0.24315026416021623, "grad_norm": 1.399459702540571, "learning_rate": 2.816623577677296e-05, "loss": 0.6635, "step": 1979 }, { "epoch": 0.24327312937707335, "grad_norm": 1.7494671086516875, "learning_rate": 2.8163152241174133e-05, "loss": 0.714, "step": 1980 }, { "epoch": 0.24339599459393046, "grad_norm": 1.2564794400626866, "learning_rate": 2.816006628429882e-05, "loss": 0.5881, "step": 1981 }, { "epoch": 0.24351885981078755, "grad_norm": 1.9919130720505156, "learning_rate": 2.8156977906714657e-05, "loss": 0.6083, "step": 1982 }, { "epoch": 0.24364172502764467, "grad_norm": 1.3745146721708865, "learning_rate": 2.8153887108989734e-05, "loss": 0.6943, "step": 1983 }, { "epoch": 0.24376459024450178, "grad_norm": 1.268344795727946, "learning_rate": 2.8150793891692582e-05, "loss": 0.6086, "step": 1984 }, { "epoch": 0.2438874554613589, "grad_norm": 1.3449328304993349, "learning_rate": 2.8147698255392183e-05, "loss": 0.6589, "step": 1985 }, { "epoch": 0.244010320678216, "grad_norm": 1.439757763112672, "learning_rate": 2.8144600200657953e-05, "loss": 0.641, "step": 1986 }, { "epoch": 0.2441331858950731, "grad_norm": 1.762316814535619, "learning_rate": 2.8141499728059765e-05, "loss": 0.7405, "step": 1987 }, { "epoch": 0.24425605111193022, "grad_norm": 1.25115716754034, "learning_rate": 2.8138396838167925e-05, "loss": 0.6958, "step": 1988 }, { "epoch": 0.24437891632878733, "grad_norm": 1.6456221819877082, "learning_rate": 2.8135291531553192e-05, "loss": 0.6639, "step": 1989 }, { "epoch": 0.24450178154564442, "grad_norm": 1.382825629217359, "learning_rate": 2.8132183808786772e-05, "loss": 0.5401, "step": 1990 }, { "epoch": 0.24462464676250154, "grad_norm": 1.786367532191298, "learning_rate": 2.8129073670440297e-05, "loss": 0.7728, "step": 1991 }, { "epoch": 0.24474751197935865, "grad_norm": 1.3163113255729968, "learning_rate": 2.812596111708587e-05, "loss": 0.7267, "step": 1992 }, { "epoch": 0.24487037719621574, "grad_norm": 1.4706158448855873, "learning_rate": 2.8122846149296025e-05, "loss": 0.5809, "step": 1993 }, { "epoch": 0.24499324241307285, "grad_norm": 1.4015651795879798, "learning_rate": 2.8119728767643725e-05, "loss": 0.6112, "step": 1994 }, { "epoch": 0.24511610762992997, "grad_norm": 1.1622595536295528, "learning_rate": 2.8116608972702414e-05, "loss": 0.6915, "step": 1995 }, { "epoch": 0.24523897284678708, "grad_norm": 1.3986387553198485, "learning_rate": 2.811348676504594e-05, "loss": 0.5903, "step": 1996 }, { "epoch": 0.24536183806364417, "grad_norm": 1.3818424229418955, "learning_rate": 2.8110362145248617e-05, "loss": 0.6585, "step": 1997 }, { "epoch": 0.2454847032805013, "grad_norm": 1.4204339916761488, "learning_rate": 2.8107235113885206e-05, "loss": 0.7314, "step": 1998 }, { "epoch": 0.2456075684973584, "grad_norm": 1.3194562893888564, "learning_rate": 2.81041056715309e-05, "loss": 0.7335, "step": 1999 }, { "epoch": 0.24573043371421552, "grad_norm": 1.3313819364191601, "learning_rate": 2.8100973818761332e-05, "loss": 0.6538, "step": 2000 }, { "epoch": 0.2458532989310726, "grad_norm": 1.3209600194662847, "learning_rate": 2.80978395561526e-05, "loss": 0.566, "step": 2001 }, { "epoch": 0.24597616414792972, "grad_norm": 1.6123311151225557, "learning_rate": 2.8094702884281224e-05, "loss": 0.7222, "step": 2002 }, { "epoch": 0.24609902936478684, "grad_norm": 1.6141413148952697, "learning_rate": 2.8091563803724172e-05, "loss": 0.7919, "step": 2003 }, { "epoch": 0.24622189458164392, "grad_norm": 1.4805022083182833, "learning_rate": 2.808842231505886e-05, "loss": 0.742, "step": 2004 }, { "epoch": 0.24634475979850104, "grad_norm": 1.3726328526200597, "learning_rate": 2.8085278418863142e-05, "loss": 0.6494, "step": 2005 }, { "epoch": 0.24646762501535815, "grad_norm": 1.5902594275810606, "learning_rate": 2.8082132115715323e-05, "loss": 0.6231, "step": 2006 }, { "epoch": 0.24659049023221527, "grad_norm": 1.4146670874059721, "learning_rate": 2.8078983406194142e-05, "loss": 0.6781, "step": 2007 }, { "epoch": 0.24671335544907236, "grad_norm": 1.4037907866357373, "learning_rate": 2.8075832290878782e-05, "loss": 0.7053, "step": 2008 }, { "epoch": 0.24683622066592947, "grad_norm": 1.3838576241359217, "learning_rate": 2.8072678770348876e-05, "loss": 0.6959, "step": 2009 }, { "epoch": 0.2469590858827866, "grad_norm": 1.3256960909047104, "learning_rate": 2.8069522845184484e-05, "loss": 0.7072, "step": 2010 }, { "epoch": 0.2470819510996437, "grad_norm": 1.6990816204286645, "learning_rate": 2.8066364515966126e-05, "loss": 0.5782, "step": 2011 }, { "epoch": 0.2472048163165008, "grad_norm": 1.5760441863662125, "learning_rate": 2.8063203783274755e-05, "loss": 0.6522, "step": 2012 }, { "epoch": 0.2473276815333579, "grad_norm": 1.3017445847462996, "learning_rate": 2.8060040647691765e-05, "loss": 0.7389, "step": 2013 }, { "epoch": 0.24745054675021502, "grad_norm": 1.5322655558793852, "learning_rate": 2.8056875109798994e-05, "loss": 0.6701, "step": 2014 }, { "epoch": 0.2475734119670721, "grad_norm": 1.3239277810471157, "learning_rate": 2.8053707170178726e-05, "loss": 0.5927, "step": 2015 }, { "epoch": 0.24769627718392923, "grad_norm": 1.7122023756727283, "learning_rate": 2.8050536829413676e-05, "loss": 0.6685, "step": 2016 }, { "epoch": 0.24781914240078634, "grad_norm": 1.379929756253379, "learning_rate": 2.8047364088087015e-05, "loss": 0.672, "step": 2017 }, { "epoch": 0.24794200761764346, "grad_norm": 1.4672385508645174, "learning_rate": 2.8044188946782344e-05, "loss": 0.6875, "step": 2018 }, { "epoch": 0.24806487283450054, "grad_norm": 1.2818332175213603, "learning_rate": 2.804101140608371e-05, "loss": 0.6817, "step": 2019 }, { "epoch": 0.24818773805135766, "grad_norm": 1.3443159922526535, "learning_rate": 2.8037831466575603e-05, "loss": 0.6257, "step": 2020 }, { "epoch": 0.24831060326821477, "grad_norm": 1.3684341436030807, "learning_rate": 2.8034649128842952e-05, "loss": 0.6237, "step": 2021 }, { "epoch": 0.2484334684850719, "grad_norm": 1.521236477562804, "learning_rate": 2.8031464393471126e-05, "loss": 0.765, "step": 2022 }, { "epoch": 0.24855633370192898, "grad_norm": 1.2700414537613245, "learning_rate": 2.8028277261045933e-05, "loss": 0.5835, "step": 2023 }, { "epoch": 0.2486791989187861, "grad_norm": 1.3247446259924713, "learning_rate": 2.8025087732153634e-05, "loss": 0.6208, "step": 2024 }, { "epoch": 0.2488020641356432, "grad_norm": 1.7381089541706063, "learning_rate": 2.802189580738092e-05, "loss": 0.6529, "step": 2025 }, { "epoch": 0.2489249293525003, "grad_norm": 1.5512176696744036, "learning_rate": 2.8018701487314917e-05, "loss": 0.7041, "step": 2026 }, { "epoch": 0.2490477945693574, "grad_norm": 1.1850654755163519, "learning_rate": 2.8015504772543204e-05, "loss": 0.5769, "step": 2027 }, { "epoch": 0.24917065978621453, "grad_norm": 1.6554833465691456, "learning_rate": 2.8012305663653797e-05, "loss": 0.6996, "step": 2028 }, { "epoch": 0.24929352500307164, "grad_norm": 1.319000723517553, "learning_rate": 2.800910416123515e-05, "loss": 0.6831, "step": 2029 }, { "epoch": 0.24941639021992873, "grad_norm": 1.3765260986952075, "learning_rate": 2.8005900265876167e-05, "loss": 0.6499, "step": 2030 }, { "epoch": 0.24953925543678585, "grad_norm": 1.4254504164566582, "learning_rate": 2.8002693978166166e-05, "loss": 0.6654, "step": 2031 }, { "epoch": 0.24966212065364296, "grad_norm": 1.5581203637553405, "learning_rate": 2.799948529869494e-05, "loss": 0.6108, "step": 2032 }, { "epoch": 0.24978498587050005, "grad_norm": 1.7270384715397098, "learning_rate": 2.7996274228052698e-05, "loss": 0.6546, "step": 2033 }, { "epoch": 0.24990785108735716, "grad_norm": 1.5185268134010126, "learning_rate": 2.7993060766830093e-05, "loss": 0.703, "step": 2034 }, { "epoch": 0.25003071630421425, "grad_norm": 1.4411454688133087, "learning_rate": 2.7989844915618226e-05, "loss": 0.6304, "step": 2035 }, { "epoch": 0.25015358152107137, "grad_norm": 1.2515171408718433, "learning_rate": 2.7986626675008625e-05, "loss": 0.5689, "step": 2036 }, { "epoch": 0.2502764467379285, "grad_norm": 1.448157319298629, "learning_rate": 2.7983406045593273e-05, "loss": 0.6073, "step": 2037 }, { "epoch": 0.2503993119547856, "grad_norm": 1.3144484869998798, "learning_rate": 2.7980183027964573e-05, "loss": 0.6572, "step": 2038 }, { "epoch": 0.2505221771716427, "grad_norm": 1.196309578451918, "learning_rate": 2.797695762271539e-05, "loss": 0.5874, "step": 2039 }, { "epoch": 0.25064504238849983, "grad_norm": 1.2855448807610967, "learning_rate": 2.7973729830439008e-05, "loss": 0.6337, "step": 2040 }, { "epoch": 0.25076790760535694, "grad_norm": 1.3613547160121815, "learning_rate": 2.797049965172916e-05, "loss": 0.6862, "step": 2041 }, { "epoch": 0.25089077282221406, "grad_norm": 1.4575532276815764, "learning_rate": 2.7967267087180018e-05, "loss": 0.6878, "step": 2042 }, { "epoch": 0.2510136380390711, "grad_norm": 1.3916449777980728, "learning_rate": 2.7964032137386192e-05, "loss": 0.6505, "step": 2043 }, { "epoch": 0.25113650325592823, "grad_norm": 1.3487525980286055, "learning_rate": 2.796079480294273e-05, "loss": 0.6936, "step": 2044 }, { "epoch": 0.25125936847278535, "grad_norm": 1.317822115541718, "learning_rate": 2.7957555084445114e-05, "loss": 0.5685, "step": 2045 }, { "epoch": 0.25138223368964246, "grad_norm": 1.3056679803764912, "learning_rate": 2.7954312982489278e-05, "loss": 0.6463, "step": 2046 }, { "epoch": 0.2515050989064996, "grad_norm": 1.3143736319303578, "learning_rate": 2.7951068497671582e-05, "loss": 0.6001, "step": 2047 }, { "epoch": 0.2516279641233567, "grad_norm": 1.5302056586043324, "learning_rate": 2.794782163058882e-05, "loss": 0.7196, "step": 2048 }, { "epoch": 0.2517508293402138, "grad_norm": 1.4517710429408015, "learning_rate": 2.794457238183824e-05, "loss": 0.6671, "step": 2049 }, { "epoch": 0.25187369455707087, "grad_norm": 1.4189706665823618, "learning_rate": 2.7941320752017522e-05, "loss": 0.5962, "step": 2050 }, { "epoch": 0.251996559773928, "grad_norm": 1.409911880614214, "learning_rate": 2.793806674172478e-05, "loss": 0.6532, "step": 2051 }, { "epoch": 0.2521194249907851, "grad_norm": 1.4355712605246402, "learning_rate": 2.7934810351558565e-05, "loss": 0.6702, "step": 2052 }, { "epoch": 0.2522422902076422, "grad_norm": 1.3680268738511183, "learning_rate": 2.7931551582117868e-05, "loss": 0.6984, "step": 2053 }, { "epoch": 0.25236515542449933, "grad_norm": 1.5158394993907192, "learning_rate": 2.7928290434002122e-05, "loss": 0.7366, "step": 2054 }, { "epoch": 0.25248802064135645, "grad_norm": 1.3276417349823233, "learning_rate": 2.79250269078112e-05, "loss": 0.647, "step": 2055 }, { "epoch": 0.25261088585821356, "grad_norm": 1.1892433636539788, "learning_rate": 2.7921761004145397e-05, "loss": 0.618, "step": 2056 }, { "epoch": 0.2527337510750706, "grad_norm": 1.2995502616887598, "learning_rate": 2.7918492723605453e-05, "loss": 0.6051, "step": 2057 }, { "epoch": 0.25285661629192774, "grad_norm": 1.4393118315165196, "learning_rate": 2.791522206679256e-05, "loss": 0.7341, "step": 2058 }, { "epoch": 0.25297948150878485, "grad_norm": 1.416351976417614, "learning_rate": 2.7911949034308318e-05, "loss": 0.6209, "step": 2059 }, { "epoch": 0.25310234672564197, "grad_norm": 1.4101241333228298, "learning_rate": 2.7908673626754794e-05, "loss": 0.5653, "step": 2060 }, { "epoch": 0.2532252119424991, "grad_norm": 1.2497521245860952, "learning_rate": 2.7905395844734468e-05, "loss": 0.5654, "step": 2061 }, { "epoch": 0.2533480771593562, "grad_norm": 1.7816117582628301, "learning_rate": 2.7902115688850272e-05, "loss": 0.6599, "step": 2062 }, { "epoch": 0.2534709423762133, "grad_norm": 1.5339936286451272, "learning_rate": 2.789883315970557e-05, "loss": 0.6931, "step": 2063 }, { "epoch": 0.2535938075930704, "grad_norm": 1.336525118968716, "learning_rate": 2.7895548257904157e-05, "loss": 0.6429, "step": 2064 }, { "epoch": 0.2537166728099275, "grad_norm": 1.5365976861474295, "learning_rate": 2.789226098405028e-05, "loss": 0.6349, "step": 2065 }, { "epoch": 0.2538395380267846, "grad_norm": 1.5516525587852044, "learning_rate": 2.7888971338748595e-05, "loss": 0.6247, "step": 2066 }, { "epoch": 0.2539624032436417, "grad_norm": 1.4106425222097636, "learning_rate": 2.7885679322604223e-05, "loss": 0.7662, "step": 2067 }, { "epoch": 0.25408526846049884, "grad_norm": 1.312450836229963, "learning_rate": 2.7882384936222703e-05, "loss": 0.7126, "step": 2068 }, { "epoch": 0.25420813367735595, "grad_norm": 1.4018332370863573, "learning_rate": 2.787908818021003e-05, "loss": 0.6215, "step": 2069 }, { "epoch": 0.25433099889421307, "grad_norm": 1.6019722504001375, "learning_rate": 2.78757890551726e-05, "loss": 0.7116, "step": 2070 }, { "epoch": 0.2544538641110702, "grad_norm": 1.4574848834263765, "learning_rate": 2.7872487561717277e-05, "loss": 0.7279, "step": 2071 }, { "epoch": 0.25457672932792724, "grad_norm": 1.4984013147583055, "learning_rate": 2.7869183700451352e-05, "loss": 0.626, "step": 2072 }, { "epoch": 0.25469959454478436, "grad_norm": 1.4312591911916634, "learning_rate": 2.7865877471982544e-05, "loss": 0.7502, "step": 2073 }, { "epoch": 0.2548224597616415, "grad_norm": 1.8394043855394422, "learning_rate": 2.786256887691901e-05, "loss": 0.8096, "step": 2074 }, { "epoch": 0.2549453249784986, "grad_norm": 1.6297681072444639, "learning_rate": 2.785925791586935e-05, "loss": 0.6899, "step": 2075 }, { "epoch": 0.2550681901953557, "grad_norm": 1.3894080562402324, "learning_rate": 2.785594458944259e-05, "loss": 0.6231, "step": 2076 }, { "epoch": 0.2551910554122128, "grad_norm": 1.2103984041485563, "learning_rate": 2.7852628898248203e-05, "loss": 0.6276, "step": 2077 }, { "epoch": 0.25531392062906993, "grad_norm": 1.881288732183288, "learning_rate": 2.7849310842896074e-05, "loss": 0.6726, "step": 2078 }, { "epoch": 0.255436785845927, "grad_norm": 1.3062621046781762, "learning_rate": 2.7845990423996548e-05, "loss": 0.6258, "step": 2079 }, { "epoch": 0.2555596510627841, "grad_norm": 1.4450225965009083, "learning_rate": 2.7842667642160394e-05, "loss": 0.6036, "step": 2080 }, { "epoch": 0.2556825162796412, "grad_norm": 1.387796452428006, "learning_rate": 2.7839342497998813e-05, "loss": 0.6142, "step": 2081 }, { "epoch": 0.25580538149649834, "grad_norm": 1.440237236019156, "learning_rate": 2.783601499212345e-05, "loss": 0.8645, "step": 2082 }, { "epoch": 0.25592824671335546, "grad_norm": 1.3175757504491177, "learning_rate": 2.783268512514637e-05, "loss": 0.5592, "step": 2083 }, { "epoch": 0.25605111193021257, "grad_norm": 1.3847895025094408, "learning_rate": 2.7829352897680087e-05, "loss": 0.6797, "step": 2084 }, { "epoch": 0.2561739771470697, "grad_norm": 1.2910575047563868, "learning_rate": 2.782601831033754e-05, "loss": 0.667, "step": 2085 }, { "epoch": 0.25629684236392675, "grad_norm": 2.77514937157323, "learning_rate": 2.7822681363732104e-05, "loss": 0.8667, "step": 2086 }, { "epoch": 0.25641970758078386, "grad_norm": 1.5265498536792226, "learning_rate": 2.7819342058477584e-05, "loss": 0.5974, "step": 2087 }, { "epoch": 0.256542572797641, "grad_norm": 1.4163444645459913, "learning_rate": 2.7816000395188232e-05, "loss": 0.6379, "step": 2088 }, { "epoch": 0.2566654380144981, "grad_norm": 1.6117640832817466, "learning_rate": 2.7812656374478723e-05, "loss": 0.6976, "step": 2089 }, { "epoch": 0.2567883032313552, "grad_norm": 1.5137109393877402, "learning_rate": 2.780930999696417e-05, "loss": 0.5938, "step": 2090 }, { "epoch": 0.2569111684482123, "grad_norm": 1.375929592031223, "learning_rate": 2.7805961263260108e-05, "loss": 0.6832, "step": 2091 }, { "epoch": 0.25703403366506944, "grad_norm": 1.7277477749128631, "learning_rate": 2.7802610173982523e-05, "loss": 0.599, "step": 2092 }, { "epoch": 0.2571568988819265, "grad_norm": 1.443486133800164, "learning_rate": 2.7799256729747825e-05, "loss": 0.5719, "step": 2093 }, { "epoch": 0.2572797640987836, "grad_norm": 1.1737349500311147, "learning_rate": 2.7795900931172856e-05, "loss": 0.5578, "step": 2094 }, { "epoch": 0.25740262931564073, "grad_norm": 2.219065701095662, "learning_rate": 2.7792542778874896e-05, "loss": 0.714, "step": 2095 }, { "epoch": 0.25752549453249785, "grad_norm": 1.546312990814226, "learning_rate": 2.778918227347166e-05, "loss": 0.7182, "step": 2096 }, { "epoch": 0.25764835974935496, "grad_norm": 1.398153311389245, "learning_rate": 2.778581941558128e-05, "loss": 0.5815, "step": 2097 }, { "epoch": 0.2577712249662121, "grad_norm": 1.6711550533205222, "learning_rate": 2.778245420582234e-05, "loss": 0.651, "step": 2098 }, { "epoch": 0.2578940901830692, "grad_norm": 1.4431149125973532, "learning_rate": 2.777908664481384e-05, "loss": 0.6646, "step": 2099 }, { "epoch": 0.2580169553999263, "grad_norm": 1.3542876554499295, "learning_rate": 2.7775716733175232e-05, "loss": 0.6063, "step": 2100 }, { "epoch": 0.25813982061678337, "grad_norm": 1.5285540148532393, "learning_rate": 2.7772344471526385e-05, "loss": 0.7424, "step": 2101 }, { "epoch": 0.2582626858336405, "grad_norm": 1.7274805340268136, "learning_rate": 2.77689698604876e-05, "loss": 0.7143, "step": 2102 }, { "epoch": 0.2583855510504976, "grad_norm": 1.5490892808878842, "learning_rate": 2.7765592900679622e-05, "loss": 0.565, "step": 2103 }, { "epoch": 0.2585084162673547, "grad_norm": 1.5019864981957478, "learning_rate": 2.7762213592723616e-05, "loss": 0.5823, "step": 2104 }, { "epoch": 0.25863128148421183, "grad_norm": 1.4459465352960252, "learning_rate": 2.7758831937241188e-05, "loss": 0.6863, "step": 2105 }, { "epoch": 0.25875414670106894, "grad_norm": 1.4372127389772587, "learning_rate": 2.775544793485437e-05, "loss": 0.6835, "step": 2106 }, { "epoch": 0.25887701191792606, "grad_norm": 1.454992340907955, "learning_rate": 2.775206158618562e-05, "loss": 0.7744, "step": 2107 }, { "epoch": 0.2589998771347831, "grad_norm": 1.5095289508936582, "learning_rate": 2.7748672891857847e-05, "loss": 0.6772, "step": 2108 }, { "epoch": 0.25912274235164023, "grad_norm": 1.3852078540421113, "learning_rate": 2.7745281852494373e-05, "loss": 0.6214, "step": 2109 }, { "epoch": 0.25924560756849735, "grad_norm": 1.444143182715382, "learning_rate": 2.7741888468718956e-05, "loss": 0.5929, "step": 2110 }, { "epoch": 0.25936847278535446, "grad_norm": 1.4715099296645295, "learning_rate": 2.773849274115579e-05, "loss": 0.6854, "step": 2111 }, { "epoch": 0.2594913380022116, "grad_norm": 1.4451012188790329, "learning_rate": 2.77350946704295e-05, "loss": 0.7399, "step": 2112 }, { "epoch": 0.2596142032190687, "grad_norm": 1.4149925137777644, "learning_rate": 2.7731694257165126e-05, "loss": 0.6749, "step": 2113 }, { "epoch": 0.2597370684359258, "grad_norm": 1.5725347801238525, "learning_rate": 2.7728291501988173e-05, "loss": 0.7158, "step": 2114 }, { "epoch": 0.25985993365278287, "grad_norm": 1.7063697810859408, "learning_rate": 2.7724886405524536e-05, "loss": 0.7486, "step": 2115 }, { "epoch": 0.25998279886964, "grad_norm": 1.399879803900202, "learning_rate": 2.7721478968400573e-05, "loss": 0.6344, "step": 2116 }, { "epoch": 0.2601056640864971, "grad_norm": 1.5124289611093618, "learning_rate": 2.771806919124305e-05, "loss": 0.739, "step": 2117 }, { "epoch": 0.2602285293033542, "grad_norm": 1.7801915748098678, "learning_rate": 2.771465707467918e-05, "loss": 0.6882, "step": 2118 }, { "epoch": 0.26035139452021133, "grad_norm": 1.382162167378409, "learning_rate": 2.7711242619336605e-05, "loss": 0.5842, "step": 2119 }, { "epoch": 0.26047425973706845, "grad_norm": 1.8503044524625418, "learning_rate": 2.7707825825843382e-05, "loss": 0.7363, "step": 2120 }, { "epoch": 0.26059712495392556, "grad_norm": 1.4958925788035258, "learning_rate": 2.770440669482801e-05, "loss": 0.7736, "step": 2121 }, { "epoch": 0.2607199901707827, "grad_norm": 1.3263634608897843, "learning_rate": 2.7700985226919415e-05, "loss": 0.7854, "step": 2122 }, { "epoch": 0.26084285538763974, "grad_norm": 1.5571784643491906, "learning_rate": 2.769756142274696e-05, "loss": 0.7648, "step": 2123 }, { "epoch": 0.26096572060449685, "grad_norm": 1.4602358756805132, "learning_rate": 2.769413528294043e-05, "loss": 0.6341, "step": 2124 }, { "epoch": 0.26108858582135397, "grad_norm": 1.419661500007916, "learning_rate": 2.7690706808130037e-05, "loss": 0.6322, "step": 2125 }, { "epoch": 0.2612114510382111, "grad_norm": 1.5114218457355253, "learning_rate": 2.7687275998946426e-05, "loss": 0.53, "step": 2126 }, { "epoch": 0.2613343162550682, "grad_norm": 1.6070198548888042, "learning_rate": 2.768384285602068e-05, "loss": 0.7168, "step": 2127 }, { "epoch": 0.2614571814719253, "grad_norm": 1.3979567047885573, "learning_rate": 2.7680407379984292e-05, "loss": 0.5876, "step": 2128 }, { "epoch": 0.26158004668878243, "grad_norm": 1.617948812139079, "learning_rate": 2.7676969571469207e-05, "loss": 0.7524, "step": 2129 }, { "epoch": 0.2617029119056395, "grad_norm": 1.296002999164522, "learning_rate": 2.7673529431107777e-05, "loss": 0.5363, "step": 2130 }, { "epoch": 0.2618257771224966, "grad_norm": 1.2524432238247738, "learning_rate": 2.7670086959532807e-05, "loss": 0.655, "step": 2131 }, { "epoch": 0.2619486423393537, "grad_norm": 1.562395168839403, "learning_rate": 2.7666642157377504e-05, "loss": 0.6504, "step": 2132 }, { "epoch": 0.26207150755621084, "grad_norm": 1.4467627738174942, "learning_rate": 2.766319502527552e-05, "loss": 0.8018, "step": 2133 }, { "epoch": 0.26219437277306795, "grad_norm": 1.433704280435961, "learning_rate": 2.765974556386094e-05, "loss": 0.68, "step": 2134 }, { "epoch": 0.26231723798992507, "grad_norm": 1.2775876661458148, "learning_rate": 2.7656293773768262e-05, "loss": 0.5918, "step": 2135 }, { "epoch": 0.2624401032067822, "grad_norm": 1.3281122365921707, "learning_rate": 2.7652839655632423e-05, "loss": 0.6307, "step": 2136 }, { "epoch": 0.26256296842363924, "grad_norm": 1.246159354741134, "learning_rate": 2.764938321008879e-05, "loss": 0.5968, "step": 2137 }, { "epoch": 0.26268583364049636, "grad_norm": 1.2280704652836798, "learning_rate": 2.7645924437773144e-05, "loss": 0.671, "step": 2138 }, { "epoch": 0.2628086988573535, "grad_norm": 1.3961619959640614, "learning_rate": 2.764246333932171e-05, "loss": 0.752, "step": 2139 }, { "epoch": 0.2629315640742106, "grad_norm": 1.7375098735610799, "learning_rate": 2.7638999915371137e-05, "loss": 0.7658, "step": 2140 }, { "epoch": 0.2630544292910677, "grad_norm": 1.5240563096135558, "learning_rate": 2.7635534166558495e-05, "loss": 0.6849, "step": 2141 }, { "epoch": 0.2631772945079248, "grad_norm": 1.1669348625132332, "learning_rate": 2.7632066093521283e-05, "loss": 0.5777, "step": 2142 }, { "epoch": 0.26330015972478193, "grad_norm": 1.3719422974200397, "learning_rate": 2.7628595696897443e-05, "loss": 0.6143, "step": 2143 }, { "epoch": 0.263423024941639, "grad_norm": 1.6675898314527708, "learning_rate": 2.7625122977325318e-05, "loss": 0.5698, "step": 2144 }, { "epoch": 0.2635458901584961, "grad_norm": 1.4751562852537439, "learning_rate": 2.76216479354437e-05, "loss": 0.748, "step": 2145 }, { "epoch": 0.2636687553753532, "grad_norm": 1.328346255953098, "learning_rate": 2.76181705718918e-05, "loss": 0.6594, "step": 2146 }, { "epoch": 0.26379162059221034, "grad_norm": 1.3707341599022775, "learning_rate": 2.7614690887309253e-05, "loss": 0.6703, "step": 2147 }, { "epoch": 0.26391448580906746, "grad_norm": 1.3637620678180664, "learning_rate": 2.7611208882336128e-05, "loss": 0.6051, "step": 2148 }, { "epoch": 0.26403735102592457, "grad_norm": 1.5101264853049854, "learning_rate": 2.760772455761291e-05, "loss": 0.6319, "step": 2149 }, { "epoch": 0.2641602162427817, "grad_norm": 1.532030865088229, "learning_rate": 2.7604237913780533e-05, "loss": 0.7999, "step": 2150 }, { "epoch": 0.2642830814596388, "grad_norm": 1.4965143879817313, "learning_rate": 2.7600748951480325e-05, "loss": 0.5871, "step": 2151 }, { "epoch": 0.26440594667649586, "grad_norm": 1.3505222969859052, "learning_rate": 2.7597257671354072e-05, "loss": 0.7339, "step": 2152 }, { "epoch": 0.264528811893353, "grad_norm": 1.32156674300675, "learning_rate": 2.7593764074043966e-05, "loss": 0.744, "step": 2153 }, { "epoch": 0.2646516771102101, "grad_norm": 1.3953137371066846, "learning_rate": 2.759026816019263e-05, "loss": 0.7325, "step": 2154 }, { "epoch": 0.2647745423270672, "grad_norm": 1.3244712960340375, "learning_rate": 2.7586769930443114e-05, "loss": 0.6372, "step": 2155 }, { "epoch": 0.2648974075439243, "grad_norm": 1.4241022912608328, "learning_rate": 2.7583269385438903e-05, "loss": 0.7049, "step": 2156 }, { "epoch": 0.26502027276078144, "grad_norm": 1.4476134837306067, "learning_rate": 2.7579766525823888e-05, "loss": 0.6748, "step": 2157 }, { "epoch": 0.26514313797763855, "grad_norm": 1.5122512829304895, "learning_rate": 2.7576261352242407e-05, "loss": 0.6322, "step": 2158 }, { "epoch": 0.2652660031944956, "grad_norm": 1.3709312143319423, "learning_rate": 2.757275386533921e-05, "loss": 0.5941, "step": 2159 }, { "epoch": 0.26538886841135273, "grad_norm": 1.3685914057260686, "learning_rate": 2.7569244065759478e-05, "loss": 0.5991, "step": 2160 }, { "epoch": 0.26551173362820985, "grad_norm": 1.4371741879832187, "learning_rate": 2.756573195414881e-05, "loss": 0.6453, "step": 2161 }, { "epoch": 0.26563459884506696, "grad_norm": 1.3399875365943785, "learning_rate": 2.7562217531153248e-05, "loss": 0.6373, "step": 2162 }, { "epoch": 0.2657574640619241, "grad_norm": 1.6362428532937372, "learning_rate": 2.7558700797419233e-05, "loss": 0.5986, "step": 2163 }, { "epoch": 0.2658803292787812, "grad_norm": 1.624435839372398, "learning_rate": 2.755518175359365e-05, "loss": 0.6351, "step": 2164 }, { "epoch": 0.2660031944956383, "grad_norm": 1.5894189883737795, "learning_rate": 2.7551660400323817e-05, "loss": 0.6283, "step": 2165 }, { "epoch": 0.26612605971249537, "grad_norm": 1.9640240637886188, "learning_rate": 2.7548136738257447e-05, "loss": 0.6784, "step": 2166 }, { "epoch": 0.2662489249293525, "grad_norm": 1.2648723256027277, "learning_rate": 2.7544610768042698e-05, "loss": 0.7275, "step": 2167 }, { "epoch": 0.2663717901462096, "grad_norm": 1.5243444890798117, "learning_rate": 2.754108249032816e-05, "loss": 0.5992, "step": 2168 }, { "epoch": 0.2664946553630667, "grad_norm": 1.369203628490957, "learning_rate": 2.753755190576283e-05, "loss": 0.6897, "step": 2169 }, { "epoch": 0.26661752057992383, "grad_norm": 1.4255366041217563, "learning_rate": 2.7534019014996132e-05, "loss": 0.7312, "step": 2170 }, { "epoch": 0.26674038579678094, "grad_norm": 1.3542019455930876, "learning_rate": 2.753048381867792e-05, "loss": 0.6616, "step": 2171 }, { "epoch": 0.26686325101363806, "grad_norm": 1.40536225955373, "learning_rate": 2.7526946317458474e-05, "loss": 0.6047, "step": 2172 }, { "epoch": 0.2669861162304952, "grad_norm": 1.5548983164370185, "learning_rate": 2.7523406511988497e-05, "loss": 0.6993, "step": 2173 }, { "epoch": 0.26710898144735223, "grad_norm": 1.3069629167553187, "learning_rate": 2.7519864402919108e-05, "loss": 0.6328, "step": 2174 }, { "epoch": 0.26723184666420935, "grad_norm": 1.249218018727622, "learning_rate": 2.7516319990901857e-05, "loss": 0.6451, "step": 2175 }, { "epoch": 0.26735471188106646, "grad_norm": 1.6327318951105683, "learning_rate": 2.751277327658871e-05, "loss": 0.6234, "step": 2176 }, { "epoch": 0.2674775770979236, "grad_norm": 1.280400376901837, "learning_rate": 2.750922426063207e-05, "loss": 0.6307, "step": 2177 }, { "epoch": 0.2676004423147807, "grad_norm": 1.18334924153182, "learning_rate": 2.7505672943684753e-05, "loss": 0.5466, "step": 2178 }, { "epoch": 0.2677233075316378, "grad_norm": 1.318705527983527, "learning_rate": 2.7502119326399997e-05, "loss": 0.6632, "step": 2179 }, { "epoch": 0.2678461727484949, "grad_norm": 1.5530553764696429, "learning_rate": 2.7498563409431475e-05, "loss": 0.6759, "step": 2180 }, { "epoch": 0.267969037965352, "grad_norm": 1.5468681472047394, "learning_rate": 2.7495005193433266e-05, "loss": 0.5698, "step": 2181 }, { "epoch": 0.2680919031822091, "grad_norm": 1.3373917501063668, "learning_rate": 2.749144467905989e-05, "loss": 0.6481, "step": 2182 }, { "epoch": 0.2682147683990662, "grad_norm": 1.347379878442477, "learning_rate": 2.7487881866966268e-05, "loss": 0.6385, "step": 2183 }, { "epoch": 0.26833763361592333, "grad_norm": 1.4894484085228086, "learning_rate": 2.7484316757807768e-05, "loss": 0.6736, "step": 2184 }, { "epoch": 0.26846049883278045, "grad_norm": 1.3940801785878614, "learning_rate": 2.7480749352240166e-05, "loss": 0.5683, "step": 2185 }, { "epoch": 0.26858336404963756, "grad_norm": 1.253628126026695, "learning_rate": 2.7477179650919654e-05, "loss": 0.6374, "step": 2186 }, { "epoch": 0.2687062292664947, "grad_norm": 1.2026531826786953, "learning_rate": 2.7473607654502866e-05, "loss": 0.6063, "step": 2187 }, { "epoch": 0.26882909448335174, "grad_norm": 1.5534227354083239, "learning_rate": 2.7470033363646846e-05, "loss": 0.613, "step": 2188 }, { "epoch": 0.26895195970020885, "grad_norm": 1.5049063646444027, "learning_rate": 2.7466456779009058e-05, "loss": 0.6411, "step": 2189 }, { "epoch": 0.26907482491706597, "grad_norm": 1.3820747766338286, "learning_rate": 2.746287790124739e-05, "loss": 0.7061, "step": 2190 }, { "epoch": 0.2691976901339231, "grad_norm": 1.6070271862529732, "learning_rate": 2.7459296731020163e-05, "loss": 0.6266, "step": 2191 }, { "epoch": 0.2693205553507802, "grad_norm": 1.3875573260692273, "learning_rate": 2.7455713268986098e-05, "loss": 0.6488, "step": 2192 }, { "epoch": 0.2694434205676373, "grad_norm": 1.3224129011759234, "learning_rate": 2.7452127515804357e-05, "loss": 0.6151, "step": 2193 }, { "epoch": 0.26956628578449443, "grad_norm": 1.4983280773667487, "learning_rate": 2.7448539472134518e-05, "loss": 0.6634, "step": 2194 }, { "epoch": 0.2696891510013515, "grad_norm": 1.4755137131494507, "learning_rate": 2.7444949138636576e-05, "loss": 0.598, "step": 2195 }, { "epoch": 0.2698120162182086, "grad_norm": 1.4045906381843318, "learning_rate": 2.744135651597094e-05, "loss": 0.6347, "step": 2196 }, { "epoch": 0.2699348814350657, "grad_norm": 1.5764288357975977, "learning_rate": 2.7437761604798465e-05, "loss": 0.5907, "step": 2197 }, { "epoch": 0.27005774665192284, "grad_norm": 1.5089574618361155, "learning_rate": 2.74341644057804e-05, "loss": 0.7146, "step": 2198 }, { "epoch": 0.27018061186877995, "grad_norm": 1.3970143681680132, "learning_rate": 2.7430564919578432e-05, "loss": 0.6077, "step": 2199 }, { "epoch": 0.27030347708563707, "grad_norm": 1.3561153237130887, "learning_rate": 2.742696314685466e-05, "loss": 0.7329, "step": 2200 }, { "epoch": 0.2704263423024942, "grad_norm": 1.4785305669803246, "learning_rate": 2.7423359088271614e-05, "loss": 0.5988, "step": 2201 }, { "epoch": 0.2705492075193513, "grad_norm": 1.3890442073773912, "learning_rate": 2.741975274449223e-05, "loss": 0.6184, "step": 2202 }, { "epoch": 0.27067207273620836, "grad_norm": 1.6893708125236024, "learning_rate": 2.7416144116179876e-05, "loss": 0.7841, "step": 2203 }, { "epoch": 0.2707949379530655, "grad_norm": 1.5143214431253462, "learning_rate": 2.741253320399833e-05, "loss": 0.791, "step": 2204 }, { "epoch": 0.2709178031699226, "grad_norm": 1.464583745917509, "learning_rate": 2.74089200086118e-05, "loss": 0.6127, "step": 2205 }, { "epoch": 0.2710406683867797, "grad_norm": 1.6665512317682036, "learning_rate": 2.740530453068491e-05, "loss": 0.5996, "step": 2206 }, { "epoch": 0.2711635336036368, "grad_norm": 1.5104050447436361, "learning_rate": 2.74016867708827e-05, "loss": 0.8274, "step": 2207 }, { "epoch": 0.27128639882049393, "grad_norm": 1.3081485920267748, "learning_rate": 2.7398066729870637e-05, "loss": 0.5848, "step": 2208 }, { "epoch": 0.27140926403735105, "grad_norm": 1.418735511221626, "learning_rate": 2.739444440831461e-05, "loss": 0.6981, "step": 2209 }, { "epoch": 0.2715321292542081, "grad_norm": 1.443231258812306, "learning_rate": 2.7390819806880906e-05, "loss": 0.5822, "step": 2210 }, { "epoch": 0.2716549944710652, "grad_norm": 1.4798875722048717, "learning_rate": 2.738719292623626e-05, "loss": 0.6427, "step": 2211 }, { "epoch": 0.27177785968792234, "grad_norm": 1.5740853877890568, "learning_rate": 2.7383563767047808e-05, "loss": 0.7005, "step": 2212 }, { "epoch": 0.27190072490477946, "grad_norm": 1.7681786651129643, "learning_rate": 2.7379932329983114e-05, "loss": 0.5989, "step": 2213 }, { "epoch": 0.27202359012163657, "grad_norm": 1.566579669806081, "learning_rate": 2.737629861571015e-05, "loss": 0.7634, "step": 2214 }, { "epoch": 0.2721464553384937, "grad_norm": 1.3531828214631005, "learning_rate": 2.737266262489732e-05, "loss": 0.5892, "step": 2215 }, { "epoch": 0.2722693205553508, "grad_norm": 1.2606401907410367, "learning_rate": 2.7369024358213436e-05, "loss": 0.8284, "step": 2216 }, { "epoch": 0.27239218577220786, "grad_norm": 1.362709990863083, "learning_rate": 2.7365383816327746e-05, "loss": 0.629, "step": 2217 }, { "epoch": 0.272515050989065, "grad_norm": 1.364286690363534, "learning_rate": 2.736174099990989e-05, "loss": 0.6686, "step": 2218 }, { "epoch": 0.2726379162059221, "grad_norm": 1.4577900650783806, "learning_rate": 2.7358095909629947e-05, "loss": 0.7285, "step": 2219 }, { "epoch": 0.2727607814227792, "grad_norm": 1.5091858859348273, "learning_rate": 2.735444854615841e-05, "loss": 0.6542, "step": 2220 }, { "epoch": 0.2728836466396363, "grad_norm": 1.288406891606747, "learning_rate": 2.7350798910166176e-05, "loss": 0.7034, "step": 2221 }, { "epoch": 0.27300651185649344, "grad_norm": 1.3973130422488302, "learning_rate": 2.7347147002324587e-05, "loss": 0.6631, "step": 2222 }, { "epoch": 0.27312937707335055, "grad_norm": 1.3391316263694792, "learning_rate": 2.7343492823305377e-05, "loss": 0.6642, "step": 2223 }, { "epoch": 0.27325224229020767, "grad_norm": 1.3960303226075372, "learning_rate": 2.7339836373780712e-05, "loss": 0.5774, "step": 2224 }, { "epoch": 0.27337510750706473, "grad_norm": 1.3938939888604018, "learning_rate": 2.733617765442318e-05, "loss": 0.5726, "step": 2225 }, { "epoch": 0.27349797272392184, "grad_norm": 1.5178227396989306, "learning_rate": 2.7332516665905763e-05, "loss": 0.7074, "step": 2226 }, { "epoch": 0.27362083794077896, "grad_norm": 1.4399313715143345, "learning_rate": 2.732885340890189e-05, "loss": 0.7273, "step": 2227 }, { "epoch": 0.2737437031576361, "grad_norm": 1.3075490667355374, "learning_rate": 2.7325187884085385e-05, "loss": 0.7258, "step": 2228 }, { "epoch": 0.2738665683744932, "grad_norm": 1.3994372185063837, "learning_rate": 2.73215200921305e-05, "loss": 0.5769, "step": 2229 }, { "epoch": 0.2739894335913503, "grad_norm": 1.4953157567032949, "learning_rate": 2.7317850033711903e-05, "loss": 0.6125, "step": 2230 }, { "epoch": 0.2741122988082074, "grad_norm": 1.398277999171455, "learning_rate": 2.7314177709504674e-05, "loss": 0.7705, "step": 2231 }, { "epoch": 0.2742351640250645, "grad_norm": 1.4963035765442916, "learning_rate": 2.7310503120184326e-05, "loss": 0.4962, "step": 2232 }, { "epoch": 0.2743580292419216, "grad_norm": 1.7125132494292332, "learning_rate": 2.730682626642675e-05, "loss": 0.756, "step": 2233 }, { "epoch": 0.2744808944587787, "grad_norm": 1.4396561801068752, "learning_rate": 2.7303147148908305e-05, "loss": 0.5997, "step": 2234 }, { "epoch": 0.27460375967563583, "grad_norm": 1.7023444207189797, "learning_rate": 2.729946576830573e-05, "loss": 0.6045, "step": 2235 }, { "epoch": 0.27472662489249294, "grad_norm": 1.369104175507042, "learning_rate": 2.7295782125296188e-05, "loss": 0.8375, "step": 2236 }, { "epoch": 0.27484949010935006, "grad_norm": 1.2847284146441715, "learning_rate": 2.7292096220557267e-05, "loss": 0.5916, "step": 2237 }, { "epoch": 0.2749723553262072, "grad_norm": 1.597488636836791, "learning_rate": 2.728840805476696e-05, "loss": 0.614, "step": 2238 }, { "epoch": 0.27509522054306423, "grad_norm": 1.5160558212908166, "learning_rate": 2.728471762860369e-05, "loss": 0.5933, "step": 2239 }, { "epoch": 0.27521808575992135, "grad_norm": 1.527198215470518, "learning_rate": 2.728102494274628e-05, "loss": 0.597, "step": 2240 }, { "epoch": 0.27534095097677846, "grad_norm": 1.5974415265766118, "learning_rate": 2.7277329997873974e-05, "loss": 0.7841, "step": 2241 }, { "epoch": 0.2754638161936356, "grad_norm": 1.3752038263794273, "learning_rate": 2.727363279466644e-05, "loss": 0.6482, "step": 2242 }, { "epoch": 0.2755866814104927, "grad_norm": 1.4591634546320258, "learning_rate": 2.726993333380375e-05, "loss": 0.5563, "step": 2243 }, { "epoch": 0.2757095466273498, "grad_norm": 1.3152996225917803, "learning_rate": 2.7266231615966396e-05, "loss": 0.6999, "step": 2244 }, { "epoch": 0.2758324118442069, "grad_norm": 1.4015944989670872, "learning_rate": 2.726252764183528e-05, "loss": 0.6771, "step": 2245 }, { "epoch": 0.275955277061064, "grad_norm": 1.4822846704893073, "learning_rate": 2.7258821412091735e-05, "loss": 0.5638, "step": 2246 }, { "epoch": 0.2760781422779211, "grad_norm": 1.3037484199679858, "learning_rate": 2.7255112927417494e-05, "loss": 0.5913, "step": 2247 }, { "epoch": 0.2762010074947782, "grad_norm": 1.4003464570600392, "learning_rate": 2.7251402188494704e-05, "loss": 0.7682, "step": 2248 }, { "epoch": 0.27632387271163533, "grad_norm": 1.96633577811444, "learning_rate": 2.7247689196005935e-05, "loss": 0.6516, "step": 2249 }, { "epoch": 0.27644673792849245, "grad_norm": 1.567342379400192, "learning_rate": 2.7243973950634165e-05, "loss": 0.6375, "step": 2250 }, { "epoch": 0.27656960314534956, "grad_norm": 1.3111707652239746, "learning_rate": 2.7240256453062796e-05, "loss": 0.6489, "step": 2251 }, { "epoch": 0.2766924683622067, "grad_norm": 1.2817226515151958, "learning_rate": 2.7236536703975633e-05, "loss": 0.6415, "step": 2252 }, { "epoch": 0.2768153335790638, "grad_norm": 1.2876156554962757, "learning_rate": 2.7232814704056902e-05, "loss": 0.6395, "step": 2253 }, { "epoch": 0.27693819879592085, "grad_norm": 1.1683153305133076, "learning_rate": 2.7229090453991238e-05, "loss": 0.7245, "step": 2254 }, { "epoch": 0.27706106401277797, "grad_norm": 1.32949883128201, "learning_rate": 2.722536395446369e-05, "loss": 0.6153, "step": 2255 }, { "epoch": 0.2771839292296351, "grad_norm": 1.4362028838830463, "learning_rate": 2.7221635206159725e-05, "loss": 0.6623, "step": 2256 }, { "epoch": 0.2773067944464922, "grad_norm": 1.4403691070296074, "learning_rate": 2.721790420976523e-05, "loss": 0.6875, "step": 2257 }, { "epoch": 0.2774296596633493, "grad_norm": 1.5391808806845022, "learning_rate": 2.721417096596649e-05, "loss": 0.636, "step": 2258 }, { "epoch": 0.27755252488020643, "grad_norm": 1.2000198964466813, "learning_rate": 2.7210435475450207e-05, "loss": 0.6695, "step": 2259 }, { "epoch": 0.27767539009706355, "grad_norm": 1.5848865359706197, "learning_rate": 2.7206697738903513e-05, "loss": 0.7984, "step": 2260 }, { "epoch": 0.2777982553139206, "grad_norm": 1.3191703828149695, "learning_rate": 2.720295775701393e-05, "loss": 0.5858, "step": 2261 }, { "epoch": 0.2779211205307777, "grad_norm": 1.259645792035103, "learning_rate": 2.719921553046941e-05, "loss": 0.7574, "step": 2262 }, { "epoch": 0.27804398574763484, "grad_norm": 1.604692692509029, "learning_rate": 2.71954710599583e-05, "loss": 0.7284, "step": 2263 }, { "epoch": 0.27816685096449195, "grad_norm": 1.3184213667385616, "learning_rate": 2.719172434616938e-05, "loss": 0.563, "step": 2264 }, { "epoch": 0.27828971618134907, "grad_norm": 1.3557467558465117, "learning_rate": 2.718797538979184e-05, "loss": 0.7652, "step": 2265 }, { "epoch": 0.2784125813982062, "grad_norm": 1.49860151814819, "learning_rate": 2.7184224191515263e-05, "loss": 0.6647, "step": 2266 }, { "epoch": 0.2785354466150633, "grad_norm": 1.468681278118489, "learning_rate": 2.718047075202967e-05, "loss": 0.7328, "step": 2267 }, { "epoch": 0.27865831183192036, "grad_norm": 1.3934761350995364, "learning_rate": 2.717671507202547e-05, "loss": 0.6451, "step": 2268 }, { "epoch": 0.2787811770487775, "grad_norm": 1.7103092459301472, "learning_rate": 2.71729571521935e-05, "loss": 0.6337, "step": 2269 }, { "epoch": 0.2789040422656346, "grad_norm": 1.396357480214934, "learning_rate": 2.716919699322501e-05, "loss": 0.7118, "step": 2270 }, { "epoch": 0.2790269074824917, "grad_norm": 1.352466531314236, "learning_rate": 2.716543459581165e-05, "loss": 0.6501, "step": 2271 }, { "epoch": 0.2791497726993488, "grad_norm": 1.2403023601255228, "learning_rate": 2.7161669960645493e-05, "loss": 0.6713, "step": 2272 }, { "epoch": 0.27927263791620593, "grad_norm": 1.4364222547289218, "learning_rate": 2.7157903088419016e-05, "loss": 0.7437, "step": 2273 }, { "epoch": 0.27939550313306305, "grad_norm": 1.524130593989765, "learning_rate": 2.7154133979825116e-05, "loss": 0.6898, "step": 2274 }, { "epoch": 0.2795183683499201, "grad_norm": 1.3880654711349896, "learning_rate": 2.715036263555709e-05, "loss": 0.7524, "step": 2275 }, { "epoch": 0.2796412335667772, "grad_norm": 1.2973963655164118, "learning_rate": 2.714658905630866e-05, "loss": 0.6608, "step": 2276 }, { "epoch": 0.27976409878363434, "grad_norm": 1.5686317266154568, "learning_rate": 2.714281324277394e-05, "loss": 0.7424, "step": 2277 }, { "epoch": 0.27988696400049146, "grad_norm": 1.1566924872636943, "learning_rate": 2.7139035195647475e-05, "loss": 0.6246, "step": 2278 }, { "epoch": 0.28000982921734857, "grad_norm": 1.5532158791320374, "learning_rate": 2.7135254915624213e-05, "loss": 0.6139, "step": 2279 }, { "epoch": 0.2801326944342057, "grad_norm": 1.559824768815689, "learning_rate": 2.7131472403399505e-05, "loss": 0.6547, "step": 2280 }, { "epoch": 0.2802555596510628, "grad_norm": 1.2841037789864103, "learning_rate": 2.7127687659669126e-05, "loss": 0.671, "step": 2281 }, { "epoch": 0.2803784248679199, "grad_norm": 1.436638426174409, "learning_rate": 2.7123900685129253e-05, "loss": 0.721, "step": 2282 }, { "epoch": 0.280501290084777, "grad_norm": 1.5534864197694331, "learning_rate": 2.7120111480476476e-05, "loss": 0.5315, "step": 2283 }, { "epoch": 0.2806241553016341, "grad_norm": 1.7154422017030828, "learning_rate": 2.7116320046407795e-05, "loss": 0.616, "step": 2284 }, { "epoch": 0.2807470205184912, "grad_norm": 1.4375786925806058, "learning_rate": 2.7112526383620615e-05, "loss": 0.7154, "step": 2285 }, { "epoch": 0.2808698857353483, "grad_norm": 1.1746711465513362, "learning_rate": 2.710873049281276e-05, "loss": 0.5665, "step": 2286 }, { "epoch": 0.28099275095220544, "grad_norm": 1.570189029348816, "learning_rate": 2.7104932374682462e-05, "loss": 0.69, "step": 2287 }, { "epoch": 0.28111561616906255, "grad_norm": 1.4202025089957389, "learning_rate": 2.7101132029928352e-05, "loss": 0.6624, "step": 2288 }, { "epoch": 0.28123848138591967, "grad_norm": 1.427444178913261, "learning_rate": 2.7097329459249485e-05, "loss": 0.7273, "step": 2289 }, { "epoch": 0.28136134660277673, "grad_norm": 1.4241784904978274, "learning_rate": 2.7093524663345318e-05, "loss": 0.8499, "step": 2290 }, { "epoch": 0.28148421181963384, "grad_norm": 1.4713752795542048, "learning_rate": 2.7089717642915723e-05, "loss": 0.6187, "step": 2291 }, { "epoch": 0.28160707703649096, "grad_norm": 1.3058882868808603, "learning_rate": 2.7085908398660966e-05, "loss": 0.7097, "step": 2292 }, { "epoch": 0.2817299422533481, "grad_norm": 1.3758965721411256, "learning_rate": 2.7082096931281743e-05, "loss": 0.6385, "step": 2293 }, { "epoch": 0.2818528074702052, "grad_norm": 1.3319120618021292, "learning_rate": 2.707828324147914e-05, "loss": 0.6174, "step": 2294 }, { "epoch": 0.2819756726870623, "grad_norm": 1.4380469209157543, "learning_rate": 2.707446732995467e-05, "loss": 0.6966, "step": 2295 }, { "epoch": 0.2820985379039194, "grad_norm": 1.3507092288988436, "learning_rate": 2.7070649197410236e-05, "loss": 0.655, "step": 2296 }, { "epoch": 0.2822214031207765, "grad_norm": 1.3893322993188522, "learning_rate": 2.7066828844548166e-05, "loss": 0.6203, "step": 2297 }, { "epoch": 0.2823442683376336, "grad_norm": 1.2329902844485314, "learning_rate": 2.7063006272071185e-05, "loss": 0.5897, "step": 2298 }, { "epoch": 0.2824671335544907, "grad_norm": 1.4148633382754057, "learning_rate": 2.7059181480682434e-05, "loss": 0.6296, "step": 2299 }, { "epoch": 0.28258999877134783, "grad_norm": 1.3761880116644563, "learning_rate": 2.7055354471085454e-05, "loss": 0.6794, "step": 2300 }, { "epoch": 0.28271286398820494, "grad_norm": 1.3477540043149876, "learning_rate": 2.70515252439842e-05, "loss": 0.6631, "step": 2301 }, { "epoch": 0.28283572920506206, "grad_norm": 1.5942566446978674, "learning_rate": 2.704769380008304e-05, "loss": 0.6216, "step": 2302 }, { "epoch": 0.2829585944219192, "grad_norm": 1.6037249340766404, "learning_rate": 2.7043860140086728e-05, "loss": 0.6283, "step": 2303 }, { "epoch": 0.2830814596387763, "grad_norm": 1.6440260710232344, "learning_rate": 2.7040024264700457e-05, "loss": 0.6595, "step": 2304 }, { "epoch": 0.28320432485563335, "grad_norm": 1.529381308190572, "learning_rate": 2.70361861746298e-05, "loss": 0.5792, "step": 2305 }, { "epoch": 0.28332719007249046, "grad_norm": 1.4708460913099222, "learning_rate": 2.7032345870580756e-05, "loss": 0.6915, "step": 2306 }, { "epoch": 0.2834500552893476, "grad_norm": 1.300770079723082, "learning_rate": 2.7028503353259728e-05, "loss": 0.6247, "step": 2307 }, { "epoch": 0.2835729205062047, "grad_norm": 1.2630262496093956, "learning_rate": 2.702465862337351e-05, "loss": 0.6231, "step": 2308 }, { "epoch": 0.2836957857230618, "grad_norm": 1.2091580601209428, "learning_rate": 2.7020811681629318e-05, "loss": 0.6469, "step": 2309 }, { "epoch": 0.2838186509399189, "grad_norm": 1.3101138221182682, "learning_rate": 2.701696252873478e-05, "loss": 0.6886, "step": 2310 }, { "epoch": 0.28394151615677604, "grad_norm": 1.3722088659224434, "learning_rate": 2.7013111165397912e-05, "loss": 0.606, "step": 2311 }, { "epoch": 0.2840643813736331, "grad_norm": 1.2536367859192785, "learning_rate": 2.700925759232716e-05, "loss": 0.5796, "step": 2312 }, { "epoch": 0.2841872465904902, "grad_norm": 1.3949159627734986, "learning_rate": 2.700540181023135e-05, "loss": 0.6238, "step": 2313 }, { "epoch": 0.28431011180734733, "grad_norm": 1.2500613826393803, "learning_rate": 2.700154381981974e-05, "loss": 0.6886, "step": 2314 }, { "epoch": 0.28443297702420445, "grad_norm": 1.5564201644403335, "learning_rate": 2.699768362180197e-05, "loss": 0.6674, "step": 2315 }, { "epoch": 0.28455584224106156, "grad_norm": 1.5824835322967588, "learning_rate": 2.6993821216888115e-05, "loss": 0.6397, "step": 2316 }, { "epoch": 0.2846787074579187, "grad_norm": 1.6145329328589666, "learning_rate": 2.6989956605788623e-05, "loss": 0.768, "step": 2317 }, { "epoch": 0.2848015726747758, "grad_norm": 1.5243613844587494, "learning_rate": 2.6986089789214376e-05, "loss": 0.7313, "step": 2318 }, { "epoch": 0.28492443789163285, "grad_norm": 1.3811676419795467, "learning_rate": 2.698222076787664e-05, "loss": 0.5877, "step": 2319 }, { "epoch": 0.28504730310848997, "grad_norm": 1.3538866291762695, "learning_rate": 2.6978349542487102e-05, "loss": 0.619, "step": 2320 }, { "epoch": 0.2851701683253471, "grad_norm": 1.7751044641068865, "learning_rate": 2.6974476113757855e-05, "loss": 0.6507, "step": 2321 }, { "epoch": 0.2852930335422042, "grad_norm": 1.3117933636216823, "learning_rate": 2.6970600482401373e-05, "loss": 0.6744, "step": 2322 }, { "epoch": 0.2854158987590613, "grad_norm": 1.2555423562302266, "learning_rate": 2.696672264913057e-05, "loss": 0.5506, "step": 2323 }, { "epoch": 0.28553876397591843, "grad_norm": 1.6563637425242683, "learning_rate": 2.6962842614658742e-05, "loss": 0.6712, "step": 2324 }, { "epoch": 0.28566162919277555, "grad_norm": 1.4863520581666054, "learning_rate": 2.6958960379699596e-05, "loss": 0.6903, "step": 2325 }, { "epoch": 0.2857844944096326, "grad_norm": 1.6260634847878985, "learning_rate": 2.695507594496725e-05, "loss": 0.6825, "step": 2326 }, { "epoch": 0.2859073596264897, "grad_norm": 1.3358686391256134, "learning_rate": 2.695118931117621e-05, "loss": 0.6961, "step": 2327 }, { "epoch": 0.28603022484334684, "grad_norm": 1.1053726509961348, "learning_rate": 2.69473004790414e-05, "loss": 0.592, "step": 2328 }, { "epoch": 0.28615309006020395, "grad_norm": 1.5003164557436357, "learning_rate": 2.6943409449278152e-05, "loss": 0.6872, "step": 2329 }, { "epoch": 0.28627595527706107, "grad_norm": 1.2545663428466534, "learning_rate": 2.693951622260219e-05, "loss": 0.6549, "step": 2330 }, { "epoch": 0.2863988204939182, "grad_norm": 1.4357904040583336, "learning_rate": 2.6935620799729652e-05, "loss": 0.54, "step": 2331 }, { "epoch": 0.2865216857107753, "grad_norm": 1.2317550692259565, "learning_rate": 2.6931723181377067e-05, "loss": 0.7372, "step": 2332 }, { "epoch": 0.2866445509276324, "grad_norm": 1.0348863509544772, "learning_rate": 2.692782336826139e-05, "loss": 0.6508, "step": 2333 }, { "epoch": 0.2867674161444895, "grad_norm": 1.5867995097210663, "learning_rate": 2.6923921361099953e-05, "loss": 0.7803, "step": 2334 }, { "epoch": 0.2868902813613466, "grad_norm": 1.2435371487391187, "learning_rate": 2.6920017160610514e-05, "loss": 0.5816, "step": 2335 }, { "epoch": 0.2870131465782037, "grad_norm": 1.3215745007091262, "learning_rate": 2.6916110767511223e-05, "loss": 0.7075, "step": 2336 }, { "epoch": 0.2871360117950608, "grad_norm": 1.2813949096540969, "learning_rate": 2.6912202182520637e-05, "loss": 0.6734, "step": 2337 }, { "epoch": 0.28725887701191793, "grad_norm": 1.3874586440917138, "learning_rate": 2.6908291406357714e-05, "loss": 0.593, "step": 2338 }, { "epoch": 0.28738174222877505, "grad_norm": 1.3741124690116528, "learning_rate": 2.6904378439741806e-05, "loss": 0.5827, "step": 2339 }, { "epoch": 0.28750460744563217, "grad_norm": 1.4502570753607562, "learning_rate": 2.690046328339269e-05, "loss": 0.6545, "step": 2340 }, { "epoch": 0.2876274726624892, "grad_norm": 1.357652974097361, "learning_rate": 2.6896545938030532e-05, "loss": 0.6288, "step": 2341 }, { "epoch": 0.28775033787934634, "grad_norm": 1.4561742134742317, "learning_rate": 2.68926264043759e-05, "loss": 0.6229, "step": 2342 }, { "epoch": 0.28787320309620346, "grad_norm": 1.795775069072911, "learning_rate": 2.6888704683149768e-05, "loss": 0.7792, "step": 2343 }, { "epoch": 0.28799606831306057, "grad_norm": 1.2881431961787628, "learning_rate": 2.6884780775073513e-05, "loss": 0.6513, "step": 2344 }, { "epoch": 0.2881189335299177, "grad_norm": 1.2304769714001682, "learning_rate": 2.6880854680868905e-05, "loss": 0.7874, "step": 2345 }, { "epoch": 0.2882417987467748, "grad_norm": 1.3574911510772705, "learning_rate": 2.687692640125813e-05, "loss": 0.7058, "step": 2346 }, { "epoch": 0.2883646639636319, "grad_norm": 1.3466192862412458, "learning_rate": 2.687299593696377e-05, "loss": 0.6884, "step": 2347 }, { "epoch": 0.288487529180489, "grad_norm": 1.5279621880903231, "learning_rate": 2.6869063288708807e-05, "loss": 0.5868, "step": 2348 }, { "epoch": 0.2886103943973461, "grad_norm": 1.3339210736952853, "learning_rate": 2.6865128457216623e-05, "loss": 0.7545, "step": 2349 }, { "epoch": 0.2887332596142032, "grad_norm": 1.3747327522362114, "learning_rate": 2.6861191443211008e-05, "loss": 0.6101, "step": 2350 }, { "epoch": 0.2888561248310603, "grad_norm": 1.5240198600673898, "learning_rate": 2.685725224741615e-05, "loss": 0.7174, "step": 2351 }, { "epoch": 0.28897899004791744, "grad_norm": 1.4081328904299477, "learning_rate": 2.6853310870556638e-05, "loss": 0.6793, "step": 2352 }, { "epoch": 0.28910185526477455, "grad_norm": 1.5610754167273866, "learning_rate": 2.6849367313357458e-05, "loss": 0.6229, "step": 2353 }, { "epoch": 0.28922472048163167, "grad_norm": 1.3674193895516589, "learning_rate": 2.6845421576544017e-05, "loss": 0.7214, "step": 2354 }, { "epoch": 0.2893475856984888, "grad_norm": 1.2589763368723152, "learning_rate": 2.6841473660842088e-05, "loss": 0.6452, "step": 2355 }, { "epoch": 0.28947045091534584, "grad_norm": 1.3766157754638133, "learning_rate": 2.6837523566977876e-05, "loss": 0.6389, "step": 2356 }, { "epoch": 0.28959331613220296, "grad_norm": 1.3756202967187943, "learning_rate": 2.6833571295677976e-05, "loss": 0.728, "step": 2357 }, { "epoch": 0.2897161813490601, "grad_norm": 1.2443862900439489, "learning_rate": 2.6829616847669372e-05, "loss": 0.6129, "step": 2358 }, { "epoch": 0.2898390465659172, "grad_norm": 1.403597006359707, "learning_rate": 2.6825660223679477e-05, "loss": 0.7489, "step": 2359 }, { "epoch": 0.2899619117827743, "grad_norm": 1.3417765638414851, "learning_rate": 2.682170142443607e-05, "loss": 0.809, "step": 2360 }, { "epoch": 0.2900847769996314, "grad_norm": 1.4880015784755476, "learning_rate": 2.681774045066735e-05, "loss": 0.8553, "step": 2361 }, { "epoch": 0.29020764221648854, "grad_norm": 1.5292691139737404, "learning_rate": 2.6813777303101922e-05, "loss": 0.7199, "step": 2362 }, { "epoch": 0.2903305074333456, "grad_norm": 1.385479021290994, "learning_rate": 2.6809811982468768e-05, "loss": 0.675, "step": 2363 }, { "epoch": 0.2904533726502027, "grad_norm": 1.5000692823886397, "learning_rate": 2.680584448949729e-05, "loss": 0.7758, "step": 2364 }, { "epoch": 0.29057623786705983, "grad_norm": 1.3228278715643271, "learning_rate": 2.6801874824917287e-05, "loss": 0.6411, "step": 2365 }, { "epoch": 0.29069910308391694, "grad_norm": 1.6867419143389066, "learning_rate": 2.6797902989458944e-05, "loss": 0.6082, "step": 2366 }, { "epoch": 0.29082196830077406, "grad_norm": 1.2637297237230825, "learning_rate": 2.679392898385286e-05, "loss": 0.6473, "step": 2367 }, { "epoch": 0.2909448335176312, "grad_norm": 1.5595809647393528, "learning_rate": 2.678995280883002e-05, "loss": 0.5838, "step": 2368 }, { "epoch": 0.2910676987344883, "grad_norm": 1.241479917879033, "learning_rate": 2.6785974465121827e-05, "loss": 0.5741, "step": 2369 }, { "epoch": 0.29119056395134535, "grad_norm": 1.5972350426520476, "learning_rate": 2.678199395346006e-05, "loss": 0.7174, "step": 2370 }, { "epoch": 0.29131342916820246, "grad_norm": 1.485075674534945, "learning_rate": 2.677801127457692e-05, "loss": 0.5888, "step": 2371 }, { "epoch": 0.2914362943850596, "grad_norm": 1.561351697234515, "learning_rate": 2.6774026429204987e-05, "loss": 0.8026, "step": 2372 }, { "epoch": 0.2915591596019167, "grad_norm": 1.5370320142722032, "learning_rate": 2.677003941807725e-05, "loss": 0.689, "step": 2373 }, { "epoch": 0.2916820248187738, "grad_norm": 1.4748724868235021, "learning_rate": 2.6766050241927095e-05, "loss": 0.6487, "step": 2374 }, { "epoch": 0.2918048900356309, "grad_norm": 1.6119807046391001, "learning_rate": 2.6762058901488303e-05, "loss": 0.7718, "step": 2375 }, { "epoch": 0.29192775525248804, "grad_norm": 1.2744559328086116, "learning_rate": 2.6758065397495057e-05, "loss": 0.5263, "step": 2376 }, { "epoch": 0.2920506204693451, "grad_norm": 1.625645551930686, "learning_rate": 2.675406973068193e-05, "loss": 0.7076, "step": 2377 }, { "epoch": 0.2921734856862022, "grad_norm": 1.3214840240744323, "learning_rate": 2.6750071901783907e-05, "loss": 0.6375, "step": 2378 }, { "epoch": 0.29229635090305933, "grad_norm": 1.464322449343132, "learning_rate": 2.6746071911536358e-05, "loss": 0.6111, "step": 2379 }, { "epoch": 0.29241921611991645, "grad_norm": 1.1902615020497778, "learning_rate": 2.674206976067506e-05, "loss": 0.6591, "step": 2380 }, { "epoch": 0.29254208133677356, "grad_norm": 1.253419272415584, "learning_rate": 2.6738065449936178e-05, "loss": 0.6359, "step": 2381 }, { "epoch": 0.2926649465536307, "grad_norm": 1.2976732889014784, "learning_rate": 2.673405898005628e-05, "loss": 0.5671, "step": 2382 }, { "epoch": 0.2927878117704878, "grad_norm": 1.4177258308303493, "learning_rate": 2.673005035177233e-05, "loss": 0.7137, "step": 2383 }, { "epoch": 0.2929106769873449, "grad_norm": 1.3292276051178207, "learning_rate": 2.6726039565821686e-05, "loss": 0.6389, "step": 2384 }, { "epoch": 0.29303354220420197, "grad_norm": 1.3195035776409756, "learning_rate": 2.6722026622942118e-05, "loss": 0.7, "step": 2385 }, { "epoch": 0.2931564074210591, "grad_norm": 1.461860616137111, "learning_rate": 2.6718011523871766e-05, "loss": 0.6187, "step": 2386 }, { "epoch": 0.2932792726379162, "grad_norm": 1.2929775540531439, "learning_rate": 2.6713994269349195e-05, "loss": 0.6341, "step": 2387 }, { "epoch": 0.2934021378547733, "grad_norm": 1.3184296079387035, "learning_rate": 2.670997486011334e-05, "loss": 0.6717, "step": 2388 }, { "epoch": 0.29352500307163043, "grad_norm": 1.6264705432552435, "learning_rate": 2.6705953296903554e-05, "loss": 0.7501, "step": 2389 }, { "epoch": 0.29364786828848755, "grad_norm": 1.3509801250243938, "learning_rate": 2.670192958045957e-05, "loss": 0.664, "step": 2390 }, { "epoch": 0.29377073350534466, "grad_norm": 1.4386967352497981, "learning_rate": 2.669790371152154e-05, "loss": 0.7087, "step": 2391 }, { "epoch": 0.2938935987222017, "grad_norm": 1.512357376426424, "learning_rate": 2.6693875690829982e-05, "loss": 0.613, "step": 2392 }, { "epoch": 0.29401646393905884, "grad_norm": 1.737242725271504, "learning_rate": 2.668984551912582e-05, "loss": 0.6891, "step": 2393 }, { "epoch": 0.29413932915591595, "grad_norm": 1.4281470080451333, "learning_rate": 2.6685813197150395e-05, "loss": 0.7265, "step": 2394 }, { "epoch": 0.29426219437277307, "grad_norm": 1.344618804445938, "learning_rate": 2.6681778725645414e-05, "loss": 0.6866, "step": 2395 }, { "epoch": 0.2943850595896302, "grad_norm": 1.4537982723269145, "learning_rate": 2.6677742105352994e-05, "loss": 0.607, "step": 2396 }, { "epoch": 0.2945079248064873, "grad_norm": 1.3053647109723443, "learning_rate": 2.667370333701565e-05, "loss": 0.7297, "step": 2397 }, { "epoch": 0.2946307900233444, "grad_norm": 1.4911442249586875, "learning_rate": 2.6669662421376278e-05, "loss": 0.6712, "step": 2398 }, { "epoch": 0.2947536552402015, "grad_norm": 1.2238201567477958, "learning_rate": 2.6665619359178192e-05, "loss": 0.5783, "step": 2399 }, { "epoch": 0.2948765204570586, "grad_norm": 1.3570722872120904, "learning_rate": 2.6661574151165072e-05, "loss": 0.6034, "step": 2400 }, { "epoch": 0.2949993856739157, "grad_norm": 1.3887905827339477, "learning_rate": 2.665752679808102e-05, "loss": 0.6079, "step": 2401 }, { "epoch": 0.2951222508907728, "grad_norm": 1.461338067757449, "learning_rate": 2.6653477300670515e-05, "loss": 0.6326, "step": 2402 }, { "epoch": 0.29524511610762993, "grad_norm": 1.272220018267634, "learning_rate": 2.6649425659678435e-05, "loss": 0.577, "step": 2403 }, { "epoch": 0.29536798132448705, "grad_norm": 1.3518148896772197, "learning_rate": 2.664537187585005e-05, "loss": 0.6125, "step": 2404 }, { "epoch": 0.29549084654134417, "grad_norm": 1.688446219130224, "learning_rate": 2.6641315949931034e-05, "loss": 0.7056, "step": 2405 }, { "epoch": 0.2956137117582013, "grad_norm": 1.5608721191429917, "learning_rate": 2.6637257882667446e-05, "loss": 0.591, "step": 2406 }, { "epoch": 0.29573657697505834, "grad_norm": 1.3234372134418741, "learning_rate": 2.663319767480574e-05, "loss": 0.641, "step": 2407 }, { "epoch": 0.29585944219191546, "grad_norm": 1.4594953046896513, "learning_rate": 2.6629135327092772e-05, "loss": 0.5938, "step": 2408 }, { "epoch": 0.29598230740877257, "grad_norm": 1.300664983788724, "learning_rate": 2.6625070840275767e-05, "loss": 0.5663, "step": 2409 }, { "epoch": 0.2961051726256297, "grad_norm": 1.3867753852868154, "learning_rate": 2.662100421510238e-05, "loss": 0.6084, "step": 2410 }, { "epoch": 0.2962280378424868, "grad_norm": 1.2430135298240106, "learning_rate": 2.6616935452320634e-05, "loss": 0.7468, "step": 2411 }, { "epoch": 0.2963509030593439, "grad_norm": 1.4681634300405093, "learning_rate": 2.661286455267894e-05, "loss": 0.6462, "step": 2412 }, { "epoch": 0.29647376827620103, "grad_norm": 1.330231847879845, "learning_rate": 2.6608791516926133e-05, "loss": 0.6292, "step": 2413 }, { "epoch": 0.2965966334930581, "grad_norm": 1.6441750432730258, "learning_rate": 2.660471634581141e-05, "loss": 0.7538, "step": 2414 }, { "epoch": 0.2967194987099152, "grad_norm": 1.4338646771457852, "learning_rate": 2.660063904008437e-05, "loss": 0.6066, "step": 2415 }, { "epoch": 0.2968423639267723, "grad_norm": 1.8635030690541587, "learning_rate": 2.659655960049502e-05, "loss": 0.7196, "step": 2416 }, { "epoch": 0.29696522914362944, "grad_norm": 1.2193824830009194, "learning_rate": 2.6592478027793732e-05, "loss": 0.7361, "step": 2417 }, { "epoch": 0.29708809436048655, "grad_norm": 1.3868314240387876, "learning_rate": 2.658839432273129e-05, "loss": 0.65, "step": 2418 }, { "epoch": 0.29721095957734367, "grad_norm": 1.205332926048183, "learning_rate": 2.6584308486058866e-05, "loss": 0.699, "step": 2419 }, { "epoch": 0.2973338247942008, "grad_norm": 1.420851197812113, "learning_rate": 2.6580220518528025e-05, "loss": 0.7105, "step": 2420 }, { "epoch": 0.29745669001105784, "grad_norm": 1.5848834819284587, "learning_rate": 2.657613042089072e-05, "loss": 0.6333, "step": 2421 }, { "epoch": 0.29757955522791496, "grad_norm": 1.3621699404319922, "learning_rate": 2.6572038193899296e-05, "loss": 0.634, "step": 2422 }, { "epoch": 0.2977024204447721, "grad_norm": 1.3110149634591528, "learning_rate": 2.6567943838306497e-05, "loss": 0.616, "step": 2423 }, { "epoch": 0.2978252856616292, "grad_norm": 1.3808115857040475, "learning_rate": 2.6563847354865443e-05, "loss": 0.6119, "step": 2424 }, { "epoch": 0.2979481508784863, "grad_norm": 1.7679539200123076, "learning_rate": 2.655974874432967e-05, "loss": 0.6511, "step": 2425 }, { "epoch": 0.2980710160953434, "grad_norm": 1.4533395249779197, "learning_rate": 2.655564800745308e-05, "loss": 0.5851, "step": 2426 }, { "epoch": 0.29819388131220054, "grad_norm": 1.5164199903265803, "learning_rate": 2.655154514498998e-05, "loss": 0.6407, "step": 2427 }, { "epoch": 0.2983167465290576, "grad_norm": 1.6278295404583447, "learning_rate": 2.654744015769506e-05, "loss": 0.6136, "step": 2428 }, { "epoch": 0.2984396117459147, "grad_norm": 1.3661832432023582, "learning_rate": 2.6543333046323416e-05, "loss": 0.6561, "step": 2429 }, { "epoch": 0.29856247696277183, "grad_norm": 1.225977593213098, "learning_rate": 2.653922381163052e-05, "loss": 0.6093, "step": 2430 }, { "epoch": 0.29868534217962894, "grad_norm": 1.184076210924204, "learning_rate": 2.6535112454372236e-05, "loss": 0.6146, "step": 2431 }, { "epoch": 0.29880820739648606, "grad_norm": 1.362569804727129, "learning_rate": 2.6530998975304823e-05, "loss": 0.6637, "step": 2432 }, { "epoch": 0.2989310726133432, "grad_norm": 1.485539180353542, "learning_rate": 2.652688337518493e-05, "loss": 0.5147, "step": 2433 }, { "epoch": 0.2990539378302003, "grad_norm": 1.2460179031611254, "learning_rate": 2.65227656547696e-05, "loss": 0.6562, "step": 2434 }, { "epoch": 0.2991768030470574, "grad_norm": 1.173444454588339, "learning_rate": 2.651864581481625e-05, "loss": 0.6501, "step": 2435 }, { "epoch": 0.29929966826391446, "grad_norm": 1.5580583304626232, "learning_rate": 2.6514523856082703e-05, "loss": 0.7505, "step": 2436 }, { "epoch": 0.2994225334807716, "grad_norm": 1.4138993995088807, "learning_rate": 2.651039977932717e-05, "loss": 0.6289, "step": 2437 }, { "epoch": 0.2995453986976287, "grad_norm": 1.6046125637358981, "learning_rate": 2.6506273585308247e-05, "loss": 0.5689, "step": 2438 }, { "epoch": 0.2996682639144858, "grad_norm": 1.4095377681281238, "learning_rate": 2.6502145274784916e-05, "loss": 0.6976, "step": 2439 }, { "epoch": 0.2997911291313429, "grad_norm": 1.2225672789322066, "learning_rate": 2.6498014848516557e-05, "loss": 0.5801, "step": 2440 }, { "epoch": 0.29991399434820004, "grad_norm": 1.30447627608537, "learning_rate": 2.649388230726293e-05, "loss": 0.572, "step": 2441 }, { "epoch": 0.30003685956505716, "grad_norm": 1.376186799665795, "learning_rate": 2.6489747651784196e-05, "loss": 0.8825, "step": 2442 }, { "epoch": 0.3001597247819142, "grad_norm": 1.4761414176356382, "learning_rate": 2.6485610882840892e-05, "loss": 0.7107, "step": 2443 }, { "epoch": 0.30028258999877133, "grad_norm": 1.5918204978840815, "learning_rate": 2.6481472001193958e-05, "loss": 0.7503, "step": 2444 }, { "epoch": 0.30040545521562845, "grad_norm": 1.445191374769686, "learning_rate": 2.647733100760471e-05, "loss": 0.5469, "step": 2445 }, { "epoch": 0.30052832043248556, "grad_norm": 1.3377955487205306, "learning_rate": 2.6473187902834848e-05, "loss": 0.7394, "step": 2446 }, { "epoch": 0.3006511856493427, "grad_norm": 1.3142133141215817, "learning_rate": 2.646904268764648e-05, "loss": 0.6485, "step": 2447 }, { "epoch": 0.3007740508661998, "grad_norm": 1.337261276043544, "learning_rate": 2.6464895362802095e-05, "loss": 0.6148, "step": 2448 }, { "epoch": 0.3008969160830569, "grad_norm": 1.377489523323608, "learning_rate": 2.6460745929064553e-05, "loss": 0.6353, "step": 2449 }, { "epoch": 0.30101978129991397, "grad_norm": 1.4132881543320492, "learning_rate": 2.645659438719713e-05, "loss": 0.6028, "step": 2450 }, { "epoch": 0.3011426465167711, "grad_norm": 1.7655989302338462, "learning_rate": 2.6452440737963463e-05, "loss": 0.8197, "step": 2451 }, { "epoch": 0.3012655117336282, "grad_norm": 1.4442894580494268, "learning_rate": 2.6448284982127596e-05, "loss": 0.6502, "step": 2452 }, { "epoch": 0.3013883769504853, "grad_norm": 1.536197832157501, "learning_rate": 2.6444127120453957e-05, "loss": 0.6919, "step": 2453 }, { "epoch": 0.30151124216734243, "grad_norm": 1.2562097096007652, "learning_rate": 2.643996715370734e-05, "loss": 0.6998, "step": 2454 }, { "epoch": 0.30163410738419955, "grad_norm": 1.2801352479232897, "learning_rate": 2.6435805082652966e-05, "loss": 0.5539, "step": 2455 }, { "epoch": 0.30175697260105666, "grad_norm": 1.3079420825339567, "learning_rate": 2.6431640908056408e-05, "loss": 0.6051, "step": 2456 }, { "epoch": 0.3018798378179137, "grad_norm": 1.4513054598372057, "learning_rate": 2.6427474630683636e-05, "loss": 0.74, "step": 2457 }, { "epoch": 0.30200270303477084, "grad_norm": 1.6408437203301698, "learning_rate": 2.642330625130102e-05, "loss": 0.6344, "step": 2458 }, { "epoch": 0.30212556825162795, "grad_norm": 1.2732096846259093, "learning_rate": 2.6419135770675304e-05, "loss": 0.604, "step": 2459 }, { "epoch": 0.30224843346848507, "grad_norm": 1.377548799427008, "learning_rate": 2.6414963189573616e-05, "loss": 0.6089, "step": 2460 }, { "epoch": 0.3023712986853422, "grad_norm": 1.4603319581710439, "learning_rate": 2.641078850876348e-05, "loss": 0.6389, "step": 2461 }, { "epoch": 0.3024941639021993, "grad_norm": 1.55341564151219, "learning_rate": 2.6406611729012796e-05, "loss": 0.6829, "step": 2462 }, { "epoch": 0.3026170291190564, "grad_norm": 1.2542987931849043, "learning_rate": 2.6402432851089863e-05, "loss": 0.746, "step": 2463 }, { "epoch": 0.30273989433591353, "grad_norm": 1.1668802770077082, "learning_rate": 2.639825187576335e-05, "loss": 0.5791, "step": 2464 }, { "epoch": 0.3028627595527706, "grad_norm": 1.2941309788019855, "learning_rate": 2.6394068803802328e-05, "loss": 0.6517, "step": 2465 }, { "epoch": 0.3029856247696277, "grad_norm": 1.3537898591316258, "learning_rate": 2.6389883635976243e-05, "loss": 0.697, "step": 2466 }, { "epoch": 0.3031084899864848, "grad_norm": 1.1669624561257468, "learning_rate": 2.6385696373054926e-05, "loss": 0.5354, "step": 2467 }, { "epoch": 0.30323135520334193, "grad_norm": 1.412063534463707, "learning_rate": 2.6381507015808603e-05, "loss": 0.7671, "step": 2468 }, { "epoch": 0.30335422042019905, "grad_norm": 1.12853057979421, "learning_rate": 2.6377315565007876e-05, "loss": 0.6919, "step": 2469 }, { "epoch": 0.30347708563705617, "grad_norm": 1.3192679166399208, "learning_rate": 2.6373122021423733e-05, "loss": 0.6567, "step": 2470 }, { "epoch": 0.3035999508539133, "grad_norm": 1.149287516751743, "learning_rate": 2.6368926385827548e-05, "loss": 0.7025, "step": 2471 }, { "epoch": 0.30372281607077034, "grad_norm": 1.2564238302020516, "learning_rate": 2.6364728658991093e-05, "loss": 0.6621, "step": 2472 }, { "epoch": 0.30384568128762746, "grad_norm": 1.2889429679730293, "learning_rate": 2.63605288416865e-05, "loss": 0.6959, "step": 2473 }, { "epoch": 0.30396854650448457, "grad_norm": 1.4898757690755198, "learning_rate": 2.6356326934686303e-05, "loss": 0.7409, "step": 2474 }, { "epoch": 0.3040914117213417, "grad_norm": 1.3899086667916727, "learning_rate": 2.6352122938763412e-05, "loss": 0.6986, "step": 2475 }, { "epoch": 0.3042142769381988, "grad_norm": 1.25979048352415, "learning_rate": 2.634791685469113e-05, "loss": 0.7144, "step": 2476 }, { "epoch": 0.3043371421550559, "grad_norm": 1.3610340649771746, "learning_rate": 2.6343708683243137e-05, "loss": 0.6037, "step": 2477 }, { "epoch": 0.30446000737191303, "grad_norm": 1.4802857672976426, "learning_rate": 2.6339498425193496e-05, "loss": 0.5383, "step": 2478 }, { "epoch": 0.3045828725887701, "grad_norm": 1.3058682083126159, "learning_rate": 2.633528608131666e-05, "loss": 0.6201, "step": 2479 }, { "epoch": 0.3047057378056272, "grad_norm": 1.3162182356411625, "learning_rate": 2.6331071652387463e-05, "loss": 0.6432, "step": 2480 }, { "epoch": 0.3048286030224843, "grad_norm": 1.2629438363239822, "learning_rate": 2.6326855139181117e-05, "loss": 0.653, "step": 2481 }, { "epoch": 0.30495146823934144, "grad_norm": 1.3815055890360515, "learning_rate": 2.6322636542473228e-05, "loss": 0.7235, "step": 2482 }, { "epoch": 0.30507433345619855, "grad_norm": 1.3009432438526725, "learning_rate": 2.631841586303978e-05, "loss": 0.5671, "step": 2483 }, { "epoch": 0.30519719867305567, "grad_norm": 1.327052106998988, "learning_rate": 2.6314193101657124e-05, "loss": 0.6564, "step": 2484 }, { "epoch": 0.3053200638899128, "grad_norm": 1.4685376349489467, "learning_rate": 2.6309968259102032e-05, "loss": 0.7218, "step": 2485 }, { "epoch": 0.3054429291067699, "grad_norm": 2.1368373874279567, "learning_rate": 2.630574133615163e-05, "loss": 0.689, "step": 2486 }, { "epoch": 0.30556579432362696, "grad_norm": 1.1806373531561964, "learning_rate": 2.630151233358342e-05, "loss": 0.6337, "step": 2487 }, { "epoch": 0.3056886595404841, "grad_norm": 1.5547882466225953, "learning_rate": 2.6297281252175316e-05, "loss": 0.5564, "step": 2488 }, { "epoch": 0.3058115247573412, "grad_norm": 1.2232814756210562, "learning_rate": 2.6293048092705586e-05, "loss": 0.5094, "step": 2489 }, { "epoch": 0.3059343899741983, "grad_norm": 1.3585393518992124, "learning_rate": 2.62888128559529e-05, "loss": 0.7912, "step": 2490 }, { "epoch": 0.3060572551910554, "grad_norm": 1.6308232832267326, "learning_rate": 2.6284575542696297e-05, "loss": 0.6719, "step": 2491 }, { "epoch": 0.30618012040791254, "grad_norm": 1.3366492794879048, "learning_rate": 2.628033615371521e-05, "loss": 0.6072, "step": 2492 }, { "epoch": 0.30630298562476965, "grad_norm": 1.4902375710911069, "learning_rate": 2.627609468978944e-05, "loss": 0.6522, "step": 2493 }, { "epoch": 0.3064258508416267, "grad_norm": 2.0756321666560433, "learning_rate": 2.6271851151699184e-05, "loss": 0.7065, "step": 2494 }, { "epoch": 0.30654871605848383, "grad_norm": 1.3766022234270892, "learning_rate": 2.626760554022501e-05, "loss": 0.6257, "step": 2495 }, { "epoch": 0.30667158127534094, "grad_norm": 1.4815480133941579, "learning_rate": 2.626335785614786e-05, "loss": 0.6207, "step": 2496 }, { "epoch": 0.30679444649219806, "grad_norm": 1.5921998916548383, "learning_rate": 2.6259108100249086e-05, "loss": 0.7173, "step": 2497 }, { "epoch": 0.3069173117090552, "grad_norm": 1.378425045426828, "learning_rate": 2.6254856273310394e-05, "loss": 0.5211, "step": 2498 }, { "epoch": 0.3070401769259123, "grad_norm": 1.329175482073475, "learning_rate": 2.6250602376113882e-05, "loss": 0.7245, "step": 2499 }, { "epoch": 0.3071630421427694, "grad_norm": 1.7221210583365882, "learning_rate": 2.6246346409442024e-05, "loss": 0.8727, "step": 2500 }, { "epoch": 0.30728590735962646, "grad_norm": 1.3072633374908982, "learning_rate": 2.6242088374077676e-05, "loss": 0.5817, "step": 2501 }, { "epoch": 0.3074087725764836, "grad_norm": 1.4852634287298978, "learning_rate": 2.623782827080408e-05, "loss": 0.7688, "step": 2502 }, { "epoch": 0.3075316377933407, "grad_norm": 1.4482031481844009, "learning_rate": 2.6233566100404856e-05, "loss": 0.6449, "step": 2503 }, { "epoch": 0.3076545030101978, "grad_norm": 1.2675970740240525, "learning_rate": 2.6229301863664e-05, "loss": 0.591, "step": 2504 }, { "epoch": 0.3077773682270549, "grad_norm": 1.441719576125879, "learning_rate": 2.6225035561365888e-05, "loss": 0.6407, "step": 2505 }, { "epoch": 0.30790023344391204, "grad_norm": 1.2956222180735257, "learning_rate": 2.6220767194295285e-05, "loss": 0.6527, "step": 2506 }, { "epoch": 0.30802309866076916, "grad_norm": 1.3829537062957955, "learning_rate": 2.6216496763237324e-05, "loss": 0.7639, "step": 2507 }, { "epoch": 0.3081459638776262, "grad_norm": 1.3018371785417477, "learning_rate": 2.6212224268977527e-05, "loss": 0.633, "step": 2508 }, { "epoch": 0.30826882909448333, "grad_norm": 1.4061000732830922, "learning_rate": 2.6207949712301787e-05, "loss": 0.731, "step": 2509 }, { "epoch": 0.30839169431134045, "grad_norm": 1.2386783246880577, "learning_rate": 2.6203673093996385e-05, "loss": 0.6638, "step": 2510 }, { "epoch": 0.30851455952819756, "grad_norm": 1.374410039184066, "learning_rate": 2.6199394414847975e-05, "loss": 0.7109, "step": 2511 }, { "epoch": 0.3086374247450547, "grad_norm": 1.1227663809651465, "learning_rate": 2.619511367564359e-05, "loss": 0.6737, "step": 2512 }, { "epoch": 0.3087602899619118, "grad_norm": 1.1396891319287876, "learning_rate": 2.6190830877170653e-05, "loss": 0.5849, "step": 2513 }, { "epoch": 0.3088831551787689, "grad_norm": 1.3446049045321882, "learning_rate": 2.618654602021695e-05, "loss": 0.6503, "step": 2514 }, { "epoch": 0.309006020395626, "grad_norm": 1.1422331814351225, "learning_rate": 2.6182259105570652e-05, "loss": 0.5188, "step": 2515 }, { "epoch": 0.3091288856124831, "grad_norm": 1.6336220708089082, "learning_rate": 2.6177970134020308e-05, "loss": 0.6219, "step": 2516 }, { "epoch": 0.3092517508293402, "grad_norm": 1.264204170686446, "learning_rate": 2.6173679106354852e-05, "loss": 0.6696, "step": 2517 }, { "epoch": 0.3093746160461973, "grad_norm": 1.349567393846118, "learning_rate": 2.616938602336359e-05, "loss": 0.679, "step": 2518 }, { "epoch": 0.30949748126305443, "grad_norm": 1.2517880866341422, "learning_rate": 2.6165090885836208e-05, "loss": 0.6055, "step": 2519 }, { "epoch": 0.30962034647991155, "grad_norm": 1.2607803784804532, "learning_rate": 2.616079369456276e-05, "loss": 0.7428, "step": 2520 }, { "epoch": 0.30974321169676866, "grad_norm": 1.238614498838452, "learning_rate": 2.6156494450333696e-05, "loss": 0.6576, "step": 2521 }, { "epoch": 0.3098660769136258, "grad_norm": 1.2864564924525819, "learning_rate": 2.6152193153939826e-05, "loss": 0.628, "step": 2522 }, { "epoch": 0.30998894213048284, "grad_norm": 1.1728487729998607, "learning_rate": 2.614788980617235e-05, "loss": 0.6175, "step": 2523 }, { "epoch": 0.31011180734733995, "grad_norm": 1.3132363058693837, "learning_rate": 2.6143584407822848e-05, "loss": 0.657, "step": 2524 }, { "epoch": 0.31023467256419707, "grad_norm": 1.7142676593143533, "learning_rate": 2.6139276959683254e-05, "loss": 0.7109, "step": 2525 }, { "epoch": 0.3103575377810542, "grad_norm": 1.4543096618175497, "learning_rate": 2.6134967462545908e-05, "loss": 0.7207, "step": 2526 }, { "epoch": 0.3104804029979113, "grad_norm": 1.4348026856164229, "learning_rate": 2.6130655917203512e-05, "loss": 0.7111, "step": 2527 }, { "epoch": 0.3106032682147684, "grad_norm": 1.6185329391280219, "learning_rate": 2.6126342324449142e-05, "loss": 0.6284, "step": 2528 }, { "epoch": 0.31072613343162553, "grad_norm": 1.2772076956547773, "learning_rate": 2.6122026685076256e-05, "loss": 0.6835, "step": 2529 }, { "epoch": 0.3108489986484826, "grad_norm": 1.5443823181694118, "learning_rate": 2.6117708999878695e-05, "loss": 0.718, "step": 2530 }, { "epoch": 0.3109718638653397, "grad_norm": 1.6278603059521664, "learning_rate": 2.611338926965066e-05, "loss": 0.7446, "step": 2531 }, { "epoch": 0.3110947290821968, "grad_norm": 1.4029653716314316, "learning_rate": 2.6109067495186747e-05, "loss": 0.7183, "step": 2532 }, { "epoch": 0.31121759429905393, "grad_norm": 1.1942365271575093, "learning_rate": 2.6104743677281912e-05, "loss": 0.5555, "step": 2533 }, { "epoch": 0.31134045951591105, "grad_norm": 1.3040379994176885, "learning_rate": 2.610041781673149e-05, "loss": 0.6809, "step": 2534 }, { "epoch": 0.31146332473276817, "grad_norm": 1.226468097490837, "learning_rate": 2.60960899143312e-05, "loss": 0.6494, "step": 2535 }, { "epoch": 0.3115861899496253, "grad_norm": 1.3356276346099825, "learning_rate": 2.6091759970877134e-05, "loss": 0.6388, "step": 2536 }, { "epoch": 0.3117090551664824, "grad_norm": 1.2277314594635969, "learning_rate": 2.6087427987165754e-05, "loss": 0.6733, "step": 2537 }, { "epoch": 0.31183192038333946, "grad_norm": 1.7859210985388885, "learning_rate": 2.6083093963993898e-05, "loss": 0.743, "step": 2538 }, { "epoch": 0.31195478560019657, "grad_norm": 1.2798944498099116, "learning_rate": 2.6078757902158784e-05, "loss": 0.5982, "step": 2539 }, { "epoch": 0.3120776508170537, "grad_norm": 1.6188265327407387, "learning_rate": 2.6074419802458002e-05, "loss": 0.6475, "step": 2540 }, { "epoch": 0.3122005160339108, "grad_norm": 1.2444869239351086, "learning_rate": 2.6070079665689518e-05, "loss": 0.6599, "step": 2541 }, { "epoch": 0.3123233812507679, "grad_norm": 1.4173631620479035, "learning_rate": 2.6065737492651677e-05, "loss": 0.556, "step": 2542 }, { "epoch": 0.31244624646762503, "grad_norm": 1.3523053100461713, "learning_rate": 2.606139328414318e-05, "loss": 0.667, "step": 2543 }, { "epoch": 0.31256911168448215, "grad_norm": 1.3674646951596976, "learning_rate": 2.6057047040963127e-05, "loss": 0.7741, "step": 2544 }, { "epoch": 0.3126919769013392, "grad_norm": 1.1879789892668655, "learning_rate": 2.605269876391098e-05, "loss": 0.5451, "step": 2545 }, { "epoch": 0.3128148421181963, "grad_norm": 1.3205740270632151, "learning_rate": 2.6048348453786576e-05, "loss": 0.7046, "step": 2546 }, { "epoch": 0.31293770733505344, "grad_norm": 1.3827041541426788, "learning_rate": 2.604399611139012e-05, "loss": 0.6505, "step": 2547 }, { "epoch": 0.31306057255191055, "grad_norm": 1.187935203296127, "learning_rate": 2.60396417375222e-05, "loss": 0.5001, "step": 2548 }, { "epoch": 0.31318343776876767, "grad_norm": 1.2683024100457911, "learning_rate": 2.6035285332983783e-05, "loss": 0.6129, "step": 2549 }, { "epoch": 0.3133063029856248, "grad_norm": 1.2801065416233088, "learning_rate": 2.6030926898576196e-05, "loss": 0.5747, "step": 2550 }, { "epoch": 0.3134291682024819, "grad_norm": 1.5012068352588868, "learning_rate": 2.6026566435101143e-05, "loss": 0.6733, "step": 2551 }, { "epoch": 0.31355203341933896, "grad_norm": 1.1059099916301751, "learning_rate": 2.60222039433607e-05, "loss": 0.6212, "step": 2552 }, { "epoch": 0.3136748986361961, "grad_norm": 1.4283691349842955, "learning_rate": 2.6017839424157322e-05, "loss": 0.6212, "step": 2553 }, { "epoch": 0.3137977638530532, "grad_norm": 1.490641846209876, "learning_rate": 2.601347287829384e-05, "loss": 0.7474, "step": 2554 }, { "epoch": 0.3139206290699103, "grad_norm": 1.6498064978450815, "learning_rate": 2.6009104306573447e-05, "loss": 0.6375, "step": 2555 }, { "epoch": 0.3140434942867674, "grad_norm": 1.5501671439934752, "learning_rate": 2.600473370979971e-05, "loss": 0.6318, "step": 2556 }, { "epoch": 0.31416635950362454, "grad_norm": 1.3357930680220989, "learning_rate": 2.600036108877658e-05, "loss": 0.5443, "step": 2557 }, { "epoch": 0.31428922472048165, "grad_norm": 1.1034052001652728, "learning_rate": 2.5995986444308366e-05, "loss": 0.5908, "step": 2558 }, { "epoch": 0.3144120899373387, "grad_norm": 1.4559047460943224, "learning_rate": 2.5991609777199755e-05, "loss": 0.6918, "step": 2559 }, { "epoch": 0.3145349551541958, "grad_norm": 1.3882494862365227, "learning_rate": 2.5987231088255807e-05, "loss": 0.5873, "step": 2560 }, { "epoch": 0.31465782037105294, "grad_norm": 1.6013406223009476, "learning_rate": 2.598285037828196e-05, "loss": 0.5888, "step": 2561 }, { "epoch": 0.31478068558791006, "grad_norm": 1.365953475128749, "learning_rate": 2.5978467648084012e-05, "loss": 0.6063, "step": 2562 }, { "epoch": 0.3149035508047672, "grad_norm": 1.244561415772206, "learning_rate": 2.5974082898468135e-05, "loss": 0.6508, "step": 2563 }, { "epoch": 0.3150264160216243, "grad_norm": 1.5446476507843223, "learning_rate": 2.5969696130240876e-05, "loss": 0.7349, "step": 2564 }, { "epoch": 0.3151492812384814, "grad_norm": 1.4555865289214887, "learning_rate": 2.596530734420916e-05, "loss": 0.6362, "step": 2565 }, { "epoch": 0.3152721464553385, "grad_norm": 1.4698464494128383, "learning_rate": 2.596091654118027e-05, "loss": 0.6475, "step": 2566 }, { "epoch": 0.3153950116721956, "grad_norm": 1.2105702552277666, "learning_rate": 2.5956523721961866e-05, "loss": 0.5492, "step": 2567 }, { "epoch": 0.3155178768890527, "grad_norm": 1.5139621919074298, "learning_rate": 2.5952128887361977e-05, "loss": 0.5953, "step": 2568 }, { "epoch": 0.3156407421059098, "grad_norm": 1.376337368112041, "learning_rate": 2.5947732038189005e-05, "loss": 0.665, "step": 2569 }, { "epoch": 0.3157636073227669, "grad_norm": 1.250419532897594, "learning_rate": 2.5943333175251723e-05, "loss": 0.5404, "step": 2570 }, { "epoch": 0.31588647253962404, "grad_norm": 1.370229977969879, "learning_rate": 2.5938932299359276e-05, "loss": 0.5484, "step": 2571 }, { "epoch": 0.31600933775648116, "grad_norm": 1.29867804103967, "learning_rate": 2.5934529411321174e-05, "loss": 0.7878, "step": 2572 }, { "epoch": 0.31613220297333827, "grad_norm": 1.3677243117750901, "learning_rate": 2.59301245119473e-05, "loss": 0.7092, "step": 2573 }, { "epoch": 0.31625506819019533, "grad_norm": 1.1637274135340092, "learning_rate": 2.5925717602047903e-05, "loss": 0.6135, "step": 2574 }, { "epoch": 0.31637793340705245, "grad_norm": 1.298217335672922, "learning_rate": 2.5921308682433613e-05, "loss": 0.618, "step": 2575 }, { "epoch": 0.31650079862390956, "grad_norm": 1.3597695491733306, "learning_rate": 2.5916897753915415e-05, "loss": 0.651, "step": 2576 }, { "epoch": 0.3166236638407667, "grad_norm": 1.214403160311376, "learning_rate": 2.5912484817304675e-05, "loss": 0.6438, "step": 2577 }, { "epoch": 0.3167465290576238, "grad_norm": 1.3332037720094896, "learning_rate": 2.5908069873413123e-05, "loss": 0.7253, "step": 2578 }, { "epoch": 0.3168693942744809, "grad_norm": 1.1781021038172923, "learning_rate": 2.590365292305286e-05, "loss": 0.6703, "step": 2579 }, { "epoch": 0.316992259491338, "grad_norm": 1.2373537921675537, "learning_rate": 2.589923396703635e-05, "loss": 0.6871, "step": 2580 }, { "epoch": 0.3171151247081951, "grad_norm": 1.458256984028032, "learning_rate": 2.5894813006176443e-05, "loss": 0.6507, "step": 2581 }, { "epoch": 0.3172379899250522, "grad_norm": 1.1502066692457085, "learning_rate": 2.5890390041286335e-05, "loss": 0.6551, "step": 2582 }, { "epoch": 0.3173608551419093, "grad_norm": 1.4041446859293654, "learning_rate": 2.5885965073179605e-05, "loss": 0.5989, "step": 2583 }, { "epoch": 0.31748372035876643, "grad_norm": 1.2869795160531898, "learning_rate": 2.58815381026702e-05, "loss": 0.6641, "step": 2584 }, { "epoch": 0.31760658557562355, "grad_norm": 1.3907326018079387, "learning_rate": 2.5877109130572427e-05, "loss": 0.5665, "step": 2585 }, { "epoch": 0.31772945079248066, "grad_norm": 1.2350372873818287, "learning_rate": 2.587267815770097e-05, "loss": 0.672, "step": 2586 }, { "epoch": 0.3178523160093378, "grad_norm": 1.248320314920715, "learning_rate": 2.586824518487088e-05, "loss": 0.7338, "step": 2587 }, { "epoch": 0.3179751812261949, "grad_norm": 1.4369785814691036, "learning_rate": 2.586381021289757e-05, "loss": 0.6268, "step": 2588 }, { "epoch": 0.31809804644305195, "grad_norm": 1.302923672839092, "learning_rate": 2.5859373242596827e-05, "loss": 0.5552, "step": 2589 }, { "epoch": 0.31822091165990907, "grad_norm": 1.2109691119657817, "learning_rate": 2.58549342747848e-05, "loss": 0.6076, "step": 2590 }, { "epoch": 0.3183437768767662, "grad_norm": 1.418565842140509, "learning_rate": 2.585049331027801e-05, "loss": 0.646, "step": 2591 }, { "epoch": 0.3184666420936233, "grad_norm": 1.520246966610575, "learning_rate": 2.5846050349893345e-05, "loss": 0.623, "step": 2592 }, { "epoch": 0.3185895073104804, "grad_norm": 1.512292729014408, "learning_rate": 2.584160539444806e-05, "loss": 0.6445, "step": 2593 }, { "epoch": 0.31871237252733753, "grad_norm": 1.146667164858264, "learning_rate": 2.5837158444759764e-05, "loss": 0.6671, "step": 2594 }, { "epoch": 0.31883523774419464, "grad_norm": 1.251373086606052, "learning_rate": 2.583270950164646e-05, "loss": 0.6246, "step": 2595 }, { "epoch": 0.3189581029610517, "grad_norm": 1.2822615137428304, "learning_rate": 2.5828258565926497e-05, "loss": 0.6483, "step": 2596 }, { "epoch": 0.3190809681779088, "grad_norm": 1.3793251900759342, "learning_rate": 2.582380563841859e-05, "loss": 0.6412, "step": 2597 }, { "epoch": 0.31920383339476593, "grad_norm": 1.401578369021944, "learning_rate": 2.5819350719941836e-05, "loss": 0.7121, "step": 2598 }, { "epoch": 0.31932669861162305, "grad_norm": 1.4071848815983876, "learning_rate": 2.5814893811315675e-05, "loss": 0.6147, "step": 2599 }, { "epoch": 0.31944956382848017, "grad_norm": 1.400233448306146, "learning_rate": 2.5810434913359943e-05, "loss": 0.6179, "step": 2600 }, { "epoch": 0.3195724290453373, "grad_norm": 1.1959140906101628, "learning_rate": 2.580597402689481e-05, "loss": 0.6302, "step": 2601 }, { "epoch": 0.3196952942621944, "grad_norm": 1.4347986175401308, "learning_rate": 2.5801511152740837e-05, "loss": 0.6266, "step": 2602 }, { "epoch": 0.31981815947905146, "grad_norm": 1.550561011660293, "learning_rate": 2.5797046291718943e-05, "loss": 0.6967, "step": 2603 }, { "epoch": 0.31994102469590857, "grad_norm": 1.6490815814270972, "learning_rate": 2.57925794446504e-05, "loss": 0.7436, "step": 2604 }, { "epoch": 0.3200638899127657, "grad_norm": 1.166096420517154, "learning_rate": 2.578811061235686e-05, "loss": 0.5972, "step": 2605 }, { "epoch": 0.3201867551296228, "grad_norm": 1.6985292587802272, "learning_rate": 2.5783639795660333e-05, "loss": 0.6917, "step": 2606 }, { "epoch": 0.3203096203464799, "grad_norm": 1.5096066459399982, "learning_rate": 2.57791669953832e-05, "loss": 0.7676, "step": 2607 }, { "epoch": 0.32043248556333703, "grad_norm": 1.6707534920991851, "learning_rate": 2.577469221234821e-05, "loss": 0.6244, "step": 2608 }, { "epoch": 0.32055535078019415, "grad_norm": 1.6566903487379576, "learning_rate": 2.5770215447378463e-05, "loss": 0.801, "step": 2609 }, { "epoch": 0.3206782159970512, "grad_norm": 1.4237559103569994, "learning_rate": 2.5765736701297427e-05, "loss": 0.6654, "step": 2610 }, { "epoch": 0.3208010812139083, "grad_norm": 1.3674943012861098, "learning_rate": 2.576125597492895e-05, "loss": 0.6859, "step": 2611 }, { "epoch": 0.32092394643076544, "grad_norm": 1.269773735327447, "learning_rate": 2.5756773269097217e-05, "loss": 0.5755, "step": 2612 }, { "epoch": 0.32104681164762255, "grad_norm": 1.5633598131928748, "learning_rate": 2.5752288584626807e-05, "loss": 0.6617, "step": 2613 }, { "epoch": 0.32116967686447967, "grad_norm": 1.4245475468319873, "learning_rate": 2.574780192234264e-05, "loss": 0.7261, "step": 2614 }, { "epoch": 0.3212925420813368, "grad_norm": 1.6144095494920554, "learning_rate": 2.5743313283070015e-05, "loss": 0.5976, "step": 2615 }, { "epoch": 0.3214154072981939, "grad_norm": 1.3085805707846523, "learning_rate": 2.573882266763458e-05, "loss": 0.6626, "step": 2616 }, { "epoch": 0.321538272515051, "grad_norm": 1.5154457376959152, "learning_rate": 2.573433007686236e-05, "loss": 0.6931, "step": 2617 }, { "epoch": 0.3216611377319081, "grad_norm": 1.4225351102347, "learning_rate": 2.572983551157974e-05, "loss": 0.5263, "step": 2618 }, { "epoch": 0.3217840029487652, "grad_norm": 1.2047673939135632, "learning_rate": 2.572533897261346e-05, "loss": 0.5607, "step": 2619 }, { "epoch": 0.3219068681656223, "grad_norm": 1.1246564674742145, "learning_rate": 2.5720840460790635e-05, "loss": 0.6805, "step": 2620 }, { "epoch": 0.3220297333824794, "grad_norm": 1.6353367158565286, "learning_rate": 2.571633997693873e-05, "loss": 0.612, "step": 2621 }, { "epoch": 0.32215259859933654, "grad_norm": 1.5130014569591679, "learning_rate": 2.571183752188559e-05, "loss": 0.6231, "step": 2622 }, { "epoch": 0.32227546381619365, "grad_norm": 1.3667676938426772, "learning_rate": 2.57073330964594e-05, "loss": 0.6656, "step": 2623 }, { "epoch": 0.32239832903305077, "grad_norm": 1.3229940266448188, "learning_rate": 2.5702826701488735e-05, "loss": 0.7438, "step": 2624 }, { "epoch": 0.3225211942499078, "grad_norm": 1.4347702153060102, "learning_rate": 2.56983183378025e-05, "loss": 0.7259, "step": 2625 }, { "epoch": 0.32264405946676494, "grad_norm": 2.2042327438534524, "learning_rate": 2.5693808006229988e-05, "loss": 0.7014, "step": 2626 }, { "epoch": 0.32276692468362206, "grad_norm": 1.3379183537386428, "learning_rate": 2.5689295707600853e-05, "loss": 0.5754, "step": 2627 }, { "epoch": 0.3228897899004792, "grad_norm": 1.104038076891047, "learning_rate": 2.568478144274509e-05, "loss": 0.6649, "step": 2628 }, { "epoch": 0.3230126551173363, "grad_norm": 1.2326051771159159, "learning_rate": 2.568026521249307e-05, "loss": 0.7723, "step": 2629 }, { "epoch": 0.3231355203341934, "grad_norm": 1.4401760104379724, "learning_rate": 2.5675747017675535e-05, "loss": 0.6467, "step": 2630 }, { "epoch": 0.3232583855510505, "grad_norm": 1.4325110418303435, "learning_rate": 2.5671226859123567e-05, "loss": 0.7172, "step": 2631 }, { "epoch": 0.3233812507679076, "grad_norm": 1.2460257175717617, "learning_rate": 2.5666704737668627e-05, "loss": 0.6924, "step": 2632 }, { "epoch": 0.3235041159847647, "grad_norm": 1.1567462888893196, "learning_rate": 2.5662180654142523e-05, "loss": 0.5933, "step": 2633 }, { "epoch": 0.3236269812016218, "grad_norm": 1.2360458694802836, "learning_rate": 2.5657654609377438e-05, "loss": 0.6713, "step": 2634 }, { "epoch": 0.3237498464184789, "grad_norm": 1.456236450735909, "learning_rate": 2.56531266042059e-05, "loss": 0.6664, "step": 2635 }, { "epoch": 0.32387271163533604, "grad_norm": 1.3438996397427874, "learning_rate": 2.564859663946081e-05, "loss": 0.6311, "step": 2636 }, { "epoch": 0.32399557685219316, "grad_norm": 1.640426117356657, "learning_rate": 2.564406471597543e-05, "loss": 0.6873, "step": 2637 }, { "epoch": 0.32411844206905027, "grad_norm": 1.3631561818408349, "learning_rate": 2.563953083458338e-05, "loss": 0.5908, "step": 2638 }, { "epoch": 0.32424130728590733, "grad_norm": 1.7492913635085148, "learning_rate": 2.5634994996118625e-05, "loss": 0.5763, "step": 2639 }, { "epoch": 0.32436417250276445, "grad_norm": 1.3650085867199575, "learning_rate": 2.563045720141551e-05, "loss": 0.8255, "step": 2640 }, { "epoch": 0.32448703771962156, "grad_norm": 1.3667143011918912, "learning_rate": 2.562591745130874e-05, "loss": 0.6195, "step": 2641 }, { "epoch": 0.3246099029364787, "grad_norm": 1.3464031111411778, "learning_rate": 2.5621375746633363e-05, "loss": 0.6702, "step": 2642 }, { "epoch": 0.3247327681533358, "grad_norm": 1.3431064944452729, "learning_rate": 2.56168320882248e-05, "loss": 0.6261, "step": 2643 }, { "epoch": 0.3248556333701929, "grad_norm": 1.4975466565509967, "learning_rate": 2.561228647691883e-05, "loss": 0.6689, "step": 2644 }, { "epoch": 0.32497849858705, "grad_norm": 1.4209372711633905, "learning_rate": 2.560773891355158e-05, "loss": 0.7505, "step": 2645 }, { "epoch": 0.32510136380390714, "grad_norm": 1.330257862460566, "learning_rate": 2.5603189398959554e-05, "loss": 0.7212, "step": 2646 }, { "epoch": 0.3252242290207642, "grad_norm": 1.251823445855228, "learning_rate": 2.55986379339796e-05, "loss": 0.5255, "step": 2647 }, { "epoch": 0.3253470942376213, "grad_norm": 1.23395845228261, "learning_rate": 2.5594084519448934e-05, "loss": 0.5615, "step": 2648 }, { "epoch": 0.32546995945447843, "grad_norm": 1.2430184554090695, "learning_rate": 2.5589529156205126e-05, "loss": 0.6128, "step": 2649 }, { "epoch": 0.32559282467133555, "grad_norm": 1.3269565551861917, "learning_rate": 2.5584971845086107e-05, "loss": 0.6181, "step": 2650 }, { "epoch": 0.32571568988819266, "grad_norm": 1.197794710707406, "learning_rate": 2.558041258693016e-05, "loss": 0.5411, "step": 2651 }, { "epoch": 0.3258385551050498, "grad_norm": 1.7487812007064714, "learning_rate": 2.5575851382575935e-05, "loss": 0.6968, "step": 2652 }, { "epoch": 0.3259614203219069, "grad_norm": 1.2618777226109428, "learning_rate": 2.5571288232862433e-05, "loss": 0.5781, "step": 2653 }, { "epoch": 0.32608428553876395, "grad_norm": 1.3879980676624495, "learning_rate": 2.556672313862902e-05, "loss": 0.6464, "step": 2654 }, { "epoch": 0.32620715075562107, "grad_norm": 1.6431412314944789, "learning_rate": 2.556215610071541e-05, "loss": 0.7289, "step": 2655 }, { "epoch": 0.3263300159724782, "grad_norm": 1.3774703472867555, "learning_rate": 2.555758711996169e-05, "loss": 0.7313, "step": 2656 }, { "epoch": 0.3264528811893353, "grad_norm": 1.3691435047088343, "learning_rate": 2.5553016197208282e-05, "loss": 0.7864, "step": 2657 }, { "epoch": 0.3265757464061924, "grad_norm": 1.5203732616078796, "learning_rate": 2.5548443333295984e-05, "loss": 0.6866, "step": 2658 }, { "epoch": 0.32669861162304953, "grad_norm": 1.3141009154740049, "learning_rate": 2.5543868529065944e-05, "loss": 0.5628, "step": 2659 }, { "epoch": 0.32682147683990664, "grad_norm": 1.5009999433405659, "learning_rate": 2.5539291785359672e-05, "loss": 0.676, "step": 2660 }, { "epoch": 0.3269443420567637, "grad_norm": 1.5396464659175015, "learning_rate": 2.553471310301902e-05, "loss": 0.6434, "step": 2661 }, { "epoch": 0.3270672072736208, "grad_norm": 1.375997399182245, "learning_rate": 2.5530132482886215e-05, "loss": 0.6876, "step": 2662 }, { "epoch": 0.32719007249047793, "grad_norm": 1.3073619411919035, "learning_rate": 2.552554992580383e-05, "loss": 0.6325, "step": 2663 }, { "epoch": 0.32731293770733505, "grad_norm": 1.491340639576816, "learning_rate": 2.55209654326148e-05, "loss": 0.7321, "step": 2664 }, { "epoch": 0.32743580292419217, "grad_norm": 1.2801826898829267, "learning_rate": 2.5516379004162402e-05, "loss": 0.5597, "step": 2665 }, { "epoch": 0.3275586681410493, "grad_norm": 1.4700304633000156, "learning_rate": 2.5511790641290292e-05, "loss": 0.639, "step": 2666 }, { "epoch": 0.3276815333579064, "grad_norm": 1.2378453323075547, "learning_rate": 2.5507200344842466e-05, "loss": 0.5537, "step": 2667 }, { "epoch": 0.3278043985747635, "grad_norm": 1.3084044018497565, "learning_rate": 2.5502608115663275e-05, "loss": 0.5727, "step": 2668 }, { "epoch": 0.32792726379162057, "grad_norm": 1.3122542979756189, "learning_rate": 2.5498013954597435e-05, "loss": 0.5632, "step": 2669 }, { "epoch": 0.3280501290084777, "grad_norm": 1.3338882694787164, "learning_rate": 2.5493417862490013e-05, "loss": 0.5219, "step": 2670 }, { "epoch": 0.3281729942253348, "grad_norm": 1.26135798450435, "learning_rate": 2.548881984018642e-05, "loss": 0.6892, "step": 2671 }, { "epoch": 0.3282958594421919, "grad_norm": 1.3744655841250497, "learning_rate": 2.5484219888532443e-05, "loss": 0.5929, "step": 2672 }, { "epoch": 0.32841872465904903, "grad_norm": 1.3487172955431832, "learning_rate": 2.547961800837421e-05, "loss": 0.6916, "step": 2673 }, { "epoch": 0.32854158987590615, "grad_norm": 1.3231541444458397, "learning_rate": 2.547501420055821e-05, "loss": 0.7199, "step": 2674 }, { "epoch": 0.32866445509276326, "grad_norm": 1.2799948731013884, "learning_rate": 2.5470408465931277e-05, "loss": 0.6268, "step": 2675 }, { "epoch": 0.3287873203096203, "grad_norm": 1.5946570981689958, "learning_rate": 2.5465800805340613e-05, "loss": 0.6189, "step": 2676 }, { "epoch": 0.32891018552647744, "grad_norm": 1.3423578702699086, "learning_rate": 2.546119121963376e-05, "loss": 0.5986, "step": 2677 }, { "epoch": 0.32903305074333455, "grad_norm": 1.2223057524868068, "learning_rate": 2.5456579709658632e-05, "loss": 0.5736, "step": 2678 }, { "epoch": 0.32915591596019167, "grad_norm": 1.4838679174060914, "learning_rate": 2.5451966276263472e-05, "loss": 0.6286, "step": 2679 }, { "epoch": 0.3292787811770488, "grad_norm": 1.3179843553670556, "learning_rate": 2.5447350920296902e-05, "loss": 0.6856, "step": 2680 }, { "epoch": 0.3294016463939059, "grad_norm": 1.5657305872870284, "learning_rate": 2.5442733642607888e-05, "loss": 0.6464, "step": 2681 }, { "epoch": 0.329524511610763, "grad_norm": 1.2481930659503457, "learning_rate": 2.5438114444045738e-05, "loss": 0.6587, "step": 2682 }, { "epoch": 0.3296473768276201, "grad_norm": 1.2656631835055256, "learning_rate": 2.543349332546013e-05, "loss": 0.5842, "step": 2683 }, { "epoch": 0.3297702420444772, "grad_norm": 1.3813778455726833, "learning_rate": 2.5428870287701088e-05, "loss": 0.7175, "step": 2684 }, { "epoch": 0.3298931072613343, "grad_norm": 1.4680904203293106, "learning_rate": 2.5424245331618992e-05, "loss": 0.5517, "step": 2685 }, { "epoch": 0.3300159724781914, "grad_norm": 1.1455593623385454, "learning_rate": 2.541961845806457e-05, "loss": 0.5961, "step": 2686 }, { "epoch": 0.33013883769504854, "grad_norm": 1.5395167741825477, "learning_rate": 2.541498966788891e-05, "loss": 0.7804, "step": 2687 }, { "epoch": 0.33026170291190565, "grad_norm": 1.2925538029626353, "learning_rate": 2.541035896194344e-05, "loss": 0.6675, "step": 2688 }, { "epoch": 0.33038456812876277, "grad_norm": 1.2209973428780587, "learning_rate": 2.5405726341079955e-05, "loss": 0.5646, "step": 2689 }, { "epoch": 0.3305074333456198, "grad_norm": 1.575950000734113, "learning_rate": 2.540109180615059e-05, "loss": 0.5994, "step": 2690 }, { "epoch": 0.33063029856247694, "grad_norm": 1.5933673430400759, "learning_rate": 2.5396455358007843e-05, "loss": 0.6644, "step": 2691 }, { "epoch": 0.33075316377933406, "grad_norm": 1.311588283744037, "learning_rate": 2.5391816997504552e-05, "loss": 0.6193, "step": 2692 }, { "epoch": 0.3308760289961912, "grad_norm": 1.3308232294119642, "learning_rate": 2.5387176725493922e-05, "loss": 0.6086, "step": 2693 }, { "epoch": 0.3309988942130483, "grad_norm": 1.5444151986439603, "learning_rate": 2.5382534542829497e-05, "loss": 0.6461, "step": 2694 }, { "epoch": 0.3311217594299054, "grad_norm": 1.2453342570047887, "learning_rate": 2.5377890450365174e-05, "loss": 0.7758, "step": 2695 }, { "epoch": 0.3312446246467625, "grad_norm": 1.4100841748747668, "learning_rate": 2.5373244448955207e-05, "loss": 0.6867, "step": 2696 }, { "epoch": 0.33136748986361964, "grad_norm": 1.333463795517214, "learning_rate": 2.5368596539454195e-05, "loss": 0.6467, "step": 2697 }, { "epoch": 0.3314903550804767, "grad_norm": 1.4209955828809815, "learning_rate": 2.536394672271709e-05, "loss": 0.6587, "step": 2698 }, { "epoch": 0.3316132202973338, "grad_norm": 1.6442228098945157, "learning_rate": 2.5359294999599204e-05, "loss": 0.6933, "step": 2699 }, { "epoch": 0.3317360855141909, "grad_norm": 1.4409559993116852, "learning_rate": 2.5354641370956184e-05, "loss": 0.6646, "step": 2700 }, { "epoch": 0.33185895073104804, "grad_norm": 1.4543784476723773, "learning_rate": 2.5349985837644033e-05, "loss": 0.6004, "step": 2701 }, { "epoch": 0.33198181594790516, "grad_norm": 1.2588493257173354, "learning_rate": 2.5345328400519112e-05, "loss": 0.6592, "step": 2702 }, { "epoch": 0.33210468116476227, "grad_norm": 1.1902975262865394, "learning_rate": 2.534066906043812e-05, "loss": 0.6165, "step": 2703 }, { "epoch": 0.3322275463816194, "grad_norm": 1.1191003799348873, "learning_rate": 2.533600781825812e-05, "loss": 0.7186, "step": 2704 }, { "epoch": 0.33235041159847645, "grad_norm": 1.608572413844205, "learning_rate": 2.533134467483651e-05, "loss": 0.7429, "step": 2705 }, { "epoch": 0.33247327681533356, "grad_norm": 1.5401357608432849, "learning_rate": 2.532667963103105e-05, "loss": 0.6945, "step": 2706 }, { "epoch": 0.3325961420321907, "grad_norm": 1.5053801691974358, "learning_rate": 2.532201268769984e-05, "loss": 0.5584, "step": 2707 }, { "epoch": 0.3327190072490478, "grad_norm": 1.5834434583335115, "learning_rate": 2.531734384570134e-05, "loss": 0.7287, "step": 2708 }, { "epoch": 0.3328418724659049, "grad_norm": 1.5481727048817362, "learning_rate": 2.5312673105894347e-05, "loss": 0.5778, "step": 2709 }, { "epoch": 0.332964737682762, "grad_norm": 1.2860392580619513, "learning_rate": 2.530800046913802e-05, "loss": 0.5164, "step": 2710 }, { "epoch": 0.33308760289961914, "grad_norm": 1.3512751264741225, "learning_rate": 2.5303325936291853e-05, "loss": 0.6541, "step": 2711 }, { "epoch": 0.3332104681164762, "grad_norm": 1.272965541221011, "learning_rate": 2.5298649508215702e-05, "loss": 0.5421, "step": 2712 }, { "epoch": 0.3333333333333333, "grad_norm": 1.388388404583162, "learning_rate": 2.529397118576976e-05, "loss": 0.6003, "step": 2713 }, { "epoch": 0.33345619855019043, "grad_norm": 1.2818228689222484, "learning_rate": 2.5289290969814582e-05, "loss": 0.6775, "step": 2714 }, { "epoch": 0.33357906376704755, "grad_norm": 1.4209528049678357, "learning_rate": 2.5284608861211053e-05, "loss": 0.7344, "step": 2715 }, { "epoch": 0.33370192898390466, "grad_norm": 1.3136830686068734, "learning_rate": 2.527992486082042e-05, "loss": 0.6331, "step": 2716 }, { "epoch": 0.3338247942007618, "grad_norm": 1.3335549633808954, "learning_rate": 2.5275238969504288e-05, "loss": 0.5645, "step": 2717 }, { "epoch": 0.3339476594176189, "grad_norm": 1.4891249762505194, "learning_rate": 2.5270551188124572e-05, "loss": 0.6508, "step": 2718 }, { "epoch": 0.334070524634476, "grad_norm": 1.3969083060238623, "learning_rate": 2.526586151754358e-05, "loss": 0.726, "step": 2719 }, { "epoch": 0.33419338985133307, "grad_norm": 1.297573137597818, "learning_rate": 2.5261169958623937e-05, "loss": 0.6166, "step": 2720 }, { "epoch": 0.3343162550681902, "grad_norm": 1.5822075367930928, "learning_rate": 2.5256476512228625e-05, "loss": 0.6672, "step": 2721 }, { "epoch": 0.3344391202850473, "grad_norm": 1.279749283220469, "learning_rate": 2.5251781179220973e-05, "loss": 0.6018, "step": 2722 }, { "epoch": 0.3345619855019044, "grad_norm": 1.5204670722317952, "learning_rate": 2.524708396046466e-05, "loss": 0.7262, "step": 2723 }, { "epoch": 0.33468485071876153, "grad_norm": 1.1414499242267164, "learning_rate": 2.5242384856823703e-05, "loss": 0.7091, "step": 2724 }, { "epoch": 0.33480771593561864, "grad_norm": 1.3445900356264866, "learning_rate": 2.523768386916248e-05, "loss": 0.5157, "step": 2725 }, { "epoch": 0.33493058115247576, "grad_norm": 1.1917871966739255, "learning_rate": 2.5232980998345702e-05, "loss": 0.6323, "step": 2726 }, { "epoch": 0.3350534463693328, "grad_norm": 1.3056607806536895, "learning_rate": 2.522827624523844e-05, "loss": 0.6727, "step": 2727 }, { "epoch": 0.33517631158618993, "grad_norm": 1.4234768935791653, "learning_rate": 2.522356961070608e-05, "loss": 0.662, "step": 2728 }, { "epoch": 0.33529917680304705, "grad_norm": 1.2937528259628377, "learning_rate": 2.5218861095614404e-05, "loss": 0.6049, "step": 2729 }, { "epoch": 0.33542204201990417, "grad_norm": 1.2546859420856482, "learning_rate": 2.5214150700829497e-05, "loss": 0.5489, "step": 2730 }, { "epoch": 0.3355449072367613, "grad_norm": 1.3218681532014407, "learning_rate": 2.520943842721781e-05, "loss": 0.6688, "step": 2731 }, { "epoch": 0.3356677724536184, "grad_norm": 1.4158680349182347, "learning_rate": 2.5204724275646132e-05, "loss": 0.689, "step": 2732 }, { "epoch": 0.3357906376704755, "grad_norm": 1.5396225500608862, "learning_rate": 2.5200008246981612e-05, "loss": 0.551, "step": 2733 }, { "epoch": 0.33591350288733257, "grad_norm": 1.8526086257114844, "learning_rate": 2.5195290342091717e-05, "loss": 0.6192, "step": 2734 }, { "epoch": 0.3360363681041897, "grad_norm": 1.286306052313637, "learning_rate": 2.5190570561844283e-05, "loss": 0.6217, "step": 2735 }, { "epoch": 0.3361592333210468, "grad_norm": 1.3958622013242172, "learning_rate": 2.5185848907107485e-05, "loss": 0.6642, "step": 2736 }, { "epoch": 0.3362820985379039, "grad_norm": 1.543902295533319, "learning_rate": 2.5181125378749834e-05, "loss": 0.6359, "step": 2737 }, { "epoch": 0.33640496375476103, "grad_norm": 1.2221201211218737, "learning_rate": 2.5176399977640202e-05, "loss": 0.6478, "step": 2738 }, { "epoch": 0.33652782897161815, "grad_norm": 1.3325713769700507, "learning_rate": 2.5171672704647785e-05, "loss": 0.6282, "step": 2739 }, { "epoch": 0.33665069418847526, "grad_norm": 1.4765669065240266, "learning_rate": 2.5166943560642145e-05, "loss": 0.5777, "step": 2740 }, { "epoch": 0.3367735594053323, "grad_norm": 1.3987573950506902, "learning_rate": 2.5162212546493166e-05, "loss": 0.5671, "step": 2741 }, { "epoch": 0.33689642462218944, "grad_norm": 1.4734856645399652, "learning_rate": 2.5157479663071096e-05, "loss": 0.6656, "step": 2742 }, { "epoch": 0.33701928983904655, "grad_norm": 1.4584227434908958, "learning_rate": 2.5152744911246516e-05, "loss": 0.6472, "step": 2743 }, { "epoch": 0.33714215505590367, "grad_norm": 1.3409594007849681, "learning_rate": 2.5148008291890358e-05, "loss": 0.6361, "step": 2744 }, { "epoch": 0.3372650202727608, "grad_norm": 1.413283015605535, "learning_rate": 2.5143269805873877e-05, "loss": 0.6315, "step": 2745 }, { "epoch": 0.3373878854896179, "grad_norm": 1.3803259262908205, "learning_rate": 2.5138529454068704e-05, "loss": 0.5988, "step": 2746 }, { "epoch": 0.337510750706475, "grad_norm": 1.6594303520450842, "learning_rate": 2.513378723734678e-05, "loss": 0.6848, "step": 2747 }, { "epoch": 0.33763361592333213, "grad_norm": 1.2449683179012523, "learning_rate": 2.512904315658042e-05, "loss": 0.5339, "step": 2748 }, { "epoch": 0.3377564811401892, "grad_norm": 1.4042392133664494, "learning_rate": 2.5124297212642263e-05, "loss": 0.7336, "step": 2749 }, { "epoch": 0.3378793463570463, "grad_norm": 1.4638731582483036, "learning_rate": 2.511954940640529e-05, "loss": 0.6547, "step": 2750 }, { "epoch": 0.3380022115739034, "grad_norm": 1.229619716744784, "learning_rate": 2.5114799738742827e-05, "loss": 0.5844, "step": 2751 }, { "epoch": 0.33812507679076054, "grad_norm": 1.2747095944222078, "learning_rate": 2.511004821052855e-05, "loss": 0.6961, "step": 2752 }, { "epoch": 0.33824794200761765, "grad_norm": 1.3692451004362243, "learning_rate": 2.5105294822636476e-05, "loss": 0.6996, "step": 2753 }, { "epoch": 0.33837080722447477, "grad_norm": 1.2941705514138502, "learning_rate": 2.510053957594095e-05, "loss": 0.7397, "step": 2754 }, { "epoch": 0.3384936724413319, "grad_norm": 1.4212208518475902, "learning_rate": 2.5095782471316676e-05, "loss": 0.674, "step": 2755 }, { "epoch": 0.33861653765818894, "grad_norm": 1.4556614335168183, "learning_rate": 2.5091023509638688e-05, "loss": 0.5659, "step": 2756 }, { "epoch": 0.33873940287504606, "grad_norm": 1.2751085512651674, "learning_rate": 2.5086262691782366e-05, "loss": 0.6139, "step": 2757 }, { "epoch": 0.3388622680919032, "grad_norm": 1.329054882311604, "learning_rate": 2.5081500018623436e-05, "loss": 0.738, "step": 2758 }, { "epoch": 0.3389851333087603, "grad_norm": 1.2996808598919685, "learning_rate": 2.5076735491037958e-05, "loss": 0.7835, "step": 2759 }, { "epoch": 0.3391079985256174, "grad_norm": 1.349836699553481, "learning_rate": 2.5071969109902334e-05, "loss": 0.5799, "step": 2760 }, { "epoch": 0.3392308637424745, "grad_norm": 1.2155478307122192, "learning_rate": 2.5067200876093316e-05, "loss": 0.6015, "step": 2761 }, { "epoch": 0.33935372895933164, "grad_norm": 1.4036067587199046, "learning_rate": 2.506243079048798e-05, "loss": 0.6481, "step": 2762 }, { "epoch": 0.3394765941761887, "grad_norm": 1.766343039304458, "learning_rate": 2.505765885396376e-05, "loss": 0.6644, "step": 2763 }, { "epoch": 0.3395994593930458, "grad_norm": 1.3903174524425181, "learning_rate": 2.5052885067398423e-05, "loss": 0.6531, "step": 2764 }, { "epoch": 0.3397223246099029, "grad_norm": 1.1540918235132587, "learning_rate": 2.504810943167007e-05, "loss": 0.7156, "step": 2765 }, { "epoch": 0.33984518982676004, "grad_norm": 1.4025129185362368, "learning_rate": 2.5043331947657147e-05, "loss": 0.6622, "step": 2766 }, { "epoch": 0.33996805504361716, "grad_norm": 1.2726252285132713, "learning_rate": 2.503855261623845e-05, "loss": 0.7039, "step": 2767 }, { "epoch": 0.34009092026047427, "grad_norm": 1.4769809037173156, "learning_rate": 2.5033771438293104e-05, "loss": 0.6342, "step": 2768 }, { "epoch": 0.3402137854773314, "grad_norm": 1.528388962909006, "learning_rate": 2.5028988414700573e-05, "loss": 0.6819, "step": 2769 }, { "epoch": 0.3403366506941885, "grad_norm": 1.4600954675953655, "learning_rate": 2.5024203546340657e-05, "loss": 0.7113, "step": 2770 }, { "epoch": 0.34045951591104556, "grad_norm": 1.299534406526137, "learning_rate": 2.5019416834093513e-05, "loss": 0.7765, "step": 2771 }, { "epoch": 0.3405823811279027, "grad_norm": 1.472797811816017, "learning_rate": 2.5014628278839617e-05, "loss": 0.6378, "step": 2772 }, { "epoch": 0.3407052463447598, "grad_norm": 1.3446742762382806, "learning_rate": 2.5009837881459805e-05, "loss": 0.614, "step": 2773 }, { "epoch": 0.3408281115616169, "grad_norm": 1.5341673680966372, "learning_rate": 2.5005045642835223e-05, "loss": 0.6235, "step": 2774 }, { "epoch": 0.340950976778474, "grad_norm": 1.1800236080442934, "learning_rate": 2.5000251563847378e-05, "loss": 0.5598, "step": 2775 }, { "epoch": 0.34107384199533114, "grad_norm": 1.2057060641162696, "learning_rate": 2.4995455645378114e-05, "loss": 0.6298, "step": 2776 }, { "epoch": 0.34119670721218825, "grad_norm": 1.3380564012798153, "learning_rate": 2.499065788830961e-05, "loss": 0.6323, "step": 2777 }, { "epoch": 0.3413195724290453, "grad_norm": 1.220620409791524, "learning_rate": 2.498585829352438e-05, "loss": 0.6799, "step": 2778 }, { "epoch": 0.34144243764590243, "grad_norm": 1.2332615406153689, "learning_rate": 2.498105686190527e-05, "loss": 0.6106, "step": 2779 }, { "epoch": 0.34156530286275955, "grad_norm": 1.186125929055212, "learning_rate": 2.4976253594335485e-05, "loss": 0.5799, "step": 2780 }, { "epoch": 0.34168816807961666, "grad_norm": 1.4226232607385074, "learning_rate": 2.497144849169855e-05, "loss": 0.6987, "step": 2781 }, { "epoch": 0.3418110332964738, "grad_norm": 1.3083254225314636, "learning_rate": 2.4966641554878332e-05, "loss": 0.6338, "step": 2782 }, { "epoch": 0.3419338985133309, "grad_norm": 1.6876847834021844, "learning_rate": 2.4961832784759037e-05, "loss": 0.5879, "step": 2783 }, { "epoch": 0.342056763730188, "grad_norm": 1.48442532949755, "learning_rate": 2.49570221822252e-05, "loss": 0.7045, "step": 2784 }, { "epoch": 0.34217962894704507, "grad_norm": 1.4916725424356772, "learning_rate": 2.4952209748161708e-05, "loss": 0.7509, "step": 2785 }, { "epoch": 0.3423024941639022, "grad_norm": 1.2823394690344974, "learning_rate": 2.494739548345378e-05, "loss": 0.5919, "step": 2786 }, { "epoch": 0.3424253593807593, "grad_norm": 1.41506627426524, "learning_rate": 2.494257938898696e-05, "loss": 0.6549, "step": 2787 }, { "epoch": 0.3425482245976164, "grad_norm": 1.1970963774935819, "learning_rate": 2.4937761465647144e-05, "loss": 0.6614, "step": 2788 }, { "epoch": 0.34267108981447353, "grad_norm": 1.3109448165825661, "learning_rate": 2.4932941714320552e-05, "loss": 0.6936, "step": 2789 }, { "epoch": 0.34279395503133064, "grad_norm": 1.443510754212926, "learning_rate": 2.4928120135893752e-05, "loss": 0.6727, "step": 2790 }, { "epoch": 0.34291682024818776, "grad_norm": 1.402077777923276, "learning_rate": 2.4923296731253635e-05, "loss": 0.5513, "step": 2791 }, { "epoch": 0.3430396854650448, "grad_norm": 1.3347071693240653, "learning_rate": 2.4918471501287447e-05, "loss": 0.6354, "step": 2792 }, { "epoch": 0.34316255068190193, "grad_norm": 1.1506939440778963, "learning_rate": 2.491364444688274e-05, "loss": 0.5777, "step": 2793 }, { "epoch": 0.34328541589875905, "grad_norm": 1.1678002005096746, "learning_rate": 2.4908815568927435e-05, "loss": 0.6605, "step": 2794 }, { "epoch": 0.34340828111561617, "grad_norm": 1.5583626133180346, "learning_rate": 2.4903984868309768e-05, "loss": 0.6601, "step": 2795 }, { "epoch": 0.3435311463324733, "grad_norm": 1.3655131868215489, "learning_rate": 2.489915234591831e-05, "loss": 0.5476, "step": 2796 }, { "epoch": 0.3436540115493304, "grad_norm": 1.1183084303794542, "learning_rate": 2.489431800264198e-05, "loss": 0.566, "step": 2797 }, { "epoch": 0.3437768767661875, "grad_norm": 1.319563121580085, "learning_rate": 2.488948183937002e-05, "loss": 0.606, "step": 2798 }, { "epoch": 0.3438997419830446, "grad_norm": 1.2691685394506749, "learning_rate": 2.4884643856992008e-05, "loss": 0.7269, "step": 2799 }, { "epoch": 0.3440226071999017, "grad_norm": 1.3109649351979273, "learning_rate": 2.4879804056397865e-05, "loss": 0.6612, "step": 2800 }, { "epoch": 0.3441454724167588, "grad_norm": 1.2155566932415305, "learning_rate": 2.4874962438477838e-05, "loss": 0.6405, "step": 2801 }, { "epoch": 0.3442683376336159, "grad_norm": 1.1817827721064367, "learning_rate": 2.487011900412251e-05, "loss": 0.6778, "step": 2802 }, { "epoch": 0.34439120285047303, "grad_norm": 1.5654657202255273, "learning_rate": 2.4865273754222805e-05, "loss": 0.5839, "step": 2803 }, { "epoch": 0.34451406806733015, "grad_norm": 1.3171313231504846, "learning_rate": 2.4860426689669965e-05, "loss": 0.7002, "step": 2804 }, { "epoch": 0.34463693328418726, "grad_norm": 1.2647864512785145, "learning_rate": 2.485557781135559e-05, "loss": 0.6973, "step": 2805 }, { "epoch": 0.3447597985010444, "grad_norm": 1.4402805296448422, "learning_rate": 2.485072712017159e-05, "loss": 0.6464, "step": 2806 }, { "epoch": 0.34488266371790144, "grad_norm": 1.3949273396598187, "learning_rate": 2.4845874617010218e-05, "loss": 0.6053, "step": 2807 }, { "epoch": 0.34500552893475855, "grad_norm": 1.2580850902397493, "learning_rate": 2.4841020302764066e-05, "loss": 0.566, "step": 2808 }, { "epoch": 0.34512839415161567, "grad_norm": 1.3398063403516818, "learning_rate": 2.483616417832605e-05, "loss": 0.6812, "step": 2809 }, { "epoch": 0.3452512593684728, "grad_norm": 1.3316264936952265, "learning_rate": 2.483130624458942e-05, "loss": 0.7361, "step": 2810 }, { "epoch": 0.3453741245853299, "grad_norm": 1.8385969532029396, "learning_rate": 2.4826446502447767e-05, "loss": 0.6795, "step": 2811 }, { "epoch": 0.345496989802187, "grad_norm": 2.019773098001874, "learning_rate": 2.482158495279501e-05, "loss": 0.6371, "step": 2812 }, { "epoch": 0.34561985501904413, "grad_norm": 1.404725207515585, "learning_rate": 2.4816721596525392e-05, "loss": 0.6955, "step": 2813 }, { "epoch": 0.3457427202359012, "grad_norm": 1.2622116461730961, "learning_rate": 2.4811856434533497e-05, "loss": 0.5712, "step": 2814 }, { "epoch": 0.3458655854527583, "grad_norm": 1.3542839252628665, "learning_rate": 2.480698946771425e-05, "loss": 0.6087, "step": 2815 }, { "epoch": 0.3459884506696154, "grad_norm": 1.589661558271548, "learning_rate": 2.4802120696962886e-05, "loss": 0.6571, "step": 2816 }, { "epoch": 0.34611131588647254, "grad_norm": 1.6157299451119107, "learning_rate": 2.4797250123174993e-05, "loss": 0.6621, "step": 2817 }, { "epoch": 0.34623418110332965, "grad_norm": 1.7434374568652977, "learning_rate": 2.479237774724647e-05, "loss": 0.7823, "step": 2818 }, { "epoch": 0.34635704632018677, "grad_norm": 1.2362406618938764, "learning_rate": 2.4787503570073574e-05, "loss": 0.5939, "step": 2819 }, { "epoch": 0.3464799115370439, "grad_norm": 1.4466765788750935, "learning_rate": 2.478262759255287e-05, "loss": 0.6847, "step": 2820 }, { "epoch": 0.34660277675390094, "grad_norm": 1.4761913865046838, "learning_rate": 2.4777749815581258e-05, "loss": 0.5929, "step": 2821 }, { "epoch": 0.34672564197075806, "grad_norm": 1.7937272243190563, "learning_rate": 2.477287024005598e-05, "loss": 0.5303, "step": 2822 }, { "epoch": 0.3468485071876152, "grad_norm": 1.3375731507145783, "learning_rate": 2.4767988866874604e-05, "loss": 0.733, "step": 2823 }, { "epoch": 0.3469713724044723, "grad_norm": 1.3910293593881524, "learning_rate": 2.4763105696935016e-05, "loss": 0.6012, "step": 2824 }, { "epoch": 0.3470942376213294, "grad_norm": 1.1040308914500607, "learning_rate": 2.4758220731135456e-05, "loss": 0.5913, "step": 2825 }, { "epoch": 0.3472171028381865, "grad_norm": 1.2663447998245985, "learning_rate": 2.475333397037448e-05, "loss": 0.6959, "step": 2826 }, { "epoch": 0.34733996805504364, "grad_norm": 1.3619351390825811, "learning_rate": 2.4748445415550964e-05, "loss": 0.66, "step": 2827 }, { "epoch": 0.34746283327190075, "grad_norm": 1.293708058290336, "learning_rate": 2.4743555067564144e-05, "loss": 0.6855, "step": 2828 }, { "epoch": 0.3475856984887578, "grad_norm": 1.425947759738349, "learning_rate": 2.473866292731355e-05, "loss": 0.6866, "step": 2829 }, { "epoch": 0.3477085637056149, "grad_norm": 1.423324542436127, "learning_rate": 2.4733768995699077e-05, "loss": 0.6774, "step": 2830 }, { "epoch": 0.34783142892247204, "grad_norm": 1.2326684636687357, "learning_rate": 2.4728873273620918e-05, "loss": 0.7282, "step": 2831 }, { "epoch": 0.34795429413932916, "grad_norm": 1.2116439917394042, "learning_rate": 2.4723975761979615e-05, "loss": 0.7036, "step": 2832 }, { "epoch": 0.34807715935618627, "grad_norm": 1.4166775644935086, "learning_rate": 2.4719076461676033e-05, "loss": 0.5902, "step": 2833 }, { "epoch": 0.3482000245730434, "grad_norm": 1.6258721347411786, "learning_rate": 2.4714175373611365e-05, "loss": 0.7292, "step": 2834 }, { "epoch": 0.3483228897899005, "grad_norm": 1.501000329257634, "learning_rate": 2.4709272498687135e-05, "loss": 0.6987, "step": 2835 }, { "epoch": 0.34844575500675756, "grad_norm": 1.5896820779091139, "learning_rate": 2.47043678378052e-05, "loss": 0.6031, "step": 2836 }, { "epoch": 0.3485686202236147, "grad_norm": 1.5609640525643038, "learning_rate": 2.469946139186773e-05, "loss": 0.6056, "step": 2837 }, { "epoch": 0.3486914854404718, "grad_norm": 1.2764317496336732, "learning_rate": 2.4694553161777246e-05, "loss": 0.6027, "step": 2838 }, { "epoch": 0.3488143506573289, "grad_norm": 1.4441067183201504, "learning_rate": 2.4689643148436577e-05, "loss": 0.6499, "step": 2839 }, { "epoch": 0.348937215874186, "grad_norm": 1.3555668270768086, "learning_rate": 2.4684731352748893e-05, "loss": 0.5842, "step": 2840 }, { "epoch": 0.34906008109104314, "grad_norm": 1.3296748432849688, "learning_rate": 2.4679817775617675e-05, "loss": 0.6204, "step": 2841 }, { "epoch": 0.34918294630790025, "grad_norm": 1.5537789695755568, "learning_rate": 2.4674902417946763e-05, "loss": 0.7813, "step": 2842 }, { "epoch": 0.3493058115247573, "grad_norm": 1.3604099855997405, "learning_rate": 2.466998528064029e-05, "loss": 0.6389, "step": 2843 }, { "epoch": 0.34942867674161443, "grad_norm": 1.2596162155182409, "learning_rate": 2.4665066364602743e-05, "loss": 0.5566, "step": 2844 }, { "epoch": 0.34955154195847155, "grad_norm": 1.2221445019611434, "learning_rate": 2.4660145670738914e-05, "loss": 0.8065, "step": 2845 }, { "epoch": 0.34967440717532866, "grad_norm": 1.2943068305962433, "learning_rate": 2.4655223199953932e-05, "loss": 0.6082, "step": 2846 }, { "epoch": 0.3497972723921858, "grad_norm": 1.391852438593883, "learning_rate": 2.4650298953153265e-05, "loss": 0.8128, "step": 2847 }, { "epoch": 0.3499201376090429, "grad_norm": 1.297446988158341, "learning_rate": 2.4645372931242692e-05, "loss": 0.5959, "step": 2848 }, { "epoch": 0.3500430028259, "grad_norm": 1.4850173400831785, "learning_rate": 2.4640445135128317e-05, "loss": 0.6086, "step": 2849 }, { "epoch": 0.3501658680427571, "grad_norm": 1.3867025911455704, "learning_rate": 2.4635515565716577e-05, "loss": 0.586, "step": 2850 }, { "epoch": 0.3502887332596142, "grad_norm": 1.2045176476940846, "learning_rate": 2.463058422391424e-05, "loss": 0.6751, "step": 2851 }, { "epoch": 0.3504115984764713, "grad_norm": 1.396377648055341, "learning_rate": 2.4625651110628395e-05, "loss": 0.5676, "step": 2852 }, { "epoch": 0.3505344636933284, "grad_norm": 1.2166167758489665, "learning_rate": 2.4620716226766448e-05, "loss": 0.6311, "step": 2853 }, { "epoch": 0.35065732891018553, "grad_norm": 1.6128788597832702, "learning_rate": 2.4615779573236145e-05, "loss": 0.6247, "step": 2854 }, { "epoch": 0.35078019412704264, "grad_norm": 1.6225956139742743, "learning_rate": 2.461084115094555e-05, "loss": 0.6058, "step": 2855 }, { "epoch": 0.35090305934389976, "grad_norm": 1.4963772791581587, "learning_rate": 2.4605900960803056e-05, "loss": 0.5201, "step": 2856 }, { "epoch": 0.3510259245607569, "grad_norm": 1.12768998961081, "learning_rate": 2.4600959003717375e-05, "loss": 0.5812, "step": 2857 }, { "epoch": 0.35114878977761393, "grad_norm": 1.3804339369983554, "learning_rate": 2.459601528059755e-05, "loss": 0.6361, "step": 2858 }, { "epoch": 0.35127165499447105, "grad_norm": 1.2246546127094418, "learning_rate": 2.4591069792352946e-05, "loss": 0.6152, "step": 2859 }, { "epoch": 0.35139452021132817, "grad_norm": 1.4253142619730457, "learning_rate": 2.4586122539893253e-05, "loss": 0.6549, "step": 2860 }, { "epoch": 0.3515173854281853, "grad_norm": 1.225413066005511, "learning_rate": 2.458117352412849e-05, "loss": 0.6153, "step": 2861 }, { "epoch": 0.3516402506450424, "grad_norm": 1.3389064405566784, "learning_rate": 2.4576222745968988e-05, "loss": 0.6364, "step": 2862 }, { "epoch": 0.3517631158618995, "grad_norm": 1.474764775908628, "learning_rate": 2.457127020632542e-05, "loss": 0.6861, "step": 2863 }, { "epoch": 0.3518859810787566, "grad_norm": 1.6920927507187056, "learning_rate": 2.4566315906108772e-05, "loss": 0.7226, "step": 2864 }, { "epoch": 0.3520088462956137, "grad_norm": 1.2757191849543568, "learning_rate": 2.4561359846230346e-05, "loss": 0.6685, "step": 2865 }, { "epoch": 0.3521317115124708, "grad_norm": 1.3804077134245787, "learning_rate": 2.455640202760179e-05, "loss": 0.6421, "step": 2866 }, { "epoch": 0.3522545767293279, "grad_norm": 1.4520652269838694, "learning_rate": 2.4551442451135052e-05, "loss": 0.5816, "step": 2867 }, { "epoch": 0.35237744194618503, "grad_norm": 1.3418940934266081, "learning_rate": 2.4546481117742422e-05, "loss": 0.6212, "step": 2868 }, { "epoch": 0.35250030716304215, "grad_norm": 1.3014164086891709, "learning_rate": 2.4541518028336496e-05, "loss": 0.6146, "step": 2869 }, { "epoch": 0.35262317237989926, "grad_norm": 1.3869525726418286, "learning_rate": 2.453655318383021e-05, "loss": 0.5949, "step": 2870 }, { "epoch": 0.3527460375967564, "grad_norm": 1.2684613030258876, "learning_rate": 2.4531586585136817e-05, "loss": 0.62, "step": 2871 }, { "epoch": 0.35286890281361344, "grad_norm": 1.3347761756812664, "learning_rate": 2.452661823316988e-05, "loss": 0.7649, "step": 2872 }, { "epoch": 0.35299176803047055, "grad_norm": 1.3289611675761985, "learning_rate": 2.4521648128843307e-05, "loss": 0.6773, "step": 2873 }, { "epoch": 0.35311463324732767, "grad_norm": 1.2934680513006693, "learning_rate": 2.451667627307131e-05, "loss": 0.678, "step": 2874 }, { "epoch": 0.3532374984641848, "grad_norm": 1.3684301965698735, "learning_rate": 2.4511702666768422e-05, "loss": 0.6355, "step": 2875 }, { "epoch": 0.3533603636810419, "grad_norm": 1.4260512793612574, "learning_rate": 2.4506727310849525e-05, "loss": 0.6897, "step": 2876 }, { "epoch": 0.353483228897899, "grad_norm": 1.4609799844987639, "learning_rate": 2.4501750206229785e-05, "loss": 0.6503, "step": 2877 }, { "epoch": 0.35360609411475613, "grad_norm": 1.1024603555101053, "learning_rate": 2.449677135382472e-05, "loss": 0.5527, "step": 2878 }, { "epoch": 0.35372895933161325, "grad_norm": 1.3733017767330695, "learning_rate": 2.4491790754550154e-05, "loss": 0.6711, "step": 2879 }, { "epoch": 0.3538518245484703, "grad_norm": 1.2414510695633374, "learning_rate": 2.4486808409322234e-05, "loss": 0.6173, "step": 2880 }, { "epoch": 0.3539746897653274, "grad_norm": 1.3513278257752368, "learning_rate": 2.448182431905743e-05, "loss": 0.6844, "step": 2881 }, { "epoch": 0.35409755498218454, "grad_norm": 1.2721215365495033, "learning_rate": 2.4476838484672533e-05, "loss": 0.5657, "step": 2882 }, { "epoch": 0.35422042019904165, "grad_norm": 1.154448156448332, "learning_rate": 2.4471850907084658e-05, "loss": 0.6157, "step": 2883 }, { "epoch": 0.35434328541589877, "grad_norm": 1.2364215540928962, "learning_rate": 2.4466861587211233e-05, "loss": 0.7401, "step": 2884 }, { "epoch": 0.3544661506327559, "grad_norm": 1.2032761472677374, "learning_rate": 2.4461870525970013e-05, "loss": 0.5662, "step": 2885 }, { "epoch": 0.354589015849613, "grad_norm": 1.192850377770801, "learning_rate": 2.4456877724279076e-05, "loss": 0.5529, "step": 2886 }, { "epoch": 0.35471188106647006, "grad_norm": 1.329738599637935, "learning_rate": 2.4451883183056812e-05, "loss": 0.4972, "step": 2887 }, { "epoch": 0.3548347462833272, "grad_norm": 1.5334138047542893, "learning_rate": 2.4446886903221935e-05, "loss": 0.6805, "step": 2888 }, { "epoch": 0.3549576115001843, "grad_norm": 1.2477255899037958, "learning_rate": 2.4441888885693473e-05, "loss": 0.5577, "step": 2889 }, { "epoch": 0.3550804767170414, "grad_norm": 1.957636902895662, "learning_rate": 2.4436889131390788e-05, "loss": 0.7928, "step": 2890 }, { "epoch": 0.3552033419338985, "grad_norm": 1.330358276377526, "learning_rate": 2.4431887641233543e-05, "loss": 0.5986, "step": 2891 }, { "epoch": 0.35532620715075564, "grad_norm": 1.3958942236873897, "learning_rate": 2.442688441614174e-05, "loss": 0.6695, "step": 2892 }, { "epoch": 0.35544907236761275, "grad_norm": 1.496385869981552, "learning_rate": 2.4421879457035678e-05, "loss": 0.6896, "step": 2893 }, { "epoch": 0.3555719375844698, "grad_norm": 1.394113431092259, "learning_rate": 2.4416872764836e-05, "loss": 0.6945, "step": 2894 }, { "epoch": 0.3556948028013269, "grad_norm": 1.4309264333093454, "learning_rate": 2.441186434046364e-05, "loss": 0.7273, "step": 2895 }, { "epoch": 0.35581766801818404, "grad_norm": 1.5092040245508187, "learning_rate": 2.4406854184839875e-05, "loss": 0.6822, "step": 2896 }, { "epoch": 0.35594053323504116, "grad_norm": 1.3183042497106792, "learning_rate": 2.440184229888629e-05, "loss": 0.6489, "step": 2897 }, { "epoch": 0.35606339845189827, "grad_norm": 1.321890055763165, "learning_rate": 2.4396828683524787e-05, "loss": 0.5979, "step": 2898 }, { "epoch": 0.3561862636687554, "grad_norm": 1.3612569963971601, "learning_rate": 2.4391813339677588e-05, "loss": 0.7658, "step": 2899 }, { "epoch": 0.3563091288856125, "grad_norm": 1.1903335719172712, "learning_rate": 2.4386796268267227e-05, "loss": 0.6039, "step": 2900 }, { "epoch": 0.3564319941024696, "grad_norm": 1.280922543928591, "learning_rate": 2.438177747021658e-05, "loss": 0.5993, "step": 2901 }, { "epoch": 0.3565548593193267, "grad_norm": 1.3513351249191208, "learning_rate": 2.43767569464488e-05, "loss": 0.6161, "step": 2902 }, { "epoch": 0.3566777245361838, "grad_norm": 1.4302856532851838, "learning_rate": 2.4371734697887395e-05, "loss": 0.6074, "step": 2903 }, { "epoch": 0.3568005897530409, "grad_norm": 1.3046739645405683, "learning_rate": 2.436671072545617e-05, "loss": 0.6468, "step": 2904 }, { "epoch": 0.356923454969898, "grad_norm": 1.2660216199311898, "learning_rate": 2.436168503007925e-05, "loss": 0.6288, "step": 2905 }, { "epoch": 0.35704632018675514, "grad_norm": 1.3175804655602612, "learning_rate": 2.435665761268108e-05, "loss": 0.6142, "step": 2906 }, { "epoch": 0.35716918540361225, "grad_norm": 1.1771289683037809, "learning_rate": 2.4351628474186427e-05, "loss": 0.6695, "step": 2907 }, { "epoch": 0.35729205062046937, "grad_norm": 1.219462980416533, "learning_rate": 2.434659761552036e-05, "loss": 0.7147, "step": 2908 }, { "epoch": 0.35741491583732643, "grad_norm": 1.384968856443894, "learning_rate": 2.4341565037608278e-05, "loss": 0.6194, "step": 2909 }, { "epoch": 0.35753778105418355, "grad_norm": 1.272710186134619, "learning_rate": 2.4336530741375892e-05, "loss": 0.6559, "step": 2910 }, { "epoch": 0.35766064627104066, "grad_norm": 1.3473198797702814, "learning_rate": 2.4331494727749223e-05, "loss": 0.6689, "step": 2911 }, { "epoch": 0.3577835114878978, "grad_norm": 1.4032147496694274, "learning_rate": 2.4326456997654617e-05, "loss": 0.6509, "step": 2912 }, { "epoch": 0.3579063767047549, "grad_norm": 1.3851314228605418, "learning_rate": 2.4321417552018728e-05, "loss": 0.6237, "step": 2913 }, { "epoch": 0.358029241921612, "grad_norm": 1.6153704737539059, "learning_rate": 2.4316376391768534e-05, "loss": 0.7231, "step": 2914 }, { "epoch": 0.3581521071384691, "grad_norm": 1.2618219796318941, "learning_rate": 2.431133351783132e-05, "loss": 0.6867, "step": 2915 }, { "epoch": 0.3582749723553262, "grad_norm": 1.118042457696448, "learning_rate": 2.430628893113469e-05, "loss": 0.5658, "step": 2916 }, { "epoch": 0.3583978375721833, "grad_norm": 1.0592276223105983, "learning_rate": 2.430124263260657e-05, "loss": 0.5983, "step": 2917 }, { "epoch": 0.3585207027890404, "grad_norm": 1.251775341770502, "learning_rate": 2.4296194623175187e-05, "loss": 0.7416, "step": 2918 }, { "epoch": 0.35864356800589753, "grad_norm": 1.3595690513311904, "learning_rate": 2.4291144903769087e-05, "loss": 0.5997, "step": 2919 }, { "epoch": 0.35876643322275464, "grad_norm": 1.4134084331513677, "learning_rate": 2.4286093475317145e-05, "loss": 0.7977, "step": 2920 }, { "epoch": 0.35888929843961176, "grad_norm": 1.3441908648939007, "learning_rate": 2.428104033874852e-05, "loss": 0.5951, "step": 2921 }, { "epoch": 0.3590121636564689, "grad_norm": 1.3877739937126663, "learning_rate": 2.4275985494992724e-05, "loss": 0.6889, "step": 2922 }, { "epoch": 0.35913502887332593, "grad_norm": 1.606485277003862, "learning_rate": 2.4270928944979546e-05, "loss": 0.7592, "step": 2923 }, { "epoch": 0.35925789409018305, "grad_norm": 1.44096581767626, "learning_rate": 2.4265870689639113e-05, "loss": 0.665, "step": 2924 }, { "epoch": 0.35938075930704017, "grad_norm": 1.573327100045176, "learning_rate": 2.4260810729901857e-05, "loss": 0.7232, "step": 2925 }, { "epoch": 0.3595036245238973, "grad_norm": 1.5303182042207017, "learning_rate": 2.4255749066698535e-05, "loss": 0.674, "step": 2926 }, { "epoch": 0.3596264897407544, "grad_norm": 1.5431198218500666, "learning_rate": 2.4250685700960188e-05, "loss": 0.739, "step": 2927 }, { "epoch": 0.3597493549576115, "grad_norm": 1.2479839826703332, "learning_rate": 2.4245620633618207e-05, "loss": 0.5594, "step": 2928 }, { "epoch": 0.3598722201744686, "grad_norm": 1.1408334736336936, "learning_rate": 2.424055386560426e-05, "loss": 0.6992, "step": 2929 }, { "epoch": 0.35999508539132574, "grad_norm": 1.3671484586437301, "learning_rate": 2.4235485397850363e-05, "loss": 0.6673, "step": 2930 }, { "epoch": 0.3601179506081828, "grad_norm": 1.2700758259448937, "learning_rate": 2.4230415231288823e-05, "loss": 0.5556, "step": 2931 }, { "epoch": 0.3602408158250399, "grad_norm": 1.3609185252187523, "learning_rate": 2.422534336685226e-05, "loss": 0.6092, "step": 2932 }, { "epoch": 0.36036368104189703, "grad_norm": 1.408986878211817, "learning_rate": 2.4220269805473612e-05, "loss": 0.543, "step": 2933 }, { "epoch": 0.36048654625875415, "grad_norm": 1.256372017792412, "learning_rate": 2.421519454808613e-05, "loss": 0.6098, "step": 2934 }, { "epoch": 0.36060941147561126, "grad_norm": 1.2898994775773198, "learning_rate": 2.4210117595623377e-05, "loss": 0.6233, "step": 2935 }, { "epoch": 0.3607322766924684, "grad_norm": 1.5459042565270145, "learning_rate": 2.4205038949019218e-05, "loss": 0.5792, "step": 2936 }, { "epoch": 0.3608551419093255, "grad_norm": 1.432206799448882, "learning_rate": 2.419995860920784e-05, "loss": 0.5883, "step": 2937 }, { "epoch": 0.36097800712618255, "grad_norm": 1.5181332185322032, "learning_rate": 2.4194876577123746e-05, "loss": 0.6576, "step": 2938 }, { "epoch": 0.36110087234303967, "grad_norm": 1.4397044452009289, "learning_rate": 2.4189792853701734e-05, "loss": 0.6617, "step": 2939 }, { "epoch": 0.3612237375598968, "grad_norm": 1.4707158604208062, "learning_rate": 2.418470743987692e-05, "loss": 0.5681, "step": 2940 }, { "epoch": 0.3613466027767539, "grad_norm": 1.2968890774401758, "learning_rate": 2.4179620336584743e-05, "loss": 0.5357, "step": 2941 }, { "epoch": 0.361469467993611, "grad_norm": 1.2899064815993366, "learning_rate": 2.417453154476093e-05, "loss": 0.7098, "step": 2942 }, { "epoch": 0.36159233321046813, "grad_norm": 1.2828699388813116, "learning_rate": 2.4169441065341546e-05, "loss": 0.5733, "step": 2943 }, { "epoch": 0.36171519842732525, "grad_norm": 1.5338316128657483, "learning_rate": 2.4164348899262936e-05, "loss": 0.6749, "step": 2944 }, { "epoch": 0.3618380636441823, "grad_norm": 1.3809076504241335, "learning_rate": 2.4159255047461785e-05, "loss": 0.6523, "step": 2945 }, { "epoch": 0.3619609288610394, "grad_norm": 1.2148841553979777, "learning_rate": 2.4154159510875065e-05, "loss": 0.6656, "step": 2946 }, { "epoch": 0.36208379407789654, "grad_norm": 1.3313564477265711, "learning_rate": 2.414906229044007e-05, "loss": 0.6964, "step": 2947 }, { "epoch": 0.36220665929475365, "grad_norm": 1.3712767120852138, "learning_rate": 2.4143963387094403e-05, "loss": 0.5847, "step": 2948 }, { "epoch": 0.36232952451161077, "grad_norm": 1.1313410495333536, "learning_rate": 2.4138862801775973e-05, "loss": 0.5773, "step": 2949 }, { "epoch": 0.3624523897284679, "grad_norm": 1.7086468731394118, "learning_rate": 2.4133760535422994e-05, "loss": 0.6471, "step": 2950 }, { "epoch": 0.362575254945325, "grad_norm": 1.2752716393839483, "learning_rate": 2.4128656588974e-05, "loss": 0.6765, "step": 2951 }, { "epoch": 0.3626981201621821, "grad_norm": 1.1763211871191426, "learning_rate": 2.4123550963367824e-05, "loss": 0.6525, "step": 2952 }, { "epoch": 0.3628209853790392, "grad_norm": 1.195326708457016, "learning_rate": 2.411844365954362e-05, "loss": 0.6604, "step": 2953 }, { "epoch": 0.3629438505958963, "grad_norm": 1.2739677536622915, "learning_rate": 2.4113334678440842e-05, "loss": 0.6038, "step": 2954 }, { "epoch": 0.3630667158127534, "grad_norm": 1.3760507222020615, "learning_rate": 2.410822402099925e-05, "loss": 0.6635, "step": 2955 }, { "epoch": 0.3631895810296105, "grad_norm": 1.3108169371628935, "learning_rate": 2.4103111688158917e-05, "loss": 0.6499, "step": 2956 }, { "epoch": 0.36331244624646764, "grad_norm": 1.6731838672088424, "learning_rate": 2.4097997680860232e-05, "loss": 0.6814, "step": 2957 }, { "epoch": 0.36343531146332475, "grad_norm": 1.7087002669456355, "learning_rate": 2.4092882000043868e-05, "loss": 0.694, "step": 2958 }, { "epoch": 0.36355817668018187, "grad_norm": 1.432029648065579, "learning_rate": 2.408776464665083e-05, "loss": 0.7091, "step": 2959 }, { "epoch": 0.3636810418970389, "grad_norm": 1.2855320506796832, "learning_rate": 2.4082645621622425e-05, "loss": 0.6262, "step": 2960 }, { "epoch": 0.36380390711389604, "grad_norm": 1.0028777474356807, "learning_rate": 2.407752492590026e-05, "loss": 0.6286, "step": 2961 }, { "epoch": 0.36392677233075316, "grad_norm": 1.1039105805897944, "learning_rate": 2.4072402560426253e-05, "loss": 0.6662, "step": 2962 }, { "epoch": 0.36404963754761027, "grad_norm": 1.3826209509763898, "learning_rate": 2.4067278526142635e-05, "loss": 0.7074, "step": 2963 }, { "epoch": 0.3641725027644674, "grad_norm": 1.5330846884072493, "learning_rate": 2.4062152823991933e-05, "loss": 0.7138, "step": 2964 }, { "epoch": 0.3642953679813245, "grad_norm": 1.3602726894624495, "learning_rate": 2.405702545491699e-05, "loss": 0.5689, "step": 2965 }, { "epoch": 0.3644182331981816, "grad_norm": 1.511557408225221, "learning_rate": 2.405189641986095e-05, "loss": 0.6491, "step": 2966 }, { "epoch": 0.3645410984150387, "grad_norm": 1.2553907337805934, "learning_rate": 2.404676571976727e-05, "loss": 0.599, "step": 2967 }, { "epoch": 0.3646639636318958, "grad_norm": 1.2398584654944411, "learning_rate": 2.4041633355579705e-05, "loss": 0.6342, "step": 2968 }, { "epoch": 0.3647868288487529, "grad_norm": 1.4534434429812835, "learning_rate": 2.403649932824232e-05, "loss": 0.7167, "step": 2969 }, { "epoch": 0.36490969406561, "grad_norm": 1.4247760214121539, "learning_rate": 2.403136363869949e-05, "loss": 0.7409, "step": 2970 }, { "epoch": 0.36503255928246714, "grad_norm": 1.375507382751723, "learning_rate": 2.4026226287895885e-05, "loss": 0.6425, "step": 2971 }, { "epoch": 0.36515542449932425, "grad_norm": 1.3422094284789758, "learning_rate": 2.4021087276776493e-05, "loss": 0.6277, "step": 2972 }, { "epoch": 0.36527828971618137, "grad_norm": 1.1783589896325022, "learning_rate": 2.40159466062866e-05, "loss": 0.6096, "step": 2973 }, { "epoch": 0.36540115493303843, "grad_norm": 1.4451769549505467, "learning_rate": 2.40108042773718e-05, "loss": 0.5913, "step": 2974 }, { "epoch": 0.36552402014989555, "grad_norm": 1.3626891086730504, "learning_rate": 2.400566029097799e-05, "loss": 0.6079, "step": 2975 }, { "epoch": 0.36564688536675266, "grad_norm": 1.2035142264046366, "learning_rate": 2.4000514648051372e-05, "loss": 0.6635, "step": 2976 }, { "epoch": 0.3657697505836098, "grad_norm": 1.248744846346287, "learning_rate": 2.3995367349538456e-05, "loss": 0.5929, "step": 2977 }, { "epoch": 0.3658926158004669, "grad_norm": 1.280535805746773, "learning_rate": 2.399021839638605e-05, "loss": 0.6486, "step": 2978 }, { "epoch": 0.366015481017324, "grad_norm": 1.2981241659885376, "learning_rate": 2.3985067789541285e-05, "loss": 0.5422, "step": 2979 }, { "epoch": 0.3661383462341811, "grad_norm": 1.2030127977946137, "learning_rate": 2.3979915529951562e-05, "loss": 0.5277, "step": 2980 }, { "epoch": 0.36626121145103824, "grad_norm": 1.4095033341245589, "learning_rate": 2.3974761618564613e-05, "loss": 0.6332, "step": 2981 }, { "epoch": 0.3663840766678953, "grad_norm": 1.446787439605187, "learning_rate": 2.396960605632847e-05, "loss": 0.6148, "step": 2982 }, { "epoch": 0.3665069418847524, "grad_norm": 1.2275521134578737, "learning_rate": 2.396444884419146e-05, "loss": 0.5814, "step": 2983 }, { "epoch": 0.36662980710160953, "grad_norm": 1.1208233633738671, "learning_rate": 2.3959289983102223e-05, "loss": 0.5367, "step": 2984 }, { "epoch": 0.36675267231846664, "grad_norm": 1.3502268238323631, "learning_rate": 2.39541294740097e-05, "loss": 0.6142, "step": 2985 }, { "epoch": 0.36687553753532376, "grad_norm": 1.394303209782231, "learning_rate": 2.3948967317863124e-05, "loss": 0.6272, "step": 2986 }, { "epoch": 0.3669984027521809, "grad_norm": 1.4311177426259918, "learning_rate": 2.3943803515612053e-05, "loss": 0.6935, "step": 2987 }, { "epoch": 0.367121267969038, "grad_norm": 1.2891582893564986, "learning_rate": 2.393863806820632e-05, "loss": 0.6471, "step": 2988 }, { "epoch": 0.36724413318589505, "grad_norm": 1.256424536224272, "learning_rate": 2.3933470976596088e-05, "loss": 0.6356, "step": 2989 }, { "epoch": 0.36736699840275217, "grad_norm": 1.415806151627246, "learning_rate": 2.3928302241731807e-05, "loss": 0.623, "step": 2990 }, { "epoch": 0.3674898636196093, "grad_norm": 1.4201794001200552, "learning_rate": 2.3923131864564228e-05, "loss": 0.5906, "step": 2991 }, { "epoch": 0.3676127288364664, "grad_norm": 1.3865085153213939, "learning_rate": 2.391795984604441e-05, "loss": 0.5984, "step": 2992 }, { "epoch": 0.3677355940533235, "grad_norm": 1.2942271545687993, "learning_rate": 2.3912786187123714e-05, "loss": 0.6193, "step": 2993 }, { "epoch": 0.3678584592701806, "grad_norm": 1.439224968515976, "learning_rate": 2.39076108887538e-05, "loss": 0.692, "step": 2994 }, { "epoch": 0.36798132448703774, "grad_norm": 1.1420610058825036, "learning_rate": 2.3902433951886634e-05, "loss": 0.6487, "step": 2995 }, { "epoch": 0.3681041897038948, "grad_norm": 1.1783292561186094, "learning_rate": 2.3897255377474472e-05, "loss": 0.5752, "step": 2996 }, { "epoch": 0.3682270549207519, "grad_norm": 1.5374626570368481, "learning_rate": 2.389207516646989e-05, "loss": 0.616, "step": 2997 }, { "epoch": 0.36834992013760903, "grad_norm": 1.426290023423719, "learning_rate": 2.3886893319825747e-05, "loss": 0.6277, "step": 2998 }, { "epoch": 0.36847278535446615, "grad_norm": 1.1938918147971573, "learning_rate": 2.3881709838495208e-05, "loss": 0.5355, "step": 2999 }, { "epoch": 0.36859565057132326, "grad_norm": 1.7837989514549788, "learning_rate": 2.3876524723431748e-05, "loss": 0.6743, "step": 3000 }, { "epoch": 0.3687185157881804, "grad_norm": 1.3672478651261157, "learning_rate": 2.3871337975589124e-05, "loss": 0.6594, "step": 3001 }, { "epoch": 0.3688413810050375, "grad_norm": 1.1484836477217446, "learning_rate": 2.386614959592142e-05, "loss": 0.6646, "step": 3002 }, { "epoch": 0.36896424622189455, "grad_norm": 1.1578454706796342, "learning_rate": 2.3860959585382995e-05, "loss": 0.6339, "step": 3003 }, { "epoch": 0.36908711143875167, "grad_norm": 1.7868597088533542, "learning_rate": 2.385576794492852e-05, "loss": 0.808, "step": 3004 }, { "epoch": 0.3692099766556088, "grad_norm": 1.4573882378870107, "learning_rate": 2.385057467551296e-05, "loss": 0.7382, "step": 3005 }, { "epoch": 0.3693328418724659, "grad_norm": 1.324977226044546, "learning_rate": 2.3845379778091587e-05, "loss": 0.5496, "step": 3006 }, { "epoch": 0.369455707089323, "grad_norm": 1.3455956070702377, "learning_rate": 2.384018325361997e-05, "loss": 0.5725, "step": 3007 }, { "epoch": 0.36957857230618013, "grad_norm": 1.3216042995034352, "learning_rate": 2.3834985103053976e-05, "loss": 0.5868, "step": 3008 }, { "epoch": 0.36970143752303725, "grad_norm": 1.4314494389024397, "learning_rate": 2.3829785327349766e-05, "loss": 0.6431, "step": 3009 }, { "epoch": 0.36982430273989436, "grad_norm": 1.6180545698747584, "learning_rate": 2.382458392746381e-05, "loss": 0.6689, "step": 3010 }, { "epoch": 0.3699471679567514, "grad_norm": 1.2795557983570238, "learning_rate": 2.381938090435287e-05, "loss": 0.6213, "step": 3011 }, { "epoch": 0.37007003317360854, "grad_norm": 1.297917871196048, "learning_rate": 2.3814176258974006e-05, "loss": 0.6315, "step": 3012 }, { "epoch": 0.37019289839046565, "grad_norm": 1.1702951026882769, "learning_rate": 2.380896999228458e-05, "loss": 0.6575, "step": 3013 }, { "epoch": 0.37031576360732277, "grad_norm": 1.3322508595526688, "learning_rate": 2.3803762105242255e-05, "loss": 0.6746, "step": 3014 }, { "epoch": 0.3704386288241799, "grad_norm": 1.180468781992892, "learning_rate": 2.3798552598804987e-05, "loss": 0.6285, "step": 3015 }, { "epoch": 0.370561494041037, "grad_norm": 1.9441644424162583, "learning_rate": 2.3793341473931024e-05, "loss": 0.6077, "step": 3016 }, { "epoch": 0.3706843592578941, "grad_norm": 1.6465345064959926, "learning_rate": 2.3788128731578928e-05, "loss": 0.6587, "step": 3017 }, { "epoch": 0.3708072244747512, "grad_norm": 1.2172969011676469, "learning_rate": 2.378291437270754e-05, "loss": 0.5475, "step": 3018 }, { "epoch": 0.3709300896916083, "grad_norm": 1.3292162659881221, "learning_rate": 2.377769839827602e-05, "loss": 0.5892, "step": 3019 }, { "epoch": 0.3710529549084654, "grad_norm": 1.3651005055379835, "learning_rate": 2.3772480809243797e-05, "loss": 0.5468, "step": 3020 }, { "epoch": 0.3711758201253225, "grad_norm": 1.3457942682457644, "learning_rate": 2.3767261606570626e-05, "loss": 0.671, "step": 3021 }, { "epoch": 0.37129868534217964, "grad_norm": 1.451509516742317, "learning_rate": 2.376204079121654e-05, "loss": 0.7284, "step": 3022 }, { "epoch": 0.37142155055903675, "grad_norm": 1.5569670431871967, "learning_rate": 2.375681836414187e-05, "loss": 0.5742, "step": 3023 }, { "epoch": 0.37154441577589387, "grad_norm": 1.0818502149786509, "learning_rate": 2.3751594326307254e-05, "loss": 0.6219, "step": 3024 }, { "epoch": 0.3716672809927509, "grad_norm": 1.4427557366131711, "learning_rate": 2.374636867867362e-05, "loss": 0.6509, "step": 3025 }, { "epoch": 0.37179014620960804, "grad_norm": 1.236146525683267, "learning_rate": 2.3741141422202188e-05, "loss": 0.7282, "step": 3026 }, { "epoch": 0.37191301142646516, "grad_norm": 1.26711501236808, "learning_rate": 2.373591255785448e-05, "loss": 0.5392, "step": 3027 }, { "epoch": 0.37203587664332227, "grad_norm": 1.3498801682043113, "learning_rate": 2.373068208659231e-05, "loss": 0.5354, "step": 3028 }, { "epoch": 0.3721587418601794, "grad_norm": 1.2223077728708003, "learning_rate": 2.3725450009377795e-05, "loss": 0.5359, "step": 3029 }, { "epoch": 0.3722816070770365, "grad_norm": 1.3123842792988207, "learning_rate": 2.3720216327173327e-05, "loss": 0.6811, "step": 3030 }, { "epoch": 0.3724044722938936, "grad_norm": 2.0093199041201646, "learning_rate": 2.371498104094163e-05, "loss": 0.6532, "step": 3031 }, { "epoch": 0.37252733751075073, "grad_norm": 1.3192358059017948, "learning_rate": 2.3709744151645686e-05, "loss": 0.6539, "step": 3032 }, { "epoch": 0.3726502027276078, "grad_norm": 1.2980217591623473, "learning_rate": 2.3704505660248786e-05, "loss": 0.6419, "step": 3033 }, { "epoch": 0.3727730679444649, "grad_norm": 1.2422002611106016, "learning_rate": 2.3699265567714522e-05, "loss": 0.6063, "step": 3034 }, { "epoch": 0.372895933161322, "grad_norm": 1.4203449476293402, "learning_rate": 2.3694023875006773e-05, "loss": 0.5732, "step": 3035 }, { "epoch": 0.37301879837817914, "grad_norm": 1.3294561783584766, "learning_rate": 2.368878058308972e-05, "loss": 0.7252, "step": 3036 }, { "epoch": 0.37314166359503625, "grad_norm": 1.4273031127727738, "learning_rate": 2.368353569292782e-05, "loss": 0.5938, "step": 3037 }, { "epoch": 0.37326452881189337, "grad_norm": 1.5597780367656804, "learning_rate": 2.367828920548585e-05, "loss": 0.7185, "step": 3038 }, { "epoch": 0.3733873940287505, "grad_norm": 1.167696785599463, "learning_rate": 2.3673041121728857e-05, "loss": 0.6144, "step": 3039 }, { "epoch": 0.37351025924560755, "grad_norm": 1.7029812578899834, "learning_rate": 2.36677914426222e-05, "loss": 0.7357, "step": 3040 }, { "epoch": 0.37363312446246466, "grad_norm": 1.2561781970018855, "learning_rate": 2.3662540169131516e-05, "loss": 0.5892, "step": 3041 }, { "epoch": 0.3737559896793218, "grad_norm": 1.310506619929962, "learning_rate": 2.365728730222275e-05, "loss": 0.5368, "step": 3042 }, { "epoch": 0.3738788548961789, "grad_norm": 1.3557688020031782, "learning_rate": 2.3652032842862127e-05, "loss": 0.6249, "step": 3043 }, { "epoch": 0.374001720113036, "grad_norm": 1.233971986097292, "learning_rate": 2.3646776792016175e-05, "loss": 0.6428, "step": 3044 }, { "epoch": 0.3741245853298931, "grad_norm": 1.4216214114363233, "learning_rate": 2.3641519150651707e-05, "loss": 0.4976, "step": 3045 }, { "epoch": 0.37424745054675024, "grad_norm": 1.2783146921697044, "learning_rate": 2.3636259919735835e-05, "loss": 0.6205, "step": 3046 }, { "epoch": 0.3743703157636073, "grad_norm": 1.4002410177787372, "learning_rate": 2.3630999100235956e-05, "loss": 0.6286, "step": 3047 }, { "epoch": 0.3744931809804644, "grad_norm": 1.2449844319945587, "learning_rate": 2.362573669311977e-05, "loss": 0.7029, "step": 3048 }, { "epoch": 0.37461604619732153, "grad_norm": 1.3194452945407886, "learning_rate": 2.3620472699355255e-05, "loss": 0.5959, "step": 3049 }, { "epoch": 0.37473891141417864, "grad_norm": 1.1423055632959906, "learning_rate": 2.3615207119910693e-05, "loss": 0.5663, "step": 3050 }, { "epoch": 0.37486177663103576, "grad_norm": 1.3299685589697985, "learning_rate": 2.3609939955754656e-05, "loss": 0.6749, "step": 3051 }, { "epoch": 0.3749846418478929, "grad_norm": 1.615816441540961, "learning_rate": 2.3604671207856002e-05, "loss": 0.6793, "step": 3052 }, { "epoch": 0.37510750706475, "grad_norm": 1.3993187650668584, "learning_rate": 2.359940087718388e-05, "loss": 0.5887, "step": 3053 }, { "epoch": 0.37523037228160705, "grad_norm": 1.2375670840040132, "learning_rate": 2.3594128964707736e-05, "loss": 0.5319, "step": 3054 }, { "epoch": 0.37535323749846417, "grad_norm": 2.0808246177826173, "learning_rate": 2.3588855471397305e-05, "loss": 0.6731, "step": 3055 }, { "epoch": 0.3754761027153213, "grad_norm": 1.710629895778046, "learning_rate": 2.358358039822261e-05, "loss": 0.6559, "step": 3056 }, { "epoch": 0.3755989679321784, "grad_norm": 1.5226938040252462, "learning_rate": 2.357830374615397e-05, "loss": 0.5699, "step": 3057 }, { "epoch": 0.3757218331490355, "grad_norm": 1.122065225903869, "learning_rate": 2.3573025516161977e-05, "loss": 0.6112, "step": 3058 }, { "epoch": 0.3758446983658926, "grad_norm": 1.2831878520478954, "learning_rate": 2.356774570921755e-05, "loss": 0.6499, "step": 3059 }, { "epoch": 0.37596756358274974, "grad_norm": 2.132641348444403, "learning_rate": 2.3562464326291862e-05, "loss": 0.7971, "step": 3060 }, { "epoch": 0.37609042879960686, "grad_norm": 1.3621468000889878, "learning_rate": 2.355718136835639e-05, "loss": 0.6989, "step": 3061 }, { "epoch": 0.3762132940164639, "grad_norm": 1.355987035450686, "learning_rate": 2.35518968363829e-05, "loss": 0.68, "step": 3062 }, { "epoch": 0.37633615923332103, "grad_norm": 1.4127390332582228, "learning_rate": 2.3546610731343446e-05, "loss": 0.7668, "step": 3063 }, { "epoch": 0.37645902445017815, "grad_norm": 1.3099907780498028, "learning_rate": 2.3541323054210374e-05, "loss": 0.6351, "step": 3064 }, { "epoch": 0.37658188966703526, "grad_norm": 1.586507466621477, "learning_rate": 2.353603380595633e-05, "loss": 0.6144, "step": 3065 }, { "epoch": 0.3767047548838924, "grad_norm": 1.3377084762212375, "learning_rate": 2.353074298755421e-05, "loss": 0.7024, "step": 3066 }, { "epoch": 0.3768276201007495, "grad_norm": 1.7010983677916929, "learning_rate": 2.352545059997725e-05, "loss": 0.6986, "step": 3067 }, { "epoch": 0.3769504853176066, "grad_norm": 1.4342961407259711, "learning_rate": 2.352015664419894e-05, "loss": 0.5826, "step": 3068 }, { "epoch": 0.37707335053446367, "grad_norm": 1.3182187492920756, "learning_rate": 2.3514861121193068e-05, "loss": 0.6134, "step": 3069 }, { "epoch": 0.3771962157513208, "grad_norm": 1.2232995188566826, "learning_rate": 2.3509564031933716e-05, "loss": 0.5831, "step": 3070 }, { "epoch": 0.3773190809681779, "grad_norm": 1.1279329391816846, "learning_rate": 2.3504265377395244e-05, "loss": 0.5241, "step": 3071 }, { "epoch": 0.377441946185035, "grad_norm": 1.3892540767416506, "learning_rate": 2.349896515855231e-05, "loss": 0.5687, "step": 3072 }, { "epoch": 0.37756481140189213, "grad_norm": 1.3409968800249306, "learning_rate": 2.3493663376379853e-05, "loss": 0.6251, "step": 3073 }, { "epoch": 0.37768767661874925, "grad_norm": 1.3243336003543786, "learning_rate": 2.3488360031853102e-05, "loss": 0.5823, "step": 3074 }, { "epoch": 0.37781054183560636, "grad_norm": 1.2934130300753455, "learning_rate": 2.348305512594757e-05, "loss": 0.6706, "step": 3075 }, { "epoch": 0.3779334070524634, "grad_norm": 1.4917946038223329, "learning_rate": 2.3477748659639063e-05, "loss": 0.5832, "step": 3076 }, { "epoch": 0.37805627226932054, "grad_norm": 1.3152077175788883, "learning_rate": 2.347244063390367e-05, "loss": 0.5935, "step": 3077 }, { "epoch": 0.37817913748617765, "grad_norm": 1.1792844260031874, "learning_rate": 2.346713104971777e-05, "loss": 0.5363, "step": 3078 }, { "epoch": 0.37830200270303477, "grad_norm": 1.6073602977028432, "learning_rate": 2.3461819908058024e-05, "loss": 0.6854, "step": 3079 }, { "epoch": 0.3784248679198919, "grad_norm": 1.2454186792565136, "learning_rate": 2.3456507209901382e-05, "loss": 0.6403, "step": 3080 }, { "epoch": 0.378547733136749, "grad_norm": 1.4465885678285888, "learning_rate": 2.345119295622508e-05, "loss": 0.7558, "step": 3081 }, { "epoch": 0.3786705983536061, "grad_norm": 1.3888426014082174, "learning_rate": 2.3445877148006643e-05, "loss": 0.6638, "step": 3082 }, { "epoch": 0.37879346357046323, "grad_norm": 1.259527974002533, "learning_rate": 2.3440559786223878e-05, "loss": 0.6393, "step": 3083 }, { "epoch": 0.3789163287873203, "grad_norm": 1.315487635917726, "learning_rate": 2.343524087185488e-05, "loss": 0.6973, "step": 3084 }, { "epoch": 0.3790391940041774, "grad_norm": 1.2558828981209915, "learning_rate": 2.3429920405878024e-05, "loss": 0.5364, "step": 3085 }, { "epoch": 0.3791620592210345, "grad_norm": 1.197453458943176, "learning_rate": 2.3424598389271986e-05, "loss": 0.6815, "step": 3086 }, { "epoch": 0.37928492443789164, "grad_norm": 1.3414207995986853, "learning_rate": 2.3419274823015704e-05, "loss": 0.5766, "step": 3087 }, { "epoch": 0.37940778965474875, "grad_norm": 1.228127593264913, "learning_rate": 2.3413949708088424e-05, "loss": 0.5907, "step": 3088 }, { "epoch": 0.37953065487160587, "grad_norm": 1.4497319073567136, "learning_rate": 2.3408623045469658e-05, "loss": 0.6258, "step": 3089 }, { "epoch": 0.379653520088463, "grad_norm": 1.2950724658796482, "learning_rate": 2.3403294836139216e-05, "loss": 0.6105, "step": 3090 }, { "epoch": 0.37977638530532004, "grad_norm": 1.4936394491254836, "learning_rate": 2.339796508107718e-05, "loss": 0.6613, "step": 3091 }, { "epoch": 0.37989925052217716, "grad_norm": 1.2110650834823302, "learning_rate": 2.339263378126394e-05, "loss": 0.6122, "step": 3092 }, { "epoch": 0.38002211573903427, "grad_norm": 1.5055783337702415, "learning_rate": 2.338730093768014e-05, "loss": 0.6648, "step": 3093 }, { "epoch": 0.3801449809558914, "grad_norm": 1.5097882371525442, "learning_rate": 2.338196655130673e-05, "loss": 0.5912, "step": 3094 }, { "epoch": 0.3802678461727485, "grad_norm": 1.398334913808832, "learning_rate": 2.3376630623124925e-05, "loss": 0.6546, "step": 3095 }, { "epoch": 0.3803907113896056, "grad_norm": 1.1711902684241875, "learning_rate": 2.3371293154116244e-05, "loss": 0.6672, "step": 3096 }, { "epoch": 0.38051357660646273, "grad_norm": 1.3888086838523215, "learning_rate": 2.3365954145262478e-05, "loss": 0.6172, "step": 3097 }, { "epoch": 0.3806364418233198, "grad_norm": 1.4622026132767225, "learning_rate": 2.3360613597545698e-05, "loss": 0.5806, "step": 3098 }, { "epoch": 0.3807593070401769, "grad_norm": 1.838457863455748, "learning_rate": 2.3355271511948272e-05, "loss": 0.6677, "step": 3099 }, { "epoch": 0.380882172257034, "grad_norm": 1.407937002148143, "learning_rate": 2.3349927889452834e-05, "loss": 0.5639, "step": 3100 }, { "epoch": 0.38100503747389114, "grad_norm": 1.5319887365660656, "learning_rate": 2.3344582731042313e-05, "loss": 0.5934, "step": 3101 }, { "epoch": 0.38112790269074825, "grad_norm": 1.2921245591606374, "learning_rate": 2.3339236037699915e-05, "loss": 0.7351, "step": 3102 }, { "epoch": 0.38125076790760537, "grad_norm": 1.3009038843686636, "learning_rate": 2.333388781040913e-05, "loss": 0.559, "step": 3103 }, { "epoch": 0.3813736331244625, "grad_norm": 1.2603536621092253, "learning_rate": 2.3328538050153735e-05, "loss": 0.6757, "step": 3104 }, { "epoch": 0.38149649834131955, "grad_norm": 1.1044198671026118, "learning_rate": 2.3323186757917772e-05, "loss": 0.6132, "step": 3105 }, { "epoch": 0.38161936355817666, "grad_norm": 1.213749743677481, "learning_rate": 2.3317833934685583e-05, "loss": 0.569, "step": 3106 }, { "epoch": 0.3817422287750338, "grad_norm": 1.3001519551364638, "learning_rate": 2.3312479581441786e-05, "loss": 0.6609, "step": 3107 }, { "epoch": 0.3818650939918909, "grad_norm": 1.4414410588134763, "learning_rate": 2.3307123699171277e-05, "loss": 0.5635, "step": 3108 }, { "epoch": 0.381987959208748, "grad_norm": 1.4272943020021183, "learning_rate": 2.330176628885924e-05, "loss": 0.7128, "step": 3109 }, { "epoch": 0.3821108244256051, "grad_norm": 1.3987002858598885, "learning_rate": 2.329640735149113e-05, "loss": 0.5813, "step": 3110 }, { "epoch": 0.38223368964246224, "grad_norm": 1.600257613230404, "learning_rate": 2.329104688805269e-05, "loss": 0.7582, "step": 3111 }, { "epoch": 0.38235655485931935, "grad_norm": 1.2598699201113055, "learning_rate": 2.3285684899529948e-05, "loss": 0.5529, "step": 3112 }, { "epoch": 0.3824794200761764, "grad_norm": 1.306911075244212, "learning_rate": 2.3280321386909203e-05, "loss": 0.7384, "step": 3113 }, { "epoch": 0.38260228529303353, "grad_norm": 1.1317120661591564, "learning_rate": 2.3274956351177037e-05, "loss": 0.6573, "step": 3114 }, { "epoch": 0.38272515050989064, "grad_norm": 1.4172414426706152, "learning_rate": 2.326958979332032e-05, "loss": 0.676, "step": 3115 }, { "epoch": 0.38284801572674776, "grad_norm": 1.173142625828647, "learning_rate": 2.3264221714326182e-05, "loss": 0.6793, "step": 3116 }, { "epoch": 0.3829708809436049, "grad_norm": 1.6564340585049546, "learning_rate": 2.325885211518206e-05, "loss": 0.744, "step": 3117 }, { "epoch": 0.383093746160462, "grad_norm": 1.2031537084123252, "learning_rate": 2.3253480996875653e-05, "loss": 0.6579, "step": 3118 }, { "epoch": 0.3832166113773191, "grad_norm": 1.1616810714614563, "learning_rate": 2.3248108360394942e-05, "loss": 0.5813, "step": 3119 }, { "epoch": 0.38333947659417617, "grad_norm": 1.4373256623716808, "learning_rate": 2.3242734206728186e-05, "loss": 0.7659, "step": 3120 }, { "epoch": 0.3834623418110333, "grad_norm": 1.4399961118109796, "learning_rate": 2.323735853686393e-05, "loss": 0.6127, "step": 3121 }, { "epoch": 0.3835852070278904, "grad_norm": 1.4570822759732374, "learning_rate": 2.3231981351790993e-05, "loss": 0.6319, "step": 3122 }, { "epoch": 0.3837080722447475, "grad_norm": 1.215428865304125, "learning_rate": 2.3226602652498473e-05, "loss": 0.569, "step": 3123 }, { "epoch": 0.3838309374616046, "grad_norm": 1.1488719202695636, "learning_rate": 2.3221222439975748e-05, "loss": 0.6472, "step": 3124 }, { "epoch": 0.38395380267846174, "grad_norm": 1.145034954914683, "learning_rate": 2.3215840715212467e-05, "loss": 0.6023, "step": 3125 }, { "epoch": 0.38407666789531886, "grad_norm": 1.1060677953722022, "learning_rate": 2.3210457479198573e-05, "loss": 0.556, "step": 3126 }, { "epoch": 0.3841995331121759, "grad_norm": 1.465473984401948, "learning_rate": 2.3205072732924266e-05, "loss": 0.7223, "step": 3127 }, { "epoch": 0.38432239832903303, "grad_norm": 1.3505774434187645, "learning_rate": 2.3199686477380047e-05, "loss": 0.6449, "step": 3128 }, { "epoch": 0.38444526354589015, "grad_norm": 1.4057788598548848, "learning_rate": 2.3194298713556676e-05, "loss": 0.6154, "step": 3129 }, { "epoch": 0.38456812876274726, "grad_norm": 1.3706875460672943, "learning_rate": 2.3188909442445202e-05, "loss": 0.573, "step": 3130 }, { "epoch": 0.3846909939796044, "grad_norm": 1.5368055841421846, "learning_rate": 2.318351866503694e-05, "loss": 0.766, "step": 3131 }, { "epoch": 0.3848138591964615, "grad_norm": 1.2427877208963602, "learning_rate": 2.3178126382323488e-05, "loss": 0.6327, "step": 3132 }, { "epoch": 0.3849367244133186, "grad_norm": 1.4923328543661019, "learning_rate": 2.3172732595296727e-05, "loss": 0.5986, "step": 3133 }, { "epoch": 0.3850595896301757, "grad_norm": 1.2551929518285199, "learning_rate": 2.316733730494881e-05, "loss": 0.7245, "step": 3134 }, { "epoch": 0.3851824548470328, "grad_norm": 1.808330218641065, "learning_rate": 2.316194051227216e-05, "loss": 0.6572, "step": 3135 }, { "epoch": 0.3853053200638899, "grad_norm": 1.2338044807744633, "learning_rate": 2.3156542218259485e-05, "loss": 0.6245, "step": 3136 }, { "epoch": 0.385428185280747, "grad_norm": 1.4663039728017604, "learning_rate": 2.3151142423903765e-05, "loss": 0.627, "step": 3137 }, { "epoch": 0.38555105049760413, "grad_norm": 1.2747617512773768, "learning_rate": 2.314574113019826e-05, "loss": 0.6249, "step": 3138 }, { "epoch": 0.38567391571446125, "grad_norm": 1.1351158354518418, "learning_rate": 2.3140338338136505e-05, "loss": 0.5741, "step": 3139 }, { "epoch": 0.38579678093131836, "grad_norm": 1.3033155620339516, "learning_rate": 2.31349340487123e-05, "loss": 0.6037, "step": 3140 }, { "epoch": 0.3859196461481755, "grad_norm": 1.203321248600611, "learning_rate": 2.312952826291973e-05, "loss": 0.5665, "step": 3141 }, { "epoch": 0.38604251136503254, "grad_norm": 1.3544156349488452, "learning_rate": 2.3124120981753164e-05, "loss": 0.6239, "step": 3142 }, { "epoch": 0.38616537658188965, "grad_norm": 1.3796261399412184, "learning_rate": 2.311871220620723e-05, "loss": 0.6914, "step": 3143 }, { "epoch": 0.38628824179874677, "grad_norm": 1.3395557851223567, "learning_rate": 2.3113301937276834e-05, "loss": 0.6225, "step": 3144 }, { "epoch": 0.3864111070156039, "grad_norm": 1.2130202009382083, "learning_rate": 2.310789017595717e-05, "loss": 0.5927, "step": 3145 }, { "epoch": 0.386533972232461, "grad_norm": 1.2531027051100638, "learning_rate": 2.310247692324368e-05, "loss": 0.6578, "step": 3146 }, { "epoch": 0.3866568374493181, "grad_norm": 1.1206652091879596, "learning_rate": 2.3097062180132113e-05, "loss": 0.7087, "step": 3147 }, { "epoch": 0.38677970266617523, "grad_norm": 1.294485282229125, "learning_rate": 2.3091645947618463e-05, "loss": 0.6285, "step": 3148 }, { "epoch": 0.3869025678830323, "grad_norm": 1.2529010878354239, "learning_rate": 2.3086228226699023e-05, "loss": 0.6203, "step": 3149 }, { "epoch": 0.3870254330998894, "grad_norm": 1.326038525572919, "learning_rate": 2.3080809018370338e-05, "loss": 0.639, "step": 3150 }, { "epoch": 0.3871482983167465, "grad_norm": 1.1958554236348393, "learning_rate": 2.3075388323629242e-05, "loss": 0.6997, "step": 3151 }, { "epoch": 0.38727116353360364, "grad_norm": 1.2403462107313898, "learning_rate": 2.3069966143472837e-05, "loss": 0.6336, "step": 3152 }, { "epoch": 0.38739402875046075, "grad_norm": 1.2804649917446485, "learning_rate": 2.3064542478898494e-05, "loss": 0.5514, "step": 3153 }, { "epoch": 0.38751689396731787, "grad_norm": 1.2006425101010036, "learning_rate": 2.305911733090386e-05, "loss": 0.6396, "step": 3154 }, { "epoch": 0.387639759184175, "grad_norm": 1.6255698371566016, "learning_rate": 2.305369070048686e-05, "loss": 0.69, "step": 3155 }, { "epoch": 0.38776262440103204, "grad_norm": 1.242586095370747, "learning_rate": 2.304826258864569e-05, "loss": 0.7377, "step": 3156 }, { "epoch": 0.38788548961788916, "grad_norm": 1.336222000861688, "learning_rate": 2.30428329963788e-05, "loss": 0.5447, "step": 3157 }, { "epoch": 0.38800835483474627, "grad_norm": 1.6041166700461131, "learning_rate": 2.303740192468495e-05, "loss": 0.6048, "step": 3158 }, { "epoch": 0.3881312200516034, "grad_norm": 1.6130857755177876, "learning_rate": 2.3031969374563137e-05, "loss": 0.6089, "step": 3159 }, { "epoch": 0.3882540852684605, "grad_norm": 1.3807524813772005, "learning_rate": 2.302653534701265e-05, "loss": 0.5442, "step": 3160 }, { "epoch": 0.3883769504853176, "grad_norm": 1.2756225956443825, "learning_rate": 2.3021099843033037e-05, "loss": 0.7019, "step": 3161 }, { "epoch": 0.38849981570217473, "grad_norm": 1.3308797797025091, "learning_rate": 2.3015662863624124e-05, "loss": 0.5988, "step": 3162 }, { "epoch": 0.38862268091903185, "grad_norm": 1.262070107988154, "learning_rate": 2.3010224409786016e-05, "loss": 0.7164, "step": 3163 }, { "epoch": 0.3887455461358889, "grad_norm": 1.0689202286246584, "learning_rate": 2.300478448251907e-05, "loss": 0.6055, "step": 3164 }, { "epoch": 0.388868411352746, "grad_norm": 1.2023740944534262, "learning_rate": 2.299934308282393e-05, "loss": 0.6403, "step": 3165 }, { "epoch": 0.38899127656960314, "grad_norm": 1.312873842398042, "learning_rate": 2.2993900211701516e-05, "loss": 0.6819, "step": 3166 }, { "epoch": 0.38911414178646025, "grad_norm": 1.2862506598929728, "learning_rate": 2.2988455870152995e-05, "loss": 0.6167, "step": 3167 }, { "epoch": 0.38923700700331737, "grad_norm": 1.5285739473499709, "learning_rate": 2.2983010059179824e-05, "loss": 0.6872, "step": 3168 }, { "epoch": 0.3893598722201745, "grad_norm": 2.3820159916983856, "learning_rate": 2.2977562779783726e-05, "loss": 0.6585, "step": 3169 }, { "epoch": 0.3894827374370316, "grad_norm": 1.302537875958053, "learning_rate": 2.297211403296669e-05, "loss": 0.7127, "step": 3170 }, { "epoch": 0.38960560265388866, "grad_norm": 1.2526793558939684, "learning_rate": 2.296666381973098e-05, "loss": 0.6875, "step": 3171 }, { "epoch": 0.3897284678707458, "grad_norm": 1.4077678438476966, "learning_rate": 2.2961212141079123e-05, "loss": 0.6735, "step": 3172 }, { "epoch": 0.3898513330876029, "grad_norm": 1.3784288548771246, "learning_rate": 2.2955758998013924e-05, "loss": 0.6559, "step": 3173 }, { "epoch": 0.38997419830446, "grad_norm": 1.456774300127324, "learning_rate": 2.2950304391538453e-05, "loss": 0.6592, "step": 3174 }, { "epoch": 0.3900970635213171, "grad_norm": 1.0901773012356553, "learning_rate": 2.2944848322656048e-05, "loss": 0.6065, "step": 3175 }, { "epoch": 0.39021992873817424, "grad_norm": 1.255412454356825, "learning_rate": 2.2939390792370315e-05, "loss": 0.6686, "step": 3176 }, { "epoch": 0.39034279395503135, "grad_norm": 1.1986719650477806, "learning_rate": 2.2933931801685137e-05, "loss": 0.5304, "step": 3177 }, { "epoch": 0.3904656591718884, "grad_norm": 1.3724783116947679, "learning_rate": 2.292847135160466e-05, "loss": 0.6078, "step": 3178 }, { "epoch": 0.39058852438874553, "grad_norm": 1.7342458172935287, "learning_rate": 2.2923009443133294e-05, "loss": 0.7165, "step": 3179 }, { "epoch": 0.39071138960560264, "grad_norm": 1.2725680176165868, "learning_rate": 2.2917546077275725e-05, "loss": 0.6399, "step": 3180 }, { "epoch": 0.39083425482245976, "grad_norm": 1.3734963812446788, "learning_rate": 2.29120812550369e-05, "loss": 0.6286, "step": 3181 }, { "epoch": 0.3909571200393169, "grad_norm": 1.4049282046205358, "learning_rate": 2.290661497742204e-05, "loss": 0.5864, "step": 3182 }, { "epoch": 0.391079985256174, "grad_norm": 1.238852097215092, "learning_rate": 2.2901147245436635e-05, "loss": 0.5276, "step": 3183 }, { "epoch": 0.3912028504730311, "grad_norm": 1.404362352342043, "learning_rate": 2.2895678060086432e-05, "loss": 0.7803, "step": 3184 }, { "epoch": 0.39132571568988817, "grad_norm": 1.3260638315433146, "learning_rate": 2.289020742237745e-05, "loss": 0.5056, "step": 3185 }, { "epoch": 0.3914485809067453, "grad_norm": 1.3063314905135048, "learning_rate": 2.288473533331599e-05, "loss": 0.5841, "step": 3186 }, { "epoch": 0.3915714461236024, "grad_norm": 1.369956406120543, "learning_rate": 2.2879261793908596e-05, "loss": 0.6445, "step": 3187 }, { "epoch": 0.3916943113404595, "grad_norm": 1.2877857913658093, "learning_rate": 2.2873786805162096e-05, "loss": 0.6085, "step": 3188 }, { "epoch": 0.3918171765573166, "grad_norm": 1.2441992800576496, "learning_rate": 2.2868310368083578e-05, "loss": 0.5688, "step": 3189 }, { "epoch": 0.39194004177417374, "grad_norm": 1.1009481005316666, "learning_rate": 2.2862832483680392e-05, "loss": 0.6587, "step": 3190 }, { "epoch": 0.39206290699103086, "grad_norm": 1.4990053466945141, "learning_rate": 2.2857353152960165e-05, "loss": 0.7823, "step": 3191 }, { "epoch": 0.392185772207888, "grad_norm": 1.3398430480027306, "learning_rate": 2.2851872376930777e-05, "loss": 0.5784, "step": 3192 }, { "epoch": 0.39230863742474503, "grad_norm": 1.186626427690252, "learning_rate": 2.2846390156600395e-05, "loss": 0.6655, "step": 3193 }, { "epoch": 0.39243150264160215, "grad_norm": 1.4149618633784187, "learning_rate": 2.284090649297742e-05, "loss": 0.5873, "step": 3194 }, { "epoch": 0.39255436785845926, "grad_norm": 1.4185054124790508, "learning_rate": 2.2835421387070556e-05, "loss": 0.6246, "step": 3195 }, { "epoch": 0.3926772330753164, "grad_norm": 1.2675950034982193, "learning_rate": 2.2829934839888732e-05, "loss": 0.6652, "step": 3196 }, { "epoch": 0.3928000982921735, "grad_norm": 1.3797889140399258, "learning_rate": 2.2824446852441182e-05, "loss": 0.6901, "step": 3197 }, { "epoch": 0.3929229635090306, "grad_norm": 1.4499497710343199, "learning_rate": 2.281895742573737e-05, "loss": 0.582, "step": 3198 }, { "epoch": 0.3930458287258877, "grad_norm": 1.2306390838652088, "learning_rate": 2.281346656078705e-05, "loss": 0.5767, "step": 3199 }, { "epoch": 0.3931686939427448, "grad_norm": 1.1352118156419178, "learning_rate": 2.2807974258600227e-05, "loss": 0.576, "step": 3200 }, { "epoch": 0.3932915591596019, "grad_norm": 1.3560556355186917, "learning_rate": 2.280248052018718e-05, "loss": 0.6573, "step": 3201 }, { "epoch": 0.393414424376459, "grad_norm": 1.3093441197502143, "learning_rate": 2.2796985346558436e-05, "loss": 0.5844, "step": 3202 }, { "epoch": 0.39353728959331613, "grad_norm": 1.0949837376604656, "learning_rate": 2.2791488738724807e-05, "loss": 0.5613, "step": 3203 }, { "epoch": 0.39366015481017325, "grad_norm": 1.1780485338935327, "learning_rate": 2.2785990697697353e-05, "loss": 0.5273, "step": 3204 }, { "epoch": 0.39378302002703036, "grad_norm": 1.336867865788004, "learning_rate": 2.2780491224487402e-05, "loss": 0.6233, "step": 3205 }, { "epoch": 0.3939058852438875, "grad_norm": 1.380179394459063, "learning_rate": 2.2774990320106552e-05, "loss": 0.6375, "step": 3206 }, { "epoch": 0.39402875046074454, "grad_norm": 1.3226846765482512, "learning_rate": 2.2769487985566653e-05, "loss": 0.6798, "step": 3207 }, { "epoch": 0.39415161567760165, "grad_norm": 1.2131165238090367, "learning_rate": 2.2763984221879827e-05, "loss": 0.6447, "step": 3208 }, { "epoch": 0.39427448089445877, "grad_norm": 1.1948172570697202, "learning_rate": 2.2758479030058453e-05, "loss": 0.6115, "step": 3209 }, { "epoch": 0.3943973461113159, "grad_norm": 1.2357057381140435, "learning_rate": 2.275297241111518e-05, "loss": 0.7365, "step": 3210 }, { "epoch": 0.394520211328173, "grad_norm": 1.1149240727215048, "learning_rate": 2.274746436606291e-05, "loss": 0.5601, "step": 3211 }, { "epoch": 0.3946430765450301, "grad_norm": 1.4218882875605054, "learning_rate": 2.2741954895914813e-05, "loss": 0.7394, "step": 3212 }, { "epoch": 0.39476594176188723, "grad_norm": 1.1924781255813701, "learning_rate": 2.273644400168432e-05, "loss": 0.6534, "step": 3213 }, { "epoch": 0.39488880697874434, "grad_norm": 1.2153778763577558, "learning_rate": 2.273093168438513e-05, "loss": 0.5918, "step": 3214 }, { "epoch": 0.3950116721956014, "grad_norm": 1.3455511955649004, "learning_rate": 2.272541794503119e-05, "loss": 0.6062, "step": 3215 }, { "epoch": 0.3951345374124585, "grad_norm": 1.2899533518816655, "learning_rate": 2.271990278463672e-05, "loss": 0.653, "step": 3216 }, { "epoch": 0.39525740262931564, "grad_norm": 1.2139759356648276, "learning_rate": 2.27143862042162e-05, "loss": 0.5891, "step": 3217 }, { "epoch": 0.39538026784617275, "grad_norm": 1.0789412445450366, "learning_rate": 2.270886820478437e-05, "loss": 0.6616, "step": 3218 }, { "epoch": 0.39550313306302987, "grad_norm": 1.4744508666038183, "learning_rate": 2.270334878735622e-05, "loss": 0.6547, "step": 3219 }, { "epoch": 0.395625998279887, "grad_norm": 1.2058387923570915, "learning_rate": 2.2697827952947023e-05, "loss": 0.5986, "step": 3220 }, { "epoch": 0.3957488634967441, "grad_norm": 1.3127742272889749, "learning_rate": 2.2692305702572295e-05, "loss": 0.5398, "step": 3221 }, { "epoch": 0.39587172871360116, "grad_norm": 1.1800218007309389, "learning_rate": 2.268678203724782e-05, "loss": 0.6745, "step": 3222 }, { "epoch": 0.39599459393045827, "grad_norm": 1.2361848533247268, "learning_rate": 2.268125695798964e-05, "loss": 0.6423, "step": 3223 }, { "epoch": 0.3961174591473154, "grad_norm": 1.4638412437777095, "learning_rate": 2.2675730465814056e-05, "loss": 0.7075, "step": 3224 }, { "epoch": 0.3962403243641725, "grad_norm": 1.195781149496282, "learning_rate": 2.2670202561737635e-05, "loss": 0.7178, "step": 3225 }, { "epoch": 0.3963631895810296, "grad_norm": 1.3046837414849843, "learning_rate": 2.2664673246777197e-05, "loss": 0.6841, "step": 3226 }, { "epoch": 0.39648605479788673, "grad_norm": 1.4220067382927764, "learning_rate": 2.265914252194982e-05, "loss": 0.7508, "step": 3227 }, { "epoch": 0.39660892001474385, "grad_norm": 1.5038192682488487, "learning_rate": 2.2653610388272842e-05, "loss": 0.6757, "step": 3228 }, { "epoch": 0.3967317852316009, "grad_norm": 1.5999611967983256, "learning_rate": 2.2648076846763877e-05, "loss": 0.6354, "step": 3229 }, { "epoch": 0.396854650448458, "grad_norm": 1.4872140648984473, "learning_rate": 2.2642541898440764e-05, "loss": 0.6915, "step": 3230 }, { "epoch": 0.39697751566531514, "grad_norm": 1.3775721360788562, "learning_rate": 2.2637005544321645e-05, "loss": 0.634, "step": 3231 }, { "epoch": 0.39710038088217225, "grad_norm": 1.466706034094729, "learning_rate": 2.2631467785424875e-05, "loss": 0.5249, "step": 3232 }, { "epoch": 0.39722324609902937, "grad_norm": 1.4567987451465292, "learning_rate": 2.2625928622769105e-05, "loss": 0.5729, "step": 3233 }, { "epoch": 0.3973461113158865, "grad_norm": 1.247859831548399, "learning_rate": 2.2620388057373216e-05, "loss": 0.6488, "step": 3234 }, { "epoch": 0.3974689765327436, "grad_norm": 1.154418462409934, "learning_rate": 2.2614846090256366e-05, "loss": 0.564, "step": 3235 }, { "epoch": 0.39759184174960066, "grad_norm": 1.3963176369738308, "learning_rate": 2.2609302722437958e-05, "loss": 0.6263, "step": 3236 }, { "epoch": 0.3977147069664578, "grad_norm": 1.3693709243698375, "learning_rate": 2.2603757954937668e-05, "loss": 0.706, "step": 3237 }, { "epoch": 0.3978375721833149, "grad_norm": 1.236617387839573, "learning_rate": 2.259821178877541e-05, "loss": 0.6136, "step": 3238 }, { "epoch": 0.397960437400172, "grad_norm": 1.5315521117059792, "learning_rate": 2.259266422497137e-05, "loss": 0.6277, "step": 3239 }, { "epoch": 0.3980833026170291, "grad_norm": 1.312997781795286, "learning_rate": 2.2587115264545984e-05, "loss": 0.6497, "step": 3240 }, { "epoch": 0.39820616783388624, "grad_norm": 1.2615721886938012, "learning_rate": 2.2581564908519952e-05, "loss": 0.71, "step": 3241 }, { "epoch": 0.39832903305074335, "grad_norm": 1.1856660065793647, "learning_rate": 2.2576013157914224e-05, "loss": 0.6756, "step": 3242 }, { "epoch": 0.39845189826760047, "grad_norm": 1.3359619961614464, "learning_rate": 2.2570460013750012e-05, "loss": 0.5679, "step": 3243 }, { "epoch": 0.39857476348445753, "grad_norm": 1.4101835325816554, "learning_rate": 2.2564905477048768e-05, "loss": 0.672, "step": 3244 }, { "epoch": 0.39869762870131464, "grad_norm": 1.3416340918874878, "learning_rate": 2.2559349548832227e-05, "loss": 0.6618, "step": 3245 }, { "epoch": 0.39882049391817176, "grad_norm": 1.1949058258818597, "learning_rate": 2.2553792230122357e-05, "loss": 0.6133, "step": 3246 }, { "epoch": 0.3989433591350289, "grad_norm": 1.2947800251575485, "learning_rate": 2.25482335219414e-05, "loss": 0.6409, "step": 3247 }, { "epoch": 0.399066224351886, "grad_norm": 1.326907311998871, "learning_rate": 2.2542673425311834e-05, "loss": 0.6191, "step": 3248 }, { "epoch": 0.3991890895687431, "grad_norm": 1.2655865325521907, "learning_rate": 2.2537111941256406e-05, "loss": 0.6746, "step": 3249 }, { "epoch": 0.3993119547856002, "grad_norm": 1.03690816188133, "learning_rate": 2.2531549070798117e-05, "loss": 0.616, "step": 3250 }, { "epoch": 0.3994348200024573, "grad_norm": 1.6701155222644832, "learning_rate": 2.252598481496022e-05, "loss": 0.6188, "step": 3251 }, { "epoch": 0.3995576852193144, "grad_norm": 1.4711854734380423, "learning_rate": 2.252041917476623e-05, "loss": 0.6402, "step": 3252 }, { "epoch": 0.3996805504361715, "grad_norm": 1.2848058775409943, "learning_rate": 2.2514852151239897e-05, "loss": 0.573, "step": 3253 }, { "epoch": 0.3998034156530286, "grad_norm": 1.1420749003535782, "learning_rate": 2.250928374540525e-05, "loss": 0.5564, "step": 3254 }, { "epoch": 0.39992628086988574, "grad_norm": 1.4042175784174427, "learning_rate": 2.250371395828656e-05, "loss": 0.5693, "step": 3255 }, { "epoch": 0.40004914608674286, "grad_norm": 1.2449548731695756, "learning_rate": 2.2498142790908346e-05, "loss": 0.6172, "step": 3256 }, { "epoch": 0.4001720113036, "grad_norm": 1.203581423835913, "learning_rate": 2.2492570244295395e-05, "loss": 0.6144, "step": 3257 }, { "epoch": 0.40029487652045703, "grad_norm": 1.3395528462472013, "learning_rate": 2.248699631947274e-05, "loss": 0.7415, "step": 3258 }, { "epoch": 0.40041774173731415, "grad_norm": 1.2742380692823234, "learning_rate": 2.2481421017465662e-05, "loss": 0.564, "step": 3259 }, { "epoch": 0.40054060695417126, "grad_norm": 1.3373372087386748, "learning_rate": 2.2475844339299714e-05, "loss": 0.6277, "step": 3260 }, { "epoch": 0.4006634721710284, "grad_norm": 1.3764700481567107, "learning_rate": 2.2470266286000672e-05, "loss": 0.5544, "step": 3261 }, { "epoch": 0.4007863373878855, "grad_norm": 1.2196638764476968, "learning_rate": 2.24646868585946e-05, "loss": 0.6219, "step": 3262 }, { "epoch": 0.4009092026047426, "grad_norm": 1.0478105806511462, "learning_rate": 2.2459106058107788e-05, "loss": 0.6405, "step": 3263 }, { "epoch": 0.4010320678215997, "grad_norm": 1.3118165149275716, "learning_rate": 2.2453523885566794e-05, "loss": 0.6238, "step": 3264 }, { "epoch": 0.40115493303845684, "grad_norm": 1.5699851473408712, "learning_rate": 2.244794034199842e-05, "loss": 0.643, "step": 3265 }, { "epoch": 0.4012777982553139, "grad_norm": 1.5145323255496057, "learning_rate": 2.244235542842972e-05, "loss": 0.6878, "step": 3266 }, { "epoch": 0.401400663472171, "grad_norm": 1.5159057065925594, "learning_rate": 2.2436769145888e-05, "loss": 0.5359, "step": 3267 }, { "epoch": 0.40152352868902813, "grad_norm": 1.5078388344358378, "learning_rate": 2.243118149540083e-05, "loss": 0.5959, "step": 3268 }, { "epoch": 0.40164639390588525, "grad_norm": 1.146619347693364, "learning_rate": 2.2425592477996012e-05, "loss": 0.6745, "step": 3269 }, { "epoch": 0.40176925912274236, "grad_norm": 1.767482303724687, "learning_rate": 2.2420002094701615e-05, "loss": 0.637, "step": 3270 }, { "epoch": 0.4018921243395995, "grad_norm": 1.2166597745732455, "learning_rate": 2.241441034654596e-05, "loss": 0.5667, "step": 3271 }, { "epoch": 0.4020149895564566, "grad_norm": 1.2376417054449578, "learning_rate": 2.24088172345576e-05, "loss": 0.7022, "step": 3272 }, { "epoch": 0.40213785477331365, "grad_norm": 1.1907550450806428, "learning_rate": 2.2403222759765358e-05, "loss": 0.6045, "step": 3273 }, { "epoch": 0.40226071999017077, "grad_norm": 1.5349718993399744, "learning_rate": 2.23976269231983e-05, "loss": 0.7598, "step": 3274 }, { "epoch": 0.4023835852070279, "grad_norm": 1.4444610665867792, "learning_rate": 2.239202972588575e-05, "loss": 0.6682, "step": 3275 }, { "epoch": 0.402506450423885, "grad_norm": 1.3220353503530338, "learning_rate": 2.2386431168857263e-05, "loss": 0.613, "step": 3276 }, { "epoch": 0.4026293156407421, "grad_norm": 1.2983445936803282, "learning_rate": 2.2380831253142673e-05, "loss": 0.6995, "step": 3277 }, { "epoch": 0.40275218085759923, "grad_norm": 1.0340427808167714, "learning_rate": 2.2375229979772034e-05, "loss": 0.588, "step": 3278 }, { "epoch": 0.40287504607445634, "grad_norm": 1.2036746156684257, "learning_rate": 2.2369627349775673e-05, "loss": 0.519, "step": 3279 }, { "epoch": 0.4029979112913134, "grad_norm": 1.1995868213116705, "learning_rate": 2.2364023364184154e-05, "loss": 0.5328, "step": 3280 }, { "epoch": 0.4031207765081705, "grad_norm": 1.340386025255557, "learning_rate": 2.2358418024028294e-05, "loss": 0.6464, "step": 3281 }, { "epoch": 0.40324364172502764, "grad_norm": 1.3726389374293992, "learning_rate": 2.2352811330339164e-05, "loss": 0.6609, "step": 3282 }, { "epoch": 0.40336650694188475, "grad_norm": 1.2788012353103457, "learning_rate": 2.234720328414807e-05, "loss": 0.5462, "step": 3283 }, { "epoch": 0.40348937215874187, "grad_norm": 1.254837863480527, "learning_rate": 2.2341593886486584e-05, "loss": 0.624, "step": 3284 }, { "epoch": 0.403612237375599, "grad_norm": 1.2853586091814015, "learning_rate": 2.2335983138386513e-05, "loss": 0.6424, "step": 3285 }, { "epoch": 0.4037351025924561, "grad_norm": 1.2200355675824899, "learning_rate": 2.2330371040879914e-05, "loss": 0.5413, "step": 3286 }, { "epoch": 0.40385796780931316, "grad_norm": 1.2878276398279134, "learning_rate": 2.232475759499911e-05, "loss": 0.7391, "step": 3287 }, { "epoch": 0.40398083302617027, "grad_norm": 1.218015649223548, "learning_rate": 2.2319142801776637e-05, "loss": 0.6272, "step": 3288 }, { "epoch": 0.4041036982430274, "grad_norm": 1.1986055074291175, "learning_rate": 2.2313526662245324e-05, "loss": 0.5546, "step": 3289 }, { "epoch": 0.4042265634598845, "grad_norm": 1.1198276669215759, "learning_rate": 2.2307909177438205e-05, "loss": 0.5789, "step": 3290 }, { "epoch": 0.4043494286767416, "grad_norm": 1.4579383955347107, "learning_rate": 2.230229034838859e-05, "loss": 0.7434, "step": 3291 }, { "epoch": 0.40447229389359873, "grad_norm": 1.7663899913204821, "learning_rate": 2.229667017613002e-05, "loss": 0.6948, "step": 3292 }, { "epoch": 0.40459515911045585, "grad_norm": 1.5490562994452994, "learning_rate": 2.229104866169629e-05, "loss": 0.5501, "step": 3293 }, { "epoch": 0.40471802432731296, "grad_norm": 1.3236869479302555, "learning_rate": 2.2285425806121446e-05, "loss": 0.7112, "step": 3294 }, { "epoch": 0.40484088954417, "grad_norm": 1.4691450430078279, "learning_rate": 2.2279801610439768e-05, "loss": 0.6875, "step": 3295 }, { "epoch": 0.40496375476102714, "grad_norm": 1.336442650731409, "learning_rate": 2.22741760756858e-05, "loss": 0.7052, "step": 3296 }, { "epoch": 0.40508661997788425, "grad_norm": 1.2275347467613416, "learning_rate": 2.2268549202894314e-05, "loss": 0.5856, "step": 3297 }, { "epoch": 0.40520948519474137, "grad_norm": 1.4220932076997166, "learning_rate": 2.2262920993100345e-05, "loss": 0.613, "step": 3298 }, { "epoch": 0.4053323504115985, "grad_norm": 1.485283443779952, "learning_rate": 2.2257291447339157e-05, "loss": 0.6244, "step": 3299 }, { "epoch": 0.4054552156284556, "grad_norm": 1.5237046155388518, "learning_rate": 2.2251660566646275e-05, "loss": 0.7001, "step": 3300 }, { "epoch": 0.4055780808453127, "grad_norm": 1.501783904885649, "learning_rate": 2.2246028352057457e-05, "loss": 0.7254, "step": 3301 }, { "epoch": 0.4057009460621698, "grad_norm": 3.033988770552471, "learning_rate": 2.224039480460872e-05, "loss": 0.6636, "step": 3302 }, { "epoch": 0.4058238112790269, "grad_norm": 1.35346960946082, "learning_rate": 2.2234759925336312e-05, "loss": 0.6745, "step": 3303 }, { "epoch": 0.405946676495884, "grad_norm": 1.3120245620804625, "learning_rate": 2.222912371527674e-05, "loss": 0.7097, "step": 3304 }, { "epoch": 0.4060695417127411, "grad_norm": 1.4771858510826843, "learning_rate": 2.2223486175466734e-05, "loss": 0.6112, "step": 3305 }, { "epoch": 0.40619240692959824, "grad_norm": 1.1790424136623359, "learning_rate": 2.2217847306943298e-05, "loss": 0.6156, "step": 3306 }, { "epoch": 0.40631527214645535, "grad_norm": 1.3462765175868245, "learning_rate": 2.2212207110743655e-05, "loss": 0.592, "step": 3307 }, { "epoch": 0.40643813736331247, "grad_norm": 1.2257372200965577, "learning_rate": 2.220656558790529e-05, "loss": 0.701, "step": 3308 }, { "epoch": 0.40656100258016953, "grad_norm": 1.6782583017049542, "learning_rate": 2.2200922739465915e-05, "loss": 0.7204, "step": 3309 }, { "epoch": 0.40668386779702664, "grad_norm": 1.3454425566535098, "learning_rate": 2.219527856646351e-05, "loss": 0.7044, "step": 3310 }, { "epoch": 0.40680673301388376, "grad_norm": 1.3718040496771688, "learning_rate": 2.2189633069936273e-05, "loss": 0.7009, "step": 3311 }, { "epoch": 0.4069295982307409, "grad_norm": 1.203467900709806, "learning_rate": 2.2183986250922663e-05, "loss": 0.5794, "step": 3312 }, { "epoch": 0.407052463447598, "grad_norm": 1.3560079022897504, "learning_rate": 2.2178338110461365e-05, "loss": 0.6078, "step": 3313 }, { "epoch": 0.4071753286644551, "grad_norm": 1.4533278317230367, "learning_rate": 2.2172688649591325e-05, "loss": 0.7051, "step": 3314 }, { "epoch": 0.4072981938813122, "grad_norm": 1.3760954883701098, "learning_rate": 2.2167037869351728e-05, "loss": 0.6702, "step": 3315 }, { "epoch": 0.40742105909816934, "grad_norm": 1.1818077196663601, "learning_rate": 2.2161385770781994e-05, "loss": 0.5981, "step": 3316 }, { "epoch": 0.4075439243150264, "grad_norm": 1.4784620306465266, "learning_rate": 2.215573235492179e-05, "loss": 0.5883, "step": 3317 }, { "epoch": 0.4076667895318835, "grad_norm": 1.5499146609981964, "learning_rate": 2.2150077622811024e-05, "loss": 0.6884, "step": 3318 }, { "epoch": 0.4077896547487406, "grad_norm": 1.5208026898802873, "learning_rate": 2.2144421575489853e-05, "loss": 0.558, "step": 3319 }, { "epoch": 0.40791251996559774, "grad_norm": 1.2553723609396739, "learning_rate": 2.2138764213998666e-05, "loss": 0.5621, "step": 3320 }, { "epoch": 0.40803538518245486, "grad_norm": 1.4505094275698664, "learning_rate": 2.2133105539378103e-05, "loss": 0.6816, "step": 3321 }, { "epoch": 0.408158250399312, "grad_norm": 1.37000915940128, "learning_rate": 2.212744555266903e-05, "loss": 0.7299, "step": 3322 }, { "epoch": 0.4082811156161691, "grad_norm": 1.3584347803342223, "learning_rate": 2.2121784254912568e-05, "loss": 0.7081, "step": 3323 }, { "epoch": 0.40840398083302615, "grad_norm": 1.135022144859272, "learning_rate": 2.211612164715008e-05, "loss": 0.5862, "step": 3324 }, { "epoch": 0.40852684604988326, "grad_norm": 1.2060920024357518, "learning_rate": 2.211045773042317e-05, "loss": 0.6277, "step": 3325 }, { "epoch": 0.4086497112667404, "grad_norm": 1.2447633650053305, "learning_rate": 2.2104792505773666e-05, "loss": 0.5815, "step": 3326 }, { "epoch": 0.4087725764835975, "grad_norm": 1.2954537472941674, "learning_rate": 2.209912597424366e-05, "loss": 0.6568, "step": 3327 }, { "epoch": 0.4088954417004546, "grad_norm": 1.601495140382785, "learning_rate": 2.209345813687547e-05, "loss": 0.5562, "step": 3328 }, { "epoch": 0.4090183069173117, "grad_norm": 1.49247132385456, "learning_rate": 2.208778899471166e-05, "loss": 0.7212, "step": 3329 }, { "epoch": 0.40914117213416884, "grad_norm": 1.3380562287583588, "learning_rate": 2.2082118548795034e-05, "loss": 0.6611, "step": 3330 }, { "epoch": 0.4092640373510259, "grad_norm": 1.27705228659089, "learning_rate": 2.2076446800168624e-05, "loss": 0.6419, "step": 3331 }, { "epoch": 0.409386902567883, "grad_norm": 1.1785651779798292, "learning_rate": 2.207077374987572e-05, "loss": 0.4905, "step": 3332 }, { "epoch": 0.40950976778474013, "grad_norm": 1.3599174398547398, "learning_rate": 2.2065099398959837e-05, "loss": 0.7168, "step": 3333 }, { "epoch": 0.40963263300159725, "grad_norm": 1.3669693097557376, "learning_rate": 2.205942374846474e-05, "loss": 0.7672, "step": 3334 }, { "epoch": 0.40975549821845436, "grad_norm": 1.3169453862622456, "learning_rate": 2.205374679943443e-05, "loss": 0.5095, "step": 3335 }, { "epoch": 0.4098783634353115, "grad_norm": 1.5135642124710704, "learning_rate": 2.2048068552913136e-05, "loss": 0.6174, "step": 3336 }, { "epoch": 0.4100012286521686, "grad_norm": 1.2634133604573028, "learning_rate": 2.204238900994534e-05, "loss": 0.5551, "step": 3337 }, { "epoch": 0.41012409386902565, "grad_norm": 1.1351072817720993, "learning_rate": 2.2036708171575763e-05, "loss": 0.6424, "step": 3338 }, { "epoch": 0.41024695908588277, "grad_norm": 1.4369111819309996, "learning_rate": 2.2031026038849353e-05, "loss": 0.5755, "step": 3339 }, { "epoch": 0.4103698243027399, "grad_norm": 1.2223499057182563, "learning_rate": 2.2025342612811297e-05, "loss": 0.5662, "step": 3340 }, { "epoch": 0.410492689519597, "grad_norm": 1.4028540062997914, "learning_rate": 2.2019657894507027e-05, "loss": 0.5935, "step": 3341 }, { "epoch": 0.4106155547364541, "grad_norm": 1.4288790133396314, "learning_rate": 2.2013971884982212e-05, "loss": 0.614, "step": 3342 }, { "epoch": 0.41073841995331123, "grad_norm": 1.3997114604287628, "learning_rate": 2.200828458528276e-05, "loss": 0.5798, "step": 3343 }, { "epoch": 0.41086128517016834, "grad_norm": 1.4587483833203176, "learning_rate": 2.2002595996454805e-05, "loss": 0.6589, "step": 3344 }, { "epoch": 0.41098415038702546, "grad_norm": 1.777131129814559, "learning_rate": 2.199690611954473e-05, "loss": 0.6547, "step": 3345 }, { "epoch": 0.4111070156038825, "grad_norm": 1.3646672372065574, "learning_rate": 2.199121495559915e-05, "loss": 0.6188, "step": 3346 }, { "epoch": 0.41122988082073964, "grad_norm": 1.2692354754169843, "learning_rate": 2.198552250566492e-05, "loss": 0.6258, "step": 3347 }, { "epoch": 0.41135274603759675, "grad_norm": 1.3169315900194807, "learning_rate": 2.197982877078913e-05, "loss": 0.7404, "step": 3348 }, { "epoch": 0.41147561125445387, "grad_norm": 1.40143445944332, "learning_rate": 2.19741337520191e-05, "loss": 0.7362, "step": 3349 }, { "epoch": 0.411598476471311, "grad_norm": 1.3859193247092367, "learning_rate": 2.19684374504024e-05, "loss": 0.6806, "step": 3350 }, { "epoch": 0.4117213416881681, "grad_norm": 1.33280325276953, "learning_rate": 2.1962739866986816e-05, "loss": 0.5798, "step": 3351 }, { "epoch": 0.4118442069050252, "grad_norm": 1.3256621988653419, "learning_rate": 2.195704100282039e-05, "loss": 0.544, "step": 3352 }, { "epoch": 0.41196707212188227, "grad_norm": 1.1064679260853545, "learning_rate": 2.1951340858951392e-05, "loss": 0.5967, "step": 3353 }, { "epoch": 0.4120899373387394, "grad_norm": 1.5492112091747514, "learning_rate": 2.1945639436428324e-05, "loss": 0.6985, "step": 3354 }, { "epoch": 0.4122128025555965, "grad_norm": 1.5057146060254767, "learning_rate": 2.1939936736299925e-05, "loss": 0.6458, "step": 3355 }, { "epoch": 0.4123356677724536, "grad_norm": 1.2693686722541415, "learning_rate": 2.1934232759615168e-05, "loss": 0.6546, "step": 3356 }, { "epoch": 0.41245853298931073, "grad_norm": 1.5489566543222992, "learning_rate": 2.192852750742327e-05, "loss": 0.6547, "step": 3357 }, { "epoch": 0.41258139820616785, "grad_norm": 1.6644369613343506, "learning_rate": 2.1922820980773667e-05, "loss": 0.5725, "step": 3358 }, { "epoch": 0.41270426342302496, "grad_norm": 1.3714662477151225, "learning_rate": 2.1917113180716044e-05, "loss": 0.6029, "step": 3359 }, { "epoch": 0.412827128639882, "grad_norm": 1.4498702737426121, "learning_rate": 2.1911404108300307e-05, "loss": 0.586, "step": 3360 }, { "epoch": 0.41294999385673914, "grad_norm": 1.3331092975969738, "learning_rate": 2.1905693764576608e-05, "loss": 0.6558, "step": 3361 }, { "epoch": 0.41307285907359625, "grad_norm": 1.3440295030078897, "learning_rate": 2.1899982150595324e-05, "loss": 0.7367, "step": 3362 }, { "epoch": 0.41319572429045337, "grad_norm": 1.360115067429757, "learning_rate": 2.189426926740707e-05, "loss": 0.6462, "step": 3363 }, { "epoch": 0.4133185895073105, "grad_norm": 1.5413844042630853, "learning_rate": 2.18885551160627e-05, "loss": 0.6352, "step": 3364 }, { "epoch": 0.4134414547241676, "grad_norm": 1.4236456054226783, "learning_rate": 2.1882839697613286e-05, "loss": 0.6409, "step": 3365 }, { "epoch": 0.4135643199410247, "grad_norm": 1.4972867769243716, "learning_rate": 2.1877123013110146e-05, "loss": 0.6043, "step": 3366 }, { "epoch": 0.4136871851578818, "grad_norm": 1.3072007078254688, "learning_rate": 2.187140506360483e-05, "loss": 0.5098, "step": 3367 }, { "epoch": 0.4138100503747389, "grad_norm": 1.8102966396025628, "learning_rate": 2.186568585014912e-05, "loss": 0.6173, "step": 3368 }, { "epoch": 0.413932915591596, "grad_norm": 1.3244758532255911, "learning_rate": 2.1859965373795018e-05, "loss": 0.5819, "step": 3369 }, { "epoch": 0.4140557808084531, "grad_norm": 1.340206361974584, "learning_rate": 2.185424363559477e-05, "loss": 0.6917, "step": 3370 }, { "epoch": 0.41417864602531024, "grad_norm": 1.2939069265616907, "learning_rate": 2.1848520636600863e-05, "loss": 0.6667, "step": 3371 }, { "epoch": 0.41430151124216735, "grad_norm": 1.504537074123858, "learning_rate": 2.1842796377865995e-05, "loss": 0.6434, "step": 3372 }, { "epoch": 0.41442437645902447, "grad_norm": 1.4332700733871364, "learning_rate": 2.1837070860443115e-05, "loss": 0.5452, "step": 3373 }, { "epoch": 0.4145472416758816, "grad_norm": 1.3219964455846296, "learning_rate": 2.1831344085385386e-05, "loss": 0.6845, "step": 3374 }, { "epoch": 0.41467010689273864, "grad_norm": 1.4780072827624873, "learning_rate": 2.182561605374622e-05, "loss": 0.693, "step": 3375 }, { "epoch": 0.41479297210959576, "grad_norm": 1.3665358934227405, "learning_rate": 2.181988676657924e-05, "loss": 0.5896, "step": 3376 }, { "epoch": 0.4149158373264529, "grad_norm": 1.280974580048406, "learning_rate": 2.1814156224938322e-05, "loss": 0.5872, "step": 3377 }, { "epoch": 0.41503870254331, "grad_norm": 1.4332057151140534, "learning_rate": 2.1808424429877557e-05, "loss": 0.5861, "step": 3378 }, { "epoch": 0.4151615677601671, "grad_norm": 1.233056371667068, "learning_rate": 2.1802691382451272e-05, "loss": 0.5773, "step": 3379 }, { "epoch": 0.4152844329770242, "grad_norm": 1.2729009992526377, "learning_rate": 2.1796957083714022e-05, "loss": 0.564, "step": 3380 }, { "epoch": 0.41540729819388134, "grad_norm": 1.5072817817307798, "learning_rate": 2.17912215347206e-05, "loss": 0.7989, "step": 3381 }, { "epoch": 0.4155301634107384, "grad_norm": 1.3245118975015144, "learning_rate": 2.1785484736526017e-05, "loss": 0.5678, "step": 3382 }, { "epoch": 0.4156530286275955, "grad_norm": 1.7904570669143907, "learning_rate": 2.1779746690185522e-05, "loss": 0.7071, "step": 3383 }, { "epoch": 0.4157758938444526, "grad_norm": 1.3961431957083748, "learning_rate": 2.1774007396754594e-05, "loss": 0.5765, "step": 3384 }, { "epoch": 0.41589875906130974, "grad_norm": 1.3560826516302678, "learning_rate": 2.1768266857288934e-05, "loss": 0.5877, "step": 3385 }, { "epoch": 0.41602162427816686, "grad_norm": 1.4253043070269829, "learning_rate": 2.176252507284448e-05, "loss": 0.665, "step": 3386 }, { "epoch": 0.416144489495024, "grad_norm": 1.3409621135554424, "learning_rate": 2.1756782044477397e-05, "loss": 0.7059, "step": 3387 }, { "epoch": 0.4162673547118811, "grad_norm": 1.2565212619731896, "learning_rate": 2.1751037773244075e-05, "loss": 0.5764, "step": 3388 }, { "epoch": 0.41639021992873815, "grad_norm": 1.163030535878867, "learning_rate": 2.1745292260201137e-05, "loss": 0.5689, "step": 3389 }, { "epoch": 0.41651308514559526, "grad_norm": 1.291670492490088, "learning_rate": 2.173954550640543e-05, "loss": 0.6251, "step": 3390 }, { "epoch": 0.4166359503624524, "grad_norm": 1.5480672066997876, "learning_rate": 2.1733797512914035e-05, "loss": 0.6169, "step": 3391 }, { "epoch": 0.4167588155793095, "grad_norm": 1.2568186418753893, "learning_rate": 2.1728048280784264e-05, "loss": 0.6651, "step": 3392 }, { "epoch": 0.4168816807961666, "grad_norm": 1.5434019733246809, "learning_rate": 2.172229781107364e-05, "loss": 0.716, "step": 3393 }, { "epoch": 0.4170045460130237, "grad_norm": 1.3383956630595795, "learning_rate": 2.1716546104839928e-05, "loss": 0.6713, "step": 3394 }, { "epoch": 0.41712741122988084, "grad_norm": 1.0622292640538367, "learning_rate": 2.1710793163141117e-05, "loss": 0.5943, "step": 3395 }, { "epoch": 0.41725027644673796, "grad_norm": 1.3123530742270229, "learning_rate": 2.170503898703543e-05, "loss": 0.6173, "step": 3396 }, { "epoch": 0.417373141663595, "grad_norm": 1.3353647895636316, "learning_rate": 2.1699283577581302e-05, "loss": 0.6253, "step": 3397 }, { "epoch": 0.41749600688045213, "grad_norm": 1.0919582417341764, "learning_rate": 2.1693526935837405e-05, "loss": 0.7299, "step": 3398 }, { "epoch": 0.41761887209730925, "grad_norm": 1.3949519704704174, "learning_rate": 2.168776906286264e-05, "loss": 0.5954, "step": 3399 }, { "epoch": 0.41774173731416636, "grad_norm": 1.2770639765182688, "learning_rate": 2.1682009959716127e-05, "loss": 0.6256, "step": 3400 }, { "epoch": 0.4178646025310235, "grad_norm": 1.1621149078660744, "learning_rate": 2.1676249627457218e-05, "loss": 0.5682, "step": 3401 }, { "epoch": 0.4179874677478806, "grad_norm": 1.2223743131288431, "learning_rate": 2.167048806714548e-05, "loss": 0.5749, "step": 3402 }, { "epoch": 0.4181103329647377, "grad_norm": 1.3940481688979862, "learning_rate": 2.1664725279840727e-05, "loss": 0.5716, "step": 3403 }, { "epoch": 0.41823319818159477, "grad_norm": 2.4849102096996627, "learning_rate": 2.1658961266602984e-05, "loss": 0.4899, "step": 3404 }, { "epoch": 0.4183560633984519, "grad_norm": 1.499803879770401, "learning_rate": 2.1653196028492495e-05, "loss": 0.6463, "step": 3405 }, { "epoch": 0.418478928615309, "grad_norm": 1.3694226653077874, "learning_rate": 2.1647429566569745e-05, "loss": 0.754, "step": 3406 }, { "epoch": 0.4186017938321661, "grad_norm": 1.7315060066167258, "learning_rate": 2.164166188189544e-05, "loss": 0.7452, "step": 3407 }, { "epoch": 0.41872465904902323, "grad_norm": 1.4569230185600441, "learning_rate": 2.16358929755305e-05, "loss": 0.6706, "step": 3408 }, { "epoch": 0.41884752426588034, "grad_norm": 1.4491424340004655, "learning_rate": 2.1630122848536087e-05, "loss": 0.5865, "step": 3409 }, { "epoch": 0.41897038948273746, "grad_norm": 1.2376531672144617, "learning_rate": 2.162435150197357e-05, "loss": 0.5798, "step": 3410 }, { "epoch": 0.4190932546995945, "grad_norm": 1.3110796923503247, "learning_rate": 2.1618578936904552e-05, "loss": 0.7114, "step": 3411 }, { "epoch": 0.41921611991645163, "grad_norm": 1.3800602090839078, "learning_rate": 2.1612805154390868e-05, "loss": 0.555, "step": 3412 }, { "epoch": 0.41933898513330875, "grad_norm": 1.2511616870094646, "learning_rate": 2.160703015549456e-05, "loss": 0.7556, "step": 3413 }, { "epoch": 0.41946185035016587, "grad_norm": 1.2472848015219806, "learning_rate": 2.1601253941277906e-05, "loss": 0.6803, "step": 3414 }, { "epoch": 0.419584715567023, "grad_norm": 1.343935405178687, "learning_rate": 2.1595476512803397e-05, "loss": 0.5542, "step": 3415 }, { "epoch": 0.4197075807838801, "grad_norm": 1.2314846800941737, "learning_rate": 2.158969787113375e-05, "loss": 0.5618, "step": 3416 }, { "epoch": 0.4198304460007372, "grad_norm": 1.1608513718561941, "learning_rate": 2.1583918017331925e-05, "loss": 0.7079, "step": 3417 }, { "epoch": 0.41995331121759427, "grad_norm": 1.2781343848868714, "learning_rate": 2.1578136952461073e-05, "loss": 0.5892, "step": 3418 }, { "epoch": 0.4200761764344514, "grad_norm": 1.34345790062968, "learning_rate": 2.157235467758459e-05, "loss": 0.8701, "step": 3419 }, { "epoch": 0.4201990416513085, "grad_norm": 1.1791554080197288, "learning_rate": 2.156657119376609e-05, "loss": 0.5343, "step": 3420 }, { "epoch": 0.4203219068681656, "grad_norm": 1.2500766333049922, "learning_rate": 2.1560786502069398e-05, "loss": 0.6451, "step": 3421 }, { "epoch": 0.42044477208502273, "grad_norm": 1.344315547591989, "learning_rate": 2.1555000603558588e-05, "loss": 0.7266, "step": 3422 }, { "epoch": 0.42056763730187985, "grad_norm": 1.30963704647146, "learning_rate": 2.154921349929792e-05, "loss": 0.8358, "step": 3423 }, { "epoch": 0.42069050251873696, "grad_norm": 1.1505459951032426, "learning_rate": 2.1543425190351908e-05, "loss": 0.6363, "step": 3424 }, { "epoch": 0.4208133677355941, "grad_norm": 1.223232236262694, "learning_rate": 2.153763567778526e-05, "loss": 0.6237, "step": 3425 }, { "epoch": 0.42093623295245114, "grad_norm": 1.3456357796447265, "learning_rate": 2.1531844962662933e-05, "loss": 0.6217, "step": 3426 }, { "epoch": 0.42105909816930825, "grad_norm": 1.3688335997056595, "learning_rate": 2.152605304605008e-05, "loss": 0.7334, "step": 3427 }, { "epoch": 0.42118196338616537, "grad_norm": 1.4548641596744412, "learning_rate": 2.15202599290121e-05, "loss": 0.5867, "step": 3428 }, { "epoch": 0.4213048286030225, "grad_norm": 1.651687531284175, "learning_rate": 2.1514465612614583e-05, "loss": 0.536, "step": 3429 }, { "epoch": 0.4214276938198796, "grad_norm": 1.3287960741023768, "learning_rate": 2.150867009792337e-05, "loss": 0.6307, "step": 3430 }, { "epoch": 0.4215505590367367, "grad_norm": 1.7871094662108038, "learning_rate": 2.1502873386004498e-05, "loss": 0.6904, "step": 3431 }, { "epoch": 0.42167342425359383, "grad_norm": 1.2022194499429109, "learning_rate": 2.1497075477924245e-05, "loss": 0.6226, "step": 3432 }, { "epoch": 0.4217962894704509, "grad_norm": 1.2532615433842964, "learning_rate": 2.149127637474909e-05, "loss": 0.5678, "step": 3433 }, { "epoch": 0.421919154687308, "grad_norm": 1.462332733116907, "learning_rate": 2.1485476077545745e-05, "loss": 0.6775, "step": 3434 }, { "epoch": 0.4220420199041651, "grad_norm": 1.3394230906627682, "learning_rate": 2.1479674587381136e-05, "loss": 0.671, "step": 3435 }, { "epoch": 0.42216488512102224, "grad_norm": 2.04789210122639, "learning_rate": 2.1473871905322406e-05, "loss": 0.5973, "step": 3436 }, { "epoch": 0.42228775033787935, "grad_norm": 1.3532358978264498, "learning_rate": 2.146806803243692e-05, "loss": 0.6536, "step": 3437 }, { "epoch": 0.42241061555473647, "grad_norm": 1.6904486181712066, "learning_rate": 2.1462262969792272e-05, "loss": 0.7115, "step": 3438 }, { "epoch": 0.4225334807715936, "grad_norm": 1.447254507414375, "learning_rate": 2.1456456718456256e-05, "loss": 0.6739, "step": 3439 }, { "epoch": 0.42265634598845064, "grad_norm": 1.1858845413896495, "learning_rate": 2.1450649279496903e-05, "loss": 0.6731, "step": 3440 }, { "epoch": 0.42277921120530776, "grad_norm": 1.4475692848098725, "learning_rate": 2.1444840653982447e-05, "loss": 0.6204, "step": 3441 }, { "epoch": 0.4229020764221649, "grad_norm": 1.4112078253892182, "learning_rate": 2.143903084298135e-05, "loss": 0.6835, "step": 3442 }, { "epoch": 0.423024941639022, "grad_norm": 1.0785855388379113, "learning_rate": 2.1433219847562287e-05, "loss": 0.5242, "step": 3443 }, { "epoch": 0.4231478068558791, "grad_norm": 1.2738292461621952, "learning_rate": 2.1427407668794152e-05, "loss": 0.68, "step": 3444 }, { "epoch": 0.4232706720727362, "grad_norm": 1.275580692813145, "learning_rate": 2.1421594307746062e-05, "loss": 0.7372, "step": 3445 }, { "epoch": 0.42339353728959334, "grad_norm": 1.5429549084875198, "learning_rate": 2.1415779765487342e-05, "loss": 0.6353, "step": 3446 }, { "epoch": 0.42351640250645045, "grad_norm": 1.179510628987097, "learning_rate": 2.1409964043087548e-05, "loss": 0.6266, "step": 3447 }, { "epoch": 0.4236392677233075, "grad_norm": 1.5222910131290042, "learning_rate": 2.140414714161643e-05, "loss": 0.591, "step": 3448 }, { "epoch": 0.4237621329401646, "grad_norm": 1.4094096818381, "learning_rate": 2.1398329062143982e-05, "loss": 0.5634, "step": 3449 }, { "epoch": 0.42388499815702174, "grad_norm": 1.3062676697169266, "learning_rate": 2.1392509805740396e-05, "loss": 0.6144, "step": 3450 }, { "epoch": 0.42400786337387886, "grad_norm": 1.1483753458772148, "learning_rate": 2.138668937347609e-05, "loss": 0.6418, "step": 3451 }, { "epoch": 0.424130728590736, "grad_norm": 1.1712850690879462, "learning_rate": 2.1380867766421693e-05, "loss": 0.6378, "step": 3452 }, { "epoch": 0.4242535938075931, "grad_norm": 1.1650030137688014, "learning_rate": 2.137504498564805e-05, "loss": 0.6171, "step": 3453 }, { "epoch": 0.4243764590244502, "grad_norm": 1.5431132340873495, "learning_rate": 2.136922103222623e-05, "loss": 0.637, "step": 3454 }, { "epoch": 0.42449932424130726, "grad_norm": 1.4291176347121837, "learning_rate": 2.1363395907227502e-05, "loss": 0.5367, "step": 3455 }, { "epoch": 0.4246221894581644, "grad_norm": 1.2750110421586105, "learning_rate": 2.1357569611723365e-05, "loss": 0.5859, "step": 3456 }, { "epoch": 0.4247450546750215, "grad_norm": 1.2118502046914408, "learning_rate": 2.135174214678553e-05, "loss": 0.5842, "step": 3457 }, { "epoch": 0.4248679198918786, "grad_norm": 1.3494988255942384, "learning_rate": 2.134591351348592e-05, "loss": 0.5466, "step": 3458 }, { "epoch": 0.4249907851087357, "grad_norm": 1.4186921478681067, "learning_rate": 2.1340083712896674e-05, "loss": 0.5611, "step": 3459 }, { "epoch": 0.42511365032559284, "grad_norm": 1.2900068078744247, "learning_rate": 2.1334252746090142e-05, "loss": 0.544, "step": 3460 }, { "epoch": 0.42523651554244996, "grad_norm": 1.1790126200337352, "learning_rate": 2.1328420614138903e-05, "loss": 0.628, "step": 3461 }, { "epoch": 0.425359380759307, "grad_norm": 1.4201331352323003, "learning_rate": 2.1322587318115728e-05, "loss": 0.6863, "step": 3462 }, { "epoch": 0.42548224597616413, "grad_norm": 1.3952737159974218, "learning_rate": 2.131675285909362e-05, "loss": 0.5533, "step": 3463 }, { "epoch": 0.42560511119302125, "grad_norm": 1.3731822515889234, "learning_rate": 2.1310917238145793e-05, "loss": 0.5844, "step": 3464 }, { "epoch": 0.42572797640987836, "grad_norm": 1.1259352509729847, "learning_rate": 2.130508045634566e-05, "loss": 0.5402, "step": 3465 }, { "epoch": 0.4258508416267355, "grad_norm": 1.242487156386763, "learning_rate": 2.1299242514766875e-05, "loss": 0.7108, "step": 3466 }, { "epoch": 0.4259737068435926, "grad_norm": 1.3507853747514502, "learning_rate": 2.1293403414483277e-05, "loss": 0.6303, "step": 3467 }, { "epoch": 0.4260965720604497, "grad_norm": 1.4693490228998327, "learning_rate": 2.128756315656894e-05, "loss": 0.6299, "step": 3468 }, { "epoch": 0.42621943727730677, "grad_norm": 1.1937378536426377, "learning_rate": 2.128172174209813e-05, "loss": 0.6623, "step": 3469 }, { "epoch": 0.4263423024941639, "grad_norm": 1.1289603434924278, "learning_rate": 2.127587917214535e-05, "loss": 0.5223, "step": 3470 }, { "epoch": 0.426465167711021, "grad_norm": 1.3695411084313474, "learning_rate": 2.127003544778529e-05, "loss": 0.6019, "step": 3471 }, { "epoch": 0.4265880329278781, "grad_norm": 1.2946048095233342, "learning_rate": 2.126419057009288e-05, "loss": 0.7382, "step": 3472 }, { "epoch": 0.42671089814473523, "grad_norm": 1.4163518093859047, "learning_rate": 2.1258344540143234e-05, "loss": 0.5762, "step": 3473 }, { "epoch": 0.42683376336159234, "grad_norm": 1.116491306654738, "learning_rate": 2.1252497359011698e-05, "loss": 0.6106, "step": 3474 }, { "epoch": 0.42695662857844946, "grad_norm": 1.5549779299348403, "learning_rate": 2.1246649027773815e-05, "loss": 0.6282, "step": 3475 }, { "epoch": 0.4270794937953066, "grad_norm": 1.1575593920734617, "learning_rate": 2.1240799547505365e-05, "loss": 0.5321, "step": 3476 }, { "epoch": 0.42720235901216363, "grad_norm": 1.2864142345359124, "learning_rate": 2.1234948919282303e-05, "loss": 0.6346, "step": 3477 }, { "epoch": 0.42732522422902075, "grad_norm": 1.273332383644958, "learning_rate": 2.1229097144180832e-05, "loss": 0.637, "step": 3478 }, { "epoch": 0.42744808944587787, "grad_norm": 1.251676796771201, "learning_rate": 2.122324422327733e-05, "loss": 0.5353, "step": 3479 }, { "epoch": 0.427570954662735, "grad_norm": 1.2701431657948747, "learning_rate": 2.1217390157648414e-05, "loss": 0.5508, "step": 3480 }, { "epoch": 0.4276938198795921, "grad_norm": 1.3266420778729997, "learning_rate": 2.1211534948370903e-05, "loss": 0.6394, "step": 3481 }, { "epoch": 0.4278166850964492, "grad_norm": 1.4627882353558082, "learning_rate": 2.1205678596521817e-05, "loss": 0.7113, "step": 3482 }, { "epoch": 0.4279395503133063, "grad_norm": 1.4261389085707894, "learning_rate": 2.1199821103178402e-05, "loss": 0.5973, "step": 3483 }, { "epoch": 0.4280624155301634, "grad_norm": 1.2957586514949393, "learning_rate": 2.11939624694181e-05, "loss": 0.5254, "step": 3484 }, { "epoch": 0.4281852807470205, "grad_norm": 1.2991919424946516, "learning_rate": 2.1188102696318573e-05, "loss": 0.6874, "step": 3485 }, { "epoch": 0.4283081459638776, "grad_norm": 1.3683165180127044, "learning_rate": 2.118224178495768e-05, "loss": 0.6245, "step": 3486 }, { "epoch": 0.42843101118073473, "grad_norm": 1.2315167285915316, "learning_rate": 2.1176379736413513e-05, "loss": 0.5204, "step": 3487 }, { "epoch": 0.42855387639759185, "grad_norm": 1.3019592707928342, "learning_rate": 2.1170516551764343e-05, "loss": 0.5801, "step": 3488 }, { "epoch": 0.42867674161444896, "grad_norm": 1.4876652120151799, "learning_rate": 2.1164652232088674e-05, "loss": 0.6455, "step": 3489 }, { "epoch": 0.4287996068313061, "grad_norm": 1.4653019550760753, "learning_rate": 2.1158786778465206e-05, "loss": 0.7316, "step": 3490 }, { "epoch": 0.42892247204816314, "grad_norm": 1.3358511966309072, "learning_rate": 2.1152920191972848e-05, "loss": 0.6844, "step": 3491 }, { "epoch": 0.42904533726502025, "grad_norm": 1.1623128264287934, "learning_rate": 2.1147052473690726e-05, "loss": 0.6497, "step": 3492 }, { "epoch": 0.42916820248187737, "grad_norm": 1.4629359956351053, "learning_rate": 2.1141183624698166e-05, "loss": 0.6236, "step": 3493 }, { "epoch": 0.4292910676987345, "grad_norm": 1.4269132231777633, "learning_rate": 2.1135313646074702e-05, "loss": 0.6466, "step": 3494 }, { "epoch": 0.4294139329155916, "grad_norm": 1.2967068741201084, "learning_rate": 2.1129442538900087e-05, "loss": 0.6207, "step": 3495 }, { "epoch": 0.4295367981324487, "grad_norm": 1.1081636786839073, "learning_rate": 2.1123570304254265e-05, "loss": 0.5955, "step": 3496 }, { "epoch": 0.42965966334930583, "grad_norm": 1.2408979060011305, "learning_rate": 2.11176969432174e-05, "loss": 0.6014, "step": 3497 }, { "epoch": 0.42978252856616295, "grad_norm": 1.4705562773061758, "learning_rate": 2.1111822456869853e-05, "loss": 0.5885, "step": 3498 }, { "epoch": 0.42990539378302, "grad_norm": 1.1270580161368646, "learning_rate": 2.1105946846292207e-05, "loss": 0.6394, "step": 3499 }, { "epoch": 0.4300282589998771, "grad_norm": 1.3680656516654517, "learning_rate": 2.1100070112565237e-05, "loss": 0.7149, "step": 3500 }, { "epoch": 0.43015112421673424, "grad_norm": 1.5119150884567174, "learning_rate": 2.1094192256769927e-05, "loss": 0.683, "step": 3501 }, { "epoch": 0.43027398943359135, "grad_norm": 1.1882053786060716, "learning_rate": 2.108831327998747e-05, "loss": 0.6852, "step": 3502 }, { "epoch": 0.43039685465044847, "grad_norm": 1.3467092595731702, "learning_rate": 2.108243318329928e-05, "loss": 0.5993, "step": 3503 }, { "epoch": 0.4305197198673056, "grad_norm": 1.2252654449193163, "learning_rate": 2.107655196778694e-05, "loss": 0.6428, "step": 3504 }, { "epoch": 0.4306425850841627, "grad_norm": 1.4843352012868505, "learning_rate": 2.1070669634532276e-05, "loss": 0.6756, "step": 3505 }, { "epoch": 0.43076545030101976, "grad_norm": 1.228734169144027, "learning_rate": 2.1064786184617306e-05, "loss": 0.552, "step": 3506 }, { "epoch": 0.4308883155178769, "grad_norm": 1.3860589304386575, "learning_rate": 2.1058901619124247e-05, "loss": 0.5642, "step": 3507 }, { "epoch": 0.431011180734734, "grad_norm": 1.5264744128359216, "learning_rate": 2.1053015939135533e-05, "loss": 0.7452, "step": 3508 }, { "epoch": 0.4311340459515911, "grad_norm": 1.418921177946135, "learning_rate": 2.1047129145733787e-05, "loss": 0.6867, "step": 3509 }, { "epoch": 0.4312569111684482, "grad_norm": 1.2808596740830946, "learning_rate": 2.1041241240001856e-05, "loss": 0.7363, "step": 3510 }, { "epoch": 0.43137977638530534, "grad_norm": 1.2464491864666372, "learning_rate": 2.1035352223022773e-05, "loss": 0.7, "step": 3511 }, { "epoch": 0.43150264160216245, "grad_norm": 1.4428272361157695, "learning_rate": 2.1029462095879795e-05, "loss": 0.5306, "step": 3512 }, { "epoch": 0.4316255068190195, "grad_norm": 1.2847832048025258, "learning_rate": 2.1023570859656358e-05, "loss": 0.6244, "step": 3513 }, { "epoch": 0.4317483720358766, "grad_norm": 1.1897331667663213, "learning_rate": 2.1017678515436134e-05, "loss": 0.6292, "step": 3514 }, { "epoch": 0.43187123725273374, "grad_norm": 1.2578260351544996, "learning_rate": 2.1011785064302967e-05, "loss": 0.5592, "step": 3515 }, { "epoch": 0.43199410246959086, "grad_norm": 1.4942458314074962, "learning_rate": 2.100589050734093e-05, "loss": 0.6732, "step": 3516 }, { "epoch": 0.432116967686448, "grad_norm": 1.2956015986861527, "learning_rate": 2.0999994845634285e-05, "loss": 0.6269, "step": 3517 }, { "epoch": 0.4322398329033051, "grad_norm": 1.3175340532920685, "learning_rate": 2.0994098080267496e-05, "loss": 0.6093, "step": 3518 }, { "epoch": 0.4323626981201622, "grad_norm": 1.3453191037459895, "learning_rate": 2.0988200212325237e-05, "loss": 0.6281, "step": 3519 }, { "epoch": 0.43248556333701926, "grad_norm": 1.1100017930976023, "learning_rate": 2.0982301242892386e-05, "loss": 0.5573, "step": 3520 }, { "epoch": 0.4326084285538764, "grad_norm": 1.3691318020400036, "learning_rate": 2.0976401173054016e-05, "loss": 0.6505, "step": 3521 }, { "epoch": 0.4327312937707335, "grad_norm": 1.3761625591021982, "learning_rate": 2.0970500003895408e-05, "loss": 0.6689, "step": 3522 }, { "epoch": 0.4328541589875906, "grad_norm": 1.2238728358041044, "learning_rate": 2.0964597736502043e-05, "loss": 0.4826, "step": 3523 }, { "epoch": 0.4329770242044477, "grad_norm": 1.2760978775899323, "learning_rate": 2.0958694371959614e-05, "loss": 0.7522, "step": 3524 }, { "epoch": 0.43309988942130484, "grad_norm": 1.2735162659376245, "learning_rate": 2.095278991135399e-05, "loss": 0.7004, "step": 3525 }, { "epoch": 0.43322275463816196, "grad_norm": 1.2865401142458193, "learning_rate": 2.0946884355771274e-05, "loss": 0.6095, "step": 3526 }, { "epoch": 0.43334561985501907, "grad_norm": 1.4271250460710811, "learning_rate": 2.0940977706297747e-05, "loss": 0.6103, "step": 3527 }, { "epoch": 0.43346848507187613, "grad_norm": 1.3118349745397995, "learning_rate": 2.0935069964019897e-05, "loss": 0.6236, "step": 3528 }, { "epoch": 0.43359135028873325, "grad_norm": 1.2641029968273507, "learning_rate": 2.0929161130024415e-05, "loss": 0.6141, "step": 3529 }, { "epoch": 0.43371421550559036, "grad_norm": 1.1963873737381538, "learning_rate": 2.0923251205398198e-05, "loss": 0.5964, "step": 3530 }, { "epoch": 0.4338370807224475, "grad_norm": 1.447872104245257, "learning_rate": 2.0917340191228337e-05, "loss": 0.6553, "step": 3531 }, { "epoch": 0.4339599459393046, "grad_norm": 1.5704484172987203, "learning_rate": 2.091142808860212e-05, "loss": 0.684, "step": 3532 }, { "epoch": 0.4340828111561617, "grad_norm": 1.119433282395661, "learning_rate": 2.0905514898607045e-05, "loss": 0.5585, "step": 3533 }, { "epoch": 0.4342056763730188, "grad_norm": 1.4462673565174435, "learning_rate": 2.0899600622330802e-05, "loss": 0.6424, "step": 3534 }, { "epoch": 0.4343285415898759, "grad_norm": 1.6351129460811595, "learning_rate": 2.0893685260861288e-05, "loss": 0.4973, "step": 3535 }, { "epoch": 0.434451406806733, "grad_norm": 1.3040331531009521, "learning_rate": 2.0887768815286585e-05, "loss": 0.7803, "step": 3536 }, { "epoch": 0.4345742720235901, "grad_norm": 1.4574794729512208, "learning_rate": 2.0881851286694998e-05, "loss": 0.592, "step": 3537 }, { "epoch": 0.43469713724044723, "grad_norm": 1.2444503311016155, "learning_rate": 2.0875932676175013e-05, "loss": 0.6506, "step": 3538 }, { "epoch": 0.43482000245730434, "grad_norm": 1.1924768567215682, "learning_rate": 2.0870012984815312e-05, "loss": 0.5625, "step": 3539 }, { "epoch": 0.43494286767416146, "grad_norm": 1.3777776442672132, "learning_rate": 2.0864092213704797e-05, "loss": 0.6166, "step": 3540 }, { "epoch": 0.4350657328910186, "grad_norm": 1.3031490973736857, "learning_rate": 2.0858170363932545e-05, "loss": 0.5435, "step": 3541 }, { "epoch": 0.43518859810787563, "grad_norm": 1.7107670355891171, "learning_rate": 2.0852247436587847e-05, "loss": 0.7049, "step": 3542 }, { "epoch": 0.43531146332473275, "grad_norm": 1.1516773573194925, "learning_rate": 2.0846323432760192e-05, "loss": 0.6324, "step": 3543 }, { "epoch": 0.43543432854158987, "grad_norm": 1.152072180773817, "learning_rate": 2.084039835353925e-05, "loss": 0.6264, "step": 3544 }, { "epoch": 0.435557193758447, "grad_norm": 1.0773374208395947, "learning_rate": 2.0834472200014906e-05, "loss": 0.6413, "step": 3545 }, { "epoch": 0.4356800589753041, "grad_norm": 1.1382554743279147, "learning_rate": 2.0828544973277244e-05, "loss": 0.623, "step": 3546 }, { "epoch": 0.4358029241921612, "grad_norm": 1.351263416899528, "learning_rate": 2.0822616674416533e-05, "loss": 0.6132, "step": 3547 }, { "epoch": 0.4359257894090183, "grad_norm": 1.1337283245204235, "learning_rate": 2.0816687304523243e-05, "loss": 0.6389, "step": 3548 }, { "epoch": 0.4360486546258754, "grad_norm": 1.386060331131095, "learning_rate": 2.0810756864688045e-05, "loss": 0.748, "step": 3549 }, { "epoch": 0.4361715198427325, "grad_norm": 1.0210122980684389, "learning_rate": 2.080482535600181e-05, "loss": 0.5665, "step": 3550 }, { "epoch": 0.4362943850595896, "grad_norm": 1.2743636725868617, "learning_rate": 2.0798892779555592e-05, "loss": 0.6544, "step": 3551 }, { "epoch": 0.43641725027644673, "grad_norm": 1.2063966035705025, "learning_rate": 2.079295913644066e-05, "loss": 0.6131, "step": 3552 }, { "epoch": 0.43654011549330385, "grad_norm": 1.209310452380257, "learning_rate": 2.0787024427748455e-05, "loss": 0.5675, "step": 3553 }, { "epoch": 0.43666298071016096, "grad_norm": 1.2966964900213451, "learning_rate": 2.078108865457064e-05, "loss": 0.6714, "step": 3554 }, { "epoch": 0.4367858459270181, "grad_norm": 1.2008599360903591, "learning_rate": 2.0775151817999063e-05, "loss": 0.6704, "step": 3555 }, { "epoch": 0.4369087111438752, "grad_norm": 1.0400466061083526, "learning_rate": 2.0769213919125764e-05, "loss": 0.5177, "step": 3556 }, { "epoch": 0.43703157636073225, "grad_norm": 1.1393015265189632, "learning_rate": 2.0763274959042972e-05, "loss": 0.5468, "step": 3557 }, { "epoch": 0.43715444157758937, "grad_norm": 1.229536238099014, "learning_rate": 2.0757334938843135e-05, "loss": 0.6055, "step": 3558 }, { "epoch": 0.4372773067944465, "grad_norm": 1.2007466956340924, "learning_rate": 2.075139385961886e-05, "loss": 0.5369, "step": 3559 }, { "epoch": 0.4374001720113036, "grad_norm": 1.0828416064117952, "learning_rate": 2.0745451722462996e-05, "loss": 0.654, "step": 3560 }, { "epoch": 0.4375230372281607, "grad_norm": 1.2401853254845558, "learning_rate": 2.0739508528468544e-05, "loss": 0.6407, "step": 3561 }, { "epoch": 0.43764590244501783, "grad_norm": 1.3709376423885293, "learning_rate": 2.0733564278728723e-05, "loss": 0.647, "step": 3562 }, { "epoch": 0.43776876766187495, "grad_norm": 1.3701327279378608, "learning_rate": 2.072761897433693e-05, "loss": 0.6621, "step": 3563 }, { "epoch": 0.437891632878732, "grad_norm": 1.286140118392058, "learning_rate": 2.072167261638678e-05, "loss": 0.5152, "step": 3564 }, { "epoch": 0.4380144980955891, "grad_norm": 1.363175860850957, "learning_rate": 2.0715725205972054e-05, "loss": 0.5822, "step": 3565 }, { "epoch": 0.43813736331244624, "grad_norm": 1.1993207260321368, "learning_rate": 2.070977674418675e-05, "loss": 0.6987, "step": 3566 }, { "epoch": 0.43826022852930335, "grad_norm": 1.4861955833302396, "learning_rate": 2.0703827232125033e-05, "loss": 0.6389, "step": 3567 }, { "epoch": 0.43838309374616047, "grad_norm": 1.1792785668228283, "learning_rate": 2.069787667088129e-05, "loss": 0.6862, "step": 3568 }, { "epoch": 0.4385059589630176, "grad_norm": 1.3568363291150614, "learning_rate": 2.069192506155009e-05, "loss": 0.5728, "step": 3569 }, { "epoch": 0.4386288241798747, "grad_norm": 1.5954392799221275, "learning_rate": 2.068597240522618e-05, "loss": 0.7716, "step": 3570 }, { "epoch": 0.43875168939673176, "grad_norm": 1.168970342508959, "learning_rate": 2.068001870300453e-05, "loss": 0.5904, "step": 3571 }, { "epoch": 0.4388745546135889, "grad_norm": 1.2184237657991792, "learning_rate": 2.067406395598027e-05, "loss": 0.5849, "step": 3572 }, { "epoch": 0.438997419830446, "grad_norm": 1.2053715822610087, "learning_rate": 2.0668108165248747e-05, "loss": 0.5025, "step": 3573 }, { "epoch": 0.4391202850473031, "grad_norm": 1.3292318428606023, "learning_rate": 2.0662151331905486e-05, "loss": 0.6254, "step": 3574 }, { "epoch": 0.4392431502641602, "grad_norm": 1.3748585063043088, "learning_rate": 2.0656193457046206e-05, "loss": 0.4897, "step": 3575 }, { "epoch": 0.43936601548101734, "grad_norm": 1.3308241304257984, "learning_rate": 2.065023454176682e-05, "loss": 0.682, "step": 3576 }, { "epoch": 0.43948888069787445, "grad_norm": 1.6031579596144083, "learning_rate": 2.064427458716344e-05, "loss": 0.7342, "step": 3577 }, { "epoch": 0.43961174591473157, "grad_norm": 1.6034968033814327, "learning_rate": 2.0638313594332344e-05, "loss": 0.5246, "step": 3578 }, { "epoch": 0.4397346111315886, "grad_norm": 1.148235864436196, "learning_rate": 2.0632351564370035e-05, "loss": 0.5508, "step": 3579 }, { "epoch": 0.43985747634844574, "grad_norm": 1.141131474918344, "learning_rate": 2.062638849837318e-05, "loss": 0.6139, "step": 3580 }, { "epoch": 0.43998034156530286, "grad_norm": 1.4464687481745717, "learning_rate": 2.0620424397438646e-05, "loss": 0.5841, "step": 3581 }, { "epoch": 0.44010320678216, "grad_norm": 1.2235385362703148, "learning_rate": 2.06144592626635e-05, "loss": 0.6308, "step": 3582 }, { "epoch": 0.4402260719990171, "grad_norm": 1.478486063299159, "learning_rate": 2.060849309514498e-05, "loss": 0.6587, "step": 3583 }, { "epoch": 0.4403489372158742, "grad_norm": 1.5184577482791952, "learning_rate": 2.0602525895980528e-05, "loss": 0.6423, "step": 3584 }, { "epoch": 0.4404718024327313, "grad_norm": 1.3135871539553015, "learning_rate": 2.0596557666267776e-05, "loss": 0.5182, "step": 3585 }, { "epoch": 0.4405946676495884, "grad_norm": 1.3000995568174691, "learning_rate": 2.0590588407104532e-05, "loss": 0.5481, "step": 3586 }, { "epoch": 0.4407175328664455, "grad_norm": 1.2714251339203333, "learning_rate": 2.0584618119588806e-05, "loss": 0.5812, "step": 3587 }, { "epoch": 0.4408403980833026, "grad_norm": 1.6919445942176656, "learning_rate": 2.0578646804818793e-05, "loss": 0.6534, "step": 3588 }, { "epoch": 0.4409632633001597, "grad_norm": 1.3104436716938188, "learning_rate": 2.0572674463892883e-05, "loss": 0.6328, "step": 3589 }, { "epoch": 0.44108612851701684, "grad_norm": 1.302723041980785, "learning_rate": 2.0566701097909643e-05, "loss": 0.6413, "step": 3590 }, { "epoch": 0.44120899373387396, "grad_norm": 1.2510691571512815, "learning_rate": 2.0560726707967836e-05, "loss": 0.5479, "step": 3591 }, { "epoch": 0.44133185895073107, "grad_norm": 1.4895279549387337, "learning_rate": 2.0554751295166412e-05, "loss": 0.5594, "step": 3592 }, { "epoch": 0.44145472416758813, "grad_norm": 1.963769478316416, "learning_rate": 2.054877486060452e-05, "loss": 0.6617, "step": 3593 }, { "epoch": 0.44157758938444525, "grad_norm": 1.3754972396567011, "learning_rate": 2.0542797405381476e-05, "loss": 0.6361, "step": 3594 }, { "epoch": 0.44170045460130236, "grad_norm": 1.4466093772937216, "learning_rate": 2.0536818930596785e-05, "loss": 0.5859, "step": 3595 }, { "epoch": 0.4418233198181595, "grad_norm": 1.4113580549715155, "learning_rate": 2.053083943735017e-05, "loss": 0.6837, "step": 3596 }, { "epoch": 0.4419461850350166, "grad_norm": 1.3087318655254474, "learning_rate": 2.0524858926741505e-05, "loss": 0.4933, "step": 3597 }, { "epoch": 0.4420690502518737, "grad_norm": 1.1740542181453728, "learning_rate": 2.051887739987087e-05, "loss": 0.6128, "step": 3598 }, { "epoch": 0.4421919154687308, "grad_norm": 1.144889549007166, "learning_rate": 2.0512894857838528e-05, "loss": 0.5785, "step": 3599 }, { "epoch": 0.4423147806855879, "grad_norm": 1.7498999054171522, "learning_rate": 2.050691130174493e-05, "loss": 0.6747, "step": 3600 }, { "epoch": 0.442437645902445, "grad_norm": 1.417112458018011, "learning_rate": 2.0500926732690713e-05, "loss": 0.8139, "step": 3601 }, { "epoch": 0.4425605111193021, "grad_norm": 1.631690852708493, "learning_rate": 2.0494941151776698e-05, "loss": 0.6746, "step": 3602 }, { "epoch": 0.44268337633615923, "grad_norm": 1.3054974075286592, "learning_rate": 2.0488954560103895e-05, "loss": 0.5305, "step": 3603 }, { "epoch": 0.44280624155301634, "grad_norm": 1.2082922427885265, "learning_rate": 2.0482966958773494e-05, "loss": 0.7933, "step": 3604 }, { "epoch": 0.44292910676987346, "grad_norm": 1.2328659124677859, "learning_rate": 2.047697834888688e-05, "loss": 0.6267, "step": 3605 }, { "epoch": 0.4430519719867306, "grad_norm": 1.6593836235354964, "learning_rate": 2.047098873154562e-05, "loss": 0.6938, "step": 3606 }, { "epoch": 0.4431748372035877, "grad_norm": 1.4302238196496895, "learning_rate": 2.0464998107851464e-05, "loss": 0.6579, "step": 3607 }, { "epoch": 0.44329770242044475, "grad_norm": 1.3055432580985649, "learning_rate": 2.0459006478906348e-05, "loss": 0.568, "step": 3608 }, { "epoch": 0.44342056763730187, "grad_norm": 1.2139579767226087, "learning_rate": 2.045301384581239e-05, "loss": 0.703, "step": 3609 }, { "epoch": 0.443543432854159, "grad_norm": 1.5718974968447121, "learning_rate": 2.0447020209671904e-05, "loss": 0.6374, "step": 3610 }, { "epoch": 0.4436662980710161, "grad_norm": 1.3595250223046766, "learning_rate": 2.044102557158737e-05, "loss": 0.534, "step": 3611 }, { "epoch": 0.4437891632878732, "grad_norm": 1.4430962889133303, "learning_rate": 2.0435029932661472e-05, "loss": 0.6416, "step": 3612 }, { "epoch": 0.4439120285047303, "grad_norm": 1.550653633024119, "learning_rate": 2.0429033293997066e-05, "loss": 0.6806, "step": 3613 }, { "epoch": 0.44403489372158744, "grad_norm": 1.384290570440926, "learning_rate": 2.042303565669719e-05, "loss": 0.6624, "step": 3614 }, { "epoch": 0.4441577589384445, "grad_norm": 1.2460722997927791, "learning_rate": 2.0417037021865077e-05, "loss": 0.6273, "step": 3615 }, { "epoch": 0.4442806241553016, "grad_norm": 1.5439007294976186, "learning_rate": 2.0411037390604134e-05, "loss": 0.6251, "step": 3616 }, { "epoch": 0.44440348937215873, "grad_norm": 1.2217201508346363, "learning_rate": 2.0405036764017956e-05, "loss": 0.6874, "step": 3617 }, { "epoch": 0.44452635458901585, "grad_norm": 1.4584471212306531, "learning_rate": 2.0399035143210315e-05, "loss": 0.6447, "step": 3618 }, { "epoch": 0.44464921980587296, "grad_norm": 1.5881319656746442, "learning_rate": 2.039303252928518e-05, "loss": 0.6115, "step": 3619 }, { "epoch": 0.4447720850227301, "grad_norm": 1.2141820201050342, "learning_rate": 2.038702892334668e-05, "loss": 0.6064, "step": 3620 }, { "epoch": 0.4448949502395872, "grad_norm": 1.3490284841524718, "learning_rate": 2.038102432649915e-05, "loss": 0.7269, "step": 3621 }, { "epoch": 0.44501781545644425, "grad_norm": 1.356387828704352, "learning_rate": 2.0375018739847087e-05, "loss": 0.5842, "step": 3622 }, { "epoch": 0.44514068067330137, "grad_norm": 1.3993363946042585, "learning_rate": 2.0369012164495195e-05, "loss": 0.6596, "step": 3623 }, { "epoch": 0.4452635458901585, "grad_norm": 1.3262606798213514, "learning_rate": 2.036300460154832e-05, "loss": 0.5481, "step": 3624 }, { "epoch": 0.4453864111070156, "grad_norm": 1.4058479917869016, "learning_rate": 2.035699605211154e-05, "loss": 0.558, "step": 3625 }, { "epoch": 0.4455092763238727, "grad_norm": 1.0222869065140592, "learning_rate": 2.0350986517290072e-05, "loss": 0.6424, "step": 3626 }, { "epoch": 0.44563214154072983, "grad_norm": 1.35387698569259, "learning_rate": 2.034497599818934e-05, "loss": 0.6011, "step": 3627 }, { "epoch": 0.44575500675758695, "grad_norm": 1.4201669877050878, "learning_rate": 2.0338964495914932e-05, "loss": 0.6691, "step": 3628 }, { "epoch": 0.44587787197444406, "grad_norm": 1.253579019338957, "learning_rate": 2.0332952011572634e-05, "loss": 0.6333, "step": 3629 }, { "epoch": 0.4460007371913011, "grad_norm": 1.6179076771203558, "learning_rate": 2.0326938546268398e-05, "loss": 0.644, "step": 3630 }, { "epoch": 0.44612360240815824, "grad_norm": 1.4392364954793477, "learning_rate": 2.0320924101108364e-05, "loss": 0.659, "step": 3631 }, { "epoch": 0.44624646762501535, "grad_norm": 1.2718568319133743, "learning_rate": 2.0314908677198846e-05, "loss": 0.7068, "step": 3632 }, { "epoch": 0.44636933284187247, "grad_norm": 1.2601608476331965, "learning_rate": 2.0308892275646343e-05, "loss": 0.6256, "step": 3633 }, { "epoch": 0.4464921980587296, "grad_norm": 1.0918091047581853, "learning_rate": 2.0302874897557545e-05, "loss": 0.5978, "step": 3634 }, { "epoch": 0.4466150632755867, "grad_norm": 1.278563615500784, "learning_rate": 2.029685654403929e-05, "loss": 0.6068, "step": 3635 }, { "epoch": 0.4467379284924438, "grad_norm": 1.1908491006723305, "learning_rate": 2.029083721619863e-05, "loss": 0.5899, "step": 3636 }, { "epoch": 0.4468607937093009, "grad_norm": 1.36335015215458, "learning_rate": 2.0284816915142775e-05, "loss": 0.782, "step": 3637 }, { "epoch": 0.446983658926158, "grad_norm": 1.1472550301864122, "learning_rate": 2.027879564197912e-05, "loss": 0.6343, "step": 3638 }, { "epoch": 0.4471065241430151, "grad_norm": 1.3940365937179287, "learning_rate": 2.0272773397815247e-05, "loss": 0.7331, "step": 3639 }, { "epoch": 0.4472293893598722, "grad_norm": 1.2677159944857712, "learning_rate": 2.02667501837589e-05, "loss": 0.6363, "step": 3640 }, { "epoch": 0.44735225457672934, "grad_norm": 1.5426917768645334, "learning_rate": 2.0260726000918006e-05, "loss": 0.7541, "step": 3641 }, { "epoch": 0.44747511979358645, "grad_norm": 1.4850028110107596, "learning_rate": 2.025470085040069e-05, "loss": 0.6509, "step": 3642 }, { "epoch": 0.44759798501044357, "grad_norm": 1.8031594989339639, "learning_rate": 2.0248674733315224e-05, "loss": 0.6243, "step": 3643 }, { "epoch": 0.4477208502273006, "grad_norm": 1.1021554617678455, "learning_rate": 2.0242647650770084e-05, "loss": 0.5468, "step": 3644 }, { "epoch": 0.44784371544415774, "grad_norm": 1.3550325042329932, "learning_rate": 2.0236619603873905e-05, "loss": 0.7536, "step": 3645 }, { "epoch": 0.44796658066101486, "grad_norm": 1.3232798983527259, "learning_rate": 2.0230590593735515e-05, "loss": 0.5793, "step": 3646 }, { "epoch": 0.448089445877872, "grad_norm": 1.4676840465092558, "learning_rate": 2.02245606214639e-05, "loss": 0.7422, "step": 3647 }, { "epoch": 0.4482123110947291, "grad_norm": 1.3887119222202766, "learning_rate": 2.0218529688168244e-05, "loss": 0.6347, "step": 3648 }, { "epoch": 0.4483351763115862, "grad_norm": 1.3772483119447994, "learning_rate": 2.02124977949579e-05, "loss": 0.5996, "step": 3649 }, { "epoch": 0.4484580415284433, "grad_norm": 1.2993344381193968, "learning_rate": 2.0206464942942388e-05, "loss": 0.5611, "step": 3650 }, { "epoch": 0.4485809067453004, "grad_norm": 1.131376215645332, "learning_rate": 2.0200431133231414e-05, "loss": 0.523, "step": 3651 }, { "epoch": 0.4487037719621575, "grad_norm": 1.2981366021004856, "learning_rate": 2.0194396366934863e-05, "loss": 0.6278, "step": 3652 }, { "epoch": 0.4488266371790146, "grad_norm": 1.2301986669159366, "learning_rate": 2.018836064516278e-05, "loss": 0.5217, "step": 3653 }, { "epoch": 0.4489495023958717, "grad_norm": 1.3841404068176357, "learning_rate": 2.018232396902541e-05, "loss": 0.6049, "step": 3654 }, { "epoch": 0.44907236761272884, "grad_norm": 1.296572262075572, "learning_rate": 2.0176286339633148e-05, "loss": 0.5866, "step": 3655 }, { "epoch": 0.44919523282958596, "grad_norm": 1.3856732084259802, "learning_rate": 2.0170247758096586e-05, "loss": 0.6829, "step": 3656 }, { "epoch": 0.44931809804644307, "grad_norm": 1.353419333497641, "learning_rate": 2.016420822552648e-05, "loss": 0.6269, "step": 3657 }, { "epoch": 0.4494409632633002, "grad_norm": 1.2953919556798261, "learning_rate": 2.0158167743033764e-05, "loss": 0.5509, "step": 3658 }, { "epoch": 0.44956382848015725, "grad_norm": 1.345605625985593, "learning_rate": 2.0152126311729542e-05, "loss": 0.5877, "step": 3659 }, { "epoch": 0.44968669369701436, "grad_norm": 1.2057993768284678, "learning_rate": 2.0146083932725096e-05, "loss": 0.6886, "step": 3660 }, { "epoch": 0.4498095589138715, "grad_norm": 1.3175371906896025, "learning_rate": 2.0140040607131888e-05, "loss": 0.5779, "step": 3661 }, { "epoch": 0.4499324241307286, "grad_norm": 1.1957163696985378, "learning_rate": 2.0133996336061538e-05, "loss": 0.5579, "step": 3662 }, { "epoch": 0.4500552893475857, "grad_norm": 1.3490837194344436, "learning_rate": 2.0127951120625864e-05, "loss": 0.5597, "step": 3663 }, { "epoch": 0.4501781545644428, "grad_norm": 1.3549790814528637, "learning_rate": 2.0121904961936835e-05, "loss": 0.6071, "step": 3664 }, { "epoch": 0.45030101978129994, "grad_norm": 1.2987560426529738, "learning_rate": 2.0115857861106604e-05, "loss": 0.7209, "step": 3665 }, { "epoch": 0.450423884998157, "grad_norm": 1.1021450883703203, "learning_rate": 2.0109809819247498e-05, "loss": 0.649, "step": 3666 }, { "epoch": 0.4505467502150141, "grad_norm": 1.174820536257423, "learning_rate": 2.010376083747201e-05, "loss": 0.7063, "step": 3667 }, { "epoch": 0.45066961543187123, "grad_norm": 1.4396062751727705, "learning_rate": 2.0097710916892823e-05, "loss": 0.6355, "step": 3668 }, { "epoch": 0.45079248064872834, "grad_norm": 1.0816226089212564, "learning_rate": 2.0091660058622767e-05, "loss": 0.6383, "step": 3669 }, { "epoch": 0.45091534586558546, "grad_norm": 1.296232076205354, "learning_rate": 2.0085608263774864e-05, "loss": 0.5588, "step": 3670 }, { "epoch": 0.4510382110824426, "grad_norm": 1.5590898714911274, "learning_rate": 2.0079555533462306e-05, "loss": 0.6342, "step": 3671 }, { "epoch": 0.4511610762992997, "grad_norm": 1.3247259302682552, "learning_rate": 2.0073501868798444e-05, "loss": 0.7013, "step": 3672 }, { "epoch": 0.45128394151615675, "grad_norm": 1.2154716104760397, "learning_rate": 2.0067447270896822e-05, "loss": 0.5339, "step": 3673 }, { "epoch": 0.45140680673301387, "grad_norm": 1.114222000982366, "learning_rate": 2.0061391740871133e-05, "loss": 0.6528, "step": 3674 }, { "epoch": 0.451529671949871, "grad_norm": 1.15977197514122, "learning_rate": 2.0055335279835257e-05, "loss": 0.5696, "step": 3675 }, { "epoch": 0.4516525371667281, "grad_norm": 1.2994001558667179, "learning_rate": 2.0049277888903244e-05, "loss": 0.5775, "step": 3676 }, { "epoch": 0.4517754023835852, "grad_norm": 1.4055986357515828, "learning_rate": 2.0043219569189312e-05, "loss": 0.6377, "step": 3677 }, { "epoch": 0.4518982676004423, "grad_norm": 1.2861334440730345, "learning_rate": 2.0037160321807846e-05, "loss": 0.7786, "step": 3678 }, { "epoch": 0.45202113281729944, "grad_norm": 1.1967032964961712, "learning_rate": 2.00311001478734e-05, "loss": 0.5507, "step": 3679 }, { "epoch": 0.45214399803415656, "grad_norm": 1.5310305814525436, "learning_rate": 2.0025039048500712e-05, "loss": 0.5561, "step": 3680 }, { "epoch": 0.4522668632510136, "grad_norm": 1.6039050442525151, "learning_rate": 2.0018977024804682e-05, "loss": 0.6803, "step": 3681 }, { "epoch": 0.45238972846787073, "grad_norm": 1.425255523701518, "learning_rate": 2.0012914077900374e-05, "loss": 0.6021, "step": 3682 }, { "epoch": 0.45251259368472785, "grad_norm": 1.2639826232576261, "learning_rate": 2.0006850208903034e-05, "loss": 0.5973, "step": 3683 }, { "epoch": 0.45263545890158496, "grad_norm": 1.1627560978684526, "learning_rate": 2.000078541892807e-05, "loss": 0.6714, "step": 3684 }, { "epoch": 0.4527583241184421, "grad_norm": 1.154607282256261, "learning_rate": 1.9994719709091052e-05, "loss": 0.7082, "step": 3685 }, { "epoch": 0.4528811893352992, "grad_norm": 1.2373389347740353, "learning_rate": 1.9988653080507743e-05, "loss": 0.6576, "step": 3686 }, { "epoch": 0.4530040545521563, "grad_norm": 1.3886700522407687, "learning_rate": 1.9982585534294054e-05, "loss": 0.6471, "step": 3687 }, { "epoch": 0.45312691976901337, "grad_norm": 1.4109106976038837, "learning_rate": 1.9976517071566065e-05, "loss": 0.5998, "step": 3688 }, { "epoch": 0.4532497849858705, "grad_norm": 1.4516845833112728, "learning_rate": 1.9970447693440036e-05, "loss": 0.599, "step": 3689 }, { "epoch": 0.4533726502027276, "grad_norm": 1.3072646731374264, "learning_rate": 1.9964377401032386e-05, "loss": 0.6001, "step": 3690 }, { "epoch": 0.4534955154195847, "grad_norm": 1.3378503973997127, "learning_rate": 1.9958306195459708e-05, "loss": 0.7439, "step": 3691 }, { "epoch": 0.45361838063644183, "grad_norm": 1.6228780187193528, "learning_rate": 1.995223407783877e-05, "loss": 0.6618, "step": 3692 }, { "epoch": 0.45374124585329895, "grad_norm": 1.2390147392240438, "learning_rate": 1.9946161049286474e-05, "loss": 0.6153, "step": 3693 }, { "epoch": 0.45386411107015606, "grad_norm": 1.3843263909989256, "learning_rate": 1.994008711091994e-05, "loss": 0.5373, "step": 3694 }, { "epoch": 0.4539869762870131, "grad_norm": 1.2880044681607195, "learning_rate": 1.9934012263856417e-05, "loss": 0.5557, "step": 3695 }, { "epoch": 0.45410984150387024, "grad_norm": 1.2750745525386145, "learning_rate": 1.992793650921334e-05, "loss": 0.628, "step": 3696 }, { "epoch": 0.45423270672072735, "grad_norm": 1.0745362173817081, "learning_rate": 1.99218598481083e-05, "loss": 0.6217, "step": 3697 }, { "epoch": 0.45435557193758447, "grad_norm": 1.4687755714889328, "learning_rate": 1.9915782281659052e-05, "loss": 0.6717, "step": 3698 }, { "epoch": 0.4544784371544416, "grad_norm": 1.1911644193582225, "learning_rate": 1.9909703810983542e-05, "loss": 0.6516, "step": 3699 }, { "epoch": 0.4546013023712987, "grad_norm": 1.3717092150771575, "learning_rate": 1.9903624437199853e-05, "loss": 0.6041, "step": 3700 }, { "epoch": 0.4547241675881558, "grad_norm": 1.473235670777995, "learning_rate": 1.9897544161426252e-05, "loss": 0.7015, "step": 3701 }, { "epoch": 0.4548470328050129, "grad_norm": 1.251566173090998, "learning_rate": 1.9891462984781162e-05, "loss": 0.6571, "step": 3702 }, { "epoch": 0.45496989802187, "grad_norm": 1.1473380659866728, "learning_rate": 1.988538090838318e-05, "loss": 0.5992, "step": 3703 }, { "epoch": 0.4550927632387271, "grad_norm": 1.3568890183383173, "learning_rate": 1.987929793335106e-05, "loss": 0.6078, "step": 3704 }, { "epoch": 0.4552156284555842, "grad_norm": 1.198861842845574, "learning_rate": 1.987321406080373e-05, "loss": 0.6082, "step": 3705 }, { "epoch": 0.45533849367244134, "grad_norm": 1.7384048791408333, "learning_rate": 1.9867129291860283e-05, "loss": 0.6645, "step": 3706 }, { "epoch": 0.45546135888929845, "grad_norm": 1.3572765356707082, "learning_rate": 1.986104362763996e-05, "loss": 0.597, "step": 3707 }, { "epoch": 0.45558422410615557, "grad_norm": 1.1978982998013663, "learning_rate": 1.985495706926219e-05, "loss": 0.6041, "step": 3708 }, { "epoch": 0.4557070893230127, "grad_norm": 1.1341290043769148, "learning_rate": 1.984886961784655e-05, "loss": 0.6731, "step": 3709 }, { "epoch": 0.45582995453986974, "grad_norm": 1.2886893888930566, "learning_rate": 1.984278127451279e-05, "loss": 0.6579, "step": 3710 }, { "epoch": 0.45595281975672686, "grad_norm": 1.322680233079386, "learning_rate": 1.9836692040380826e-05, "loss": 0.654, "step": 3711 }, { "epoch": 0.456075684973584, "grad_norm": 1.4401124143781512, "learning_rate": 1.9830601916570722e-05, "loss": 0.5738, "step": 3712 }, { "epoch": 0.4561985501904411, "grad_norm": 1.243888099544029, "learning_rate": 1.9824510904202725e-05, "loss": 0.6981, "step": 3713 }, { "epoch": 0.4563214154072982, "grad_norm": 1.2110228764802953, "learning_rate": 1.9818419004397234e-05, "loss": 0.5488, "step": 3714 }, { "epoch": 0.4564442806241553, "grad_norm": 1.2592549023493207, "learning_rate": 1.981232621827482e-05, "loss": 0.6423, "step": 3715 }, { "epoch": 0.45656714584101243, "grad_norm": 1.1448693333504705, "learning_rate": 1.980623254695621e-05, "loss": 0.8262, "step": 3716 }, { "epoch": 0.4566900110578695, "grad_norm": 1.2140376234421097, "learning_rate": 1.9800137991562286e-05, "loss": 0.5382, "step": 3717 }, { "epoch": 0.4568128762747266, "grad_norm": 1.2736906900887848, "learning_rate": 1.9794042553214106e-05, "loss": 0.6918, "step": 3718 }, { "epoch": 0.4569357414915837, "grad_norm": 1.3932240844962591, "learning_rate": 1.9787946233032896e-05, "loss": 0.692, "step": 3719 }, { "epoch": 0.45705860670844084, "grad_norm": 1.1606858854489719, "learning_rate": 1.978184903214002e-05, "loss": 0.5459, "step": 3720 }, { "epoch": 0.45718147192529796, "grad_norm": 1.0359673180926554, "learning_rate": 1.977575095165703e-05, "loss": 0.6175, "step": 3721 }, { "epoch": 0.45730433714215507, "grad_norm": 1.5578775028924268, "learning_rate": 1.9769651992705627e-05, "loss": 0.7043, "step": 3722 }, { "epoch": 0.4574272023590122, "grad_norm": 1.4408694250576741, "learning_rate": 1.9763552156407666e-05, "loss": 0.7181, "step": 3723 }, { "epoch": 0.45755006757586925, "grad_norm": 1.2521246379512494, "learning_rate": 1.9757451443885184e-05, "loss": 0.6042, "step": 3724 }, { "epoch": 0.45767293279272636, "grad_norm": 1.290472233646275, "learning_rate": 1.9751349856260357e-05, "loss": 0.5695, "step": 3725 }, { "epoch": 0.4577957980095835, "grad_norm": 1.258548104034713, "learning_rate": 1.9745247394655544e-05, "loss": 0.5503, "step": 3726 }, { "epoch": 0.4579186632264406, "grad_norm": 1.215852457342528, "learning_rate": 1.973914406019324e-05, "loss": 0.5684, "step": 3727 }, { "epoch": 0.4580415284432977, "grad_norm": 1.2416447280128335, "learning_rate": 1.9733039853996126e-05, "loss": 0.5681, "step": 3728 }, { "epoch": 0.4581643936601548, "grad_norm": 1.2747921926988273, "learning_rate": 1.9726934777187023e-05, "loss": 0.6212, "step": 3729 }, { "epoch": 0.45828725887701194, "grad_norm": 1.3585046595335897, "learning_rate": 1.9720828830888922e-05, "loss": 0.6303, "step": 3730 }, { "epoch": 0.458410124093869, "grad_norm": 1.61069838460963, "learning_rate": 1.9714722016224977e-05, "loss": 0.6449, "step": 3731 }, { "epoch": 0.4585329893107261, "grad_norm": 1.5184298098804947, "learning_rate": 1.970861433431849e-05, "loss": 0.5386, "step": 3732 }, { "epoch": 0.45865585452758323, "grad_norm": 1.1162760646650856, "learning_rate": 1.970250578629293e-05, "loss": 0.5641, "step": 3733 }, { "epoch": 0.45877871974444034, "grad_norm": 1.098717743414248, "learning_rate": 1.9696396373271935e-05, "loss": 0.5954, "step": 3734 }, { "epoch": 0.45890158496129746, "grad_norm": 1.3096202519613267, "learning_rate": 1.9690286096379277e-05, "loss": 0.5636, "step": 3735 }, { "epoch": 0.4590244501781546, "grad_norm": 1.151746113371294, "learning_rate": 1.9684174956738912e-05, "loss": 0.6505, "step": 3736 }, { "epoch": 0.4591473153950117, "grad_norm": 1.3319707268626126, "learning_rate": 1.9678062955474943e-05, "loss": 0.5278, "step": 3737 }, { "epoch": 0.4592701806118688, "grad_norm": 1.1796552079905098, "learning_rate": 1.9671950093711633e-05, "loss": 0.5871, "step": 3738 }, { "epoch": 0.45939304582872587, "grad_norm": 1.561430907926399, "learning_rate": 1.9665836372573397e-05, "loss": 0.5791, "step": 3739 }, { "epoch": 0.459515911045583, "grad_norm": 1.1960841023618576, "learning_rate": 1.965972179318482e-05, "loss": 0.6526, "step": 3740 }, { "epoch": 0.4596387762624401, "grad_norm": 1.218789779812076, "learning_rate": 1.965360635667064e-05, "loss": 0.5007, "step": 3741 }, { "epoch": 0.4597616414792972, "grad_norm": 1.3203182736302792, "learning_rate": 1.964749006415575e-05, "loss": 0.5807, "step": 3742 }, { "epoch": 0.4598845066961543, "grad_norm": 1.2717629415883203, "learning_rate": 1.9641372916765207e-05, "loss": 0.6308, "step": 3743 }, { "epoch": 0.46000737191301144, "grad_norm": 1.281574065848736, "learning_rate": 1.963525491562421e-05, "loss": 0.6638, "step": 3744 }, { "epoch": 0.46013023712986856, "grad_norm": 1.3397532176789233, "learning_rate": 1.962913606185814e-05, "loss": 0.6202, "step": 3745 }, { "epoch": 0.4602531023467256, "grad_norm": 1.3580139994482554, "learning_rate": 1.9623016356592504e-05, "loss": 0.4735, "step": 3746 }, { "epoch": 0.46037596756358273, "grad_norm": 1.1892680773124515, "learning_rate": 1.9616895800952994e-05, "loss": 0.5591, "step": 3747 }, { "epoch": 0.46049883278043985, "grad_norm": 1.190529255668072, "learning_rate": 1.961077439606544e-05, "loss": 0.5957, "step": 3748 }, { "epoch": 0.46062169799729696, "grad_norm": 1.2386781273918077, "learning_rate": 1.9604652143055843e-05, "loss": 0.6015, "step": 3749 }, { "epoch": 0.4607445632141541, "grad_norm": 1.3390762694138658, "learning_rate": 1.9598529043050343e-05, "loss": 0.6863, "step": 3750 }, { "epoch": 0.4608674284310112, "grad_norm": 1.6510267020480978, "learning_rate": 1.9592405097175248e-05, "loss": 0.684, "step": 3751 }, { "epoch": 0.4609902936478683, "grad_norm": 1.3381448099549105, "learning_rate": 1.958628030655702e-05, "loss": 0.5401, "step": 3752 }, { "epoch": 0.46111315886472537, "grad_norm": 1.2803738222763699, "learning_rate": 1.958015467232227e-05, "loss": 0.6338, "step": 3753 }, { "epoch": 0.4612360240815825, "grad_norm": 1.3839873291308296, "learning_rate": 1.9574028195597776e-05, "loss": 0.6562, "step": 3754 }, { "epoch": 0.4613588892984396, "grad_norm": 1.2829827792894752, "learning_rate": 1.9567900877510456e-05, "loss": 0.6404, "step": 3755 }, { "epoch": 0.4614817545152967, "grad_norm": 1.1809218325535302, "learning_rate": 1.9561772719187394e-05, "loss": 0.6138, "step": 3756 }, { "epoch": 0.46160461973215383, "grad_norm": 1.6879531517714317, "learning_rate": 1.9555643721755826e-05, "loss": 0.7663, "step": 3757 }, { "epoch": 0.46172748494901095, "grad_norm": 1.149127655615933, "learning_rate": 1.9549513886343135e-05, "loss": 0.6626, "step": 3758 }, { "epoch": 0.46185035016586806, "grad_norm": 1.2412182580528726, "learning_rate": 1.9543383214076874e-05, "loss": 0.5754, "step": 3759 }, { "epoch": 0.4619732153827252, "grad_norm": 1.1649984197532444, "learning_rate": 1.9537251706084733e-05, "loss": 0.631, "step": 3760 }, { "epoch": 0.46209608059958224, "grad_norm": 1.0601781212343289, "learning_rate": 1.9531119363494566e-05, "loss": 0.7062, "step": 3761 }, { "epoch": 0.46221894581643935, "grad_norm": 1.2157223454954063, "learning_rate": 1.952498618743438e-05, "loss": 0.4988, "step": 3762 }, { "epoch": 0.46234181103329647, "grad_norm": 1.38230037475248, "learning_rate": 1.9518852179032325e-05, "loss": 0.763, "step": 3763 }, { "epoch": 0.4624646762501536, "grad_norm": 1.4139891712596242, "learning_rate": 1.9512717339416724e-05, "loss": 0.5702, "step": 3764 }, { "epoch": 0.4625875414670107, "grad_norm": 1.3227720502833038, "learning_rate": 1.950658166971603e-05, "loss": 0.679, "step": 3765 }, { "epoch": 0.4627104066838678, "grad_norm": 1.0302683673686026, "learning_rate": 1.9500445171058866e-05, "loss": 0.6118, "step": 3766 }, { "epoch": 0.46283327190072493, "grad_norm": 1.2677348840814187, "learning_rate": 1.9494307844573997e-05, "loss": 0.6256, "step": 3767 }, { "epoch": 0.462956137117582, "grad_norm": 1.2768898060533036, "learning_rate": 1.9488169691390348e-05, "loss": 0.5321, "step": 3768 }, { "epoch": 0.4630790023344391, "grad_norm": 0.9528117595121683, "learning_rate": 1.948203071263699e-05, "loss": 0.5518, "step": 3769 }, { "epoch": 0.4632018675512962, "grad_norm": 1.3014750816793834, "learning_rate": 1.947589090944315e-05, "loss": 0.5846, "step": 3770 }, { "epoch": 0.46332473276815334, "grad_norm": 1.2810220849076575, "learning_rate": 1.9469750282938208e-05, "loss": 0.7296, "step": 3771 }, { "epoch": 0.46344759798501045, "grad_norm": 1.4634174632782038, "learning_rate": 1.9463608834251687e-05, "loss": 0.7016, "step": 3772 }, { "epoch": 0.46357046320186757, "grad_norm": 1.352038168508579, "learning_rate": 1.9457466564513268e-05, "loss": 0.6406, "step": 3773 }, { "epoch": 0.4636933284187247, "grad_norm": 0.9929995844471909, "learning_rate": 1.945132347485278e-05, "loss": 0.4639, "step": 3774 }, { "epoch": 0.46381619363558174, "grad_norm": 1.4605747791875108, "learning_rate": 1.9445179566400206e-05, "loss": 0.6709, "step": 3775 }, { "epoch": 0.46393905885243886, "grad_norm": 1.3641331514214992, "learning_rate": 1.943903484028568e-05, "loss": 0.5931, "step": 3776 }, { "epoch": 0.464061924069296, "grad_norm": 1.3700958863068857, "learning_rate": 1.9432889297639485e-05, "loss": 0.7184, "step": 3777 }, { "epoch": 0.4641847892861531, "grad_norm": 1.3889907957773826, "learning_rate": 1.9426742939592052e-05, "loss": 0.6643, "step": 3778 }, { "epoch": 0.4643076545030102, "grad_norm": 1.6335137487396247, "learning_rate": 1.942059576727396e-05, "loss": 0.6309, "step": 3779 }, { "epoch": 0.4644305197198673, "grad_norm": 1.2657516068331456, "learning_rate": 1.941444778181595e-05, "loss": 0.737, "step": 3780 }, { "epoch": 0.46455338493672443, "grad_norm": 1.2545993595888654, "learning_rate": 1.94082989843489e-05, "loss": 0.4973, "step": 3781 }, { "epoch": 0.4646762501535815, "grad_norm": 1.3654930877391755, "learning_rate": 1.9402149376003837e-05, "loss": 0.653, "step": 3782 }, { "epoch": 0.4647991153704386, "grad_norm": 1.1264959459315358, "learning_rate": 1.9395998957911945e-05, "loss": 0.6475, "step": 3783 }, { "epoch": 0.4649219805872957, "grad_norm": 1.4074164695526388, "learning_rate": 1.938984773120455e-05, "loss": 0.5942, "step": 3784 }, { "epoch": 0.46504484580415284, "grad_norm": 1.1488133025699512, "learning_rate": 1.938369569701314e-05, "loss": 0.6057, "step": 3785 }, { "epoch": 0.46516771102100996, "grad_norm": 1.1493092857007638, "learning_rate": 1.9377542856469335e-05, "loss": 0.6902, "step": 3786 }, { "epoch": 0.46529057623786707, "grad_norm": 1.700424576299404, "learning_rate": 1.937138921070491e-05, "loss": 0.5651, "step": 3787 }, { "epoch": 0.4654134414547242, "grad_norm": 1.3052921795858181, "learning_rate": 1.9365234760851792e-05, "loss": 0.721, "step": 3788 }, { "epoch": 0.4655363066715813, "grad_norm": 1.4643499055674702, "learning_rate": 1.9359079508042046e-05, "loss": 0.6033, "step": 3789 }, { "epoch": 0.46565917188843836, "grad_norm": 1.6635211743791694, "learning_rate": 1.9352923453407896e-05, "loss": 0.7195, "step": 3790 }, { "epoch": 0.4657820371052955, "grad_norm": 1.1950497775249331, "learning_rate": 1.934676659808171e-05, "loss": 0.6337, "step": 3791 }, { "epoch": 0.4659049023221526, "grad_norm": 1.2240223318366525, "learning_rate": 1.934060894319599e-05, "loss": 0.5517, "step": 3792 }, { "epoch": 0.4660277675390097, "grad_norm": 1.0876730607570364, "learning_rate": 1.933445048988341e-05, "loss": 0.6187, "step": 3793 }, { "epoch": 0.4661506327558668, "grad_norm": 1.4896757462740706, "learning_rate": 1.932829123927677e-05, "loss": 0.6179, "step": 3794 }, { "epoch": 0.46627349797272394, "grad_norm": 1.7634194147032483, "learning_rate": 1.9322131192509028e-05, "loss": 0.6759, "step": 3795 }, { "epoch": 0.46639636318958105, "grad_norm": 1.2542152950416465, "learning_rate": 1.9315970350713278e-05, "loss": 0.6996, "step": 3796 }, { "epoch": 0.4665192284064381, "grad_norm": 1.5079041862957505, "learning_rate": 1.930980871502278e-05, "loss": 0.6142, "step": 3797 }, { "epoch": 0.46664209362329523, "grad_norm": 1.4182529581695562, "learning_rate": 1.9303646286570913e-05, "loss": 0.6602, "step": 3798 }, { "epoch": 0.46676495884015234, "grad_norm": 1.271319533093495, "learning_rate": 1.9297483066491222e-05, "loss": 0.6356, "step": 3799 }, { "epoch": 0.46688782405700946, "grad_norm": 1.3221681207996063, "learning_rate": 1.9291319055917393e-05, "loss": 0.6204, "step": 3800 }, { "epoch": 0.4670106892738666, "grad_norm": 1.1898691118673697, "learning_rate": 1.9285154255983257e-05, "loss": 0.6151, "step": 3801 }, { "epoch": 0.4671335544907237, "grad_norm": 2.004111280533378, "learning_rate": 1.927898866782278e-05, "loss": 0.6515, "step": 3802 }, { "epoch": 0.4672564197075808, "grad_norm": 1.3491662009494219, "learning_rate": 1.9272822292570092e-05, "loss": 0.6157, "step": 3803 }, { "epoch": 0.46737928492443787, "grad_norm": 1.1863502750744326, "learning_rate": 1.926665513135945e-05, "loss": 0.5614, "step": 3804 }, { "epoch": 0.467502150141295, "grad_norm": 1.2406472768872499, "learning_rate": 1.9260487185325267e-05, "loss": 0.6041, "step": 3805 }, { "epoch": 0.4676250153581521, "grad_norm": 1.5208541268136644, "learning_rate": 1.92543184556021e-05, "loss": 0.602, "step": 3806 }, { "epoch": 0.4677478805750092, "grad_norm": 1.653283243747774, "learning_rate": 1.924814894332464e-05, "loss": 0.7092, "step": 3807 }, { "epoch": 0.4678707457918663, "grad_norm": 1.0472641279347328, "learning_rate": 1.9241978649627738e-05, "loss": 0.5967, "step": 3808 }, { "epoch": 0.46799361100872344, "grad_norm": 1.396801622017612, "learning_rate": 1.9235807575646368e-05, "loss": 0.6205, "step": 3809 }, { "epoch": 0.46811647622558056, "grad_norm": 1.4386213988666408, "learning_rate": 1.9229635722515667e-05, "loss": 0.7292, "step": 3810 }, { "epoch": 0.4682393414424377, "grad_norm": 1.3045528248863723, "learning_rate": 1.9223463091370903e-05, "loss": 0.551, "step": 3811 }, { "epoch": 0.46836220665929473, "grad_norm": 1.200766541261277, "learning_rate": 1.9217289683347496e-05, "loss": 0.6229, "step": 3812 }, { "epoch": 0.46848507187615185, "grad_norm": 1.3989599598443423, "learning_rate": 1.9211115499580995e-05, "loss": 0.6407, "step": 3813 }, { "epoch": 0.46860793709300896, "grad_norm": 1.1915537422818472, "learning_rate": 1.9204940541207113e-05, "loss": 0.5981, "step": 3814 }, { "epoch": 0.4687308023098661, "grad_norm": 1.3462727399978962, "learning_rate": 1.919876480936169e-05, "loss": 0.6542, "step": 3815 }, { "epoch": 0.4688536675267232, "grad_norm": 1.2386780017279388, "learning_rate": 1.919258830518071e-05, "loss": 0.5555, "step": 3816 }, { "epoch": 0.4689765327435803, "grad_norm": 1.4495370448771092, "learning_rate": 1.91864110298003e-05, "loss": 0.8099, "step": 3817 }, { "epoch": 0.4690993979604374, "grad_norm": 1.4352042039153425, "learning_rate": 1.918023298435673e-05, "loss": 0.6357, "step": 3818 }, { "epoch": 0.4692222631772945, "grad_norm": 1.2802503446938998, "learning_rate": 1.9174054169986415e-05, "loss": 0.5493, "step": 3819 }, { "epoch": 0.4693451283941516, "grad_norm": 1.3373062680926957, "learning_rate": 1.9167874587825902e-05, "loss": 0.6193, "step": 3820 }, { "epoch": 0.4694679936110087, "grad_norm": 1.360734048908169, "learning_rate": 1.916169423901189e-05, "loss": 0.6583, "step": 3821 }, { "epoch": 0.46959085882786583, "grad_norm": 1.2438042689672133, "learning_rate": 1.9155513124681216e-05, "loss": 0.7309, "step": 3822 }, { "epoch": 0.46971372404472295, "grad_norm": 1.1078731485563078, "learning_rate": 1.9149331245970844e-05, "loss": 0.5816, "step": 3823 }, { "epoch": 0.46983658926158006, "grad_norm": 1.1250145983570483, "learning_rate": 1.91431486040179e-05, "loss": 0.5534, "step": 3824 }, { "epoch": 0.4699594544784372, "grad_norm": 1.2459069303696255, "learning_rate": 1.913696519995964e-05, "loss": 0.6332, "step": 3825 }, { "epoch": 0.47008231969529424, "grad_norm": 1.2969302619450613, "learning_rate": 1.9130781034933463e-05, "loss": 0.5694, "step": 3826 }, { "epoch": 0.47020518491215135, "grad_norm": 1.2350086332234365, "learning_rate": 1.9124596110076908e-05, "loss": 0.5915, "step": 3827 }, { "epoch": 0.47032805012900847, "grad_norm": 1.4477504729911732, "learning_rate": 1.911841042652764e-05, "loss": 0.6547, "step": 3828 }, { "epoch": 0.4704509153458656, "grad_norm": 1.4374148151895036, "learning_rate": 1.911222398542349e-05, "loss": 0.6946, "step": 3829 }, { "epoch": 0.4705737805627227, "grad_norm": 1.1278006503290798, "learning_rate": 1.91060367879024e-05, "loss": 0.6139, "step": 3830 }, { "epoch": 0.4706966457795798, "grad_norm": 1.1292263964994338, "learning_rate": 1.9099848835102476e-05, "loss": 0.607, "step": 3831 }, { "epoch": 0.47081951099643693, "grad_norm": 1.306383412549856, "learning_rate": 1.9093660128161943e-05, "loss": 0.6231, "step": 3832 }, { "epoch": 0.470942376213294, "grad_norm": 1.3271496958927365, "learning_rate": 1.908747066821918e-05, "loss": 0.6352, "step": 3833 }, { "epoch": 0.4710652414301511, "grad_norm": 1.7351260736050274, "learning_rate": 1.908128045641269e-05, "loss": 0.6636, "step": 3834 }, { "epoch": 0.4711881066470082, "grad_norm": 1.1554619042014789, "learning_rate": 1.9075089493881137e-05, "loss": 0.578, "step": 3835 }, { "epoch": 0.47131097186386534, "grad_norm": 1.579208006198566, "learning_rate": 1.9068897781763294e-05, "loss": 0.5996, "step": 3836 }, { "epoch": 0.47143383708072245, "grad_norm": 1.1189354842550163, "learning_rate": 1.9062705321198095e-05, "loss": 0.7431, "step": 3837 }, { "epoch": 0.47155670229757957, "grad_norm": 1.1851703991136269, "learning_rate": 1.90565121133246e-05, "loss": 0.5675, "step": 3838 }, { "epoch": 0.4716795675144367, "grad_norm": 1.189395990193646, "learning_rate": 1.905031815928201e-05, "loss": 0.5666, "step": 3839 }, { "epoch": 0.4718024327312938, "grad_norm": 1.1323045826251998, "learning_rate": 1.9044123460209655e-05, "loss": 0.5559, "step": 3840 }, { "epoch": 0.47192529794815086, "grad_norm": 1.2095688872997774, "learning_rate": 1.9037928017247023e-05, "loss": 0.6432, "step": 3841 }, { "epoch": 0.472048163165008, "grad_norm": 1.1425337711530468, "learning_rate": 1.9031731831533716e-05, "loss": 0.5268, "step": 3842 }, { "epoch": 0.4721710283818651, "grad_norm": 1.1967060198454642, "learning_rate": 1.902553490420949e-05, "loss": 0.5558, "step": 3843 }, { "epoch": 0.4722938935987222, "grad_norm": 5.929724944469359, "learning_rate": 1.9019337236414218e-05, "loss": 0.6937, "step": 3844 }, { "epoch": 0.4724167588155793, "grad_norm": 1.4035200851847638, "learning_rate": 1.9013138829287932e-05, "loss": 0.5517, "step": 3845 }, { "epoch": 0.47253962403243643, "grad_norm": 1.9565600568143076, "learning_rate": 1.900693968397078e-05, "loss": 0.7654, "step": 3846 }, { "epoch": 0.47266248924929355, "grad_norm": 1.215992595253324, "learning_rate": 1.9000739801603066e-05, "loss": 0.6724, "step": 3847 }, { "epoch": 0.4727853544661506, "grad_norm": 1.3089493401745964, "learning_rate": 1.8994539183325207e-05, "loss": 0.5212, "step": 3848 }, { "epoch": 0.4729082196830077, "grad_norm": 1.3454363918809649, "learning_rate": 1.8988337830277772e-05, "loss": 0.5585, "step": 3849 }, { "epoch": 0.47303108489986484, "grad_norm": 2.2578457010290274, "learning_rate": 1.898213574360146e-05, "loss": 0.5749, "step": 3850 }, { "epoch": 0.47315395011672196, "grad_norm": 1.199849069490536, "learning_rate": 1.8975932924437098e-05, "loss": 0.7011, "step": 3851 }, { "epoch": 0.47327681533357907, "grad_norm": 1.1679738950183993, "learning_rate": 1.8969729373925668e-05, "loss": 0.5525, "step": 3852 }, { "epoch": 0.4733996805504362, "grad_norm": 1.2374539239496998, "learning_rate": 1.896352509320825e-05, "loss": 0.6659, "step": 3853 }, { "epoch": 0.4735225457672933, "grad_norm": 1.0888289679307717, "learning_rate": 1.8957320083426108e-05, "loss": 0.7005, "step": 3854 }, { "epoch": 0.47364541098415036, "grad_norm": 1.3099238607809705, "learning_rate": 1.8951114345720598e-05, "loss": 0.547, "step": 3855 }, { "epoch": 0.4737682762010075, "grad_norm": 1.3122879693491132, "learning_rate": 1.8944907881233225e-05, "loss": 0.5819, "step": 3856 }, { "epoch": 0.4738911414178646, "grad_norm": 1.389670071645873, "learning_rate": 1.8938700691105632e-05, "loss": 0.5933, "step": 3857 }, { "epoch": 0.4740140066347217, "grad_norm": 1.1342834924475842, "learning_rate": 1.8932492776479596e-05, "loss": 0.6408, "step": 3858 }, { "epoch": 0.4741368718515788, "grad_norm": 1.183331606933879, "learning_rate": 1.892628413849701e-05, "loss": 0.7883, "step": 3859 }, { "epoch": 0.47425973706843594, "grad_norm": 1.1461954674078751, "learning_rate": 1.892007477829992e-05, "loss": 0.6126, "step": 3860 }, { "epoch": 0.47438260228529305, "grad_norm": 1.0041573143812426, "learning_rate": 1.8913864697030497e-05, "loss": 0.5689, "step": 3861 }, { "epoch": 0.47450546750215017, "grad_norm": 1.2589390496285016, "learning_rate": 1.8907653895831047e-05, "loss": 0.6554, "step": 3862 }, { "epoch": 0.47462833271900723, "grad_norm": 1.2688369093449443, "learning_rate": 1.8901442375844006e-05, "loss": 0.5972, "step": 3863 }, { "epoch": 0.47475119793586434, "grad_norm": 1.1773339844840642, "learning_rate": 1.8895230138211942e-05, "loss": 0.7355, "step": 3864 }, { "epoch": 0.47487406315272146, "grad_norm": 1.4232526457491783, "learning_rate": 1.8889017184077554e-05, "loss": 0.6642, "step": 3865 }, { "epoch": 0.4749969283695786, "grad_norm": 1.2387041624722668, "learning_rate": 1.8882803514583676e-05, "loss": 0.5916, "step": 3866 }, { "epoch": 0.4751197935864357, "grad_norm": 1.2014310627429468, "learning_rate": 1.8876589130873273e-05, "loss": 0.6093, "step": 3867 }, { "epoch": 0.4752426588032928, "grad_norm": 1.468685839038577, "learning_rate": 1.8870374034089434e-05, "loss": 0.6477, "step": 3868 }, { "epoch": 0.4753655240201499, "grad_norm": 1.1716285743240171, "learning_rate": 1.8864158225375403e-05, "loss": 0.4885, "step": 3869 }, { "epoch": 0.475488389237007, "grad_norm": 1.212137782523293, "learning_rate": 1.8857941705874514e-05, "loss": 0.6683, "step": 3870 }, { "epoch": 0.4756112544538641, "grad_norm": 1.0855627580319434, "learning_rate": 1.8851724476730275e-05, "loss": 0.5891, "step": 3871 }, { "epoch": 0.4757341196707212, "grad_norm": 0.994494879277994, "learning_rate": 1.88455065390863e-05, "loss": 0.6229, "step": 3872 }, { "epoch": 0.4758569848875783, "grad_norm": 1.3576949677231227, "learning_rate": 1.8839287894086334e-05, "loss": 0.6177, "step": 3873 }, { "epoch": 0.47597985010443544, "grad_norm": 1.229682497151213, "learning_rate": 1.8833068542874258e-05, "loss": 0.6387, "step": 3874 }, { "epoch": 0.47610271532129256, "grad_norm": 1.2766634456115593, "learning_rate": 1.882684848659408e-05, "loss": 0.7477, "step": 3875 }, { "epoch": 0.4762255805381497, "grad_norm": 1.2280841408300218, "learning_rate": 1.8820627726389944e-05, "loss": 0.6304, "step": 3876 }, { "epoch": 0.47634844575500673, "grad_norm": 1.1794192925905904, "learning_rate": 1.8814406263406115e-05, "loss": 0.662, "step": 3877 }, { "epoch": 0.47647131097186385, "grad_norm": 1.2696634327089864, "learning_rate": 1.880818409878699e-05, "loss": 0.4927, "step": 3878 }, { "epoch": 0.47659417618872096, "grad_norm": 1.2643467182076942, "learning_rate": 1.8801961233677095e-05, "loss": 0.7021, "step": 3879 }, { "epoch": 0.4767170414055781, "grad_norm": 1.120866202006693, "learning_rate": 1.879573766922109e-05, "loss": 0.5069, "step": 3880 }, { "epoch": 0.4768399066224352, "grad_norm": 1.238310712323139, "learning_rate": 1.878951340656376e-05, "loss": 0.5573, "step": 3881 }, { "epoch": 0.4769627718392923, "grad_norm": 1.2055907768366447, "learning_rate": 1.8783288446850006e-05, "loss": 0.6603, "step": 3882 }, { "epoch": 0.4770856370561494, "grad_norm": 1.2474755951350283, "learning_rate": 1.8777062791224883e-05, "loss": 0.6108, "step": 3883 }, { "epoch": 0.4772085022730065, "grad_norm": 1.1493208629770617, "learning_rate": 1.877083644083356e-05, "loss": 0.4694, "step": 3884 }, { "epoch": 0.4773313674898636, "grad_norm": 1.2759934911300765, "learning_rate": 1.876460939682132e-05, "loss": 0.6714, "step": 3885 }, { "epoch": 0.4774542327067207, "grad_norm": 1.3292483996972644, "learning_rate": 1.8758381660333595e-05, "loss": 0.6958, "step": 3886 }, { "epoch": 0.47757709792357783, "grad_norm": 1.3099601239690601, "learning_rate": 1.8752153232515946e-05, "loss": 0.5689, "step": 3887 }, { "epoch": 0.47769996314043495, "grad_norm": 1.1998058695201876, "learning_rate": 1.874592411451404e-05, "loss": 0.5783, "step": 3888 }, { "epoch": 0.47782282835729206, "grad_norm": 1.221999083778615, "learning_rate": 1.873969430747368e-05, "loss": 0.5579, "step": 3889 }, { "epoch": 0.4779456935741492, "grad_norm": 1.425148751396884, "learning_rate": 1.8733463812540812e-05, "loss": 0.711, "step": 3890 }, { "epoch": 0.4780685587910063, "grad_norm": 1.157917943643076, "learning_rate": 1.8727232630861483e-05, "loss": 0.526, "step": 3891 }, { "epoch": 0.47819142400786335, "grad_norm": 1.2431070054618614, "learning_rate": 1.8721000763581888e-05, "loss": 0.6659, "step": 3892 }, { "epoch": 0.47831428922472047, "grad_norm": 1.2273812670605957, "learning_rate": 1.8714768211848336e-05, "loss": 0.6081, "step": 3893 }, { "epoch": 0.4784371544415776, "grad_norm": 1.2195187676532657, "learning_rate": 1.870853497680726e-05, "loss": 0.5924, "step": 3894 }, { "epoch": 0.4785600196584347, "grad_norm": 1.1930906241096562, "learning_rate": 1.8702301059605226e-05, "loss": 0.5347, "step": 3895 }, { "epoch": 0.4786828848752918, "grad_norm": 1.4670961216467175, "learning_rate": 1.869606646138892e-05, "loss": 0.6944, "step": 3896 }, { "epoch": 0.47880575009214893, "grad_norm": 1.2560203286670037, "learning_rate": 1.8689831183305157e-05, "loss": 0.5865, "step": 3897 }, { "epoch": 0.47892861530900604, "grad_norm": 1.1317849276282275, "learning_rate": 1.8683595226500884e-05, "loss": 0.6399, "step": 3898 }, { "epoch": 0.4790514805258631, "grad_norm": 1.334795555914241, "learning_rate": 1.867735859212315e-05, "loss": 0.6431, "step": 3899 }, { "epoch": 0.4791743457427202, "grad_norm": 1.2379064088776899, "learning_rate": 1.8671121281319156e-05, "loss": 0.5823, "step": 3900 }, { "epoch": 0.47929721095957734, "grad_norm": 1.2052324367348517, "learning_rate": 1.866488329523621e-05, "loss": 0.6312, "step": 3901 }, { "epoch": 0.47942007617643445, "grad_norm": 1.4151659137131156, "learning_rate": 1.865864463502175e-05, "loss": 0.6978, "step": 3902 }, { "epoch": 0.47954294139329157, "grad_norm": 1.0919430001731778, "learning_rate": 1.8652405301823333e-05, "loss": 0.5815, "step": 3903 }, { "epoch": 0.4796658066101487, "grad_norm": 1.1501855124572116, "learning_rate": 1.8646165296788654e-05, "loss": 0.5219, "step": 3904 }, { "epoch": 0.4797886718270058, "grad_norm": 1.2920686359944922, "learning_rate": 1.863992462106551e-05, "loss": 0.561, "step": 3905 }, { "epoch": 0.47991153704386286, "grad_norm": 1.3054348030246359, "learning_rate": 1.863368327580184e-05, "loss": 0.672, "step": 3906 }, { "epoch": 0.48003440226072, "grad_norm": 1.3704183117422477, "learning_rate": 1.8627441262145692e-05, "loss": 0.5322, "step": 3907 }, { "epoch": 0.4801572674775771, "grad_norm": 1.32338813938963, "learning_rate": 1.8621198581245255e-05, "loss": 0.5301, "step": 3908 }, { "epoch": 0.4802801326944342, "grad_norm": 1.393926224069048, "learning_rate": 1.8614955234248816e-05, "loss": 0.6182, "step": 3909 }, { "epoch": 0.4804029979112913, "grad_norm": 1.232028524975572, "learning_rate": 1.8608711222304814e-05, "loss": 0.6223, "step": 3910 }, { "epoch": 0.48052586312814843, "grad_norm": 1.3480236031868313, "learning_rate": 1.8602466546561776e-05, "loss": 0.5617, "step": 3911 }, { "epoch": 0.48064872834500555, "grad_norm": 1.402248561510517, "learning_rate": 1.859622120816839e-05, "loss": 0.6883, "step": 3912 }, { "epoch": 0.4807715935618626, "grad_norm": 1.4119192842469264, "learning_rate": 1.858997520827343e-05, "loss": 0.5531, "step": 3913 }, { "epoch": 0.4808944587787197, "grad_norm": 1.3824768791455402, "learning_rate": 1.858372854802581e-05, "loss": 0.6134, "step": 3914 }, { "epoch": 0.48101732399557684, "grad_norm": 1.5161934896528293, "learning_rate": 1.857748122857457e-05, "loss": 0.5843, "step": 3915 }, { "epoch": 0.48114018921243396, "grad_norm": 1.265571303039805, "learning_rate": 1.8571233251068853e-05, "loss": 0.686, "step": 3916 }, { "epoch": 0.48126305442929107, "grad_norm": 1.6204475975658281, "learning_rate": 1.856498461665795e-05, "loss": 0.7124, "step": 3917 }, { "epoch": 0.4813859196461482, "grad_norm": 1.1998188612031968, "learning_rate": 1.8558735326491233e-05, "loss": 0.5955, "step": 3918 }, { "epoch": 0.4815087848630053, "grad_norm": 1.3732887495346038, "learning_rate": 1.855248538171824e-05, "loss": 0.5736, "step": 3919 }, { "epoch": 0.4816316500798624, "grad_norm": 1.1315451835794066, "learning_rate": 1.85462347834886e-05, "loss": 0.6889, "step": 3920 }, { "epoch": 0.4817545152967195, "grad_norm": 1.706709807023273, "learning_rate": 1.8539983532952065e-05, "loss": 0.5909, "step": 3921 }, { "epoch": 0.4818773805135766, "grad_norm": 1.3553359706327686, "learning_rate": 1.853373163125852e-05, "loss": 0.5591, "step": 3922 }, { "epoch": 0.4820002457304337, "grad_norm": 1.4481804493459811, "learning_rate": 1.852747907955796e-05, "loss": 0.6079, "step": 3923 }, { "epoch": 0.4821231109472908, "grad_norm": 1.3157924787753428, "learning_rate": 1.8521225879000496e-05, "loss": 0.6669, "step": 3924 }, { "epoch": 0.48224597616414794, "grad_norm": 1.1915106817460905, "learning_rate": 1.851497203073637e-05, "loss": 0.6423, "step": 3925 }, { "epoch": 0.48236884138100505, "grad_norm": 1.1908432484732963, "learning_rate": 1.850871753591593e-05, "loss": 0.6364, "step": 3926 }, { "epoch": 0.48249170659786217, "grad_norm": 1.5026907797533768, "learning_rate": 1.8502462395689663e-05, "loss": 0.7041, "step": 3927 }, { "epoch": 0.48261457181471923, "grad_norm": 1.4761198190557943, "learning_rate": 1.8496206611208144e-05, "loss": 0.5851, "step": 3928 }, { "epoch": 0.48273743703157634, "grad_norm": 1.1941401008463572, "learning_rate": 1.8489950183622097e-05, "loss": 0.5064, "step": 3929 }, { "epoch": 0.48286030224843346, "grad_norm": 1.3488540416466381, "learning_rate": 1.8483693114082346e-05, "loss": 0.6037, "step": 3930 }, { "epoch": 0.4829831674652906, "grad_norm": 1.3235984888584975, "learning_rate": 1.847743540373984e-05, "loss": 0.5638, "step": 3931 }, { "epoch": 0.4831060326821477, "grad_norm": 1.1979134729963379, "learning_rate": 1.8471177053745644e-05, "loss": 0.6182, "step": 3932 }, { "epoch": 0.4832288978990048, "grad_norm": 1.0980091273154853, "learning_rate": 1.8464918065250935e-05, "loss": 0.5257, "step": 3933 }, { "epoch": 0.4833517631158619, "grad_norm": 1.5124262816052743, "learning_rate": 1.8458658439407024e-05, "loss": 0.6918, "step": 3934 }, { "epoch": 0.483474628332719, "grad_norm": 1.1759437393722867, "learning_rate": 1.845239817736532e-05, "loss": 0.6792, "step": 3935 }, { "epoch": 0.4835974935495761, "grad_norm": 1.4256831239603953, "learning_rate": 1.8446137280277362e-05, "loss": 0.5355, "step": 3936 }, { "epoch": 0.4837203587664332, "grad_norm": 1.1390764772394804, "learning_rate": 1.84398757492948e-05, "loss": 0.726, "step": 3937 }, { "epoch": 0.4838432239832903, "grad_norm": 1.2160134173766664, "learning_rate": 1.8433613585569406e-05, "loss": 0.6016, "step": 3938 }, { "epoch": 0.48396608920014744, "grad_norm": 1.3570052914943052, "learning_rate": 1.8427350790253055e-05, "loss": 0.5868, "step": 3939 }, { "epoch": 0.48408895441700456, "grad_norm": 1.1710670953754942, "learning_rate": 1.8421087364497756e-05, "loss": 0.5463, "step": 3940 }, { "epoch": 0.4842118196338617, "grad_norm": 1.329549011304685, "learning_rate": 1.8414823309455625e-05, "loss": 0.6259, "step": 3941 }, { "epoch": 0.4843346848507188, "grad_norm": 1.1701940018542463, "learning_rate": 1.8408558626278892e-05, "loss": 0.6886, "step": 3942 }, { "epoch": 0.48445755006757585, "grad_norm": 1.4394459639078374, "learning_rate": 1.84022933161199e-05, "loss": 0.5924, "step": 3943 }, { "epoch": 0.48458041528443296, "grad_norm": 1.2501543049753259, "learning_rate": 1.8396027380131123e-05, "loss": 0.7162, "step": 3944 }, { "epoch": 0.4847032805012901, "grad_norm": 1.423710296190459, "learning_rate": 1.838976081946513e-05, "loss": 0.7092, "step": 3945 }, { "epoch": 0.4848261457181472, "grad_norm": 1.4242589093647058, "learning_rate": 1.8383493635274618e-05, "loss": 0.665, "step": 3946 }, { "epoch": 0.4849490109350043, "grad_norm": 1.852403357008487, "learning_rate": 1.8377225828712393e-05, "loss": 0.7168, "step": 3947 }, { "epoch": 0.4850718761518614, "grad_norm": 1.4061737541307144, "learning_rate": 1.8370957400931383e-05, "loss": 0.6926, "step": 3948 }, { "epoch": 0.48519474136871854, "grad_norm": 1.2899695409584422, "learning_rate": 1.8364688353084614e-05, "loss": 0.7261, "step": 3949 }, { "epoch": 0.4853176065855756, "grad_norm": 1.3927430913034793, "learning_rate": 1.835841868632525e-05, "loss": 0.619, "step": 3950 }, { "epoch": 0.4854404718024327, "grad_norm": 1.31743166084498, "learning_rate": 1.8352148401806546e-05, "loss": 0.5976, "step": 3951 }, { "epoch": 0.48556333701928983, "grad_norm": 1.6535920027805293, "learning_rate": 1.8345877500681887e-05, "loss": 0.678, "step": 3952 }, { "epoch": 0.48568620223614695, "grad_norm": 1.2331135111180023, "learning_rate": 1.8339605984104755e-05, "loss": 0.7656, "step": 3953 }, { "epoch": 0.48580906745300406, "grad_norm": 1.351550056655753, "learning_rate": 1.833333385322876e-05, "loss": 0.5604, "step": 3954 }, { "epoch": 0.4859319326698612, "grad_norm": 1.2685867639261659, "learning_rate": 1.8327061109207622e-05, "loss": 0.5686, "step": 3955 }, { "epoch": 0.4860547978867183, "grad_norm": 1.3979412470588533, "learning_rate": 1.8320787753195168e-05, "loss": 0.525, "step": 3956 }, { "epoch": 0.48617766310357535, "grad_norm": 1.2735549819428555, "learning_rate": 1.8314513786345345e-05, "loss": 0.6477, "step": 3957 }, { "epoch": 0.48630052832043247, "grad_norm": 1.3398615456611416, "learning_rate": 1.8308239209812204e-05, "loss": 0.4973, "step": 3958 }, { "epoch": 0.4864233935372896, "grad_norm": 1.5892238156837684, "learning_rate": 1.8301964024749917e-05, "loss": 0.6681, "step": 3959 }, { "epoch": 0.4865462587541467, "grad_norm": 1.264851310366645, "learning_rate": 1.8295688232312764e-05, "loss": 0.7201, "step": 3960 }, { "epoch": 0.4866691239710038, "grad_norm": 1.2717837861154042, "learning_rate": 1.8289411833655134e-05, "loss": 0.5433, "step": 3961 }, { "epoch": 0.48679198918786093, "grad_norm": 1.3604225561234031, "learning_rate": 1.8283134829931526e-05, "loss": 0.6688, "step": 3962 }, { "epoch": 0.48691485440471804, "grad_norm": 1.2857383491276482, "learning_rate": 1.827685722229656e-05, "loss": 0.6983, "step": 3963 }, { "epoch": 0.4870377196215751, "grad_norm": 1.1498216041913718, "learning_rate": 1.8270579011904957e-05, "loss": 0.6248, "step": 3964 }, { "epoch": 0.4871605848384322, "grad_norm": 1.2424510714226107, "learning_rate": 1.8264300199911557e-05, "loss": 0.6813, "step": 3965 }, { "epoch": 0.48728345005528934, "grad_norm": 1.3468098537277904, "learning_rate": 1.8258020787471307e-05, "loss": 0.5722, "step": 3966 }, { "epoch": 0.48740631527214645, "grad_norm": 1.1878259532198152, "learning_rate": 1.8251740775739258e-05, "loss": 0.644, "step": 3967 }, { "epoch": 0.48752918048900357, "grad_norm": 1.1792922568071738, "learning_rate": 1.824546016587058e-05, "loss": 0.5821, "step": 3968 }, { "epoch": 0.4876520457058607, "grad_norm": 1.3019137287072324, "learning_rate": 1.823917895902056e-05, "loss": 0.586, "step": 3969 }, { "epoch": 0.4877749109227178, "grad_norm": 1.6163375612651039, "learning_rate": 1.8232897156344574e-05, "loss": 0.628, "step": 3970 }, { "epoch": 0.4878977761395749, "grad_norm": 1.160085485938795, "learning_rate": 1.822661475899812e-05, "loss": 0.5845, "step": 3971 }, { "epoch": 0.488020641356432, "grad_norm": 1.072315424761548, "learning_rate": 1.8220331768136806e-05, "loss": 0.5189, "step": 3972 }, { "epoch": 0.4881435065732891, "grad_norm": 1.3395919184146052, "learning_rate": 1.821404818491635e-05, "loss": 0.5671, "step": 3973 }, { "epoch": 0.4882663717901462, "grad_norm": 1.170367249248654, "learning_rate": 1.820776401049257e-05, "loss": 0.6395, "step": 3974 }, { "epoch": 0.4883892370070033, "grad_norm": 1.1062076119527504, "learning_rate": 1.8201479246021405e-05, "loss": 0.6731, "step": 3975 }, { "epoch": 0.48851210222386043, "grad_norm": 1.330336174215417, "learning_rate": 1.81951938926589e-05, "loss": 0.7377, "step": 3976 }, { "epoch": 0.48863496744071755, "grad_norm": 1.1693819482673609, "learning_rate": 1.8188907951561194e-05, "loss": 0.7323, "step": 3977 }, { "epoch": 0.48875783265757466, "grad_norm": 1.3984572245048115, "learning_rate": 1.8182621423884555e-05, "loss": 0.7069, "step": 3978 }, { "epoch": 0.4888806978744317, "grad_norm": 1.352842834859655, "learning_rate": 1.8176334310785344e-05, "loss": 0.5823, "step": 3979 }, { "epoch": 0.48900356309128884, "grad_norm": 1.4084505361855988, "learning_rate": 1.8170046613420037e-05, "loss": 0.6394, "step": 3980 }, { "epoch": 0.48912642830814596, "grad_norm": 1.2910782248246004, "learning_rate": 1.8163758332945215e-05, "loss": 0.595, "step": 3981 }, { "epoch": 0.48924929352500307, "grad_norm": 1.2007454586100965, "learning_rate": 1.815746947051756e-05, "loss": 0.5586, "step": 3982 }, { "epoch": 0.4893721587418602, "grad_norm": 1.1779389429707823, "learning_rate": 1.8151180027293877e-05, "loss": 0.5531, "step": 3983 }, { "epoch": 0.4894950239587173, "grad_norm": 1.4185078098441628, "learning_rate": 1.8144890004431066e-05, "loss": 0.75, "step": 3984 }, { "epoch": 0.4896178891755744, "grad_norm": 1.2985140851080599, "learning_rate": 1.8138599403086127e-05, "loss": 0.7059, "step": 3985 }, { "epoch": 0.4897407543924315, "grad_norm": 1.2304544813724767, "learning_rate": 1.8132308224416186e-05, "loss": 0.6887, "step": 3986 }, { "epoch": 0.4898636196092886, "grad_norm": 1.6281528188690844, "learning_rate": 1.812601646957846e-05, "loss": 0.675, "step": 3987 }, { "epoch": 0.4899864848261457, "grad_norm": 1.3120905238535745, "learning_rate": 1.811972413973028e-05, "loss": 0.6878, "step": 3988 }, { "epoch": 0.4901093500430028, "grad_norm": 1.3966419903641167, "learning_rate": 1.8113431236029078e-05, "loss": 0.4754, "step": 3989 }, { "epoch": 0.49023221525985994, "grad_norm": 1.2110139366546557, "learning_rate": 1.8107137759632387e-05, "loss": 0.6193, "step": 3990 }, { "epoch": 0.49035508047671705, "grad_norm": 1.1034041379579198, "learning_rate": 1.8100843711697854e-05, "loss": 0.5114, "step": 3991 }, { "epoch": 0.49047794569357417, "grad_norm": 1.269176677360592, "learning_rate": 1.8094549093383236e-05, "loss": 0.6554, "step": 3992 }, { "epoch": 0.4906008109104313, "grad_norm": 1.1745252298478908, "learning_rate": 1.8088253905846377e-05, "loss": 0.5559, "step": 3993 }, { "epoch": 0.49072367612728834, "grad_norm": 1.2569470894206287, "learning_rate": 1.8081958150245243e-05, "loss": 0.6458, "step": 3994 }, { "epoch": 0.49084654134414546, "grad_norm": 1.1071051545350132, "learning_rate": 1.807566182773789e-05, "loss": 0.6753, "step": 3995 }, { "epoch": 0.4909694065610026, "grad_norm": 1.485515065482353, "learning_rate": 1.8069364939482496e-05, "loss": 0.8065, "step": 3996 }, { "epoch": 0.4910922717778597, "grad_norm": 1.2719664974343001, "learning_rate": 1.8063067486637324e-05, "loss": 0.6759, "step": 3997 }, { "epoch": 0.4912151369947168, "grad_norm": 1.1012949690966631, "learning_rate": 1.8056769470360748e-05, "loss": 0.6881, "step": 3998 }, { "epoch": 0.4913380022115739, "grad_norm": 1.4116783784253788, "learning_rate": 1.8050470891811257e-05, "loss": 0.5498, "step": 3999 }, { "epoch": 0.49146086742843104, "grad_norm": 1.1771107035530757, "learning_rate": 1.804417175214743e-05, "loss": 0.6001, "step": 4000 }, { "epoch": 0.4915837326452881, "grad_norm": 1.323488423482268, "learning_rate": 1.8037872052527948e-05, "loss": 0.5352, "step": 4001 }, { "epoch": 0.4917065978621452, "grad_norm": 1.081515157418436, "learning_rate": 1.8031571794111602e-05, "loss": 0.6692, "step": 4002 }, { "epoch": 0.4918294630790023, "grad_norm": 1.178207696312425, "learning_rate": 1.8025270978057285e-05, "loss": 0.5448, "step": 4003 }, { "epoch": 0.49195232829585944, "grad_norm": 1.312095966470591, "learning_rate": 1.8018969605523996e-05, "loss": 0.6346, "step": 4004 }, { "epoch": 0.49207519351271656, "grad_norm": 1.2656166605227273, "learning_rate": 1.8012667677670825e-05, "loss": 0.5655, "step": 4005 }, { "epoch": 0.4921980587295737, "grad_norm": 1.4401440523876121, "learning_rate": 1.8006365195656972e-05, "loss": 0.5488, "step": 4006 }, { "epoch": 0.4923209239464308, "grad_norm": 1.0550221865150673, "learning_rate": 1.8000062160641737e-05, "loss": 0.5652, "step": 4007 }, { "epoch": 0.49244378916328785, "grad_norm": 1.1665649908073752, "learning_rate": 1.7993758573784525e-05, "loss": 0.61, "step": 4008 }, { "epoch": 0.49256665438014496, "grad_norm": 1.3664517998404058, "learning_rate": 1.798745443624484e-05, "loss": 0.5461, "step": 4009 }, { "epoch": 0.4926895195970021, "grad_norm": 1.2538349216063378, "learning_rate": 1.798114974918228e-05, "loss": 0.6501, "step": 4010 }, { "epoch": 0.4928123848138592, "grad_norm": 1.3750056395116717, "learning_rate": 1.797484451375656e-05, "loss": 0.5696, "step": 4011 }, { "epoch": 0.4929352500307163, "grad_norm": 1.183800803828898, "learning_rate": 1.7968538731127486e-05, "loss": 0.6432, "step": 4012 }, { "epoch": 0.4930581152475734, "grad_norm": 1.0995790839787227, "learning_rate": 1.7962232402454965e-05, "loss": 0.7439, "step": 4013 }, { "epoch": 0.49318098046443054, "grad_norm": 1.3459653532812588, "learning_rate": 1.7955925528898997e-05, "loss": 0.6072, "step": 4014 }, { "epoch": 0.4933038456812876, "grad_norm": 1.2564983489589492, "learning_rate": 1.7949618111619706e-05, "loss": 0.6304, "step": 4015 }, { "epoch": 0.4934267108981447, "grad_norm": 1.2370714810031957, "learning_rate": 1.794331015177729e-05, "loss": 0.5368, "step": 4016 }, { "epoch": 0.49354957611500183, "grad_norm": 1.1620414890413449, "learning_rate": 1.793700165053206e-05, "loss": 0.7101, "step": 4017 }, { "epoch": 0.49367244133185895, "grad_norm": 1.285144200121842, "learning_rate": 1.793069260904442e-05, "loss": 0.6257, "step": 4018 }, { "epoch": 0.49379530654871606, "grad_norm": 1.284579315241585, "learning_rate": 1.7924383028474884e-05, "loss": 0.6212, "step": 4019 }, { "epoch": 0.4939181717655732, "grad_norm": 1.2297687551014476, "learning_rate": 1.7918072909984057e-05, "loss": 0.6584, "step": 4020 }, { "epoch": 0.4940410369824303, "grad_norm": 1.6336034810353335, "learning_rate": 1.7911762254732636e-05, "loss": 0.5272, "step": 4021 }, { "epoch": 0.4941639021992874, "grad_norm": 1.1735820693197858, "learning_rate": 1.7905451063881435e-05, "loss": 0.5608, "step": 4022 }, { "epoch": 0.49428676741614447, "grad_norm": 1.4571359777539556, "learning_rate": 1.7899139338591354e-05, "loss": 0.7525, "step": 4023 }, { "epoch": 0.4944096326330016, "grad_norm": 1.1214372261890664, "learning_rate": 1.7892827080023393e-05, "loss": 0.6091, "step": 4024 }, { "epoch": 0.4945324978498587, "grad_norm": 1.3760779139906787, "learning_rate": 1.7886514289338656e-05, "loss": 0.5982, "step": 4025 }, { "epoch": 0.4946553630667158, "grad_norm": 1.348188876159567, "learning_rate": 1.7880200967698332e-05, "loss": 0.7368, "step": 4026 }, { "epoch": 0.49477822828357293, "grad_norm": 1.590861871268304, "learning_rate": 1.7873887116263715e-05, "loss": 0.7055, "step": 4027 }, { "epoch": 0.49490109350043004, "grad_norm": 1.4430557883115331, "learning_rate": 1.7867572736196204e-05, "loss": 0.7765, "step": 4028 }, { "epoch": 0.49502395871728716, "grad_norm": 1.369578528628881, "learning_rate": 1.7861257828657283e-05, "loss": 0.6915, "step": 4029 }, { "epoch": 0.4951468239341442, "grad_norm": 1.1860294216006273, "learning_rate": 1.785494239480854e-05, "loss": 0.5485, "step": 4030 }, { "epoch": 0.49526968915100134, "grad_norm": 1.6124576888605764, "learning_rate": 1.784862643581166e-05, "loss": 0.6868, "step": 4031 }, { "epoch": 0.49539255436785845, "grad_norm": 1.3539297465639135, "learning_rate": 1.7842309952828424e-05, "loss": 0.6754, "step": 4032 }, { "epoch": 0.49551541958471557, "grad_norm": 1.38008113344926, "learning_rate": 1.7835992947020702e-05, "loss": 0.6065, "step": 4033 }, { "epoch": 0.4956382848015727, "grad_norm": 1.4468687450358388, "learning_rate": 1.782967541955047e-05, "loss": 0.7067, "step": 4034 }, { "epoch": 0.4957611500184298, "grad_norm": 1.9350090409291731, "learning_rate": 1.7823357371579797e-05, "loss": 0.8362, "step": 4035 }, { "epoch": 0.4958840152352869, "grad_norm": 1.239578856891161, "learning_rate": 1.7817038804270848e-05, "loss": 0.6929, "step": 4036 }, { "epoch": 0.496006880452144, "grad_norm": 1.1144238610730497, "learning_rate": 1.781071971878587e-05, "loss": 0.6098, "step": 4037 }, { "epoch": 0.4961297456690011, "grad_norm": 1.0390828803124725, "learning_rate": 1.7804400116287238e-05, "loss": 0.5875, "step": 4038 }, { "epoch": 0.4962526108858582, "grad_norm": 1.1350964837653306, "learning_rate": 1.7798079997937387e-05, "loss": 0.5675, "step": 4039 }, { "epoch": 0.4963754761027153, "grad_norm": 1.1912465878346779, "learning_rate": 1.7791759364898865e-05, "loss": 0.5667, "step": 4040 }, { "epoch": 0.49649834131957243, "grad_norm": 1.1914052739671983, "learning_rate": 1.7785438218334317e-05, "loss": 0.6198, "step": 4041 }, { "epoch": 0.49662120653642955, "grad_norm": 1.15865428350756, "learning_rate": 1.777911655940647e-05, "loss": 0.6325, "step": 4042 }, { "epoch": 0.49674407175328666, "grad_norm": 1.1044735166108692, "learning_rate": 1.7772794389278156e-05, "loss": 0.643, "step": 4043 }, { "epoch": 0.4968669369701438, "grad_norm": 1.1523941156599116, "learning_rate": 1.77664717091123e-05, "loss": 0.5509, "step": 4044 }, { "epoch": 0.49698980218700084, "grad_norm": 1.249300695245001, "learning_rate": 1.776014852007191e-05, "loss": 0.4827, "step": 4045 }, { "epoch": 0.49711266740385796, "grad_norm": 1.2449973605688345, "learning_rate": 1.77538248233201e-05, "loss": 0.5164, "step": 4046 }, { "epoch": 0.49723553262071507, "grad_norm": 1.3311162842358943, "learning_rate": 1.7747500620020076e-05, "loss": 0.7772, "step": 4047 }, { "epoch": 0.4973583978375722, "grad_norm": 1.3941870557070586, "learning_rate": 1.7741175911335125e-05, "loss": 0.5517, "step": 4048 }, { "epoch": 0.4974812630544293, "grad_norm": 1.5873531239285186, "learning_rate": 1.773485069842865e-05, "loss": 0.6909, "step": 4049 }, { "epoch": 0.4976041282712864, "grad_norm": 1.3890647143033608, "learning_rate": 1.772852498246412e-05, "loss": 0.4811, "step": 4050 }, { "epoch": 0.49772699348814353, "grad_norm": 1.2260703821099521, "learning_rate": 1.7722198764605114e-05, "loss": 0.6377, "step": 4051 }, { "epoch": 0.4978498587050006, "grad_norm": 1.3626262795835533, "learning_rate": 1.77158720460153e-05, "loss": 0.4713, "step": 4052 }, { "epoch": 0.4979727239218577, "grad_norm": 1.2280581700247875, "learning_rate": 1.770954482785844e-05, "loss": 0.563, "step": 4053 }, { "epoch": 0.4980955891387148, "grad_norm": 1.0903358961968306, "learning_rate": 1.770321711129838e-05, "loss": 0.5832, "step": 4054 }, { "epoch": 0.49821845435557194, "grad_norm": 1.0833627861444814, "learning_rate": 1.7696888897499062e-05, "loss": 0.6336, "step": 4055 }, { "epoch": 0.49834131957242905, "grad_norm": 1.4167608285978608, "learning_rate": 1.769056018762452e-05, "loss": 0.622, "step": 4056 }, { "epoch": 0.49846418478928617, "grad_norm": 1.5951464443564447, "learning_rate": 1.7684230982838883e-05, "loss": 0.6907, "step": 4057 }, { "epoch": 0.4985870500061433, "grad_norm": 1.357866858480301, "learning_rate": 1.7677901284306363e-05, "loss": 0.6938, "step": 4058 }, { "epoch": 0.49870991522300034, "grad_norm": 1.170498305686023, "learning_rate": 1.767157109319127e-05, "loss": 0.5761, "step": 4059 }, { "epoch": 0.49883278043985746, "grad_norm": 1.2068934971010823, "learning_rate": 1.7665240410657996e-05, "loss": 0.6647, "step": 4060 }, { "epoch": 0.4989556456567146, "grad_norm": 1.212683185234647, "learning_rate": 1.7658909237871035e-05, "loss": 0.5289, "step": 4061 }, { "epoch": 0.4990785108735717, "grad_norm": 1.3949767302163711, "learning_rate": 1.7652577575994965e-05, "loss": 0.535, "step": 4062 }, { "epoch": 0.4992013760904288, "grad_norm": 1.5652203080142404, "learning_rate": 1.7646245426194453e-05, "loss": 0.6328, "step": 4063 }, { "epoch": 0.4993242413072859, "grad_norm": 1.2619051216504946, "learning_rate": 1.7639912789634257e-05, "loss": 0.6938, "step": 4064 }, { "epoch": 0.49944710652414304, "grad_norm": 1.1884190470838978, "learning_rate": 1.763357966747922e-05, "loss": 0.5348, "step": 4065 }, { "epoch": 0.4995699717410001, "grad_norm": 1.3055523886472606, "learning_rate": 1.7627246060894285e-05, "loss": 0.5388, "step": 4066 }, { "epoch": 0.4996928369578572, "grad_norm": 1.410254371230192, "learning_rate": 1.7620911971044472e-05, "loss": 0.5372, "step": 4067 }, { "epoch": 0.4998157021747143, "grad_norm": 1.2973704068330616, "learning_rate": 1.7614577399094904e-05, "loss": 0.7042, "step": 4068 }, { "epoch": 0.49993856739157144, "grad_norm": 1.253438301267003, "learning_rate": 1.7608242346210775e-05, "loss": 0.6639, "step": 4069 }, { "epoch": 0.5000614326084285, "grad_norm": 1.3781990407498623, "learning_rate": 1.7601906813557383e-05, "loss": 0.6479, "step": 4070 }, { "epoch": 0.5001842978252856, "grad_norm": 1.1248389712934395, "learning_rate": 1.7595570802300107e-05, "loss": 0.6218, "step": 4071 }, { "epoch": 0.5003071630421427, "grad_norm": 1.275307165526083, "learning_rate": 1.758923431360442e-05, "loss": 0.5902, "step": 4072 }, { "epoch": 0.5004300282589998, "grad_norm": 1.3864789441261725, "learning_rate": 1.7582897348635867e-05, "loss": 0.6913, "step": 4073 }, { "epoch": 0.500552893475857, "grad_norm": 1.3196199385113057, "learning_rate": 1.7576559908560104e-05, "loss": 0.5124, "step": 4074 }, { "epoch": 0.5006757586927141, "grad_norm": 1.2759800899381155, "learning_rate": 1.7570221994542845e-05, "loss": 0.6253, "step": 4075 }, { "epoch": 0.5007986239095712, "grad_norm": 1.257043437090135, "learning_rate": 1.7563883607749927e-05, "loss": 0.613, "step": 4076 }, { "epoch": 0.5009214891264283, "grad_norm": 1.0977965811094175, "learning_rate": 1.755754474934724e-05, "loss": 0.6688, "step": 4077 }, { "epoch": 0.5010443543432854, "grad_norm": 1.074189633282041, "learning_rate": 1.7551205420500785e-05, "loss": 0.5806, "step": 4078 }, { "epoch": 0.5011672195601425, "grad_norm": 1.2463539681180344, "learning_rate": 1.7544865622376638e-05, "loss": 0.5413, "step": 4079 }, { "epoch": 0.5012900847769997, "grad_norm": 1.5869235147898462, "learning_rate": 1.753852535614097e-05, "loss": 0.6563, "step": 4080 }, { "epoch": 0.5014129499938568, "grad_norm": 1.3332992105791501, "learning_rate": 1.7532184622960014e-05, "loss": 0.5557, "step": 4081 }, { "epoch": 0.5015358152107139, "grad_norm": 1.4068258925727244, "learning_rate": 1.7525843424000128e-05, "loss": 0.5322, "step": 4082 }, { "epoch": 0.501658680427571, "grad_norm": 1.263114118210626, "learning_rate": 1.751950176042772e-05, "loss": 0.559, "step": 4083 }, { "epoch": 0.5017815456444281, "grad_norm": 1.1623772346584238, "learning_rate": 1.7513159633409305e-05, "loss": 0.6637, "step": 4084 }, { "epoch": 0.5019044108612851, "grad_norm": 1.2067843085170566, "learning_rate": 1.7506817044111477e-05, "loss": 0.5546, "step": 4085 }, { "epoch": 0.5020272760781422, "grad_norm": 1.034361708716484, "learning_rate": 1.75004739937009e-05, "loss": 0.5707, "step": 4086 }, { "epoch": 0.5021501412949994, "grad_norm": 1.6132587902964262, "learning_rate": 1.7494130483344357e-05, "loss": 0.7053, "step": 4087 }, { "epoch": 0.5022730065118565, "grad_norm": 1.1845983411063636, "learning_rate": 1.7487786514208685e-05, "loss": 0.5298, "step": 4088 }, { "epoch": 0.5023958717287136, "grad_norm": 1.1301760183377083, "learning_rate": 1.748144208746082e-05, "loss": 0.585, "step": 4089 }, { "epoch": 0.5025187369455707, "grad_norm": 1.2950692366443164, "learning_rate": 1.747509720426777e-05, "loss": 0.558, "step": 4090 }, { "epoch": 0.5026416021624278, "grad_norm": 1.3279213847296365, "learning_rate": 1.7468751865796645e-05, "loss": 0.5539, "step": 4091 }, { "epoch": 0.5027644673792849, "grad_norm": 1.0614348918976513, "learning_rate": 1.746240607321462e-05, "loss": 0.6641, "step": 4092 }, { "epoch": 0.502887332596142, "grad_norm": 1.1477658997742797, "learning_rate": 1.7456059827688976e-05, "loss": 0.6219, "step": 4093 }, { "epoch": 0.5030101978129992, "grad_norm": 1.1172307686395038, "learning_rate": 1.744971313038705e-05, "loss": 0.573, "step": 4094 }, { "epoch": 0.5031330630298563, "grad_norm": 1.5268839997184922, "learning_rate": 1.744336598247628e-05, "loss": 0.6936, "step": 4095 }, { "epoch": 0.5032559282467134, "grad_norm": 1.318093938235832, "learning_rate": 1.7437018385124182e-05, "loss": 0.6179, "step": 4096 }, { "epoch": 0.5033787934635705, "grad_norm": 1.1529378199287859, "learning_rate": 1.7430670339498358e-05, "loss": 0.5979, "step": 4097 }, { "epoch": 0.5035016586804276, "grad_norm": 1.1728566673096008, "learning_rate": 1.7424321846766487e-05, "loss": 0.546, "step": 4098 }, { "epoch": 0.5036245238972846, "grad_norm": 1.4037085578872663, "learning_rate": 1.7417972908096337e-05, "loss": 0.6173, "step": 4099 }, { "epoch": 0.5037473891141417, "grad_norm": 2.3344741726222478, "learning_rate": 1.741162352465575e-05, "loss": 0.6939, "step": 4100 }, { "epoch": 0.5038702543309989, "grad_norm": 1.1434958286746266, "learning_rate": 1.7405273697612656e-05, "loss": 0.5828, "step": 4101 }, { "epoch": 0.503993119547856, "grad_norm": 1.0802047293035066, "learning_rate": 1.7398923428135066e-05, "loss": 0.6274, "step": 4102 }, { "epoch": 0.5041159847647131, "grad_norm": 1.153731063334193, "learning_rate": 1.739257271739107e-05, "loss": 0.581, "step": 4103 }, { "epoch": 0.5042388499815702, "grad_norm": 1.1279532455273549, "learning_rate": 1.7386221566548836e-05, "loss": 0.6022, "step": 4104 }, { "epoch": 0.5043617151984273, "grad_norm": 1.3047958472735839, "learning_rate": 1.7379869976776617e-05, "loss": 0.6781, "step": 4105 }, { "epoch": 0.5044845804152844, "grad_norm": 1.2237535793439767, "learning_rate": 1.7373517949242755e-05, "loss": 0.7058, "step": 4106 }, { "epoch": 0.5046074456321415, "grad_norm": 1.3385248173765294, "learning_rate": 1.7367165485115657e-05, "loss": 0.6024, "step": 4107 }, { "epoch": 0.5047303108489987, "grad_norm": 1.281988045557728, "learning_rate": 1.736081258556382e-05, "loss": 0.61, "step": 4108 }, { "epoch": 0.5048531760658558, "grad_norm": 1.2659191532774394, "learning_rate": 1.7354459251755816e-05, "loss": 0.5225, "step": 4109 }, { "epoch": 0.5049760412827129, "grad_norm": 1.159112761484028, "learning_rate": 1.7348105484860305e-05, "loss": 0.5857, "step": 4110 }, { "epoch": 0.50509890649957, "grad_norm": 1.4586572773343138, "learning_rate": 1.7341751286046018e-05, "loss": 0.5559, "step": 4111 }, { "epoch": 0.5052217717164271, "grad_norm": 1.3943357280092812, "learning_rate": 1.733539665648177e-05, "loss": 0.7045, "step": 4112 }, { "epoch": 0.5053446369332842, "grad_norm": 1.1533395152014136, "learning_rate": 1.732904159733645e-05, "loss": 0.5404, "step": 4113 }, { "epoch": 0.5054675021501412, "grad_norm": 1.317841366098898, "learning_rate": 1.7322686109779032e-05, "loss": 0.6736, "step": 4114 }, { "epoch": 0.5055903673669984, "grad_norm": 1.5601316078709793, "learning_rate": 1.731633019497857e-05, "loss": 0.5915, "step": 4115 }, { "epoch": 0.5057132325838555, "grad_norm": 1.2119405160490337, "learning_rate": 1.7309973854104186e-05, "loss": 0.5593, "step": 4116 }, { "epoch": 0.5058360978007126, "grad_norm": 1.2729887651554948, "learning_rate": 1.7303617088325097e-05, "loss": 0.6082, "step": 4117 }, { "epoch": 0.5059589630175697, "grad_norm": 1.2979866858563829, "learning_rate": 1.729725989881058e-05, "loss": 0.5095, "step": 4118 }, { "epoch": 0.5060818282344268, "grad_norm": 1.1133417546869844, "learning_rate": 1.7290902286730007e-05, "loss": 0.6859, "step": 4119 }, { "epoch": 0.5062046934512839, "grad_norm": 1.24349039006455, "learning_rate": 1.7284544253252813e-05, "loss": 0.5704, "step": 4120 }, { "epoch": 0.506327558668141, "grad_norm": 1.0009344920391352, "learning_rate": 1.727818579954852e-05, "loss": 0.5908, "step": 4121 }, { "epoch": 0.5064504238849982, "grad_norm": 1.5733966882018366, "learning_rate": 1.7271826926786724e-05, "loss": 0.6726, "step": 4122 }, { "epoch": 0.5065732891018553, "grad_norm": 1.2704838305898034, "learning_rate": 1.7265467636137097e-05, "loss": 0.5759, "step": 4123 }, { "epoch": 0.5066961543187124, "grad_norm": 1.4453877547742615, "learning_rate": 1.7259107928769392e-05, "loss": 0.7, "step": 4124 }, { "epoch": 0.5068190195355695, "grad_norm": 1.3010405389497752, "learning_rate": 1.725274780585343e-05, "loss": 0.5916, "step": 4125 }, { "epoch": 0.5069418847524266, "grad_norm": 1.33414495047856, "learning_rate": 1.724638726855912e-05, "loss": 0.6306, "step": 4126 }, { "epoch": 0.5070647499692837, "grad_norm": 1.3576829218820794, "learning_rate": 1.7240026318056446e-05, "loss": 0.5969, "step": 4127 }, { "epoch": 0.5071876151861408, "grad_norm": 1.2273646041169446, "learning_rate": 1.7233664955515454e-05, "loss": 0.5772, "step": 4128 }, { "epoch": 0.5073104804029979, "grad_norm": 1.2189189092826462, "learning_rate": 1.722730318210628e-05, "loss": 0.5233, "step": 4129 }, { "epoch": 0.507433345619855, "grad_norm": 1.1064246406620746, "learning_rate": 1.722094099899913e-05, "loss": 0.6584, "step": 4130 }, { "epoch": 0.5075562108367121, "grad_norm": 1.100524040694271, "learning_rate": 1.7214578407364286e-05, "loss": 0.6458, "step": 4131 }, { "epoch": 0.5076790760535692, "grad_norm": 1.414949426513427, "learning_rate": 1.7208215408372107e-05, "loss": 0.6001, "step": 4132 }, { "epoch": 0.5078019412704263, "grad_norm": 1.3403963939400776, "learning_rate": 1.720185200319302e-05, "loss": 0.5232, "step": 4133 }, { "epoch": 0.5079248064872834, "grad_norm": 1.4281987800201277, "learning_rate": 1.7195488192997543e-05, "loss": 0.6651, "step": 4134 }, { "epoch": 0.5080476717041406, "grad_norm": 1.230898176973714, "learning_rate": 1.7189123978956246e-05, "loss": 0.6841, "step": 4135 }, { "epoch": 0.5081705369209977, "grad_norm": 1.0237237719276406, "learning_rate": 1.718275936223979e-05, "loss": 0.5787, "step": 4136 }, { "epoch": 0.5082934021378548, "grad_norm": 1.164475454376925, "learning_rate": 1.7176394344018912e-05, "loss": 0.5337, "step": 4137 }, { "epoch": 0.5084162673547119, "grad_norm": 1.2885799895020256, "learning_rate": 1.7170028925464403e-05, "loss": 0.6488, "step": 4138 }, { "epoch": 0.508539132571569, "grad_norm": 1.1119921320152204, "learning_rate": 1.716366310774715e-05, "loss": 0.6776, "step": 4139 }, { "epoch": 0.5086619977884261, "grad_norm": 1.260066014774468, "learning_rate": 1.7157296892038096e-05, "loss": 0.5744, "step": 4140 }, { "epoch": 0.5087848630052832, "grad_norm": 1.2265349404310013, "learning_rate": 1.7150930279508273e-05, "loss": 0.6722, "step": 4141 }, { "epoch": 0.5089077282221404, "grad_norm": 1.1601032548669743, "learning_rate": 1.714456327132877e-05, "loss": 0.6354, "step": 4142 }, { "epoch": 0.5090305934389974, "grad_norm": 1.1140711721194152, "learning_rate": 1.7138195868670764e-05, "loss": 0.5157, "step": 4143 }, { "epoch": 0.5091534586558545, "grad_norm": 1.2476388272361378, "learning_rate": 1.7131828072705494e-05, "loss": 0.6695, "step": 4144 }, { "epoch": 0.5092763238727116, "grad_norm": 1.1398265546580781, "learning_rate": 1.7125459884604278e-05, "loss": 0.5443, "step": 4145 }, { "epoch": 0.5093991890895687, "grad_norm": 1.2271153426683994, "learning_rate": 1.7119091305538495e-05, "loss": 0.706, "step": 4146 }, { "epoch": 0.5095220543064258, "grad_norm": 1.3897713058448364, "learning_rate": 1.711272233667961e-05, "loss": 0.7794, "step": 4147 }, { "epoch": 0.509644919523283, "grad_norm": 1.3632815675562604, "learning_rate": 1.710635297919916e-05, "loss": 0.665, "step": 4148 }, { "epoch": 0.5097677847401401, "grad_norm": 1.4636988227024073, "learning_rate": 1.7099983234268733e-05, "loss": 0.6445, "step": 4149 }, { "epoch": 0.5098906499569972, "grad_norm": 1.3594616797617187, "learning_rate": 1.709361310306001e-05, "loss": 0.5707, "step": 4150 }, { "epoch": 0.5100135151738543, "grad_norm": 1.0839391831662053, "learning_rate": 1.7087242586744733e-05, "loss": 0.5236, "step": 4151 }, { "epoch": 0.5101363803907114, "grad_norm": 1.2396075885187536, "learning_rate": 1.708087168649472e-05, "loss": 0.6279, "step": 4152 }, { "epoch": 0.5102592456075685, "grad_norm": 1.2726100901347932, "learning_rate": 1.7074500403481855e-05, "loss": 0.6162, "step": 4153 }, { "epoch": 0.5103821108244256, "grad_norm": 1.3519484155270383, "learning_rate": 1.7068128738878095e-05, "loss": 0.8016, "step": 4154 }, { "epoch": 0.5105049760412828, "grad_norm": 1.2885760768321537, "learning_rate": 1.706175669385546e-05, "loss": 0.7005, "step": 4155 }, { "epoch": 0.5106278412581399, "grad_norm": 1.351093471844497, "learning_rate": 1.7055384269586063e-05, "loss": 0.6009, "step": 4156 }, { "epoch": 0.5107507064749969, "grad_norm": 1.4108676220448408, "learning_rate": 1.7049011467242055e-05, "loss": 0.524, "step": 4157 }, { "epoch": 0.510873571691854, "grad_norm": 1.234532666295897, "learning_rate": 1.7042638287995673e-05, "loss": 0.6825, "step": 4158 }, { "epoch": 0.5109964369087111, "grad_norm": 1.3503834106842694, "learning_rate": 1.7036264733019226e-05, "loss": 0.5705, "step": 4159 }, { "epoch": 0.5111193021255682, "grad_norm": 1.6374761778382572, "learning_rate": 1.702989080348509e-05, "loss": 0.7748, "step": 4160 }, { "epoch": 0.5112421673424253, "grad_norm": 1.772215824639301, "learning_rate": 1.7023516500565702e-05, "loss": 0.5845, "step": 4161 }, { "epoch": 0.5113650325592825, "grad_norm": 1.2088284149459396, "learning_rate": 1.7017141825433576e-05, "loss": 0.6628, "step": 4162 }, { "epoch": 0.5114878977761396, "grad_norm": 1.1357381958854131, "learning_rate": 1.7010766779261292e-05, "loss": 0.6064, "step": 4163 }, { "epoch": 0.5116107629929967, "grad_norm": 1.2410418575049407, "learning_rate": 1.7004391363221502e-05, "loss": 0.567, "step": 4164 }, { "epoch": 0.5117336282098538, "grad_norm": 1.2646041410878872, "learning_rate": 1.6998015578486918e-05, "loss": 0.5433, "step": 4165 }, { "epoch": 0.5118564934267109, "grad_norm": 1.3195757369266008, "learning_rate": 1.699163942623033e-05, "loss": 0.5645, "step": 4166 }, { "epoch": 0.511979358643568, "grad_norm": 1.3059311238032743, "learning_rate": 1.6985262907624583e-05, "loss": 0.7651, "step": 4167 }, { "epoch": 0.5121022238604251, "grad_norm": 1.1670871361774835, "learning_rate": 1.6978886023842598e-05, "loss": 0.6327, "step": 4168 }, { "epoch": 0.5122250890772823, "grad_norm": 1.2080195404543168, "learning_rate": 1.6972508776057362e-05, "loss": 0.6837, "step": 4169 }, { "epoch": 0.5123479542941394, "grad_norm": 1.0702341946954859, "learning_rate": 1.6966131165441928e-05, "loss": 0.6506, "step": 4170 }, { "epoch": 0.5124708195109965, "grad_norm": 1.1732205377553286, "learning_rate": 1.6959753193169422e-05, "loss": 0.658, "step": 4171 }, { "epoch": 0.5125936847278535, "grad_norm": 1.7414066796847802, "learning_rate": 1.695337486041302e-05, "loss": 0.6465, "step": 4172 }, { "epoch": 0.5127165499447106, "grad_norm": 1.2722621694330176, "learning_rate": 1.694699616834598e-05, "loss": 0.5129, "step": 4173 }, { "epoch": 0.5128394151615677, "grad_norm": 1.1952760059924372, "learning_rate": 1.6940617118141626e-05, "loss": 0.5487, "step": 4174 }, { "epoch": 0.5129622803784248, "grad_norm": 1.2370908378212728, "learning_rate": 1.693423771097334e-05, "loss": 0.6055, "step": 4175 }, { "epoch": 0.513085145595282, "grad_norm": 1.3658047838052374, "learning_rate": 1.6927857948014565e-05, "loss": 0.6713, "step": 4176 }, { "epoch": 0.5132080108121391, "grad_norm": 1.0537336997375397, "learning_rate": 1.6921477830438827e-05, "loss": 0.6098, "step": 4177 }, { "epoch": 0.5133308760289962, "grad_norm": 2.027039349092157, "learning_rate": 1.6915097359419703e-05, "loss": 0.6523, "step": 4178 }, { "epoch": 0.5134537412458533, "grad_norm": 1.3199844304620736, "learning_rate": 1.690871653613084e-05, "loss": 0.6951, "step": 4179 }, { "epoch": 0.5135766064627104, "grad_norm": 1.303947882354836, "learning_rate": 1.6902335361745944e-05, "loss": 0.6889, "step": 4180 }, { "epoch": 0.5136994716795675, "grad_norm": 1.5858356225006345, "learning_rate": 1.6895953837438802e-05, "loss": 0.7444, "step": 4181 }, { "epoch": 0.5138223368964246, "grad_norm": 1.4602729737795226, "learning_rate": 1.6889571964383242e-05, "loss": 0.5328, "step": 4182 }, { "epoch": 0.5139452021132818, "grad_norm": 1.2845383323544521, "learning_rate": 1.6883189743753174e-05, "loss": 0.6098, "step": 4183 }, { "epoch": 0.5140680673301389, "grad_norm": 1.230293509612916, "learning_rate": 1.687680717672257e-05, "loss": 0.6571, "step": 4184 }, { "epoch": 0.514190932546996, "grad_norm": 1.3171010024603018, "learning_rate": 1.6870424264465454e-05, "loss": 0.6318, "step": 4185 }, { "epoch": 0.514313797763853, "grad_norm": 1.3994129857337814, "learning_rate": 1.6864041008155926e-05, "loss": 0.6262, "step": 4186 }, { "epoch": 0.5144366629807101, "grad_norm": 1.1630718011210157, "learning_rate": 1.6857657408968146e-05, "loss": 0.6474, "step": 4187 }, { "epoch": 0.5145595281975672, "grad_norm": 1.3791567167848149, "learning_rate": 1.6851273468076328e-05, "loss": 0.6549, "step": 4188 }, { "epoch": 0.5146823934144243, "grad_norm": 1.4607468530445382, "learning_rate": 1.6844889186654757e-05, "loss": 0.6337, "step": 4189 }, { "epoch": 0.5148052586312815, "grad_norm": 1.17377402988719, "learning_rate": 1.6838504565877795e-05, "loss": 0.607, "step": 4190 }, { "epoch": 0.5149281238481386, "grad_norm": 1.3141352628802079, "learning_rate": 1.6832119606919835e-05, "loss": 0.6169, "step": 4191 }, { "epoch": 0.5150509890649957, "grad_norm": 1.702820595098977, "learning_rate": 1.6825734310955356e-05, "loss": 0.6696, "step": 4192 }, { "epoch": 0.5151738542818528, "grad_norm": 1.420768079606018, "learning_rate": 1.681934867915889e-05, "loss": 0.6014, "step": 4193 }, { "epoch": 0.5152967194987099, "grad_norm": 1.3545565631776781, "learning_rate": 1.6812962712705037e-05, "loss": 0.5505, "step": 4194 }, { "epoch": 0.515419584715567, "grad_norm": 1.5561849122572577, "learning_rate": 1.6806576412768446e-05, "loss": 0.6761, "step": 4195 }, { "epoch": 0.5155424499324242, "grad_norm": 1.4311139511429454, "learning_rate": 1.6800189780523844e-05, "loss": 0.5588, "step": 4196 }, { "epoch": 0.5156653151492813, "grad_norm": 1.3301938350512337, "learning_rate": 1.6793802817146003e-05, "loss": 0.4948, "step": 4197 }, { "epoch": 0.5157881803661384, "grad_norm": 1.3333436747497975, "learning_rate": 1.6787415523809775e-05, "loss": 0.6123, "step": 4198 }, { "epoch": 0.5159110455829955, "grad_norm": 1.2054281189318998, "learning_rate": 1.6781027901690043e-05, "loss": 0.7135, "step": 4199 }, { "epoch": 0.5160339107998526, "grad_norm": 1.184937987692237, "learning_rate": 1.6774639951961783e-05, "loss": 0.6463, "step": 4200 }, { "epoch": 0.5161567760167096, "grad_norm": 1.239996960534812, "learning_rate": 1.6768251675800012e-05, "loss": 0.565, "step": 4201 }, { "epoch": 0.5162796412335667, "grad_norm": 1.2867754615546094, "learning_rate": 1.6761863074379815e-05, "loss": 0.5347, "step": 4202 }, { "epoch": 0.5164025064504238, "grad_norm": 1.1974639057733678, "learning_rate": 1.6755474148876328e-05, "loss": 0.5869, "step": 4203 }, { "epoch": 0.516525371667281, "grad_norm": 1.2763405444851272, "learning_rate": 1.674908490046476e-05, "loss": 0.5826, "step": 4204 }, { "epoch": 0.5166482368841381, "grad_norm": 1.224450744448321, "learning_rate": 1.6742695330320367e-05, "loss": 0.5902, "step": 4205 }, { "epoch": 0.5167711021009952, "grad_norm": 1.6384752329853172, "learning_rate": 1.6736305439618466e-05, "loss": 0.6321, "step": 4206 }, { "epoch": 0.5168939673178523, "grad_norm": 1.245638682918695, "learning_rate": 1.672991522953444e-05, "loss": 0.6485, "step": 4207 }, { "epoch": 0.5170168325347094, "grad_norm": 1.3713922888966426, "learning_rate": 1.672352470124373e-05, "loss": 0.5802, "step": 4208 }, { "epoch": 0.5171396977515665, "grad_norm": 1.191380667012763, "learning_rate": 1.671713385592183e-05, "loss": 0.6404, "step": 4209 }, { "epoch": 0.5172625629684237, "grad_norm": 1.199208593002673, "learning_rate": 1.6710742694744288e-05, "loss": 0.6505, "step": 4210 }, { "epoch": 0.5173854281852808, "grad_norm": 1.0906193162500728, "learning_rate": 1.6704351218886722e-05, "loss": 0.7048, "step": 4211 }, { "epoch": 0.5175082934021379, "grad_norm": 1.1237142300137535, "learning_rate": 1.6697959429524803e-05, "loss": 0.4851, "step": 4212 }, { "epoch": 0.517631158618995, "grad_norm": 1.0980293518129507, "learning_rate": 1.6691567327834264e-05, "loss": 0.5834, "step": 4213 }, { "epoch": 0.5177540238358521, "grad_norm": 1.248790045168421, "learning_rate": 1.668517491499088e-05, "loss": 0.6309, "step": 4214 }, { "epoch": 0.5178768890527092, "grad_norm": 1.7008877651711838, "learning_rate": 1.6678782192170503e-05, "loss": 0.7584, "step": 4215 }, { "epoch": 0.5179997542695662, "grad_norm": 1.3412685710041883, "learning_rate": 1.6672389160549027e-05, "loss": 0.6299, "step": 4216 }, { "epoch": 0.5181226194864234, "grad_norm": 1.325990495813332, "learning_rate": 1.6665995821302413e-05, "loss": 0.6442, "step": 4217 }, { "epoch": 0.5182454847032805, "grad_norm": 1.1588260062038955, "learning_rate": 1.6659602175606665e-05, "loss": 0.5337, "step": 4218 }, { "epoch": 0.5183683499201376, "grad_norm": 1.1741284281965074, "learning_rate": 1.6653208224637868e-05, "loss": 0.6968, "step": 4219 }, { "epoch": 0.5184912151369947, "grad_norm": 1.2818938413670031, "learning_rate": 1.6646813969572133e-05, "loss": 0.5866, "step": 4220 }, { "epoch": 0.5186140803538518, "grad_norm": 1.17270943643584, "learning_rate": 1.664041941158565e-05, "loss": 0.6661, "step": 4221 }, { "epoch": 0.5187369455707089, "grad_norm": 1.2755035117725944, "learning_rate": 1.6634024551854656e-05, "loss": 0.6087, "step": 4222 }, { "epoch": 0.518859810787566, "grad_norm": 1.1229287358743867, "learning_rate": 1.662762939155544e-05, "loss": 0.5305, "step": 4223 }, { "epoch": 0.5189826760044232, "grad_norm": 1.0196750648931596, "learning_rate": 1.6621233931864357e-05, "loss": 0.6042, "step": 4224 }, { "epoch": 0.5191055412212803, "grad_norm": 1.3669485603117042, "learning_rate": 1.661483817395781e-05, "loss": 0.6463, "step": 4225 }, { "epoch": 0.5192284064381374, "grad_norm": 1.1244019793287447, "learning_rate": 1.6608442119012242e-05, "loss": 0.6436, "step": 4226 }, { "epoch": 0.5193512716549945, "grad_norm": 1.2137328208209355, "learning_rate": 1.6602045768204186e-05, "loss": 0.593, "step": 4227 }, { "epoch": 0.5194741368718516, "grad_norm": 2.383449284933318, "learning_rate": 1.6595649122710197e-05, "loss": 0.7098, "step": 4228 }, { "epoch": 0.5195970020887087, "grad_norm": 1.1986989797239005, "learning_rate": 1.6589252183706904e-05, "loss": 0.5912, "step": 4229 }, { "epoch": 0.5197198673055657, "grad_norm": 1.1713263645811969, "learning_rate": 1.6582854952370972e-05, "loss": 0.67, "step": 4230 }, { "epoch": 0.5198427325224229, "grad_norm": 1.2420122903387165, "learning_rate": 1.657645742987914e-05, "loss": 0.4883, "step": 4231 }, { "epoch": 0.51996559773928, "grad_norm": 1.2984824821828782, "learning_rate": 1.6570059617408187e-05, "loss": 0.604, "step": 4232 }, { "epoch": 0.5200884629561371, "grad_norm": 1.227260097553568, "learning_rate": 1.656366151613495e-05, "loss": 0.6249, "step": 4233 }, { "epoch": 0.5202113281729942, "grad_norm": 1.6198075940274366, "learning_rate": 1.6557263127236323e-05, "loss": 0.6997, "step": 4234 }, { "epoch": 0.5203341933898513, "grad_norm": 1.2063466094921436, "learning_rate": 1.6550864451889234e-05, "loss": 0.5956, "step": 4235 }, { "epoch": 0.5204570586067084, "grad_norm": 1.2155964531899865, "learning_rate": 1.654446549127069e-05, "loss": 0.6371, "step": 4236 }, { "epoch": 0.5205799238235655, "grad_norm": 1.213939539574419, "learning_rate": 1.6538066246557735e-05, "loss": 0.6441, "step": 4237 }, { "epoch": 0.5207027890404227, "grad_norm": 1.0432296659337787, "learning_rate": 1.653166671892747e-05, "loss": 0.5139, "step": 4238 }, { "epoch": 0.5208256542572798, "grad_norm": 1.2795083806509446, "learning_rate": 1.6525266909557046e-05, "loss": 0.5605, "step": 4239 }, { "epoch": 0.5209485194741369, "grad_norm": 1.1280372899233686, "learning_rate": 1.6518866819623665e-05, "loss": 0.5644, "step": 4240 }, { "epoch": 0.521071384690994, "grad_norm": 1.0669101686390612, "learning_rate": 1.6512466450304584e-05, "loss": 0.6787, "step": 4241 }, { "epoch": 0.5211942499078511, "grad_norm": 1.5096765739971247, "learning_rate": 1.6506065802777107e-05, "loss": 0.5195, "step": 4242 }, { "epoch": 0.5213171151247082, "grad_norm": 1.5131996739756128, "learning_rate": 1.6499664878218592e-05, "loss": 0.5811, "step": 4243 }, { "epoch": 0.5214399803415654, "grad_norm": 1.1787990236115518, "learning_rate": 1.649326367780645e-05, "loss": 0.5787, "step": 4244 }, { "epoch": 0.5215628455584224, "grad_norm": 1.342246600835556, "learning_rate": 1.6486862202718134e-05, "loss": 0.6023, "step": 4245 }, { "epoch": 0.5216857107752795, "grad_norm": 1.575249703996527, "learning_rate": 1.6480460454131165e-05, "loss": 0.5617, "step": 4246 }, { "epoch": 0.5218085759921366, "grad_norm": 1.4669737609737286, "learning_rate": 1.6474058433223092e-05, "loss": 0.6462, "step": 4247 }, { "epoch": 0.5219314412089937, "grad_norm": 1.3796153660905843, "learning_rate": 1.646765614117153e-05, "loss": 0.5778, "step": 4248 }, { "epoch": 0.5220543064258508, "grad_norm": 1.498387530366278, "learning_rate": 1.646125357915414e-05, "loss": 0.6278, "step": 4249 }, { "epoch": 0.5221771716427079, "grad_norm": 1.2931669592268369, "learning_rate": 1.645485074834863e-05, "loss": 0.6208, "step": 4250 }, { "epoch": 0.522300036859565, "grad_norm": 1.1763646600087867, "learning_rate": 1.6448447649932763e-05, "loss": 0.6032, "step": 4251 }, { "epoch": 0.5224229020764222, "grad_norm": 1.4834248991358954, "learning_rate": 1.644204428508434e-05, "loss": 0.5836, "step": 4252 }, { "epoch": 0.5225457672932793, "grad_norm": 1.0673251508297166, "learning_rate": 1.6435640654981225e-05, "loss": 0.5975, "step": 4253 }, { "epoch": 0.5226686325101364, "grad_norm": 1.256668719482166, "learning_rate": 1.642923676080132e-05, "loss": 0.663, "step": 4254 }, { "epoch": 0.5227914977269935, "grad_norm": 1.0781031870464053, "learning_rate": 1.6422832603722583e-05, "loss": 0.4825, "step": 4255 }, { "epoch": 0.5229143629438506, "grad_norm": 1.3859000640785157, "learning_rate": 1.6416428184923014e-05, "loss": 0.5234, "step": 4256 }, { "epoch": 0.5230372281607077, "grad_norm": 1.1447023320084395, "learning_rate": 1.641002350558067e-05, "loss": 0.5935, "step": 4257 }, { "epoch": 0.5231600933775649, "grad_norm": 1.2842165605393696, "learning_rate": 1.6403618566873645e-05, "loss": 0.5905, "step": 4258 }, { "epoch": 0.5232829585944219, "grad_norm": 1.155499600531974, "learning_rate": 1.6397213369980087e-05, "loss": 0.5745, "step": 4259 }, { "epoch": 0.523405823811279, "grad_norm": 1.4590141792671811, "learning_rate": 1.6390807916078192e-05, "loss": 0.5939, "step": 4260 }, { "epoch": 0.5235286890281361, "grad_norm": 1.351962999085323, "learning_rate": 1.6384402206346202e-05, "loss": 0.6297, "step": 4261 }, { "epoch": 0.5236515542449932, "grad_norm": 1.2913696767833085, "learning_rate": 1.6377996241962402e-05, "loss": 0.6249, "step": 4262 }, { "epoch": 0.5237744194618503, "grad_norm": 1.1813700340380469, "learning_rate": 1.6371590024105128e-05, "loss": 0.7462, "step": 4263 }, { "epoch": 0.5238972846787074, "grad_norm": 1.2933314599430885, "learning_rate": 1.6365183553952765e-05, "loss": 0.5564, "step": 4264 }, { "epoch": 0.5240201498955646, "grad_norm": 1.2985453192855174, "learning_rate": 1.6358776832683743e-05, "loss": 0.6582, "step": 4265 }, { "epoch": 0.5241430151124217, "grad_norm": 1.055326997580227, "learning_rate": 1.635236986147653e-05, "loss": 0.5277, "step": 4266 }, { "epoch": 0.5242658803292788, "grad_norm": 1.4184858818130355, "learning_rate": 1.6345962641509657e-05, "loss": 0.5367, "step": 4267 }, { "epoch": 0.5243887455461359, "grad_norm": 1.4417135296914994, "learning_rate": 1.633955517396168e-05, "loss": 0.6533, "step": 4268 }, { "epoch": 0.524511610762993, "grad_norm": 1.3553278816627548, "learning_rate": 1.6333147460011223e-05, "loss": 0.6129, "step": 4269 }, { "epoch": 0.5246344759798501, "grad_norm": 1.176862511854242, "learning_rate": 1.6326739500836935e-05, "loss": 0.5947, "step": 4270 }, { "epoch": 0.5247573411967072, "grad_norm": 1.068924986550487, "learning_rate": 1.6320331297617513e-05, "loss": 0.6375, "step": 4271 }, { "epoch": 0.5248802064135644, "grad_norm": 1.4298611252256577, "learning_rate": 1.631392285153172e-05, "loss": 0.6168, "step": 4272 }, { "epoch": 0.5250030716304215, "grad_norm": 1.1612210603904038, "learning_rate": 1.6307514163758334e-05, "loss": 0.5731, "step": 4273 }, { "epoch": 0.5251259368472785, "grad_norm": 1.466255992637623, "learning_rate": 1.6301105235476195e-05, "loss": 0.7601, "step": 4274 }, { "epoch": 0.5252488020641356, "grad_norm": 1.1432669256501309, "learning_rate": 1.629469606786419e-05, "loss": 0.6119, "step": 4275 }, { "epoch": 0.5253716672809927, "grad_norm": 1.6834018882813744, "learning_rate": 1.628828666210124e-05, "loss": 0.7085, "step": 4276 }, { "epoch": 0.5254945324978498, "grad_norm": 1.4112966611587572, "learning_rate": 1.628187701936631e-05, "loss": 0.644, "step": 4277 }, { "epoch": 0.525617397714707, "grad_norm": 1.4292318835795002, "learning_rate": 1.6275467140838418e-05, "loss": 0.6481, "step": 4278 }, { "epoch": 0.5257402629315641, "grad_norm": 1.2833051161987439, "learning_rate": 1.6269057027696618e-05, "loss": 0.4778, "step": 4279 }, { "epoch": 0.5258631281484212, "grad_norm": 1.329655049175335, "learning_rate": 1.626264668112001e-05, "loss": 0.5754, "step": 4280 }, { "epoch": 0.5259859933652783, "grad_norm": 1.3076962587601542, "learning_rate": 1.625623610228773e-05, "loss": 0.7077, "step": 4281 }, { "epoch": 0.5261088585821354, "grad_norm": 1.4322184479883449, "learning_rate": 1.6249825292378965e-05, "loss": 0.6474, "step": 4282 }, { "epoch": 0.5262317237989925, "grad_norm": 1.4114726945898115, "learning_rate": 1.6243414252572946e-05, "loss": 0.518, "step": 4283 }, { "epoch": 0.5263545890158496, "grad_norm": 1.4628982849173522, "learning_rate": 1.6237002984048935e-05, "loss": 0.6823, "step": 4284 }, { "epoch": 0.5264774542327068, "grad_norm": 1.2356942896591545, "learning_rate": 1.6230591487986247e-05, "loss": 0.6515, "step": 4285 }, { "epoch": 0.5266003194495639, "grad_norm": 1.2756910353108089, "learning_rate": 1.6224179765564243e-05, "loss": 0.6163, "step": 4286 }, { "epoch": 0.526723184666421, "grad_norm": 1.2890631989360206, "learning_rate": 1.6217767817962304e-05, "loss": 0.5493, "step": 4287 }, { "epoch": 0.526846049883278, "grad_norm": 1.5569202448667867, "learning_rate": 1.6211355646359877e-05, "loss": 0.687, "step": 4288 }, { "epoch": 0.5269689151001351, "grad_norm": 1.1947104949601746, "learning_rate": 1.620494325193643e-05, "loss": 0.644, "step": 4289 }, { "epoch": 0.5270917803169922, "grad_norm": 1.3637536499078662, "learning_rate": 1.619853063587149e-05, "loss": 0.6873, "step": 4290 }, { "epoch": 0.5272146455338493, "grad_norm": 1.2490680105206218, "learning_rate": 1.6192117799344606e-05, "loss": 0.6694, "step": 4291 }, { "epoch": 0.5273375107507065, "grad_norm": 1.1396378647238006, "learning_rate": 1.6185704743535388e-05, "loss": 0.5226, "step": 4292 }, { "epoch": 0.5274603759675636, "grad_norm": 1.3315612668868808, "learning_rate": 1.6179291469623474e-05, "loss": 0.6204, "step": 4293 }, { "epoch": 0.5275832411844207, "grad_norm": 1.2855158741909696, "learning_rate": 1.617287797878854e-05, "loss": 0.5753, "step": 4294 }, { "epoch": 0.5277061064012778, "grad_norm": 1.2695168861682755, "learning_rate": 1.6166464272210304e-05, "loss": 0.5813, "step": 4295 }, { "epoch": 0.5278289716181349, "grad_norm": 1.2075578749079223, "learning_rate": 1.6160050351068534e-05, "loss": 0.6223, "step": 4296 }, { "epoch": 0.527951836834992, "grad_norm": 1.27065786878236, "learning_rate": 1.6153636216543027e-05, "loss": 0.6216, "step": 4297 }, { "epoch": 0.5280747020518491, "grad_norm": 1.1258688812265873, "learning_rate": 1.6147221869813618e-05, "loss": 0.588, "step": 4298 }, { "epoch": 0.5281975672687063, "grad_norm": 1.2375947424094456, "learning_rate": 1.6140807312060188e-05, "loss": 0.6862, "step": 4299 }, { "epoch": 0.5283204324855634, "grad_norm": 1.1986147765878126, "learning_rate": 1.613439254446265e-05, "loss": 0.5202, "step": 4300 }, { "epoch": 0.5284432977024205, "grad_norm": 1.224801926170722, "learning_rate": 1.612797756820096e-05, "loss": 0.6245, "step": 4301 }, { "epoch": 0.5285661629192776, "grad_norm": 1.0716920135295458, "learning_rate": 1.612156238445511e-05, "loss": 0.6112, "step": 4302 }, { "epoch": 0.5286890281361346, "grad_norm": 1.351474257631185, "learning_rate": 1.6115146994405133e-05, "loss": 0.6472, "step": 4303 }, { "epoch": 0.5288118933529917, "grad_norm": 1.2684969224289784, "learning_rate": 1.61087313992311e-05, "loss": 0.6082, "step": 4304 }, { "epoch": 0.5289347585698488, "grad_norm": 1.4350278840200403, "learning_rate": 1.6102315600113117e-05, "loss": 0.5351, "step": 4305 }, { "epoch": 0.529057623786706, "grad_norm": 1.141318855720664, "learning_rate": 1.6095899598231324e-05, "loss": 0.5918, "step": 4306 }, { "epoch": 0.5291804890035631, "grad_norm": 1.2230732927058143, "learning_rate": 1.6089483394765908e-05, "loss": 0.6722, "step": 4307 }, { "epoch": 0.5293033542204202, "grad_norm": 1.1797759370218, "learning_rate": 1.6083066990897094e-05, "loss": 0.6672, "step": 4308 }, { "epoch": 0.5294262194372773, "grad_norm": 1.1961960972027517, "learning_rate": 1.607665038780513e-05, "loss": 0.5841, "step": 4309 }, { "epoch": 0.5295490846541344, "grad_norm": 1.5341117046590953, "learning_rate": 1.6070233586670297e-05, "loss": 0.6685, "step": 4310 }, { "epoch": 0.5296719498709915, "grad_norm": 1.7576311580951482, "learning_rate": 1.606381658867295e-05, "loss": 0.602, "step": 4311 }, { "epoch": 0.5297948150878486, "grad_norm": 1.2082150247145997, "learning_rate": 1.6057399394993432e-05, "loss": 0.7483, "step": 4312 }, { "epoch": 0.5299176803047058, "grad_norm": 1.252194916994575, "learning_rate": 1.6050982006812158e-05, "loss": 0.5279, "step": 4313 }, { "epoch": 0.5300405455215629, "grad_norm": 1.3998754622468084, "learning_rate": 1.6044564425309555e-05, "loss": 0.5624, "step": 4314 }, { "epoch": 0.53016341073842, "grad_norm": 1.2428258220410766, "learning_rate": 1.6038146651666106e-05, "loss": 0.621, "step": 4315 }, { "epoch": 0.5302862759552771, "grad_norm": 1.3155025574688937, "learning_rate": 1.603172868706231e-05, "loss": 0.5579, "step": 4316 }, { "epoch": 0.5304091411721342, "grad_norm": 1.2085466825335522, "learning_rate": 1.6025310532678713e-05, "loss": 0.5657, "step": 4317 }, { "epoch": 0.5305320063889912, "grad_norm": 1.2657685552319962, "learning_rate": 1.6018892189695893e-05, "loss": 0.4639, "step": 4318 }, { "epoch": 0.5306548716058483, "grad_norm": 1.2467808105787705, "learning_rate": 1.6012473659294463e-05, "loss": 0.6422, "step": 4319 }, { "epoch": 0.5307777368227055, "grad_norm": 1.2249434080568264, "learning_rate": 1.6006054942655073e-05, "loss": 0.6772, "step": 4320 }, { "epoch": 0.5309006020395626, "grad_norm": 1.196509489029234, "learning_rate": 1.5999636040958394e-05, "loss": 0.609, "step": 4321 }, { "epoch": 0.5310234672564197, "grad_norm": 1.161090363154261, "learning_rate": 1.5993216955385153e-05, "loss": 0.5201, "step": 4322 }, { "epoch": 0.5311463324732768, "grad_norm": 1.3718375753627412, "learning_rate": 1.598679768711609e-05, "loss": 0.6162, "step": 4323 }, { "epoch": 0.5312691976901339, "grad_norm": 1.2166884511491731, "learning_rate": 1.5980378237331995e-05, "loss": 0.6327, "step": 4324 }, { "epoch": 0.531392062906991, "grad_norm": 1.2884525282198391, "learning_rate": 1.597395860721368e-05, "loss": 0.5387, "step": 4325 }, { "epoch": 0.5315149281238482, "grad_norm": 1.1255720210846591, "learning_rate": 1.5967538797941997e-05, "loss": 0.6304, "step": 4326 }, { "epoch": 0.5316377933407053, "grad_norm": 1.1088334468965502, "learning_rate": 1.5961118810697824e-05, "loss": 0.6072, "step": 4327 }, { "epoch": 0.5317606585575624, "grad_norm": 1.2610127562711524, "learning_rate": 1.5954698646662085e-05, "loss": 0.6328, "step": 4328 }, { "epoch": 0.5318835237744195, "grad_norm": 1.1464982787798386, "learning_rate": 1.5948278307015715e-05, "loss": 0.557, "step": 4329 }, { "epoch": 0.5320063889912766, "grad_norm": 1.394239203512859, "learning_rate": 1.5941857792939702e-05, "loss": 0.5645, "step": 4330 }, { "epoch": 0.5321292542081337, "grad_norm": 1.5555437794321274, "learning_rate": 1.593543710561506e-05, "loss": 0.5723, "step": 4331 }, { "epoch": 0.5322521194249907, "grad_norm": 1.2095400604865274, "learning_rate": 1.592901624622282e-05, "loss": 0.5765, "step": 4332 }, { "epoch": 0.5323749846418478, "grad_norm": 1.259032413497801, "learning_rate": 1.5922595215944072e-05, "loss": 0.6059, "step": 4333 }, { "epoch": 0.532497849858705, "grad_norm": 1.137659986259396, "learning_rate": 1.591617401595992e-05, "loss": 0.6136, "step": 4334 }, { "epoch": 0.5326207150755621, "grad_norm": 1.3613924049481243, "learning_rate": 1.5909752647451494e-05, "loss": 0.5951, "step": 4335 }, { "epoch": 0.5327435802924192, "grad_norm": 1.4451758947436337, "learning_rate": 1.590333111159997e-05, "loss": 0.5608, "step": 4336 }, { "epoch": 0.5328664455092763, "grad_norm": 1.1953242239830473, "learning_rate": 1.589690940958655e-05, "loss": 0.5513, "step": 4337 }, { "epoch": 0.5329893107261334, "grad_norm": 0.99944216292318, "learning_rate": 1.5890487542592458e-05, "loss": 0.4824, "step": 4338 }, { "epoch": 0.5331121759429905, "grad_norm": 1.1152496714432332, "learning_rate": 1.5884065511798957e-05, "loss": 0.6199, "step": 4339 }, { "epoch": 0.5332350411598477, "grad_norm": 1.364669105914537, "learning_rate": 1.5877643318387338e-05, "loss": 0.6496, "step": 4340 }, { "epoch": 0.5333579063767048, "grad_norm": 1.1414381348561864, "learning_rate": 1.5871220963538927e-05, "loss": 0.6158, "step": 4341 }, { "epoch": 0.5334807715935619, "grad_norm": 1.205476098846901, "learning_rate": 1.5864798448435064e-05, "loss": 0.6449, "step": 4342 }, { "epoch": 0.533603636810419, "grad_norm": 1.1515790441114955, "learning_rate": 1.5858375774257136e-05, "loss": 0.6841, "step": 4343 }, { "epoch": 0.5337265020272761, "grad_norm": 1.1716905336103194, "learning_rate": 1.585195294218655e-05, "loss": 0.6434, "step": 4344 }, { "epoch": 0.5338493672441332, "grad_norm": 1.1931006330780958, "learning_rate": 1.584552995340475e-05, "loss": 0.6173, "step": 4345 }, { "epoch": 0.5339722324609903, "grad_norm": 1.373425514395551, "learning_rate": 1.58391068090932e-05, "loss": 0.6638, "step": 4346 }, { "epoch": 0.5340950976778474, "grad_norm": 1.3432509199282612, "learning_rate": 1.5832683510433393e-05, "loss": 0.5642, "step": 4347 }, { "epoch": 0.5342179628947045, "grad_norm": 1.3532324317366726, "learning_rate": 1.582626005860685e-05, "loss": 0.54, "step": 4348 }, { "epoch": 0.5343408281115616, "grad_norm": 1.0839589523668423, "learning_rate": 1.581983645479513e-05, "loss": 0.5708, "step": 4349 }, { "epoch": 0.5344636933284187, "grad_norm": 1.3819817353416641, "learning_rate": 1.581341270017981e-05, "loss": 0.5615, "step": 4350 }, { "epoch": 0.5345865585452758, "grad_norm": 1.2314795583657772, "learning_rate": 1.5806988795942495e-05, "loss": 0.6574, "step": 4351 }, { "epoch": 0.5347094237621329, "grad_norm": 1.1931726245829304, "learning_rate": 1.580056474326483e-05, "loss": 0.5724, "step": 4352 }, { "epoch": 0.53483228897899, "grad_norm": 1.2948324571557073, "learning_rate": 1.5794140543328472e-05, "loss": 0.5544, "step": 4353 }, { "epoch": 0.5349551541958472, "grad_norm": 1.3274639917989848, "learning_rate": 1.5787716197315107e-05, "loss": 0.5947, "step": 4354 }, { "epoch": 0.5350780194127043, "grad_norm": 1.2310755888579925, "learning_rate": 1.578129170640646e-05, "loss": 0.6114, "step": 4355 }, { "epoch": 0.5352008846295614, "grad_norm": 1.1032310024156444, "learning_rate": 1.5774867071784274e-05, "loss": 0.6043, "step": 4356 }, { "epoch": 0.5353237498464185, "grad_norm": 1.2465317001150256, "learning_rate": 1.5768442294630312e-05, "loss": 0.5047, "step": 4357 }, { "epoch": 0.5354466150632756, "grad_norm": 1.1824330344197964, "learning_rate": 1.5762017376126372e-05, "loss": 0.6233, "step": 4358 }, { "epoch": 0.5355694802801327, "grad_norm": 1.2558565493164255, "learning_rate": 1.5755592317454278e-05, "loss": 0.5619, "step": 4359 }, { "epoch": 0.5356923454969899, "grad_norm": 1.230563680110278, "learning_rate": 1.5749167119795878e-05, "loss": 0.5895, "step": 4360 }, { "epoch": 0.5358152107138469, "grad_norm": 1.6454217636588497, "learning_rate": 1.574274178433304e-05, "loss": 0.6129, "step": 4361 }, { "epoch": 0.535938075930704, "grad_norm": 1.3735871897081506, "learning_rate": 1.5736316312247675e-05, "loss": 0.6368, "step": 4362 }, { "epoch": 0.5360609411475611, "grad_norm": 1.1150419574792605, "learning_rate": 1.5729890704721698e-05, "loss": 0.5402, "step": 4363 }, { "epoch": 0.5361838063644182, "grad_norm": 1.3591394221412354, "learning_rate": 1.572346496293706e-05, "loss": 0.5478, "step": 4364 }, { "epoch": 0.5363066715812753, "grad_norm": 1.1773946048451065, "learning_rate": 1.5717039088075728e-05, "loss": 0.5578, "step": 4365 }, { "epoch": 0.5364295367981324, "grad_norm": 1.7098353969847262, "learning_rate": 1.5710613081319714e-05, "loss": 0.6479, "step": 4366 }, { "epoch": 0.5365524020149895, "grad_norm": 1.3012360290408098, "learning_rate": 1.5704186943851025e-05, "loss": 0.6386, "step": 4367 }, { "epoch": 0.5366752672318467, "grad_norm": 1.2170361840451445, "learning_rate": 1.5697760676851717e-05, "loss": 0.5465, "step": 4368 }, { "epoch": 0.5367981324487038, "grad_norm": 1.31871056996393, "learning_rate": 1.5691334281503858e-05, "loss": 0.578, "step": 4369 }, { "epoch": 0.5369209976655609, "grad_norm": 1.360153677834514, "learning_rate": 1.5684907758989543e-05, "loss": 0.5408, "step": 4370 }, { "epoch": 0.537043862882418, "grad_norm": 1.3243282796840476, "learning_rate": 1.567848111049088e-05, "loss": 0.5268, "step": 4371 }, { "epoch": 0.5371667280992751, "grad_norm": 1.2515125885865221, "learning_rate": 1.5672054337190026e-05, "loss": 0.6206, "step": 4372 }, { "epoch": 0.5372895933161322, "grad_norm": 1.314035487074974, "learning_rate": 1.5665627440269134e-05, "loss": 0.5428, "step": 4373 }, { "epoch": 0.5374124585329894, "grad_norm": 1.2094874281039014, "learning_rate": 1.565920042091039e-05, "loss": 0.5459, "step": 4374 }, { "epoch": 0.5375353237498465, "grad_norm": 1.245842235251364, "learning_rate": 1.5652773280296002e-05, "loss": 0.5478, "step": 4375 }, { "epoch": 0.5376581889667035, "grad_norm": 1.512852440001919, "learning_rate": 1.5646346019608205e-05, "loss": 0.587, "step": 4376 }, { "epoch": 0.5377810541835606, "grad_norm": 1.130211641181014, "learning_rate": 1.5639918640029247e-05, "loss": 0.6153, "step": 4377 }, { "epoch": 0.5379039194004177, "grad_norm": 1.3430359473258147, "learning_rate": 1.5633491142741403e-05, "loss": 0.6509, "step": 4378 }, { "epoch": 0.5380267846172748, "grad_norm": 1.2978172385890951, "learning_rate": 1.5627063528926973e-05, "loss": 0.6726, "step": 4379 }, { "epoch": 0.5381496498341319, "grad_norm": 1.2246496647364546, "learning_rate": 1.562063579976828e-05, "loss": 0.5857, "step": 4380 }, { "epoch": 0.538272515050989, "grad_norm": 1.2106986189311912, "learning_rate": 1.561420795644765e-05, "loss": 0.6691, "step": 4381 }, { "epoch": 0.5383953802678462, "grad_norm": 1.5774516780781163, "learning_rate": 1.560778000014745e-05, "loss": 0.593, "step": 4382 }, { "epoch": 0.5385182454847033, "grad_norm": 1.1914284958071424, "learning_rate": 1.5601351932050063e-05, "loss": 0.5734, "step": 4383 }, { "epoch": 0.5386411107015604, "grad_norm": 1.3404874320057154, "learning_rate": 1.5594923753337884e-05, "loss": 0.6596, "step": 4384 }, { "epoch": 0.5387639759184175, "grad_norm": 1.2906440277751678, "learning_rate": 1.5588495465193345e-05, "loss": 0.637, "step": 4385 }, { "epoch": 0.5388868411352746, "grad_norm": 1.6627706207104231, "learning_rate": 1.5582067068798873e-05, "loss": 0.6444, "step": 4386 }, { "epoch": 0.5390097063521317, "grad_norm": 1.1652940561602751, "learning_rate": 1.557563856533695e-05, "loss": 0.5761, "step": 4387 }, { "epoch": 0.5391325715689889, "grad_norm": 1.1149310500983625, "learning_rate": 1.5569209955990036e-05, "loss": 0.6227, "step": 4388 }, { "epoch": 0.539255436785846, "grad_norm": 1.1684253809898575, "learning_rate": 1.5562781241940647e-05, "loss": 0.5255, "step": 4389 }, { "epoch": 0.539378302002703, "grad_norm": 1.1017776817192952, "learning_rate": 1.5556352424371294e-05, "loss": 0.6245, "step": 4390 }, { "epoch": 0.5395011672195601, "grad_norm": 1.2426521268298811, "learning_rate": 1.5549923504464527e-05, "loss": 0.6153, "step": 4391 }, { "epoch": 0.5396240324364172, "grad_norm": 0.9999074357185077, "learning_rate": 1.5543494483402894e-05, "loss": 0.5779, "step": 4392 }, { "epoch": 0.5397468976532743, "grad_norm": 1.2701069339566733, "learning_rate": 1.5537065362368977e-05, "loss": 0.6407, "step": 4393 }, { "epoch": 0.5398697628701314, "grad_norm": 1.5718431857727055, "learning_rate": 1.553063614254537e-05, "loss": 0.6242, "step": 4394 }, { "epoch": 0.5399926280869886, "grad_norm": 1.3440920024964829, "learning_rate": 1.5524206825114685e-05, "loss": 0.5724, "step": 4395 }, { "epoch": 0.5401154933038457, "grad_norm": 1.7663912998093592, "learning_rate": 1.551777741125955e-05, "loss": 0.5845, "step": 4396 }, { "epoch": 0.5402383585207028, "grad_norm": 1.399582253014718, "learning_rate": 1.5511347902162622e-05, "loss": 0.56, "step": 4397 }, { "epoch": 0.5403612237375599, "grad_norm": 1.4214701523303708, "learning_rate": 1.5504918299006564e-05, "loss": 0.6186, "step": 4398 }, { "epoch": 0.540484088954417, "grad_norm": 1.3954991674110218, "learning_rate": 1.549848860297406e-05, "loss": 0.546, "step": 4399 }, { "epoch": 0.5406069541712741, "grad_norm": 1.1911242466758956, "learning_rate": 1.5492058815247804e-05, "loss": 0.7685, "step": 4400 }, { "epoch": 0.5407298193881312, "grad_norm": 1.4780228560256317, "learning_rate": 1.548562893701053e-05, "loss": 0.6962, "step": 4401 }, { "epoch": 0.5408526846049884, "grad_norm": 1.4142146552519128, "learning_rate": 1.5479198969444956e-05, "loss": 0.6742, "step": 4402 }, { "epoch": 0.5409755498218455, "grad_norm": 1.3956439994195429, "learning_rate": 1.547276891373384e-05, "loss": 0.7088, "step": 4403 }, { "epoch": 0.5410984150387026, "grad_norm": 1.2921985467779908, "learning_rate": 1.546633877105995e-05, "loss": 0.5059, "step": 4404 }, { "epoch": 0.5412212802555596, "grad_norm": 1.1689434958723093, "learning_rate": 1.5459908542606066e-05, "loss": 0.5953, "step": 4405 }, { "epoch": 0.5413441454724167, "grad_norm": 1.3575765468504395, "learning_rate": 1.545347822955499e-05, "loss": 0.6937, "step": 4406 }, { "epoch": 0.5414670106892738, "grad_norm": 1.4207492356528686, "learning_rate": 1.544704783308953e-05, "loss": 0.6077, "step": 4407 }, { "epoch": 0.541589875906131, "grad_norm": 1.3673254576347835, "learning_rate": 1.5440617354392526e-05, "loss": 0.5699, "step": 4408 }, { "epoch": 0.5417127411229881, "grad_norm": 1.4061306435221461, "learning_rate": 1.5434186794646813e-05, "loss": 0.6409, "step": 4409 }, { "epoch": 0.5418356063398452, "grad_norm": 1.23927203093729, "learning_rate": 1.5427756155035257e-05, "loss": 0.562, "step": 4410 }, { "epoch": 0.5419584715567023, "grad_norm": 1.1083953025537885, "learning_rate": 1.5421325436740734e-05, "loss": 0.6036, "step": 4411 }, { "epoch": 0.5420813367735594, "grad_norm": 1.125628381479355, "learning_rate": 1.5414894640946122e-05, "loss": 0.6061, "step": 4412 }, { "epoch": 0.5422042019904165, "grad_norm": 1.2102061137452074, "learning_rate": 1.5408463768834336e-05, "loss": 0.6162, "step": 4413 }, { "epoch": 0.5423270672072736, "grad_norm": 1.0953672841397657, "learning_rate": 1.5402032821588288e-05, "loss": 0.5151, "step": 4414 }, { "epoch": 0.5424499324241308, "grad_norm": 1.170885039744495, "learning_rate": 1.5395601800390907e-05, "loss": 0.6533, "step": 4415 }, { "epoch": 0.5425727976409879, "grad_norm": 1.1843601345447476, "learning_rate": 1.5389170706425142e-05, "loss": 0.5613, "step": 4416 }, { "epoch": 0.542695662857845, "grad_norm": 0.9852256196135046, "learning_rate": 1.538273954087395e-05, "loss": 0.5814, "step": 4417 }, { "epoch": 0.5428185280747021, "grad_norm": 1.2138245101233558, "learning_rate": 1.5376308304920303e-05, "loss": 0.4784, "step": 4418 }, { "epoch": 0.5429413932915591, "grad_norm": 1.2675575703041655, "learning_rate": 1.536987699974718e-05, "loss": 0.5928, "step": 4419 }, { "epoch": 0.5430642585084162, "grad_norm": 1.4904067597884545, "learning_rate": 1.536344562653759e-05, "loss": 0.5469, "step": 4420 }, { "epoch": 0.5431871237252733, "grad_norm": 1.2706073890078255, "learning_rate": 1.5357014186474527e-05, "loss": 0.4528, "step": 4421 }, { "epoch": 0.5433099889421305, "grad_norm": 1.3660182021668983, "learning_rate": 1.5350582680741022e-05, "loss": 0.6297, "step": 4422 }, { "epoch": 0.5434328541589876, "grad_norm": 1.3060323564396317, "learning_rate": 1.5344151110520104e-05, "loss": 0.6393, "step": 4423 }, { "epoch": 0.5435557193758447, "grad_norm": 1.4459814644388398, "learning_rate": 1.533771947699482e-05, "loss": 0.6569, "step": 4424 }, { "epoch": 0.5436785845927018, "grad_norm": 1.7803595888923265, "learning_rate": 1.5331287781348234e-05, "loss": 0.5763, "step": 4425 }, { "epoch": 0.5438014498095589, "grad_norm": 1.0900153088744446, "learning_rate": 1.53248560247634e-05, "loss": 0.6526, "step": 4426 }, { "epoch": 0.543924315026416, "grad_norm": 1.3829222922096918, "learning_rate": 1.5318424208423415e-05, "loss": 0.5478, "step": 4427 }, { "epoch": 0.5440471802432731, "grad_norm": 1.1468478361051782, "learning_rate": 1.531199233351136e-05, "loss": 0.682, "step": 4428 }, { "epoch": 0.5441700454601303, "grad_norm": 1.215359473410985, "learning_rate": 1.5305560401210337e-05, "loss": 0.5529, "step": 4429 }, { "epoch": 0.5442929106769874, "grad_norm": 1.14155507189865, "learning_rate": 1.5299128412703465e-05, "loss": 0.4978, "step": 4430 }, { "epoch": 0.5444157758938445, "grad_norm": 1.206657659405935, "learning_rate": 1.5292696369173858e-05, "loss": 0.716, "step": 4431 }, { "epoch": 0.5445386411107016, "grad_norm": 1.2537533077007825, "learning_rate": 1.5286264271804648e-05, "loss": 0.5981, "step": 4432 }, { "epoch": 0.5446615063275587, "grad_norm": 1.2922995084052475, "learning_rate": 1.5279832121778987e-05, "loss": 0.573, "step": 4433 }, { "epoch": 0.5447843715444157, "grad_norm": 1.2324543411221631, "learning_rate": 1.527339992028002e-05, "loss": 0.6387, "step": 4434 }, { "epoch": 0.5449072367612728, "grad_norm": 1.1123539861488962, "learning_rate": 1.5266967668490912e-05, "loss": 0.5635, "step": 4435 }, { "epoch": 0.54503010197813, "grad_norm": 1.4280701949006696, "learning_rate": 1.526053536759483e-05, "loss": 0.6037, "step": 4436 }, { "epoch": 0.5451529671949871, "grad_norm": 1.3604831566128586, "learning_rate": 1.525410301877496e-05, "loss": 0.5584, "step": 4437 }, { "epoch": 0.5452758324118442, "grad_norm": 1.1648713619844802, "learning_rate": 1.5247670623214484e-05, "loss": 0.5319, "step": 4438 }, { "epoch": 0.5453986976287013, "grad_norm": 1.175405421343064, "learning_rate": 1.5241238182096606e-05, "loss": 0.4967, "step": 4439 }, { "epoch": 0.5455215628455584, "grad_norm": 1.1581257186237395, "learning_rate": 1.5234805696604531e-05, "loss": 0.6363, "step": 4440 }, { "epoch": 0.5456444280624155, "grad_norm": 1.5628755360021496, "learning_rate": 1.5228373167921469e-05, "loss": 0.5712, "step": 4441 }, { "epoch": 0.5457672932792726, "grad_norm": 1.517255865804262, "learning_rate": 1.5221940597230639e-05, "loss": 0.7081, "step": 4442 }, { "epoch": 0.5458901584961298, "grad_norm": 1.1065633894569462, "learning_rate": 1.5215507985715283e-05, "loss": 0.5971, "step": 4443 }, { "epoch": 0.5460130237129869, "grad_norm": 1.2370471596993637, "learning_rate": 1.5209075334558625e-05, "loss": 0.5096, "step": 4444 }, { "epoch": 0.546135888929844, "grad_norm": 1.2961573522054295, "learning_rate": 1.5202642644943914e-05, "loss": 0.5963, "step": 4445 }, { "epoch": 0.5462587541467011, "grad_norm": 1.1793622871558542, "learning_rate": 1.5196209918054408e-05, "loss": 0.6929, "step": 4446 }, { "epoch": 0.5463816193635582, "grad_norm": 1.1690105854183774, "learning_rate": 1.5189777155073354e-05, "loss": 0.5404, "step": 4447 }, { "epoch": 0.5465044845804153, "grad_norm": 1.2119655850553628, "learning_rate": 1.5183344357184032e-05, "loss": 0.6045, "step": 4448 }, { "epoch": 0.5466273497972723, "grad_norm": 1.3773291861084984, "learning_rate": 1.5176911525569699e-05, "loss": 0.6, "step": 4449 }, { "epoch": 0.5467502150141295, "grad_norm": 1.162922522027371, "learning_rate": 1.517047866141364e-05, "loss": 0.5384, "step": 4450 }, { "epoch": 0.5468730802309866, "grad_norm": 1.183119167612791, "learning_rate": 1.5164045765899133e-05, "loss": 0.5471, "step": 4451 }, { "epoch": 0.5469959454478437, "grad_norm": 1.1165821533359752, "learning_rate": 1.5157612840209477e-05, "loss": 0.5699, "step": 4452 }, { "epoch": 0.5471188106647008, "grad_norm": 1.3353573976513766, "learning_rate": 1.5151179885527954e-05, "loss": 0.6006, "step": 4453 }, { "epoch": 0.5472416758815579, "grad_norm": 1.0527357481281037, "learning_rate": 1.5144746903037876e-05, "loss": 0.5808, "step": 4454 }, { "epoch": 0.547364541098415, "grad_norm": 1.14211966998034, "learning_rate": 1.5138313893922542e-05, "loss": 0.6287, "step": 4455 }, { "epoch": 0.5474874063152722, "grad_norm": 1.194663339750787, "learning_rate": 1.5131880859365268e-05, "loss": 0.6798, "step": 4456 }, { "epoch": 0.5476102715321293, "grad_norm": 1.3511152213859463, "learning_rate": 1.5125447800549357e-05, "loss": 0.589, "step": 4457 }, { "epoch": 0.5477331367489864, "grad_norm": 1.1438649063757471, "learning_rate": 1.5119014718658147e-05, "loss": 0.5705, "step": 4458 }, { "epoch": 0.5478560019658435, "grad_norm": 1.1416498969775781, "learning_rate": 1.5112581614874946e-05, "loss": 0.5874, "step": 4459 }, { "epoch": 0.5479788671827006, "grad_norm": 1.2984677487773286, "learning_rate": 1.5106148490383091e-05, "loss": 0.6716, "step": 4460 }, { "epoch": 0.5481017323995577, "grad_norm": 1.428721142160359, "learning_rate": 1.5099715346365902e-05, "loss": 0.7229, "step": 4461 }, { "epoch": 0.5482245976164148, "grad_norm": 1.401384278623002, "learning_rate": 1.5093282184006728e-05, "loss": 0.6353, "step": 4462 }, { "epoch": 0.5483474628332718, "grad_norm": 1.2188452910503562, "learning_rate": 1.5086849004488897e-05, "loss": 0.6321, "step": 4463 }, { "epoch": 0.548470328050129, "grad_norm": 1.371623706752202, "learning_rate": 1.508041580899576e-05, "loss": 0.6799, "step": 4464 }, { "epoch": 0.5485931932669861, "grad_norm": 1.1664618542606606, "learning_rate": 1.507398259871065e-05, "loss": 0.7031, "step": 4465 }, { "epoch": 0.5487160584838432, "grad_norm": 1.1228984135439792, "learning_rate": 1.5067549374816924e-05, "loss": 0.7082, "step": 4466 }, { "epoch": 0.5488389237007003, "grad_norm": 1.3549127989268441, "learning_rate": 1.506111613849793e-05, "loss": 0.5885, "step": 4467 }, { "epoch": 0.5489617889175574, "grad_norm": 1.0935857124271429, "learning_rate": 1.5054682890937019e-05, "loss": 0.5847, "step": 4468 }, { "epoch": 0.5490846541344145, "grad_norm": 1.2498660503901702, "learning_rate": 1.5048249633317546e-05, "loss": 0.5746, "step": 4469 }, { "epoch": 0.5492075193512717, "grad_norm": 1.545086196716166, "learning_rate": 1.5041816366822859e-05, "loss": 0.5565, "step": 4470 }, { "epoch": 0.5493303845681288, "grad_norm": 1.4590704634778577, "learning_rate": 1.503538309263633e-05, "loss": 0.6359, "step": 4471 }, { "epoch": 0.5494532497849859, "grad_norm": 1.162559011426625, "learning_rate": 1.5028949811941304e-05, "loss": 0.6319, "step": 4472 }, { "epoch": 0.549576115001843, "grad_norm": 1.1757854505504235, "learning_rate": 1.5022516525921152e-05, "loss": 0.5699, "step": 4473 }, { "epoch": 0.5496989802187001, "grad_norm": 1.232704257692557, "learning_rate": 1.5016083235759227e-05, "loss": 0.6501, "step": 4474 }, { "epoch": 0.5498218454355572, "grad_norm": 1.1945508405090481, "learning_rate": 1.5009649942638901e-05, "loss": 0.595, "step": 4475 }, { "epoch": 0.5499447106524143, "grad_norm": 1.4177657056089086, "learning_rate": 1.5003216647743528e-05, "loss": 0.5617, "step": 4476 }, { "epoch": 0.5500675758692715, "grad_norm": 1.0908269820597043, "learning_rate": 1.4996783352256473e-05, "loss": 0.547, "step": 4477 }, { "epoch": 0.5501904410861285, "grad_norm": 1.2935883696934694, "learning_rate": 1.4990350057361101e-05, "loss": 0.5732, "step": 4478 }, { "epoch": 0.5503133063029856, "grad_norm": 1.325777519539606, "learning_rate": 1.4983916764240773e-05, "loss": 0.6185, "step": 4479 }, { "epoch": 0.5504361715198427, "grad_norm": 1.5744817184129773, "learning_rate": 1.4977483474078852e-05, "loss": 0.5838, "step": 4480 }, { "epoch": 0.5505590367366998, "grad_norm": 1.1944680118221014, "learning_rate": 1.4971050188058697e-05, "loss": 0.5889, "step": 4481 }, { "epoch": 0.5506819019535569, "grad_norm": 1.3516922445194715, "learning_rate": 1.4964616907363675e-05, "loss": 0.5654, "step": 4482 }, { "epoch": 0.550804767170414, "grad_norm": 1.286874988876658, "learning_rate": 1.4958183633177142e-05, "loss": 0.5751, "step": 4483 }, { "epoch": 0.5509276323872712, "grad_norm": 1.26623507775261, "learning_rate": 1.4951750366682462e-05, "loss": 0.5395, "step": 4484 }, { "epoch": 0.5510504976041283, "grad_norm": 1.3045348779048043, "learning_rate": 1.4945317109062985e-05, "loss": 0.5404, "step": 4485 }, { "epoch": 0.5511733628209854, "grad_norm": 1.1814815414830238, "learning_rate": 1.4938883861502073e-05, "loss": 0.6088, "step": 4486 }, { "epoch": 0.5512962280378425, "grad_norm": 1.4403489615965988, "learning_rate": 1.493245062518308e-05, "loss": 0.7054, "step": 4487 }, { "epoch": 0.5514190932546996, "grad_norm": 1.2478655198219715, "learning_rate": 1.4926017401289349e-05, "loss": 0.6761, "step": 4488 }, { "epoch": 0.5515419584715567, "grad_norm": 1.119638596721304, "learning_rate": 1.4919584191004244e-05, "loss": 0.6522, "step": 4489 }, { "epoch": 0.5516648236884139, "grad_norm": 1.268366322793554, "learning_rate": 1.4913150995511104e-05, "loss": 0.6541, "step": 4490 }, { "epoch": 0.551787688905271, "grad_norm": 1.0876933348766653, "learning_rate": 1.4906717815993278e-05, "loss": 0.6508, "step": 4491 }, { "epoch": 0.551910554122128, "grad_norm": 1.1923919935040954, "learning_rate": 1.4900284653634095e-05, "loss": 0.629, "step": 4492 }, { "epoch": 0.5520334193389851, "grad_norm": 1.2021297162767905, "learning_rate": 1.4893851509616913e-05, "loss": 0.598, "step": 4493 }, { "epoch": 0.5521562845558422, "grad_norm": 1.3208219585946581, "learning_rate": 1.4887418385125056e-05, "loss": 0.6449, "step": 4494 }, { "epoch": 0.5522791497726993, "grad_norm": 1.174668382137461, "learning_rate": 1.4880985281341855e-05, "loss": 0.6805, "step": 4495 }, { "epoch": 0.5524020149895564, "grad_norm": 1.265904037801625, "learning_rate": 1.487455219945064e-05, "loss": 0.5479, "step": 4496 }, { "epoch": 0.5525248802064135, "grad_norm": 1.2566706155081635, "learning_rate": 1.4868119140634736e-05, "loss": 0.6049, "step": 4497 }, { "epoch": 0.5526477454232707, "grad_norm": 1.0764738805760177, "learning_rate": 1.4861686106077462e-05, "loss": 0.5802, "step": 4498 }, { "epoch": 0.5527706106401278, "grad_norm": 1.5083802842500211, "learning_rate": 1.485525309696213e-05, "loss": 0.6465, "step": 4499 }, { "epoch": 0.5528934758569849, "grad_norm": 1.5891397820528708, "learning_rate": 1.4848820114472045e-05, "loss": 0.6226, "step": 4500 }, { "epoch": 0.553016341073842, "grad_norm": 1.6451756666838886, "learning_rate": 1.4842387159790527e-05, "loss": 0.7007, "step": 4501 }, { "epoch": 0.5531392062906991, "grad_norm": 1.2864200030059876, "learning_rate": 1.483595423410087e-05, "loss": 0.5832, "step": 4502 }, { "epoch": 0.5532620715075562, "grad_norm": 1.3253782991107685, "learning_rate": 1.4829521338586367e-05, "loss": 0.5811, "step": 4503 }, { "epoch": 0.5533849367244134, "grad_norm": 1.2203832090377358, "learning_rate": 1.4823088474430304e-05, "loss": 0.5657, "step": 4504 }, { "epoch": 0.5535078019412705, "grad_norm": 1.384674791685188, "learning_rate": 1.4816655642815972e-05, "loss": 0.6722, "step": 4505 }, { "epoch": 0.5536306671581276, "grad_norm": 1.2188641886672031, "learning_rate": 1.4810222844926647e-05, "loss": 0.5869, "step": 4506 }, { "epoch": 0.5537535323749846, "grad_norm": 1.2759091005276342, "learning_rate": 1.4803790081945597e-05, "loss": 0.6597, "step": 4507 }, { "epoch": 0.5538763975918417, "grad_norm": 1.1807328860881836, "learning_rate": 1.4797357355056085e-05, "loss": 0.5196, "step": 4508 }, { "epoch": 0.5539992628086988, "grad_norm": 1.4550087236675344, "learning_rate": 1.4790924665441379e-05, "loss": 0.7073, "step": 4509 }, { "epoch": 0.5541221280255559, "grad_norm": 1.20617484013702, "learning_rate": 1.4784492014284723e-05, "loss": 0.5589, "step": 4510 }, { "epoch": 0.554244993242413, "grad_norm": 1.1850610009161269, "learning_rate": 1.4778059402769358e-05, "loss": 0.6433, "step": 4511 }, { "epoch": 0.5543678584592702, "grad_norm": 1.3111248328173564, "learning_rate": 1.4771626832078534e-05, "loss": 0.5832, "step": 4512 }, { "epoch": 0.5544907236761273, "grad_norm": 1.232259243918243, "learning_rate": 1.4765194303395473e-05, "loss": 0.6259, "step": 4513 }, { "epoch": 0.5546135888929844, "grad_norm": 1.3562104534888184, "learning_rate": 1.4758761817903396e-05, "loss": 0.6329, "step": 4514 }, { "epoch": 0.5547364541098415, "grad_norm": 1.1149645080830672, "learning_rate": 1.4752329376785516e-05, "loss": 0.6298, "step": 4515 }, { "epoch": 0.5548593193266986, "grad_norm": 1.3777037703290833, "learning_rate": 1.4745896981225043e-05, "loss": 0.666, "step": 4516 }, { "epoch": 0.5549821845435557, "grad_norm": 1.2708174266762104, "learning_rate": 1.4739464632405173e-05, "loss": 0.5696, "step": 4517 }, { "epoch": 0.5551050497604129, "grad_norm": 1.2855325492775243, "learning_rate": 1.4733032331509094e-05, "loss": 0.5785, "step": 4518 }, { "epoch": 0.55522791497727, "grad_norm": 1.162759920514632, "learning_rate": 1.472660007971998e-05, "loss": 0.5917, "step": 4519 }, { "epoch": 0.5553507801941271, "grad_norm": 1.1289066739806524, "learning_rate": 1.4720167878221014e-05, "loss": 0.5899, "step": 4520 }, { "epoch": 0.5554736454109841, "grad_norm": 1.1573277873786398, "learning_rate": 1.4713735728195353e-05, "loss": 0.5714, "step": 4521 }, { "epoch": 0.5555965106278412, "grad_norm": 1.2739878873869095, "learning_rate": 1.4707303630826148e-05, "loss": 0.5278, "step": 4522 }, { "epoch": 0.5557193758446983, "grad_norm": 1.123043148241051, "learning_rate": 1.4700871587296539e-05, "loss": 0.7693, "step": 4523 }, { "epoch": 0.5558422410615554, "grad_norm": 1.1383851225677568, "learning_rate": 1.4694439598789664e-05, "loss": 0.5888, "step": 4524 }, { "epoch": 0.5559651062784126, "grad_norm": 1.4281677771852896, "learning_rate": 1.4688007666488645e-05, "loss": 0.6148, "step": 4525 }, { "epoch": 0.5560879714952697, "grad_norm": 1.2098656192638517, "learning_rate": 1.468157579157659e-05, "loss": 0.6231, "step": 4526 }, { "epoch": 0.5562108367121268, "grad_norm": 1.2356727598940476, "learning_rate": 1.4675143975236599e-05, "loss": 0.5427, "step": 4527 }, { "epoch": 0.5563337019289839, "grad_norm": 1.3057850100461088, "learning_rate": 1.4668712218651772e-05, "loss": 0.6102, "step": 4528 }, { "epoch": 0.556456567145841, "grad_norm": 1.364942198914135, "learning_rate": 1.4662280523005185e-05, "loss": 0.6551, "step": 4529 }, { "epoch": 0.5565794323626981, "grad_norm": 1.1478400103045578, "learning_rate": 1.4655848889479897e-05, "loss": 0.54, "step": 4530 }, { "epoch": 0.5567022975795552, "grad_norm": 1.155880125066274, "learning_rate": 1.4649417319258982e-05, "loss": 0.5119, "step": 4531 }, { "epoch": 0.5568251627964124, "grad_norm": 1.4265257834225555, "learning_rate": 1.4642985813525477e-05, "loss": 0.7146, "step": 4532 }, { "epoch": 0.5569480280132695, "grad_norm": 1.3358597868199606, "learning_rate": 1.4636554373462416e-05, "loss": 0.7056, "step": 4533 }, { "epoch": 0.5570708932301266, "grad_norm": 1.374944699290895, "learning_rate": 1.463012300025282e-05, "loss": 0.7309, "step": 4534 }, { "epoch": 0.5571937584469837, "grad_norm": 1.6185247202588084, "learning_rate": 1.4623691695079698e-05, "loss": 0.685, "step": 4535 }, { "epoch": 0.5573166236638407, "grad_norm": 1.3257000258769485, "learning_rate": 1.4617260459126053e-05, "loss": 0.7105, "step": 4536 }, { "epoch": 0.5574394888806978, "grad_norm": 1.0837295298939489, "learning_rate": 1.461082929357486e-05, "loss": 0.5391, "step": 4537 }, { "epoch": 0.557562354097555, "grad_norm": 1.1480056822113902, "learning_rate": 1.4604398199609092e-05, "loss": 0.5685, "step": 4538 }, { "epoch": 0.5576852193144121, "grad_norm": 1.1417248832046307, "learning_rate": 1.4597967178411715e-05, "loss": 0.6008, "step": 4539 }, { "epoch": 0.5578080845312692, "grad_norm": 1.2106735754832245, "learning_rate": 1.4591536231165668e-05, "loss": 0.6936, "step": 4540 }, { "epoch": 0.5579309497481263, "grad_norm": 1.272929733840975, "learning_rate": 1.4585105359053882e-05, "loss": 0.5872, "step": 4541 }, { "epoch": 0.5580538149649834, "grad_norm": 0.9631214973392644, "learning_rate": 1.4578674563259272e-05, "loss": 0.6436, "step": 4542 }, { "epoch": 0.5581766801818405, "grad_norm": 1.2266359810138767, "learning_rate": 1.4572243844964745e-05, "loss": 0.6264, "step": 4543 }, { "epoch": 0.5582995453986976, "grad_norm": 1.275499027441949, "learning_rate": 1.4565813205353191e-05, "loss": 0.6734, "step": 4544 }, { "epoch": 0.5584224106155548, "grad_norm": 0.9970198864079293, "learning_rate": 1.455938264560748e-05, "loss": 0.6064, "step": 4545 }, { "epoch": 0.5585452758324119, "grad_norm": 1.5746069220121484, "learning_rate": 1.455295216691047e-05, "loss": 0.6782, "step": 4546 }, { "epoch": 0.558668141049269, "grad_norm": 1.3137406249270493, "learning_rate": 1.4546521770445014e-05, "loss": 0.5421, "step": 4547 }, { "epoch": 0.5587910062661261, "grad_norm": 1.4233865667752914, "learning_rate": 1.4540091457393938e-05, "loss": 0.5484, "step": 4548 }, { "epoch": 0.5589138714829832, "grad_norm": 1.2965086226494713, "learning_rate": 1.4533661228940056e-05, "loss": 0.5922, "step": 4549 }, { "epoch": 0.5590367366998402, "grad_norm": 1.337520201899617, "learning_rate": 1.452723108626616e-05, "loss": 0.5756, "step": 4550 }, { "epoch": 0.5591596019166973, "grad_norm": 1.1384019680587942, "learning_rate": 1.4520801030555044e-05, "loss": 0.6657, "step": 4551 }, { "epoch": 0.5592824671335545, "grad_norm": 1.2969664976338569, "learning_rate": 1.4514371062989473e-05, "loss": 0.6495, "step": 4552 }, { "epoch": 0.5594053323504116, "grad_norm": 1.2762865956281078, "learning_rate": 1.4507941184752195e-05, "loss": 0.6123, "step": 4553 }, { "epoch": 0.5595281975672687, "grad_norm": 1.2175136283511327, "learning_rate": 1.4501511397025943e-05, "loss": 0.5882, "step": 4554 }, { "epoch": 0.5596510627841258, "grad_norm": 1.2609843182602287, "learning_rate": 1.449508170099344e-05, "loss": 0.4765, "step": 4555 }, { "epoch": 0.5597739280009829, "grad_norm": 1.3208467243896405, "learning_rate": 1.4488652097837384e-05, "loss": 0.6253, "step": 4556 }, { "epoch": 0.55989679321784, "grad_norm": 1.2000709539019305, "learning_rate": 1.4482222588740448e-05, "loss": 0.6746, "step": 4557 }, { "epoch": 0.5600196584346971, "grad_norm": 1.3349090887767852, "learning_rate": 1.447579317488532e-05, "loss": 0.605, "step": 4558 }, { "epoch": 0.5601425236515543, "grad_norm": 1.4191723639137879, "learning_rate": 1.4469363857454635e-05, "loss": 0.6902, "step": 4559 }, { "epoch": 0.5602653888684114, "grad_norm": 1.2947562089516875, "learning_rate": 1.4462934637631027e-05, "loss": 0.584, "step": 4560 }, { "epoch": 0.5603882540852685, "grad_norm": 1.3725611456393727, "learning_rate": 1.4456505516597107e-05, "loss": 0.5444, "step": 4561 }, { "epoch": 0.5605111193021256, "grad_norm": 1.2165606450914632, "learning_rate": 1.4450076495535477e-05, "loss": 0.6111, "step": 4562 }, { "epoch": 0.5606339845189827, "grad_norm": 1.173057682450614, "learning_rate": 1.4443647575628707e-05, "loss": 0.5474, "step": 4563 }, { "epoch": 0.5607568497358398, "grad_norm": 1.7025442401552386, "learning_rate": 1.443721875805936e-05, "loss": 0.5414, "step": 4564 }, { "epoch": 0.5608797149526968, "grad_norm": 1.1854205065105832, "learning_rate": 1.4430790044009965e-05, "loss": 0.6657, "step": 4565 }, { "epoch": 0.561002580169554, "grad_norm": 1.2488439286655864, "learning_rate": 1.4424361434663057e-05, "loss": 0.5772, "step": 4566 }, { "epoch": 0.5611254453864111, "grad_norm": 1.3154482485603118, "learning_rate": 1.4417932931201126e-05, "loss": 0.6389, "step": 4567 }, { "epoch": 0.5612483106032682, "grad_norm": 1.5692632570005356, "learning_rate": 1.4411504534806662e-05, "loss": 0.5968, "step": 4568 }, { "epoch": 0.5613711758201253, "grad_norm": 1.5573175195292288, "learning_rate": 1.4405076246662113e-05, "loss": 0.6872, "step": 4569 }, { "epoch": 0.5614940410369824, "grad_norm": 1.2836386837143907, "learning_rate": 1.439864806794994e-05, "loss": 0.6585, "step": 4570 }, { "epoch": 0.5616169062538395, "grad_norm": 1.3688507275269965, "learning_rate": 1.4392219999852552e-05, "loss": 0.4901, "step": 4571 }, { "epoch": 0.5617397714706966, "grad_norm": 1.19968766618538, "learning_rate": 1.4385792043552354e-05, "loss": 0.5083, "step": 4572 }, { "epoch": 0.5618626366875538, "grad_norm": 1.1202824634002855, "learning_rate": 1.4379364200231724e-05, "loss": 0.6051, "step": 4573 }, { "epoch": 0.5619855019044109, "grad_norm": 1.40346823691081, "learning_rate": 1.4372936471073028e-05, "loss": 0.5536, "step": 4574 }, { "epoch": 0.562108367121268, "grad_norm": 1.1171922075885992, "learning_rate": 1.43665088572586e-05, "loss": 0.6133, "step": 4575 }, { "epoch": 0.5622312323381251, "grad_norm": 1.289845147256014, "learning_rate": 1.4360081359970755e-05, "loss": 0.5273, "step": 4576 }, { "epoch": 0.5623540975549822, "grad_norm": 1.1829108444469658, "learning_rate": 1.4353653980391799e-05, "loss": 0.5498, "step": 4577 }, { "epoch": 0.5624769627718393, "grad_norm": 1.219701943325564, "learning_rate": 1.4347226719704e-05, "loss": 0.5624, "step": 4578 }, { "epoch": 0.5625998279886965, "grad_norm": 1.6466855801721787, "learning_rate": 1.4340799579089615e-05, "loss": 0.6446, "step": 4579 }, { "epoch": 0.5627226932055535, "grad_norm": 1.2752422903096396, "learning_rate": 1.4334372559730867e-05, "loss": 0.577, "step": 4580 }, { "epoch": 0.5628455584224106, "grad_norm": 1.2948668744389487, "learning_rate": 1.4327945662809975e-05, "loss": 0.5539, "step": 4581 }, { "epoch": 0.5629684236392677, "grad_norm": 1.220509798863145, "learning_rate": 1.4321518889509118e-05, "loss": 0.6998, "step": 4582 }, { "epoch": 0.5630912888561248, "grad_norm": 1.3211875168554499, "learning_rate": 1.4315092241010465e-05, "loss": 0.5931, "step": 4583 }, { "epoch": 0.5632141540729819, "grad_norm": 1.2020959526567563, "learning_rate": 1.4308665718496143e-05, "loss": 0.6616, "step": 4584 }, { "epoch": 0.563337019289839, "grad_norm": 1.1419602095310597, "learning_rate": 1.4302239323148284e-05, "loss": 0.4885, "step": 4585 }, { "epoch": 0.5634598845066962, "grad_norm": 1.150258580649529, "learning_rate": 1.4295813056148979e-05, "loss": 0.4698, "step": 4586 }, { "epoch": 0.5635827497235533, "grad_norm": 1.442118918339612, "learning_rate": 1.4289386918680294e-05, "loss": 0.7413, "step": 4587 }, { "epoch": 0.5637056149404104, "grad_norm": 1.1024912245550025, "learning_rate": 1.428296091192427e-05, "loss": 0.5782, "step": 4588 }, { "epoch": 0.5638284801572675, "grad_norm": 1.360787001640595, "learning_rate": 1.4276535037062943e-05, "loss": 0.6903, "step": 4589 }, { "epoch": 0.5639513453741246, "grad_norm": 1.2409838415233667, "learning_rate": 1.4270109295278305e-05, "loss": 0.5944, "step": 4590 }, { "epoch": 0.5640742105909817, "grad_norm": 1.2644270449134676, "learning_rate": 1.4263683687752329e-05, "loss": 0.6029, "step": 4591 }, { "epoch": 0.5641970758078388, "grad_norm": 1.1922045277870976, "learning_rate": 1.4257258215666957e-05, "loss": 0.711, "step": 4592 }, { "epoch": 0.564319941024696, "grad_norm": 1.2544398943731, "learning_rate": 1.4250832880204126e-05, "loss": 0.7277, "step": 4593 }, { "epoch": 0.564442806241553, "grad_norm": 1.3220740885583142, "learning_rate": 1.4244407682545728e-05, "loss": 0.6256, "step": 4594 }, { "epoch": 0.5645656714584101, "grad_norm": 1.5112286502961825, "learning_rate": 1.4237982623873629e-05, "loss": 0.7181, "step": 4595 }, { "epoch": 0.5646885366752672, "grad_norm": 1.1297177874099888, "learning_rate": 1.4231557705369689e-05, "loss": 0.678, "step": 4596 }, { "epoch": 0.5648114018921243, "grad_norm": 1.217289148495727, "learning_rate": 1.4225132928215729e-05, "loss": 0.6702, "step": 4597 }, { "epoch": 0.5649342671089814, "grad_norm": 1.468289391058185, "learning_rate": 1.4218708293593539e-05, "loss": 0.581, "step": 4598 }, { "epoch": 0.5650571323258385, "grad_norm": 1.109761940241006, "learning_rate": 1.421228380268489e-05, "loss": 0.5745, "step": 4599 }, { "epoch": 0.5651799975426957, "grad_norm": 0.988035017065615, "learning_rate": 1.420585945667153e-05, "loss": 0.5874, "step": 4600 }, { "epoch": 0.5653028627595528, "grad_norm": 1.3472977073672892, "learning_rate": 1.4199435256735172e-05, "loss": 0.5876, "step": 4601 }, { "epoch": 0.5654257279764099, "grad_norm": 1.3264984701934266, "learning_rate": 1.4193011204057507e-05, "loss": 0.5415, "step": 4602 }, { "epoch": 0.565548593193267, "grad_norm": 1.1268641081728386, "learning_rate": 1.4186587299820193e-05, "loss": 0.647, "step": 4603 }, { "epoch": 0.5656714584101241, "grad_norm": 1.1927765742830152, "learning_rate": 1.4180163545204875e-05, "loss": 0.6063, "step": 4604 }, { "epoch": 0.5657943236269812, "grad_norm": 1.2084156004917113, "learning_rate": 1.4173739941393156e-05, "loss": 0.6132, "step": 4605 }, { "epoch": 0.5659171888438383, "grad_norm": 1.1734994946985944, "learning_rate": 1.4167316489566617e-05, "loss": 0.542, "step": 4606 }, { "epoch": 0.5660400540606955, "grad_norm": 1.2683835398106218, "learning_rate": 1.4160893190906804e-05, "loss": 0.4758, "step": 4607 }, { "epoch": 0.5661629192775526, "grad_norm": 1.1110681824804856, "learning_rate": 1.4154470046595251e-05, "loss": 0.496, "step": 4608 }, { "epoch": 0.5662857844944096, "grad_norm": 1.242314687308722, "learning_rate": 1.414804705781345e-05, "loss": 0.6035, "step": 4609 }, { "epoch": 0.5664086497112667, "grad_norm": 1.194385152304613, "learning_rate": 1.4141624225742867e-05, "loss": 0.6421, "step": 4610 }, { "epoch": 0.5665315149281238, "grad_norm": 1.3112145086228058, "learning_rate": 1.4135201551564937e-05, "loss": 0.7328, "step": 4611 }, { "epoch": 0.5666543801449809, "grad_norm": 1.4662565351862022, "learning_rate": 1.4128779036461077e-05, "loss": 0.5955, "step": 4612 }, { "epoch": 0.566777245361838, "grad_norm": 1.272298035908665, "learning_rate": 1.4122356681612664e-05, "loss": 0.6296, "step": 4613 }, { "epoch": 0.5669001105786952, "grad_norm": 1.3363067122762489, "learning_rate": 1.4115934488201047e-05, "loss": 0.6874, "step": 4614 }, { "epoch": 0.5670229757955523, "grad_norm": 0.9626580778673175, "learning_rate": 1.4109512457407543e-05, "loss": 0.526, "step": 4615 }, { "epoch": 0.5671458410124094, "grad_norm": 1.1346194392700522, "learning_rate": 1.4103090590413452e-05, "loss": 0.512, "step": 4616 }, { "epoch": 0.5672687062292665, "grad_norm": 1.244233754594268, "learning_rate": 1.409666888840003e-05, "loss": 0.6733, "step": 4617 }, { "epoch": 0.5673915714461236, "grad_norm": 1.3256181257090929, "learning_rate": 1.4090247352548504e-05, "loss": 0.5006, "step": 4618 }, { "epoch": 0.5675144366629807, "grad_norm": 1.047957550009395, "learning_rate": 1.4083825984040083e-05, "loss": 0.5525, "step": 4619 }, { "epoch": 0.5676373018798379, "grad_norm": 1.238754108344294, "learning_rate": 1.407740478405593e-05, "loss": 0.5731, "step": 4620 }, { "epoch": 0.567760167096695, "grad_norm": 1.2432835982571118, "learning_rate": 1.4070983753777183e-05, "loss": 0.672, "step": 4621 }, { "epoch": 0.5678830323135521, "grad_norm": 1.3686640376188857, "learning_rate": 1.4064562894384944e-05, "loss": 0.7681, "step": 4622 }, { "epoch": 0.5680058975304091, "grad_norm": 1.107506593510886, "learning_rate": 1.40581422070603e-05, "loss": 0.6099, "step": 4623 }, { "epoch": 0.5681287627472662, "grad_norm": 1.090701703080941, "learning_rate": 1.4051721692984289e-05, "loss": 0.6478, "step": 4624 }, { "epoch": 0.5682516279641233, "grad_norm": 0.94938856747882, "learning_rate": 1.4045301353337922e-05, "loss": 0.5474, "step": 4625 }, { "epoch": 0.5683744931809804, "grad_norm": 1.22475836212397, "learning_rate": 1.4038881189302175e-05, "loss": 0.6494, "step": 4626 }, { "epoch": 0.5684973583978375, "grad_norm": 1.151417800091835, "learning_rate": 1.4032461202058009e-05, "loss": 0.6643, "step": 4627 }, { "epoch": 0.5686202236146947, "grad_norm": 1.230753789284278, "learning_rate": 1.4026041392786325e-05, "loss": 0.6121, "step": 4628 }, { "epoch": 0.5687430888315518, "grad_norm": 1.3666543057223202, "learning_rate": 1.4019621762668011e-05, "loss": 0.6018, "step": 4629 }, { "epoch": 0.5688659540484089, "grad_norm": 1.1921416588098885, "learning_rate": 1.4013202312883912e-05, "loss": 0.6543, "step": 4630 }, { "epoch": 0.568988819265266, "grad_norm": 1.0791112656788846, "learning_rate": 1.4006783044614853e-05, "loss": 0.6327, "step": 4631 }, { "epoch": 0.5691116844821231, "grad_norm": 1.437141862980313, "learning_rate": 1.400036395904161e-05, "loss": 0.558, "step": 4632 }, { "epoch": 0.5692345496989802, "grad_norm": 1.2936297136878028, "learning_rate": 1.3993945057344935e-05, "loss": 0.4617, "step": 4633 }, { "epoch": 0.5693574149158374, "grad_norm": 1.4283639367044074, "learning_rate": 1.3987526340705538e-05, "loss": 0.6019, "step": 4634 }, { "epoch": 0.5694802801326945, "grad_norm": 1.298815340359428, "learning_rate": 1.3981107810304106e-05, "loss": 0.5801, "step": 4635 }, { "epoch": 0.5696031453495516, "grad_norm": 1.2189758695914747, "learning_rate": 1.3974689467321289e-05, "loss": 0.6846, "step": 4636 }, { "epoch": 0.5697260105664087, "grad_norm": 1.1753239740774404, "learning_rate": 1.396827131293769e-05, "loss": 0.5301, "step": 4637 }, { "epoch": 0.5698488757832657, "grad_norm": 1.1573930973084545, "learning_rate": 1.3961853348333896e-05, "loss": 0.6064, "step": 4638 }, { "epoch": 0.5699717410001228, "grad_norm": 1.1370335477569917, "learning_rate": 1.3955435574690444e-05, "loss": 0.7141, "step": 4639 }, { "epoch": 0.5700946062169799, "grad_norm": 1.0669427106799774, "learning_rate": 1.3949017993187848e-05, "loss": 0.6909, "step": 4640 }, { "epoch": 0.570217471433837, "grad_norm": 1.7497966111211383, "learning_rate": 1.3942600605006565e-05, "loss": 0.7399, "step": 4641 }, { "epoch": 0.5703403366506942, "grad_norm": 1.5001268111813744, "learning_rate": 1.3936183411327054e-05, "loss": 0.7252, "step": 4642 }, { "epoch": 0.5704632018675513, "grad_norm": 1.124612606117686, "learning_rate": 1.3929766413329702e-05, "loss": 0.551, "step": 4643 }, { "epoch": 0.5705860670844084, "grad_norm": 1.1391824173781804, "learning_rate": 1.392334961219488e-05, "loss": 0.5046, "step": 4644 }, { "epoch": 0.5707089323012655, "grad_norm": 1.3003500781860189, "learning_rate": 1.391693300910291e-05, "loss": 0.5499, "step": 4645 }, { "epoch": 0.5708317975181226, "grad_norm": 1.2654142531604196, "learning_rate": 1.3910516605234091e-05, "loss": 0.5656, "step": 4646 }, { "epoch": 0.5709546627349797, "grad_norm": 1.3240350119884037, "learning_rate": 1.390410040176868e-05, "loss": 0.6028, "step": 4647 }, { "epoch": 0.5710775279518369, "grad_norm": 1.3059867700283034, "learning_rate": 1.3897684399886892e-05, "loss": 0.6807, "step": 4648 }, { "epoch": 0.571200393168694, "grad_norm": 1.1359036264797264, "learning_rate": 1.3891268600768902e-05, "loss": 0.6316, "step": 4649 }, { "epoch": 0.5713232583855511, "grad_norm": 1.0957942467403845, "learning_rate": 1.3884853005594869e-05, "loss": 0.5022, "step": 4650 }, { "epoch": 0.5714461236024082, "grad_norm": 1.0150443542639316, "learning_rate": 1.3878437615544896e-05, "loss": 0.5098, "step": 4651 }, { "epoch": 0.5715689888192652, "grad_norm": 1.0680992480990628, "learning_rate": 1.3872022431799047e-05, "loss": 0.6321, "step": 4652 }, { "epoch": 0.5716918540361223, "grad_norm": 1.1350814182422964, "learning_rate": 1.3865607455537352e-05, "loss": 0.5925, "step": 4653 }, { "epoch": 0.5718147192529794, "grad_norm": 1.2438834469977347, "learning_rate": 1.3859192687939813e-05, "loss": 0.6333, "step": 4654 }, { "epoch": 0.5719375844698366, "grad_norm": 1.1901743961483797, "learning_rate": 1.3852778130186384e-05, "loss": 0.5696, "step": 4655 }, { "epoch": 0.5720604496866937, "grad_norm": 1.0820138659121796, "learning_rate": 1.3846363783456976e-05, "loss": 0.6428, "step": 4656 }, { "epoch": 0.5721833149035508, "grad_norm": 1.441493083872987, "learning_rate": 1.3839949648931465e-05, "loss": 0.6376, "step": 4657 }, { "epoch": 0.5723061801204079, "grad_norm": 1.3795898472162686, "learning_rate": 1.3833535727789695e-05, "loss": 0.5276, "step": 4658 }, { "epoch": 0.572429045337265, "grad_norm": 1.328664216342804, "learning_rate": 1.3827122021211465e-05, "loss": 0.6075, "step": 4659 }, { "epoch": 0.5725519105541221, "grad_norm": 1.2380007633028083, "learning_rate": 1.3820708530376527e-05, "loss": 0.5762, "step": 4660 }, { "epoch": 0.5726747757709792, "grad_norm": 1.908195598480364, "learning_rate": 1.3814295256464613e-05, "loss": 0.6063, "step": 4661 }, { "epoch": 0.5727976409878364, "grad_norm": 1.5400004698279506, "learning_rate": 1.3807882200655396e-05, "loss": 0.7839, "step": 4662 }, { "epoch": 0.5729205062046935, "grad_norm": 1.3893973015583125, "learning_rate": 1.3801469364128515e-05, "loss": 0.6279, "step": 4663 }, { "epoch": 0.5730433714215506, "grad_norm": 1.2160340950503707, "learning_rate": 1.3795056748063574e-05, "loss": 0.527, "step": 4664 }, { "epoch": 0.5731662366384077, "grad_norm": 1.0857077554342722, "learning_rate": 1.3788644353640129e-05, "loss": 0.5862, "step": 4665 }, { "epoch": 0.5732891018552648, "grad_norm": 1.4127508041929673, "learning_rate": 1.3782232182037701e-05, "loss": 0.6052, "step": 4666 }, { "epoch": 0.5734119670721218, "grad_norm": 1.0915987723783944, "learning_rate": 1.3775820234435764e-05, "loss": 0.7842, "step": 4667 }, { "epoch": 0.573534832288979, "grad_norm": 1.1955289992924432, "learning_rate": 1.3769408512013748e-05, "loss": 0.5705, "step": 4668 }, { "epoch": 0.5736576975058361, "grad_norm": 1.314951863602414, "learning_rate": 1.3762997015951066e-05, "loss": 0.4392, "step": 4669 }, { "epoch": 0.5737805627226932, "grad_norm": 1.9016346044266332, "learning_rate": 1.375658574742706e-05, "loss": 0.727, "step": 4670 }, { "epoch": 0.5739034279395503, "grad_norm": 1.3271506563632611, "learning_rate": 1.375017470762104e-05, "loss": 0.5098, "step": 4671 }, { "epoch": 0.5740262931564074, "grad_norm": 1.3167894232527018, "learning_rate": 1.3743763897712271e-05, "loss": 0.5633, "step": 4672 }, { "epoch": 0.5741491583732645, "grad_norm": 1.1828546641879487, "learning_rate": 1.3737353318879993e-05, "loss": 0.6483, "step": 4673 }, { "epoch": 0.5742720235901216, "grad_norm": 1.7527861230980746, "learning_rate": 1.3730942972303383e-05, "loss": 0.6888, "step": 4674 }, { "epoch": 0.5743948888069788, "grad_norm": 1.3044584635467598, "learning_rate": 1.3724532859161583e-05, "loss": 0.5995, "step": 4675 }, { "epoch": 0.5745177540238359, "grad_norm": 1.1786523892552525, "learning_rate": 1.371812298063369e-05, "loss": 0.6391, "step": 4676 }, { "epoch": 0.574640619240693, "grad_norm": 1.4449846420392145, "learning_rate": 1.3711713337898763e-05, "loss": 0.6763, "step": 4677 }, { "epoch": 0.5747634844575501, "grad_norm": 1.248455781107704, "learning_rate": 1.3705303932135813e-05, "loss": 0.5858, "step": 4678 }, { "epoch": 0.5748863496744072, "grad_norm": 1.2839497717154595, "learning_rate": 1.3698894764523809e-05, "loss": 0.6154, "step": 4679 }, { "epoch": 0.5750092148912643, "grad_norm": 1.0770344012497395, "learning_rate": 1.3692485836241668e-05, "loss": 0.5944, "step": 4680 }, { "epoch": 0.5751320801081214, "grad_norm": 1.3299273624612464, "learning_rate": 1.3686077148468285e-05, "loss": 0.6246, "step": 4681 }, { "epoch": 0.5752549453249785, "grad_norm": 1.1876719249770373, "learning_rate": 1.367966870238249e-05, "loss": 0.6207, "step": 4682 }, { "epoch": 0.5753778105418356, "grad_norm": 1.3869607995934792, "learning_rate": 1.367326049916307e-05, "loss": 0.6182, "step": 4683 }, { "epoch": 0.5755006757586927, "grad_norm": 1.2175270664807105, "learning_rate": 1.366685253998878e-05, "loss": 0.6075, "step": 4684 }, { "epoch": 0.5756235409755498, "grad_norm": 1.4783397033004377, "learning_rate": 1.3660444826038322e-05, "loss": 0.689, "step": 4685 }, { "epoch": 0.5757464061924069, "grad_norm": 1.2507638376778987, "learning_rate": 1.3654037358490348e-05, "loss": 0.5906, "step": 4686 }, { "epoch": 0.575869271409264, "grad_norm": 1.2363131951592086, "learning_rate": 1.3647630138523467e-05, "loss": 0.6053, "step": 4687 }, { "epoch": 0.5759921366261211, "grad_norm": 1.0709760636105599, "learning_rate": 1.364122316731626e-05, "loss": 0.5617, "step": 4688 }, { "epoch": 0.5761150018429783, "grad_norm": 1.1677463959230503, "learning_rate": 1.3634816446047237e-05, "loss": 0.5871, "step": 4689 }, { "epoch": 0.5762378670598354, "grad_norm": 1.193976709788362, "learning_rate": 1.3628409975894878e-05, "loss": 0.6475, "step": 4690 }, { "epoch": 0.5763607322766925, "grad_norm": 1.1846136820877098, "learning_rate": 1.36220037580376e-05, "loss": 0.5511, "step": 4691 }, { "epoch": 0.5764835974935496, "grad_norm": 1.1879943990486173, "learning_rate": 1.36155977936538e-05, "loss": 0.5689, "step": 4692 }, { "epoch": 0.5766064627104067, "grad_norm": 1.3216866690054578, "learning_rate": 1.360919208392181e-05, "loss": 0.5397, "step": 4693 }, { "epoch": 0.5767293279272638, "grad_norm": 1.1435032402110077, "learning_rate": 1.3602786630019914e-05, "loss": 0.513, "step": 4694 }, { "epoch": 0.576852193144121, "grad_norm": 1.3581717084023102, "learning_rate": 1.3596381433126356e-05, "loss": 0.5327, "step": 4695 }, { "epoch": 0.576975058360978, "grad_norm": 1.622056807820146, "learning_rate": 1.3589976494419333e-05, "loss": 0.5851, "step": 4696 }, { "epoch": 0.5770979235778351, "grad_norm": 1.5129053080232266, "learning_rate": 1.3583571815076988e-05, "loss": 0.7189, "step": 4697 }, { "epoch": 0.5772207887946922, "grad_norm": 1.3015678216787943, "learning_rate": 1.3577167396277421e-05, "loss": 0.6313, "step": 4698 }, { "epoch": 0.5773436540115493, "grad_norm": 1.2027382425846882, "learning_rate": 1.357076323919868e-05, "loss": 0.5116, "step": 4699 }, { "epoch": 0.5774665192284064, "grad_norm": 0.9738339267332328, "learning_rate": 1.3564359345018777e-05, "loss": 0.5918, "step": 4700 }, { "epoch": 0.5775893844452635, "grad_norm": 1.2305719528411898, "learning_rate": 1.3557955714915665e-05, "loss": 0.6179, "step": 4701 }, { "epoch": 0.5777122496621206, "grad_norm": 1.2247303245431662, "learning_rate": 1.3551552350067241e-05, "loss": 0.7235, "step": 4702 }, { "epoch": 0.5778351148789778, "grad_norm": 1.1323079516762584, "learning_rate": 1.3545149251651372e-05, "loss": 0.6214, "step": 4703 }, { "epoch": 0.5779579800958349, "grad_norm": 1.1675600540781508, "learning_rate": 1.3538746420845866e-05, "loss": 0.5422, "step": 4704 }, { "epoch": 0.578080845312692, "grad_norm": 1.2784403532281126, "learning_rate": 1.3532343858828476e-05, "loss": 0.6994, "step": 4705 }, { "epoch": 0.5782037105295491, "grad_norm": 1.1334379542920079, "learning_rate": 1.3525941566776909e-05, "loss": 0.585, "step": 4706 }, { "epoch": 0.5783265757464062, "grad_norm": 1.2412249430991897, "learning_rate": 1.351953954586884e-05, "loss": 0.5757, "step": 4707 }, { "epoch": 0.5784494409632633, "grad_norm": 1.4189239160935743, "learning_rate": 1.3513137797281868e-05, "loss": 0.576, "step": 4708 }, { "epoch": 0.5785723061801205, "grad_norm": 1.2708661109839232, "learning_rate": 1.3506736322193556e-05, "loss": 0.5546, "step": 4709 }, { "epoch": 0.5786951713969776, "grad_norm": 1.3132950563602657, "learning_rate": 1.350033512178141e-05, "loss": 0.6612, "step": 4710 }, { "epoch": 0.5788180366138346, "grad_norm": 1.0581355638592556, "learning_rate": 1.3493934197222893e-05, "loss": 0.7165, "step": 4711 }, { "epoch": 0.5789409018306917, "grad_norm": 1.2037813410799456, "learning_rate": 1.3487533549695417e-05, "loss": 0.5792, "step": 4712 }, { "epoch": 0.5790637670475488, "grad_norm": 1.2666107770925612, "learning_rate": 1.3481133180376336e-05, "loss": 0.5931, "step": 4713 }, { "epoch": 0.5791866322644059, "grad_norm": 1.2068204300513385, "learning_rate": 1.3474733090442953e-05, "loss": 0.5803, "step": 4714 }, { "epoch": 0.579309497481263, "grad_norm": 1.828160299107149, "learning_rate": 1.3468333281072528e-05, "loss": 0.6167, "step": 4715 }, { "epoch": 0.5794323626981202, "grad_norm": 1.190841224745319, "learning_rate": 1.3461933753442265e-05, "loss": 0.6536, "step": 4716 }, { "epoch": 0.5795552279149773, "grad_norm": 1.3173896247991663, "learning_rate": 1.3455534508729313e-05, "loss": 0.432, "step": 4717 }, { "epoch": 0.5796780931318344, "grad_norm": 1.1422982287952756, "learning_rate": 1.3449135548110763e-05, "loss": 0.7335, "step": 4718 }, { "epoch": 0.5798009583486915, "grad_norm": 1.295421409308299, "learning_rate": 1.3442736872763681e-05, "loss": 0.7278, "step": 4719 }, { "epoch": 0.5799238235655486, "grad_norm": 1.3965633938794029, "learning_rate": 1.343633848386505e-05, "loss": 0.5649, "step": 4720 }, { "epoch": 0.5800466887824057, "grad_norm": 1.5111856755797919, "learning_rate": 1.3429940382591815e-05, "loss": 0.5347, "step": 4721 }, { "epoch": 0.5801695539992628, "grad_norm": 1.3778098757961499, "learning_rate": 1.3423542570120861e-05, "loss": 0.5869, "step": 4722 }, { "epoch": 0.58029241921612, "grad_norm": 1.1572015745740143, "learning_rate": 1.3417145047629029e-05, "loss": 0.5051, "step": 4723 }, { "epoch": 0.5804152844329771, "grad_norm": 1.3607987498624037, "learning_rate": 1.3410747816293102e-05, "loss": 0.5751, "step": 4724 }, { "epoch": 0.5805381496498341, "grad_norm": 1.2721181547353824, "learning_rate": 1.34043508772898e-05, "loss": 0.7007, "step": 4725 }, { "epoch": 0.5806610148666912, "grad_norm": 1.2914329981624004, "learning_rate": 1.3397954231795815e-05, "loss": 0.5261, "step": 4726 }, { "epoch": 0.5807838800835483, "grad_norm": 1.1515765768756328, "learning_rate": 1.3391557880987757e-05, "loss": 0.5584, "step": 4727 }, { "epoch": 0.5809067453004054, "grad_norm": 1.2090253542285954, "learning_rate": 1.3385161826042199e-05, "loss": 0.6045, "step": 4728 }, { "epoch": 0.5810296105172625, "grad_norm": 1.1544956544069105, "learning_rate": 1.3378766068135642e-05, "loss": 0.5118, "step": 4729 }, { "epoch": 0.5811524757341197, "grad_norm": 1.4221746160698074, "learning_rate": 1.337237060844456e-05, "loss": 0.5437, "step": 4730 }, { "epoch": 0.5812753409509768, "grad_norm": 1.2649176881041557, "learning_rate": 1.3365975448145348e-05, "loss": 0.6236, "step": 4731 }, { "epoch": 0.5813982061678339, "grad_norm": 1.1685002279400998, "learning_rate": 1.3359580588414354e-05, "loss": 0.5258, "step": 4732 }, { "epoch": 0.581521071384691, "grad_norm": 1.3672555132718396, "learning_rate": 1.3353186030427868e-05, "loss": 0.6591, "step": 4733 }, { "epoch": 0.5816439366015481, "grad_norm": 1.0218274673289387, "learning_rate": 1.3346791775362136e-05, "loss": 0.6355, "step": 4734 }, { "epoch": 0.5817668018184052, "grad_norm": 1.0883376096535766, "learning_rate": 1.3340397824393337e-05, "loss": 0.607, "step": 4735 }, { "epoch": 0.5818896670352623, "grad_norm": 1.14436077182396, "learning_rate": 1.3334004178697595e-05, "loss": 0.5286, "step": 4736 }, { "epoch": 0.5820125322521195, "grad_norm": 1.1893561985899257, "learning_rate": 1.3327610839450972e-05, "loss": 0.6014, "step": 4737 }, { "epoch": 0.5821353974689766, "grad_norm": 1.2681493034733324, "learning_rate": 1.3321217807829498e-05, "loss": 0.4932, "step": 4738 }, { "epoch": 0.5822582626858337, "grad_norm": 1.4090156822712316, "learning_rate": 1.331482508500912e-05, "loss": 0.6203, "step": 4739 }, { "epoch": 0.5823811279026907, "grad_norm": 1.2476505238007927, "learning_rate": 1.3308432672165738e-05, "loss": 0.4972, "step": 4740 }, { "epoch": 0.5825039931195478, "grad_norm": 1.1260282259659349, "learning_rate": 1.3302040570475194e-05, "loss": 0.537, "step": 4741 }, { "epoch": 0.5826268583364049, "grad_norm": 1.2608006024289669, "learning_rate": 1.3295648781113277e-05, "loss": 0.6856, "step": 4742 }, { "epoch": 0.582749723553262, "grad_norm": 0.9675729905197541, "learning_rate": 1.3289257305255716e-05, "loss": 0.64, "step": 4743 }, { "epoch": 0.5828725887701192, "grad_norm": 1.3336813494288424, "learning_rate": 1.3282866144078171e-05, "loss": 0.5348, "step": 4744 }, { "epoch": 0.5829954539869763, "grad_norm": 1.1288799652160622, "learning_rate": 1.327647529875627e-05, "loss": 0.6855, "step": 4745 }, { "epoch": 0.5831183192038334, "grad_norm": 1.3806414911633011, "learning_rate": 1.327008477046556e-05, "loss": 0.6846, "step": 4746 }, { "epoch": 0.5832411844206905, "grad_norm": 1.2060952727683958, "learning_rate": 1.3263694560381538e-05, "loss": 0.5075, "step": 4747 }, { "epoch": 0.5833640496375476, "grad_norm": 1.1844189244045897, "learning_rate": 1.3257304669679637e-05, "loss": 0.4641, "step": 4748 }, { "epoch": 0.5834869148544047, "grad_norm": 1.1010302238292413, "learning_rate": 1.3250915099535245e-05, "loss": 0.6643, "step": 4749 }, { "epoch": 0.5836097800712619, "grad_norm": 1.2478689783081902, "learning_rate": 1.3244525851123676e-05, "loss": 0.5027, "step": 4750 }, { "epoch": 0.583732645288119, "grad_norm": 1.1969305377065733, "learning_rate": 1.3238136925620191e-05, "loss": 0.6165, "step": 4751 }, { "epoch": 0.5838555105049761, "grad_norm": 1.1566111426988128, "learning_rate": 1.3231748324199989e-05, "loss": 0.6752, "step": 4752 }, { "epoch": 0.5839783757218332, "grad_norm": 1.1413120738940352, "learning_rate": 1.322536004803822e-05, "loss": 0.5268, "step": 4753 }, { "epoch": 0.5841012409386902, "grad_norm": 1.3231134882784314, "learning_rate": 1.321897209830996e-05, "loss": 0.4905, "step": 4754 }, { "epoch": 0.5842241061555473, "grad_norm": 1.1480707596095834, "learning_rate": 1.3212584476190233e-05, "loss": 0.6679, "step": 4755 }, { "epoch": 0.5843469713724044, "grad_norm": 1.4604682245978404, "learning_rate": 1.3206197182853994e-05, "loss": 0.6337, "step": 4756 }, { "epoch": 0.5844698365892615, "grad_norm": 1.3861831717109385, "learning_rate": 1.3199810219476156e-05, "loss": 0.5797, "step": 4757 }, { "epoch": 0.5845927018061187, "grad_norm": 1.108738212207439, "learning_rate": 1.3193423587231553e-05, "loss": 0.627, "step": 4758 }, { "epoch": 0.5847155670229758, "grad_norm": 1.349710873398861, "learning_rate": 1.3187037287294967e-05, "loss": 0.6939, "step": 4759 }, { "epoch": 0.5848384322398329, "grad_norm": 1.1245056924925245, "learning_rate": 1.318065132084111e-05, "loss": 0.6879, "step": 4760 }, { "epoch": 0.58496129745669, "grad_norm": 1.1526134027481056, "learning_rate": 1.3174265689044646e-05, "loss": 0.6107, "step": 4761 }, { "epoch": 0.5850841626735471, "grad_norm": 1.154424451456958, "learning_rate": 1.3167880393080171e-05, "loss": 0.5586, "step": 4762 }, { "epoch": 0.5852070278904042, "grad_norm": 1.3870435097609697, "learning_rate": 1.3161495434122213e-05, "loss": 0.6892, "step": 4763 }, { "epoch": 0.5853298931072614, "grad_norm": 1.0780004877696172, "learning_rate": 1.315511081334524e-05, "loss": 0.538, "step": 4764 }, { "epoch": 0.5854527583241185, "grad_norm": 1.3585592271916285, "learning_rate": 1.3148726531923677e-05, "loss": 0.5703, "step": 4765 }, { "epoch": 0.5855756235409756, "grad_norm": 1.3583348019660262, "learning_rate": 1.3142342591031862e-05, "loss": 0.6103, "step": 4766 }, { "epoch": 0.5856984887578327, "grad_norm": 1.0611416507764426, "learning_rate": 1.3135958991844076e-05, "loss": 0.5449, "step": 4767 }, { "epoch": 0.5858213539746898, "grad_norm": 1.3195678082329894, "learning_rate": 1.3129575735534548e-05, "loss": 0.5756, "step": 4768 }, { "epoch": 0.5859442191915468, "grad_norm": 1.2877785542777946, "learning_rate": 1.3123192823277435e-05, "loss": 0.4999, "step": 4769 }, { "epoch": 0.5860670844084039, "grad_norm": 1.274787274177473, "learning_rate": 1.3116810256246828e-05, "loss": 0.5283, "step": 4770 }, { "epoch": 0.586189949625261, "grad_norm": 1.184599648226264, "learning_rate": 1.3110428035616757e-05, "loss": 0.657, "step": 4771 }, { "epoch": 0.5863128148421182, "grad_norm": 1.254192462193568, "learning_rate": 1.31040461625612e-05, "loss": 0.5663, "step": 4772 }, { "epoch": 0.5864356800589753, "grad_norm": 1.122428967180021, "learning_rate": 1.3097664638254057e-05, "loss": 0.6561, "step": 4773 }, { "epoch": 0.5865585452758324, "grad_norm": 1.2381252582002227, "learning_rate": 1.3091283463869167e-05, "loss": 0.5978, "step": 4774 }, { "epoch": 0.5866814104926895, "grad_norm": 1.3275324252011076, "learning_rate": 1.3084902640580297e-05, "loss": 0.6438, "step": 4775 }, { "epoch": 0.5868042757095466, "grad_norm": 1.160003532751031, "learning_rate": 1.3078522169561172e-05, "loss": 0.494, "step": 4776 }, { "epoch": 0.5869271409264037, "grad_norm": 1.58758247411693, "learning_rate": 1.3072142051985436e-05, "loss": 0.6355, "step": 4777 }, { "epoch": 0.5870500061432609, "grad_norm": 1.3216801222022145, "learning_rate": 1.3065762289026665e-05, "loss": 0.6262, "step": 4778 }, { "epoch": 0.587172871360118, "grad_norm": 1.1209079951999203, "learning_rate": 1.3059382881858375e-05, "loss": 0.5189, "step": 4779 }, { "epoch": 0.5872957365769751, "grad_norm": 1.1914566510143834, "learning_rate": 1.3053003831654019e-05, "loss": 0.6782, "step": 4780 }, { "epoch": 0.5874186017938322, "grad_norm": 1.2274261008151561, "learning_rate": 1.3046625139586984e-05, "loss": 0.6529, "step": 4781 }, { "epoch": 0.5875414670106893, "grad_norm": 1.1240193322232122, "learning_rate": 1.3040246806830585e-05, "loss": 0.5907, "step": 4782 }, { "epoch": 0.5876643322275463, "grad_norm": 1.155807076083266, "learning_rate": 1.3033868834558071e-05, "loss": 0.5646, "step": 4783 }, { "epoch": 0.5877871974444034, "grad_norm": 1.1305659694162093, "learning_rate": 1.302749122394264e-05, "loss": 0.6125, "step": 4784 }, { "epoch": 0.5879100626612606, "grad_norm": 1.1817154785622284, "learning_rate": 1.3021113976157408e-05, "loss": 0.5436, "step": 4785 }, { "epoch": 0.5880329278781177, "grad_norm": 1.2395799443236521, "learning_rate": 1.3014737092375423e-05, "loss": 0.6125, "step": 4786 }, { "epoch": 0.5881557930949748, "grad_norm": 1.198145334364304, "learning_rate": 1.3008360573769676e-05, "loss": 0.6238, "step": 4787 }, { "epoch": 0.5882786583118319, "grad_norm": 1.2393234124904315, "learning_rate": 1.3001984421513085e-05, "loss": 0.5765, "step": 4788 }, { "epoch": 0.588401523528689, "grad_norm": 1.1551714742138843, "learning_rate": 1.2995608636778502e-05, "loss": 0.6347, "step": 4789 }, { "epoch": 0.5885243887455461, "grad_norm": 1.28334018888213, "learning_rate": 1.2989233220738707e-05, "loss": 0.6108, "step": 4790 }, { "epoch": 0.5886472539624032, "grad_norm": 1.3181069585124405, "learning_rate": 1.2982858174566425e-05, "loss": 0.5672, "step": 4791 }, { "epoch": 0.5887701191792604, "grad_norm": 1.3953138385491877, "learning_rate": 1.2976483499434302e-05, "loss": 0.6027, "step": 4792 }, { "epoch": 0.5888929843961175, "grad_norm": 1.3446139059369324, "learning_rate": 1.2970109196514918e-05, "loss": 0.5546, "step": 4793 }, { "epoch": 0.5890158496129746, "grad_norm": 1.2499292381851896, "learning_rate": 1.2963735266980773e-05, "loss": 0.5505, "step": 4794 }, { "epoch": 0.5891387148298317, "grad_norm": 1.0950797272594022, "learning_rate": 1.2957361712004327e-05, "loss": 0.6828, "step": 4795 }, { "epoch": 0.5892615800466888, "grad_norm": 1.4897092712378632, "learning_rate": 1.295098853275795e-05, "loss": 0.5507, "step": 4796 }, { "epoch": 0.5893844452635459, "grad_norm": 1.0292409213129154, "learning_rate": 1.2944615730413941e-05, "loss": 0.6007, "step": 4797 }, { "epoch": 0.589507310480403, "grad_norm": 1.3243859727372664, "learning_rate": 1.2938243306144536e-05, "loss": 0.5905, "step": 4798 }, { "epoch": 0.5896301756972601, "grad_norm": 1.2894661514399857, "learning_rate": 1.2931871261121907e-05, "loss": 0.6269, "step": 4799 }, { "epoch": 0.5897530409141172, "grad_norm": 1.4242311238497813, "learning_rate": 1.292549959651815e-05, "loss": 0.6179, "step": 4800 }, { "epoch": 0.5898759061309743, "grad_norm": 1.4773708541886612, "learning_rate": 1.2919128313505286e-05, "loss": 0.5944, "step": 4801 }, { "epoch": 0.5899987713478314, "grad_norm": 1.3004427810834438, "learning_rate": 1.2912757413255266e-05, "loss": 0.6149, "step": 4802 }, { "epoch": 0.5901216365646885, "grad_norm": 1.12447595039409, "learning_rate": 1.2906386896939994e-05, "loss": 0.5003, "step": 4803 }, { "epoch": 0.5902445017815456, "grad_norm": 1.998177280351999, "learning_rate": 1.2900016765731271e-05, "loss": 0.7327, "step": 4804 }, { "epoch": 0.5903673669984028, "grad_norm": 1.3520734940280055, "learning_rate": 1.2893647020800847e-05, "loss": 0.6392, "step": 4805 }, { "epoch": 0.5904902322152599, "grad_norm": 1.5741071131797018, "learning_rate": 1.288727766332039e-05, "loss": 0.552, "step": 4806 }, { "epoch": 0.590613097432117, "grad_norm": 1.2817782291479891, "learning_rate": 1.288090869446151e-05, "loss": 0.4934, "step": 4807 }, { "epoch": 0.5907359626489741, "grad_norm": 1.477162876994285, "learning_rate": 1.287454011539573e-05, "loss": 0.6877, "step": 4808 }, { "epoch": 0.5908588278658312, "grad_norm": 1.1266656807071846, "learning_rate": 1.2868171927294507e-05, "loss": 0.5782, "step": 4809 }, { "epoch": 0.5909816930826883, "grad_norm": 1.2607425917522699, "learning_rate": 1.2861804131329237e-05, "loss": 0.6053, "step": 4810 }, { "epoch": 0.5911045582995454, "grad_norm": 1.2000060934158963, "learning_rate": 1.2855436728671232e-05, "loss": 0.5758, "step": 4811 }, { "epoch": 0.5912274235164026, "grad_norm": 1.1365567254400024, "learning_rate": 1.2849069720491735e-05, "loss": 0.5659, "step": 4812 }, { "epoch": 0.5913502887332596, "grad_norm": 1.4935692333962751, "learning_rate": 1.2842703107961903e-05, "loss": 0.6836, "step": 4813 }, { "epoch": 0.5914731539501167, "grad_norm": 1.2397329991539605, "learning_rate": 1.2836336892252851e-05, "loss": 0.6293, "step": 4814 }, { "epoch": 0.5915960191669738, "grad_norm": 1.3535896781055545, "learning_rate": 1.2829971074535597e-05, "loss": 0.5964, "step": 4815 }, { "epoch": 0.5917188843838309, "grad_norm": 1.2027105713073871, "learning_rate": 1.282360565598109e-05, "loss": 0.6555, "step": 4816 }, { "epoch": 0.591841749600688, "grad_norm": 1.4148955591025714, "learning_rate": 1.2817240637760206e-05, "loss": 0.6487, "step": 4817 }, { "epoch": 0.5919646148175451, "grad_norm": 1.6005349470844865, "learning_rate": 1.2810876021043753e-05, "loss": 0.553, "step": 4818 }, { "epoch": 0.5920874800344023, "grad_norm": 1.1773767887711961, "learning_rate": 1.280451180700246e-05, "loss": 0.5487, "step": 4819 }, { "epoch": 0.5922103452512594, "grad_norm": 1.319234456739277, "learning_rate": 1.2798147996806982e-05, "loss": 0.5933, "step": 4820 }, { "epoch": 0.5923332104681165, "grad_norm": 0.9936583556503611, "learning_rate": 1.2791784591627893e-05, "loss": 0.5684, "step": 4821 }, { "epoch": 0.5924560756849736, "grad_norm": 1.2263127626732733, "learning_rate": 1.2785421592635716e-05, "loss": 0.5053, "step": 4822 }, { "epoch": 0.5925789409018307, "grad_norm": 1.1616092175704456, "learning_rate": 1.2779059001000873e-05, "loss": 0.5031, "step": 4823 }, { "epoch": 0.5927018061186878, "grad_norm": 1.3339950719096958, "learning_rate": 1.2772696817893726e-05, "loss": 0.5661, "step": 4824 }, { "epoch": 0.592824671335545, "grad_norm": 1.2159259287034658, "learning_rate": 1.2766335044484548e-05, "loss": 0.5847, "step": 4825 }, { "epoch": 0.5929475365524021, "grad_norm": 1.201969288328668, "learning_rate": 1.2759973681943559e-05, "loss": 0.6373, "step": 4826 }, { "epoch": 0.5930704017692591, "grad_norm": 1.2285562007140498, "learning_rate": 1.2753612731440882e-05, "loss": 0.6017, "step": 4827 }, { "epoch": 0.5931932669861162, "grad_norm": 1.1885248346464463, "learning_rate": 1.2747252194146575e-05, "loss": 0.7167, "step": 4828 }, { "epoch": 0.5933161322029733, "grad_norm": 1.1431667976182276, "learning_rate": 1.274089207123061e-05, "loss": 0.5683, "step": 4829 }, { "epoch": 0.5934389974198304, "grad_norm": 1.1869006772036486, "learning_rate": 1.2734532363862907e-05, "loss": 0.5408, "step": 4830 }, { "epoch": 0.5935618626366875, "grad_norm": 1.4446974172740192, "learning_rate": 1.2728173073213282e-05, "loss": 0.6376, "step": 4831 }, { "epoch": 0.5936847278535446, "grad_norm": 1.4949186114741688, "learning_rate": 1.2721814200451483e-05, "loss": 0.6619, "step": 4832 }, { "epoch": 0.5938075930704018, "grad_norm": 1.2037328916191774, "learning_rate": 1.2715455746747188e-05, "loss": 0.5519, "step": 4833 }, { "epoch": 0.5939304582872589, "grad_norm": 1.3397550319598888, "learning_rate": 1.2709097713269996e-05, "loss": 0.5092, "step": 4834 }, { "epoch": 0.594053323504116, "grad_norm": 1.515492603728574, "learning_rate": 1.2702740101189423e-05, "loss": 0.5563, "step": 4835 }, { "epoch": 0.5941761887209731, "grad_norm": 1.2169627622971348, "learning_rate": 1.2696382911674905e-05, "loss": 0.6838, "step": 4836 }, { "epoch": 0.5942990539378302, "grad_norm": 1.2923215997503803, "learning_rate": 1.2690026145895814e-05, "loss": 0.696, "step": 4837 }, { "epoch": 0.5944219191546873, "grad_norm": 1.1764857305244325, "learning_rate": 1.2683669805021437e-05, "loss": 0.5812, "step": 4838 }, { "epoch": 0.5945447843715445, "grad_norm": 1.5672854299117656, "learning_rate": 1.2677313890220974e-05, "loss": 0.6358, "step": 4839 }, { "epoch": 0.5946676495884016, "grad_norm": 1.2074023796105426, "learning_rate": 1.2670958402663552e-05, "loss": 0.6192, "step": 4840 }, { "epoch": 0.5947905148052587, "grad_norm": 1.2702007445595194, "learning_rate": 1.2664603343518232e-05, "loss": 0.7071, "step": 4841 }, { "epoch": 0.5949133800221157, "grad_norm": 1.0760966716774454, "learning_rate": 1.2658248713953983e-05, "loss": 0.5848, "step": 4842 }, { "epoch": 0.5950362452389728, "grad_norm": 1.327004424531281, "learning_rate": 1.2651894515139697e-05, "loss": 0.6108, "step": 4843 }, { "epoch": 0.5951591104558299, "grad_norm": 1.361984147887081, "learning_rate": 1.2645540748244183e-05, "loss": 0.4864, "step": 4844 }, { "epoch": 0.595281975672687, "grad_norm": 1.0703773839441337, "learning_rate": 1.2639187414436182e-05, "loss": 0.5447, "step": 4845 }, { "epoch": 0.5954048408895442, "grad_norm": 1.1590123091743507, "learning_rate": 1.2632834514884347e-05, "loss": 0.4985, "step": 4846 }, { "epoch": 0.5955277061064013, "grad_norm": 1.3528554707462161, "learning_rate": 1.2626482050757251e-05, "loss": 0.5613, "step": 4847 }, { "epoch": 0.5956505713232584, "grad_norm": 1.3400061041220181, "learning_rate": 1.2620130023223382e-05, "loss": 0.6477, "step": 4848 }, { "epoch": 0.5957734365401155, "grad_norm": 1.1684579483248707, "learning_rate": 1.2613778433451168e-05, "loss": 0.6421, "step": 4849 }, { "epoch": 0.5958963017569726, "grad_norm": 1.1785025483176674, "learning_rate": 1.2607427282608936e-05, "loss": 0.5401, "step": 4850 }, { "epoch": 0.5960191669738297, "grad_norm": 0.9605819670485719, "learning_rate": 1.2601076571864934e-05, "loss": 0.6536, "step": 4851 }, { "epoch": 0.5961420321906868, "grad_norm": 1.1449303422108332, "learning_rate": 1.2594726302387345e-05, "loss": 0.5416, "step": 4852 }, { "epoch": 0.596264897407544, "grad_norm": 1.3328394056062203, "learning_rate": 1.2588376475344252e-05, "loss": 0.6088, "step": 4853 }, { "epoch": 0.5963877626244011, "grad_norm": 1.1027248935165148, "learning_rate": 1.2582027091903667e-05, "loss": 0.5301, "step": 4854 }, { "epoch": 0.5965106278412582, "grad_norm": 1.1211285290323374, "learning_rate": 1.2575678153233512e-05, "loss": 0.5144, "step": 4855 }, { "epoch": 0.5966334930581152, "grad_norm": 1.2334055576672946, "learning_rate": 1.2569329660501643e-05, "loss": 0.5691, "step": 4856 }, { "epoch": 0.5967563582749723, "grad_norm": 1.2009376896814892, "learning_rate": 1.256298161487582e-05, "loss": 0.7159, "step": 4857 }, { "epoch": 0.5968792234918294, "grad_norm": 1.280866602894649, "learning_rate": 1.2556634017523727e-05, "loss": 0.6747, "step": 4858 }, { "epoch": 0.5970020887086865, "grad_norm": 1.5209138279896175, "learning_rate": 1.255028686961295e-05, "loss": 0.7119, "step": 4859 }, { "epoch": 0.5971249539255437, "grad_norm": 1.1219549446774342, "learning_rate": 1.2543940172311026e-05, "loss": 0.5808, "step": 4860 }, { "epoch": 0.5972478191424008, "grad_norm": 1.2382493379561585, "learning_rate": 1.2537593926785378e-05, "loss": 0.6008, "step": 4861 }, { "epoch": 0.5973706843592579, "grad_norm": 1.0847206076951112, "learning_rate": 1.2531248134203357e-05, "loss": 0.702, "step": 4862 }, { "epoch": 0.597493549576115, "grad_norm": 1.4534792653646045, "learning_rate": 1.252490279573223e-05, "loss": 0.6392, "step": 4863 }, { "epoch": 0.5976164147929721, "grad_norm": 1.2637082974212228, "learning_rate": 1.2518557912539185e-05, "loss": 0.5007, "step": 4864 }, { "epoch": 0.5977392800098292, "grad_norm": 1.2054140159825808, "learning_rate": 1.2512213485791318e-05, "loss": 0.5696, "step": 4865 }, { "epoch": 0.5978621452266863, "grad_norm": 1.3225733191985167, "learning_rate": 1.2505869516655647e-05, "loss": 0.6419, "step": 4866 }, { "epoch": 0.5979850104435435, "grad_norm": 1.3689263337516, "learning_rate": 1.2499526006299097e-05, "loss": 0.6087, "step": 4867 }, { "epoch": 0.5981078756604006, "grad_norm": 1.3656226391001414, "learning_rate": 1.249318295588853e-05, "loss": 0.6009, "step": 4868 }, { "epoch": 0.5982307408772577, "grad_norm": 1.040325726010827, "learning_rate": 1.2486840366590698e-05, "loss": 0.5843, "step": 4869 }, { "epoch": 0.5983536060941148, "grad_norm": 1.227231239425388, "learning_rate": 1.2480498239572285e-05, "loss": 0.6575, "step": 4870 }, { "epoch": 0.5984764713109718, "grad_norm": 1.8493681737990717, "learning_rate": 1.2474156575999875e-05, "loss": 0.6487, "step": 4871 }, { "epoch": 0.5985993365278289, "grad_norm": 1.1043787330259531, "learning_rate": 1.2467815377039988e-05, "loss": 0.5104, "step": 4872 }, { "epoch": 0.598722201744686, "grad_norm": 1.2532530362264491, "learning_rate": 1.246147464385904e-05, "loss": 0.6873, "step": 4873 }, { "epoch": 0.5988450669615432, "grad_norm": 1.1953829189554201, "learning_rate": 1.2455134377623361e-05, "loss": 0.6648, "step": 4874 }, { "epoch": 0.5989679321784003, "grad_norm": 1.641077732030845, "learning_rate": 1.2448794579499216e-05, "loss": 0.7248, "step": 4875 }, { "epoch": 0.5990907973952574, "grad_norm": 1.2194693739547757, "learning_rate": 1.2442455250652763e-05, "loss": 0.5957, "step": 4876 }, { "epoch": 0.5992136626121145, "grad_norm": 1.2183860075642599, "learning_rate": 1.243611639225008e-05, "loss": 0.6886, "step": 4877 }, { "epoch": 0.5993365278289716, "grad_norm": 1.1254032476300109, "learning_rate": 1.2429778005457154e-05, "loss": 0.568, "step": 4878 }, { "epoch": 0.5994593930458287, "grad_norm": 1.8159739879896684, "learning_rate": 1.2423440091439902e-05, "loss": 0.6408, "step": 4879 }, { "epoch": 0.5995822582626859, "grad_norm": 1.129345362037194, "learning_rate": 1.2417102651364134e-05, "loss": 0.5925, "step": 4880 }, { "epoch": 0.599705123479543, "grad_norm": 1.4314893327996057, "learning_rate": 1.2410765686395584e-05, "loss": 0.6655, "step": 4881 }, { "epoch": 0.5998279886964001, "grad_norm": 1.411291387756137, "learning_rate": 1.240442919769989e-05, "loss": 0.6334, "step": 4882 }, { "epoch": 0.5999508539132572, "grad_norm": 1.3974885985544316, "learning_rate": 1.2398093186442616e-05, "loss": 0.6161, "step": 4883 }, { "epoch": 0.6000737191301143, "grad_norm": 1.1731329272908382, "learning_rate": 1.2391757653789227e-05, "loss": 0.5356, "step": 4884 }, { "epoch": 0.6001965843469713, "grad_norm": 1.0569051170079709, "learning_rate": 1.2385422600905102e-05, "loss": 0.5688, "step": 4885 }, { "epoch": 0.6003194495638284, "grad_norm": 1.5519262717822182, "learning_rate": 1.2379088028955525e-05, "loss": 0.6725, "step": 4886 }, { "epoch": 0.6004423147806855, "grad_norm": 1.1288185665164088, "learning_rate": 1.2372753939105716e-05, "loss": 0.5079, "step": 4887 }, { "epoch": 0.6005651799975427, "grad_norm": 1.0859275077184412, "learning_rate": 1.2366420332520783e-05, "loss": 0.6504, "step": 4888 }, { "epoch": 0.6006880452143998, "grad_norm": 1.6743377499276053, "learning_rate": 1.236008721036575e-05, "loss": 0.6892, "step": 4889 }, { "epoch": 0.6008109104312569, "grad_norm": 1.3204148696154865, "learning_rate": 1.2353754573805549e-05, "loss": 0.6118, "step": 4890 }, { "epoch": 0.600933775648114, "grad_norm": 1.3944927715182258, "learning_rate": 1.2347422424005039e-05, "loss": 0.5413, "step": 4891 }, { "epoch": 0.6010566408649711, "grad_norm": 1.098662012132866, "learning_rate": 1.2341090762128969e-05, "loss": 0.6613, "step": 4892 }, { "epoch": 0.6011795060818282, "grad_norm": 1.1763143472540212, "learning_rate": 1.2334759589342003e-05, "loss": 0.5721, "step": 4893 }, { "epoch": 0.6013023712986854, "grad_norm": 1.1982806180466452, "learning_rate": 1.2328428906808734e-05, "loss": 0.5891, "step": 4894 }, { "epoch": 0.6014252365155425, "grad_norm": 1.235532010627716, "learning_rate": 1.232209871569364e-05, "loss": 0.523, "step": 4895 }, { "epoch": 0.6015481017323996, "grad_norm": 1.0959241562593927, "learning_rate": 1.2315769017161121e-05, "loss": 0.5817, "step": 4896 }, { "epoch": 0.6016709669492567, "grad_norm": 1.0104412107629688, "learning_rate": 1.2309439812375479e-05, "loss": 0.4791, "step": 4897 }, { "epoch": 0.6017938321661138, "grad_norm": 1.1837798538565758, "learning_rate": 1.2303111102500938e-05, "loss": 0.6, "step": 4898 }, { "epoch": 0.6019166973829709, "grad_norm": 1.0595716655297487, "learning_rate": 1.2296782888701621e-05, "loss": 0.5246, "step": 4899 }, { "epoch": 0.6020395625998279, "grad_norm": 1.2051442409636888, "learning_rate": 1.2290455172141563e-05, "loss": 0.6095, "step": 4900 }, { "epoch": 0.602162427816685, "grad_norm": 1.4535344034141597, "learning_rate": 1.2284127953984698e-05, "loss": 0.5928, "step": 4901 }, { "epoch": 0.6022852930335422, "grad_norm": 1.1604259866337132, "learning_rate": 1.2277801235394885e-05, "loss": 0.5774, "step": 4902 }, { "epoch": 0.6024081582503993, "grad_norm": 1.2786690022957672, "learning_rate": 1.2271475017535884e-05, "loss": 0.5739, "step": 4903 }, { "epoch": 0.6025310234672564, "grad_norm": 1.3822056765715338, "learning_rate": 1.2265149301571357e-05, "loss": 0.5792, "step": 4904 }, { "epoch": 0.6026538886841135, "grad_norm": 1.2998574839381445, "learning_rate": 1.2258824088664874e-05, "loss": 0.6369, "step": 4905 }, { "epoch": 0.6027767539009706, "grad_norm": 1.2585693332848045, "learning_rate": 1.2252499379979928e-05, "loss": 0.7371, "step": 4906 }, { "epoch": 0.6028996191178277, "grad_norm": 1.2152615332442036, "learning_rate": 1.2246175176679902e-05, "loss": 0.6168, "step": 4907 }, { "epoch": 0.6030224843346849, "grad_norm": 1.1606144879843505, "learning_rate": 1.2239851479928096e-05, "loss": 0.6176, "step": 4908 }, { "epoch": 0.603145349551542, "grad_norm": 1.2352622053677993, "learning_rate": 1.2233528290887705e-05, "loss": 0.5689, "step": 4909 }, { "epoch": 0.6032682147683991, "grad_norm": 1.324380877520496, "learning_rate": 1.2227205610721848e-05, "loss": 0.544, "step": 4910 }, { "epoch": 0.6033910799852562, "grad_norm": 1.1119098064619926, "learning_rate": 1.2220883440593536e-05, "loss": 0.5852, "step": 4911 }, { "epoch": 0.6035139452021133, "grad_norm": 1.3971188433549913, "learning_rate": 1.221456178166569e-05, "loss": 0.5977, "step": 4912 }, { "epoch": 0.6036368104189704, "grad_norm": 1.2973159114595534, "learning_rate": 1.2208240635101137e-05, "loss": 0.6845, "step": 4913 }, { "epoch": 0.6037596756358274, "grad_norm": 1.1920944355821705, "learning_rate": 1.2201920002062617e-05, "loss": 0.5798, "step": 4914 }, { "epoch": 0.6038825408526846, "grad_norm": 1.2962479179328106, "learning_rate": 1.2195599883712768e-05, "loss": 0.6799, "step": 4915 }, { "epoch": 0.6040054060695417, "grad_norm": 1.333066464614596, "learning_rate": 1.2189280281214128e-05, "loss": 0.6923, "step": 4916 }, { "epoch": 0.6041282712863988, "grad_norm": 1.254072586074098, "learning_rate": 1.2182961195729158e-05, "loss": 0.605, "step": 4917 }, { "epoch": 0.6042511365032559, "grad_norm": 1.240150673531269, "learning_rate": 1.2176642628420206e-05, "loss": 0.6651, "step": 4918 }, { "epoch": 0.604374001720113, "grad_norm": 0.9944720707339351, "learning_rate": 1.2170324580449534e-05, "loss": 0.617, "step": 4919 }, { "epoch": 0.6044968669369701, "grad_norm": 1.1417273954752818, "learning_rate": 1.2164007052979299e-05, "loss": 0.6007, "step": 4920 }, { "epoch": 0.6046197321538272, "grad_norm": 1.2712312400413885, "learning_rate": 1.2157690047171578e-05, "loss": 0.5025, "step": 4921 }, { "epoch": 0.6047425973706844, "grad_norm": 1.2823737227667862, "learning_rate": 1.215137356418834e-05, "loss": 0.6601, "step": 4922 }, { "epoch": 0.6048654625875415, "grad_norm": 1.3220042469754072, "learning_rate": 1.2145057605191462e-05, "loss": 0.7328, "step": 4923 }, { "epoch": 0.6049883278043986, "grad_norm": 1.3277688302899817, "learning_rate": 1.2138742171342716e-05, "loss": 0.5146, "step": 4924 }, { "epoch": 0.6051111930212557, "grad_norm": 1.088024567623259, "learning_rate": 1.2132427263803797e-05, "loss": 0.6229, "step": 4925 }, { "epoch": 0.6052340582381128, "grad_norm": 1.1293168998084013, "learning_rate": 1.2126112883736288e-05, "loss": 0.6083, "step": 4926 }, { "epoch": 0.6053569234549699, "grad_norm": 1.0530981111514812, "learning_rate": 1.2119799032301675e-05, "loss": 0.494, "step": 4927 }, { "epoch": 0.6054797886718271, "grad_norm": 1.4077884266853518, "learning_rate": 1.2113485710661348e-05, "loss": 0.6472, "step": 4928 }, { "epoch": 0.6056026538886841, "grad_norm": 1.2391031124306708, "learning_rate": 1.2107172919976607e-05, "loss": 0.601, "step": 4929 }, { "epoch": 0.6057255191055412, "grad_norm": 1.0467099183558541, "learning_rate": 1.2100860661408648e-05, "loss": 0.6282, "step": 4930 }, { "epoch": 0.6058483843223983, "grad_norm": 1.1259482883797272, "learning_rate": 1.2094548936118567e-05, "loss": 0.5636, "step": 4931 }, { "epoch": 0.6059712495392554, "grad_norm": 1.3139570152436675, "learning_rate": 1.2088237745267363e-05, "loss": 0.6908, "step": 4932 }, { "epoch": 0.6060941147561125, "grad_norm": 1.5085176690956654, "learning_rate": 1.2081927090015949e-05, "loss": 0.6066, "step": 4933 }, { "epoch": 0.6062169799729696, "grad_norm": 1.1100883956105243, "learning_rate": 1.2075616971525119e-05, "loss": 0.6135, "step": 4934 }, { "epoch": 0.6063398451898268, "grad_norm": 1.1764617969665039, "learning_rate": 1.2069307390955584e-05, "loss": 0.6124, "step": 4935 }, { "epoch": 0.6064627104066839, "grad_norm": 1.0613135026947689, "learning_rate": 1.2062998349467941e-05, "loss": 0.5337, "step": 4936 }, { "epoch": 0.606585575623541, "grad_norm": 1.0461872326419004, "learning_rate": 1.2056689848222713e-05, "loss": 0.6129, "step": 4937 }, { "epoch": 0.6067084408403981, "grad_norm": 1.1068289457663292, "learning_rate": 1.2050381888380297e-05, "loss": 0.7301, "step": 4938 }, { "epoch": 0.6068313060572552, "grad_norm": 1.3023916246740155, "learning_rate": 1.2044074471101e-05, "loss": 0.698, "step": 4939 }, { "epoch": 0.6069541712741123, "grad_norm": 1.0539568898407412, "learning_rate": 1.2037767597545039e-05, "loss": 0.4786, "step": 4940 }, { "epoch": 0.6070770364909694, "grad_norm": 1.2996579452778554, "learning_rate": 1.2031461268872518e-05, "loss": 0.5475, "step": 4941 }, { "epoch": 0.6071999017078266, "grad_norm": 1.187115000764816, "learning_rate": 1.2025155486243444e-05, "loss": 0.5855, "step": 4942 }, { "epoch": 0.6073227669246837, "grad_norm": 1.4616641988656198, "learning_rate": 1.2018850250817719e-05, "loss": 0.7058, "step": 4943 }, { "epoch": 0.6074456321415407, "grad_norm": 1.498723211383975, "learning_rate": 1.2012545563755165e-05, "loss": 0.612, "step": 4944 }, { "epoch": 0.6075684973583978, "grad_norm": 1.2118738359321068, "learning_rate": 1.2006241426215479e-05, "loss": 0.5907, "step": 4945 }, { "epoch": 0.6076913625752549, "grad_norm": 1.2612516112786734, "learning_rate": 1.1999937839358268e-05, "loss": 0.6787, "step": 4946 }, { "epoch": 0.607814227792112, "grad_norm": 1.4463986676983365, "learning_rate": 1.1993634804343032e-05, "loss": 0.5803, "step": 4947 }, { "epoch": 0.6079370930089691, "grad_norm": 1.2639282148441897, "learning_rate": 1.198733232232918e-05, "loss": 0.6267, "step": 4948 }, { "epoch": 0.6080599582258263, "grad_norm": 1.1465854917810474, "learning_rate": 1.198103039447601e-05, "loss": 0.6214, "step": 4949 }, { "epoch": 0.6081828234426834, "grad_norm": 1.1159249188071896, "learning_rate": 1.1974729021942717e-05, "loss": 0.5205, "step": 4950 }, { "epoch": 0.6083056886595405, "grad_norm": 7.067580458796607, "learning_rate": 1.1968428205888397e-05, "loss": 0.6946, "step": 4951 }, { "epoch": 0.6084285538763976, "grad_norm": 1.1530405335155964, "learning_rate": 1.1962127947472055e-05, "loss": 0.5355, "step": 4952 }, { "epoch": 0.6085514190932547, "grad_norm": 1.5752879758820033, "learning_rate": 1.1955828247852576e-05, "loss": 0.6583, "step": 4953 }, { "epoch": 0.6086742843101118, "grad_norm": 1.2202970017070367, "learning_rate": 1.1949529108188746e-05, "loss": 0.6047, "step": 4954 }, { "epoch": 0.608797149526969, "grad_norm": 1.0759875957240013, "learning_rate": 1.1943230529639251e-05, "loss": 0.5621, "step": 4955 }, { "epoch": 0.6089200147438261, "grad_norm": 1.2695798898974724, "learning_rate": 1.193693251336268e-05, "loss": 0.4978, "step": 4956 }, { "epoch": 0.6090428799606832, "grad_norm": 1.308476326049241, "learning_rate": 1.1930635060517509e-05, "loss": 0.6101, "step": 4957 }, { "epoch": 0.6091657451775402, "grad_norm": 1.116098067463713, "learning_rate": 1.192433817226211e-05, "loss": 0.6083, "step": 4958 }, { "epoch": 0.6092886103943973, "grad_norm": 1.302464301449465, "learning_rate": 1.191804184975476e-05, "loss": 0.621, "step": 4959 }, { "epoch": 0.6094114756112544, "grad_norm": 1.217744952787814, "learning_rate": 1.1911746094153627e-05, "loss": 0.4722, "step": 4960 }, { "epoch": 0.6095343408281115, "grad_norm": 1.0925866624033331, "learning_rate": 1.190545090661677e-05, "loss": 0.5671, "step": 4961 }, { "epoch": 0.6096572060449686, "grad_norm": 1.208881942195444, "learning_rate": 1.1899156288302144e-05, "loss": 0.6317, "step": 4962 }, { "epoch": 0.6097800712618258, "grad_norm": 1.3406082118061433, "learning_rate": 1.1892862240367615e-05, "loss": 0.63, "step": 4963 }, { "epoch": 0.6099029364786829, "grad_norm": 1.4181090865369084, "learning_rate": 1.1886568763970928e-05, "loss": 0.6296, "step": 4964 }, { "epoch": 0.61002580169554, "grad_norm": 1.2501330541594633, "learning_rate": 1.1880275860269723e-05, "loss": 0.5356, "step": 4965 }, { "epoch": 0.6101486669123971, "grad_norm": 1.2733179496525797, "learning_rate": 1.1873983530421539e-05, "loss": 0.4739, "step": 4966 }, { "epoch": 0.6102715321292542, "grad_norm": 1.1109718082881412, "learning_rate": 1.1867691775583816e-05, "loss": 0.564, "step": 4967 }, { "epoch": 0.6103943973461113, "grad_norm": 1.0818474660725255, "learning_rate": 1.1861400596913877e-05, "loss": 0.5345, "step": 4968 }, { "epoch": 0.6105172625629685, "grad_norm": 1.1635960970027468, "learning_rate": 1.1855109995568944e-05, "loss": 0.7055, "step": 4969 }, { "epoch": 0.6106401277798256, "grad_norm": 1.4019880251436114, "learning_rate": 1.1848819972706124e-05, "loss": 0.6428, "step": 4970 }, { "epoch": 0.6107629929966827, "grad_norm": 1.3011240015275214, "learning_rate": 1.1842530529482441e-05, "loss": 0.5801, "step": 4971 }, { "epoch": 0.6108858582135398, "grad_norm": 1.091945692248107, "learning_rate": 1.183624166705479e-05, "loss": 0.5386, "step": 4972 }, { "epoch": 0.6110087234303968, "grad_norm": 1.1369277103151245, "learning_rate": 1.1829953386579967e-05, "loss": 0.5939, "step": 4973 }, { "epoch": 0.6111315886472539, "grad_norm": 1.391912973569696, "learning_rate": 1.1823665689214657e-05, "loss": 0.5774, "step": 4974 }, { "epoch": 0.611254453864111, "grad_norm": 1.1915983428839627, "learning_rate": 1.1817378576115447e-05, "loss": 0.7807, "step": 4975 }, { "epoch": 0.6113773190809682, "grad_norm": 1.208104689133999, "learning_rate": 1.1811092048438808e-05, "loss": 0.5368, "step": 4976 }, { "epoch": 0.6115001842978253, "grad_norm": 1.2771909231787817, "learning_rate": 1.1804806107341106e-05, "loss": 0.5917, "step": 4977 }, { "epoch": 0.6116230495146824, "grad_norm": 1.2080680957003636, "learning_rate": 1.1798520753978592e-05, "loss": 0.5363, "step": 4978 }, { "epoch": 0.6117459147315395, "grad_norm": 1.2946744660197171, "learning_rate": 1.179223598950743e-05, "loss": 0.6682, "step": 4979 }, { "epoch": 0.6118687799483966, "grad_norm": 1.2882395770449075, "learning_rate": 1.1785951815083655e-05, "loss": 0.573, "step": 4980 }, { "epoch": 0.6119916451652537, "grad_norm": 2.0977582952479708, "learning_rate": 1.1779668231863193e-05, "loss": 0.6721, "step": 4981 }, { "epoch": 0.6121145103821108, "grad_norm": 1.323381475644259, "learning_rate": 1.1773385241001882e-05, "loss": 0.6335, "step": 4982 }, { "epoch": 0.612237375598968, "grad_norm": 1.3785434589242611, "learning_rate": 1.176710284365543e-05, "loss": 0.6395, "step": 4983 }, { "epoch": 0.6123602408158251, "grad_norm": 1.1968432822209616, "learning_rate": 1.1760821040979446e-05, "loss": 0.5475, "step": 4984 }, { "epoch": 0.6124831060326822, "grad_norm": 1.3094417135849583, "learning_rate": 1.1754539834129417e-05, "loss": 0.5381, "step": 4985 }, { "epoch": 0.6126059712495393, "grad_norm": 1.1342248311421985, "learning_rate": 1.1748259224260745e-05, "loss": 0.6797, "step": 4986 }, { "epoch": 0.6127288364663963, "grad_norm": 1.2234218729924584, "learning_rate": 1.1741979212528698e-05, "loss": 0.4632, "step": 4987 }, { "epoch": 0.6128517016832534, "grad_norm": 1.3999054760694098, "learning_rate": 1.1735699800088447e-05, "loss": 0.5956, "step": 4988 }, { "epoch": 0.6129745669001105, "grad_norm": 1.2599062407781814, "learning_rate": 1.1729420988095042e-05, "loss": 0.5157, "step": 4989 }, { "epoch": 0.6130974321169677, "grad_norm": 1.1180472789061613, "learning_rate": 1.1723142777703442e-05, "loss": 0.5695, "step": 4990 }, { "epoch": 0.6132202973338248, "grad_norm": 1.523310336354127, "learning_rate": 1.1716865170068475e-05, "loss": 0.6112, "step": 4991 }, { "epoch": 0.6133431625506819, "grad_norm": 1.449922558529882, "learning_rate": 1.1710588166344872e-05, "loss": 0.5985, "step": 4992 }, { "epoch": 0.613466027767539, "grad_norm": 1.0868533085664365, "learning_rate": 1.1704311767687237e-05, "loss": 0.5871, "step": 4993 }, { "epoch": 0.6135888929843961, "grad_norm": 1.3341261278790046, "learning_rate": 1.1698035975250082e-05, "loss": 0.6083, "step": 4994 }, { "epoch": 0.6137117582012532, "grad_norm": 1.5246895857077716, "learning_rate": 1.1691760790187798e-05, "loss": 0.7059, "step": 4995 }, { "epoch": 0.6138346234181103, "grad_norm": 1.5927845943164514, "learning_rate": 1.168548621365466e-05, "loss": 0.7643, "step": 4996 }, { "epoch": 0.6139574886349675, "grad_norm": 1.5888376488461773, "learning_rate": 1.1679212246804831e-05, "loss": 0.5632, "step": 4997 }, { "epoch": 0.6140803538518246, "grad_norm": 2.269227831416141, "learning_rate": 1.167293889079238e-05, "loss": 0.6493, "step": 4998 }, { "epoch": 0.6142032190686817, "grad_norm": 1.4515020189963683, "learning_rate": 1.1666666146771243e-05, "loss": 0.5581, "step": 4999 }, { "epoch": 0.6143260842855388, "grad_norm": 1.3480644673580846, "learning_rate": 1.1660394015895245e-05, "loss": 0.4811, "step": 5000 }, { "epoch": 0.6144489495023959, "grad_norm": 1.2492273151785767, "learning_rate": 1.1654122499318117e-05, "loss": 0.5873, "step": 5001 }, { "epoch": 0.6145718147192529, "grad_norm": 1.2571312931712866, "learning_rate": 1.1647851598193456e-05, "loss": 0.5815, "step": 5002 }, { "epoch": 0.61469467993611, "grad_norm": 1.2528238799603164, "learning_rate": 1.1641581313674752e-05, "loss": 0.5519, "step": 5003 }, { "epoch": 0.6148175451529672, "grad_norm": 1.1345185237970636, "learning_rate": 1.1635311646915385e-05, "loss": 0.5929, "step": 5004 }, { "epoch": 0.6149404103698243, "grad_norm": 1.3686581828799098, "learning_rate": 1.162904259906862e-05, "loss": 0.6869, "step": 5005 }, { "epoch": 0.6150632755866814, "grad_norm": 1.0389074446285969, "learning_rate": 1.162277417128761e-05, "loss": 0.5664, "step": 5006 }, { "epoch": 0.6151861408035385, "grad_norm": 1.2234550814853662, "learning_rate": 1.1616506364725388e-05, "loss": 0.5629, "step": 5007 }, { "epoch": 0.6153090060203956, "grad_norm": 1.3719053801574301, "learning_rate": 1.1610239180534872e-05, "loss": 0.6129, "step": 5008 }, { "epoch": 0.6154318712372527, "grad_norm": 1.2385678942893728, "learning_rate": 1.1603972619868881e-05, "loss": 0.7027, "step": 5009 }, { "epoch": 0.6155547364541099, "grad_norm": 1.2547888019109483, "learning_rate": 1.15977066838801e-05, "loss": 0.5799, "step": 5010 }, { "epoch": 0.615677601670967, "grad_norm": 1.087553769955007, "learning_rate": 1.1591441373721115e-05, "loss": 0.5633, "step": 5011 }, { "epoch": 0.6158004668878241, "grad_norm": 1.5159407662087923, "learning_rate": 1.1585176690544377e-05, "loss": 0.5846, "step": 5012 }, { "epoch": 0.6159233321046812, "grad_norm": 1.1856190969924847, "learning_rate": 1.1578912635502245e-05, "loss": 0.6455, "step": 5013 }, { "epoch": 0.6160461973215383, "grad_norm": 1.0698717600667265, "learning_rate": 1.1572649209746948e-05, "loss": 0.5434, "step": 5014 }, { "epoch": 0.6161690625383954, "grad_norm": 1.1154640886661982, "learning_rate": 1.1566386414430602e-05, "loss": 0.7014, "step": 5015 }, { "epoch": 0.6162919277552524, "grad_norm": 1.3789546891523476, "learning_rate": 1.1560124250705198e-05, "loss": 0.6708, "step": 5016 }, { "epoch": 0.6164147929721095, "grad_norm": 1.2850680529061898, "learning_rate": 1.1553862719722639e-05, "loss": 0.5778, "step": 5017 }, { "epoch": 0.6165376581889667, "grad_norm": 1.4413050544960548, "learning_rate": 1.1547601822634684e-05, "loss": 0.6766, "step": 5018 }, { "epoch": 0.6166605234058238, "grad_norm": 1.3825490136685927, "learning_rate": 1.1541341560592982e-05, "loss": 0.6937, "step": 5019 }, { "epoch": 0.6167833886226809, "grad_norm": 1.50587080641632, "learning_rate": 1.1535081934749064e-05, "loss": 0.5928, "step": 5020 }, { "epoch": 0.616906253839538, "grad_norm": 1.133969583450374, "learning_rate": 1.152882294625436e-05, "loss": 0.6034, "step": 5021 }, { "epoch": 0.6170291190563951, "grad_norm": 1.4859781407611206, "learning_rate": 1.1522564596260165e-05, "loss": 0.5333, "step": 5022 }, { "epoch": 0.6171519842732522, "grad_norm": 1.1046155379394351, "learning_rate": 1.1516306885917656e-05, "loss": 0.57, "step": 5023 }, { "epoch": 0.6172748494901094, "grad_norm": 1.2355669928018604, "learning_rate": 1.1510049816377904e-05, "loss": 0.7192, "step": 5024 }, { "epoch": 0.6173977147069665, "grad_norm": 1.5238497483811848, "learning_rate": 1.1503793388791859e-05, "loss": 0.6139, "step": 5025 }, { "epoch": 0.6175205799238236, "grad_norm": 1.4326612106343033, "learning_rate": 1.1497537604310343e-05, "loss": 0.6929, "step": 5026 }, { "epoch": 0.6176434451406807, "grad_norm": 1.3140278376234882, "learning_rate": 1.1491282464084067e-05, "loss": 0.5592, "step": 5027 }, { "epoch": 0.6177663103575378, "grad_norm": 1.1817282396953968, "learning_rate": 1.1485027969263632e-05, "loss": 0.6782, "step": 5028 }, { "epoch": 0.6178891755743949, "grad_norm": 1.3651512715527456, "learning_rate": 1.1478774120999507e-05, "loss": 0.61, "step": 5029 }, { "epoch": 0.618012040791252, "grad_norm": 1.25028525263793, "learning_rate": 1.1472520920442044e-05, "loss": 0.5488, "step": 5030 }, { "epoch": 0.618134906008109, "grad_norm": 1.4070726466605803, "learning_rate": 1.146626836874148e-05, "loss": 0.6564, "step": 5031 }, { "epoch": 0.6182577712249662, "grad_norm": 1.209523692926911, "learning_rate": 1.1460016467047937e-05, "loss": 0.7519, "step": 5032 }, { "epoch": 0.6183806364418233, "grad_norm": 1.245515452694344, "learning_rate": 1.1453765216511408e-05, "loss": 0.6005, "step": 5033 }, { "epoch": 0.6185035016586804, "grad_norm": 1.1178634379523753, "learning_rate": 1.1447514618281768e-05, "loss": 0.6472, "step": 5034 }, { "epoch": 0.6186263668755375, "grad_norm": 1.2006484961191004, "learning_rate": 1.1441264673508766e-05, "loss": 0.6246, "step": 5035 }, { "epoch": 0.6187492320923946, "grad_norm": 1.142576022503797, "learning_rate": 1.1435015383342058e-05, "loss": 0.5651, "step": 5036 }, { "epoch": 0.6188720973092517, "grad_norm": 1.1271094183716466, "learning_rate": 1.1428766748931148e-05, "loss": 0.5947, "step": 5037 }, { "epoch": 0.6189949625261089, "grad_norm": 1.470665469622402, "learning_rate": 1.1422518771425435e-05, "loss": 0.7306, "step": 5038 }, { "epoch": 0.619117827742966, "grad_norm": 1.3328610909849994, "learning_rate": 1.1416271451974187e-05, "loss": 0.5467, "step": 5039 }, { "epoch": 0.6192406929598231, "grad_norm": 1.246152004421772, "learning_rate": 1.1410024791726573e-05, "loss": 0.6115, "step": 5040 }, { "epoch": 0.6193635581766802, "grad_norm": 1.6894941564372101, "learning_rate": 1.1403778791831614e-05, "loss": 0.5927, "step": 5041 }, { "epoch": 0.6194864233935373, "grad_norm": 1.334582089979247, "learning_rate": 1.1397533453438223e-05, "loss": 0.694, "step": 5042 }, { "epoch": 0.6196092886103944, "grad_norm": 1.5258965671905316, "learning_rate": 1.139128877769519e-05, "loss": 0.673, "step": 5043 }, { "epoch": 0.6197321538272516, "grad_norm": 1.2028924292455212, "learning_rate": 1.1385044765751185e-05, "loss": 0.5842, "step": 5044 }, { "epoch": 0.6198550190441087, "grad_norm": 1.3304540883866547, "learning_rate": 1.1378801418754752e-05, "loss": 0.6311, "step": 5045 }, { "epoch": 0.6199778842609657, "grad_norm": 1.2715894419384308, "learning_rate": 1.1372558737854307e-05, "loss": 0.5928, "step": 5046 }, { "epoch": 0.6201007494778228, "grad_norm": 1.2958524455035123, "learning_rate": 1.1366316724198163e-05, "loss": 0.6403, "step": 5047 }, { "epoch": 0.6202236146946799, "grad_norm": 1.2922269927688752, "learning_rate": 1.1360075378934492e-05, "loss": 0.7419, "step": 5048 }, { "epoch": 0.620346479911537, "grad_norm": 1.1386988397715831, "learning_rate": 1.1353834703211351e-05, "loss": 0.5806, "step": 5049 }, { "epoch": 0.6204693451283941, "grad_norm": 1.1293451425916112, "learning_rate": 1.1347594698176666e-05, "loss": 0.5728, "step": 5050 }, { "epoch": 0.6205922103452512, "grad_norm": 1.1946618782826206, "learning_rate": 1.1341355364978253e-05, "loss": 0.5658, "step": 5051 }, { "epoch": 0.6207150755621084, "grad_norm": 1.282412974769383, "learning_rate": 1.1335116704763794e-05, "loss": 0.6273, "step": 5052 }, { "epoch": 0.6208379407789655, "grad_norm": 1.4112900058021143, "learning_rate": 1.132887871868085e-05, "loss": 0.6231, "step": 5053 }, { "epoch": 0.6209608059958226, "grad_norm": 1.1518251743940755, "learning_rate": 1.132264140787685e-05, "loss": 0.5457, "step": 5054 }, { "epoch": 0.6210836712126797, "grad_norm": 1.33011057937413, "learning_rate": 1.1316404773499122e-05, "loss": 0.6055, "step": 5055 }, { "epoch": 0.6212065364295368, "grad_norm": 1.279091449990655, "learning_rate": 1.1310168816694846e-05, "loss": 0.5786, "step": 5056 }, { "epoch": 0.6213294016463939, "grad_norm": 1.2781349986142845, "learning_rate": 1.1303933538611086e-05, "loss": 0.5746, "step": 5057 }, { "epoch": 0.6214522668632511, "grad_norm": 1.1586469280210892, "learning_rate": 1.1297698940394777e-05, "loss": 0.5962, "step": 5058 }, { "epoch": 0.6215751320801082, "grad_norm": 1.5236035738728022, "learning_rate": 1.1291465023192742e-05, "loss": 0.7066, "step": 5059 }, { "epoch": 0.6216979972969652, "grad_norm": 1.2054911818740424, "learning_rate": 1.1285231788151667e-05, "loss": 0.6479, "step": 5060 }, { "epoch": 0.6218208625138223, "grad_norm": 1.1267719896866883, "learning_rate": 1.1278999236418113e-05, "loss": 0.5905, "step": 5061 }, { "epoch": 0.6219437277306794, "grad_norm": 1.3304309602808178, "learning_rate": 1.1272767369138515e-05, "loss": 0.5312, "step": 5062 }, { "epoch": 0.6220665929475365, "grad_norm": 0.9769668886762187, "learning_rate": 1.126653618745919e-05, "loss": 0.5679, "step": 5063 }, { "epoch": 0.6221894581643936, "grad_norm": 1.4619451314528829, "learning_rate": 1.1260305692526321e-05, "loss": 0.7035, "step": 5064 }, { "epoch": 0.6223123233812508, "grad_norm": 1.4549592616860754, "learning_rate": 1.1254075885485962e-05, "loss": 0.5146, "step": 5065 }, { "epoch": 0.6224351885981079, "grad_norm": 1.1279033402288838, "learning_rate": 1.1247846767484057e-05, "loss": 0.5026, "step": 5066 }, { "epoch": 0.622558053814965, "grad_norm": 1.082989744302011, "learning_rate": 1.1241618339666404e-05, "loss": 0.5646, "step": 5067 }, { "epoch": 0.6226809190318221, "grad_norm": 1.445865208697441, "learning_rate": 1.1235390603178684e-05, "loss": 0.5488, "step": 5068 }, { "epoch": 0.6228037842486792, "grad_norm": 1.2354814525790305, "learning_rate": 1.1229163559166445e-05, "loss": 0.604, "step": 5069 }, { "epoch": 0.6229266494655363, "grad_norm": 1.2208770012454093, "learning_rate": 1.1222937208775117e-05, "loss": 0.5727, "step": 5070 }, { "epoch": 0.6230495146823934, "grad_norm": 1.284487864636418, "learning_rate": 1.1216711553149995e-05, "loss": 0.615, "step": 5071 }, { "epoch": 0.6231723798992506, "grad_norm": 1.3570119748634881, "learning_rate": 1.1210486593436249e-05, "loss": 0.4951, "step": 5072 }, { "epoch": 0.6232952451161077, "grad_norm": 1.131112442376893, "learning_rate": 1.1204262330778912e-05, "loss": 0.5375, "step": 5073 }, { "epoch": 0.6234181103329648, "grad_norm": 1.035893861424968, "learning_rate": 1.1198038766322907e-05, "loss": 0.697, "step": 5074 }, { "epoch": 0.6235409755498218, "grad_norm": 1.3631869011950557, "learning_rate": 1.1191815901213015e-05, "loss": 0.5959, "step": 5075 }, { "epoch": 0.6236638407666789, "grad_norm": 1.3203737124005201, "learning_rate": 1.118559373659389e-05, "loss": 0.6388, "step": 5076 }, { "epoch": 0.623786705983536, "grad_norm": 0.9858095810018782, "learning_rate": 1.117937227361006e-05, "loss": 0.6272, "step": 5077 }, { "epoch": 0.6239095712003931, "grad_norm": 1.2849040254160922, "learning_rate": 1.1173151513405923e-05, "loss": 0.5894, "step": 5078 }, { "epoch": 0.6240324364172503, "grad_norm": 1.0755847117062771, "learning_rate": 1.1166931457125744e-05, "loss": 0.6197, "step": 5079 }, { "epoch": 0.6241553016341074, "grad_norm": 1.6789458063721014, "learning_rate": 1.116071210591367e-05, "loss": 0.5357, "step": 5080 }, { "epoch": 0.6242781668509645, "grad_norm": 1.3573249633484312, "learning_rate": 1.1154493460913702e-05, "loss": 0.6216, "step": 5081 }, { "epoch": 0.6244010320678216, "grad_norm": 1.0755548762936746, "learning_rate": 1.1148275523269724e-05, "loss": 0.6641, "step": 5082 }, { "epoch": 0.6245238972846787, "grad_norm": 1.1973381617562384, "learning_rate": 1.1142058294125486e-05, "loss": 0.5908, "step": 5083 }, { "epoch": 0.6246467625015358, "grad_norm": 1.2378277373992692, "learning_rate": 1.1135841774624605e-05, "loss": 0.5802, "step": 5084 }, { "epoch": 0.624769627718393, "grad_norm": 1.3113237833646978, "learning_rate": 1.1129625965910563e-05, "loss": 0.7407, "step": 5085 }, { "epoch": 0.6248924929352501, "grad_norm": 1.1036943790382927, "learning_rate": 1.1123410869126731e-05, "loss": 0.6006, "step": 5086 }, { "epoch": 0.6250153581521072, "grad_norm": 1.6447106753558067, "learning_rate": 1.1117196485416328e-05, "loss": 0.7647, "step": 5087 }, { "epoch": 0.6251382233689643, "grad_norm": 1.3617687314128617, "learning_rate": 1.1110982815922449e-05, "loss": 0.5834, "step": 5088 }, { "epoch": 0.6252610885858213, "grad_norm": 1.3925387863709977, "learning_rate": 1.1104769861788062e-05, "loss": 0.7333, "step": 5089 }, { "epoch": 0.6253839538026784, "grad_norm": 1.3070520702563027, "learning_rate": 1.1098557624155997e-05, "loss": 0.524, "step": 5090 }, { "epoch": 0.6255068190195355, "grad_norm": 1.5837708605993694, "learning_rate": 1.1092346104168955e-05, "loss": 0.5287, "step": 5091 }, { "epoch": 0.6256296842363926, "grad_norm": 1.2350220279916468, "learning_rate": 1.10861353029695e-05, "loss": 0.5941, "step": 5092 }, { "epoch": 0.6257525494532498, "grad_norm": 1.3113123688970842, "learning_rate": 1.107992522170008e-05, "loss": 0.6135, "step": 5093 }, { "epoch": 0.6258754146701069, "grad_norm": 1.1336862901718112, "learning_rate": 1.1073715861502994e-05, "loss": 0.599, "step": 5094 }, { "epoch": 0.625998279886964, "grad_norm": 1.3595717883557306, "learning_rate": 1.106750722352041e-05, "loss": 0.5227, "step": 5095 }, { "epoch": 0.6261211451038211, "grad_norm": 1.2847480279910206, "learning_rate": 1.1061299308894367e-05, "loss": 0.6271, "step": 5096 }, { "epoch": 0.6262440103206782, "grad_norm": 1.0797125853968466, "learning_rate": 1.1055092118766776e-05, "loss": 0.6458, "step": 5097 }, { "epoch": 0.6263668755375353, "grad_norm": 1.1999437420794858, "learning_rate": 1.1048885654279407e-05, "loss": 0.5227, "step": 5098 }, { "epoch": 0.6264897407543925, "grad_norm": 1.3037353991004332, "learning_rate": 1.1042679916573898e-05, "loss": 0.598, "step": 5099 }, { "epoch": 0.6266126059712496, "grad_norm": 1.193294326847676, "learning_rate": 1.1036474906791746e-05, "loss": 0.5993, "step": 5100 }, { "epoch": 0.6267354711881067, "grad_norm": 1.2906530980619113, "learning_rate": 1.1030270626074338e-05, "loss": 0.6785, "step": 5101 }, { "epoch": 0.6268583364049638, "grad_norm": 1.304233647206345, "learning_rate": 1.1024067075562903e-05, "loss": 0.4714, "step": 5102 }, { "epoch": 0.6269812016218209, "grad_norm": 1.384958903910949, "learning_rate": 1.1017864256398547e-05, "loss": 0.6489, "step": 5103 }, { "epoch": 0.6271040668386779, "grad_norm": 1.193656168722829, "learning_rate": 1.1011662169722227e-05, "loss": 0.5416, "step": 5104 }, { "epoch": 0.627226932055535, "grad_norm": 1.1116157182251722, "learning_rate": 1.1005460816674792e-05, "loss": 0.6689, "step": 5105 }, { "epoch": 0.6273497972723922, "grad_norm": 1.220034260458224, "learning_rate": 1.0999260198396936e-05, "loss": 0.6063, "step": 5106 }, { "epoch": 0.6274726624892493, "grad_norm": 1.0326376831494857, "learning_rate": 1.0993060316029216e-05, "loss": 0.6285, "step": 5107 }, { "epoch": 0.6275955277061064, "grad_norm": 1.1088135631220724, "learning_rate": 1.098686117071207e-05, "loss": 0.5353, "step": 5108 }, { "epoch": 0.6277183929229635, "grad_norm": 1.2347219710318778, "learning_rate": 1.0980662763585783e-05, "loss": 0.5913, "step": 5109 }, { "epoch": 0.6278412581398206, "grad_norm": 0.9803588229042811, "learning_rate": 1.0974465095790516e-05, "loss": 0.5184, "step": 5110 }, { "epoch": 0.6279641233566777, "grad_norm": 1.091791847076933, "learning_rate": 1.0968268168466282e-05, "loss": 0.6377, "step": 5111 }, { "epoch": 0.6280869885735348, "grad_norm": 1.1790098624628167, "learning_rate": 1.0962071982752977e-05, "loss": 0.4939, "step": 5112 }, { "epoch": 0.628209853790392, "grad_norm": 1.2484262578216874, "learning_rate": 1.0955876539790344e-05, "loss": 0.6001, "step": 5113 }, { "epoch": 0.6283327190072491, "grad_norm": 1.2879440451160309, "learning_rate": 1.0949681840717997e-05, "loss": 0.6531, "step": 5114 }, { "epoch": 0.6284555842241062, "grad_norm": 1.0617019336729843, "learning_rate": 1.0943487886675401e-05, "loss": 0.5694, "step": 5115 }, { "epoch": 0.6285784494409633, "grad_norm": 1.2310351793377374, "learning_rate": 1.0937294678801905e-05, "loss": 0.5138, "step": 5116 }, { "epoch": 0.6287013146578204, "grad_norm": 1.283082204445918, "learning_rate": 1.0931102218236707e-05, "loss": 0.6134, "step": 5117 }, { "epoch": 0.6288241798746774, "grad_norm": 1.3345202004934498, "learning_rate": 1.0924910506118868e-05, "loss": 0.666, "step": 5118 }, { "epoch": 0.6289470450915345, "grad_norm": 1.4149307736567502, "learning_rate": 1.0918719543587307e-05, "loss": 0.7128, "step": 5119 }, { "epoch": 0.6290699103083917, "grad_norm": 1.2826128109119423, "learning_rate": 1.0912529331780824e-05, "loss": 0.5284, "step": 5120 }, { "epoch": 0.6291927755252488, "grad_norm": 1.1644485186569258, "learning_rate": 1.090633987183806e-05, "loss": 0.7042, "step": 5121 }, { "epoch": 0.6293156407421059, "grad_norm": 1.269878250683381, "learning_rate": 1.0900151164897532e-05, "loss": 0.5013, "step": 5122 }, { "epoch": 0.629438505958963, "grad_norm": 1.32318655841898, "learning_rate": 1.08939632120976e-05, "loss": 0.5806, "step": 5123 }, { "epoch": 0.6295613711758201, "grad_norm": 1.5380884970061923, "learning_rate": 1.0887776014576514e-05, "loss": 0.5595, "step": 5124 }, { "epoch": 0.6296842363926772, "grad_norm": 1.1122794970195218, "learning_rate": 1.088158957347236e-05, "loss": 0.8212, "step": 5125 }, { "epoch": 0.6298071016095343, "grad_norm": 1.3204992587459632, "learning_rate": 1.0875403889923098e-05, "loss": 0.6051, "step": 5126 }, { "epoch": 0.6299299668263915, "grad_norm": 1.1922548276561036, "learning_rate": 1.0869218965066536e-05, "loss": 0.5992, "step": 5127 }, { "epoch": 0.6300528320432486, "grad_norm": 1.1414544051185522, "learning_rate": 1.086303480004036e-05, "loss": 0.5493, "step": 5128 }, { "epoch": 0.6301756972601057, "grad_norm": 1.2446575315608766, "learning_rate": 1.0856851395982103e-05, "loss": 0.5903, "step": 5129 }, { "epoch": 0.6302985624769628, "grad_norm": 1.1570737499103128, "learning_rate": 1.0850668754029157e-05, "loss": 0.6296, "step": 5130 }, { "epoch": 0.6304214276938199, "grad_norm": 1.3056696262720868, "learning_rate": 1.084448687531879e-05, "loss": 0.6066, "step": 5131 }, { "epoch": 0.630544292910677, "grad_norm": 1.0950048938568029, "learning_rate": 1.0838305760988113e-05, "loss": 0.5878, "step": 5132 }, { "epoch": 0.630667158127534, "grad_norm": 1.2208758464933822, "learning_rate": 1.0832125412174102e-05, "loss": 0.5653, "step": 5133 }, { "epoch": 0.6307900233443912, "grad_norm": 1.246131116787707, "learning_rate": 1.0825945830013588e-05, "loss": 0.5212, "step": 5134 }, { "epoch": 0.6309128885612483, "grad_norm": 1.2116186930692385, "learning_rate": 1.0819767015643273e-05, "loss": 0.6097, "step": 5135 }, { "epoch": 0.6310357537781054, "grad_norm": 1.2243459477720735, "learning_rate": 1.0813588970199705e-05, "loss": 0.5558, "step": 5136 }, { "epoch": 0.6311586189949625, "grad_norm": 1.233678605385963, "learning_rate": 1.0807411694819295e-05, "loss": 0.5535, "step": 5137 }, { "epoch": 0.6312814842118196, "grad_norm": 0.8816621647050311, "learning_rate": 1.0801235190638309e-05, "loss": 0.6091, "step": 5138 }, { "epoch": 0.6314043494286767, "grad_norm": 1.265650375060481, "learning_rate": 1.0795059458792886e-05, "loss": 0.6378, "step": 5139 }, { "epoch": 0.6315272146455339, "grad_norm": 1.1104399182049496, "learning_rate": 1.0788884500419005e-05, "loss": 0.5951, "step": 5140 }, { "epoch": 0.631650079862391, "grad_norm": 1.1699290722178657, "learning_rate": 1.0782710316652512e-05, "loss": 0.5654, "step": 5141 }, { "epoch": 0.6317729450792481, "grad_norm": 1.151796479022268, "learning_rate": 1.0776536908629098e-05, "loss": 0.5638, "step": 5142 }, { "epoch": 0.6318958102961052, "grad_norm": 1.247741487433421, "learning_rate": 1.0770364277484335e-05, "loss": 0.5831, "step": 5143 }, { "epoch": 0.6320186755129623, "grad_norm": 1.3136538374447175, "learning_rate": 1.0764192424353634e-05, "loss": 0.6943, "step": 5144 }, { "epoch": 0.6321415407298194, "grad_norm": 1.196613885407614, "learning_rate": 1.0758021350372268e-05, "loss": 0.5195, "step": 5145 }, { "epoch": 0.6322644059466765, "grad_norm": 1.15245030909477, "learning_rate": 1.0751851056675358e-05, "loss": 0.5277, "step": 5146 }, { "epoch": 0.6323872711635335, "grad_norm": 1.087410814792468, "learning_rate": 1.0745681544397902e-05, "loss": 0.521, "step": 5147 }, { "epoch": 0.6325101363803907, "grad_norm": 1.3064207976108808, "learning_rate": 1.0739512814674734e-05, "loss": 0.532, "step": 5148 }, { "epoch": 0.6326330015972478, "grad_norm": 1.1444027245855342, "learning_rate": 1.0733344868640556e-05, "loss": 0.5232, "step": 5149 }, { "epoch": 0.6327558668141049, "grad_norm": 1.1906464005753519, "learning_rate": 1.072717770742991e-05, "loss": 0.5457, "step": 5150 }, { "epoch": 0.632878732030962, "grad_norm": 1.0729232265468, "learning_rate": 1.0721011332177223e-05, "loss": 0.5692, "step": 5151 }, { "epoch": 0.6330015972478191, "grad_norm": 1.2994863375575971, "learning_rate": 1.0714845744016749e-05, "loss": 0.7338, "step": 5152 }, { "epoch": 0.6331244624646762, "grad_norm": 1.5263266814267635, "learning_rate": 1.0708680944082608e-05, "loss": 0.6402, "step": 5153 }, { "epoch": 0.6332473276815334, "grad_norm": 0.9740079765551258, "learning_rate": 1.0702516933508779e-05, "loss": 0.5375, "step": 5154 }, { "epoch": 0.6333701928983905, "grad_norm": 1.1961372503309824, "learning_rate": 1.0696353713429092e-05, "loss": 0.5103, "step": 5155 }, { "epoch": 0.6334930581152476, "grad_norm": 1.3379002035411385, "learning_rate": 1.0690191284977229e-05, "loss": 0.5629, "step": 5156 }, { "epoch": 0.6336159233321047, "grad_norm": 1.3517621017739407, "learning_rate": 1.0684029649286721e-05, "loss": 0.6597, "step": 5157 }, { "epoch": 0.6337387885489618, "grad_norm": 1.1273527379363528, "learning_rate": 1.0677868807490977e-05, "loss": 0.5597, "step": 5158 }, { "epoch": 0.6338616537658189, "grad_norm": 1.5196670728882018, "learning_rate": 1.0671708760723236e-05, "loss": 0.5944, "step": 5159 }, { "epoch": 0.633984518982676, "grad_norm": 1.127836189996214, "learning_rate": 1.0665549510116597e-05, "loss": 0.5096, "step": 5160 }, { "epoch": 0.6341073841995332, "grad_norm": 1.0691553998209042, "learning_rate": 1.065939105680401e-05, "loss": 0.4808, "step": 5161 }, { "epoch": 0.6342302494163902, "grad_norm": 1.3606349646926093, "learning_rate": 1.0653233401918296e-05, "loss": 0.6458, "step": 5162 }, { "epoch": 0.6343531146332473, "grad_norm": 1.1939948516073384, "learning_rate": 1.0647076546592105e-05, "loss": 0.6227, "step": 5163 }, { "epoch": 0.6344759798501044, "grad_norm": 0.9833425578943614, "learning_rate": 1.0640920491957957e-05, "loss": 0.6376, "step": 5164 }, { "epoch": 0.6345988450669615, "grad_norm": 1.3243877636962131, "learning_rate": 1.063476523914821e-05, "loss": 0.5428, "step": 5165 }, { "epoch": 0.6347217102838186, "grad_norm": 1.2564994048232034, "learning_rate": 1.062861078929509e-05, "loss": 0.6441, "step": 5166 }, { "epoch": 0.6348445755006757, "grad_norm": 1.2769316559344588, "learning_rate": 1.0622457143530666e-05, "loss": 0.5233, "step": 5167 }, { "epoch": 0.6349674407175329, "grad_norm": 1.2547264443349149, "learning_rate": 1.0616304302986863e-05, "loss": 0.4678, "step": 5168 }, { "epoch": 0.63509030593439, "grad_norm": 1.1216175771115215, "learning_rate": 1.0610152268795446e-05, "loss": 0.5832, "step": 5169 }, { "epoch": 0.6352131711512471, "grad_norm": 1.1210841178965223, "learning_rate": 1.0604001042088057e-05, "loss": 0.5441, "step": 5170 }, { "epoch": 0.6353360363681042, "grad_norm": 1.2214958139923193, "learning_rate": 1.0597850623996169e-05, "loss": 0.5555, "step": 5171 }, { "epoch": 0.6354589015849613, "grad_norm": 1.2309159458284342, "learning_rate": 1.0591701015651104e-05, "loss": 0.5158, "step": 5172 }, { "epoch": 0.6355817668018184, "grad_norm": 1.0937369743974306, "learning_rate": 1.0585552218184054e-05, "loss": 0.6795, "step": 5173 }, { "epoch": 0.6357046320186756, "grad_norm": 1.2473530091885279, "learning_rate": 1.0579404232726041e-05, "loss": 0.5039, "step": 5174 }, { "epoch": 0.6358274972355327, "grad_norm": 1.2643218796536217, "learning_rate": 1.0573257060407955e-05, "loss": 0.5531, "step": 5175 }, { "epoch": 0.6359503624523898, "grad_norm": 1.4750649881392695, "learning_rate": 1.0567110702360514e-05, "loss": 0.6206, "step": 5176 }, { "epoch": 0.6360732276692468, "grad_norm": 1.1690116636110488, "learning_rate": 1.056096515971432e-05, "loss": 0.5966, "step": 5177 }, { "epoch": 0.6361960928861039, "grad_norm": 1.2148566265019691, "learning_rate": 1.0554820433599797e-05, "loss": 0.5617, "step": 5178 }, { "epoch": 0.636318958102961, "grad_norm": 1.0996996993676347, "learning_rate": 1.0548676525147226e-05, "loss": 0.5942, "step": 5179 }, { "epoch": 0.6364418233198181, "grad_norm": 1.2268734100108452, "learning_rate": 1.0542533435486734e-05, "loss": 0.5616, "step": 5180 }, { "epoch": 0.6365646885366752, "grad_norm": 1.0861159706691832, "learning_rate": 1.0536391165748315e-05, "loss": 0.564, "step": 5181 }, { "epoch": 0.6366875537535324, "grad_norm": 1.408381212365579, "learning_rate": 1.0530249717061795e-05, "loss": 0.5765, "step": 5182 }, { "epoch": 0.6368104189703895, "grad_norm": 1.1052627823903678, "learning_rate": 1.052410909055685e-05, "loss": 0.6108, "step": 5183 }, { "epoch": 0.6369332841872466, "grad_norm": 1.268075328988321, "learning_rate": 1.051796928736301e-05, "loss": 0.5307, "step": 5184 }, { "epoch": 0.6370561494041037, "grad_norm": 1.079468360257231, "learning_rate": 1.0511830308609655e-05, "loss": 0.6122, "step": 5185 }, { "epoch": 0.6371790146209608, "grad_norm": 1.1571065111878291, "learning_rate": 1.0505692155426007e-05, "loss": 0.5816, "step": 5186 }, { "epoch": 0.6373018798378179, "grad_norm": 1.612372368806991, "learning_rate": 1.049955482894114e-05, "loss": 0.6135, "step": 5187 }, { "epoch": 0.6374247450546751, "grad_norm": 1.250495485336522, "learning_rate": 1.049341833028397e-05, "loss": 0.5925, "step": 5188 }, { "epoch": 0.6375476102715322, "grad_norm": 1.3769031297388294, "learning_rate": 1.0487282660583278e-05, "loss": 0.5653, "step": 5189 }, { "epoch": 0.6376704754883893, "grad_norm": 1.3401263362533986, "learning_rate": 1.0481147820967677e-05, "loss": 0.5715, "step": 5190 }, { "epoch": 0.6377933407052463, "grad_norm": 1.397251571225431, "learning_rate": 1.0475013812565628e-05, "loss": 0.5326, "step": 5191 }, { "epoch": 0.6379162059221034, "grad_norm": 1.3330711513951181, "learning_rate": 1.0468880636505437e-05, "loss": 0.6165, "step": 5192 }, { "epoch": 0.6380390711389605, "grad_norm": 1.2506953335435549, "learning_rate": 1.0462748293915271e-05, "loss": 0.5901, "step": 5193 }, { "epoch": 0.6381619363558176, "grad_norm": 1.3357027817280274, "learning_rate": 1.0456616785923131e-05, "loss": 0.5326, "step": 5194 }, { "epoch": 0.6382848015726748, "grad_norm": 1.6183556163394999, "learning_rate": 1.0450486113656862e-05, "loss": 0.5506, "step": 5195 }, { "epoch": 0.6384076667895319, "grad_norm": 1.4126761941328836, "learning_rate": 1.0444356278244178e-05, "loss": 0.6166, "step": 5196 }, { "epoch": 0.638530532006389, "grad_norm": 1.3464103992754433, "learning_rate": 1.0438227280812608e-05, "loss": 0.5298, "step": 5197 }, { "epoch": 0.6386533972232461, "grad_norm": 1.1923810736096563, "learning_rate": 1.0432099122489547e-05, "loss": 0.5423, "step": 5198 }, { "epoch": 0.6387762624401032, "grad_norm": 1.2571880895420982, "learning_rate": 1.0425971804402227e-05, "loss": 0.6405, "step": 5199 }, { "epoch": 0.6388991276569603, "grad_norm": 1.212589607637169, "learning_rate": 1.0419845327677731e-05, "loss": 0.5117, "step": 5200 }, { "epoch": 0.6390219928738174, "grad_norm": 1.1634802396234316, "learning_rate": 1.0413719693442984e-05, "loss": 0.6753, "step": 5201 }, { "epoch": 0.6391448580906746, "grad_norm": 1.0712497992150178, "learning_rate": 1.0407594902824751e-05, "loss": 0.5415, "step": 5202 }, { "epoch": 0.6392677233075317, "grad_norm": 1.1016208805383636, "learning_rate": 1.0401470956949656e-05, "loss": 0.5378, "step": 5203 }, { "epoch": 0.6393905885243888, "grad_norm": 1.1797323542365845, "learning_rate": 1.0395347856944158e-05, "loss": 0.5849, "step": 5204 }, { "epoch": 0.6395134537412459, "grad_norm": 1.3885606053187627, "learning_rate": 1.0389225603934561e-05, "loss": 0.5388, "step": 5205 }, { "epoch": 0.6396363189581029, "grad_norm": 1.187179661535103, "learning_rate": 1.038310419904701e-05, "loss": 0.5895, "step": 5206 }, { "epoch": 0.63975918417496, "grad_norm": 1.0472071690206557, "learning_rate": 1.0376983643407497e-05, "loss": 0.5129, "step": 5207 }, { "epoch": 0.6398820493918171, "grad_norm": 1.384551770160788, "learning_rate": 1.0370863938141864e-05, "loss": 0.592, "step": 5208 }, { "epoch": 0.6400049146086743, "grad_norm": 1.6275700329940714, "learning_rate": 1.036474508437579e-05, "loss": 0.5891, "step": 5209 }, { "epoch": 0.6401277798255314, "grad_norm": 1.2097185470649288, "learning_rate": 1.0358627083234797e-05, "loss": 0.5718, "step": 5210 }, { "epoch": 0.6402506450423885, "grad_norm": 1.0322791298559149, "learning_rate": 1.0352509935844248e-05, "loss": 0.5862, "step": 5211 }, { "epoch": 0.6403735102592456, "grad_norm": 1.4739293421972846, "learning_rate": 1.0346393643329359e-05, "loss": 0.6882, "step": 5212 }, { "epoch": 0.6404963754761027, "grad_norm": 1.2317422676348588, "learning_rate": 1.0340278206815183e-05, "loss": 0.5088, "step": 5213 }, { "epoch": 0.6406192406929598, "grad_norm": 1.4233805941388213, "learning_rate": 1.0334163627426603e-05, "loss": 0.6143, "step": 5214 }, { "epoch": 0.640742105909817, "grad_norm": 1.3407696679580865, "learning_rate": 1.0328049906288371e-05, "loss": 0.5198, "step": 5215 }, { "epoch": 0.6408649711266741, "grad_norm": 1.3631923152831453, "learning_rate": 1.0321937044525059e-05, "loss": 0.5079, "step": 5216 }, { "epoch": 0.6409878363435312, "grad_norm": 1.379612239264027, "learning_rate": 1.031582504326109e-05, "loss": 0.6255, "step": 5217 }, { "epoch": 0.6411107015603883, "grad_norm": 1.2293337943126337, "learning_rate": 1.0309713903620723e-05, "loss": 0.5749, "step": 5218 }, { "epoch": 0.6412335667772454, "grad_norm": 1.2289546945823377, "learning_rate": 1.0303603626728069e-05, "loss": 0.6685, "step": 5219 }, { "epoch": 0.6413564319941024, "grad_norm": 1.0966618214271466, "learning_rate": 1.0297494213707073e-05, "loss": 0.6351, "step": 5220 }, { "epoch": 0.6414792972109595, "grad_norm": 1.2942912028946107, "learning_rate": 1.0291385665681516e-05, "loss": 0.6112, "step": 5221 }, { "epoch": 0.6416021624278166, "grad_norm": 1.490943946173928, "learning_rate": 1.0285277983775026e-05, "loss": 0.6193, "step": 5222 }, { "epoch": 0.6417250276446738, "grad_norm": 1.28995228247958, "learning_rate": 1.0279171169111079e-05, "loss": 0.5637, "step": 5223 }, { "epoch": 0.6418478928615309, "grad_norm": 1.2653501857491485, "learning_rate": 1.0273065222812982e-05, "loss": 0.5555, "step": 5224 }, { "epoch": 0.641970758078388, "grad_norm": 1.0351796822467487, "learning_rate": 1.0266960146003878e-05, "loss": 0.5009, "step": 5225 }, { "epoch": 0.6420936232952451, "grad_norm": 1.2324758027351077, "learning_rate": 1.0260855939806759e-05, "loss": 0.5056, "step": 5226 }, { "epoch": 0.6422164885121022, "grad_norm": 1.1874143974424516, "learning_rate": 1.0254752605344458e-05, "loss": 0.578, "step": 5227 }, { "epoch": 0.6423393537289593, "grad_norm": 1.2349492690367263, "learning_rate": 1.0248650143739643e-05, "loss": 0.4156, "step": 5228 }, { "epoch": 0.6424622189458165, "grad_norm": 1.2826658911275965, "learning_rate": 1.024254855611482e-05, "loss": 0.6324, "step": 5229 }, { "epoch": 0.6425850841626736, "grad_norm": 1.516848149384472, "learning_rate": 1.0236447843592334e-05, "loss": 0.5394, "step": 5230 }, { "epoch": 0.6427079493795307, "grad_norm": 1.1364680567780459, "learning_rate": 1.0230348007294377e-05, "loss": 0.6598, "step": 5231 }, { "epoch": 0.6428308145963878, "grad_norm": 1.2692820325208305, "learning_rate": 1.0224249048342974e-05, "loss": 0.5696, "step": 5232 }, { "epoch": 0.6429536798132449, "grad_norm": 1.1961704803001916, "learning_rate": 1.0218150967859984e-05, "loss": 0.5898, "step": 5233 }, { "epoch": 0.643076545030102, "grad_norm": 1.8253812211649454, "learning_rate": 1.0212053766967107e-05, "loss": 0.4961, "step": 5234 }, { "epoch": 0.643199410246959, "grad_norm": 1.0953369243163862, "learning_rate": 1.0205957446785894e-05, "loss": 0.6521, "step": 5235 }, { "epoch": 0.6433222754638162, "grad_norm": 1.1962059312383158, "learning_rate": 1.0199862008437718e-05, "loss": 0.6685, "step": 5236 }, { "epoch": 0.6434451406806733, "grad_norm": 1.2143008985669947, "learning_rate": 1.0193767453043795e-05, "loss": 0.6243, "step": 5237 }, { "epoch": 0.6435680058975304, "grad_norm": 1.2740155164787943, "learning_rate": 1.0187673781725181e-05, "loss": 0.5238, "step": 5238 }, { "epoch": 0.6436908711143875, "grad_norm": 1.3286692062218268, "learning_rate": 1.0181580995602766e-05, "loss": 0.5245, "step": 5239 }, { "epoch": 0.6438137363312446, "grad_norm": 1.4398486143268203, "learning_rate": 1.0175489095797278e-05, "loss": 0.6382, "step": 5240 }, { "epoch": 0.6439366015481017, "grad_norm": 1.1346791251717732, "learning_rate": 1.0169398083429277e-05, "loss": 0.5205, "step": 5241 }, { "epoch": 0.6440594667649588, "grad_norm": 1.2721533548616502, "learning_rate": 1.0163307959619176e-05, "loss": 0.7334, "step": 5242 }, { "epoch": 0.644182331981816, "grad_norm": 1.3824496774213904, "learning_rate": 1.015721872548721e-05, "loss": 0.7426, "step": 5243 }, { "epoch": 0.6443051971986731, "grad_norm": 1.3285384661272186, "learning_rate": 1.0151130382153453e-05, "loss": 0.6732, "step": 5244 }, { "epoch": 0.6444280624155302, "grad_norm": 1.222415779703877, "learning_rate": 1.014504293073781e-05, "loss": 0.6038, "step": 5245 }, { "epoch": 0.6445509276323873, "grad_norm": 1.1191050829846856, "learning_rate": 1.0138956372360041e-05, "loss": 0.6695, "step": 5246 }, { "epoch": 0.6446737928492444, "grad_norm": 1.0740902437480164, "learning_rate": 1.013287070813972e-05, "loss": 0.5556, "step": 5247 }, { "epoch": 0.6447966580661015, "grad_norm": 1.1634176301118708, "learning_rate": 1.012678593919627e-05, "loss": 0.5363, "step": 5248 }, { "epoch": 0.6449195232829585, "grad_norm": 1.3348413649685462, "learning_rate": 1.0120702066648938e-05, "loss": 0.5257, "step": 5249 }, { "epoch": 0.6450423884998157, "grad_norm": 1.3330427786997971, "learning_rate": 1.0114619091616822e-05, "loss": 0.5985, "step": 5250 }, { "epoch": 0.6451652537166728, "grad_norm": 1.4148568346467474, "learning_rate": 1.010853701521884e-05, "loss": 0.6473, "step": 5251 }, { "epoch": 0.6452881189335299, "grad_norm": 1.1451011408262466, "learning_rate": 1.0102455838573753e-05, "loss": 0.4991, "step": 5252 }, { "epoch": 0.645410984150387, "grad_norm": 1.2578572202268592, "learning_rate": 1.0096375562800146e-05, "loss": 0.5566, "step": 5253 }, { "epoch": 0.6455338493672441, "grad_norm": 1.0986390871750857, "learning_rate": 1.0090296189016459e-05, "loss": 0.6512, "step": 5254 }, { "epoch": 0.6456567145841012, "grad_norm": 1.3086550567843056, "learning_rate": 1.0084217718340949e-05, "loss": 0.6531, "step": 5255 }, { "epoch": 0.6457795798009583, "grad_norm": 1.0791741329887907, "learning_rate": 1.0078140151891705e-05, "loss": 0.5637, "step": 5256 }, { "epoch": 0.6459024450178155, "grad_norm": 1.1793118917184937, "learning_rate": 1.0072063490786665e-05, "loss": 0.5398, "step": 5257 }, { "epoch": 0.6460253102346726, "grad_norm": 1.0199356701458826, "learning_rate": 1.0065987736143586e-05, "loss": 0.5875, "step": 5258 }, { "epoch": 0.6461481754515297, "grad_norm": 1.1662705372041255, "learning_rate": 1.0059912889080064e-05, "loss": 0.65, "step": 5259 }, { "epoch": 0.6462710406683868, "grad_norm": 1.275023151132807, "learning_rate": 1.0053838950713523e-05, "loss": 0.5454, "step": 5260 }, { "epoch": 0.6463939058852439, "grad_norm": 1.145053127385827, "learning_rate": 1.0047765922161237e-05, "loss": 0.5463, "step": 5261 }, { "epoch": 0.646516771102101, "grad_norm": 1.214735693923012, "learning_rate": 1.0041693804540293e-05, "loss": 0.655, "step": 5262 }, { "epoch": 0.6466396363189582, "grad_norm": 1.3727005427566359, "learning_rate": 1.0035622598967618e-05, "loss": 0.5683, "step": 5263 }, { "epoch": 0.6467625015358152, "grad_norm": 1.0077281681277364, "learning_rate": 1.0029552306559965e-05, "loss": 0.5233, "step": 5264 }, { "epoch": 0.6468853667526723, "grad_norm": 1.4931728661254378, "learning_rate": 1.0023482928433934e-05, "loss": 0.6609, "step": 5265 }, { "epoch": 0.6470082319695294, "grad_norm": 1.2779237662750802, "learning_rate": 1.0017414465705948e-05, "loss": 0.5777, "step": 5266 }, { "epoch": 0.6471310971863865, "grad_norm": 1.2457368283612147, "learning_rate": 1.0011346919492256e-05, "loss": 0.688, "step": 5267 }, { "epoch": 0.6472539624032436, "grad_norm": 1.3438348595782776, "learning_rate": 1.0005280290908943e-05, "loss": 0.5642, "step": 5268 }, { "epoch": 0.6473768276201007, "grad_norm": 1.1961207838078172, "learning_rate": 9.999214581071933e-06, "loss": 0.676, "step": 5269 }, { "epoch": 0.6474996928369579, "grad_norm": 1.236862633311059, "learning_rate": 9.993149791096968e-06, "loss": 0.5365, "step": 5270 }, { "epoch": 0.647622558053815, "grad_norm": 1.3522962865788926, "learning_rate": 9.987085922099628e-06, "loss": 0.611, "step": 5271 }, { "epoch": 0.6477454232706721, "grad_norm": 1.237890273969685, "learning_rate": 9.981022975195319e-06, "loss": 0.6408, "step": 5272 }, { "epoch": 0.6478682884875292, "grad_norm": 1.0381382765833658, "learning_rate": 9.974960951499288e-06, "loss": 0.7282, "step": 5273 }, { "epoch": 0.6479911537043863, "grad_norm": 1.275337234044017, "learning_rate": 9.968899852126605e-06, "loss": 0.5089, "step": 5274 }, { "epoch": 0.6481140189212434, "grad_norm": 1.2959928848340654, "learning_rate": 9.962839678192163e-06, "loss": 0.5296, "step": 5275 }, { "epoch": 0.6482368841381005, "grad_norm": 1.391443132552266, "learning_rate": 9.956780430810692e-06, "loss": 0.6231, "step": 5276 }, { "epoch": 0.6483597493549577, "grad_norm": 1.1933390684958582, "learning_rate": 9.950722111096758e-06, "loss": 0.5312, "step": 5277 }, { "epoch": 0.6484826145718147, "grad_norm": 1.3848588837467481, "learning_rate": 9.944664720164745e-06, "loss": 0.7465, "step": 5278 }, { "epoch": 0.6486054797886718, "grad_norm": 1.0528354561948678, "learning_rate": 9.938608259128866e-06, "loss": 0.5674, "step": 5279 }, { "epoch": 0.6487283450055289, "grad_norm": 1.2233828133737459, "learning_rate": 9.932552729103183e-06, "loss": 0.529, "step": 5280 }, { "epoch": 0.648851210222386, "grad_norm": 1.335249199502768, "learning_rate": 9.926498131201556e-06, "loss": 0.6128, "step": 5281 }, { "epoch": 0.6489740754392431, "grad_norm": 1.0779195211212043, "learning_rate": 9.9204444665377e-06, "loss": 0.6202, "step": 5282 }, { "epoch": 0.6490969406561002, "grad_norm": 1.1425433042135433, "learning_rate": 9.914391736225134e-06, "loss": 0.6336, "step": 5283 }, { "epoch": 0.6492198058729574, "grad_norm": 1.164427266233832, "learning_rate": 9.908339941377232e-06, "loss": 0.535, "step": 5284 }, { "epoch": 0.6493426710898145, "grad_norm": 1.3944340775157373, "learning_rate": 9.902289083107181e-06, "loss": 0.6988, "step": 5285 }, { "epoch": 0.6494655363066716, "grad_norm": 1.167913449130525, "learning_rate": 9.89623916252799e-06, "loss": 0.4626, "step": 5286 }, { "epoch": 0.6495884015235287, "grad_norm": 0.9804442854530732, "learning_rate": 9.890190180752503e-06, "loss": 0.634, "step": 5287 }, { "epoch": 0.6497112667403858, "grad_norm": 1.429970011915802, "learning_rate": 9.884142138893399e-06, "loss": 0.7161, "step": 5288 }, { "epoch": 0.6498341319572429, "grad_norm": 1.3785048994236457, "learning_rate": 9.87809503806317e-06, "loss": 0.636, "step": 5289 }, { "epoch": 0.6499569971741, "grad_norm": 1.1476338929929766, "learning_rate": 9.87204887937414e-06, "loss": 0.4549, "step": 5290 }, { "epoch": 0.6500798623909572, "grad_norm": 1.2614802511186742, "learning_rate": 9.86600366393846e-06, "loss": 0.4574, "step": 5291 }, { "epoch": 0.6502027276078143, "grad_norm": 1.3005422663069057, "learning_rate": 9.859959392868114e-06, "loss": 0.5244, "step": 5292 }, { "epoch": 0.6503255928246713, "grad_norm": 0.9980722412959651, "learning_rate": 9.853916067274905e-06, "loss": 0.5649, "step": 5293 }, { "epoch": 0.6504484580415284, "grad_norm": 1.2092539695772027, "learning_rate": 9.847873688270462e-06, "loss": 0.6012, "step": 5294 }, { "epoch": 0.6505713232583855, "grad_norm": 1.1736001832565368, "learning_rate": 9.841832256966239e-06, "loss": 0.5185, "step": 5295 }, { "epoch": 0.6506941884752426, "grad_norm": 1.3128149226479897, "learning_rate": 9.835791774473522e-06, "loss": 0.6206, "step": 5296 }, { "epoch": 0.6508170536920997, "grad_norm": 1.2991607954743523, "learning_rate": 9.829752241903418e-06, "loss": 0.5519, "step": 5297 }, { "epoch": 0.6509399189089569, "grad_norm": 1.2988971039067583, "learning_rate": 9.823713660366858e-06, "loss": 0.6164, "step": 5298 }, { "epoch": 0.651062784125814, "grad_norm": 1.135468197586648, "learning_rate": 9.817676030974596e-06, "loss": 0.4821, "step": 5299 }, { "epoch": 0.6511856493426711, "grad_norm": 1.1634737927263625, "learning_rate": 9.811639354837224e-06, "loss": 0.6254, "step": 5300 }, { "epoch": 0.6513085145595282, "grad_norm": 1.217018455402315, "learning_rate": 9.805603633065145e-06, "loss": 0.5543, "step": 5301 }, { "epoch": 0.6514313797763853, "grad_norm": 1.2340227585388304, "learning_rate": 9.799568866768584e-06, "loss": 0.5068, "step": 5302 }, { "epoch": 0.6515542449932424, "grad_norm": 1.5914870027899903, "learning_rate": 9.793535057057614e-06, "loss": 0.6563, "step": 5303 }, { "epoch": 0.6516771102100996, "grad_norm": 1.2977952522062588, "learning_rate": 9.787502205042102e-06, "loss": 0.635, "step": 5304 }, { "epoch": 0.6517999754269567, "grad_norm": 1.389054670078481, "learning_rate": 9.781470311831755e-06, "loss": 0.596, "step": 5305 }, { "epoch": 0.6519228406438138, "grad_norm": 1.343400591106651, "learning_rate": 9.7754393785361e-06, "loss": 0.49, "step": 5306 }, { "epoch": 0.6520457058606709, "grad_norm": 1.5138638230448436, "learning_rate": 9.76940940626449e-06, "loss": 0.6209, "step": 5307 }, { "epoch": 0.6521685710775279, "grad_norm": 1.8257578812721513, "learning_rate": 9.763380396126099e-06, "loss": 0.7134, "step": 5308 }, { "epoch": 0.652291436294385, "grad_norm": 1.357932636314792, "learning_rate": 9.757352349229922e-06, "loss": 0.5503, "step": 5309 }, { "epoch": 0.6524143015112421, "grad_norm": 1.126004277996551, "learning_rate": 9.751325266684775e-06, "loss": 0.6587, "step": 5310 }, { "epoch": 0.6525371667280992, "grad_norm": 1.1878368656326732, "learning_rate": 9.745299149599314e-06, "loss": 0.5062, "step": 5311 }, { "epoch": 0.6526600319449564, "grad_norm": 1.1762375383952635, "learning_rate": 9.739273999081995e-06, "loss": 0.6739, "step": 5312 }, { "epoch": 0.6527828971618135, "grad_norm": 1.3154255359819806, "learning_rate": 9.733249816241108e-06, "loss": 0.6831, "step": 5313 }, { "epoch": 0.6529057623786706, "grad_norm": 1.4174464647486962, "learning_rate": 9.727226602184759e-06, "loss": 0.591, "step": 5314 }, { "epoch": 0.6530286275955277, "grad_norm": 1.1440365847912402, "learning_rate": 9.721204358020881e-06, "loss": 0.7112, "step": 5315 }, { "epoch": 0.6531514928123848, "grad_norm": 1.1983939914460777, "learning_rate": 9.71518308485723e-06, "loss": 0.558, "step": 5316 }, { "epoch": 0.6532743580292419, "grad_norm": 1.103190381300676, "learning_rate": 9.709162783801375e-06, "loss": 0.596, "step": 5317 }, { "epoch": 0.6533972232460991, "grad_norm": 1.1499280231792122, "learning_rate": 9.70314345596071e-06, "loss": 0.6621, "step": 5318 }, { "epoch": 0.6535200884629562, "grad_norm": 1.5621353021446582, "learning_rate": 9.697125102442461e-06, "loss": 0.6369, "step": 5319 }, { "epoch": 0.6536429536798133, "grad_norm": 1.142416765849412, "learning_rate": 9.691107724353656e-06, "loss": 0.5158, "step": 5320 }, { "epoch": 0.6537658188966704, "grad_norm": 1.287384331614206, "learning_rate": 9.685091322801155e-06, "loss": 0.7789, "step": 5321 }, { "epoch": 0.6538886841135274, "grad_norm": 1.3185768684216863, "learning_rate": 9.67907589889164e-06, "loss": 0.6683, "step": 5322 }, { "epoch": 0.6540115493303845, "grad_norm": 1.2779193528575943, "learning_rate": 9.673061453731605e-06, "loss": 0.5657, "step": 5323 }, { "epoch": 0.6541344145472416, "grad_norm": 1.2853073858552113, "learning_rate": 9.66704798842737e-06, "loss": 0.584, "step": 5324 }, { "epoch": 0.6542572797640988, "grad_norm": 1.2493455160686304, "learning_rate": 9.661035504085065e-06, "loss": 0.5511, "step": 5325 }, { "epoch": 0.6543801449809559, "grad_norm": 1.2508294648588896, "learning_rate": 9.655024001810662e-06, "loss": 0.5959, "step": 5326 }, { "epoch": 0.654503010197813, "grad_norm": 1.2236164578141877, "learning_rate": 9.64901348270993e-06, "loss": 0.534, "step": 5327 }, { "epoch": 0.6546258754146701, "grad_norm": 1.3133118623805917, "learning_rate": 9.643003947888465e-06, "loss": 0.6851, "step": 5328 }, { "epoch": 0.6547487406315272, "grad_norm": 1.1594122077009854, "learning_rate": 9.636995398451677e-06, "loss": 0.7326, "step": 5329 }, { "epoch": 0.6548716058483843, "grad_norm": 1.217171023238275, "learning_rate": 9.630987835504811e-06, "loss": 0.5441, "step": 5330 }, { "epoch": 0.6549944710652414, "grad_norm": 1.4117247534004862, "learning_rate": 9.624981260152914e-06, "loss": 0.6259, "step": 5331 }, { "epoch": 0.6551173362820986, "grad_norm": 2.001120900883098, "learning_rate": 9.618975673500856e-06, "loss": 0.7654, "step": 5332 }, { "epoch": 0.6552402014989557, "grad_norm": 1.3672219950205813, "learning_rate": 9.61297107665332e-06, "loss": 0.686, "step": 5333 }, { "epoch": 0.6553630667158128, "grad_norm": 1.2726882156130843, "learning_rate": 9.606967470714826e-06, "loss": 0.5582, "step": 5334 }, { "epoch": 0.6554859319326699, "grad_norm": 1.1354256229893647, "learning_rate": 9.600964856789688e-06, "loss": 0.7643, "step": 5335 }, { "epoch": 0.655608797149527, "grad_norm": 1.2215005791144686, "learning_rate": 9.59496323598205e-06, "loss": 0.4949, "step": 5336 }, { "epoch": 0.655731662366384, "grad_norm": 1.0541478514378368, "learning_rate": 9.588962609395867e-06, "loss": 0.7002, "step": 5337 }, { "epoch": 0.6558545275832411, "grad_norm": 1.465652481026434, "learning_rate": 9.582962978134924e-06, "loss": 0.6266, "step": 5338 }, { "epoch": 0.6559773928000983, "grad_norm": 1.0962883641339438, "learning_rate": 9.576964343302812e-06, "loss": 0.5217, "step": 5339 }, { "epoch": 0.6561002580169554, "grad_norm": 1.193801360057326, "learning_rate": 9.570966706002941e-06, "loss": 0.6089, "step": 5340 }, { "epoch": 0.6562231232338125, "grad_norm": 1.2277485101500245, "learning_rate": 9.564970067338532e-06, "loss": 0.499, "step": 5341 }, { "epoch": 0.6563459884506696, "grad_norm": 1.2180841616270914, "learning_rate": 9.558974428412634e-06, "loss": 0.541, "step": 5342 }, { "epoch": 0.6564688536675267, "grad_norm": 1.2314664392934864, "learning_rate": 9.552979790328105e-06, "loss": 0.7058, "step": 5343 }, { "epoch": 0.6565917188843838, "grad_norm": 1.228727883913474, "learning_rate": 9.54698615418761e-06, "loss": 0.5556, "step": 5344 }, { "epoch": 0.656714584101241, "grad_norm": 1.1654334981410415, "learning_rate": 9.540993521093654e-06, "loss": 0.6523, "step": 5345 }, { "epoch": 0.6568374493180981, "grad_norm": 1.1999711558691315, "learning_rate": 9.535001892148538e-06, "loss": 0.5968, "step": 5346 }, { "epoch": 0.6569603145349552, "grad_norm": 1.2870370400066877, "learning_rate": 9.529011268454384e-06, "loss": 0.5379, "step": 5347 }, { "epoch": 0.6570831797518123, "grad_norm": 1.2023577931237242, "learning_rate": 9.523021651113118e-06, "loss": 0.6441, "step": 5348 }, { "epoch": 0.6572060449686694, "grad_norm": 1.0609645248971382, "learning_rate": 9.517033041226506e-06, "loss": 0.6309, "step": 5349 }, { "epoch": 0.6573289101855265, "grad_norm": 1.4880148474394845, "learning_rate": 9.51104543989611e-06, "loss": 0.5704, "step": 5350 }, { "epoch": 0.6574517754023835, "grad_norm": 1.0767347327570003, "learning_rate": 9.505058848223306e-06, "loss": 0.6691, "step": 5351 }, { "epoch": 0.6575746406192406, "grad_norm": 1.307202023110517, "learning_rate": 9.49907326730929e-06, "loss": 0.6625, "step": 5352 }, { "epoch": 0.6576975058360978, "grad_norm": 1.048024084907999, "learning_rate": 9.49308869825507e-06, "loss": 0.6016, "step": 5353 }, { "epoch": 0.6578203710529549, "grad_norm": 1.1587684366783015, "learning_rate": 9.487105142161475e-06, "loss": 0.5388, "step": 5354 }, { "epoch": 0.657943236269812, "grad_norm": 1.2228436495906432, "learning_rate": 9.481122600129137e-06, "loss": 0.5212, "step": 5355 }, { "epoch": 0.6580661014866691, "grad_norm": 1.1490678571680035, "learning_rate": 9.475141073258498e-06, "loss": 0.5049, "step": 5356 }, { "epoch": 0.6581889667035262, "grad_norm": 1.3421421990973672, "learning_rate": 9.469160562649832e-06, "loss": 0.766, "step": 5357 }, { "epoch": 0.6583118319203833, "grad_norm": 1.2023039626992391, "learning_rate": 9.463181069403216e-06, "loss": 0.5594, "step": 5358 }, { "epoch": 0.6584346971372405, "grad_norm": 1.264128815449993, "learning_rate": 9.457202594618532e-06, "loss": 0.5399, "step": 5359 }, { "epoch": 0.6585575623540976, "grad_norm": 1.0302714376500743, "learning_rate": 9.451225139395482e-06, "loss": 0.5711, "step": 5360 }, { "epoch": 0.6586804275709547, "grad_norm": 1.476872702635223, "learning_rate": 9.445248704833587e-06, "loss": 0.5419, "step": 5361 }, { "epoch": 0.6588032927878118, "grad_norm": 1.26631484840403, "learning_rate": 9.439273292032168e-06, "loss": 0.7485, "step": 5362 }, { "epoch": 0.6589261580046689, "grad_norm": 1.2814609963682926, "learning_rate": 9.43329890209036e-06, "loss": 0.5972, "step": 5363 }, { "epoch": 0.659049023221526, "grad_norm": 1.1596355018970608, "learning_rate": 9.42732553610712e-06, "loss": 0.5386, "step": 5364 }, { "epoch": 0.6591718884383831, "grad_norm": 1.1325534435231255, "learning_rate": 9.42135319518121e-06, "loss": 0.6117, "step": 5365 }, { "epoch": 0.6592947536552402, "grad_norm": 1.562270049191132, "learning_rate": 9.4153818804112e-06, "loss": 0.6685, "step": 5366 }, { "epoch": 0.6594176188720973, "grad_norm": 1.1063936691464893, "learning_rate": 9.409411592895469e-06, "loss": 0.69, "step": 5367 }, { "epoch": 0.6595404840889544, "grad_norm": 1.3756707219684667, "learning_rate": 9.403442333732227e-06, "loss": 0.637, "step": 5368 }, { "epoch": 0.6596633493058115, "grad_norm": 1.1309852628932937, "learning_rate": 9.397474104019471e-06, "loss": 0.5289, "step": 5369 }, { "epoch": 0.6597862145226686, "grad_norm": 1.3092417761333737, "learning_rate": 9.391506904855022e-06, "loss": 0.4882, "step": 5370 }, { "epoch": 0.6599090797395257, "grad_norm": 1.1447562042080837, "learning_rate": 9.385540737336502e-06, "loss": 0.6453, "step": 5371 }, { "epoch": 0.6600319449563828, "grad_norm": 1.1488727688198235, "learning_rate": 9.379575602561355e-06, "loss": 0.634, "step": 5372 }, { "epoch": 0.66015481017324, "grad_norm": 1.3106853827476563, "learning_rate": 9.373611501626826e-06, "loss": 0.5356, "step": 5373 }, { "epoch": 0.6602776753900971, "grad_norm": 1.3234032757337477, "learning_rate": 9.367648435629973e-06, "loss": 0.6864, "step": 5374 }, { "epoch": 0.6604005406069542, "grad_norm": 1.2746991808922827, "learning_rate": 9.361686405667657e-06, "loss": 0.6179, "step": 5375 }, { "epoch": 0.6605234058238113, "grad_norm": 1.3999372429350039, "learning_rate": 9.355725412836565e-06, "loss": 0.6541, "step": 5376 }, { "epoch": 0.6606462710406684, "grad_norm": 1.191222028739644, "learning_rate": 9.349765458233182e-06, "loss": 0.5608, "step": 5377 }, { "epoch": 0.6607691362575255, "grad_norm": 1.1355170343186067, "learning_rate": 9.343806542953798e-06, "loss": 0.6242, "step": 5378 }, { "epoch": 0.6608920014743827, "grad_norm": 0.9866164644610046, "learning_rate": 9.337848668094517e-06, "loss": 0.5584, "step": 5379 }, { "epoch": 0.6610148666912397, "grad_norm": 1.6435242941303976, "learning_rate": 9.331891834751254e-06, "loss": 0.6659, "step": 5380 }, { "epoch": 0.6611377319080968, "grad_norm": 1.289472696632637, "learning_rate": 9.32593604401973e-06, "loss": 0.5339, "step": 5381 }, { "epoch": 0.6612605971249539, "grad_norm": 1.2474689524451454, "learning_rate": 9.319981296995474e-06, "loss": 0.4602, "step": 5382 }, { "epoch": 0.661383462341811, "grad_norm": 1.2936556684083684, "learning_rate": 9.314027594773816e-06, "loss": 0.5094, "step": 5383 }, { "epoch": 0.6615063275586681, "grad_norm": 1.2774031140397493, "learning_rate": 9.308074938449914e-06, "loss": 0.5824, "step": 5384 }, { "epoch": 0.6616291927755252, "grad_norm": 1.4407886352203754, "learning_rate": 9.302123329118712e-06, "loss": 0.5348, "step": 5385 }, { "epoch": 0.6617520579923823, "grad_norm": 1.485692404116567, "learning_rate": 9.296172767874966e-06, "loss": 0.6685, "step": 5386 }, { "epoch": 0.6618749232092395, "grad_norm": 1.1557099975458156, "learning_rate": 9.290223255813256e-06, "loss": 0.5276, "step": 5387 }, { "epoch": 0.6619977884260966, "grad_norm": 1.139284771303178, "learning_rate": 9.284274794027947e-06, "loss": 0.6, "step": 5388 }, { "epoch": 0.6621206536429537, "grad_norm": 1.2846014433444866, "learning_rate": 9.278327383613224e-06, "loss": 0.5227, "step": 5389 }, { "epoch": 0.6622435188598108, "grad_norm": 1.114264627324994, "learning_rate": 9.272381025663068e-06, "loss": 0.5179, "step": 5390 }, { "epoch": 0.6623663840766679, "grad_norm": 1.3627667365033898, "learning_rate": 9.26643572127128e-06, "loss": 0.6022, "step": 5391 }, { "epoch": 0.662489249293525, "grad_norm": 1.055190288150068, "learning_rate": 9.260491471531459e-06, "loss": 0.5065, "step": 5392 }, { "epoch": 0.6626121145103822, "grad_norm": 1.0569602707736216, "learning_rate": 9.254548277537008e-06, "loss": 0.4378, "step": 5393 }, { "epoch": 0.6627349797272393, "grad_norm": 1.49087855437183, "learning_rate": 9.248606140381135e-06, "loss": 0.4596, "step": 5394 }, { "epoch": 0.6628578449440963, "grad_norm": 1.3196504211542666, "learning_rate": 9.242665061156871e-06, "loss": 0.6815, "step": 5395 }, { "epoch": 0.6629807101609534, "grad_norm": 1.0984101601837264, "learning_rate": 9.236725040957032e-06, "loss": 0.571, "step": 5396 }, { "epoch": 0.6631035753778105, "grad_norm": 1.077449267281618, "learning_rate": 9.230786080874243e-06, "loss": 0.505, "step": 5397 }, { "epoch": 0.6632264405946676, "grad_norm": 1.213793907955046, "learning_rate": 9.224848182000937e-06, "loss": 0.5252, "step": 5398 }, { "epoch": 0.6633493058115247, "grad_norm": 1.4378149659779227, "learning_rate": 9.21891134542936e-06, "loss": 0.5563, "step": 5399 }, { "epoch": 0.6634721710283819, "grad_norm": 0.9751296963284363, "learning_rate": 9.212975572251547e-06, "loss": 0.6215, "step": 5400 }, { "epoch": 0.663595036245239, "grad_norm": 1.4951367928944619, "learning_rate": 9.207040863559349e-06, "loss": 0.5374, "step": 5401 }, { "epoch": 0.6637179014620961, "grad_norm": 1.2108652053668667, "learning_rate": 9.201107220444407e-06, "loss": 0.5228, "step": 5402 }, { "epoch": 0.6638407666789532, "grad_norm": 1.1732498410045245, "learning_rate": 9.195174643998193e-06, "loss": 0.6343, "step": 5403 }, { "epoch": 0.6639636318958103, "grad_norm": 1.2948022279847706, "learning_rate": 9.189243135311957e-06, "loss": 0.6186, "step": 5404 }, { "epoch": 0.6640864971126674, "grad_norm": 1.4402870232333937, "learning_rate": 9.183312695476762e-06, "loss": 0.6093, "step": 5405 }, { "epoch": 0.6642093623295245, "grad_norm": 1.389453362639374, "learning_rate": 9.17738332558347e-06, "loss": 0.6265, "step": 5406 }, { "epoch": 0.6643322275463817, "grad_norm": 1.2936087231928903, "learning_rate": 9.171455026722757e-06, "loss": 0.5835, "step": 5407 }, { "epoch": 0.6644550927632388, "grad_norm": 1.2082915818582454, "learning_rate": 9.165527799985095e-06, "loss": 0.6917, "step": 5408 }, { "epoch": 0.6645779579800959, "grad_norm": 1.1547852039785829, "learning_rate": 9.159601646460752e-06, "loss": 0.6423, "step": 5409 }, { "epoch": 0.6647008231969529, "grad_norm": 1.1063935836555865, "learning_rate": 9.153676567239812e-06, "loss": 0.6318, "step": 5410 }, { "epoch": 0.66482368841381, "grad_norm": 1.2306009537175067, "learning_rate": 9.147752563412155e-06, "loss": 0.4776, "step": 5411 }, { "epoch": 0.6649465536306671, "grad_norm": 1.2109842315696715, "learning_rate": 9.141829636067458e-06, "loss": 0.5815, "step": 5412 }, { "epoch": 0.6650694188475242, "grad_norm": 1.2956606503671095, "learning_rate": 9.135907786295204e-06, "loss": 0.5081, "step": 5413 }, { "epoch": 0.6651922840643814, "grad_norm": 1.1521563306302667, "learning_rate": 9.129987015184687e-06, "loss": 0.706, "step": 5414 }, { "epoch": 0.6653151492812385, "grad_norm": 1.0816758133943258, "learning_rate": 9.124067323824993e-06, "loss": 0.6197, "step": 5415 }, { "epoch": 0.6654380144980956, "grad_norm": 1.1961976594312118, "learning_rate": 9.118148713305006e-06, "loss": 0.608, "step": 5416 }, { "epoch": 0.6655608797149527, "grad_norm": 1.1679838637887987, "learning_rate": 9.112231184713415e-06, "loss": 0.5025, "step": 5417 }, { "epoch": 0.6656837449318098, "grad_norm": 1.1105847892963847, "learning_rate": 9.106314739138718e-06, "loss": 0.5669, "step": 5418 }, { "epoch": 0.6658066101486669, "grad_norm": 1.1834194609770026, "learning_rate": 9.100399377669203e-06, "loss": 0.5884, "step": 5419 }, { "epoch": 0.665929475365524, "grad_norm": 1.400524371713395, "learning_rate": 9.09448510139296e-06, "loss": 0.568, "step": 5420 }, { "epoch": 0.6660523405823812, "grad_norm": 1.209159503677965, "learning_rate": 9.088571911397882e-06, "loss": 0.5615, "step": 5421 }, { "epoch": 0.6661752057992383, "grad_norm": 1.1652976737596819, "learning_rate": 9.082659808771666e-06, "loss": 0.5745, "step": 5422 }, { "epoch": 0.6662980710160954, "grad_norm": 1.3699659286161283, "learning_rate": 9.076748794601803e-06, "loss": 0.4894, "step": 5423 }, { "epoch": 0.6664209362329524, "grad_norm": 1.3575808020092126, "learning_rate": 9.070838869975587e-06, "loss": 0.7357, "step": 5424 }, { "epoch": 0.6665438014498095, "grad_norm": 1.1252436867469335, "learning_rate": 9.064930035980104e-06, "loss": 0.6039, "step": 5425 }, { "epoch": 0.6666666666666666, "grad_norm": 1.3042482167348968, "learning_rate": 9.059022293702257e-06, "loss": 0.5965, "step": 5426 }, { "epoch": 0.6667895318835237, "grad_norm": 1.122768962709427, "learning_rate": 9.053115644228729e-06, "loss": 0.5935, "step": 5427 }, { "epoch": 0.6669123971003809, "grad_norm": 1.9934774910091, "learning_rate": 9.047210088646005e-06, "loss": 0.6156, "step": 5428 }, { "epoch": 0.667035262317238, "grad_norm": 1.303768673261939, "learning_rate": 9.04130562804039e-06, "loss": 0.5988, "step": 5429 }, { "epoch": 0.6671581275340951, "grad_norm": 1.3457535097062097, "learning_rate": 9.035402263497956e-06, "loss": 0.5987, "step": 5430 }, { "epoch": 0.6672809927509522, "grad_norm": 1.1191112661120572, "learning_rate": 9.029499996104594e-06, "loss": 0.5536, "step": 5431 }, { "epoch": 0.6674038579678093, "grad_norm": 1.386817838672882, "learning_rate": 9.023598826945983e-06, "loss": 0.669, "step": 5432 }, { "epoch": 0.6675267231846664, "grad_norm": 1.1795479657420052, "learning_rate": 9.017698757107618e-06, "loss": 0.5726, "step": 5433 }, { "epoch": 0.6676495884015236, "grad_norm": 1.3494724693097804, "learning_rate": 9.011799787674767e-06, "loss": 0.5632, "step": 5434 }, { "epoch": 0.6677724536183807, "grad_norm": 1.167954993697105, "learning_rate": 9.00590191973251e-06, "loss": 0.672, "step": 5435 }, { "epoch": 0.6678953188352378, "grad_norm": 1.4644311999685764, "learning_rate": 9.00000515436572e-06, "loss": 0.5559, "step": 5436 }, { "epoch": 0.6680181840520949, "grad_norm": 1.081215350980237, "learning_rate": 8.994109492659072e-06, "loss": 0.4664, "step": 5437 }, { "epoch": 0.668141049268952, "grad_norm": 1.0673320155500754, "learning_rate": 8.988214935697036e-06, "loss": 0.5612, "step": 5438 }, { "epoch": 0.668263914485809, "grad_norm": 1.2120820358769335, "learning_rate": 8.982321484563872e-06, "loss": 0.4739, "step": 5439 }, { "epoch": 0.6683867797026661, "grad_norm": 1.355342133240704, "learning_rate": 8.976429140343639e-06, "loss": 0.6064, "step": 5440 }, { "epoch": 0.6685096449195232, "grad_norm": 1.1262678856199464, "learning_rate": 8.970537904120211e-06, "loss": 0.6567, "step": 5441 }, { "epoch": 0.6686325101363804, "grad_norm": 1.1410304089779084, "learning_rate": 8.96464777697723e-06, "loss": 0.5707, "step": 5442 }, { "epoch": 0.6687553753532375, "grad_norm": 1.2759615966077273, "learning_rate": 8.95875875999815e-06, "loss": 0.6248, "step": 5443 }, { "epoch": 0.6688782405700946, "grad_norm": 1.1092249027774848, "learning_rate": 8.952870854266214e-06, "loss": 0.537, "step": 5444 }, { "epoch": 0.6690011057869517, "grad_norm": 1.3107109402210457, "learning_rate": 8.946984060864471e-06, "loss": 0.6066, "step": 5445 }, { "epoch": 0.6691239710038088, "grad_norm": 1.1387502955950397, "learning_rate": 8.941098380875754e-06, "loss": 0.6675, "step": 5446 }, { "epoch": 0.6692468362206659, "grad_norm": 1.301273981193034, "learning_rate": 8.935213815382698e-06, "loss": 0.5176, "step": 5447 }, { "epoch": 0.6693697014375231, "grad_norm": 1.3812533932021833, "learning_rate": 8.929330365467722e-06, "loss": 0.5594, "step": 5448 }, { "epoch": 0.6694925666543802, "grad_norm": 1.1942232795292123, "learning_rate": 8.923448032213062e-06, "loss": 0.5626, "step": 5449 }, { "epoch": 0.6696154318712373, "grad_norm": 1.232252205130166, "learning_rate": 8.917566816700729e-06, "loss": 0.5891, "step": 5450 }, { "epoch": 0.6697382970880944, "grad_norm": 1.1350519775079593, "learning_rate": 8.911686720012527e-06, "loss": 0.5841, "step": 5451 }, { "epoch": 0.6698611623049515, "grad_norm": 1.3022207139822306, "learning_rate": 8.905807743230075e-06, "loss": 0.695, "step": 5452 }, { "epoch": 0.6699840275218085, "grad_norm": 0.9273241252514355, "learning_rate": 8.899929887434767e-06, "loss": 0.4556, "step": 5453 }, { "epoch": 0.6701068927386656, "grad_norm": 1.356155548395237, "learning_rate": 8.894053153707798e-06, "loss": 0.5184, "step": 5454 }, { "epoch": 0.6702297579555228, "grad_norm": 1.256758526351901, "learning_rate": 8.888177543130144e-06, "loss": 0.5238, "step": 5455 }, { "epoch": 0.6703526231723799, "grad_norm": 1.2394370149420395, "learning_rate": 8.882303056782603e-06, "loss": 0.56, "step": 5456 }, { "epoch": 0.670475488389237, "grad_norm": 1.3273126706571647, "learning_rate": 8.876429695745739e-06, "loss": 0.6606, "step": 5457 }, { "epoch": 0.6705983536060941, "grad_norm": 1.2528763817866215, "learning_rate": 8.870557461099917e-06, "loss": 0.5407, "step": 5458 }, { "epoch": 0.6707212188229512, "grad_norm": 1.3352729395421492, "learning_rate": 8.864686353925295e-06, "loss": 0.5726, "step": 5459 }, { "epoch": 0.6708440840398083, "grad_norm": 1.293680801362028, "learning_rate": 8.858816375301836e-06, "loss": 0.4985, "step": 5460 }, { "epoch": 0.6709669492566654, "grad_norm": 1.0662788707630826, "learning_rate": 8.852947526309278e-06, "loss": 0.5809, "step": 5461 }, { "epoch": 0.6710898144735226, "grad_norm": 1.153598010778161, "learning_rate": 8.847079808027156e-06, "loss": 0.6293, "step": 5462 }, { "epoch": 0.6712126796903797, "grad_norm": 1.1362262382995019, "learning_rate": 8.841213221534798e-06, "loss": 0.6192, "step": 5463 }, { "epoch": 0.6713355449072368, "grad_norm": 1.1157536209690637, "learning_rate": 8.835347767911329e-06, "loss": 0.5541, "step": 5464 }, { "epoch": 0.6714584101240939, "grad_norm": 1.1504621027864248, "learning_rate": 8.829483448235659e-06, "loss": 0.5192, "step": 5465 }, { "epoch": 0.671581275340951, "grad_norm": 1.1759856056385154, "learning_rate": 8.823620263586493e-06, "loss": 0.591, "step": 5466 }, { "epoch": 0.6717041405578081, "grad_norm": 1.3999741737805917, "learning_rate": 8.817758215042316e-06, "loss": 0.5875, "step": 5467 }, { "epoch": 0.6718270057746651, "grad_norm": 1.4631762116027573, "learning_rate": 8.81189730368143e-06, "loss": 0.5893, "step": 5468 }, { "epoch": 0.6719498709915223, "grad_norm": 1.3062169732002271, "learning_rate": 8.806037530581904e-06, "loss": 0.5323, "step": 5469 }, { "epoch": 0.6720727362083794, "grad_norm": 1.038090937012699, "learning_rate": 8.800178896821597e-06, "loss": 0.6111, "step": 5470 }, { "epoch": 0.6721956014252365, "grad_norm": 1.4464513839744553, "learning_rate": 8.794321403478182e-06, "loss": 0.5131, "step": 5471 }, { "epoch": 0.6723184666420936, "grad_norm": 1.2818843260301203, "learning_rate": 8.788465051629101e-06, "loss": 0.5507, "step": 5472 }, { "epoch": 0.6724413318589507, "grad_norm": 1.0063043528163842, "learning_rate": 8.782609842351587e-06, "loss": 0.5657, "step": 5473 }, { "epoch": 0.6725641970758078, "grad_norm": 1.2237530110480506, "learning_rate": 8.77675577672267e-06, "loss": 0.6172, "step": 5474 }, { "epoch": 0.672687062292665, "grad_norm": 1.6325690823511276, "learning_rate": 8.770902855819174e-06, "loss": 0.7306, "step": 5475 }, { "epoch": 0.6728099275095221, "grad_norm": 1.122716475378354, "learning_rate": 8.765051080717696e-06, "loss": 0.5497, "step": 5476 }, { "epoch": 0.6729327927263792, "grad_norm": 1.0929586403750307, "learning_rate": 8.75920045249464e-06, "loss": 0.5477, "step": 5477 }, { "epoch": 0.6730556579432363, "grad_norm": 1.104066995462852, "learning_rate": 8.75335097222618e-06, "loss": 0.5666, "step": 5478 }, { "epoch": 0.6731785231600934, "grad_norm": 1.3603928357760826, "learning_rate": 8.74750264098831e-06, "loss": 0.6951, "step": 5479 }, { "epoch": 0.6733013883769505, "grad_norm": 1.191752585859024, "learning_rate": 8.74165545985677e-06, "loss": 0.6171, "step": 5480 }, { "epoch": 0.6734242535938076, "grad_norm": 1.1956443640015022, "learning_rate": 8.73580942990713e-06, "loss": 0.6082, "step": 5481 }, { "epoch": 0.6735471188106646, "grad_norm": 1.6400869581029824, "learning_rate": 8.729964552214708e-06, "loss": 0.5369, "step": 5482 }, { "epoch": 0.6736699840275218, "grad_norm": 1.039769184010898, "learning_rate": 8.724120827854657e-06, "loss": 0.6166, "step": 5483 }, { "epoch": 0.6737928492443789, "grad_norm": 1.4290270239236416, "learning_rate": 8.718278257901872e-06, "loss": 0.6507, "step": 5484 }, { "epoch": 0.673915714461236, "grad_norm": 1.20287783400799, "learning_rate": 8.712436843431068e-06, "loss": 0.5226, "step": 5485 }, { "epoch": 0.6740385796780931, "grad_norm": 1.1769862999940464, "learning_rate": 8.70659658551672e-06, "loss": 0.546, "step": 5486 }, { "epoch": 0.6741614448949502, "grad_norm": 1.4146889874229311, "learning_rate": 8.700757485233126e-06, "loss": 0.6247, "step": 5487 }, { "epoch": 0.6742843101118073, "grad_norm": 0.9900874639645196, "learning_rate": 8.694919543654337e-06, "loss": 0.5139, "step": 5488 }, { "epoch": 0.6744071753286645, "grad_norm": 1.1892280813901905, "learning_rate": 8.689082761854213e-06, "loss": 0.6207, "step": 5489 }, { "epoch": 0.6745300405455216, "grad_norm": 1.3221558244849385, "learning_rate": 8.683247140906382e-06, "loss": 0.5785, "step": 5490 }, { "epoch": 0.6746529057623787, "grad_norm": 1.200893578318306, "learning_rate": 8.677412681884273e-06, "loss": 0.5756, "step": 5491 }, { "epoch": 0.6747757709792358, "grad_norm": 1.2019424888020496, "learning_rate": 8.671579385861105e-06, "loss": 0.6227, "step": 5492 }, { "epoch": 0.6748986361960929, "grad_norm": 1.345512818188321, "learning_rate": 8.665747253909855e-06, "loss": 0.5456, "step": 5493 }, { "epoch": 0.67502150141295, "grad_norm": 1.1983695490545931, "learning_rate": 8.659916287103329e-06, "loss": 0.6501, "step": 5494 }, { "epoch": 0.6751443666298071, "grad_norm": 1.5775270603329956, "learning_rate": 8.65408648651408e-06, "loss": 0.6643, "step": 5495 }, { "epoch": 0.6752672318466643, "grad_norm": 1.2710142579865846, "learning_rate": 8.648257853214474e-06, "loss": 0.5551, "step": 5496 }, { "epoch": 0.6753900970635213, "grad_norm": 1.3598773955389767, "learning_rate": 8.642430388276638e-06, "loss": 0.6102, "step": 5497 }, { "epoch": 0.6755129622803784, "grad_norm": 1.2568881776897751, "learning_rate": 8.6366040927725e-06, "loss": 0.5403, "step": 5498 }, { "epoch": 0.6756358274972355, "grad_norm": 1.080857358681369, "learning_rate": 8.630778967773777e-06, "loss": 0.7155, "step": 5499 }, { "epoch": 0.6757586927140926, "grad_norm": 1.3515546552574471, "learning_rate": 8.624955014351953e-06, "loss": 0.559, "step": 5500 }, { "epoch": 0.6758815579309497, "grad_norm": 1.1591632038766402, "learning_rate": 8.619132233578308e-06, "loss": 0.5517, "step": 5501 }, { "epoch": 0.6760044231478068, "grad_norm": 1.141116659641108, "learning_rate": 8.61331062652391e-06, "loss": 0.6355, "step": 5502 }, { "epoch": 0.676127288364664, "grad_norm": 1.138180326058647, "learning_rate": 8.607490194259606e-06, "loss": 0.564, "step": 5503 }, { "epoch": 0.6762501535815211, "grad_norm": 1.2228504948581862, "learning_rate": 8.60167093785602e-06, "loss": 0.5694, "step": 5504 }, { "epoch": 0.6763730187983782, "grad_norm": 1.2198642429638746, "learning_rate": 8.59585285838357e-06, "loss": 0.5719, "step": 5505 }, { "epoch": 0.6764958840152353, "grad_norm": 1.4840411341599562, "learning_rate": 8.590035956912461e-06, "loss": 0.606, "step": 5506 }, { "epoch": 0.6766187492320924, "grad_norm": 1.188612743520994, "learning_rate": 8.58422023451266e-06, "loss": 0.5476, "step": 5507 }, { "epoch": 0.6767416144489495, "grad_norm": 1.234020954089997, "learning_rate": 8.578405692253945e-06, "loss": 0.4883, "step": 5508 }, { "epoch": 0.6768644796658067, "grad_norm": 1.1491567658586752, "learning_rate": 8.572592331205849e-06, "loss": 0.5469, "step": 5509 }, { "epoch": 0.6769873448826638, "grad_norm": 1.5177614857178434, "learning_rate": 8.566780152437717e-06, "loss": 0.6416, "step": 5510 }, { "epoch": 0.6771102100995208, "grad_norm": 1.2471498210336696, "learning_rate": 8.560969157018655e-06, "loss": 0.5462, "step": 5511 }, { "epoch": 0.6772330753163779, "grad_norm": 1.5264952298764982, "learning_rate": 8.555159346017559e-06, "loss": 0.5744, "step": 5512 }, { "epoch": 0.677355940533235, "grad_norm": 1.1324340690621615, "learning_rate": 8.549350720503094e-06, "loss": 0.5191, "step": 5513 }, { "epoch": 0.6774788057500921, "grad_norm": 1.0421755327117062, "learning_rate": 8.543543281543745e-06, "loss": 0.4953, "step": 5514 }, { "epoch": 0.6776016709669492, "grad_norm": 1.463222438847887, "learning_rate": 8.537737030207728e-06, "loss": 0.5596, "step": 5515 }, { "epoch": 0.6777245361838063, "grad_norm": 1.2833605367939092, "learning_rate": 8.531931967563078e-06, "loss": 0.4573, "step": 5516 }, { "epoch": 0.6778474014006635, "grad_norm": 1.298967859654523, "learning_rate": 8.5261280946776e-06, "loss": 0.5422, "step": 5517 }, { "epoch": 0.6779702666175206, "grad_norm": 1.1025000833250829, "learning_rate": 8.520325412618868e-06, "loss": 0.6198, "step": 5518 }, { "epoch": 0.6780931318343777, "grad_norm": 1.189920006343685, "learning_rate": 8.514523922454263e-06, "loss": 0.5021, "step": 5519 }, { "epoch": 0.6782159970512348, "grad_norm": 1.165503409133728, "learning_rate": 8.508723625250907e-06, "loss": 0.608, "step": 5520 }, { "epoch": 0.6783388622680919, "grad_norm": 1.5275667204899208, "learning_rate": 8.502924522075757e-06, "loss": 0.5433, "step": 5521 }, { "epoch": 0.678461727484949, "grad_norm": 1.1290585154282597, "learning_rate": 8.4971266139955e-06, "loss": 0.6372, "step": 5522 }, { "epoch": 0.6785845927018062, "grad_norm": 1.095017104860675, "learning_rate": 8.491329902076635e-06, "loss": 0.5229, "step": 5523 }, { "epoch": 0.6787074579186633, "grad_norm": 0.9903107429495395, "learning_rate": 8.48553438738542e-06, "loss": 0.5756, "step": 5524 }, { "epoch": 0.6788303231355204, "grad_norm": 1.0684487916166505, "learning_rate": 8.479740070987904e-06, "loss": 0.4929, "step": 5525 }, { "epoch": 0.6789531883523774, "grad_norm": 1.3674926520025616, "learning_rate": 8.473946953949924e-06, "loss": 0.5404, "step": 5526 }, { "epoch": 0.6790760535692345, "grad_norm": 1.2398411357492116, "learning_rate": 8.468155037337072e-06, "loss": 0.5943, "step": 5527 }, { "epoch": 0.6791989187860916, "grad_norm": 1.206815087083274, "learning_rate": 8.462364322214742e-06, "loss": 0.4884, "step": 5528 }, { "epoch": 0.6793217840029487, "grad_norm": 1.0987973419464097, "learning_rate": 8.456574809648096e-06, "loss": 0.61, "step": 5529 }, { "epoch": 0.6794446492198059, "grad_norm": 1.1972945623907818, "learning_rate": 8.450786500702084e-06, "loss": 0.5951, "step": 5530 }, { "epoch": 0.679567514436663, "grad_norm": 1.4032252927207136, "learning_rate": 8.444999396441416e-06, "loss": 0.5459, "step": 5531 }, { "epoch": 0.6796903796535201, "grad_norm": 1.3397801117162873, "learning_rate": 8.439213497930598e-06, "loss": 0.5412, "step": 5532 }, { "epoch": 0.6798132448703772, "grad_norm": 1.0261508310976428, "learning_rate": 8.43342880623391e-06, "loss": 0.5465, "step": 5533 }, { "epoch": 0.6799361100872343, "grad_norm": 1.1639757900258374, "learning_rate": 8.427645322415412e-06, "loss": 0.6101, "step": 5534 }, { "epoch": 0.6800589753040914, "grad_norm": 1.3484777650577826, "learning_rate": 8.42186304753893e-06, "loss": 0.7159, "step": 5535 }, { "epoch": 0.6801818405209485, "grad_norm": 1.2010976202120776, "learning_rate": 8.41608198266808e-06, "loss": 0.5973, "step": 5536 }, { "epoch": 0.6803047057378057, "grad_norm": 1.1251202752306608, "learning_rate": 8.410302128866253e-06, "loss": 0.6645, "step": 5537 }, { "epoch": 0.6804275709546628, "grad_norm": 1.173138914413587, "learning_rate": 8.40452348719661e-06, "loss": 0.5466, "step": 5538 }, { "epoch": 0.6805504361715199, "grad_norm": 2.128310953377174, "learning_rate": 8.3987460587221e-06, "loss": 0.7182, "step": 5539 }, { "epoch": 0.680673301388377, "grad_norm": 1.1940472057762614, "learning_rate": 8.392969844505441e-06, "loss": 0.5298, "step": 5540 }, { "epoch": 0.680796166605234, "grad_norm": 1.0625782469786935, "learning_rate": 8.387194845609134e-06, "loss": 0.5711, "step": 5541 }, { "epoch": 0.6809190318220911, "grad_norm": 1.1260033162505472, "learning_rate": 8.381421063095447e-06, "loss": 0.574, "step": 5542 }, { "epoch": 0.6810418970389482, "grad_norm": 1.210223282494203, "learning_rate": 8.375648498026431e-06, "loss": 0.6931, "step": 5543 }, { "epoch": 0.6811647622558054, "grad_norm": 1.1803403885718766, "learning_rate": 8.36987715146392e-06, "loss": 0.584, "step": 5544 }, { "epoch": 0.6812876274726625, "grad_norm": 1.1048173550874438, "learning_rate": 8.364107024469502e-06, "loss": 0.5232, "step": 5545 }, { "epoch": 0.6814104926895196, "grad_norm": 1.5219922021722194, "learning_rate": 8.358338118104568e-06, "loss": 0.5957, "step": 5546 }, { "epoch": 0.6815333579063767, "grad_norm": 1.320818601571406, "learning_rate": 8.352570433430254e-06, "loss": 0.6868, "step": 5547 }, { "epoch": 0.6816562231232338, "grad_norm": 1.285050333704961, "learning_rate": 8.346803971507508e-06, "loss": 0.567, "step": 5548 }, { "epoch": 0.6817790883400909, "grad_norm": 1.2246286448129153, "learning_rate": 8.34103873339702e-06, "loss": 0.4756, "step": 5549 }, { "epoch": 0.681901953556948, "grad_norm": 1.1277562510119499, "learning_rate": 8.335274720159279e-06, "loss": 0.4999, "step": 5550 }, { "epoch": 0.6820248187738052, "grad_norm": 1.1962519854302565, "learning_rate": 8.329511932854517e-06, "loss": 0.5536, "step": 5551 }, { "epoch": 0.6821476839906623, "grad_norm": 1.0034817331854886, "learning_rate": 8.323750372542788e-06, "loss": 0.5846, "step": 5552 }, { "epoch": 0.6822705492075194, "grad_norm": 1.2922563017629383, "learning_rate": 8.317990040283876e-06, "loss": 0.559, "step": 5553 }, { "epoch": 0.6823934144243765, "grad_norm": 1.2073993075850846, "learning_rate": 8.312230937137365e-06, "loss": 0.4939, "step": 5554 }, { "epoch": 0.6825162796412335, "grad_norm": 1.4006924605366056, "learning_rate": 8.306473064162597e-06, "loss": 0.5976, "step": 5555 }, { "epoch": 0.6826391448580906, "grad_norm": 1.5388442969479064, "learning_rate": 8.300716422418699e-06, "loss": 0.6163, "step": 5556 }, { "epoch": 0.6827620100749477, "grad_norm": 1.1840074619920913, "learning_rate": 8.294961012964576e-06, "loss": 0.6908, "step": 5557 }, { "epoch": 0.6828848752918049, "grad_norm": 1.2176931324047124, "learning_rate": 8.289206836858879e-06, "loss": 0.5868, "step": 5558 }, { "epoch": 0.683007740508662, "grad_norm": 1.2865768154230015, "learning_rate": 8.283453895160075e-06, "loss": 0.6198, "step": 5559 }, { "epoch": 0.6831306057255191, "grad_norm": 1.439467324645684, "learning_rate": 8.277702188926363e-06, "loss": 0.5093, "step": 5560 }, { "epoch": 0.6832534709423762, "grad_norm": 1.7064875821353878, "learning_rate": 8.27195171921574e-06, "loss": 0.6985, "step": 5561 }, { "epoch": 0.6833763361592333, "grad_norm": 1.0512876481173994, "learning_rate": 8.266202487085964e-06, "loss": 0.5387, "step": 5562 }, { "epoch": 0.6834992013760904, "grad_norm": 1.3029182507694035, "learning_rate": 8.26045449359457e-06, "loss": 0.4758, "step": 5563 }, { "epoch": 0.6836220665929476, "grad_norm": 1.3361889910948663, "learning_rate": 8.25470773979887e-06, "loss": 0.5424, "step": 5564 }, { "epoch": 0.6837449318098047, "grad_norm": 1.4569326593928469, "learning_rate": 8.248962226755929e-06, "loss": 0.5452, "step": 5565 }, { "epoch": 0.6838677970266618, "grad_norm": 1.3843190197186312, "learning_rate": 8.243217955522605e-06, "loss": 0.6343, "step": 5566 }, { "epoch": 0.6839906622435189, "grad_norm": 1.2034589946111576, "learning_rate": 8.237474927155517e-06, "loss": 0.5918, "step": 5567 }, { "epoch": 0.684113527460376, "grad_norm": 1.1136777457382039, "learning_rate": 8.23173314271107e-06, "loss": 0.4681, "step": 5568 }, { "epoch": 0.6842363926772331, "grad_norm": 1.1192514033973737, "learning_rate": 8.225992603245408e-06, "loss": 0.6661, "step": 5569 }, { "epoch": 0.6843592578940901, "grad_norm": 1.3525239802951745, "learning_rate": 8.220253309814479e-06, "loss": 0.5483, "step": 5570 }, { "epoch": 0.6844821231109472, "grad_norm": 1.2447744865787258, "learning_rate": 8.214515263473983e-06, "loss": 0.5215, "step": 5571 }, { "epoch": 0.6846049883278044, "grad_norm": 1.0101377397556024, "learning_rate": 8.208778465279404e-06, "loss": 0.5404, "step": 5572 }, { "epoch": 0.6847278535446615, "grad_norm": 1.1733425202897443, "learning_rate": 8.203042916285977e-06, "loss": 0.5899, "step": 5573 }, { "epoch": 0.6848507187615186, "grad_norm": 1.3411169806208167, "learning_rate": 8.19730861754873e-06, "loss": 0.6826, "step": 5574 }, { "epoch": 0.6849735839783757, "grad_norm": 1.160720658895645, "learning_rate": 8.191575570122449e-06, "loss": 0.6242, "step": 5575 }, { "epoch": 0.6850964491952328, "grad_norm": 1.2230761480334023, "learning_rate": 8.185843775061682e-06, "loss": 0.4872, "step": 5576 }, { "epoch": 0.6852193144120899, "grad_norm": 1.1131783110638138, "learning_rate": 8.180113233420761e-06, "loss": 0.5275, "step": 5577 }, { "epoch": 0.6853421796289471, "grad_norm": 1.3779542346414455, "learning_rate": 8.174383946253783e-06, "loss": 0.6241, "step": 5578 }, { "epoch": 0.6854650448458042, "grad_norm": 1.2055960405366897, "learning_rate": 8.168655914614617e-06, "loss": 0.6009, "step": 5579 }, { "epoch": 0.6855879100626613, "grad_norm": 1.545296065187811, "learning_rate": 8.162929139556888e-06, "loss": 0.5998, "step": 5580 }, { "epoch": 0.6857107752795184, "grad_norm": 1.226098725673508, "learning_rate": 8.157203622134004e-06, "loss": 0.5529, "step": 5581 }, { "epoch": 0.6858336404963755, "grad_norm": 1.3681611977348092, "learning_rate": 8.151479363399143e-06, "loss": 0.7222, "step": 5582 }, { "epoch": 0.6859565057132326, "grad_norm": 0.9531265814707083, "learning_rate": 8.14575636440523e-06, "loss": 0.5812, "step": 5583 }, { "epoch": 0.6860793709300896, "grad_norm": 1.2123401020489009, "learning_rate": 8.14003462620499e-06, "loss": 0.545, "step": 5584 }, { "epoch": 0.6862022361469468, "grad_norm": 1.1502294049958293, "learning_rate": 8.134314149850882e-06, "loss": 0.5276, "step": 5585 }, { "epoch": 0.6863251013638039, "grad_norm": 1.0245394659924127, "learning_rate": 8.12859493639517e-06, "loss": 0.586, "step": 5586 }, { "epoch": 0.686447966580661, "grad_norm": 1.1218356256516622, "learning_rate": 8.122876986889853e-06, "loss": 0.6042, "step": 5587 }, { "epoch": 0.6865708317975181, "grad_norm": 1.1488006920021612, "learning_rate": 8.117160302386718e-06, "loss": 0.671, "step": 5588 }, { "epoch": 0.6866936970143752, "grad_norm": 1.4114610304726096, "learning_rate": 8.111444883937299e-06, "loss": 0.608, "step": 5589 }, { "epoch": 0.6868165622312323, "grad_norm": 1.6530615992788658, "learning_rate": 8.105730732592931e-06, "loss": 0.7546, "step": 5590 }, { "epoch": 0.6869394274480894, "grad_norm": 1.2015194981633994, "learning_rate": 8.100017849404677e-06, "loss": 0.5514, "step": 5591 }, { "epoch": 0.6870622926649466, "grad_norm": 1.1175556655429864, "learning_rate": 8.094306235423398e-06, "loss": 0.6526, "step": 5592 }, { "epoch": 0.6871851578818037, "grad_norm": 1.071196172916086, "learning_rate": 8.088595891699695e-06, "loss": 0.5027, "step": 5593 }, { "epoch": 0.6873080230986608, "grad_norm": 1.200813024421294, "learning_rate": 8.082886819283958e-06, "loss": 0.5855, "step": 5594 }, { "epoch": 0.6874308883155179, "grad_norm": 1.2887640013516184, "learning_rate": 8.077179019226335e-06, "loss": 0.6348, "step": 5595 }, { "epoch": 0.687553753532375, "grad_norm": 1.0971653060804116, "learning_rate": 8.07147249257673e-06, "loss": 0.5542, "step": 5596 }, { "epoch": 0.6876766187492321, "grad_norm": 1.414093053391415, "learning_rate": 8.06576724038483e-06, "loss": 0.4957, "step": 5597 }, { "epoch": 0.6877994839660893, "grad_norm": 1.0734360410474622, "learning_rate": 8.060063263700074e-06, "loss": 0.6226, "step": 5598 }, { "epoch": 0.6879223491829463, "grad_norm": 1.4798562839374585, "learning_rate": 8.054360563571678e-06, "loss": 0.5104, "step": 5599 }, { "epoch": 0.6880452143998034, "grad_norm": 1.0734530589256912, "learning_rate": 8.048659141048608e-06, "loss": 0.6523, "step": 5600 }, { "epoch": 0.6881680796166605, "grad_norm": 1.1912903484787905, "learning_rate": 8.042958997179608e-06, "loss": 0.6118, "step": 5601 }, { "epoch": 0.6882909448335176, "grad_norm": 1.2210127107145723, "learning_rate": 8.037260133013188e-06, "loss": 0.6108, "step": 5602 }, { "epoch": 0.6884138100503747, "grad_norm": 1.3002063813248594, "learning_rate": 8.031562549597606e-06, "loss": 0.5417, "step": 5603 }, { "epoch": 0.6885366752672318, "grad_norm": 1.280428016246187, "learning_rate": 8.025866247980902e-06, "loss": 0.5671, "step": 5604 }, { "epoch": 0.688659540484089, "grad_norm": 1.107851970800824, "learning_rate": 8.02017122921087e-06, "loss": 0.6289, "step": 5605 }, { "epoch": 0.6887824057009461, "grad_norm": 1.1487124316964399, "learning_rate": 8.014477494335082e-06, "loss": 0.6825, "step": 5606 }, { "epoch": 0.6889052709178032, "grad_norm": 1.1191892473667016, "learning_rate": 8.00878504440085e-06, "loss": 0.6639, "step": 5607 }, { "epoch": 0.6890281361346603, "grad_norm": 1.1693597619594591, "learning_rate": 8.00309388045527e-06, "loss": 0.5273, "step": 5608 }, { "epoch": 0.6891510013515174, "grad_norm": 1.6881721659848923, "learning_rate": 7.997404003545195e-06, "loss": 0.7388, "step": 5609 }, { "epoch": 0.6892738665683745, "grad_norm": 1.1408695147584327, "learning_rate": 7.991715414717246e-06, "loss": 0.6015, "step": 5610 }, { "epoch": 0.6893967317852316, "grad_norm": 1.1458268126484092, "learning_rate": 7.986028115017788e-06, "loss": 0.5406, "step": 5611 }, { "epoch": 0.6895195970020888, "grad_norm": 1.2282994310813244, "learning_rate": 7.980342105492973e-06, "loss": 0.5691, "step": 5612 }, { "epoch": 0.6896424622189458, "grad_norm": 1.5720000134221044, "learning_rate": 7.97465738718871e-06, "loss": 0.5657, "step": 5613 }, { "epoch": 0.6897653274358029, "grad_norm": 1.1929345407989702, "learning_rate": 7.968973961150653e-06, "loss": 0.6578, "step": 5614 }, { "epoch": 0.68988819265266, "grad_norm": 1.2889740515628836, "learning_rate": 7.963291828424242e-06, "loss": 0.4608, "step": 5615 }, { "epoch": 0.6900110578695171, "grad_norm": 1.3341800463658702, "learning_rate": 7.957610990054654e-06, "loss": 0.5959, "step": 5616 }, { "epoch": 0.6901339230863742, "grad_norm": 1.1426823184838397, "learning_rate": 7.951931447086864e-06, "loss": 0.5376, "step": 5617 }, { "epoch": 0.6902567883032313, "grad_norm": 1.1712215859178003, "learning_rate": 7.946253200565572e-06, "loss": 0.6444, "step": 5618 }, { "epoch": 0.6903796535200885, "grad_norm": 1.3190229299768048, "learning_rate": 7.940576251535264e-06, "loss": 0.6519, "step": 5619 }, { "epoch": 0.6905025187369456, "grad_norm": 1.149601381954899, "learning_rate": 7.934900601040165e-06, "loss": 0.5959, "step": 5620 }, { "epoch": 0.6906253839538027, "grad_norm": 1.0909571148045023, "learning_rate": 7.929226250124284e-06, "loss": 0.4734, "step": 5621 }, { "epoch": 0.6907482491706598, "grad_norm": 1.3306854707555564, "learning_rate": 7.923553199831384e-06, "loss": 0.5728, "step": 5622 }, { "epoch": 0.6908711143875169, "grad_norm": 1.4063838822256967, "learning_rate": 7.917881451204966e-06, "loss": 0.6308, "step": 5623 }, { "epoch": 0.690993979604374, "grad_norm": 1.3375804822080501, "learning_rate": 7.912211005288342e-06, "loss": 0.6496, "step": 5624 }, { "epoch": 0.6911168448212311, "grad_norm": 1.2387419715284353, "learning_rate": 7.906541863124529e-06, "loss": 0.48, "step": 5625 }, { "epoch": 0.6912397100380883, "grad_norm": 1.0661735329035589, "learning_rate": 7.900874025756344e-06, "loss": 0.5583, "step": 5626 }, { "epoch": 0.6913625752549454, "grad_norm": 1.4097554021465466, "learning_rate": 7.895207494226338e-06, "loss": 0.5354, "step": 5627 }, { "epoch": 0.6914854404718024, "grad_norm": 1.0698890036713267, "learning_rate": 7.889542269576836e-06, "loss": 0.6071, "step": 5628 }, { "epoch": 0.6916083056886595, "grad_norm": 1.1837792820203894, "learning_rate": 7.883878352849925e-06, "loss": 0.6039, "step": 5629 }, { "epoch": 0.6917311709055166, "grad_norm": 1.0125046671733573, "learning_rate": 7.878215745087438e-06, "loss": 0.5037, "step": 5630 }, { "epoch": 0.6918540361223737, "grad_norm": 1.1443607297834595, "learning_rate": 7.872554447330977e-06, "loss": 0.5463, "step": 5631 }, { "epoch": 0.6919769013392308, "grad_norm": 1.4492294499218195, "learning_rate": 7.866894460621903e-06, "loss": 0.7272, "step": 5632 }, { "epoch": 0.692099766556088, "grad_norm": 1.507517307972361, "learning_rate": 7.861235786001338e-06, "loss": 0.6818, "step": 5633 }, { "epoch": 0.6922226317729451, "grad_norm": 1.2658210319837337, "learning_rate": 7.855578424510146e-06, "loss": 0.4878, "step": 5634 }, { "epoch": 0.6923454969898022, "grad_norm": 1.1471634350272175, "learning_rate": 7.849922377188973e-06, "loss": 0.5784, "step": 5635 }, { "epoch": 0.6924683622066593, "grad_norm": 1.1054239611305727, "learning_rate": 7.844267645078209e-06, "loss": 0.6171, "step": 5636 }, { "epoch": 0.6925912274235164, "grad_norm": 1.3275223387186565, "learning_rate": 7.83861422921801e-06, "loss": 0.6005, "step": 5637 }, { "epoch": 0.6927140926403735, "grad_norm": 1.1113006065346165, "learning_rate": 7.832962130648273e-06, "loss": 0.5697, "step": 5638 }, { "epoch": 0.6928369578572307, "grad_norm": 1.3695552852779618, "learning_rate": 7.827311350408674e-06, "loss": 0.6664, "step": 5639 }, { "epoch": 0.6929598230740878, "grad_norm": 1.0935866027567274, "learning_rate": 7.821661889538641e-06, "loss": 0.5187, "step": 5640 }, { "epoch": 0.6930826882909449, "grad_norm": 1.1679051292258025, "learning_rate": 7.816013749077344e-06, "loss": 0.5086, "step": 5641 }, { "epoch": 0.6932055535078019, "grad_norm": 1.2047314097416144, "learning_rate": 7.810366930063729e-06, "loss": 0.6281, "step": 5642 }, { "epoch": 0.693328418724659, "grad_norm": 1.1345764296002423, "learning_rate": 7.80472143353649e-06, "loss": 0.5801, "step": 5643 }, { "epoch": 0.6934512839415161, "grad_norm": 1.0867907048973902, "learning_rate": 7.799077260534085e-06, "loss": 0.5474, "step": 5644 }, { "epoch": 0.6935741491583732, "grad_norm": 1.1866949359263121, "learning_rate": 7.793434412094714e-06, "loss": 0.4608, "step": 5645 }, { "epoch": 0.6936970143752303, "grad_norm": 1.0256636615153212, "learning_rate": 7.787792889256347e-06, "loss": 0.5405, "step": 5646 }, { "epoch": 0.6938198795920875, "grad_norm": 1.2885703812221854, "learning_rate": 7.782152693056711e-06, "loss": 0.6253, "step": 5647 }, { "epoch": 0.6939427448089446, "grad_norm": 1.3821879314000147, "learning_rate": 7.776513824533272e-06, "loss": 0.6429, "step": 5648 }, { "epoch": 0.6940656100258017, "grad_norm": 1.0736786957674584, "learning_rate": 7.770876284723272e-06, "loss": 0.6469, "step": 5649 }, { "epoch": 0.6941884752426588, "grad_norm": 1.1301913789275426, "learning_rate": 7.765240074663689e-06, "loss": 0.5696, "step": 5650 }, { "epoch": 0.6943113404595159, "grad_norm": 1.2614628451929968, "learning_rate": 7.759605195391285e-06, "loss": 0.5564, "step": 5651 }, { "epoch": 0.694434205676373, "grad_norm": 1.2454094822044728, "learning_rate": 7.753971647942543e-06, "loss": 0.5206, "step": 5652 }, { "epoch": 0.6945570708932302, "grad_norm": 1.2479422071068338, "learning_rate": 7.748339433353731e-06, "loss": 0.5159, "step": 5653 }, { "epoch": 0.6946799361100873, "grad_norm": 1.092935019326991, "learning_rate": 7.74270855266084e-06, "loss": 0.6027, "step": 5654 }, { "epoch": 0.6948028013269444, "grad_norm": 1.2107553984811215, "learning_rate": 7.737079006899658e-06, "loss": 0.5776, "step": 5655 }, { "epoch": 0.6949256665438015, "grad_norm": 1.2042640984228694, "learning_rate": 7.731450797105687e-06, "loss": 0.5394, "step": 5656 }, { "epoch": 0.6950485317606585, "grad_norm": 1.1758020142593968, "learning_rate": 7.725823924314203e-06, "loss": 0.545, "step": 5657 }, { "epoch": 0.6951713969775156, "grad_norm": 1.3310976546154771, "learning_rate": 7.720198389560233e-06, "loss": 0.6924, "step": 5658 }, { "epoch": 0.6952942621943727, "grad_norm": 1.0256778764811518, "learning_rate": 7.714574193878557e-06, "loss": 0.5266, "step": 5659 }, { "epoch": 0.6954171274112299, "grad_norm": 1.1561607465014199, "learning_rate": 7.708951338303715e-06, "loss": 0.5359, "step": 5660 }, { "epoch": 0.695539992628087, "grad_norm": 1.5261346983439317, "learning_rate": 7.703329823869987e-06, "loss": 0.5763, "step": 5661 }, { "epoch": 0.6956628578449441, "grad_norm": 1.3941145863416418, "learning_rate": 7.697709651611415e-06, "loss": 0.6436, "step": 5662 }, { "epoch": 0.6957857230618012, "grad_norm": 1.1354969693506516, "learning_rate": 7.692090822561796e-06, "loss": 0.5279, "step": 5663 }, { "epoch": 0.6959085882786583, "grad_norm": 1.2967865765940443, "learning_rate": 7.686473337754682e-06, "loss": 0.5861, "step": 5664 }, { "epoch": 0.6960314534955154, "grad_norm": 1.1111375791308593, "learning_rate": 7.680857198223364e-06, "loss": 0.6005, "step": 5665 }, { "epoch": 0.6961543187123725, "grad_norm": 1.2470828550503483, "learning_rate": 7.675242405000896e-06, "loss": 0.6591, "step": 5666 }, { "epoch": 0.6962771839292297, "grad_norm": 1.3823013567306917, "learning_rate": 7.66962895912009e-06, "loss": 0.5917, "step": 5667 }, { "epoch": 0.6964000491460868, "grad_norm": 1.1512067943788893, "learning_rate": 7.664016861613495e-06, "loss": 0.5543, "step": 5668 }, { "epoch": 0.6965229143629439, "grad_norm": 1.1769896891607714, "learning_rate": 7.65840611351342e-06, "loss": 0.4839, "step": 5669 }, { "epoch": 0.696645779579801, "grad_norm": 1.295673789436105, "learning_rate": 7.65279671585193e-06, "loss": 0.6511, "step": 5670 }, { "epoch": 0.6967686447966581, "grad_norm": 1.0905549751060228, "learning_rate": 7.647188669660842e-06, "loss": 0.511, "step": 5671 }, { "epoch": 0.6968915100135151, "grad_norm": 1.2038976994560286, "learning_rate": 7.641581975971705e-06, "loss": 0.6512, "step": 5672 }, { "epoch": 0.6970143752303722, "grad_norm": 1.3976966227036185, "learning_rate": 7.635976635815845e-06, "loss": 0.5718, "step": 5673 }, { "epoch": 0.6971372404472294, "grad_norm": 1.1285535705847112, "learning_rate": 7.630372650224326e-06, "loss": 0.5627, "step": 5674 }, { "epoch": 0.6972601056640865, "grad_norm": 1.272360168612306, "learning_rate": 7.624770020227968e-06, "loss": 0.6512, "step": 5675 }, { "epoch": 0.6973829708809436, "grad_norm": 1.2633200639653437, "learning_rate": 7.619168746857331e-06, "loss": 0.6078, "step": 5676 }, { "epoch": 0.6975058360978007, "grad_norm": 0.9865428383733277, "learning_rate": 7.6135688311427364e-06, "loss": 0.5448, "step": 5677 }, { "epoch": 0.6976287013146578, "grad_norm": 1.079019876695279, "learning_rate": 7.607970274114257e-06, "loss": 0.6267, "step": 5678 }, { "epoch": 0.6977515665315149, "grad_norm": 1.1987782607368782, "learning_rate": 7.602373076801701e-06, "loss": 0.611, "step": 5679 }, { "epoch": 0.697874431748372, "grad_norm": 1.250032472940683, "learning_rate": 7.596777240234649e-06, "loss": 0.5383, "step": 5680 }, { "epoch": 0.6979972969652292, "grad_norm": 1.273125630750744, "learning_rate": 7.5911827654424005e-06, "loss": 0.6805, "step": 5681 }, { "epoch": 0.6981201621820863, "grad_norm": 1.2105492561606177, "learning_rate": 7.585589653454045e-06, "loss": 0.7155, "step": 5682 }, { "epoch": 0.6982430273989434, "grad_norm": 1.25057798880049, "learning_rate": 7.579997905298382e-06, "loss": 0.5783, "step": 5683 }, { "epoch": 0.6983658926158005, "grad_norm": 1.4169305899390214, "learning_rate": 7.574407522003988e-06, "loss": 0.6321, "step": 5684 }, { "epoch": 0.6984887578326576, "grad_norm": 1.077743644185033, "learning_rate": 7.568818504599175e-06, "loss": 0.52, "step": 5685 }, { "epoch": 0.6986116230495146, "grad_norm": 1.2882541657759443, "learning_rate": 7.563230854112002e-06, "loss": 0.6798, "step": 5686 }, { "epoch": 0.6987344882663717, "grad_norm": 1.1307087000224498, "learning_rate": 7.557644571570289e-06, "loss": 0.5788, "step": 5687 }, { "epoch": 0.6988573534832289, "grad_norm": 1.0912600519475106, "learning_rate": 7.55205965800158e-06, "loss": 0.5221, "step": 5688 }, { "epoch": 0.698980218700086, "grad_norm": 1.3111601572461575, "learning_rate": 7.5464761144332074e-06, "loss": 0.544, "step": 5689 }, { "epoch": 0.6991030839169431, "grad_norm": 1.609823079301775, "learning_rate": 7.5408939418922095e-06, "loss": 0.7466, "step": 5690 }, { "epoch": 0.6992259491338002, "grad_norm": 1.161310037813083, "learning_rate": 7.5353131414054025e-06, "loss": 0.5927, "step": 5691 }, { "epoch": 0.6993488143506573, "grad_norm": 1.2913202148830971, "learning_rate": 7.529733713999323e-06, "loss": 0.5768, "step": 5692 }, { "epoch": 0.6994716795675144, "grad_norm": 1.1795172187584488, "learning_rate": 7.52415566070029e-06, "loss": 0.654, "step": 5693 }, { "epoch": 0.6995945447843716, "grad_norm": 1.1598207260919409, "learning_rate": 7.518578982534336e-06, "loss": 0.71, "step": 5694 }, { "epoch": 0.6997174100012287, "grad_norm": 1.1637059942482801, "learning_rate": 7.513003680527265e-06, "loss": 0.4501, "step": 5695 }, { "epoch": 0.6998402752180858, "grad_norm": 1.0356150086496085, "learning_rate": 7.507429755704606e-06, "loss": 0.5652, "step": 5696 }, { "epoch": 0.6999631404349429, "grad_norm": 1.3385933889742565, "learning_rate": 7.5018572090916526e-06, "loss": 0.5309, "step": 5697 }, { "epoch": 0.7000860056518, "grad_norm": 1.3891938941138404, "learning_rate": 7.496286041713444e-06, "loss": 0.5656, "step": 5698 }, { "epoch": 0.7002088708686571, "grad_norm": 1.162934908193538, "learning_rate": 7.490716254594751e-06, "loss": 0.6868, "step": 5699 }, { "epoch": 0.7003317360855142, "grad_norm": 1.425167790116577, "learning_rate": 7.485147848760102e-06, "loss": 0.6007, "step": 5700 }, { "epoch": 0.7004546013023712, "grad_norm": 1.3785772115628128, "learning_rate": 7.47958082523377e-06, "loss": 0.5679, "step": 5701 }, { "epoch": 0.7005774665192284, "grad_norm": 1.6163685702014767, "learning_rate": 7.47401518503978e-06, "loss": 0.5532, "step": 5702 }, { "epoch": 0.7007003317360855, "grad_norm": 1.296126857595835, "learning_rate": 7.468450929201882e-06, "loss": 0.6498, "step": 5703 }, { "epoch": 0.7008231969529426, "grad_norm": 1.4082813184421166, "learning_rate": 7.462888058743593e-06, "loss": 0.6941, "step": 5704 }, { "epoch": 0.7009460621697997, "grad_norm": 1.3587331734418637, "learning_rate": 7.457326574688172e-06, "loss": 0.5115, "step": 5705 }, { "epoch": 0.7010689273866568, "grad_norm": 1.0856302316312183, "learning_rate": 7.451766478058605e-06, "loss": 0.6778, "step": 5706 }, { "epoch": 0.7011917926035139, "grad_norm": 1.1182188276308027, "learning_rate": 7.446207769877642e-06, "loss": 0.5694, "step": 5707 }, { "epoch": 0.7013146578203711, "grad_norm": 1.274010209326199, "learning_rate": 7.440650451167772e-06, "loss": 0.5606, "step": 5708 }, { "epoch": 0.7014375230372282, "grad_norm": 1.3152318270549028, "learning_rate": 7.435094522951234e-06, "loss": 0.5273, "step": 5709 }, { "epoch": 0.7015603882540853, "grad_norm": 1.0797431714811438, "learning_rate": 7.429539986249992e-06, "loss": 0.6372, "step": 5710 }, { "epoch": 0.7016832534709424, "grad_norm": 1.480381388069687, "learning_rate": 7.423986842085774e-06, "loss": 0.5983, "step": 5711 }, { "epoch": 0.7018061186877995, "grad_norm": 1.3506365482903868, "learning_rate": 7.4184350914800435e-06, "loss": 0.6559, "step": 5712 }, { "epoch": 0.7019289839046566, "grad_norm": 1.266889336902524, "learning_rate": 7.412884735454016e-06, "loss": 0.5382, "step": 5713 }, { "epoch": 0.7020518491215137, "grad_norm": 1.1932786930559602, "learning_rate": 7.407335775028631e-06, "loss": 0.5765, "step": 5714 }, { "epoch": 0.7021747143383708, "grad_norm": 1.2348856560985757, "learning_rate": 7.401788211224589e-06, "loss": 0.6038, "step": 5715 }, { "epoch": 0.7022975795552279, "grad_norm": 1.2137744630007445, "learning_rate": 7.396242045062336e-06, "loss": 0.5517, "step": 5716 }, { "epoch": 0.702420444772085, "grad_norm": 1.2903470516336304, "learning_rate": 7.3906972775620415e-06, "loss": 0.5516, "step": 5717 }, { "epoch": 0.7025433099889421, "grad_norm": 1.3303982605415705, "learning_rate": 7.385153909743641e-06, "loss": 0.5255, "step": 5718 }, { "epoch": 0.7026661752057992, "grad_norm": 1.0508281893475862, "learning_rate": 7.3796119426267815e-06, "loss": 0.7319, "step": 5719 }, { "epoch": 0.7027890404226563, "grad_norm": 1.0458664995896987, "learning_rate": 7.374071377230898e-06, "loss": 0.534, "step": 5720 }, { "epoch": 0.7029119056395134, "grad_norm": 1.0855774192859997, "learning_rate": 7.3685322145751235e-06, "loss": 0.6294, "step": 5721 }, { "epoch": 0.7030347708563706, "grad_norm": 1.0757897668175076, "learning_rate": 7.36299445567836e-06, "loss": 0.6078, "step": 5722 }, { "epoch": 0.7031576360732277, "grad_norm": 1.1357546924440463, "learning_rate": 7.3574581015592355e-06, "loss": 0.6748, "step": 5723 }, { "epoch": 0.7032805012900848, "grad_norm": 1.1450350702581613, "learning_rate": 7.351923153236128e-06, "loss": 0.5483, "step": 5724 }, { "epoch": 0.7034033665069419, "grad_norm": 1.1949006761202412, "learning_rate": 7.346389611727163e-06, "loss": 0.544, "step": 5725 }, { "epoch": 0.703526231723799, "grad_norm": 1.348417226763605, "learning_rate": 7.340857478050183e-06, "loss": 0.648, "step": 5726 }, { "epoch": 0.7036490969406561, "grad_norm": 1.570794600174266, "learning_rate": 7.335326753222808e-06, "loss": 0.6631, "step": 5727 }, { "epoch": 0.7037719621575133, "grad_norm": 1.342669481166849, "learning_rate": 7.329797438262366e-06, "loss": 0.6576, "step": 5728 }, { "epoch": 0.7038948273743704, "grad_norm": 1.1490408673632881, "learning_rate": 7.324269534185947e-06, "loss": 0.5541, "step": 5729 }, { "epoch": 0.7040176925912274, "grad_norm": 1.1310173866604216, "learning_rate": 7.318743042010361e-06, "loss": 0.5324, "step": 5730 }, { "epoch": 0.7041405578080845, "grad_norm": 1.290339168145589, "learning_rate": 7.313217962752179e-06, "loss": 0.5413, "step": 5731 }, { "epoch": 0.7042634230249416, "grad_norm": 1.1947658790443367, "learning_rate": 7.307694297427704e-06, "loss": 0.6342, "step": 5732 }, { "epoch": 0.7043862882417987, "grad_norm": 1.3069075764237337, "learning_rate": 7.3021720470529794e-06, "loss": 0.5293, "step": 5733 }, { "epoch": 0.7045091534586558, "grad_norm": 1.4767806045932323, "learning_rate": 7.296651212643781e-06, "loss": 0.5251, "step": 5734 }, { "epoch": 0.704632018675513, "grad_norm": 1.08301307920495, "learning_rate": 7.291131795215632e-06, "loss": 0.6573, "step": 5735 }, { "epoch": 0.7047548838923701, "grad_norm": 1.418815346852245, "learning_rate": 7.285613795783803e-06, "loss": 0.5266, "step": 5736 }, { "epoch": 0.7048777491092272, "grad_norm": 1.060798681542825, "learning_rate": 7.28009721536328e-06, "loss": 0.5649, "step": 5737 }, { "epoch": 0.7050006143260843, "grad_norm": 1.4179499931732644, "learning_rate": 7.274582054968811e-06, "loss": 0.5327, "step": 5738 }, { "epoch": 0.7051234795429414, "grad_norm": 1.2006922451582343, "learning_rate": 7.2690683156148705e-06, "loss": 0.5705, "step": 5739 }, { "epoch": 0.7052463447597985, "grad_norm": 1.453856439986463, "learning_rate": 7.2635559983156825e-06, "loss": 0.5847, "step": 5740 }, { "epoch": 0.7053692099766556, "grad_norm": 1.2678633571549354, "learning_rate": 7.258045104085189e-06, "loss": 0.7074, "step": 5741 }, { "epoch": 0.7054920751935128, "grad_norm": 1.2830859278039677, "learning_rate": 7.252535633937092e-06, "loss": 0.6406, "step": 5742 }, { "epoch": 0.7056149404103699, "grad_norm": 1.5228588099144023, "learning_rate": 7.247027588884825e-06, "loss": 0.5874, "step": 5743 }, { "epoch": 0.7057378056272269, "grad_norm": 1.050519264222237, "learning_rate": 7.2415209699415485e-06, "loss": 0.6125, "step": 5744 }, { "epoch": 0.705860670844084, "grad_norm": 1.1074581223682745, "learning_rate": 7.23601577812018e-06, "loss": 0.6422, "step": 5745 }, { "epoch": 0.7059835360609411, "grad_norm": 1.054443322793296, "learning_rate": 7.2305120144333465e-06, "loss": 0.6069, "step": 5746 }, { "epoch": 0.7061064012777982, "grad_norm": 1.2150167351662573, "learning_rate": 7.225009679893452e-06, "loss": 0.6173, "step": 5747 }, { "epoch": 0.7062292664946553, "grad_norm": 1.119401951426301, "learning_rate": 7.2195087755125975e-06, "loss": 0.6253, "step": 5748 }, { "epoch": 0.7063521317115125, "grad_norm": 1.2784924724924924, "learning_rate": 7.214009302302648e-06, "loss": 0.6218, "step": 5749 }, { "epoch": 0.7064749969283696, "grad_norm": 1.3532266467638157, "learning_rate": 7.208511261275198e-06, "loss": 0.6088, "step": 5750 }, { "epoch": 0.7065978621452267, "grad_norm": 1.2192633630658183, "learning_rate": 7.203014653441567e-06, "loss": 0.5655, "step": 5751 }, { "epoch": 0.7067207273620838, "grad_norm": 1.3934715056550437, "learning_rate": 7.197519479812828e-06, "loss": 0.615, "step": 5752 }, { "epoch": 0.7068435925789409, "grad_norm": 1.314031226393348, "learning_rate": 7.192025741399771e-06, "loss": 0.5845, "step": 5753 }, { "epoch": 0.706966457795798, "grad_norm": 1.3724073012025795, "learning_rate": 7.186533439212953e-06, "loss": 0.5574, "step": 5754 }, { "epoch": 0.7070893230126551, "grad_norm": 1.127366029157803, "learning_rate": 7.181042574262633e-06, "loss": 0.4481, "step": 5755 }, { "epoch": 0.7072121882295123, "grad_norm": 1.3308792127641735, "learning_rate": 7.1755531475588265e-06, "loss": 0.5948, "step": 5756 }, { "epoch": 0.7073350534463694, "grad_norm": 1.3394849395710051, "learning_rate": 7.1700651601112646e-06, "loss": 0.6052, "step": 5757 }, { "epoch": 0.7074579186632265, "grad_norm": 1.2965800008465418, "learning_rate": 7.16457861292945e-06, "loss": 0.5313, "step": 5758 }, { "epoch": 0.7075807838800835, "grad_norm": 1.3562799875478142, "learning_rate": 7.159093507022579e-06, "loss": 0.5599, "step": 5759 }, { "epoch": 0.7077036490969406, "grad_norm": 1.201626183894177, "learning_rate": 7.153609843399613e-06, "loss": 0.5679, "step": 5760 }, { "epoch": 0.7078265143137977, "grad_norm": 1.8863322234032065, "learning_rate": 7.148127623069225e-06, "loss": 0.61, "step": 5761 }, { "epoch": 0.7079493795306548, "grad_norm": 1.160344671752403, "learning_rate": 7.14264684703984e-06, "loss": 0.7411, "step": 5762 }, { "epoch": 0.708072244747512, "grad_norm": 1.559916338736381, "learning_rate": 7.137167516319615e-06, "loss": 0.4664, "step": 5763 }, { "epoch": 0.7081951099643691, "grad_norm": 1.2994403490311055, "learning_rate": 7.131689631916427e-06, "loss": 0.4639, "step": 5764 }, { "epoch": 0.7083179751812262, "grad_norm": 1.1796104771453266, "learning_rate": 7.126213194837905e-06, "loss": 0.5722, "step": 5765 }, { "epoch": 0.7084408403980833, "grad_norm": 1.342407954801766, "learning_rate": 7.120738206091403e-06, "loss": 0.5607, "step": 5766 }, { "epoch": 0.7085637056149404, "grad_norm": 1.8618112928715833, "learning_rate": 7.115264666684013e-06, "loss": 0.7505, "step": 5767 }, { "epoch": 0.7086865708317975, "grad_norm": 1.2353491104981922, "learning_rate": 7.1097925776225495e-06, "loss": 0.5712, "step": 5768 }, { "epoch": 0.7088094360486547, "grad_norm": 1.1026642876623363, "learning_rate": 7.10432193991357e-06, "loss": 0.5352, "step": 5769 }, { "epoch": 0.7089323012655118, "grad_norm": 0.9995481099165657, "learning_rate": 7.098852754563371e-06, "loss": 0.5693, "step": 5770 }, { "epoch": 0.7090551664823689, "grad_norm": 1.3572916016518013, "learning_rate": 7.09338502257796e-06, "loss": 0.6754, "step": 5771 }, { "epoch": 0.709178031699226, "grad_norm": 1.6833130537807133, "learning_rate": 7.0879187449631e-06, "loss": 0.7323, "step": 5772 }, { "epoch": 0.7093008969160831, "grad_norm": 1.1190422922049048, "learning_rate": 7.082453922724275e-06, "loss": 0.529, "step": 5773 }, { "epoch": 0.7094237621329401, "grad_norm": 1.4662071365643152, "learning_rate": 7.076990556866708e-06, "loss": 0.5821, "step": 5774 }, { "epoch": 0.7095466273497972, "grad_norm": 1.5187844540061315, "learning_rate": 7.0715286483953405e-06, "loss": 0.7054, "step": 5775 }, { "epoch": 0.7096694925666543, "grad_norm": 1.4415135956915555, "learning_rate": 7.06606819831486e-06, "loss": 0.7331, "step": 5776 }, { "epoch": 0.7097923577835115, "grad_norm": 1.432305802135095, "learning_rate": 7.060609207629682e-06, "loss": 0.594, "step": 5777 }, { "epoch": 0.7099152230003686, "grad_norm": 1.2004923109971903, "learning_rate": 7.055151677343955e-06, "loss": 0.5097, "step": 5778 }, { "epoch": 0.7100380882172257, "grad_norm": 1.3837346391899004, "learning_rate": 7.04969560846155e-06, "loss": 0.7393, "step": 5779 }, { "epoch": 0.7101609534340828, "grad_norm": 1.458954586563351, "learning_rate": 7.044241001986076e-06, "loss": 0.5354, "step": 5780 }, { "epoch": 0.7102838186509399, "grad_norm": 1.1587697933268901, "learning_rate": 7.038787858920881e-06, "loss": 0.6123, "step": 5781 }, { "epoch": 0.710406683867797, "grad_norm": 1.4033532077227382, "learning_rate": 7.033336180269024e-06, "loss": 0.5905, "step": 5782 }, { "epoch": 0.7105295490846542, "grad_norm": 1.2716849479562329, "learning_rate": 7.027885967033316e-06, "loss": 0.6176, "step": 5783 }, { "epoch": 0.7106524143015113, "grad_norm": 1.2990841617996842, "learning_rate": 7.022437220216273e-06, "loss": 0.5403, "step": 5784 }, { "epoch": 0.7107752795183684, "grad_norm": 1.2625967395725193, "learning_rate": 7.016989940820178e-06, "loss": 0.6724, "step": 5785 }, { "epoch": 0.7108981447352255, "grad_norm": 1.2337725191366085, "learning_rate": 7.011544129847006e-06, "loss": 0.6022, "step": 5786 }, { "epoch": 0.7110210099520826, "grad_norm": 1.1749126535146508, "learning_rate": 7.00609978829849e-06, "loss": 0.5373, "step": 5787 }, { "epoch": 0.7111438751689396, "grad_norm": 1.1310656804647898, "learning_rate": 7.000656917176069e-06, "loss": 0.6863, "step": 5788 }, { "epoch": 0.7112667403857967, "grad_norm": 1.3237019281635043, "learning_rate": 6.995215517480932e-06, "loss": 0.467, "step": 5789 }, { "epoch": 0.7113896056026539, "grad_norm": 0.9795057904179649, "learning_rate": 6.9897755902139946e-06, "loss": 0.5613, "step": 5790 }, { "epoch": 0.711512470819511, "grad_norm": 1.108221594022862, "learning_rate": 6.984337136375875e-06, "loss": 0.6727, "step": 5791 }, { "epoch": 0.7116353360363681, "grad_norm": 1.1322700748963865, "learning_rate": 6.978900156966968e-06, "loss": 0.6065, "step": 5792 }, { "epoch": 0.7117582012532252, "grad_norm": 1.09238176404221, "learning_rate": 6.973464652987353e-06, "loss": 0.547, "step": 5793 }, { "epoch": 0.7118810664700823, "grad_norm": 1.166119928883705, "learning_rate": 6.968030625436867e-06, "loss": 0.5626, "step": 5794 }, { "epoch": 0.7120039316869394, "grad_norm": 1.3001472245561245, "learning_rate": 6.962598075315047e-06, "loss": 0.5918, "step": 5795 }, { "epoch": 0.7121267969037965, "grad_norm": 1.1697190246044635, "learning_rate": 6.957167003621199e-06, "loss": 0.7425, "step": 5796 }, { "epoch": 0.7122496621206537, "grad_norm": 1.215358827917493, "learning_rate": 6.951737411354313e-06, "loss": 0.6029, "step": 5797 }, { "epoch": 0.7123725273375108, "grad_norm": 1.139689711258494, "learning_rate": 6.9463092995131426e-06, "loss": 0.6305, "step": 5798 }, { "epoch": 0.7124953925543679, "grad_norm": 1.2867420354061996, "learning_rate": 6.94088266909614e-06, "loss": 0.6005, "step": 5799 }, { "epoch": 0.712618257771225, "grad_norm": 1.1015665044397294, "learning_rate": 6.935457521101507e-06, "loss": 0.6065, "step": 5800 }, { "epoch": 0.7127411229880821, "grad_norm": 1.0911902802413385, "learning_rate": 6.930033856527167e-06, "loss": 0.6235, "step": 5801 }, { "epoch": 0.7128639882049392, "grad_norm": 1.119956223380845, "learning_rate": 6.9246116763707575e-06, "loss": 0.5333, "step": 5802 }, { "epoch": 0.7129868534217962, "grad_norm": 1.1589425896356937, "learning_rate": 6.91919098162966e-06, "loss": 0.6897, "step": 5803 }, { "epoch": 0.7131097186386534, "grad_norm": 1.312514040665402, "learning_rate": 6.913771773300975e-06, "loss": 0.5699, "step": 5804 }, { "epoch": 0.7132325838555105, "grad_norm": 1.2886164671113787, "learning_rate": 6.908354052381538e-06, "loss": 0.587, "step": 5805 }, { "epoch": 0.7133554490723676, "grad_norm": 1.083492006467809, "learning_rate": 6.902937819867891e-06, "loss": 0.648, "step": 5806 }, { "epoch": 0.7134783142892247, "grad_norm": 2.3155017166343206, "learning_rate": 6.897523076756319e-06, "loss": 0.7698, "step": 5807 }, { "epoch": 0.7136011795060818, "grad_norm": 1.1904415268941864, "learning_rate": 6.892109824042838e-06, "loss": 0.5685, "step": 5808 }, { "epoch": 0.7137240447229389, "grad_norm": 1.2052961582087776, "learning_rate": 6.886698062723167e-06, "loss": 0.5626, "step": 5809 }, { "epoch": 0.713846909939796, "grad_norm": 1.1903740127915126, "learning_rate": 6.881287793792777e-06, "loss": 0.4029, "step": 5810 }, { "epoch": 0.7139697751566532, "grad_norm": 1.2328405696389708, "learning_rate": 6.875879018246835e-06, "loss": 0.5794, "step": 5811 }, { "epoch": 0.7140926403735103, "grad_norm": 1.2718028143784392, "learning_rate": 6.87047173708027e-06, "loss": 0.5968, "step": 5812 }, { "epoch": 0.7142155055903674, "grad_norm": 1.4642964746635874, "learning_rate": 6.865065951287703e-06, "loss": 0.532, "step": 5813 }, { "epoch": 0.7143383708072245, "grad_norm": 0.9703985059574514, "learning_rate": 6.859661661863497e-06, "loss": 0.611, "step": 5814 }, { "epoch": 0.7144612360240816, "grad_norm": 1.2426170798487577, "learning_rate": 6.854258869801736e-06, "loss": 0.5581, "step": 5815 }, { "epoch": 0.7145841012409387, "grad_norm": 1.15961472317541, "learning_rate": 6.848857576096235e-06, "loss": 0.6456, "step": 5816 }, { "epoch": 0.7147069664577957, "grad_norm": 1.334802166167363, "learning_rate": 6.843457781740516e-06, "loss": 0.5513, "step": 5817 }, { "epoch": 0.7148298316746529, "grad_norm": 1.1935085554264357, "learning_rate": 6.83805948772784e-06, "loss": 0.5916, "step": 5818 }, { "epoch": 0.71495269689151, "grad_norm": 1.2401037072410983, "learning_rate": 6.832662695051195e-06, "loss": 0.6048, "step": 5819 }, { "epoch": 0.7150755621083671, "grad_norm": 1.1521125544105246, "learning_rate": 6.827267404703274e-06, "loss": 0.6853, "step": 5820 }, { "epoch": 0.7151984273252242, "grad_norm": 1.100269249745341, "learning_rate": 6.821873617676519e-06, "loss": 0.5521, "step": 5821 }, { "epoch": 0.7153212925420813, "grad_norm": 1.1608308359172952, "learning_rate": 6.816481334963061e-06, "loss": 0.5839, "step": 5822 }, { "epoch": 0.7154441577589384, "grad_norm": 1.2031034055225187, "learning_rate": 6.811090557554803e-06, "loss": 0.4529, "step": 5823 }, { "epoch": 0.7155670229757956, "grad_norm": 1.0360813009000631, "learning_rate": 6.805701286443323e-06, "loss": 0.6079, "step": 5824 }, { "epoch": 0.7156898881926527, "grad_norm": 1.0333578924081652, "learning_rate": 6.800313522619957e-06, "loss": 0.5528, "step": 5825 }, { "epoch": 0.7158127534095098, "grad_norm": 1.363867745416032, "learning_rate": 6.794927267075735e-06, "loss": 0.5827, "step": 5826 }, { "epoch": 0.7159356186263669, "grad_norm": 1.335096629255007, "learning_rate": 6.7895425208014304e-06, "loss": 0.5528, "step": 5827 }, { "epoch": 0.716058483843224, "grad_norm": 1.3653220299525013, "learning_rate": 6.784159284787537e-06, "loss": 0.608, "step": 5828 }, { "epoch": 0.7161813490600811, "grad_norm": 1.1690578325619843, "learning_rate": 6.7787775600242575e-06, "loss": 0.5433, "step": 5829 }, { "epoch": 0.7163042142769382, "grad_norm": 1.3498927927132656, "learning_rate": 6.773397347501529e-06, "loss": 0.6958, "step": 5830 }, { "epoch": 0.7164270794937954, "grad_norm": 1.3017941944838383, "learning_rate": 6.768018648209008e-06, "loss": 0.5491, "step": 5831 }, { "epoch": 0.7165499447106524, "grad_norm": 1.029874971190032, "learning_rate": 6.762641463136074e-06, "loss": 0.6349, "step": 5832 }, { "epoch": 0.7166728099275095, "grad_norm": 1.3823513046614209, "learning_rate": 6.757265793271811e-06, "loss": 0.6754, "step": 5833 }, { "epoch": 0.7167956751443666, "grad_norm": 1.066391911473468, "learning_rate": 6.7518916396050606e-06, "loss": 0.4928, "step": 5834 }, { "epoch": 0.7169185403612237, "grad_norm": 1.2921118397480762, "learning_rate": 6.746519003124347e-06, "loss": 0.6599, "step": 5835 }, { "epoch": 0.7170414055780808, "grad_norm": 1.261324359695132, "learning_rate": 6.7411478848179435e-06, "loss": 0.54, "step": 5836 }, { "epoch": 0.7171642707949379, "grad_norm": 1.2037502538260048, "learning_rate": 6.73577828567382e-06, "loss": 0.5765, "step": 5837 }, { "epoch": 0.7172871360117951, "grad_norm": 1.2757813455826494, "learning_rate": 6.730410206679684e-06, "loss": 0.5553, "step": 5838 }, { "epoch": 0.7174100012286522, "grad_norm": 1.189168625597662, "learning_rate": 6.725043648822967e-06, "loss": 0.6089, "step": 5839 }, { "epoch": 0.7175328664455093, "grad_norm": 1.266175823420697, "learning_rate": 6.719678613090801e-06, "loss": 0.4964, "step": 5840 }, { "epoch": 0.7176557316623664, "grad_norm": 1.245378677795781, "learning_rate": 6.714315100470053e-06, "loss": 0.5989, "step": 5841 }, { "epoch": 0.7177785968792235, "grad_norm": 1.2673256738776728, "learning_rate": 6.708953111947308e-06, "loss": 0.5609, "step": 5842 }, { "epoch": 0.7179014620960806, "grad_norm": 1.394447056315142, "learning_rate": 6.703592648508875e-06, "loss": 0.5856, "step": 5843 }, { "epoch": 0.7180243273129377, "grad_norm": 1.2561057041157506, "learning_rate": 6.698233711140764e-06, "loss": 0.5507, "step": 5844 }, { "epoch": 0.7181471925297949, "grad_norm": 1.3673702330852875, "learning_rate": 6.692876300828723e-06, "loss": 0.4616, "step": 5845 }, { "epoch": 0.7182700577466519, "grad_norm": 1.0892524708049547, "learning_rate": 6.687520418558219e-06, "loss": 0.5948, "step": 5846 }, { "epoch": 0.718392922963509, "grad_norm": 1.306129399523482, "learning_rate": 6.68216606531442e-06, "loss": 0.5445, "step": 5847 }, { "epoch": 0.7185157881803661, "grad_norm": 1.2353131277077498, "learning_rate": 6.676813242082236e-06, "loss": 0.6348, "step": 5848 }, { "epoch": 0.7186386533972232, "grad_norm": 1.1960504482141205, "learning_rate": 6.671461949846265e-06, "loss": 0.5705, "step": 5849 }, { "epoch": 0.7187615186140803, "grad_norm": 1.1581117824682023, "learning_rate": 6.6661121895908695e-06, "loss": 0.5788, "step": 5850 }, { "epoch": 0.7188843838309374, "grad_norm": 1.0885453901059732, "learning_rate": 6.660763962300084e-06, "loss": 0.6672, "step": 5851 }, { "epoch": 0.7190072490477946, "grad_norm": 1.021082603113587, "learning_rate": 6.6554172689576896e-06, "loss": 0.6934, "step": 5852 }, { "epoch": 0.7191301142646517, "grad_norm": 1.4147487807923342, "learning_rate": 6.650072110547169e-06, "loss": 0.5802, "step": 5853 }, { "epoch": 0.7192529794815088, "grad_norm": 1.456128322366719, "learning_rate": 6.64472848805173e-06, "loss": 0.6713, "step": 5854 }, { "epoch": 0.7193758446983659, "grad_norm": 1.5482187142607937, "learning_rate": 6.639386402454302e-06, "loss": 0.6006, "step": 5855 }, { "epoch": 0.719498709915223, "grad_norm": 1.3313188204213053, "learning_rate": 6.634045854737523e-06, "loss": 0.6198, "step": 5856 }, { "epoch": 0.7196215751320801, "grad_norm": 1.1935769977932142, "learning_rate": 6.628706845883759e-06, "loss": 0.551, "step": 5857 }, { "epoch": 0.7197444403489373, "grad_norm": 0.9809574033746302, "learning_rate": 6.623369376875077e-06, "loss": 0.5906, "step": 5858 }, { "epoch": 0.7198673055657944, "grad_norm": 1.1480795696286137, "learning_rate": 6.618033448693279e-06, "loss": 0.4643, "step": 5859 }, { "epoch": 0.7199901707826515, "grad_norm": 1.3224795815770858, "learning_rate": 6.612699062319858e-06, "loss": 0.5762, "step": 5860 }, { "epoch": 0.7201130359995085, "grad_norm": 1.2275632415973505, "learning_rate": 6.607366218736062e-06, "loss": 0.6079, "step": 5861 }, { "epoch": 0.7202359012163656, "grad_norm": 1.4578528065837546, "learning_rate": 6.602034918922816e-06, "loss": 0.6368, "step": 5862 }, { "epoch": 0.7203587664332227, "grad_norm": 1.4257493297019976, "learning_rate": 6.59670516386079e-06, "loss": 0.5525, "step": 5863 }, { "epoch": 0.7204816316500798, "grad_norm": 1.408099849960531, "learning_rate": 6.591376954530345e-06, "loss": 0.6496, "step": 5864 }, { "epoch": 0.720604496866937, "grad_norm": 1.041965922861159, "learning_rate": 6.586050291911579e-06, "loss": 0.508, "step": 5865 }, { "epoch": 0.7207273620837941, "grad_norm": 1.2623752804977917, "learning_rate": 6.5807251769843e-06, "loss": 0.5828, "step": 5866 }, { "epoch": 0.7208502273006512, "grad_norm": 1.108162636920431, "learning_rate": 6.575401610728019e-06, "loss": 0.5089, "step": 5867 }, { "epoch": 0.7209730925175083, "grad_norm": 1.460852553840171, "learning_rate": 6.570079594121976e-06, "loss": 0.4718, "step": 5868 }, { "epoch": 0.7210959577343654, "grad_norm": 1.506222196836904, "learning_rate": 6.5647591281451215e-06, "loss": 0.6707, "step": 5869 }, { "epoch": 0.7212188229512225, "grad_norm": 1.26504426927464, "learning_rate": 6.559440213776126e-06, "loss": 0.7019, "step": 5870 }, { "epoch": 0.7213416881680796, "grad_norm": 1.4473614819118563, "learning_rate": 6.554122851993359e-06, "loss": 0.5905, "step": 5871 }, { "epoch": 0.7214645533849368, "grad_norm": 1.2645483847930272, "learning_rate": 6.54880704377492e-06, "loss": 0.7117, "step": 5872 }, { "epoch": 0.7215874186017939, "grad_norm": 1.1346258566050182, "learning_rate": 6.543492790098623e-06, "loss": 0.5783, "step": 5873 }, { "epoch": 0.721710283818651, "grad_norm": 1.1944941966838851, "learning_rate": 6.5381800919419805e-06, "loss": 0.4596, "step": 5874 }, { "epoch": 0.721833149035508, "grad_norm": 1.3847356282574934, "learning_rate": 6.532868950282237e-06, "loss": 0.551, "step": 5875 }, { "epoch": 0.7219560142523651, "grad_norm": 0.998276745121459, "learning_rate": 6.527559366096328e-06, "loss": 0.5862, "step": 5876 }, { "epoch": 0.7220788794692222, "grad_norm": 1.1960746727221903, "learning_rate": 6.5222513403609405e-06, "loss": 0.5407, "step": 5877 }, { "epoch": 0.7222017446860793, "grad_norm": 1.2121705670926441, "learning_rate": 6.5169448740524315e-06, "loss": 0.6076, "step": 5878 }, { "epoch": 0.7223246099029365, "grad_norm": 1.632566802957556, "learning_rate": 6.511639968146898e-06, "loss": 0.5831, "step": 5879 }, { "epoch": 0.7224474751197936, "grad_norm": 1.249922835772653, "learning_rate": 6.506336623620145e-06, "loss": 0.5896, "step": 5880 }, { "epoch": 0.7225703403366507, "grad_norm": 1.0771952403425245, "learning_rate": 6.501034841447692e-06, "loss": 0.495, "step": 5881 }, { "epoch": 0.7226932055535078, "grad_norm": 1.6649935456645526, "learning_rate": 6.495734622604757e-06, "loss": 0.6819, "step": 5882 }, { "epoch": 0.7228160707703649, "grad_norm": 1.326199440818517, "learning_rate": 6.490435968066284e-06, "loss": 0.6102, "step": 5883 }, { "epoch": 0.722938935987222, "grad_norm": 1.4040447141637833, "learning_rate": 6.485138878806937e-06, "loss": 0.6631, "step": 5884 }, { "epoch": 0.7230618012040791, "grad_norm": 1.034310870853518, "learning_rate": 6.479843355801064e-06, "loss": 0.6146, "step": 5885 }, { "epoch": 0.7231846664209363, "grad_norm": 1.247525381713037, "learning_rate": 6.474549400022757e-06, "loss": 0.5254, "step": 5886 }, { "epoch": 0.7233075316377934, "grad_norm": 1.0939967040409733, "learning_rate": 6.469257012445788e-06, "loss": 0.4725, "step": 5887 }, { "epoch": 0.7234303968546505, "grad_norm": 1.3484032706354077, "learning_rate": 6.463966194043678e-06, "loss": 0.5708, "step": 5888 }, { "epoch": 0.7235532620715076, "grad_norm": 1.2519822253477928, "learning_rate": 6.458676945789624e-06, "loss": 0.6783, "step": 5889 }, { "epoch": 0.7236761272883646, "grad_norm": 1.182292211595881, "learning_rate": 6.453389268656558e-06, "loss": 0.6201, "step": 5890 }, { "epoch": 0.7237989925052217, "grad_norm": 1.330363878433059, "learning_rate": 6.448103163617103e-06, "loss": 0.5394, "step": 5891 }, { "epoch": 0.7239218577220788, "grad_norm": 1.2783003980284016, "learning_rate": 6.442818631643612e-06, "loss": 0.6113, "step": 5892 }, { "epoch": 0.724044722938936, "grad_norm": 1.300900595706287, "learning_rate": 6.437535673708143e-06, "loss": 0.587, "step": 5893 }, { "epoch": 0.7241675881557931, "grad_norm": 0.9614770447119556, "learning_rate": 6.432254290782452e-06, "loss": 0.5052, "step": 5894 }, { "epoch": 0.7242904533726502, "grad_norm": 1.274874404137922, "learning_rate": 6.42697448383802e-06, "loss": 0.5565, "step": 5895 }, { "epoch": 0.7244133185895073, "grad_norm": 1.373522044669741, "learning_rate": 6.421696253846033e-06, "loss": 0.5155, "step": 5896 }, { "epoch": 0.7245361838063644, "grad_norm": 1.0846491184633413, "learning_rate": 6.416419601777395e-06, "loss": 0.6001, "step": 5897 }, { "epoch": 0.7246590490232215, "grad_norm": 1.1291527677621216, "learning_rate": 6.411144528602693e-06, "loss": 0.6141, "step": 5898 }, { "epoch": 0.7247819142400787, "grad_norm": 1.3662341731540006, "learning_rate": 6.405871035292266e-06, "loss": 0.5789, "step": 5899 }, { "epoch": 0.7249047794569358, "grad_norm": 1.3330922174913324, "learning_rate": 6.40059912281612e-06, "loss": 0.5758, "step": 5900 }, { "epoch": 0.7250276446737929, "grad_norm": 1.1482048287579354, "learning_rate": 6.395328792144003e-06, "loss": 0.4388, "step": 5901 }, { "epoch": 0.72515050989065, "grad_norm": 1.1567440331917391, "learning_rate": 6.390060044245345e-06, "loss": 0.6982, "step": 5902 }, { "epoch": 0.7252733751075071, "grad_norm": 1.3265421998861628, "learning_rate": 6.384792880089306e-06, "loss": 0.5814, "step": 5903 }, { "epoch": 0.7253962403243642, "grad_norm": 1.2290424290949398, "learning_rate": 6.3795273006447505e-06, "loss": 0.5888, "step": 5904 }, { "epoch": 0.7255191055412212, "grad_norm": 1.3258951255819902, "learning_rate": 6.3742633068802356e-06, "loss": 0.482, "step": 5905 }, { "epoch": 0.7256419707580783, "grad_norm": 1.0747516445633205, "learning_rate": 6.369000899764046e-06, "loss": 0.6336, "step": 5906 }, { "epoch": 0.7257648359749355, "grad_norm": 1.1720069059810452, "learning_rate": 6.363740080264166e-06, "loss": 0.5521, "step": 5907 }, { "epoch": 0.7258877011917926, "grad_norm": 1.0247347601216206, "learning_rate": 6.358480849348296e-06, "loss": 0.568, "step": 5908 }, { "epoch": 0.7260105664086497, "grad_norm": 1.3829418598242205, "learning_rate": 6.3532232079838275e-06, "loss": 0.5832, "step": 5909 }, { "epoch": 0.7261334316255068, "grad_norm": 1.9129859369169844, "learning_rate": 6.347967157137873e-06, "loss": 0.7403, "step": 5910 }, { "epoch": 0.7262562968423639, "grad_norm": 1.4682502974136928, "learning_rate": 6.342712697777254e-06, "loss": 0.5661, "step": 5911 }, { "epoch": 0.726379162059221, "grad_norm": 1.2427824115944508, "learning_rate": 6.337459830868486e-06, "loss": 0.5946, "step": 5912 }, { "epoch": 0.7265020272760782, "grad_norm": 1.0845964632939977, "learning_rate": 6.332208557377807e-06, "loss": 0.5487, "step": 5913 }, { "epoch": 0.7266248924929353, "grad_norm": 1.4755333067834688, "learning_rate": 6.326958878271143e-06, "loss": 0.563, "step": 5914 }, { "epoch": 0.7267477577097924, "grad_norm": 1.1005801506073436, "learning_rate": 6.321710794514154e-06, "loss": 0.5698, "step": 5915 }, { "epoch": 0.7268706229266495, "grad_norm": 1.1934159640738673, "learning_rate": 6.3164643070721806e-06, "loss": 0.6238, "step": 5916 }, { "epoch": 0.7269934881435066, "grad_norm": 1.395501281297613, "learning_rate": 6.3112194169102885e-06, "loss": 0.461, "step": 5917 }, { "epoch": 0.7271163533603637, "grad_norm": 1.2762042769610031, "learning_rate": 6.305976124993225e-06, "loss": 0.592, "step": 5918 }, { "epoch": 0.7272392185772207, "grad_norm": 1.109726512219594, "learning_rate": 6.3007344322854815e-06, "loss": 0.5161, "step": 5919 }, { "epoch": 0.7273620837940779, "grad_norm": 1.2906211037528015, "learning_rate": 6.295494339751217e-06, "loss": 0.6052, "step": 5920 }, { "epoch": 0.727484949010935, "grad_norm": 1.3145932217453877, "learning_rate": 6.290255848354316e-06, "loss": 0.5234, "step": 5921 }, { "epoch": 0.7276078142277921, "grad_norm": 1.7723376009715528, "learning_rate": 6.285018959058376e-06, "loss": 0.6858, "step": 5922 }, { "epoch": 0.7277306794446492, "grad_norm": 1.1134422401722797, "learning_rate": 6.279783672826672e-06, "loss": 0.6467, "step": 5923 }, { "epoch": 0.7278535446615063, "grad_norm": 1.3086179643331146, "learning_rate": 6.2745499906222136e-06, "loss": 0.5943, "step": 5924 }, { "epoch": 0.7279764098783634, "grad_norm": 1.2477066773697527, "learning_rate": 6.269317913407688e-06, "loss": 0.6428, "step": 5925 }, { "epoch": 0.7280992750952205, "grad_norm": 1.2521216033209712, "learning_rate": 6.264087442145524e-06, "loss": 0.5792, "step": 5926 }, { "epoch": 0.7282221403120777, "grad_norm": 1.204469864443805, "learning_rate": 6.258858577797815e-06, "loss": 0.6036, "step": 5927 }, { "epoch": 0.7283450055289348, "grad_norm": 1.1417131723002953, "learning_rate": 6.253631321326386e-06, "loss": 0.5674, "step": 5928 }, { "epoch": 0.7284678707457919, "grad_norm": 1.0973539554766425, "learning_rate": 6.248405673692748e-06, "loss": 0.5951, "step": 5929 }, { "epoch": 0.728590735962649, "grad_norm": 1.2484040826524825, "learning_rate": 6.243181635858131e-06, "loss": 0.575, "step": 5930 }, { "epoch": 0.7287136011795061, "grad_norm": 1.1844958523526037, "learning_rate": 6.237959208783468e-06, "loss": 0.6443, "step": 5931 }, { "epoch": 0.7288364663963632, "grad_norm": 1.2809746459385443, "learning_rate": 6.232738393429378e-06, "loss": 0.5548, "step": 5932 }, { "epoch": 0.7289593316132204, "grad_norm": 1.0363534178395724, "learning_rate": 6.227519190756204e-06, "loss": 0.5554, "step": 5933 }, { "epoch": 0.7290821968300774, "grad_norm": 1.3955788303044536, "learning_rate": 6.2223016017239835e-06, "loss": 0.5876, "step": 5934 }, { "epoch": 0.7292050620469345, "grad_norm": 1.0763451590920206, "learning_rate": 6.217085627292463e-06, "loss": 0.5376, "step": 5935 }, { "epoch": 0.7293279272637916, "grad_norm": 1.0972807194165155, "learning_rate": 6.2118712684210755e-06, "loss": 0.5253, "step": 5936 }, { "epoch": 0.7294507924806487, "grad_norm": 1.0571268208883549, "learning_rate": 6.206658526068976e-06, "loss": 0.5792, "step": 5937 }, { "epoch": 0.7295736576975058, "grad_norm": 1.1298074260574107, "learning_rate": 6.201447401195015e-06, "loss": 0.5905, "step": 5938 }, { "epoch": 0.7296965229143629, "grad_norm": 1.2544465283188764, "learning_rate": 6.1962378947577486e-06, "loss": 0.6032, "step": 5939 }, { "epoch": 0.72981938813122, "grad_norm": 1.1364535976073067, "learning_rate": 6.191030007715422e-06, "loss": 0.5962, "step": 5940 }, { "epoch": 0.7299422533480772, "grad_norm": 1.3799265287752271, "learning_rate": 6.185823741025995e-06, "loss": 0.6463, "step": 5941 }, { "epoch": 0.7300651185649343, "grad_norm": 1.2026825809471708, "learning_rate": 6.180619095647137e-06, "loss": 0.5569, "step": 5942 }, { "epoch": 0.7301879837817914, "grad_norm": 1.3459257984461155, "learning_rate": 6.175416072536194e-06, "loss": 0.5877, "step": 5943 }, { "epoch": 0.7303108489986485, "grad_norm": 1.1533345005253142, "learning_rate": 6.170214672650236e-06, "loss": 0.5926, "step": 5944 }, { "epoch": 0.7304337142155056, "grad_norm": 1.1810523849591672, "learning_rate": 6.165014896946024e-06, "loss": 0.4937, "step": 5945 }, { "epoch": 0.7305565794323627, "grad_norm": 1.4059823448197015, "learning_rate": 6.159816746380033e-06, "loss": 0.6126, "step": 5946 }, { "epoch": 0.7306794446492199, "grad_norm": 1.4520605462189493, "learning_rate": 6.154620221908414e-06, "loss": 0.5598, "step": 5947 }, { "epoch": 0.7308023098660769, "grad_norm": 1.4818425933333033, "learning_rate": 6.149425324487039e-06, "loss": 0.6234, "step": 5948 }, { "epoch": 0.730925175082934, "grad_norm": 1.7402680824444128, "learning_rate": 6.144232055071485e-06, "loss": 0.616, "step": 5949 }, { "epoch": 0.7310480402997911, "grad_norm": 1.3141211841280032, "learning_rate": 6.139040414617006e-06, "loss": 0.492, "step": 5950 }, { "epoch": 0.7311709055166482, "grad_norm": 1.1316602941277514, "learning_rate": 6.133850404078585e-06, "loss": 0.6132, "step": 5951 }, { "epoch": 0.7312937707335053, "grad_norm": 1.2937085228202196, "learning_rate": 6.128662024410871e-06, "loss": 0.5258, "step": 5952 }, { "epoch": 0.7314166359503624, "grad_norm": 1.2072587779478297, "learning_rate": 6.123475276568257e-06, "loss": 0.614, "step": 5953 }, { "epoch": 0.7315395011672196, "grad_norm": 1.2061846015241873, "learning_rate": 6.118290161504792e-06, "loss": 0.6343, "step": 5954 }, { "epoch": 0.7316623663840767, "grad_norm": 1.1360251862906505, "learning_rate": 6.113106680174259e-06, "loss": 0.6981, "step": 5955 }, { "epoch": 0.7317852316009338, "grad_norm": 1.1564907474094646, "learning_rate": 6.107924833530107e-06, "loss": 0.6094, "step": 5956 }, { "epoch": 0.7319080968177909, "grad_norm": 1.1288652984378609, "learning_rate": 6.102744622525527e-06, "loss": 0.5222, "step": 5957 }, { "epoch": 0.732030962034648, "grad_norm": 1.2862212160413213, "learning_rate": 6.097566048113365e-06, "loss": 0.5733, "step": 5958 }, { "epoch": 0.7321538272515051, "grad_norm": 1.2075806144187795, "learning_rate": 6.092389111246201e-06, "loss": 0.51, "step": 5959 }, { "epoch": 0.7322766924683622, "grad_norm": 1.2333403638336322, "learning_rate": 6.0872138128762866e-06, "loss": 0.5283, "step": 5960 }, { "epoch": 0.7323995576852194, "grad_norm": 1.2197854405846857, "learning_rate": 6.08204015395559e-06, "loss": 0.4928, "step": 5961 }, { "epoch": 0.7325224229020765, "grad_norm": 1.3259220857267917, "learning_rate": 6.076868135435778e-06, "loss": 0.6056, "step": 5962 }, { "epoch": 0.7326452881189335, "grad_norm": 1.4463684246246447, "learning_rate": 6.071697758268192e-06, "loss": 0.5903, "step": 5963 }, { "epoch": 0.7327681533357906, "grad_norm": 1.1483600549479456, "learning_rate": 6.066529023403913e-06, "loss": 0.4696, "step": 5964 }, { "epoch": 0.7328910185526477, "grad_norm": 1.3368009324499486, "learning_rate": 6.061361931793679e-06, "loss": 0.4937, "step": 5965 }, { "epoch": 0.7330138837695048, "grad_norm": 1.2541882269709037, "learning_rate": 6.056196484387954e-06, "loss": 0.6631, "step": 5966 }, { "epoch": 0.7331367489863619, "grad_norm": 1.9830293190277053, "learning_rate": 6.051032682136877e-06, "loss": 0.7542, "step": 5967 }, { "epoch": 0.7332596142032191, "grad_norm": 1.2351742583726097, "learning_rate": 6.0458705259903015e-06, "loss": 0.4148, "step": 5968 }, { "epoch": 0.7333824794200762, "grad_norm": 1.4448325231082835, "learning_rate": 6.04071001689778e-06, "loss": 0.5496, "step": 5969 }, { "epoch": 0.7335053446369333, "grad_norm": 1.267859894869566, "learning_rate": 6.035551155808542e-06, "loss": 0.7047, "step": 5970 }, { "epoch": 0.7336282098537904, "grad_norm": 1.1089565257096052, "learning_rate": 6.0303939436715324e-06, "loss": 0.5056, "step": 5971 }, { "epoch": 0.7337510750706475, "grad_norm": 1.2699012659995954, "learning_rate": 6.025238381435387e-06, "loss": 0.5287, "step": 5972 }, { "epoch": 0.7338739402875046, "grad_norm": 1.1963148002393753, "learning_rate": 6.020084470048444e-06, "loss": 0.5251, "step": 5973 }, { "epoch": 0.7339968055043617, "grad_norm": 1.2186325250156769, "learning_rate": 6.01493221045872e-06, "loss": 0.5778, "step": 5974 }, { "epoch": 0.7341196707212189, "grad_norm": 1.1372699334153606, "learning_rate": 6.0097816036139455e-06, "loss": 0.6232, "step": 5975 }, { "epoch": 0.734242535938076, "grad_norm": 1.3430751220172323, "learning_rate": 6.004632650461542e-06, "loss": 0.5674, "step": 5976 }, { "epoch": 0.734365401154933, "grad_norm": 1.1960903613778302, "learning_rate": 5.9994853519486284e-06, "loss": 0.5804, "step": 5977 }, { "epoch": 0.7344882663717901, "grad_norm": 1.09391820737372, "learning_rate": 5.994339709022012e-06, "loss": 0.6682, "step": 5978 }, { "epoch": 0.7346111315886472, "grad_norm": 1.582821129127882, "learning_rate": 5.9891957226282e-06, "loss": 0.5616, "step": 5979 }, { "epoch": 0.7347339968055043, "grad_norm": 1.2774247955815607, "learning_rate": 5.984053393713405e-06, "loss": 0.5446, "step": 5980 }, { "epoch": 0.7348568620223614, "grad_norm": 1.1364319389711852, "learning_rate": 5.97891272322351e-06, "loss": 0.6178, "step": 5981 }, { "epoch": 0.7349797272392186, "grad_norm": 1.149309820862441, "learning_rate": 5.973773712104122e-06, "loss": 0.6612, "step": 5982 }, { "epoch": 0.7351025924560757, "grad_norm": 1.3380929231928833, "learning_rate": 5.968636361300512e-06, "loss": 0.5601, "step": 5983 }, { "epoch": 0.7352254576729328, "grad_norm": 1.3770975218221415, "learning_rate": 5.963500671757684e-06, "loss": 0.6472, "step": 5984 }, { "epoch": 0.7353483228897899, "grad_norm": 1.2442066080140806, "learning_rate": 5.958366644420298e-06, "loss": 0.7093, "step": 5985 }, { "epoch": 0.735471188106647, "grad_norm": 1.0344792525846933, "learning_rate": 5.9532342802327315e-06, "loss": 0.579, "step": 5986 }, { "epoch": 0.7355940533235041, "grad_norm": 1.1072300177138872, "learning_rate": 5.948103580139052e-06, "loss": 0.5611, "step": 5987 }, { "epoch": 0.7357169185403613, "grad_norm": 1.2861780738839328, "learning_rate": 5.942974545083013e-06, "loss": 0.5147, "step": 5988 }, { "epoch": 0.7358397837572184, "grad_norm": 1.1631968816144655, "learning_rate": 5.937847176008072e-06, "loss": 0.5004, "step": 5989 }, { "epoch": 0.7359626489740755, "grad_norm": 1.4123513251296511, "learning_rate": 5.9327214738573645e-06, "loss": 0.4884, "step": 5990 }, { "epoch": 0.7360855141909326, "grad_norm": 1.088185507115247, "learning_rate": 5.927597439573748e-06, "loss": 0.5288, "step": 5991 }, { "epoch": 0.7362083794077896, "grad_norm": 1.2043624305397838, "learning_rate": 5.92247507409974e-06, "loss": 0.5604, "step": 5992 }, { "epoch": 0.7363312446246467, "grad_norm": 1.2374005616242658, "learning_rate": 5.917354378377579e-06, "loss": 0.6005, "step": 5993 }, { "epoch": 0.7364541098415038, "grad_norm": 1.2192300886964207, "learning_rate": 5.912235353349171e-06, "loss": 0.5748, "step": 5994 }, { "epoch": 0.736576975058361, "grad_norm": 1.1630264840369593, "learning_rate": 5.907117999956134e-06, "loss": 0.6274, "step": 5995 }, { "epoch": 0.7366998402752181, "grad_norm": 1.183241522426505, "learning_rate": 5.9020023191397766e-06, "loss": 0.6259, "step": 5996 }, { "epoch": 0.7368227054920752, "grad_norm": 1.2744069643042508, "learning_rate": 5.896888311841084e-06, "loss": 0.5441, "step": 5997 }, { "epoch": 0.7369455707089323, "grad_norm": 1.169968376460733, "learning_rate": 5.891775979000752e-06, "loss": 0.5319, "step": 5998 }, { "epoch": 0.7370684359257894, "grad_norm": 1.2011447435398324, "learning_rate": 5.886665321559158e-06, "loss": 0.6592, "step": 5999 }, { "epoch": 0.7371913011426465, "grad_norm": 1.0319320468139719, "learning_rate": 5.881556340456382e-06, "loss": 0.5846, "step": 6000 }, { "epoch": 0.7373141663595036, "grad_norm": 1.2698817725369442, "learning_rate": 5.876449036632177e-06, "loss": 0.6434, "step": 6001 }, { "epoch": 0.7374370315763608, "grad_norm": 1.0166722020708958, "learning_rate": 5.871343411026004e-06, "loss": 0.6642, "step": 6002 }, { "epoch": 0.7375598967932179, "grad_norm": 1.4917398292443638, "learning_rate": 5.866239464577008e-06, "loss": 0.6124, "step": 6003 }, { "epoch": 0.737682762010075, "grad_norm": 1.1968932631243, "learning_rate": 5.8611371982240344e-06, "loss": 0.5535, "step": 6004 }, { "epoch": 0.7378056272269321, "grad_norm": 1.2764286401208274, "learning_rate": 5.856036612905598e-06, "loss": 0.6282, "step": 6005 }, { "epoch": 0.7379284924437891, "grad_norm": 1.3193297984251757, "learning_rate": 5.850937709559929e-06, "loss": 0.5564, "step": 6006 }, { "epoch": 0.7380513576606462, "grad_norm": 1.2521457302985337, "learning_rate": 5.845840489124939e-06, "loss": 0.5837, "step": 6007 }, { "epoch": 0.7381742228775033, "grad_norm": 1.380659765469898, "learning_rate": 5.840744952538218e-06, "loss": 0.5015, "step": 6008 }, { "epoch": 0.7382970880943605, "grad_norm": 1.3265391379375717, "learning_rate": 5.835651100737064e-06, "loss": 0.5972, "step": 6009 }, { "epoch": 0.7384199533112176, "grad_norm": 1.297428109609007, "learning_rate": 5.8305589346584555e-06, "loss": 0.5728, "step": 6010 }, { "epoch": 0.7385428185280747, "grad_norm": 1.1726722835765486, "learning_rate": 5.825468455239073e-06, "loss": 0.7074, "step": 6011 }, { "epoch": 0.7386656837449318, "grad_norm": 1.3524320325360817, "learning_rate": 5.820379663415262e-06, "loss": 0.5133, "step": 6012 }, { "epoch": 0.7387885489617889, "grad_norm": 1.1589277357243195, "learning_rate": 5.81529256012308e-06, "loss": 0.5676, "step": 6013 }, { "epoch": 0.738911414178646, "grad_norm": 1.237004786707932, "learning_rate": 5.810207146298273e-06, "loss": 0.5993, "step": 6014 }, { "epoch": 0.7390342793955031, "grad_norm": 1.0277524056318577, "learning_rate": 5.8051234228762574e-06, "loss": 0.4883, "step": 6015 }, { "epoch": 0.7391571446123603, "grad_norm": 1.3196894386463087, "learning_rate": 5.800041390792163e-06, "loss": 0.6824, "step": 6016 }, { "epoch": 0.7392800098292174, "grad_norm": 1.2234524638160185, "learning_rate": 5.79496105098078e-06, "loss": 0.6819, "step": 6017 }, { "epoch": 0.7394028750460745, "grad_norm": 1.30020437962447, "learning_rate": 5.789882404376626e-06, "loss": 0.4918, "step": 6018 }, { "epoch": 0.7395257402629316, "grad_norm": 1.3522444806470741, "learning_rate": 5.7848054519138686e-06, "loss": 0.6636, "step": 6019 }, { "epoch": 0.7396486054797887, "grad_norm": 1.2802008188229215, "learning_rate": 5.77973019452639e-06, "loss": 0.5443, "step": 6020 }, { "epoch": 0.7397714706966457, "grad_norm": 1.5265771170141045, "learning_rate": 5.7746566331477375e-06, "loss": 0.591, "step": 6021 }, { "epoch": 0.7398943359135028, "grad_norm": 1.1901625382169225, "learning_rate": 5.769584768711178e-06, "loss": 0.5486, "step": 6022 }, { "epoch": 0.74001720113036, "grad_norm": 1.1494136577626306, "learning_rate": 5.764514602149634e-06, "loss": 0.6355, "step": 6023 }, { "epoch": 0.7401400663472171, "grad_norm": 1.0393758524348675, "learning_rate": 5.7594461343957416e-06, "loss": 0.4735, "step": 6024 }, { "epoch": 0.7402629315640742, "grad_norm": 1.1700334866565187, "learning_rate": 5.7543793663817995e-06, "loss": 0.6593, "step": 6025 }, { "epoch": 0.7403857967809313, "grad_norm": 1.3691896217535486, "learning_rate": 5.749314299039813e-06, "loss": 0.6212, "step": 6026 }, { "epoch": 0.7405086619977884, "grad_norm": 1.337860819267661, "learning_rate": 5.744250933301473e-06, "loss": 0.5703, "step": 6027 }, { "epoch": 0.7406315272146455, "grad_norm": 1.0834616116919549, "learning_rate": 5.739189270098137e-06, "loss": 0.595, "step": 6028 }, { "epoch": 0.7407543924315027, "grad_norm": 1.222051423102389, "learning_rate": 5.734129310360889e-06, "loss": 0.5357, "step": 6029 }, { "epoch": 0.7408772576483598, "grad_norm": 1.3341920305954666, "learning_rate": 5.729071055020456e-06, "loss": 0.5699, "step": 6030 }, { "epoch": 0.7410001228652169, "grad_norm": 1.2761497765819843, "learning_rate": 5.724014505007285e-06, "loss": 0.5389, "step": 6031 }, { "epoch": 0.741122988082074, "grad_norm": 1.1614709998045407, "learning_rate": 5.7189596612514814e-06, "loss": 0.5717, "step": 6032 }, { "epoch": 0.7412458532989311, "grad_norm": 1.2432202064633506, "learning_rate": 5.71390652468286e-06, "loss": 0.596, "step": 6033 }, { "epoch": 0.7413687185157882, "grad_norm": 1.2830264710161703, "learning_rate": 5.7088550962309175e-06, "loss": 0.5265, "step": 6034 }, { "epoch": 0.7414915837326453, "grad_norm": 1.0342901730090683, "learning_rate": 5.703805376824817e-06, "loss": 0.5165, "step": 6035 }, { "epoch": 0.7416144489495023, "grad_norm": 1.4783153634721442, "learning_rate": 5.69875736739343e-06, "loss": 0.6446, "step": 6036 }, { "epoch": 0.7417373141663595, "grad_norm": 1.320644368665161, "learning_rate": 5.693711068865307e-06, "loss": 0.6387, "step": 6037 }, { "epoch": 0.7418601793832166, "grad_norm": 1.1737786084778394, "learning_rate": 5.688666482168682e-06, "loss": 0.5594, "step": 6038 }, { "epoch": 0.7419830446000737, "grad_norm": 1.031752689088059, "learning_rate": 5.683623608231467e-06, "loss": 0.5765, "step": 6039 }, { "epoch": 0.7421059098169308, "grad_norm": 1.2002489814169268, "learning_rate": 5.678582447981271e-06, "loss": 0.6122, "step": 6040 }, { "epoch": 0.7422287750337879, "grad_norm": 1.2611756341580498, "learning_rate": 5.673543002345383e-06, "loss": 0.6215, "step": 6041 }, { "epoch": 0.742351640250645, "grad_norm": 1.4438676866866063, "learning_rate": 5.66850527225078e-06, "loss": 0.674, "step": 6042 }, { "epoch": 0.7424745054675022, "grad_norm": 1.4561988233519245, "learning_rate": 5.663469258624109e-06, "loss": 0.699, "step": 6043 }, { "epoch": 0.7425973706843593, "grad_norm": 1.0284174656433023, "learning_rate": 5.658434962391719e-06, "loss": 0.5688, "step": 6044 }, { "epoch": 0.7427202359012164, "grad_norm": 1.1855071106902055, "learning_rate": 5.653402384479642e-06, "loss": 0.5861, "step": 6045 }, { "epoch": 0.7428431011180735, "grad_norm": 1.2311506919796311, "learning_rate": 5.648371525813575e-06, "loss": 0.6429, "step": 6046 }, { "epoch": 0.7429659663349306, "grad_norm": 1.3117150448098116, "learning_rate": 5.6433423873189184e-06, "loss": 0.5261, "step": 6047 }, { "epoch": 0.7430888315517877, "grad_norm": 1.3101177328395803, "learning_rate": 5.638314969920749e-06, "loss": 0.5316, "step": 6048 }, { "epoch": 0.7432116967686448, "grad_norm": 1.7512872937941992, "learning_rate": 5.633289274543835e-06, "loss": 0.629, "step": 6049 }, { "epoch": 0.7433345619855019, "grad_norm": 1.093438423117958, "learning_rate": 5.628265302112607e-06, "loss": 0.5206, "step": 6050 }, { "epoch": 0.743457427202359, "grad_norm": 1.2703556848558022, "learning_rate": 5.623243053551199e-06, "loss": 0.6004, "step": 6051 }, { "epoch": 0.7435802924192161, "grad_norm": 1.0767379304353755, "learning_rate": 5.618222529783428e-06, "loss": 0.5467, "step": 6052 }, { "epoch": 0.7437031576360732, "grad_norm": 1.090686818516809, "learning_rate": 5.613203731732772e-06, "loss": 0.5954, "step": 6053 }, { "epoch": 0.7438260228529303, "grad_norm": 1.345758458008963, "learning_rate": 5.608186660322421e-06, "loss": 0.6289, "step": 6054 }, { "epoch": 0.7439488880697874, "grad_norm": 1.2378387278421428, "learning_rate": 5.603171316475213e-06, "loss": 0.6071, "step": 6055 }, { "epoch": 0.7440717532866445, "grad_norm": 1.407903280891253, "learning_rate": 5.598157701113714e-06, "loss": 0.5653, "step": 6056 }, { "epoch": 0.7441946185035017, "grad_norm": 1.2773942282658086, "learning_rate": 5.593145815160127e-06, "loss": 0.5962, "step": 6057 }, { "epoch": 0.7443174837203588, "grad_norm": 1.0284426236178532, "learning_rate": 5.588135659536366e-06, "loss": 0.6871, "step": 6058 }, { "epoch": 0.7444403489372159, "grad_norm": 1.2113082018050378, "learning_rate": 5.583127235164003e-06, "loss": 0.581, "step": 6059 }, { "epoch": 0.744563214154073, "grad_norm": 1.3996241452045892, "learning_rate": 5.578120542964324e-06, "loss": 0.6388, "step": 6060 }, { "epoch": 0.7446860793709301, "grad_norm": 1.448496226776883, "learning_rate": 5.573115583858262e-06, "loss": 0.582, "step": 6061 }, { "epoch": 0.7448089445877872, "grad_norm": 1.3096975882700905, "learning_rate": 5.568112358766461e-06, "loss": 0.5868, "step": 6062 }, { "epoch": 0.7449318098046444, "grad_norm": 1.2467837349311237, "learning_rate": 5.563110868609215e-06, "loss": 0.6521, "step": 6063 }, { "epoch": 0.7450546750215015, "grad_norm": 1.1651963975809811, "learning_rate": 5.5581111143065265e-06, "loss": 0.5628, "step": 6064 }, { "epoch": 0.7451775402383585, "grad_norm": 1.3373911580584463, "learning_rate": 5.55311309677807e-06, "loss": 0.7413, "step": 6065 }, { "epoch": 0.7453004054552156, "grad_norm": 1.0156222455888035, "learning_rate": 5.548116816943191e-06, "loss": 0.51, "step": 6066 }, { "epoch": 0.7454232706720727, "grad_norm": 1.0477607363752004, "learning_rate": 5.543122275720922e-06, "loss": 0.496, "step": 6067 }, { "epoch": 0.7455461358889298, "grad_norm": 1.107352628700259, "learning_rate": 5.538129474029984e-06, "loss": 0.6389, "step": 6068 }, { "epoch": 0.7456690011057869, "grad_norm": 1.3697580022233429, "learning_rate": 5.533138412788771e-06, "loss": 0.5557, "step": 6069 }, { "epoch": 0.745791866322644, "grad_norm": 1.4529111356403113, "learning_rate": 5.528149092915346e-06, "loss": 0.6221, "step": 6070 }, { "epoch": 0.7459147315395012, "grad_norm": 1.2491690982821884, "learning_rate": 5.523161515327469e-06, "loss": 0.6295, "step": 6071 }, { "epoch": 0.7460375967563583, "grad_norm": 1.2559167473495108, "learning_rate": 5.518175680942577e-06, "loss": 0.4856, "step": 6072 }, { "epoch": 0.7461604619732154, "grad_norm": 1.3168904145230191, "learning_rate": 5.513191590677772e-06, "loss": 0.5608, "step": 6073 }, { "epoch": 0.7462833271900725, "grad_norm": 1.3354579303629706, "learning_rate": 5.508209245449849e-06, "loss": 0.5437, "step": 6074 }, { "epoch": 0.7464061924069296, "grad_norm": 1.1841902713986125, "learning_rate": 5.503228646175278e-06, "loss": 0.6426, "step": 6075 }, { "epoch": 0.7465290576237867, "grad_norm": 1.2754241581672172, "learning_rate": 5.498249793770216e-06, "loss": 0.6013, "step": 6076 }, { "epoch": 0.7466519228406439, "grad_norm": 0.9659092244574345, "learning_rate": 5.493272689150478e-06, "loss": 0.538, "step": 6077 }, { "epoch": 0.746774788057501, "grad_norm": 1.252808086029255, "learning_rate": 5.4882973332315746e-06, "loss": 0.6307, "step": 6078 }, { "epoch": 0.746897653274358, "grad_norm": 1.2146466694336877, "learning_rate": 5.4833237269286915e-06, "loss": 0.4044, "step": 6079 }, { "epoch": 0.7470205184912151, "grad_norm": 1.242132717296946, "learning_rate": 5.478351871156696e-06, "loss": 0.6377, "step": 6080 }, { "epoch": 0.7471433837080722, "grad_norm": 1.4479828061014228, "learning_rate": 5.473381766830119e-06, "loss": 0.4967, "step": 6081 }, { "epoch": 0.7472662489249293, "grad_norm": 1.142134231146321, "learning_rate": 5.468413414863184e-06, "loss": 0.8034, "step": 6082 }, { "epoch": 0.7473891141417864, "grad_norm": 1.259165955092884, "learning_rate": 5.463446816169792e-06, "loss": 0.5917, "step": 6083 }, { "epoch": 0.7475119793586436, "grad_norm": 1.2563544795477573, "learning_rate": 5.458481971663505e-06, "loss": 0.5482, "step": 6084 }, { "epoch": 0.7476348445755007, "grad_norm": 1.3367601963924942, "learning_rate": 5.453518882257586e-06, "loss": 0.6902, "step": 6085 }, { "epoch": 0.7477577097923578, "grad_norm": 1.2211660584750295, "learning_rate": 5.448557548864948e-06, "loss": 0.5078, "step": 6086 }, { "epoch": 0.7478805750092149, "grad_norm": 1.4390848227927397, "learning_rate": 5.4435979723982145e-06, "loss": 0.6475, "step": 6087 }, { "epoch": 0.748003440226072, "grad_norm": 1.3301476730524902, "learning_rate": 5.438640153769654e-06, "loss": 0.6309, "step": 6088 }, { "epoch": 0.7481263054429291, "grad_norm": 1.3365094298724436, "learning_rate": 5.433684093891231e-06, "loss": 0.5787, "step": 6089 }, { "epoch": 0.7482491706597862, "grad_norm": 1.0099168728841101, "learning_rate": 5.428729793674582e-06, "loss": 0.6494, "step": 6090 }, { "epoch": 0.7483720358766434, "grad_norm": 1.2636693648761934, "learning_rate": 5.423777254031013e-06, "loss": 0.5569, "step": 6091 }, { "epoch": 0.7484949010935005, "grad_norm": 1.625175580808489, "learning_rate": 5.4188264758715165e-06, "loss": 0.6215, "step": 6092 }, { "epoch": 0.7486177663103576, "grad_norm": 1.3529318446552734, "learning_rate": 5.4138774601067456e-06, "loss": 0.5353, "step": 6093 }, { "epoch": 0.7487406315272146, "grad_norm": 1.4421611053600714, "learning_rate": 5.408930207647057e-06, "loss": 0.6331, "step": 6094 }, { "epoch": 0.7488634967440717, "grad_norm": 1.4879297539763068, "learning_rate": 5.403984719402452e-06, "loss": 0.6621, "step": 6095 }, { "epoch": 0.7489863619609288, "grad_norm": 1.1211069764683337, "learning_rate": 5.399040996282631e-06, "loss": 0.6214, "step": 6096 }, { "epoch": 0.7491092271777859, "grad_norm": 0.9889354233537592, "learning_rate": 5.394099039196947e-06, "loss": 0.6027, "step": 6097 }, { "epoch": 0.7492320923946431, "grad_norm": 1.3037438374079497, "learning_rate": 5.38915884905445e-06, "loss": 0.5803, "step": 6098 }, { "epoch": 0.7493549576115002, "grad_norm": 1.2825431032011878, "learning_rate": 5.384220426763854e-06, "loss": 0.6134, "step": 6099 }, { "epoch": 0.7494778228283573, "grad_norm": 1.316272283376697, "learning_rate": 5.379283773233556e-06, "loss": 0.6671, "step": 6100 }, { "epoch": 0.7496006880452144, "grad_norm": 1.0121925444146407, "learning_rate": 5.374348889371608e-06, "loss": 0.5582, "step": 6101 }, { "epoch": 0.7497235532620715, "grad_norm": 1.1676704592210234, "learning_rate": 5.369415776085759e-06, "loss": 0.5957, "step": 6102 }, { "epoch": 0.7498464184789286, "grad_norm": 2.3826005274307875, "learning_rate": 5.364484434283427e-06, "loss": 0.7237, "step": 6103 }, { "epoch": 0.7499692836957857, "grad_norm": 1.26556584812381, "learning_rate": 5.3595548648716884e-06, "loss": 0.6356, "step": 6104 }, { "epoch": 0.7500921489126429, "grad_norm": 1.2742291296398531, "learning_rate": 5.354627068757311e-06, "loss": 0.4672, "step": 6105 }, { "epoch": 0.7502150141295, "grad_norm": 1.529380974434644, "learning_rate": 5.349701046846734e-06, "loss": 0.657, "step": 6106 }, { "epoch": 0.7503378793463571, "grad_norm": 1.0173141226317903, "learning_rate": 5.344776800046068e-06, "loss": 0.5586, "step": 6107 }, { "epoch": 0.7504607445632141, "grad_norm": 1.2751864603319956, "learning_rate": 5.33985432926109e-06, "loss": 0.6142, "step": 6108 }, { "epoch": 0.7505836097800712, "grad_norm": 1.0585410319364028, "learning_rate": 5.334933635397261e-06, "loss": 0.639, "step": 6109 }, { "epoch": 0.7507064749969283, "grad_norm": 1.1691734362489559, "learning_rate": 5.330014719359712e-06, "loss": 0.6146, "step": 6110 }, { "epoch": 0.7508293402137854, "grad_norm": 1.196801053660907, "learning_rate": 5.325097582053239e-06, "loss": 0.5665, "step": 6111 }, { "epoch": 0.7509522054306426, "grad_norm": 1.054031998669263, "learning_rate": 5.320182224382322e-06, "loss": 0.6945, "step": 6112 }, { "epoch": 0.7510750706474997, "grad_norm": 1.1985353137494985, "learning_rate": 5.315268647251109e-06, "loss": 0.5241, "step": 6113 }, { "epoch": 0.7511979358643568, "grad_norm": 1.4198611645549337, "learning_rate": 5.310356851563427e-06, "loss": 0.5883, "step": 6114 }, { "epoch": 0.7513208010812139, "grad_norm": 1.4801667084919736, "learning_rate": 5.305446838222757e-06, "loss": 0.5917, "step": 6115 }, { "epoch": 0.751443666298071, "grad_norm": 1.657229424621451, "learning_rate": 5.300538608132269e-06, "loss": 0.5299, "step": 6116 }, { "epoch": 0.7515665315149281, "grad_norm": 1.1982248542615805, "learning_rate": 5.295632162194806e-06, "loss": 0.5749, "step": 6117 }, { "epoch": 0.7516893967317853, "grad_norm": 1.235749036147684, "learning_rate": 5.290727501312867e-06, "loss": 0.4401, "step": 6118 }, { "epoch": 0.7518122619486424, "grad_norm": 1.0895257425129083, "learning_rate": 5.285824626388641e-06, "loss": 0.6312, "step": 6119 }, { "epoch": 0.7519351271654995, "grad_norm": 1.3182467364675798, "learning_rate": 5.280923538323967e-06, "loss": 0.64, "step": 6120 }, { "epoch": 0.7520579923823566, "grad_norm": 1.2106496567939957, "learning_rate": 5.276024238020389e-06, "loss": 0.4504, "step": 6121 }, { "epoch": 0.7521808575992137, "grad_norm": 1.1865498924379072, "learning_rate": 5.2711267263790845e-06, "loss": 0.5681, "step": 6122 }, { "epoch": 0.7523037228160707, "grad_norm": 1.2874707682428514, "learning_rate": 5.2662310043009295e-06, "loss": 0.5759, "step": 6123 }, { "epoch": 0.7524265880329278, "grad_norm": 1.3517991895554586, "learning_rate": 5.2613370726864445e-06, "loss": 0.5021, "step": 6124 }, { "epoch": 0.752549453249785, "grad_norm": 1.2013241688223106, "learning_rate": 5.256444932435859e-06, "loss": 0.6487, "step": 6125 }, { "epoch": 0.7526723184666421, "grad_norm": 1.0977314099593183, "learning_rate": 5.251554584449034e-06, "loss": 0.6188, "step": 6126 }, { "epoch": 0.7527951836834992, "grad_norm": 1.5069596800774885, "learning_rate": 5.246666029625527e-06, "loss": 0.5148, "step": 6127 }, { "epoch": 0.7529180489003563, "grad_norm": 1.2193935419740007, "learning_rate": 5.241779268864546e-06, "loss": 0.5836, "step": 6128 }, { "epoch": 0.7530409141172134, "grad_norm": 1.3105081364373665, "learning_rate": 5.2368943030649835e-06, "loss": 0.6254, "step": 6129 }, { "epoch": 0.7531637793340705, "grad_norm": 1.2289434304229043, "learning_rate": 5.2320111331254054e-06, "loss": 0.5794, "step": 6130 }, { "epoch": 0.7532866445509276, "grad_norm": 1.184804193250878, "learning_rate": 5.227129759944024e-06, "loss": 0.5688, "step": 6131 }, { "epoch": 0.7534095097677848, "grad_norm": 1.3139461522279745, "learning_rate": 5.2222501844187465e-06, "loss": 0.549, "step": 6132 }, { "epoch": 0.7535323749846419, "grad_norm": 1.4339102776878776, "learning_rate": 5.217372407447135e-06, "loss": 0.5826, "step": 6133 }, { "epoch": 0.753655240201499, "grad_norm": 1.0487029947852495, "learning_rate": 5.212496429926432e-06, "loss": 0.484, "step": 6134 }, { "epoch": 0.7537781054183561, "grad_norm": 1.4212711989058104, "learning_rate": 5.2076222527535296e-06, "loss": 0.679, "step": 6135 }, { "epoch": 0.7539009706352132, "grad_norm": 1.2610673705348483, "learning_rate": 5.202749876825011e-06, "loss": 0.5446, "step": 6136 }, { "epoch": 0.7540238358520703, "grad_norm": 1.3027082000811856, "learning_rate": 5.197879303037119e-06, "loss": 0.6387, "step": 6137 }, { "epoch": 0.7541467010689273, "grad_norm": 1.2118673595606397, "learning_rate": 5.193010532285755e-06, "loss": 0.5791, "step": 6138 }, { "epoch": 0.7542695662857845, "grad_norm": 1.0593746234212837, "learning_rate": 5.188143565466503e-06, "loss": 0.4491, "step": 6139 }, { "epoch": 0.7543924315026416, "grad_norm": 1.3584070051575179, "learning_rate": 5.183278403474611e-06, "loss": 0.7176, "step": 6140 }, { "epoch": 0.7545152967194987, "grad_norm": 1.499931011260162, "learning_rate": 5.1784150472049975e-06, "loss": 0.6363, "step": 6141 }, { "epoch": 0.7546381619363558, "grad_norm": 1.5272644928820764, "learning_rate": 5.173553497552235e-06, "loss": 0.6509, "step": 6142 }, { "epoch": 0.7547610271532129, "grad_norm": 1.4759613239165421, "learning_rate": 5.168693755410581e-06, "loss": 0.4918, "step": 6143 }, { "epoch": 0.75488389237007, "grad_norm": 1.228810580076364, "learning_rate": 5.163835821673952e-06, "loss": 0.5377, "step": 6144 }, { "epoch": 0.7550067575869271, "grad_norm": 1.1113373284193049, "learning_rate": 5.158979697235938e-06, "loss": 0.6257, "step": 6145 }, { "epoch": 0.7551296228037843, "grad_norm": 1.105737129222101, "learning_rate": 5.154125382989783e-06, "loss": 0.6495, "step": 6146 }, { "epoch": 0.7552524880206414, "grad_norm": 1.118900294928338, "learning_rate": 5.149272879828411e-06, "loss": 0.5114, "step": 6147 }, { "epoch": 0.7553753532374985, "grad_norm": 1.1864789048762436, "learning_rate": 5.144422188644414e-06, "loss": 0.6526, "step": 6148 }, { "epoch": 0.7554982184543556, "grad_norm": 1.1015799944886335, "learning_rate": 5.139573310330035e-06, "loss": 0.5812, "step": 6149 }, { "epoch": 0.7556210836712127, "grad_norm": 1.6425011729817087, "learning_rate": 5.134726245777202e-06, "loss": 0.5987, "step": 6150 }, { "epoch": 0.7557439488880698, "grad_norm": 1.3189035533624787, "learning_rate": 5.1298809958774884e-06, "loss": 0.5212, "step": 6151 }, { "epoch": 0.7558668141049268, "grad_norm": 1.0176286382053101, "learning_rate": 5.125037561522166e-06, "loss": 0.59, "step": 6152 }, { "epoch": 0.755989679321784, "grad_norm": 1.407559640736647, "learning_rate": 5.120195943602138e-06, "loss": 0.6334, "step": 6153 }, { "epoch": 0.7561125445386411, "grad_norm": 1.2585253669721768, "learning_rate": 5.115356143007993e-06, "loss": 0.5648, "step": 6154 }, { "epoch": 0.7562354097554982, "grad_norm": 1.121747527505758, "learning_rate": 5.110518160629987e-06, "loss": 0.5633, "step": 6155 }, { "epoch": 0.7563582749723553, "grad_norm": 0.9225109886306078, "learning_rate": 5.105681997358023e-06, "loss": 0.6463, "step": 6156 }, { "epoch": 0.7564811401892124, "grad_norm": 1.1917777836092838, "learning_rate": 5.100847654081695e-06, "loss": 0.585, "step": 6157 }, { "epoch": 0.7566040054060695, "grad_norm": 1.0895499549305776, "learning_rate": 5.096015131690233e-06, "loss": 0.6331, "step": 6158 }, { "epoch": 0.7567268706229267, "grad_norm": 0.901903859344225, "learning_rate": 5.091184431072567e-06, "loss": 0.5574, "step": 6159 }, { "epoch": 0.7568497358397838, "grad_norm": 1.4040315849000065, "learning_rate": 5.086355553117259e-06, "loss": 0.6491, "step": 6160 }, { "epoch": 0.7569726010566409, "grad_norm": 1.145943015333412, "learning_rate": 5.08152849871256e-06, "loss": 0.4806, "step": 6161 }, { "epoch": 0.757095466273498, "grad_norm": 1.2144487851649939, "learning_rate": 5.07670326874636e-06, "loss": 0.5783, "step": 6162 }, { "epoch": 0.7572183314903551, "grad_norm": 1.0630192076072107, "learning_rate": 5.07187986410625e-06, "loss": 0.627, "step": 6163 }, { "epoch": 0.7573411967072122, "grad_norm": 1.441261929979935, "learning_rate": 5.067058285679448e-06, "loss": 0.5747, "step": 6164 }, { "epoch": 0.7574640619240693, "grad_norm": 1.2627915262666378, "learning_rate": 5.06223853435286e-06, "loss": 0.5178, "step": 6165 }, { "epoch": 0.7575869271409265, "grad_norm": 1.2279999824632948, "learning_rate": 5.057420611013041e-06, "loss": 0.6524, "step": 6166 }, { "epoch": 0.7577097923577835, "grad_norm": 1.268294591501964, "learning_rate": 5.052604516546221e-06, "loss": 0.5171, "step": 6167 }, { "epoch": 0.7578326575746406, "grad_norm": 1.544119733374258, "learning_rate": 5.047790251838293e-06, "loss": 0.6296, "step": 6168 }, { "epoch": 0.7579555227914977, "grad_norm": 1.3334093714663329, "learning_rate": 5.042977817774802e-06, "loss": 0.5049, "step": 6169 }, { "epoch": 0.7580783880083548, "grad_norm": 1.387452131540937, "learning_rate": 5.038167215240967e-06, "loss": 0.5679, "step": 6170 }, { "epoch": 0.7582012532252119, "grad_norm": 1.5397749387987048, "learning_rate": 5.033358445121669e-06, "loss": 0.5457, "step": 6171 }, { "epoch": 0.758324118442069, "grad_norm": 1.1448887450597311, "learning_rate": 5.028551508301453e-06, "loss": 0.5245, "step": 6172 }, { "epoch": 0.7584469836589262, "grad_norm": 0.9206369068121089, "learning_rate": 5.0237464056645155e-06, "loss": 0.577, "step": 6173 }, { "epoch": 0.7585698488757833, "grad_norm": 1.1482537768766417, "learning_rate": 5.0189431380947295e-06, "loss": 0.5781, "step": 6174 }, { "epoch": 0.7586927140926404, "grad_norm": 1.0220101529823937, "learning_rate": 5.014141706475626e-06, "loss": 0.5492, "step": 6175 }, { "epoch": 0.7588155793094975, "grad_norm": 1.3622774311055634, "learning_rate": 5.009342111690393e-06, "loss": 0.5655, "step": 6176 }, { "epoch": 0.7589384445263546, "grad_norm": 1.0652049592489619, "learning_rate": 5.0045443546218855e-06, "loss": 0.6516, "step": 6177 }, { "epoch": 0.7590613097432117, "grad_norm": 1.0541472095861057, "learning_rate": 4.999748436152621e-06, "loss": 0.5034, "step": 6178 }, { "epoch": 0.7591841749600688, "grad_norm": 1.3687837913679113, "learning_rate": 4.9949543571647834e-06, "loss": 0.4931, "step": 6179 }, { "epoch": 0.759307040176926, "grad_norm": 1.2365334204426892, "learning_rate": 4.9901621185402005e-06, "loss": 0.5504, "step": 6180 }, { "epoch": 0.759429905393783, "grad_norm": 1.1483223381318586, "learning_rate": 4.985371721160381e-06, "loss": 0.5601, "step": 6181 }, { "epoch": 0.7595527706106401, "grad_norm": 1.268905294065355, "learning_rate": 4.980583165906486e-06, "loss": 0.6327, "step": 6182 }, { "epoch": 0.7596756358274972, "grad_norm": 1.126703186176789, "learning_rate": 4.9757964536593444e-06, "loss": 0.5699, "step": 6183 }, { "epoch": 0.7597985010443543, "grad_norm": 1.1687955472833225, "learning_rate": 4.971011585299431e-06, "loss": 0.5893, "step": 6184 }, { "epoch": 0.7599213662612114, "grad_norm": 1.7013007344719617, "learning_rate": 4.966228561706895e-06, "loss": 0.6978, "step": 6185 }, { "epoch": 0.7600442314780685, "grad_norm": 1.282032821629575, "learning_rate": 4.9614473837615505e-06, "loss": 0.5414, "step": 6186 }, { "epoch": 0.7601670966949257, "grad_norm": 1.1737288099912215, "learning_rate": 4.956668052342852e-06, "loss": 0.5107, "step": 6187 }, { "epoch": 0.7602899619117828, "grad_norm": 1.081648149539416, "learning_rate": 4.951890568329937e-06, "loss": 0.5472, "step": 6188 }, { "epoch": 0.7604128271286399, "grad_norm": 1.2943207177251297, "learning_rate": 4.947114932601577e-06, "loss": 0.5688, "step": 6189 }, { "epoch": 0.760535692345497, "grad_norm": 1.1216193742291742, "learning_rate": 4.94234114603624e-06, "loss": 0.5954, "step": 6190 }, { "epoch": 0.7606585575623541, "grad_norm": 1.136562457481189, "learning_rate": 4.937569209512019e-06, "loss": 0.6068, "step": 6191 }, { "epoch": 0.7607814227792112, "grad_norm": 1.4005803900789797, "learning_rate": 4.9327991239066885e-06, "loss": 0.6366, "step": 6192 }, { "epoch": 0.7609042879960684, "grad_norm": 1.109078145593001, "learning_rate": 4.928030890097666e-06, "loss": 0.4931, "step": 6193 }, { "epoch": 0.7610271532129255, "grad_norm": 1.3206458846754716, "learning_rate": 4.923264508962044e-06, "loss": 0.7064, "step": 6194 }, { "epoch": 0.7611500184297826, "grad_norm": 1.199083028037502, "learning_rate": 4.91849998137657e-06, "loss": 0.6143, "step": 6195 }, { "epoch": 0.7612728836466396, "grad_norm": 1.1561876973244052, "learning_rate": 4.9137373082176336e-06, "loss": 0.7175, "step": 6196 }, { "epoch": 0.7613957488634967, "grad_norm": 1.236131186143729, "learning_rate": 4.908976490361316e-06, "loss": 0.6585, "step": 6197 }, { "epoch": 0.7615186140803538, "grad_norm": 1.3188430926487582, "learning_rate": 4.904217528683327e-06, "loss": 0.6059, "step": 6198 }, { "epoch": 0.7616414792972109, "grad_norm": 1.2367378743342927, "learning_rate": 4.899460424059056e-06, "loss": 0.5243, "step": 6199 }, { "epoch": 0.761764344514068, "grad_norm": 1.4464975492447327, "learning_rate": 4.894705177363523e-06, "loss": 0.6234, "step": 6200 }, { "epoch": 0.7618872097309252, "grad_norm": 1.3710133008679388, "learning_rate": 4.88995178947145e-06, "loss": 0.5522, "step": 6201 }, { "epoch": 0.7620100749477823, "grad_norm": 1.5029693734151188, "learning_rate": 4.885200261257172e-06, "loss": 0.5972, "step": 6202 }, { "epoch": 0.7621329401646394, "grad_norm": 1.3219938948833083, "learning_rate": 4.880450593594717e-06, "loss": 0.615, "step": 6203 }, { "epoch": 0.7622558053814965, "grad_norm": 1.2337912491301737, "learning_rate": 4.87570278735774e-06, "loss": 0.5287, "step": 6204 }, { "epoch": 0.7623786705983536, "grad_norm": 1.175639188728221, "learning_rate": 4.870956843419579e-06, "loss": 0.5069, "step": 6205 }, { "epoch": 0.7625015358152107, "grad_norm": 1.2774523654752785, "learning_rate": 4.866212762653221e-06, "loss": 0.5115, "step": 6206 }, { "epoch": 0.7626244010320679, "grad_norm": 1.400148555438113, "learning_rate": 4.861470545931302e-06, "loss": 0.5766, "step": 6207 }, { "epoch": 0.762747266248925, "grad_norm": 1.0746691885530342, "learning_rate": 4.856730194126124e-06, "loss": 0.5796, "step": 6208 }, { "epoch": 0.7628701314657821, "grad_norm": 1.3226997939066472, "learning_rate": 4.851991708109646e-06, "loss": 0.5499, "step": 6209 }, { "epoch": 0.7629929966826391, "grad_norm": 1.3808453118503121, "learning_rate": 4.8472550887534865e-06, "loss": 0.5127, "step": 6210 }, { "epoch": 0.7631158618994962, "grad_norm": 1.2170373017741853, "learning_rate": 4.842520336928904e-06, "loss": 0.7245, "step": 6211 }, { "epoch": 0.7632387271163533, "grad_norm": 1.179310433568015, "learning_rate": 4.837787453506833e-06, "loss": 0.6094, "step": 6212 }, { "epoch": 0.7633615923332104, "grad_norm": 1.2921501332671048, "learning_rate": 4.83305643935786e-06, "loss": 0.598, "step": 6213 }, { "epoch": 0.7634844575500676, "grad_norm": 1.1203097315269281, "learning_rate": 4.828327295352217e-06, "loss": 0.5371, "step": 6214 }, { "epoch": 0.7636073227669247, "grad_norm": 1.2654813508610638, "learning_rate": 4.8236000223598045e-06, "loss": 0.4966, "step": 6215 }, { "epoch": 0.7637301879837818, "grad_norm": 1.1691843620722076, "learning_rate": 4.8188746212501634e-06, "loss": 0.5401, "step": 6216 }, { "epoch": 0.7638530532006389, "grad_norm": 1.4363930082260319, "learning_rate": 4.814151092892518e-06, "loss": 0.6883, "step": 6217 }, { "epoch": 0.763975918417496, "grad_norm": 1.069100891001439, "learning_rate": 4.809429438155717e-06, "loss": 0.5987, "step": 6218 }, { "epoch": 0.7640987836343531, "grad_norm": 1.234878531666306, "learning_rate": 4.804709657908283e-06, "loss": 0.6507, "step": 6219 }, { "epoch": 0.7642216488512102, "grad_norm": 1.4696871933726865, "learning_rate": 4.799991753018393e-06, "loss": 0.6745, "step": 6220 }, { "epoch": 0.7643445140680674, "grad_norm": 1.1252327997901903, "learning_rate": 4.795275724353867e-06, "loss": 0.6441, "step": 6221 }, { "epoch": 0.7644673792849245, "grad_norm": 1.4135921864366447, "learning_rate": 4.790561572782192e-06, "loss": 0.68, "step": 6222 }, { "epoch": 0.7645902445017816, "grad_norm": 1.3494369101663166, "learning_rate": 4.785849299170502e-06, "loss": 0.7376, "step": 6223 }, { "epoch": 0.7647131097186387, "grad_norm": 1.175910356601133, "learning_rate": 4.7811389043856e-06, "loss": 0.5506, "step": 6224 }, { "epoch": 0.7648359749354957, "grad_norm": 1.3267349188180304, "learning_rate": 4.776430389293919e-06, "loss": 0.5065, "step": 6225 }, { "epoch": 0.7649588401523528, "grad_norm": 1.3451028909304565, "learning_rate": 4.77172375476157e-06, "loss": 0.623, "step": 6226 }, { "epoch": 0.7650817053692099, "grad_norm": 1.183749142953873, "learning_rate": 4.767019001654295e-06, "loss": 0.6685, "step": 6227 }, { "epoch": 0.7652045705860671, "grad_norm": 1.1141377490944588, "learning_rate": 4.762316130837522e-06, "loss": 0.5884, "step": 6228 }, { "epoch": 0.7653274358029242, "grad_norm": 1.2140686364616433, "learning_rate": 4.757615143176296e-06, "loss": 0.6978, "step": 6229 }, { "epoch": 0.7654503010197813, "grad_norm": 1.2692802896492696, "learning_rate": 4.752916039535345e-06, "loss": 0.4974, "step": 6230 }, { "epoch": 0.7655731662366384, "grad_norm": 1.1548387817101293, "learning_rate": 4.74821882077903e-06, "loss": 0.4873, "step": 6231 }, { "epoch": 0.7656960314534955, "grad_norm": 1.2511501276402661, "learning_rate": 4.743523487771378e-06, "loss": 0.4687, "step": 6232 }, { "epoch": 0.7658188966703526, "grad_norm": 1.2041355973355108, "learning_rate": 4.73883004137607e-06, "loss": 0.4655, "step": 6233 }, { "epoch": 0.7659417618872097, "grad_norm": 1.2371555197407673, "learning_rate": 4.7341384824564235e-06, "loss": 0.6041, "step": 6234 }, { "epoch": 0.7660646271040669, "grad_norm": 1.2287600718545706, "learning_rate": 4.729448811875428e-06, "loss": 0.5772, "step": 6235 }, { "epoch": 0.766187492320924, "grad_norm": 1.2173094081965368, "learning_rate": 4.724761030495716e-06, "loss": 0.6274, "step": 6236 }, { "epoch": 0.7663103575377811, "grad_norm": 1.5665707781841818, "learning_rate": 4.72007513917958e-06, "loss": 0.5051, "step": 6237 }, { "epoch": 0.7664332227546382, "grad_norm": 1.2592965735061168, "learning_rate": 4.71539113878895e-06, "loss": 0.5514, "step": 6238 }, { "epoch": 0.7665560879714952, "grad_norm": 1.2057976180370376, "learning_rate": 4.710709030185422e-06, "loss": 0.505, "step": 6239 }, { "epoch": 0.7666789531883523, "grad_norm": 0.9619849404331129, "learning_rate": 4.706028814230245e-06, "loss": 0.5444, "step": 6240 }, { "epoch": 0.7668018184052094, "grad_norm": 1.0867129792128223, "learning_rate": 4.701350491784302e-06, "loss": 0.5499, "step": 6241 }, { "epoch": 0.7669246836220666, "grad_norm": 1.0849058589879117, "learning_rate": 4.696674063708148e-06, "loss": 0.6069, "step": 6242 }, { "epoch": 0.7670475488389237, "grad_norm": 1.213358131736252, "learning_rate": 4.691999530861981e-06, "loss": 0.5266, "step": 6243 }, { "epoch": 0.7671704140557808, "grad_norm": 1.7185450092412362, "learning_rate": 4.687326894105657e-06, "loss": 0.688, "step": 6244 }, { "epoch": 0.7672932792726379, "grad_norm": 1.0864161310553995, "learning_rate": 4.682656154298662e-06, "loss": 0.6191, "step": 6245 }, { "epoch": 0.767416144489495, "grad_norm": 1.2538813087375211, "learning_rate": 4.67798731230016e-06, "loss": 0.553, "step": 6246 }, { "epoch": 0.7675390097063521, "grad_norm": 1.2481555904935009, "learning_rate": 4.673320368968951e-06, "loss": 0.6408, "step": 6247 }, { "epoch": 0.7676618749232093, "grad_norm": 1.2628510458930893, "learning_rate": 4.668655325163493e-06, "loss": 0.5718, "step": 6248 }, { "epoch": 0.7677847401400664, "grad_norm": 1.4523061637155512, "learning_rate": 4.663992181741883e-06, "loss": 0.4845, "step": 6249 }, { "epoch": 0.7679076053569235, "grad_norm": 1.0683473972235542, "learning_rate": 4.659330939561879e-06, "loss": 0.5216, "step": 6250 }, { "epoch": 0.7680304705737806, "grad_norm": 1.0445964178561986, "learning_rate": 4.654671599480893e-06, "loss": 0.5919, "step": 6251 }, { "epoch": 0.7681533357906377, "grad_norm": 1.1016632752617819, "learning_rate": 4.650014162355969e-06, "loss": 0.5199, "step": 6252 }, { "epoch": 0.7682762010074948, "grad_norm": 1.5814117651379662, "learning_rate": 4.6453586290438214e-06, "loss": 0.6055, "step": 6253 }, { "epoch": 0.7683990662243518, "grad_norm": 1.133664177329624, "learning_rate": 4.640705000400795e-06, "loss": 0.6018, "step": 6254 }, { "epoch": 0.768521931441209, "grad_norm": 1.0327133270464934, "learning_rate": 4.636053277282909e-06, "loss": 0.6042, "step": 6255 }, { "epoch": 0.7686447966580661, "grad_norm": 1.1111313158176601, "learning_rate": 4.631403460545806e-06, "loss": 0.4757, "step": 6256 }, { "epoch": 0.7687676618749232, "grad_norm": 1.0784030098250352, "learning_rate": 4.626755551044798e-06, "loss": 0.5982, "step": 6257 }, { "epoch": 0.7688905270917803, "grad_norm": 1.152555130728522, "learning_rate": 4.622109549634829e-06, "loss": 0.503, "step": 6258 }, { "epoch": 0.7690133923086374, "grad_norm": 1.141108486730237, "learning_rate": 4.617465457170504e-06, "loss": 0.5294, "step": 6259 }, { "epoch": 0.7691362575254945, "grad_norm": 1.1776200238177257, "learning_rate": 4.6128232745060815e-06, "loss": 0.5346, "step": 6260 }, { "epoch": 0.7692591227423516, "grad_norm": 1.0542351619729198, "learning_rate": 4.608183002495445e-06, "loss": 0.5826, "step": 6261 }, { "epoch": 0.7693819879592088, "grad_norm": 1.1949258155079843, "learning_rate": 4.603544641992161e-06, "loss": 0.5741, "step": 6262 }, { "epoch": 0.7695048531760659, "grad_norm": 1.090343224994622, "learning_rate": 4.598908193849412e-06, "loss": 0.517, "step": 6263 }, { "epoch": 0.769627718392923, "grad_norm": 1.3212678407968577, "learning_rate": 4.594273658920052e-06, "loss": 0.4831, "step": 6264 }, { "epoch": 0.7697505836097801, "grad_norm": 1.297364160759807, "learning_rate": 4.58964103805656e-06, "loss": 0.5932, "step": 6265 }, { "epoch": 0.7698734488266372, "grad_norm": 1.2194681166402321, "learning_rate": 4.585010332111093e-06, "loss": 0.518, "step": 6266 }, { "epoch": 0.7699963140434943, "grad_norm": 1.3243342586449698, "learning_rate": 4.580381541935429e-06, "loss": 0.6362, "step": 6267 }, { "epoch": 0.7701191792603514, "grad_norm": 1.4170735812436925, "learning_rate": 4.575754668381011e-06, "loss": 0.5805, "step": 6268 }, { "epoch": 0.7702420444772085, "grad_norm": 1.0651749335276803, "learning_rate": 4.571129712298913e-06, "loss": 0.5212, "step": 6269 }, { "epoch": 0.7703649096940656, "grad_norm": 1.707586176196874, "learning_rate": 4.5665066745398705e-06, "loss": 0.5746, "step": 6270 }, { "epoch": 0.7704877749109227, "grad_norm": 1.1164525809246417, "learning_rate": 4.561885555954269e-06, "loss": 0.5952, "step": 6271 }, { "epoch": 0.7706106401277798, "grad_norm": 1.2462006244562889, "learning_rate": 4.557266357392119e-06, "loss": 0.5665, "step": 6272 }, { "epoch": 0.7707335053446369, "grad_norm": 1.2779997039016302, "learning_rate": 4.552649079703099e-06, "loss": 0.584, "step": 6273 }, { "epoch": 0.770856370561494, "grad_norm": 1.4478910558602702, "learning_rate": 4.548033723736527e-06, "loss": 0.5768, "step": 6274 }, { "epoch": 0.7709792357783511, "grad_norm": 1.4396267369227167, "learning_rate": 4.543420290341374e-06, "loss": 0.6062, "step": 6275 }, { "epoch": 0.7711021009952083, "grad_norm": 1.1691108865823459, "learning_rate": 4.538808780366239e-06, "loss": 0.5635, "step": 6276 }, { "epoch": 0.7712249662120654, "grad_norm": 1.7962300711292578, "learning_rate": 4.534199194659387e-06, "loss": 0.6638, "step": 6277 }, { "epoch": 0.7713478314289225, "grad_norm": 1.115311339490574, "learning_rate": 4.5295915340687255e-06, "loss": 0.7471, "step": 6278 }, { "epoch": 0.7714706966457796, "grad_norm": 1.2300007070444725, "learning_rate": 4.524985799441792e-06, "loss": 0.6419, "step": 6279 }, { "epoch": 0.7715935618626367, "grad_norm": 1.5551230809595364, "learning_rate": 4.520381991625794e-06, "loss": 0.6703, "step": 6280 }, { "epoch": 0.7717164270794938, "grad_norm": 1.5281624205583717, "learning_rate": 4.515780111467555e-06, "loss": 0.6306, "step": 6281 }, { "epoch": 0.771839292296351, "grad_norm": 1.321248570268165, "learning_rate": 4.511180159813582e-06, "loss": 0.5789, "step": 6282 }, { "epoch": 0.771962157513208, "grad_norm": 1.2776144186638512, "learning_rate": 4.506582137509992e-06, "loss": 0.62, "step": 6283 }, { "epoch": 0.7720850227300651, "grad_norm": 1.1343581520592374, "learning_rate": 4.501986045402565e-06, "loss": 0.5413, "step": 6284 }, { "epoch": 0.7722078879469222, "grad_norm": 1.2800598931571545, "learning_rate": 4.497391884336722e-06, "loss": 0.6004, "step": 6285 }, { "epoch": 0.7723307531637793, "grad_norm": 1.228349872726117, "learning_rate": 4.492799655157538e-06, "loss": 0.6428, "step": 6286 }, { "epoch": 0.7724536183806364, "grad_norm": 1.0588536903270813, "learning_rate": 4.488209358709708e-06, "loss": 0.4612, "step": 6287 }, { "epoch": 0.7725764835974935, "grad_norm": 1.3225315427993296, "learning_rate": 4.483620995837597e-06, "loss": 0.623, "step": 6288 }, { "epoch": 0.7726993488143507, "grad_norm": 1.3575675162472496, "learning_rate": 4.4790345673852055e-06, "loss": 0.5715, "step": 6289 }, { "epoch": 0.7728222140312078, "grad_norm": 1.3992061602907506, "learning_rate": 4.474450074196171e-06, "loss": 0.5579, "step": 6290 }, { "epoch": 0.7729450792480649, "grad_norm": 1.5290432796897608, "learning_rate": 4.4698675171137895e-06, "loss": 0.6415, "step": 6291 }, { "epoch": 0.773067944464922, "grad_norm": 1.3211549144070112, "learning_rate": 4.465286896980979e-06, "loss": 0.5601, "step": 6292 }, { "epoch": 0.7731908096817791, "grad_norm": 1.2204652421168538, "learning_rate": 4.460708214640331e-06, "loss": 0.5406, "step": 6293 }, { "epoch": 0.7733136748986362, "grad_norm": 1.475594296265917, "learning_rate": 4.456131470934053e-06, "loss": 0.623, "step": 6294 }, { "epoch": 0.7734365401154933, "grad_norm": 1.4330346514116319, "learning_rate": 4.451556666704018e-06, "loss": 0.6226, "step": 6295 }, { "epoch": 0.7735594053323505, "grad_norm": 1.3036524714302877, "learning_rate": 4.44698380279172e-06, "loss": 0.6437, "step": 6296 }, { "epoch": 0.7736822705492076, "grad_norm": 1.2009381532581267, "learning_rate": 4.442412880038312e-06, "loss": 0.632, "step": 6297 }, { "epoch": 0.7738051357660646, "grad_norm": 1.1721925096919241, "learning_rate": 4.437843899284592e-06, "loss": 0.6381, "step": 6298 }, { "epoch": 0.7739280009829217, "grad_norm": 0.8816396309665469, "learning_rate": 4.433276861370984e-06, "loss": 0.5699, "step": 6299 }, { "epoch": 0.7740508661997788, "grad_norm": 1.096711218461627, "learning_rate": 4.428711767137568e-06, "loss": 0.5081, "step": 6300 }, { "epoch": 0.7741737314166359, "grad_norm": 1.187224521518917, "learning_rate": 4.424148617424066e-06, "loss": 0.6486, "step": 6301 }, { "epoch": 0.774296596633493, "grad_norm": 1.3437645508284541, "learning_rate": 4.4195874130698455e-06, "loss": 0.5115, "step": 6302 }, { "epoch": 0.7744194618503502, "grad_norm": 1.2812004063804139, "learning_rate": 4.415028154913892e-06, "loss": 0.607, "step": 6303 }, { "epoch": 0.7745423270672073, "grad_norm": 1.2460332617770993, "learning_rate": 4.410470843794876e-06, "loss": 0.5748, "step": 6304 }, { "epoch": 0.7746651922840644, "grad_norm": 1.161534117412885, "learning_rate": 4.405915480551065e-06, "loss": 0.5862, "step": 6305 }, { "epoch": 0.7747880575009215, "grad_norm": 1.169254750086548, "learning_rate": 4.401362066020402e-06, "loss": 0.6463, "step": 6306 }, { "epoch": 0.7749109227177786, "grad_norm": 1.2416404815848552, "learning_rate": 4.396810601040448e-06, "loss": 0.4814, "step": 6307 }, { "epoch": 0.7750337879346357, "grad_norm": 1.2811906028072304, "learning_rate": 4.39226108644842e-06, "loss": 0.6075, "step": 6308 }, { "epoch": 0.7751566531514928, "grad_norm": 1.367855909537754, "learning_rate": 4.387713523081176e-06, "loss": 0.5333, "step": 6309 }, { "epoch": 0.77527951836835, "grad_norm": 1.1571307652975766, "learning_rate": 4.383167911775201e-06, "loss": 0.6512, "step": 6310 }, { "epoch": 0.7754023835852071, "grad_norm": 1.2752795849758816, "learning_rate": 4.378624253366636e-06, "loss": 0.5576, "step": 6311 }, { "epoch": 0.7755252488020641, "grad_norm": 1.1593394072036474, "learning_rate": 4.3740825486912585e-06, "loss": 0.567, "step": 6312 }, { "epoch": 0.7756481140189212, "grad_norm": 1.388428378866682, "learning_rate": 4.36954279858449e-06, "loss": 0.564, "step": 6313 }, { "epoch": 0.7757709792357783, "grad_norm": 1.1445288971920875, "learning_rate": 4.365005003881377e-06, "loss": 0.476, "step": 6314 }, { "epoch": 0.7758938444526354, "grad_norm": 1.2019430415626244, "learning_rate": 4.360469165416623e-06, "loss": 0.642, "step": 6315 }, { "epoch": 0.7760167096694925, "grad_norm": 1.4794268971423072, "learning_rate": 4.355935284024571e-06, "loss": 0.4888, "step": 6316 }, { "epoch": 0.7761395748863497, "grad_norm": 1.176335318534209, "learning_rate": 4.35140336053919e-06, "loss": 0.6049, "step": 6317 }, { "epoch": 0.7762624401032068, "grad_norm": 1.494392420033212, "learning_rate": 4.346873395794107e-06, "loss": 0.6341, "step": 6318 }, { "epoch": 0.7763853053200639, "grad_norm": 1.1841494859069324, "learning_rate": 4.342345390622564e-06, "loss": 0.4877, "step": 6319 }, { "epoch": 0.776508170536921, "grad_norm": 1.3489284927829526, "learning_rate": 4.33781934585748e-06, "loss": 0.5334, "step": 6320 }, { "epoch": 0.7766310357537781, "grad_norm": 1.4090251268799716, "learning_rate": 4.333295262331375e-06, "loss": 0.5623, "step": 6321 }, { "epoch": 0.7767539009706352, "grad_norm": 1.1891350081756187, "learning_rate": 4.328773140876436e-06, "loss": 0.5196, "step": 6322 }, { "epoch": 0.7768767661874924, "grad_norm": 1.4505951136339352, "learning_rate": 4.324252982324465e-06, "loss": 0.5824, "step": 6323 }, { "epoch": 0.7769996314043495, "grad_norm": 1.338649143849978, "learning_rate": 4.3197347875069285e-06, "loss": 0.5553, "step": 6324 }, { "epoch": 0.7771224966212066, "grad_norm": 1.1185256708797666, "learning_rate": 4.315218557254912e-06, "loss": 0.5589, "step": 6325 }, { "epoch": 0.7772453618380637, "grad_norm": 1.1474253076566034, "learning_rate": 4.310704292399147e-06, "loss": 0.5581, "step": 6326 }, { "epoch": 0.7773682270549207, "grad_norm": 1.089106132635584, "learning_rate": 4.306191993770011e-06, "loss": 0.5871, "step": 6327 }, { "epoch": 0.7774910922717778, "grad_norm": 1.1188221277347377, "learning_rate": 4.3016816621975006e-06, "loss": 0.5219, "step": 6328 }, { "epoch": 0.7776139574886349, "grad_norm": 1.117543283510135, "learning_rate": 4.297173298511273e-06, "loss": 0.5379, "step": 6329 }, { "epoch": 0.777736822705492, "grad_norm": 1.3813160165405014, "learning_rate": 4.292666903540597e-06, "loss": 0.6394, "step": 6330 }, { "epoch": 0.7778596879223492, "grad_norm": 1.0893415296594056, "learning_rate": 4.288162478114413e-06, "loss": 0.5567, "step": 6331 }, { "epoch": 0.7779825531392063, "grad_norm": 0.8923941716615689, "learning_rate": 4.283660023061268e-06, "loss": 0.4933, "step": 6332 }, { "epoch": 0.7781054183560634, "grad_norm": 1.1712365771903017, "learning_rate": 4.27915953920937e-06, "loss": 0.4574, "step": 6333 }, { "epoch": 0.7782282835729205, "grad_norm": 1.2451441924504392, "learning_rate": 4.274661027386542e-06, "loss": 0.5874, "step": 6334 }, { "epoch": 0.7783511487897776, "grad_norm": 0.9506620956511708, "learning_rate": 4.270164488420262e-06, "loss": 0.6527, "step": 6335 }, { "epoch": 0.7784740140066347, "grad_norm": 1.1217161997432044, "learning_rate": 4.265669923137642e-06, "loss": 0.5478, "step": 6336 }, { "epoch": 0.7785968792234919, "grad_norm": 1.2982192293674244, "learning_rate": 4.261177332365422e-06, "loss": 0.6504, "step": 6337 }, { "epoch": 0.778719744440349, "grad_norm": 1.2673951282099476, "learning_rate": 4.256686716929989e-06, "loss": 0.4959, "step": 6338 }, { "epoch": 0.7788426096572061, "grad_norm": 1.8439238103805369, "learning_rate": 4.25219807765736e-06, "loss": 0.6752, "step": 6339 }, { "epoch": 0.7789654748740632, "grad_norm": 1.0034257929036368, "learning_rate": 4.247711415373198e-06, "loss": 0.5738, "step": 6340 }, { "epoch": 0.7790883400909202, "grad_norm": 1.282751498064715, "learning_rate": 4.243226730902785e-06, "loss": 0.6502, "step": 6341 }, { "epoch": 0.7792112053077773, "grad_norm": 1.1988018394083266, "learning_rate": 4.238744025071055e-06, "loss": 0.5843, "step": 6342 }, { "epoch": 0.7793340705246344, "grad_norm": 1.1466223617884441, "learning_rate": 4.234263298702576e-06, "loss": 0.6241, "step": 6343 }, { "epoch": 0.7794569357414916, "grad_norm": 1.3020636761124067, "learning_rate": 4.229784552621541e-06, "loss": 0.6531, "step": 6344 }, { "epoch": 0.7795798009583487, "grad_norm": 1.195616737250621, "learning_rate": 4.2253077876517914e-06, "loss": 0.5644, "step": 6345 }, { "epoch": 0.7797026661752058, "grad_norm": 1.191497457784127, "learning_rate": 4.220833004616796e-06, "loss": 0.5805, "step": 6346 }, { "epoch": 0.7798255313920629, "grad_norm": 1.0790726158480264, "learning_rate": 4.2163602043396696e-06, "loss": 0.63, "step": 6347 }, { "epoch": 0.77994839660892, "grad_norm": 1.2802460160323088, "learning_rate": 4.211889387643145e-06, "loss": 0.6217, "step": 6348 }, { "epoch": 0.7800712618257771, "grad_norm": 1.102591487733063, "learning_rate": 4.207420555349603e-06, "loss": 0.5816, "step": 6349 }, { "epoch": 0.7801941270426342, "grad_norm": 1.4522210464894152, "learning_rate": 4.202953708281059e-06, "loss": 0.599, "step": 6350 }, { "epoch": 0.7803169922594914, "grad_norm": 1.4264709161255151, "learning_rate": 4.198488847259163e-06, "loss": 0.6184, "step": 6351 }, { "epoch": 0.7804398574763485, "grad_norm": 1.063731599709147, "learning_rate": 4.19402597310519e-06, "loss": 0.4693, "step": 6352 }, { "epoch": 0.7805627226932056, "grad_norm": 1.0267937233786617, "learning_rate": 4.189565086640057e-06, "loss": 0.5871, "step": 6353 }, { "epoch": 0.7806855879100627, "grad_norm": 1.3485291621133302, "learning_rate": 4.185106188684325e-06, "loss": 0.5714, "step": 6354 }, { "epoch": 0.7808084531269198, "grad_norm": 1.179742233015615, "learning_rate": 4.180649280058168e-06, "loss": 0.551, "step": 6355 }, { "epoch": 0.7809313183437768, "grad_norm": 2.4295551956194257, "learning_rate": 4.176194361581414e-06, "loss": 0.7863, "step": 6356 }, { "epoch": 0.7810541835606339, "grad_norm": 1.1464936677641009, "learning_rate": 4.1717414340735025e-06, "loss": 0.4728, "step": 6357 }, { "epoch": 0.7811770487774911, "grad_norm": 1.6943456124160596, "learning_rate": 4.167290498353541e-06, "loss": 0.6278, "step": 6358 }, { "epoch": 0.7812999139943482, "grad_norm": 1.482910948166948, "learning_rate": 4.162841555240234e-06, "loss": 0.472, "step": 6359 }, { "epoch": 0.7814227792112053, "grad_norm": 1.2982297333127846, "learning_rate": 4.158394605551946e-06, "loss": 0.6667, "step": 6360 }, { "epoch": 0.7815456444280624, "grad_norm": 1.0813169674145438, "learning_rate": 4.153949650106658e-06, "loss": 0.5995, "step": 6361 }, { "epoch": 0.7816685096449195, "grad_norm": 1.1032079273278157, "learning_rate": 4.149506689721989e-06, "loss": 0.5762, "step": 6362 }, { "epoch": 0.7817913748617766, "grad_norm": 1.267656571558536, "learning_rate": 4.1450657252152035e-06, "loss": 0.5645, "step": 6363 }, { "epoch": 0.7819142400786337, "grad_norm": 1.229325390236225, "learning_rate": 4.140626757403176e-06, "loss": 0.4978, "step": 6364 }, { "epoch": 0.7820371052954909, "grad_norm": 1.2457260499198592, "learning_rate": 4.1361897871024315e-06, "loss": 0.5622, "step": 6365 }, { "epoch": 0.782159970512348, "grad_norm": 1.2108514941095834, "learning_rate": 4.13175481512912e-06, "loss": 0.5225, "step": 6366 }, { "epoch": 0.7822828357292051, "grad_norm": 1.1185234792134309, "learning_rate": 4.127321842299034e-06, "loss": 0.6205, "step": 6367 }, { "epoch": 0.7824057009460622, "grad_norm": 1.309468851924351, "learning_rate": 4.122890869427572e-06, "loss": 0.5279, "step": 6368 }, { "epoch": 0.7825285661629193, "grad_norm": 1.3403369336864561, "learning_rate": 4.118461897329804e-06, "loss": 0.5178, "step": 6369 }, { "epoch": 0.7826514313797763, "grad_norm": 1.1035733711454436, "learning_rate": 4.114034926820396e-06, "loss": 0.5318, "step": 6370 }, { "epoch": 0.7827742965966334, "grad_norm": 1.3898582923800196, "learning_rate": 4.10960995871367e-06, "loss": 0.5926, "step": 6371 }, { "epoch": 0.7828971618134906, "grad_norm": 1.173248597209357, "learning_rate": 4.10518699382356e-06, "loss": 0.6342, "step": 6372 }, { "epoch": 0.7830200270303477, "grad_norm": 1.1123426866405737, "learning_rate": 4.1007660329636484e-06, "loss": 0.5247, "step": 6373 }, { "epoch": 0.7831428922472048, "grad_norm": 1.031864611776161, "learning_rate": 4.096347076947145e-06, "loss": 0.5623, "step": 6374 }, { "epoch": 0.7832657574640619, "grad_norm": 1.1399836391964697, "learning_rate": 4.091930126586879e-06, "loss": 0.5731, "step": 6375 }, { "epoch": 0.783388622680919, "grad_norm": 1.0851796072308377, "learning_rate": 4.087515182695326e-06, "loss": 0.5729, "step": 6376 }, { "epoch": 0.7835114878977761, "grad_norm": 1.2949402561003092, "learning_rate": 4.083102246084584e-06, "loss": 0.6382, "step": 6377 }, { "epoch": 0.7836343531146333, "grad_norm": 1.2171780648030777, "learning_rate": 4.078691317566392e-06, "loss": 0.6878, "step": 6378 }, { "epoch": 0.7837572183314904, "grad_norm": 1.1558877417037818, "learning_rate": 4.074282397952097e-06, "loss": 0.6211, "step": 6379 }, { "epoch": 0.7838800835483475, "grad_norm": 1.0627479289185289, "learning_rate": 4.069875488052702e-06, "loss": 0.5469, "step": 6380 }, { "epoch": 0.7840029487652046, "grad_norm": 1.1780304198041518, "learning_rate": 4.06547058867883e-06, "loss": 0.5747, "step": 6381 }, { "epoch": 0.7841258139820617, "grad_norm": 1.1747232771691627, "learning_rate": 4.061067700640726e-06, "loss": 0.5794, "step": 6382 }, { "epoch": 0.7842486791989188, "grad_norm": 1.2538153459748345, "learning_rate": 4.056666824748282e-06, "loss": 0.5801, "step": 6383 }, { "epoch": 0.784371544415776, "grad_norm": 1.153206407016388, "learning_rate": 4.052267961810995e-06, "loss": 0.6959, "step": 6384 }, { "epoch": 0.784494409632633, "grad_norm": 1.203354041344826, "learning_rate": 4.047871112638029e-06, "loss": 0.5536, "step": 6385 }, { "epoch": 0.7846172748494901, "grad_norm": 1.0981025476444854, "learning_rate": 4.043476278038139e-06, "loss": 0.5449, "step": 6386 }, { "epoch": 0.7847401400663472, "grad_norm": 1.179212524859334, "learning_rate": 4.039083458819736e-06, "loss": 0.5676, "step": 6387 }, { "epoch": 0.7848630052832043, "grad_norm": 1.2657799036720427, "learning_rate": 4.034692655790839e-06, "loss": 0.5442, "step": 6388 }, { "epoch": 0.7849858705000614, "grad_norm": 1.3260802507243543, "learning_rate": 4.030303869759124e-06, "loss": 0.6277, "step": 6389 }, { "epoch": 0.7851087357169185, "grad_norm": 1.0487944038845685, "learning_rate": 4.025917101531866e-06, "loss": 0.5743, "step": 6390 }, { "epoch": 0.7852316009337756, "grad_norm": 1.0636905915856605, "learning_rate": 4.0215323519159896e-06, "loss": 0.4998, "step": 6391 }, { "epoch": 0.7853544661506328, "grad_norm": 1.4193590246917769, "learning_rate": 4.017149621718043e-06, "loss": 0.5759, "step": 6392 }, { "epoch": 0.7854773313674899, "grad_norm": 1.368373316466973, "learning_rate": 4.012768911744192e-06, "loss": 0.548, "step": 6393 }, { "epoch": 0.785600196584347, "grad_norm": 1.067448448843834, "learning_rate": 4.0083902228002495e-06, "loss": 0.5173, "step": 6394 }, { "epoch": 0.7857230618012041, "grad_norm": 1.0718766898679024, "learning_rate": 4.004013555691633e-06, "loss": 0.5448, "step": 6395 }, { "epoch": 0.7858459270180612, "grad_norm": 1.0818973722182088, "learning_rate": 3.999638911223422e-06, "loss": 0.671, "step": 6396 }, { "epoch": 0.7859687922349183, "grad_norm": 1.1303400345549652, "learning_rate": 3.9952662902002886e-06, "loss": 0.599, "step": 6397 }, { "epoch": 0.7860916574517754, "grad_norm": 1.1877748364497664, "learning_rate": 3.990895693426557e-06, "loss": 0.704, "step": 6398 }, { "epoch": 0.7862145226686326, "grad_norm": 1.3437048796530693, "learning_rate": 3.98652712170616e-06, "loss": 0.5602, "step": 6399 }, { "epoch": 0.7863373878854896, "grad_norm": 1.1595183508342337, "learning_rate": 3.982160575842675e-06, "loss": 0.581, "step": 6400 }, { "epoch": 0.7864602531023467, "grad_norm": 1.287196411361099, "learning_rate": 3.977796056639304e-06, "loss": 0.4728, "step": 6401 }, { "epoch": 0.7865831183192038, "grad_norm": 1.3882436086320296, "learning_rate": 3.973433564898863e-06, "loss": 0.6688, "step": 6402 }, { "epoch": 0.7867059835360609, "grad_norm": 0.9757612698364961, "learning_rate": 3.9690731014238066e-06, "loss": 0.573, "step": 6403 }, { "epoch": 0.786828848752918, "grad_norm": 1.1751548173421602, "learning_rate": 3.964714667016216e-06, "loss": 0.5683, "step": 6404 }, { "epoch": 0.7869517139697751, "grad_norm": 1.1070720775167464, "learning_rate": 3.960358262477801e-06, "loss": 0.6687, "step": 6405 }, { "epoch": 0.7870745791866323, "grad_norm": 1.6455586454867985, "learning_rate": 3.956003888609883e-06, "loss": 0.5319, "step": 6406 }, { "epoch": 0.7871974444034894, "grad_norm": 1.0312317327339884, "learning_rate": 3.951651546213428e-06, "loss": 0.6023, "step": 6407 }, { "epoch": 0.7873203096203465, "grad_norm": 1.4600606412307755, "learning_rate": 3.94730123608902e-06, "loss": 0.4955, "step": 6408 }, { "epoch": 0.7874431748372036, "grad_norm": 1.4330159089196322, "learning_rate": 3.942952959036874e-06, "loss": 0.6677, "step": 6409 }, { "epoch": 0.7875660400540607, "grad_norm": 1.4061565404207061, "learning_rate": 3.938606715856821e-06, "loss": 0.5714, "step": 6410 }, { "epoch": 0.7876889052709178, "grad_norm": 1.312156009631735, "learning_rate": 3.934262507348325e-06, "loss": 0.6172, "step": 6411 }, { "epoch": 0.787811770487775, "grad_norm": 1.2080318682454945, "learning_rate": 3.929920334310481e-06, "loss": 0.6, "step": 6412 }, { "epoch": 0.7879346357046321, "grad_norm": 1.2783011805927, "learning_rate": 3.925580197541996e-06, "loss": 0.522, "step": 6413 }, { "epoch": 0.7880575009214891, "grad_norm": 1.1007626578127319, "learning_rate": 3.921242097841214e-06, "loss": 0.6709, "step": 6414 }, { "epoch": 0.7881803661383462, "grad_norm": 1.2565768194215137, "learning_rate": 3.916906036006101e-06, "loss": 0.572, "step": 6415 }, { "epoch": 0.7883032313552033, "grad_norm": 1.598188579422172, "learning_rate": 3.912572012834248e-06, "loss": 0.6368, "step": 6416 }, { "epoch": 0.7884260965720604, "grad_norm": 1.10086950677803, "learning_rate": 3.908240029122865e-06, "loss": 0.6444, "step": 6417 }, { "epoch": 0.7885489617889175, "grad_norm": 1.2413225643550114, "learning_rate": 3.903910085668798e-06, "loss": 0.6921, "step": 6418 }, { "epoch": 0.7886718270057747, "grad_norm": 1.2069566058805543, "learning_rate": 3.899582183268512e-06, "loss": 0.7292, "step": 6419 }, { "epoch": 0.7887946922226318, "grad_norm": 1.1687523206241253, "learning_rate": 3.895256322718091e-06, "loss": 0.5883, "step": 6420 }, { "epoch": 0.7889175574394889, "grad_norm": 1.299960982080885, "learning_rate": 3.890932504813258e-06, "loss": 0.5473, "step": 6421 }, { "epoch": 0.789040422656346, "grad_norm": 1.1088226563377537, "learning_rate": 3.886610730349337e-06, "loss": 0.4827, "step": 6422 }, { "epoch": 0.7891632878732031, "grad_norm": 1.2337964141967341, "learning_rate": 3.882291000121308e-06, "loss": 0.5231, "step": 6423 }, { "epoch": 0.7892861530900602, "grad_norm": 1.3270369468301646, "learning_rate": 3.877973314923744e-06, "loss": 0.4989, "step": 6424 }, { "epoch": 0.7894090183069173, "grad_norm": 1.294735577890553, "learning_rate": 3.873657675550864e-06, "loss": 0.5597, "step": 6425 }, { "epoch": 0.7895318835237745, "grad_norm": 1.2623850877297624, "learning_rate": 3.869344082796489e-06, "loss": 0.6133, "step": 6426 }, { "epoch": 0.7896547487406316, "grad_norm": 1.1894550431539992, "learning_rate": 3.8650325374540935e-06, "loss": 0.5833, "step": 6427 }, { "epoch": 0.7897776139574887, "grad_norm": 1.2423876386517976, "learning_rate": 3.860723040316747e-06, "loss": 0.4913, "step": 6428 }, { "epoch": 0.7899004791743457, "grad_norm": 1.3239590937875159, "learning_rate": 3.8564155921771585e-06, "loss": 0.6653, "step": 6429 }, { "epoch": 0.7900233443912028, "grad_norm": 1.3466981035405838, "learning_rate": 3.852110193827651e-06, "loss": 0.5313, "step": 6430 }, { "epoch": 0.7901462096080599, "grad_norm": 1.1524080876702048, "learning_rate": 3.847806846060175e-06, "loss": 0.6373, "step": 6431 }, { "epoch": 0.790269074824917, "grad_norm": 1.0949654452342286, "learning_rate": 3.843505549666311e-06, "loss": 0.5441, "step": 6432 }, { "epoch": 0.7903919400417742, "grad_norm": 1.162059359962414, "learning_rate": 3.839206305437239e-06, "loss": 0.4976, "step": 6433 }, { "epoch": 0.7905148052586313, "grad_norm": 1.2171041007481742, "learning_rate": 3.834909114163797e-06, "loss": 0.619, "step": 6434 }, { "epoch": 0.7906376704754884, "grad_norm": 1.31162893276971, "learning_rate": 3.830613976636408e-06, "loss": 0.6234, "step": 6435 }, { "epoch": 0.7907605356923455, "grad_norm": 1.1807717292303859, "learning_rate": 3.826320893645149e-06, "loss": 0.5433, "step": 6436 }, { "epoch": 0.7908834009092026, "grad_norm": 2.853773176406448, "learning_rate": 3.822029865979693e-06, "loss": 0.7286, "step": 6437 }, { "epoch": 0.7910062661260597, "grad_norm": 1.1253641783960042, "learning_rate": 3.817740894429352e-06, "loss": 0.5828, "step": 6438 }, { "epoch": 0.7911291313429168, "grad_norm": 1.184639768440339, "learning_rate": 3.8134539797830557e-06, "loss": 0.6848, "step": 6439 }, { "epoch": 0.791251996559774, "grad_norm": 1.2819660339827694, "learning_rate": 3.8091691228293515e-06, "loss": 0.649, "step": 6440 }, { "epoch": 0.7913748617766311, "grad_norm": 1.002655236324231, "learning_rate": 3.804886324356409e-06, "loss": 0.4944, "step": 6441 }, { "epoch": 0.7914977269934882, "grad_norm": 1.5206286696622926, "learning_rate": 3.8006055851520262e-06, "loss": 0.5149, "step": 6442 }, { "epoch": 0.7916205922103452, "grad_norm": 1.2708695289601533, "learning_rate": 3.796326906003619e-06, "loss": 0.508, "step": 6443 }, { "epoch": 0.7917434574272023, "grad_norm": 1.1695576949414879, "learning_rate": 3.792050287698216e-06, "loss": 0.5756, "step": 6444 }, { "epoch": 0.7918663226440594, "grad_norm": 1.3830810609824422, "learning_rate": 3.7877757310224753e-06, "loss": 0.5615, "step": 6445 }, { "epoch": 0.7919891878609165, "grad_norm": 1.5352070262979256, "learning_rate": 3.783503236762674e-06, "loss": 0.607, "step": 6446 }, { "epoch": 0.7921120530777737, "grad_norm": 1.650364656551014, "learning_rate": 3.7792328057047175e-06, "loss": 0.6522, "step": 6447 }, { "epoch": 0.7922349182946308, "grad_norm": 1.1134995808141652, "learning_rate": 3.774964438634112e-06, "loss": 0.4638, "step": 6448 }, { "epoch": 0.7923577835114879, "grad_norm": 1.1743103533394614, "learning_rate": 3.7706981363359995e-06, "loss": 0.6567, "step": 6449 }, { "epoch": 0.792480648728345, "grad_norm": 1.2365906247199938, "learning_rate": 3.766433899595147e-06, "loss": 0.554, "step": 6450 }, { "epoch": 0.7926035139452021, "grad_norm": 1.4940534148559228, "learning_rate": 3.762171729195921e-06, "loss": 0.4774, "step": 6451 }, { "epoch": 0.7927263791620592, "grad_norm": 1.3142908503713426, "learning_rate": 3.757911625922325e-06, "loss": 0.5259, "step": 6452 }, { "epoch": 0.7928492443789164, "grad_norm": 1.0788817864967324, "learning_rate": 3.7536535905579785e-06, "loss": 0.755, "step": 6453 }, { "epoch": 0.7929721095957735, "grad_norm": 1.0395672633324722, "learning_rate": 3.7493976238861223e-06, "loss": 0.5966, "step": 6454 }, { "epoch": 0.7930949748126306, "grad_norm": 1.2735877734770218, "learning_rate": 3.745143726689607e-06, "loss": 0.6048, "step": 6455 }, { "epoch": 0.7932178400294877, "grad_norm": 1.0759749326843477, "learning_rate": 3.7408918997509125e-06, "loss": 0.5823, "step": 6456 }, { "epoch": 0.7933407052463448, "grad_norm": 1.299802548455004, "learning_rate": 3.73664214385214e-06, "loss": 0.5973, "step": 6457 }, { "epoch": 0.7934635704632018, "grad_norm": 1.0749359097794697, "learning_rate": 3.732394459774996e-06, "loss": 0.5226, "step": 6458 }, { "epoch": 0.7935864356800589, "grad_norm": 1.1163897907588094, "learning_rate": 3.728148848300821e-06, "loss": 0.566, "step": 6459 }, { "epoch": 0.793709300896916, "grad_norm": 1.2248027817620533, "learning_rate": 3.7239053102105568e-06, "loss": 0.5907, "step": 6460 }, { "epoch": 0.7938321661137732, "grad_norm": 1.2392874280595396, "learning_rate": 3.7196638462847916e-06, "loss": 0.5529, "step": 6461 }, { "epoch": 0.7939550313306303, "grad_norm": 1.0669637873349986, "learning_rate": 3.715424457303702e-06, "loss": 0.6596, "step": 6462 }, { "epoch": 0.7940778965474874, "grad_norm": 1.279486137450477, "learning_rate": 3.7111871440471036e-06, "loss": 0.5699, "step": 6463 }, { "epoch": 0.7942007617643445, "grad_norm": 1.0776713305288672, "learning_rate": 3.7069519072944168e-06, "loss": 0.5934, "step": 6464 }, { "epoch": 0.7943236269812016, "grad_norm": 1.3487538206012244, "learning_rate": 3.702718747824688e-06, "loss": 0.6345, "step": 6465 }, { "epoch": 0.7944464921980587, "grad_norm": 1.1561151168377748, "learning_rate": 3.6984876664165845e-06, "loss": 0.635, "step": 6466 }, { "epoch": 0.7945693574149159, "grad_norm": 1.1255545324116007, "learning_rate": 3.6942586638483768e-06, "loss": 0.6435, "step": 6467 }, { "epoch": 0.794692222631773, "grad_norm": 1.1343031204131906, "learning_rate": 3.690031740897968e-06, "loss": 0.6131, "step": 6468 }, { "epoch": 0.7948150878486301, "grad_norm": 1.2551486680015362, "learning_rate": 3.6858068983428745e-06, "loss": 0.5694, "step": 6469 }, { "epoch": 0.7949379530654872, "grad_norm": 0.9951622270732979, "learning_rate": 3.6815841369602297e-06, "loss": 0.5369, "step": 6470 }, { "epoch": 0.7950608182823443, "grad_norm": 1.1457846567177274, "learning_rate": 3.677363457526775e-06, "loss": 0.6058, "step": 6471 }, { "epoch": 0.7951836834992013, "grad_norm": 1.1182311954029494, "learning_rate": 3.673144860818884e-06, "loss": 0.548, "step": 6472 }, { "epoch": 0.7953065487160584, "grad_norm": 1.2428482839916495, "learning_rate": 3.6689283476125392e-06, "loss": 0.5412, "step": 6473 }, { "epoch": 0.7954294139329156, "grad_norm": 1.196242476377996, "learning_rate": 3.6647139186833435e-06, "loss": 0.5551, "step": 6474 }, { "epoch": 0.7955522791497727, "grad_norm": 1.1210831106864632, "learning_rate": 3.6605015748065053e-06, "loss": 0.68, "step": 6475 }, { "epoch": 0.7956751443666298, "grad_norm": 1.128707496770722, "learning_rate": 3.6562913167568645e-06, "loss": 0.4951, "step": 6476 }, { "epoch": 0.7957980095834869, "grad_norm": 1.1555575232062891, "learning_rate": 3.652083145308874e-06, "loss": 0.5059, "step": 6477 }, { "epoch": 0.795920874800344, "grad_norm": 1.0472352098244835, "learning_rate": 3.6478770612365902e-06, "loss": 0.5267, "step": 6478 }, { "epoch": 0.7960437400172011, "grad_norm": 1.266101226426438, "learning_rate": 3.6436730653136986e-06, "loss": 0.5999, "step": 6479 }, { "epoch": 0.7961666052340582, "grad_norm": 1.5855026794215894, "learning_rate": 3.6394711583135e-06, "loss": 0.5402, "step": 6480 }, { "epoch": 0.7962894704509154, "grad_norm": 1.183273506368117, "learning_rate": 3.635271341008911e-06, "loss": 0.6562, "step": 6481 }, { "epoch": 0.7964123356677725, "grad_norm": 1.4979611962037622, "learning_rate": 3.631073614172449e-06, "loss": 0.6049, "step": 6482 }, { "epoch": 0.7965352008846296, "grad_norm": 1.1127330546860483, "learning_rate": 3.6268779785762686e-06, "loss": 0.5996, "step": 6483 }, { "epoch": 0.7966580661014867, "grad_norm": 1.2520995254215717, "learning_rate": 3.6226844349921294e-06, "loss": 0.54, "step": 6484 }, { "epoch": 0.7967809313183438, "grad_norm": 1.3189069142640883, "learning_rate": 3.6184929841914004e-06, "loss": 0.5324, "step": 6485 }, { "epoch": 0.7969037965352009, "grad_norm": 1.2180682272858359, "learning_rate": 3.6143036269450796e-06, "loss": 0.5755, "step": 6486 }, { "epoch": 0.7970266617520579, "grad_norm": 1.0362377923757387, "learning_rate": 3.610116364023759e-06, "loss": 0.6174, "step": 6487 }, { "epoch": 0.7971495269689151, "grad_norm": 1.3702172091879075, "learning_rate": 3.6059311961976756e-06, "loss": 0.5918, "step": 6488 }, { "epoch": 0.7972723921857722, "grad_norm": 1.1040644710679723, "learning_rate": 3.6017481242366503e-06, "loss": 0.608, "step": 6489 }, { "epoch": 0.7973952574026293, "grad_norm": 1.724860920292001, "learning_rate": 3.5975671489101423e-06, "loss": 0.64, "step": 6490 }, { "epoch": 0.7975181226194864, "grad_norm": 1.3299069267958865, "learning_rate": 3.5933882709872023e-06, "loss": 0.6253, "step": 6491 }, { "epoch": 0.7976409878363435, "grad_norm": 1.2000463483718757, "learning_rate": 3.589211491236523e-06, "loss": 0.4869, "step": 6492 }, { "epoch": 0.7977638530532006, "grad_norm": 1.0423761785376433, "learning_rate": 3.5850368104263836e-06, "loss": 0.5248, "step": 6493 }, { "epoch": 0.7978867182700577, "grad_norm": 1.2807673961064288, "learning_rate": 3.5808642293246995e-06, "loss": 0.5331, "step": 6494 }, { "epoch": 0.7980095834869149, "grad_norm": 1.2152272169696805, "learning_rate": 3.5766937486989802e-06, "loss": 0.6368, "step": 6495 }, { "epoch": 0.798132448703772, "grad_norm": 1.123028475489048, "learning_rate": 3.572525369316364e-06, "loss": 0.527, "step": 6496 }, { "epoch": 0.7982553139206291, "grad_norm": 1.0539683958423678, "learning_rate": 3.568359091943599e-06, "loss": 0.5376, "step": 6497 }, { "epoch": 0.7983781791374862, "grad_norm": 1.0733199216504214, "learning_rate": 3.564194917347035e-06, "loss": 0.5713, "step": 6498 }, { "epoch": 0.7985010443543433, "grad_norm": 1.2949409283278837, "learning_rate": 3.56003284629266e-06, "loss": 0.5234, "step": 6499 }, { "epoch": 0.7986239095712004, "grad_norm": 1.144117602338913, "learning_rate": 3.5558728795460467e-06, "loss": 0.5744, "step": 6500 }, { "epoch": 0.7987467747880576, "grad_norm": 1.4407706395219206, "learning_rate": 3.5517150178724058e-06, "loss": 0.5821, "step": 6501 }, { "epoch": 0.7988696400049146, "grad_norm": 1.1478281570061284, "learning_rate": 3.547559262036537e-06, "loss": 0.5775, "step": 6502 }, { "epoch": 0.7989925052217717, "grad_norm": 1.120519339995065, "learning_rate": 3.5434056128028715e-06, "loss": 0.638, "step": 6503 }, { "epoch": 0.7991153704386288, "grad_norm": 1.4326891972859441, "learning_rate": 3.5392540709354486e-06, "loss": 0.6414, "step": 6504 }, { "epoch": 0.7992382356554859, "grad_norm": 1.2533094667647149, "learning_rate": 3.5351046371979084e-06, "loss": 0.6828, "step": 6505 }, { "epoch": 0.799361100872343, "grad_norm": 1.3200050300972437, "learning_rate": 3.5309573123535184e-06, "loss": 0.4258, "step": 6506 }, { "epoch": 0.7994839660892001, "grad_norm": 1.1545565272102847, "learning_rate": 3.5268120971651528e-06, "loss": 0.5393, "step": 6507 }, { "epoch": 0.7996068313060573, "grad_norm": 1.2758716249348616, "learning_rate": 3.5226689923952975e-06, "loss": 0.5941, "step": 6508 }, { "epoch": 0.7997296965229144, "grad_norm": 1.2230129715819809, "learning_rate": 3.518527998806046e-06, "loss": 0.4965, "step": 6509 }, { "epoch": 0.7998525617397715, "grad_norm": 1.109488418005055, "learning_rate": 3.5143891171591088e-06, "loss": 0.649, "step": 6510 }, { "epoch": 0.7999754269566286, "grad_norm": 1.0660614383764564, "learning_rate": 3.510252348215805e-06, "loss": 0.5315, "step": 6511 }, { "epoch": 0.8000982921734857, "grad_norm": 1.3935135243921934, "learning_rate": 3.5061176927370745e-06, "loss": 0.6089, "step": 6512 }, { "epoch": 0.8002211573903428, "grad_norm": 1.1552127567676278, "learning_rate": 3.5019851514834476e-06, "loss": 0.6253, "step": 6513 }, { "epoch": 0.8003440226072, "grad_norm": 1.0911819562987974, "learning_rate": 3.4978547252150862e-06, "loss": 0.5231, "step": 6514 }, { "epoch": 0.8004668878240571, "grad_norm": 1.4479780269284768, "learning_rate": 3.4937264146917587e-06, "loss": 0.7726, "step": 6515 }, { "epoch": 0.8005897530409141, "grad_norm": 1.415097129929438, "learning_rate": 3.4896002206728313e-06, "loss": 0.648, "step": 6516 }, { "epoch": 0.8007126182577712, "grad_norm": 1.3822310322939217, "learning_rate": 3.485476143917295e-06, "loss": 0.5831, "step": 6517 }, { "epoch": 0.8008354834746283, "grad_norm": 1.1504583571140703, "learning_rate": 3.4813541851837498e-06, "loss": 0.498, "step": 6518 }, { "epoch": 0.8009583486914854, "grad_norm": 1.126825296275138, "learning_rate": 3.4772343452304047e-06, "loss": 0.4883, "step": 6519 }, { "epoch": 0.8010812139083425, "grad_norm": 1.3701444040188937, "learning_rate": 3.4731166248150693e-06, "loss": 0.4841, "step": 6520 }, { "epoch": 0.8012040791251996, "grad_norm": 1.1787381677791764, "learning_rate": 3.4690010246951765e-06, "loss": 0.5674, "step": 6521 }, { "epoch": 0.8013269443420568, "grad_norm": 1.184600074263304, "learning_rate": 3.464887545627767e-06, "loss": 0.5442, "step": 6522 }, { "epoch": 0.8014498095589139, "grad_norm": 1.2398135947911038, "learning_rate": 3.4607761883694834e-06, "loss": 0.5566, "step": 6523 }, { "epoch": 0.801572674775771, "grad_norm": 1.0421044571567675, "learning_rate": 3.4566669536765893e-06, "loss": 0.4775, "step": 6524 }, { "epoch": 0.8016955399926281, "grad_norm": 1.1805060052244447, "learning_rate": 3.452559842304938e-06, "loss": 0.6005, "step": 6525 }, { "epoch": 0.8018184052094852, "grad_norm": 1.3150545272747156, "learning_rate": 3.4484548550100254e-06, "loss": 0.645, "step": 6526 }, { "epoch": 0.8019412704263423, "grad_norm": 1.0933424802301084, "learning_rate": 3.4443519925469236e-06, "loss": 0.5705, "step": 6527 }, { "epoch": 0.8020641356431994, "grad_norm": 1.2106395564031611, "learning_rate": 3.440251255670337e-06, "loss": 0.5279, "step": 6528 }, { "epoch": 0.8021870008600566, "grad_norm": 1.1414703758341649, "learning_rate": 3.4361526451345536e-06, "loss": 0.5074, "step": 6529 }, { "epoch": 0.8023098660769137, "grad_norm": 1.1687683168115293, "learning_rate": 3.4320561616935076e-06, "loss": 0.5943, "step": 6530 }, { "epoch": 0.8024327312937707, "grad_norm": 1.0685306735384221, "learning_rate": 3.427961806100704e-06, "loss": 0.5826, "step": 6531 }, { "epoch": 0.8025555965106278, "grad_norm": 1.4429497636960236, "learning_rate": 3.423869579109284e-06, "loss": 0.6924, "step": 6532 }, { "epoch": 0.8026784617274849, "grad_norm": 1.3157979336501653, "learning_rate": 3.4197794814719768e-06, "loss": 0.5831, "step": 6533 }, { "epoch": 0.802801326944342, "grad_norm": 1.2762501416464123, "learning_rate": 3.4156915139411343e-06, "loss": 0.4966, "step": 6534 }, { "epoch": 0.8029241921611991, "grad_norm": 1.305026729866893, "learning_rate": 3.4116056772687147e-06, "loss": 0.4747, "step": 6535 }, { "epoch": 0.8030470573780563, "grad_norm": 1.1426892692237653, "learning_rate": 3.407521972206272e-06, "loss": 0.5286, "step": 6536 }, { "epoch": 0.8031699225949134, "grad_norm": 1.170717748742045, "learning_rate": 3.403440399504984e-06, "loss": 0.4411, "step": 6537 }, { "epoch": 0.8032927878117705, "grad_norm": 1.120335291988548, "learning_rate": 3.3993609599156277e-06, "loss": 0.5401, "step": 6538 }, { "epoch": 0.8034156530286276, "grad_norm": 1.0338392852373568, "learning_rate": 3.3952836541885933e-06, "loss": 0.5786, "step": 6539 }, { "epoch": 0.8035385182454847, "grad_norm": 1.2562124891847526, "learning_rate": 3.3912084830738695e-06, "loss": 0.5953, "step": 6540 }, { "epoch": 0.8036613834623418, "grad_norm": 1.3367626403916293, "learning_rate": 3.3871354473210573e-06, "loss": 0.5625, "step": 6541 }, { "epoch": 0.803784248679199, "grad_norm": 1.1696854725086632, "learning_rate": 3.383064547679374e-06, "loss": 0.6206, "step": 6542 }, { "epoch": 0.8039071138960561, "grad_norm": 1.0364021400769534, "learning_rate": 3.378995784897622e-06, "loss": 0.5824, "step": 6543 }, { "epoch": 0.8040299791129132, "grad_norm": 1.1830244366176506, "learning_rate": 3.3749291597242327e-06, "loss": 0.5857, "step": 6544 }, { "epoch": 0.8041528443297702, "grad_norm": 1.1018824268245122, "learning_rate": 3.370864672907232e-06, "loss": 0.649, "step": 6545 }, { "epoch": 0.8042757095466273, "grad_norm": 1.2032933683254468, "learning_rate": 3.3668023251942615e-06, "loss": 0.5454, "step": 6546 }, { "epoch": 0.8043985747634844, "grad_norm": 1.2298423546020991, "learning_rate": 3.362742117332554e-06, "loss": 0.5497, "step": 6547 }, { "epoch": 0.8045214399803415, "grad_norm": 1.1713759609424717, "learning_rate": 3.358684050068965e-06, "loss": 0.6088, "step": 6548 }, { "epoch": 0.8046443051971987, "grad_norm": 1.1516574952450425, "learning_rate": 3.35462812414995e-06, "loss": 0.5273, "step": 6549 }, { "epoch": 0.8047671704140558, "grad_norm": 1.3588516006229703, "learning_rate": 3.3505743403215712e-06, "loss": 0.5416, "step": 6550 }, { "epoch": 0.8048900356309129, "grad_norm": 1.2483880473124298, "learning_rate": 3.346522699329489e-06, "loss": 0.5545, "step": 6551 }, { "epoch": 0.80501290084777, "grad_norm": 1.4884849568339045, "learning_rate": 3.3424732019189806e-06, "loss": 0.6376, "step": 6552 }, { "epoch": 0.8051357660646271, "grad_norm": 1.1524273865848518, "learning_rate": 3.338425848834929e-06, "loss": 0.6671, "step": 6553 }, { "epoch": 0.8052586312814842, "grad_norm": 1.0296901305919288, "learning_rate": 3.3343806408218116e-06, "loss": 0.5595, "step": 6554 }, { "epoch": 0.8053814964983413, "grad_norm": 1.0960006745538549, "learning_rate": 3.3303375786237244e-06, "loss": 0.5795, "step": 6555 }, { "epoch": 0.8055043617151985, "grad_norm": 1.295671907826558, "learning_rate": 3.32629666298435e-06, "loss": 0.5491, "step": 6556 }, { "epoch": 0.8056272269320556, "grad_norm": 1.2197572237751704, "learning_rate": 3.3222578946470085e-06, "loss": 0.632, "step": 6557 }, { "epoch": 0.8057500921489127, "grad_norm": 1.178294746153841, "learning_rate": 3.3182212743545885e-06, "loss": 0.6364, "step": 6558 }, { "epoch": 0.8058729573657698, "grad_norm": 1.2512185146064205, "learning_rate": 3.314186802849607e-06, "loss": 0.6382, "step": 6559 }, { "epoch": 0.8059958225826268, "grad_norm": 1.2462941782107417, "learning_rate": 3.3101544808741813e-06, "loss": 0.5694, "step": 6560 }, { "epoch": 0.8061186877994839, "grad_norm": 1.2878580790091343, "learning_rate": 3.306124309170023e-06, "loss": 0.6091, "step": 6561 }, { "epoch": 0.806241553016341, "grad_norm": 1.1091711123365207, "learning_rate": 3.3020962884784667e-06, "loss": 0.4651, "step": 6562 }, { "epoch": 0.8063644182331982, "grad_norm": 1.1402474036791126, "learning_rate": 3.2980704195404237e-06, "loss": 0.5105, "step": 6563 }, { "epoch": 0.8064872834500553, "grad_norm": 1.418235739398273, "learning_rate": 3.2940467030964472e-06, "loss": 0.5351, "step": 6564 }, { "epoch": 0.8066101486669124, "grad_norm": 1.1131578010291614, "learning_rate": 3.2900251398866598e-06, "loss": 0.5852, "step": 6565 }, { "epoch": 0.8067330138837695, "grad_norm": 1.3631557088535295, "learning_rate": 3.28600573065081e-06, "loss": 0.6633, "step": 6566 }, { "epoch": 0.8068558791006266, "grad_norm": 1.170326759645731, "learning_rate": 3.28198847612823e-06, "loss": 0.7074, "step": 6567 }, { "epoch": 0.8069787443174837, "grad_norm": 1.1319388595560822, "learning_rate": 3.2779733770578846e-06, "loss": 0.4765, "step": 6568 }, { "epoch": 0.8071016095343408, "grad_norm": 1.2090452419940494, "learning_rate": 3.2739604341783103e-06, "loss": 0.5317, "step": 6569 }, { "epoch": 0.807224474751198, "grad_norm": 1.449425099630093, "learning_rate": 3.2699496482276747e-06, "loss": 0.5568, "step": 6570 }, { "epoch": 0.8073473399680551, "grad_norm": 1.2652417209073439, "learning_rate": 3.265941019943723e-06, "loss": 0.4969, "step": 6571 }, { "epoch": 0.8074702051849122, "grad_norm": 1.2355378110096031, "learning_rate": 3.2619345500638246e-06, "loss": 0.6878, "step": 6572 }, { "epoch": 0.8075930704017693, "grad_norm": 1.000399883983508, "learning_rate": 3.2579302393249446e-06, "loss": 0.551, "step": 6573 }, { "epoch": 0.8077159356186263, "grad_norm": 1.108589831742819, "learning_rate": 3.2539280884636422e-06, "loss": 0.5607, "step": 6574 }, { "epoch": 0.8078388008354834, "grad_norm": 1.204251196327704, "learning_rate": 3.2499280982160934e-06, "loss": 0.5106, "step": 6575 }, { "epoch": 0.8079616660523405, "grad_norm": 1.0341993736579531, "learning_rate": 3.2459302693180686e-06, "loss": 0.5324, "step": 6576 }, { "epoch": 0.8080845312691977, "grad_norm": 1.270308190385964, "learning_rate": 3.2419346025049483e-06, "loss": 0.5823, "step": 6577 }, { "epoch": 0.8082073964860548, "grad_norm": 1.2307863995294144, "learning_rate": 3.237941098511698e-06, "loss": 0.6035, "step": 6578 }, { "epoch": 0.8083302617029119, "grad_norm": 1.0968860615438496, "learning_rate": 3.233949758072905e-06, "loss": 0.496, "step": 6579 }, { "epoch": 0.808453126919769, "grad_norm": 1.342930169260937, "learning_rate": 3.22996058192275e-06, "loss": 0.5478, "step": 6580 }, { "epoch": 0.8085759921366261, "grad_norm": 1.3865298866296984, "learning_rate": 3.2259735707950117e-06, "loss": 0.6669, "step": 6581 }, { "epoch": 0.8086988573534832, "grad_norm": 1.1780942708161426, "learning_rate": 3.2219887254230797e-06, "loss": 0.5148, "step": 6582 }, { "epoch": 0.8088217225703404, "grad_norm": 1.2916964346630626, "learning_rate": 3.2180060465399357e-06, "loss": 0.5725, "step": 6583 }, { "epoch": 0.8089445877871975, "grad_norm": 1.0829214169878858, "learning_rate": 3.214025534878176e-06, "loss": 0.501, "step": 6584 }, { "epoch": 0.8090674530040546, "grad_norm": 2.837902364840793, "learning_rate": 3.2100471911699796e-06, "loss": 0.663, "step": 6585 }, { "epoch": 0.8091903182209117, "grad_norm": 1.3447867045849686, "learning_rate": 3.2060710161471427e-06, "loss": 0.5673, "step": 6586 }, { "epoch": 0.8093131834377688, "grad_norm": 1.1506329687023507, "learning_rate": 3.2020970105410607e-06, "loss": 0.5806, "step": 6587 }, { "epoch": 0.8094360486546259, "grad_norm": 1.028433289490699, "learning_rate": 3.198125175082717e-06, "loss": 0.4876, "step": 6588 }, { "epoch": 0.8095589138714829, "grad_norm": 1.034826025604305, "learning_rate": 3.1941555105027115e-06, "loss": 0.4956, "step": 6589 }, { "epoch": 0.80968177908834, "grad_norm": 1.4820237847318491, "learning_rate": 3.1901880175312307e-06, "loss": 0.7092, "step": 6590 }, { "epoch": 0.8098046443051972, "grad_norm": 1.0181243270664089, "learning_rate": 3.1862226968980813e-06, "loss": 0.549, "step": 6591 }, { "epoch": 0.8099275095220543, "grad_norm": 1.2610479771501715, "learning_rate": 3.182259549332649e-06, "loss": 0.5888, "step": 6592 }, { "epoch": 0.8100503747389114, "grad_norm": 1.3764671316543609, "learning_rate": 3.1782985755639344e-06, "loss": 0.4851, "step": 6593 }, { "epoch": 0.8101732399557685, "grad_norm": 1.0011270257374725, "learning_rate": 3.174339776320523e-06, "loss": 0.6529, "step": 6594 }, { "epoch": 0.8102961051726256, "grad_norm": 1.506668905236413, "learning_rate": 3.170383152330627e-06, "loss": 0.5768, "step": 6595 }, { "epoch": 0.8104189703894827, "grad_norm": 1.1439017130230327, "learning_rate": 3.1664287043220265e-06, "loss": 0.5449, "step": 6596 }, { "epoch": 0.8105418356063399, "grad_norm": 1.242880301756144, "learning_rate": 3.162476433022127e-06, "loss": 0.6261, "step": 6597 }, { "epoch": 0.810664700823197, "grad_norm": 1.236972669281351, "learning_rate": 3.158526339157915e-06, "loss": 0.6504, "step": 6598 }, { "epoch": 0.8107875660400541, "grad_norm": 1.580301141338173, "learning_rate": 3.1545784234559883e-06, "loss": 0.5171, "step": 6599 }, { "epoch": 0.8109104312569112, "grad_norm": 1.259991584902611, "learning_rate": 3.1506326866425445e-06, "loss": 0.5164, "step": 6600 }, { "epoch": 0.8110332964737683, "grad_norm": 1.2390048422112785, "learning_rate": 3.146689129443368e-06, "loss": 0.5066, "step": 6601 }, { "epoch": 0.8111561616906254, "grad_norm": 0.9852001304042092, "learning_rate": 3.142747752583854e-06, "loss": 0.6965, "step": 6602 }, { "epoch": 0.8112790269074824, "grad_norm": 1.1927690156634907, "learning_rate": 3.1388085567889934e-06, "loss": 0.5818, "step": 6603 }, { "epoch": 0.8114018921243396, "grad_norm": 1.378240931673566, "learning_rate": 3.1348715427833824e-06, "loss": 0.4956, "step": 6604 }, { "epoch": 0.8115247573411967, "grad_norm": 1.1216324039789205, "learning_rate": 3.130936711291198e-06, "loss": 0.6209, "step": 6605 }, { "epoch": 0.8116476225580538, "grad_norm": 1.2084548067295575, "learning_rate": 3.1270040630362313e-06, "loss": 0.5898, "step": 6606 }, { "epoch": 0.8117704877749109, "grad_norm": 1.0142923020282795, "learning_rate": 3.1230735987418733e-06, "loss": 0.5087, "step": 6607 }, { "epoch": 0.811893352991768, "grad_norm": 1.0622935571471055, "learning_rate": 3.1191453191310967e-06, "loss": 0.5655, "step": 6608 }, { "epoch": 0.8120162182086251, "grad_norm": 1.2306787755934865, "learning_rate": 3.1152192249264907e-06, "loss": 0.5305, "step": 6609 }, { "epoch": 0.8121390834254822, "grad_norm": 1.5002231141946876, "learning_rate": 3.111295316850231e-06, "loss": 0.6466, "step": 6610 }, { "epoch": 0.8122619486423394, "grad_norm": 1.0940522239791095, "learning_rate": 3.107373595624101e-06, "loss": 0.5179, "step": 6611 }, { "epoch": 0.8123848138591965, "grad_norm": 1.0757731537945112, "learning_rate": 3.1034540619694683e-06, "loss": 0.6137, "step": 6612 }, { "epoch": 0.8125076790760536, "grad_norm": 1.2142603080287282, "learning_rate": 3.09953671660731e-06, "loss": 0.6075, "step": 6613 }, { "epoch": 0.8126305442929107, "grad_norm": 1.274661046750979, "learning_rate": 3.0956215602581933e-06, "loss": 0.5427, "step": 6614 }, { "epoch": 0.8127534095097678, "grad_norm": 1.1376622293187173, "learning_rate": 3.0917085936422934e-06, "loss": 0.6093, "step": 6615 }, { "epoch": 0.8128762747266249, "grad_norm": 1.436701806909365, "learning_rate": 3.0877978174793642e-06, "loss": 0.6159, "step": 6616 }, { "epoch": 0.812999139943482, "grad_norm": 1.2170212514902943, "learning_rate": 3.083889232488775e-06, "loss": 0.5898, "step": 6617 }, { "epoch": 0.8131220051603391, "grad_norm": 1.454776465415841, "learning_rate": 3.0799828393894863e-06, "loss": 0.6446, "step": 6618 }, { "epoch": 0.8132448703771962, "grad_norm": 1.2473799239599976, "learning_rate": 3.076078638900046e-06, "loss": 0.5544, "step": 6619 }, { "epoch": 0.8133677355940533, "grad_norm": 1.3037776451640537, "learning_rate": 3.0721766317386153e-06, "loss": 0.6522, "step": 6620 }, { "epoch": 0.8134906008109104, "grad_norm": 1.2859992804700793, "learning_rate": 3.068276818622929e-06, "loss": 0.4743, "step": 6621 }, { "epoch": 0.8136134660277675, "grad_norm": 1.239383242025873, "learning_rate": 3.0643792002703515e-06, "loss": 0.513, "step": 6622 }, { "epoch": 0.8137363312446246, "grad_norm": 1.0895889890579138, "learning_rate": 3.0604837773978095e-06, "loss": 0.5914, "step": 6623 }, { "epoch": 0.8138591964614817, "grad_norm": 1.1279186187079415, "learning_rate": 3.0565905507218473e-06, "loss": 0.5089, "step": 6624 }, { "epoch": 0.8139820616783389, "grad_norm": 1.2144969041894462, "learning_rate": 3.0526995209586016e-06, "loss": 0.5679, "step": 6625 }, { "epoch": 0.814104926895196, "grad_norm": 1.2419345354860063, "learning_rate": 3.048810688823794e-06, "loss": 0.674, "step": 6626 }, { "epoch": 0.8142277921120531, "grad_norm": 1.1755492401778183, "learning_rate": 3.0449240550327577e-06, "loss": 0.6689, "step": 6627 }, { "epoch": 0.8143506573289102, "grad_norm": 1.2801635186562725, "learning_rate": 3.041039620300402e-06, "loss": 0.6315, "step": 6628 }, { "epoch": 0.8144735225457673, "grad_norm": 1.0727600928886059, "learning_rate": 3.03715738534126e-06, "loss": 0.6236, "step": 6629 }, { "epoch": 0.8145963877626244, "grad_norm": 1.246386858577507, "learning_rate": 3.0332773508694302e-06, "loss": 0.5527, "step": 6630 }, { "epoch": 0.8147192529794816, "grad_norm": 1.2286201785416135, "learning_rate": 3.02939951759863e-06, "loss": 0.6076, "step": 6631 }, { "epoch": 0.8148421181963387, "grad_norm": 1.590263955294201, "learning_rate": 3.0255238862421474e-06, "loss": 0.6019, "step": 6632 }, { "epoch": 0.8149649834131957, "grad_norm": 1.2710184031908658, "learning_rate": 3.021650457512897e-06, "loss": 0.5708, "step": 6633 }, { "epoch": 0.8150878486300528, "grad_norm": 1.0609259617985503, "learning_rate": 3.0177792321233595e-06, "loss": 0.5744, "step": 6634 }, { "epoch": 0.8152107138469099, "grad_norm": 1.4519166753218147, "learning_rate": 3.013910210785629e-06, "loss": 0.5812, "step": 6635 }, { "epoch": 0.815333579063767, "grad_norm": 1.138053853944056, "learning_rate": 3.0100433942113776e-06, "loss": 0.5444, "step": 6636 }, { "epoch": 0.8154564442806241, "grad_norm": 1.357586394165309, "learning_rate": 3.006178783111887e-06, "loss": 0.497, "step": 6637 }, { "epoch": 0.8155793094974813, "grad_norm": 1.1485955357223068, "learning_rate": 3.002316378198029e-06, "loss": 0.5243, "step": 6638 }, { "epoch": 0.8157021747143384, "grad_norm": 1.0310259360235285, "learning_rate": 2.9984561801802635e-06, "loss": 0.6582, "step": 6639 }, { "epoch": 0.8158250399311955, "grad_norm": 1.1665346582627323, "learning_rate": 2.994598189768649e-06, "loss": 0.6266, "step": 6640 }, { "epoch": 0.8159479051480526, "grad_norm": 1.448259035797744, "learning_rate": 2.9907424076728417e-06, "loss": 0.5615, "step": 6641 }, { "epoch": 0.8160707703649097, "grad_norm": 1.0820879805593566, "learning_rate": 2.986888834602089e-06, "loss": 0.5001, "step": 6642 }, { "epoch": 0.8161936355817668, "grad_norm": 1.0661010836289169, "learning_rate": 2.9830374712652235e-06, "loss": 0.4813, "step": 6643 }, { "epoch": 0.816316500798624, "grad_norm": 1.2151525275855166, "learning_rate": 2.9791883183706823e-06, "loss": 0.6267, "step": 6644 }, { "epoch": 0.8164393660154811, "grad_norm": 1.0278831543573936, "learning_rate": 2.975341376626496e-06, "loss": 0.5935, "step": 6645 }, { "epoch": 0.8165622312323382, "grad_norm": 1.02205664773775, "learning_rate": 2.971496646740276e-06, "loss": 0.531, "step": 6646 }, { "epoch": 0.8166850964491952, "grad_norm": 1.6279077271845546, "learning_rate": 2.9676541294192423e-06, "loss": 0.5692, "step": 6647 }, { "epoch": 0.8168079616660523, "grad_norm": 1.584983072270823, "learning_rate": 2.9638138253701974e-06, "loss": 0.5804, "step": 6648 }, { "epoch": 0.8169308268829094, "grad_norm": 1.1046145144490644, "learning_rate": 2.9599757352995466e-06, "loss": 0.5285, "step": 6649 }, { "epoch": 0.8170536920997665, "grad_norm": 1.164620131032551, "learning_rate": 2.9561398599132733e-06, "loss": 0.5489, "step": 6650 }, { "epoch": 0.8171765573166236, "grad_norm": 1.2014289776083942, "learning_rate": 2.9523061999169646e-06, "loss": 0.6245, "step": 6651 }, { "epoch": 0.8172994225334808, "grad_norm": 1.1420238064805255, "learning_rate": 2.9484747560157986e-06, "loss": 0.6696, "step": 6652 }, { "epoch": 0.8174222877503379, "grad_norm": 1.0226800960630062, "learning_rate": 2.944645528914548e-06, "loss": 0.6125, "step": 6653 }, { "epoch": 0.817545152967195, "grad_norm": 1.263234788677675, "learning_rate": 2.9408185193175673e-06, "loss": 0.6009, "step": 6654 }, { "epoch": 0.8176680181840521, "grad_norm": 1.0963894815570328, "learning_rate": 2.9369937279288138e-06, "loss": 0.5181, "step": 6655 }, { "epoch": 0.8177908834009092, "grad_norm": 1.0929934948098048, "learning_rate": 2.9331711554518364e-06, "loss": 0.626, "step": 6656 }, { "epoch": 0.8179137486177663, "grad_norm": 1.4359150319593135, "learning_rate": 2.9293508025897644e-06, "loss": 0.678, "step": 6657 }, { "epoch": 0.8180366138346234, "grad_norm": 1.2301269771260959, "learning_rate": 2.9255326700453365e-06, "loss": 0.7046, "step": 6658 }, { "epoch": 0.8181594790514806, "grad_norm": 1.4431381161082344, "learning_rate": 2.9217167585208587e-06, "loss": 0.652, "step": 6659 }, { "epoch": 0.8182823442683377, "grad_norm": 1.2741470987590664, "learning_rate": 2.917903068718262e-06, "loss": 0.6521, "step": 6660 }, { "epoch": 0.8184052094851948, "grad_norm": 1.0947300207977313, "learning_rate": 2.914091601339036e-06, "loss": 0.5886, "step": 6661 }, { "epoch": 0.8185280747020518, "grad_norm": 1.6204572721787587, "learning_rate": 2.9102823570842846e-06, "loss": 0.6271, "step": 6662 }, { "epoch": 0.8186509399189089, "grad_norm": 1.110354373559304, "learning_rate": 2.9064753366546836e-06, "loss": 0.5593, "step": 6663 }, { "epoch": 0.818773805135766, "grad_norm": 1.2220960792925708, "learning_rate": 2.9026705407505165e-06, "loss": 0.5542, "step": 6664 }, { "epoch": 0.8188966703526231, "grad_norm": 1.0459882029403638, "learning_rate": 2.8988679700716534e-06, "loss": 0.565, "step": 6665 }, { "epoch": 0.8190195355694803, "grad_norm": 1.3742594376107566, "learning_rate": 2.89506762531754e-06, "loss": 0.5362, "step": 6666 }, { "epoch": 0.8191424007863374, "grad_norm": 1.2677968403144895, "learning_rate": 2.891269507187242e-06, "loss": 0.5853, "step": 6667 }, { "epoch": 0.8192652660031945, "grad_norm": 1.376411475787457, "learning_rate": 2.887473616379387e-06, "loss": 0.6112, "step": 6668 }, { "epoch": 0.8193881312200516, "grad_norm": 1.3405592903620076, "learning_rate": 2.8836799535922116e-06, "loss": 0.6079, "step": 6669 }, { "epoch": 0.8195109964369087, "grad_norm": 1.1794673365212178, "learning_rate": 2.8798885195235224e-06, "loss": 0.5116, "step": 6670 }, { "epoch": 0.8196338616537658, "grad_norm": 1.3914273727394872, "learning_rate": 2.876099314870747e-06, "loss": 0.5637, "step": 6671 }, { "epoch": 0.819756726870623, "grad_norm": 1.0127705318110645, "learning_rate": 2.8723123403308726e-06, "loss": 0.5782, "step": 6672 }, { "epoch": 0.8198795920874801, "grad_norm": 1.1326996966192378, "learning_rate": 2.868527596600497e-06, "loss": 0.6257, "step": 6673 }, { "epoch": 0.8200024573043372, "grad_norm": 1.6723540848975844, "learning_rate": 2.86474508437579e-06, "loss": 0.521, "step": 6674 }, { "epoch": 0.8201253225211943, "grad_norm": 1.219717050945167, "learning_rate": 2.860964804352525e-06, "loss": 0.5168, "step": 6675 }, { "epoch": 0.8202481877380513, "grad_norm": 1.1192611111718203, "learning_rate": 2.8571867572260626e-06, "loss": 0.566, "step": 6676 }, { "epoch": 0.8203710529549084, "grad_norm": 1.0999282645537292, "learning_rate": 2.8534109436913445e-06, "loss": 0.5611, "step": 6677 }, { "epoch": 0.8204939181717655, "grad_norm": 1.1246203680190694, "learning_rate": 2.8496373644429095e-06, "loss": 0.6206, "step": 6678 }, { "epoch": 0.8206167833886227, "grad_norm": 1.0551399699641502, "learning_rate": 2.8458660201748836e-06, "loss": 0.6029, "step": 6679 }, { "epoch": 0.8207396486054798, "grad_norm": 1.4174703812547376, "learning_rate": 2.842096911580985e-06, "loss": 0.508, "step": 6680 }, { "epoch": 0.8208625138223369, "grad_norm": 1.2352559103495444, "learning_rate": 2.8383300393545098e-06, "loss": 0.593, "step": 6681 }, { "epoch": 0.820985379039194, "grad_norm": 1.7575841772027923, "learning_rate": 2.834565404188351e-06, "loss": 0.5423, "step": 6682 }, { "epoch": 0.8211082442560511, "grad_norm": 1.3678715395975665, "learning_rate": 2.8308030067749955e-06, "loss": 0.7476, "step": 6683 }, { "epoch": 0.8212311094729082, "grad_norm": 1.1013341787213586, "learning_rate": 2.8270428478065015e-06, "loss": 0.4219, "step": 6684 }, { "epoch": 0.8213539746897653, "grad_norm": 1.234816175529207, "learning_rate": 2.8232849279745366e-06, "loss": 0.5597, "step": 6685 }, { "epoch": 0.8214768399066225, "grad_norm": 1.1505280510947504, "learning_rate": 2.8195292479703315e-06, "loss": 0.5565, "step": 6686 }, { "epoch": 0.8215997051234796, "grad_norm": 1.293565021906243, "learning_rate": 2.815775808484737e-06, "loss": 0.4525, "step": 6687 }, { "epoch": 0.8217225703403367, "grad_norm": 1.2246973199185827, "learning_rate": 2.8120246102081614e-06, "loss": 0.6332, "step": 6688 }, { "epoch": 0.8218454355571938, "grad_norm": 1.1981459450721315, "learning_rate": 2.808275653830617e-06, "loss": 0.5691, "step": 6689 }, { "epoch": 0.8219683007740509, "grad_norm": 1.0864360521171215, "learning_rate": 2.804528940041699e-06, "loss": 0.4738, "step": 6690 }, { "epoch": 0.8220911659909079, "grad_norm": 1.549297906294064, "learning_rate": 2.800784469530596e-06, "loss": 0.5843, "step": 6691 }, { "epoch": 0.822214031207765, "grad_norm": 1.0538714499954862, "learning_rate": 2.797042242986071e-06, "loss": 0.5612, "step": 6692 }, { "epoch": 0.8223368964246222, "grad_norm": 1.2166665343929424, "learning_rate": 2.7933022610964877e-06, "loss": 0.6322, "step": 6693 }, { "epoch": 0.8224597616414793, "grad_norm": 1.6505162137711726, "learning_rate": 2.7895645245497926e-06, "loss": 0.5546, "step": 6694 }, { "epoch": 0.8225826268583364, "grad_norm": 1.4206748954461306, "learning_rate": 2.7858290340335126e-06, "loss": 0.479, "step": 6695 }, { "epoch": 0.8227054920751935, "grad_norm": 1.1580981686423113, "learning_rate": 2.7820957902347744e-06, "loss": 0.6716, "step": 6696 }, { "epoch": 0.8228283572920506, "grad_norm": 0.9362086848140213, "learning_rate": 2.7783647938402724e-06, "loss": 0.5586, "step": 6697 }, { "epoch": 0.8229512225089077, "grad_norm": 1.1955384393283492, "learning_rate": 2.7746360455363123e-06, "loss": 0.5219, "step": 6698 }, { "epoch": 0.8230740877257648, "grad_norm": 1.2716538952923222, "learning_rate": 2.7709095460087656e-06, "loss": 0.5186, "step": 6699 }, { "epoch": 0.823196952942622, "grad_norm": 1.202886058399535, "learning_rate": 2.767185295943101e-06, "loss": 0.5794, "step": 6700 }, { "epoch": 0.8233198181594791, "grad_norm": 1.1275291781855898, "learning_rate": 2.7634632960243667e-06, "loss": 0.5676, "step": 6701 }, { "epoch": 0.8234426833763362, "grad_norm": 1.0508388977069862, "learning_rate": 2.759743546937202e-06, "loss": 0.4392, "step": 6702 }, { "epoch": 0.8235655485931933, "grad_norm": 1.3596213040145566, "learning_rate": 2.756026049365834e-06, "loss": 0.5566, "step": 6703 }, { "epoch": 0.8236884138100504, "grad_norm": 1.2148379880683073, "learning_rate": 2.7523108039940662e-06, "loss": 0.5347, "step": 6704 }, { "epoch": 0.8238112790269074, "grad_norm": 1.1893431121837401, "learning_rate": 2.748597811505297e-06, "loss": 0.5564, "step": 6705 }, { "epoch": 0.8239341442437645, "grad_norm": 1.05767644230728, "learning_rate": 2.744887072582507e-06, "loss": 0.4321, "step": 6706 }, { "epoch": 0.8240570094606217, "grad_norm": 0.9834984731299086, "learning_rate": 2.7411785879082663e-06, "loss": 0.5399, "step": 6707 }, { "epoch": 0.8241798746774788, "grad_norm": 1.2461144278014782, "learning_rate": 2.737472358164721e-06, "loss": 0.3974, "step": 6708 }, { "epoch": 0.8243027398943359, "grad_norm": 0.9509187069299544, "learning_rate": 2.7337683840336074e-06, "loss": 0.6535, "step": 6709 }, { "epoch": 0.824425605111193, "grad_norm": 1.4227799917916462, "learning_rate": 2.7300666661962558e-06, "loss": 0.612, "step": 6710 }, { "epoch": 0.8245484703280501, "grad_norm": 1.2731069826355923, "learning_rate": 2.726367205333563e-06, "loss": 0.694, "step": 6711 }, { "epoch": 0.8246713355449072, "grad_norm": 1.1192784133038312, "learning_rate": 2.7226700021260267e-06, "loss": 0.5372, "step": 6712 }, { "epoch": 0.8247942007617644, "grad_norm": 1.1586262840332504, "learning_rate": 2.718975057253722e-06, "loss": 0.6421, "step": 6713 }, { "epoch": 0.8249170659786215, "grad_norm": 1.103265875031216, "learning_rate": 2.7152823713963125e-06, "loss": 0.5731, "step": 6714 }, { "epoch": 0.8250399311954786, "grad_norm": 1.2326790460630666, "learning_rate": 2.7115919452330403e-06, "loss": 0.7605, "step": 6715 }, { "epoch": 0.8251627964123357, "grad_norm": 1.09978782657009, "learning_rate": 2.7079037794427346e-06, "loss": 0.5048, "step": 6716 }, { "epoch": 0.8252856616291928, "grad_norm": 1.221544360755342, "learning_rate": 2.704217874703812e-06, "loss": 0.5354, "step": 6717 }, { "epoch": 0.8254085268460499, "grad_norm": 1.3188020376134162, "learning_rate": 2.7005342316942748e-06, "loss": 0.6741, "step": 6718 }, { "epoch": 0.825531392062907, "grad_norm": 1.1799220851659824, "learning_rate": 2.696852851091696e-06, "loss": 0.5387, "step": 6719 }, { "epoch": 0.825654257279764, "grad_norm": 1.0437313632359728, "learning_rate": 2.6931737335732476e-06, "loss": 0.5898, "step": 6720 }, { "epoch": 0.8257771224966212, "grad_norm": 1.2828370993469216, "learning_rate": 2.689496879815681e-06, "loss": 0.6639, "step": 6721 }, { "epoch": 0.8258999877134783, "grad_norm": 1.2000903972313532, "learning_rate": 2.685822290495324e-06, "loss": 0.5169, "step": 6722 }, { "epoch": 0.8260228529303354, "grad_norm": 1.2453176661728391, "learning_rate": 2.6821499662881004e-06, "loss": 0.5914, "step": 6723 }, { "epoch": 0.8261457181471925, "grad_norm": 1.2348465635188044, "learning_rate": 2.6784799078694987e-06, "loss": 0.622, "step": 6724 }, { "epoch": 0.8262685833640496, "grad_norm": 1.103949748518898, "learning_rate": 2.674812115914617e-06, "loss": 0.6275, "step": 6725 }, { "epoch": 0.8263914485809067, "grad_norm": 1.3712147806994244, "learning_rate": 2.6711465910981125e-06, "loss": 0.5289, "step": 6726 }, { "epoch": 0.8265143137977639, "grad_norm": 1.0656816338626431, "learning_rate": 2.667483334094239e-06, "loss": 0.5292, "step": 6727 }, { "epoch": 0.826637179014621, "grad_norm": 1.1026289375048874, "learning_rate": 2.6638223455768242e-06, "loss": 0.5908, "step": 6728 }, { "epoch": 0.8267600442314781, "grad_norm": 1.1950040681203076, "learning_rate": 2.6601636262192874e-06, "loss": 0.5318, "step": 6729 }, { "epoch": 0.8268829094483352, "grad_norm": 1.2993865168631362, "learning_rate": 2.6565071766946277e-06, "loss": 0.5659, "step": 6730 }, { "epoch": 0.8270057746651923, "grad_norm": 1.1142046733826005, "learning_rate": 2.6528529976754128e-06, "loss": 0.5457, "step": 6731 }, { "epoch": 0.8271286398820494, "grad_norm": 1.1057480213489241, "learning_rate": 2.649201089833826e-06, "loss": 0.6505, "step": 6732 }, { "epoch": 0.8272515050989065, "grad_norm": 1.0775837318671488, "learning_rate": 2.6455514538415943e-06, "loss": 0.5442, "step": 6733 }, { "epoch": 0.8273743703157636, "grad_norm": 1.1715722326599842, "learning_rate": 2.641904090370056e-06, "loss": 0.5296, "step": 6734 }, { "epoch": 0.8274972355326207, "grad_norm": 1.133066026055284, "learning_rate": 2.638259000090109e-06, "loss": 0.5006, "step": 6735 }, { "epoch": 0.8276201007494778, "grad_norm": 1.2965043905697884, "learning_rate": 2.634616183672256e-06, "loss": 0.5812, "step": 6736 }, { "epoch": 0.8277429659663349, "grad_norm": 1.1362180896367655, "learning_rate": 2.6309756417865607e-06, "loss": 0.5852, "step": 6737 }, { "epoch": 0.827865831183192, "grad_norm": 1.2819979184423702, "learning_rate": 2.6273373751026837e-06, "loss": 0.6023, "step": 6738 }, { "epoch": 0.8279886964000491, "grad_norm": 1.1075058326751737, "learning_rate": 2.6237013842898533e-06, "loss": 0.5113, "step": 6739 }, { "epoch": 0.8281115616169062, "grad_norm": 1.0374971612355373, "learning_rate": 2.6200676700168898e-06, "loss": 0.6453, "step": 6740 }, { "epoch": 0.8282344268337634, "grad_norm": 1.1985649477886615, "learning_rate": 2.616436232952196e-06, "loss": 0.7429, "step": 6741 }, { "epoch": 0.8283572920506205, "grad_norm": 1.2428549876462966, "learning_rate": 2.6128070737637437e-06, "loss": 0.5348, "step": 6742 }, { "epoch": 0.8284801572674776, "grad_norm": 1.3843463063007098, "learning_rate": 2.609180193119095e-06, "loss": 0.4913, "step": 6743 }, { "epoch": 0.8286030224843347, "grad_norm": 1.17806790704285, "learning_rate": 2.6055555916853945e-06, "loss": 0.5002, "step": 6744 }, { "epoch": 0.8287258877011918, "grad_norm": 1.3855038103947404, "learning_rate": 2.601933270129364e-06, "loss": 0.5611, "step": 6745 }, { "epoch": 0.8288487529180489, "grad_norm": 1.4201895144681331, "learning_rate": 2.5983132291173007e-06, "loss": 0.6038, "step": 6746 }, { "epoch": 0.828971618134906, "grad_norm": 1.2804528512646467, "learning_rate": 2.5946954693150915e-06, "loss": 0.5383, "step": 6747 }, { "epoch": 0.8290944833517632, "grad_norm": 1.4133322981603207, "learning_rate": 2.591079991388203e-06, "loss": 0.5701, "step": 6748 }, { "epoch": 0.8292173485686202, "grad_norm": 1.356532178487605, "learning_rate": 2.5874667960016725e-06, "loss": 0.6107, "step": 6749 }, { "epoch": 0.8293402137854773, "grad_norm": 1.3090798478797694, "learning_rate": 2.5838558838201304e-06, "loss": 0.5284, "step": 6750 }, { "epoch": 0.8294630790023344, "grad_norm": 1.3690586069530413, "learning_rate": 2.580247255507769e-06, "loss": 0.636, "step": 6751 }, { "epoch": 0.8295859442191915, "grad_norm": 1.206100248302592, "learning_rate": 2.576640911728387e-06, "loss": 0.5986, "step": 6752 }, { "epoch": 0.8297088094360486, "grad_norm": 1.3717278332705034, "learning_rate": 2.573036853145337e-06, "loss": 0.4865, "step": 6753 }, { "epoch": 0.8298316746529057, "grad_norm": 1.2346591351248921, "learning_rate": 2.569435080421567e-06, "loss": 0.559, "step": 6754 }, { "epoch": 0.8299545398697629, "grad_norm": 0.9919722849938025, "learning_rate": 2.5658355942195994e-06, "loss": 0.6822, "step": 6755 }, { "epoch": 0.83007740508662, "grad_norm": 1.163215737948197, "learning_rate": 2.5622383952015386e-06, "loss": 0.5657, "step": 6756 }, { "epoch": 0.8302002703034771, "grad_norm": 1.2348485850347681, "learning_rate": 2.5586434840290597e-06, "loss": 0.5667, "step": 6757 }, { "epoch": 0.8303231355203342, "grad_norm": 1.2415098018323636, "learning_rate": 2.555050861363428e-06, "loss": 0.5982, "step": 6758 }, { "epoch": 0.8304460007371913, "grad_norm": 1.090014099957309, "learning_rate": 2.5514605278654844e-06, "loss": 0.5505, "step": 6759 }, { "epoch": 0.8305688659540484, "grad_norm": 1.0384056362094325, "learning_rate": 2.547872484195642e-06, "loss": 0.5823, "step": 6760 }, { "epoch": 0.8306917311709056, "grad_norm": 1.024749825732001, "learning_rate": 2.544286731013905e-06, "loss": 0.5605, "step": 6761 }, { "epoch": 0.8308145963877627, "grad_norm": 1.256143566067871, "learning_rate": 2.540703268979838e-06, "loss": 0.5543, "step": 6762 }, { "epoch": 0.8309374616046198, "grad_norm": 1.3706659372737529, "learning_rate": 2.5371220987526105e-06, "loss": 0.5796, "step": 6763 }, { "epoch": 0.8310603268214768, "grad_norm": 1.7010787757748296, "learning_rate": 2.533543220990944e-06, "loss": 0.6681, "step": 6764 }, { "epoch": 0.8311831920383339, "grad_norm": 1.0019446906974296, "learning_rate": 2.5299666363531594e-06, "loss": 0.614, "step": 6765 }, { "epoch": 0.831306057255191, "grad_norm": 1.181270799211324, "learning_rate": 2.526392345497136e-06, "loss": 0.6422, "step": 6766 }, { "epoch": 0.8314289224720481, "grad_norm": 1.3342129485230054, "learning_rate": 2.522820349080348e-06, "loss": 0.5605, "step": 6767 }, { "epoch": 0.8315517876889053, "grad_norm": 1.1678601118515612, "learning_rate": 2.5192506477598415e-06, "loss": 0.499, "step": 6768 }, { "epoch": 0.8316746529057624, "grad_norm": 1.4431158706911964, "learning_rate": 2.515683242192236e-06, "loss": 0.6446, "step": 6769 }, { "epoch": 0.8317975181226195, "grad_norm": 0.7927753618197991, "learning_rate": 2.5121181330337336e-06, "loss": 0.576, "step": 6770 }, { "epoch": 0.8319203833394766, "grad_norm": 1.1640749253824674, "learning_rate": 2.5085553209401123e-06, "loss": 0.4917, "step": 6771 }, { "epoch": 0.8320432485563337, "grad_norm": 1.3415999355854327, "learning_rate": 2.5049948065667355e-06, "loss": 0.5644, "step": 6772 }, { "epoch": 0.8321661137731908, "grad_norm": 1.3670645786417723, "learning_rate": 2.5014365905685237e-06, "loss": 0.7449, "step": 6773 }, { "epoch": 0.832288978990048, "grad_norm": 1.2439839368707077, "learning_rate": 2.497880673600002e-06, "loss": 0.6078, "step": 6774 }, { "epoch": 0.8324118442069051, "grad_norm": 1.0061908624524034, "learning_rate": 2.494327056315247e-06, "loss": 0.585, "step": 6775 }, { "epoch": 0.8325347094237622, "grad_norm": 1.1512145509299403, "learning_rate": 2.4907757393679326e-06, "loss": 0.6337, "step": 6776 }, { "epoch": 0.8326575746406193, "grad_norm": 1.3345155391633208, "learning_rate": 2.487226723411291e-06, "loss": 0.5735, "step": 6777 }, { "epoch": 0.8327804398574763, "grad_norm": 1.40636569427536, "learning_rate": 2.4836800090981455e-06, "loss": 0.7069, "step": 6778 }, { "epoch": 0.8329033050743334, "grad_norm": 1.2005396560504944, "learning_rate": 2.4801355970808955e-06, "loss": 0.6887, "step": 6779 }, { "epoch": 0.8330261702911905, "grad_norm": 1.2277900921105345, "learning_rate": 2.4765934880115042e-06, "loss": 0.516, "step": 6780 }, { "epoch": 0.8331490355080476, "grad_norm": 1.1887043397465535, "learning_rate": 2.4730536825415247e-06, "loss": 0.5651, "step": 6781 }, { "epoch": 0.8332719007249048, "grad_norm": 1.365192714989377, "learning_rate": 2.4695161813220783e-06, "loss": 0.5267, "step": 6782 }, { "epoch": 0.8333947659417619, "grad_norm": 0.9567203897450611, "learning_rate": 2.4659809850038724e-06, "loss": 0.4923, "step": 6783 }, { "epoch": 0.833517631158619, "grad_norm": 1.2590127137019946, "learning_rate": 2.462448094237174e-06, "loss": 0.652, "step": 6784 }, { "epoch": 0.8336404963754761, "grad_norm": 0.9913490018858003, "learning_rate": 2.458917509671839e-06, "loss": 0.4959, "step": 6785 }, { "epoch": 0.8337633615923332, "grad_norm": 1.347642208923607, "learning_rate": 2.4553892319573012e-06, "loss": 0.5232, "step": 6786 }, { "epoch": 0.8338862268091903, "grad_norm": 1.0302702059179398, "learning_rate": 2.4518632617425563e-06, "loss": 0.6331, "step": 6787 }, { "epoch": 0.8340090920260474, "grad_norm": 1.195327860553905, "learning_rate": 2.4483395996761903e-06, "loss": 0.6027, "step": 6788 }, { "epoch": 0.8341319572429046, "grad_norm": 1.1057681542069673, "learning_rate": 2.444818246406347e-06, "loss": 0.6788, "step": 6789 }, { "epoch": 0.8342548224597617, "grad_norm": 1.171803819367584, "learning_rate": 2.4412992025807708e-06, "loss": 0.5732, "step": 6790 }, { "epoch": 0.8343776876766188, "grad_norm": 1.5541570071004414, "learning_rate": 2.437782468846756e-06, "loss": 0.5699, "step": 6791 }, { "epoch": 0.8345005528934759, "grad_norm": 1.365700958918804, "learning_rate": 2.4342680458511916e-06, "loss": 0.5762, "step": 6792 }, { "epoch": 0.8346234181103329, "grad_norm": 1.3817968884933955, "learning_rate": 2.4307559342405227e-06, "loss": 0.5013, "step": 6793 }, { "epoch": 0.83474628332719, "grad_norm": 1.4007872617384005, "learning_rate": 2.4272461346607904e-06, "loss": 0.5618, "step": 6794 }, { "epoch": 0.8348691485440471, "grad_norm": 1.3563995428829838, "learning_rate": 2.4237386477575917e-06, "loss": 0.5614, "step": 6795 }, { "epoch": 0.8349920137609043, "grad_norm": 1.1285463595879426, "learning_rate": 2.420233474176109e-06, "loss": 0.5616, "step": 6796 }, { "epoch": 0.8351148789777614, "grad_norm": 1.1488647706957578, "learning_rate": 2.4167306145610996e-06, "loss": 0.6441, "step": 6797 }, { "epoch": 0.8352377441946185, "grad_norm": 1.1702438936311688, "learning_rate": 2.413230069556885e-06, "loss": 0.6706, "step": 6798 }, { "epoch": 0.8353606094114756, "grad_norm": 1.2814855723793075, "learning_rate": 2.409731839807375e-06, "loss": 0.7008, "step": 6799 }, { "epoch": 0.8354834746283327, "grad_norm": 1.1007601819765673, "learning_rate": 2.4062359259560348e-06, "loss": 0.5221, "step": 6800 }, { "epoch": 0.8356063398451898, "grad_norm": 1.2670593208790457, "learning_rate": 2.4027423286459284e-06, "loss": 0.5589, "step": 6801 }, { "epoch": 0.835729205062047, "grad_norm": 1.443193546471224, "learning_rate": 2.3992510485196716e-06, "loss": 0.6503, "step": 6802 }, { "epoch": 0.8358520702789041, "grad_norm": 1.1912053206381632, "learning_rate": 2.3957620862194695e-06, "loss": 0.5141, "step": 6803 }, { "epoch": 0.8359749354957612, "grad_norm": 1.1670943890320036, "learning_rate": 2.392275442387087e-06, "loss": 0.5424, "step": 6804 }, { "epoch": 0.8360978007126183, "grad_norm": 1.0959559406062556, "learning_rate": 2.3887911176638737e-06, "loss": 0.5653, "step": 6805 }, { "epoch": 0.8362206659294754, "grad_norm": 1.103361456052754, "learning_rate": 2.3853091126907493e-06, "loss": 0.4627, "step": 6806 }, { "epoch": 0.8363435311463324, "grad_norm": 1.348824232266682, "learning_rate": 2.381829428108203e-06, "loss": 0.4428, "step": 6807 }, { "epoch": 0.8364663963631895, "grad_norm": 1.2575225502546108, "learning_rate": 2.3783520645562996e-06, "loss": 0.5699, "step": 6808 }, { "epoch": 0.8365892615800467, "grad_norm": 1.14253230087687, "learning_rate": 2.374877022674682e-06, "loss": 0.5433, "step": 6809 }, { "epoch": 0.8367121267969038, "grad_norm": 1.1844506622575386, "learning_rate": 2.3714043031025608e-06, "loss": 0.6338, "step": 6810 }, { "epoch": 0.8368349920137609, "grad_norm": 1.2001261929452505, "learning_rate": 2.3679339064787165e-06, "loss": 0.5371, "step": 6811 }, { "epoch": 0.836957857230618, "grad_norm": 1.2108825857238563, "learning_rate": 2.364465833441507e-06, "loss": 0.584, "step": 6812 }, { "epoch": 0.8370807224474751, "grad_norm": 1.358888245891515, "learning_rate": 2.3610000846288637e-06, "loss": 0.5448, "step": 6813 }, { "epoch": 0.8372035876643322, "grad_norm": 1.1173590740611345, "learning_rate": 2.3575366606782916e-06, "loss": 0.583, "step": 6814 }, { "epoch": 0.8373264528811893, "grad_norm": 1.1850474700561449, "learning_rate": 2.3540755622268597e-06, "loss": 0.5614, "step": 6815 }, { "epoch": 0.8374493180980465, "grad_norm": 1.137570536277114, "learning_rate": 2.3506167899112146e-06, "loss": 0.4902, "step": 6816 }, { "epoch": 0.8375721833149036, "grad_norm": 1.4700998487200152, "learning_rate": 2.34716034436758e-06, "loss": 0.5428, "step": 6817 }, { "epoch": 0.8376950485317607, "grad_norm": 1.3601037801587244, "learning_rate": 2.3437062262317398e-06, "loss": 0.5704, "step": 6818 }, { "epoch": 0.8378179137486178, "grad_norm": 1.3198350595135102, "learning_rate": 2.3402544361390614e-06, "loss": 0.623, "step": 6819 }, { "epoch": 0.8379407789654749, "grad_norm": 1.329421282363694, "learning_rate": 2.3368049747244786e-06, "loss": 0.6589, "step": 6820 }, { "epoch": 0.838063644182332, "grad_norm": 0.990077082537294, "learning_rate": 2.3333578426225e-06, "loss": 0.6808, "step": 6821 }, { "epoch": 0.838186509399189, "grad_norm": 1.2249565343764999, "learning_rate": 2.329913040467195e-06, "loss": 0.5145, "step": 6822 }, { "epoch": 0.8383093746160462, "grad_norm": 1.0652389721538855, "learning_rate": 2.326470568892221e-06, "loss": 0.6507, "step": 6823 }, { "epoch": 0.8384322398329033, "grad_norm": 1.2572113511689809, "learning_rate": 2.3230304285307956e-06, "loss": 0.5925, "step": 6824 }, { "epoch": 0.8385551050497604, "grad_norm": 1.363281602968501, "learning_rate": 2.319592620015708e-06, "loss": 0.566, "step": 6825 }, { "epoch": 0.8386779702666175, "grad_norm": 1.2778184282473029, "learning_rate": 2.3161571439793255e-06, "loss": 0.4766, "step": 6826 }, { "epoch": 0.8388008354834746, "grad_norm": 1.2556696258744082, "learning_rate": 2.3127240010535728e-06, "loss": 0.7007, "step": 6827 }, { "epoch": 0.8389237007003317, "grad_norm": 1.1960655347535225, "learning_rate": 2.309293191869966e-06, "loss": 0.5521, "step": 6828 }, { "epoch": 0.8390465659171888, "grad_norm": 1.164009283262368, "learning_rate": 2.305864717059571e-06, "loss": 0.6342, "step": 6829 }, { "epoch": 0.839169431134046, "grad_norm": 1.2694664033478418, "learning_rate": 2.3024385772530408e-06, "loss": 0.5156, "step": 6830 }, { "epoch": 0.8392922963509031, "grad_norm": 1.7940408437758117, "learning_rate": 2.2990147730805855e-06, "loss": 0.5489, "step": 6831 }, { "epoch": 0.8394151615677602, "grad_norm": 1.4314802588855344, "learning_rate": 2.2955933051719924e-06, "loss": 0.5945, "step": 6832 }, { "epoch": 0.8395380267846173, "grad_norm": 1.2956245552840162, "learning_rate": 2.292174174156623e-06, "loss": 0.633, "step": 6833 }, { "epoch": 0.8396608920014744, "grad_norm": 1.1032378598724923, "learning_rate": 2.2887573806633983e-06, "loss": 0.5005, "step": 6834 }, { "epoch": 0.8397837572183315, "grad_norm": 1.4436948606897222, "learning_rate": 2.285342925320818e-06, "loss": 0.573, "step": 6835 }, { "epoch": 0.8399066224351885, "grad_norm": 1.0262916654716179, "learning_rate": 2.2819308087569502e-06, "loss": 0.6227, "step": 6836 }, { "epoch": 0.8400294876520457, "grad_norm": 1.4602043654642334, "learning_rate": 2.2785210315994325e-06, "loss": 0.6326, "step": 6837 }, { "epoch": 0.8401523528689028, "grad_norm": 1.2735939799020335, "learning_rate": 2.2751135944754637e-06, "loss": 0.6044, "step": 6838 }, { "epoch": 0.8402752180857599, "grad_norm": 1.0284444574082756, "learning_rate": 2.2717084980118304e-06, "loss": 0.5668, "step": 6839 }, { "epoch": 0.840398083302617, "grad_norm": 1.0186080998247287, "learning_rate": 2.2683057428348715e-06, "loss": 0.609, "step": 6840 }, { "epoch": 0.8405209485194741, "grad_norm": 1.220851595482856, "learning_rate": 2.264905329570506e-06, "loss": 0.5973, "step": 6841 }, { "epoch": 0.8406438137363312, "grad_norm": 1.2431263052500283, "learning_rate": 2.2615072588442116e-06, "loss": 0.6312, "step": 6842 }, { "epoch": 0.8407666789531884, "grad_norm": 1.0751511835196335, "learning_rate": 2.258111531281045e-06, "loss": 0.4625, "step": 6843 }, { "epoch": 0.8408895441700455, "grad_norm": 1.1118636615094226, "learning_rate": 2.2547181475056313e-06, "loss": 0.5287, "step": 6844 }, { "epoch": 0.8410124093869026, "grad_norm": 1.2008393179658705, "learning_rate": 2.251327108142155e-06, "loss": 0.5407, "step": 6845 }, { "epoch": 0.8411352746037597, "grad_norm": 1.1586495062323667, "learning_rate": 2.2479384138143794e-06, "loss": 0.56, "step": 6846 }, { "epoch": 0.8412581398206168, "grad_norm": 1.0806378989840746, "learning_rate": 2.2445520651456326e-06, "loss": 0.4778, "step": 6847 }, { "epoch": 0.8413810050374739, "grad_norm": 1.343216190248116, "learning_rate": 2.2411680627588143e-06, "loss": 0.7237, "step": 6848 }, { "epoch": 0.841503870254331, "grad_norm": 1.196412644606064, "learning_rate": 2.237786407276384e-06, "loss": 0.5438, "step": 6849 }, { "epoch": 0.8416267354711882, "grad_norm": 1.177508184370154, "learning_rate": 2.234407099320378e-06, "loss": 0.5144, "step": 6850 }, { "epoch": 0.8417496006880452, "grad_norm": 1.0593085512927205, "learning_rate": 2.2310301395124016e-06, "loss": 0.6224, "step": 6851 }, { "epoch": 0.8418724659049023, "grad_norm": 1.0191860816396656, "learning_rate": 2.227655528473618e-06, "loss": 0.6072, "step": 6852 }, { "epoch": 0.8419953311217594, "grad_norm": 1.218209509101871, "learning_rate": 2.224283266824773e-06, "loss": 0.6005, "step": 6853 }, { "epoch": 0.8421181963386165, "grad_norm": 1.1194156406000617, "learning_rate": 2.22091335518616e-06, "loss": 0.5623, "step": 6854 }, { "epoch": 0.8422410615554736, "grad_norm": 1.1784403862192825, "learning_rate": 2.2175457941776654e-06, "loss": 0.5665, "step": 6855 }, { "epoch": 0.8423639267723307, "grad_norm": 1.1971085763264901, "learning_rate": 2.214180584418723e-06, "loss": 0.5925, "step": 6856 }, { "epoch": 0.8424867919891879, "grad_norm": 1.1235386142270245, "learning_rate": 2.2108177265283468e-06, "loss": 0.5701, "step": 6857 }, { "epoch": 0.842609657206045, "grad_norm": 1.310039064734899, "learning_rate": 2.207457221125101e-06, "loss": 0.581, "step": 6858 }, { "epoch": 0.8427325224229021, "grad_norm": 1.133331282736554, "learning_rate": 2.204099068827144e-06, "loss": 0.562, "step": 6859 }, { "epoch": 0.8428553876397592, "grad_norm": 1.1924379415524147, "learning_rate": 2.200743270252177e-06, "loss": 0.6492, "step": 6860 }, { "epoch": 0.8429782528566163, "grad_norm": 1.1779305776589508, "learning_rate": 2.1973898260174773e-06, "loss": 0.5777, "step": 6861 }, { "epoch": 0.8431011180734734, "grad_norm": 1.3360749686118727, "learning_rate": 2.1940387367398956e-06, "loss": 0.6504, "step": 6862 }, { "epoch": 0.8432239832903305, "grad_norm": 1.124216894356352, "learning_rate": 2.1906900030358353e-06, "loss": 0.4269, "step": 6863 }, { "epoch": 0.8433468485071877, "grad_norm": 1.161785291957812, "learning_rate": 2.1873436255212814e-06, "loss": 0.5938, "step": 6864 }, { "epoch": 0.8434697137240448, "grad_norm": 1.111500258402825, "learning_rate": 2.183999604811767e-06, "loss": 0.663, "step": 6865 }, { "epoch": 0.8435925789409018, "grad_norm": 1.1016112115122996, "learning_rate": 2.1806579415224172e-06, "loss": 0.584, "step": 6866 }, { "epoch": 0.8437154441577589, "grad_norm": 1.0978664405714536, "learning_rate": 2.1773186362678993e-06, "loss": 0.6065, "step": 6867 }, { "epoch": 0.843838309374616, "grad_norm": 1.2102102693034091, "learning_rate": 2.1739816896624643e-06, "loss": 0.687, "step": 6868 }, { "epoch": 0.8439611745914731, "grad_norm": 1.183715431575639, "learning_rate": 2.170647102319914e-06, "loss": 0.558, "step": 6869 }, { "epoch": 0.8440840398083302, "grad_norm": 1.043688855163674, "learning_rate": 2.1673148748536287e-06, "loss": 0.5754, "step": 6870 }, { "epoch": 0.8442069050251874, "grad_norm": 1.1655764093126952, "learning_rate": 2.1639850078765523e-06, "loss": 0.6076, "step": 6871 }, { "epoch": 0.8443297702420445, "grad_norm": 1.410950804261355, "learning_rate": 2.1606575020011864e-06, "loss": 0.5584, "step": 6872 }, { "epoch": 0.8444526354589016, "grad_norm": 1.638778174905786, "learning_rate": 2.157332357839607e-06, "loss": 0.4989, "step": 6873 }, { "epoch": 0.8445755006757587, "grad_norm": 1.410103514895907, "learning_rate": 2.1540095760034513e-06, "loss": 0.5667, "step": 6874 }, { "epoch": 0.8446983658926158, "grad_norm": 1.4912663814902771, "learning_rate": 2.15068915710393e-06, "loss": 0.5584, "step": 6875 }, { "epoch": 0.8448212311094729, "grad_norm": 1.1005152512652243, "learning_rate": 2.1473711017518032e-06, "loss": 0.538, "step": 6876 }, { "epoch": 0.84494409632633, "grad_norm": 1.0538920725111258, "learning_rate": 2.1440554105574097e-06, "loss": 0.5884, "step": 6877 }, { "epoch": 0.8450669615431872, "grad_norm": 1.099772022386202, "learning_rate": 2.140742084130649e-06, "loss": 0.5476, "step": 6878 }, { "epoch": 0.8451898267600443, "grad_norm": 1.2179953505895624, "learning_rate": 2.137431123080991e-06, "loss": 0.5237, "step": 6879 }, { "epoch": 0.8453126919769013, "grad_norm": 1.1458454787268766, "learning_rate": 2.1341225280174586e-06, "loss": 0.5099, "step": 6880 }, { "epoch": 0.8454355571937584, "grad_norm": 1.1233660811502177, "learning_rate": 2.13081629954865e-06, "loss": 0.4907, "step": 6881 }, { "epoch": 0.8455584224106155, "grad_norm": 1.698029156120956, "learning_rate": 2.1275124382827243e-06, "loss": 0.6413, "step": 6882 }, { "epoch": 0.8456812876274726, "grad_norm": 1.3451009561641756, "learning_rate": 2.1242109448274015e-06, "loss": 0.578, "step": 6883 }, { "epoch": 0.8458041528443297, "grad_norm": 1.1795169964937318, "learning_rate": 2.120911819789974e-06, "loss": 0.5302, "step": 6884 }, { "epoch": 0.8459270180611869, "grad_norm": 1.3545255521853616, "learning_rate": 2.117615063777293e-06, "loss": 0.6243, "step": 6885 }, { "epoch": 0.846049883278044, "grad_norm": 1.2876256826176304, "learning_rate": 2.1143206773957797e-06, "loss": 0.5721, "step": 6886 }, { "epoch": 0.8461727484949011, "grad_norm": 0.9964197761213719, "learning_rate": 2.1110286612514077e-06, "loss": 0.4817, "step": 6887 }, { "epoch": 0.8462956137117582, "grad_norm": 1.1855114907052011, "learning_rate": 2.107739015949725e-06, "loss": 0.6919, "step": 6888 }, { "epoch": 0.8464184789286153, "grad_norm": 1.3483264434918811, "learning_rate": 2.104451742095845e-06, "loss": 0.5636, "step": 6889 }, { "epoch": 0.8465413441454724, "grad_norm": 1.1447133896646586, "learning_rate": 2.101166840294433e-06, "loss": 0.5695, "step": 6890 }, { "epoch": 0.8466642093623296, "grad_norm": 1.1413622721630428, "learning_rate": 2.0978843111497324e-06, "loss": 0.6007, "step": 6891 }, { "epoch": 0.8467870745791867, "grad_norm": 1.0922169550209095, "learning_rate": 2.0946041552655314e-06, "loss": 0.6444, "step": 6892 }, { "epoch": 0.8469099397960438, "grad_norm": 1.3552531632554912, "learning_rate": 2.0913263732452093e-06, "loss": 0.5907, "step": 6893 }, { "epoch": 0.8470328050129009, "grad_norm": 1.3288302354622983, "learning_rate": 2.0880509656916836e-06, "loss": 0.6255, "step": 6894 }, { "epoch": 0.8471556702297579, "grad_norm": 1.2917033007095524, "learning_rate": 2.0847779332074475e-06, "loss": 0.6118, "step": 6895 }, { "epoch": 0.847278535446615, "grad_norm": 1.1367789630646372, "learning_rate": 2.081507276394544e-06, "loss": 0.5518, "step": 6896 }, { "epoch": 0.8474014006634721, "grad_norm": 1.3603466013678194, "learning_rate": 2.078238995854608e-06, "loss": 0.6155, "step": 6897 }, { "epoch": 0.8475242658803293, "grad_norm": 1.194132798791412, "learning_rate": 2.0749730921888022e-06, "loss": 0.5517, "step": 6898 }, { "epoch": 0.8476471310971864, "grad_norm": 1.1443761555235443, "learning_rate": 2.0717095659978784e-06, "loss": 0.5325, "step": 6899 }, { "epoch": 0.8477699963140435, "grad_norm": 1.1272635695565338, "learning_rate": 2.0684484178821333e-06, "loss": 0.6538, "step": 6900 }, { "epoch": 0.8478928615309006, "grad_norm": 1.213715841112267, "learning_rate": 2.0651896484414383e-06, "loss": 0.5976, "step": 6901 }, { "epoch": 0.8480157267477577, "grad_norm": 1.4333793843316127, "learning_rate": 2.061933258275226e-06, "loss": 0.5875, "step": 6902 }, { "epoch": 0.8481385919646148, "grad_norm": 1.0616829268804249, "learning_rate": 2.0586792479824766e-06, "loss": 0.5784, "step": 6903 }, { "epoch": 0.848261457181472, "grad_norm": 1.107578331787731, "learning_rate": 2.0554276181617603e-06, "loss": 0.5128, "step": 6904 }, { "epoch": 0.8483843223983291, "grad_norm": 1.2149614568204523, "learning_rate": 2.05217836941118e-06, "loss": 0.5298, "step": 6905 }, { "epoch": 0.8485071876151862, "grad_norm": 1.2307210025882571, "learning_rate": 2.0489315023284244e-06, "loss": 0.5418, "step": 6906 }, { "epoch": 0.8486300528320433, "grad_norm": 1.0724232649889842, "learning_rate": 2.045687017510724e-06, "loss": 0.6216, "step": 6907 }, { "epoch": 0.8487529180489004, "grad_norm": 1.507047269704493, "learning_rate": 2.0424449155548846e-06, "loss": 0.5742, "step": 6908 }, { "epoch": 0.8488757832657574, "grad_norm": 0.9910833262866477, "learning_rate": 2.039205197057273e-06, "loss": 0.5996, "step": 6909 }, { "epoch": 0.8489986484826145, "grad_norm": 1.213373555139544, "learning_rate": 2.0359678626138102e-06, "loss": 0.633, "step": 6910 }, { "epoch": 0.8491215136994716, "grad_norm": 1.1462146537578735, "learning_rate": 2.0327329128199834e-06, "loss": 0.5674, "step": 6911 }, { "epoch": 0.8492443789163288, "grad_norm": 1.1367267939728634, "learning_rate": 2.029500348270842e-06, "loss": 0.5652, "step": 6912 }, { "epoch": 0.8493672441331859, "grad_norm": 1.0850826097956563, "learning_rate": 2.026270169560998e-06, "loss": 0.5876, "step": 6913 }, { "epoch": 0.849490109350043, "grad_norm": 1.309295820087309, "learning_rate": 2.023042377284615e-06, "loss": 0.5028, "step": 6914 }, { "epoch": 0.8496129745669001, "grad_norm": 1.9365358584180028, "learning_rate": 2.0198169720354283e-06, "loss": 0.7945, "step": 6915 }, { "epoch": 0.8497358397837572, "grad_norm": 1.1309566622436364, "learning_rate": 2.0165939544067306e-06, "loss": 0.7024, "step": 6916 }, { "epoch": 0.8498587050006143, "grad_norm": 1.098094640221295, "learning_rate": 2.013373324991377e-06, "loss": 0.5145, "step": 6917 }, { "epoch": 0.8499815702174714, "grad_norm": 1.2024433350938737, "learning_rate": 2.0101550843817768e-06, "loss": 0.5744, "step": 6918 }, { "epoch": 0.8501044354343286, "grad_norm": 1.0517680626265715, "learning_rate": 2.0069392331699077e-06, "loss": 0.5968, "step": 6919 }, { "epoch": 0.8502273006511857, "grad_norm": 1.1395959042948411, "learning_rate": 2.003725771947305e-06, "loss": 0.5915, "step": 6920 }, { "epoch": 0.8503501658680428, "grad_norm": 1.0433597535311392, "learning_rate": 2.0005147013050594e-06, "loss": 0.5973, "step": 6921 }, { "epoch": 0.8504730310848999, "grad_norm": 1.286462184145451, "learning_rate": 1.997306021833832e-06, "loss": 0.5009, "step": 6922 }, { "epoch": 0.850595896301757, "grad_norm": 1.193368401201695, "learning_rate": 1.9940997341238347e-06, "loss": 0.6671, "step": 6923 }, { "epoch": 0.850718761518614, "grad_norm": 1.2097771036693499, "learning_rate": 1.9908958387648485e-06, "loss": 0.6892, "step": 6924 }, { "epoch": 0.8508416267354711, "grad_norm": 1.3006088168187133, "learning_rate": 1.987694336346203e-06, "loss": 0.5591, "step": 6925 }, { "epoch": 0.8509644919523283, "grad_norm": 1.313741289111501, "learning_rate": 1.9844952274567955e-06, "loss": 0.4287, "step": 6926 }, { "epoch": 0.8510873571691854, "grad_norm": 2.04714638823587, "learning_rate": 1.9812985126850875e-06, "loss": 0.6746, "step": 6927 }, { "epoch": 0.8512102223860425, "grad_norm": 1.3033301003987963, "learning_rate": 1.9781041926190847e-06, "loss": 0.5689, "step": 6928 }, { "epoch": 0.8513330876028996, "grad_norm": 1.1584400759685762, "learning_rate": 1.974912267846369e-06, "loss": 0.5034, "step": 6929 }, { "epoch": 0.8514559528197567, "grad_norm": 1.0380747842346651, "learning_rate": 1.971722738954064e-06, "loss": 0.5506, "step": 6930 }, { "epoch": 0.8515788180366138, "grad_norm": 1.1506462666680672, "learning_rate": 1.968535606528877e-06, "loss": 0.5168, "step": 6931 }, { "epoch": 0.851701683253471, "grad_norm": 1.6536542869417012, "learning_rate": 1.965350871157049e-06, "loss": 0.7033, "step": 6932 }, { "epoch": 0.8518245484703281, "grad_norm": 1.2476242297632292, "learning_rate": 1.9621685334243984e-06, "loss": 0.4957, "step": 6933 }, { "epoch": 0.8519474136871852, "grad_norm": 1.2067622590062625, "learning_rate": 1.9589885939162917e-06, "loss": 0.5024, "step": 6934 }, { "epoch": 0.8520702789040423, "grad_norm": 1.0688003805477082, "learning_rate": 1.9558110532176576e-06, "loss": 0.6746, "step": 6935 }, { "epoch": 0.8521931441208994, "grad_norm": 1.3168755791970088, "learning_rate": 1.9526359119129856e-06, "loss": 0.6329, "step": 6936 }, { "epoch": 0.8523160093377565, "grad_norm": 1.3088445244887286, "learning_rate": 1.9494631705863265e-06, "loss": 0.5642, "step": 6937 }, { "epoch": 0.8524388745546135, "grad_norm": 1.0715370081174134, "learning_rate": 1.9462928298212785e-06, "loss": 0.5486, "step": 6938 }, { "epoch": 0.8525617397714707, "grad_norm": 0.9892201425241608, "learning_rate": 1.943124890201007e-06, "loss": 0.5847, "step": 6939 }, { "epoch": 0.8526846049883278, "grad_norm": 1.1338600906539757, "learning_rate": 1.9399593523082387e-06, "loss": 0.5466, "step": 6940 }, { "epoch": 0.8528074702051849, "grad_norm": 1.3080046668792473, "learning_rate": 1.9367962167252483e-06, "loss": 0.5203, "step": 6941 }, { "epoch": 0.852930335422042, "grad_norm": 1.2719614784794118, "learning_rate": 1.9336354840338737e-06, "loss": 0.42, "step": 6942 }, { "epoch": 0.8530532006388991, "grad_norm": 1.269753491834709, "learning_rate": 1.9304771548155148e-06, "loss": 0.5189, "step": 6943 }, { "epoch": 0.8531760658557562, "grad_norm": 1.2914249075605218, "learning_rate": 1.927321229651128e-06, "loss": 0.69, "step": 6944 }, { "epoch": 0.8532989310726133, "grad_norm": 1.030882424251882, "learning_rate": 1.9241677091212183e-06, "loss": 0.5431, "step": 6945 }, { "epoch": 0.8534217962894705, "grad_norm": 0.9406385365443406, "learning_rate": 1.9210165938058594e-06, "loss": 0.5874, "step": 6946 }, { "epoch": 0.8535446615063276, "grad_norm": 1.322416090185305, "learning_rate": 1.917867884284679e-06, "loss": 0.5417, "step": 6947 }, { "epoch": 0.8536675267231847, "grad_norm": 1.357858429908527, "learning_rate": 1.9147215811368597e-06, "loss": 0.5167, "step": 6948 }, { "epoch": 0.8537903919400418, "grad_norm": 1.1075828713061624, "learning_rate": 1.9115776849411425e-06, "loss": 0.5343, "step": 6949 }, { "epoch": 0.8539132571568989, "grad_norm": 1.3059370309061946, "learning_rate": 1.9084361962758306e-06, "loss": 0.5945, "step": 6950 }, { "epoch": 0.854036122373756, "grad_norm": 1.2339655922983717, "learning_rate": 1.9052971157187816e-06, "loss": 0.5938, "step": 6951 }, { "epoch": 0.8541589875906132, "grad_norm": 1.2000268039538502, "learning_rate": 1.9021604438474016e-06, "loss": 0.61, "step": 6952 }, { "epoch": 0.8542818528074702, "grad_norm": 1.2388664340200246, "learning_rate": 1.899026181238666e-06, "loss": 0.5537, "step": 6953 }, { "epoch": 0.8544047180243273, "grad_norm": 1.1882052729697237, "learning_rate": 1.8958943284691056e-06, "loss": 0.5258, "step": 6954 }, { "epoch": 0.8545275832411844, "grad_norm": 1.3013620235602423, "learning_rate": 1.8927648861147956e-06, "loss": 0.5707, "step": 6955 }, { "epoch": 0.8546504484580415, "grad_norm": 1.350830549946907, "learning_rate": 1.889637854751386e-06, "loss": 0.5224, "step": 6956 }, { "epoch": 0.8547733136748986, "grad_norm": 1.1941854450805582, "learning_rate": 1.8865132349540615e-06, "loss": 0.5617, "step": 6957 }, { "epoch": 0.8548961788917557, "grad_norm": 1.143490838333734, "learning_rate": 1.8833910272975906e-06, "loss": 0.485, "step": 6958 }, { "epoch": 0.8550190441086128, "grad_norm": 1.149179124144171, "learning_rate": 1.8802712323562742e-06, "loss": 0.5827, "step": 6959 }, { "epoch": 0.85514190932547, "grad_norm": 1.1784717216400384, "learning_rate": 1.8771538507039815e-06, "loss": 0.5557, "step": 6960 }, { "epoch": 0.8552647745423271, "grad_norm": 1.4158572751560874, "learning_rate": 1.8740388829141285e-06, "loss": 0.4622, "step": 6961 }, { "epoch": 0.8553876397591842, "grad_norm": 1.0294180093329781, "learning_rate": 1.8709263295597023e-06, "loss": 0.4801, "step": 6962 }, { "epoch": 0.8555105049760413, "grad_norm": 1.2427983419234006, "learning_rate": 1.8678161912132313e-06, "loss": 0.6624, "step": 6963 }, { "epoch": 0.8556333701928984, "grad_norm": 1.2850339634974473, "learning_rate": 1.8647084684468096e-06, "loss": 0.5114, "step": 6964 }, { "epoch": 0.8557562354097555, "grad_norm": 1.303928495224064, "learning_rate": 1.8616031618320767e-06, "loss": 0.5127, "step": 6965 }, { "epoch": 0.8558791006266127, "grad_norm": 1.3782415102497176, "learning_rate": 1.8585002719402372e-06, "loss": 0.4942, "step": 6966 }, { "epoch": 0.8560019658434697, "grad_norm": 1.5420213797043345, "learning_rate": 1.8553997993420495e-06, "loss": 0.5815, "step": 6967 }, { "epoch": 0.8561248310603268, "grad_norm": 1.3218386199489882, "learning_rate": 1.852301744607816e-06, "loss": 0.6343, "step": 6968 }, { "epoch": 0.8562476962771839, "grad_norm": 1.150760340776776, "learning_rate": 1.8492061083074174e-06, "loss": 0.5536, "step": 6969 }, { "epoch": 0.856370561494041, "grad_norm": 1.1124392236196428, "learning_rate": 1.8461128910102665e-06, "loss": 0.4925, "step": 6970 }, { "epoch": 0.8564934267108981, "grad_norm": 1.1796815921183443, "learning_rate": 1.8430220932853465e-06, "loss": 0.6406, "step": 6971 }, { "epoch": 0.8566162919277552, "grad_norm": 1.0542176200180555, "learning_rate": 1.8399337157011842e-06, "loss": 0.4744, "step": 6972 }, { "epoch": 0.8567391571446124, "grad_norm": 1.1402604262844094, "learning_rate": 1.836847758825867e-06, "loss": 0.5848, "step": 6973 }, { "epoch": 0.8568620223614695, "grad_norm": 1.2323245121129194, "learning_rate": 1.8337642232270424e-06, "loss": 0.6379, "step": 6974 }, { "epoch": 0.8569848875783266, "grad_norm": 1.3151526352205773, "learning_rate": 1.8306831094719002e-06, "loss": 0.4878, "step": 6975 }, { "epoch": 0.8571077527951837, "grad_norm": 1.2222669056633022, "learning_rate": 1.8276044181271935e-06, "loss": 0.5826, "step": 6976 }, { "epoch": 0.8572306180120408, "grad_norm": 1.2115603213200874, "learning_rate": 1.8245281497592293e-06, "loss": 0.7264, "step": 6977 }, { "epoch": 0.8573534832288979, "grad_norm": 1.4250920653554906, "learning_rate": 1.8214543049338683e-06, "loss": 0.5281, "step": 6978 }, { "epoch": 0.857476348445755, "grad_norm": 1.3240510946209247, "learning_rate": 1.8183828842165183e-06, "loss": 0.6343, "step": 6979 }, { "epoch": 0.8575992136626122, "grad_norm": 1.4336218261160918, "learning_rate": 1.815313888172151e-06, "loss": 0.4703, "step": 6980 }, { "epoch": 0.8577220788794693, "grad_norm": 0.9834406254437499, "learning_rate": 1.8122473173652893e-06, "loss": 0.5953, "step": 6981 }, { "epoch": 0.8578449440963263, "grad_norm": 1.1653727760671861, "learning_rate": 1.8091831723600105e-06, "loss": 0.4802, "step": 6982 }, { "epoch": 0.8579678093131834, "grad_norm": 1.0051222058779414, "learning_rate": 1.8061214537199388e-06, "loss": 0.5984, "step": 6983 }, { "epoch": 0.8580906745300405, "grad_norm": 1.0848273315454016, "learning_rate": 1.8030621620082604e-06, "loss": 0.5147, "step": 6984 }, { "epoch": 0.8582135397468976, "grad_norm": 1.1093433114785107, "learning_rate": 1.8000052977877152e-06, "loss": 0.5835, "step": 6985 }, { "epoch": 0.8583364049637547, "grad_norm": 1.4387666054370678, "learning_rate": 1.7969508616205866e-06, "loss": 0.5357, "step": 6986 }, { "epoch": 0.8584592701806119, "grad_norm": 1.1465275255673681, "learning_rate": 1.7938988540687233e-06, "loss": 0.5735, "step": 6987 }, { "epoch": 0.858582135397469, "grad_norm": 1.2617703657295776, "learning_rate": 1.7908492756935203e-06, "loss": 0.5287, "step": 6988 }, { "epoch": 0.8587050006143261, "grad_norm": 1.172331008901022, "learning_rate": 1.787802127055933e-06, "loss": 0.6038, "step": 6989 }, { "epoch": 0.8588278658311832, "grad_norm": 1.2455444627702117, "learning_rate": 1.784757408716457e-06, "loss": 0.5429, "step": 6990 }, { "epoch": 0.8589507310480403, "grad_norm": 1.2102588200684823, "learning_rate": 1.7817151212351507e-06, "loss": 0.5744, "step": 6991 }, { "epoch": 0.8590735962648974, "grad_norm": 1.0210023586401331, "learning_rate": 1.7786752651716281e-06, "loss": 0.5376, "step": 6992 }, { "epoch": 0.8591964614817545, "grad_norm": 0.925224418069438, "learning_rate": 1.7756378410850437e-06, "loss": 0.5724, "step": 6993 }, { "epoch": 0.8593193266986117, "grad_norm": 1.2105793335232833, "learning_rate": 1.772602849534119e-06, "loss": 0.5526, "step": 6994 }, { "epoch": 0.8594421919154688, "grad_norm": 1.1716781064849016, "learning_rate": 1.7695702910771106e-06, "loss": 0.5483, "step": 6995 }, { "epoch": 0.8595650571323259, "grad_norm": 1.11717812353179, "learning_rate": 1.7665401662718522e-06, "loss": 0.4881, "step": 6996 }, { "epoch": 0.8596879223491829, "grad_norm": 1.2405850970821097, "learning_rate": 1.7635124756757031e-06, "loss": 0.5839, "step": 6997 }, { "epoch": 0.85981078756604, "grad_norm": 1.4501021911516512, "learning_rate": 1.760487219845598e-06, "loss": 0.6727, "step": 6998 }, { "epoch": 0.8599336527828971, "grad_norm": 1.3714016410083725, "learning_rate": 1.7574643993379996e-06, "loss": 0.5237, "step": 6999 }, { "epoch": 0.8600565179997542, "grad_norm": 1.11266419771346, "learning_rate": 1.7544440147089518e-06, "loss": 0.5532, "step": 7000 }, { "epoch": 0.8601793832166114, "grad_norm": 1.5139103375665575, "learning_rate": 1.751426066514022e-06, "loss": 0.6971, "step": 7001 }, { "epoch": 0.8603022484334685, "grad_norm": 1.52116482761324, "learning_rate": 1.7484105553083523e-06, "loss": 0.5829, "step": 7002 }, { "epoch": 0.8604251136503256, "grad_norm": 1.0828494802121706, "learning_rate": 1.7453974816466162e-06, "loss": 0.6066, "step": 7003 }, { "epoch": 0.8605479788671827, "grad_norm": 1.103416675112805, "learning_rate": 1.7423868460830566e-06, "loss": 0.5162, "step": 7004 }, { "epoch": 0.8606708440840398, "grad_norm": 1.414592567015034, "learning_rate": 1.7393786491714591e-06, "loss": 0.5037, "step": 7005 }, { "epoch": 0.8607937093008969, "grad_norm": 1.1867347265682837, "learning_rate": 1.7363728914651594e-06, "loss": 0.5975, "step": 7006 }, { "epoch": 0.860916574517754, "grad_norm": 1.2673898456234125, "learning_rate": 1.7333695735170468e-06, "loss": 0.6894, "step": 7007 }, { "epoch": 0.8610394397346112, "grad_norm": 1.523925310308078, "learning_rate": 1.7303686958795662e-06, "loss": 0.5536, "step": 7008 }, { "epoch": 0.8611623049514683, "grad_norm": 1.1039539643642007, "learning_rate": 1.7273702591047091e-06, "loss": 0.4574, "step": 7009 }, { "epoch": 0.8612851701683254, "grad_norm": 1.2459524540369955, "learning_rate": 1.7243742637440129e-06, "loss": 0.5344, "step": 7010 }, { "epoch": 0.8614080353851824, "grad_norm": 1.1935359030270527, "learning_rate": 1.7213807103485768e-06, "loss": 0.6423, "step": 7011 }, { "epoch": 0.8615309006020395, "grad_norm": 1.2638107258611198, "learning_rate": 1.7183895994690468e-06, "loss": 0.5832, "step": 7012 }, { "epoch": 0.8616537658188966, "grad_norm": 1.3637484158193363, "learning_rate": 1.715400931655613e-06, "loss": 0.5644, "step": 7013 }, { "epoch": 0.8617766310357537, "grad_norm": 1.8028596450034893, "learning_rate": 1.7124147074580254e-06, "loss": 0.666, "step": 7014 }, { "epoch": 0.8618994962526109, "grad_norm": 1.2148301748151145, "learning_rate": 1.7094309274255764e-06, "loss": 0.6183, "step": 7015 }, { "epoch": 0.862022361469468, "grad_norm": 1.0831737724984019, "learning_rate": 1.7064495921071221e-06, "loss": 0.6582, "step": 7016 }, { "epoch": 0.8621452266863251, "grad_norm": 1.2636343927659575, "learning_rate": 1.7034707020510487e-06, "loss": 0.5009, "step": 7017 }, { "epoch": 0.8622680919031822, "grad_norm": 1.0384607457139157, "learning_rate": 1.7004942578053078e-06, "loss": 0.5758, "step": 7018 }, { "epoch": 0.8623909571200393, "grad_norm": 1.1022218046466152, "learning_rate": 1.6975202599174e-06, "loss": 0.4964, "step": 7019 }, { "epoch": 0.8625138223368964, "grad_norm": 1.2267089559772537, "learning_rate": 1.6945487089343725e-06, "loss": 0.6413, "step": 7020 }, { "epoch": 0.8626366875537536, "grad_norm": 1.2513225642663799, "learning_rate": 1.6915796054028182e-06, "loss": 0.5207, "step": 7021 }, { "epoch": 0.8627595527706107, "grad_norm": 1.047820304884196, "learning_rate": 1.6886129498688884e-06, "loss": 0.5801, "step": 7022 }, { "epoch": 0.8628824179874678, "grad_norm": 1.1681106870146072, "learning_rate": 1.6856487428782802e-06, "loss": 0.7013, "step": 7023 }, { "epoch": 0.8630052832043249, "grad_norm": 1.2342146354294474, "learning_rate": 1.6826869849762372e-06, "loss": 0.5288, "step": 7024 }, { "epoch": 0.863128148421182, "grad_norm": 1.1498088409923326, "learning_rate": 1.679727676707562e-06, "loss": 0.445, "step": 7025 }, { "epoch": 0.863251013638039, "grad_norm": 1.2817437015163842, "learning_rate": 1.6767708186165875e-06, "loss": 0.5838, "step": 7026 }, { "epoch": 0.8633738788548961, "grad_norm": 1.472674684126238, "learning_rate": 1.6738164112472238e-06, "loss": 0.5498, "step": 7027 }, { "epoch": 0.8634967440717533, "grad_norm": 1.277735778340668, "learning_rate": 1.6708644551429043e-06, "loss": 0.5215, "step": 7028 }, { "epoch": 0.8636196092886104, "grad_norm": 1.3584413336729237, "learning_rate": 1.6679149508466263e-06, "loss": 0.6749, "step": 7029 }, { "epoch": 0.8637424745054675, "grad_norm": 1.1674375309846945, "learning_rate": 1.6649678989009343e-06, "loss": 0.4971, "step": 7030 }, { "epoch": 0.8638653397223246, "grad_norm": 1.1350581361139884, "learning_rate": 1.6620232998479129e-06, "loss": 0.5352, "step": 7031 }, { "epoch": 0.8639882049391817, "grad_norm": 1.7075849849952407, "learning_rate": 1.659081154229209e-06, "loss": 0.5882, "step": 7032 }, { "epoch": 0.8641110701560388, "grad_norm": 1.3868460704418455, "learning_rate": 1.6561414625860028e-06, "loss": 0.5754, "step": 7033 }, { "epoch": 0.864233935372896, "grad_norm": 1.4713950086020007, "learning_rate": 1.6532042254590418e-06, "loss": 0.6252, "step": 7034 }, { "epoch": 0.8643568005897531, "grad_norm": 1.2853126277694031, "learning_rate": 1.650269443388604e-06, "loss": 0.5728, "step": 7035 }, { "epoch": 0.8644796658066102, "grad_norm": 1.4181585997693573, "learning_rate": 1.647337116914529e-06, "loss": 0.6623, "step": 7036 }, { "epoch": 0.8646025310234673, "grad_norm": 1.2088387068654736, "learning_rate": 1.644407246576189e-06, "loss": 0.4785, "step": 7037 }, { "epoch": 0.8647253962403244, "grad_norm": 1.0130331985027898, "learning_rate": 1.6414798329125291e-06, "loss": 0.4989, "step": 7038 }, { "epoch": 0.8648482614571815, "grad_norm": 1.4802572732257813, "learning_rate": 1.6385548764620174e-06, "loss": 0.523, "step": 7039 }, { "epoch": 0.8649711266740385, "grad_norm": 1.3892402936820858, "learning_rate": 1.635632377762688e-06, "loss": 0.6487, "step": 7040 }, { "epoch": 0.8650939918908956, "grad_norm": 1.9640204494950129, "learning_rate": 1.632712337352108e-06, "loss": 0.6086, "step": 7041 }, { "epoch": 0.8652168571077528, "grad_norm": 1.393553010404348, "learning_rate": 1.6297947557674042e-06, "loss": 0.6103, "step": 7042 }, { "epoch": 0.8653397223246099, "grad_norm": 1.1926917944949256, "learning_rate": 1.626879633545249e-06, "loss": 0.5878, "step": 7043 }, { "epoch": 0.865462587541467, "grad_norm": 1.1242083930028437, "learning_rate": 1.6239669712218553e-06, "loss": 0.5462, "step": 7044 }, { "epoch": 0.8655854527583241, "grad_norm": 0.9836032656220917, "learning_rate": 1.6210567693329892e-06, "loss": 0.6053, "step": 7045 }, { "epoch": 0.8657083179751812, "grad_norm": 1.2183314120866056, "learning_rate": 1.6181490284139645e-06, "loss": 0.5135, "step": 7046 }, { "epoch": 0.8658311831920383, "grad_norm": 1.3344259569077528, "learning_rate": 1.6152437489996464e-06, "loss": 0.5712, "step": 7047 }, { "epoch": 0.8659540484088954, "grad_norm": 1.2079925289884772, "learning_rate": 1.612340931624434e-06, "loss": 0.5339, "step": 7048 }, { "epoch": 0.8660769136257526, "grad_norm": 1.1565710592005356, "learning_rate": 1.6094405768222841e-06, "loss": 0.5281, "step": 7049 }, { "epoch": 0.8661997788426097, "grad_norm": 1.1383293487968251, "learning_rate": 1.606542685126703e-06, "loss": 0.5722, "step": 7050 }, { "epoch": 0.8663226440594668, "grad_norm": 1.18490809261576, "learning_rate": 1.6036472570707323e-06, "loss": 0.4752, "step": 7051 }, { "epoch": 0.8664455092763239, "grad_norm": 1.0727401508173766, "learning_rate": 1.6007542931869712e-06, "loss": 0.5904, "step": 7052 }, { "epoch": 0.866568374493181, "grad_norm": 1.0811734476034245, "learning_rate": 1.597863794007559e-06, "loss": 0.5379, "step": 7053 }, { "epoch": 0.8666912397100381, "grad_norm": 1.408237194399131, "learning_rate": 1.5949757600641906e-06, "loss": 0.6241, "step": 7054 }, { "epoch": 0.8668141049268951, "grad_norm": 1.265418244899921, "learning_rate": 1.5920901918880925e-06, "loss": 0.5395, "step": 7055 }, { "epoch": 0.8669369701437523, "grad_norm": 1.253680255529607, "learning_rate": 1.5892070900100503e-06, "loss": 0.5904, "step": 7056 }, { "epoch": 0.8670598353606094, "grad_norm": 1.14718609523142, "learning_rate": 1.5863264549603945e-06, "loss": 0.5418, "step": 7057 }, { "epoch": 0.8671827005774665, "grad_norm": 1.0820255735845286, "learning_rate": 1.5834482872689949e-06, "loss": 0.5951, "step": 7058 }, { "epoch": 0.8673055657943236, "grad_norm": 1.333439127795634, "learning_rate": 1.5805725874652726e-06, "loss": 0.5771, "step": 7059 }, { "epoch": 0.8674284310111807, "grad_norm": 1.2702119107341001, "learning_rate": 1.5776993560781948e-06, "loss": 0.5751, "step": 7060 }, { "epoch": 0.8675512962280378, "grad_norm": 1.2370088754475632, "learning_rate": 1.5748285936362772e-06, "loss": 0.5756, "step": 7061 }, { "epoch": 0.867674161444895, "grad_norm": 1.1079391742519422, "learning_rate": 1.5719603006675703e-06, "loss": 0.5454, "step": 7062 }, { "epoch": 0.8677970266617521, "grad_norm": 1.0804770487569744, "learning_rate": 1.5690944776996875e-06, "loss": 0.5139, "step": 7063 }, { "epoch": 0.8679198918786092, "grad_norm": 1.2477276117500156, "learning_rate": 1.566231125259765e-06, "loss": 0.4859, "step": 7064 }, { "epoch": 0.8680427570954663, "grad_norm": 1.4361049527422436, "learning_rate": 1.5633702438745118e-06, "loss": 0.5755, "step": 7065 }, { "epoch": 0.8681656223123234, "grad_norm": 1.0831917465394987, "learning_rate": 1.5605118340701602e-06, "loss": 0.5373, "step": 7066 }, { "epoch": 0.8682884875291805, "grad_norm": 1.0379245691782841, "learning_rate": 1.5576558963725029e-06, "loss": 0.6023, "step": 7067 }, { "epoch": 0.8684113527460376, "grad_norm": 1.1112577675981596, "learning_rate": 1.5548024313068633e-06, "loss": 0.6331, "step": 7068 }, { "epoch": 0.8685342179628947, "grad_norm": 1.2230260757221156, "learning_rate": 1.5519514393981193e-06, "loss": 0.696, "step": 7069 }, { "epoch": 0.8686570831797518, "grad_norm": 1.2529479035369482, "learning_rate": 1.5491029211706986e-06, "loss": 0.4927, "step": 7070 }, { "epoch": 0.8687799483966089, "grad_norm": 1.092910634144334, "learning_rate": 1.5462568771485618e-06, "loss": 0.6714, "step": 7071 }, { "epoch": 0.868902813613466, "grad_norm": 0.9339830293641502, "learning_rate": 1.5434133078552204e-06, "loss": 0.5823, "step": 7072 }, { "epoch": 0.8690256788303231, "grad_norm": 1.3300937505727808, "learning_rate": 1.5405722138137323e-06, "loss": 0.658, "step": 7073 }, { "epoch": 0.8691485440471802, "grad_norm": 1.0575265164086565, "learning_rate": 1.5377335955466997e-06, "loss": 0.5947, "step": 7074 }, { "epoch": 0.8692714092640373, "grad_norm": 1.328593842749922, "learning_rate": 1.534897453576265e-06, "loss": 0.5478, "step": 7075 }, { "epoch": 0.8693942744808945, "grad_norm": 1.5099519162458095, "learning_rate": 1.532063788424119e-06, "loss": 0.6602, "step": 7076 }, { "epoch": 0.8695171396977516, "grad_norm": 1.3700253309438115, "learning_rate": 1.5292326006114998e-06, "loss": 0.6138, "step": 7077 }, { "epoch": 0.8696400049146087, "grad_norm": 1.2482873253650553, "learning_rate": 1.5264038906591793e-06, "loss": 0.488, "step": 7078 }, { "epoch": 0.8697628701314658, "grad_norm": 1.1149725006587603, "learning_rate": 1.5235776590874844e-06, "loss": 0.5841, "step": 7079 }, { "epoch": 0.8698857353483229, "grad_norm": 1.027152888425496, "learning_rate": 1.5207539064162811e-06, "loss": 0.5655, "step": 7080 }, { "epoch": 0.87000860056518, "grad_norm": 1.1750479612510039, "learning_rate": 1.5179326331649823e-06, "loss": 0.6057, "step": 7081 }, { "epoch": 0.8701314657820372, "grad_norm": 1.2513315492440755, "learning_rate": 1.515113839852541e-06, "loss": 0.5876, "step": 7082 }, { "epoch": 0.8702543309988943, "grad_norm": 1.0363686623454096, "learning_rate": 1.5122975269974542e-06, "loss": 0.6052, "step": 7083 }, { "epoch": 0.8703771962157513, "grad_norm": 1.0921375343662225, "learning_rate": 1.5094836951177676e-06, "loss": 0.5656, "step": 7084 }, { "epoch": 0.8705000614326084, "grad_norm": 1.1329961916002316, "learning_rate": 1.5066723447310688e-06, "loss": 0.6701, "step": 7085 }, { "epoch": 0.8706229266494655, "grad_norm": 1.2497184208770165, "learning_rate": 1.5038634763544822e-06, "loss": 0.6749, "step": 7086 }, { "epoch": 0.8707457918663226, "grad_norm": 1.1890373489576522, "learning_rate": 1.5010570905046833e-06, "loss": 0.5459, "step": 7087 }, { "epoch": 0.8708686570831797, "grad_norm": 1.333386363700467, "learning_rate": 1.4982531876978923e-06, "loss": 0.528, "step": 7088 }, { "epoch": 0.8709915223000368, "grad_norm": 1.3694462028124077, "learning_rate": 1.4954517684498614e-06, "loss": 0.5794, "step": 7089 }, { "epoch": 0.871114387516894, "grad_norm": 1.0712935556312295, "learning_rate": 1.4926528332759005e-06, "loss": 0.643, "step": 7090 }, { "epoch": 0.8712372527337511, "grad_norm": 1.167536010826607, "learning_rate": 1.489856382690849e-06, "loss": 0.5695, "step": 7091 }, { "epoch": 0.8713601179506082, "grad_norm": 1.3294777534214932, "learning_rate": 1.4870624172091041e-06, "loss": 0.5469, "step": 7092 }, { "epoch": 0.8714829831674653, "grad_norm": 1.3048186449271808, "learning_rate": 1.4842709373445896e-06, "loss": 0.6042, "step": 7093 }, { "epoch": 0.8716058483843224, "grad_norm": 1.0889874024113757, "learning_rate": 1.4814819436107846e-06, "loss": 0.5687, "step": 7094 }, { "epoch": 0.8717287136011795, "grad_norm": 1.0007404611345647, "learning_rate": 1.4786954365207072e-06, "loss": 0.548, "step": 7095 }, { "epoch": 0.8718515788180367, "grad_norm": 1.2848220843062472, "learning_rate": 1.4759114165869126e-06, "loss": 0.6026, "step": 7096 }, { "epoch": 0.8719744440348938, "grad_norm": 1.1107676179413724, "learning_rate": 1.4731298843215107e-06, "loss": 0.5149, "step": 7097 }, { "epoch": 0.8720973092517508, "grad_norm": 1.1445316196832824, "learning_rate": 1.4703508402361343e-06, "loss": 0.6012, "step": 7098 }, { "epoch": 0.8722201744686079, "grad_norm": 1.1542008766709428, "learning_rate": 1.4675742848419842e-06, "loss": 0.5649, "step": 7099 }, { "epoch": 0.872343039685465, "grad_norm": 1.2591490391002773, "learning_rate": 1.4648002186497805e-06, "loss": 0.5765, "step": 7100 }, { "epoch": 0.8724659049023221, "grad_norm": 1.010989739203606, "learning_rate": 1.4620286421698014e-06, "loss": 0.5777, "step": 7101 }, { "epoch": 0.8725887701191792, "grad_norm": 1.0995054096111203, "learning_rate": 1.459259555911851e-06, "loss": 0.6595, "step": 7102 }, { "epoch": 0.8727116353360364, "grad_norm": 1.3659930608704987, "learning_rate": 1.456492960385295e-06, "loss": 0.5859, "step": 7103 }, { "epoch": 0.8728345005528935, "grad_norm": 1.126439515338653, "learning_rate": 1.4537288560990247e-06, "loss": 0.5175, "step": 7104 }, { "epoch": 0.8729573657697506, "grad_norm": 1.1954613743311027, "learning_rate": 1.4509672435614819e-06, "loss": 0.5957, "step": 7105 }, { "epoch": 0.8730802309866077, "grad_norm": 1.6116689015974976, "learning_rate": 1.448208123280645e-06, "loss": 0.597, "step": 7106 }, { "epoch": 0.8732030962034648, "grad_norm": 1.1034312208109913, "learning_rate": 1.4454514957640363e-06, "loss": 0.6195, "step": 7107 }, { "epoch": 0.8733259614203219, "grad_norm": 1.3871281060603522, "learning_rate": 1.4426973615187239e-06, "loss": 0.5615, "step": 7108 }, { "epoch": 0.873448826637179, "grad_norm": 1.1574308129566302, "learning_rate": 1.4399457210513072e-06, "loss": 0.6941, "step": 7109 }, { "epoch": 0.8735716918540362, "grad_norm": 1.5862134571416906, "learning_rate": 1.4371965748679333e-06, "loss": 0.7052, "step": 7110 }, { "epoch": 0.8736945570708933, "grad_norm": 1.2300268416404474, "learning_rate": 1.4344499234742941e-06, "loss": 0.4743, "step": 7111 }, { "epoch": 0.8738174222877504, "grad_norm": 1.1719561756920505, "learning_rate": 1.4317057673756172e-06, "loss": 0.5731, "step": 7112 }, { "epoch": 0.8739402875046074, "grad_norm": 1.163794583447879, "learning_rate": 1.4289641070766674e-06, "loss": 0.6094, "step": 7113 }, { "epoch": 0.8740631527214645, "grad_norm": 1.156608843944235, "learning_rate": 1.4262249430817609e-06, "loss": 0.6023, "step": 7114 }, { "epoch": 0.8741860179383216, "grad_norm": 1.2811697916874987, "learning_rate": 1.4234882758947482e-06, "loss": 0.4726, "step": 7115 }, { "epoch": 0.8743088831551787, "grad_norm": 1.199707485651841, "learning_rate": 1.4207541060190182e-06, "loss": 0.6142, "step": 7116 }, { "epoch": 0.8744317483720359, "grad_norm": 1.1997249773232852, "learning_rate": 1.4180224339575055e-06, "loss": 0.5768, "step": 7117 }, { "epoch": 0.874554613588893, "grad_norm": 1.5744977257433705, "learning_rate": 1.4152932602126844e-06, "loss": 0.7534, "step": 7118 }, { "epoch": 0.8746774788057501, "grad_norm": 1.5284889403095072, "learning_rate": 1.4125665852865704e-06, "loss": 0.5615, "step": 7119 }, { "epoch": 0.8748003440226072, "grad_norm": 0.9640913126400859, "learning_rate": 1.4098424096807138e-06, "loss": 0.6022, "step": 7120 }, { "epoch": 0.8749232092394643, "grad_norm": 1.1229616283881876, "learning_rate": 1.407120733896209e-06, "loss": 0.5803, "step": 7121 }, { "epoch": 0.8750460744563214, "grad_norm": 1.315628239910807, "learning_rate": 1.4044015584336934e-06, "loss": 0.5135, "step": 7122 }, { "epoch": 0.8751689396731785, "grad_norm": 1.0920323136835997, "learning_rate": 1.401684883793342e-06, "loss": 0.5707, "step": 7123 }, { "epoch": 0.8752918048900357, "grad_norm": 1.0843033736582204, "learning_rate": 1.3989707104748673e-06, "loss": 0.5989, "step": 7124 }, { "epoch": 0.8754146701068928, "grad_norm": 1.1224203367629808, "learning_rate": 1.3962590389775242e-06, "loss": 0.5152, "step": 7125 }, { "epoch": 0.8755375353237499, "grad_norm": 1.0944664726660147, "learning_rate": 1.3935498698001093e-06, "loss": 0.475, "step": 7126 }, { "epoch": 0.875660400540607, "grad_norm": 1.4178362902858213, "learning_rate": 1.3908432034409518e-06, "loss": 0.5777, "step": 7127 }, { "epoch": 0.875783265757464, "grad_norm": 1.1638741385205535, "learning_rate": 1.3881390403979321e-06, "loss": 0.5209, "step": 7128 }, { "epoch": 0.8759061309743211, "grad_norm": 1.3014281401697947, "learning_rate": 1.3854373811684557e-06, "loss": 0.4988, "step": 7129 }, { "epoch": 0.8760289961911782, "grad_norm": 1.1122182990996692, "learning_rate": 1.382738226249483e-06, "loss": 0.5466, "step": 7130 }, { "epoch": 0.8761518614080354, "grad_norm": 1.2336539463981147, "learning_rate": 1.3800415761375007e-06, "loss": 0.613, "step": 7131 }, { "epoch": 0.8762747266248925, "grad_norm": 1.1077015809935749, "learning_rate": 1.377347431328545e-06, "loss": 0.478, "step": 7132 }, { "epoch": 0.8763975918417496, "grad_norm": 1.1737832179164718, "learning_rate": 1.3746557923181795e-06, "loss": 0.5686, "step": 7133 }, { "epoch": 0.8765204570586067, "grad_norm": 1.0736900985901194, "learning_rate": 1.3719666596015184e-06, "loss": 0.5569, "step": 7134 }, { "epoch": 0.8766433222754638, "grad_norm": 1.1052829161600521, "learning_rate": 1.3692800336732108e-06, "loss": 0.6328, "step": 7135 }, { "epoch": 0.8767661874923209, "grad_norm": 0.93739960073414, "learning_rate": 1.3665959150274382e-06, "loss": 0.5063, "step": 7136 }, { "epoch": 0.876889052709178, "grad_norm": 0.9676139344106852, "learning_rate": 1.3639143041579371e-06, "loss": 0.5146, "step": 7137 }, { "epoch": 0.8770119179260352, "grad_norm": 1.3099465946798965, "learning_rate": 1.3612352015579631e-06, "loss": 0.5391, "step": 7138 }, { "epoch": 0.8771347831428923, "grad_norm": 1.2346471139935726, "learning_rate": 1.358558607720327e-06, "loss": 0.5512, "step": 7139 }, { "epoch": 0.8772576483597494, "grad_norm": 1.186522653568312, "learning_rate": 1.3558845231373617e-06, "loss": 0.5045, "step": 7140 }, { "epoch": 0.8773805135766065, "grad_norm": 1.4101778849544024, "learning_rate": 1.353212948300957e-06, "loss": 0.6003, "step": 7141 }, { "epoch": 0.8775033787934635, "grad_norm": 1.4711073773646366, "learning_rate": 1.3505438837025265e-06, "loss": 0.5275, "step": 7142 }, { "epoch": 0.8776262440103206, "grad_norm": 1.7882339920740296, "learning_rate": 1.3478773298330322e-06, "loss": 0.5225, "step": 7143 }, { "epoch": 0.8777491092271777, "grad_norm": 1.1083825637459748, "learning_rate": 1.345213287182962e-06, "loss": 0.505, "step": 7144 }, { "epoch": 0.8778719744440349, "grad_norm": 1.3739069083342748, "learning_rate": 1.3425517562423539e-06, "loss": 0.463, "step": 7145 }, { "epoch": 0.877994839660892, "grad_norm": 1.2214116792634364, "learning_rate": 1.3398927375007814e-06, "loss": 0.5634, "step": 7146 }, { "epoch": 0.8781177048777491, "grad_norm": 0.974087787109072, "learning_rate": 1.3372362314473464e-06, "loss": 0.5581, "step": 7147 }, { "epoch": 0.8782405700946062, "grad_norm": 1.169104503227177, "learning_rate": 1.334582238570703e-06, "loss": 0.5423, "step": 7148 }, { "epoch": 0.8783634353114633, "grad_norm": 1.280295937218546, "learning_rate": 1.3319307593590325e-06, "loss": 0.5904, "step": 7149 }, { "epoch": 0.8784863005283204, "grad_norm": 1.5893455323214367, "learning_rate": 1.3292817943000597e-06, "loss": 0.5646, "step": 7150 }, { "epoch": 0.8786091657451776, "grad_norm": 1.1637959353045442, "learning_rate": 1.3266353438810414e-06, "loss": 0.5679, "step": 7151 }, { "epoch": 0.8787320309620347, "grad_norm": 1.1844183775195403, "learning_rate": 1.3239914085887767e-06, "loss": 0.4705, "step": 7152 }, { "epoch": 0.8788548961788918, "grad_norm": 1.2746473388000763, "learning_rate": 1.321349988909603e-06, "loss": 0.4772, "step": 7153 }, { "epoch": 0.8789777613957489, "grad_norm": 1.186276654090902, "learning_rate": 1.318711085329387e-06, "loss": 0.6164, "step": 7154 }, { "epoch": 0.879100626612606, "grad_norm": 1.0850454851573872, "learning_rate": 1.3160746983335437e-06, "loss": 0.6293, "step": 7155 }, { "epoch": 0.8792234918294631, "grad_norm": 1.0511073775664987, "learning_rate": 1.3134408284070115e-06, "loss": 0.6565, "step": 7156 }, { "epoch": 0.8793463570463201, "grad_norm": 1.3999461297396405, "learning_rate": 1.310809476034283e-06, "loss": 0.6137, "step": 7157 }, { "epoch": 0.8794692222631773, "grad_norm": 1.2566963277294994, "learning_rate": 1.3081806416993714e-06, "loss": 0.6139, "step": 7158 }, { "epoch": 0.8795920874800344, "grad_norm": 1.086665724845854, "learning_rate": 1.305554325885836e-06, "loss": 0.5238, "step": 7159 }, { "epoch": 0.8797149526968915, "grad_norm": 1.0383189725746282, "learning_rate": 1.3029305290767708e-06, "loss": 0.5885, "step": 7160 }, { "epoch": 0.8798378179137486, "grad_norm": 1.176676143131754, "learning_rate": 1.3003092517548076e-06, "loss": 0.5437, "step": 7161 }, { "epoch": 0.8799606831306057, "grad_norm": 1.0363111515423418, "learning_rate": 1.2976904944021112e-06, "loss": 0.5127, "step": 7162 }, { "epoch": 0.8800835483474628, "grad_norm": 1.2338249051880787, "learning_rate": 1.2950742575003843e-06, "loss": 0.5631, "step": 7163 }, { "epoch": 0.88020641356432, "grad_norm": 1.0953819034194399, "learning_rate": 1.2924605415308722e-06, "loss": 0.4927, "step": 7164 }, { "epoch": 0.8803292787811771, "grad_norm": 0.9599801064516802, "learning_rate": 1.2898493469743433e-06, "loss": 0.651, "step": 7165 }, { "epoch": 0.8804521439980342, "grad_norm": 1.1466090956054567, "learning_rate": 1.287240674311117e-06, "loss": 0.6026, "step": 7166 }, { "epoch": 0.8805750092148913, "grad_norm": 1.5484140955339734, "learning_rate": 1.284634524021031e-06, "loss": 0.5622, "step": 7167 }, { "epoch": 0.8806978744317484, "grad_norm": 1.5410232668661374, "learning_rate": 1.2820308965834854e-06, "loss": 0.5479, "step": 7168 }, { "epoch": 0.8808207396486055, "grad_norm": 0.9846117607644819, "learning_rate": 1.2794297924773868e-06, "loss": 0.5958, "step": 7169 }, { "epoch": 0.8809436048654626, "grad_norm": 1.2360515580937632, "learning_rate": 1.2768312121812008e-06, "loss": 0.5134, "step": 7170 }, { "epoch": 0.8810664700823196, "grad_norm": 1.1697933108967524, "learning_rate": 1.2742351561729138e-06, "loss": 0.5869, "step": 7171 }, { "epoch": 0.8811893352991768, "grad_norm": 1.324341639706237, "learning_rate": 1.2716416249300532e-06, "loss": 0.5236, "step": 7172 }, { "epoch": 0.8813122005160339, "grad_norm": 1.1612275670285621, "learning_rate": 1.269050618929688e-06, "loss": 0.5429, "step": 7173 }, { "epoch": 0.881435065732891, "grad_norm": 1.1795503643393226, "learning_rate": 1.2664621386484098e-06, "loss": 0.5946, "step": 7174 }, { "epoch": 0.8815579309497481, "grad_norm": 1.1060931868689656, "learning_rate": 1.2638761845623565e-06, "loss": 0.6443, "step": 7175 }, { "epoch": 0.8816807961666052, "grad_norm": 1.2874056093639015, "learning_rate": 1.2612927571471972e-06, "loss": 0.6115, "step": 7176 }, { "epoch": 0.8818036613834623, "grad_norm": 1.0534853943589673, "learning_rate": 1.2587118568781387e-06, "loss": 0.487, "step": 7177 }, { "epoch": 0.8819265266003194, "grad_norm": 1.1666029203287116, "learning_rate": 1.2561334842299161e-06, "loss": 0.4891, "step": 7178 }, { "epoch": 0.8820493918171766, "grad_norm": 1.1689607538584867, "learning_rate": 1.2535576396768085e-06, "loss": 0.5274, "step": 7179 }, { "epoch": 0.8821722570340337, "grad_norm": 1.188423261784822, "learning_rate": 1.250984323692625e-06, "loss": 0.611, "step": 7180 }, { "epoch": 0.8822951222508908, "grad_norm": 1.4810712488683937, "learning_rate": 1.248413536750707e-06, "loss": 0.6182, "step": 7181 }, { "epoch": 0.8824179874677479, "grad_norm": 1.123631806181217, "learning_rate": 1.2458452793239383e-06, "loss": 0.4948, "step": 7182 }, { "epoch": 0.882540852684605, "grad_norm": 1.1709921634849327, "learning_rate": 1.2432795518847306e-06, "loss": 0.5025, "step": 7183 }, { "epoch": 0.8826637179014621, "grad_norm": 1.2450626382885601, "learning_rate": 1.2407163549050366e-06, "loss": 0.503, "step": 7184 }, { "epoch": 0.8827865831183193, "grad_norm": 1.2321987402173789, "learning_rate": 1.2381556888563338e-06, "loss": 0.6218, "step": 7185 }, { "epoch": 0.8829094483351763, "grad_norm": 1.3067392918251524, "learning_rate": 1.2355975542096444e-06, "loss": 0.5989, "step": 7186 }, { "epoch": 0.8830323135520334, "grad_norm": 1.123352387251633, "learning_rate": 1.2330419514355195e-06, "loss": 0.4611, "step": 7187 }, { "epoch": 0.8831551787688905, "grad_norm": 1.2481998589799197, "learning_rate": 1.2304888810040487e-06, "loss": 0.5839, "step": 7188 }, { "epoch": 0.8832780439857476, "grad_norm": 1.1376594699823634, "learning_rate": 1.2279383433848462e-06, "loss": 0.6097, "step": 7189 }, { "epoch": 0.8834009092026047, "grad_norm": 1.228377304117327, "learning_rate": 1.2253903390470717e-06, "loss": 0.4284, "step": 7190 }, { "epoch": 0.8835237744194618, "grad_norm": 1.3570942305967904, "learning_rate": 1.222844868459415e-06, "loss": 0.5888, "step": 7191 }, { "epoch": 0.883646639636319, "grad_norm": 1.1423632676601039, "learning_rate": 1.2203019320900938e-06, "loss": 0.5084, "step": 7192 }, { "epoch": 0.8837695048531761, "grad_norm": 1.209855595078981, "learning_rate": 1.2177615304068702e-06, "loss": 0.5875, "step": 7193 }, { "epoch": 0.8838923700700332, "grad_norm": 1.1827062672941506, "learning_rate": 1.215223663877027e-06, "loss": 0.5442, "step": 7194 }, { "epoch": 0.8840152352868903, "grad_norm": 1.2713788544819864, "learning_rate": 1.2126883329673977e-06, "loss": 0.5418, "step": 7195 }, { "epoch": 0.8841381005037474, "grad_norm": 0.935256179299919, "learning_rate": 1.2101555381443341e-06, "loss": 0.5248, "step": 7196 }, { "epoch": 0.8842609657206045, "grad_norm": 1.2719315501624957, "learning_rate": 1.2076252798737318e-06, "loss": 0.5417, "step": 7197 }, { "epoch": 0.8843838309374616, "grad_norm": 1.3907404704377302, "learning_rate": 1.2050975586210106e-06, "loss": 0.5545, "step": 7198 }, { "epoch": 0.8845066961543188, "grad_norm": 1.2597781208801382, "learning_rate": 1.2025723748511297e-06, "loss": 0.5836, "step": 7199 }, { "epoch": 0.8846295613711758, "grad_norm": 1.1563419191438897, "learning_rate": 1.2000497290285827e-06, "loss": 0.5405, "step": 7200 }, { "epoch": 0.8847524265880329, "grad_norm": 1.0795588042996045, "learning_rate": 1.1975296216173887e-06, "loss": 0.4926, "step": 7201 }, { "epoch": 0.88487529180489, "grad_norm": 0.9615561487762116, "learning_rate": 1.1950120530811131e-06, "loss": 0.4986, "step": 7202 }, { "epoch": 0.8849981570217471, "grad_norm": 1.2529066913400242, "learning_rate": 1.1924970238828393e-06, "loss": 0.5281, "step": 7203 }, { "epoch": 0.8851210222386042, "grad_norm": 1.2438796072048526, "learning_rate": 1.1899845344851951e-06, "loss": 0.5438, "step": 7204 }, { "epoch": 0.8852438874554613, "grad_norm": 1.1936971080322796, "learning_rate": 1.1874745853503293e-06, "loss": 0.5115, "step": 7205 }, { "epoch": 0.8853667526723185, "grad_norm": 1.2284166379319617, "learning_rate": 1.1849671769399427e-06, "loss": 0.6159, "step": 7206 }, { "epoch": 0.8854896178891756, "grad_norm": 1.0585751806795822, "learning_rate": 1.1824623097152466e-06, "loss": 0.595, "step": 7207 }, { "epoch": 0.8856124831060327, "grad_norm": 1.1157502875188798, "learning_rate": 1.179959984137002e-06, "loss": 0.5019, "step": 7208 }, { "epoch": 0.8857353483228898, "grad_norm": 1.9311462710645566, "learning_rate": 1.1774602006654888e-06, "loss": 0.6685, "step": 7209 }, { "epoch": 0.8858582135397469, "grad_norm": 1.0373406267833987, "learning_rate": 1.1749629597605299e-06, "loss": 0.6161, "step": 7210 }, { "epoch": 0.885981078756604, "grad_norm": 1.1183663905587788, "learning_rate": 1.1724682618814792e-06, "loss": 0.7076, "step": 7211 }, { "epoch": 0.8861039439734612, "grad_norm": 1.3386467949589127, "learning_rate": 1.1699761074872128e-06, "loss": 0.6747, "step": 7212 }, { "epoch": 0.8862268091903183, "grad_norm": 1.3666089790074722, "learning_rate": 1.1674864970361527e-06, "loss": 0.6983, "step": 7213 }, { "epoch": 0.8863496744071754, "grad_norm": 1.5738903178389743, "learning_rate": 1.164999430986242e-06, "loss": 0.5801, "step": 7214 }, { "epoch": 0.8864725396240324, "grad_norm": 1.1612066991584469, "learning_rate": 1.1625149097949672e-06, "loss": 0.5588, "step": 7215 }, { "epoch": 0.8865954048408895, "grad_norm": 1.1786335109364945, "learning_rate": 1.1600329339193321e-06, "loss": 0.6341, "step": 7216 }, { "epoch": 0.8867182700577466, "grad_norm": 1.2504332538046197, "learning_rate": 1.1575535038158852e-06, "loss": 0.577, "step": 7217 }, { "epoch": 0.8868411352746037, "grad_norm": 1.4573122487253933, "learning_rate": 1.1550766199407014e-06, "loss": 0.5181, "step": 7218 }, { "epoch": 0.8869640004914608, "grad_norm": 1.0602207128836274, "learning_rate": 1.1526022827493832e-06, "loss": 0.5152, "step": 7219 }, { "epoch": 0.887086865708318, "grad_norm": 1.1291236613259268, "learning_rate": 1.1501304926970728e-06, "loss": 0.6097, "step": 7220 }, { "epoch": 0.8872097309251751, "grad_norm": 1.0998580106317288, "learning_rate": 1.1476612502384354e-06, "loss": 0.5362, "step": 7221 }, { "epoch": 0.8873325961420322, "grad_norm": 1.1634115628205506, "learning_rate": 1.1451945558276788e-06, "loss": 0.529, "step": 7222 }, { "epoch": 0.8874554613588893, "grad_norm": 1.1261881269032434, "learning_rate": 1.142730409918532e-06, "loss": 0.4753, "step": 7223 }, { "epoch": 0.8875783265757464, "grad_norm": 1.3045338525537384, "learning_rate": 1.1402688129642575e-06, "loss": 0.6128, "step": 7224 }, { "epoch": 0.8877011917926035, "grad_norm": 0.945541942593487, "learning_rate": 1.137809765417651e-06, "loss": 0.5363, "step": 7225 }, { "epoch": 0.8878240570094607, "grad_norm": 1.2117866789053169, "learning_rate": 1.1353532677310413e-06, "loss": 0.5872, "step": 7226 }, { "epoch": 0.8879469222263178, "grad_norm": 1.130293352562666, "learning_rate": 1.13289932035628e-06, "loss": 0.633, "step": 7227 }, { "epoch": 0.8880697874431749, "grad_norm": 0.9554885986758416, "learning_rate": 1.1304479237447574e-06, "loss": 0.6931, "step": 7228 }, { "epoch": 0.888192652660032, "grad_norm": 1.1352119313475202, "learning_rate": 1.1279990783473948e-06, "loss": 0.5552, "step": 7229 }, { "epoch": 0.888315517876889, "grad_norm": 1.640121584954948, "learning_rate": 1.1255527846146369e-06, "loss": 0.7, "step": 7230 }, { "epoch": 0.8884383830937461, "grad_norm": 1.3523719108706085, "learning_rate": 1.1231090429964668e-06, "loss": 0.5682, "step": 7231 }, { "epoch": 0.8885612483106032, "grad_norm": 1.4702060282505494, "learning_rate": 1.1206678539423886e-06, "loss": 0.484, "step": 7232 }, { "epoch": 0.8886841135274604, "grad_norm": 1.0367980142722115, "learning_rate": 1.118229217901453e-06, "loss": 0.5421, "step": 7233 }, { "epoch": 0.8888069787443175, "grad_norm": 1.0867618530217535, "learning_rate": 1.1157931353222244e-06, "loss": 0.5567, "step": 7234 }, { "epoch": 0.8889298439611746, "grad_norm": 1.1299165092939212, "learning_rate": 1.1133596066528079e-06, "loss": 0.6433, "step": 7235 }, { "epoch": 0.8890527091780317, "grad_norm": 1.2222239779128936, "learning_rate": 1.1109286323408318e-06, "loss": 0.5444, "step": 7236 }, { "epoch": 0.8891755743948888, "grad_norm": 1.1696469270630028, "learning_rate": 1.1085002128334603e-06, "loss": 0.6137, "step": 7237 }, { "epoch": 0.8892984396117459, "grad_norm": 1.0149359633313544, "learning_rate": 1.1060743485773861e-06, "loss": 0.5468, "step": 7238 }, { "epoch": 0.889421304828603, "grad_norm": 1.0579148150558437, "learning_rate": 1.1036510400188287e-06, "loss": 0.498, "step": 7239 }, { "epoch": 0.8895441700454602, "grad_norm": 1.1524838903702488, "learning_rate": 1.101230287603542e-06, "loss": 0.4918, "step": 7240 }, { "epoch": 0.8896670352623173, "grad_norm": 1.1250495621250822, "learning_rate": 1.0988120917768074e-06, "loss": 0.5677, "step": 7241 }, { "epoch": 0.8897899004791744, "grad_norm": 1.5174278810458959, "learning_rate": 1.0963964529834381e-06, "loss": 0.5923, "step": 7242 }, { "epoch": 0.8899127656960315, "grad_norm": 1.0790610681057444, "learning_rate": 1.0939833716677683e-06, "loss": 0.6502, "step": 7243 }, { "epoch": 0.8900356309128885, "grad_norm": 1.2230323642657168, "learning_rate": 1.091572848273678e-06, "loss": 0.5676, "step": 7244 }, { "epoch": 0.8901584961297456, "grad_norm": 1.271777255624629, "learning_rate": 1.0891648832445611e-06, "loss": 0.6082, "step": 7245 }, { "epoch": 0.8902813613466027, "grad_norm": 1.0587205141622902, "learning_rate": 1.0867594770233514e-06, "loss": 0.684, "step": 7246 }, { "epoch": 0.8904042265634599, "grad_norm": 1.2313051131954105, "learning_rate": 1.084356630052503e-06, "loss": 0.4649, "step": 7247 }, { "epoch": 0.890527091780317, "grad_norm": 1.1404455016178037, "learning_rate": 1.0819563427740064e-06, "loss": 0.7054, "step": 7248 }, { "epoch": 0.8906499569971741, "grad_norm": 1.3193714657095157, "learning_rate": 1.0795586156293814e-06, "loss": 0.559, "step": 7249 }, { "epoch": 0.8907728222140312, "grad_norm": 1.0501910562856978, "learning_rate": 1.0771634490596683e-06, "loss": 0.5741, "step": 7250 }, { "epoch": 0.8908956874308883, "grad_norm": 0.9572939625227387, "learning_rate": 1.0747708435054464e-06, "loss": 0.5601, "step": 7251 }, { "epoch": 0.8910185526477454, "grad_norm": 1.0899804295723317, "learning_rate": 1.0723807994068208e-06, "loss": 0.6158, "step": 7252 }, { "epoch": 0.8911414178646025, "grad_norm": 1.2243014809440305, "learning_rate": 1.0699933172034242e-06, "loss": 0.6259, "step": 7253 }, { "epoch": 0.8912642830814597, "grad_norm": 1.2603056220008562, "learning_rate": 1.0676083973344158e-06, "loss": 0.5946, "step": 7254 }, { "epoch": 0.8913871482983168, "grad_norm": 1.1244771878363886, "learning_rate": 1.0652260402384895e-06, "loss": 0.5708, "step": 7255 }, { "epoch": 0.8915100135151739, "grad_norm": 1.35318692698293, "learning_rate": 1.062846246353863e-06, "loss": 0.622, "step": 7256 }, { "epoch": 0.891632878732031, "grad_norm": 1.1302563019519738, "learning_rate": 1.0604690161182827e-06, "loss": 0.4932, "step": 7257 }, { "epoch": 0.8917557439488881, "grad_norm": 1.2787160669608806, "learning_rate": 1.0580943499690277e-06, "loss": 0.5568, "step": 7258 }, { "epoch": 0.8918786091657451, "grad_norm": 1.226956107705599, "learning_rate": 1.0557222483428962e-06, "loss": 0.6041, "step": 7259 }, { "epoch": 0.8920014743826022, "grad_norm": 1.2608832895937212, "learning_rate": 1.0533527116762298e-06, "loss": 0.495, "step": 7260 }, { "epoch": 0.8921243395994594, "grad_norm": 1.2448200823464721, "learning_rate": 1.0509857404048827e-06, "loss": 0.4802, "step": 7261 }, { "epoch": 0.8922472048163165, "grad_norm": 1.214137528680801, "learning_rate": 1.0486213349642486e-06, "loss": 0.6519, "step": 7262 }, { "epoch": 0.8923700700331736, "grad_norm": 1.1970609304671151, "learning_rate": 1.046259495789238e-06, "loss": 0.5857, "step": 7263 }, { "epoch": 0.8924929352500307, "grad_norm": 1.1675089442779372, "learning_rate": 1.043900223314303e-06, "loss": 0.5589, "step": 7264 }, { "epoch": 0.8926158004668878, "grad_norm": 1.0915150767746276, "learning_rate": 1.0415435179734118e-06, "loss": 0.5837, "step": 7265 }, { "epoch": 0.8927386656837449, "grad_norm": 1.375100870048378, "learning_rate": 1.0391893802000674e-06, "loss": 0.5644, "step": 7266 }, { "epoch": 0.892861530900602, "grad_norm": 1.4742278386791212, "learning_rate": 1.0368378104272986e-06, "loss": 0.5946, "step": 7267 }, { "epoch": 0.8929843961174592, "grad_norm": 1.3462286628849933, "learning_rate": 1.0344888090876592e-06, "loss": 0.5716, "step": 7268 }, { "epoch": 0.8931072613343163, "grad_norm": 1.4163741265879155, "learning_rate": 1.0321423766132354e-06, "loss": 0.5364, "step": 7269 }, { "epoch": 0.8932301265511734, "grad_norm": 1.3958915175805189, "learning_rate": 1.0297985134356319e-06, "loss": 0.6212, "step": 7270 }, { "epoch": 0.8933529917680305, "grad_norm": 1.2738968099861134, "learning_rate": 1.0274572199859972e-06, "loss": 0.5785, "step": 7271 }, { "epoch": 0.8934758569848876, "grad_norm": 1.3110429055982349, "learning_rate": 1.0251184966949883e-06, "loss": 0.4934, "step": 7272 }, { "epoch": 0.8935987222017446, "grad_norm": 1.122914332710962, "learning_rate": 1.0227823439928065e-06, "loss": 0.5617, "step": 7273 }, { "epoch": 0.8937215874186017, "grad_norm": 1.114688710310556, "learning_rate": 1.0204487623091624e-06, "loss": 0.5135, "step": 7274 }, { "epoch": 0.8938444526354589, "grad_norm": 1.2671920452450514, "learning_rate": 1.0181177520733082e-06, "loss": 0.5827, "step": 7275 }, { "epoch": 0.893967317852316, "grad_norm": 1.3284198256686657, "learning_rate": 1.0157893137140206e-06, "loss": 0.4771, "step": 7276 }, { "epoch": 0.8940901830691731, "grad_norm": 1.0456814829348866, "learning_rate": 1.0134634476595955e-06, "loss": 0.647, "step": 7277 }, { "epoch": 0.8942130482860302, "grad_norm": 1.2622693316726232, "learning_rate": 1.011140154337864e-06, "loss": 0.5347, "step": 7278 }, { "epoch": 0.8943359135028873, "grad_norm": 1.2188678578124523, "learning_rate": 1.0088194341761792e-06, "loss": 0.5788, "step": 7279 }, { "epoch": 0.8944587787197444, "grad_norm": 1.3005895432318344, "learning_rate": 1.0065012876014261e-06, "loss": 0.5629, "step": 7280 }, { "epoch": 0.8945816439366016, "grad_norm": 1.155093049983235, "learning_rate": 1.0041857150400075e-06, "loss": 0.5285, "step": 7281 }, { "epoch": 0.8947045091534587, "grad_norm": 0.9982231635566494, "learning_rate": 1.0018727169178604e-06, "loss": 0.5235, "step": 7282 }, { "epoch": 0.8948273743703158, "grad_norm": 1.3670901712552659, "learning_rate": 9.995622936604465e-07, "loss": 0.6342, "step": 7283 }, { "epoch": 0.8949502395871729, "grad_norm": 1.3825952581684497, "learning_rate": 9.972544456927556e-07, "loss": 0.6068, "step": 7284 }, { "epoch": 0.89507310480403, "grad_norm": 1.0775106795531932, "learning_rate": 9.949491734392952e-07, "loss": 0.6023, "step": 7285 }, { "epoch": 0.8951959700208871, "grad_norm": 1.3810480867200448, "learning_rate": 9.926464773241089e-07, "loss": 0.5659, "step": 7286 }, { "epoch": 0.8953188352377442, "grad_norm": 1.2297569102728276, "learning_rate": 9.90346357770765e-07, "loss": 0.5416, "step": 7287 }, { "epoch": 0.8954417004546013, "grad_norm": 1.320118790468435, "learning_rate": 9.880488152023499e-07, "loss": 0.5088, "step": 7288 }, { "epoch": 0.8955645656714584, "grad_norm": 1.3582348185876296, "learning_rate": 9.857538500414837e-07, "loss": 0.5407, "step": 7289 }, { "epoch": 0.8956874308883155, "grad_norm": 1.20407267056099, "learning_rate": 9.834614627103123e-07, "loss": 0.5158, "step": 7290 }, { "epoch": 0.8958102961051726, "grad_norm": 1.085646674353756, "learning_rate": 9.811716536305066e-07, "loss": 0.6043, "step": 7291 }, { "epoch": 0.8959331613220297, "grad_norm": 1.4813585261395066, "learning_rate": 9.788844232232563e-07, "loss": 0.5506, "step": 7292 }, { "epoch": 0.8960560265388868, "grad_norm": 1.4126614811120668, "learning_rate": 9.765997719092867e-07, "loss": 0.5474, "step": 7293 }, { "epoch": 0.896178891755744, "grad_norm": 1.328891301200044, "learning_rate": 9.743177001088482e-07, "loss": 0.605, "step": 7294 }, { "epoch": 0.8963017569726011, "grad_norm": 1.4395184420919336, "learning_rate": 9.720382082417052e-07, "loss": 0.6257, "step": 7295 }, { "epoch": 0.8964246221894582, "grad_norm": 1.1894733726966868, "learning_rate": 9.69761296727162e-07, "loss": 0.6622, "step": 7296 }, { "epoch": 0.8965474874063153, "grad_norm": 1.1013187674232772, "learning_rate": 9.674869659840334e-07, "loss": 0.4495, "step": 7297 }, { "epoch": 0.8966703526231724, "grad_norm": 1.09016997879365, "learning_rate": 9.652152164306788e-07, "loss": 0.5486, "step": 7298 }, { "epoch": 0.8967932178400295, "grad_norm": 1.0065132755106543, "learning_rate": 9.62946048484965e-07, "loss": 0.4664, "step": 7299 }, { "epoch": 0.8969160830568866, "grad_norm": 1.2696243635091953, "learning_rate": 9.606794625642934e-07, "loss": 0.6572, "step": 7300 }, { "epoch": 0.8970389482737438, "grad_norm": 1.1698437265247876, "learning_rate": 9.584154590855836e-07, "loss": 0.6262, "step": 7301 }, { "epoch": 0.8971618134906008, "grad_norm": 1.3595150993129614, "learning_rate": 9.561540384652879e-07, "loss": 0.5893, "step": 7302 }, { "epoch": 0.8972846787074579, "grad_norm": 1.330477211949914, "learning_rate": 9.538952011193814e-07, "loss": 0.722, "step": 7303 }, { "epoch": 0.897407543924315, "grad_norm": 1.153633398543148, "learning_rate": 9.516389474633585e-07, "loss": 0.5303, "step": 7304 }, { "epoch": 0.8975304091411721, "grad_norm": 1.1145931237867663, "learning_rate": 9.493852779122441e-07, "loss": 0.5859, "step": 7305 }, { "epoch": 0.8976532743580292, "grad_norm": 1.2983873111713549, "learning_rate": 9.471341928805865e-07, "loss": 0.5848, "step": 7306 }, { "epoch": 0.8977761395748863, "grad_norm": 1.0359622897393745, "learning_rate": 9.448856927824612e-07, "loss": 0.5828, "step": 7307 }, { "epoch": 0.8978990047917434, "grad_norm": 1.2133637287357546, "learning_rate": 9.426397780314555e-07, "loss": 0.5753, "step": 7308 }, { "epoch": 0.8980218700086006, "grad_norm": 1.0699368348074108, "learning_rate": 9.403964490407041e-07, "loss": 0.5668, "step": 7309 }, { "epoch": 0.8981447352254577, "grad_norm": 1.2555050927561713, "learning_rate": 9.381557062228435e-07, "loss": 0.5991, "step": 7310 }, { "epoch": 0.8982676004423148, "grad_norm": 1.3332569577853675, "learning_rate": 9.359175499900474e-07, "loss": 0.6055, "step": 7311 }, { "epoch": 0.8983904656591719, "grad_norm": 1.0591066447713628, "learning_rate": 9.336819807540081e-07, "loss": 0.4539, "step": 7312 }, { "epoch": 0.898513330876029, "grad_norm": 1.3148002346304082, "learning_rate": 9.31448998925945e-07, "loss": 0.6561, "step": 7313 }, { "epoch": 0.8986361960928861, "grad_norm": 1.3052925287511619, "learning_rate": 9.292186049166029e-07, "loss": 0.5654, "step": 7314 }, { "epoch": 0.8987590613097433, "grad_norm": 1.0966535376600863, "learning_rate": 9.269907991362436e-07, "loss": 0.4592, "step": 7315 }, { "epoch": 0.8988819265266004, "grad_norm": 1.0840102359341908, "learning_rate": 9.247655819946609e-07, "loss": 0.5061, "step": 7316 }, { "epoch": 0.8990047917434574, "grad_norm": 1.1533568375466714, "learning_rate": 9.225429539011676e-07, "loss": 0.5649, "step": 7317 }, { "epoch": 0.8991276569603145, "grad_norm": 1.172659874331371, "learning_rate": 9.203229152646047e-07, "loss": 0.4254, "step": 7318 }, { "epoch": 0.8992505221771716, "grad_norm": 1.3081768539933165, "learning_rate": 9.181054664933291e-07, "loss": 0.6356, "step": 7319 }, { "epoch": 0.8993733873940287, "grad_norm": 1.232129063467181, "learning_rate": 9.158906079952295e-07, "loss": 0.5803, "step": 7320 }, { "epoch": 0.8994962526108858, "grad_norm": 1.2162229434818421, "learning_rate": 9.136783401777165e-07, "loss": 0.5807, "step": 7321 }, { "epoch": 0.899619117827743, "grad_norm": 1.2598601904897229, "learning_rate": 9.114686634477165e-07, "loss": 0.626, "step": 7322 }, { "epoch": 0.8997419830446001, "grad_norm": 1.5135268279666063, "learning_rate": 9.092615782116909e-07, "loss": 0.5313, "step": 7323 }, { "epoch": 0.8998648482614572, "grad_norm": 1.44424354448404, "learning_rate": 9.070570848756116e-07, "loss": 0.6485, "step": 7324 }, { "epoch": 0.8999877134783143, "grad_norm": 1.174379515220077, "learning_rate": 9.048551838449909e-07, "loss": 0.5931, "step": 7325 }, { "epoch": 0.9001105786951714, "grad_norm": 1.4638361197695775, "learning_rate": 9.026558755248465e-07, "loss": 0.5528, "step": 7326 }, { "epoch": 0.9002334439120285, "grad_norm": 1.0861841442848719, "learning_rate": 9.004591603197315e-07, "loss": 0.5048, "step": 7327 }, { "epoch": 0.9003563091288856, "grad_norm": 1.1543783104942913, "learning_rate": 8.98265038633711e-07, "loss": 0.6044, "step": 7328 }, { "epoch": 0.9004791743457428, "grad_norm": 1.4215039371831497, "learning_rate": 8.960735108703872e-07, "loss": 0.5652, "step": 7329 }, { "epoch": 0.9006020395625999, "grad_norm": 1.0815430297678506, "learning_rate": 8.938845774328725e-07, "loss": 0.6115, "step": 7330 }, { "epoch": 0.9007249047794569, "grad_norm": 1.3477305195612737, "learning_rate": 8.916982387238082e-07, "loss": 0.635, "step": 7331 }, { "epoch": 0.900847769996314, "grad_norm": 1.1447310104823756, "learning_rate": 8.895144951453593e-07, "loss": 0.5478, "step": 7332 }, { "epoch": 0.9009706352131711, "grad_norm": 1.3250492046357754, "learning_rate": 8.873333470992079e-07, "loss": 0.5218, "step": 7333 }, { "epoch": 0.9010935004300282, "grad_norm": 1.1779475331273581, "learning_rate": 8.851547949865646e-07, "loss": 0.5997, "step": 7334 }, { "epoch": 0.9012163656468853, "grad_norm": 2.0197976450066206, "learning_rate": 8.82978839208154e-07, "loss": 0.6826, "step": 7335 }, { "epoch": 0.9013392308637425, "grad_norm": 0.9825328881908474, "learning_rate": 8.808054801642407e-07, "loss": 0.4703, "step": 7336 }, { "epoch": 0.9014620960805996, "grad_norm": 1.3726740750921003, "learning_rate": 8.786347182545884e-07, "loss": 0.4609, "step": 7337 }, { "epoch": 0.9015849612974567, "grad_norm": 1.26716723324268, "learning_rate": 8.764665538785028e-07, "loss": 0.5731, "step": 7338 }, { "epoch": 0.9017078265143138, "grad_norm": 1.6136966989182453, "learning_rate": 8.743009874347979e-07, "loss": 0.6178, "step": 7339 }, { "epoch": 0.9018306917311709, "grad_norm": 1.0349168180811938, "learning_rate": 8.72138019321817e-07, "loss": 0.5444, "step": 7340 }, { "epoch": 0.901953556948028, "grad_norm": 1.3998477294530673, "learning_rate": 8.699776499374285e-07, "loss": 0.6279, "step": 7341 }, { "epoch": 0.9020764221648851, "grad_norm": 1.0439567759953747, "learning_rate": 8.678198796790126e-07, "loss": 0.5017, "step": 7342 }, { "epoch": 0.9021992873817423, "grad_norm": 1.6416446193590235, "learning_rate": 8.656647089434788e-07, "loss": 0.6674, "step": 7343 }, { "epoch": 0.9023221525985994, "grad_norm": 1.1684008348218362, "learning_rate": 8.635121381272582e-07, "loss": 0.4792, "step": 7344 }, { "epoch": 0.9024450178154565, "grad_norm": 0.9744367708902468, "learning_rate": 8.613621676263023e-07, "loss": 0.5563, "step": 7345 }, { "epoch": 0.9025678830323135, "grad_norm": 1.4420577459180015, "learning_rate": 8.592147978360831e-07, "loss": 0.582, "step": 7346 }, { "epoch": 0.9026907482491706, "grad_norm": 1.2768524781892316, "learning_rate": 8.570700291515948e-07, "loss": 0.5791, "step": 7347 }, { "epoch": 0.9028136134660277, "grad_norm": 1.177967796409102, "learning_rate": 8.549278619673534e-07, "loss": 0.4895, "step": 7348 }, { "epoch": 0.9029364786828848, "grad_norm": 1.1969329154218218, "learning_rate": 8.527882966774003e-07, "loss": 0.612, "step": 7349 }, { "epoch": 0.903059343899742, "grad_norm": 1.2463790956060845, "learning_rate": 8.506513336752908e-07, "loss": 0.5258, "step": 7350 }, { "epoch": 0.9031822091165991, "grad_norm": 1.3361240402285488, "learning_rate": 8.485169733541071e-07, "loss": 0.5068, "step": 7351 }, { "epoch": 0.9033050743334562, "grad_norm": 1.485243495167198, "learning_rate": 8.463852161064517e-07, "loss": 0.661, "step": 7352 }, { "epoch": 0.9034279395503133, "grad_norm": 1.1820307583357978, "learning_rate": 8.442560623244444e-07, "loss": 0.5496, "step": 7353 }, { "epoch": 0.9035508047671704, "grad_norm": 1.2407683765698518, "learning_rate": 8.421295123997319e-07, "loss": 0.5609, "step": 7354 }, { "epoch": 0.9036736699840275, "grad_norm": 0.9828791103033973, "learning_rate": 8.400055667234779e-07, "loss": 0.5576, "step": 7355 }, { "epoch": 0.9037965352008847, "grad_norm": 1.1313495418356077, "learning_rate": 8.378842256863717e-07, "loss": 0.4856, "step": 7356 }, { "epoch": 0.9039194004177418, "grad_norm": 0.9925762408241992, "learning_rate": 8.357654896786143e-07, "loss": 0.6221, "step": 7357 }, { "epoch": 0.9040422656345989, "grad_norm": 1.338122705402375, "learning_rate": 8.336493590899391e-07, "loss": 0.5688, "step": 7358 }, { "epoch": 0.904165130851456, "grad_norm": 1.3300157745621437, "learning_rate": 8.31535834309593e-07, "loss": 0.7752, "step": 7359 }, { "epoch": 0.9042879960683131, "grad_norm": 1.1740511905024327, "learning_rate": 8.294249157263417e-07, "loss": 0.5766, "step": 7360 }, { "epoch": 0.9044108612851701, "grad_norm": 1.3953858565085957, "learning_rate": 8.273166037284812e-07, "loss": 0.4916, "step": 7361 }, { "epoch": 0.9045337265020272, "grad_norm": 1.0408673407880593, "learning_rate": 8.252108987038131e-07, "loss": 0.6251, "step": 7362 }, { "epoch": 0.9046565917188844, "grad_norm": 1.1768182037781447, "learning_rate": 8.231078010396775e-07, "loss": 0.4799, "step": 7363 }, { "epoch": 0.9047794569357415, "grad_norm": 1.1768448167602896, "learning_rate": 8.210073111229199e-07, "loss": 0.589, "step": 7364 }, { "epoch": 0.9049023221525986, "grad_norm": 1.1846838932122357, "learning_rate": 8.189094293399163e-07, "loss": 0.64, "step": 7365 }, { "epoch": 0.9050251873694557, "grad_norm": 1.1152546577216471, "learning_rate": 8.168141560765496e-07, "loss": 0.5649, "step": 7366 }, { "epoch": 0.9051480525863128, "grad_norm": 1.5172007134796563, "learning_rate": 8.147214917182433e-07, "loss": 0.5096, "step": 7367 }, { "epoch": 0.9052709178031699, "grad_norm": 1.2844129554398886, "learning_rate": 8.12631436649921e-07, "loss": 0.5714, "step": 7368 }, { "epoch": 0.905393783020027, "grad_norm": 1.1438575981546617, "learning_rate": 8.105439912560403e-07, "loss": 0.538, "step": 7369 }, { "epoch": 0.9055166482368842, "grad_norm": 1.1421000845316795, "learning_rate": 8.08459155920569e-07, "loss": 0.5624, "step": 7370 }, { "epoch": 0.9056395134537413, "grad_norm": 1.0220736723673371, "learning_rate": 8.063769310270003e-07, "loss": 0.5493, "step": 7371 }, { "epoch": 0.9057623786705984, "grad_norm": 1.3069090035841764, "learning_rate": 8.042973169583479e-07, "loss": 0.5944, "step": 7372 }, { "epoch": 0.9058852438874555, "grad_norm": 1.1936668315898897, "learning_rate": 8.022203140971373e-07, "loss": 0.5985, "step": 7373 }, { "epoch": 0.9060081091043126, "grad_norm": 1.1900978889137754, "learning_rate": 8.001459228254282e-07, "loss": 0.6878, "step": 7374 }, { "epoch": 0.9061309743211696, "grad_norm": 1.0584104218020853, "learning_rate": 7.980741435247851e-07, "loss": 0.5468, "step": 7375 }, { "epoch": 0.9062538395380267, "grad_norm": 1.1068304765832104, "learning_rate": 7.960049765763034e-07, "loss": 0.467, "step": 7376 }, { "epoch": 0.9063767047548839, "grad_norm": 1.2160362334128485, "learning_rate": 7.939384223605867e-07, "loss": 0.6291, "step": 7377 }, { "epoch": 0.906499569971741, "grad_norm": 1.3391390087414707, "learning_rate": 7.918744812577694e-07, "loss": 0.5582, "step": 7378 }, { "epoch": 0.9066224351885981, "grad_norm": 1.4335144490085237, "learning_rate": 7.898131536474995e-07, "loss": 0.5949, "step": 7379 }, { "epoch": 0.9067453004054552, "grad_norm": 1.2569774052182772, "learning_rate": 7.877544399089421e-07, "loss": 0.5164, "step": 7380 }, { "epoch": 0.9068681656223123, "grad_norm": 1.0466711443253034, "learning_rate": 7.856983404207857e-07, "loss": 0.6433, "step": 7381 }, { "epoch": 0.9069910308391694, "grad_norm": 1.2874468393120422, "learning_rate": 7.836448555612363e-07, "loss": 0.5349, "step": 7382 }, { "epoch": 0.9071138960560265, "grad_norm": 1.130931359387852, "learning_rate": 7.815939857080218e-07, "loss": 0.5717, "step": 7383 }, { "epoch": 0.9072367612728837, "grad_norm": 1.0681144242018192, "learning_rate": 7.79545731238382e-07, "loss": 0.521, "step": 7384 }, { "epoch": 0.9073596264897408, "grad_norm": 1.2824329976563837, "learning_rate": 7.775000925290804e-07, "loss": 0.5622, "step": 7385 }, { "epoch": 0.9074824917065979, "grad_norm": 1.322568589583127, "learning_rate": 7.754570699564028e-07, "loss": 0.5711, "step": 7386 }, { "epoch": 0.907605356923455, "grad_norm": 1.1435557044690343, "learning_rate": 7.734166638961488e-07, "loss": 0.5797, "step": 7387 }, { "epoch": 0.9077282221403121, "grad_norm": 1.1716282413759616, "learning_rate": 7.713788747236361e-07, "loss": 0.6918, "step": 7388 }, { "epoch": 0.9078510873571692, "grad_norm": 1.443430999644405, "learning_rate": 7.693437028137018e-07, "loss": 0.6288, "step": 7389 }, { "epoch": 0.9079739525740262, "grad_norm": 1.2514390309732086, "learning_rate": 7.673111485407064e-07, "loss": 0.48, "step": 7390 }, { "epoch": 0.9080968177908834, "grad_norm": 1.1980042334038932, "learning_rate": 7.652812122785225e-07, "loss": 0.6855, "step": 7391 }, { "epoch": 0.9082196830077405, "grad_norm": 1.0763674460246806, "learning_rate": 7.632538944005429e-07, "loss": 0.5028, "step": 7392 }, { "epoch": 0.9083425482245976, "grad_norm": 1.335989954133638, "learning_rate": 7.612291952796813e-07, "loss": 0.6955, "step": 7393 }, { "epoch": 0.9084654134414547, "grad_norm": 1.3457346861380066, "learning_rate": 7.592071152883695e-07, "loss": 0.5585, "step": 7394 }, { "epoch": 0.9085882786583118, "grad_norm": 1.210934494019051, "learning_rate": 7.571876547985518e-07, "loss": 0.6442, "step": 7395 }, { "epoch": 0.9087111438751689, "grad_norm": 1.3260774194774971, "learning_rate": 7.551708141816977e-07, "loss": 0.6158, "step": 7396 }, { "epoch": 0.908834009092026, "grad_norm": 1.5504574664796285, "learning_rate": 7.531565938087937e-07, "loss": 0.6007, "step": 7397 }, { "epoch": 0.9089568743088832, "grad_norm": 1.168548637911704, "learning_rate": 7.511449940503368e-07, "loss": 0.6667, "step": 7398 }, { "epoch": 0.9090797395257403, "grad_norm": 1.3570701861075336, "learning_rate": 7.491360152763543e-07, "loss": 0.5903, "step": 7399 }, { "epoch": 0.9092026047425974, "grad_norm": 1.1662153962610542, "learning_rate": 7.471296578563774e-07, "loss": 0.5529, "step": 7400 }, { "epoch": 0.9093254699594545, "grad_norm": 1.4529866989946973, "learning_rate": 7.451259221594709e-07, "loss": 0.5953, "step": 7401 }, { "epoch": 0.9094483351763116, "grad_norm": 1.1984694828742244, "learning_rate": 7.431248085542031e-07, "loss": 0.5064, "step": 7402 }, { "epoch": 0.9095712003931687, "grad_norm": 1.0743844133921867, "learning_rate": 7.411263174086696e-07, "loss": 0.5563, "step": 7403 }, { "epoch": 0.9096940656100257, "grad_norm": 1.1100491552887946, "learning_rate": 7.391304490904732e-07, "loss": 0.616, "step": 7404 }, { "epoch": 0.9098169308268829, "grad_norm": 1.133687946572893, "learning_rate": 7.371372039667518e-07, "loss": 0.562, "step": 7405 }, { "epoch": 0.90993979604374, "grad_norm": 1.259498965925745, "learning_rate": 7.351465824041403e-07, "loss": 0.5851, "step": 7406 }, { "epoch": 0.9100626612605971, "grad_norm": 1.4424995433308245, "learning_rate": 7.33158584768806e-07, "loss": 0.449, "step": 7407 }, { "epoch": 0.9101855264774542, "grad_norm": 1.2259176342336795, "learning_rate": 7.311732114264247e-07, "loss": 0.581, "step": 7408 }, { "epoch": 0.9103083916943113, "grad_norm": 0.932056455163265, "learning_rate": 7.291904627421942e-07, "loss": 0.6239, "step": 7409 }, { "epoch": 0.9104312569111684, "grad_norm": 1.1314021822769924, "learning_rate": 7.27210339080831e-07, "loss": 0.5526, "step": 7410 }, { "epoch": 0.9105541221280256, "grad_norm": 1.2417622887016955, "learning_rate": 7.252328408065606e-07, "loss": 0.5078, "step": 7411 }, { "epoch": 0.9106769873448827, "grad_norm": 0.8763889908073847, "learning_rate": 7.232579682831353e-07, "loss": 0.6007, "step": 7412 }, { "epoch": 0.9107998525617398, "grad_norm": 1.1878848317599615, "learning_rate": 7.212857218738178e-07, "loss": 0.6352, "step": 7413 }, { "epoch": 0.9109227177785969, "grad_norm": 1.0513080412866922, "learning_rate": 7.193161019413946e-07, "loss": 0.5135, "step": 7414 }, { "epoch": 0.911045582995454, "grad_norm": 1.0210567366246597, "learning_rate": 7.173491088481576e-07, "loss": 0.6163, "step": 7415 }, { "epoch": 0.9111684482123111, "grad_norm": 1.1484762220145306, "learning_rate": 7.153847429559257e-07, "loss": 0.487, "step": 7416 }, { "epoch": 0.9112913134291682, "grad_norm": 0.9637544302062211, "learning_rate": 7.134230046260348e-07, "loss": 0.5994, "step": 7417 }, { "epoch": 0.9114141786460254, "grad_norm": 0.911363192675256, "learning_rate": 7.114638942193264e-07, "loss": 0.5987, "step": 7418 }, { "epoch": 0.9115370438628824, "grad_norm": 1.2626822511862728, "learning_rate": 7.09507412096172e-07, "loss": 0.5183, "step": 7419 }, { "epoch": 0.9116599090797395, "grad_norm": 1.091505616924391, "learning_rate": 7.075535586164506e-07, "loss": 0.5655, "step": 7420 }, { "epoch": 0.9117827742965966, "grad_norm": 1.3134796842163683, "learning_rate": 7.056023341395662e-07, "loss": 0.5433, "step": 7421 }, { "epoch": 0.9119056395134537, "grad_norm": 1.3497897486190136, "learning_rate": 7.036537390244269e-07, "loss": 0.4822, "step": 7422 }, { "epoch": 0.9120285047303108, "grad_norm": 0.9605286262418606, "learning_rate": 7.017077736294675e-07, "loss": 0.5865, "step": 7423 }, { "epoch": 0.912151369947168, "grad_norm": 1.1590328483404775, "learning_rate": 6.997644383126367e-07, "loss": 0.5538, "step": 7424 }, { "epoch": 0.9122742351640251, "grad_norm": 1.0625572858472507, "learning_rate": 6.978237334313953e-07, "loss": 0.4958, "step": 7425 }, { "epoch": 0.9123971003808822, "grad_norm": 1.447381315786846, "learning_rate": 6.958856593427277e-07, "loss": 0.5877, "step": 7426 }, { "epoch": 0.9125199655977393, "grad_norm": 1.4496618084437536, "learning_rate": 6.939502164031236e-07, "loss": 0.5157, "step": 7427 }, { "epoch": 0.9126428308145964, "grad_norm": 1.3249894906173498, "learning_rate": 6.920174049686035e-07, "loss": 0.6976, "step": 7428 }, { "epoch": 0.9127656960314535, "grad_norm": 1.2130497230643087, "learning_rate": 6.900872253946894e-07, "loss": 0.5589, "step": 7429 }, { "epoch": 0.9128885612483106, "grad_norm": 1.0124603046939584, "learning_rate": 6.881596780364291e-07, "loss": 0.6136, "step": 7430 }, { "epoch": 0.9130114264651678, "grad_norm": 1.1338916073579977, "learning_rate": 6.862347632483757e-07, "loss": 0.584, "step": 7431 }, { "epoch": 0.9131342916820249, "grad_norm": 1.2753367901217667, "learning_rate": 6.843124813846141e-07, "loss": 0.6135, "step": 7432 }, { "epoch": 0.9132571568988819, "grad_norm": 1.0232245277966712, "learning_rate": 6.823928327987283e-07, "loss": 0.5746, "step": 7433 }, { "epoch": 0.913380022115739, "grad_norm": 1.3835179976566894, "learning_rate": 6.804758178438309e-07, "loss": 0.5775, "step": 7434 }, { "epoch": 0.9135028873325961, "grad_norm": 1.5749504391869904, "learning_rate": 6.785614368725396e-07, "loss": 0.5293, "step": 7435 }, { "epoch": 0.9136257525494532, "grad_norm": 1.0702776740253543, "learning_rate": 6.766496902369929e-07, "loss": 0.6318, "step": 7436 }, { "epoch": 0.9137486177663103, "grad_norm": 1.343195289076973, "learning_rate": 6.747405782888478e-07, "loss": 0.6872, "step": 7437 }, { "epoch": 0.9138714829831674, "grad_norm": 1.5901252859425012, "learning_rate": 6.728341013792683e-07, "loss": 0.6265, "step": 7438 }, { "epoch": 0.9139943482000246, "grad_norm": 1.2769696623342857, "learning_rate": 6.70930259858944e-07, "loss": 0.5585, "step": 7439 }, { "epoch": 0.9141172134168817, "grad_norm": 1.0346857981971591, "learning_rate": 6.690290540780681e-07, "loss": 0.4981, "step": 7440 }, { "epoch": 0.9142400786337388, "grad_norm": 1.577875695225468, "learning_rate": 6.671304843863607e-07, "loss": 0.5249, "step": 7441 }, { "epoch": 0.9143629438505959, "grad_norm": 1.1717119301262469, "learning_rate": 6.652345511330477e-07, "loss": 0.5746, "step": 7442 }, { "epoch": 0.914485809067453, "grad_norm": 1.259663222502918, "learning_rate": 6.633412546668733e-07, "loss": 0.5935, "step": 7443 }, { "epoch": 0.9146086742843101, "grad_norm": 1.1896270195180563, "learning_rate": 6.614505953361022e-07, "loss": 0.6427, "step": 7444 }, { "epoch": 0.9147315395011673, "grad_norm": 1.4747479581535414, "learning_rate": 6.59562573488503e-07, "loss": 0.5379, "step": 7445 }, { "epoch": 0.9148544047180244, "grad_norm": 1.2407436136763283, "learning_rate": 6.576771894713662e-07, "loss": 0.4644, "step": 7446 }, { "epoch": 0.9149772699348815, "grad_norm": 1.2389790380211883, "learning_rate": 6.557944436314978e-07, "loss": 0.6148, "step": 7447 }, { "epoch": 0.9151001351517385, "grad_norm": 1.2790231923389215, "learning_rate": 6.539143363152189e-07, "loss": 0.4957, "step": 7448 }, { "epoch": 0.9152230003685956, "grad_norm": 1.3036884351212124, "learning_rate": 6.52036867868358e-07, "loss": 0.6464, "step": 7449 }, { "epoch": 0.9153458655854527, "grad_norm": 1.1726342912816294, "learning_rate": 6.501620386362639e-07, "loss": 0.6104, "step": 7450 }, { "epoch": 0.9154687308023098, "grad_norm": 1.2542563969747738, "learning_rate": 6.482898489638023e-07, "loss": 0.5362, "step": 7451 }, { "epoch": 0.915591596019167, "grad_norm": 1.2517222400089432, "learning_rate": 6.46420299195351e-07, "loss": 0.5627, "step": 7452 }, { "epoch": 0.9157144612360241, "grad_norm": 1.3820837620006212, "learning_rate": 6.445533896747968e-07, "loss": 0.5867, "step": 7453 }, { "epoch": 0.9158373264528812, "grad_norm": 1.1436820627007673, "learning_rate": 6.426891207455482e-07, "loss": 0.5594, "step": 7454 }, { "epoch": 0.9159601916697383, "grad_norm": 1.1410066060655573, "learning_rate": 6.408274927505276e-07, "loss": 0.5233, "step": 7455 }, { "epoch": 0.9160830568865954, "grad_norm": 1.2766399568650266, "learning_rate": 6.389685060321643e-07, "loss": 0.5892, "step": 7456 }, { "epoch": 0.9162059221034525, "grad_norm": 1.2760460261313693, "learning_rate": 6.371121609324115e-07, "loss": 0.5807, "step": 7457 }, { "epoch": 0.9163287873203096, "grad_norm": 1.0724016075591136, "learning_rate": 6.352584577927278e-07, "loss": 0.5554, "step": 7458 }, { "epoch": 0.9164516525371668, "grad_norm": 1.6082017114980387, "learning_rate": 6.334073969540955e-07, "loss": 0.6497, "step": 7459 }, { "epoch": 0.9165745177540239, "grad_norm": 1.0609278483348399, "learning_rate": 6.315589787570003e-07, "loss": 0.5465, "step": 7460 }, { "epoch": 0.916697382970881, "grad_norm": 0.93904670962972, "learning_rate": 6.297132035414488e-07, "loss": 0.4481, "step": 7461 }, { "epoch": 0.916820248187738, "grad_norm": 1.408359262394457, "learning_rate": 6.278700716469593e-07, "loss": 0.5833, "step": 7462 }, { "epoch": 0.9169431134045951, "grad_norm": 1.1483995005694225, "learning_rate": 6.260295834125623e-07, "loss": 0.5451, "step": 7463 }, { "epoch": 0.9170659786214522, "grad_norm": 1.2900639855330667, "learning_rate": 6.241917391768071e-07, "loss": 0.5817, "step": 7464 }, { "epoch": 0.9171888438383093, "grad_norm": 1.3698286227708507, "learning_rate": 6.223565392777481e-07, "loss": 0.5667, "step": 7465 }, { "epoch": 0.9173117090551665, "grad_norm": 1.2035518861906918, "learning_rate": 6.205239840529636e-07, "loss": 0.5526, "step": 7466 }, { "epoch": 0.9174345742720236, "grad_norm": 1.11184468495026, "learning_rate": 6.186940738395374e-07, "loss": 0.4558, "step": 7467 }, { "epoch": 0.9175574394888807, "grad_norm": 1.0962230135689264, "learning_rate": 6.16866808974072e-07, "loss": 0.6505, "step": 7468 }, { "epoch": 0.9176803047057378, "grad_norm": 1.2036613996283163, "learning_rate": 6.15042189792675e-07, "loss": 0.4791, "step": 7469 }, { "epoch": 0.9178031699225949, "grad_norm": 1.25639424101414, "learning_rate": 6.132202166309814e-07, "loss": 0.5718, "step": 7470 }, { "epoch": 0.917926035139452, "grad_norm": 1.310982914606062, "learning_rate": 6.114008898241247e-07, "loss": 0.4481, "step": 7471 }, { "epoch": 0.9180489003563091, "grad_norm": 1.167494027471138, "learning_rate": 6.095842097067639e-07, "loss": 0.6046, "step": 7472 }, { "epoch": 0.9181717655731663, "grad_norm": 1.3007452112309061, "learning_rate": 6.0777017661306e-07, "loss": 0.5384, "step": 7473 }, { "epoch": 0.9182946307900234, "grad_norm": 0.9939433546945621, "learning_rate": 6.059587908766962e-07, "loss": 0.5196, "step": 7474 }, { "epoch": 0.9184174960068805, "grad_norm": 1.3558916089791166, "learning_rate": 6.041500528308641e-07, "loss": 0.6181, "step": 7475 }, { "epoch": 0.9185403612237376, "grad_norm": 1.3885850684455214, "learning_rate": 6.023439628082694e-07, "loss": 0.5659, "step": 7476 }, { "epoch": 0.9186632264405946, "grad_norm": 1.202045329832611, "learning_rate": 6.005405211411297e-07, "loss": 0.562, "step": 7477 }, { "epoch": 0.9187860916574517, "grad_norm": 1.3961785463104528, "learning_rate": 5.987397281611779e-07, "loss": 0.5833, "step": 7478 }, { "epoch": 0.9189089568743088, "grad_norm": 1.1271943880325428, "learning_rate": 5.969415841996606e-07, "loss": 0.7027, "step": 7479 }, { "epoch": 0.919031822091166, "grad_norm": 1.1850117223166388, "learning_rate": 5.951460895873284e-07, "loss": 0.6882, "step": 7480 }, { "epoch": 0.9191546873080231, "grad_norm": 1.2151737689272932, "learning_rate": 5.933532446544538e-07, "loss": 0.5696, "step": 7481 }, { "epoch": 0.9192775525248802, "grad_norm": 1.3673352867813822, "learning_rate": 5.915630497308228e-07, "loss": 0.5811, "step": 7482 }, { "epoch": 0.9194004177417373, "grad_norm": 1.2209655345548363, "learning_rate": 5.897755051457238e-07, "loss": 0.6102, "step": 7483 }, { "epoch": 0.9195232829585944, "grad_norm": 1.1830278097678935, "learning_rate": 5.87990611227967e-07, "loss": 0.5038, "step": 7484 }, { "epoch": 0.9196461481754515, "grad_norm": 1.0917949291400777, "learning_rate": 5.862083683058733e-07, "loss": 0.6013, "step": 7485 }, { "epoch": 0.9197690133923087, "grad_norm": 1.4034150073926124, "learning_rate": 5.844287767072753e-07, "loss": 0.6402, "step": 7486 }, { "epoch": 0.9198918786091658, "grad_norm": 1.390301691857146, "learning_rate": 5.82651836759513e-07, "loss": 0.6217, "step": 7487 }, { "epoch": 0.9200147438260229, "grad_norm": 1.1929088317719423, "learning_rate": 5.808775487894447e-07, "loss": 0.5444, "step": 7488 }, { "epoch": 0.92013760904288, "grad_norm": 1.1917352715161051, "learning_rate": 5.791059131234411e-07, "loss": 0.5086, "step": 7489 }, { "epoch": 0.9202604742597371, "grad_norm": 1.0217615820846444, "learning_rate": 5.773369300873849e-07, "loss": 0.6224, "step": 7490 }, { "epoch": 0.9203833394765942, "grad_norm": 1.0548144474841965, "learning_rate": 5.755706000066624e-07, "loss": 0.441, "step": 7491 }, { "epoch": 0.9205062046934512, "grad_norm": 1.1937192361223212, "learning_rate": 5.738069232061837e-07, "loss": 0.4559, "step": 7492 }, { "epoch": 0.9206290699103084, "grad_norm": 1.4542029403397685, "learning_rate": 5.720459000103644e-07, "loss": 0.5869, "step": 7493 }, { "epoch": 0.9207519351271655, "grad_norm": 1.081297535989145, "learning_rate": 5.702875307431321e-07, "loss": 0.6081, "step": 7494 }, { "epoch": 0.9208748003440226, "grad_norm": 1.226044340282077, "learning_rate": 5.685318157279313e-07, "loss": 0.582, "step": 7495 }, { "epoch": 0.9209976655608797, "grad_norm": 1.3295515232667456, "learning_rate": 5.667787552877085e-07, "loss": 0.5153, "step": 7496 }, { "epoch": 0.9211205307777368, "grad_norm": 1.4400019687916938, "learning_rate": 5.650283497449327e-07, "loss": 0.5952, "step": 7497 }, { "epoch": 0.9212433959945939, "grad_norm": 1.2332996193111736, "learning_rate": 5.632805994215761e-07, "loss": 0.5507, "step": 7498 }, { "epoch": 0.921366261211451, "grad_norm": 1.2189498384144462, "learning_rate": 5.615355046391302e-07, "loss": 0.6372, "step": 7499 }, { "epoch": 0.9214891264283082, "grad_norm": 1.0361304598818593, "learning_rate": 5.597930657185913e-07, "loss": 0.5814, "step": 7500 }, { "epoch": 0.9216119916451653, "grad_norm": 1.0852873295876413, "learning_rate": 5.58053282980468e-07, "loss": 0.5401, "step": 7501 }, { "epoch": 0.9217348568620224, "grad_norm": 1.0801300288331062, "learning_rate": 5.56316156744786e-07, "loss": 0.4789, "step": 7502 }, { "epoch": 0.9218577220788795, "grad_norm": 1.3732560514619276, "learning_rate": 5.545816873310733e-07, "loss": 0.5444, "step": 7503 }, { "epoch": 0.9219805872957366, "grad_norm": 1.3856717508322618, "learning_rate": 5.52849875058381e-07, "loss": 0.5887, "step": 7504 }, { "epoch": 0.9221034525125937, "grad_norm": 1.2263662540648121, "learning_rate": 5.511207202452595e-07, "loss": 0.628, "step": 7505 }, { "epoch": 0.9222263177294507, "grad_norm": 1.0518584401625717, "learning_rate": 5.493942232097792e-07, "loss": 0.5888, "step": 7506 }, { "epoch": 0.9223491829463079, "grad_norm": 1.7495591596430669, "learning_rate": 5.476703842695114e-07, "loss": 0.5861, "step": 7507 }, { "epoch": 0.922472048163165, "grad_norm": 1.156540432758227, "learning_rate": 5.459492037415536e-07, "loss": 0.5434, "step": 7508 }, { "epoch": 0.9225949133800221, "grad_norm": 1.189786024401316, "learning_rate": 5.442306819425013e-07, "loss": 0.4787, "step": 7509 }, { "epoch": 0.9227177785968792, "grad_norm": 1.2193426443347493, "learning_rate": 5.425148191884666e-07, "loss": 0.5717, "step": 7510 }, { "epoch": 0.9228406438137363, "grad_norm": 1.5793656319093057, "learning_rate": 5.408016157950701e-07, "loss": 0.6063, "step": 7511 }, { "epoch": 0.9229635090305934, "grad_norm": 1.0709744819984381, "learning_rate": 5.390910720774433e-07, "loss": 0.6011, "step": 7512 }, { "epoch": 0.9230863742474505, "grad_norm": 1.1075993088169265, "learning_rate": 5.373831883502345e-07, "loss": 0.4356, "step": 7513 }, { "epoch": 0.9232092394643077, "grad_norm": 1.1091177190936585, "learning_rate": 5.35677964927594e-07, "loss": 0.5629, "step": 7514 }, { "epoch": 0.9233321046811648, "grad_norm": 1.3189165668150655, "learning_rate": 5.339754021231857e-07, "loss": 0.4588, "step": 7515 }, { "epoch": 0.9234549698980219, "grad_norm": 1.5232790316092173, "learning_rate": 5.322755002501878e-07, "loss": 0.624, "step": 7516 }, { "epoch": 0.923577835114879, "grad_norm": 1.1302665328761694, "learning_rate": 5.305782596212866e-07, "loss": 0.5907, "step": 7517 }, { "epoch": 0.9237007003317361, "grad_norm": 1.4993851031560625, "learning_rate": 5.288836805486758e-07, "loss": 0.6651, "step": 7518 }, { "epoch": 0.9238235655485932, "grad_norm": 1.2343386248515626, "learning_rate": 5.271917633440627e-07, "loss": 0.4837, "step": 7519 }, { "epoch": 0.9239464307654504, "grad_norm": 1.1716248760448322, "learning_rate": 5.255025083186682e-07, "loss": 0.5207, "step": 7520 }, { "epoch": 0.9240692959823074, "grad_norm": 1.324219699627738, "learning_rate": 5.23815915783214e-07, "loss": 0.5705, "step": 7521 }, { "epoch": 0.9241921611991645, "grad_norm": 1.1354336882013718, "learning_rate": 5.221319860479401e-07, "loss": 0.5854, "step": 7522 }, { "epoch": 0.9243150264160216, "grad_norm": 1.1101797017029, "learning_rate": 5.204507194225971e-07, "loss": 0.5829, "step": 7523 }, { "epoch": 0.9244378916328787, "grad_norm": 1.2139709708707587, "learning_rate": 5.18772116216441e-07, "loss": 0.5703, "step": 7524 }, { "epoch": 0.9245607568497358, "grad_norm": 1.4039291714936641, "learning_rate": 5.170961767382398e-07, "loss": 0.575, "step": 7525 }, { "epoch": 0.9246836220665929, "grad_norm": 1.1650057925415465, "learning_rate": 5.154229012962702e-07, "loss": 0.5524, "step": 7526 }, { "epoch": 0.92480648728345, "grad_norm": 1.231152512181629, "learning_rate": 5.137522901983244e-07, "loss": 0.5529, "step": 7527 }, { "epoch": 0.9249293525003072, "grad_norm": 1.1810067195750897, "learning_rate": 5.120843437516981e-07, "loss": 0.5136, "step": 7528 }, { "epoch": 0.9250522177171643, "grad_norm": 1.0629762695212648, "learning_rate": 5.104190622631977e-07, "loss": 0.6027, "step": 7529 }, { "epoch": 0.9251750829340214, "grad_norm": 1.4407960363326655, "learning_rate": 5.087564460391431e-07, "loss": 0.5281, "step": 7530 }, { "epoch": 0.9252979481508785, "grad_norm": 1.2528916107707408, "learning_rate": 5.070964953853629e-07, "loss": 0.4544, "step": 7531 }, { "epoch": 0.9254208133677356, "grad_norm": 1.165741975834856, "learning_rate": 5.054392106071914e-07, "loss": 0.5719, "step": 7532 }, { "epoch": 0.9255436785845927, "grad_norm": 1.1799612416634437, "learning_rate": 5.03784592009478e-07, "loss": 0.5331, "step": 7533 }, { "epoch": 0.9256665438014499, "grad_norm": 1.1838636265310092, "learning_rate": 5.021326398965742e-07, "loss": 0.6522, "step": 7534 }, { "epoch": 0.9257894090183069, "grad_norm": 1.2072869203863958, "learning_rate": 5.004833545723519e-07, "loss": 0.6155, "step": 7535 }, { "epoch": 0.925912274235164, "grad_norm": 1.07204280236886, "learning_rate": 4.988367363401835e-07, "loss": 0.637, "step": 7536 }, { "epoch": 0.9260351394520211, "grad_norm": 1.1964336067246337, "learning_rate": 4.971927855029551e-07, "loss": 0.603, "step": 7537 }, { "epoch": 0.9261580046688782, "grad_norm": 1.2739797270326663, "learning_rate": 4.95551502363058e-07, "loss": 0.6303, "step": 7538 }, { "epoch": 0.9262808698857353, "grad_norm": 1.2058023645172338, "learning_rate": 4.939128872223975e-07, "loss": 0.7118, "step": 7539 }, { "epoch": 0.9264037351025924, "grad_norm": 1.1916738175124733, "learning_rate": 4.922769403823873e-07, "loss": 0.4985, "step": 7540 }, { "epoch": 0.9265266003194496, "grad_norm": 1.1716803368272342, "learning_rate": 4.90643662143947e-07, "loss": 0.5147, "step": 7541 }, { "epoch": 0.9266494655363067, "grad_norm": 1.0637762926681107, "learning_rate": 4.890130528075093e-07, "loss": 0.4848, "step": 7542 }, { "epoch": 0.9267723307531638, "grad_norm": 1.4779562506218884, "learning_rate": 4.873851126730128e-07, "loss": 0.6367, "step": 7543 }, { "epoch": 0.9268951959700209, "grad_norm": 1.1196969609122163, "learning_rate": 4.857598420399078e-07, "loss": 0.6352, "step": 7544 }, { "epoch": 0.927018061186878, "grad_norm": 1.2044920220169406, "learning_rate": 4.841372412071504e-07, "loss": 0.5491, "step": 7545 }, { "epoch": 0.9271409264037351, "grad_norm": 1.2797697316159495, "learning_rate": 4.8251731047321e-07, "loss": 0.6921, "step": 7546 }, { "epoch": 0.9272637916205922, "grad_norm": 1.1824413981666217, "learning_rate": 4.809000501360616e-07, "loss": 0.5611, "step": 7547 }, { "epoch": 0.9273866568374494, "grad_norm": 1.3007679155829126, "learning_rate": 4.79285460493189e-07, "loss": 0.53, "step": 7548 }, { "epoch": 0.9275095220543065, "grad_norm": 1.0714754860445481, "learning_rate": 4.776735418415846e-07, "loss": 0.6056, "step": 7549 }, { "epoch": 0.9276323872711635, "grad_norm": 1.157306503532158, "learning_rate": 4.760642944777527e-07, "loss": 0.666, "step": 7550 }, { "epoch": 0.9277552524880206, "grad_norm": 1.134809838494938, "learning_rate": 4.744577186977034e-07, "loss": 0.5167, "step": 7551 }, { "epoch": 0.9278781177048777, "grad_norm": 1.2735767777700502, "learning_rate": 4.728538147969536e-07, "loss": 0.4674, "step": 7552 }, { "epoch": 0.9280009829217348, "grad_norm": 0.9252164863915822, "learning_rate": 4.7125258307053385e-07, "loss": 0.543, "step": 7553 }, { "epoch": 0.928123848138592, "grad_norm": 1.0115628926180718, "learning_rate": 4.6965402381297874e-07, "loss": 0.4964, "step": 7554 }, { "epoch": 0.9282467133554491, "grad_norm": 1.0329618494395612, "learning_rate": 4.6805813731833456e-07, "loss": 0.5304, "step": 7555 }, { "epoch": 0.9283695785723062, "grad_norm": 1.4872740729775655, "learning_rate": 4.664649238801516e-07, "loss": 0.6603, "step": 7556 }, { "epoch": 0.9284924437891633, "grad_norm": 1.1090426522164871, "learning_rate": 4.6487438379149207e-07, "loss": 0.5436, "step": 7557 }, { "epoch": 0.9286153090060204, "grad_norm": 1.2503771432251478, "learning_rate": 4.632865173449285e-07, "loss": 0.6682, "step": 7558 }, { "epoch": 0.9287381742228775, "grad_norm": 1.7444897759105944, "learning_rate": 4.617013248325341e-07, "loss": 0.5431, "step": 7559 }, { "epoch": 0.9288610394397346, "grad_norm": 1.3451354403595412, "learning_rate": 4.601188065458989e-07, "loss": 0.5671, "step": 7560 }, { "epoch": 0.9289839046565918, "grad_norm": 1.2233176594359372, "learning_rate": 4.5853896277610995e-07, "loss": 0.5341, "step": 7561 }, { "epoch": 0.9291067698734489, "grad_norm": 1.1566304834793975, "learning_rate": 4.569617938137799e-07, "loss": 0.6052, "step": 7562 }, { "epoch": 0.929229635090306, "grad_norm": 1.1324201132903031, "learning_rate": 4.5538729994900994e-07, "loss": 0.6098, "step": 7563 }, { "epoch": 0.929352500307163, "grad_norm": 1.4421768446391816, "learning_rate": 4.5381548147142015e-07, "loss": 0.631, "step": 7564 }, { "epoch": 0.9294753655240201, "grad_norm": 1.0246141307413499, "learning_rate": 4.5224633867014086e-07, "loss": 0.5575, "step": 7565 }, { "epoch": 0.9295982307408772, "grad_norm": 1.3849306067603793, "learning_rate": 4.5067987183379956e-07, "loss": 0.5684, "step": 7566 }, { "epoch": 0.9297210959577343, "grad_norm": 1.0946811309193745, "learning_rate": 4.491160812505407e-07, "loss": 0.5664, "step": 7567 }, { "epoch": 0.9298439611745914, "grad_norm": 1.1926903813153242, "learning_rate": 4.4755496720801094e-07, "loss": 0.6269, "step": 7568 }, { "epoch": 0.9299668263914486, "grad_norm": 1.2744799248873335, "learning_rate": 4.4599652999337213e-07, "loss": 0.612, "step": 7569 }, { "epoch": 0.9300896916083057, "grad_norm": 1.22310957661172, "learning_rate": 4.444407698932834e-07, "loss": 0.5787, "step": 7570 }, { "epoch": 0.9302125568251628, "grad_norm": 1.365443305761694, "learning_rate": 4.428876871939208e-07, "loss": 0.5019, "step": 7571 }, { "epoch": 0.9303354220420199, "grad_norm": 1.4289757741233113, "learning_rate": 4.4133728218095916e-07, "loss": 0.6216, "step": 7572 }, { "epoch": 0.930458287258877, "grad_norm": 1.1794713815549989, "learning_rate": 4.3978955513959195e-07, "loss": 0.5506, "step": 7573 }, { "epoch": 0.9305811524757341, "grad_norm": 1.2651670730273994, "learning_rate": 4.382445063545065e-07, "loss": 0.5726, "step": 7574 }, { "epoch": 0.9307040176925913, "grad_norm": 1.0614558040498419, "learning_rate": 4.367021361099105e-07, "loss": 0.4429, "step": 7575 }, { "epoch": 0.9308268829094484, "grad_norm": 1.3182950029562455, "learning_rate": 4.351624446895086e-07, "loss": 0.6701, "step": 7576 }, { "epoch": 0.9309497481263055, "grad_norm": 1.2086058691459074, "learning_rate": 4.336254323765193e-07, "loss": 0.4612, "step": 7577 }, { "epoch": 0.9310726133431626, "grad_norm": 0.9901983450403038, "learning_rate": 4.320910994536664e-07, "loss": 0.5715, "step": 7578 }, { "epoch": 0.9311954785600196, "grad_norm": 1.2162198472550705, "learning_rate": 4.3055944620317754e-07, "loss": 0.505, "step": 7579 }, { "epoch": 0.9313183437768767, "grad_norm": 1.3017071392052155, "learning_rate": 4.2903047290679233e-07, "loss": 0.4979, "step": 7580 }, { "epoch": 0.9314412089937338, "grad_norm": 1.0329859385959645, "learning_rate": 4.2750417984575573e-07, "loss": 0.6187, "step": 7581 }, { "epoch": 0.931564074210591, "grad_norm": 1.2966734734007774, "learning_rate": 4.259805673008216e-07, "loss": 0.5345, "step": 7582 }, { "epoch": 0.9316869394274481, "grad_norm": 1.4012156466964318, "learning_rate": 4.2445963555224396e-07, "loss": 0.6099, "step": 7583 }, { "epoch": 0.9318098046443052, "grad_norm": 1.4260738848455499, "learning_rate": 4.2294138487979083e-07, "loss": 0.6309, "step": 7584 }, { "epoch": 0.9319326698611623, "grad_norm": 1.380703309969176, "learning_rate": 4.214258155627371e-07, "loss": 0.5451, "step": 7585 }, { "epoch": 0.9320555350780194, "grad_norm": 1.2804590354001912, "learning_rate": 4.1991292787985636e-07, "loss": 0.6175, "step": 7586 }, { "epoch": 0.9321784002948765, "grad_norm": 1.0452421613208913, "learning_rate": 4.1840272210943773e-07, "loss": 0.5274, "step": 7587 }, { "epoch": 0.9323012655117336, "grad_norm": 1.0481901119187438, "learning_rate": 4.168951985292724e-07, "loss": 0.6125, "step": 7588 }, { "epoch": 0.9324241307285908, "grad_norm": 1.0999381458839763, "learning_rate": 4.1539035741666344e-07, "loss": 0.6385, "step": 7589 }, { "epoch": 0.9325469959454479, "grad_norm": 1.2008521591209065, "learning_rate": 4.1388819904841115e-07, "loss": 0.6005, "step": 7590 }, { "epoch": 0.932669861162305, "grad_norm": 1.0355107464483728, "learning_rate": 4.123887237008311e-07, "loss": 0.6406, "step": 7591 }, { "epoch": 0.9327927263791621, "grad_norm": 1.1555470561010432, "learning_rate": 4.1089193164974115e-07, "loss": 0.6452, "step": 7592 }, { "epoch": 0.9329155915960192, "grad_norm": 1.167440063786132, "learning_rate": 4.0939782317046924e-07, "loss": 0.4638, "step": 7593 }, { "epoch": 0.9330384568128762, "grad_norm": 1.2503176472630044, "learning_rate": 4.0790639853784227e-07, "loss": 0.5974, "step": 7594 }, { "epoch": 0.9331613220297333, "grad_norm": 1.3292695712140326, "learning_rate": 4.0641765802619914e-07, "loss": 0.5265, "step": 7595 }, { "epoch": 0.9332841872465905, "grad_norm": 1.1973436337053034, "learning_rate": 4.049316019093874e-07, "loss": 0.4632, "step": 7596 }, { "epoch": 0.9334070524634476, "grad_norm": 1.459983082533924, "learning_rate": 4.0344823046075343e-07, "loss": 0.5181, "step": 7597 }, { "epoch": 0.9335299176803047, "grad_norm": 1.0989139699311392, "learning_rate": 4.0196754395315726e-07, "loss": 0.5176, "step": 7598 }, { "epoch": 0.9336527828971618, "grad_norm": 1.2190286802492338, "learning_rate": 4.0048954265895774e-07, "loss": 0.566, "step": 7599 }, { "epoch": 0.9337756481140189, "grad_norm": 1.2587262121285197, "learning_rate": 3.990142268500274e-07, "loss": 0.4629, "step": 7600 }, { "epoch": 0.933898513330876, "grad_norm": 1.1185463414261367, "learning_rate": 3.975415967977375e-07, "loss": 0.5124, "step": 7601 }, { "epoch": 0.9340213785477331, "grad_norm": 1.0913800550705328, "learning_rate": 3.96071652772973e-07, "loss": 0.6341, "step": 7602 }, { "epoch": 0.9341442437645903, "grad_norm": 1.8892974397199394, "learning_rate": 3.9460439504611587e-07, "loss": 0.6697, "step": 7603 }, { "epoch": 0.9342671089814474, "grad_norm": 1.557678586277179, "learning_rate": 3.9313982388706206e-07, "loss": 0.7368, "step": 7604 }, { "epoch": 0.9343899741983045, "grad_norm": 1.239293640144346, "learning_rate": 3.9167793956520927e-07, "loss": 0.6569, "step": 7605 }, { "epoch": 0.9345128394151616, "grad_norm": 1.1821572020037776, "learning_rate": 3.902187423494591e-07, "loss": 0.5139, "step": 7606 }, { "epoch": 0.9346357046320187, "grad_norm": 1.1797996605664147, "learning_rate": 3.8876223250822516e-07, "loss": 0.5718, "step": 7607 }, { "epoch": 0.9347585698488757, "grad_norm": 1.0455565428639864, "learning_rate": 3.8730841030942155e-07, "loss": 0.5012, "step": 7608 }, { "epoch": 0.9348814350657328, "grad_norm": 1.1777442364006812, "learning_rate": 3.858572760204693e-07, "loss": 0.6181, "step": 7609 }, { "epoch": 0.93500430028259, "grad_norm": 1.0631880575579615, "learning_rate": 3.844088299082932e-07, "loss": 0.5724, "step": 7610 }, { "epoch": 0.9351271654994471, "grad_norm": 1.1681741945494173, "learning_rate": 3.829630722393301e-07, "loss": 0.6057, "step": 7611 }, { "epoch": 0.9352500307163042, "grad_norm": 1.3375625974928524, "learning_rate": 3.815200032795141e-07, "loss": 0.6464, "step": 7612 }, { "epoch": 0.9353728959331613, "grad_norm": 1.3490583168663888, "learning_rate": 3.800796232942894e-07, "loss": 0.6589, "step": 7613 }, { "epoch": 0.9354957611500184, "grad_norm": 1.240808594629904, "learning_rate": 3.78641932548604e-07, "loss": 0.507, "step": 7614 }, { "epoch": 0.9356186263668755, "grad_norm": 1.1593411380745904, "learning_rate": 3.7720693130691155e-07, "loss": 0.6044, "step": 7615 }, { "epoch": 0.9357414915837327, "grad_norm": 1.0961455940063567, "learning_rate": 3.7577461983317407e-07, "loss": 0.5418, "step": 7616 }, { "epoch": 0.9358643568005898, "grad_norm": 0.9177259773838493, "learning_rate": 3.743449983908526e-07, "loss": 0.5612, "step": 7617 }, { "epoch": 0.9359872220174469, "grad_norm": 1.021622607526969, "learning_rate": 3.7291806724291667e-07, "loss": 0.5662, "step": 7618 }, { "epoch": 0.936110087234304, "grad_norm": 1.1778253294059775, "learning_rate": 3.7149382665184305e-07, "loss": 0.5799, "step": 7619 }, { "epoch": 0.9362329524511611, "grad_norm": 1.0887702499753598, "learning_rate": 3.700722768796122e-07, "loss": 0.5299, "step": 7620 }, { "epoch": 0.9363558176680182, "grad_norm": 0.9996505715606064, "learning_rate": 3.686534181877066e-07, "loss": 0.5406, "step": 7621 }, { "epoch": 0.9364786828848753, "grad_norm": 1.3593952345686473, "learning_rate": 3.6723725083711745e-07, "loss": 0.6371, "step": 7622 }, { "epoch": 0.9366015481017324, "grad_norm": 1.2293722638301758, "learning_rate": 3.658237750883398e-07, "loss": 0.5092, "step": 7623 }, { "epoch": 0.9367244133185895, "grad_norm": 1.1571976457327158, "learning_rate": 3.644129912013705e-07, "loss": 0.4482, "step": 7624 }, { "epoch": 0.9368472785354466, "grad_norm": 1.3073514100536288, "learning_rate": 3.630048994357188e-07, "loss": 0.4755, "step": 7625 }, { "epoch": 0.9369701437523037, "grad_norm": 1.4432515514108184, "learning_rate": 3.615995000503891e-07, "loss": 0.5116, "step": 7626 }, { "epoch": 0.9370930089691608, "grad_norm": 1.018433170106163, "learning_rate": 3.601967933039013e-07, "loss": 0.562, "step": 7627 }, { "epoch": 0.9372158741860179, "grad_norm": 1.2766977892408913, "learning_rate": 3.5879677945426904e-07, "loss": 0.6301, "step": 7628 }, { "epoch": 0.937338739402875, "grad_norm": 1.1588956854590802, "learning_rate": 3.573994587590163e-07, "loss": 0.5534, "step": 7629 }, { "epoch": 0.9374616046197322, "grad_norm": 1.0647464324773823, "learning_rate": 3.5600483147517406e-07, "loss": 0.5563, "step": 7630 }, { "epoch": 0.9375844698365893, "grad_norm": 1.312398586186313, "learning_rate": 3.5461289785927384e-07, "loss": 0.7674, "step": 7631 }, { "epoch": 0.9377073350534464, "grad_norm": 1.146344891638692, "learning_rate": 3.532236581673526e-07, "loss": 0.6437, "step": 7632 }, { "epoch": 0.9378302002703035, "grad_norm": 0.917890302177743, "learning_rate": 3.5183711265495077e-07, "loss": 0.5234, "step": 7633 }, { "epoch": 0.9379530654871606, "grad_norm": 1.0665241883988756, "learning_rate": 3.504532615771161e-07, "loss": 0.4835, "step": 7634 }, { "epoch": 0.9380759307040177, "grad_norm": 1.4816109076733543, "learning_rate": 3.490721051883966e-07, "loss": 0.6297, "step": 7635 }, { "epoch": 0.9381987959208749, "grad_norm": 1.3237109353149827, "learning_rate": 3.476936437428524e-07, "loss": 0.5734, "step": 7636 }, { "epoch": 0.9383216611377319, "grad_norm": 1.1668981556701867, "learning_rate": 3.46317877494034e-07, "loss": 0.5591, "step": 7637 }, { "epoch": 0.938444526354589, "grad_norm": 1.1057585314899907, "learning_rate": 3.449448066950139e-07, "loss": 0.5957, "step": 7638 }, { "epoch": 0.9385673915714461, "grad_norm": 1.1397613496608534, "learning_rate": 3.435744315983519e-07, "loss": 0.7315, "step": 7639 }, { "epoch": 0.9386902567883032, "grad_norm": 1.135656600408037, "learning_rate": 3.422067524561262e-07, "loss": 0.5943, "step": 7640 }, { "epoch": 0.9388131220051603, "grad_norm": 1.2905850621865576, "learning_rate": 3.408417695199073e-07, "loss": 0.5918, "step": 7641 }, { "epoch": 0.9389359872220174, "grad_norm": 1.5345375478775598, "learning_rate": 3.39479483040776e-07, "loss": 0.6256, "step": 7642 }, { "epoch": 0.9390588524388745, "grad_norm": 1.1730211813462579, "learning_rate": 3.3811989326932026e-07, "loss": 0.5106, "step": 7643 }, { "epoch": 0.9391817176557317, "grad_norm": 1.2883806074501003, "learning_rate": 3.367630004556216e-07, "loss": 0.5597, "step": 7644 }, { "epoch": 0.9393045828725888, "grad_norm": 1.2469242709651642, "learning_rate": 3.354088048492754e-07, "loss": 0.5826, "step": 7645 }, { "epoch": 0.9394274480894459, "grad_norm": 1.5765814102391031, "learning_rate": 3.340573066993757e-07, "loss": 0.5022, "step": 7646 }, { "epoch": 0.939550313306303, "grad_norm": 1.1390410728973255, "learning_rate": 3.3270850625452377e-07, "loss": 0.5567, "step": 7647 }, { "epoch": 0.9396731785231601, "grad_norm": 1.1313677842451682, "learning_rate": 3.3136240376281935e-07, "loss": 0.5878, "step": 7648 }, { "epoch": 0.9397960437400172, "grad_norm": 1.4379235763668676, "learning_rate": 3.3001899947187275e-07, "loss": 0.5683, "step": 7649 }, { "epoch": 0.9399189089568744, "grad_norm": 1.3740783830783299, "learning_rate": 3.28678293628793e-07, "loss": 0.5808, "step": 7650 }, { "epoch": 0.9400417741737315, "grad_norm": 1.0785353076026944, "learning_rate": 3.273402864801944e-07, "loss": 0.6104, "step": 7651 }, { "epoch": 0.9401646393905885, "grad_norm": 1.053508305753356, "learning_rate": 3.2600497827219524e-07, "loss": 0.5497, "step": 7652 }, { "epoch": 0.9402875046074456, "grad_norm": 1.4694993350700754, "learning_rate": 3.246723692504139e-07, "loss": 0.5211, "step": 7653 }, { "epoch": 0.9404103698243027, "grad_norm": 1.5266780056444087, "learning_rate": 3.2334245965997933e-07, "loss": 0.5505, "step": 7654 }, { "epoch": 0.9405332350411598, "grad_norm": 1.0348506383532476, "learning_rate": 3.220152497455175e-07, "loss": 0.5656, "step": 7655 }, { "epoch": 0.9406561002580169, "grad_norm": 1.2284643503611357, "learning_rate": 3.206907397511599e-07, "loss": 0.5705, "step": 7656 }, { "epoch": 0.940778965474874, "grad_norm": 1.2935486099028568, "learning_rate": 3.1936892992054155e-07, "loss": 0.5539, "step": 7657 }, { "epoch": 0.9409018306917312, "grad_norm": 1.0840939951415647, "learning_rate": 3.18049820496803e-07, "loss": 0.5691, "step": 7658 }, { "epoch": 0.9410246959085883, "grad_norm": 1.2148124782213034, "learning_rate": 3.167334117225834e-07, "loss": 0.5931, "step": 7659 }, { "epoch": 0.9411475611254454, "grad_norm": 1.156851033705549, "learning_rate": 3.154197038400275e-07, "loss": 0.4651, "step": 7660 }, { "epoch": 0.9412704263423025, "grad_norm": 1.3142138367922893, "learning_rate": 3.141086970907853e-07, "loss": 0.5899, "step": 7661 }, { "epoch": 0.9413932915591596, "grad_norm": 1.4086110045504152, "learning_rate": 3.1280039171600715e-07, "loss": 0.4885, "step": 7662 }, { "epoch": 0.9415161567760167, "grad_norm": 1.3394471574017637, "learning_rate": 3.1149478795634736e-07, "loss": 0.7062, "step": 7663 }, { "epoch": 0.9416390219928739, "grad_norm": 1.276759628210885, "learning_rate": 3.1019188605196035e-07, "loss": 0.606, "step": 7664 }, { "epoch": 0.941761887209731, "grad_norm": 1.3121554144422776, "learning_rate": 3.088916862425112e-07, "loss": 0.6494, "step": 7665 }, { "epoch": 0.941884752426588, "grad_norm": 1.2061823434607322, "learning_rate": 3.0759418876716183e-07, "loss": 0.5366, "step": 7666 }, { "epoch": 0.9420076176434451, "grad_norm": 1.1761001464353842, "learning_rate": 3.062993938645781e-07, "loss": 0.5473, "step": 7667 }, { "epoch": 0.9421304828603022, "grad_norm": 1.0445555063555247, "learning_rate": 3.0500730177292604e-07, "loss": 0.4826, "step": 7668 }, { "epoch": 0.9422533480771593, "grad_norm": 1.1286654676033918, "learning_rate": 3.037179127298823e-07, "loss": 0.5783, "step": 7669 }, { "epoch": 0.9423762132940164, "grad_norm": 0.8991888517416979, "learning_rate": 3.024312269726204e-07, "loss": 0.5654, "step": 7670 }, { "epoch": 0.9424990785108736, "grad_norm": 1.080597445653462, "learning_rate": 3.0114724473781443e-07, "loss": 0.4585, "step": 7671 }, { "epoch": 0.9426219437277307, "grad_norm": 1.5146910984784896, "learning_rate": 2.998659662616504e-07, "loss": 0.5183, "step": 7672 }, { "epoch": 0.9427448089445878, "grad_norm": 1.1604855129824596, "learning_rate": 2.985873917798082e-07, "loss": 0.4901, "step": 7673 }, { "epoch": 0.9428676741614449, "grad_norm": 1.3546162155912078, "learning_rate": 2.97311521527473e-07, "loss": 0.6506, "step": 7674 }, { "epoch": 0.942990539378302, "grad_norm": 1.1104357296521736, "learning_rate": 2.9603835573933034e-07, "loss": 0.4881, "step": 7675 }, { "epoch": 0.9431134045951591, "grad_norm": 1.2167835179437068, "learning_rate": 2.947678946495763e-07, "loss": 0.5145, "step": 7676 }, { "epoch": 0.9432362698120162, "grad_norm": 1.2089540758539854, "learning_rate": 2.935001384919006e-07, "loss": 0.5732, "step": 7677 }, { "epoch": 0.9433591350288734, "grad_norm": 1.2450434970785484, "learning_rate": 2.9223508749950003e-07, "loss": 0.5643, "step": 7678 }, { "epoch": 0.9434820002457305, "grad_norm": 1.3375545333780399, "learning_rate": 2.909727419050717e-07, "loss": 0.5929, "step": 7679 }, { "epoch": 0.9436048654625876, "grad_norm": 1.307464360584236, "learning_rate": 2.89713101940815e-07, "loss": 0.6049, "step": 7680 }, { "epoch": 0.9437277306794446, "grad_norm": 1.1471000077699096, "learning_rate": 2.8845616783843455e-07, "loss": 0.5083, "step": 7681 }, { "epoch": 0.9438505958963017, "grad_norm": 1.2616424933308497, "learning_rate": 2.872019398291337e-07, "loss": 0.6776, "step": 7682 }, { "epoch": 0.9439734611131588, "grad_norm": 1.2312749132120369, "learning_rate": 2.8595041814362124e-07, "loss": 0.6789, "step": 7683 }, { "epoch": 0.944096326330016, "grad_norm": 1.2433560521009637, "learning_rate": 2.8470160301210304e-07, "loss": 0.5234, "step": 7684 }, { "epoch": 0.9442191915468731, "grad_norm": 1.126740187826306, "learning_rate": 2.83455494664297e-07, "loss": 0.6146, "step": 7685 }, { "epoch": 0.9443420567637302, "grad_norm": 1.058258586861683, "learning_rate": 2.822120933294098e-07, "loss": 0.5567, "step": 7686 }, { "epoch": 0.9444649219805873, "grad_norm": 1.4750702341519464, "learning_rate": 2.8097139923615845e-07, "loss": 0.6626, "step": 7687 }, { "epoch": 0.9445877871974444, "grad_norm": 1.111153597401485, "learning_rate": 2.797334126127654e-07, "loss": 0.5399, "step": 7688 }, { "epoch": 0.9447106524143015, "grad_norm": 1.0396822088589652, "learning_rate": 2.784981336869452e-07, "loss": 0.5825, "step": 7689 }, { "epoch": 0.9448335176311586, "grad_norm": 1.0391625286754607, "learning_rate": 2.772655626859211e-07, "loss": 0.5604, "step": 7690 }, { "epoch": 0.9449563828480158, "grad_norm": 1.1313248465909034, "learning_rate": 2.7603569983641496e-07, "loss": 0.511, "step": 7691 }, { "epoch": 0.9450792480648729, "grad_norm": 1.2839432594572726, "learning_rate": 2.748085453646559e-07, "loss": 0.5899, "step": 7692 }, { "epoch": 0.94520211328173, "grad_norm": 1.2314748683971133, "learning_rate": 2.7358409949636674e-07, "loss": 0.5711, "step": 7693 }, { "epoch": 0.9453249784985871, "grad_norm": 1.3251307665458316, "learning_rate": 2.723623624567789e-07, "loss": 0.6191, "step": 7694 }, { "epoch": 0.9454478437154441, "grad_norm": 1.1777499791145356, "learning_rate": 2.711433344706227e-07, "loss": 0.5908, "step": 7695 }, { "epoch": 0.9455707089323012, "grad_norm": 1.0237677025867713, "learning_rate": 2.69927015762132e-07, "loss": 0.5327, "step": 7696 }, { "epoch": 0.9456935741491583, "grad_norm": 1.233825379201078, "learning_rate": 2.687134065550362e-07, "loss": 0.5611, "step": 7697 }, { "epoch": 0.9458164393660154, "grad_norm": 1.344919927520651, "learning_rate": 2.675025070725734e-07, "loss": 0.6251, "step": 7698 }, { "epoch": 0.9459393045828726, "grad_norm": 1.0519554151540325, "learning_rate": 2.662943175374838e-07, "loss": 0.6561, "step": 7699 }, { "epoch": 0.9460621697997297, "grad_norm": 1.074051997762522, "learning_rate": 2.650888381719996e-07, "loss": 0.6206, "step": 7700 }, { "epoch": 0.9461850350165868, "grad_norm": 1.2115747037814342, "learning_rate": 2.6388606919786673e-07, "loss": 0.5607, "step": 7701 }, { "epoch": 0.9463079002334439, "grad_norm": 1.2559670442708277, "learning_rate": 2.626860108363233e-07, "loss": 0.5294, "step": 7702 }, { "epoch": 0.946430765450301, "grad_norm": 1.2130607676126903, "learning_rate": 2.614886633081143e-07, "loss": 0.7381, "step": 7703 }, { "epoch": 0.9465536306671581, "grad_norm": 1.202303420132463, "learning_rate": 2.602940268334819e-07, "loss": 0.6602, "step": 7704 }, { "epoch": 0.9466764958840153, "grad_norm": 1.1073351252885164, "learning_rate": 2.5910210163217376e-07, "loss": 0.4925, "step": 7705 }, { "epoch": 0.9467993611008724, "grad_norm": 1.0949693657888397, "learning_rate": 2.5791288792343437e-07, "loss": 0.7064, "step": 7706 }, { "epoch": 0.9469222263177295, "grad_norm": 1.2402852638648325, "learning_rate": 2.567263859260155e-07, "loss": 0.507, "step": 7707 }, { "epoch": 0.9470450915345866, "grad_norm": 1.1407232584373486, "learning_rate": 2.555425958581642e-07, "loss": 0.5431, "step": 7708 }, { "epoch": 0.9471679567514437, "grad_norm": 1.274230711854219, "learning_rate": 2.5436151793762964e-07, "loss": 0.4729, "step": 7709 }, { "epoch": 0.9472908219683007, "grad_norm": 1.1734181321872967, "learning_rate": 2.531831523816663e-07, "loss": 0.6083, "step": 7710 }, { "epoch": 0.9474136871851578, "grad_norm": 1.0863077605639766, "learning_rate": 2.520074994070243e-07, "loss": 0.509, "step": 7711 }, { "epoch": 0.947536552402015, "grad_norm": 1.273673464036806, "learning_rate": 2.5083455922996044e-07, "loss": 0.5552, "step": 7712 }, { "epoch": 0.9476594176188721, "grad_norm": 1.0288444341910625, "learning_rate": 2.496643320662256e-07, "loss": 0.6034, "step": 7713 }, { "epoch": 0.9477822828357292, "grad_norm": 1.1844803873452392, "learning_rate": 2.484968181310793e-07, "loss": 0.6305, "step": 7714 }, { "epoch": 0.9479051480525863, "grad_norm": 1.0249993631932606, "learning_rate": 2.4733201763927624e-07, "loss": 0.5759, "step": 7715 }, { "epoch": 0.9480280132694434, "grad_norm": 0.9913582097930854, "learning_rate": 2.461699308050752e-07, "loss": 0.6718, "step": 7716 }, { "epoch": 0.9481508784863005, "grad_norm": 1.105959546725019, "learning_rate": 2.450105578422318e-07, "loss": 0.5243, "step": 7717 }, { "epoch": 0.9482737437031576, "grad_norm": 1.1951080716465345, "learning_rate": 2.438538989640071e-07, "loss": 0.6769, "step": 7718 }, { "epoch": 0.9483966089200148, "grad_norm": 1.1550996990946776, "learning_rate": 2.4269995438316093e-07, "loss": 0.5139, "step": 7719 }, { "epoch": 0.9485194741368719, "grad_norm": 1.4942744161779253, "learning_rate": 2.415487243119535e-07, "loss": 0.5157, "step": 7720 }, { "epoch": 0.948642339353729, "grad_norm": 0.9714307014394438, "learning_rate": 2.404002089621471e-07, "loss": 0.6096, "step": 7721 }, { "epoch": 0.9487652045705861, "grad_norm": 1.3642022194367578, "learning_rate": 2.3925440854500104e-07, "loss": 0.5703, "step": 7722 }, { "epoch": 0.9488880697874432, "grad_norm": 1.0515903433462852, "learning_rate": 2.3811132327128172e-07, "loss": 0.4996, "step": 7723 }, { "epoch": 0.9490109350043003, "grad_norm": 1.230839537686157, "learning_rate": 2.369709533512493e-07, "loss": 0.5716, "step": 7724 }, { "epoch": 0.9491338002211573, "grad_norm": 1.1346337483475637, "learning_rate": 2.3583329899466765e-07, "loss": 0.6728, "step": 7725 }, { "epoch": 0.9492566654380145, "grad_norm": 1.2318020765376274, "learning_rate": 2.346983604108044e-07, "loss": 0.5783, "step": 7726 }, { "epoch": 0.9493795306548716, "grad_norm": 1.2400669949142467, "learning_rate": 2.3356613780841919e-07, "loss": 0.5432, "step": 7727 }, { "epoch": 0.9495023958717287, "grad_norm": 1.3675173036290396, "learning_rate": 2.3243663139578042e-07, "loss": 0.5681, "step": 7728 }, { "epoch": 0.9496252610885858, "grad_norm": 1.1953108988535237, "learning_rate": 2.3130984138065026e-07, "loss": 0.4946, "step": 7729 }, { "epoch": 0.9497481263054429, "grad_norm": 1.2125222913589289, "learning_rate": 2.301857679702979e-07, "loss": 0.5587, "step": 7730 }, { "epoch": 0.9498709915223, "grad_norm": 1.0323853121591349, "learning_rate": 2.2906441137148793e-07, "loss": 0.54, "step": 7731 }, { "epoch": 0.9499938567391571, "grad_norm": 1.307333243913769, "learning_rate": 2.2794577179048702e-07, "loss": 0.5525, "step": 7732 }, { "epoch": 0.9501167219560143, "grad_norm": 1.1259516507745981, "learning_rate": 2.2682984943305894e-07, "loss": 0.5894, "step": 7733 }, { "epoch": 0.9502395871728714, "grad_norm": 1.3993593866404728, "learning_rate": 2.2571664450447616e-07, "loss": 0.575, "step": 7734 }, { "epoch": 0.9503624523897285, "grad_norm": 1.1668009973224958, "learning_rate": 2.2460615720949984e-07, "loss": 0.5976, "step": 7735 }, { "epoch": 0.9504853176065856, "grad_norm": 1.0209570869610247, "learning_rate": 2.2349838775239828e-07, "loss": 0.6417, "step": 7736 }, { "epoch": 0.9506081828234427, "grad_norm": 1.0907213377045921, "learning_rate": 2.2239333633694182e-07, "loss": 0.6428, "step": 7737 }, { "epoch": 0.9507310480402998, "grad_norm": 1.268297329102986, "learning_rate": 2.2129100316639282e-07, "loss": 0.6551, "step": 7738 }, { "epoch": 0.9508539132571568, "grad_norm": 1.1150788543940735, "learning_rate": 2.2019138844352249e-07, "loss": 0.5866, "step": 7739 }, { "epoch": 0.950976778474014, "grad_norm": 1.256398518975664, "learning_rate": 2.19094492370594e-07, "loss": 0.6194, "step": 7740 }, { "epoch": 0.9510996436908711, "grad_norm": 0.9816137782255879, "learning_rate": 2.1800031514937757e-07, "loss": 0.5503, "step": 7741 }, { "epoch": 0.9512225089077282, "grad_norm": 1.296707386615462, "learning_rate": 2.1690885698113728e-07, "loss": 0.566, "step": 7742 }, { "epoch": 0.9513453741245853, "grad_norm": 1.330449368016545, "learning_rate": 2.1582011806664248e-07, "loss": 0.583, "step": 7743 }, { "epoch": 0.9514682393414424, "grad_norm": 1.1708921784677875, "learning_rate": 2.1473409860615635e-07, "loss": 0.4582, "step": 7744 }, { "epoch": 0.9515911045582995, "grad_norm": 1.1733241553795841, "learning_rate": 2.1365079879944904e-07, "loss": 0.6118, "step": 7745 }, { "epoch": 0.9517139697751567, "grad_norm": 1.3774588336233446, "learning_rate": 2.1257021884578286e-07, "loss": 0.5544, "step": 7746 }, { "epoch": 0.9518368349920138, "grad_norm": 1.120622943860527, "learning_rate": 2.114923589439255e-07, "loss": 0.538, "step": 7747 }, { "epoch": 0.9519597002088709, "grad_norm": 1.8646951394917248, "learning_rate": 2.1041721929214163e-07, "loss": 0.5723, "step": 7748 }, { "epoch": 0.952082565425728, "grad_norm": 1.1445923739800998, "learning_rate": 2.0934480008819645e-07, "loss": 0.5665, "step": 7749 }, { "epoch": 0.9522054306425851, "grad_norm": 1.3660339368907883, "learning_rate": 2.0827510152935546e-07, "loss": 0.6652, "step": 7750 }, { "epoch": 0.9523282958594422, "grad_norm": 1.0695277123321518, "learning_rate": 2.0720812381238131e-07, "loss": 0.5386, "step": 7751 }, { "epoch": 0.9524511610762993, "grad_norm": 1.2304844276253297, "learning_rate": 2.0614386713353696e-07, "loss": 0.5952, "step": 7752 }, { "epoch": 0.9525740262931565, "grad_norm": 1.0126325743751725, "learning_rate": 2.0508233168858749e-07, "loss": 0.6165, "step": 7753 }, { "epoch": 0.9526968915100135, "grad_norm": 1.2989811539676857, "learning_rate": 2.040235176727967e-07, "loss": 0.5372, "step": 7754 }, { "epoch": 0.9528197567268706, "grad_norm": 1.1930240723499068, "learning_rate": 2.0296742528092216e-07, "loss": 0.5509, "step": 7755 }, { "epoch": 0.9529426219437277, "grad_norm": 1.2895261776846862, "learning_rate": 2.0191405470722847e-07, "loss": 0.5655, "step": 7756 }, { "epoch": 0.9530654871605848, "grad_norm": 1.5238762000936108, "learning_rate": 2.008634061454756e-07, "loss": 0.5791, "step": 7757 }, { "epoch": 0.9531883523774419, "grad_norm": 1.2362623829606096, "learning_rate": 1.9981547978892234e-07, "loss": 0.6902, "step": 7758 }, { "epoch": 0.953311217594299, "grad_norm": 1.0647834371014968, "learning_rate": 1.9877027583032947e-07, "loss": 0.5089, "step": 7759 }, { "epoch": 0.9534340828111562, "grad_norm": 1.098431183844803, "learning_rate": 1.9772779446195488e-07, "loss": 0.5215, "step": 7760 }, { "epoch": 0.9535569480280133, "grad_norm": 1.1371855354091476, "learning_rate": 1.966880358755585e-07, "loss": 0.6419, "step": 7761 }, { "epoch": 0.9536798132448704, "grad_norm": 1.0222149762098098, "learning_rate": 1.9565100026239237e-07, "loss": 0.5949, "step": 7762 }, { "epoch": 0.9538026784617275, "grad_norm": 1.3273189733477422, "learning_rate": 1.9461668781321717e-07, "loss": 0.5318, "step": 7763 }, { "epoch": 0.9539255436785846, "grad_norm": 1.1809587661422711, "learning_rate": 1.9358509871828577e-07, "loss": 0.5696, "step": 7764 }, { "epoch": 0.9540484088954417, "grad_norm": 1.2833313356330704, "learning_rate": 1.925562331673514e-07, "loss": 0.4753, "step": 7765 }, { "epoch": 0.9541712741122989, "grad_norm": 1.180651274838312, "learning_rate": 1.9153009134966926e-07, "loss": 0.5781, "step": 7766 }, { "epoch": 0.954294139329156, "grad_norm": 1.2833201169043948, "learning_rate": 1.905066734539884e-07, "loss": 0.4828, "step": 7767 }, { "epoch": 0.954417004546013, "grad_norm": 1.0623706044866086, "learning_rate": 1.894859796685633e-07, "loss": 0.6133, "step": 7768 }, { "epoch": 0.9545398697628701, "grad_norm": 1.462711857768134, "learning_rate": 1.884680101811437e-07, "loss": 0.5146, "step": 7769 }, { "epoch": 0.9546627349797272, "grad_norm": 1.193002284134927, "learning_rate": 1.8745276517897647e-07, "loss": 0.6401, "step": 7770 }, { "epoch": 0.9547856001965843, "grad_norm": 1.2366251851023828, "learning_rate": 1.8644024484880894e-07, "loss": 0.5428, "step": 7771 }, { "epoch": 0.9549084654134414, "grad_norm": 1.157281241312102, "learning_rate": 1.8543044937689213e-07, "loss": 0.6449, "step": 7772 }, { "epoch": 0.9550313306302985, "grad_norm": 1.1095978004926672, "learning_rate": 1.8442337894896577e-07, "loss": 0.5514, "step": 7773 }, { "epoch": 0.9551541958471557, "grad_norm": 1.131583384029103, "learning_rate": 1.8341903375027836e-07, "loss": 0.5175, "step": 7774 }, { "epoch": 0.9552770610640128, "grad_norm": 1.3994685950103056, "learning_rate": 1.8241741396557044e-07, "loss": 0.5701, "step": 7775 }, { "epoch": 0.9553999262808699, "grad_norm": 1.149376941646617, "learning_rate": 1.8141851977908298e-07, "loss": 0.633, "step": 7776 }, { "epoch": 0.955522791497727, "grad_norm": 1.128274504291997, "learning_rate": 1.804223513745573e-07, "loss": 0.6017, "step": 7777 }, { "epoch": 0.9556456567145841, "grad_norm": 1.1978741058322593, "learning_rate": 1.7942890893523022e-07, "loss": 0.5591, "step": 7778 }, { "epoch": 0.9557685219314412, "grad_norm": 1.2382695270097392, "learning_rate": 1.7843819264384386e-07, "loss": 0.557, "step": 7779 }, { "epoch": 0.9558913871482984, "grad_norm": 1.3979740022736589, "learning_rate": 1.7745020268262746e-07, "loss": 0.599, "step": 7780 }, { "epoch": 0.9560142523651555, "grad_norm": 1.0870985096580672, "learning_rate": 1.7646493923332063e-07, "loss": 0.6419, "step": 7781 }, { "epoch": 0.9561371175820126, "grad_norm": 1.2025885596609573, "learning_rate": 1.7548240247715342e-07, "loss": 0.5946, "step": 7782 }, { "epoch": 0.9562599827988696, "grad_norm": 1.1592519263874321, "learning_rate": 1.745025925948579e-07, "loss": 0.6325, "step": 7783 }, { "epoch": 0.9563828480157267, "grad_norm": 1.467817384611979, "learning_rate": 1.7352550976666493e-07, "loss": 0.5258, "step": 7784 }, { "epoch": 0.9565057132325838, "grad_norm": 1.326984590855388, "learning_rate": 1.725511541723007e-07, "loss": 0.6657, "step": 7785 }, { "epoch": 0.9566285784494409, "grad_norm": 1.2537415624659347, "learning_rate": 1.7157952599099192e-07, "loss": 0.4361, "step": 7786 }, { "epoch": 0.956751443666298, "grad_norm": 1.2336630612375346, "learning_rate": 1.7061062540146387e-07, "loss": 0.5907, "step": 7787 }, { "epoch": 0.9568743088831552, "grad_norm": 1.1893502296910492, "learning_rate": 1.6964445258193906e-07, "loss": 0.5646, "step": 7788 }, { "epoch": 0.9569971741000123, "grad_norm": 1.00979372986818, "learning_rate": 1.6868100771014027e-07, "loss": 0.5768, "step": 7789 }, { "epoch": 0.9571200393168694, "grad_norm": 1.4393506909322724, "learning_rate": 1.677202909632841e-07, "loss": 0.5672, "step": 7790 }, { "epoch": 0.9572429045337265, "grad_norm": 1.0784113284944765, "learning_rate": 1.6676230251809088e-07, "loss": 0.524, "step": 7791 }, { "epoch": 0.9573657697505836, "grad_norm": 0.9169108804801552, "learning_rate": 1.6580704255077295e-07, "loss": 0.7061, "step": 7792 }, { "epoch": 0.9574886349674407, "grad_norm": 1.2076275878828486, "learning_rate": 1.6485451123704974e-07, "loss": 0.5377, "step": 7793 }, { "epoch": 0.9576115001842979, "grad_norm": 0.9689551870524671, "learning_rate": 1.6390470875212615e-07, "loss": 0.5424, "step": 7794 }, { "epoch": 0.957734365401155, "grad_norm": 1.1978926816427216, "learning_rate": 1.6295763527071906e-07, "loss": 0.601, "step": 7795 }, { "epoch": 0.9578572306180121, "grad_norm": 1.129254135959243, "learning_rate": 1.6201329096703076e-07, "loss": 0.7094, "step": 7796 }, { "epoch": 0.9579800958348691, "grad_norm": 0.9335342572751679, "learning_rate": 1.6107167601477235e-07, "loss": 0.5668, "step": 7797 }, { "epoch": 0.9581029610517262, "grad_norm": 1.319627733046934, "learning_rate": 1.6013279058714357e-07, "loss": 0.5457, "step": 7798 }, { "epoch": 0.9582258262685833, "grad_norm": 1.1800137571930962, "learning_rate": 1.5919663485684965e-07, "loss": 0.5947, "step": 7799 }, { "epoch": 0.9583486914854404, "grad_norm": 1.2660377373128198, "learning_rate": 1.5826320899608616e-07, "loss": 0.5229, "step": 7800 }, { "epoch": 0.9584715567022976, "grad_norm": 1.0904579243787162, "learning_rate": 1.5733251317655574e-07, "loss": 0.5415, "step": 7801 }, { "epoch": 0.9585944219191547, "grad_norm": 1.2779831951737801, "learning_rate": 1.5640454756945144e-07, "loss": 0.5257, "step": 7802 }, { "epoch": 0.9587172871360118, "grad_norm": 1.3894241237869698, "learning_rate": 1.554793123454651e-07, "loss": 0.5656, "step": 7803 }, { "epoch": 0.9588401523528689, "grad_norm": 1.2782347841819308, "learning_rate": 1.5455680767479053e-07, "loss": 0.4775, "step": 7804 }, { "epoch": 0.958963017569726, "grad_norm": 1.2547011304764095, "learning_rate": 1.5363703372711368e-07, "loss": 0.4802, "step": 7805 }, { "epoch": 0.9590858827865831, "grad_norm": 1.5929000502170487, "learning_rate": 1.5271999067162256e-07, "loss": 0.5811, "step": 7806 }, { "epoch": 0.9592087480034402, "grad_norm": 1.2630847855860625, "learning_rate": 1.5180567867700223e-07, "loss": 0.5442, "step": 7807 }, { "epoch": 0.9593316132202974, "grad_norm": 1.2416497962759796, "learning_rate": 1.5089409791143316e-07, "loss": 0.6059, "step": 7808 }, { "epoch": 0.9594544784371545, "grad_norm": 1.2998795044066904, "learning_rate": 1.4998524854259454e-07, "loss": 0.5373, "step": 7809 }, { "epoch": 0.9595773436540116, "grad_norm": 1.231736791491449, "learning_rate": 1.4907913073766432e-07, "loss": 0.4938, "step": 7810 }, { "epoch": 0.9597002088708687, "grad_norm": 1.208974796872807, "learning_rate": 1.4817574466331586e-07, "loss": 0.5234, "step": 7811 }, { "epoch": 0.9598230740877257, "grad_norm": 1.1224774242808986, "learning_rate": 1.4727509048572118e-07, "loss": 0.631, "step": 7812 }, { "epoch": 0.9599459393045828, "grad_norm": 1.3679978207043557, "learning_rate": 1.4637716837055115e-07, "loss": 0.5635, "step": 7813 }, { "epoch": 0.96006880452144, "grad_norm": 1.2163918254298796, "learning_rate": 1.4548197848297194e-07, "loss": 0.6382, "step": 7814 }, { "epoch": 0.9601916697382971, "grad_norm": 1.0043549220685004, "learning_rate": 1.4458952098764688e-07, "loss": 0.5331, "step": 7815 }, { "epoch": 0.9603145349551542, "grad_norm": 1.0330617932711146, "learning_rate": 1.4369979604873962e-07, "loss": 0.5411, "step": 7816 }, { "epoch": 0.9604374001720113, "grad_norm": 1.2646790002677737, "learning_rate": 1.4281280382990758e-07, "loss": 0.5867, "step": 7817 }, { "epoch": 0.9605602653888684, "grad_norm": 1.4945623973963051, "learning_rate": 1.419285444943086e-07, "loss": 0.5834, "step": 7818 }, { "epoch": 0.9606831306057255, "grad_norm": 1.0294876991311295, "learning_rate": 1.4104701820459588e-07, "loss": 0.6015, "step": 7819 }, { "epoch": 0.9608059958225826, "grad_norm": 1.1897915624463136, "learning_rate": 1.4016822512292138e-07, "loss": 0.5649, "step": 7820 }, { "epoch": 0.9609288610394398, "grad_norm": 1.2220832203509522, "learning_rate": 1.3929216541093083e-07, "loss": 0.4561, "step": 7821 }, { "epoch": 0.9610517262562969, "grad_norm": 1.0416252484187243, "learning_rate": 1.3841883922977194e-07, "loss": 0.5972, "step": 7822 }, { "epoch": 0.961174591473154, "grad_norm": 1.558740683134636, "learning_rate": 1.3754824674008792e-07, "loss": 0.6514, "step": 7823 }, { "epoch": 0.9612974566900111, "grad_norm": 1.1655806424345687, "learning_rate": 1.3668038810201565e-07, "loss": 0.553, "step": 7824 }, { "epoch": 0.9614203219068682, "grad_norm": 1.2271037192766345, "learning_rate": 1.3581526347519414e-07, "loss": 0.5399, "step": 7825 }, { "epoch": 0.9615431871237252, "grad_norm": 1.07035049010988, "learning_rate": 1.3495287301875936e-07, "loss": 0.5396, "step": 7826 }, { "epoch": 0.9616660523405823, "grad_norm": 1.062820416160557, "learning_rate": 1.3409321689133947e-07, "loss": 0.4961, "step": 7827 }, { "epoch": 0.9617889175574394, "grad_norm": 1.103191521914067, "learning_rate": 1.3323629525106295e-07, "loss": 0.6166, "step": 7828 }, { "epoch": 0.9619117827742966, "grad_norm": 1.0478264960751524, "learning_rate": 1.3238210825555542e-07, "loss": 0.6083, "step": 7829 }, { "epoch": 0.9620346479911537, "grad_norm": 1.0203360179324665, "learning_rate": 1.3153065606193948e-07, "loss": 0.5818, "step": 7830 }, { "epoch": 0.9621575132080108, "grad_norm": 1.1661228962540515, "learning_rate": 1.3068193882683488e-07, "loss": 0.5798, "step": 7831 }, { "epoch": 0.9622803784248679, "grad_norm": 1.2853486585145466, "learning_rate": 1.2983595670635507e-07, "loss": 0.5356, "step": 7832 }, { "epoch": 0.962403243641725, "grad_norm": 1.2828433160849324, "learning_rate": 1.2899270985611555e-07, "loss": 0.5182, "step": 7833 }, { "epoch": 0.9625261088585821, "grad_norm": 1.1228234217251525, "learning_rate": 1.281521984312256e-07, "loss": 0.413, "step": 7834 }, { "epoch": 0.9626489740754393, "grad_norm": 1.259596506910912, "learning_rate": 1.2731442258629156e-07, "loss": 0.5704, "step": 7835 }, { "epoch": 0.9627718392922964, "grad_norm": 1.0664921966998073, "learning_rate": 1.2647938247541345e-07, "loss": 0.5263, "step": 7836 }, { "epoch": 0.9628947045091535, "grad_norm": 1.0271684271424386, "learning_rate": 1.2564707825219845e-07, "loss": 0.6159, "step": 7837 }, { "epoch": 0.9630175697260106, "grad_norm": 1.2240750363396535, "learning_rate": 1.2481751006973908e-07, "loss": 0.6253, "step": 7838 }, { "epoch": 0.9631404349428677, "grad_norm": 1.2712395672077883, "learning_rate": 1.2399067808062992e-07, "loss": 0.5662, "step": 7839 }, { "epoch": 0.9632633001597248, "grad_norm": 1.1310653029322084, "learning_rate": 1.23166582436961e-07, "loss": 0.4871, "step": 7840 }, { "epoch": 0.9633861653765818, "grad_norm": 1.138013784386157, "learning_rate": 1.2234522329031773e-07, "loss": 0.5243, "step": 7841 }, { "epoch": 0.963509030593439, "grad_norm": 1.6357193906220397, "learning_rate": 1.2152660079178923e-07, "loss": 0.5472, "step": 7842 }, { "epoch": 0.9636318958102961, "grad_norm": 1.01002401548189, "learning_rate": 1.2071071509194842e-07, "loss": 0.5062, "step": 7843 }, { "epoch": 0.9637547610271532, "grad_norm": 1.183643709581267, "learning_rate": 1.1989756634087856e-07, "loss": 0.534, "step": 7844 }, { "epoch": 0.9638776262440103, "grad_norm": 1.4632212109028846, "learning_rate": 1.1908715468815002e-07, "loss": 0.5903, "step": 7845 }, { "epoch": 0.9640004914608674, "grad_norm": 1.2603680441770666, "learning_rate": 1.1827948028283353e-07, "loss": 0.6708, "step": 7846 }, { "epoch": 0.9641233566777245, "grad_norm": 0.9238407645808355, "learning_rate": 1.174745432734936e-07, "loss": 0.6333, "step": 7847 }, { "epoch": 0.9642462218945816, "grad_norm": 1.435427910260267, "learning_rate": 1.166723438081968e-07, "loss": 0.5737, "step": 7848 }, { "epoch": 0.9643690871114388, "grad_norm": 1.3445081587757026, "learning_rate": 1.1587288203450008e-07, "loss": 0.4705, "step": 7849 }, { "epoch": 0.9644919523282959, "grad_norm": 1.263315501445857, "learning_rate": 1.1507615809945915e-07, "loss": 0.6518, "step": 7850 }, { "epoch": 0.964614817545153, "grad_norm": 1.8432322424374505, "learning_rate": 1.1428217214962677e-07, "loss": 0.6743, "step": 7851 }, { "epoch": 0.9647376827620101, "grad_norm": 1.0715337610122606, "learning_rate": 1.1349092433105279e-07, "loss": 0.5433, "step": 7852 }, { "epoch": 0.9648605479788672, "grad_norm": 1.1324431929266474, "learning_rate": 1.1270241478927912e-07, "loss": 0.7205, "step": 7853 }, { "epoch": 0.9649834131957243, "grad_norm": 1.2023922937365075, "learning_rate": 1.1191664366934973e-07, "loss": 0.5672, "step": 7854 }, { "epoch": 0.9651062784125815, "grad_norm": 1.4412746126662541, "learning_rate": 1.1113361111580067e-07, "loss": 0.5859, "step": 7855 }, { "epoch": 0.9652291436294385, "grad_norm": 1.4758271084433805, "learning_rate": 1.1035331727266673e-07, "loss": 0.585, "step": 7856 }, { "epoch": 0.9653520088462956, "grad_norm": 1.3513443958017548, "learning_rate": 1.095757622834781e-07, "loss": 0.5381, "step": 7857 }, { "epoch": 0.9654748740631527, "grad_norm": 1.0897966685427745, "learning_rate": 1.088009462912587e-07, "loss": 0.5588, "step": 7858 }, { "epoch": 0.9655977392800098, "grad_norm": 1.0221207644919628, "learning_rate": 1.0802886943853285e-07, "loss": 0.5543, "step": 7859 }, { "epoch": 0.9657206044968669, "grad_norm": 1.2854865583545483, "learning_rate": 1.0725953186731863e-07, "loss": 0.4937, "step": 7860 }, { "epoch": 0.965843469713724, "grad_norm": 1.2437548804979761, "learning_rate": 1.0649293371913115e-07, "loss": 0.53, "step": 7861 }, { "epoch": 0.9659663349305811, "grad_norm": 1.3344262165983112, "learning_rate": 1.0572907513498097e-07, "loss": 0.467, "step": 7862 }, { "epoch": 0.9660892001474383, "grad_norm": 1.2797513428434228, "learning_rate": 1.0496795625537403e-07, "loss": 0.5429, "step": 7863 }, { "epoch": 0.9662120653642954, "grad_norm": 1.595350582486819, "learning_rate": 1.0420957722031333e-07, "loss": 0.5748, "step": 7864 }, { "epoch": 0.9663349305811525, "grad_norm": 1.0069494447636738, "learning_rate": 1.0345393816929893e-07, "loss": 0.5515, "step": 7865 }, { "epoch": 0.9664577957980096, "grad_norm": 1.0030442090378906, "learning_rate": 1.0270103924132467e-07, "loss": 0.54, "step": 7866 }, { "epoch": 0.9665806610148667, "grad_norm": 1.1874198432200562, "learning_rate": 1.0195088057488311e-07, "loss": 0.4029, "step": 7867 }, { "epoch": 0.9667035262317238, "grad_norm": 1.0127673890376376, "learning_rate": 1.0120346230795884e-07, "loss": 0.5571, "step": 7868 }, { "epoch": 0.966826391448581, "grad_norm": 1.0434956882061928, "learning_rate": 1.0045878457803692e-07, "loss": 0.5062, "step": 7869 }, { "epoch": 0.966949256665438, "grad_norm": 1.0843732012955516, "learning_rate": 9.971684752209276e-08, "loss": 0.5623, "step": 7870 }, { "epoch": 0.9670721218822951, "grad_norm": 0.9709289290507604, "learning_rate": 9.897765127660386e-08, "loss": 0.6182, "step": 7871 }, { "epoch": 0.9671949870991522, "grad_norm": 1.15068633244589, "learning_rate": 9.824119597753811e-08, "loss": 0.5617, "step": 7872 }, { "epoch": 0.9673178523160093, "grad_norm": 1.1685937495762564, "learning_rate": 9.75074817603655e-08, "loss": 0.6076, "step": 7873 }, { "epoch": 0.9674407175328664, "grad_norm": 1.106623121785899, "learning_rate": 9.677650876004307e-08, "loss": 0.5953, "step": 7874 }, { "epoch": 0.9675635827497235, "grad_norm": 1.149518495285776, "learning_rate": 9.604827711103326e-08, "loss": 0.4912, "step": 7875 }, { "epoch": 0.9676864479665807, "grad_norm": 1.1651617580859603, "learning_rate": 9.532278694728557e-08, "loss": 0.5402, "step": 7876 }, { "epoch": 0.9678093131834378, "grad_norm": 1.1016711358611102, "learning_rate": 9.460003840225162e-08, "loss": 0.5845, "step": 7877 }, { "epoch": 0.9679321784002949, "grad_norm": 1.058526545480281, "learning_rate": 9.388003160887503e-08, "loss": 0.6663, "step": 7878 }, { "epoch": 0.968055043617152, "grad_norm": 1.1322672398107858, "learning_rate": 9.316276669959822e-08, "loss": 0.5523, "step": 7879 }, { "epoch": 0.9681779088340091, "grad_norm": 1.2170712987449457, "learning_rate": 9.244824380635564e-08, "loss": 0.51, "step": 7880 }, { "epoch": 0.9683007740508662, "grad_norm": 1.138276174872761, "learning_rate": 9.173646306058048e-08, "loss": 0.5533, "step": 7881 }, { "epoch": 0.9684236392677233, "grad_norm": 1.4696893534943545, "learning_rate": 9.102742459319802e-08, "loss": 0.5779, "step": 7882 }, { "epoch": 0.9685465044845805, "grad_norm": 1.2321755547085937, "learning_rate": 9.032112853463393e-08, "loss": 0.602, "step": 7883 }, { "epoch": 0.9686693697014376, "grad_norm": 1.480143059616058, "learning_rate": 8.961757501480595e-08, "loss": 0.477, "step": 7884 }, { "epoch": 0.9687922349182946, "grad_norm": 1.3018568987357602, "learning_rate": 8.891676416312722e-08, "loss": 0.6656, "step": 7885 }, { "epoch": 0.9689151001351517, "grad_norm": 1.3736015772222996, "learning_rate": 8.82186961085063e-08, "loss": 0.5592, "step": 7886 }, { "epoch": 0.9690379653520088, "grad_norm": 1.06676897312019, "learning_rate": 8.752337097935215e-08, "loss": 0.6257, "step": 7887 }, { "epoch": 0.9691608305688659, "grad_norm": 1.1760388033941362, "learning_rate": 8.683078890356245e-08, "loss": 0.675, "step": 7888 }, { "epoch": 0.969283695785723, "grad_norm": 1.3283605240580882, "learning_rate": 8.614095000853361e-08, "loss": 0.4837, "step": 7889 }, { "epoch": 0.9694065610025802, "grad_norm": 1.2929912986975878, "learning_rate": 8.545385442115749e-08, "loss": 0.521, "step": 7890 }, { "epoch": 0.9695294262194373, "grad_norm": 1.0361170708801757, "learning_rate": 8.476950226782131e-08, "loss": 0.5212, "step": 7891 }, { "epoch": 0.9696522914362944, "grad_norm": 1.5784468490084351, "learning_rate": 8.408789367440606e-08, "loss": 0.5681, "step": 7892 }, { "epoch": 0.9697751566531515, "grad_norm": 1.0596577671868384, "learning_rate": 8.340902876628809e-08, "loss": 0.5127, "step": 7893 }, { "epoch": 0.9698980218700086, "grad_norm": 1.098563537368093, "learning_rate": 8.273290766834252e-08, "loss": 0.6252, "step": 7894 }, { "epoch": 0.9700208870868657, "grad_norm": 1.255153123232875, "learning_rate": 8.20595305049382e-08, "loss": 0.6063, "step": 7895 }, { "epoch": 0.9701437523037229, "grad_norm": 1.1012402399947991, "learning_rate": 8.138889739993604e-08, "loss": 0.5111, "step": 7896 }, { "epoch": 0.97026661752058, "grad_norm": 1.178007276576795, "learning_rate": 8.072100847669572e-08, "loss": 0.4986, "step": 7897 }, { "epoch": 0.9703894827374371, "grad_norm": 1.0949534645993448, "learning_rate": 8.005586385807063e-08, "loss": 0.5295, "step": 7898 }, { "epoch": 0.9705123479542941, "grad_norm": 1.242704729613472, "learning_rate": 7.93934636664112e-08, "loss": 0.5907, "step": 7899 }, { "epoch": 0.9706352131711512, "grad_norm": 1.2234788962066618, "learning_rate": 7.873380802356001e-08, "loss": 0.603, "step": 7900 }, { "epoch": 0.9707580783880083, "grad_norm": 1.3517708866066238, "learning_rate": 7.807689705085663e-08, "loss": 0.6634, "step": 7901 }, { "epoch": 0.9708809436048654, "grad_norm": 1.7638370668463743, "learning_rate": 7.742273086913609e-08, "loss": 0.6677, "step": 7902 }, { "epoch": 0.9710038088217225, "grad_norm": 1.3537622310126303, "learning_rate": 7.677130959872713e-08, "loss": 0.6169, "step": 7903 }, { "epoch": 0.9711266740385797, "grad_norm": 1.0445942288011771, "learning_rate": 7.612263335945724e-08, "loss": 0.5362, "step": 7904 }, { "epoch": 0.9712495392554368, "grad_norm": 1.3524656171439051, "learning_rate": 7.547670227064263e-08, "loss": 0.5442, "step": 7905 }, { "epoch": 0.9713724044722939, "grad_norm": 1.084863996981803, "learning_rate": 7.483351645109993e-08, "loss": 0.4713, "step": 7906 }, { "epoch": 0.971495269689151, "grad_norm": 1.202249386489926, "learning_rate": 7.41930760191395e-08, "loss": 0.5131, "step": 7907 }, { "epoch": 0.9716181349060081, "grad_norm": 1.2916819666004233, "learning_rate": 7.355538109256377e-08, "loss": 0.5977, "step": 7908 }, { "epoch": 0.9717410001228652, "grad_norm": 1.21514774930444, "learning_rate": 7.292043178867558e-08, "loss": 0.5509, "step": 7909 }, { "epoch": 0.9718638653397224, "grad_norm": 1.2734012752189985, "learning_rate": 7.228822822426817e-08, "loss": 0.7077, "step": 7910 }, { "epoch": 0.9719867305565795, "grad_norm": 1.0402355840230186, "learning_rate": 7.165877051563186e-08, "loss": 0.5439, "step": 7911 }, { "epoch": 0.9721095957734366, "grad_norm": 1.1710558085608551, "learning_rate": 7.103205877855067e-08, "loss": 0.4638, "step": 7912 }, { "epoch": 0.9722324609902937, "grad_norm": 1.24515789894153, "learning_rate": 7.040809312830576e-08, "loss": 0.5224, "step": 7913 }, { "epoch": 0.9723553262071507, "grad_norm": 1.0618036784460894, "learning_rate": 6.978687367966862e-08, "loss": 0.6131, "step": 7914 }, { "epoch": 0.9724781914240078, "grad_norm": 1.2546990018641369, "learning_rate": 6.91684005469112e-08, "loss": 0.7781, "step": 7915 }, { "epoch": 0.9726010566408649, "grad_norm": 1.1669641811514635, "learning_rate": 6.855267384379582e-08, "loss": 0.6181, "step": 7916 }, { "epoch": 0.972723921857722, "grad_norm": 1.1762953107896017, "learning_rate": 6.793969368358355e-08, "loss": 0.5334, "step": 7917 }, { "epoch": 0.9728467870745792, "grad_norm": 1.1754956621957804, "learning_rate": 6.732946017902586e-08, "loss": 0.5494, "step": 7918 }, { "epoch": 0.9729696522914363, "grad_norm": 1.2601131627612836, "learning_rate": 6.672197344237296e-08, "loss": 0.6458, "step": 7919 }, { "epoch": 0.9730925175082934, "grad_norm": 1.2555201500510678, "learning_rate": 6.611723358536547e-08, "loss": 0.5598, "step": 7920 }, { "epoch": 0.9732153827251505, "grad_norm": 1.0137792034608388, "learning_rate": 6.551524071924442e-08, "loss": 0.6303, "step": 7921 }, { "epoch": 0.9733382479420076, "grad_norm": 1.1763139350291203, "learning_rate": 6.491599495474288e-08, "loss": 0.5067, "step": 7922 }, { "epoch": 0.9734611131588647, "grad_norm": 1.2708653495468742, "learning_rate": 6.431949640208434e-08, "loss": 0.5651, "step": 7923 }, { "epoch": 0.9735839783757219, "grad_norm": 1.0036587251052789, "learning_rate": 6.372574517099439e-08, "loss": 0.6054, "step": 7924 }, { "epoch": 0.973706843592579, "grad_norm": 1.1031794382653075, "learning_rate": 6.313474137068731e-08, "loss": 0.602, "step": 7925 }, { "epoch": 0.9738297088094361, "grad_norm": 1.2188623832368932, "learning_rate": 6.254648510987616e-08, "loss": 0.6484, "step": 7926 }, { "epoch": 0.9739525740262932, "grad_norm": 1.3875938360138016, "learning_rate": 6.196097649676768e-08, "loss": 0.4916, "step": 7927 }, { "epoch": 0.9740754392431502, "grad_norm": 1.0654032931635287, "learning_rate": 6.13782156390591e-08, "loss": 0.442, "step": 7928 }, { "epoch": 0.9741983044600073, "grad_norm": 1.169409280054631, "learning_rate": 6.079820264394797e-08, "loss": 0.6159, "step": 7929 }, { "epoch": 0.9743211696768644, "grad_norm": 1.0792372220021367, "learning_rate": 6.022093761812398e-08, "loss": 0.4995, "step": 7930 }, { "epoch": 0.9744440348937216, "grad_norm": 1.1128586648601857, "learning_rate": 5.964642066776882e-08, "loss": 0.5698, "step": 7931 }, { "epoch": 0.9745669001105787, "grad_norm": 1.2528218279451848, "learning_rate": 5.907465189856465e-08, "loss": 0.5263, "step": 7932 }, { "epoch": 0.9746897653274358, "grad_norm": 1.2244366225352308, "learning_rate": 5.8505631415682325e-08, "loss": 0.5017, "step": 7933 }, { "epoch": 0.9748126305442929, "grad_norm": 1.2539076609290896, "learning_rate": 5.7939359323791465e-08, "loss": 0.6105, "step": 7934 }, { "epoch": 0.97493549576115, "grad_norm": 1.1489300293536928, "learning_rate": 5.737583572705041e-08, "loss": 0.6399, "step": 7935 }, { "epoch": 0.9750583609780071, "grad_norm": 1.1700725300190982, "learning_rate": 5.681506072911957e-08, "loss": 0.6184, "step": 7936 }, { "epoch": 0.9751812261948642, "grad_norm": 1.235799962506757, "learning_rate": 5.6257034433148115e-08, "loss": 0.4631, "step": 7937 }, { "epoch": 0.9753040914117214, "grad_norm": 1.0913657431826391, "learning_rate": 5.570175694178226e-08, "loss": 0.5921, "step": 7938 }, { "epoch": 0.9754269566285785, "grad_norm": 1.0760461238397525, "learning_rate": 5.5149228357160296e-08, "loss": 0.6713, "step": 7939 }, { "epoch": 0.9755498218454356, "grad_norm": 1.2242991904488805, "learning_rate": 5.459944878091761e-08, "loss": 0.5842, "step": 7940 }, { "epoch": 0.9756726870622927, "grad_norm": 1.2327749684788447, "learning_rate": 5.405241831418162e-08, "loss": 0.4756, "step": 7941 }, { "epoch": 0.9757955522791498, "grad_norm": 1.068152234292588, "learning_rate": 5.350813705757518e-08, "loss": 0.603, "step": 7942 }, { "epoch": 0.9759184174960068, "grad_norm": 1.1897083274468803, "learning_rate": 5.2966605111214874e-08, "loss": 0.4709, "step": 7943 }, { "epoch": 0.976041282712864, "grad_norm": 1.313111495341666, "learning_rate": 5.242782257471268e-08, "loss": 0.4953, "step": 7944 }, { "epoch": 0.9761641479297211, "grad_norm": 1.0800561987648858, "learning_rate": 5.189178954717599e-08, "loss": 0.6013, "step": 7945 }, { "epoch": 0.9762870131465782, "grad_norm": 1.3389974771336113, "learning_rate": 5.135850612720094e-08, "loss": 0.5721, "step": 7946 }, { "epoch": 0.9764098783634353, "grad_norm": 1.3572087239177455, "learning_rate": 5.082797241288406e-08, "loss": 0.671, "step": 7947 }, { "epoch": 0.9765327435802924, "grad_norm": 1.3453947114069504, "learning_rate": 5.030018850181228e-08, "loss": 0.6102, "step": 7948 }, { "epoch": 0.9766556087971495, "grad_norm": 1.3591428248387296, "learning_rate": 4.977515449106962e-08, "loss": 0.5486, "step": 7949 }, { "epoch": 0.9767784740140066, "grad_norm": 0.9631579693292484, "learning_rate": 4.925287047723048e-08, "loss": 0.5603, "step": 7950 }, { "epoch": 0.9769013392308638, "grad_norm": 1.2568657638678462, "learning_rate": 4.8733336556368024e-08, "loss": 0.514, "step": 7951 }, { "epoch": 0.9770242044477209, "grad_norm": 1.175602890883501, "learning_rate": 4.82165528240458e-08, "loss": 0.5091, "step": 7952 }, { "epoch": 0.977147069664578, "grad_norm": 1.0622539234544548, "learning_rate": 4.770251937532277e-08, "loss": 0.5107, "step": 7953 }, { "epoch": 0.9772699348814351, "grad_norm": 1.0745401143223374, "learning_rate": 4.719123630475164e-08, "loss": 0.545, "step": 7954 }, { "epoch": 0.9773928000982922, "grad_norm": 1.5821311719041475, "learning_rate": 4.66827037063805e-08, "loss": 0.6015, "step": 7955 }, { "epoch": 0.9775156653151493, "grad_norm": 1.0583656333207705, "learning_rate": 4.6176921673751204e-08, "loss": 0.6337, "step": 7956 }, { "epoch": 0.9776385305320064, "grad_norm": 1.1070197926275904, "learning_rate": 4.567389029989599e-08, "loss": 0.617, "step": 7957 }, { "epoch": 0.9777613957488634, "grad_norm": 1.0679210824532126, "learning_rate": 4.517360967734918e-08, "loss": 0.5336, "step": 7958 }, { "epoch": 0.9778842609657206, "grad_norm": 1.3410533541143352, "learning_rate": 4.467607989812883e-08, "loss": 0.6369, "step": 7959 }, { "epoch": 0.9780071261825777, "grad_norm": 1.030391663776545, "learning_rate": 4.418130105375673e-08, "loss": 0.6517, "step": 7960 }, { "epoch": 0.9781299913994348, "grad_norm": 0.9959157031611443, "learning_rate": 4.368927323524174e-08, "loss": 0.5897, "step": 7961 }, { "epoch": 0.9782528566162919, "grad_norm": 1.3050964357223351, "learning_rate": 4.3199996533089815e-08, "loss": 0.5397, "step": 7962 }, { "epoch": 0.978375721833149, "grad_norm": 0.8896088758523613, "learning_rate": 4.271347103730061e-08, "loss": 0.6142, "step": 7963 }, { "epoch": 0.9784985870500061, "grad_norm": 1.2023294808831217, "learning_rate": 4.222969683736755e-08, "loss": 0.3741, "step": 7964 }, { "epoch": 0.9786214522668633, "grad_norm": 1.1933642361404244, "learning_rate": 4.1748674022276114e-08, "loss": 0.5069, "step": 7965 }, { "epoch": 0.9787443174837204, "grad_norm": 1.2872590561845276, "learning_rate": 4.127040268050886e-08, "loss": 0.5833, "step": 7966 }, { "epoch": 0.9788671827005775, "grad_norm": 1.1323445563720687, "learning_rate": 4.0794882900040406e-08, "loss": 0.5304, "step": 7967 }, { "epoch": 0.9789900479174346, "grad_norm": 1.2654883498556326, "learning_rate": 4.032211476833914e-08, "loss": 0.5629, "step": 7968 }, { "epoch": 0.9791129131342917, "grad_norm": 1.2941472860593901, "learning_rate": 3.985209837236881e-08, "loss": 0.517, "step": 7969 }, { "epoch": 0.9792357783511488, "grad_norm": 1.2425996545586697, "learning_rate": 3.93848337985836e-08, "loss": 0.5414, "step": 7970 }, { "epoch": 0.979358643568006, "grad_norm": 1.3570834239302219, "learning_rate": 3.892032113293642e-08, "loss": 0.7001, "step": 7971 }, { "epoch": 0.979481508784863, "grad_norm": 1.3417121385509694, "learning_rate": 3.845856046086893e-08, "loss": 0.4966, "step": 7972 }, { "epoch": 0.9796043740017201, "grad_norm": 1.08750849704422, "learning_rate": 3.799955186732151e-08, "loss": 0.6133, "step": 7973 }, { "epoch": 0.9797272392185772, "grad_norm": 1.3516878253637432, "learning_rate": 3.7543295436723304e-08, "loss": 0.6282, "step": 7974 }, { "epoch": 0.9798501044354343, "grad_norm": 1.2026392993506154, "learning_rate": 3.7089791253002156e-08, "loss": 0.5349, "step": 7975 }, { "epoch": 0.9799729696522914, "grad_norm": 1.7867146346854232, "learning_rate": 3.6639039399574694e-08, "loss": 0.5569, "step": 7976 }, { "epoch": 0.9800958348691485, "grad_norm": 1.1463882722807537, "learning_rate": 3.6191039959356245e-08, "loss": 0.5784, "step": 7977 }, { "epoch": 0.9802187000860056, "grad_norm": 1.5999679377963991, "learning_rate": 3.574579301475256e-08, "loss": 0.6132, "step": 7978 }, { "epoch": 0.9803415653028628, "grad_norm": 1.158497689802478, "learning_rate": 3.530329864766313e-08, "loss": 0.6096, "step": 7979 }, { "epoch": 0.9804644305197199, "grad_norm": 1.0769792176614135, "learning_rate": 3.4863556939482846e-08, "loss": 0.5246, "step": 7980 }, { "epoch": 0.980587295736577, "grad_norm": 0.9578720939662675, "learning_rate": 3.4426567971097e-08, "loss": 0.6509, "step": 7981 }, { "epoch": 0.9807101609534341, "grad_norm": 1.2122610858541727, "learning_rate": 3.39923318228913e-08, "loss": 0.581, "step": 7982 }, { "epoch": 0.9808330261702912, "grad_norm": 1.3255379578308624, "learning_rate": 3.3560848574736845e-08, "loss": 0.5788, "step": 7983 }, { "epoch": 0.9809558913871483, "grad_norm": 1.1930225220959974, "learning_rate": 3.313211830600349e-08, "loss": 0.5971, "step": 7984 }, { "epoch": 0.9810787566040055, "grad_norm": 1.008919057693785, "learning_rate": 3.270614109555314e-08, "loss": 0.5792, "step": 7985 }, { "epoch": 0.9812016218208626, "grad_norm": 1.116604457057502, "learning_rate": 3.228291702174313e-08, "loss": 0.5848, "step": 7986 }, { "epoch": 0.9813244870377196, "grad_norm": 1.4768299633623003, "learning_rate": 3.1862446162421176e-08, "loss": 0.5909, "step": 7987 }, { "epoch": 0.9814473522545767, "grad_norm": 0.9052940825873653, "learning_rate": 3.144472859493042e-08, "loss": 0.6401, "step": 7988 }, { "epoch": 0.9815702174714338, "grad_norm": 1.3580090879637217, "learning_rate": 3.1029764396106055e-08, "loss": 0.676, "step": 7989 }, { "epoch": 0.9816930826882909, "grad_norm": 1.5065730431542972, "learning_rate": 3.061755364228036e-08, "loss": 0.6013, "step": 7990 }, { "epoch": 0.981815947905148, "grad_norm": 1.0822583020195302, "learning_rate": 3.020809640927602e-08, "loss": 0.6047, "step": 7991 }, { "epoch": 0.9819388131220051, "grad_norm": 1.1678185295095227, "learning_rate": 2.9801392772409453e-08, "loss": 0.5539, "step": 7992 }, { "epoch": 0.9820616783388623, "grad_norm": 1.3103490886477447, "learning_rate": 2.9397442806492482e-08, "loss": 0.5965, "step": 7993 }, { "epoch": 0.9821845435557194, "grad_norm": 1.3160897809879666, "learning_rate": 2.8996246585827335e-08, "loss": 0.4586, "step": 7994 }, { "epoch": 0.9823074087725765, "grad_norm": 1.059804929204877, "learning_rate": 2.859780418421165e-08, "loss": 0.5156, "step": 7995 }, { "epoch": 0.9824302739894336, "grad_norm": 1.2021995691119096, "learning_rate": 2.8202115674938468e-08, "loss": 0.5525, "step": 7996 }, { "epoch": 0.9825531392062907, "grad_norm": 1.122114966080332, "learning_rate": 2.7809181130789562e-08, "loss": 0.6372, "step": 7997 }, { "epoch": 0.9826760044231478, "grad_norm": 1.3181658357819488, "learning_rate": 2.7419000624043787e-08, "loss": 0.6736, "step": 7998 }, { "epoch": 0.982798869640005, "grad_norm": 1.0298937972038045, "learning_rate": 2.7031574226472066e-08, "loss": 0.6448, "step": 7999 }, { "epoch": 0.9829217348568621, "grad_norm": 1.2091202816441327, "learning_rate": 2.6646902009339057e-08, "loss": 0.6068, "step": 8000 }, { "epoch": 0.9830446000737191, "grad_norm": 1.146241201450024, "learning_rate": 2.626498404340316e-08, "loss": 0.5511, "step": 8001 }, { "epoch": 0.9831674652905762, "grad_norm": 1.5853153464112946, "learning_rate": 2.5885820398916516e-08, "loss": 0.5442, "step": 8002 }, { "epoch": 0.9832903305074333, "grad_norm": 1.2505045186058528, "learning_rate": 2.5509411145621665e-08, "loss": 0.5351, "step": 8003 }, { "epoch": 0.9834131957242904, "grad_norm": 1.3015526576547096, "learning_rate": 2.5135756352756555e-08, "loss": 0.4487, "step": 8004 }, { "epoch": 0.9835360609411475, "grad_norm": 1.3422090225823184, "learning_rate": 2.4764856089054544e-08, "loss": 0.4901, "step": 8005 }, { "epoch": 0.9836589261580047, "grad_norm": 1.0367906110222358, "learning_rate": 2.4396710422739387e-08, "loss": 0.5757, "step": 8006 }, { "epoch": 0.9837817913748618, "grad_norm": 1.2785086612035967, "learning_rate": 2.4031319421530252e-08, "loss": 0.5972, "step": 8007 }, { "epoch": 0.9839046565917189, "grad_norm": 1.1801608054858275, "learning_rate": 2.366868315263504e-08, "loss": 0.4894, "step": 8008 }, { "epoch": 0.984027521808576, "grad_norm": 1.2047013576955874, "learning_rate": 2.330880168276206e-08, "loss": 0.5826, "step": 8009 }, { "epoch": 0.9841503870254331, "grad_norm": 1.3211801042268811, "learning_rate": 2.2951675078108357e-08, "loss": 0.5803, "step": 8010 }, { "epoch": 0.9842732522422902, "grad_norm": 1.5036403353921273, "learning_rate": 2.2597303404363058e-08, "loss": 0.585, "step": 8011 }, { "epoch": 0.9843961174591473, "grad_norm": 1.0847191609711093, "learning_rate": 2.2245686726712346e-08, "loss": 0.6442, "step": 8012 }, { "epoch": 0.9845189826760045, "grad_norm": 1.197202403882334, "learning_rate": 2.1896825109834486e-08, "loss": 0.6062, "step": 8013 }, { "epoch": 0.9846418478928616, "grad_norm": 1.045142255260294, "learning_rate": 2.1550718617898145e-08, "loss": 0.5121, "step": 8014 }, { "epoch": 0.9847647131097187, "grad_norm": 1.2950442699398432, "learning_rate": 2.120736731456907e-08, "loss": 0.4944, "step": 8015 }, { "epoch": 0.9848875783265757, "grad_norm": 1.4577182925995809, "learning_rate": 2.0866771263003403e-08, "loss": 0.5893, "step": 8016 }, { "epoch": 0.9850104435434328, "grad_norm": 0.9954081424058401, "learning_rate": 2.0528930525852697e-08, "loss": 0.6524, "step": 8017 }, { "epoch": 0.9851333087602899, "grad_norm": 1.2474771357371692, "learning_rate": 2.0193845165258906e-08, "loss": 0.6423, "step": 8018 }, { "epoch": 0.985256173977147, "grad_norm": 1.1105295620082798, "learning_rate": 1.9861515242861062e-08, "loss": 0.5478, "step": 8019 }, { "epoch": 0.9853790391940042, "grad_norm": 1.7241847412940476, "learning_rate": 1.953194081978693e-08, "loss": 0.6548, "step": 8020 }, { "epoch": 0.9855019044108613, "grad_norm": 1.0615741785387525, "learning_rate": 1.9205121956661352e-08, "loss": 0.5874, "step": 8021 }, { "epoch": 0.9856247696277184, "grad_norm": 1.2052386574769773, "learning_rate": 1.8881058713599577e-08, "loss": 0.4678, "step": 8022 }, { "epoch": 0.9857476348445755, "grad_norm": 1.352163825361923, "learning_rate": 1.855975115021058e-08, "loss": 0.5425, "step": 8023 }, { "epoch": 0.9858705000614326, "grad_norm": 1.1690527519829623, "learning_rate": 1.824119932559709e-08, "loss": 0.5392, "step": 8024 }, { "epoch": 0.9859933652782897, "grad_norm": 1.101156659463525, "learning_rate": 1.792540329835557e-08, "loss": 0.5938, "step": 8025 }, { "epoch": 0.9861162304951469, "grad_norm": 1.0497699161702165, "learning_rate": 1.7612363126572883e-08, "loss": 0.4782, "step": 8026 }, { "epoch": 0.986239095712004, "grad_norm": 1.2807776718229729, "learning_rate": 1.730207886783297e-08, "loss": 0.6416, "step": 8027 }, { "epoch": 0.9863619609288611, "grad_norm": 1.3824338193359418, "learning_rate": 1.699455057920851e-08, "loss": 0.6204, "step": 8028 }, { "epoch": 0.9864848261457182, "grad_norm": 1.3701888639593096, "learning_rate": 1.6689778317269254e-08, "loss": 0.4473, "step": 8029 }, { "epoch": 0.9866076913625752, "grad_norm": 1.291171584184624, "learning_rate": 1.6387762138075358e-08, "loss": 0.5684, "step": 8030 }, { "epoch": 0.9867305565794323, "grad_norm": 1.4069802423509112, "learning_rate": 1.6088502097179047e-08, "loss": 0.6201, "step": 8031 }, { "epoch": 0.9868534217962894, "grad_norm": 1.461091691568767, "learning_rate": 1.5791998249629625e-08, "loss": 0.532, "step": 8032 }, { "epoch": 0.9869762870131465, "grad_norm": 1.4011663285862548, "learning_rate": 1.5498250649965128e-08, "loss": 0.494, "step": 8033 }, { "epoch": 0.9870991522300037, "grad_norm": 1.2151867134185521, "learning_rate": 1.520725935222067e-08, "loss": 0.5002, "step": 8034 }, { "epoch": 0.9872220174468608, "grad_norm": 1.1515237826510736, "learning_rate": 1.49190244099201e-08, "loss": 0.4859, "step": 8035 }, { "epoch": 0.9873448826637179, "grad_norm": 1.03410070250484, "learning_rate": 1.4633545876084342e-08, "loss": 0.5838, "step": 8036 }, { "epoch": 0.987467747880575, "grad_norm": 1.3148321917928756, "learning_rate": 1.4350823803224721e-08, "loss": 0.6352, "step": 8037 }, { "epoch": 0.9875906130974321, "grad_norm": 1.0877545873442767, "learning_rate": 1.4070858243344641e-08, "loss": 0.5652, "step": 8038 }, { "epoch": 0.9877134783142892, "grad_norm": 1.4480697564781828, "learning_rate": 1.3793649247942909e-08, "loss": 0.5722, "step": 8039 }, { "epoch": 0.9878363435311464, "grad_norm": 1.1001578773628062, "learning_rate": 1.3519196868010398e-08, "loss": 0.5908, "step": 8040 }, { "epoch": 0.9879592087480035, "grad_norm": 1.1270821010951588, "learning_rate": 1.3247501154031727e-08, "loss": 0.5378, "step": 8041 }, { "epoch": 0.9880820739648606, "grad_norm": 1.2983532840943826, "learning_rate": 1.297856215598192e-08, "loss": 0.5569, "step": 8042 }, { "epoch": 0.9882049391817177, "grad_norm": 0.946385960171128, "learning_rate": 1.2712379923331407e-08, "loss": 0.5166, "step": 8043 }, { "epoch": 0.9883278043985748, "grad_norm": 1.1529525483906802, "learning_rate": 1.2448954505042686e-08, "loss": 0.5381, "step": 8044 }, { "epoch": 0.9884506696154318, "grad_norm": 1.2930677968355404, "learning_rate": 1.2188285949571998e-08, "loss": 0.6993, "step": 8045 }, { "epoch": 0.9885735348322889, "grad_norm": 1.279952365652824, "learning_rate": 1.1930374304865988e-08, "loss": 0.4988, "step": 8046 }, { "epoch": 0.988696400049146, "grad_norm": 1.255779286030602, "learning_rate": 1.1675219618366706e-08, "loss": 0.6135, "step": 8047 }, { "epoch": 0.9888192652660032, "grad_norm": 1.0888947647310458, "learning_rate": 1.1422821937008276e-08, "loss": 0.6865, "step": 8048 }, { "epoch": 0.9889421304828603, "grad_norm": 1.3028248344196969, "learning_rate": 1.1173181307216896e-08, "loss": 0.6437, "step": 8049 }, { "epoch": 0.9890649956997174, "grad_norm": 1.0345401796797176, "learning_rate": 1.0926297774912497e-08, "loss": 0.463, "step": 8050 }, { "epoch": 0.9891878609165745, "grad_norm": 1.0782471415112382, "learning_rate": 1.0682171385508755e-08, "loss": 0.5508, "step": 8051 }, { "epoch": 0.9893107261334316, "grad_norm": 1.3700187691455787, "learning_rate": 1.0440802183911414e-08, "loss": 0.5773, "step": 8052 }, { "epoch": 0.9894335913502887, "grad_norm": 1.3831200546951379, "learning_rate": 1.0202190214516626e-08, "loss": 0.5857, "step": 8053 }, { "epoch": 0.9895564565671459, "grad_norm": 1.1532807383274042, "learning_rate": 9.966335521215953e-09, "loss": 0.4818, "step": 8054 }, { "epoch": 0.989679321784003, "grad_norm": 1.7486969928729637, "learning_rate": 9.733238147394685e-09, "loss": 0.7503, "step": 8055 }, { "epoch": 0.9898021870008601, "grad_norm": 1.0789812101709106, "learning_rate": 9.502898135930194e-09, "loss": 0.6643, "step": 8056 }, { "epoch": 0.9899250522177172, "grad_norm": 1.256241760451359, "learning_rate": 9.275315529188588e-09, "loss": 0.5815, "step": 8057 }, { "epoch": 0.9900479174345743, "grad_norm": 0.9913749750296283, "learning_rate": 9.050490369036379e-09, "loss": 0.6511, "step": 8058 }, { "epoch": 0.9901707826514313, "grad_norm": 1.193731879382614, "learning_rate": 8.828422696825488e-09, "loss": 0.5444, "step": 8059 }, { "epoch": 0.9902936478682884, "grad_norm": 1.1522595968898106, "learning_rate": 8.609112553406573e-09, "loss": 0.553, "step": 8060 }, { "epoch": 0.9904165130851456, "grad_norm": 1.1922913072739427, "learning_rate": 8.392559979117365e-09, "loss": 0.6307, "step": 8061 }, { "epoch": 0.9905393783020027, "grad_norm": 1.0885113901487542, "learning_rate": 8.178765013792665e-09, "loss": 0.5182, "step": 8062 }, { "epoch": 0.9906622435188598, "grad_norm": 1.059366582473548, "learning_rate": 7.967727696761019e-09, "loss": 0.4716, "step": 8063 }, { "epoch": 0.9907851087357169, "grad_norm": 1.1331354244885037, "learning_rate": 7.759448066836373e-09, "loss": 0.7602, "step": 8064 }, { "epoch": 0.990907973952574, "grad_norm": 1.2511727757546656, "learning_rate": 7.553926162334745e-09, "loss": 0.5693, "step": 8065 }, { "epoch": 0.9910308391694311, "grad_norm": 1.0254268663095505, "learning_rate": 7.351162021059232e-09, "loss": 0.576, "step": 8066 }, { "epoch": 0.9911537043862882, "grad_norm": 1.2346048819449105, "learning_rate": 7.151155680304999e-09, "loss": 0.5562, "step": 8067 }, { "epoch": 0.9912765696031454, "grad_norm": 1.1295040279289164, "learning_rate": 6.953907176864283e-09, "loss": 0.532, "step": 8068 }, { "epoch": 0.9913994348200025, "grad_norm": 1.3487594159123066, "learning_rate": 6.759416547019725e-09, "loss": 0.594, "step": 8069 }, { "epoch": 0.9915223000368596, "grad_norm": 1.141790709939659, "learning_rate": 6.567683826546045e-09, "loss": 0.5862, "step": 8070 }, { "epoch": 0.9916451652537167, "grad_norm": 1.0129926502007005, "learning_rate": 6.37870905071003e-09, "loss": 0.5676, "step": 8071 }, { "epoch": 0.9917680304705738, "grad_norm": 1.2319202432729983, "learning_rate": 6.192492254273874e-09, "loss": 0.5399, "step": 8072 }, { "epoch": 0.9918908956874309, "grad_norm": 1.358428666072059, "learning_rate": 6.009033471491842e-09, "loss": 0.5432, "step": 8073 }, { "epoch": 0.992013760904288, "grad_norm": 1.2703686594197823, "learning_rate": 5.828332736106945e-09, "loss": 0.591, "step": 8074 }, { "epoch": 0.9921366261211451, "grad_norm": 1.2566480489320797, "learning_rate": 5.650390081359258e-09, "loss": 0.5864, "step": 8075 }, { "epoch": 0.9922594913380022, "grad_norm": 1.4061430826771173, "learning_rate": 5.4752055399825975e-09, "loss": 0.6404, "step": 8076 }, { "epoch": 0.9923823565548593, "grad_norm": 1.1273950435925322, "learning_rate": 5.302779144197856e-09, "loss": 0.5398, "step": 8077 }, { "epoch": 0.9925052217717164, "grad_norm": 1.149868913475122, "learning_rate": 5.1331109257229945e-09, "loss": 0.5924, "step": 8078 }, { "epoch": 0.9926280869885735, "grad_norm": 0.894895228330755, "learning_rate": 4.966200915766383e-09, "loss": 0.6267, "step": 8079 }, { "epoch": 0.9927509522054306, "grad_norm": 1.3657698253822583, "learning_rate": 4.802049145031795e-09, "loss": 0.6386, "step": 8080 }, { "epoch": 0.9928738174222878, "grad_norm": 1.3283900160039688, "learning_rate": 4.640655643713409e-09, "loss": 0.5925, "step": 8081 }, { "epoch": 0.9929966826391449, "grad_norm": 1.2846436957105956, "learning_rate": 4.482020441497481e-09, "loss": 0.6135, "step": 8082 }, { "epoch": 0.993119547856002, "grad_norm": 1.229760704485686, "learning_rate": 4.326143567564e-09, "loss": 0.561, "step": 8083 }, { "epoch": 0.9932424130728591, "grad_norm": 1.2561293313153818, "learning_rate": 4.173025050586699e-09, "loss": 0.5363, "step": 8084 }, { "epoch": 0.9933652782897162, "grad_norm": 1.2379941230131533, "learning_rate": 4.022664918729713e-09, "loss": 0.4974, "step": 8085 }, { "epoch": 0.9934881435065733, "grad_norm": 1.1089662326132168, "learning_rate": 3.875063199650919e-09, "loss": 0.6001, "step": 8086 }, { "epoch": 0.9936110087234304, "grad_norm": 0.9836847990763243, "learning_rate": 3.730219920501932e-09, "loss": 0.5979, "step": 8087 }, { "epoch": 0.9937338739402876, "grad_norm": 1.3118670908057515, "learning_rate": 3.5881351079247725e-09, "loss": 0.603, "step": 8088 }, { "epoch": 0.9938567391571446, "grad_norm": 1.1592653635999126, "learning_rate": 3.448808788053537e-09, "loss": 0.5075, "step": 8089 }, { "epoch": 0.9939796043740017, "grad_norm": 1.5547443906199319, "learning_rate": 3.312240986519388e-09, "loss": 0.6175, "step": 8090 }, { "epoch": 0.9941024695908588, "grad_norm": 1.2245728276802659, "learning_rate": 3.1784317284405675e-09, "loss": 0.5673, "step": 8091 }, { "epoch": 0.9942253348077159, "grad_norm": 1.121501165908033, "learning_rate": 3.0473810384323843e-09, "loss": 0.5212, "step": 8092 }, { "epoch": 0.994348200024573, "grad_norm": 1.4546621802910542, "learning_rate": 2.9190889406005562e-09, "loss": 0.7227, "step": 8093 }, { "epoch": 0.9944710652414301, "grad_norm": 1.206607802667171, "learning_rate": 2.7935554585412083e-09, "loss": 0.5336, "step": 8094 }, { "epoch": 0.9945939304582873, "grad_norm": 1.1905174298427603, "learning_rate": 2.6707806153475347e-09, "loss": 0.5954, "step": 8095 }, { "epoch": 0.9947167956751444, "grad_norm": 1.3660613361689262, "learning_rate": 2.5507644336014713e-09, "loss": 0.4851, "step": 8096 }, { "epoch": 0.9948396608920015, "grad_norm": 1.515088390666289, "learning_rate": 2.4335069353820238e-09, "loss": 0.5198, "step": 8097 }, { "epoch": 0.9949625261088586, "grad_norm": 1.2389341530846003, "learning_rate": 2.3190081422569398e-09, "loss": 0.6061, "step": 8098 }, { "epoch": 0.9950853913257157, "grad_norm": 1.3230517596742473, "learning_rate": 2.2072680752843745e-09, "loss": 0.5587, "step": 8099 }, { "epoch": 0.9952082565425728, "grad_norm": 1.273574959961994, "learning_rate": 2.0982867550228822e-09, "loss": 0.6437, "step": 8100 }, { "epoch": 0.99533112175943, "grad_norm": 1.3290024029323153, "learning_rate": 1.9920642015164305e-09, "loss": 0.6371, "step": 8101 }, { "epoch": 0.9954539869762871, "grad_norm": 1.2081423261001714, "learning_rate": 1.8886004343043885e-09, "loss": 0.6055, "step": 8102 }, { "epoch": 0.9955768521931441, "grad_norm": 1.2185807134160314, "learning_rate": 1.7878954724165342e-09, "loss": 0.5698, "step": 8103 }, { "epoch": 0.9956997174100012, "grad_norm": 1.1682743303340581, "learning_rate": 1.6899493343797146e-09, "loss": 0.5024, "step": 8104 }, { "epoch": 0.9958225826268583, "grad_norm": 1.644492469946242, "learning_rate": 1.5947620382095185e-09, "loss": 0.6892, "step": 8105 }, { "epoch": 0.9959454478437154, "grad_norm": 1.1627091033293284, "learning_rate": 1.5023336014152734e-09, "loss": 0.6729, "step": 8106 }, { "epoch": 0.9960683130605725, "grad_norm": 0.9518923304789875, "learning_rate": 1.412664040996714e-09, "loss": 0.5794, "step": 8107 }, { "epoch": 0.9961911782774296, "grad_norm": 1.1649882565587246, "learning_rate": 1.325753373448979e-09, "loss": 0.5122, "step": 8108 }, { "epoch": 0.9963140434942868, "grad_norm": 1.4891176846848169, "learning_rate": 1.2416016147609454e-09, "loss": 0.5772, "step": 8109 }, { "epoch": 0.9964369087111439, "grad_norm": 1.1507961867299497, "learning_rate": 1.160208780408567e-09, "loss": 0.5582, "step": 8110 }, { "epoch": 0.996559773928001, "grad_norm": 1.3381867488167045, "learning_rate": 1.0815748853648666e-09, "loss": 0.6928, "step": 8111 }, { "epoch": 0.9966826391448581, "grad_norm": 1.2678745470480035, "learning_rate": 1.00569994409494e-09, "loss": 0.4518, "step": 8112 }, { "epoch": 0.9968055043617152, "grad_norm": 1.0056908690486248, "learning_rate": 9.325839705542904e-10, "loss": 0.5258, "step": 8113 }, { "epoch": 0.9969283695785723, "grad_norm": 2.6414259662547233, "learning_rate": 8.622269781921599e-10, "loss": 0.6567, "step": 8114 }, { "epoch": 0.9970512347954295, "grad_norm": 1.230066034200606, "learning_rate": 7.946289799515282e-10, "loss": 0.6774, "step": 8115 }, { "epoch": 0.9971741000122866, "grad_norm": 1.2614608407967711, "learning_rate": 7.297899882641179e-10, "loss": 0.5443, "step": 8116 }, { "epoch": 0.9972969652291437, "grad_norm": 1.2970510495321734, "learning_rate": 6.677100150587201e-10, "loss": 0.4728, "step": 8117 }, { "epoch": 0.9974198304460007, "grad_norm": 1.2892208367019529, "learning_rate": 6.083890717545337e-10, "loss": 0.4951, "step": 8118 }, { "epoch": 0.9975426956628578, "grad_norm": 1.2145295710459174, "learning_rate": 5.518271692628308e-10, "loss": 0.5616, "step": 8119 }, { "epoch": 0.9976655608797149, "grad_norm": 1.4451372381912968, "learning_rate": 4.980243179869559e-10, "loss": 0.6648, "step": 8120 }, { "epoch": 0.997788426096572, "grad_norm": 1.5163099524470016, "learning_rate": 4.4698052782399244e-10, "loss": 0.5214, "step": 8121 }, { "epoch": 0.9979112913134291, "grad_norm": 1.5005740637742002, "learning_rate": 3.986958081647618e-10, "loss": 0.6044, "step": 8122 }, { "epoch": 0.9980341565302863, "grad_norm": 1.1093454080536318, "learning_rate": 3.5317016788882773e-10, "loss": 0.5659, "step": 8123 }, { "epoch": 0.9981570217471434, "grad_norm": 1.3680888011948018, "learning_rate": 3.1040361536949223e-10, "loss": 0.5435, "step": 8124 }, { "epoch": 0.9982798869640005, "grad_norm": 1.1747927808944483, "learning_rate": 2.703961584771264e-10, "loss": 0.5416, "step": 8125 }, { "epoch": 0.9984027521808576, "grad_norm": 1.571899306684621, "learning_rate": 2.331478045691782e-10, "loss": 0.6808, "step": 8126 }, { "epoch": 0.9985256173977147, "grad_norm": 1.1213493827740544, "learning_rate": 1.986585604951685e-10, "loss": 0.5923, "step": 8127 }, { "epoch": 0.9986484826145718, "grad_norm": 1.3563344396482295, "learning_rate": 1.6692843260168734e-10, "loss": 0.6233, "step": 8128 }, { "epoch": 0.998771347831429, "grad_norm": 1.0785520360116365, "learning_rate": 1.3795742672406687e-10, "loss": 0.5568, "step": 8129 }, { "epoch": 0.9988942130482861, "grad_norm": 1.2934007494087418, "learning_rate": 1.1174554819137761e-10, "loss": 0.5272, "step": 8130 }, { "epoch": 0.9990170782651432, "grad_norm": 1.0959046373814003, "learning_rate": 8.82928018264284e-11, "loss": 0.547, "step": 8131 }, { "epoch": 0.9991399434820002, "grad_norm": 1.2850966928887688, "learning_rate": 6.759919194077036e-11, "loss": 0.5341, "step": 8132 }, { "epoch": 0.9992628086988573, "grad_norm": 1.1637352576145872, "learning_rate": 4.966472234302355e-11, "loss": 0.6085, "step": 8133 }, { "epoch": 0.9993856739157144, "grad_norm": 1.125407046195776, "learning_rate": 3.4489396332215705e-11, "loss": 0.597, "step": 8134 }, { "epoch": 0.9995085391325715, "grad_norm": 1.7983791458558973, "learning_rate": 2.2073216697782174e-11, "loss": 0.6456, "step": 8135 }, { "epoch": 0.9996314043494287, "grad_norm": 1.0156589430648115, "learning_rate": 1.2416185724561935e-11, "loss": 0.4818, "step": 8136 }, { "epoch": 0.9997542695662858, "grad_norm": 1.011594477518567, "learning_rate": 5.518305189466944e-12, "loss": 0.5449, "step": 8137 }, { "epoch": 0.9998771347831429, "grad_norm": 1.1716177249184163, "learning_rate": 1.3795763614821155e-12, "loss": 0.5594, "step": 8138 }, { "epoch": 1.0, "grad_norm": 1.2461582729332843, "learning_rate": 0.0, "loss": 0.5178, "step": 8139 } ], "logging_steps": 1, "max_steps": 8139, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 168053621296128.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }