{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9998370964063468, "eval_steps": 500, "global_step": 3836, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0002606457498452416, "grad_norm": 5.023480880666873, "learning_rate": 2.6041666666666667e-08, "loss": 0.3892, "step": 1 }, { "epoch": 0.0005212914996904832, "grad_norm": 4.909762423774256, "learning_rate": 5.208333333333333e-08, "loss": 0.3909, "step": 2 }, { "epoch": 0.0007819372495357248, "grad_norm": 4.8175213005740245, "learning_rate": 7.8125e-08, "loss": 0.38, "step": 3 }, { "epoch": 0.0010425829993809665, "grad_norm": 5.009924055710931, "learning_rate": 1.0416666666666667e-07, "loss": 0.4106, "step": 4 }, { "epoch": 0.001303228749226208, "grad_norm": 4.913623097486901, "learning_rate": 1.3020833333333334e-07, "loss": 0.3933, "step": 5 }, { "epoch": 0.0015638744990714496, "grad_norm": 4.967010145704452, "learning_rate": 1.5625e-07, "loss": 0.3758, "step": 6 }, { "epoch": 0.0018245202489166911, "grad_norm": 4.9467264347173625, "learning_rate": 1.8229166666666669e-07, "loss": 0.3852, "step": 7 }, { "epoch": 0.002085165998761933, "grad_norm": 4.731574918150005, "learning_rate": 2.0833333333333333e-07, "loss": 0.3831, "step": 8 }, { "epoch": 0.0023458117486071742, "grad_norm": 4.667147278381995, "learning_rate": 2.3437500000000003e-07, "loss": 0.3993, "step": 9 }, { "epoch": 0.002606457498452416, "grad_norm": 4.9268293643189685, "learning_rate": 2.604166666666667e-07, "loss": 0.3958, "step": 10 }, { "epoch": 0.0028671032482976574, "grad_norm": 4.46800351591083, "learning_rate": 2.864583333333333e-07, "loss": 0.3971, "step": 11 }, { "epoch": 0.003127748998142899, "grad_norm": 4.514828151171496, "learning_rate": 3.125e-07, "loss": 0.3812, "step": 12 }, { "epoch": 0.0033883947479881405, "grad_norm": 4.458952675697283, "learning_rate": 3.3854166666666667e-07, "loss": 0.3907, "step": 13 }, { "epoch": 0.0036490404978333823, "grad_norm": 4.480931325084065, "learning_rate": 3.6458333333333337e-07, "loss": 0.3893, "step": 14 }, { "epoch": 0.003909686247678624, "grad_norm": 4.234287512708887, "learning_rate": 3.90625e-07, "loss": 0.381, "step": 15 }, { "epoch": 0.004170331997523866, "grad_norm": 3.988000940220502, "learning_rate": 4.1666666666666667e-07, "loss": 0.3852, "step": 16 }, { "epoch": 0.004430977747369107, "grad_norm": 4.0166356673624275, "learning_rate": 4.427083333333334e-07, "loss": 0.3885, "step": 17 }, { "epoch": 0.0046916234972143485, "grad_norm": 3.751367719486177, "learning_rate": 4.6875000000000006e-07, "loss": 0.3737, "step": 18 }, { "epoch": 0.00495226924705959, "grad_norm": 3.324457146479496, "learning_rate": 4.947916666666667e-07, "loss": 0.3847, "step": 19 }, { "epoch": 0.005212914996904832, "grad_norm": 2.922741647144965, "learning_rate": 5.208333333333334e-07, "loss": 0.3455, "step": 20 }, { "epoch": 0.005473560746750073, "grad_norm": 3.0960429790199933, "learning_rate": 5.468750000000001e-07, "loss": 0.3394, "step": 21 }, { "epoch": 0.005734206496595315, "grad_norm": 2.849986744043752, "learning_rate": 5.729166666666667e-07, "loss": 0.3604, "step": 22 }, { "epoch": 0.0059948522464405565, "grad_norm": 2.500692012432588, "learning_rate": 5.989583333333335e-07, "loss": 0.3386, "step": 23 }, { "epoch": 0.006255497996285798, "grad_norm": 2.3885318206307726, "learning_rate": 6.25e-07, "loss": 0.3491, "step": 24 }, { "epoch": 0.00651614374613104, "grad_norm": 2.4729504751396236, "learning_rate": 6.510416666666668e-07, "loss": 0.346, "step": 25 }, { "epoch": 0.006776789495976281, "grad_norm": 2.3043000896999817, "learning_rate": 6.770833333333333e-07, "loss": 0.3408, "step": 26 }, { "epoch": 0.007037435245821523, "grad_norm": 2.2538552009766333, "learning_rate": 7.03125e-07, "loss": 0.3384, "step": 27 }, { "epoch": 0.0072980809956667645, "grad_norm": 2.144496545651035, "learning_rate": 7.291666666666667e-07, "loss": 0.3117, "step": 28 }, { "epoch": 0.007558726745512006, "grad_norm": 2.2727600765183285, "learning_rate": 7.552083333333333e-07, "loss": 0.3349, "step": 29 }, { "epoch": 0.007819372495357248, "grad_norm": 1.8690967797352183, "learning_rate": 7.8125e-07, "loss": 0.3201, "step": 30 }, { "epoch": 0.00808001824520249, "grad_norm": 2.0109099502796854, "learning_rate": 8.072916666666667e-07, "loss": 0.3233, "step": 31 }, { "epoch": 0.008340663995047732, "grad_norm": 1.8713806109047637, "learning_rate": 8.333333333333333e-07, "loss": 0.3241, "step": 32 }, { "epoch": 0.008601309744892972, "grad_norm": 1.6024201587622886, "learning_rate": 8.59375e-07, "loss": 0.3163, "step": 33 }, { "epoch": 0.008861955494738213, "grad_norm": 1.6641886490653215, "learning_rate": 8.854166666666668e-07, "loss": 0.3259, "step": 34 }, { "epoch": 0.009122601244583455, "grad_norm": 1.616827051144846, "learning_rate": 9.114583333333333e-07, "loss": 0.3385, "step": 35 }, { "epoch": 0.009383246994428697, "grad_norm": 1.7202214848265245, "learning_rate": 9.375000000000001e-07, "loss": 0.3223, "step": 36 }, { "epoch": 0.009643892744273939, "grad_norm": 1.7747571231782027, "learning_rate": 9.635416666666667e-07, "loss": 0.3078, "step": 37 }, { "epoch": 0.00990453849411918, "grad_norm": 1.734934901102669, "learning_rate": 9.895833333333333e-07, "loss": 0.3, "step": 38 }, { "epoch": 0.010165184243964422, "grad_norm": 1.5135484103405965, "learning_rate": 1.0156250000000001e-06, "loss": 0.2862, "step": 39 }, { "epoch": 0.010425829993809664, "grad_norm": 1.5997189791185322, "learning_rate": 1.0416666666666667e-06, "loss": 0.3004, "step": 40 }, { "epoch": 0.010686475743654906, "grad_norm": 1.7122100840509558, "learning_rate": 1.0677083333333333e-06, "loss": 0.2767, "step": 41 }, { "epoch": 0.010947121493500146, "grad_norm": 1.6892832973828444, "learning_rate": 1.0937500000000001e-06, "loss": 0.2938, "step": 42 }, { "epoch": 0.011207767243345388, "grad_norm": 1.5378119534040435, "learning_rate": 1.1197916666666667e-06, "loss": 0.2998, "step": 43 }, { "epoch": 0.01146841299319063, "grad_norm": 1.5159139664191217, "learning_rate": 1.1458333333333333e-06, "loss": 0.2925, "step": 44 }, { "epoch": 0.011729058743035871, "grad_norm": 1.6268868667168352, "learning_rate": 1.1718750000000001e-06, "loss": 0.2748, "step": 45 }, { "epoch": 0.011989704492881113, "grad_norm": 1.5305026260528887, "learning_rate": 1.197916666666667e-06, "loss": 0.2856, "step": 46 }, { "epoch": 0.012250350242726355, "grad_norm": 1.4731077007869622, "learning_rate": 1.2239583333333333e-06, "loss": 0.2885, "step": 47 }, { "epoch": 0.012510995992571597, "grad_norm": 1.4376114932639523, "learning_rate": 1.25e-06, "loss": 0.2785, "step": 48 }, { "epoch": 0.012771641742416838, "grad_norm": 1.389391724379442, "learning_rate": 1.2760416666666667e-06, "loss": 0.2715, "step": 49 }, { "epoch": 0.01303228749226208, "grad_norm": 1.3029133402214954, "learning_rate": 1.3020833333333335e-06, "loss": 0.2804, "step": 50 }, { "epoch": 0.01329293324210732, "grad_norm": 1.4055163762248266, "learning_rate": 1.328125e-06, "loss": 0.2753, "step": 51 }, { "epoch": 0.013553578991952562, "grad_norm": 1.2820391154445472, "learning_rate": 1.3541666666666667e-06, "loss": 0.2566, "step": 52 }, { "epoch": 0.013814224741797804, "grad_norm": 1.3131638343416099, "learning_rate": 1.3802083333333335e-06, "loss": 0.2695, "step": 53 }, { "epoch": 0.014074870491643045, "grad_norm": 1.2846790544457705, "learning_rate": 1.40625e-06, "loss": 0.2722, "step": 54 }, { "epoch": 0.014335516241488287, "grad_norm": 1.2530575781912192, "learning_rate": 1.4322916666666667e-06, "loss": 0.261, "step": 55 }, { "epoch": 0.014596161991333529, "grad_norm": 1.2547881467992106, "learning_rate": 1.4583333333333335e-06, "loss": 0.2626, "step": 56 }, { "epoch": 0.01485680774117877, "grad_norm": 1.3523377364119034, "learning_rate": 1.484375e-06, "loss": 0.2694, "step": 57 }, { "epoch": 0.015117453491024013, "grad_norm": 1.2680536997899998, "learning_rate": 1.5104166666666667e-06, "loss": 0.2716, "step": 58 }, { "epoch": 0.015378099240869254, "grad_norm": 1.3104575722200311, "learning_rate": 1.5364583333333335e-06, "loss": 0.2851, "step": 59 }, { "epoch": 0.015638744990714496, "grad_norm": 1.3032469491223764, "learning_rate": 1.5625e-06, "loss": 0.2701, "step": 60 }, { "epoch": 0.015899390740559738, "grad_norm": 1.3497893487248456, "learning_rate": 1.5885416666666667e-06, "loss": 0.2711, "step": 61 }, { "epoch": 0.01616003649040498, "grad_norm": 1.2663707442242538, "learning_rate": 1.6145833333333335e-06, "loss": 0.2486, "step": 62 }, { "epoch": 0.01642068224025022, "grad_norm": 1.4416923639042365, "learning_rate": 1.640625e-06, "loss": 0.2569, "step": 63 }, { "epoch": 0.016681327990095463, "grad_norm": 1.2630880150527939, "learning_rate": 1.6666666666666667e-06, "loss": 0.2282, "step": 64 }, { "epoch": 0.0169419737399407, "grad_norm": 1.187382988318828, "learning_rate": 1.6927083333333335e-06, "loss": 0.2447, "step": 65 }, { "epoch": 0.017202619489785943, "grad_norm": 1.4053936003260927, "learning_rate": 1.71875e-06, "loss": 0.272, "step": 66 }, { "epoch": 0.017463265239631185, "grad_norm": 1.4221434259951633, "learning_rate": 1.7447916666666667e-06, "loss": 0.2546, "step": 67 }, { "epoch": 0.017723910989476427, "grad_norm": 1.5012459221988665, "learning_rate": 1.7708333333333337e-06, "loss": 0.28, "step": 68 }, { "epoch": 0.01798455673932167, "grad_norm": 1.3237907459939333, "learning_rate": 1.796875e-06, "loss": 0.2644, "step": 69 }, { "epoch": 0.01824520248916691, "grad_norm": 1.2883527458004582, "learning_rate": 1.8229166666666666e-06, "loss": 0.2581, "step": 70 }, { "epoch": 0.018505848239012152, "grad_norm": 1.5888052868847422, "learning_rate": 1.8489583333333337e-06, "loss": 0.2502, "step": 71 }, { "epoch": 0.018766493988857394, "grad_norm": 1.4811280211613818, "learning_rate": 1.8750000000000003e-06, "loss": 0.2636, "step": 72 }, { "epoch": 0.019027139738702636, "grad_norm": 1.2125681990662138, "learning_rate": 1.9010416666666666e-06, "loss": 0.2318, "step": 73 }, { "epoch": 0.019287785488547877, "grad_norm": 1.3679565387469472, "learning_rate": 1.9270833333333334e-06, "loss": 0.2466, "step": 74 }, { "epoch": 0.01954843123839312, "grad_norm": 1.3494762948921257, "learning_rate": 1.953125e-06, "loss": 0.2537, "step": 75 }, { "epoch": 0.01980907698823836, "grad_norm": 1.3141932679984487, "learning_rate": 1.9791666666666666e-06, "loss": 0.2588, "step": 76 }, { "epoch": 0.020069722738083603, "grad_norm": 1.3467820938018822, "learning_rate": 2.0052083333333337e-06, "loss": 0.2631, "step": 77 }, { "epoch": 0.020330368487928845, "grad_norm": 1.4302226770250015, "learning_rate": 2.0312500000000002e-06, "loss": 0.2588, "step": 78 }, { "epoch": 0.020591014237774086, "grad_norm": 1.2555173775618436, "learning_rate": 2.057291666666667e-06, "loss": 0.236, "step": 79 }, { "epoch": 0.020851659987619328, "grad_norm": 1.291725393241093, "learning_rate": 2.0833333333333334e-06, "loss": 0.2384, "step": 80 }, { "epoch": 0.02111230573746457, "grad_norm": 1.2876531100752384, "learning_rate": 2.109375e-06, "loss": 0.2415, "step": 81 }, { "epoch": 0.02137295148730981, "grad_norm": 1.3313573700739778, "learning_rate": 2.1354166666666666e-06, "loss": 0.2286, "step": 82 }, { "epoch": 0.02163359723715505, "grad_norm": 1.2442518403123786, "learning_rate": 2.1614583333333336e-06, "loss": 0.2391, "step": 83 }, { "epoch": 0.021894242987000292, "grad_norm": 1.3337855979162434, "learning_rate": 2.1875000000000002e-06, "loss": 0.257, "step": 84 }, { "epoch": 0.022154888736845534, "grad_norm": 1.3065274314696351, "learning_rate": 2.213541666666667e-06, "loss": 0.238, "step": 85 }, { "epoch": 0.022415534486690775, "grad_norm": 1.2511220186476184, "learning_rate": 2.2395833333333334e-06, "loss": 0.2477, "step": 86 }, { "epoch": 0.022676180236536017, "grad_norm": 1.2040195727698675, "learning_rate": 2.265625e-06, "loss": 0.2504, "step": 87 }, { "epoch": 0.02293682598638126, "grad_norm": 1.244106733549268, "learning_rate": 2.2916666666666666e-06, "loss": 0.2442, "step": 88 }, { "epoch": 0.0231974717362265, "grad_norm": 1.2325698413002473, "learning_rate": 2.3177083333333336e-06, "loss": 0.2435, "step": 89 }, { "epoch": 0.023458117486071742, "grad_norm": 1.3388792815519999, "learning_rate": 2.3437500000000002e-06, "loss": 0.2592, "step": 90 }, { "epoch": 0.023718763235916984, "grad_norm": 1.3930706667338617, "learning_rate": 2.369791666666667e-06, "loss": 0.2672, "step": 91 }, { "epoch": 0.023979408985762226, "grad_norm": 1.3242217830460732, "learning_rate": 2.395833333333334e-06, "loss": 0.249, "step": 92 }, { "epoch": 0.024240054735607468, "grad_norm": 1.390908600277077, "learning_rate": 2.421875e-06, "loss": 0.2275, "step": 93 }, { "epoch": 0.02450070048545271, "grad_norm": 1.3639383048933642, "learning_rate": 2.4479166666666666e-06, "loss": 0.2338, "step": 94 }, { "epoch": 0.02476134623529795, "grad_norm": 1.2711275100528354, "learning_rate": 2.4739583333333336e-06, "loss": 0.2232, "step": 95 }, { "epoch": 0.025021991985143193, "grad_norm": 1.4446447756551226, "learning_rate": 2.5e-06, "loss": 0.2635, "step": 96 }, { "epoch": 0.025282637734988435, "grad_norm": 1.5007961873638085, "learning_rate": 2.5260416666666672e-06, "loss": 0.2349, "step": 97 }, { "epoch": 0.025543283484833677, "grad_norm": 1.4076887610476205, "learning_rate": 2.5520833333333334e-06, "loss": 0.2417, "step": 98 }, { "epoch": 0.02580392923467892, "grad_norm": 1.2001380851289543, "learning_rate": 2.5781250000000004e-06, "loss": 0.2447, "step": 99 }, { "epoch": 0.02606457498452416, "grad_norm": 1.2746980294065395, "learning_rate": 2.604166666666667e-06, "loss": 0.2177, "step": 100 }, { "epoch": 0.0263252207343694, "grad_norm": 1.542834560668501, "learning_rate": 2.630208333333333e-06, "loss": 0.2375, "step": 101 }, { "epoch": 0.02658586648421464, "grad_norm": 1.3951803362693649, "learning_rate": 2.65625e-06, "loss": 0.2596, "step": 102 }, { "epoch": 0.026846512234059882, "grad_norm": 1.634180451964562, "learning_rate": 2.682291666666667e-06, "loss": 0.224, "step": 103 }, { "epoch": 0.027107157983905124, "grad_norm": 1.353673303883085, "learning_rate": 2.7083333333333334e-06, "loss": 0.2334, "step": 104 }, { "epoch": 0.027367803733750366, "grad_norm": 1.2979717348230226, "learning_rate": 2.7343750000000004e-06, "loss": 0.2483, "step": 105 }, { "epoch": 0.027628449483595607, "grad_norm": 1.256588713957518, "learning_rate": 2.760416666666667e-06, "loss": 0.2403, "step": 106 }, { "epoch": 0.02788909523344085, "grad_norm": 1.37923752780982, "learning_rate": 2.7864583333333336e-06, "loss": 0.2426, "step": 107 }, { "epoch": 0.02814974098328609, "grad_norm": 1.2663551357521017, "learning_rate": 2.8125e-06, "loss": 0.2145, "step": 108 }, { "epoch": 0.028410386733131333, "grad_norm": 1.301666969409469, "learning_rate": 2.838541666666667e-06, "loss": 0.2391, "step": 109 }, { "epoch": 0.028671032482976574, "grad_norm": 1.2286673147850626, "learning_rate": 2.8645833333333334e-06, "loss": 0.2215, "step": 110 }, { "epoch": 0.028931678232821816, "grad_norm": 1.306117601873296, "learning_rate": 2.8906250000000004e-06, "loss": 0.2434, "step": 111 }, { "epoch": 0.029192323982667058, "grad_norm": 1.283585442293343, "learning_rate": 2.916666666666667e-06, "loss": 0.2334, "step": 112 }, { "epoch": 0.0294529697325123, "grad_norm": 1.1495467056019582, "learning_rate": 2.9427083333333336e-06, "loss": 0.2223, "step": 113 }, { "epoch": 0.02971361548235754, "grad_norm": 1.268981064407803, "learning_rate": 2.96875e-06, "loss": 0.2394, "step": 114 }, { "epoch": 0.029974261232202783, "grad_norm": 1.3423913581781175, "learning_rate": 2.994791666666667e-06, "loss": 0.2446, "step": 115 }, { "epoch": 0.030234906982048025, "grad_norm": 1.4689691214469902, "learning_rate": 3.0208333333333334e-06, "loss": 0.2381, "step": 116 }, { "epoch": 0.030495552731893267, "grad_norm": 1.3405968916614481, "learning_rate": 3.0468750000000004e-06, "loss": 0.2466, "step": 117 }, { "epoch": 0.03075619848173851, "grad_norm": 1.4594825890597283, "learning_rate": 3.072916666666667e-06, "loss": 0.2436, "step": 118 }, { "epoch": 0.03101684423158375, "grad_norm": 1.2840141460551675, "learning_rate": 3.0989583333333336e-06, "loss": 0.2512, "step": 119 }, { "epoch": 0.03127748998142899, "grad_norm": 1.3242171144909844, "learning_rate": 3.125e-06, "loss": 0.2421, "step": 120 }, { "epoch": 0.03153813573127423, "grad_norm": 1.35861449330447, "learning_rate": 3.151041666666667e-06, "loss": 0.2377, "step": 121 }, { "epoch": 0.031798781481119476, "grad_norm": 1.212492106698224, "learning_rate": 3.1770833333333333e-06, "loss": 0.2333, "step": 122 }, { "epoch": 0.032059427230964714, "grad_norm": 1.2927186979442968, "learning_rate": 3.2031250000000004e-06, "loss": 0.2281, "step": 123 }, { "epoch": 0.03232007298080996, "grad_norm": 1.364190325207712, "learning_rate": 3.229166666666667e-06, "loss": 0.2531, "step": 124 }, { "epoch": 0.0325807187306552, "grad_norm": 1.3254924113143436, "learning_rate": 3.2552083333333335e-06, "loss": 0.2496, "step": 125 }, { "epoch": 0.03284136448050044, "grad_norm": 1.2448613234578525, "learning_rate": 3.28125e-06, "loss": 0.2279, "step": 126 }, { "epoch": 0.03310201023034568, "grad_norm": 1.415269467268163, "learning_rate": 3.307291666666667e-06, "loss": 0.2404, "step": 127 }, { "epoch": 0.033362655980190926, "grad_norm": 1.3786606579613123, "learning_rate": 3.3333333333333333e-06, "loss": 0.2559, "step": 128 }, { "epoch": 0.033623301730036165, "grad_norm": 1.4990927010105628, "learning_rate": 3.3593750000000003e-06, "loss": 0.2436, "step": 129 }, { "epoch": 0.0338839474798814, "grad_norm": 1.4642313162389016, "learning_rate": 3.385416666666667e-06, "loss": 0.2345, "step": 130 }, { "epoch": 0.03414459322972665, "grad_norm": 1.3712970632033383, "learning_rate": 3.4114583333333335e-06, "loss": 0.2364, "step": 131 }, { "epoch": 0.03440523897957189, "grad_norm": 1.276764913055735, "learning_rate": 3.4375e-06, "loss": 0.2362, "step": 132 }, { "epoch": 0.03466588472941713, "grad_norm": 1.2967294285163582, "learning_rate": 3.463541666666667e-06, "loss": 0.2353, "step": 133 }, { "epoch": 0.03492653047926237, "grad_norm": 1.3465640446975125, "learning_rate": 3.4895833333333333e-06, "loss": 0.225, "step": 134 }, { "epoch": 0.035187176229107615, "grad_norm": 1.1983587985561204, "learning_rate": 3.5156250000000003e-06, "loss": 0.2177, "step": 135 }, { "epoch": 0.035447821978952854, "grad_norm": 1.400131263215834, "learning_rate": 3.5416666666666673e-06, "loss": 0.2291, "step": 136 }, { "epoch": 0.0357084677287981, "grad_norm": 1.2599895615725354, "learning_rate": 3.5677083333333335e-06, "loss": 0.2211, "step": 137 }, { "epoch": 0.03596911347864334, "grad_norm": 1.3079662115517945, "learning_rate": 3.59375e-06, "loss": 0.2276, "step": 138 }, { "epoch": 0.03622975922848858, "grad_norm": 1.2315333597414184, "learning_rate": 3.619791666666667e-06, "loss": 0.2256, "step": 139 }, { "epoch": 0.03649040497833382, "grad_norm": 1.3377643741614427, "learning_rate": 3.6458333333333333e-06, "loss": 0.2322, "step": 140 }, { "epoch": 0.036751050728179066, "grad_norm": 1.4088599325740154, "learning_rate": 3.6718750000000003e-06, "loss": 0.2533, "step": 141 }, { "epoch": 0.037011696478024304, "grad_norm": 1.2992012599849174, "learning_rate": 3.6979166666666673e-06, "loss": 0.2198, "step": 142 }, { "epoch": 0.03727234222786955, "grad_norm": 1.268687332178181, "learning_rate": 3.7239583333333335e-06, "loss": 0.2236, "step": 143 }, { "epoch": 0.03753298797771479, "grad_norm": 1.3357133767281324, "learning_rate": 3.7500000000000005e-06, "loss": 0.2217, "step": 144 }, { "epoch": 0.03779363372756003, "grad_norm": 1.2627261412123874, "learning_rate": 3.776041666666667e-06, "loss": 0.2305, "step": 145 }, { "epoch": 0.03805427947740527, "grad_norm": 1.4684362545381684, "learning_rate": 3.8020833333333333e-06, "loss": 0.223, "step": 146 }, { "epoch": 0.03831492522725051, "grad_norm": 1.309521115796194, "learning_rate": 3.828125000000001e-06, "loss": 0.2425, "step": 147 }, { "epoch": 0.038575570977095755, "grad_norm": 1.271924270347421, "learning_rate": 3.854166666666667e-06, "loss": 0.2139, "step": 148 }, { "epoch": 0.03883621672694099, "grad_norm": 1.2331628839105742, "learning_rate": 3.880208333333333e-06, "loss": 0.2337, "step": 149 }, { "epoch": 0.03909686247678624, "grad_norm": 1.1877493055863286, "learning_rate": 3.90625e-06, "loss": 0.227, "step": 150 }, { "epoch": 0.03935750822663148, "grad_norm": 1.2716078141580314, "learning_rate": 3.932291666666667e-06, "loss": 0.243, "step": 151 }, { "epoch": 0.03961815397647672, "grad_norm": 1.2217732521261158, "learning_rate": 3.958333333333333e-06, "loss": 0.2137, "step": 152 }, { "epoch": 0.03987879972632196, "grad_norm": 1.3633563783573515, "learning_rate": 3.984375e-06, "loss": 0.2439, "step": 153 }, { "epoch": 0.040139445476167206, "grad_norm": 1.2937107387689506, "learning_rate": 4.010416666666667e-06, "loss": 0.2321, "step": 154 }, { "epoch": 0.040400091226012444, "grad_norm": 1.3139403177549762, "learning_rate": 4.0364583333333335e-06, "loss": 0.2205, "step": 155 }, { "epoch": 0.04066073697585769, "grad_norm": 1.3803380375280359, "learning_rate": 4.0625000000000005e-06, "loss": 0.2445, "step": 156 }, { "epoch": 0.04092138272570293, "grad_norm": 1.243195721847114, "learning_rate": 4.0885416666666675e-06, "loss": 0.2215, "step": 157 }, { "epoch": 0.04118202847554817, "grad_norm": 1.4206213007293327, "learning_rate": 4.114583333333334e-06, "loss": 0.235, "step": 158 }, { "epoch": 0.04144267422539341, "grad_norm": 1.334392681683925, "learning_rate": 4.140625000000001e-06, "loss": 0.2257, "step": 159 }, { "epoch": 0.041703319975238656, "grad_norm": 1.2623554584137613, "learning_rate": 4.166666666666667e-06, "loss": 0.2385, "step": 160 }, { "epoch": 0.041963965725083895, "grad_norm": 1.2103725404361045, "learning_rate": 4.192708333333334e-06, "loss": 0.2378, "step": 161 }, { "epoch": 0.04222461147492914, "grad_norm": 1.2071564762864249, "learning_rate": 4.21875e-06, "loss": 0.2097, "step": 162 }, { "epoch": 0.04248525722477438, "grad_norm": 1.2534819854923331, "learning_rate": 4.244791666666667e-06, "loss": 0.2296, "step": 163 }, { "epoch": 0.04274590297461962, "grad_norm": 1.1843842536397509, "learning_rate": 4.270833333333333e-06, "loss": 0.219, "step": 164 }, { "epoch": 0.04300654872446486, "grad_norm": 1.3158137366554865, "learning_rate": 4.296875e-06, "loss": 0.2307, "step": 165 }, { "epoch": 0.0432671944743101, "grad_norm": 1.357988893385128, "learning_rate": 4.322916666666667e-06, "loss": 0.2042, "step": 166 }, { "epoch": 0.043527840224155345, "grad_norm": 1.2304081907406552, "learning_rate": 4.3489583333333334e-06, "loss": 0.231, "step": 167 }, { "epoch": 0.043788485974000584, "grad_norm": 1.2826305294271403, "learning_rate": 4.3750000000000005e-06, "loss": 0.225, "step": 168 }, { "epoch": 0.04404913172384583, "grad_norm": 1.3402350101759406, "learning_rate": 4.4010416666666675e-06, "loss": 0.2211, "step": 169 }, { "epoch": 0.04430977747369107, "grad_norm": 1.1397924651598474, "learning_rate": 4.427083333333334e-06, "loss": 0.2117, "step": 170 }, { "epoch": 0.04457042322353631, "grad_norm": 1.2068114948310753, "learning_rate": 4.453125000000001e-06, "loss": 0.2116, "step": 171 }, { "epoch": 0.04483106897338155, "grad_norm": 1.3146543944188458, "learning_rate": 4.479166666666667e-06, "loss": 0.2222, "step": 172 }, { "epoch": 0.045091714723226796, "grad_norm": 1.272297668542469, "learning_rate": 4.505208333333334e-06, "loss": 0.2347, "step": 173 }, { "epoch": 0.045352360473072034, "grad_norm": 1.3362491908769902, "learning_rate": 4.53125e-06, "loss": 0.2287, "step": 174 }, { "epoch": 0.04561300622291728, "grad_norm": 1.2166469157832003, "learning_rate": 4.557291666666667e-06, "loss": 0.2216, "step": 175 }, { "epoch": 0.04587365197276252, "grad_norm": 1.4589667079330393, "learning_rate": 4.583333333333333e-06, "loss": 0.2607, "step": 176 }, { "epoch": 0.04613429772260776, "grad_norm": 1.41295996531479, "learning_rate": 4.609375e-06, "loss": 0.2337, "step": 177 }, { "epoch": 0.046394943472453, "grad_norm": 1.3972035803140082, "learning_rate": 4.635416666666667e-06, "loss": 0.2361, "step": 178 }, { "epoch": 0.04665558922229825, "grad_norm": 1.3258834834034632, "learning_rate": 4.661458333333333e-06, "loss": 0.2242, "step": 179 }, { "epoch": 0.046916234972143485, "grad_norm": 1.311090589214678, "learning_rate": 4.6875000000000004e-06, "loss": 0.2255, "step": 180 }, { "epoch": 0.04717688072198873, "grad_norm": 1.2428200076802622, "learning_rate": 4.7135416666666675e-06, "loss": 0.2092, "step": 181 }, { "epoch": 0.04743752647183397, "grad_norm": 1.2418766741556888, "learning_rate": 4.739583333333334e-06, "loss": 0.2054, "step": 182 }, { "epoch": 0.047698172221679214, "grad_norm": 1.3068201378164321, "learning_rate": 4.765625000000001e-06, "loss": 0.2395, "step": 183 }, { "epoch": 0.04795881797152445, "grad_norm": 1.24952037404213, "learning_rate": 4.791666666666668e-06, "loss": 0.2131, "step": 184 }, { "epoch": 0.04821946372136969, "grad_norm": 1.3584923119206156, "learning_rate": 4.817708333333334e-06, "loss": 0.2334, "step": 185 }, { "epoch": 0.048480109471214936, "grad_norm": 1.3613684188461834, "learning_rate": 4.84375e-06, "loss": 0.2081, "step": 186 }, { "epoch": 0.048740755221060174, "grad_norm": 1.3960875588624524, "learning_rate": 4.869791666666667e-06, "loss": 0.2379, "step": 187 }, { "epoch": 0.04900140097090542, "grad_norm": 1.3092333324455596, "learning_rate": 4.895833333333333e-06, "loss": 0.2323, "step": 188 }, { "epoch": 0.04926204672075066, "grad_norm": 1.2070359348043012, "learning_rate": 4.921875e-06, "loss": 0.1977, "step": 189 }, { "epoch": 0.0495226924705959, "grad_norm": 1.1891796758604343, "learning_rate": 4.947916666666667e-06, "loss": 0.2045, "step": 190 }, { "epoch": 0.04978333822044114, "grad_norm": 1.2207517632727867, "learning_rate": 4.973958333333333e-06, "loss": 0.2206, "step": 191 }, { "epoch": 0.050043983970286386, "grad_norm": 1.295180466730222, "learning_rate": 5e-06, "loss": 0.2155, "step": 192 }, { "epoch": 0.050304629720131624, "grad_norm": 1.2528559948910722, "learning_rate": 4.999999070920249e-06, "loss": 0.2161, "step": 193 }, { "epoch": 0.05056527546997687, "grad_norm": 1.337855147472058, "learning_rate": 4.999996283681687e-06, "loss": 0.2314, "step": 194 }, { "epoch": 0.05082592121982211, "grad_norm": 1.1642918062334315, "learning_rate": 4.999991638286384e-06, "loss": 0.2298, "step": 195 }, { "epoch": 0.05108656696966735, "grad_norm": 1.3276185444282333, "learning_rate": 4.9999851347377946e-06, "loss": 0.237, "step": 196 }, { "epoch": 0.05134721271951259, "grad_norm": 1.3923755714260602, "learning_rate": 4.9999767730407515e-06, "loss": 0.219, "step": 197 }, { "epoch": 0.05160785846935784, "grad_norm": 1.3740624515182074, "learning_rate": 4.99996655320147e-06, "loss": 0.2182, "step": 198 }, { "epoch": 0.051868504219203075, "grad_norm": 1.3380375496673806, "learning_rate": 4.999954475227547e-06, "loss": 0.2408, "step": 199 }, { "epoch": 0.05212914996904832, "grad_norm": 1.2455487731644035, "learning_rate": 4.999940539127958e-06, "loss": 0.2222, "step": 200 }, { "epoch": 0.05238979571889356, "grad_norm": 1.189531284329726, "learning_rate": 4.999924744913062e-06, "loss": 0.2183, "step": 201 }, { "epoch": 0.0526504414687388, "grad_norm": 1.2450319439313389, "learning_rate": 4.999907092594598e-06, "loss": 0.2413, "step": 202 }, { "epoch": 0.05291108721858404, "grad_norm": 1.2643640110057834, "learning_rate": 4.999887582185688e-06, "loss": 0.2485, "step": 203 }, { "epoch": 0.05317173296842928, "grad_norm": 1.1534368270129693, "learning_rate": 4.99986621370083e-06, "loss": 0.2171, "step": 204 }, { "epoch": 0.053432378718274526, "grad_norm": 1.274589400883266, "learning_rate": 4.999842987155909e-06, "loss": 0.2281, "step": 205 }, { "epoch": 0.053693024468119764, "grad_norm": 1.3237308678357673, "learning_rate": 4.99981790256819e-06, "loss": 0.247, "step": 206 }, { "epoch": 0.05395367021796501, "grad_norm": 1.1468231776988635, "learning_rate": 4.999790959956312e-06, "loss": 0.2179, "step": 207 }, { "epoch": 0.05421431596781025, "grad_norm": 1.2912859634178433, "learning_rate": 4.999762159340305e-06, "loss": 0.2291, "step": 208 }, { "epoch": 0.05447496171765549, "grad_norm": 1.3876297338976957, "learning_rate": 4.999731500741575e-06, "loss": 0.2214, "step": 209 }, { "epoch": 0.05473560746750073, "grad_norm": 1.2278217031775658, "learning_rate": 4.999698984182909e-06, "loss": 0.2243, "step": 210 }, { "epoch": 0.054996253217345976, "grad_norm": 1.2388688802722572, "learning_rate": 4.999664609688474e-06, "loss": 0.219, "step": 211 }, { "epoch": 0.055256898967191215, "grad_norm": 1.1360695068420827, "learning_rate": 4.999628377283821e-06, "loss": 0.2228, "step": 212 }, { "epoch": 0.05551754471703646, "grad_norm": 1.2434391695636524, "learning_rate": 4.999590286995879e-06, "loss": 0.2331, "step": 213 }, { "epoch": 0.0557781904668817, "grad_norm": 1.3268346831233018, "learning_rate": 4.99955033885296e-06, "loss": 0.2305, "step": 214 }, { "epoch": 0.056038836216726944, "grad_norm": 1.3058898435950121, "learning_rate": 4.999508532884756e-06, "loss": 0.2386, "step": 215 }, { "epoch": 0.05629948196657218, "grad_norm": 1.3121319306978072, "learning_rate": 4.999464869122339e-06, "loss": 0.2377, "step": 216 }, { "epoch": 0.05656012771641743, "grad_norm": 1.3591255137008724, "learning_rate": 4.999419347598164e-06, "loss": 0.2322, "step": 217 }, { "epoch": 0.056820773466262665, "grad_norm": 1.350158672588046, "learning_rate": 4.999371968346064e-06, "loss": 0.236, "step": 218 }, { "epoch": 0.05708141921610791, "grad_norm": 1.2403301185314375, "learning_rate": 4.999322731401256e-06, "loss": 0.2181, "step": 219 }, { "epoch": 0.05734206496595315, "grad_norm": 1.3848571366813867, "learning_rate": 4.999271636800334e-06, "loss": 0.2268, "step": 220 }, { "epoch": 0.05760271071579839, "grad_norm": 1.3887544800373213, "learning_rate": 4.999218684581277e-06, "loss": 0.2269, "step": 221 }, { "epoch": 0.05786335646564363, "grad_norm": 1.419740694434746, "learning_rate": 4.999163874783441e-06, "loss": 0.2356, "step": 222 }, { "epoch": 0.05812400221548887, "grad_norm": 1.1638792769204565, "learning_rate": 4.999107207447564e-06, "loss": 0.2314, "step": 223 }, { "epoch": 0.058384647965334116, "grad_norm": 1.215262043078709, "learning_rate": 4.999048682615766e-06, "loss": 0.2158, "step": 224 }, { "epoch": 0.058645293715179354, "grad_norm": 1.2306598716712647, "learning_rate": 4.998988300331545e-06, "loss": 0.2248, "step": 225 }, { "epoch": 0.0589059394650246, "grad_norm": 1.207372668102452, "learning_rate": 4.9989260606397816e-06, "loss": 0.2175, "step": 226 }, { "epoch": 0.05916658521486984, "grad_norm": 1.2453980931908324, "learning_rate": 4.998861963586737e-06, "loss": 0.2319, "step": 227 }, { "epoch": 0.05942723096471508, "grad_norm": 1.290462821313982, "learning_rate": 4.998796009220051e-06, "loss": 0.2079, "step": 228 }, { "epoch": 0.05968787671456032, "grad_norm": 1.190940919392508, "learning_rate": 4.998728197588746e-06, "loss": 0.2034, "step": 229 }, { "epoch": 0.05994852246440557, "grad_norm": 1.3539991959819664, "learning_rate": 4.9986585287432236e-06, "loss": 0.2166, "step": 230 }, { "epoch": 0.060209168214250805, "grad_norm": 1.401676648951938, "learning_rate": 4.998587002735266e-06, "loss": 0.2246, "step": 231 }, { "epoch": 0.06046981396409605, "grad_norm": 1.4922630053400392, "learning_rate": 4.998513619618036e-06, "loss": 0.2356, "step": 232 }, { "epoch": 0.06073045971394129, "grad_norm": 1.3774465938356697, "learning_rate": 4.998438379446077e-06, "loss": 0.2357, "step": 233 }, { "epoch": 0.060991105463786534, "grad_norm": 1.2532373052281334, "learning_rate": 4.998361282275311e-06, "loss": 0.2219, "step": 234 }, { "epoch": 0.06125175121363177, "grad_norm": 1.217048599059285, "learning_rate": 4.998282328163043e-06, "loss": 0.231, "step": 235 }, { "epoch": 0.06151239696347702, "grad_norm": 1.253221618832272, "learning_rate": 4.998201517167956e-06, "loss": 0.2348, "step": 236 }, { "epoch": 0.061773042713322256, "grad_norm": 1.2211369770229268, "learning_rate": 4.998118849350114e-06, "loss": 0.2194, "step": 237 }, { "epoch": 0.0620336884631675, "grad_norm": 1.3270102679771774, "learning_rate": 4.998034324770962e-06, "loss": 0.2274, "step": 238 }, { "epoch": 0.06229433421301274, "grad_norm": 1.2654898310715428, "learning_rate": 4.997947943493322e-06, "loss": 0.2152, "step": 239 }, { "epoch": 0.06255497996285798, "grad_norm": 1.2751832492411945, "learning_rate": 4.997859705581399e-06, "loss": 0.2286, "step": 240 }, { "epoch": 0.06281562571270322, "grad_norm": 1.2635949123796655, "learning_rate": 4.997769611100779e-06, "loss": 0.2203, "step": 241 }, { "epoch": 0.06307627146254846, "grad_norm": 1.2575405390620424, "learning_rate": 4.997677660118423e-06, "loss": 0.2223, "step": 242 }, { "epoch": 0.0633369172123937, "grad_norm": 1.2474265797869604, "learning_rate": 4.997583852702675e-06, "loss": 0.2093, "step": 243 }, { "epoch": 0.06359756296223895, "grad_norm": 1.2604079411003983, "learning_rate": 4.997488188923262e-06, "loss": 0.2198, "step": 244 }, { "epoch": 0.06385820871208418, "grad_norm": 1.3023859128675233, "learning_rate": 4.997390668851284e-06, "loss": 0.2409, "step": 245 }, { "epoch": 0.06411885446192943, "grad_norm": 1.1846176889964994, "learning_rate": 4.9972912925592245e-06, "loss": 0.228, "step": 246 }, { "epoch": 0.06437950021177467, "grad_norm": 1.1496372351679327, "learning_rate": 4.997190060120948e-06, "loss": 0.2089, "step": 247 }, { "epoch": 0.06464014596161992, "grad_norm": 1.1284262850848807, "learning_rate": 4.997086971611696e-06, "loss": 0.2118, "step": 248 }, { "epoch": 0.06490079171146515, "grad_norm": 1.200641956541452, "learning_rate": 4.996982027108091e-06, "loss": 0.2239, "step": 249 }, { "epoch": 0.0651614374613104, "grad_norm": 1.2263054442386825, "learning_rate": 4.996875226688133e-06, "loss": 0.2221, "step": 250 }, { "epoch": 0.06542208321115564, "grad_norm": 1.2582351422814173, "learning_rate": 4.996766570431203e-06, "loss": 0.2286, "step": 251 }, { "epoch": 0.06568272896100089, "grad_norm": 1.2646707902685472, "learning_rate": 4.996656058418064e-06, "loss": 0.2319, "step": 252 }, { "epoch": 0.06594337471084612, "grad_norm": 1.186657583431584, "learning_rate": 4.996543690730852e-06, "loss": 0.2212, "step": 253 }, { "epoch": 0.06620402046069136, "grad_norm": 1.1765330093828443, "learning_rate": 4.996429467453088e-06, "loss": 0.2091, "step": 254 }, { "epoch": 0.06646466621053661, "grad_norm": 1.2283234343513816, "learning_rate": 4.99631338866967e-06, "loss": 0.2087, "step": 255 }, { "epoch": 0.06672531196038185, "grad_norm": 1.1869037648086798, "learning_rate": 4.996195454466873e-06, "loss": 0.2176, "step": 256 }, { "epoch": 0.06698595771022708, "grad_norm": 1.2874988928604547, "learning_rate": 4.996075664932356e-06, "loss": 0.2296, "step": 257 }, { "epoch": 0.06724660346007233, "grad_norm": 1.1859938933555394, "learning_rate": 4.995954020155153e-06, "loss": 0.2225, "step": 258 }, { "epoch": 0.06750724920991757, "grad_norm": 1.2141827460540071, "learning_rate": 4.9958305202256795e-06, "loss": 0.2309, "step": 259 }, { "epoch": 0.0677678949597628, "grad_norm": 1.0839580954510446, "learning_rate": 4.995705165235726e-06, "loss": 0.2101, "step": 260 }, { "epoch": 0.06802854070960805, "grad_norm": 1.227646078268069, "learning_rate": 4.995577955278465e-06, "loss": 0.2319, "step": 261 }, { "epoch": 0.0682891864594533, "grad_norm": 1.1521030819959475, "learning_rate": 4.995448890448449e-06, "loss": 0.2095, "step": 262 }, { "epoch": 0.06854983220929854, "grad_norm": 1.2901517295914369, "learning_rate": 4.995317970841605e-06, "loss": 0.2313, "step": 263 }, { "epoch": 0.06881047795914377, "grad_norm": 1.1585627619243482, "learning_rate": 4.995185196555242e-06, "loss": 0.2168, "step": 264 }, { "epoch": 0.06907112370898902, "grad_norm": 1.2024777833572844, "learning_rate": 4.9950505676880455e-06, "loss": 0.2205, "step": 265 }, { "epoch": 0.06933176945883426, "grad_norm": 1.246622502337289, "learning_rate": 4.994914084340082e-06, "loss": 0.2032, "step": 266 }, { "epoch": 0.06959241520867951, "grad_norm": 1.2933550764673314, "learning_rate": 4.994775746612792e-06, "loss": 0.2342, "step": 267 }, { "epoch": 0.06985306095852474, "grad_norm": 1.1882940782687053, "learning_rate": 4.994635554608999e-06, "loss": 0.2225, "step": 268 }, { "epoch": 0.07011370670836999, "grad_norm": 1.3468293703147427, "learning_rate": 4.9944935084329015e-06, "loss": 0.2272, "step": 269 }, { "epoch": 0.07037435245821523, "grad_norm": 1.117603116103112, "learning_rate": 4.994349608190079e-06, "loss": 0.2154, "step": 270 }, { "epoch": 0.07063499820806048, "grad_norm": 1.256786393471711, "learning_rate": 4.994203853987485e-06, "loss": 0.2374, "step": 271 }, { "epoch": 0.07089564395790571, "grad_norm": 1.2432582035418598, "learning_rate": 4.994056245933454e-06, "loss": 0.2299, "step": 272 }, { "epoch": 0.07115628970775095, "grad_norm": 1.2390454209234303, "learning_rate": 4.9939067841376985e-06, "loss": 0.2312, "step": 273 }, { "epoch": 0.0714169354575962, "grad_norm": 1.3100812183156436, "learning_rate": 4.993755468711308e-06, "loss": 0.2071, "step": 274 }, { "epoch": 0.07167758120744143, "grad_norm": 1.1582716265370592, "learning_rate": 4.99360229976675e-06, "loss": 0.2113, "step": 275 }, { "epoch": 0.07193822695728667, "grad_norm": 1.2602904525716598, "learning_rate": 4.993447277417867e-06, "loss": 0.2137, "step": 276 }, { "epoch": 0.07219887270713192, "grad_norm": 1.168236462511693, "learning_rate": 4.993290401779886e-06, "loss": 0.2287, "step": 277 }, { "epoch": 0.07245951845697716, "grad_norm": 1.2251705852237686, "learning_rate": 4.993131672969402e-06, "loss": 0.2228, "step": 278 }, { "epoch": 0.0727201642068224, "grad_norm": 1.2503971365537208, "learning_rate": 4.992971091104396e-06, "loss": 0.2157, "step": 279 }, { "epoch": 0.07298080995666764, "grad_norm": 1.3344183239399459, "learning_rate": 4.992808656304221e-06, "loss": 0.2291, "step": 280 }, { "epoch": 0.07324145570651289, "grad_norm": 1.326298165005528, "learning_rate": 4.99264436868961e-06, "loss": 0.2257, "step": 281 }, { "epoch": 0.07350210145635813, "grad_norm": 1.2409323308527875, "learning_rate": 4.99247822838267e-06, "loss": 0.2344, "step": 282 }, { "epoch": 0.07376274720620336, "grad_norm": 1.4172911641950523, "learning_rate": 4.9923102355068895e-06, "loss": 0.2205, "step": 283 }, { "epoch": 0.07402339295604861, "grad_norm": 1.2090357655353212, "learning_rate": 4.99214039018713e-06, "loss": 0.2062, "step": 284 }, { "epoch": 0.07428403870589385, "grad_norm": 1.143669924967448, "learning_rate": 4.991968692549632e-06, "loss": 0.2092, "step": 285 }, { "epoch": 0.0745446844557391, "grad_norm": 1.3423635285752378, "learning_rate": 4.991795142722012e-06, "loss": 0.2287, "step": 286 }, { "epoch": 0.07480533020558433, "grad_norm": 1.134451884935925, "learning_rate": 4.991619740833263e-06, "loss": 0.2253, "step": 287 }, { "epoch": 0.07506597595542958, "grad_norm": 1.4042087467803073, "learning_rate": 4.9914424870137565e-06, "loss": 0.2505, "step": 288 }, { "epoch": 0.07532662170527482, "grad_norm": 1.403682234588897, "learning_rate": 4.991263381395236e-06, "loss": 0.2171, "step": 289 }, { "epoch": 0.07558726745512007, "grad_norm": 1.2080895909944345, "learning_rate": 4.991082424110826e-06, "loss": 0.2082, "step": 290 }, { "epoch": 0.0758479132049653, "grad_norm": 1.3716429157764367, "learning_rate": 4.9908996152950266e-06, "loss": 0.2198, "step": 291 }, { "epoch": 0.07610855895481054, "grad_norm": 1.1632346690430284, "learning_rate": 4.990714955083709e-06, "loss": 0.2206, "step": 292 }, { "epoch": 0.07636920470465579, "grad_norm": 1.2327830612924675, "learning_rate": 4.990528443614129e-06, "loss": 0.2242, "step": 293 }, { "epoch": 0.07662985045450102, "grad_norm": 1.344058854189743, "learning_rate": 4.9903400810249116e-06, "loss": 0.2017, "step": 294 }, { "epoch": 0.07689049620434626, "grad_norm": 1.1572099558916, "learning_rate": 4.99014986745606e-06, "loss": 0.2288, "step": 295 }, { "epoch": 0.07715114195419151, "grad_norm": 1.238107486803331, "learning_rate": 4.9899578030489534e-06, "loss": 0.217, "step": 296 }, { "epoch": 0.07741178770403676, "grad_norm": 1.238729597939622, "learning_rate": 4.989763887946346e-06, "loss": 0.2251, "step": 297 }, { "epoch": 0.07767243345388199, "grad_norm": 1.1344150108021531, "learning_rate": 4.9895681222923685e-06, "loss": 0.2143, "step": 298 }, { "epoch": 0.07793307920372723, "grad_norm": 1.3038038599539603, "learning_rate": 4.989370506232525e-06, "loss": 0.2246, "step": 299 }, { "epoch": 0.07819372495357248, "grad_norm": 1.2207855748567653, "learning_rate": 4.989171039913698e-06, "loss": 0.2275, "step": 300 }, { "epoch": 0.07845437070341772, "grad_norm": 1.2122611201596998, "learning_rate": 4.988969723484142e-06, "loss": 0.222, "step": 301 }, { "epoch": 0.07871501645326295, "grad_norm": 1.2700618792582374, "learning_rate": 4.9887665570934905e-06, "loss": 0.2263, "step": 302 }, { "epoch": 0.0789756622031082, "grad_norm": 1.2006038114534607, "learning_rate": 4.988561540892748e-06, "loss": 0.2173, "step": 303 }, { "epoch": 0.07923630795295344, "grad_norm": 1.2021006948482258, "learning_rate": 4.988354675034296e-06, "loss": 0.2148, "step": 304 }, { "epoch": 0.07949695370279869, "grad_norm": 1.1204479093674418, "learning_rate": 4.98814595967189e-06, "loss": 0.2005, "step": 305 }, { "epoch": 0.07975759945264392, "grad_norm": 1.3059911211274469, "learning_rate": 4.987935394960661e-06, "loss": 0.2218, "step": 306 }, { "epoch": 0.08001824520248917, "grad_norm": 1.2326392638258832, "learning_rate": 4.9877229810571145e-06, "loss": 0.2184, "step": 307 }, { "epoch": 0.08027889095233441, "grad_norm": 1.1660040545123636, "learning_rate": 4.98750871811913e-06, "loss": 0.2191, "step": 308 }, { "epoch": 0.08053953670217966, "grad_norm": 1.2836375542745249, "learning_rate": 4.98729260630596e-06, "loss": 0.225, "step": 309 }, { "epoch": 0.08080018245202489, "grad_norm": 1.2208819342103603, "learning_rate": 4.987074645778234e-06, "loss": 0.2243, "step": 310 }, { "epoch": 0.08106082820187013, "grad_norm": 1.22022101040841, "learning_rate": 4.986854836697953e-06, "loss": 0.2062, "step": 311 }, { "epoch": 0.08132147395171538, "grad_norm": 1.2528896592554655, "learning_rate": 4.986633179228495e-06, "loss": 0.2189, "step": 312 }, { "epoch": 0.08158211970156061, "grad_norm": 1.2442459785721947, "learning_rate": 4.986409673534609e-06, "loss": 0.2207, "step": 313 }, { "epoch": 0.08184276545140586, "grad_norm": 1.2190435806736082, "learning_rate": 4.986184319782418e-06, "loss": 0.2164, "step": 314 }, { "epoch": 0.0821034112012511, "grad_norm": 1.1858619191022657, "learning_rate": 4.98595711813942e-06, "loss": 0.2101, "step": 315 }, { "epoch": 0.08236405695109635, "grad_norm": 1.2660834690815714, "learning_rate": 4.9857280687744856e-06, "loss": 0.2223, "step": 316 }, { "epoch": 0.08262470270094158, "grad_norm": 1.211465935020055, "learning_rate": 4.98549717185786e-06, "loss": 0.2219, "step": 317 }, { "epoch": 0.08288534845078682, "grad_norm": 1.1001770983799184, "learning_rate": 4.985264427561158e-06, "loss": 0.2183, "step": 318 }, { "epoch": 0.08314599420063207, "grad_norm": 1.2319060929665375, "learning_rate": 4.985029836057372e-06, "loss": 0.2303, "step": 319 }, { "epoch": 0.08340663995047731, "grad_norm": 1.1203902694665382, "learning_rate": 4.984793397520865e-06, "loss": 0.1969, "step": 320 }, { "epoch": 0.08366728570032254, "grad_norm": 1.129233306617484, "learning_rate": 4.984555112127373e-06, "loss": 0.2051, "step": 321 }, { "epoch": 0.08392793145016779, "grad_norm": 1.1170576167633055, "learning_rate": 4.984314980054005e-06, "loss": 0.2136, "step": 322 }, { "epoch": 0.08418857720001303, "grad_norm": 1.0873933591656604, "learning_rate": 4.984073001479241e-06, "loss": 0.2058, "step": 323 }, { "epoch": 0.08444922294985828, "grad_norm": 1.1611020342561413, "learning_rate": 4.983829176582939e-06, "loss": 0.2195, "step": 324 }, { "epoch": 0.08470986869970351, "grad_norm": 1.1115049222755184, "learning_rate": 4.98358350554632e-06, "loss": 0.2179, "step": 325 }, { "epoch": 0.08497051444954876, "grad_norm": 1.1465609118747304, "learning_rate": 4.983335988551986e-06, "loss": 0.2114, "step": 326 }, { "epoch": 0.085231160199394, "grad_norm": 1.1201039351755278, "learning_rate": 4.983086625783907e-06, "loss": 0.2045, "step": 327 }, { "epoch": 0.08549180594923925, "grad_norm": 1.200681296018806, "learning_rate": 4.982835417427424e-06, "loss": 0.2174, "step": 328 }, { "epoch": 0.08575245169908448, "grad_norm": 1.125701151702069, "learning_rate": 4.982582363669252e-06, "loss": 0.2037, "step": 329 }, { "epoch": 0.08601309744892972, "grad_norm": 1.1814394486799351, "learning_rate": 4.982327464697476e-06, "loss": 0.2148, "step": 330 }, { "epoch": 0.08627374319877497, "grad_norm": 1.1486309665399745, "learning_rate": 4.982070720701554e-06, "loss": 0.2247, "step": 331 }, { "epoch": 0.0865343889486202, "grad_norm": 1.1270742475371276, "learning_rate": 4.981812131872315e-06, "loss": 0.2153, "step": 332 }, { "epoch": 0.08679503469846545, "grad_norm": 1.1477941426002427, "learning_rate": 4.981551698401956e-06, "loss": 0.2169, "step": 333 }, { "epoch": 0.08705568044831069, "grad_norm": 1.1907142055750248, "learning_rate": 4.981289420484051e-06, "loss": 0.2162, "step": 334 }, { "epoch": 0.08731632619815594, "grad_norm": 1.1522139733240349, "learning_rate": 4.981025298313541e-06, "loss": 0.2076, "step": 335 }, { "epoch": 0.08757697194800117, "grad_norm": 1.0711263390095467, "learning_rate": 4.980759332086736e-06, "loss": 0.2066, "step": 336 }, { "epoch": 0.08783761769784641, "grad_norm": 1.125336944751074, "learning_rate": 4.980491522001322e-06, "loss": 0.2225, "step": 337 }, { "epoch": 0.08809826344769166, "grad_norm": 1.1857523710720335, "learning_rate": 4.980221868256351e-06, "loss": 0.2022, "step": 338 }, { "epoch": 0.0883589091975369, "grad_norm": 1.13051619975769, "learning_rate": 4.979950371052248e-06, "loss": 0.203, "step": 339 }, { "epoch": 0.08861955494738213, "grad_norm": 1.1275797506804155, "learning_rate": 4.9796770305908045e-06, "loss": 0.2206, "step": 340 }, { "epoch": 0.08888020069722738, "grad_norm": 1.1436065277862681, "learning_rate": 4.979401847075188e-06, "loss": 0.2174, "step": 341 }, { "epoch": 0.08914084644707262, "grad_norm": 1.144011534219575, "learning_rate": 4.979124820709931e-06, "loss": 0.2032, "step": 342 }, { "epoch": 0.08940149219691787, "grad_norm": 1.1383788627338367, "learning_rate": 4.978845951700936e-06, "loss": 0.1987, "step": 343 }, { "epoch": 0.0896621379467631, "grad_norm": 1.111439494622931, "learning_rate": 4.978565240255477e-06, "loss": 0.2024, "step": 344 }, { "epoch": 0.08992278369660835, "grad_norm": 1.1109295453722348, "learning_rate": 4.9782826865821974e-06, "loss": 0.2182, "step": 345 }, { "epoch": 0.09018342944645359, "grad_norm": 1.1270691476525767, "learning_rate": 4.977998290891109e-06, "loss": 0.2202, "step": 346 }, { "epoch": 0.09044407519629884, "grad_norm": 1.1191240157319848, "learning_rate": 4.977712053393593e-06, "loss": 0.2212, "step": 347 }, { "epoch": 0.09070472094614407, "grad_norm": 1.302482922154109, "learning_rate": 4.9774239743023975e-06, "loss": 0.2236, "step": 348 }, { "epoch": 0.09096536669598931, "grad_norm": 1.1949101133331321, "learning_rate": 4.977134053831642e-06, "loss": 0.2245, "step": 349 }, { "epoch": 0.09122601244583456, "grad_norm": 1.1584883863415452, "learning_rate": 4.976842292196817e-06, "loss": 0.216, "step": 350 }, { "epoch": 0.09148665819567979, "grad_norm": 1.2179632569921017, "learning_rate": 4.976548689614773e-06, "loss": 0.2207, "step": 351 }, { "epoch": 0.09174730394552504, "grad_norm": 1.1314536772385082, "learning_rate": 4.9762532463037385e-06, "loss": 0.2094, "step": 352 }, { "epoch": 0.09200794969537028, "grad_norm": 1.1619718547204847, "learning_rate": 4.975955962483304e-06, "loss": 0.1985, "step": 353 }, { "epoch": 0.09226859544521553, "grad_norm": 1.1310890892164964, "learning_rate": 4.97565683837443e-06, "loss": 0.2106, "step": 354 }, { "epoch": 0.09252924119506076, "grad_norm": 1.0804600522489038, "learning_rate": 4.9753558741994446e-06, "loss": 0.2146, "step": 355 }, { "epoch": 0.092789886944906, "grad_norm": 1.0966154509877462, "learning_rate": 4.9750530701820446e-06, "loss": 0.2082, "step": 356 }, { "epoch": 0.09305053269475125, "grad_norm": 1.1626673092566886, "learning_rate": 4.974748426547291e-06, "loss": 0.2021, "step": 357 }, { "epoch": 0.0933111784445965, "grad_norm": 1.1104951443161037, "learning_rate": 4.974441943521616e-06, "loss": 0.2081, "step": 358 }, { "epoch": 0.09357182419444172, "grad_norm": 1.2414392038211457, "learning_rate": 4.974133621332818e-06, "loss": 0.2225, "step": 359 }, { "epoch": 0.09383246994428697, "grad_norm": 1.2352749318031297, "learning_rate": 4.9738234602100605e-06, "loss": 0.2299, "step": 360 }, { "epoch": 0.09409311569413221, "grad_norm": 1.2490101297316876, "learning_rate": 4.973511460383875e-06, "loss": 0.2256, "step": 361 }, { "epoch": 0.09435376144397746, "grad_norm": 1.1543751134800784, "learning_rate": 4.97319762208616e-06, "loss": 0.2165, "step": 362 }, { "epoch": 0.09461440719382269, "grad_norm": 1.0702828231587727, "learning_rate": 4.972881945550181e-06, "loss": 0.2063, "step": 363 }, { "epoch": 0.09487505294366794, "grad_norm": 1.102250313940339, "learning_rate": 4.972564431010567e-06, "loss": 0.2051, "step": 364 }, { "epoch": 0.09513569869351318, "grad_norm": 1.1808957156883473, "learning_rate": 4.972245078703317e-06, "loss": 0.2044, "step": 365 }, { "epoch": 0.09539634444335843, "grad_norm": 1.08507132684555, "learning_rate": 4.971923888865792e-06, "loss": 0.2153, "step": 366 }, { "epoch": 0.09565699019320366, "grad_norm": 1.2593630764481016, "learning_rate": 4.971600861736723e-06, "loss": 0.2199, "step": 367 }, { "epoch": 0.0959176359430489, "grad_norm": 1.1599106014830074, "learning_rate": 4.971275997556203e-06, "loss": 0.2199, "step": 368 }, { "epoch": 0.09617828169289415, "grad_norm": 1.2167983421820492, "learning_rate": 4.970949296565693e-06, "loss": 0.2147, "step": 369 }, { "epoch": 0.09643892744273938, "grad_norm": 1.1036232127219445, "learning_rate": 4.970620759008015e-06, "loss": 0.2082, "step": 370 }, { "epoch": 0.09669957319258463, "grad_norm": 1.1415586091886185, "learning_rate": 4.970290385127363e-06, "loss": 0.232, "step": 371 }, { "epoch": 0.09696021894242987, "grad_norm": 1.1593850854805008, "learning_rate": 4.969958175169291e-06, "loss": 0.2226, "step": 372 }, { "epoch": 0.09722086469227512, "grad_norm": 1.1304188956435166, "learning_rate": 4.9696241293807155e-06, "loss": 0.2153, "step": 373 }, { "epoch": 0.09748151044212035, "grad_norm": 1.166831187252501, "learning_rate": 4.969288248009924e-06, "loss": 0.2109, "step": 374 }, { "epoch": 0.09774215619196559, "grad_norm": 1.1121955844140692, "learning_rate": 4.968950531306564e-06, "loss": 0.1975, "step": 375 }, { "epoch": 0.09800280194181084, "grad_norm": 1.1365559136119268, "learning_rate": 4.968610979521647e-06, "loss": 0.2259, "step": 376 }, { "epoch": 0.09826344769165608, "grad_norm": 1.2406619428890189, "learning_rate": 4.968269592907552e-06, "loss": 0.2196, "step": 377 }, { "epoch": 0.09852409344150131, "grad_norm": 1.1382032683820376, "learning_rate": 4.967926371718017e-06, "loss": 0.2145, "step": 378 }, { "epoch": 0.09878473919134656, "grad_norm": 1.195220873204447, "learning_rate": 4.967581316208147e-06, "loss": 0.2077, "step": 379 }, { "epoch": 0.0990453849411918, "grad_norm": 1.0935629440884256, "learning_rate": 4.96723442663441e-06, "loss": 0.1936, "step": 380 }, { "epoch": 0.09930603069103705, "grad_norm": 1.2658058282891576, "learning_rate": 4.966885703254634e-06, "loss": 0.2169, "step": 381 }, { "epoch": 0.09956667644088228, "grad_norm": 1.2674714522423611, "learning_rate": 4.966535146328014e-06, "loss": 0.2114, "step": 382 }, { "epoch": 0.09982732219072753, "grad_norm": 1.1852683387441327, "learning_rate": 4.966182756115107e-06, "loss": 0.2174, "step": 383 }, { "epoch": 0.10008796794057277, "grad_norm": 1.0859611501069677, "learning_rate": 4.965828532877831e-06, "loss": 0.1984, "step": 384 }, { "epoch": 0.100348613690418, "grad_norm": 1.1804965603908564, "learning_rate": 4.965472476879467e-06, "loss": 0.1906, "step": 385 }, { "epoch": 0.10060925944026325, "grad_norm": 1.118392712271162, "learning_rate": 4.96511458838466e-06, "loss": 0.2148, "step": 386 }, { "epoch": 0.1008699051901085, "grad_norm": 1.0317324589372456, "learning_rate": 4.964754867659413e-06, "loss": 0.209, "step": 387 }, { "epoch": 0.10113055093995374, "grad_norm": 1.2103908566297117, "learning_rate": 4.964393314971096e-06, "loss": 0.2062, "step": 388 }, { "epoch": 0.10139119668979897, "grad_norm": 1.0210671805600802, "learning_rate": 4.9640299305884365e-06, "loss": 0.2083, "step": 389 }, { "epoch": 0.10165184243964422, "grad_norm": 1.1092853582521347, "learning_rate": 4.963664714781525e-06, "loss": 0.213, "step": 390 }, { "epoch": 0.10191248818948946, "grad_norm": 1.1790184862667987, "learning_rate": 4.963297667821814e-06, "loss": 0.216, "step": 391 }, { "epoch": 0.1021731339393347, "grad_norm": 1.1596692190808229, "learning_rate": 4.962928789982117e-06, "loss": 0.1879, "step": 392 }, { "epoch": 0.10243377968917994, "grad_norm": 1.150600136691263, "learning_rate": 4.962558081536604e-06, "loss": 0.1978, "step": 393 }, { "epoch": 0.10269442543902518, "grad_norm": 1.160322888236028, "learning_rate": 4.9621855427608134e-06, "loss": 0.23, "step": 394 }, { "epoch": 0.10295507118887043, "grad_norm": 1.2203011261176657, "learning_rate": 4.9618111739316366e-06, "loss": 0.2152, "step": 395 }, { "epoch": 0.10321571693871567, "grad_norm": 1.245708468388321, "learning_rate": 4.961434975327331e-06, "loss": 0.2207, "step": 396 }, { "epoch": 0.1034763626885609, "grad_norm": 1.0851747833620913, "learning_rate": 4.961056947227509e-06, "loss": 0.2021, "step": 397 }, { "epoch": 0.10373700843840615, "grad_norm": 1.1365684130374432, "learning_rate": 4.960677089913146e-06, "loss": 0.2032, "step": 398 }, { "epoch": 0.1039976541882514, "grad_norm": 1.1247048982809813, "learning_rate": 4.960295403666578e-06, "loss": 0.2179, "step": 399 }, { "epoch": 0.10425829993809664, "grad_norm": 1.0468433458203552, "learning_rate": 4.959911888771496e-06, "loss": 0.1914, "step": 400 }, { "epoch": 0.10451894568794187, "grad_norm": 1.0595925365095717, "learning_rate": 4.9595265455129544e-06, "loss": 0.1998, "step": 401 }, { "epoch": 0.10477959143778712, "grad_norm": 1.0774057042521508, "learning_rate": 4.959139374177364e-06, "loss": 0.204, "step": 402 }, { "epoch": 0.10504023718763236, "grad_norm": 1.0445629585816503, "learning_rate": 4.958750375052496e-06, "loss": 0.201, "step": 403 }, { "epoch": 0.1053008829374776, "grad_norm": 1.2217159963884687, "learning_rate": 4.958359548427478e-06, "loss": 0.2235, "step": 404 }, { "epoch": 0.10556152868732284, "grad_norm": 1.1522607047038955, "learning_rate": 4.957966894592799e-06, "loss": 0.2249, "step": 405 }, { "epoch": 0.10582217443716808, "grad_norm": 1.1084866959072464, "learning_rate": 4.957572413840303e-06, "loss": 0.1905, "step": 406 }, { "epoch": 0.10608282018701333, "grad_norm": 1.163241741958231, "learning_rate": 4.957176106463194e-06, "loss": 0.2248, "step": 407 }, { "epoch": 0.10634346593685856, "grad_norm": 1.0746376465935439, "learning_rate": 4.956777972756033e-06, "loss": 0.219, "step": 408 }, { "epoch": 0.1066041116867038, "grad_norm": 1.0540172424214922, "learning_rate": 4.956378013014738e-06, "loss": 0.1997, "step": 409 }, { "epoch": 0.10686475743654905, "grad_norm": 1.1233823661058004, "learning_rate": 4.955976227536584e-06, "loss": 0.2071, "step": 410 }, { "epoch": 0.1071254031863943, "grad_norm": 1.1420225823494352, "learning_rate": 4.955572616620205e-06, "loss": 0.2163, "step": 411 }, { "epoch": 0.10738604893623953, "grad_norm": 1.1095711457769295, "learning_rate": 4.95516718056559e-06, "loss": 0.2084, "step": 412 }, { "epoch": 0.10764669468608477, "grad_norm": 1.1754205141773715, "learning_rate": 4.9547599196740844e-06, "loss": 0.2051, "step": 413 }, { "epoch": 0.10790734043593002, "grad_norm": 1.1358680925982534, "learning_rate": 4.95435083424839e-06, "loss": 0.1955, "step": 414 }, { "epoch": 0.10816798618577526, "grad_norm": 1.0621187861609125, "learning_rate": 4.953939924592567e-06, "loss": 0.2011, "step": 415 }, { "epoch": 0.1084286319356205, "grad_norm": 1.1953354907137688, "learning_rate": 4.953527191012029e-06, "loss": 0.2064, "step": 416 }, { "epoch": 0.10868927768546574, "grad_norm": 1.1255289043722554, "learning_rate": 4.953112633813544e-06, "loss": 0.1937, "step": 417 }, { "epoch": 0.10894992343531099, "grad_norm": 1.1012979853029599, "learning_rate": 4.95269625330524e-06, "loss": 0.2068, "step": 418 }, { "epoch": 0.10921056918515623, "grad_norm": 1.2798326780113025, "learning_rate": 4.952278049796596e-06, "loss": 0.2372, "step": 419 }, { "epoch": 0.10947121493500146, "grad_norm": 1.168251348063635, "learning_rate": 4.951858023598448e-06, "loss": 0.2166, "step": 420 }, { "epoch": 0.10973186068484671, "grad_norm": 1.057260379520718, "learning_rate": 4.951436175022987e-06, "loss": 0.2091, "step": 421 }, { "epoch": 0.10999250643469195, "grad_norm": 1.1053287863663124, "learning_rate": 4.951012504383756e-06, "loss": 0.2125, "step": 422 }, { "epoch": 0.11025315218453718, "grad_norm": 1.1458584313190894, "learning_rate": 4.950587011995656e-06, "loss": 0.2214, "step": 423 }, { "epoch": 0.11051379793438243, "grad_norm": 1.0955108158948859, "learning_rate": 4.9501596981749375e-06, "loss": 0.2092, "step": 424 }, { "epoch": 0.11077444368422767, "grad_norm": 1.213427286412284, "learning_rate": 4.949730563239211e-06, "loss": 0.2028, "step": 425 }, { "epoch": 0.11103508943407292, "grad_norm": 1.1504955631240694, "learning_rate": 4.949299607507434e-06, "loss": 0.2068, "step": 426 }, { "epoch": 0.11129573518391815, "grad_norm": 1.2892064400072472, "learning_rate": 4.9488668312999215e-06, "loss": 0.2166, "step": 427 }, { "epoch": 0.1115563809337634, "grad_norm": 1.2297334314943589, "learning_rate": 4.94843223493834e-06, "loss": 0.2242, "step": 428 }, { "epoch": 0.11181702668360864, "grad_norm": 1.1041794866536292, "learning_rate": 4.94799581874571e-06, "loss": 0.2052, "step": 429 }, { "epoch": 0.11207767243345389, "grad_norm": 1.13244101278659, "learning_rate": 4.947557583046403e-06, "loss": 0.2172, "step": 430 }, { "epoch": 0.11233831818329912, "grad_norm": 1.174696924400452, "learning_rate": 4.947117528166144e-06, "loss": 0.2052, "step": 431 }, { "epoch": 0.11259896393314436, "grad_norm": 1.3065536736368641, "learning_rate": 4.94667565443201e-06, "loss": 0.2156, "step": 432 }, { "epoch": 0.11285960968298961, "grad_norm": 1.2871925566023166, "learning_rate": 4.9462319621724295e-06, "loss": 0.2228, "step": 433 }, { "epoch": 0.11312025543283485, "grad_norm": 1.0984624373913543, "learning_rate": 4.945786451717183e-06, "loss": 0.2133, "step": 434 }, { "epoch": 0.11338090118268009, "grad_norm": 1.0976652988661446, "learning_rate": 4.945339123397402e-06, "loss": 0.1969, "step": 435 }, { "epoch": 0.11364154693252533, "grad_norm": 1.3501621892625337, "learning_rate": 4.944889977545571e-06, "loss": 0.2252, "step": 436 }, { "epoch": 0.11390219268237058, "grad_norm": 1.1206408732507736, "learning_rate": 4.944439014495521e-06, "loss": 0.2089, "step": 437 }, { "epoch": 0.11416283843221582, "grad_norm": 1.1844426154580396, "learning_rate": 4.94398623458244e-06, "loss": 0.2128, "step": 438 }, { "epoch": 0.11442348418206105, "grad_norm": 1.2417830477907454, "learning_rate": 4.94353163814286e-06, "loss": 0.2162, "step": 439 }, { "epoch": 0.1146841299319063, "grad_norm": 1.1751222764032099, "learning_rate": 4.943075225514667e-06, "loss": 0.2138, "step": 440 }, { "epoch": 0.11494477568175154, "grad_norm": 1.316803592330761, "learning_rate": 4.942616997037096e-06, "loss": 0.2167, "step": 441 }, { "epoch": 0.11520542143159677, "grad_norm": 1.0847176473399318, "learning_rate": 4.942156953050733e-06, "loss": 0.1971, "step": 442 }, { "epoch": 0.11546606718144202, "grad_norm": 1.1393762312308913, "learning_rate": 4.94169509389751e-06, "loss": 0.2178, "step": 443 }, { "epoch": 0.11572671293128726, "grad_norm": 1.3346431236999603, "learning_rate": 4.94123141992071e-06, "loss": 0.1947, "step": 444 }, { "epoch": 0.11598735868113251, "grad_norm": 1.1592361436822725, "learning_rate": 4.940765931464967e-06, "loss": 0.214, "step": 445 }, { "epoch": 0.11624800443097774, "grad_norm": 1.1959646611923167, "learning_rate": 4.940298628876261e-06, "loss": 0.2057, "step": 446 }, { "epoch": 0.11650865018082299, "grad_norm": 1.2206775320875654, "learning_rate": 4.939829512501921e-06, "loss": 0.2237, "step": 447 }, { "epoch": 0.11676929593066823, "grad_norm": 1.167117848437347, "learning_rate": 4.9393585826906245e-06, "loss": 0.213, "step": 448 }, { "epoch": 0.11702994168051348, "grad_norm": 1.0924600398654587, "learning_rate": 4.938885839792395e-06, "loss": 0.1913, "step": 449 }, { "epoch": 0.11729058743035871, "grad_norm": 1.0839862621652845, "learning_rate": 4.938411284158608e-06, "loss": 0.2036, "step": 450 }, { "epoch": 0.11755123318020395, "grad_norm": 1.1137143704961665, "learning_rate": 4.9379349161419795e-06, "loss": 0.2285, "step": 451 }, { "epoch": 0.1178118789300492, "grad_norm": 1.2204619931394156, "learning_rate": 4.937456736096581e-06, "loss": 0.2237, "step": 452 }, { "epoch": 0.11807252467989444, "grad_norm": 1.263048397924981, "learning_rate": 4.936976744377824e-06, "loss": 0.2169, "step": 453 }, { "epoch": 0.11833317042973968, "grad_norm": 1.1334227850099623, "learning_rate": 4.936494941342469e-06, "loss": 0.2134, "step": 454 }, { "epoch": 0.11859381617958492, "grad_norm": 1.0863793620867286, "learning_rate": 4.9360113273486235e-06, "loss": 0.2211, "step": 455 }, { "epoch": 0.11885446192943017, "grad_norm": 1.15593221038835, "learning_rate": 4.93552590275574e-06, "loss": 0.2095, "step": 456 }, { "epoch": 0.11911510767927541, "grad_norm": 1.219452276983786, "learning_rate": 4.935038667924617e-06, "loss": 0.2076, "step": 457 }, { "epoch": 0.11937575342912064, "grad_norm": 1.3708807115879806, "learning_rate": 4.934549623217399e-06, "loss": 0.2288, "step": 458 }, { "epoch": 0.11963639917896589, "grad_norm": 1.2731341036300525, "learning_rate": 4.934058768997573e-06, "loss": 0.2058, "step": 459 }, { "epoch": 0.11989704492881113, "grad_norm": 1.141404003852532, "learning_rate": 4.9335661056299755e-06, "loss": 0.2091, "step": 460 }, { "epoch": 0.12015769067865636, "grad_norm": 1.2117629214506533, "learning_rate": 4.933071633480785e-06, "loss": 0.2021, "step": 461 }, { "epoch": 0.12041833642850161, "grad_norm": 1.0219039368459546, "learning_rate": 4.932575352917524e-06, "loss": 0.1868, "step": 462 }, { "epoch": 0.12067898217834686, "grad_norm": 1.1310252191072823, "learning_rate": 4.932077264309062e-06, "loss": 0.2069, "step": 463 }, { "epoch": 0.1209396279281921, "grad_norm": 1.2465241712420914, "learning_rate": 4.931577368025607e-06, "loss": 0.2101, "step": 464 }, { "epoch": 0.12120027367803733, "grad_norm": 1.1819138975825272, "learning_rate": 4.9310756644387155e-06, "loss": 0.2041, "step": 465 }, { "epoch": 0.12146091942788258, "grad_norm": 1.0587980253115248, "learning_rate": 4.930572153921287e-06, "loss": 0.1973, "step": 466 }, { "epoch": 0.12172156517772782, "grad_norm": 1.077945562883169, "learning_rate": 4.93006683684756e-06, "loss": 0.1925, "step": 467 }, { "epoch": 0.12198221092757307, "grad_norm": 1.1174849877101407, "learning_rate": 4.92955971359312e-06, "loss": 0.2032, "step": 468 }, { "epoch": 0.1222428566774183, "grad_norm": 1.0730944011160337, "learning_rate": 4.929050784534892e-06, "loss": 0.198, "step": 469 }, { "epoch": 0.12250350242726354, "grad_norm": 1.0738778056777263, "learning_rate": 4.928540050051146e-06, "loss": 0.1881, "step": 470 }, { "epoch": 0.12276414817710879, "grad_norm": 1.086321234408292, "learning_rate": 4.928027510521491e-06, "loss": 0.1874, "step": 471 }, { "epoch": 0.12302479392695403, "grad_norm": 1.1953765980425175, "learning_rate": 4.927513166326881e-06, "loss": 0.2023, "step": 472 }, { "epoch": 0.12328543967679927, "grad_norm": 1.26780644753853, "learning_rate": 4.926997017849609e-06, "loss": 0.2146, "step": 473 }, { "epoch": 0.12354608542664451, "grad_norm": 1.0533043926784156, "learning_rate": 4.9264790654733076e-06, "loss": 0.1958, "step": 474 }, { "epoch": 0.12380673117648976, "grad_norm": 1.168530454740135, "learning_rate": 4.925959309582954e-06, "loss": 0.2018, "step": 475 }, { "epoch": 0.124067376926335, "grad_norm": 1.1577175366504873, "learning_rate": 4.925437750564863e-06, "loss": 0.1938, "step": 476 }, { "epoch": 0.12432802267618023, "grad_norm": 1.0937153093946255, "learning_rate": 4.924914388806691e-06, "loss": 0.2092, "step": 477 }, { "epoch": 0.12458866842602548, "grad_norm": 1.0596091220758854, "learning_rate": 4.924389224697433e-06, "loss": 0.2134, "step": 478 }, { "epoch": 0.12484931417587072, "grad_norm": 1.0672685313346664, "learning_rate": 4.923862258627426e-06, "loss": 0.1949, "step": 479 }, { "epoch": 0.12510995992571597, "grad_norm": 1.0866112741325737, "learning_rate": 4.923333490988343e-06, "loss": 0.2174, "step": 480 }, { "epoch": 0.12537060567556121, "grad_norm": 1.110499753500649, "learning_rate": 4.9228029221731995e-06, "loss": 0.2121, "step": 481 }, { "epoch": 0.12563125142540643, "grad_norm": 1.1322270530429546, "learning_rate": 4.922270552576347e-06, "loss": 0.1985, "step": 482 }, { "epoch": 0.12589189717525168, "grad_norm": 1.086601132849964, "learning_rate": 4.921736382593477e-06, "loss": 0.2162, "step": 483 }, { "epoch": 0.12615254292509692, "grad_norm": 1.1586142029694844, "learning_rate": 4.921200412621619e-06, "loss": 0.1971, "step": 484 }, { "epoch": 0.12641318867494217, "grad_norm": 1.1069894085726864, "learning_rate": 4.920662643059139e-06, "loss": 0.2041, "step": 485 }, { "epoch": 0.1266738344247874, "grad_norm": 1.0597305263831693, "learning_rate": 4.920123074305743e-06, "loss": 0.2055, "step": 486 }, { "epoch": 0.12693448017463266, "grad_norm": 1.0428438670216424, "learning_rate": 4.919581706762472e-06, "loss": 0.2, "step": 487 }, { "epoch": 0.1271951259244779, "grad_norm": 1.1668701393837202, "learning_rate": 4.919038540831705e-06, "loss": 0.2011, "step": 488 }, { "epoch": 0.12745577167432315, "grad_norm": 1.0571379127102554, "learning_rate": 4.918493576917158e-06, "loss": 0.1977, "step": 489 }, { "epoch": 0.12771641742416837, "grad_norm": 1.0669206092592076, "learning_rate": 4.917946815423883e-06, "loss": 0.2195, "step": 490 }, { "epoch": 0.1279770631740136, "grad_norm": 1.1535443090389002, "learning_rate": 4.9173982567582674e-06, "loss": 0.2123, "step": 491 }, { "epoch": 0.12823770892385886, "grad_norm": 1.1423083449606384, "learning_rate": 4.916847901328035e-06, "loss": 0.2134, "step": 492 }, { "epoch": 0.1284983546737041, "grad_norm": 1.1970989191749175, "learning_rate": 4.9162957495422455e-06, "loss": 0.2171, "step": 493 }, { "epoch": 0.12875900042354935, "grad_norm": 1.078200345440434, "learning_rate": 4.915741801811294e-06, "loss": 0.2119, "step": 494 }, { "epoch": 0.1290196461733946, "grad_norm": 1.0937004603037532, "learning_rate": 4.915186058546908e-06, "loss": 0.1982, "step": 495 }, { "epoch": 0.12928029192323984, "grad_norm": 1.158832769377338, "learning_rate": 4.914628520162154e-06, "loss": 0.2113, "step": 496 }, { "epoch": 0.12954093767308505, "grad_norm": 1.1626344733884886, "learning_rate": 4.914069187071426e-06, "loss": 0.218, "step": 497 }, { "epoch": 0.1298015834229303, "grad_norm": 1.1832456544373953, "learning_rate": 4.913508059690461e-06, "loss": 0.2021, "step": 498 }, { "epoch": 0.13006222917277555, "grad_norm": 1.1521964852044893, "learning_rate": 4.91294513843632e-06, "loss": 0.2161, "step": 499 }, { "epoch": 0.1303228749226208, "grad_norm": 1.2357392255741413, "learning_rate": 4.912380423727405e-06, "loss": 0.1845, "step": 500 }, { "epoch": 0.1303228749226208, "eval_loss": 0.20621350407600403, "eval_runtime": 55.2894, "eval_samples_per_second": 44.873, "eval_steps_per_second": 5.625, "step": 500 }, { "epoch": 0.13058352067246604, "grad_norm": 1.1825633794019559, "learning_rate": 4.9118139159834475e-06, "loss": 0.2171, "step": 501 }, { "epoch": 0.13084416642231128, "grad_norm": 1.144229234671981, "learning_rate": 4.911245615625512e-06, "loss": 0.2145, "step": 502 }, { "epoch": 0.13110481217215653, "grad_norm": 1.1054272656525803, "learning_rate": 4.9106755230759955e-06, "loss": 0.2036, "step": 503 }, { "epoch": 0.13136545792200177, "grad_norm": 1.2101895563892755, "learning_rate": 4.910103638758627e-06, "loss": 0.2083, "step": 504 }, { "epoch": 0.131626103671847, "grad_norm": 1.2241620014041494, "learning_rate": 4.909529963098467e-06, "loss": 0.2124, "step": 505 }, { "epoch": 0.13188674942169223, "grad_norm": 1.1504414098425468, "learning_rate": 4.9089544965219095e-06, "loss": 0.1985, "step": 506 }, { "epoch": 0.13214739517153748, "grad_norm": 1.1209304700113019, "learning_rate": 4.908377239456676e-06, "loss": 0.2146, "step": 507 }, { "epoch": 0.13240804092138272, "grad_norm": 1.0525025826847803, "learning_rate": 4.907798192331821e-06, "loss": 0.196, "step": 508 }, { "epoch": 0.13266868667122797, "grad_norm": 1.1660455547358535, "learning_rate": 4.9072173555777304e-06, "loss": 0.2053, "step": 509 }, { "epoch": 0.13292933242107322, "grad_norm": 1.1719283025352758, "learning_rate": 4.90663472962612e-06, "loss": 0.2041, "step": 510 }, { "epoch": 0.13318997817091846, "grad_norm": 1.0941812075329291, "learning_rate": 4.906050314910031e-06, "loss": 0.1976, "step": 511 }, { "epoch": 0.1334506239207637, "grad_norm": 1.068628730737023, "learning_rate": 4.905464111863841e-06, "loss": 0.1969, "step": 512 }, { "epoch": 0.13371126967060892, "grad_norm": 1.1674406336653589, "learning_rate": 4.904876120923253e-06, "loss": 0.199, "step": 513 }, { "epoch": 0.13397191542045417, "grad_norm": 1.1924820961602454, "learning_rate": 4.904286342525298e-06, "loss": 0.1982, "step": 514 }, { "epoch": 0.1342325611702994, "grad_norm": 1.1622725378129894, "learning_rate": 4.903694777108337e-06, "loss": 0.1997, "step": 515 }, { "epoch": 0.13449320692014466, "grad_norm": 1.1116447352873096, "learning_rate": 4.903101425112062e-06, "loss": 0.2011, "step": 516 }, { "epoch": 0.1347538526699899, "grad_norm": 1.1326682250384315, "learning_rate": 4.902506286977486e-06, "loss": 0.2085, "step": 517 }, { "epoch": 0.13501449841983515, "grad_norm": 1.2221800579908655, "learning_rate": 4.9019093631469575e-06, "loss": 0.199, "step": 518 }, { "epoch": 0.1352751441696804, "grad_norm": 1.0915548717722814, "learning_rate": 4.901310654064145e-06, "loss": 0.2023, "step": 519 }, { "epoch": 0.1355357899195256, "grad_norm": 1.0999082489957013, "learning_rate": 4.900710160174048e-06, "loss": 0.2129, "step": 520 }, { "epoch": 0.13579643566937086, "grad_norm": 1.1022061395135934, "learning_rate": 4.900107881922994e-06, "loss": 0.2017, "step": 521 }, { "epoch": 0.1360570814192161, "grad_norm": 1.011752885683048, "learning_rate": 4.899503819758633e-06, "loss": 0.2004, "step": 522 }, { "epoch": 0.13631772716906135, "grad_norm": 0.9937016200565075, "learning_rate": 4.898897974129943e-06, "loss": 0.2057, "step": 523 }, { "epoch": 0.1365783729189066, "grad_norm": 1.05594012884413, "learning_rate": 4.898290345487226e-06, "loss": 0.1989, "step": 524 }, { "epoch": 0.13683901866875184, "grad_norm": 1.1553991709852662, "learning_rate": 4.897680934282113e-06, "loss": 0.2024, "step": 525 }, { "epoch": 0.13709966441859708, "grad_norm": 1.0457686496255811, "learning_rate": 4.897069740967554e-06, "loss": 0.1963, "step": 526 }, { "epoch": 0.13736031016844233, "grad_norm": 1.0365758729596155, "learning_rate": 4.896456765997829e-06, "loss": 0.1912, "step": 527 }, { "epoch": 0.13762095591828755, "grad_norm": 1.0704441745241242, "learning_rate": 4.89584200982854e-06, "loss": 0.1969, "step": 528 }, { "epoch": 0.1378816016681328, "grad_norm": 0.9896722615370207, "learning_rate": 4.895225472916612e-06, "loss": 0.188, "step": 529 }, { "epoch": 0.13814224741797804, "grad_norm": 1.1216888937039267, "learning_rate": 4.894607155720294e-06, "loss": 0.2207, "step": 530 }, { "epoch": 0.13840289316782328, "grad_norm": 1.4256835818154245, "learning_rate": 4.893987058699162e-06, "loss": 0.1956, "step": 531 }, { "epoch": 0.13866353891766853, "grad_norm": 1.1660047753041414, "learning_rate": 4.893365182314108e-06, "loss": 0.2023, "step": 532 }, { "epoch": 0.13892418466751377, "grad_norm": 1.0858480337159027, "learning_rate": 4.8927415270273525e-06, "loss": 0.2117, "step": 533 }, { "epoch": 0.13918483041735902, "grad_norm": 1.1671907902651357, "learning_rate": 4.892116093302436e-06, "loss": 0.2112, "step": 534 }, { "epoch": 0.13944547616720424, "grad_norm": 1.0593913138320652, "learning_rate": 4.8914888816042186e-06, "loss": 0.2121, "step": 535 }, { "epoch": 0.13970612191704948, "grad_norm": 1.0980087754847343, "learning_rate": 4.890859892398886e-06, "loss": 0.2054, "step": 536 }, { "epoch": 0.13996676766689473, "grad_norm": 1.0793856093346874, "learning_rate": 4.890229126153942e-06, "loss": 0.1934, "step": 537 }, { "epoch": 0.14022741341673997, "grad_norm": 1.0500802090352275, "learning_rate": 4.889596583338213e-06, "loss": 0.1976, "step": 538 }, { "epoch": 0.14048805916658522, "grad_norm": 1.1548444183601698, "learning_rate": 4.888962264421845e-06, "loss": 0.2, "step": 539 }, { "epoch": 0.14074870491643046, "grad_norm": 1.1242354622212878, "learning_rate": 4.8883261698763045e-06, "loss": 0.2004, "step": 540 }, { "epoch": 0.1410093506662757, "grad_norm": 1.1076515211720048, "learning_rate": 4.887688300174377e-06, "loss": 0.1994, "step": 541 }, { "epoch": 0.14126999641612095, "grad_norm": 1.0855537211590511, "learning_rate": 4.887048655790169e-06, "loss": 0.2079, "step": 542 }, { "epoch": 0.14153064216596617, "grad_norm": 1.1568999705480736, "learning_rate": 4.886407237199103e-06, "loss": 0.1977, "step": 543 }, { "epoch": 0.14179128791581141, "grad_norm": 1.1129333529687897, "learning_rate": 4.8857640448779246e-06, "loss": 0.1972, "step": 544 }, { "epoch": 0.14205193366565666, "grad_norm": 1.1754312522803152, "learning_rate": 4.885119079304694e-06, "loss": 0.1939, "step": 545 }, { "epoch": 0.1423125794155019, "grad_norm": 1.1326754275429542, "learning_rate": 4.884472340958791e-06, "loss": 0.2151, "step": 546 }, { "epoch": 0.14257322516534715, "grad_norm": 1.2436249999550015, "learning_rate": 4.883823830320913e-06, "loss": 0.2168, "step": 547 }, { "epoch": 0.1428338709151924, "grad_norm": 1.0340683520801175, "learning_rate": 4.883173547873073e-06, "loss": 0.1884, "step": 548 }, { "epoch": 0.14309451666503764, "grad_norm": 1.1039500581796489, "learning_rate": 4.882521494098605e-06, "loss": 0.2061, "step": 549 }, { "epoch": 0.14335516241488286, "grad_norm": 1.1269264487984179, "learning_rate": 4.881867669482157e-06, "loss": 0.2098, "step": 550 }, { "epoch": 0.1436158081647281, "grad_norm": 1.1181429702103622, "learning_rate": 4.8812120745096906e-06, "loss": 0.2073, "step": 551 }, { "epoch": 0.14387645391457335, "grad_norm": 1.135931688182785, "learning_rate": 4.880554709668486e-06, "loss": 0.2063, "step": 552 }, { "epoch": 0.1441370996644186, "grad_norm": 1.1109190731396077, "learning_rate": 4.879895575447141e-06, "loss": 0.2184, "step": 553 }, { "epoch": 0.14439774541426384, "grad_norm": 1.2662310187617263, "learning_rate": 4.879234672335564e-06, "loss": 0.1945, "step": 554 }, { "epoch": 0.14465839116410908, "grad_norm": 1.0724191355981658, "learning_rate": 4.878572000824982e-06, "loss": 0.1979, "step": 555 }, { "epoch": 0.14491903691395433, "grad_norm": 1.1606049782311942, "learning_rate": 4.8779075614079354e-06, "loss": 0.2033, "step": 556 }, { "epoch": 0.14517968266379958, "grad_norm": 1.05376112916396, "learning_rate": 4.877241354578275e-06, "loss": 0.196, "step": 557 }, { "epoch": 0.1454403284136448, "grad_norm": 1.107603710599297, "learning_rate": 4.87657338083117e-06, "loss": 0.2125, "step": 558 }, { "epoch": 0.14570097416349004, "grad_norm": 1.2279402378549504, "learning_rate": 4.875903640663101e-06, "loss": 0.2008, "step": 559 }, { "epoch": 0.14596161991333528, "grad_norm": 1.0434403116705722, "learning_rate": 4.875232134571863e-06, "loss": 0.199, "step": 560 }, { "epoch": 0.14622226566318053, "grad_norm": 1.0331454878726583, "learning_rate": 4.874558863056559e-06, "loss": 0.1984, "step": 561 }, { "epoch": 0.14648291141302577, "grad_norm": 1.022422038217463, "learning_rate": 4.8738838266176094e-06, "loss": 0.1825, "step": 562 }, { "epoch": 0.14674355716287102, "grad_norm": 1.039484393652422, "learning_rate": 4.873207025756744e-06, "loss": 0.2112, "step": 563 }, { "epoch": 0.14700420291271626, "grad_norm": 1.1395649902514557, "learning_rate": 4.872528460977005e-06, "loss": 0.2014, "step": 564 }, { "epoch": 0.1472648486625615, "grad_norm": 1.0425442684939703, "learning_rate": 4.871848132782744e-06, "loss": 0.1847, "step": 565 }, { "epoch": 0.14752549441240673, "grad_norm": 1.2046741182940908, "learning_rate": 4.871166041679626e-06, "loss": 0.1991, "step": 566 }, { "epoch": 0.14778614016225197, "grad_norm": 1.1211984395771037, "learning_rate": 4.870482188174622e-06, "loss": 0.1999, "step": 567 }, { "epoch": 0.14804678591209722, "grad_norm": 1.120433243992959, "learning_rate": 4.869796572776018e-06, "loss": 0.1952, "step": 568 }, { "epoch": 0.14830743166194246, "grad_norm": 1.10091706809171, "learning_rate": 4.8691091959934054e-06, "loss": 0.1982, "step": 569 }, { "epoch": 0.1485680774117877, "grad_norm": 1.1183835789464394, "learning_rate": 4.868420058337687e-06, "loss": 0.2092, "step": 570 }, { "epoch": 0.14882872316163295, "grad_norm": 1.0371804976773336, "learning_rate": 4.8677291603210745e-06, "loss": 0.2003, "step": 571 }, { "epoch": 0.1490893689114782, "grad_norm": 1.1464027071329634, "learning_rate": 4.867036502457087e-06, "loss": 0.2145, "step": 572 }, { "epoch": 0.14935001466132342, "grad_norm": 1.08659501352552, "learning_rate": 4.866342085260551e-06, "loss": 0.2109, "step": 573 }, { "epoch": 0.14961066041116866, "grad_norm": 1.0854603067761843, "learning_rate": 4.865645909247604e-06, "loss": 0.2048, "step": 574 }, { "epoch": 0.1498713061610139, "grad_norm": 1.0674455581212698, "learning_rate": 4.864947974935686e-06, "loss": 0.2075, "step": 575 }, { "epoch": 0.15013195191085915, "grad_norm": 1.1067538854228516, "learning_rate": 4.864248282843548e-06, "loss": 0.2055, "step": 576 }, { "epoch": 0.1503925976607044, "grad_norm": 1.108605126696899, "learning_rate": 4.863546833491245e-06, "loss": 0.2109, "step": 577 }, { "epoch": 0.15065324341054964, "grad_norm": 1.0716802123319196, "learning_rate": 4.862843627400139e-06, "loss": 0.1901, "step": 578 }, { "epoch": 0.1509138891603949, "grad_norm": 1.0870925055451213, "learning_rate": 4.862138665092898e-06, "loss": 0.2069, "step": 579 }, { "epoch": 0.15117453491024013, "grad_norm": 1.1331993322826823, "learning_rate": 4.861431947093494e-06, "loss": 0.1924, "step": 580 }, { "epoch": 0.15143518066008535, "grad_norm": 1.1869813720874995, "learning_rate": 4.860723473927206e-06, "loss": 0.2028, "step": 581 }, { "epoch": 0.1516958264099306, "grad_norm": 1.0440223971611904, "learning_rate": 4.860013246120616e-06, "loss": 0.2047, "step": 582 }, { "epoch": 0.15195647215977584, "grad_norm": 1.105973347003753, "learning_rate": 4.8593012642016105e-06, "loss": 0.2118, "step": 583 }, { "epoch": 0.15221711790962109, "grad_norm": 1.318808997795614, "learning_rate": 4.85858752869938e-06, "loss": 0.2081, "step": 584 }, { "epoch": 0.15247776365946633, "grad_norm": 1.1517472598916363, "learning_rate": 4.857872040144418e-06, "loss": 0.1927, "step": 585 }, { "epoch": 0.15273840940931158, "grad_norm": 1.1804732948259302, "learning_rate": 4.857154799068522e-06, "loss": 0.2069, "step": 586 }, { "epoch": 0.15299905515915682, "grad_norm": 1.3681313569486804, "learning_rate": 4.856435806004791e-06, "loss": 0.1952, "step": 587 }, { "epoch": 0.15325970090900204, "grad_norm": 1.12811756521905, "learning_rate": 4.855715061487626e-06, "loss": 0.206, "step": 588 }, { "epoch": 0.15352034665884728, "grad_norm": 1.1067409979898604, "learning_rate": 4.854992566052731e-06, "loss": 0.1949, "step": 589 }, { "epoch": 0.15378099240869253, "grad_norm": 1.132471946274934, "learning_rate": 4.8542683202371105e-06, "loss": 0.1994, "step": 590 }, { "epoch": 0.15404163815853777, "grad_norm": 1.0256717940257505, "learning_rate": 4.85354232457907e-06, "loss": 0.1983, "step": 591 }, { "epoch": 0.15430228390838302, "grad_norm": 1.1013044176763551, "learning_rate": 4.8528145796182155e-06, "loss": 0.1956, "step": 592 }, { "epoch": 0.15456292965822827, "grad_norm": 1.127219735687816, "learning_rate": 4.852085085895454e-06, "loss": 0.2133, "step": 593 }, { "epoch": 0.1548235754080735, "grad_norm": 1.1187629917531008, "learning_rate": 4.851353843952992e-06, "loss": 0.1981, "step": 594 }, { "epoch": 0.15508422115791876, "grad_norm": 1.1102784455224672, "learning_rate": 4.850620854334334e-06, "loss": 0.1892, "step": 595 }, { "epoch": 0.15534486690776397, "grad_norm": 1.1099075919188115, "learning_rate": 4.849886117584286e-06, "loss": 0.2087, "step": 596 }, { "epoch": 0.15560551265760922, "grad_norm": 1.0767063684229123, "learning_rate": 4.849149634248951e-06, "loss": 0.2008, "step": 597 }, { "epoch": 0.15586615840745446, "grad_norm": 1.0682799518557837, "learning_rate": 4.84841140487573e-06, "loss": 0.2021, "step": 598 }, { "epoch": 0.1561268041572997, "grad_norm": 1.0228832201010691, "learning_rate": 4.847671430013322e-06, "loss": 0.1873, "step": 599 }, { "epoch": 0.15638744990714495, "grad_norm": 1.157439081669475, "learning_rate": 4.846929710211724e-06, "loss": 0.2094, "step": 600 }, { "epoch": 0.1566480956569902, "grad_norm": 1.2202515994117855, "learning_rate": 4.846186246022228e-06, "loss": 0.1973, "step": 601 }, { "epoch": 0.15690874140683544, "grad_norm": 1.081320706748401, "learning_rate": 4.845441037997428e-06, "loss": 0.2007, "step": 602 }, { "epoch": 0.1571693871566807, "grad_norm": 0.9946434329454258, "learning_rate": 4.8446940866912055e-06, "loss": 0.1849, "step": 603 }, { "epoch": 0.1574300329065259, "grad_norm": 1.0665171840382737, "learning_rate": 4.843945392658744e-06, "loss": 0.2019, "step": 604 }, { "epoch": 0.15769067865637115, "grad_norm": 1.1384759974440248, "learning_rate": 4.843194956456522e-06, "loss": 0.2105, "step": 605 }, { "epoch": 0.1579513244062164, "grad_norm": 1.0951020015629205, "learning_rate": 4.84244277864231e-06, "loss": 0.1888, "step": 606 }, { "epoch": 0.15821197015606164, "grad_norm": 1.1935542732129856, "learning_rate": 4.841688859775176e-06, "loss": 0.2056, "step": 607 }, { "epoch": 0.1584726159059069, "grad_norm": 1.0924107854606055, "learning_rate": 4.840933200415479e-06, "loss": 0.1996, "step": 608 }, { "epoch": 0.15873326165575213, "grad_norm": 1.0622882660706423, "learning_rate": 4.8401758011248735e-06, "loss": 0.2004, "step": 609 }, { "epoch": 0.15899390740559738, "grad_norm": 1.2399992886738505, "learning_rate": 4.839416662466307e-06, "loss": 0.2157, "step": 610 }, { "epoch": 0.1592545531554426, "grad_norm": 1.1344311103362736, "learning_rate": 4.838655785004022e-06, "loss": 0.2046, "step": 611 }, { "epoch": 0.15951519890528784, "grad_norm": 1.2165688562297783, "learning_rate": 4.837893169303548e-06, "loss": 0.1948, "step": 612 }, { "epoch": 0.1597758446551331, "grad_norm": 1.2692607714671922, "learning_rate": 4.837128815931712e-06, "loss": 0.2035, "step": 613 }, { "epoch": 0.16003649040497833, "grad_norm": 1.096342701080017, "learning_rate": 4.836362725456628e-06, "loss": 0.212, "step": 614 }, { "epoch": 0.16029713615482358, "grad_norm": 1.132833857445837, "learning_rate": 4.835594898447705e-06, "loss": 0.1998, "step": 615 }, { "epoch": 0.16055778190466882, "grad_norm": 1.1961922655211261, "learning_rate": 4.834825335475641e-06, "loss": 0.2039, "step": 616 }, { "epoch": 0.16081842765451407, "grad_norm": 1.1270743941593029, "learning_rate": 4.834054037112423e-06, "loss": 0.1888, "step": 617 }, { "epoch": 0.1610790734043593, "grad_norm": 1.128459136972985, "learning_rate": 4.833281003931331e-06, "loss": 0.2136, "step": 618 }, { "epoch": 0.16133971915420453, "grad_norm": 1.0307556996174267, "learning_rate": 4.832506236506931e-06, "loss": 0.1948, "step": 619 }, { "epoch": 0.16160036490404978, "grad_norm": 1.110437686544556, "learning_rate": 4.831729735415081e-06, "loss": 0.2045, "step": 620 }, { "epoch": 0.16186101065389502, "grad_norm": 0.997423141146995, "learning_rate": 4.830951501232924e-06, "loss": 0.1935, "step": 621 }, { "epoch": 0.16212165640374027, "grad_norm": 1.0154344167380789, "learning_rate": 4.830171534538895e-06, "loss": 0.1901, "step": 622 }, { "epoch": 0.1623823021535855, "grad_norm": 1.032991020250622, "learning_rate": 4.829389835912715e-06, "loss": 0.1957, "step": 623 }, { "epoch": 0.16264294790343076, "grad_norm": 1.1366317528784267, "learning_rate": 4.828606405935391e-06, "loss": 0.194, "step": 624 }, { "epoch": 0.162903593653276, "grad_norm": 1.1076339020829897, "learning_rate": 4.82782124518922e-06, "loss": 0.2038, "step": 625 }, { "epoch": 0.16316423940312122, "grad_norm": 1.2481367919913737, "learning_rate": 4.827034354257782e-06, "loss": 0.2175, "step": 626 }, { "epoch": 0.16342488515296646, "grad_norm": 1.1156785774658573, "learning_rate": 4.8262457337259465e-06, "loss": 0.2045, "step": 627 }, { "epoch": 0.1636855309028117, "grad_norm": 1.044796602717087, "learning_rate": 4.825455384179864e-06, "loss": 0.1896, "step": 628 }, { "epoch": 0.16394617665265696, "grad_norm": 1.0615840842046096, "learning_rate": 4.8246633062069744e-06, "loss": 0.1928, "step": 629 }, { "epoch": 0.1642068224025022, "grad_norm": 1.0430683077685108, "learning_rate": 4.823869500395999e-06, "loss": 0.1944, "step": 630 }, { "epoch": 0.16446746815234745, "grad_norm": 1.0302097864432338, "learning_rate": 4.823073967336948e-06, "loss": 0.1889, "step": 631 }, { "epoch": 0.1647281139021927, "grad_norm": 1.101693434181295, "learning_rate": 4.822276707621109e-06, "loss": 0.2086, "step": 632 }, { "epoch": 0.16498875965203794, "grad_norm": 1.0368063473662303, "learning_rate": 4.821477721841058e-06, "loss": 0.1814, "step": 633 }, { "epoch": 0.16524940540188315, "grad_norm": 1.0062856545490781, "learning_rate": 4.820677010590652e-06, "loss": 0.1912, "step": 634 }, { "epoch": 0.1655100511517284, "grad_norm": 1.0500239654699925, "learning_rate": 4.819874574465031e-06, "loss": 0.1949, "step": 635 }, { "epoch": 0.16577069690157364, "grad_norm": 1.0885596314684753, "learning_rate": 4.819070414060616e-06, "loss": 0.2109, "step": 636 }, { "epoch": 0.1660313426514189, "grad_norm": 1.0926621471946387, "learning_rate": 4.81826452997511e-06, "loss": 0.2138, "step": 637 }, { "epoch": 0.16629198840126413, "grad_norm": 1.1306259980972675, "learning_rate": 4.817456922807499e-06, "loss": 0.1931, "step": 638 }, { "epoch": 0.16655263415110938, "grad_norm": 1.1617548574719472, "learning_rate": 4.816647593158047e-06, "loss": 0.2032, "step": 639 }, { "epoch": 0.16681327990095463, "grad_norm": 1.0643070073454748, "learning_rate": 4.815836541628299e-06, "loss": 0.1964, "step": 640 }, { "epoch": 0.16707392565079987, "grad_norm": 1.1723779161106669, "learning_rate": 4.815023768821082e-06, "loss": 0.1992, "step": 641 }, { "epoch": 0.1673345714006451, "grad_norm": 1.2249720198047547, "learning_rate": 4.814209275340498e-06, "loss": 0.1978, "step": 642 }, { "epoch": 0.16759521715049033, "grad_norm": 1.0500176770527099, "learning_rate": 4.813393061791933e-06, "loss": 0.1922, "step": 643 }, { "epoch": 0.16785586290033558, "grad_norm": 1.0664617506564376, "learning_rate": 4.8125751287820484e-06, "loss": 0.204, "step": 644 }, { "epoch": 0.16811650865018082, "grad_norm": 1.2896749494242672, "learning_rate": 4.8117554769187835e-06, "loss": 0.2011, "step": 645 }, { "epoch": 0.16837715440002607, "grad_norm": 1.0878071157542273, "learning_rate": 4.8109341068113566e-06, "loss": 0.1886, "step": 646 }, { "epoch": 0.16863780014987131, "grad_norm": 1.0258331513183527, "learning_rate": 4.8101110190702616e-06, "loss": 0.1954, "step": 647 }, { "epoch": 0.16889844589971656, "grad_norm": 1.185075871010396, "learning_rate": 4.8092862143072705e-06, "loss": 0.2093, "step": 648 }, { "epoch": 0.16915909164956178, "grad_norm": 1.1318102891672697, "learning_rate": 4.8084596931354296e-06, "loss": 0.1946, "step": 649 }, { "epoch": 0.16941973739940702, "grad_norm": 1.1261180357362337, "learning_rate": 4.807631456169064e-06, "loss": 0.2056, "step": 650 }, { "epoch": 0.16968038314925227, "grad_norm": 1.1239264370628748, "learning_rate": 4.806801504023771e-06, "loss": 0.22, "step": 651 }, { "epoch": 0.1699410288990975, "grad_norm": 1.1244911411708225, "learning_rate": 4.805969837316424e-06, "loss": 0.2022, "step": 652 }, { "epoch": 0.17020167464894276, "grad_norm": 1.0996338760638402, "learning_rate": 4.805136456665172e-06, "loss": 0.2011, "step": 653 }, { "epoch": 0.170462320398788, "grad_norm": 1.1076152837235755, "learning_rate": 4.804301362689435e-06, "loss": 0.2085, "step": 654 }, { "epoch": 0.17072296614863325, "grad_norm": 1.0822004028319006, "learning_rate": 4.803464556009909e-06, "loss": 0.2016, "step": 655 }, { "epoch": 0.1709836118984785, "grad_norm": 1.031071544807452, "learning_rate": 4.8026260372485625e-06, "loss": 0.1828, "step": 656 }, { "epoch": 0.1712442576483237, "grad_norm": 1.0337528620553569, "learning_rate": 4.801785807028635e-06, "loss": 0.2062, "step": 657 }, { "epoch": 0.17150490339816896, "grad_norm": 1.131851538185433, "learning_rate": 4.8009438659746396e-06, "loss": 0.2145, "step": 658 }, { "epoch": 0.1717655491480142, "grad_norm": 1.1378759828646419, "learning_rate": 4.800100214712361e-06, "loss": 0.1994, "step": 659 }, { "epoch": 0.17202619489785945, "grad_norm": 1.049595530301827, "learning_rate": 4.7992548538688554e-06, "loss": 0.1994, "step": 660 }, { "epoch": 0.1722868406477047, "grad_norm": 1.0179794571728296, "learning_rate": 4.7984077840724475e-06, "loss": 0.1934, "step": 661 }, { "epoch": 0.17254748639754994, "grad_norm": 1.0772923066704974, "learning_rate": 4.797559005952733e-06, "loss": 0.2082, "step": 662 }, { "epoch": 0.17280813214739518, "grad_norm": 1.1527481654580436, "learning_rate": 4.796708520140581e-06, "loss": 0.2112, "step": 663 }, { "epoch": 0.1730687778972404, "grad_norm": 1.0373926969137355, "learning_rate": 4.795856327268124e-06, "loss": 0.2012, "step": 664 }, { "epoch": 0.17332942364708565, "grad_norm": 1.0614492443960328, "learning_rate": 4.795002427968767e-06, "loss": 0.1923, "step": 665 }, { "epoch": 0.1735900693969309, "grad_norm": 0.9715721635467356, "learning_rate": 4.794146822877182e-06, "loss": 0.1821, "step": 666 }, { "epoch": 0.17385071514677614, "grad_norm": 1.1001943012084339, "learning_rate": 4.79328951262931e-06, "loss": 0.2159, "step": 667 }, { "epoch": 0.17411136089662138, "grad_norm": 1.05908720902553, "learning_rate": 4.792430497862358e-06, "loss": 0.2008, "step": 668 }, { "epoch": 0.17437200664646663, "grad_norm": 1.0990300098077888, "learning_rate": 4.791569779214802e-06, "loss": 0.2022, "step": 669 }, { "epoch": 0.17463265239631187, "grad_norm": 1.0847693110996794, "learning_rate": 4.790707357326381e-06, "loss": 0.2015, "step": 670 }, { "epoch": 0.17489329814615712, "grad_norm": 1.0407717344717147, "learning_rate": 4.789843232838104e-06, "loss": 0.1877, "step": 671 }, { "epoch": 0.17515394389600233, "grad_norm": 1.098167311122374, "learning_rate": 4.788977406392242e-06, "loss": 0.1907, "step": 672 }, { "epoch": 0.17541458964584758, "grad_norm": 1.181139808089054, "learning_rate": 4.7881098786323325e-06, "loss": 0.2072, "step": 673 }, { "epoch": 0.17567523539569282, "grad_norm": 1.031433971923844, "learning_rate": 4.787240650203178e-06, "loss": 0.189, "step": 674 }, { "epoch": 0.17593588114553807, "grad_norm": 1.0838482902376672, "learning_rate": 4.786369721750844e-06, "loss": 0.1997, "step": 675 }, { "epoch": 0.17619652689538332, "grad_norm": 1.04435803500681, "learning_rate": 4.785497093922662e-06, "loss": 0.1984, "step": 676 }, { "epoch": 0.17645717264522856, "grad_norm": 1.1407073537765495, "learning_rate": 4.784622767367222e-06, "loss": 0.1942, "step": 677 }, { "epoch": 0.1767178183950738, "grad_norm": 1.1760079488913053, "learning_rate": 4.78374674273438e-06, "loss": 0.2092, "step": 678 }, { "epoch": 0.17697846414491902, "grad_norm": 1.1277367662937579, "learning_rate": 4.782869020675255e-06, "loss": 0.2027, "step": 679 }, { "epoch": 0.17723910989476427, "grad_norm": 0.9712625062896029, "learning_rate": 4.781989601842224e-06, "loss": 0.1839, "step": 680 }, { "epoch": 0.1774997556446095, "grad_norm": 1.2132021997691438, "learning_rate": 4.7811084868889275e-06, "loss": 0.1918, "step": 681 }, { "epoch": 0.17776040139445476, "grad_norm": 1.0576999700793133, "learning_rate": 4.780225676470268e-06, "loss": 0.1747, "step": 682 }, { "epoch": 0.1780210471443, "grad_norm": 1.1235786434959403, "learning_rate": 4.779341171242405e-06, "loss": 0.2075, "step": 683 }, { "epoch": 0.17828169289414525, "grad_norm": 1.1729875922444302, "learning_rate": 4.77845497186276e-06, "loss": 0.1945, "step": 684 }, { "epoch": 0.1785423386439905, "grad_norm": 1.1443558228194373, "learning_rate": 4.777567078990012e-06, "loss": 0.2087, "step": 685 }, { "epoch": 0.17880298439383574, "grad_norm": 1.2227324124745917, "learning_rate": 4.776677493284101e-06, "loss": 0.2078, "step": 686 }, { "epoch": 0.17906363014368096, "grad_norm": 1.011497026531825, "learning_rate": 4.775786215406223e-06, "loss": 0.1916, "step": 687 }, { "epoch": 0.1793242758935262, "grad_norm": 1.1229290485920538, "learning_rate": 4.774893246018831e-06, "loss": 0.1955, "step": 688 }, { "epoch": 0.17958492164337145, "grad_norm": 1.1148999459906488, "learning_rate": 4.773998585785641e-06, "loss": 0.2095, "step": 689 }, { "epoch": 0.1798455673932167, "grad_norm": 1.0260523222472793, "learning_rate": 4.773102235371617e-06, "loss": 0.1841, "step": 690 }, { "epoch": 0.18010621314306194, "grad_norm": 1.0624682969637003, "learning_rate": 4.772204195442986e-06, "loss": 0.1938, "step": 691 }, { "epoch": 0.18036685889290718, "grad_norm": 1.115185179948455, "learning_rate": 4.771304466667229e-06, "loss": 0.2016, "step": 692 }, { "epoch": 0.18062750464275243, "grad_norm": 1.0320930744973207, "learning_rate": 4.770403049713082e-06, "loss": 0.1991, "step": 693 }, { "epoch": 0.18088815039259767, "grad_norm": 1.125821973172626, "learning_rate": 4.769499945250533e-06, "loss": 0.2082, "step": 694 }, { "epoch": 0.1811487961424429, "grad_norm": 1.1202606562167456, "learning_rate": 4.768595153950829e-06, "loss": 0.2015, "step": 695 }, { "epoch": 0.18140944189228814, "grad_norm": 1.0891173326017467, "learning_rate": 4.76768867648647e-06, "loss": 0.1988, "step": 696 }, { "epoch": 0.18167008764213338, "grad_norm": 1.1546613788231848, "learning_rate": 4.766780513531205e-06, "loss": 0.2071, "step": 697 }, { "epoch": 0.18193073339197863, "grad_norm": 1.03977332190638, "learning_rate": 4.7658706657600395e-06, "loss": 0.1923, "step": 698 }, { "epoch": 0.18219137914182387, "grad_norm": 1.0413436910283043, "learning_rate": 4.764959133849231e-06, "loss": 0.198, "step": 699 }, { "epoch": 0.18245202489166912, "grad_norm": 0.9722028965308553, "learning_rate": 4.764045918476288e-06, "loss": 0.1866, "step": 700 }, { "epoch": 0.18271267064151436, "grad_norm": 1.1107014458396067, "learning_rate": 4.7631310203199706e-06, "loss": 0.1982, "step": 701 }, { "epoch": 0.18297331639135958, "grad_norm": 1.032442032912896, "learning_rate": 4.762214440060289e-06, "loss": 0.1959, "step": 702 }, { "epoch": 0.18323396214120483, "grad_norm": 1.0279447313701335, "learning_rate": 4.761296178378504e-06, "loss": 0.1769, "step": 703 }, { "epoch": 0.18349460789105007, "grad_norm": 1.0870804590090468, "learning_rate": 4.760376235957127e-06, "loss": 0.2002, "step": 704 }, { "epoch": 0.18375525364089532, "grad_norm": 1.1617180929191986, "learning_rate": 4.759454613479918e-06, "loss": 0.1969, "step": 705 }, { "epoch": 0.18401589939074056, "grad_norm": 1.101746318280623, "learning_rate": 4.758531311631884e-06, "loss": 0.1884, "step": 706 }, { "epoch": 0.1842765451405858, "grad_norm": 1.0302115933179692, "learning_rate": 4.7576063310992835e-06, "loss": 0.1979, "step": 707 }, { "epoch": 0.18453719089043105, "grad_norm": 1.0699066538087196, "learning_rate": 4.756679672569621e-06, "loss": 0.196, "step": 708 }, { "epoch": 0.1847978366402763, "grad_norm": 1.0139300985541333, "learning_rate": 4.7557513367316475e-06, "loss": 0.1823, "step": 709 }, { "epoch": 0.18505848239012151, "grad_norm": 1.111612082520724, "learning_rate": 4.7548213242753616e-06, "loss": 0.211, "step": 710 }, { "epoch": 0.18531912813996676, "grad_norm": 1.010977169255638, "learning_rate": 4.753889635892008e-06, "loss": 0.1855, "step": 711 }, { "epoch": 0.185579773889812, "grad_norm": 1.1581794130232832, "learning_rate": 4.752956272274078e-06, "loss": 0.1998, "step": 712 }, { "epoch": 0.18584041963965725, "grad_norm": 1.0706672168644153, "learning_rate": 4.752021234115304e-06, "loss": 0.2003, "step": 713 }, { "epoch": 0.1861010653895025, "grad_norm": 1.094418022986349, "learning_rate": 4.751084522110669e-06, "loss": 0.1918, "step": 714 }, { "epoch": 0.18636171113934774, "grad_norm": 1.1357402524893316, "learning_rate": 4.750146136956396e-06, "loss": 0.1898, "step": 715 }, { "epoch": 0.186622356889193, "grad_norm": 1.1977098642706674, "learning_rate": 4.749206079349952e-06, "loss": 0.1929, "step": 716 }, { "epoch": 0.1868830026390382, "grad_norm": 1.0205280820382259, "learning_rate": 4.74826434999005e-06, "loss": 0.1708, "step": 717 }, { "epoch": 0.18714364838888345, "grad_norm": 1.1294254178570962, "learning_rate": 4.747320949576641e-06, "loss": 0.2003, "step": 718 }, { "epoch": 0.1874042941387287, "grad_norm": 1.3408447124020728, "learning_rate": 4.746375878810921e-06, "loss": 0.2023, "step": 719 }, { "epoch": 0.18766493988857394, "grad_norm": 1.1376147096571556, "learning_rate": 4.745429138395329e-06, "loss": 0.1977, "step": 720 }, { "epoch": 0.18792558563841918, "grad_norm": 1.1192736911799153, "learning_rate": 4.744480729033539e-06, "loss": 0.2136, "step": 721 }, { "epoch": 0.18818623138826443, "grad_norm": 1.2147023036436002, "learning_rate": 4.743530651430472e-06, "loss": 0.2055, "step": 722 }, { "epoch": 0.18844687713810968, "grad_norm": 1.1319789885261118, "learning_rate": 4.742578906292286e-06, "loss": 0.2036, "step": 723 }, { "epoch": 0.18870752288795492, "grad_norm": 1.0617507983812244, "learning_rate": 4.741625494326379e-06, "loss": 0.2071, "step": 724 }, { "epoch": 0.18896816863780014, "grad_norm": 1.0621385458269959, "learning_rate": 4.740670416241386e-06, "loss": 0.1906, "step": 725 }, { "epoch": 0.18922881438764538, "grad_norm": 1.1202680163720193, "learning_rate": 4.739713672747183e-06, "loss": 0.2055, "step": 726 }, { "epoch": 0.18948946013749063, "grad_norm": 1.0309703967936898, "learning_rate": 4.7387552645548834e-06, "loss": 0.1926, "step": 727 }, { "epoch": 0.18975010588733587, "grad_norm": 1.2186337777294347, "learning_rate": 4.737795192376836e-06, "loss": 0.2044, "step": 728 }, { "epoch": 0.19001075163718112, "grad_norm": 1.181851752506151, "learning_rate": 4.73683345692663e-06, "loss": 0.201, "step": 729 }, { "epoch": 0.19027139738702636, "grad_norm": 1.0220539842509926, "learning_rate": 4.735870058919084e-06, "loss": 0.1961, "step": 730 }, { "epoch": 0.1905320431368716, "grad_norm": 1.1755246321515482, "learning_rate": 4.7349049990702624e-06, "loss": 0.204, "step": 731 }, { "epoch": 0.19079268888671685, "grad_norm": 1.1376783108231912, "learning_rate": 4.733938278097456e-06, "loss": 0.1883, "step": 732 }, { "epoch": 0.19105333463656207, "grad_norm": 1.0348544002799045, "learning_rate": 4.732969896719194e-06, "loss": 0.2011, "step": 733 }, { "epoch": 0.19131398038640732, "grad_norm": 0.9701244234207226, "learning_rate": 4.731999855655239e-06, "loss": 0.1844, "step": 734 }, { "epoch": 0.19157462613625256, "grad_norm": 1.0443264305998883, "learning_rate": 4.731028155626588e-06, "loss": 0.1845, "step": 735 }, { "epoch": 0.1918352718860978, "grad_norm": 1.0988970619130984, "learning_rate": 4.730054797355471e-06, "loss": 0.1976, "step": 736 }, { "epoch": 0.19209591763594305, "grad_norm": 1.132249014059117, "learning_rate": 4.729079781565349e-06, "loss": 0.1927, "step": 737 }, { "epoch": 0.1923565633857883, "grad_norm": 1.106592857872028, "learning_rate": 4.728103108980915e-06, "loss": 0.204, "step": 738 }, { "epoch": 0.19261720913563354, "grad_norm": 0.9762507688081139, "learning_rate": 4.727124780328097e-06, "loss": 0.1737, "step": 739 }, { "epoch": 0.19287785488547876, "grad_norm": 1.106322078596077, "learning_rate": 4.726144796334049e-06, "loss": 0.2055, "step": 740 }, { "epoch": 0.193138500635324, "grad_norm": 1.0031336498742895, "learning_rate": 4.7251631577271585e-06, "loss": 0.1884, "step": 741 }, { "epoch": 0.19339914638516925, "grad_norm": 1.0222092024648375, "learning_rate": 4.724179865237042e-06, "loss": 0.1782, "step": 742 }, { "epoch": 0.1936597921350145, "grad_norm": 1.0427927704765936, "learning_rate": 4.723194919594545e-06, "loss": 0.1993, "step": 743 }, { "epoch": 0.19392043788485974, "grad_norm": 1.0940055315295494, "learning_rate": 4.722208321531743e-06, "loss": 0.2008, "step": 744 }, { "epoch": 0.194181083634705, "grad_norm": 1.0375241367708676, "learning_rate": 4.721220071781936e-06, "loss": 0.1918, "step": 745 }, { "epoch": 0.19444172938455023, "grad_norm": 1.0793663086279626, "learning_rate": 4.720230171079657e-06, "loss": 0.2104, "step": 746 }, { "epoch": 0.19470237513439548, "grad_norm": 1.0239303754228282, "learning_rate": 4.719238620160662e-06, "loss": 0.1904, "step": 747 }, { "epoch": 0.1949630208842407, "grad_norm": 1.0933861462965642, "learning_rate": 4.7182454197619355e-06, "loss": 0.203, "step": 748 }, { "epoch": 0.19522366663408594, "grad_norm": 1.1141048299129739, "learning_rate": 4.717250570621686e-06, "loss": 0.1891, "step": 749 }, { "epoch": 0.19548431238393119, "grad_norm": 1.0802080393798392, "learning_rate": 4.716254073479352e-06, "loss": 0.196, "step": 750 }, { "epoch": 0.19574495813377643, "grad_norm": 1.1749653739212071, "learning_rate": 4.71525592907559e-06, "loss": 0.1821, "step": 751 }, { "epoch": 0.19600560388362168, "grad_norm": 1.1286257660983583, "learning_rate": 4.714256138152287e-06, "loss": 0.1913, "step": 752 }, { "epoch": 0.19626624963346692, "grad_norm": 1.0361869606001164, "learning_rate": 4.71325470145255e-06, "loss": 0.1873, "step": 753 }, { "epoch": 0.19652689538331217, "grad_norm": 1.1590510036038018, "learning_rate": 4.712251619720712e-06, "loss": 0.2, "step": 754 }, { "epoch": 0.19678754113315738, "grad_norm": 1.0698043318565782, "learning_rate": 4.711246893702327e-06, "loss": 0.1751, "step": 755 }, { "epoch": 0.19704818688300263, "grad_norm": 1.0394562257200282, "learning_rate": 4.71024052414417e-06, "loss": 0.2008, "step": 756 }, { "epoch": 0.19730883263284787, "grad_norm": 1.2327521354629953, "learning_rate": 4.709232511794242e-06, "loss": 0.183, "step": 757 }, { "epoch": 0.19756947838269312, "grad_norm": 1.0266833413688505, "learning_rate": 4.70822285740176e-06, "loss": 0.1863, "step": 758 }, { "epoch": 0.19783012413253837, "grad_norm": 1.1161295747627655, "learning_rate": 4.707211561717162e-06, "loss": 0.2063, "step": 759 }, { "epoch": 0.1980907698823836, "grad_norm": 1.094700752168074, "learning_rate": 4.706198625492111e-06, "loss": 0.1937, "step": 760 }, { "epoch": 0.19835141563222886, "grad_norm": 0.9562964166907154, "learning_rate": 4.7051840494794845e-06, "loss": 0.1742, "step": 761 }, { "epoch": 0.1986120613820741, "grad_norm": 1.1700063782296517, "learning_rate": 4.704167834433378e-06, "loss": 0.1992, "step": 762 }, { "epoch": 0.19887270713191932, "grad_norm": 1.1759534762681636, "learning_rate": 4.70314998110911e-06, "loss": 0.2193, "step": 763 }, { "epoch": 0.19913335288176456, "grad_norm": 1.1704865826909405, "learning_rate": 4.702130490263215e-06, "loss": 0.2181, "step": 764 }, { "epoch": 0.1993939986316098, "grad_norm": 1.0236631369656641, "learning_rate": 4.70110936265344e-06, "loss": 0.1862, "step": 765 }, { "epoch": 0.19965464438145505, "grad_norm": 1.0661770373931818, "learning_rate": 4.700086599038755e-06, "loss": 0.2, "step": 766 }, { "epoch": 0.1999152901313003, "grad_norm": 0.9894692730014091, "learning_rate": 4.69906220017934e-06, "loss": 0.195, "step": 767 }, { "epoch": 0.20017593588114554, "grad_norm": 1.0819390606747081, "learning_rate": 4.698036166836598e-06, "loss": 0.1978, "step": 768 }, { "epoch": 0.2004365816309908, "grad_norm": 1.109161645930981, "learning_rate": 4.69700849977314e-06, "loss": 0.2052, "step": 769 }, { "epoch": 0.200697227380836, "grad_norm": 1.0409976580600624, "learning_rate": 4.695979199752794e-06, "loss": 0.1922, "step": 770 }, { "epoch": 0.20095787313068125, "grad_norm": 1.0881647238946446, "learning_rate": 4.694948267540601e-06, "loss": 0.1943, "step": 771 }, { "epoch": 0.2012185188805265, "grad_norm": 1.1012806925283039, "learning_rate": 4.693915703902816e-06, "loss": 0.1945, "step": 772 }, { "epoch": 0.20147916463037174, "grad_norm": 1.061318260332211, "learning_rate": 4.692881509606906e-06, "loss": 0.1837, "step": 773 }, { "epoch": 0.201739810380217, "grad_norm": 1.1151145752085305, "learning_rate": 4.691845685421551e-06, "loss": 0.2192, "step": 774 }, { "epoch": 0.20200045613006223, "grad_norm": 1.1907535411689876, "learning_rate": 4.69080823211664e-06, "loss": 0.1852, "step": 775 }, { "epoch": 0.20226110187990748, "grad_norm": 1.0495009207900328, "learning_rate": 4.689769150463277e-06, "loss": 0.1831, "step": 776 }, { "epoch": 0.20252174762975272, "grad_norm": 1.0229451627497241, "learning_rate": 4.688728441233771e-06, "loss": 0.1886, "step": 777 }, { "epoch": 0.20278239337959794, "grad_norm": 0.9955341426790423, "learning_rate": 4.687686105201645e-06, "loss": 0.1933, "step": 778 }, { "epoch": 0.2030430391294432, "grad_norm": 1.083469211973567, "learning_rate": 4.686642143141629e-06, "loss": 0.1834, "step": 779 }, { "epoch": 0.20330368487928843, "grad_norm": 1.1050222389604538, "learning_rate": 4.685596555829664e-06, "loss": 0.1994, "step": 780 }, { "epoch": 0.20356433062913368, "grad_norm": 1.0671777534723947, "learning_rate": 4.684549344042894e-06, "loss": 0.1904, "step": 781 }, { "epoch": 0.20382497637897892, "grad_norm": 0.9718207591202269, "learning_rate": 4.683500508559676e-06, "loss": 0.1783, "step": 782 }, { "epoch": 0.20408562212882417, "grad_norm": 1.0851767107643262, "learning_rate": 4.682450050159571e-06, "loss": 0.2061, "step": 783 }, { "epoch": 0.2043462678786694, "grad_norm": 1.0278496582192649, "learning_rate": 4.681397969623347e-06, "loss": 0.1924, "step": 784 }, { "epoch": 0.20460691362851466, "grad_norm": 1.1315310274491046, "learning_rate": 4.680344267732977e-06, "loss": 0.2101, "step": 785 }, { "epoch": 0.20486755937835988, "grad_norm": 1.0504831320393817, "learning_rate": 4.679288945271639e-06, "loss": 0.1984, "step": 786 }, { "epoch": 0.20512820512820512, "grad_norm": 0.954881467970483, "learning_rate": 4.678232003023716e-06, "loss": 0.1792, "step": 787 }, { "epoch": 0.20538885087805037, "grad_norm": 1.0448542250323527, "learning_rate": 4.677173441774796e-06, "loss": 0.2004, "step": 788 }, { "epoch": 0.2056494966278956, "grad_norm": 1.019000830534794, "learning_rate": 4.676113262311668e-06, "loss": 0.1877, "step": 789 }, { "epoch": 0.20591014237774086, "grad_norm": 1.0144054383673704, "learning_rate": 4.675051465422326e-06, "loss": 0.2001, "step": 790 }, { "epoch": 0.2061707881275861, "grad_norm": 0.9939997474379112, "learning_rate": 4.673988051895965e-06, "loss": 0.1902, "step": 791 }, { "epoch": 0.20643143387743135, "grad_norm": 1.0620174526419726, "learning_rate": 4.6729230225229815e-06, "loss": 0.195, "step": 792 }, { "epoch": 0.20669207962727656, "grad_norm": 1.0776698278331056, "learning_rate": 4.671856378094974e-06, "loss": 0.1948, "step": 793 }, { "epoch": 0.2069527253771218, "grad_norm": 1.0280108013515827, "learning_rate": 4.670788119404739e-06, "loss": 0.1826, "step": 794 }, { "epoch": 0.20721337112696706, "grad_norm": 1.0265347639386146, "learning_rate": 4.669718247246275e-06, "loss": 0.1866, "step": 795 }, { "epoch": 0.2074740168768123, "grad_norm": 1.0492252231708852, "learning_rate": 4.66864676241478e-06, "loss": 0.1901, "step": 796 }, { "epoch": 0.20773466262665755, "grad_norm": 1.0273642608096223, "learning_rate": 4.6675736657066504e-06, "loss": 0.19, "step": 797 }, { "epoch": 0.2079953083765028, "grad_norm": 1.045456703967718, "learning_rate": 4.666498957919479e-06, "loss": 0.1898, "step": 798 }, { "epoch": 0.20825595412634804, "grad_norm": 0.9772556324261736, "learning_rate": 4.6654226398520574e-06, "loss": 0.1804, "step": 799 }, { "epoch": 0.20851659987619328, "grad_norm": 1.0189609218382047, "learning_rate": 4.664344712304375e-06, "loss": 0.1859, "step": 800 }, { "epoch": 0.2087772456260385, "grad_norm": 1.0400647265431398, "learning_rate": 4.663265176077615e-06, "loss": 0.1837, "step": 801 }, { "epoch": 0.20903789137588374, "grad_norm": 1.0550364503575058, "learning_rate": 4.6621840319741576e-06, "loss": 0.1926, "step": 802 }, { "epoch": 0.209298537125729, "grad_norm": 1.0600424542521414, "learning_rate": 4.661101280797579e-06, "loss": 0.2002, "step": 803 }, { "epoch": 0.20955918287557423, "grad_norm": 1.0141364436394604, "learning_rate": 4.660016923352648e-06, "loss": 0.2066, "step": 804 }, { "epoch": 0.20981982862541948, "grad_norm": 1.1628954615355622, "learning_rate": 4.6589309604453285e-06, "loss": 0.1931, "step": 805 }, { "epoch": 0.21008047437526473, "grad_norm": 1.0065154363793067, "learning_rate": 4.657843392882778e-06, "loss": 0.1886, "step": 806 }, { "epoch": 0.21034112012510997, "grad_norm": 1.1216674408572078, "learning_rate": 4.656754221473346e-06, "loss": 0.1892, "step": 807 }, { "epoch": 0.2106017658749552, "grad_norm": 1.0028045484689514, "learning_rate": 4.6556634470265725e-06, "loss": 0.1901, "step": 808 }, { "epoch": 0.21086241162480043, "grad_norm": 1.0204688021626227, "learning_rate": 4.654571070353193e-06, "loss": 0.1838, "step": 809 }, { "epoch": 0.21112305737464568, "grad_norm": 1.0606680877036978, "learning_rate": 4.6534770922651305e-06, "loss": 0.1885, "step": 810 }, { "epoch": 0.21138370312449092, "grad_norm": 1.100613408417792, "learning_rate": 4.6523815135754995e-06, "loss": 0.1887, "step": 811 }, { "epoch": 0.21164434887433617, "grad_norm": 1.0509595243187553, "learning_rate": 4.651284335098603e-06, "loss": 0.1925, "step": 812 }, { "epoch": 0.21190499462418141, "grad_norm": 1.0434374966262125, "learning_rate": 4.650185557649936e-06, "loss": 0.186, "step": 813 }, { "epoch": 0.21216564037402666, "grad_norm": 1.1007990715980167, "learning_rate": 4.6490851820461785e-06, "loss": 0.2012, "step": 814 }, { "epoch": 0.2124262861238719, "grad_norm": 1.0242448161505786, "learning_rate": 4.6479832091052e-06, "loss": 0.1946, "step": 815 }, { "epoch": 0.21268693187371712, "grad_norm": 1.0324888494904014, "learning_rate": 4.646879639646058e-06, "loss": 0.1745, "step": 816 }, { "epoch": 0.21294757762356237, "grad_norm": 1.1229135882564423, "learning_rate": 4.645774474488995e-06, "loss": 0.2086, "step": 817 }, { "epoch": 0.2132082233734076, "grad_norm": 1.0299160788671997, "learning_rate": 4.64466771445544e-06, "loss": 0.1892, "step": 818 }, { "epoch": 0.21346886912325286, "grad_norm": 1.0422730465108285, "learning_rate": 4.643559360368008e-06, "loss": 0.1951, "step": 819 }, { "epoch": 0.2137295148730981, "grad_norm": 1.0777057248099637, "learning_rate": 4.642449413050499e-06, "loss": 0.1937, "step": 820 }, { "epoch": 0.21399016062294335, "grad_norm": 1.0108797071970355, "learning_rate": 4.6413378733278945e-06, "loss": 0.2136, "step": 821 }, { "epoch": 0.2142508063727886, "grad_norm": 0.988730115920663, "learning_rate": 4.640224742026365e-06, "loss": 0.1869, "step": 822 }, { "epoch": 0.21451145212263384, "grad_norm": 1.1646647292041912, "learning_rate": 4.639110019973258e-06, "loss": 0.1966, "step": 823 }, { "epoch": 0.21477209787247906, "grad_norm": 1.057676444128772, "learning_rate": 4.637993707997107e-06, "loss": 0.1864, "step": 824 }, { "epoch": 0.2150327436223243, "grad_norm": 1.0484248310346083, "learning_rate": 4.6368758069276274e-06, "loss": 0.1942, "step": 825 }, { "epoch": 0.21529338937216955, "grad_norm": 1.0996163235300986, "learning_rate": 4.635756317595714e-06, "loss": 0.2042, "step": 826 }, { "epoch": 0.2155540351220148, "grad_norm": 1.114181170114126, "learning_rate": 4.634635240833442e-06, "loss": 0.1918, "step": 827 }, { "epoch": 0.21581468087186004, "grad_norm": 0.9885946785701413, "learning_rate": 4.6335125774740665e-06, "loss": 0.1649, "step": 828 }, { "epoch": 0.21607532662170528, "grad_norm": 1.047292741119579, "learning_rate": 4.632388328352023e-06, "loss": 0.1914, "step": 829 }, { "epoch": 0.21633597237155053, "grad_norm": 0.9466713815316381, "learning_rate": 4.6312624943029275e-06, "loss": 0.1804, "step": 830 }, { "epoch": 0.21659661812139575, "grad_norm": 1.009626044402925, "learning_rate": 4.630135076163569e-06, "loss": 0.1845, "step": 831 }, { "epoch": 0.216857263871241, "grad_norm": 0.9493509273830573, "learning_rate": 4.629006074771918e-06, "loss": 0.1723, "step": 832 }, { "epoch": 0.21711790962108624, "grad_norm": 1.1044807792650086, "learning_rate": 4.627875490967119e-06, "loss": 0.2102, "step": 833 }, { "epoch": 0.21737855537093148, "grad_norm": 1.0517882968101357, "learning_rate": 4.626743325589496e-06, "loss": 0.1895, "step": 834 }, { "epoch": 0.21763920112077673, "grad_norm": 1.011500482166389, "learning_rate": 4.625609579480544e-06, "loss": 0.1871, "step": 835 }, { "epoch": 0.21789984687062197, "grad_norm": 1.0078367167651745, "learning_rate": 4.624474253482938e-06, "loss": 0.1889, "step": 836 }, { "epoch": 0.21816049262046722, "grad_norm": 1.0262689295712726, "learning_rate": 4.623337348440524e-06, "loss": 0.1933, "step": 837 }, { "epoch": 0.21842113837031246, "grad_norm": 1.0406057278752576, "learning_rate": 4.622198865198321e-06, "loss": 0.1933, "step": 838 }, { "epoch": 0.21868178412015768, "grad_norm": 1.0618427167670472, "learning_rate": 4.621058804602523e-06, "loss": 0.1816, "step": 839 }, { "epoch": 0.21894242987000292, "grad_norm": 1.1439728988206137, "learning_rate": 4.619917167500496e-06, "loss": 0.2002, "step": 840 }, { "epoch": 0.21920307561984817, "grad_norm": 1.1424542313209005, "learning_rate": 4.6187739547407785e-06, "loss": 0.201, "step": 841 }, { "epoch": 0.21946372136969342, "grad_norm": 0.9971915642974236, "learning_rate": 4.617629167173078e-06, "loss": 0.1819, "step": 842 }, { "epoch": 0.21972436711953866, "grad_norm": 1.0485255146624461, "learning_rate": 4.616482805648273e-06, "loss": 0.1928, "step": 843 }, { "epoch": 0.2199850128693839, "grad_norm": 1.0323226620145345, "learning_rate": 4.615334871018415e-06, "loss": 0.1715, "step": 844 }, { "epoch": 0.22024565861922915, "grad_norm": 1.1366755538214917, "learning_rate": 4.614185364136719e-06, "loss": 0.1733, "step": 845 }, { "epoch": 0.22050630436907437, "grad_norm": 1.0517129028300403, "learning_rate": 4.613034285857575e-06, "loss": 0.1872, "step": 846 }, { "epoch": 0.2207669501189196, "grad_norm": 1.1204070434584552, "learning_rate": 4.611881637036536e-06, "loss": 0.1919, "step": 847 }, { "epoch": 0.22102759586876486, "grad_norm": 1.1055526784200618, "learning_rate": 4.610727418530324e-06, "loss": 0.1872, "step": 848 }, { "epoch": 0.2212882416186101, "grad_norm": 0.9533733522419943, "learning_rate": 4.609571631196829e-06, "loss": 0.1741, "step": 849 }, { "epoch": 0.22154888736845535, "grad_norm": 0.9620415291886345, "learning_rate": 4.6084142758951055e-06, "loss": 0.1851, "step": 850 }, { "epoch": 0.2218095331183006, "grad_norm": 1.0728527178750589, "learning_rate": 4.607255353485373e-06, "loss": 0.198, "step": 851 }, { "epoch": 0.22207017886814584, "grad_norm": 0.9773209779277348, "learning_rate": 4.606094864829016e-06, "loss": 0.1725, "step": 852 }, { "epoch": 0.22233082461799109, "grad_norm": 1.089115809199585, "learning_rate": 4.604932810788587e-06, "loss": 0.1947, "step": 853 }, { "epoch": 0.2225914703678363, "grad_norm": 0.9837671714076787, "learning_rate": 4.603769192227795e-06, "loss": 0.1813, "step": 854 }, { "epoch": 0.22285211611768155, "grad_norm": 1.59679372031344, "learning_rate": 4.602604010011518e-06, "loss": 0.1892, "step": 855 }, { "epoch": 0.2231127618675268, "grad_norm": 1.2240627762688878, "learning_rate": 4.601437265005792e-06, "loss": 0.205, "step": 856 }, { "epoch": 0.22337340761737204, "grad_norm": 1.0328406780582005, "learning_rate": 4.600268958077818e-06, "loss": 0.1854, "step": 857 }, { "epoch": 0.22363405336721728, "grad_norm": 1.0481351544744422, "learning_rate": 4.599099090095955e-06, "loss": 0.1822, "step": 858 }, { "epoch": 0.22389469911706253, "grad_norm": 1.1155827911899292, "learning_rate": 4.5979276619297245e-06, "loss": 0.1925, "step": 859 }, { "epoch": 0.22415534486690777, "grad_norm": 1.0255525850156038, "learning_rate": 4.5967546744498044e-06, "loss": 0.1758, "step": 860 }, { "epoch": 0.22441599061675302, "grad_norm": 1.0508846168425168, "learning_rate": 4.595580128528037e-06, "loss": 0.2019, "step": 861 }, { "epoch": 0.22467663636659824, "grad_norm": 1.0471167608269443, "learning_rate": 4.594404025037418e-06, "loss": 0.2019, "step": 862 }, { "epoch": 0.22493728211644348, "grad_norm": 1.079586564648262, "learning_rate": 4.593226364852102e-06, "loss": 0.186, "step": 863 }, { "epoch": 0.22519792786628873, "grad_norm": 1.0864070773015229, "learning_rate": 4.592047148847404e-06, "loss": 0.2144, "step": 864 }, { "epoch": 0.22545857361613397, "grad_norm": 1.0629580416997757, "learning_rate": 4.590866377899789e-06, "loss": 0.1998, "step": 865 }, { "epoch": 0.22571921936597922, "grad_norm": 1.0706388415268826, "learning_rate": 4.589684052886884e-06, "loss": 0.1906, "step": 866 }, { "epoch": 0.22597986511582446, "grad_norm": 1.1309369479622695, "learning_rate": 4.5885001746874665e-06, "loss": 0.193, "step": 867 }, { "epoch": 0.2262405108656697, "grad_norm": 1.1229689393271354, "learning_rate": 4.587314744181471e-06, "loss": 0.1939, "step": 868 }, { "epoch": 0.22650115661551493, "grad_norm": 1.051051917271782, "learning_rate": 4.586127762249985e-06, "loss": 0.1916, "step": 869 }, { "epoch": 0.22676180236536017, "grad_norm": 1.120438592555173, "learning_rate": 4.58493922977525e-06, "loss": 0.1975, "step": 870 }, { "epoch": 0.22702244811520542, "grad_norm": 1.0997994227311607, "learning_rate": 4.583749147640658e-06, "loss": 0.1826, "step": 871 }, { "epoch": 0.22728309386505066, "grad_norm": 1.0044819200032233, "learning_rate": 4.582557516730755e-06, "loss": 0.1832, "step": 872 }, { "epoch": 0.2275437396148959, "grad_norm": 1.029770339232231, "learning_rate": 4.581364337931237e-06, "loss": 0.1839, "step": 873 }, { "epoch": 0.22780438536474115, "grad_norm": 1.1488095620932777, "learning_rate": 4.58016961212895e-06, "loss": 0.2027, "step": 874 }, { "epoch": 0.2280650311145864, "grad_norm": 1.0303015506338626, "learning_rate": 4.5789733402118895e-06, "loss": 0.1785, "step": 875 }, { "epoch": 0.22832567686443164, "grad_norm": 1.0271899663363993, "learning_rate": 4.577775523069204e-06, "loss": 0.1903, "step": 876 }, { "epoch": 0.22858632261427686, "grad_norm": 1.0157457846893845, "learning_rate": 4.5765761615911856e-06, "loss": 0.1791, "step": 877 }, { "epoch": 0.2288469683641221, "grad_norm": 1.0581820017934918, "learning_rate": 4.575375256669276e-06, "loss": 0.1897, "step": 878 }, { "epoch": 0.22910761411396735, "grad_norm": 1.0961779583005702, "learning_rate": 4.5741728091960645e-06, "loss": 0.1792, "step": 879 }, { "epoch": 0.2293682598638126, "grad_norm": 1.035904916645586, "learning_rate": 4.572968820065288e-06, "loss": 0.1991, "step": 880 }, { "epoch": 0.22962890561365784, "grad_norm": 1.0627112800492153, "learning_rate": 4.571763290171827e-06, "loss": 0.1961, "step": 881 }, { "epoch": 0.2298895513635031, "grad_norm": 1.1519545141869294, "learning_rate": 4.570556220411708e-06, "loss": 0.2018, "step": 882 }, { "epoch": 0.23015019711334833, "grad_norm": 1.1007888679587414, "learning_rate": 4.569347611682104e-06, "loss": 0.1871, "step": 883 }, { "epoch": 0.23041084286319355, "grad_norm": 1.1183438859511756, "learning_rate": 4.568137464881328e-06, "loss": 0.1947, "step": 884 }, { "epoch": 0.2306714886130388, "grad_norm": 1.176217719394581, "learning_rate": 4.5669257809088394e-06, "loss": 0.1963, "step": 885 }, { "epoch": 0.23093213436288404, "grad_norm": 1.1690312232284403, "learning_rate": 4.5657125606652385e-06, "loss": 0.187, "step": 886 }, { "epoch": 0.23119278011272928, "grad_norm": 1.102279702358986, "learning_rate": 4.564497805052269e-06, "loss": 0.1841, "step": 887 }, { "epoch": 0.23145342586257453, "grad_norm": 1.1357388453338466, "learning_rate": 4.563281514972814e-06, "loss": 0.1938, "step": 888 }, { "epoch": 0.23171407161241978, "grad_norm": 1.1104928757284516, "learning_rate": 4.562063691330897e-06, "loss": 0.2047, "step": 889 }, { "epoch": 0.23197471736226502, "grad_norm": 1.0707373133412244, "learning_rate": 4.560844335031684e-06, "loss": 0.1868, "step": 890 }, { "epoch": 0.23223536311211027, "grad_norm": 1.0165412102438676, "learning_rate": 4.5596234469814775e-06, "loss": 0.199, "step": 891 }, { "epoch": 0.23249600886195548, "grad_norm": 0.9873056470263109, "learning_rate": 4.55840102808772e-06, "loss": 0.1947, "step": 892 }, { "epoch": 0.23275665461180073, "grad_norm": 1.0983614683985325, "learning_rate": 4.557177079258989e-06, "loss": 0.1941, "step": 893 }, { "epoch": 0.23301730036164597, "grad_norm": 1.124524646731415, "learning_rate": 4.555951601405005e-06, "loss": 0.1912, "step": 894 }, { "epoch": 0.23327794611149122, "grad_norm": 1.05263665891015, "learning_rate": 4.5547245954366185e-06, "loss": 0.2008, "step": 895 }, { "epoch": 0.23353859186133646, "grad_norm": 1.0224830464943286, "learning_rate": 4.55349606226582e-06, "loss": 0.1915, "step": 896 }, { "epoch": 0.2337992376111817, "grad_norm": 1.1059556444953884, "learning_rate": 4.552266002805732e-06, "loss": 0.1891, "step": 897 }, { "epoch": 0.23405988336102695, "grad_norm": 1.0689648710863262, "learning_rate": 4.551034417970616e-06, "loss": 0.1964, "step": 898 }, { "epoch": 0.23432052911087217, "grad_norm": 1.0407535087455115, "learning_rate": 4.549801308675862e-06, "loss": 0.1796, "step": 899 }, { "epoch": 0.23458117486071742, "grad_norm": 1.0320955236832479, "learning_rate": 4.548566675837996e-06, "loss": 0.1938, "step": 900 }, { "epoch": 0.23484182061056266, "grad_norm": 1.0563045408408862, "learning_rate": 4.547330520374677e-06, "loss": 0.1798, "step": 901 }, { "epoch": 0.2351024663604079, "grad_norm": 1.0031378122672938, "learning_rate": 4.546092843204694e-06, "loss": 0.1758, "step": 902 }, { "epoch": 0.23536311211025315, "grad_norm": 1.057971868211065, "learning_rate": 4.544853645247966e-06, "loss": 0.1895, "step": 903 }, { "epoch": 0.2356237578600984, "grad_norm": 1.0040027985987048, "learning_rate": 4.543612927425547e-06, "loss": 0.1804, "step": 904 }, { "epoch": 0.23588440360994364, "grad_norm": 1.0638635911773926, "learning_rate": 4.542370690659615e-06, "loss": 0.1938, "step": 905 }, { "epoch": 0.2361450493597889, "grad_norm": 0.9930101021440698, "learning_rate": 4.541126935873481e-06, "loss": 0.1717, "step": 906 }, { "epoch": 0.2364056951096341, "grad_norm": 1.0041546877855625, "learning_rate": 4.539881663991583e-06, "loss": 0.1817, "step": 907 }, { "epoch": 0.23666634085947935, "grad_norm": 1.0553955991115567, "learning_rate": 4.538634875939486e-06, "loss": 0.1842, "step": 908 }, { "epoch": 0.2369269866093246, "grad_norm": 1.0329526056471579, "learning_rate": 4.537386572643882e-06, "loss": 0.1742, "step": 909 }, { "epoch": 0.23718763235916984, "grad_norm": 1.0414591536973532, "learning_rate": 4.536136755032592e-06, "loss": 0.1848, "step": 910 }, { "epoch": 0.2374482781090151, "grad_norm": 1.061008563242901, "learning_rate": 4.534885424034557e-06, "loss": 0.2025, "step": 911 }, { "epoch": 0.23770892385886033, "grad_norm": 1.0073553737714593, "learning_rate": 4.5336325805798475e-06, "loss": 0.1834, "step": 912 }, { "epoch": 0.23796956960870558, "grad_norm": 1.0232736410515533, "learning_rate": 4.532378225599657e-06, "loss": 0.1953, "step": 913 }, { "epoch": 0.23823021535855082, "grad_norm": 1.0158550003165048, "learning_rate": 4.5311223600263016e-06, "loss": 0.1843, "step": 914 }, { "epoch": 0.23849086110839604, "grad_norm": 1.0787330810745053, "learning_rate": 4.529864984793221e-06, "loss": 0.1801, "step": 915 }, { "epoch": 0.23875150685824129, "grad_norm": 0.9994732738217595, "learning_rate": 4.528606100834976e-06, "loss": 0.1772, "step": 916 }, { "epoch": 0.23901215260808653, "grad_norm": 1.0349909017977543, "learning_rate": 4.527345709087251e-06, "loss": 0.1814, "step": 917 }, { "epoch": 0.23927279835793178, "grad_norm": 0.9852640124121084, "learning_rate": 4.526083810486848e-06, "loss": 0.1679, "step": 918 }, { "epoch": 0.23953344410777702, "grad_norm": 1.1055286541906326, "learning_rate": 4.524820405971691e-06, "loss": 0.1945, "step": 919 }, { "epoch": 0.23979408985762227, "grad_norm": 0.970323760646403, "learning_rate": 4.523555496480824e-06, "loss": 0.1873, "step": 920 }, { "epoch": 0.2400547356074675, "grad_norm": 1.0867684358480312, "learning_rate": 4.522289082954406e-06, "loss": 0.1954, "step": 921 }, { "epoch": 0.24031538135731273, "grad_norm": 1.0390027899722887, "learning_rate": 4.5210211663337195e-06, "loss": 0.1856, "step": 922 }, { "epoch": 0.24057602710715797, "grad_norm": 1.0893578235153853, "learning_rate": 4.519751747561158e-06, "loss": 0.1945, "step": 923 }, { "epoch": 0.24083667285700322, "grad_norm": 1.0596152279509168, "learning_rate": 4.518480827580237e-06, "loss": 0.1989, "step": 924 }, { "epoch": 0.24109731860684847, "grad_norm": 1.0345940312216921, "learning_rate": 4.517208407335584e-06, "loss": 0.1688, "step": 925 }, { "epoch": 0.2413579643566937, "grad_norm": 1.103236060402568, "learning_rate": 4.515934487772942e-06, "loss": 0.2017, "step": 926 }, { "epoch": 0.24161861010653896, "grad_norm": 1.0790401998790056, "learning_rate": 4.5146590698391714e-06, "loss": 0.2061, "step": 927 }, { "epoch": 0.2418792558563842, "grad_norm": 1.0897878417352238, "learning_rate": 4.513382154482242e-06, "loss": 0.1866, "step": 928 }, { "epoch": 0.24213990160622945, "grad_norm": 1.0749246896534004, "learning_rate": 4.512103742651241e-06, "loss": 0.1826, "step": 929 }, { "epoch": 0.24240054735607466, "grad_norm": 1.1706741938685037, "learning_rate": 4.510823835296364e-06, "loss": 0.1873, "step": 930 }, { "epoch": 0.2426611931059199, "grad_norm": 0.9642028065361701, "learning_rate": 4.509542433368921e-06, "loss": 0.1675, "step": 931 }, { "epoch": 0.24292183885576515, "grad_norm": 1.0401837908195013, "learning_rate": 4.50825953782133e-06, "loss": 0.1921, "step": 932 }, { "epoch": 0.2431824846056104, "grad_norm": 1.0751530327979022, "learning_rate": 4.5069751496071225e-06, "loss": 0.1926, "step": 933 }, { "epoch": 0.24344313035545564, "grad_norm": 1.0582621652280448, "learning_rate": 4.505689269680937e-06, "loss": 0.1937, "step": 934 }, { "epoch": 0.2437037761053009, "grad_norm": 1.0811744797641425, "learning_rate": 4.504401898998522e-06, "loss": 0.2022, "step": 935 }, { "epoch": 0.24396442185514614, "grad_norm": 1.0050421118044508, "learning_rate": 4.503113038516732e-06, "loss": 0.1966, "step": 936 }, { "epoch": 0.24422506760499135, "grad_norm": 1.0442886992177955, "learning_rate": 4.501822689193532e-06, "loss": 0.1808, "step": 937 }, { "epoch": 0.2444857133548366, "grad_norm": 1.1235568826829827, "learning_rate": 4.500530851987992e-06, "loss": 0.1942, "step": 938 }, { "epoch": 0.24474635910468184, "grad_norm": 1.0003273735050437, "learning_rate": 4.499237527860287e-06, "loss": 0.1828, "step": 939 }, { "epoch": 0.2450070048545271, "grad_norm": 1.0272657137804342, "learning_rate": 4.4979427177716974e-06, "loss": 0.1939, "step": 940 }, { "epoch": 0.24526765060437233, "grad_norm": 1.0129182119626317, "learning_rate": 4.4966464226846105e-06, "loss": 0.167, "step": 941 }, { "epoch": 0.24552829635421758, "grad_norm": 1.1023999434272822, "learning_rate": 4.495348643562514e-06, "loss": 0.1855, "step": 942 }, { "epoch": 0.24578894210406282, "grad_norm": 1.0074603568043243, "learning_rate": 4.494049381370002e-06, "loss": 0.1745, "step": 943 }, { "epoch": 0.24604958785390807, "grad_norm": 1.1362649093013448, "learning_rate": 4.4927486370727656e-06, "loss": 0.1715, "step": 944 }, { "epoch": 0.2463102336037533, "grad_norm": 1.0771659934455513, "learning_rate": 4.491446411637605e-06, "loss": 0.1972, "step": 945 }, { "epoch": 0.24657087935359853, "grad_norm": 1.139901797134465, "learning_rate": 4.4901427060324135e-06, "loss": 0.1817, "step": 946 }, { "epoch": 0.24683152510344378, "grad_norm": 1.0535529549485476, "learning_rate": 4.488837521226192e-06, "loss": 0.1899, "step": 947 }, { "epoch": 0.24709217085328902, "grad_norm": 1.0175911169130663, "learning_rate": 4.487530858189033e-06, "loss": 0.187, "step": 948 }, { "epoch": 0.24735281660313427, "grad_norm": 1.026243340002725, "learning_rate": 4.486222717892135e-06, "loss": 0.1931, "step": 949 }, { "epoch": 0.2476134623529795, "grad_norm": 0.9358600838498397, "learning_rate": 4.4849131013077915e-06, "loss": 0.1742, "step": 950 }, { "epoch": 0.24787410810282476, "grad_norm": 0.9773320630481207, "learning_rate": 4.483602009409391e-06, "loss": 0.1787, "step": 951 }, { "epoch": 0.24813475385267, "grad_norm": 1.1786810721086762, "learning_rate": 4.482289443171421e-06, "loss": 0.1914, "step": 952 }, { "epoch": 0.24839539960251522, "grad_norm": 1.0351263547811473, "learning_rate": 4.480975403569466e-06, "loss": 0.1897, "step": 953 }, { "epoch": 0.24865604535236047, "grad_norm": 1.014025605383907, "learning_rate": 4.479659891580203e-06, "loss": 0.1913, "step": 954 }, { "epoch": 0.2489166911022057, "grad_norm": 1.0052256187893258, "learning_rate": 4.478342908181404e-06, "loss": 0.1754, "step": 955 }, { "epoch": 0.24917733685205096, "grad_norm": 1.0050501669028455, "learning_rate": 4.477024454351937e-06, "loss": 0.1799, "step": 956 }, { "epoch": 0.2494379826018962, "grad_norm": 1.0128055571412342, "learning_rate": 4.475704531071759e-06, "loss": 0.1886, "step": 957 }, { "epoch": 0.24969862835174145, "grad_norm": 1.006753552237613, "learning_rate": 4.4743831393219215e-06, "loss": 0.1853, "step": 958 }, { "epoch": 0.2499592741015867, "grad_norm": 1.0748574561714443, "learning_rate": 4.473060280084568e-06, "loss": 0.1985, "step": 959 }, { "epoch": 0.25021991985143194, "grad_norm": 1.0226564853411986, "learning_rate": 4.471735954342932e-06, "loss": 0.1996, "step": 960 }, { "epoch": 0.2504805656012772, "grad_norm": 1.0020409309941618, "learning_rate": 4.470410163081336e-06, "loss": 0.1845, "step": 961 }, { "epoch": 0.25074121135112243, "grad_norm": 1.0714227009735604, "learning_rate": 4.469082907285192e-06, "loss": 0.189, "step": 962 }, { "epoch": 0.2510018571009677, "grad_norm": 0.9772682947459668, "learning_rate": 4.4677541879410025e-06, "loss": 0.1777, "step": 963 }, { "epoch": 0.25126250285081286, "grad_norm": 0.9584560206309807, "learning_rate": 4.4664240060363565e-06, "loss": 0.1882, "step": 964 }, { "epoch": 0.2515231486006581, "grad_norm": 1.0536302467611511, "learning_rate": 4.465092362559929e-06, "loss": 0.1969, "step": 965 }, { "epoch": 0.25178379435050335, "grad_norm": 0.9354630714790837, "learning_rate": 4.463759258501485e-06, "loss": 0.1729, "step": 966 }, { "epoch": 0.2520444401003486, "grad_norm": 1.0264885212959998, "learning_rate": 4.4624246948518685e-06, "loss": 0.1969, "step": 967 }, { "epoch": 0.25230508585019384, "grad_norm": 0.9784059173161666, "learning_rate": 4.461088672603015e-06, "loss": 0.1766, "step": 968 }, { "epoch": 0.2525657316000391, "grad_norm": 1.0675636066350502, "learning_rate": 4.459751192747941e-06, "loss": 0.1886, "step": 969 }, { "epoch": 0.25282637734988433, "grad_norm": 1.2005277221638748, "learning_rate": 4.458412256280747e-06, "loss": 0.1851, "step": 970 }, { "epoch": 0.2530870230997296, "grad_norm": 1.0849197045133356, "learning_rate": 4.457071864196616e-06, "loss": 0.1951, "step": 971 }, { "epoch": 0.2533476688495748, "grad_norm": 1.052446055001028, "learning_rate": 4.455730017491812e-06, "loss": 0.1917, "step": 972 }, { "epoch": 0.25360831459942007, "grad_norm": 1.032720378442882, "learning_rate": 4.454386717163682e-06, "loss": 0.1927, "step": 973 }, { "epoch": 0.2538689603492653, "grad_norm": 1.0917613359780296, "learning_rate": 4.453041964210653e-06, "loss": 0.1936, "step": 974 }, { "epoch": 0.25412960609911056, "grad_norm": 1.042978020019947, "learning_rate": 4.451695759632229e-06, "loss": 0.1883, "step": 975 }, { "epoch": 0.2543902518489558, "grad_norm": 1.0411481505458753, "learning_rate": 4.450348104428998e-06, "loss": 0.2, "step": 976 }, { "epoch": 0.25465089759880105, "grad_norm": 1.036573927657432, "learning_rate": 4.448998999602621e-06, "loss": 0.1839, "step": 977 }, { "epoch": 0.2549115433486463, "grad_norm": 1.0662244996873365, "learning_rate": 4.447648446155841e-06, "loss": 0.1873, "step": 978 }, { "epoch": 0.2551721890984915, "grad_norm": 1.0867558289689565, "learning_rate": 4.446296445092473e-06, "loss": 0.1791, "step": 979 }, { "epoch": 0.25543283484833673, "grad_norm": 1.0825508754028097, "learning_rate": 4.4449429974174115e-06, "loss": 0.1885, "step": 980 }, { "epoch": 0.255693480598182, "grad_norm": 1.0927938441595804, "learning_rate": 4.443588104136626e-06, "loss": 0.1895, "step": 981 }, { "epoch": 0.2559541263480272, "grad_norm": 0.9668473416696179, "learning_rate": 4.442231766257159e-06, "loss": 0.1707, "step": 982 }, { "epoch": 0.25621477209787247, "grad_norm": 1.134695307363031, "learning_rate": 4.440873984787127e-06, "loss": 0.1876, "step": 983 }, { "epoch": 0.2564754178477177, "grad_norm": 1.0942364507686761, "learning_rate": 4.43951476073572e-06, "loss": 0.1934, "step": 984 }, { "epoch": 0.25673606359756296, "grad_norm": 1.0044620220207034, "learning_rate": 4.4381540951132e-06, "loss": 0.1857, "step": 985 }, { "epoch": 0.2569967093474082, "grad_norm": 1.0205812979765883, "learning_rate": 4.436791988930901e-06, "loss": 0.1681, "step": 986 }, { "epoch": 0.25725735509725345, "grad_norm": 0.9537960164773563, "learning_rate": 4.435428443201226e-06, "loss": 0.1648, "step": 987 }, { "epoch": 0.2575180008470987, "grad_norm": 1.0256851758471601, "learning_rate": 4.434063458937652e-06, "loss": 0.1788, "step": 988 }, { "epoch": 0.25777864659694394, "grad_norm": 0.9912584263921695, "learning_rate": 4.432697037154718e-06, "loss": 0.1861, "step": 989 }, { "epoch": 0.2580392923467892, "grad_norm": 0.9931530741182537, "learning_rate": 4.43132917886804e-06, "loss": 0.1841, "step": 990 }, { "epoch": 0.25829993809663443, "grad_norm": 1.0303274292345075, "learning_rate": 4.429959885094295e-06, "loss": 0.1999, "step": 991 }, { "epoch": 0.2585605838464797, "grad_norm": 1.0139183223553634, "learning_rate": 4.428589156851231e-06, "loss": 0.1956, "step": 992 }, { "epoch": 0.2588212295963249, "grad_norm": 1.0574910776466213, "learning_rate": 4.42721699515766e-06, "loss": 0.1797, "step": 993 }, { "epoch": 0.2590818753461701, "grad_norm": 1.0924771163129359, "learning_rate": 4.42584340103346e-06, "loss": 0.199, "step": 994 }, { "epoch": 0.25934252109601535, "grad_norm": 1.0175899433353541, "learning_rate": 4.424468375499573e-06, "loss": 0.185, "step": 995 }, { "epoch": 0.2596031668458606, "grad_norm": 1.0503389291190095, "learning_rate": 4.423091919578008e-06, "loss": 0.1826, "step": 996 }, { "epoch": 0.25986381259570585, "grad_norm": 1.0708329197637354, "learning_rate": 4.421714034291833e-06, "loss": 0.1762, "step": 997 }, { "epoch": 0.2601244583455511, "grad_norm": 1.001691534076744, "learning_rate": 4.4203347206651805e-06, "loss": 0.1741, "step": 998 }, { "epoch": 0.26038510409539634, "grad_norm": 1.1186807850392488, "learning_rate": 4.418953979723244e-06, "loss": 0.1985, "step": 999 }, { "epoch": 0.2606457498452416, "grad_norm": 1.053424324212857, "learning_rate": 4.417571812492279e-06, "loss": 0.1762, "step": 1000 }, { "epoch": 0.2606457498452416, "eval_loss": 0.18625357747077942, "eval_runtime": 55.1518, "eval_samples_per_second": 44.985, "eval_steps_per_second": 5.639, "step": 1000 }, { "epoch": 0.2609063955950868, "grad_norm": 1.163505290844706, "learning_rate": 4.416188219999601e-06, "loss": 0.1833, "step": 1001 }, { "epoch": 0.26116704134493207, "grad_norm": 1.11937620408044, "learning_rate": 4.4148032032735835e-06, "loss": 0.1926, "step": 1002 }, { "epoch": 0.2614276870947773, "grad_norm": 1.156210036972495, "learning_rate": 4.41341676334366e-06, "loss": 0.1858, "step": 1003 }, { "epoch": 0.26168833284462256, "grad_norm": 1.1160422060753117, "learning_rate": 4.4120289012403185e-06, "loss": 0.183, "step": 1004 }, { "epoch": 0.2619489785944678, "grad_norm": 1.0041572890614394, "learning_rate": 4.410639617995109e-06, "loss": 0.175, "step": 1005 }, { "epoch": 0.26220962434431305, "grad_norm": 1.06404688255645, "learning_rate": 4.409248914640636e-06, "loss": 0.1828, "step": 1006 }, { "epoch": 0.2624702700941583, "grad_norm": 1.1458855914156931, "learning_rate": 4.407856792210558e-06, "loss": 0.1988, "step": 1007 }, { "epoch": 0.26273091584400354, "grad_norm": 1.0077994572517868, "learning_rate": 4.4064632517395875e-06, "loss": 0.1741, "step": 1008 }, { "epoch": 0.26299156159384873, "grad_norm": 1.073118284724241, "learning_rate": 4.405068294263496e-06, "loss": 0.1816, "step": 1009 }, { "epoch": 0.263252207343694, "grad_norm": 1.0615384711829852, "learning_rate": 4.4036719208191025e-06, "loss": 0.1785, "step": 1010 }, { "epoch": 0.2635128530935392, "grad_norm": 0.9945116299287444, "learning_rate": 4.402274132444282e-06, "loss": 0.1748, "step": 1011 }, { "epoch": 0.26377349884338447, "grad_norm": 1.1015786953340845, "learning_rate": 4.400874930177959e-06, "loss": 0.1839, "step": 1012 }, { "epoch": 0.2640341445932297, "grad_norm": 1.110572086901268, "learning_rate": 4.399474315060111e-06, "loss": 0.1726, "step": 1013 }, { "epoch": 0.26429479034307496, "grad_norm": 1.1103362601912892, "learning_rate": 4.398072288131763e-06, "loss": 0.1794, "step": 1014 }, { "epoch": 0.2645554360929202, "grad_norm": 1.1321782639661997, "learning_rate": 4.396668850434993e-06, "loss": 0.1902, "step": 1015 }, { "epoch": 0.26481608184276545, "grad_norm": 1.0933008739237793, "learning_rate": 4.395264003012924e-06, "loss": 0.1857, "step": 1016 }, { "epoch": 0.2650767275926107, "grad_norm": 1.043964494948007, "learning_rate": 4.393857746909728e-06, "loss": 0.1832, "step": 1017 }, { "epoch": 0.26533737334245594, "grad_norm": 1.1056932489630709, "learning_rate": 4.392450083170625e-06, "loss": 0.1692, "step": 1018 }, { "epoch": 0.2655980190923012, "grad_norm": 1.104569719180271, "learning_rate": 4.3910410128418805e-06, "loss": 0.2013, "step": 1019 }, { "epoch": 0.26585866484214643, "grad_norm": 1.0212357043981242, "learning_rate": 4.389630536970806e-06, "loss": 0.1824, "step": 1020 }, { "epoch": 0.2661193105919917, "grad_norm": 1.0435781917676963, "learning_rate": 4.388218656605755e-06, "loss": 0.1911, "step": 1021 }, { "epoch": 0.2663799563418369, "grad_norm": 1.1132673228316095, "learning_rate": 4.386805372796129e-06, "loss": 0.1993, "step": 1022 }, { "epoch": 0.26664060209168217, "grad_norm": 1.0128838303213967, "learning_rate": 4.38539068659237e-06, "loss": 0.1782, "step": 1023 }, { "epoch": 0.2669012478415274, "grad_norm": 1.1488928685811264, "learning_rate": 4.383974599045963e-06, "loss": 0.1876, "step": 1024 }, { "epoch": 0.2671618935913726, "grad_norm": 1.0410446580055064, "learning_rate": 4.382557111209436e-06, "loss": 0.2014, "step": 1025 }, { "epoch": 0.26742253934121785, "grad_norm": 1.0897909248573585, "learning_rate": 4.3811382241363545e-06, "loss": 0.1879, "step": 1026 }, { "epoch": 0.2676831850910631, "grad_norm": 1.20651967745606, "learning_rate": 4.379717938881326e-06, "loss": 0.1875, "step": 1027 }, { "epoch": 0.26794383084090834, "grad_norm": 1.0103412576038395, "learning_rate": 4.378296256499998e-06, "loss": 0.1713, "step": 1028 }, { "epoch": 0.2682044765907536, "grad_norm": 1.1130534990684005, "learning_rate": 4.376873178049056e-06, "loss": 0.1901, "step": 1029 }, { "epoch": 0.2684651223405988, "grad_norm": 1.1128529192463197, "learning_rate": 4.375448704586221e-06, "loss": 0.1912, "step": 1030 }, { "epoch": 0.2687257680904441, "grad_norm": 1.0237585086425272, "learning_rate": 4.374022837170254e-06, "loss": 0.1911, "step": 1031 }, { "epoch": 0.2689864138402893, "grad_norm": 1.033954050382386, "learning_rate": 4.37259557686095e-06, "loss": 0.2033, "step": 1032 }, { "epoch": 0.26924705959013456, "grad_norm": 1.0172375233470987, "learning_rate": 4.37116692471914e-06, "loss": 0.1725, "step": 1033 }, { "epoch": 0.2695077053399798, "grad_norm": 1.0788490021516788, "learning_rate": 4.369736881806691e-06, "loss": 0.1925, "step": 1034 }, { "epoch": 0.26976835108982505, "grad_norm": 0.9585550738264194, "learning_rate": 4.368305449186499e-06, "loss": 0.1754, "step": 1035 }, { "epoch": 0.2700289968396703, "grad_norm": 0.9906282449456824, "learning_rate": 4.366872627922498e-06, "loss": 0.184, "step": 1036 }, { "epoch": 0.27028964258951554, "grad_norm": 1.081814738089856, "learning_rate": 4.365438419079652e-06, "loss": 0.1803, "step": 1037 }, { "epoch": 0.2705502883393608, "grad_norm": 1.0637604102954519, "learning_rate": 4.364002823723956e-06, "loss": 0.1915, "step": 1038 }, { "epoch": 0.27081093408920603, "grad_norm": 1.0621288762645462, "learning_rate": 4.3625658429224374e-06, "loss": 0.1908, "step": 1039 }, { "epoch": 0.2710715798390512, "grad_norm": 0.9569326733213535, "learning_rate": 4.36112747774315e-06, "loss": 0.1831, "step": 1040 }, { "epoch": 0.27133222558889647, "grad_norm": 1.018057579649558, "learning_rate": 4.359687729255181e-06, "loss": 0.1749, "step": 1041 }, { "epoch": 0.2715928713387417, "grad_norm": 1.1268334621224996, "learning_rate": 4.358246598528641e-06, "loss": 0.1884, "step": 1042 }, { "epoch": 0.27185351708858696, "grad_norm": 1.1014895053454996, "learning_rate": 4.356804086634671e-06, "loss": 0.1812, "step": 1043 }, { "epoch": 0.2721141628384322, "grad_norm": 1.0165731756656105, "learning_rate": 4.355360194645439e-06, "loss": 0.1825, "step": 1044 }, { "epoch": 0.27237480858827745, "grad_norm": 1.0210648403262754, "learning_rate": 4.353914923634136e-06, "loss": 0.1905, "step": 1045 }, { "epoch": 0.2726354543381227, "grad_norm": 0.9864140039833914, "learning_rate": 4.35246827467498e-06, "loss": 0.1707, "step": 1046 }, { "epoch": 0.27289610008796794, "grad_norm": 1.0479381111458392, "learning_rate": 4.3510202488432155e-06, "loss": 0.1754, "step": 1047 }, { "epoch": 0.2731567458378132, "grad_norm": 0.9830749380168137, "learning_rate": 4.349570847215104e-06, "loss": 0.1926, "step": 1048 }, { "epoch": 0.27341739158765843, "grad_norm": 0.9871323639502001, "learning_rate": 4.348120070867934e-06, "loss": 0.1776, "step": 1049 }, { "epoch": 0.2736780373375037, "grad_norm": 1.0952036607580267, "learning_rate": 4.346667920880016e-06, "loss": 0.186, "step": 1050 }, { "epoch": 0.2739386830873489, "grad_norm": 0.9953701941459495, "learning_rate": 4.34521439833068e-06, "loss": 0.1825, "step": 1051 }, { "epoch": 0.27419932883719417, "grad_norm": 1.0811603547759856, "learning_rate": 4.343759504300278e-06, "loss": 0.1896, "step": 1052 }, { "epoch": 0.2744599745870394, "grad_norm": 1.077179331717754, "learning_rate": 4.3423032398701785e-06, "loss": 0.174, "step": 1053 }, { "epoch": 0.27472062033688466, "grad_norm": 0.9969840463001316, "learning_rate": 4.34084560612277e-06, "loss": 0.1655, "step": 1054 }, { "epoch": 0.27498126608672985, "grad_norm": 1.0729530511496939, "learning_rate": 4.33938660414146e-06, "loss": 0.1879, "step": 1055 }, { "epoch": 0.2752419118365751, "grad_norm": 0.9452040571192022, "learning_rate": 4.337926235010672e-06, "loss": 0.1694, "step": 1056 }, { "epoch": 0.27550255758642034, "grad_norm": 0.9979523889359836, "learning_rate": 4.336464499815844e-06, "loss": 0.1764, "step": 1057 }, { "epoch": 0.2757632033362656, "grad_norm": 1.084241857058132, "learning_rate": 4.335001399643433e-06, "loss": 0.1883, "step": 1058 }, { "epoch": 0.27602384908611083, "grad_norm": 0.9985190104503913, "learning_rate": 4.333536935580905e-06, "loss": 0.1737, "step": 1059 }, { "epoch": 0.2762844948359561, "grad_norm": 1.0896617548544587, "learning_rate": 4.332071108716747e-06, "loss": 0.1819, "step": 1060 }, { "epoch": 0.2765451405858013, "grad_norm": 1.0070051590589986, "learning_rate": 4.330603920140453e-06, "loss": 0.1731, "step": 1061 }, { "epoch": 0.27680578633564656, "grad_norm": 0.9842915737075661, "learning_rate": 4.329135370942531e-06, "loss": 0.1692, "step": 1062 }, { "epoch": 0.2770664320854918, "grad_norm": 1.044820436074587, "learning_rate": 4.327665462214501e-06, "loss": 0.1824, "step": 1063 }, { "epoch": 0.27732707783533705, "grad_norm": 0.9883223868438766, "learning_rate": 4.326194195048894e-06, "loss": 0.1831, "step": 1064 }, { "epoch": 0.2775877235851823, "grad_norm": 1.03196836255341, "learning_rate": 4.324721570539247e-06, "loss": 0.1692, "step": 1065 }, { "epoch": 0.27784836933502755, "grad_norm": 1.0223070128261558, "learning_rate": 4.323247589780111e-06, "loss": 0.1753, "step": 1066 }, { "epoch": 0.2781090150848728, "grad_norm": 1.0735347887820281, "learning_rate": 4.321772253867041e-06, "loss": 0.1898, "step": 1067 }, { "epoch": 0.27836966083471804, "grad_norm": 1.0171791352649133, "learning_rate": 4.320295563896601e-06, "loss": 0.1723, "step": 1068 }, { "epoch": 0.2786303065845633, "grad_norm": 1.1038116511326692, "learning_rate": 4.318817520966362e-06, "loss": 0.1851, "step": 1069 }, { "epoch": 0.27889095233440847, "grad_norm": 1.0546405824830116, "learning_rate": 4.317338126174899e-06, "loss": 0.1849, "step": 1070 }, { "epoch": 0.2791515980842537, "grad_norm": 1.0291121759374597, "learning_rate": 4.315857380621794e-06, "loss": 0.1862, "step": 1071 }, { "epoch": 0.27941224383409896, "grad_norm": 1.0399735737627864, "learning_rate": 4.314375285407629e-06, "loss": 0.1785, "step": 1072 }, { "epoch": 0.2796728895839442, "grad_norm": 0.9612184763360174, "learning_rate": 4.312891841633995e-06, "loss": 0.1718, "step": 1073 }, { "epoch": 0.27993353533378945, "grad_norm": 1.0077203550087894, "learning_rate": 4.311407050403479e-06, "loss": 0.1783, "step": 1074 }, { "epoch": 0.2801941810836347, "grad_norm": 1.0307311974045847, "learning_rate": 4.309920912819674e-06, "loss": 0.1823, "step": 1075 }, { "epoch": 0.28045482683347994, "grad_norm": 1.0665816415452272, "learning_rate": 4.308433429987172e-06, "loss": 0.1767, "step": 1076 }, { "epoch": 0.2807154725833252, "grad_norm": 1.0793518068390542, "learning_rate": 4.306944603011565e-06, "loss": 0.1892, "step": 1077 }, { "epoch": 0.28097611833317043, "grad_norm": 0.9660384868014887, "learning_rate": 4.305454432999445e-06, "loss": 0.1783, "step": 1078 }, { "epoch": 0.2812367640830157, "grad_norm": 1.0381289232416164, "learning_rate": 4.303962921058401e-06, "loss": 0.1748, "step": 1079 }, { "epoch": 0.2814974098328609, "grad_norm": 1.0885390092380494, "learning_rate": 4.302470068297019e-06, "loss": 0.1886, "step": 1080 }, { "epoch": 0.28175805558270617, "grad_norm": 1.0281821394324324, "learning_rate": 4.300975875824884e-06, "loss": 0.1748, "step": 1081 }, { "epoch": 0.2820187013325514, "grad_norm": 1.0316655528954966, "learning_rate": 4.2994803447525735e-06, "loss": 0.1803, "step": 1082 }, { "epoch": 0.28227934708239666, "grad_norm": 1.1232606556121654, "learning_rate": 4.297983476191663e-06, "loss": 0.1902, "step": 1083 }, { "epoch": 0.2825399928322419, "grad_norm": 1.0823866607742594, "learning_rate": 4.29648527125472e-06, "loss": 0.1866, "step": 1084 }, { "epoch": 0.2828006385820871, "grad_norm": 1.0518211210963708, "learning_rate": 4.294985731055306e-06, "loss": 0.1935, "step": 1085 }, { "epoch": 0.28306128433193234, "grad_norm": 1.06694971391566, "learning_rate": 4.2934848567079745e-06, "loss": 0.1807, "step": 1086 }, { "epoch": 0.2833219300817776, "grad_norm": 1.1241214381352058, "learning_rate": 4.2919826493282725e-06, "loss": 0.179, "step": 1087 }, { "epoch": 0.28358257583162283, "grad_norm": 1.0700319523250135, "learning_rate": 4.290479110032735e-06, "loss": 0.1887, "step": 1088 }, { "epoch": 0.2838432215814681, "grad_norm": 0.9549366037373767, "learning_rate": 4.28897423993889e-06, "loss": 0.1721, "step": 1089 }, { "epoch": 0.2841038673313133, "grad_norm": 1.0305654074630781, "learning_rate": 4.28746804016525e-06, "loss": 0.1703, "step": 1090 }, { "epoch": 0.28436451308115857, "grad_norm": 0.9781325039532861, "learning_rate": 4.285960511831322e-06, "loss": 0.1719, "step": 1091 }, { "epoch": 0.2846251588310038, "grad_norm": 1.0613989344370232, "learning_rate": 4.284451656057595e-06, "loss": 0.191, "step": 1092 }, { "epoch": 0.28488580458084906, "grad_norm": 1.0883022986617312, "learning_rate": 4.282941473965548e-06, "loss": 0.1888, "step": 1093 }, { "epoch": 0.2851464503306943, "grad_norm": 1.038097403995419, "learning_rate": 4.281429966677644e-06, "loss": 0.1772, "step": 1094 }, { "epoch": 0.28540709608053955, "grad_norm": 1.0527559890335454, "learning_rate": 4.279917135317333e-06, "loss": 0.1809, "step": 1095 }, { "epoch": 0.2856677418303848, "grad_norm": 1.0363102034149005, "learning_rate": 4.2784029810090456e-06, "loss": 0.1795, "step": 1096 }, { "epoch": 0.28592838758023004, "grad_norm": 1.0118840970183816, "learning_rate": 4.2768875048782e-06, "loss": 0.19, "step": 1097 }, { "epoch": 0.2861890333300753, "grad_norm": 1.0053334640568494, "learning_rate": 4.275370708051194e-06, "loss": 0.1823, "step": 1098 }, { "epoch": 0.2864496790799205, "grad_norm": 1.0044756168611548, "learning_rate": 4.2738525916554065e-06, "loss": 0.193, "step": 1099 }, { "epoch": 0.2867103248297657, "grad_norm": 1.01720732277456, "learning_rate": 4.2723331568192004e-06, "loss": 0.1738, "step": 1100 }, { "epoch": 0.28697097057961096, "grad_norm": 1.0221352597794795, "learning_rate": 4.270812404671916e-06, "loss": 0.1813, "step": 1101 }, { "epoch": 0.2872316163294562, "grad_norm": 1.1269807565883239, "learning_rate": 4.269290336343873e-06, "loss": 0.1979, "step": 1102 }, { "epoch": 0.28749226207930145, "grad_norm": 1.1029089247709294, "learning_rate": 4.267766952966369e-06, "loss": 0.2033, "step": 1103 }, { "epoch": 0.2877529078291467, "grad_norm": 1.0827033509372137, "learning_rate": 4.266242255671681e-06, "loss": 0.184, "step": 1104 }, { "epoch": 0.28801355357899194, "grad_norm": 1.0684390875456669, "learning_rate": 4.2647162455930615e-06, "loss": 0.1778, "step": 1105 }, { "epoch": 0.2882741993288372, "grad_norm": 1.0805596185410016, "learning_rate": 4.2631889238647375e-06, "loss": 0.1829, "step": 1106 }, { "epoch": 0.28853484507868243, "grad_norm": 1.2279765249343384, "learning_rate": 4.261660291621912e-06, "loss": 0.1978, "step": 1107 }, { "epoch": 0.2887954908285277, "grad_norm": 1.1183393279128837, "learning_rate": 4.260130350000763e-06, "loss": 0.1899, "step": 1108 }, { "epoch": 0.2890561365783729, "grad_norm": 0.9992874663298923, "learning_rate": 4.258599100138439e-06, "loss": 0.1783, "step": 1109 }, { "epoch": 0.28931678232821817, "grad_norm": 1.0244672828246613, "learning_rate": 4.257066543173064e-06, "loss": 0.17, "step": 1110 }, { "epoch": 0.2895774280780634, "grad_norm": 1.072467334466611, "learning_rate": 4.255532680243732e-06, "loss": 0.1914, "step": 1111 }, { "epoch": 0.28983807382790866, "grad_norm": 1.0307113945838484, "learning_rate": 4.253997512490507e-06, "loss": 0.1859, "step": 1112 }, { "epoch": 0.2900987195777539, "grad_norm": 1.0077522834273651, "learning_rate": 4.252461041054426e-06, "loss": 0.1881, "step": 1113 }, { "epoch": 0.29035936532759915, "grad_norm": 1.016171390524973, "learning_rate": 4.250923267077489e-06, "loss": 0.1798, "step": 1114 }, { "epoch": 0.2906200110774444, "grad_norm": 1.0269995081894994, "learning_rate": 4.249384191702671e-06, "loss": 0.1871, "step": 1115 }, { "epoch": 0.2908806568272896, "grad_norm": 0.9821618555287907, "learning_rate": 4.247843816073909e-06, "loss": 0.1787, "step": 1116 }, { "epoch": 0.29114130257713483, "grad_norm": 1.0415330433555035, "learning_rate": 4.246302141336108e-06, "loss": 0.1856, "step": 1117 }, { "epoch": 0.2914019483269801, "grad_norm": 1.0844126602745714, "learning_rate": 4.2447591686351406e-06, "loss": 0.1816, "step": 1118 }, { "epoch": 0.2916625940768253, "grad_norm": 1.059213010822447, "learning_rate": 4.243214899117842e-06, "loss": 0.189, "step": 1119 }, { "epoch": 0.29192323982667057, "grad_norm": 1.0687241202074098, "learning_rate": 4.2416693339320115e-06, "loss": 0.1855, "step": 1120 }, { "epoch": 0.2921838855765158, "grad_norm": 1.2590325477667395, "learning_rate": 4.240122474226413e-06, "loss": 0.2116, "step": 1121 }, { "epoch": 0.29244453132636106, "grad_norm": 1.005589965754238, "learning_rate": 4.238574321150769e-06, "loss": 0.1779, "step": 1122 }, { "epoch": 0.2927051770762063, "grad_norm": 1.0268406878695489, "learning_rate": 4.237024875855768e-06, "loss": 0.1829, "step": 1123 }, { "epoch": 0.29296582282605155, "grad_norm": 1.020026537534643, "learning_rate": 4.235474139493055e-06, "loss": 0.1635, "step": 1124 }, { "epoch": 0.2932264685758968, "grad_norm": 1.0138333346917967, "learning_rate": 4.233922113215237e-06, "loss": 0.1811, "step": 1125 }, { "epoch": 0.29348711432574204, "grad_norm": 1.0292312388014235, "learning_rate": 4.23236879817588e-06, "loss": 0.1817, "step": 1126 }, { "epoch": 0.2937477600755873, "grad_norm": 1.1074697429829612, "learning_rate": 4.230814195529504e-06, "loss": 0.1952, "step": 1127 }, { "epoch": 0.29400840582543253, "grad_norm": 1.1639396962887003, "learning_rate": 4.229258306431592e-06, "loss": 0.1892, "step": 1128 }, { "epoch": 0.2942690515752778, "grad_norm": 1.0385643716046795, "learning_rate": 4.227701132038576e-06, "loss": 0.1821, "step": 1129 }, { "epoch": 0.294529697325123, "grad_norm": 1.0736198399149008, "learning_rate": 4.226142673507852e-06, "loss": 0.1765, "step": 1130 }, { "epoch": 0.2947903430749682, "grad_norm": 1.1567679992134634, "learning_rate": 4.2245829319977635e-06, "loss": 0.1898, "step": 1131 }, { "epoch": 0.29505098882481345, "grad_norm": 1.0854786080317342, "learning_rate": 4.22302190866761e-06, "loss": 0.1927, "step": 1132 }, { "epoch": 0.2953116345746587, "grad_norm": 1.0841846000481026, "learning_rate": 4.221459604677643e-06, "loss": 0.1862, "step": 1133 }, { "epoch": 0.29557228032450394, "grad_norm": 1.0513568391387578, "learning_rate": 4.219896021189067e-06, "loss": 0.1877, "step": 1134 }, { "epoch": 0.2958329260743492, "grad_norm": 1.1006292654474281, "learning_rate": 4.218331159364039e-06, "loss": 0.1802, "step": 1135 }, { "epoch": 0.29609357182419443, "grad_norm": 1.034157165307131, "learning_rate": 4.2167650203656605e-06, "loss": 0.1846, "step": 1136 }, { "epoch": 0.2963542175740397, "grad_norm": 1.04657100532185, "learning_rate": 4.215197605357989e-06, "loss": 0.1978, "step": 1137 }, { "epoch": 0.2966148633238849, "grad_norm": 1.0536165460438285, "learning_rate": 4.213628915506025e-06, "loss": 0.1691, "step": 1138 }, { "epoch": 0.29687550907373017, "grad_norm": 1.0036699057470746, "learning_rate": 4.212058951975721e-06, "loss": 0.165, "step": 1139 }, { "epoch": 0.2971361548235754, "grad_norm": 1.0953935354379487, "learning_rate": 4.210487715933973e-06, "loss": 0.1952, "step": 1140 }, { "epoch": 0.29739680057342066, "grad_norm": 1.037672577910314, "learning_rate": 4.208915208548624e-06, "loss": 0.1668, "step": 1141 }, { "epoch": 0.2976574463232659, "grad_norm": 1.0371288514291237, "learning_rate": 4.207341430988461e-06, "loss": 0.1751, "step": 1142 }, { "epoch": 0.29791809207311115, "grad_norm": 1.0524683210280397, "learning_rate": 4.205766384423218e-06, "loss": 0.1873, "step": 1143 }, { "epoch": 0.2981787378229564, "grad_norm": 1.0424134372023794, "learning_rate": 4.204190070023567e-06, "loss": 0.177, "step": 1144 }, { "epoch": 0.29843938357280164, "grad_norm": 0.9660723309507773, "learning_rate": 4.202612488961129e-06, "loss": 0.1739, "step": 1145 }, { "epoch": 0.29870002932264683, "grad_norm": 1.0504566482449436, "learning_rate": 4.2010336424084596e-06, "loss": 0.179, "step": 1146 }, { "epoch": 0.2989606750724921, "grad_norm": 1.0939342856354157, "learning_rate": 4.1994535315390605e-06, "loss": 0.1872, "step": 1147 }, { "epoch": 0.2992213208223373, "grad_norm": 1.062140140973594, "learning_rate": 4.19787215752737e-06, "loss": 0.1826, "step": 1148 }, { "epoch": 0.29948196657218257, "grad_norm": 1.0317882672328451, "learning_rate": 4.196289521548767e-06, "loss": 0.1807, "step": 1149 }, { "epoch": 0.2997426123220278, "grad_norm": 1.0497724637527084, "learning_rate": 4.194705624779566e-06, "loss": 0.1777, "step": 1150 }, { "epoch": 0.30000325807187306, "grad_norm": 1.12041781090401, "learning_rate": 4.193120468397021e-06, "loss": 0.1949, "step": 1151 }, { "epoch": 0.3002639038217183, "grad_norm": 0.9801845609535198, "learning_rate": 4.191534053579322e-06, "loss": 0.1764, "step": 1152 }, { "epoch": 0.30052454957156355, "grad_norm": 0.9790369032630297, "learning_rate": 4.189946381505593e-06, "loss": 0.1707, "step": 1153 }, { "epoch": 0.3007851953214088, "grad_norm": 1.0596895382559626, "learning_rate": 4.188357453355893e-06, "loss": 0.1885, "step": 1154 }, { "epoch": 0.30104584107125404, "grad_norm": 1.0144793466231403, "learning_rate": 4.186767270311215e-06, "loss": 0.1677, "step": 1155 }, { "epoch": 0.3013064868210993, "grad_norm": 1.0879149018625027, "learning_rate": 4.1851758335534844e-06, "loss": 0.1922, "step": 1156 }, { "epoch": 0.30156713257094453, "grad_norm": 0.9682358726815921, "learning_rate": 4.183583144265559e-06, "loss": 0.17, "step": 1157 }, { "epoch": 0.3018277783207898, "grad_norm": 1.0272986771084622, "learning_rate": 4.181989203631227e-06, "loss": 0.1867, "step": 1158 }, { "epoch": 0.302088424070635, "grad_norm": 1.0815114761668625, "learning_rate": 4.1803940128352055e-06, "loss": 0.1778, "step": 1159 }, { "epoch": 0.30234906982048027, "grad_norm": 1.118661938462028, "learning_rate": 4.178797573063144e-06, "loss": 0.18, "step": 1160 }, { "epoch": 0.30260971557032545, "grad_norm": 1.1249720669397403, "learning_rate": 4.177199885501617e-06, "loss": 0.1888, "step": 1161 }, { "epoch": 0.3028703613201707, "grad_norm": 0.9794189240835236, "learning_rate": 4.175600951338129e-06, "loss": 0.171, "step": 1162 }, { "epoch": 0.30313100707001595, "grad_norm": 1.048207785633605, "learning_rate": 4.174000771761109e-06, "loss": 0.177, "step": 1163 }, { "epoch": 0.3033916528198612, "grad_norm": 1.022880412346382, "learning_rate": 4.172399347959912e-06, "loss": 0.1741, "step": 1164 }, { "epoch": 0.30365229856970644, "grad_norm": 1.0815982779394873, "learning_rate": 4.1707966811248206e-06, "loss": 0.1784, "step": 1165 }, { "epoch": 0.3039129443195517, "grad_norm": 1.0207367168831472, "learning_rate": 4.169192772447036e-06, "loss": 0.1924, "step": 1166 }, { "epoch": 0.3041735900693969, "grad_norm": 1.0533254019546952, "learning_rate": 4.167587623118687e-06, "loss": 0.1768, "step": 1167 }, { "epoch": 0.30443423581924217, "grad_norm": 1.0144040788441215, "learning_rate": 4.1659812343328246e-06, "loss": 0.1809, "step": 1168 }, { "epoch": 0.3046948815690874, "grad_norm": 1.051984153564179, "learning_rate": 4.164373607283416e-06, "loss": 0.1827, "step": 1169 }, { "epoch": 0.30495552731893266, "grad_norm": 1.040154743425248, "learning_rate": 4.162764743165355e-06, "loss": 0.1784, "step": 1170 }, { "epoch": 0.3052161730687779, "grad_norm": 1.1286025054646212, "learning_rate": 4.161154643174451e-06, "loss": 0.195, "step": 1171 }, { "epoch": 0.30547681881862315, "grad_norm": 1.131177625453606, "learning_rate": 4.1595433085074334e-06, "loss": 0.1864, "step": 1172 }, { "epoch": 0.3057374645684684, "grad_norm": 1.076733173931983, "learning_rate": 4.157930740361949e-06, "loss": 0.1817, "step": 1173 }, { "epoch": 0.30599811031831364, "grad_norm": 1.0159244550981945, "learning_rate": 4.156316939936559e-06, "loss": 0.1898, "step": 1174 }, { "epoch": 0.3062587560681589, "grad_norm": 0.9496927046163359, "learning_rate": 4.154701908430747e-06, "loss": 0.1572, "step": 1175 }, { "epoch": 0.3065194018180041, "grad_norm": 1.0392196029241172, "learning_rate": 4.153085647044904e-06, "loss": 0.1866, "step": 1176 }, { "epoch": 0.3067800475678493, "grad_norm": 0.9961911304702914, "learning_rate": 4.15146815698034e-06, "loss": 0.1814, "step": 1177 }, { "epoch": 0.30704069331769457, "grad_norm": 1.0248242045813247, "learning_rate": 4.149849439439277e-06, "loss": 0.1787, "step": 1178 }, { "epoch": 0.3073013390675398, "grad_norm": 1.0395916852435456, "learning_rate": 4.148229495624849e-06, "loss": 0.188, "step": 1179 }, { "epoch": 0.30756198481738506, "grad_norm": 1.016892245900251, "learning_rate": 4.146608326741101e-06, "loss": 0.1646, "step": 1180 }, { "epoch": 0.3078226305672303, "grad_norm": 1.0492982999012148, "learning_rate": 4.144985933992989e-06, "loss": 0.1826, "step": 1181 }, { "epoch": 0.30808327631707555, "grad_norm": 1.0551760542289461, "learning_rate": 4.1433623185863805e-06, "loss": 0.1859, "step": 1182 }, { "epoch": 0.3083439220669208, "grad_norm": 1.1159088245983322, "learning_rate": 4.141737481728049e-06, "loss": 0.1849, "step": 1183 }, { "epoch": 0.30860456781676604, "grad_norm": 1.0432089607495791, "learning_rate": 4.140111424625676e-06, "loss": 0.1818, "step": 1184 }, { "epoch": 0.3088652135666113, "grad_norm": 0.9743185660498711, "learning_rate": 4.138484148487853e-06, "loss": 0.1716, "step": 1185 }, { "epoch": 0.30912585931645653, "grad_norm": 1.1252004466611039, "learning_rate": 4.1368556545240724e-06, "loss": 0.1821, "step": 1186 }, { "epoch": 0.3093865050663018, "grad_norm": 1.0906710976372729, "learning_rate": 4.135225943944737e-06, "loss": 0.1746, "step": 1187 }, { "epoch": 0.309647150816147, "grad_norm": 1.0476726326547232, "learning_rate": 4.133595017961152e-06, "loss": 0.1824, "step": 1188 }, { "epoch": 0.30990779656599227, "grad_norm": 1.1302795537658323, "learning_rate": 4.131962877785525e-06, "loss": 0.1951, "step": 1189 }, { "epoch": 0.3101684423158375, "grad_norm": 1.1746872955639385, "learning_rate": 4.130329524630966e-06, "loss": 0.1666, "step": 1190 }, { "epoch": 0.31042908806568276, "grad_norm": 1.0236727954205898, "learning_rate": 4.128694959711488e-06, "loss": 0.1624, "step": 1191 }, { "epoch": 0.31068973381552795, "grad_norm": 1.1030413040818845, "learning_rate": 4.127059184242004e-06, "loss": 0.1699, "step": 1192 }, { "epoch": 0.3109503795653732, "grad_norm": 1.2099600816262452, "learning_rate": 4.125422199438326e-06, "loss": 0.1729, "step": 1193 }, { "epoch": 0.31121102531521844, "grad_norm": 1.2187181309110164, "learning_rate": 4.123784006517166e-06, "loss": 0.1852, "step": 1194 }, { "epoch": 0.3114716710650637, "grad_norm": 1.0286317950601152, "learning_rate": 4.122144606696135e-06, "loss": 0.1849, "step": 1195 }, { "epoch": 0.3117323168149089, "grad_norm": 1.1813718022270685, "learning_rate": 4.120504001193737e-06, "loss": 0.1929, "step": 1196 }, { "epoch": 0.3119929625647542, "grad_norm": 1.1957838130376761, "learning_rate": 4.118862191229376e-06, "loss": 0.1802, "step": 1197 }, { "epoch": 0.3122536083145994, "grad_norm": 1.0285076099699255, "learning_rate": 4.117219178023349e-06, "loss": 0.1748, "step": 1198 }, { "epoch": 0.31251425406444466, "grad_norm": 1.1155610659696018, "learning_rate": 4.11557496279685e-06, "loss": 0.1892, "step": 1199 }, { "epoch": 0.3127748998142899, "grad_norm": 1.141291801161469, "learning_rate": 4.113929546771963e-06, "loss": 0.1642, "step": 1200 }, { "epoch": 0.31303554556413515, "grad_norm": 1.0685603865487314, "learning_rate": 4.112282931171668e-06, "loss": 0.1851, "step": 1201 }, { "epoch": 0.3132961913139804, "grad_norm": 0.9977726134057867, "learning_rate": 4.1106351172198325e-06, "loss": 0.1795, "step": 1202 }, { "epoch": 0.31355683706382564, "grad_norm": 1.0917062926402339, "learning_rate": 4.1089861061412175e-06, "loss": 0.169, "step": 1203 }, { "epoch": 0.3138174828136709, "grad_norm": 1.1393397911125576, "learning_rate": 4.1073358991614745e-06, "loss": 0.1866, "step": 1204 }, { "epoch": 0.31407812856351613, "grad_norm": 1.084408778525964, "learning_rate": 4.105684497507141e-06, "loss": 0.1871, "step": 1205 }, { "epoch": 0.3143387743133614, "grad_norm": 1.1411229610950235, "learning_rate": 4.1040319024056465e-06, "loss": 0.1834, "step": 1206 }, { "epoch": 0.31459942006320657, "grad_norm": 1.025431532752507, "learning_rate": 4.102378115085302e-06, "loss": 0.1722, "step": 1207 }, { "epoch": 0.3148600658130518, "grad_norm": 1.0699855700155876, "learning_rate": 4.10072313677531e-06, "loss": 0.1835, "step": 1208 }, { "epoch": 0.31512071156289706, "grad_norm": 1.0180904865359433, "learning_rate": 4.0990669687057545e-06, "loss": 0.1707, "step": 1209 }, { "epoch": 0.3153813573127423, "grad_norm": 1.138293956452587, "learning_rate": 4.0974096121076076e-06, "loss": 0.1874, "step": 1210 }, { "epoch": 0.31564200306258755, "grad_norm": 1.0876840202197038, "learning_rate": 4.09575106821272e-06, "loss": 0.1843, "step": 1211 }, { "epoch": 0.3159026488124328, "grad_norm": 0.9870371953914789, "learning_rate": 4.094091338253829e-06, "loss": 0.1736, "step": 1212 }, { "epoch": 0.31616329456227804, "grad_norm": 1.0432612112678168, "learning_rate": 4.092430423464549e-06, "loss": 0.1811, "step": 1213 }, { "epoch": 0.3164239403121233, "grad_norm": 0.9798574849302096, "learning_rate": 4.0907683250793814e-06, "loss": 0.1831, "step": 1214 }, { "epoch": 0.31668458606196853, "grad_norm": 1.0421659384890647, "learning_rate": 4.089105044333702e-06, "loss": 0.1947, "step": 1215 }, { "epoch": 0.3169452318118138, "grad_norm": 1.0416703602551471, "learning_rate": 4.0874405824637676e-06, "loss": 0.1702, "step": 1216 }, { "epoch": 0.317205877561659, "grad_norm": 1.0530926146132709, "learning_rate": 4.085774940706712e-06, "loss": 0.1843, "step": 1217 }, { "epoch": 0.31746652331150427, "grad_norm": 1.048889151881887, "learning_rate": 4.084108120300546e-06, "loss": 0.1844, "step": 1218 }, { "epoch": 0.3177271690613495, "grad_norm": 1.0873219760538166, "learning_rate": 4.082440122484159e-06, "loss": 0.1784, "step": 1219 }, { "epoch": 0.31798781481119476, "grad_norm": 1.011605184075989, "learning_rate": 4.080770948497311e-06, "loss": 0.1764, "step": 1220 }, { "epoch": 0.31824846056104, "grad_norm": 1.0184619428873618, "learning_rate": 4.07910059958064e-06, "loss": 0.1764, "step": 1221 }, { "epoch": 0.3185091063108852, "grad_norm": 1.0261543848597865, "learning_rate": 4.077429076975655e-06, "loss": 0.1722, "step": 1222 }, { "epoch": 0.31876975206073044, "grad_norm": 1.0469827086453316, "learning_rate": 4.075756381924739e-06, "loss": 0.1698, "step": 1223 }, { "epoch": 0.3190303978105757, "grad_norm": 1.0425741313434638, "learning_rate": 4.074082515671145e-06, "loss": 0.1819, "step": 1224 }, { "epoch": 0.31929104356042093, "grad_norm": 1.0699085242292896, "learning_rate": 4.072407479458998e-06, "loss": 0.1789, "step": 1225 }, { "epoch": 0.3195516893102662, "grad_norm": 1.0387913824661283, "learning_rate": 4.070731274533291e-06, "loss": 0.1731, "step": 1226 }, { "epoch": 0.3198123350601114, "grad_norm": 1.1095319635355108, "learning_rate": 4.069053902139887e-06, "loss": 0.1778, "step": 1227 }, { "epoch": 0.32007298080995666, "grad_norm": 1.0357978191991473, "learning_rate": 4.067375363525516e-06, "loss": 0.1786, "step": 1228 }, { "epoch": 0.3203336265598019, "grad_norm": 1.016946463852851, "learning_rate": 4.065695659937775e-06, "loss": 0.1887, "step": 1229 }, { "epoch": 0.32059427230964715, "grad_norm": 1.0755025975148427, "learning_rate": 4.064014792625126e-06, "loss": 0.1754, "step": 1230 }, { "epoch": 0.3208549180594924, "grad_norm": 0.9507970726256197, "learning_rate": 4.062332762836899e-06, "loss": 0.1644, "step": 1231 }, { "epoch": 0.32111556380933765, "grad_norm": 0.9941401491954454, "learning_rate": 4.060649571823284e-06, "loss": 0.1681, "step": 1232 }, { "epoch": 0.3213762095591829, "grad_norm": 1.1155837631997065, "learning_rate": 4.0589652208353355e-06, "loss": 0.179, "step": 1233 }, { "epoch": 0.32163685530902814, "grad_norm": 1.1601850314808135, "learning_rate": 4.057279711124973e-06, "loss": 0.1805, "step": 1234 }, { "epoch": 0.3218975010588734, "grad_norm": 1.126377510578527, "learning_rate": 4.055593043944973e-06, "loss": 0.1994, "step": 1235 }, { "epoch": 0.3221581468087186, "grad_norm": 1.1417010499621925, "learning_rate": 4.053905220548974e-06, "loss": 0.1805, "step": 1236 }, { "epoch": 0.3224187925585638, "grad_norm": 1.1309757488969703, "learning_rate": 4.052216242191475e-06, "loss": 0.1814, "step": 1237 }, { "epoch": 0.32267943830840906, "grad_norm": 1.1547430010392448, "learning_rate": 4.050526110127832e-06, "loss": 0.1813, "step": 1238 }, { "epoch": 0.3229400840582543, "grad_norm": 1.1011829854044857, "learning_rate": 4.0488348256142595e-06, "loss": 0.1839, "step": 1239 }, { "epoch": 0.32320072980809955, "grad_norm": 1.0254851072530595, "learning_rate": 4.047142389907827e-06, "loss": 0.1713, "step": 1240 }, { "epoch": 0.3234613755579448, "grad_norm": 1.0051010145220638, "learning_rate": 4.045448804266462e-06, "loss": 0.167, "step": 1241 }, { "epoch": 0.32372202130779004, "grad_norm": 0.9493462077178605, "learning_rate": 4.043754069948944e-06, "loss": 0.1743, "step": 1242 }, { "epoch": 0.3239826670576353, "grad_norm": 1.1319800380410345, "learning_rate": 4.042058188214909e-06, "loss": 0.179, "step": 1243 }, { "epoch": 0.32424331280748053, "grad_norm": 1.0689874717023216, "learning_rate": 4.040361160324844e-06, "loss": 0.1803, "step": 1244 }, { "epoch": 0.3245039585573258, "grad_norm": 1.04669590031845, "learning_rate": 4.038662987540088e-06, "loss": 0.1886, "step": 1245 }, { "epoch": 0.324764604307171, "grad_norm": 1.1105093581046177, "learning_rate": 4.036963671122831e-06, "loss": 0.1913, "step": 1246 }, { "epoch": 0.32502525005701627, "grad_norm": 1.018246187635074, "learning_rate": 4.0352632123361155e-06, "loss": 0.1753, "step": 1247 }, { "epoch": 0.3252858958068615, "grad_norm": 1.0171935541943158, "learning_rate": 4.033561612443829e-06, "loss": 0.1716, "step": 1248 }, { "epoch": 0.32554654155670676, "grad_norm": 1.045832785202852, "learning_rate": 4.031858872710709e-06, "loss": 0.1765, "step": 1249 }, { "epoch": 0.325807187306552, "grad_norm": 1.1259834701968872, "learning_rate": 4.030154994402341e-06, "loss": 0.1937, "step": 1250 }, { "epoch": 0.32606783305639725, "grad_norm": 0.9876450959824058, "learning_rate": 4.028449978785156e-06, "loss": 0.171, "step": 1251 }, { "epoch": 0.32632847880624244, "grad_norm": 1.0421817587425526, "learning_rate": 4.0267438271264304e-06, "loss": 0.1795, "step": 1252 }, { "epoch": 0.3265891245560877, "grad_norm": 1.0262672761363638, "learning_rate": 4.025036540694285e-06, "loss": 0.1803, "step": 1253 }, { "epoch": 0.32684977030593293, "grad_norm": 1.0781667114853997, "learning_rate": 4.023328120757685e-06, "loss": 0.1699, "step": 1254 }, { "epoch": 0.3271104160557782, "grad_norm": 0.9626429272580509, "learning_rate": 4.021618568586434e-06, "loss": 0.1641, "step": 1255 }, { "epoch": 0.3273710618056234, "grad_norm": 1.0653304017035465, "learning_rate": 4.019907885451184e-06, "loss": 0.1845, "step": 1256 }, { "epoch": 0.32763170755546867, "grad_norm": 1.05024352165696, "learning_rate": 4.018196072623421e-06, "loss": 0.1749, "step": 1257 }, { "epoch": 0.3278923533053139, "grad_norm": 1.1087721265903203, "learning_rate": 4.016483131375476e-06, "loss": 0.191, "step": 1258 }, { "epoch": 0.32815299905515916, "grad_norm": 1.0693769532897845, "learning_rate": 4.014769062980513e-06, "loss": 0.1692, "step": 1259 }, { "epoch": 0.3284136448050044, "grad_norm": 1.0353424393836292, "learning_rate": 4.01305386871254e-06, "loss": 0.1742, "step": 1260 }, { "epoch": 0.32867429055484965, "grad_norm": 1.032750923758966, "learning_rate": 4.011337549846398e-06, "loss": 0.1922, "step": 1261 }, { "epoch": 0.3289349363046949, "grad_norm": 1.0061831193589021, "learning_rate": 4.009620107657763e-06, "loss": 0.178, "step": 1262 }, { "epoch": 0.32919558205454014, "grad_norm": 1.013200064002582, "learning_rate": 4.00790154342315e-06, "loss": 0.1787, "step": 1263 }, { "epoch": 0.3294562278043854, "grad_norm": 0.9646052007062735, "learning_rate": 4.006181858419905e-06, "loss": 0.1725, "step": 1264 }, { "epoch": 0.3297168735542306, "grad_norm": 1.0196882300586172, "learning_rate": 4.004461053926206e-06, "loss": 0.1689, "step": 1265 }, { "epoch": 0.3299775193040759, "grad_norm": 0.9348916011846183, "learning_rate": 4.002739131221066e-06, "loss": 0.16, "step": 1266 }, { "epoch": 0.33023816505392106, "grad_norm": 1.0457521121123599, "learning_rate": 4.0010160915843286e-06, "loss": 0.1767, "step": 1267 }, { "epoch": 0.3304988108037663, "grad_norm": 1.0958762741873225, "learning_rate": 3.999291936296664e-06, "loss": 0.1881, "step": 1268 }, { "epoch": 0.33075945655361155, "grad_norm": 1.1230314361048188, "learning_rate": 3.997566666639578e-06, "loss": 0.1823, "step": 1269 }, { "epoch": 0.3310201023034568, "grad_norm": 1.0302892823001237, "learning_rate": 3.995840283895399e-06, "loss": 0.1728, "step": 1270 }, { "epoch": 0.33128074805330204, "grad_norm": 1.0552490589668277, "learning_rate": 3.9941127893472845e-06, "loss": 0.1791, "step": 1271 }, { "epoch": 0.3315413938031473, "grad_norm": 1.087719707525748, "learning_rate": 3.99238418427922e-06, "loss": 0.1658, "step": 1272 }, { "epoch": 0.33180203955299253, "grad_norm": 1.0416050537474315, "learning_rate": 3.9906544699760145e-06, "loss": 0.1849, "step": 1273 }, { "epoch": 0.3320626853028378, "grad_norm": 0.9620143036602093, "learning_rate": 3.988923647723301e-06, "loss": 0.1748, "step": 1274 }, { "epoch": 0.332323331052683, "grad_norm": 1.0133916590292273, "learning_rate": 3.987191718807538e-06, "loss": 0.1736, "step": 1275 }, { "epoch": 0.33258397680252827, "grad_norm": 1.1118648249304537, "learning_rate": 3.9854586845160055e-06, "loss": 0.1797, "step": 1276 }, { "epoch": 0.3328446225523735, "grad_norm": 0.9516584249157207, "learning_rate": 3.9837245461368055e-06, "loss": 0.157, "step": 1277 }, { "epoch": 0.33310526830221876, "grad_norm": 1.0006148141827405, "learning_rate": 3.981989304958861e-06, "loss": 0.1705, "step": 1278 }, { "epoch": 0.333365914052064, "grad_norm": 1.015537315493189, "learning_rate": 3.980252962271911e-06, "loss": 0.171, "step": 1279 }, { "epoch": 0.33362655980190925, "grad_norm": 1.1139144397596938, "learning_rate": 3.978515519366519e-06, "loss": 0.1881, "step": 1280 }, { "epoch": 0.3338872055517545, "grad_norm": 1.0451201897020976, "learning_rate": 3.976776977534062e-06, "loss": 0.1802, "step": 1281 }, { "epoch": 0.33414785130159974, "grad_norm": 1.057787055560908, "learning_rate": 3.975037338066736e-06, "loss": 0.1815, "step": 1282 }, { "epoch": 0.33440849705144493, "grad_norm": 1.0019163965243323, "learning_rate": 3.973296602257553e-06, "loss": 0.1655, "step": 1283 }, { "epoch": 0.3346691428012902, "grad_norm": 0.986607716901985, "learning_rate": 3.9715547714003355e-06, "loss": 0.1855, "step": 1284 }, { "epoch": 0.3349297885511354, "grad_norm": 0.9774958678327729, "learning_rate": 3.969811846789727e-06, "loss": 0.1702, "step": 1285 }, { "epoch": 0.33519043430098067, "grad_norm": 1.0916298937157116, "learning_rate": 3.968067829721178e-06, "loss": 0.1766, "step": 1286 }, { "epoch": 0.3354510800508259, "grad_norm": 0.9089320012412946, "learning_rate": 3.966322721490954e-06, "loss": 0.1673, "step": 1287 }, { "epoch": 0.33571172580067116, "grad_norm": 0.9958990221452606, "learning_rate": 3.96457652339613e-06, "loss": 0.1792, "step": 1288 }, { "epoch": 0.3359723715505164, "grad_norm": 0.9291105875934697, "learning_rate": 3.9628292367345946e-06, "loss": 0.1617, "step": 1289 }, { "epoch": 0.33623301730036165, "grad_norm": 1.0642018392088273, "learning_rate": 3.961080862805039e-06, "loss": 0.175, "step": 1290 }, { "epoch": 0.3364936630502069, "grad_norm": 1.0330933128840605, "learning_rate": 3.959331402906969e-06, "loss": 0.1799, "step": 1291 }, { "epoch": 0.33675430880005214, "grad_norm": 1.0637548170595088, "learning_rate": 3.9575808583406926e-06, "loss": 0.1902, "step": 1292 }, { "epoch": 0.3370149545498974, "grad_norm": 1.0700675749535353, "learning_rate": 3.955829230407328e-06, "loss": 0.1894, "step": 1293 }, { "epoch": 0.33727560029974263, "grad_norm": 1.0218775380615843, "learning_rate": 3.954076520408796e-06, "loss": 0.171, "step": 1294 }, { "epoch": 0.3375362460495879, "grad_norm": 1.0589563262619035, "learning_rate": 3.952322729647823e-06, "loss": 0.1831, "step": 1295 }, { "epoch": 0.3377968917994331, "grad_norm": 1.0818372197819563, "learning_rate": 3.950567859427938e-06, "loss": 0.1971, "step": 1296 }, { "epoch": 0.33805753754927836, "grad_norm": 1.0489992196470928, "learning_rate": 3.948811911053473e-06, "loss": 0.1851, "step": 1297 }, { "epoch": 0.33831818329912355, "grad_norm": 0.9590518945113762, "learning_rate": 3.947054885829559e-06, "loss": 0.1644, "step": 1298 }, { "epoch": 0.3385788290489688, "grad_norm": 1.1513817320350306, "learning_rate": 3.945296785062131e-06, "loss": 0.2045, "step": 1299 }, { "epoch": 0.33883947479881404, "grad_norm": 1.0435956195755651, "learning_rate": 3.943537610057921e-06, "loss": 0.1788, "step": 1300 }, { "epoch": 0.3391001205486593, "grad_norm": 1.0188935332773505, "learning_rate": 3.941777362124461e-06, "loss": 0.1767, "step": 1301 }, { "epoch": 0.33936076629850453, "grad_norm": 0.9902985035177468, "learning_rate": 3.940016042570079e-06, "loss": 0.1727, "step": 1302 }, { "epoch": 0.3396214120483498, "grad_norm": 1.0238118472672386, "learning_rate": 3.9382536527039e-06, "loss": 0.1686, "step": 1303 }, { "epoch": 0.339882057798195, "grad_norm": 0.936323858232634, "learning_rate": 3.936490193835843e-06, "loss": 0.1623, "step": 1304 }, { "epoch": 0.34014270354804027, "grad_norm": 1.0097817705070626, "learning_rate": 3.9347256672766255e-06, "loss": 0.1528, "step": 1305 }, { "epoch": 0.3404033492978855, "grad_norm": 1.0927397887348842, "learning_rate": 3.932960074337755e-06, "loss": 0.1827, "step": 1306 }, { "epoch": 0.34066399504773076, "grad_norm": 1.0595917315745251, "learning_rate": 3.931193416331534e-06, "loss": 0.1791, "step": 1307 }, { "epoch": 0.340924640797576, "grad_norm": 0.9265573564818702, "learning_rate": 3.929425694571055e-06, "loss": 0.1675, "step": 1308 }, { "epoch": 0.34118528654742125, "grad_norm": 1.0637659925213165, "learning_rate": 3.9276569103702e-06, "loss": 0.1662, "step": 1309 }, { "epoch": 0.3414459322972665, "grad_norm": 1.1355308859555309, "learning_rate": 3.925887065043643e-06, "loss": 0.1844, "step": 1310 }, { "epoch": 0.34170657804711174, "grad_norm": 1.0181618631102256, "learning_rate": 3.924116159906848e-06, "loss": 0.1723, "step": 1311 }, { "epoch": 0.341967223796957, "grad_norm": 1.0503086194580806, "learning_rate": 3.922344196276063e-06, "loss": 0.1715, "step": 1312 }, { "epoch": 0.3422278695468022, "grad_norm": 1.017984089694206, "learning_rate": 3.920571175468324e-06, "loss": 0.1602, "step": 1313 }, { "epoch": 0.3424885152966474, "grad_norm": 1.1048377208801674, "learning_rate": 3.918797098801453e-06, "loss": 0.1747, "step": 1314 }, { "epoch": 0.34274916104649267, "grad_norm": 0.9995842393277609, "learning_rate": 3.9170219675940595e-06, "loss": 0.1735, "step": 1315 }, { "epoch": 0.3430098067963379, "grad_norm": 1.130644274712732, "learning_rate": 3.915245783165531e-06, "loss": 0.1861, "step": 1316 }, { "epoch": 0.34327045254618316, "grad_norm": 1.2037108759274266, "learning_rate": 3.9134685468360435e-06, "loss": 0.1809, "step": 1317 }, { "epoch": 0.3435310982960284, "grad_norm": 1.007812823913045, "learning_rate": 3.911690259926551e-06, "loss": 0.1762, "step": 1318 }, { "epoch": 0.34379174404587365, "grad_norm": 1.1321134218882285, "learning_rate": 3.90991092375879e-06, "loss": 0.1852, "step": 1319 }, { "epoch": 0.3440523897957189, "grad_norm": 1.0000019389352954, "learning_rate": 3.908130539655278e-06, "loss": 0.1786, "step": 1320 }, { "epoch": 0.34431303554556414, "grad_norm": 0.9936899906496572, "learning_rate": 3.906349108939308e-06, "loss": 0.1672, "step": 1321 }, { "epoch": 0.3445736812954094, "grad_norm": 1.0924846609425982, "learning_rate": 3.904566632934955e-06, "loss": 0.1775, "step": 1322 }, { "epoch": 0.34483432704525463, "grad_norm": 1.0384433561586208, "learning_rate": 3.902783112967067e-06, "loss": 0.1716, "step": 1323 }, { "epoch": 0.3450949727950999, "grad_norm": 1.0469408394438862, "learning_rate": 3.900998550361271e-06, "loss": 0.1823, "step": 1324 }, { "epoch": 0.3453556185449451, "grad_norm": 1.101254692643326, "learning_rate": 3.899212946443967e-06, "loss": 0.1717, "step": 1325 }, { "epoch": 0.34561626429479037, "grad_norm": 1.0859780537188495, "learning_rate": 3.897426302542331e-06, "loss": 0.1615, "step": 1326 }, { "epoch": 0.3458769100446356, "grad_norm": 1.0820447056985372, "learning_rate": 3.89563861998431e-06, "loss": 0.1883, "step": 1327 }, { "epoch": 0.3461375557944808, "grad_norm": 0.9741935068373674, "learning_rate": 3.893849900098623e-06, "loss": 0.1715, "step": 1328 }, { "epoch": 0.34639820154432605, "grad_norm": 1.0076414453064026, "learning_rate": 3.8920601442147625e-06, "loss": 0.1761, "step": 1329 }, { "epoch": 0.3466588472941713, "grad_norm": 1.0298196022256325, "learning_rate": 3.890269353662987e-06, "loss": 0.1771, "step": 1330 }, { "epoch": 0.34691949304401654, "grad_norm": 1.0537106989979028, "learning_rate": 3.888477529774328e-06, "loss": 0.1643, "step": 1331 }, { "epoch": 0.3471801387938618, "grad_norm": 1.0555105515461727, "learning_rate": 3.886684673880583e-06, "loss": 0.1686, "step": 1332 }, { "epoch": 0.347440784543707, "grad_norm": 1.0028771954783164, "learning_rate": 3.884890787314316e-06, "loss": 0.1577, "step": 1333 }, { "epoch": 0.34770143029355227, "grad_norm": 0.9807015629837946, "learning_rate": 3.8830958714088595e-06, "loss": 0.1719, "step": 1334 }, { "epoch": 0.3479620760433975, "grad_norm": 1.084175241363834, "learning_rate": 3.8812999274983085e-06, "loss": 0.1828, "step": 1335 }, { "epoch": 0.34822272179324276, "grad_norm": 1.0801357920408954, "learning_rate": 3.879502956917524e-06, "loss": 0.1722, "step": 1336 }, { "epoch": 0.348483367543088, "grad_norm": 1.1545347163741513, "learning_rate": 3.877704961002127e-06, "loss": 0.1799, "step": 1337 }, { "epoch": 0.34874401329293325, "grad_norm": 1.0627521153653174, "learning_rate": 3.875905941088505e-06, "loss": 0.1776, "step": 1338 }, { "epoch": 0.3490046590427785, "grad_norm": 1.013620160702109, "learning_rate": 3.874105898513803e-06, "loss": 0.18, "step": 1339 }, { "epoch": 0.34926530479262374, "grad_norm": 0.9958925507500515, "learning_rate": 3.872304834615929e-06, "loss": 0.1567, "step": 1340 }, { "epoch": 0.349525950542469, "grad_norm": 1.0083768285302437, "learning_rate": 3.870502750733547e-06, "loss": 0.1679, "step": 1341 }, { "epoch": 0.34978659629231423, "grad_norm": 1.0454060558926288, "learning_rate": 3.868699648206081e-06, "loss": 0.1645, "step": 1342 }, { "epoch": 0.3500472420421594, "grad_norm": 0.9982226813816303, "learning_rate": 3.866895528373713e-06, "loss": 0.1863, "step": 1343 }, { "epoch": 0.35030788779200467, "grad_norm": 1.075926161340213, "learning_rate": 3.8650903925773795e-06, "loss": 0.1717, "step": 1344 }, { "epoch": 0.3505685335418499, "grad_norm": 0.997929790041064, "learning_rate": 3.863284242158772e-06, "loss": 0.1798, "step": 1345 }, { "epoch": 0.35082917929169516, "grad_norm": 1.0329517677636832, "learning_rate": 3.861477078460337e-06, "loss": 0.1717, "step": 1346 }, { "epoch": 0.3510898250415404, "grad_norm": 1.0995617856910864, "learning_rate": 3.859668902825274e-06, "loss": 0.1871, "step": 1347 }, { "epoch": 0.35135047079138565, "grad_norm": 1.0634474933169864, "learning_rate": 3.857859716597534e-06, "loss": 0.1813, "step": 1348 }, { "epoch": 0.3516111165412309, "grad_norm": 1.0033562310648416, "learning_rate": 3.8560495211218215e-06, "loss": 0.1616, "step": 1349 }, { "epoch": 0.35187176229107614, "grad_norm": 0.9849475615416727, "learning_rate": 3.854238317743586e-06, "loss": 0.163, "step": 1350 }, { "epoch": 0.3521324080409214, "grad_norm": 1.0273596002283305, "learning_rate": 3.852426107809032e-06, "loss": 0.1838, "step": 1351 }, { "epoch": 0.35239305379076663, "grad_norm": 1.0812520192264397, "learning_rate": 3.8506128926651095e-06, "loss": 0.1802, "step": 1352 }, { "epoch": 0.3526536995406119, "grad_norm": 0.9529967745194364, "learning_rate": 3.8487986736595135e-06, "loss": 0.1706, "step": 1353 }, { "epoch": 0.3529143452904571, "grad_norm": 1.0909576138284665, "learning_rate": 3.846983452140689e-06, "loss": 0.1711, "step": 1354 }, { "epoch": 0.35317499104030237, "grad_norm": 1.128432441150742, "learning_rate": 3.845167229457824e-06, "loss": 0.1822, "step": 1355 }, { "epoch": 0.3534356367901476, "grad_norm": 1.0835545931837447, "learning_rate": 3.843350006960852e-06, "loss": 0.1806, "step": 1356 }, { "epoch": 0.35369628253999286, "grad_norm": 1.0728355216707797, "learning_rate": 3.841531786000448e-06, "loss": 0.1795, "step": 1357 }, { "epoch": 0.35395692828983805, "grad_norm": 1.1528734386987343, "learning_rate": 3.83971256792803e-06, "loss": 0.1758, "step": 1358 }, { "epoch": 0.3542175740396833, "grad_norm": 0.9899010069282475, "learning_rate": 3.8378923540957555e-06, "loss": 0.1591, "step": 1359 }, { "epoch": 0.35447821978952854, "grad_norm": 1.0729079050037145, "learning_rate": 3.836071145856526e-06, "loss": 0.1788, "step": 1360 }, { "epoch": 0.3547388655393738, "grad_norm": 1.1264947658844118, "learning_rate": 3.8342489445639776e-06, "loss": 0.1825, "step": 1361 }, { "epoch": 0.354999511289219, "grad_norm": 1.0048387281156623, "learning_rate": 3.832425751572488e-06, "loss": 0.1747, "step": 1362 }, { "epoch": 0.3552601570390643, "grad_norm": 1.0225846269549554, "learning_rate": 3.83060156823717e-06, "loss": 0.1685, "step": 1363 }, { "epoch": 0.3555208027889095, "grad_norm": 1.0043760364038967, "learning_rate": 3.828776395913872e-06, "loss": 0.1557, "step": 1364 }, { "epoch": 0.35578144853875476, "grad_norm": 1.0273034663761749, "learning_rate": 3.82695023595918e-06, "loss": 0.1703, "step": 1365 }, { "epoch": 0.3560420942886, "grad_norm": 0.981747485330154, "learning_rate": 3.825123089730413e-06, "loss": 0.1792, "step": 1366 }, { "epoch": 0.35630274003844525, "grad_norm": 0.9545597255303488, "learning_rate": 3.823294958585621e-06, "loss": 0.1561, "step": 1367 }, { "epoch": 0.3565633857882905, "grad_norm": 1.0113555687932105, "learning_rate": 3.821465843883588e-06, "loss": 0.1709, "step": 1368 }, { "epoch": 0.35682403153813574, "grad_norm": 1.1036648750662414, "learning_rate": 3.819635746983829e-06, "loss": 0.1756, "step": 1369 }, { "epoch": 0.357084677287981, "grad_norm": 1.0698170386182853, "learning_rate": 3.81780466924659e-06, "loss": 0.155, "step": 1370 }, { "epoch": 0.35734532303782623, "grad_norm": 1.0546957415081266, "learning_rate": 3.8159726120328434e-06, "loss": 0.1706, "step": 1371 }, { "epoch": 0.3576059687876715, "grad_norm": 1.0182119957189857, "learning_rate": 3.814139576704291e-06, "loss": 0.173, "step": 1372 }, { "epoch": 0.3578666145375167, "grad_norm": 1.1226958194236054, "learning_rate": 3.8123055646233626e-06, "loss": 0.1752, "step": 1373 }, { "epoch": 0.3581272602873619, "grad_norm": 1.060476222406519, "learning_rate": 3.810470577153212e-06, "loss": 0.1711, "step": 1374 }, { "epoch": 0.35838790603720716, "grad_norm": 1.025580089742172, "learning_rate": 3.808634615657719e-06, "loss": 0.1682, "step": 1375 }, { "epoch": 0.3586485517870524, "grad_norm": 1.0701308263830818, "learning_rate": 3.8067976815014885e-06, "loss": 0.1719, "step": 1376 }, { "epoch": 0.35890919753689765, "grad_norm": 1.098405213690586, "learning_rate": 3.804959776049846e-06, "loss": 0.1602, "step": 1377 }, { "epoch": 0.3591698432867429, "grad_norm": 0.9781366724460396, "learning_rate": 3.8031209006688397e-06, "loss": 0.1739, "step": 1378 }, { "epoch": 0.35943048903658814, "grad_norm": 0.9875215452725905, "learning_rate": 3.8012810567252404e-06, "loss": 0.1801, "step": 1379 }, { "epoch": 0.3596911347864334, "grad_norm": 1.0543923189342237, "learning_rate": 3.7994402455865375e-06, "loss": 0.1758, "step": 1380 }, { "epoch": 0.35995178053627863, "grad_norm": 1.0472827216070868, "learning_rate": 3.7975984686209376e-06, "loss": 0.1743, "step": 1381 }, { "epoch": 0.3602124262861239, "grad_norm": 0.9876723025855519, "learning_rate": 3.795755727197368e-06, "loss": 0.1722, "step": 1382 }, { "epoch": 0.3604730720359691, "grad_norm": 1.069097923559786, "learning_rate": 3.7939120226854724e-06, "loss": 0.1732, "step": 1383 }, { "epoch": 0.36073371778581437, "grad_norm": 1.0411676323988448, "learning_rate": 3.7920673564556083e-06, "loss": 0.1732, "step": 1384 }, { "epoch": 0.3609943635356596, "grad_norm": 1.091093783366776, "learning_rate": 3.79022172987885e-06, "loss": 0.1881, "step": 1385 }, { "epoch": 0.36125500928550486, "grad_norm": 0.971643925084656, "learning_rate": 3.788375144326985e-06, "loss": 0.1687, "step": 1386 }, { "epoch": 0.3615156550353501, "grad_norm": 1.0149242884738847, "learning_rate": 3.786527601172513e-06, "loss": 0.1743, "step": 1387 }, { "epoch": 0.36177630078519535, "grad_norm": 1.0094974302037405, "learning_rate": 3.784679101788647e-06, "loss": 0.163, "step": 1388 }, { "epoch": 0.36203694653504054, "grad_norm": 0.9955092035185366, "learning_rate": 3.782829647549308e-06, "loss": 0.1676, "step": 1389 }, { "epoch": 0.3622975922848858, "grad_norm": 1.1021283228777692, "learning_rate": 3.78097923982913e-06, "loss": 0.1951, "step": 1390 }, { "epoch": 0.36255823803473103, "grad_norm": 1.2061891664658861, "learning_rate": 3.7791278800034532e-06, "loss": 0.1738, "step": 1391 }, { "epoch": 0.3628188837845763, "grad_norm": 1.1465550702701575, "learning_rate": 3.7772755694483265e-06, "loss": 0.1797, "step": 1392 }, { "epoch": 0.3630795295344215, "grad_norm": 1.0966235934075577, "learning_rate": 3.775422309540505e-06, "loss": 0.1702, "step": 1393 }, { "epoch": 0.36334017528426676, "grad_norm": 1.0203247233206516, "learning_rate": 3.7735681016574504e-06, "loss": 0.1798, "step": 1394 }, { "epoch": 0.363600821034112, "grad_norm": 1.1456427439745507, "learning_rate": 3.7717129471773283e-06, "loss": 0.1909, "step": 1395 }, { "epoch": 0.36386146678395725, "grad_norm": 1.0055001798544854, "learning_rate": 3.7698568474790064e-06, "loss": 0.1768, "step": 1396 }, { "epoch": 0.3641221125338025, "grad_norm": 1.1202690821056291, "learning_rate": 3.7679998039420586e-06, "loss": 0.1802, "step": 1397 }, { "epoch": 0.36438275828364775, "grad_norm": 0.9714352291827393, "learning_rate": 3.766141817946757e-06, "loss": 0.1671, "step": 1398 }, { "epoch": 0.364643404033493, "grad_norm": 1.0819951072740248, "learning_rate": 3.7642828908740746e-06, "loss": 0.1726, "step": 1399 }, { "epoch": 0.36490404978333824, "grad_norm": 1.1006740774183301, "learning_rate": 3.7624230241056854e-06, "loss": 0.1726, "step": 1400 }, { "epoch": 0.3651646955331835, "grad_norm": 1.0263030702340796, "learning_rate": 3.760562219023962e-06, "loss": 0.1758, "step": 1401 }, { "epoch": 0.3654253412830287, "grad_norm": 0.9902916439905386, "learning_rate": 3.7587004770119716e-06, "loss": 0.1643, "step": 1402 }, { "epoch": 0.36568598703287397, "grad_norm": 1.0431108595022713, "learning_rate": 3.756837799453481e-06, "loss": 0.1567, "step": 1403 }, { "epoch": 0.36594663278271916, "grad_norm": 0.9883769415235437, "learning_rate": 3.7549741877329504e-06, "loss": 0.1608, "step": 1404 }, { "epoch": 0.3662072785325644, "grad_norm": 1.035707522346972, "learning_rate": 3.7531096432355345e-06, "loss": 0.1724, "step": 1405 }, { "epoch": 0.36646792428240965, "grad_norm": 1.142463135424401, "learning_rate": 3.7512441673470836e-06, "loss": 0.1632, "step": 1406 }, { "epoch": 0.3667285700322549, "grad_norm": 1.0989821420236159, "learning_rate": 3.749377761454136e-06, "loss": 0.1751, "step": 1407 }, { "epoch": 0.36698921578210014, "grad_norm": 1.1186242790951426, "learning_rate": 3.747510426943925e-06, "loss": 0.1832, "step": 1408 }, { "epoch": 0.3672498615319454, "grad_norm": 1.0399261567778162, "learning_rate": 3.7456421652043727e-06, "loss": 0.1657, "step": 1409 }, { "epoch": 0.36751050728179063, "grad_norm": 1.0130399699813422, "learning_rate": 3.7437729776240894e-06, "loss": 0.1655, "step": 1410 }, { "epoch": 0.3677711530316359, "grad_norm": 1.084225777635918, "learning_rate": 3.741902865592376e-06, "loss": 0.1985, "step": 1411 }, { "epoch": 0.3680317987814811, "grad_norm": 0.9789284485686888, "learning_rate": 3.740031830499219e-06, "loss": 0.1598, "step": 1412 }, { "epoch": 0.36829244453132637, "grad_norm": 0.9928733464580919, "learning_rate": 3.738159873735289e-06, "loss": 0.1795, "step": 1413 }, { "epoch": 0.3685530902811716, "grad_norm": 1.0738159976718993, "learning_rate": 3.7362869966919467e-06, "loss": 0.1644, "step": 1414 }, { "epoch": 0.36881373603101686, "grad_norm": 1.0329837839728784, "learning_rate": 3.7344132007612317e-06, "loss": 0.1841, "step": 1415 }, { "epoch": 0.3690743817808621, "grad_norm": 0.9839715452579383, "learning_rate": 3.7325384873358695e-06, "loss": 0.1766, "step": 1416 }, { "epoch": 0.36933502753070735, "grad_norm": 1.047712487267498, "learning_rate": 3.730662857809266e-06, "loss": 0.1791, "step": 1417 }, { "epoch": 0.3695956732805526, "grad_norm": 1.0267496804527663, "learning_rate": 3.7287863135755098e-06, "loss": 0.1672, "step": 1418 }, { "epoch": 0.3698563190303978, "grad_norm": 1.0563999372994728, "learning_rate": 3.7269088560293677e-06, "loss": 0.174, "step": 1419 }, { "epoch": 0.37011696478024303, "grad_norm": 1.0404073077721452, "learning_rate": 3.7250304865662857e-06, "loss": 0.1677, "step": 1420 }, { "epoch": 0.3703776105300883, "grad_norm": 1.117056851204864, "learning_rate": 3.723151206582388e-06, "loss": 0.1732, "step": 1421 }, { "epoch": 0.3706382562799335, "grad_norm": 1.1020379867114398, "learning_rate": 3.7212710174744753e-06, "loss": 0.176, "step": 1422 }, { "epoch": 0.37089890202977877, "grad_norm": 1.0648219502434406, "learning_rate": 3.719389920640025e-06, "loss": 0.1826, "step": 1423 }, { "epoch": 0.371159547779624, "grad_norm": 1.1365645793560788, "learning_rate": 3.7175079174771872e-06, "loss": 0.1611, "step": 1424 }, { "epoch": 0.37142019352946926, "grad_norm": 1.0567945300465027, "learning_rate": 3.7156250093847868e-06, "loss": 0.1692, "step": 1425 }, { "epoch": 0.3716808392793145, "grad_norm": 0.9508039242942072, "learning_rate": 3.713741197762323e-06, "loss": 0.1602, "step": 1426 }, { "epoch": 0.37194148502915975, "grad_norm": 1.0263218461762098, "learning_rate": 3.711856484009961e-06, "loss": 0.1709, "step": 1427 }, { "epoch": 0.372202130779005, "grad_norm": 1.0594233972189866, "learning_rate": 3.7099708695285436e-06, "loss": 0.1657, "step": 1428 }, { "epoch": 0.37246277652885024, "grad_norm": 1.0243248605911144, "learning_rate": 3.70808435571958e-06, "loss": 0.1702, "step": 1429 }, { "epoch": 0.3727234222786955, "grad_norm": 1.0463049744716304, "learning_rate": 3.706196943985245e-06, "loss": 0.1853, "step": 1430 }, { "epoch": 0.3729840680285407, "grad_norm": 1.0088979270579648, "learning_rate": 3.704308635728385e-06, "loss": 0.1667, "step": 1431 }, { "epoch": 0.373244713778386, "grad_norm": 1.0917638312988922, "learning_rate": 3.7024194323525115e-06, "loss": 0.172, "step": 1432 }, { "epoch": 0.3735053595282312, "grad_norm": 1.0891316672635327, "learning_rate": 3.7005293352618e-06, "loss": 0.1691, "step": 1433 }, { "epoch": 0.3737660052780764, "grad_norm": 1.0860115969252908, "learning_rate": 3.6986383458610925e-06, "loss": 0.1872, "step": 1434 }, { "epoch": 0.37402665102792165, "grad_norm": 1.0340781174950158, "learning_rate": 3.6967464655558916e-06, "loss": 0.1599, "step": 1435 }, { "epoch": 0.3742872967777669, "grad_norm": 0.9890949235599821, "learning_rate": 3.694853695752364e-06, "loss": 0.1614, "step": 1436 }, { "epoch": 0.37454794252761214, "grad_norm": 0.9813633032362149, "learning_rate": 3.6929600378573373e-06, "loss": 0.1717, "step": 1437 }, { "epoch": 0.3748085882774574, "grad_norm": 1.0558740303723453, "learning_rate": 3.6910654932782984e-06, "loss": 0.1544, "step": 1438 }, { "epoch": 0.37506923402730263, "grad_norm": 1.1272826402664458, "learning_rate": 3.689170063423394e-06, "loss": 0.1757, "step": 1439 }, { "epoch": 0.3753298797771479, "grad_norm": 1.0376175863404278, "learning_rate": 3.6872737497014286e-06, "loss": 0.1699, "step": 1440 }, { "epoch": 0.3755905255269931, "grad_norm": 1.0370462548613402, "learning_rate": 3.6853765535218632e-06, "loss": 0.1719, "step": 1441 }, { "epoch": 0.37585117127683837, "grad_norm": 1.026482697246554, "learning_rate": 3.6834784762948156e-06, "loss": 0.1694, "step": 1442 }, { "epoch": 0.3761118170266836, "grad_norm": 1.2022514872792471, "learning_rate": 3.6815795194310567e-06, "loss": 0.1832, "step": 1443 }, { "epoch": 0.37637246277652886, "grad_norm": 1.0389262570009816, "learning_rate": 3.6796796843420134e-06, "loss": 0.1828, "step": 1444 }, { "epoch": 0.3766331085263741, "grad_norm": 0.9815636876354642, "learning_rate": 3.677778972439765e-06, "loss": 0.168, "step": 1445 }, { "epoch": 0.37689375427621935, "grad_norm": 1.0645301254691022, "learning_rate": 3.67587738513704e-06, "loss": 0.1802, "step": 1446 }, { "epoch": 0.3771544000260646, "grad_norm": 1.0576441238026597, "learning_rate": 3.6739749238472207e-06, "loss": 0.1785, "step": 1447 }, { "epoch": 0.37741504577590984, "grad_norm": 0.9920651838734169, "learning_rate": 3.672071589984337e-06, "loss": 0.1804, "step": 1448 }, { "epoch": 0.37767569152575503, "grad_norm": 0.9758415760301283, "learning_rate": 3.670167384963069e-06, "loss": 0.1629, "step": 1449 }, { "epoch": 0.3779363372756003, "grad_norm": 0.9621756765975833, "learning_rate": 3.6682623101987423e-06, "loss": 0.1666, "step": 1450 }, { "epoch": 0.3781969830254455, "grad_norm": 0.9795875595776733, "learning_rate": 3.6663563671073317e-06, "loss": 0.1658, "step": 1451 }, { "epoch": 0.37845762877529077, "grad_norm": 1.0466210435187628, "learning_rate": 3.664449557105454e-06, "loss": 0.1812, "step": 1452 }, { "epoch": 0.378718274525136, "grad_norm": 1.0463705867563178, "learning_rate": 3.662541881610372e-06, "loss": 0.157, "step": 1453 }, { "epoch": 0.37897892027498126, "grad_norm": 1.0447152798249761, "learning_rate": 3.6606333420399933e-06, "loss": 0.1782, "step": 1454 }, { "epoch": 0.3792395660248265, "grad_norm": 0.9709135348772735, "learning_rate": 3.6587239398128644e-06, "loss": 0.1623, "step": 1455 }, { "epoch": 0.37950021177467175, "grad_norm": 1.052592003625377, "learning_rate": 3.6568136763481766e-06, "loss": 0.1601, "step": 1456 }, { "epoch": 0.379760857524517, "grad_norm": 1.0589951434609834, "learning_rate": 3.6549025530657588e-06, "loss": 0.1687, "step": 1457 }, { "epoch": 0.38002150327436224, "grad_norm": 1.1044438078903742, "learning_rate": 3.652990571386079e-06, "loss": 0.1908, "step": 1458 }, { "epoch": 0.3802821490242075, "grad_norm": 1.1006143864282014, "learning_rate": 3.6510777327302455e-06, "loss": 0.1615, "step": 1459 }, { "epoch": 0.38054279477405273, "grad_norm": 1.1593316022562603, "learning_rate": 3.649164038520001e-06, "loss": 0.1833, "step": 1460 }, { "epoch": 0.380803440523898, "grad_norm": 1.0743740752870277, "learning_rate": 3.6472494901777255e-06, "loss": 0.173, "step": 1461 }, { "epoch": 0.3810640862737432, "grad_norm": 1.0302851759738432, "learning_rate": 3.6453340891264344e-06, "loss": 0.1735, "step": 1462 }, { "epoch": 0.38132473202358846, "grad_norm": 1.0600916275978218, "learning_rate": 3.643417836789774e-06, "loss": 0.1764, "step": 1463 }, { "epoch": 0.3815853777734337, "grad_norm": 1.0180213145256118, "learning_rate": 3.641500734592026e-06, "loss": 0.1733, "step": 1464 }, { "epoch": 0.3818460235232789, "grad_norm": 0.9915525354597111, "learning_rate": 3.6395827839581046e-06, "loss": 0.1753, "step": 1465 }, { "epoch": 0.38210666927312414, "grad_norm": 1.0582390397018275, "learning_rate": 3.63766398631355e-06, "loss": 0.1812, "step": 1466 }, { "epoch": 0.3823673150229694, "grad_norm": 1.0622554856583828, "learning_rate": 3.6357443430845383e-06, "loss": 0.1697, "step": 1467 }, { "epoch": 0.38262796077281463, "grad_norm": 1.010930084627285, "learning_rate": 3.633823855697869e-06, "loss": 0.1708, "step": 1468 }, { "epoch": 0.3828886065226599, "grad_norm": 1.1354892208728222, "learning_rate": 3.631902525580971e-06, "loss": 0.1734, "step": 1469 }, { "epoch": 0.3831492522725051, "grad_norm": 1.0659479868360011, "learning_rate": 3.6299803541619e-06, "loss": 0.1751, "step": 1470 }, { "epoch": 0.38340989802235037, "grad_norm": 1.0359912210416837, "learning_rate": 3.6280573428693354e-06, "loss": 0.1802, "step": 1471 }, { "epoch": 0.3836705437721956, "grad_norm": 1.0111152185774306, "learning_rate": 3.6261334931325833e-06, "loss": 0.1697, "step": 1472 }, { "epoch": 0.38393118952204086, "grad_norm": 1.0377358959315766, "learning_rate": 3.6242088063815705e-06, "loss": 0.1777, "step": 1473 }, { "epoch": 0.3841918352718861, "grad_norm": 0.9812730468426374, "learning_rate": 3.622283284046847e-06, "loss": 0.1608, "step": 1474 }, { "epoch": 0.38445248102173135, "grad_norm": 1.0414218911391726, "learning_rate": 3.620356927559585e-06, "loss": 0.1767, "step": 1475 }, { "epoch": 0.3847131267715766, "grad_norm": 1.0300608403255547, "learning_rate": 3.618429738351574e-06, "loss": 0.1772, "step": 1476 }, { "epoch": 0.38497377252142184, "grad_norm": 0.9830873622463012, "learning_rate": 3.6165017178552252e-06, "loss": 0.1711, "step": 1477 }, { "epoch": 0.3852344182712671, "grad_norm": 0.9600847518715817, "learning_rate": 3.6145728675035658e-06, "loss": 0.1775, "step": 1478 }, { "epoch": 0.38549506402111233, "grad_norm": 0.9975408576106283, "learning_rate": 3.6126431887302397e-06, "loss": 0.1799, "step": 1479 }, { "epoch": 0.3857557097709575, "grad_norm": 0.9815328503795121, "learning_rate": 3.6107126829695093e-06, "loss": 0.1704, "step": 1480 }, { "epoch": 0.38601635552080277, "grad_norm": 1.0359556088806494, "learning_rate": 3.608781351656249e-06, "loss": 0.1628, "step": 1481 }, { "epoch": 0.386277001270648, "grad_norm": 1.079307994992167, "learning_rate": 3.6068491962259457e-06, "loss": 0.1718, "step": 1482 }, { "epoch": 0.38653764702049326, "grad_norm": 1.0065461790366124, "learning_rate": 3.604916218114702e-06, "loss": 0.1673, "step": 1483 }, { "epoch": 0.3867982927703385, "grad_norm": 1.164819789353568, "learning_rate": 3.6029824187592304e-06, "loss": 0.1698, "step": 1484 }, { "epoch": 0.38705893852018375, "grad_norm": 1.1769482969654779, "learning_rate": 3.6010477995968548e-06, "loss": 0.1757, "step": 1485 }, { "epoch": 0.387319584270029, "grad_norm": 1.0515115173980705, "learning_rate": 3.599112362065506e-06, "loss": 0.171, "step": 1486 }, { "epoch": 0.38758023001987424, "grad_norm": 0.9801559314082038, "learning_rate": 3.5971761076037256e-06, "loss": 0.1775, "step": 1487 }, { "epoch": 0.3878408757697195, "grad_norm": 0.9653811204835866, "learning_rate": 3.5952390376506614e-06, "loss": 0.1659, "step": 1488 }, { "epoch": 0.38810152151956473, "grad_norm": 1.044617783287181, "learning_rate": 3.593301153646067e-06, "loss": 0.1762, "step": 1489 }, { "epoch": 0.38836216726941, "grad_norm": 1.1108099440227424, "learning_rate": 3.591362457030302e-06, "loss": 0.189, "step": 1490 }, { "epoch": 0.3886228130192552, "grad_norm": 0.9858151344811652, "learning_rate": 3.5894229492443284e-06, "loss": 0.1651, "step": 1491 }, { "epoch": 0.38888345876910047, "grad_norm": 0.9972557216189503, "learning_rate": 3.5874826317297135e-06, "loss": 0.1846, "step": 1492 }, { "epoch": 0.3891441045189457, "grad_norm": 1.0350871650484486, "learning_rate": 3.585541505928624e-06, "loss": 0.1738, "step": 1493 }, { "epoch": 0.38940475026879096, "grad_norm": 1.072873209527027, "learning_rate": 3.5835995732838292e-06, "loss": 0.1846, "step": 1494 }, { "epoch": 0.38966539601863615, "grad_norm": 1.0184153417238655, "learning_rate": 3.581656835238697e-06, "loss": 0.1695, "step": 1495 }, { "epoch": 0.3899260417684814, "grad_norm": 1.042723872360239, "learning_rate": 3.579713293237194e-06, "loss": 0.1613, "step": 1496 }, { "epoch": 0.39018668751832664, "grad_norm": 1.0048962419336336, "learning_rate": 3.577768948723885e-06, "loss": 0.1513, "step": 1497 }, { "epoch": 0.3904473332681719, "grad_norm": 1.0376443155733195, "learning_rate": 3.5758238031439306e-06, "loss": 0.1804, "step": 1498 }, { "epoch": 0.3907079790180171, "grad_norm": 1.046969922853264, "learning_rate": 3.5738778579430867e-06, "loss": 0.1683, "step": 1499 }, { "epoch": 0.39096862476786237, "grad_norm": 1.026251767445393, "learning_rate": 3.5719311145677057e-06, "loss": 0.1704, "step": 1500 }, { "epoch": 0.39096862476786237, "eval_loss": 0.17104974389076233, "eval_runtime": 55.3707, "eval_samples_per_second": 44.807, "eval_steps_per_second": 5.617, "step": 1500 }, { "epoch": 0.3912292705177076, "grad_norm": 1.0627334733929505, "learning_rate": 3.5699835744647293e-06, "loss": 0.1827, "step": 1501 }, { "epoch": 0.39148991626755286, "grad_norm": 1.0849992244606397, "learning_rate": 3.5680352390816942e-06, "loss": 0.1736, "step": 1502 }, { "epoch": 0.3917505620173981, "grad_norm": 0.969007334243714, "learning_rate": 3.566086109866729e-06, "loss": 0.1622, "step": 1503 }, { "epoch": 0.39201120776724335, "grad_norm": 0.9479249107113539, "learning_rate": 3.5641361882685487e-06, "loss": 0.168, "step": 1504 }, { "epoch": 0.3922718535170886, "grad_norm": 1.0596149546437397, "learning_rate": 3.562185475736461e-06, "loss": 0.1764, "step": 1505 }, { "epoch": 0.39253249926693384, "grad_norm": 1.1311565961519008, "learning_rate": 3.5602339737203593e-06, "loss": 0.1622, "step": 1506 }, { "epoch": 0.3927931450167791, "grad_norm": 0.9965606948890194, "learning_rate": 3.5582816836707248e-06, "loss": 0.1702, "step": 1507 }, { "epoch": 0.39305379076662433, "grad_norm": 1.1012555755288624, "learning_rate": 3.5563286070386237e-06, "loss": 0.1825, "step": 1508 }, { "epoch": 0.3933144365164696, "grad_norm": 1.0754921636492956, "learning_rate": 3.5543747452757067e-06, "loss": 0.1854, "step": 1509 }, { "epoch": 0.39357508226631477, "grad_norm": 1.0676630804081295, "learning_rate": 3.5524200998342095e-06, "loss": 0.1804, "step": 1510 }, { "epoch": 0.39383572801616, "grad_norm": 1.0260335473863325, "learning_rate": 3.5504646721669484e-06, "loss": 0.1651, "step": 1511 }, { "epoch": 0.39409637376600526, "grad_norm": 1.0088379911738914, "learning_rate": 3.5485084637273225e-06, "loss": 0.1834, "step": 1512 }, { "epoch": 0.3943570195158505, "grad_norm": 1.06847158699796, "learning_rate": 3.546551475969311e-06, "loss": 0.1704, "step": 1513 }, { "epoch": 0.39461766526569575, "grad_norm": 0.9531511324167403, "learning_rate": 3.5445937103474713e-06, "loss": 0.1618, "step": 1514 }, { "epoch": 0.394878311015541, "grad_norm": 0.9951457886199593, "learning_rate": 3.5426351683169397e-06, "loss": 0.1657, "step": 1515 }, { "epoch": 0.39513895676538624, "grad_norm": 0.9491282600703335, "learning_rate": 3.5406758513334316e-06, "loss": 0.1735, "step": 1516 }, { "epoch": 0.3953996025152315, "grad_norm": 0.958175227622784, "learning_rate": 3.538715760853233e-06, "loss": 0.1672, "step": 1517 }, { "epoch": 0.39566024826507673, "grad_norm": 1.0051049364406932, "learning_rate": 3.536754898333211e-06, "loss": 0.1655, "step": 1518 }, { "epoch": 0.395920894014922, "grad_norm": 1.0135129046551488, "learning_rate": 3.5347932652308025e-06, "loss": 0.1531, "step": 1519 }, { "epoch": 0.3961815397647672, "grad_norm": 0.948312860224614, "learning_rate": 3.532830863004018e-06, "loss": 0.1522, "step": 1520 }, { "epoch": 0.39644218551461247, "grad_norm": 0.987099419108315, "learning_rate": 3.5308676931114415e-06, "loss": 0.1618, "step": 1521 }, { "epoch": 0.3967028312644577, "grad_norm": 1.0714192971332688, "learning_rate": 3.5289037570122246e-06, "loss": 0.1707, "step": 1522 }, { "epoch": 0.39696347701430296, "grad_norm": 1.0014610502060475, "learning_rate": 3.526939056166091e-06, "loss": 0.1489, "step": 1523 }, { "epoch": 0.3972241227641482, "grad_norm": 0.9930380361733596, "learning_rate": 3.5249735920333312e-06, "loss": 0.166, "step": 1524 }, { "epoch": 0.3974847685139934, "grad_norm": 0.9184929622511191, "learning_rate": 3.523007366074804e-06, "loss": 0.1602, "step": 1525 }, { "epoch": 0.39774541426383864, "grad_norm": 1.0035176718938998, "learning_rate": 3.521040379751933e-06, "loss": 0.16, "step": 1526 }, { "epoch": 0.3980060600136839, "grad_norm": 1.0185097064205273, "learning_rate": 3.519072634526709e-06, "loss": 0.167, "step": 1527 }, { "epoch": 0.3982667057635291, "grad_norm": 1.0457559052916379, "learning_rate": 3.517104131861685e-06, "loss": 0.1709, "step": 1528 }, { "epoch": 0.3985273515133744, "grad_norm": 1.0364997482225102, "learning_rate": 3.5151348732199776e-06, "loss": 0.1814, "step": 1529 }, { "epoch": 0.3987879972632196, "grad_norm": 1.0427577255027676, "learning_rate": 3.513164860065267e-06, "loss": 0.1707, "step": 1530 }, { "epoch": 0.39904864301306486, "grad_norm": 0.9933093506013475, "learning_rate": 3.5111940938617904e-06, "loss": 0.1658, "step": 1531 }, { "epoch": 0.3993092887629101, "grad_norm": 1.046736174317575, "learning_rate": 3.509222576074349e-06, "loss": 0.1785, "step": 1532 }, { "epoch": 0.39956993451275535, "grad_norm": 1.0166777051276363, "learning_rate": 3.5072503081682995e-06, "loss": 0.1762, "step": 1533 }, { "epoch": 0.3998305802626006, "grad_norm": 1.0058661928798416, "learning_rate": 3.5052772916095584e-06, "loss": 0.16, "step": 1534 }, { "epoch": 0.40009122601244584, "grad_norm": 1.0583888827552097, "learning_rate": 3.5033035278645955e-06, "loss": 0.162, "step": 1535 }, { "epoch": 0.4003518717622911, "grad_norm": 0.9790612855345961, "learning_rate": 3.50132901840044e-06, "loss": 0.1657, "step": 1536 }, { "epoch": 0.40061251751213633, "grad_norm": 0.9707712712654152, "learning_rate": 3.499353764684672e-06, "loss": 0.1584, "step": 1537 }, { "epoch": 0.4008731632619816, "grad_norm": 0.993303641018267, "learning_rate": 3.4973777681854265e-06, "loss": 0.1615, "step": 1538 }, { "epoch": 0.4011338090118268, "grad_norm": 1.1674925624461732, "learning_rate": 3.4954010303713902e-06, "loss": 0.1895, "step": 1539 }, { "epoch": 0.401394454761672, "grad_norm": 1.0323140522365777, "learning_rate": 3.4934235527118e-06, "loss": 0.1668, "step": 1540 }, { "epoch": 0.40165510051151726, "grad_norm": 0.9879509284869203, "learning_rate": 3.4914453366764455e-06, "loss": 0.1746, "step": 1541 }, { "epoch": 0.4019157462613625, "grad_norm": 0.9785892453885964, "learning_rate": 3.4894663837356607e-06, "loss": 0.165, "step": 1542 }, { "epoch": 0.40217639201120775, "grad_norm": 1.0508135202077689, "learning_rate": 3.487486695360331e-06, "loss": 0.1692, "step": 1543 }, { "epoch": 0.402437037761053, "grad_norm": 0.9573796471218162, "learning_rate": 3.485506273021887e-06, "loss": 0.1628, "step": 1544 }, { "epoch": 0.40269768351089824, "grad_norm": 0.9373657134971046, "learning_rate": 3.483525118192304e-06, "loss": 0.1462, "step": 1545 }, { "epoch": 0.4029583292607435, "grad_norm": 1.1242454275125642, "learning_rate": 3.4815432323441043e-06, "loss": 0.1841, "step": 1546 }, { "epoch": 0.40321897501058873, "grad_norm": 1.0115851464877852, "learning_rate": 3.4795606169503506e-06, "loss": 0.1802, "step": 1547 }, { "epoch": 0.403479620760434, "grad_norm": 0.9918607619820161, "learning_rate": 3.47757727348465e-06, "loss": 0.1819, "step": 1548 }, { "epoch": 0.4037402665102792, "grad_norm": 1.0341633622859887, "learning_rate": 3.4755932034211493e-06, "loss": 0.1706, "step": 1549 }, { "epoch": 0.40400091226012447, "grad_norm": 0.9997123229407087, "learning_rate": 3.4736084082345355e-06, "loss": 0.161, "step": 1550 }, { "epoch": 0.4042615580099697, "grad_norm": 1.0682602603487648, "learning_rate": 3.4716228894000366e-06, "loss": 0.1765, "step": 1551 }, { "epoch": 0.40452220375981496, "grad_norm": 0.9790601347052925, "learning_rate": 3.4696366483934156e-06, "loss": 0.1689, "step": 1552 }, { "epoch": 0.4047828495096602, "grad_norm": 1.159398647139056, "learning_rate": 3.4676496866909736e-06, "loss": 0.1891, "step": 1553 }, { "epoch": 0.40504349525950545, "grad_norm": 0.9879889643859893, "learning_rate": 3.465662005769548e-06, "loss": 0.1812, "step": 1554 }, { "epoch": 0.4053041410093507, "grad_norm": 1.0372859267705032, "learning_rate": 3.463673607106509e-06, "loss": 0.1821, "step": 1555 }, { "epoch": 0.4055647867591959, "grad_norm": 1.0373618507485032, "learning_rate": 3.461684492179763e-06, "loss": 0.185, "step": 1556 }, { "epoch": 0.40582543250904113, "grad_norm": 1.086736244950815, "learning_rate": 3.459694662467745e-06, "loss": 0.177, "step": 1557 }, { "epoch": 0.4060860782588864, "grad_norm": 0.9681153215483695, "learning_rate": 3.4577041194494253e-06, "loss": 0.1535, "step": 1558 }, { "epoch": 0.4063467240087316, "grad_norm": 0.9777298790857816, "learning_rate": 3.455712864604302e-06, "loss": 0.1612, "step": 1559 }, { "epoch": 0.40660736975857686, "grad_norm": 0.9789679281846334, "learning_rate": 3.4537208994124015e-06, "loss": 0.1531, "step": 1560 }, { "epoch": 0.4068680155084221, "grad_norm": 1.0728346831820663, "learning_rate": 3.4517282253542806e-06, "loss": 0.1656, "step": 1561 }, { "epoch": 0.40712866125826735, "grad_norm": 1.0626788181611606, "learning_rate": 3.449734843911022e-06, "loss": 0.157, "step": 1562 }, { "epoch": 0.4073893070081126, "grad_norm": 1.0656606368013117, "learning_rate": 3.4477407565642333e-06, "loss": 0.1705, "step": 1563 }, { "epoch": 0.40764995275795785, "grad_norm": 1.0119590858093308, "learning_rate": 3.4457459647960477e-06, "loss": 0.1472, "step": 1564 }, { "epoch": 0.4079105985078031, "grad_norm": 1.1888416205188501, "learning_rate": 3.4437504700891215e-06, "loss": 0.1835, "step": 1565 }, { "epoch": 0.40817124425764834, "grad_norm": 1.123330406670444, "learning_rate": 3.441754273926634e-06, "loss": 0.1701, "step": 1566 }, { "epoch": 0.4084318900074936, "grad_norm": 1.0346462144848954, "learning_rate": 3.4397573777922853e-06, "loss": 0.1738, "step": 1567 }, { "epoch": 0.4086925357573388, "grad_norm": 1.0032337229335375, "learning_rate": 3.4377597831702962e-06, "loss": 0.162, "step": 1568 }, { "epoch": 0.40895318150718407, "grad_norm": 1.056151598323862, "learning_rate": 3.435761491545407e-06, "loss": 0.1626, "step": 1569 }, { "epoch": 0.4092138272570293, "grad_norm": 1.0170539234367735, "learning_rate": 3.433762504402874e-06, "loss": 0.1712, "step": 1570 }, { "epoch": 0.4094744730068745, "grad_norm": 0.9616055751114734, "learning_rate": 3.4317628232284734e-06, "loss": 0.1556, "step": 1571 }, { "epoch": 0.40973511875671975, "grad_norm": 1.1010192959930656, "learning_rate": 3.429762449508495e-06, "loss": 0.1777, "step": 1572 }, { "epoch": 0.409995764506565, "grad_norm": 1.0008256096818822, "learning_rate": 3.427761384729744e-06, "loss": 0.1744, "step": 1573 }, { "epoch": 0.41025641025641024, "grad_norm": 0.9571549063524155, "learning_rate": 3.425759630379541e-06, "loss": 0.1626, "step": 1574 }, { "epoch": 0.4105170560062555, "grad_norm": 1.0179170489172409, "learning_rate": 3.423757187945716e-06, "loss": 0.1686, "step": 1575 }, { "epoch": 0.41077770175610073, "grad_norm": 1.0279428529594723, "learning_rate": 3.421754058916612e-06, "loss": 0.1482, "step": 1576 }, { "epoch": 0.411038347505946, "grad_norm": 1.0478224766323474, "learning_rate": 3.4197502447810836e-06, "loss": 0.171, "step": 1577 }, { "epoch": 0.4112989932557912, "grad_norm": 1.0131888519945775, "learning_rate": 3.4177457470284916e-06, "loss": 0.1564, "step": 1578 }, { "epoch": 0.41155963900563647, "grad_norm": 1.0824852860699306, "learning_rate": 3.4157405671487077e-06, "loss": 0.1811, "step": 1579 }, { "epoch": 0.4118202847554817, "grad_norm": 0.9814387499036377, "learning_rate": 3.4137347066321097e-06, "loss": 0.1622, "step": 1580 }, { "epoch": 0.41208093050532696, "grad_norm": 0.9309424562314182, "learning_rate": 3.41172816696958e-06, "loss": 0.1548, "step": 1581 }, { "epoch": 0.4123415762551722, "grad_norm": 1.0329690416557908, "learning_rate": 3.4097209496525087e-06, "loss": 0.1714, "step": 1582 }, { "epoch": 0.41260222200501745, "grad_norm": 0.9334595952178787, "learning_rate": 3.4077130561727858e-06, "loss": 0.1659, "step": 1583 }, { "epoch": 0.4128628677548627, "grad_norm": 0.9537845736923762, "learning_rate": 3.4057044880228064e-06, "loss": 0.1584, "step": 1584 }, { "epoch": 0.41312351350470794, "grad_norm": 1.0693349683789246, "learning_rate": 3.4036952466954674e-06, "loss": 0.1773, "step": 1585 }, { "epoch": 0.41338415925455313, "grad_norm": 0.9887128000473697, "learning_rate": 3.4016853336841638e-06, "loss": 0.1741, "step": 1586 }, { "epoch": 0.4136448050043984, "grad_norm": 1.0251182988520864, "learning_rate": 3.3996747504827926e-06, "loss": 0.1777, "step": 1587 }, { "epoch": 0.4139054507542436, "grad_norm": 0.9586995885054259, "learning_rate": 3.397663498585747e-06, "loss": 0.1654, "step": 1588 }, { "epoch": 0.41416609650408887, "grad_norm": 0.9407754158228189, "learning_rate": 3.3956515794879166e-06, "loss": 0.1546, "step": 1589 }, { "epoch": 0.4144267422539341, "grad_norm": 1.023251683953715, "learning_rate": 3.39363899468469e-06, "loss": 0.1684, "step": 1590 }, { "epoch": 0.41468738800377936, "grad_norm": 1.027156638685618, "learning_rate": 3.391625745671947e-06, "loss": 0.1775, "step": 1591 }, { "epoch": 0.4149480337536246, "grad_norm": 1.0316140797905131, "learning_rate": 3.3896118339460635e-06, "loss": 0.1573, "step": 1592 }, { "epoch": 0.41520867950346985, "grad_norm": 1.0462417895597538, "learning_rate": 3.3875972610039075e-06, "loss": 0.156, "step": 1593 }, { "epoch": 0.4154693252533151, "grad_norm": 1.0634926932694508, "learning_rate": 3.385582028342837e-06, "loss": 0.1714, "step": 1594 }, { "epoch": 0.41572997100316034, "grad_norm": 1.0523335403078318, "learning_rate": 3.383566137460702e-06, "loss": 0.1696, "step": 1595 }, { "epoch": 0.4159906167530056, "grad_norm": 1.0123056340257934, "learning_rate": 3.3815495898558424e-06, "loss": 0.1609, "step": 1596 }, { "epoch": 0.4162512625028508, "grad_norm": 1.0373800650644867, "learning_rate": 3.3795323870270837e-06, "loss": 0.1601, "step": 1597 }, { "epoch": 0.4165119082526961, "grad_norm": 1.0084713786354695, "learning_rate": 3.377514530473739e-06, "loss": 0.1585, "step": 1598 }, { "epoch": 0.4167725540025413, "grad_norm": 0.978514525484888, "learning_rate": 3.37549602169561e-06, "loss": 0.1521, "step": 1599 }, { "epoch": 0.41703319975238656, "grad_norm": 1.0437820526983963, "learning_rate": 3.3734768621929805e-06, "loss": 0.1612, "step": 1600 }, { "epoch": 0.41729384550223175, "grad_norm": 1.0081783656235779, "learning_rate": 3.3714570534666175e-06, "loss": 0.1631, "step": 1601 }, { "epoch": 0.417554491252077, "grad_norm": 0.949445428499815, "learning_rate": 3.369436597017774e-06, "loss": 0.1563, "step": 1602 }, { "epoch": 0.41781513700192224, "grad_norm": 1.0013003357931072, "learning_rate": 3.3674154943481795e-06, "loss": 0.1692, "step": 1603 }, { "epoch": 0.4180757827517675, "grad_norm": 1.037262130912649, "learning_rate": 3.3653937469600483e-06, "loss": 0.1638, "step": 1604 }, { "epoch": 0.41833642850161273, "grad_norm": 1.0370330642820582, "learning_rate": 3.363371356356072e-06, "loss": 0.1665, "step": 1605 }, { "epoch": 0.418597074251458, "grad_norm": 0.9381199963658197, "learning_rate": 3.361348324039419e-06, "loss": 0.1512, "step": 1606 }, { "epoch": 0.4188577200013032, "grad_norm": 1.0569976788558317, "learning_rate": 3.3593246515137373e-06, "loss": 0.1721, "step": 1607 }, { "epoch": 0.41911836575114847, "grad_norm": 1.1020362945431479, "learning_rate": 3.3573003402831487e-06, "loss": 0.1625, "step": 1608 }, { "epoch": 0.4193790115009937, "grad_norm": 1.0182738898993946, "learning_rate": 3.3552753918522507e-06, "loss": 0.1736, "step": 1609 }, { "epoch": 0.41963965725083896, "grad_norm": 0.9389005371430739, "learning_rate": 3.353249807726115e-06, "loss": 0.16, "step": 1610 }, { "epoch": 0.4199003030006842, "grad_norm": 1.0002495556343975, "learning_rate": 3.351223589410283e-06, "loss": 0.1624, "step": 1611 }, { "epoch": 0.42016094875052945, "grad_norm": 0.9938548521638579, "learning_rate": 3.349196738410771e-06, "loss": 0.1733, "step": 1612 }, { "epoch": 0.4204215945003747, "grad_norm": 1.0745303339609047, "learning_rate": 3.347169256234064e-06, "loss": 0.1661, "step": 1613 }, { "epoch": 0.42068224025021994, "grad_norm": 0.9451557944642562, "learning_rate": 3.3451411443871145e-06, "loss": 0.1512, "step": 1614 }, { "epoch": 0.4209428860000652, "grad_norm": 1.034873618003438, "learning_rate": 3.343112404377347e-06, "loss": 0.1721, "step": 1615 }, { "epoch": 0.4212035317499104, "grad_norm": 1.0989087468663188, "learning_rate": 3.341083037712649e-06, "loss": 0.1796, "step": 1616 }, { "epoch": 0.4214641774997556, "grad_norm": 0.9931616491578401, "learning_rate": 3.339053045901375e-06, "loss": 0.1722, "step": 1617 }, { "epoch": 0.42172482324960087, "grad_norm": 0.9917097948838897, "learning_rate": 3.337022430452346e-06, "loss": 0.1569, "step": 1618 }, { "epoch": 0.4219854689994461, "grad_norm": 1.0384287401654366, "learning_rate": 3.3349911928748424e-06, "loss": 0.1785, "step": 1619 }, { "epoch": 0.42224611474929136, "grad_norm": 0.9990874726186374, "learning_rate": 3.3329593346786125e-06, "loss": 0.1658, "step": 1620 }, { "epoch": 0.4225067604991366, "grad_norm": 1.0150683200976958, "learning_rate": 3.3309268573738605e-06, "loss": 0.1661, "step": 1621 }, { "epoch": 0.42276740624898185, "grad_norm": 1.0279392018023983, "learning_rate": 3.328893762471255e-06, "loss": 0.1886, "step": 1622 }, { "epoch": 0.4230280519988271, "grad_norm": 1.0098944173393523, "learning_rate": 3.3268600514819214e-06, "loss": 0.1686, "step": 1623 }, { "epoch": 0.42328869774867234, "grad_norm": 0.9964246109049048, "learning_rate": 3.324825725917442e-06, "loss": 0.1686, "step": 1624 }, { "epoch": 0.4235493434985176, "grad_norm": 0.9843369103951943, "learning_rate": 3.32279078728986e-06, "loss": 0.1768, "step": 1625 }, { "epoch": 0.42380998924836283, "grad_norm": 0.9461800969060045, "learning_rate": 3.320755237111669e-06, "loss": 0.1579, "step": 1626 }, { "epoch": 0.4240706349982081, "grad_norm": 1.0310589381444089, "learning_rate": 3.318719076895821e-06, "loss": 0.1656, "step": 1627 }, { "epoch": 0.4243312807480533, "grad_norm": 1.0740362025583994, "learning_rate": 3.316682308155721e-06, "loss": 0.1598, "step": 1628 }, { "epoch": 0.42459192649789856, "grad_norm": 1.058830362690267, "learning_rate": 3.314644932405224e-06, "loss": 0.173, "step": 1629 }, { "epoch": 0.4248525722477438, "grad_norm": 1.0355706545264993, "learning_rate": 3.312606951158638e-06, "loss": 0.1702, "step": 1630 }, { "epoch": 0.425113217997589, "grad_norm": 1.0691074698276224, "learning_rate": 3.3105683659307207e-06, "loss": 0.1592, "step": 1631 }, { "epoch": 0.42537386374743424, "grad_norm": 1.015351964669341, "learning_rate": 3.308529178236679e-06, "loss": 0.1555, "step": 1632 }, { "epoch": 0.4256345094972795, "grad_norm": 1.01630907734922, "learning_rate": 3.306489389592168e-06, "loss": 0.1721, "step": 1633 }, { "epoch": 0.42589515524712473, "grad_norm": 0.9926463185466904, "learning_rate": 3.304449001513287e-06, "loss": 0.1583, "step": 1634 }, { "epoch": 0.42615580099697, "grad_norm": 0.9431051798216379, "learning_rate": 3.3024080155165837e-06, "loss": 0.1486, "step": 1635 }, { "epoch": 0.4264164467468152, "grad_norm": 1.0232239295798136, "learning_rate": 3.3003664331190487e-06, "loss": 0.1663, "step": 1636 }, { "epoch": 0.42667709249666047, "grad_norm": 1.0581870127766781, "learning_rate": 3.2983242558381178e-06, "loss": 0.176, "step": 1637 }, { "epoch": 0.4269377382465057, "grad_norm": 0.9415243709038383, "learning_rate": 3.296281485191665e-06, "loss": 0.1579, "step": 1638 }, { "epoch": 0.42719838399635096, "grad_norm": 1.0634859571864321, "learning_rate": 3.294238122698009e-06, "loss": 0.1643, "step": 1639 }, { "epoch": 0.4274590297461962, "grad_norm": 1.0274163687891966, "learning_rate": 3.292194169875908e-06, "loss": 0.1745, "step": 1640 }, { "epoch": 0.42771967549604145, "grad_norm": 0.9621632921048424, "learning_rate": 3.2901496282445567e-06, "loss": 0.155, "step": 1641 }, { "epoch": 0.4279803212458867, "grad_norm": 1.0764362051780378, "learning_rate": 3.2881044993235893e-06, "loss": 0.1716, "step": 1642 }, { "epoch": 0.42824096699573194, "grad_norm": 1.0884014580523436, "learning_rate": 3.2860587846330773e-06, "loss": 0.165, "step": 1643 }, { "epoch": 0.4285016127455772, "grad_norm": 1.090708001948278, "learning_rate": 3.284012485693524e-06, "loss": 0.1813, "step": 1644 }, { "epoch": 0.42876225849542243, "grad_norm": 0.996367356509976, "learning_rate": 3.281965604025871e-06, "loss": 0.1748, "step": 1645 }, { "epoch": 0.4290229042452677, "grad_norm": 1.1025312263119378, "learning_rate": 3.279918141151492e-06, "loss": 0.1927, "step": 1646 }, { "epoch": 0.42928354999511287, "grad_norm": 1.0021205812218463, "learning_rate": 3.2778700985921897e-06, "loss": 0.1707, "step": 1647 }, { "epoch": 0.4295441957449581, "grad_norm": 1.0153601003208057, "learning_rate": 3.2758214778702026e-06, "loss": 0.1531, "step": 1648 }, { "epoch": 0.42980484149480336, "grad_norm": 0.9885653746535285, "learning_rate": 3.273772280508194e-06, "loss": 0.156, "step": 1649 }, { "epoch": 0.4300654872446486, "grad_norm": 1.005198152036611, "learning_rate": 3.2717225080292598e-06, "loss": 0.1628, "step": 1650 }, { "epoch": 0.43032613299449385, "grad_norm": 1.1581429281518025, "learning_rate": 3.2696721619569214e-06, "loss": 0.1789, "step": 1651 }, { "epoch": 0.4305867787443391, "grad_norm": 1.0305245385932094, "learning_rate": 3.2676212438151256e-06, "loss": 0.1513, "step": 1652 }, { "epoch": 0.43084742449418434, "grad_norm": 0.9941742057332584, "learning_rate": 3.2655697551282473e-06, "loss": 0.1626, "step": 1653 }, { "epoch": 0.4311080702440296, "grad_norm": 0.9958298383846002, "learning_rate": 3.2635176974210824e-06, "loss": 0.1547, "step": 1654 }, { "epoch": 0.43136871599387483, "grad_norm": 1.0629851302674018, "learning_rate": 3.2614650722188517e-06, "loss": 0.1718, "step": 1655 }, { "epoch": 0.4316293617437201, "grad_norm": 0.9588710531500187, "learning_rate": 3.2594118810471982e-06, "loss": 0.1522, "step": 1656 }, { "epoch": 0.4318900074935653, "grad_norm": 0.95543470201312, "learning_rate": 3.2573581254321824e-06, "loss": 0.1567, "step": 1657 }, { "epoch": 0.43215065324341057, "grad_norm": 1.0058367559518289, "learning_rate": 3.2553038069002885e-06, "loss": 0.1549, "step": 1658 }, { "epoch": 0.4324112989932558, "grad_norm": 1.0132420046443256, "learning_rate": 3.253248926978416e-06, "loss": 0.1559, "step": 1659 }, { "epoch": 0.43267194474310106, "grad_norm": 1.0498798054830505, "learning_rate": 3.2511934871938825e-06, "loss": 0.1818, "step": 1660 }, { "epoch": 0.4329325904929463, "grad_norm": 1.011258461486697, "learning_rate": 3.249137489074423e-06, "loss": 0.1649, "step": 1661 }, { "epoch": 0.4331932362427915, "grad_norm": 0.965232227632515, "learning_rate": 3.247080934148186e-06, "loss": 0.1569, "step": 1662 }, { "epoch": 0.43345388199263674, "grad_norm": 0.9712674421181829, "learning_rate": 3.2450238239437344e-06, "loss": 0.1592, "step": 1663 }, { "epoch": 0.433714527742482, "grad_norm": 0.9602511031778689, "learning_rate": 3.242966159990044e-06, "loss": 0.1523, "step": 1664 }, { "epoch": 0.4339751734923272, "grad_norm": 1.1474346946104743, "learning_rate": 3.2409079438165015e-06, "loss": 0.1856, "step": 1665 }, { "epoch": 0.43423581924217247, "grad_norm": 1.006948576539231, "learning_rate": 3.238849176952904e-06, "loss": 0.1638, "step": 1666 }, { "epoch": 0.4344964649920177, "grad_norm": 1.0052492217003057, "learning_rate": 3.23678986092946e-06, "loss": 0.1681, "step": 1667 }, { "epoch": 0.43475711074186296, "grad_norm": 0.9489375609079275, "learning_rate": 3.2347299972767824e-06, "loss": 0.1539, "step": 1668 }, { "epoch": 0.4350177564917082, "grad_norm": 1.018288028971016, "learning_rate": 3.232669587525895e-06, "loss": 0.1689, "step": 1669 }, { "epoch": 0.43527840224155345, "grad_norm": 1.0027655370716173, "learning_rate": 3.230608633208225e-06, "loss": 0.1606, "step": 1670 }, { "epoch": 0.4355390479913987, "grad_norm": 1.0556555414414648, "learning_rate": 3.2285471358556063e-06, "loss": 0.1792, "step": 1671 }, { "epoch": 0.43579969374124394, "grad_norm": 1.0687774340901568, "learning_rate": 3.226485097000273e-06, "loss": 0.1854, "step": 1672 }, { "epoch": 0.4360603394910892, "grad_norm": 0.983554796478519, "learning_rate": 3.2244225181748654e-06, "loss": 0.1647, "step": 1673 }, { "epoch": 0.43632098524093443, "grad_norm": 1.03611424220492, "learning_rate": 3.2223594009124247e-06, "loss": 0.1763, "step": 1674 }, { "epoch": 0.4365816309907797, "grad_norm": 1.0161740703248943, "learning_rate": 3.2202957467463893e-06, "loss": 0.1618, "step": 1675 }, { "epoch": 0.4368422767406249, "grad_norm": 1.0041919135118396, "learning_rate": 3.2182315572105995e-06, "loss": 0.1686, "step": 1676 }, { "epoch": 0.4371029224904701, "grad_norm": 1.0471929574056769, "learning_rate": 3.2161668338392924e-06, "loss": 0.1739, "step": 1677 }, { "epoch": 0.43736356824031536, "grad_norm": 1.004633382076604, "learning_rate": 3.2141015781671025e-06, "loss": 0.1674, "step": 1678 }, { "epoch": 0.4376242139901606, "grad_norm": 1.0069652933117466, "learning_rate": 3.2120357917290597e-06, "loss": 0.1631, "step": 1679 }, { "epoch": 0.43788485974000585, "grad_norm": 1.0335446754178546, "learning_rate": 3.209969476060587e-06, "loss": 0.1605, "step": 1680 }, { "epoch": 0.4381455054898511, "grad_norm": 0.9929381109536376, "learning_rate": 3.2079026326975037e-06, "loss": 0.1594, "step": 1681 }, { "epoch": 0.43840615123969634, "grad_norm": 1.0547239633136538, "learning_rate": 3.2058352631760198e-06, "loss": 0.1634, "step": 1682 }, { "epoch": 0.4386667969895416, "grad_norm": 1.0271262749422003, "learning_rate": 3.2037673690327343e-06, "loss": 0.1623, "step": 1683 }, { "epoch": 0.43892744273938683, "grad_norm": 0.9766343549356166, "learning_rate": 3.2016989518046397e-06, "loss": 0.1588, "step": 1684 }, { "epoch": 0.4391880884892321, "grad_norm": 1.0136019253543773, "learning_rate": 3.199630013029115e-06, "loss": 0.1633, "step": 1685 }, { "epoch": 0.4394487342390773, "grad_norm": 0.9845539843286003, "learning_rate": 3.1975605542439276e-06, "loss": 0.1624, "step": 1686 }, { "epoch": 0.43970937998892257, "grad_norm": 0.9506406619038998, "learning_rate": 3.195490576987231e-06, "loss": 0.1556, "step": 1687 }, { "epoch": 0.4399700257387678, "grad_norm": 0.9705109957496552, "learning_rate": 3.1934200827975654e-06, "loss": 0.1752, "step": 1688 }, { "epoch": 0.44023067148861306, "grad_norm": 1.0161316265564382, "learning_rate": 3.1913490732138537e-06, "loss": 0.1592, "step": 1689 }, { "epoch": 0.4404913172384583, "grad_norm": 1.0053373216416581, "learning_rate": 3.1892775497754014e-06, "loss": 0.1672, "step": 1690 }, { "epoch": 0.44075196298830355, "grad_norm": 1.061559985271864, "learning_rate": 3.187205514021897e-06, "loss": 0.1768, "step": 1691 }, { "epoch": 0.44101260873814874, "grad_norm": 1.117184083109042, "learning_rate": 3.1851329674934116e-06, "loss": 0.1841, "step": 1692 }, { "epoch": 0.441273254487994, "grad_norm": 0.9511891911130729, "learning_rate": 3.183059911730392e-06, "loss": 0.1544, "step": 1693 }, { "epoch": 0.4415339002378392, "grad_norm": 0.9827569786924061, "learning_rate": 3.1809863482736663e-06, "loss": 0.1626, "step": 1694 }, { "epoch": 0.4417945459876845, "grad_norm": 0.9896121424352229, "learning_rate": 3.1789122786644394e-06, "loss": 0.1546, "step": 1695 }, { "epoch": 0.4420551917375297, "grad_norm": 1.010745150668344, "learning_rate": 3.176837704444291e-06, "loss": 0.1778, "step": 1696 }, { "epoch": 0.44231583748737496, "grad_norm": 0.9816619128389741, "learning_rate": 3.174762627155179e-06, "loss": 0.1693, "step": 1697 }, { "epoch": 0.4425764832372202, "grad_norm": 0.9424963490248378, "learning_rate": 3.1726870483394312e-06, "loss": 0.1531, "step": 1698 }, { "epoch": 0.44283712898706545, "grad_norm": 1.0588623052256874, "learning_rate": 3.1706109695397515e-06, "loss": 0.1715, "step": 1699 }, { "epoch": 0.4430977747369107, "grad_norm": 1.0421447852910934, "learning_rate": 3.168534392299214e-06, "loss": 0.1675, "step": 1700 }, { "epoch": 0.44335842048675594, "grad_norm": 1.1225220416733732, "learning_rate": 3.1664573181612616e-06, "loss": 0.1793, "step": 1701 }, { "epoch": 0.4436190662366012, "grad_norm": 1.0344111885177878, "learning_rate": 3.1643797486697116e-06, "loss": 0.1759, "step": 1702 }, { "epoch": 0.44387971198644643, "grad_norm": 1.1416420074260722, "learning_rate": 3.1623016853687434e-06, "loss": 0.1823, "step": 1703 }, { "epoch": 0.4441403577362917, "grad_norm": 1.0818714604168744, "learning_rate": 3.1602231298029074e-06, "loss": 0.1733, "step": 1704 }, { "epoch": 0.4444010034861369, "grad_norm": 1.0569640283257182, "learning_rate": 3.1581440835171185e-06, "loss": 0.1781, "step": 1705 }, { "epoch": 0.44466164923598217, "grad_norm": 1.002359910876816, "learning_rate": 3.1560645480566566e-06, "loss": 0.162, "step": 1706 }, { "epoch": 0.44492229498582736, "grad_norm": 1.0576185625903023, "learning_rate": 3.153984524967165e-06, "loss": 0.1516, "step": 1707 }, { "epoch": 0.4451829407356726, "grad_norm": 1.0270560469590937, "learning_rate": 3.15190401579465e-06, "loss": 0.1662, "step": 1708 }, { "epoch": 0.44544358648551785, "grad_norm": 0.989235552546934, "learning_rate": 3.149823022085478e-06, "loss": 0.1691, "step": 1709 }, { "epoch": 0.4457042322353631, "grad_norm": 1.062838162159658, "learning_rate": 3.1477415453863772e-06, "loss": 0.1604, "step": 1710 }, { "epoch": 0.44596487798520834, "grad_norm": 1.0528181246592638, "learning_rate": 3.1456595872444332e-06, "loss": 0.1573, "step": 1711 }, { "epoch": 0.4462255237350536, "grad_norm": 1.1135913765484957, "learning_rate": 3.143577149207091e-06, "loss": 0.1593, "step": 1712 }, { "epoch": 0.44648616948489883, "grad_norm": 1.008809840005391, "learning_rate": 3.1414942328221494e-06, "loss": 0.1535, "step": 1713 }, { "epoch": 0.4467468152347441, "grad_norm": 1.0815013656917745, "learning_rate": 3.139410839637767e-06, "loss": 0.178, "step": 1714 }, { "epoch": 0.4470074609845893, "grad_norm": 1.0200038359930406, "learning_rate": 3.1373269712024533e-06, "loss": 0.1614, "step": 1715 }, { "epoch": 0.44726810673443457, "grad_norm": 1.0516783551281395, "learning_rate": 3.135242629065073e-06, "loss": 0.1635, "step": 1716 }, { "epoch": 0.4475287524842798, "grad_norm": 1.041619668828157, "learning_rate": 3.1331578147748416e-06, "loss": 0.1597, "step": 1717 }, { "epoch": 0.44778939823412506, "grad_norm": 0.9902412331024265, "learning_rate": 3.131072529881326e-06, "loss": 0.1544, "step": 1718 }, { "epoch": 0.4480500439839703, "grad_norm": 0.9955625103364052, "learning_rate": 3.1289867759344434e-06, "loss": 0.1613, "step": 1719 }, { "epoch": 0.44831068973381555, "grad_norm": 1.0350313818418917, "learning_rate": 3.126900554484459e-06, "loss": 0.1609, "step": 1720 }, { "epoch": 0.4485713354836608, "grad_norm": 1.075799035943732, "learning_rate": 3.1248138670819857e-06, "loss": 0.1616, "step": 1721 }, { "epoch": 0.44883198123350604, "grad_norm": 1.0670308573490974, "learning_rate": 3.122726715277983e-06, "loss": 0.1711, "step": 1722 }, { "epoch": 0.44909262698335123, "grad_norm": 0.9629547732305407, "learning_rate": 3.120639100623755e-06, "loss": 0.149, "step": 1723 }, { "epoch": 0.4493532727331965, "grad_norm": 1.037597045706153, "learning_rate": 3.1185510246709487e-06, "loss": 0.1565, "step": 1724 }, { "epoch": 0.4496139184830417, "grad_norm": 1.146640918437456, "learning_rate": 3.116462488971559e-06, "loss": 0.1714, "step": 1725 }, { "epoch": 0.44987456423288696, "grad_norm": 1.0972138741983242, "learning_rate": 3.1143734950779155e-06, "loss": 0.1511, "step": 1726 }, { "epoch": 0.4501352099827322, "grad_norm": 1.0727990844976123, "learning_rate": 3.1122840445426927e-06, "loss": 0.1688, "step": 1727 }, { "epoch": 0.45039585573257745, "grad_norm": 1.124024385170111, "learning_rate": 3.1101941389189045e-06, "loss": 0.1642, "step": 1728 }, { "epoch": 0.4506565014824227, "grad_norm": 1.053045641821991, "learning_rate": 3.1081037797599006e-06, "loss": 0.1697, "step": 1729 }, { "epoch": 0.45091714723226795, "grad_norm": 1.0255378698914872, "learning_rate": 3.106012968619371e-06, "loss": 0.1677, "step": 1730 }, { "epoch": 0.4511777929821132, "grad_norm": 1.0153436573276848, "learning_rate": 3.103921707051338e-06, "loss": 0.1664, "step": 1731 }, { "epoch": 0.45143843873195844, "grad_norm": 1.1001976125940571, "learning_rate": 3.1018299966101624e-06, "loss": 0.1865, "step": 1732 }, { "epoch": 0.4516990844818037, "grad_norm": 1.122283584487178, "learning_rate": 3.0997378388505354e-06, "loss": 0.1667, "step": 1733 }, { "epoch": 0.4519597302316489, "grad_norm": 1.0287074838379138, "learning_rate": 3.097645235327483e-06, "loss": 0.1649, "step": 1734 }, { "epoch": 0.45222037598149417, "grad_norm": 0.9941189384529955, "learning_rate": 3.095552187596361e-06, "loss": 0.1582, "step": 1735 }, { "epoch": 0.4524810217313394, "grad_norm": 1.013943218514299, "learning_rate": 3.0934586972128574e-06, "loss": 0.1695, "step": 1736 }, { "epoch": 0.45274166748118466, "grad_norm": 0.8885080300265826, "learning_rate": 3.091364765732986e-06, "loss": 0.141, "step": 1737 }, { "epoch": 0.45300231323102985, "grad_norm": 1.0295086004487772, "learning_rate": 3.0892703947130914e-06, "loss": 0.1635, "step": 1738 }, { "epoch": 0.4532629589808751, "grad_norm": 1.1198959556104868, "learning_rate": 3.087175585709844e-06, "loss": 0.1797, "step": 1739 }, { "epoch": 0.45352360473072034, "grad_norm": 1.0061588329768794, "learning_rate": 3.085080340280239e-06, "loss": 0.157, "step": 1740 }, { "epoch": 0.4537842504805656, "grad_norm": 0.9954238000200184, "learning_rate": 3.082984659981596e-06, "loss": 0.1758, "step": 1741 }, { "epoch": 0.45404489623041083, "grad_norm": 0.9967661531451351, "learning_rate": 3.0808885463715584e-06, "loss": 0.157, "step": 1742 }, { "epoch": 0.4543055419802561, "grad_norm": 1.0044690052364265, "learning_rate": 3.0787920010080923e-06, "loss": 0.1638, "step": 1743 }, { "epoch": 0.4545661877301013, "grad_norm": 1.0227075369742937, "learning_rate": 3.076695025449484e-06, "loss": 0.1653, "step": 1744 }, { "epoch": 0.45482683347994657, "grad_norm": 0.9754072569399966, "learning_rate": 3.0745976212543393e-06, "loss": 0.1544, "step": 1745 }, { "epoch": 0.4550874792297918, "grad_norm": 0.996214692603509, "learning_rate": 3.072499789981582e-06, "loss": 0.1579, "step": 1746 }, { "epoch": 0.45534812497963706, "grad_norm": 0.9825870945073872, "learning_rate": 3.070401533190455e-06, "loss": 0.1602, "step": 1747 }, { "epoch": 0.4556087707294823, "grad_norm": 1.0069354856470072, "learning_rate": 3.068302852440517e-06, "loss": 0.1638, "step": 1748 }, { "epoch": 0.45586941647932755, "grad_norm": 1.0721395293581484, "learning_rate": 3.0662037492916397e-06, "loss": 0.1685, "step": 1749 }, { "epoch": 0.4561300622291728, "grad_norm": 0.9843738562321713, "learning_rate": 3.064104225304013e-06, "loss": 0.1632, "step": 1750 }, { "epoch": 0.45639070797901804, "grad_norm": 0.9610532682011957, "learning_rate": 3.0620042820381344e-06, "loss": 0.158, "step": 1751 }, { "epoch": 0.4566513537288633, "grad_norm": 1.0473861645426523, "learning_rate": 3.059903921054818e-06, "loss": 0.1657, "step": 1752 }, { "epoch": 0.4569119994787085, "grad_norm": 1.009830810243948, "learning_rate": 3.0578031439151855e-06, "loss": 0.1588, "step": 1753 }, { "epoch": 0.4571726452285537, "grad_norm": 1.0436573844778168, "learning_rate": 3.0557019521806667e-06, "loss": 0.1549, "step": 1754 }, { "epoch": 0.45743329097839897, "grad_norm": 1.0392156828864065, "learning_rate": 3.053600347413004e-06, "loss": 0.1737, "step": 1755 }, { "epoch": 0.4576939367282442, "grad_norm": 1.0409709192566687, "learning_rate": 3.0514983311742426e-06, "loss": 0.1561, "step": 1756 }, { "epoch": 0.45795458247808946, "grad_norm": 0.9452750820358679, "learning_rate": 3.0493959050267347e-06, "loss": 0.1537, "step": 1757 }, { "epoch": 0.4582152282279347, "grad_norm": 1.0260516465744205, "learning_rate": 3.04729307053314e-06, "loss": 0.1597, "step": 1758 }, { "epoch": 0.45847587397777995, "grad_norm": 1.0341217948060655, "learning_rate": 3.045189829256416e-06, "loss": 0.1643, "step": 1759 }, { "epoch": 0.4587365197276252, "grad_norm": 1.0484016595679562, "learning_rate": 3.0430861827598277e-06, "loss": 0.1674, "step": 1760 }, { "epoch": 0.45899716547747044, "grad_norm": 0.9907286339434775, "learning_rate": 3.0409821326069387e-06, "loss": 0.1544, "step": 1761 }, { "epoch": 0.4592578112273157, "grad_norm": 0.9067563410401509, "learning_rate": 3.0388776803616138e-06, "loss": 0.143, "step": 1762 }, { "epoch": 0.4595184569771609, "grad_norm": 1.048817118170809, "learning_rate": 3.036772827588017e-06, "loss": 0.1657, "step": 1763 }, { "epoch": 0.4597791027270062, "grad_norm": 1.054558422520034, "learning_rate": 3.034667575850607e-06, "loss": 0.1652, "step": 1764 }, { "epoch": 0.4600397484768514, "grad_norm": 1.0072920362279385, "learning_rate": 3.032561926714142e-06, "loss": 0.1464, "step": 1765 }, { "epoch": 0.46030039422669666, "grad_norm": 1.019467824047166, "learning_rate": 3.0304558817436767e-06, "loss": 0.1511, "step": 1766 }, { "epoch": 0.4605610399765419, "grad_norm": 1.0326208347571282, "learning_rate": 3.028349442504556e-06, "loss": 0.1755, "step": 1767 }, { "epoch": 0.4608216857263871, "grad_norm": 1.038358861627148, "learning_rate": 3.02624261056242e-06, "loss": 0.1722, "step": 1768 }, { "epoch": 0.46108233147623234, "grad_norm": 1.067730281708065, "learning_rate": 3.0241353874832015e-06, "loss": 0.1677, "step": 1769 }, { "epoch": 0.4613429772260776, "grad_norm": 1.0735544711316614, "learning_rate": 3.0220277748331223e-06, "loss": 0.1593, "step": 1770 }, { "epoch": 0.46160362297592283, "grad_norm": 1.1705122016896654, "learning_rate": 3.019919774178695e-06, "loss": 0.18, "step": 1771 }, { "epoch": 0.4618642687257681, "grad_norm": 0.9897577789789938, "learning_rate": 3.017811387086721e-06, "loss": 0.1668, "step": 1772 }, { "epoch": 0.4621249144756133, "grad_norm": 1.0352741458373302, "learning_rate": 3.015702615124287e-06, "loss": 0.1606, "step": 1773 }, { "epoch": 0.46238556022545857, "grad_norm": 1.0768515446772755, "learning_rate": 3.013593459858767e-06, "loss": 0.1627, "step": 1774 }, { "epoch": 0.4626462059753038, "grad_norm": 1.0449607310343276, "learning_rate": 3.0114839228578197e-06, "loss": 0.1685, "step": 1775 }, { "epoch": 0.46290685172514906, "grad_norm": 0.9945465495774585, "learning_rate": 3.0093740056893882e-06, "loss": 0.1594, "step": 1776 }, { "epoch": 0.4631674974749943, "grad_norm": 0.9278874044777426, "learning_rate": 3.007263709921697e-06, "loss": 0.1539, "step": 1777 }, { "epoch": 0.46342814322483955, "grad_norm": 0.9978576148770297, "learning_rate": 3.005153037123253e-06, "loss": 0.1586, "step": 1778 }, { "epoch": 0.4636887889746848, "grad_norm": 1.0554832749920728, "learning_rate": 3.003041988862842e-06, "loss": 0.1748, "step": 1779 }, { "epoch": 0.46394943472453004, "grad_norm": 0.9834024052552087, "learning_rate": 3.000930566709531e-06, "loss": 0.161, "step": 1780 }, { "epoch": 0.4642100804743753, "grad_norm": 1.0645578776305253, "learning_rate": 2.998818772232663e-06, "loss": 0.1715, "step": 1781 }, { "epoch": 0.46447072622422053, "grad_norm": 0.9786530093708065, "learning_rate": 2.996706607001858e-06, "loss": 0.1671, "step": 1782 }, { "epoch": 0.4647313719740657, "grad_norm": 0.9526418295450894, "learning_rate": 2.9945940725870127e-06, "loss": 0.1507, "step": 1783 }, { "epoch": 0.46499201772391097, "grad_norm": 0.9453565908569723, "learning_rate": 2.9924811705582966e-06, "loss": 0.1489, "step": 1784 }, { "epoch": 0.4652526634737562, "grad_norm": 0.9466823205372044, "learning_rate": 2.990367902486155e-06, "loss": 0.1597, "step": 1785 }, { "epoch": 0.46551330922360146, "grad_norm": 1.0604460391371326, "learning_rate": 2.988254269941302e-06, "loss": 0.1705, "step": 1786 }, { "epoch": 0.4657739549734467, "grad_norm": 0.9332757452503788, "learning_rate": 2.986140274494723e-06, "loss": 0.151, "step": 1787 }, { "epoch": 0.46603460072329195, "grad_norm": 0.9845591163678713, "learning_rate": 2.984025917717678e-06, "loss": 0.1602, "step": 1788 }, { "epoch": 0.4662952464731372, "grad_norm": 1.0525862614934836, "learning_rate": 2.9819112011816886e-06, "loss": 0.1765, "step": 1789 }, { "epoch": 0.46655589222298244, "grad_norm": 0.986348420547428, "learning_rate": 2.979796126458548e-06, "loss": 0.1618, "step": 1790 }, { "epoch": 0.4668165379728277, "grad_norm": 0.9847676333767487, "learning_rate": 2.9776806951203154e-06, "loss": 0.1612, "step": 1791 }, { "epoch": 0.46707718372267293, "grad_norm": 1.0884252223083928, "learning_rate": 2.975564908739313e-06, "loss": 0.1663, "step": 1792 }, { "epoch": 0.4673378294725182, "grad_norm": 1.032502102420595, "learning_rate": 2.9734487688881294e-06, "loss": 0.1712, "step": 1793 }, { "epoch": 0.4675984752223634, "grad_norm": 0.9838291762624741, "learning_rate": 2.9713322771396147e-06, "loss": 0.1602, "step": 1794 }, { "epoch": 0.46785912097220866, "grad_norm": 1.0323047661668823, "learning_rate": 2.9692154350668797e-06, "loss": 0.1589, "step": 1795 }, { "epoch": 0.4681197667220539, "grad_norm": 1.0664202663202935, "learning_rate": 2.967098244243297e-06, "loss": 0.1727, "step": 1796 }, { "epoch": 0.46838041247189915, "grad_norm": 1.0991063860769315, "learning_rate": 2.9649807062424984e-06, "loss": 0.1697, "step": 1797 }, { "epoch": 0.46864105822174434, "grad_norm": 1.0021136268167143, "learning_rate": 2.962862822638372e-06, "loss": 0.1611, "step": 1798 }, { "epoch": 0.4689017039715896, "grad_norm": 0.9487293969183447, "learning_rate": 2.960744595005066e-06, "loss": 0.1448, "step": 1799 }, { "epoch": 0.46916234972143483, "grad_norm": 0.9748652681854845, "learning_rate": 2.95862602491698e-06, "loss": 0.1499, "step": 1800 }, { "epoch": 0.4694229954712801, "grad_norm": 1.0904883237470113, "learning_rate": 2.956507113948772e-06, "loss": 0.1631, "step": 1801 }, { "epoch": 0.4696836412211253, "grad_norm": 1.0475114153166432, "learning_rate": 2.9543878636753514e-06, "loss": 0.1698, "step": 1802 }, { "epoch": 0.46994428697097057, "grad_norm": 0.9284572654447412, "learning_rate": 2.9522682756718796e-06, "loss": 0.1487, "step": 1803 }, { "epoch": 0.4702049327208158, "grad_norm": 0.9737074028475463, "learning_rate": 2.950148351513771e-06, "loss": 0.1531, "step": 1804 }, { "epoch": 0.47046557847066106, "grad_norm": 1.0569728221889172, "learning_rate": 2.9480280927766875e-06, "loss": 0.1629, "step": 1805 }, { "epoch": 0.4707262242205063, "grad_norm": 1.0256618085564446, "learning_rate": 2.9459075010365406e-06, "loss": 0.1631, "step": 1806 }, { "epoch": 0.47098686997035155, "grad_norm": 0.9933335559027364, "learning_rate": 2.9437865778694903e-06, "loss": 0.1667, "step": 1807 }, { "epoch": 0.4712475157201968, "grad_norm": 1.0259740019989474, "learning_rate": 2.9416653248519404e-06, "loss": 0.1538, "step": 1808 }, { "epoch": 0.47150816147004204, "grad_norm": 1.0562788632109625, "learning_rate": 2.9395437435605435e-06, "loss": 0.1704, "step": 1809 }, { "epoch": 0.4717688072198873, "grad_norm": 0.9551572543321618, "learning_rate": 2.9374218355721925e-06, "loss": 0.1542, "step": 1810 }, { "epoch": 0.47202945296973253, "grad_norm": 1.2218002625018722, "learning_rate": 2.935299602464025e-06, "loss": 0.1583, "step": 1811 }, { "epoch": 0.4722900987195778, "grad_norm": 1.0508199340840323, "learning_rate": 2.933177045813421e-06, "loss": 0.1667, "step": 1812 }, { "epoch": 0.472550744469423, "grad_norm": 1.124104316382477, "learning_rate": 2.931054167198e-06, "loss": 0.1758, "step": 1813 }, { "epoch": 0.4728113902192682, "grad_norm": 1.0980064944456067, "learning_rate": 2.9289309681956194e-06, "loss": 0.1659, "step": 1814 }, { "epoch": 0.47307203596911346, "grad_norm": 0.9736787730336669, "learning_rate": 2.926807450384377e-06, "loss": 0.1581, "step": 1815 }, { "epoch": 0.4733326817189587, "grad_norm": 1.026426464230457, "learning_rate": 2.924683615342607e-06, "loss": 0.1508, "step": 1816 }, { "epoch": 0.47359332746880395, "grad_norm": 1.0528857833280332, "learning_rate": 2.92255946464888e-06, "loss": 0.1699, "step": 1817 }, { "epoch": 0.4738539732186492, "grad_norm": 1.0120786229792191, "learning_rate": 2.920434999881998e-06, "loss": 0.1642, "step": 1818 }, { "epoch": 0.47411461896849444, "grad_norm": 0.9313963246515207, "learning_rate": 2.918310222621001e-06, "loss": 0.1393, "step": 1819 }, { "epoch": 0.4743752647183397, "grad_norm": 0.9812881628901591, "learning_rate": 2.9161851344451563e-06, "loss": 0.1643, "step": 1820 }, { "epoch": 0.47463591046818493, "grad_norm": 1.0411604249729065, "learning_rate": 2.9140597369339684e-06, "loss": 0.1605, "step": 1821 }, { "epoch": 0.4748965562180302, "grad_norm": 0.9739836302041345, "learning_rate": 2.9119340316671663e-06, "loss": 0.1528, "step": 1822 }, { "epoch": 0.4751572019678754, "grad_norm": 0.9973985739719623, "learning_rate": 2.9098080202247093e-06, "loss": 0.1643, "step": 1823 }, { "epoch": 0.47541784771772067, "grad_norm": 0.9629435018488247, "learning_rate": 2.9076817041867863e-06, "loss": 0.1572, "step": 1824 }, { "epoch": 0.4756784934675659, "grad_norm": 0.9746707462492857, "learning_rate": 2.90555508513381e-06, "loss": 0.1665, "step": 1825 }, { "epoch": 0.47593913921741116, "grad_norm": 1.0140718740580918, "learning_rate": 2.9034281646464197e-06, "loss": 0.1482, "step": 1826 }, { "epoch": 0.4761997849672564, "grad_norm": 1.0837722507226648, "learning_rate": 2.901300944305479e-06, "loss": 0.1773, "step": 1827 }, { "epoch": 0.47646043071710165, "grad_norm": 1.0214278407525696, "learning_rate": 2.8991734256920723e-06, "loss": 0.1522, "step": 1828 }, { "epoch": 0.47672107646694684, "grad_norm": 1.0537313066500635, "learning_rate": 2.8970456103875083e-06, "loss": 0.1688, "step": 1829 }, { "epoch": 0.4769817222167921, "grad_norm": 1.0410037269156978, "learning_rate": 2.894917499973315e-06, "loss": 0.1695, "step": 1830 }, { "epoch": 0.4772423679666373, "grad_norm": 1.0562924380458103, "learning_rate": 2.892789096031239e-06, "loss": 0.1625, "step": 1831 }, { "epoch": 0.47750301371648257, "grad_norm": 1.0198077393383884, "learning_rate": 2.890660400143248e-06, "loss": 0.1602, "step": 1832 }, { "epoch": 0.4777636594663278, "grad_norm": 1.026814276160862, "learning_rate": 2.8885314138915215e-06, "loss": 0.1588, "step": 1833 }, { "epoch": 0.47802430521617306, "grad_norm": 0.9514691643542366, "learning_rate": 2.8864021388584606e-06, "loss": 0.1446, "step": 1834 }, { "epoch": 0.4782849509660183, "grad_norm": 1.0453952045453967, "learning_rate": 2.884272576626677e-06, "loss": 0.1731, "step": 1835 }, { "epoch": 0.47854559671586355, "grad_norm": 1.1704926657468815, "learning_rate": 2.882142728778997e-06, "loss": 0.1719, "step": 1836 }, { "epoch": 0.4788062424657088, "grad_norm": 1.0767310461255297, "learning_rate": 2.880012596898461e-06, "loss": 0.161, "step": 1837 }, { "epoch": 0.47906688821555404, "grad_norm": 1.078098625662582, "learning_rate": 2.877882182568317e-06, "loss": 0.1452, "step": 1838 }, { "epoch": 0.4793275339653993, "grad_norm": 1.0359810169828467, "learning_rate": 2.875751487372026e-06, "loss": 0.1565, "step": 1839 }, { "epoch": 0.47958817971524453, "grad_norm": 1.0490845469887224, "learning_rate": 2.873620512893257e-06, "loss": 0.157, "step": 1840 }, { "epoch": 0.4798488254650898, "grad_norm": 1.0162116596739768, "learning_rate": 2.8714892607158847e-06, "loss": 0.1598, "step": 1841 }, { "epoch": 0.480109471214935, "grad_norm": 1.0595408216967246, "learning_rate": 2.8693577324239925e-06, "loss": 0.1751, "step": 1842 }, { "epoch": 0.48037011696478027, "grad_norm": 0.9909883116561726, "learning_rate": 2.8672259296018683e-06, "loss": 0.1714, "step": 1843 }, { "epoch": 0.48063076271462546, "grad_norm": 1.0199348711519416, "learning_rate": 2.865093853834004e-06, "loss": 0.1633, "step": 1844 }, { "epoch": 0.4808914084644707, "grad_norm": 1.0548108042159665, "learning_rate": 2.8629615067050942e-06, "loss": 0.1682, "step": 1845 }, { "epoch": 0.48115205421431595, "grad_norm": 0.9944766941020864, "learning_rate": 2.8608288898000356e-06, "loss": 0.1541, "step": 1846 }, { "epoch": 0.4814126999641612, "grad_norm": 1.0182039198667803, "learning_rate": 2.8586960047039248e-06, "loss": 0.1626, "step": 1847 }, { "epoch": 0.48167334571400644, "grad_norm": 0.9913311248741717, "learning_rate": 2.8565628530020584e-06, "loss": 0.1631, "step": 1848 }, { "epoch": 0.4819339914638517, "grad_norm": 1.027221742170344, "learning_rate": 2.8544294362799298e-06, "loss": 0.1585, "step": 1849 }, { "epoch": 0.48219463721369693, "grad_norm": 0.9977047993770611, "learning_rate": 2.8522957561232323e-06, "loss": 0.1474, "step": 1850 }, { "epoch": 0.4824552829635422, "grad_norm": 1.1036245885141764, "learning_rate": 2.8501618141178515e-06, "loss": 0.1696, "step": 1851 }, { "epoch": 0.4827159287133874, "grad_norm": 0.9979309379032396, "learning_rate": 2.84802761184987e-06, "loss": 0.1476, "step": 1852 }, { "epoch": 0.48297657446323267, "grad_norm": 0.9959086242229521, "learning_rate": 2.8458931509055626e-06, "loss": 0.1543, "step": 1853 }, { "epoch": 0.4832372202130779, "grad_norm": 0.9852672070120748, "learning_rate": 2.8437584328713976e-06, "loss": 0.1631, "step": 1854 }, { "epoch": 0.48349786596292316, "grad_norm": 0.9990370977120556, "learning_rate": 2.841623459334033e-06, "loss": 0.1571, "step": 1855 }, { "epoch": 0.4837585117127684, "grad_norm": 1.0394145420981562, "learning_rate": 2.8394882318803174e-06, "loss": 0.1529, "step": 1856 }, { "epoch": 0.48401915746261365, "grad_norm": 0.9827087402978096, "learning_rate": 2.8373527520972884e-06, "loss": 0.1598, "step": 1857 }, { "epoch": 0.4842798032124589, "grad_norm": 0.941234703045496, "learning_rate": 2.835217021572171e-06, "loss": 0.1551, "step": 1858 }, { "epoch": 0.4845404489623041, "grad_norm": 1.027600687094897, "learning_rate": 2.8330810418923752e-06, "loss": 0.1769, "step": 1859 }, { "epoch": 0.4848010947121493, "grad_norm": 1.0272077995523117, "learning_rate": 2.8309448146454993e-06, "loss": 0.1492, "step": 1860 }, { "epoch": 0.4850617404619946, "grad_norm": 1.0239621167895783, "learning_rate": 2.828808341419321e-06, "loss": 0.1611, "step": 1861 }, { "epoch": 0.4853223862118398, "grad_norm": 1.0473560353879234, "learning_rate": 2.8266716238018065e-06, "loss": 0.1695, "step": 1862 }, { "epoch": 0.48558303196168506, "grad_norm": 0.9477022202409562, "learning_rate": 2.8245346633810995e-06, "loss": 0.1581, "step": 1863 }, { "epoch": 0.4858436777115303, "grad_norm": 1.0381756613941882, "learning_rate": 2.822397461745524e-06, "loss": 0.1762, "step": 1864 }, { "epoch": 0.48610432346137555, "grad_norm": 1.0659264594477782, "learning_rate": 2.8202600204835865e-06, "loss": 0.1616, "step": 1865 }, { "epoch": 0.4863649692112208, "grad_norm": 0.9992508848655994, "learning_rate": 2.8181223411839686e-06, "loss": 0.1541, "step": 1866 }, { "epoch": 0.48662561496106604, "grad_norm": 1.0181560733324544, "learning_rate": 2.8159844254355295e-06, "loss": 0.1604, "step": 1867 }, { "epoch": 0.4868862607109113, "grad_norm": 1.0105424961185085, "learning_rate": 2.8138462748273072e-06, "loss": 0.1607, "step": 1868 }, { "epoch": 0.48714690646075653, "grad_norm": 0.987497720999258, "learning_rate": 2.811707890948508e-06, "loss": 0.1615, "step": 1869 }, { "epoch": 0.4874075522106018, "grad_norm": 0.9613598912981475, "learning_rate": 2.8095692753885177e-06, "loss": 0.1608, "step": 1870 }, { "epoch": 0.487668197960447, "grad_norm": 1.0233078887845362, "learning_rate": 2.8074304297368913e-06, "loss": 0.154, "step": 1871 }, { "epoch": 0.48792884371029227, "grad_norm": 1.0498170233838058, "learning_rate": 2.805291355583355e-06, "loss": 0.1791, "step": 1872 }, { "epoch": 0.4881894894601375, "grad_norm": 1.0215213686354523, "learning_rate": 2.803152054517806e-06, "loss": 0.165, "step": 1873 }, { "epoch": 0.4884501352099827, "grad_norm": 1.038808862307563, "learning_rate": 2.8010125281303076e-06, "loss": 0.1565, "step": 1874 }, { "epoch": 0.48871078095982795, "grad_norm": 0.9636527399543188, "learning_rate": 2.798872778011094e-06, "loss": 0.1546, "step": 1875 }, { "epoch": 0.4889714267096732, "grad_norm": 1.074119112218017, "learning_rate": 2.7967328057505637e-06, "loss": 0.1746, "step": 1876 }, { "epoch": 0.48923207245951844, "grad_norm": 0.9684116784241704, "learning_rate": 2.7945926129392797e-06, "loss": 0.1567, "step": 1877 }, { "epoch": 0.4894927182093637, "grad_norm": 1.0559105911353384, "learning_rate": 2.792452201167971e-06, "loss": 0.1678, "step": 1878 }, { "epoch": 0.48975336395920893, "grad_norm": 0.9748368902571805, "learning_rate": 2.790311572027528e-06, "loss": 0.1561, "step": 1879 }, { "epoch": 0.4900140097090542, "grad_norm": 1.051436014228005, "learning_rate": 2.7881707271090018e-06, "loss": 0.1641, "step": 1880 }, { "epoch": 0.4902746554588994, "grad_norm": 0.9872732842313837, "learning_rate": 2.7860296680036064e-06, "loss": 0.1529, "step": 1881 }, { "epoch": 0.49053530120874467, "grad_norm": 1.0764235208944692, "learning_rate": 2.7838883963027118e-06, "loss": 0.1559, "step": 1882 }, { "epoch": 0.4907959469585899, "grad_norm": 1.0369714715161724, "learning_rate": 2.7817469135978496e-06, "loss": 0.1572, "step": 1883 }, { "epoch": 0.49105659270843516, "grad_norm": 0.9982249305277084, "learning_rate": 2.779605221480706e-06, "loss": 0.1519, "step": 1884 }, { "epoch": 0.4913172384582804, "grad_norm": 0.9809053118699702, "learning_rate": 2.7774633215431213e-06, "loss": 0.1532, "step": 1885 }, { "epoch": 0.49157788420812565, "grad_norm": 1.0309395265199055, "learning_rate": 2.7753212153770947e-06, "loss": 0.1675, "step": 1886 }, { "epoch": 0.4918385299579709, "grad_norm": 0.9819029365825094, "learning_rate": 2.7731789045747746e-06, "loss": 0.1526, "step": 1887 }, { "epoch": 0.49209917570781614, "grad_norm": 1.1265628631784292, "learning_rate": 2.7710363907284643e-06, "loss": 0.1619, "step": 1888 }, { "epoch": 0.49235982145766133, "grad_norm": 0.9948184346325105, "learning_rate": 2.768893675430615e-06, "loss": 0.1517, "step": 1889 }, { "epoch": 0.4926204672075066, "grad_norm": 1.020741923643777, "learning_rate": 2.766750760273831e-06, "loss": 0.1582, "step": 1890 }, { "epoch": 0.4928811129573518, "grad_norm": 1.061906939994438, "learning_rate": 2.764607646850863e-06, "loss": 0.1527, "step": 1891 }, { "epoch": 0.49314175870719706, "grad_norm": 1.0069504746621516, "learning_rate": 2.76246433675461e-06, "loss": 0.1471, "step": 1892 }, { "epoch": 0.4934024044570423, "grad_norm": 1.0001504756172337, "learning_rate": 2.7603208315781164e-06, "loss": 0.1557, "step": 1893 }, { "epoch": 0.49366305020688755, "grad_norm": 1.0304032891272095, "learning_rate": 2.7581771329145713e-06, "loss": 0.1569, "step": 1894 }, { "epoch": 0.4939236959567328, "grad_norm": 1.0359137690614102, "learning_rate": 2.75603324235731e-06, "loss": 0.1567, "step": 1895 }, { "epoch": 0.49418434170657805, "grad_norm": 1.0804470134729278, "learning_rate": 2.7538891614998074e-06, "loss": 0.1711, "step": 1896 }, { "epoch": 0.4944449874564233, "grad_norm": 0.9671949353284675, "learning_rate": 2.7517448919356815e-06, "loss": 0.1509, "step": 1897 }, { "epoch": 0.49470563320626854, "grad_norm": 1.072722394922496, "learning_rate": 2.749600435258691e-06, "loss": 0.1689, "step": 1898 }, { "epoch": 0.4949662789561138, "grad_norm": 0.9821077895249661, "learning_rate": 2.747455793062732e-06, "loss": 0.1631, "step": 1899 }, { "epoch": 0.495226924705959, "grad_norm": 0.9629851447472, "learning_rate": 2.745310966941839e-06, "loss": 0.143, "step": 1900 }, { "epoch": 0.49548757045580427, "grad_norm": 1.0653278323347448, "learning_rate": 2.743165958490186e-06, "loss": 0.1715, "step": 1901 }, { "epoch": 0.4957482162056495, "grad_norm": 1.0386551007603453, "learning_rate": 2.741020769302077e-06, "loss": 0.1537, "step": 1902 }, { "epoch": 0.49600886195549476, "grad_norm": 1.0567201167658806, "learning_rate": 2.7388754009719553e-06, "loss": 0.1619, "step": 1903 }, { "epoch": 0.49626950770534, "grad_norm": 1.1169913815503412, "learning_rate": 2.7367298550943954e-06, "loss": 0.1607, "step": 1904 }, { "epoch": 0.4965301534551852, "grad_norm": 1.0420080575682655, "learning_rate": 2.7345841332641027e-06, "loss": 0.161, "step": 1905 }, { "epoch": 0.49679079920503044, "grad_norm": 1.0691626185984777, "learning_rate": 2.7324382370759174e-06, "loss": 0.1645, "step": 1906 }, { "epoch": 0.4970514449548757, "grad_norm": 1.0818010017370674, "learning_rate": 2.730292168124802e-06, "loss": 0.16, "step": 1907 }, { "epoch": 0.49731209070472093, "grad_norm": 1.0362968888715312, "learning_rate": 2.7281459280058563e-06, "loss": 0.1457, "step": 1908 }, { "epoch": 0.4975727364545662, "grad_norm": 0.962021564108025, "learning_rate": 2.7259995183143004e-06, "loss": 0.1467, "step": 1909 }, { "epoch": 0.4978333822044114, "grad_norm": 1.036731028036928, "learning_rate": 2.7238529406454834e-06, "loss": 0.1603, "step": 1910 }, { "epoch": 0.49809402795425667, "grad_norm": 1.0070534255615573, "learning_rate": 2.7217061965948794e-06, "loss": 0.1537, "step": 1911 }, { "epoch": 0.4983546737041019, "grad_norm": 0.9975589489671762, "learning_rate": 2.719559287758085e-06, "loss": 0.1478, "step": 1912 }, { "epoch": 0.49861531945394716, "grad_norm": 1.0247765469628443, "learning_rate": 2.717412215730819e-06, "loss": 0.1589, "step": 1913 }, { "epoch": 0.4988759652037924, "grad_norm": 1.0309219725878769, "learning_rate": 2.7152649821089245e-06, "loss": 0.1635, "step": 1914 }, { "epoch": 0.49913661095363765, "grad_norm": 1.0477785604179826, "learning_rate": 2.7131175884883604e-06, "loss": 0.1593, "step": 1915 }, { "epoch": 0.4993972567034829, "grad_norm": 0.9912377462868179, "learning_rate": 2.7109700364652075e-06, "loss": 0.1545, "step": 1916 }, { "epoch": 0.49965790245332814, "grad_norm": 1.0366754021351936, "learning_rate": 2.7088223276356635e-06, "loss": 0.163, "step": 1917 }, { "epoch": 0.4999185482031734, "grad_norm": 1.0326089419774247, "learning_rate": 2.7066744635960423e-06, "loss": 0.1545, "step": 1918 }, { "epoch": 0.5001791939530186, "grad_norm": 1.0299097380918676, "learning_rate": 2.7045264459427737e-06, "loss": 0.164, "step": 1919 }, { "epoch": 0.5004398397028639, "grad_norm": 0.9547490964790395, "learning_rate": 2.702378276272402e-06, "loss": 0.1491, "step": 1920 }, { "epoch": 0.5007004854527091, "grad_norm": 1.0307862356936643, "learning_rate": 2.7002299561815838e-06, "loss": 0.1572, "step": 1921 }, { "epoch": 0.5009611312025544, "grad_norm": 0.9831528056668766, "learning_rate": 2.6980814872670863e-06, "loss": 0.1559, "step": 1922 }, { "epoch": 0.5012217769523996, "grad_norm": 0.9754971616140389, "learning_rate": 2.6959328711257905e-06, "loss": 0.1625, "step": 1923 }, { "epoch": 0.5014824227022449, "grad_norm": 1.0450139366718496, "learning_rate": 2.6937841093546842e-06, "loss": 0.1677, "step": 1924 }, { "epoch": 0.50174306845209, "grad_norm": 0.9941512146598429, "learning_rate": 2.6916352035508637e-06, "loss": 0.1628, "step": 1925 }, { "epoch": 0.5020037142019353, "grad_norm": 1.0041276368073928, "learning_rate": 2.6894861553115337e-06, "loss": 0.1582, "step": 1926 }, { "epoch": 0.5022643599517805, "grad_norm": 1.0265293718667494, "learning_rate": 2.6873369662340036e-06, "loss": 0.1672, "step": 1927 }, { "epoch": 0.5025250057016257, "grad_norm": 1.0334252377823383, "learning_rate": 2.6851876379156884e-06, "loss": 0.1735, "step": 1928 }, { "epoch": 0.502785651451471, "grad_norm": 1.0066439656611044, "learning_rate": 2.6830381719541047e-06, "loss": 0.1606, "step": 1929 }, { "epoch": 0.5030462972013162, "grad_norm": 1.0074346262744536, "learning_rate": 2.680888569946874e-06, "loss": 0.1563, "step": 1930 }, { "epoch": 0.5033069429511615, "grad_norm": 1.0056811983093625, "learning_rate": 2.6787388334917175e-06, "loss": 0.1619, "step": 1931 }, { "epoch": 0.5035675887010067, "grad_norm": 1.0012520256879707, "learning_rate": 2.6765889641864562e-06, "loss": 0.1575, "step": 1932 }, { "epoch": 0.503828234450852, "grad_norm": 0.980327135884438, "learning_rate": 2.67443896362901e-06, "loss": 0.1595, "step": 1933 }, { "epoch": 0.5040888802006972, "grad_norm": 1.0297703835694803, "learning_rate": 2.6722888334173974e-06, "loss": 0.17, "step": 1934 }, { "epoch": 0.5043495259505425, "grad_norm": 0.9959080142097637, "learning_rate": 2.6701385751497304e-06, "loss": 0.162, "step": 1935 }, { "epoch": 0.5046101717003877, "grad_norm": 1.0204730387294605, "learning_rate": 2.6679881904242198e-06, "loss": 0.1536, "step": 1936 }, { "epoch": 0.504870817450233, "grad_norm": 1.0413499783373115, "learning_rate": 2.665837680839169e-06, "loss": 0.1611, "step": 1937 }, { "epoch": 0.5051314632000782, "grad_norm": 1.009054268854807, "learning_rate": 2.663687047992972e-06, "loss": 0.1566, "step": 1938 }, { "epoch": 0.5053921089499235, "grad_norm": 0.9709807770472649, "learning_rate": 2.6615362934841192e-06, "loss": 0.1454, "step": 1939 }, { "epoch": 0.5056527546997687, "grad_norm": 0.9219558568781696, "learning_rate": 2.6593854189111857e-06, "loss": 0.147, "step": 1940 }, { "epoch": 0.505913400449614, "grad_norm": 1.0064895517515515, "learning_rate": 2.65723442587284e-06, "loss": 0.1525, "step": 1941 }, { "epoch": 0.5061740461994592, "grad_norm": 1.043764257879054, "learning_rate": 2.655083315967838e-06, "loss": 0.1616, "step": 1942 }, { "epoch": 0.5064346919493043, "grad_norm": 1.066112507623887, "learning_rate": 2.6529320907950202e-06, "loss": 0.1609, "step": 1943 }, { "epoch": 0.5066953376991497, "grad_norm": 1.0191691670238512, "learning_rate": 2.650780751953316e-06, "loss": 0.1517, "step": 1944 }, { "epoch": 0.5069559834489948, "grad_norm": 1.106495708698216, "learning_rate": 2.648629301041737e-06, "loss": 0.1611, "step": 1945 }, { "epoch": 0.5072166291988401, "grad_norm": 1.0763104352050774, "learning_rate": 2.646477739659378e-06, "loss": 0.1586, "step": 1946 }, { "epoch": 0.5074772749486853, "grad_norm": 1.144155289235771, "learning_rate": 2.644326069405419e-06, "loss": 0.1525, "step": 1947 }, { "epoch": 0.5077379206985306, "grad_norm": 1.0548337226879048, "learning_rate": 2.6421742918791155e-06, "loss": 0.1622, "step": 1948 }, { "epoch": 0.5079985664483758, "grad_norm": 0.9471611433242265, "learning_rate": 2.640022408679808e-06, "loss": 0.1472, "step": 1949 }, { "epoch": 0.5082592121982211, "grad_norm": 1.0443706908195634, "learning_rate": 2.6378704214069133e-06, "loss": 0.1778, "step": 1950 }, { "epoch": 0.5085198579480663, "grad_norm": 1.0905371633275955, "learning_rate": 2.6357183316599243e-06, "loss": 0.1608, "step": 1951 }, { "epoch": 0.5087805036979116, "grad_norm": 1.1449283443823297, "learning_rate": 2.633566141038413e-06, "loss": 0.1691, "step": 1952 }, { "epoch": 0.5090411494477568, "grad_norm": 1.0097931661473336, "learning_rate": 2.631413851142024e-06, "loss": 0.1482, "step": 1953 }, { "epoch": 0.5093017951976021, "grad_norm": 0.9773601070846428, "learning_rate": 2.629261463570476e-06, "loss": 0.1442, "step": 1954 }, { "epoch": 0.5095624409474473, "grad_norm": 0.9444965594353211, "learning_rate": 2.627108979923562e-06, "loss": 0.1459, "step": 1955 }, { "epoch": 0.5098230866972926, "grad_norm": 1.1744031525475065, "learning_rate": 2.6249564018011437e-06, "loss": 0.1595, "step": 1956 }, { "epoch": 0.5100837324471378, "grad_norm": 1.0466304850483799, "learning_rate": 2.6228037308031557e-06, "loss": 0.1592, "step": 1957 }, { "epoch": 0.510344378196983, "grad_norm": 1.087600675702793, "learning_rate": 2.6206509685296e-06, "loss": 0.1647, "step": 1958 }, { "epoch": 0.5106050239468283, "grad_norm": 1.0883284572088845, "learning_rate": 2.618498116580546e-06, "loss": 0.167, "step": 1959 }, { "epoch": 0.5108656696966735, "grad_norm": 1.0003743433084724, "learning_rate": 2.6163451765561324e-06, "loss": 0.1559, "step": 1960 }, { "epoch": 0.5111263154465188, "grad_norm": 1.036209942433463, "learning_rate": 2.614192150056561e-06, "loss": 0.1474, "step": 1961 }, { "epoch": 0.511386961196364, "grad_norm": 1.0222100472659243, "learning_rate": 2.6120390386820975e-06, "loss": 0.1619, "step": 1962 }, { "epoch": 0.5116476069462093, "grad_norm": 1.067020900418114, "learning_rate": 2.609885844033072e-06, "loss": 0.152, "step": 1963 }, { "epoch": 0.5119082526960544, "grad_norm": 1.0662149872695597, "learning_rate": 2.607732567709877e-06, "loss": 0.1613, "step": 1964 }, { "epoch": 0.5121688984458997, "grad_norm": 1.0425465693112868, "learning_rate": 2.605579211312964e-06, "loss": 0.1535, "step": 1965 }, { "epoch": 0.5124295441957449, "grad_norm": 1.0822697909313344, "learning_rate": 2.6034257764428456e-06, "loss": 0.1552, "step": 1966 }, { "epoch": 0.5126901899455902, "grad_norm": 1.0630549236312894, "learning_rate": 2.601272264700091e-06, "loss": 0.1639, "step": 1967 }, { "epoch": 0.5129508356954354, "grad_norm": 1.066744590009312, "learning_rate": 2.5991186776853277e-06, "loss": 0.1702, "step": 1968 }, { "epoch": 0.5132114814452807, "grad_norm": 0.9582762525180832, "learning_rate": 2.59696501699924e-06, "loss": 0.1518, "step": 1969 }, { "epoch": 0.5134721271951259, "grad_norm": 0.9878419864641058, "learning_rate": 2.594811284242565e-06, "loss": 0.1494, "step": 1970 }, { "epoch": 0.5137327729449712, "grad_norm": 1.0655767576702024, "learning_rate": 2.5926574810160936e-06, "loss": 0.1705, "step": 1971 }, { "epoch": 0.5139934186948164, "grad_norm": 1.0280724195765705, "learning_rate": 2.590503608920672e-06, "loss": 0.1678, "step": 1972 }, { "epoch": 0.5142540644446616, "grad_norm": 1.0149387820517635, "learning_rate": 2.5883496695571934e-06, "loss": 0.166, "step": 1973 }, { "epoch": 0.5145147101945069, "grad_norm": 1.0366023304442813, "learning_rate": 2.5861956645266036e-06, "loss": 0.1681, "step": 1974 }, { "epoch": 0.5147753559443521, "grad_norm": 1.004821283423068, "learning_rate": 2.584041595429897e-06, "loss": 0.1579, "step": 1975 }, { "epoch": 0.5150360016941974, "grad_norm": 1.026003020905388, "learning_rate": 2.581887463868114e-06, "loss": 0.1533, "step": 1976 }, { "epoch": 0.5152966474440426, "grad_norm": 1.0166580135665382, "learning_rate": 2.579733271442344e-06, "loss": 0.1501, "step": 1977 }, { "epoch": 0.5155572931938879, "grad_norm": 1.0071454099532906, "learning_rate": 2.57757901975372e-06, "loss": 0.1601, "step": 1978 }, { "epoch": 0.5158179389437331, "grad_norm": 0.9859673831633917, "learning_rate": 2.5754247104034177e-06, "loss": 0.1548, "step": 1979 }, { "epoch": 0.5160785846935784, "grad_norm": 0.9676880920468741, "learning_rate": 2.5732703449926595e-06, "loss": 0.1517, "step": 1980 }, { "epoch": 0.5163392304434236, "grad_norm": 1.0417203127002623, "learning_rate": 2.5711159251227053e-06, "loss": 0.157, "step": 1981 }, { "epoch": 0.5165998761932689, "grad_norm": 1.028601017614281, "learning_rate": 2.568961452394859e-06, "loss": 0.158, "step": 1982 }, { "epoch": 0.516860521943114, "grad_norm": 1.0252978528263852, "learning_rate": 2.5668069284104614e-06, "loss": 0.1443, "step": 1983 }, { "epoch": 0.5171211676929593, "grad_norm": 0.9819816353952421, "learning_rate": 2.564652354770892e-06, "loss": 0.1516, "step": 1984 }, { "epoch": 0.5173818134428045, "grad_norm": 1.0605633621305337, "learning_rate": 2.5624977330775676e-06, "loss": 0.1546, "step": 1985 }, { "epoch": 0.5176424591926498, "grad_norm": 0.9817144138579444, "learning_rate": 2.560343064931941e-06, "loss": 0.1563, "step": 1986 }, { "epoch": 0.517903104942495, "grad_norm": 0.9986485637575293, "learning_rate": 2.5581883519354976e-06, "loss": 0.1632, "step": 1987 }, { "epoch": 0.5181637506923402, "grad_norm": 0.9732828682631686, "learning_rate": 2.5560335956897603e-06, "loss": 0.1586, "step": 1988 }, { "epoch": 0.5184243964421855, "grad_norm": 0.9657487952161188, "learning_rate": 2.553878797796278e-06, "loss": 0.1508, "step": 1989 }, { "epoch": 0.5186850421920307, "grad_norm": 0.9856354316775389, "learning_rate": 2.551723959856637e-06, "loss": 0.1423, "step": 1990 }, { "epoch": 0.518945687941876, "grad_norm": 0.9691072776817569, "learning_rate": 2.5495690834724478e-06, "loss": 0.1526, "step": 1991 }, { "epoch": 0.5192063336917212, "grad_norm": 0.9594044232284029, "learning_rate": 2.5474141702453536e-06, "loss": 0.1613, "step": 1992 }, { "epoch": 0.5194669794415665, "grad_norm": 1.031472753720505, "learning_rate": 2.5452592217770233e-06, "loss": 0.1694, "step": 1993 }, { "epoch": 0.5197276251914117, "grad_norm": 1.0121503234198337, "learning_rate": 2.543104239669152e-06, "loss": 0.1546, "step": 1994 }, { "epoch": 0.519988270941257, "grad_norm": 0.9673900994153073, "learning_rate": 2.54094922552346e-06, "loss": 0.1455, "step": 1995 }, { "epoch": 0.5202489166911022, "grad_norm": 1.0062512562290047, "learning_rate": 2.5387941809416895e-06, "loss": 0.161, "step": 1996 }, { "epoch": 0.5205095624409475, "grad_norm": 0.9604868565976807, "learning_rate": 2.53663910752561e-06, "loss": 0.1569, "step": 1997 }, { "epoch": 0.5207702081907927, "grad_norm": 0.9738557278246472, "learning_rate": 2.5344840068770076e-06, "loss": 0.1456, "step": 1998 }, { "epoch": 0.521030853940638, "grad_norm": 1.003083858164384, "learning_rate": 2.5323288805976915e-06, "loss": 0.1718, "step": 1999 }, { "epoch": 0.5212914996904832, "grad_norm": 0.9739513009518886, "learning_rate": 2.530173730289488e-06, "loss": 0.1529, "step": 2000 }, { "epoch": 0.5212914996904832, "eval_loss": 0.15785683691501617, "eval_runtime": 55.1612, "eval_samples_per_second": 44.977, "eval_steps_per_second": 5.638, "step": 2000 }, { "epoch": 0.5215521454403285, "grad_norm": 0.9876219834716871, "learning_rate": 2.5280185575542438e-06, "loss": 0.1542, "step": 2001 }, { "epoch": 0.5218127911901737, "grad_norm": 0.9711523566818734, "learning_rate": 2.5258633639938195e-06, "loss": 0.1522, "step": 2002 }, { "epoch": 0.5220734369400188, "grad_norm": 0.9933772600752394, "learning_rate": 2.5237081512100935e-06, "loss": 0.152, "step": 2003 }, { "epoch": 0.5223340826898641, "grad_norm": 1.0086296593880575, "learning_rate": 2.521552920804956e-06, "loss": 0.1514, "step": 2004 }, { "epoch": 0.5225947284397093, "grad_norm": 1.0266832102085834, "learning_rate": 2.5193976743803138e-06, "loss": 0.1512, "step": 2005 }, { "epoch": 0.5228553741895546, "grad_norm": 1.0399101550888092, "learning_rate": 2.5172424135380817e-06, "loss": 0.1725, "step": 2006 }, { "epoch": 0.5231160199393998, "grad_norm": 1.0103316895000944, "learning_rate": 2.515087139880188e-06, "loss": 0.1565, "step": 2007 }, { "epoch": 0.5233766656892451, "grad_norm": 0.9398038037201235, "learning_rate": 2.51293185500857e-06, "loss": 0.147, "step": 2008 }, { "epoch": 0.5236373114390903, "grad_norm": 1.0180474746188617, "learning_rate": 2.510776560525171e-06, "loss": 0.1546, "step": 2009 }, { "epoch": 0.5238979571889356, "grad_norm": 1.1122205027253163, "learning_rate": 2.5086212580319457e-06, "loss": 0.1596, "step": 2010 }, { "epoch": 0.5241586029387808, "grad_norm": 1.0385270623051366, "learning_rate": 2.5064659491308514e-06, "loss": 0.1566, "step": 2011 }, { "epoch": 0.5244192486886261, "grad_norm": 0.9837534063131084, "learning_rate": 2.50431063542385e-06, "loss": 0.1523, "step": 2012 }, { "epoch": 0.5246798944384713, "grad_norm": 0.9709471561507054, "learning_rate": 2.502155318512911e-06, "loss": 0.159, "step": 2013 }, { "epoch": 0.5249405401883166, "grad_norm": 1.0649544004719047, "learning_rate": 2.5e-06, "loss": 0.1661, "step": 2014 }, { "epoch": 0.5252011859381618, "grad_norm": 0.9702615566393833, "learning_rate": 2.49784468148709e-06, "loss": 0.1496, "step": 2015 }, { "epoch": 0.5254618316880071, "grad_norm": 0.9571036312684065, "learning_rate": 2.49568936457615e-06, "loss": 0.1327, "step": 2016 }, { "epoch": 0.5257224774378523, "grad_norm": 0.9932359048230276, "learning_rate": 2.4935340508691502e-06, "loss": 0.1573, "step": 2017 }, { "epoch": 0.5259831231876975, "grad_norm": 0.9949638518682418, "learning_rate": 2.491378741968055e-06, "loss": 0.147, "step": 2018 }, { "epoch": 0.5262437689375428, "grad_norm": 1.0478773660082246, "learning_rate": 2.4892234394748296e-06, "loss": 0.1659, "step": 2019 }, { "epoch": 0.526504414687388, "grad_norm": 1.0535631890964392, "learning_rate": 2.487068144991431e-06, "loss": 0.1736, "step": 2020 }, { "epoch": 0.5267650604372333, "grad_norm": 0.9460263439515274, "learning_rate": 2.484912860119812e-06, "loss": 0.1387, "step": 2021 }, { "epoch": 0.5270257061870784, "grad_norm": 1.0585983678276256, "learning_rate": 2.4827575864619183e-06, "loss": 0.1583, "step": 2022 }, { "epoch": 0.5272863519369237, "grad_norm": 1.0311643260248609, "learning_rate": 2.480602325619687e-06, "loss": 0.1543, "step": 2023 }, { "epoch": 0.5275469976867689, "grad_norm": 1.0437714817014947, "learning_rate": 2.4784470791950442e-06, "loss": 0.1627, "step": 2024 }, { "epoch": 0.5278076434366142, "grad_norm": 1.1371674374253873, "learning_rate": 2.476291848789907e-06, "loss": 0.155, "step": 2025 }, { "epoch": 0.5280682891864594, "grad_norm": 1.0322858782935438, "learning_rate": 2.4741366360061813e-06, "loss": 0.1536, "step": 2026 }, { "epoch": 0.5283289349363047, "grad_norm": 1.0628247293976354, "learning_rate": 2.4719814424457566e-06, "loss": 0.1648, "step": 2027 }, { "epoch": 0.5285895806861499, "grad_norm": 1.049754709089933, "learning_rate": 2.4698262697105128e-06, "loss": 0.1629, "step": 2028 }, { "epoch": 0.5288502264359952, "grad_norm": 1.0781390364751562, "learning_rate": 2.4676711194023102e-06, "loss": 0.1606, "step": 2029 }, { "epoch": 0.5291108721858404, "grad_norm": 1.1283722621485919, "learning_rate": 2.4655159931229932e-06, "loss": 0.1688, "step": 2030 }, { "epoch": 0.5293715179356857, "grad_norm": 1.0091418878064424, "learning_rate": 2.463360892474391e-06, "loss": 0.1595, "step": 2031 }, { "epoch": 0.5296321636855309, "grad_norm": 1.0094549831241346, "learning_rate": 2.461205819058311e-06, "loss": 0.1487, "step": 2032 }, { "epoch": 0.5298928094353762, "grad_norm": 1.0145999148681406, "learning_rate": 2.4590507744765414e-06, "loss": 0.1585, "step": 2033 }, { "epoch": 0.5301534551852214, "grad_norm": 1.0249744286909237, "learning_rate": 2.4568957603308494e-06, "loss": 0.1393, "step": 2034 }, { "epoch": 0.5304141009350666, "grad_norm": 0.9621776181855131, "learning_rate": 2.4547407782229775e-06, "loss": 0.1454, "step": 2035 }, { "epoch": 0.5306747466849119, "grad_norm": 1.0809966845194399, "learning_rate": 2.452585829754647e-06, "loss": 0.173, "step": 2036 }, { "epoch": 0.5309353924347571, "grad_norm": 1.0932920592197177, "learning_rate": 2.4504309165275527e-06, "loss": 0.1731, "step": 2037 }, { "epoch": 0.5311960381846024, "grad_norm": 1.027151872752591, "learning_rate": 2.448276040143364e-06, "loss": 0.1531, "step": 2038 }, { "epoch": 0.5314566839344476, "grad_norm": 1.0016505453766016, "learning_rate": 2.446121202203723e-06, "loss": 0.1469, "step": 2039 }, { "epoch": 0.5317173296842929, "grad_norm": 0.9920667265100828, "learning_rate": 2.4439664043102414e-06, "loss": 0.1586, "step": 2040 }, { "epoch": 0.531977975434138, "grad_norm": 1.0347948710976944, "learning_rate": 2.441811648064503e-06, "loss": 0.1589, "step": 2041 }, { "epoch": 0.5322386211839834, "grad_norm": 1.009757850830885, "learning_rate": 2.43965693506806e-06, "loss": 0.1556, "step": 2042 }, { "epoch": 0.5324992669338285, "grad_norm": 1.036109816450004, "learning_rate": 2.4375022669224328e-06, "loss": 0.1627, "step": 2043 }, { "epoch": 0.5327599126836738, "grad_norm": 1.0610661246965365, "learning_rate": 2.4353476452291086e-06, "loss": 0.1682, "step": 2044 }, { "epoch": 0.533020558433519, "grad_norm": 1.0745046221013186, "learning_rate": 2.43319307158954e-06, "loss": 0.1515, "step": 2045 }, { "epoch": 0.5332812041833643, "grad_norm": 0.9830478879570864, "learning_rate": 2.431038547605142e-06, "loss": 0.153, "step": 2046 }, { "epoch": 0.5335418499332095, "grad_norm": 0.9513381404299631, "learning_rate": 2.4288840748772955e-06, "loss": 0.1531, "step": 2047 }, { "epoch": 0.5338024956830548, "grad_norm": 0.9935459763512898, "learning_rate": 2.4267296550073413e-06, "loss": 0.1514, "step": 2048 }, { "epoch": 0.5340631414329, "grad_norm": 1.0188551159847528, "learning_rate": 2.4245752895965828e-06, "loss": 0.155, "step": 2049 }, { "epoch": 0.5343237871827452, "grad_norm": 0.9623216151933183, "learning_rate": 2.4224209802462818e-06, "loss": 0.1509, "step": 2050 }, { "epoch": 0.5345844329325905, "grad_norm": 1.070370257781988, "learning_rate": 2.420266728557657e-06, "loss": 0.1531, "step": 2051 }, { "epoch": 0.5348450786824357, "grad_norm": 0.9669532261223401, "learning_rate": 2.4181125361318868e-06, "loss": 0.1495, "step": 2052 }, { "epoch": 0.535105724432281, "grad_norm": 1.1062690766990486, "learning_rate": 2.4159584045701037e-06, "loss": 0.1588, "step": 2053 }, { "epoch": 0.5353663701821262, "grad_norm": 1.0469380652744849, "learning_rate": 2.413804335473397e-06, "loss": 0.1613, "step": 2054 }, { "epoch": 0.5356270159319715, "grad_norm": 1.1036317816050867, "learning_rate": 2.411650330442807e-06, "loss": 0.1612, "step": 2055 }, { "epoch": 0.5358876616818167, "grad_norm": 1.0433231757604704, "learning_rate": 2.409496391079329e-06, "loss": 0.1531, "step": 2056 }, { "epoch": 0.536148307431662, "grad_norm": 1.0654498817706457, "learning_rate": 2.4073425189839068e-06, "loss": 0.1552, "step": 2057 }, { "epoch": 0.5364089531815072, "grad_norm": 0.9928479562137358, "learning_rate": 2.4051887157574356e-06, "loss": 0.146, "step": 2058 }, { "epoch": 0.5366695989313525, "grad_norm": 1.0070126994023263, "learning_rate": 2.4030349830007607e-06, "loss": 0.1498, "step": 2059 }, { "epoch": 0.5369302446811977, "grad_norm": 0.9548266373857146, "learning_rate": 2.4008813223146723e-06, "loss": 0.1556, "step": 2060 }, { "epoch": 0.537190890431043, "grad_norm": 1.0031141516895297, "learning_rate": 2.39872773529991e-06, "loss": 0.1434, "step": 2061 }, { "epoch": 0.5374515361808881, "grad_norm": 1.0127805678033828, "learning_rate": 2.3965742235571557e-06, "loss": 0.154, "step": 2062 }, { "epoch": 0.5377121819307334, "grad_norm": 1.021311707706447, "learning_rate": 2.3944207886870364e-06, "loss": 0.1564, "step": 2063 }, { "epoch": 0.5379728276805786, "grad_norm": 1.0939165399326254, "learning_rate": 2.3922674322901236e-06, "loss": 0.1673, "step": 2064 }, { "epoch": 0.5382334734304238, "grad_norm": 0.9580665707337931, "learning_rate": 2.390114155966928e-06, "loss": 0.148, "step": 2065 }, { "epoch": 0.5384941191802691, "grad_norm": 1.038672678230463, "learning_rate": 2.387960961317903e-06, "loss": 0.1611, "step": 2066 }, { "epoch": 0.5387547649301143, "grad_norm": 1.025598126786263, "learning_rate": 2.38580784994344e-06, "loss": 0.1532, "step": 2067 }, { "epoch": 0.5390154106799596, "grad_norm": 0.9597423036162838, "learning_rate": 2.383654823443868e-06, "loss": 0.1499, "step": 2068 }, { "epoch": 0.5392760564298048, "grad_norm": 1.077022775986837, "learning_rate": 2.3815018834194542e-06, "loss": 0.1649, "step": 2069 }, { "epoch": 0.5395367021796501, "grad_norm": 1.1079517646162587, "learning_rate": 2.3793490314704005e-06, "loss": 0.1481, "step": 2070 }, { "epoch": 0.5397973479294953, "grad_norm": 1.0144876165214585, "learning_rate": 2.377196269196845e-06, "loss": 0.1573, "step": 2071 }, { "epoch": 0.5400579936793406, "grad_norm": 0.9908335662111537, "learning_rate": 2.3750435981988576e-06, "loss": 0.1482, "step": 2072 }, { "epoch": 0.5403186394291858, "grad_norm": 1.0195454113251925, "learning_rate": 2.3728910200764394e-06, "loss": 0.144, "step": 2073 }, { "epoch": 0.5405792851790311, "grad_norm": 0.9867315200098398, "learning_rate": 2.3707385364295245e-06, "loss": 0.1563, "step": 2074 }, { "epoch": 0.5408399309288763, "grad_norm": 1.077078613831441, "learning_rate": 2.368586148857977e-06, "loss": 0.1585, "step": 2075 }, { "epoch": 0.5411005766787216, "grad_norm": 1.0540229441710243, "learning_rate": 2.366433858961587e-06, "loss": 0.1527, "step": 2076 }, { "epoch": 0.5413612224285668, "grad_norm": 1.0524923565056699, "learning_rate": 2.3642816683400756e-06, "loss": 0.1528, "step": 2077 }, { "epoch": 0.5416218681784121, "grad_norm": 1.148936256374794, "learning_rate": 2.362129578593088e-06, "loss": 0.1652, "step": 2078 }, { "epoch": 0.5418825139282573, "grad_norm": 1.0557599929754389, "learning_rate": 2.3599775913201924e-06, "loss": 0.154, "step": 2079 }, { "epoch": 0.5421431596781024, "grad_norm": 1.0120263291036646, "learning_rate": 2.3578257081208853e-06, "loss": 0.1558, "step": 2080 }, { "epoch": 0.5424038054279477, "grad_norm": 1.0024209605986973, "learning_rate": 2.355673930594582e-06, "loss": 0.1649, "step": 2081 }, { "epoch": 0.5426644511777929, "grad_norm": 1.1074929933965239, "learning_rate": 2.3535222603406223e-06, "loss": 0.1808, "step": 2082 }, { "epoch": 0.5429250969276382, "grad_norm": 1.1244544005495727, "learning_rate": 2.3513706989582643e-06, "loss": 0.1766, "step": 2083 }, { "epoch": 0.5431857426774834, "grad_norm": 0.999340329031876, "learning_rate": 2.3492192480466845e-06, "loss": 0.1552, "step": 2084 }, { "epoch": 0.5434463884273287, "grad_norm": 1.0468537613101114, "learning_rate": 2.3470679092049806e-06, "loss": 0.1488, "step": 2085 }, { "epoch": 0.5437070341771739, "grad_norm": 0.9924024318883216, "learning_rate": 2.344916684032163e-06, "loss": 0.1505, "step": 2086 }, { "epoch": 0.5439676799270192, "grad_norm": 1.0028572191170195, "learning_rate": 2.3427655741271605e-06, "loss": 0.1525, "step": 2087 }, { "epoch": 0.5442283256768644, "grad_norm": 0.9940939707587166, "learning_rate": 2.3406145810888143e-06, "loss": 0.1422, "step": 2088 }, { "epoch": 0.5444889714267097, "grad_norm": 0.9930508110025648, "learning_rate": 2.338463706515882e-06, "loss": 0.1482, "step": 2089 }, { "epoch": 0.5447496171765549, "grad_norm": 0.9604943158494527, "learning_rate": 2.3363129520070286e-06, "loss": 0.1424, "step": 2090 }, { "epoch": 0.5450102629264002, "grad_norm": 1.0309415060284863, "learning_rate": 2.334162319160832e-06, "loss": 0.1498, "step": 2091 }, { "epoch": 0.5452709086762454, "grad_norm": 1.123056416131242, "learning_rate": 2.3320118095757806e-06, "loss": 0.1652, "step": 2092 }, { "epoch": 0.5455315544260907, "grad_norm": 1.1323667129506132, "learning_rate": 2.3298614248502696e-06, "loss": 0.1564, "step": 2093 }, { "epoch": 0.5457922001759359, "grad_norm": 0.9882107856174492, "learning_rate": 2.327711166582604e-06, "loss": 0.1434, "step": 2094 }, { "epoch": 0.5460528459257811, "grad_norm": 1.070018499322061, "learning_rate": 2.325561036370991e-06, "loss": 0.1764, "step": 2095 }, { "epoch": 0.5463134916756264, "grad_norm": 1.071951709252844, "learning_rate": 2.3234110358135446e-06, "loss": 0.1639, "step": 2096 }, { "epoch": 0.5465741374254716, "grad_norm": 1.049123769334437, "learning_rate": 2.3212611665082834e-06, "loss": 0.1469, "step": 2097 }, { "epoch": 0.5468347831753169, "grad_norm": 0.9787143870064599, "learning_rate": 2.319111430053126e-06, "loss": 0.1593, "step": 2098 }, { "epoch": 0.547095428925162, "grad_norm": 0.9680936414326738, "learning_rate": 2.3169618280458957e-06, "loss": 0.1482, "step": 2099 }, { "epoch": 0.5473560746750074, "grad_norm": 1.0258393252148927, "learning_rate": 2.3148123620843132e-06, "loss": 0.1412, "step": 2100 }, { "epoch": 0.5476167204248525, "grad_norm": 0.9611331436697393, "learning_rate": 2.3126630337659973e-06, "loss": 0.151, "step": 2101 }, { "epoch": 0.5478773661746978, "grad_norm": 0.9907615069023914, "learning_rate": 2.310513844688467e-06, "loss": 0.1515, "step": 2102 }, { "epoch": 0.548138011924543, "grad_norm": 0.9993935458551754, "learning_rate": 2.3083647964491367e-06, "loss": 0.1603, "step": 2103 }, { "epoch": 0.5483986576743883, "grad_norm": 1.0363281429696931, "learning_rate": 2.306215890645316e-06, "loss": 0.1605, "step": 2104 }, { "epoch": 0.5486593034242335, "grad_norm": 0.9884564949237181, "learning_rate": 2.3040671288742103e-06, "loss": 0.1458, "step": 2105 }, { "epoch": 0.5489199491740788, "grad_norm": 1.013484236383574, "learning_rate": 2.3019185127329145e-06, "loss": 0.1618, "step": 2106 }, { "epoch": 0.549180594923924, "grad_norm": 1.0258139442534282, "learning_rate": 2.299770043818417e-06, "loss": 0.1509, "step": 2107 }, { "epoch": 0.5494412406737693, "grad_norm": 0.989663319457852, "learning_rate": 2.2976217237275983e-06, "loss": 0.1499, "step": 2108 }, { "epoch": 0.5497018864236145, "grad_norm": 1.0007847950684743, "learning_rate": 2.2954735540572263e-06, "loss": 0.1549, "step": 2109 }, { "epoch": 0.5499625321734597, "grad_norm": 1.0280190451888445, "learning_rate": 2.293325536403958e-06, "loss": 0.156, "step": 2110 }, { "epoch": 0.550223177923305, "grad_norm": 1.012457702737526, "learning_rate": 2.2911776723643378e-06, "loss": 0.1591, "step": 2111 }, { "epoch": 0.5504838236731502, "grad_norm": 1.0476150369160286, "learning_rate": 2.2890299635347933e-06, "loss": 0.1683, "step": 2112 }, { "epoch": 0.5507444694229955, "grad_norm": 0.9761905121160829, "learning_rate": 2.2868824115116404e-06, "loss": 0.1636, "step": 2113 }, { "epoch": 0.5510051151728407, "grad_norm": 0.9954112430464046, "learning_rate": 2.2847350178910763e-06, "loss": 0.1533, "step": 2114 }, { "epoch": 0.551265760922686, "grad_norm": 1.0326123221105887, "learning_rate": 2.2825877842691813e-06, "loss": 0.1578, "step": 2115 }, { "epoch": 0.5515264066725312, "grad_norm": 0.9703276081115407, "learning_rate": 2.2804407122419165e-06, "loss": 0.1628, "step": 2116 }, { "epoch": 0.5517870524223765, "grad_norm": 1.0816214312932304, "learning_rate": 2.2782938034051214e-06, "loss": 0.1535, "step": 2117 }, { "epoch": 0.5520476981722217, "grad_norm": 0.9953649223459319, "learning_rate": 2.276147059354517e-06, "loss": 0.1568, "step": 2118 }, { "epoch": 0.552308343922067, "grad_norm": 1.004319451001873, "learning_rate": 2.2740004816857e-06, "loss": 0.1759, "step": 2119 }, { "epoch": 0.5525689896719121, "grad_norm": 0.978740333492704, "learning_rate": 2.271854071994144e-06, "loss": 0.1533, "step": 2120 }, { "epoch": 0.5528296354217574, "grad_norm": 0.9925503449082834, "learning_rate": 2.2697078318751974e-06, "loss": 0.1461, "step": 2121 }, { "epoch": 0.5530902811716026, "grad_norm": 0.944547748322964, "learning_rate": 2.2675617629240842e-06, "loss": 0.1383, "step": 2122 }, { "epoch": 0.5533509269214479, "grad_norm": 1.0400392457274503, "learning_rate": 2.2654158667358977e-06, "loss": 0.1634, "step": 2123 }, { "epoch": 0.5536115726712931, "grad_norm": 1.0791709209370017, "learning_rate": 2.2632701449056054e-06, "loss": 0.1517, "step": 2124 }, { "epoch": 0.5538722184211383, "grad_norm": 1.0100210790151294, "learning_rate": 2.2611245990280455e-06, "loss": 0.1594, "step": 2125 }, { "epoch": 0.5541328641709836, "grad_norm": 1.1044181589247548, "learning_rate": 2.258979230697923e-06, "loss": 0.1669, "step": 2126 }, { "epoch": 0.5543935099208288, "grad_norm": 0.9733896576416867, "learning_rate": 2.2568340415098154e-06, "loss": 0.1447, "step": 2127 }, { "epoch": 0.5546541556706741, "grad_norm": 1.1052680654962224, "learning_rate": 2.2546890330581616e-06, "loss": 0.1632, "step": 2128 }, { "epoch": 0.5549148014205193, "grad_norm": 0.9760077335907369, "learning_rate": 2.2525442069372687e-06, "loss": 0.1404, "step": 2129 }, { "epoch": 0.5551754471703646, "grad_norm": 0.984482800331238, "learning_rate": 2.25039956474131e-06, "loss": 0.1546, "step": 2130 }, { "epoch": 0.5554360929202098, "grad_norm": 1.0160762326347481, "learning_rate": 2.2482551080643185e-06, "loss": 0.1562, "step": 2131 }, { "epoch": 0.5556967386700551, "grad_norm": 1.0069764471067362, "learning_rate": 2.246110838500194e-06, "loss": 0.157, "step": 2132 }, { "epoch": 0.5559573844199003, "grad_norm": 1.0076608022628797, "learning_rate": 2.2439667576426905e-06, "loss": 0.1427, "step": 2133 }, { "epoch": 0.5562180301697456, "grad_norm": 0.9754565217243696, "learning_rate": 2.241822867085429e-06, "loss": 0.1534, "step": 2134 }, { "epoch": 0.5564786759195908, "grad_norm": 1.0551479466895044, "learning_rate": 2.2396791684218844e-06, "loss": 0.1488, "step": 2135 }, { "epoch": 0.5567393216694361, "grad_norm": 0.9826893676417257, "learning_rate": 2.2375356632453906e-06, "loss": 0.1503, "step": 2136 }, { "epoch": 0.5569999674192813, "grad_norm": 1.0066612639938133, "learning_rate": 2.235392353149137e-06, "loss": 0.151, "step": 2137 }, { "epoch": 0.5572606131691266, "grad_norm": 1.0258316775716945, "learning_rate": 2.2332492397261695e-06, "loss": 0.1577, "step": 2138 }, { "epoch": 0.5575212589189718, "grad_norm": 1.0828873372393484, "learning_rate": 2.231106324569386e-06, "loss": 0.1618, "step": 2139 }, { "epoch": 0.5577819046688169, "grad_norm": 1.027393030949496, "learning_rate": 2.2289636092715365e-06, "loss": 0.1447, "step": 2140 }, { "epoch": 0.5580425504186622, "grad_norm": 0.9459666013561879, "learning_rate": 2.2268210954252258e-06, "loss": 0.1516, "step": 2141 }, { "epoch": 0.5583031961685074, "grad_norm": 1.097081329534781, "learning_rate": 2.2246787846229057e-06, "loss": 0.148, "step": 2142 }, { "epoch": 0.5585638419183527, "grad_norm": 1.052001609548158, "learning_rate": 2.2225366784568796e-06, "loss": 0.1576, "step": 2143 }, { "epoch": 0.5588244876681979, "grad_norm": 0.9859705496208491, "learning_rate": 2.2203947785192954e-06, "loss": 0.133, "step": 2144 }, { "epoch": 0.5590851334180432, "grad_norm": 1.028535088927534, "learning_rate": 2.2182530864021508e-06, "loss": 0.1485, "step": 2145 }, { "epoch": 0.5593457791678884, "grad_norm": 1.102375120257499, "learning_rate": 2.2161116036972886e-06, "loss": 0.166, "step": 2146 }, { "epoch": 0.5596064249177337, "grad_norm": 1.0299787562125642, "learning_rate": 2.2139703319963945e-06, "loss": 0.1545, "step": 2147 }, { "epoch": 0.5598670706675789, "grad_norm": 1.100930496304531, "learning_rate": 2.2118292728909986e-06, "loss": 0.1609, "step": 2148 }, { "epoch": 0.5601277164174242, "grad_norm": 0.9992740394001599, "learning_rate": 2.2096884279724733e-06, "loss": 0.1515, "step": 2149 }, { "epoch": 0.5603883621672694, "grad_norm": 0.9649581516293544, "learning_rate": 2.2075477988320295e-06, "loss": 0.1482, "step": 2150 }, { "epoch": 0.5606490079171147, "grad_norm": 1.0302798330085698, "learning_rate": 2.205407387060721e-06, "loss": 0.1536, "step": 2151 }, { "epoch": 0.5609096536669599, "grad_norm": 1.042688028381088, "learning_rate": 2.2032671942494367e-06, "loss": 0.154, "step": 2152 }, { "epoch": 0.5611702994168052, "grad_norm": 1.0767188958588403, "learning_rate": 2.2011272219889067e-06, "loss": 0.162, "step": 2153 }, { "epoch": 0.5614309451666504, "grad_norm": 0.9950766530714366, "learning_rate": 2.1989874718696936e-06, "loss": 0.1453, "step": 2154 }, { "epoch": 0.5616915909164956, "grad_norm": 0.9815875177635808, "learning_rate": 2.196847945482195e-06, "loss": 0.1605, "step": 2155 }, { "epoch": 0.5619522366663409, "grad_norm": 1.0718922286511419, "learning_rate": 2.194708644416646e-06, "loss": 0.1637, "step": 2156 }, { "epoch": 0.562212882416186, "grad_norm": 0.9912828458194322, "learning_rate": 2.192569570263109e-06, "loss": 0.1477, "step": 2157 }, { "epoch": 0.5624735281660314, "grad_norm": 0.9787119611645707, "learning_rate": 2.1904307246114827e-06, "loss": 0.151, "step": 2158 }, { "epoch": 0.5627341739158765, "grad_norm": 0.9860897124746065, "learning_rate": 2.1882921090514923e-06, "loss": 0.1552, "step": 2159 }, { "epoch": 0.5629948196657218, "grad_norm": 1.06025566549454, "learning_rate": 2.1861537251726944e-06, "loss": 0.1589, "step": 2160 }, { "epoch": 0.563255465415567, "grad_norm": 1.0083132883361363, "learning_rate": 2.184015574564471e-06, "loss": 0.1536, "step": 2161 }, { "epoch": 0.5635161111654123, "grad_norm": 0.9540517561716054, "learning_rate": 2.1818776588160323e-06, "loss": 0.1536, "step": 2162 }, { "epoch": 0.5637767569152575, "grad_norm": 1.0218474328286804, "learning_rate": 2.179739979516414e-06, "loss": 0.1628, "step": 2163 }, { "epoch": 0.5640374026651028, "grad_norm": 0.9910751451357173, "learning_rate": 2.1776025382544765e-06, "loss": 0.1612, "step": 2164 }, { "epoch": 0.564298048414948, "grad_norm": 0.968115508330369, "learning_rate": 2.1754653366189018e-06, "loss": 0.1487, "step": 2165 }, { "epoch": 0.5645586941647933, "grad_norm": 0.9973509473057403, "learning_rate": 2.173328376198194e-06, "loss": 0.1537, "step": 2166 }, { "epoch": 0.5648193399146385, "grad_norm": 1.035157131755293, "learning_rate": 2.1711916585806793e-06, "loss": 0.1593, "step": 2167 }, { "epoch": 0.5650799856644838, "grad_norm": 0.9966049535923551, "learning_rate": 2.1690551853545016e-06, "loss": 0.1573, "step": 2168 }, { "epoch": 0.565340631414329, "grad_norm": 1.077823162938419, "learning_rate": 2.166918958107625e-06, "loss": 0.1473, "step": 2169 }, { "epoch": 0.5656012771641742, "grad_norm": 1.0030807654448803, "learning_rate": 2.1647829784278294e-06, "loss": 0.1355, "step": 2170 }, { "epoch": 0.5658619229140195, "grad_norm": 0.9833425922865919, "learning_rate": 2.1626472479027125e-06, "loss": 0.1543, "step": 2171 }, { "epoch": 0.5661225686638647, "grad_norm": 1.0578231496550672, "learning_rate": 2.1605117681196834e-06, "loss": 0.1502, "step": 2172 }, { "epoch": 0.56638321441371, "grad_norm": 0.98861015858645, "learning_rate": 2.1583765406659678e-06, "loss": 0.1469, "step": 2173 }, { "epoch": 0.5666438601635552, "grad_norm": 1.046641894901774, "learning_rate": 2.1562415671286032e-06, "loss": 0.1415, "step": 2174 }, { "epoch": 0.5669045059134005, "grad_norm": 1.0139258844992838, "learning_rate": 2.154106849094438e-06, "loss": 0.1531, "step": 2175 }, { "epoch": 0.5671651516632457, "grad_norm": 0.9977592085659993, "learning_rate": 2.151972388150131e-06, "loss": 0.1526, "step": 2176 }, { "epoch": 0.567425797413091, "grad_norm": 1.030752889588643, "learning_rate": 2.1498381858821497e-06, "loss": 0.1635, "step": 2177 }, { "epoch": 0.5676864431629361, "grad_norm": 1.0216102839088508, "learning_rate": 2.1477042438767685e-06, "loss": 0.152, "step": 2178 }, { "epoch": 0.5679470889127815, "grad_norm": 1.0513669172460183, "learning_rate": 2.1455705637200706e-06, "loss": 0.1576, "step": 2179 }, { "epoch": 0.5682077346626266, "grad_norm": 1.0947637507758232, "learning_rate": 2.1434371469979424e-06, "loss": 0.1607, "step": 2180 }, { "epoch": 0.5684683804124719, "grad_norm": 1.0654434382028686, "learning_rate": 2.1413039952960756e-06, "loss": 0.1629, "step": 2181 }, { "epoch": 0.5687290261623171, "grad_norm": 0.9567122420346108, "learning_rate": 2.1391711101999656e-06, "loss": 0.1367, "step": 2182 }, { "epoch": 0.5689896719121624, "grad_norm": 0.9518385375585521, "learning_rate": 2.1370384932949066e-06, "loss": 0.1271, "step": 2183 }, { "epoch": 0.5692503176620076, "grad_norm": 1.039423576214157, "learning_rate": 2.1349061461659966e-06, "loss": 0.1574, "step": 2184 }, { "epoch": 0.5695109634118528, "grad_norm": 1.0496915253005168, "learning_rate": 2.132774070398132e-06, "loss": 0.1581, "step": 2185 }, { "epoch": 0.5697716091616981, "grad_norm": 1.1287942979261283, "learning_rate": 2.130642267576008e-06, "loss": 0.1531, "step": 2186 }, { "epoch": 0.5700322549115433, "grad_norm": 1.0221378261316214, "learning_rate": 2.1285107392841166e-06, "loss": 0.1595, "step": 2187 }, { "epoch": 0.5702929006613886, "grad_norm": 1.0507241894358814, "learning_rate": 2.1263794871067443e-06, "loss": 0.1461, "step": 2188 }, { "epoch": 0.5705535464112338, "grad_norm": 1.0340499411452786, "learning_rate": 2.124248512627975e-06, "loss": 0.1548, "step": 2189 }, { "epoch": 0.5708141921610791, "grad_norm": 1.0089428877309514, "learning_rate": 2.1221178174316833e-06, "loss": 0.1478, "step": 2190 }, { "epoch": 0.5710748379109243, "grad_norm": 1.066300936810937, "learning_rate": 2.1199874031015395e-06, "loss": 0.1628, "step": 2191 }, { "epoch": 0.5713354836607696, "grad_norm": 1.0512088241035247, "learning_rate": 2.117857271221003e-06, "loss": 0.1555, "step": 2192 }, { "epoch": 0.5715961294106148, "grad_norm": 1.079433554707369, "learning_rate": 2.1157274233733242e-06, "loss": 0.1605, "step": 2193 }, { "epoch": 0.5718567751604601, "grad_norm": 1.0033574545451727, "learning_rate": 2.11359786114154e-06, "loss": 0.1468, "step": 2194 }, { "epoch": 0.5721174209103053, "grad_norm": 1.0181770686661877, "learning_rate": 2.1114685861084793e-06, "loss": 0.1481, "step": 2195 }, { "epoch": 0.5723780666601506, "grad_norm": 1.159811606845994, "learning_rate": 2.1093395998567527e-06, "loss": 0.1667, "step": 2196 }, { "epoch": 0.5726387124099958, "grad_norm": 0.9713049549222819, "learning_rate": 2.107210903968761e-06, "loss": 0.1492, "step": 2197 }, { "epoch": 0.572899358159841, "grad_norm": 1.0097003515889187, "learning_rate": 2.1050825000266862e-06, "loss": 0.1472, "step": 2198 }, { "epoch": 0.5731600039096862, "grad_norm": 1.0741808425742816, "learning_rate": 2.1029543896124925e-06, "loss": 0.1561, "step": 2199 }, { "epoch": 0.5734206496595314, "grad_norm": 1.0744672651668516, "learning_rate": 2.1008265743079286e-06, "loss": 0.1523, "step": 2200 }, { "epoch": 0.5736812954093767, "grad_norm": 1.1217693663325867, "learning_rate": 2.098699055694522e-06, "loss": 0.1546, "step": 2201 }, { "epoch": 0.5739419411592219, "grad_norm": 0.9744197761903075, "learning_rate": 2.0965718353535807e-06, "loss": 0.1422, "step": 2202 }, { "epoch": 0.5742025869090672, "grad_norm": 1.0203947863756524, "learning_rate": 2.0944449148661898e-06, "loss": 0.1408, "step": 2203 }, { "epoch": 0.5744632326589124, "grad_norm": 1.0769348790599578, "learning_rate": 2.0923182958132146e-06, "loss": 0.1476, "step": 2204 }, { "epoch": 0.5747238784087577, "grad_norm": 1.033618227326323, "learning_rate": 2.0901919797752915e-06, "loss": 0.1423, "step": 2205 }, { "epoch": 0.5749845241586029, "grad_norm": 1.034158692652536, "learning_rate": 2.0880659683328346e-06, "loss": 0.1568, "step": 2206 }, { "epoch": 0.5752451699084482, "grad_norm": 1.0882288857861793, "learning_rate": 2.0859402630660324e-06, "loss": 0.1596, "step": 2207 }, { "epoch": 0.5755058156582934, "grad_norm": 0.9895597161190343, "learning_rate": 2.0838148655548433e-06, "loss": 0.1425, "step": 2208 }, { "epoch": 0.5757664614081387, "grad_norm": 1.052956954989122, "learning_rate": 2.0816897773790002e-06, "loss": 0.1499, "step": 2209 }, { "epoch": 0.5760271071579839, "grad_norm": 1.0314067368115467, "learning_rate": 2.0795650001180028e-06, "loss": 0.1443, "step": 2210 }, { "epoch": 0.5762877529078292, "grad_norm": 1.0430938541644132, "learning_rate": 2.077440535351121e-06, "loss": 0.1487, "step": 2211 }, { "epoch": 0.5765483986576744, "grad_norm": 1.0187526387349028, "learning_rate": 2.0753163846573933e-06, "loss": 0.1381, "step": 2212 }, { "epoch": 0.5768090444075197, "grad_norm": 1.0505442825162616, "learning_rate": 2.073192549615623e-06, "loss": 0.1495, "step": 2213 }, { "epoch": 0.5770696901573649, "grad_norm": 1.1222593698408607, "learning_rate": 2.0710690318043814e-06, "loss": 0.1568, "step": 2214 }, { "epoch": 0.5773303359072102, "grad_norm": 1.0441280013453145, "learning_rate": 2.0689458328020017e-06, "loss": 0.1679, "step": 2215 }, { "epoch": 0.5775909816570554, "grad_norm": 0.9818645527607318, "learning_rate": 2.0668229541865796e-06, "loss": 0.1459, "step": 2216 }, { "epoch": 0.5778516274069005, "grad_norm": 1.0709193641486843, "learning_rate": 2.0647003975359757e-06, "loss": 0.149, "step": 2217 }, { "epoch": 0.5781122731567458, "grad_norm": 1.0641925688881924, "learning_rate": 2.0625781644278083e-06, "loss": 0.1478, "step": 2218 }, { "epoch": 0.578372918906591, "grad_norm": 1.198686019942226, "learning_rate": 2.060456256439457e-06, "loss": 0.1535, "step": 2219 }, { "epoch": 0.5786335646564363, "grad_norm": 1.088165370330873, "learning_rate": 2.058334675148061e-06, "loss": 0.165, "step": 2220 }, { "epoch": 0.5788942104062815, "grad_norm": 0.9556456157010834, "learning_rate": 2.056213422130511e-06, "loss": 0.1356, "step": 2221 }, { "epoch": 0.5791548561561268, "grad_norm": 1.1236655340681876, "learning_rate": 2.05409249896346e-06, "loss": 0.1716, "step": 2222 }, { "epoch": 0.579415501905972, "grad_norm": 1.0141389312416058, "learning_rate": 2.0519719072233133e-06, "loss": 0.1515, "step": 2223 }, { "epoch": 0.5796761476558173, "grad_norm": 0.9948280968544522, "learning_rate": 2.049851648486229e-06, "loss": 0.1369, "step": 2224 }, { "epoch": 0.5799367934056625, "grad_norm": 0.9914728532291761, "learning_rate": 2.0477317243281204e-06, "loss": 0.1509, "step": 2225 }, { "epoch": 0.5801974391555078, "grad_norm": 0.9878926343685213, "learning_rate": 2.04561213632465e-06, "loss": 0.1503, "step": 2226 }, { "epoch": 0.580458084905353, "grad_norm": 1.0730040267373895, "learning_rate": 2.043492886051229e-06, "loss": 0.1465, "step": 2227 }, { "epoch": 0.5807187306551983, "grad_norm": 1.110634521367655, "learning_rate": 2.041373975083021e-06, "loss": 0.174, "step": 2228 }, { "epoch": 0.5809793764050435, "grad_norm": 0.9775714796434773, "learning_rate": 2.0392554049949348e-06, "loss": 0.1437, "step": 2229 }, { "epoch": 0.5812400221548888, "grad_norm": 1.026726352643684, "learning_rate": 2.037137177361628e-06, "loss": 0.1605, "step": 2230 }, { "epoch": 0.581500667904734, "grad_norm": 0.9344897366025389, "learning_rate": 2.035019293757503e-06, "loss": 0.1346, "step": 2231 }, { "epoch": 0.5817613136545792, "grad_norm": 1.0149590119363239, "learning_rate": 2.0329017557567034e-06, "loss": 0.143, "step": 2232 }, { "epoch": 0.5820219594044245, "grad_norm": 0.9956503892138889, "learning_rate": 2.030784564933121e-06, "loss": 0.1423, "step": 2233 }, { "epoch": 0.5822826051542697, "grad_norm": 1.0350717409911525, "learning_rate": 2.028667722860386e-06, "loss": 0.1541, "step": 2234 }, { "epoch": 0.582543250904115, "grad_norm": 0.9826133644952325, "learning_rate": 2.026551231111871e-06, "loss": 0.1487, "step": 2235 }, { "epoch": 0.5828038966539602, "grad_norm": 1.0736674306455891, "learning_rate": 2.024435091260687e-06, "loss": 0.1479, "step": 2236 }, { "epoch": 0.5830645424038055, "grad_norm": 1.0264043192106254, "learning_rate": 2.022319304879686e-06, "loss": 0.1459, "step": 2237 }, { "epoch": 0.5833251881536506, "grad_norm": 1.0659808001580402, "learning_rate": 2.0202038735414532e-06, "loss": 0.1471, "step": 2238 }, { "epoch": 0.5835858339034959, "grad_norm": 1.04065549593578, "learning_rate": 2.018088798818312e-06, "loss": 0.1502, "step": 2239 }, { "epoch": 0.5838464796533411, "grad_norm": 1.0654848476236403, "learning_rate": 2.0159740822823233e-06, "loss": 0.1602, "step": 2240 }, { "epoch": 0.5841071254031864, "grad_norm": 0.9407537524885415, "learning_rate": 2.0138597255052766e-06, "loss": 0.134, "step": 2241 }, { "epoch": 0.5843677711530316, "grad_norm": 1.016667476827833, "learning_rate": 2.0117457300586996e-06, "loss": 0.1598, "step": 2242 }, { "epoch": 0.5846284169028769, "grad_norm": 1.0095557741703, "learning_rate": 2.0096320975138463e-06, "loss": 0.1534, "step": 2243 }, { "epoch": 0.5848890626527221, "grad_norm": 1.0037018043085746, "learning_rate": 2.007518829441704e-06, "loss": 0.1504, "step": 2244 }, { "epoch": 0.5851497084025674, "grad_norm": 1.0059465109579495, "learning_rate": 2.005405927412988e-06, "loss": 0.1456, "step": 2245 }, { "epoch": 0.5854103541524126, "grad_norm": 1.0140643927525803, "learning_rate": 2.0032933929981425e-06, "loss": 0.1511, "step": 2246 }, { "epoch": 0.5856709999022578, "grad_norm": 0.9921769606122608, "learning_rate": 2.0011812277673375e-06, "loss": 0.1484, "step": 2247 }, { "epoch": 0.5859316456521031, "grad_norm": 0.9497335508935446, "learning_rate": 1.9990694332904705e-06, "loss": 0.1382, "step": 2248 }, { "epoch": 0.5861922914019483, "grad_norm": 0.9653768701055637, "learning_rate": 1.9969580111371587e-06, "loss": 0.1385, "step": 2249 }, { "epoch": 0.5864529371517936, "grad_norm": 0.9659992924363038, "learning_rate": 1.9948469628767475e-06, "loss": 0.1388, "step": 2250 }, { "epoch": 0.5867135829016388, "grad_norm": 1.0061388643911007, "learning_rate": 1.9927362900783037e-06, "loss": 0.1534, "step": 2251 }, { "epoch": 0.5869742286514841, "grad_norm": 1.0435858615435403, "learning_rate": 1.990625994310612e-06, "loss": 0.1481, "step": 2252 }, { "epoch": 0.5872348744013293, "grad_norm": 0.9699984959718214, "learning_rate": 1.988516077142181e-06, "loss": 0.1469, "step": 2253 }, { "epoch": 0.5874955201511746, "grad_norm": 1.0039279755264117, "learning_rate": 1.986406540141234e-06, "loss": 0.1548, "step": 2254 }, { "epoch": 0.5877561659010198, "grad_norm": 0.9604535106102814, "learning_rate": 1.984297384875714e-06, "loss": 0.1457, "step": 2255 }, { "epoch": 0.5880168116508651, "grad_norm": 0.9791101792038013, "learning_rate": 1.98218861291328e-06, "loss": 0.1436, "step": 2256 }, { "epoch": 0.5882774574007102, "grad_norm": 0.987884794341313, "learning_rate": 1.980080225821305e-06, "loss": 0.1383, "step": 2257 }, { "epoch": 0.5885381031505555, "grad_norm": 1.090451223270261, "learning_rate": 1.977972225166878e-06, "loss": 0.1597, "step": 2258 }, { "epoch": 0.5887987489004007, "grad_norm": 1.0639645422062922, "learning_rate": 1.9758646125167997e-06, "loss": 0.1588, "step": 2259 }, { "epoch": 0.589059394650246, "grad_norm": 1.0182019861465226, "learning_rate": 1.973757389437581e-06, "loss": 0.1507, "step": 2260 }, { "epoch": 0.5893200404000912, "grad_norm": 0.9396496791471483, "learning_rate": 1.9716505574954455e-06, "loss": 0.1417, "step": 2261 }, { "epoch": 0.5895806861499364, "grad_norm": 0.9908234291622007, "learning_rate": 1.9695441182563237e-06, "loss": 0.1462, "step": 2262 }, { "epoch": 0.5898413318997817, "grad_norm": 0.9619452414461761, "learning_rate": 1.967438073285858e-06, "loss": 0.1461, "step": 2263 }, { "epoch": 0.5901019776496269, "grad_norm": 1.0452625974021985, "learning_rate": 1.965332424149394e-06, "loss": 0.1532, "step": 2264 }, { "epoch": 0.5903626233994722, "grad_norm": 1.053600963172338, "learning_rate": 1.963227172411984e-06, "loss": 0.1565, "step": 2265 }, { "epoch": 0.5906232691493174, "grad_norm": 1.026281674676374, "learning_rate": 1.9611223196383866e-06, "loss": 0.1513, "step": 2266 }, { "epoch": 0.5908839148991627, "grad_norm": 1.0898151928155257, "learning_rate": 1.9590178673930617e-06, "loss": 0.1561, "step": 2267 }, { "epoch": 0.5911445606490079, "grad_norm": 1.1398691162807641, "learning_rate": 1.956913817240173e-06, "loss": 0.1656, "step": 2268 }, { "epoch": 0.5914052063988532, "grad_norm": 1.0402036431168513, "learning_rate": 1.9548101707435845e-06, "loss": 0.1464, "step": 2269 }, { "epoch": 0.5916658521486984, "grad_norm": 1.0119938487119753, "learning_rate": 1.9527069294668617e-06, "loss": 0.1487, "step": 2270 }, { "epoch": 0.5919264978985437, "grad_norm": 1.068998789330398, "learning_rate": 1.9506040949732657e-06, "loss": 0.1593, "step": 2271 }, { "epoch": 0.5921871436483889, "grad_norm": 0.986104942580718, "learning_rate": 1.9485016688257578e-06, "loss": 0.148, "step": 2272 }, { "epoch": 0.5924477893982342, "grad_norm": 1.09046647902206, "learning_rate": 1.946399652586997e-06, "loss": 0.1535, "step": 2273 }, { "epoch": 0.5927084351480794, "grad_norm": 1.0905871930663738, "learning_rate": 1.9442980478193332e-06, "loss": 0.1589, "step": 2274 }, { "epoch": 0.5929690808979247, "grad_norm": 0.9880938945467669, "learning_rate": 1.9421968560848158e-06, "loss": 0.1394, "step": 2275 }, { "epoch": 0.5932297266477699, "grad_norm": 1.0194099247921462, "learning_rate": 1.9400960789451827e-06, "loss": 0.147, "step": 2276 }, { "epoch": 0.593490372397615, "grad_norm": 1.0620990084945328, "learning_rate": 1.937995717961866e-06, "loss": 0.1669, "step": 2277 }, { "epoch": 0.5937510181474603, "grad_norm": 1.0481069046618074, "learning_rate": 1.935895774695988e-06, "loss": 0.1541, "step": 2278 }, { "epoch": 0.5940116638973055, "grad_norm": 1.0477389690861114, "learning_rate": 1.9337962507083603e-06, "loss": 0.1508, "step": 2279 }, { "epoch": 0.5942723096471508, "grad_norm": 1.040221508043416, "learning_rate": 1.9316971475594835e-06, "loss": 0.1539, "step": 2280 }, { "epoch": 0.594532955396996, "grad_norm": 1.0673112737559334, "learning_rate": 1.9295984668095457e-06, "loss": 0.17, "step": 2281 }, { "epoch": 0.5947936011468413, "grad_norm": 1.0838846876244945, "learning_rate": 1.9275002100184186e-06, "loss": 0.1643, "step": 2282 }, { "epoch": 0.5950542468966865, "grad_norm": 1.0265228325453233, "learning_rate": 1.9254023787456615e-06, "loss": 0.1454, "step": 2283 }, { "epoch": 0.5953148926465318, "grad_norm": 0.9724515603933543, "learning_rate": 1.9233049745505167e-06, "loss": 0.1454, "step": 2284 }, { "epoch": 0.595575538396377, "grad_norm": 1.0826953199161005, "learning_rate": 1.9212079989919073e-06, "loss": 0.1576, "step": 2285 }, { "epoch": 0.5958361841462223, "grad_norm": 1.077371927750516, "learning_rate": 1.919111453628442e-06, "loss": 0.1431, "step": 2286 }, { "epoch": 0.5960968298960675, "grad_norm": 1.0502685413675823, "learning_rate": 1.9170153400184054e-06, "loss": 0.1601, "step": 2287 }, { "epoch": 0.5963574756459128, "grad_norm": 1.0235970445156009, "learning_rate": 1.914919659719762e-06, "loss": 0.1526, "step": 2288 }, { "epoch": 0.596618121395758, "grad_norm": 1.0771225996057687, "learning_rate": 1.912824414290157e-06, "loss": 0.1574, "step": 2289 }, { "epoch": 0.5968787671456033, "grad_norm": 1.0541693479029677, "learning_rate": 1.9107296052869086e-06, "loss": 0.1392, "step": 2290 }, { "epoch": 0.5971394128954485, "grad_norm": 1.1145387138871667, "learning_rate": 1.9086352342670144e-06, "loss": 0.1426, "step": 2291 }, { "epoch": 0.5974000586452937, "grad_norm": 1.1009641005630786, "learning_rate": 1.9065413027871437e-06, "loss": 0.157, "step": 2292 }, { "epoch": 0.597660704395139, "grad_norm": 0.9663272706871868, "learning_rate": 1.9044478124036395e-06, "loss": 0.1567, "step": 2293 }, { "epoch": 0.5979213501449842, "grad_norm": 0.975977819307003, "learning_rate": 1.902354764672518e-06, "loss": 0.1421, "step": 2294 }, { "epoch": 0.5981819958948295, "grad_norm": 1.1158591565628349, "learning_rate": 1.9002621611494654e-06, "loss": 0.152, "step": 2295 }, { "epoch": 0.5984426416446746, "grad_norm": 1.052406298809587, "learning_rate": 1.8981700033898387e-06, "loss": 0.1433, "step": 2296 }, { "epoch": 0.59870328739452, "grad_norm": 0.9751708485090503, "learning_rate": 1.896078292948663e-06, "loss": 0.1395, "step": 2297 }, { "epoch": 0.5989639331443651, "grad_norm": 1.0830807915315248, "learning_rate": 1.8939870313806302e-06, "loss": 0.1627, "step": 2298 }, { "epoch": 0.5992245788942104, "grad_norm": 1.0533470658308133, "learning_rate": 1.8918962202401003e-06, "loss": 0.158, "step": 2299 }, { "epoch": 0.5994852246440556, "grad_norm": 1.027498446062874, "learning_rate": 1.8898058610810963e-06, "loss": 0.1639, "step": 2300 }, { "epoch": 0.5997458703939009, "grad_norm": 1.0675005241887805, "learning_rate": 1.887715955457308e-06, "loss": 0.1481, "step": 2301 }, { "epoch": 0.6000065161437461, "grad_norm": 1.0894080318375594, "learning_rate": 1.8856265049220852e-06, "loss": 0.1525, "step": 2302 }, { "epoch": 0.6002671618935914, "grad_norm": 1.0527818673648892, "learning_rate": 1.8835375110284424e-06, "loss": 0.1564, "step": 2303 }, { "epoch": 0.6005278076434366, "grad_norm": 0.9912448211400121, "learning_rate": 1.8814489753290517e-06, "loss": 0.153, "step": 2304 }, { "epoch": 0.6007884533932819, "grad_norm": 0.9935014424580795, "learning_rate": 1.8793608993762464e-06, "loss": 0.1426, "step": 2305 }, { "epoch": 0.6010490991431271, "grad_norm": 1.0206887082676674, "learning_rate": 1.8772732847220182e-06, "loss": 0.1516, "step": 2306 }, { "epoch": 0.6013097448929723, "grad_norm": 1.0523842588381234, "learning_rate": 1.8751861329180149e-06, "loss": 0.1494, "step": 2307 }, { "epoch": 0.6015703906428176, "grad_norm": 0.95188903992871, "learning_rate": 1.873099445515542e-06, "loss": 0.1471, "step": 2308 }, { "epoch": 0.6018310363926628, "grad_norm": 0.9355750242838281, "learning_rate": 1.8710132240655575e-06, "loss": 0.1394, "step": 2309 }, { "epoch": 0.6020916821425081, "grad_norm": 1.0128712787872685, "learning_rate": 1.868927470118675e-06, "loss": 0.1537, "step": 2310 }, { "epoch": 0.6023523278923533, "grad_norm": 0.9898674555801695, "learning_rate": 1.8668421852251588e-06, "loss": 0.141, "step": 2311 }, { "epoch": 0.6026129736421986, "grad_norm": 1.0425744310484402, "learning_rate": 1.8647573709349275e-06, "loss": 0.1561, "step": 2312 }, { "epoch": 0.6028736193920438, "grad_norm": 1.0451193171407507, "learning_rate": 1.8626730287975467e-06, "loss": 0.1477, "step": 2313 }, { "epoch": 0.6031342651418891, "grad_norm": 0.9965731101052837, "learning_rate": 1.860589160362234e-06, "loss": 0.147, "step": 2314 }, { "epoch": 0.6033949108917342, "grad_norm": 1.0223967233161984, "learning_rate": 1.8585057671778512e-06, "loss": 0.1466, "step": 2315 }, { "epoch": 0.6036555566415795, "grad_norm": 1.0271398669236385, "learning_rate": 1.8564228507929099e-06, "loss": 0.1567, "step": 2316 }, { "epoch": 0.6039162023914247, "grad_norm": 1.044522738704929, "learning_rate": 1.8543404127555672e-06, "loss": 0.1437, "step": 2317 }, { "epoch": 0.60417684814127, "grad_norm": 1.0010216964690735, "learning_rate": 1.852258454613623e-06, "loss": 0.1517, "step": 2318 }, { "epoch": 0.6044374938911152, "grad_norm": 0.9825658428677795, "learning_rate": 1.8501769779145223e-06, "loss": 0.1548, "step": 2319 }, { "epoch": 0.6046981396409605, "grad_norm": 1.0127260261753837, "learning_rate": 1.8480959842053508e-06, "loss": 0.1606, "step": 2320 }, { "epoch": 0.6049587853908057, "grad_norm": 0.9708872702338144, "learning_rate": 1.8460154750328351e-06, "loss": 0.1427, "step": 2321 }, { "epoch": 0.6052194311406509, "grad_norm": 1.0162494321692541, "learning_rate": 1.843935451943344e-06, "loss": 0.1582, "step": 2322 }, { "epoch": 0.6054800768904962, "grad_norm": 1.017171500464688, "learning_rate": 1.841855916482882e-06, "loss": 0.1556, "step": 2323 }, { "epoch": 0.6057407226403414, "grad_norm": 1.0877147938390503, "learning_rate": 1.839776870197093e-06, "loss": 0.1589, "step": 2324 }, { "epoch": 0.6060013683901867, "grad_norm": 0.9667726899418153, "learning_rate": 1.8376983146312577e-06, "loss": 0.1456, "step": 2325 }, { "epoch": 0.6062620141400319, "grad_norm": 1.042016392873869, "learning_rate": 1.8356202513302896e-06, "loss": 0.1523, "step": 2326 }, { "epoch": 0.6065226598898772, "grad_norm": 1.0018557614285166, "learning_rate": 1.8335426818387386e-06, "loss": 0.1413, "step": 2327 }, { "epoch": 0.6067833056397224, "grad_norm": 1.0231869755679397, "learning_rate": 1.831465607700787e-06, "loss": 0.1416, "step": 2328 }, { "epoch": 0.6070439513895677, "grad_norm": 1.049096396647423, "learning_rate": 1.8293890304602491e-06, "loss": 0.1542, "step": 2329 }, { "epoch": 0.6073045971394129, "grad_norm": 1.0185860373087001, "learning_rate": 1.8273129516605698e-06, "loss": 0.1534, "step": 2330 }, { "epoch": 0.6075652428892582, "grad_norm": 1.0231403922572886, "learning_rate": 1.825237372844822e-06, "loss": 0.1406, "step": 2331 }, { "epoch": 0.6078258886391034, "grad_norm": 1.0518133494257642, "learning_rate": 1.8231622955557094e-06, "loss": 0.1525, "step": 2332 }, { "epoch": 0.6080865343889487, "grad_norm": 0.9865779051694159, "learning_rate": 1.8210877213355613e-06, "loss": 0.1472, "step": 2333 }, { "epoch": 0.6083471801387939, "grad_norm": 1.1338089298490799, "learning_rate": 1.819013651726334e-06, "loss": 0.1503, "step": 2334 }, { "epoch": 0.6086078258886392, "grad_norm": 1.0087023275595872, "learning_rate": 1.8169400882696081e-06, "loss": 0.1507, "step": 2335 }, { "epoch": 0.6088684716384843, "grad_norm": 1.0547552753701206, "learning_rate": 1.8148670325065893e-06, "loss": 0.148, "step": 2336 }, { "epoch": 0.6091291173883295, "grad_norm": 1.08832183472011, "learning_rate": 1.8127944859781037e-06, "loss": 0.1486, "step": 2337 }, { "epoch": 0.6093897631381748, "grad_norm": 1.0872522938890057, "learning_rate": 1.8107224502245997e-06, "loss": 0.1553, "step": 2338 }, { "epoch": 0.60965040888802, "grad_norm": 1.081321353291394, "learning_rate": 1.8086509267861472e-06, "loss": 0.1451, "step": 2339 }, { "epoch": 0.6099110546378653, "grad_norm": 0.9742205337210225, "learning_rate": 1.806579917202435e-06, "loss": 0.1436, "step": 2340 }, { "epoch": 0.6101717003877105, "grad_norm": 1.0216240690585208, "learning_rate": 1.8045094230127697e-06, "loss": 0.143, "step": 2341 }, { "epoch": 0.6104323461375558, "grad_norm": 1.0615148525797429, "learning_rate": 1.802439445756073e-06, "loss": 0.1543, "step": 2342 }, { "epoch": 0.610692991887401, "grad_norm": 1.1387358341404814, "learning_rate": 1.8003699869708858e-06, "loss": 0.1567, "step": 2343 }, { "epoch": 0.6109536376372463, "grad_norm": 1.0289061393829773, "learning_rate": 1.7983010481953605e-06, "loss": 0.1478, "step": 2344 }, { "epoch": 0.6112142833870915, "grad_norm": 0.9985256240020861, "learning_rate": 1.7962326309672661e-06, "loss": 0.1595, "step": 2345 }, { "epoch": 0.6114749291369368, "grad_norm": 1.007912626566653, "learning_rate": 1.7941647368239806e-06, "loss": 0.1487, "step": 2346 }, { "epoch": 0.611735574886782, "grad_norm": 1.0410430926192646, "learning_rate": 1.7920973673024967e-06, "loss": 0.1407, "step": 2347 }, { "epoch": 0.6119962206366273, "grad_norm": 1.0657075927580646, "learning_rate": 1.7900305239394134e-06, "loss": 0.1612, "step": 2348 }, { "epoch": 0.6122568663864725, "grad_norm": 0.9637631780440921, "learning_rate": 1.7879642082709411e-06, "loss": 0.1374, "step": 2349 }, { "epoch": 0.6125175121363178, "grad_norm": 1.018566590372631, "learning_rate": 1.785898421832898e-06, "loss": 0.14, "step": 2350 }, { "epoch": 0.612778157886163, "grad_norm": 1.0176816863244884, "learning_rate": 1.7838331661607078e-06, "loss": 0.148, "step": 2351 }, { "epoch": 0.6130388036360082, "grad_norm": 0.9880782773749687, "learning_rate": 1.7817684427894016e-06, "loss": 0.1536, "step": 2352 }, { "epoch": 0.6132994493858535, "grad_norm": 1.0324588812412572, "learning_rate": 1.7797042532536118e-06, "loss": 0.1476, "step": 2353 }, { "epoch": 0.6135600951356986, "grad_norm": 1.029261101737035, "learning_rate": 1.7776405990875761e-06, "loss": 0.1544, "step": 2354 }, { "epoch": 0.613820740885544, "grad_norm": 1.076696345849544, "learning_rate": 1.7755774818251348e-06, "loss": 0.1555, "step": 2355 }, { "epoch": 0.6140813866353891, "grad_norm": 1.0332261194311643, "learning_rate": 1.7735149029997273e-06, "loss": 0.1477, "step": 2356 }, { "epoch": 0.6143420323852344, "grad_norm": 1.0506764634336088, "learning_rate": 1.7714528641443952e-06, "loss": 0.1479, "step": 2357 }, { "epoch": 0.6146026781350796, "grad_norm": 1.0619097688452144, "learning_rate": 1.7693913667917757e-06, "loss": 0.1366, "step": 2358 }, { "epoch": 0.6148633238849249, "grad_norm": 1.0470201428181227, "learning_rate": 1.7673304124741055e-06, "loss": 0.1497, "step": 2359 }, { "epoch": 0.6151239696347701, "grad_norm": 0.9697863469801495, "learning_rate": 1.7652700027232184e-06, "loss": 0.139, "step": 2360 }, { "epoch": 0.6153846153846154, "grad_norm": 1.0236009172135931, "learning_rate": 1.7632101390705408e-06, "loss": 0.1472, "step": 2361 }, { "epoch": 0.6156452611344606, "grad_norm": 0.9867270202723373, "learning_rate": 1.7611508230470963e-06, "loss": 0.1395, "step": 2362 }, { "epoch": 0.6159059068843059, "grad_norm": 0.9919017491667901, "learning_rate": 1.7590920561835002e-06, "loss": 0.1531, "step": 2363 }, { "epoch": 0.6161665526341511, "grad_norm": 0.9855831725496565, "learning_rate": 1.7570338400099569e-06, "loss": 0.1475, "step": 2364 }, { "epoch": 0.6164271983839964, "grad_norm": 0.9787813351315375, "learning_rate": 1.7549761760562662e-06, "loss": 0.1275, "step": 2365 }, { "epoch": 0.6166878441338416, "grad_norm": 1.0369700987823687, "learning_rate": 1.7529190658518142e-06, "loss": 0.1544, "step": 2366 }, { "epoch": 0.6169484898836868, "grad_norm": 1.0703001606563864, "learning_rate": 1.750862510925577e-06, "loss": 0.1551, "step": 2367 }, { "epoch": 0.6172091356335321, "grad_norm": 0.9828476549836456, "learning_rate": 1.7488065128061187e-06, "loss": 0.1345, "step": 2368 }, { "epoch": 0.6174697813833773, "grad_norm": 1.023093610758241, "learning_rate": 1.7467510730215854e-06, "loss": 0.1582, "step": 2369 }, { "epoch": 0.6177304271332226, "grad_norm": 1.0380592896997525, "learning_rate": 1.7446961930997126e-06, "loss": 0.1627, "step": 2370 }, { "epoch": 0.6179910728830678, "grad_norm": 1.0929438895772603, "learning_rate": 1.7426418745678182e-06, "loss": 0.1605, "step": 2371 }, { "epoch": 0.6182517186329131, "grad_norm": 1.0134077030962814, "learning_rate": 1.7405881189528024e-06, "loss": 0.142, "step": 2372 }, { "epoch": 0.6185123643827583, "grad_norm": 1.0520456141946088, "learning_rate": 1.7385349277811483e-06, "loss": 0.1571, "step": 2373 }, { "epoch": 0.6187730101326036, "grad_norm": 1.0112256114632345, "learning_rate": 1.7364823025789184e-06, "loss": 0.144, "step": 2374 }, { "epoch": 0.6190336558824487, "grad_norm": 0.9905267280677591, "learning_rate": 1.7344302448717537e-06, "loss": 0.1419, "step": 2375 }, { "epoch": 0.619294301632294, "grad_norm": 1.0239538819234828, "learning_rate": 1.732378756184875e-06, "loss": 0.1513, "step": 2376 }, { "epoch": 0.6195549473821392, "grad_norm": 0.922352142791185, "learning_rate": 1.7303278380430794e-06, "loss": 0.1302, "step": 2377 }, { "epoch": 0.6198155931319845, "grad_norm": 0.9953606936246381, "learning_rate": 1.7282774919707406e-06, "loss": 0.1407, "step": 2378 }, { "epoch": 0.6200762388818297, "grad_norm": 1.0680827593930897, "learning_rate": 1.726227719491807e-06, "loss": 0.1581, "step": 2379 }, { "epoch": 0.620336884631675, "grad_norm": 1.003610697750953, "learning_rate": 1.7241785221297984e-06, "loss": 0.1461, "step": 2380 }, { "epoch": 0.6205975303815202, "grad_norm": 1.0074745773920504, "learning_rate": 1.7221299014078107e-06, "loss": 0.1547, "step": 2381 }, { "epoch": 0.6208581761313655, "grad_norm": 1.0420806554127309, "learning_rate": 1.7200818588485088e-06, "loss": 0.1648, "step": 2382 }, { "epoch": 0.6211188218812107, "grad_norm": 1.057713282858075, "learning_rate": 1.7180343959741292e-06, "loss": 0.1604, "step": 2383 }, { "epoch": 0.6213794676310559, "grad_norm": 1.0294605241257024, "learning_rate": 1.715987514306476e-06, "loss": 0.1549, "step": 2384 }, { "epoch": 0.6216401133809012, "grad_norm": 0.9815607570743676, "learning_rate": 1.713941215366924e-06, "loss": 0.1465, "step": 2385 }, { "epoch": 0.6219007591307464, "grad_norm": 1.0484010633916205, "learning_rate": 1.7118955006764116e-06, "loss": 0.1425, "step": 2386 }, { "epoch": 0.6221614048805917, "grad_norm": 1.0088700126680605, "learning_rate": 1.7098503717554437e-06, "loss": 0.149, "step": 2387 }, { "epoch": 0.6224220506304369, "grad_norm": 1.0304467257323693, "learning_rate": 1.707805830124093e-06, "loss": 0.1578, "step": 2388 }, { "epoch": 0.6226826963802822, "grad_norm": 1.0698654954883622, "learning_rate": 1.705761877301991e-06, "loss": 0.1594, "step": 2389 }, { "epoch": 0.6229433421301274, "grad_norm": 0.9891633443888281, "learning_rate": 1.703718514808336e-06, "loss": 0.1361, "step": 2390 }, { "epoch": 0.6232039878799727, "grad_norm": 0.9931992264533162, "learning_rate": 1.7016757441618837e-06, "loss": 0.1376, "step": 2391 }, { "epoch": 0.6234646336298179, "grad_norm": 1.0257234976711331, "learning_rate": 1.6996335668809515e-06, "loss": 0.1489, "step": 2392 }, { "epoch": 0.6237252793796632, "grad_norm": 0.9887910721243626, "learning_rate": 1.6975919844834172e-06, "loss": 0.1391, "step": 2393 }, { "epoch": 0.6239859251295083, "grad_norm": 1.066981531636984, "learning_rate": 1.6955509984867136e-06, "loss": 0.1531, "step": 2394 }, { "epoch": 0.6242465708793536, "grad_norm": 1.0295010438373557, "learning_rate": 1.6935106104078325e-06, "loss": 0.1586, "step": 2395 }, { "epoch": 0.6245072166291988, "grad_norm": 1.050229395017074, "learning_rate": 1.691470821763322e-06, "loss": 0.1504, "step": 2396 }, { "epoch": 0.6247678623790441, "grad_norm": 0.9669401107842058, "learning_rate": 1.6894316340692802e-06, "loss": 0.1308, "step": 2397 }, { "epoch": 0.6250285081288893, "grad_norm": 1.000478985463435, "learning_rate": 1.6873930488413628e-06, "loss": 0.1292, "step": 2398 }, { "epoch": 0.6252891538787345, "grad_norm": 1.0217506392214566, "learning_rate": 1.6853550675947772e-06, "loss": 0.1594, "step": 2399 }, { "epoch": 0.6255497996285798, "grad_norm": 0.9472257020038567, "learning_rate": 1.6833176918442796e-06, "loss": 0.1371, "step": 2400 }, { "epoch": 0.625810445378425, "grad_norm": 0.957218682696104, "learning_rate": 1.6812809231041794e-06, "loss": 0.1428, "step": 2401 }, { "epoch": 0.6260710911282703, "grad_norm": 1.0589213917506268, "learning_rate": 1.6792447628883318e-06, "loss": 0.1477, "step": 2402 }, { "epoch": 0.6263317368781155, "grad_norm": 1.0312853785731895, "learning_rate": 1.6772092127101407e-06, "loss": 0.1434, "step": 2403 }, { "epoch": 0.6265923826279608, "grad_norm": 1.0971752387631697, "learning_rate": 1.6751742740825583e-06, "loss": 0.1612, "step": 2404 }, { "epoch": 0.626853028377806, "grad_norm": 1.017448516228627, "learning_rate": 1.673139948518079e-06, "loss": 0.1417, "step": 2405 }, { "epoch": 0.6271136741276513, "grad_norm": 1.0023612532722428, "learning_rate": 1.6711062375287451e-06, "loss": 0.1533, "step": 2406 }, { "epoch": 0.6273743198774965, "grad_norm": 1.1084756847635346, "learning_rate": 1.6690731426261403e-06, "loss": 0.1585, "step": 2407 }, { "epoch": 0.6276349656273418, "grad_norm": 1.0297001204640788, "learning_rate": 1.6670406653213883e-06, "loss": 0.1548, "step": 2408 }, { "epoch": 0.627895611377187, "grad_norm": 1.0441164432901078, "learning_rate": 1.665008807125158e-06, "loss": 0.1535, "step": 2409 }, { "epoch": 0.6281562571270323, "grad_norm": 1.0884737259044983, "learning_rate": 1.6629775695476551e-06, "loss": 0.152, "step": 2410 }, { "epoch": 0.6284169028768775, "grad_norm": 1.0018676776916817, "learning_rate": 1.6609469540986257e-06, "loss": 0.1474, "step": 2411 }, { "epoch": 0.6286775486267228, "grad_norm": 1.0188879652228289, "learning_rate": 1.6589169622873524e-06, "loss": 0.1427, "step": 2412 }, { "epoch": 0.628938194376568, "grad_norm": 1.012854091169766, "learning_rate": 1.6568875956226537e-06, "loss": 0.1489, "step": 2413 }, { "epoch": 0.6291988401264131, "grad_norm": 1.0267761050338815, "learning_rate": 1.654858855612886e-06, "loss": 0.1491, "step": 2414 }, { "epoch": 0.6294594858762584, "grad_norm": 0.9774870111772558, "learning_rate": 1.6528307437659369e-06, "loss": 0.1438, "step": 2415 }, { "epoch": 0.6297201316261036, "grad_norm": 0.9931252424228169, "learning_rate": 1.6508032615892294e-06, "loss": 0.1409, "step": 2416 }, { "epoch": 0.6299807773759489, "grad_norm": 1.0941066463516829, "learning_rate": 1.6487764105897172e-06, "loss": 0.1556, "step": 2417 }, { "epoch": 0.6302414231257941, "grad_norm": 0.9741792606155382, "learning_rate": 1.6467501922738862e-06, "loss": 0.137, "step": 2418 }, { "epoch": 0.6305020688756394, "grad_norm": 1.0263894359201442, "learning_rate": 1.6447246081477503e-06, "loss": 0.1522, "step": 2419 }, { "epoch": 0.6307627146254846, "grad_norm": 1.111415747002901, "learning_rate": 1.6426996597168517e-06, "loss": 0.1579, "step": 2420 }, { "epoch": 0.6310233603753299, "grad_norm": 1.0468516400896506, "learning_rate": 1.6406753484862636e-06, "loss": 0.1501, "step": 2421 }, { "epoch": 0.6312840061251751, "grad_norm": 1.0531444785139537, "learning_rate": 1.6386516759605813e-06, "loss": 0.1373, "step": 2422 }, { "epoch": 0.6315446518750204, "grad_norm": 1.0644472791901831, "learning_rate": 1.6366286436439294e-06, "loss": 0.1467, "step": 2423 }, { "epoch": 0.6318052976248656, "grad_norm": 1.0847886460830667, "learning_rate": 1.6346062530399525e-06, "loss": 0.1445, "step": 2424 }, { "epoch": 0.6320659433747109, "grad_norm": 1.0760701897238045, "learning_rate": 1.6325845056518211e-06, "loss": 0.1525, "step": 2425 }, { "epoch": 0.6323265891245561, "grad_norm": 1.091352331336454, "learning_rate": 1.6305634029822267e-06, "loss": 0.1531, "step": 2426 }, { "epoch": 0.6325872348744014, "grad_norm": 1.0282276749668309, "learning_rate": 1.6285429465333827e-06, "loss": 0.1537, "step": 2427 }, { "epoch": 0.6328478806242466, "grad_norm": 1.0596873392293855, "learning_rate": 1.6265231378070197e-06, "loss": 0.1585, "step": 2428 }, { "epoch": 0.6331085263740918, "grad_norm": 0.9797186145711477, "learning_rate": 1.6245039783043905e-06, "loss": 0.1547, "step": 2429 }, { "epoch": 0.6333691721239371, "grad_norm": 1.01747918885474, "learning_rate": 1.6224854695262616e-06, "loss": 0.1576, "step": 2430 }, { "epoch": 0.6336298178737823, "grad_norm": 1.0283332886546255, "learning_rate": 1.6204676129729175e-06, "loss": 0.1473, "step": 2431 }, { "epoch": 0.6338904636236276, "grad_norm": 1.0711123432311398, "learning_rate": 1.6184504101441584e-06, "loss": 0.163, "step": 2432 }, { "epoch": 0.6341511093734727, "grad_norm": 1.0545546869243871, "learning_rate": 1.6164338625392977e-06, "loss": 0.1448, "step": 2433 }, { "epoch": 0.634411755123318, "grad_norm": 1.009592403962238, "learning_rate": 1.614417971657164e-06, "loss": 0.1476, "step": 2434 }, { "epoch": 0.6346724008731632, "grad_norm": 1.0760774541967553, "learning_rate": 1.6124027389960938e-06, "loss": 0.1513, "step": 2435 }, { "epoch": 0.6349330466230085, "grad_norm": 1.0413188328983882, "learning_rate": 1.6103881660539369e-06, "loss": 0.133, "step": 2436 }, { "epoch": 0.6351936923728537, "grad_norm": 1.0012756697891025, "learning_rate": 1.6083742543280537e-06, "loss": 0.1434, "step": 2437 }, { "epoch": 0.635454338122699, "grad_norm": 0.9649360041297952, "learning_rate": 1.6063610053153106e-06, "loss": 0.142, "step": 2438 }, { "epoch": 0.6357149838725442, "grad_norm": 1.0493798475756189, "learning_rate": 1.6043484205120834e-06, "loss": 0.1394, "step": 2439 }, { "epoch": 0.6359756296223895, "grad_norm": 1.0881263232851144, "learning_rate": 1.6023365014142544e-06, "loss": 0.1616, "step": 2440 }, { "epoch": 0.6362362753722347, "grad_norm": 1.0487608335954752, "learning_rate": 1.600325249517208e-06, "loss": 0.1523, "step": 2441 }, { "epoch": 0.63649692112208, "grad_norm": 1.050058773921465, "learning_rate": 1.5983146663158368e-06, "loss": 0.1566, "step": 2442 }, { "epoch": 0.6367575668719252, "grad_norm": 1.0809298606643984, "learning_rate": 1.5963047533045334e-06, "loss": 0.1474, "step": 2443 }, { "epoch": 0.6370182126217704, "grad_norm": 1.0121926787099373, "learning_rate": 1.5942955119771942e-06, "loss": 0.1462, "step": 2444 }, { "epoch": 0.6372788583716157, "grad_norm": 0.9802308867927557, "learning_rate": 1.592286943827216e-06, "loss": 0.1488, "step": 2445 }, { "epoch": 0.6375395041214609, "grad_norm": 0.9871812872937085, "learning_rate": 1.5902790503474928e-06, "loss": 0.1554, "step": 2446 }, { "epoch": 0.6378001498713062, "grad_norm": 0.9606555845800195, "learning_rate": 1.5882718330304209e-06, "loss": 0.1441, "step": 2447 }, { "epoch": 0.6380607956211514, "grad_norm": 0.9474740726420307, "learning_rate": 1.586265293367891e-06, "loss": 0.1326, "step": 2448 }, { "epoch": 0.6383214413709967, "grad_norm": 1.0070973664248328, "learning_rate": 1.5842594328512927e-06, "loss": 0.1532, "step": 2449 }, { "epoch": 0.6385820871208419, "grad_norm": 0.9571854236631201, "learning_rate": 1.5822542529715084e-06, "loss": 0.1319, "step": 2450 }, { "epoch": 0.6388427328706872, "grad_norm": 0.9554736852917455, "learning_rate": 1.5802497552189177e-06, "loss": 0.135, "step": 2451 }, { "epoch": 0.6391033786205323, "grad_norm": 1.038679859507958, "learning_rate": 1.578245941083389e-06, "loss": 0.1508, "step": 2452 }, { "epoch": 0.6393640243703776, "grad_norm": 1.0855503897057666, "learning_rate": 1.576242812054285e-06, "loss": 0.1435, "step": 2453 }, { "epoch": 0.6396246701202228, "grad_norm": 0.9388280468893719, "learning_rate": 1.57424036962046e-06, "loss": 0.1387, "step": 2454 }, { "epoch": 0.6398853158700681, "grad_norm": 1.009579374906636, "learning_rate": 1.572238615270256e-06, "loss": 0.1505, "step": 2455 }, { "epoch": 0.6401459616199133, "grad_norm": 1.0166067380730686, "learning_rate": 1.5702375504915062e-06, "loss": 0.1407, "step": 2456 }, { "epoch": 0.6404066073697586, "grad_norm": 0.9881069256709692, "learning_rate": 1.5682371767715276e-06, "loss": 0.1424, "step": 2457 }, { "epoch": 0.6406672531196038, "grad_norm": 1.0366011779383666, "learning_rate": 1.5662374955971268e-06, "loss": 0.1518, "step": 2458 }, { "epoch": 0.640927898869449, "grad_norm": 0.9963141298796366, "learning_rate": 1.5642385084545936e-06, "loss": 0.1366, "step": 2459 }, { "epoch": 0.6411885446192943, "grad_norm": 1.1019611709495551, "learning_rate": 1.562240216829704e-06, "loss": 0.1509, "step": 2460 }, { "epoch": 0.6414491903691395, "grad_norm": 1.0872609715828387, "learning_rate": 1.5602426222077144e-06, "loss": 0.1574, "step": 2461 }, { "epoch": 0.6417098361189848, "grad_norm": 1.0661714133951434, "learning_rate": 1.5582457260733664e-06, "loss": 0.1602, "step": 2462 }, { "epoch": 0.64197048186883, "grad_norm": 1.062417552372735, "learning_rate": 1.5562495299108793e-06, "loss": 0.1429, "step": 2463 }, { "epoch": 0.6422311276186753, "grad_norm": 1.0179116877978596, "learning_rate": 1.5542540352039525e-06, "loss": 0.1469, "step": 2464 }, { "epoch": 0.6424917733685205, "grad_norm": 1.0578850738095653, "learning_rate": 1.552259243435767e-06, "loss": 0.144, "step": 2465 }, { "epoch": 0.6427524191183658, "grad_norm": 1.009124990430885, "learning_rate": 1.550265156088978e-06, "loss": 0.1453, "step": 2466 }, { "epoch": 0.643013064868211, "grad_norm": 0.9740079957317941, "learning_rate": 1.5482717746457199e-06, "loss": 0.1361, "step": 2467 }, { "epoch": 0.6432737106180563, "grad_norm": 0.9993377475520598, "learning_rate": 1.5462791005875994e-06, "loss": 0.1325, "step": 2468 }, { "epoch": 0.6435343563679015, "grad_norm": 1.0009581083014423, "learning_rate": 1.544287135395699e-06, "loss": 0.1476, "step": 2469 }, { "epoch": 0.6437950021177468, "grad_norm": 1.0456321557137112, "learning_rate": 1.542295880550575e-06, "loss": 0.1463, "step": 2470 }, { "epoch": 0.644055647867592, "grad_norm": 1.0075753776639664, "learning_rate": 1.5403053375322548e-06, "loss": 0.1438, "step": 2471 }, { "epoch": 0.6443162936174373, "grad_norm": 1.0465388547239043, "learning_rate": 1.5383155078202377e-06, "loss": 0.1475, "step": 2472 }, { "epoch": 0.6445769393672824, "grad_norm": 1.0082940688698416, "learning_rate": 1.5363263928934919e-06, "loss": 0.152, "step": 2473 }, { "epoch": 0.6448375851171276, "grad_norm": 0.9917457278086194, "learning_rate": 1.534337994230453e-06, "loss": 0.1498, "step": 2474 }, { "epoch": 0.6450982308669729, "grad_norm": 1.029707138550733, "learning_rate": 1.5323503133090273e-06, "loss": 0.1549, "step": 2475 }, { "epoch": 0.6453588766168181, "grad_norm": 1.0163860627116441, "learning_rate": 1.5303633516065852e-06, "loss": 0.1435, "step": 2476 }, { "epoch": 0.6456195223666634, "grad_norm": 0.9762846100378089, "learning_rate": 1.528377110599964e-06, "loss": 0.1348, "step": 2477 }, { "epoch": 0.6458801681165086, "grad_norm": 0.9856216011357384, "learning_rate": 1.5263915917654654e-06, "loss": 0.138, "step": 2478 }, { "epoch": 0.6461408138663539, "grad_norm": 1.0311952826059645, "learning_rate": 1.5244067965788518e-06, "loss": 0.1424, "step": 2479 }, { "epoch": 0.6464014596161991, "grad_norm": 1.040038073350177, "learning_rate": 1.5224227265153512e-06, "loss": 0.1585, "step": 2480 }, { "epoch": 0.6466621053660444, "grad_norm": 1.0170790973104975, "learning_rate": 1.52043938304965e-06, "loss": 0.1471, "step": 2481 }, { "epoch": 0.6469227511158896, "grad_norm": 1.0886459449333803, "learning_rate": 1.5184567676558965e-06, "loss": 0.1447, "step": 2482 }, { "epoch": 0.6471833968657349, "grad_norm": 1.0678175068457325, "learning_rate": 1.5164748818076964e-06, "loss": 0.1422, "step": 2483 }, { "epoch": 0.6474440426155801, "grad_norm": 1.0696481231188408, "learning_rate": 1.5144937269781142e-06, "loss": 0.155, "step": 2484 }, { "epoch": 0.6477046883654254, "grad_norm": 1.0390533125534438, "learning_rate": 1.5125133046396699e-06, "loss": 0.1548, "step": 2485 }, { "epoch": 0.6479653341152706, "grad_norm": 1.0289931486477137, "learning_rate": 1.5105336162643403e-06, "loss": 0.1524, "step": 2486 }, { "epoch": 0.6482259798651159, "grad_norm": 0.9940636519279898, "learning_rate": 1.5085546633235554e-06, "loss": 0.1526, "step": 2487 }, { "epoch": 0.6484866256149611, "grad_norm": 0.9739244579667212, "learning_rate": 1.5065764472882e-06, "loss": 0.1404, "step": 2488 }, { "epoch": 0.6487472713648063, "grad_norm": 0.9835457991525941, "learning_rate": 1.5045989696286112e-06, "loss": 0.1417, "step": 2489 }, { "epoch": 0.6490079171146516, "grad_norm": 1.0087472875076144, "learning_rate": 1.5026222318145745e-06, "loss": 0.1451, "step": 2490 }, { "epoch": 0.6492685628644967, "grad_norm": 1.0072610084977272, "learning_rate": 1.500646235315329e-06, "loss": 0.1482, "step": 2491 }, { "epoch": 0.649529208614342, "grad_norm": 0.9941602337391459, "learning_rate": 1.4986709815995604e-06, "loss": 0.1489, "step": 2492 }, { "epoch": 0.6497898543641872, "grad_norm": 1.113216541959655, "learning_rate": 1.4966964721354047e-06, "loss": 0.1541, "step": 2493 }, { "epoch": 0.6500505001140325, "grad_norm": 1.0523883401676812, "learning_rate": 1.494722708390442e-06, "loss": 0.1402, "step": 2494 }, { "epoch": 0.6503111458638777, "grad_norm": 1.0127205034843119, "learning_rate": 1.4927496918317009e-06, "loss": 0.1423, "step": 2495 }, { "epoch": 0.650571791613723, "grad_norm": 1.035049936848075, "learning_rate": 1.4907774239256517e-06, "loss": 0.1537, "step": 2496 }, { "epoch": 0.6508324373635682, "grad_norm": 1.0497732180260138, "learning_rate": 1.4888059061382098e-06, "loss": 0.1472, "step": 2497 }, { "epoch": 0.6510930831134135, "grad_norm": 1.0503496279436444, "learning_rate": 1.486835139934734e-06, "loss": 0.1585, "step": 2498 }, { "epoch": 0.6513537288632587, "grad_norm": 1.0520254299351255, "learning_rate": 1.4848651267800224e-06, "loss": 0.1389, "step": 2499 }, { "epoch": 0.651614374613104, "grad_norm": 0.9559219215683535, "learning_rate": 1.4828958681383163e-06, "loss": 0.1393, "step": 2500 }, { "epoch": 0.651614374613104, "eval_loss": 0.14706705510616302, "eval_runtime": 55.2142, "eval_samples_per_second": 44.934, "eval_steps_per_second": 5.633, "step": 2500 }, { "epoch": 0.6518750203629492, "grad_norm": 1.0453022971842574, "learning_rate": 1.4809273654732925e-06, "loss": 0.159, "step": 2501 }, { "epoch": 0.6521356661127945, "grad_norm": 1.0944338932117934, "learning_rate": 1.4789596202480678e-06, "loss": 0.1563, "step": 2502 }, { "epoch": 0.6523963118626397, "grad_norm": 1.023187255944381, "learning_rate": 1.4769926339251972e-06, "loss": 0.1517, "step": 2503 }, { "epoch": 0.6526569576124849, "grad_norm": 1.0183687864100757, "learning_rate": 1.475026407966669e-06, "loss": 0.1567, "step": 2504 }, { "epoch": 0.6529176033623302, "grad_norm": 1.0137033282522079, "learning_rate": 1.4730609438339095e-06, "loss": 0.1487, "step": 2505 }, { "epoch": 0.6531782491121754, "grad_norm": 1.0429114752288071, "learning_rate": 1.4710962429877763e-06, "loss": 0.1533, "step": 2506 }, { "epoch": 0.6534388948620207, "grad_norm": 0.984869417035571, "learning_rate": 1.4691323068885593e-06, "loss": 0.1375, "step": 2507 }, { "epoch": 0.6536995406118659, "grad_norm": 1.0339031142623911, "learning_rate": 1.4671691369959826e-06, "loss": 0.1458, "step": 2508 }, { "epoch": 0.6539601863617112, "grad_norm": 1.0995491600241698, "learning_rate": 1.4652067347691985e-06, "loss": 0.1454, "step": 2509 }, { "epoch": 0.6542208321115563, "grad_norm": 1.0170812957730866, "learning_rate": 1.4632451016667899e-06, "loss": 0.1454, "step": 2510 }, { "epoch": 0.6544814778614017, "grad_norm": 1.062490319828971, "learning_rate": 1.461284239146768e-06, "loss": 0.1515, "step": 2511 }, { "epoch": 0.6547421236112468, "grad_norm": 1.0676897838287063, "learning_rate": 1.4593241486665705e-06, "loss": 0.1501, "step": 2512 }, { "epoch": 0.6550027693610921, "grad_norm": 0.9523393725535919, "learning_rate": 1.45736483168306e-06, "loss": 0.1389, "step": 2513 }, { "epoch": 0.6552634151109373, "grad_norm": 1.0675269912812198, "learning_rate": 1.4554062896525295e-06, "loss": 0.138, "step": 2514 }, { "epoch": 0.6555240608607826, "grad_norm": 1.0800194375684145, "learning_rate": 1.45344852403069e-06, "loss": 0.1561, "step": 2515 }, { "epoch": 0.6557847066106278, "grad_norm": 0.9431041901686343, "learning_rate": 1.4514915362726773e-06, "loss": 0.1306, "step": 2516 }, { "epoch": 0.6560453523604731, "grad_norm": 1.0336111570117983, "learning_rate": 1.4495353278330527e-06, "loss": 0.1511, "step": 2517 }, { "epoch": 0.6563059981103183, "grad_norm": 1.0243600238045496, "learning_rate": 1.447579900165792e-06, "loss": 0.1399, "step": 2518 }, { "epoch": 0.6565666438601635, "grad_norm": 0.9646221706370105, "learning_rate": 1.4456252547242935e-06, "loss": 0.1393, "step": 2519 }, { "epoch": 0.6568272896100088, "grad_norm": 1.0181262185463889, "learning_rate": 1.4436713929613771e-06, "loss": 0.1485, "step": 2520 }, { "epoch": 0.657087935359854, "grad_norm": 1.0980002700630929, "learning_rate": 1.4417183163292759e-06, "loss": 0.145, "step": 2521 }, { "epoch": 0.6573485811096993, "grad_norm": 1.0156563949213813, "learning_rate": 1.4397660262796411e-06, "loss": 0.1444, "step": 2522 }, { "epoch": 0.6576092268595445, "grad_norm": 1.0064015090325573, "learning_rate": 1.4378145242635397e-06, "loss": 0.1403, "step": 2523 }, { "epoch": 0.6578698726093898, "grad_norm": 0.9975326039681444, "learning_rate": 1.4358638117314521e-06, "loss": 0.1379, "step": 2524 }, { "epoch": 0.658130518359235, "grad_norm": 1.0659683052974558, "learning_rate": 1.4339138901332723e-06, "loss": 0.1442, "step": 2525 }, { "epoch": 0.6583911641090803, "grad_norm": 0.9782296740730826, "learning_rate": 1.4319647609183058e-06, "loss": 0.1403, "step": 2526 }, { "epoch": 0.6586518098589255, "grad_norm": 0.9426005098935972, "learning_rate": 1.430016425535271e-06, "loss": 0.1293, "step": 2527 }, { "epoch": 0.6589124556087708, "grad_norm": 0.9913498389392441, "learning_rate": 1.428068885432296e-06, "loss": 0.1451, "step": 2528 }, { "epoch": 0.659173101358616, "grad_norm": 0.9823159845045176, "learning_rate": 1.4261221420569135e-06, "loss": 0.1444, "step": 2529 }, { "epoch": 0.6594337471084613, "grad_norm": 1.013526580801994, "learning_rate": 1.4241761968560703e-06, "loss": 0.1535, "step": 2530 }, { "epoch": 0.6596943928583064, "grad_norm": 0.9484672656988519, "learning_rate": 1.4222310512761162e-06, "loss": 0.1375, "step": 2531 }, { "epoch": 0.6599550386081517, "grad_norm": 1.0411254097249778, "learning_rate": 1.4202867067628068e-06, "loss": 0.1554, "step": 2532 }, { "epoch": 0.6602156843579969, "grad_norm": 0.98380515263286, "learning_rate": 1.418343164761304e-06, "loss": 0.1408, "step": 2533 }, { "epoch": 0.6604763301078421, "grad_norm": 1.0245712530460669, "learning_rate": 1.4164004267161718e-06, "loss": 0.1438, "step": 2534 }, { "epoch": 0.6607369758576874, "grad_norm": 1.0683077179293656, "learning_rate": 1.4144584940713769e-06, "loss": 0.1544, "step": 2535 }, { "epoch": 0.6609976216075326, "grad_norm": 1.0282063096040879, "learning_rate": 1.4125173682702869e-06, "loss": 0.1375, "step": 2536 }, { "epoch": 0.6612582673573779, "grad_norm": 1.0639788738573688, "learning_rate": 1.4105770507556718e-06, "loss": 0.1513, "step": 2537 }, { "epoch": 0.6615189131072231, "grad_norm": 1.0233216507028322, "learning_rate": 1.4086375429696987e-06, "loss": 0.1453, "step": 2538 }, { "epoch": 0.6617795588570684, "grad_norm": 0.9791457949047909, "learning_rate": 1.4066988463539338e-06, "loss": 0.1442, "step": 2539 }, { "epoch": 0.6620402046069136, "grad_norm": 1.0674210940842361, "learning_rate": 1.4047609623493396e-06, "loss": 0.1494, "step": 2540 }, { "epoch": 0.6623008503567589, "grad_norm": 0.9893582338023428, "learning_rate": 1.4028238923962753e-06, "loss": 0.1366, "step": 2541 }, { "epoch": 0.6625614961066041, "grad_norm": 0.9553575684596772, "learning_rate": 1.400887637934495e-06, "loss": 0.1422, "step": 2542 }, { "epoch": 0.6628221418564494, "grad_norm": 1.0147878521736533, "learning_rate": 1.3989522004031457e-06, "loss": 0.1319, "step": 2543 }, { "epoch": 0.6630827876062946, "grad_norm": 1.0574786586725946, "learning_rate": 1.3970175812407705e-06, "loss": 0.1481, "step": 2544 }, { "epoch": 0.6633434333561399, "grad_norm": 1.0065268689226394, "learning_rate": 1.3950837818852982e-06, "loss": 0.1398, "step": 2545 }, { "epoch": 0.6636040791059851, "grad_norm": 0.9811983987609384, "learning_rate": 1.393150803774055e-06, "loss": 0.1391, "step": 2546 }, { "epoch": 0.6638647248558304, "grad_norm": 1.0395005211555888, "learning_rate": 1.3912186483437522e-06, "loss": 0.15, "step": 2547 }, { "epoch": 0.6641253706056756, "grad_norm": 1.0884915761715732, "learning_rate": 1.3892873170304913e-06, "loss": 0.1477, "step": 2548 }, { "epoch": 0.6643860163555207, "grad_norm": 1.045570615223479, "learning_rate": 1.3873568112697596e-06, "loss": 0.1529, "step": 2549 }, { "epoch": 0.664646662105366, "grad_norm": 1.0195399873496476, "learning_rate": 1.3854271324964353e-06, "loss": 0.1555, "step": 2550 }, { "epoch": 0.6649073078552112, "grad_norm": 1.0370073334237833, "learning_rate": 1.3834982821447762e-06, "loss": 0.1384, "step": 2551 }, { "epoch": 0.6651679536050565, "grad_norm": 1.1123662662867184, "learning_rate": 1.3815702616484262e-06, "loss": 0.1609, "step": 2552 }, { "epoch": 0.6654285993549017, "grad_norm": 1.0781415008719764, "learning_rate": 1.3796430724404155e-06, "loss": 0.16, "step": 2553 }, { "epoch": 0.665689245104747, "grad_norm": 1.0190237074529083, "learning_rate": 1.3777167159531532e-06, "loss": 0.1431, "step": 2554 }, { "epoch": 0.6659498908545922, "grad_norm": 1.015632154165992, "learning_rate": 1.3757911936184303e-06, "loss": 0.1345, "step": 2555 }, { "epoch": 0.6662105366044375, "grad_norm": 1.0267823348301541, "learning_rate": 1.3738665068674176e-06, "loss": 0.1385, "step": 2556 }, { "epoch": 0.6664711823542827, "grad_norm": 1.1003255070715927, "learning_rate": 1.3719426571306654e-06, "loss": 0.1483, "step": 2557 }, { "epoch": 0.666731828104128, "grad_norm": 0.9610242593455333, "learning_rate": 1.3700196458381012e-06, "loss": 0.1337, "step": 2558 }, { "epoch": 0.6669924738539732, "grad_norm": 1.0148548871316228, "learning_rate": 1.368097474419029e-06, "loss": 0.1381, "step": 2559 }, { "epoch": 0.6672531196038185, "grad_norm": 1.1419567302831384, "learning_rate": 1.3661761443021316e-06, "loss": 0.163, "step": 2560 }, { "epoch": 0.6675137653536637, "grad_norm": 1.0094084748921628, "learning_rate": 1.364255656915463e-06, "loss": 0.1409, "step": 2561 }, { "epoch": 0.667774411103509, "grad_norm": 1.0856318990097902, "learning_rate": 1.36233601368645e-06, "loss": 0.1578, "step": 2562 }, { "epoch": 0.6680350568533542, "grad_norm": 1.0325502887744613, "learning_rate": 1.3604172160418965e-06, "loss": 0.1491, "step": 2563 }, { "epoch": 0.6682957026031995, "grad_norm": 1.050765280195333, "learning_rate": 1.3584992654079742e-06, "loss": 0.1445, "step": 2564 }, { "epoch": 0.6685563483530447, "grad_norm": 1.043026424636727, "learning_rate": 1.3565821632102269e-06, "loss": 0.1397, "step": 2565 }, { "epoch": 0.6688169941028899, "grad_norm": 1.0052609930658476, "learning_rate": 1.3546659108735666e-06, "loss": 0.1435, "step": 2566 }, { "epoch": 0.6690776398527352, "grad_norm": 1.0390878301020108, "learning_rate": 1.352750509822275e-06, "loss": 0.1414, "step": 2567 }, { "epoch": 0.6693382856025804, "grad_norm": 1.022586454363177, "learning_rate": 1.3508359614799998e-06, "loss": 0.1419, "step": 2568 }, { "epoch": 0.6695989313524257, "grad_norm": 1.0234522787004605, "learning_rate": 1.3489222672697545e-06, "loss": 0.1364, "step": 2569 }, { "epoch": 0.6698595771022708, "grad_norm": 1.0002235277049334, "learning_rate": 1.3470094286139213e-06, "loss": 0.1393, "step": 2570 }, { "epoch": 0.6701202228521161, "grad_norm": 1.1124986939245138, "learning_rate": 1.345097446934242e-06, "loss": 0.1609, "step": 2571 }, { "epoch": 0.6703808686019613, "grad_norm": 0.9961638558939647, "learning_rate": 1.3431863236518242e-06, "loss": 0.1386, "step": 2572 }, { "epoch": 0.6706415143518066, "grad_norm": 1.0890698411283755, "learning_rate": 1.3412760601871364e-06, "loss": 0.1488, "step": 2573 }, { "epoch": 0.6709021601016518, "grad_norm": 0.969980725327966, "learning_rate": 1.3393666579600078e-06, "loss": 0.139, "step": 2574 }, { "epoch": 0.6711628058514971, "grad_norm": 1.0520581714810135, "learning_rate": 1.337458118389628e-06, "loss": 0.1425, "step": 2575 }, { "epoch": 0.6714234516013423, "grad_norm": 1.036561736454976, "learning_rate": 1.3355504428945464e-06, "loss": 0.1395, "step": 2576 }, { "epoch": 0.6716840973511876, "grad_norm": 1.0050081754548053, "learning_rate": 1.3336436328926697e-06, "loss": 0.1506, "step": 2577 }, { "epoch": 0.6719447431010328, "grad_norm": 0.9785053220608313, "learning_rate": 1.3317376898012573e-06, "loss": 0.1348, "step": 2578 }, { "epoch": 0.6722053888508781, "grad_norm": 1.0756949783667535, "learning_rate": 1.3298326150369313e-06, "loss": 0.1444, "step": 2579 }, { "epoch": 0.6724660346007233, "grad_norm": 1.0240960954992928, "learning_rate": 1.3279284100156633e-06, "loss": 0.1376, "step": 2580 }, { "epoch": 0.6727266803505685, "grad_norm": 1.0074760861469667, "learning_rate": 1.3260250761527802e-06, "loss": 0.1419, "step": 2581 }, { "epoch": 0.6729873261004138, "grad_norm": 1.0737036314302646, "learning_rate": 1.32412261486296e-06, "loss": 0.1474, "step": 2582 }, { "epoch": 0.673247971850259, "grad_norm": 1.0050493130647724, "learning_rate": 1.3222210275602364e-06, "loss": 0.1453, "step": 2583 }, { "epoch": 0.6735086176001043, "grad_norm": 1.0712012408622265, "learning_rate": 1.3203203156579875e-06, "loss": 0.1468, "step": 2584 }, { "epoch": 0.6737692633499495, "grad_norm": 1.0884419913214385, "learning_rate": 1.3184204805689435e-06, "loss": 0.1479, "step": 2585 }, { "epoch": 0.6740299090997948, "grad_norm": 0.9996545177615469, "learning_rate": 1.316521523705185e-06, "loss": 0.1385, "step": 2586 }, { "epoch": 0.67429055484964, "grad_norm": 1.0407744927921874, "learning_rate": 1.3146234464781372e-06, "loss": 0.1448, "step": 2587 }, { "epoch": 0.6745512005994853, "grad_norm": 0.9500301214803192, "learning_rate": 1.3127262502985722e-06, "loss": 0.1418, "step": 2588 }, { "epoch": 0.6748118463493304, "grad_norm": 1.0465971837408732, "learning_rate": 1.3108299365766064e-06, "loss": 0.1443, "step": 2589 }, { "epoch": 0.6750724920991757, "grad_norm": 1.135964825337514, "learning_rate": 1.3089345067217025e-06, "loss": 0.1585, "step": 2590 }, { "epoch": 0.6753331378490209, "grad_norm": 1.1214037369437557, "learning_rate": 1.307039962142664e-06, "loss": 0.1461, "step": 2591 }, { "epoch": 0.6755937835988662, "grad_norm": 0.9748987109195437, "learning_rate": 1.3051463042476358e-06, "loss": 0.1374, "step": 2592 }, { "epoch": 0.6758544293487114, "grad_norm": 1.053013280758005, "learning_rate": 1.3032535344441096e-06, "loss": 0.1463, "step": 2593 }, { "epoch": 0.6761150750985567, "grad_norm": 0.9930997502377236, "learning_rate": 1.301361654138909e-06, "loss": 0.1418, "step": 2594 }, { "epoch": 0.6763757208484019, "grad_norm": 1.0712205297525403, "learning_rate": 1.2994706647382001e-06, "loss": 0.1456, "step": 2595 }, { "epoch": 0.6766363665982471, "grad_norm": 1.0145383937903447, "learning_rate": 1.297580567647489e-06, "loss": 0.138, "step": 2596 }, { "epoch": 0.6768970123480924, "grad_norm": 1.030755501207374, "learning_rate": 1.2956913642716157e-06, "loss": 0.1334, "step": 2597 }, { "epoch": 0.6771576580979376, "grad_norm": 1.0130394160338176, "learning_rate": 1.2938030560147558e-06, "loss": 0.1366, "step": 2598 }, { "epoch": 0.6774183038477829, "grad_norm": 0.9769655124334526, "learning_rate": 1.2919156442804216e-06, "loss": 0.133, "step": 2599 }, { "epoch": 0.6776789495976281, "grad_norm": 0.963595189768441, "learning_rate": 1.2900291304714568e-06, "loss": 0.1325, "step": 2600 }, { "epoch": 0.6779395953474734, "grad_norm": 0.9171957183139163, "learning_rate": 1.2881435159900396e-06, "loss": 0.1255, "step": 2601 }, { "epoch": 0.6782002410973186, "grad_norm": 1.062165667219299, "learning_rate": 1.2862588022376782e-06, "loss": 0.1495, "step": 2602 }, { "epoch": 0.6784608868471639, "grad_norm": 1.055964561150847, "learning_rate": 1.284374990615213e-06, "loss": 0.146, "step": 2603 }, { "epoch": 0.6787215325970091, "grad_norm": 1.052744986825108, "learning_rate": 1.2824920825228132e-06, "loss": 0.151, "step": 2604 }, { "epoch": 0.6789821783468544, "grad_norm": 1.019675001213746, "learning_rate": 1.2806100793599758e-06, "loss": 0.1479, "step": 2605 }, { "epoch": 0.6792428240966996, "grad_norm": 1.026285793331002, "learning_rate": 1.278728982525525e-06, "loss": 0.139, "step": 2606 }, { "epoch": 0.6795034698465449, "grad_norm": 1.061878797211571, "learning_rate": 1.276848793417613e-06, "loss": 0.1481, "step": 2607 }, { "epoch": 0.67976411559639, "grad_norm": 0.9844495181225681, "learning_rate": 1.2749695134337149e-06, "loss": 0.1367, "step": 2608 }, { "epoch": 0.6800247613462354, "grad_norm": 1.065190169651591, "learning_rate": 1.2730911439706327e-06, "loss": 0.1519, "step": 2609 }, { "epoch": 0.6802854070960805, "grad_norm": 1.0246032089384, "learning_rate": 1.2712136864244917e-06, "loss": 0.15, "step": 2610 }, { "epoch": 0.6805460528459257, "grad_norm": 1.0564069857011371, "learning_rate": 1.2693371421907342e-06, "loss": 0.1447, "step": 2611 }, { "epoch": 0.680806698595771, "grad_norm": 1.0384980709537075, "learning_rate": 1.2674615126641313e-06, "loss": 0.1332, "step": 2612 }, { "epoch": 0.6810673443456162, "grad_norm": 1.0119144309053054, "learning_rate": 1.2655867992387687e-06, "loss": 0.1541, "step": 2613 }, { "epoch": 0.6813279900954615, "grad_norm": 0.9930092344236637, "learning_rate": 1.2637130033080541e-06, "loss": 0.1408, "step": 2614 }, { "epoch": 0.6815886358453067, "grad_norm": 1.0669214403673288, "learning_rate": 1.2618401262647111e-06, "loss": 0.1506, "step": 2615 }, { "epoch": 0.681849281595152, "grad_norm": 1.069593658783162, "learning_rate": 1.2599681695007822e-06, "loss": 0.1489, "step": 2616 }, { "epoch": 0.6821099273449972, "grad_norm": 1.0536792698608908, "learning_rate": 1.258097134407625e-06, "loss": 0.1428, "step": 2617 }, { "epoch": 0.6823705730948425, "grad_norm": 1.0615194063188897, "learning_rate": 1.2562270223759104e-06, "loss": 0.1556, "step": 2618 }, { "epoch": 0.6826312188446877, "grad_norm": 1.0380981643417533, "learning_rate": 1.2543578347956282e-06, "loss": 0.1463, "step": 2619 }, { "epoch": 0.682891864594533, "grad_norm": 0.9568354108521965, "learning_rate": 1.2524895730560755e-06, "loss": 0.131, "step": 2620 }, { "epoch": 0.6831525103443782, "grad_norm": 0.9619888260258553, "learning_rate": 1.250622238545865e-06, "loss": 0.1238, "step": 2621 }, { "epoch": 0.6834131560942235, "grad_norm": 1.007327100049184, "learning_rate": 1.2487558326529177e-06, "loss": 0.1558, "step": 2622 }, { "epoch": 0.6836738018440687, "grad_norm": 1.0657262694230134, "learning_rate": 1.2468903567644661e-06, "loss": 0.1548, "step": 2623 }, { "epoch": 0.683934447593914, "grad_norm": 0.9568534621092734, "learning_rate": 1.2450258122670508e-06, "loss": 0.1316, "step": 2624 }, { "epoch": 0.6841950933437592, "grad_norm": 1.003887880794525, "learning_rate": 1.2431622005465193e-06, "loss": 0.1412, "step": 2625 }, { "epoch": 0.6844557390936044, "grad_norm": 1.0217925770708252, "learning_rate": 1.2412995229880295e-06, "loss": 0.1478, "step": 2626 }, { "epoch": 0.6847163848434497, "grad_norm": 1.0280837472025706, "learning_rate": 1.2394377809760396e-06, "loss": 0.1433, "step": 2627 }, { "epoch": 0.6849770305932948, "grad_norm": 0.9533732626607724, "learning_rate": 1.2375769758943148e-06, "loss": 0.1295, "step": 2628 }, { "epoch": 0.6852376763431401, "grad_norm": 1.0517399946359842, "learning_rate": 1.2357171091259263e-06, "loss": 0.1498, "step": 2629 }, { "epoch": 0.6854983220929853, "grad_norm": 1.0665901211133482, "learning_rate": 1.2338581820532442e-06, "loss": 0.1437, "step": 2630 }, { "epoch": 0.6857589678428306, "grad_norm": 0.9954416913712691, "learning_rate": 1.2320001960579414e-06, "loss": 0.1316, "step": 2631 }, { "epoch": 0.6860196135926758, "grad_norm": 1.0564288807823046, "learning_rate": 1.2301431525209942e-06, "loss": 0.1468, "step": 2632 }, { "epoch": 0.6862802593425211, "grad_norm": 0.9889017496203254, "learning_rate": 1.2282870528226731e-06, "loss": 0.1329, "step": 2633 }, { "epoch": 0.6865409050923663, "grad_norm": 1.0461216280799392, "learning_rate": 1.2264318983425498e-06, "loss": 0.141, "step": 2634 }, { "epoch": 0.6868015508422116, "grad_norm": 1.0077247348600462, "learning_rate": 1.2245776904594953e-06, "loss": 0.1432, "step": 2635 }, { "epoch": 0.6870621965920568, "grad_norm": 0.9808154284233027, "learning_rate": 1.222724430551674e-06, "loss": 0.1461, "step": 2636 }, { "epoch": 0.6873228423419021, "grad_norm": 0.9989848382971014, "learning_rate": 1.2208721199965476e-06, "loss": 0.1329, "step": 2637 }, { "epoch": 0.6875834880917473, "grad_norm": 1.0367632697686657, "learning_rate": 1.2190207601708707e-06, "loss": 0.1477, "step": 2638 }, { "epoch": 0.6878441338415926, "grad_norm": 1.086854413157629, "learning_rate": 1.2171703524506925e-06, "loss": 0.1402, "step": 2639 }, { "epoch": 0.6881047795914378, "grad_norm": 1.0540737508399942, "learning_rate": 1.215320898211354e-06, "loss": 0.1561, "step": 2640 }, { "epoch": 0.688365425341283, "grad_norm": 1.1374820927040505, "learning_rate": 1.2134723988274868e-06, "loss": 0.1575, "step": 2641 }, { "epoch": 0.6886260710911283, "grad_norm": 0.9645505364416838, "learning_rate": 1.2116248556730151e-06, "loss": 0.1394, "step": 2642 }, { "epoch": 0.6888867168409735, "grad_norm": 1.0222206938003806, "learning_rate": 1.2097782701211511e-06, "loss": 0.1411, "step": 2643 }, { "epoch": 0.6891473625908188, "grad_norm": 0.9799690849253535, "learning_rate": 1.207932643544392e-06, "loss": 0.1354, "step": 2644 }, { "epoch": 0.689408008340664, "grad_norm": 1.0518423018945602, "learning_rate": 1.2060879773145282e-06, "loss": 0.146, "step": 2645 }, { "epoch": 0.6896686540905093, "grad_norm": 1.039093504520467, "learning_rate": 1.2042442728026325e-06, "loss": 0.1471, "step": 2646 }, { "epoch": 0.6899292998403544, "grad_norm": 1.0271646028805357, "learning_rate": 1.2024015313790632e-06, "loss": 0.1468, "step": 2647 }, { "epoch": 0.6901899455901997, "grad_norm": 1.0425041734489902, "learning_rate": 1.200559754413464e-06, "loss": 0.1397, "step": 2648 }, { "epoch": 0.6904505913400449, "grad_norm": 1.066808279019748, "learning_rate": 1.1987189432747604e-06, "loss": 0.1498, "step": 2649 }, { "epoch": 0.6907112370898902, "grad_norm": 0.9633082222513829, "learning_rate": 1.1968790993311613e-06, "loss": 0.1377, "step": 2650 }, { "epoch": 0.6909718828397354, "grad_norm": 0.9731725920294785, "learning_rate": 1.1950402239501547e-06, "loss": 0.1342, "step": 2651 }, { "epoch": 0.6912325285895807, "grad_norm": 1.0088562034611035, "learning_rate": 1.193202318498512e-06, "loss": 0.1335, "step": 2652 }, { "epoch": 0.6914931743394259, "grad_norm": 1.0015613131335608, "learning_rate": 1.1913653843422813e-06, "loss": 0.1285, "step": 2653 }, { "epoch": 0.6917538200892712, "grad_norm": 1.01073635572539, "learning_rate": 1.1895294228467886e-06, "loss": 0.1376, "step": 2654 }, { "epoch": 0.6920144658391164, "grad_norm": 1.0588585150773644, "learning_rate": 1.1876944353766382e-06, "loss": 0.1441, "step": 2655 }, { "epoch": 0.6922751115889616, "grad_norm": 1.0092372982661468, "learning_rate": 1.1858604232957096e-06, "loss": 0.144, "step": 2656 }, { "epoch": 0.6925357573388069, "grad_norm": 1.1508362608263896, "learning_rate": 1.1840273879671576e-06, "loss": 0.15, "step": 2657 }, { "epoch": 0.6927964030886521, "grad_norm": 0.9815547754738853, "learning_rate": 1.18219533075341e-06, "loss": 0.1387, "step": 2658 }, { "epoch": 0.6930570488384974, "grad_norm": 1.0697476128074805, "learning_rate": 1.1803642530161715e-06, "loss": 0.1477, "step": 2659 }, { "epoch": 0.6933176945883426, "grad_norm": 1.065940732061916, "learning_rate": 1.1785341561164135e-06, "loss": 0.1524, "step": 2660 }, { "epoch": 0.6935783403381879, "grad_norm": 1.0866499503083356, "learning_rate": 1.1767050414143798e-06, "loss": 0.1378, "step": 2661 }, { "epoch": 0.6938389860880331, "grad_norm": 1.0195872400710613, "learning_rate": 1.1748769102695882e-06, "loss": 0.1376, "step": 2662 }, { "epoch": 0.6940996318378784, "grad_norm": 1.0135890851243687, "learning_rate": 1.1730497640408203e-06, "loss": 0.1466, "step": 2663 }, { "epoch": 0.6943602775877236, "grad_norm": 1.0557938167530094, "learning_rate": 1.1712236040861278e-06, "loss": 0.1439, "step": 2664 }, { "epoch": 0.6946209233375689, "grad_norm": 0.9868038600811403, "learning_rate": 1.1693984317628314e-06, "loss": 0.1472, "step": 2665 }, { "epoch": 0.694881569087414, "grad_norm": 1.004306874199482, "learning_rate": 1.1675742484275132e-06, "loss": 0.1306, "step": 2666 }, { "epoch": 0.6951422148372594, "grad_norm": 1.0074767649243925, "learning_rate": 1.1657510554360224e-06, "loss": 0.1397, "step": 2667 }, { "epoch": 0.6954028605871045, "grad_norm": 1.0923517183743445, "learning_rate": 1.1639288541434745e-06, "loss": 0.1428, "step": 2668 }, { "epoch": 0.6956635063369498, "grad_norm": 0.96511102218528, "learning_rate": 1.1621076459042447e-06, "loss": 0.1312, "step": 2669 }, { "epoch": 0.695924152086795, "grad_norm": 1.0083175739310832, "learning_rate": 1.160287432071971e-06, "loss": 0.1432, "step": 2670 }, { "epoch": 0.6961847978366402, "grad_norm": 1.0131322880118148, "learning_rate": 1.1584682139995527e-06, "loss": 0.1295, "step": 2671 }, { "epoch": 0.6964454435864855, "grad_norm": 1.0204086914001496, "learning_rate": 1.1566499930391484e-06, "loss": 0.1417, "step": 2672 }, { "epoch": 0.6967060893363307, "grad_norm": 1.0200443804782022, "learning_rate": 1.1548327705421763e-06, "loss": 0.135, "step": 2673 }, { "epoch": 0.696966735086176, "grad_norm": 1.0370248286272976, "learning_rate": 1.153016547859311e-06, "loss": 0.1408, "step": 2674 }, { "epoch": 0.6972273808360212, "grad_norm": 1.0374831286448227, "learning_rate": 1.1512013263404867e-06, "loss": 0.1448, "step": 2675 }, { "epoch": 0.6974880265858665, "grad_norm": 1.0172505083263623, "learning_rate": 1.1493871073348926e-06, "loss": 0.1438, "step": 2676 }, { "epoch": 0.6977486723357117, "grad_norm": 1.029347045508623, "learning_rate": 1.1475738921909682e-06, "loss": 0.1457, "step": 2677 }, { "epoch": 0.698009318085557, "grad_norm": 1.0237617305393838, "learning_rate": 1.1457616822564145e-06, "loss": 0.1465, "step": 2678 }, { "epoch": 0.6982699638354022, "grad_norm": 1.0086817431745012, "learning_rate": 1.1439504788781797e-06, "loss": 0.1439, "step": 2679 }, { "epoch": 0.6985306095852475, "grad_norm": 1.0450088664342063, "learning_rate": 1.1421402834024662e-06, "loss": 0.149, "step": 2680 }, { "epoch": 0.6987912553350927, "grad_norm": 1.0051747546968028, "learning_rate": 1.1403310971747263e-06, "loss": 0.1429, "step": 2681 }, { "epoch": 0.699051901084938, "grad_norm": 1.0639293850410587, "learning_rate": 1.1385229215396638e-06, "loss": 0.1509, "step": 2682 }, { "epoch": 0.6993125468347832, "grad_norm": 1.0251949371445561, "learning_rate": 1.1367157578412288e-06, "loss": 0.1414, "step": 2683 }, { "epoch": 0.6995731925846285, "grad_norm": 1.1315071443649436, "learning_rate": 1.1349096074226205e-06, "loss": 0.1613, "step": 2684 }, { "epoch": 0.6998338383344737, "grad_norm": 1.0970099864961895, "learning_rate": 1.1331044716262869e-06, "loss": 0.1403, "step": 2685 }, { "epoch": 0.7000944840843188, "grad_norm": 1.0179124992271626, "learning_rate": 1.1313003517939189e-06, "loss": 0.1384, "step": 2686 }, { "epoch": 0.7003551298341641, "grad_norm": 0.9930734176540283, "learning_rate": 1.1294972492664537e-06, "loss": 0.1328, "step": 2687 }, { "epoch": 0.7006157755840093, "grad_norm": 0.999391403034874, "learning_rate": 1.127695165384072e-06, "loss": 0.1417, "step": 2688 }, { "epoch": 0.7008764213338546, "grad_norm": 1.0306219027435277, "learning_rate": 1.125894101486198e-06, "loss": 0.1427, "step": 2689 }, { "epoch": 0.7011370670836998, "grad_norm": 1.0473877063324988, "learning_rate": 1.1240940589114953e-06, "loss": 0.1548, "step": 2690 }, { "epoch": 0.7013977128335451, "grad_norm": 0.9615496949427018, "learning_rate": 1.1222950389978735e-06, "loss": 0.1244, "step": 2691 }, { "epoch": 0.7016583585833903, "grad_norm": 1.060546955672704, "learning_rate": 1.1204970430824782e-06, "loss": 0.1477, "step": 2692 }, { "epoch": 0.7019190043332356, "grad_norm": 0.9611393890644319, "learning_rate": 1.118700072501692e-06, "loss": 0.1336, "step": 2693 }, { "epoch": 0.7021796500830808, "grad_norm": 1.1047876179826503, "learning_rate": 1.1169041285911411e-06, "loss": 0.1515, "step": 2694 }, { "epoch": 0.7024402958329261, "grad_norm": 1.138854819490159, "learning_rate": 1.1151092126856845e-06, "loss": 0.1483, "step": 2695 }, { "epoch": 0.7027009415827713, "grad_norm": 1.061181700212821, "learning_rate": 1.113315326119418e-06, "loss": 0.1487, "step": 2696 }, { "epoch": 0.7029615873326166, "grad_norm": 1.0421168403517715, "learning_rate": 1.111522470225672e-06, "loss": 0.1436, "step": 2697 }, { "epoch": 0.7032222330824618, "grad_norm": 1.0296744826421336, "learning_rate": 1.1097306463370142e-06, "loss": 0.1477, "step": 2698 }, { "epoch": 0.7034828788323071, "grad_norm": 0.9768505729069606, "learning_rate": 1.1079398557852392e-06, "loss": 0.127, "step": 2699 }, { "epoch": 0.7037435245821523, "grad_norm": 1.0508352594507897, "learning_rate": 1.1061500999013771e-06, "loss": 0.1387, "step": 2700 }, { "epoch": 0.7040041703319975, "grad_norm": 1.0911919696384855, "learning_rate": 1.1043613800156905e-06, "loss": 0.1517, "step": 2701 }, { "epoch": 0.7042648160818428, "grad_norm": 1.0073634706803296, "learning_rate": 1.1025736974576693e-06, "loss": 0.1282, "step": 2702 }, { "epoch": 0.704525461831688, "grad_norm": 1.0226642767809557, "learning_rate": 1.1007870535560331e-06, "loss": 0.1364, "step": 2703 }, { "epoch": 0.7047861075815333, "grad_norm": 1.0226857976022548, "learning_rate": 1.0990014496387296e-06, "loss": 0.1291, "step": 2704 }, { "epoch": 0.7050467533313785, "grad_norm": 0.977593120775085, "learning_rate": 1.0972168870329337e-06, "loss": 0.1236, "step": 2705 }, { "epoch": 0.7053073990812238, "grad_norm": 0.9986491680866159, "learning_rate": 1.0954333670650461e-06, "loss": 0.1417, "step": 2706 }, { "epoch": 0.7055680448310689, "grad_norm": 0.9828246919792702, "learning_rate": 1.0936508910606917e-06, "loss": 0.126, "step": 2707 }, { "epoch": 0.7058286905809142, "grad_norm": 1.1146268282672689, "learning_rate": 1.0918694603447221e-06, "loss": 0.1521, "step": 2708 }, { "epoch": 0.7060893363307594, "grad_norm": 1.109777986068823, "learning_rate": 1.0900890762412106e-06, "loss": 0.1506, "step": 2709 }, { "epoch": 0.7063499820806047, "grad_norm": 1.0307526698892624, "learning_rate": 1.0883097400734494e-06, "loss": 0.1398, "step": 2710 }, { "epoch": 0.7066106278304499, "grad_norm": 1.0505182570081053, "learning_rate": 1.086531453163957e-06, "loss": 0.1431, "step": 2711 }, { "epoch": 0.7068712735802952, "grad_norm": 1.063265044306466, "learning_rate": 1.0847542168344695e-06, "loss": 0.1403, "step": 2712 }, { "epoch": 0.7071319193301404, "grad_norm": 1.0466786388996447, "learning_rate": 1.0829780324059416e-06, "loss": 0.148, "step": 2713 }, { "epoch": 0.7073925650799857, "grad_norm": 1.1005607876899595, "learning_rate": 1.0812029011985472e-06, "loss": 0.1387, "step": 2714 }, { "epoch": 0.7076532108298309, "grad_norm": 1.0350178758716102, "learning_rate": 1.0794288245316772e-06, "loss": 0.1553, "step": 2715 }, { "epoch": 0.7079138565796761, "grad_norm": 1.0401800167745072, "learning_rate": 1.0776558037239385e-06, "loss": 0.1416, "step": 2716 }, { "epoch": 0.7081745023295214, "grad_norm": 1.0038873673594657, "learning_rate": 1.0758838400931524e-06, "loss": 0.1318, "step": 2717 }, { "epoch": 0.7084351480793666, "grad_norm": 1.0236285873373983, "learning_rate": 1.0741129349563567e-06, "loss": 0.146, "step": 2718 }, { "epoch": 0.7086957938292119, "grad_norm": 1.141909987459158, "learning_rate": 1.072343089629801e-06, "loss": 0.1435, "step": 2719 }, { "epoch": 0.7089564395790571, "grad_norm": 1.0600338569481333, "learning_rate": 1.0705743054289464e-06, "loss": 0.1523, "step": 2720 }, { "epoch": 0.7092170853289024, "grad_norm": 0.9497207098226399, "learning_rate": 1.0688065836684666e-06, "loss": 0.13, "step": 2721 }, { "epoch": 0.7094777310787476, "grad_norm": 1.0223358865563235, "learning_rate": 1.0670399256622455e-06, "loss": 0.1367, "step": 2722 }, { "epoch": 0.7097383768285929, "grad_norm": 1.0320668409336258, "learning_rate": 1.0652743327233744e-06, "loss": 0.1342, "step": 2723 }, { "epoch": 0.709999022578438, "grad_norm": 0.9813599999311909, "learning_rate": 1.0635098061641572e-06, "loss": 0.1328, "step": 2724 }, { "epoch": 0.7102596683282834, "grad_norm": 1.0352773909605864, "learning_rate": 1.061746347296102e-06, "loss": 0.1447, "step": 2725 }, { "epoch": 0.7105203140781285, "grad_norm": 1.0021541301163654, "learning_rate": 1.0599839574299217e-06, "loss": 0.1409, "step": 2726 }, { "epoch": 0.7107809598279738, "grad_norm": 1.097550938051, "learning_rate": 1.0582226378755391e-06, "loss": 0.1424, "step": 2727 }, { "epoch": 0.711041605577819, "grad_norm": 1.0101037058081772, "learning_rate": 1.056462389942079e-06, "loss": 0.1436, "step": 2728 }, { "epoch": 0.7113022513276643, "grad_norm": 1.0858063720809428, "learning_rate": 1.0547032149378694e-06, "loss": 0.1529, "step": 2729 }, { "epoch": 0.7115628970775095, "grad_norm": 1.0027853764148664, "learning_rate": 1.052945114170441e-06, "loss": 0.1379, "step": 2730 }, { "epoch": 0.7118235428273547, "grad_norm": 1.0446372234492034, "learning_rate": 1.0511880889465284e-06, "loss": 0.1488, "step": 2731 }, { "epoch": 0.7120841885772, "grad_norm": 1.0431014837020243, "learning_rate": 1.0494321405720627e-06, "loss": 0.1406, "step": 2732 }, { "epoch": 0.7123448343270452, "grad_norm": 0.9972873260439811, "learning_rate": 1.047677270352177e-06, "loss": 0.138, "step": 2733 }, { "epoch": 0.7126054800768905, "grad_norm": 1.1141891632867487, "learning_rate": 1.045923479591204e-06, "loss": 0.1451, "step": 2734 }, { "epoch": 0.7128661258267357, "grad_norm": 1.0447689069073876, "learning_rate": 1.0441707695926726e-06, "loss": 0.1452, "step": 2735 }, { "epoch": 0.713126771576581, "grad_norm": 0.9845450546233399, "learning_rate": 1.042419141659308e-06, "loss": 0.1297, "step": 2736 }, { "epoch": 0.7133874173264262, "grad_norm": 0.9785823986162984, "learning_rate": 1.0406685970930325e-06, "loss": 0.1255, "step": 2737 }, { "epoch": 0.7136480630762715, "grad_norm": 1.0323876300542758, "learning_rate": 1.038919137194962e-06, "loss": 0.1425, "step": 2738 }, { "epoch": 0.7139087088261167, "grad_norm": 1.0294934934587283, "learning_rate": 1.0371707632654071e-06, "loss": 0.1387, "step": 2739 }, { "epoch": 0.714169354575962, "grad_norm": 0.9722223729319975, "learning_rate": 1.0354234766038696e-06, "loss": 0.1294, "step": 2740 }, { "epoch": 0.7144300003258072, "grad_norm": 0.9929694040390598, "learning_rate": 1.0336772785090466e-06, "loss": 0.1364, "step": 2741 }, { "epoch": 0.7146906460756525, "grad_norm": 1.0146490869395086, "learning_rate": 1.0319321702788234e-06, "loss": 0.15, "step": 2742 }, { "epoch": 0.7149512918254977, "grad_norm": 1.076007503979087, "learning_rate": 1.030188153210274e-06, "loss": 0.1578, "step": 2743 }, { "epoch": 0.715211937575343, "grad_norm": 0.9988237972076198, "learning_rate": 1.028445228599665e-06, "loss": 0.1494, "step": 2744 }, { "epoch": 0.7154725833251881, "grad_norm": 1.0070993355427804, "learning_rate": 1.0267033977424485e-06, "loss": 0.1367, "step": 2745 }, { "epoch": 0.7157332290750335, "grad_norm": 1.0340945372801513, "learning_rate": 1.0249626619332642e-06, "loss": 0.1368, "step": 2746 }, { "epoch": 0.7159938748248786, "grad_norm": 1.0631510736217753, "learning_rate": 1.0232230224659385e-06, "loss": 0.1464, "step": 2747 }, { "epoch": 0.7162545205747238, "grad_norm": 1.0431811338952552, "learning_rate": 1.0214844806334817e-06, "loss": 0.1398, "step": 2748 }, { "epoch": 0.7165151663245691, "grad_norm": 0.9725297841635681, "learning_rate": 1.0197470377280888e-06, "loss": 0.1328, "step": 2749 }, { "epoch": 0.7167758120744143, "grad_norm": 1.0276033310875103, "learning_rate": 1.0180106950411397e-06, "loss": 0.1303, "step": 2750 }, { "epoch": 0.7170364578242596, "grad_norm": 0.9940666837044154, "learning_rate": 1.0162754538631945e-06, "loss": 0.1368, "step": 2751 }, { "epoch": 0.7172971035741048, "grad_norm": 1.0262007934625723, "learning_rate": 1.0145413154839945e-06, "loss": 0.14, "step": 2752 }, { "epoch": 0.7175577493239501, "grad_norm": 1.0272283718817685, "learning_rate": 1.0128082811924625e-06, "loss": 0.1381, "step": 2753 }, { "epoch": 0.7178183950737953, "grad_norm": 1.054389741113487, "learning_rate": 1.0110763522767e-06, "loss": 0.1447, "step": 2754 }, { "epoch": 0.7180790408236406, "grad_norm": 0.9955610174018157, "learning_rate": 1.0093455300239871e-06, "loss": 0.1404, "step": 2755 }, { "epoch": 0.7183396865734858, "grad_norm": 1.0761478291826063, "learning_rate": 1.0076158157207801e-06, "loss": 0.1502, "step": 2756 }, { "epoch": 0.7186003323233311, "grad_norm": 1.044539538726412, "learning_rate": 1.0058872106527157e-06, "loss": 0.1447, "step": 2757 }, { "epoch": 0.7188609780731763, "grad_norm": 1.032492448435089, "learning_rate": 1.0041597161046025e-06, "loss": 0.1399, "step": 2758 }, { "epoch": 0.7191216238230216, "grad_norm": 1.0401990225285596, "learning_rate": 1.0024333333604225e-06, "loss": 0.1413, "step": 2759 }, { "epoch": 0.7193822695728668, "grad_norm": 1.0894418387727531, "learning_rate": 1.0007080637033359e-06, "loss": 0.1333, "step": 2760 }, { "epoch": 0.7196429153227121, "grad_norm": 1.0461031124505706, "learning_rate": 9.989839084156725e-07, "loss": 0.1426, "step": 2761 }, { "epoch": 0.7199035610725573, "grad_norm": 1.0001843636415415, "learning_rate": 9.972608687789346e-07, "loss": 0.1345, "step": 2762 }, { "epoch": 0.7201642068224025, "grad_norm": 1.0055212784220258, "learning_rate": 9.95538946073794e-07, "loss": 0.1348, "step": 2763 }, { "epoch": 0.7204248525722478, "grad_norm": 1.0539264048714472, "learning_rate": 9.938181415800966e-07, "loss": 0.1404, "step": 2764 }, { "epoch": 0.7206854983220929, "grad_norm": 1.111512197660239, "learning_rate": 9.92098456576851e-07, "loss": 0.1571, "step": 2765 }, { "epoch": 0.7209461440719382, "grad_norm": 0.996452249391587, "learning_rate": 9.903798923422369e-07, "loss": 0.1332, "step": 2766 }, { "epoch": 0.7212067898217834, "grad_norm": 0.9955665892237566, "learning_rate": 9.886624501536028e-07, "loss": 0.1326, "step": 2767 }, { "epoch": 0.7214674355716287, "grad_norm": 1.0972779041769753, "learning_rate": 9.869461312874603e-07, "loss": 0.1542, "step": 2768 }, { "epoch": 0.7217280813214739, "grad_norm": 0.9998227446571175, "learning_rate": 9.852309370194873e-07, "loss": 0.1247, "step": 2769 }, { "epoch": 0.7219887270713192, "grad_norm": 0.9251544175899823, "learning_rate": 9.835168686245252e-07, "loss": 0.1231, "step": 2770 }, { "epoch": 0.7222493728211644, "grad_norm": 1.0076461013824176, "learning_rate": 9.818039273765794e-07, "loss": 0.1387, "step": 2771 }, { "epoch": 0.7225100185710097, "grad_norm": 1.024976368140024, "learning_rate": 9.800921145488171e-07, "loss": 0.143, "step": 2772 }, { "epoch": 0.7227706643208549, "grad_norm": 1.0436811052950414, "learning_rate": 9.783814314135656e-07, "loss": 0.1429, "step": 2773 }, { "epoch": 0.7230313100707002, "grad_norm": 1.0445717760596118, "learning_rate": 9.766718792423159e-07, "loss": 0.1394, "step": 2774 }, { "epoch": 0.7232919558205454, "grad_norm": 1.0497329656407732, "learning_rate": 9.749634593057159e-07, "loss": 0.1454, "step": 2775 }, { "epoch": 0.7235526015703907, "grad_norm": 1.1063949464993423, "learning_rate": 9.732561728735698e-07, "loss": 0.1326, "step": 2776 }, { "epoch": 0.7238132473202359, "grad_norm": 1.0213665428728442, "learning_rate": 9.715500212148444e-07, "loss": 0.1374, "step": 2777 }, { "epoch": 0.7240738930700811, "grad_norm": 1.047264219898337, "learning_rate": 9.698450055976596e-07, "loss": 0.1453, "step": 2778 }, { "epoch": 0.7243345388199264, "grad_norm": 1.0339248631921798, "learning_rate": 9.681411272892913e-07, "loss": 0.1497, "step": 2779 }, { "epoch": 0.7245951845697716, "grad_norm": 1.0616164256044434, "learning_rate": 9.664383875561726e-07, "loss": 0.1346, "step": 2780 }, { "epoch": 0.7248558303196169, "grad_norm": 1.0628312102554063, "learning_rate": 9.647367876638858e-07, "loss": 0.1588, "step": 2781 }, { "epoch": 0.7251164760694621, "grad_norm": 0.9314522060582116, "learning_rate": 9.630363288771689e-07, "loss": 0.1295, "step": 2782 }, { "epoch": 0.7253771218193074, "grad_norm": 1.0246353937139718, "learning_rate": 9.613370124599124e-07, "loss": 0.1405, "step": 2783 }, { "epoch": 0.7256377675691525, "grad_norm": 1.0433919470721438, "learning_rate": 9.596388396751567e-07, "loss": 0.1464, "step": 2784 }, { "epoch": 0.7258984133189978, "grad_norm": 0.9573834037234705, "learning_rate": 9.579418117850915e-07, "loss": 0.1295, "step": 2785 }, { "epoch": 0.726159059068843, "grad_norm": 1.009355239491816, "learning_rate": 9.562459300510562e-07, "loss": 0.1319, "step": 2786 }, { "epoch": 0.7264197048186883, "grad_norm": 1.0537562406222964, "learning_rate": 9.545511957335388e-07, "loss": 0.1433, "step": 2787 }, { "epoch": 0.7266803505685335, "grad_norm": 0.9783106506745689, "learning_rate": 9.528576100921736e-07, "loss": 0.1332, "step": 2788 }, { "epoch": 0.7269409963183788, "grad_norm": 0.9917246368837797, "learning_rate": 9.511651743857406e-07, "loss": 0.1389, "step": 2789 }, { "epoch": 0.727201642068224, "grad_norm": 0.9757600255037729, "learning_rate": 9.494738898721681e-07, "loss": 0.1332, "step": 2790 }, { "epoch": 0.7274622878180693, "grad_norm": 1.0708280505110856, "learning_rate": 9.477837578085261e-07, "loss": 0.1355, "step": 2791 }, { "epoch": 0.7277229335679145, "grad_norm": 1.040449193594735, "learning_rate": 9.460947794510264e-07, "loss": 0.148, "step": 2792 }, { "epoch": 0.7279835793177597, "grad_norm": 0.9500474904954316, "learning_rate": 9.444069560550279e-07, "loss": 0.1327, "step": 2793 }, { "epoch": 0.728244225067605, "grad_norm": 1.0543171689012683, "learning_rate": 9.427202888750278e-07, "loss": 0.1359, "step": 2794 }, { "epoch": 0.7285048708174502, "grad_norm": 1.0250904347116594, "learning_rate": 9.410347791646648e-07, "loss": 0.1412, "step": 2795 }, { "epoch": 0.7287655165672955, "grad_norm": 1.0672785025708242, "learning_rate": 9.393504281767163e-07, "loss": 0.1412, "step": 2796 }, { "epoch": 0.7290261623171407, "grad_norm": 1.0332625719993092, "learning_rate": 9.376672371631021e-07, "loss": 0.1406, "step": 2797 }, { "epoch": 0.729286808066986, "grad_norm": 1.0644501375575817, "learning_rate": 9.359852073748746e-07, "loss": 0.1377, "step": 2798 }, { "epoch": 0.7295474538168312, "grad_norm": 1.0029642063796778, "learning_rate": 9.343043400622256e-07, "loss": 0.1379, "step": 2799 }, { "epoch": 0.7298080995666765, "grad_norm": 0.9717784397020088, "learning_rate": 9.326246364744845e-07, "loss": 0.1227, "step": 2800 }, { "epoch": 0.7300687453165217, "grad_norm": 1.0775849450128914, "learning_rate": 9.309460978601137e-07, "loss": 0.146, "step": 2801 }, { "epoch": 0.730329391066367, "grad_norm": 1.0145844336301848, "learning_rate": 9.292687254667096e-07, "loss": 0.1295, "step": 2802 }, { "epoch": 0.7305900368162122, "grad_norm": 1.1171725215122095, "learning_rate": 9.27592520541003e-07, "loss": 0.1513, "step": 2803 }, { "epoch": 0.7308506825660575, "grad_norm": 1.1361983681801135, "learning_rate": 9.259174843288557e-07, "loss": 0.1473, "step": 2804 }, { "epoch": 0.7311113283159026, "grad_norm": 1.044749088644531, "learning_rate": 9.242436180752623e-07, "loss": 0.1441, "step": 2805 }, { "epoch": 0.7313719740657479, "grad_norm": 1.0815354732407847, "learning_rate": 9.225709230243455e-07, "loss": 0.1417, "step": 2806 }, { "epoch": 0.7316326198155931, "grad_norm": 0.9508000656587523, "learning_rate": 9.208994004193605e-07, "loss": 0.1242, "step": 2807 }, { "epoch": 0.7318932655654383, "grad_norm": 1.1000060635012703, "learning_rate": 9.192290515026903e-07, "loss": 0.1545, "step": 2808 }, { "epoch": 0.7321539113152836, "grad_norm": 1.0086258322961672, "learning_rate": 9.175598775158417e-07, "loss": 0.1358, "step": 2809 }, { "epoch": 0.7324145570651288, "grad_norm": 1.0357471633069821, "learning_rate": 9.158918796994543e-07, "loss": 0.1462, "step": 2810 }, { "epoch": 0.7326752028149741, "grad_norm": 1.0231522032592377, "learning_rate": 9.142250592932891e-07, "loss": 0.1402, "step": 2811 }, { "epoch": 0.7329358485648193, "grad_norm": 1.0650645024591552, "learning_rate": 9.125594175362326e-07, "loss": 0.1425, "step": 2812 }, { "epoch": 0.7331964943146646, "grad_norm": 1.0295589709300526, "learning_rate": 9.108949556662991e-07, "loss": 0.1547, "step": 2813 }, { "epoch": 0.7334571400645098, "grad_norm": 1.0584597621597986, "learning_rate": 9.092316749206198e-07, "loss": 0.1437, "step": 2814 }, { "epoch": 0.7337177858143551, "grad_norm": 1.023116549723628, "learning_rate": 9.075695765354511e-07, "loss": 0.1386, "step": 2815 }, { "epoch": 0.7339784315642003, "grad_norm": 1.0051189566980858, "learning_rate": 9.059086617461723e-07, "loss": 0.138, "step": 2816 }, { "epoch": 0.7342390773140456, "grad_norm": 1.0727745559639634, "learning_rate": 9.042489317872807e-07, "loss": 0.1477, "step": 2817 }, { "epoch": 0.7344997230638908, "grad_norm": 0.9976750946672831, "learning_rate": 9.025903878923934e-07, "loss": 0.1492, "step": 2818 }, { "epoch": 0.7347603688137361, "grad_norm": 0.9787598133324921, "learning_rate": 9.009330312942455e-07, "loss": 0.129, "step": 2819 }, { "epoch": 0.7350210145635813, "grad_norm": 1.0207841760269447, "learning_rate": 8.992768632246907e-07, "loss": 0.1394, "step": 2820 }, { "epoch": 0.7352816603134266, "grad_norm": 0.9896474007231164, "learning_rate": 8.976218849146987e-07, "loss": 0.1383, "step": 2821 }, { "epoch": 0.7355423060632718, "grad_norm": 0.9899015996950713, "learning_rate": 8.959680975943541e-07, "loss": 0.1419, "step": 2822 }, { "epoch": 0.735802951813117, "grad_norm": 1.0315029488006593, "learning_rate": 8.943155024928588e-07, "loss": 0.1498, "step": 2823 }, { "epoch": 0.7360635975629622, "grad_norm": 0.9593421064068879, "learning_rate": 8.926641008385268e-07, "loss": 0.1177, "step": 2824 }, { "epoch": 0.7363242433128074, "grad_norm": 0.9612961897464944, "learning_rate": 8.910138938587828e-07, "loss": 0.1298, "step": 2825 }, { "epoch": 0.7365848890626527, "grad_norm": 1.0353987053298024, "learning_rate": 8.893648827801685e-07, "loss": 0.136, "step": 2826 }, { "epoch": 0.7368455348124979, "grad_norm": 1.1033464030188456, "learning_rate": 8.877170688283332e-07, "loss": 0.1438, "step": 2827 }, { "epoch": 0.7371061805623432, "grad_norm": 1.0363204923892146, "learning_rate": 8.860704532280373e-07, "loss": 0.1341, "step": 2828 }, { "epoch": 0.7373668263121884, "grad_norm": 1.0770869383270643, "learning_rate": 8.844250372031504e-07, "loss": 0.1482, "step": 2829 }, { "epoch": 0.7376274720620337, "grad_norm": 1.037303020986428, "learning_rate": 8.827808219766513e-07, "loss": 0.1348, "step": 2830 }, { "epoch": 0.7378881178118789, "grad_norm": 1.094787618662015, "learning_rate": 8.811378087706251e-07, "loss": 0.152, "step": 2831 }, { "epoch": 0.7381487635617242, "grad_norm": 1.0352699380401178, "learning_rate": 8.794959988062632e-07, "loss": 0.1396, "step": 2832 }, { "epoch": 0.7384094093115694, "grad_norm": 0.9923984595155899, "learning_rate": 8.778553933038655e-07, "loss": 0.1298, "step": 2833 }, { "epoch": 0.7386700550614147, "grad_norm": 1.0743805638936086, "learning_rate": 8.762159934828337e-07, "loss": 0.1397, "step": 2834 }, { "epoch": 0.7389307008112599, "grad_norm": 1.1344730980567537, "learning_rate": 8.745778005616745e-07, "loss": 0.155, "step": 2835 }, { "epoch": 0.7391913465611052, "grad_norm": 1.0916680330034698, "learning_rate": 8.729408157579968e-07, "loss": 0.1448, "step": 2836 }, { "epoch": 0.7394519923109504, "grad_norm": 1.0077737375232028, "learning_rate": 8.713050402885129e-07, "loss": 0.1364, "step": 2837 }, { "epoch": 0.7397126380607956, "grad_norm": 1.0104496488883805, "learning_rate": 8.696704753690344e-07, "loss": 0.1343, "step": 2838 }, { "epoch": 0.7399732838106409, "grad_norm": 1.019131573671018, "learning_rate": 8.680371222144757e-07, "loss": 0.1354, "step": 2839 }, { "epoch": 0.7402339295604861, "grad_norm": 1.0104932777972166, "learning_rate": 8.664049820388492e-07, "loss": 0.1411, "step": 2840 }, { "epoch": 0.7404945753103314, "grad_norm": 1.1244234521182992, "learning_rate": 8.647740560552631e-07, "loss": 0.172, "step": 2841 }, { "epoch": 0.7407552210601765, "grad_norm": 0.9727573838486306, "learning_rate": 8.631443454759283e-07, "loss": 0.131, "step": 2842 }, { "epoch": 0.7410158668100219, "grad_norm": 0.9938753548747574, "learning_rate": 8.615158515121486e-07, "loss": 0.1333, "step": 2843 }, { "epoch": 0.741276512559867, "grad_norm": 1.0083238790403926, "learning_rate": 8.598885753743247e-07, "loss": 0.1324, "step": 2844 }, { "epoch": 0.7415371583097123, "grad_norm": 1.1282557061592409, "learning_rate": 8.582625182719515e-07, "loss": 0.1541, "step": 2845 }, { "epoch": 0.7417978040595575, "grad_norm": 0.9903353876005387, "learning_rate": 8.566376814136201e-07, "loss": 0.1274, "step": 2846 }, { "epoch": 0.7420584498094028, "grad_norm": 0.9627301471933991, "learning_rate": 8.550140660070114e-07, "loss": 0.1248, "step": 2847 }, { "epoch": 0.742319095559248, "grad_norm": 1.014022887757275, "learning_rate": 8.533916732588996e-07, "loss": 0.1359, "step": 2848 }, { "epoch": 0.7425797413090933, "grad_norm": 0.9891181148944014, "learning_rate": 8.517705043751517e-07, "loss": 0.1349, "step": 2849 }, { "epoch": 0.7428403870589385, "grad_norm": 0.9888231981730512, "learning_rate": 8.501505605607236e-07, "loss": 0.1381, "step": 2850 }, { "epoch": 0.7431010328087838, "grad_norm": 1.0035917239130554, "learning_rate": 8.485318430196604e-07, "loss": 0.1348, "step": 2851 }, { "epoch": 0.743361678558629, "grad_norm": 1.0372450293156208, "learning_rate": 8.469143529550968e-07, "loss": 0.1483, "step": 2852 }, { "epoch": 0.7436223243084742, "grad_norm": 1.0653753375739816, "learning_rate": 8.452980915692544e-07, "loss": 0.1444, "step": 2853 }, { "epoch": 0.7438829700583195, "grad_norm": 1.0158424846260306, "learning_rate": 8.436830600634416e-07, "loss": 0.1397, "step": 2854 }, { "epoch": 0.7441436158081647, "grad_norm": 1.0138716909902203, "learning_rate": 8.420692596380523e-07, "loss": 0.1344, "step": 2855 }, { "epoch": 0.74440426155801, "grad_norm": 1.0746752661990862, "learning_rate": 8.404566914925672e-07, "loss": 0.1431, "step": 2856 }, { "epoch": 0.7446649073078552, "grad_norm": 0.9868229898327919, "learning_rate": 8.388453568255503e-07, "loss": 0.1335, "step": 2857 }, { "epoch": 0.7449255530577005, "grad_norm": 1.0246957081476749, "learning_rate": 8.372352568346453e-07, "loss": 0.1374, "step": 2858 }, { "epoch": 0.7451861988075457, "grad_norm": 1.0531084696412414, "learning_rate": 8.356263927165845e-07, "loss": 0.1482, "step": 2859 }, { "epoch": 0.745446844557391, "grad_norm": 1.0494959789373377, "learning_rate": 8.340187656671767e-07, "loss": 0.1346, "step": 2860 }, { "epoch": 0.7457074903072362, "grad_norm": 1.0498912513937098, "learning_rate": 8.324123768813133e-07, "loss": 0.1445, "step": 2861 }, { "epoch": 0.7459681360570815, "grad_norm": 1.0751838196464856, "learning_rate": 8.308072275529652e-07, "loss": 0.1324, "step": 2862 }, { "epoch": 0.7462287818069266, "grad_norm": 1.0417354769868794, "learning_rate": 8.29203318875181e-07, "loss": 0.1421, "step": 2863 }, { "epoch": 0.746489427556772, "grad_norm": 1.0198344383250402, "learning_rate": 8.27600652040089e-07, "loss": 0.1441, "step": 2864 }, { "epoch": 0.7467500733066171, "grad_norm": 1.0756751795695347, "learning_rate": 8.259992282388917e-07, "loss": 0.1458, "step": 2865 }, { "epoch": 0.7470107190564624, "grad_norm": 0.9952908221184781, "learning_rate": 8.243990486618717e-07, "loss": 0.1407, "step": 2866 }, { "epoch": 0.7472713648063076, "grad_norm": 1.033452208747855, "learning_rate": 8.228001144983833e-07, "loss": 0.1478, "step": 2867 }, { "epoch": 0.7475320105561528, "grad_norm": 0.9952234687195376, "learning_rate": 8.212024269368565e-07, "loss": 0.1354, "step": 2868 }, { "epoch": 0.7477926563059981, "grad_norm": 1.0137112734699973, "learning_rate": 8.196059871647949e-07, "loss": 0.1414, "step": 2869 }, { "epoch": 0.7480533020558433, "grad_norm": 1.029780511033022, "learning_rate": 8.180107963687741e-07, "loss": 0.1513, "step": 2870 }, { "epoch": 0.7483139478056886, "grad_norm": 1.0217627356447518, "learning_rate": 8.164168557344413e-07, "loss": 0.1496, "step": 2871 }, { "epoch": 0.7485745935555338, "grad_norm": 1.0726417492339198, "learning_rate": 8.148241664465157e-07, "loss": 0.1435, "step": 2872 }, { "epoch": 0.7488352393053791, "grad_norm": 0.9594985267316217, "learning_rate": 8.132327296887863e-07, "loss": 0.1265, "step": 2873 }, { "epoch": 0.7490958850552243, "grad_norm": 0.9805894393135942, "learning_rate": 8.116425466441077e-07, "loss": 0.1353, "step": 2874 }, { "epoch": 0.7493565308050696, "grad_norm": 1.027303037483049, "learning_rate": 8.100536184944077e-07, "loss": 0.1359, "step": 2875 }, { "epoch": 0.7496171765549148, "grad_norm": 0.9973573694287466, "learning_rate": 8.084659464206787e-07, "loss": 0.1255, "step": 2876 }, { "epoch": 0.7498778223047601, "grad_norm": 1.0534161336500276, "learning_rate": 8.068795316029796e-07, "loss": 0.1507, "step": 2877 }, { "epoch": 0.7501384680546053, "grad_norm": 0.9848895322957023, "learning_rate": 8.052943752204339e-07, "loss": 0.1337, "step": 2878 }, { "epoch": 0.7503991138044506, "grad_norm": 1.0555640411073959, "learning_rate": 8.037104784512343e-07, "loss": 0.1436, "step": 2879 }, { "epoch": 0.7506597595542958, "grad_norm": 0.9887942931984288, "learning_rate": 8.021278424726308e-07, "loss": 0.1274, "step": 2880 }, { "epoch": 0.7509204053041411, "grad_norm": 1.0599805922151835, "learning_rate": 8.005464684609399e-07, "loss": 0.1473, "step": 2881 }, { "epoch": 0.7511810510539862, "grad_norm": 0.9506991758132086, "learning_rate": 7.989663575915407e-07, "loss": 0.1267, "step": 2882 }, { "epoch": 0.7514416968038314, "grad_norm": 1.100366057354153, "learning_rate": 7.97387511038872e-07, "loss": 0.1431, "step": 2883 }, { "epoch": 0.7517023425536767, "grad_norm": 1.0610850893353998, "learning_rate": 7.958099299764332e-07, "loss": 0.1427, "step": 2884 }, { "epoch": 0.7519629883035219, "grad_norm": 1.0638658369248024, "learning_rate": 7.942336155767833e-07, "loss": 0.1312, "step": 2885 }, { "epoch": 0.7522236340533672, "grad_norm": 1.0037147409294132, "learning_rate": 7.926585690115396e-07, "loss": 0.1388, "step": 2886 }, { "epoch": 0.7524842798032124, "grad_norm": 1.134227647216052, "learning_rate": 7.910847914513772e-07, "loss": 0.1484, "step": 2887 }, { "epoch": 0.7527449255530577, "grad_norm": 1.0625077133260257, "learning_rate": 7.895122840660272e-07, "loss": 0.1422, "step": 2888 }, { "epoch": 0.7530055713029029, "grad_norm": 0.9926391744359631, "learning_rate": 7.879410480242791e-07, "loss": 0.1288, "step": 2889 }, { "epoch": 0.7532662170527482, "grad_norm": 0.9648096304442743, "learning_rate": 7.863710844939759e-07, "loss": 0.1261, "step": 2890 }, { "epoch": 0.7535268628025934, "grad_norm": 1.0701354154263794, "learning_rate": 7.848023946420119e-07, "loss": 0.1453, "step": 2891 }, { "epoch": 0.7537875085524387, "grad_norm": 1.0644864282985782, "learning_rate": 7.8323497963434e-07, "loss": 0.1455, "step": 2892 }, { "epoch": 0.7540481543022839, "grad_norm": 1.0256461708650102, "learning_rate": 7.816688406359624e-07, "loss": 0.1389, "step": 2893 }, { "epoch": 0.7543088000521292, "grad_norm": 1.0068758041700208, "learning_rate": 7.801039788109332e-07, "loss": 0.142, "step": 2894 }, { "epoch": 0.7545694458019744, "grad_norm": 1.005007921337603, "learning_rate": 7.78540395322358e-07, "loss": 0.1289, "step": 2895 }, { "epoch": 0.7548300915518197, "grad_norm": 0.9794317431830796, "learning_rate": 7.769780913323916e-07, "loss": 0.1329, "step": 2896 }, { "epoch": 0.7550907373016649, "grad_norm": 0.9405179581078938, "learning_rate": 7.754170680022369e-07, "loss": 0.1283, "step": 2897 }, { "epoch": 0.7553513830515101, "grad_norm": 0.9915396965501253, "learning_rate": 7.738573264921481e-07, "loss": 0.1342, "step": 2898 }, { "epoch": 0.7556120288013554, "grad_norm": 1.0126877835897181, "learning_rate": 7.722988679614238e-07, "loss": 0.1391, "step": 2899 }, { "epoch": 0.7558726745512006, "grad_norm": 1.0624280570095277, "learning_rate": 7.707416935684092e-07, "loss": 0.1475, "step": 2900 }, { "epoch": 0.7561333203010459, "grad_norm": 1.0006925276037495, "learning_rate": 7.691858044704964e-07, "loss": 0.1356, "step": 2901 }, { "epoch": 0.756393966050891, "grad_norm": 1.0122917429469882, "learning_rate": 7.676312018241211e-07, "loss": 0.1396, "step": 2902 }, { "epoch": 0.7566546118007363, "grad_norm": 1.0132415203646563, "learning_rate": 7.660778867847632e-07, "loss": 0.1297, "step": 2903 }, { "epoch": 0.7569152575505815, "grad_norm": 0.9990351124762333, "learning_rate": 7.645258605069444e-07, "loss": 0.1338, "step": 2904 }, { "epoch": 0.7571759033004268, "grad_norm": 1.0598691370835613, "learning_rate": 7.62975124144232e-07, "loss": 0.1499, "step": 2905 }, { "epoch": 0.757436549050272, "grad_norm": 1.0238291523296745, "learning_rate": 7.614256788492316e-07, "loss": 0.1391, "step": 2906 }, { "epoch": 0.7576971948001173, "grad_norm": 1.0164751127895597, "learning_rate": 7.598775257735877e-07, "loss": 0.1338, "step": 2907 }, { "epoch": 0.7579578405499625, "grad_norm": 1.0750444829260257, "learning_rate": 7.583306660679888e-07, "loss": 0.1453, "step": 2908 }, { "epoch": 0.7582184862998078, "grad_norm": 1.074737140485268, "learning_rate": 7.567851008821586e-07, "loss": 0.147, "step": 2909 }, { "epoch": 0.758479132049653, "grad_norm": 1.0557574072445786, "learning_rate": 7.552408313648602e-07, "loss": 0.1426, "step": 2910 }, { "epoch": 0.7587397777994983, "grad_norm": 1.0700077654373776, "learning_rate": 7.536978586638921e-07, "loss": 0.1375, "step": 2911 }, { "epoch": 0.7590004235493435, "grad_norm": 1.1105435261512686, "learning_rate": 7.521561839260927e-07, "loss": 0.1493, "step": 2912 }, { "epoch": 0.7592610692991888, "grad_norm": 0.9917949824161661, "learning_rate": 7.506158082973305e-07, "loss": 0.1268, "step": 2913 }, { "epoch": 0.759521715049034, "grad_norm": 1.0529744152907878, "learning_rate": 7.49076732922511e-07, "loss": 0.138, "step": 2914 }, { "epoch": 0.7597823607988792, "grad_norm": 1.030126698989935, "learning_rate": 7.475389589455748e-07, "loss": 0.1367, "step": 2915 }, { "epoch": 0.7600430065487245, "grad_norm": 1.1055057482115709, "learning_rate": 7.460024875094926e-07, "loss": 0.1608, "step": 2916 }, { "epoch": 0.7603036522985697, "grad_norm": 1.0321504313386536, "learning_rate": 7.444673197562682e-07, "loss": 0.1399, "step": 2917 }, { "epoch": 0.760564298048415, "grad_norm": 1.0528383579335765, "learning_rate": 7.429334568269364e-07, "loss": 0.1462, "step": 2918 }, { "epoch": 0.7608249437982602, "grad_norm": 0.997106522499361, "learning_rate": 7.414008998615619e-07, "loss": 0.1413, "step": 2919 }, { "epoch": 0.7610855895481055, "grad_norm": 0.9786322857639989, "learning_rate": 7.398696499992386e-07, "loss": 0.1336, "step": 2920 }, { "epoch": 0.7613462352979506, "grad_norm": 1.0657780739351825, "learning_rate": 7.383397083780882e-07, "loss": 0.1428, "step": 2921 }, { "epoch": 0.761606881047796, "grad_norm": 1.0288211114361725, "learning_rate": 7.368110761352631e-07, "loss": 0.1306, "step": 2922 }, { "epoch": 0.7618675267976411, "grad_norm": 1.0753662704608904, "learning_rate": 7.3528375440694e-07, "loss": 0.1463, "step": 2923 }, { "epoch": 0.7621281725474864, "grad_norm": 1.031619304170033, "learning_rate": 7.337577443283192e-07, "loss": 0.1488, "step": 2924 }, { "epoch": 0.7623888182973316, "grad_norm": 1.0897491079076032, "learning_rate": 7.322330470336314e-07, "loss": 0.1495, "step": 2925 }, { "epoch": 0.7626494640471769, "grad_norm": 1.0108358931251555, "learning_rate": 7.307096636561281e-07, "loss": 0.1335, "step": 2926 }, { "epoch": 0.7629101097970221, "grad_norm": 1.0482690933416767, "learning_rate": 7.291875953280842e-07, "loss": 0.1353, "step": 2927 }, { "epoch": 0.7631707555468674, "grad_norm": 1.032647100681464, "learning_rate": 7.276668431808004e-07, "loss": 0.1416, "step": 2928 }, { "epoch": 0.7634314012967126, "grad_norm": 0.9695816665555137, "learning_rate": 7.261474083445941e-07, "loss": 0.1351, "step": 2929 }, { "epoch": 0.7636920470465578, "grad_norm": 1.003755120697761, "learning_rate": 7.246292919488068e-07, "loss": 0.1398, "step": 2930 }, { "epoch": 0.7639526927964031, "grad_norm": 1.0887335695400413, "learning_rate": 7.231124951218002e-07, "loss": 0.1519, "step": 2931 }, { "epoch": 0.7642133385462483, "grad_norm": 1.0122171079600635, "learning_rate": 7.215970189909546e-07, "loss": 0.1298, "step": 2932 }, { "epoch": 0.7644739842960936, "grad_norm": 0.9638674382533268, "learning_rate": 7.20082864682668e-07, "loss": 0.1313, "step": 2933 }, { "epoch": 0.7647346300459388, "grad_norm": 1.2263859758448297, "learning_rate": 7.185700333223564e-07, "loss": 0.161, "step": 2934 }, { "epoch": 0.7649952757957841, "grad_norm": 1.082735926212554, "learning_rate": 7.170585260344528e-07, "loss": 0.1359, "step": 2935 }, { "epoch": 0.7652559215456293, "grad_norm": 1.0029308678960605, "learning_rate": 7.155483439424058e-07, "loss": 0.1419, "step": 2936 }, { "epoch": 0.7655165672954746, "grad_norm": 1.0771768847571228, "learning_rate": 7.140394881686785e-07, "loss": 0.1484, "step": 2937 }, { "epoch": 0.7657772130453198, "grad_norm": 0.9515488936767031, "learning_rate": 7.1253195983475e-07, "loss": 0.1309, "step": 2938 }, { "epoch": 0.7660378587951651, "grad_norm": 1.0031350866864501, "learning_rate": 7.110257600611117e-07, "loss": 0.1432, "step": 2939 }, { "epoch": 0.7662985045450103, "grad_norm": 0.9796883206348989, "learning_rate": 7.09520889967265e-07, "loss": 0.1326, "step": 2940 }, { "epoch": 0.7665591502948556, "grad_norm": 1.0887269307041498, "learning_rate": 7.08017350671728e-07, "loss": 0.1479, "step": 2941 }, { "epoch": 0.7668197960447007, "grad_norm": 1.1035545007049898, "learning_rate": 7.065151432920258e-07, "loss": 0.1469, "step": 2942 }, { "epoch": 0.767080441794546, "grad_norm": 1.0397523050277833, "learning_rate": 7.050142689446951e-07, "loss": 0.1377, "step": 2943 }, { "epoch": 0.7673410875443912, "grad_norm": 1.0183886901234156, "learning_rate": 7.035147287452803e-07, "loss": 0.135, "step": 2944 }, { "epoch": 0.7676017332942364, "grad_norm": 0.985194072253848, "learning_rate": 7.020165238083382e-07, "loss": 0.1311, "step": 2945 }, { "epoch": 0.7678623790440817, "grad_norm": 0.9754627183757976, "learning_rate": 7.005196552474275e-07, "loss": 0.1272, "step": 2946 }, { "epoch": 0.7681230247939269, "grad_norm": 1.0329419853404136, "learning_rate": 6.990241241751167e-07, "loss": 0.139, "step": 2947 }, { "epoch": 0.7683836705437722, "grad_norm": 1.084242942561076, "learning_rate": 6.975299317029813e-07, "loss": 0.1342, "step": 2948 }, { "epoch": 0.7686443162936174, "grad_norm": 1.040587423954025, "learning_rate": 6.960370789415999e-07, "loss": 0.1437, "step": 2949 }, { "epoch": 0.7689049620434627, "grad_norm": 1.0475181175491146, "learning_rate": 6.945455670005558e-07, "loss": 0.1487, "step": 2950 }, { "epoch": 0.7691656077933079, "grad_norm": 0.9981145710570071, "learning_rate": 6.930553969884354e-07, "loss": 0.1391, "step": 2951 }, { "epoch": 0.7694262535431532, "grad_norm": 1.0326858374394945, "learning_rate": 6.915665700128291e-07, "loss": 0.1342, "step": 2952 }, { "epoch": 0.7696868992929984, "grad_norm": 1.0922783316946685, "learning_rate": 6.900790871803273e-07, "loss": 0.1503, "step": 2953 }, { "epoch": 0.7699475450428437, "grad_norm": 0.977405758826059, "learning_rate": 6.885929495965216e-07, "loss": 0.135, "step": 2954 }, { "epoch": 0.7702081907926889, "grad_norm": 0.9805544138234038, "learning_rate": 6.871081583660058e-07, "loss": 0.128, "step": 2955 }, { "epoch": 0.7704688365425342, "grad_norm": 1.023801124520644, "learning_rate": 6.856247145923708e-07, "loss": 0.1384, "step": 2956 }, { "epoch": 0.7707294822923794, "grad_norm": 0.9883730493320755, "learning_rate": 6.841426193782066e-07, "loss": 0.1363, "step": 2957 }, { "epoch": 0.7709901280422247, "grad_norm": 1.0548366267257485, "learning_rate": 6.82661873825101e-07, "loss": 0.1312, "step": 2958 }, { "epoch": 0.7712507737920699, "grad_norm": 1.0201488610318101, "learning_rate": 6.811824790336382e-07, "loss": 0.1372, "step": 2959 }, { "epoch": 0.771511419541915, "grad_norm": 1.052402404708872, "learning_rate": 6.797044361033986e-07, "loss": 0.1406, "step": 2960 }, { "epoch": 0.7717720652917603, "grad_norm": 1.0146122678241551, "learning_rate": 6.7822774613296e-07, "loss": 0.1346, "step": 2961 }, { "epoch": 0.7720327110416055, "grad_norm": 0.9720379919144682, "learning_rate": 6.767524102198903e-07, "loss": 0.1347, "step": 2962 }, { "epoch": 0.7722933567914508, "grad_norm": 1.0333403747081185, "learning_rate": 6.752784294607531e-07, "loss": 0.1215, "step": 2963 }, { "epoch": 0.772554002541296, "grad_norm": 1.0804681087916708, "learning_rate": 6.738058049511067e-07, "loss": 0.1526, "step": 2964 }, { "epoch": 0.7728146482911413, "grad_norm": 1.021680083595052, "learning_rate": 6.72334537785499e-07, "loss": 0.1367, "step": 2965 }, { "epoch": 0.7730752940409865, "grad_norm": 1.0045928896914442, "learning_rate": 6.708646290574694e-07, "loss": 0.139, "step": 2966 }, { "epoch": 0.7733359397908318, "grad_norm": 1.082062533968339, "learning_rate": 6.693960798595478e-07, "loss": 0.1566, "step": 2967 }, { "epoch": 0.773596585540677, "grad_norm": 1.005301467676196, "learning_rate": 6.679288912832538e-07, "loss": 0.1279, "step": 2968 }, { "epoch": 0.7738572312905223, "grad_norm": 1.029432548865933, "learning_rate": 6.664630644190953e-07, "loss": 0.1416, "step": 2969 }, { "epoch": 0.7741178770403675, "grad_norm": 1.0807707104767301, "learning_rate": 6.649986003565678e-07, "loss": 0.1404, "step": 2970 }, { "epoch": 0.7743785227902128, "grad_norm": 0.9831076268637929, "learning_rate": 6.635355001841559e-07, "loss": 0.1327, "step": 2971 }, { "epoch": 0.774639168540058, "grad_norm": 1.020259822500214, "learning_rate": 6.620737649893291e-07, "loss": 0.1429, "step": 2972 }, { "epoch": 0.7748998142899033, "grad_norm": 1.041578281839584, "learning_rate": 6.606133958585401e-07, "loss": 0.1428, "step": 2973 }, { "epoch": 0.7751604600397485, "grad_norm": 1.0460251281168351, "learning_rate": 6.591543938772302e-07, "loss": 0.1421, "step": 2974 }, { "epoch": 0.7754211057895937, "grad_norm": 1.0091133549674334, "learning_rate": 6.576967601298223e-07, "loss": 0.1387, "step": 2975 }, { "epoch": 0.775681751539439, "grad_norm": 1.0244661202864513, "learning_rate": 6.562404956997229e-07, "loss": 0.1407, "step": 2976 }, { "epoch": 0.7759423972892842, "grad_norm": 1.0304601699523699, "learning_rate": 6.547856016693193e-07, "loss": 0.1389, "step": 2977 }, { "epoch": 0.7762030430391295, "grad_norm": 1.0035760713452446, "learning_rate": 6.533320791199848e-07, "loss": 0.1288, "step": 2978 }, { "epoch": 0.7764636887889746, "grad_norm": 0.9569878463709806, "learning_rate": 6.518799291320671e-07, "loss": 0.1203, "step": 2979 }, { "epoch": 0.77672433453882, "grad_norm": 1.0114556979519682, "learning_rate": 6.504291527848972e-07, "loss": 0.1325, "step": 2980 }, { "epoch": 0.7769849802886651, "grad_norm": 1.1091274745881525, "learning_rate": 6.489797511567856e-07, "loss": 0.1604, "step": 2981 }, { "epoch": 0.7772456260385104, "grad_norm": 1.0306226391292717, "learning_rate": 6.475317253250196e-07, "loss": 0.1442, "step": 2982 }, { "epoch": 0.7775062717883556, "grad_norm": 1.0427334131341899, "learning_rate": 6.460850763658647e-07, "loss": 0.1436, "step": 2983 }, { "epoch": 0.7777669175382009, "grad_norm": 1.018370300815242, "learning_rate": 6.446398053545619e-07, "loss": 0.1416, "step": 2984 }, { "epoch": 0.7780275632880461, "grad_norm": 0.9990417984797189, "learning_rate": 6.431959133653296e-07, "loss": 0.1316, "step": 2985 }, { "epoch": 0.7782882090378914, "grad_norm": 0.9430688256413039, "learning_rate": 6.417534014713594e-07, "loss": 0.1263, "step": 2986 }, { "epoch": 0.7785488547877366, "grad_norm": 1.0333066064665775, "learning_rate": 6.403122707448198e-07, "loss": 0.1319, "step": 2987 }, { "epoch": 0.7788095005375819, "grad_norm": 1.0271693345533905, "learning_rate": 6.388725222568498e-07, "loss": 0.1364, "step": 2988 }, { "epoch": 0.7790701462874271, "grad_norm": 1.0914602817026913, "learning_rate": 6.374341570775633e-07, "loss": 0.1413, "step": 2989 }, { "epoch": 0.7793307920372723, "grad_norm": 1.0141766999257062, "learning_rate": 6.359971762760442e-07, "loss": 0.1389, "step": 2990 }, { "epoch": 0.7795914377871176, "grad_norm": 1.014719762599316, "learning_rate": 6.345615809203487e-07, "loss": 0.1395, "step": 2991 }, { "epoch": 0.7798520835369628, "grad_norm": 0.990799121540676, "learning_rate": 6.331273720775028e-07, "loss": 0.134, "step": 2992 }, { "epoch": 0.7801127292868081, "grad_norm": 1.065149581807141, "learning_rate": 6.316945508135014e-07, "loss": 0.1495, "step": 2993 }, { "epoch": 0.7803733750366533, "grad_norm": 1.064685148333649, "learning_rate": 6.302631181933106e-07, "loss": 0.1391, "step": 2994 }, { "epoch": 0.7806340207864986, "grad_norm": 1.0820720496695118, "learning_rate": 6.288330752808605e-07, "loss": 0.1464, "step": 2995 }, { "epoch": 0.7808946665363438, "grad_norm": 1.0611085558430804, "learning_rate": 6.274044231390503e-07, "loss": 0.1392, "step": 2996 }, { "epoch": 0.7811553122861891, "grad_norm": 1.0069462214279048, "learning_rate": 6.259771628297465e-07, "loss": 0.1436, "step": 2997 }, { "epoch": 0.7814159580360343, "grad_norm": 1.0675676082659054, "learning_rate": 6.245512954137795e-07, "loss": 0.1451, "step": 2998 }, { "epoch": 0.7816766037858796, "grad_norm": 1.0922462474603276, "learning_rate": 6.23126821950945e-07, "loss": 0.1453, "step": 2999 }, { "epoch": 0.7819372495357247, "grad_norm": 1.039509404981061, "learning_rate": 6.217037435000028e-07, "loss": 0.1442, "step": 3000 }, { "epoch": 0.7819372495357247, "eval_loss": 0.13870234787464142, "eval_runtime": 55.1905, "eval_samples_per_second": 44.953, "eval_steps_per_second": 5.635, "step": 3000 }, { "epoch": 0.78219789528557, "grad_norm": 1.0370561381904408, "learning_rate": 6.20282061118675e-07, "loss": 0.1429, "step": 3001 }, { "epoch": 0.7824585410354152, "grad_norm": 1.1033997898100005, "learning_rate": 6.18861775863647e-07, "loss": 0.1485, "step": 3002 }, { "epoch": 0.7827191867852605, "grad_norm": 1.0627266033577, "learning_rate": 6.174428887905648e-07, "loss": 0.1379, "step": 3003 }, { "epoch": 0.7829798325351057, "grad_norm": 1.0538356895136547, "learning_rate": 6.16025400954037e-07, "loss": 0.1412, "step": 3004 }, { "epoch": 0.7832404782849509, "grad_norm": 1.0255137778180363, "learning_rate": 6.146093134076314e-07, "loss": 0.139, "step": 3005 }, { "epoch": 0.7835011240347962, "grad_norm": 1.006116478210167, "learning_rate": 6.131946272038719e-07, "loss": 0.1311, "step": 3006 }, { "epoch": 0.7837617697846414, "grad_norm": 1.0236123106378343, "learning_rate": 6.11781343394246e-07, "loss": 0.1294, "step": 3007 }, { "epoch": 0.7840224155344867, "grad_norm": 1.069844685009874, "learning_rate": 6.103694630291954e-07, "loss": 0.1483, "step": 3008 }, { "epoch": 0.7842830612843319, "grad_norm": 0.981384129629387, "learning_rate": 6.089589871581203e-07, "loss": 0.1382, "step": 3009 }, { "epoch": 0.7845437070341772, "grad_norm": 1.105322320128344, "learning_rate": 6.07549916829375e-07, "loss": 0.1462, "step": 3010 }, { "epoch": 0.7848043527840224, "grad_norm": 1.0807980604387897, "learning_rate": 6.061422530902727e-07, "loss": 0.1384, "step": 3011 }, { "epoch": 0.7850649985338677, "grad_norm": 1.0547238689599385, "learning_rate": 6.047359969870773e-07, "loss": 0.148, "step": 3012 }, { "epoch": 0.7853256442837129, "grad_norm": 1.0003462258663869, "learning_rate": 6.033311495650074e-07, "loss": 0.1317, "step": 3013 }, { "epoch": 0.7855862900335582, "grad_norm": 1.0392487689281174, "learning_rate": 6.019277118682371e-07, "loss": 0.1432, "step": 3014 }, { "epoch": 0.7858469357834034, "grad_norm": 0.9992654982985604, "learning_rate": 6.005256849398899e-07, "loss": 0.1403, "step": 3015 }, { "epoch": 0.7861075815332487, "grad_norm": 1.0219970408354433, "learning_rate": 5.991250698220416e-07, "loss": 0.1419, "step": 3016 }, { "epoch": 0.7863682272830939, "grad_norm": 1.0638526549755005, "learning_rate": 5.977258675557191e-07, "loss": 0.1515, "step": 3017 }, { "epoch": 0.7866288730329392, "grad_norm": 1.0494384763817408, "learning_rate": 5.963280791808984e-07, "loss": 0.1294, "step": 3018 }, { "epoch": 0.7868895187827843, "grad_norm": 1.0424765311364204, "learning_rate": 5.949317057365042e-07, "loss": 0.1443, "step": 3019 }, { "epoch": 0.7871501645326295, "grad_norm": 1.009417181397818, "learning_rate": 5.935367482604124e-07, "loss": 0.1352, "step": 3020 }, { "epoch": 0.7874108102824748, "grad_norm": 1.0168829752438138, "learning_rate": 5.921432077894429e-07, "loss": 0.1331, "step": 3021 }, { "epoch": 0.78767145603232, "grad_norm": 1.029723756645666, "learning_rate": 5.907510853593642e-07, "loss": 0.15, "step": 3022 }, { "epoch": 0.7879321017821653, "grad_norm": 1.0194911237653252, "learning_rate": 5.89360382004891e-07, "loss": 0.1444, "step": 3023 }, { "epoch": 0.7881927475320105, "grad_norm": 1.0180717014807485, "learning_rate": 5.879710987596818e-07, "loss": 0.1359, "step": 3024 }, { "epoch": 0.7884533932818558, "grad_norm": 1.0469562212746326, "learning_rate": 5.865832366563415e-07, "loss": 0.1482, "step": 3025 }, { "epoch": 0.788714039031701, "grad_norm": 1.058500773214032, "learning_rate": 5.851967967264164e-07, "loss": 0.1478, "step": 3026 }, { "epoch": 0.7889746847815463, "grad_norm": 1.0121911686405354, "learning_rate": 5.838117800003995e-07, "loss": 0.1426, "step": 3027 }, { "epoch": 0.7892353305313915, "grad_norm": 1.0601128039219343, "learning_rate": 5.824281875077212e-07, "loss": 0.1443, "step": 3028 }, { "epoch": 0.7894959762812368, "grad_norm": 1.0210832202136877, "learning_rate": 5.810460202767563e-07, "loss": 0.132, "step": 3029 }, { "epoch": 0.789756622031082, "grad_norm": 1.0352708231352026, "learning_rate": 5.796652793348203e-07, "loss": 0.1493, "step": 3030 }, { "epoch": 0.7900172677809273, "grad_norm": 0.9762149800584622, "learning_rate": 5.78285965708168e-07, "loss": 0.1325, "step": 3031 }, { "epoch": 0.7902779135307725, "grad_norm": 1.0560747808302398, "learning_rate": 5.769080804219929e-07, "loss": 0.1493, "step": 3032 }, { "epoch": 0.7905385592806178, "grad_norm": 1.0257906657288807, "learning_rate": 5.755316245004275e-07, "loss": 0.1437, "step": 3033 }, { "epoch": 0.790799205030463, "grad_norm": 1.0185952976317156, "learning_rate": 5.741565989665413e-07, "loss": 0.135, "step": 3034 }, { "epoch": 0.7910598507803082, "grad_norm": 1.0513645665220146, "learning_rate": 5.72783004842341e-07, "loss": 0.143, "step": 3035 }, { "epoch": 0.7913204965301535, "grad_norm": 0.9981639558898984, "learning_rate": 5.714108431487692e-07, "loss": 0.1235, "step": 3036 }, { "epoch": 0.7915811422799987, "grad_norm": 1.0463574030275353, "learning_rate": 5.70040114905705e-07, "loss": 0.1382, "step": 3037 }, { "epoch": 0.791841788029844, "grad_norm": 1.0592333295796332, "learning_rate": 5.68670821131961e-07, "loss": 0.1429, "step": 3038 }, { "epoch": 0.7921024337796891, "grad_norm": 0.9985472920492665, "learning_rate": 5.673029628452819e-07, "loss": 0.1259, "step": 3039 }, { "epoch": 0.7923630795295344, "grad_norm": 1.0311861610857007, "learning_rate": 5.659365410623491e-07, "loss": 0.131, "step": 3040 }, { "epoch": 0.7926237252793796, "grad_norm": 1.0780884349759454, "learning_rate": 5.645715567987742e-07, "loss": 0.1377, "step": 3041 }, { "epoch": 0.7928843710292249, "grad_norm": 1.0105698549115683, "learning_rate": 5.63208011069099e-07, "loss": 0.1292, "step": 3042 }, { "epoch": 0.7931450167790701, "grad_norm": 1.0085421834274362, "learning_rate": 5.618459048868008e-07, "loss": 0.1279, "step": 3043 }, { "epoch": 0.7934056625289154, "grad_norm": 1.0581098172390904, "learning_rate": 5.604852392642813e-07, "loss": 0.1383, "step": 3044 }, { "epoch": 0.7936663082787606, "grad_norm": 1.03357499999497, "learning_rate": 5.591260152128736e-07, "loss": 0.1352, "step": 3045 }, { "epoch": 0.7939269540286059, "grad_norm": 1.1052998036962827, "learning_rate": 5.577682337428417e-07, "loss": 0.1472, "step": 3046 }, { "epoch": 0.7941875997784511, "grad_norm": 1.117446027480273, "learning_rate": 5.564118958633746e-07, "loss": 0.1392, "step": 3047 }, { "epoch": 0.7944482455282964, "grad_norm": 0.9928819203137788, "learning_rate": 5.550570025825888e-07, "loss": 0.1299, "step": 3048 }, { "epoch": 0.7947088912781416, "grad_norm": 1.061418511746051, "learning_rate": 5.537035549075279e-07, "loss": 0.1396, "step": 3049 }, { "epoch": 0.7949695370279868, "grad_norm": 1.0508415379512206, "learning_rate": 5.523515538441604e-07, "loss": 0.1321, "step": 3050 }, { "epoch": 0.7952301827778321, "grad_norm": 1.0473830120547718, "learning_rate": 5.510010003973795e-07, "loss": 0.1368, "step": 3051 }, { "epoch": 0.7954908285276773, "grad_norm": 1.0972185257466907, "learning_rate": 5.496518955710023e-07, "loss": 0.1435, "step": 3052 }, { "epoch": 0.7957514742775226, "grad_norm": 1.0816069789337213, "learning_rate": 5.483042403677707e-07, "loss": 0.143, "step": 3053 }, { "epoch": 0.7960121200273678, "grad_norm": 1.0499298776565804, "learning_rate": 5.469580357893484e-07, "loss": 0.1383, "step": 3054 }, { "epoch": 0.7962727657772131, "grad_norm": 1.0082076849215011, "learning_rate": 5.456132828363181e-07, "loss": 0.1391, "step": 3055 }, { "epoch": 0.7965334115270583, "grad_norm": 1.0185820222875943, "learning_rate": 5.442699825081885e-07, "loss": 0.1429, "step": 3056 }, { "epoch": 0.7967940572769036, "grad_norm": 0.9995794237849733, "learning_rate": 5.429281358033847e-07, "loss": 0.1342, "step": 3057 }, { "epoch": 0.7970547030267487, "grad_norm": 1.0763434186208047, "learning_rate": 5.415877437192535e-07, "loss": 0.1395, "step": 3058 }, { "epoch": 0.797315348776594, "grad_norm": 1.0387491596786655, "learning_rate": 5.402488072520587e-07, "loss": 0.1309, "step": 3059 }, { "epoch": 0.7975759945264392, "grad_norm": 1.0035943910158585, "learning_rate": 5.389113273969857e-07, "loss": 0.1279, "step": 3060 }, { "epoch": 0.7978366402762845, "grad_norm": 1.0992950608503993, "learning_rate": 5.375753051481324e-07, "loss": 0.1559, "step": 3061 }, { "epoch": 0.7980972860261297, "grad_norm": 1.063584342698759, "learning_rate": 5.36240741498516e-07, "loss": 0.1442, "step": 3062 }, { "epoch": 0.798357931775975, "grad_norm": 1.0462219403947453, "learning_rate": 5.349076374400708e-07, "loss": 0.1427, "step": 3063 }, { "epoch": 0.7986185775258202, "grad_norm": 1.0018632922504476, "learning_rate": 5.33575993963644e-07, "loss": 0.1349, "step": 3064 }, { "epoch": 0.7988792232756654, "grad_norm": 1.021456731354663, "learning_rate": 5.32245812058998e-07, "loss": 0.1464, "step": 3065 }, { "epoch": 0.7991398690255107, "grad_norm": 1.0294698565337967, "learning_rate": 5.309170927148088e-07, "loss": 0.143, "step": 3066 }, { "epoch": 0.7994005147753559, "grad_norm": 1.0277150100306802, "learning_rate": 5.295898369186655e-07, "loss": 0.1314, "step": 3067 }, { "epoch": 0.7996611605252012, "grad_norm": 1.0658659790465952, "learning_rate": 5.282640456570692e-07, "loss": 0.1334, "step": 3068 }, { "epoch": 0.7999218062750464, "grad_norm": 1.043817776559183, "learning_rate": 5.269397199154319e-07, "loss": 0.1401, "step": 3069 }, { "epoch": 0.8001824520248917, "grad_norm": 1.0399448245088065, "learning_rate": 5.256168606780784e-07, "loss": 0.138, "step": 3070 }, { "epoch": 0.8004430977747369, "grad_norm": 1.029798658513136, "learning_rate": 5.24295468928242e-07, "loss": 0.1321, "step": 3071 }, { "epoch": 0.8007037435245822, "grad_norm": 1.1001565208185593, "learning_rate": 5.229755456480635e-07, "loss": 0.1409, "step": 3072 }, { "epoch": 0.8009643892744274, "grad_norm": 0.9725159287454221, "learning_rate": 5.216570918185956e-07, "loss": 0.1278, "step": 3073 }, { "epoch": 0.8012250350242727, "grad_norm": 1.0734900864878614, "learning_rate": 5.203401084197973e-07, "loss": 0.1404, "step": 3074 }, { "epoch": 0.8014856807741179, "grad_norm": 1.036523943154871, "learning_rate": 5.190245964305338e-07, "loss": 0.1306, "step": 3075 }, { "epoch": 0.8017463265239632, "grad_norm": 1.047906588029982, "learning_rate": 5.177105568285793e-07, "loss": 0.1369, "step": 3076 }, { "epoch": 0.8020069722738083, "grad_norm": 1.0786045463868865, "learning_rate": 5.163979905906102e-07, "loss": 0.1466, "step": 3077 }, { "epoch": 0.8022676180236537, "grad_norm": 1.054716990354842, "learning_rate": 5.150868986922092e-07, "loss": 0.1385, "step": 3078 }, { "epoch": 0.8025282637734988, "grad_norm": 1.0691199292465923, "learning_rate": 5.137772821078649e-07, "loss": 0.1349, "step": 3079 }, { "epoch": 0.802788909523344, "grad_norm": 0.9626789685472136, "learning_rate": 5.124691418109673e-07, "loss": 0.1243, "step": 3080 }, { "epoch": 0.8030495552731893, "grad_norm": 1.045780373938692, "learning_rate": 5.111624787738095e-07, "loss": 0.1385, "step": 3081 }, { "epoch": 0.8033102010230345, "grad_norm": 1.0346723982901056, "learning_rate": 5.09857293967587e-07, "loss": 0.1469, "step": 3082 }, { "epoch": 0.8035708467728798, "grad_norm": 1.0090088435677345, "learning_rate": 5.085535883623966e-07, "loss": 0.136, "step": 3083 }, { "epoch": 0.803831492522725, "grad_norm": 1.039974768191357, "learning_rate": 5.072513629272352e-07, "loss": 0.145, "step": 3084 }, { "epoch": 0.8040921382725703, "grad_norm": 1.0169541192118892, "learning_rate": 5.059506186299992e-07, "loss": 0.136, "step": 3085 }, { "epoch": 0.8043527840224155, "grad_norm": 1.0315057818506108, "learning_rate": 5.046513564374861e-07, "loss": 0.136, "step": 3086 }, { "epoch": 0.8046134297722608, "grad_norm": 0.9707735511229557, "learning_rate": 5.033535773153906e-07, "loss": 0.1363, "step": 3087 }, { "epoch": 0.804874075522106, "grad_norm": 1.0539750541647872, "learning_rate": 5.020572822283027e-07, "loss": 0.1313, "step": 3088 }, { "epoch": 0.8051347212719513, "grad_norm": 0.9886334451356578, "learning_rate": 5.007624721397139e-07, "loss": 0.1354, "step": 3089 }, { "epoch": 0.8053953670217965, "grad_norm": 0.9788612937728397, "learning_rate": 4.994691480120086e-07, "loss": 0.1318, "step": 3090 }, { "epoch": 0.8056560127716418, "grad_norm": 0.9861239549859085, "learning_rate": 4.981773108064683e-07, "loss": 0.1174, "step": 3091 }, { "epoch": 0.805916658521487, "grad_norm": 0.9863685067794247, "learning_rate": 4.968869614832681e-07, "loss": 0.146, "step": 3092 }, { "epoch": 0.8061773042713323, "grad_norm": 1.0618376710857431, "learning_rate": 4.955981010014793e-07, "loss": 0.1325, "step": 3093 }, { "epoch": 0.8064379500211775, "grad_norm": 1.0232766754975289, "learning_rate": 4.943107303190639e-07, "loss": 0.1376, "step": 3094 }, { "epoch": 0.8066985957710228, "grad_norm": 0.9940094296754405, "learning_rate": 4.930248503928778e-07, "loss": 0.1319, "step": 3095 }, { "epoch": 0.806959241520868, "grad_norm": 1.0333644206575456, "learning_rate": 4.917404621786703e-07, "loss": 0.1283, "step": 3096 }, { "epoch": 0.8072198872707131, "grad_norm": 0.9900556074254572, "learning_rate": 4.904575666310798e-07, "loss": 0.1198, "step": 3097 }, { "epoch": 0.8074805330205584, "grad_norm": 1.0478361050390466, "learning_rate": 4.891761647036364e-07, "loss": 0.1316, "step": 3098 }, { "epoch": 0.8077411787704036, "grad_norm": 1.0712234373506138, "learning_rate": 4.878962573487598e-07, "loss": 0.1349, "step": 3099 }, { "epoch": 0.8080018245202489, "grad_norm": 1.037641451261, "learning_rate": 4.866178455177584e-07, "loss": 0.1347, "step": 3100 }, { "epoch": 0.8082624702700941, "grad_norm": 1.074662561485503, "learning_rate": 4.853409301608297e-07, "loss": 0.1493, "step": 3101 }, { "epoch": 0.8085231160199394, "grad_norm": 1.0036841117333255, "learning_rate": 4.840655122270582e-07, "loss": 0.1367, "step": 3102 }, { "epoch": 0.8087837617697846, "grad_norm": 1.1105727071010367, "learning_rate": 4.827915926644169e-07, "loss": 0.1351, "step": 3103 }, { "epoch": 0.8090444075196299, "grad_norm": 1.086215315854713, "learning_rate": 4.815191724197634e-07, "loss": 0.1294, "step": 3104 }, { "epoch": 0.8093050532694751, "grad_norm": 1.0096080570317534, "learning_rate": 4.802482524388418e-07, "loss": 0.1368, "step": 3105 }, { "epoch": 0.8095656990193204, "grad_norm": 1.1018274446226384, "learning_rate": 4.78978833666281e-07, "loss": 0.1429, "step": 3106 }, { "epoch": 0.8098263447691656, "grad_norm": 1.0086819586955356, "learning_rate": 4.777109170455938e-07, "loss": 0.1384, "step": 3107 }, { "epoch": 0.8100869905190109, "grad_norm": 1.005119995763712, "learning_rate": 4.7644450351917617e-07, "loss": 0.1324, "step": 3108 }, { "epoch": 0.8103476362688561, "grad_norm": 0.9775851533706599, "learning_rate": 4.751795940283094e-07, "loss": 0.1263, "step": 3109 }, { "epoch": 0.8106082820187014, "grad_norm": 0.990846289296137, "learning_rate": 4.7391618951315277e-07, "loss": 0.1339, "step": 3110 }, { "epoch": 0.8108689277685466, "grad_norm": 1.0429268595995056, "learning_rate": 4.7265429091274935e-07, "loss": 0.1256, "step": 3111 }, { "epoch": 0.8111295735183918, "grad_norm": 1.0333894942118158, "learning_rate": 4.713938991650241e-07, "loss": 0.1445, "step": 3112 }, { "epoch": 0.8113902192682371, "grad_norm": 1.043516390371068, "learning_rate": 4.701350152067796e-07, "loss": 0.1254, "step": 3113 }, { "epoch": 0.8116508650180823, "grad_norm": 1.0435422085472696, "learning_rate": 4.688776399736991e-07, "loss": 0.1405, "step": 3114 }, { "epoch": 0.8119115107679276, "grad_norm": 1.0371127870572379, "learning_rate": 4.676217744003439e-07, "loss": 0.1341, "step": 3115 }, { "epoch": 0.8121721565177727, "grad_norm": 1.0322784075996423, "learning_rate": 4.6636741942015314e-07, "loss": 0.1455, "step": 3116 }, { "epoch": 0.812432802267618, "grad_norm": 1.0502783633711221, "learning_rate": 4.6511457596544406e-07, "loss": 0.1414, "step": 3117 }, { "epoch": 0.8126934480174632, "grad_norm": 1.0128166405071222, "learning_rate": 4.6386324496740855e-07, "loss": 0.1268, "step": 3118 }, { "epoch": 0.8129540937673085, "grad_norm": 0.9950873972584975, "learning_rate": 4.626134273561175e-07, "loss": 0.1301, "step": 3119 }, { "epoch": 0.8132147395171537, "grad_norm": 1.0395803507297237, "learning_rate": 4.6136512406051495e-07, "loss": 0.1404, "step": 3120 }, { "epoch": 0.813475385266999, "grad_norm": 1.024407368149671, "learning_rate": 4.601183360084174e-07, "loss": 0.1437, "step": 3121 }, { "epoch": 0.8137360310168442, "grad_norm": 0.9819635153796186, "learning_rate": 4.5887306412651933e-07, "loss": 0.1308, "step": 3122 }, { "epoch": 0.8139966767666895, "grad_norm": 1.0124452021112351, "learning_rate": 4.576293093403855e-07, "loss": 0.135, "step": 3123 }, { "epoch": 0.8142573225165347, "grad_norm": 0.9673027205345195, "learning_rate": 4.563870725744543e-07, "loss": 0.1221, "step": 3124 }, { "epoch": 0.81451796826638, "grad_norm": 1.0254908539614593, "learning_rate": 4.551463547520341e-07, "loss": 0.1408, "step": 3125 }, { "epoch": 0.8147786140162252, "grad_norm": 1.004085978168319, "learning_rate": 4.539071567953077e-07, "loss": 0.1293, "step": 3126 }, { "epoch": 0.8150392597660704, "grad_norm": 1.0351916526529452, "learning_rate": 4.526694796253242e-07, "loss": 0.1328, "step": 3127 }, { "epoch": 0.8152999055159157, "grad_norm": 1.0476394825339148, "learning_rate": 4.514333241620045e-07, "loss": 0.1381, "step": 3128 }, { "epoch": 0.8155605512657609, "grad_norm": 0.9998693524247784, "learning_rate": 4.5019869132413877e-07, "loss": 0.1324, "step": 3129 }, { "epoch": 0.8158211970156062, "grad_norm": 1.0305215299978732, "learning_rate": 4.4896558202938505e-07, "loss": 0.1317, "step": 3130 }, { "epoch": 0.8160818427654514, "grad_norm": 1.0666394962298387, "learning_rate": 4.4773399719426816e-07, "loss": 0.1429, "step": 3131 }, { "epoch": 0.8163424885152967, "grad_norm": 1.016858004812368, "learning_rate": 4.4650393773418123e-07, "loss": 0.1373, "step": 3132 }, { "epoch": 0.8166031342651419, "grad_norm": 1.0943835682492913, "learning_rate": 4.4527540456338224e-07, "loss": 0.1356, "step": 3133 }, { "epoch": 0.8168637800149872, "grad_norm": 1.0468937900259092, "learning_rate": 4.440483985949953e-07, "loss": 0.1392, "step": 3134 }, { "epoch": 0.8171244257648324, "grad_norm": 1.0473999662719322, "learning_rate": 4.428229207410106e-07, "loss": 0.1408, "step": 3135 }, { "epoch": 0.8173850715146777, "grad_norm": 1.0246371740655302, "learning_rate": 4.4159897191228096e-07, "loss": 0.1388, "step": 3136 }, { "epoch": 0.8176457172645228, "grad_norm": 1.0255258966823846, "learning_rate": 4.403765530185228e-07, "loss": 0.1227, "step": 3137 }, { "epoch": 0.8179063630143681, "grad_norm": 0.9880258850833953, "learning_rate": 4.3915566496831654e-07, "loss": 0.1279, "step": 3138 }, { "epoch": 0.8181670087642133, "grad_norm": 1.048529858330144, "learning_rate": 4.3793630866910326e-07, "loss": 0.145, "step": 3139 }, { "epoch": 0.8184276545140586, "grad_norm": 1.0686241181642646, "learning_rate": 4.367184850271872e-07, "loss": 0.146, "step": 3140 }, { "epoch": 0.8186883002639038, "grad_norm": 1.074407508361948, "learning_rate": 4.3550219494773116e-07, "loss": 0.1534, "step": 3141 }, { "epoch": 0.818948946013749, "grad_norm": 1.0339660302926383, "learning_rate": 4.3428743933476207e-07, "loss": 0.1413, "step": 3142 }, { "epoch": 0.8192095917635943, "grad_norm": 1.0128186996893271, "learning_rate": 4.330742190911616e-07, "loss": 0.1308, "step": 3143 }, { "epoch": 0.8194702375134395, "grad_norm": 0.9887705387769143, "learning_rate": 4.3186253511867227e-07, "loss": 0.1327, "step": 3144 }, { "epoch": 0.8197308832632848, "grad_norm": 1.01738078121885, "learning_rate": 4.306523883178965e-07, "loss": 0.127, "step": 3145 }, { "epoch": 0.81999152901313, "grad_norm": 1.0190167130498915, "learning_rate": 4.294437795882919e-07, "loss": 0.136, "step": 3146 }, { "epoch": 0.8202521747629753, "grad_norm": 1.0125999576005222, "learning_rate": 4.2823670982817365e-07, "loss": 0.1348, "step": 3147 }, { "epoch": 0.8205128205128205, "grad_norm": 1.0398414381932004, "learning_rate": 4.2703117993471295e-07, "loss": 0.1434, "step": 3148 }, { "epoch": 0.8207734662626658, "grad_norm": 1.0175133559923923, "learning_rate": 4.258271908039363e-07, "loss": 0.1365, "step": 3149 }, { "epoch": 0.821034112012511, "grad_norm": 0.9965683505901163, "learning_rate": 4.2462474333072544e-07, "loss": 0.1401, "step": 3150 }, { "epoch": 0.8212947577623563, "grad_norm": 1.0707260161634409, "learning_rate": 4.2342383840881544e-07, "loss": 0.1427, "step": 3151 }, { "epoch": 0.8215554035122015, "grad_norm": 1.0187813091934976, "learning_rate": 4.222244769307965e-07, "loss": 0.1379, "step": 3152 }, { "epoch": 0.8218160492620468, "grad_norm": 1.015882381093062, "learning_rate": 4.21026659788111e-07, "loss": 0.1373, "step": 3153 }, { "epoch": 0.822076695011892, "grad_norm": 1.010539718298193, "learning_rate": 4.198303878710508e-07, "loss": 0.137, "step": 3154 }, { "epoch": 0.8223373407617373, "grad_norm": 1.0076408094360836, "learning_rate": 4.186356620687637e-07, "loss": 0.1248, "step": 3155 }, { "epoch": 0.8225979865115824, "grad_norm": 0.9818637161553928, "learning_rate": 4.174424832692453e-07, "loss": 0.1208, "step": 3156 }, { "epoch": 0.8228586322614276, "grad_norm": 1.0016675511965552, "learning_rate": 4.1625085235934225e-07, "loss": 0.131, "step": 3157 }, { "epoch": 0.8231192780112729, "grad_norm": 0.9948810546443069, "learning_rate": 4.1506077022475e-07, "loss": 0.1246, "step": 3158 }, { "epoch": 0.8233799237611181, "grad_norm": 0.9265411247653795, "learning_rate": 4.138722377500154e-07, "loss": 0.1092, "step": 3159 }, { "epoch": 0.8236405695109634, "grad_norm": 0.9622075195601035, "learning_rate": 4.1268525581853015e-07, "loss": 0.1272, "step": 3160 }, { "epoch": 0.8239012152608086, "grad_norm": 0.9594575113358553, "learning_rate": 4.1149982531253435e-07, "loss": 0.1243, "step": 3161 }, { "epoch": 0.8241618610106539, "grad_norm": 1.046598925676203, "learning_rate": 4.1031594711311686e-07, "loss": 0.1427, "step": 3162 }, { "epoch": 0.8244225067604991, "grad_norm": 1.115385488444262, "learning_rate": 4.091336221002112e-07, "loss": 0.1403, "step": 3163 }, { "epoch": 0.8246831525103444, "grad_norm": 0.9908958013006361, "learning_rate": 4.079528511525968e-07, "loss": 0.1295, "step": 3164 }, { "epoch": 0.8249437982601896, "grad_norm": 1.0156464245228076, "learning_rate": 4.067736351478979e-07, "loss": 0.1369, "step": 3165 }, { "epoch": 0.8252044440100349, "grad_norm": 0.9953842704160607, "learning_rate": 4.0559597496258263e-07, "loss": 0.133, "step": 3166 }, { "epoch": 0.8254650897598801, "grad_norm": 0.994871580526206, "learning_rate": 4.044198714719633e-07, "loss": 0.1267, "step": 3167 }, { "epoch": 0.8257257355097254, "grad_norm": 1.0729406417345078, "learning_rate": 4.032453255501956e-07, "loss": 0.1405, "step": 3168 }, { "epoch": 0.8259863812595706, "grad_norm": 1.028017228181433, "learning_rate": 4.020723380702768e-07, "loss": 0.131, "step": 3169 }, { "epoch": 0.8262470270094159, "grad_norm": 1.137827322702497, "learning_rate": 4.0090090990404563e-07, "loss": 0.1485, "step": 3170 }, { "epoch": 0.8265076727592611, "grad_norm": 1.027003541271744, "learning_rate": 3.9973104192218283e-07, "loss": 0.1328, "step": 3171 }, { "epoch": 0.8267683185091063, "grad_norm": 0.950146608086048, "learning_rate": 3.9856273499420837e-07, "loss": 0.1211, "step": 3172 }, { "epoch": 0.8270289642589516, "grad_norm": 1.0222171815873964, "learning_rate": 3.97395989988483e-07, "loss": 0.1324, "step": 3173 }, { "epoch": 0.8272896100087967, "grad_norm": 1.0149956158980964, "learning_rate": 3.962308077722049e-07, "loss": 0.1415, "step": 3174 }, { "epoch": 0.827550255758642, "grad_norm": 1.047306982975552, "learning_rate": 3.950671892114141e-07, "loss": 0.1429, "step": 3175 }, { "epoch": 0.8278109015084872, "grad_norm": 0.9675770022118501, "learning_rate": 3.9390513517098413e-07, "loss": 0.1246, "step": 3176 }, { "epoch": 0.8280715472583325, "grad_norm": 1.0571767439118347, "learning_rate": 3.9274464651462786e-07, "loss": 0.1497, "step": 3177 }, { "epoch": 0.8283321930081777, "grad_norm": 1.0055175825225062, "learning_rate": 3.9158572410489537e-07, "loss": 0.1363, "step": 3178 }, { "epoch": 0.828592838758023, "grad_norm": 1.0315350887391403, "learning_rate": 3.904283688031715e-07, "loss": 0.1389, "step": 3179 }, { "epoch": 0.8288534845078682, "grad_norm": 1.076365300292966, "learning_rate": 3.8927258146967626e-07, "loss": 0.1427, "step": 3180 }, { "epoch": 0.8291141302577135, "grad_norm": 1.0207787379578597, "learning_rate": 3.8811836296346466e-07, "loss": 0.1362, "step": 3181 }, { "epoch": 0.8293747760075587, "grad_norm": 1.036335912525057, "learning_rate": 3.8696571414242546e-07, "loss": 0.1319, "step": 3182 }, { "epoch": 0.829635421757404, "grad_norm": 1.0234356531024995, "learning_rate": 3.858146358632811e-07, "loss": 0.1332, "step": 3183 }, { "epoch": 0.8298960675072492, "grad_norm": 1.020829768591857, "learning_rate": 3.846651289815856e-07, "loss": 0.1281, "step": 3184 }, { "epoch": 0.8301567132570945, "grad_norm": 1.042331895817039, "learning_rate": 3.8351719435172685e-07, "loss": 0.1355, "step": 3185 }, { "epoch": 0.8304173590069397, "grad_norm": 1.005855598257104, "learning_rate": 3.823708328269232e-07, "loss": 0.1381, "step": 3186 }, { "epoch": 0.8306780047567849, "grad_norm": 1.0424915030757615, "learning_rate": 3.81226045259222e-07, "loss": 0.1324, "step": 3187 }, { "epoch": 0.8309386505066302, "grad_norm": 1.0274798675492849, "learning_rate": 3.800828324995043e-07, "loss": 0.1415, "step": 3188 }, { "epoch": 0.8311992962564754, "grad_norm": 1.0041885289005548, "learning_rate": 3.789411953974778e-07, "loss": 0.1331, "step": 3189 }, { "epoch": 0.8314599420063207, "grad_norm": 1.0469658501207157, "learning_rate": 3.7780113480167967e-07, "loss": 0.1365, "step": 3190 }, { "epoch": 0.8317205877561659, "grad_norm": 1.0660143830521849, "learning_rate": 3.7666265155947677e-07, "loss": 0.1426, "step": 3191 }, { "epoch": 0.8319812335060112, "grad_norm": 1.0084675618198704, "learning_rate": 3.7552574651706253e-07, "loss": 0.1213, "step": 3192 }, { "epoch": 0.8322418792558564, "grad_norm": 1.0538485997223468, "learning_rate": 3.7439042051945565e-07, "loss": 0.1296, "step": 3193 }, { "epoch": 0.8325025250057017, "grad_norm": 1.0360465203879101, "learning_rate": 3.7325667441050457e-07, "loss": 0.1391, "step": 3194 }, { "epoch": 0.8327631707555468, "grad_norm": 0.9979556506999565, "learning_rate": 3.7212450903288094e-07, "loss": 0.1354, "step": 3195 }, { "epoch": 0.8330238165053921, "grad_norm": 1.0733516224262867, "learning_rate": 3.7099392522808253e-07, "loss": 0.1279, "step": 3196 }, { "epoch": 0.8332844622552373, "grad_norm": 1.0619090614955122, "learning_rate": 3.698649238364313e-07, "loss": 0.1339, "step": 3197 }, { "epoch": 0.8335451080050826, "grad_norm": 1.0312222735512109, "learning_rate": 3.6873750569707297e-07, "loss": 0.1294, "step": 3198 }, { "epoch": 0.8338057537549278, "grad_norm": 1.0465729450118422, "learning_rate": 3.676116716479769e-07, "loss": 0.1383, "step": 3199 }, { "epoch": 0.8340663995047731, "grad_norm": 0.9886393370014455, "learning_rate": 3.664874225259338e-07, "loss": 0.1321, "step": 3200 }, { "epoch": 0.8343270452546183, "grad_norm": 1.0243059118843878, "learning_rate": 3.6536475916655907e-07, "loss": 0.1402, "step": 3201 }, { "epoch": 0.8345876910044635, "grad_norm": 1.0854470900047604, "learning_rate": 3.6424368240428666e-07, "loss": 0.1276, "step": 3202 }, { "epoch": 0.8348483367543088, "grad_norm": 1.0968909473143595, "learning_rate": 3.6312419307237277e-07, "loss": 0.1586, "step": 3203 }, { "epoch": 0.835108982504154, "grad_norm": 1.0026882001512396, "learning_rate": 3.62006292002893e-07, "loss": 0.1333, "step": 3204 }, { "epoch": 0.8353696282539993, "grad_norm": 0.948947778229227, "learning_rate": 3.6088998002674266e-07, "loss": 0.1269, "step": 3205 }, { "epoch": 0.8356302740038445, "grad_norm": 1.3206206255475854, "learning_rate": 3.597752579736363e-07, "loss": 0.1387, "step": 3206 }, { "epoch": 0.8358909197536898, "grad_norm": 0.9977709107912706, "learning_rate": 3.5866212667210576e-07, "loss": 0.1363, "step": 3207 }, { "epoch": 0.836151565503535, "grad_norm": 1.0651885434969406, "learning_rate": 3.57550586949503e-07, "loss": 0.1432, "step": 3208 }, { "epoch": 0.8364122112533803, "grad_norm": 0.9846932595928221, "learning_rate": 3.564406396319936e-07, "loss": 0.1217, "step": 3209 }, { "epoch": 0.8366728570032255, "grad_norm": 1.0255257704515015, "learning_rate": 3.5533228554456095e-07, "loss": 0.1409, "step": 3210 }, { "epoch": 0.8369335027530708, "grad_norm": 1.0624653901822638, "learning_rate": 3.5422552551100607e-07, "loss": 0.1493, "step": 3211 }, { "epoch": 0.837194148502916, "grad_norm": 1.067166270205157, "learning_rate": 3.5312036035394263e-07, "loss": 0.1444, "step": 3212 }, { "epoch": 0.8374547942527613, "grad_norm": 1.0834807855334705, "learning_rate": 3.520167908948002e-07, "loss": 0.1466, "step": 3213 }, { "epoch": 0.8377154400026064, "grad_norm": 0.9974875900050085, "learning_rate": 3.5091481795382216e-07, "loss": 0.1274, "step": 3214 }, { "epoch": 0.8379760857524517, "grad_norm": 0.9857208378255934, "learning_rate": 3.4981444235006464e-07, "loss": 0.136, "step": 3215 }, { "epoch": 0.8382367315022969, "grad_norm": 1.0126868088711751, "learning_rate": 3.487156649013973e-07, "loss": 0.1228, "step": 3216 }, { "epoch": 0.8384973772521421, "grad_norm": 0.9872556495768618, "learning_rate": 3.476184864245011e-07, "loss": 0.1233, "step": 3217 }, { "epoch": 0.8387580230019874, "grad_norm": 1.0261451911201598, "learning_rate": 3.4652290773487003e-07, "loss": 0.1386, "step": 3218 }, { "epoch": 0.8390186687518326, "grad_norm": 1.0158044846449361, "learning_rate": 3.4542892964680804e-07, "loss": 0.1347, "step": 3219 }, { "epoch": 0.8392793145016779, "grad_norm": 0.9906921916066627, "learning_rate": 3.44336552973428e-07, "loss": 0.1287, "step": 3220 }, { "epoch": 0.8395399602515231, "grad_norm": 1.0040303096724454, "learning_rate": 3.432457785266552e-07, "loss": 0.1391, "step": 3221 }, { "epoch": 0.8398006060013684, "grad_norm": 1.001375588234231, "learning_rate": 3.4215660711722276e-07, "loss": 0.1409, "step": 3222 }, { "epoch": 0.8400612517512136, "grad_norm": 0.9744232657938052, "learning_rate": 3.410690395546717e-07, "loss": 0.1241, "step": 3223 }, { "epoch": 0.8403218975010589, "grad_norm": 1.02318374551476, "learning_rate": 3.399830766473525e-07, "loss": 0.1319, "step": 3224 }, { "epoch": 0.8405825432509041, "grad_norm": 1.1072264043731679, "learning_rate": 3.3889871920242235e-07, "loss": 0.1345, "step": 3225 }, { "epoch": 0.8408431890007494, "grad_norm": 1.0627678394241595, "learning_rate": 3.378159680258428e-07, "loss": 0.1543, "step": 3226 }, { "epoch": 0.8411038347505946, "grad_norm": 0.9746011845121422, "learning_rate": 3.3673482392238584e-07, "loss": 0.1277, "step": 3227 }, { "epoch": 0.8413644805004399, "grad_norm": 0.9943314395896826, "learning_rate": 3.356552876956257e-07, "loss": 0.1272, "step": 3228 }, { "epoch": 0.8416251262502851, "grad_norm": 1.0638066597909779, "learning_rate": 3.345773601479427e-07, "loss": 0.1447, "step": 3229 }, { "epoch": 0.8418857720001304, "grad_norm": 1.136961124610057, "learning_rate": 3.3350104208052153e-07, "loss": 0.1597, "step": 3230 }, { "epoch": 0.8421464177499756, "grad_norm": 1.0281255229442279, "learning_rate": 3.3242633429335034e-07, "loss": 0.1241, "step": 3231 }, { "epoch": 0.8424070634998208, "grad_norm": 1.005335538805916, "learning_rate": 3.3135323758522047e-07, "loss": 0.1347, "step": 3232 }, { "epoch": 0.842667709249666, "grad_norm": 1.0735318102291929, "learning_rate": 3.302817527537255e-07, "loss": 0.1373, "step": 3233 }, { "epoch": 0.8429283549995112, "grad_norm": 1.0405882044247379, "learning_rate": 3.292118805952621e-07, "loss": 0.1413, "step": 3234 }, { "epoch": 0.8431890007493565, "grad_norm": 1.0250804897323114, "learning_rate": 3.281436219050274e-07, "loss": 0.1298, "step": 3235 }, { "epoch": 0.8434496464992017, "grad_norm": 0.9877151960231637, "learning_rate": 3.2707697747701913e-07, "loss": 0.1275, "step": 3236 }, { "epoch": 0.843710292249047, "grad_norm": 1.0704760448296484, "learning_rate": 3.260119481040355e-07, "loss": 0.1393, "step": 3237 }, { "epoch": 0.8439709379988922, "grad_norm": 0.994588181489142, "learning_rate": 3.249485345776743e-07, "loss": 0.1321, "step": 3238 }, { "epoch": 0.8442315837487375, "grad_norm": 0.9959525052841522, "learning_rate": 3.2388673768833226e-07, "loss": 0.1269, "step": 3239 }, { "epoch": 0.8444922294985827, "grad_norm": 1.048857377905849, "learning_rate": 3.228265582252041e-07, "loss": 0.1377, "step": 3240 }, { "epoch": 0.844752875248428, "grad_norm": 1.100206288668051, "learning_rate": 3.2176799697628434e-07, "loss": 0.1354, "step": 3241 }, { "epoch": 0.8450135209982732, "grad_norm": 1.0843736608674157, "learning_rate": 3.2071105472836207e-07, "loss": 0.1432, "step": 3242 }, { "epoch": 0.8452741667481185, "grad_norm": 1.0742172282500173, "learning_rate": 3.196557322670235e-07, "loss": 0.1416, "step": 3243 }, { "epoch": 0.8455348124979637, "grad_norm": 1.0773923575489037, "learning_rate": 3.186020303766532e-07, "loss": 0.1376, "step": 3244 }, { "epoch": 0.845795458247809, "grad_norm": 1.0653589394888583, "learning_rate": 3.175499498404291e-07, "loss": 0.1372, "step": 3245 }, { "epoch": 0.8460561039976542, "grad_norm": 1.0234823691393404, "learning_rate": 3.164994914403241e-07, "loss": 0.1376, "step": 3246 }, { "epoch": 0.8463167497474994, "grad_norm": 1.037010824224123, "learning_rate": 3.154506559571066e-07, "loss": 0.1487, "step": 3247 }, { "epoch": 0.8465773954973447, "grad_norm": 1.0453756917966264, "learning_rate": 3.1440344417033754e-07, "loss": 0.1416, "step": 3248 }, { "epoch": 0.8468380412471899, "grad_norm": 1.01559235208902, "learning_rate": 3.1335785685837117e-07, "loss": 0.1392, "step": 3249 }, { "epoch": 0.8470986869970352, "grad_norm": 1.0353879701833697, "learning_rate": 3.123138947983556e-07, "loss": 0.1302, "step": 3250 }, { "epoch": 0.8473593327468804, "grad_norm": 1.0352750657022407, "learning_rate": 3.1127155876622986e-07, "loss": 0.1453, "step": 3251 }, { "epoch": 0.8476199784967257, "grad_norm": 1.0062349502596062, "learning_rate": 3.102308495367243e-07, "loss": 0.1306, "step": 3252 }, { "epoch": 0.8478806242465708, "grad_norm": 0.9919757778149241, "learning_rate": 3.091917678833603e-07, "loss": 0.1296, "step": 3253 }, { "epoch": 0.8481412699964161, "grad_norm": 1.0942946137063423, "learning_rate": 3.081543145784499e-07, "loss": 0.1391, "step": 3254 }, { "epoch": 0.8484019157462613, "grad_norm": 1.0939743926931396, "learning_rate": 3.071184903930946e-07, "loss": 0.1301, "step": 3255 }, { "epoch": 0.8486625614961066, "grad_norm": 1.016332313542457, "learning_rate": 3.060842960971844e-07, "loss": 0.1406, "step": 3256 }, { "epoch": 0.8489232072459518, "grad_norm": 1.0637613581778431, "learning_rate": 3.050517324593996e-07, "loss": 0.1376, "step": 3257 }, { "epoch": 0.8491838529957971, "grad_norm": 1.0432973206225564, "learning_rate": 3.0402080024720724e-07, "loss": 0.1458, "step": 3258 }, { "epoch": 0.8494444987456423, "grad_norm": 1.0205513936631783, "learning_rate": 3.029915002268605e-07, "loss": 0.1325, "step": 3259 }, { "epoch": 0.8497051444954876, "grad_norm": 1.110756963755259, "learning_rate": 3.0196383316340226e-07, "loss": 0.1367, "step": 3260 }, { "epoch": 0.8499657902453328, "grad_norm": 0.9837104968229591, "learning_rate": 3.0093779982065973e-07, "loss": 0.1206, "step": 3261 }, { "epoch": 0.850226435995178, "grad_norm": 1.028021118117912, "learning_rate": 2.9991340096124644e-07, "loss": 0.1303, "step": 3262 }, { "epoch": 0.8504870817450233, "grad_norm": 1.047173804378036, "learning_rate": 2.988906373465608e-07, "loss": 0.137, "step": 3263 }, { "epoch": 0.8507477274948685, "grad_norm": 1.080013596783809, "learning_rate": 2.978695097367862e-07, "loss": 0.1423, "step": 3264 }, { "epoch": 0.8510083732447138, "grad_norm": 1.0809448964717756, "learning_rate": 2.968500188908899e-07, "loss": 0.1372, "step": 3265 }, { "epoch": 0.851269018994559, "grad_norm": 1.0412677042338414, "learning_rate": 2.958321655666219e-07, "loss": 0.133, "step": 3266 }, { "epoch": 0.8515296647444043, "grad_norm": 0.9727543387296185, "learning_rate": 2.948159505205164e-07, "loss": 0.1293, "step": 3267 }, { "epoch": 0.8517903104942495, "grad_norm": 0.9930566657708491, "learning_rate": 2.938013745078899e-07, "loss": 0.1282, "step": 3268 }, { "epoch": 0.8520509562440948, "grad_norm": 1.0437692116131936, "learning_rate": 2.9278843828283803e-07, "loss": 0.1403, "step": 3269 }, { "epoch": 0.85231160199394, "grad_norm": 0.984993465179924, "learning_rate": 2.917771425982413e-07, "loss": 0.127, "step": 3270 }, { "epoch": 0.8525722477437853, "grad_norm": 0.9664622246420943, "learning_rate": 2.9076748820575875e-07, "loss": 0.129, "step": 3271 }, { "epoch": 0.8528328934936305, "grad_norm": 1.0309060890441992, "learning_rate": 2.897594758558298e-07, "loss": 0.1378, "step": 3272 }, { "epoch": 0.8530935392434758, "grad_norm": 1.016341248575509, "learning_rate": 2.8875310629767314e-07, "loss": 0.1357, "step": 3273 }, { "epoch": 0.8533541849933209, "grad_norm": 1.0035669259236824, "learning_rate": 2.8774838027928854e-07, "loss": 0.1287, "step": 3274 }, { "epoch": 0.8536148307431662, "grad_norm": 1.1151508169907736, "learning_rate": 2.867452985474503e-07, "loss": 0.1349, "step": 3275 }, { "epoch": 0.8538754764930114, "grad_norm": 1.0189415866265221, "learning_rate": 2.857438618477135e-07, "loss": 0.135, "step": 3276 }, { "epoch": 0.8541361222428567, "grad_norm": 1.0306003304992768, "learning_rate": 2.8474407092441035e-07, "loss": 0.1395, "step": 3277 }, { "epoch": 0.8543967679927019, "grad_norm": 1.0225542380684676, "learning_rate": 2.837459265206491e-07, "loss": 0.13, "step": 3278 }, { "epoch": 0.8546574137425471, "grad_norm": 1.1017447315695656, "learning_rate": 2.8274942937831393e-07, "loss": 0.1366, "step": 3279 }, { "epoch": 0.8549180594923924, "grad_norm": 0.9933422030256426, "learning_rate": 2.817545802380653e-07, "loss": 0.1303, "step": 3280 }, { "epoch": 0.8551787052422376, "grad_norm": 1.0278843349673825, "learning_rate": 2.8076137983933856e-07, "loss": 0.137, "step": 3281 }, { "epoch": 0.8554393509920829, "grad_norm": 0.9826932046144561, "learning_rate": 2.797698289203432e-07, "loss": 0.1231, "step": 3282 }, { "epoch": 0.8556999967419281, "grad_norm": 1.0589259269399618, "learning_rate": 2.7877992821806394e-07, "loss": 0.132, "step": 3283 }, { "epoch": 0.8559606424917734, "grad_norm": 1.0271999228939044, "learning_rate": 2.7779167846825797e-07, "loss": 0.1397, "step": 3284 }, { "epoch": 0.8562212882416186, "grad_norm": 1.0462886325560343, "learning_rate": 2.768050804054551e-07, "loss": 0.1368, "step": 3285 }, { "epoch": 0.8564819339914639, "grad_norm": 1.020992091100361, "learning_rate": 2.7582013476295826e-07, "loss": 0.1379, "step": 3286 }, { "epoch": 0.8567425797413091, "grad_norm": 1.0126118517372449, "learning_rate": 2.7483684227284204e-07, "loss": 0.1414, "step": 3287 }, { "epoch": 0.8570032254911544, "grad_norm": 0.9833736009116881, "learning_rate": 2.738552036659517e-07, "loss": 0.1276, "step": 3288 }, { "epoch": 0.8572638712409996, "grad_norm": 1.0611968080266936, "learning_rate": 2.728752196719034e-07, "loss": 0.1392, "step": 3289 }, { "epoch": 0.8575245169908449, "grad_norm": 1.0293866940988545, "learning_rate": 2.718968910190853e-07, "loss": 0.1334, "step": 3290 }, { "epoch": 0.85778516274069, "grad_norm": 1.0495485318577287, "learning_rate": 2.709202184346524e-07, "loss": 0.1538, "step": 3291 }, { "epoch": 0.8580458084905354, "grad_norm": 0.998749637815533, "learning_rate": 2.6994520264452957e-07, "loss": 0.1278, "step": 3292 }, { "epoch": 0.8583064542403805, "grad_norm": 1.0250776689453007, "learning_rate": 2.689718443734121e-07, "loss": 0.1421, "step": 3293 }, { "epoch": 0.8585670999902257, "grad_norm": 1.038510404740346, "learning_rate": 2.6800014434476137e-07, "loss": 0.1333, "step": 3294 }, { "epoch": 0.858827745740071, "grad_norm": 1.0957018694185017, "learning_rate": 2.6703010328080675e-07, "loss": 0.1466, "step": 3295 }, { "epoch": 0.8590883914899162, "grad_norm": 0.9983445897955722, "learning_rate": 2.6606172190254467e-07, "loss": 0.1348, "step": 3296 }, { "epoch": 0.8593490372397615, "grad_norm": 1.0180153156157647, "learning_rate": 2.650950009297382e-07, "loss": 0.1322, "step": 3297 }, { "epoch": 0.8596096829896067, "grad_norm": 1.0390077477367676, "learning_rate": 2.6412994108091586e-07, "loss": 0.1413, "step": 3298 }, { "epoch": 0.859870328739452, "grad_norm": 1.0344659087295576, "learning_rate": 2.6316654307337126e-07, "loss": 0.1403, "step": 3299 }, { "epoch": 0.8601309744892972, "grad_norm": 1.0556467966677554, "learning_rate": 2.6220480762316396e-07, "loss": 0.14, "step": 3300 }, { "epoch": 0.8603916202391425, "grad_norm": 0.9980328507283948, "learning_rate": 2.612447354451175e-07, "loss": 0.1377, "step": 3301 }, { "epoch": 0.8606522659889877, "grad_norm": 1.06801529419405, "learning_rate": 2.6028632725281723e-07, "loss": 0.1345, "step": 3302 }, { "epoch": 0.860912911738833, "grad_norm": 1.0071983934420112, "learning_rate": 2.593295837586146e-07, "loss": 0.1296, "step": 3303 }, { "epoch": 0.8611735574886782, "grad_norm": 1.041340619710759, "learning_rate": 2.58374505673622e-07, "loss": 0.1403, "step": 3304 }, { "epoch": 0.8614342032385235, "grad_norm": 1.1144215398602442, "learning_rate": 2.5742109370771427e-07, "loss": 0.1377, "step": 3305 }, { "epoch": 0.8616948489883687, "grad_norm": 0.9744653260461316, "learning_rate": 2.56469348569528e-07, "loss": 0.1217, "step": 3306 }, { "epoch": 0.861955494738214, "grad_norm": 1.0097524850561868, "learning_rate": 2.555192709664617e-07, "loss": 0.1378, "step": 3307 }, { "epoch": 0.8622161404880592, "grad_norm": 1.1021743723663286, "learning_rate": 2.5457086160467187e-07, "loss": 0.1472, "step": 3308 }, { "epoch": 0.8624767862379044, "grad_norm": 1.079502076432865, "learning_rate": 2.5362412118907886e-07, "loss": 0.143, "step": 3309 }, { "epoch": 0.8627374319877497, "grad_norm": 1.0905281971061644, "learning_rate": 2.526790504233592e-07, "loss": 0.1472, "step": 3310 }, { "epoch": 0.8629980777375948, "grad_norm": 1.0192730533071948, "learning_rate": 2.5173565000995047e-07, "loss": 0.1336, "step": 3311 }, { "epoch": 0.8632587234874401, "grad_norm": 1.1002839315138864, "learning_rate": 2.507939206500479e-07, "loss": 0.1431, "step": 3312 }, { "epoch": 0.8635193692372853, "grad_norm": 1.0169252844547598, "learning_rate": 2.498538630436048e-07, "loss": 0.1362, "step": 3313 }, { "epoch": 0.8637800149871306, "grad_norm": 1.0146148321471258, "learning_rate": 2.4891547788933185e-07, "loss": 0.1424, "step": 3314 }, { "epoch": 0.8640406607369758, "grad_norm": 1.0818722338369071, "learning_rate": 2.479787658846963e-07, "loss": 0.1433, "step": 3315 }, { "epoch": 0.8643013064868211, "grad_norm": 1.0612498159953618, "learning_rate": 2.4704372772592326e-07, "loss": 0.1376, "step": 3316 }, { "epoch": 0.8645619522366663, "grad_norm": 1.041680376828039, "learning_rate": 2.4611036410799233e-07, "loss": 0.1388, "step": 3317 }, { "epoch": 0.8648225979865116, "grad_norm": 1.0451319603663622, "learning_rate": 2.4517867572463884e-07, "loss": 0.1363, "step": 3318 }, { "epoch": 0.8650832437363568, "grad_norm": 0.9936040597625276, "learning_rate": 2.44248663268353e-07, "loss": 0.1279, "step": 3319 }, { "epoch": 0.8653438894862021, "grad_norm": 0.9954177125767946, "learning_rate": 2.433203274303794e-07, "loss": 0.1262, "step": 3320 }, { "epoch": 0.8656045352360473, "grad_norm": 1.0626454557236942, "learning_rate": 2.423936689007167e-07, "loss": 0.1375, "step": 3321 }, { "epoch": 0.8658651809858926, "grad_norm": 1.0630844602346468, "learning_rate": 2.414686883681158e-07, "loss": 0.1415, "step": 3322 }, { "epoch": 0.8661258267357378, "grad_norm": 1.0298155371646995, "learning_rate": 2.4054538652008295e-07, "loss": 0.1417, "step": 3323 }, { "epoch": 0.866386472485583, "grad_norm": 1.0511330599114912, "learning_rate": 2.3962376404287365e-07, "loss": 0.1447, "step": 3324 }, { "epoch": 0.8666471182354283, "grad_norm": 1.035509012928421, "learning_rate": 2.3870382162149627e-07, "loss": 0.1455, "step": 3325 }, { "epoch": 0.8669077639852735, "grad_norm": 1.0399459576259023, "learning_rate": 2.3778555993971164e-07, "loss": 0.1366, "step": 3326 }, { "epoch": 0.8671684097351188, "grad_norm": 1.034946345590324, "learning_rate": 2.3686897968002998e-07, "loss": 0.1303, "step": 3327 }, { "epoch": 0.867429055484964, "grad_norm": 1.0666131430641983, "learning_rate": 2.359540815237124e-07, "loss": 0.1399, "step": 3328 }, { "epoch": 0.8676897012348093, "grad_norm": 1.0111710563727303, "learning_rate": 2.350408661507697e-07, "loss": 0.1471, "step": 3329 }, { "epoch": 0.8679503469846545, "grad_norm": 0.9915355368185084, "learning_rate": 2.3412933423996114e-07, "loss": 0.129, "step": 3330 }, { "epoch": 0.8682109927344998, "grad_norm": 1.028513729272633, "learning_rate": 2.3321948646879616e-07, "loss": 0.1391, "step": 3331 }, { "epoch": 0.8684716384843449, "grad_norm": 1.0749211803720333, "learning_rate": 2.3231132351353075e-07, "loss": 0.1449, "step": 3332 }, { "epoch": 0.8687322842341902, "grad_norm": 1.0619881643121862, "learning_rate": 2.314048460491708e-07, "loss": 0.1526, "step": 3333 }, { "epoch": 0.8689929299840354, "grad_norm": 0.9769977986366863, "learning_rate": 2.305000547494679e-07, "loss": 0.1314, "step": 3334 }, { "epoch": 0.8692535757338807, "grad_norm": 0.9783563188561993, "learning_rate": 2.295969502869194e-07, "loss": 0.1286, "step": 3335 }, { "epoch": 0.8695142214837259, "grad_norm": 1.0046225489046658, "learning_rate": 2.2869553333277145e-07, "loss": 0.1271, "step": 3336 }, { "epoch": 0.8697748672335712, "grad_norm": 0.9818131529500744, "learning_rate": 2.2779580455701444e-07, "loss": 0.1322, "step": 3337 }, { "epoch": 0.8700355129834164, "grad_norm": 0.9834053934961525, "learning_rate": 2.2689776462838348e-07, "loss": 0.1307, "step": 3338 }, { "epoch": 0.8702961587332616, "grad_norm": 1.0074569029098974, "learning_rate": 2.2600141421436017e-07, "loss": 0.1268, "step": 3339 }, { "epoch": 0.8705568044831069, "grad_norm": 1.0683366899065327, "learning_rate": 2.251067539811691e-07, "loss": 0.1381, "step": 3340 }, { "epoch": 0.8708174502329521, "grad_norm": 1.0380495521657798, "learning_rate": 2.242137845937778e-07, "loss": 0.1375, "step": 3341 }, { "epoch": 0.8710780959827974, "grad_norm": 0.9814687635098425, "learning_rate": 2.233225067158995e-07, "loss": 0.134, "step": 3342 }, { "epoch": 0.8713387417326426, "grad_norm": 1.039665238238666, "learning_rate": 2.2243292100998792e-07, "loss": 0.1372, "step": 3343 }, { "epoch": 0.8715993874824879, "grad_norm": 0.9801993750763405, "learning_rate": 2.2154502813724026e-07, "loss": 0.1309, "step": 3344 }, { "epoch": 0.8718600332323331, "grad_norm": 0.9635498373424813, "learning_rate": 2.2065882875759532e-07, "loss": 0.1313, "step": 3345 }, { "epoch": 0.8721206789821784, "grad_norm": 1.0177693300217192, "learning_rate": 2.1977432352973226e-07, "loss": 0.1383, "step": 3346 }, { "epoch": 0.8723813247320236, "grad_norm": 1.0169907436834853, "learning_rate": 2.188915131110725e-07, "loss": 0.1398, "step": 3347 }, { "epoch": 0.8726419704818689, "grad_norm": 1.0411035190400215, "learning_rate": 2.1801039815777637e-07, "loss": 0.1368, "step": 3348 }, { "epoch": 0.8729026162317141, "grad_norm": 0.991919127065499, "learning_rate": 2.1713097932474586e-07, "loss": 0.1281, "step": 3349 }, { "epoch": 0.8731632619815594, "grad_norm": 0.9711589463652968, "learning_rate": 2.1625325726562009e-07, "loss": 0.1311, "step": 3350 }, { "epoch": 0.8734239077314045, "grad_norm": 1.0461379514271885, "learning_rate": 2.1537723263277883e-07, "loss": 0.1381, "step": 3351 }, { "epoch": 0.8736845534812498, "grad_norm": 1.0535473269826963, "learning_rate": 2.1450290607733903e-07, "loss": 0.1421, "step": 3352 }, { "epoch": 0.873945199231095, "grad_norm": 1.0259802316022275, "learning_rate": 2.136302782491559e-07, "loss": 0.1365, "step": 3353 }, { "epoch": 0.8742058449809402, "grad_norm": 1.0214596694080438, "learning_rate": 2.127593497968225e-07, "loss": 0.1226, "step": 3354 }, { "epoch": 0.8744664907307855, "grad_norm": 1.131379130104583, "learning_rate": 2.1189012136766774e-07, "loss": 0.1408, "step": 3355 }, { "epoch": 0.8747271364806307, "grad_norm": 0.9974686030841757, "learning_rate": 2.1102259360775895e-07, "loss": 0.1341, "step": 3356 }, { "epoch": 0.874987782230476, "grad_norm": 1.0122253937153516, "learning_rate": 2.1015676716189703e-07, "loss": 0.1342, "step": 3357 }, { "epoch": 0.8752484279803212, "grad_norm": 1.0101281251426018, "learning_rate": 2.0929264267361888e-07, "loss": 0.1373, "step": 3358 }, { "epoch": 0.8755090737301665, "grad_norm": 1.031337835698769, "learning_rate": 2.0843022078519848e-07, "loss": 0.1374, "step": 3359 }, { "epoch": 0.8757697194800117, "grad_norm": 1.032392635749291, "learning_rate": 2.07569502137642e-07, "loss": 0.1408, "step": 3360 }, { "epoch": 0.876030365229857, "grad_norm": 0.9786509930400092, "learning_rate": 2.067104873706907e-07, "loss": 0.1196, "step": 3361 }, { "epoch": 0.8762910109797022, "grad_norm": 1.0579246012750199, "learning_rate": 2.0585317712281866e-07, "loss": 0.1407, "step": 3362 }, { "epoch": 0.8765516567295475, "grad_norm": 1.0012603791486652, "learning_rate": 2.0499757203123416e-07, "loss": 0.1308, "step": 3363 }, { "epoch": 0.8768123024793927, "grad_norm": 1.0045411916850606, "learning_rate": 2.0414367273187713e-07, "loss": 0.1317, "step": 3364 }, { "epoch": 0.877072948229238, "grad_norm": 1.0045234407433812, "learning_rate": 2.032914798594196e-07, "loss": 0.1283, "step": 3365 }, { "epoch": 0.8773335939790832, "grad_norm": 1.0192689165525681, "learning_rate": 2.0244099404726686e-07, "loss": 0.1375, "step": 3366 }, { "epoch": 0.8775942397289285, "grad_norm": 1.0539493841385685, "learning_rate": 2.015922159275538e-07, "loss": 0.1498, "step": 3367 }, { "epoch": 0.8778548854787737, "grad_norm": 1.0187589147289915, "learning_rate": 2.007451461311455e-07, "loss": 0.1326, "step": 3368 }, { "epoch": 0.8781155312286189, "grad_norm": 1.1028505178963168, "learning_rate": 1.9989978528763915e-07, "loss": 0.1328, "step": 3369 }, { "epoch": 0.8783761769784642, "grad_norm": 1.0516957831274196, "learning_rate": 1.990561340253608e-07, "loss": 0.1369, "step": 3370 }, { "epoch": 0.8786368227283093, "grad_norm": 0.9824259509167337, "learning_rate": 1.9821419297136546e-07, "loss": 0.1186, "step": 3371 }, { "epoch": 0.8788974684781546, "grad_norm": 1.0396530079667843, "learning_rate": 1.9737396275143816e-07, "loss": 0.1357, "step": 3372 }, { "epoch": 0.8791581142279998, "grad_norm": 1.0641057652621235, "learning_rate": 1.9653544399009183e-07, "loss": 0.1451, "step": 3373 }, { "epoch": 0.8794187599778451, "grad_norm": 1.040701238986467, "learning_rate": 1.9569863731056544e-07, "loss": 0.1409, "step": 3374 }, { "epoch": 0.8796794057276903, "grad_norm": 1.0635103677700937, "learning_rate": 1.9486354333482866e-07, "loss": 0.1401, "step": 3375 }, { "epoch": 0.8799400514775356, "grad_norm": 1.0738876373710164, "learning_rate": 1.940301626835761e-07, "loss": 0.1357, "step": 3376 }, { "epoch": 0.8802006972273808, "grad_norm": 1.018852590792337, "learning_rate": 1.9319849597622954e-07, "loss": 0.1348, "step": 3377 }, { "epoch": 0.8804613429772261, "grad_norm": 1.0524909180026567, "learning_rate": 1.9236854383093668e-07, "loss": 0.1429, "step": 3378 }, { "epoch": 0.8807219887270713, "grad_norm": 1.0301190397305438, "learning_rate": 1.9154030686457077e-07, "loss": 0.1307, "step": 3379 }, { "epoch": 0.8809826344769166, "grad_norm": 0.9840182233292807, "learning_rate": 1.9071378569273047e-07, "loss": 0.1305, "step": 3380 }, { "epoch": 0.8812432802267618, "grad_norm": 1.028353715269271, "learning_rate": 1.8988898092973885e-07, "loss": 0.1321, "step": 3381 }, { "epoch": 0.8815039259766071, "grad_norm": 0.9743025740741776, "learning_rate": 1.8906589318864384e-07, "loss": 0.1239, "step": 3382 }, { "epoch": 0.8817645717264523, "grad_norm": 1.0779315530884948, "learning_rate": 1.882445230812166e-07, "loss": 0.1475, "step": 3383 }, { "epoch": 0.8820252174762975, "grad_norm": 0.9792599528763515, "learning_rate": 1.8742487121795188e-07, "loss": 0.1281, "step": 3384 }, { "epoch": 0.8822858632261428, "grad_norm": 1.046010862267601, "learning_rate": 1.866069382080671e-07, "loss": 0.1375, "step": 3385 }, { "epoch": 0.882546508975988, "grad_norm": 1.0075154032323426, "learning_rate": 1.8579072465950197e-07, "loss": 0.1356, "step": 3386 }, { "epoch": 0.8828071547258333, "grad_norm": 1.1002121818251636, "learning_rate": 1.849762311789191e-07, "loss": 0.1437, "step": 3387 }, { "epoch": 0.8830678004756785, "grad_norm": 1.049515720329721, "learning_rate": 1.8416345837170114e-07, "loss": 0.134, "step": 3388 }, { "epoch": 0.8833284462255238, "grad_norm": 1.0198256139776496, "learning_rate": 1.8335240684195427e-07, "loss": 0.1292, "step": 3389 }, { "epoch": 0.883589091975369, "grad_norm": 1.0252278000492578, "learning_rate": 1.8254307719250214e-07, "loss": 0.1296, "step": 3390 }, { "epoch": 0.8838497377252142, "grad_norm": 1.0488956665774616, "learning_rate": 1.8173547002489024e-07, "loss": 0.1382, "step": 3391 }, { "epoch": 0.8841103834750594, "grad_norm": 1.072937333384503, "learning_rate": 1.8092958593938476e-07, "loss": 0.1407, "step": 3392 }, { "epoch": 0.8843710292249047, "grad_norm": 1.0115564821733578, "learning_rate": 1.8012542553496964e-07, "loss": 0.1363, "step": 3393 }, { "epoch": 0.8846316749747499, "grad_norm": 0.9566512189258157, "learning_rate": 1.793229894093479e-07, "loss": 0.1237, "step": 3394 }, { "epoch": 0.8848923207245952, "grad_norm": 1.0133518127825958, "learning_rate": 1.7852227815894242e-07, "loss": 0.1235, "step": 3395 }, { "epoch": 0.8851529664744404, "grad_norm": 1.0439558522163568, "learning_rate": 1.7772329237889158e-07, "loss": 0.1399, "step": 3396 }, { "epoch": 0.8854136122242857, "grad_norm": 1.0452812496367954, "learning_rate": 1.769260326630526e-07, "loss": 0.1345, "step": 3397 }, { "epoch": 0.8856742579741309, "grad_norm": 1.055434239544034, "learning_rate": 1.7613049960400057e-07, "loss": 0.1336, "step": 3398 }, { "epoch": 0.8859349037239761, "grad_norm": 1.0042113927261962, "learning_rate": 1.753366937930262e-07, "loss": 0.1233, "step": 3399 }, { "epoch": 0.8861955494738214, "grad_norm": 1.021638440667988, "learning_rate": 1.7454461582013665e-07, "loss": 0.1305, "step": 3400 }, { "epoch": 0.8864561952236666, "grad_norm": 1.0675752626218222, "learning_rate": 1.7375426627405433e-07, "loss": 0.1448, "step": 3401 }, { "epoch": 0.8867168409735119, "grad_norm": 1.043440051792785, "learning_rate": 1.7296564574221796e-07, "loss": 0.1402, "step": 3402 }, { "epoch": 0.8869774867233571, "grad_norm": 1.0081471490521887, "learning_rate": 1.7217875481078034e-07, "loss": 0.1313, "step": 3403 }, { "epoch": 0.8872381324732024, "grad_norm": 1.0123597513507827, "learning_rate": 1.7139359406460888e-07, "loss": 0.1368, "step": 3404 }, { "epoch": 0.8874987782230476, "grad_norm": 1.0402694981511154, "learning_rate": 1.7061016408728537e-07, "loss": 0.1347, "step": 3405 }, { "epoch": 0.8877594239728929, "grad_norm": 0.9828729586447071, "learning_rate": 1.698284654611057e-07, "loss": 0.1281, "step": 3406 }, { "epoch": 0.8880200697227381, "grad_norm": 1.0673773530099637, "learning_rate": 1.6904849876707653e-07, "loss": 0.1444, "step": 3407 }, { "epoch": 0.8882807154725834, "grad_norm": 1.0491185929488964, "learning_rate": 1.6827026458492018e-07, "loss": 0.1371, "step": 3408 }, { "epoch": 0.8885413612224285, "grad_norm": 1.0557417291037903, "learning_rate": 1.6749376349306957e-07, "loss": 0.1373, "step": 3409 }, { "epoch": 0.8888020069722739, "grad_norm": 1.0126884065386383, "learning_rate": 1.6671899606866966e-07, "loss": 0.1408, "step": 3410 }, { "epoch": 0.889062652722119, "grad_norm": 1.0404255685982855, "learning_rate": 1.6594596288757702e-07, "loss": 0.1266, "step": 3411 }, { "epoch": 0.8893232984719643, "grad_norm": 0.9947444112223235, "learning_rate": 1.651746645243596e-07, "loss": 0.1291, "step": 3412 }, { "epoch": 0.8895839442218095, "grad_norm": 1.0468464337534147, "learning_rate": 1.6440510155229544e-07, "loss": 0.137, "step": 3413 }, { "epoch": 0.8898445899716547, "grad_norm": 1.039834656168193, "learning_rate": 1.6363727454337202e-07, "loss": 0.1428, "step": 3414 }, { "epoch": 0.8901052357215, "grad_norm": 1.0200885066356908, "learning_rate": 1.6287118406828868e-07, "loss": 0.1368, "step": 3415 }, { "epoch": 0.8903658814713452, "grad_norm": 1.0018138269571744, "learning_rate": 1.6210683069645212e-07, "loss": 0.1253, "step": 3416 }, { "epoch": 0.8906265272211905, "grad_norm": 1.0135529937932095, "learning_rate": 1.6134421499597874e-07, "loss": 0.1353, "step": 3417 }, { "epoch": 0.8908871729710357, "grad_norm": 1.030739340850227, "learning_rate": 1.6058333753369265e-07, "loss": 0.1422, "step": 3418 }, { "epoch": 0.891147818720881, "grad_norm": 1.0364948765463964, "learning_rate": 1.59824198875127e-07, "loss": 0.1325, "step": 3419 }, { "epoch": 0.8914084644707262, "grad_norm": 1.040957176554188, "learning_rate": 1.5906679958452215e-07, "loss": 0.1371, "step": 3420 }, { "epoch": 0.8916691102205715, "grad_norm": 1.1218476451792034, "learning_rate": 1.583111402248247e-07, "loss": 0.1472, "step": 3421 }, { "epoch": 0.8919297559704167, "grad_norm": 1.0345938943845185, "learning_rate": 1.5755722135769036e-07, "loss": 0.1349, "step": 3422 }, { "epoch": 0.892190401720262, "grad_norm": 1.0375464141940578, "learning_rate": 1.5680504354347868e-07, "loss": 0.1357, "step": 3423 }, { "epoch": 0.8924510474701072, "grad_norm": 1.0624509987312094, "learning_rate": 1.5605460734125582e-07, "loss": 0.1375, "step": 3424 }, { "epoch": 0.8927116932199525, "grad_norm": 1.116848715502364, "learning_rate": 1.5530591330879502e-07, "loss": 0.1444, "step": 3425 }, { "epoch": 0.8929723389697977, "grad_norm": 1.0490876012026864, "learning_rate": 1.545589620025731e-07, "loss": 0.1447, "step": 3426 }, { "epoch": 0.893232984719643, "grad_norm": 0.9983799201193844, "learning_rate": 1.5381375397777122e-07, "loss": 0.1269, "step": 3427 }, { "epoch": 0.8934936304694882, "grad_norm": 1.0579399868276267, "learning_rate": 1.530702897882766e-07, "loss": 0.1333, "step": 3428 }, { "epoch": 0.8937542762193333, "grad_norm": 0.9502690616638636, "learning_rate": 1.5232856998667862e-07, "loss": 0.1233, "step": 3429 }, { "epoch": 0.8940149219691786, "grad_norm": 1.0368823639839433, "learning_rate": 1.5158859512427044e-07, "loss": 0.1287, "step": 3430 }, { "epoch": 0.8942755677190238, "grad_norm": 1.022555297715405, "learning_rate": 1.5085036575104906e-07, "loss": 0.1403, "step": 3431 }, { "epoch": 0.8945362134688691, "grad_norm": 1.0727382078520935, "learning_rate": 1.501138824157139e-07, "loss": 0.1472, "step": 3432 }, { "epoch": 0.8947968592187143, "grad_norm": 1.022685825955965, "learning_rate": 1.4937914566566596e-07, "loss": 0.1318, "step": 3433 }, { "epoch": 0.8950575049685596, "grad_norm": 1.0359750530093563, "learning_rate": 1.4864615604700872e-07, "loss": 0.1342, "step": 3434 }, { "epoch": 0.8953181507184048, "grad_norm": 1.0449561344971663, "learning_rate": 1.4791491410454633e-07, "loss": 0.1378, "step": 3435 }, { "epoch": 0.8955787964682501, "grad_norm": 1.0594760338215938, "learning_rate": 1.471854203817852e-07, "loss": 0.1457, "step": 3436 }, { "epoch": 0.8958394422180953, "grad_norm": 1.0289456925281493, "learning_rate": 1.4645767542093042e-07, "loss": 0.1406, "step": 3437 }, { "epoch": 0.8961000879679406, "grad_norm": 1.0254808852594772, "learning_rate": 1.4573167976288994e-07, "loss": 0.136, "step": 3438 }, { "epoch": 0.8963607337177858, "grad_norm": 1.0814068294028545, "learning_rate": 1.4500743394726956e-07, "loss": 0.1519, "step": 3439 }, { "epoch": 0.8966213794676311, "grad_norm": 1.0262084354641112, "learning_rate": 1.4428493851237403e-07, "loss": 0.1296, "step": 3440 }, { "epoch": 0.8968820252174763, "grad_norm": 1.0186312302155949, "learning_rate": 1.4356419399520928e-07, "loss": 0.1426, "step": 3441 }, { "epoch": 0.8971426709673216, "grad_norm": 1.0303539258198802, "learning_rate": 1.4284520093147818e-07, "loss": 0.1345, "step": 3442 }, { "epoch": 0.8974033167171668, "grad_norm": 1.0459883938506798, "learning_rate": 1.421279598555822e-07, "loss": 0.1425, "step": 3443 }, { "epoch": 0.8976639624670121, "grad_norm": 1.039804571457154, "learning_rate": 1.4141247130062069e-07, "loss": 0.14, "step": 3444 }, { "epoch": 0.8979246082168573, "grad_norm": 0.9831514441662857, "learning_rate": 1.4069873579839023e-07, "loss": 0.1343, "step": 3445 }, { "epoch": 0.8981852539667025, "grad_norm": 1.0239054617820427, "learning_rate": 1.3998675387938476e-07, "loss": 0.138, "step": 3446 }, { "epoch": 0.8984458997165478, "grad_norm": 0.9505994281625351, "learning_rate": 1.3927652607279434e-07, "loss": 0.1168, "step": 3447 }, { "epoch": 0.898706545466393, "grad_norm": 0.9632866072686116, "learning_rate": 1.3856805290650644e-07, "loss": 0.1291, "step": 3448 }, { "epoch": 0.8989671912162382, "grad_norm": 1.1232304887716946, "learning_rate": 1.3786133490710284e-07, "loss": 0.1568, "step": 3449 }, { "epoch": 0.8992278369660834, "grad_norm": 1.0330287301525987, "learning_rate": 1.3715637259986147e-07, "loss": 0.143, "step": 3450 }, { "epoch": 0.8994884827159287, "grad_norm": 0.9937880403559202, "learning_rate": 1.364531665087554e-07, "loss": 0.1285, "step": 3451 }, { "epoch": 0.8997491284657739, "grad_norm": 1.007686239228357, "learning_rate": 1.3575171715645235e-07, "loss": 0.1319, "step": 3452 }, { "epoch": 0.9000097742156192, "grad_norm": 1.084929445499813, "learning_rate": 1.350520250643142e-07, "loss": 0.1463, "step": 3453 }, { "epoch": 0.9002704199654644, "grad_norm": 1.044282390324297, "learning_rate": 1.343540907523963e-07, "loss": 0.1444, "step": 3454 }, { "epoch": 0.9005310657153097, "grad_norm": 1.0371357408955586, "learning_rate": 1.3365791473944928e-07, "loss": 0.1352, "step": 3455 }, { "epoch": 0.9007917114651549, "grad_norm": 1.06054810087059, "learning_rate": 1.3296349754291366e-07, "loss": 0.1427, "step": 3456 }, { "epoch": 0.9010523572150002, "grad_norm": 1.0710866191493416, "learning_rate": 1.32270839678926e-07, "loss": 0.1501, "step": 3457 }, { "epoch": 0.9013130029648454, "grad_norm": 1.0241930387570142, "learning_rate": 1.3157994166231342e-07, "loss": 0.1322, "step": 3458 }, { "epoch": 0.9015736487146907, "grad_norm": 1.0239123975502924, "learning_rate": 1.3089080400659538e-07, "loss": 0.1303, "step": 3459 }, { "epoch": 0.9018342944645359, "grad_norm": 1.0282767877465733, "learning_rate": 1.3020342722398273e-07, "loss": 0.1312, "step": 3460 }, { "epoch": 0.9020949402143811, "grad_norm": 1.0317698933451915, "learning_rate": 1.295178118253787e-07, "loss": 0.1376, "step": 3461 }, { "epoch": 0.9023555859642264, "grad_norm": 1.0213364089759402, "learning_rate": 1.2883395832037516e-07, "loss": 0.1413, "step": 3462 }, { "epoch": 0.9026162317140716, "grad_norm": 1.0254497409399437, "learning_rate": 1.2815186721725603e-07, "loss": 0.146, "step": 3463 }, { "epoch": 0.9028768774639169, "grad_norm": 1.0630133079169044, "learning_rate": 1.2747153902299524e-07, "loss": 0.1425, "step": 3464 }, { "epoch": 0.9031375232137621, "grad_norm": 1.0636050080970403, "learning_rate": 1.2679297424325608e-07, "loss": 0.1337, "step": 3465 }, { "epoch": 0.9033981689636074, "grad_norm": 0.9947121691256213, "learning_rate": 1.2611617338239097e-07, "loss": 0.1269, "step": 3466 }, { "epoch": 0.9036588147134526, "grad_norm": 1.0435952326061186, "learning_rate": 1.254411369434419e-07, "loss": 0.1416, "step": 3467 }, { "epoch": 0.9039194604632979, "grad_norm": 0.997565200134285, "learning_rate": 1.247678654281384e-07, "loss": 0.1299, "step": 3468 }, { "epoch": 0.904180106213143, "grad_norm": 1.0986656718289451, "learning_rate": 1.2409635933689934e-07, "loss": 0.135, "step": 3469 }, { "epoch": 0.9044407519629883, "grad_norm": 1.0503012911556455, "learning_rate": 1.234266191688302e-07, "loss": 0.1467, "step": 3470 }, { "epoch": 0.9047013977128335, "grad_norm": 0.947612384184128, "learning_rate": 1.2275864542172556e-07, "loss": 0.1288, "step": 3471 }, { "epoch": 0.9049620434626788, "grad_norm": 1.050379233802157, "learning_rate": 1.2209243859206577e-07, "loss": 0.1413, "step": 3472 }, { "epoch": 0.905222689212524, "grad_norm": 0.9842921432305909, "learning_rate": 1.214279991750178e-07, "loss": 0.1186, "step": 3473 }, { "epoch": 0.9054833349623693, "grad_norm": 1.014883734674535, "learning_rate": 1.2076532766443578e-07, "loss": 0.1317, "step": 3474 }, { "epoch": 0.9057439807122145, "grad_norm": 0.9900008401251662, "learning_rate": 1.201044245528596e-07, "loss": 0.1236, "step": 3475 }, { "epoch": 0.9060046264620597, "grad_norm": 1.0527758469914914, "learning_rate": 1.1944529033151436e-07, "loss": 0.1366, "step": 3476 }, { "epoch": 0.906265272211905, "grad_norm": 1.0140364240816475, "learning_rate": 1.1878792549031071e-07, "loss": 0.1232, "step": 3477 }, { "epoch": 0.9065259179617502, "grad_norm": 0.9898567164592422, "learning_rate": 1.1813233051784417e-07, "loss": 0.1337, "step": 3478 }, { "epoch": 0.9067865637115955, "grad_norm": 0.951685398893153, "learning_rate": 1.1747850590139497e-07, "loss": 0.1276, "step": 3479 }, { "epoch": 0.9070472094614407, "grad_norm": 1.0340844436466048, "learning_rate": 1.1682645212692634e-07, "loss": 0.1351, "step": 3480 }, { "epoch": 0.907307855211286, "grad_norm": 1.0734059811798362, "learning_rate": 1.1617616967908751e-07, "loss": 0.152, "step": 3481 }, { "epoch": 0.9075685009611312, "grad_norm": 1.0621037869255667, "learning_rate": 1.1552765904120966e-07, "loss": 0.1307, "step": 3482 }, { "epoch": 0.9078291467109765, "grad_norm": 1.0543289050706155, "learning_rate": 1.1488092069530666e-07, "loss": 0.1364, "step": 3483 }, { "epoch": 0.9080897924608217, "grad_norm": 1.0822438142101372, "learning_rate": 1.1423595512207591e-07, "loss": 0.1316, "step": 3484 }, { "epoch": 0.908350438210667, "grad_norm": 1.0280339426148468, "learning_rate": 1.1359276280089731e-07, "loss": 0.1289, "step": 3485 }, { "epoch": 0.9086110839605122, "grad_norm": 0.9847601613063435, "learning_rate": 1.1295134420983179e-07, "loss": 0.1296, "step": 3486 }, { "epoch": 0.9088717297103575, "grad_norm": 1.0693207231910067, "learning_rate": 1.1231169982562324e-07, "loss": 0.1387, "step": 3487 }, { "epoch": 0.9091323754602026, "grad_norm": 1.019371700545764, "learning_rate": 1.1167383012369609e-07, "loss": 0.1274, "step": 3488 }, { "epoch": 0.909393021210048, "grad_norm": 0.9883245273316015, "learning_rate": 1.1103773557815528e-07, "loss": 0.1344, "step": 3489 }, { "epoch": 0.9096536669598931, "grad_norm": 1.0209112680720247, "learning_rate": 1.1040341666178733e-07, "loss": 0.1278, "step": 3490 }, { "epoch": 0.9099143127097383, "grad_norm": 1.0719061227452695, "learning_rate": 1.0977087384605845e-07, "loss": 0.1401, "step": 3491 }, { "epoch": 0.9101749584595836, "grad_norm": 1.0906761535207157, "learning_rate": 1.0914010760111476e-07, "loss": 0.1532, "step": 3492 }, { "epoch": 0.9104356042094288, "grad_norm": 0.9866662221024926, "learning_rate": 1.0851111839578182e-07, "loss": 0.1261, "step": 3493 }, { "epoch": 0.9106962499592741, "grad_norm": 1.0120749897168315, "learning_rate": 1.078839066975651e-07, "loss": 0.1346, "step": 3494 }, { "epoch": 0.9109568957091193, "grad_norm": 1.0654407306992462, "learning_rate": 1.072584729726478e-07, "loss": 0.1286, "step": 3495 }, { "epoch": 0.9112175414589646, "grad_norm": 1.0464588995573376, "learning_rate": 1.0663481768589196e-07, "loss": 0.1345, "step": 3496 }, { "epoch": 0.9114781872088098, "grad_norm": 0.9813120659145919, "learning_rate": 1.0601294130083845e-07, "loss": 0.1302, "step": 3497 }, { "epoch": 0.9117388329586551, "grad_norm": 1.0594777214545217, "learning_rate": 1.0539284427970559e-07, "loss": 0.1433, "step": 3498 }, { "epoch": 0.9119994787085003, "grad_norm": 1.0441532343145963, "learning_rate": 1.0477452708338886e-07, "loss": 0.1456, "step": 3499 }, { "epoch": 0.9122601244583456, "grad_norm": 0.9703383188925891, "learning_rate": 1.0415799017146094e-07, "loss": 0.13, "step": 3500 }, { "epoch": 0.9122601244583456, "eval_loss": 0.13526123762130737, "eval_runtime": 55.3326, "eval_samples_per_second": 44.838, "eval_steps_per_second": 5.621, "step": 3500 }, { "epoch": 0.9125207702081908, "grad_norm": 1.0376832168098484, "learning_rate": 1.0354323400217136e-07, "loss": 0.1447, "step": 3501 }, { "epoch": 0.9127814159580361, "grad_norm": 1.0288847148379714, "learning_rate": 1.029302590324463e-07, "loss": 0.1367, "step": 3502 }, { "epoch": 0.9130420617078813, "grad_norm": 1.0414192007249634, "learning_rate": 1.0231906571788769e-07, "loss": 0.1359, "step": 3503 }, { "epoch": 0.9133027074577266, "grad_norm": 1.0249607728029364, "learning_rate": 1.0170965451277382e-07, "loss": 0.1406, "step": 3504 }, { "epoch": 0.9135633532075718, "grad_norm": 0.9920138899020396, "learning_rate": 1.0110202587005763e-07, "loss": 0.1341, "step": 3505 }, { "epoch": 0.913823998957417, "grad_norm": 1.0615256923679999, "learning_rate": 1.0049618024136703e-07, "loss": 0.1335, "step": 3506 }, { "epoch": 0.9140846447072623, "grad_norm": 0.9888232085787176, "learning_rate": 9.9892118077006e-08, "loss": 0.1246, "step": 3507 }, { "epoch": 0.9143452904571074, "grad_norm": 1.0694324381839553, "learning_rate": 9.928983982595175e-08, "loss": 0.1486, "step": 3508 }, { "epoch": 0.9146059362069527, "grad_norm": 1.0279181626658325, "learning_rate": 9.868934593585594e-08, "loss": 0.1359, "step": 3509 }, { "epoch": 0.9148665819567979, "grad_norm": 1.034421740030866, "learning_rate": 9.809063685304376e-08, "loss": 0.1333, "step": 3510 }, { "epoch": 0.9151272277066432, "grad_norm": 1.0744470653511857, "learning_rate": 9.749371302251398e-08, "loss": 0.1432, "step": 3511 }, { "epoch": 0.9153878734564884, "grad_norm": 0.9774248950258813, "learning_rate": 9.689857488793891e-08, "loss": 0.1283, "step": 3512 }, { "epoch": 0.9156485192063337, "grad_norm": 1.0400882716818902, "learning_rate": 9.63052228916625e-08, "loss": 0.1404, "step": 3513 }, { "epoch": 0.9159091649561789, "grad_norm": 1.0060709734184379, "learning_rate": 9.571365747470251e-08, "loss": 0.1411, "step": 3514 }, { "epoch": 0.9161698107060242, "grad_norm": 1.0267442372314344, "learning_rate": 9.51238790767478e-08, "loss": 0.1277, "step": 3515 }, { "epoch": 0.9164304564558694, "grad_norm": 0.9654763334707889, "learning_rate": 9.45358881361591e-08, "loss": 0.1253, "step": 3516 }, { "epoch": 0.9166911022057147, "grad_norm": 0.994621674897337, "learning_rate": 9.394968508996905e-08, "loss": 0.1387, "step": 3517 }, { "epoch": 0.9169517479555599, "grad_norm": 1.0135532225723511, "learning_rate": 9.336527037388132e-08, "loss": 0.1271, "step": 3518 }, { "epoch": 0.9172123937054052, "grad_norm": 1.058482878364632, "learning_rate": 9.278264442226931e-08, "loss": 0.1446, "step": 3519 }, { "epoch": 0.9174730394552504, "grad_norm": 1.0539120410257758, "learning_rate": 9.22018076681791e-08, "loss": 0.1371, "step": 3520 }, { "epoch": 0.9177336852050956, "grad_norm": 0.9872556942180936, "learning_rate": 9.162276054332509e-08, "loss": 0.1277, "step": 3521 }, { "epoch": 0.9179943309549409, "grad_norm": 1.0114393008589366, "learning_rate": 9.10455034780916e-08, "loss": 0.1333, "step": 3522 }, { "epoch": 0.9182549767047861, "grad_norm": 1.000148282516362, "learning_rate": 9.047003690153322e-08, "loss": 0.1373, "step": 3523 }, { "epoch": 0.9185156224546314, "grad_norm": 1.0103436844004139, "learning_rate": 8.989636124137363e-08, "loss": 0.1344, "step": 3524 }, { "epoch": 0.9187762682044766, "grad_norm": 1.0140429534256785, "learning_rate": 8.932447692400509e-08, "loss": 0.1302, "step": 3525 }, { "epoch": 0.9190369139543219, "grad_norm": 0.9874485314958994, "learning_rate": 8.875438437448813e-08, "loss": 0.1328, "step": 3526 }, { "epoch": 0.919297559704167, "grad_norm": 1.0187005973335785, "learning_rate": 8.818608401655243e-08, "loss": 0.1341, "step": 3527 }, { "epoch": 0.9195582054540123, "grad_norm": 1.013270994853351, "learning_rate": 8.761957627259509e-08, "loss": 0.1342, "step": 3528 }, { "epoch": 0.9198188512038575, "grad_norm": 1.0313455993287752, "learning_rate": 8.705486156367987e-08, "loss": 0.1299, "step": 3529 }, { "epoch": 0.9200794969537028, "grad_norm": 1.0538617669665602, "learning_rate": 8.649194030953989e-08, "loss": 0.1276, "step": 3530 }, { "epoch": 0.920340142703548, "grad_norm": 1.0600185425257531, "learning_rate": 8.59308129285738e-08, "loss": 0.15, "step": 3531 }, { "epoch": 0.9206007884533933, "grad_norm": 0.9826585261953865, "learning_rate": 8.537147983784716e-08, "loss": 0.1247, "step": 3532 }, { "epoch": 0.9208614342032385, "grad_norm": 0.9988416340333064, "learning_rate": 8.481394145309213e-08, "loss": 0.1367, "step": 3533 }, { "epoch": 0.9211220799530838, "grad_norm": 1.00168581737741, "learning_rate": 8.425819818870667e-08, "loss": 0.1373, "step": 3534 }, { "epoch": 0.921382725702929, "grad_norm": 1.0825744698431554, "learning_rate": 8.370425045775454e-08, "loss": 0.1486, "step": 3535 }, { "epoch": 0.9216433714527742, "grad_norm": 1.0020662973419197, "learning_rate": 8.3152098671965e-08, "loss": 0.1225, "step": 3536 }, { "epoch": 0.9219040172026195, "grad_norm": 0.9845704451611995, "learning_rate": 8.260174324173336e-08, "loss": 0.1336, "step": 3537 }, { "epoch": 0.9221646629524647, "grad_norm": 1.0316828429497216, "learning_rate": 8.205318457611772e-08, "loss": 0.1327, "step": 3538 }, { "epoch": 0.92242530870231, "grad_norm": 1.0151132909872662, "learning_rate": 8.150642308284218e-08, "loss": 0.1316, "step": 3539 }, { "epoch": 0.9226859544521552, "grad_norm": 0.9967898846928351, "learning_rate": 8.096145916829529e-08, "loss": 0.1394, "step": 3540 }, { "epoch": 0.9229466002020005, "grad_norm": 0.9779293096874706, "learning_rate": 8.041829323752858e-08, "loss": 0.1284, "step": 3541 }, { "epoch": 0.9232072459518457, "grad_norm": 0.997049729176109, "learning_rate": 7.98769256942572e-08, "loss": 0.1328, "step": 3542 }, { "epoch": 0.923467891701691, "grad_norm": 1.043701778102716, "learning_rate": 7.933735694086147e-08, "loss": 0.1371, "step": 3543 }, { "epoch": 0.9237285374515362, "grad_norm": 1.0629327238771353, "learning_rate": 7.879958737838201e-08, "loss": 0.1444, "step": 3544 }, { "epoch": 0.9239891832013815, "grad_norm": 1.0426622170664837, "learning_rate": 7.826361740652321e-08, "loss": 0.1293, "step": 3545 }, { "epoch": 0.9242498289512266, "grad_norm": 1.0450985598969036, "learning_rate": 7.77294474236534e-08, "loss": 0.1269, "step": 3546 }, { "epoch": 0.924510474701072, "grad_norm": 1.006076527492885, "learning_rate": 7.719707782680108e-08, "loss": 0.1306, "step": 3547 }, { "epoch": 0.9247711204509171, "grad_norm": 1.1561580753846084, "learning_rate": 7.66665090116575e-08, "loss": 0.1393, "step": 3548 }, { "epoch": 0.9250317662007624, "grad_norm": 1.019556650901964, "learning_rate": 7.613774137257502e-08, "loss": 0.1426, "step": 3549 }, { "epoch": 0.9252924119506076, "grad_norm": 0.9809585768289977, "learning_rate": 7.561077530256755e-08, "loss": 0.1227, "step": 3550 }, { "epoch": 0.9255530577004528, "grad_norm": 1.0808366432819665, "learning_rate": 7.508561119331014e-08, "loss": 0.1393, "step": 3551 }, { "epoch": 0.9258137034502981, "grad_norm": 1.0462003597863274, "learning_rate": 7.456224943513779e-08, "loss": 0.1328, "step": 3552 }, { "epoch": 0.9260743492001433, "grad_norm": 1.0219742475515758, "learning_rate": 7.404069041704682e-08, "loss": 0.1239, "step": 3553 }, { "epoch": 0.9263349949499886, "grad_norm": 1.0839954555317433, "learning_rate": 7.352093452669324e-08, "loss": 0.1441, "step": 3554 }, { "epoch": 0.9265956406998338, "grad_norm": 1.0273039043391048, "learning_rate": 7.30029821503922e-08, "loss": 0.1299, "step": 3555 }, { "epoch": 0.9268562864496791, "grad_norm": 1.036006108458096, "learning_rate": 7.248683367311937e-08, "loss": 0.1362, "step": 3556 }, { "epoch": 0.9271169321995243, "grad_norm": 0.9953864361783246, "learning_rate": 7.1972489478509e-08, "loss": 0.1248, "step": 3557 }, { "epoch": 0.9273775779493696, "grad_norm": 1.0416240545252868, "learning_rate": 7.145994994885497e-08, "loss": 0.1341, "step": 3558 }, { "epoch": 0.9276382236992148, "grad_norm": 1.0379077762266653, "learning_rate": 7.094921546510896e-08, "loss": 0.1493, "step": 3559 }, { "epoch": 0.9278988694490601, "grad_norm": 1.067633859700405, "learning_rate": 7.044028640688122e-08, "loss": 0.1392, "step": 3560 }, { "epoch": 0.9281595151989053, "grad_norm": 1.0253320800895225, "learning_rate": 6.993316315244081e-08, "loss": 0.1262, "step": 3561 }, { "epoch": 0.9284201609487506, "grad_norm": 0.9998511186946533, "learning_rate": 6.942784607871373e-08, "loss": 0.1301, "step": 3562 }, { "epoch": 0.9286808066985958, "grad_norm": 1.0798336058337545, "learning_rate": 6.892433556128425e-08, "loss": 0.1388, "step": 3563 }, { "epoch": 0.9289414524484411, "grad_norm": 1.0554889039573419, "learning_rate": 6.842263197439303e-08, "loss": 0.1381, "step": 3564 }, { "epoch": 0.9292020981982863, "grad_norm": 1.0417481350205045, "learning_rate": 6.792273569093843e-08, "loss": 0.1351, "step": 3565 }, { "epoch": 0.9294627439481314, "grad_norm": 1.0246372615829635, "learning_rate": 6.742464708247548e-08, "loss": 0.1315, "step": 3566 }, { "epoch": 0.9297233896979767, "grad_norm": 1.0590715216600777, "learning_rate": 6.692836651921498e-08, "loss": 0.1448, "step": 3567 }, { "epoch": 0.9299840354478219, "grad_norm": 1.0799803792986893, "learning_rate": 6.643389437002462e-08, "loss": 0.1473, "step": 3568 }, { "epoch": 0.9302446811976672, "grad_norm": 1.0144859618591338, "learning_rate": 6.594123100242738e-08, "loss": 0.1416, "step": 3569 }, { "epoch": 0.9305053269475124, "grad_norm": 1.0475479612740177, "learning_rate": 6.545037678260257e-08, "loss": 0.1361, "step": 3570 }, { "epoch": 0.9307659726973577, "grad_norm": 1.1047864295767278, "learning_rate": 6.496133207538392e-08, "loss": 0.1494, "step": 3571 }, { "epoch": 0.9310266184472029, "grad_norm": 1.0321020741043438, "learning_rate": 6.447409724426063e-08, "loss": 0.1347, "step": 3572 }, { "epoch": 0.9312872641970482, "grad_norm": 1.0087333663132938, "learning_rate": 6.398867265137693e-08, "loss": 0.1352, "step": 3573 }, { "epoch": 0.9315479099468934, "grad_norm": 1.046086801750974, "learning_rate": 6.350505865753142e-08, "loss": 0.1357, "step": 3574 }, { "epoch": 0.9318085556967387, "grad_norm": 0.9872585263400669, "learning_rate": 6.302325562217632e-08, "loss": 0.1262, "step": 3575 }, { "epoch": 0.9320692014465839, "grad_norm": 1.004682169125181, "learning_rate": 6.254326390341958e-08, "loss": 0.1315, "step": 3576 }, { "epoch": 0.9323298471964292, "grad_norm": 1.0234080054596086, "learning_rate": 6.206508385802057e-08, "loss": 0.1249, "step": 3577 }, { "epoch": 0.9325904929462744, "grad_norm": 1.071631671425825, "learning_rate": 6.158871584139303e-08, "loss": 0.1464, "step": 3578 }, { "epoch": 0.9328511386961197, "grad_norm": 0.9851367400991176, "learning_rate": 6.111416020760513e-08, "loss": 0.1343, "step": 3579 }, { "epoch": 0.9331117844459649, "grad_norm": 0.9861785949324284, "learning_rate": 6.06414173093764e-08, "loss": 0.1231, "step": 3580 }, { "epoch": 0.9333724301958101, "grad_norm": 0.9399288223577089, "learning_rate": 6.017048749807935e-08, "loss": 0.1127, "step": 3581 }, { "epoch": 0.9336330759456554, "grad_norm": 1.0248439669065388, "learning_rate": 5.970137112373903e-08, "loss": 0.1392, "step": 3582 }, { "epoch": 0.9338937216955006, "grad_norm": 1.0264934721503716, "learning_rate": 5.923406853503288e-08, "loss": 0.1376, "step": 3583 }, { "epoch": 0.9341543674453459, "grad_norm": 1.016054961274228, "learning_rate": 5.876858007929004e-08, "loss": 0.1371, "step": 3584 }, { "epoch": 0.934415013195191, "grad_norm": 1.067776394167892, "learning_rate": 5.830490610249068e-08, "loss": 0.1357, "step": 3585 }, { "epoch": 0.9346756589450363, "grad_norm": 1.0009131480360036, "learning_rate": 5.784304694926773e-08, "loss": 0.1323, "step": 3586 }, { "epoch": 0.9349363046948815, "grad_norm": 1.033454420045262, "learning_rate": 5.738300296290411e-08, "loss": 0.1419, "step": 3587 }, { "epoch": 0.9351969504447268, "grad_norm": 1.0025556073711595, "learning_rate": 5.692477448533351e-08, "loss": 0.1306, "step": 3588 }, { "epoch": 0.935457596194572, "grad_norm": 1.0846026108653721, "learning_rate": 5.6468361857140716e-08, "loss": 0.1473, "step": 3589 }, { "epoch": 0.9357182419444173, "grad_norm": 0.972870775080033, "learning_rate": 5.601376541756076e-08, "loss": 0.1332, "step": 3590 }, { "epoch": 0.9359788876942625, "grad_norm": 1.011115452108713, "learning_rate": 5.5560985504478916e-08, "loss": 0.1364, "step": 3591 }, { "epoch": 0.9362395334441078, "grad_norm": 0.9879437379387968, "learning_rate": 5.511002245442987e-08, "loss": 0.1204, "step": 3592 }, { "epoch": 0.936500179193953, "grad_norm": 1.0140036118195237, "learning_rate": 5.4660876602598e-08, "loss": 0.132, "step": 3593 }, { "epoch": 0.9367608249437983, "grad_norm": 1.014740388116095, "learning_rate": 5.4213548282817664e-08, "loss": 0.1404, "step": 3594 }, { "epoch": 0.9370214706936435, "grad_norm": 1.0910514222222014, "learning_rate": 5.376803782757095e-08, "loss": 0.1442, "step": 3595 }, { "epoch": 0.9372821164434887, "grad_norm": 1.0195721344907136, "learning_rate": 5.3324345567990485e-08, "loss": 0.1276, "step": 3596 }, { "epoch": 0.937542762193334, "grad_norm": 0.9976631913083124, "learning_rate": 5.288247183385664e-08, "loss": 0.1331, "step": 3597 }, { "epoch": 0.9378034079431792, "grad_norm": 1.0133849898936322, "learning_rate": 5.244241695359753e-08, "loss": 0.1276, "step": 3598 }, { "epoch": 0.9380640536930245, "grad_norm": 1.0041829195574885, "learning_rate": 5.200418125429069e-08, "loss": 0.1284, "step": 3599 }, { "epoch": 0.9383246994428697, "grad_norm": 1.006876239155681, "learning_rate": 5.156776506166028e-08, "loss": 0.1302, "step": 3600 }, { "epoch": 0.938585345192715, "grad_norm": 1.0306183058113634, "learning_rate": 5.1133168700078785e-08, "loss": 0.1285, "step": 3601 }, { "epoch": 0.9388459909425602, "grad_norm": 1.1043950396856226, "learning_rate": 5.070039249256642e-08, "loss": 0.1475, "step": 3602 }, { "epoch": 0.9391066366924055, "grad_norm": 0.9952842544392068, "learning_rate": 5.026943676078977e-08, "loss": 0.1241, "step": 3603 }, { "epoch": 0.9393672824422507, "grad_norm": 1.0166446008120196, "learning_rate": 4.984030182506233e-08, "loss": 0.1362, "step": 3604 }, { "epoch": 0.939627928192096, "grad_norm": 1.0011024701455353, "learning_rate": 4.94129880043448e-08, "loss": 0.1334, "step": 3605 }, { "epoch": 0.9398885739419411, "grad_norm": 0.935086881096917, "learning_rate": 4.89874956162445e-08, "loss": 0.1244, "step": 3606 }, { "epoch": 0.9401492196917864, "grad_norm": 1.0783900638021577, "learning_rate": 4.856382497701373e-08, "loss": 0.1402, "step": 3607 }, { "epoch": 0.9404098654416316, "grad_norm": 1.017395645330172, "learning_rate": 4.814197640155199e-08, "loss": 0.1287, "step": 3608 }, { "epoch": 0.9406705111914769, "grad_norm": 1.0806298113060342, "learning_rate": 4.772195020340431e-08, "loss": 0.1433, "step": 3609 }, { "epoch": 0.9409311569413221, "grad_norm": 1.0383554766520486, "learning_rate": 4.73037466947604e-08, "loss": 0.1449, "step": 3610 }, { "epoch": 0.9411918026911673, "grad_norm": 1.002724631528354, "learning_rate": 4.6887366186456065e-08, "loss": 0.1308, "step": 3611 }, { "epoch": 0.9414524484410126, "grad_norm": 0.9981005797160374, "learning_rate": 4.64728089879718e-08, "loss": 0.1285, "step": 3612 }, { "epoch": 0.9417130941908578, "grad_norm": 0.9994375810790456, "learning_rate": 4.606007540743307e-08, "loss": 0.1255, "step": 3613 }, { "epoch": 0.9419737399407031, "grad_norm": 0.9817323375923956, "learning_rate": 4.564916575160977e-08, "loss": 0.124, "step": 3614 }, { "epoch": 0.9422343856905483, "grad_norm": 1.0458153735622233, "learning_rate": 4.52400803259162e-08, "loss": 0.1416, "step": 3615 }, { "epoch": 0.9424950314403936, "grad_norm": 1.0598074035313043, "learning_rate": 4.4832819434410535e-08, "loss": 0.14, "step": 3616 }, { "epoch": 0.9427556771902388, "grad_norm": 1.0525308805313345, "learning_rate": 4.442738337979535e-08, "loss": 0.1366, "step": 3617 }, { "epoch": 0.9430163229400841, "grad_norm": 1.1024732360021703, "learning_rate": 4.402377246341627e-08, "loss": 0.1425, "step": 3618 }, { "epoch": 0.9432769686899293, "grad_norm": 1.0359260535647483, "learning_rate": 4.362198698526304e-08, "loss": 0.1297, "step": 3619 }, { "epoch": 0.9435376144397746, "grad_norm": 1.016547825503135, "learning_rate": 4.322202724396818e-08, "loss": 0.1339, "step": 3620 }, { "epoch": 0.9437982601896198, "grad_norm": 1.0643425004109563, "learning_rate": 4.282389353680666e-08, "loss": 0.1466, "step": 3621 }, { "epoch": 0.9440589059394651, "grad_norm": 1.0003743077396554, "learning_rate": 4.2427586159697886e-08, "loss": 0.13, "step": 3622 }, { "epoch": 0.9443195516893103, "grad_norm": 1.008136197024598, "learning_rate": 4.203310540720207e-08, "loss": 0.1315, "step": 3623 }, { "epoch": 0.9445801974391556, "grad_norm": 1.0535799176776934, "learning_rate": 4.164045157252272e-08, "loss": 0.1365, "step": 3624 }, { "epoch": 0.9448408431890007, "grad_norm": 1.040535766353014, "learning_rate": 4.1249624947505006e-08, "loss": 0.1386, "step": 3625 }, { "epoch": 0.945101488938846, "grad_norm": 1.035950214655199, "learning_rate": 4.086062582263656e-08, "loss": 0.1311, "step": 3626 }, { "epoch": 0.9453621346886912, "grad_norm": 1.0290227107587278, "learning_rate": 4.0473454487046374e-08, "loss": 0.1237, "step": 3627 }, { "epoch": 0.9456227804385364, "grad_norm": 1.0190763725643333, "learning_rate": 4.008811122850426e-08, "loss": 0.1238, "step": 3628 }, { "epoch": 0.9458834261883817, "grad_norm": 1.005070016690952, "learning_rate": 3.9704596333422497e-08, "loss": 0.1231, "step": 3629 }, { "epoch": 0.9461440719382269, "grad_norm": 1.0160670330353272, "learning_rate": 3.9322910086853625e-08, "loss": 0.131, "step": 3630 }, { "epoch": 0.9464047176880722, "grad_norm": 1.0080731908721718, "learning_rate": 3.894305277249155e-08, "loss": 0.1294, "step": 3631 }, { "epoch": 0.9466653634379174, "grad_norm": 0.9871839679559375, "learning_rate": 3.856502467266987e-08, "loss": 0.1259, "step": 3632 }, { "epoch": 0.9469260091877627, "grad_norm": 1.0092081442752368, "learning_rate": 3.818882606836355e-08, "loss": 0.1305, "step": 3633 }, { "epoch": 0.9471866549376079, "grad_norm": 1.0312406851722717, "learning_rate": 3.7814457239187255e-08, "loss": 0.1304, "step": 3634 }, { "epoch": 0.9474473006874532, "grad_norm": 0.9547697078290628, "learning_rate": 3.7441918463395896e-08, "loss": 0.1228, "step": 3635 }, { "epoch": 0.9477079464372984, "grad_norm": 1.0082605139037006, "learning_rate": 3.707121001788438e-08, "loss": 0.1393, "step": 3636 }, { "epoch": 0.9479685921871437, "grad_norm": 1.0572449486346627, "learning_rate": 3.67023321781862e-08, "loss": 0.1439, "step": 3637 }, { "epoch": 0.9482292379369889, "grad_norm": 1.0030690751265658, "learning_rate": 3.633528521847507e-08, "loss": 0.1275, "step": 3638 }, { "epoch": 0.9484898836868342, "grad_norm": 1.045544723819435, "learning_rate": 3.5970069411564166e-08, "loss": 0.1355, "step": 3639 }, { "epoch": 0.9487505294366794, "grad_norm": 1.0437376457226768, "learning_rate": 3.5606685028904686e-08, "loss": 0.1306, "step": 3640 }, { "epoch": 0.9490111751865247, "grad_norm": 1.0116446129617904, "learning_rate": 3.524513234058724e-08, "loss": 0.1381, "step": 3641 }, { "epoch": 0.9492718209363699, "grad_norm": 1.0302485509706565, "learning_rate": 3.4885411615341034e-08, "loss": 0.1369, "step": 3642 }, { "epoch": 0.949532466686215, "grad_norm": 0.984728227100928, "learning_rate": 3.452752312053331e-08, "loss": 0.1338, "step": 3643 }, { "epoch": 0.9497931124360603, "grad_norm": 1.0130990435536544, "learning_rate": 3.4171467122169344e-08, "loss": 0.1352, "step": 3644 }, { "epoch": 0.9500537581859055, "grad_norm": 1.0170331847846072, "learning_rate": 3.381724388489327e-08, "loss": 0.1398, "step": 3645 }, { "epoch": 0.9503144039357508, "grad_norm": 1.0191605655146927, "learning_rate": 3.346485367198588e-08, "loss": 0.1296, "step": 3646 }, { "epoch": 0.950575049685596, "grad_norm": 0.9679990297629372, "learning_rate": 3.311429674536654e-08, "loss": 0.1267, "step": 3647 }, { "epoch": 0.9508356954354413, "grad_norm": 0.9504297777478861, "learning_rate": 3.2765573365591e-08, "loss": 0.1288, "step": 3648 }, { "epoch": 0.9510963411852865, "grad_norm": 1.0243242668147892, "learning_rate": 3.2418683791853034e-08, "loss": 0.1297, "step": 3649 }, { "epoch": 0.9513569869351318, "grad_norm": 1.0824564353858546, "learning_rate": 3.207362828198307e-08, "loss": 0.1466, "step": 3650 }, { "epoch": 0.951617632684977, "grad_norm": 0.9991647339239573, "learning_rate": 3.173040709244818e-08, "loss": 0.1265, "step": 3651 }, { "epoch": 0.9518782784348223, "grad_norm": 0.9937886009433301, "learning_rate": 3.138902047835263e-08, "loss": 0.126, "step": 3652 }, { "epoch": 0.9521389241846675, "grad_norm": 1.079505936779241, "learning_rate": 3.1049468693436776e-08, "loss": 0.1395, "step": 3653 }, { "epoch": 0.9523995699345128, "grad_norm": 1.0198158391150678, "learning_rate": 3.071175199007653e-08, "loss": 0.1418, "step": 3654 }, { "epoch": 0.952660215684358, "grad_norm": 1.0009834382501757, "learning_rate": 3.0375870619284983e-08, "loss": 0.1323, "step": 3655 }, { "epoch": 0.9529208614342033, "grad_norm": 1.006845004923118, "learning_rate": 3.00418248307105e-08, "loss": 0.1275, "step": 3656 }, { "epoch": 0.9531815071840485, "grad_norm": 1.025349181619966, "learning_rate": 2.9709614872637237e-08, "loss": 0.1343, "step": 3657 }, { "epoch": 0.9534421529338937, "grad_norm": 1.0113877657173538, "learning_rate": 2.9379240991984635e-08, "loss": 0.1281, "step": 3658 }, { "epoch": 0.953702798683739, "grad_norm": 1.0580665353125998, "learning_rate": 2.9050703434308214e-08, "loss": 0.1283, "step": 3659 }, { "epoch": 0.9539634444335842, "grad_norm": 0.965922392481087, "learning_rate": 2.8724002443797083e-08, "loss": 0.1215, "step": 3660 }, { "epoch": 0.9542240901834295, "grad_norm": 1.0508808327315313, "learning_rate": 2.839913826327728e-08, "loss": 0.1411, "step": 3661 }, { "epoch": 0.9544847359332747, "grad_norm": 1.0749790899502745, "learning_rate": 2.807611113420816e-08, "loss": 0.134, "step": 3662 }, { "epoch": 0.95474538168312, "grad_norm": 1.04991600948722, "learning_rate": 2.775492129668378e-08, "loss": 0.1377, "step": 3663 }, { "epoch": 0.9550060274329651, "grad_norm": 0.9384945434296492, "learning_rate": 2.743556898943345e-08, "loss": 0.1171, "step": 3664 }, { "epoch": 0.9552666731828104, "grad_norm": 1.0354726428942245, "learning_rate": 2.7118054449820075e-08, "loss": 0.1327, "step": 3665 }, { "epoch": 0.9555273189326556, "grad_norm": 1.0537038959052971, "learning_rate": 2.680237791384044e-08, "loss": 0.1421, "step": 3666 }, { "epoch": 0.9557879646825009, "grad_norm": 1.041194959673283, "learning_rate": 2.6488539616125464e-08, "loss": 0.1265, "step": 3667 }, { "epoch": 0.9560486104323461, "grad_norm": 1.0086715746249162, "learning_rate": 2.6176539789939947e-08, "loss": 0.1321, "step": 3668 }, { "epoch": 0.9563092561821914, "grad_norm": 1.03099775676143, "learning_rate": 2.5866378667182557e-08, "loss": 0.1333, "step": 3669 }, { "epoch": 0.9565699019320366, "grad_norm": 1.0420680961120343, "learning_rate": 2.5558056478383887e-08, "loss": 0.1384, "step": 3670 }, { "epoch": 0.9568305476818819, "grad_norm": 1.0275549925349616, "learning_rate": 2.5251573452709232e-08, "loss": 0.1333, "step": 3671 }, { "epoch": 0.9570911934317271, "grad_norm": 1.0453184738506704, "learning_rate": 2.4946929817956376e-08, "loss": 0.139, "step": 3672 }, { "epoch": 0.9573518391815723, "grad_norm": 1.0346585246073015, "learning_rate": 2.4644125800555583e-08, "loss": 0.1448, "step": 3673 }, { "epoch": 0.9576124849314176, "grad_norm": 0.9912704800638665, "learning_rate": 2.4343161625570433e-08, "loss": 0.1347, "step": 3674 }, { "epoch": 0.9578731306812628, "grad_norm": 1.053865681199402, "learning_rate": 2.4044037516696704e-08, "loss": 0.1327, "step": 3675 }, { "epoch": 0.9581337764311081, "grad_norm": 0.9929640082727349, "learning_rate": 2.3746753696262113e-08, "loss": 0.1313, "step": 3676 }, { "epoch": 0.9583944221809533, "grad_norm": 0.9850152293308614, "learning_rate": 2.3451310385227132e-08, "loss": 0.1287, "step": 3677 }, { "epoch": 0.9586550679307986, "grad_norm": 1.0082839548358922, "learning_rate": 2.3157707803184438e-08, "loss": 0.1411, "step": 3678 }, { "epoch": 0.9589157136806438, "grad_norm": 1.0661481537084514, "learning_rate": 2.28659461683578e-08, "loss": 0.1389, "step": 3679 }, { "epoch": 0.9591763594304891, "grad_norm": 1.0503325194345756, "learning_rate": 2.2576025697603198e-08, "loss": 0.1415, "step": 3680 }, { "epoch": 0.9594370051803343, "grad_norm": 0.9909706546972481, "learning_rate": 2.2287946606407983e-08, "loss": 0.1321, "step": 3681 }, { "epoch": 0.9596976509301796, "grad_norm": 1.053532326388919, "learning_rate": 2.2001709108891155e-08, "loss": 0.1395, "step": 3682 }, { "epoch": 0.9599582966800247, "grad_norm": 0.9774009384503755, "learning_rate": 2.1717313417802534e-08, "loss": 0.122, "step": 3683 }, { "epoch": 0.96021894242987, "grad_norm": 1.0965498612973985, "learning_rate": 2.143475974452275e-08, "loss": 0.1548, "step": 3684 }, { "epoch": 0.9604795881797152, "grad_norm": 1.0377735197002003, "learning_rate": 2.115404829906437e-08, "loss": 0.1372, "step": 3685 }, { "epoch": 0.9607402339295605, "grad_norm": 1.0082574303095027, "learning_rate": 2.0875179290069934e-08, "loss": 0.1363, "step": 3686 }, { "epoch": 0.9610008796794057, "grad_norm": 0.9596294168436796, "learning_rate": 2.0598152924812266e-08, "loss": 0.127, "step": 3687 }, { "epoch": 0.9612615254292509, "grad_norm": 1.0085544494797785, "learning_rate": 2.032296940919526e-08, "loss": 0.1304, "step": 3688 }, { "epoch": 0.9615221711790962, "grad_norm": 1.0127673235084156, "learning_rate": 2.004962894775281e-08, "loss": 0.1378, "step": 3689 }, { "epoch": 0.9617828169289414, "grad_norm": 1.0360554569354892, "learning_rate": 1.9778131743649064e-08, "loss": 0.1405, "step": 3690 }, { "epoch": 0.9620434626787867, "grad_norm": 1.0176317074530619, "learning_rate": 1.9508477998678442e-08, "loss": 0.1353, "step": 3691 }, { "epoch": 0.9623041084286319, "grad_norm": 1.0173171647841355, "learning_rate": 1.9240667913264233e-08, "loss": 0.1256, "step": 3692 }, { "epoch": 0.9625647541784772, "grad_norm": 1.0455990534072077, "learning_rate": 1.8974701686459985e-08, "loss": 0.1404, "step": 3693 }, { "epoch": 0.9628253999283224, "grad_norm": 1.0393222538401334, "learning_rate": 1.8710579515948957e-08, "loss": 0.1374, "step": 3694 }, { "epoch": 0.9630860456781677, "grad_norm": 1.0020668963418713, "learning_rate": 1.8448301598043838e-08, "loss": 0.1294, "step": 3695 }, { "epoch": 0.9633466914280129, "grad_norm": 1.1148301607431181, "learning_rate": 1.8187868127685914e-08, "loss": 0.1412, "step": 3696 }, { "epoch": 0.9636073371778582, "grad_norm": 0.9944701646863686, "learning_rate": 1.7929279298446177e-08, "loss": 0.1237, "step": 3697 }, { "epoch": 0.9638679829277034, "grad_norm": 1.0047211805141152, "learning_rate": 1.767253530252422e-08, "loss": 0.1368, "step": 3698 }, { "epoch": 0.9641286286775487, "grad_norm": 1.0400526926772873, "learning_rate": 1.74176363307485e-08, "loss": 0.133, "step": 3699 }, { "epoch": 0.9643892744273939, "grad_norm": 1.0202494904063617, "learning_rate": 1.716458257257636e-08, "loss": 0.133, "step": 3700 }, { "epoch": 0.9646499201772392, "grad_norm": 1.0326505373186803, "learning_rate": 1.6913374216093458e-08, "loss": 0.1378, "step": 3701 }, { "epoch": 0.9649105659270844, "grad_norm": 1.0057374067299316, "learning_rate": 1.666401144801405e-08, "loss": 0.1315, "step": 3702 }, { "epoch": 0.9651712116769295, "grad_norm": 1.0734165443726367, "learning_rate": 1.6416494453680156e-08, "loss": 0.1411, "step": 3703 }, { "epoch": 0.9654318574267748, "grad_norm": 1.070263570746881, "learning_rate": 1.6170823417062386e-08, "loss": 0.1442, "step": 3704 }, { "epoch": 0.96569250317662, "grad_norm": 1.009213807497551, "learning_rate": 1.592699852075885e-08, "loss": 0.1216, "step": 3705 }, { "epoch": 0.9659531489264653, "grad_norm": 1.0874230739911288, "learning_rate": 1.568501994599625e-08, "loss": 0.1446, "step": 3706 }, { "epoch": 0.9662137946763105, "grad_norm": 1.0038349258209198, "learning_rate": 1.5444887872627934e-08, "loss": 0.1384, "step": 3707 }, { "epoch": 0.9664744404261558, "grad_norm": 1.0277750045685674, "learning_rate": 1.5206602479135857e-08, "loss": 0.135, "step": 3708 }, { "epoch": 0.966735086176001, "grad_norm": 0.949767167693486, "learning_rate": 1.4970163942628623e-08, "loss": 0.1246, "step": 3709 }, { "epoch": 0.9669957319258463, "grad_norm": 1.0010575014799323, "learning_rate": 1.4735572438842605e-08, "loss": 0.1341, "step": 3710 }, { "epoch": 0.9672563776756915, "grad_norm": 1.0582305206427884, "learning_rate": 1.45028281421411e-08, "loss": 0.1392, "step": 3711 }, { "epoch": 0.9675170234255368, "grad_norm": 0.9817545956022543, "learning_rate": 1.4271931225514624e-08, "loss": 0.13, "step": 3712 }, { "epoch": 0.967777669175382, "grad_norm": 1.010787924647862, "learning_rate": 1.4042881860580338e-08, "loss": 0.1321, "step": 3713 }, { "epoch": 0.9680383149252273, "grad_norm": 0.9998017798187441, "learning_rate": 1.381568021758234e-08, "loss": 0.1363, "step": 3714 }, { "epoch": 0.9682989606750725, "grad_norm": 1.001902121040935, "learning_rate": 1.3590326465391656e-08, "loss": 0.1353, "step": 3715 }, { "epoch": 0.9685596064249178, "grad_norm": 1.0464080288873792, "learning_rate": 1.336682077150514e-08, "loss": 0.1441, "step": 3716 }, { "epoch": 0.968820252174763, "grad_norm": 1.026465837098362, "learning_rate": 1.3145163302046849e-08, "loss": 0.1329, "step": 3717 }, { "epoch": 0.9690808979246082, "grad_norm": 1.0257898560265244, "learning_rate": 1.2925354221766661e-08, "loss": 0.1373, "step": 3718 }, { "epoch": 0.9693415436744535, "grad_norm": 0.9892186278311755, "learning_rate": 1.2707393694040838e-08, "loss": 0.1255, "step": 3719 }, { "epoch": 0.9696021894242987, "grad_norm": 1.0068384480374275, "learning_rate": 1.2491281880871175e-08, "loss": 0.1268, "step": 3720 }, { "epoch": 0.969862835174144, "grad_norm": 1.1363986447916923, "learning_rate": 1.2277018942885855e-08, "loss": 0.1337, "step": 3721 }, { "epoch": 0.9701234809239891, "grad_norm": 1.025313464757123, "learning_rate": 1.206460503933915e-08, "loss": 0.1272, "step": 3722 }, { "epoch": 0.9703841266738344, "grad_norm": 1.029893834946728, "learning_rate": 1.185404032811005e-08, "loss": 0.1389, "step": 3723 }, { "epoch": 0.9706447724236796, "grad_norm": 1.1085173871869307, "learning_rate": 1.1645324965704473e-08, "loss": 0.1356, "step": 3724 }, { "epoch": 0.9709054181735249, "grad_norm": 1.042179302293193, "learning_rate": 1.143845910725222e-08, "loss": 0.1383, "step": 3725 }, { "epoch": 0.9711660639233701, "grad_norm": 1.016923777337031, "learning_rate": 1.1233442906509462e-08, "loss": 0.1426, "step": 3726 }, { "epoch": 0.9714267096732154, "grad_norm": 1.056290157367691, "learning_rate": 1.1030276515857364e-08, "loss": 0.1379, "step": 3727 }, { "epoch": 0.9716873554230606, "grad_norm": 1.0813738661298762, "learning_rate": 1.0828960086302075e-08, "loss": 0.1302, "step": 3728 }, { "epoch": 0.9719480011729059, "grad_norm": 1.0355406390058315, "learning_rate": 1.0629493767475019e-08, "loss": 0.131, "step": 3729 }, { "epoch": 0.9722086469227511, "grad_norm": 1.0032617996901918, "learning_rate": 1.0431877707632043e-08, "loss": 0.1285, "step": 3730 }, { "epoch": 0.9724692926725964, "grad_norm": 1.0501788985955847, "learning_rate": 1.0236112053654267e-08, "loss": 0.1396, "step": 3731 }, { "epoch": 0.9727299384224416, "grad_norm": 1.0266923498416527, "learning_rate": 1.0042196951046968e-08, "loss": 0.1376, "step": 3732 }, { "epoch": 0.9729905841722868, "grad_norm": 1.0471981954815217, "learning_rate": 9.850132543940127e-09, "loss": 0.1327, "step": 3733 }, { "epoch": 0.9732512299221321, "grad_norm": 1.0665401010036675, "learning_rate": 9.659918975088444e-09, "loss": 0.1424, "step": 3734 }, { "epoch": 0.9735118756719773, "grad_norm": 1.0101269222626974, "learning_rate": 9.47155638587105e-09, "loss": 0.1339, "step": 3735 }, { "epoch": 0.9737725214218226, "grad_norm": 1.078940153788344, "learning_rate": 9.285044916290675e-09, "loss": 0.1428, "step": 3736 }, { "epoch": 0.9740331671716678, "grad_norm": 0.9678194413677886, "learning_rate": 9.100384704974486e-09, "loss": 0.1243, "step": 3737 }, { "epoch": 0.9742938129215131, "grad_norm": 1.0145765494706318, "learning_rate": 8.91757588917408e-09, "loss": 0.1356, "step": 3738 }, { "epoch": 0.9745544586713583, "grad_norm": 1.0419514903525855, "learning_rate": 8.73661860476438e-09, "loss": 0.1359, "step": 3739 }, { "epoch": 0.9748151044212036, "grad_norm": 1.0005632037593668, "learning_rate": 8.557512986244464e-09, "loss": 0.1312, "step": 3740 }, { "epoch": 0.9750757501710487, "grad_norm": 1.0408522635520958, "learning_rate": 8.380259166736738e-09, "loss": 0.1351, "step": 3741 }, { "epoch": 0.975336395920894, "grad_norm": 1.011949212101085, "learning_rate": 8.20485727798831e-09, "loss": 0.1383, "step": 3742 }, { "epoch": 0.9755970416707392, "grad_norm": 0.976624102009713, "learning_rate": 8.031307450368232e-09, "loss": 0.131, "step": 3743 }, { "epoch": 0.9758576874205845, "grad_norm": 1.0194139346920934, "learning_rate": 7.85960981287026e-09, "loss": 0.1351, "step": 3744 }, { "epoch": 0.9761183331704297, "grad_norm": 1.1380148725583867, "learning_rate": 7.68976449311093e-09, "loss": 0.1422, "step": 3745 }, { "epoch": 0.976378978920275, "grad_norm": 0.9839906152941162, "learning_rate": 7.521771617330365e-09, "loss": 0.129, "step": 3746 }, { "epoch": 0.9766396246701202, "grad_norm": 0.983505943751515, "learning_rate": 7.3556313103911865e-09, "loss": 0.1345, "step": 3747 }, { "epoch": 0.9769002704199654, "grad_norm": 1.0648337868784408, "learning_rate": 7.191343695779618e-09, "loss": 0.1359, "step": 3748 }, { "epoch": 0.9771609161698107, "grad_norm": 0.9516684827458731, "learning_rate": 7.028908895604647e-09, "loss": 0.1214, "step": 3749 }, { "epoch": 0.9774215619196559, "grad_norm": 0.9980586195724908, "learning_rate": 6.86832703059831e-09, "loss": 0.1272, "step": 3750 }, { "epoch": 0.9776822076695012, "grad_norm": 1.0384416211176806, "learning_rate": 6.709598220115132e-09, "loss": 0.143, "step": 3751 }, { "epoch": 0.9779428534193464, "grad_norm": 1.054481197857415, "learning_rate": 6.552722582132687e-09, "loss": 0.13, "step": 3752 }, { "epoch": 0.9782034991691917, "grad_norm": 0.9928045320520236, "learning_rate": 6.397700233250759e-09, "loss": 0.1238, "step": 3753 }, { "epoch": 0.9784641449190369, "grad_norm": 1.0033294221567641, "learning_rate": 6.24453128869218e-09, "loss": 0.1351, "step": 3754 }, { "epoch": 0.9787247906688822, "grad_norm": 1.09805574488703, "learning_rate": 6.0932158623014385e-09, "loss": 0.1472, "step": 3755 }, { "epoch": 0.9789854364187274, "grad_norm": 0.9826689836717596, "learning_rate": 5.94375406654607e-09, "loss": 0.1287, "step": 3756 }, { "epoch": 0.9792460821685727, "grad_norm": 1.0110229950369918, "learning_rate": 5.796146012515824e-09, "loss": 0.1339, "step": 3757 }, { "epoch": 0.9795067279184179, "grad_norm": 0.9957905330671405, "learning_rate": 5.650391809922107e-09, "loss": 0.1279, "step": 3758 }, { "epoch": 0.9797673736682632, "grad_norm": 1.0747454073476626, "learning_rate": 5.506491567098537e-09, "loss": 0.1303, "step": 3759 }, { "epoch": 0.9800280194181084, "grad_norm": 1.0119048341347656, "learning_rate": 5.364445391001228e-09, "loss": 0.1354, "step": 3760 }, { "epoch": 0.9802886651679537, "grad_norm": 1.054341820985021, "learning_rate": 5.2242533872082225e-09, "loss": 0.1374, "step": 3761 }, { "epoch": 0.9805493109177988, "grad_norm": 1.047023366213886, "learning_rate": 5.085915659918672e-09, "loss": 0.1423, "step": 3762 }, { "epoch": 0.980809956667644, "grad_norm": 1.0149484727040166, "learning_rate": 4.949432311954494e-09, "loss": 0.1285, "step": 3763 }, { "epoch": 0.9810706024174893, "grad_norm": 1.0259236489421373, "learning_rate": 4.814803444758431e-09, "loss": 0.1341, "step": 3764 }, { "epoch": 0.9813312481673345, "grad_norm": 0.991334242607583, "learning_rate": 4.68202915839544e-09, "loss": 0.1327, "step": 3765 }, { "epoch": 0.9815918939171798, "grad_norm": 1.013173537916188, "learning_rate": 4.551109551551581e-09, "loss": 0.1322, "step": 3766 }, { "epoch": 0.981852539667025, "grad_norm": 1.0336343836395956, "learning_rate": 4.422044721534846e-09, "loss": 0.1446, "step": 3767 }, { "epoch": 0.9821131854168703, "grad_norm": 1.0265054205536486, "learning_rate": 4.294834764274614e-09, "loss": 0.1394, "step": 3768 }, { "epoch": 0.9823738311667155, "grad_norm": 0.9931205561476413, "learning_rate": 4.1694797743210835e-09, "loss": 0.1215, "step": 3769 }, { "epoch": 0.9826344769165608, "grad_norm": 1.0200730084100553, "learning_rate": 4.04597984484667e-09, "loss": 0.1308, "step": 3770 }, { "epoch": 0.982895122666406, "grad_norm": 0.9895032732366518, "learning_rate": 3.924335067643781e-09, "loss": 0.1252, "step": 3771 }, { "epoch": 0.9831557684162513, "grad_norm": 1.011367007052262, "learning_rate": 3.804545533126758e-09, "loss": 0.1271, "step": 3772 }, { "epoch": 0.9834164141660965, "grad_norm": 1.0255606361614837, "learning_rate": 3.6866113303307696e-09, "loss": 0.1325, "step": 3773 }, { "epoch": 0.9836770599159418, "grad_norm": 1.0098994065364013, "learning_rate": 3.5705325469123644e-09, "loss": 0.1337, "step": 3774 }, { "epoch": 0.983937705665787, "grad_norm": 0.9813719415140462, "learning_rate": 3.456309269148361e-09, "loss": 0.1311, "step": 3775 }, { "epoch": 0.9841983514156323, "grad_norm": 1.0252840178355787, "learning_rate": 3.3439415819369604e-09, "loss": 0.1362, "step": 3776 }, { "epoch": 0.9844589971654775, "grad_norm": 1.0030242634897528, "learning_rate": 3.2334295687969084e-09, "loss": 0.1284, "step": 3777 }, { "epoch": 0.9847196429153227, "grad_norm": 1.0412552771738106, "learning_rate": 3.124773311867779e-09, "loss": 0.1356, "step": 3778 }, { "epoch": 0.984980288665168, "grad_norm": 0.9910919360176359, "learning_rate": 3.0179728919099703e-09, "loss": 0.1299, "step": 3779 }, { "epoch": 0.9852409344150131, "grad_norm": 1.0409028705242365, "learning_rate": 2.9130283883044285e-09, "loss": 0.1311, "step": 3780 }, { "epoch": 0.9855015801648584, "grad_norm": 0.9891829586947859, "learning_rate": 2.8099398790520926e-09, "loss": 0.1225, "step": 3781 }, { "epoch": 0.9857622259147036, "grad_norm": 1.0466824378196475, "learning_rate": 2.70870744077556e-09, "loss": 0.1346, "step": 3782 }, { "epoch": 0.9860228716645489, "grad_norm": 0.9712977883158774, "learning_rate": 2.6093311487168647e-09, "loss": 0.1189, "step": 3783 }, { "epoch": 0.9862835174143941, "grad_norm": 1.002922356250604, "learning_rate": 2.5118110767388682e-09, "loss": 0.1331, "step": 3784 }, { "epoch": 0.9865441631642394, "grad_norm": 0.9971615538391205, "learning_rate": 2.4161472973247004e-09, "loss": 0.1254, "step": 3785 }, { "epoch": 0.9868048089140846, "grad_norm": 1.0508864031213263, "learning_rate": 2.322339881577762e-09, "loss": 0.136, "step": 3786 }, { "epoch": 0.9870654546639299, "grad_norm": 1.0217596318884625, "learning_rate": 2.2303888992220026e-09, "loss": 0.1341, "step": 3787 }, { "epoch": 0.9873261004137751, "grad_norm": 1.0033285080959626, "learning_rate": 2.140294418600808e-09, "loss": 0.1209, "step": 3788 }, { "epoch": 0.9875867461636204, "grad_norm": 1.0206422139076556, "learning_rate": 2.0520565066783905e-09, "loss": 0.1263, "step": 3789 }, { "epoch": 0.9878473919134656, "grad_norm": 1.0055557226864529, "learning_rate": 1.965675229038677e-09, "loss": 0.1314, "step": 3790 }, { "epoch": 0.9881080376633109, "grad_norm": 1.0263235874250507, "learning_rate": 1.881150649886143e-09, "loss": 0.1276, "step": 3791 }, { "epoch": 0.9883686834131561, "grad_norm": 1.059177042107859, "learning_rate": 1.7984828320444236e-09, "loss": 0.1408, "step": 3792 }, { "epoch": 0.9886293291630014, "grad_norm": 1.0871363739134967, "learning_rate": 1.7176718369574242e-09, "loss": 0.1443, "step": 3793 }, { "epoch": 0.9888899749128466, "grad_norm": 1.0084719100760378, "learning_rate": 1.6387177246893205e-09, "loss": 0.1338, "step": 3794 }, { "epoch": 0.9891506206626918, "grad_norm": 0.9774630080951626, "learning_rate": 1.5616205539240038e-09, "loss": 0.1212, "step": 3795 }, { "epoch": 0.9894112664125371, "grad_norm": 1.0550289008162697, "learning_rate": 1.486380381964525e-09, "loss": 0.1388, "step": 3796 }, { "epoch": 0.9896719121623823, "grad_norm": 1.0322194448726816, "learning_rate": 1.4129972647344836e-09, "loss": 0.1386, "step": 3797 }, { "epoch": 0.9899325579122276, "grad_norm": 1.0243400522741297, "learning_rate": 1.3414712567769161e-09, "loss": 0.1331, "step": 3798 }, { "epoch": 0.9901932036620728, "grad_norm": 0.9611250727234391, "learning_rate": 1.271802411254297e-09, "loss": 0.1246, "step": 3799 }, { "epoch": 0.990453849411918, "grad_norm": 1.0144709260438471, "learning_rate": 1.2039907799490935e-09, "loss": 0.1286, "step": 3800 }, { "epoch": 0.9907144951617632, "grad_norm": 1.077563399770333, "learning_rate": 1.138036413263488e-09, "loss": 0.1484, "step": 3801 }, { "epoch": 0.9909751409116085, "grad_norm": 0.9822261055859796, "learning_rate": 1.0739393602185454e-09, "loss": 0.1272, "step": 3802 }, { "epoch": 0.9912357866614537, "grad_norm": 1.0435415838171302, "learning_rate": 1.0116996684556014e-09, "loss": 0.1333, "step": 3803 }, { "epoch": 0.991496432411299, "grad_norm": 1.0262138496087088, "learning_rate": 9.513173842348732e-10, "loss": 0.1303, "step": 3804 }, { "epoch": 0.9917570781611442, "grad_norm": 1.0272628710833365, "learning_rate": 8.927925524365721e-10, "loss": 0.1291, "step": 3805 }, { "epoch": 0.9920177239109895, "grad_norm": 0.9555277307296939, "learning_rate": 8.361252165597911e-10, "loss": 0.1215, "step": 3806 }, { "epoch": 0.9922783696608347, "grad_norm": 1.0331692152769054, "learning_rate": 7.813154187236161e-10, "loss": 0.1318, "step": 3807 }, { "epoch": 0.99253901541068, "grad_norm": 1.0592100185599904, "learning_rate": 7.283631996662933e-10, "loss": 0.142, "step": 3808 }, { "epoch": 0.9927996611605252, "grad_norm": 1.0086975803315426, "learning_rate": 6.772685987449512e-10, "loss": 0.1272, "step": 3809 }, { "epoch": 0.9930603069103704, "grad_norm": 1.0404051829970486, "learning_rate": 6.28031653936434e-10, "loss": 0.1379, "step": 3810 }, { "epoch": 0.9933209526602157, "grad_norm": 1.02090475275395, "learning_rate": 5.806524018367454e-10, "loss": 0.1275, "step": 3811 }, { "epoch": 0.9935815984100609, "grad_norm": 0.9992816356067408, "learning_rate": 5.351308776613273e-10, "loss": 0.1286, "step": 3812 }, { "epoch": 0.9938422441599062, "grad_norm": 1.0933064143040583, "learning_rate": 4.914671152447814e-10, "loss": 0.1551, "step": 3813 }, { "epoch": 0.9941028899097514, "grad_norm": 0.9710580751839265, "learning_rate": 4.49661147040592e-10, "loss": 0.1256, "step": 3814 }, { "epoch": 0.9943635356595967, "grad_norm": 1.011380478934978, "learning_rate": 4.0971300412140367e-10, "loss": 0.1332, "step": 3815 }, { "epoch": 0.9946241814094419, "grad_norm": 1.030362252551106, "learning_rate": 3.7162271617985357e-10, "loss": 0.1395, "step": 3816 }, { "epoch": 0.9948848271592872, "grad_norm": 1.0332945967360339, "learning_rate": 3.3539031152635125e-10, "loss": 0.1345, "step": 3817 }, { "epoch": 0.9951454729091324, "grad_norm": 1.021639931885412, "learning_rate": 3.0101581709185424e-10, "loss": 0.1372, "step": 3818 }, { "epoch": 0.9954061186589777, "grad_norm": 1.0622517155200424, "learning_rate": 2.684992584250923e-10, "loss": 0.1323, "step": 3819 }, { "epoch": 0.9956667644088228, "grad_norm": 1.0131398473634277, "learning_rate": 2.3784065969451043e-10, "loss": 0.129, "step": 3820 }, { "epoch": 0.9959274101586681, "grad_norm": 0.948628478468711, "learning_rate": 2.0904004368799136e-10, "loss": 0.12, "step": 3821 }, { "epoch": 0.9961880559085133, "grad_norm": 1.0314666855458154, "learning_rate": 1.8209743181146766e-10, "loss": 0.1318, "step": 3822 }, { "epoch": 0.9964487016583586, "grad_norm": 0.9923064494048915, "learning_rate": 1.570128440908647e-10, "loss": 0.1352, "step": 3823 }, { "epoch": 0.9967093474082038, "grad_norm": 1.0519176370778205, "learning_rate": 1.3378629917015772e-10, "loss": 0.1392, "step": 3824 }, { "epoch": 0.996969993158049, "grad_norm": 1.07334880612234, "learning_rate": 1.1241781431303723e-10, "loss": 0.1437, "step": 3825 }, { "epoch": 0.9972306389078943, "grad_norm": 1.0600844089470798, "learning_rate": 9.290740540207621e-11, "loss": 0.1359, "step": 3826 }, { "epoch": 0.9974912846577395, "grad_norm": 0.9771894247315461, "learning_rate": 7.525508693845274e-11, "loss": 0.1285, "step": 3827 }, { "epoch": 0.9977519304075848, "grad_norm": 1.0460936677743096, "learning_rate": 5.94608720427825e-11, "loss": 0.1358, "step": 3828 }, { "epoch": 0.99801257615743, "grad_norm": 1.0018661934281066, "learning_rate": 4.5524772453731103e-11, "loss": 0.1397, "step": 3829 }, { "epoch": 0.9982732219072753, "grad_norm": 1.0403800714829685, "learning_rate": 3.344679853023447e-11, "loss": 0.1386, "step": 3830 }, { "epoch": 0.9985338676571205, "grad_norm": 0.996543895525393, "learning_rate": 2.322695924900087e-11, "loss": 0.1268, "step": 3831 }, { "epoch": 0.9987945134069658, "grad_norm": 1.0799725803378954, "learning_rate": 1.4865262205898678e-11, "loss": 0.1479, "step": 3832 }, { "epoch": 0.999055159156811, "grad_norm": 0.9487903025153326, "learning_rate": 8.361713616233946e-12, "loss": 0.1109, "step": 3833 }, { "epoch": 0.9993158049066563, "grad_norm": 1.06169908631579, "learning_rate": 3.716318313640166e-12, "loss": 0.1413, "step": 3834 }, { "epoch": 0.9995764506565015, "grad_norm": 1.0293575338031966, "learning_rate": 9.290797511884998e-13, "loss": 0.1332, "step": 3835 }, { "epoch": 0.9998370964063468, "grad_norm": 0.9932561530750641, "learning_rate": 0.0, "loss": 0.1303, "step": 3836 }, { "epoch": 0.9998370964063468, "step": 3836, "total_flos": 695217018961920.0, "train_loss": 0.1678922613490299, "train_runtime": 21677.0737, "train_samples_per_second": 11.327, "train_steps_per_second": 0.177 } ], "logging_steps": 1, "max_steps": 3836, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 695217018961920.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }