{ "best_metric": 1.4320533275604248, "best_model_checkpoint": "model/chessformer-3/checkpoint-22000", "epoch": 1.0, "eval_steps": 500, "global_step": 22491, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 8.892445867235783e-05, "grad_norm": 15.056964874267578, "learning_rate": 2e-06, "loss": 8.6442, "step": 2 }, { "epoch": 0.00017784891734471566, "grad_norm": 12.359731674194336, "learning_rate": 4e-06, "loss": 8.4787, "step": 4 }, { "epoch": 0.0002667733760170735, "grad_norm": 6.386526584625244, "learning_rate": 6e-06, "loss": 8.1571, "step": 6 }, { "epoch": 0.0003556978346894313, "grad_norm": 3.8906912803649902, "learning_rate": 8e-06, "loss": 7.9194, "step": 8 }, { "epoch": 0.00044462229336178917, "grad_norm": 2.871436357498169, "learning_rate": 1e-05, "loss": 7.7565, "step": 10 }, { "epoch": 0.000533546752034147, "grad_norm": 2.3168184757232666, "learning_rate": 1.2e-05, "loss": 7.6304, "step": 12 }, { "epoch": 0.0006224712107065049, "grad_norm": 1.9175453186035156, "learning_rate": 1.4e-05, "loss": 7.5403, "step": 14 }, { "epoch": 0.0007113956693788626, "grad_norm": 3.3437652587890625, "learning_rate": 1.6e-05, "loss": 7.464, "step": 16 }, { "epoch": 0.0008003201280512205, "grad_norm": 1.4156451225280762, "learning_rate": 1.8e-05, "loss": 7.4051, "step": 18 }, { "epoch": 0.0008892445867235783, "grad_norm": 2.325925588607788, "learning_rate": 2e-05, "loss": 7.3434, "step": 20 }, { "epoch": 0.0009781690453959362, "grad_norm": 1.561901569366455, "learning_rate": 2.2e-05, "loss": 7.2769, "step": 22 }, { "epoch": 0.001067093504068294, "grad_norm": 1.9596675634384155, "learning_rate": 2.4e-05, "loss": 7.1864, "step": 24 }, { "epoch": 0.0011560179627406518, "grad_norm": 3.5041377544403076, "learning_rate": 2.6e-05, "loss": 7.0938, "step": 26 }, { "epoch": 0.0012449424214130097, "grad_norm": 9.32341480255127, "learning_rate": 2.8e-05, "loss": 7.0071, "step": 28 }, { "epoch": 0.0013338668800853674, "grad_norm": 7.433465480804443, "learning_rate": 3e-05, "loss": 6.9707, "step": 30 }, { "epoch": 0.0014227913387577253, "grad_norm": 32.543617248535156, "learning_rate": 3.2e-05, "loss": 6.9459, "step": 32 }, { "epoch": 0.0015117157974300832, "grad_norm": 19.157636642456055, "learning_rate": 3.4000000000000007e-05, "loss": 6.9043, "step": 34 }, { "epoch": 0.001600640256102441, "grad_norm": 4.467665672302246, "learning_rate": 3.6e-05, "loss": 6.8362, "step": 36 }, { "epoch": 0.0016895647147747988, "grad_norm": 9.15857982635498, "learning_rate": 3.8e-05, "loss": 6.7782, "step": 38 }, { "epoch": 0.0017784891734471567, "grad_norm": 11.840293884277344, "learning_rate": 4e-05, "loss": 6.7454, "step": 40 }, { "epoch": 0.0018674136321195146, "grad_norm": 5.0099263191223145, "learning_rate": 4.2000000000000004e-05, "loss": 6.699, "step": 42 }, { "epoch": 0.0019563380907918725, "grad_norm": 5.58508825302124, "learning_rate": 4.4e-05, "loss": 6.6336, "step": 44 }, { "epoch": 0.00204526254946423, "grad_norm": 4.943291187286377, "learning_rate": 4.6e-05, "loss": 6.6033, "step": 46 }, { "epoch": 0.002134187008136588, "grad_norm": 3.4954986572265625, "learning_rate": 4.8e-05, "loss": 6.5511, "step": 48 }, { "epoch": 0.002223111466808946, "grad_norm": 1.8841090202331543, "learning_rate": 5e-05, "loss": 6.4977, "step": 50 }, { "epoch": 0.0023120359254813036, "grad_norm": 2.3768599033355713, "learning_rate": 5.2e-05, "loss": 6.4315, "step": 52 }, { "epoch": 0.0024009603841536613, "grad_norm": 2.597606658935547, "learning_rate": 5.4e-05, "loss": 6.3706, "step": 54 }, { "epoch": 0.0024898848428260194, "grad_norm": 5.908978462219238, "learning_rate": 5.6e-05, "loss": 6.3226, "step": 56 }, { "epoch": 0.002578809301498377, "grad_norm": 3.5320820808410645, "learning_rate": 5.800000000000001e-05, "loss": 6.2571, "step": 58 }, { "epoch": 0.0026677337601707348, "grad_norm": 3.2798655033111572, "learning_rate": 6e-05, "loss": 6.2272, "step": 60 }, { "epoch": 0.002756658218843093, "grad_norm": 4.321415901184082, "learning_rate": 6.2e-05, "loss": 6.1702, "step": 62 }, { "epoch": 0.0028455826775154506, "grad_norm": 3.398996353149414, "learning_rate": 6.4e-05, "loss": 6.1203, "step": 64 }, { "epoch": 0.0029345071361878083, "grad_norm": 5.903692245483398, "learning_rate": 6.6e-05, "loss": 6.0742, "step": 66 }, { "epoch": 0.0030234315948601664, "grad_norm": 2.3560030460357666, "learning_rate": 6.800000000000001e-05, "loss": 6.0274, "step": 68 }, { "epoch": 0.003112356053532524, "grad_norm": 3.1397175788879395, "learning_rate": 7.000000000000001e-05, "loss": 5.9813, "step": 70 }, { "epoch": 0.003201280512204882, "grad_norm": 4.45662784576416, "learning_rate": 7.2e-05, "loss": 5.9562, "step": 72 }, { "epoch": 0.00329020497087724, "grad_norm": 5.479487895965576, "learning_rate": 7.4e-05, "loss": 5.9073, "step": 74 }, { "epoch": 0.0033791294295495975, "grad_norm": 4.0991363525390625, "learning_rate": 7.6e-05, "loss": 5.8533, "step": 76 }, { "epoch": 0.0034680538882219556, "grad_norm": 2.800793170928955, "learning_rate": 7.8e-05, "loss": 5.8174, "step": 78 }, { "epoch": 0.0035569783468943133, "grad_norm": 3.3895621299743652, "learning_rate": 8e-05, "loss": 5.7922, "step": 80 }, { "epoch": 0.003645902805566671, "grad_norm": 4.780695915222168, "learning_rate": 8.2e-05, "loss": 5.7626, "step": 82 }, { "epoch": 0.003734827264239029, "grad_norm": 5.153146743774414, "learning_rate": 8.400000000000001e-05, "loss": 5.726, "step": 84 }, { "epoch": 0.003823751722911387, "grad_norm": 6.493163108825684, "learning_rate": 8.599999999999999e-05, "loss": 5.7056, "step": 86 }, { "epoch": 0.003912676181583745, "grad_norm": 7.0106916427612305, "learning_rate": 8.8e-05, "loss": 5.6722, "step": 88 }, { "epoch": 0.004001600640256103, "grad_norm": 4.670555591583252, "learning_rate": 8.999999999999999e-05, "loss": 5.6304, "step": 90 }, { "epoch": 0.00409052509892846, "grad_norm": 5.772502899169922, "learning_rate": 9.2e-05, "loss": 5.634, "step": 92 }, { "epoch": 0.004179449557600818, "grad_norm": 4.904117107391357, "learning_rate": 9.400000000000001e-05, "loss": 5.5903, "step": 94 }, { "epoch": 0.004268374016273176, "grad_norm": 4.786822319030762, "learning_rate": 9.6e-05, "loss": 5.5855, "step": 96 }, { "epoch": 0.004357298474945534, "grad_norm": 6.82847261428833, "learning_rate": 9.800000000000001e-05, "loss": 5.5506, "step": 98 }, { "epoch": 0.004446222933617892, "grad_norm": 5.267941474914551, "learning_rate": 0.0001, "loss": 5.5599, "step": 100 }, { "epoch": 0.0045351473922902496, "grad_norm": 4.727493762969971, "learning_rate": 0.000102, "loss": 5.536, "step": 102 }, { "epoch": 0.004624071850962607, "grad_norm": 5.104313373565674, "learning_rate": 0.000104, "loss": 5.4914, "step": 104 }, { "epoch": 0.004712996309634965, "grad_norm": 5.250439167022705, "learning_rate": 0.000106, "loss": 5.4865, "step": 106 }, { "epoch": 0.004801920768307323, "grad_norm": 6.170891761779785, "learning_rate": 0.000108, "loss": 5.4896, "step": 108 }, { "epoch": 0.004890845226979681, "grad_norm": 6.01248025894165, "learning_rate": 0.00011, "loss": 5.4465, "step": 110 }, { "epoch": 0.004979769685652039, "grad_norm": 7.449190139770508, "learning_rate": 0.000112, "loss": 5.4305, "step": 112 }, { "epoch": 0.0050686941443243965, "grad_norm": 4.532025337219238, "learning_rate": 0.000114, "loss": 5.4133, "step": 114 }, { "epoch": 0.005157618602996754, "grad_norm": 6.034667491912842, "learning_rate": 0.00011600000000000001, "loss": 5.403, "step": 116 }, { "epoch": 0.005246543061669112, "grad_norm": 8.826780319213867, "learning_rate": 0.000118, "loss": 5.4014, "step": 118 }, { "epoch": 0.0053354675203414696, "grad_norm": 6.5053606033325195, "learning_rate": 0.00012, "loss": 5.3666, "step": 120 }, { "epoch": 0.005424391979013828, "grad_norm": 5.656647205352783, "learning_rate": 0.000122, "loss": 5.3378, "step": 122 }, { "epoch": 0.005513316437686186, "grad_norm": 4.237445831298828, "learning_rate": 0.000124, "loss": 5.331, "step": 124 }, { "epoch": 0.0056022408963585435, "grad_norm": 5.622873306274414, "learning_rate": 0.000126, "loss": 5.3327, "step": 126 }, { "epoch": 0.005691165355030901, "grad_norm": 5.530211925506592, "learning_rate": 0.000128, "loss": 5.3176, "step": 128 }, { "epoch": 0.005780089813703259, "grad_norm": 6.660238742828369, "learning_rate": 0.00013000000000000002, "loss": 5.3224, "step": 130 }, { "epoch": 0.0058690142723756165, "grad_norm": 5.829392910003662, "learning_rate": 0.000132, "loss": 5.2855, "step": 132 }, { "epoch": 0.005957938731047975, "grad_norm": 4.142682075500488, "learning_rate": 0.000134, "loss": 5.2567, "step": 134 }, { "epoch": 0.006046863189720333, "grad_norm": 4.414917469024658, "learning_rate": 0.00013600000000000003, "loss": 5.2591, "step": 136 }, { "epoch": 0.00613578764839269, "grad_norm": 5.137867450714111, "learning_rate": 0.00013800000000000002, "loss": 5.2214, "step": 138 }, { "epoch": 0.006224712107065048, "grad_norm": 4.432452201843262, "learning_rate": 0.00014000000000000001, "loss": 5.2201, "step": 140 }, { "epoch": 0.006313636565737406, "grad_norm": 3.24981951713562, "learning_rate": 0.00014199999999999998, "loss": 5.222, "step": 142 }, { "epoch": 0.006402561024409764, "grad_norm": 7.362234592437744, "learning_rate": 0.000144, "loss": 5.2342, "step": 144 }, { "epoch": 0.006491485483082122, "grad_norm": 6.990148067474365, "learning_rate": 0.000146, "loss": 5.1899, "step": 146 }, { "epoch": 0.00658040994175448, "grad_norm": 9.778069496154785, "learning_rate": 0.000148, "loss": 5.2028, "step": 148 }, { "epoch": 0.006669334400426837, "grad_norm": 7.326447486877441, "learning_rate": 0.00015, "loss": 5.1736, "step": 150 }, { "epoch": 0.006758258859099195, "grad_norm": 6.0494065284729, "learning_rate": 0.000152, "loss": 5.1704, "step": 152 }, { "epoch": 0.006847183317771553, "grad_norm": 7.345653057098389, "learning_rate": 0.000154, "loss": 5.1535, "step": 154 }, { "epoch": 0.006936107776443911, "grad_norm": 5.5805768966674805, "learning_rate": 0.000156, "loss": 5.1525, "step": 156 }, { "epoch": 0.007025032235116269, "grad_norm": 5.274758815765381, "learning_rate": 0.000158, "loss": 5.1331, "step": 158 }, { "epoch": 0.007113956693788627, "grad_norm": 4.2305169105529785, "learning_rate": 0.00016, "loss": 5.1053, "step": 160 }, { "epoch": 0.007202881152460984, "grad_norm": 6.961759567260742, "learning_rate": 0.000162, "loss": 5.1053, "step": 162 }, { "epoch": 0.007291805611133342, "grad_norm": 6.679699420928955, "learning_rate": 0.000164, "loss": 5.0955, "step": 164 }, { "epoch": 0.0073807300698057, "grad_norm": 6.162731647491455, "learning_rate": 0.00016600000000000002, "loss": 5.0938, "step": 166 }, { "epoch": 0.007469654528478058, "grad_norm": 7.818715572357178, "learning_rate": 0.00016800000000000002, "loss": 5.0897, "step": 168 }, { "epoch": 0.007558578987150416, "grad_norm": 5.024120807647705, "learning_rate": 0.00017, "loss": 5.0611, "step": 170 }, { "epoch": 0.007647503445822774, "grad_norm": 6.050165176391602, "learning_rate": 0.00017199999999999998, "loss": 5.053, "step": 172 }, { "epoch": 0.007736427904495131, "grad_norm": 4.531955242156982, "learning_rate": 0.000174, "loss": 5.021, "step": 174 }, { "epoch": 0.00782535236316749, "grad_norm": 4.957339286804199, "learning_rate": 0.000176, "loss": 5.003, "step": 176 }, { "epoch": 0.007914276821839848, "grad_norm": 5.476766586303711, "learning_rate": 0.000178, "loss": 5.0004, "step": 178 }, { "epoch": 0.008003201280512205, "grad_norm": 3.472067356109619, "learning_rate": 0.00017999999999999998, "loss": 4.9962, "step": 180 }, { "epoch": 0.008092125739184563, "grad_norm": 4.526614665985107, "learning_rate": 0.000182, "loss": 4.9533, "step": 182 }, { "epoch": 0.00818105019785692, "grad_norm": 4.315027236938477, "learning_rate": 0.000184, "loss": 4.9476, "step": 184 }, { "epoch": 0.008269974656529278, "grad_norm": 5.466373920440674, "learning_rate": 0.000186, "loss": 4.9424, "step": 186 }, { "epoch": 0.008358899115201636, "grad_norm": 4.121331691741943, "learning_rate": 0.00018800000000000002, "loss": 4.8852, "step": 188 }, { "epoch": 0.008447823573873994, "grad_norm": 5.156192302703857, "learning_rate": 0.00019, "loss": 4.8728, "step": 190 }, { "epoch": 0.008536748032546351, "grad_norm": 2.8727962970733643, "learning_rate": 0.000192, "loss": 4.8646, "step": 192 }, { "epoch": 0.008625672491218709, "grad_norm": 3.9635331630706787, "learning_rate": 0.000194, "loss": 4.8479, "step": 194 }, { "epoch": 0.008714596949891068, "grad_norm": 2.293034315109253, "learning_rate": 0.00019600000000000002, "loss": 4.8295, "step": 196 }, { "epoch": 0.008803521408563426, "grad_norm": 4.057344436645508, "learning_rate": 0.00019800000000000002, "loss": 4.8595, "step": 198 }, { "epoch": 0.008892445867235784, "grad_norm": 6.884203910827637, "learning_rate": 0.0002, "loss": 4.8364, "step": 200 }, { "epoch": 0.008981370325908141, "grad_norm": 3.6881725788116455, "learning_rate": 0.000202, "loss": 4.8192, "step": 202 }, { "epoch": 0.009070294784580499, "grad_norm": 3.199767827987671, "learning_rate": 0.000204, "loss": 4.7844, "step": 204 }, { "epoch": 0.009159219243252857, "grad_norm": 3.4823689460754395, "learning_rate": 0.000206, "loss": 4.7691, "step": 206 }, { "epoch": 0.009248143701925214, "grad_norm": 3.9715564250946045, "learning_rate": 0.000208, "loss": 4.7412, "step": 208 }, { "epoch": 0.009337068160597572, "grad_norm": 4.0734710693359375, "learning_rate": 0.00021, "loss": 4.7082, "step": 210 }, { "epoch": 0.00942599261926993, "grad_norm": 3.408423662185669, "learning_rate": 0.000212, "loss": 4.6992, "step": 212 }, { "epoch": 0.009514917077942288, "grad_norm": 3.8470771312713623, "learning_rate": 0.000214, "loss": 4.6654, "step": 214 }, { "epoch": 0.009603841536614645, "grad_norm": 3.259704113006592, "learning_rate": 0.000216, "loss": 4.6517, "step": 216 }, { "epoch": 0.009692765995287003, "grad_norm": 3.3092963695526123, "learning_rate": 0.000218, "loss": 4.6479, "step": 218 }, { "epoch": 0.009781690453959362, "grad_norm": 3.3664305210113525, "learning_rate": 0.00022, "loss": 4.6234, "step": 220 }, { "epoch": 0.00987061491263172, "grad_norm": 2.985428810119629, "learning_rate": 0.000222, "loss": 4.5937, "step": 222 }, { "epoch": 0.009959539371304078, "grad_norm": 4.405549049377441, "learning_rate": 0.000224, "loss": 4.5766, "step": 224 }, { "epoch": 0.010048463829976435, "grad_norm": 3.270683765411377, "learning_rate": 0.00022600000000000002, "loss": 4.5669, "step": 226 }, { "epoch": 0.010137388288648793, "grad_norm": 2.5708577632904053, "learning_rate": 0.000228, "loss": 4.52, "step": 228 }, { "epoch": 0.01022631274732115, "grad_norm": 4.556532859802246, "learning_rate": 0.00023, "loss": 4.5271, "step": 230 }, { "epoch": 0.010315237205993508, "grad_norm": 5.423923492431641, "learning_rate": 0.00023200000000000003, "loss": 4.5133, "step": 232 }, { "epoch": 0.010404161664665866, "grad_norm": 3.195823907852173, "learning_rate": 0.00023400000000000002, "loss": 4.4832, "step": 234 }, { "epoch": 0.010493086123338224, "grad_norm": 2.918025016784668, "learning_rate": 0.000236, "loss": 4.4876, "step": 236 }, { "epoch": 0.010582010582010581, "grad_norm": 3.2062084674835205, "learning_rate": 0.00023799999999999998, "loss": 4.4573, "step": 238 }, { "epoch": 0.010670935040682939, "grad_norm": 3.347294330596924, "learning_rate": 0.00024, "loss": 4.4288, "step": 240 }, { "epoch": 0.010759859499355299, "grad_norm": 2.824416160583496, "learning_rate": 0.000242, "loss": 4.3892, "step": 242 }, { "epoch": 0.010848783958027656, "grad_norm": 2.5121309757232666, "learning_rate": 0.000244, "loss": 4.3841, "step": 244 }, { "epoch": 0.010937708416700014, "grad_norm": 3.1845433712005615, "learning_rate": 0.000246, "loss": 4.3617, "step": 246 }, { "epoch": 0.011026632875372372, "grad_norm": 3.0984976291656494, "learning_rate": 0.000248, "loss": 4.3292, "step": 248 }, { "epoch": 0.01111555733404473, "grad_norm": 2.4962711334228516, "learning_rate": 0.00025, "loss": 4.292, "step": 250 }, { "epoch": 0.011204481792717087, "grad_norm": 3.3320326805114746, "learning_rate": 0.000252, "loss": 4.2964, "step": 252 }, { "epoch": 0.011293406251389445, "grad_norm": 3.5653789043426514, "learning_rate": 0.000254, "loss": 4.2572, "step": 254 }, { "epoch": 0.011382330710061802, "grad_norm": 2.946533679962158, "learning_rate": 0.000256, "loss": 4.2446, "step": 256 }, { "epoch": 0.01147125516873416, "grad_norm": 3.6157164573669434, "learning_rate": 0.00025800000000000004, "loss": 4.2731, "step": 258 }, { "epoch": 0.011560179627406518, "grad_norm": 3.1283135414123535, "learning_rate": 0.00026000000000000003, "loss": 4.2333, "step": 260 }, { "epoch": 0.011649104086078875, "grad_norm": 3.6416194438934326, "learning_rate": 0.000262, "loss": 4.1951, "step": 262 }, { "epoch": 0.011738028544751233, "grad_norm": 2.4387340545654297, "learning_rate": 0.000264, "loss": 4.1772, "step": 264 }, { "epoch": 0.011826953003423592, "grad_norm": 2.776474714279175, "learning_rate": 0.000266, "loss": 4.1483, "step": 266 }, { "epoch": 0.01191587746209595, "grad_norm": 3.2721757888793945, "learning_rate": 0.000268, "loss": 4.1457, "step": 268 }, { "epoch": 0.012004801920768308, "grad_norm": 3.0057179927825928, "learning_rate": 0.00027, "loss": 4.0978, "step": 270 }, { "epoch": 0.012093726379440665, "grad_norm": 3.400449514389038, "learning_rate": 0.00027200000000000005, "loss": 4.0854, "step": 272 }, { "epoch": 0.012182650838113023, "grad_norm": 2.8374624252319336, "learning_rate": 0.00027400000000000005, "loss": 4.0779, "step": 274 }, { "epoch": 0.01227157529678538, "grad_norm": 3.7050421237945557, "learning_rate": 0.00027600000000000004, "loss": 4.0588, "step": 276 }, { "epoch": 0.012360499755457739, "grad_norm": 3.274221181869507, "learning_rate": 0.00027800000000000004, "loss": 4.0334, "step": 278 }, { "epoch": 0.012449424214130096, "grad_norm": 2.5664162635803223, "learning_rate": 0.00028000000000000003, "loss": 4.0303, "step": 280 }, { "epoch": 0.012538348672802454, "grad_norm": 2.751819133758545, "learning_rate": 0.00028199999999999997, "loss": 3.9866, "step": 282 }, { "epoch": 0.012627273131474812, "grad_norm": 2.635796546936035, "learning_rate": 0.00028399999999999996, "loss": 3.967, "step": 284 }, { "epoch": 0.01271619759014717, "grad_norm": 2.1804494857788086, "learning_rate": 0.00028599999999999996, "loss": 3.9361, "step": 286 }, { "epoch": 0.012805122048819529, "grad_norm": 2.3362250328063965, "learning_rate": 0.000288, "loss": 3.9309, "step": 288 }, { "epoch": 0.012894046507491886, "grad_norm": 2.179434061050415, "learning_rate": 0.00029, "loss": 3.9056, "step": 290 }, { "epoch": 0.012982970966164244, "grad_norm": 2.6744179725646973, "learning_rate": 0.000292, "loss": 3.9158, "step": 292 }, { "epoch": 0.013071895424836602, "grad_norm": 2.8267312049865723, "learning_rate": 0.000294, "loss": 3.9134, "step": 294 }, { "epoch": 0.01316081988350896, "grad_norm": 2.420217990875244, "learning_rate": 0.000296, "loss": 3.8633, "step": 296 }, { "epoch": 0.013249744342181317, "grad_norm": 3.261594533920288, "learning_rate": 0.000298, "loss": 3.8427, "step": 298 }, { "epoch": 0.013338668800853675, "grad_norm": 2.8842456340789795, "learning_rate": 0.0003, "loss": 3.8155, "step": 300 }, { "epoch": 0.013427593259526032, "grad_norm": 2.741398334503174, "learning_rate": 0.000302, "loss": 3.7851, "step": 302 }, { "epoch": 0.01351651771819839, "grad_norm": 2.377107620239258, "learning_rate": 0.000304, "loss": 3.7656, "step": 304 }, { "epoch": 0.013605442176870748, "grad_norm": 3.3047285079956055, "learning_rate": 0.000306, "loss": 3.7708, "step": 306 }, { "epoch": 0.013694366635543105, "grad_norm": 3.042447090148926, "learning_rate": 0.000308, "loss": 3.7339, "step": 308 }, { "epoch": 0.013783291094215463, "grad_norm": 2.671283721923828, "learning_rate": 0.00031, "loss": 3.7213, "step": 310 }, { "epoch": 0.013872215552887823, "grad_norm": 3.2665302753448486, "learning_rate": 0.000312, "loss": 3.7418, "step": 312 }, { "epoch": 0.01396114001156018, "grad_norm": 2.6072757244110107, "learning_rate": 0.000314, "loss": 3.7062, "step": 314 }, { "epoch": 0.014050064470232538, "grad_norm": 2.4585163593292236, "learning_rate": 0.000316, "loss": 3.675, "step": 316 }, { "epoch": 0.014138988928904896, "grad_norm": 2.604253053665161, "learning_rate": 0.00031800000000000003, "loss": 3.6659, "step": 318 }, { "epoch": 0.014227913387577253, "grad_norm": 2.0046472549438477, "learning_rate": 0.00032, "loss": 3.6397, "step": 320 }, { "epoch": 0.014316837846249611, "grad_norm": 2.0930159091949463, "learning_rate": 0.000322, "loss": 3.6476, "step": 322 }, { "epoch": 0.014405762304921969, "grad_norm": 2.7829599380493164, "learning_rate": 0.000324, "loss": 3.6397, "step": 324 }, { "epoch": 0.014494686763594326, "grad_norm": 2.372708559036255, "learning_rate": 0.000326, "loss": 3.6136, "step": 326 }, { "epoch": 0.014583611222266684, "grad_norm": 2.2821056842803955, "learning_rate": 0.000328, "loss": 3.5951, "step": 328 }, { "epoch": 0.014672535680939042, "grad_norm": 2.620718002319336, "learning_rate": 0.00033, "loss": 3.5921, "step": 330 }, { "epoch": 0.0147614601396114, "grad_norm": 2.5476973056793213, "learning_rate": 0.00033200000000000005, "loss": 3.5755, "step": 332 }, { "epoch": 0.014850384598283759, "grad_norm": 2.17576003074646, "learning_rate": 0.00033400000000000004, "loss": 3.5587, "step": 334 }, { "epoch": 0.014939309056956116, "grad_norm": 2.368635416030884, "learning_rate": 0.00033600000000000004, "loss": 3.5409, "step": 336 }, { "epoch": 0.015028233515628474, "grad_norm": 3.101987361907959, "learning_rate": 0.00033800000000000003, "loss": 3.5305, "step": 338 }, { "epoch": 0.015117157974300832, "grad_norm": 2.805263042449951, "learning_rate": 0.00034, "loss": 3.5345, "step": 340 }, { "epoch": 0.01520608243297319, "grad_norm": 2.2070181369781494, "learning_rate": 0.000342, "loss": 3.5125, "step": 342 }, { "epoch": 0.015295006891645547, "grad_norm": 2.235079765319824, "learning_rate": 0.00034399999999999996, "loss": 3.4963, "step": 344 }, { "epoch": 0.015383931350317905, "grad_norm": 2.6045584678649902, "learning_rate": 0.000346, "loss": 3.5081, "step": 346 }, { "epoch": 0.015472855808990263, "grad_norm": 2.463226318359375, "learning_rate": 0.000348, "loss": 3.4777, "step": 348 }, { "epoch": 0.01556178026766262, "grad_norm": 2.3299500942230225, "learning_rate": 0.00035, "loss": 3.4477, "step": 350 }, { "epoch": 0.01565070472633498, "grad_norm": 1.9035766124725342, "learning_rate": 0.000352, "loss": 3.4405, "step": 352 }, { "epoch": 0.015739629185007337, "grad_norm": 2.3295583724975586, "learning_rate": 0.000354, "loss": 3.4328, "step": 354 }, { "epoch": 0.015828553643679695, "grad_norm": 2.2347898483276367, "learning_rate": 0.000356, "loss": 3.4162, "step": 356 }, { "epoch": 0.015917478102352053, "grad_norm": 2.777460813522339, "learning_rate": 0.000358, "loss": 3.4241, "step": 358 }, { "epoch": 0.01600640256102441, "grad_norm": 1.8630869388580322, "learning_rate": 0.00035999999999999997, "loss": 3.408, "step": 360 }, { "epoch": 0.016095327019696768, "grad_norm": 2.6221816539764404, "learning_rate": 0.000362, "loss": 3.4151, "step": 362 }, { "epoch": 0.016184251478369126, "grad_norm": 2.0977978706359863, "learning_rate": 0.000364, "loss": 3.3877, "step": 364 }, { "epoch": 0.016273175937041483, "grad_norm": 1.9285433292388916, "learning_rate": 0.000366, "loss": 3.3893, "step": 366 }, { "epoch": 0.01636210039571384, "grad_norm": 1.8546204566955566, "learning_rate": 0.000368, "loss": 3.3685, "step": 368 }, { "epoch": 0.0164510248543862, "grad_norm": 1.7032506465911865, "learning_rate": 0.00037, "loss": 3.3536, "step": 370 }, { "epoch": 0.016539949313058556, "grad_norm": 1.8106862306594849, "learning_rate": 0.000372, "loss": 3.3425, "step": 372 }, { "epoch": 0.016628873771730914, "grad_norm": 1.9233118295669556, "learning_rate": 0.000374, "loss": 3.3492, "step": 374 }, { "epoch": 0.016717798230403272, "grad_norm": 1.8728731870651245, "learning_rate": 0.00037600000000000003, "loss": 3.3253, "step": 376 }, { "epoch": 0.01680672268907563, "grad_norm": 2.1671135425567627, "learning_rate": 0.000378, "loss": 3.3353, "step": 378 }, { "epoch": 0.016895647147747987, "grad_norm": 2.369257926940918, "learning_rate": 0.00038, "loss": 3.303, "step": 380 }, { "epoch": 0.016984571606420345, "grad_norm": 2.2066473960876465, "learning_rate": 0.000382, "loss": 3.2988, "step": 382 }, { "epoch": 0.017073496065092703, "grad_norm": 2.1249470710754395, "learning_rate": 0.000384, "loss": 3.2837, "step": 384 }, { "epoch": 0.01716242052376506, "grad_norm": 2.062621831893921, "learning_rate": 0.000386, "loss": 3.269, "step": 386 }, { "epoch": 0.017251344982437418, "grad_norm": 1.8696043491363525, "learning_rate": 0.000388, "loss": 3.2692, "step": 388 }, { "epoch": 0.017340269441109776, "grad_norm": 1.7806510925292969, "learning_rate": 0.00039000000000000005, "loss": 3.2694, "step": 390 }, { "epoch": 0.017429193899782137, "grad_norm": 1.5019872188568115, "learning_rate": 0.00039200000000000004, "loss": 3.2581, "step": 392 }, { "epoch": 0.017518118358454494, "grad_norm": 1.6843113899230957, "learning_rate": 0.00039400000000000004, "loss": 3.248, "step": 394 }, { "epoch": 0.017607042817126852, "grad_norm": 1.682606816291809, "learning_rate": 0.00039600000000000003, "loss": 3.2564, "step": 396 }, { "epoch": 0.01769596727579921, "grad_norm": 2.0928115844726562, "learning_rate": 0.000398, "loss": 3.2256, "step": 398 }, { "epoch": 0.017784891734471567, "grad_norm": 1.9646433591842651, "learning_rate": 0.0004, "loss": 3.2244, "step": 400 }, { "epoch": 0.017873816193143925, "grad_norm": 1.8194555044174194, "learning_rate": 0.000402, "loss": 3.1879, "step": 402 }, { "epoch": 0.017962740651816283, "grad_norm": 1.7736834287643433, "learning_rate": 0.000404, "loss": 3.1818, "step": 404 }, { "epoch": 0.01805166511048864, "grad_norm": 1.6785839796066284, "learning_rate": 0.00040600000000000006, "loss": 3.1718, "step": 406 }, { "epoch": 0.018140589569160998, "grad_norm": 1.5538625717163086, "learning_rate": 0.000408, "loss": 3.1648, "step": 408 }, { "epoch": 0.018229514027833356, "grad_norm": 1.4424693584442139, "learning_rate": 0.00041, "loss": 3.1574, "step": 410 }, { "epoch": 0.018318438486505714, "grad_norm": 1.8182153701782227, "learning_rate": 0.000412, "loss": 3.1613, "step": 412 }, { "epoch": 0.01840736294517807, "grad_norm": 1.6871416568756104, "learning_rate": 0.000414, "loss": 3.1314, "step": 414 }, { "epoch": 0.01849628740385043, "grad_norm": 1.3707678318023682, "learning_rate": 0.000416, "loss": 3.1252, "step": 416 }, { "epoch": 0.018585211862522787, "grad_norm": 1.730461597442627, "learning_rate": 0.00041799999999999997, "loss": 3.1065, "step": 418 }, { "epoch": 0.018674136321195144, "grad_norm": 1.8215389251708984, "learning_rate": 0.00042, "loss": 3.1239, "step": 420 }, { "epoch": 0.018763060779867502, "grad_norm": 1.7602427005767822, "learning_rate": 0.000422, "loss": 3.108, "step": 422 }, { "epoch": 0.01885198523853986, "grad_norm": 1.5596345663070679, "learning_rate": 0.000424, "loss": 3.0993, "step": 424 }, { "epoch": 0.018940909697212217, "grad_norm": 1.3841055631637573, "learning_rate": 0.000426, "loss": 3.0894, "step": 426 }, { "epoch": 0.019029834155884575, "grad_norm": 1.6600890159606934, "learning_rate": 0.000428, "loss": 3.1196, "step": 428 }, { "epoch": 0.019118758614556933, "grad_norm": 1.7408195734024048, "learning_rate": 0.00043, "loss": 3.08, "step": 430 }, { "epoch": 0.01920768307322929, "grad_norm": 1.5704591274261475, "learning_rate": 0.000432, "loss": 3.0946, "step": 432 }, { "epoch": 0.019296607531901648, "grad_norm": 1.6484616994857788, "learning_rate": 0.00043400000000000003, "loss": 3.0796, "step": 434 }, { "epoch": 0.019385531990574006, "grad_norm": 1.5855478048324585, "learning_rate": 0.000436, "loss": 3.0624, "step": 436 }, { "epoch": 0.019474456449246367, "grad_norm": 1.427921175956726, "learning_rate": 0.000438, "loss": 3.0426, "step": 438 }, { "epoch": 0.019563380907918725, "grad_norm": 1.3351646661758423, "learning_rate": 0.00044, "loss": 3.0436, "step": 440 }, { "epoch": 0.019652305366591082, "grad_norm": 1.4644699096679688, "learning_rate": 0.000442, "loss": 3.0203, "step": 442 }, { "epoch": 0.01974122982526344, "grad_norm": 1.3958910703659058, "learning_rate": 0.000444, "loss": 3.0173, "step": 444 }, { "epoch": 0.019830154283935798, "grad_norm": 1.179125428199768, "learning_rate": 0.000446, "loss": 3.0025, "step": 446 }, { "epoch": 0.019919078742608155, "grad_norm": 1.343889594078064, "learning_rate": 0.000448, "loss": 2.9919, "step": 448 }, { "epoch": 0.020008003201280513, "grad_norm": 1.136616587638855, "learning_rate": 0.00045000000000000004, "loss": 2.9778, "step": 450 }, { "epoch": 0.02009692765995287, "grad_norm": 1.4144681692123413, "learning_rate": 0.00045200000000000004, "loss": 2.9919, "step": 452 }, { "epoch": 0.02018585211862523, "grad_norm": 1.300412654876709, "learning_rate": 0.00045400000000000003, "loss": 2.9753, "step": 454 }, { "epoch": 0.020274776577297586, "grad_norm": 1.2180825471878052, "learning_rate": 0.000456, "loss": 2.9706, "step": 456 }, { "epoch": 0.020363701035969944, "grad_norm": 1.2152589559555054, "learning_rate": 0.000458, "loss": 2.9791, "step": 458 }, { "epoch": 0.0204526254946423, "grad_norm": 1.3307461738586426, "learning_rate": 0.00046, "loss": 2.9722, "step": 460 }, { "epoch": 0.02054154995331466, "grad_norm": 1.3811430931091309, "learning_rate": 0.000462, "loss": 2.9623, "step": 462 }, { "epoch": 0.020630474411987017, "grad_norm": 1.6451685428619385, "learning_rate": 0.00046400000000000006, "loss": 2.9489, "step": 464 }, { "epoch": 0.020719398870659374, "grad_norm": 1.451253890991211, "learning_rate": 0.00046600000000000005, "loss": 2.9488, "step": 466 }, { "epoch": 0.020808323329331732, "grad_norm": 1.1156989336013794, "learning_rate": 0.00046800000000000005, "loss": 2.9374, "step": 468 }, { "epoch": 0.02089724778800409, "grad_norm": 1.3331708908081055, "learning_rate": 0.00047, "loss": 2.9271, "step": 470 }, { "epoch": 0.020986172246676447, "grad_norm": 1.2767914533615112, "learning_rate": 0.000472, "loss": 2.9176, "step": 472 }, { "epoch": 0.021075096705348805, "grad_norm": 1.5849168300628662, "learning_rate": 0.000474, "loss": 2.916, "step": 474 }, { "epoch": 0.021164021164021163, "grad_norm": 1.1405562162399292, "learning_rate": 0.00047599999999999997, "loss": 2.9101, "step": 476 }, { "epoch": 0.02125294562269352, "grad_norm": 1.2228617668151855, "learning_rate": 0.00047799999999999996, "loss": 2.9069, "step": 478 }, { "epoch": 0.021341870081365878, "grad_norm": 1.2419967651367188, "learning_rate": 0.00048, "loss": 2.8927, "step": 480 }, { "epoch": 0.021430794540038236, "grad_norm": 1.4789477586746216, "learning_rate": 0.000482, "loss": 2.8991, "step": 482 }, { "epoch": 0.021519718998710597, "grad_norm": 1.4335638284683228, "learning_rate": 0.000484, "loss": 2.8878, "step": 484 }, { "epoch": 0.021608643457382955, "grad_norm": 1.608608603477478, "learning_rate": 0.000486, "loss": 2.8878, "step": 486 }, { "epoch": 0.021697567916055312, "grad_norm": 1.1100876331329346, "learning_rate": 0.000488, "loss": 2.8723, "step": 488 }, { "epoch": 0.02178649237472767, "grad_norm": 1.4269640445709229, "learning_rate": 0.00049, "loss": 2.8704, "step": 490 }, { "epoch": 0.021875416833400028, "grad_norm": 1.4346448183059692, "learning_rate": 0.000492, "loss": 2.8758, "step": 492 }, { "epoch": 0.021964341292072385, "grad_norm": 1.3498926162719727, "learning_rate": 0.000494, "loss": 2.8468, "step": 494 }, { "epoch": 0.022053265750744743, "grad_norm": 1.1565560102462769, "learning_rate": 0.000496, "loss": 2.8485, "step": 496 }, { "epoch": 0.0221421902094171, "grad_norm": 1.066014289855957, "learning_rate": 0.000498, "loss": 2.8366, "step": 498 }, { "epoch": 0.02223111466808946, "grad_norm": 0.9729352593421936, "learning_rate": 0.0005, "loss": 2.8219, "step": 500 }, { "epoch": 0.02223111466808946, "eval_loss": 2.707019329071045, "eval_runtime": 12.411, "eval_samples_per_second": 556.764, "eval_steps_per_second": 69.616, "step": 500 }, { "epoch": 0.022320039126761816, "grad_norm": 1.277807354927063, "learning_rate": 0.0005020000000000001, "loss": 2.8322, "step": 502 }, { "epoch": 0.022408963585434174, "grad_norm": 1.2700484991073608, "learning_rate": 0.000504, "loss": 2.8281, "step": 504 }, { "epoch": 0.02249788804410653, "grad_norm": 1.3185160160064697, "learning_rate": 0.000506, "loss": 2.8253, "step": 506 }, { "epoch": 0.02258681250277889, "grad_norm": 1.1390444040298462, "learning_rate": 0.000508, "loss": 2.8258, "step": 508 }, { "epoch": 0.022675736961451247, "grad_norm": 1.3068996667861938, "learning_rate": 0.00051, "loss": 2.7982, "step": 510 }, { "epoch": 0.022764661420123605, "grad_norm": 1.2303944826126099, "learning_rate": 0.000512, "loss": 2.7974, "step": 512 }, { "epoch": 0.022853585878795962, "grad_norm": 1.3849083185195923, "learning_rate": 0.000514, "loss": 2.7972, "step": 514 }, { "epoch": 0.02294251033746832, "grad_norm": 0.9999197125434875, "learning_rate": 0.0005160000000000001, "loss": 2.7885, "step": 516 }, { "epoch": 0.023031434796140678, "grad_norm": 1.1116942167282104, "learning_rate": 0.000518, "loss": 2.7936, "step": 518 }, { "epoch": 0.023120359254813035, "grad_norm": 1.0687438249588013, "learning_rate": 0.0005200000000000001, "loss": 2.7732, "step": 520 }, { "epoch": 0.023209283713485393, "grad_norm": 1.048213243484497, "learning_rate": 0.000522, "loss": 2.7626, "step": 522 }, { "epoch": 0.02329820817215775, "grad_norm": 1.0101920366287231, "learning_rate": 0.000524, "loss": 2.7557, "step": 524 }, { "epoch": 0.02338713263083011, "grad_norm": 1.3061760663986206, "learning_rate": 0.000526, "loss": 2.7727, "step": 526 }, { "epoch": 0.023476057089502466, "grad_norm": 1.0070077180862427, "learning_rate": 0.000528, "loss": 2.7614, "step": 528 }, { "epoch": 0.023564981548174827, "grad_norm": 0.9444275498390198, "learning_rate": 0.0005300000000000001, "loss": 2.7597, "step": 530 }, { "epoch": 0.023653906006847185, "grad_norm": 1.201491355895996, "learning_rate": 0.000532, "loss": 2.751, "step": 532 }, { "epoch": 0.023742830465519543, "grad_norm": 1.0685005187988281, "learning_rate": 0.0005340000000000001, "loss": 2.7524, "step": 534 }, { "epoch": 0.0238317549241919, "grad_norm": 1.089680790901184, "learning_rate": 0.000536, "loss": 2.7506, "step": 536 }, { "epoch": 0.023920679382864258, "grad_norm": 1.0986379384994507, "learning_rate": 0.0005380000000000001, "loss": 2.7398, "step": 538 }, { "epoch": 0.024009603841536616, "grad_norm": 1.0519707202911377, "learning_rate": 0.00054, "loss": 2.7296, "step": 540 }, { "epoch": 0.024098528300208973, "grad_norm": 1.0870462656021118, "learning_rate": 0.0005420000000000001, "loss": 2.7153, "step": 542 }, { "epoch": 0.02418745275888133, "grad_norm": 0.8601863980293274, "learning_rate": 0.0005440000000000001, "loss": 2.7028, "step": 544 }, { "epoch": 0.02427637721755369, "grad_norm": 0.96519935131073, "learning_rate": 0.000546, "loss": 2.6951, "step": 546 }, { "epoch": 0.024365301676226046, "grad_norm": 1.0897146463394165, "learning_rate": 0.0005480000000000001, "loss": 2.6911, "step": 548 }, { "epoch": 0.024454226134898404, "grad_norm": 0.9683467149734497, "learning_rate": 0.00055, "loss": 2.6904, "step": 550 }, { "epoch": 0.02454315059357076, "grad_norm": 1.0787107944488525, "learning_rate": 0.0005520000000000001, "loss": 2.69, "step": 552 }, { "epoch": 0.02463207505224312, "grad_norm": 1.0309861898422241, "learning_rate": 0.000554, "loss": 2.691, "step": 554 }, { "epoch": 0.024720999510915477, "grad_norm": 0.9567499756813049, "learning_rate": 0.0005560000000000001, "loss": 2.6843, "step": 556 }, { "epoch": 0.024809923969587835, "grad_norm": 0.8283863663673401, "learning_rate": 0.000558, "loss": 2.6758, "step": 558 }, { "epoch": 0.024898848428260192, "grad_norm": 0.8621578216552734, "learning_rate": 0.0005600000000000001, "loss": 2.6639, "step": 560 }, { "epoch": 0.02498777288693255, "grad_norm": 0.9517662525177002, "learning_rate": 0.0005620000000000001, "loss": 2.6644, "step": 562 }, { "epoch": 0.025076697345604908, "grad_norm": 0.9091687202453613, "learning_rate": 0.0005639999999999999, "loss": 2.6538, "step": 564 }, { "epoch": 0.025165621804277265, "grad_norm": 0.8210693597793579, "learning_rate": 0.000566, "loss": 2.6505, "step": 566 }, { "epoch": 0.025254546262949623, "grad_norm": 0.906134843826294, "learning_rate": 0.0005679999999999999, "loss": 2.6422, "step": 568 }, { "epoch": 0.02534347072162198, "grad_norm": 0.7618714570999146, "learning_rate": 0.00057, "loss": 2.6204, "step": 570 }, { "epoch": 0.02543239518029434, "grad_norm": 0.7566989064216614, "learning_rate": 0.0005719999999999999, "loss": 2.6204, "step": 572 }, { "epoch": 0.025521319638966696, "grad_norm": 0.8227992057800293, "learning_rate": 0.000574, "loss": 2.6237, "step": 574 }, { "epoch": 0.025610244097639057, "grad_norm": 0.7188429236412048, "learning_rate": 0.000576, "loss": 2.6089, "step": 576 }, { "epoch": 0.025699168556311415, "grad_norm": 0.7563546895980835, "learning_rate": 0.000578, "loss": 2.594, "step": 578 }, { "epoch": 0.025788093014983773, "grad_norm": 0.7829350233078003, "learning_rate": 0.00058, "loss": 2.6021, "step": 580 }, { "epoch": 0.02587701747365613, "grad_norm": 0.7614354491233826, "learning_rate": 0.0005819999999999999, "loss": 2.6012, "step": 582 }, { "epoch": 0.025965941932328488, "grad_norm": 0.9515312314033508, "learning_rate": 0.000584, "loss": 2.5882, "step": 584 }, { "epoch": 0.026054866391000846, "grad_norm": 0.8379482626914978, "learning_rate": 0.0005859999999999999, "loss": 2.6015, "step": 586 }, { "epoch": 0.026143790849673203, "grad_norm": 0.8818963170051575, "learning_rate": 0.000588, "loss": 2.5888, "step": 588 }, { "epoch": 0.02623271530834556, "grad_norm": 0.7838352918624878, "learning_rate": 0.00059, "loss": 2.5806, "step": 590 }, { "epoch": 0.02632163976701792, "grad_norm": 0.7371238470077515, "learning_rate": 0.000592, "loss": 2.5727, "step": 592 }, { "epoch": 0.026410564225690276, "grad_norm": 0.776374340057373, "learning_rate": 0.000594, "loss": 2.5669, "step": 594 }, { "epoch": 0.026499488684362634, "grad_norm": 0.8051691651344299, "learning_rate": 0.000596, "loss": 2.5712, "step": 596 }, { "epoch": 0.026588413143034992, "grad_norm": 0.8638246655464172, "learning_rate": 0.000598, "loss": 2.5693, "step": 598 }, { "epoch": 0.02667733760170735, "grad_norm": 0.7371112704277039, "learning_rate": 0.0006, "loss": 2.5555, "step": 600 }, { "epoch": 0.026766262060379707, "grad_norm": 0.7257919311523438, "learning_rate": 0.000602, "loss": 2.5526, "step": 602 }, { "epoch": 0.026855186519052065, "grad_norm": 0.6093848943710327, "learning_rate": 0.000604, "loss": 2.5433, "step": 604 }, { "epoch": 0.026944110977724423, "grad_norm": 0.8363651633262634, "learning_rate": 0.000606, "loss": 2.5358, "step": 606 }, { "epoch": 0.02703303543639678, "grad_norm": 0.9323018193244934, "learning_rate": 0.000608, "loss": 2.554, "step": 608 }, { "epoch": 0.027121959895069138, "grad_norm": 0.743377149105072, "learning_rate": 0.00061, "loss": 2.5392, "step": 610 }, { "epoch": 0.027210884353741496, "grad_norm": 0.6902635097503662, "learning_rate": 0.000612, "loss": 2.5257, "step": 612 }, { "epoch": 0.027299808812413853, "grad_norm": 0.6234912276268005, "learning_rate": 0.000614, "loss": 2.521, "step": 614 }, { "epoch": 0.02738873327108621, "grad_norm": 0.6421347260475159, "learning_rate": 0.000616, "loss": 2.5161, "step": 616 }, { "epoch": 0.02747765772975857, "grad_norm": 0.6125656962394714, "learning_rate": 0.0006180000000000001, "loss": 2.5081, "step": 618 }, { "epoch": 0.027566582188430926, "grad_norm": 0.6754232048988342, "learning_rate": 0.00062, "loss": 2.5047, "step": 620 }, { "epoch": 0.027655506647103287, "grad_norm": 0.6989823579788208, "learning_rate": 0.000622, "loss": 2.5019, "step": 622 }, { "epoch": 0.027744431105775645, "grad_norm": 0.572574257850647, "learning_rate": 0.000624, "loss": 2.4895, "step": 624 }, { "epoch": 0.027833355564448003, "grad_norm": 0.6032222509384155, "learning_rate": 0.000626, "loss": 2.4796, "step": 626 }, { "epoch": 0.02792228002312036, "grad_norm": 0.64015793800354, "learning_rate": 0.000628, "loss": 2.4823, "step": 628 }, { "epoch": 0.028011204481792718, "grad_norm": 1.0496829748153687, "learning_rate": 0.00063, "loss": 2.4876, "step": 630 }, { "epoch": 0.028100128940465076, "grad_norm": 0.7946370244026184, "learning_rate": 0.000632, "loss": 2.4895, "step": 632 }, { "epoch": 0.028189053399137434, "grad_norm": 0.7404975295066833, "learning_rate": 0.000634, "loss": 2.4849, "step": 634 }, { "epoch": 0.02827797785780979, "grad_norm": 0.7491384148597717, "learning_rate": 0.0006360000000000001, "loss": 2.4866, "step": 636 }, { "epoch": 0.02836690231648215, "grad_norm": 0.7001233696937561, "learning_rate": 0.000638, "loss": 2.4697, "step": 638 }, { "epoch": 0.028455826775154507, "grad_norm": 0.6039974689483643, "learning_rate": 0.00064, "loss": 2.4638, "step": 640 }, { "epoch": 0.028544751233826864, "grad_norm": 0.5607019662857056, "learning_rate": 0.000642, "loss": 2.459, "step": 642 }, { "epoch": 0.028633675692499222, "grad_norm": 0.7814773321151733, "learning_rate": 0.000644, "loss": 2.4615, "step": 644 }, { "epoch": 0.02872260015117158, "grad_norm": 0.8695835471153259, "learning_rate": 0.000646, "loss": 2.472, "step": 646 }, { "epoch": 0.028811524609843937, "grad_norm": 0.6280689239501953, "learning_rate": 0.000648, "loss": 2.4554, "step": 648 }, { "epoch": 0.028900449068516295, "grad_norm": 0.6447476148605347, "learning_rate": 0.0006500000000000001, "loss": 2.4426, "step": 650 }, { "epoch": 0.028989373527188653, "grad_norm": 0.7183859944343567, "learning_rate": 0.000652, "loss": 2.445, "step": 652 }, { "epoch": 0.02907829798586101, "grad_norm": 0.657177746295929, "learning_rate": 0.0006540000000000001, "loss": 2.4433, "step": 654 }, { "epoch": 0.029167222444533368, "grad_norm": 0.635399341583252, "learning_rate": 0.000656, "loss": 2.4327, "step": 656 }, { "epoch": 0.029256146903205726, "grad_norm": 0.519986629486084, "learning_rate": 0.0006580000000000001, "loss": 2.4328, "step": 658 }, { "epoch": 0.029345071361878083, "grad_norm": 0.6304401755332947, "learning_rate": 0.00066, "loss": 2.4328, "step": 660 }, { "epoch": 0.02943399582055044, "grad_norm": 0.6917545199394226, "learning_rate": 0.000662, "loss": 2.4259, "step": 662 }, { "epoch": 0.0295229202792228, "grad_norm": 0.844325840473175, "learning_rate": 0.0006640000000000001, "loss": 2.4414, "step": 664 }, { "epoch": 0.029611844737895156, "grad_norm": 0.8940752744674683, "learning_rate": 0.000666, "loss": 2.4353, "step": 666 }, { "epoch": 0.029700769196567518, "grad_norm": 0.8456497192382812, "learning_rate": 0.0006680000000000001, "loss": 2.4598, "step": 668 }, { "epoch": 0.029789693655239875, "grad_norm": 0.7692534327507019, "learning_rate": 0.00067, "loss": 2.4437, "step": 670 }, { "epoch": 0.029878618113912233, "grad_norm": 0.5847371816635132, "learning_rate": 0.0006720000000000001, "loss": 2.4399, "step": 672 }, { "epoch": 0.02996754257258459, "grad_norm": 0.505262017250061, "learning_rate": 0.000674, "loss": 2.4225, "step": 674 }, { "epoch": 0.03005646703125695, "grad_norm": 0.4932805001735687, "learning_rate": 0.0006760000000000001, "loss": 2.4107, "step": 676 }, { "epoch": 0.030145391489929306, "grad_norm": 0.4631360173225403, "learning_rate": 0.0006780000000000001, "loss": 2.4122, "step": 678 }, { "epoch": 0.030234315948601664, "grad_norm": 0.49897080659866333, "learning_rate": 0.00068, "loss": 2.4029, "step": 680 }, { "epoch": 0.03032324040727402, "grad_norm": 0.5904172658920288, "learning_rate": 0.0006820000000000001, "loss": 2.396, "step": 682 }, { "epoch": 0.03041216486594638, "grad_norm": 0.6271270513534546, "learning_rate": 0.000684, "loss": 2.3943, "step": 684 }, { "epoch": 0.030501089324618737, "grad_norm": 0.7415940761566162, "learning_rate": 0.0006860000000000001, "loss": 2.3821, "step": 686 }, { "epoch": 0.030590013783291094, "grad_norm": 0.9829635620117188, "learning_rate": 0.0006879999999999999, "loss": 2.4093, "step": 688 }, { "epoch": 0.030678938241963452, "grad_norm": 0.7612989544868469, "learning_rate": 0.00069, "loss": 2.4255, "step": 690 }, { "epoch": 0.03076786270063581, "grad_norm": 0.6028714776039124, "learning_rate": 0.000692, "loss": 2.4156, "step": 692 }, { "epoch": 0.030856787159308167, "grad_norm": 0.4892686903476715, "learning_rate": 0.000694, "loss": 2.3899, "step": 694 }, { "epoch": 0.030945711617980525, "grad_norm": 0.49779361486434937, "learning_rate": 0.000696, "loss": 2.388, "step": 696 }, { "epoch": 0.031034636076652883, "grad_norm": 0.43636444211006165, "learning_rate": 0.0006979999999999999, "loss": 2.3775, "step": 698 }, { "epoch": 0.03112356053532524, "grad_norm": 0.5327876806259155, "learning_rate": 0.0007, "loss": 2.3719, "step": 700 }, { "epoch": 0.031212484993997598, "grad_norm": 0.5242130160331726, "learning_rate": 0.0007019999999999999, "loss": 2.3703, "step": 702 }, { "epoch": 0.03130140945266996, "grad_norm": 0.523585319519043, "learning_rate": 0.000704, "loss": 2.3688, "step": 704 }, { "epoch": 0.031390333911342314, "grad_norm": 0.5895810127258301, "learning_rate": 0.0007059999999999999, "loss": 2.3672, "step": 706 }, { "epoch": 0.031479258370014675, "grad_norm": 0.7120672464370728, "learning_rate": 0.000708, "loss": 2.3507, "step": 708 }, { "epoch": 0.03156818282868703, "grad_norm": 0.8193224668502808, "learning_rate": 0.00071, "loss": 2.3585, "step": 710 }, { "epoch": 0.03165710728735939, "grad_norm": 0.7592872977256775, "learning_rate": 0.000712, "loss": 2.3881, "step": 712 }, { "epoch": 0.031746031746031744, "grad_norm": 0.6383097171783447, "learning_rate": 0.000714, "loss": 2.3597, "step": 714 }, { "epoch": 0.031834956204704105, "grad_norm": 0.5765336751937866, "learning_rate": 0.000716, "loss": 2.3569, "step": 716 }, { "epoch": 0.03192388066337646, "grad_norm": 0.44413337111473083, "learning_rate": 0.000718, "loss": 2.3484, "step": 718 }, { "epoch": 0.03201280512204882, "grad_norm": 0.42275282740592957, "learning_rate": 0.0007199999999999999, "loss": 2.3459, "step": 720 }, { "epoch": 0.032101729580721175, "grad_norm": 0.4223826229572296, "learning_rate": 0.000722, "loss": 2.3355, "step": 722 }, { "epoch": 0.032190654039393536, "grad_norm": 0.45038753747940063, "learning_rate": 0.000724, "loss": 2.3321, "step": 724 }, { "epoch": 0.03227957849806589, "grad_norm": 0.5487307906150818, "learning_rate": 0.000726, "loss": 2.3356, "step": 726 }, { "epoch": 0.03236850295673825, "grad_norm": 0.5726903676986694, "learning_rate": 0.000728, "loss": 2.3356, "step": 728 }, { "epoch": 0.032457427415410606, "grad_norm": 0.6729259490966797, "learning_rate": 0.00073, "loss": 2.3351, "step": 730 }, { "epoch": 0.03254635187408297, "grad_norm": 0.6266768574714661, "learning_rate": 0.000732, "loss": 2.323, "step": 732 }, { "epoch": 0.03263527633275532, "grad_norm": 0.5825262069702148, "learning_rate": 0.000734, "loss": 2.3358, "step": 734 }, { "epoch": 0.03272420079142768, "grad_norm": 0.5942145586013794, "learning_rate": 0.000736, "loss": 2.3235, "step": 736 }, { "epoch": 0.03281312525010004, "grad_norm": 0.5730122923851013, "learning_rate": 0.000738, "loss": 2.3229, "step": 738 }, { "epoch": 0.0329020497087724, "grad_norm": 0.6102321743965149, "learning_rate": 0.00074, "loss": 2.3301, "step": 740 }, { "epoch": 0.03299097416744476, "grad_norm": 0.4807248115539551, "learning_rate": 0.000742, "loss": 2.3169, "step": 742 }, { "epoch": 0.03307989862611711, "grad_norm": 0.4981859028339386, "learning_rate": 0.000744, "loss": 2.3238, "step": 744 }, { "epoch": 0.033168823084789474, "grad_norm": 0.5408335328102112, "learning_rate": 0.000746, "loss": 2.305, "step": 746 }, { "epoch": 0.03325774754346183, "grad_norm": 0.5518751740455627, "learning_rate": 0.000748, "loss": 2.313, "step": 748 }, { "epoch": 0.03334667200213419, "grad_norm": 0.5658276677131653, "learning_rate": 0.00075, "loss": 2.3078, "step": 750 }, { "epoch": 0.033435596460806544, "grad_norm": 0.481430321931839, "learning_rate": 0.0007520000000000001, "loss": 2.3019, "step": 752 }, { "epoch": 0.033524520919478905, "grad_norm": 0.5349968075752258, "learning_rate": 0.000754, "loss": 2.3053, "step": 754 }, { "epoch": 0.03361344537815126, "grad_norm": 0.5499293208122253, "learning_rate": 0.000756, "loss": 2.3026, "step": 756 }, { "epoch": 0.03370236983682362, "grad_norm": 0.6066279411315918, "learning_rate": 0.000758, "loss": 2.3015, "step": 758 }, { "epoch": 0.033791294295495974, "grad_norm": 0.5410886406898499, "learning_rate": 0.00076, "loss": 2.2962, "step": 760 }, { "epoch": 0.033880218754168336, "grad_norm": 0.5119853615760803, "learning_rate": 0.000762, "loss": 2.3011, "step": 762 }, { "epoch": 0.03396914321284069, "grad_norm": 0.5836531519889832, "learning_rate": 0.000764, "loss": 2.2847, "step": 764 }, { "epoch": 0.03405806767151305, "grad_norm": 0.5594723224639893, "learning_rate": 0.0007660000000000001, "loss": 2.2891, "step": 766 }, { "epoch": 0.034146992130185405, "grad_norm": 0.7394406199455261, "learning_rate": 0.000768, "loss": 2.301, "step": 768 }, { "epoch": 0.034235916588857766, "grad_norm": 0.45566725730895996, "learning_rate": 0.0007700000000000001, "loss": 2.2951, "step": 770 }, { "epoch": 0.03432484104753012, "grad_norm": 0.4442650079727173, "learning_rate": 0.000772, "loss": 2.2941, "step": 772 }, { "epoch": 0.03441376550620248, "grad_norm": 0.4281231164932251, "learning_rate": 0.0007740000000000001, "loss": 2.2621, "step": 774 }, { "epoch": 0.034502689964874836, "grad_norm": 0.47158682346343994, "learning_rate": 0.000776, "loss": 2.2682, "step": 776 }, { "epoch": 0.0345916144235472, "grad_norm": 0.46278801560401917, "learning_rate": 0.000778, "loss": 2.2633, "step": 778 }, { "epoch": 0.03468053888221955, "grad_norm": 0.5910720229148865, "learning_rate": 0.0007800000000000001, "loss": 2.2861, "step": 780 }, { "epoch": 0.03476946334089191, "grad_norm": 0.6585924625396729, "learning_rate": 0.000782, "loss": 2.281, "step": 782 }, { "epoch": 0.034858387799564274, "grad_norm": 0.5449870824813843, "learning_rate": 0.0007840000000000001, "loss": 2.2765, "step": 784 }, { "epoch": 0.03494731225823663, "grad_norm": 0.4451574981212616, "learning_rate": 0.000786, "loss": 2.2599, "step": 786 }, { "epoch": 0.03503623671690899, "grad_norm": 0.4292662441730499, "learning_rate": 0.0007880000000000001, "loss": 2.2572, "step": 788 }, { "epoch": 0.03512516117558134, "grad_norm": 0.446139931678772, "learning_rate": 0.00079, "loss": 2.2538, "step": 790 }, { "epoch": 0.035214085634253704, "grad_norm": 0.5321487784385681, "learning_rate": 0.0007920000000000001, "loss": 2.2516, "step": 792 }, { "epoch": 0.03530301009292606, "grad_norm": 0.5208760499954224, "learning_rate": 0.0007940000000000001, "loss": 2.2596, "step": 794 }, { "epoch": 0.03539193455159842, "grad_norm": 0.5374959707260132, "learning_rate": 0.000796, "loss": 2.2597, "step": 796 }, { "epoch": 0.035480859010270774, "grad_norm": 0.6247416734695435, "learning_rate": 0.0007980000000000001, "loss": 2.2682, "step": 798 }, { "epoch": 0.035569783468943135, "grad_norm": 0.720618724822998, "learning_rate": 0.0008, "loss": 2.2675, "step": 800 }, { "epoch": 0.03565870792761549, "grad_norm": 0.5454574227333069, "learning_rate": 0.0008020000000000001, "loss": 2.2625, "step": 802 }, { "epoch": 0.03574763238628785, "grad_norm": 0.47981786727905273, "learning_rate": 0.000804, "loss": 2.2511, "step": 804 }, { "epoch": 0.035836556844960205, "grad_norm": 0.4263577461242676, "learning_rate": 0.0008060000000000001, "loss": 2.2463, "step": 806 }, { "epoch": 0.035925481303632566, "grad_norm": 0.41618812084198, "learning_rate": 0.000808, "loss": 2.2463, "step": 808 }, { "epoch": 0.03601440576230492, "grad_norm": 0.3966350257396698, "learning_rate": 0.0008100000000000001, "loss": 2.2392, "step": 810 }, { "epoch": 0.03610333022097728, "grad_norm": 0.48056676983833313, "learning_rate": 0.0008120000000000001, "loss": 2.2379, "step": 812 }, { "epoch": 0.036192254679649635, "grad_norm": 0.4377332031726837, "learning_rate": 0.0008139999999999999, "loss": 2.237, "step": 814 }, { "epoch": 0.036281179138321996, "grad_norm": 0.3729042410850525, "learning_rate": 0.000816, "loss": 2.2328, "step": 816 }, { "epoch": 0.03637010359699435, "grad_norm": 0.3967163562774658, "learning_rate": 0.0008179999999999999, "loss": 2.2196, "step": 818 }, { "epoch": 0.03645902805566671, "grad_norm": 0.44842958450317383, "learning_rate": 0.00082, "loss": 2.2195, "step": 820 }, { "epoch": 0.036547952514339066, "grad_norm": 0.46168598532676697, "learning_rate": 0.0008219999999999999, "loss": 2.2219, "step": 822 }, { "epoch": 0.03663687697301143, "grad_norm": 0.5996253490447998, "learning_rate": 0.000824, "loss": 2.2359, "step": 824 }, { "epoch": 0.03672580143168378, "grad_norm": 0.575162410736084, "learning_rate": 0.000826, "loss": 2.2405, "step": 826 }, { "epoch": 0.03681472589035614, "grad_norm": 0.7242479920387268, "learning_rate": 0.000828, "loss": 2.2319, "step": 828 }, { "epoch": 0.036903650349028504, "grad_norm": 0.5244578719139099, "learning_rate": 0.00083, "loss": 2.2303, "step": 830 }, { "epoch": 0.03699257480770086, "grad_norm": 0.5107928514480591, "learning_rate": 0.000832, "loss": 2.2203, "step": 832 }, { "epoch": 0.03708149926637322, "grad_norm": 0.43508070707321167, "learning_rate": 0.000834, "loss": 2.2151, "step": 834 }, { "epoch": 0.03717042372504557, "grad_norm": 0.4192095398902893, "learning_rate": 0.0008359999999999999, "loss": 2.2141, "step": 836 }, { "epoch": 0.037259348183717934, "grad_norm": 0.49640336632728577, "learning_rate": 0.000838, "loss": 2.2206, "step": 838 }, { "epoch": 0.03734827264239029, "grad_norm": 0.4244445264339447, "learning_rate": 0.00084, "loss": 2.213, "step": 840 }, { "epoch": 0.03743719710106265, "grad_norm": 0.5675143003463745, "learning_rate": 0.000842, "loss": 2.2096, "step": 842 }, { "epoch": 0.037526121559735004, "grad_norm": 0.5136843323707581, "learning_rate": 0.000844, "loss": 2.2086, "step": 844 }, { "epoch": 0.037615046018407365, "grad_norm": 0.660275399684906, "learning_rate": 0.000846, "loss": 2.2125, "step": 846 }, { "epoch": 0.03770397047707972, "grad_norm": 0.4898814558982849, "learning_rate": 0.000848, "loss": 2.2159, "step": 848 }, { "epoch": 0.03779289493575208, "grad_norm": 0.47421759366989136, "learning_rate": 0.00085, "loss": 2.2037, "step": 850 }, { "epoch": 0.037881819394424435, "grad_norm": 0.46137571334838867, "learning_rate": 0.000852, "loss": 2.1975, "step": 852 }, { "epoch": 0.037970743853096796, "grad_norm": 0.4468640089035034, "learning_rate": 0.000854, "loss": 2.1951, "step": 854 }, { "epoch": 0.03805966831176915, "grad_norm": 0.43135735392570496, "learning_rate": 0.000856, "loss": 2.1939, "step": 856 }, { "epoch": 0.03814859277044151, "grad_norm": 0.5193342566490173, "learning_rate": 0.000858, "loss": 2.199, "step": 858 }, { "epoch": 0.038237517229113865, "grad_norm": 0.4511534571647644, "learning_rate": 0.00086, "loss": 2.1918, "step": 860 }, { "epoch": 0.03832644168778623, "grad_norm": 0.40734702348709106, "learning_rate": 0.000862, "loss": 2.1835, "step": 862 }, { "epoch": 0.03841536614645858, "grad_norm": 0.38885200023651123, "learning_rate": 0.000864, "loss": 2.1822, "step": 864 }, { "epoch": 0.03850429060513094, "grad_norm": 0.4552465081214905, "learning_rate": 0.000866, "loss": 2.1789, "step": 866 }, { "epoch": 0.038593215063803296, "grad_norm": 0.45240339636802673, "learning_rate": 0.0008680000000000001, "loss": 2.1767, "step": 868 }, { "epoch": 0.03868213952247566, "grad_norm": 0.3916630744934082, "learning_rate": 0.00087, "loss": 2.1763, "step": 870 }, { "epoch": 0.03877106398114801, "grad_norm": 0.34862634539604187, "learning_rate": 0.000872, "loss": 2.1735, "step": 872 }, { "epoch": 0.03885998843982037, "grad_norm": 0.5051468014717102, "learning_rate": 0.000874, "loss": 2.1749, "step": 874 }, { "epoch": 0.038948912898492734, "grad_norm": 0.43139076232910156, "learning_rate": 0.000876, "loss": 2.1651, "step": 876 }, { "epoch": 0.03903783735716509, "grad_norm": 0.5403070449829102, "learning_rate": 0.000878, "loss": 2.1745, "step": 878 }, { "epoch": 0.03912676181583745, "grad_norm": 0.4276416599750519, "learning_rate": 0.00088, "loss": 2.1771, "step": 880 }, { "epoch": 0.0392156862745098, "grad_norm": 0.4407658576965332, "learning_rate": 0.000882, "loss": 2.1773, "step": 882 }, { "epoch": 0.039304610733182165, "grad_norm": 0.42780712246894836, "learning_rate": 0.000884, "loss": 2.175, "step": 884 }, { "epoch": 0.03939353519185452, "grad_norm": 0.4067372977733612, "learning_rate": 0.0008860000000000001, "loss": 2.1651, "step": 886 }, { "epoch": 0.03948245965052688, "grad_norm": 0.40394145250320435, "learning_rate": 0.000888, "loss": 2.1654, "step": 888 }, { "epoch": 0.039571384109199234, "grad_norm": 0.4600394666194916, "learning_rate": 0.0008900000000000001, "loss": 2.158, "step": 890 }, { "epoch": 0.039660308567871595, "grad_norm": 0.5146481990814209, "learning_rate": 0.000892, "loss": 2.175, "step": 892 }, { "epoch": 0.03974923302654395, "grad_norm": 0.5569943189620972, "learning_rate": 0.000894, "loss": 2.1744, "step": 894 }, { "epoch": 0.03983815748521631, "grad_norm": 0.5006479024887085, "learning_rate": 0.000896, "loss": 2.1685, "step": 896 }, { "epoch": 0.039927081943888665, "grad_norm": 0.45075637102127075, "learning_rate": 0.000898, "loss": 2.1591, "step": 898 }, { "epoch": 0.040016006402561026, "grad_norm": 0.450245201587677, "learning_rate": 0.0009000000000000001, "loss": 2.1606, "step": 900 }, { "epoch": 0.04010493086123338, "grad_norm": 0.41936612129211426, "learning_rate": 0.000902, "loss": 2.16, "step": 902 }, { "epoch": 0.04019385531990574, "grad_norm": 0.44605305790901184, "learning_rate": 0.0009040000000000001, "loss": 2.1485, "step": 904 }, { "epoch": 0.040282779778578096, "grad_norm": 0.4443049430847168, "learning_rate": 0.000906, "loss": 2.1607, "step": 906 }, { "epoch": 0.04037170423725046, "grad_norm": 0.40073588490486145, "learning_rate": 0.0009080000000000001, "loss": 2.1574, "step": 908 }, { "epoch": 0.04046062869592281, "grad_norm": 0.35070598125457764, "learning_rate": 0.00091, "loss": 2.1515, "step": 910 }, { "epoch": 0.04054955315459517, "grad_norm": 0.37149950861930847, "learning_rate": 0.000912, "loss": 2.1477, "step": 912 }, { "epoch": 0.040638477613267526, "grad_norm": 0.3967283368110657, "learning_rate": 0.0009140000000000001, "loss": 2.1482, "step": 914 }, { "epoch": 0.04072740207193989, "grad_norm": 0.3983295261859894, "learning_rate": 0.000916, "loss": 2.1413, "step": 916 }, { "epoch": 0.04081632653061224, "grad_norm": 0.7367068529129028, "learning_rate": 0.0009180000000000001, "loss": 2.164, "step": 918 }, { "epoch": 0.0409052509892846, "grad_norm": 0.5214952230453491, "learning_rate": 0.00092, "loss": 2.1595, "step": 920 }, { "epoch": 0.040994175447956964, "grad_norm": 0.4257142245769501, "learning_rate": 0.0009220000000000001, "loss": 2.149, "step": 922 }, { "epoch": 0.04108309990662932, "grad_norm": 0.3842937648296356, "learning_rate": 0.000924, "loss": 2.1419, "step": 924 }, { "epoch": 0.04117202436530168, "grad_norm": 0.37283626198768616, "learning_rate": 0.0009260000000000001, "loss": 2.1455, "step": 926 }, { "epoch": 0.041260948823974034, "grad_norm": 0.38478460907936096, "learning_rate": 0.0009280000000000001, "loss": 2.1416, "step": 928 }, { "epoch": 0.041349873282646395, "grad_norm": 0.37205952405929565, "learning_rate": 0.00093, "loss": 2.147, "step": 930 }, { "epoch": 0.04143879774131875, "grad_norm": 0.38429123163223267, "learning_rate": 0.0009320000000000001, "loss": 2.1287, "step": 932 }, { "epoch": 0.04152772219999111, "grad_norm": 0.38537994027137756, "learning_rate": 0.000934, "loss": 2.129, "step": 934 }, { "epoch": 0.041616646658663464, "grad_norm": 0.35850268602371216, "learning_rate": 0.0009360000000000001, "loss": 2.1279, "step": 936 }, { "epoch": 0.041705571117335825, "grad_norm": 0.40889298915863037, "learning_rate": 0.0009379999999999999, "loss": 2.1328, "step": 938 }, { "epoch": 0.04179449557600818, "grad_norm": 0.44349735975265503, "learning_rate": 0.00094, "loss": 2.1313, "step": 940 }, { "epoch": 0.04188342003468054, "grad_norm": 0.38156867027282715, "learning_rate": 0.000942, "loss": 2.1207, "step": 942 }, { "epoch": 0.041972344493352895, "grad_norm": 0.34405317902565, "learning_rate": 0.000944, "loss": 2.115, "step": 944 }, { "epoch": 0.042061268952025256, "grad_norm": 0.3376498818397522, "learning_rate": 0.000946, "loss": 2.1125, "step": 946 }, { "epoch": 0.04215019341069761, "grad_norm": 0.4137539565563202, "learning_rate": 0.000948, "loss": 2.1226, "step": 948 }, { "epoch": 0.04223911786936997, "grad_norm": 0.4108043611049652, "learning_rate": 0.00095, "loss": 2.1165, "step": 950 }, { "epoch": 0.042328042328042326, "grad_norm": 0.46412286162376404, "learning_rate": 0.0009519999999999999, "loss": 2.1204, "step": 952 }, { "epoch": 0.04241696678671469, "grad_norm": 0.5203374624252319, "learning_rate": 0.000954, "loss": 2.1255, "step": 954 }, { "epoch": 0.04250589124538704, "grad_norm": 0.5785240530967712, "learning_rate": 0.0009559999999999999, "loss": 2.1322, "step": 956 }, { "epoch": 0.0425948157040594, "grad_norm": 0.43567749857902527, "learning_rate": 0.000958, "loss": 2.1276, "step": 958 }, { "epoch": 0.042683740162731756, "grad_norm": 0.40512555837631226, "learning_rate": 0.00096, "loss": 2.1237, "step": 960 }, { "epoch": 0.04277266462140412, "grad_norm": 0.3421748876571655, "learning_rate": 0.000962, "loss": 2.1184, "step": 962 }, { "epoch": 0.04286158908007647, "grad_norm": 0.3585096001625061, "learning_rate": 0.000964, "loss": 2.1104, "step": 964 }, { "epoch": 0.04295051353874883, "grad_norm": 0.37870660424232483, "learning_rate": 0.000966, "loss": 2.112, "step": 966 }, { "epoch": 0.043039437997421194, "grad_norm": 0.37261155247688293, "learning_rate": 0.000968, "loss": 2.1118, "step": 968 }, { "epoch": 0.04312836245609355, "grad_norm": 0.3406919538974762, "learning_rate": 0.0009699999999999999, "loss": 2.1024, "step": 970 }, { "epoch": 0.04321728691476591, "grad_norm": 0.3668791651725769, "learning_rate": 0.000972, "loss": 2.1062, "step": 972 }, { "epoch": 0.043306211373438264, "grad_norm": 0.40684422850608826, "learning_rate": 0.000974, "loss": 2.0927, "step": 974 }, { "epoch": 0.043395135832110625, "grad_norm": 0.4066929221153259, "learning_rate": 0.000976, "loss": 2.1012, "step": 976 }, { "epoch": 0.04348406029078298, "grad_norm": 0.34670764207839966, "learning_rate": 0.000978, "loss": 2.0961, "step": 978 }, { "epoch": 0.04357298474945534, "grad_norm": 0.3352697789669037, "learning_rate": 0.00098, "loss": 2.0942, "step": 980 }, { "epoch": 0.043661909208127694, "grad_norm": 0.3769042491912842, "learning_rate": 0.000982, "loss": 2.0997, "step": 982 }, { "epoch": 0.043750833666800056, "grad_norm": 0.3640815317630768, "learning_rate": 0.000984, "loss": 2.107, "step": 984 }, { "epoch": 0.04383975812547241, "grad_norm": 0.40777748823165894, "learning_rate": 0.0009860000000000001, "loss": 2.0952, "step": 986 }, { "epoch": 0.04392868258414477, "grad_norm": 0.32192882895469666, "learning_rate": 0.000988, "loss": 2.0917, "step": 988 }, { "epoch": 0.044017607042817125, "grad_norm": 0.3336566090583801, "learning_rate": 0.00099, "loss": 2.0988, "step": 990 }, { "epoch": 0.044106531501489486, "grad_norm": 0.31402355432510376, "learning_rate": 0.000992, "loss": 2.0945, "step": 992 }, { "epoch": 0.04419545596016184, "grad_norm": 0.37855321168899536, "learning_rate": 0.000994, "loss": 2.0981, "step": 994 }, { "epoch": 0.0442843804188342, "grad_norm": 0.4493234157562256, "learning_rate": 0.000996, "loss": 2.0848, "step": 996 }, { "epoch": 0.044373304877506556, "grad_norm": 0.5948104858398438, "learning_rate": 0.000998, "loss": 2.1067, "step": 998 }, { "epoch": 0.04446222933617892, "grad_norm": 0.5480212569236755, "learning_rate": 0.001, "loss": 2.1027, "step": 1000 }, { "epoch": 0.04446222933617892, "eval_loss": 2.0268025398254395, "eval_runtime": 12.884, "eval_samples_per_second": 536.325, "eval_steps_per_second": 67.06, "step": 1000 }, { "epoch": 0.04455115379485127, "grad_norm": 0.41122549772262573, "learning_rate": 0.0009999999987529585, "loss": 2.0929, "step": 1002 }, { "epoch": 0.04464007825352363, "grad_norm": 0.33955812454223633, "learning_rate": 0.000999999995011834, "loss": 2.0879, "step": 1004 }, { "epoch": 0.04472900271219599, "grad_norm": 0.3389456570148468, "learning_rate": 0.0009999999887766267, "loss": 2.0822, "step": 1006 }, { "epoch": 0.04481792717086835, "grad_norm": 0.356757253408432, "learning_rate": 0.0009999999800473362, "loss": 2.0849, "step": 1008 }, { "epoch": 0.0449068516295407, "grad_norm": 0.3514195680618286, "learning_rate": 0.000999999968823963, "loss": 2.0822, "step": 1010 }, { "epoch": 0.04499577608821306, "grad_norm": 0.3277719020843506, "learning_rate": 0.0009999999551065068, "loss": 2.0798, "step": 1012 }, { "epoch": 0.045084700546885424, "grad_norm": 0.33637166023254395, "learning_rate": 0.000999999938894968, "loss": 2.0844, "step": 1014 }, { "epoch": 0.04517362500555778, "grad_norm": 0.3239494860172272, "learning_rate": 0.0009999999201893463, "loss": 2.0747, "step": 1016 }, { "epoch": 0.04526254946423014, "grad_norm": 0.3227521777153015, "learning_rate": 0.000999999898989642, "loss": 2.0656, "step": 1018 }, { "epoch": 0.045351473922902494, "grad_norm": 0.3502488136291504, "learning_rate": 0.0009999998752958554, "loss": 2.0771, "step": 1020 }, { "epoch": 0.045440398381574855, "grad_norm": 0.33191025257110596, "learning_rate": 0.0009999998491079863, "loss": 2.0785, "step": 1022 }, { "epoch": 0.04552932284024721, "grad_norm": 0.31606367230415344, "learning_rate": 0.000999999820426035, "loss": 2.0669, "step": 1024 }, { "epoch": 0.04561824729891957, "grad_norm": 0.3388826847076416, "learning_rate": 0.0009999997892500016, "loss": 2.0602, "step": 1026 }, { "epoch": 0.045707171757591925, "grad_norm": 0.32445207238197327, "learning_rate": 0.0009999997555798863, "loss": 2.06, "step": 1028 }, { "epoch": 0.045796096216264286, "grad_norm": 0.2825400233268738, "learning_rate": 0.000999999719415689, "loss": 2.0588, "step": 1030 }, { "epoch": 0.04588502067493664, "grad_norm": 0.3642183542251587, "learning_rate": 0.0009999996807574104, "loss": 2.0636, "step": 1032 }, { "epoch": 0.045973945133609, "grad_norm": 0.39157211780548096, "learning_rate": 0.0009999996396050502, "loss": 2.0637, "step": 1034 }, { "epoch": 0.046062869592281355, "grad_norm": 0.36561650037765503, "learning_rate": 0.000999999595958609, "loss": 2.0696, "step": 1036 }, { "epoch": 0.046151794050953716, "grad_norm": 0.3913203477859497, "learning_rate": 0.0009999995498180868, "loss": 2.0639, "step": 1038 }, { "epoch": 0.04624071850962607, "grad_norm": 0.31236109137535095, "learning_rate": 0.0009999995011834835, "loss": 2.058, "step": 1040 }, { "epoch": 0.04632964296829843, "grad_norm": 0.3150652050971985, "learning_rate": 0.0009999994500548, "loss": 2.061, "step": 1042 }, { "epoch": 0.046418567426970786, "grad_norm": 0.31297731399536133, "learning_rate": 0.0009999993964320362, "loss": 2.059, "step": 1044 }, { "epoch": 0.04650749188564315, "grad_norm": 0.35480573773384094, "learning_rate": 0.0009999993403151924, "loss": 2.0582, "step": 1046 }, { "epoch": 0.0465964163443155, "grad_norm": 0.34173378348350525, "learning_rate": 0.000999999281704269, "loss": 2.0601, "step": 1048 }, { "epoch": 0.04668534080298786, "grad_norm": 0.3285476267337799, "learning_rate": 0.000999999220599266, "loss": 2.0555, "step": 1050 }, { "epoch": 0.04677426526166022, "grad_norm": 0.3582735061645508, "learning_rate": 0.000999999157000184, "loss": 2.0468, "step": 1052 }, { "epoch": 0.04686318972033258, "grad_norm": 0.3618029057979584, "learning_rate": 0.0009999990909070231, "loss": 2.0443, "step": 1054 }, { "epoch": 0.04695211417900493, "grad_norm": 0.3185672461986542, "learning_rate": 0.000999999022319784, "loss": 2.0531, "step": 1056 }, { "epoch": 0.04704103863767729, "grad_norm": 0.38706305623054504, "learning_rate": 0.0009999989512384665, "loss": 2.0531, "step": 1058 }, { "epoch": 0.047129963096349654, "grad_norm": 0.28708359599113464, "learning_rate": 0.0009999988776630713, "loss": 2.0482, "step": 1060 }, { "epoch": 0.04721888755502201, "grad_norm": 0.37909284234046936, "learning_rate": 0.0009999988015935988, "loss": 2.0437, "step": 1062 }, { "epoch": 0.04730781201369437, "grad_norm": 0.35751160979270935, "learning_rate": 0.0009999987230300492, "loss": 2.0528, "step": 1064 }, { "epoch": 0.047396736472366724, "grad_norm": 0.31660065054893494, "learning_rate": 0.000999998641972423, "loss": 2.0399, "step": 1066 }, { "epoch": 0.047485660931039085, "grad_norm": 0.3356022238731384, "learning_rate": 0.0009999985584207205, "loss": 2.039, "step": 1068 }, { "epoch": 0.04757458538971144, "grad_norm": 0.30728912353515625, "learning_rate": 0.0009999984723749423, "loss": 2.0428, "step": 1070 }, { "epoch": 0.0476635098483838, "grad_norm": 0.3009743094444275, "learning_rate": 0.0009999983838350887, "loss": 2.0365, "step": 1072 }, { "epoch": 0.047752434307056155, "grad_norm": 0.308032751083374, "learning_rate": 0.0009999982928011603, "loss": 2.0353, "step": 1074 }, { "epoch": 0.047841358765728516, "grad_norm": 0.30482032895088196, "learning_rate": 0.0009999981992731572, "loss": 2.0267, "step": 1076 }, { "epoch": 0.04793028322440087, "grad_norm": 0.2963328957557678, "learning_rate": 0.0009999981032510802, "loss": 2.0352, "step": 1078 }, { "epoch": 0.04801920768307323, "grad_norm": 0.32439178228378296, "learning_rate": 0.0009999980047349295, "loss": 2.0265, "step": 1080 }, { "epoch": 0.048108132141745585, "grad_norm": 0.3173850178718567, "learning_rate": 0.000999997903724706, "loss": 2.0351, "step": 1082 }, { "epoch": 0.04819705660041795, "grad_norm": 0.33313167095184326, "learning_rate": 0.0009999978002204098, "loss": 2.0257, "step": 1084 }, { "epoch": 0.0482859810590903, "grad_norm": 0.36748653650283813, "learning_rate": 0.0009999976942220417, "loss": 2.0357, "step": 1086 }, { "epoch": 0.04837490551776266, "grad_norm": 0.33924633264541626, "learning_rate": 0.000999997585729602, "loss": 2.0275, "step": 1088 }, { "epoch": 0.048463829976435016, "grad_norm": 0.3379874527454376, "learning_rate": 0.0009999974747430913, "loss": 2.0361, "step": 1090 }, { "epoch": 0.04855275443510738, "grad_norm": 0.2935180068016052, "learning_rate": 0.0009999973612625102, "loss": 2.0229, "step": 1092 }, { "epoch": 0.04864167889377973, "grad_norm": 0.3268764317035675, "learning_rate": 0.0009999972452878593, "loss": 2.0253, "step": 1094 }, { "epoch": 0.04873060335245209, "grad_norm": 0.27827081084251404, "learning_rate": 0.0009999971268191393, "loss": 2.0234, "step": 1096 }, { "epoch": 0.04881952781112445, "grad_norm": 0.3332640528678894, "learning_rate": 0.0009999970058563506, "loss": 2.023, "step": 1098 }, { "epoch": 0.04890845226979681, "grad_norm": 0.2747075855731964, "learning_rate": 0.0009999968823994937, "loss": 2.0189, "step": 1100 }, { "epoch": 0.04899737672846916, "grad_norm": 0.32247695326805115, "learning_rate": 0.0009999967564485695, "loss": 2.0231, "step": 1102 }, { "epoch": 0.04908630118714152, "grad_norm": 0.2975604832172394, "learning_rate": 0.0009999966280035784, "loss": 2.0139, "step": 1104 }, { "epoch": 0.049175225645813885, "grad_norm": 0.3203318417072296, "learning_rate": 0.000999996497064521, "loss": 2.0207, "step": 1106 }, { "epoch": 0.04926415010448624, "grad_norm": 0.3753434717655182, "learning_rate": 0.0009999963636313982, "loss": 2.0153, "step": 1108 }, { "epoch": 0.0493530745631586, "grad_norm": 0.35570961236953735, "learning_rate": 0.0009999962277042107, "loss": 2.0186, "step": 1110 }, { "epoch": 0.049441999021830954, "grad_norm": 0.3200981020927429, "learning_rate": 0.0009999960892829588, "loss": 2.0169, "step": 1112 }, { "epoch": 0.049530923480503315, "grad_norm": 0.31688785552978516, "learning_rate": 0.0009999959483676436, "loss": 2.0098, "step": 1114 }, { "epoch": 0.04961984793917567, "grad_norm": 0.2900930941104889, "learning_rate": 0.0009999958049582655, "loss": 2.0136, "step": 1116 }, { "epoch": 0.04970877239784803, "grad_norm": 0.337090402841568, "learning_rate": 0.000999995659054825, "loss": 2.0138, "step": 1118 }, { "epoch": 0.049797696856520385, "grad_norm": 0.41873857378959656, "learning_rate": 0.0009999955106573238, "loss": 2.0086, "step": 1120 }, { "epoch": 0.049886621315192746, "grad_norm": 0.3219401240348816, "learning_rate": 0.0009999953597657617, "loss": 2.018, "step": 1122 }, { "epoch": 0.0499755457738651, "grad_norm": 0.337938517332077, "learning_rate": 0.0009999952063801396, "loss": 2.0093, "step": 1124 }, { "epoch": 0.05006447023253746, "grad_norm": 0.3835712969303131, "learning_rate": 0.0009999950505004587, "loss": 2.0144, "step": 1126 }, { "epoch": 0.050153394691209816, "grad_norm": 0.3164381682872772, "learning_rate": 0.0009999948921267192, "loss": 2.0111, "step": 1128 }, { "epoch": 0.05024231914988218, "grad_norm": 0.2837233245372772, "learning_rate": 0.0009999947312589225, "loss": 2.0099, "step": 1130 }, { "epoch": 0.05033124360855453, "grad_norm": 0.30504587292671204, "learning_rate": 0.0009999945678970687, "loss": 2.0049, "step": 1132 }, { "epoch": 0.05042016806722689, "grad_norm": 0.30361393094062805, "learning_rate": 0.0009999944020411592, "loss": 2.0056, "step": 1134 }, { "epoch": 0.050509092525899246, "grad_norm": 0.2646671533584595, "learning_rate": 0.0009999942336911948, "loss": 1.9977, "step": 1136 }, { "epoch": 0.05059801698457161, "grad_norm": 0.3026483952999115, "learning_rate": 0.000999994062847176, "loss": 2.0039, "step": 1138 }, { "epoch": 0.05068694144324396, "grad_norm": 0.2975611388683319, "learning_rate": 0.0009999938895091035, "loss": 2.0043, "step": 1140 }, { "epoch": 0.05077586590191632, "grad_norm": 0.3021259605884552, "learning_rate": 0.000999993713676979, "loss": 2.0006, "step": 1142 }, { "epoch": 0.05086479036058868, "grad_norm": 0.2845633924007416, "learning_rate": 0.0009999935353508028, "loss": 1.9903, "step": 1144 }, { "epoch": 0.05095371481926104, "grad_norm": 0.2758135497570038, "learning_rate": 0.0009999933545305757, "loss": 2.0041, "step": 1146 }, { "epoch": 0.05104263927793339, "grad_norm": 0.2746117413043976, "learning_rate": 0.0009999931712162985, "loss": 1.9894, "step": 1148 }, { "epoch": 0.051131563736605753, "grad_norm": 0.2912278473377228, "learning_rate": 0.0009999929854079728, "loss": 1.9964, "step": 1150 }, { "epoch": 0.051220488195278115, "grad_norm": 0.28964367508888245, "learning_rate": 0.0009999927971055989, "loss": 1.9883, "step": 1152 }, { "epoch": 0.05130941265395047, "grad_norm": 0.2836471498012543, "learning_rate": 0.0009999926063091779, "loss": 1.9925, "step": 1154 }, { "epoch": 0.05139833711262283, "grad_norm": 0.2557382583618164, "learning_rate": 0.000999992413018711, "loss": 1.996, "step": 1156 }, { "epoch": 0.051487261571295184, "grad_norm": 0.3542958199977875, "learning_rate": 0.0009999922172341988, "loss": 1.9893, "step": 1158 }, { "epoch": 0.051576186029967545, "grad_norm": 0.4211042821407318, "learning_rate": 0.0009999920189556425, "loss": 1.9965, "step": 1160 }, { "epoch": 0.0516651104886399, "grad_norm": 0.3262304365634918, "learning_rate": 0.0009999918181830428, "loss": 1.9993, "step": 1162 }, { "epoch": 0.05175403494731226, "grad_norm": 0.33062461018562317, "learning_rate": 0.0009999916149164012, "loss": 1.9873, "step": 1164 }, { "epoch": 0.051842959405984615, "grad_norm": 0.2799008786678314, "learning_rate": 0.0009999914091557182, "loss": 1.9846, "step": 1166 }, { "epoch": 0.051931883864656976, "grad_norm": 0.2756935656070709, "learning_rate": 0.0009999912009009953, "loss": 1.9872, "step": 1168 }, { "epoch": 0.05202080832332933, "grad_norm": 0.2897654175758362, "learning_rate": 0.000999990990152233, "loss": 1.9908, "step": 1170 }, { "epoch": 0.05210973278200169, "grad_norm": 0.27705973386764526, "learning_rate": 0.0009999907769094327, "loss": 1.9873, "step": 1172 }, { "epoch": 0.052198657240674046, "grad_norm": 0.3140580654144287, "learning_rate": 0.0009999905611725957, "loss": 1.9881, "step": 1174 }, { "epoch": 0.05228758169934641, "grad_norm": 0.27007198333740234, "learning_rate": 0.0009999903429417226, "loss": 1.9862, "step": 1176 }, { "epoch": 0.05237650615801876, "grad_norm": 0.2722287178039551, "learning_rate": 0.0009999901222168144, "loss": 1.9773, "step": 1178 }, { "epoch": 0.05246543061669112, "grad_norm": 0.2842351496219635, "learning_rate": 0.0009999898989978728, "loss": 1.9826, "step": 1180 }, { "epoch": 0.052554355075363476, "grad_norm": 0.29141294956207275, "learning_rate": 0.0009999896732848985, "loss": 1.9808, "step": 1182 }, { "epoch": 0.05264327953403584, "grad_norm": 0.2940419316291809, "learning_rate": 0.000999989445077893, "loss": 1.9789, "step": 1184 }, { "epoch": 0.05273220399270819, "grad_norm": 0.30048856139183044, "learning_rate": 0.0009999892143768567, "loss": 1.979, "step": 1186 }, { "epoch": 0.05282112845138055, "grad_norm": 0.33986565470695496, "learning_rate": 0.0009999889811817915, "loss": 1.9688, "step": 1188 }, { "epoch": 0.05291005291005291, "grad_norm": 0.27422598004341125, "learning_rate": 0.0009999887454926982, "loss": 1.9818, "step": 1190 }, { "epoch": 0.05299897736872527, "grad_norm": 0.3004554808139801, "learning_rate": 0.0009999885073095779, "loss": 1.9704, "step": 1192 }, { "epoch": 0.05308790182739762, "grad_norm": 0.2710491716861725, "learning_rate": 0.0009999882666324321, "loss": 1.9735, "step": 1194 }, { "epoch": 0.053176826286069984, "grad_norm": 0.28977084159851074, "learning_rate": 0.0009999880234612616, "loss": 1.9682, "step": 1196 }, { "epoch": 0.053265750744742345, "grad_norm": 0.28438761830329895, "learning_rate": 0.0009999877777960681, "loss": 1.9728, "step": 1198 }, { "epoch": 0.0533546752034147, "grad_norm": 0.25546666979789734, "learning_rate": 0.0009999875296368525, "loss": 1.9651, "step": 1200 }, { "epoch": 0.05344359966208706, "grad_norm": 0.2793266773223877, "learning_rate": 0.000999987278983616, "loss": 1.9721, "step": 1202 }, { "epoch": 0.053532524120759414, "grad_norm": 0.3147725462913513, "learning_rate": 0.00099998702583636, "loss": 1.9682, "step": 1204 }, { "epoch": 0.053621448579431776, "grad_norm": 0.29513031244277954, "learning_rate": 0.000999986770195086, "loss": 1.9685, "step": 1206 }, { "epoch": 0.05371037303810413, "grad_norm": 0.25496989488601685, "learning_rate": 0.0009999865120597948, "loss": 1.9681, "step": 1208 }, { "epoch": 0.05379929749677649, "grad_norm": 0.2535437345504761, "learning_rate": 0.0009999862514304878, "loss": 1.9657, "step": 1210 }, { "epoch": 0.053888221955448845, "grad_norm": 0.9937949180603027, "learning_rate": 0.0009999859883071665, "loss": 1.9724, "step": 1212 }, { "epoch": 0.053977146414121206, "grad_norm": 0.7421650290489197, "learning_rate": 0.000999985722689832, "loss": 2.2602, "step": 1214 }, { "epoch": 0.05406607087279356, "grad_norm": 1.2442095279693604, "learning_rate": 0.0009999854545784859, "loss": 2.1722, "step": 1216 }, { "epoch": 0.05415499533146592, "grad_norm": 1.6552417278289795, "learning_rate": 0.0009999851839731292, "loss": 2.1954, "step": 1218 }, { "epoch": 0.054243919790138276, "grad_norm": 0.8579690456390381, "learning_rate": 0.0009999849108737633, "loss": 2.263, "step": 1220 }, { "epoch": 0.05433284424881064, "grad_norm": 1.149450421333313, "learning_rate": 0.00099998463528039, "loss": 2.1687, "step": 1222 }, { "epoch": 0.05442176870748299, "grad_norm": 3.364642858505249, "learning_rate": 0.00099998435719301, "loss": 2.3092, "step": 1224 }, { "epoch": 0.05451069316615535, "grad_norm": 2.966097593307495, "learning_rate": 0.0009999840766116252, "loss": 2.2565, "step": 1226 }, { "epoch": 0.054599617624827707, "grad_norm": 1.0643404722213745, "learning_rate": 0.0009999837935362368, "loss": 2.2603, "step": 1228 }, { "epoch": 0.05468854208350007, "grad_norm": 1.2306264638900757, "learning_rate": 0.0009999835079668462, "loss": 2.2179, "step": 1230 }, { "epoch": 0.05477746654217242, "grad_norm": 3.267204999923706, "learning_rate": 0.000999983219903455, "loss": 2.2186, "step": 1232 }, { "epoch": 0.05486639100084478, "grad_norm": 1.2005592584609985, "learning_rate": 0.0009999829293460645, "loss": 2.2835, "step": 1234 }, { "epoch": 0.05495531545951714, "grad_norm": 7.615020275115967, "learning_rate": 0.000999982636294676, "loss": 2.2995, "step": 1236 }, { "epoch": 0.0550442399181895, "grad_norm": 1.7953623533248901, "learning_rate": 0.0009999823407492912, "loss": 2.2996, "step": 1238 }, { "epoch": 0.05513316437686185, "grad_norm": 1.1320980787277222, "learning_rate": 0.0009999820427099115, "loss": 2.1913, "step": 1240 }, { "epoch": 0.055222088835534214, "grad_norm": 1.0222786664962769, "learning_rate": 0.0009999817421765384, "loss": 2.1571, "step": 1242 }, { "epoch": 0.055311013294206575, "grad_norm": 0.7209634780883789, "learning_rate": 0.0009999814391491732, "loss": 2.1174, "step": 1244 }, { "epoch": 0.05539993775287893, "grad_norm": 0.489495187997818, "learning_rate": 0.000999981133627818, "loss": 2.1004, "step": 1246 }, { "epoch": 0.05548886221155129, "grad_norm": 0.357696533203125, "learning_rate": 0.0009999808256124737, "loss": 2.0732, "step": 1248 }, { "epoch": 0.055577786670223644, "grad_norm": 0.41004520654678345, "learning_rate": 0.0009999805151031417, "loss": 2.0589, "step": 1250 }, { "epoch": 0.055666711128896006, "grad_norm": 0.29356011748313904, "learning_rate": 0.0009999802020998244, "loss": 2.0436, "step": 1252 }, { "epoch": 0.05575563558756836, "grad_norm": 0.2679998278617859, "learning_rate": 0.0009999798866025225, "loss": 2.0365, "step": 1254 }, { "epoch": 0.05584456004624072, "grad_norm": 0.28258460760116577, "learning_rate": 0.0009999795686112382, "loss": 2.0301, "step": 1256 }, { "epoch": 0.055933484504913075, "grad_norm": 0.24409818649291992, "learning_rate": 0.0009999792481259727, "loss": 2.0139, "step": 1258 }, { "epoch": 0.056022408963585436, "grad_norm": 0.2604723870754242, "learning_rate": 0.0009999789251467278, "loss": 2.0132, "step": 1260 }, { "epoch": 0.05611133342225779, "grad_norm": 0.23944151401519775, "learning_rate": 0.000999978599673505, "loss": 2.0037, "step": 1262 }, { "epoch": 0.05620025788093015, "grad_norm": 0.24858349561691284, "learning_rate": 0.0009999782717063058, "loss": 2.0112, "step": 1264 }, { "epoch": 0.056289182339602506, "grad_norm": 0.22487503290176392, "learning_rate": 0.0009999779412451322, "loss": 1.9946, "step": 1266 }, { "epoch": 0.05637810679827487, "grad_norm": 0.23384034633636475, "learning_rate": 0.0009999776082899854, "loss": 1.9879, "step": 1268 }, { "epoch": 0.05646703125694722, "grad_norm": 0.23720683157444, "learning_rate": 0.0009999772728408675, "loss": 1.9923, "step": 1270 }, { "epoch": 0.05655595571561958, "grad_norm": 0.23450630903244019, "learning_rate": 0.00099997693489778, "loss": 1.9795, "step": 1272 }, { "epoch": 0.05664488017429194, "grad_norm": 0.2357773780822754, "learning_rate": 0.0009999765944607244, "loss": 1.9775, "step": 1274 }, { "epoch": 0.0567338046329643, "grad_norm": 0.24982024729251862, "learning_rate": 0.0009999762515297025, "loss": 1.9817, "step": 1276 }, { "epoch": 0.05682272909163665, "grad_norm": 0.24399292469024658, "learning_rate": 0.0009999759061047162, "loss": 1.9772, "step": 1278 }, { "epoch": 0.05691165355030901, "grad_norm": 0.23936818540096283, "learning_rate": 0.000999975558185767, "loss": 1.9714, "step": 1280 }, { "epoch": 0.05700057800898137, "grad_norm": 0.24449017643928528, "learning_rate": 0.000999975207772857, "loss": 1.9671, "step": 1282 }, { "epoch": 0.05708950246765373, "grad_norm": 0.24265700578689575, "learning_rate": 0.0009999748548659875, "loss": 1.9614, "step": 1284 }, { "epoch": 0.05717842692632608, "grad_norm": 0.22446395456790924, "learning_rate": 0.0009999744994651602, "loss": 1.9688, "step": 1286 }, { "epoch": 0.057267351384998444, "grad_norm": 0.21999609470367432, "learning_rate": 0.0009999741415703774, "loss": 1.9591, "step": 1288 }, { "epoch": 0.057356275843670805, "grad_norm": 0.21769745647907257, "learning_rate": 0.0009999737811816406, "loss": 1.9581, "step": 1290 }, { "epoch": 0.05744520030234316, "grad_norm": 0.2255491316318512, "learning_rate": 0.0009999734182989514, "loss": 1.9604, "step": 1292 }, { "epoch": 0.05753412476101552, "grad_norm": 0.22669969499111176, "learning_rate": 0.0009999730529223119, "loss": 1.9538, "step": 1294 }, { "epoch": 0.057623049219687875, "grad_norm": 0.22954627871513367, "learning_rate": 0.0009999726850517237, "loss": 1.9534, "step": 1296 }, { "epoch": 0.057711973678360236, "grad_norm": 0.22537246346473694, "learning_rate": 0.0009999723146871889, "loss": 1.951, "step": 1298 }, { "epoch": 0.05780089813703259, "grad_norm": 0.2216878980398178, "learning_rate": 0.0009999719418287093, "loss": 1.9498, "step": 1300 }, { "epoch": 0.05788982259570495, "grad_norm": 0.23779501020908356, "learning_rate": 0.0009999715664762866, "loss": 1.951, "step": 1302 }, { "epoch": 0.057978747054377305, "grad_norm": 0.23081955313682556, "learning_rate": 0.0009999711886299226, "loss": 1.9514, "step": 1304 }, { "epoch": 0.058067671513049667, "grad_norm": 0.22819961607456207, "learning_rate": 0.0009999708082896192, "loss": 1.9488, "step": 1306 }, { "epoch": 0.05815659597172202, "grad_norm": 0.23504947125911713, "learning_rate": 0.0009999704254553788, "loss": 1.9488, "step": 1308 }, { "epoch": 0.05824552043039438, "grad_norm": 0.24413910508155823, "learning_rate": 0.0009999700401272025, "loss": 1.9443, "step": 1310 }, { "epoch": 0.058334444889066736, "grad_norm": 0.22513625025749207, "learning_rate": 0.000999969652305093, "loss": 1.9458, "step": 1312 }, { "epoch": 0.0584233693477391, "grad_norm": 0.22811587154865265, "learning_rate": 0.0009999692619890516, "loss": 1.9439, "step": 1314 }, { "epoch": 0.05851229380641145, "grad_norm": 0.21283623576164246, "learning_rate": 0.0009999688691790809, "loss": 1.9435, "step": 1316 }, { "epoch": 0.05860121826508381, "grad_norm": 0.22420313954353333, "learning_rate": 0.000999968473875182, "loss": 1.9389, "step": 1318 }, { "epoch": 0.05869014272375617, "grad_norm": 0.23344479501247406, "learning_rate": 0.000999968076077358, "loss": 1.9442, "step": 1320 }, { "epoch": 0.05877906718242853, "grad_norm": 0.21948975324630737, "learning_rate": 0.0009999676757856098, "loss": 1.9358, "step": 1322 }, { "epoch": 0.05886799164110088, "grad_norm": 0.24658890068531036, "learning_rate": 0.0009999672729999398, "loss": 1.9407, "step": 1324 }, { "epoch": 0.05895691609977324, "grad_norm": 0.23471440374851227, "learning_rate": 0.0009999668677203501, "loss": 1.942, "step": 1326 }, { "epoch": 0.0590458405584456, "grad_norm": 0.23664714395999908, "learning_rate": 0.000999966459946843, "loss": 1.9393, "step": 1328 }, { "epoch": 0.05913476501711796, "grad_norm": 0.21111926436424255, "learning_rate": 0.0009999660496794198, "loss": 1.9344, "step": 1330 }, { "epoch": 0.05922368947579031, "grad_norm": 0.20922254025936127, "learning_rate": 0.0009999656369180832, "loss": 1.9326, "step": 1332 }, { "epoch": 0.059312613934462674, "grad_norm": 0.2358391135931015, "learning_rate": 0.0009999652216628348, "loss": 1.9361, "step": 1334 }, { "epoch": 0.059401538393135035, "grad_norm": 0.2448922097682953, "learning_rate": 0.000999964803913677, "loss": 1.931, "step": 1336 }, { "epoch": 0.05949046285180739, "grad_norm": 0.2407871037721634, "learning_rate": 0.0009999643836706119, "loss": 1.9348, "step": 1338 }, { "epoch": 0.05957938731047975, "grad_norm": 0.22217388451099396, "learning_rate": 0.0009999639609336412, "loss": 1.9406, "step": 1340 }, { "epoch": 0.059668311769152105, "grad_norm": 0.2148897796869278, "learning_rate": 0.0009999635357027675, "loss": 1.927, "step": 1342 }, { "epoch": 0.059757236227824466, "grad_norm": 0.22098839282989502, "learning_rate": 0.0009999631079779926, "loss": 1.9321, "step": 1344 }, { "epoch": 0.05984616068649682, "grad_norm": 0.22021619975566864, "learning_rate": 0.0009999626777593188, "loss": 1.9246, "step": 1346 }, { "epoch": 0.05993508514516918, "grad_norm": 0.23338301479816437, "learning_rate": 0.000999962245046748, "loss": 1.9298, "step": 1348 }, { "epoch": 0.060024009603841535, "grad_norm": 0.2286379635334015, "learning_rate": 0.0009999618098402827, "loss": 1.9241, "step": 1350 }, { "epoch": 0.0601129340625139, "grad_norm": 0.23167788982391357, "learning_rate": 0.000999961372139925, "loss": 1.9308, "step": 1352 }, { "epoch": 0.06020185852118625, "grad_norm": 0.23941820859909058, "learning_rate": 0.0009999609319456767, "loss": 1.922, "step": 1354 }, { "epoch": 0.06029078297985861, "grad_norm": 0.23236848413944244, "learning_rate": 0.0009999604892575406, "loss": 1.9277, "step": 1356 }, { "epoch": 0.060379707438530966, "grad_norm": 0.2312156707048416, "learning_rate": 0.0009999600440755182, "loss": 1.9266, "step": 1358 }, { "epoch": 0.06046863189720333, "grad_norm": 0.2197667956352234, "learning_rate": 0.0009999595963996123, "loss": 1.9266, "step": 1360 }, { "epoch": 0.06055755635587568, "grad_norm": 0.22230570018291473, "learning_rate": 0.000999959146229825, "loss": 1.9171, "step": 1362 }, { "epoch": 0.06064648081454804, "grad_norm": 0.22791069746017456, "learning_rate": 0.0009999586935661585, "loss": 1.9194, "step": 1364 }, { "epoch": 0.0607354052732204, "grad_norm": 0.21917258203029633, "learning_rate": 0.000999958238408615, "loss": 1.9182, "step": 1366 }, { "epoch": 0.06082432973189276, "grad_norm": 0.2185303419828415, "learning_rate": 0.0009999577807571968, "loss": 1.9117, "step": 1368 }, { "epoch": 0.06091325419056511, "grad_norm": 0.21194784343242645, "learning_rate": 0.000999957320611906, "loss": 1.9163, "step": 1370 }, { "epoch": 0.06100217864923747, "grad_norm": 0.24148310720920563, "learning_rate": 0.0009999568579727456, "loss": 1.9209, "step": 1372 }, { "epoch": 0.06109110310790983, "grad_norm": 0.2278190702199936, "learning_rate": 0.0009999563928397171, "loss": 1.9118, "step": 1374 }, { "epoch": 0.06118002756658219, "grad_norm": 0.23531115055084229, "learning_rate": 0.000999955925212823, "loss": 1.9211, "step": 1376 }, { "epoch": 0.06126895202525454, "grad_norm": 0.23974572122097015, "learning_rate": 0.0009999554550920659, "loss": 1.9173, "step": 1378 }, { "epoch": 0.061357876483926904, "grad_norm": 0.2158331573009491, "learning_rate": 0.000999954982477448, "loss": 1.9132, "step": 1380 }, { "epoch": 0.061446800942599265, "grad_norm": 0.21482673287391663, "learning_rate": 0.0009999545073689717, "loss": 1.9197, "step": 1382 }, { "epoch": 0.06153572540127162, "grad_norm": 0.23469914495944977, "learning_rate": 0.0009999540297666394, "loss": 1.9115, "step": 1384 }, { "epoch": 0.06162464985994398, "grad_norm": 0.22895728051662445, "learning_rate": 0.0009999535496704533, "loss": 1.9155, "step": 1386 }, { "epoch": 0.061713574318616335, "grad_norm": 0.22930032014846802, "learning_rate": 0.0009999530670804157, "loss": 1.909, "step": 1388 }, { "epoch": 0.061802498777288696, "grad_norm": 0.21696867048740387, "learning_rate": 0.0009999525819965297, "loss": 1.9058, "step": 1390 }, { "epoch": 0.06189142323596105, "grad_norm": 0.22047823667526245, "learning_rate": 0.0009999520944187967, "loss": 1.907, "step": 1392 }, { "epoch": 0.06198034769463341, "grad_norm": 0.2223616987466812, "learning_rate": 0.00099995160434722, "loss": 1.9068, "step": 1394 }, { "epoch": 0.062069272153305766, "grad_norm": 0.22555837035179138, "learning_rate": 0.0009999511117818017, "loss": 1.9109, "step": 1396 }, { "epoch": 0.06215819661197813, "grad_norm": 0.20461831986904144, "learning_rate": 0.000999950616722544, "loss": 1.9028, "step": 1398 }, { "epoch": 0.06224712107065048, "grad_norm": 0.22076964378356934, "learning_rate": 0.00099995011916945, "loss": 1.8984, "step": 1400 }, { "epoch": 0.06233604552932284, "grad_norm": 0.20543377101421356, "learning_rate": 0.0009999496191225217, "loss": 1.9059, "step": 1402 }, { "epoch": 0.062424969987995196, "grad_norm": 0.20205794274806976, "learning_rate": 0.0009999491165817616, "loss": 1.91, "step": 1404 }, { "epoch": 0.06251389444666755, "grad_norm": 0.22159075736999512, "learning_rate": 0.0009999486115471723, "loss": 1.9041, "step": 1406 }, { "epoch": 0.06260281890533992, "grad_norm": 0.20887672901153564, "learning_rate": 0.0009999481040187566, "loss": 1.9044, "step": 1408 }, { "epoch": 0.06269174336401227, "grad_norm": 0.22122983634471893, "learning_rate": 0.0009999475939965164, "loss": 1.906, "step": 1410 }, { "epoch": 0.06278066782268463, "grad_norm": 0.2313462495803833, "learning_rate": 0.0009999470814804547, "loss": 1.9103, "step": 1412 }, { "epoch": 0.06286959228135698, "grad_norm": 0.22419576346874237, "learning_rate": 0.0009999465664705743, "loss": 1.9024, "step": 1414 }, { "epoch": 0.06295851674002935, "grad_norm": 0.21045628190040588, "learning_rate": 0.000999946048966877, "loss": 1.9055, "step": 1416 }, { "epoch": 0.0630474411987017, "grad_norm": 0.2150772362947464, "learning_rate": 0.0009999455289693663, "loss": 1.9068, "step": 1418 }, { "epoch": 0.06313636565737406, "grad_norm": 0.22035369277000427, "learning_rate": 0.000999945006478044, "loss": 1.9028, "step": 1420 }, { "epoch": 0.06322529011604641, "grad_norm": 0.22336137294769287, "learning_rate": 0.000999944481492913, "loss": 1.906, "step": 1422 }, { "epoch": 0.06331421457471878, "grad_norm": 0.22286519408226013, "learning_rate": 0.000999943954013976, "loss": 1.9031, "step": 1424 }, { "epoch": 0.06340313903339113, "grad_norm": 0.2087024748325348, "learning_rate": 0.0009999434240412356, "loss": 1.9006, "step": 1426 }, { "epoch": 0.06349206349206349, "grad_norm": 0.22912132740020752, "learning_rate": 0.0009999428915746943, "loss": 1.9031, "step": 1428 }, { "epoch": 0.06358098795073586, "grad_norm": 0.21829701960086823, "learning_rate": 0.000999942356614355, "loss": 1.8998, "step": 1430 }, { "epoch": 0.06366991240940821, "grad_norm": 0.22255627810955048, "learning_rate": 0.0009999418191602202, "loss": 1.8958, "step": 1432 }, { "epoch": 0.06375883686808057, "grad_norm": 0.2457275241613388, "learning_rate": 0.0009999412792122927, "loss": 1.8966, "step": 1434 }, { "epoch": 0.06384776132675292, "grad_norm": 0.22053119540214539, "learning_rate": 0.000999940736770575, "loss": 1.8982, "step": 1436 }, { "epoch": 0.06393668578542529, "grad_norm": 0.21990668773651123, "learning_rate": 0.00099994019183507, "loss": 1.8965, "step": 1438 }, { "epoch": 0.06402561024409764, "grad_norm": 0.21976561844348907, "learning_rate": 0.0009999396444057803, "loss": 1.8872, "step": 1440 }, { "epoch": 0.06411453470277, "grad_norm": 0.2285648137331009, "learning_rate": 0.0009999390944827086, "loss": 1.9001, "step": 1442 }, { "epoch": 0.06420345916144235, "grad_norm": 0.20996513962745667, "learning_rate": 0.0009999385420658578, "loss": 1.8845, "step": 1444 }, { "epoch": 0.06429238362011472, "grad_norm": 0.2080724537372589, "learning_rate": 0.0009999379871552304, "loss": 1.8932, "step": 1446 }, { "epoch": 0.06438130807878707, "grad_norm": 0.22280140221118927, "learning_rate": 0.0009999374297508295, "loss": 1.8875, "step": 1448 }, { "epoch": 0.06447023253745943, "grad_norm": 0.2291831374168396, "learning_rate": 0.0009999368698526577, "loss": 1.8935, "step": 1450 }, { "epoch": 0.06455915699613178, "grad_norm": 0.21631364524364471, "learning_rate": 0.0009999363074607178, "loss": 1.8842, "step": 1452 }, { "epoch": 0.06464808145480415, "grad_norm": 0.22055967152118683, "learning_rate": 0.0009999357425750127, "loss": 1.8922, "step": 1454 }, { "epoch": 0.0647370059134765, "grad_norm": 0.2344065010547638, "learning_rate": 0.000999935175195545, "loss": 1.8895, "step": 1456 }, { "epoch": 0.06482593037214886, "grad_norm": 0.2481888383626938, "learning_rate": 0.0009999346053223175, "loss": 1.8887, "step": 1458 }, { "epoch": 0.06491485483082121, "grad_norm": 0.21356667578220367, "learning_rate": 0.0009999340329553334, "loss": 1.8872, "step": 1460 }, { "epoch": 0.06500377928949358, "grad_norm": 0.22886356711387634, "learning_rate": 0.0009999334580945956, "loss": 1.8847, "step": 1462 }, { "epoch": 0.06509270374816593, "grad_norm": 0.2166658341884613, "learning_rate": 0.0009999328807401065, "loss": 1.8917, "step": 1464 }, { "epoch": 0.06518162820683829, "grad_norm": 0.217306986451149, "learning_rate": 0.000999932300891869, "loss": 1.8965, "step": 1466 }, { "epoch": 0.06527055266551064, "grad_norm": 0.20982764661312103, "learning_rate": 0.0009999317185498864, "loss": 1.8833, "step": 1468 }, { "epoch": 0.06535947712418301, "grad_norm": 0.2036888599395752, "learning_rate": 0.0009999311337141614, "loss": 1.8866, "step": 1470 }, { "epoch": 0.06544840158285536, "grad_norm": 0.20320679247379303, "learning_rate": 0.000999930546384697, "loss": 1.8832, "step": 1472 }, { "epoch": 0.06553732604152772, "grad_norm": 0.20226582884788513, "learning_rate": 0.000999929956561496, "loss": 1.8863, "step": 1474 }, { "epoch": 0.06562625050020009, "grad_norm": 0.19525253772735596, "learning_rate": 0.0009999293642445613, "loss": 1.8798, "step": 1476 }, { "epoch": 0.06571517495887244, "grad_norm": 0.20026175677776337, "learning_rate": 0.000999928769433896, "loss": 1.8813, "step": 1478 }, { "epoch": 0.0658040994175448, "grad_norm": 0.21397806704044342, "learning_rate": 0.0009999281721295031, "loss": 1.8751, "step": 1480 }, { "epoch": 0.06589302387621715, "grad_norm": 0.22252975404262543, "learning_rate": 0.0009999275723313853, "loss": 1.8884, "step": 1482 }, { "epoch": 0.06598194833488952, "grad_norm": 0.2118113934993744, "learning_rate": 0.0009999269700395458, "loss": 1.8773, "step": 1484 }, { "epoch": 0.06607087279356187, "grad_norm": 0.21514491736888885, "learning_rate": 0.0009999263652539878, "loss": 1.8834, "step": 1486 }, { "epoch": 0.06615979725223423, "grad_norm": 0.20763011276721954, "learning_rate": 0.000999925757974714, "loss": 1.876, "step": 1488 }, { "epoch": 0.06624872171090658, "grad_norm": 0.21687519550323486, "learning_rate": 0.0009999251482017276, "loss": 1.8788, "step": 1490 }, { "epoch": 0.06633764616957895, "grad_norm": 0.22184109687805176, "learning_rate": 0.0009999245359350315, "loss": 1.8724, "step": 1492 }, { "epoch": 0.0664265706282513, "grad_norm": 0.22225528955459595, "learning_rate": 0.0009999239211746288, "loss": 1.8765, "step": 1494 }, { "epoch": 0.06651549508692366, "grad_norm": 0.20817163586616516, "learning_rate": 0.0009999233039205226, "loss": 1.8764, "step": 1496 }, { "epoch": 0.06660441954559601, "grad_norm": 0.2178126573562622, "learning_rate": 0.000999922684172716, "loss": 1.886, "step": 1498 }, { "epoch": 0.06669334400426838, "grad_norm": 0.23448695242404938, "learning_rate": 0.0009999220619312122, "loss": 1.8872, "step": 1500 }, { "epoch": 0.06669334400426838, "eval_loss": 1.8269543647766113, "eval_runtime": 12.3418, "eval_samples_per_second": 559.886, "eval_steps_per_second": 70.006, "step": 1500 }, { "epoch": 0.06678226846294073, "grad_norm": 0.21950295567512512, "learning_rate": 0.000999921437196014, "loss": 1.8797, "step": 1502 }, { "epoch": 0.06687119292161309, "grad_norm": 0.21364814043045044, "learning_rate": 0.0009999208099671247, "loss": 1.8754, "step": 1504 }, { "epoch": 0.06696011738028544, "grad_norm": 0.20813779532909393, "learning_rate": 0.0009999201802445474, "loss": 1.8765, "step": 1506 }, { "epoch": 0.06704904183895781, "grad_norm": 0.2039484828710556, "learning_rate": 0.0009999195480282855, "loss": 1.8757, "step": 1508 }, { "epoch": 0.06713796629763016, "grad_norm": 0.22736653685569763, "learning_rate": 0.0009999189133183416, "loss": 1.8799, "step": 1510 }, { "epoch": 0.06722689075630252, "grad_norm": 0.20011046528816223, "learning_rate": 0.0009999182761147192, "loss": 1.8747, "step": 1512 }, { "epoch": 0.06731581521497487, "grad_norm": 0.1882735788822174, "learning_rate": 0.0009999176364174217, "loss": 1.866, "step": 1514 }, { "epoch": 0.06740473967364724, "grad_norm": 0.2117367535829544, "learning_rate": 0.0009999169942264518, "loss": 1.8735, "step": 1516 }, { "epoch": 0.0674936641323196, "grad_norm": 0.20652443170547485, "learning_rate": 0.000999916349541813, "loss": 1.8761, "step": 1518 }, { "epoch": 0.06758258859099195, "grad_norm": 0.20789290964603424, "learning_rate": 0.0009999157023635088, "loss": 1.8637, "step": 1520 }, { "epoch": 0.06767151304966432, "grad_norm": 0.19228237867355347, "learning_rate": 0.0009999150526915418, "loss": 1.8699, "step": 1522 }, { "epoch": 0.06776043750833667, "grad_norm": 0.20845188200473785, "learning_rate": 0.0009999144005259156, "loss": 1.8741, "step": 1524 }, { "epoch": 0.06784936196700903, "grad_norm": 0.20529267191886902, "learning_rate": 0.0009999137458666333, "loss": 1.868, "step": 1526 }, { "epoch": 0.06793828642568138, "grad_norm": 0.20668277144432068, "learning_rate": 0.0009999130887136983, "loss": 1.8751, "step": 1528 }, { "epoch": 0.06802721088435375, "grad_norm": 0.21079662442207336, "learning_rate": 0.0009999124290671138, "loss": 1.8683, "step": 1530 }, { "epoch": 0.0681161353430261, "grad_norm": 0.21383321285247803, "learning_rate": 0.0009999117669268831, "loss": 1.8794, "step": 1532 }, { "epoch": 0.06820505980169846, "grad_norm": 0.20688050985336304, "learning_rate": 0.0009999111022930096, "loss": 1.8711, "step": 1534 }, { "epoch": 0.06829398426037081, "grad_norm": 0.2304670512676239, "learning_rate": 0.0009999104351654964, "loss": 1.8715, "step": 1536 }, { "epoch": 0.06838290871904318, "grad_norm": 0.22473397850990295, "learning_rate": 0.0009999097655443473, "loss": 1.8662, "step": 1538 }, { "epoch": 0.06847183317771553, "grad_norm": 0.2325805276632309, "learning_rate": 0.0009999090934295649, "loss": 1.8631, "step": 1540 }, { "epoch": 0.06856075763638789, "grad_norm": 0.21634715795516968, "learning_rate": 0.0009999084188211532, "loss": 1.8677, "step": 1542 }, { "epoch": 0.06864968209506024, "grad_norm": 0.21803300082683563, "learning_rate": 0.0009999077417191153, "loss": 1.8657, "step": 1544 }, { "epoch": 0.06873860655373261, "grad_norm": 0.20068170130252838, "learning_rate": 0.0009999070621234546, "loss": 1.862, "step": 1546 }, { "epoch": 0.06882753101240496, "grad_norm": 0.19963085651397705, "learning_rate": 0.0009999063800341745, "loss": 1.868, "step": 1548 }, { "epoch": 0.06891645547107732, "grad_norm": 0.19617609679698944, "learning_rate": 0.0009999056954512783, "loss": 1.8689, "step": 1550 }, { "epoch": 0.06900537992974967, "grad_norm": 0.20461757481098175, "learning_rate": 0.0009999050083747696, "loss": 1.8631, "step": 1552 }, { "epoch": 0.06909430438842204, "grad_norm": 0.21538877487182617, "learning_rate": 0.0009999043188046518, "loss": 1.8703, "step": 1554 }, { "epoch": 0.0691832288470944, "grad_norm": 0.20673204958438873, "learning_rate": 0.0009999036267409284, "loss": 1.8687, "step": 1556 }, { "epoch": 0.06927215330576675, "grad_norm": 0.24245448410511017, "learning_rate": 0.0009999029321836024, "loss": 1.8601, "step": 1558 }, { "epoch": 0.0693610777644391, "grad_norm": 0.2253614217042923, "learning_rate": 0.0009999022351326776, "loss": 1.8716, "step": 1560 }, { "epoch": 0.06945000222311147, "grad_norm": 0.21160924434661865, "learning_rate": 0.0009999015355881577, "loss": 1.8608, "step": 1562 }, { "epoch": 0.06953892668178382, "grad_norm": 0.20701120793819427, "learning_rate": 0.000999900833550046, "loss": 1.8621, "step": 1564 }, { "epoch": 0.06962785114045618, "grad_norm": 0.210323303937912, "learning_rate": 0.0009999001290183457, "loss": 1.8667, "step": 1566 }, { "epoch": 0.06971677559912855, "grad_norm": 0.21616849303245544, "learning_rate": 0.0009998994219930606, "loss": 1.8677, "step": 1568 }, { "epoch": 0.0698057000578009, "grad_norm": 0.21297746896743774, "learning_rate": 0.0009998987124741944, "loss": 1.869, "step": 1570 }, { "epoch": 0.06989462451647326, "grad_norm": 0.2072431594133377, "learning_rate": 0.0009998980004617504, "loss": 1.8584, "step": 1572 }, { "epoch": 0.06998354897514561, "grad_norm": 0.2180069535970688, "learning_rate": 0.000999897285955732, "loss": 1.8578, "step": 1574 }, { "epoch": 0.07007247343381798, "grad_norm": 0.23733854293823242, "learning_rate": 0.0009998965689561432, "loss": 1.8605, "step": 1576 }, { "epoch": 0.07016139789249033, "grad_norm": 0.19760969281196594, "learning_rate": 0.0009998958494629874, "loss": 1.8575, "step": 1578 }, { "epoch": 0.07025032235116269, "grad_norm": 0.21075817942619324, "learning_rate": 0.0009998951274762678, "loss": 1.87, "step": 1580 }, { "epoch": 0.07033924680983504, "grad_norm": 0.2083798050880432, "learning_rate": 0.0009998944029959884, "loss": 1.8528, "step": 1582 }, { "epoch": 0.07042817126850741, "grad_norm": 0.2075483202934265, "learning_rate": 0.000999893676022153, "loss": 1.8584, "step": 1584 }, { "epoch": 0.07051709572717976, "grad_norm": 0.2059869021177292, "learning_rate": 0.0009998929465547647, "loss": 1.8558, "step": 1586 }, { "epoch": 0.07060602018585212, "grad_norm": 0.2247011363506317, "learning_rate": 0.0009998922145938276, "loss": 1.8625, "step": 1588 }, { "epoch": 0.07069494464452447, "grad_norm": 0.21843503415584564, "learning_rate": 0.0009998914801393449, "loss": 1.8617, "step": 1590 }, { "epoch": 0.07078386910319684, "grad_norm": 0.1983787566423416, "learning_rate": 0.0009998907431913206, "loss": 1.8538, "step": 1592 }, { "epoch": 0.0708727935618692, "grad_norm": 0.19233089685440063, "learning_rate": 0.0009998900037497586, "loss": 1.8622, "step": 1594 }, { "epoch": 0.07096171802054155, "grad_norm": 0.22240038216114044, "learning_rate": 0.000999889261814662, "loss": 1.8567, "step": 1596 }, { "epoch": 0.0710506424792139, "grad_norm": 0.20009489357471466, "learning_rate": 0.0009998885173860348, "loss": 1.8603, "step": 1598 }, { "epoch": 0.07113956693788627, "grad_norm": 0.21011285483837128, "learning_rate": 0.0009998877704638807, "loss": 1.8492, "step": 1600 }, { "epoch": 0.07122849139655862, "grad_norm": 0.2004971206188202, "learning_rate": 0.0009998870210482034, "loss": 1.851, "step": 1602 }, { "epoch": 0.07131741585523098, "grad_norm": 0.20903609693050385, "learning_rate": 0.0009998862691390066, "loss": 1.8618, "step": 1604 }, { "epoch": 0.07140634031390333, "grad_norm": 0.21211233735084534, "learning_rate": 0.0009998855147362944, "loss": 1.8508, "step": 1606 }, { "epoch": 0.0714952647725757, "grad_norm": 0.19902560114860535, "learning_rate": 0.0009998847578400701, "loss": 1.852, "step": 1608 }, { "epoch": 0.07158418923124805, "grad_norm": 0.19855988025665283, "learning_rate": 0.0009998839984503376, "loss": 1.8467, "step": 1610 }, { "epoch": 0.07167311368992041, "grad_norm": 0.1922021359205246, "learning_rate": 0.000999883236567101, "loss": 1.8498, "step": 1612 }, { "epoch": 0.07176203814859278, "grad_norm": 0.18983250856399536, "learning_rate": 0.0009998824721903635, "loss": 1.8572, "step": 1614 }, { "epoch": 0.07185096260726513, "grad_norm": 0.20658980309963226, "learning_rate": 0.0009998817053201295, "loss": 1.8525, "step": 1616 }, { "epoch": 0.07193988706593749, "grad_norm": 0.21265624463558197, "learning_rate": 0.0009998809359564023, "loss": 1.8482, "step": 1618 }, { "epoch": 0.07202881152460984, "grad_norm": 0.21423251926898956, "learning_rate": 0.0009998801640991861, "loss": 1.8608, "step": 1620 }, { "epoch": 0.07211773598328221, "grad_norm": 0.20722512900829315, "learning_rate": 0.000999879389748485, "loss": 1.8522, "step": 1622 }, { "epoch": 0.07220666044195456, "grad_norm": 0.20465902984142303, "learning_rate": 0.0009998786129043022, "loss": 1.8502, "step": 1624 }, { "epoch": 0.07229558490062692, "grad_norm": 0.20114745199680328, "learning_rate": 0.000999877833566642, "loss": 1.8477, "step": 1626 }, { "epoch": 0.07238450935929927, "grad_norm": 0.2192513346672058, "learning_rate": 0.0009998770517355082, "loss": 1.8445, "step": 1628 }, { "epoch": 0.07247343381797164, "grad_norm": 0.2060314267873764, "learning_rate": 0.0009998762674109046, "loss": 1.8545, "step": 1630 }, { "epoch": 0.07256235827664399, "grad_norm": 0.20097291469573975, "learning_rate": 0.0009998754805928354, "loss": 1.8489, "step": 1632 }, { "epoch": 0.07265128273531635, "grad_norm": 0.21980713307857513, "learning_rate": 0.000999874691281304, "loss": 1.8508, "step": 1634 }, { "epoch": 0.0727402071939887, "grad_norm": 0.20329026877880096, "learning_rate": 0.000999873899476315, "loss": 1.8407, "step": 1636 }, { "epoch": 0.07282913165266107, "grad_norm": 0.20128802955150604, "learning_rate": 0.0009998731051778717, "loss": 1.8521, "step": 1638 }, { "epoch": 0.07291805611133342, "grad_norm": 0.20749594271183014, "learning_rate": 0.0009998723083859786, "loss": 1.8492, "step": 1640 }, { "epoch": 0.07300698057000578, "grad_norm": 0.20556269586086273, "learning_rate": 0.0009998715091006393, "loss": 1.839, "step": 1642 }, { "epoch": 0.07309590502867813, "grad_norm": 0.20244896411895752, "learning_rate": 0.0009998707073218583, "loss": 1.8373, "step": 1644 }, { "epoch": 0.0731848294873505, "grad_norm": 0.2151784747838974, "learning_rate": 0.0009998699030496388, "loss": 1.8453, "step": 1646 }, { "epoch": 0.07327375394602285, "grad_norm": 0.21280355751514435, "learning_rate": 0.0009998690962839856, "loss": 1.8447, "step": 1648 }, { "epoch": 0.07336267840469521, "grad_norm": 0.20697881281375885, "learning_rate": 0.000999868287024902, "loss": 1.8475, "step": 1650 }, { "epoch": 0.07345160286336756, "grad_norm": 0.19814634323120117, "learning_rate": 0.0009998674752723926, "loss": 1.8451, "step": 1652 }, { "epoch": 0.07354052732203993, "grad_norm": 0.192799910902977, "learning_rate": 0.0009998666610264613, "loss": 1.837, "step": 1654 }, { "epoch": 0.07362945178071229, "grad_norm": 0.2121358960866928, "learning_rate": 0.0009998658442871122, "loss": 1.8423, "step": 1656 }, { "epoch": 0.07371837623938464, "grad_norm": 0.21163220703601837, "learning_rate": 0.000999865025054349, "loss": 1.839, "step": 1658 }, { "epoch": 0.07380730069805701, "grad_norm": 0.2047502100467682, "learning_rate": 0.0009998642033281764, "loss": 1.8361, "step": 1660 }, { "epoch": 0.07389622515672936, "grad_norm": 0.2158685177564621, "learning_rate": 0.000999863379108598, "loss": 1.8407, "step": 1662 }, { "epoch": 0.07398514961540172, "grad_norm": 0.21105967462062836, "learning_rate": 0.0009998625523956182, "loss": 1.8494, "step": 1664 }, { "epoch": 0.07407407407407407, "grad_norm": 0.2116025984287262, "learning_rate": 0.000999861723189241, "loss": 1.8446, "step": 1666 }, { "epoch": 0.07416299853274644, "grad_norm": 0.19938991963863373, "learning_rate": 0.0009998608914894706, "loss": 1.8381, "step": 1668 }, { "epoch": 0.07425192299141879, "grad_norm": 0.20230786502361298, "learning_rate": 0.0009998600572963107, "loss": 1.8361, "step": 1670 }, { "epoch": 0.07434084745009115, "grad_norm": 0.20542068779468536, "learning_rate": 0.0009998592206097662, "loss": 1.8327, "step": 1672 }, { "epoch": 0.0744297719087635, "grad_norm": 0.20988449454307556, "learning_rate": 0.000999858381429841, "loss": 1.8391, "step": 1674 }, { "epoch": 0.07451869636743587, "grad_norm": 0.20840682089328766, "learning_rate": 0.0009998575397565392, "loss": 1.8346, "step": 1676 }, { "epoch": 0.07460762082610822, "grad_norm": 0.1958722621202469, "learning_rate": 0.000999856695589865, "loss": 1.8342, "step": 1678 }, { "epoch": 0.07469654528478058, "grad_norm": 0.20700527727603912, "learning_rate": 0.0009998558489298225, "loss": 1.8323, "step": 1680 }, { "epoch": 0.07478546974345293, "grad_norm": 0.20546066761016846, "learning_rate": 0.000999854999776416, "loss": 1.8312, "step": 1682 }, { "epoch": 0.0748743942021253, "grad_norm": 0.2187362015247345, "learning_rate": 0.00099985414812965, "loss": 1.8375, "step": 1684 }, { "epoch": 0.07496331866079765, "grad_norm": 0.1964145451784134, "learning_rate": 0.0009998532939895285, "loss": 1.8339, "step": 1686 }, { "epoch": 0.07505224311947001, "grad_norm": 0.20831523835659027, "learning_rate": 0.0009998524373560557, "loss": 1.8381, "step": 1688 }, { "epoch": 0.07514116757814236, "grad_norm": 0.21572189033031464, "learning_rate": 0.0009998515782292361, "loss": 1.8409, "step": 1690 }, { "epoch": 0.07523009203681473, "grad_norm": 0.21819041669368744, "learning_rate": 0.0009998507166090738, "loss": 1.8326, "step": 1692 }, { "epoch": 0.07531901649548708, "grad_norm": 0.20171982049942017, "learning_rate": 0.000999849852495573, "loss": 1.8375, "step": 1694 }, { "epoch": 0.07540794095415944, "grad_norm": 0.20552539825439453, "learning_rate": 0.0009998489858887383, "loss": 1.8351, "step": 1696 }, { "epoch": 0.07549686541283179, "grad_norm": 0.19860213994979858, "learning_rate": 0.000999848116788574, "loss": 1.8321, "step": 1698 }, { "epoch": 0.07558578987150416, "grad_norm": 0.20631815493106842, "learning_rate": 0.0009998472451950842, "loss": 1.8372, "step": 1700 }, { "epoch": 0.07567471433017652, "grad_norm": 0.2032255083322525, "learning_rate": 0.0009998463711082732, "loss": 1.8302, "step": 1702 }, { "epoch": 0.07576363878884887, "grad_norm": 0.22408516705036163, "learning_rate": 0.000999845494528146, "loss": 1.8288, "step": 1704 }, { "epoch": 0.07585256324752124, "grad_norm": 0.2116139978170395, "learning_rate": 0.000999844615454706, "loss": 1.8269, "step": 1706 }, { "epoch": 0.07594148770619359, "grad_norm": 0.21274049580097198, "learning_rate": 0.0009998437338879583, "loss": 1.8308, "step": 1708 }, { "epoch": 0.07603041216486595, "grad_norm": 0.2098899781703949, "learning_rate": 0.0009998428498279072, "loss": 1.8334, "step": 1710 }, { "epoch": 0.0761193366235383, "grad_norm": 0.20081061124801636, "learning_rate": 0.0009998419632745567, "loss": 1.8261, "step": 1712 }, { "epoch": 0.07620826108221067, "grad_norm": 0.19824229180812836, "learning_rate": 0.0009998410742279118, "loss": 1.8391, "step": 1714 }, { "epoch": 0.07629718554088302, "grad_norm": 0.19863907992839813, "learning_rate": 0.0009998401826879766, "loss": 1.8336, "step": 1716 }, { "epoch": 0.07638610999955538, "grad_norm": 0.19055074453353882, "learning_rate": 0.0009998392886547557, "loss": 1.8333, "step": 1718 }, { "epoch": 0.07647503445822773, "grad_norm": 0.20255160331726074, "learning_rate": 0.0009998383921282531, "loss": 1.8275, "step": 1720 }, { "epoch": 0.0765639589169001, "grad_norm": 0.19958724081516266, "learning_rate": 0.000999837493108474, "loss": 1.8305, "step": 1722 }, { "epoch": 0.07665288337557245, "grad_norm": 0.2084696739912033, "learning_rate": 0.0009998365915954222, "loss": 1.8254, "step": 1724 }, { "epoch": 0.07674180783424481, "grad_norm": 0.19476568698883057, "learning_rate": 0.0009998356875891028, "loss": 1.8294, "step": 1726 }, { "epoch": 0.07683073229291716, "grad_norm": 0.20423997938632965, "learning_rate": 0.00099983478108952, "loss": 1.8298, "step": 1728 }, { "epoch": 0.07691965675158953, "grad_norm": 0.20384973287582397, "learning_rate": 0.000999833872096678, "loss": 1.8219, "step": 1730 }, { "epoch": 0.07700858121026188, "grad_norm": 0.22047263383865356, "learning_rate": 0.000999832960610582, "loss": 1.8308, "step": 1732 }, { "epoch": 0.07709750566893424, "grad_norm": 0.2119770050048828, "learning_rate": 0.000999832046631236, "loss": 1.827, "step": 1734 }, { "epoch": 0.07718643012760659, "grad_norm": 0.20165476202964783, "learning_rate": 0.000999831130158645, "loss": 1.8247, "step": 1736 }, { "epoch": 0.07727535458627896, "grad_norm": 0.20627140998840332, "learning_rate": 0.0009998302111928132, "loss": 1.8295, "step": 1738 }, { "epoch": 0.07736427904495131, "grad_norm": 0.19049489498138428, "learning_rate": 0.0009998292897337455, "loss": 1.8265, "step": 1740 }, { "epoch": 0.07745320350362367, "grad_norm": 0.2037346363067627, "learning_rate": 0.0009998283657814463, "loss": 1.8253, "step": 1742 }, { "epoch": 0.07754212796229602, "grad_norm": 0.19989527761936188, "learning_rate": 0.00099982743933592, "loss": 1.8227, "step": 1744 }, { "epoch": 0.07763105242096839, "grad_norm": 0.19065894186496735, "learning_rate": 0.0009998265103971717, "loss": 1.8203, "step": 1746 }, { "epoch": 0.07771997687964075, "grad_norm": 0.20793575048446655, "learning_rate": 0.0009998255789652058, "loss": 1.8259, "step": 1748 }, { "epoch": 0.0778089013383131, "grad_norm": 0.19519171118736267, "learning_rate": 0.000999824645040027, "loss": 1.8263, "step": 1750 }, { "epoch": 0.07789782579698547, "grad_norm": 0.18882858753204346, "learning_rate": 0.0009998237086216398, "loss": 1.8275, "step": 1752 }, { "epoch": 0.07798675025565782, "grad_norm": 0.19698387384414673, "learning_rate": 0.000999822769710049, "loss": 1.8215, "step": 1754 }, { "epoch": 0.07807567471433018, "grad_norm": 0.1997542679309845, "learning_rate": 0.0009998218283052591, "loss": 1.8214, "step": 1756 }, { "epoch": 0.07816459917300253, "grad_norm": 0.20124317705631256, "learning_rate": 0.000999820884407275, "loss": 1.8274, "step": 1758 }, { "epoch": 0.0782535236316749, "grad_norm": 0.2041187584400177, "learning_rate": 0.0009998199380161015, "loss": 1.8218, "step": 1760 }, { "epoch": 0.07834244809034725, "grad_norm": 0.18576522171497345, "learning_rate": 0.0009998189891317433, "loss": 1.8191, "step": 1762 }, { "epoch": 0.0784313725490196, "grad_norm": 0.17914514243602753, "learning_rate": 0.0009998180377542047, "loss": 1.8233, "step": 1764 }, { "epoch": 0.07852029700769196, "grad_norm": 0.20891062915325165, "learning_rate": 0.000999817083883491, "loss": 1.8232, "step": 1766 }, { "epoch": 0.07860922146636433, "grad_norm": 0.19134004414081573, "learning_rate": 0.0009998161275196068, "loss": 1.8189, "step": 1768 }, { "epoch": 0.07869814592503668, "grad_norm": 0.18456409871578217, "learning_rate": 0.0009998151686625566, "loss": 1.8209, "step": 1770 }, { "epoch": 0.07878707038370904, "grad_norm": 0.1992667019367218, "learning_rate": 0.0009998142073123454, "loss": 1.8167, "step": 1772 }, { "epoch": 0.07887599484238139, "grad_norm": 0.21500921249389648, "learning_rate": 0.000999813243468978, "loss": 1.8153, "step": 1774 }, { "epoch": 0.07896491930105376, "grad_norm": 0.2065228521823883, "learning_rate": 0.000999812277132459, "loss": 1.8155, "step": 1776 }, { "epoch": 0.07905384375972611, "grad_norm": 0.20505499839782715, "learning_rate": 0.0009998113083027936, "loss": 1.8183, "step": 1778 }, { "epoch": 0.07914276821839847, "grad_norm": 0.1991395652294159, "learning_rate": 0.0009998103369799863, "loss": 1.8083, "step": 1780 }, { "epoch": 0.07923169267707082, "grad_norm": 0.19899892807006836, "learning_rate": 0.0009998093631640422, "loss": 1.8152, "step": 1782 }, { "epoch": 0.07932061713574319, "grad_norm": 0.21442580223083496, "learning_rate": 0.0009998083868549658, "loss": 1.8218, "step": 1784 }, { "epoch": 0.07940954159441554, "grad_norm": 0.20083315670490265, "learning_rate": 0.0009998074080527625, "loss": 1.8157, "step": 1786 }, { "epoch": 0.0794984660530879, "grad_norm": 0.19688989222049713, "learning_rate": 0.000999806426757437, "loss": 1.8132, "step": 1788 }, { "epoch": 0.07958739051176025, "grad_norm": 0.1846112161874771, "learning_rate": 0.0009998054429689936, "loss": 1.8219, "step": 1790 }, { "epoch": 0.07967631497043262, "grad_norm": 0.1728316843509674, "learning_rate": 0.000999804456687438, "loss": 1.8128, "step": 1792 }, { "epoch": 0.07976523942910498, "grad_norm": 0.18066954612731934, "learning_rate": 0.0009998034679127748, "loss": 1.8129, "step": 1794 }, { "epoch": 0.07985416388777733, "grad_norm": 0.1870201677083969, "learning_rate": 0.000999802476645009, "loss": 1.8113, "step": 1796 }, { "epoch": 0.0799430883464497, "grad_norm": 0.2108452171087265, "learning_rate": 0.0009998014828841452, "loss": 1.8179, "step": 1798 }, { "epoch": 0.08003201280512205, "grad_norm": 0.19203181564807892, "learning_rate": 0.000999800486630189, "loss": 1.8089, "step": 1800 }, { "epoch": 0.0801209372637944, "grad_norm": 0.1975417286157608, "learning_rate": 0.0009997994878831447, "loss": 1.8162, "step": 1802 }, { "epoch": 0.08020986172246676, "grad_norm": 0.19258712232112885, "learning_rate": 0.0009997984866430176, "loss": 1.8176, "step": 1804 }, { "epoch": 0.08029878618113913, "grad_norm": 0.1956678330898285, "learning_rate": 0.0009997974829098128, "loss": 1.8093, "step": 1806 }, { "epoch": 0.08038771063981148, "grad_norm": 0.19371239840984344, "learning_rate": 0.0009997964766835352, "loss": 1.8101, "step": 1808 }, { "epoch": 0.08047663509848384, "grad_norm": 0.18697892129421234, "learning_rate": 0.00099979546796419, "loss": 1.8084, "step": 1810 }, { "epoch": 0.08056555955715619, "grad_norm": 0.1840774416923523, "learning_rate": 0.0009997944567517816, "loss": 1.8017, "step": 1812 }, { "epoch": 0.08065448401582856, "grad_norm": 0.1876949518918991, "learning_rate": 0.000999793443046316, "loss": 1.8098, "step": 1814 }, { "epoch": 0.08074340847450091, "grad_norm": 0.19937770068645477, "learning_rate": 0.0009997924268477973, "loss": 1.8146, "step": 1816 }, { "epoch": 0.08083233293317327, "grad_norm": 0.18788865208625793, "learning_rate": 0.0009997914081562311, "loss": 1.8212, "step": 1818 }, { "epoch": 0.08092125739184562, "grad_norm": 0.17869558930397034, "learning_rate": 0.0009997903869716225, "loss": 1.8126, "step": 1820 }, { "epoch": 0.08101018185051799, "grad_norm": 0.19068653881549835, "learning_rate": 0.0009997893632939766, "loss": 1.8133, "step": 1822 }, { "epoch": 0.08109910630919034, "grad_norm": 0.18737611174583435, "learning_rate": 0.0009997883371232982, "loss": 1.8099, "step": 1824 }, { "epoch": 0.0811880307678627, "grad_norm": 0.19693921506404877, "learning_rate": 0.0009997873084595927, "loss": 1.8155, "step": 1826 }, { "epoch": 0.08127695522653505, "grad_norm": 0.19971245527267456, "learning_rate": 0.000999786277302865, "loss": 1.8165, "step": 1828 }, { "epoch": 0.08136587968520742, "grad_norm": 0.19381284713745117, "learning_rate": 0.0009997852436531205, "loss": 1.8095, "step": 1830 }, { "epoch": 0.08145480414387977, "grad_norm": 0.18925556540489197, "learning_rate": 0.0009997842075103642, "loss": 1.8148, "step": 1832 }, { "epoch": 0.08154372860255213, "grad_norm": 0.19684088230133057, "learning_rate": 0.0009997831688746016, "loss": 1.8153, "step": 1834 }, { "epoch": 0.08163265306122448, "grad_norm": 0.1987828016281128, "learning_rate": 0.0009997821277458372, "loss": 1.8126, "step": 1836 }, { "epoch": 0.08172157751989685, "grad_norm": 0.19290605187416077, "learning_rate": 0.0009997810841240768, "loss": 1.8085, "step": 1838 }, { "epoch": 0.0818105019785692, "grad_norm": 0.19497103989124298, "learning_rate": 0.0009997800380093253, "loss": 1.81, "step": 1840 }, { "epoch": 0.08189942643724156, "grad_norm": 0.20089541375637054, "learning_rate": 0.000999778989401588, "loss": 1.809, "step": 1842 }, { "epoch": 0.08198835089591393, "grad_norm": 0.19982276856899261, "learning_rate": 0.0009997779383008702, "loss": 1.8178, "step": 1844 }, { "epoch": 0.08207727535458628, "grad_norm": 0.20949578285217285, "learning_rate": 0.0009997768847071773, "loss": 1.8105, "step": 1846 }, { "epoch": 0.08216619981325864, "grad_norm": 0.20833182334899902, "learning_rate": 0.000999775828620514, "loss": 1.8152, "step": 1848 }, { "epoch": 0.08225512427193099, "grad_norm": 0.21134817600250244, "learning_rate": 0.000999774770040886, "loss": 1.805, "step": 1850 }, { "epoch": 0.08234404873060336, "grad_norm": 0.21612076461315155, "learning_rate": 0.0009997737089682986, "loss": 1.8034, "step": 1852 }, { "epoch": 0.08243297318927571, "grad_norm": 0.18973609805107117, "learning_rate": 0.000999772645402757, "loss": 1.8057, "step": 1854 }, { "epoch": 0.08252189764794807, "grad_norm": 0.19349431991577148, "learning_rate": 0.0009997715793442663, "loss": 1.807, "step": 1856 }, { "epoch": 0.08261082210662042, "grad_norm": 0.19564490020275116, "learning_rate": 0.0009997705107928322, "loss": 1.8048, "step": 1858 }, { "epoch": 0.08269974656529279, "grad_norm": 0.18188372254371643, "learning_rate": 0.0009997694397484596, "loss": 1.8138, "step": 1860 }, { "epoch": 0.08278867102396514, "grad_norm": 0.18335072696208954, "learning_rate": 0.0009997683662111541, "loss": 1.8099, "step": 1862 }, { "epoch": 0.0828775954826375, "grad_norm": 0.18123309314250946, "learning_rate": 0.000999767290180921, "loss": 1.7991, "step": 1864 }, { "epoch": 0.08296651994130985, "grad_norm": 0.1729104071855545, "learning_rate": 0.000999766211657766, "loss": 1.8033, "step": 1866 }, { "epoch": 0.08305544439998222, "grad_norm": 0.1979542374610901, "learning_rate": 0.000999765130641694, "loss": 1.8014, "step": 1868 }, { "epoch": 0.08314436885865457, "grad_norm": 0.19370914995670319, "learning_rate": 0.0009997640471327107, "loss": 1.8032, "step": 1870 }, { "epoch": 0.08323329331732693, "grad_norm": 0.19891789555549622, "learning_rate": 0.0009997629611308212, "loss": 1.8034, "step": 1872 }, { "epoch": 0.08332221777599928, "grad_norm": 0.1980421394109726, "learning_rate": 0.0009997618726360312, "loss": 1.8026, "step": 1874 }, { "epoch": 0.08341114223467165, "grad_norm": 0.18609797954559326, "learning_rate": 0.000999760781648346, "loss": 1.8015, "step": 1876 }, { "epoch": 0.083500066693344, "grad_norm": 0.18574324250221252, "learning_rate": 0.000999759688167771, "loss": 1.8089, "step": 1878 }, { "epoch": 0.08358899115201636, "grad_norm": 0.1960146427154541, "learning_rate": 0.0009997585921943117, "loss": 1.7994, "step": 1880 }, { "epoch": 0.08367791561068871, "grad_norm": 0.19020849466323853, "learning_rate": 0.0009997574937279736, "loss": 1.8026, "step": 1882 }, { "epoch": 0.08376684006936108, "grad_norm": 0.18406033515930176, "learning_rate": 0.0009997563927687623, "loss": 1.804, "step": 1884 }, { "epoch": 0.08385576452803344, "grad_norm": 0.1974494904279709, "learning_rate": 0.000999755289316683, "loss": 1.8038, "step": 1886 }, { "epoch": 0.08394468898670579, "grad_norm": 0.1874603033065796, "learning_rate": 0.0009997541833717415, "loss": 1.7976, "step": 1888 }, { "epoch": 0.08403361344537816, "grad_norm": 0.1843891739845276, "learning_rate": 0.000999753074933943, "loss": 1.8011, "step": 1890 }, { "epoch": 0.08412253790405051, "grad_norm": 0.17537960410118103, "learning_rate": 0.0009997519640032935, "loss": 1.8076, "step": 1892 }, { "epoch": 0.08421146236272287, "grad_norm": 0.17957009375095367, "learning_rate": 0.000999750850579798, "loss": 1.7974, "step": 1894 }, { "epoch": 0.08430038682139522, "grad_norm": 0.18118196725845337, "learning_rate": 0.0009997497346634623, "loss": 1.7928, "step": 1896 }, { "epoch": 0.08438931128006759, "grad_norm": 0.19623412191867828, "learning_rate": 0.0009997486162542921, "loss": 1.7973, "step": 1898 }, { "epoch": 0.08447823573873994, "grad_norm": 0.20058177411556244, "learning_rate": 0.0009997474953522929, "loss": 1.799, "step": 1900 }, { "epoch": 0.0845671601974123, "grad_norm": 0.19219110906124115, "learning_rate": 0.00099974637195747, "loss": 1.8008, "step": 1902 }, { "epoch": 0.08465608465608465, "grad_norm": 0.18925371766090393, "learning_rate": 0.0009997452460698292, "loss": 1.7954, "step": 1904 }, { "epoch": 0.08474500911475702, "grad_norm": 0.183928444981575, "learning_rate": 0.0009997441176893764, "loss": 1.8056, "step": 1906 }, { "epoch": 0.08483393357342937, "grad_norm": 0.18831098079681396, "learning_rate": 0.000999742986816117, "loss": 1.799, "step": 1908 }, { "epoch": 0.08492285803210173, "grad_norm": 0.19508256018161774, "learning_rate": 0.0009997418534500567, "loss": 1.8007, "step": 1910 }, { "epoch": 0.08501178249077408, "grad_norm": 0.188640758395195, "learning_rate": 0.0009997407175912007, "loss": 1.7926, "step": 1912 }, { "epoch": 0.08510070694944645, "grad_norm": 0.18694116175174713, "learning_rate": 0.0009997395792395553, "loss": 1.7915, "step": 1914 }, { "epoch": 0.0851896314081188, "grad_norm": 0.19411993026733398, "learning_rate": 0.000999738438395126, "loss": 1.7981, "step": 1916 }, { "epoch": 0.08527855586679116, "grad_norm": 0.2087039351463318, "learning_rate": 0.0009997372950579183, "loss": 1.7954, "step": 1918 }, { "epoch": 0.08536748032546351, "grad_norm": 0.20052039623260498, "learning_rate": 0.000999736149227938, "loss": 1.7936, "step": 1920 }, { "epoch": 0.08545640478413588, "grad_norm": 0.21173499524593353, "learning_rate": 0.000999735000905191, "loss": 1.8015, "step": 1922 }, { "epoch": 0.08554532924280824, "grad_norm": 0.17747758328914642, "learning_rate": 0.0009997338500896827, "loss": 1.7894, "step": 1924 }, { "epoch": 0.08563425370148059, "grad_norm": 0.1921745240688324, "learning_rate": 0.000999732696781419, "loss": 1.7967, "step": 1926 }, { "epoch": 0.08572317816015294, "grad_norm": 0.18577106297016144, "learning_rate": 0.0009997315409804057, "loss": 1.7903, "step": 1928 }, { "epoch": 0.08581210261882531, "grad_norm": 0.1862160861492157, "learning_rate": 0.0009997303826866485, "loss": 1.7978, "step": 1930 }, { "epoch": 0.08590102707749767, "grad_norm": 0.18871815502643585, "learning_rate": 0.0009997292219001532, "loss": 1.7985, "step": 1932 }, { "epoch": 0.08598995153617002, "grad_norm": 0.18492014706134796, "learning_rate": 0.0009997280586209257, "loss": 1.795, "step": 1934 }, { "epoch": 0.08607887599484239, "grad_norm": 0.17806781828403473, "learning_rate": 0.0009997268928489717, "loss": 1.7889, "step": 1936 }, { "epoch": 0.08616780045351474, "grad_norm": 0.18848101794719696, "learning_rate": 0.0009997257245842968, "loss": 1.7931, "step": 1938 }, { "epoch": 0.0862567249121871, "grad_norm": 0.18113906681537628, "learning_rate": 0.000999724553826907, "loss": 1.7927, "step": 1940 }, { "epoch": 0.08634564937085945, "grad_norm": 0.19498959183692932, "learning_rate": 0.0009997233805768085, "loss": 1.7927, "step": 1942 }, { "epoch": 0.08643457382953182, "grad_norm": 0.19100996851921082, "learning_rate": 0.0009997222048340065, "loss": 1.7918, "step": 1944 }, { "epoch": 0.08652349828820417, "grad_norm": 0.19174836575984955, "learning_rate": 0.0009997210265985073, "loss": 1.7889, "step": 1946 }, { "epoch": 0.08661242274687653, "grad_norm": 0.20086248219013214, "learning_rate": 0.0009997198458703168, "loss": 1.7958, "step": 1948 }, { "epoch": 0.08670134720554888, "grad_norm": 0.18942195177078247, "learning_rate": 0.0009997186626494407, "loss": 1.7884, "step": 1950 }, { "epoch": 0.08679027166422125, "grad_norm": 0.17851027846336365, "learning_rate": 0.000999717476935885, "loss": 1.7867, "step": 1952 }, { "epoch": 0.0868791961228936, "grad_norm": 0.1856180876493454, "learning_rate": 0.0009997162887296553, "loss": 1.7886, "step": 1954 }, { "epoch": 0.08696812058156596, "grad_norm": 0.18615032732486725, "learning_rate": 0.0009997150980307582, "loss": 1.7869, "step": 1956 }, { "epoch": 0.08705704504023831, "grad_norm": 0.18606998026371002, "learning_rate": 0.000999713904839199, "loss": 1.7897, "step": 1958 }, { "epoch": 0.08714596949891068, "grad_norm": 0.19354918599128723, "learning_rate": 0.000999712709154984, "loss": 1.7938, "step": 1960 }, { "epoch": 0.08723489395758303, "grad_norm": 0.19163134694099426, "learning_rate": 0.0009997115109781189, "loss": 1.7934, "step": 1962 }, { "epoch": 0.08732381841625539, "grad_norm": 0.17643679678440094, "learning_rate": 0.00099971031030861, "loss": 1.7914, "step": 1964 }, { "epoch": 0.08741274287492774, "grad_norm": 0.19244788587093353, "learning_rate": 0.000999709107146463, "loss": 1.7927, "step": 1966 }, { "epoch": 0.08750166733360011, "grad_norm": 0.1964365541934967, "learning_rate": 0.000999707901491684, "loss": 1.7919, "step": 1968 }, { "epoch": 0.08759059179227247, "grad_norm": 0.19603557884693146, "learning_rate": 0.0009997066933442793, "loss": 1.7924, "step": 1970 }, { "epoch": 0.08767951625094482, "grad_norm": 0.1899658441543579, "learning_rate": 0.0009997054827042544, "loss": 1.7871, "step": 1972 }, { "epoch": 0.08776844070961717, "grad_norm": 0.1820671409368515, "learning_rate": 0.0009997042695716158, "loss": 1.7918, "step": 1974 }, { "epoch": 0.08785736516828954, "grad_norm": 0.1810377836227417, "learning_rate": 0.000999703053946369, "loss": 1.7916, "step": 1976 }, { "epoch": 0.0879462896269619, "grad_norm": 0.182911679148674, "learning_rate": 0.000999701835828521, "loss": 1.787, "step": 1978 }, { "epoch": 0.08803521408563425, "grad_norm": 0.17979872226715088, "learning_rate": 0.000999700615218077, "loss": 1.7915, "step": 1980 }, { "epoch": 0.08812413854430662, "grad_norm": 0.18918099999427795, "learning_rate": 0.0009996993921150433, "loss": 1.7764, "step": 1982 }, { "epoch": 0.08821306300297897, "grad_norm": 0.18799486756324768, "learning_rate": 0.000999698166519426, "loss": 1.7828, "step": 1984 }, { "epoch": 0.08830198746165133, "grad_norm": 0.1944817304611206, "learning_rate": 0.0009996969384312316, "loss": 1.7836, "step": 1986 }, { "epoch": 0.08839091192032368, "grad_norm": 0.17584578692913055, "learning_rate": 0.0009996957078504658, "loss": 1.7905, "step": 1988 }, { "epoch": 0.08847983637899605, "grad_norm": 0.1802207976579666, "learning_rate": 0.000999694474777135, "loss": 1.7863, "step": 1990 }, { "epoch": 0.0885687608376684, "grad_norm": 0.18599218130111694, "learning_rate": 0.0009996932392112448, "loss": 1.7833, "step": 1992 }, { "epoch": 0.08865768529634076, "grad_norm": 0.1814405471086502, "learning_rate": 0.0009996920011528022, "loss": 1.7845, "step": 1994 }, { "epoch": 0.08874660975501311, "grad_norm": 0.1817612200975418, "learning_rate": 0.0009996907606018126, "loss": 1.7884, "step": 1996 }, { "epoch": 0.08883553421368548, "grad_norm": 0.19590908288955688, "learning_rate": 0.0009996895175582827, "loss": 1.7754, "step": 1998 }, { "epoch": 0.08892445867235783, "grad_norm": 0.17747312784194946, "learning_rate": 0.0009996882720222186, "loss": 1.7815, "step": 2000 }, { "epoch": 0.08892445867235783, "eval_loss": 1.7405998706817627, "eval_runtime": 12.3499, "eval_samples_per_second": 559.518, "eval_steps_per_second": 69.96, "step": 2000 }, { "epoch": 0.08901338313103019, "grad_norm": 0.18658480048179626, "learning_rate": 0.0009996870239936265, "loss": 1.7879, "step": 2002 }, { "epoch": 0.08910230758970254, "grad_norm": 0.18885967135429382, "learning_rate": 0.0009996857734725125, "loss": 1.7915, "step": 2004 }, { "epoch": 0.08919123204837491, "grad_norm": 0.19159406423568726, "learning_rate": 0.0009996845204588828, "loss": 1.7847, "step": 2006 }, { "epoch": 0.08928015650704726, "grad_norm": 0.18446260690689087, "learning_rate": 0.000999683264952744, "loss": 1.7862, "step": 2008 }, { "epoch": 0.08936908096571962, "grad_norm": 0.18793538212776184, "learning_rate": 0.000999682006954102, "loss": 1.7804, "step": 2010 }, { "epoch": 0.08945800542439197, "grad_norm": 0.18664175271987915, "learning_rate": 0.0009996807464629632, "loss": 1.7826, "step": 2012 }, { "epoch": 0.08954692988306434, "grad_norm": 0.19432403147220612, "learning_rate": 0.0009996794834793339, "loss": 1.7839, "step": 2014 }, { "epoch": 0.0896358543417367, "grad_norm": 0.18868719041347504, "learning_rate": 0.0009996782180032204, "loss": 1.7919, "step": 2016 }, { "epoch": 0.08972477880040905, "grad_norm": 0.19868771731853485, "learning_rate": 0.0009996769500346292, "loss": 1.7812, "step": 2018 }, { "epoch": 0.0898137032590814, "grad_norm": 0.2011130154132843, "learning_rate": 0.0009996756795735663, "loss": 1.7797, "step": 2020 }, { "epoch": 0.08990262771775377, "grad_norm": 0.19635726511478424, "learning_rate": 0.0009996744066200379, "loss": 1.7816, "step": 2022 }, { "epoch": 0.08999155217642613, "grad_norm": 0.17903649806976318, "learning_rate": 0.000999673131174051, "loss": 1.783, "step": 2024 }, { "epoch": 0.09008047663509848, "grad_norm": 0.18663077056407928, "learning_rate": 0.0009996718532356112, "loss": 1.7857, "step": 2026 }, { "epoch": 0.09016940109377085, "grad_norm": 0.1833386868238449, "learning_rate": 0.0009996705728047256, "loss": 1.7836, "step": 2028 }, { "epoch": 0.0902583255524432, "grad_norm": 0.19123496115207672, "learning_rate": 0.0009996692898814003, "loss": 1.7755, "step": 2030 }, { "epoch": 0.09034725001111556, "grad_norm": 0.1912190318107605, "learning_rate": 0.0009996680044656412, "loss": 1.7766, "step": 2032 }, { "epoch": 0.09043617446978791, "grad_norm": 0.18371352553367615, "learning_rate": 0.0009996667165574555, "loss": 1.7774, "step": 2034 }, { "epoch": 0.09052509892846028, "grad_norm": 0.18822483718395233, "learning_rate": 0.0009996654261568492, "loss": 1.7775, "step": 2036 }, { "epoch": 0.09061402338713263, "grad_norm": 0.17826074361801147, "learning_rate": 0.0009996641332638287, "loss": 1.7792, "step": 2038 }, { "epoch": 0.09070294784580499, "grad_norm": 0.1842014193534851, "learning_rate": 0.0009996628378784007, "loss": 1.7845, "step": 2040 }, { "epoch": 0.09079187230447734, "grad_norm": 0.1995820701122284, "learning_rate": 0.0009996615400005716, "loss": 1.7765, "step": 2042 }, { "epoch": 0.09088079676314971, "grad_norm": 0.19667015969753265, "learning_rate": 0.0009996602396303476, "loss": 1.771, "step": 2044 }, { "epoch": 0.09096972122182206, "grad_norm": 0.2025892585515976, "learning_rate": 0.0009996589367677355, "loss": 1.7775, "step": 2046 }, { "epoch": 0.09105864568049442, "grad_norm": 0.1986626386642456, "learning_rate": 0.0009996576314127417, "loss": 1.78, "step": 2048 }, { "epoch": 0.09114757013916677, "grad_norm": 0.20763103663921356, "learning_rate": 0.0009996563235653727, "loss": 1.7747, "step": 2050 }, { "epoch": 0.09123649459783914, "grad_norm": 0.1954943984746933, "learning_rate": 0.000999655013225635, "loss": 1.7739, "step": 2052 }, { "epoch": 0.0913254190565115, "grad_norm": 0.18959374725818634, "learning_rate": 0.000999653700393535, "loss": 1.7786, "step": 2054 }, { "epoch": 0.09141434351518385, "grad_norm": 0.17867428064346313, "learning_rate": 0.0009996523850690794, "loss": 1.7786, "step": 2056 }, { "epoch": 0.0915032679738562, "grad_norm": 0.19500364363193512, "learning_rate": 0.000999651067252275, "loss": 1.7711, "step": 2058 }, { "epoch": 0.09159219243252857, "grad_norm": 0.18299424648284912, "learning_rate": 0.000999649746943128, "loss": 1.7742, "step": 2060 }, { "epoch": 0.09168111689120093, "grad_norm": 0.21155034005641937, "learning_rate": 0.0009996484241416453, "loss": 1.771, "step": 2062 }, { "epoch": 0.09177004134987328, "grad_norm": 0.19016344845294952, "learning_rate": 0.000999647098847833, "loss": 1.7853, "step": 2064 }, { "epoch": 0.09185896580854563, "grad_norm": 0.19537319242954254, "learning_rate": 0.0009996457710616984, "loss": 1.7822, "step": 2066 }, { "epoch": 0.091947890267218, "grad_norm": 0.1982775330543518, "learning_rate": 0.0009996444407832474, "loss": 1.7752, "step": 2068 }, { "epoch": 0.09203681472589036, "grad_norm": 0.1813453882932663, "learning_rate": 0.0009996431080124873, "loss": 1.7731, "step": 2070 }, { "epoch": 0.09212573918456271, "grad_norm": 0.18596340715885162, "learning_rate": 0.0009996417727494242, "loss": 1.7777, "step": 2072 }, { "epoch": 0.09221466364323508, "grad_norm": 0.17686715722084045, "learning_rate": 0.000999640434994065, "loss": 1.7787, "step": 2074 }, { "epoch": 0.09230358810190743, "grad_norm": 0.1908753216266632, "learning_rate": 0.0009996390947464167, "loss": 1.7713, "step": 2076 }, { "epoch": 0.09239251256057979, "grad_norm": 0.1761447936296463, "learning_rate": 0.0009996377520064853, "loss": 1.7727, "step": 2078 }, { "epoch": 0.09248143701925214, "grad_norm": 0.18040457367897034, "learning_rate": 0.000999636406774278, "loss": 1.7783, "step": 2080 }, { "epoch": 0.09257036147792451, "grad_norm": 0.17208784818649292, "learning_rate": 0.0009996350590498015, "loss": 1.7716, "step": 2082 }, { "epoch": 0.09265928593659686, "grad_norm": 0.19492624700069427, "learning_rate": 0.0009996337088330623, "loss": 1.7755, "step": 2084 }, { "epoch": 0.09274821039526922, "grad_norm": 0.1833573430776596, "learning_rate": 0.000999632356124067, "loss": 1.7769, "step": 2086 }, { "epoch": 0.09283713485394157, "grad_norm": 0.1697169542312622, "learning_rate": 0.000999631000922823, "loss": 1.7732, "step": 2088 }, { "epoch": 0.09292605931261394, "grad_norm": 0.17216144502162933, "learning_rate": 0.000999629643229336, "loss": 1.7658, "step": 2090 }, { "epoch": 0.0930149837712863, "grad_norm": 0.17798788845539093, "learning_rate": 0.0009996282830436142, "loss": 1.7753, "step": 2092 }, { "epoch": 0.09310390822995865, "grad_norm": 0.17972996830940247, "learning_rate": 0.000999626920365663, "loss": 1.7783, "step": 2094 }, { "epoch": 0.093192832688631, "grad_norm": 0.1812281608581543, "learning_rate": 0.0009996255551954901, "loss": 1.7725, "step": 2096 }, { "epoch": 0.09328175714730337, "grad_norm": 0.18036170303821564, "learning_rate": 0.0009996241875331016, "loss": 1.77, "step": 2098 }, { "epoch": 0.09337068160597572, "grad_norm": 0.1780519038438797, "learning_rate": 0.000999622817378505, "loss": 1.773, "step": 2100 }, { "epoch": 0.09345960606464808, "grad_norm": 0.17772600054740906, "learning_rate": 0.000999621444731707, "loss": 1.7757, "step": 2102 }, { "epoch": 0.09354853052332043, "grad_norm": 0.1823798418045044, "learning_rate": 0.000999620069592714, "loss": 1.7643, "step": 2104 }, { "epoch": 0.0936374549819928, "grad_norm": 0.18164494633674622, "learning_rate": 0.0009996186919615333, "loss": 1.7707, "step": 2106 }, { "epoch": 0.09372637944066516, "grad_norm": 0.17441585659980774, "learning_rate": 0.0009996173118381718, "loss": 1.7731, "step": 2108 }, { "epoch": 0.09381530389933751, "grad_norm": 0.18183459341526031, "learning_rate": 0.0009996159292226358, "loss": 1.7637, "step": 2110 }, { "epoch": 0.09390422835800986, "grad_norm": 0.17130981385707855, "learning_rate": 0.000999614544114933, "loss": 1.7723, "step": 2112 }, { "epoch": 0.09399315281668223, "grad_norm": 0.18831345438957214, "learning_rate": 0.00099961315651507, "loss": 1.7687, "step": 2114 }, { "epoch": 0.09408207727535459, "grad_norm": 0.17629346251487732, "learning_rate": 0.0009996117664230534, "loss": 1.7694, "step": 2116 }, { "epoch": 0.09417100173402694, "grad_norm": 0.17761678993701935, "learning_rate": 0.0009996103738388901, "loss": 1.7664, "step": 2118 }, { "epoch": 0.09425992619269931, "grad_norm": 0.17654430866241455, "learning_rate": 0.0009996089787625878, "loss": 1.7683, "step": 2120 }, { "epoch": 0.09434885065137166, "grad_norm": 0.16883589327335358, "learning_rate": 0.0009996075811941527, "loss": 1.7703, "step": 2122 }, { "epoch": 0.09443777511004402, "grad_norm": 0.1744341105222702, "learning_rate": 0.0009996061811335922, "loss": 1.7722, "step": 2124 }, { "epoch": 0.09452669956871637, "grad_norm": 0.1795886754989624, "learning_rate": 0.0009996047785809131, "loss": 1.7625, "step": 2126 }, { "epoch": 0.09461562402738874, "grad_norm": 0.18121252954006195, "learning_rate": 0.0009996033735361226, "loss": 1.7678, "step": 2128 }, { "epoch": 0.0947045484860611, "grad_norm": 0.17234009504318237, "learning_rate": 0.0009996019659992274, "loss": 1.7731, "step": 2130 }, { "epoch": 0.09479347294473345, "grad_norm": 0.18222364783287048, "learning_rate": 0.0009996005559702345, "loss": 1.7666, "step": 2132 }, { "epoch": 0.0948823974034058, "grad_norm": 0.18646016716957092, "learning_rate": 0.0009995991434491513, "loss": 1.7708, "step": 2134 }, { "epoch": 0.09497132186207817, "grad_norm": 0.16806988418102264, "learning_rate": 0.0009995977284359846, "loss": 1.7684, "step": 2136 }, { "epoch": 0.09506024632075052, "grad_norm": 0.17961154878139496, "learning_rate": 0.0009995963109307414, "loss": 1.7631, "step": 2138 }, { "epoch": 0.09514917077942288, "grad_norm": 0.18496328592300415, "learning_rate": 0.000999594890933429, "loss": 1.7682, "step": 2140 }, { "epoch": 0.09523809523809523, "grad_norm": 0.17461057007312775, "learning_rate": 0.0009995934684440544, "loss": 1.7697, "step": 2142 }, { "epoch": 0.0953270196967676, "grad_norm": 0.17441897094249725, "learning_rate": 0.0009995920434626244, "loss": 1.7668, "step": 2144 }, { "epoch": 0.09541594415543996, "grad_norm": 0.183127298951149, "learning_rate": 0.0009995906159891465, "loss": 1.7578, "step": 2146 }, { "epoch": 0.09550486861411231, "grad_norm": 0.1828969568014145, "learning_rate": 0.0009995891860236277, "loss": 1.7704, "step": 2148 }, { "epoch": 0.09559379307278466, "grad_norm": 0.17396622896194458, "learning_rate": 0.0009995877535660751, "loss": 1.7636, "step": 2150 }, { "epoch": 0.09568271753145703, "grad_norm": 0.17213039100170135, "learning_rate": 0.000999586318616496, "loss": 1.7667, "step": 2152 }, { "epoch": 0.09577164199012939, "grad_norm": 0.16386523842811584, "learning_rate": 0.0009995848811748973, "loss": 1.7696, "step": 2154 }, { "epoch": 0.09586056644880174, "grad_norm": 0.16317681968212128, "learning_rate": 0.0009995834412412862, "loss": 1.7689, "step": 2156 }, { "epoch": 0.0959494909074741, "grad_norm": 0.16907323896884918, "learning_rate": 0.00099958199881567, "loss": 1.7692, "step": 2158 }, { "epoch": 0.09603841536614646, "grad_norm": 0.1724671572446823, "learning_rate": 0.000999580553898056, "loss": 1.7614, "step": 2160 }, { "epoch": 0.09612733982481882, "grad_norm": 0.17434446513652802, "learning_rate": 0.000999579106488451, "loss": 1.762, "step": 2162 }, { "epoch": 0.09621626428349117, "grad_norm": 0.17296640574932098, "learning_rate": 0.0009995776565868628, "loss": 1.7674, "step": 2164 }, { "epoch": 0.09630518874216354, "grad_norm": 0.1922890841960907, "learning_rate": 0.0009995762041932982, "loss": 1.7709, "step": 2166 }, { "epoch": 0.0963941132008359, "grad_norm": 0.15948012471199036, "learning_rate": 0.0009995747493077645, "loss": 1.7622, "step": 2168 }, { "epoch": 0.09648303765950825, "grad_norm": 0.1770942509174347, "learning_rate": 0.0009995732919302691, "loss": 1.7594, "step": 2170 }, { "epoch": 0.0965719621181806, "grad_norm": 0.17478297650814056, "learning_rate": 0.0009995718320608192, "loss": 1.76, "step": 2172 }, { "epoch": 0.09666088657685297, "grad_norm": 0.1589890867471695, "learning_rate": 0.000999570369699422, "loss": 1.7549, "step": 2174 }, { "epoch": 0.09674981103552532, "grad_norm": 0.17255498468875885, "learning_rate": 0.000999568904846085, "loss": 1.7638, "step": 2176 }, { "epoch": 0.09683873549419768, "grad_norm": 0.17751558125019073, "learning_rate": 0.0009995674375008153, "loss": 1.7597, "step": 2178 }, { "epoch": 0.09692765995287003, "grad_norm": 0.16570937633514404, "learning_rate": 0.0009995659676636205, "loss": 1.7608, "step": 2180 }, { "epoch": 0.0970165844115424, "grad_norm": 0.16247688233852386, "learning_rate": 0.0009995644953345072, "loss": 1.7596, "step": 2182 }, { "epoch": 0.09710550887021475, "grad_norm": 0.16889050602912903, "learning_rate": 0.000999563020513484, "loss": 1.7627, "step": 2184 }, { "epoch": 0.09719443332888711, "grad_norm": 0.16616292297840118, "learning_rate": 0.000999561543200557, "loss": 1.7563, "step": 2186 }, { "epoch": 0.09728335778755946, "grad_norm": 0.18441399931907654, "learning_rate": 0.0009995600633957342, "loss": 1.7615, "step": 2188 }, { "epoch": 0.09737228224623183, "grad_norm": 0.17722539603710175, "learning_rate": 0.0009995585810990229, "loss": 1.7568, "step": 2190 }, { "epoch": 0.09746120670490419, "grad_norm": 0.17260760068893433, "learning_rate": 0.0009995570963104304, "loss": 1.758, "step": 2192 }, { "epoch": 0.09755013116357654, "grad_norm": 0.17014533281326294, "learning_rate": 0.0009995556090299643, "loss": 1.7556, "step": 2194 }, { "epoch": 0.0976390556222489, "grad_norm": 0.1753702163696289, "learning_rate": 0.000999554119257632, "loss": 1.7636, "step": 2196 }, { "epoch": 0.09772798008092126, "grad_norm": 0.1720827966928482, "learning_rate": 0.0009995526269934406, "loss": 1.7559, "step": 2198 }, { "epoch": 0.09781690453959362, "grad_norm": 0.16954128444194794, "learning_rate": 0.000999551132237398, "loss": 1.7599, "step": 2200 }, { "epoch": 0.09790582899826597, "grad_norm": 0.1615387201309204, "learning_rate": 0.0009995496349895112, "loss": 1.7527, "step": 2202 }, { "epoch": 0.09799475345693832, "grad_norm": 0.1675902158021927, "learning_rate": 0.000999548135249788, "loss": 1.7502, "step": 2204 }, { "epoch": 0.09808367791561069, "grad_norm": 0.15957540273666382, "learning_rate": 0.0009995466330182357, "loss": 1.7642, "step": 2206 }, { "epoch": 0.09817260237428305, "grad_norm": 0.1576092690229416, "learning_rate": 0.000999545128294862, "loss": 1.7599, "step": 2208 }, { "epoch": 0.0982615268329554, "grad_norm": 0.17128455638885498, "learning_rate": 0.0009995436210796743, "loss": 1.754, "step": 2210 }, { "epoch": 0.09835045129162777, "grad_norm": 0.16489969193935394, "learning_rate": 0.00099954211137268, "loss": 1.754, "step": 2212 }, { "epoch": 0.09843937575030012, "grad_norm": 0.16618114709854126, "learning_rate": 0.0009995405991738869, "loss": 1.7533, "step": 2214 }, { "epoch": 0.09852830020897248, "grad_norm": 0.17597708106040955, "learning_rate": 0.000999539084483302, "loss": 1.7581, "step": 2216 }, { "epoch": 0.09861722466764483, "grad_norm": 0.1678888350725174, "learning_rate": 0.0009995375673009337, "loss": 1.7543, "step": 2218 }, { "epoch": 0.0987061491263172, "grad_norm": 0.1661226749420166, "learning_rate": 0.0009995360476267887, "loss": 1.753, "step": 2220 }, { "epoch": 0.09879507358498955, "grad_norm": 0.17764368653297424, "learning_rate": 0.0009995345254608752, "loss": 1.7608, "step": 2222 }, { "epoch": 0.09888399804366191, "grad_norm": 0.16960063576698303, "learning_rate": 0.0009995330008032005, "loss": 1.7567, "step": 2224 }, { "epoch": 0.09897292250233426, "grad_norm": 0.16657236218452454, "learning_rate": 0.0009995314736537724, "loss": 1.7529, "step": 2226 }, { "epoch": 0.09906184696100663, "grad_norm": 0.17307406663894653, "learning_rate": 0.0009995299440125982, "loss": 1.7584, "step": 2228 }, { "epoch": 0.09915077141967898, "grad_norm": 0.18175233900547028, "learning_rate": 0.0009995284118796857, "loss": 1.7607, "step": 2230 }, { "epoch": 0.09923969587835134, "grad_norm": 0.18064169585704803, "learning_rate": 0.0009995268772550428, "loss": 1.7559, "step": 2232 }, { "epoch": 0.0993286203370237, "grad_norm": 0.18270429968833923, "learning_rate": 0.0009995253401386768, "loss": 1.7546, "step": 2234 }, { "epoch": 0.09941754479569606, "grad_norm": 0.17944471538066864, "learning_rate": 0.0009995238005305954, "loss": 1.7585, "step": 2236 }, { "epoch": 0.09950646925436842, "grad_norm": 0.1786056011915207, "learning_rate": 0.0009995222584308066, "loss": 1.7606, "step": 2238 }, { "epoch": 0.09959539371304077, "grad_norm": 0.18306146562099457, "learning_rate": 0.0009995207138393176, "loss": 1.745, "step": 2240 }, { "epoch": 0.09968431817171312, "grad_norm": 0.17886687815189362, "learning_rate": 0.0009995191667561364, "loss": 1.7594, "step": 2242 }, { "epoch": 0.09977324263038549, "grad_norm": 0.17537564039230347, "learning_rate": 0.0009995176171812708, "loss": 1.7536, "step": 2244 }, { "epoch": 0.09986216708905785, "grad_norm": 0.18771038949489594, "learning_rate": 0.0009995160651147283, "loss": 1.7574, "step": 2246 }, { "epoch": 0.0999510915477302, "grad_norm": 0.1864713877439499, "learning_rate": 0.0009995145105565167, "loss": 1.7607, "step": 2248 }, { "epoch": 0.10004001600640255, "grad_norm": 0.1830584704875946, "learning_rate": 0.000999512953506644, "loss": 1.7579, "step": 2250 }, { "epoch": 0.10012894046507492, "grad_norm": 0.16705258190631866, "learning_rate": 0.0009995113939651177, "loss": 1.7557, "step": 2252 }, { "epoch": 0.10021786492374728, "grad_norm": 0.17379559576511383, "learning_rate": 0.0009995098319319456, "loss": 1.755, "step": 2254 }, { "epoch": 0.10030678938241963, "grad_norm": 0.17130069434642792, "learning_rate": 0.0009995082674071356, "loss": 1.7519, "step": 2256 }, { "epoch": 0.100395713841092, "grad_norm": 0.16507191956043243, "learning_rate": 0.0009995067003906954, "loss": 1.7497, "step": 2258 }, { "epoch": 0.10048463829976435, "grad_norm": 0.17065347731113434, "learning_rate": 0.0009995051308826328, "loss": 1.7598, "step": 2260 }, { "epoch": 0.10057356275843671, "grad_norm": 0.15946049988269806, "learning_rate": 0.0009995035588829556, "loss": 1.7528, "step": 2262 }, { "epoch": 0.10066248721710906, "grad_norm": 0.16204677522182465, "learning_rate": 0.0009995019843916722, "loss": 1.75, "step": 2264 }, { "epoch": 0.10075141167578143, "grad_norm": 0.17083314061164856, "learning_rate": 0.0009995004074087896, "loss": 1.7561, "step": 2266 }, { "epoch": 0.10084033613445378, "grad_norm": 0.16661955416202545, "learning_rate": 0.0009994988279343163, "loss": 1.7493, "step": 2268 }, { "epoch": 0.10092926059312614, "grad_norm": 0.17094558477401733, "learning_rate": 0.0009994972459682597, "loss": 1.7547, "step": 2270 }, { "epoch": 0.10101818505179849, "grad_norm": 0.1688528060913086, "learning_rate": 0.000999495661510628, "loss": 1.753, "step": 2272 }, { "epoch": 0.10110710951047086, "grad_norm": 0.16342481970787048, "learning_rate": 0.000999494074561429, "loss": 1.7535, "step": 2274 }, { "epoch": 0.10119603396914321, "grad_norm": 0.16310431063175201, "learning_rate": 0.0009994924851206707, "loss": 1.7499, "step": 2276 }, { "epoch": 0.10128495842781557, "grad_norm": 0.17421621084213257, "learning_rate": 0.000999490893188361, "loss": 1.752, "step": 2278 }, { "epoch": 0.10137388288648792, "grad_norm": 0.16932517290115356, "learning_rate": 0.0009994892987645076, "loss": 1.7496, "step": 2280 }, { "epoch": 0.10146280734516029, "grad_norm": 0.17567944526672363, "learning_rate": 0.000999487701849119, "loss": 1.7521, "step": 2282 }, { "epoch": 0.10155173180383265, "grad_norm": 0.1631414145231247, "learning_rate": 0.0009994861024422027, "loss": 1.7529, "step": 2284 }, { "epoch": 0.101640656262505, "grad_norm": 0.16340436041355133, "learning_rate": 0.0009994845005437667, "loss": 1.7517, "step": 2286 }, { "epoch": 0.10172958072117735, "grad_norm": 0.1647539883852005, "learning_rate": 0.0009994828961538192, "loss": 1.7488, "step": 2288 }, { "epoch": 0.10181850517984972, "grad_norm": 0.16869130730628967, "learning_rate": 0.0009994812892723682, "loss": 1.7569, "step": 2290 }, { "epoch": 0.10190742963852208, "grad_norm": 0.1707681268453598, "learning_rate": 0.0009994796798994214, "loss": 1.7453, "step": 2292 }, { "epoch": 0.10199635409719443, "grad_norm": 0.17477324604988098, "learning_rate": 0.0009994780680349872, "loss": 1.753, "step": 2294 }, { "epoch": 0.10208527855586678, "grad_norm": 0.16798841953277588, "learning_rate": 0.0009994764536790733, "loss": 1.7492, "step": 2296 }, { "epoch": 0.10217420301453915, "grad_norm": 0.17107439041137695, "learning_rate": 0.0009994748368316881, "loss": 1.7394, "step": 2298 }, { "epoch": 0.10226312747321151, "grad_norm": 0.17812353372573853, "learning_rate": 0.0009994732174928396, "loss": 1.755, "step": 2300 }, { "epoch": 0.10235205193188386, "grad_norm": 0.16799861192703247, "learning_rate": 0.0009994715956625356, "loss": 1.7451, "step": 2302 }, { "epoch": 0.10244097639055623, "grad_norm": 0.17055779695510864, "learning_rate": 0.0009994699713407845, "loss": 1.748, "step": 2304 }, { "epoch": 0.10252990084922858, "grad_norm": 0.17974479496479034, "learning_rate": 0.0009994683445275943, "loss": 1.7459, "step": 2306 }, { "epoch": 0.10261882530790094, "grad_norm": 0.1801697462797165, "learning_rate": 0.000999466715222973, "loss": 1.7471, "step": 2308 }, { "epoch": 0.10270774976657329, "grad_norm": 0.17756927013397217, "learning_rate": 0.0009994650834269287, "loss": 1.7523, "step": 2310 }, { "epoch": 0.10279667422524566, "grad_norm": 0.173659086227417, "learning_rate": 0.0009994634491394697, "loss": 1.7455, "step": 2312 }, { "epoch": 0.10288559868391801, "grad_norm": 0.16928839683532715, "learning_rate": 0.0009994618123606042, "loss": 1.744, "step": 2314 }, { "epoch": 0.10297452314259037, "grad_norm": 0.1663655787706375, "learning_rate": 0.0009994601730903402, "loss": 1.7498, "step": 2316 }, { "epoch": 0.10306344760126272, "grad_norm": 0.16375510394573212, "learning_rate": 0.000999458531328686, "loss": 1.7437, "step": 2318 }, { "epoch": 0.10315237205993509, "grad_norm": 0.16213606297969818, "learning_rate": 0.0009994568870756498, "loss": 1.745, "step": 2320 }, { "epoch": 0.10324129651860744, "grad_norm": 0.1694078892469406, "learning_rate": 0.0009994552403312397, "loss": 1.7449, "step": 2322 }, { "epoch": 0.1033302209772798, "grad_norm": 0.1763123720884323, "learning_rate": 0.000999453591095464, "loss": 1.7382, "step": 2324 }, { "epoch": 0.10341914543595215, "grad_norm": 0.16301725804805756, "learning_rate": 0.000999451939368331, "loss": 1.7417, "step": 2326 }, { "epoch": 0.10350806989462452, "grad_norm": 0.17242752015590668, "learning_rate": 0.0009994502851498484, "loss": 1.74, "step": 2328 }, { "epoch": 0.10359699435329688, "grad_norm": 0.16804571449756622, "learning_rate": 0.0009994486284400253, "loss": 1.7447, "step": 2330 }, { "epoch": 0.10368591881196923, "grad_norm": 0.1732250601053238, "learning_rate": 0.0009994469692388693, "loss": 1.7471, "step": 2332 }, { "epoch": 0.10377484327064158, "grad_norm": 0.1738041341304779, "learning_rate": 0.000999445307546389, "loss": 1.7442, "step": 2334 }, { "epoch": 0.10386376772931395, "grad_norm": 0.18072937428951263, "learning_rate": 0.0009994436433625926, "loss": 1.7448, "step": 2336 }, { "epoch": 0.1039526921879863, "grad_norm": 0.16701507568359375, "learning_rate": 0.0009994419766874883, "loss": 1.7394, "step": 2338 }, { "epoch": 0.10404161664665866, "grad_norm": 0.17945359647274017, "learning_rate": 0.0009994403075210846, "loss": 1.7454, "step": 2340 }, { "epoch": 0.10413054110533101, "grad_norm": 0.17203912138938904, "learning_rate": 0.0009994386358633898, "loss": 1.7446, "step": 2342 }, { "epoch": 0.10421946556400338, "grad_norm": 0.17877012491226196, "learning_rate": 0.0009994369617144121, "loss": 1.7451, "step": 2344 }, { "epoch": 0.10430839002267574, "grad_norm": 0.18563194572925568, "learning_rate": 0.00099943528507416, "loss": 1.7403, "step": 2346 }, { "epoch": 0.10439731448134809, "grad_norm": 0.1772448718547821, "learning_rate": 0.0009994336059426416, "loss": 1.7503, "step": 2348 }, { "epoch": 0.10448623894002046, "grad_norm": 0.17919184267520905, "learning_rate": 0.0009994319243198657, "loss": 1.7465, "step": 2350 }, { "epoch": 0.10457516339869281, "grad_norm": 0.18327289819717407, "learning_rate": 0.0009994302402058404, "loss": 1.7366, "step": 2352 }, { "epoch": 0.10466408785736517, "grad_norm": 0.17099310457706451, "learning_rate": 0.0009994285536005741, "loss": 1.7488, "step": 2354 }, { "epoch": 0.10475301231603752, "grad_norm": 0.1615380197763443, "learning_rate": 0.0009994268645040754, "loss": 1.7382, "step": 2356 }, { "epoch": 0.10484193677470989, "grad_norm": 0.18831703066825867, "learning_rate": 0.0009994251729163524, "loss": 1.7417, "step": 2358 }, { "epoch": 0.10493086123338224, "grad_norm": 0.16562096774578094, "learning_rate": 0.0009994234788374139, "loss": 1.7382, "step": 2360 }, { "epoch": 0.1050197856920546, "grad_norm": 0.1602797955274582, "learning_rate": 0.0009994217822672682, "loss": 1.7417, "step": 2362 }, { "epoch": 0.10510871015072695, "grad_norm": 0.15765704214572906, "learning_rate": 0.0009994200832059237, "loss": 1.7419, "step": 2364 }, { "epoch": 0.10519763460939932, "grad_norm": 0.165849968791008, "learning_rate": 0.0009994183816533888, "loss": 1.7475, "step": 2366 }, { "epoch": 0.10528655906807168, "grad_norm": 0.17611372470855713, "learning_rate": 0.0009994166776096723, "loss": 1.7364, "step": 2368 }, { "epoch": 0.10537548352674403, "grad_norm": 0.18146196007728577, "learning_rate": 0.0009994149710747823, "loss": 1.738, "step": 2370 }, { "epoch": 0.10546440798541638, "grad_norm": 0.17269714176654816, "learning_rate": 0.0009994132620487278, "loss": 1.7429, "step": 2372 }, { "epoch": 0.10555333244408875, "grad_norm": 0.1751071959733963, "learning_rate": 0.0009994115505315168, "loss": 1.7418, "step": 2374 }, { "epoch": 0.1056422569027611, "grad_norm": 0.1733344942331314, "learning_rate": 0.0009994098365231584, "loss": 1.7327, "step": 2376 }, { "epoch": 0.10573118136143346, "grad_norm": 0.17954912781715393, "learning_rate": 0.0009994081200236605, "loss": 1.741, "step": 2378 }, { "epoch": 0.10582010582010581, "grad_norm": 0.16836385428905487, "learning_rate": 0.000999406401033032, "loss": 1.736, "step": 2380 }, { "epoch": 0.10590903027877818, "grad_norm": 0.181427001953125, "learning_rate": 0.000999404679551282, "loss": 1.7416, "step": 2382 }, { "epoch": 0.10599795473745054, "grad_norm": 0.17160336673259735, "learning_rate": 0.0009994029555784182, "loss": 1.7419, "step": 2384 }, { "epoch": 0.10608687919612289, "grad_norm": 0.1804126501083374, "learning_rate": 0.0009994012291144494, "loss": 1.74, "step": 2386 }, { "epoch": 0.10617580365479524, "grad_norm": 0.1687408834695816, "learning_rate": 0.0009993995001593846, "loss": 1.7425, "step": 2388 }, { "epoch": 0.10626472811346761, "grad_norm": 0.16643251478672028, "learning_rate": 0.000999397768713232, "loss": 1.7386, "step": 2390 }, { "epoch": 0.10635365257213997, "grad_norm": 0.16881844401359558, "learning_rate": 0.0009993960347760005, "loss": 1.7383, "step": 2392 }, { "epoch": 0.10644257703081232, "grad_norm": 0.1696399301290512, "learning_rate": 0.0009993942983476988, "loss": 1.7429, "step": 2394 }, { "epoch": 0.10653150148948469, "grad_norm": 0.1724826991558075, "learning_rate": 0.0009993925594283352, "loss": 1.7405, "step": 2396 }, { "epoch": 0.10662042594815704, "grad_norm": 0.17419011890888214, "learning_rate": 0.000999390818017919, "loss": 1.7365, "step": 2398 }, { "epoch": 0.1067093504068294, "grad_norm": 0.16340835392475128, "learning_rate": 0.0009993890741164582, "loss": 1.734, "step": 2400 }, { "epoch": 0.10679827486550175, "grad_norm": 0.17877715826034546, "learning_rate": 0.000999387327723962, "loss": 1.74, "step": 2402 }, { "epoch": 0.10688719932417412, "grad_norm": 0.17038090527057648, "learning_rate": 0.0009993855788404387, "loss": 1.7377, "step": 2404 }, { "epoch": 0.10697612378284647, "grad_norm": 0.16269196569919586, "learning_rate": 0.0009993838274658972, "loss": 1.737, "step": 2406 }, { "epoch": 0.10706504824151883, "grad_norm": 0.1645708680152893, "learning_rate": 0.0009993820736003466, "loss": 1.7404, "step": 2408 }, { "epoch": 0.10715397270019118, "grad_norm": 0.16457295417785645, "learning_rate": 0.0009993803172437953, "loss": 1.7386, "step": 2410 }, { "epoch": 0.10724289715886355, "grad_norm": 0.1686396598815918, "learning_rate": 0.0009993785583962517, "loss": 1.7322, "step": 2412 }, { "epoch": 0.1073318216175359, "grad_norm": 0.1839965432882309, "learning_rate": 0.0009993767970577251, "loss": 1.7396, "step": 2414 }, { "epoch": 0.10742074607620826, "grad_norm": 0.178563192486763, "learning_rate": 0.0009993750332282245, "loss": 1.7399, "step": 2416 }, { "epoch": 0.10750967053488061, "grad_norm": 0.18056493997573853, "learning_rate": 0.000999373266907758, "loss": 1.7386, "step": 2418 }, { "epoch": 0.10759859499355298, "grad_norm": 0.17721746861934662, "learning_rate": 0.0009993714980963348, "loss": 1.73, "step": 2420 }, { "epoch": 0.10768751945222534, "grad_norm": 0.1596176028251648, "learning_rate": 0.0009993697267939637, "loss": 1.7382, "step": 2422 }, { "epoch": 0.10777644391089769, "grad_norm": 0.17123466730117798, "learning_rate": 0.0009993679530006536, "loss": 1.7313, "step": 2424 }, { "epoch": 0.10786536836957004, "grad_norm": 0.163445383310318, "learning_rate": 0.000999366176716413, "loss": 1.739, "step": 2426 }, { "epoch": 0.10795429282824241, "grad_norm": 0.16375136375427246, "learning_rate": 0.0009993643979412513, "loss": 1.7306, "step": 2428 }, { "epoch": 0.10804321728691477, "grad_norm": 0.16650699079036713, "learning_rate": 0.000999362616675177, "loss": 1.74, "step": 2430 }, { "epoch": 0.10813214174558712, "grad_norm": 0.16717134416103363, "learning_rate": 0.0009993608329181992, "loss": 1.7321, "step": 2432 }, { "epoch": 0.10822106620425948, "grad_norm": 0.1608593463897705, "learning_rate": 0.0009993590466703265, "loss": 1.7389, "step": 2434 }, { "epoch": 0.10830999066293184, "grad_norm": 0.16169093549251556, "learning_rate": 0.000999357257931568, "loss": 1.7329, "step": 2436 }, { "epoch": 0.1083989151216042, "grad_norm": 0.15770640969276428, "learning_rate": 0.0009993554667019327, "loss": 1.7344, "step": 2438 }, { "epoch": 0.10848783958027655, "grad_norm": 0.16067162156105042, "learning_rate": 0.0009993536729814294, "loss": 1.7307, "step": 2440 }, { "epoch": 0.10857676403894892, "grad_norm": 0.16905395686626434, "learning_rate": 0.0009993518767700668, "loss": 1.7335, "step": 2442 }, { "epoch": 0.10866568849762127, "grad_norm": 0.16266003251075745, "learning_rate": 0.0009993500780678547, "loss": 1.7362, "step": 2444 }, { "epoch": 0.10875461295629363, "grad_norm": 0.16899318993091583, "learning_rate": 0.000999348276874801, "loss": 1.7361, "step": 2446 }, { "epoch": 0.10884353741496598, "grad_norm": 0.16284890472888947, "learning_rate": 0.0009993464731909156, "loss": 1.7325, "step": 2448 }, { "epoch": 0.10893246187363835, "grad_norm": 0.16005484759807587, "learning_rate": 0.000999344667016207, "loss": 1.7263, "step": 2450 }, { "epoch": 0.1090213863323107, "grad_norm": 0.16001056134700775, "learning_rate": 0.0009993428583506842, "loss": 1.7337, "step": 2452 }, { "epoch": 0.10911031079098306, "grad_norm": 0.1561092734336853, "learning_rate": 0.0009993410471943564, "loss": 1.7349, "step": 2454 }, { "epoch": 0.10919923524965541, "grad_norm": 0.16243596374988556, "learning_rate": 0.0009993392335472327, "loss": 1.7319, "step": 2456 }, { "epoch": 0.10928815970832778, "grad_norm": 0.15440620481967926, "learning_rate": 0.0009993374174093218, "loss": 1.7362, "step": 2458 }, { "epoch": 0.10937708416700014, "grad_norm": 0.16307926177978516, "learning_rate": 0.000999335598780633, "loss": 1.7422, "step": 2460 }, { "epoch": 0.10946600862567249, "grad_norm": 0.16545718908309937, "learning_rate": 0.0009993337776611755, "loss": 1.7348, "step": 2462 }, { "epoch": 0.10955493308434484, "grad_norm": 0.16467604041099548, "learning_rate": 0.0009993319540509582, "loss": 1.7311, "step": 2464 }, { "epoch": 0.10964385754301721, "grad_norm": 0.15704773366451263, "learning_rate": 0.0009993301279499901, "loss": 1.7317, "step": 2466 }, { "epoch": 0.10973278200168957, "grad_norm": 0.1750647872686386, "learning_rate": 0.0009993282993582804, "loss": 1.7354, "step": 2468 }, { "epoch": 0.10982170646036192, "grad_norm": 0.17140831053256989, "learning_rate": 0.0009993264682758385, "loss": 1.7328, "step": 2470 }, { "epoch": 0.10991063091903427, "grad_norm": 0.18565557897090912, "learning_rate": 0.0009993246347026732, "loss": 1.732, "step": 2472 }, { "epoch": 0.10999955537770664, "grad_norm": 0.16787555813789368, "learning_rate": 0.0009993227986387935, "loss": 1.7332, "step": 2474 }, { "epoch": 0.110088479836379, "grad_norm": 0.16842927038669586, "learning_rate": 0.0009993209600842093, "loss": 1.7336, "step": 2476 }, { "epoch": 0.11017740429505135, "grad_norm": 0.16819126904010773, "learning_rate": 0.0009993191190389287, "loss": 1.7303, "step": 2478 }, { "epoch": 0.1102663287537237, "grad_norm": 0.1662745177745819, "learning_rate": 0.0009993172755029618, "loss": 1.7277, "step": 2480 }, { "epoch": 0.11035525321239607, "grad_norm": 0.15442778170108795, "learning_rate": 0.0009993154294763173, "loss": 1.7296, "step": 2482 }, { "epoch": 0.11044417767106843, "grad_norm": 0.1560961753129959, "learning_rate": 0.0009993135809590048, "loss": 1.7373, "step": 2484 }, { "epoch": 0.11053310212974078, "grad_norm": 0.15413883328437805, "learning_rate": 0.0009993117299510331, "loss": 1.7303, "step": 2486 }, { "epoch": 0.11062202658841315, "grad_norm": 0.148091122508049, "learning_rate": 0.0009993098764524116, "loss": 1.7288, "step": 2488 }, { "epoch": 0.1107109510470855, "grad_norm": 0.1534961462020874, "learning_rate": 0.0009993080204631496, "loss": 1.7328, "step": 2490 }, { "epoch": 0.11079987550575786, "grad_norm": 0.15761974453926086, "learning_rate": 0.0009993061619832562, "loss": 1.7275, "step": 2492 }, { "epoch": 0.11088879996443021, "grad_norm": 0.15099415183067322, "learning_rate": 0.000999304301012741, "loss": 1.7273, "step": 2494 }, { "epoch": 0.11097772442310258, "grad_norm": 0.16000492870807648, "learning_rate": 0.0009993024375516129, "loss": 1.7294, "step": 2496 }, { "epoch": 0.11106664888177493, "grad_norm": 0.1632835417985916, "learning_rate": 0.0009993005715998812, "loss": 1.7367, "step": 2498 }, { "epoch": 0.11115557334044729, "grad_norm": 0.16005057096481323, "learning_rate": 0.0009992987031575556, "loss": 1.7306, "step": 2500 }, { "epoch": 0.11115557334044729, "eval_loss": 1.6908477544784546, "eval_runtime": 12.3532, "eval_samples_per_second": 559.37, "eval_steps_per_second": 69.942, "step": 2500 }, { "epoch": 0.11124449779911964, "grad_norm": 0.16231466829776764, "learning_rate": 0.000999296832224645, "loss": 1.7317, "step": 2502 }, { "epoch": 0.11133342225779201, "grad_norm": 0.1661044955253601, "learning_rate": 0.0009992949588011593, "loss": 1.7268, "step": 2504 }, { "epoch": 0.11142234671646437, "grad_norm": 0.16056203842163086, "learning_rate": 0.0009992930828871072, "loss": 1.7362, "step": 2506 }, { "epoch": 0.11151127117513672, "grad_norm": 0.16574431955814362, "learning_rate": 0.0009992912044824984, "loss": 1.7286, "step": 2508 }, { "epoch": 0.11160019563380907, "grad_norm": 0.15495635569095612, "learning_rate": 0.0009992893235873422, "loss": 1.7297, "step": 2510 }, { "epoch": 0.11168912009248144, "grad_norm": 0.15354973077774048, "learning_rate": 0.0009992874402016479, "loss": 1.7335, "step": 2512 }, { "epoch": 0.1117780445511538, "grad_norm": 0.15808668732643127, "learning_rate": 0.000999285554325425, "loss": 1.7216, "step": 2514 }, { "epoch": 0.11186696900982615, "grad_norm": 0.17658215761184692, "learning_rate": 0.0009992836659586828, "loss": 1.7262, "step": 2516 }, { "epoch": 0.1119558934684985, "grad_norm": 0.15850138664245605, "learning_rate": 0.0009992817751014308, "loss": 1.7308, "step": 2518 }, { "epoch": 0.11204481792717087, "grad_norm": 0.14654642343521118, "learning_rate": 0.0009992798817536786, "loss": 1.7206, "step": 2520 }, { "epoch": 0.11213374238584323, "grad_norm": 0.1507309526205063, "learning_rate": 0.0009992779859154355, "loss": 1.7274, "step": 2522 }, { "epoch": 0.11222266684451558, "grad_norm": 0.15434762835502625, "learning_rate": 0.0009992760875867106, "loss": 1.7224, "step": 2524 }, { "epoch": 0.11231159130318794, "grad_norm": 0.15948152542114258, "learning_rate": 0.0009992741867675139, "loss": 1.7265, "step": 2526 }, { "epoch": 0.1124005157618603, "grad_norm": 0.18099045753479004, "learning_rate": 0.0009992722834578547, "loss": 1.7229, "step": 2528 }, { "epoch": 0.11248944022053266, "grad_norm": 0.17173022031784058, "learning_rate": 0.0009992703776577426, "loss": 1.7246, "step": 2530 }, { "epoch": 0.11257836467920501, "grad_norm": 0.16352210938930511, "learning_rate": 0.0009992684693671868, "loss": 1.7226, "step": 2532 }, { "epoch": 0.11266728913787738, "grad_norm": 0.1548294872045517, "learning_rate": 0.000999266558586197, "loss": 1.728, "step": 2534 }, { "epoch": 0.11275621359654973, "grad_norm": 0.16450268030166626, "learning_rate": 0.0009992646453147829, "loss": 1.7232, "step": 2536 }, { "epoch": 0.11284513805522209, "grad_norm": 0.1629658341407776, "learning_rate": 0.0009992627295529537, "loss": 1.7282, "step": 2538 }, { "epoch": 0.11293406251389444, "grad_norm": 0.145051047205925, "learning_rate": 0.0009992608113007192, "loss": 1.7257, "step": 2540 }, { "epoch": 0.11302298697256681, "grad_norm": 0.1702987104654312, "learning_rate": 0.000999258890558089, "loss": 1.7199, "step": 2542 }, { "epoch": 0.11311191143123916, "grad_norm": 0.1623816341161728, "learning_rate": 0.0009992569673250723, "loss": 1.7191, "step": 2544 }, { "epoch": 0.11320083588991152, "grad_norm": 0.15296286344528198, "learning_rate": 0.000999255041601679, "loss": 1.7202, "step": 2546 }, { "epoch": 0.11328976034858387, "grad_norm": 0.1567697525024414, "learning_rate": 0.000999253113387919, "loss": 1.7256, "step": 2548 }, { "epoch": 0.11337868480725624, "grad_norm": 0.153935045003891, "learning_rate": 0.0009992511826838013, "loss": 1.7252, "step": 2550 }, { "epoch": 0.1134676092659286, "grad_norm": 0.15539416670799255, "learning_rate": 0.000999249249489336, "loss": 1.7257, "step": 2552 }, { "epoch": 0.11355653372460095, "grad_norm": 0.15414440631866455, "learning_rate": 0.0009992473138045327, "loss": 1.7196, "step": 2554 }, { "epoch": 0.1136454581832733, "grad_norm": 0.15253779292106628, "learning_rate": 0.0009992453756294006, "loss": 1.7202, "step": 2556 }, { "epoch": 0.11373438264194567, "grad_norm": 0.1521858274936676, "learning_rate": 0.00099924343496395, "loss": 1.7215, "step": 2558 }, { "epoch": 0.11382330710061803, "grad_norm": 0.15332087874412537, "learning_rate": 0.00099924149180819, "loss": 1.7324, "step": 2560 }, { "epoch": 0.11391223155929038, "grad_norm": 0.16587688028812408, "learning_rate": 0.0009992395461621306, "loss": 1.7251, "step": 2562 }, { "epoch": 0.11400115601796273, "grad_norm": 0.16591493785381317, "learning_rate": 0.0009992375980257817, "loss": 1.7292, "step": 2564 }, { "epoch": 0.1140900804766351, "grad_norm": 0.15625201165676117, "learning_rate": 0.0009992356473991525, "loss": 1.7215, "step": 2566 }, { "epoch": 0.11417900493530746, "grad_norm": 0.15243349969387054, "learning_rate": 0.0009992336942822532, "loss": 1.7175, "step": 2568 }, { "epoch": 0.11426792939397981, "grad_norm": 0.15183353424072266, "learning_rate": 0.0009992317386750933, "loss": 1.7262, "step": 2570 }, { "epoch": 0.11435685385265217, "grad_norm": 0.15364834666252136, "learning_rate": 0.0009992297805776827, "loss": 1.726, "step": 2572 }, { "epoch": 0.11444577831132453, "grad_norm": 0.1533900946378708, "learning_rate": 0.000999227819990031, "loss": 1.7252, "step": 2574 }, { "epoch": 0.11453470276999689, "grad_norm": 0.15749764442443848, "learning_rate": 0.000999225856912148, "loss": 1.7221, "step": 2576 }, { "epoch": 0.11462362722866924, "grad_norm": 0.15611757338047028, "learning_rate": 0.000999223891344044, "loss": 1.7161, "step": 2578 }, { "epoch": 0.11471255168734161, "grad_norm": 0.15750350058078766, "learning_rate": 0.0009992219232857278, "loss": 1.7249, "step": 2580 }, { "epoch": 0.11480147614601396, "grad_norm": 0.16888903081417084, "learning_rate": 0.00099921995273721, "loss": 1.7217, "step": 2582 }, { "epoch": 0.11489040060468632, "grad_norm": 0.1606723666191101, "learning_rate": 0.0009992179796985004, "loss": 1.7213, "step": 2584 }, { "epoch": 0.11497932506335867, "grad_norm": 0.15816037356853485, "learning_rate": 0.0009992160041696083, "loss": 1.7144, "step": 2586 }, { "epoch": 0.11506824952203104, "grad_norm": 0.16229891777038574, "learning_rate": 0.0009992140261505442, "loss": 1.7215, "step": 2588 }, { "epoch": 0.1151571739807034, "grad_norm": 0.16987872123718262, "learning_rate": 0.0009992120456413175, "loss": 1.7208, "step": 2590 }, { "epoch": 0.11524609843937575, "grad_norm": 0.17010580003261566, "learning_rate": 0.0009992100626419382, "loss": 1.7199, "step": 2592 }, { "epoch": 0.1153350228980481, "grad_norm": 0.1615486890077591, "learning_rate": 0.0009992080771524165, "loss": 1.7174, "step": 2594 }, { "epoch": 0.11542394735672047, "grad_norm": 0.15959839522838593, "learning_rate": 0.000999206089172762, "loss": 1.7177, "step": 2596 }, { "epoch": 0.11551287181539283, "grad_norm": 0.1626252979040146, "learning_rate": 0.0009992040987029844, "loss": 1.7155, "step": 2598 }, { "epoch": 0.11560179627406518, "grad_norm": 0.15380766987800598, "learning_rate": 0.000999202105743094, "loss": 1.7182, "step": 2600 }, { "epoch": 0.11569072073273753, "grad_norm": 0.15339282155036926, "learning_rate": 0.0009992001102931008, "loss": 1.7212, "step": 2602 }, { "epoch": 0.1157796451914099, "grad_norm": 0.15971560776233673, "learning_rate": 0.0009991981123530144, "loss": 1.7232, "step": 2604 }, { "epoch": 0.11586856965008226, "grad_norm": 0.1581919938325882, "learning_rate": 0.000999196111922845, "loss": 1.7221, "step": 2606 }, { "epoch": 0.11595749410875461, "grad_norm": 0.1494348645210266, "learning_rate": 0.0009991941090026026, "loss": 1.7173, "step": 2608 }, { "epoch": 0.11604641856742696, "grad_norm": 0.14686119556427002, "learning_rate": 0.0009991921035922972, "loss": 1.72, "step": 2610 }, { "epoch": 0.11613534302609933, "grad_norm": 0.15541252493858337, "learning_rate": 0.0009991900956919384, "loss": 1.7245, "step": 2612 }, { "epoch": 0.11622426748477169, "grad_norm": 0.15264230966567993, "learning_rate": 0.000999188085301537, "loss": 1.7158, "step": 2614 }, { "epoch": 0.11631319194344404, "grad_norm": 0.1619824767112732, "learning_rate": 0.000999186072421102, "loss": 1.7179, "step": 2616 }, { "epoch": 0.1164021164021164, "grad_norm": 0.16652943193912506, "learning_rate": 0.0009991840570506443, "loss": 1.7214, "step": 2618 }, { "epoch": 0.11649104086078876, "grad_norm": 0.1658804565668106, "learning_rate": 0.0009991820391901738, "loss": 1.721, "step": 2620 }, { "epoch": 0.11657996531946112, "grad_norm": 0.1601102203130722, "learning_rate": 0.0009991800188397003, "loss": 1.7252, "step": 2622 }, { "epoch": 0.11666888977813347, "grad_norm": 0.147894486784935, "learning_rate": 0.000999177995999234, "loss": 1.7161, "step": 2624 }, { "epoch": 0.11675781423680584, "grad_norm": 0.1540108621120453, "learning_rate": 0.000999175970668785, "loss": 1.7226, "step": 2626 }, { "epoch": 0.1168467386954782, "grad_norm": 0.15112482011318207, "learning_rate": 0.0009991739428483635, "loss": 1.7159, "step": 2628 }, { "epoch": 0.11693566315415055, "grad_norm": 0.15371938049793243, "learning_rate": 0.0009991719125379795, "loss": 1.719, "step": 2630 }, { "epoch": 0.1170245876128229, "grad_norm": 0.15657579898834229, "learning_rate": 0.000999169879737643, "loss": 1.719, "step": 2632 }, { "epoch": 0.11711351207149527, "grad_norm": 0.15088775753974915, "learning_rate": 0.0009991678444473643, "loss": 1.71, "step": 2634 }, { "epoch": 0.11720243653016763, "grad_norm": 0.15197664499282837, "learning_rate": 0.0009991658066671536, "loss": 1.7177, "step": 2636 }, { "epoch": 0.11729136098883998, "grad_norm": 0.15489469468593597, "learning_rate": 0.000999163766397021, "loss": 1.7077, "step": 2638 }, { "epoch": 0.11738028544751233, "grad_norm": 0.14296165108680725, "learning_rate": 0.0009991617236369767, "loss": 1.7175, "step": 2640 }, { "epoch": 0.1174692099061847, "grad_norm": 0.14150473475456238, "learning_rate": 0.0009991596783870307, "loss": 1.7163, "step": 2642 }, { "epoch": 0.11755813436485706, "grad_norm": 0.1558849960565567, "learning_rate": 0.0009991576306471936, "loss": 1.7169, "step": 2644 }, { "epoch": 0.11764705882352941, "grad_norm": 0.16008678078651428, "learning_rate": 0.0009991555804174752, "loss": 1.7156, "step": 2646 }, { "epoch": 0.11773598328220176, "grad_norm": 0.1638006567955017, "learning_rate": 0.000999153527697886, "loss": 1.7208, "step": 2648 }, { "epoch": 0.11782490774087413, "grad_norm": 0.15358804166316986, "learning_rate": 0.0009991514724884362, "loss": 1.7161, "step": 2650 }, { "epoch": 0.11791383219954649, "grad_norm": 0.14830666780471802, "learning_rate": 0.000999149414789136, "loss": 1.7219, "step": 2652 }, { "epoch": 0.11800275665821884, "grad_norm": 0.15406952798366547, "learning_rate": 0.0009991473545999956, "loss": 1.71, "step": 2654 }, { "epoch": 0.1180916811168912, "grad_norm": 0.15282905101776123, "learning_rate": 0.0009991452919210252, "loss": 1.7205, "step": 2656 }, { "epoch": 0.11818060557556356, "grad_norm": 0.15068869292736053, "learning_rate": 0.0009991432267522353, "loss": 1.7166, "step": 2658 }, { "epoch": 0.11826953003423592, "grad_norm": 0.14806392788887024, "learning_rate": 0.0009991411590936363, "loss": 1.7165, "step": 2660 }, { "epoch": 0.11835845449290827, "grad_norm": 0.15650790929794312, "learning_rate": 0.0009991390889452383, "loss": 1.7121, "step": 2662 }, { "epoch": 0.11844737895158063, "grad_norm": 0.16825319826602936, "learning_rate": 0.0009991370163070516, "loss": 1.7079, "step": 2664 }, { "epoch": 0.118536303410253, "grad_norm": 0.15932542085647583, "learning_rate": 0.0009991349411790868, "loss": 1.7133, "step": 2666 }, { "epoch": 0.11862522786892535, "grad_norm": 0.15582598745822906, "learning_rate": 0.0009991328635613537, "loss": 1.7171, "step": 2668 }, { "epoch": 0.1187141523275977, "grad_norm": 0.1434444785118103, "learning_rate": 0.0009991307834538632, "loss": 1.7143, "step": 2670 }, { "epoch": 0.11880307678627007, "grad_norm": 0.1549002230167389, "learning_rate": 0.0009991287008566255, "loss": 1.7218, "step": 2672 }, { "epoch": 0.11889200124494242, "grad_norm": 0.15441077947616577, "learning_rate": 0.0009991266157696511, "loss": 1.7141, "step": 2674 }, { "epoch": 0.11898092570361478, "grad_norm": 0.1501181274652481, "learning_rate": 0.0009991245281929502, "loss": 1.7079, "step": 2676 }, { "epoch": 0.11906985016228713, "grad_norm": 0.1563824862241745, "learning_rate": 0.0009991224381265332, "loss": 1.7179, "step": 2678 }, { "epoch": 0.1191587746209595, "grad_norm": 0.15084655582904816, "learning_rate": 0.0009991203455704107, "loss": 1.7089, "step": 2680 }, { "epoch": 0.11924769907963186, "grad_norm": 0.15098781883716583, "learning_rate": 0.000999118250524593, "loss": 1.7131, "step": 2682 }, { "epoch": 0.11933662353830421, "grad_norm": 0.1477523148059845, "learning_rate": 0.0009991161529890908, "loss": 1.712, "step": 2684 }, { "epoch": 0.11942554799697656, "grad_norm": 0.15165407955646515, "learning_rate": 0.0009991140529639142, "loss": 1.7142, "step": 2686 }, { "epoch": 0.11951447245564893, "grad_norm": 0.14917202293872833, "learning_rate": 0.000999111950449074, "loss": 1.7139, "step": 2688 }, { "epoch": 0.11960339691432129, "grad_norm": 0.15467822551727295, "learning_rate": 0.0009991098454445806, "loss": 1.7058, "step": 2690 }, { "epoch": 0.11969232137299364, "grad_norm": 0.1571405827999115, "learning_rate": 0.0009991077379504442, "loss": 1.72, "step": 2692 }, { "epoch": 0.119781245831666, "grad_norm": 0.15948142111301422, "learning_rate": 0.0009991056279666758, "loss": 1.7136, "step": 2694 }, { "epoch": 0.11987017029033836, "grad_norm": 0.15302774310112, "learning_rate": 0.0009991035154932855, "loss": 1.7084, "step": 2696 }, { "epoch": 0.11995909474901072, "grad_norm": 0.14382019639015198, "learning_rate": 0.0009991014005302841, "loss": 1.7078, "step": 2698 }, { "epoch": 0.12004801920768307, "grad_norm": 0.14728422462940216, "learning_rate": 0.000999099283077682, "loss": 1.7078, "step": 2700 }, { "epoch": 0.12013694366635543, "grad_norm": 0.1458166539669037, "learning_rate": 0.00099909716313549, "loss": 1.714, "step": 2702 }, { "epoch": 0.1202258681250278, "grad_norm": 0.15718547999858856, "learning_rate": 0.0009990950407037184, "loss": 1.7146, "step": 2704 }, { "epoch": 0.12031479258370015, "grad_norm": 0.1658591479063034, "learning_rate": 0.000999092915782378, "loss": 1.7134, "step": 2706 }, { "epoch": 0.1204037170423725, "grad_norm": 0.1692623794078827, "learning_rate": 0.000999090788371479, "loss": 1.7091, "step": 2708 }, { "epoch": 0.12049264150104486, "grad_norm": 0.16184183955192566, "learning_rate": 0.0009990886584710324, "loss": 1.7098, "step": 2710 }, { "epoch": 0.12058156595971722, "grad_norm": 0.14483095705509186, "learning_rate": 0.0009990865260810487, "loss": 1.7069, "step": 2712 }, { "epoch": 0.12067049041838958, "grad_norm": 0.15486344695091248, "learning_rate": 0.0009990843912015387, "loss": 1.7116, "step": 2714 }, { "epoch": 0.12075941487706193, "grad_norm": 0.14510615170001984, "learning_rate": 0.000999082253832513, "loss": 1.7196, "step": 2716 }, { "epoch": 0.1208483393357343, "grad_norm": 0.15198862552642822, "learning_rate": 0.000999080113973982, "loss": 1.7065, "step": 2718 }, { "epoch": 0.12093726379440665, "grad_norm": 0.14839208126068115, "learning_rate": 0.0009990779716259567, "loss": 1.7075, "step": 2720 }, { "epoch": 0.12102618825307901, "grad_norm": 0.14327092468738556, "learning_rate": 0.0009990758267884475, "loss": 1.7133, "step": 2722 }, { "epoch": 0.12111511271175136, "grad_norm": 0.1524917483329773, "learning_rate": 0.0009990736794614652, "loss": 1.7002, "step": 2724 }, { "epoch": 0.12120403717042373, "grad_norm": 0.14945504069328308, "learning_rate": 0.0009990715296450206, "loss": 1.714, "step": 2726 }, { "epoch": 0.12129296162909609, "grad_norm": 0.15353934466838837, "learning_rate": 0.0009990693773391244, "loss": 1.7118, "step": 2728 }, { "epoch": 0.12138188608776844, "grad_norm": 0.14560256898403168, "learning_rate": 0.0009990672225437872, "loss": 1.7053, "step": 2730 }, { "epoch": 0.1214708105464408, "grad_norm": 0.15236537158489227, "learning_rate": 0.00099906506525902, "loss": 1.71, "step": 2732 }, { "epoch": 0.12155973500511316, "grad_norm": 0.15007594227790833, "learning_rate": 0.0009990629054848332, "loss": 1.7085, "step": 2734 }, { "epoch": 0.12164865946378552, "grad_norm": 0.1456535905599594, "learning_rate": 0.000999060743221238, "loss": 1.7157, "step": 2736 }, { "epoch": 0.12173758392245787, "grad_norm": 0.15266625583171844, "learning_rate": 0.0009990585784682448, "loss": 1.711, "step": 2738 }, { "epoch": 0.12182650838113022, "grad_norm": 0.16604028642177582, "learning_rate": 0.000999056411225865, "loss": 1.7128, "step": 2740 }, { "epoch": 0.12191543283980259, "grad_norm": 0.15921242535114288, "learning_rate": 0.0009990542414941084, "loss": 1.7046, "step": 2742 }, { "epoch": 0.12200435729847495, "grad_norm": 0.1624920517206192, "learning_rate": 0.0009990520692729867, "loss": 1.7087, "step": 2744 }, { "epoch": 0.1220932817571473, "grad_norm": 0.15475033223628998, "learning_rate": 0.0009990498945625103, "loss": 1.7067, "step": 2746 }, { "epoch": 0.12218220621581966, "grad_norm": 0.1475726068019867, "learning_rate": 0.0009990477173626904, "loss": 1.7074, "step": 2748 }, { "epoch": 0.12227113067449202, "grad_norm": 0.15255337953567505, "learning_rate": 0.0009990455376735375, "loss": 1.7045, "step": 2750 }, { "epoch": 0.12236005513316438, "grad_norm": 0.14628992974758148, "learning_rate": 0.0009990433554950627, "loss": 1.7138, "step": 2752 }, { "epoch": 0.12244897959183673, "grad_norm": 0.15724748373031616, "learning_rate": 0.0009990411708272768, "loss": 1.714, "step": 2754 }, { "epoch": 0.12253790405050909, "grad_norm": 0.15618425607681274, "learning_rate": 0.0009990389836701907, "loss": 1.6981, "step": 2756 }, { "epoch": 0.12262682850918145, "grad_norm": 0.15823392570018768, "learning_rate": 0.0009990367940238153, "loss": 1.7111, "step": 2758 }, { "epoch": 0.12271575296785381, "grad_norm": 0.1651688665151596, "learning_rate": 0.0009990346018881615, "loss": 1.7038, "step": 2760 }, { "epoch": 0.12280467742652616, "grad_norm": 0.1569395363330841, "learning_rate": 0.0009990324072632402, "loss": 1.7033, "step": 2762 }, { "epoch": 0.12289360188519853, "grad_norm": 0.15873000025749207, "learning_rate": 0.0009990302101490626, "loss": 1.7047, "step": 2764 }, { "epoch": 0.12298252634387088, "grad_norm": 0.1520274430513382, "learning_rate": 0.0009990280105456393, "loss": 1.6992, "step": 2766 }, { "epoch": 0.12307145080254324, "grad_norm": 0.14999651908874512, "learning_rate": 0.0009990258084529816, "loss": 1.7109, "step": 2768 }, { "epoch": 0.1231603752612156, "grad_norm": 0.16025303304195404, "learning_rate": 0.0009990236038711002, "loss": 1.7068, "step": 2770 }, { "epoch": 0.12324929971988796, "grad_norm": 0.15831394493579865, "learning_rate": 0.0009990213968000064, "loss": 1.703, "step": 2772 }, { "epoch": 0.12333822417856032, "grad_norm": 0.16204720735549927, "learning_rate": 0.0009990191872397108, "loss": 1.7041, "step": 2774 }, { "epoch": 0.12342714863723267, "grad_norm": 0.15055668354034424, "learning_rate": 0.000999016975190225, "loss": 1.6996, "step": 2776 }, { "epoch": 0.12351607309590502, "grad_norm": 0.15014596283435822, "learning_rate": 0.0009990147606515594, "loss": 1.7061, "step": 2778 }, { "epoch": 0.12360499755457739, "grad_norm": 0.15276427567005157, "learning_rate": 0.0009990125436237253, "loss": 1.7085, "step": 2780 }, { "epoch": 0.12369392201324975, "grad_norm": 0.14822447299957275, "learning_rate": 0.000999010324106734, "loss": 1.7116, "step": 2782 }, { "epoch": 0.1237828464719221, "grad_norm": 0.15309688448905945, "learning_rate": 0.0009990081021005964, "loss": 1.7078, "step": 2784 }, { "epoch": 0.12387177093059445, "grad_norm": 0.15499570965766907, "learning_rate": 0.0009990058776053233, "loss": 1.7011, "step": 2786 }, { "epoch": 0.12396069538926682, "grad_norm": 0.15672209858894348, "learning_rate": 0.0009990036506209263, "loss": 1.706, "step": 2788 }, { "epoch": 0.12404961984793918, "grad_norm": 0.15120433270931244, "learning_rate": 0.000999001421147416, "loss": 1.7017, "step": 2790 }, { "epoch": 0.12413854430661153, "grad_norm": 0.14683088660240173, "learning_rate": 0.000998999189184804, "loss": 1.7024, "step": 2792 }, { "epoch": 0.12422746876528389, "grad_norm": 0.15255948901176453, "learning_rate": 0.0009989969547331012, "loss": 1.7104, "step": 2794 }, { "epoch": 0.12431639322395625, "grad_norm": 0.15793496370315552, "learning_rate": 0.0009989947177923189, "loss": 1.707, "step": 2796 }, { "epoch": 0.12440531768262861, "grad_norm": 0.15464596450328827, "learning_rate": 0.0009989924783624677, "loss": 1.7053, "step": 2798 }, { "epoch": 0.12449424214130096, "grad_norm": 0.15060003101825714, "learning_rate": 0.0009989902364435596, "loss": 1.7017, "step": 2800 }, { "epoch": 0.12458316659997332, "grad_norm": 0.1549662947654724, "learning_rate": 0.0009989879920356051, "loss": 1.6983, "step": 2802 }, { "epoch": 0.12467209105864568, "grad_norm": 0.1606224775314331, "learning_rate": 0.000998985745138616, "loss": 1.7036, "step": 2804 }, { "epoch": 0.12476101551731804, "grad_norm": 0.14715637266635895, "learning_rate": 0.0009989834957526028, "loss": 1.7038, "step": 2806 }, { "epoch": 0.12484993997599039, "grad_norm": 0.16040703654289246, "learning_rate": 0.0009989812438775771, "loss": 1.7035, "step": 2808 }, { "epoch": 0.12493886443466276, "grad_norm": 0.14962682127952576, "learning_rate": 0.0009989789895135503, "loss": 1.6948, "step": 2810 }, { "epoch": 0.1250277888933351, "grad_norm": 0.14733915030956268, "learning_rate": 0.0009989767326605335, "loss": 1.6969, "step": 2812 }, { "epoch": 0.12511671335200747, "grad_norm": 0.15031510591506958, "learning_rate": 0.000998974473318538, "loss": 1.7009, "step": 2814 }, { "epoch": 0.12520563781067984, "grad_norm": 0.14500756561756134, "learning_rate": 0.0009989722114875747, "loss": 1.6987, "step": 2816 }, { "epoch": 0.12529456226935218, "grad_norm": 0.14894454181194305, "learning_rate": 0.0009989699471676554, "loss": 1.6968, "step": 2818 }, { "epoch": 0.12538348672802455, "grad_norm": 0.14639148116111755, "learning_rate": 0.0009989676803587914, "loss": 1.699, "step": 2820 }, { "epoch": 0.1254724111866969, "grad_norm": 0.1445195972919464, "learning_rate": 0.0009989654110609933, "loss": 1.7026, "step": 2822 }, { "epoch": 0.12556133564536925, "grad_norm": 0.1507134586572647, "learning_rate": 0.0009989631392742732, "loss": 1.7009, "step": 2824 }, { "epoch": 0.12565026010404162, "grad_norm": 0.16465866565704346, "learning_rate": 0.000998960864998642, "loss": 1.7034, "step": 2826 }, { "epoch": 0.12573918456271396, "grad_norm": 0.15758581459522247, "learning_rate": 0.0009989585882341113, "loss": 1.7026, "step": 2828 }, { "epoch": 0.12582810902138633, "grad_norm": 0.15651194751262665, "learning_rate": 0.0009989563089806924, "loss": 1.7085, "step": 2830 }, { "epoch": 0.1259170334800587, "grad_norm": 0.16078348457813263, "learning_rate": 0.0009989540272383962, "loss": 1.6964, "step": 2832 }, { "epoch": 0.12600595793873104, "grad_norm": 0.16136224567890167, "learning_rate": 0.000998951743007235, "loss": 1.6983, "step": 2834 }, { "epoch": 0.1260948823974034, "grad_norm": 0.1584000438451767, "learning_rate": 0.0009989494562872194, "loss": 1.6981, "step": 2836 }, { "epoch": 0.12618380685607578, "grad_norm": 0.15041996538639069, "learning_rate": 0.0009989471670783612, "loss": 1.7036, "step": 2838 }, { "epoch": 0.12627273131474812, "grad_norm": 0.14316673576831818, "learning_rate": 0.0009989448753806717, "loss": 1.7013, "step": 2840 }, { "epoch": 0.12636165577342048, "grad_norm": 0.1562669724225998, "learning_rate": 0.0009989425811941623, "loss": 1.7016, "step": 2842 }, { "epoch": 0.12645058023209282, "grad_norm": 0.15850582718849182, "learning_rate": 0.0009989402845188445, "loss": 1.6942, "step": 2844 }, { "epoch": 0.1265395046907652, "grad_norm": 0.14375914633274078, "learning_rate": 0.0009989379853547298, "loss": 1.699, "step": 2846 }, { "epoch": 0.12662842914943756, "grad_norm": 0.1460895985364914, "learning_rate": 0.0009989356837018296, "loss": 1.7018, "step": 2848 }, { "epoch": 0.1267173536081099, "grad_norm": 0.1511843204498291, "learning_rate": 0.0009989333795601553, "loss": 1.7008, "step": 2850 }, { "epoch": 0.12680627806678227, "grad_norm": 0.15189401805400848, "learning_rate": 0.0009989310729297185, "loss": 1.6961, "step": 2852 }, { "epoch": 0.12689520252545464, "grad_norm": 0.15254703164100647, "learning_rate": 0.0009989287638105307, "loss": 1.6967, "step": 2854 }, { "epoch": 0.12698412698412698, "grad_norm": 0.1455395370721817, "learning_rate": 0.0009989264522026033, "loss": 1.7, "step": 2856 }, { "epoch": 0.12707305144279935, "grad_norm": 0.14095422625541687, "learning_rate": 0.0009989241381059481, "loss": 1.6935, "step": 2858 }, { "epoch": 0.1271619759014717, "grad_norm": 0.14039580523967743, "learning_rate": 0.0009989218215205764, "loss": 1.7008, "step": 2860 }, { "epoch": 0.12725090036014405, "grad_norm": 0.14331957697868347, "learning_rate": 0.0009989195024465, "loss": 1.7059, "step": 2862 }, { "epoch": 0.12733982481881642, "grad_norm": 0.14794786274433136, "learning_rate": 0.0009989171808837303, "loss": 1.7019, "step": 2864 }, { "epoch": 0.12742874927748876, "grad_norm": 0.14051014184951782, "learning_rate": 0.0009989148568322786, "loss": 1.6935, "step": 2866 }, { "epoch": 0.12751767373616113, "grad_norm": 0.14383691549301147, "learning_rate": 0.0009989125302921572, "loss": 1.6944, "step": 2868 }, { "epoch": 0.1276065981948335, "grad_norm": 0.149238720536232, "learning_rate": 0.0009989102012633768, "loss": 1.7026, "step": 2870 }, { "epoch": 0.12769552265350584, "grad_norm": 0.1515042781829834, "learning_rate": 0.0009989078697459498, "loss": 1.6958, "step": 2872 }, { "epoch": 0.1277844471121782, "grad_norm": 0.14632724225521088, "learning_rate": 0.0009989055357398876, "loss": 1.7018, "step": 2874 }, { "epoch": 0.12787337157085057, "grad_norm": 0.14849677681922913, "learning_rate": 0.0009989031992452015, "loss": 1.6933, "step": 2876 }, { "epoch": 0.12796229602952292, "grad_norm": 0.15248118340969086, "learning_rate": 0.0009989008602619036, "loss": 1.6951, "step": 2878 }, { "epoch": 0.12805122048819528, "grad_norm": 0.14534400403499603, "learning_rate": 0.0009988985187900053, "loss": 1.6988, "step": 2880 }, { "epoch": 0.12814014494686762, "grad_norm": 0.14211168885231018, "learning_rate": 0.0009988961748295185, "loss": 1.695, "step": 2882 }, { "epoch": 0.12822906940554, "grad_norm": 0.14200276136398315, "learning_rate": 0.0009988938283804548, "loss": 1.6924, "step": 2884 }, { "epoch": 0.12831799386421236, "grad_norm": 0.13837294280529022, "learning_rate": 0.0009988914794428257, "loss": 1.7047, "step": 2886 }, { "epoch": 0.1284069183228847, "grad_norm": 0.14080971479415894, "learning_rate": 0.0009988891280166433, "loss": 1.6943, "step": 2888 }, { "epoch": 0.12849584278155707, "grad_norm": 0.14714248478412628, "learning_rate": 0.000998886774101919, "loss": 1.6969, "step": 2890 }, { "epoch": 0.12858476724022944, "grad_norm": 0.13803038001060486, "learning_rate": 0.0009988844176986645, "loss": 1.7008, "step": 2892 }, { "epoch": 0.12867369169890178, "grad_norm": 0.1391960233449936, "learning_rate": 0.0009988820588068918, "loss": 1.6993, "step": 2894 }, { "epoch": 0.12876261615757414, "grad_norm": 0.1502789705991745, "learning_rate": 0.0009988796974266126, "loss": 1.6965, "step": 2896 }, { "epoch": 0.12885154061624648, "grad_norm": 0.16213124990463257, "learning_rate": 0.0009988773335578387, "loss": 1.7027, "step": 2898 }, { "epoch": 0.12894046507491885, "grad_norm": 0.1447189748287201, "learning_rate": 0.0009988749672005819, "loss": 1.6945, "step": 2900 }, { "epoch": 0.12902938953359122, "grad_norm": 0.14806969463825226, "learning_rate": 0.000998872598354854, "loss": 1.6946, "step": 2902 }, { "epoch": 0.12911831399226356, "grad_norm": 0.14672061800956726, "learning_rate": 0.0009988702270206666, "loss": 1.6867, "step": 2904 }, { "epoch": 0.12920723845093593, "grad_norm": 0.1454840451478958, "learning_rate": 0.0009988678531980318, "loss": 1.6891, "step": 2906 }, { "epoch": 0.1292961629096083, "grad_norm": 0.13582098484039307, "learning_rate": 0.0009988654768869614, "loss": 1.6986, "step": 2908 }, { "epoch": 0.12938508736828064, "grad_norm": 0.13995283842086792, "learning_rate": 0.000998863098087467, "loss": 1.6971, "step": 2910 }, { "epoch": 0.129474011826953, "grad_norm": 0.1353904753923416, "learning_rate": 0.0009988607167995607, "loss": 1.6926, "step": 2912 }, { "epoch": 0.12956293628562537, "grad_norm": 0.13783226907253265, "learning_rate": 0.0009988583330232546, "loss": 1.6971, "step": 2914 }, { "epoch": 0.12965186074429771, "grad_norm": 0.14349259436130524, "learning_rate": 0.00099885594675856, "loss": 1.6915, "step": 2916 }, { "epoch": 0.12974078520297008, "grad_norm": 0.1502244919538498, "learning_rate": 0.0009988535580054893, "loss": 1.6919, "step": 2918 }, { "epoch": 0.12982970966164242, "grad_norm": 0.15048769116401672, "learning_rate": 0.0009988511667640542, "loss": 1.6992, "step": 2920 }, { "epoch": 0.1299186341203148, "grad_norm": 0.1476777046918869, "learning_rate": 0.0009988487730342667, "loss": 1.6874, "step": 2922 }, { "epoch": 0.13000755857898716, "grad_norm": 0.1491258591413498, "learning_rate": 0.0009988463768161387, "loss": 1.6969, "step": 2924 }, { "epoch": 0.1300964830376595, "grad_norm": 0.14041897654533386, "learning_rate": 0.0009988439781096821, "loss": 1.6917, "step": 2926 }, { "epoch": 0.13018540749633187, "grad_norm": 0.14257672429084778, "learning_rate": 0.0009988415769149092, "loss": 1.6875, "step": 2928 }, { "epoch": 0.13027433195500424, "grad_norm": 0.1400248110294342, "learning_rate": 0.0009988391732318313, "loss": 1.6904, "step": 2930 }, { "epoch": 0.13036325641367658, "grad_norm": 0.13869576156139374, "learning_rate": 0.000998836767060461, "loss": 1.6931, "step": 2932 }, { "epoch": 0.13045218087234894, "grad_norm": 0.15129977464675903, "learning_rate": 0.0009988343584008102, "loss": 1.6964, "step": 2934 }, { "epoch": 0.13054110533102128, "grad_norm": 0.14765389263629913, "learning_rate": 0.0009988319472528909, "loss": 1.691, "step": 2936 }, { "epoch": 0.13063002978969365, "grad_norm": 0.14335504174232483, "learning_rate": 0.0009988295336167147, "loss": 1.6875, "step": 2938 }, { "epoch": 0.13071895424836602, "grad_norm": 0.1386997550725937, "learning_rate": 0.0009988271174922942, "loss": 1.6867, "step": 2940 }, { "epoch": 0.13080787870703836, "grad_norm": 0.15370407700538635, "learning_rate": 0.0009988246988796412, "loss": 1.6949, "step": 2942 }, { "epoch": 0.13089680316571073, "grad_norm": 0.15418584644794464, "learning_rate": 0.000998822277778768, "loss": 1.6916, "step": 2944 }, { "epoch": 0.1309857276243831, "grad_norm": 0.15106600522994995, "learning_rate": 0.0009988198541896862, "loss": 1.695, "step": 2946 }, { "epoch": 0.13107465208305544, "grad_norm": 0.15028196573257446, "learning_rate": 0.000998817428112408, "loss": 1.6886, "step": 2948 }, { "epoch": 0.1311635765417278, "grad_norm": 0.14836204051971436, "learning_rate": 0.000998814999546946, "loss": 1.6949, "step": 2950 }, { "epoch": 0.13125250100040017, "grad_norm": 0.14906133711338043, "learning_rate": 0.000998812568493312, "loss": 1.6894, "step": 2952 }, { "epoch": 0.1313414254590725, "grad_norm": 0.14908622205257416, "learning_rate": 0.000998810134951518, "loss": 1.6903, "step": 2954 }, { "epoch": 0.13143034991774488, "grad_norm": 0.14440849423408508, "learning_rate": 0.0009988076989215762, "loss": 1.6966, "step": 2956 }, { "epoch": 0.13151927437641722, "grad_norm": 0.14841178059577942, "learning_rate": 0.0009988052604034989, "loss": 1.6882, "step": 2958 }, { "epoch": 0.1316081988350896, "grad_norm": 0.14035280048847198, "learning_rate": 0.0009988028193972983, "loss": 1.682, "step": 2960 }, { "epoch": 0.13169712329376196, "grad_norm": 0.1416475474834442, "learning_rate": 0.000998800375902986, "loss": 1.6944, "step": 2962 }, { "epoch": 0.1317860477524343, "grad_norm": 0.14471939206123352, "learning_rate": 0.0009987979299205748, "loss": 1.6878, "step": 2964 }, { "epoch": 0.13187497221110667, "grad_norm": 0.14979827404022217, "learning_rate": 0.000998795481450077, "loss": 1.6979, "step": 2966 }, { "epoch": 0.13196389666977903, "grad_norm": 0.14293459057807922, "learning_rate": 0.0009987930304915043, "loss": 1.697, "step": 2968 }, { "epoch": 0.13205282112845138, "grad_norm": 0.13399027287960052, "learning_rate": 0.0009987905770448694, "loss": 1.6866, "step": 2970 }, { "epoch": 0.13214174558712374, "grad_norm": 0.14252635836601257, "learning_rate": 0.0009987881211101842, "loss": 1.6921, "step": 2972 }, { "epoch": 0.13223067004579608, "grad_norm": 0.14642393589019775, "learning_rate": 0.0009987856626874608, "loss": 1.6899, "step": 2974 }, { "epoch": 0.13231959450446845, "grad_norm": 0.15148261189460754, "learning_rate": 0.000998783201776712, "loss": 1.6916, "step": 2976 }, { "epoch": 0.13240851896314082, "grad_norm": 0.14195556938648224, "learning_rate": 0.0009987807383779496, "loss": 1.691, "step": 2978 }, { "epoch": 0.13249744342181316, "grad_norm": 0.14468583464622498, "learning_rate": 0.0009987782724911862, "loss": 1.6911, "step": 2980 }, { "epoch": 0.13258636788048553, "grad_norm": 0.15198451280593872, "learning_rate": 0.0009987758041164342, "loss": 1.6966, "step": 2982 }, { "epoch": 0.1326752923391579, "grad_norm": 0.15600472688674927, "learning_rate": 0.0009987733332537053, "loss": 1.6958, "step": 2984 }, { "epoch": 0.13276421679783024, "grad_norm": 0.14877556264400482, "learning_rate": 0.0009987708599030125, "loss": 1.6921, "step": 2986 }, { "epoch": 0.1328531412565026, "grad_norm": 0.14063440263271332, "learning_rate": 0.0009987683840643677, "loss": 1.6859, "step": 2988 }, { "epoch": 0.13294206571517495, "grad_norm": 0.14707665145397186, "learning_rate": 0.0009987659057377834, "loss": 1.6789, "step": 2990 }, { "epoch": 0.1330309901738473, "grad_norm": 0.15149006247520447, "learning_rate": 0.000998763424923272, "loss": 1.6913, "step": 2992 }, { "epoch": 0.13311991463251968, "grad_norm": 0.14212511479854584, "learning_rate": 0.000998760941620846, "loss": 1.6941, "step": 2994 }, { "epoch": 0.13320883909119202, "grad_norm": 0.15261532366275787, "learning_rate": 0.0009987584558305174, "loss": 1.6848, "step": 2996 }, { "epoch": 0.1332977635498644, "grad_norm": 0.13972164690494537, "learning_rate": 0.000998755967552299, "loss": 1.6903, "step": 2998 }, { "epoch": 0.13338668800853676, "grad_norm": 0.16085447371006012, "learning_rate": 0.000998753476786203, "loss": 1.6976, "step": 3000 }, { "epoch": 0.13338668800853676, "eval_loss": 1.6553056240081787, "eval_runtime": 12.3492, "eval_samples_per_second": 559.55, "eval_steps_per_second": 69.964, "step": 3000 }, { "epoch": 0.1334756124672091, "grad_norm": 0.161129429936409, "learning_rate": 0.0009987509835322418, "loss": 1.685, "step": 3002 }, { "epoch": 0.13356453692588147, "grad_norm": 0.1477159708738327, "learning_rate": 0.000998748487790428, "loss": 1.687, "step": 3004 }, { "epoch": 0.13365346138455383, "grad_norm": 0.1458180546760559, "learning_rate": 0.000998745989560774, "loss": 1.6878, "step": 3006 }, { "epoch": 0.13374238584322617, "grad_norm": 0.15459588170051575, "learning_rate": 0.000998743488843292, "loss": 1.6907, "step": 3008 }, { "epoch": 0.13383131030189854, "grad_norm": 0.15089759230613708, "learning_rate": 0.000998740985637995, "loss": 1.6887, "step": 3010 }, { "epoch": 0.13392023476057088, "grad_norm": 0.14244233071804047, "learning_rate": 0.000998738479944895, "loss": 1.6896, "step": 3012 }, { "epoch": 0.13400915921924325, "grad_norm": 0.14067134261131287, "learning_rate": 0.0009987359717640047, "loss": 1.686, "step": 3014 }, { "epoch": 0.13409808367791562, "grad_norm": 0.1399071216583252, "learning_rate": 0.0009987334610953365, "loss": 1.6862, "step": 3016 }, { "epoch": 0.13418700813658796, "grad_norm": 0.14086857438087463, "learning_rate": 0.0009987309479389031, "loss": 1.6891, "step": 3018 }, { "epoch": 0.13427593259526033, "grad_norm": 0.14159750938415527, "learning_rate": 0.000998728432294717, "loss": 1.6865, "step": 3020 }, { "epoch": 0.1343648570539327, "grad_norm": 0.1503407210111618, "learning_rate": 0.0009987259141627904, "loss": 1.6943, "step": 3022 }, { "epoch": 0.13445378151260504, "grad_norm": 0.14096394181251526, "learning_rate": 0.0009987233935431366, "loss": 1.6877, "step": 3024 }, { "epoch": 0.1345427059712774, "grad_norm": 0.13867942988872528, "learning_rate": 0.0009987208704357673, "loss": 1.6827, "step": 3026 }, { "epoch": 0.13463163042994974, "grad_norm": 0.1480480581521988, "learning_rate": 0.0009987183448406957, "loss": 1.6863, "step": 3028 }, { "epoch": 0.1347205548886221, "grad_norm": 0.14531119167804718, "learning_rate": 0.0009987158167579344, "loss": 1.6917, "step": 3030 }, { "epoch": 0.13480947934729448, "grad_norm": 0.13942700624465942, "learning_rate": 0.0009987132861874956, "loss": 1.6876, "step": 3032 }, { "epoch": 0.13489840380596682, "grad_norm": 0.14429835975170135, "learning_rate": 0.0009987107531293923, "loss": 1.6851, "step": 3034 }, { "epoch": 0.1349873282646392, "grad_norm": 0.1691877692937851, "learning_rate": 0.000998708217583637, "loss": 1.6865, "step": 3036 }, { "epoch": 0.13507625272331156, "grad_norm": 0.15932905673980713, "learning_rate": 0.0009987056795502422, "loss": 1.6861, "step": 3038 }, { "epoch": 0.1351651771819839, "grad_norm": 0.15188652276992798, "learning_rate": 0.0009987031390292205, "loss": 1.6876, "step": 3040 }, { "epoch": 0.13525410164065627, "grad_norm": 0.1434570848941803, "learning_rate": 0.000998700596020585, "loss": 1.6842, "step": 3042 }, { "epoch": 0.13534302609932863, "grad_norm": 0.1466592699289322, "learning_rate": 0.000998698050524348, "loss": 1.6867, "step": 3044 }, { "epoch": 0.13543195055800097, "grad_norm": 0.1536661833524704, "learning_rate": 0.0009986955025405226, "loss": 1.6888, "step": 3046 }, { "epoch": 0.13552087501667334, "grad_norm": 0.14562800526618958, "learning_rate": 0.000998692952069121, "loss": 1.6877, "step": 3048 }, { "epoch": 0.13560979947534568, "grad_norm": 0.14590971171855927, "learning_rate": 0.0009986903991101564, "loss": 1.6895, "step": 3050 }, { "epoch": 0.13569872393401805, "grad_norm": 0.1405206024646759, "learning_rate": 0.0009986878436636412, "loss": 1.6892, "step": 3052 }, { "epoch": 0.13578764839269042, "grad_norm": 0.13950257003307343, "learning_rate": 0.0009986852857295882, "loss": 1.6866, "step": 3054 }, { "epoch": 0.13587657285136276, "grad_norm": 0.14230984449386597, "learning_rate": 0.0009986827253080101, "loss": 1.6809, "step": 3056 }, { "epoch": 0.13596549731003513, "grad_norm": 0.13012036681175232, "learning_rate": 0.00099868016239892, "loss": 1.6902, "step": 3058 }, { "epoch": 0.1360544217687075, "grad_norm": 0.13848593831062317, "learning_rate": 0.0009986775970023305, "loss": 1.6759, "step": 3060 }, { "epoch": 0.13614334622737984, "grad_norm": 0.14082428812980652, "learning_rate": 0.0009986750291182543, "loss": 1.6812, "step": 3062 }, { "epoch": 0.1362322706860522, "grad_norm": 0.1358221471309662, "learning_rate": 0.0009986724587467044, "loss": 1.6845, "step": 3064 }, { "epoch": 0.13632119514472454, "grad_norm": 0.13674212992191315, "learning_rate": 0.0009986698858876933, "loss": 1.6857, "step": 3066 }, { "epoch": 0.1364101196033969, "grad_norm": 0.13396227359771729, "learning_rate": 0.000998667310541234, "loss": 1.6859, "step": 3068 }, { "epoch": 0.13649904406206928, "grad_norm": 0.1398070603609085, "learning_rate": 0.0009986647327073397, "loss": 1.6786, "step": 3070 }, { "epoch": 0.13658796852074162, "grad_norm": 0.136416956782341, "learning_rate": 0.0009986621523860226, "loss": 1.6759, "step": 3072 }, { "epoch": 0.136676892979414, "grad_norm": 0.13669733703136444, "learning_rate": 0.0009986595695772962, "loss": 1.6856, "step": 3074 }, { "epoch": 0.13676581743808636, "grad_norm": 0.14353300631046295, "learning_rate": 0.0009986569842811727, "loss": 1.688, "step": 3076 }, { "epoch": 0.1368547418967587, "grad_norm": 0.1414812058210373, "learning_rate": 0.0009986543964976657, "loss": 1.6793, "step": 3078 }, { "epoch": 0.13694366635543107, "grad_norm": 0.13965506851673126, "learning_rate": 0.0009986518062267877, "loss": 1.6824, "step": 3080 }, { "epoch": 0.1370325908141034, "grad_norm": 0.14354394376277924, "learning_rate": 0.000998649213468552, "loss": 1.6847, "step": 3082 }, { "epoch": 0.13712151527277577, "grad_norm": 0.13872870802879333, "learning_rate": 0.0009986466182229708, "loss": 1.684, "step": 3084 }, { "epoch": 0.13721043973144814, "grad_norm": 0.1326591521501541, "learning_rate": 0.0009986440204900577, "loss": 1.6856, "step": 3086 }, { "epoch": 0.13729936419012048, "grad_norm": 0.14356881380081177, "learning_rate": 0.0009986414202698254, "loss": 1.6777, "step": 3088 }, { "epoch": 0.13738828864879285, "grad_norm": 0.14380203187465668, "learning_rate": 0.0009986388175622872, "loss": 1.6864, "step": 3090 }, { "epoch": 0.13747721310746522, "grad_norm": 0.3013915419578552, "learning_rate": 0.0009986362123674554, "loss": 1.6923, "step": 3092 }, { "epoch": 0.13756613756613756, "grad_norm": 0.26866304874420166, "learning_rate": 0.0009986336046853438, "loss": 1.6887, "step": 3094 }, { "epoch": 0.13765506202480993, "grad_norm": 0.16091522574424744, "learning_rate": 0.0009986309945159647, "loss": 1.6846, "step": 3096 }, { "epoch": 0.1377439864834823, "grad_norm": 0.22362981736660004, "learning_rate": 0.0009986283818593314, "loss": 1.6896, "step": 3098 }, { "epoch": 0.13783291094215464, "grad_norm": 0.2674184739589691, "learning_rate": 0.0009986257667154571, "loss": 1.6874, "step": 3100 }, { "epoch": 0.137921835400827, "grad_norm": 0.17050045728683472, "learning_rate": 0.0009986231490843548, "loss": 1.6888, "step": 3102 }, { "epoch": 0.13801075985949934, "grad_norm": 0.1663951575756073, "learning_rate": 0.0009986205289660372, "loss": 1.6818, "step": 3104 }, { "epoch": 0.1380996843181717, "grad_norm": 0.14594919979572296, "learning_rate": 0.000998617906360518, "loss": 1.6865, "step": 3106 }, { "epoch": 0.13818860877684408, "grad_norm": 0.17260988056659698, "learning_rate": 0.0009986152812678096, "loss": 1.6838, "step": 3108 }, { "epoch": 0.13827753323551642, "grad_norm": 0.15733318030834198, "learning_rate": 0.0009986126536879255, "loss": 1.6901, "step": 3110 }, { "epoch": 0.1383664576941888, "grad_norm": 0.1493389755487442, "learning_rate": 0.0009986100236208787, "loss": 1.6869, "step": 3112 }, { "epoch": 0.13845538215286116, "grad_norm": 0.16421248018741608, "learning_rate": 0.0009986073910666824, "loss": 1.6875, "step": 3114 }, { "epoch": 0.1385443066115335, "grad_norm": 0.15142826735973358, "learning_rate": 0.0009986047560253497, "loss": 1.6803, "step": 3116 }, { "epoch": 0.13863323107020586, "grad_norm": 0.148903951048851, "learning_rate": 0.0009986021184968936, "loss": 1.6907, "step": 3118 }, { "epoch": 0.1387221555288782, "grad_norm": 0.1498846411705017, "learning_rate": 0.0009985994784813274, "loss": 1.6831, "step": 3120 }, { "epoch": 0.13881107998755057, "grad_norm": 0.14996740221977234, "learning_rate": 0.0009985968359786644, "loss": 1.6851, "step": 3122 }, { "epoch": 0.13890000444622294, "grad_norm": 0.15415999293327332, "learning_rate": 0.0009985941909889174, "loss": 1.6834, "step": 3124 }, { "epoch": 0.13898892890489528, "grad_norm": 0.14532987773418427, "learning_rate": 0.0009985915435121, "loss": 1.6852, "step": 3126 }, { "epoch": 0.13907785336356765, "grad_norm": 0.1408744752407074, "learning_rate": 0.0009985888935482251, "loss": 1.6789, "step": 3128 }, { "epoch": 0.13916677782224002, "grad_norm": 0.14001396298408508, "learning_rate": 0.000998586241097306, "loss": 1.6841, "step": 3130 }, { "epoch": 0.13925570228091236, "grad_norm": 0.1425395905971527, "learning_rate": 0.000998583586159356, "loss": 1.6739, "step": 3132 }, { "epoch": 0.13934462673958473, "grad_norm": 0.13320642709732056, "learning_rate": 0.0009985809287343882, "loss": 1.6881, "step": 3134 }, { "epoch": 0.1394335511982571, "grad_norm": 0.13793528079986572, "learning_rate": 0.0009985782688224163, "loss": 1.6859, "step": 3136 }, { "epoch": 0.13952247565692943, "grad_norm": 0.14627783000469208, "learning_rate": 0.000998575606423453, "loss": 1.6844, "step": 3138 }, { "epoch": 0.1396114001156018, "grad_norm": 0.1399577409029007, "learning_rate": 0.0009985729415375119, "loss": 1.6793, "step": 3140 }, { "epoch": 0.13970032457427414, "grad_norm": 0.14193718135356903, "learning_rate": 0.000998570274164606, "loss": 1.6811, "step": 3142 }, { "epoch": 0.1397892490329465, "grad_norm": 0.13408489525318146, "learning_rate": 0.0009985676043047492, "loss": 1.6811, "step": 3144 }, { "epoch": 0.13987817349161888, "grad_norm": 0.13769420981407166, "learning_rate": 0.0009985649319579541, "loss": 1.6786, "step": 3146 }, { "epoch": 0.13996709795029122, "grad_norm": 0.1458839327096939, "learning_rate": 0.0009985622571242346, "loss": 1.6786, "step": 3148 }, { "epoch": 0.1400560224089636, "grad_norm": 0.15078197419643402, "learning_rate": 0.0009985595798036036, "loss": 1.6774, "step": 3150 }, { "epoch": 0.14014494686763596, "grad_norm": 0.13975287973880768, "learning_rate": 0.000998556899996075, "loss": 1.677, "step": 3152 }, { "epoch": 0.1402338713263083, "grad_norm": 0.13561749458312988, "learning_rate": 0.0009985542177016614, "loss": 1.6851, "step": 3154 }, { "epoch": 0.14032279578498066, "grad_norm": 0.1398334950208664, "learning_rate": 0.0009985515329203767, "loss": 1.6826, "step": 3156 }, { "epoch": 0.140411720243653, "grad_norm": 0.137189120054245, "learning_rate": 0.0009985488456522341, "loss": 1.6829, "step": 3158 }, { "epoch": 0.14050064470232537, "grad_norm": 0.14057347178459167, "learning_rate": 0.0009985461558972475, "loss": 1.6825, "step": 3160 }, { "epoch": 0.14058956916099774, "grad_norm": 0.14627771079540253, "learning_rate": 0.0009985434636554295, "loss": 1.6822, "step": 3162 }, { "epoch": 0.14067849361967008, "grad_norm": 0.14206919074058533, "learning_rate": 0.0009985407689267942, "loss": 1.6782, "step": 3164 }, { "epoch": 0.14076741807834245, "grad_norm": 0.13843832910060883, "learning_rate": 0.0009985380717113547, "loss": 1.6851, "step": 3166 }, { "epoch": 0.14085634253701482, "grad_norm": 0.13097672164440155, "learning_rate": 0.0009985353720091245, "loss": 1.6769, "step": 3168 }, { "epoch": 0.14094526699568716, "grad_norm": 0.1339416652917862, "learning_rate": 0.0009985326698201174, "loss": 1.6851, "step": 3170 }, { "epoch": 0.14103419145435953, "grad_norm": 0.1318987011909485, "learning_rate": 0.0009985299651443463, "loss": 1.6804, "step": 3172 }, { "epoch": 0.14112311591303187, "grad_norm": 0.12795791029930115, "learning_rate": 0.000998527257981825, "loss": 1.6775, "step": 3174 }, { "epoch": 0.14121204037170423, "grad_norm": 0.1405027210712433, "learning_rate": 0.000998524548332567, "loss": 1.6753, "step": 3176 }, { "epoch": 0.1413009648303766, "grad_norm": 0.13704566657543182, "learning_rate": 0.000998521836196586, "loss": 1.6747, "step": 3178 }, { "epoch": 0.14138988928904894, "grad_norm": 0.13784196972846985, "learning_rate": 0.000998519121573895, "loss": 1.6833, "step": 3180 }, { "epoch": 0.1414788137477213, "grad_norm": 0.14620931446552277, "learning_rate": 0.0009985164044645082, "loss": 1.6831, "step": 3182 }, { "epoch": 0.14156773820639368, "grad_norm": 0.13323108851909637, "learning_rate": 0.0009985136848684386, "loss": 1.6781, "step": 3184 }, { "epoch": 0.14165666266506602, "grad_norm": 0.13440033793449402, "learning_rate": 0.0009985109627857, "loss": 1.6794, "step": 3186 }, { "epoch": 0.1417455871237384, "grad_norm": 0.13603028655052185, "learning_rate": 0.000998508238216306, "loss": 1.6712, "step": 3188 }, { "epoch": 0.14183451158241075, "grad_norm": 0.14003223180770874, "learning_rate": 0.0009985055111602704, "loss": 1.6754, "step": 3190 }, { "epoch": 0.1419234360410831, "grad_norm": 0.13278284668922424, "learning_rate": 0.0009985027816176062, "loss": 1.6805, "step": 3192 }, { "epoch": 0.14201236049975546, "grad_norm": 0.1399509310722351, "learning_rate": 0.0009985000495883276, "loss": 1.6724, "step": 3194 }, { "epoch": 0.1421012849584278, "grad_norm": 0.14798682928085327, "learning_rate": 0.0009984973150724478, "loss": 1.6767, "step": 3196 }, { "epoch": 0.14219020941710017, "grad_norm": 0.14598892629146576, "learning_rate": 0.000998494578069981, "loss": 1.6795, "step": 3198 }, { "epoch": 0.14227913387577254, "grad_norm": 0.14363674819469452, "learning_rate": 0.0009984918385809402, "loss": 1.6739, "step": 3200 }, { "epoch": 0.14236805833444488, "grad_norm": 0.14036902785301208, "learning_rate": 0.0009984890966053396, "loss": 1.6797, "step": 3202 }, { "epoch": 0.14245698279311725, "grad_norm": 0.13619792461395264, "learning_rate": 0.0009984863521431925, "loss": 1.6821, "step": 3204 }, { "epoch": 0.14254590725178962, "grad_norm": 0.14569631218910217, "learning_rate": 0.0009984836051945124, "loss": 1.6698, "step": 3206 }, { "epoch": 0.14263483171046196, "grad_norm": 0.14593179523944855, "learning_rate": 0.0009984808557593138, "loss": 1.6768, "step": 3208 }, { "epoch": 0.14272375616913432, "grad_norm": 0.1467268168926239, "learning_rate": 0.0009984781038376099, "loss": 1.6805, "step": 3210 }, { "epoch": 0.14281268062780667, "grad_norm": 0.13134878873825073, "learning_rate": 0.0009984753494294144, "loss": 1.6728, "step": 3212 }, { "epoch": 0.14290160508647903, "grad_norm": 0.13109003007411957, "learning_rate": 0.000998472592534741, "loss": 1.6745, "step": 3214 }, { "epoch": 0.1429905295451514, "grad_norm": 0.14403113722801208, "learning_rate": 0.0009984698331536035, "loss": 1.6733, "step": 3216 }, { "epoch": 0.14307945400382374, "grad_norm": 0.14508023858070374, "learning_rate": 0.000998467071286016, "loss": 1.6752, "step": 3218 }, { "epoch": 0.1431683784624961, "grad_norm": 0.13933469355106354, "learning_rate": 0.0009984643069319919, "loss": 1.676, "step": 3220 }, { "epoch": 0.14325730292116848, "grad_norm": 0.13462568819522858, "learning_rate": 0.000998461540091545, "loss": 1.6755, "step": 3222 }, { "epoch": 0.14334622737984082, "grad_norm": 0.13455134630203247, "learning_rate": 0.0009984587707646893, "loss": 1.678, "step": 3224 }, { "epoch": 0.1434351518385132, "grad_norm": 0.13943925499916077, "learning_rate": 0.0009984559989514386, "loss": 1.6716, "step": 3226 }, { "epoch": 0.14352407629718555, "grad_norm": 0.14231517910957336, "learning_rate": 0.0009984532246518065, "loss": 1.6776, "step": 3228 }, { "epoch": 0.1436130007558579, "grad_norm": 0.14278076589107513, "learning_rate": 0.000998450447865807, "loss": 1.6738, "step": 3230 }, { "epoch": 0.14370192521453026, "grad_norm": 0.1441042423248291, "learning_rate": 0.000998447668593454, "loss": 1.6722, "step": 3232 }, { "epoch": 0.1437908496732026, "grad_norm": 0.13601696491241455, "learning_rate": 0.000998444886834761, "loss": 1.6692, "step": 3234 }, { "epoch": 0.14387977413187497, "grad_norm": 0.14175257086753845, "learning_rate": 0.0009984421025897425, "loss": 1.6691, "step": 3236 }, { "epoch": 0.14396869859054734, "grad_norm": 0.1315532624721527, "learning_rate": 0.000998439315858412, "loss": 1.6729, "step": 3238 }, { "epoch": 0.14405762304921968, "grad_norm": 0.13552317023277283, "learning_rate": 0.0009984365266407834, "loss": 1.6766, "step": 3240 }, { "epoch": 0.14414654750789205, "grad_norm": 0.13469535112380981, "learning_rate": 0.0009984337349368707, "loss": 1.6747, "step": 3242 }, { "epoch": 0.14423547196656442, "grad_norm": 0.1256866753101349, "learning_rate": 0.0009984309407466878, "loss": 1.6759, "step": 3244 }, { "epoch": 0.14432439642523676, "grad_norm": 0.13308246433734894, "learning_rate": 0.0009984281440702486, "loss": 1.6753, "step": 3246 }, { "epoch": 0.14441332088390912, "grad_norm": 0.134820818901062, "learning_rate": 0.0009984253449075672, "loss": 1.683, "step": 3248 }, { "epoch": 0.14450224534258146, "grad_norm": 0.13477694988250732, "learning_rate": 0.0009984225432586571, "loss": 1.6735, "step": 3250 }, { "epoch": 0.14459116980125383, "grad_norm": 0.13332480192184448, "learning_rate": 0.000998419739123533, "loss": 1.6705, "step": 3252 }, { "epoch": 0.1446800942599262, "grad_norm": 0.13237187266349792, "learning_rate": 0.0009984169325022083, "loss": 1.6745, "step": 3254 }, { "epoch": 0.14476901871859854, "grad_norm": 0.1395091861486435, "learning_rate": 0.0009984141233946972, "loss": 1.6736, "step": 3256 }, { "epoch": 0.1448579431772709, "grad_norm": 0.13899393379688263, "learning_rate": 0.0009984113118010139, "loss": 1.6721, "step": 3258 }, { "epoch": 0.14494686763594328, "grad_norm": 0.1368132382631302, "learning_rate": 0.0009984084977211721, "loss": 1.6735, "step": 3260 }, { "epoch": 0.14503579209461562, "grad_norm": 0.13790659606456757, "learning_rate": 0.000998405681155186, "loss": 1.6792, "step": 3262 }, { "epoch": 0.14512471655328799, "grad_norm": 0.13561876118183136, "learning_rate": 0.0009984028621030697, "loss": 1.6753, "step": 3264 }, { "epoch": 0.14521364101196033, "grad_norm": 0.14658573269844055, "learning_rate": 0.000998400040564837, "loss": 1.6835, "step": 3266 }, { "epoch": 0.1453025654706327, "grad_norm": 0.14082130789756775, "learning_rate": 0.0009983972165405024, "loss": 1.6713, "step": 3268 }, { "epoch": 0.14539148992930506, "grad_norm": 0.13444170355796814, "learning_rate": 0.0009983943900300795, "loss": 1.6771, "step": 3270 }, { "epoch": 0.1454804143879774, "grad_norm": 0.1398838758468628, "learning_rate": 0.000998391561033583, "loss": 1.677, "step": 3272 }, { "epoch": 0.14556933884664977, "grad_norm": 0.1403886079788208, "learning_rate": 0.0009983887295510264, "loss": 1.6713, "step": 3274 }, { "epoch": 0.14565826330532214, "grad_norm": 0.12883295118808746, "learning_rate": 0.000998385895582424, "loss": 1.6749, "step": 3276 }, { "epoch": 0.14574718776399448, "grad_norm": 0.1351172924041748, "learning_rate": 0.0009983830591277902, "loss": 1.669, "step": 3278 }, { "epoch": 0.14583611222266685, "grad_norm": 0.12804821133613586, "learning_rate": 0.000998380220187139, "loss": 1.6777, "step": 3280 }, { "epoch": 0.14592503668133922, "grad_norm": 0.13905996084213257, "learning_rate": 0.0009983773787604847, "loss": 1.6725, "step": 3282 }, { "epoch": 0.14601396114001156, "grad_norm": 0.14166028797626495, "learning_rate": 0.000998374534847841, "loss": 1.6792, "step": 3284 }, { "epoch": 0.14610288559868392, "grad_norm": 0.13185730576515198, "learning_rate": 0.0009983716884492224, "loss": 1.6752, "step": 3286 }, { "epoch": 0.14619181005735626, "grad_norm": 0.12768588960170746, "learning_rate": 0.0009983688395646434, "loss": 1.6759, "step": 3288 }, { "epoch": 0.14628073451602863, "grad_norm": 0.13263854384422302, "learning_rate": 0.0009983659881941177, "loss": 1.6696, "step": 3290 }, { "epoch": 0.146369658974701, "grad_norm": 0.12498720735311508, "learning_rate": 0.0009983631343376596, "loss": 1.674, "step": 3292 }, { "epoch": 0.14645858343337334, "grad_norm": 0.13268351554870605, "learning_rate": 0.0009983602779952837, "loss": 1.6687, "step": 3294 }, { "epoch": 0.1465475078920457, "grad_norm": 0.13163034617900848, "learning_rate": 0.0009983574191670037, "loss": 1.667, "step": 3296 }, { "epoch": 0.14663643235071808, "grad_norm": 0.13976821303367615, "learning_rate": 0.0009983545578528344, "loss": 1.6731, "step": 3298 }, { "epoch": 0.14672535680939042, "grad_norm": 0.1371973156929016, "learning_rate": 0.0009983516940527898, "loss": 1.6723, "step": 3300 }, { "epoch": 0.14681428126806279, "grad_norm": 0.13065384328365326, "learning_rate": 0.0009983488277668841, "loss": 1.6682, "step": 3302 }, { "epoch": 0.14690320572673513, "grad_norm": 0.1396087110042572, "learning_rate": 0.000998345958995132, "loss": 1.6707, "step": 3304 }, { "epoch": 0.1469921301854075, "grad_norm": 0.13367894291877747, "learning_rate": 0.0009983430877375475, "loss": 1.6715, "step": 3306 }, { "epoch": 0.14708105464407986, "grad_norm": 0.14132438600063324, "learning_rate": 0.0009983402139941448, "loss": 1.6709, "step": 3308 }, { "epoch": 0.1471699791027522, "grad_norm": 0.13156946003437042, "learning_rate": 0.0009983373377649384, "loss": 1.6656, "step": 3310 }, { "epoch": 0.14725890356142457, "grad_norm": 0.12967397272586823, "learning_rate": 0.0009983344590499426, "loss": 1.6671, "step": 3312 }, { "epoch": 0.14734782802009694, "grad_norm": 0.12589845061302185, "learning_rate": 0.000998331577849172, "loss": 1.6755, "step": 3314 }, { "epoch": 0.14743675247876928, "grad_norm": 0.12698271870613098, "learning_rate": 0.0009983286941626407, "loss": 1.6676, "step": 3316 }, { "epoch": 0.14752567693744165, "grad_norm": 0.1336132436990738, "learning_rate": 0.000998325807990363, "loss": 1.6753, "step": 3318 }, { "epoch": 0.14761460139611401, "grad_norm": 0.1344204992055893, "learning_rate": 0.0009983229193323536, "loss": 1.6665, "step": 3320 }, { "epoch": 0.14770352585478635, "grad_norm": 0.13292238116264343, "learning_rate": 0.0009983200281886266, "loss": 1.6717, "step": 3322 }, { "epoch": 0.14779245031345872, "grad_norm": 0.13287590444087982, "learning_rate": 0.0009983171345591968, "loss": 1.6657, "step": 3324 }, { "epoch": 0.14788137477213106, "grad_norm": 0.13067129254341125, "learning_rate": 0.0009983142384440782, "loss": 1.6659, "step": 3326 }, { "epoch": 0.14797029923080343, "grad_norm": 0.1255059391260147, "learning_rate": 0.0009983113398432857, "loss": 1.6669, "step": 3328 }, { "epoch": 0.1480592236894758, "grad_norm": 0.13463814556598663, "learning_rate": 0.0009983084387568334, "loss": 1.6683, "step": 3330 }, { "epoch": 0.14814814814814814, "grad_norm": 0.13709482550621033, "learning_rate": 0.0009983055351847358, "loss": 1.6648, "step": 3332 }, { "epoch": 0.1482370726068205, "grad_norm": 0.13506925106048584, "learning_rate": 0.0009983026291270076, "loss": 1.6638, "step": 3334 }, { "epoch": 0.14832599706549288, "grad_norm": 0.13321250677108765, "learning_rate": 0.0009982997205836633, "loss": 1.6754, "step": 3336 }, { "epoch": 0.14841492152416522, "grad_norm": 0.12868818640708923, "learning_rate": 0.000998296809554717, "loss": 1.6656, "step": 3338 }, { "epoch": 0.14850384598283758, "grad_norm": 0.12971720099449158, "learning_rate": 0.0009982938960401838, "loss": 1.6697, "step": 3340 }, { "epoch": 0.14859277044150992, "grad_norm": 0.13777963817119598, "learning_rate": 0.0009982909800400777, "loss": 1.6691, "step": 3342 }, { "epoch": 0.1486816949001823, "grad_norm": 0.134913831949234, "learning_rate": 0.0009982880615544134, "loss": 1.6654, "step": 3344 }, { "epoch": 0.14877061935885466, "grad_norm": 0.14073821902275085, "learning_rate": 0.0009982851405832057, "loss": 1.6667, "step": 3346 }, { "epoch": 0.148859543817527, "grad_norm": 0.12406160682439804, "learning_rate": 0.000998282217126469, "loss": 1.6641, "step": 3348 }, { "epoch": 0.14894846827619937, "grad_norm": 0.13188298046588898, "learning_rate": 0.000998279291184218, "loss": 1.6682, "step": 3350 }, { "epoch": 0.14903739273487174, "grad_norm": 0.13892415165901184, "learning_rate": 0.000998276362756467, "loss": 1.6669, "step": 3352 }, { "epoch": 0.14912631719354408, "grad_norm": 0.13346289098262787, "learning_rate": 0.0009982734318432307, "loss": 1.6626, "step": 3354 }, { "epoch": 0.14921524165221645, "grad_norm": 0.1317860335111618, "learning_rate": 0.000998270498444524, "loss": 1.664, "step": 3356 }, { "epoch": 0.1493041661108888, "grad_norm": 0.13750551640987396, "learning_rate": 0.0009982675625603613, "loss": 1.669, "step": 3358 }, { "epoch": 0.14939309056956115, "grad_norm": 0.13547618687152863, "learning_rate": 0.0009982646241907575, "loss": 1.664, "step": 3360 }, { "epoch": 0.14948201502823352, "grad_norm": 0.13562791049480438, "learning_rate": 0.0009982616833357268, "loss": 1.6637, "step": 3362 }, { "epoch": 0.14957093948690586, "grad_norm": 0.13718074560165405, "learning_rate": 0.0009982587399952841, "loss": 1.6725, "step": 3364 }, { "epoch": 0.14965986394557823, "grad_norm": 0.14120802283287048, "learning_rate": 0.0009982557941694442, "loss": 1.6668, "step": 3366 }, { "epoch": 0.1497487884042506, "grad_norm": 0.13966819643974304, "learning_rate": 0.0009982528458582216, "loss": 1.6678, "step": 3368 }, { "epoch": 0.14983771286292294, "grad_norm": 0.13596081733703613, "learning_rate": 0.000998249895061631, "loss": 1.6628, "step": 3370 }, { "epoch": 0.1499266373215953, "grad_norm": 0.1467939019203186, "learning_rate": 0.0009982469417796874, "loss": 1.6667, "step": 3372 }, { "epoch": 0.15001556178026768, "grad_norm": 0.13276167213916779, "learning_rate": 0.0009982439860124054, "loss": 1.6716, "step": 3374 }, { "epoch": 0.15010448623894002, "grad_norm": 0.1353362649679184, "learning_rate": 0.0009982410277597994, "loss": 1.6746, "step": 3376 }, { "epoch": 0.15019341069761238, "grad_norm": 0.13252989947795868, "learning_rate": 0.0009982380670218848, "loss": 1.6693, "step": 3378 }, { "epoch": 0.15028233515628472, "grad_norm": 0.1437995731830597, "learning_rate": 0.000998235103798676, "loss": 1.6689, "step": 3380 }, { "epoch": 0.1503712596149571, "grad_norm": 0.13310275971889496, "learning_rate": 0.0009982321380901874, "loss": 1.6703, "step": 3382 }, { "epoch": 0.15046018407362946, "grad_norm": 0.13596755266189575, "learning_rate": 0.0009982291698964348, "loss": 1.673, "step": 3384 }, { "epoch": 0.1505491085323018, "grad_norm": 0.13616810739040375, "learning_rate": 0.000998226199217432, "loss": 1.67, "step": 3386 }, { "epoch": 0.15063803299097417, "grad_norm": 0.13550317287445068, "learning_rate": 0.0009982232260531943, "loss": 1.663, "step": 3388 }, { "epoch": 0.15072695744964654, "grad_norm": 0.14142318069934845, "learning_rate": 0.0009982202504037363, "loss": 1.6656, "step": 3390 }, { "epoch": 0.15081588190831888, "grad_norm": 0.13731466233730316, "learning_rate": 0.0009982172722690732, "loss": 1.6683, "step": 3392 }, { "epoch": 0.15090480636699125, "grad_norm": 0.14071650803089142, "learning_rate": 0.0009982142916492196, "loss": 1.6575, "step": 3394 }, { "epoch": 0.15099373082566359, "grad_norm": 0.13527441024780273, "learning_rate": 0.0009982113085441903, "loss": 1.6707, "step": 3396 }, { "epoch": 0.15108265528433595, "grad_norm": 0.1322237253189087, "learning_rate": 0.0009982083229540003, "loss": 1.6704, "step": 3398 }, { "epoch": 0.15117157974300832, "grad_norm": 0.13059936463832855, "learning_rate": 0.0009982053348786648, "loss": 1.6671, "step": 3400 }, { "epoch": 0.15126050420168066, "grad_norm": 0.1345200389623642, "learning_rate": 0.000998202344318198, "loss": 1.6644, "step": 3402 }, { "epoch": 0.15134942866035303, "grad_norm": 0.13381880521774292, "learning_rate": 0.0009981993512726153, "loss": 1.6649, "step": 3404 }, { "epoch": 0.1514383531190254, "grad_norm": 0.12679660320281982, "learning_rate": 0.0009981963557419318, "loss": 1.6672, "step": 3406 }, { "epoch": 0.15152727757769774, "grad_norm": 0.12676650285720825, "learning_rate": 0.000998193357726162, "loss": 1.6654, "step": 3408 }, { "epoch": 0.1516162020363701, "grad_norm": 0.13394631445407867, "learning_rate": 0.0009981903572253209, "loss": 1.6672, "step": 3410 }, { "epoch": 0.15170512649504247, "grad_norm": 0.1335240751504898, "learning_rate": 0.0009981873542394238, "loss": 1.6594, "step": 3412 }, { "epoch": 0.15179405095371482, "grad_norm": 0.13069608807563782, "learning_rate": 0.0009981843487684855, "loss": 1.6684, "step": 3414 }, { "epoch": 0.15188297541238718, "grad_norm": 0.1335192620754242, "learning_rate": 0.0009981813408125207, "loss": 1.6606, "step": 3416 }, { "epoch": 0.15197189987105952, "grad_norm": 0.13265785574913025, "learning_rate": 0.0009981783303715447, "loss": 1.6701, "step": 3418 }, { "epoch": 0.1520608243297319, "grad_norm": 0.13182604312896729, "learning_rate": 0.0009981753174455727, "loss": 1.6665, "step": 3420 }, { "epoch": 0.15214974878840426, "grad_norm": 0.13200457394123077, "learning_rate": 0.0009981723020346196, "loss": 1.6655, "step": 3422 }, { "epoch": 0.1522386732470766, "grad_norm": 0.13492505252361298, "learning_rate": 0.0009981692841387001, "loss": 1.669, "step": 3424 }, { "epoch": 0.15232759770574897, "grad_norm": 0.13645698130130768, "learning_rate": 0.0009981662637578298, "loss": 1.6681, "step": 3426 }, { "epoch": 0.15241652216442134, "grad_norm": 0.1314222663640976, "learning_rate": 0.0009981632408920234, "loss": 1.6653, "step": 3428 }, { "epoch": 0.15250544662309368, "grad_norm": 0.1272493451833725, "learning_rate": 0.000998160215541296, "loss": 1.6654, "step": 3430 }, { "epoch": 0.15259437108176604, "grad_norm": 0.13651682436466217, "learning_rate": 0.0009981571877056628, "loss": 1.6594, "step": 3432 }, { "epoch": 0.15268329554043839, "grad_norm": 0.137037456035614, "learning_rate": 0.0009981541573851388, "loss": 1.6631, "step": 3434 }, { "epoch": 0.15277221999911075, "grad_norm": 0.13613489270210266, "learning_rate": 0.0009981511245797392, "loss": 1.6649, "step": 3436 }, { "epoch": 0.15286114445778312, "grad_norm": 0.14442335069179535, "learning_rate": 0.000998148089289479, "loss": 1.6635, "step": 3438 }, { "epoch": 0.15295006891645546, "grad_norm": 0.13192929327487946, "learning_rate": 0.0009981450515143735, "loss": 1.6667, "step": 3440 }, { "epoch": 0.15303899337512783, "grad_norm": 0.13971135020256042, "learning_rate": 0.000998142011254438, "loss": 1.6672, "step": 3442 }, { "epoch": 0.1531279178338002, "grad_norm": 0.1356842964887619, "learning_rate": 0.0009981389685096871, "loss": 1.6578, "step": 3444 }, { "epoch": 0.15321684229247254, "grad_norm": 0.12647144496440887, "learning_rate": 0.0009981359232801366, "loss": 1.6622, "step": 3446 }, { "epoch": 0.1533057667511449, "grad_norm": 0.1329360455274582, "learning_rate": 0.0009981328755658014, "loss": 1.6615, "step": 3448 }, { "epoch": 0.15339469120981725, "grad_norm": 0.13213002681732178, "learning_rate": 0.0009981298253666966, "loss": 1.6571, "step": 3450 }, { "epoch": 0.15348361566848961, "grad_norm": 0.13628779351711273, "learning_rate": 0.0009981267726828379, "loss": 1.6632, "step": 3452 }, { "epoch": 0.15357254012716198, "grad_norm": 0.1299857646226883, "learning_rate": 0.0009981237175142397, "loss": 1.6656, "step": 3454 }, { "epoch": 0.15366146458583432, "grad_norm": 0.14375044405460358, "learning_rate": 0.000998120659860918, "loss": 1.6644, "step": 3456 }, { "epoch": 0.1537503890445067, "grad_norm": 0.1285606473684311, "learning_rate": 0.0009981175997228876, "loss": 1.6627, "step": 3458 }, { "epoch": 0.15383931350317906, "grad_norm": 0.1306368112564087, "learning_rate": 0.000998114537100164, "loss": 1.6652, "step": 3460 }, { "epoch": 0.1539282379618514, "grad_norm": 0.1342526525259018, "learning_rate": 0.0009981114719927625, "loss": 1.6633, "step": 3462 }, { "epoch": 0.15401716242052377, "grad_norm": 0.13883987069129944, "learning_rate": 0.0009981084044006982, "loss": 1.6666, "step": 3464 }, { "epoch": 0.15410608687919614, "grad_norm": 0.12659454345703125, "learning_rate": 0.0009981053343239865, "loss": 1.6635, "step": 3466 }, { "epoch": 0.15419501133786848, "grad_norm": 0.13172872364521027, "learning_rate": 0.0009981022617626427, "loss": 1.6643, "step": 3468 }, { "epoch": 0.15428393579654084, "grad_norm": 0.1316794753074646, "learning_rate": 0.0009980991867166822, "loss": 1.6593, "step": 3470 }, { "epoch": 0.15437286025521318, "grad_norm": 0.14180782437324524, "learning_rate": 0.0009980961091861202, "loss": 1.6608, "step": 3472 }, { "epoch": 0.15446178471388555, "grad_norm": 0.13621553778648376, "learning_rate": 0.0009980930291709723, "loss": 1.6631, "step": 3474 }, { "epoch": 0.15455070917255792, "grad_norm": 0.12693116068840027, "learning_rate": 0.0009980899466712535, "loss": 1.6623, "step": 3476 }, { "epoch": 0.15463963363123026, "grad_norm": 0.12886109948158264, "learning_rate": 0.0009980868616869794, "loss": 1.6589, "step": 3478 }, { "epoch": 0.15472855808990263, "grad_norm": 0.12660932540893555, "learning_rate": 0.0009980837742181654, "loss": 1.6668, "step": 3480 }, { "epoch": 0.154817482548575, "grad_norm": 0.12759321928024292, "learning_rate": 0.0009980806842648268, "loss": 1.66, "step": 3482 }, { "epoch": 0.15490640700724734, "grad_norm": 0.12718096375465393, "learning_rate": 0.0009980775918269791, "loss": 1.6659, "step": 3484 }, { "epoch": 0.1549953314659197, "grad_norm": 0.12915131449699402, "learning_rate": 0.0009980744969046377, "loss": 1.6595, "step": 3486 }, { "epoch": 0.15508425592459205, "grad_norm": 0.12834011018276215, "learning_rate": 0.0009980713994978182, "loss": 1.6652, "step": 3488 }, { "epoch": 0.15517318038326441, "grad_norm": 0.12587936222553253, "learning_rate": 0.0009980682996065355, "loss": 1.6654, "step": 3490 }, { "epoch": 0.15526210484193678, "grad_norm": 0.13400059938430786, "learning_rate": 0.0009980651972308057, "loss": 1.6603, "step": 3492 }, { "epoch": 0.15535102930060912, "grad_norm": 0.13040411472320557, "learning_rate": 0.000998062092370644, "loss": 1.6602, "step": 3494 }, { "epoch": 0.1554399537592815, "grad_norm": 0.12811923027038574, "learning_rate": 0.000998058985026066, "loss": 1.6603, "step": 3496 }, { "epoch": 0.15552887821795386, "grad_norm": 0.12436926364898682, "learning_rate": 0.000998055875197087, "loss": 1.6562, "step": 3498 }, { "epoch": 0.1556178026766262, "grad_norm": 0.13370974361896515, "learning_rate": 0.0009980527628837227, "loss": 1.666, "step": 3500 }, { "epoch": 0.1556178026766262, "eval_loss": 1.6284940242767334, "eval_runtime": 12.3496, "eval_samples_per_second": 559.532, "eval_steps_per_second": 69.962, "step": 3500 }, { "epoch": 0.15570672713529857, "grad_norm": 0.13278929889202118, "learning_rate": 0.0009980496480859883, "loss": 1.6623, "step": 3502 }, { "epoch": 0.15579565159397094, "grad_norm": 0.13166457414627075, "learning_rate": 0.0009980465308038998, "loss": 1.6608, "step": 3504 }, { "epoch": 0.15588457605264328, "grad_norm": 0.12355690449476242, "learning_rate": 0.0009980434110374724, "loss": 1.6639, "step": 3506 }, { "epoch": 0.15597350051131564, "grad_norm": 0.12370448559522629, "learning_rate": 0.000998040288786722, "loss": 1.6587, "step": 3508 }, { "epoch": 0.15606242496998798, "grad_norm": 0.12431325763463974, "learning_rate": 0.0009980371640516638, "loss": 1.6608, "step": 3510 }, { "epoch": 0.15615134942866035, "grad_norm": 0.1281343400478363, "learning_rate": 0.0009980340368323135, "loss": 1.6655, "step": 3512 }, { "epoch": 0.15624027388733272, "grad_norm": 0.12553107738494873, "learning_rate": 0.0009980309071286868, "loss": 1.6553, "step": 3514 }, { "epoch": 0.15632919834600506, "grad_norm": 0.1262499839067459, "learning_rate": 0.0009980277749407995, "loss": 1.6578, "step": 3516 }, { "epoch": 0.15641812280467743, "grad_norm": 0.12978388369083405, "learning_rate": 0.0009980246402686668, "loss": 1.6575, "step": 3518 }, { "epoch": 0.1565070472633498, "grad_norm": 0.1310720145702362, "learning_rate": 0.0009980215031123046, "loss": 1.6541, "step": 3520 }, { "epoch": 0.15659597172202214, "grad_norm": 0.12396594882011414, "learning_rate": 0.0009980183634717284, "loss": 1.655, "step": 3522 }, { "epoch": 0.1566848961806945, "grad_norm": 0.1306004524230957, "learning_rate": 0.0009980152213469539, "loss": 1.6586, "step": 3524 }, { "epoch": 0.15677382063936685, "grad_norm": 0.13290682435035706, "learning_rate": 0.000998012076737997, "loss": 1.6586, "step": 3526 }, { "epoch": 0.1568627450980392, "grad_norm": 0.13578933477401733, "learning_rate": 0.000998008929644873, "loss": 1.657, "step": 3528 }, { "epoch": 0.15695166955671158, "grad_norm": 0.1279151439666748, "learning_rate": 0.0009980057800675977, "loss": 1.6591, "step": 3530 }, { "epoch": 0.15704059401538392, "grad_norm": 0.12457801401615143, "learning_rate": 0.0009980026280061872, "loss": 1.6639, "step": 3532 }, { "epoch": 0.1571295184740563, "grad_norm": 0.12850531935691833, "learning_rate": 0.0009979994734606568, "loss": 1.6618, "step": 3534 }, { "epoch": 0.15721844293272866, "grad_norm": 0.1258937120437622, "learning_rate": 0.0009979963164310224, "loss": 1.6541, "step": 3536 }, { "epoch": 0.157307367391401, "grad_norm": 0.12144545465707779, "learning_rate": 0.0009979931569172994, "loss": 1.6592, "step": 3538 }, { "epoch": 0.15739629185007337, "grad_norm": 0.1225188747048378, "learning_rate": 0.0009979899949195043, "loss": 1.6578, "step": 3540 }, { "epoch": 0.1574852163087457, "grad_norm": 0.13467775285243988, "learning_rate": 0.0009979868304376522, "loss": 1.6604, "step": 3542 }, { "epoch": 0.15757414076741807, "grad_norm": 0.13501925766468048, "learning_rate": 0.0009979836634717591, "loss": 1.6567, "step": 3544 }, { "epoch": 0.15766306522609044, "grad_norm": 0.1286332756280899, "learning_rate": 0.000997980494021841, "loss": 1.6608, "step": 3546 }, { "epoch": 0.15775198968476278, "grad_norm": 0.12571607530117035, "learning_rate": 0.0009979773220879136, "loss": 1.6518, "step": 3548 }, { "epoch": 0.15784091414343515, "grad_norm": 0.12651760876178741, "learning_rate": 0.0009979741476699923, "loss": 1.6562, "step": 3550 }, { "epoch": 0.15792983860210752, "grad_norm": 0.12374807894229889, "learning_rate": 0.0009979709707680937, "loss": 1.6635, "step": 3552 }, { "epoch": 0.15801876306077986, "grad_norm": 0.12591153383255005, "learning_rate": 0.000997967791382233, "loss": 1.6606, "step": 3554 }, { "epoch": 0.15810768751945223, "grad_norm": 0.13102535903453827, "learning_rate": 0.0009979646095124265, "loss": 1.6571, "step": 3556 }, { "epoch": 0.1581966119781246, "grad_norm": 0.1338418573141098, "learning_rate": 0.0009979614251586894, "loss": 1.6582, "step": 3558 }, { "epoch": 0.15828553643679694, "grad_norm": 0.13042640686035156, "learning_rate": 0.0009979582383210386, "loss": 1.6553, "step": 3560 }, { "epoch": 0.1583744608954693, "grad_norm": 0.13031484186649323, "learning_rate": 0.0009979550489994892, "loss": 1.6604, "step": 3562 }, { "epoch": 0.15846338535414164, "grad_norm": 0.12561386823654175, "learning_rate": 0.0009979518571940574, "loss": 1.6619, "step": 3564 }, { "epoch": 0.158552309812814, "grad_norm": 0.13122697174549103, "learning_rate": 0.0009979486629047589, "loss": 1.658, "step": 3566 }, { "epoch": 0.15864123427148638, "grad_norm": 0.13682737946510315, "learning_rate": 0.0009979454661316102, "loss": 1.6619, "step": 3568 }, { "epoch": 0.15873015873015872, "grad_norm": 0.12418843060731888, "learning_rate": 0.0009979422668746265, "loss": 1.6576, "step": 3570 }, { "epoch": 0.1588190831888311, "grad_norm": 0.13619369268417358, "learning_rate": 0.0009979390651338243, "loss": 1.6581, "step": 3572 }, { "epoch": 0.15890800764750346, "grad_norm": 0.12203360348939896, "learning_rate": 0.0009979358609092193, "loss": 1.657, "step": 3574 }, { "epoch": 0.1589969321061758, "grad_norm": 0.12217724323272705, "learning_rate": 0.0009979326542008276, "loss": 1.6533, "step": 3576 }, { "epoch": 0.15908585656484817, "grad_norm": 0.13109327852725983, "learning_rate": 0.0009979294450086652, "loss": 1.6624, "step": 3578 }, { "epoch": 0.1591747810235205, "grad_norm": 0.123811274766922, "learning_rate": 0.0009979262333327482, "loss": 1.6538, "step": 3580 }, { "epoch": 0.15926370548219287, "grad_norm": 0.12288839370012283, "learning_rate": 0.000997923019173092, "loss": 1.6553, "step": 3582 }, { "epoch": 0.15935262994086524, "grad_norm": 0.12873615324497223, "learning_rate": 0.0009979198025297138, "loss": 1.6612, "step": 3584 }, { "epoch": 0.15944155439953758, "grad_norm": 0.12492024898529053, "learning_rate": 0.0009979165834026285, "loss": 1.6557, "step": 3586 }, { "epoch": 0.15953047885820995, "grad_norm": 0.12257129698991776, "learning_rate": 0.0009979133617918528, "loss": 1.6571, "step": 3588 }, { "epoch": 0.15961940331688232, "grad_norm": 0.13002300262451172, "learning_rate": 0.0009979101376974026, "loss": 1.6584, "step": 3590 }, { "epoch": 0.15970832777555466, "grad_norm": 0.12845563888549805, "learning_rate": 0.000997906911119294, "loss": 1.6524, "step": 3592 }, { "epoch": 0.15979725223422703, "grad_norm": 0.1296927034854889, "learning_rate": 0.0009979036820575431, "loss": 1.6599, "step": 3594 }, { "epoch": 0.1598861766928994, "grad_norm": 0.11898598074913025, "learning_rate": 0.000997900450512166, "loss": 1.6547, "step": 3596 }, { "epoch": 0.15997510115157174, "grad_norm": 0.12477612495422363, "learning_rate": 0.0009978972164831786, "loss": 1.6576, "step": 3598 }, { "epoch": 0.1600640256102441, "grad_norm": 0.12979185581207275, "learning_rate": 0.0009978939799705974, "loss": 1.6585, "step": 3600 }, { "epoch": 0.16015295006891644, "grad_norm": 0.1325928121805191, "learning_rate": 0.0009978907409744385, "loss": 1.6561, "step": 3602 }, { "epoch": 0.1602418745275888, "grad_norm": 0.12693507969379425, "learning_rate": 0.0009978874994947178, "loss": 1.6591, "step": 3604 }, { "epoch": 0.16033079898626118, "grad_norm": 0.12471980601549149, "learning_rate": 0.0009978842555314516, "loss": 1.6559, "step": 3606 }, { "epoch": 0.16041972344493352, "grad_norm": 0.13122089207172394, "learning_rate": 0.000997881009084656, "loss": 1.6615, "step": 3608 }, { "epoch": 0.1605086479036059, "grad_norm": 0.12727150321006775, "learning_rate": 0.0009978777601543473, "loss": 1.6517, "step": 3610 }, { "epoch": 0.16059757236227826, "grad_norm": 0.13121071457862854, "learning_rate": 0.0009978745087405418, "loss": 1.6547, "step": 3612 }, { "epoch": 0.1606864968209506, "grad_norm": 0.13580727577209473, "learning_rate": 0.0009978712548432557, "loss": 1.6545, "step": 3614 }, { "epoch": 0.16077542127962297, "grad_norm": 0.13076774775981903, "learning_rate": 0.0009978679984625049, "loss": 1.658, "step": 3616 }, { "epoch": 0.1608643457382953, "grad_norm": 0.1376674324274063, "learning_rate": 0.000997864739598306, "loss": 1.6551, "step": 3618 }, { "epoch": 0.16095327019696767, "grad_norm": 0.13014668226242065, "learning_rate": 0.0009978614782506752, "loss": 1.6476, "step": 3620 }, { "epoch": 0.16104219465564004, "grad_norm": 0.12911993265151978, "learning_rate": 0.0009978582144196287, "loss": 1.6458, "step": 3622 }, { "epoch": 0.16113111911431238, "grad_norm": 0.12219536304473877, "learning_rate": 0.0009978549481051825, "loss": 1.6512, "step": 3624 }, { "epoch": 0.16122004357298475, "grad_norm": 0.12736880779266357, "learning_rate": 0.0009978516793073535, "loss": 1.6565, "step": 3626 }, { "epoch": 0.16130896803165712, "grad_norm": 0.12791965901851654, "learning_rate": 0.0009978484080261575, "loss": 1.6543, "step": 3628 }, { "epoch": 0.16139789249032946, "grad_norm": 0.12389549612998962, "learning_rate": 0.0009978451342616112, "loss": 1.653, "step": 3630 }, { "epoch": 0.16148681694900183, "grad_norm": 0.12796027958393097, "learning_rate": 0.0009978418580137308, "loss": 1.6529, "step": 3632 }, { "epoch": 0.16157574140767417, "grad_norm": 0.13139387965202332, "learning_rate": 0.0009978385792825323, "loss": 1.6517, "step": 3634 }, { "epoch": 0.16166466586634654, "grad_norm": 0.12868015468120575, "learning_rate": 0.0009978352980680324, "loss": 1.6522, "step": 3636 }, { "epoch": 0.1617535903250189, "grad_norm": 0.12967824935913086, "learning_rate": 0.0009978320143702475, "loss": 1.6532, "step": 3638 }, { "epoch": 0.16184251478369124, "grad_norm": 0.13034017384052277, "learning_rate": 0.0009978287281891937, "loss": 1.6552, "step": 3640 }, { "epoch": 0.1619314392423636, "grad_norm": 0.12667222321033478, "learning_rate": 0.0009978254395248878, "loss": 1.6524, "step": 3642 }, { "epoch": 0.16202036370103598, "grad_norm": 0.1248355358839035, "learning_rate": 0.0009978221483773458, "loss": 1.651, "step": 3644 }, { "epoch": 0.16210928815970832, "grad_norm": 0.12515072524547577, "learning_rate": 0.0009978188547465842, "loss": 1.6517, "step": 3646 }, { "epoch": 0.1621982126183807, "grad_norm": 0.12195508927106857, "learning_rate": 0.0009978155586326198, "loss": 1.6487, "step": 3648 }, { "epoch": 0.16228713707705306, "grad_norm": 0.12744548916816711, "learning_rate": 0.0009978122600354687, "loss": 1.6517, "step": 3650 }, { "epoch": 0.1623760615357254, "grad_norm": 0.1203659400343895, "learning_rate": 0.0009978089589551473, "loss": 1.6527, "step": 3652 }, { "epoch": 0.16246498599439776, "grad_norm": 0.12796948850154877, "learning_rate": 0.0009978056553916722, "loss": 1.6491, "step": 3654 }, { "epoch": 0.1625539104530701, "grad_norm": 0.12525366246700287, "learning_rate": 0.0009978023493450597, "loss": 1.6569, "step": 3656 }, { "epoch": 0.16264283491174247, "grad_norm": 0.12322328239679337, "learning_rate": 0.0009977990408153268, "loss": 1.6653, "step": 3658 }, { "epoch": 0.16273175937041484, "grad_norm": 0.13084837794303894, "learning_rate": 0.0009977957298024894, "loss": 1.6531, "step": 3660 }, { "epoch": 0.16282068382908718, "grad_norm": 0.12508228421211243, "learning_rate": 0.0009977924163065643, "loss": 1.6563, "step": 3662 }, { "epoch": 0.16290960828775955, "grad_norm": 0.12103430181741714, "learning_rate": 0.000997789100327568, "loss": 1.6506, "step": 3664 }, { "epoch": 0.16299853274643192, "grad_norm": 0.12172827869653702, "learning_rate": 0.0009977857818655172, "loss": 1.6474, "step": 3666 }, { "epoch": 0.16308745720510426, "grad_norm": 0.12971435487270355, "learning_rate": 0.000997782460920428, "loss": 1.6508, "step": 3668 }, { "epoch": 0.16317638166377663, "grad_norm": 0.1291271448135376, "learning_rate": 0.0009977791374923175, "loss": 1.6535, "step": 3670 }, { "epoch": 0.16326530612244897, "grad_norm": 0.13160645961761475, "learning_rate": 0.000997775811581202, "loss": 1.6513, "step": 3672 }, { "epoch": 0.16335423058112133, "grad_norm": 0.1266569048166275, "learning_rate": 0.000997772483187098, "loss": 1.6537, "step": 3674 }, { "epoch": 0.1634431550397937, "grad_norm": 0.12977223098278046, "learning_rate": 0.0009977691523100224, "loss": 1.6543, "step": 3676 }, { "epoch": 0.16353207949846604, "grad_norm": 0.1290341168642044, "learning_rate": 0.0009977658189499914, "loss": 1.6526, "step": 3678 }, { "epoch": 0.1636210039571384, "grad_norm": 0.12717925012111664, "learning_rate": 0.0009977624831070222, "loss": 1.6534, "step": 3680 }, { "epoch": 0.16370992841581078, "grad_norm": 0.12076234817504883, "learning_rate": 0.0009977591447811308, "loss": 1.6484, "step": 3682 }, { "epoch": 0.16379885287448312, "grad_norm": 0.12450318038463593, "learning_rate": 0.0009977558039723344, "loss": 1.6525, "step": 3684 }, { "epoch": 0.1638877773331555, "grad_norm": 0.12309740483760834, "learning_rate": 0.0009977524606806494, "loss": 1.6511, "step": 3686 }, { "epoch": 0.16397670179182786, "grad_norm": 0.12498360872268677, "learning_rate": 0.0009977491149060925, "loss": 1.6513, "step": 3688 }, { "epoch": 0.1640656262505002, "grad_norm": 0.13169927895069122, "learning_rate": 0.0009977457666486803, "loss": 1.6528, "step": 3690 }, { "epoch": 0.16415455070917256, "grad_norm": 0.12063473463058472, "learning_rate": 0.0009977424159084295, "loss": 1.6449, "step": 3692 }, { "epoch": 0.1642434751678449, "grad_norm": 0.12622106075286865, "learning_rate": 0.000997739062685357, "loss": 1.6565, "step": 3694 }, { "epoch": 0.16433239962651727, "grad_norm": 0.12294983863830566, "learning_rate": 0.0009977357069794795, "loss": 1.6491, "step": 3696 }, { "epoch": 0.16442132408518964, "grad_norm": 0.12392249703407288, "learning_rate": 0.0009977323487908136, "loss": 1.6513, "step": 3698 }, { "epoch": 0.16451024854386198, "grad_norm": 0.1264803558588028, "learning_rate": 0.0009977289881193763, "loss": 1.6466, "step": 3700 }, { "epoch": 0.16459917300253435, "grad_norm": 0.12120287120342255, "learning_rate": 0.0009977256249651839, "loss": 1.6532, "step": 3702 }, { "epoch": 0.16468809746120672, "grad_norm": 0.12672477960586548, "learning_rate": 0.0009977222593282535, "loss": 1.6501, "step": 3704 }, { "epoch": 0.16477702191987906, "grad_norm": 0.12327267974615097, "learning_rate": 0.0009977188912086022, "loss": 1.649, "step": 3706 }, { "epoch": 0.16486594637855143, "grad_norm": 0.12144723534584045, "learning_rate": 0.000997715520606246, "loss": 1.6497, "step": 3708 }, { "epoch": 0.16495487083722377, "grad_norm": 0.12345416843891144, "learning_rate": 0.0009977121475212025, "loss": 1.6519, "step": 3710 }, { "epoch": 0.16504379529589613, "grad_norm": 0.12130998075008392, "learning_rate": 0.000997708771953488, "loss": 1.6541, "step": 3712 }, { "epoch": 0.1651327197545685, "grad_norm": 0.1252111792564392, "learning_rate": 0.0009977053939031197, "loss": 1.6551, "step": 3714 }, { "epoch": 0.16522164421324084, "grad_norm": 0.1185317412018776, "learning_rate": 0.0009977020133701143, "loss": 1.6497, "step": 3716 }, { "epoch": 0.1653105686719132, "grad_norm": 0.13791343569755554, "learning_rate": 0.0009976986303544884, "loss": 1.6527, "step": 3718 }, { "epoch": 0.16539949313058558, "grad_norm": 0.1374451071023941, "learning_rate": 0.0009976952448562592, "loss": 1.6527, "step": 3720 }, { "epoch": 0.16548841758925792, "grad_norm": 0.12747934460639954, "learning_rate": 0.0009976918568754436, "loss": 1.6481, "step": 3722 }, { "epoch": 0.1655773420479303, "grad_norm": 0.13003109395503998, "learning_rate": 0.0009976884664120583, "loss": 1.654, "step": 3724 }, { "epoch": 0.16566626650660263, "grad_norm": 0.11762271076440811, "learning_rate": 0.0009976850734661204, "loss": 1.645, "step": 3726 }, { "epoch": 0.165755190965275, "grad_norm": 0.13263928890228271, "learning_rate": 0.0009976816780376467, "loss": 1.6455, "step": 3728 }, { "epoch": 0.16584411542394736, "grad_norm": 0.1377555876970291, "learning_rate": 0.0009976782801266542, "loss": 1.6414, "step": 3730 }, { "epoch": 0.1659330398826197, "grad_norm": 0.12296448647975922, "learning_rate": 0.0009976748797331598, "loss": 1.6466, "step": 3732 }, { "epoch": 0.16602196434129207, "grad_norm": 0.12507364153862, "learning_rate": 0.0009976714768571806, "loss": 1.6475, "step": 3734 }, { "epoch": 0.16611088879996444, "grad_norm": 0.12280484288930893, "learning_rate": 0.0009976680714987333, "loss": 1.6461, "step": 3736 }, { "epoch": 0.16619981325863678, "grad_norm": 0.12638729810714722, "learning_rate": 0.0009976646636578352, "loss": 1.6534, "step": 3738 }, { "epoch": 0.16628873771730915, "grad_norm": 0.12049468606710434, "learning_rate": 0.000997661253334503, "loss": 1.6484, "step": 3740 }, { "epoch": 0.16637766217598152, "grad_norm": 0.12127727270126343, "learning_rate": 0.000997657840528754, "loss": 1.6468, "step": 3742 }, { "epoch": 0.16646658663465386, "grad_norm": 0.1225084736943245, "learning_rate": 0.000997654425240605, "loss": 1.6486, "step": 3744 }, { "epoch": 0.16655551109332623, "grad_norm": 0.12508682906627655, "learning_rate": 0.0009976510074700732, "loss": 1.6429, "step": 3746 }, { "epoch": 0.16664443555199857, "grad_norm": 0.12496629357337952, "learning_rate": 0.0009976475872171754, "loss": 1.6486, "step": 3748 }, { "epoch": 0.16673336001067093, "grad_norm": 0.1250324249267578, "learning_rate": 0.000997644164481929, "loss": 1.6467, "step": 3750 }, { "epoch": 0.1668222844693433, "grad_norm": 0.1318887323141098, "learning_rate": 0.0009976407392643508, "loss": 1.6502, "step": 3752 }, { "epoch": 0.16691120892801564, "grad_norm": 0.12485989183187485, "learning_rate": 0.000997637311564458, "loss": 1.6462, "step": 3754 }, { "epoch": 0.167000133386688, "grad_norm": 0.12936322391033173, "learning_rate": 0.0009976338813822679, "loss": 1.6469, "step": 3756 }, { "epoch": 0.16708905784536038, "grad_norm": 0.12338431924581528, "learning_rate": 0.000997630448717797, "loss": 1.6471, "step": 3758 }, { "epoch": 0.16717798230403272, "grad_norm": 0.12906041741371155, "learning_rate": 0.0009976270135710632, "loss": 1.6512, "step": 3760 }, { "epoch": 0.1672669067627051, "grad_norm": 0.12762369215488434, "learning_rate": 0.000997623575942083, "loss": 1.6502, "step": 3762 }, { "epoch": 0.16735583122137743, "grad_norm": 0.12695105373859406, "learning_rate": 0.000997620135830874, "loss": 1.6524, "step": 3764 }, { "epoch": 0.1674447556800498, "grad_norm": 0.12652401626110077, "learning_rate": 0.000997616693237453, "loss": 1.6479, "step": 3766 }, { "epoch": 0.16753368013872216, "grad_norm": 0.12700212001800537, "learning_rate": 0.0009976132481618375, "loss": 1.6463, "step": 3768 }, { "epoch": 0.1676226045973945, "grad_norm": 0.1289287805557251, "learning_rate": 0.0009976098006040442, "loss": 1.6458, "step": 3770 }, { "epoch": 0.16771152905606687, "grad_norm": 0.12287376075983047, "learning_rate": 0.0009976063505640908, "loss": 1.6516, "step": 3772 }, { "epoch": 0.16780045351473924, "grad_norm": 0.1236732006072998, "learning_rate": 0.0009976028980419945, "loss": 1.649, "step": 3774 }, { "epoch": 0.16788937797341158, "grad_norm": 0.12876683473587036, "learning_rate": 0.000997599443037772, "loss": 1.6499, "step": 3776 }, { "epoch": 0.16797830243208395, "grad_norm": 0.12273410707712173, "learning_rate": 0.0009975959855514412, "loss": 1.6491, "step": 3778 }, { "epoch": 0.16806722689075632, "grad_norm": 0.12249413132667542, "learning_rate": 0.000997592525583019, "loss": 1.6492, "step": 3780 }, { "epoch": 0.16815615134942866, "grad_norm": 0.1305217146873474, "learning_rate": 0.0009975890631325223, "loss": 1.6416, "step": 3782 }, { "epoch": 0.16824507580810102, "grad_norm": 0.13063625991344452, "learning_rate": 0.0009975855981999692, "loss": 1.6455, "step": 3784 }, { "epoch": 0.16833400026677336, "grad_norm": 0.12041649222373962, "learning_rate": 0.0009975821307853763, "loss": 1.6451, "step": 3786 }, { "epoch": 0.16842292472544573, "grad_norm": 0.12157703936100006, "learning_rate": 0.0009975786608887613, "loss": 1.6447, "step": 3788 }, { "epoch": 0.1685118491841181, "grad_norm": 0.12154638022184372, "learning_rate": 0.0009975751885101412, "loss": 1.6527, "step": 3790 }, { "epoch": 0.16860077364279044, "grad_norm": 0.11739989370107651, "learning_rate": 0.0009975717136495336, "loss": 1.6468, "step": 3792 }, { "epoch": 0.1686896981014628, "grad_norm": 0.11798025667667389, "learning_rate": 0.0009975682363069557, "loss": 1.6459, "step": 3794 }, { "epoch": 0.16877862256013518, "grad_norm": 0.11994331330060959, "learning_rate": 0.0009975647564824247, "loss": 1.6495, "step": 3796 }, { "epoch": 0.16886754701880752, "grad_norm": 0.1198289692401886, "learning_rate": 0.0009975612741759583, "loss": 1.6497, "step": 3798 }, { "epoch": 0.16895647147747989, "grad_norm": 0.12364807724952698, "learning_rate": 0.0009975577893875734, "loss": 1.6444, "step": 3800 }, { "epoch": 0.16904539593615223, "grad_norm": 0.1229090690612793, "learning_rate": 0.0009975543021172879, "loss": 1.6499, "step": 3802 }, { "epoch": 0.1691343203948246, "grad_norm": 0.1250782459974289, "learning_rate": 0.000997550812365119, "loss": 1.6417, "step": 3804 }, { "epoch": 0.16922324485349696, "grad_norm": 0.12151206284761429, "learning_rate": 0.0009975473201310839, "loss": 1.6451, "step": 3806 }, { "epoch": 0.1693121693121693, "grad_norm": 0.12076232582330704, "learning_rate": 0.0009975438254152, "loss": 1.6476, "step": 3808 }, { "epoch": 0.16940109377084167, "grad_norm": 0.1340232938528061, "learning_rate": 0.0009975403282174852, "loss": 1.6504, "step": 3810 }, { "epoch": 0.16949001822951404, "grad_norm": 0.12269595265388489, "learning_rate": 0.0009975368285379564, "loss": 1.6462, "step": 3812 }, { "epoch": 0.16957894268818638, "grad_norm": 0.1238735169172287, "learning_rate": 0.0009975333263766316, "loss": 1.6437, "step": 3814 }, { "epoch": 0.16966786714685875, "grad_norm": 0.13427115976810455, "learning_rate": 0.0009975298217335278, "loss": 1.6458, "step": 3816 }, { "epoch": 0.1697567916055311, "grad_norm": 0.12311917543411255, "learning_rate": 0.0009975263146086628, "loss": 1.6445, "step": 3818 }, { "epoch": 0.16984571606420346, "grad_norm": 0.13276326656341553, "learning_rate": 0.000997522805002054, "loss": 1.6489, "step": 3820 }, { "epoch": 0.16993464052287582, "grad_norm": 0.12898485362529755, "learning_rate": 0.0009975192929137188, "loss": 1.6513, "step": 3822 }, { "epoch": 0.17002356498154816, "grad_norm": 0.12796077132225037, "learning_rate": 0.0009975157783436745, "loss": 1.6499, "step": 3824 }, { "epoch": 0.17011248944022053, "grad_norm": 0.11903078854084015, "learning_rate": 0.0009975122612919391, "loss": 1.6457, "step": 3826 }, { "epoch": 0.1702014138988929, "grad_norm": 0.11384111642837524, "learning_rate": 0.00099750874175853, "loss": 1.6383, "step": 3828 }, { "epoch": 0.17029033835756524, "grad_norm": 0.11926476657390594, "learning_rate": 0.0009975052197434648, "loss": 1.6462, "step": 3830 }, { "epoch": 0.1703792628162376, "grad_norm": 0.11415476351976395, "learning_rate": 0.0009975016952467608, "loss": 1.657, "step": 3832 }, { "epoch": 0.17046818727490998, "grad_norm": 0.1166810616850853, "learning_rate": 0.000997498168268436, "loss": 1.646, "step": 3834 }, { "epoch": 0.17055711173358232, "grad_norm": 0.13669024407863617, "learning_rate": 0.0009974946388085076, "loss": 1.6467, "step": 3836 }, { "epoch": 0.17064603619225469, "grad_norm": 0.12202465534210205, "learning_rate": 0.0009974911068669934, "loss": 1.643, "step": 3838 }, { "epoch": 0.17073496065092703, "grad_norm": 0.12026860564947128, "learning_rate": 0.0009974875724439109, "loss": 1.6458, "step": 3840 }, { "epoch": 0.1708238851095994, "grad_norm": 0.12143933773040771, "learning_rate": 0.0009974840355392778, "loss": 1.6471, "step": 3842 }, { "epoch": 0.17091280956827176, "grad_norm": 0.12261870503425598, "learning_rate": 0.000997480496153112, "loss": 1.6384, "step": 3844 }, { "epoch": 0.1710017340269441, "grad_norm": 0.12399966269731522, "learning_rate": 0.0009974769542854306, "loss": 1.6459, "step": 3846 }, { "epoch": 0.17109065848561647, "grad_norm": 0.12557664513587952, "learning_rate": 0.0009974734099362516, "loss": 1.6463, "step": 3848 }, { "epoch": 0.17117958294428884, "grad_norm": 0.1280864030122757, "learning_rate": 0.0009974698631055928, "loss": 1.6447, "step": 3850 }, { "epoch": 0.17126850740296118, "grad_norm": 0.12775865197181702, "learning_rate": 0.0009974663137934717, "loss": 1.6477, "step": 3852 }, { "epoch": 0.17135743186163355, "grad_norm": 0.12285788357257843, "learning_rate": 0.000997462761999906, "loss": 1.6416, "step": 3854 }, { "epoch": 0.1714463563203059, "grad_norm": 0.12305966019630432, "learning_rate": 0.0009974592077249137, "loss": 1.6478, "step": 3856 }, { "epoch": 0.17153528077897826, "grad_norm": 0.12091002613306046, "learning_rate": 0.000997455650968512, "loss": 1.6458, "step": 3858 }, { "epoch": 0.17162420523765062, "grad_norm": 0.12569975852966309, "learning_rate": 0.000997452091730719, "loss": 1.6378, "step": 3860 }, { "epoch": 0.17171312969632296, "grad_norm": 0.13074176013469696, "learning_rate": 0.0009974485300115526, "loss": 1.647, "step": 3862 }, { "epoch": 0.17180205415499533, "grad_norm": 0.11964616179466248, "learning_rate": 0.0009974449658110302, "loss": 1.6445, "step": 3864 }, { "epoch": 0.1718909786136677, "grad_norm": 0.11686211824417114, "learning_rate": 0.0009974413991291698, "loss": 1.6438, "step": 3866 }, { "epoch": 0.17197990307234004, "grad_norm": 0.11895184218883514, "learning_rate": 0.000997437829965989, "loss": 1.6416, "step": 3868 }, { "epoch": 0.1720688275310124, "grad_norm": 0.12349550426006317, "learning_rate": 0.0009974342583215058, "loss": 1.6466, "step": 3870 }, { "epoch": 0.17215775198968478, "grad_norm": 0.12045907974243164, "learning_rate": 0.000997430684195738, "loss": 1.6422, "step": 3872 }, { "epoch": 0.17224667644835712, "grad_norm": 0.12487021833658218, "learning_rate": 0.0009974271075887033, "loss": 1.645, "step": 3874 }, { "epoch": 0.17233560090702948, "grad_norm": 0.12949977815151215, "learning_rate": 0.0009974235285004195, "loss": 1.6428, "step": 3876 }, { "epoch": 0.17242452536570183, "grad_norm": 0.129159078001976, "learning_rate": 0.0009974199469309048, "loss": 1.6378, "step": 3878 }, { "epoch": 0.1725134498243742, "grad_norm": 0.12624463438987732, "learning_rate": 0.000997416362880177, "loss": 1.6427, "step": 3880 }, { "epoch": 0.17260237428304656, "grad_norm": 0.11681614816188812, "learning_rate": 0.0009974127763482535, "loss": 1.6428, "step": 3882 }, { "epoch": 0.1726912987417189, "grad_norm": 0.11823716759681702, "learning_rate": 0.0009974091873351525, "loss": 1.6421, "step": 3884 }, { "epoch": 0.17278022320039127, "grad_norm": 0.12332173436880112, "learning_rate": 0.000997405595840892, "loss": 1.6423, "step": 3886 }, { "epoch": 0.17286914765906364, "grad_norm": 0.1153373122215271, "learning_rate": 0.0009974020018654898, "loss": 1.649, "step": 3888 }, { "epoch": 0.17295807211773598, "grad_norm": 0.11574465781450272, "learning_rate": 0.000997398405408964, "loss": 1.6347, "step": 3890 }, { "epoch": 0.17304699657640835, "grad_norm": 0.11528904736042023, "learning_rate": 0.0009973948064713322, "loss": 1.6458, "step": 3892 }, { "epoch": 0.1731359210350807, "grad_norm": 0.11927869915962219, "learning_rate": 0.0009973912050526125, "loss": 1.6434, "step": 3894 }, { "epoch": 0.17322484549375305, "grad_norm": 0.12420177459716797, "learning_rate": 0.000997387601152823, "loss": 1.6429, "step": 3896 }, { "epoch": 0.17331376995242542, "grad_norm": 0.11699125170707703, "learning_rate": 0.0009973839947719817, "loss": 1.6461, "step": 3898 }, { "epoch": 0.17340269441109776, "grad_norm": 0.1246216669678688, "learning_rate": 0.0009973803859101064, "loss": 1.644, "step": 3900 }, { "epoch": 0.17349161886977013, "grad_norm": 0.13008350133895874, "learning_rate": 0.0009973767745672152, "loss": 1.6469, "step": 3902 }, { "epoch": 0.1735805433284425, "grad_norm": 0.12936833500862122, "learning_rate": 0.000997373160743326, "loss": 1.6459, "step": 3904 }, { "epoch": 0.17366946778711484, "grad_norm": 0.11948209255933762, "learning_rate": 0.000997369544438457, "loss": 1.6425, "step": 3906 }, { "epoch": 0.1737583922457872, "grad_norm": 0.11415665596723557, "learning_rate": 0.000997365925652626, "loss": 1.6449, "step": 3908 }, { "epoch": 0.17384731670445955, "grad_norm": 0.12373475730419159, "learning_rate": 0.0009973623043858513, "loss": 1.6428, "step": 3910 }, { "epoch": 0.17393624116313192, "grad_norm": 0.11973095685243607, "learning_rate": 0.0009973586806381507, "loss": 1.6411, "step": 3912 }, { "epoch": 0.17402516562180428, "grad_norm": 0.12207634001970291, "learning_rate": 0.0009973550544095427, "loss": 1.6408, "step": 3914 }, { "epoch": 0.17411409008047662, "grad_norm": 0.12171240895986557, "learning_rate": 0.0009973514257000451, "loss": 1.6446, "step": 3916 }, { "epoch": 0.174203014539149, "grad_norm": 0.12418859452009201, "learning_rate": 0.000997347794509676, "loss": 1.6475, "step": 3918 }, { "epoch": 0.17429193899782136, "grad_norm": 0.12470457702875137, "learning_rate": 0.0009973441608384535, "loss": 1.6453, "step": 3920 }, { "epoch": 0.1743808634564937, "grad_norm": 0.12560194730758667, "learning_rate": 0.0009973405246863957, "loss": 1.6393, "step": 3922 }, { "epoch": 0.17446978791516607, "grad_norm": 0.1209321841597557, "learning_rate": 0.0009973368860535208, "loss": 1.647, "step": 3924 }, { "epoch": 0.17455871237383844, "grad_norm": 0.11720415949821472, "learning_rate": 0.000997333244939847, "loss": 1.6403, "step": 3926 }, { "epoch": 0.17464763683251078, "grad_norm": 0.1247502788901329, "learning_rate": 0.0009973296013453927, "loss": 1.6419, "step": 3928 }, { "epoch": 0.17473656129118315, "grad_norm": 0.12291789054870605, "learning_rate": 0.0009973259552701755, "loss": 1.6477, "step": 3930 }, { "epoch": 0.17482548574985549, "grad_norm": 0.12356559187173843, "learning_rate": 0.000997322306714214, "loss": 1.6493, "step": 3932 }, { "epoch": 0.17491441020852785, "grad_norm": 0.12039901316165924, "learning_rate": 0.0009973186556775262, "loss": 1.6419, "step": 3934 }, { "epoch": 0.17500333466720022, "grad_norm": 0.11828312277793884, "learning_rate": 0.0009973150021601305, "loss": 1.6374, "step": 3936 }, { "epoch": 0.17509225912587256, "grad_norm": 0.12081318348646164, "learning_rate": 0.0009973113461620448, "loss": 1.6384, "step": 3938 }, { "epoch": 0.17518118358454493, "grad_norm": 0.11971206218004227, "learning_rate": 0.000997307687683288, "loss": 1.6403, "step": 3940 }, { "epoch": 0.1752701080432173, "grad_norm": 0.1235707625746727, "learning_rate": 0.0009973040267238775, "loss": 1.6432, "step": 3942 }, { "epoch": 0.17535903250188964, "grad_norm": 0.12012319266796112, "learning_rate": 0.000997300363283832, "loss": 1.6455, "step": 3944 }, { "epoch": 0.175447956960562, "grad_norm": 0.11857002973556519, "learning_rate": 0.0009972966973631698, "loss": 1.6372, "step": 3946 }, { "epoch": 0.17553688141923435, "grad_norm": 0.12104901671409607, "learning_rate": 0.000997293028961909, "loss": 1.6464, "step": 3948 }, { "epoch": 0.17562580587790672, "grad_norm": 0.12087158113718033, "learning_rate": 0.0009972893580800684, "loss": 1.6348, "step": 3950 }, { "epoch": 0.17571473033657908, "grad_norm": 0.11955670267343521, "learning_rate": 0.0009972856847176657, "loss": 1.6369, "step": 3952 }, { "epoch": 0.17580365479525142, "grad_norm": 0.12120344489812851, "learning_rate": 0.0009972820088747195, "loss": 1.6386, "step": 3954 }, { "epoch": 0.1758925792539238, "grad_norm": 0.12605014443397522, "learning_rate": 0.000997278330551248, "loss": 1.6385, "step": 3956 }, { "epoch": 0.17598150371259616, "grad_norm": 0.12289277464151382, "learning_rate": 0.0009972746497472697, "loss": 1.6435, "step": 3958 }, { "epoch": 0.1760704281712685, "grad_norm": 0.11947757750749588, "learning_rate": 0.0009972709664628028, "loss": 1.6271, "step": 3960 }, { "epoch": 0.17615935262994087, "grad_norm": 0.11600028723478317, "learning_rate": 0.000997267280697866, "loss": 1.6419, "step": 3962 }, { "epoch": 0.17624827708861324, "grad_norm": 0.11856868118047714, "learning_rate": 0.0009972635924524772, "loss": 1.6373, "step": 3964 }, { "epoch": 0.17633720154728558, "grad_norm": 0.11774558573961258, "learning_rate": 0.0009972599017266553, "loss": 1.6403, "step": 3966 }, { "epoch": 0.17642612600595795, "grad_norm": 0.1212194487452507, "learning_rate": 0.0009972562085204183, "loss": 1.6383, "step": 3968 }, { "epoch": 0.17651505046463029, "grad_norm": 0.11406757682561874, "learning_rate": 0.0009972525128337849, "loss": 1.6429, "step": 3970 }, { "epoch": 0.17660397492330265, "grad_norm": 0.11931362748146057, "learning_rate": 0.0009972488146667733, "loss": 1.6388, "step": 3972 }, { "epoch": 0.17669289938197502, "grad_norm": 0.12099288403987885, "learning_rate": 0.0009972451140194023, "loss": 1.6395, "step": 3974 }, { "epoch": 0.17678182384064736, "grad_norm": 0.1235419511795044, "learning_rate": 0.00099724141089169, "loss": 1.6444, "step": 3976 }, { "epoch": 0.17687074829931973, "grad_norm": 0.12513847649097443, "learning_rate": 0.000997237705283655, "loss": 1.6411, "step": 3978 }, { "epoch": 0.1769596727579921, "grad_norm": 0.11601508408784866, "learning_rate": 0.0009972339971953156, "loss": 1.6372, "step": 3980 }, { "epoch": 0.17704859721666444, "grad_norm": 0.11849774420261383, "learning_rate": 0.000997230286626691, "loss": 1.6378, "step": 3982 }, { "epoch": 0.1771375216753368, "grad_norm": 0.1219199150800705, "learning_rate": 0.0009972265735777987, "loss": 1.6404, "step": 3984 }, { "epoch": 0.17722644613400915, "grad_norm": 0.11740437895059586, "learning_rate": 0.000997222858048658, "loss": 1.6357, "step": 3986 }, { "epoch": 0.17731537059268151, "grad_norm": 0.11547745764255524, "learning_rate": 0.000997219140039287, "loss": 1.6347, "step": 3988 }, { "epoch": 0.17740429505135388, "grad_norm": 0.12119320780038834, "learning_rate": 0.0009972154195497043, "loss": 1.6375, "step": 3990 }, { "epoch": 0.17749321951002622, "grad_norm": 0.11464416235685349, "learning_rate": 0.0009972116965799287, "loss": 1.6361, "step": 3992 }, { "epoch": 0.1775821439686986, "grad_norm": 0.11988995969295502, "learning_rate": 0.0009972079711299787, "loss": 1.6456, "step": 3994 }, { "epoch": 0.17767106842737096, "grad_norm": 0.12310869991779327, "learning_rate": 0.0009972042431998726, "loss": 1.6377, "step": 3996 }, { "epoch": 0.1777599928860433, "grad_norm": 0.11940263211727142, "learning_rate": 0.0009972005127896295, "loss": 1.6408, "step": 3998 }, { "epoch": 0.17784891734471567, "grad_norm": 0.11837908625602722, "learning_rate": 0.0009971967798992675, "loss": 1.6408, "step": 4000 }, { "epoch": 0.17784891734471567, "eval_loss": 1.6105084419250488, "eval_runtime": 12.5593, "eval_samples_per_second": 550.192, "eval_steps_per_second": 68.794, "step": 4000 }, { "epoch": 0.177937841803388, "grad_norm": 0.12262357026338577, "learning_rate": 0.0009971930445288055, "loss": 1.6371, "step": 4002 }, { "epoch": 0.17802676626206038, "grad_norm": 0.1207030713558197, "learning_rate": 0.0009971893066782619, "loss": 1.6371, "step": 4004 }, { "epoch": 0.17811569072073274, "grad_norm": 0.12452663481235504, "learning_rate": 0.0009971855663476557, "loss": 1.6377, "step": 4006 }, { "epoch": 0.17820461517940508, "grad_norm": 0.12130036950111389, "learning_rate": 0.0009971818235370053, "loss": 1.6384, "step": 4008 }, { "epoch": 0.17829353963807745, "grad_norm": 0.125193789601326, "learning_rate": 0.0009971780782463294, "loss": 1.6409, "step": 4010 }, { "epoch": 0.17838246409674982, "grad_norm": 0.11911486834287643, "learning_rate": 0.0009971743304756468, "loss": 1.6442, "step": 4012 }, { "epoch": 0.17847138855542216, "grad_norm": 0.11762598156929016, "learning_rate": 0.000997170580224976, "loss": 1.6446, "step": 4014 }, { "epoch": 0.17856031301409453, "grad_norm": 0.12029220908880234, "learning_rate": 0.000997166827494336, "loss": 1.6428, "step": 4016 }, { "epoch": 0.1786492374727669, "grad_norm": 0.11997038871049881, "learning_rate": 0.000997163072283745, "loss": 1.6333, "step": 4018 }, { "epoch": 0.17873816193143924, "grad_norm": 0.11605632305145264, "learning_rate": 0.0009971593145932224, "loss": 1.6405, "step": 4020 }, { "epoch": 0.1788270863901116, "grad_norm": 0.11872642487287521, "learning_rate": 0.0009971555544227865, "loss": 1.6388, "step": 4022 }, { "epoch": 0.17891601084878395, "grad_norm": 0.11706759035587311, "learning_rate": 0.0009971517917724561, "loss": 1.6308, "step": 4024 }, { "epoch": 0.17900493530745631, "grad_norm": 0.11391259729862213, "learning_rate": 0.0009971480266422503, "loss": 1.636, "step": 4026 }, { "epoch": 0.17909385976612868, "grad_norm": 0.11665000766515732, "learning_rate": 0.0009971442590321873, "loss": 1.6341, "step": 4028 }, { "epoch": 0.17918278422480102, "grad_norm": 0.11588221043348312, "learning_rate": 0.0009971404889422865, "loss": 1.6384, "step": 4030 }, { "epoch": 0.1792717086834734, "grad_norm": 0.12535299360752106, "learning_rate": 0.0009971367163725661, "loss": 1.6425, "step": 4032 }, { "epoch": 0.17936063314214576, "grad_norm": 0.11556640267372131, "learning_rate": 0.0009971329413230455, "loss": 1.6384, "step": 4034 }, { "epoch": 0.1794495576008181, "grad_norm": 0.11973097175359726, "learning_rate": 0.0009971291637937431, "loss": 1.6318, "step": 4036 }, { "epoch": 0.17953848205949047, "grad_norm": 0.11719883978366852, "learning_rate": 0.000997125383784678, "loss": 1.6386, "step": 4038 }, { "epoch": 0.1796274065181628, "grad_norm": 0.11612795293331146, "learning_rate": 0.0009971216012958691, "loss": 1.6363, "step": 4040 }, { "epoch": 0.17971633097683518, "grad_norm": 0.11973612755537033, "learning_rate": 0.000997117816327335, "loss": 1.6354, "step": 4042 }, { "epoch": 0.17980525543550754, "grad_norm": 0.11819841712713242, "learning_rate": 0.0009971140288790948, "loss": 1.6378, "step": 4044 }, { "epoch": 0.17989417989417988, "grad_norm": 0.11543485522270203, "learning_rate": 0.000997110238951167, "loss": 1.6354, "step": 4046 }, { "epoch": 0.17998310435285225, "grad_norm": 0.11066845804452896, "learning_rate": 0.000997106446543571, "loss": 1.6385, "step": 4048 }, { "epoch": 0.18007202881152462, "grad_norm": 0.11605249345302582, "learning_rate": 0.0009971026516563258, "loss": 1.644, "step": 4050 }, { "epoch": 0.18016095327019696, "grad_norm": 0.11649958044290543, "learning_rate": 0.0009970988542894496, "loss": 1.6386, "step": 4052 }, { "epoch": 0.18024987772886933, "grad_norm": 0.11853307485580444, "learning_rate": 0.0009970950544429622, "loss": 1.6339, "step": 4054 }, { "epoch": 0.1803388021875417, "grad_norm": 0.12470807880163193, "learning_rate": 0.000997091252116882, "loss": 1.6345, "step": 4056 }, { "epoch": 0.18042772664621404, "grad_norm": 0.12232770025730133, "learning_rate": 0.000997087447311228, "loss": 1.6334, "step": 4058 }, { "epoch": 0.1805166511048864, "grad_norm": 0.11800771951675415, "learning_rate": 0.0009970836400260195, "loss": 1.6344, "step": 4060 }, { "epoch": 0.18060557556355875, "grad_norm": 0.12016160786151886, "learning_rate": 0.0009970798302612752, "loss": 1.6384, "step": 4062 }, { "epoch": 0.1806945000222311, "grad_norm": 0.11976221203804016, "learning_rate": 0.000997076018017014, "loss": 1.6344, "step": 4064 }, { "epoch": 0.18078342448090348, "grad_norm": 0.11883070319890976, "learning_rate": 0.0009970722032932554, "loss": 1.6326, "step": 4066 }, { "epoch": 0.18087234893957582, "grad_norm": 0.11358685046434402, "learning_rate": 0.000997068386090018, "loss": 1.6369, "step": 4068 }, { "epoch": 0.1809612733982482, "grad_norm": 0.1167619526386261, "learning_rate": 0.000997064566407321, "loss": 1.6324, "step": 4070 }, { "epoch": 0.18105019785692056, "grad_norm": 0.11498478800058365, "learning_rate": 0.0009970607442451834, "loss": 1.6365, "step": 4072 }, { "epoch": 0.1811391223155929, "grad_norm": 0.11521459370851517, "learning_rate": 0.0009970569196036244, "loss": 1.6374, "step": 4074 }, { "epoch": 0.18122804677426527, "grad_norm": 0.12538723647594452, "learning_rate": 0.000997053092482663, "loss": 1.6398, "step": 4076 }, { "epoch": 0.1813169712329376, "grad_norm": 0.1221718043088913, "learning_rate": 0.0009970492628823182, "loss": 1.6334, "step": 4078 }, { "epoch": 0.18140589569160998, "grad_norm": 0.11676152795553207, "learning_rate": 0.000997045430802609, "loss": 1.6289, "step": 4080 }, { "epoch": 0.18149482015028234, "grad_norm": 0.11201895028352737, "learning_rate": 0.000997041596243555, "loss": 1.637, "step": 4082 }, { "epoch": 0.18158374460895468, "grad_norm": 0.12027441710233688, "learning_rate": 0.0009970377592051747, "loss": 1.6364, "step": 4084 }, { "epoch": 0.18167266906762705, "grad_norm": 0.11957783252000809, "learning_rate": 0.0009970339196874879, "loss": 1.633, "step": 4086 }, { "epoch": 0.18176159352629942, "grad_norm": 0.1207532063126564, "learning_rate": 0.0009970300776905132, "loss": 1.6328, "step": 4088 }, { "epoch": 0.18185051798497176, "grad_norm": 0.11650995910167694, "learning_rate": 0.00099702623321427, "loss": 1.6376, "step": 4090 }, { "epoch": 0.18193944244364413, "grad_norm": 0.127812460064888, "learning_rate": 0.0009970223862587773, "loss": 1.6435, "step": 4092 }, { "epoch": 0.18202836690231647, "grad_norm": 0.12352623790502548, "learning_rate": 0.0009970185368240547, "loss": 1.6333, "step": 4094 }, { "epoch": 0.18211729136098884, "grad_norm": 0.12078031152486801, "learning_rate": 0.0009970146849101209, "loss": 1.6332, "step": 4096 }, { "epoch": 0.1822062158196612, "grad_norm": 0.1223541796207428, "learning_rate": 0.0009970108305169957, "loss": 1.636, "step": 4098 }, { "epoch": 0.18229514027833355, "grad_norm": 0.12119990587234497, "learning_rate": 0.0009970069736446976, "loss": 1.6355, "step": 4100 }, { "epoch": 0.1823840647370059, "grad_norm": 0.11915529519319534, "learning_rate": 0.0009970031142932463, "loss": 1.6365, "step": 4102 }, { "epoch": 0.18247298919567828, "grad_norm": 0.1180507242679596, "learning_rate": 0.000996999252462661, "loss": 1.6277, "step": 4104 }, { "epoch": 0.18256191365435062, "grad_norm": 0.11856456100940704, "learning_rate": 0.000996995388152961, "loss": 1.6403, "step": 4106 }, { "epoch": 0.182650838113023, "grad_norm": 0.11693500727415085, "learning_rate": 0.0009969915213641654, "loss": 1.6333, "step": 4108 }, { "epoch": 0.18273976257169536, "grad_norm": 0.11582940071821213, "learning_rate": 0.0009969876520962938, "loss": 1.6371, "step": 4110 }, { "epoch": 0.1828286870303677, "grad_norm": 0.11472149193286896, "learning_rate": 0.000996983780349365, "loss": 1.636, "step": 4112 }, { "epoch": 0.18291761148904007, "grad_norm": 0.11126035451889038, "learning_rate": 0.0009969799061233988, "loss": 1.6344, "step": 4114 }, { "epoch": 0.1830065359477124, "grad_norm": 0.11547966301441193, "learning_rate": 0.0009969760294184144, "loss": 1.6331, "step": 4116 }, { "epoch": 0.18309546040638477, "grad_norm": 0.11593402922153473, "learning_rate": 0.0009969721502344308, "loss": 1.6322, "step": 4118 }, { "epoch": 0.18318438486505714, "grad_norm": 0.11121030151844025, "learning_rate": 0.0009969682685714678, "loss": 1.6341, "step": 4120 }, { "epoch": 0.18327330932372948, "grad_norm": 0.11206244677305222, "learning_rate": 0.0009969643844295445, "loss": 1.6368, "step": 4122 }, { "epoch": 0.18336223378240185, "grad_norm": 0.12471252679824829, "learning_rate": 0.0009969604978086806, "loss": 1.6369, "step": 4124 }, { "epoch": 0.18345115824107422, "grad_norm": 0.12352842092514038, "learning_rate": 0.000996956608708895, "loss": 1.6323, "step": 4126 }, { "epoch": 0.18354008269974656, "grad_norm": 0.11872182041406631, "learning_rate": 0.0009969527171302076, "loss": 1.6305, "step": 4128 }, { "epoch": 0.18362900715841893, "grad_norm": 0.11692368239164352, "learning_rate": 0.0009969488230726373, "loss": 1.6387, "step": 4130 }, { "epoch": 0.18371793161709127, "grad_norm": 0.12516173720359802, "learning_rate": 0.000996944926536204, "loss": 1.6325, "step": 4132 }, { "epoch": 0.18380685607576364, "grad_norm": 0.1195317879319191, "learning_rate": 0.0009969410275209266, "loss": 1.6341, "step": 4134 }, { "epoch": 0.183895780534436, "grad_norm": 0.117120161652565, "learning_rate": 0.000996937126026825, "loss": 1.6323, "step": 4136 }, { "epoch": 0.18398470499310834, "grad_norm": 0.11949758231639862, "learning_rate": 0.0009969332220539186, "loss": 1.6385, "step": 4138 }, { "epoch": 0.1840736294517807, "grad_norm": 0.12189013510942459, "learning_rate": 0.0009969293156022268, "loss": 1.6276, "step": 4140 }, { "epoch": 0.18416255391045308, "grad_norm": 0.12576699256896973, "learning_rate": 0.000996925406671769, "loss": 1.6291, "step": 4142 }, { "epoch": 0.18425147836912542, "grad_norm": 0.12639765441417694, "learning_rate": 0.000996921495262565, "loss": 1.6325, "step": 4144 }, { "epoch": 0.1843404028277978, "grad_norm": 0.11879011988639832, "learning_rate": 0.000996917581374634, "loss": 1.6319, "step": 4146 }, { "epoch": 0.18442932728647016, "grad_norm": 0.11668401211500168, "learning_rate": 0.0009969136650079954, "loss": 1.6372, "step": 4148 }, { "epoch": 0.1845182517451425, "grad_norm": 0.11744611710309982, "learning_rate": 0.000996909746162669, "loss": 1.6369, "step": 4150 }, { "epoch": 0.18460717620381487, "grad_norm": 0.12382924556732178, "learning_rate": 0.0009969058248386745, "loss": 1.6325, "step": 4152 }, { "epoch": 0.1846961006624872, "grad_norm": 0.1276998668909073, "learning_rate": 0.0009969019010360313, "loss": 1.6285, "step": 4154 }, { "epoch": 0.18478502512115957, "grad_norm": 0.1217699944972992, "learning_rate": 0.0009968979747547587, "loss": 1.6361, "step": 4156 }, { "epoch": 0.18487394957983194, "grad_norm": 0.12193545699119568, "learning_rate": 0.0009968940459948769, "loss": 1.637, "step": 4158 }, { "epoch": 0.18496287403850428, "grad_norm": 0.11924280226230621, "learning_rate": 0.0009968901147564047, "loss": 1.641, "step": 4160 }, { "epoch": 0.18505179849717665, "grad_norm": 0.11626352369785309, "learning_rate": 0.0009968861810393623, "loss": 1.6306, "step": 4162 }, { "epoch": 0.18514072295584902, "grad_norm": 0.1163124293088913, "learning_rate": 0.000996882244843769, "loss": 1.6318, "step": 4164 }, { "epoch": 0.18522964741452136, "grad_norm": 0.11747079342603683, "learning_rate": 0.0009968783061696448, "loss": 1.6351, "step": 4166 }, { "epoch": 0.18531857187319373, "grad_norm": 0.11650542914867401, "learning_rate": 0.000996874365017009, "loss": 1.6323, "step": 4168 }, { "epoch": 0.18540749633186607, "grad_norm": 0.12692221999168396, "learning_rate": 0.0009968704213858814, "loss": 1.6342, "step": 4170 }, { "epoch": 0.18549642079053844, "grad_norm": 0.11849970370531082, "learning_rate": 0.0009968664752762818, "loss": 1.6372, "step": 4172 }, { "epoch": 0.1855853452492108, "grad_norm": 0.11646901071071625, "learning_rate": 0.0009968625266882295, "loss": 1.6309, "step": 4174 }, { "epoch": 0.18567426970788314, "grad_norm": 0.1176559180021286, "learning_rate": 0.0009968585756217447, "loss": 1.6308, "step": 4176 }, { "epoch": 0.1857631941665555, "grad_norm": 0.11736711859703064, "learning_rate": 0.0009968546220768468, "loss": 1.638, "step": 4178 }, { "epoch": 0.18585211862522788, "grad_norm": 0.11711812019348145, "learning_rate": 0.0009968506660535555, "loss": 1.6333, "step": 4180 }, { "epoch": 0.18594104308390022, "grad_norm": 0.12779754400253296, "learning_rate": 0.0009968467075518906, "loss": 1.6394, "step": 4182 }, { "epoch": 0.1860299675425726, "grad_norm": 0.11566952615976334, "learning_rate": 0.000996842746571872, "loss": 1.6316, "step": 4184 }, { "epoch": 0.18611889200124493, "grad_norm": 0.11619218438863754, "learning_rate": 0.000996838783113519, "loss": 1.6308, "step": 4186 }, { "epoch": 0.1862078164599173, "grad_norm": 0.11629801988601685, "learning_rate": 0.000996834817176852, "loss": 1.6298, "step": 4188 }, { "epoch": 0.18629674091858967, "grad_norm": 0.11523006111383438, "learning_rate": 0.0009968308487618902, "loss": 1.6338, "step": 4190 }, { "epoch": 0.186385665377262, "grad_norm": 0.11307571083307266, "learning_rate": 0.0009968268778686538, "loss": 1.6333, "step": 4192 }, { "epoch": 0.18647458983593437, "grad_norm": 0.11505776643753052, "learning_rate": 0.0009968229044971624, "loss": 1.6365, "step": 4194 }, { "epoch": 0.18656351429460674, "grad_norm": 0.11206153780221939, "learning_rate": 0.000996818928647436, "loss": 1.6315, "step": 4196 }, { "epoch": 0.18665243875327908, "grad_norm": 0.11358271539211273, "learning_rate": 0.0009968149503194943, "loss": 1.625, "step": 4198 }, { "epoch": 0.18674136321195145, "grad_norm": 0.11253488063812256, "learning_rate": 0.000996810969513357, "loss": 1.6295, "step": 4200 }, { "epoch": 0.18683028767062382, "grad_norm": 0.10948602855205536, "learning_rate": 0.0009968069862290442, "loss": 1.6361, "step": 4202 }, { "epoch": 0.18691921212929616, "grad_norm": 0.11469581723213196, "learning_rate": 0.0009968030004665759, "loss": 1.6329, "step": 4204 }, { "epoch": 0.18700813658796853, "grad_norm": 0.12032172828912735, "learning_rate": 0.0009967990122259714, "loss": 1.6314, "step": 4206 }, { "epoch": 0.18709706104664087, "grad_norm": 0.12292454391717911, "learning_rate": 0.0009967950215072512, "loss": 1.6348, "step": 4208 }, { "epoch": 0.18718598550531323, "grad_norm": 0.1108386218547821, "learning_rate": 0.0009967910283104349, "loss": 1.6317, "step": 4210 }, { "epoch": 0.1872749099639856, "grad_norm": 0.11762715131044388, "learning_rate": 0.0009967870326355423, "loss": 1.6301, "step": 4212 }, { "epoch": 0.18736383442265794, "grad_norm": 0.1212281808257103, "learning_rate": 0.0009967830344825939, "loss": 1.6304, "step": 4214 }, { "epoch": 0.1874527588813303, "grad_norm": 0.11403360962867737, "learning_rate": 0.0009967790338516092, "loss": 1.6324, "step": 4216 }, { "epoch": 0.18754168334000268, "grad_norm": 0.11312601715326309, "learning_rate": 0.000996775030742608, "loss": 1.634, "step": 4218 }, { "epoch": 0.18763060779867502, "grad_norm": 0.11520449817180634, "learning_rate": 0.0009967710251556105, "loss": 1.6309, "step": 4220 }, { "epoch": 0.1877195322573474, "grad_norm": 0.11794621497392654, "learning_rate": 0.0009967670170906368, "loss": 1.633, "step": 4222 }, { "epoch": 0.18780845671601973, "grad_norm": 0.11893275380134583, "learning_rate": 0.0009967630065477067, "loss": 1.6297, "step": 4224 }, { "epoch": 0.1878973811746921, "grad_norm": 0.11467410624027252, "learning_rate": 0.0009967589935268402, "loss": 1.63, "step": 4226 }, { "epoch": 0.18798630563336446, "grad_norm": 0.11384375393390656, "learning_rate": 0.0009967549780280575, "loss": 1.624, "step": 4228 }, { "epoch": 0.1880752300920368, "grad_norm": 0.11653546243906021, "learning_rate": 0.0009967509600513785, "loss": 1.6286, "step": 4230 }, { "epoch": 0.18816415455070917, "grad_norm": 0.11761261522769928, "learning_rate": 0.0009967469395968233, "loss": 1.629, "step": 4232 }, { "epoch": 0.18825307900938154, "grad_norm": 0.11235671490430832, "learning_rate": 0.0009967429166644118, "loss": 1.6301, "step": 4234 }, { "epoch": 0.18834200346805388, "grad_norm": 0.11122483015060425, "learning_rate": 0.000996738891254164, "loss": 1.6322, "step": 4236 }, { "epoch": 0.18843092792672625, "grad_norm": 0.11003600060939789, "learning_rate": 0.0009967348633661004, "loss": 1.6245, "step": 4238 }, { "epoch": 0.18851985238539862, "grad_norm": 0.12158048897981644, "learning_rate": 0.000996730833000241, "loss": 1.6395, "step": 4240 }, { "epoch": 0.18860877684407096, "grad_norm": 0.12556138634681702, "learning_rate": 0.0009967268001566053, "loss": 1.6307, "step": 4242 }, { "epoch": 0.18869770130274333, "grad_norm": 0.1180553287267685, "learning_rate": 0.0009967227648352143, "loss": 1.625, "step": 4244 }, { "epoch": 0.18878662576141567, "grad_norm": 0.11684153228998184, "learning_rate": 0.0009967187270360875, "loss": 1.6378, "step": 4246 }, { "epoch": 0.18887555022008803, "grad_norm": 0.11572734266519547, "learning_rate": 0.000996714686759245, "loss": 1.6324, "step": 4248 }, { "epoch": 0.1889644746787604, "grad_norm": 0.1246868148446083, "learning_rate": 0.0009967106440047077, "loss": 1.6308, "step": 4250 }, { "epoch": 0.18905339913743274, "grad_norm": 0.13137935101985931, "learning_rate": 0.0009967065987724951, "loss": 1.627, "step": 4252 }, { "epoch": 0.1891423235961051, "grad_norm": 0.12096533179283142, "learning_rate": 0.0009967025510626273, "loss": 1.6227, "step": 4254 }, { "epoch": 0.18923124805477748, "grad_norm": 0.11495944112539291, "learning_rate": 0.0009966985008751249, "loss": 1.6305, "step": 4256 }, { "epoch": 0.18932017251344982, "grad_norm": 0.10990019887685776, "learning_rate": 0.000996694448210008, "loss": 1.6281, "step": 4258 }, { "epoch": 0.1894090969721222, "grad_norm": 0.12176516652107239, "learning_rate": 0.0009966903930672965, "loss": 1.6315, "step": 4260 }, { "epoch": 0.18949802143079453, "grad_norm": 0.11939161270856857, "learning_rate": 0.000996686335447011, "loss": 1.6318, "step": 4262 }, { "epoch": 0.1895869458894669, "grad_norm": 0.11153752356767654, "learning_rate": 0.0009966822753491717, "loss": 1.6257, "step": 4264 }, { "epoch": 0.18967587034813926, "grad_norm": 0.10733985155820847, "learning_rate": 0.0009966782127737988, "loss": 1.6348, "step": 4266 }, { "epoch": 0.1897647948068116, "grad_norm": 0.1079716607928276, "learning_rate": 0.0009966741477209125, "loss": 1.6324, "step": 4268 }, { "epoch": 0.18985371926548397, "grad_norm": 0.11477085947990417, "learning_rate": 0.0009966700801905329, "loss": 1.6324, "step": 4270 }, { "epoch": 0.18994264372415634, "grad_norm": 0.12020217627286911, "learning_rate": 0.0009966660101826807, "loss": 1.6279, "step": 4272 }, { "epoch": 0.19003156818282868, "grad_norm": 0.1192086786031723, "learning_rate": 0.0009966619376973757, "loss": 1.6281, "step": 4274 }, { "epoch": 0.19012049264150105, "grad_norm": 0.11376933008432388, "learning_rate": 0.000996657862734639, "loss": 1.6346, "step": 4276 }, { "epoch": 0.1902094171001734, "grad_norm": 0.12346582859754562, "learning_rate": 0.0009966537852944901, "loss": 1.634, "step": 4278 }, { "epoch": 0.19029834155884576, "grad_norm": 0.11636168509721756, "learning_rate": 0.0009966497053769498, "loss": 1.6221, "step": 4280 }, { "epoch": 0.19038726601751813, "grad_norm": 0.11525320261716843, "learning_rate": 0.0009966456229820381, "loss": 1.6296, "step": 4282 }, { "epoch": 0.19047619047619047, "grad_norm": 0.11658184975385666, "learning_rate": 0.000996641538109776, "loss": 1.6273, "step": 4284 }, { "epoch": 0.19056511493486283, "grad_norm": 0.11971661448478699, "learning_rate": 0.0009966374507601832, "loss": 1.6275, "step": 4286 }, { "epoch": 0.1906540393935352, "grad_norm": 0.11325690895318985, "learning_rate": 0.0009966333609332805, "loss": 1.6265, "step": 4288 }, { "epoch": 0.19074296385220754, "grad_norm": 0.1084250956773758, "learning_rate": 0.000996629268629088, "loss": 1.6298, "step": 4290 }, { "epoch": 0.1908318883108799, "grad_norm": 0.11336717754602432, "learning_rate": 0.0009966251738476265, "loss": 1.6285, "step": 4292 }, { "epoch": 0.19092081276955228, "grad_norm": 0.11266341805458069, "learning_rate": 0.0009966210765889158, "loss": 1.6251, "step": 4294 }, { "epoch": 0.19100973722822462, "grad_norm": 0.1167646199464798, "learning_rate": 0.000996616976852977, "loss": 1.6338, "step": 4296 }, { "epoch": 0.191098661686897, "grad_norm": 0.1174037903547287, "learning_rate": 0.0009966128746398302, "loss": 1.6337, "step": 4298 }, { "epoch": 0.19118758614556933, "grad_norm": 0.12295513600111008, "learning_rate": 0.000996608769949496, "loss": 1.6288, "step": 4300 }, { "epoch": 0.1912765106042417, "grad_norm": 0.11581303924322128, "learning_rate": 0.000996604662781995, "loss": 1.6275, "step": 4302 }, { "epoch": 0.19136543506291406, "grad_norm": 0.11440426111221313, "learning_rate": 0.0009966005531373474, "loss": 1.6265, "step": 4304 }, { "epoch": 0.1914543595215864, "grad_norm": 0.11269820481538773, "learning_rate": 0.0009965964410155737, "loss": 1.6274, "step": 4306 }, { "epoch": 0.19154328398025877, "grad_norm": 0.11619145423173904, "learning_rate": 0.0009965923264166946, "loss": 1.6227, "step": 4308 }, { "epoch": 0.19163220843893114, "grad_norm": 0.10982932150363922, "learning_rate": 0.0009965882093407304, "loss": 1.6331, "step": 4310 }, { "epoch": 0.19172113289760348, "grad_norm": 0.11294446885585785, "learning_rate": 0.0009965840897877018, "loss": 1.6257, "step": 4312 }, { "epoch": 0.19181005735627585, "grad_norm": 0.11385387182235718, "learning_rate": 0.0009965799677576295, "loss": 1.631, "step": 4314 }, { "epoch": 0.1918989818149482, "grad_norm": 0.11767963320016861, "learning_rate": 0.0009965758432505338, "loss": 1.6264, "step": 4316 }, { "epoch": 0.19198790627362056, "grad_norm": 0.11952953040599823, "learning_rate": 0.0009965717162664354, "loss": 1.6238, "step": 4318 }, { "epoch": 0.19207683073229292, "grad_norm": 0.11729772388935089, "learning_rate": 0.000996567586805355, "loss": 1.6218, "step": 4320 }, { "epoch": 0.19216575519096527, "grad_norm": 0.12014216929674149, "learning_rate": 0.0009965634548673129, "loss": 1.6236, "step": 4322 }, { "epoch": 0.19225467964963763, "grad_norm": 0.11585142463445663, "learning_rate": 0.0009965593204523298, "loss": 1.6249, "step": 4324 }, { "epoch": 0.19234360410831, "grad_norm": 0.11284307390451431, "learning_rate": 0.0009965551835604265, "loss": 1.6247, "step": 4326 }, { "epoch": 0.19243252856698234, "grad_norm": 0.11561114341020584, "learning_rate": 0.0009965510441916235, "loss": 1.6257, "step": 4328 }, { "epoch": 0.1925214530256547, "grad_norm": 0.11217344552278519, "learning_rate": 0.0009965469023459413, "loss": 1.6261, "step": 4330 }, { "epoch": 0.19261037748432708, "grad_norm": 0.11755629628896713, "learning_rate": 0.000996542758023401, "loss": 1.633, "step": 4332 }, { "epoch": 0.19269930194299942, "grad_norm": 0.11235187947750092, "learning_rate": 0.0009965386112240227, "loss": 1.627, "step": 4334 }, { "epoch": 0.1927882264016718, "grad_norm": 0.11830323189496994, "learning_rate": 0.0009965344619478275, "loss": 1.6281, "step": 4336 }, { "epoch": 0.19287715086034413, "grad_norm": 0.11256743967533112, "learning_rate": 0.0009965303101948362, "loss": 1.6258, "step": 4338 }, { "epoch": 0.1929660753190165, "grad_norm": 0.11304402351379395, "learning_rate": 0.0009965261559650692, "loss": 1.6223, "step": 4340 }, { "epoch": 0.19305499977768886, "grad_norm": 0.11517365276813507, "learning_rate": 0.000996521999258547, "loss": 1.6252, "step": 4342 }, { "epoch": 0.1931439242363612, "grad_norm": 0.11518683284521103, "learning_rate": 0.000996517840075291, "loss": 1.6261, "step": 4344 }, { "epoch": 0.19323284869503357, "grad_norm": 0.10763294994831085, "learning_rate": 0.0009965136784153214, "loss": 1.6236, "step": 4346 }, { "epoch": 0.19332177315370594, "grad_norm": 0.11887557059526443, "learning_rate": 0.0009965095142786594, "loss": 1.6274, "step": 4348 }, { "epoch": 0.19341069761237828, "grad_norm": 0.11843670904636383, "learning_rate": 0.000996505347665325, "loss": 1.6258, "step": 4350 }, { "epoch": 0.19349962207105065, "grad_norm": 0.12076882272958755, "learning_rate": 0.0009965011785753402, "loss": 1.6246, "step": 4352 }, { "epoch": 0.193588546529723, "grad_norm": 0.11038120090961456, "learning_rate": 0.0009964970070087246, "loss": 1.6222, "step": 4354 }, { "epoch": 0.19367747098839536, "grad_norm": 0.11580009013414383, "learning_rate": 0.0009964928329654998, "loss": 1.6271, "step": 4356 }, { "epoch": 0.19376639544706772, "grad_norm": 0.11985108256340027, "learning_rate": 0.0009964886564456862, "loss": 1.6326, "step": 4358 }, { "epoch": 0.19385531990574006, "grad_norm": 0.11022962629795074, "learning_rate": 0.0009964844774493049, "loss": 1.6277, "step": 4360 }, { "epoch": 0.19394424436441243, "grad_norm": 0.11388500034809113, "learning_rate": 0.0009964802959763765, "loss": 1.6208, "step": 4362 }, { "epoch": 0.1940331688230848, "grad_norm": 0.11820525676012039, "learning_rate": 0.0009964761120269219, "loss": 1.6211, "step": 4364 }, { "epoch": 0.19412209328175714, "grad_norm": 0.1088062971830368, "learning_rate": 0.0009964719256009623, "loss": 1.6206, "step": 4366 }, { "epoch": 0.1942110177404295, "grad_norm": 0.11145839095115662, "learning_rate": 0.000996467736698518, "loss": 1.6248, "step": 4368 }, { "epoch": 0.19429994219910185, "grad_norm": 0.11437372118234634, "learning_rate": 0.0009964635453196104, "loss": 1.629, "step": 4370 }, { "epoch": 0.19438886665777422, "grad_norm": 0.11181259900331497, "learning_rate": 0.0009964593514642601, "loss": 1.6234, "step": 4372 }, { "epoch": 0.19447779111644659, "grad_norm": 0.10853833705186844, "learning_rate": 0.0009964551551324883, "loss": 1.6206, "step": 4374 }, { "epoch": 0.19456671557511893, "grad_norm": 0.11572292447090149, "learning_rate": 0.0009964509563243158, "loss": 1.6234, "step": 4376 }, { "epoch": 0.1946556400337913, "grad_norm": 0.1133660301566124, "learning_rate": 0.0009964467550397634, "loss": 1.6261, "step": 4378 }, { "epoch": 0.19474456449246366, "grad_norm": 0.1107514351606369, "learning_rate": 0.0009964425512788522, "loss": 1.6302, "step": 4380 }, { "epoch": 0.194833488951136, "grad_norm": 0.11202812194824219, "learning_rate": 0.0009964383450416032, "loss": 1.6259, "step": 4382 }, { "epoch": 0.19492241340980837, "grad_norm": 0.11862754821777344, "learning_rate": 0.0009964341363280374, "loss": 1.6295, "step": 4384 }, { "epoch": 0.19501133786848074, "grad_norm": 0.11439205706119537, "learning_rate": 0.0009964299251381756, "loss": 1.6283, "step": 4386 }, { "epoch": 0.19510026232715308, "grad_norm": 0.11098136007785797, "learning_rate": 0.000996425711472039, "loss": 1.6241, "step": 4388 }, { "epoch": 0.19518918678582545, "grad_norm": 0.12104503810405731, "learning_rate": 0.0009964214953296483, "loss": 1.6298, "step": 4390 }, { "epoch": 0.1952781112444978, "grad_norm": 0.1151038259267807, "learning_rate": 0.000996417276711025, "loss": 1.6237, "step": 4392 }, { "epoch": 0.19536703570317016, "grad_norm": 0.12040484696626663, "learning_rate": 0.00099641305561619, "loss": 1.6284, "step": 4394 }, { "epoch": 0.19545596016184252, "grad_norm": 0.11409257352352142, "learning_rate": 0.0009964088320451642, "loss": 1.6252, "step": 4396 }, { "epoch": 0.19554488462051486, "grad_norm": 0.1078067272901535, "learning_rate": 0.0009964046059979688, "loss": 1.6247, "step": 4398 }, { "epoch": 0.19563380907918723, "grad_norm": 0.11023259162902832, "learning_rate": 0.0009964003774746246, "loss": 1.6276, "step": 4400 }, { "epoch": 0.1957227335378596, "grad_norm": 0.11113858222961426, "learning_rate": 0.0009963961464751533, "loss": 1.6256, "step": 4402 }, { "epoch": 0.19581165799653194, "grad_norm": 0.11487720906734467, "learning_rate": 0.0009963919129995754, "loss": 1.6264, "step": 4404 }, { "epoch": 0.1959005824552043, "grad_norm": 0.10912340879440308, "learning_rate": 0.0009963876770479121, "loss": 1.632, "step": 4406 }, { "epoch": 0.19598950691387665, "grad_norm": 0.11209183931350708, "learning_rate": 0.0009963834386201849, "loss": 1.6318, "step": 4408 }, { "epoch": 0.19607843137254902, "grad_norm": 0.10975425690412521, "learning_rate": 0.0009963791977164146, "loss": 1.6252, "step": 4410 }, { "epoch": 0.19616735583122138, "grad_norm": 0.11096843332052231, "learning_rate": 0.0009963749543366225, "loss": 1.623, "step": 4412 }, { "epoch": 0.19625628028989373, "grad_norm": 0.10731083899736404, "learning_rate": 0.0009963707084808299, "loss": 1.6265, "step": 4414 }, { "epoch": 0.1963452047485661, "grad_norm": 0.10330415517091751, "learning_rate": 0.0009963664601490574, "loss": 1.6283, "step": 4416 }, { "epoch": 0.19643412920723846, "grad_norm": 0.10680415481328964, "learning_rate": 0.000996362209341327, "loss": 1.6248, "step": 4418 }, { "epoch": 0.1965230536659108, "grad_norm": 0.11378171294927597, "learning_rate": 0.0009963579560576593, "loss": 1.6208, "step": 4420 }, { "epoch": 0.19661197812458317, "grad_norm": 0.11050064116716385, "learning_rate": 0.0009963537002980758, "loss": 1.619, "step": 4422 }, { "epoch": 0.19670090258325554, "grad_norm": 0.11730819940567017, "learning_rate": 0.0009963494420625976, "loss": 1.6275, "step": 4424 }, { "epoch": 0.19678982704192788, "grad_norm": 0.11182375252246857, "learning_rate": 0.000996345181351246, "loss": 1.6219, "step": 4426 }, { "epoch": 0.19687875150060025, "grad_norm": 0.10767224431037903, "learning_rate": 0.0009963409181640423, "loss": 1.6238, "step": 4428 }, { "epoch": 0.1969676759592726, "grad_norm": 0.10671267658472061, "learning_rate": 0.0009963366525010077, "loss": 1.6212, "step": 4430 }, { "epoch": 0.19705660041794495, "grad_norm": 0.10829255729913712, "learning_rate": 0.0009963323843621634, "loss": 1.6243, "step": 4432 }, { "epoch": 0.19714552487661732, "grad_norm": 0.11310666054487228, "learning_rate": 0.0009963281137475308, "loss": 1.6223, "step": 4434 }, { "epoch": 0.19723444933528966, "grad_norm": 0.110753633081913, "learning_rate": 0.0009963238406571313, "loss": 1.6255, "step": 4436 }, { "epoch": 0.19732337379396203, "grad_norm": 0.11032912880182266, "learning_rate": 0.000996319565090986, "loss": 1.6242, "step": 4438 }, { "epoch": 0.1974122982526344, "grad_norm": 0.11288750916719437, "learning_rate": 0.0009963152870491163, "loss": 1.6233, "step": 4440 }, { "epoch": 0.19750122271130674, "grad_norm": 0.11633003503084183, "learning_rate": 0.0009963110065315437, "loss": 1.6237, "step": 4442 }, { "epoch": 0.1975901471699791, "grad_norm": 0.11502166092395782, "learning_rate": 0.0009963067235382894, "loss": 1.6245, "step": 4444 }, { "epoch": 0.19767907162865145, "grad_norm": 0.11149843782186508, "learning_rate": 0.0009963024380693745, "loss": 1.6264, "step": 4446 }, { "epoch": 0.19776799608732382, "grad_norm": 0.10773973912000656, "learning_rate": 0.0009962981501248208, "loss": 1.6213, "step": 4448 }, { "epoch": 0.19785692054599618, "grad_norm": 0.1118425726890564, "learning_rate": 0.0009962938597046498, "loss": 1.6326, "step": 4450 }, { "epoch": 0.19794584500466852, "grad_norm": 0.1150190681219101, "learning_rate": 0.0009962895668088824, "loss": 1.6223, "step": 4452 }, { "epoch": 0.1980347694633409, "grad_norm": 0.11391531676054001, "learning_rate": 0.0009962852714375404, "loss": 1.6274, "step": 4454 }, { "epoch": 0.19812369392201326, "grad_norm": 0.11059886962175369, "learning_rate": 0.0009962809735906452, "loss": 1.6264, "step": 4456 }, { "epoch": 0.1982126183806856, "grad_norm": 0.11909958720207214, "learning_rate": 0.0009962766732682178, "loss": 1.6188, "step": 4458 }, { "epoch": 0.19830154283935797, "grad_norm": 0.11559052765369415, "learning_rate": 0.00099627237047028, "loss": 1.6273, "step": 4460 }, { "epoch": 0.1983904672980303, "grad_norm": 0.11191637068986893, "learning_rate": 0.0009962680651968534, "loss": 1.6214, "step": 4462 }, { "epoch": 0.19847939175670268, "grad_norm": 0.113275907933712, "learning_rate": 0.0009962637574479592, "loss": 1.6233, "step": 4464 }, { "epoch": 0.19856831621537505, "grad_norm": 0.10795215517282486, "learning_rate": 0.0009962594472236192, "loss": 1.6249, "step": 4466 }, { "epoch": 0.1986572406740474, "grad_norm": 0.11079131066799164, "learning_rate": 0.0009962551345238544, "loss": 1.6265, "step": 4468 }, { "epoch": 0.19874616513271975, "grad_norm": 0.10867098718881607, "learning_rate": 0.000996250819348687, "loss": 1.6255, "step": 4470 }, { "epoch": 0.19883508959139212, "grad_norm": 0.10837139934301376, "learning_rate": 0.000996246501698138, "loss": 1.6283, "step": 4472 }, { "epoch": 0.19892401405006446, "grad_norm": 0.10983740538358688, "learning_rate": 0.0009962421815722289, "loss": 1.6196, "step": 4474 }, { "epoch": 0.19901293850873683, "grad_norm": 0.107894666492939, "learning_rate": 0.0009962378589709814, "loss": 1.6216, "step": 4476 }, { "epoch": 0.1991018629674092, "grad_norm": 0.10687065124511719, "learning_rate": 0.0009962335338944172, "loss": 1.6226, "step": 4478 }, { "epoch": 0.19919078742608154, "grad_norm": 0.10734596103429794, "learning_rate": 0.0009962292063425578, "loss": 1.6241, "step": 4480 }, { "epoch": 0.1992797118847539, "grad_norm": 0.1052248552441597, "learning_rate": 0.0009962248763154247, "loss": 1.6134, "step": 4482 }, { "epoch": 0.19936863634342625, "grad_norm": 0.10848673433065414, "learning_rate": 0.0009962205438130397, "loss": 1.6259, "step": 4484 }, { "epoch": 0.19945756080209862, "grad_norm": 0.10908637940883636, "learning_rate": 0.000996216208835424, "loss": 1.6229, "step": 4486 }, { "epoch": 0.19954648526077098, "grad_norm": 0.10700564086437225, "learning_rate": 0.0009962118713825996, "loss": 1.6207, "step": 4488 }, { "epoch": 0.19963540971944332, "grad_norm": 0.11299486458301544, "learning_rate": 0.0009962075314545883, "loss": 1.6296, "step": 4490 }, { "epoch": 0.1997243341781157, "grad_norm": 0.11249647289514542, "learning_rate": 0.000996203189051411, "loss": 1.6251, "step": 4492 }, { "epoch": 0.19981325863678806, "grad_norm": 0.10399112105369568, "learning_rate": 0.00099619884417309, "loss": 1.6197, "step": 4494 }, { "epoch": 0.1999021830954604, "grad_norm": 0.10901904106140137, "learning_rate": 0.000996194496819647, "loss": 1.6202, "step": 4496 }, { "epoch": 0.19999110755413277, "grad_norm": 0.11132825911045074, "learning_rate": 0.0009961901469911032, "loss": 1.6222, "step": 4498 }, { "epoch": 0.2000800320128051, "grad_norm": 0.10988521575927734, "learning_rate": 0.0009961857946874807, "loss": 1.6208, "step": 4500 }, { "epoch": 0.2000800320128051, "eval_loss": 1.5955942869186401, "eval_runtime": 12.3611, "eval_samples_per_second": 559.011, "eval_steps_per_second": 69.897, "step": 4500 }, { "epoch": 0.20016895647147748, "grad_norm": 0.11773038655519485, "learning_rate": 0.000996181439908801, "loss": 1.6182, "step": 4502 }, { "epoch": 0.20025788093014985, "grad_norm": 0.11245664954185486, "learning_rate": 0.0009961770826550861, "loss": 1.6263, "step": 4504 }, { "epoch": 0.20034680538882219, "grad_norm": 0.11493460088968277, "learning_rate": 0.0009961727229263575, "loss": 1.6264, "step": 4506 }, { "epoch": 0.20043572984749455, "grad_norm": 0.10975135862827301, "learning_rate": 0.0009961683607226368, "loss": 1.6218, "step": 4508 }, { "epoch": 0.20052465430616692, "grad_norm": 0.1100163534283638, "learning_rate": 0.0009961639960439463, "loss": 1.6255, "step": 4510 }, { "epoch": 0.20061357876483926, "grad_norm": 0.10604830086231232, "learning_rate": 0.0009961596288903072, "loss": 1.6221, "step": 4512 }, { "epoch": 0.20070250322351163, "grad_norm": 0.10983182489871979, "learning_rate": 0.0009961552592617415, "loss": 1.6193, "step": 4514 }, { "epoch": 0.200791427682184, "grad_norm": 0.1092047244310379, "learning_rate": 0.000996150887158271, "loss": 1.6215, "step": 4516 }, { "epoch": 0.20088035214085634, "grad_norm": 0.1078716516494751, "learning_rate": 0.0009961465125799176, "loss": 1.6246, "step": 4518 }, { "epoch": 0.2009692765995287, "grad_norm": 0.11041826754808426, "learning_rate": 0.0009961421355267031, "loss": 1.6225, "step": 4520 }, { "epoch": 0.20105820105820105, "grad_norm": 0.11641491949558258, "learning_rate": 0.0009961377559986491, "loss": 1.6173, "step": 4522 }, { "epoch": 0.20114712551687342, "grad_norm": 0.11122878640890121, "learning_rate": 0.0009961333739957779, "loss": 1.6179, "step": 4524 }, { "epoch": 0.20123604997554578, "grad_norm": 0.11828659474849701, "learning_rate": 0.0009961289895181107, "loss": 1.6227, "step": 4526 }, { "epoch": 0.20132497443421812, "grad_norm": 0.10966084152460098, "learning_rate": 0.00099612460256567, "loss": 1.6308, "step": 4528 }, { "epoch": 0.2014138988928905, "grad_norm": 0.11296512931585312, "learning_rate": 0.0009961202131384774, "loss": 1.6207, "step": 4530 }, { "epoch": 0.20150282335156286, "grad_norm": 0.10687365382909775, "learning_rate": 0.0009961158212365548, "loss": 1.6192, "step": 4532 }, { "epoch": 0.2015917478102352, "grad_norm": 0.10820367932319641, "learning_rate": 0.000996111426859924, "loss": 1.6254, "step": 4534 }, { "epoch": 0.20168067226890757, "grad_norm": 0.1103745698928833, "learning_rate": 0.0009961070300086073, "loss": 1.6171, "step": 4536 }, { "epoch": 0.2017695967275799, "grad_norm": 0.11085197329521179, "learning_rate": 0.0009961026306826261, "loss": 1.6233, "step": 4538 }, { "epoch": 0.20185852118625228, "grad_norm": 0.10982104390859604, "learning_rate": 0.000996098228882003, "loss": 1.6198, "step": 4540 }, { "epoch": 0.20194744564492464, "grad_norm": 0.11612139642238617, "learning_rate": 0.0009960938246067592, "loss": 1.6163, "step": 4542 }, { "epoch": 0.20203637010359699, "grad_norm": 0.11329114437103271, "learning_rate": 0.0009960894178569174, "loss": 1.6203, "step": 4544 }, { "epoch": 0.20212529456226935, "grad_norm": 0.11832458525896072, "learning_rate": 0.0009960850086324989, "loss": 1.6259, "step": 4546 }, { "epoch": 0.20221421902094172, "grad_norm": 0.1062120795249939, "learning_rate": 0.0009960805969335264, "loss": 1.6238, "step": 4548 }, { "epoch": 0.20230314347961406, "grad_norm": 0.10603608936071396, "learning_rate": 0.0009960761827600211, "loss": 1.6186, "step": 4550 }, { "epoch": 0.20239206793828643, "grad_norm": 0.10631296783685684, "learning_rate": 0.0009960717661120057, "loss": 1.6155, "step": 4552 }, { "epoch": 0.20248099239695877, "grad_norm": 0.11160627007484436, "learning_rate": 0.000996067346989502, "loss": 1.6248, "step": 4554 }, { "epoch": 0.20256991685563114, "grad_norm": 0.12129071354866028, "learning_rate": 0.0009960629253925322, "loss": 1.6211, "step": 4556 }, { "epoch": 0.2026588413143035, "grad_norm": 0.11960272490978241, "learning_rate": 0.0009960585013211178, "loss": 1.6191, "step": 4558 }, { "epoch": 0.20274776577297585, "grad_norm": 0.11304063349962234, "learning_rate": 0.0009960540747752816, "loss": 1.6207, "step": 4560 }, { "epoch": 0.20283669023164821, "grad_norm": 0.11380619555711746, "learning_rate": 0.000996049645755045, "loss": 1.615, "step": 4562 }, { "epoch": 0.20292561469032058, "grad_norm": 0.11661424487829208, "learning_rate": 0.0009960452142604307, "loss": 1.6163, "step": 4564 }, { "epoch": 0.20301453914899292, "grad_norm": 0.1175897866487503, "learning_rate": 0.0009960407802914605, "loss": 1.6242, "step": 4566 }, { "epoch": 0.2031034636076653, "grad_norm": 0.11345373094081879, "learning_rate": 0.0009960363438481564, "loss": 1.6157, "step": 4568 }, { "epoch": 0.20319238806633766, "grad_norm": 0.10861898958683014, "learning_rate": 0.000996031904930541, "loss": 1.6193, "step": 4570 }, { "epoch": 0.20328131252501, "grad_norm": 0.10877344012260437, "learning_rate": 0.0009960274635386357, "loss": 1.6176, "step": 4572 }, { "epoch": 0.20337023698368237, "grad_norm": 0.10538928955793381, "learning_rate": 0.0009960230196724633, "loss": 1.6164, "step": 4574 }, { "epoch": 0.2034591614423547, "grad_norm": 0.10377932339906693, "learning_rate": 0.0009960185733320457, "loss": 1.6172, "step": 4576 }, { "epoch": 0.20354808590102708, "grad_norm": 0.11017095297574997, "learning_rate": 0.000996014124517405, "loss": 1.61, "step": 4578 }, { "epoch": 0.20363701035969944, "grad_norm": 0.10805568099021912, "learning_rate": 0.0009960096732285638, "loss": 1.6151, "step": 4580 }, { "epoch": 0.20372593481837178, "grad_norm": 0.11047879606485367, "learning_rate": 0.0009960052194655437, "loss": 1.6191, "step": 4582 }, { "epoch": 0.20381485927704415, "grad_norm": 0.11243947595357895, "learning_rate": 0.0009960007632283674, "loss": 1.6166, "step": 4584 }, { "epoch": 0.20390378373571652, "grad_norm": 0.11238662898540497, "learning_rate": 0.000995996304517057, "loss": 1.6176, "step": 4586 }, { "epoch": 0.20399270819438886, "grad_norm": 0.11109507828950882, "learning_rate": 0.0009959918433316347, "loss": 1.6119, "step": 4588 }, { "epoch": 0.20408163265306123, "grad_norm": 0.11443541198968887, "learning_rate": 0.0009959873796721226, "loss": 1.6252, "step": 4590 }, { "epoch": 0.20417055711173357, "grad_norm": 0.11701706796884537, "learning_rate": 0.0009959829135385433, "loss": 1.6235, "step": 4592 }, { "epoch": 0.20425948157040594, "grad_norm": 0.11349605768918991, "learning_rate": 0.0009959784449309187, "loss": 1.617, "step": 4594 }, { "epoch": 0.2043484060290783, "grad_norm": 0.11320377141237259, "learning_rate": 0.0009959739738492717, "loss": 1.6217, "step": 4596 }, { "epoch": 0.20443733048775065, "grad_norm": 0.10560119152069092, "learning_rate": 0.0009959695002936238, "loss": 1.6176, "step": 4598 }, { "epoch": 0.20452625494642301, "grad_norm": 0.11200706660747528, "learning_rate": 0.0009959650242639976, "loss": 1.6199, "step": 4600 }, { "epoch": 0.20461517940509538, "grad_norm": 0.11499829590320587, "learning_rate": 0.000995960545760416, "loss": 1.6197, "step": 4602 }, { "epoch": 0.20470410386376772, "grad_norm": 0.10916129499673843, "learning_rate": 0.0009959560647829005, "loss": 1.6166, "step": 4604 }, { "epoch": 0.2047930283224401, "grad_norm": 0.11446643620729446, "learning_rate": 0.000995951581331474, "loss": 1.6173, "step": 4606 }, { "epoch": 0.20488195278111246, "grad_norm": 0.11069472879171371, "learning_rate": 0.0009959470954061585, "loss": 1.6155, "step": 4608 }, { "epoch": 0.2049708772397848, "grad_norm": 0.11012015491724014, "learning_rate": 0.000995942607006977, "loss": 1.6196, "step": 4610 }, { "epoch": 0.20505980169845717, "grad_norm": 0.10821570456027985, "learning_rate": 0.000995938116133951, "loss": 1.6182, "step": 4612 }, { "epoch": 0.2051487261571295, "grad_norm": 0.11206888407468796, "learning_rate": 0.0009959336227871034, "loss": 1.618, "step": 4614 }, { "epoch": 0.20523765061580188, "grad_norm": 0.11033768951892853, "learning_rate": 0.0009959291269664567, "loss": 1.6268, "step": 4616 }, { "epoch": 0.20532657507447424, "grad_norm": 0.10916636139154434, "learning_rate": 0.000995924628672033, "loss": 1.6132, "step": 4618 }, { "epoch": 0.20541549953314658, "grad_norm": 0.10948532074689865, "learning_rate": 0.0009959201279038552, "loss": 1.6132, "step": 4620 }, { "epoch": 0.20550442399181895, "grad_norm": 0.1057814285159111, "learning_rate": 0.0009959156246619454, "loss": 1.6169, "step": 4622 }, { "epoch": 0.20559334845049132, "grad_norm": 0.10795042663812637, "learning_rate": 0.000995911118946326, "loss": 1.6187, "step": 4624 }, { "epoch": 0.20568227290916366, "grad_norm": 0.10685425996780396, "learning_rate": 0.00099590661075702, "loss": 1.6212, "step": 4626 }, { "epoch": 0.20577119736783603, "grad_norm": 0.11638550460338593, "learning_rate": 0.000995902100094049, "loss": 1.6247, "step": 4628 }, { "epoch": 0.20586012182650837, "grad_norm": 0.1139526218175888, "learning_rate": 0.0009958975869574366, "loss": 1.6143, "step": 4630 }, { "epoch": 0.20594904628518074, "grad_norm": 0.1081729456782341, "learning_rate": 0.0009958930713472043, "loss": 1.617, "step": 4632 }, { "epoch": 0.2060379707438531, "grad_norm": 0.11640775948762894, "learning_rate": 0.0009958885532633751, "loss": 1.6124, "step": 4634 }, { "epoch": 0.20612689520252545, "grad_norm": 0.11649386584758759, "learning_rate": 0.0009958840327059714, "loss": 1.6203, "step": 4636 }, { "epoch": 0.2062158196611978, "grad_norm": 0.10759291797876358, "learning_rate": 0.000995879509675016, "loss": 1.617, "step": 4638 }, { "epoch": 0.20630474411987018, "grad_norm": 0.10583692044019699, "learning_rate": 0.0009958749841705312, "loss": 1.6198, "step": 4640 }, { "epoch": 0.20639366857854252, "grad_norm": 0.10626080632209778, "learning_rate": 0.0009958704561925397, "loss": 1.6194, "step": 4642 }, { "epoch": 0.2064825930372149, "grad_norm": 0.10711178928613663, "learning_rate": 0.0009958659257410642, "loss": 1.6159, "step": 4644 }, { "epoch": 0.20657151749588723, "grad_norm": 0.11228474974632263, "learning_rate": 0.000995861392816127, "loss": 1.622, "step": 4646 }, { "epoch": 0.2066604419545596, "grad_norm": 0.10673855245113373, "learning_rate": 0.000995856857417751, "loss": 1.6169, "step": 4648 }, { "epoch": 0.20674936641323197, "grad_norm": 0.10420949757099152, "learning_rate": 0.0009958523195459585, "loss": 1.6168, "step": 4650 }, { "epoch": 0.2068382908719043, "grad_norm": 0.10757365077733994, "learning_rate": 0.0009958477792007723, "loss": 1.6188, "step": 4652 }, { "epoch": 0.20692721533057667, "grad_norm": 0.10664011538028717, "learning_rate": 0.0009958432363822152, "loss": 1.6173, "step": 4654 }, { "epoch": 0.20701613978924904, "grad_norm": 0.11007332801818848, "learning_rate": 0.0009958386910903097, "loss": 1.6143, "step": 4656 }, { "epoch": 0.20710506424792138, "grad_norm": 0.11352553218603134, "learning_rate": 0.0009958341433250785, "loss": 1.6141, "step": 4658 }, { "epoch": 0.20719398870659375, "grad_norm": 0.10892893373966217, "learning_rate": 0.0009958295930865442, "loss": 1.6205, "step": 4660 }, { "epoch": 0.20728291316526612, "grad_norm": 0.10752708464860916, "learning_rate": 0.0009958250403747297, "loss": 1.6129, "step": 4662 }, { "epoch": 0.20737183762393846, "grad_norm": 0.10832353681325912, "learning_rate": 0.0009958204851896577, "loss": 1.6208, "step": 4664 }, { "epoch": 0.20746076208261083, "grad_norm": 0.11221420764923096, "learning_rate": 0.0009958159275313505, "loss": 1.6194, "step": 4666 }, { "epoch": 0.20754968654128317, "grad_norm": 0.10880377888679504, "learning_rate": 0.0009958113673998316, "loss": 1.6143, "step": 4668 }, { "epoch": 0.20763861099995554, "grad_norm": 0.10734687000513077, "learning_rate": 0.0009958068047951228, "loss": 1.6223, "step": 4670 }, { "epoch": 0.2077275354586279, "grad_norm": 0.11376973241567612, "learning_rate": 0.0009958022397172476, "loss": 1.6145, "step": 4672 }, { "epoch": 0.20781645991730024, "grad_norm": 0.11082089692354202, "learning_rate": 0.0009957976721662286, "loss": 1.6207, "step": 4674 }, { "epoch": 0.2079053843759726, "grad_norm": 0.1084550991654396, "learning_rate": 0.0009957931021420884, "loss": 1.6231, "step": 4676 }, { "epoch": 0.20799430883464498, "grad_norm": 0.11070792376995087, "learning_rate": 0.00099578852964485, "loss": 1.6124, "step": 4678 }, { "epoch": 0.20808323329331732, "grad_norm": 0.1046636626124382, "learning_rate": 0.000995783954674536, "loss": 1.619, "step": 4680 }, { "epoch": 0.2081721577519897, "grad_norm": 0.11071905493736267, "learning_rate": 0.0009957793772311695, "loss": 1.6193, "step": 4682 }, { "epoch": 0.20826108221066203, "grad_norm": 0.1142248883843422, "learning_rate": 0.000995774797314773, "loss": 1.6173, "step": 4684 }, { "epoch": 0.2083500066693344, "grad_norm": 0.10734932124614716, "learning_rate": 0.0009957702149253696, "loss": 1.618, "step": 4686 }, { "epoch": 0.20843893112800677, "grad_norm": 0.1084061935544014, "learning_rate": 0.000995765630062982, "loss": 1.6178, "step": 4688 }, { "epoch": 0.2085278555866791, "grad_norm": 0.11053569614887238, "learning_rate": 0.0009957610427276335, "loss": 1.6205, "step": 4690 }, { "epoch": 0.20861678004535147, "grad_norm": 0.10548976063728333, "learning_rate": 0.0009957564529193461, "loss": 1.6156, "step": 4692 }, { "epoch": 0.20870570450402384, "grad_norm": 0.10936005413532257, "learning_rate": 0.0009957518606381434, "loss": 1.6242, "step": 4694 }, { "epoch": 0.20879462896269618, "grad_norm": 0.11412529647350311, "learning_rate": 0.0009957472658840482, "loss": 1.6182, "step": 4696 }, { "epoch": 0.20888355342136855, "grad_norm": 0.1128493994474411, "learning_rate": 0.0009957426686570835, "loss": 1.6187, "step": 4698 }, { "epoch": 0.20897247788004092, "grad_norm": 0.11843925714492798, "learning_rate": 0.0009957380689572719, "loss": 1.619, "step": 4700 }, { "epoch": 0.20906140233871326, "grad_norm": 0.12056998908519745, "learning_rate": 0.0009957334667846365, "loss": 1.6178, "step": 4702 }, { "epoch": 0.20915032679738563, "grad_norm": 0.1123088002204895, "learning_rate": 0.0009957288621392003, "loss": 1.6228, "step": 4704 }, { "epoch": 0.20923925125605797, "grad_norm": 0.10781093686819077, "learning_rate": 0.0009957242550209862, "loss": 1.6149, "step": 4706 }, { "epoch": 0.20932817571473034, "grad_norm": 0.10738489776849747, "learning_rate": 0.0009957196454300173, "loss": 1.6187, "step": 4708 }, { "epoch": 0.2094171001734027, "grad_norm": 0.11667206138372421, "learning_rate": 0.0009957150333663165, "loss": 1.6207, "step": 4710 }, { "epoch": 0.20950602463207504, "grad_norm": 0.11121194809675217, "learning_rate": 0.0009957104188299069, "loss": 1.6148, "step": 4712 }, { "epoch": 0.2095949490907474, "grad_norm": 0.1158638447523117, "learning_rate": 0.0009957058018208113, "loss": 1.613, "step": 4714 }, { "epoch": 0.20968387354941978, "grad_norm": 0.11311011016368866, "learning_rate": 0.000995701182339053, "loss": 1.6145, "step": 4716 }, { "epoch": 0.20977279800809212, "grad_norm": 0.11132704466581345, "learning_rate": 0.000995696560384655, "loss": 1.6165, "step": 4718 }, { "epoch": 0.2098617224667645, "grad_norm": 0.11866897344589233, "learning_rate": 0.00099569193595764, "loss": 1.6116, "step": 4720 }, { "epoch": 0.20995064692543683, "grad_norm": 0.1159665435552597, "learning_rate": 0.0009956873090580316, "loss": 1.617, "step": 4722 }, { "epoch": 0.2100395713841092, "grad_norm": 0.10780125856399536, "learning_rate": 0.0009956826796858524, "loss": 1.6134, "step": 4724 }, { "epoch": 0.21012849584278157, "grad_norm": 0.10876625776290894, "learning_rate": 0.000995678047841126, "loss": 1.6187, "step": 4726 }, { "epoch": 0.2102174203014539, "grad_norm": 0.1066119521856308, "learning_rate": 0.0009956734135238752, "loss": 1.6158, "step": 4728 }, { "epoch": 0.21030634476012627, "grad_norm": 0.10660472512245178, "learning_rate": 0.0009956687767341229, "loss": 1.61, "step": 4730 }, { "epoch": 0.21039526921879864, "grad_norm": 0.10808862745761871, "learning_rate": 0.0009956641374718924, "loss": 1.6131, "step": 4732 }, { "epoch": 0.21048419367747098, "grad_norm": 0.10631510615348816, "learning_rate": 0.0009956594957372072, "loss": 1.6142, "step": 4734 }, { "epoch": 0.21057311813614335, "grad_norm": 0.10543810576200485, "learning_rate": 0.00099565485153009, "loss": 1.6175, "step": 4736 }, { "epoch": 0.2106620425948157, "grad_norm": 0.11249963939189911, "learning_rate": 0.0009956502048505643, "loss": 1.6134, "step": 4738 }, { "epoch": 0.21075096705348806, "grad_norm": 0.10649897158145905, "learning_rate": 0.0009956455556986528, "loss": 1.6176, "step": 4740 }, { "epoch": 0.21083989151216043, "grad_norm": 0.11082588136196136, "learning_rate": 0.0009956409040743793, "loss": 1.6141, "step": 4742 }, { "epoch": 0.21092881597083277, "grad_norm": 0.11297667771577835, "learning_rate": 0.0009956362499777664, "loss": 1.6122, "step": 4744 }, { "epoch": 0.21101774042950514, "grad_norm": 0.10962715744972229, "learning_rate": 0.0009956315934088378, "loss": 1.6147, "step": 4746 }, { "epoch": 0.2111066648881775, "grad_norm": 0.10940365493297577, "learning_rate": 0.0009956269343676166, "loss": 1.617, "step": 4748 }, { "epoch": 0.21119558934684984, "grad_norm": 0.10843819379806519, "learning_rate": 0.0009956222728541258, "loss": 1.6163, "step": 4750 }, { "epoch": 0.2112845138055222, "grad_norm": 0.10900399088859558, "learning_rate": 0.000995617608868389, "loss": 1.6211, "step": 4752 }, { "epoch": 0.21137343826419458, "grad_norm": 0.11029382050037384, "learning_rate": 0.0009956129424104293, "loss": 1.6172, "step": 4754 }, { "epoch": 0.21146236272286692, "grad_norm": 0.11329919844865799, "learning_rate": 0.00099560827348027, "loss": 1.6157, "step": 4756 }, { "epoch": 0.2115512871815393, "grad_norm": 0.11223863065242767, "learning_rate": 0.000995603602077934, "loss": 1.6195, "step": 4758 }, { "epoch": 0.21164021164021163, "grad_norm": 0.11206690222024918, "learning_rate": 0.0009955989282034452, "loss": 1.6098, "step": 4760 }, { "epoch": 0.211729136098884, "grad_norm": 0.10785891860723495, "learning_rate": 0.0009955942518568269, "loss": 1.6205, "step": 4762 }, { "epoch": 0.21181806055755636, "grad_norm": 0.110198475420475, "learning_rate": 0.0009955895730381018, "loss": 1.6116, "step": 4764 }, { "epoch": 0.2119069850162287, "grad_norm": 0.10387492179870605, "learning_rate": 0.000995584891747294, "loss": 1.6151, "step": 4766 }, { "epoch": 0.21199590947490107, "grad_norm": 0.10851506143808365, "learning_rate": 0.0009955802079844264, "loss": 1.6107, "step": 4768 }, { "epoch": 0.21208483393357344, "grad_norm": 0.11151215434074402, "learning_rate": 0.0009955755217495223, "loss": 1.6143, "step": 4770 }, { "epoch": 0.21217375839224578, "grad_norm": 0.10874391347169876, "learning_rate": 0.0009955708330426055, "loss": 1.6167, "step": 4772 }, { "epoch": 0.21226268285091815, "grad_norm": 0.10614147037267685, "learning_rate": 0.0009955661418636988, "loss": 1.6114, "step": 4774 }, { "epoch": 0.2123516073095905, "grad_norm": 0.110053151845932, "learning_rate": 0.000995561448212826, "loss": 1.6092, "step": 4776 }, { "epoch": 0.21244053176826286, "grad_norm": 0.11465108394622803, "learning_rate": 0.0009955567520900108, "loss": 1.6082, "step": 4778 }, { "epoch": 0.21252945622693523, "grad_norm": 0.11055173724889755, "learning_rate": 0.000995552053495276, "loss": 1.6118, "step": 4780 }, { "epoch": 0.21261838068560757, "grad_norm": 0.10470802336931229, "learning_rate": 0.0009955473524286454, "loss": 1.617, "step": 4782 }, { "epoch": 0.21270730514427993, "grad_norm": 0.10800164937973022, "learning_rate": 0.0009955426488901422, "loss": 1.6144, "step": 4784 }, { "epoch": 0.2127962296029523, "grad_norm": 0.10720106959342957, "learning_rate": 0.00099553794287979, "loss": 1.6125, "step": 4786 }, { "epoch": 0.21288515406162464, "grad_norm": 0.1036008968949318, "learning_rate": 0.0009955332343976123, "loss": 1.6188, "step": 4788 }, { "epoch": 0.212974078520297, "grad_norm": 0.1018817201256752, "learning_rate": 0.0009955285234436327, "loss": 1.6117, "step": 4790 }, { "epoch": 0.21306300297896938, "grad_norm": 0.10686477273702621, "learning_rate": 0.0009955238100178745, "loss": 1.6121, "step": 4792 }, { "epoch": 0.21315192743764172, "grad_norm": 0.10223913937807083, "learning_rate": 0.0009955190941203613, "loss": 1.6085, "step": 4794 }, { "epoch": 0.2132408518963141, "grad_norm": 0.10361933708190918, "learning_rate": 0.0009955143757511165, "loss": 1.6149, "step": 4796 }, { "epoch": 0.21332977635498643, "grad_norm": 0.10988039523363113, "learning_rate": 0.000995509654910164, "loss": 1.6095, "step": 4798 }, { "epoch": 0.2134187008136588, "grad_norm": 0.10833392292261124, "learning_rate": 0.0009955049315975267, "loss": 1.6164, "step": 4800 }, { "epoch": 0.21350762527233116, "grad_norm": 0.10468830913305283, "learning_rate": 0.0009955002058132287, "loss": 1.607, "step": 4802 }, { "epoch": 0.2135965497310035, "grad_norm": 0.1071217730641365, "learning_rate": 0.0009954954775572937, "loss": 1.6122, "step": 4804 }, { "epoch": 0.21368547418967587, "grad_norm": 0.10310684144496918, "learning_rate": 0.0009954907468297447, "loss": 1.6144, "step": 4806 }, { "epoch": 0.21377439864834824, "grad_norm": 0.10384196788072586, "learning_rate": 0.0009954860136306058, "loss": 1.6118, "step": 4808 }, { "epoch": 0.21386332310702058, "grad_norm": 0.10493996739387512, "learning_rate": 0.0009954812779599002, "loss": 1.6141, "step": 4810 }, { "epoch": 0.21395224756569295, "grad_norm": 0.10700862854719162, "learning_rate": 0.0009954765398176519, "loss": 1.6131, "step": 4812 }, { "epoch": 0.2140411720243653, "grad_norm": 0.10613391548395157, "learning_rate": 0.0009954717992038844, "loss": 1.6165, "step": 4814 }, { "epoch": 0.21413009648303766, "grad_norm": 0.10231418162584305, "learning_rate": 0.0009954670561186211, "loss": 1.6095, "step": 4816 }, { "epoch": 0.21421902094171003, "grad_norm": 0.10666482895612717, "learning_rate": 0.0009954623105618862, "loss": 1.6107, "step": 4818 }, { "epoch": 0.21430794540038237, "grad_norm": 0.10890256613492966, "learning_rate": 0.000995457562533703, "loss": 1.6116, "step": 4820 }, { "epoch": 0.21439686985905473, "grad_norm": 0.10631954669952393, "learning_rate": 0.000995452812034095, "loss": 1.6096, "step": 4822 }, { "epoch": 0.2144857943177271, "grad_norm": 0.10400256514549255, "learning_rate": 0.0009954480590630864, "loss": 1.612, "step": 4824 }, { "epoch": 0.21457471877639944, "grad_norm": 0.1029767394065857, "learning_rate": 0.0009954433036207006, "loss": 1.6084, "step": 4826 }, { "epoch": 0.2146636432350718, "grad_norm": 0.10465129464864731, "learning_rate": 0.000995438545706961, "loss": 1.6101, "step": 4828 }, { "epoch": 0.21475256769374415, "grad_norm": 0.10955265164375305, "learning_rate": 0.000995433785321892, "loss": 1.6092, "step": 4830 }, { "epoch": 0.21484149215241652, "grad_norm": 0.10269038379192352, "learning_rate": 0.000995429022465517, "loss": 1.6195, "step": 4832 }, { "epoch": 0.2149304166110889, "grad_norm": 0.1033727377653122, "learning_rate": 0.0009954242571378598, "loss": 1.6076, "step": 4834 }, { "epoch": 0.21501934106976123, "grad_norm": 0.10561516880989075, "learning_rate": 0.0009954194893389441, "loss": 1.6069, "step": 4836 }, { "epoch": 0.2151082655284336, "grad_norm": 0.10597854852676392, "learning_rate": 0.000995414719068794, "loss": 1.6085, "step": 4838 }, { "epoch": 0.21519718998710596, "grad_norm": 0.10583419352769852, "learning_rate": 0.000995409946327433, "loss": 1.6096, "step": 4840 }, { "epoch": 0.2152861144457783, "grad_norm": 0.11089032143354416, "learning_rate": 0.0009954051711148846, "loss": 1.6115, "step": 4842 }, { "epoch": 0.21537503890445067, "grad_norm": 0.11158238351345062, "learning_rate": 0.0009954003934311733, "loss": 1.6138, "step": 4844 }, { "epoch": 0.21546396336312304, "grad_norm": 0.10959797352552414, "learning_rate": 0.0009953956132763224, "loss": 1.6092, "step": 4846 }, { "epoch": 0.21555288782179538, "grad_norm": 0.11067400872707367, "learning_rate": 0.000995390830650356, "loss": 1.6079, "step": 4848 }, { "epoch": 0.21564181228046775, "grad_norm": 0.10605499148368835, "learning_rate": 0.0009953860455532981, "loss": 1.6174, "step": 4850 }, { "epoch": 0.2157307367391401, "grad_norm": 0.10270148515701294, "learning_rate": 0.0009953812579851721, "loss": 1.6129, "step": 4852 }, { "epoch": 0.21581966119781246, "grad_norm": 0.10574102401733398, "learning_rate": 0.0009953764679460025, "loss": 1.6113, "step": 4854 }, { "epoch": 0.21590858565648482, "grad_norm": 0.10553975403308868, "learning_rate": 0.0009953716754358126, "loss": 1.6095, "step": 4856 }, { "epoch": 0.21599751011515717, "grad_norm": 0.10573318600654602, "learning_rate": 0.0009953668804546264, "loss": 1.6124, "step": 4858 }, { "epoch": 0.21608643457382953, "grad_norm": 0.10750672966241837, "learning_rate": 0.0009953620830024682, "loss": 1.6107, "step": 4860 }, { "epoch": 0.2161753590325019, "grad_norm": 0.10438815504312515, "learning_rate": 0.0009953572830793617, "loss": 1.6129, "step": 4862 }, { "epoch": 0.21626428349117424, "grad_norm": 0.10748635977506638, "learning_rate": 0.0009953524806853309, "loss": 1.6101, "step": 4864 }, { "epoch": 0.2163532079498466, "grad_norm": 0.1096944510936737, "learning_rate": 0.0009953476758203996, "loss": 1.6175, "step": 4866 }, { "epoch": 0.21644213240851895, "grad_norm": 0.10906648635864258, "learning_rate": 0.000995342868484592, "loss": 1.6118, "step": 4868 }, { "epoch": 0.21653105686719132, "grad_norm": 0.10521861910820007, "learning_rate": 0.0009953380586779318, "loss": 1.6142, "step": 4870 }, { "epoch": 0.2166199813258637, "grad_norm": 0.10765370726585388, "learning_rate": 0.0009953332464004432, "loss": 1.6113, "step": 4872 }, { "epoch": 0.21670890578453603, "grad_norm": 0.10835171490907669, "learning_rate": 0.00099532843165215, "loss": 1.6175, "step": 4874 }, { "epoch": 0.2167978302432084, "grad_norm": 0.10787054151296616, "learning_rate": 0.0009953236144330767, "loss": 1.6158, "step": 4876 }, { "epoch": 0.21688675470188076, "grad_norm": 0.1088169738650322, "learning_rate": 0.0009953187947432465, "loss": 1.606, "step": 4878 }, { "epoch": 0.2169756791605531, "grad_norm": 0.10647636651992798, "learning_rate": 0.0009953139725826844, "loss": 1.6161, "step": 4880 }, { "epoch": 0.21706460361922547, "grad_norm": 0.10560780018568039, "learning_rate": 0.0009953091479514137, "loss": 1.6118, "step": 4882 }, { "epoch": 0.21715352807789784, "grad_norm": 0.10568542778491974, "learning_rate": 0.0009953043208494588, "loss": 1.6149, "step": 4884 }, { "epoch": 0.21724245253657018, "grad_norm": 0.10434123873710632, "learning_rate": 0.000995299491276844, "loss": 1.615, "step": 4886 }, { "epoch": 0.21733137699524255, "grad_norm": 0.10796734690666199, "learning_rate": 0.0009952946592335927, "loss": 1.6103, "step": 4888 }, { "epoch": 0.2174203014539149, "grad_norm": 0.11157654225826263, "learning_rate": 0.0009952898247197297, "loss": 1.6161, "step": 4890 }, { "epoch": 0.21750922591258726, "grad_norm": 0.10369045287370682, "learning_rate": 0.0009952849877352787, "loss": 1.6128, "step": 4892 }, { "epoch": 0.21759815037125962, "grad_norm": 0.10328072309494019, "learning_rate": 0.0009952801482802642, "loss": 1.6128, "step": 4894 }, { "epoch": 0.21768707482993196, "grad_norm": 0.10743004828691483, "learning_rate": 0.00099527530635471, "loss": 1.608, "step": 4896 }, { "epoch": 0.21777599928860433, "grad_norm": 0.11129618436098099, "learning_rate": 0.00099527046195864, "loss": 1.6098, "step": 4898 }, { "epoch": 0.2178649237472767, "grad_norm": 0.10475630313158035, "learning_rate": 0.0009952656150920792, "loss": 1.6102, "step": 4900 }, { "epoch": 0.21795384820594904, "grad_norm": 0.10529398918151855, "learning_rate": 0.0009952607657550511, "loss": 1.6052, "step": 4902 }, { "epoch": 0.2180427726646214, "grad_norm": 0.10349459946155548, "learning_rate": 0.0009952559139475802, "loss": 1.6098, "step": 4904 }, { "epoch": 0.21813169712329375, "grad_norm": 0.10338271409273148, "learning_rate": 0.0009952510596696904, "loss": 1.6127, "step": 4906 }, { "epoch": 0.21822062158196612, "grad_norm": 0.11201298981904984, "learning_rate": 0.0009952462029214064, "loss": 1.6127, "step": 4908 }, { "epoch": 0.21830954604063849, "grad_norm": 0.10434871912002563, "learning_rate": 0.0009952413437027518, "loss": 1.6086, "step": 4910 }, { "epoch": 0.21839847049931083, "grad_norm": 0.10629836469888687, "learning_rate": 0.0009952364820137514, "loss": 1.6101, "step": 4912 }, { "epoch": 0.2184873949579832, "grad_norm": 0.09886250644922256, "learning_rate": 0.0009952316178544291, "loss": 1.6038, "step": 4914 }, { "epoch": 0.21857631941665556, "grad_norm": 0.09968285262584686, "learning_rate": 0.0009952267512248095, "loss": 1.6106, "step": 4916 }, { "epoch": 0.2186652438753279, "grad_norm": 0.1019214391708374, "learning_rate": 0.0009952218821249164, "loss": 1.6043, "step": 4918 }, { "epoch": 0.21875416833400027, "grad_norm": 0.10381198674440384, "learning_rate": 0.0009952170105547743, "loss": 1.6032, "step": 4920 }, { "epoch": 0.2188430927926726, "grad_norm": 0.10088684409856796, "learning_rate": 0.0009952121365144077, "loss": 1.6129, "step": 4922 }, { "epoch": 0.21893201725134498, "grad_norm": 0.09936795383691788, "learning_rate": 0.0009952072600038408, "loss": 1.6079, "step": 4924 }, { "epoch": 0.21902094171001735, "grad_norm": 0.10552465170621872, "learning_rate": 0.000995202381023098, "loss": 1.6062, "step": 4926 }, { "epoch": 0.2191098661686897, "grad_norm": 0.09979760646820068, "learning_rate": 0.0009951974995722032, "loss": 1.6061, "step": 4928 }, { "epoch": 0.21919879062736206, "grad_norm": 0.10540541261434555, "learning_rate": 0.0009951926156511815, "loss": 1.6019, "step": 4930 }, { "epoch": 0.21928771508603442, "grad_norm": 0.10734565556049347, "learning_rate": 0.0009951877292600564, "loss": 1.6059, "step": 4932 }, { "epoch": 0.21937663954470676, "grad_norm": 0.10099034011363983, "learning_rate": 0.000995182840398853, "loss": 1.6102, "step": 4934 }, { "epoch": 0.21946556400337913, "grad_norm": 0.10821837186813354, "learning_rate": 0.0009951779490675952, "loss": 1.6089, "step": 4936 }, { "epoch": 0.2195544884620515, "grad_norm": 0.10176271945238113, "learning_rate": 0.0009951730552663078, "loss": 1.61, "step": 4938 }, { "epoch": 0.21964341292072384, "grad_norm": 0.10236615687608719, "learning_rate": 0.0009951681589950147, "loss": 1.6074, "step": 4940 }, { "epoch": 0.2197323373793962, "grad_norm": 0.10596957057714462, "learning_rate": 0.000995163260253741, "loss": 1.6151, "step": 4942 }, { "epoch": 0.21982126183806855, "grad_norm": 0.10543814301490784, "learning_rate": 0.0009951583590425105, "loss": 1.6109, "step": 4944 }, { "epoch": 0.21991018629674092, "grad_norm": 0.10567460209131241, "learning_rate": 0.000995153455361348, "loss": 1.6031, "step": 4946 }, { "epoch": 0.21999911075541329, "grad_norm": 0.1093137338757515, "learning_rate": 0.000995148549210278, "loss": 1.6047, "step": 4948 }, { "epoch": 0.22008803521408563, "grad_norm": 0.11018449813127518, "learning_rate": 0.0009951436405893246, "loss": 1.6101, "step": 4950 }, { "epoch": 0.220176959672758, "grad_norm": 0.10580731928348541, "learning_rate": 0.0009951387294985126, "loss": 1.6132, "step": 4952 }, { "epoch": 0.22026588413143036, "grad_norm": 0.10643559694290161, "learning_rate": 0.0009951338159378664, "loss": 1.6078, "step": 4954 }, { "epoch": 0.2203548085901027, "grad_norm": 0.10614238679409027, "learning_rate": 0.0009951288999074106, "loss": 1.6054, "step": 4956 }, { "epoch": 0.22044373304877507, "grad_norm": 0.10441026836633682, "learning_rate": 0.0009951239814071698, "loss": 1.6083, "step": 4958 }, { "epoch": 0.2205326575074474, "grad_norm": 0.10442102700471878, "learning_rate": 0.0009951190604371683, "loss": 1.6069, "step": 4960 }, { "epoch": 0.22062158196611978, "grad_norm": 0.1024702936410904, "learning_rate": 0.0009951141369974306, "loss": 1.6092, "step": 4962 }, { "epoch": 0.22071050642479215, "grad_norm": 0.09814217686653137, "learning_rate": 0.0009951092110879816, "loss": 1.6095, "step": 4964 }, { "epoch": 0.2207994308834645, "grad_norm": 0.09846185147762299, "learning_rate": 0.0009951042827088454, "loss": 1.6036, "step": 4966 }, { "epoch": 0.22088835534213686, "grad_norm": 0.10347746312618256, "learning_rate": 0.0009950993518600471, "loss": 1.6062, "step": 4968 }, { "epoch": 0.22097727980080922, "grad_norm": 0.101874940097332, "learning_rate": 0.000995094418541611, "loss": 1.6067, "step": 4970 }, { "epoch": 0.22106620425948156, "grad_norm": 0.1038205623626709, "learning_rate": 0.0009950894827535617, "loss": 1.6083, "step": 4972 }, { "epoch": 0.22115512871815393, "grad_norm": 0.10341985523700714, "learning_rate": 0.000995084544495924, "loss": 1.6091, "step": 4974 }, { "epoch": 0.2212440531768263, "grad_norm": 0.10060903429985046, "learning_rate": 0.0009950796037687224, "loss": 1.6075, "step": 4976 }, { "epoch": 0.22133297763549864, "grad_norm": 0.10430523008108139, "learning_rate": 0.0009950746605719813, "loss": 1.6024, "step": 4978 }, { "epoch": 0.221421902094171, "grad_norm": 0.11263576149940491, "learning_rate": 0.0009950697149057259, "loss": 1.6101, "step": 4980 }, { "epoch": 0.22151082655284335, "grad_norm": 0.10367169231176376, "learning_rate": 0.0009950647667699806, "loss": 1.6015, "step": 4982 }, { "epoch": 0.22159975101151572, "grad_norm": 0.10355625301599503, "learning_rate": 0.0009950598161647698, "loss": 1.6094, "step": 4984 }, { "epoch": 0.22168867547018808, "grad_norm": 0.09836798161268234, "learning_rate": 0.0009950548630901187, "loss": 1.606, "step": 4986 }, { "epoch": 0.22177759992886042, "grad_norm": 0.10221516340970993, "learning_rate": 0.0009950499075460518, "loss": 1.6084, "step": 4988 }, { "epoch": 0.2218665243875328, "grad_norm": 0.10238867253065109, "learning_rate": 0.0009950449495325934, "loss": 1.6016, "step": 4990 }, { "epoch": 0.22195544884620516, "grad_norm": 0.1028597429394722, "learning_rate": 0.000995039989049769, "loss": 1.6079, "step": 4992 }, { "epoch": 0.2220443733048775, "grad_norm": 0.10329331457614899, "learning_rate": 0.0009950350260976028, "loss": 1.6096, "step": 4994 }, { "epoch": 0.22213329776354987, "grad_norm": 0.10824605077505112, "learning_rate": 0.0009950300606761198, "loss": 1.6103, "step": 4996 }, { "epoch": 0.2222222222222222, "grad_norm": 0.10527484118938446, "learning_rate": 0.0009950250927853448, "loss": 1.6112, "step": 4998 }, { "epoch": 0.22231114668089458, "grad_norm": 0.10636089742183685, "learning_rate": 0.0009950201224253022, "loss": 1.6092, "step": 5000 }, { "epoch": 0.22231114668089458, "eval_loss": 1.5817906856536865, "eval_runtime": 12.3795, "eval_samples_per_second": 558.179, "eval_steps_per_second": 69.793, "step": 5000 }, { "epoch": 0.22240007113956695, "grad_norm": 0.09906064718961716, "learning_rate": 0.0009950151495960171, "loss": 1.6069, "step": 5002 }, { "epoch": 0.2224889955982393, "grad_norm": 0.10404371470212936, "learning_rate": 0.0009950101742975143, "loss": 1.6045, "step": 5004 }, { "epoch": 0.22257792005691165, "grad_norm": 0.1025133803486824, "learning_rate": 0.0009950051965298189, "loss": 1.607, "step": 5006 }, { "epoch": 0.22266684451558402, "grad_norm": 0.10632102936506271, "learning_rate": 0.000995000216292955, "loss": 1.603, "step": 5008 }, { "epoch": 0.22275576897425636, "grad_norm": 0.10821643471717834, "learning_rate": 0.000994995233586948, "loss": 1.6062, "step": 5010 }, { "epoch": 0.22284469343292873, "grad_norm": 0.10510935634374619, "learning_rate": 0.0009949902484118226, "loss": 1.6077, "step": 5012 }, { "epoch": 0.22293361789160107, "grad_norm": 0.1051439642906189, "learning_rate": 0.0009949852607676038, "loss": 1.6069, "step": 5014 }, { "epoch": 0.22302254235027344, "grad_norm": 0.10179907083511353, "learning_rate": 0.0009949802706543161, "loss": 1.6097, "step": 5016 }, { "epoch": 0.2231114668089458, "grad_norm": 0.10336954891681671, "learning_rate": 0.0009949752780719849, "loss": 1.6074, "step": 5018 }, { "epoch": 0.22320039126761815, "grad_norm": 0.10205300152301788, "learning_rate": 0.0009949702830206347, "loss": 1.6064, "step": 5020 }, { "epoch": 0.22328931572629052, "grad_norm": 0.10609652101993561, "learning_rate": 0.0009949652855002907, "loss": 1.6048, "step": 5022 }, { "epoch": 0.22337824018496288, "grad_norm": 0.10084786266088486, "learning_rate": 0.0009949602855109775, "loss": 1.6025, "step": 5024 }, { "epoch": 0.22346716464363522, "grad_norm": 0.10056611895561218, "learning_rate": 0.0009949552830527202, "loss": 1.6073, "step": 5026 }, { "epoch": 0.2235560891023076, "grad_norm": 0.09772695600986481, "learning_rate": 0.0009949502781255442, "loss": 1.6077, "step": 5028 }, { "epoch": 0.22364501356097996, "grad_norm": 0.10336261987686157, "learning_rate": 0.0009949452707294737, "loss": 1.6084, "step": 5030 }, { "epoch": 0.2237339380196523, "grad_norm": 0.1037675067782402, "learning_rate": 0.0009949402608645341, "loss": 1.6126, "step": 5032 }, { "epoch": 0.22382286247832467, "grad_norm": 0.10318697243928909, "learning_rate": 0.0009949352485307504, "loss": 1.6088, "step": 5034 }, { "epoch": 0.223911786936997, "grad_norm": 0.10445655882358551, "learning_rate": 0.0009949302337281473, "loss": 1.6139, "step": 5036 }, { "epoch": 0.22400071139566938, "grad_norm": 0.10120239853858948, "learning_rate": 0.0009949252164567503, "loss": 1.6073, "step": 5038 }, { "epoch": 0.22408963585434175, "grad_norm": 0.1085609570145607, "learning_rate": 0.000994920196716584, "loss": 1.6062, "step": 5040 }, { "epoch": 0.22417856031301409, "grad_norm": 0.10521785914897919, "learning_rate": 0.0009949151745076736, "loss": 1.6031, "step": 5042 }, { "epoch": 0.22426748477168645, "grad_norm": 0.10063958913087845, "learning_rate": 0.0009949101498300443, "loss": 1.6032, "step": 5044 }, { "epoch": 0.22435640923035882, "grad_norm": 0.09775926172733307, "learning_rate": 0.000994905122683721, "loss": 1.6077, "step": 5046 }, { "epoch": 0.22444533368903116, "grad_norm": 0.09834594279527664, "learning_rate": 0.0009949000930687289, "loss": 1.5963, "step": 5048 }, { "epoch": 0.22453425814770353, "grad_norm": 0.10165659338235855, "learning_rate": 0.0009948950609850928, "loss": 1.6073, "step": 5050 }, { "epoch": 0.22462318260637587, "grad_norm": 0.09759823232889175, "learning_rate": 0.000994890026432838, "loss": 1.602, "step": 5052 }, { "epoch": 0.22471210706504824, "grad_norm": 0.10388672351837158, "learning_rate": 0.0009948849894119895, "loss": 1.6052, "step": 5054 }, { "epoch": 0.2248010315237206, "grad_norm": 0.10322355479001999, "learning_rate": 0.0009948799499225726, "loss": 1.6042, "step": 5056 }, { "epoch": 0.22488995598239295, "grad_norm": 0.1011364683508873, "learning_rate": 0.0009948749079646125, "loss": 1.6039, "step": 5058 }, { "epoch": 0.22497888044106532, "grad_norm": 0.10648488998413086, "learning_rate": 0.0009948698635381342, "loss": 1.607, "step": 5060 }, { "epoch": 0.22506780489973768, "grad_norm": 0.10115467756986618, "learning_rate": 0.0009948648166431629, "loss": 1.6001, "step": 5062 }, { "epoch": 0.22515672935841002, "grad_norm": 0.1064210832118988, "learning_rate": 0.0009948597672797235, "loss": 1.6048, "step": 5064 }, { "epoch": 0.2252456538170824, "grad_norm": 0.10593225806951523, "learning_rate": 0.0009948547154478417, "loss": 1.6016, "step": 5066 }, { "epoch": 0.22533457827575476, "grad_norm": 0.10426605492830276, "learning_rate": 0.0009948496611475422, "loss": 1.5992, "step": 5068 }, { "epoch": 0.2254235027344271, "grad_norm": 0.10383313149213791, "learning_rate": 0.0009948446043788505, "loss": 1.6085, "step": 5070 }, { "epoch": 0.22551242719309947, "grad_norm": 0.10504084825515747, "learning_rate": 0.000994839545141792, "loss": 1.6052, "step": 5072 }, { "epoch": 0.2256013516517718, "grad_norm": 0.09993825107812881, "learning_rate": 0.0009948344834363916, "loss": 1.6049, "step": 5074 }, { "epoch": 0.22569027611044418, "grad_norm": 0.10596901178359985, "learning_rate": 0.0009948294192626745, "loss": 1.6018, "step": 5076 }, { "epoch": 0.22577920056911654, "grad_norm": 0.10218656808137894, "learning_rate": 0.0009948243526206664, "loss": 1.6093, "step": 5078 }, { "epoch": 0.22586812502778889, "grad_norm": 0.09963101893663406, "learning_rate": 0.0009948192835103921, "loss": 1.608, "step": 5080 }, { "epoch": 0.22595704948646125, "grad_norm": 0.09806007146835327, "learning_rate": 0.0009948142119318771, "loss": 1.6045, "step": 5082 }, { "epoch": 0.22604597394513362, "grad_norm": 0.10028783231973648, "learning_rate": 0.0009948091378851468, "loss": 1.6131, "step": 5084 }, { "epoch": 0.22613489840380596, "grad_norm": 0.10069727897644043, "learning_rate": 0.0009948040613702263, "loss": 1.6067, "step": 5086 }, { "epoch": 0.22622382286247833, "grad_norm": 0.1011575385928154, "learning_rate": 0.000994798982387141, "loss": 1.6084, "step": 5088 }, { "epoch": 0.22631274732115067, "grad_norm": 0.1032286211848259, "learning_rate": 0.0009947939009359163, "loss": 1.6047, "step": 5090 }, { "epoch": 0.22640167177982304, "grad_norm": 0.1022925153374672, "learning_rate": 0.0009947888170165774, "loss": 1.6037, "step": 5092 }, { "epoch": 0.2264905962384954, "grad_norm": 0.11113549023866653, "learning_rate": 0.0009947837306291496, "loss": 1.6072, "step": 5094 }, { "epoch": 0.22657952069716775, "grad_norm": 0.10652270913124084, "learning_rate": 0.0009947786417736587, "loss": 1.6041, "step": 5096 }, { "epoch": 0.22666844515584011, "grad_norm": 0.10547838360071182, "learning_rate": 0.0009947735504501297, "loss": 1.6035, "step": 5098 }, { "epoch": 0.22675736961451248, "grad_norm": 0.10703777521848679, "learning_rate": 0.000994768456658588, "loss": 1.6005, "step": 5100 }, { "epoch": 0.22684629407318482, "grad_norm": 0.10610830783843994, "learning_rate": 0.0009947633603990592, "loss": 1.6063, "step": 5102 }, { "epoch": 0.2269352185318572, "grad_norm": 0.10127437859773636, "learning_rate": 0.0009947582616715686, "loss": 1.605, "step": 5104 }, { "epoch": 0.22702414299052953, "grad_norm": 0.10472498089075089, "learning_rate": 0.0009947531604761416, "loss": 1.6021, "step": 5106 }, { "epoch": 0.2271130674492019, "grad_norm": 0.11169638484716415, "learning_rate": 0.000994748056812804, "loss": 1.5958, "step": 5108 }, { "epoch": 0.22720199190787427, "grad_norm": 0.10129303485155106, "learning_rate": 0.0009947429506815808, "loss": 1.5976, "step": 5110 }, { "epoch": 0.2272909163665466, "grad_norm": 0.10340652614831924, "learning_rate": 0.0009947378420824976, "loss": 1.6075, "step": 5112 }, { "epoch": 0.22737984082521898, "grad_norm": 0.0987604483962059, "learning_rate": 0.00099473273101558, "loss": 1.599, "step": 5114 }, { "epoch": 0.22746876528389134, "grad_norm": 0.10262270271778107, "learning_rate": 0.0009947276174808533, "loss": 1.602, "step": 5116 }, { "epoch": 0.22755768974256368, "grad_norm": 0.10149616748094559, "learning_rate": 0.0009947225014783432, "loss": 1.6048, "step": 5118 }, { "epoch": 0.22764661420123605, "grad_norm": 0.0976349264383316, "learning_rate": 0.0009947173830080751, "loss": 1.6016, "step": 5120 }, { "epoch": 0.22773553865990842, "grad_norm": 0.0950886532664299, "learning_rate": 0.0009947122620700747, "loss": 1.6017, "step": 5122 }, { "epoch": 0.22782446311858076, "grad_norm": 0.10552572458982468, "learning_rate": 0.0009947071386643672, "loss": 1.609, "step": 5124 }, { "epoch": 0.22791338757725313, "grad_norm": 0.10495138168334961, "learning_rate": 0.0009947020127909784, "loss": 1.6071, "step": 5126 }, { "epoch": 0.22800231203592547, "grad_norm": 0.10625366866588593, "learning_rate": 0.000994696884449934, "loss": 1.6039, "step": 5128 }, { "epoch": 0.22809123649459784, "grad_norm": 0.10566636174917221, "learning_rate": 0.0009946917536412594, "loss": 1.6031, "step": 5130 }, { "epoch": 0.2281801609532702, "grad_norm": 0.1036016196012497, "learning_rate": 0.0009946866203649804, "loss": 1.6067, "step": 5132 }, { "epoch": 0.22826908541194255, "grad_norm": 0.0977390855550766, "learning_rate": 0.0009946814846211222, "loss": 1.6106, "step": 5134 }, { "epoch": 0.22835800987061491, "grad_norm": 0.1010410338640213, "learning_rate": 0.0009946763464097105, "loss": 1.5996, "step": 5136 }, { "epoch": 0.22844693432928728, "grad_norm": 0.10956060141324997, "learning_rate": 0.000994671205730771, "loss": 1.602, "step": 5138 }, { "epoch": 0.22853585878795962, "grad_norm": 0.10265465825796127, "learning_rate": 0.0009946660625843298, "loss": 1.6062, "step": 5140 }, { "epoch": 0.228624783246632, "grad_norm": 0.10135070979595184, "learning_rate": 0.000994660916970412, "loss": 1.6026, "step": 5142 }, { "epoch": 0.22871370770530433, "grad_norm": 0.09793238341808319, "learning_rate": 0.0009946557688890435, "loss": 1.6002, "step": 5144 }, { "epoch": 0.2288026321639767, "grad_norm": 0.10020411014556885, "learning_rate": 0.0009946506183402498, "loss": 1.6111, "step": 5146 }, { "epoch": 0.22889155662264907, "grad_norm": 0.0976269543170929, "learning_rate": 0.0009946454653240568, "loss": 1.6027, "step": 5148 }, { "epoch": 0.2289804810813214, "grad_norm": 0.09884080290794373, "learning_rate": 0.00099464030984049, "loss": 1.6044, "step": 5150 }, { "epoch": 0.22906940553999378, "grad_norm": 0.10288077592849731, "learning_rate": 0.0009946351518895754, "loss": 1.6055, "step": 5152 }, { "epoch": 0.22915832999866614, "grad_norm": 0.10319656878709793, "learning_rate": 0.0009946299914713385, "loss": 1.6073, "step": 5154 }, { "epoch": 0.22924725445733848, "grad_norm": 0.10332750529050827, "learning_rate": 0.000994624828585805, "loss": 1.606, "step": 5156 }, { "epoch": 0.22933617891601085, "grad_norm": 0.10401324927806854, "learning_rate": 0.0009946196632330008, "loss": 1.6012, "step": 5158 }, { "epoch": 0.22942510337468322, "grad_norm": 0.10355940461158752, "learning_rate": 0.0009946144954129518, "loss": 1.6054, "step": 5160 }, { "epoch": 0.22951402783335556, "grad_norm": 0.10316723585128784, "learning_rate": 0.0009946093251256834, "loss": 1.609, "step": 5162 }, { "epoch": 0.22960295229202793, "grad_norm": 0.1029047816991806, "learning_rate": 0.0009946041523712214, "loss": 1.6074, "step": 5164 }, { "epoch": 0.22969187675070027, "grad_norm": 0.1026320829987526, "learning_rate": 0.0009945989771495921, "loss": 1.6014, "step": 5166 }, { "epoch": 0.22978080120937264, "grad_norm": 0.10016648471355438, "learning_rate": 0.0009945937994608209, "loss": 1.6034, "step": 5168 }, { "epoch": 0.229869725668045, "grad_norm": 0.10129445791244507, "learning_rate": 0.0009945886193049338, "loss": 1.6015, "step": 5170 }, { "epoch": 0.22995865012671735, "grad_norm": 0.10496019572019577, "learning_rate": 0.0009945834366819565, "loss": 1.6063, "step": 5172 }, { "epoch": 0.2300475745853897, "grad_norm": 0.10157696902751923, "learning_rate": 0.000994578251591915, "loss": 1.6045, "step": 5174 }, { "epoch": 0.23013649904406208, "grad_norm": 0.10173121839761734, "learning_rate": 0.0009945730640348348, "loss": 1.5999, "step": 5176 }, { "epoch": 0.23022542350273442, "grad_norm": 0.10079949349164963, "learning_rate": 0.0009945678740107423, "loss": 1.6074, "step": 5178 }, { "epoch": 0.2303143479614068, "grad_norm": 0.1033761128783226, "learning_rate": 0.000994562681519663, "loss": 1.6054, "step": 5180 }, { "epoch": 0.23040327242007913, "grad_norm": 0.1060091182589531, "learning_rate": 0.000994557486561623, "loss": 1.6007, "step": 5182 }, { "epoch": 0.2304921968787515, "grad_norm": 0.0948513001203537, "learning_rate": 0.0009945522891366483, "loss": 1.6028, "step": 5184 }, { "epoch": 0.23058112133742387, "grad_norm": 0.09779325127601624, "learning_rate": 0.0009945470892447647, "loss": 1.6045, "step": 5186 }, { "epoch": 0.2306700457960962, "grad_norm": 0.09960626810789108, "learning_rate": 0.000994541886885998, "loss": 1.6021, "step": 5188 }, { "epoch": 0.23075897025476858, "grad_norm": 0.1015731692314148, "learning_rate": 0.0009945366820603746, "loss": 1.6027, "step": 5190 }, { "epoch": 0.23084789471344094, "grad_norm": 0.10101474076509476, "learning_rate": 0.0009945314747679197, "loss": 1.6027, "step": 5192 }, { "epoch": 0.23093681917211328, "grad_norm": 0.10149352252483368, "learning_rate": 0.00099452626500866, "loss": 1.5992, "step": 5194 }, { "epoch": 0.23102574363078565, "grad_norm": 0.09866882860660553, "learning_rate": 0.0009945210527826209, "loss": 1.5996, "step": 5196 }, { "epoch": 0.231114668089458, "grad_norm": 0.10099692642688751, "learning_rate": 0.000994515838089829, "loss": 1.5983, "step": 5198 }, { "epoch": 0.23120359254813036, "grad_norm": 0.10036486387252808, "learning_rate": 0.0009945106209303099, "loss": 1.5955, "step": 5200 }, { "epoch": 0.23129251700680273, "grad_norm": 0.10122378170490265, "learning_rate": 0.0009945054013040897, "loss": 1.5993, "step": 5202 }, { "epoch": 0.23138144146547507, "grad_norm": 0.09628558903932571, "learning_rate": 0.0009945001792111944, "loss": 1.6037, "step": 5204 }, { "epoch": 0.23147036592414744, "grad_norm": 0.09720970690250397, "learning_rate": 0.0009944949546516503, "loss": 1.6031, "step": 5206 }, { "epoch": 0.2315592903828198, "grad_norm": 0.09955313801765442, "learning_rate": 0.0009944897276254832, "loss": 1.6018, "step": 5208 }, { "epoch": 0.23164821484149214, "grad_norm": 0.09602374583482742, "learning_rate": 0.0009944844981327194, "loss": 1.5969, "step": 5210 }, { "epoch": 0.2317371393001645, "grad_norm": 0.10124479979276657, "learning_rate": 0.0009944792661733847, "loss": 1.6058, "step": 5212 }, { "epoch": 0.23182606375883688, "grad_norm": 0.09888044744729996, "learning_rate": 0.0009944740317475054, "loss": 1.5966, "step": 5214 }, { "epoch": 0.23191498821750922, "grad_norm": 0.10130251944065094, "learning_rate": 0.0009944687948551074, "loss": 1.6004, "step": 5216 }, { "epoch": 0.2320039126761816, "grad_norm": 0.10554568469524384, "learning_rate": 0.000994463555496217, "loss": 1.6022, "step": 5218 }, { "epoch": 0.23209283713485393, "grad_norm": 0.10593781620264053, "learning_rate": 0.0009944583136708607, "loss": 1.6014, "step": 5220 }, { "epoch": 0.2321817615935263, "grad_norm": 0.1010642796754837, "learning_rate": 0.0009944530693790639, "loss": 1.6043, "step": 5222 }, { "epoch": 0.23227068605219867, "grad_norm": 0.0989823043346405, "learning_rate": 0.000994447822620853, "loss": 1.5977, "step": 5224 }, { "epoch": 0.232359610510871, "grad_norm": 0.10016317665576935, "learning_rate": 0.0009944425733962546, "loss": 1.6013, "step": 5226 }, { "epoch": 0.23244853496954337, "grad_norm": 0.10070092231035233, "learning_rate": 0.0009944373217052945, "loss": 1.6, "step": 5228 }, { "epoch": 0.23253745942821574, "grad_norm": 0.10165101289749146, "learning_rate": 0.0009944320675479988, "loss": 1.6034, "step": 5230 }, { "epoch": 0.23262638388688808, "grad_norm": 0.10388650000095367, "learning_rate": 0.000994426810924394, "loss": 1.5997, "step": 5232 }, { "epoch": 0.23271530834556045, "grad_norm": 0.10321874916553497, "learning_rate": 0.0009944215518345061, "loss": 1.5995, "step": 5234 }, { "epoch": 0.2328042328042328, "grad_norm": 0.09810288995504379, "learning_rate": 0.0009944162902783616, "loss": 1.6004, "step": 5236 }, { "epoch": 0.23289315726290516, "grad_norm": 0.09570595622062683, "learning_rate": 0.0009944110262559865, "loss": 1.6005, "step": 5238 }, { "epoch": 0.23298208172157753, "grad_norm": 0.10209929198026657, "learning_rate": 0.000994405759767407, "loss": 1.6032, "step": 5240 }, { "epoch": 0.23307100618024987, "grad_norm": 0.10126584023237228, "learning_rate": 0.0009944004908126495, "loss": 1.6095, "step": 5242 }, { "epoch": 0.23315993063892224, "grad_norm": 0.10170583426952362, "learning_rate": 0.0009943952193917406, "loss": 1.5949, "step": 5244 }, { "epoch": 0.2332488550975946, "grad_norm": 0.09847375005483627, "learning_rate": 0.000994389945504706, "loss": 1.6007, "step": 5246 }, { "epoch": 0.23333777955626694, "grad_norm": 0.09713829308748245, "learning_rate": 0.0009943846691515723, "loss": 1.6019, "step": 5248 }, { "epoch": 0.2334267040149393, "grad_norm": 0.09937822073698044, "learning_rate": 0.000994379390332366, "loss": 1.5981, "step": 5250 }, { "epoch": 0.23351562847361168, "grad_norm": 0.10339406877756119, "learning_rate": 0.000994374109047113, "loss": 1.6029, "step": 5252 }, { "epoch": 0.23360455293228402, "grad_norm": 0.10225261747837067, "learning_rate": 0.00099436882529584, "loss": 1.5984, "step": 5254 }, { "epoch": 0.2336934773909564, "grad_norm": 0.10872292518615723, "learning_rate": 0.0009943635390785733, "loss": 1.5941, "step": 5256 }, { "epoch": 0.23378240184962873, "grad_norm": 0.10657516866922379, "learning_rate": 0.000994358250395339, "loss": 1.5936, "step": 5258 }, { "epoch": 0.2338713263083011, "grad_norm": 0.1047828420996666, "learning_rate": 0.0009943529592461638, "loss": 1.5992, "step": 5260 }, { "epoch": 0.23396025076697347, "grad_norm": 0.1091296523809433, "learning_rate": 0.000994347665631074, "loss": 1.605, "step": 5262 }, { "epoch": 0.2340491752256458, "grad_norm": 0.1082061156630516, "learning_rate": 0.000994342369550096, "loss": 1.6055, "step": 5264 }, { "epoch": 0.23413809968431817, "grad_norm": 0.1011800467967987, "learning_rate": 0.0009943370710032563, "loss": 1.6019, "step": 5266 }, { "epoch": 0.23422702414299054, "grad_norm": 0.10550300031900406, "learning_rate": 0.000994331769990581, "loss": 1.6043, "step": 5268 }, { "epoch": 0.23431594860166288, "grad_norm": 0.10264909267425537, "learning_rate": 0.000994326466512097, "loss": 1.603, "step": 5270 }, { "epoch": 0.23440487306033525, "grad_norm": 0.10784979909658432, "learning_rate": 0.0009943211605678303, "loss": 1.6024, "step": 5272 }, { "epoch": 0.2344937975190076, "grad_norm": 0.10662925988435745, "learning_rate": 0.0009943158521578078, "loss": 1.5965, "step": 5274 }, { "epoch": 0.23458272197767996, "grad_norm": 0.10931091010570526, "learning_rate": 0.0009943105412820556, "loss": 1.6006, "step": 5276 }, { "epoch": 0.23467164643635233, "grad_norm": 0.09809394180774689, "learning_rate": 0.0009943052279406007, "loss": 1.5995, "step": 5278 }, { "epoch": 0.23476057089502467, "grad_norm": 0.10617434978485107, "learning_rate": 0.000994299912133469, "loss": 1.6007, "step": 5280 }, { "epoch": 0.23484949535369704, "grad_norm": 0.10415052622556686, "learning_rate": 0.0009942945938606874, "loss": 1.6003, "step": 5282 }, { "epoch": 0.2349384198123694, "grad_norm": 0.09706338495016098, "learning_rate": 0.0009942892731222821, "loss": 1.6022, "step": 5284 }, { "epoch": 0.23502734427104174, "grad_norm": 0.09842453896999359, "learning_rate": 0.00099428394991828, "loss": 1.593, "step": 5286 }, { "epoch": 0.2351162687297141, "grad_norm": 0.0980658233165741, "learning_rate": 0.0009942786242487075, "loss": 1.5997, "step": 5288 }, { "epoch": 0.23520519318838645, "grad_norm": 0.10015766322612762, "learning_rate": 0.0009942732961135913, "loss": 1.5994, "step": 5290 }, { "epoch": 0.23529411764705882, "grad_norm": 0.10355110466480255, "learning_rate": 0.0009942679655129576, "loss": 1.5988, "step": 5292 }, { "epoch": 0.2353830421057312, "grad_norm": 0.102814219892025, "learning_rate": 0.0009942626324468335, "loss": 1.5951, "step": 5294 }, { "epoch": 0.23547196656440353, "grad_norm": 0.10506147146224976, "learning_rate": 0.0009942572969152453, "loss": 1.5975, "step": 5296 }, { "epoch": 0.2355608910230759, "grad_norm": 0.10157327353954315, "learning_rate": 0.0009942519589182196, "loss": 1.6015, "step": 5298 }, { "epoch": 0.23564981548174826, "grad_norm": 0.10472837835550308, "learning_rate": 0.000994246618455783, "loss": 1.5976, "step": 5300 }, { "epoch": 0.2357387399404206, "grad_norm": 0.10771182179450989, "learning_rate": 0.0009942412755279623, "loss": 1.6041, "step": 5302 }, { "epoch": 0.23582766439909297, "grad_norm": 0.10492417216300964, "learning_rate": 0.000994235930134784, "loss": 1.5976, "step": 5304 }, { "epoch": 0.23591658885776534, "grad_norm": 0.1006433516740799, "learning_rate": 0.000994230582276275, "loss": 1.5996, "step": 5306 }, { "epoch": 0.23600551331643768, "grad_norm": 0.10901300609111786, "learning_rate": 0.0009942252319524619, "loss": 1.6019, "step": 5308 }, { "epoch": 0.23609443777511005, "grad_norm": 0.10461027175188065, "learning_rate": 0.000994219879163371, "loss": 1.5967, "step": 5310 }, { "epoch": 0.2361833622337824, "grad_norm": 0.09647329896688461, "learning_rate": 0.0009942145239090294, "loss": 1.5951, "step": 5312 }, { "epoch": 0.23627228669245476, "grad_norm": 0.10260302573442459, "learning_rate": 0.000994209166189464, "loss": 1.6025, "step": 5314 }, { "epoch": 0.23636121115112713, "grad_norm": 0.09995372593402863, "learning_rate": 0.0009942038060047008, "loss": 1.5971, "step": 5316 }, { "epoch": 0.23645013560979947, "grad_norm": 0.0978364571928978, "learning_rate": 0.0009941984433547674, "loss": 1.6077, "step": 5318 }, { "epoch": 0.23653906006847183, "grad_norm": 0.10295219719409943, "learning_rate": 0.0009941930782396899, "loss": 1.6031, "step": 5320 }, { "epoch": 0.2366279845271442, "grad_norm": 0.10604212433099747, "learning_rate": 0.0009941877106594953, "loss": 1.5958, "step": 5322 }, { "epoch": 0.23671690898581654, "grad_norm": 0.09841012209653854, "learning_rate": 0.0009941823406142104, "loss": 1.5927, "step": 5324 }, { "epoch": 0.2368058334444889, "grad_norm": 0.09888961166143417, "learning_rate": 0.0009941769681038619, "loss": 1.5987, "step": 5326 }, { "epoch": 0.23689475790316125, "grad_norm": 0.10190734267234802, "learning_rate": 0.0009941715931284767, "loss": 1.5938, "step": 5328 }, { "epoch": 0.23698368236183362, "grad_norm": 0.10010533779859543, "learning_rate": 0.0009941662156880816, "loss": 1.5994, "step": 5330 }, { "epoch": 0.237072606820506, "grad_norm": 0.09754836559295654, "learning_rate": 0.0009941608357827035, "loss": 1.5942, "step": 5332 }, { "epoch": 0.23716153127917833, "grad_norm": 0.09817097336053848, "learning_rate": 0.0009941554534123689, "loss": 1.598, "step": 5334 }, { "epoch": 0.2372504557378507, "grad_norm": 0.09796758741140366, "learning_rate": 0.000994150068577105, "loss": 1.598, "step": 5336 }, { "epoch": 0.23733938019652306, "grad_norm": 0.09705115854740143, "learning_rate": 0.0009941446812769385, "loss": 1.598, "step": 5338 }, { "epoch": 0.2374283046551954, "grad_norm": 0.0968238040804863, "learning_rate": 0.0009941392915118962, "loss": 1.6016, "step": 5340 }, { "epoch": 0.23751722911386777, "grad_norm": 0.09914400428533554, "learning_rate": 0.0009941338992820053, "loss": 1.5997, "step": 5342 }, { "epoch": 0.23760615357254014, "grad_norm": 0.09874872863292694, "learning_rate": 0.0009941285045872922, "loss": 1.6013, "step": 5344 }, { "epoch": 0.23769507803121248, "grad_norm": 0.10324931144714355, "learning_rate": 0.0009941231074277843, "loss": 1.6, "step": 5346 }, { "epoch": 0.23778400248988485, "grad_norm": 0.09846501797437668, "learning_rate": 0.0009941177078035084, "loss": 1.5945, "step": 5348 }, { "epoch": 0.2378729269485572, "grad_norm": 0.09790665656328201, "learning_rate": 0.0009941123057144912, "loss": 1.5987, "step": 5350 }, { "epoch": 0.23796185140722956, "grad_norm": 0.10351137071847916, "learning_rate": 0.00099410690116076, "loss": 1.5974, "step": 5352 }, { "epoch": 0.23805077586590193, "grad_norm": 0.1032329574227333, "learning_rate": 0.0009941014941423413, "loss": 1.5964, "step": 5354 }, { "epoch": 0.23813970032457427, "grad_norm": 0.0977087914943695, "learning_rate": 0.0009940960846592623, "loss": 1.5978, "step": 5356 }, { "epoch": 0.23822862478324663, "grad_norm": 0.10141148418188095, "learning_rate": 0.0009940906727115503, "loss": 1.6033, "step": 5358 }, { "epoch": 0.238317549241919, "grad_norm": 0.10696842521429062, "learning_rate": 0.0009940852582992319, "loss": 1.5956, "step": 5360 }, { "epoch": 0.23840647370059134, "grad_norm": 0.10355627536773682, "learning_rate": 0.0009940798414223343, "loss": 1.5973, "step": 5362 }, { "epoch": 0.2384953981592637, "grad_norm": 0.10255977511405945, "learning_rate": 0.000994074422080884, "loss": 1.597, "step": 5364 }, { "epoch": 0.23858432261793605, "grad_norm": 0.09989781677722931, "learning_rate": 0.000994069000274909, "loss": 1.5986, "step": 5366 }, { "epoch": 0.23867324707660842, "grad_norm": 0.10210977494716644, "learning_rate": 0.0009940635760044357, "loss": 1.5956, "step": 5368 }, { "epoch": 0.2387621715352808, "grad_norm": 0.09619224071502686, "learning_rate": 0.0009940581492694912, "loss": 1.5962, "step": 5370 }, { "epoch": 0.23885109599395313, "grad_norm": 0.09644219279289246, "learning_rate": 0.0009940527200701023, "loss": 1.6002, "step": 5372 }, { "epoch": 0.2389400204526255, "grad_norm": 0.102609783411026, "learning_rate": 0.000994047288406297, "loss": 1.5974, "step": 5374 }, { "epoch": 0.23902894491129786, "grad_norm": 0.10200612992048264, "learning_rate": 0.0009940418542781014, "loss": 1.5975, "step": 5376 }, { "epoch": 0.2391178693699702, "grad_norm": 0.09754066914319992, "learning_rate": 0.0009940364176855433, "loss": 1.6006, "step": 5378 }, { "epoch": 0.23920679382864257, "grad_norm": 0.10025352984666824, "learning_rate": 0.0009940309786286494, "loss": 1.5991, "step": 5380 }, { "epoch": 0.2392957182873149, "grad_norm": 0.10298909991979599, "learning_rate": 0.000994025537107447, "loss": 1.5975, "step": 5382 }, { "epoch": 0.23938464274598728, "grad_norm": 0.09619417786598206, "learning_rate": 0.0009940200931219632, "loss": 1.595, "step": 5384 }, { "epoch": 0.23947356720465965, "grad_norm": 0.10511010885238647, "learning_rate": 0.0009940146466722252, "loss": 1.5977, "step": 5386 }, { "epoch": 0.239562491663332, "grad_norm": 0.09961894154548645, "learning_rate": 0.0009940091977582602, "loss": 1.5973, "step": 5388 }, { "epoch": 0.23965141612200436, "grad_norm": 0.09826082736253738, "learning_rate": 0.0009940037463800951, "loss": 1.6016, "step": 5390 }, { "epoch": 0.23974034058067673, "grad_norm": 0.10174942016601562, "learning_rate": 0.0009939982925377575, "loss": 1.5996, "step": 5392 }, { "epoch": 0.23982926503934907, "grad_norm": 0.09913383424282074, "learning_rate": 0.0009939928362312745, "loss": 1.5986, "step": 5394 }, { "epoch": 0.23991818949802143, "grad_norm": 0.10252643376588821, "learning_rate": 0.000993987377460673, "loss": 1.601, "step": 5396 }, { "epoch": 0.2400071139566938, "grad_norm": 0.0993933230638504, "learning_rate": 0.0009939819162259806, "loss": 1.5984, "step": 5398 }, { "epoch": 0.24009603841536614, "grad_norm": 0.09531717747449875, "learning_rate": 0.0009939764525272244, "loss": 1.5911, "step": 5400 }, { "epoch": 0.2401849628740385, "grad_norm": 0.09904836863279343, "learning_rate": 0.0009939709863644316, "loss": 1.6022, "step": 5402 }, { "epoch": 0.24027388733271085, "grad_norm": 0.0987459197640419, "learning_rate": 0.0009939655177376298, "loss": 1.5894, "step": 5404 }, { "epoch": 0.24036281179138322, "grad_norm": 0.09736425429582596, "learning_rate": 0.0009939600466468456, "loss": 1.5912, "step": 5406 }, { "epoch": 0.2404517362500556, "grad_norm": 0.09566332399845123, "learning_rate": 0.000993954573092107, "loss": 1.5967, "step": 5408 }, { "epoch": 0.24054066070872793, "grad_norm": 0.10598226636648178, "learning_rate": 0.0009939490970734407, "loss": 1.5986, "step": 5410 }, { "epoch": 0.2406295851674003, "grad_norm": 0.10711424052715302, "learning_rate": 0.0009939436185908744, "loss": 1.599, "step": 5412 }, { "epoch": 0.24071850962607266, "grad_norm": 0.1163632795214653, "learning_rate": 0.0009939381376444353, "loss": 1.6, "step": 5414 }, { "epoch": 0.240807434084745, "grad_norm": 0.1008208766579628, "learning_rate": 0.0009939326542341511, "loss": 1.598, "step": 5416 }, { "epoch": 0.24089635854341737, "grad_norm": 0.10526162385940552, "learning_rate": 0.0009939271683600487, "loss": 1.5995, "step": 5418 }, { "epoch": 0.2409852830020897, "grad_norm": 0.10237567126750946, "learning_rate": 0.0009939216800221554, "loss": 1.5987, "step": 5420 }, { "epoch": 0.24107420746076208, "grad_norm": 0.09717054665088654, "learning_rate": 0.000993916189220499, "loss": 1.5884, "step": 5422 }, { "epoch": 0.24116313191943445, "grad_norm": 0.10505110770463943, "learning_rate": 0.0009939106959551065, "loss": 1.5971, "step": 5424 }, { "epoch": 0.2412520563781068, "grad_norm": 0.10448066890239716, "learning_rate": 0.0009939052002260055, "loss": 1.59, "step": 5426 }, { "epoch": 0.24134098083677916, "grad_norm": 0.10122029483318329, "learning_rate": 0.0009938997020332233, "loss": 1.5975, "step": 5428 }, { "epoch": 0.24142990529545152, "grad_norm": 0.10328763723373413, "learning_rate": 0.0009938942013767876, "loss": 1.5994, "step": 5430 }, { "epoch": 0.24151882975412386, "grad_norm": 0.10285595804452896, "learning_rate": 0.0009938886982567255, "loss": 1.6059, "step": 5432 }, { "epoch": 0.24160775421279623, "grad_norm": 0.10578134655952454, "learning_rate": 0.0009938831926730647, "loss": 1.5949, "step": 5434 }, { "epoch": 0.2416966786714686, "grad_norm": 0.09849389642477036, "learning_rate": 0.0009938776846258324, "loss": 1.598, "step": 5436 }, { "epoch": 0.24178560313014094, "grad_norm": 0.0992036834359169, "learning_rate": 0.0009938721741150564, "loss": 1.5966, "step": 5438 }, { "epoch": 0.2418745275888133, "grad_norm": 0.10037153214216232, "learning_rate": 0.0009938666611407638, "loss": 1.6067, "step": 5440 }, { "epoch": 0.24196345204748565, "grad_norm": 0.10501682758331299, "learning_rate": 0.0009938611457029826, "loss": 1.5944, "step": 5442 }, { "epoch": 0.24205237650615802, "grad_norm": 0.0952974334359169, "learning_rate": 0.00099385562780174, "loss": 1.5969, "step": 5444 }, { "epoch": 0.24214130096483039, "grad_norm": 0.1013774648308754, "learning_rate": 0.0009938501074370633, "loss": 1.6038, "step": 5446 }, { "epoch": 0.24223022542350273, "grad_norm": 0.09989754110574722, "learning_rate": 0.0009938445846089804, "loss": 1.6012, "step": 5448 }, { "epoch": 0.2423191498821751, "grad_norm": 0.10097597539424896, "learning_rate": 0.0009938390593175188, "loss": 1.5928, "step": 5450 }, { "epoch": 0.24240807434084746, "grad_norm": 0.09957058727741241, "learning_rate": 0.000993833531562706, "loss": 1.5999, "step": 5452 }, { "epoch": 0.2424969987995198, "grad_norm": 0.09687520563602448, "learning_rate": 0.0009938280013445696, "loss": 1.5957, "step": 5454 }, { "epoch": 0.24258592325819217, "grad_norm": 0.1019771546125412, "learning_rate": 0.000993822468663137, "loss": 1.6009, "step": 5456 }, { "epoch": 0.2426748477168645, "grad_norm": 0.09672028571367264, "learning_rate": 0.000993816933518436, "loss": 1.5961, "step": 5458 }, { "epoch": 0.24276377217553688, "grad_norm": 0.09733521938323975, "learning_rate": 0.000993811395910494, "loss": 1.5935, "step": 5460 }, { "epoch": 0.24285269663420925, "grad_norm": 0.09702073037624359, "learning_rate": 0.0009938058558393392, "loss": 1.5958, "step": 5462 }, { "epoch": 0.2429416210928816, "grad_norm": 0.10039489716291428, "learning_rate": 0.0009938003133049987, "loss": 1.5989, "step": 5464 }, { "epoch": 0.24303054555155396, "grad_norm": 0.0961323231458664, "learning_rate": 0.0009937947683075002, "loss": 1.5929, "step": 5466 }, { "epoch": 0.24311947001022632, "grad_norm": 0.0994456484913826, "learning_rate": 0.0009937892208468713, "loss": 1.5967, "step": 5468 }, { "epoch": 0.24320839446889866, "grad_norm": 0.09631854295730591, "learning_rate": 0.00099378367092314, "loss": 1.5909, "step": 5470 }, { "epoch": 0.24329731892757103, "grad_norm": 0.0983763337135315, "learning_rate": 0.0009937781185363338, "loss": 1.6006, "step": 5472 }, { "epoch": 0.24338624338624337, "grad_norm": 0.10499723255634308, "learning_rate": 0.0009937725636864803, "loss": 1.5919, "step": 5474 }, { "epoch": 0.24347516784491574, "grad_norm": 0.10987919569015503, "learning_rate": 0.0009937670063736072, "loss": 1.5988, "step": 5476 }, { "epoch": 0.2435640923035881, "grad_norm": 0.105495885014534, "learning_rate": 0.0009937614465977422, "loss": 1.5977, "step": 5478 }, { "epoch": 0.24365301676226045, "grad_norm": 0.09992179274559021, "learning_rate": 0.0009937558843589134, "loss": 1.6033, "step": 5480 }, { "epoch": 0.24374194122093282, "grad_norm": 0.10099846124649048, "learning_rate": 0.0009937503196571482, "loss": 1.5892, "step": 5482 }, { "epoch": 0.24383086567960519, "grad_norm": 0.10099624842405319, "learning_rate": 0.0009937447524924745, "loss": 1.5986, "step": 5484 }, { "epoch": 0.24391979013827753, "grad_norm": 0.10109474509954453, "learning_rate": 0.00099373918286492, "loss": 1.595, "step": 5486 }, { "epoch": 0.2440087145969499, "grad_norm": 0.09985104948282242, "learning_rate": 0.0009937336107745125, "loss": 1.5983, "step": 5488 }, { "epoch": 0.24409763905562226, "grad_norm": 0.09679286181926727, "learning_rate": 0.0009937280362212798, "loss": 1.5893, "step": 5490 }, { "epoch": 0.2441865635142946, "grad_norm": 0.09930288046598434, "learning_rate": 0.0009937224592052496, "loss": 1.598, "step": 5492 }, { "epoch": 0.24427548797296697, "grad_norm": 0.10350073873996735, "learning_rate": 0.00099371687972645, "loss": 1.5957, "step": 5494 }, { "epoch": 0.2443644124316393, "grad_norm": 0.10076453536748886, "learning_rate": 0.0009937112977849085, "loss": 1.598, "step": 5496 }, { "epoch": 0.24445333689031168, "grad_norm": 0.0963132232427597, "learning_rate": 0.0009937057133806533, "loss": 1.5974, "step": 5498 }, { "epoch": 0.24454226134898405, "grad_norm": 0.0963580459356308, "learning_rate": 0.0009937001265137118, "loss": 1.5958, "step": 5500 }, { "epoch": 0.24454226134898405, "eval_loss": 1.5716533660888672, "eval_runtime": 12.37, "eval_samples_per_second": 558.609, "eval_steps_per_second": 69.846, "step": 5500 }, { "epoch": 0.2446311858076564, "grad_norm": 0.0988180860877037, "learning_rate": 0.0009936945371841121, "loss": 1.5965, "step": 5502 }, { "epoch": 0.24472011026632876, "grad_norm": 0.10076916962862015, "learning_rate": 0.0009936889453918824, "loss": 1.5974, "step": 5504 }, { "epoch": 0.24480903472500112, "grad_norm": 0.09656892716884613, "learning_rate": 0.00099368335113705, "loss": 1.5992, "step": 5506 }, { "epoch": 0.24489795918367346, "grad_norm": 0.09603594988584518, "learning_rate": 0.0009936777544196432, "loss": 1.5994, "step": 5508 }, { "epoch": 0.24498688364234583, "grad_norm": 0.09735065698623657, "learning_rate": 0.00099367215523969, "loss": 1.5918, "step": 5510 }, { "epoch": 0.24507580810101817, "grad_norm": 0.09379937499761581, "learning_rate": 0.0009936665535972178, "loss": 1.5911, "step": 5512 }, { "epoch": 0.24516473255969054, "grad_norm": 0.09495367109775543, "learning_rate": 0.000993660949492255, "loss": 1.5949, "step": 5514 }, { "epoch": 0.2452536570183629, "grad_norm": 0.10689006745815277, "learning_rate": 0.0009936553429248296, "loss": 1.5944, "step": 5516 }, { "epoch": 0.24534258147703525, "grad_norm": 0.10062182694673538, "learning_rate": 0.0009936497338949692, "loss": 1.5903, "step": 5518 }, { "epoch": 0.24543150593570762, "grad_norm": 0.10268256068229675, "learning_rate": 0.0009936441224027022, "loss": 1.594, "step": 5520 }, { "epoch": 0.24552043039437998, "grad_norm": 0.09993576258420944, "learning_rate": 0.000993638508448056, "loss": 1.5991, "step": 5522 }, { "epoch": 0.24560935485305233, "grad_norm": 0.10125838965177536, "learning_rate": 0.0009936328920310594, "loss": 1.6004, "step": 5524 }, { "epoch": 0.2456982793117247, "grad_norm": 0.10417357832193375, "learning_rate": 0.00099362727315174, "loss": 1.5928, "step": 5526 }, { "epoch": 0.24578720377039706, "grad_norm": 0.09818510711193085, "learning_rate": 0.0009936216518101255, "loss": 1.5964, "step": 5528 }, { "epoch": 0.2458761282290694, "grad_norm": 0.0970878154039383, "learning_rate": 0.0009936160280062446, "loss": 1.5894, "step": 5530 }, { "epoch": 0.24596505268774177, "grad_norm": 0.1003979966044426, "learning_rate": 0.0009936104017401247, "loss": 1.5873, "step": 5532 }, { "epoch": 0.2460539771464141, "grad_norm": 0.09515174478292465, "learning_rate": 0.0009936047730117943, "loss": 1.5936, "step": 5534 }, { "epoch": 0.24614290160508648, "grad_norm": 0.10499396920204163, "learning_rate": 0.0009935991418212815, "loss": 1.5907, "step": 5536 }, { "epoch": 0.24623182606375885, "grad_norm": 0.09744211286306381, "learning_rate": 0.000993593508168614, "loss": 1.595, "step": 5538 }, { "epoch": 0.2463207505224312, "grad_norm": 0.10020717233419418, "learning_rate": 0.0009935878720538202, "loss": 1.5971, "step": 5540 }, { "epoch": 0.24640967498110355, "grad_norm": 0.09754054993391037, "learning_rate": 0.0009935822334769283, "loss": 1.5877, "step": 5542 }, { "epoch": 0.24649859943977592, "grad_norm": 0.09400993585586548, "learning_rate": 0.0009935765924379662, "loss": 1.5937, "step": 5544 }, { "epoch": 0.24658752389844826, "grad_norm": 0.10023734718561172, "learning_rate": 0.0009935709489369623, "loss": 1.593, "step": 5546 }, { "epoch": 0.24667644835712063, "grad_norm": 0.10244899243116379, "learning_rate": 0.0009935653029739444, "loss": 1.5898, "step": 5548 }, { "epoch": 0.24676537281579297, "grad_norm": 0.09722084552049637, "learning_rate": 0.0009935596545489409, "loss": 1.5888, "step": 5550 }, { "epoch": 0.24685429727446534, "grad_norm": 0.09749680012464523, "learning_rate": 0.0009935540036619798, "loss": 1.5938, "step": 5552 }, { "epoch": 0.2469432217331377, "grad_norm": 0.09648852795362473, "learning_rate": 0.0009935483503130896, "loss": 1.5966, "step": 5554 }, { "epoch": 0.24703214619181005, "grad_norm": 0.09504228085279465, "learning_rate": 0.0009935426945022982, "loss": 1.5899, "step": 5556 }, { "epoch": 0.24712107065048242, "grad_norm": 0.1025584489107132, "learning_rate": 0.000993537036229634, "loss": 1.5932, "step": 5558 }, { "epoch": 0.24720999510915478, "grad_norm": 0.096546970307827, "learning_rate": 0.0009935313754951248, "loss": 1.5959, "step": 5560 }, { "epoch": 0.24729891956782712, "grad_norm": 0.09677073359489441, "learning_rate": 0.0009935257122987995, "loss": 1.593, "step": 5562 }, { "epoch": 0.2473878440264995, "grad_norm": 0.09250880032777786, "learning_rate": 0.000993520046640686, "loss": 1.5935, "step": 5564 }, { "epoch": 0.24747676848517183, "grad_norm": 0.09518403559923172, "learning_rate": 0.0009935143785208126, "loss": 1.5942, "step": 5566 }, { "epoch": 0.2475656929438442, "grad_norm": 0.09718389809131622, "learning_rate": 0.0009935087079392074, "loss": 1.5959, "step": 5568 }, { "epoch": 0.24765461740251657, "grad_norm": 0.09807734936475754, "learning_rate": 0.0009935030348958989, "loss": 1.5914, "step": 5570 }, { "epoch": 0.2477435418611889, "grad_norm": 0.09537956118583679, "learning_rate": 0.0009934973593909156, "loss": 1.5932, "step": 5572 }, { "epoch": 0.24783246631986128, "grad_norm": 0.09631271660327911, "learning_rate": 0.000993491681424285, "loss": 1.5892, "step": 5574 }, { "epoch": 0.24792139077853365, "grad_norm": 0.10306434333324432, "learning_rate": 0.0009934860009960364, "loss": 1.5889, "step": 5576 }, { "epoch": 0.24801031523720599, "grad_norm": 0.09478336572647095, "learning_rate": 0.0009934803181061978, "loss": 1.5932, "step": 5578 }, { "epoch": 0.24809923969587835, "grad_norm": 0.0936090350151062, "learning_rate": 0.0009934746327547972, "loss": 1.5883, "step": 5580 }, { "epoch": 0.24818816415455072, "grad_norm": 0.09921901673078537, "learning_rate": 0.0009934689449418634, "loss": 1.5914, "step": 5582 }, { "epoch": 0.24827708861322306, "grad_norm": 0.09646906703710556, "learning_rate": 0.0009934632546674245, "loss": 1.5998, "step": 5584 }, { "epoch": 0.24836601307189543, "grad_norm": 0.09717948734760284, "learning_rate": 0.000993457561931509, "loss": 1.5946, "step": 5586 }, { "epoch": 0.24845493753056777, "grad_norm": 0.09641571342945099, "learning_rate": 0.0009934518667341452, "loss": 1.5949, "step": 5588 }, { "epoch": 0.24854386198924014, "grad_norm": 0.09554963558912277, "learning_rate": 0.0009934461690753616, "loss": 1.5885, "step": 5590 }, { "epoch": 0.2486327864479125, "grad_norm": 0.10000897943973541, "learning_rate": 0.0009934404689551867, "loss": 1.5928, "step": 5592 }, { "epoch": 0.24872171090658485, "grad_norm": 0.09571028500795364, "learning_rate": 0.0009934347663736486, "loss": 1.5956, "step": 5594 }, { "epoch": 0.24881063536525722, "grad_norm": 0.09674900025129318, "learning_rate": 0.0009934290613307763, "loss": 1.5952, "step": 5596 }, { "epoch": 0.24889955982392958, "grad_norm": 0.09690657258033752, "learning_rate": 0.0009934233538265978, "loss": 1.5927, "step": 5598 }, { "epoch": 0.24898848428260192, "grad_norm": 0.09681335836648941, "learning_rate": 0.0009934176438611418, "loss": 1.5968, "step": 5600 }, { "epoch": 0.2490774087412743, "grad_norm": 0.0922679454088211, "learning_rate": 0.0009934119314344363, "loss": 1.5888, "step": 5602 }, { "epoch": 0.24916633319994663, "grad_norm": 0.09346043318510056, "learning_rate": 0.0009934062165465106, "loss": 1.5937, "step": 5604 }, { "epoch": 0.249255257658619, "grad_norm": 0.09791743010282516, "learning_rate": 0.0009934004991973926, "loss": 1.5897, "step": 5606 }, { "epoch": 0.24934418211729137, "grad_norm": 0.09670942276716232, "learning_rate": 0.0009933947793871111, "loss": 1.5893, "step": 5608 }, { "epoch": 0.2494331065759637, "grad_norm": 0.09886854887008667, "learning_rate": 0.0009933890571156945, "loss": 1.5851, "step": 5610 }, { "epoch": 0.24952203103463608, "grad_norm": 0.10364110767841339, "learning_rate": 0.0009933833323831714, "loss": 1.591, "step": 5612 }, { "epoch": 0.24961095549330845, "grad_norm": 0.09943173825740814, "learning_rate": 0.0009933776051895705, "loss": 1.5873, "step": 5614 }, { "epoch": 0.24969987995198079, "grad_norm": 0.09556498378515244, "learning_rate": 0.00099337187553492, "loss": 1.59, "step": 5616 }, { "epoch": 0.24978880441065315, "grad_norm": 0.09815754741430283, "learning_rate": 0.0009933661434192488, "loss": 1.5908, "step": 5618 }, { "epoch": 0.24987772886932552, "grad_norm": 0.09476683288812637, "learning_rate": 0.0009933604088425852, "loss": 1.5901, "step": 5620 }, { "epoch": 0.24996665332799786, "grad_norm": 0.09704037010669708, "learning_rate": 0.000993354671804958, "loss": 1.5915, "step": 5622 }, { "epoch": 0.2500555777866702, "grad_norm": 0.09691561013460159, "learning_rate": 0.000993348932306396, "loss": 1.5928, "step": 5624 }, { "epoch": 0.25014450224534257, "grad_norm": 0.09649381786584854, "learning_rate": 0.0009933431903469275, "loss": 1.5923, "step": 5626 }, { "epoch": 0.25023342670401494, "grad_norm": 0.10152386128902435, "learning_rate": 0.0009933374459265812, "loss": 1.5896, "step": 5628 }, { "epoch": 0.2503223511626873, "grad_norm": 0.09325571358203888, "learning_rate": 0.000993331699045386, "loss": 1.5933, "step": 5630 }, { "epoch": 0.2504112756213597, "grad_norm": 0.09736236929893494, "learning_rate": 0.0009933259497033704, "loss": 1.5925, "step": 5632 }, { "epoch": 0.250500200080032, "grad_norm": 0.09810613095760345, "learning_rate": 0.0009933201979005631, "loss": 1.5907, "step": 5634 }, { "epoch": 0.25058912453870436, "grad_norm": 0.09613955765962601, "learning_rate": 0.0009933144436369925, "loss": 1.5983, "step": 5636 }, { "epoch": 0.2506780489973767, "grad_norm": 0.09884078800678253, "learning_rate": 0.0009933086869126878, "loss": 1.5894, "step": 5638 }, { "epoch": 0.2507669734560491, "grad_norm": 0.09554318338632584, "learning_rate": 0.0009933029277276774, "loss": 1.5948, "step": 5640 }, { "epoch": 0.25085589791472146, "grad_norm": 0.09753008186817169, "learning_rate": 0.0009932971660819901, "loss": 1.5868, "step": 5642 }, { "epoch": 0.2509448223733938, "grad_norm": 0.09506553411483765, "learning_rate": 0.0009932914019756547, "loss": 1.5923, "step": 5644 }, { "epoch": 0.25103374683206614, "grad_norm": 0.09722185879945755, "learning_rate": 0.0009932856354087, "loss": 1.5925, "step": 5646 }, { "epoch": 0.2511226712907385, "grad_norm": 0.10006613284349442, "learning_rate": 0.0009932798663811544, "loss": 1.5926, "step": 5648 }, { "epoch": 0.2512115957494109, "grad_norm": 0.09580423682928085, "learning_rate": 0.0009932740948930471, "loss": 1.5899, "step": 5650 }, { "epoch": 0.25130052020808324, "grad_norm": 0.09586209803819656, "learning_rate": 0.000993268320944407, "loss": 1.5942, "step": 5652 }, { "epoch": 0.2513894446667556, "grad_norm": 0.09892687201499939, "learning_rate": 0.0009932625445352623, "loss": 1.5944, "step": 5654 }, { "epoch": 0.2514783691254279, "grad_norm": 0.09688273817300797, "learning_rate": 0.000993256765665642, "loss": 1.5922, "step": 5656 }, { "epoch": 0.2515672935841003, "grad_norm": 0.0952548235654831, "learning_rate": 0.0009932509843355755, "loss": 1.5931, "step": 5658 }, { "epoch": 0.25165621804277266, "grad_norm": 0.09588255733251572, "learning_rate": 0.0009932452005450912, "loss": 1.5877, "step": 5660 }, { "epoch": 0.25174514250144503, "grad_norm": 0.09836259484291077, "learning_rate": 0.0009932394142942176, "loss": 1.5873, "step": 5662 }, { "epoch": 0.2518340669601174, "grad_norm": 0.09556706994771957, "learning_rate": 0.0009932336255829841, "loss": 1.5933, "step": 5664 }, { "epoch": 0.25192299141878977, "grad_norm": 0.10461506247520447, "learning_rate": 0.0009932278344114195, "loss": 1.5922, "step": 5666 }, { "epoch": 0.2520119158774621, "grad_norm": 0.09809567034244537, "learning_rate": 0.0009932220407795525, "loss": 1.5999, "step": 5668 }, { "epoch": 0.25210084033613445, "grad_norm": 0.09388605505228043, "learning_rate": 0.0009932162446874121, "loss": 1.5885, "step": 5670 }, { "epoch": 0.2521897647948068, "grad_norm": 0.09325406700372696, "learning_rate": 0.0009932104461350274, "loss": 1.5912, "step": 5672 }, { "epoch": 0.2522786892534792, "grad_norm": 0.0968768522143364, "learning_rate": 0.0009932046451224268, "loss": 1.5942, "step": 5674 }, { "epoch": 0.25236761371215155, "grad_norm": 0.0974501445889473, "learning_rate": 0.0009931988416496396, "loss": 1.5917, "step": 5676 }, { "epoch": 0.25245653817082386, "grad_norm": 0.09732449054718018, "learning_rate": 0.0009931930357166949, "loss": 1.5937, "step": 5678 }, { "epoch": 0.25254546262949623, "grad_norm": 0.10150519758462906, "learning_rate": 0.0009931872273236213, "loss": 1.5885, "step": 5680 }, { "epoch": 0.2526343870881686, "grad_norm": 0.1010938212275505, "learning_rate": 0.000993181416470448, "loss": 1.5845, "step": 5682 }, { "epoch": 0.25272331154684097, "grad_norm": 0.09250231832265854, "learning_rate": 0.000993175603157204, "loss": 1.5867, "step": 5684 }, { "epoch": 0.25281223600551334, "grad_norm": 0.09422440081834793, "learning_rate": 0.0009931697873839182, "loss": 1.5957, "step": 5686 }, { "epoch": 0.25290116046418565, "grad_norm": 0.09714167565107346, "learning_rate": 0.0009931639691506196, "loss": 1.5932, "step": 5688 }, { "epoch": 0.252990084922858, "grad_norm": 0.0974729061126709, "learning_rate": 0.0009931581484573374, "loss": 1.5889, "step": 5690 }, { "epoch": 0.2530790093815304, "grad_norm": 0.09551315754652023, "learning_rate": 0.0009931523253041002, "loss": 1.5846, "step": 5692 }, { "epoch": 0.25316793384020275, "grad_norm": 0.09637055546045303, "learning_rate": 0.0009931464996909376, "loss": 1.594, "step": 5694 }, { "epoch": 0.2532568582988751, "grad_norm": 0.09468736499547958, "learning_rate": 0.0009931406716178783, "loss": 1.5897, "step": 5696 }, { "epoch": 0.2533457827575475, "grad_norm": 0.09597856551408768, "learning_rate": 0.0009931348410849514, "loss": 1.5879, "step": 5698 }, { "epoch": 0.2534347072162198, "grad_norm": 0.09364350885152817, "learning_rate": 0.0009931290080921861, "loss": 1.5913, "step": 5700 }, { "epoch": 0.25352363167489217, "grad_norm": 0.09758356213569641, "learning_rate": 0.0009931231726396116, "loss": 1.5885, "step": 5702 }, { "epoch": 0.25361255613356454, "grad_norm": 0.09728454798460007, "learning_rate": 0.000993117334727257, "loss": 1.5935, "step": 5704 }, { "epoch": 0.2537014805922369, "grad_norm": 0.10622499138116837, "learning_rate": 0.0009931114943551509, "loss": 1.5899, "step": 5706 }, { "epoch": 0.2537904050509093, "grad_norm": 0.10362113267183304, "learning_rate": 0.000993105651523323, "loss": 1.59, "step": 5708 }, { "epoch": 0.2538793295095816, "grad_norm": 0.10291391611099243, "learning_rate": 0.0009930998062318021, "loss": 1.5932, "step": 5710 }, { "epoch": 0.25396825396825395, "grad_norm": 0.09546540677547455, "learning_rate": 0.0009930939584806176, "loss": 1.5853, "step": 5712 }, { "epoch": 0.2540571784269263, "grad_norm": 0.0974823534488678, "learning_rate": 0.0009930881082697988, "loss": 1.59, "step": 5714 }, { "epoch": 0.2541461028855987, "grad_norm": 0.09429586678743362, "learning_rate": 0.0009930822555993743, "loss": 1.5893, "step": 5716 }, { "epoch": 0.25423502734427106, "grad_norm": 0.09460654854774475, "learning_rate": 0.0009930764004693739, "loss": 1.5912, "step": 5718 }, { "epoch": 0.2543239518029434, "grad_norm": 0.09382274746894836, "learning_rate": 0.0009930705428798266, "loss": 1.5889, "step": 5720 }, { "epoch": 0.25441287626161574, "grad_norm": 0.09548698365688324, "learning_rate": 0.0009930646828307614, "loss": 1.5866, "step": 5722 }, { "epoch": 0.2545018007202881, "grad_norm": 0.0983201414346695, "learning_rate": 0.000993058820322208, "loss": 1.5947, "step": 5724 }, { "epoch": 0.2545907251789605, "grad_norm": 0.09615219384431839, "learning_rate": 0.000993052955354195, "loss": 1.595, "step": 5726 }, { "epoch": 0.25467964963763284, "grad_norm": 0.09847582876682281, "learning_rate": 0.0009930470879267522, "loss": 1.5901, "step": 5728 }, { "epoch": 0.2547685740963052, "grad_norm": 0.09659074246883392, "learning_rate": 0.0009930412180399087, "loss": 1.5912, "step": 5730 }, { "epoch": 0.2548574985549775, "grad_norm": 0.09810440987348557, "learning_rate": 0.000993035345693694, "loss": 1.5906, "step": 5732 }, { "epoch": 0.2549464230136499, "grad_norm": 0.1003175675868988, "learning_rate": 0.0009930294708881367, "loss": 1.5884, "step": 5734 }, { "epoch": 0.25503534747232226, "grad_norm": 0.10160094499588013, "learning_rate": 0.0009930235936232669, "loss": 1.5865, "step": 5736 }, { "epoch": 0.25512427193099463, "grad_norm": 0.09863349050283432, "learning_rate": 0.0009930177138991131, "loss": 1.5936, "step": 5738 }, { "epoch": 0.255213196389667, "grad_norm": 0.08960582315921783, "learning_rate": 0.0009930118317157054, "loss": 1.5874, "step": 5740 }, { "epoch": 0.2553021208483393, "grad_norm": 0.09829938411712646, "learning_rate": 0.000993005947073073, "loss": 1.587, "step": 5742 }, { "epoch": 0.2553910453070117, "grad_norm": 0.09991574287414551, "learning_rate": 0.000993000059971245, "loss": 1.5867, "step": 5744 }, { "epoch": 0.25547996976568405, "grad_norm": 0.09275034070014954, "learning_rate": 0.0009929941704102509, "loss": 1.5933, "step": 5746 }, { "epoch": 0.2555688942243564, "grad_norm": 0.09592308104038239, "learning_rate": 0.00099298827839012, "loss": 1.5872, "step": 5748 }, { "epoch": 0.2556578186830288, "grad_norm": 0.0935816764831543, "learning_rate": 0.0009929823839108817, "loss": 1.5868, "step": 5750 }, { "epoch": 0.25574674314170115, "grad_norm": 0.09557216614484787, "learning_rate": 0.0009929764869725656, "loss": 1.5898, "step": 5752 }, { "epoch": 0.25583566760037346, "grad_norm": 0.0934915617108345, "learning_rate": 0.0009929705875752007, "loss": 1.5913, "step": 5754 }, { "epoch": 0.25592459205904583, "grad_norm": 0.09909486025571823, "learning_rate": 0.0009929646857188168, "loss": 1.5847, "step": 5756 }, { "epoch": 0.2560135165177182, "grad_norm": 0.09650136530399323, "learning_rate": 0.0009929587814034434, "loss": 1.5949, "step": 5758 }, { "epoch": 0.25610244097639057, "grad_norm": 0.10222198814153671, "learning_rate": 0.0009929528746291094, "loss": 1.5915, "step": 5760 }, { "epoch": 0.25619136543506293, "grad_norm": 0.10196855664253235, "learning_rate": 0.000992946965395845, "loss": 1.5848, "step": 5762 }, { "epoch": 0.25628028989373525, "grad_norm": 0.09607810527086258, "learning_rate": 0.000992941053703679, "loss": 1.5936, "step": 5764 }, { "epoch": 0.2563692143524076, "grad_norm": 0.09293419867753983, "learning_rate": 0.0009929351395526413, "loss": 1.5874, "step": 5766 }, { "epoch": 0.25645813881108, "grad_norm": 0.1013997420668602, "learning_rate": 0.0009929292229427615, "loss": 1.5858, "step": 5768 }, { "epoch": 0.25654706326975235, "grad_norm": 0.10565856844186783, "learning_rate": 0.0009929233038740686, "loss": 1.591, "step": 5770 }, { "epoch": 0.2566359877284247, "grad_norm": 0.09518007189035416, "learning_rate": 0.0009929173823465926, "loss": 1.5874, "step": 5772 }, { "epoch": 0.2567249121870971, "grad_norm": 0.09767907112836838, "learning_rate": 0.000992911458360363, "loss": 1.5891, "step": 5774 }, { "epoch": 0.2568138366457694, "grad_norm": 0.09295496344566345, "learning_rate": 0.0009929055319154092, "loss": 1.5859, "step": 5776 }, { "epoch": 0.25690276110444177, "grad_norm": 0.0951056256890297, "learning_rate": 0.0009928996030117605, "loss": 1.5898, "step": 5778 }, { "epoch": 0.25699168556311414, "grad_norm": 0.09819343686103821, "learning_rate": 0.000992893671649447, "loss": 1.5859, "step": 5780 }, { "epoch": 0.2570806100217865, "grad_norm": 0.09449407458305359, "learning_rate": 0.000992887737828498, "loss": 1.5935, "step": 5782 }, { "epoch": 0.2571695344804589, "grad_norm": 0.09632644057273865, "learning_rate": 0.0009928818015489432, "loss": 1.594, "step": 5784 }, { "epoch": 0.2572584589391312, "grad_norm": 0.09808379411697388, "learning_rate": 0.000992875862810812, "loss": 1.5954, "step": 5786 }, { "epoch": 0.25734738339780355, "grad_norm": 0.09704041481018066, "learning_rate": 0.0009928699216141344, "loss": 1.5884, "step": 5788 }, { "epoch": 0.2574363078564759, "grad_norm": 0.09470924735069275, "learning_rate": 0.0009928639779589396, "loss": 1.5833, "step": 5790 }, { "epoch": 0.2575252323151483, "grad_norm": 0.09412840753793716, "learning_rate": 0.0009928580318452577, "loss": 1.5889, "step": 5792 }, { "epoch": 0.25761415677382066, "grad_norm": 0.09567838162183762, "learning_rate": 0.000992852083273118, "loss": 1.5891, "step": 5794 }, { "epoch": 0.25770308123249297, "grad_norm": 0.10092340409755707, "learning_rate": 0.0009928461322425502, "loss": 1.5908, "step": 5796 }, { "epoch": 0.25779200569116534, "grad_norm": 0.10121021419763565, "learning_rate": 0.0009928401787535842, "loss": 1.5911, "step": 5798 }, { "epoch": 0.2578809301498377, "grad_norm": 0.09260232001543045, "learning_rate": 0.0009928342228062497, "loss": 1.5844, "step": 5800 }, { "epoch": 0.2579698546085101, "grad_norm": 0.1004943922162056, "learning_rate": 0.0009928282644005762, "loss": 1.5827, "step": 5802 }, { "epoch": 0.25805877906718244, "grad_norm": 0.09976594150066376, "learning_rate": 0.0009928223035365934, "loss": 1.5867, "step": 5804 }, { "epoch": 0.2581477035258548, "grad_norm": 0.09819846600294113, "learning_rate": 0.0009928163402143312, "loss": 1.5836, "step": 5806 }, { "epoch": 0.2582366279845271, "grad_norm": 0.09671387076377869, "learning_rate": 0.0009928103744338192, "loss": 1.584, "step": 5808 }, { "epoch": 0.2583255524431995, "grad_norm": 0.09667721390724182, "learning_rate": 0.0009928044061950875, "loss": 1.5886, "step": 5810 }, { "epoch": 0.25841447690187186, "grad_norm": 0.09693595767021179, "learning_rate": 0.0009927984354981653, "loss": 1.5871, "step": 5812 }, { "epoch": 0.2585034013605442, "grad_norm": 0.08862930536270142, "learning_rate": 0.000992792462343083, "loss": 1.5902, "step": 5814 }, { "epoch": 0.2585923258192166, "grad_norm": 0.09225138276815414, "learning_rate": 0.00099278648672987, "loss": 1.5824, "step": 5816 }, { "epoch": 0.2586812502778889, "grad_norm": 0.09490638971328735, "learning_rate": 0.0009927805086585562, "loss": 1.5793, "step": 5818 }, { "epoch": 0.2587701747365613, "grad_norm": 0.09594158828258514, "learning_rate": 0.0009927745281291713, "loss": 1.583, "step": 5820 }, { "epoch": 0.25885909919523364, "grad_norm": 0.09334473311901093, "learning_rate": 0.0009927685451417453, "loss": 1.5852, "step": 5822 }, { "epoch": 0.258948023653906, "grad_norm": 0.09541244804859161, "learning_rate": 0.0009927625596963083, "loss": 1.59, "step": 5824 }, { "epoch": 0.2590369481125784, "grad_norm": 0.09415993094444275, "learning_rate": 0.0009927565717928895, "loss": 1.5866, "step": 5826 }, { "epoch": 0.25912587257125075, "grad_norm": 0.09244725853204727, "learning_rate": 0.0009927505814315194, "loss": 1.5902, "step": 5828 }, { "epoch": 0.25921479702992306, "grad_norm": 0.09592825919389725, "learning_rate": 0.0009927445886122276, "loss": 1.5915, "step": 5830 }, { "epoch": 0.25930372148859543, "grad_norm": 0.0941583588719368, "learning_rate": 0.0009927385933350436, "loss": 1.589, "step": 5832 }, { "epoch": 0.2593926459472678, "grad_norm": 0.09573867917060852, "learning_rate": 0.000992732595599998, "loss": 1.587, "step": 5834 }, { "epoch": 0.25948157040594017, "grad_norm": 0.09302788972854614, "learning_rate": 0.0009927265954071205, "loss": 1.589, "step": 5836 }, { "epoch": 0.25957049486461253, "grad_norm": 0.09388726204633713, "learning_rate": 0.0009927205927564408, "loss": 1.5858, "step": 5838 }, { "epoch": 0.25965941932328485, "grad_norm": 0.08946000784635544, "learning_rate": 0.000992714587647989, "loss": 1.5815, "step": 5840 }, { "epoch": 0.2597483437819572, "grad_norm": 0.09015568345785141, "learning_rate": 0.000992708580081795, "loss": 1.5907, "step": 5842 }, { "epoch": 0.2598372682406296, "grad_norm": 0.09112632274627686, "learning_rate": 0.0009927025700578892, "loss": 1.5878, "step": 5844 }, { "epoch": 0.25992619269930195, "grad_norm": 0.0954042598605156, "learning_rate": 0.0009926965575763008, "loss": 1.5831, "step": 5846 }, { "epoch": 0.2600151171579743, "grad_norm": 0.09618160128593445, "learning_rate": 0.0009926905426370604, "loss": 1.588, "step": 5848 }, { "epoch": 0.2601040416166467, "grad_norm": 0.09728830307722092, "learning_rate": 0.0009926845252401974, "loss": 1.5892, "step": 5850 }, { "epoch": 0.260192966075319, "grad_norm": 0.09168002754449844, "learning_rate": 0.0009926785053857425, "loss": 1.5904, "step": 5852 }, { "epoch": 0.26028189053399137, "grad_norm": 0.09341517090797424, "learning_rate": 0.0009926724830737256, "loss": 1.5891, "step": 5854 }, { "epoch": 0.26037081499266373, "grad_norm": 0.09883091598749161, "learning_rate": 0.0009926664583041764, "loss": 1.5898, "step": 5856 }, { "epoch": 0.2604597394513361, "grad_norm": 0.0963706523180008, "learning_rate": 0.000992660431077125, "loss": 1.5869, "step": 5858 }, { "epoch": 0.26054866391000847, "grad_norm": 0.09510736167430878, "learning_rate": 0.0009926544013926018, "loss": 1.5886, "step": 5860 }, { "epoch": 0.2606375883686808, "grad_norm": 0.09609393775463104, "learning_rate": 0.0009926483692506364, "loss": 1.59, "step": 5862 }, { "epoch": 0.26072651282735315, "grad_norm": 0.10332238674163818, "learning_rate": 0.0009926423346512596, "loss": 1.5906, "step": 5864 }, { "epoch": 0.2608154372860255, "grad_norm": 0.09607810527086258, "learning_rate": 0.0009926362975945008, "loss": 1.5821, "step": 5866 }, { "epoch": 0.2609043617446979, "grad_norm": 0.09439858794212341, "learning_rate": 0.0009926302580803904, "loss": 1.5893, "step": 5868 }, { "epoch": 0.26099328620337026, "grad_norm": 0.09515378624200821, "learning_rate": 0.0009926242161089583, "loss": 1.5872, "step": 5870 }, { "epoch": 0.26108221066204257, "grad_norm": 0.09659688174724579, "learning_rate": 0.0009926181716802351, "loss": 1.5856, "step": 5872 }, { "epoch": 0.26117113512071494, "grad_norm": 0.09571564942598343, "learning_rate": 0.0009926121247942506, "loss": 1.583, "step": 5874 }, { "epoch": 0.2612600595793873, "grad_norm": 0.09830082207918167, "learning_rate": 0.000992606075451035, "loss": 1.5874, "step": 5876 }, { "epoch": 0.2613489840380597, "grad_norm": 0.09110367298126221, "learning_rate": 0.0009926000236506187, "loss": 1.5869, "step": 5878 }, { "epoch": 0.26143790849673204, "grad_norm": 0.09880504012107849, "learning_rate": 0.0009925939693930315, "loss": 1.5823, "step": 5880 }, { "epoch": 0.2615268329554044, "grad_norm": 0.09947524964809418, "learning_rate": 0.0009925879126783039, "loss": 1.5878, "step": 5882 }, { "epoch": 0.2616157574140767, "grad_norm": 0.0937243178486824, "learning_rate": 0.0009925818535064662, "loss": 1.583, "step": 5884 }, { "epoch": 0.2617046818727491, "grad_norm": 0.0913049653172493, "learning_rate": 0.0009925757918775482, "loss": 1.5867, "step": 5886 }, { "epoch": 0.26179360633142146, "grad_norm": 0.09564990550279617, "learning_rate": 0.0009925697277915805, "loss": 1.5917, "step": 5888 }, { "epoch": 0.2618825307900938, "grad_norm": 0.09640397876501083, "learning_rate": 0.0009925636612485931, "loss": 1.5818, "step": 5890 }, { "epoch": 0.2619714552487662, "grad_norm": 0.09462813287973404, "learning_rate": 0.0009925575922486167, "loss": 1.5867, "step": 5892 }, { "epoch": 0.2620603797074385, "grad_norm": 0.10179334878921509, "learning_rate": 0.000992551520791681, "loss": 1.5877, "step": 5894 }, { "epoch": 0.2621493041661109, "grad_norm": 0.09697990119457245, "learning_rate": 0.0009925454468778167, "loss": 1.5843, "step": 5896 }, { "epoch": 0.26223822862478324, "grad_norm": 0.09205523133277893, "learning_rate": 0.0009925393705070538, "loss": 1.5882, "step": 5898 }, { "epoch": 0.2623271530834556, "grad_norm": 0.09468796104192734, "learning_rate": 0.0009925332916794229, "loss": 1.593, "step": 5900 }, { "epoch": 0.262416077542128, "grad_norm": 0.09811273962259293, "learning_rate": 0.000992527210394954, "loss": 1.5872, "step": 5902 }, { "epoch": 0.26250500200080035, "grad_norm": 0.09203442186117172, "learning_rate": 0.000992521126653678, "loss": 1.5857, "step": 5904 }, { "epoch": 0.26259392645947266, "grad_norm": 0.10104124248027802, "learning_rate": 0.0009925150404556245, "loss": 1.5929, "step": 5906 }, { "epoch": 0.262682850918145, "grad_norm": 0.10001523047685623, "learning_rate": 0.0009925089518008244, "loss": 1.5829, "step": 5908 }, { "epoch": 0.2627717753768174, "grad_norm": 0.10386830568313599, "learning_rate": 0.000992502860689308, "loss": 1.5874, "step": 5910 }, { "epoch": 0.26286069983548976, "grad_norm": 0.09889882802963257, "learning_rate": 0.0009924967671211052, "loss": 1.5876, "step": 5912 }, { "epoch": 0.26294962429416213, "grad_norm": 0.09799201786518097, "learning_rate": 0.000992490671096247, "loss": 1.5832, "step": 5914 }, { "epoch": 0.26303854875283444, "grad_norm": 0.10036963224411011, "learning_rate": 0.0009924845726147636, "loss": 1.5868, "step": 5916 }, { "epoch": 0.2631274732115068, "grad_norm": 0.09943078458309174, "learning_rate": 0.0009924784716766853, "loss": 1.5812, "step": 5918 }, { "epoch": 0.2632163976701792, "grad_norm": 0.0965849757194519, "learning_rate": 0.0009924723682820428, "loss": 1.5822, "step": 5920 }, { "epoch": 0.26330532212885155, "grad_norm": 0.10785552114248276, "learning_rate": 0.0009924662624308663, "loss": 1.5856, "step": 5922 }, { "epoch": 0.2633942465875239, "grad_norm": 0.09936713427305222, "learning_rate": 0.0009924601541231863, "loss": 1.5891, "step": 5924 }, { "epoch": 0.26348317104619623, "grad_norm": 0.1005997359752655, "learning_rate": 0.0009924540433590334, "loss": 1.5896, "step": 5926 }, { "epoch": 0.2635720955048686, "grad_norm": 0.0992506816983223, "learning_rate": 0.0009924479301384379, "loss": 1.5872, "step": 5928 }, { "epoch": 0.26366101996354097, "grad_norm": 0.09371292591094971, "learning_rate": 0.0009924418144614305, "loss": 1.5879, "step": 5930 }, { "epoch": 0.26374994442221333, "grad_norm": 0.09433597326278687, "learning_rate": 0.0009924356963280415, "loss": 1.5871, "step": 5932 }, { "epoch": 0.2638388688808857, "grad_norm": 0.08891277760267258, "learning_rate": 0.0009924295757383014, "loss": 1.583, "step": 5934 }, { "epoch": 0.26392779333955807, "grad_norm": 0.09156538546085358, "learning_rate": 0.000992423452692241, "loss": 1.5958, "step": 5936 }, { "epoch": 0.2640167177982304, "grad_norm": 0.09960367530584335, "learning_rate": 0.0009924173271898906, "loss": 1.5829, "step": 5938 }, { "epoch": 0.26410564225690275, "grad_norm": 0.09040109068155289, "learning_rate": 0.000992411199231281, "loss": 1.5869, "step": 5940 }, { "epoch": 0.2641945667155751, "grad_norm": 0.09623978286981583, "learning_rate": 0.0009924050688164424, "loss": 1.5814, "step": 5942 }, { "epoch": 0.2642834911742475, "grad_norm": 0.09259607642889023, "learning_rate": 0.0009923989359454056, "loss": 1.5886, "step": 5944 }, { "epoch": 0.26437241563291985, "grad_norm": 0.09638094156980515, "learning_rate": 0.0009923928006182012, "loss": 1.5863, "step": 5946 }, { "epoch": 0.26446134009159217, "grad_norm": 0.09382667392492294, "learning_rate": 0.0009923866628348597, "loss": 1.582, "step": 5948 }, { "epoch": 0.26455026455026454, "grad_norm": 0.09935595840215683, "learning_rate": 0.000992380522595412, "loss": 1.585, "step": 5950 }, { "epoch": 0.2646391890089369, "grad_norm": 0.0950867235660553, "learning_rate": 0.0009923743798998883, "loss": 1.5854, "step": 5952 }, { "epoch": 0.26472811346760927, "grad_norm": 0.09334602952003479, "learning_rate": 0.0009923682347483195, "loss": 1.5831, "step": 5954 }, { "epoch": 0.26481703792628164, "grad_norm": 0.09344839304685593, "learning_rate": 0.0009923620871407364, "loss": 1.5826, "step": 5956 }, { "epoch": 0.264905962384954, "grad_norm": 0.0958920270204544, "learning_rate": 0.0009923559370771694, "loss": 1.5838, "step": 5958 }, { "epoch": 0.2649948868436263, "grad_norm": 0.09009189903736115, "learning_rate": 0.0009923497845576493, "loss": 1.5878, "step": 5960 }, { "epoch": 0.2650838113022987, "grad_norm": 0.0952090173959732, "learning_rate": 0.0009923436295822066, "loss": 1.577, "step": 5962 }, { "epoch": 0.26517273576097106, "grad_norm": 0.08939055353403091, "learning_rate": 0.0009923374721508723, "loss": 1.578, "step": 5964 }, { "epoch": 0.2652616602196434, "grad_norm": 0.08977833390235901, "learning_rate": 0.0009923313122636769, "loss": 1.5773, "step": 5966 }, { "epoch": 0.2653505846783158, "grad_norm": 0.09042993932962418, "learning_rate": 0.000992325149920651, "loss": 1.5859, "step": 5968 }, { "epoch": 0.2654395091369881, "grad_norm": 0.0930049791932106, "learning_rate": 0.0009923189851218258, "loss": 1.5909, "step": 5970 }, { "epoch": 0.2655284335956605, "grad_norm": 0.09311951696872711, "learning_rate": 0.0009923128178672318, "loss": 1.5886, "step": 5972 }, { "epoch": 0.26561735805433284, "grad_norm": 0.09062470495700836, "learning_rate": 0.0009923066481568996, "loss": 1.5856, "step": 5974 }, { "epoch": 0.2657062825130052, "grad_norm": 0.09528020024299622, "learning_rate": 0.0009923004759908602, "loss": 1.5858, "step": 5976 }, { "epoch": 0.2657952069716776, "grad_norm": 0.09330100566148758, "learning_rate": 0.0009922943013691442, "loss": 1.5919, "step": 5978 }, { "epoch": 0.2658841314303499, "grad_norm": 0.09436290711164474, "learning_rate": 0.0009922881242917824, "loss": 1.5807, "step": 5980 }, { "epoch": 0.26597305588902226, "grad_norm": 0.09486601501703262, "learning_rate": 0.0009922819447588061, "loss": 1.5844, "step": 5982 }, { "epoch": 0.2660619803476946, "grad_norm": 0.09641608595848083, "learning_rate": 0.0009922757627702455, "loss": 1.5822, "step": 5984 }, { "epoch": 0.266150904806367, "grad_norm": 0.096797876060009, "learning_rate": 0.0009922695783261318, "loss": 1.5819, "step": 5986 }, { "epoch": 0.26623982926503936, "grad_norm": 0.09238893538713455, "learning_rate": 0.0009922633914264956, "loss": 1.5795, "step": 5988 }, { "epoch": 0.26632875372371173, "grad_norm": 0.09049165993928909, "learning_rate": 0.000992257202071368, "loss": 1.5835, "step": 5990 }, { "epoch": 0.26641767818238404, "grad_norm": 0.09518662840127945, "learning_rate": 0.0009922510102607796, "loss": 1.5844, "step": 5992 }, { "epoch": 0.2665066026410564, "grad_norm": 0.09077850729227066, "learning_rate": 0.0009922448159947618, "loss": 1.585, "step": 5994 }, { "epoch": 0.2665955270997288, "grad_norm": 0.09498831629753113, "learning_rate": 0.0009922386192733447, "loss": 1.5779, "step": 5996 }, { "epoch": 0.26668445155840115, "grad_norm": 0.09286242723464966, "learning_rate": 0.00099223242009656, "loss": 1.5788, "step": 5998 }, { "epoch": 0.2667733760170735, "grad_norm": 0.09513460099697113, "learning_rate": 0.000992226218464438, "loss": 1.5926, "step": 6000 }, { "epoch": 0.2667733760170735, "eval_loss": 1.5609192848205566, "eval_runtime": 12.357, "eval_samples_per_second": 559.198, "eval_steps_per_second": 69.92, "step": 6000 }, { "epoch": 0.26686230047574583, "grad_norm": 0.09536412358283997, "learning_rate": 0.0009922200143770103, "loss": 1.5798, "step": 6002 }, { "epoch": 0.2669512249344182, "grad_norm": 0.0923578068614006, "learning_rate": 0.000992213807834307, "loss": 1.5833, "step": 6004 }, { "epoch": 0.26704014939309056, "grad_norm": 0.0896889865398407, "learning_rate": 0.00099220759883636, "loss": 1.5824, "step": 6006 }, { "epoch": 0.26712907385176293, "grad_norm": 0.0965195745229721, "learning_rate": 0.0009922013873831994, "loss": 1.5842, "step": 6008 }, { "epoch": 0.2672179983104353, "grad_norm": 0.09298788011074066, "learning_rate": 0.0009921951734748566, "loss": 1.575, "step": 6010 }, { "epoch": 0.26730692276910767, "grad_norm": 0.0938933789730072, "learning_rate": 0.0009921889571113628, "loss": 1.5787, "step": 6012 }, { "epoch": 0.26739584722778, "grad_norm": 0.09369226545095444, "learning_rate": 0.0009921827382927486, "loss": 1.5808, "step": 6014 }, { "epoch": 0.26748477168645235, "grad_norm": 0.09126924723386765, "learning_rate": 0.0009921765170190454, "loss": 1.5752, "step": 6016 }, { "epoch": 0.2675736961451247, "grad_norm": 0.09294477850198746, "learning_rate": 0.0009921702932902839, "loss": 1.5761, "step": 6018 }, { "epoch": 0.2676626206037971, "grad_norm": 0.09301607310771942, "learning_rate": 0.0009921640671064953, "loss": 1.5772, "step": 6020 }, { "epoch": 0.26775154506246945, "grad_norm": 0.09419137239456177, "learning_rate": 0.0009921578384677106, "loss": 1.5782, "step": 6022 }, { "epoch": 0.26784046952114177, "grad_norm": 0.09414952993392944, "learning_rate": 0.000992151607373961, "loss": 1.5785, "step": 6024 }, { "epoch": 0.26792939397981413, "grad_norm": 0.09365058690309525, "learning_rate": 0.0009921453738252774, "loss": 1.584, "step": 6026 }, { "epoch": 0.2680183184384865, "grad_norm": 0.09278295934200287, "learning_rate": 0.000992139137821691, "loss": 1.5855, "step": 6028 }, { "epoch": 0.26810724289715887, "grad_norm": 0.09213846176862717, "learning_rate": 0.000992132899363233, "loss": 1.5798, "step": 6030 }, { "epoch": 0.26819616735583124, "grad_norm": 0.0945931226015091, "learning_rate": 0.0009921266584499344, "loss": 1.5792, "step": 6032 }, { "epoch": 0.2682850918145036, "grad_norm": 0.09439346194267273, "learning_rate": 0.000992120415081826, "loss": 1.5722, "step": 6034 }, { "epoch": 0.2683740162731759, "grad_norm": 0.09740027040243149, "learning_rate": 0.0009921141692589396, "loss": 1.582, "step": 6036 }, { "epoch": 0.2684629407318483, "grad_norm": 0.0935557633638382, "learning_rate": 0.0009921079209813063, "loss": 1.581, "step": 6038 }, { "epoch": 0.26855186519052066, "grad_norm": 0.09402929991483688, "learning_rate": 0.0009921016702489564, "loss": 1.5772, "step": 6040 }, { "epoch": 0.268640789649193, "grad_norm": 0.09664035588502884, "learning_rate": 0.000992095417061922, "loss": 1.5807, "step": 6042 }, { "epoch": 0.2687297141078654, "grad_norm": 0.09566997736692429, "learning_rate": 0.000992089161420234, "loss": 1.5822, "step": 6044 }, { "epoch": 0.2688186385665377, "grad_norm": 0.09569090604782104, "learning_rate": 0.0009920829033239235, "loss": 1.5799, "step": 6046 }, { "epoch": 0.2689075630252101, "grad_norm": 0.09623146057128906, "learning_rate": 0.0009920766427730218, "loss": 1.5803, "step": 6048 }, { "epoch": 0.26899648748388244, "grad_norm": 0.09390247613191605, "learning_rate": 0.0009920703797675603, "loss": 1.58, "step": 6050 }, { "epoch": 0.2690854119425548, "grad_norm": 0.09540102630853653, "learning_rate": 0.0009920641143075699, "loss": 1.5818, "step": 6052 }, { "epoch": 0.2691743364012272, "grad_norm": 0.09371043741703033, "learning_rate": 0.000992057846393082, "loss": 1.5885, "step": 6054 }, { "epoch": 0.2692632608598995, "grad_norm": 0.09126854687929153, "learning_rate": 0.000992051576024128, "loss": 1.5794, "step": 6056 }, { "epoch": 0.26935218531857186, "grad_norm": 0.09539595991373062, "learning_rate": 0.0009920453032007388, "loss": 1.5784, "step": 6058 }, { "epoch": 0.2694411097772442, "grad_norm": 0.09004338830709457, "learning_rate": 0.0009920390279229462, "loss": 1.5753, "step": 6060 }, { "epoch": 0.2695300342359166, "grad_norm": 0.09474993497133255, "learning_rate": 0.0009920327501907811, "loss": 1.5854, "step": 6062 }, { "epoch": 0.26961895869458896, "grad_norm": 0.10050513595342636, "learning_rate": 0.000992026470004275, "loss": 1.5815, "step": 6064 }, { "epoch": 0.26970788315326133, "grad_norm": 0.09140043705701828, "learning_rate": 0.0009920201873634593, "loss": 1.58, "step": 6066 }, { "epoch": 0.26979680761193364, "grad_norm": 0.09281838685274124, "learning_rate": 0.0009920139022683652, "loss": 1.5788, "step": 6068 }, { "epoch": 0.269885732070606, "grad_norm": 0.0929790586233139, "learning_rate": 0.0009920076147190242, "loss": 1.5826, "step": 6070 }, { "epoch": 0.2699746565292784, "grad_norm": 0.09214480966329575, "learning_rate": 0.0009920013247154674, "loss": 1.5772, "step": 6072 }, { "epoch": 0.27006358098795075, "grad_norm": 0.09295648336410522, "learning_rate": 0.0009919950322577263, "loss": 1.5797, "step": 6074 }, { "epoch": 0.2701525054466231, "grad_norm": 0.094325952231884, "learning_rate": 0.0009919887373458325, "loss": 1.586, "step": 6076 }, { "epoch": 0.2702414299052954, "grad_norm": 0.09574458748102188, "learning_rate": 0.0009919824399798169, "loss": 1.5852, "step": 6078 }, { "epoch": 0.2703303543639678, "grad_norm": 0.09337875992059708, "learning_rate": 0.0009919761401597114, "loss": 1.5748, "step": 6080 }, { "epoch": 0.27041927882264016, "grad_norm": 0.09532225131988525, "learning_rate": 0.0009919698378855473, "loss": 1.5696, "step": 6082 }, { "epoch": 0.27050820328131253, "grad_norm": 0.09588050097227097, "learning_rate": 0.000991963533157356, "loss": 1.585, "step": 6084 }, { "epoch": 0.2705971277399849, "grad_norm": 0.09399744123220444, "learning_rate": 0.0009919572259751688, "loss": 1.5727, "step": 6086 }, { "epoch": 0.27068605219865727, "grad_norm": 0.09603196382522583, "learning_rate": 0.0009919509163390174, "loss": 1.5792, "step": 6088 }, { "epoch": 0.2707749766573296, "grad_norm": 0.09013631939888, "learning_rate": 0.000991944604248933, "loss": 1.5795, "step": 6090 }, { "epoch": 0.27086390111600195, "grad_norm": 0.08968756347894669, "learning_rate": 0.0009919382897049475, "loss": 1.574, "step": 6092 }, { "epoch": 0.2709528255746743, "grad_norm": 0.090090312063694, "learning_rate": 0.0009919319727070919, "loss": 1.5763, "step": 6094 }, { "epoch": 0.2710417500333467, "grad_norm": 0.09772195667028427, "learning_rate": 0.0009919256532553982, "loss": 1.5812, "step": 6096 }, { "epoch": 0.27113067449201905, "grad_norm": 0.09192989021539688, "learning_rate": 0.0009919193313498976, "loss": 1.5723, "step": 6098 }, { "epoch": 0.27121959895069137, "grad_norm": 0.091171033680439, "learning_rate": 0.0009919130069906217, "loss": 1.5751, "step": 6100 }, { "epoch": 0.27130852340936373, "grad_norm": 0.0916176363825798, "learning_rate": 0.000991906680177602, "loss": 1.5787, "step": 6102 }, { "epoch": 0.2713974478680361, "grad_norm": 0.09237353503704071, "learning_rate": 0.0009919003509108702, "loss": 1.5746, "step": 6104 }, { "epoch": 0.27148637232670847, "grad_norm": 0.09654420614242554, "learning_rate": 0.0009918940191904577, "loss": 1.5804, "step": 6106 }, { "epoch": 0.27157529678538084, "grad_norm": 0.09240265935659409, "learning_rate": 0.0009918876850163966, "loss": 1.5769, "step": 6108 }, { "epoch": 0.27166422124405315, "grad_norm": 0.09678072482347488, "learning_rate": 0.0009918813483887176, "loss": 1.5787, "step": 6110 }, { "epoch": 0.2717531457027255, "grad_norm": 0.09163809567689896, "learning_rate": 0.000991875009307453, "loss": 1.581, "step": 6112 }, { "epoch": 0.2718420701613979, "grad_norm": 0.09179195761680603, "learning_rate": 0.000991868667772634, "loss": 1.5799, "step": 6114 }, { "epoch": 0.27193099462007025, "grad_norm": 0.09309026598930359, "learning_rate": 0.0009918623237842926, "loss": 1.5749, "step": 6116 }, { "epoch": 0.2720199190787426, "grad_norm": 0.09055127948522568, "learning_rate": 0.0009918559773424603, "loss": 1.5776, "step": 6118 }, { "epoch": 0.272108843537415, "grad_norm": 0.09597539901733398, "learning_rate": 0.0009918496284471688, "loss": 1.5733, "step": 6120 }, { "epoch": 0.2721977679960873, "grad_norm": 0.09350308775901794, "learning_rate": 0.0009918432770984495, "loss": 1.5775, "step": 6122 }, { "epoch": 0.27228669245475967, "grad_norm": 0.09235924482345581, "learning_rate": 0.0009918369232963344, "loss": 1.5705, "step": 6124 }, { "epoch": 0.27237561691343204, "grad_norm": 0.09772805869579315, "learning_rate": 0.0009918305670408552, "loss": 1.5795, "step": 6126 }, { "epoch": 0.2724645413721044, "grad_norm": 0.09387152642011642, "learning_rate": 0.0009918242083320433, "loss": 1.5857, "step": 6128 }, { "epoch": 0.2725534658307768, "grad_norm": 0.08818736672401428, "learning_rate": 0.0009918178471699309, "loss": 1.5767, "step": 6130 }, { "epoch": 0.2726423902894491, "grad_norm": 0.09092088788747787, "learning_rate": 0.000991811483554549, "loss": 1.5775, "step": 6132 }, { "epoch": 0.27273131474812146, "grad_norm": 0.09008492529392242, "learning_rate": 0.0009918051174859302, "loss": 1.5767, "step": 6134 }, { "epoch": 0.2728202392067938, "grad_norm": 0.0906415656208992, "learning_rate": 0.0009917987489641057, "loss": 1.5746, "step": 6136 }, { "epoch": 0.2729091636654662, "grad_norm": 0.09450238943099976, "learning_rate": 0.0009917923779891073, "loss": 1.5732, "step": 6138 }, { "epoch": 0.27299808812413856, "grad_norm": 0.08837480843067169, "learning_rate": 0.000991786004560967, "loss": 1.5756, "step": 6140 }, { "epoch": 0.27308701258281093, "grad_norm": 0.09205223619937897, "learning_rate": 0.0009917796286797163, "loss": 1.5763, "step": 6142 }, { "epoch": 0.27317593704148324, "grad_norm": 0.0925694927573204, "learning_rate": 0.0009917732503453873, "loss": 1.5766, "step": 6144 }, { "epoch": 0.2732648615001556, "grad_norm": 0.09327147156000137, "learning_rate": 0.0009917668695580118, "loss": 1.5742, "step": 6146 }, { "epoch": 0.273353785958828, "grad_norm": 0.09639228135347366, "learning_rate": 0.0009917604863176215, "loss": 1.5805, "step": 6148 }, { "epoch": 0.27344271041750035, "grad_norm": 0.09437435865402222, "learning_rate": 0.0009917541006242482, "loss": 1.5835, "step": 6150 }, { "epoch": 0.2735316348761727, "grad_norm": 0.09421025216579437, "learning_rate": 0.0009917477124779237, "loss": 1.5827, "step": 6152 }, { "epoch": 0.273620559334845, "grad_norm": 0.09246162325143814, "learning_rate": 0.0009917413218786802, "loss": 1.5771, "step": 6154 }, { "epoch": 0.2737094837935174, "grad_norm": 0.0891774520277977, "learning_rate": 0.0009917349288265493, "loss": 1.5819, "step": 6156 }, { "epoch": 0.27379840825218976, "grad_norm": 0.09381581842899323, "learning_rate": 0.0009917285333215628, "loss": 1.5758, "step": 6158 }, { "epoch": 0.27388733271086213, "grad_norm": 0.09442843496799469, "learning_rate": 0.000991722135363753, "loss": 1.5837, "step": 6160 }, { "epoch": 0.2739762571695345, "grad_norm": 0.08775163441896439, "learning_rate": 0.0009917157349531513, "loss": 1.581, "step": 6162 }, { "epoch": 0.2740651816282068, "grad_norm": 0.0929328128695488, "learning_rate": 0.00099170933208979, "loss": 1.5758, "step": 6164 }, { "epoch": 0.2741541060868792, "grad_norm": 0.09051446616649628, "learning_rate": 0.000991702926773701, "loss": 1.5774, "step": 6166 }, { "epoch": 0.27424303054555155, "grad_norm": 0.09004546701908112, "learning_rate": 0.0009916965190049161, "loss": 1.5744, "step": 6168 }, { "epoch": 0.2743319550042239, "grad_norm": 0.0947934165596962, "learning_rate": 0.0009916901087834676, "loss": 1.5806, "step": 6170 }, { "epoch": 0.2744208794628963, "grad_norm": 0.0934051126241684, "learning_rate": 0.0009916836961093869, "loss": 1.5796, "step": 6172 }, { "epoch": 0.27450980392156865, "grad_norm": 0.09276668727397919, "learning_rate": 0.0009916772809827063, "loss": 1.5775, "step": 6174 }, { "epoch": 0.27459872838024096, "grad_norm": 0.09658815711736679, "learning_rate": 0.0009916708634034582, "loss": 1.5745, "step": 6176 }, { "epoch": 0.27468765283891333, "grad_norm": 0.09510058909654617, "learning_rate": 0.0009916644433716738, "loss": 1.5706, "step": 6178 }, { "epoch": 0.2747765772975857, "grad_norm": 0.09232553839683533, "learning_rate": 0.0009916580208873859, "loss": 1.5735, "step": 6180 }, { "epoch": 0.27486550175625807, "grad_norm": 0.09260194003582001, "learning_rate": 0.0009916515959506257, "loss": 1.5781, "step": 6182 }, { "epoch": 0.27495442621493044, "grad_norm": 0.09699893742799759, "learning_rate": 0.000991645168561426, "loss": 1.5788, "step": 6184 }, { "epoch": 0.27504335067360275, "grad_norm": 0.0900624617934227, "learning_rate": 0.0009916387387198186, "loss": 1.5794, "step": 6186 }, { "epoch": 0.2751322751322751, "grad_norm": 0.08997295051813126, "learning_rate": 0.0009916323064258358, "loss": 1.5737, "step": 6188 }, { "epoch": 0.2752211995909475, "grad_norm": 0.0929020345211029, "learning_rate": 0.000991625871679509, "loss": 1.5808, "step": 6190 }, { "epoch": 0.27531012404961985, "grad_norm": 0.09494993090629578, "learning_rate": 0.000991619434480871, "loss": 1.5808, "step": 6192 }, { "epoch": 0.2753990485082922, "grad_norm": 0.08931610733270645, "learning_rate": 0.0009916129948299537, "loss": 1.5718, "step": 6194 }, { "epoch": 0.2754879729669646, "grad_norm": 0.09554082900285721, "learning_rate": 0.000991606552726789, "loss": 1.5794, "step": 6196 }, { "epoch": 0.2755768974256369, "grad_norm": 0.09193310141563416, "learning_rate": 0.0009916001081714091, "loss": 1.5806, "step": 6198 }, { "epoch": 0.27566582188430927, "grad_norm": 0.08917775005102158, "learning_rate": 0.0009915936611638466, "loss": 1.5737, "step": 6200 }, { "epoch": 0.27575474634298164, "grad_norm": 0.0933012068271637, "learning_rate": 0.0009915872117041333, "loss": 1.5803, "step": 6202 }, { "epoch": 0.275843670801654, "grad_norm": 0.09677395969629288, "learning_rate": 0.0009915807597923013, "loss": 1.5793, "step": 6204 }, { "epoch": 0.2759325952603264, "grad_norm": 0.09007348865270615, "learning_rate": 0.0009915743054283827, "loss": 1.5755, "step": 6206 }, { "epoch": 0.2760215197189987, "grad_norm": 0.09194600582122803, "learning_rate": 0.00099156784861241, "loss": 1.5735, "step": 6208 }, { "epoch": 0.27611044417767105, "grad_norm": 0.09006384760141373, "learning_rate": 0.0009915613893444154, "loss": 1.574, "step": 6210 }, { "epoch": 0.2761993686363434, "grad_norm": 0.09457103908061981, "learning_rate": 0.0009915549276244308, "loss": 1.577, "step": 6212 }, { "epoch": 0.2762882930950158, "grad_norm": 0.09670677036046982, "learning_rate": 0.0009915484634524887, "loss": 1.5838, "step": 6214 }, { "epoch": 0.27637721755368816, "grad_norm": 0.09744661301374435, "learning_rate": 0.0009915419968286212, "loss": 1.58, "step": 6216 }, { "epoch": 0.2764661420123605, "grad_norm": 0.09463924914598465, "learning_rate": 0.0009915355277528607, "loss": 1.5767, "step": 6218 }, { "epoch": 0.27655506647103284, "grad_norm": 0.09342074394226074, "learning_rate": 0.0009915290562252394, "loss": 1.5844, "step": 6220 }, { "epoch": 0.2766439909297052, "grad_norm": 0.08680648356676102, "learning_rate": 0.0009915225822457897, "loss": 1.5792, "step": 6222 }, { "epoch": 0.2767329153883776, "grad_norm": 0.09087889641523361, "learning_rate": 0.0009915161058145436, "loss": 1.5797, "step": 6224 }, { "epoch": 0.27682183984704994, "grad_norm": 0.0886339470744133, "learning_rate": 0.0009915096269315336, "loss": 1.5726, "step": 6226 }, { "epoch": 0.2769107643057223, "grad_norm": 0.08888717740774155, "learning_rate": 0.000991503145596792, "loss": 1.5751, "step": 6228 }, { "epoch": 0.2769996887643946, "grad_norm": 0.09028730541467667, "learning_rate": 0.0009914966618103512, "loss": 1.581, "step": 6230 }, { "epoch": 0.277088613223067, "grad_norm": 0.09174933284521103, "learning_rate": 0.0009914901755722434, "loss": 1.5828, "step": 6232 }, { "epoch": 0.27717753768173936, "grad_norm": 0.09008409827947617, "learning_rate": 0.000991483686882501, "loss": 1.5725, "step": 6234 }, { "epoch": 0.27726646214041173, "grad_norm": 0.09247057884931564, "learning_rate": 0.0009914771957411563, "loss": 1.584, "step": 6236 }, { "epoch": 0.2773553865990841, "grad_norm": 0.09128382802009583, "learning_rate": 0.000991470702148242, "loss": 1.5849, "step": 6238 }, { "epoch": 0.2774443110577564, "grad_norm": 0.09105904400348663, "learning_rate": 0.0009914642061037901, "loss": 1.5744, "step": 6240 }, { "epoch": 0.2775332355164288, "grad_norm": 0.08844447880983353, "learning_rate": 0.0009914577076078333, "loss": 1.5689, "step": 6242 }, { "epoch": 0.27762215997510115, "grad_norm": 0.08989200741052628, "learning_rate": 0.0009914512066604036, "loss": 1.5729, "step": 6244 }, { "epoch": 0.2777110844337735, "grad_norm": 0.09029513597488403, "learning_rate": 0.0009914447032615338, "loss": 1.573, "step": 6246 }, { "epoch": 0.2778000088924459, "grad_norm": 0.08931106328964233, "learning_rate": 0.0009914381974112565, "loss": 1.5832, "step": 6248 }, { "epoch": 0.27788893335111825, "grad_norm": 0.08635377138853073, "learning_rate": 0.0009914316891096037, "loss": 1.5818, "step": 6250 }, { "epoch": 0.27797785780979056, "grad_norm": 0.08634750545024872, "learning_rate": 0.0009914251783566081, "loss": 1.5733, "step": 6252 }, { "epoch": 0.27806678226846293, "grad_norm": 0.09367714077234268, "learning_rate": 0.0009914186651523021, "loss": 1.5813, "step": 6254 }, { "epoch": 0.2781557067271353, "grad_norm": 0.08927787095308304, "learning_rate": 0.0009914121494967183, "loss": 1.5758, "step": 6256 }, { "epoch": 0.27824463118580767, "grad_norm": 0.0868934839963913, "learning_rate": 0.000991405631389889, "loss": 1.5718, "step": 6258 }, { "epoch": 0.27833355564448004, "grad_norm": 0.09208554029464722, "learning_rate": 0.000991399110831847, "loss": 1.5722, "step": 6260 }, { "epoch": 0.27842248010315235, "grad_norm": 0.09598352760076523, "learning_rate": 0.0009913925878226246, "loss": 1.5769, "step": 6262 }, { "epoch": 0.2785114045618247, "grad_norm": 0.09622900187969208, "learning_rate": 0.0009913860623622544, "loss": 1.5752, "step": 6264 }, { "epoch": 0.2786003290204971, "grad_norm": 0.09845644980669022, "learning_rate": 0.0009913795344507687, "loss": 1.5793, "step": 6266 }, { "epoch": 0.27868925347916945, "grad_norm": 0.08853481709957123, "learning_rate": 0.0009913730040882007, "loss": 1.5745, "step": 6268 }, { "epoch": 0.2787781779378418, "grad_norm": 0.09148851037025452, "learning_rate": 0.0009913664712745825, "loss": 1.5824, "step": 6270 }, { "epoch": 0.2788671023965142, "grad_norm": 0.09253410995006561, "learning_rate": 0.0009913599360099469, "loss": 1.5755, "step": 6272 }, { "epoch": 0.2789560268551865, "grad_norm": 0.09554262459278107, "learning_rate": 0.000991353398294326, "loss": 1.5737, "step": 6274 }, { "epoch": 0.27904495131385887, "grad_norm": 0.09318879246711731, "learning_rate": 0.000991346858127753, "loss": 1.5677, "step": 6276 }, { "epoch": 0.27913387577253124, "grad_norm": 0.09248467534780502, "learning_rate": 0.0009913403155102603, "loss": 1.5769, "step": 6278 }, { "epoch": 0.2792228002312036, "grad_norm": 0.09165968000888824, "learning_rate": 0.0009913337704418805, "loss": 1.5741, "step": 6280 }, { "epoch": 0.279311724689876, "grad_norm": 0.09123395383358002, "learning_rate": 0.0009913272229226465, "loss": 1.5758, "step": 6282 }, { "epoch": 0.2794006491485483, "grad_norm": 0.08967551589012146, "learning_rate": 0.0009913206729525905, "loss": 1.5773, "step": 6284 }, { "epoch": 0.27948957360722065, "grad_norm": 0.08778689056634903, "learning_rate": 0.0009913141205317455, "loss": 1.5767, "step": 6286 }, { "epoch": 0.279578498065893, "grad_norm": 0.084915891289711, "learning_rate": 0.0009913075656601442, "loss": 1.5787, "step": 6288 }, { "epoch": 0.2796674225245654, "grad_norm": 0.0918944925069809, "learning_rate": 0.0009913010083378191, "loss": 1.5747, "step": 6290 }, { "epoch": 0.27975634698323776, "grad_norm": 0.09487152099609375, "learning_rate": 0.0009912944485648031, "loss": 1.5794, "step": 6292 }, { "epoch": 0.27984527144191007, "grad_norm": 0.08974961936473846, "learning_rate": 0.000991287886341129, "loss": 1.5725, "step": 6294 }, { "epoch": 0.27993419590058244, "grad_norm": 0.09361551702022552, "learning_rate": 0.0009912813216668291, "loss": 1.5786, "step": 6296 }, { "epoch": 0.2800231203592548, "grad_norm": 0.08920656889677048, "learning_rate": 0.0009912747545419364, "loss": 1.581, "step": 6298 }, { "epoch": 0.2801120448179272, "grad_norm": 0.08863282203674316, "learning_rate": 0.0009912681849664838, "loss": 1.5788, "step": 6300 }, { "epoch": 0.28020096927659954, "grad_norm": 0.08976864069700241, "learning_rate": 0.000991261612940504, "loss": 1.5735, "step": 6302 }, { "epoch": 0.2802898937352719, "grad_norm": 0.08996647596359253, "learning_rate": 0.0009912550384640296, "loss": 1.5788, "step": 6304 }, { "epoch": 0.2803788181939442, "grad_norm": 0.09197447448968887, "learning_rate": 0.0009912484615370936, "loss": 1.5751, "step": 6306 }, { "epoch": 0.2804677426526166, "grad_norm": 0.09102824330329895, "learning_rate": 0.0009912418821597289, "loss": 1.5789, "step": 6308 }, { "epoch": 0.28055666711128896, "grad_norm": 0.09098983556032181, "learning_rate": 0.0009912353003319679, "loss": 1.5816, "step": 6310 }, { "epoch": 0.28064559156996133, "grad_norm": 0.0941241905093193, "learning_rate": 0.0009912287160538436, "loss": 1.5775, "step": 6312 }, { "epoch": 0.2807345160286337, "grad_norm": 0.09087178856134415, "learning_rate": 0.000991222129325389, "loss": 1.5723, "step": 6314 }, { "epoch": 0.280823440487306, "grad_norm": 0.08986848592758179, "learning_rate": 0.0009912155401466372, "loss": 1.5755, "step": 6316 }, { "epoch": 0.2809123649459784, "grad_norm": 0.0928303524851799, "learning_rate": 0.0009912089485176203, "loss": 1.5737, "step": 6318 }, { "epoch": 0.28100128940465074, "grad_norm": 0.09010421484708786, "learning_rate": 0.000991202354438372, "loss": 1.5804, "step": 6320 }, { "epoch": 0.2810902138633231, "grad_norm": 0.09154298156499863, "learning_rate": 0.0009911957579089245, "loss": 1.5741, "step": 6322 }, { "epoch": 0.2811791383219955, "grad_norm": 0.09349411725997925, "learning_rate": 0.0009911891589293113, "loss": 1.571, "step": 6324 }, { "epoch": 0.28126806278066785, "grad_norm": 0.08908021450042725, "learning_rate": 0.000991182557499565, "loss": 1.5772, "step": 6326 }, { "epoch": 0.28135698723934016, "grad_norm": 0.09845002740621567, "learning_rate": 0.0009911759536197183, "loss": 1.5778, "step": 6328 }, { "epoch": 0.28144591169801253, "grad_norm": 0.08850311487913132, "learning_rate": 0.0009911693472898046, "loss": 1.5749, "step": 6330 }, { "epoch": 0.2815348361566849, "grad_norm": 0.09225910902023315, "learning_rate": 0.0009911627385098566, "loss": 1.577, "step": 6332 }, { "epoch": 0.28162376061535727, "grad_norm": 0.09028570353984833, "learning_rate": 0.0009911561272799075, "loss": 1.5768, "step": 6334 }, { "epoch": 0.28171268507402963, "grad_norm": 0.08955962210893631, "learning_rate": 0.00099114951359999, "loss": 1.5769, "step": 6336 }, { "epoch": 0.28180160953270195, "grad_norm": 0.08538378030061722, "learning_rate": 0.0009911428974701372, "loss": 1.5768, "step": 6338 }, { "epoch": 0.2818905339913743, "grad_norm": 0.08776732534170151, "learning_rate": 0.0009911362788903822, "loss": 1.5755, "step": 6340 }, { "epoch": 0.2819794584500467, "grad_norm": 0.08690127730369568, "learning_rate": 0.0009911296578607577, "loss": 1.5728, "step": 6342 }, { "epoch": 0.28206838290871905, "grad_norm": 0.08759032934904099, "learning_rate": 0.000991123034381297, "loss": 1.5728, "step": 6344 }, { "epoch": 0.2821573073673914, "grad_norm": 0.08740068972110748, "learning_rate": 0.0009911164084520333, "loss": 1.5709, "step": 6346 }, { "epoch": 0.28224623182606373, "grad_norm": 0.09269807487726212, "learning_rate": 0.0009911097800729993, "loss": 1.5711, "step": 6348 }, { "epoch": 0.2823351562847361, "grad_norm": 0.08954404294490814, "learning_rate": 0.0009911031492442282, "loss": 1.5742, "step": 6350 }, { "epoch": 0.28242408074340847, "grad_norm": 0.08641083538532257, "learning_rate": 0.000991096515965753, "loss": 1.5766, "step": 6352 }, { "epoch": 0.28251300520208084, "grad_norm": 0.0864805057644844, "learning_rate": 0.0009910898802376069, "loss": 1.5738, "step": 6354 }, { "epoch": 0.2826019296607532, "grad_norm": 0.09081622213125229, "learning_rate": 0.000991083242059823, "loss": 1.5746, "step": 6356 }, { "epoch": 0.28269085411942557, "grad_norm": 0.09132978320121765, "learning_rate": 0.0009910766014324345, "loss": 1.5765, "step": 6358 }, { "epoch": 0.2827797785780979, "grad_norm": 0.09193006902933121, "learning_rate": 0.000991069958355474, "loss": 1.5782, "step": 6360 }, { "epoch": 0.28286870303677025, "grad_norm": 0.09301292151212692, "learning_rate": 0.0009910633128289753, "loss": 1.5711, "step": 6362 }, { "epoch": 0.2829576274954426, "grad_norm": 0.09332393854856491, "learning_rate": 0.0009910566648529712, "loss": 1.5724, "step": 6364 }, { "epoch": 0.283046551954115, "grad_norm": 0.08995355665683746, "learning_rate": 0.000991050014427495, "loss": 1.5861, "step": 6366 }, { "epoch": 0.28313547641278736, "grad_norm": 0.09018755704164505, "learning_rate": 0.00099104336155258, "loss": 1.5695, "step": 6368 }, { "epoch": 0.28322440087145967, "grad_norm": 0.09240277856588364, "learning_rate": 0.000991036706228259, "loss": 1.5718, "step": 6370 }, { "epoch": 0.28331332533013204, "grad_norm": 0.08703695982694626, "learning_rate": 0.0009910300484545654, "loss": 1.5753, "step": 6372 }, { "epoch": 0.2834022497888044, "grad_norm": 0.09340234845876694, "learning_rate": 0.0009910233882315326, "loss": 1.578, "step": 6374 }, { "epoch": 0.2834911742474768, "grad_norm": 0.09236439317464828, "learning_rate": 0.0009910167255591934, "loss": 1.5762, "step": 6376 }, { "epoch": 0.28358009870614914, "grad_norm": 0.09320174157619476, "learning_rate": 0.0009910100604375812, "loss": 1.569, "step": 6378 }, { "epoch": 0.2836690231648215, "grad_norm": 0.08888036757707596, "learning_rate": 0.0009910033928667294, "loss": 1.5729, "step": 6380 }, { "epoch": 0.2837579476234938, "grad_norm": 0.0888710469007492, "learning_rate": 0.0009909967228466713, "loss": 1.5731, "step": 6382 }, { "epoch": 0.2838468720821662, "grad_norm": 0.09013516455888748, "learning_rate": 0.00099099005037744, "loss": 1.5704, "step": 6384 }, { "epoch": 0.28393579654083856, "grad_norm": 0.08720479905605316, "learning_rate": 0.0009909833754590688, "loss": 1.5738, "step": 6386 }, { "epoch": 0.2840247209995109, "grad_norm": 0.08885452896356583, "learning_rate": 0.000990976698091591, "loss": 1.5726, "step": 6388 }, { "epoch": 0.2841136454581833, "grad_norm": 0.09276578575372696, "learning_rate": 0.00099097001827504, "loss": 1.5756, "step": 6390 }, { "epoch": 0.2842025699168556, "grad_norm": 0.08871392160654068, "learning_rate": 0.000990963336009449, "loss": 1.5747, "step": 6392 }, { "epoch": 0.284291494375528, "grad_norm": 0.09305480867624283, "learning_rate": 0.0009909566512948513, "loss": 1.5691, "step": 6394 }, { "epoch": 0.28438041883420034, "grad_norm": 0.08993328362703323, "learning_rate": 0.0009909499641312805, "loss": 1.5752, "step": 6396 }, { "epoch": 0.2844693432928727, "grad_norm": 0.09120582789182663, "learning_rate": 0.0009909432745187697, "loss": 1.5774, "step": 6398 }, { "epoch": 0.2845582677515451, "grad_norm": 0.08618117868900299, "learning_rate": 0.0009909365824573523, "loss": 1.575, "step": 6400 }, { "epoch": 0.28464719221021745, "grad_norm": 0.08837292343378067, "learning_rate": 0.0009909298879470618, "loss": 1.5787, "step": 6402 }, { "epoch": 0.28473611666888976, "grad_norm": 0.08570041507482529, "learning_rate": 0.0009909231909879315, "loss": 1.5808, "step": 6404 }, { "epoch": 0.28482504112756213, "grad_norm": 0.09008973836898804, "learning_rate": 0.000990916491579995, "loss": 1.5763, "step": 6406 }, { "epoch": 0.2849139655862345, "grad_norm": 0.09252497553825378, "learning_rate": 0.0009909097897232854, "loss": 1.5776, "step": 6408 }, { "epoch": 0.28500289004490686, "grad_norm": 0.08563211560249329, "learning_rate": 0.0009909030854178362, "loss": 1.5674, "step": 6410 }, { "epoch": 0.28509181450357923, "grad_norm": 0.09084684401750565, "learning_rate": 0.0009908963786636811, "loss": 1.5732, "step": 6412 }, { "epoch": 0.28518073896225155, "grad_norm": 0.09400637447834015, "learning_rate": 0.0009908896694608533, "loss": 1.5712, "step": 6414 }, { "epoch": 0.2852696634209239, "grad_norm": 0.08952626585960388, "learning_rate": 0.0009908829578093863, "loss": 1.5744, "step": 6416 }, { "epoch": 0.2853585878795963, "grad_norm": 0.09080950170755386, "learning_rate": 0.0009908762437093137, "loss": 1.57, "step": 6418 }, { "epoch": 0.28544751233826865, "grad_norm": 0.08886551856994629, "learning_rate": 0.000990869527160669, "loss": 1.5763, "step": 6420 }, { "epoch": 0.285536436796941, "grad_norm": 0.09163545817136765, "learning_rate": 0.0009908628081634855, "loss": 1.577, "step": 6422 }, { "epoch": 0.28562536125561333, "grad_norm": 0.08795285224914551, "learning_rate": 0.0009908560867177966, "loss": 1.5714, "step": 6424 }, { "epoch": 0.2857142857142857, "grad_norm": 0.08879886567592621, "learning_rate": 0.0009908493628236364, "loss": 1.5738, "step": 6426 }, { "epoch": 0.28580321017295807, "grad_norm": 0.09103137999773026, "learning_rate": 0.000990842636481038, "loss": 1.5725, "step": 6428 }, { "epoch": 0.28589213463163043, "grad_norm": 0.08968726545572281, "learning_rate": 0.0009908359076900351, "loss": 1.5728, "step": 6430 }, { "epoch": 0.2859810590903028, "grad_norm": 0.08867494016885757, "learning_rate": 0.000990829176450661, "loss": 1.5695, "step": 6432 }, { "epoch": 0.28606998354897517, "grad_norm": 0.08909203112125397, "learning_rate": 0.0009908224427629497, "loss": 1.5785, "step": 6434 }, { "epoch": 0.2861589080076475, "grad_norm": 0.08877209573984146, "learning_rate": 0.0009908157066269346, "loss": 1.5675, "step": 6436 }, { "epoch": 0.28624783246631985, "grad_norm": 0.08952686190605164, "learning_rate": 0.0009908089680426492, "loss": 1.5712, "step": 6438 }, { "epoch": 0.2863367569249922, "grad_norm": 0.08946753293275833, "learning_rate": 0.0009908022270101273, "loss": 1.5706, "step": 6440 }, { "epoch": 0.2864256813836646, "grad_norm": 0.08814926445484161, "learning_rate": 0.0009907954835294023, "loss": 1.5738, "step": 6442 }, { "epoch": 0.28651460584233696, "grad_norm": 0.09422788769006729, "learning_rate": 0.000990788737600508, "loss": 1.5799, "step": 6444 }, { "epoch": 0.28660353030100927, "grad_norm": 0.09289488941431046, "learning_rate": 0.0009907819892234781, "loss": 1.5761, "step": 6446 }, { "epoch": 0.28669245475968164, "grad_norm": 0.08914397656917572, "learning_rate": 0.0009907752383983461, "loss": 1.569, "step": 6448 }, { "epoch": 0.286781379218354, "grad_norm": 0.08954908698797226, "learning_rate": 0.0009907684851251457, "loss": 1.5802, "step": 6450 }, { "epoch": 0.2868703036770264, "grad_norm": 0.09144933521747589, "learning_rate": 0.0009907617294039107, "loss": 1.5758, "step": 6452 }, { "epoch": 0.28695922813569874, "grad_norm": 0.09079254418611526, "learning_rate": 0.0009907549712346747, "loss": 1.5721, "step": 6454 }, { "epoch": 0.2870481525943711, "grad_norm": 0.08957768976688385, "learning_rate": 0.0009907482106174714, "loss": 1.5687, "step": 6456 }, { "epoch": 0.2871370770530434, "grad_norm": 0.09010159969329834, "learning_rate": 0.0009907414475523347, "loss": 1.5772, "step": 6458 }, { "epoch": 0.2872260015117158, "grad_norm": 0.09015689790248871, "learning_rate": 0.000990734682039298, "loss": 1.57, "step": 6460 }, { "epoch": 0.28731492597038816, "grad_norm": 0.0964498221874237, "learning_rate": 0.0009907279140783953, "loss": 1.5716, "step": 6462 }, { "epoch": 0.2874038504290605, "grad_norm": 0.09194839745759964, "learning_rate": 0.0009907211436696603, "loss": 1.5666, "step": 6464 }, { "epoch": 0.2874927748877329, "grad_norm": 0.0931999608874321, "learning_rate": 0.000990714370813127, "loss": 1.5766, "step": 6466 }, { "epoch": 0.2875816993464052, "grad_norm": 0.08518645912408829, "learning_rate": 0.0009907075955088286, "loss": 1.5778, "step": 6468 }, { "epoch": 0.2876706238050776, "grad_norm": 0.08883948624134064, "learning_rate": 0.0009907008177567997, "loss": 1.5709, "step": 6470 }, { "epoch": 0.28775954826374994, "grad_norm": 0.08934309333562851, "learning_rate": 0.0009906940375570734, "loss": 1.5703, "step": 6472 }, { "epoch": 0.2878484727224223, "grad_norm": 0.09022992104291916, "learning_rate": 0.0009906872549096837, "loss": 1.572, "step": 6474 }, { "epoch": 0.2879373971810947, "grad_norm": 0.09257543832063675, "learning_rate": 0.0009906804698146647, "loss": 1.5726, "step": 6476 }, { "epoch": 0.288026321639767, "grad_norm": 0.09016820788383484, "learning_rate": 0.0009906736822720501, "loss": 1.5735, "step": 6478 }, { "epoch": 0.28811524609843936, "grad_norm": 0.08995562046766281, "learning_rate": 0.0009906668922818735, "loss": 1.5661, "step": 6480 }, { "epoch": 0.2882041705571117, "grad_norm": 0.09062064439058304, "learning_rate": 0.0009906600998441694, "loss": 1.5656, "step": 6482 }, { "epoch": 0.2882930950157841, "grad_norm": 0.08658911287784576, "learning_rate": 0.0009906533049589708, "loss": 1.5682, "step": 6484 }, { "epoch": 0.28838201947445646, "grad_norm": 0.08712375164031982, "learning_rate": 0.0009906465076263123, "loss": 1.5734, "step": 6486 }, { "epoch": 0.28847094393312883, "grad_norm": 0.0906602293252945, "learning_rate": 0.0009906397078462276, "loss": 1.5742, "step": 6488 }, { "epoch": 0.28855986839180114, "grad_norm": 0.08672540634870529, "learning_rate": 0.0009906329056187505, "loss": 1.5789, "step": 6490 }, { "epoch": 0.2886487928504735, "grad_norm": 0.08817194402217865, "learning_rate": 0.0009906261009439152, "loss": 1.5728, "step": 6492 }, { "epoch": 0.2887377173091459, "grad_norm": 0.09028277546167374, "learning_rate": 0.0009906192938217552, "loss": 1.5735, "step": 6494 }, { "epoch": 0.28882664176781825, "grad_norm": 0.09235137701034546, "learning_rate": 0.000990612484252305, "loss": 1.5749, "step": 6496 }, { "epoch": 0.2889155662264906, "grad_norm": 0.09245535731315613, "learning_rate": 0.000990605672235598, "loss": 1.5751, "step": 6498 }, { "epoch": 0.28900449068516293, "grad_norm": 0.08902133256196976, "learning_rate": 0.0009905988577716687, "loss": 1.5744, "step": 6500 }, { "epoch": 0.28900449068516293, "eval_loss": 1.5468125343322754, "eval_runtime": 12.4163, "eval_samples_per_second": 556.526, "eval_steps_per_second": 69.586, "step": 6500 }, { "epoch": 0.2890934151438353, "grad_norm": 0.09113017469644547, "learning_rate": 0.0009905920408605508, "loss": 1.5807, "step": 6502 }, { "epoch": 0.28918233960250767, "grad_norm": 0.09066907316446304, "learning_rate": 0.0009905852215022783, "loss": 1.5756, "step": 6504 }, { "epoch": 0.28927126406118003, "grad_norm": 0.09358642995357513, "learning_rate": 0.0009905783996968854, "loss": 1.5779, "step": 6506 }, { "epoch": 0.2893601885198524, "grad_norm": 0.08834748715162277, "learning_rate": 0.000990571575444406, "loss": 1.5675, "step": 6508 }, { "epoch": 0.28944911297852477, "grad_norm": 0.08833714574575424, "learning_rate": 0.000990564748744874, "loss": 1.5699, "step": 6510 }, { "epoch": 0.2895380374371971, "grad_norm": 0.09438148140907288, "learning_rate": 0.000990557919598324, "loss": 1.5703, "step": 6512 }, { "epoch": 0.28962696189586945, "grad_norm": 0.08776549994945526, "learning_rate": 0.000990551088004789, "loss": 1.5659, "step": 6514 }, { "epoch": 0.2897158863545418, "grad_norm": 0.09066025912761688, "learning_rate": 0.0009905442539643043, "loss": 1.5698, "step": 6516 }, { "epoch": 0.2898048108132142, "grad_norm": 0.08665291219949722, "learning_rate": 0.0009905374174769032, "loss": 1.5707, "step": 6518 }, { "epoch": 0.28989373527188655, "grad_norm": 0.08850061148405075, "learning_rate": 0.0009905305785426202, "loss": 1.5764, "step": 6520 }, { "epoch": 0.28998265973055887, "grad_norm": 0.09048718959093094, "learning_rate": 0.000990523737161489, "loss": 1.5785, "step": 6522 }, { "epoch": 0.29007158418923124, "grad_norm": 0.08610177785158157, "learning_rate": 0.0009905168933335442, "loss": 1.5681, "step": 6524 }, { "epoch": 0.2901605086479036, "grad_norm": 0.08931626379489899, "learning_rate": 0.0009905100470588197, "loss": 1.5733, "step": 6526 }, { "epoch": 0.29024943310657597, "grad_norm": 0.08849899470806122, "learning_rate": 0.0009905031983373496, "loss": 1.5729, "step": 6528 }, { "epoch": 0.29033835756524834, "grad_norm": 0.08723746240139008, "learning_rate": 0.0009904963471691682, "loss": 1.5738, "step": 6530 }, { "epoch": 0.29042728202392065, "grad_norm": 0.08764983713626862, "learning_rate": 0.0009904894935543094, "loss": 1.5785, "step": 6532 }, { "epoch": 0.290516206482593, "grad_norm": 0.09550359100103378, "learning_rate": 0.0009904826374928077, "loss": 1.5718, "step": 6534 }, { "epoch": 0.2906051309412654, "grad_norm": 0.09002628922462463, "learning_rate": 0.0009904757789846973, "loss": 1.5796, "step": 6536 }, { "epoch": 0.29069405539993776, "grad_norm": 0.09034377336502075, "learning_rate": 0.0009904689180300122, "loss": 1.5693, "step": 6538 }, { "epoch": 0.2907829798586101, "grad_norm": 0.09026948362588882, "learning_rate": 0.0009904620546287866, "loss": 1.578, "step": 6540 }, { "epoch": 0.2908719043172825, "grad_norm": 0.09430602192878723, "learning_rate": 0.0009904551887810551, "loss": 1.574, "step": 6542 }, { "epoch": 0.2909608287759548, "grad_norm": 0.08742611855268478, "learning_rate": 0.0009904483204868516, "loss": 1.5757, "step": 6544 }, { "epoch": 0.2910497532346272, "grad_norm": 0.08668093383312225, "learning_rate": 0.0009904414497462104, "loss": 1.5713, "step": 6546 }, { "epoch": 0.29113867769329954, "grad_norm": 0.0894765630364418, "learning_rate": 0.000990434576559166, "loss": 1.5737, "step": 6548 }, { "epoch": 0.2912276021519719, "grad_norm": 0.09396469593048096, "learning_rate": 0.0009904277009257524, "loss": 1.5765, "step": 6550 }, { "epoch": 0.2913165266106443, "grad_norm": 0.08750537782907486, "learning_rate": 0.0009904208228460041, "loss": 1.5662, "step": 6552 }, { "epoch": 0.2914054510693166, "grad_norm": 0.08443306386470795, "learning_rate": 0.0009904139423199552, "loss": 1.5776, "step": 6554 }, { "epoch": 0.29149437552798896, "grad_norm": 0.08997094631195068, "learning_rate": 0.00099040705934764, "loss": 1.5678, "step": 6556 }, { "epoch": 0.2915832999866613, "grad_norm": 0.08915294706821442, "learning_rate": 0.0009904001739290934, "loss": 1.5724, "step": 6558 }, { "epoch": 0.2916722244453337, "grad_norm": 0.08931171149015427, "learning_rate": 0.0009903932860643493, "loss": 1.5644, "step": 6560 }, { "epoch": 0.29176114890400606, "grad_norm": 0.09019617736339569, "learning_rate": 0.0009903863957534417, "loss": 1.5755, "step": 6562 }, { "epoch": 0.29185007336267843, "grad_norm": 0.08960139006376266, "learning_rate": 0.0009903795029964058, "loss": 1.5696, "step": 6564 }, { "epoch": 0.29193899782135074, "grad_norm": 0.08686326444149017, "learning_rate": 0.0009903726077932753, "loss": 1.5728, "step": 6566 }, { "epoch": 0.2920279222800231, "grad_norm": 0.08807472884654999, "learning_rate": 0.0009903657101440848, "loss": 1.5748, "step": 6568 }, { "epoch": 0.2921168467386955, "grad_norm": 0.0936129167675972, "learning_rate": 0.0009903588100488687, "loss": 1.5737, "step": 6570 }, { "epoch": 0.29220577119736785, "grad_norm": 0.09177185595035553, "learning_rate": 0.0009903519075076617, "loss": 1.5696, "step": 6572 }, { "epoch": 0.2922946956560402, "grad_norm": 0.08630893379449844, "learning_rate": 0.0009903450025204978, "loss": 1.5701, "step": 6574 }, { "epoch": 0.29238362011471253, "grad_norm": 0.08818253129720688, "learning_rate": 0.0009903380950874116, "loss": 1.5675, "step": 6576 }, { "epoch": 0.2924725445733849, "grad_norm": 0.08376295864582062, "learning_rate": 0.0009903311852084379, "loss": 1.5677, "step": 6578 }, { "epoch": 0.29256146903205726, "grad_norm": 0.08859193325042725, "learning_rate": 0.0009903242728836106, "loss": 1.5702, "step": 6580 }, { "epoch": 0.29265039349072963, "grad_norm": 0.0904025286436081, "learning_rate": 0.0009903173581129645, "loss": 1.5719, "step": 6582 }, { "epoch": 0.292739317949402, "grad_norm": 0.08744067698717117, "learning_rate": 0.0009903104408965338, "loss": 1.57, "step": 6584 }, { "epoch": 0.29282824240807437, "grad_norm": 0.08911757916212082, "learning_rate": 0.0009903035212343535, "loss": 1.5734, "step": 6586 }, { "epoch": 0.2929171668667467, "grad_norm": 0.0855589210987091, "learning_rate": 0.000990296599126458, "loss": 1.5706, "step": 6588 }, { "epoch": 0.29300609132541905, "grad_norm": 0.0880928561091423, "learning_rate": 0.0009902896745728814, "loss": 1.5733, "step": 6590 }, { "epoch": 0.2930950157840914, "grad_norm": 0.08391828835010529, "learning_rate": 0.0009902827475736585, "loss": 1.5713, "step": 6592 }, { "epoch": 0.2931839402427638, "grad_norm": 0.08916763961315155, "learning_rate": 0.000990275818128824, "loss": 1.575, "step": 6594 }, { "epoch": 0.29327286470143615, "grad_norm": 0.08984120190143585, "learning_rate": 0.0009902688862384122, "loss": 1.5738, "step": 6596 }, { "epoch": 0.29336178916010847, "grad_norm": 0.08982984721660614, "learning_rate": 0.000990261951902458, "loss": 1.5709, "step": 6598 }, { "epoch": 0.29345071361878083, "grad_norm": 0.08733770996332169, "learning_rate": 0.0009902550151209956, "loss": 1.5711, "step": 6600 }, { "epoch": 0.2935396380774532, "grad_norm": 0.08438307046890259, "learning_rate": 0.00099024807589406, "loss": 1.5683, "step": 6602 }, { "epoch": 0.29362856253612557, "grad_norm": 0.08910389244556427, "learning_rate": 0.0009902411342216854, "loss": 1.5732, "step": 6604 }, { "epoch": 0.29371748699479794, "grad_norm": 0.09192346036434174, "learning_rate": 0.000990234190103907, "loss": 1.5696, "step": 6606 }, { "epoch": 0.29380641145347025, "grad_norm": 0.0915895402431488, "learning_rate": 0.0009902272435407587, "loss": 1.5671, "step": 6608 }, { "epoch": 0.2938953359121426, "grad_norm": 0.09286682307720184, "learning_rate": 0.0009902202945322757, "loss": 1.5757, "step": 6610 }, { "epoch": 0.293984260370815, "grad_norm": 0.0949014201760292, "learning_rate": 0.0009902133430784925, "loss": 1.5736, "step": 6612 }, { "epoch": 0.29407318482948736, "grad_norm": 0.09041435271501541, "learning_rate": 0.0009902063891794439, "loss": 1.5739, "step": 6614 }, { "epoch": 0.2941621092881597, "grad_norm": 0.09175708889961243, "learning_rate": 0.0009901994328351642, "loss": 1.5704, "step": 6616 }, { "epoch": 0.2942510337468321, "grad_norm": 0.09223156422376633, "learning_rate": 0.0009901924740456885, "loss": 1.5742, "step": 6618 }, { "epoch": 0.2943399582055044, "grad_norm": 0.08808215707540512, "learning_rate": 0.0009901855128110514, "loss": 1.5751, "step": 6620 }, { "epoch": 0.29442888266417677, "grad_norm": 0.08614702522754669, "learning_rate": 0.0009901785491312875, "loss": 1.5766, "step": 6622 }, { "epoch": 0.29451780712284914, "grad_norm": 0.08723152428865433, "learning_rate": 0.0009901715830064317, "loss": 1.5684, "step": 6624 }, { "epoch": 0.2946067315815215, "grad_norm": 0.08894877880811691, "learning_rate": 0.0009901646144365188, "loss": 1.5734, "step": 6626 }, { "epoch": 0.2946956560401939, "grad_norm": 0.09144090861082077, "learning_rate": 0.0009901576434215833, "loss": 1.574, "step": 6628 }, { "epoch": 0.2947845804988662, "grad_norm": 0.08594503253698349, "learning_rate": 0.0009901506699616601, "loss": 1.5716, "step": 6630 }, { "epoch": 0.29487350495753856, "grad_norm": 0.09015947580337524, "learning_rate": 0.000990143694056784, "loss": 1.5693, "step": 6632 }, { "epoch": 0.2949624294162109, "grad_norm": 0.08714131265878677, "learning_rate": 0.0009901367157069897, "loss": 1.5629, "step": 6634 }, { "epoch": 0.2950513538748833, "grad_norm": 0.08628025650978088, "learning_rate": 0.0009901297349123124, "loss": 1.5687, "step": 6636 }, { "epoch": 0.29514027833355566, "grad_norm": 0.09013169258832932, "learning_rate": 0.0009901227516727865, "loss": 1.5728, "step": 6638 }, { "epoch": 0.29522920279222803, "grad_norm": 0.09108193218708038, "learning_rate": 0.0009901157659884467, "loss": 1.5688, "step": 6640 }, { "epoch": 0.29531812725090034, "grad_norm": 0.08838926255702972, "learning_rate": 0.0009901087778593286, "loss": 1.5676, "step": 6642 }, { "epoch": 0.2954070517095727, "grad_norm": 0.085484080016613, "learning_rate": 0.0009901017872854662, "loss": 1.5715, "step": 6644 }, { "epoch": 0.2954959761682451, "grad_norm": 0.08966375142335892, "learning_rate": 0.0009900947942668948, "loss": 1.5704, "step": 6646 }, { "epoch": 0.29558490062691745, "grad_norm": 0.08792408555746078, "learning_rate": 0.0009900877988036493, "loss": 1.5707, "step": 6648 }, { "epoch": 0.2956738250855898, "grad_norm": 0.08903691917657852, "learning_rate": 0.0009900808008957642, "loss": 1.5769, "step": 6650 }, { "epoch": 0.2957627495442621, "grad_norm": 0.09390727430582047, "learning_rate": 0.000990073800543275, "loss": 1.5715, "step": 6652 }, { "epoch": 0.2958516740029345, "grad_norm": 0.09213802218437195, "learning_rate": 0.0009900667977462163, "loss": 1.5754, "step": 6654 }, { "epoch": 0.29594059846160686, "grad_norm": 0.0865757092833519, "learning_rate": 0.000990059792504623, "loss": 1.5704, "step": 6656 }, { "epoch": 0.29602952292027923, "grad_norm": 0.08747228235006332, "learning_rate": 0.0009900527848185302, "loss": 1.571, "step": 6658 }, { "epoch": 0.2961184473789516, "grad_norm": 0.08495338261127472, "learning_rate": 0.0009900457746879728, "loss": 1.5669, "step": 6660 }, { "epoch": 0.2962073718376239, "grad_norm": 0.0894813984632492, "learning_rate": 0.0009900387621129855, "loss": 1.5695, "step": 6662 }, { "epoch": 0.2962962962962963, "grad_norm": 0.08558983355760574, "learning_rate": 0.0009900317470936037, "loss": 1.5744, "step": 6664 }, { "epoch": 0.29638522075496865, "grad_norm": 0.08508554100990295, "learning_rate": 0.0009900247296298621, "loss": 1.5674, "step": 6666 }, { "epoch": 0.296474145213641, "grad_norm": 0.08840614557266235, "learning_rate": 0.000990017709721796, "loss": 1.569, "step": 6668 }, { "epoch": 0.2965630696723134, "grad_norm": 0.08688345551490784, "learning_rate": 0.0009900106873694402, "loss": 1.5724, "step": 6670 }, { "epoch": 0.29665199413098575, "grad_norm": 0.0900869369506836, "learning_rate": 0.0009900036625728294, "loss": 1.5707, "step": 6672 }, { "epoch": 0.29674091858965806, "grad_norm": 0.08745554089546204, "learning_rate": 0.0009899966353319994, "loss": 1.5689, "step": 6674 }, { "epoch": 0.29682984304833043, "grad_norm": 0.08753221482038498, "learning_rate": 0.0009899896056469845, "loss": 1.5709, "step": 6676 }, { "epoch": 0.2969187675070028, "grad_norm": 0.09187465906143188, "learning_rate": 0.0009899825735178204, "loss": 1.5717, "step": 6678 }, { "epoch": 0.29700769196567517, "grad_norm": 0.08736233413219452, "learning_rate": 0.000989975538944542, "loss": 1.5741, "step": 6680 }, { "epoch": 0.29709661642434754, "grad_norm": 0.08750621974468231, "learning_rate": 0.000989968501927184, "loss": 1.5702, "step": 6682 }, { "epoch": 0.29718554088301985, "grad_norm": 0.08598420023918152, "learning_rate": 0.0009899614624657817, "loss": 1.5622, "step": 6684 }, { "epoch": 0.2972744653416922, "grad_norm": 0.08676007390022278, "learning_rate": 0.0009899544205603705, "loss": 1.5741, "step": 6686 }, { "epoch": 0.2973633898003646, "grad_norm": 0.08978342264890671, "learning_rate": 0.0009899473762109855, "loss": 1.5686, "step": 6688 }, { "epoch": 0.29745231425903695, "grad_norm": 0.08682754635810852, "learning_rate": 0.0009899403294176613, "loss": 1.5657, "step": 6690 }, { "epoch": 0.2975412387177093, "grad_norm": 0.08936390280723572, "learning_rate": 0.0009899332801804335, "loss": 1.5699, "step": 6692 }, { "epoch": 0.2976301631763817, "grad_norm": 0.09042416512966156, "learning_rate": 0.0009899262284993372, "loss": 1.5695, "step": 6694 }, { "epoch": 0.297719087635054, "grad_norm": 0.08619032055139542, "learning_rate": 0.0009899191743744076, "loss": 1.5632, "step": 6696 }, { "epoch": 0.29780801209372637, "grad_norm": 0.0877993255853653, "learning_rate": 0.0009899121178056798, "loss": 1.5744, "step": 6698 }, { "epoch": 0.29789693655239874, "grad_norm": 0.09133929759263992, "learning_rate": 0.000989905058793189, "loss": 1.5672, "step": 6700 }, { "epoch": 0.2979858610110711, "grad_norm": 0.08733952045440674, "learning_rate": 0.0009898979973369706, "loss": 1.5676, "step": 6702 }, { "epoch": 0.2980747854697435, "grad_norm": 0.08532746136188507, "learning_rate": 0.0009898909334370594, "loss": 1.5743, "step": 6704 }, { "epoch": 0.2981637099284158, "grad_norm": 0.08905241638422012, "learning_rate": 0.0009898838670934912, "loss": 1.5686, "step": 6706 }, { "epoch": 0.29825263438708816, "grad_norm": 0.09203900396823883, "learning_rate": 0.0009898767983063008, "loss": 1.576, "step": 6708 }, { "epoch": 0.2983415588457605, "grad_norm": 0.20671740174293518, "learning_rate": 0.0009898697270755237, "loss": 1.5761, "step": 6710 }, { "epoch": 0.2984304833044329, "grad_norm": 0.22460798919200897, "learning_rate": 0.000989862653401195, "loss": 1.5757, "step": 6712 }, { "epoch": 0.29851940776310526, "grad_norm": 0.11507432907819748, "learning_rate": 0.00098985557728335, "loss": 1.5788, "step": 6714 }, { "epoch": 0.2986083322217776, "grad_norm": 0.09615212678909302, "learning_rate": 0.0009898484987220242, "loss": 1.5698, "step": 6716 }, { "epoch": 0.29869725668044994, "grad_norm": 0.14628173410892487, "learning_rate": 0.0009898414177172527, "loss": 1.5708, "step": 6718 }, { "epoch": 0.2987861811391223, "grad_norm": 0.10161300748586655, "learning_rate": 0.000989834334269071, "loss": 1.5803, "step": 6720 }, { "epoch": 0.2988751055977947, "grad_norm": 0.09993571043014526, "learning_rate": 0.0009898272483775143, "loss": 1.5728, "step": 6722 }, { "epoch": 0.29896403005646704, "grad_norm": 0.0945572704076767, "learning_rate": 0.0009898201600426178, "loss": 1.5717, "step": 6724 }, { "epoch": 0.2990529545151394, "grad_norm": 0.10044775158166885, "learning_rate": 0.0009898130692644173, "loss": 1.5734, "step": 6726 }, { "epoch": 0.2991418789738117, "grad_norm": 0.09027940034866333, "learning_rate": 0.0009898059760429477, "loss": 1.5767, "step": 6728 }, { "epoch": 0.2992308034324841, "grad_norm": 0.09142522513866425, "learning_rate": 0.0009897988803782448, "loss": 1.5732, "step": 6730 }, { "epoch": 0.29931972789115646, "grad_norm": 0.1005646288394928, "learning_rate": 0.0009897917822703436, "loss": 1.5717, "step": 6732 }, { "epoch": 0.29940865234982883, "grad_norm": 0.09144769608974457, "learning_rate": 0.0009897846817192796, "loss": 1.5635, "step": 6734 }, { "epoch": 0.2994975768085012, "grad_norm": 0.09377957135438919, "learning_rate": 0.0009897775787250887, "loss": 1.5698, "step": 6736 }, { "epoch": 0.2995865012671735, "grad_norm": 0.09546922147274017, "learning_rate": 0.0009897704732878057, "loss": 1.5698, "step": 6738 }, { "epoch": 0.2996754257258459, "grad_norm": 0.08886546641588211, "learning_rate": 0.0009897633654074663, "loss": 1.5649, "step": 6740 }, { "epoch": 0.29976435018451825, "grad_norm": 0.0911850556731224, "learning_rate": 0.0009897562550841058, "loss": 1.5702, "step": 6742 }, { "epoch": 0.2998532746431906, "grad_norm": 0.09427014738321304, "learning_rate": 0.0009897491423177598, "loss": 1.5766, "step": 6744 }, { "epoch": 0.299942199101863, "grad_norm": 0.1038573682308197, "learning_rate": 0.000989742027108464, "loss": 1.574, "step": 6746 }, { "epoch": 0.30003112356053535, "grad_norm": 0.08610797673463821, "learning_rate": 0.0009897349094562535, "loss": 1.5718, "step": 6748 }, { "epoch": 0.30012004801920766, "grad_norm": 0.09136275202035904, "learning_rate": 0.0009897277893611642, "loss": 1.5716, "step": 6750 }, { "epoch": 0.30020897247788003, "grad_norm": 0.09047017991542816, "learning_rate": 0.0009897206668232313, "loss": 1.5739, "step": 6752 }, { "epoch": 0.3002978969365524, "grad_norm": 0.08960193395614624, "learning_rate": 0.0009897135418424903, "loss": 1.5674, "step": 6754 }, { "epoch": 0.30038682139522477, "grad_norm": 0.08961663395166397, "learning_rate": 0.000989706414418977, "loss": 1.5759, "step": 6756 }, { "epoch": 0.30047574585389714, "grad_norm": 0.08908514678478241, "learning_rate": 0.0009896992845527266, "loss": 1.5714, "step": 6758 }, { "epoch": 0.30056467031256945, "grad_norm": 0.08930535614490509, "learning_rate": 0.000989692152243775, "loss": 1.5694, "step": 6760 }, { "epoch": 0.3006535947712418, "grad_norm": 0.09298436343669891, "learning_rate": 0.0009896850174921578, "loss": 1.5753, "step": 6762 }, { "epoch": 0.3007425192299142, "grad_norm": 0.08789312839508057, "learning_rate": 0.0009896778802979103, "loss": 1.5727, "step": 6764 }, { "epoch": 0.30083144368858655, "grad_norm": 0.08950556069612503, "learning_rate": 0.0009896707406610683, "loss": 1.5732, "step": 6766 }, { "epoch": 0.3009203681472589, "grad_norm": 0.0896010622382164, "learning_rate": 0.0009896635985816673, "loss": 1.5746, "step": 6768 }, { "epoch": 0.3010092926059313, "grad_norm": 0.0874442532658577, "learning_rate": 0.0009896564540597431, "loss": 1.5708, "step": 6770 }, { "epoch": 0.3010982170646036, "grad_norm": 0.08776126801967621, "learning_rate": 0.0009896493070953311, "loss": 1.5758, "step": 6772 }, { "epoch": 0.30118714152327597, "grad_norm": 0.08940175920724869, "learning_rate": 0.0009896421576884673, "loss": 1.5719, "step": 6774 }, { "epoch": 0.30127606598194834, "grad_norm": 0.08825140446424484, "learning_rate": 0.000989635005839187, "loss": 1.5693, "step": 6776 }, { "epoch": 0.3013649904406207, "grad_norm": 0.08824607729911804, "learning_rate": 0.000989627851547526, "loss": 1.5658, "step": 6778 }, { "epoch": 0.3014539148992931, "grad_norm": 0.08684536069631577, "learning_rate": 0.00098962069481352, "loss": 1.5636, "step": 6780 }, { "epoch": 0.3015428393579654, "grad_norm": 0.08771684765815735, "learning_rate": 0.0009896135356372046, "loss": 1.573, "step": 6782 }, { "epoch": 0.30163176381663775, "grad_norm": 0.08880012482404709, "learning_rate": 0.0009896063740186159, "loss": 1.5673, "step": 6784 }, { "epoch": 0.3017206882753101, "grad_norm": 0.08657423406839371, "learning_rate": 0.000989599209957789, "loss": 1.5675, "step": 6786 }, { "epoch": 0.3018096127339825, "grad_norm": 0.0867740735411644, "learning_rate": 0.0009895920434547603, "loss": 1.5677, "step": 6788 }, { "epoch": 0.30189853719265486, "grad_norm": 0.08525516837835312, "learning_rate": 0.000989584874509565, "loss": 1.5654, "step": 6790 }, { "epoch": 0.30198746165132717, "grad_norm": 0.08830083906650543, "learning_rate": 0.0009895777031222392, "loss": 1.5672, "step": 6792 }, { "epoch": 0.30207638610999954, "grad_norm": 0.0843716561794281, "learning_rate": 0.0009895705292928185, "loss": 1.5728, "step": 6794 }, { "epoch": 0.3021653105686719, "grad_norm": 0.0873875841498375, "learning_rate": 0.0009895633530213387, "loss": 1.5615, "step": 6796 }, { "epoch": 0.3022542350273443, "grad_norm": 0.08721845597028732, "learning_rate": 0.0009895561743078357, "loss": 1.5626, "step": 6798 }, { "epoch": 0.30234315948601664, "grad_norm": 0.09410282224416733, "learning_rate": 0.0009895489931523451, "loss": 1.5711, "step": 6800 }, { "epoch": 0.302432083944689, "grad_norm": 0.0908014252781868, "learning_rate": 0.000989541809554903, "loss": 1.5703, "step": 6802 }, { "epoch": 0.3025210084033613, "grad_norm": 0.08996500074863434, "learning_rate": 0.0009895346235155452, "loss": 1.5712, "step": 6804 }, { "epoch": 0.3026099328620337, "grad_norm": 0.08390094339847565, "learning_rate": 0.000989527435034307, "loss": 1.5733, "step": 6806 }, { "epoch": 0.30269885732070606, "grad_norm": 0.08549515902996063, "learning_rate": 0.0009895202441112252, "loss": 1.5645, "step": 6808 }, { "epoch": 0.30278778177937843, "grad_norm": 0.08361940085887909, "learning_rate": 0.000989513050746335, "loss": 1.569, "step": 6810 }, { "epoch": 0.3028767062380508, "grad_norm": 0.08554235100746155, "learning_rate": 0.0009895058549396723, "loss": 1.5686, "step": 6812 }, { "epoch": 0.3029656306967231, "grad_norm": 0.08416851609945297, "learning_rate": 0.0009894986566912733, "loss": 1.5664, "step": 6814 }, { "epoch": 0.3030545551553955, "grad_norm": 0.08411978930234909, "learning_rate": 0.0009894914560011736, "loss": 1.5648, "step": 6816 }, { "epoch": 0.30314347961406785, "grad_norm": 0.08546824753284454, "learning_rate": 0.0009894842528694095, "loss": 1.5651, "step": 6818 }, { "epoch": 0.3032324040727402, "grad_norm": 0.08174564689397812, "learning_rate": 0.0009894770472960164, "loss": 1.5734, "step": 6820 }, { "epoch": 0.3033213285314126, "grad_norm": 0.08649101853370667, "learning_rate": 0.0009894698392810307, "loss": 1.5737, "step": 6822 }, { "epoch": 0.30341025299008495, "grad_norm": 0.0850704163312912, "learning_rate": 0.0009894626288244883, "loss": 1.5716, "step": 6824 }, { "epoch": 0.30349917744875726, "grad_norm": 0.08794053643941879, "learning_rate": 0.0009894554159264248, "loss": 1.569, "step": 6826 }, { "epoch": 0.30358810190742963, "grad_norm": 0.08536579459905624, "learning_rate": 0.0009894482005868767, "loss": 1.5673, "step": 6828 }, { "epoch": 0.303677026366102, "grad_norm": 0.08639626950025558, "learning_rate": 0.0009894409828058795, "loss": 1.5668, "step": 6830 }, { "epoch": 0.30376595082477437, "grad_norm": 0.0900898426771164, "learning_rate": 0.0009894337625834695, "loss": 1.566, "step": 6832 }, { "epoch": 0.30385487528344673, "grad_norm": 0.07988198101520538, "learning_rate": 0.0009894265399196828, "loss": 1.5659, "step": 6834 }, { "epoch": 0.30394379974211905, "grad_norm": 0.08728215098381042, "learning_rate": 0.0009894193148145551, "loss": 1.5667, "step": 6836 }, { "epoch": 0.3040327242007914, "grad_norm": 0.08969441801309586, "learning_rate": 0.000989412087268123, "loss": 1.5712, "step": 6838 }, { "epoch": 0.3041216486594638, "grad_norm": 0.0852469727396965, "learning_rate": 0.0009894048572804217, "loss": 1.576, "step": 6840 }, { "epoch": 0.30421057311813615, "grad_norm": 0.08066057413816452, "learning_rate": 0.000989397624851488, "loss": 1.5643, "step": 6842 }, { "epoch": 0.3042994975768085, "grad_norm": 0.08737246692180634, "learning_rate": 0.0009893903899813575, "loss": 1.5672, "step": 6844 }, { "epoch": 0.30438842203548083, "grad_norm": 0.08560658246278763, "learning_rate": 0.0009893831526700666, "loss": 1.5696, "step": 6846 }, { "epoch": 0.3044773464941532, "grad_norm": 0.0847938284277916, "learning_rate": 0.0009893759129176514, "loss": 1.5641, "step": 6848 }, { "epoch": 0.30456627095282557, "grad_norm": 0.08661984652280807, "learning_rate": 0.000989368670724148, "loss": 1.5687, "step": 6850 }, { "epoch": 0.30465519541149794, "grad_norm": 0.08374079316854477, "learning_rate": 0.000989361426089592, "loss": 1.5665, "step": 6852 }, { "epoch": 0.3047441198701703, "grad_norm": 0.08264697343111038, "learning_rate": 0.0009893541790140202, "loss": 1.5669, "step": 6854 }, { "epoch": 0.3048330443288427, "grad_norm": 0.08502277731895447, "learning_rate": 0.0009893469294974687, "loss": 1.5616, "step": 6856 }, { "epoch": 0.304921968787515, "grad_norm": 0.08124475181102753, "learning_rate": 0.0009893396775399732, "loss": 1.5685, "step": 6858 }, { "epoch": 0.30501089324618735, "grad_norm": 0.08523332327604294, "learning_rate": 0.0009893324231415703, "loss": 1.5616, "step": 6860 }, { "epoch": 0.3050998177048597, "grad_norm": 0.08559540659189224, "learning_rate": 0.0009893251663022963, "loss": 1.5699, "step": 6862 }, { "epoch": 0.3051887421635321, "grad_norm": 0.08699463307857513, "learning_rate": 0.000989317907022187, "loss": 1.5725, "step": 6864 }, { "epoch": 0.30527766662220446, "grad_norm": 0.09094923734664917, "learning_rate": 0.0009893106453012785, "loss": 1.5698, "step": 6866 }, { "epoch": 0.30536659108087677, "grad_norm": 0.08431901037693024, "learning_rate": 0.0009893033811396075, "loss": 1.5723, "step": 6868 }, { "epoch": 0.30545551553954914, "grad_norm": 0.08522892743349075, "learning_rate": 0.00098929611453721, "loss": 1.5645, "step": 6870 }, { "epoch": 0.3055444399982215, "grad_norm": 0.08593329787254333, "learning_rate": 0.000989288845494122, "loss": 1.5708, "step": 6872 }, { "epoch": 0.3056333644568939, "grad_norm": 0.08536642789840698, "learning_rate": 0.0009892815740103805, "loss": 1.5676, "step": 6874 }, { "epoch": 0.30572228891556624, "grad_norm": 0.08635646104812622, "learning_rate": 0.000989274300086021, "loss": 1.5669, "step": 6876 }, { "epoch": 0.3058112133742386, "grad_norm": 0.09484677016735077, "learning_rate": 0.00098926702372108, "loss": 1.5701, "step": 6878 }, { "epoch": 0.3059001378329109, "grad_norm": 0.08951367437839508, "learning_rate": 0.000989259744915594, "loss": 1.5668, "step": 6880 }, { "epoch": 0.3059890622915833, "grad_norm": 0.0868958830833435, "learning_rate": 0.0009892524636695992, "loss": 1.5653, "step": 6882 }, { "epoch": 0.30607798675025566, "grad_norm": 0.08445209264755249, "learning_rate": 0.0009892451799831318, "loss": 1.5696, "step": 6884 }, { "epoch": 0.306166911208928, "grad_norm": 0.08534303307533264, "learning_rate": 0.0009892378938562284, "loss": 1.5702, "step": 6886 }, { "epoch": 0.3062558356676004, "grad_norm": 0.09028670191764832, "learning_rate": 0.0009892306052889251, "loss": 1.5702, "step": 6888 }, { "epoch": 0.3063447601262727, "grad_norm": 0.08568151295185089, "learning_rate": 0.0009892233142812584, "loss": 1.5752, "step": 6890 }, { "epoch": 0.3064336845849451, "grad_norm": 0.08130050450563431, "learning_rate": 0.0009892160208332644, "loss": 1.5648, "step": 6892 }, { "epoch": 0.30652260904361744, "grad_norm": 0.08191067725419998, "learning_rate": 0.0009892087249449798, "loss": 1.5686, "step": 6894 }, { "epoch": 0.3066115335022898, "grad_norm": 0.08696281164884567, "learning_rate": 0.0009892014266164408, "loss": 1.5645, "step": 6896 }, { "epoch": 0.3067004579609622, "grad_norm": 0.09006158262491226, "learning_rate": 0.000989194125847684, "loss": 1.5665, "step": 6898 }, { "epoch": 0.3067893824196345, "grad_norm": 0.08667782694101334, "learning_rate": 0.0009891868226387455, "loss": 1.5627, "step": 6900 }, { "epoch": 0.30687830687830686, "grad_norm": 0.08659591525793076, "learning_rate": 0.0009891795169896622, "loss": 1.5606, "step": 6902 }, { "epoch": 0.30696723133697923, "grad_norm": 0.0922919362783432, "learning_rate": 0.0009891722089004702, "loss": 1.5638, "step": 6904 }, { "epoch": 0.3070561557956516, "grad_norm": 0.09142550826072693, "learning_rate": 0.0009891648983712058, "loss": 1.5736, "step": 6906 }, { "epoch": 0.30714508025432397, "grad_norm": 0.085682712495327, "learning_rate": 0.0009891575854019058, "loss": 1.5656, "step": 6908 }, { "epoch": 0.30723400471299633, "grad_norm": 0.0815911665558815, "learning_rate": 0.0009891502699926067, "loss": 1.5646, "step": 6910 }, { "epoch": 0.30732292917166865, "grad_norm": 0.08682998269796371, "learning_rate": 0.0009891429521433448, "loss": 1.5631, "step": 6912 }, { "epoch": 0.307411853630341, "grad_norm": 0.0891093984246254, "learning_rate": 0.0009891356318541566, "loss": 1.5678, "step": 6914 }, { "epoch": 0.3075007780890134, "grad_norm": 0.08850381523370743, "learning_rate": 0.000989128309125079, "loss": 1.5682, "step": 6916 }, { "epoch": 0.30758970254768575, "grad_norm": 0.08838870376348495, "learning_rate": 0.0009891209839561477, "loss": 1.5721, "step": 6918 }, { "epoch": 0.3076786270063581, "grad_norm": 0.08760442584753036, "learning_rate": 0.0009891136563474, "loss": 1.5692, "step": 6920 }, { "epoch": 0.30776755146503043, "grad_norm": 0.08644498139619827, "learning_rate": 0.0009891063262988723, "loss": 1.566, "step": 6922 }, { "epoch": 0.3078564759237028, "grad_norm": 0.08855952322483063, "learning_rate": 0.0009890989938106008, "loss": 1.567, "step": 6924 }, { "epoch": 0.30794540038237517, "grad_norm": 0.08846352994441986, "learning_rate": 0.0009890916588826224, "loss": 1.5695, "step": 6926 }, { "epoch": 0.30803432484104754, "grad_norm": 0.08989361673593521, "learning_rate": 0.0009890843215149736, "loss": 1.568, "step": 6928 }, { "epoch": 0.3081232492997199, "grad_norm": 0.0872996598482132, "learning_rate": 0.0009890769817076912, "loss": 1.5655, "step": 6930 }, { "epoch": 0.30821217375839227, "grad_norm": 0.08718729019165039, "learning_rate": 0.0009890696394608115, "loss": 1.5768, "step": 6932 }, { "epoch": 0.3083010982170646, "grad_norm": 0.08712806552648544, "learning_rate": 0.0009890622947743713, "loss": 1.5647, "step": 6934 }, { "epoch": 0.30839002267573695, "grad_norm": 0.0897906944155693, "learning_rate": 0.0009890549476484073, "loss": 1.5751, "step": 6936 }, { "epoch": 0.3084789471344093, "grad_norm": 0.1027601882815361, "learning_rate": 0.0009890475980829558, "loss": 1.5664, "step": 6938 }, { "epoch": 0.3085678715930817, "grad_norm": 0.11154686659574509, "learning_rate": 0.0009890402460780538, "loss": 1.5664, "step": 6940 }, { "epoch": 0.30865679605175406, "grad_norm": 0.19219696521759033, "learning_rate": 0.000989032891633738, "loss": 1.5769, "step": 6942 }, { "epoch": 0.30874572051042637, "grad_norm": 0.10125477612018585, "learning_rate": 0.0009890255347500447, "loss": 1.5801, "step": 6944 }, { "epoch": 0.30883464496909874, "grad_norm": 0.10321390628814697, "learning_rate": 0.0009890181754270112, "loss": 1.5713, "step": 6946 }, { "epoch": 0.3089235694277711, "grad_norm": 0.09801045060157776, "learning_rate": 0.0009890108136646736, "loss": 1.5664, "step": 6948 }, { "epoch": 0.3090124938864435, "grad_norm": 0.09143440425395966, "learning_rate": 0.0009890034494630688, "loss": 1.5759, "step": 6950 }, { "epoch": 0.30910141834511584, "grad_norm": 0.09067144989967346, "learning_rate": 0.000988996082822234, "loss": 1.5677, "step": 6952 }, { "epoch": 0.3091903428037882, "grad_norm": 0.09082882851362228, "learning_rate": 0.0009889887137422054, "loss": 1.5746, "step": 6954 }, { "epoch": 0.3092792672624605, "grad_norm": 0.087799571454525, "learning_rate": 0.0009889813422230198, "loss": 1.5667, "step": 6956 }, { "epoch": 0.3093681917211329, "grad_norm": 0.08629163354635239, "learning_rate": 0.000988973968264714, "loss": 1.5645, "step": 6958 }, { "epoch": 0.30945711617980526, "grad_norm": 0.08560052514076233, "learning_rate": 0.000988966591867325, "loss": 1.5647, "step": 6960 }, { "epoch": 0.3095460406384776, "grad_norm": 0.08497489988803864, "learning_rate": 0.0009889592130308898, "loss": 1.5611, "step": 6962 }, { "epoch": 0.30963496509715, "grad_norm": 0.08397437632083893, "learning_rate": 0.0009889518317554445, "loss": 1.5667, "step": 6964 }, { "epoch": 0.3097238895558223, "grad_norm": 0.08864140510559082, "learning_rate": 0.0009889444480410267, "loss": 1.5644, "step": 6966 }, { "epoch": 0.3098128140144947, "grad_norm": 0.0851002037525177, "learning_rate": 0.0009889370618876723, "loss": 1.5669, "step": 6968 }, { "epoch": 0.30990173847316704, "grad_norm": 0.08869633823633194, "learning_rate": 0.000988929673295419, "loss": 1.5676, "step": 6970 }, { "epoch": 0.3099906629318394, "grad_norm": 0.08750467002391815, "learning_rate": 0.0009889222822643032, "loss": 1.5641, "step": 6972 }, { "epoch": 0.3100795873905118, "grad_norm": 0.08245990425348282, "learning_rate": 0.000988914888794362, "loss": 1.5615, "step": 6974 }, { "epoch": 0.3101685118491841, "grad_norm": 0.08736477047204971, "learning_rate": 0.0009889074928856323, "loss": 1.5645, "step": 6976 }, { "epoch": 0.31025743630785646, "grad_norm": 0.09274744242429733, "learning_rate": 0.0009889000945381506, "loss": 1.5684, "step": 6978 }, { "epoch": 0.31034636076652883, "grad_norm": 0.08163709938526154, "learning_rate": 0.0009888926937519542, "loss": 1.5637, "step": 6980 }, { "epoch": 0.3104352852252012, "grad_norm": 0.08836798369884491, "learning_rate": 0.00098888529052708, "loss": 1.565, "step": 6982 }, { "epoch": 0.31052420968387356, "grad_norm": 0.08659311383962631, "learning_rate": 0.0009888778848635646, "loss": 1.5653, "step": 6984 }, { "epoch": 0.31061313414254593, "grad_norm": 0.08404355496168137, "learning_rate": 0.0009888704767614452, "loss": 1.5669, "step": 6986 }, { "epoch": 0.31070205860121825, "grad_norm": 0.08402011543512344, "learning_rate": 0.000988863066220759, "loss": 1.5588, "step": 6988 }, { "epoch": 0.3107909830598906, "grad_norm": 0.08973820507526398, "learning_rate": 0.0009888556532415423, "loss": 1.5668, "step": 6990 }, { "epoch": 0.310879907518563, "grad_norm": 0.08140117675065994, "learning_rate": 0.0009888482378238325, "loss": 1.5649, "step": 6992 }, { "epoch": 0.31096883197723535, "grad_norm": 0.08229751884937286, "learning_rate": 0.0009888408199676666, "loss": 1.5577, "step": 6994 }, { "epoch": 0.3110577564359077, "grad_norm": 0.08518966287374496, "learning_rate": 0.0009888333996730817, "loss": 1.5675, "step": 6996 }, { "epoch": 0.31114668089458003, "grad_norm": 0.0844472348690033, "learning_rate": 0.0009888259769401148, "loss": 1.5685, "step": 6998 }, { "epoch": 0.3112356053532524, "grad_norm": 0.08895467966794968, "learning_rate": 0.0009888185517688023, "loss": 1.5693, "step": 7000 }, { "epoch": 0.3112356053532524, "eval_loss": 1.5398365259170532, "eval_runtime": 12.9215, "eval_samples_per_second": 534.768, "eval_steps_per_second": 66.865, "step": 7000 }, { "epoch": 0.31132452981192477, "grad_norm": 0.08610174804925919, "learning_rate": 0.0009888111241591819, "loss": 1.5637, "step": 7002 }, { "epoch": 0.31141345427059713, "grad_norm": 0.08628767728805542, "learning_rate": 0.0009888036941112907, "loss": 1.5625, "step": 7004 }, { "epoch": 0.3115023787292695, "grad_norm": 0.08559640496969223, "learning_rate": 0.0009887962616251654, "loss": 1.5633, "step": 7006 }, { "epoch": 0.31159130318794187, "grad_norm": 0.08582499623298645, "learning_rate": 0.0009887888267008432, "loss": 1.5718, "step": 7008 }, { "epoch": 0.3116802276466142, "grad_norm": 0.08453387767076492, "learning_rate": 0.0009887813893383612, "loss": 1.5644, "step": 7010 }, { "epoch": 0.31176915210528655, "grad_norm": 0.0880834087729454, "learning_rate": 0.0009887739495377564, "loss": 1.5675, "step": 7012 }, { "epoch": 0.3118580765639589, "grad_norm": 0.08896942436695099, "learning_rate": 0.0009887665072990661, "loss": 1.5601, "step": 7014 }, { "epoch": 0.3119470010226313, "grad_norm": 0.08615525811910629, "learning_rate": 0.0009887590626223273, "loss": 1.5647, "step": 7016 }, { "epoch": 0.31203592548130366, "grad_norm": 0.08899851888418198, "learning_rate": 0.0009887516155075772, "loss": 1.5663, "step": 7018 }, { "epoch": 0.31212484993997597, "grad_norm": 0.08589426428079605, "learning_rate": 0.000988744165954853, "loss": 1.5629, "step": 7020 }, { "epoch": 0.31221377439864834, "grad_norm": 0.0895439013838768, "learning_rate": 0.0009887367139641916, "loss": 1.5655, "step": 7022 }, { "epoch": 0.3123026988573207, "grad_norm": 0.08594179153442383, "learning_rate": 0.0009887292595356304, "loss": 1.5574, "step": 7024 }, { "epoch": 0.31239162331599307, "grad_norm": 0.08816447108983994, "learning_rate": 0.0009887218026692066, "loss": 1.5686, "step": 7026 }, { "epoch": 0.31248054777466544, "grad_norm": 0.08560863137245178, "learning_rate": 0.0009887143433649573, "loss": 1.5662, "step": 7028 }, { "epoch": 0.31256947223333775, "grad_norm": 0.0848335549235344, "learning_rate": 0.0009887068816229196, "loss": 1.5647, "step": 7030 }, { "epoch": 0.3126583966920101, "grad_norm": 0.08963989466428757, "learning_rate": 0.000988699417443131, "loss": 1.5681, "step": 7032 }, { "epoch": 0.3127473211506825, "grad_norm": 0.08423878997564316, "learning_rate": 0.0009886919508256286, "loss": 1.5634, "step": 7034 }, { "epoch": 0.31283624560935486, "grad_norm": 0.08075986057519913, "learning_rate": 0.0009886844817704496, "loss": 1.5645, "step": 7036 }, { "epoch": 0.3129251700680272, "grad_norm": 0.07971926033496857, "learning_rate": 0.0009886770102776313, "loss": 1.5615, "step": 7038 }, { "epoch": 0.3130140945266996, "grad_norm": 0.08668291568756104, "learning_rate": 0.0009886695363472107, "loss": 1.5657, "step": 7040 }, { "epoch": 0.3131030189853719, "grad_norm": 0.08376525342464447, "learning_rate": 0.0009886620599792258, "loss": 1.5634, "step": 7042 }, { "epoch": 0.3131919434440443, "grad_norm": 0.0836341604590416, "learning_rate": 0.0009886545811737132, "loss": 1.5597, "step": 7044 }, { "epoch": 0.31328086790271664, "grad_norm": 0.08614721894264221, "learning_rate": 0.0009886470999307102, "loss": 1.5616, "step": 7046 }, { "epoch": 0.313369792361389, "grad_norm": 0.08370938897132874, "learning_rate": 0.0009886396162502547, "loss": 1.5695, "step": 7048 }, { "epoch": 0.3134587168200614, "grad_norm": 0.08297300338745117, "learning_rate": 0.0009886321301323835, "loss": 1.5669, "step": 7050 }, { "epoch": 0.3135476412787337, "grad_norm": 0.07920370995998383, "learning_rate": 0.0009886246415771343, "loss": 1.5671, "step": 7052 }, { "epoch": 0.31363656573740606, "grad_norm": 0.08777054399251938, "learning_rate": 0.000988617150584544, "loss": 1.5684, "step": 7054 }, { "epoch": 0.3137254901960784, "grad_norm": 0.08385943621397018, "learning_rate": 0.0009886096571546505, "loss": 1.5594, "step": 7056 }, { "epoch": 0.3138144146547508, "grad_norm": 0.08605042845010757, "learning_rate": 0.0009886021612874908, "loss": 1.5674, "step": 7058 }, { "epoch": 0.31390333911342316, "grad_norm": 0.08706225454807281, "learning_rate": 0.0009885946629831024, "loss": 1.5579, "step": 7060 }, { "epoch": 0.31399226357209553, "grad_norm": 0.08403518795967102, "learning_rate": 0.000988587162241523, "loss": 1.5624, "step": 7062 }, { "epoch": 0.31408118803076784, "grad_norm": 0.08342042565345764, "learning_rate": 0.0009885796590627894, "loss": 1.5651, "step": 7064 }, { "epoch": 0.3141701124894402, "grad_norm": 0.08213895559310913, "learning_rate": 0.0009885721534469395, "loss": 1.5653, "step": 7066 }, { "epoch": 0.3142590369481126, "grad_norm": 0.08500027656555176, "learning_rate": 0.0009885646453940105, "loss": 1.5693, "step": 7068 }, { "epoch": 0.31434796140678495, "grad_norm": 0.08024073392152786, "learning_rate": 0.0009885571349040401, "loss": 1.565, "step": 7070 }, { "epoch": 0.3144368858654573, "grad_norm": 0.08580642938613892, "learning_rate": 0.0009885496219770656, "loss": 1.5634, "step": 7072 }, { "epoch": 0.31452581032412963, "grad_norm": 0.08305441588163376, "learning_rate": 0.0009885421066131244, "loss": 1.5678, "step": 7074 }, { "epoch": 0.314614734782802, "grad_norm": 0.0872965082526207, "learning_rate": 0.000988534588812254, "loss": 1.5619, "step": 7076 }, { "epoch": 0.31470365924147436, "grad_norm": 0.08917077630758286, "learning_rate": 0.0009885270685744921, "loss": 1.5585, "step": 7078 }, { "epoch": 0.31479258370014673, "grad_norm": 0.08678824454545975, "learning_rate": 0.000988519545899876, "loss": 1.5599, "step": 7080 }, { "epoch": 0.3148815081588191, "grad_norm": 0.08195692300796509, "learning_rate": 0.0009885120207884434, "loss": 1.565, "step": 7082 }, { "epoch": 0.3149704326174914, "grad_norm": 0.08080308139324188, "learning_rate": 0.000988504493240232, "loss": 1.5663, "step": 7084 }, { "epoch": 0.3150593570761638, "grad_norm": 0.08524754643440247, "learning_rate": 0.0009884969632552787, "loss": 1.5669, "step": 7086 }, { "epoch": 0.31514828153483615, "grad_norm": 0.08281772583723068, "learning_rate": 0.0009884894308336215, "loss": 1.5625, "step": 7088 }, { "epoch": 0.3152372059935085, "grad_norm": 0.08267832547426224, "learning_rate": 0.0009884818959752981, "loss": 1.5649, "step": 7090 }, { "epoch": 0.3153261304521809, "grad_norm": 0.08353657275438309, "learning_rate": 0.0009884743586803457, "loss": 1.5639, "step": 7092 }, { "epoch": 0.31541505491085325, "grad_norm": 0.08342524617910385, "learning_rate": 0.0009884668189488023, "loss": 1.5646, "step": 7094 }, { "epoch": 0.31550397936952557, "grad_norm": 0.08173046261072159, "learning_rate": 0.0009884592767807053, "loss": 1.5661, "step": 7096 }, { "epoch": 0.31559290382819793, "grad_norm": 0.08680406212806702, "learning_rate": 0.0009884517321760923, "loss": 1.5619, "step": 7098 }, { "epoch": 0.3156818282868703, "grad_norm": 0.08208227157592773, "learning_rate": 0.000988444185135001, "loss": 1.5639, "step": 7100 }, { "epoch": 0.31577075274554267, "grad_norm": 0.0871683731675148, "learning_rate": 0.0009884366356574692, "loss": 1.5657, "step": 7102 }, { "epoch": 0.31585967720421504, "grad_norm": 0.08410225808620453, "learning_rate": 0.0009884290837435341, "loss": 1.5616, "step": 7104 }, { "epoch": 0.31594860166288735, "grad_norm": 0.08295659720897675, "learning_rate": 0.0009884215293932339, "loss": 1.5653, "step": 7106 }, { "epoch": 0.3160375261215597, "grad_norm": 0.08682281523942947, "learning_rate": 0.000988413972606606, "loss": 1.5636, "step": 7108 }, { "epoch": 0.3161264505802321, "grad_norm": 0.08395165205001831, "learning_rate": 0.000988406413383688, "loss": 1.5628, "step": 7110 }, { "epoch": 0.31621537503890446, "grad_norm": 0.08330830186605453, "learning_rate": 0.0009883988517245177, "loss": 1.5585, "step": 7112 }, { "epoch": 0.3163042994975768, "grad_norm": 0.08235388249158859, "learning_rate": 0.0009883912876291329, "loss": 1.5712, "step": 7114 }, { "epoch": 0.3163932239562492, "grad_norm": 0.08317794650793076, "learning_rate": 0.0009883837210975714, "loss": 1.5658, "step": 7116 }, { "epoch": 0.3164821484149215, "grad_norm": 0.08337473124265671, "learning_rate": 0.0009883761521298708, "loss": 1.5615, "step": 7118 }, { "epoch": 0.3165710728735939, "grad_norm": 0.08434969931840897, "learning_rate": 0.000988368580726069, "loss": 1.5617, "step": 7120 }, { "epoch": 0.31665999733226624, "grad_norm": 0.08485856652259827, "learning_rate": 0.0009883610068862035, "loss": 1.5657, "step": 7122 }, { "epoch": 0.3167489217909386, "grad_norm": 0.08327708393335342, "learning_rate": 0.0009883534306103121, "loss": 1.5656, "step": 7124 }, { "epoch": 0.316837846249611, "grad_norm": 0.08332667499780655, "learning_rate": 0.000988345851898433, "loss": 1.5645, "step": 7126 }, { "epoch": 0.3169267707082833, "grad_norm": 0.08230622857809067, "learning_rate": 0.0009883382707506036, "loss": 1.5623, "step": 7128 }, { "epoch": 0.31701569516695566, "grad_norm": 0.08090732246637344, "learning_rate": 0.0009883306871668618, "loss": 1.5695, "step": 7130 }, { "epoch": 0.317104619625628, "grad_norm": 0.08214423060417175, "learning_rate": 0.0009883231011472455, "loss": 1.5633, "step": 7132 }, { "epoch": 0.3171935440843004, "grad_norm": 0.08126901835203171, "learning_rate": 0.0009883155126917925, "loss": 1.5634, "step": 7134 }, { "epoch": 0.31728246854297276, "grad_norm": 0.08316788822412491, "learning_rate": 0.0009883079218005407, "loss": 1.5626, "step": 7136 }, { "epoch": 0.31737139300164513, "grad_norm": 0.0886952131986618, "learning_rate": 0.000988300328473528, "loss": 1.5624, "step": 7138 }, { "epoch": 0.31746031746031744, "grad_norm": 0.0825994685292244, "learning_rate": 0.0009882927327107922, "loss": 1.5671, "step": 7140 }, { "epoch": 0.3175492419189898, "grad_norm": 0.09127391129732132, "learning_rate": 0.000988285134512371, "loss": 1.5592, "step": 7142 }, { "epoch": 0.3176381663776622, "grad_norm": 0.08798851072788239, "learning_rate": 0.0009882775338783028, "loss": 1.5618, "step": 7144 }, { "epoch": 0.31772709083633455, "grad_norm": 0.09108838438987732, "learning_rate": 0.000988269930808625, "loss": 1.5644, "step": 7146 }, { "epoch": 0.3178160152950069, "grad_norm": 0.08250788599252701, "learning_rate": 0.0009882623253033758, "loss": 1.5662, "step": 7148 }, { "epoch": 0.3179049397536792, "grad_norm": 0.08027481287717819, "learning_rate": 0.000988254717362593, "loss": 1.5601, "step": 7150 }, { "epoch": 0.3179938642123516, "grad_norm": 0.08304357528686523, "learning_rate": 0.0009882471069863146, "loss": 1.5578, "step": 7152 }, { "epoch": 0.31808278867102396, "grad_norm": 0.08454793691635132, "learning_rate": 0.0009882394941745788, "loss": 1.5567, "step": 7154 }, { "epoch": 0.31817171312969633, "grad_norm": 0.08314481377601624, "learning_rate": 0.0009882318789274233, "loss": 1.5692, "step": 7156 }, { "epoch": 0.3182606375883687, "grad_norm": 0.08372924476861954, "learning_rate": 0.000988224261244886, "loss": 1.5597, "step": 7158 }, { "epoch": 0.318349562047041, "grad_norm": 0.08202824741601944, "learning_rate": 0.000988216641127005, "loss": 1.5669, "step": 7160 }, { "epoch": 0.3184384865057134, "grad_norm": 0.08950933814048767, "learning_rate": 0.0009882090185738186, "loss": 1.5682, "step": 7162 }, { "epoch": 0.31852741096438575, "grad_norm": 0.08493774384260178, "learning_rate": 0.0009882013935853644, "loss": 1.5685, "step": 7164 }, { "epoch": 0.3186163354230581, "grad_norm": 0.08362516015768051, "learning_rate": 0.0009881937661616806, "loss": 1.5685, "step": 7166 }, { "epoch": 0.3187052598817305, "grad_norm": 0.08802633732557297, "learning_rate": 0.0009881861363028053, "loss": 1.5684, "step": 7168 }, { "epoch": 0.31879418434040285, "grad_norm": 0.08594125509262085, "learning_rate": 0.0009881785040087765, "loss": 1.5581, "step": 7170 }, { "epoch": 0.31888310879907517, "grad_norm": 0.08420670032501221, "learning_rate": 0.0009881708692796325, "loss": 1.5626, "step": 7172 }, { "epoch": 0.31897203325774753, "grad_norm": 0.08396308124065399, "learning_rate": 0.000988163232115411, "loss": 1.5624, "step": 7174 }, { "epoch": 0.3190609577164199, "grad_norm": 0.08624763786792755, "learning_rate": 0.00098815559251615, "loss": 1.5667, "step": 7176 }, { "epoch": 0.31914988217509227, "grad_norm": 0.0823800191283226, "learning_rate": 0.0009881479504818883, "loss": 1.5654, "step": 7178 }, { "epoch": 0.31923880663376464, "grad_norm": 0.08368480205535889, "learning_rate": 0.0009881403060126635, "loss": 1.5673, "step": 7180 }, { "epoch": 0.31932773109243695, "grad_norm": 0.08373897522687912, "learning_rate": 0.0009881326591085135, "loss": 1.565, "step": 7182 }, { "epoch": 0.3194166555511093, "grad_norm": 0.08619046956300735, "learning_rate": 0.0009881250097694772, "loss": 1.5597, "step": 7184 }, { "epoch": 0.3195055800097817, "grad_norm": 0.08758384734392166, "learning_rate": 0.000988117357995592, "loss": 1.5618, "step": 7186 }, { "epoch": 0.31959450446845405, "grad_norm": 0.08243545889854431, "learning_rate": 0.0009881097037868966, "loss": 1.5549, "step": 7188 }, { "epoch": 0.3196834289271264, "grad_norm": 0.07898540049791336, "learning_rate": 0.000988102047143429, "loss": 1.5597, "step": 7190 }, { "epoch": 0.3197723533857988, "grad_norm": 0.08437183499336243, "learning_rate": 0.0009880943880652274, "loss": 1.5637, "step": 7192 }, { "epoch": 0.3198612778444711, "grad_norm": 0.08325666189193726, "learning_rate": 0.0009880867265523296, "loss": 1.5613, "step": 7194 }, { "epoch": 0.31995020230314347, "grad_norm": 0.08307388424873352, "learning_rate": 0.0009880790626047747, "loss": 1.5627, "step": 7196 }, { "epoch": 0.32003912676181584, "grad_norm": 0.08156055212020874, "learning_rate": 0.0009880713962226, "loss": 1.5666, "step": 7198 }, { "epoch": 0.3201280512204882, "grad_norm": 0.08196540176868439, "learning_rate": 0.0009880637274058443, "loss": 1.5627, "step": 7200 }, { "epoch": 0.3202169756791606, "grad_norm": 0.08538369089365005, "learning_rate": 0.0009880560561545458, "loss": 1.5644, "step": 7202 }, { "epoch": 0.3203059001378329, "grad_norm": 0.08306334167718887, "learning_rate": 0.0009880483824687427, "loss": 1.5582, "step": 7204 }, { "epoch": 0.32039482459650526, "grad_norm": 0.08345794677734375, "learning_rate": 0.0009880407063484731, "loss": 1.5538, "step": 7206 }, { "epoch": 0.3204837490551776, "grad_norm": 0.08639951795339584, "learning_rate": 0.0009880330277937757, "loss": 1.5601, "step": 7208 }, { "epoch": 0.32057267351385, "grad_norm": 0.08315160125494003, "learning_rate": 0.0009880253468046883, "loss": 1.5591, "step": 7210 }, { "epoch": 0.32066159797252236, "grad_norm": 0.0834464579820633, "learning_rate": 0.0009880176633812493, "loss": 1.5663, "step": 7212 }, { "epoch": 0.3207505224311947, "grad_norm": 0.08205483853816986, "learning_rate": 0.0009880099775234977, "loss": 1.5662, "step": 7214 }, { "epoch": 0.32083944688986704, "grad_norm": 0.08744161576032639, "learning_rate": 0.000988002289231471, "loss": 1.5548, "step": 7216 }, { "epoch": 0.3209283713485394, "grad_norm": 0.08676169067621231, "learning_rate": 0.000987994598505208, "loss": 1.5595, "step": 7218 }, { "epoch": 0.3210172958072118, "grad_norm": 0.08752325177192688, "learning_rate": 0.0009879869053447468, "loss": 1.5624, "step": 7220 }, { "epoch": 0.32110622026588415, "grad_norm": 0.08283118903636932, "learning_rate": 0.0009879792097501258, "loss": 1.5675, "step": 7222 }, { "epoch": 0.3211951447245565, "grad_norm": 0.08117381483316422, "learning_rate": 0.0009879715117213838, "loss": 1.5604, "step": 7224 }, { "epoch": 0.3212840691832288, "grad_norm": 0.08399328589439392, "learning_rate": 0.0009879638112585587, "loss": 1.5532, "step": 7226 }, { "epoch": 0.3213729936419012, "grad_norm": 0.0844864770770073, "learning_rate": 0.0009879561083616893, "loss": 1.5662, "step": 7228 }, { "epoch": 0.32146191810057356, "grad_norm": 0.08030157536268234, "learning_rate": 0.0009879484030308136, "loss": 1.5637, "step": 7230 }, { "epoch": 0.32155084255924593, "grad_norm": 0.08214158564805984, "learning_rate": 0.0009879406952659705, "loss": 1.5642, "step": 7232 }, { "epoch": 0.3216397670179183, "grad_norm": 0.08249089121818542, "learning_rate": 0.0009879329850671981, "loss": 1.5614, "step": 7234 }, { "epoch": 0.3217286914765906, "grad_norm": 0.0858563482761383, "learning_rate": 0.000987925272434535, "loss": 1.5587, "step": 7236 }, { "epoch": 0.321817615935263, "grad_norm": 0.08245126157999039, "learning_rate": 0.0009879175573680196, "loss": 1.5641, "step": 7238 }, { "epoch": 0.32190654039393535, "grad_norm": 0.0817423164844513, "learning_rate": 0.0009879098398676905, "loss": 1.5613, "step": 7240 }, { "epoch": 0.3219954648526077, "grad_norm": 0.08229317516088486, "learning_rate": 0.0009879021199335862, "loss": 1.558, "step": 7242 }, { "epoch": 0.3220843893112801, "grad_norm": 0.0869976207613945, "learning_rate": 0.000987894397565745, "loss": 1.556, "step": 7244 }, { "epoch": 0.32217331376995245, "grad_norm": 0.08708877116441727, "learning_rate": 0.0009878866727642056, "loss": 1.5642, "step": 7246 }, { "epoch": 0.32226223822862476, "grad_norm": 0.08197760581970215, "learning_rate": 0.0009878789455290064, "loss": 1.5577, "step": 7248 }, { "epoch": 0.32235116268729713, "grad_norm": 0.08504566550254822, "learning_rate": 0.0009878712158601863, "loss": 1.5633, "step": 7250 }, { "epoch": 0.3224400871459695, "grad_norm": 0.0837409570813179, "learning_rate": 0.0009878634837577832, "loss": 1.5607, "step": 7252 }, { "epoch": 0.32252901160464187, "grad_norm": 0.08278852701187134, "learning_rate": 0.0009878557492218364, "loss": 1.5618, "step": 7254 }, { "epoch": 0.32261793606331424, "grad_norm": 0.07882179319858551, "learning_rate": 0.0009878480122523842, "loss": 1.5579, "step": 7256 }, { "epoch": 0.32270686052198655, "grad_norm": 0.08126863837242126, "learning_rate": 0.0009878402728494648, "loss": 1.5578, "step": 7258 }, { "epoch": 0.3227957849806589, "grad_norm": 0.08077924698591232, "learning_rate": 0.0009878325310131173, "loss": 1.5598, "step": 7260 }, { "epoch": 0.3228847094393313, "grad_norm": 0.08375802636146545, "learning_rate": 0.0009878247867433803, "loss": 1.5595, "step": 7262 }, { "epoch": 0.32297363389800365, "grad_norm": 0.08182679116725922, "learning_rate": 0.000987817040040292, "loss": 1.5558, "step": 7264 }, { "epoch": 0.323062558356676, "grad_norm": 0.08209866285324097, "learning_rate": 0.0009878092909038916, "loss": 1.5582, "step": 7266 }, { "epoch": 0.32315148281534833, "grad_norm": 0.08449520170688629, "learning_rate": 0.0009878015393342172, "loss": 1.56, "step": 7268 }, { "epoch": 0.3232404072740207, "grad_norm": 0.07824712991714478, "learning_rate": 0.000987793785331308, "loss": 1.5512, "step": 7270 }, { "epoch": 0.32332933173269307, "grad_norm": 0.0843387246131897, "learning_rate": 0.0009877860288952022, "loss": 1.5599, "step": 7272 }, { "epoch": 0.32341825619136544, "grad_norm": 0.08296310156583786, "learning_rate": 0.000987778270025939, "loss": 1.5652, "step": 7274 }, { "epoch": 0.3235071806500378, "grad_norm": 0.08399280905723572, "learning_rate": 0.0009877705087235566, "loss": 1.5624, "step": 7276 }, { "epoch": 0.3235961051087102, "grad_norm": 0.08151458948850632, "learning_rate": 0.000987762744988094, "loss": 1.5661, "step": 7278 }, { "epoch": 0.3236850295673825, "grad_norm": 0.08649469912052155, "learning_rate": 0.00098775497881959, "loss": 1.5541, "step": 7280 }, { "epoch": 0.32377395402605486, "grad_norm": 0.0848914235830307, "learning_rate": 0.0009877472102180831, "loss": 1.5622, "step": 7282 }, { "epoch": 0.3238628784847272, "grad_norm": 0.08599822223186493, "learning_rate": 0.0009877394391836123, "loss": 1.5586, "step": 7284 }, { "epoch": 0.3239518029433996, "grad_norm": 0.09074999392032623, "learning_rate": 0.000987731665716216, "loss": 1.5584, "step": 7286 }, { "epoch": 0.32404072740207196, "grad_norm": 0.08198599517345428, "learning_rate": 0.0009877238898159332, "loss": 1.5623, "step": 7288 }, { "epoch": 0.3241296518607443, "grad_norm": 0.08142637461423874, "learning_rate": 0.0009877161114828026, "loss": 1.5591, "step": 7290 }, { "epoch": 0.32421857631941664, "grad_norm": 0.08540283888578415, "learning_rate": 0.0009877083307168633, "loss": 1.562, "step": 7292 }, { "epoch": 0.324307500778089, "grad_norm": 0.08285167068243027, "learning_rate": 0.0009877005475181539, "loss": 1.5581, "step": 7294 }, { "epoch": 0.3243964252367614, "grad_norm": 0.08604173362255096, "learning_rate": 0.000987692761886713, "loss": 1.5646, "step": 7296 }, { "epoch": 0.32448534969543374, "grad_norm": 0.08079639822244644, "learning_rate": 0.0009876849738225798, "loss": 1.5611, "step": 7298 }, { "epoch": 0.3245742741541061, "grad_norm": 0.08307557553052902, "learning_rate": 0.000987677183325793, "loss": 1.5594, "step": 7300 }, { "epoch": 0.3246631986127784, "grad_norm": 0.08180294185876846, "learning_rate": 0.0009876693903963913, "loss": 1.5631, "step": 7302 }, { "epoch": 0.3247521230714508, "grad_norm": 0.08266652375459671, "learning_rate": 0.000987661595034414, "loss": 1.559, "step": 7304 }, { "epoch": 0.32484104753012316, "grad_norm": 0.07898559421300888, "learning_rate": 0.0009876537972398995, "loss": 1.5604, "step": 7306 }, { "epoch": 0.32492997198879553, "grad_norm": 0.0870475322008133, "learning_rate": 0.000987645997012887, "loss": 1.5617, "step": 7308 }, { "epoch": 0.3250188964474679, "grad_norm": 0.08088637888431549, "learning_rate": 0.0009876381943534154, "loss": 1.5604, "step": 7310 }, { "epoch": 0.3251078209061402, "grad_norm": 0.08533639460802078, "learning_rate": 0.0009876303892615234, "loss": 1.5555, "step": 7312 }, { "epoch": 0.3251967453648126, "grad_norm": 0.08240604400634766, "learning_rate": 0.0009876225817372502, "loss": 1.5609, "step": 7314 }, { "epoch": 0.32528566982348495, "grad_norm": 0.08196206390857697, "learning_rate": 0.0009876147717806346, "loss": 1.5579, "step": 7316 }, { "epoch": 0.3253745942821573, "grad_norm": 0.08041080087423325, "learning_rate": 0.0009876069593917154, "loss": 1.5621, "step": 7318 }, { "epoch": 0.3254635187408297, "grad_norm": 0.07944405823945999, "learning_rate": 0.000987599144570532, "loss": 1.5583, "step": 7320 }, { "epoch": 0.32555244319950205, "grad_norm": 0.0833417996764183, "learning_rate": 0.000987591327317123, "loss": 1.5588, "step": 7322 }, { "epoch": 0.32564136765817436, "grad_norm": 0.08267171680927277, "learning_rate": 0.0009875835076315273, "loss": 1.5576, "step": 7324 }, { "epoch": 0.32573029211684673, "grad_norm": 0.08149762451648712, "learning_rate": 0.0009875756855137845, "loss": 1.5535, "step": 7326 }, { "epoch": 0.3258192165755191, "grad_norm": 0.0837164968252182, "learning_rate": 0.0009875678609639331, "loss": 1.5554, "step": 7328 }, { "epoch": 0.32590814103419147, "grad_norm": 0.08050676435232162, "learning_rate": 0.0009875600339820123, "loss": 1.561, "step": 7330 }, { "epoch": 0.32599706549286384, "grad_norm": 0.08078650385141373, "learning_rate": 0.000987552204568061, "loss": 1.5543, "step": 7332 }, { "epoch": 0.32608598995153615, "grad_norm": 0.08506888151168823, "learning_rate": 0.0009875443727221185, "loss": 1.5606, "step": 7334 }, { "epoch": 0.3261749144102085, "grad_norm": 0.08433322608470917, "learning_rate": 0.0009875365384442237, "loss": 1.5563, "step": 7336 }, { "epoch": 0.3262638388688809, "grad_norm": 0.08262262493371964, "learning_rate": 0.000987528701734416, "loss": 1.565, "step": 7338 }, { "epoch": 0.32635276332755325, "grad_norm": 0.08206149190664291, "learning_rate": 0.0009875208625927338, "loss": 1.5562, "step": 7340 }, { "epoch": 0.3264416877862256, "grad_norm": 0.08632387220859528, "learning_rate": 0.0009875130210192167, "loss": 1.5566, "step": 7342 }, { "epoch": 0.32653061224489793, "grad_norm": 0.08040108531713486, "learning_rate": 0.0009875051770139038, "loss": 1.5591, "step": 7344 }, { "epoch": 0.3266195367035703, "grad_norm": 0.08796191215515137, "learning_rate": 0.0009874973305768343, "loss": 1.5635, "step": 7346 }, { "epoch": 0.32670846116224267, "grad_norm": 0.08850188553333282, "learning_rate": 0.000987489481708047, "loss": 1.5547, "step": 7348 }, { "epoch": 0.32679738562091504, "grad_norm": 0.08623901754617691, "learning_rate": 0.000987481630407581, "loss": 1.5583, "step": 7350 }, { "epoch": 0.3268863100795874, "grad_norm": 0.08186235278844833, "learning_rate": 0.0009874737766754763, "loss": 1.5568, "step": 7352 }, { "epoch": 0.3269752345382598, "grad_norm": 0.08264327794313431, "learning_rate": 0.000987465920511771, "loss": 1.5589, "step": 7354 }, { "epoch": 0.3270641589969321, "grad_norm": 0.08318621665239334, "learning_rate": 0.000987458061916505, "loss": 1.5578, "step": 7356 }, { "epoch": 0.32715308345560445, "grad_norm": 0.08507546037435532, "learning_rate": 0.0009874502008897174, "loss": 1.5628, "step": 7358 }, { "epoch": 0.3272420079142768, "grad_norm": 0.08034215867519379, "learning_rate": 0.000987442337431447, "loss": 1.5597, "step": 7360 }, { "epoch": 0.3273309323729492, "grad_norm": 0.08105006068944931, "learning_rate": 0.0009874344715417333, "loss": 1.5562, "step": 7362 }, { "epoch": 0.32741985683162156, "grad_norm": 0.07907504588365555, "learning_rate": 0.0009874266032206156, "loss": 1.5649, "step": 7364 }, { "epoch": 0.32750878129029387, "grad_norm": 0.08351552486419678, "learning_rate": 0.0009874187324681331, "loss": 1.563, "step": 7366 }, { "epoch": 0.32759770574896624, "grad_norm": 0.07940589636564255, "learning_rate": 0.0009874108592843253, "loss": 1.5597, "step": 7368 }, { "epoch": 0.3276866302076386, "grad_norm": 0.0827498808503151, "learning_rate": 0.000987402983669231, "loss": 1.5544, "step": 7370 }, { "epoch": 0.327775554666311, "grad_norm": 0.08331042528152466, "learning_rate": 0.0009873951056228898, "loss": 1.56, "step": 7372 }, { "epoch": 0.32786447912498334, "grad_norm": 0.08180607855319977, "learning_rate": 0.000987387225145341, "loss": 1.5565, "step": 7374 }, { "epoch": 0.3279534035836557, "grad_norm": 0.08340569585561752, "learning_rate": 0.0009873793422366234, "loss": 1.5608, "step": 7376 }, { "epoch": 0.328042328042328, "grad_norm": 0.08175882697105408, "learning_rate": 0.000987371456896777, "loss": 1.5607, "step": 7378 }, { "epoch": 0.3281312525010004, "grad_norm": 0.0852338895201683, "learning_rate": 0.000987363569125841, "loss": 1.5584, "step": 7380 }, { "epoch": 0.32822017695967276, "grad_norm": 0.0846090242266655, "learning_rate": 0.0009873556789238545, "loss": 1.5586, "step": 7382 }, { "epoch": 0.32830910141834513, "grad_norm": 0.07967184484004974, "learning_rate": 0.000987347786290857, "loss": 1.5576, "step": 7384 }, { "epoch": 0.3283980258770175, "grad_norm": 0.08349616080522537, "learning_rate": 0.000987339891226888, "loss": 1.5628, "step": 7386 }, { "epoch": 0.3284869503356898, "grad_norm": 0.08276060223579407, "learning_rate": 0.0009873319937319864, "loss": 1.5597, "step": 7388 }, { "epoch": 0.3285758747943622, "grad_norm": 0.08043144643306732, "learning_rate": 0.0009873240938061921, "loss": 1.5537, "step": 7390 }, { "epoch": 0.32866479925303455, "grad_norm": 0.08394316583871841, "learning_rate": 0.0009873161914495444, "loss": 1.5573, "step": 7392 }, { "epoch": 0.3287537237117069, "grad_norm": 0.08426385372877121, "learning_rate": 0.0009873082866620824, "loss": 1.558, "step": 7394 }, { "epoch": 0.3288426481703793, "grad_norm": 0.08154802769422531, "learning_rate": 0.000987300379443846, "loss": 1.5572, "step": 7396 }, { "epoch": 0.3289315726290516, "grad_norm": 0.08731735497713089, "learning_rate": 0.0009872924697948745, "loss": 1.5567, "step": 7398 }, { "epoch": 0.32902049708772396, "grad_norm": 0.08999709039926529, "learning_rate": 0.000987284557715207, "loss": 1.556, "step": 7400 }, { "epoch": 0.32910942154639633, "grad_norm": 0.08462511003017426, "learning_rate": 0.0009872766432048835, "loss": 1.5585, "step": 7402 }, { "epoch": 0.3291983460050687, "grad_norm": 0.08666058629751205, "learning_rate": 0.000987268726263943, "loss": 1.5567, "step": 7404 }, { "epoch": 0.32928727046374107, "grad_norm": 0.08502695709466934, "learning_rate": 0.0009872608068924253, "loss": 1.5566, "step": 7406 }, { "epoch": 0.32937619492241343, "grad_norm": 0.08488810807466507, "learning_rate": 0.0009872528850903698, "loss": 1.5529, "step": 7408 }, { "epoch": 0.32946511938108575, "grad_norm": 0.08286993950605392, "learning_rate": 0.000987244960857816, "loss": 1.5639, "step": 7410 }, { "epoch": 0.3295540438397581, "grad_norm": 0.0867101177573204, "learning_rate": 0.0009872370341948036, "loss": 1.559, "step": 7412 }, { "epoch": 0.3296429682984305, "grad_norm": 0.0828615054488182, "learning_rate": 0.0009872291051013719, "loss": 1.5587, "step": 7414 }, { "epoch": 0.32973189275710285, "grad_norm": 0.08587847650051117, "learning_rate": 0.0009872211735775605, "loss": 1.5599, "step": 7416 }, { "epoch": 0.3298208172157752, "grad_norm": 0.08348828554153442, "learning_rate": 0.000987213239623409, "loss": 1.5556, "step": 7418 }, { "epoch": 0.32990974167444753, "grad_norm": 0.08008415997028351, "learning_rate": 0.0009872053032389573, "loss": 1.5642, "step": 7420 }, { "epoch": 0.3299986661331199, "grad_norm": 0.0780973806977272, "learning_rate": 0.0009871973644242442, "loss": 1.5563, "step": 7422 }, { "epoch": 0.33008759059179227, "grad_norm": 0.08216983824968338, "learning_rate": 0.00098718942317931, "loss": 1.5608, "step": 7424 }, { "epoch": 0.33017651505046464, "grad_norm": 0.08794374763965607, "learning_rate": 0.0009871814795041941, "loss": 1.5622, "step": 7426 }, { "epoch": 0.330265439509137, "grad_norm": 0.09090631455183029, "learning_rate": 0.0009871735333989362, "loss": 1.5562, "step": 7428 }, { "epoch": 0.3303543639678094, "grad_norm": 0.08172343671321869, "learning_rate": 0.0009871655848635757, "loss": 1.5569, "step": 7430 }, { "epoch": 0.3304432884264817, "grad_norm": 0.08407451957464218, "learning_rate": 0.0009871576338981525, "loss": 1.5586, "step": 7432 }, { "epoch": 0.33053221288515405, "grad_norm": 0.08738183975219727, "learning_rate": 0.000987149680502706, "loss": 1.5631, "step": 7434 }, { "epoch": 0.3306211373438264, "grad_norm": 0.09222719818353653, "learning_rate": 0.000987141724677276, "loss": 1.5581, "step": 7436 }, { "epoch": 0.3307100618024988, "grad_norm": 0.08091361075639725, "learning_rate": 0.0009871337664219024, "loss": 1.5561, "step": 7438 }, { "epoch": 0.33079898626117116, "grad_norm": 0.09107191115617752, "learning_rate": 0.0009871258057366247, "loss": 1.5559, "step": 7440 }, { "epoch": 0.33088791071984347, "grad_norm": 0.08714121580123901, "learning_rate": 0.0009871178426214826, "loss": 1.5662, "step": 7442 }, { "epoch": 0.33097683517851584, "grad_norm": 0.0870809406042099, "learning_rate": 0.000987109877076516, "loss": 1.5584, "step": 7444 }, { "epoch": 0.3310657596371882, "grad_norm": 0.0869300365447998, "learning_rate": 0.0009871019091017641, "loss": 1.5596, "step": 7446 }, { "epoch": 0.3311546840958606, "grad_norm": 0.08757723122835159, "learning_rate": 0.0009870939386972674, "loss": 1.558, "step": 7448 }, { "epoch": 0.33124360855453294, "grad_norm": 0.08233436197042465, "learning_rate": 0.000987085965863065, "loss": 1.5573, "step": 7450 }, { "epoch": 0.33133253301320525, "grad_norm": 0.08261076360940933, "learning_rate": 0.0009870779905991971, "loss": 1.5639, "step": 7452 }, { "epoch": 0.3314214574718776, "grad_norm": 0.08498179167509079, "learning_rate": 0.0009870700129057032, "loss": 1.5584, "step": 7454 }, { "epoch": 0.33151038193055, "grad_norm": 0.08367268741130829, "learning_rate": 0.0009870620327826233, "loss": 1.5633, "step": 7456 }, { "epoch": 0.33159930638922236, "grad_norm": 0.08087131381034851, "learning_rate": 0.0009870540502299973, "loss": 1.5592, "step": 7458 }, { "epoch": 0.3316882308478947, "grad_norm": 0.08614420145750046, "learning_rate": 0.0009870460652478645, "loss": 1.5625, "step": 7460 }, { "epoch": 0.3317771553065671, "grad_norm": 0.08858582377433777, "learning_rate": 0.0009870380778362654, "loss": 1.5612, "step": 7462 }, { "epoch": 0.3318660797652394, "grad_norm": 0.08249551802873611, "learning_rate": 0.0009870300879952394, "loss": 1.5645, "step": 7464 }, { "epoch": 0.3319550042239118, "grad_norm": 0.08597932755947113, "learning_rate": 0.0009870220957248264, "loss": 1.5571, "step": 7466 }, { "epoch": 0.33204392868258414, "grad_norm": 0.08219864219427109, "learning_rate": 0.0009870141010250666, "loss": 1.5608, "step": 7468 }, { "epoch": 0.3321328531412565, "grad_norm": 0.08492123335599899, "learning_rate": 0.0009870061038959994, "loss": 1.5632, "step": 7470 }, { "epoch": 0.3322217775999289, "grad_norm": 0.08149886131286621, "learning_rate": 0.0009869981043376648, "loss": 1.5541, "step": 7472 }, { "epoch": 0.3323107020586012, "grad_norm": 0.08603468537330627, "learning_rate": 0.000986990102350103, "loss": 1.5528, "step": 7474 }, { "epoch": 0.33239962651727356, "grad_norm": 0.0839407667517662, "learning_rate": 0.0009869820979333539, "loss": 1.5573, "step": 7476 }, { "epoch": 0.33248855097594593, "grad_norm": 0.07948873937129974, "learning_rate": 0.0009869740910874569, "loss": 1.5646, "step": 7478 }, { "epoch": 0.3325774754346183, "grad_norm": 0.08331003040075302, "learning_rate": 0.0009869660818124526, "loss": 1.5538, "step": 7480 }, { "epoch": 0.33266639989329067, "grad_norm": 0.08100567013025284, "learning_rate": 0.0009869580701083805, "loss": 1.5628, "step": 7482 }, { "epoch": 0.33275532435196303, "grad_norm": 0.08268828690052032, "learning_rate": 0.0009869500559752807, "loss": 1.5591, "step": 7484 }, { "epoch": 0.33284424881063535, "grad_norm": 0.08128658682107925, "learning_rate": 0.0009869420394131932, "loss": 1.559, "step": 7486 }, { "epoch": 0.3329331732693077, "grad_norm": 0.07909125089645386, "learning_rate": 0.0009869340204221582, "loss": 1.5581, "step": 7488 }, { "epoch": 0.3330220977279801, "grad_norm": 0.08491683751344681, "learning_rate": 0.0009869259990022152, "loss": 1.5523, "step": 7490 }, { "epoch": 0.33311102218665245, "grad_norm": 0.08070467412471771, "learning_rate": 0.0009869179751534046, "loss": 1.5558, "step": 7492 }, { "epoch": 0.3331999466453248, "grad_norm": 0.08046550303697586, "learning_rate": 0.0009869099488757662, "loss": 1.5511, "step": 7494 }, { "epoch": 0.33328887110399713, "grad_norm": 0.08419077098369598, "learning_rate": 0.0009869019201693403, "loss": 1.5632, "step": 7496 }, { "epoch": 0.3333777955626695, "grad_norm": 0.07918644696474075, "learning_rate": 0.0009868938890341668, "loss": 1.5552, "step": 7498 }, { "epoch": 0.33346672002134187, "grad_norm": 0.08392981439828873, "learning_rate": 0.0009868858554702856, "loss": 1.5588, "step": 7500 }, { "epoch": 0.33346672002134187, "eval_loss": 1.5330219268798828, "eval_runtime": 12.4092, "eval_samples_per_second": 556.847, "eval_steps_per_second": 69.626, "step": 7500 }, { "epoch": 0.33355564448001424, "grad_norm": 0.09008117020130157, "learning_rate": 0.0009868778194777371, "loss": 1.5609, "step": 7502 }, { "epoch": 0.3336445689386866, "grad_norm": 0.08291777223348618, "learning_rate": 0.0009868697810565613, "loss": 1.5649, "step": 7504 }, { "epoch": 0.33373349339735897, "grad_norm": 0.08472921699285507, "learning_rate": 0.0009868617402067979, "loss": 1.5604, "step": 7506 }, { "epoch": 0.3338224178560313, "grad_norm": 0.08425983041524887, "learning_rate": 0.0009868536969284876, "loss": 1.5625, "step": 7508 }, { "epoch": 0.33391134231470365, "grad_norm": 0.08368197828531265, "learning_rate": 0.0009868456512216702, "loss": 1.5517, "step": 7510 }, { "epoch": 0.334000266773376, "grad_norm": 0.07804959267377853, "learning_rate": 0.0009868376030863857, "loss": 1.5606, "step": 7512 }, { "epoch": 0.3340891912320484, "grad_norm": 0.08549796044826508, "learning_rate": 0.0009868295525226746, "loss": 1.5547, "step": 7514 }, { "epoch": 0.33417811569072076, "grad_norm": 0.08111416548490524, "learning_rate": 0.0009868214995305768, "loss": 1.5567, "step": 7516 }, { "epoch": 0.33426704014939307, "grad_norm": 0.07972585409879684, "learning_rate": 0.0009868134441101326, "loss": 1.5515, "step": 7518 }, { "epoch": 0.33435596460806544, "grad_norm": 0.0834115669131279, "learning_rate": 0.0009868053862613821, "loss": 1.5563, "step": 7520 }, { "epoch": 0.3344448890667378, "grad_norm": 0.08134639263153076, "learning_rate": 0.0009867973259843656, "loss": 1.5539, "step": 7522 }, { "epoch": 0.3345338135254102, "grad_norm": 0.07953674346208572, "learning_rate": 0.0009867892632791232, "loss": 1.5566, "step": 7524 }, { "epoch": 0.33462273798408254, "grad_norm": 0.07886262983083725, "learning_rate": 0.000986781198145695, "loss": 1.5549, "step": 7526 }, { "epoch": 0.33471166244275485, "grad_norm": 0.08340603113174438, "learning_rate": 0.0009867731305841217, "loss": 1.5609, "step": 7528 }, { "epoch": 0.3348005869014272, "grad_norm": 0.08290226757526398, "learning_rate": 0.000986765060594443, "loss": 1.5561, "step": 7530 }, { "epoch": 0.3348895113600996, "grad_norm": 0.08421459048986435, "learning_rate": 0.0009867569881766994, "loss": 1.5549, "step": 7532 }, { "epoch": 0.33497843581877196, "grad_norm": 0.08067211508750916, "learning_rate": 0.0009867489133309312, "loss": 1.5613, "step": 7534 }, { "epoch": 0.3350673602774443, "grad_norm": 0.08332887291908264, "learning_rate": 0.0009867408360571786, "loss": 1.5533, "step": 7536 }, { "epoch": 0.3351562847361167, "grad_norm": 0.08278394490480423, "learning_rate": 0.0009867327563554819, "loss": 1.5563, "step": 7538 }, { "epoch": 0.335245209194789, "grad_norm": 0.08533370494842529, "learning_rate": 0.0009867246742258813, "loss": 1.558, "step": 7540 }, { "epoch": 0.3353341336534614, "grad_norm": 0.08526033163070679, "learning_rate": 0.0009867165896684173, "loss": 1.5586, "step": 7542 }, { "epoch": 0.33542305811213374, "grad_norm": 0.0807008370757103, "learning_rate": 0.0009867085026831304, "loss": 1.563, "step": 7544 }, { "epoch": 0.3355119825708061, "grad_norm": 0.08265620470046997, "learning_rate": 0.0009867004132700606, "loss": 1.5492, "step": 7546 }, { "epoch": 0.3356009070294785, "grad_norm": 0.08214646577835083, "learning_rate": 0.0009866923214292482, "loss": 1.5571, "step": 7548 }, { "epoch": 0.3356898314881508, "grad_norm": 0.0798763632774353, "learning_rate": 0.0009866842271607336, "loss": 1.5533, "step": 7550 }, { "epoch": 0.33577875594682316, "grad_norm": 0.07971591502428055, "learning_rate": 0.0009866761304645575, "loss": 1.5582, "step": 7552 }, { "epoch": 0.33586768040549553, "grad_norm": 0.08346015959978104, "learning_rate": 0.00098666803134076, "loss": 1.5546, "step": 7554 }, { "epoch": 0.3359566048641679, "grad_norm": 0.08312725275754929, "learning_rate": 0.0009866599297893817, "loss": 1.5637, "step": 7556 }, { "epoch": 0.33604552932284026, "grad_norm": 0.07956881821155548, "learning_rate": 0.0009866518258104628, "loss": 1.5593, "step": 7558 }, { "epoch": 0.33613445378151263, "grad_norm": 0.08073802292346954, "learning_rate": 0.0009866437194040439, "loss": 1.5635, "step": 7560 }, { "epoch": 0.33622337824018494, "grad_norm": 0.08115088939666748, "learning_rate": 0.0009866356105701652, "loss": 1.5571, "step": 7562 }, { "epoch": 0.3363123026988573, "grad_norm": 0.08693643659353256, "learning_rate": 0.0009866274993088674, "loss": 1.5552, "step": 7564 }, { "epoch": 0.3364012271575297, "grad_norm": 0.0820782408118248, "learning_rate": 0.0009866193856201907, "loss": 1.5554, "step": 7566 }, { "epoch": 0.33649015161620205, "grad_norm": 0.08132241666316986, "learning_rate": 0.000986611269504176, "loss": 1.5558, "step": 7568 }, { "epoch": 0.3365790760748744, "grad_norm": 0.08526284992694855, "learning_rate": 0.0009866031509608633, "loss": 1.5552, "step": 7570 }, { "epoch": 0.33666800053354673, "grad_norm": 0.08132081478834152, "learning_rate": 0.0009865950299902935, "loss": 1.5619, "step": 7572 }, { "epoch": 0.3367569249922191, "grad_norm": 0.08150200545787811, "learning_rate": 0.0009865869065925068, "loss": 1.5563, "step": 7574 }, { "epoch": 0.33684584945089147, "grad_norm": 0.0802740678191185, "learning_rate": 0.0009865787807675438, "loss": 1.5508, "step": 7576 }, { "epoch": 0.33693477390956383, "grad_norm": 0.08373408019542694, "learning_rate": 0.0009865706525154453, "loss": 1.5592, "step": 7578 }, { "epoch": 0.3370236983682362, "grad_norm": 0.08197604864835739, "learning_rate": 0.0009865625218362514, "loss": 1.5523, "step": 7580 }, { "epoch": 0.3371126228269085, "grad_norm": 0.08425270020961761, "learning_rate": 0.0009865543887300028, "loss": 1.557, "step": 7582 }, { "epoch": 0.3372015472855809, "grad_norm": 0.08232254534959793, "learning_rate": 0.00098654625319674, "loss": 1.5567, "step": 7584 }, { "epoch": 0.33729047174425325, "grad_norm": 0.08390845358371735, "learning_rate": 0.0009865381152365043, "loss": 1.558, "step": 7586 }, { "epoch": 0.3373793962029256, "grad_norm": 0.08453506231307983, "learning_rate": 0.0009865299748493352, "loss": 1.5561, "step": 7588 }, { "epoch": 0.337468320661598, "grad_norm": 0.08450014144182205, "learning_rate": 0.0009865218320352742, "loss": 1.5515, "step": 7590 }, { "epoch": 0.33755724512027035, "grad_norm": 0.083348348736763, "learning_rate": 0.0009865136867943615, "loss": 1.5583, "step": 7592 }, { "epoch": 0.33764616957894267, "grad_norm": 0.0832839161157608, "learning_rate": 0.0009865055391266374, "loss": 1.5618, "step": 7594 }, { "epoch": 0.33773509403761504, "grad_norm": 0.07885057479143143, "learning_rate": 0.0009864973890321431, "loss": 1.5528, "step": 7596 }, { "epoch": 0.3378240184962874, "grad_norm": 0.0822066068649292, "learning_rate": 0.0009864892365109193, "loss": 1.5585, "step": 7598 }, { "epoch": 0.33791294295495977, "grad_norm": 0.08152958005666733, "learning_rate": 0.0009864810815630061, "loss": 1.559, "step": 7600 }, { "epoch": 0.33800186741363214, "grad_norm": 0.08215297013521194, "learning_rate": 0.0009864729241884447, "loss": 1.5569, "step": 7602 }, { "epoch": 0.33809079187230445, "grad_norm": 0.08144047111272812, "learning_rate": 0.0009864647643872757, "loss": 1.558, "step": 7604 }, { "epoch": 0.3381797163309768, "grad_norm": 0.08268721401691437, "learning_rate": 0.0009864566021595396, "loss": 1.5643, "step": 7606 }, { "epoch": 0.3382686407896492, "grad_norm": 0.08536458760499954, "learning_rate": 0.000986448437505277, "loss": 1.5528, "step": 7608 }, { "epoch": 0.33835756524832156, "grad_norm": 0.07940022647380829, "learning_rate": 0.0009864402704245292, "loss": 1.5514, "step": 7610 }, { "epoch": 0.3384464897069939, "grad_norm": 0.0803755670785904, "learning_rate": 0.0009864321009173365, "loss": 1.5582, "step": 7612 }, { "epoch": 0.3385354141656663, "grad_norm": 0.08060748130083084, "learning_rate": 0.0009864239289837395, "loss": 1.559, "step": 7614 }, { "epoch": 0.3386243386243386, "grad_norm": 0.07905225455760956, "learning_rate": 0.0009864157546237795, "loss": 1.5599, "step": 7616 }, { "epoch": 0.338713263083011, "grad_norm": 0.08046825975179672, "learning_rate": 0.0009864075778374967, "loss": 1.5617, "step": 7618 }, { "epoch": 0.33880218754168334, "grad_norm": 0.08096635341644287, "learning_rate": 0.0009863993986249325, "loss": 1.5552, "step": 7620 }, { "epoch": 0.3388911120003557, "grad_norm": 0.08172665536403656, "learning_rate": 0.000986391216986127, "loss": 1.5593, "step": 7622 }, { "epoch": 0.3389800364590281, "grad_norm": 0.0854748860001564, "learning_rate": 0.0009863830329211217, "loss": 1.5591, "step": 7624 }, { "epoch": 0.3390689609177004, "grad_norm": 0.0805579125881195, "learning_rate": 0.0009863748464299569, "loss": 1.5578, "step": 7626 }, { "epoch": 0.33915788537637276, "grad_norm": 0.07932641357183456, "learning_rate": 0.0009863666575126737, "loss": 1.5491, "step": 7628 }, { "epoch": 0.3392468098350451, "grad_norm": 0.0846443846821785, "learning_rate": 0.000986358466169313, "loss": 1.5591, "step": 7630 }, { "epoch": 0.3393357342937175, "grad_norm": 0.08422157168388367, "learning_rate": 0.0009863502723999153, "loss": 1.5564, "step": 7632 }, { "epoch": 0.33942465875238986, "grad_norm": 0.08966944366693497, "learning_rate": 0.0009863420762045217, "loss": 1.5575, "step": 7634 }, { "epoch": 0.3395135832110622, "grad_norm": 0.08580625057220459, "learning_rate": 0.000986333877583173, "loss": 1.5568, "step": 7636 }, { "epoch": 0.33960250766973454, "grad_norm": 0.08479603379964828, "learning_rate": 0.0009863256765359108, "loss": 1.5607, "step": 7638 }, { "epoch": 0.3396914321284069, "grad_norm": 0.0841260626912117, "learning_rate": 0.000986317473062775, "loss": 1.5547, "step": 7640 }, { "epoch": 0.3397803565870793, "grad_norm": 0.08255398273468018, "learning_rate": 0.0009863092671638068, "loss": 1.5613, "step": 7642 }, { "epoch": 0.33986928104575165, "grad_norm": 0.0812351405620575, "learning_rate": 0.0009863010588390473, "loss": 1.5575, "step": 7644 }, { "epoch": 0.339958205504424, "grad_norm": 0.0809001475572586, "learning_rate": 0.0009862928480885375, "loss": 1.5592, "step": 7646 }, { "epoch": 0.34004712996309633, "grad_norm": 0.0820452943444252, "learning_rate": 0.0009862846349123183, "loss": 1.5551, "step": 7648 }, { "epoch": 0.3401360544217687, "grad_norm": 0.08290841430425644, "learning_rate": 0.0009862764193104303, "loss": 1.5607, "step": 7650 }, { "epoch": 0.34022497888044106, "grad_norm": 0.08015654981136322, "learning_rate": 0.0009862682012829152, "loss": 1.5555, "step": 7652 }, { "epoch": 0.34031390333911343, "grad_norm": 0.08518238365650177, "learning_rate": 0.0009862599808298134, "loss": 1.5524, "step": 7654 }, { "epoch": 0.3404028277977858, "grad_norm": 0.08613689988851547, "learning_rate": 0.000986251757951166, "loss": 1.556, "step": 7656 }, { "epoch": 0.3404917522564581, "grad_norm": 0.08661675453186035, "learning_rate": 0.0009862435326470143, "loss": 1.5564, "step": 7658 }, { "epoch": 0.3405806767151305, "grad_norm": 0.07869388163089752, "learning_rate": 0.000986235304917399, "loss": 1.5577, "step": 7660 }, { "epoch": 0.34066960117380285, "grad_norm": 0.08099169284105301, "learning_rate": 0.0009862270747623616, "loss": 1.5534, "step": 7662 }, { "epoch": 0.3407585256324752, "grad_norm": 0.08418874442577362, "learning_rate": 0.0009862188421819425, "loss": 1.5575, "step": 7664 }, { "epoch": 0.3408474500911476, "grad_norm": 0.08251459896564484, "learning_rate": 0.0009862106071761834, "loss": 1.5578, "step": 7666 }, { "epoch": 0.34093637454981995, "grad_norm": 0.08254079520702362, "learning_rate": 0.0009862023697451248, "loss": 1.5507, "step": 7668 }, { "epoch": 0.34102529900849227, "grad_norm": 0.08215059340000153, "learning_rate": 0.0009861941298888082, "loss": 1.5562, "step": 7670 }, { "epoch": 0.34111422346716463, "grad_norm": 0.08373280614614487, "learning_rate": 0.0009861858876072747, "loss": 1.5594, "step": 7672 }, { "epoch": 0.341203147925837, "grad_norm": 0.07917515188455582, "learning_rate": 0.000986177642900565, "loss": 1.5542, "step": 7674 }, { "epoch": 0.34129207238450937, "grad_norm": 0.08267196267843246, "learning_rate": 0.0009861693957687208, "loss": 1.5612, "step": 7676 }, { "epoch": 0.34138099684318174, "grad_norm": 0.08618869632482529, "learning_rate": 0.0009861611462117829, "loss": 1.5604, "step": 7678 }, { "epoch": 0.34146992130185405, "grad_norm": 0.08248917013406754, "learning_rate": 0.0009861528942297923, "loss": 1.5624, "step": 7680 }, { "epoch": 0.3415588457605264, "grad_norm": 0.08377496898174286, "learning_rate": 0.0009861446398227904, "loss": 1.5614, "step": 7682 }, { "epoch": 0.3416477702191988, "grad_norm": 0.08078130334615707, "learning_rate": 0.0009861363829908186, "loss": 1.5528, "step": 7684 }, { "epoch": 0.34173669467787116, "grad_norm": 0.07990232855081558, "learning_rate": 0.0009861281237339176, "loss": 1.5554, "step": 7686 }, { "epoch": 0.3418256191365435, "grad_norm": 0.07614683359861374, "learning_rate": 0.0009861198620521288, "loss": 1.5562, "step": 7688 }, { "epoch": 0.3419145435952159, "grad_norm": 0.08139005303382874, "learning_rate": 0.0009861115979454935, "loss": 1.5544, "step": 7690 }, { "epoch": 0.3420034680538882, "grad_norm": 0.08052778244018555, "learning_rate": 0.000986103331414053, "loss": 1.5592, "step": 7692 }, { "epoch": 0.3420923925125606, "grad_norm": 0.08632178604602814, "learning_rate": 0.0009860950624578481, "loss": 1.5574, "step": 7694 }, { "epoch": 0.34218131697123294, "grad_norm": 0.0841991975903511, "learning_rate": 0.0009860867910769207, "loss": 1.5576, "step": 7696 }, { "epoch": 0.3422702414299053, "grad_norm": 0.08402567356824875, "learning_rate": 0.0009860785172713115, "loss": 1.5525, "step": 7698 }, { "epoch": 0.3423591658885777, "grad_norm": 0.08191527426242828, "learning_rate": 0.000986070241041062, "loss": 1.5613, "step": 7700 }, { "epoch": 0.34244809034725, "grad_norm": 0.08203962445259094, "learning_rate": 0.0009860619623862136, "loss": 1.5565, "step": 7702 }, { "epoch": 0.34253701480592236, "grad_norm": 0.07740230858325958, "learning_rate": 0.0009860536813068072, "loss": 1.5574, "step": 7704 }, { "epoch": 0.3426259392645947, "grad_norm": 0.07994545996189117, "learning_rate": 0.0009860453978028844, "loss": 1.5573, "step": 7706 }, { "epoch": 0.3427148637232671, "grad_norm": 0.08103641122579575, "learning_rate": 0.0009860371118744866, "loss": 1.5566, "step": 7708 }, { "epoch": 0.34280378818193946, "grad_norm": 0.08051252365112305, "learning_rate": 0.0009860288235216549, "loss": 1.5603, "step": 7710 }, { "epoch": 0.3428927126406118, "grad_norm": 0.08033636957406998, "learning_rate": 0.0009860205327444308, "loss": 1.5545, "step": 7712 }, { "epoch": 0.34298163709928414, "grad_norm": 0.08459188044071198, "learning_rate": 0.0009860122395428555, "loss": 1.5595, "step": 7714 }, { "epoch": 0.3430705615579565, "grad_norm": 0.08079506456851959, "learning_rate": 0.0009860039439169705, "loss": 1.5565, "step": 7716 }, { "epoch": 0.3431594860166289, "grad_norm": 0.08086486160755157, "learning_rate": 0.0009859956458668174, "loss": 1.5585, "step": 7718 }, { "epoch": 0.34324841047530125, "grad_norm": 0.0799533799290657, "learning_rate": 0.000985987345392437, "loss": 1.5492, "step": 7720 }, { "epoch": 0.3433373349339736, "grad_norm": 0.07945334911346436, "learning_rate": 0.0009859790424938714, "loss": 1.5486, "step": 7722 }, { "epoch": 0.3434262593926459, "grad_norm": 0.07963025569915771, "learning_rate": 0.0009859707371711614, "loss": 1.5606, "step": 7724 }, { "epoch": 0.3435151838513183, "grad_norm": 0.08368024230003357, "learning_rate": 0.0009859624294243487, "loss": 1.5599, "step": 7726 }, { "epoch": 0.34360410830999066, "grad_norm": 0.07972510904073715, "learning_rate": 0.000985954119253475, "loss": 1.5461, "step": 7728 }, { "epoch": 0.34369303276866303, "grad_norm": 0.08038385957479477, "learning_rate": 0.000985945806658581, "loss": 1.5568, "step": 7730 }, { "epoch": 0.3437819572273354, "grad_norm": 0.08230438828468323, "learning_rate": 0.0009859374916397093, "loss": 1.5553, "step": 7732 }, { "epoch": 0.3438708816860077, "grad_norm": 0.08172792196273804, "learning_rate": 0.0009859291741969004, "loss": 1.5544, "step": 7734 }, { "epoch": 0.3439598061446801, "grad_norm": 0.08643286675214767, "learning_rate": 0.0009859208543301962, "loss": 1.5551, "step": 7736 }, { "epoch": 0.34404873060335245, "grad_norm": 0.08269612491130829, "learning_rate": 0.0009859125320396379, "loss": 1.5538, "step": 7738 }, { "epoch": 0.3441376550620248, "grad_norm": 0.08148770779371262, "learning_rate": 0.0009859042073252673, "loss": 1.5511, "step": 7740 }, { "epoch": 0.3442265795206972, "grad_norm": 0.0830964595079422, "learning_rate": 0.0009858958801871257, "loss": 1.559, "step": 7742 }, { "epoch": 0.34431550397936955, "grad_norm": 0.08528967946767807, "learning_rate": 0.0009858875506252552, "loss": 1.5578, "step": 7744 }, { "epoch": 0.34440442843804187, "grad_norm": 0.08497735857963562, "learning_rate": 0.0009858792186396967, "loss": 1.5565, "step": 7746 }, { "epoch": 0.34449335289671423, "grad_norm": 0.07968219369649887, "learning_rate": 0.0009858708842304922, "loss": 1.5606, "step": 7748 }, { "epoch": 0.3445822773553866, "grad_norm": 0.08317109197378159, "learning_rate": 0.0009858625473976828, "loss": 1.5534, "step": 7750 }, { "epoch": 0.34467120181405897, "grad_norm": 0.07973136752843857, "learning_rate": 0.0009858542081413106, "loss": 1.5536, "step": 7752 }, { "epoch": 0.34476012627273134, "grad_norm": 0.07634979486465454, "learning_rate": 0.0009858458664614169, "loss": 1.5492, "step": 7754 }, { "epoch": 0.34484905073140365, "grad_norm": 0.07837677747011185, "learning_rate": 0.0009858375223580434, "loss": 1.5548, "step": 7756 }, { "epoch": 0.344937975190076, "grad_norm": 0.07845926284790039, "learning_rate": 0.0009858291758312317, "loss": 1.5537, "step": 7758 }, { "epoch": 0.3450268996487484, "grad_norm": 0.08067924529314041, "learning_rate": 0.0009858208268810232, "loss": 1.5546, "step": 7760 }, { "epoch": 0.34511582410742075, "grad_norm": 0.07944106310606003, "learning_rate": 0.00098581247550746, "loss": 1.5523, "step": 7762 }, { "epoch": 0.3452047485660931, "grad_norm": 0.08254885673522949, "learning_rate": 0.0009858041217105835, "loss": 1.5549, "step": 7764 }, { "epoch": 0.34529367302476544, "grad_norm": 0.08257626742124557, "learning_rate": 0.0009857957654904352, "loss": 1.5568, "step": 7766 }, { "epoch": 0.3453825974834378, "grad_norm": 0.07757585495710373, "learning_rate": 0.0009857874068470575, "loss": 1.5542, "step": 7768 }, { "epoch": 0.34547152194211017, "grad_norm": 0.07840435206890106, "learning_rate": 0.000985779045780491, "loss": 1.5541, "step": 7770 }, { "epoch": 0.34556044640078254, "grad_norm": 0.08048916608095169, "learning_rate": 0.0009857706822907783, "loss": 1.5509, "step": 7772 }, { "epoch": 0.3456493708594549, "grad_norm": 0.08014550805091858, "learning_rate": 0.0009857623163779606, "loss": 1.5515, "step": 7774 }, { "epoch": 0.3457382953181273, "grad_norm": 0.08095970749855042, "learning_rate": 0.00098575394804208, "loss": 1.5545, "step": 7776 }, { "epoch": 0.3458272197767996, "grad_norm": 0.0833306685090065, "learning_rate": 0.000985745577283178, "loss": 1.5512, "step": 7778 }, { "epoch": 0.34591614423547196, "grad_norm": 0.08204789459705353, "learning_rate": 0.0009857372041012966, "loss": 1.5512, "step": 7780 }, { "epoch": 0.3460050686941443, "grad_norm": 0.08149214088916779, "learning_rate": 0.000985728828496477, "loss": 1.5549, "step": 7782 }, { "epoch": 0.3460939931528167, "grad_norm": 0.08480332046747208, "learning_rate": 0.0009857204504687617, "loss": 1.5529, "step": 7784 }, { "epoch": 0.34618291761148906, "grad_norm": 0.0808916836977005, "learning_rate": 0.0009857120700181921, "loss": 1.5539, "step": 7786 }, { "epoch": 0.3462718420701614, "grad_norm": 0.08434953540563583, "learning_rate": 0.0009857036871448101, "loss": 1.5492, "step": 7788 }, { "epoch": 0.34636076652883374, "grad_norm": 0.077853262424469, "learning_rate": 0.0009856953018486576, "loss": 1.5522, "step": 7790 }, { "epoch": 0.3464496909875061, "grad_norm": 0.08755781501531601, "learning_rate": 0.0009856869141297762, "loss": 1.5561, "step": 7792 }, { "epoch": 0.3465386154461785, "grad_norm": 0.0840546116232872, "learning_rate": 0.0009856785239882079, "loss": 1.5522, "step": 7794 }, { "epoch": 0.34662753990485085, "grad_norm": 0.08388349413871765, "learning_rate": 0.0009856701314239942, "loss": 1.5519, "step": 7796 }, { "epoch": 0.3467164643635232, "grad_norm": 0.07955602556467056, "learning_rate": 0.0009856617364371776, "loss": 1.5584, "step": 7798 }, { "epoch": 0.3468053888221955, "grad_norm": 0.08307410031557083, "learning_rate": 0.0009856533390277995, "loss": 1.5527, "step": 7800 }, { "epoch": 0.3468943132808679, "grad_norm": 0.08166033774614334, "learning_rate": 0.000985644939195902, "loss": 1.5575, "step": 7802 }, { "epoch": 0.34698323773954026, "grad_norm": 0.08017229288816452, "learning_rate": 0.0009856365369415269, "loss": 1.5536, "step": 7804 }, { "epoch": 0.34707216219821263, "grad_norm": 0.08459261059761047, "learning_rate": 0.000985628132264716, "loss": 1.5624, "step": 7806 }, { "epoch": 0.347161086656885, "grad_norm": 0.07887223362922668, "learning_rate": 0.0009856197251655117, "loss": 1.5504, "step": 7808 }, { "epoch": 0.3472500111155573, "grad_norm": 0.08019403368234634, "learning_rate": 0.0009856113156439554, "loss": 1.5524, "step": 7810 }, { "epoch": 0.3473389355742297, "grad_norm": 0.0818062275648117, "learning_rate": 0.0009856029037000893, "loss": 1.5556, "step": 7812 }, { "epoch": 0.34742786003290205, "grad_norm": 0.07944361120462418, "learning_rate": 0.0009855944893339553, "loss": 1.5543, "step": 7814 }, { "epoch": 0.3475167844915744, "grad_norm": 0.07994072139263153, "learning_rate": 0.0009855860725455953, "loss": 1.5454, "step": 7816 }, { "epoch": 0.3476057089502468, "grad_norm": 0.07912899553775787, "learning_rate": 0.0009855776533350513, "loss": 1.5526, "step": 7818 }, { "epoch": 0.3476946334089191, "grad_norm": 0.08150925487279892, "learning_rate": 0.0009855692317023654, "loss": 1.5507, "step": 7820 }, { "epoch": 0.34778355786759146, "grad_norm": 0.0804237350821495, "learning_rate": 0.0009855608076475798, "loss": 1.5562, "step": 7822 }, { "epoch": 0.34787248232626383, "grad_norm": 0.07889696210622787, "learning_rate": 0.000985552381170736, "loss": 1.5572, "step": 7824 }, { "epoch": 0.3479614067849362, "grad_norm": 0.07692207396030426, "learning_rate": 0.0009855439522718766, "loss": 1.5534, "step": 7826 }, { "epoch": 0.34805033124360857, "grad_norm": 0.07672964036464691, "learning_rate": 0.0009855355209510432, "loss": 1.553, "step": 7828 }, { "epoch": 0.34813925570228094, "grad_norm": 0.07981939613819122, "learning_rate": 0.000985527087208278, "loss": 1.5549, "step": 7830 }, { "epoch": 0.34822818016095325, "grad_norm": 0.07698217779397964, "learning_rate": 0.0009855186510436232, "loss": 1.5567, "step": 7832 }, { "epoch": 0.3483171046196256, "grad_norm": 0.07928860932588577, "learning_rate": 0.000985510212457121, "loss": 1.5513, "step": 7834 }, { "epoch": 0.348406029078298, "grad_norm": 0.08210493624210358, "learning_rate": 0.0009855017714488128, "loss": 1.5503, "step": 7836 }, { "epoch": 0.34849495353697035, "grad_norm": 0.08063147217035294, "learning_rate": 0.0009854933280187416, "loss": 1.5549, "step": 7838 }, { "epoch": 0.3485838779956427, "grad_norm": 0.07979092001914978, "learning_rate": 0.0009854848821669487, "loss": 1.5482, "step": 7840 }, { "epoch": 0.34867280245431503, "grad_norm": 0.0807296484708786, "learning_rate": 0.0009854764338934768, "loss": 1.5546, "step": 7842 }, { "epoch": 0.3487617269129874, "grad_norm": 0.082137331366539, "learning_rate": 0.0009854679831983678, "loss": 1.5572, "step": 7844 }, { "epoch": 0.34885065137165977, "grad_norm": 0.0817376971244812, "learning_rate": 0.000985459530081664, "loss": 1.5512, "step": 7846 }, { "epoch": 0.34893957583033214, "grad_norm": 0.08156748861074448, "learning_rate": 0.0009854510745434075, "loss": 1.5563, "step": 7848 }, { "epoch": 0.3490285002890045, "grad_norm": 0.08468926697969437, "learning_rate": 0.0009854426165836403, "loss": 1.5537, "step": 7850 }, { "epoch": 0.3491174247476769, "grad_norm": 0.0817328691482544, "learning_rate": 0.000985434156202405, "loss": 1.559, "step": 7852 }, { "epoch": 0.3492063492063492, "grad_norm": 0.07793274521827698, "learning_rate": 0.0009854256933997435, "loss": 1.5527, "step": 7854 }, { "epoch": 0.34929527366502156, "grad_norm": 0.07918530702590942, "learning_rate": 0.0009854172281756979, "loss": 1.554, "step": 7856 }, { "epoch": 0.3493841981236939, "grad_norm": 0.08301304280757904, "learning_rate": 0.0009854087605303105, "loss": 1.554, "step": 7858 }, { "epoch": 0.3494731225823663, "grad_norm": 0.07774464040994644, "learning_rate": 0.0009854002904636238, "loss": 1.5517, "step": 7860 }, { "epoch": 0.34956204704103866, "grad_norm": 0.08034802228212357, "learning_rate": 0.00098539181797568, "loss": 1.5544, "step": 7862 }, { "epoch": 0.34965097149971097, "grad_norm": 0.07757385820150375, "learning_rate": 0.0009853833430665212, "loss": 1.5548, "step": 7864 }, { "epoch": 0.34973989595838334, "grad_norm": 0.08288813382387161, "learning_rate": 0.0009853748657361896, "loss": 1.5527, "step": 7866 }, { "epoch": 0.3498288204170557, "grad_norm": 0.08583445101976395, "learning_rate": 0.0009853663859847276, "loss": 1.5544, "step": 7868 }, { "epoch": 0.3499177448757281, "grad_norm": 0.07990463078022003, "learning_rate": 0.0009853579038121775, "loss": 1.5537, "step": 7870 }, { "epoch": 0.35000666933440044, "grad_norm": 0.08092455565929413, "learning_rate": 0.0009853494192185817, "loss": 1.5494, "step": 7872 }, { "epoch": 0.3500955937930728, "grad_norm": 0.07774668186903, "learning_rate": 0.0009853409322039823, "loss": 1.5512, "step": 7874 }, { "epoch": 0.3501845182517451, "grad_norm": 0.08283083885908127, "learning_rate": 0.000985332442768422, "loss": 1.5538, "step": 7876 }, { "epoch": 0.3502734427104175, "grad_norm": 0.08365800231695175, "learning_rate": 0.0009853239509119427, "loss": 1.5525, "step": 7878 }, { "epoch": 0.35036236716908986, "grad_norm": 0.077533058822155, "learning_rate": 0.000985315456634587, "loss": 1.5596, "step": 7880 }, { "epoch": 0.35045129162776223, "grad_norm": 0.08021321892738342, "learning_rate": 0.0009853069599363973, "loss": 1.5468, "step": 7882 }, { "epoch": 0.3505402160864346, "grad_norm": 0.11996711045503616, "learning_rate": 0.0009852984608174158, "loss": 1.5533, "step": 7884 }, { "epoch": 0.3506291405451069, "grad_norm": 0.3232218623161316, "learning_rate": 0.0009852899592776852, "loss": 1.6783, "step": 7886 }, { "epoch": 0.3507180650037793, "grad_norm": 4.166322231292725, "learning_rate": 0.0009852814553172476, "loss": 2.4033, "step": 7888 }, { "epoch": 0.35080698946245165, "grad_norm": 105.5245132446289, "learning_rate": 0.0009852729489361457, "loss": 3.5532, "step": 7890 }, { "epoch": 0.350895913921124, "grad_norm": 5.243870258331299, "learning_rate": 0.0009852644401344218, "loss": 7.1315, "step": 7892 }, { "epoch": 0.3509848383797964, "grad_norm": 6.443427085876465, "learning_rate": 0.0009852559289121182, "loss": 7.392, "step": 7894 }, { "epoch": 0.3510737628384687, "grad_norm": 11.5606050491333, "learning_rate": 0.0009852474152692774, "loss": 7.5255, "step": 7896 }, { "epoch": 0.35116268729714106, "grad_norm": 2.048408031463623, "learning_rate": 0.000985238899205942, "loss": 7.2354, "step": 7898 }, { "epoch": 0.35125161175581343, "grad_norm": 0.9983798265457153, "learning_rate": 0.0009852303807221545, "loss": 6.5717, "step": 7900 }, { "epoch": 0.3513405362144858, "grad_norm": 6.880504608154297, "learning_rate": 0.0009852218598179572, "loss": 6.7333, "step": 7902 }, { "epoch": 0.35142946067315817, "grad_norm": 0.685417890548706, "learning_rate": 0.0009852133364933928, "loss": 6.3766, "step": 7904 }, { "epoch": 0.35151838513183054, "grad_norm": 1.0315533876419067, "learning_rate": 0.0009852048107485039, "loss": 6.1477, "step": 7906 }, { "epoch": 0.35160730959050285, "grad_norm": 0.4206119179725647, "learning_rate": 0.0009851962825833324, "loss": 6.0279, "step": 7908 }, { "epoch": 0.3516962340491752, "grad_norm": 0.34929385781288147, "learning_rate": 0.0009851877519979218, "loss": 5.9475, "step": 7910 }, { "epoch": 0.3517851585078476, "grad_norm": 0.33057817816734314, "learning_rate": 0.000985179218992314, "loss": 5.8579, "step": 7912 }, { "epoch": 0.35187408296651995, "grad_norm": 0.2894933819770813, "learning_rate": 0.0009851706835665515, "loss": 5.8023, "step": 7914 }, { "epoch": 0.3519630074251923, "grad_norm": 0.21461133658885956, "learning_rate": 0.0009851621457206774, "loss": 5.7603, "step": 7916 }, { "epoch": 0.35205193188386463, "grad_norm": 0.21909675002098083, "learning_rate": 0.0009851536054547338, "loss": 5.7207, "step": 7918 }, { "epoch": 0.352140856342537, "grad_norm": 0.3142562508583069, "learning_rate": 0.0009851450627687635, "loss": 5.6821, "step": 7920 }, { "epoch": 0.35222978080120937, "grad_norm": 1.3621059656143188, "learning_rate": 0.000985136517662809, "loss": 5.6821, "step": 7922 }, { "epoch": 0.35231870525988174, "grad_norm": 0.7668919563293457, "learning_rate": 0.0009851279701369134, "loss": 5.6241, "step": 7924 }, { "epoch": 0.3524076297185541, "grad_norm": 0.3618534207344055, "learning_rate": 0.0009851194201911187, "loss": 5.5482, "step": 7926 }, { "epoch": 0.3524965541772265, "grad_norm": 0.2870240807533264, "learning_rate": 0.0009851108678254677, "loss": 5.5032, "step": 7928 }, { "epoch": 0.3525854786358988, "grad_norm": 0.485879510641098, "learning_rate": 0.0009851023130400035, "loss": 5.4567, "step": 7930 }, { "epoch": 0.35267440309457115, "grad_norm": 0.2687022387981415, "learning_rate": 0.000985093755834768, "loss": 5.3988, "step": 7932 }, { "epoch": 0.3527633275532435, "grad_norm": 0.3306155204772949, "learning_rate": 0.0009850851962098046, "loss": 5.3285, "step": 7934 }, { "epoch": 0.3528522520119159, "grad_norm": 0.7935633063316345, "learning_rate": 0.0009850766341651556, "loss": 5.2442, "step": 7936 }, { "epoch": 0.35294117647058826, "grad_norm": 1.1152472496032715, "learning_rate": 0.000985068069700864, "loss": 5.2263, "step": 7938 }, { "epoch": 0.35303010092926057, "grad_norm": 1.460869550704956, "learning_rate": 0.0009850595028169722, "loss": 5.2365, "step": 7940 }, { "epoch": 0.35311902538793294, "grad_norm": 1.1066856384277344, "learning_rate": 0.0009850509335135231, "loss": 5.1598, "step": 7942 }, { "epoch": 0.3532079498466053, "grad_norm": 0.8395753502845764, "learning_rate": 0.0009850423617905595, "loss": 5.0372, "step": 7944 }, { "epoch": 0.3532968743052777, "grad_norm": 0.550400972366333, "learning_rate": 0.0009850337876481242, "loss": 4.9278, "step": 7946 }, { "epoch": 0.35338579876395004, "grad_norm": 0.7241742014884949, "learning_rate": 0.0009850252110862596, "loss": 4.8471, "step": 7948 }, { "epoch": 0.35347472322262236, "grad_norm": 1.1229575872421265, "learning_rate": 0.000985016632105009, "loss": 4.7441, "step": 7950 }, { "epoch": 0.3535636476812947, "grad_norm": 0.5880857110023499, "learning_rate": 0.0009850080507044147, "loss": 4.6106, "step": 7952 }, { "epoch": 0.3536525721399671, "grad_norm": 0.6695611476898193, "learning_rate": 0.0009849994668845196, "loss": 4.4359, "step": 7954 }, { "epoch": 0.35374149659863946, "grad_norm": 0.4951477646827698, "learning_rate": 0.000984990880645367, "loss": 4.3121, "step": 7956 }, { "epoch": 0.35383042105731183, "grad_norm": 1.49424147605896, "learning_rate": 0.0009849822919869994, "loss": 4.3655, "step": 7958 }, { "epoch": 0.3539193455159842, "grad_norm": 0.6731409430503845, "learning_rate": 0.0009849737009094595, "loss": 4.1954, "step": 7960 }, { "epoch": 0.3540082699746565, "grad_norm": 0.657111644744873, "learning_rate": 0.00098496510741279, "loss": 3.9788, "step": 7962 }, { "epoch": 0.3540971944333289, "grad_norm": 1.2278788089752197, "learning_rate": 0.0009849565114970346, "loss": 3.8825, "step": 7964 }, { "epoch": 0.35418611889200124, "grad_norm": 1.1738439798355103, "learning_rate": 0.0009849479131622352, "loss": 3.7363, "step": 7966 }, { "epoch": 0.3542750433506736, "grad_norm": 0.9582828283309937, "learning_rate": 0.000984939312408435, "loss": 3.5467, "step": 7968 }, { "epoch": 0.354363967809346, "grad_norm": 0.9599164724349976, "learning_rate": 0.0009849307092356773, "loss": 3.3789, "step": 7970 }, { "epoch": 0.3544528922680183, "grad_norm": 0.8798990249633789, "learning_rate": 0.0009849221036440049, "loss": 3.201, "step": 7972 }, { "epoch": 0.35454181672669066, "grad_norm": 0.837184488773346, "learning_rate": 0.0009849134956334603, "loss": 3.0279, "step": 7974 }, { "epoch": 0.35463074118536303, "grad_norm": 0.869229793548584, "learning_rate": 0.0009849048852040867, "loss": 2.9193, "step": 7976 }, { "epoch": 0.3547196656440354, "grad_norm": 1.0861220359802246, "learning_rate": 0.000984896272355927, "loss": 2.7289, "step": 7978 }, { "epoch": 0.35480859010270777, "grad_norm": 0.8610190749168396, "learning_rate": 0.000984887657089024, "loss": 2.5584, "step": 7980 }, { "epoch": 0.35489751456138013, "grad_norm": 0.6836897134780884, "learning_rate": 0.0009848790394034214, "loss": 2.4055, "step": 7982 }, { "epoch": 0.35498643902005245, "grad_norm": 0.7401456832885742, "learning_rate": 0.0009848704192991613, "loss": 2.2528, "step": 7984 }, { "epoch": 0.3550753634787248, "grad_norm": 0.9028481245040894, "learning_rate": 0.0009848617967762872, "loss": 2.3112, "step": 7986 }, { "epoch": 0.3551642879373972, "grad_norm": 0.3118695318698883, "learning_rate": 0.000984853171834842, "loss": 2.1296, "step": 7988 }, { "epoch": 0.35525321239606955, "grad_norm": 0.30334171652793884, "learning_rate": 0.0009848445444748685, "loss": 2.0048, "step": 7990 }, { "epoch": 0.3553421368547419, "grad_norm": 0.19824688136577606, "learning_rate": 0.0009848359146964103, "loss": 1.9316, "step": 7992 }, { "epoch": 0.35543106131341423, "grad_norm": 0.2128942757844925, "learning_rate": 0.0009848272824995096, "loss": 1.8792, "step": 7994 }, { "epoch": 0.3555199857720866, "grad_norm": 0.15974895656108856, "learning_rate": 0.0009848186478842104, "loss": 1.8306, "step": 7996 }, { "epoch": 0.35560891023075897, "grad_norm": 0.13069583475589752, "learning_rate": 0.000984810010850555, "loss": 1.7945, "step": 7998 }, { "epoch": 0.35569783468943134, "grad_norm": 0.13678595423698425, "learning_rate": 0.000984801371398587, "loss": 1.7696, "step": 8000 }, { "epoch": 0.35569783468943134, "eval_loss": 1.6807708740234375, "eval_runtime": 12.3592, "eval_samples_per_second": 559.097, "eval_steps_per_second": 69.907, "step": 8000 }, { "epoch": 0.3557867591481037, "grad_norm": 0.12552303075790405, "learning_rate": 0.0009847927295283491, "loss": 1.7432, "step": 8002 }, { "epoch": 0.355875683606776, "grad_norm": 0.12398079037666321, "learning_rate": 0.0009847840852398846, "loss": 1.7169, "step": 8004 }, { "epoch": 0.3559646080654484, "grad_norm": 0.11141873896121979, "learning_rate": 0.0009847754385332369, "loss": 1.6993, "step": 8006 }, { "epoch": 0.35605353252412075, "grad_norm": 0.1098860651254654, "learning_rate": 0.0009847667894084485, "loss": 1.6952, "step": 8008 }, { "epoch": 0.3561424569827931, "grad_norm": 0.10826461762189865, "learning_rate": 0.000984758137865563, "loss": 1.6817, "step": 8010 }, { "epoch": 0.3562313814414655, "grad_norm": 0.10351142287254333, "learning_rate": 0.0009847494839046234, "loss": 1.6737, "step": 8012 }, { "epoch": 0.35632030590013786, "grad_norm": 0.09795521944761276, "learning_rate": 0.000984740827525673, "loss": 1.6679, "step": 8014 }, { "epoch": 0.35640923035881017, "grad_norm": 0.0955873504281044, "learning_rate": 0.000984732168728755, "loss": 1.6526, "step": 8016 }, { "epoch": 0.35649815481748254, "grad_norm": 0.09694499522447586, "learning_rate": 0.0009847235075139123, "loss": 1.6522, "step": 8018 }, { "epoch": 0.3565870792761549, "grad_norm": 0.09016109257936478, "learning_rate": 0.0009847148438811883, "loss": 1.6439, "step": 8020 }, { "epoch": 0.3566760037348273, "grad_norm": 0.08890122175216675, "learning_rate": 0.0009847061778306262, "loss": 1.6345, "step": 8022 }, { "epoch": 0.35676492819349964, "grad_norm": 0.09018497169017792, "learning_rate": 0.0009846975093622692, "loss": 1.6403, "step": 8024 }, { "epoch": 0.35685385265217195, "grad_norm": 0.0861065536737442, "learning_rate": 0.0009846888384761607, "loss": 1.6304, "step": 8026 }, { "epoch": 0.3569427771108443, "grad_norm": 0.08618265390396118, "learning_rate": 0.0009846801651723436, "loss": 1.6258, "step": 8028 }, { "epoch": 0.3570317015695167, "grad_norm": 0.0936831384897232, "learning_rate": 0.0009846714894508617, "loss": 1.6244, "step": 8030 }, { "epoch": 0.35712062602818906, "grad_norm": 0.08654095232486725, "learning_rate": 0.0009846628113117577, "loss": 1.6206, "step": 8032 }, { "epoch": 0.3572095504868614, "grad_norm": 0.09224793314933777, "learning_rate": 0.0009846541307550753, "loss": 1.6189, "step": 8034 }, { "epoch": 0.3572984749455338, "grad_norm": 0.08937020599842072, "learning_rate": 0.0009846454477808575, "loss": 1.6198, "step": 8036 }, { "epoch": 0.3573873994042061, "grad_norm": 0.09124995023012161, "learning_rate": 0.0009846367623891478, "loss": 1.6108, "step": 8038 }, { "epoch": 0.3574763238628785, "grad_norm": 0.08880987763404846, "learning_rate": 0.0009846280745799895, "loss": 1.6126, "step": 8040 }, { "epoch": 0.35756524832155084, "grad_norm": 0.0875345841050148, "learning_rate": 0.000984619384353426, "loss": 1.6134, "step": 8042 }, { "epoch": 0.3576541727802232, "grad_norm": 0.08219029754400253, "learning_rate": 0.0009846106917095006, "loss": 1.6066, "step": 8044 }, { "epoch": 0.3577430972388956, "grad_norm": 0.08467015624046326, "learning_rate": 0.0009846019966482564, "loss": 1.6128, "step": 8046 }, { "epoch": 0.3578320216975679, "grad_norm": 0.08793745934963226, "learning_rate": 0.0009845932991697373, "loss": 1.6093, "step": 8048 }, { "epoch": 0.35792094615624026, "grad_norm": 0.08105975389480591, "learning_rate": 0.000984584599273986, "loss": 1.602, "step": 8050 }, { "epoch": 0.35800987061491263, "grad_norm": 0.08754699677228928, "learning_rate": 0.0009845758969610466, "loss": 1.596, "step": 8052 }, { "epoch": 0.358098795073585, "grad_norm": 0.08423270285129547, "learning_rate": 0.000984567192230962, "loss": 1.5994, "step": 8054 }, { "epoch": 0.35818771953225736, "grad_norm": 0.08522899448871613, "learning_rate": 0.000984558485083776, "loss": 1.6003, "step": 8056 }, { "epoch": 0.35827664399092973, "grad_norm": 0.08278319239616394, "learning_rate": 0.0009845497755195318, "loss": 1.6026, "step": 8058 }, { "epoch": 0.35836556844960205, "grad_norm": 0.08185625076293945, "learning_rate": 0.0009845410635382728, "loss": 1.5918, "step": 8060 }, { "epoch": 0.3584544929082744, "grad_norm": 0.08351411670446396, "learning_rate": 0.0009845323491400426, "loss": 1.6026, "step": 8062 }, { "epoch": 0.3585434173669468, "grad_norm": 0.08093686401844025, "learning_rate": 0.0009845236323248845, "loss": 1.6011, "step": 8064 }, { "epoch": 0.35863234182561915, "grad_norm": 0.08070489019155502, "learning_rate": 0.0009845149130928421, "loss": 1.5963, "step": 8066 }, { "epoch": 0.3587212662842915, "grad_norm": 0.08184243738651276, "learning_rate": 0.0009845061914439592, "loss": 1.5887, "step": 8068 }, { "epoch": 0.35881019074296383, "grad_norm": 0.08118681609630585, "learning_rate": 0.0009844974673782786, "loss": 1.5864, "step": 8070 }, { "epoch": 0.3588991152016362, "grad_norm": 0.08214083313941956, "learning_rate": 0.0009844887408958444, "loss": 1.5933, "step": 8072 }, { "epoch": 0.35898803966030857, "grad_norm": 0.07901991903781891, "learning_rate": 0.0009844800119967, "loss": 1.5918, "step": 8074 }, { "epoch": 0.35907696411898093, "grad_norm": 0.0798892080783844, "learning_rate": 0.000984471280680889, "loss": 1.5866, "step": 8076 }, { "epoch": 0.3591658885776533, "grad_norm": 0.08176968991756439, "learning_rate": 0.0009844625469484546, "loss": 1.5841, "step": 8078 }, { "epoch": 0.3592548130363256, "grad_norm": 0.0794360563158989, "learning_rate": 0.0009844538107994405, "loss": 1.589, "step": 8080 }, { "epoch": 0.359343737494998, "grad_norm": 0.08003082126379013, "learning_rate": 0.0009844450722338906, "loss": 1.5884, "step": 8082 }, { "epoch": 0.35943266195367035, "grad_norm": 0.08419719338417053, "learning_rate": 0.000984436331251848, "loss": 1.5887, "step": 8084 }, { "epoch": 0.3595215864123427, "grad_norm": 0.07977907359600067, "learning_rate": 0.0009844275878533569, "loss": 1.5834, "step": 8086 }, { "epoch": 0.3596105108710151, "grad_norm": 0.08345074206590652, "learning_rate": 0.0009844188420384604, "loss": 1.584, "step": 8088 }, { "epoch": 0.35969943532968746, "grad_norm": 0.08102136850357056, "learning_rate": 0.0009844100938072023, "loss": 1.5883, "step": 8090 }, { "epoch": 0.35978835978835977, "grad_norm": 0.08167117834091187, "learning_rate": 0.0009844013431596263, "loss": 1.5822, "step": 8092 }, { "epoch": 0.35987728424703214, "grad_norm": 0.0804857537150383, "learning_rate": 0.000984392590095776, "loss": 1.5835, "step": 8094 }, { "epoch": 0.3599662087057045, "grad_norm": 0.08060716092586517, "learning_rate": 0.000984383834615695, "loss": 1.5853, "step": 8096 }, { "epoch": 0.3600551331643769, "grad_norm": 0.08844839036464691, "learning_rate": 0.000984375076719427, "loss": 1.582, "step": 8098 }, { "epoch": 0.36014405762304924, "grad_norm": 0.07595451176166534, "learning_rate": 0.0009843663164070158, "loss": 1.5848, "step": 8100 }, { "epoch": 0.36023298208172155, "grad_norm": 0.08353257924318314, "learning_rate": 0.000984357553678505, "loss": 1.5796, "step": 8102 }, { "epoch": 0.3603219065403939, "grad_norm": 0.08055184781551361, "learning_rate": 0.0009843487885339384, "loss": 1.5814, "step": 8104 }, { "epoch": 0.3604108309990663, "grad_norm": 0.07868727296590805, "learning_rate": 0.0009843400209733593, "loss": 1.5861, "step": 8106 }, { "epoch": 0.36049975545773866, "grad_norm": 0.08025962114334106, "learning_rate": 0.000984331250996812, "loss": 1.5845, "step": 8108 }, { "epoch": 0.360588679916411, "grad_norm": 0.07819036394357681, "learning_rate": 0.0009843224786043401, "loss": 1.5807, "step": 8110 }, { "epoch": 0.3606776043750834, "grad_norm": 0.07715727388858795, "learning_rate": 0.0009843137037959873, "loss": 1.5842, "step": 8112 }, { "epoch": 0.3607665288337557, "grad_norm": 0.08035559952259064, "learning_rate": 0.0009843049265717975, "loss": 1.5746, "step": 8114 }, { "epoch": 0.3608554532924281, "grad_norm": 0.08042401075363159, "learning_rate": 0.000984296146931814, "loss": 1.5779, "step": 8116 }, { "epoch": 0.36094437775110044, "grad_norm": 0.0793127790093422, "learning_rate": 0.000984287364876081, "loss": 1.5806, "step": 8118 }, { "epoch": 0.3610333022097728, "grad_norm": 0.07924724370241165, "learning_rate": 0.0009842785804046424, "loss": 1.5812, "step": 8120 }, { "epoch": 0.3611222266684452, "grad_norm": 0.08291906118392944, "learning_rate": 0.0009842697935175417, "loss": 1.5734, "step": 8122 }, { "epoch": 0.3612111511271175, "grad_norm": 0.08052452653646469, "learning_rate": 0.0009842610042148229, "loss": 1.575, "step": 8124 }, { "epoch": 0.36130007558578986, "grad_norm": 0.07844741642475128, "learning_rate": 0.0009842522124965298, "loss": 1.5802, "step": 8126 }, { "epoch": 0.3613890000444622, "grad_norm": 0.08405445516109467, "learning_rate": 0.0009842434183627063, "loss": 1.571, "step": 8128 }, { "epoch": 0.3614779245031346, "grad_norm": 0.07999279350042343, "learning_rate": 0.0009842346218133964, "loss": 1.5703, "step": 8130 }, { "epoch": 0.36156684896180696, "grad_norm": 0.08422758430242538, "learning_rate": 0.0009842258228486436, "loss": 1.5774, "step": 8132 }, { "epoch": 0.3616557734204793, "grad_norm": 0.07723760604858398, "learning_rate": 0.0009842170214684923, "loss": 1.5756, "step": 8134 }, { "epoch": 0.36174469787915164, "grad_norm": 0.07770794630050659, "learning_rate": 0.0009842082176729858, "loss": 1.5674, "step": 8136 }, { "epoch": 0.361833622337824, "grad_norm": 0.07809870690107346, "learning_rate": 0.0009841994114621686, "loss": 1.5741, "step": 8138 }, { "epoch": 0.3619225467964964, "grad_norm": 0.08160432428121567, "learning_rate": 0.0009841906028360842, "loss": 1.5726, "step": 8140 }, { "epoch": 0.36201147125516875, "grad_norm": 0.08070631325244904, "learning_rate": 0.0009841817917947767, "loss": 1.5736, "step": 8142 }, { "epoch": 0.3621003957138411, "grad_norm": 0.07993360608816147, "learning_rate": 0.0009841729783382903, "loss": 1.5757, "step": 8144 }, { "epoch": 0.36218932017251343, "grad_norm": 0.07653278112411499, "learning_rate": 0.0009841641624666684, "loss": 1.5737, "step": 8146 }, { "epoch": 0.3622782446311858, "grad_norm": 0.07840976864099503, "learning_rate": 0.0009841553441799554, "loss": 1.5702, "step": 8148 }, { "epoch": 0.36236716908985817, "grad_norm": 0.07674574106931686, "learning_rate": 0.0009841465234781952, "loss": 1.5761, "step": 8150 }, { "epoch": 0.36245609354853053, "grad_norm": 0.08122869580984116, "learning_rate": 0.0009841377003614318, "loss": 1.5696, "step": 8152 }, { "epoch": 0.3625450180072029, "grad_norm": 0.07849126309156418, "learning_rate": 0.0009841288748297092, "loss": 1.5692, "step": 8154 }, { "epoch": 0.3626339424658752, "grad_norm": 0.0775504857301712, "learning_rate": 0.0009841200468830714, "loss": 1.5748, "step": 8156 }, { "epoch": 0.3627228669245476, "grad_norm": 0.07808183133602142, "learning_rate": 0.0009841112165215624, "loss": 1.5702, "step": 8158 }, { "epoch": 0.36281179138321995, "grad_norm": 0.0813973993062973, "learning_rate": 0.0009841023837452263, "loss": 1.5721, "step": 8160 }, { "epoch": 0.3629007158418923, "grad_norm": 0.07763282209634781, "learning_rate": 0.0009840935485541072, "loss": 1.5666, "step": 8162 }, { "epoch": 0.3629896403005647, "grad_norm": 0.07707773894071579, "learning_rate": 0.000984084710948249, "loss": 1.5699, "step": 8164 }, { "epoch": 0.36307856475923705, "grad_norm": 0.07617053389549255, "learning_rate": 0.0009840758709276961, "loss": 1.5731, "step": 8166 }, { "epoch": 0.36316748921790937, "grad_norm": 0.07609335333108902, "learning_rate": 0.0009840670284924921, "loss": 1.5734, "step": 8168 }, { "epoch": 0.36325641367658174, "grad_norm": 0.07503112405538559, "learning_rate": 0.0009840581836426817, "loss": 1.5689, "step": 8170 }, { "epoch": 0.3633453381352541, "grad_norm": 0.07643923163414001, "learning_rate": 0.0009840493363783085, "loss": 1.5717, "step": 8172 }, { "epoch": 0.36343426259392647, "grad_norm": 0.07574044167995453, "learning_rate": 0.0009840404866994171, "loss": 1.569, "step": 8174 }, { "epoch": 0.36352318705259884, "grad_norm": 0.07417095452547073, "learning_rate": 0.0009840316346060513, "loss": 1.569, "step": 8176 }, { "epoch": 0.36361211151127115, "grad_norm": 0.07547099888324738, "learning_rate": 0.0009840227800982553, "loss": 1.5751, "step": 8178 }, { "epoch": 0.3637010359699435, "grad_norm": 0.07645308971405029, "learning_rate": 0.0009840139231760735, "loss": 1.5648, "step": 8180 }, { "epoch": 0.3637899604286159, "grad_norm": 0.07473346590995789, "learning_rate": 0.0009840050638395498, "loss": 1.5661, "step": 8182 }, { "epoch": 0.36387888488728826, "grad_norm": 0.07807636260986328, "learning_rate": 0.0009839962020887286, "loss": 1.5724, "step": 8184 }, { "epoch": 0.3639678093459606, "grad_norm": 0.07300268113613129, "learning_rate": 0.0009839873379236537, "loss": 1.571, "step": 8186 }, { "epoch": 0.36405673380463294, "grad_norm": 0.07703215628862381, "learning_rate": 0.00098397847134437, "loss": 1.5722, "step": 8188 }, { "epoch": 0.3641456582633053, "grad_norm": 0.07669734954833984, "learning_rate": 0.000983969602350921, "loss": 1.5676, "step": 8190 }, { "epoch": 0.3642345827219777, "grad_norm": 0.07603123784065247, "learning_rate": 0.0009839607309433515, "loss": 1.5688, "step": 8192 }, { "epoch": 0.36432350718065004, "grad_norm": 0.07759499549865723, "learning_rate": 0.0009839518571217055, "loss": 1.5643, "step": 8194 }, { "epoch": 0.3644124316393224, "grad_norm": 0.07855424284934998, "learning_rate": 0.0009839429808860274, "loss": 1.5641, "step": 8196 }, { "epoch": 0.3645013560979948, "grad_norm": 0.07806675881147385, "learning_rate": 0.0009839341022363612, "loss": 1.572, "step": 8198 }, { "epoch": 0.3645902805566671, "grad_norm": 0.08267048001289368, "learning_rate": 0.0009839252211727514, "loss": 1.5711, "step": 8200 }, { "epoch": 0.36467920501533946, "grad_norm": 0.07525893300771713, "learning_rate": 0.0009839163376952425, "loss": 1.5687, "step": 8202 }, { "epoch": 0.3647681294740118, "grad_norm": 0.07745590060949326, "learning_rate": 0.0009839074518038784, "loss": 1.5629, "step": 8204 }, { "epoch": 0.3648570539326842, "grad_norm": 0.07969274371862411, "learning_rate": 0.0009838985634987036, "loss": 1.5702, "step": 8206 }, { "epoch": 0.36494597839135656, "grad_norm": 0.0816626101732254, "learning_rate": 0.0009838896727797624, "loss": 1.5599, "step": 8208 }, { "epoch": 0.3650349028500289, "grad_norm": 0.07653602957725525, "learning_rate": 0.0009838807796470992, "loss": 1.5618, "step": 8210 }, { "epoch": 0.36512382730870124, "grad_norm": 0.08516646176576614, "learning_rate": 0.0009838718841007586, "loss": 1.5642, "step": 8212 }, { "epoch": 0.3652127517673736, "grad_norm": 0.07569476962089539, "learning_rate": 0.0009838629861407843, "loss": 1.5597, "step": 8214 }, { "epoch": 0.365301676226046, "grad_norm": 0.07699557393789291, "learning_rate": 0.0009838540857672214, "loss": 1.5649, "step": 8216 }, { "epoch": 0.36539060068471835, "grad_norm": 0.07943487912416458, "learning_rate": 0.000983845182980114, "loss": 1.5606, "step": 8218 }, { "epoch": 0.3654795251433907, "grad_norm": 0.07496616989374161, "learning_rate": 0.0009838362777795065, "loss": 1.5649, "step": 8220 }, { "epoch": 0.36556844960206303, "grad_norm": 0.07536393404006958, "learning_rate": 0.0009838273701654433, "loss": 1.5696, "step": 8222 }, { "epoch": 0.3656573740607354, "grad_norm": 0.07574569433927536, "learning_rate": 0.0009838184601379688, "loss": 1.5623, "step": 8224 }, { "epoch": 0.36574629851940776, "grad_norm": 0.07682286202907562, "learning_rate": 0.0009838095476971274, "loss": 1.5676, "step": 8226 }, { "epoch": 0.36583522297808013, "grad_norm": 0.08097031712532043, "learning_rate": 0.000983800632842964, "loss": 1.5641, "step": 8228 }, { "epoch": 0.3659241474367525, "grad_norm": 0.07592246681451797, "learning_rate": 0.0009837917155755226, "loss": 1.5562, "step": 8230 }, { "epoch": 0.3660130718954248, "grad_norm": 0.07768161594867706, "learning_rate": 0.0009837827958948477, "loss": 1.5638, "step": 8232 }, { "epoch": 0.3661019963540972, "grad_norm": 0.07560420036315918, "learning_rate": 0.0009837738738009841, "loss": 1.5625, "step": 8234 }, { "epoch": 0.36619092081276955, "grad_norm": 0.07644996047019958, "learning_rate": 0.000983764949293976, "loss": 1.5602, "step": 8236 }, { "epoch": 0.3662798452714419, "grad_norm": 0.07451751828193665, "learning_rate": 0.0009837560223738679, "loss": 1.5608, "step": 8238 }, { "epoch": 0.3663687697301143, "grad_norm": 0.0761575847864151, "learning_rate": 0.0009837470930407046, "loss": 1.5542, "step": 8240 }, { "epoch": 0.36645769418878665, "grad_norm": 0.0770341232419014, "learning_rate": 0.0009837381612945305, "loss": 1.5584, "step": 8242 }, { "epoch": 0.36654661864745897, "grad_norm": 0.07663694024085999, "learning_rate": 0.0009837292271353902, "loss": 1.5646, "step": 8244 }, { "epoch": 0.36663554310613133, "grad_norm": 0.0755605474114418, "learning_rate": 0.0009837202905633282, "loss": 1.5621, "step": 8246 }, { "epoch": 0.3667244675648037, "grad_norm": 0.07471001893281937, "learning_rate": 0.0009837113515783892, "loss": 1.5641, "step": 8248 }, { "epoch": 0.36681339202347607, "grad_norm": 0.07854770869016647, "learning_rate": 0.0009837024101806173, "loss": 1.5628, "step": 8250 }, { "epoch": 0.36690231648214844, "grad_norm": 0.07697580009698868, "learning_rate": 0.000983693466370058, "loss": 1.561, "step": 8252 }, { "epoch": 0.36699124094082075, "grad_norm": 0.07174623012542725, "learning_rate": 0.000983684520146755, "loss": 1.5571, "step": 8254 }, { "epoch": 0.3670801653994931, "grad_norm": 0.0765925943851471, "learning_rate": 0.0009836755715107535, "loss": 1.5634, "step": 8256 }, { "epoch": 0.3671690898581655, "grad_norm": 0.07561610639095306, "learning_rate": 0.000983666620462098, "loss": 1.5588, "step": 8258 }, { "epoch": 0.36725801431683786, "grad_norm": 0.07434527575969696, "learning_rate": 0.000983657667000833, "loss": 1.5617, "step": 8260 }, { "epoch": 0.3673469387755102, "grad_norm": 0.07405678927898407, "learning_rate": 0.0009836487111270034, "loss": 1.5646, "step": 8262 }, { "epoch": 0.36743586323418254, "grad_norm": 0.07639020681381226, "learning_rate": 0.0009836397528406534, "loss": 1.5586, "step": 8264 }, { "epoch": 0.3675247876928549, "grad_norm": 0.07487154006958008, "learning_rate": 0.0009836307921418284, "loss": 1.5547, "step": 8266 }, { "epoch": 0.36761371215152727, "grad_norm": 0.07496780157089233, "learning_rate": 0.0009836218290305728, "loss": 1.5611, "step": 8268 }, { "epoch": 0.36770263661019964, "grad_norm": 0.07398813217878342, "learning_rate": 0.000983612863506931, "loss": 1.558, "step": 8270 }, { "epoch": 0.367791561068872, "grad_norm": 0.07573290914297104, "learning_rate": 0.000983603895570948, "loss": 1.5595, "step": 8272 }, { "epoch": 0.3678804855275444, "grad_norm": 0.07165679335594177, "learning_rate": 0.0009835949252226686, "loss": 1.5649, "step": 8274 }, { "epoch": 0.3679694099862167, "grad_norm": 0.07558693736791611, "learning_rate": 0.0009835859524621373, "loss": 1.5568, "step": 8276 }, { "epoch": 0.36805833444488906, "grad_norm": 0.0720970556139946, "learning_rate": 0.000983576977289399, "loss": 1.5583, "step": 8278 }, { "epoch": 0.3681472589035614, "grad_norm": 0.07370097190141678, "learning_rate": 0.0009835679997044985, "loss": 1.5612, "step": 8280 }, { "epoch": 0.3682361833622338, "grad_norm": 0.07401144504547119, "learning_rate": 0.0009835590197074806, "loss": 1.5596, "step": 8282 }, { "epoch": 0.36832510782090616, "grad_norm": 0.07251115888357162, "learning_rate": 0.0009835500372983902, "loss": 1.5569, "step": 8284 }, { "epoch": 0.3684140322795785, "grad_norm": 0.07301607728004456, "learning_rate": 0.0009835410524772718, "loss": 1.5591, "step": 8286 }, { "epoch": 0.36850295673825084, "grad_norm": 0.07856374233961105, "learning_rate": 0.0009835320652441702, "loss": 1.5658, "step": 8288 }, { "epoch": 0.3685918811969232, "grad_norm": 0.07428453117609024, "learning_rate": 0.0009835230755991305, "loss": 1.5631, "step": 8290 }, { "epoch": 0.3686808056555956, "grad_norm": 0.07632127404212952, "learning_rate": 0.0009835140835421977, "loss": 1.5659, "step": 8292 }, { "epoch": 0.36876973011426795, "grad_norm": 0.07355497777462006, "learning_rate": 0.0009835050890734161, "loss": 1.5582, "step": 8294 }, { "epoch": 0.3688586545729403, "grad_norm": 0.07494645565748215, "learning_rate": 0.0009834960921928308, "loss": 1.5584, "step": 8296 }, { "epoch": 0.3689475790316126, "grad_norm": 0.07737577706575394, "learning_rate": 0.000983487092900487, "loss": 1.5562, "step": 8298 }, { "epoch": 0.369036503490285, "grad_norm": 0.07639828324317932, "learning_rate": 0.000983478091196429, "loss": 1.5646, "step": 8300 }, { "epoch": 0.36912542794895736, "grad_norm": 0.07329989224672318, "learning_rate": 0.0009834690870807023, "loss": 1.5579, "step": 8302 }, { "epoch": 0.36921435240762973, "grad_norm": 0.07461026310920715, "learning_rate": 0.0009834600805533516, "loss": 1.5522, "step": 8304 }, { "epoch": 0.3693032768663021, "grad_norm": 0.07623513787984848, "learning_rate": 0.0009834510716144214, "loss": 1.5643, "step": 8306 }, { "epoch": 0.3693922013249744, "grad_norm": 0.07590184360742569, "learning_rate": 0.0009834420602639574, "loss": 1.5595, "step": 8308 }, { "epoch": 0.3694811257836468, "grad_norm": 0.07395032793283463, "learning_rate": 0.0009834330465020038, "loss": 1.5598, "step": 8310 }, { "epoch": 0.36957005024231915, "grad_norm": 0.07140004634857178, "learning_rate": 0.0009834240303286063, "loss": 1.5628, "step": 8312 }, { "epoch": 0.3696589747009915, "grad_norm": 0.0748644694685936, "learning_rate": 0.0009834150117438091, "loss": 1.5601, "step": 8314 }, { "epoch": 0.3697478991596639, "grad_norm": 0.07483812421560287, "learning_rate": 0.0009834059907476579, "loss": 1.5533, "step": 8316 }, { "epoch": 0.3698368236183362, "grad_norm": 0.07460799068212509, "learning_rate": 0.0009833969673401972, "loss": 1.5583, "step": 8318 }, { "epoch": 0.36992574807700856, "grad_norm": 0.0767635703086853, "learning_rate": 0.0009833879415214722, "loss": 1.5656, "step": 8320 }, { "epoch": 0.37001467253568093, "grad_norm": 0.07315431535243988, "learning_rate": 0.0009833789132915282, "loss": 1.5562, "step": 8322 }, { "epoch": 0.3701035969943533, "grad_norm": 0.07698900997638702, "learning_rate": 0.0009833698826504096, "loss": 1.5598, "step": 8324 }, { "epoch": 0.37019252145302567, "grad_norm": 0.07581165432929993, "learning_rate": 0.0009833608495981618, "loss": 1.5574, "step": 8326 }, { "epoch": 0.37028144591169804, "grad_norm": 0.07427161186933517, "learning_rate": 0.0009833518141348298, "loss": 1.5555, "step": 8328 }, { "epoch": 0.37037037037037035, "grad_norm": 0.07862924039363861, "learning_rate": 0.000983342776260459, "loss": 1.5601, "step": 8330 }, { "epoch": 0.3704592948290427, "grad_norm": 0.07485569268465042, "learning_rate": 0.000983333735975094, "loss": 1.5526, "step": 8332 }, { "epoch": 0.3705482192877151, "grad_norm": 0.07545112073421478, "learning_rate": 0.0009833246932787801, "loss": 1.5548, "step": 8334 }, { "epoch": 0.37063714374638745, "grad_norm": 0.07797663658857346, "learning_rate": 0.0009833156481715624, "loss": 1.5603, "step": 8336 }, { "epoch": 0.3707260682050598, "grad_norm": 0.07360795885324478, "learning_rate": 0.0009833066006534861, "loss": 1.5535, "step": 8338 }, { "epoch": 0.37081499266373213, "grad_norm": 0.07508852332830429, "learning_rate": 0.0009832975507245963, "loss": 1.562, "step": 8340 }, { "epoch": 0.3709039171224045, "grad_norm": 0.07691474258899689, "learning_rate": 0.000983288498384938, "loss": 1.5559, "step": 8342 }, { "epoch": 0.37099284158107687, "grad_norm": 0.07170414924621582, "learning_rate": 0.0009832794436345565, "loss": 1.5595, "step": 8344 }, { "epoch": 0.37108176603974924, "grad_norm": 0.07313521206378937, "learning_rate": 0.000983270386473497, "loss": 1.5568, "step": 8346 }, { "epoch": 0.3711706904984216, "grad_norm": 0.07443317025899887, "learning_rate": 0.0009832613269018043, "loss": 1.5551, "step": 8348 }, { "epoch": 0.371259614957094, "grad_norm": 0.0727194994688034, "learning_rate": 0.000983252264919524, "loss": 1.5558, "step": 8350 }, { "epoch": 0.3713485394157663, "grad_norm": 0.07491002231836319, "learning_rate": 0.0009832432005267013, "loss": 1.5553, "step": 8352 }, { "epoch": 0.37143746387443866, "grad_norm": 0.07130132615566254, "learning_rate": 0.0009832341337233813, "loss": 1.5515, "step": 8354 }, { "epoch": 0.371526388333111, "grad_norm": 0.0775056853890419, "learning_rate": 0.0009832250645096091, "loss": 1.5612, "step": 8356 }, { "epoch": 0.3716153127917834, "grad_norm": 0.0759596973657608, "learning_rate": 0.00098321599288543, "loss": 1.554, "step": 8358 }, { "epoch": 0.37170423725045576, "grad_norm": 0.0767047107219696, "learning_rate": 0.0009832069188508896, "loss": 1.5613, "step": 8360 }, { "epoch": 0.3717931617091281, "grad_norm": 0.07511959224939346, "learning_rate": 0.0009831978424060328, "loss": 1.5553, "step": 8362 }, { "epoch": 0.37188208616780044, "grad_norm": 0.07674729824066162, "learning_rate": 0.0009831887635509049, "loss": 1.5505, "step": 8364 }, { "epoch": 0.3719710106264728, "grad_norm": 0.07360707968473434, "learning_rate": 0.0009831796822855512, "loss": 1.556, "step": 8366 }, { "epoch": 0.3720599350851452, "grad_norm": 0.0746261477470398, "learning_rate": 0.000983170598610017, "loss": 1.5553, "step": 8368 }, { "epoch": 0.37214885954381755, "grad_norm": 0.07465831935405731, "learning_rate": 0.0009831615125243478, "loss": 1.55, "step": 8370 }, { "epoch": 0.37223778400248986, "grad_norm": 0.07530109584331512, "learning_rate": 0.0009831524240285887, "loss": 1.5556, "step": 8372 }, { "epoch": 0.3723267084611622, "grad_norm": 0.07392653822898865, "learning_rate": 0.0009831433331227851, "loss": 1.5582, "step": 8374 }, { "epoch": 0.3724156329198346, "grad_norm": 0.0778929591178894, "learning_rate": 0.0009831342398069823, "loss": 1.5604, "step": 8376 }, { "epoch": 0.37250455737850696, "grad_norm": 0.07217849791049957, "learning_rate": 0.000983125144081226, "loss": 1.5528, "step": 8378 }, { "epoch": 0.37259348183717933, "grad_norm": 0.07852315157651901, "learning_rate": 0.000983116045945561, "loss": 1.56, "step": 8380 }, { "epoch": 0.3726824062958517, "grad_norm": 0.07284464687108994, "learning_rate": 0.000983106945400033, "loss": 1.5594, "step": 8382 }, { "epoch": 0.372771330754524, "grad_norm": 0.07296974211931229, "learning_rate": 0.0009830978424446874, "loss": 1.5558, "step": 8384 }, { "epoch": 0.3728602552131964, "grad_norm": 0.07269974797964096, "learning_rate": 0.0009830887370795695, "loss": 1.5563, "step": 8386 }, { "epoch": 0.37294917967186875, "grad_norm": 0.07310895621776581, "learning_rate": 0.0009830796293047249, "loss": 1.559, "step": 8388 }, { "epoch": 0.3730381041305411, "grad_norm": 0.07368705421686172, "learning_rate": 0.000983070519120199, "loss": 1.5569, "step": 8390 }, { "epoch": 0.3731270285892135, "grad_norm": 0.07440350204706192, "learning_rate": 0.000983061406526037, "loss": 1.5542, "step": 8392 }, { "epoch": 0.3732159530478858, "grad_norm": 0.07380738109350204, "learning_rate": 0.0009830522915222845, "loss": 1.5558, "step": 8394 }, { "epoch": 0.37330487750655816, "grad_norm": 0.0736425444483757, "learning_rate": 0.000983043174108987, "loss": 1.5493, "step": 8396 }, { "epoch": 0.37339380196523053, "grad_norm": 0.07320426404476166, "learning_rate": 0.0009830340542861902, "loss": 1.5516, "step": 8398 }, { "epoch": 0.3734827264239029, "grad_norm": 0.07398249953985214, "learning_rate": 0.0009830249320539391, "loss": 1.5563, "step": 8400 }, { "epoch": 0.37357165088257527, "grad_norm": 0.0731641873717308, "learning_rate": 0.0009830158074122795, "loss": 1.5547, "step": 8402 }, { "epoch": 0.37366057534124764, "grad_norm": 0.07476949691772461, "learning_rate": 0.000983006680361257, "loss": 1.5563, "step": 8404 }, { "epoch": 0.37374949979991995, "grad_norm": 0.07096338272094727, "learning_rate": 0.000982997550900917, "loss": 1.5579, "step": 8406 }, { "epoch": 0.3738384242585923, "grad_norm": 0.0731477290391922, "learning_rate": 0.0009829884190313048, "loss": 1.5571, "step": 8408 }, { "epoch": 0.3739273487172647, "grad_norm": 0.07443249970674515, "learning_rate": 0.0009829792847524663, "loss": 1.5524, "step": 8410 }, { "epoch": 0.37401627317593705, "grad_norm": 0.07522628456354141, "learning_rate": 0.000982970148064447, "loss": 1.5515, "step": 8412 }, { "epoch": 0.3741051976346094, "grad_norm": 0.08042244613170624, "learning_rate": 0.0009829610089672925, "loss": 1.552, "step": 8414 }, { "epoch": 0.37419412209328173, "grad_norm": 0.0764075294137001, "learning_rate": 0.0009829518674610482, "loss": 1.5509, "step": 8416 }, { "epoch": 0.3742830465519541, "grad_norm": 0.07679541409015656, "learning_rate": 0.0009829427235457599, "loss": 1.5558, "step": 8418 }, { "epoch": 0.37437197101062647, "grad_norm": 0.07601416856050491, "learning_rate": 0.0009829335772214732, "loss": 1.5551, "step": 8420 }, { "epoch": 0.37446089546929884, "grad_norm": 0.07198473066091537, "learning_rate": 0.0009829244284882336, "loss": 1.5515, "step": 8422 }, { "epoch": 0.3745498199279712, "grad_norm": 0.0754043385386467, "learning_rate": 0.0009829152773460869, "loss": 1.5522, "step": 8424 }, { "epoch": 0.3746387443866436, "grad_norm": 0.07835698872804642, "learning_rate": 0.0009829061237950783, "loss": 1.5558, "step": 8426 }, { "epoch": 0.3747276688453159, "grad_norm": 0.07373791188001633, "learning_rate": 0.000982896967835254, "loss": 1.5485, "step": 8428 }, { "epoch": 0.37481659330398825, "grad_norm": 0.07449112087488174, "learning_rate": 0.0009828878094666595, "loss": 1.5524, "step": 8430 }, { "epoch": 0.3749055177626606, "grad_norm": 0.07234968990087509, "learning_rate": 0.0009828786486893405, "loss": 1.5579, "step": 8432 }, { "epoch": 0.374994442221333, "grad_norm": 0.07672835886478424, "learning_rate": 0.0009828694855033425, "loss": 1.5522, "step": 8434 }, { "epoch": 0.37508336668000536, "grad_norm": 0.07505165040493011, "learning_rate": 0.0009828603199087114, "loss": 1.5515, "step": 8436 }, { "epoch": 0.37517229113867767, "grad_norm": 0.07347539067268372, "learning_rate": 0.0009828511519054928, "loss": 1.552, "step": 8438 }, { "epoch": 0.37526121559735004, "grad_norm": 0.07479852437973022, "learning_rate": 0.0009828419814937326, "loss": 1.5485, "step": 8440 }, { "epoch": 0.3753501400560224, "grad_norm": 0.0721561387181282, "learning_rate": 0.0009828328086734766, "loss": 1.5542, "step": 8442 }, { "epoch": 0.3754390645146948, "grad_norm": 0.07447963207960129, "learning_rate": 0.0009828236334447702, "loss": 1.5502, "step": 8444 }, { "epoch": 0.37552798897336714, "grad_norm": 0.07521109282970428, "learning_rate": 0.0009828144558076595, "loss": 1.5478, "step": 8446 }, { "epoch": 0.37561691343203946, "grad_norm": 0.08029814064502716, "learning_rate": 0.00098280527576219, "loss": 1.5552, "step": 8448 }, { "epoch": 0.3757058378907118, "grad_norm": 0.07279892265796661, "learning_rate": 0.0009827960933084076, "loss": 1.549, "step": 8450 }, { "epoch": 0.3757947623493842, "grad_norm": 0.07226986438035965, "learning_rate": 0.0009827869084463583, "loss": 1.5512, "step": 8452 }, { "epoch": 0.37588368680805656, "grad_norm": 0.07428599148988724, "learning_rate": 0.0009827777211760878, "loss": 1.5563, "step": 8454 }, { "epoch": 0.37597261126672893, "grad_norm": 0.07335691154003143, "learning_rate": 0.0009827685314976419, "loss": 1.5522, "step": 8456 }, { "epoch": 0.3760615357254013, "grad_norm": 0.0702577456831932, "learning_rate": 0.000982759339411066, "loss": 1.5493, "step": 8458 }, { "epoch": 0.3761504601840736, "grad_norm": 0.07333244383335114, "learning_rate": 0.000982750144916407, "loss": 1.5525, "step": 8460 }, { "epoch": 0.376239384642746, "grad_norm": 0.07639877498149872, "learning_rate": 0.0009827409480137098, "loss": 1.5517, "step": 8462 }, { "epoch": 0.37632830910141835, "grad_norm": 0.07263181358575821, "learning_rate": 0.0009827317487030207, "loss": 1.549, "step": 8464 }, { "epoch": 0.3764172335600907, "grad_norm": 0.07310689985752106, "learning_rate": 0.0009827225469843854, "loss": 1.5462, "step": 8466 }, { "epoch": 0.3765061580187631, "grad_norm": 0.07368572801351547, "learning_rate": 0.00098271334285785, "loss": 1.5532, "step": 8468 }, { "epoch": 0.3765950824774354, "grad_norm": 0.07141252607107162, "learning_rate": 0.0009827041363234604, "loss": 1.5565, "step": 8470 }, { "epoch": 0.37668400693610776, "grad_norm": 0.07508658617734909, "learning_rate": 0.0009826949273812622, "loss": 1.5514, "step": 8472 }, { "epoch": 0.37677293139478013, "grad_norm": 0.07459234446287155, "learning_rate": 0.0009826857160313018, "loss": 1.5596, "step": 8474 }, { "epoch": 0.3768618558534525, "grad_norm": 0.07409926503896713, "learning_rate": 0.0009826765022736249, "loss": 1.5514, "step": 8476 }, { "epoch": 0.37695078031212487, "grad_norm": 0.07309357076883316, "learning_rate": 0.0009826672861082775, "loss": 1.5485, "step": 8478 }, { "epoch": 0.37703970477079723, "grad_norm": 0.07403724640607834, "learning_rate": 0.0009826580675353054, "loss": 1.5558, "step": 8480 }, { "epoch": 0.37712862922946955, "grad_norm": 0.07526498287916183, "learning_rate": 0.000982648846554755, "loss": 1.5511, "step": 8482 }, { "epoch": 0.3772175536881419, "grad_norm": 0.07380390167236328, "learning_rate": 0.0009826396231666717, "loss": 1.5511, "step": 8484 }, { "epoch": 0.3773064781468143, "grad_norm": 0.07699756324291229, "learning_rate": 0.0009826303973711021, "loss": 1.5507, "step": 8486 }, { "epoch": 0.37739540260548665, "grad_norm": 0.07383593916893005, "learning_rate": 0.000982621169168092, "loss": 1.5523, "step": 8488 }, { "epoch": 0.377484327064159, "grad_norm": 0.07610490173101425, "learning_rate": 0.0009826119385576873, "loss": 1.5494, "step": 8490 }, { "epoch": 0.37757325152283133, "grad_norm": 0.07537056505680084, "learning_rate": 0.000982602705539934, "loss": 1.5563, "step": 8492 }, { "epoch": 0.3776621759815037, "grad_norm": 0.07339107990264893, "learning_rate": 0.0009825934701148784, "loss": 1.5511, "step": 8494 }, { "epoch": 0.37775110044017607, "grad_norm": 0.0728975385427475, "learning_rate": 0.0009825842322825668, "loss": 1.5491, "step": 8496 }, { "epoch": 0.37784002489884844, "grad_norm": 0.0715377926826477, "learning_rate": 0.0009825749920430444, "loss": 1.5493, "step": 8498 }, { "epoch": 0.3779289493575208, "grad_norm": 0.07521523535251617, "learning_rate": 0.000982565749396358, "loss": 1.5483, "step": 8500 }, { "epoch": 0.3779289493575208, "eval_loss": 1.525511622428894, "eval_runtime": 12.3728, "eval_samples_per_second": 558.484, "eval_steps_per_second": 69.831, "step": 8500 }, { "epoch": 0.3780178738161931, "grad_norm": 0.07613959908485413, "learning_rate": 0.0009825565043425537, "loss": 1.5473, "step": 8502 }, { "epoch": 0.3781067982748655, "grad_norm": 0.07284019142389297, "learning_rate": 0.0009825472568816775, "loss": 1.5514, "step": 8504 }, { "epoch": 0.37819572273353785, "grad_norm": 0.07196670770645142, "learning_rate": 0.0009825380070137752, "loss": 1.557, "step": 8506 }, { "epoch": 0.3782846471922102, "grad_norm": 0.07932493090629578, "learning_rate": 0.0009825287547388934, "loss": 1.5504, "step": 8508 }, { "epoch": 0.3783735716508826, "grad_norm": 0.07043527066707611, "learning_rate": 0.000982519500057078, "loss": 1.5411, "step": 8510 }, { "epoch": 0.37846249610955496, "grad_norm": 0.07196062058210373, "learning_rate": 0.0009825102429683755, "loss": 1.5507, "step": 8512 }, { "epoch": 0.37855142056822727, "grad_norm": 0.07716414332389832, "learning_rate": 0.0009825009834728315, "loss": 1.5461, "step": 8514 }, { "epoch": 0.37864034502689964, "grad_norm": 0.06990046054124832, "learning_rate": 0.0009824917215704926, "loss": 1.5522, "step": 8516 }, { "epoch": 0.378729269485572, "grad_norm": 0.07274769991636276, "learning_rate": 0.000982482457261405, "loss": 1.5507, "step": 8518 }, { "epoch": 0.3788181939442444, "grad_norm": 0.07060352712869644, "learning_rate": 0.0009824731905456149, "loss": 1.5491, "step": 8520 }, { "epoch": 0.37890711840291674, "grad_norm": 0.07348603010177612, "learning_rate": 0.000982463921423168, "loss": 1.5473, "step": 8522 }, { "epoch": 0.37899604286158906, "grad_norm": 0.07370968163013458, "learning_rate": 0.0009824546498941114, "loss": 1.5564, "step": 8524 }, { "epoch": 0.3790849673202614, "grad_norm": 0.07592252641916275, "learning_rate": 0.0009824453759584907, "loss": 1.5535, "step": 8526 }, { "epoch": 0.3791738917789338, "grad_norm": 0.07057470828294754, "learning_rate": 0.0009824360996163524, "loss": 1.5567, "step": 8528 }, { "epoch": 0.37926281623760616, "grad_norm": 0.07211954891681671, "learning_rate": 0.000982426820867743, "loss": 1.5548, "step": 8530 }, { "epoch": 0.3793517406962785, "grad_norm": 0.06937380880117416, "learning_rate": 0.0009824175397127083, "loss": 1.5457, "step": 8532 }, { "epoch": 0.3794406651549509, "grad_norm": 0.07182077318429947, "learning_rate": 0.0009824082561512948, "loss": 1.5589, "step": 8534 }, { "epoch": 0.3795295896136232, "grad_norm": 0.07062894105911255, "learning_rate": 0.000982398970183549, "loss": 1.5499, "step": 8536 }, { "epoch": 0.3796185140722956, "grad_norm": 0.07044274359941483, "learning_rate": 0.000982389681809517, "loss": 1.5555, "step": 8538 }, { "epoch": 0.37970743853096794, "grad_norm": 0.0717586874961853, "learning_rate": 0.0009823803910292452, "loss": 1.5452, "step": 8540 }, { "epoch": 0.3797963629896403, "grad_norm": 0.07334475219249725, "learning_rate": 0.0009823710978427798, "loss": 1.5576, "step": 8542 }, { "epoch": 0.3798852874483127, "grad_norm": 0.07173279672861099, "learning_rate": 0.0009823618022501675, "loss": 1.5451, "step": 8544 }, { "epoch": 0.379974211906985, "grad_norm": 0.07294796407222748, "learning_rate": 0.0009823525042514543, "loss": 1.5547, "step": 8546 }, { "epoch": 0.38006313636565736, "grad_norm": 0.0728803277015686, "learning_rate": 0.0009823432038466868, "loss": 1.5461, "step": 8548 }, { "epoch": 0.38015206082432973, "grad_norm": 0.070876345038414, "learning_rate": 0.0009823339010359114, "loss": 1.5525, "step": 8550 }, { "epoch": 0.3802409852830021, "grad_norm": 0.07284954190254211, "learning_rate": 0.0009823245958191745, "loss": 1.5486, "step": 8552 }, { "epoch": 0.38032990974167447, "grad_norm": 0.0750528946518898, "learning_rate": 0.0009823152881965223, "loss": 1.5507, "step": 8554 }, { "epoch": 0.3804188342003468, "grad_norm": 0.07205671817064285, "learning_rate": 0.0009823059781680014, "loss": 1.5513, "step": 8556 }, { "epoch": 0.38050775865901915, "grad_norm": 0.07429669797420502, "learning_rate": 0.0009822966657336582, "loss": 1.5503, "step": 8558 }, { "epoch": 0.3805966831176915, "grad_norm": 0.0736270323395729, "learning_rate": 0.0009822873508935394, "loss": 1.5513, "step": 8560 }, { "epoch": 0.3806856075763639, "grad_norm": 0.0691053494811058, "learning_rate": 0.0009822780336476908, "loss": 1.5482, "step": 8562 }, { "epoch": 0.38077453203503625, "grad_norm": 0.0720055103302002, "learning_rate": 0.0009822687139961596, "loss": 1.5469, "step": 8564 }, { "epoch": 0.3808634564937086, "grad_norm": 0.07141195237636566, "learning_rate": 0.0009822593919389922, "loss": 1.5539, "step": 8566 }, { "epoch": 0.38095238095238093, "grad_norm": 0.07182452082633972, "learning_rate": 0.0009822500674762345, "loss": 1.5476, "step": 8568 }, { "epoch": 0.3810413054110533, "grad_norm": 0.07438525557518005, "learning_rate": 0.0009822407406079336, "loss": 1.5486, "step": 8570 }, { "epoch": 0.38113022986972567, "grad_norm": 0.07334297895431519, "learning_rate": 0.0009822314113341359, "loss": 1.549, "step": 8572 }, { "epoch": 0.38121915432839804, "grad_norm": 0.07467876374721527, "learning_rate": 0.0009822220796548878, "loss": 1.5524, "step": 8574 }, { "epoch": 0.3813080787870704, "grad_norm": 0.0723448172211647, "learning_rate": 0.0009822127455702359, "loss": 1.5459, "step": 8576 }, { "epoch": 0.3813970032457427, "grad_norm": 0.07537683844566345, "learning_rate": 0.0009822034090802268, "loss": 1.5465, "step": 8578 }, { "epoch": 0.3814859277044151, "grad_norm": 0.07435756176710129, "learning_rate": 0.0009821940701849072, "loss": 1.5497, "step": 8580 }, { "epoch": 0.38157485216308745, "grad_norm": 0.07319629937410355, "learning_rate": 0.0009821847288843232, "loss": 1.5454, "step": 8582 }, { "epoch": 0.3816637766217598, "grad_norm": 0.07365921884775162, "learning_rate": 0.000982175385178522, "loss": 1.5457, "step": 8584 }, { "epoch": 0.3817527010804322, "grad_norm": 0.06942031532526016, "learning_rate": 0.00098216603906755, "loss": 1.5484, "step": 8586 }, { "epoch": 0.38184162553910456, "grad_norm": 0.07433439046144485, "learning_rate": 0.0009821566905514536, "loss": 1.5524, "step": 8588 }, { "epoch": 0.38193054999777687, "grad_norm": 0.07098887860774994, "learning_rate": 0.0009821473396302797, "loss": 1.5419, "step": 8590 }, { "epoch": 0.38201947445644924, "grad_norm": 0.07336100190877914, "learning_rate": 0.0009821379863040748, "loss": 1.5489, "step": 8592 }, { "epoch": 0.3821083989151216, "grad_norm": 0.07159991562366486, "learning_rate": 0.0009821286305728854, "loss": 1.5495, "step": 8594 }, { "epoch": 0.382197323373794, "grad_norm": 0.07283421605825424, "learning_rate": 0.0009821192724367585, "loss": 1.5474, "step": 8596 }, { "epoch": 0.38228624783246634, "grad_norm": 0.07494281232357025, "learning_rate": 0.0009821099118957407, "loss": 1.5481, "step": 8598 }, { "epoch": 0.38237517229113865, "grad_norm": 0.07011394947767258, "learning_rate": 0.0009821005489498787, "loss": 1.544, "step": 8600 }, { "epoch": 0.382464096749811, "grad_norm": 0.07218565046787262, "learning_rate": 0.000982091183599219, "loss": 1.5442, "step": 8602 }, { "epoch": 0.3825530212084834, "grad_norm": 0.07564288377761841, "learning_rate": 0.0009820818158438084, "loss": 1.5573, "step": 8604 }, { "epoch": 0.38264194566715576, "grad_norm": 0.07179772108793259, "learning_rate": 0.0009820724456836938, "loss": 1.5556, "step": 8606 }, { "epoch": 0.3827308701258281, "grad_norm": 0.07555480301380157, "learning_rate": 0.0009820630731189218, "loss": 1.5486, "step": 8608 }, { "epoch": 0.3828197945845005, "grad_norm": 0.07220586389303207, "learning_rate": 0.0009820536981495391, "loss": 1.5496, "step": 8610 }, { "epoch": 0.3829087190431728, "grad_norm": 0.07378540188074112, "learning_rate": 0.0009820443207755928, "loss": 1.5498, "step": 8612 }, { "epoch": 0.3829976435018452, "grad_norm": 0.07111547887325287, "learning_rate": 0.000982034940997129, "loss": 1.5494, "step": 8614 }, { "epoch": 0.38308656796051754, "grad_norm": 0.07289877533912659, "learning_rate": 0.000982025558814195, "loss": 1.5496, "step": 8616 }, { "epoch": 0.3831754924191899, "grad_norm": 0.07126215100288391, "learning_rate": 0.0009820161742268379, "loss": 1.5444, "step": 8618 }, { "epoch": 0.3832644168778623, "grad_norm": 0.07101909816265106, "learning_rate": 0.0009820067872351038, "loss": 1.5456, "step": 8620 }, { "epoch": 0.3833533413365346, "grad_norm": 0.0723530650138855, "learning_rate": 0.0009819973978390398, "loss": 1.5452, "step": 8622 }, { "epoch": 0.38344226579520696, "grad_norm": 0.07396049052476883, "learning_rate": 0.0009819880060386928, "loss": 1.5498, "step": 8624 }, { "epoch": 0.38353119025387933, "grad_norm": 0.07335137575864792, "learning_rate": 0.0009819786118341097, "loss": 1.554, "step": 8626 }, { "epoch": 0.3836201147125517, "grad_norm": 0.07116411626338959, "learning_rate": 0.0009819692152253372, "loss": 1.5461, "step": 8628 }, { "epoch": 0.38370903917122406, "grad_norm": 0.07290813326835632, "learning_rate": 0.0009819598162124224, "loss": 1.5431, "step": 8630 }, { "epoch": 0.3837979636298964, "grad_norm": 0.07139547169208527, "learning_rate": 0.0009819504147954119, "loss": 1.5439, "step": 8632 }, { "epoch": 0.38388688808856875, "grad_norm": 0.06995617598295212, "learning_rate": 0.0009819410109743528, "loss": 1.5455, "step": 8634 }, { "epoch": 0.3839758125472411, "grad_norm": 0.07103978097438812, "learning_rate": 0.0009819316047492918, "loss": 1.5439, "step": 8636 }, { "epoch": 0.3840647370059135, "grad_norm": 0.07140542566776276, "learning_rate": 0.000981922196120276, "loss": 1.5404, "step": 8638 }, { "epoch": 0.38415366146458585, "grad_norm": 0.0695871040225029, "learning_rate": 0.0009819127850873525, "loss": 1.5443, "step": 8640 }, { "epoch": 0.3842425859232582, "grad_norm": 0.07270847260951996, "learning_rate": 0.0009819033716505679, "loss": 1.5423, "step": 8642 }, { "epoch": 0.38433151038193053, "grad_norm": 0.07431674748659134, "learning_rate": 0.000981893955809969, "loss": 1.5434, "step": 8644 }, { "epoch": 0.3844204348406029, "grad_norm": 0.07491683959960938, "learning_rate": 0.0009818845375656035, "loss": 1.5443, "step": 8646 }, { "epoch": 0.38450935929927527, "grad_norm": 0.0724981278181076, "learning_rate": 0.0009818751169175177, "loss": 1.5455, "step": 8648 }, { "epoch": 0.38459828375794763, "grad_norm": 0.07452180981636047, "learning_rate": 0.0009818656938657589, "loss": 1.55, "step": 8650 }, { "epoch": 0.38468720821662, "grad_norm": 0.07273737341165543, "learning_rate": 0.000981856268410374, "loss": 1.5422, "step": 8652 }, { "epoch": 0.3847761326752923, "grad_norm": 0.07237744331359863, "learning_rate": 0.0009818468405514101, "loss": 1.5468, "step": 8654 }, { "epoch": 0.3848650571339647, "grad_norm": 0.07318416237831116, "learning_rate": 0.0009818374102889141, "loss": 1.5456, "step": 8656 }, { "epoch": 0.38495398159263705, "grad_norm": 0.0707939937710762, "learning_rate": 0.0009818279776229333, "loss": 1.5468, "step": 8658 }, { "epoch": 0.3850429060513094, "grad_norm": 0.07714451104402542, "learning_rate": 0.0009818185425535145, "loss": 1.55, "step": 8660 }, { "epoch": 0.3851318305099818, "grad_norm": 0.07308610528707504, "learning_rate": 0.0009818091050807047, "loss": 1.5422, "step": 8662 }, { "epoch": 0.38522075496865416, "grad_norm": 0.07573612779378891, "learning_rate": 0.0009817996652045512, "loss": 1.5528, "step": 8664 }, { "epoch": 0.38530967942732647, "grad_norm": 0.07352116703987122, "learning_rate": 0.0009817902229251009, "loss": 1.5453, "step": 8666 }, { "epoch": 0.38539860388599884, "grad_norm": 0.0721651017665863, "learning_rate": 0.000981780778242401, "loss": 1.5484, "step": 8668 }, { "epoch": 0.3854875283446712, "grad_norm": 0.07210839539766312, "learning_rate": 0.0009817713311564987, "loss": 1.5465, "step": 8670 }, { "epoch": 0.3855764528033436, "grad_norm": 0.07044050097465515, "learning_rate": 0.000981761881667441, "loss": 1.5464, "step": 8672 }, { "epoch": 0.38566537726201594, "grad_norm": 0.07243029773235321, "learning_rate": 0.000981752429775275, "loss": 1.5452, "step": 8674 }, { "epoch": 0.38575430172068825, "grad_norm": 0.07179372757673264, "learning_rate": 0.000981742975480048, "loss": 1.5457, "step": 8676 }, { "epoch": 0.3858432261793606, "grad_norm": 0.07337214797735214, "learning_rate": 0.000981733518781807, "loss": 1.5459, "step": 8678 }, { "epoch": 0.385932150638033, "grad_norm": 0.0745837390422821, "learning_rate": 0.0009817240596805994, "loss": 1.5494, "step": 8680 }, { "epoch": 0.38602107509670536, "grad_norm": 0.072391077876091, "learning_rate": 0.0009817145981764722, "loss": 1.5427, "step": 8682 }, { "epoch": 0.3861099995553777, "grad_norm": 0.07101153582334518, "learning_rate": 0.0009817051342694725, "loss": 1.5478, "step": 8684 }, { "epoch": 0.38619892401405004, "grad_norm": 0.07185351103544235, "learning_rate": 0.0009816956679596475, "loss": 1.5428, "step": 8686 }, { "epoch": 0.3862878484727224, "grad_norm": 0.07106965035200119, "learning_rate": 0.000981686199247045, "loss": 1.5515, "step": 8688 }, { "epoch": 0.3863767729313948, "grad_norm": 0.07175727933645248, "learning_rate": 0.0009816767281317113, "loss": 1.5422, "step": 8690 }, { "epoch": 0.38646569739006714, "grad_norm": 0.0721508041024208, "learning_rate": 0.0009816672546136944, "loss": 1.5444, "step": 8692 }, { "epoch": 0.3865546218487395, "grad_norm": 0.07354867458343506, "learning_rate": 0.0009816577786930414, "loss": 1.5476, "step": 8694 }, { "epoch": 0.3866435463074119, "grad_norm": 0.07081717252731323, "learning_rate": 0.000981648300369799, "loss": 1.5489, "step": 8696 }, { "epoch": 0.3867324707660842, "grad_norm": 0.07528056204319, "learning_rate": 0.0009816388196440154, "loss": 1.5507, "step": 8698 }, { "epoch": 0.38682139522475656, "grad_norm": 0.07244568318128586, "learning_rate": 0.000981629336515737, "loss": 1.5481, "step": 8700 }, { "epoch": 0.3869103196834289, "grad_norm": 0.07286922633647919, "learning_rate": 0.0009816198509850118, "loss": 1.5445, "step": 8702 }, { "epoch": 0.3869992441421013, "grad_norm": 0.06972247362136841, "learning_rate": 0.000981610363051887, "loss": 1.5486, "step": 8704 }, { "epoch": 0.38708816860077366, "grad_norm": 0.07090491056442261, "learning_rate": 0.0009816008727164093, "loss": 1.5433, "step": 8706 }, { "epoch": 0.387177093059446, "grad_norm": 0.0725286453962326, "learning_rate": 0.0009815913799786269, "loss": 1.5452, "step": 8708 }, { "epoch": 0.38726601751811834, "grad_norm": 0.07091175764799118, "learning_rate": 0.0009815818848385865, "loss": 1.5429, "step": 8710 }, { "epoch": 0.3873549419767907, "grad_norm": 0.07452746480703354, "learning_rate": 0.0009815723872963358, "loss": 1.5374, "step": 8712 }, { "epoch": 0.3874438664354631, "grad_norm": 0.07199020683765411, "learning_rate": 0.0009815628873519223, "loss": 1.5389, "step": 8714 }, { "epoch": 0.38753279089413545, "grad_norm": 0.07434893399477005, "learning_rate": 0.0009815533850053928, "loss": 1.5465, "step": 8716 }, { "epoch": 0.3876217153528078, "grad_norm": 0.0721900537610054, "learning_rate": 0.0009815438802567955, "loss": 1.5464, "step": 8718 }, { "epoch": 0.38771063981148013, "grad_norm": 0.0768774077296257, "learning_rate": 0.000981534373106177, "loss": 1.5477, "step": 8720 }, { "epoch": 0.3877995642701525, "grad_norm": 0.07451849430799484, "learning_rate": 0.0009815248635535853, "loss": 1.5439, "step": 8722 }, { "epoch": 0.38788848872882487, "grad_norm": 0.0778982937335968, "learning_rate": 0.0009815153515990679, "loss": 1.5425, "step": 8724 }, { "epoch": 0.38797741318749723, "grad_norm": 0.07124931365251541, "learning_rate": 0.0009815058372426716, "loss": 1.5497, "step": 8726 }, { "epoch": 0.3880663376461696, "grad_norm": 0.0718124508857727, "learning_rate": 0.0009814963204844444, "loss": 1.5436, "step": 8728 }, { "epoch": 0.3881552621048419, "grad_norm": 0.07384263724088669, "learning_rate": 0.0009814868013244336, "loss": 1.5444, "step": 8730 }, { "epoch": 0.3882441865635143, "grad_norm": 0.07269671559333801, "learning_rate": 0.0009814772797626867, "loss": 1.5482, "step": 8732 }, { "epoch": 0.38833311102218665, "grad_norm": 0.0724627822637558, "learning_rate": 0.0009814677557992513, "loss": 1.5495, "step": 8734 }, { "epoch": 0.388422035480859, "grad_norm": 0.07544320821762085, "learning_rate": 0.0009814582294341747, "loss": 1.5459, "step": 8736 }, { "epoch": 0.3885109599395314, "grad_norm": 0.0732409730553627, "learning_rate": 0.0009814487006675047, "loss": 1.543, "step": 8738 }, { "epoch": 0.3885998843982037, "grad_norm": 0.071528859436512, "learning_rate": 0.0009814391694992885, "loss": 1.5489, "step": 8740 }, { "epoch": 0.38868880885687607, "grad_norm": 0.07471916079521179, "learning_rate": 0.000981429635929574, "loss": 1.5427, "step": 8742 }, { "epoch": 0.38877773331554843, "grad_norm": 0.07275721430778503, "learning_rate": 0.0009814200999584085, "loss": 1.5449, "step": 8744 }, { "epoch": 0.3888666577742208, "grad_norm": 0.07258014380931854, "learning_rate": 0.0009814105615858395, "loss": 1.543, "step": 8746 }, { "epoch": 0.38895558223289317, "grad_norm": 0.07144364714622498, "learning_rate": 0.0009814010208119147, "loss": 1.5451, "step": 8748 }, { "epoch": 0.38904450669156554, "grad_norm": 0.07227019965648651, "learning_rate": 0.0009813914776366816, "loss": 1.5423, "step": 8750 }, { "epoch": 0.38913343115023785, "grad_norm": 0.0736045092344284, "learning_rate": 0.0009813819320601883, "loss": 1.5435, "step": 8752 }, { "epoch": 0.3892223556089102, "grad_norm": 0.07181594520807266, "learning_rate": 0.0009813723840824819, "loss": 1.5413, "step": 8754 }, { "epoch": 0.3893112800675826, "grad_norm": 0.07333064079284668, "learning_rate": 0.0009813628337036098, "loss": 1.5514, "step": 8756 }, { "epoch": 0.38940020452625496, "grad_norm": 0.07221698760986328, "learning_rate": 0.0009813532809236202, "loss": 1.5435, "step": 8758 }, { "epoch": 0.3894891289849273, "grad_norm": 0.07169820368289948, "learning_rate": 0.0009813437257425606, "loss": 1.5474, "step": 8760 }, { "epoch": 0.38957805344359964, "grad_norm": 0.07131782174110413, "learning_rate": 0.0009813341681604785, "loss": 1.5431, "step": 8762 }, { "epoch": 0.389666977902272, "grad_norm": 0.07381466031074524, "learning_rate": 0.0009813246081774218, "loss": 1.551, "step": 8764 }, { "epoch": 0.3897559023609444, "grad_norm": 0.07252717018127441, "learning_rate": 0.000981315045793438, "loss": 1.542, "step": 8766 }, { "epoch": 0.38984482681961674, "grad_norm": 0.07131364196538925, "learning_rate": 0.0009813054810085748, "loss": 1.5416, "step": 8768 }, { "epoch": 0.3899337512782891, "grad_norm": 0.07210860401391983, "learning_rate": 0.00098129591382288, "loss": 1.5414, "step": 8770 }, { "epoch": 0.3900226757369615, "grad_norm": 0.07035349309444427, "learning_rate": 0.0009812863442364014, "loss": 1.5424, "step": 8772 }, { "epoch": 0.3901116001956338, "grad_norm": 0.07026588171720505, "learning_rate": 0.0009812767722491864, "loss": 1.5484, "step": 8774 }, { "epoch": 0.39020052465430616, "grad_norm": 0.07101976126432419, "learning_rate": 0.000981267197861283, "loss": 1.5419, "step": 8776 }, { "epoch": 0.3902894491129785, "grad_norm": 0.07162394374608994, "learning_rate": 0.0009812576210727392, "loss": 1.5482, "step": 8778 }, { "epoch": 0.3903783735716509, "grad_norm": 0.07118743658065796, "learning_rate": 0.0009812480418836024, "loss": 1.5401, "step": 8780 }, { "epoch": 0.39046729803032326, "grad_norm": 0.0704973116517067, "learning_rate": 0.0009812384602939203, "loss": 1.5413, "step": 8782 }, { "epoch": 0.3905562224889956, "grad_norm": 0.07269991189241409, "learning_rate": 0.000981228876303741, "loss": 1.5438, "step": 8784 }, { "epoch": 0.39064514694766794, "grad_norm": 0.07165592908859253, "learning_rate": 0.0009812192899131122, "loss": 1.548, "step": 8786 }, { "epoch": 0.3907340714063403, "grad_norm": 0.07121030986309052, "learning_rate": 0.0009812097011220816, "loss": 1.5459, "step": 8788 }, { "epoch": 0.3908229958650127, "grad_norm": 0.07146286964416504, "learning_rate": 0.0009812001099306974, "loss": 1.539, "step": 8790 }, { "epoch": 0.39091192032368505, "grad_norm": 0.07126247137784958, "learning_rate": 0.0009811905163390068, "loss": 1.5442, "step": 8792 }, { "epoch": 0.3910008447823574, "grad_norm": 0.06753192096948624, "learning_rate": 0.0009811809203470582, "loss": 1.546, "step": 8794 }, { "epoch": 0.3910897692410297, "grad_norm": 0.0699109360575676, "learning_rate": 0.0009811713219548992, "loss": 1.542, "step": 8796 }, { "epoch": 0.3911786936997021, "grad_norm": 0.0715608224272728, "learning_rate": 0.0009811617211625781, "loss": 1.5429, "step": 8798 }, { "epoch": 0.39126761815837446, "grad_norm": 0.07075057178735733, "learning_rate": 0.0009811521179701422, "loss": 1.5433, "step": 8800 }, { "epoch": 0.39135654261704683, "grad_norm": 0.07018981128931046, "learning_rate": 0.0009811425123776398, "loss": 1.5432, "step": 8802 }, { "epoch": 0.3914454670757192, "grad_norm": 0.07095321267843246, "learning_rate": 0.0009811329043851185, "loss": 1.5474, "step": 8804 }, { "epoch": 0.3915343915343915, "grad_norm": 0.07263996452093124, "learning_rate": 0.0009811232939926267, "loss": 1.5394, "step": 8806 }, { "epoch": 0.3916233159930639, "grad_norm": 0.07189413160085678, "learning_rate": 0.0009811136812002117, "loss": 1.5441, "step": 8808 }, { "epoch": 0.39171224045173625, "grad_norm": 0.0719655454158783, "learning_rate": 0.000981104066007922, "loss": 1.545, "step": 8810 }, { "epoch": 0.3918011649104086, "grad_norm": 0.0707225650548935, "learning_rate": 0.0009810944484158052, "loss": 1.5435, "step": 8812 }, { "epoch": 0.391890089369081, "grad_norm": 0.07110429555177689, "learning_rate": 0.0009810848284239097, "loss": 1.5402, "step": 8814 }, { "epoch": 0.3919790138277533, "grad_norm": 0.07303628325462341, "learning_rate": 0.000981075206032283, "loss": 1.5408, "step": 8816 }, { "epoch": 0.39206793828642567, "grad_norm": 0.07266764342784882, "learning_rate": 0.0009810655812409735, "loss": 1.5399, "step": 8818 }, { "epoch": 0.39215686274509803, "grad_norm": 0.07289233058691025, "learning_rate": 0.0009810559540500289, "loss": 1.5437, "step": 8820 }, { "epoch": 0.3922457872037704, "grad_norm": 0.07148449867963791, "learning_rate": 0.0009810463244594974, "loss": 1.5445, "step": 8822 }, { "epoch": 0.39233471166244277, "grad_norm": 0.0700794905424118, "learning_rate": 0.0009810366924694269, "loss": 1.5361, "step": 8824 }, { "epoch": 0.39242363612111514, "grad_norm": 0.07210569828748703, "learning_rate": 0.0009810270580798656, "loss": 1.5488, "step": 8826 }, { "epoch": 0.39251256057978745, "grad_norm": 0.07074844092130661, "learning_rate": 0.0009810174212908614, "loss": 1.5454, "step": 8828 }, { "epoch": 0.3926014850384598, "grad_norm": 0.07008279860019684, "learning_rate": 0.0009810077821024625, "loss": 1.5463, "step": 8830 }, { "epoch": 0.3926904094971322, "grad_norm": 0.07124333083629608, "learning_rate": 0.0009809981405147171, "loss": 1.5415, "step": 8832 }, { "epoch": 0.39277933395580455, "grad_norm": 0.06977679580450058, "learning_rate": 0.000980988496527673, "loss": 1.5459, "step": 8834 }, { "epoch": 0.3928682584144769, "grad_norm": 0.0721663162112236, "learning_rate": 0.0009809788501413783, "loss": 1.5433, "step": 8836 }, { "epoch": 0.39295718287314924, "grad_norm": 0.07385817915201187, "learning_rate": 0.0009809692013558816, "loss": 1.545, "step": 8838 }, { "epoch": 0.3930461073318216, "grad_norm": 0.07491130381822586, "learning_rate": 0.0009809595501712302, "loss": 1.5419, "step": 8840 }, { "epoch": 0.39313503179049397, "grad_norm": 0.07294177263975143, "learning_rate": 0.0009809498965874732, "loss": 1.5461, "step": 8842 }, { "epoch": 0.39322395624916634, "grad_norm": 0.07299856841564178, "learning_rate": 0.000980940240604658, "loss": 1.5455, "step": 8844 }, { "epoch": 0.3933128807078387, "grad_norm": 0.07228664308786392, "learning_rate": 0.0009809305822228332, "loss": 1.5422, "step": 8846 }, { "epoch": 0.3934018051665111, "grad_norm": 0.07752872258424759, "learning_rate": 0.0009809209214420467, "loss": 1.5404, "step": 8848 }, { "epoch": 0.3934907296251834, "grad_norm": 0.07412002980709076, "learning_rate": 0.000980911258262347, "loss": 1.5446, "step": 8850 }, { "epoch": 0.39357965408385576, "grad_norm": 0.06998312473297119, "learning_rate": 0.0009809015926837818, "loss": 1.5454, "step": 8852 }, { "epoch": 0.3936685785425281, "grad_norm": 0.0724993646144867, "learning_rate": 0.0009808919247064, "loss": 1.5369, "step": 8854 }, { "epoch": 0.3937575030012005, "grad_norm": 0.0712260976433754, "learning_rate": 0.0009808822543302493, "loss": 1.544, "step": 8856 }, { "epoch": 0.39384642745987286, "grad_norm": 0.07351220399141312, "learning_rate": 0.0009808725815553781, "loss": 1.5481, "step": 8858 }, { "epoch": 0.3939353519185452, "grad_norm": 0.07386364787817001, "learning_rate": 0.0009808629063818347, "loss": 1.5355, "step": 8860 }, { "epoch": 0.39402427637721754, "grad_norm": 0.0700744241476059, "learning_rate": 0.000980853228809667, "loss": 1.5344, "step": 8862 }, { "epoch": 0.3941132008358899, "grad_norm": 0.06920643895864487, "learning_rate": 0.0009808435488389239, "loss": 1.5445, "step": 8864 }, { "epoch": 0.3942021252945623, "grad_norm": 0.07501189410686493, "learning_rate": 0.0009808338664696532, "loss": 1.545, "step": 8866 }, { "epoch": 0.39429104975323465, "grad_norm": 0.07265200465917587, "learning_rate": 0.0009808241817019035, "loss": 1.5478, "step": 8868 }, { "epoch": 0.39437997421190696, "grad_norm": 0.07012397050857544, "learning_rate": 0.000980814494535723, "loss": 1.5415, "step": 8870 }, { "epoch": 0.3944688986705793, "grad_norm": 0.07346207648515701, "learning_rate": 0.0009808048049711597, "loss": 1.5456, "step": 8872 }, { "epoch": 0.3945578231292517, "grad_norm": 0.06875230371952057, "learning_rate": 0.0009807951130082625, "loss": 1.5495, "step": 8874 }, { "epoch": 0.39464674758792406, "grad_norm": 0.07028649747371674, "learning_rate": 0.0009807854186470793, "loss": 1.5382, "step": 8876 }, { "epoch": 0.39473567204659643, "grad_norm": 0.07126179337501526, "learning_rate": 0.0009807757218876587, "loss": 1.5412, "step": 8878 }, { "epoch": 0.3948245965052688, "grad_norm": 0.07412000000476837, "learning_rate": 0.000980766022730049, "loss": 1.5441, "step": 8880 }, { "epoch": 0.3949135209639411, "grad_norm": 0.06931138783693314, "learning_rate": 0.0009807563211742987, "loss": 1.5408, "step": 8882 }, { "epoch": 0.3950024454226135, "grad_norm": 0.06960853189229965, "learning_rate": 0.0009807466172204557, "loss": 1.5467, "step": 8884 }, { "epoch": 0.39509136988128585, "grad_norm": 0.07029549777507782, "learning_rate": 0.000980736910868569, "loss": 1.5485, "step": 8886 }, { "epoch": 0.3951802943399582, "grad_norm": 0.070205919444561, "learning_rate": 0.0009807272021186868, "loss": 1.544, "step": 8888 }, { "epoch": 0.3952692187986306, "grad_norm": 0.07060658931732178, "learning_rate": 0.0009807174909708576, "loss": 1.542, "step": 8890 }, { "epoch": 0.3953581432573029, "grad_norm": 0.070548877120018, "learning_rate": 0.0009807077774251296, "loss": 1.5451, "step": 8892 }, { "epoch": 0.39544706771597526, "grad_norm": 0.06973063200712204, "learning_rate": 0.0009806980614815515, "loss": 1.5422, "step": 8894 }, { "epoch": 0.39553599217464763, "grad_norm": 0.07283268868923187, "learning_rate": 0.0009806883431401718, "loss": 1.5433, "step": 8896 }, { "epoch": 0.39562491663332, "grad_norm": 0.07797042280435562, "learning_rate": 0.0009806786224010387, "loss": 1.549, "step": 8898 }, { "epoch": 0.39571384109199237, "grad_norm": 0.07383356988430023, "learning_rate": 0.000980668899264201, "loss": 1.5419, "step": 8900 }, { "epoch": 0.39580276555066474, "grad_norm": 0.08115527033805847, "learning_rate": 0.0009806591737297069, "loss": 1.5418, "step": 8902 }, { "epoch": 0.39589169000933705, "grad_norm": 0.0753684863448143, "learning_rate": 0.0009806494457976049, "loss": 1.5422, "step": 8904 }, { "epoch": 0.3959806144680094, "grad_norm": 0.07579310983419418, "learning_rate": 0.0009806397154679437, "loss": 1.5431, "step": 8906 }, { "epoch": 0.3960695389266818, "grad_norm": 0.0755925178527832, "learning_rate": 0.0009806299827407722, "loss": 1.5424, "step": 8908 }, { "epoch": 0.39615846338535415, "grad_norm": 0.07460242509841919, "learning_rate": 0.0009806202476161383, "loss": 1.5464, "step": 8910 }, { "epoch": 0.3962473878440265, "grad_norm": 0.07479722797870636, "learning_rate": 0.000980610510094091, "loss": 1.5461, "step": 8912 }, { "epoch": 0.39633631230269883, "grad_norm": 0.0737161934375763, "learning_rate": 0.0009806007701746785, "loss": 1.5415, "step": 8914 }, { "epoch": 0.3964252367613712, "grad_norm": 0.07179709523916245, "learning_rate": 0.0009805910278579495, "loss": 1.5388, "step": 8916 }, { "epoch": 0.39651416122004357, "grad_norm": 0.07436472177505493, "learning_rate": 0.0009805812831439528, "loss": 1.5408, "step": 8918 }, { "epoch": 0.39660308567871594, "grad_norm": 0.07385652512311935, "learning_rate": 0.000980571536032737, "loss": 1.5375, "step": 8920 }, { "epoch": 0.3966920101373883, "grad_norm": 0.07151640951633453, "learning_rate": 0.0009805617865243504, "loss": 1.5407, "step": 8922 }, { "epoch": 0.3967809345960606, "grad_norm": 0.07092677056789398, "learning_rate": 0.000980552034618842, "loss": 1.5415, "step": 8924 }, { "epoch": 0.396869859054733, "grad_norm": 0.0716884657740593, "learning_rate": 0.0009805422803162603, "loss": 1.5341, "step": 8926 }, { "epoch": 0.39695878351340536, "grad_norm": 0.07016546279191971, "learning_rate": 0.0009805325236166538, "loss": 1.5385, "step": 8928 }, { "epoch": 0.3970477079720777, "grad_norm": 0.07406767457723618, "learning_rate": 0.0009805227645200713, "loss": 1.5411, "step": 8930 }, { "epoch": 0.3971366324307501, "grad_norm": 0.0695648342370987, "learning_rate": 0.0009805130030265616, "loss": 1.5376, "step": 8932 }, { "epoch": 0.39722555688942246, "grad_norm": 0.07370419800281525, "learning_rate": 0.0009805032391361733, "loss": 1.5464, "step": 8934 }, { "epoch": 0.3973144813480948, "grad_norm": 0.07572710514068604, "learning_rate": 0.0009804934728489551, "loss": 1.5397, "step": 8936 }, { "epoch": 0.39740340580676714, "grad_norm": 0.07438923418521881, "learning_rate": 0.0009804837041649556, "loss": 1.5395, "step": 8938 }, { "epoch": 0.3974923302654395, "grad_norm": 0.07321964204311371, "learning_rate": 0.0009804739330842235, "loss": 1.5405, "step": 8940 }, { "epoch": 0.3975812547241119, "grad_norm": 0.07280636578798294, "learning_rate": 0.000980464159606808, "loss": 1.5442, "step": 8942 }, { "epoch": 0.39767017918278424, "grad_norm": 0.07473626732826233, "learning_rate": 0.0009804543837327573, "loss": 1.5427, "step": 8944 }, { "epoch": 0.39775910364145656, "grad_norm": 0.06971806287765503, "learning_rate": 0.0009804446054621203, "loss": 1.5431, "step": 8946 }, { "epoch": 0.3978480281001289, "grad_norm": 0.07413081079721451, "learning_rate": 0.0009804348247949462, "loss": 1.5433, "step": 8948 }, { "epoch": 0.3979369525588013, "grad_norm": 0.07259391993284225, "learning_rate": 0.0009804250417312832, "loss": 1.5446, "step": 8950 }, { "epoch": 0.39802587701747366, "grad_norm": 0.0727652907371521, "learning_rate": 0.0009804152562711804, "loss": 1.5387, "step": 8952 }, { "epoch": 0.39811480147614603, "grad_norm": 0.07275709509849548, "learning_rate": 0.0009804054684146865, "loss": 1.5379, "step": 8954 }, { "epoch": 0.3982037259348184, "grad_norm": 0.07379709184169769, "learning_rate": 0.0009803956781618505, "loss": 1.5357, "step": 8956 }, { "epoch": 0.3982926503934907, "grad_norm": 0.06894747912883759, "learning_rate": 0.000980385885512721, "loss": 1.544, "step": 8958 }, { "epoch": 0.3983815748521631, "grad_norm": 0.06997144967317581, "learning_rate": 0.000980376090467347, "loss": 1.5365, "step": 8960 }, { "epoch": 0.39847049931083545, "grad_norm": 0.07101274281740189, "learning_rate": 0.0009803662930257773, "loss": 1.5355, "step": 8962 }, { "epoch": 0.3985594237695078, "grad_norm": 0.07142171263694763, "learning_rate": 0.0009803564931880606, "loss": 1.5441, "step": 8964 }, { "epoch": 0.3986483482281802, "grad_norm": 0.06851017475128174, "learning_rate": 0.0009803466909542463, "loss": 1.5381, "step": 8966 }, { "epoch": 0.3987372726868525, "grad_norm": 0.06920833140611649, "learning_rate": 0.0009803368863243829, "loss": 1.5397, "step": 8968 }, { "epoch": 0.39882619714552486, "grad_norm": 0.07191641628742218, "learning_rate": 0.0009803270792985194, "loss": 1.541, "step": 8970 }, { "epoch": 0.39891512160419723, "grad_norm": 0.07115970551967621, "learning_rate": 0.0009803172698767046, "loss": 1.5363, "step": 8972 }, { "epoch": 0.3990040460628696, "grad_norm": 0.06930098682641983, "learning_rate": 0.0009803074580589874, "loss": 1.5372, "step": 8974 }, { "epoch": 0.39909297052154197, "grad_norm": 0.06866835802793503, "learning_rate": 0.0009802976438454173, "loss": 1.5429, "step": 8976 }, { "epoch": 0.39918189498021434, "grad_norm": 0.06998659670352936, "learning_rate": 0.0009802878272360423, "loss": 1.537, "step": 8978 }, { "epoch": 0.39927081943888665, "grad_norm": 0.07126189023256302, "learning_rate": 0.0009802780082309123, "loss": 1.5386, "step": 8980 }, { "epoch": 0.399359743897559, "grad_norm": 0.0688776820898056, "learning_rate": 0.0009802681868300758, "loss": 1.5404, "step": 8982 }, { "epoch": 0.3994486683562314, "grad_norm": 0.07415543496608734, "learning_rate": 0.0009802583630335818, "loss": 1.5461, "step": 8984 }, { "epoch": 0.39953759281490375, "grad_norm": 0.06990732252597809, "learning_rate": 0.0009802485368414792, "loss": 1.54, "step": 8986 }, { "epoch": 0.3996265172735761, "grad_norm": 0.06985145807266235, "learning_rate": 0.0009802387082538174, "loss": 1.5392, "step": 8988 }, { "epoch": 0.39971544173224843, "grad_norm": 0.07134158909320831, "learning_rate": 0.000980228877270645, "loss": 1.5432, "step": 8990 }, { "epoch": 0.3998043661909208, "grad_norm": 0.07347475737333298, "learning_rate": 0.0009802190438920116, "loss": 1.5434, "step": 8992 }, { "epoch": 0.39989329064959317, "grad_norm": 0.07028983533382416, "learning_rate": 0.0009802092081179657, "loss": 1.5385, "step": 8994 }, { "epoch": 0.39998221510826554, "grad_norm": 0.07134208083152771, "learning_rate": 0.0009801993699485566, "loss": 1.5446, "step": 8996 }, { "epoch": 0.4000711395669379, "grad_norm": 0.07063104212284088, "learning_rate": 0.000980189529383833, "loss": 1.5452, "step": 8998 }, { "epoch": 0.4001600640256102, "grad_norm": 0.06996823847293854, "learning_rate": 0.0009801796864238447, "loss": 1.5371, "step": 9000 }, { "epoch": 0.4001600640256102, "eval_loss": 1.51895010471344, "eval_runtime": 12.3629, "eval_samples_per_second": 558.929, "eval_steps_per_second": 69.886, "step": 9000 }, { "epoch": 0.4002489884842826, "grad_norm": 0.07135413587093353, "learning_rate": 0.0009801698410686404, "loss": 1.5427, "step": 9002 }, { "epoch": 0.40033791294295495, "grad_norm": 0.07339084148406982, "learning_rate": 0.0009801599933182692, "loss": 1.5352, "step": 9004 }, { "epoch": 0.4004268374016273, "grad_norm": 0.06984297931194305, "learning_rate": 0.00098015014317278, "loss": 1.5358, "step": 9006 }, { "epoch": 0.4005157618602997, "grad_norm": 0.07473129779100418, "learning_rate": 0.0009801402906322225, "loss": 1.536, "step": 9008 }, { "epoch": 0.40060468631897206, "grad_norm": 0.07112877070903778, "learning_rate": 0.0009801304356966455, "loss": 1.5474, "step": 9010 }, { "epoch": 0.40069361077764437, "grad_norm": 0.07204887270927429, "learning_rate": 0.000980120578366098, "loss": 1.5418, "step": 9012 }, { "epoch": 0.40078253523631674, "grad_norm": 0.06817831844091415, "learning_rate": 0.0009801107186406296, "loss": 1.531, "step": 9014 }, { "epoch": 0.4008714596949891, "grad_norm": 0.07230226695537567, "learning_rate": 0.000980100856520289, "loss": 1.5388, "step": 9016 }, { "epoch": 0.4009603841536615, "grad_norm": 0.07334619015455246, "learning_rate": 0.0009800909920051257, "loss": 1.5435, "step": 9018 }, { "epoch": 0.40104930861233384, "grad_norm": 0.06880249083042145, "learning_rate": 0.0009800811250951888, "loss": 1.5336, "step": 9020 }, { "epoch": 0.40113823307100616, "grad_norm": 0.07309377938508987, "learning_rate": 0.0009800712557905278, "loss": 1.5394, "step": 9022 }, { "epoch": 0.4012271575296785, "grad_norm": 0.07203828543424606, "learning_rate": 0.0009800613840911913, "loss": 1.5421, "step": 9024 }, { "epoch": 0.4013160819883509, "grad_norm": 0.07213805615901947, "learning_rate": 0.0009800515099972294, "loss": 1.5456, "step": 9026 }, { "epoch": 0.40140500644702326, "grad_norm": 0.07065927237272263, "learning_rate": 0.0009800416335086907, "loss": 1.5389, "step": 9028 }, { "epoch": 0.40149393090569563, "grad_norm": 0.0714295282959938, "learning_rate": 0.0009800317546256245, "loss": 1.5386, "step": 9030 }, { "epoch": 0.401582855364368, "grad_norm": 0.07336141169071198, "learning_rate": 0.0009800218733480802, "loss": 1.54, "step": 9032 }, { "epoch": 0.4016717798230403, "grad_norm": 0.0692136138677597, "learning_rate": 0.0009800119896761074, "loss": 1.5393, "step": 9034 }, { "epoch": 0.4017607042817127, "grad_norm": 0.07176699489355087, "learning_rate": 0.0009800021036097549, "loss": 1.5424, "step": 9036 }, { "epoch": 0.40184962874038505, "grad_norm": 0.06777803599834442, "learning_rate": 0.0009799922151490722, "loss": 1.5379, "step": 9038 }, { "epoch": 0.4019385531990574, "grad_norm": 0.07473397254943848, "learning_rate": 0.0009799823242941088, "loss": 1.54, "step": 9040 }, { "epoch": 0.4020274776577298, "grad_norm": 0.07176635414361954, "learning_rate": 0.000979972431044914, "loss": 1.5363, "step": 9042 }, { "epoch": 0.4021164021164021, "grad_norm": 0.0704076886177063, "learning_rate": 0.0009799625354015367, "loss": 1.5381, "step": 9044 }, { "epoch": 0.40220532657507446, "grad_norm": 0.07321424782276154, "learning_rate": 0.0009799526373640267, "loss": 1.5393, "step": 9046 }, { "epoch": 0.40229425103374683, "grad_norm": 0.0706743597984314, "learning_rate": 0.0009799427369324334, "loss": 1.5404, "step": 9048 }, { "epoch": 0.4023831754924192, "grad_norm": 0.07315555214881897, "learning_rate": 0.000979932834106806, "loss": 1.5373, "step": 9050 }, { "epoch": 0.40247209995109157, "grad_norm": 0.07187056541442871, "learning_rate": 0.000979922928887194, "loss": 1.5369, "step": 9052 }, { "epoch": 0.4025610244097639, "grad_norm": 0.06762688606977463, "learning_rate": 0.0009799130212736467, "loss": 1.5424, "step": 9054 }, { "epoch": 0.40264994886843625, "grad_norm": 0.07067506015300751, "learning_rate": 0.0009799031112662138, "loss": 1.5421, "step": 9056 }, { "epoch": 0.4027388733271086, "grad_norm": 0.07152686268091202, "learning_rate": 0.0009798931988649442, "loss": 1.5434, "step": 9058 }, { "epoch": 0.402827797785781, "grad_norm": 0.07224275916814804, "learning_rate": 0.0009798832840698878, "loss": 1.5396, "step": 9060 }, { "epoch": 0.40291672224445335, "grad_norm": 0.06946686655282974, "learning_rate": 0.000979873366881094, "loss": 1.5462, "step": 9062 }, { "epoch": 0.4030056467031257, "grad_norm": 0.07184547185897827, "learning_rate": 0.000979863447298612, "loss": 1.5358, "step": 9064 }, { "epoch": 0.40309457116179803, "grad_norm": 0.06754238903522491, "learning_rate": 0.0009798535253224916, "loss": 1.5369, "step": 9066 }, { "epoch": 0.4031834956204704, "grad_norm": 0.06945636868476868, "learning_rate": 0.0009798436009527823, "loss": 1.5443, "step": 9068 }, { "epoch": 0.40327242007914277, "grad_norm": 0.07276376336812973, "learning_rate": 0.0009798336741895332, "loss": 1.5391, "step": 9070 }, { "epoch": 0.40336134453781514, "grad_norm": 0.07267561554908752, "learning_rate": 0.0009798237450327942, "loss": 1.5445, "step": 9072 }, { "epoch": 0.4034502689964875, "grad_norm": 0.07162649929523468, "learning_rate": 0.000979813813482615, "loss": 1.5416, "step": 9074 }, { "epoch": 0.4035391934551598, "grad_norm": 0.06982056051492691, "learning_rate": 0.0009798038795390445, "loss": 1.5402, "step": 9076 }, { "epoch": 0.4036281179138322, "grad_norm": 0.06865835189819336, "learning_rate": 0.0009797939432021326, "loss": 1.5426, "step": 9078 }, { "epoch": 0.40371704237250455, "grad_norm": 0.07144743204116821, "learning_rate": 0.000979784004471929, "loss": 1.5405, "step": 9080 }, { "epoch": 0.4038059668311769, "grad_norm": 0.0699494332075119, "learning_rate": 0.000979774063348483, "loss": 1.5377, "step": 9082 }, { "epoch": 0.4038948912898493, "grad_norm": 0.06973431259393692, "learning_rate": 0.0009797641198318445, "loss": 1.5392, "step": 9084 }, { "epoch": 0.40398381574852166, "grad_norm": 0.07053232192993164, "learning_rate": 0.000979754173922063, "loss": 1.537, "step": 9086 }, { "epoch": 0.40407274020719397, "grad_norm": 0.06677805632352829, "learning_rate": 0.0009797442256191877, "loss": 1.5489, "step": 9088 }, { "epoch": 0.40416166466586634, "grad_norm": 0.07138942182064056, "learning_rate": 0.000979734274923269, "loss": 1.5407, "step": 9090 }, { "epoch": 0.4042505891245387, "grad_norm": 0.07154888659715652, "learning_rate": 0.0009797243218343558, "loss": 1.5373, "step": 9092 }, { "epoch": 0.4043395135832111, "grad_norm": 0.07199876755475998, "learning_rate": 0.000979714366352498, "loss": 1.5344, "step": 9094 }, { "epoch": 0.40442843804188344, "grad_norm": 0.07113402336835861, "learning_rate": 0.0009797044084777456, "loss": 1.54, "step": 9096 }, { "epoch": 0.40451736250055575, "grad_norm": 0.07240010052919388, "learning_rate": 0.0009796944482101477, "loss": 1.5366, "step": 9098 }, { "epoch": 0.4046062869592281, "grad_norm": 0.07278742641210556, "learning_rate": 0.0009796844855497545, "loss": 1.5375, "step": 9100 }, { "epoch": 0.4046952114179005, "grad_norm": 0.06928924471139908, "learning_rate": 0.0009796745204966152, "loss": 1.5358, "step": 9102 }, { "epoch": 0.40478413587657286, "grad_norm": 0.06925825029611588, "learning_rate": 0.0009796645530507802, "loss": 1.544, "step": 9104 }, { "epoch": 0.4048730603352452, "grad_norm": 0.07367236167192459, "learning_rate": 0.0009796545832122985, "loss": 1.5375, "step": 9106 }, { "epoch": 0.40496198479391754, "grad_norm": 0.07212896645069122, "learning_rate": 0.00097964461098122, "loss": 1.5415, "step": 9108 }, { "epoch": 0.4050509092525899, "grad_norm": 0.07600012421607971, "learning_rate": 0.0009796346363575947, "loss": 1.5381, "step": 9110 }, { "epoch": 0.4051398337112623, "grad_norm": 0.07662640511989594, "learning_rate": 0.0009796246593414724, "loss": 1.5339, "step": 9112 }, { "epoch": 0.40522875816993464, "grad_norm": 0.070067398250103, "learning_rate": 0.0009796146799329025, "loss": 1.5423, "step": 9114 }, { "epoch": 0.405317682628607, "grad_norm": 0.07174143195152283, "learning_rate": 0.0009796046981319349, "loss": 1.5416, "step": 9116 }, { "epoch": 0.4054066070872794, "grad_norm": 0.06986463814973831, "learning_rate": 0.0009795947139386195, "loss": 1.5367, "step": 9118 }, { "epoch": 0.4054955315459517, "grad_norm": 0.0711868554353714, "learning_rate": 0.000979584727353006, "loss": 1.5423, "step": 9120 }, { "epoch": 0.40558445600462406, "grad_norm": 0.07185269147157669, "learning_rate": 0.0009795747383751446, "loss": 1.5374, "step": 9122 }, { "epoch": 0.40567338046329643, "grad_norm": 0.07167672365903854, "learning_rate": 0.0009795647470050846, "loss": 1.5397, "step": 9124 }, { "epoch": 0.4057623049219688, "grad_norm": 0.07179324328899384, "learning_rate": 0.000979554753242876, "loss": 1.5418, "step": 9126 }, { "epoch": 0.40585122938064117, "grad_norm": 0.07255962491035461, "learning_rate": 0.0009795447570885686, "loss": 1.5377, "step": 9128 }, { "epoch": 0.4059401538393135, "grad_norm": 0.0690963864326477, "learning_rate": 0.0009795347585422123, "loss": 1.5444, "step": 9130 }, { "epoch": 0.40602907829798585, "grad_norm": 0.07536199688911438, "learning_rate": 0.0009795247576038573, "loss": 1.5363, "step": 9132 }, { "epoch": 0.4061180027566582, "grad_norm": 0.06956053525209427, "learning_rate": 0.000979514754273553, "loss": 1.541, "step": 9134 }, { "epoch": 0.4062069272153306, "grad_norm": 0.07050742208957672, "learning_rate": 0.0009795047485513498, "loss": 1.5427, "step": 9136 }, { "epoch": 0.40629585167400295, "grad_norm": 0.0698935016989708, "learning_rate": 0.000979494740437297, "loss": 1.5354, "step": 9138 }, { "epoch": 0.4063847761326753, "grad_norm": 0.0688849613070488, "learning_rate": 0.0009794847299314448, "loss": 1.5354, "step": 9140 }, { "epoch": 0.40647370059134763, "grad_norm": 0.0699022114276886, "learning_rate": 0.0009794747170338435, "loss": 1.5383, "step": 9142 }, { "epoch": 0.40656262505002, "grad_norm": 0.06858205050230026, "learning_rate": 0.0009794647017445425, "loss": 1.5436, "step": 9144 }, { "epoch": 0.40665154950869237, "grad_norm": 0.06846922636032104, "learning_rate": 0.000979454684063592, "loss": 1.5442, "step": 9146 }, { "epoch": 0.40674047396736474, "grad_norm": 0.07031798362731934, "learning_rate": 0.000979444663991042, "loss": 1.5362, "step": 9148 }, { "epoch": 0.4068293984260371, "grad_norm": 0.07397054880857468, "learning_rate": 0.0009794346415269424, "loss": 1.5395, "step": 9150 }, { "epoch": 0.4069183228847094, "grad_norm": 0.0680195763707161, "learning_rate": 0.0009794246166713433, "loss": 1.5375, "step": 9152 }, { "epoch": 0.4070072473433818, "grad_norm": 0.06949716061353683, "learning_rate": 0.0009794145894242946, "loss": 1.5439, "step": 9154 }, { "epoch": 0.40709617180205415, "grad_norm": 0.07266386598348618, "learning_rate": 0.0009794045597858463, "loss": 1.5396, "step": 9156 }, { "epoch": 0.4071850962607265, "grad_norm": 0.07391185313463211, "learning_rate": 0.0009793945277560485, "loss": 1.5412, "step": 9158 }, { "epoch": 0.4072740207193989, "grad_norm": 0.07154107093811035, "learning_rate": 0.0009793844933349513, "loss": 1.5426, "step": 9160 }, { "epoch": 0.4073629451780712, "grad_norm": 0.06954929977655411, "learning_rate": 0.0009793744565226045, "loss": 1.5383, "step": 9162 }, { "epoch": 0.40745186963674357, "grad_norm": 0.06934426724910736, "learning_rate": 0.0009793644173190584, "loss": 1.5397, "step": 9164 }, { "epoch": 0.40754079409541594, "grad_norm": 0.07320673018693924, "learning_rate": 0.000979354375724363, "loss": 1.5324, "step": 9166 }, { "epoch": 0.4076297185540883, "grad_norm": 0.07140939682722092, "learning_rate": 0.0009793443317385685, "loss": 1.5336, "step": 9168 }, { "epoch": 0.4077186430127607, "grad_norm": 0.07228023558855057, "learning_rate": 0.0009793342853617248, "loss": 1.5398, "step": 9170 }, { "epoch": 0.40780756747143304, "grad_norm": 0.07187279313802719, "learning_rate": 0.0009793242365938822, "loss": 1.5452, "step": 9172 }, { "epoch": 0.40789649193010535, "grad_norm": 0.06871677190065384, "learning_rate": 0.000979314185435091, "loss": 1.5341, "step": 9174 }, { "epoch": 0.4079854163887777, "grad_norm": 0.06969435513019562, "learning_rate": 0.0009793041318854007, "loss": 1.5386, "step": 9176 }, { "epoch": 0.4080743408474501, "grad_norm": 0.07604380697011948, "learning_rate": 0.0009792940759448619, "loss": 1.5462, "step": 9178 }, { "epoch": 0.40816326530612246, "grad_norm": 0.06866500526666641, "learning_rate": 0.0009792840176135248, "loss": 1.5392, "step": 9180 }, { "epoch": 0.4082521897647948, "grad_norm": 0.06986360996961594, "learning_rate": 0.0009792739568914393, "loss": 1.5381, "step": 9182 }, { "epoch": 0.40834111422346714, "grad_norm": 0.06875632703304291, "learning_rate": 0.0009792638937786559, "loss": 1.5407, "step": 9184 }, { "epoch": 0.4084300386821395, "grad_norm": 0.06942211091518402, "learning_rate": 0.0009792538282752245, "loss": 1.5391, "step": 9186 }, { "epoch": 0.4085189631408119, "grad_norm": 0.06980740278959274, "learning_rate": 0.0009792437603811954, "loss": 1.5366, "step": 9188 }, { "epoch": 0.40860788759948424, "grad_norm": 0.0708228349685669, "learning_rate": 0.000979233690096619, "loss": 1.5413, "step": 9190 }, { "epoch": 0.4086968120581566, "grad_norm": 0.07046905905008316, "learning_rate": 0.0009792236174215455, "loss": 1.5366, "step": 9192 }, { "epoch": 0.408785736516829, "grad_norm": 0.0711686909198761, "learning_rate": 0.000979213542356025, "loss": 1.5439, "step": 9194 }, { "epoch": 0.4088746609755013, "grad_norm": 0.06964288651943207, "learning_rate": 0.0009792034649001079, "loss": 1.5353, "step": 9196 }, { "epoch": 0.40896358543417366, "grad_norm": 0.07251102477312088, "learning_rate": 0.0009791933850538442, "loss": 1.5396, "step": 9198 }, { "epoch": 0.40905250989284603, "grad_norm": 0.07063327729701996, "learning_rate": 0.0009791833028172843, "loss": 1.5388, "step": 9200 }, { "epoch": 0.4091414343515184, "grad_norm": 0.07538381218910217, "learning_rate": 0.0009791732181904788, "loss": 1.5417, "step": 9202 }, { "epoch": 0.40923035881019076, "grad_norm": 0.07273292541503906, "learning_rate": 0.0009791631311734774, "loss": 1.5364, "step": 9204 }, { "epoch": 0.4093192832688631, "grad_norm": 0.07258638739585876, "learning_rate": 0.0009791530417663308, "loss": 1.5409, "step": 9206 }, { "epoch": 0.40940820772753544, "grad_norm": 0.07772589474916458, "learning_rate": 0.0009791429499690896, "loss": 1.5398, "step": 9208 }, { "epoch": 0.4094971321862078, "grad_norm": 0.07376066595315933, "learning_rate": 0.0009791328557818035, "loss": 1.5427, "step": 9210 }, { "epoch": 0.4095860566448802, "grad_norm": 0.07310586422681808, "learning_rate": 0.0009791227592045235, "loss": 1.5431, "step": 9212 }, { "epoch": 0.40967498110355255, "grad_norm": 0.07371335476636887, "learning_rate": 0.0009791126602372996, "loss": 1.5384, "step": 9214 }, { "epoch": 0.4097639055622249, "grad_norm": 0.0725250244140625, "learning_rate": 0.000979102558880182, "loss": 1.5362, "step": 9216 }, { "epoch": 0.40985283002089723, "grad_norm": 0.07021638751029968, "learning_rate": 0.0009790924551332215, "loss": 1.5344, "step": 9218 }, { "epoch": 0.4099417544795696, "grad_norm": 0.06901945173740387, "learning_rate": 0.0009790823489964683, "loss": 1.5343, "step": 9220 }, { "epoch": 0.41003067893824197, "grad_norm": 0.06901168823242188, "learning_rate": 0.0009790722404699726, "loss": 1.5369, "step": 9222 }, { "epoch": 0.41011960339691433, "grad_norm": 0.06912058591842651, "learning_rate": 0.0009790621295537852, "loss": 1.5356, "step": 9224 }, { "epoch": 0.4102085278555867, "grad_norm": 0.07027976959943771, "learning_rate": 0.0009790520162479563, "loss": 1.5343, "step": 9226 }, { "epoch": 0.410297452314259, "grad_norm": 0.07126928120851517, "learning_rate": 0.0009790419005525366, "loss": 1.5394, "step": 9228 }, { "epoch": 0.4103863767729314, "grad_norm": 0.0708712786436081, "learning_rate": 0.0009790317824675762, "loss": 1.5438, "step": 9230 }, { "epoch": 0.41047530123160375, "grad_norm": 0.07011223584413528, "learning_rate": 0.000979021661993126, "loss": 1.5359, "step": 9232 }, { "epoch": 0.4105642256902761, "grad_norm": 0.07202787697315216, "learning_rate": 0.000979011539129236, "loss": 1.5422, "step": 9234 }, { "epoch": 0.4106531501489485, "grad_norm": 0.06934972107410431, "learning_rate": 0.0009790014138759571, "loss": 1.5399, "step": 9236 }, { "epoch": 0.4107420746076208, "grad_norm": 0.07186301797628403, "learning_rate": 0.0009789912862333394, "loss": 1.5426, "step": 9238 }, { "epoch": 0.41083099906629317, "grad_norm": 0.07302137464284897, "learning_rate": 0.0009789811562014338, "loss": 1.5408, "step": 9240 }, { "epoch": 0.41091992352496554, "grad_norm": 0.07044612616300583, "learning_rate": 0.0009789710237802908, "loss": 1.545, "step": 9242 }, { "epoch": 0.4110088479836379, "grad_norm": 0.07422881573438644, "learning_rate": 0.0009789608889699607, "loss": 1.5366, "step": 9244 }, { "epoch": 0.41109777244231027, "grad_norm": 0.07070881128311157, "learning_rate": 0.0009789507517704943, "loss": 1.5344, "step": 9246 }, { "epoch": 0.41118669690098264, "grad_norm": 0.07254374772310257, "learning_rate": 0.0009789406121819418, "loss": 1.5384, "step": 9248 }, { "epoch": 0.41127562135965495, "grad_norm": 0.07279791682958603, "learning_rate": 0.0009789304702043542, "loss": 1.5434, "step": 9250 }, { "epoch": 0.4113645458183273, "grad_norm": 0.07429590076208115, "learning_rate": 0.000978920325837782, "loss": 1.5449, "step": 9252 }, { "epoch": 0.4114534702769997, "grad_norm": 0.07568711042404175, "learning_rate": 0.0009789101790822756, "loss": 1.541, "step": 9254 }, { "epoch": 0.41154239473567206, "grad_norm": 0.07262945920228958, "learning_rate": 0.0009789000299378857, "loss": 1.5372, "step": 9256 }, { "epoch": 0.4116313191943444, "grad_norm": 0.07350386679172516, "learning_rate": 0.000978889878404663, "loss": 1.5409, "step": 9258 }, { "epoch": 0.41172024365301674, "grad_norm": 0.0701087936758995, "learning_rate": 0.000978879724482658, "loss": 1.5446, "step": 9260 }, { "epoch": 0.4118091681116891, "grad_norm": 0.06851620972156525, "learning_rate": 0.0009788695681719217, "loss": 1.5366, "step": 9262 }, { "epoch": 0.4118980925703615, "grad_norm": 0.06817908585071564, "learning_rate": 0.0009788594094725042, "loss": 1.5356, "step": 9264 }, { "epoch": 0.41198701702903384, "grad_norm": 0.06984448432922363, "learning_rate": 0.0009788492483844566, "loss": 1.5404, "step": 9266 }, { "epoch": 0.4120759414877062, "grad_norm": 0.07095382362604141, "learning_rate": 0.0009788390849078295, "loss": 1.5423, "step": 9268 }, { "epoch": 0.4121648659463786, "grad_norm": 0.06866953521966934, "learning_rate": 0.0009788289190426735, "loss": 1.5337, "step": 9270 }, { "epoch": 0.4122537904050509, "grad_norm": 0.06914438307285309, "learning_rate": 0.0009788187507890395, "loss": 1.538, "step": 9272 }, { "epoch": 0.41234271486372326, "grad_norm": 0.06855052709579468, "learning_rate": 0.0009788085801469778, "loss": 1.5351, "step": 9274 }, { "epoch": 0.4124316393223956, "grad_norm": 0.0681350976228714, "learning_rate": 0.0009787984071165398, "loss": 1.5395, "step": 9276 }, { "epoch": 0.412520563781068, "grad_norm": 0.07035652548074722, "learning_rate": 0.0009787882316977757, "loss": 1.5417, "step": 9278 }, { "epoch": 0.41260948823974036, "grad_norm": 0.06886482238769531, "learning_rate": 0.0009787780538907365, "loss": 1.537, "step": 9280 }, { "epoch": 0.4126984126984127, "grad_norm": 0.07588198781013489, "learning_rate": 0.000978767873695473, "loss": 1.5403, "step": 9282 }, { "epoch": 0.41278733715708504, "grad_norm": 0.069828562438488, "learning_rate": 0.0009787576911120355, "loss": 1.544, "step": 9284 }, { "epoch": 0.4128762616157574, "grad_norm": 0.07009178400039673, "learning_rate": 0.0009787475061404753, "loss": 1.5365, "step": 9286 }, { "epoch": 0.4129651860744298, "grad_norm": 0.06862553209066391, "learning_rate": 0.0009787373187808432, "loss": 1.538, "step": 9288 }, { "epoch": 0.41305411053310215, "grad_norm": 0.07022769749164581, "learning_rate": 0.00097872712903319, "loss": 1.543, "step": 9290 }, { "epoch": 0.41314303499177446, "grad_norm": 0.07054140418767929, "learning_rate": 0.000978716936897566, "loss": 1.5383, "step": 9292 }, { "epoch": 0.41323195945044683, "grad_norm": 0.07116847485303879, "learning_rate": 0.0009787067423740227, "loss": 1.5386, "step": 9294 }, { "epoch": 0.4133208839091192, "grad_norm": 0.06990710645914078, "learning_rate": 0.0009786965454626107, "loss": 1.5343, "step": 9296 }, { "epoch": 0.41340980836779156, "grad_norm": 0.06872306019067764, "learning_rate": 0.0009786863461633808, "loss": 1.5424, "step": 9298 }, { "epoch": 0.41349873282646393, "grad_norm": 0.06872355192899704, "learning_rate": 0.0009786761444763841, "loss": 1.541, "step": 9300 }, { "epoch": 0.4135876572851363, "grad_norm": 0.07212715595960617, "learning_rate": 0.0009786659404016711, "loss": 1.5376, "step": 9302 }, { "epoch": 0.4136765817438086, "grad_norm": 0.06772853434085846, "learning_rate": 0.0009786557339392932, "loss": 1.5369, "step": 9304 }, { "epoch": 0.413765506202481, "grad_norm": 0.06942387670278549, "learning_rate": 0.000978645525089301, "loss": 1.5344, "step": 9306 }, { "epoch": 0.41385443066115335, "grad_norm": 0.07053601741790771, "learning_rate": 0.0009786353138517454, "loss": 1.5401, "step": 9308 }, { "epoch": 0.4139433551198257, "grad_norm": 0.07274100184440613, "learning_rate": 0.0009786251002266773, "loss": 1.5324, "step": 9310 }, { "epoch": 0.4140322795784981, "grad_norm": 0.07127835601568222, "learning_rate": 0.000978614884214148, "loss": 1.5427, "step": 9312 }, { "epoch": 0.4141212040371704, "grad_norm": 0.0735345184803009, "learning_rate": 0.0009786046658142081, "loss": 1.5359, "step": 9314 }, { "epoch": 0.41421012849584277, "grad_norm": 0.07226788252592087, "learning_rate": 0.0009785944450269087, "loss": 1.5428, "step": 9316 }, { "epoch": 0.41429905295451513, "grad_norm": 0.0691119134426117, "learning_rate": 0.0009785842218523006, "loss": 1.5388, "step": 9318 }, { "epoch": 0.4143879774131875, "grad_norm": 0.06924230605363846, "learning_rate": 0.000978573996290435, "loss": 1.5377, "step": 9320 }, { "epoch": 0.41447690187185987, "grad_norm": 0.07052425295114517, "learning_rate": 0.000978563768341363, "loss": 1.5378, "step": 9322 }, { "epoch": 0.41456582633053224, "grad_norm": 0.07418438792228699, "learning_rate": 0.0009785535380051355, "loss": 1.5331, "step": 9324 }, { "epoch": 0.41465475078920455, "grad_norm": 0.07372714579105377, "learning_rate": 0.0009785433052818034, "loss": 1.5363, "step": 9326 }, { "epoch": 0.4147436752478769, "grad_norm": 0.07146646827459335, "learning_rate": 0.000978533070171418, "loss": 1.5329, "step": 9328 }, { "epoch": 0.4148325997065493, "grad_norm": 0.07278136909008026, "learning_rate": 0.00097852283267403, "loss": 1.5412, "step": 9330 }, { "epoch": 0.41492152416522166, "grad_norm": 0.07172350585460663, "learning_rate": 0.0009785125927896908, "loss": 1.5373, "step": 9332 }, { "epoch": 0.415010448623894, "grad_norm": 0.07292774319648743, "learning_rate": 0.0009785023505184513, "loss": 1.5368, "step": 9334 }, { "epoch": 0.41509937308256634, "grad_norm": 0.07089949399232864, "learning_rate": 0.0009784921058603629, "loss": 1.5319, "step": 9336 }, { "epoch": 0.4151882975412387, "grad_norm": 0.0746246799826622, "learning_rate": 0.0009784818588154762, "loss": 1.541, "step": 9338 }, { "epoch": 0.4152772219999111, "grad_norm": 0.07343286275863647, "learning_rate": 0.0009784716093838425, "loss": 1.5374, "step": 9340 }, { "epoch": 0.41536614645858344, "grad_norm": 0.06831394135951996, "learning_rate": 0.0009784613575655131, "loss": 1.532, "step": 9342 }, { "epoch": 0.4154550709172558, "grad_norm": 0.07356590032577515, "learning_rate": 0.000978451103360539, "loss": 1.5349, "step": 9344 }, { "epoch": 0.4155439953759281, "grad_norm": 0.07031702995300293, "learning_rate": 0.0009784408467689717, "loss": 1.5351, "step": 9346 }, { "epoch": 0.4156329198346005, "grad_norm": 0.07025104761123657, "learning_rate": 0.0009784305877908615, "loss": 1.5372, "step": 9348 }, { "epoch": 0.41572184429327286, "grad_norm": 0.06900499761104584, "learning_rate": 0.0009784203264262604, "loss": 1.5366, "step": 9350 }, { "epoch": 0.4158107687519452, "grad_norm": 0.06960804015398026, "learning_rate": 0.0009784100626752193, "loss": 1.5422, "step": 9352 }, { "epoch": 0.4158996932106176, "grad_norm": 0.07161404937505722, "learning_rate": 0.0009783997965377893, "loss": 1.5398, "step": 9354 }, { "epoch": 0.41598861766928996, "grad_norm": 0.07421243190765381, "learning_rate": 0.000978389528014022, "loss": 1.5351, "step": 9356 }, { "epoch": 0.4160775421279623, "grad_norm": 0.07687795907258987, "learning_rate": 0.000978379257103968, "loss": 1.5411, "step": 9358 }, { "epoch": 0.41616646658663464, "grad_norm": 0.07376468926668167, "learning_rate": 0.0009783689838076788, "loss": 1.5342, "step": 9360 }, { "epoch": 0.416255391045307, "grad_norm": 0.07067352533340454, "learning_rate": 0.000978358708125206, "loss": 1.5381, "step": 9362 }, { "epoch": 0.4163443155039794, "grad_norm": 0.06940723210573196, "learning_rate": 0.0009783484300566003, "loss": 1.541, "step": 9364 }, { "epoch": 0.41643323996265175, "grad_norm": 0.06866216659545898, "learning_rate": 0.0009783381496019136, "loss": 1.5412, "step": 9366 }, { "epoch": 0.41652216442132406, "grad_norm": 0.06930825859308243, "learning_rate": 0.0009783278667611964, "loss": 1.5347, "step": 9368 }, { "epoch": 0.4166110888799964, "grad_norm": 0.06734399497509003, "learning_rate": 0.0009783175815345005, "loss": 1.537, "step": 9370 }, { "epoch": 0.4167000133386688, "grad_norm": 0.06903259456157684, "learning_rate": 0.0009783072939218772, "loss": 1.5386, "step": 9372 }, { "epoch": 0.41678893779734116, "grad_norm": 0.0670986995100975, "learning_rate": 0.0009782970039233776, "loss": 1.5329, "step": 9374 }, { "epoch": 0.41687786225601353, "grad_norm": 0.07043662667274475, "learning_rate": 0.0009782867115390532, "loss": 1.5364, "step": 9376 }, { "epoch": 0.4169667867146859, "grad_norm": 0.06715195626020432, "learning_rate": 0.0009782764167689554, "loss": 1.5325, "step": 9378 }, { "epoch": 0.4170557111733582, "grad_norm": 0.06987708806991577, "learning_rate": 0.0009782661196131354, "loss": 1.5398, "step": 9380 }, { "epoch": 0.4171446356320306, "grad_norm": 0.06837180256843567, "learning_rate": 0.0009782558200716446, "loss": 1.5339, "step": 9382 }, { "epoch": 0.41723356009070295, "grad_norm": 0.0699625238776207, "learning_rate": 0.0009782455181445344, "loss": 1.5387, "step": 9384 }, { "epoch": 0.4173224845493753, "grad_norm": 0.07226444780826569, "learning_rate": 0.0009782352138318561, "loss": 1.5419, "step": 9386 }, { "epoch": 0.4174114090080477, "grad_norm": 0.06939677894115448, "learning_rate": 0.0009782249071336611, "loss": 1.5394, "step": 9388 }, { "epoch": 0.41750033346672, "grad_norm": 0.06957529485225677, "learning_rate": 0.000978214598050001, "loss": 1.5342, "step": 9390 }, { "epoch": 0.41758925792539237, "grad_norm": 0.06821033358573914, "learning_rate": 0.000978204286580927, "loss": 1.535, "step": 9392 }, { "epoch": 0.41767818238406473, "grad_norm": 0.06692170351743698, "learning_rate": 0.000978193972726491, "loss": 1.5296, "step": 9394 }, { "epoch": 0.4177671068427371, "grad_norm": 0.06882653385400772, "learning_rate": 0.0009781836564867437, "loss": 1.5383, "step": 9396 }, { "epoch": 0.41785603130140947, "grad_norm": 0.07427268475294113, "learning_rate": 0.000978173337861737, "loss": 1.5384, "step": 9398 }, { "epoch": 0.41794495576008184, "grad_norm": 0.07215935736894608, "learning_rate": 0.0009781630168515223, "loss": 1.5401, "step": 9400 }, { "epoch": 0.41803388021875415, "grad_norm": 0.07148215919733047, "learning_rate": 0.0009781526934561514, "loss": 1.5448, "step": 9402 }, { "epoch": 0.4181228046774265, "grad_norm": 0.06973208487033844, "learning_rate": 0.0009781423676756753, "loss": 1.5379, "step": 9404 }, { "epoch": 0.4182117291360989, "grad_norm": 0.07168926298618317, "learning_rate": 0.0009781320395101456, "loss": 1.5365, "step": 9406 }, { "epoch": 0.41830065359477125, "grad_norm": 0.0705469623208046, "learning_rate": 0.0009781217089596139, "loss": 1.5359, "step": 9408 }, { "epoch": 0.4183895780534436, "grad_norm": 0.06955131143331528, "learning_rate": 0.000978111376024132, "loss": 1.5371, "step": 9410 }, { "epoch": 0.41847850251211594, "grad_norm": 0.07158604264259338, "learning_rate": 0.000978101040703751, "loss": 1.54, "step": 9412 }, { "epoch": 0.4185674269707883, "grad_norm": 0.07020861655473709, "learning_rate": 0.0009780907029985227, "loss": 1.5381, "step": 9414 }, { "epoch": 0.41865635142946067, "grad_norm": 0.07165636867284775, "learning_rate": 0.0009780803629084987, "loss": 1.5382, "step": 9416 }, { "epoch": 0.41874527588813304, "grad_norm": 0.07206343859434128, "learning_rate": 0.0009780700204337304, "loss": 1.5348, "step": 9418 }, { "epoch": 0.4188342003468054, "grad_norm": 0.06763043999671936, "learning_rate": 0.0009780596755742694, "loss": 1.5375, "step": 9420 }, { "epoch": 0.4189231248054777, "grad_norm": 0.07210266590118408, "learning_rate": 0.0009780493283301675, "loss": 1.5403, "step": 9422 }, { "epoch": 0.4190120492641501, "grad_norm": 0.07414006441831589, "learning_rate": 0.000978038978701476, "loss": 1.537, "step": 9424 }, { "epoch": 0.41910097372282246, "grad_norm": 0.07020355015993118, "learning_rate": 0.000978028626688247, "loss": 1.5321, "step": 9426 }, { "epoch": 0.4191898981814948, "grad_norm": 0.07178431004285812, "learning_rate": 0.0009780182722905317, "loss": 1.5328, "step": 9428 }, { "epoch": 0.4192788226401672, "grad_norm": 0.07044417411088943, "learning_rate": 0.0009780079155083821, "loss": 1.5356, "step": 9430 }, { "epoch": 0.41936774709883956, "grad_norm": 0.07170826941728592, "learning_rate": 0.0009779975563418495, "loss": 1.5315, "step": 9432 }, { "epoch": 0.4194566715575119, "grad_norm": 0.07005254924297333, "learning_rate": 0.0009779871947909857, "loss": 1.5367, "step": 9434 }, { "epoch": 0.41954559601618424, "grad_norm": 0.06897489726543427, "learning_rate": 0.0009779768308558427, "loss": 1.5291, "step": 9436 }, { "epoch": 0.4196345204748566, "grad_norm": 0.07095732539892197, "learning_rate": 0.0009779664645364716, "loss": 1.5301, "step": 9438 }, { "epoch": 0.419723444933529, "grad_norm": 0.07314879447221756, "learning_rate": 0.0009779560958329246, "loss": 1.5323, "step": 9440 }, { "epoch": 0.41981236939220135, "grad_norm": 0.07073177397251129, "learning_rate": 0.0009779457247452532, "loss": 1.5375, "step": 9442 }, { "epoch": 0.41990129385087366, "grad_norm": 0.0689346119761467, "learning_rate": 0.0009779353512735093, "loss": 1.5345, "step": 9444 }, { "epoch": 0.419990218309546, "grad_norm": 0.06825485080480576, "learning_rate": 0.0009779249754177444, "loss": 1.5362, "step": 9446 }, { "epoch": 0.4200791427682184, "grad_norm": 0.07014141231775284, "learning_rate": 0.0009779145971780103, "loss": 1.5383, "step": 9448 }, { "epoch": 0.42016806722689076, "grad_norm": 0.06948299705982208, "learning_rate": 0.0009779042165543592, "loss": 1.5405, "step": 9450 }, { "epoch": 0.42025699168556313, "grad_norm": 0.06821645051240921, "learning_rate": 0.0009778938335468423, "loss": 1.5361, "step": 9452 }, { "epoch": 0.4203459161442355, "grad_norm": 0.0685497298836708, "learning_rate": 0.0009778834481555118, "loss": 1.536, "step": 9454 }, { "epoch": 0.4204348406029078, "grad_norm": 0.07217854261398315, "learning_rate": 0.0009778730603804192, "loss": 1.5344, "step": 9456 }, { "epoch": 0.4205237650615802, "grad_norm": 0.06981514394283295, "learning_rate": 0.0009778626702216164, "loss": 1.5345, "step": 9458 }, { "epoch": 0.42061268952025255, "grad_norm": 0.07042837888002396, "learning_rate": 0.0009778522776791553, "loss": 1.5362, "step": 9460 }, { "epoch": 0.4207016139789249, "grad_norm": 0.07101742923259735, "learning_rate": 0.0009778418827530878, "loss": 1.5426, "step": 9462 }, { "epoch": 0.4207905384375973, "grad_norm": 0.06968886405229568, "learning_rate": 0.0009778314854434656, "loss": 1.5348, "step": 9464 }, { "epoch": 0.4208794628962696, "grad_norm": 0.06759744137525558, "learning_rate": 0.0009778210857503407, "loss": 1.5297, "step": 9466 }, { "epoch": 0.42096838735494196, "grad_norm": 0.0696602612733841, "learning_rate": 0.0009778106836737647, "loss": 1.5383, "step": 9468 }, { "epoch": 0.42105731181361433, "grad_norm": 0.06992914527654648, "learning_rate": 0.00097780027921379, "loss": 1.5319, "step": 9470 }, { "epoch": 0.4211462362722867, "grad_norm": 0.07088860124349594, "learning_rate": 0.0009777898723704681, "loss": 1.54, "step": 9472 }, { "epoch": 0.42123516073095907, "grad_norm": 0.0724070593714714, "learning_rate": 0.000977779463143851, "loss": 1.5378, "step": 9474 }, { "epoch": 0.4213240851896314, "grad_norm": 0.07188228517770767, "learning_rate": 0.0009777690515339905, "loss": 1.5392, "step": 9476 }, { "epoch": 0.42141300964830375, "grad_norm": 0.06922028958797455, "learning_rate": 0.0009777586375409389, "loss": 1.527, "step": 9478 }, { "epoch": 0.4215019341069761, "grad_norm": 0.07125406712293625, "learning_rate": 0.0009777482211647476, "loss": 1.5363, "step": 9480 }, { "epoch": 0.4215908585656485, "grad_norm": 0.06816605478525162, "learning_rate": 0.0009777378024054693, "loss": 1.5298, "step": 9482 }, { "epoch": 0.42167978302432085, "grad_norm": 0.06775356084108353, "learning_rate": 0.0009777273812631552, "loss": 1.5418, "step": 9484 }, { "epoch": 0.4217687074829932, "grad_norm": 0.07036928087472916, "learning_rate": 0.0009777169577378578, "loss": 1.537, "step": 9486 }, { "epoch": 0.42185763194166553, "grad_norm": 0.07150757312774658, "learning_rate": 0.0009777065318296288, "loss": 1.5352, "step": 9488 }, { "epoch": 0.4219465564003379, "grad_norm": 0.06876358389854431, "learning_rate": 0.0009776961035385203, "loss": 1.5388, "step": 9490 }, { "epoch": 0.42203548085901027, "grad_norm": 0.07003464549779892, "learning_rate": 0.0009776856728645844, "loss": 1.537, "step": 9492 }, { "epoch": 0.42212440531768264, "grad_norm": 0.070342518389225, "learning_rate": 0.0009776752398078731, "loss": 1.5322, "step": 9494 }, { "epoch": 0.422213329776355, "grad_norm": 0.06741419434547424, "learning_rate": 0.0009776648043684384, "loss": 1.5398, "step": 9496 }, { "epoch": 0.4223022542350273, "grad_norm": 0.07096916437149048, "learning_rate": 0.0009776543665463325, "loss": 1.5313, "step": 9498 }, { "epoch": 0.4223911786936997, "grad_norm": 0.06838645040988922, "learning_rate": 0.000977643926341607, "loss": 1.5372, "step": 9500 }, { "epoch": 0.4223911786936997, "eval_loss": 1.5147486925125122, "eval_runtime": 12.692, "eval_samples_per_second": 544.436, "eval_steps_per_second": 68.074, "step": 9500 }, { "epoch": 0.42248010315237206, "grad_norm": 0.07152796536684036, "learning_rate": 0.0009776334837543147, "loss": 1.5332, "step": 9502 }, { "epoch": 0.4225690276110444, "grad_norm": 0.0700407326221466, "learning_rate": 0.000977623038784507, "loss": 1.5358, "step": 9504 }, { "epoch": 0.4226579520697168, "grad_norm": 0.06791914999485016, "learning_rate": 0.0009776125914322364, "loss": 1.5323, "step": 9506 }, { "epoch": 0.42274687652838916, "grad_norm": 0.07237772643566132, "learning_rate": 0.0009776021416975549, "loss": 1.5339, "step": 9508 }, { "epoch": 0.42283580098706147, "grad_norm": 0.07078398764133453, "learning_rate": 0.0009775916895805145, "loss": 1.5327, "step": 9510 }, { "epoch": 0.42292472544573384, "grad_norm": 0.07085014879703522, "learning_rate": 0.0009775812350811678, "loss": 1.5297, "step": 9512 }, { "epoch": 0.4230136499044062, "grad_norm": 0.07237362116575241, "learning_rate": 0.0009775707781995665, "loss": 1.5296, "step": 9514 }, { "epoch": 0.4231025743630786, "grad_norm": 0.06783927977085114, "learning_rate": 0.0009775603189357627, "loss": 1.5367, "step": 9516 }, { "epoch": 0.42319149882175094, "grad_norm": 0.06836774200201035, "learning_rate": 0.0009775498572898089, "loss": 1.5336, "step": 9518 }, { "epoch": 0.42328042328042326, "grad_norm": 0.0725974515080452, "learning_rate": 0.0009775393932617573, "loss": 1.5319, "step": 9520 }, { "epoch": 0.4233693477390956, "grad_norm": 0.07181134074926376, "learning_rate": 0.0009775289268516597, "loss": 1.5352, "step": 9522 }, { "epoch": 0.423458272197768, "grad_norm": 0.07021788507699966, "learning_rate": 0.0009775184580595687, "loss": 1.5393, "step": 9524 }, { "epoch": 0.42354719665644036, "grad_norm": 0.06836768984794617, "learning_rate": 0.0009775079868855363, "loss": 1.5354, "step": 9526 }, { "epoch": 0.42363612111511273, "grad_norm": 0.06834634393453598, "learning_rate": 0.0009774975133296148, "loss": 1.5393, "step": 9528 }, { "epoch": 0.42372504557378504, "grad_norm": 0.0703490823507309, "learning_rate": 0.0009774870373918565, "loss": 1.5319, "step": 9530 }, { "epoch": 0.4238139700324574, "grad_norm": 0.06841664761304855, "learning_rate": 0.0009774765590723133, "loss": 1.5413, "step": 9532 }, { "epoch": 0.4239028944911298, "grad_norm": 0.06665299087762833, "learning_rate": 0.0009774660783710381, "loss": 1.5359, "step": 9534 }, { "epoch": 0.42399181894980215, "grad_norm": 0.06913217157125473, "learning_rate": 0.0009774555952880828, "loss": 1.5409, "step": 9536 }, { "epoch": 0.4240807434084745, "grad_norm": 0.06786402314901352, "learning_rate": 0.0009774451098234999, "loss": 1.5291, "step": 9538 }, { "epoch": 0.4241696678671469, "grad_norm": 0.06822808086872101, "learning_rate": 0.000977434621977341, "loss": 1.5398, "step": 9540 }, { "epoch": 0.4242585923258192, "grad_norm": 0.06818651407957077, "learning_rate": 0.0009774241317496593, "loss": 1.5413, "step": 9542 }, { "epoch": 0.42434751678449156, "grad_norm": 0.06693682074546814, "learning_rate": 0.000977413639140507, "loss": 1.5302, "step": 9544 }, { "epoch": 0.42443644124316393, "grad_norm": 0.06880820542573929, "learning_rate": 0.0009774031441499359, "loss": 1.5323, "step": 9546 }, { "epoch": 0.4245253657018363, "grad_norm": 0.0655490830540657, "learning_rate": 0.0009773926467779987, "loss": 1.5412, "step": 9548 }, { "epoch": 0.42461429016050867, "grad_norm": 0.06712329387664795, "learning_rate": 0.0009773821470247478, "loss": 1.5346, "step": 9550 }, { "epoch": 0.424703214619181, "grad_norm": 0.06615650653839111, "learning_rate": 0.0009773716448902355, "loss": 1.5341, "step": 9552 }, { "epoch": 0.42479213907785335, "grad_norm": 0.06936550885438919, "learning_rate": 0.0009773611403745143, "loss": 1.5341, "step": 9554 }, { "epoch": 0.4248810635365257, "grad_norm": 0.06751050800085068, "learning_rate": 0.0009773506334776363, "loss": 1.5379, "step": 9556 }, { "epoch": 0.4249699879951981, "grad_norm": 0.0678553357720375, "learning_rate": 0.0009773401241996542, "loss": 1.5381, "step": 9558 }, { "epoch": 0.42505891245387045, "grad_norm": 0.06892528384923935, "learning_rate": 0.0009773296125406203, "loss": 1.5373, "step": 9560 }, { "epoch": 0.4251478369125428, "grad_norm": 0.06836491823196411, "learning_rate": 0.0009773190985005872, "loss": 1.5374, "step": 9562 }, { "epoch": 0.42523676137121513, "grad_norm": 0.07044228166341782, "learning_rate": 0.000977308582079607, "loss": 1.5313, "step": 9564 }, { "epoch": 0.4253256858298875, "grad_norm": 0.06915424019098282, "learning_rate": 0.0009772980632777324, "loss": 1.5336, "step": 9566 }, { "epoch": 0.42541461028855987, "grad_norm": 0.0704522505402565, "learning_rate": 0.0009772875420950159, "loss": 1.5408, "step": 9568 }, { "epoch": 0.42550353474723224, "grad_norm": 0.06899117678403854, "learning_rate": 0.0009772770185315098, "loss": 1.538, "step": 9570 }, { "epoch": 0.4255924592059046, "grad_norm": 0.06853268295526505, "learning_rate": 0.000977266492587267, "loss": 1.5374, "step": 9572 }, { "epoch": 0.4256813836645769, "grad_norm": 0.07276134192943573, "learning_rate": 0.0009772559642623395, "loss": 1.5343, "step": 9574 }, { "epoch": 0.4257703081232493, "grad_norm": 0.06734218448400497, "learning_rate": 0.00097724543355678, "loss": 1.5361, "step": 9576 }, { "epoch": 0.42585923258192165, "grad_norm": 0.07109152525663376, "learning_rate": 0.0009772349004706412, "loss": 1.5397, "step": 9578 }, { "epoch": 0.425948157040594, "grad_norm": 0.06941290199756622, "learning_rate": 0.0009772243650039755, "loss": 1.5369, "step": 9580 }, { "epoch": 0.4260370814992664, "grad_norm": 0.07232563197612762, "learning_rate": 0.0009772138271568352, "loss": 1.5375, "step": 9582 }, { "epoch": 0.42612600595793876, "grad_norm": 0.07067085057497025, "learning_rate": 0.0009772032869292734, "loss": 1.5339, "step": 9584 }, { "epoch": 0.42621493041661107, "grad_norm": 0.06817600876092911, "learning_rate": 0.0009771927443213422, "loss": 1.5303, "step": 9586 }, { "epoch": 0.42630385487528344, "grad_norm": 0.06987394392490387, "learning_rate": 0.0009771821993330944, "loss": 1.5308, "step": 9588 }, { "epoch": 0.4263927793339558, "grad_norm": 0.06813080608844757, "learning_rate": 0.0009771716519645826, "loss": 1.5327, "step": 9590 }, { "epoch": 0.4264817037926282, "grad_norm": 0.06885334104299545, "learning_rate": 0.0009771611022158593, "loss": 1.5369, "step": 9592 }, { "epoch": 0.42657062825130054, "grad_norm": 0.06869365274906158, "learning_rate": 0.0009771505500869775, "loss": 1.541, "step": 9594 }, { "epoch": 0.42665955270997286, "grad_norm": 0.06896496564149857, "learning_rate": 0.0009771399955779893, "loss": 1.5353, "step": 9596 }, { "epoch": 0.4267484771686452, "grad_norm": 0.07151202112436295, "learning_rate": 0.0009771294386889478, "loss": 1.5341, "step": 9598 }, { "epoch": 0.4268374016273176, "grad_norm": 0.06725707650184631, "learning_rate": 0.0009771188794199053, "loss": 1.538, "step": 9600 }, { "epoch": 0.42692632608598996, "grad_norm": 0.07007326185703278, "learning_rate": 0.0009771083177709146, "loss": 1.5396, "step": 9602 }, { "epoch": 0.42701525054466233, "grad_norm": 0.06781750172376633, "learning_rate": 0.0009770977537420288, "loss": 1.5337, "step": 9604 }, { "epoch": 0.42710417500333464, "grad_norm": 0.07008969038724899, "learning_rate": 0.0009770871873332997, "loss": 1.5296, "step": 9606 }, { "epoch": 0.427193099462007, "grad_norm": 0.07271461933851242, "learning_rate": 0.0009770766185447808, "loss": 1.5362, "step": 9608 }, { "epoch": 0.4272820239206794, "grad_norm": 0.07218462973833084, "learning_rate": 0.0009770660473765245, "loss": 1.5313, "step": 9610 }, { "epoch": 0.42737094837935174, "grad_norm": 0.0696045383810997, "learning_rate": 0.0009770554738285835, "loss": 1.535, "step": 9612 }, { "epoch": 0.4274598728380241, "grad_norm": 0.0716555267572403, "learning_rate": 0.0009770448979010105, "loss": 1.5347, "step": 9614 }, { "epoch": 0.4275487972966965, "grad_norm": 0.06784503161907196, "learning_rate": 0.0009770343195938586, "loss": 1.5328, "step": 9616 }, { "epoch": 0.4276377217553688, "grad_norm": 0.0706615075469017, "learning_rate": 0.0009770237389071803, "loss": 1.5365, "step": 9618 }, { "epoch": 0.42772664621404116, "grad_norm": 0.07164894044399261, "learning_rate": 0.0009770131558410283, "loss": 1.5398, "step": 9620 }, { "epoch": 0.42781557067271353, "grad_norm": 0.07363253831863403, "learning_rate": 0.0009770025703954555, "loss": 1.5344, "step": 9622 }, { "epoch": 0.4279044951313859, "grad_norm": 0.07175491750240326, "learning_rate": 0.0009769919825705147, "loss": 1.5304, "step": 9624 }, { "epoch": 0.42799341959005827, "grad_norm": 0.07303240895271301, "learning_rate": 0.0009769813923662589, "loss": 1.5402, "step": 9626 }, { "epoch": 0.4280823440487306, "grad_norm": 0.06884127110242844, "learning_rate": 0.0009769707997827404, "loss": 1.5313, "step": 9628 }, { "epoch": 0.42817126850740295, "grad_norm": 0.06890305131673813, "learning_rate": 0.0009769602048200128, "loss": 1.5369, "step": 9630 }, { "epoch": 0.4282601929660753, "grad_norm": 0.07173550128936768, "learning_rate": 0.0009769496074781283, "loss": 1.5283, "step": 9632 }, { "epoch": 0.4283491174247477, "grad_norm": 0.068869449198246, "learning_rate": 0.0009769390077571398, "loss": 1.5363, "step": 9634 }, { "epoch": 0.42843804188342005, "grad_norm": 0.0685199499130249, "learning_rate": 0.0009769284056571005, "loss": 1.5358, "step": 9636 }, { "epoch": 0.4285269663420924, "grad_norm": 0.06906317174434662, "learning_rate": 0.0009769178011780632, "loss": 1.5338, "step": 9638 }, { "epoch": 0.42861589080076473, "grad_norm": 0.06904531270265579, "learning_rate": 0.0009769071943200808, "loss": 1.5367, "step": 9640 }, { "epoch": 0.4287048152594371, "grad_norm": 0.0690319761633873, "learning_rate": 0.0009768965850832062, "loss": 1.5332, "step": 9642 }, { "epoch": 0.42879373971810947, "grad_norm": 0.07049769908189774, "learning_rate": 0.0009768859734674922, "loss": 1.5366, "step": 9644 }, { "epoch": 0.42888266417678184, "grad_norm": 0.07084295898675919, "learning_rate": 0.0009768753594729918, "loss": 1.536, "step": 9646 }, { "epoch": 0.4289715886354542, "grad_norm": 0.06893621385097504, "learning_rate": 0.0009768647430997578, "loss": 1.5285, "step": 9648 }, { "epoch": 0.4290605130941265, "grad_norm": 0.07064125686883926, "learning_rate": 0.0009768541243478435, "loss": 1.53, "step": 9650 }, { "epoch": 0.4291494375527989, "grad_norm": 0.07157714664936066, "learning_rate": 0.0009768435032173016, "loss": 1.5316, "step": 9652 }, { "epoch": 0.42923836201147125, "grad_norm": 0.07000470906496048, "learning_rate": 0.0009768328797081852, "loss": 1.5395, "step": 9654 }, { "epoch": 0.4293272864701436, "grad_norm": 0.06912733614444733, "learning_rate": 0.000976822253820547, "loss": 1.5349, "step": 9656 }, { "epoch": 0.429416210928816, "grad_norm": 0.06659382581710815, "learning_rate": 0.0009768116255544407, "loss": 1.5309, "step": 9658 }, { "epoch": 0.4295051353874883, "grad_norm": 0.06706222891807556, "learning_rate": 0.0009768009949099184, "loss": 1.5337, "step": 9660 }, { "epoch": 0.42959405984616067, "grad_norm": 0.07012798637151718, "learning_rate": 0.0009767903618870337, "loss": 1.5429, "step": 9662 }, { "epoch": 0.42968298430483304, "grad_norm": 0.06715415418148041, "learning_rate": 0.0009767797264858397, "loss": 1.5405, "step": 9664 }, { "epoch": 0.4297719087635054, "grad_norm": 0.0690038651227951, "learning_rate": 0.0009767690887063894, "loss": 1.5364, "step": 9666 }, { "epoch": 0.4298608332221778, "grad_norm": 0.06869196146726608, "learning_rate": 0.0009767584485487356, "loss": 1.5336, "step": 9668 }, { "epoch": 0.42994975768085014, "grad_norm": 0.07014153897762299, "learning_rate": 0.0009767478060129313, "loss": 1.5349, "step": 9670 }, { "epoch": 0.43003868213952245, "grad_norm": 0.06871045380830765, "learning_rate": 0.00097673716109903, "loss": 1.5348, "step": 9672 }, { "epoch": 0.4301276065981948, "grad_norm": 0.06930448859930038, "learning_rate": 0.0009767265138070846, "loss": 1.5296, "step": 9674 }, { "epoch": 0.4302165310568672, "grad_norm": 0.07054897397756577, "learning_rate": 0.0009767158641371483, "loss": 1.5402, "step": 9676 }, { "epoch": 0.43030545551553956, "grad_norm": 0.07136155664920807, "learning_rate": 0.0009767052120892741, "loss": 1.5364, "step": 9678 }, { "epoch": 0.4303943799742119, "grad_norm": 0.06936744600534439, "learning_rate": 0.0009766945576635151, "loss": 1.5287, "step": 9680 }, { "epoch": 0.43048330443288424, "grad_norm": 0.07328307628631592, "learning_rate": 0.0009766839008599245, "loss": 1.5266, "step": 9682 }, { "epoch": 0.4305722288915566, "grad_norm": 0.06779733300209045, "learning_rate": 0.0009766732416785556, "loss": 1.5374, "step": 9684 }, { "epoch": 0.430661153350229, "grad_norm": 0.07060229033231735, "learning_rate": 0.0009766625801194613, "loss": 1.5293, "step": 9686 }, { "epoch": 0.43075007780890134, "grad_norm": 0.06920021772384644, "learning_rate": 0.000976651916182695, "loss": 1.5357, "step": 9688 }, { "epoch": 0.4308390022675737, "grad_norm": 0.0714344009757042, "learning_rate": 0.0009766412498683097, "loss": 1.536, "step": 9690 }, { "epoch": 0.4309279267262461, "grad_norm": 0.06918825954198837, "learning_rate": 0.000976630581176359, "loss": 1.5312, "step": 9692 }, { "epoch": 0.4310168511849184, "grad_norm": 0.07354065775871277, "learning_rate": 0.0009766199101068956, "loss": 1.5333, "step": 9694 }, { "epoch": 0.43110577564359076, "grad_norm": 0.07036007940769196, "learning_rate": 0.0009766092366599731, "loss": 1.5393, "step": 9696 }, { "epoch": 0.43119470010226313, "grad_norm": 0.07394345849752426, "learning_rate": 0.0009765985608356446, "loss": 1.5397, "step": 9698 }, { "epoch": 0.4312836245609355, "grad_norm": 0.07076235115528107, "learning_rate": 0.0009765878826339634, "loss": 1.5343, "step": 9700 }, { "epoch": 0.43137254901960786, "grad_norm": 0.07146693021059036, "learning_rate": 0.0009765772020549827, "loss": 1.5359, "step": 9702 }, { "epoch": 0.4314614734782802, "grad_norm": 0.06926552951335907, "learning_rate": 0.0009765665190987558, "loss": 1.5294, "step": 9704 }, { "epoch": 0.43155039793695255, "grad_norm": 0.07245678454637527, "learning_rate": 0.000976555833765336, "loss": 1.5303, "step": 9706 }, { "epoch": 0.4316393223956249, "grad_norm": 0.06824632734060287, "learning_rate": 0.0009765451460547766, "loss": 1.5339, "step": 9708 }, { "epoch": 0.4317282468542973, "grad_norm": 0.06656771898269653, "learning_rate": 0.0009765344559671307, "loss": 1.5326, "step": 9710 }, { "epoch": 0.43181717131296965, "grad_norm": 0.0666504055261612, "learning_rate": 0.0009765237635024522, "loss": 1.5307, "step": 9712 }, { "epoch": 0.43190609577164196, "grad_norm": 0.06956163048744202, "learning_rate": 0.0009765130686607938, "loss": 1.5284, "step": 9714 }, { "epoch": 0.43199502023031433, "grad_norm": 0.06977500021457672, "learning_rate": 0.0009765023714422092, "loss": 1.5358, "step": 9716 }, { "epoch": 0.4320839446889867, "grad_norm": 0.06824065744876862, "learning_rate": 0.0009764916718467517, "loss": 1.5359, "step": 9718 }, { "epoch": 0.43217286914765907, "grad_norm": 0.06885942071676254, "learning_rate": 0.0009764809698744746, "loss": 1.5375, "step": 9720 }, { "epoch": 0.43226179360633143, "grad_norm": 0.06814990937709808, "learning_rate": 0.0009764702655254314, "loss": 1.5334, "step": 9722 }, { "epoch": 0.4323507180650038, "grad_norm": 0.07028649002313614, "learning_rate": 0.0009764595587996754, "loss": 1.5322, "step": 9724 }, { "epoch": 0.4324396425236761, "grad_norm": 0.06551536917686462, "learning_rate": 0.00097644884969726, "loss": 1.5346, "step": 9726 }, { "epoch": 0.4325285669823485, "grad_norm": 0.06867454200983047, "learning_rate": 0.0009764381382182387, "loss": 1.5386, "step": 9728 }, { "epoch": 0.43261749144102085, "grad_norm": 0.065100759267807, "learning_rate": 0.0009764274243626649, "loss": 1.5314, "step": 9730 }, { "epoch": 0.4327064158996932, "grad_norm": 0.0707404837012291, "learning_rate": 0.000976416708130592, "loss": 1.5359, "step": 9732 }, { "epoch": 0.4327953403583656, "grad_norm": 0.0689748004078865, "learning_rate": 0.0009764059895220734, "loss": 1.5353, "step": 9734 }, { "epoch": 0.4328842648170379, "grad_norm": 0.06782509386539459, "learning_rate": 0.0009763952685371627, "loss": 1.5366, "step": 9736 }, { "epoch": 0.43297318927571027, "grad_norm": 0.07140477746725082, "learning_rate": 0.0009763845451759133, "loss": 1.5278, "step": 9738 }, { "epoch": 0.43306211373438264, "grad_norm": 0.06992009282112122, "learning_rate": 0.0009763738194383787, "loss": 1.533, "step": 9740 }, { "epoch": 0.433151038193055, "grad_norm": 0.07231522351503372, "learning_rate": 0.0009763630913246124, "loss": 1.5334, "step": 9742 }, { "epoch": 0.4332399626517274, "grad_norm": 0.07116207480430603, "learning_rate": 0.000976352360834668, "loss": 1.5301, "step": 9744 }, { "epoch": 0.43332888711039974, "grad_norm": 0.06921789795160294, "learning_rate": 0.000976341627968599, "loss": 1.5333, "step": 9746 }, { "epoch": 0.43341781156907205, "grad_norm": 0.06795500218868256, "learning_rate": 0.0009763308927264588, "loss": 1.5253, "step": 9748 }, { "epoch": 0.4335067360277444, "grad_norm": 0.06932450830936432, "learning_rate": 0.0009763201551083011, "loss": 1.5293, "step": 9750 }, { "epoch": 0.4335956604864168, "grad_norm": 0.07011115550994873, "learning_rate": 0.0009763094151141793, "loss": 1.5288, "step": 9752 }, { "epoch": 0.43368458494508916, "grad_norm": 0.06874189525842667, "learning_rate": 0.0009762986727441472, "loss": 1.5296, "step": 9754 }, { "epoch": 0.4337735094037615, "grad_norm": 0.06724654138088226, "learning_rate": 0.0009762879279982582, "loss": 1.5354, "step": 9756 }, { "epoch": 0.43386243386243384, "grad_norm": 0.06827713549137115, "learning_rate": 0.0009762771808765659, "loss": 1.5306, "step": 9758 }, { "epoch": 0.4339513583211062, "grad_norm": 0.06977761536836624, "learning_rate": 0.0009762664313791241, "loss": 1.5323, "step": 9760 }, { "epoch": 0.4340402827797786, "grad_norm": 0.0717058926820755, "learning_rate": 0.0009762556795059862, "loss": 1.5324, "step": 9762 }, { "epoch": 0.43412920723845094, "grad_norm": 0.06921355426311493, "learning_rate": 0.0009762449252572058, "loss": 1.5318, "step": 9764 }, { "epoch": 0.4342181316971233, "grad_norm": 0.0721471756696701, "learning_rate": 0.0009762341686328368, "loss": 1.5354, "step": 9766 }, { "epoch": 0.4343070561557957, "grad_norm": 0.06916612386703491, "learning_rate": 0.0009762234096329327, "loss": 1.531, "step": 9768 }, { "epoch": 0.434395980614468, "grad_norm": 0.07326947152614594, "learning_rate": 0.0009762126482575473, "loss": 1.5348, "step": 9770 }, { "epoch": 0.43448490507314036, "grad_norm": 0.07012344151735306, "learning_rate": 0.000976201884506734, "loss": 1.5335, "step": 9772 }, { "epoch": 0.4345738295318127, "grad_norm": 0.07134021073579788, "learning_rate": 0.0009761911183805466, "loss": 1.534, "step": 9774 }, { "epoch": 0.4346627539904851, "grad_norm": 0.06965865939855576, "learning_rate": 0.0009761803498790389, "loss": 1.5304, "step": 9776 }, { "epoch": 0.43475167844915746, "grad_norm": 0.06988412886857986, "learning_rate": 0.0009761695790022647, "loss": 1.5289, "step": 9778 }, { "epoch": 0.4348406029078298, "grad_norm": 0.0701071098446846, "learning_rate": 0.0009761588057502775, "loss": 1.5378, "step": 9780 }, { "epoch": 0.43492952736650214, "grad_norm": 0.07131971418857574, "learning_rate": 0.0009761480301231311, "loss": 1.5371, "step": 9782 }, { "epoch": 0.4350184518251745, "grad_norm": 0.06863526254892349, "learning_rate": 0.0009761372521208795, "loss": 1.5313, "step": 9784 }, { "epoch": 0.4351073762838469, "grad_norm": 0.06877754628658295, "learning_rate": 0.0009761264717435761, "loss": 1.5346, "step": 9786 }, { "epoch": 0.43519630074251925, "grad_norm": 0.06825356185436249, "learning_rate": 0.0009761156889912748, "loss": 1.5348, "step": 9788 }, { "epoch": 0.43528522520119156, "grad_norm": 0.06907788664102554, "learning_rate": 0.0009761049038640295, "loss": 1.5285, "step": 9790 }, { "epoch": 0.43537414965986393, "grad_norm": 0.06589856743812561, "learning_rate": 0.0009760941163618937, "loss": 1.5279, "step": 9792 }, { "epoch": 0.4354630741185363, "grad_norm": 0.06974221765995026, "learning_rate": 0.0009760833264849215, "loss": 1.5368, "step": 9794 }, { "epoch": 0.43555199857720867, "grad_norm": 0.06818997859954834, "learning_rate": 0.0009760725342331668, "loss": 1.5287, "step": 9796 }, { "epoch": 0.43564092303588103, "grad_norm": 0.0677684098482132, "learning_rate": 0.0009760617396066834, "loss": 1.5346, "step": 9798 }, { "epoch": 0.4357298474945534, "grad_norm": 0.06733290106058121, "learning_rate": 0.0009760509426055247, "loss": 1.5316, "step": 9800 }, { "epoch": 0.4358187719532257, "grad_norm": 0.06740870326757431, "learning_rate": 0.000976040143229745, "loss": 1.5319, "step": 9802 }, { "epoch": 0.4359076964118981, "grad_norm": 0.06795347481966019, "learning_rate": 0.0009760293414793979, "loss": 1.5353, "step": 9804 }, { "epoch": 0.43599662087057045, "grad_norm": 0.0665903314948082, "learning_rate": 0.0009760185373545376, "loss": 1.5337, "step": 9806 }, { "epoch": 0.4360855453292428, "grad_norm": 0.06611596792936325, "learning_rate": 0.0009760077308552178, "loss": 1.5304, "step": 9808 }, { "epoch": 0.4361744697879152, "grad_norm": 0.06896202266216278, "learning_rate": 0.0009759969219814924, "loss": 1.53, "step": 9810 }, { "epoch": 0.4362633942465875, "grad_norm": 0.06539107859134674, "learning_rate": 0.0009759861107334154, "loss": 1.5292, "step": 9812 }, { "epoch": 0.43635231870525987, "grad_norm": 0.07421990483999252, "learning_rate": 0.0009759752971110407, "loss": 1.5396, "step": 9814 }, { "epoch": 0.43644124316393224, "grad_norm": 0.06868709623813629, "learning_rate": 0.000975964481114422, "loss": 1.5387, "step": 9816 }, { "epoch": 0.4365301676226046, "grad_norm": 0.07227539271116257, "learning_rate": 0.0009759536627436137, "loss": 1.54, "step": 9818 }, { "epoch": 0.43661909208127697, "grad_norm": 0.06839174777269363, "learning_rate": 0.0009759428419986693, "loss": 1.5323, "step": 9820 }, { "epoch": 0.43670801653994934, "grad_norm": 0.07100740075111389, "learning_rate": 0.0009759320188796432, "loss": 1.5304, "step": 9822 }, { "epoch": 0.43679694099862165, "grad_norm": 0.07037629187107086, "learning_rate": 0.000975921193386589, "loss": 1.5327, "step": 9824 }, { "epoch": 0.436885865457294, "grad_norm": 0.07322507351636887, "learning_rate": 0.000975910365519561, "loss": 1.5359, "step": 9826 }, { "epoch": 0.4369747899159664, "grad_norm": 0.06714385747909546, "learning_rate": 0.0009758995352786131, "loss": 1.5287, "step": 9828 }, { "epoch": 0.43706371437463876, "grad_norm": 0.06822887063026428, "learning_rate": 0.0009758887026637994, "loss": 1.5342, "step": 9830 }, { "epoch": 0.4371526388333111, "grad_norm": 0.0696975588798523, "learning_rate": 0.0009758778676751737, "loss": 1.5314, "step": 9832 }, { "epoch": 0.43724156329198344, "grad_norm": 0.07337722182273865, "learning_rate": 0.0009758670303127903, "loss": 1.5383, "step": 9834 }, { "epoch": 0.4373304877506558, "grad_norm": 0.07148087024688721, "learning_rate": 0.0009758561905767032, "loss": 1.5315, "step": 9836 }, { "epoch": 0.4374194122093282, "grad_norm": 0.06952288746833801, "learning_rate": 0.0009758453484669663, "loss": 1.5327, "step": 9838 }, { "epoch": 0.43750833666800054, "grad_norm": 0.07030414789915085, "learning_rate": 0.000975834503983634, "loss": 1.5458, "step": 9840 }, { "epoch": 0.4375972611266729, "grad_norm": 0.06890011578798294, "learning_rate": 0.00097582365712676, "loss": 1.5377, "step": 9842 }, { "epoch": 0.4376861855853452, "grad_norm": 0.06955260038375854, "learning_rate": 0.0009758128078963988, "loss": 1.5374, "step": 9844 }, { "epoch": 0.4377751100440176, "grad_norm": 0.0680069625377655, "learning_rate": 0.0009758019562926041, "loss": 1.5341, "step": 9846 }, { "epoch": 0.43786403450268996, "grad_norm": 0.06949877738952637, "learning_rate": 0.0009757911023154305, "loss": 1.5361, "step": 9848 }, { "epoch": 0.4379529589613623, "grad_norm": 0.0744439959526062, "learning_rate": 0.0009757802459649319, "loss": 1.5328, "step": 9850 }, { "epoch": 0.4380418834200347, "grad_norm": 0.07167305797338486, "learning_rate": 0.0009757693872411622, "loss": 1.5318, "step": 9852 }, { "epoch": 0.43813080787870706, "grad_norm": 0.07135527580976486, "learning_rate": 0.0009757585261441762, "loss": 1.5315, "step": 9854 }, { "epoch": 0.4382197323373794, "grad_norm": 0.0703761875629425, "learning_rate": 0.0009757476626740274, "loss": 1.5339, "step": 9856 }, { "epoch": 0.43830865679605174, "grad_norm": 0.06967216730117798, "learning_rate": 0.0009757367968307705, "loss": 1.5276, "step": 9858 }, { "epoch": 0.4383975812547241, "grad_norm": 0.06713514029979706, "learning_rate": 0.0009757259286144593, "loss": 1.5282, "step": 9860 }, { "epoch": 0.4384865057133965, "grad_norm": 0.07148092240095139, "learning_rate": 0.0009757150580251481, "loss": 1.5397, "step": 9862 }, { "epoch": 0.43857543017206885, "grad_norm": 0.07046201080083847, "learning_rate": 0.0009757041850628915, "loss": 1.5359, "step": 9864 }, { "epoch": 0.43866435463074116, "grad_norm": 0.06848791241645813, "learning_rate": 0.0009756933097277434, "loss": 1.5323, "step": 9866 }, { "epoch": 0.43875327908941353, "grad_norm": 0.06691039353609085, "learning_rate": 0.000975682432019758, "loss": 1.5337, "step": 9868 }, { "epoch": 0.4388422035480859, "grad_norm": 0.06878488510847092, "learning_rate": 0.0009756715519389899, "loss": 1.5321, "step": 9870 }, { "epoch": 0.43893112800675826, "grad_norm": 0.06844276189804077, "learning_rate": 0.0009756606694854928, "loss": 1.5288, "step": 9872 }, { "epoch": 0.43902005246543063, "grad_norm": 0.07109802216291428, "learning_rate": 0.0009756497846593215, "loss": 1.5309, "step": 9874 }, { "epoch": 0.439108976924103, "grad_norm": 0.06382738053798676, "learning_rate": 0.0009756388974605302, "loss": 1.5289, "step": 9876 }, { "epoch": 0.4391979013827753, "grad_norm": 0.06882934272289276, "learning_rate": 0.000975628007889173, "loss": 1.5332, "step": 9878 }, { "epoch": 0.4392868258414477, "grad_norm": 0.06806602329015732, "learning_rate": 0.0009756171159453045, "loss": 1.5313, "step": 9880 }, { "epoch": 0.43937575030012005, "grad_norm": 0.06839878112077713, "learning_rate": 0.0009756062216289787, "loss": 1.5379, "step": 9882 }, { "epoch": 0.4394646747587924, "grad_norm": 0.0689515769481659, "learning_rate": 0.0009755953249402503, "loss": 1.5344, "step": 9884 }, { "epoch": 0.4395535992174648, "grad_norm": 0.06822851300239563, "learning_rate": 0.0009755844258791733, "loss": 1.5353, "step": 9886 }, { "epoch": 0.4396425236761371, "grad_norm": 0.06764476001262665, "learning_rate": 0.0009755735244458024, "loss": 1.5303, "step": 9888 }, { "epoch": 0.43973144813480947, "grad_norm": 0.06747211515903473, "learning_rate": 0.0009755626206401917, "loss": 1.5273, "step": 9890 }, { "epoch": 0.43982037259348183, "grad_norm": 0.06862806528806686, "learning_rate": 0.0009755517144623958, "loss": 1.5317, "step": 9892 }, { "epoch": 0.4399092970521542, "grad_norm": 0.06655488908290863, "learning_rate": 0.0009755408059124688, "loss": 1.5291, "step": 9894 }, { "epoch": 0.43999822151082657, "grad_norm": 0.06658562272787094, "learning_rate": 0.0009755298949904655, "loss": 1.5282, "step": 9896 }, { "epoch": 0.4400871459694989, "grad_norm": 0.06817793101072311, "learning_rate": 0.0009755189816964402, "loss": 1.5301, "step": 9898 }, { "epoch": 0.44017607042817125, "grad_norm": 0.07138610631227493, "learning_rate": 0.0009755080660304472, "loss": 1.5329, "step": 9900 }, { "epoch": 0.4402649948868436, "grad_norm": 0.06737557798624039, "learning_rate": 0.0009754971479925409, "loss": 1.5357, "step": 9902 }, { "epoch": 0.440353919345516, "grad_norm": 0.06973376125097275, "learning_rate": 0.0009754862275827762, "loss": 1.5267, "step": 9904 }, { "epoch": 0.44044284380418836, "grad_norm": 0.06866409629583359, "learning_rate": 0.000975475304801207, "loss": 1.5338, "step": 9906 }, { "epoch": 0.4405317682628607, "grad_norm": 0.06903880089521408, "learning_rate": 0.0009754643796478882, "loss": 1.5376, "step": 9908 }, { "epoch": 0.44062069272153304, "grad_norm": 0.06750325113534927, "learning_rate": 0.0009754534521228742, "loss": 1.5331, "step": 9910 }, { "epoch": 0.4407096171802054, "grad_norm": 0.0681682899594307, "learning_rate": 0.0009754425222262193, "loss": 1.535, "step": 9912 }, { "epoch": 0.44079854163887777, "grad_norm": 0.06802817434072495, "learning_rate": 0.0009754315899579783, "loss": 1.53, "step": 9914 }, { "epoch": 0.44088746609755014, "grad_norm": 0.06679469347000122, "learning_rate": 0.0009754206553182057, "loss": 1.5295, "step": 9916 }, { "epoch": 0.4409763905562225, "grad_norm": 0.06789480149745941, "learning_rate": 0.0009754097183069557, "loss": 1.5327, "step": 9918 }, { "epoch": 0.4410653150148948, "grad_norm": 0.07148660719394684, "learning_rate": 0.0009753987789242833, "loss": 1.5376, "step": 9920 }, { "epoch": 0.4411542394735672, "grad_norm": 0.07011796534061432, "learning_rate": 0.0009753878371702427, "loss": 1.5291, "step": 9922 }, { "epoch": 0.44124316393223956, "grad_norm": 0.07079441100358963, "learning_rate": 0.0009753768930448888, "loss": 1.5317, "step": 9924 }, { "epoch": 0.4413320883909119, "grad_norm": 0.07116356492042542, "learning_rate": 0.000975365946548276, "loss": 1.5337, "step": 9926 }, { "epoch": 0.4414210128495843, "grad_norm": 0.07054295390844345, "learning_rate": 0.000975354997680459, "loss": 1.5309, "step": 9928 }, { "epoch": 0.44150993730825666, "grad_norm": 0.06715717166662216, "learning_rate": 0.0009753440464414924, "loss": 1.5324, "step": 9930 }, { "epoch": 0.441598861766929, "grad_norm": 0.06757137924432755, "learning_rate": 0.0009753330928314308, "loss": 1.5335, "step": 9932 }, { "epoch": 0.44168778622560134, "grad_norm": 0.06650960445404053, "learning_rate": 0.0009753221368503286, "loss": 1.5276, "step": 9934 }, { "epoch": 0.4417767106842737, "grad_norm": 0.06757502257823944, "learning_rate": 0.0009753111784982409, "loss": 1.5326, "step": 9936 }, { "epoch": 0.4418656351429461, "grad_norm": 0.07040387392044067, "learning_rate": 0.0009753002177752221, "loss": 1.5321, "step": 9938 }, { "epoch": 0.44195455960161845, "grad_norm": 0.06907001882791519, "learning_rate": 0.000975289254681327, "loss": 1.5358, "step": 9940 }, { "epoch": 0.44204348406029076, "grad_norm": 0.06722556799650192, "learning_rate": 0.0009752782892166101, "loss": 1.5388, "step": 9942 }, { "epoch": 0.4421324085189631, "grad_norm": 0.06660650670528412, "learning_rate": 0.0009752673213811263, "loss": 1.5229, "step": 9944 }, { "epoch": 0.4422213329776355, "grad_norm": 0.0699448511004448, "learning_rate": 0.00097525635117493, "loss": 1.5271, "step": 9946 }, { "epoch": 0.44231025743630786, "grad_norm": 0.07192565500736237, "learning_rate": 0.0009752453785980763, "loss": 1.5317, "step": 9948 }, { "epoch": 0.44239918189498023, "grad_norm": 0.06789202988147736, "learning_rate": 0.0009752344036506197, "loss": 1.5254, "step": 9950 }, { "epoch": 0.4424881063536526, "grad_norm": 0.06763622164726257, "learning_rate": 0.000975223426332615, "loss": 1.5357, "step": 9952 }, { "epoch": 0.4425770308123249, "grad_norm": 0.0687379539012909, "learning_rate": 0.000975212446644117, "loss": 1.5316, "step": 9954 }, { "epoch": 0.4426659552709973, "grad_norm": 0.06635449826717377, "learning_rate": 0.0009752014645851805, "loss": 1.5354, "step": 9956 }, { "epoch": 0.44275487972966965, "grad_norm": 0.07067475467920303, "learning_rate": 0.0009751904801558602, "loss": 1.5283, "step": 9958 }, { "epoch": 0.442843804188342, "grad_norm": 0.07415599375963211, "learning_rate": 0.0009751794933562108, "loss": 1.534, "step": 9960 }, { "epoch": 0.4429327286470144, "grad_norm": 0.06655219942331314, "learning_rate": 0.0009751685041862872, "loss": 1.5341, "step": 9962 }, { "epoch": 0.4430216531056867, "grad_norm": 0.07229764014482498, "learning_rate": 0.0009751575126461443, "loss": 1.535, "step": 9964 }, { "epoch": 0.44311057756435906, "grad_norm": 0.06633423268795013, "learning_rate": 0.0009751465187358368, "loss": 1.5361, "step": 9966 }, { "epoch": 0.44319950202303143, "grad_norm": 0.07215137034654617, "learning_rate": 0.0009751355224554197, "loss": 1.5333, "step": 9968 }, { "epoch": 0.4432884264817038, "grad_norm": 0.06949516385793686, "learning_rate": 0.0009751245238049477, "loss": 1.53, "step": 9970 }, { "epoch": 0.44337735094037617, "grad_norm": 0.06816526502370834, "learning_rate": 0.0009751135227844758, "loss": 1.5338, "step": 9972 }, { "epoch": 0.4434662753990485, "grad_norm": 0.06912938505411148, "learning_rate": 0.0009751025193940586, "loss": 1.5347, "step": 9974 }, { "epoch": 0.44355519985772085, "grad_norm": 0.07131484895944595, "learning_rate": 0.0009750915136337513, "loss": 1.5317, "step": 9976 }, { "epoch": 0.4436441243163932, "grad_norm": 0.06642764806747437, "learning_rate": 0.0009750805055036086, "loss": 1.5271, "step": 9978 }, { "epoch": 0.4437330487750656, "grad_norm": 0.0668521523475647, "learning_rate": 0.0009750694950036855, "loss": 1.5312, "step": 9980 }, { "epoch": 0.44382197323373795, "grad_norm": 0.06732647120952606, "learning_rate": 0.0009750584821340369, "loss": 1.536, "step": 9982 }, { "epoch": 0.4439108976924103, "grad_norm": 0.06798292696475983, "learning_rate": 0.0009750474668947178, "loss": 1.5334, "step": 9984 }, { "epoch": 0.44399982215108263, "grad_norm": 0.06467396020889282, "learning_rate": 0.0009750364492857829, "loss": 1.532, "step": 9986 }, { "epoch": 0.444088746609755, "grad_norm": 0.06865821778774261, "learning_rate": 0.0009750254293072876, "loss": 1.5302, "step": 9988 }, { "epoch": 0.44417767106842737, "grad_norm": 0.06773124635219574, "learning_rate": 0.0009750144069592863, "loss": 1.5351, "step": 9990 }, { "epoch": 0.44426659552709974, "grad_norm": 0.06710581481456757, "learning_rate": 0.0009750033822418345, "loss": 1.5284, "step": 9992 }, { "epoch": 0.4443555199857721, "grad_norm": 0.06517042219638824, "learning_rate": 0.000974992355154987, "loss": 1.5325, "step": 9994 }, { "epoch": 0.4444444444444444, "grad_norm": 0.06951684504747391, "learning_rate": 0.0009749813256987987, "loss": 1.5345, "step": 9996 }, { "epoch": 0.4445333689031168, "grad_norm": 0.06696436554193497, "learning_rate": 0.0009749702938733247, "loss": 1.5336, "step": 9998 }, { "epoch": 0.44462229336178916, "grad_norm": 0.06807401776313782, "learning_rate": 0.00097495925967862, "loss": 1.5254, "step": 10000 }, { "epoch": 0.44462229336178916, "eval_loss": 1.5108540058135986, "eval_runtime": 12.3873, "eval_samples_per_second": 557.83, "eval_steps_per_second": 69.749, "step": 10000 }, { "epoch": 0.4447112178204615, "grad_norm": 0.07122799009084702, "learning_rate": 0.0006260291775788085, "loss": 1.5277, "step": 10002 }, { "epoch": 0.4448001422791339, "grad_norm": 0.06492965668439865, "learning_rate": 0.0006258877037424691, "loss": 1.5261, "step": 10004 }, { "epoch": 0.44488906673780626, "grad_norm": 0.06462486833333969, "learning_rate": 0.0006257462191446941, "loss": 1.5268, "step": 10006 }, { "epoch": 0.4449779911964786, "grad_norm": 0.06561804562807083, "learning_rate": 0.0006256047237975782, "loss": 1.5237, "step": 10008 }, { "epoch": 0.44506691565515094, "grad_norm": 0.06555454432964325, "learning_rate": 0.0006254632177132169, "loss": 1.5244, "step": 10010 }, { "epoch": 0.4451558401138233, "grad_norm": 0.06676924228668213, "learning_rate": 0.0006253217009037068, "loss": 1.5251, "step": 10012 }, { "epoch": 0.4452447645724957, "grad_norm": 0.06575804203748703, "learning_rate": 0.0006251801733811455, "loss": 1.5222, "step": 10014 }, { "epoch": 0.44533368903116805, "grad_norm": 0.06349354237318039, "learning_rate": 0.0006250386351576314, "loss": 1.5211, "step": 10016 }, { "epoch": 0.44542261348984036, "grad_norm": 0.06419903039932251, "learning_rate": 0.0006248970862452637, "loss": 1.5216, "step": 10018 }, { "epoch": 0.4455115379485127, "grad_norm": 0.06334590911865234, "learning_rate": 0.0006247555266561425, "loss": 1.5178, "step": 10020 }, { "epoch": 0.4456004624071851, "grad_norm": 0.06277309358119965, "learning_rate": 0.0006246139564023693, "loss": 1.5201, "step": 10022 }, { "epoch": 0.44568938686585746, "grad_norm": 0.06414992362260818, "learning_rate": 0.0006244723754960459, "loss": 1.5213, "step": 10024 }, { "epoch": 0.44577831132452983, "grad_norm": 0.06549572199583054, "learning_rate": 0.0006243307839492752, "loss": 1.5174, "step": 10026 }, { "epoch": 0.44586723578320214, "grad_norm": 0.0640779510140419, "learning_rate": 0.0006241891817741613, "loss": 1.5167, "step": 10028 }, { "epoch": 0.4459561602418745, "grad_norm": 0.06481896340847015, "learning_rate": 0.0006240475689828086, "loss": 1.5147, "step": 10030 }, { "epoch": 0.4460450847005469, "grad_norm": 0.06547030061483383, "learning_rate": 0.0006239059455873235, "loss": 1.5214, "step": 10032 }, { "epoch": 0.44613400915921925, "grad_norm": 0.061458081007003784, "learning_rate": 0.0006237643115998119, "loss": 1.5146, "step": 10034 }, { "epoch": 0.4462229336178916, "grad_norm": 0.06149187311530113, "learning_rate": 0.0006236226670323816, "loss": 1.5123, "step": 10036 }, { "epoch": 0.446311858076564, "grad_norm": 0.06510701775550842, "learning_rate": 0.0006234810118971408, "loss": 1.5149, "step": 10038 }, { "epoch": 0.4464007825352363, "grad_norm": 0.061700914055109024, "learning_rate": 0.0006233393462061989, "loss": 1.5157, "step": 10040 }, { "epoch": 0.44648970699390866, "grad_norm": 0.06388545781373978, "learning_rate": 0.0006231976699716664, "loss": 1.5221, "step": 10042 }, { "epoch": 0.44657863145258103, "grad_norm": 0.06275127828121185, "learning_rate": 0.0006230559832056539, "loss": 1.5054, "step": 10044 }, { "epoch": 0.4466675559112534, "grad_norm": 0.06381896138191223, "learning_rate": 0.0006229142859202739, "loss": 1.5148, "step": 10046 }, { "epoch": 0.44675648036992577, "grad_norm": 0.06251713633537292, "learning_rate": 0.0006227725781276389, "loss": 1.5142, "step": 10048 }, { "epoch": 0.4468454048285981, "grad_norm": 0.06425853818655014, "learning_rate": 0.000622630859839863, "loss": 1.5165, "step": 10050 }, { "epoch": 0.44693432928727045, "grad_norm": 0.06350740045309067, "learning_rate": 0.0006224891310690606, "loss": 1.5139, "step": 10052 }, { "epoch": 0.4470232537459428, "grad_norm": 0.06478399783372879, "learning_rate": 0.0006223473918273477, "loss": 1.5146, "step": 10054 }, { "epoch": 0.4471121782046152, "grad_norm": 0.06344152241945267, "learning_rate": 0.0006222056421268405, "loss": 1.5167, "step": 10056 }, { "epoch": 0.44720110266328755, "grad_norm": 0.06286389380693436, "learning_rate": 0.0006220638819796565, "loss": 1.5089, "step": 10058 }, { "epoch": 0.4472900271219599, "grad_norm": 0.06424534320831299, "learning_rate": 0.0006219221113979138, "loss": 1.5131, "step": 10060 }, { "epoch": 0.44737895158063223, "grad_norm": 0.061146993190050125, "learning_rate": 0.0006217803303937319, "loss": 1.5113, "step": 10062 }, { "epoch": 0.4474678760393046, "grad_norm": 0.06360126286745071, "learning_rate": 0.0006216385389792306, "loss": 1.5176, "step": 10064 }, { "epoch": 0.44755680049797697, "grad_norm": 0.06215475872159004, "learning_rate": 0.0006214967371665309, "loss": 1.5132, "step": 10066 }, { "epoch": 0.44764572495664934, "grad_norm": 0.06413223594427109, "learning_rate": 0.0006213549249677548, "loss": 1.5158, "step": 10068 }, { "epoch": 0.4477346494153217, "grad_norm": 0.06347139924764633, "learning_rate": 0.000621213102395025, "loss": 1.5089, "step": 10070 }, { "epoch": 0.447823573873994, "grad_norm": 0.06259430944919586, "learning_rate": 0.0006210712694604647, "loss": 1.5121, "step": 10072 }, { "epoch": 0.4479124983326664, "grad_norm": 0.06153608858585358, "learning_rate": 0.0006209294261761989, "loss": 1.5149, "step": 10074 }, { "epoch": 0.44800142279133875, "grad_norm": 0.06388126313686371, "learning_rate": 0.000620787572554353, "loss": 1.5123, "step": 10076 }, { "epoch": 0.4480903472500111, "grad_norm": 0.06113690882921219, "learning_rate": 0.0006206457086070531, "loss": 1.5124, "step": 10078 }, { "epoch": 0.4481792717086835, "grad_norm": 0.06311555206775665, "learning_rate": 0.000620503834346426, "loss": 1.5159, "step": 10080 }, { "epoch": 0.4482681961673558, "grad_norm": 0.06283438205718994, "learning_rate": 0.0006203619497846005, "loss": 1.5146, "step": 10082 }, { "epoch": 0.44835712062602817, "grad_norm": 0.06206725165247917, "learning_rate": 0.0006202200549337048, "loss": 1.5096, "step": 10084 }, { "epoch": 0.44844604508470054, "grad_norm": 0.062284741550683975, "learning_rate": 0.0006200781498058695, "loss": 1.5088, "step": 10086 }, { "epoch": 0.4485349695433729, "grad_norm": 0.06355787068605423, "learning_rate": 0.0006199362344132243, "loss": 1.5075, "step": 10088 }, { "epoch": 0.4486238940020453, "grad_norm": 0.06461120396852493, "learning_rate": 0.0006197943087679013, "loss": 1.5123, "step": 10090 }, { "epoch": 0.44871281846071764, "grad_norm": 0.06290825456380844, "learning_rate": 0.0006196523728820329, "loss": 1.5114, "step": 10092 }, { "epoch": 0.44880174291938996, "grad_norm": 0.06232891604304314, "learning_rate": 0.0006195104267677525, "loss": 1.5073, "step": 10094 }, { "epoch": 0.4488906673780623, "grad_norm": 0.06516814976930618, "learning_rate": 0.0006193684704371941, "loss": 1.519, "step": 10096 }, { "epoch": 0.4489795918367347, "grad_norm": 0.06370670348405838, "learning_rate": 0.0006192265039024928, "loss": 1.5129, "step": 10098 }, { "epoch": 0.44906851629540706, "grad_norm": 0.0628281906247139, "learning_rate": 0.0006190845271757846, "loss": 1.5132, "step": 10100 }, { "epoch": 0.44915744075407943, "grad_norm": 0.06411248445510864, "learning_rate": 0.0006189425402692061, "loss": 1.5139, "step": 10102 }, { "epoch": 0.44924636521275174, "grad_norm": 0.06423705816268921, "learning_rate": 0.0006188005431948953, "loss": 1.5087, "step": 10104 }, { "epoch": 0.4493352896714241, "grad_norm": 0.06506875157356262, "learning_rate": 0.0006186585359649903, "loss": 1.5149, "step": 10106 }, { "epoch": 0.4494242141300965, "grad_norm": 0.06523150950670242, "learning_rate": 0.0006185165185916308, "loss": 1.5154, "step": 10108 }, { "epoch": 0.44951313858876885, "grad_norm": 0.06264711171388626, "learning_rate": 0.0006183744910869571, "loss": 1.511, "step": 10110 }, { "epoch": 0.4496020630474412, "grad_norm": 0.06609227508306503, "learning_rate": 0.0006182324534631102, "loss": 1.5134, "step": 10112 }, { "epoch": 0.4496909875061136, "grad_norm": 0.06463871151208878, "learning_rate": 0.0006180904057322321, "loss": 1.5089, "step": 10114 }, { "epoch": 0.4497799119647859, "grad_norm": 0.06578138470649719, "learning_rate": 0.0006179483479064657, "loss": 1.5116, "step": 10116 }, { "epoch": 0.44986883642345826, "grad_norm": 0.0638619214296341, "learning_rate": 0.0006178062799979548, "loss": 1.5139, "step": 10118 }, { "epoch": 0.44995776088213063, "grad_norm": 0.06302259117364883, "learning_rate": 0.0006176642020188439, "loss": 1.5093, "step": 10120 }, { "epoch": 0.450046685340803, "grad_norm": 0.06259556859731674, "learning_rate": 0.0006175221139812784, "loss": 1.51, "step": 10122 }, { "epoch": 0.45013560979947537, "grad_norm": 0.06390849500894547, "learning_rate": 0.0006173800158974048, "loss": 1.5142, "step": 10124 }, { "epoch": 0.4502245342581477, "grad_norm": 0.06265709549188614, "learning_rate": 0.0006172379077793702, "loss": 1.5049, "step": 10126 }, { "epoch": 0.45031345871682005, "grad_norm": 0.061674658209085464, "learning_rate": 0.0006170957896393225, "loss": 1.5133, "step": 10128 }, { "epoch": 0.4504023831754924, "grad_norm": 0.06264129281044006, "learning_rate": 0.0006169536614894107, "loss": 1.5137, "step": 10130 }, { "epoch": 0.4504913076341648, "grad_norm": 0.06164534017443657, "learning_rate": 0.0006168115233417846, "loss": 1.5115, "step": 10132 }, { "epoch": 0.45058023209283715, "grad_norm": 0.06206073611974716, "learning_rate": 0.0006166693752085946, "loss": 1.5174, "step": 10134 }, { "epoch": 0.4506691565515095, "grad_norm": 0.06333418935537338, "learning_rate": 0.0006165272171019923, "loss": 1.5157, "step": 10136 }, { "epoch": 0.45075808101018183, "grad_norm": 0.06554005295038223, "learning_rate": 0.0006163850490341298, "loss": 1.5043, "step": 10138 }, { "epoch": 0.4508470054688542, "grad_norm": 0.06292907893657684, "learning_rate": 0.0006162428710171608, "loss": 1.5117, "step": 10140 }, { "epoch": 0.45093592992752657, "grad_norm": 0.06288234144449234, "learning_rate": 0.0006161006830632386, "loss": 1.5138, "step": 10142 }, { "epoch": 0.45102485438619894, "grad_norm": 0.06246441975235939, "learning_rate": 0.0006159584851845184, "loss": 1.5104, "step": 10144 }, { "epoch": 0.4511137788448713, "grad_norm": 0.062472037971019745, "learning_rate": 0.0006158162773931559, "loss": 1.5149, "step": 10146 }, { "epoch": 0.4512027033035436, "grad_norm": 0.06365121155977249, "learning_rate": 0.0006156740597013079, "loss": 1.5114, "step": 10148 }, { "epoch": 0.451291627762216, "grad_norm": 0.06463265419006348, "learning_rate": 0.0006155318321211312, "loss": 1.5189, "step": 10150 }, { "epoch": 0.45138055222088835, "grad_norm": 0.06455686688423157, "learning_rate": 0.0006153895946647845, "loss": 1.5085, "step": 10152 }, { "epoch": 0.4514694766795607, "grad_norm": 0.06434636563062668, "learning_rate": 0.0006152473473444265, "loss": 1.5093, "step": 10154 }, { "epoch": 0.4515584011382331, "grad_norm": 0.06507983803749084, "learning_rate": 0.0006151050901722177, "loss": 1.5084, "step": 10156 }, { "epoch": 0.4516473255969054, "grad_norm": 0.0638333186507225, "learning_rate": 0.0006149628231603184, "loss": 1.5092, "step": 10158 }, { "epoch": 0.45173625005557777, "grad_norm": 0.0641988068819046, "learning_rate": 0.0006148205463208902, "loss": 1.5138, "step": 10160 }, { "epoch": 0.45182517451425014, "grad_norm": 0.06309515982866287, "learning_rate": 0.000614678259666096, "loss": 1.5057, "step": 10162 }, { "epoch": 0.4519140989729225, "grad_norm": 0.06427089869976044, "learning_rate": 0.0006145359632080987, "loss": 1.5137, "step": 10164 }, { "epoch": 0.4520030234315949, "grad_norm": 0.06765829026699066, "learning_rate": 0.0006143936569590624, "loss": 1.5093, "step": 10166 }, { "epoch": 0.45209194789026724, "grad_norm": 0.0644116923213005, "learning_rate": 0.0006142513409311522, "loss": 1.5123, "step": 10168 }, { "epoch": 0.45218087234893956, "grad_norm": 0.06532014906406403, "learning_rate": 0.000614109015136534, "loss": 1.5055, "step": 10170 }, { "epoch": 0.4522697968076119, "grad_norm": 0.06273654848337173, "learning_rate": 0.0006139666795873743, "loss": 1.5117, "step": 10172 }, { "epoch": 0.4523587212662843, "grad_norm": 0.0660717710852623, "learning_rate": 0.0006138243342958405, "loss": 1.5065, "step": 10174 }, { "epoch": 0.45244764572495666, "grad_norm": 0.06457041203975677, "learning_rate": 0.0006136819792741011, "loss": 1.5117, "step": 10176 }, { "epoch": 0.452536570183629, "grad_norm": 0.06340939551591873, "learning_rate": 0.000613539614534325, "loss": 1.507, "step": 10178 }, { "epoch": 0.45262549464230134, "grad_norm": 0.06222638487815857, "learning_rate": 0.0006133972400886825, "loss": 1.5065, "step": 10180 }, { "epoch": 0.4527144191009737, "grad_norm": 0.06319104135036469, "learning_rate": 0.0006132548559493441, "loss": 1.5134, "step": 10182 }, { "epoch": 0.4528033435596461, "grad_norm": 0.06425220519304276, "learning_rate": 0.0006131124621284815, "loss": 1.509, "step": 10184 }, { "epoch": 0.45289226801831844, "grad_norm": 0.06511306017637253, "learning_rate": 0.0006129700586382671, "loss": 1.5106, "step": 10186 }, { "epoch": 0.4529811924769908, "grad_norm": 0.06421872228384018, "learning_rate": 0.0006128276454908742, "loss": 1.5101, "step": 10188 }, { "epoch": 0.4530701169356632, "grad_norm": 0.06392832100391388, "learning_rate": 0.0006126852226984771, "loss": 1.5032, "step": 10190 }, { "epoch": 0.4531590413943355, "grad_norm": 0.06504455208778381, "learning_rate": 0.0006125427902732506, "loss": 1.5128, "step": 10192 }, { "epoch": 0.45324796585300786, "grad_norm": 0.06360786408185959, "learning_rate": 0.0006124003482273704, "loss": 1.5038, "step": 10194 }, { "epoch": 0.45333689031168023, "grad_norm": 0.06323164701461792, "learning_rate": 0.000612257896573013, "loss": 1.5068, "step": 10196 }, { "epoch": 0.4534258147703526, "grad_norm": 0.06563688069581985, "learning_rate": 0.000612115435322356, "loss": 1.5096, "step": 10198 }, { "epoch": 0.45351473922902497, "grad_norm": 0.06295774132013321, "learning_rate": 0.0006119729644875774, "loss": 1.5103, "step": 10200 }, { "epoch": 0.4536036636876973, "grad_norm": 0.06223352998495102, "learning_rate": 0.0006118304840808565, "loss": 1.5064, "step": 10202 }, { "epoch": 0.45369258814636965, "grad_norm": 0.06273999065160751, "learning_rate": 0.0006116879941143728, "loss": 1.5096, "step": 10204 }, { "epoch": 0.453781512605042, "grad_norm": 0.06447035819292068, "learning_rate": 0.0006115454946003074, "loss": 1.5094, "step": 10206 }, { "epoch": 0.4538704370637144, "grad_norm": 0.06401224434375763, "learning_rate": 0.0006114029855508413, "loss": 1.5158, "step": 10208 }, { "epoch": 0.45395936152238675, "grad_norm": 0.06190183758735657, "learning_rate": 0.0006112604669781572, "loss": 1.5101, "step": 10210 }, { "epoch": 0.45404828598105906, "grad_norm": 0.06359678506851196, "learning_rate": 0.0006111179388944381, "loss": 1.5114, "step": 10212 }, { "epoch": 0.45413721043973143, "grad_norm": 0.06362069398164749, "learning_rate": 0.0006109754013118678, "loss": 1.5103, "step": 10214 }, { "epoch": 0.4542261348984038, "grad_norm": 0.06391779333353043, "learning_rate": 0.0006108328542426312, "loss": 1.5183, "step": 10216 }, { "epoch": 0.45431505935707617, "grad_norm": 0.06379327178001404, "learning_rate": 0.0006106902976989139, "loss": 1.5107, "step": 10218 }, { "epoch": 0.45440398381574854, "grad_norm": 0.06601396203041077, "learning_rate": 0.0006105477316929021, "loss": 1.5121, "step": 10220 }, { "epoch": 0.4544929082744209, "grad_norm": 0.0628276839852333, "learning_rate": 0.0006104051562367829, "loss": 1.5103, "step": 10222 }, { "epoch": 0.4545818327330932, "grad_norm": 0.06758300215005875, "learning_rate": 0.0006102625713427446, "loss": 1.5088, "step": 10224 }, { "epoch": 0.4546707571917656, "grad_norm": 0.06313972175121307, "learning_rate": 0.0006101199770229758, "loss": 1.5101, "step": 10226 }, { "epoch": 0.45475968165043795, "grad_norm": 0.06574665755033493, "learning_rate": 0.0006099773732896658, "loss": 1.5103, "step": 10228 }, { "epoch": 0.4548486061091103, "grad_norm": 0.06595081835985184, "learning_rate": 0.0006098347601550055, "loss": 1.5099, "step": 10230 }, { "epoch": 0.4549375305677827, "grad_norm": 0.06619875133037567, "learning_rate": 0.0006096921376311857, "loss": 1.5086, "step": 10232 }, { "epoch": 0.455026455026455, "grad_norm": 0.06375854462385178, "learning_rate": 0.0006095495057303988, "loss": 1.5088, "step": 10234 }, { "epoch": 0.45511537948512737, "grad_norm": 0.06172388046979904, "learning_rate": 0.0006094068644648373, "loss": 1.5109, "step": 10236 }, { "epoch": 0.45520430394379974, "grad_norm": 0.06474345177412033, "learning_rate": 0.0006092642138466948, "loss": 1.5136, "step": 10238 }, { "epoch": 0.4552932284024721, "grad_norm": 0.06749559938907623, "learning_rate": 0.0006091215538881658, "loss": 1.5045, "step": 10240 }, { "epoch": 0.4553821528611445, "grad_norm": 0.062397945672273636, "learning_rate": 0.0006089788846014457, "loss": 1.5162, "step": 10242 }, { "epoch": 0.45547107731981684, "grad_norm": 0.06550050526857376, "learning_rate": 0.0006088362059987301, "loss": 1.5102, "step": 10244 }, { "epoch": 0.45556000177848915, "grad_norm": 0.0633438304066658, "learning_rate": 0.0006086935180922159, "loss": 1.506, "step": 10246 }, { "epoch": 0.4556489262371615, "grad_norm": 0.06535664945840836, "learning_rate": 0.000608550820894101, "loss": 1.5092, "step": 10248 }, { "epoch": 0.4557378506958339, "grad_norm": 0.0659709945321083, "learning_rate": 0.0006084081144165835, "loss": 1.518, "step": 10250 }, { "epoch": 0.45582677515450626, "grad_norm": 0.06421976536512375, "learning_rate": 0.0006082653986718626, "loss": 1.5138, "step": 10252 }, { "epoch": 0.4559156996131786, "grad_norm": 0.06513391435146332, "learning_rate": 0.0006081226736721383, "loss": 1.5079, "step": 10254 }, { "epoch": 0.45600462407185094, "grad_norm": 0.06458871066570282, "learning_rate": 0.0006079799394296115, "loss": 1.5101, "step": 10256 }, { "epoch": 0.4560935485305233, "grad_norm": 0.06421530246734619, "learning_rate": 0.0006078371959564833, "loss": 1.5049, "step": 10258 }, { "epoch": 0.4561824729891957, "grad_norm": 0.0646352618932724, "learning_rate": 0.0006076944432649567, "loss": 1.504, "step": 10260 }, { "epoch": 0.45627139744786804, "grad_norm": 0.06522826105356216, "learning_rate": 0.0006075516813672342, "loss": 1.5081, "step": 10262 }, { "epoch": 0.4563603219065404, "grad_norm": 0.06355036050081253, "learning_rate": 0.0006074089102755204, "loss": 1.5131, "step": 10264 }, { "epoch": 0.4564492463652127, "grad_norm": 0.06306944042444229, "learning_rate": 0.0006072661300020193, "loss": 1.5033, "step": 10266 }, { "epoch": 0.4565381708238851, "grad_norm": 0.0642002746462822, "learning_rate": 0.0006071233405589368, "loss": 1.5074, "step": 10268 }, { "epoch": 0.45662709528255746, "grad_norm": 0.06410963833332062, "learning_rate": 0.0006069805419584791, "loss": 1.5062, "step": 10270 }, { "epoch": 0.45671601974122983, "grad_norm": 0.06610678881406784, "learning_rate": 0.0006068377342128532, "loss": 1.5125, "step": 10272 }, { "epoch": 0.4568049441999022, "grad_norm": 0.06421998143196106, "learning_rate": 0.000606694917334267, "loss": 1.5103, "step": 10274 }, { "epoch": 0.45689386865857456, "grad_norm": 0.0632401779294014, "learning_rate": 0.000606552091334929, "loss": 1.5144, "step": 10276 }, { "epoch": 0.4569827931172469, "grad_norm": 0.06443547457456589, "learning_rate": 0.0006064092562270487, "loss": 1.5061, "step": 10278 }, { "epoch": 0.45707171757591925, "grad_norm": 0.06429964303970337, "learning_rate": 0.0006062664120228363, "loss": 1.5122, "step": 10280 }, { "epoch": 0.4571606420345916, "grad_norm": 0.06369158625602722, "learning_rate": 0.0006061235587345025, "loss": 1.5081, "step": 10282 }, { "epoch": 0.457249566493264, "grad_norm": 0.062136199325323105, "learning_rate": 0.0006059806963742595, "loss": 1.5106, "step": 10284 }, { "epoch": 0.45733849095193635, "grad_norm": 0.06280120462179184, "learning_rate": 0.0006058378249543193, "loss": 1.5096, "step": 10286 }, { "epoch": 0.45742741541060866, "grad_norm": 0.06295254826545715, "learning_rate": 0.0006056949444868956, "loss": 1.5038, "step": 10288 }, { "epoch": 0.45751633986928103, "grad_norm": 0.0640212669968605, "learning_rate": 0.0006055520549842022, "loss": 1.5059, "step": 10290 }, { "epoch": 0.4576052643279534, "grad_norm": 0.06613568961620331, "learning_rate": 0.000605409156458454, "loss": 1.5122, "step": 10292 }, { "epoch": 0.45769418878662577, "grad_norm": 0.06338582932949066, "learning_rate": 0.0006052662489218665, "loss": 1.5069, "step": 10294 }, { "epoch": 0.45778311324529813, "grad_norm": 0.06454145908355713, "learning_rate": 0.0006051233323866563, "loss": 1.506, "step": 10296 }, { "epoch": 0.4578720377039705, "grad_norm": 0.06602118164300919, "learning_rate": 0.0006049804068650403, "loss": 1.5075, "step": 10298 }, { "epoch": 0.4579609621626428, "grad_norm": 0.06418369710445404, "learning_rate": 0.0006048374723692365, "loss": 1.508, "step": 10300 }, { "epoch": 0.4580498866213152, "grad_norm": 0.063139408826828, "learning_rate": 0.0006046945289114634, "loss": 1.5068, "step": 10302 }, { "epoch": 0.45813881107998755, "grad_norm": 0.06343439221382141, "learning_rate": 0.0006045515765039408, "loss": 1.5078, "step": 10304 }, { "epoch": 0.4582277355386599, "grad_norm": 0.06396979093551636, "learning_rate": 0.0006044086151588886, "loss": 1.5098, "step": 10306 }, { "epoch": 0.4583166599973323, "grad_norm": 0.06393872201442719, "learning_rate": 0.0006042656448885279, "loss": 1.507, "step": 10308 }, { "epoch": 0.4584055844560046, "grad_norm": 0.06459838151931763, "learning_rate": 0.0006041226657050804, "loss": 1.5107, "step": 10310 }, { "epoch": 0.45849450891467697, "grad_norm": 0.06489048898220062, "learning_rate": 0.0006039796776207686, "loss": 1.5094, "step": 10312 }, { "epoch": 0.45858343337334934, "grad_norm": 0.06409396976232529, "learning_rate": 0.0006038366806478157, "loss": 1.5047, "step": 10314 }, { "epoch": 0.4586723578320217, "grad_norm": 0.06495364010334015, "learning_rate": 0.0006036936747984456, "loss": 1.4996, "step": 10316 }, { "epoch": 0.4587612822906941, "grad_norm": 0.06571628898382187, "learning_rate": 0.0006035506600848835, "loss": 1.5113, "step": 10318 }, { "epoch": 0.45885020674936644, "grad_norm": 0.06504350155591965, "learning_rate": 0.0006034076365193545, "loss": 1.4964, "step": 10320 }, { "epoch": 0.45893913120803875, "grad_norm": 0.06698577105998993, "learning_rate": 0.0006032646041140849, "loss": 1.5051, "step": 10322 }, { "epoch": 0.4590280556667111, "grad_norm": 0.06189613789319992, "learning_rate": 0.0006031215628813021, "loss": 1.5097, "step": 10324 }, { "epoch": 0.4591169801253835, "grad_norm": 0.06384344398975372, "learning_rate": 0.0006029785128332336, "loss": 1.512, "step": 10326 }, { "epoch": 0.45920590458405586, "grad_norm": 0.06703834980726242, "learning_rate": 0.0006028354539821079, "loss": 1.5041, "step": 10328 }, { "epoch": 0.4592948290427282, "grad_norm": 0.06500376015901566, "learning_rate": 0.0006026923863401545, "loss": 1.5061, "step": 10330 }, { "epoch": 0.45938375350140054, "grad_norm": 0.06535261869430542, "learning_rate": 0.0006025493099196033, "loss": 1.5052, "step": 10332 }, { "epoch": 0.4594726779600729, "grad_norm": 0.062189262360334396, "learning_rate": 0.0006024062247326854, "loss": 1.5009, "step": 10334 }, { "epoch": 0.4595616024187453, "grad_norm": 0.06319093704223633, "learning_rate": 0.0006022631307916318, "loss": 1.5052, "step": 10336 }, { "epoch": 0.45965052687741764, "grad_norm": 0.0611843504011631, "learning_rate": 0.0006021200281086753, "loss": 1.5051, "step": 10338 }, { "epoch": 0.45973945133609, "grad_norm": 0.06422754377126694, "learning_rate": 0.0006019769166960485, "loss": 1.5042, "step": 10340 }, { "epoch": 0.4598283757947623, "grad_norm": 0.06372174620628357, "learning_rate": 0.0006018337965659859, "loss": 1.513, "step": 10342 }, { "epoch": 0.4599173002534347, "grad_norm": 0.06303902715444565, "learning_rate": 0.0006016906677307213, "loss": 1.5053, "step": 10344 }, { "epoch": 0.46000622471210706, "grad_norm": 0.0631803497672081, "learning_rate": 0.0006015475302024904, "loss": 1.5102, "step": 10346 }, { "epoch": 0.4600951491707794, "grad_norm": 0.06362450867891312, "learning_rate": 0.0006014043839935291, "loss": 1.5115, "step": 10348 }, { "epoch": 0.4601840736294518, "grad_norm": 0.06407329440116882, "learning_rate": 0.0006012612291160743, "loss": 1.5065, "step": 10350 }, { "epoch": 0.46027299808812416, "grad_norm": 0.061808858066797256, "learning_rate": 0.0006011180655823632, "loss": 1.5125, "step": 10352 }, { "epoch": 0.4603619225467965, "grad_norm": 0.0652756616473198, "learning_rate": 0.0006009748934046343, "loss": 1.5064, "step": 10354 }, { "epoch": 0.46045084700546884, "grad_norm": 0.0631522387266159, "learning_rate": 0.0006008317125951265, "loss": 1.504, "step": 10356 }, { "epoch": 0.4605397714641412, "grad_norm": 0.0668981596827507, "learning_rate": 0.0006006885231660796, "loss": 1.5145, "step": 10358 }, { "epoch": 0.4606286959228136, "grad_norm": 0.06222165748476982, "learning_rate": 0.0006005453251297341, "loss": 1.5067, "step": 10360 }, { "epoch": 0.46071762038148595, "grad_norm": 0.06397759169340134, "learning_rate": 0.0006004021184983309, "loss": 1.5067, "step": 10362 }, { "epoch": 0.46080654484015826, "grad_norm": 0.060776520520448685, "learning_rate": 0.0006002589032841122, "loss": 1.5025, "step": 10364 }, { "epoch": 0.46089546929883063, "grad_norm": 0.06292383372783661, "learning_rate": 0.0006001156794993208, "loss": 1.4995, "step": 10366 }, { "epoch": 0.460984393757503, "grad_norm": 0.06334930658340454, "learning_rate": 0.0005999724471561999, "loss": 1.5038, "step": 10368 }, { "epoch": 0.46107331821617537, "grad_norm": 0.062282536178827286, "learning_rate": 0.0005998292062669935, "loss": 1.5019, "step": 10370 }, { "epoch": 0.46116224267484773, "grad_norm": 0.06332574039697647, "learning_rate": 0.0005996859568439468, "loss": 1.5022, "step": 10372 }, { "epoch": 0.4612511671335201, "grad_norm": 0.06254351884126663, "learning_rate": 0.000599542698899305, "loss": 1.513, "step": 10374 }, { "epoch": 0.4613400915921924, "grad_norm": 0.06307613104581833, "learning_rate": 0.0005993994324453147, "loss": 1.5088, "step": 10376 }, { "epoch": 0.4614290160508648, "grad_norm": 0.062051285058259964, "learning_rate": 0.0005992561574942229, "loss": 1.4984, "step": 10378 }, { "epoch": 0.46151794050953715, "grad_norm": 0.06389043480157852, "learning_rate": 0.0005991128740582773, "loss": 1.5078, "step": 10380 }, { "epoch": 0.4616068649682095, "grad_norm": 0.06286501884460449, "learning_rate": 0.0005989695821497266, "loss": 1.5044, "step": 10382 }, { "epoch": 0.4616957894268819, "grad_norm": 0.0667327269911766, "learning_rate": 0.0005988262817808198, "loss": 1.5093, "step": 10384 }, { "epoch": 0.4617847138855542, "grad_norm": 0.06277990341186523, "learning_rate": 0.0005986829729638069, "loss": 1.5009, "step": 10386 }, { "epoch": 0.46187363834422657, "grad_norm": 0.06250549107789993, "learning_rate": 0.0005985396557109386, "loss": 1.509, "step": 10388 }, { "epoch": 0.46196256280289894, "grad_norm": 0.06798355281352997, "learning_rate": 0.0005983963300344662, "loss": 1.5055, "step": 10390 }, { "epoch": 0.4620514872615713, "grad_norm": 0.06521153450012207, "learning_rate": 0.000598252995946642, "loss": 1.5108, "step": 10392 }, { "epoch": 0.46214041172024367, "grad_norm": 0.06334955245256424, "learning_rate": 0.0005981096534597186, "loss": 1.5062, "step": 10394 }, { "epoch": 0.462229336178916, "grad_norm": 0.06671233475208282, "learning_rate": 0.0005979663025859499, "loss": 1.508, "step": 10396 }, { "epoch": 0.46231826063758835, "grad_norm": 0.06669692695140839, "learning_rate": 0.0005978229433375897, "loss": 1.5016, "step": 10398 }, { "epoch": 0.4624071850962607, "grad_norm": 0.06515517830848694, "learning_rate": 0.0005976795757268933, "loss": 1.5029, "step": 10400 }, { "epoch": 0.4624961095549331, "grad_norm": 0.06504440307617188, "learning_rate": 0.0005975361997661162, "loss": 1.5052, "step": 10402 }, { "epoch": 0.46258503401360546, "grad_norm": 0.06195230036973953, "learning_rate": 0.0005973928154675151, "loss": 1.5033, "step": 10404 }, { "epoch": 0.4626739584722778, "grad_norm": 0.0632019117474556, "learning_rate": 0.0005972494228433468, "loss": 1.5057, "step": 10406 }, { "epoch": 0.46276288293095014, "grad_norm": 0.06470786780118942, "learning_rate": 0.0005971060219058694, "loss": 1.5045, "step": 10408 }, { "epoch": 0.4628518073896225, "grad_norm": 0.06392474472522736, "learning_rate": 0.000596962612667341, "loss": 1.506, "step": 10410 }, { "epoch": 0.4629407318482949, "grad_norm": 0.06552916020154953, "learning_rate": 0.0005968191951400215, "loss": 1.5066, "step": 10412 }, { "epoch": 0.46302965630696724, "grad_norm": 0.06476908922195435, "learning_rate": 0.0005966757693361705, "loss": 1.5028, "step": 10414 }, { "epoch": 0.4631185807656396, "grad_norm": 0.06308183819055557, "learning_rate": 0.0005965323352680486, "loss": 1.5073, "step": 10416 }, { "epoch": 0.4632075052243119, "grad_norm": 0.06603847444057465, "learning_rate": 0.0005963888929479173, "loss": 1.516, "step": 10418 }, { "epoch": 0.4632964296829843, "grad_norm": 0.06707409769296646, "learning_rate": 0.0005962454423880387, "loss": 1.5026, "step": 10420 }, { "epoch": 0.46338535414165666, "grad_norm": 0.06557576358318329, "learning_rate": 0.0005961019836006755, "loss": 1.5101, "step": 10422 }, { "epoch": 0.463474278600329, "grad_norm": 0.06399369239807129, "learning_rate": 0.0005959585165980912, "loss": 1.5085, "step": 10424 }, { "epoch": 0.4635632030590014, "grad_norm": 0.06383886933326721, "learning_rate": 0.0005958150413925501, "loss": 1.5059, "step": 10426 }, { "epoch": 0.46365212751767376, "grad_norm": 0.0634358674287796, "learning_rate": 0.000595671557996317, "loss": 1.5054, "step": 10428 }, { "epoch": 0.4637410519763461, "grad_norm": 0.06545853614807129, "learning_rate": 0.0005955280664216575, "loss": 1.513, "step": 10430 }, { "epoch": 0.46382997643501844, "grad_norm": 0.06597691774368286, "learning_rate": 0.0005953845666808378, "loss": 1.5063, "step": 10432 }, { "epoch": 0.4639189008936908, "grad_norm": 0.06610873341560364, "learning_rate": 0.0005952410587861251, "loss": 1.5096, "step": 10434 }, { "epoch": 0.4640078253523632, "grad_norm": 0.06371376663446426, "learning_rate": 0.0005950975427497871, "loss": 1.5077, "step": 10436 }, { "epoch": 0.46409674981103555, "grad_norm": 0.06398597359657288, "learning_rate": 0.0005949540185840919, "loss": 1.5092, "step": 10438 }, { "epoch": 0.46418567426970786, "grad_norm": 0.06583784520626068, "learning_rate": 0.0005948104863013089, "loss": 1.5122, "step": 10440 }, { "epoch": 0.46427459872838023, "grad_norm": 0.06568284332752228, "learning_rate": 0.0005946669459137075, "loss": 1.5076, "step": 10442 }, { "epoch": 0.4643635231870526, "grad_norm": 0.06291704624891281, "learning_rate": 0.0005945233974335585, "loss": 1.5103, "step": 10444 }, { "epoch": 0.46445244764572496, "grad_norm": 0.06666183471679688, "learning_rate": 0.0005943798408731329, "loss": 1.5065, "step": 10446 }, { "epoch": 0.46454137210439733, "grad_norm": 0.06522826105356216, "learning_rate": 0.0005942362762447026, "loss": 1.5054, "step": 10448 }, { "epoch": 0.46463029656306964, "grad_norm": 0.06623510271310806, "learning_rate": 0.0005940927035605403, "loss": 1.5098, "step": 10450 }, { "epoch": 0.464719221021742, "grad_norm": 0.06403439491987228, "learning_rate": 0.0005939491228329187, "loss": 1.5, "step": 10452 }, { "epoch": 0.4648081454804144, "grad_norm": 0.0637822076678276, "learning_rate": 0.0005938055340741123, "loss": 1.5119, "step": 10454 }, { "epoch": 0.46489706993908675, "grad_norm": 0.0661793127655983, "learning_rate": 0.0005936619372963953, "loss": 1.5033, "step": 10456 }, { "epoch": 0.4649859943977591, "grad_norm": 0.06596322357654572, "learning_rate": 0.0005935183325120433, "loss": 1.5056, "step": 10458 }, { "epoch": 0.4650749188564315, "grad_norm": 0.06263718008995056, "learning_rate": 0.000593374719733332, "loss": 1.5065, "step": 10460 }, { "epoch": 0.4651638433151038, "grad_norm": 0.06337321549654007, "learning_rate": 0.0005932310989725382, "loss": 1.5115, "step": 10462 }, { "epoch": 0.46525276777377617, "grad_norm": 0.0630149319767952, "learning_rate": 0.0005930874702419392, "loss": 1.5084, "step": 10464 }, { "epoch": 0.46534169223244853, "grad_norm": 0.06514789909124374, "learning_rate": 0.0005929438335538131, "loss": 1.5035, "step": 10466 }, { "epoch": 0.4654306166911209, "grad_norm": 0.061315376311540604, "learning_rate": 0.0005928001889204385, "loss": 1.5073, "step": 10468 }, { "epoch": 0.46551954114979327, "grad_norm": 0.06364026665687561, "learning_rate": 0.0005926565363540947, "loss": 1.5027, "step": 10470 }, { "epoch": 0.4656084656084656, "grad_norm": 0.06301172822713852, "learning_rate": 0.0005925128758670619, "loss": 1.5064, "step": 10472 }, { "epoch": 0.46569739006713795, "grad_norm": 0.06264245510101318, "learning_rate": 0.0005923692074716209, "loss": 1.5051, "step": 10474 }, { "epoch": 0.4657863145258103, "grad_norm": 0.06521310657262802, "learning_rate": 0.0005922255311800529, "loss": 1.5008, "step": 10476 }, { "epoch": 0.4658752389844827, "grad_norm": 0.06265758723020554, "learning_rate": 0.0005920818470046399, "loss": 1.5011, "step": 10478 }, { "epoch": 0.46596416344315505, "grad_norm": 0.06298228353261948, "learning_rate": 0.000591938154957665, "loss": 1.5046, "step": 10480 }, { "epoch": 0.4660530879018274, "grad_norm": 0.06504108011722565, "learning_rate": 0.0005917944550514114, "loss": 1.5035, "step": 10482 }, { "epoch": 0.46614201236049974, "grad_norm": 0.0628167986869812, "learning_rate": 0.0005916507472981632, "loss": 1.5018, "step": 10484 }, { "epoch": 0.4662309368191721, "grad_norm": 0.06394867599010468, "learning_rate": 0.0005915070317102053, "loss": 1.5055, "step": 10486 }, { "epoch": 0.46631986127784447, "grad_norm": 0.06346921622753143, "learning_rate": 0.0005913633082998231, "loss": 1.5132, "step": 10488 }, { "epoch": 0.46640878573651684, "grad_norm": 0.06185340881347656, "learning_rate": 0.0005912195770793028, "loss": 1.5051, "step": 10490 }, { "epoch": 0.4664977101951892, "grad_norm": 0.06247565150260925, "learning_rate": 0.0005910758380609308, "loss": 1.5057, "step": 10492 }, { "epoch": 0.4665866346538615, "grad_norm": 0.061248041689395905, "learning_rate": 0.000590932091256995, "loss": 1.4972, "step": 10494 }, { "epoch": 0.4666755591125339, "grad_norm": 0.062036920338869095, "learning_rate": 0.0005907883366797832, "loss": 1.5055, "step": 10496 }, { "epoch": 0.46676448357120626, "grad_norm": 0.0646434798836708, "learning_rate": 0.0005906445743415845, "loss": 1.5068, "step": 10498 }, { "epoch": 0.4668534080298786, "grad_norm": 0.06403973698616028, "learning_rate": 0.0005905008042546878, "loss": 1.5053, "step": 10500 }, { "epoch": 0.4668534080298786, "eval_loss": 1.4856921434402466, "eval_runtime": 12.4102, "eval_samples_per_second": 556.799, "eval_steps_per_second": 69.62, "step": 10500 }, { "epoch": 0.466942332488551, "grad_norm": 0.06568461656570435, "learning_rate": 0.0005903570264313837, "loss": 1.5073, "step": 10502 }, { "epoch": 0.46703125694722336, "grad_norm": 0.06413400918245316, "learning_rate": 0.0005902132408839626, "loss": 1.5047, "step": 10504 }, { "epoch": 0.4671201814058957, "grad_norm": 0.06379328668117523, "learning_rate": 0.0005900694476247164, "loss": 1.5017, "step": 10506 }, { "epoch": 0.46720910586456804, "grad_norm": 0.06230514496564865, "learning_rate": 0.0005899256466659369, "loss": 1.5085, "step": 10508 }, { "epoch": 0.4672980303232404, "grad_norm": 0.06407498568296432, "learning_rate": 0.0005897818380199165, "loss": 1.5085, "step": 10510 }, { "epoch": 0.4673869547819128, "grad_norm": 0.06471338123083115, "learning_rate": 0.0005896380216989495, "loss": 1.5057, "step": 10512 }, { "epoch": 0.46747587924058515, "grad_norm": 0.06504790484905243, "learning_rate": 0.0005894941977153289, "loss": 1.5093, "step": 10514 }, { "epoch": 0.46756480369925746, "grad_norm": 0.06497068703174591, "learning_rate": 0.0005893503660813499, "loss": 1.5046, "step": 10516 }, { "epoch": 0.4676537281579298, "grad_norm": 0.06451025605201721, "learning_rate": 0.000589206526809308, "loss": 1.504, "step": 10518 }, { "epoch": 0.4677426526166022, "grad_norm": 0.062198251485824585, "learning_rate": 0.0005890626799114991, "loss": 1.5068, "step": 10520 }, { "epoch": 0.46783157707527456, "grad_norm": 0.0641157478094101, "learning_rate": 0.0005889188254002198, "loss": 1.5027, "step": 10522 }, { "epoch": 0.46792050153394693, "grad_norm": 0.06388696283102036, "learning_rate": 0.0005887749632877673, "loss": 1.5103, "step": 10524 }, { "epoch": 0.46800942599261924, "grad_norm": 0.06389610469341278, "learning_rate": 0.0005886310935864399, "loss": 1.5049, "step": 10526 }, { "epoch": 0.4680983504512916, "grad_norm": 0.06436503678560257, "learning_rate": 0.0005884872163085359, "loss": 1.5033, "step": 10528 }, { "epoch": 0.468187274909964, "grad_norm": 0.06388412415981293, "learning_rate": 0.0005883433314663549, "loss": 1.5082, "step": 10530 }, { "epoch": 0.46827619936863635, "grad_norm": 0.06415783613920212, "learning_rate": 0.0005881994390721964, "loss": 1.5031, "step": 10532 }, { "epoch": 0.4683651238273087, "grad_norm": 0.06618549674749374, "learning_rate": 0.0005880555391383613, "loss": 1.4983, "step": 10534 }, { "epoch": 0.4684540482859811, "grad_norm": 0.06373777985572815, "learning_rate": 0.0005879116316771507, "loss": 1.5058, "step": 10536 }, { "epoch": 0.4685429727446534, "grad_norm": 0.06715410202741623, "learning_rate": 0.0005877677167008663, "loss": 1.5013, "step": 10538 }, { "epoch": 0.46863189720332576, "grad_norm": 0.0642244890332222, "learning_rate": 0.0005876237942218107, "loss": 1.5073, "step": 10540 }, { "epoch": 0.46872082166199813, "grad_norm": 0.06601016223430634, "learning_rate": 0.0005874798642522869, "loss": 1.4981, "step": 10542 }, { "epoch": 0.4688097461206705, "grad_norm": 0.06458527594804764, "learning_rate": 0.0005873359268045991, "loss": 1.5065, "step": 10544 }, { "epoch": 0.46889867057934287, "grad_norm": 0.06326668709516525, "learning_rate": 0.0005871919818910511, "loss": 1.5067, "step": 10546 }, { "epoch": 0.4689875950380152, "grad_norm": 0.06331662088632584, "learning_rate": 0.0005870480295239486, "loss": 1.5053, "step": 10548 }, { "epoch": 0.46907651949668755, "grad_norm": 0.0645529255270958, "learning_rate": 0.0005869040697155966, "loss": 1.5113, "step": 10550 }, { "epoch": 0.4691654439553599, "grad_norm": 0.0635153278708458, "learning_rate": 0.0005867601024783021, "loss": 1.503, "step": 10552 }, { "epoch": 0.4692543684140323, "grad_norm": 0.06458617746829987, "learning_rate": 0.0005866161278243713, "loss": 1.5021, "step": 10554 }, { "epoch": 0.46934329287270465, "grad_norm": 0.0654539167881012, "learning_rate": 0.0005864721457661124, "loss": 1.5025, "step": 10556 }, { "epoch": 0.469432217331377, "grad_norm": 0.06510742008686066, "learning_rate": 0.0005863281563158332, "loss": 1.5045, "step": 10558 }, { "epoch": 0.46952114179004933, "grad_norm": 0.0627632662653923, "learning_rate": 0.000586184159485843, "loss": 1.5048, "step": 10560 }, { "epoch": 0.4696100662487217, "grad_norm": 0.06467565894126892, "learning_rate": 0.000586040155288451, "loss": 1.5027, "step": 10562 }, { "epoch": 0.46969899070739407, "grad_norm": 0.06286469101905823, "learning_rate": 0.0005858961437359674, "loss": 1.4974, "step": 10564 }, { "epoch": 0.46978791516606644, "grad_norm": 0.06415469199419022, "learning_rate": 0.0005857521248407027, "loss": 1.5036, "step": 10566 }, { "epoch": 0.4698768396247388, "grad_norm": 0.06374349445104599, "learning_rate": 0.0005856080986149687, "loss": 1.4962, "step": 10568 }, { "epoch": 0.4699657640834111, "grad_norm": 0.0641632080078125, "learning_rate": 0.0005854640650710771, "loss": 1.5011, "step": 10570 }, { "epoch": 0.4700546885420835, "grad_norm": 0.06440167129039764, "learning_rate": 0.0005853200242213405, "loss": 1.5109, "step": 10572 }, { "epoch": 0.47014361300075586, "grad_norm": 0.06403553485870361, "learning_rate": 0.0005851759760780724, "loss": 1.5112, "step": 10574 }, { "epoch": 0.4702325374594282, "grad_norm": 0.06359109282493591, "learning_rate": 0.0005850319206535863, "loss": 1.5012, "step": 10576 }, { "epoch": 0.4703214619181006, "grad_norm": 0.06405449658632278, "learning_rate": 0.0005848878579601971, "loss": 1.5061, "step": 10578 }, { "epoch": 0.4704103863767729, "grad_norm": 0.06662164628505707, "learning_rate": 0.0005847437880102196, "loss": 1.5053, "step": 10580 }, { "epoch": 0.4704993108354453, "grad_norm": 0.06323253363370895, "learning_rate": 0.0005845997108159697, "loss": 1.5118, "step": 10582 }, { "epoch": 0.47058823529411764, "grad_norm": 0.06517712771892548, "learning_rate": 0.0005844556263897637, "loss": 1.5091, "step": 10584 }, { "epoch": 0.47067715975279, "grad_norm": 0.0638645812869072, "learning_rate": 0.0005843115347439184, "loss": 1.5093, "step": 10586 }, { "epoch": 0.4707660842114624, "grad_norm": 0.06771344691514969, "learning_rate": 0.0005841674358907517, "loss": 1.512, "step": 10588 }, { "epoch": 0.47085500867013474, "grad_norm": 0.06667336076498032, "learning_rate": 0.0005840233298425818, "loss": 1.5051, "step": 10590 }, { "epoch": 0.47094393312880706, "grad_norm": 0.06689155846834183, "learning_rate": 0.0005838792166117273, "loss": 1.5107, "step": 10592 }, { "epoch": 0.4710328575874794, "grad_norm": 0.06521068513393402, "learning_rate": 0.0005837350962105076, "loss": 1.508, "step": 10594 }, { "epoch": 0.4711217820461518, "grad_norm": 0.06260724365711212, "learning_rate": 0.0005835909686512429, "loss": 1.4977, "step": 10596 }, { "epoch": 0.47121070650482416, "grad_norm": 0.06671774387359619, "learning_rate": 0.0005834468339462539, "loss": 1.5029, "step": 10598 }, { "epoch": 0.47129963096349653, "grad_norm": 0.0636298730969429, "learning_rate": 0.0005833026921078616, "loss": 1.5078, "step": 10600 }, { "epoch": 0.47138855542216884, "grad_norm": 0.0641683042049408, "learning_rate": 0.0005831585431483883, "loss": 1.4961, "step": 10602 }, { "epoch": 0.4714774798808412, "grad_norm": 0.06526412814855576, "learning_rate": 0.0005830143870801562, "loss": 1.5074, "step": 10604 }, { "epoch": 0.4715664043395136, "grad_norm": 0.06388209015130997, "learning_rate": 0.0005828702239154886, "loss": 1.5082, "step": 10606 }, { "epoch": 0.47165532879818595, "grad_norm": 0.0643208771944046, "learning_rate": 0.0005827260536667089, "loss": 1.5034, "step": 10608 }, { "epoch": 0.4717442532568583, "grad_norm": 0.0622149333357811, "learning_rate": 0.0005825818763461416, "loss": 1.5036, "step": 10610 }, { "epoch": 0.4718331777155307, "grad_norm": 0.06327205151319504, "learning_rate": 0.0005824376919661114, "loss": 1.5018, "step": 10612 }, { "epoch": 0.471922102174203, "grad_norm": 0.06469540297985077, "learning_rate": 0.0005822935005389443, "loss": 1.5091, "step": 10614 }, { "epoch": 0.47201102663287536, "grad_norm": 0.06485003978013992, "learning_rate": 0.000582149302076966, "loss": 1.5048, "step": 10616 }, { "epoch": 0.47209995109154773, "grad_norm": 0.06667590141296387, "learning_rate": 0.0005820050965925032, "loss": 1.5053, "step": 10618 }, { "epoch": 0.4721888755502201, "grad_norm": 0.0658281147480011, "learning_rate": 0.0005818608840978837, "loss": 1.5085, "step": 10620 }, { "epoch": 0.47227780000889247, "grad_norm": 0.06506504863500595, "learning_rate": 0.0005817166646054348, "loss": 1.5044, "step": 10622 }, { "epoch": 0.4723667244675648, "grad_norm": 0.06641970574855804, "learning_rate": 0.0005815724381274854, "loss": 1.507, "step": 10624 }, { "epoch": 0.47245564892623715, "grad_norm": 0.06248960644006729, "learning_rate": 0.0005814282046763643, "loss": 1.505, "step": 10626 }, { "epoch": 0.4725445733849095, "grad_norm": 0.06606055051088333, "learning_rate": 0.0005812839642644017, "loss": 1.5084, "step": 10628 }, { "epoch": 0.4726334978435819, "grad_norm": 0.06419503688812256, "learning_rate": 0.0005811397169039277, "loss": 1.5038, "step": 10630 }, { "epoch": 0.47272242230225425, "grad_norm": 0.06354578584432602, "learning_rate": 0.0005809954626072728, "loss": 1.5025, "step": 10632 }, { "epoch": 0.47281134676092657, "grad_norm": 0.06533468514680862, "learning_rate": 0.000580851201386769, "loss": 1.5118, "step": 10634 }, { "epoch": 0.47290027121959893, "grad_norm": 0.06488578021526337, "learning_rate": 0.0005807069332547482, "loss": 1.5051, "step": 10636 }, { "epoch": 0.4729891956782713, "grad_norm": 0.061041004955768585, "learning_rate": 0.000580562658223543, "loss": 1.5046, "step": 10638 }, { "epoch": 0.47307812013694367, "grad_norm": 0.06147739291191101, "learning_rate": 0.0005804183763054869, "loss": 1.5003, "step": 10640 }, { "epoch": 0.47316704459561604, "grad_norm": 0.06590231508016586, "learning_rate": 0.0005802740875129135, "loss": 1.5015, "step": 10642 }, { "epoch": 0.4732559690542884, "grad_norm": 0.06333669275045395, "learning_rate": 0.0005801297918581574, "loss": 1.5054, "step": 10644 }, { "epoch": 0.4733448935129607, "grad_norm": 0.06394929438829422, "learning_rate": 0.0005799854893535535, "loss": 1.4967, "step": 10646 }, { "epoch": 0.4734338179716331, "grad_norm": 0.06518962234258652, "learning_rate": 0.0005798411800114375, "loss": 1.5029, "step": 10648 }, { "epoch": 0.47352274243030545, "grad_norm": 0.06461624801158905, "learning_rate": 0.0005796968638441455, "loss": 1.5038, "step": 10650 }, { "epoch": 0.4736116668889778, "grad_norm": 0.06398481875658035, "learning_rate": 0.0005795525408640146, "loss": 1.501, "step": 10652 }, { "epoch": 0.4737005913476502, "grad_norm": 0.06461846828460693, "learning_rate": 0.0005794082110833817, "loss": 1.5096, "step": 10654 }, { "epoch": 0.4737895158063225, "grad_norm": 0.0636335164308548, "learning_rate": 0.0005792638745145851, "loss": 1.504, "step": 10656 }, { "epoch": 0.47387844026499487, "grad_norm": 0.0646030604839325, "learning_rate": 0.0005791195311699631, "loss": 1.5081, "step": 10658 }, { "epoch": 0.47396736472366724, "grad_norm": 0.06314581632614136, "learning_rate": 0.0005789751810618551, "loss": 1.5029, "step": 10660 }, { "epoch": 0.4740562891823396, "grad_norm": 0.06447373330593109, "learning_rate": 0.0005788308242026004, "loss": 1.5029, "step": 10662 }, { "epoch": 0.474145213641012, "grad_norm": 0.06374399363994598, "learning_rate": 0.0005786864606045396, "loss": 1.5069, "step": 10664 }, { "epoch": 0.47423413809968434, "grad_norm": 0.06367414444684982, "learning_rate": 0.0005785420902800131, "loss": 1.4976, "step": 10666 }, { "epoch": 0.47432306255835666, "grad_norm": 0.06507609784603119, "learning_rate": 0.0005783977132413629, "loss": 1.5004, "step": 10668 }, { "epoch": 0.474411987017029, "grad_norm": 0.06401117891073227, "learning_rate": 0.0005782533295009307, "loss": 1.5017, "step": 10670 }, { "epoch": 0.4745009114757014, "grad_norm": 0.06449148803949356, "learning_rate": 0.0005781089390710588, "loss": 1.5078, "step": 10672 }, { "epoch": 0.47458983593437376, "grad_norm": 0.06876698136329651, "learning_rate": 0.0005779645419640907, "loss": 1.5033, "step": 10674 }, { "epoch": 0.47467876039304613, "grad_norm": 0.06464815884828568, "learning_rate": 0.00057782013819237, "loss": 1.5032, "step": 10676 }, { "epoch": 0.47476768485171844, "grad_norm": 0.06513024866580963, "learning_rate": 0.000577675727768241, "loss": 1.5039, "step": 10678 }, { "epoch": 0.4748566093103908, "grad_norm": 0.06479975581169128, "learning_rate": 0.0005775313107040483, "loss": 1.5033, "step": 10680 }, { "epoch": 0.4749455337690632, "grad_norm": 0.06562959402799606, "learning_rate": 0.0005773868870121377, "loss": 1.5035, "step": 10682 }, { "epoch": 0.47503445822773555, "grad_norm": 0.06198382005095482, "learning_rate": 0.0005772424567048549, "loss": 1.502, "step": 10684 }, { "epoch": 0.4751233826864079, "grad_norm": 0.0658661350607872, "learning_rate": 0.0005770980197945464, "loss": 1.5029, "step": 10686 }, { "epoch": 0.4752123071450803, "grad_norm": 0.06593360006809235, "learning_rate": 0.0005769535762935595, "loss": 1.5024, "step": 10688 }, { "epoch": 0.4753012316037526, "grad_norm": 0.06711971014738083, "learning_rate": 0.0005768091262142416, "loss": 1.5028, "step": 10690 }, { "epoch": 0.47539015606242496, "grad_norm": 0.06662214547395706, "learning_rate": 0.0005766646695689415, "loss": 1.4979, "step": 10692 }, { "epoch": 0.47547908052109733, "grad_norm": 0.062251124531030655, "learning_rate": 0.0005765202063700072, "loss": 1.5074, "step": 10694 }, { "epoch": 0.4755680049797697, "grad_norm": 0.0647764578461647, "learning_rate": 0.0005763757366297886, "loss": 1.5038, "step": 10696 }, { "epoch": 0.47565692943844207, "grad_norm": 0.06302308291196823, "learning_rate": 0.0005762312603606355, "loss": 1.4976, "step": 10698 }, { "epoch": 0.4757458538971144, "grad_norm": 0.06450463831424713, "learning_rate": 0.0005760867775748983, "loss": 1.502, "step": 10700 }, { "epoch": 0.47583477835578675, "grad_norm": 0.06613396853208542, "learning_rate": 0.000575942288284928, "loss": 1.4991, "step": 10702 }, { "epoch": 0.4759237028144591, "grad_norm": 0.06332564353942871, "learning_rate": 0.0005757977925030763, "loss": 1.5066, "step": 10704 }, { "epoch": 0.4760126272731315, "grad_norm": 0.06272386014461517, "learning_rate": 0.0005756532902416952, "loss": 1.5016, "step": 10706 }, { "epoch": 0.47610155173180385, "grad_norm": 0.06387588381767273, "learning_rate": 0.0005755087815131375, "loss": 1.501, "step": 10708 }, { "epoch": 0.47619047619047616, "grad_norm": 0.0668317899107933, "learning_rate": 0.0005753642663297564, "loss": 1.5068, "step": 10710 }, { "epoch": 0.47627940064914853, "grad_norm": 0.06176994368433952, "learning_rate": 0.0005752197447039056, "loss": 1.5082, "step": 10712 }, { "epoch": 0.4763683251078209, "grad_norm": 0.06459102034568787, "learning_rate": 0.0005750752166479397, "loss": 1.5063, "step": 10714 }, { "epoch": 0.47645724956649327, "grad_norm": 0.0622757263481617, "learning_rate": 0.0005749306821742132, "loss": 1.5021, "step": 10716 }, { "epoch": 0.47654617402516564, "grad_norm": 0.06583724915981293, "learning_rate": 0.0005747861412950821, "loss": 1.5038, "step": 10718 }, { "epoch": 0.476635098483838, "grad_norm": 0.0620274692773819, "learning_rate": 0.0005746415940229018, "loss": 1.5039, "step": 10720 }, { "epoch": 0.4767240229425103, "grad_norm": 0.06492220610380173, "learning_rate": 0.0005744970403700292, "loss": 1.5058, "step": 10722 }, { "epoch": 0.4768129474011827, "grad_norm": 0.06156022846698761, "learning_rate": 0.0005743524803488214, "loss": 1.5004, "step": 10724 }, { "epoch": 0.47690187185985505, "grad_norm": 0.06396898627281189, "learning_rate": 0.0005742079139716358, "loss": 1.502, "step": 10726 }, { "epoch": 0.4769907963185274, "grad_norm": 0.06404668837785721, "learning_rate": 0.0005740633412508307, "loss": 1.5032, "step": 10728 }, { "epoch": 0.4770797207771998, "grad_norm": 0.06537303328514099, "learning_rate": 0.0005739187621987648, "loss": 1.5036, "step": 10730 }, { "epoch": 0.4771686452358721, "grad_norm": 0.0657230019569397, "learning_rate": 0.0005737741768277974, "loss": 1.505, "step": 10732 }, { "epoch": 0.47725756969454447, "grad_norm": 0.06339870393276215, "learning_rate": 0.0005736295851502882, "loss": 1.5034, "step": 10734 }, { "epoch": 0.47734649415321684, "grad_norm": 0.06523014605045319, "learning_rate": 0.0005734849871785976, "loss": 1.5058, "step": 10736 }, { "epoch": 0.4774354186118892, "grad_norm": 0.06503810733556747, "learning_rate": 0.0005733403829250865, "loss": 1.5031, "step": 10738 }, { "epoch": 0.4775243430705616, "grad_norm": 0.0636671707034111, "learning_rate": 0.0005731957724021163, "loss": 1.5059, "step": 10740 }, { "epoch": 0.47761326752923394, "grad_norm": 0.06321322917938232, "learning_rate": 0.0005730511556220488, "loss": 1.503, "step": 10742 }, { "epoch": 0.47770219198790626, "grad_norm": 0.06607749313116074, "learning_rate": 0.0005729065325972467, "loss": 1.5087, "step": 10744 }, { "epoch": 0.4777911164465786, "grad_norm": 0.06520562618970871, "learning_rate": 0.0005727619033400729, "loss": 1.5022, "step": 10746 }, { "epoch": 0.477880040905251, "grad_norm": 0.06609973311424255, "learning_rate": 0.0005726172678628907, "loss": 1.5012, "step": 10748 }, { "epoch": 0.47796896536392336, "grad_norm": 0.06533106416463852, "learning_rate": 0.0005724726261780648, "loss": 1.4998, "step": 10750 }, { "epoch": 0.4780578898225957, "grad_norm": 0.06694469600915909, "learning_rate": 0.0005723279782979591, "loss": 1.5035, "step": 10752 }, { "epoch": 0.47814681428126804, "grad_norm": 0.06440529227256775, "learning_rate": 0.0005721833242349393, "loss": 1.5098, "step": 10754 }, { "epoch": 0.4782357387399404, "grad_norm": 0.06357917189598083, "learning_rate": 0.0005720386640013707, "loss": 1.5016, "step": 10756 }, { "epoch": 0.4783246631986128, "grad_norm": 0.06445612013339996, "learning_rate": 0.0005718939976096198, "loss": 1.5005, "step": 10758 }, { "epoch": 0.47841358765728514, "grad_norm": 0.062472015619277954, "learning_rate": 0.000571749325072053, "loss": 1.5029, "step": 10760 }, { "epoch": 0.4785025121159575, "grad_norm": 0.06607341021299362, "learning_rate": 0.0005716046464010378, "loss": 1.5072, "step": 10762 }, { "epoch": 0.4785914365746298, "grad_norm": 0.06257757544517517, "learning_rate": 0.0005714599616089419, "loss": 1.5015, "step": 10764 }, { "epoch": 0.4786803610333022, "grad_norm": 0.06556899100542068, "learning_rate": 0.0005713152707081335, "loss": 1.5052, "step": 10766 }, { "epoch": 0.47876928549197456, "grad_norm": 0.06689021736383438, "learning_rate": 0.0005711705737109816, "loss": 1.5016, "step": 10768 }, { "epoch": 0.47885820995064693, "grad_norm": 0.06304630637168884, "learning_rate": 0.0005710258706298553, "loss": 1.5011, "step": 10770 }, { "epoch": 0.4789471344093193, "grad_norm": 0.06447041779756546, "learning_rate": 0.0005708811614771245, "loss": 1.5073, "step": 10772 }, { "epoch": 0.47903605886799167, "grad_norm": 0.06245449185371399, "learning_rate": 0.0005707364462651598, "loss": 1.4974, "step": 10774 }, { "epoch": 0.479124983326664, "grad_norm": 0.063130222260952, "learning_rate": 0.000570591725006332, "loss": 1.4997, "step": 10776 }, { "epoch": 0.47921390778533635, "grad_norm": 0.06639711558818817, "learning_rate": 0.0005704469977130123, "loss": 1.5042, "step": 10778 }, { "epoch": 0.4793028322440087, "grad_norm": 0.06467228382825851, "learning_rate": 0.0005703022643975728, "loss": 1.5027, "step": 10780 }, { "epoch": 0.4793917567026811, "grad_norm": 0.06834383308887482, "learning_rate": 0.0005701575250723859, "loss": 1.4984, "step": 10782 }, { "epoch": 0.47948068116135345, "grad_norm": 0.0679764524102211, "learning_rate": 0.0005700127797498248, "loss": 1.5074, "step": 10784 }, { "epoch": 0.47956960562002576, "grad_norm": 0.06350118666887283, "learning_rate": 0.0005698680284422626, "loss": 1.4981, "step": 10786 }, { "epoch": 0.47965853007869813, "grad_norm": 0.06375767290592194, "learning_rate": 0.0005697232711620733, "loss": 1.5007, "step": 10788 }, { "epoch": 0.4797474545373705, "grad_norm": 0.0636902004480362, "learning_rate": 0.0005695785079216318, "loss": 1.5074, "step": 10790 }, { "epoch": 0.47983637899604287, "grad_norm": 0.06637261062860489, "learning_rate": 0.0005694337387333127, "loss": 1.5031, "step": 10792 }, { "epoch": 0.47992530345471524, "grad_norm": 0.06588244438171387, "learning_rate": 0.0005692889636094917, "loss": 1.5034, "step": 10794 }, { "epoch": 0.4800142279133876, "grad_norm": 0.06770049780607224, "learning_rate": 0.0005691441825625446, "loss": 1.5026, "step": 10796 }, { "epoch": 0.4801031523720599, "grad_norm": 0.06415392458438873, "learning_rate": 0.0005689993956048481, "loss": 1.5048, "step": 10798 }, { "epoch": 0.4801920768307323, "grad_norm": 0.06577417999505997, "learning_rate": 0.0005688546027487792, "loss": 1.5023, "step": 10800 }, { "epoch": 0.48028100128940465, "grad_norm": 0.06189163774251938, "learning_rate": 0.0005687098040067153, "loss": 1.5017, "step": 10802 }, { "epoch": 0.480369925748077, "grad_norm": 0.06363365054130554, "learning_rate": 0.0005685649993910348, "loss": 1.5037, "step": 10804 }, { "epoch": 0.4804588502067494, "grad_norm": 0.06573474407196045, "learning_rate": 0.0005684201889141158, "loss": 1.5027, "step": 10806 }, { "epoch": 0.4805477746654217, "grad_norm": 0.06577087193727493, "learning_rate": 0.0005682753725883378, "loss": 1.5003, "step": 10808 }, { "epoch": 0.48063669912409407, "grad_norm": 0.062234729528427124, "learning_rate": 0.0005681305504260798, "loss": 1.5009, "step": 10810 }, { "epoch": 0.48072562358276644, "grad_norm": 0.06282081454992294, "learning_rate": 0.0005679857224397222, "loss": 1.5035, "step": 10812 }, { "epoch": 0.4808145480414388, "grad_norm": 0.06254705041646957, "learning_rate": 0.0005678408886416454, "loss": 1.5049, "step": 10814 }, { "epoch": 0.4809034725001112, "grad_norm": 0.06438016891479492, "learning_rate": 0.0005676960490442305, "loss": 1.4978, "step": 10816 }, { "epoch": 0.4809923969587835, "grad_norm": 0.06419461965560913, "learning_rate": 0.0005675512036598592, "loss": 1.5005, "step": 10818 }, { "epoch": 0.48108132141745585, "grad_norm": 0.06625373661518097, "learning_rate": 0.000567406352500913, "loss": 1.4987, "step": 10820 }, { "epoch": 0.4811702458761282, "grad_norm": 0.06367786973714828, "learning_rate": 0.0005672614955797749, "loss": 1.5023, "step": 10822 }, { "epoch": 0.4812591703348006, "grad_norm": 0.06512659043073654, "learning_rate": 0.0005671166329088278, "loss": 1.5038, "step": 10824 }, { "epoch": 0.48134809479347296, "grad_norm": 0.06565241515636444, "learning_rate": 0.0005669717645004551, "loss": 1.5081, "step": 10826 }, { "epoch": 0.4814370192521453, "grad_norm": 0.06338914483785629, "learning_rate": 0.0005668268903670407, "loss": 1.5058, "step": 10828 }, { "epoch": 0.48152594371081764, "grad_norm": 0.06349223107099533, "learning_rate": 0.0005666820105209694, "loss": 1.4945, "step": 10830 }, { "epoch": 0.48161486816949, "grad_norm": 0.06533645838499069, "learning_rate": 0.0005665371249746259, "loss": 1.503, "step": 10832 }, { "epoch": 0.4817037926281624, "grad_norm": 0.07015854865312576, "learning_rate": 0.0005663922337403957, "loss": 1.5032, "step": 10834 }, { "epoch": 0.48179271708683474, "grad_norm": 0.06523853540420532, "learning_rate": 0.0005662473368306649, "loss": 1.5043, "step": 10836 }, { "epoch": 0.4818816415455071, "grad_norm": 0.06229966878890991, "learning_rate": 0.0005661024342578197, "loss": 1.5009, "step": 10838 }, { "epoch": 0.4819705660041794, "grad_norm": 0.06518285721540451, "learning_rate": 0.0005659575260342473, "loss": 1.5044, "step": 10840 }, { "epoch": 0.4820594904628518, "grad_norm": 0.06520578265190125, "learning_rate": 0.0005658126121723346, "loss": 1.5082, "step": 10842 }, { "epoch": 0.48214841492152416, "grad_norm": 0.06587293744087219, "learning_rate": 0.00056566769268447, "loss": 1.5047, "step": 10844 }, { "epoch": 0.48223733938019653, "grad_norm": 0.06606019288301468, "learning_rate": 0.0005655227675830416, "loss": 1.4998, "step": 10846 }, { "epoch": 0.4823262638388689, "grad_norm": 0.06386158615350723, "learning_rate": 0.0005653778368804381, "loss": 1.5081, "step": 10848 }, { "epoch": 0.48241518829754126, "grad_norm": 0.06474409252405167, "learning_rate": 0.0005652329005890492, "loss": 1.5008, "step": 10850 }, { "epoch": 0.4825041127562136, "grad_norm": 0.06231878697872162, "learning_rate": 0.0005650879587212645, "loss": 1.4979, "step": 10852 }, { "epoch": 0.48259303721488594, "grad_norm": 0.06352438777685165, "learning_rate": 0.0005649430112894743, "loss": 1.5012, "step": 10854 }, { "epoch": 0.4826819616735583, "grad_norm": 0.06429651379585266, "learning_rate": 0.000564798058306069, "loss": 1.4951, "step": 10856 }, { "epoch": 0.4827708861322307, "grad_norm": 0.06389694660902023, "learning_rate": 0.0005646530997834403, "loss": 1.4992, "step": 10858 }, { "epoch": 0.48285981059090305, "grad_norm": 0.06426005810499191, "learning_rate": 0.0005645081357339797, "loss": 1.4968, "step": 10860 }, { "epoch": 0.48294873504957536, "grad_norm": 0.062285613268613815, "learning_rate": 0.0005643631661700796, "loss": 1.4996, "step": 10862 }, { "epoch": 0.48303765950824773, "grad_norm": 0.06469501554965973, "learning_rate": 0.0005642181911041321, "loss": 1.4993, "step": 10864 }, { "epoch": 0.4831265839669201, "grad_norm": 0.06305573880672455, "learning_rate": 0.0005640732105485308, "loss": 1.5, "step": 10866 }, { "epoch": 0.48321550842559247, "grad_norm": 0.06352468580007553, "learning_rate": 0.000563928224515669, "loss": 1.5065, "step": 10868 }, { "epoch": 0.48330443288426483, "grad_norm": 0.06422882527112961, "learning_rate": 0.0005637832330179409, "loss": 1.5032, "step": 10870 }, { "epoch": 0.4833933573429372, "grad_norm": 0.06555035710334778, "learning_rate": 0.000563638236067741, "loss": 1.5019, "step": 10872 }, { "epoch": 0.4834822818016095, "grad_norm": 0.0639529749751091, "learning_rate": 0.0005634932336774641, "loss": 1.4982, "step": 10874 }, { "epoch": 0.4835712062602819, "grad_norm": 0.06562841683626175, "learning_rate": 0.0005633482258595059, "loss": 1.5008, "step": 10876 }, { "epoch": 0.48366013071895425, "grad_norm": 0.06432850658893585, "learning_rate": 0.0005632032126262622, "loss": 1.507, "step": 10878 }, { "epoch": 0.4837490551776266, "grad_norm": 0.06400411576032639, "learning_rate": 0.0005630581939901294, "loss": 1.5074, "step": 10880 }, { "epoch": 0.483837979636299, "grad_norm": 0.06370353698730469, "learning_rate": 0.0005629131699635041, "loss": 1.4988, "step": 10882 }, { "epoch": 0.4839269040949713, "grad_norm": 0.06292074173688889, "learning_rate": 0.0005627681405587839, "loss": 1.5069, "step": 10884 }, { "epoch": 0.48401582855364367, "grad_norm": 0.0631115660071373, "learning_rate": 0.0005626231057883664, "loss": 1.503, "step": 10886 }, { "epoch": 0.48410475301231604, "grad_norm": 0.06345234811306, "learning_rate": 0.0005624780656646499, "loss": 1.5007, "step": 10888 }, { "epoch": 0.4841936774709884, "grad_norm": 0.06285906583070755, "learning_rate": 0.000562333020200033, "loss": 1.5008, "step": 10890 }, { "epoch": 0.48428260192966077, "grad_norm": 0.06470154970884323, "learning_rate": 0.0005621879694069148, "loss": 1.5029, "step": 10892 }, { "epoch": 0.4843715263883331, "grad_norm": 0.06552711874246597, "learning_rate": 0.000562042913297695, "loss": 1.5065, "step": 10894 }, { "epoch": 0.48446045084700545, "grad_norm": 0.0655001625418663, "learning_rate": 0.0005618978518847733, "loss": 1.5033, "step": 10896 }, { "epoch": 0.4845493753056778, "grad_norm": 0.06529513746500015, "learning_rate": 0.0005617527851805507, "loss": 1.4996, "step": 10898 }, { "epoch": 0.4846382997643502, "grad_norm": 0.06527028232812881, "learning_rate": 0.0005616077131974279, "loss": 1.5027, "step": 10900 }, { "epoch": 0.48472722422302256, "grad_norm": 0.06357049942016602, "learning_rate": 0.0005614626359478062, "loss": 1.5025, "step": 10902 }, { "epoch": 0.4848161486816949, "grad_norm": 0.06293467432260513, "learning_rate": 0.0005613175534440875, "loss": 1.5036, "step": 10904 }, { "epoch": 0.48490507314036724, "grad_norm": 0.06417332589626312, "learning_rate": 0.0005611724656986741, "loss": 1.4987, "step": 10906 }, { "epoch": 0.4849939975990396, "grad_norm": 0.06176379695534706, "learning_rate": 0.0005610273727239688, "loss": 1.5024, "step": 10908 }, { "epoch": 0.485082922057712, "grad_norm": 0.06424736231565475, "learning_rate": 0.0005608822745323748, "loss": 1.5012, "step": 10910 }, { "epoch": 0.48517184651638434, "grad_norm": 0.06530416756868362, "learning_rate": 0.0005607371711362956, "loss": 1.4997, "step": 10912 }, { "epoch": 0.4852607709750567, "grad_norm": 0.06718974560499191, "learning_rate": 0.0005605920625481353, "loss": 1.5028, "step": 10914 }, { "epoch": 0.485349695433729, "grad_norm": 0.06613506376743317, "learning_rate": 0.0005604469487802987, "loss": 1.4958, "step": 10916 }, { "epoch": 0.4854386198924014, "grad_norm": 0.06338746100664139, "learning_rate": 0.0005603018298451903, "loss": 1.5018, "step": 10918 }, { "epoch": 0.48552754435107376, "grad_norm": 0.06582844257354736, "learning_rate": 0.0005601567057552158, "loss": 1.4997, "step": 10920 }, { "epoch": 0.4856164688097461, "grad_norm": 0.06286223977804184, "learning_rate": 0.000560011576522781, "loss": 1.4998, "step": 10922 }, { "epoch": 0.4857053932684185, "grad_norm": 0.06453262269496918, "learning_rate": 0.0005598664421602921, "loss": 1.4966, "step": 10924 }, { "epoch": 0.48579431772709086, "grad_norm": 0.06320101022720337, "learning_rate": 0.0005597213026801561, "loss": 1.5054, "step": 10926 }, { "epoch": 0.4858832421857632, "grad_norm": 0.06558766961097717, "learning_rate": 0.0005595761580947798, "loss": 1.5011, "step": 10928 }, { "epoch": 0.48597216664443554, "grad_norm": 0.06365552544593811, "learning_rate": 0.000559431008416571, "loss": 1.5008, "step": 10930 }, { "epoch": 0.4860610911031079, "grad_norm": 0.06486231088638306, "learning_rate": 0.0005592858536579377, "loss": 1.5008, "step": 10932 }, { "epoch": 0.4861500155617803, "grad_norm": 0.063105009496212, "learning_rate": 0.0005591406938312885, "loss": 1.5011, "step": 10934 }, { "epoch": 0.48623894002045265, "grad_norm": 0.06280262023210526, "learning_rate": 0.000558995528949032, "loss": 1.4973, "step": 10936 }, { "epoch": 0.48632786447912496, "grad_norm": 0.0647190734744072, "learning_rate": 0.0005588503590235777, "loss": 1.4982, "step": 10938 }, { "epoch": 0.48641678893779733, "grad_norm": 0.06445516645908356, "learning_rate": 0.0005587051840673355, "loss": 1.5, "step": 10940 }, { "epoch": 0.4865057133964697, "grad_norm": 0.06466984003782272, "learning_rate": 0.0005585600040927154, "loss": 1.4962, "step": 10942 }, { "epoch": 0.48659463785514206, "grad_norm": 0.06264392286539078, "learning_rate": 0.0005584148191121279, "loss": 1.4962, "step": 10944 }, { "epoch": 0.48668356231381443, "grad_norm": 0.06432507932186127, "learning_rate": 0.0005582696291379843, "loss": 1.4995, "step": 10946 }, { "epoch": 0.48677248677248675, "grad_norm": 0.0634993389248848, "learning_rate": 0.0005581244341826963, "loss": 1.5032, "step": 10948 }, { "epoch": 0.4868614112311591, "grad_norm": 0.06333111226558685, "learning_rate": 0.0005579792342586753, "loss": 1.5059, "step": 10950 }, { "epoch": 0.4869503356898315, "grad_norm": 0.06281735002994537, "learning_rate": 0.0005578340293783339, "loss": 1.5028, "step": 10952 }, { "epoch": 0.48703926014850385, "grad_norm": 0.06381440162658691, "learning_rate": 0.0005576888195540848, "loss": 1.5039, "step": 10954 }, { "epoch": 0.4871281846071762, "grad_norm": 0.06332245469093323, "learning_rate": 0.000557543604798341, "loss": 1.5009, "step": 10956 }, { "epoch": 0.4872171090658486, "grad_norm": 0.06438940018415451, "learning_rate": 0.0005573983851235165, "loss": 1.5028, "step": 10958 }, { "epoch": 0.4873060335245209, "grad_norm": 0.06432240456342697, "learning_rate": 0.000557253160542025, "loss": 1.4928, "step": 10960 }, { "epoch": 0.48739495798319327, "grad_norm": 0.06432882696390152, "learning_rate": 0.0005571079310662811, "loss": 1.5016, "step": 10962 }, { "epoch": 0.48748388244186563, "grad_norm": 0.06522729992866516, "learning_rate": 0.0005569626967086995, "loss": 1.5001, "step": 10964 }, { "epoch": 0.487572806900538, "grad_norm": 0.0628109946846962, "learning_rate": 0.0005568174574816957, "loss": 1.505, "step": 10966 }, { "epoch": 0.48766173135921037, "grad_norm": 0.06767455488443375, "learning_rate": 0.0005566722133976851, "loss": 1.5021, "step": 10968 }, { "epoch": 0.4877506558178827, "grad_norm": 0.06257057934999466, "learning_rate": 0.000556526964469084, "loss": 1.4986, "step": 10970 }, { "epoch": 0.48783958027655505, "grad_norm": 0.06524206697940826, "learning_rate": 0.0005563817107083088, "loss": 1.4929, "step": 10972 }, { "epoch": 0.4879285047352274, "grad_norm": 0.06345758587121964, "learning_rate": 0.0005562364521277766, "loss": 1.4987, "step": 10974 }, { "epoch": 0.4880174291938998, "grad_norm": 0.0644221380352974, "learning_rate": 0.0005560911887399047, "loss": 1.4985, "step": 10976 }, { "epoch": 0.48810635365257216, "grad_norm": 0.06262285262346268, "learning_rate": 0.0005559459205571106, "loss": 1.5021, "step": 10978 }, { "epoch": 0.4881952781112445, "grad_norm": 0.06352121382951736, "learning_rate": 0.0005558006475918128, "loss": 1.5005, "step": 10980 }, { "epoch": 0.48828420256991684, "grad_norm": 0.06309682130813599, "learning_rate": 0.0005556553698564297, "loss": 1.5002, "step": 10982 }, { "epoch": 0.4883731270285892, "grad_norm": 0.061160314828157425, "learning_rate": 0.0005555100873633804, "loss": 1.5016, "step": 10984 }, { "epoch": 0.4884620514872616, "grad_norm": 0.06492006033658981, "learning_rate": 0.0005553648001250842, "loss": 1.5017, "step": 10986 }, { "epoch": 0.48855097594593394, "grad_norm": 0.06279385089874268, "learning_rate": 0.0005552195081539608, "loss": 1.5062, "step": 10988 }, { "epoch": 0.4886399004046063, "grad_norm": 0.06334224343299866, "learning_rate": 0.0005550742114624305, "loss": 1.5027, "step": 10990 }, { "epoch": 0.4887288248632786, "grad_norm": 0.06543200463056564, "learning_rate": 0.000554928910062914, "loss": 1.5028, "step": 10992 }, { "epoch": 0.488817749321951, "grad_norm": 0.06258574873209, "learning_rate": 0.0005547836039678321, "loss": 1.5023, "step": 10994 }, { "epoch": 0.48890667378062336, "grad_norm": 0.06658318638801575, "learning_rate": 0.0005546382931896065, "loss": 1.4961, "step": 10996 }, { "epoch": 0.4889955982392957, "grad_norm": 0.062175750732421875, "learning_rate": 0.0005544929777406586, "loss": 1.5022, "step": 10998 }, { "epoch": 0.4890845226979681, "grad_norm": 0.06402657181024551, "learning_rate": 0.0005543476576334109, "loss": 1.4992, "step": 11000 }, { "epoch": 0.4890845226979681, "eval_loss": 1.4820914268493652, "eval_runtime": 13.0255, "eval_samples_per_second": 530.5, "eval_steps_per_second": 66.332, "step": 11000 }, { "epoch": 0.4891734471566404, "grad_norm": 0.06335395574569702, "learning_rate": 0.000554202332880286, "loss": 1.504, "step": 11002 }, { "epoch": 0.4892623716153128, "grad_norm": 0.06110154837369919, "learning_rate": 0.0005540570034937066, "loss": 1.5059, "step": 11004 }, { "epoch": 0.48935129607398514, "grad_norm": 0.06317976117134094, "learning_rate": 0.0005539116694860965, "loss": 1.4984, "step": 11006 }, { "epoch": 0.4894402205326575, "grad_norm": 0.06273622810840607, "learning_rate": 0.0005537663308698792, "loss": 1.4975, "step": 11008 }, { "epoch": 0.4895291449913299, "grad_norm": 0.06430622190237045, "learning_rate": 0.0005536209876574792, "loss": 1.5068, "step": 11010 }, { "epoch": 0.48961806945000225, "grad_norm": 0.06317053735256195, "learning_rate": 0.0005534756398613206, "loss": 1.4996, "step": 11012 }, { "epoch": 0.48970699390867456, "grad_norm": 0.063200943171978, "learning_rate": 0.0005533302874938289, "loss": 1.4999, "step": 11014 }, { "epoch": 0.4897959183673469, "grad_norm": 0.06409791111946106, "learning_rate": 0.0005531849305674292, "loss": 1.4959, "step": 11016 }, { "epoch": 0.4898848428260193, "grad_norm": 0.06358983367681503, "learning_rate": 0.000553039569094547, "loss": 1.501, "step": 11018 }, { "epoch": 0.48997376728469166, "grad_norm": 0.06684956699609756, "learning_rate": 0.0005528942030876089, "loss": 1.4998, "step": 11020 }, { "epoch": 0.49006269174336403, "grad_norm": 0.06377758830785751, "learning_rate": 0.0005527488325590411, "loss": 1.4958, "step": 11022 }, { "epoch": 0.49015161620203634, "grad_norm": 0.06464217603206635, "learning_rate": 0.0005526034575212708, "loss": 1.5029, "step": 11024 }, { "epoch": 0.4902405406607087, "grad_norm": 0.06466019153594971, "learning_rate": 0.000552458077986725, "loss": 1.499, "step": 11026 }, { "epoch": 0.4903294651193811, "grad_norm": 0.06569449603557587, "learning_rate": 0.0005523126939678316, "loss": 1.5023, "step": 11028 }, { "epoch": 0.49041838957805345, "grad_norm": 0.06310504674911499, "learning_rate": 0.0005521673054770185, "loss": 1.4994, "step": 11030 }, { "epoch": 0.4905073140367258, "grad_norm": 0.06455674022436142, "learning_rate": 0.0005520219125267144, "loss": 1.5022, "step": 11032 }, { "epoch": 0.4905962384953982, "grad_norm": 0.06450628489255905, "learning_rate": 0.0005518765151293478, "loss": 1.5005, "step": 11034 }, { "epoch": 0.4906851629540705, "grad_norm": 0.06352430582046509, "learning_rate": 0.0005517311132973481, "loss": 1.4988, "step": 11036 }, { "epoch": 0.49077408741274287, "grad_norm": 0.06309404969215393, "learning_rate": 0.0005515857070431448, "loss": 1.4998, "step": 11038 }, { "epoch": 0.49086301187141523, "grad_norm": 0.0660061240196228, "learning_rate": 0.000551440296379168, "loss": 1.5007, "step": 11040 }, { "epoch": 0.4909519363300876, "grad_norm": 0.06624384969472885, "learning_rate": 0.0005512948813178482, "loss": 1.4957, "step": 11042 }, { "epoch": 0.49104086078875997, "grad_norm": 0.06285151839256287, "learning_rate": 0.0005511494618716155, "loss": 1.5028, "step": 11044 }, { "epoch": 0.4911297852474323, "grad_norm": 0.06388713419437408, "learning_rate": 0.0005510040380529017, "loss": 1.5095, "step": 11046 }, { "epoch": 0.49121870970610465, "grad_norm": 0.06444618105888367, "learning_rate": 0.0005508586098741378, "loss": 1.4973, "step": 11048 }, { "epoch": 0.491307634164777, "grad_norm": 0.06315982341766357, "learning_rate": 0.0005507131773477558, "loss": 1.4944, "step": 11050 }, { "epoch": 0.4913965586234494, "grad_norm": 0.06305846571922302, "learning_rate": 0.000550567740486188, "loss": 1.5036, "step": 11052 }, { "epoch": 0.49148548308212175, "grad_norm": 0.06245909631252289, "learning_rate": 0.0005504222993018668, "loss": 1.4955, "step": 11054 }, { "epoch": 0.4915744075407941, "grad_norm": 0.0626964196562767, "learning_rate": 0.0005502768538072254, "loss": 1.4963, "step": 11056 }, { "epoch": 0.49166333199946644, "grad_norm": 0.06086695194244385, "learning_rate": 0.0005501314040146967, "loss": 1.4928, "step": 11058 }, { "epoch": 0.4917522564581388, "grad_norm": 0.06215178221464157, "learning_rate": 0.0005499859499367149, "loss": 1.4947, "step": 11060 }, { "epoch": 0.49184118091681117, "grad_norm": 0.06229309365153313, "learning_rate": 0.0005498404915857137, "loss": 1.4948, "step": 11062 }, { "epoch": 0.49193010537548354, "grad_norm": 0.06522365659475327, "learning_rate": 0.0005496950289741278, "loss": 1.5071, "step": 11064 }, { "epoch": 0.4920190298341559, "grad_norm": 0.06355579197406769, "learning_rate": 0.0005495495621143917, "loss": 1.5063, "step": 11066 }, { "epoch": 0.4921079542928282, "grad_norm": 0.06263679265975952, "learning_rate": 0.0005494040910189407, "loss": 1.4971, "step": 11068 }, { "epoch": 0.4921968787515006, "grad_norm": 0.06291796267032623, "learning_rate": 0.0005492586157002102, "loss": 1.5011, "step": 11070 }, { "epoch": 0.49228580321017296, "grad_norm": 0.0636981651186943, "learning_rate": 0.0005491131361706363, "loss": 1.4985, "step": 11072 }, { "epoch": 0.4923747276688453, "grad_norm": 0.061627645045518875, "learning_rate": 0.0005489676524426551, "loss": 1.5001, "step": 11074 }, { "epoch": 0.4924636521275177, "grad_norm": 0.06285221129655838, "learning_rate": 0.000548822164528703, "loss": 1.4997, "step": 11076 }, { "epoch": 0.49255257658619, "grad_norm": 0.06427198648452759, "learning_rate": 0.0005486766724412172, "loss": 1.4987, "step": 11078 }, { "epoch": 0.4926415010448624, "grad_norm": 0.06529872864484787, "learning_rate": 0.0005485311761926349, "loss": 1.5027, "step": 11080 }, { "epoch": 0.49273042550353474, "grad_norm": 0.06436862051486969, "learning_rate": 0.000548385675795394, "loss": 1.5018, "step": 11082 }, { "epoch": 0.4928193499622071, "grad_norm": 0.06122404336929321, "learning_rate": 0.000548240171261932, "loss": 1.5029, "step": 11084 }, { "epoch": 0.4929082744208795, "grad_norm": 0.0629076138138771, "learning_rate": 0.0005480946626046879, "loss": 1.5, "step": 11086 }, { "epoch": 0.49299719887955185, "grad_norm": 0.06222783774137497, "learning_rate": 0.0005479491498360997, "loss": 1.4984, "step": 11088 }, { "epoch": 0.49308612333822416, "grad_norm": 0.06436508893966675, "learning_rate": 0.0005478036329686071, "loss": 1.4938, "step": 11090 }, { "epoch": 0.4931750477968965, "grad_norm": 0.06390917301177979, "learning_rate": 0.0005476581120146493, "loss": 1.5018, "step": 11092 }, { "epoch": 0.4932639722555689, "grad_norm": 0.06670954078435898, "learning_rate": 0.0005475125869866661, "loss": 1.5033, "step": 11094 }, { "epoch": 0.49335289671424126, "grad_norm": 0.0621398501098156, "learning_rate": 0.0005473670578970975, "loss": 1.4996, "step": 11096 }, { "epoch": 0.49344182117291363, "grad_norm": 0.06525732576847076, "learning_rate": 0.000547221524758384, "loss": 1.4983, "step": 11098 }, { "epoch": 0.49353074563158594, "grad_norm": 0.06158006191253662, "learning_rate": 0.0005470759875829665, "loss": 1.4993, "step": 11100 }, { "epoch": 0.4936196700902583, "grad_norm": 0.06350939720869064, "learning_rate": 0.0005469304463832862, "loss": 1.5011, "step": 11102 }, { "epoch": 0.4937085945489307, "grad_norm": 0.06323396414518356, "learning_rate": 0.0005467849011717845, "loss": 1.5057, "step": 11104 }, { "epoch": 0.49379751900760305, "grad_norm": 0.06293822079896927, "learning_rate": 0.0005466393519609032, "loss": 1.5004, "step": 11106 }, { "epoch": 0.4938864434662754, "grad_norm": 0.06489180028438568, "learning_rate": 0.0005464937987630845, "loss": 1.5013, "step": 11108 }, { "epoch": 0.4939753679249478, "grad_norm": 0.06281109154224396, "learning_rate": 0.0005463482415907713, "loss": 1.4972, "step": 11110 }, { "epoch": 0.4940642923836201, "grad_norm": 0.06400121748447418, "learning_rate": 0.0005462026804564058, "loss": 1.5008, "step": 11112 }, { "epoch": 0.49415321684229246, "grad_norm": 0.06272722035646439, "learning_rate": 0.0005460571153724318, "loss": 1.4962, "step": 11114 }, { "epoch": 0.49424214130096483, "grad_norm": 0.06551103293895721, "learning_rate": 0.0005459115463512925, "loss": 1.5093, "step": 11116 }, { "epoch": 0.4943310657596372, "grad_norm": 0.06318585574626923, "learning_rate": 0.000545765973405432, "loss": 1.493, "step": 11118 }, { "epoch": 0.49441999021830957, "grad_norm": 0.06164968013763428, "learning_rate": 0.0005456203965472944, "loss": 1.4923, "step": 11120 }, { "epoch": 0.4945089146769819, "grad_norm": 0.06324370205402374, "learning_rate": 0.0005454748157893241, "loss": 1.5067, "step": 11122 }, { "epoch": 0.49459783913565425, "grad_norm": 0.0638519749045372, "learning_rate": 0.0005453292311439662, "loss": 1.4976, "step": 11124 }, { "epoch": 0.4946867635943266, "grad_norm": 0.0644693598151207, "learning_rate": 0.000545183642623666, "loss": 1.498, "step": 11126 }, { "epoch": 0.494775688052999, "grad_norm": 0.06370065361261368, "learning_rate": 0.0005450380502408688, "loss": 1.4954, "step": 11128 }, { "epoch": 0.49486461251167135, "grad_norm": 0.06507989764213562, "learning_rate": 0.0005448924540080206, "loss": 1.5037, "step": 11130 }, { "epoch": 0.49495353697034367, "grad_norm": 0.06500361114740372, "learning_rate": 0.0005447468539375675, "loss": 1.4998, "step": 11132 }, { "epoch": 0.49504246142901603, "grad_norm": 0.06104636937379837, "learning_rate": 0.0005446012500419564, "loss": 1.4931, "step": 11134 }, { "epoch": 0.4951313858876884, "grad_norm": 0.06581141799688339, "learning_rate": 0.0005444556423336337, "loss": 1.4986, "step": 11136 }, { "epoch": 0.49522031034636077, "grad_norm": 0.06393726170063019, "learning_rate": 0.0005443100308250467, "loss": 1.4976, "step": 11138 }, { "epoch": 0.49530923480503314, "grad_norm": 0.06342989206314087, "learning_rate": 0.0005441644155286432, "loss": 1.4973, "step": 11140 }, { "epoch": 0.4953981592637055, "grad_norm": 0.06588321924209595, "learning_rate": 0.0005440187964568708, "loss": 1.5007, "step": 11142 }, { "epoch": 0.4954870837223778, "grad_norm": 0.06536178290843964, "learning_rate": 0.0005438731736221776, "loss": 1.5027, "step": 11144 }, { "epoch": 0.4955760081810502, "grad_norm": 0.06368371844291687, "learning_rate": 0.0005437275470370122, "loss": 1.5003, "step": 11146 }, { "epoch": 0.49566493263972256, "grad_norm": 0.06588061898946762, "learning_rate": 0.0005435819167138234, "loss": 1.5015, "step": 11148 }, { "epoch": 0.4957538570983949, "grad_norm": 0.06427399069070816, "learning_rate": 0.0005434362826650603, "loss": 1.5013, "step": 11150 }, { "epoch": 0.4958427815570673, "grad_norm": 0.0646776631474495, "learning_rate": 0.0005432906449031723, "loss": 1.5026, "step": 11152 }, { "epoch": 0.4959317060157396, "grad_norm": 0.06432519853115082, "learning_rate": 0.0005431450034406092, "loss": 1.5005, "step": 11154 }, { "epoch": 0.49602063047441197, "grad_norm": 0.06441538780927658, "learning_rate": 0.0005429993582898212, "loss": 1.4945, "step": 11156 }, { "epoch": 0.49610955493308434, "grad_norm": 0.06345316767692566, "learning_rate": 0.0005428537094632585, "loss": 1.495, "step": 11158 }, { "epoch": 0.4961984793917567, "grad_norm": 0.06269050389528275, "learning_rate": 0.0005427080569733718, "loss": 1.494, "step": 11160 }, { "epoch": 0.4962874038504291, "grad_norm": 0.06417939811944962, "learning_rate": 0.0005425624008326122, "loss": 1.4962, "step": 11162 }, { "epoch": 0.49637632830910144, "grad_norm": 0.06371048837900162, "learning_rate": 0.0005424167410534311, "loss": 1.5057, "step": 11164 }, { "epoch": 0.49646525276777376, "grad_norm": 0.06224866583943367, "learning_rate": 0.0005422710776482798, "loss": 1.5009, "step": 11166 }, { "epoch": 0.4965541772264461, "grad_norm": 0.06159532442688942, "learning_rate": 0.0005421254106296108, "loss": 1.4936, "step": 11168 }, { "epoch": 0.4966431016851185, "grad_norm": 0.06550117582082748, "learning_rate": 0.0005419797400098758, "loss": 1.4971, "step": 11170 }, { "epoch": 0.49673202614379086, "grad_norm": 0.06186149641871452, "learning_rate": 0.0005418340658015279, "loss": 1.4973, "step": 11172 }, { "epoch": 0.49682095060246323, "grad_norm": 0.06517408788204193, "learning_rate": 0.0005416883880170195, "loss": 1.5007, "step": 11174 }, { "epoch": 0.49690987506113554, "grad_norm": 0.06351044774055481, "learning_rate": 0.000541542706668804, "loss": 1.5013, "step": 11176 }, { "epoch": 0.4969987995198079, "grad_norm": 0.06350798904895782, "learning_rate": 0.0005413970217693348, "loss": 1.5037, "step": 11178 }, { "epoch": 0.4970877239784803, "grad_norm": 0.06402333080768585, "learning_rate": 0.0005412513333310661, "loss": 1.4991, "step": 11180 }, { "epoch": 0.49717664843715265, "grad_norm": 0.06381136924028397, "learning_rate": 0.0005411056413664512, "loss": 1.4986, "step": 11182 }, { "epoch": 0.497265572895825, "grad_norm": 0.06557981669902802, "learning_rate": 0.0005409599458879452, "loss": 1.4981, "step": 11184 }, { "epoch": 0.4973544973544973, "grad_norm": 0.06369341164827347, "learning_rate": 0.0005408142469080023, "loss": 1.4985, "step": 11186 }, { "epoch": 0.4974434218131697, "grad_norm": 0.06300416588783264, "learning_rate": 0.0005406685444390779, "loss": 1.5046, "step": 11188 }, { "epoch": 0.49753234627184206, "grad_norm": 0.0626647025346756, "learning_rate": 0.0005405228384936272, "loss": 1.4958, "step": 11190 }, { "epoch": 0.49762127073051443, "grad_norm": 0.06517183035612106, "learning_rate": 0.0005403771290841053, "loss": 1.5032, "step": 11192 }, { "epoch": 0.4977101951891868, "grad_norm": 0.06442451477050781, "learning_rate": 0.0005402314162229688, "loss": 1.4977, "step": 11194 }, { "epoch": 0.49779911964785917, "grad_norm": 0.06301950663328171, "learning_rate": 0.0005400856999226735, "loss": 1.4991, "step": 11196 }, { "epoch": 0.4978880441065315, "grad_norm": 0.06560328602790833, "learning_rate": 0.000539939980195676, "loss": 1.5007, "step": 11198 }, { "epoch": 0.49797696856520385, "grad_norm": 0.06166763976216316, "learning_rate": 0.0005397942570544327, "loss": 1.4942, "step": 11200 }, { "epoch": 0.4980658930238762, "grad_norm": 0.06296875327825546, "learning_rate": 0.0005396485305114011, "loss": 1.4945, "step": 11202 }, { "epoch": 0.4981548174825486, "grad_norm": 0.06445491313934326, "learning_rate": 0.0005395028005790384, "loss": 1.5026, "step": 11204 }, { "epoch": 0.49824374194122095, "grad_norm": 0.06422359496355057, "learning_rate": 0.000539357067269802, "loss": 1.4957, "step": 11206 }, { "epoch": 0.49833266639989326, "grad_norm": 0.06359175592660904, "learning_rate": 0.0005392113305961503, "loss": 1.4982, "step": 11208 }, { "epoch": 0.49842159085856563, "grad_norm": 0.061649519950151443, "learning_rate": 0.0005390655905705413, "loss": 1.4979, "step": 11210 }, { "epoch": 0.498510515317238, "grad_norm": 0.06346156448125839, "learning_rate": 0.0005389198472054334, "loss": 1.5022, "step": 11212 }, { "epoch": 0.49859943977591037, "grad_norm": 0.06347343325614929, "learning_rate": 0.0005387741005132853, "loss": 1.4976, "step": 11214 }, { "epoch": 0.49868836423458274, "grad_norm": 0.06473079323768616, "learning_rate": 0.0005386283505065565, "loss": 1.4985, "step": 11216 }, { "epoch": 0.4987772886932551, "grad_norm": 0.0626574233174324, "learning_rate": 0.0005384825971977059, "loss": 1.4948, "step": 11218 }, { "epoch": 0.4988662131519274, "grad_norm": 0.06385411322116852, "learning_rate": 0.0005383368405991932, "loss": 1.5038, "step": 11220 }, { "epoch": 0.4989551376105998, "grad_norm": 0.0622217021882534, "learning_rate": 0.0005381910807234785, "loss": 1.4998, "step": 11222 }, { "epoch": 0.49904406206927215, "grad_norm": 0.06290595233440399, "learning_rate": 0.0005380453175830219, "loss": 1.4955, "step": 11224 }, { "epoch": 0.4991329865279445, "grad_norm": 0.06268318742513657, "learning_rate": 0.0005378995511902841, "loss": 1.4992, "step": 11226 }, { "epoch": 0.4992219109866169, "grad_norm": 0.06381423771381378, "learning_rate": 0.0005377537815577254, "loss": 1.4967, "step": 11228 }, { "epoch": 0.4993108354452892, "grad_norm": 0.06410021334886551, "learning_rate": 0.0005376080086978072, "loss": 1.4954, "step": 11230 }, { "epoch": 0.49939975990396157, "grad_norm": 0.06397207826375961, "learning_rate": 0.0005374622326229906, "loss": 1.5047, "step": 11232 }, { "epoch": 0.49948868436263394, "grad_norm": 0.06497202068567276, "learning_rate": 0.0005373164533457374, "loss": 1.5001, "step": 11234 }, { "epoch": 0.4995776088213063, "grad_norm": 0.06162360683083534, "learning_rate": 0.0005371706708785091, "loss": 1.4959, "step": 11236 }, { "epoch": 0.4996665332799787, "grad_norm": 0.06346426904201508, "learning_rate": 0.0005370248852337682, "loss": 1.4943, "step": 11238 }, { "epoch": 0.49975545773865104, "grad_norm": 0.06419111043214798, "learning_rate": 0.000536879096423977, "loss": 1.5007, "step": 11240 }, { "epoch": 0.49984438219732336, "grad_norm": 0.06275220960378647, "learning_rate": 0.0005367333044615979, "loss": 1.4958, "step": 11242 }, { "epoch": 0.4999333066559957, "grad_norm": 0.06255465000867844, "learning_rate": 0.0005365875093590944, "loss": 1.4995, "step": 11244 }, { "epoch": 0.500022231114668, "grad_norm": 0.06293238699436188, "learning_rate": 0.000536441711128929, "loss": 1.5021, "step": 11246 }, { "epoch": 0.5001111555733404, "grad_norm": 0.06618613004684448, "learning_rate": 0.0005362959097835658, "loss": 1.4973, "step": 11248 }, { "epoch": 0.5002000800320128, "grad_norm": 0.06275363266468048, "learning_rate": 0.0005361501053354681, "loss": 1.495, "step": 11250 }, { "epoch": 0.5002890044906851, "grad_norm": 0.06456315517425537, "learning_rate": 0.0005360042977971002, "loss": 1.496, "step": 11252 }, { "epoch": 0.5003779289493575, "grad_norm": 0.06540905684232712, "learning_rate": 0.0005358584871809262, "loss": 1.4952, "step": 11254 }, { "epoch": 0.5004668534080299, "grad_norm": 0.06401478499174118, "learning_rate": 0.0005357126734994108, "loss": 1.4914, "step": 11256 }, { "epoch": 0.5005557778667022, "grad_norm": 0.06477171927690506, "learning_rate": 0.0005355668567650186, "loss": 1.5019, "step": 11258 }, { "epoch": 0.5006447023253746, "grad_norm": 0.06348700076341629, "learning_rate": 0.0005354210369902147, "loss": 1.4993, "step": 11260 }, { "epoch": 0.500733626784047, "grad_norm": 0.06129096448421478, "learning_rate": 0.0005352752141874645, "loss": 1.4959, "step": 11262 }, { "epoch": 0.5008225512427193, "grad_norm": 0.06324250996112823, "learning_rate": 0.0005351293883692335, "loss": 1.4915, "step": 11264 }, { "epoch": 0.5009114757013917, "grad_norm": 0.0653425082564354, "learning_rate": 0.0005349835595479879, "loss": 1.4971, "step": 11266 }, { "epoch": 0.501000400160064, "grad_norm": 0.06187792494893074, "learning_rate": 0.000534837727736193, "loss": 1.4998, "step": 11268 }, { "epoch": 0.5010893246187363, "grad_norm": 0.06403960287570953, "learning_rate": 0.0005346918929463159, "loss": 1.5046, "step": 11270 }, { "epoch": 0.5011782490774087, "grad_norm": 0.06291434913873672, "learning_rate": 0.0005345460551908227, "loss": 1.4957, "step": 11272 }, { "epoch": 0.5012671735360811, "grad_norm": 0.06603266298770905, "learning_rate": 0.0005344002144821806, "loss": 1.4996, "step": 11274 }, { "epoch": 0.5013560979947534, "grad_norm": 0.06421004980802536, "learning_rate": 0.0005342543708328566, "loss": 1.5001, "step": 11276 }, { "epoch": 0.5014450224534258, "grad_norm": 0.06628622859716415, "learning_rate": 0.0005341085242553179, "loss": 1.4974, "step": 11278 }, { "epoch": 0.5015339469120982, "grad_norm": 0.06342728435993195, "learning_rate": 0.0005339626747620323, "loss": 1.4976, "step": 11280 }, { "epoch": 0.5016228713707706, "grad_norm": 0.0662439614534378, "learning_rate": 0.0005338168223654676, "loss": 1.4981, "step": 11282 }, { "epoch": 0.5017117958294429, "grad_norm": 0.06446612626314163, "learning_rate": 0.000533670967078092, "loss": 1.4992, "step": 11284 }, { "epoch": 0.5018007202881153, "grad_norm": 0.06358826905488968, "learning_rate": 0.0005335251089123734, "loss": 1.5075, "step": 11286 }, { "epoch": 0.5018896447467877, "grad_norm": 0.06295160949230194, "learning_rate": 0.0005333792478807812, "loss": 1.5024, "step": 11288 }, { "epoch": 0.5019785692054599, "grad_norm": 0.06541930884122849, "learning_rate": 0.0005332333839957834, "loss": 1.4928, "step": 11290 }, { "epoch": 0.5020674936641323, "grad_norm": 0.06382886320352554, "learning_rate": 0.0005330875172698498, "loss": 1.4973, "step": 11292 }, { "epoch": 0.5021564181228046, "grad_norm": 0.06370875984430313, "learning_rate": 0.0005329416477154492, "loss": 1.4947, "step": 11294 }, { "epoch": 0.502245342581477, "grad_norm": 0.06501110643148422, "learning_rate": 0.0005327957753450515, "loss": 1.4953, "step": 11296 }, { "epoch": 0.5023342670401494, "grad_norm": 0.06344249099493027, "learning_rate": 0.0005326499001711264, "loss": 1.4958, "step": 11298 }, { "epoch": 0.5024231914988218, "grad_norm": 0.0630435198545456, "learning_rate": 0.0005325040222061438, "loss": 1.5025, "step": 11300 }, { "epoch": 0.5025121159574941, "grad_norm": 0.06559957563877106, "learning_rate": 0.0005323581414625743, "loss": 1.4961, "step": 11302 }, { "epoch": 0.5026010404161665, "grad_norm": 0.06591141223907471, "learning_rate": 0.0005322122579528883, "loss": 1.4967, "step": 11304 }, { "epoch": 0.5026899648748389, "grad_norm": 0.06230854615569115, "learning_rate": 0.0005320663716895565, "loss": 1.4959, "step": 11306 }, { "epoch": 0.5027788893335112, "grad_norm": 0.06417876482009888, "learning_rate": 0.0005319204826850498, "loss": 1.5024, "step": 11308 }, { "epoch": 0.5028678137921836, "grad_norm": 0.06187916174530983, "learning_rate": 0.0005317745909518397, "loss": 1.4925, "step": 11310 }, { "epoch": 0.5029567382508559, "grad_norm": 0.06452244520187378, "learning_rate": 0.0005316286965023976, "loss": 1.5, "step": 11312 }, { "epoch": 0.5030456627095282, "grad_norm": 0.064858078956604, "learning_rate": 0.0005314827993491948, "loss": 1.4968, "step": 11314 }, { "epoch": 0.5031345871682006, "grad_norm": 0.0635191798210144, "learning_rate": 0.0005313368995047039, "loss": 1.4931, "step": 11316 }, { "epoch": 0.503223511626873, "grad_norm": 0.06416906416416168, "learning_rate": 0.0005311909969813966, "loss": 1.4993, "step": 11318 }, { "epoch": 0.5033124360855453, "grad_norm": 0.06357254087924957, "learning_rate": 0.0005310450917917456, "loss": 1.4931, "step": 11320 }, { "epoch": 0.5034013605442177, "grad_norm": 0.06390412896871567, "learning_rate": 0.0005308991839482231, "loss": 1.498, "step": 11322 }, { "epoch": 0.5034902850028901, "grad_norm": 0.06369391083717346, "learning_rate": 0.0005307532734633024, "loss": 1.5001, "step": 11324 }, { "epoch": 0.5035792094615624, "grad_norm": 0.06292715668678284, "learning_rate": 0.0005306073603494561, "loss": 1.499, "step": 11326 }, { "epoch": 0.5036681339202348, "grad_norm": 0.06275395303964615, "learning_rate": 0.0005304614446191581, "loss": 1.5005, "step": 11328 }, { "epoch": 0.5037570583789072, "grad_norm": 0.06449761241674423, "learning_rate": 0.0005303155262848813, "loss": 1.4961, "step": 11330 }, { "epoch": 0.5038459828375795, "grad_norm": 0.06431379169225693, "learning_rate": 0.0005301696053590999, "loss": 1.4974, "step": 11332 }, { "epoch": 0.5039349072962518, "grad_norm": 0.06470140814781189, "learning_rate": 0.0005300236818542876, "loss": 1.4957, "step": 11334 }, { "epoch": 0.5040238317549242, "grad_norm": 0.06437893211841583, "learning_rate": 0.0005298777557829186, "loss": 1.4951, "step": 11336 }, { "epoch": 0.5041127562135965, "grad_norm": 0.06438335031270981, "learning_rate": 0.0005297318271574675, "loss": 1.4902, "step": 11338 }, { "epoch": 0.5042016806722689, "grad_norm": 0.06443442404270172, "learning_rate": 0.0005295858959904086, "loss": 1.4999, "step": 11340 }, { "epoch": 0.5042906051309413, "grad_norm": 0.06223178282380104, "learning_rate": 0.0005294399622942172, "loss": 1.5001, "step": 11342 }, { "epoch": 0.5043795295896136, "grad_norm": 0.06361085921525955, "learning_rate": 0.0005292940260813678, "loss": 1.4937, "step": 11344 }, { "epoch": 0.504468454048286, "grad_norm": 0.06470072269439697, "learning_rate": 0.0005291480873643362, "loss": 1.5009, "step": 11346 }, { "epoch": 0.5045573785069584, "grad_norm": 0.05959364399313927, "learning_rate": 0.0005290021461555974, "loss": 1.4964, "step": 11348 }, { "epoch": 0.5046463029656307, "grad_norm": 0.06182974949479103, "learning_rate": 0.0005288562024676275, "loss": 1.4977, "step": 11350 }, { "epoch": 0.5047352274243031, "grad_norm": 0.06326933205127716, "learning_rate": 0.0005287102563129023, "loss": 1.5012, "step": 11352 }, { "epoch": 0.5048241518829754, "grad_norm": 0.06258763372898102, "learning_rate": 0.0005285643077038977, "loss": 1.498, "step": 11354 }, { "epoch": 0.5049130763416477, "grad_norm": 0.06452583521604538, "learning_rate": 0.0005284183566530902, "loss": 1.4936, "step": 11356 }, { "epoch": 0.5050020008003201, "grad_norm": 0.06566081196069717, "learning_rate": 0.0005282724031729565, "loss": 1.4988, "step": 11358 }, { "epoch": 0.5050909252589925, "grad_norm": 0.06276465952396393, "learning_rate": 0.0005281264472759732, "loss": 1.4944, "step": 11360 }, { "epoch": 0.5051798497176648, "grad_norm": 0.061030905693769455, "learning_rate": 0.0005279804889746169, "loss": 1.4979, "step": 11362 }, { "epoch": 0.5052687741763372, "grad_norm": 0.06379514187574387, "learning_rate": 0.0005278345282813655, "loss": 1.4952, "step": 11364 }, { "epoch": 0.5053576986350096, "grad_norm": 0.06288314610719681, "learning_rate": 0.0005276885652086958, "loss": 1.4891, "step": 11366 }, { "epoch": 0.5054466230936819, "grad_norm": 0.061497218906879425, "learning_rate": 0.0005275425997690854, "loss": 1.5001, "step": 11368 }, { "epoch": 0.5055355475523543, "grad_norm": 0.06392903625965118, "learning_rate": 0.0005273966319750123, "loss": 1.4953, "step": 11370 }, { "epoch": 0.5056244720110267, "grad_norm": 0.06301810592412949, "learning_rate": 0.0005272506618389544, "loss": 1.4944, "step": 11372 }, { "epoch": 0.505713396469699, "grad_norm": 0.06324661523103714, "learning_rate": 0.00052710468937339, "loss": 1.499, "step": 11374 }, { "epoch": 0.5058023209283713, "grad_norm": 0.06164475902915001, "learning_rate": 0.0005269587145907969, "loss": 1.4962, "step": 11376 }, { "epoch": 0.5058912453870437, "grad_norm": 0.060896288603544235, "learning_rate": 0.0005268127375036545, "loss": 1.5005, "step": 11378 }, { "epoch": 0.505980169845716, "grad_norm": 0.06393571943044662, "learning_rate": 0.0005266667581244408, "loss": 1.5005, "step": 11380 }, { "epoch": 0.5060690943043884, "grad_norm": 0.06255924701690674, "learning_rate": 0.0005265207764656353, "loss": 1.4963, "step": 11382 }, { "epoch": 0.5061580187630608, "grad_norm": 0.06395517289638519, "learning_rate": 0.0005263747925397168, "loss": 1.4931, "step": 11384 }, { "epoch": 0.5062469432217331, "grad_norm": 0.0635991245508194, "learning_rate": 0.0005262288063591649, "loss": 1.4972, "step": 11386 }, { "epoch": 0.5063358676804055, "grad_norm": 0.06476542353630066, "learning_rate": 0.0005260828179364591, "loss": 1.4942, "step": 11388 }, { "epoch": 0.5064247921390779, "grad_norm": 0.06474872678518295, "learning_rate": 0.000525936827284079, "loss": 1.4969, "step": 11390 }, { "epoch": 0.5065137165977502, "grad_norm": 0.06325345486402512, "learning_rate": 0.0005257908344145046, "loss": 1.4924, "step": 11392 }, { "epoch": 0.5066026410564226, "grad_norm": 0.06438945233821869, "learning_rate": 0.0005256448393402161, "loss": 1.4962, "step": 11394 }, { "epoch": 0.506691565515095, "grad_norm": 0.06563424319028854, "learning_rate": 0.0005254988420736935, "loss": 1.5001, "step": 11396 }, { "epoch": 0.5067804899737672, "grad_norm": 0.06430519372224808, "learning_rate": 0.0005253528426274178, "loss": 1.4977, "step": 11398 }, { "epoch": 0.5068694144324396, "grad_norm": 0.06316456943750381, "learning_rate": 0.0005252068410138693, "loss": 1.4993, "step": 11400 }, { "epoch": 0.506958338891112, "grad_norm": 0.06348346173763275, "learning_rate": 0.0005250608372455288, "loss": 1.4894, "step": 11402 }, { "epoch": 0.5070472633497843, "grad_norm": 0.06484797596931458, "learning_rate": 0.0005249148313348776, "loss": 1.4929, "step": 11404 }, { "epoch": 0.5071361878084567, "grad_norm": 0.06406370550394058, "learning_rate": 0.0005247688232943969, "loss": 1.4931, "step": 11406 }, { "epoch": 0.5072251122671291, "grad_norm": 0.0638013482093811, "learning_rate": 0.0005246228131365678, "loss": 1.4977, "step": 11408 }, { "epoch": 0.5073140367258014, "grad_norm": 0.06718415021896362, "learning_rate": 0.0005244768008738724, "loss": 1.498, "step": 11410 }, { "epoch": 0.5074029611844738, "grad_norm": 0.06131194531917572, "learning_rate": 0.0005243307865187921, "loss": 1.4967, "step": 11412 }, { "epoch": 0.5074918856431462, "grad_norm": 0.06349305063486099, "learning_rate": 0.0005241847700838089, "loss": 1.4966, "step": 11414 }, { "epoch": 0.5075808101018185, "grad_norm": 0.06464185565710068, "learning_rate": 0.0005240387515814049, "loss": 1.4924, "step": 11416 }, { "epoch": 0.5076697345604909, "grad_norm": 0.06342726945877075, "learning_rate": 0.0005238927310240627, "loss": 1.5043, "step": 11418 }, { "epoch": 0.5077586590191632, "grad_norm": 0.06681360304355621, "learning_rate": 0.0005237467084242644, "loss": 1.5018, "step": 11420 }, { "epoch": 0.5078475834778355, "grad_norm": 0.06572667509317398, "learning_rate": 0.0005236006837944928, "loss": 1.5001, "step": 11422 }, { "epoch": 0.5079365079365079, "grad_norm": 0.06486863642930984, "learning_rate": 0.0005234546571472307, "loss": 1.4965, "step": 11424 }, { "epoch": 0.5080254323951803, "grad_norm": 0.06413675844669342, "learning_rate": 0.0005233086284949612, "loss": 1.4973, "step": 11426 }, { "epoch": 0.5081143568538526, "grad_norm": 0.0645928606390953, "learning_rate": 0.0005231625978501676, "loss": 1.4959, "step": 11428 }, { "epoch": 0.508203281312525, "grad_norm": 0.06422816962003708, "learning_rate": 0.0005230165652253329, "loss": 1.497, "step": 11430 }, { "epoch": 0.5082922057711974, "grad_norm": 0.064136803150177, "learning_rate": 0.0005228705306329407, "loss": 1.4967, "step": 11432 }, { "epoch": 0.5083811302298697, "grad_norm": 0.06331904232501984, "learning_rate": 0.0005227244940854748, "loss": 1.4981, "step": 11434 }, { "epoch": 0.5084700546885421, "grad_norm": 0.06546802818775177, "learning_rate": 0.0005225784555954191, "loss": 1.4998, "step": 11436 }, { "epoch": 0.5085589791472145, "grad_norm": 0.06174863874912262, "learning_rate": 0.0005224324151752575, "loss": 1.4924, "step": 11438 }, { "epoch": 0.5086479036058869, "grad_norm": 0.06472094357013702, "learning_rate": 0.0005222863728374742, "loss": 1.4909, "step": 11440 }, { "epoch": 0.5087368280645591, "grad_norm": 0.0627531185746193, "learning_rate": 0.0005221403285945534, "loss": 1.4991, "step": 11442 }, { "epoch": 0.5088257525232315, "grad_norm": 0.06593529880046844, "learning_rate": 0.0005219942824589798, "loss": 1.4974, "step": 11444 }, { "epoch": 0.5089146769819038, "grad_norm": 0.06247802823781967, "learning_rate": 0.0005218482344432381, "loss": 1.4976, "step": 11446 }, { "epoch": 0.5090036014405762, "grad_norm": 0.06401393562555313, "learning_rate": 0.000521702184559813, "loss": 1.4989, "step": 11448 }, { "epoch": 0.5090925258992486, "grad_norm": 0.06516777724027634, "learning_rate": 0.0005215561328211895, "loss": 1.4973, "step": 11450 }, { "epoch": 0.509181450357921, "grad_norm": 0.06341985613107681, "learning_rate": 0.0005214100792398529, "loss": 1.4956, "step": 11452 }, { "epoch": 0.5092703748165933, "grad_norm": 0.06545495986938477, "learning_rate": 0.0005212640238282882, "loss": 1.4988, "step": 11454 }, { "epoch": 0.5093592992752657, "grad_norm": 0.06209327280521393, "learning_rate": 0.0005211179665989811, "loss": 1.4951, "step": 11456 }, { "epoch": 0.509448223733938, "grad_norm": 0.06250801682472229, "learning_rate": 0.000520971907564417, "loss": 1.4963, "step": 11458 }, { "epoch": 0.5095371481926104, "grad_norm": 0.060768019407987595, "learning_rate": 0.0005208258467370821, "loss": 1.4939, "step": 11460 }, { "epoch": 0.5096260726512827, "grad_norm": 0.06457893550395966, "learning_rate": 0.0005206797841294618, "loss": 1.5005, "step": 11462 }, { "epoch": 0.509714997109955, "grad_norm": 0.06338069587945938, "learning_rate": 0.0005205337197540425, "loss": 1.4988, "step": 11464 }, { "epoch": 0.5098039215686274, "grad_norm": 0.0637018084526062, "learning_rate": 0.0005203876536233104, "loss": 1.4965, "step": 11466 }, { "epoch": 0.5098928460272998, "grad_norm": 0.06639863550662994, "learning_rate": 0.0005202415857497519, "loss": 1.4953, "step": 11468 }, { "epoch": 0.5099817704859722, "grad_norm": 0.0653444156050682, "learning_rate": 0.0005200955161458533, "loss": 1.4941, "step": 11470 }, { "epoch": 0.5100706949446445, "grad_norm": 0.06342481076717377, "learning_rate": 0.0005199494448241014, "loss": 1.4917, "step": 11472 }, { "epoch": 0.5101596194033169, "grad_norm": 0.0635971799492836, "learning_rate": 0.0005198033717969833, "loss": 1.495, "step": 11474 }, { "epoch": 0.5102485438619893, "grad_norm": 0.06396380811929703, "learning_rate": 0.0005196572970769854, "loss": 1.5032, "step": 11476 }, { "epoch": 0.5103374683206616, "grad_norm": 0.0634695515036583, "learning_rate": 0.0005195112206765952, "loss": 1.4967, "step": 11478 }, { "epoch": 0.510426392779334, "grad_norm": 0.06354030221700668, "learning_rate": 0.0005193651426082999, "loss": 1.4968, "step": 11480 }, { "epoch": 0.5105153172380064, "grad_norm": 0.06678715348243713, "learning_rate": 0.000519219062884587, "loss": 1.4991, "step": 11482 }, { "epoch": 0.5106042416966786, "grad_norm": 0.06397784501314163, "learning_rate": 0.0005190729815179437, "loss": 1.4931, "step": 11484 }, { "epoch": 0.510693166155351, "grad_norm": 0.0642557144165039, "learning_rate": 0.0005189268985208579, "loss": 1.4963, "step": 11486 }, { "epoch": 0.5107820906140234, "grad_norm": 0.06316371262073517, "learning_rate": 0.0005187808139058175, "loss": 1.4913, "step": 11488 }, { "epoch": 0.5108710150726957, "grad_norm": 0.06363008916378021, "learning_rate": 0.0005186347276853105, "loss": 1.495, "step": 11490 }, { "epoch": 0.5109599395313681, "grad_norm": 0.06320468336343765, "learning_rate": 0.0005184886398718246, "loss": 1.495, "step": 11492 }, { "epoch": 0.5110488639900405, "grad_norm": 0.06314976513385773, "learning_rate": 0.0005183425504778484, "loss": 1.4978, "step": 11494 }, { "epoch": 0.5111377884487128, "grad_norm": 0.06445315480232239, "learning_rate": 0.00051819645951587, "loss": 1.4968, "step": 11496 }, { "epoch": 0.5112267129073852, "grad_norm": 0.06370304524898529, "learning_rate": 0.0005180503669983782, "loss": 1.4937, "step": 11498 }, { "epoch": 0.5113156373660576, "grad_norm": 0.06207090616226196, "learning_rate": 0.0005179042729378616, "loss": 1.4991, "step": 11500 }, { "epoch": 0.5113156373660576, "eval_loss": 1.4778088331222534, "eval_runtime": 12.4152, "eval_samples_per_second": 556.574, "eval_steps_per_second": 69.592, "step": 11500 }, { "epoch": 0.5114045618247299, "grad_norm": 0.06190875545144081, "learning_rate": 0.0005177581773468087, "loss": 1.4958, "step": 11502 }, { "epoch": 0.5114934862834023, "grad_norm": 0.06312815099954605, "learning_rate": 0.0005176120802377086, "loss": 1.494, "step": 11504 }, { "epoch": 0.5115824107420746, "grad_norm": 0.06356559693813324, "learning_rate": 0.0005174659816230503, "loss": 1.5014, "step": 11506 }, { "epoch": 0.5116713352007469, "grad_norm": 0.060818467289209366, "learning_rate": 0.0005173198815153228, "loss": 1.4924, "step": 11508 }, { "epoch": 0.5117602596594193, "grad_norm": 0.06228560581803322, "learning_rate": 0.0005171737799270154, "loss": 1.4924, "step": 11510 }, { "epoch": 0.5118491841180917, "grad_norm": 0.06337162107229233, "learning_rate": 0.0005170276768706179, "loss": 1.5018, "step": 11512 }, { "epoch": 0.511938108576764, "grad_norm": 0.06353859603404999, "learning_rate": 0.0005168815723586194, "loss": 1.5007, "step": 11514 }, { "epoch": 0.5120270330354364, "grad_norm": 0.06312459707260132, "learning_rate": 0.0005167354664035096, "loss": 1.4998, "step": 11516 }, { "epoch": 0.5121159574941088, "grad_norm": 0.06268506497144699, "learning_rate": 0.0005165893590177786, "loss": 1.5017, "step": 11518 }, { "epoch": 0.5122048819527811, "grad_norm": 0.07257755100727081, "learning_rate": 0.0005164432502139158, "loss": 1.4964, "step": 11520 }, { "epoch": 0.5122938064114535, "grad_norm": 0.06548047810792923, "learning_rate": 0.0005162971400044119, "loss": 1.5002, "step": 11522 }, { "epoch": 0.5123827308701259, "grad_norm": 0.06318143010139465, "learning_rate": 0.0005161510284017563, "loss": 1.493, "step": 11524 }, { "epoch": 0.5124716553287982, "grad_norm": 0.06401795148849487, "learning_rate": 0.0005160049154184399, "loss": 1.4945, "step": 11526 }, { "epoch": 0.5125605797874705, "grad_norm": 0.06641431897878647, "learning_rate": 0.0005158588010669526, "loss": 1.495, "step": 11528 }, { "epoch": 0.5126495042461429, "grad_norm": 0.06431353837251663, "learning_rate": 0.0005157126853597852, "loss": 1.4966, "step": 11530 }, { "epoch": 0.5127384287048152, "grad_norm": 0.06240619346499443, "learning_rate": 0.000515566568309428, "loss": 1.4989, "step": 11532 }, { "epoch": 0.5128273531634876, "grad_norm": 0.06395246088504791, "learning_rate": 0.0005154204499283723, "loss": 1.4936, "step": 11534 }, { "epoch": 0.51291627762216, "grad_norm": 0.06343571096658707, "learning_rate": 0.0005152743302291083, "loss": 1.4887, "step": 11536 }, { "epoch": 0.5130052020808323, "grad_norm": 0.06305166333913803, "learning_rate": 0.0005151282092241273, "loss": 1.4997, "step": 11538 }, { "epoch": 0.5130941265395047, "grad_norm": 0.06751389056444168, "learning_rate": 0.0005149820869259203, "loss": 1.4989, "step": 11540 }, { "epoch": 0.5131830509981771, "grad_norm": 0.06132427230477333, "learning_rate": 0.0005148359633469784, "loss": 1.4996, "step": 11542 }, { "epoch": 0.5132719754568494, "grad_norm": 0.06433816254138947, "learning_rate": 0.0005146898384997933, "loss": 1.4972, "step": 11544 }, { "epoch": 0.5133608999155218, "grad_norm": 0.06302385777235031, "learning_rate": 0.0005145437123968558, "loss": 1.4971, "step": 11546 }, { "epoch": 0.5134498243741942, "grad_norm": 0.06478093564510345, "learning_rate": 0.0005143975850506579, "loss": 1.4962, "step": 11548 }, { "epoch": 0.5135387488328664, "grad_norm": 0.06177389994263649, "learning_rate": 0.0005142514564736906, "loss": 1.4966, "step": 11550 }, { "epoch": 0.5136276732915388, "grad_norm": 0.06433558464050293, "learning_rate": 0.0005141053266784462, "loss": 1.4989, "step": 11552 }, { "epoch": 0.5137165977502112, "grad_norm": 0.0624937042593956, "learning_rate": 0.0005139591956774165, "loss": 1.4979, "step": 11554 }, { "epoch": 0.5138055222088835, "grad_norm": 0.062215663492679596, "learning_rate": 0.0005138130634830928, "loss": 1.4982, "step": 11556 }, { "epoch": 0.5138944466675559, "grad_norm": 0.06470473855733871, "learning_rate": 0.0005136669301079679, "loss": 1.4914, "step": 11558 }, { "epoch": 0.5139833711262283, "grad_norm": 0.06185575947165489, "learning_rate": 0.0005135207955645336, "loss": 1.4924, "step": 11560 }, { "epoch": 0.5140722955849006, "grad_norm": 0.06259468197822571, "learning_rate": 0.0005133746598652821, "loss": 1.4997, "step": 11562 }, { "epoch": 0.514161220043573, "grad_norm": 0.06266958266496658, "learning_rate": 0.0005132285230227055, "loss": 1.496, "step": 11564 }, { "epoch": 0.5142501445022454, "grad_norm": 0.06312844902276993, "learning_rate": 0.0005130823850492967, "loss": 1.4945, "step": 11566 }, { "epoch": 0.5143390689609177, "grad_norm": 0.06307828426361084, "learning_rate": 0.0005129362459575478, "loss": 1.4946, "step": 11568 }, { "epoch": 0.5144279934195901, "grad_norm": 0.06209829822182655, "learning_rate": 0.0005127901057599516, "loss": 1.4996, "step": 11570 }, { "epoch": 0.5145169178782624, "grad_norm": 0.06419774144887924, "learning_rate": 0.000512643964469001, "loss": 1.4965, "step": 11572 }, { "epoch": 0.5146058423369347, "grad_norm": 0.06341389566659927, "learning_rate": 0.0005124978220971884, "loss": 1.4957, "step": 11574 }, { "epoch": 0.5146947667956071, "grad_norm": 0.062224533408880234, "learning_rate": 0.0005123516786570071, "loss": 1.4992, "step": 11576 }, { "epoch": 0.5147836912542795, "grad_norm": 0.06227099150419235, "learning_rate": 0.0005122055341609496, "loss": 1.5002, "step": 11578 }, { "epoch": 0.5148726157129518, "grad_norm": 0.06264078617095947, "learning_rate": 0.0005120593886215095, "loss": 1.4982, "step": 11580 }, { "epoch": 0.5149615401716242, "grad_norm": 0.0614069402217865, "learning_rate": 0.0005119132420511795, "loss": 1.4952, "step": 11582 }, { "epoch": 0.5150504646302966, "grad_norm": 0.06539449840784073, "learning_rate": 0.0005117670944624533, "loss": 1.4953, "step": 11584 }, { "epoch": 0.515139389088969, "grad_norm": 0.06382063776254654, "learning_rate": 0.0005116209458678238, "loss": 1.4929, "step": 11586 }, { "epoch": 0.5152283135476413, "grad_norm": 0.06359395384788513, "learning_rate": 0.0005114747962797848, "loss": 1.4938, "step": 11588 }, { "epoch": 0.5153172380063137, "grad_norm": 0.06395386159420013, "learning_rate": 0.0005113286457108296, "loss": 1.4926, "step": 11590 }, { "epoch": 0.5154061624649859, "grad_norm": 0.06651069223880768, "learning_rate": 0.0005111824941734516, "loss": 1.5002, "step": 11592 }, { "epoch": 0.5154950869236583, "grad_norm": 0.06515168398618698, "learning_rate": 0.000511036341680145, "loss": 1.4955, "step": 11594 }, { "epoch": 0.5155840113823307, "grad_norm": 0.06810913234949112, "learning_rate": 0.0005108901882434031, "loss": 1.4953, "step": 11596 }, { "epoch": 0.515672935841003, "grad_norm": 0.06428667902946472, "learning_rate": 0.0005107440338757202, "loss": 1.4911, "step": 11598 }, { "epoch": 0.5157618602996754, "grad_norm": 0.06621834635734558, "learning_rate": 0.0005105978785895898, "loss": 1.5015, "step": 11600 }, { "epoch": 0.5158507847583478, "grad_norm": 0.06360407173633575, "learning_rate": 0.000510451722397506, "loss": 1.4948, "step": 11602 }, { "epoch": 0.5159397092170201, "grad_norm": 0.06574375927448273, "learning_rate": 0.0005103055653119627, "loss": 1.4965, "step": 11604 }, { "epoch": 0.5160286336756925, "grad_norm": 0.06324627995491028, "learning_rate": 0.0005101594073454545, "loss": 1.4963, "step": 11606 }, { "epoch": 0.5161175581343649, "grad_norm": 0.06388534605503082, "learning_rate": 0.0005100132485104754, "loss": 1.4984, "step": 11608 }, { "epoch": 0.5162064825930373, "grad_norm": 0.06630777567625046, "learning_rate": 0.0005098670888195196, "loss": 1.4928, "step": 11610 }, { "epoch": 0.5162954070517096, "grad_norm": 0.0635385662317276, "learning_rate": 0.0005097209282850817, "loss": 1.4886, "step": 11612 }, { "epoch": 0.5163843315103819, "grad_norm": 0.062710702419281, "learning_rate": 0.0005095747669196559, "loss": 1.4977, "step": 11614 }, { "epoch": 0.5164732559690542, "grad_norm": 0.06373061239719391, "learning_rate": 0.0005094286047357368, "loss": 1.4934, "step": 11616 }, { "epoch": 0.5165621804277266, "grad_norm": 0.06358326971530914, "learning_rate": 0.0005092824417458191, "loss": 1.496, "step": 11618 }, { "epoch": 0.516651104886399, "grad_norm": 0.06176624819636345, "learning_rate": 0.0005091362779623972, "loss": 1.4983, "step": 11620 }, { "epoch": 0.5167400293450713, "grad_norm": 0.06667372584342957, "learning_rate": 0.0005089901133979663, "loss": 1.5, "step": 11622 }, { "epoch": 0.5168289538037437, "grad_norm": 0.0629948228597641, "learning_rate": 0.0005088439480650207, "loss": 1.5001, "step": 11624 }, { "epoch": 0.5169178782624161, "grad_norm": 0.06550941616296768, "learning_rate": 0.0005086977819760554, "loss": 1.4975, "step": 11626 }, { "epoch": 0.5170068027210885, "grad_norm": 0.06235315650701523, "learning_rate": 0.0005085516151435655, "loss": 1.4933, "step": 11628 }, { "epoch": 0.5170957271797608, "grad_norm": 0.06560411304235458, "learning_rate": 0.0005084054475800458, "loss": 1.4941, "step": 11630 }, { "epoch": 0.5171846516384332, "grad_norm": 0.06175919994711876, "learning_rate": 0.0005082592792979914, "loss": 1.494, "step": 11632 }, { "epoch": 0.5172735760971056, "grad_norm": 0.06700320541858673, "learning_rate": 0.0005081131103098974, "loss": 1.4909, "step": 11634 }, { "epoch": 0.5173625005557778, "grad_norm": 0.06476999819278717, "learning_rate": 0.0005079669406282588, "loss": 1.4967, "step": 11636 }, { "epoch": 0.5174514250144502, "grad_norm": 0.06396272778511047, "learning_rate": 0.0005078207702655714, "loss": 1.5023, "step": 11638 }, { "epoch": 0.5175403494731226, "grad_norm": 0.0631399154663086, "learning_rate": 0.0005076745992343296, "loss": 1.4899, "step": 11640 }, { "epoch": 0.5176292739317949, "grad_norm": 0.06382925063371658, "learning_rate": 0.0005075284275470296, "loss": 1.4977, "step": 11642 }, { "epoch": 0.5177181983904673, "grad_norm": 0.06469811499118805, "learning_rate": 0.0005073822552161664, "loss": 1.5019, "step": 11644 }, { "epoch": 0.5178071228491397, "grad_norm": 0.06676246225833893, "learning_rate": 0.0005072360822542354, "loss": 1.4926, "step": 11646 }, { "epoch": 0.517896047307812, "grad_norm": 0.0667065978050232, "learning_rate": 0.0005070899086737322, "loss": 1.4994, "step": 11648 }, { "epoch": 0.5179849717664844, "grad_norm": 0.06245999410748482, "learning_rate": 0.0005069437344871523, "loss": 1.4896, "step": 11650 }, { "epoch": 0.5180738962251568, "grad_norm": 0.06560548394918442, "learning_rate": 0.0005067975597069917, "loss": 1.497, "step": 11652 }, { "epoch": 0.5181628206838291, "grad_norm": 0.06353573501110077, "learning_rate": 0.0005066513843457454, "loss": 1.4976, "step": 11654 }, { "epoch": 0.5182517451425015, "grad_norm": 0.06430783122777939, "learning_rate": 0.0005065052084159096, "loss": 1.4974, "step": 11656 }, { "epoch": 0.5183406696011738, "grad_norm": 0.06370967626571655, "learning_rate": 0.0005063590319299799, "loss": 1.4924, "step": 11658 }, { "epoch": 0.5184295940598461, "grad_norm": 0.06251141428947449, "learning_rate": 0.0005062128549004521, "loss": 1.4985, "step": 11660 }, { "epoch": 0.5185185185185185, "grad_norm": 0.06270091235637665, "learning_rate": 0.0005060666773398221, "loss": 1.4939, "step": 11662 }, { "epoch": 0.5186074429771909, "grad_norm": 0.06116213649511337, "learning_rate": 0.0005059204992605857, "loss": 1.499, "step": 11664 }, { "epoch": 0.5186963674358632, "grad_norm": 0.06304262578487396, "learning_rate": 0.0005057743206752391, "loss": 1.4885, "step": 11666 }, { "epoch": 0.5187852918945356, "grad_norm": 0.062481291592121124, "learning_rate": 0.000505628141596278, "loss": 1.4923, "step": 11668 }, { "epoch": 0.518874216353208, "grad_norm": 0.06374236196279526, "learning_rate": 0.0005054819620361986, "loss": 1.5012, "step": 11670 }, { "epoch": 0.5189631408118803, "grad_norm": 0.06343623250722885, "learning_rate": 0.0005053357820074968, "loss": 1.4938, "step": 11672 }, { "epoch": 0.5190520652705527, "grad_norm": 0.06267695873975754, "learning_rate": 0.0005051896015226691, "loss": 1.4951, "step": 11674 }, { "epoch": 0.5191409897292251, "grad_norm": 0.06275571882724762, "learning_rate": 0.0005050434205942113, "loss": 1.4897, "step": 11676 }, { "epoch": 0.5192299141878974, "grad_norm": 0.06288079917430878, "learning_rate": 0.0005048972392346196, "loss": 1.4895, "step": 11678 }, { "epoch": 0.5193188386465697, "grad_norm": 0.06326977908611298, "learning_rate": 0.0005047510574563902, "loss": 1.4993, "step": 11680 }, { "epoch": 0.5194077631052421, "grad_norm": 0.06266149133443832, "learning_rate": 0.0005046048752720197, "loss": 1.4982, "step": 11682 }, { "epoch": 0.5194966875639144, "grad_norm": 0.06486205756664276, "learning_rate": 0.0005044586926940041, "loss": 1.496, "step": 11684 }, { "epoch": 0.5195856120225868, "grad_norm": 0.06361040472984314, "learning_rate": 0.0005043125097348398, "loss": 1.4952, "step": 11686 }, { "epoch": 0.5196745364812592, "grad_norm": 0.0649038627743721, "learning_rate": 0.0005041663264070231, "loss": 1.4968, "step": 11688 }, { "epoch": 0.5197634609399315, "grad_norm": 0.06310614198446274, "learning_rate": 0.0005040201427230504, "loss": 1.4945, "step": 11690 }, { "epoch": 0.5198523853986039, "grad_norm": 0.0667177364230156, "learning_rate": 0.0005038739586954186, "loss": 1.4936, "step": 11692 }, { "epoch": 0.5199413098572763, "grad_norm": 0.0625763013958931, "learning_rate": 0.0005037277743366232, "loss": 1.4931, "step": 11694 }, { "epoch": 0.5200302343159486, "grad_norm": 0.06521979719400406, "learning_rate": 0.0005035815896591616, "loss": 1.4938, "step": 11696 }, { "epoch": 0.520119158774621, "grad_norm": 0.06360069662332535, "learning_rate": 0.0005034354046755296, "loss": 1.494, "step": 11698 }, { "epoch": 0.5202080832332934, "grad_norm": 0.06382516026496887, "learning_rate": 0.0005032892193982243, "loss": 1.4954, "step": 11700 }, { "epoch": 0.5202970076919656, "grad_norm": 0.06371236592531204, "learning_rate": 0.000503143033839742, "loss": 1.4961, "step": 11702 }, { "epoch": 0.520385932150638, "grad_norm": 0.06548450887203217, "learning_rate": 0.0005029968480125792, "loss": 1.4937, "step": 11704 }, { "epoch": 0.5204748566093104, "grad_norm": 0.06387647241353989, "learning_rate": 0.0005028506619292328, "loss": 1.4924, "step": 11706 }, { "epoch": 0.5205637810679827, "grad_norm": 0.06516347825527191, "learning_rate": 0.0005027044756021992, "loss": 1.4932, "step": 11708 }, { "epoch": 0.5206527055266551, "grad_norm": 0.06408815830945969, "learning_rate": 0.0005025582890439751, "loss": 1.4929, "step": 11710 }, { "epoch": 0.5207416299853275, "grad_norm": 0.06484084576368332, "learning_rate": 0.0005024121022670572, "loss": 1.4895, "step": 11712 }, { "epoch": 0.5208305544439998, "grad_norm": 0.062469739466905594, "learning_rate": 0.0005022659152839421, "loss": 1.4925, "step": 11714 }, { "epoch": 0.5209194789026722, "grad_norm": 0.06194042041897774, "learning_rate": 0.0005021197281071265, "loss": 1.502, "step": 11716 }, { "epoch": 0.5210084033613446, "grad_norm": 0.06476829200983047, "learning_rate": 0.0005019735407491072, "loss": 1.5043, "step": 11718 }, { "epoch": 0.5210973278200169, "grad_norm": 0.06216420605778694, "learning_rate": 0.0005018273532223812, "loss": 1.4906, "step": 11720 }, { "epoch": 0.5211862522786892, "grad_norm": 0.06267587095499039, "learning_rate": 0.0005016811655394449, "loss": 1.4925, "step": 11722 }, { "epoch": 0.5212751767373616, "grad_norm": 0.06346725672483444, "learning_rate": 0.0005015349777127952, "loss": 1.4922, "step": 11724 }, { "epoch": 0.5213641011960339, "grad_norm": 0.06293226033449173, "learning_rate": 0.0005013887897549285, "loss": 1.4935, "step": 11726 }, { "epoch": 0.5214530256547063, "grad_norm": 0.061574727296829224, "learning_rate": 0.0005012426016783422, "loss": 1.4944, "step": 11728 }, { "epoch": 0.5215419501133787, "grad_norm": 0.062728650867939, "learning_rate": 0.0005010964134955329, "loss": 1.4908, "step": 11730 }, { "epoch": 0.521630874572051, "grad_norm": 0.06296613812446594, "learning_rate": 0.0005009502252189972, "loss": 1.4882, "step": 11732 }, { "epoch": 0.5217197990307234, "grad_norm": 0.0656297355890274, "learning_rate": 0.0005008040368612321, "loss": 1.4928, "step": 11734 }, { "epoch": 0.5218087234893958, "grad_norm": 0.06233159825205803, "learning_rate": 0.0005006578484347345, "loss": 1.5, "step": 11736 }, { "epoch": 0.5218976479480681, "grad_norm": 0.06443444639444351, "learning_rate": 0.000500511659952001, "loss": 1.4908, "step": 11738 }, { "epoch": 0.5219865724067405, "grad_norm": 0.06185967102646828, "learning_rate": 0.0005003654714255285, "loss": 1.4877, "step": 11740 }, { "epoch": 0.5220754968654129, "grad_norm": 0.0641622245311737, "learning_rate": 0.0005002192828678139, "loss": 1.4962, "step": 11742 }, { "epoch": 0.5221644213240851, "grad_norm": 0.06316876411437988, "learning_rate": 0.0005000730942913541, "loss": 1.4998, "step": 11744 }, { "epoch": 0.5222533457827575, "grad_norm": 0.06322462856769562, "learning_rate": 0.0004999269057086459, "loss": 1.4944, "step": 11746 }, { "epoch": 0.5223422702414299, "grad_norm": 0.06409668177366257, "learning_rate": 0.0004997807171321861, "loss": 1.4944, "step": 11748 }, { "epoch": 0.5224311947001022, "grad_norm": 0.06382996588945389, "learning_rate": 0.0004996345285744716, "loss": 1.4929, "step": 11750 }, { "epoch": 0.5225201191587746, "grad_norm": 0.06639515608549118, "learning_rate": 0.0004994883400479991, "loss": 1.493, "step": 11752 }, { "epoch": 0.522609043617447, "grad_norm": 0.06325390189886093, "learning_rate": 0.0004993421515652656, "loss": 1.4928, "step": 11754 }, { "epoch": 0.5226979680761193, "grad_norm": 0.06696011871099472, "learning_rate": 0.0004991959631387679, "loss": 1.4937, "step": 11756 }, { "epoch": 0.5227868925347917, "grad_norm": 0.06227093189954758, "learning_rate": 0.0004990497747810028, "loss": 1.4913, "step": 11758 }, { "epoch": 0.5228758169934641, "grad_norm": 0.0665513128042221, "learning_rate": 0.0004989035865044673, "loss": 1.4935, "step": 11760 }, { "epoch": 0.5229647414521365, "grad_norm": 0.0690368264913559, "learning_rate": 0.0004987573983216576, "loss": 1.4907, "step": 11762 }, { "epoch": 0.5230536659108088, "grad_norm": 0.06481185555458069, "learning_rate": 0.0004986112102450715, "loss": 1.4942, "step": 11764 }, { "epoch": 0.5231425903694811, "grad_norm": 0.06394998729228973, "learning_rate": 0.0004984650222872051, "loss": 1.4943, "step": 11766 }, { "epoch": 0.5232315148281534, "grad_norm": 0.06454624235630035, "learning_rate": 0.0004983188344605552, "loss": 1.4975, "step": 11768 }, { "epoch": 0.5233204392868258, "grad_norm": 0.06496671587228775, "learning_rate": 0.0004981726467776188, "loss": 1.4941, "step": 11770 }, { "epoch": 0.5234093637454982, "grad_norm": 0.0641636922955513, "learning_rate": 0.0004980264592508927, "loss": 1.4984, "step": 11772 }, { "epoch": 0.5234982882041705, "grad_norm": 0.06285618245601654, "learning_rate": 0.0004978802718928735, "loss": 1.4953, "step": 11774 }, { "epoch": 0.5235872126628429, "grad_norm": 0.06363879889249802, "learning_rate": 0.0004977340847160581, "loss": 1.4954, "step": 11776 }, { "epoch": 0.5236761371215153, "grad_norm": 0.06370127201080322, "learning_rate": 0.0004975878977329429, "loss": 1.4919, "step": 11778 }, { "epoch": 0.5237650615801877, "grad_norm": 0.06240037456154823, "learning_rate": 0.000497441710956025, "loss": 1.4935, "step": 11780 }, { "epoch": 0.52385398603886, "grad_norm": 0.06250346451997757, "learning_rate": 0.000497295524397801, "loss": 1.4893, "step": 11782 }, { "epoch": 0.5239429104975324, "grad_norm": 0.06271068751811981, "learning_rate": 0.0004971493380707671, "loss": 1.4963, "step": 11784 }, { "epoch": 0.5240318349562048, "grad_norm": 0.06360255181789398, "learning_rate": 0.0004970031519874209, "loss": 1.4935, "step": 11786 }, { "epoch": 0.524120759414877, "grad_norm": 0.06317956000566483, "learning_rate": 0.0004968569661602582, "loss": 1.4939, "step": 11788 }, { "epoch": 0.5242096838735494, "grad_norm": 0.06279599666595459, "learning_rate": 0.0004967107806017757, "loss": 1.4987, "step": 11790 }, { "epoch": 0.5242986083322217, "grad_norm": 0.0632399320602417, "learning_rate": 0.0004965645953244705, "loss": 1.4954, "step": 11792 }, { "epoch": 0.5243875327908941, "grad_norm": 0.06396782398223877, "learning_rate": 0.0004964184103408386, "loss": 1.4951, "step": 11794 }, { "epoch": 0.5244764572495665, "grad_norm": 0.061761051416397095, "learning_rate": 0.0004962722256633768, "loss": 1.502, "step": 11796 }, { "epoch": 0.5245653817082389, "grad_norm": 0.06565398722887039, "learning_rate": 0.0004961260413045818, "loss": 1.4963, "step": 11798 }, { "epoch": 0.5246543061669112, "grad_norm": 0.06184777244925499, "learning_rate": 0.0004959798572769495, "loss": 1.493, "step": 11800 }, { "epoch": 0.5247432306255836, "grad_norm": 0.06266294419765472, "learning_rate": 0.000495833673592977, "loss": 1.4999, "step": 11802 }, { "epoch": 0.524832155084256, "grad_norm": 0.06519590318202972, "learning_rate": 0.0004956874902651603, "loss": 1.4917, "step": 11804 }, { "epoch": 0.5249210795429283, "grad_norm": 0.06418920308351517, "learning_rate": 0.0004955413073059959, "loss": 1.4897, "step": 11806 }, { "epoch": 0.5250100040016007, "grad_norm": 0.06677830964326859, "learning_rate": 0.0004953951247279804, "loss": 1.4942, "step": 11808 }, { "epoch": 0.525098928460273, "grad_norm": 0.06483811140060425, "learning_rate": 0.0004952489425436097, "loss": 1.4902, "step": 11810 }, { "epoch": 0.5251878529189453, "grad_norm": 0.06273074448108673, "learning_rate": 0.0004951027607653805, "loss": 1.4937, "step": 11812 }, { "epoch": 0.5252767773776177, "grad_norm": 0.06601008027791977, "learning_rate": 0.0004949565794057889, "loss": 1.4903, "step": 11814 }, { "epoch": 0.52536570183629, "grad_norm": 0.06314726918935776, "learning_rate": 0.0004948103984773309, "loss": 1.4964, "step": 11816 }, { "epoch": 0.5254546262949624, "grad_norm": 0.06756903976202011, "learning_rate": 0.0004946642179925032, "loss": 1.4895, "step": 11818 }, { "epoch": 0.5255435507536348, "grad_norm": 0.06394748389720917, "learning_rate": 0.0004945180379638015, "loss": 1.4906, "step": 11820 }, { "epoch": 0.5256324752123072, "grad_norm": 0.06343099474906921, "learning_rate": 0.0004943718584037221, "loss": 1.4973, "step": 11822 }, { "epoch": 0.5257213996709795, "grad_norm": 0.06174701824784279, "learning_rate": 0.000494225679324761, "loss": 1.4924, "step": 11824 }, { "epoch": 0.5258103241296519, "grad_norm": 0.06279890984296799, "learning_rate": 0.0004940795007394143, "loss": 1.4946, "step": 11826 }, { "epoch": 0.5258992485883243, "grad_norm": 0.06350038945674896, "learning_rate": 0.000493933322660178, "loss": 1.4906, "step": 11828 }, { "epoch": 0.5259881730469965, "grad_norm": 0.06374242156744003, "learning_rate": 0.000493787145099548, "loss": 1.497, "step": 11830 }, { "epoch": 0.5260770975056689, "grad_norm": 0.062003325670957565, "learning_rate": 0.0004936409680700201, "loss": 1.4964, "step": 11832 }, { "epoch": 0.5261660219643413, "grad_norm": 0.06445759534835815, "learning_rate": 0.0004934947915840904, "loss": 1.4866, "step": 11834 }, { "epoch": 0.5262549464230136, "grad_norm": 0.06202596426010132, "learning_rate": 0.0004933486156542548, "loss": 1.4899, "step": 11836 }, { "epoch": 0.526343870881686, "grad_norm": 0.06291262060403824, "learning_rate": 0.0004932024402930084, "loss": 1.4951, "step": 11838 }, { "epoch": 0.5264327953403584, "grad_norm": 0.06197972595691681, "learning_rate": 0.0004930562655128477, "loss": 1.4858, "step": 11840 }, { "epoch": 0.5265217197990307, "grad_norm": 0.06182212010025978, "learning_rate": 0.0004929100913262679, "loss": 1.501, "step": 11842 }, { "epoch": 0.5266106442577031, "grad_norm": 0.06719043105840683, "learning_rate": 0.0004927639177457647, "loss": 1.5003, "step": 11844 }, { "epoch": 0.5266995687163755, "grad_norm": 0.06187283247709274, "learning_rate": 0.0004926177447838338, "loss": 1.4959, "step": 11846 }, { "epoch": 0.5267884931750478, "grad_norm": 0.06271807104349136, "learning_rate": 0.0004924715724529704, "loss": 1.495, "step": 11848 }, { "epoch": 0.5268774176337202, "grad_norm": 0.06492515653371811, "learning_rate": 0.0004923254007656704, "loss": 1.4968, "step": 11850 }, { "epoch": 0.5269663420923925, "grad_norm": 0.0634203776717186, "learning_rate": 0.0004921792297344289, "loss": 1.4964, "step": 11852 }, { "epoch": 0.5270552665510648, "grad_norm": 0.0632987841963768, "learning_rate": 0.0004920330593717411, "loss": 1.482, "step": 11854 }, { "epoch": 0.5271441910097372, "grad_norm": 0.0637960284948349, "learning_rate": 0.0004918868896901027, "loss": 1.4902, "step": 11856 }, { "epoch": 0.5272331154684096, "grad_norm": 0.0617951899766922, "learning_rate": 0.0004917407207020088, "loss": 1.4918, "step": 11858 }, { "epoch": 0.5273220399270819, "grad_norm": 0.06447748094797134, "learning_rate": 0.0004915945524199542, "loss": 1.4945, "step": 11860 }, { "epoch": 0.5274109643857543, "grad_norm": 0.06240464001893997, "learning_rate": 0.0004914483848564346, "loss": 1.4893, "step": 11862 }, { "epoch": 0.5274998888444267, "grad_norm": 0.06561367958784103, "learning_rate": 0.0004913022180239446, "loss": 1.4904, "step": 11864 }, { "epoch": 0.527588813303099, "grad_norm": 0.06380762904882431, "learning_rate": 0.0004911560519349794, "loss": 1.4985, "step": 11866 }, { "epoch": 0.5276777377617714, "grad_norm": 0.06274005025625229, "learning_rate": 0.0004910098866020339, "loss": 1.4938, "step": 11868 }, { "epoch": 0.5277666622204438, "grad_norm": 0.06453198194503784, "learning_rate": 0.0004908637220376027, "loss": 1.4965, "step": 11870 }, { "epoch": 0.5278555866791161, "grad_norm": 0.06285727769136429, "learning_rate": 0.000490717558254181, "loss": 1.4924, "step": 11872 }, { "epoch": 0.5279445111377884, "grad_norm": 0.06540695577859879, "learning_rate": 0.0004905713952642634, "loss": 1.4931, "step": 11874 }, { "epoch": 0.5280334355964608, "grad_norm": 0.06500522047281265, "learning_rate": 0.0004904252330803442, "loss": 1.4884, "step": 11876 }, { "epoch": 0.5281223600551331, "grad_norm": 0.06160023435950279, "learning_rate": 0.0004902790717149184, "loss": 1.494, "step": 11878 }, { "epoch": 0.5282112845138055, "grad_norm": 0.06481318175792694, "learning_rate": 0.0004901329111804805, "loss": 1.4887, "step": 11880 }, { "epoch": 0.5283002089724779, "grad_norm": 0.06199006363749504, "learning_rate": 0.0004899867514895247, "loss": 1.4942, "step": 11882 }, { "epoch": 0.5283891334311502, "grad_norm": 0.06205325201153755, "learning_rate": 0.0004898405926545455, "loss": 1.4907, "step": 11884 }, { "epoch": 0.5284780578898226, "grad_norm": 0.06204449385404587, "learning_rate": 0.0004896944346880372, "loss": 1.4962, "step": 11886 }, { "epoch": 0.528566982348495, "grad_norm": 0.0624365471303463, "learning_rate": 0.0004895482776024942, "loss": 1.4945, "step": 11888 }, { "epoch": 0.5286559068071673, "grad_norm": 0.06314221024513245, "learning_rate": 0.0004894021214104105, "loss": 1.4899, "step": 11890 }, { "epoch": 0.5287448312658397, "grad_norm": 0.06316719204187393, "learning_rate": 0.0004892559661242798, "loss": 1.4979, "step": 11892 }, { "epoch": 0.5288337557245121, "grad_norm": 0.0631944015622139, "learning_rate": 0.0004891098117565969, "loss": 1.491, "step": 11894 }, { "epoch": 0.5289226801831843, "grad_norm": 0.06547391414642334, "learning_rate": 0.0004889636583198549, "loss": 1.4915, "step": 11896 }, { "epoch": 0.5290116046418567, "grad_norm": 0.061480239033699036, "learning_rate": 0.0004888175058265484, "loss": 1.4923, "step": 11898 }, { "epoch": 0.5291005291005291, "grad_norm": 0.06591926515102386, "learning_rate": 0.0004886713542891707, "loss": 1.4942, "step": 11900 }, { "epoch": 0.5291894535592014, "grad_norm": 0.06440820544958115, "learning_rate": 0.0004885252037202154, "loss": 1.4928, "step": 11902 }, { "epoch": 0.5292783780178738, "grad_norm": 0.0646994411945343, "learning_rate": 0.0004883790541321762, "loss": 1.4951, "step": 11904 }, { "epoch": 0.5293673024765462, "grad_norm": 0.06326589733362198, "learning_rate": 0.0004882329055375469, "loss": 1.4959, "step": 11906 }, { "epoch": 0.5294562269352185, "grad_norm": 0.06294218450784683, "learning_rate": 0.0004880867579488205, "loss": 1.4862, "step": 11908 }, { "epoch": 0.5295451513938909, "grad_norm": 0.061898913234472275, "learning_rate": 0.0004879406113784906, "loss": 1.4971, "step": 11910 }, { "epoch": 0.5296340758525633, "grad_norm": 0.062190067023038864, "learning_rate": 0.00048779446583905055, "loss": 1.4951, "step": 11912 }, { "epoch": 0.5297230003112356, "grad_norm": 0.06890960782766342, "learning_rate": 0.000487648321342993, "loss": 1.494, "step": 11914 }, { "epoch": 0.529811924769908, "grad_norm": 0.06532379984855652, "learning_rate": 0.0004875021779028117, "loss": 1.4943, "step": 11916 }, { "epoch": 0.5299008492285803, "grad_norm": 0.0694059357047081, "learning_rate": 0.000487356035530999, "loss": 1.4941, "step": 11918 }, { "epoch": 0.5299897736872526, "grad_norm": 0.060807377099990845, "learning_rate": 0.0004872098942400484, "loss": 1.4945, "step": 11920 }, { "epoch": 0.530078698145925, "grad_norm": 0.0658683255314827, "learning_rate": 0.00048706375404245227, "loss": 1.4913, "step": 11922 }, { "epoch": 0.5301676226045974, "grad_norm": 0.062365368008613586, "learning_rate": 0.00048691761495070344, "loss": 1.4966, "step": 11924 }, { "epoch": 0.5302565470632697, "grad_norm": 0.0646410584449768, "learning_rate": 0.00048677147697729457, "loss": 1.4989, "step": 11926 }, { "epoch": 0.5303454715219421, "grad_norm": 0.06210246682167053, "learning_rate": 0.00048662534013471816, "loss": 1.4891, "step": 11928 }, { "epoch": 0.5304343959806145, "grad_norm": 0.06435447186231613, "learning_rate": 0.0004864792044354665, "loss": 1.4881, "step": 11930 }, { "epoch": 0.5305233204392868, "grad_norm": 0.06157999858260155, "learning_rate": 0.00048633306989203214, "loss": 1.4902, "step": 11932 }, { "epoch": 0.5306122448979592, "grad_norm": 0.06224975362420082, "learning_rate": 0.0004861869365169071, "loss": 1.4926, "step": 11934 }, { "epoch": 0.5307011693566316, "grad_norm": 0.06169992312788963, "learning_rate": 0.00048604080432258366, "loss": 1.493, "step": 11936 }, { "epoch": 0.530790093815304, "grad_norm": 0.06507695466279984, "learning_rate": 0.00048589467332155385, "loss": 1.4962, "step": 11938 }, { "epoch": 0.5308790182739762, "grad_norm": 0.06133843585848808, "learning_rate": 0.0004857485435263094, "loss": 1.4874, "step": 11940 }, { "epoch": 0.5309679427326486, "grad_norm": 0.06246451288461685, "learning_rate": 0.0004856024149493423, "loss": 1.4917, "step": 11942 }, { "epoch": 0.531056867191321, "grad_norm": 0.06303883343935013, "learning_rate": 0.0004854562876031444, "loss": 1.4862, "step": 11944 }, { "epoch": 0.5311457916499933, "grad_norm": 0.06277139484882355, "learning_rate": 0.0004853101615002068, "loss": 1.4925, "step": 11946 }, { "epoch": 0.5312347161086657, "grad_norm": 0.06367526948451996, "learning_rate": 0.0004851640366530216, "loss": 1.4969, "step": 11948 }, { "epoch": 0.531323640567338, "grad_norm": 0.06328226625919342, "learning_rate": 0.0004850179130740796, "loss": 1.4958, "step": 11950 }, { "epoch": 0.5314125650260104, "grad_norm": 0.06432175636291504, "learning_rate": 0.0004848717907758728, "loss": 1.5002, "step": 11952 }, { "epoch": 0.5315014894846828, "grad_norm": 0.06231130287051201, "learning_rate": 0.0004847256697708919, "loss": 1.4921, "step": 11954 }, { "epoch": 0.5315904139433552, "grad_norm": 0.06357510387897491, "learning_rate": 0.0004845795500716279, "loss": 1.487, "step": 11956 }, { "epoch": 0.5316793384020275, "grad_norm": 0.0623379684984684, "learning_rate": 0.000484433431690572, "loss": 1.4889, "step": 11958 }, { "epoch": 0.5317682628606998, "grad_norm": 0.060883667320013046, "learning_rate": 0.00048428731464021505, "loss": 1.4919, "step": 11960 }, { "epoch": 0.5318571873193721, "grad_norm": 0.06377879530191422, "learning_rate": 0.0004841411989330475, "loss": 1.4888, "step": 11962 }, { "epoch": 0.5319461117780445, "grad_norm": 0.06436724215745926, "learning_rate": 0.00048399508458156027, "loss": 1.4935, "step": 11964 }, { "epoch": 0.5320350362367169, "grad_norm": 0.06252797693014145, "learning_rate": 0.00048384897159824364, "loss": 1.4916, "step": 11966 }, { "epoch": 0.5321239606953893, "grad_norm": 0.0638202354311943, "learning_rate": 0.0004837028599955882, "loss": 1.486, "step": 11968 }, { "epoch": 0.5322128851540616, "grad_norm": 0.062130406498909, "learning_rate": 0.0004835567497860842, "loss": 1.4902, "step": 11970 }, { "epoch": 0.532301809612734, "grad_norm": 0.06190851330757141, "learning_rate": 0.00048341064098222144, "loss": 1.4931, "step": 11972 }, { "epoch": 0.5323907340714064, "grad_norm": 0.06283093988895416, "learning_rate": 0.0004832645335964904, "loss": 1.4941, "step": 11974 }, { "epoch": 0.5324796585300787, "grad_norm": 0.06219896674156189, "learning_rate": 0.0004831184276413808, "loss": 1.4913, "step": 11976 }, { "epoch": 0.5325685829887511, "grad_norm": 0.06502451747655869, "learning_rate": 0.0004829723231293822, "loss": 1.4953, "step": 11978 }, { "epoch": 0.5326575074474235, "grad_norm": 0.06261445581912994, "learning_rate": 0.0004828262200729846, "loss": 1.4897, "step": 11980 }, { "epoch": 0.5327464319060957, "grad_norm": 0.06267430633306503, "learning_rate": 0.0004826801184846774, "loss": 1.4912, "step": 11982 }, { "epoch": 0.5328353563647681, "grad_norm": 0.06361742317676544, "learning_rate": 0.0004825340183769499, "loss": 1.4931, "step": 11984 }, { "epoch": 0.5329242808234405, "grad_norm": 0.062050919979810715, "learning_rate": 0.0004823879197622916, "loss": 1.5026, "step": 11986 }, { "epoch": 0.5330132052821128, "grad_norm": 0.06152861937880516, "learning_rate": 0.00048224182265319134, "loss": 1.4903, "step": 11988 }, { "epoch": 0.5331021297407852, "grad_norm": 0.061182718724012375, "learning_rate": 0.0004820957270621385, "loss": 1.4921, "step": 11990 }, { "epoch": 0.5331910541994576, "grad_norm": 0.06338909268379211, "learning_rate": 0.00048194963300162184, "loss": 1.4925, "step": 11992 }, { "epoch": 0.5332799786581299, "grad_norm": 0.06249266117811203, "learning_rate": 0.0004818035404841299, "loss": 1.4878, "step": 11994 }, { "epoch": 0.5333689031168023, "grad_norm": 0.06300443410873413, "learning_rate": 0.0004816574495221517, "loss": 1.4991, "step": 11996 }, { "epoch": 0.5334578275754747, "grad_norm": 0.06762851029634476, "learning_rate": 0.0004815113601281756, "loss": 1.4926, "step": 11998 }, { "epoch": 0.533546752034147, "grad_norm": 0.062195733189582825, "learning_rate": 0.0004813652723146896, "loss": 1.4881, "step": 12000 }, { "epoch": 0.533546752034147, "eval_loss": 1.474120020866394, "eval_runtime": 12.3992, "eval_samples_per_second": 557.296, "eval_steps_per_second": 69.682, "step": 12000 }, { "epoch": 0.5336356764928194, "grad_norm": 0.06374084204435349, "learning_rate": 0.0004812191860941826, "loss": 1.4982, "step": 12002 }, { "epoch": 0.5337246009514917, "grad_norm": 0.06320127099752426, "learning_rate": 0.00048107310147914203, "loss": 1.4914, "step": 12004 }, { "epoch": 0.533813525410164, "grad_norm": 0.06292546540498734, "learning_rate": 0.00048092701848205646, "loss": 1.4917, "step": 12006 }, { "epoch": 0.5339024498688364, "grad_norm": 0.06197582930326462, "learning_rate": 0.00048078093711541325, "loss": 1.4799, "step": 12008 }, { "epoch": 0.5339913743275088, "grad_norm": 0.06429006904363632, "learning_rate": 0.00048063485739170014, "loss": 1.4908, "step": 12010 }, { "epoch": 0.5340802987861811, "grad_norm": 0.06294738501310349, "learning_rate": 0.0004804887793234049, "loss": 1.4883, "step": 12012 }, { "epoch": 0.5341692232448535, "grad_norm": 0.06511865556240082, "learning_rate": 0.00048034270292301475, "loss": 1.4928, "step": 12014 }, { "epoch": 0.5342581477035259, "grad_norm": 0.0640450119972229, "learning_rate": 0.0004801966282030169, "loss": 1.4937, "step": 12016 }, { "epoch": 0.5343470721621982, "grad_norm": 0.062151722609996796, "learning_rate": 0.0004800505551758986, "loss": 1.49, "step": 12018 }, { "epoch": 0.5344359966208706, "grad_norm": 0.06464537233114243, "learning_rate": 0.0004799044838541467, "loss": 1.4898, "step": 12020 }, { "epoch": 0.534524921079543, "grad_norm": 0.06477969139814377, "learning_rate": 0.0004797584142502482, "loss": 1.4911, "step": 12022 }, { "epoch": 0.5346138455382153, "grad_norm": 0.06363675743341446, "learning_rate": 0.00047961234637668973, "loss": 1.493, "step": 12024 }, { "epoch": 0.5347027699968876, "grad_norm": 0.06466624140739441, "learning_rate": 0.0004794662802459574, "loss": 1.4926, "step": 12026 }, { "epoch": 0.53479169445556, "grad_norm": 0.06489887833595276, "learning_rate": 0.00047932021587053833, "loss": 1.4876, "step": 12028 }, { "epoch": 0.5348806189142323, "grad_norm": 0.06438994407653809, "learning_rate": 0.0004791741532629181, "loss": 1.4938, "step": 12030 }, { "epoch": 0.5349695433729047, "grad_norm": 0.06246186047792435, "learning_rate": 0.000479028092435583, "loss": 1.4969, "step": 12032 }, { "epoch": 0.5350584678315771, "grad_norm": 0.06295336037874222, "learning_rate": 0.0004788820334010191, "loss": 1.4918, "step": 12034 }, { "epoch": 0.5351473922902494, "grad_norm": 0.062076494097709656, "learning_rate": 0.000478735976171712, "loss": 1.4924, "step": 12036 }, { "epoch": 0.5352363167489218, "grad_norm": 0.061363715678453445, "learning_rate": 0.0004785899207601473, "loss": 1.4896, "step": 12038 }, { "epoch": 0.5353252412075942, "grad_norm": 0.06081344187259674, "learning_rate": 0.0004784438671788106, "loss": 1.4919, "step": 12040 }, { "epoch": 0.5354141656662665, "grad_norm": 0.06194039806723595, "learning_rate": 0.00047829781544018703, "loss": 1.4896, "step": 12042 }, { "epoch": 0.5355030901249389, "grad_norm": 0.06131895259022713, "learning_rate": 0.00047815176555676193, "loss": 1.4981, "step": 12044 }, { "epoch": 0.5355920145836113, "grad_norm": 0.060021188110113144, "learning_rate": 0.00047800571754102024, "loss": 1.4876, "step": 12046 }, { "epoch": 0.5356809390422835, "grad_norm": 0.06131913512945175, "learning_rate": 0.00047785967140544656, "loss": 1.4898, "step": 12048 }, { "epoch": 0.5357698635009559, "grad_norm": 0.061648186296224594, "learning_rate": 0.00047771362716252585, "loss": 1.4876, "step": 12050 }, { "epoch": 0.5358587879596283, "grad_norm": 0.061754439026117325, "learning_rate": 0.0004775675848247427, "loss": 1.4927, "step": 12052 }, { "epoch": 0.5359477124183006, "grad_norm": 0.06522786617279053, "learning_rate": 0.00047742154440458083, "loss": 1.4918, "step": 12054 }, { "epoch": 0.536036636876973, "grad_norm": 0.06362829357385635, "learning_rate": 0.0004772755059145252, "loss": 1.4909, "step": 12056 }, { "epoch": 0.5361255613356454, "grad_norm": 0.06584829092025757, "learning_rate": 0.0004771294693670592, "loss": 1.4919, "step": 12058 }, { "epoch": 0.5362144857943177, "grad_norm": 0.06313332915306091, "learning_rate": 0.0004769834347746672, "loss": 1.4916, "step": 12060 }, { "epoch": 0.5363034102529901, "grad_norm": 0.06333259493112564, "learning_rate": 0.00047683740214983256, "loss": 1.487, "step": 12062 }, { "epoch": 0.5363923347116625, "grad_norm": 0.06263703107833862, "learning_rate": 0.00047669137150503876, "loss": 1.485, "step": 12064 }, { "epoch": 0.5364812591703348, "grad_norm": 0.06329838186502457, "learning_rate": 0.0004765453428527693, "loss": 1.4955, "step": 12066 }, { "epoch": 0.5365701836290072, "grad_norm": 0.06317045539617538, "learning_rate": 0.0004763993162055074, "loss": 1.4871, "step": 12068 }, { "epoch": 0.5366591080876795, "grad_norm": 0.06274587661027908, "learning_rate": 0.0004762532915757357, "loss": 1.4957, "step": 12070 }, { "epoch": 0.5367480325463518, "grad_norm": 0.06153559684753418, "learning_rate": 0.00047610726897593747, "loss": 1.491, "step": 12072 }, { "epoch": 0.5368369570050242, "grad_norm": 0.061654843389987946, "learning_rate": 0.00047596124841859505, "loss": 1.4954, "step": 12074 }, { "epoch": 0.5369258814636966, "grad_norm": 0.06349113583564758, "learning_rate": 0.0004758152299161912, "loss": 1.4941, "step": 12076 }, { "epoch": 0.5370148059223689, "grad_norm": 0.06033482402563095, "learning_rate": 0.0004756692134812081, "loss": 1.4873, "step": 12078 }, { "epoch": 0.5371037303810413, "grad_norm": 0.06259752810001373, "learning_rate": 0.00047552319912612764, "loss": 1.4953, "step": 12080 }, { "epoch": 0.5371926548397137, "grad_norm": 0.06465926766395569, "learning_rate": 0.00047537718686343225, "loss": 1.4911, "step": 12082 }, { "epoch": 0.537281579298386, "grad_norm": 0.06145221367478371, "learning_rate": 0.00047523117670560333, "loss": 1.4896, "step": 12084 }, { "epoch": 0.5373705037570584, "grad_norm": 0.06425707787275314, "learning_rate": 0.0004750851686651225, "loss": 1.4916, "step": 12086 }, { "epoch": 0.5374594282157308, "grad_norm": 0.06472969055175781, "learning_rate": 0.0004749391627544713, "loss": 1.4974, "step": 12088 }, { "epoch": 0.537548352674403, "grad_norm": 0.0711335614323616, "learning_rate": 0.0004747931589861308, "loss": 1.4928, "step": 12090 }, { "epoch": 0.5376372771330754, "grad_norm": 0.06172974407672882, "learning_rate": 0.0004746471573725823, "loss": 1.4879, "step": 12092 }, { "epoch": 0.5377262015917478, "grad_norm": 0.062273457646369934, "learning_rate": 0.0004745011579263065, "loss": 1.4867, "step": 12094 }, { "epoch": 0.5378151260504201, "grad_norm": 0.06254924833774567, "learning_rate": 0.00047435516065978396, "loss": 1.4943, "step": 12096 }, { "epoch": 0.5379040505090925, "grad_norm": 0.06490933150053024, "learning_rate": 0.0004742091655854954, "loss": 1.4962, "step": 12098 }, { "epoch": 0.5379929749677649, "grad_norm": 0.06268811225891113, "learning_rate": 0.0004740631727159211, "loss": 1.4896, "step": 12100 }, { "epoch": 0.5380818994264372, "grad_norm": 0.06337209790945053, "learning_rate": 0.00047391718206354097, "loss": 1.4944, "step": 12102 }, { "epoch": 0.5381708238851096, "grad_norm": 0.06212884932756424, "learning_rate": 0.0004737711936408351, "loss": 1.4923, "step": 12104 }, { "epoch": 0.538259748343782, "grad_norm": 0.06264811009168625, "learning_rate": 0.00047362520746028336, "loss": 1.4862, "step": 12106 }, { "epoch": 0.5383486728024544, "grad_norm": 0.06359831243753433, "learning_rate": 0.00047347922353436475, "loss": 1.4975, "step": 12108 }, { "epoch": 0.5384375972611267, "grad_norm": 0.06291768699884415, "learning_rate": 0.00047333324187555934, "loss": 1.4913, "step": 12110 }, { "epoch": 0.538526521719799, "grad_norm": 0.06421522051095963, "learning_rate": 0.0004731872624963456, "loss": 1.493, "step": 12112 }, { "epoch": 0.5386154461784713, "grad_norm": 0.06364668905735016, "learning_rate": 0.0004730412854092031, "loss": 1.4885, "step": 12114 }, { "epoch": 0.5387043706371437, "grad_norm": 0.06267435848712921, "learning_rate": 0.0004728953106266103, "loss": 1.4859, "step": 12116 }, { "epoch": 0.5387932950958161, "grad_norm": 0.06271584331989288, "learning_rate": 0.0004727493381610456, "loss": 1.488, "step": 12118 }, { "epoch": 0.5388822195544885, "grad_norm": 0.06468139588832855, "learning_rate": 0.0004726033680249877, "loss": 1.4919, "step": 12120 }, { "epoch": 0.5389711440131608, "grad_norm": 0.06396504491567612, "learning_rate": 0.0004724574002309147, "loss": 1.4947, "step": 12122 }, { "epoch": 0.5390600684718332, "grad_norm": 0.06503921747207642, "learning_rate": 0.00047231143479130436, "loss": 1.4909, "step": 12124 }, { "epoch": 0.5391489929305056, "grad_norm": 0.06351353228092194, "learning_rate": 0.00047216547171863467, "loss": 1.492, "step": 12126 }, { "epoch": 0.5392379173891779, "grad_norm": 0.06411804258823395, "learning_rate": 0.000472019511025383, "loss": 1.4905, "step": 12128 }, { "epoch": 0.5393268418478503, "grad_norm": 0.06250099092721939, "learning_rate": 0.000471873552724027, "loss": 1.4884, "step": 12130 }, { "epoch": 0.5394157663065227, "grad_norm": 0.06506571918725967, "learning_rate": 0.00047172759682704375, "loss": 1.4917, "step": 12132 }, { "epoch": 0.5395046907651949, "grad_norm": 0.06468180567026138, "learning_rate": 0.0004715816433469097, "loss": 1.493, "step": 12134 }, { "epoch": 0.5395936152238673, "grad_norm": 0.06447789818048477, "learning_rate": 0.00047143569229610247, "loss": 1.4964, "step": 12136 }, { "epoch": 0.5396825396825397, "grad_norm": 0.06448780745267868, "learning_rate": 0.00047128974368709797, "loss": 1.4869, "step": 12138 }, { "epoch": 0.539771464141212, "grad_norm": 0.06413755565881729, "learning_rate": 0.00047114379753237265, "loss": 1.4866, "step": 12140 }, { "epoch": 0.5398603885998844, "grad_norm": 0.0629299134016037, "learning_rate": 0.0004709978538444027, "loss": 1.4883, "step": 12142 }, { "epoch": 0.5399493130585568, "grad_norm": 0.06269446760416031, "learning_rate": 0.000470851912635664, "loss": 1.4944, "step": 12144 }, { "epoch": 0.5400382375172291, "grad_norm": 0.0642932578921318, "learning_rate": 0.0004707059739186323, "loss": 1.4922, "step": 12146 }, { "epoch": 0.5401271619759015, "grad_norm": 0.06270511448383331, "learning_rate": 0.000470560037705783, "loss": 1.4956, "step": 12148 }, { "epoch": 0.5402160864345739, "grad_norm": 0.06323356926441193, "learning_rate": 0.0004704141040095914, "loss": 1.4927, "step": 12150 }, { "epoch": 0.5403050108932462, "grad_norm": 0.06155591458082199, "learning_rate": 0.00047026817284253264, "loss": 1.492, "step": 12152 }, { "epoch": 0.5403939353519186, "grad_norm": 0.06190359592437744, "learning_rate": 0.0004701222442170815, "loss": 1.4937, "step": 12154 }, { "epoch": 0.5404828598105909, "grad_norm": 0.06423845887184143, "learning_rate": 0.00046997631814571245, "loss": 1.4927, "step": 12156 }, { "epoch": 0.5405717842692632, "grad_norm": 0.06188628450036049, "learning_rate": 0.0004698303946409002, "loss": 1.4844, "step": 12158 }, { "epoch": 0.5406607087279356, "grad_norm": 0.06377921998500824, "learning_rate": 0.00046968447371511883, "loss": 1.4897, "step": 12160 }, { "epoch": 0.540749633186608, "grad_norm": 0.06156056001782417, "learning_rate": 0.00046953855538084193, "loss": 1.4852, "step": 12162 }, { "epoch": 0.5408385576452803, "grad_norm": 0.06201445311307907, "learning_rate": 0.0004693926396505439, "loss": 1.4835, "step": 12164 }, { "epoch": 0.5409274821039527, "grad_norm": 0.0629076287150383, "learning_rate": 0.0004692467265366976, "loss": 1.4976, "step": 12166 }, { "epoch": 0.5410164065626251, "grad_norm": 0.062005601823329926, "learning_rate": 0.00046910081605177695, "loss": 1.4922, "step": 12168 }, { "epoch": 0.5411053310212974, "grad_norm": 0.06382474303245544, "learning_rate": 0.0004689549082082546, "loss": 1.4909, "step": 12170 }, { "epoch": 0.5411942554799698, "grad_norm": 0.061615731567144394, "learning_rate": 0.00046880900301860337, "loss": 1.4913, "step": 12172 }, { "epoch": 0.5412831799386422, "grad_norm": 0.061972636729478836, "learning_rate": 0.00046866310049529613, "loss": 1.485, "step": 12174 }, { "epoch": 0.5413721043973145, "grad_norm": 0.06166384369134903, "learning_rate": 0.0004685172006508052, "loss": 1.4903, "step": 12176 }, { "epoch": 0.5414610288559868, "grad_norm": 0.06348898261785507, "learning_rate": 0.00046837130349760257, "loss": 1.4945, "step": 12178 }, { "epoch": 0.5415499533146592, "grad_norm": 0.062341634184122086, "learning_rate": 0.0004682254090481604, "loss": 1.4892, "step": 12180 }, { "epoch": 0.5416388777733315, "grad_norm": 0.062497496604919434, "learning_rate": 0.0004680795173149502, "loss": 1.4911, "step": 12182 }, { "epoch": 0.5417278022320039, "grad_norm": 0.06050288304686546, "learning_rate": 0.0004679336283104436, "loss": 1.4854, "step": 12184 }, { "epoch": 0.5418167266906763, "grad_norm": 0.061905790120363235, "learning_rate": 0.00046778774204711196, "loss": 1.494, "step": 12186 }, { "epoch": 0.5419056511493486, "grad_norm": 0.0627816841006279, "learning_rate": 0.00046764185853742564, "loss": 1.4943, "step": 12188 }, { "epoch": 0.541994575608021, "grad_norm": 0.06469935178756714, "learning_rate": 0.0004674959777938562, "loss": 1.4926, "step": 12190 }, { "epoch": 0.5420835000666934, "grad_norm": 0.06236010789871216, "learning_rate": 0.0004673500998288738, "loss": 1.4896, "step": 12192 }, { "epoch": 0.5421724245253657, "grad_norm": 0.06338387727737427, "learning_rate": 0.00046720422465494854, "loss": 1.4871, "step": 12194 }, { "epoch": 0.5422613489840381, "grad_norm": 0.06159103289246559, "learning_rate": 0.00046705835228455083, "loss": 1.4935, "step": 12196 }, { "epoch": 0.5423502734427104, "grad_norm": 0.06336644291877747, "learning_rate": 0.0004669124827301503, "loss": 1.4883, "step": 12198 }, { "epoch": 0.5424391979013827, "grad_norm": 0.061430975794792175, "learning_rate": 0.00046676661600421657, "loss": 1.4915, "step": 12200 }, { "epoch": 0.5425281223600551, "grad_norm": 0.06243472546339035, "learning_rate": 0.000466620752119219, "loss": 1.491, "step": 12202 }, { "epoch": 0.5426170468187275, "grad_norm": 0.06282027810811996, "learning_rate": 0.0004664748910876265, "loss": 1.4878, "step": 12204 }, { "epoch": 0.5427059712773998, "grad_norm": 0.06300549954175949, "learning_rate": 0.00046632903292190823, "loss": 1.493, "step": 12206 }, { "epoch": 0.5427948957360722, "grad_norm": 0.062161967158317566, "learning_rate": 0.00046618317763453263, "loss": 1.4932, "step": 12208 }, { "epoch": 0.5428838201947446, "grad_norm": 0.062358465045690536, "learning_rate": 0.0004660373252379677, "loss": 1.4947, "step": 12210 }, { "epoch": 0.5429727446534169, "grad_norm": 0.06448014080524445, "learning_rate": 0.00046589147574468227, "loss": 1.4914, "step": 12212 }, { "epoch": 0.5430616691120893, "grad_norm": 0.06277332454919815, "learning_rate": 0.00046574562916714345, "loss": 1.4838, "step": 12214 }, { "epoch": 0.5431505935707617, "grad_norm": 0.06348789483308792, "learning_rate": 0.0004655997855178195, "loss": 1.4943, "step": 12216 }, { "epoch": 0.543239518029434, "grad_norm": 0.0647101104259491, "learning_rate": 0.0004654539448091774, "loss": 1.4911, "step": 12218 }, { "epoch": 0.5433284424881063, "grad_norm": 0.06389828771352768, "learning_rate": 0.0004653081070536843, "loss": 1.4876, "step": 12220 }, { "epoch": 0.5434173669467787, "grad_norm": 0.06637488305568695, "learning_rate": 0.00046516227226380707, "loss": 1.4933, "step": 12222 }, { "epoch": 0.543506291405451, "grad_norm": 0.06405807286500931, "learning_rate": 0.00046501644045201244, "loss": 1.4877, "step": 12224 }, { "epoch": 0.5435952158641234, "grad_norm": 0.06238565593957901, "learning_rate": 0.0004648706116307664, "loss": 1.49, "step": 12226 }, { "epoch": 0.5436841403227958, "grad_norm": 0.06455307453870773, "learning_rate": 0.0004647247858125355, "loss": 1.4907, "step": 12228 }, { "epoch": 0.5437730647814681, "grad_norm": 0.06793267279863358, "learning_rate": 0.0004645789630097854, "loss": 1.4835, "step": 12230 }, { "epoch": 0.5438619892401405, "grad_norm": 0.06355708837509155, "learning_rate": 0.0004644331432349815, "loss": 1.4914, "step": 12232 }, { "epoch": 0.5439509136988129, "grad_norm": 0.06357616931200027, "learning_rate": 0.0004642873265005893, "loss": 1.4844, "step": 12234 }, { "epoch": 0.5440398381574852, "grad_norm": 0.06270436942577362, "learning_rate": 0.0004641415128190737, "loss": 1.4898, "step": 12236 }, { "epoch": 0.5441287626161576, "grad_norm": 0.06114961579442024, "learning_rate": 0.0004639957022028998, "loss": 1.4918, "step": 12238 }, { "epoch": 0.54421768707483, "grad_norm": 0.06168103963136673, "learning_rate": 0.00046384989466453203, "loss": 1.4879, "step": 12240 }, { "epoch": 0.5443066115335022, "grad_norm": 0.06130845472216606, "learning_rate": 0.0004637040902164342, "loss": 1.4863, "step": 12242 }, { "epoch": 0.5443955359921746, "grad_norm": 0.060740306973457336, "learning_rate": 0.00046355828887107103, "loss": 1.4925, "step": 12244 }, { "epoch": 0.544484460450847, "grad_norm": 0.06276367604732513, "learning_rate": 0.00046341249064090585, "loss": 1.4919, "step": 12246 }, { "epoch": 0.5445733849095193, "grad_norm": 0.06420931220054626, "learning_rate": 0.00046326669553840204, "loss": 1.4946, "step": 12248 }, { "epoch": 0.5446623093681917, "grad_norm": 0.06452270597219467, "learning_rate": 0.00046312090357602316, "loss": 1.4963, "step": 12250 }, { "epoch": 0.5447512338268641, "grad_norm": 0.06321154534816742, "learning_rate": 0.0004629751147662318, "loss": 1.4855, "step": 12252 }, { "epoch": 0.5448401582855364, "grad_norm": 0.06343185901641846, "learning_rate": 0.0004628293291214909, "loss": 1.4905, "step": 12254 }, { "epoch": 0.5449290827442088, "grad_norm": 0.062312331050634384, "learning_rate": 0.0004626835466542628, "loss": 1.4856, "step": 12256 }, { "epoch": 0.5450180072028812, "grad_norm": 0.0644093006849289, "learning_rate": 0.0004625377673770094, "loss": 1.4834, "step": 12258 }, { "epoch": 0.5451069316615536, "grad_norm": 0.06491252779960632, "learning_rate": 0.0004623919913021929, "loss": 1.4909, "step": 12260 }, { "epoch": 0.5451958561202259, "grad_norm": 0.06439858675003052, "learning_rate": 0.00046224621844227477, "loss": 1.4882, "step": 12262 }, { "epoch": 0.5452847805788982, "grad_norm": 0.06259090453386307, "learning_rate": 0.00046210044880971596, "loss": 1.487, "step": 12264 }, { "epoch": 0.5453737050375705, "grad_norm": 0.06402361392974854, "learning_rate": 0.0004619546824169781, "loss": 1.4849, "step": 12266 }, { "epoch": 0.5454626294962429, "grad_norm": 0.06298209726810455, "learning_rate": 0.0004618089192765214, "loss": 1.4888, "step": 12268 }, { "epoch": 0.5455515539549153, "grad_norm": 0.06361270695924759, "learning_rate": 0.0004616631594008069, "loss": 1.4922, "step": 12270 }, { "epoch": 0.5456404784135876, "grad_norm": 0.06214560195803642, "learning_rate": 0.00046151740280229433, "loss": 1.4865, "step": 12272 }, { "epoch": 0.54572940287226, "grad_norm": 0.06411103904247284, "learning_rate": 0.0004613716494934437, "loss": 1.4955, "step": 12274 }, { "epoch": 0.5458183273309324, "grad_norm": 0.06292589753866196, "learning_rate": 0.0004612258994867147, "loss": 1.4852, "step": 12276 }, { "epoch": 0.5459072517896048, "grad_norm": 0.06273365765810013, "learning_rate": 0.00046108015279456676, "loss": 1.4856, "step": 12278 }, { "epoch": 0.5459961762482771, "grad_norm": 0.06191820278763771, "learning_rate": 0.00046093440942945874, "loss": 1.4907, "step": 12280 }, { "epoch": 0.5460851007069495, "grad_norm": 0.06268199533224106, "learning_rate": 0.00046078866940384974, "loss": 1.4926, "step": 12282 }, { "epoch": 0.5461740251656219, "grad_norm": 0.06177908927202225, "learning_rate": 0.00046064293273019797, "loss": 1.4907, "step": 12284 }, { "epoch": 0.5462629496242941, "grad_norm": 0.06444894522428513, "learning_rate": 0.0004604971994209617, "loss": 1.4875, "step": 12286 }, { "epoch": 0.5463518740829665, "grad_norm": 0.06176155060529709, "learning_rate": 0.000460351469488599, "loss": 1.4836, "step": 12288 }, { "epoch": 0.5464407985416389, "grad_norm": 0.06493420898914337, "learning_rate": 0.0004602057429455673, "loss": 1.4828, "step": 12290 }, { "epoch": 0.5465297230003112, "grad_norm": 0.0612507201731205, "learning_rate": 0.00046006001980432423, "loss": 1.487, "step": 12292 }, { "epoch": 0.5466186474589836, "grad_norm": 0.06377555429935455, "learning_rate": 0.0004599143000773267, "loss": 1.483, "step": 12294 }, { "epoch": 0.546707571917656, "grad_norm": 0.06083401292562485, "learning_rate": 0.00045976858377703116, "loss": 1.4897, "step": 12296 }, { "epoch": 0.5467964963763283, "grad_norm": 0.06468698382377625, "learning_rate": 0.0004596228709158947, "loss": 1.4917, "step": 12298 }, { "epoch": 0.5468854208350007, "grad_norm": 0.06334157288074493, "learning_rate": 0.0004594771615063731, "loss": 1.4881, "step": 12300 }, { "epoch": 0.5469743452936731, "grad_norm": 0.06522412598133087, "learning_rate": 0.00045933145556092215, "loss": 1.4908, "step": 12302 }, { "epoch": 0.5470632697523454, "grad_norm": 0.06641488522291183, "learning_rate": 0.0004591857530919977, "loss": 1.4941, "step": 12304 }, { "epoch": 0.5471521942110178, "grad_norm": 0.06378214806318283, "learning_rate": 0.0004590400541120549, "loss": 1.4886, "step": 12306 }, { "epoch": 0.54724111866969, "grad_norm": 0.06377306580543518, "learning_rate": 0.00045889435863354886, "loss": 1.4878, "step": 12308 }, { "epoch": 0.5473300431283624, "grad_norm": 0.062183529138565063, "learning_rate": 0.0004587486666689342, "loss": 1.4899, "step": 12310 }, { "epoch": 0.5474189675870348, "grad_norm": 0.062393732368946075, "learning_rate": 0.0004586029782306651, "loss": 1.4875, "step": 12312 }, { "epoch": 0.5475078920457072, "grad_norm": 0.06595607101917267, "learning_rate": 0.0004584572933311961, "loss": 1.4902, "step": 12314 }, { "epoch": 0.5475968165043795, "grad_norm": 0.06217242404818535, "learning_rate": 0.0004583116119829807, "loss": 1.4901, "step": 12316 }, { "epoch": 0.5476857409630519, "grad_norm": 0.06375078856945038, "learning_rate": 0.0004581659341984722, "loss": 1.4894, "step": 12318 }, { "epoch": 0.5477746654217243, "grad_norm": 0.06128295138478279, "learning_rate": 0.00045802025999012433, "loss": 1.4846, "step": 12320 }, { "epoch": 0.5478635898803966, "grad_norm": 0.06317851692438126, "learning_rate": 0.00045787458937038923, "loss": 1.4912, "step": 12322 }, { "epoch": 0.547952514339069, "grad_norm": 0.06150876730680466, "learning_rate": 0.00045772892235172025, "loss": 1.4873, "step": 12324 }, { "epoch": 0.5480414387977414, "grad_norm": 0.061902642250061035, "learning_rate": 0.00045758325894656917, "loss": 1.4865, "step": 12326 }, { "epoch": 0.5481303632564136, "grad_norm": 0.06313225626945496, "learning_rate": 0.0004574375991673879, "loss": 1.4819, "step": 12328 }, { "epoch": 0.548219287715086, "grad_norm": 0.06134433671832085, "learning_rate": 0.0004572919430266284, "loss": 1.4875, "step": 12330 }, { "epoch": 0.5483082121737584, "grad_norm": 0.06121361255645752, "learning_rate": 0.0004571462905367417, "loss": 1.4894, "step": 12332 }, { "epoch": 0.5483971366324307, "grad_norm": 0.06105126067996025, "learning_rate": 0.0004570006417101789, "loss": 1.4887, "step": 12334 }, { "epoch": 0.5484860610911031, "grad_norm": 0.061685241758823395, "learning_rate": 0.0004568549965593908, "loss": 1.4945, "step": 12336 }, { "epoch": 0.5485749855497755, "grad_norm": 0.06117040663957596, "learning_rate": 0.0004567093550968277, "loss": 1.4852, "step": 12338 }, { "epoch": 0.5486639100084478, "grad_norm": 0.06458423286676407, "learning_rate": 0.00045656371733493977, "loss": 1.4911, "step": 12340 }, { "epoch": 0.5487528344671202, "grad_norm": 0.06184089928865433, "learning_rate": 0.00045641808328617667, "loss": 1.4882, "step": 12342 }, { "epoch": 0.5488417589257926, "grad_norm": 0.06287356466054916, "learning_rate": 0.0004562724529629878, "loss": 1.4925, "step": 12344 }, { "epoch": 0.5489306833844649, "grad_norm": 0.06341803073883057, "learning_rate": 0.00045612682637782246, "loss": 1.487, "step": 12346 }, { "epoch": 0.5490196078431373, "grad_norm": 0.06103647127747536, "learning_rate": 0.00045598120354312945, "loss": 1.4941, "step": 12348 }, { "epoch": 0.5491085323018096, "grad_norm": 0.061173129826784134, "learning_rate": 0.0004558355844713568, "loss": 1.4914, "step": 12350 }, { "epoch": 0.5491974567604819, "grad_norm": 0.06562421470880508, "learning_rate": 0.00045568996917495335, "loss": 1.4899, "step": 12352 }, { "epoch": 0.5492863812191543, "grad_norm": 0.06319348514080048, "learning_rate": 0.00045554435766636647, "loss": 1.4836, "step": 12354 }, { "epoch": 0.5493753056778267, "grad_norm": 0.06338098645210266, "learning_rate": 0.0004553987499580438, "loss": 1.4881, "step": 12356 }, { "epoch": 0.549464230136499, "grad_norm": 0.06358245015144348, "learning_rate": 0.0004552531460624325, "loss": 1.4929, "step": 12358 }, { "epoch": 0.5495531545951714, "grad_norm": 0.06360200047492981, "learning_rate": 0.00045510754599197944, "loss": 1.4937, "step": 12360 }, { "epoch": 0.5496420790538438, "grad_norm": 0.06298113614320755, "learning_rate": 0.0004549619497591313, "loss": 1.4883, "step": 12362 }, { "epoch": 0.5497310035125161, "grad_norm": 0.0645737498998642, "learning_rate": 0.0004548163573763341, "loss": 1.4853, "step": 12364 }, { "epoch": 0.5498199279711885, "grad_norm": 0.06510405987501144, "learning_rate": 0.00045467076885603375, "loss": 1.4872, "step": 12366 }, { "epoch": 0.5499088524298609, "grad_norm": 0.06831739097833633, "learning_rate": 0.00045452518421067596, "loss": 1.4967, "step": 12368 }, { "epoch": 0.5499977768885332, "grad_norm": 0.06525734066963196, "learning_rate": 0.0004543796034527059, "loss": 1.487, "step": 12370 }, { "epoch": 0.5500867013472055, "grad_norm": 0.06312596797943115, "learning_rate": 0.00045423402659456805, "loss": 1.4896, "step": 12372 }, { "epoch": 0.5501756258058779, "grad_norm": 0.06690060347318649, "learning_rate": 0.0004540884536487076, "loss": 1.4905, "step": 12374 }, { "epoch": 0.5502645502645502, "grad_norm": 0.06204893812537193, "learning_rate": 0.0004539428846275682, "loss": 1.4871, "step": 12376 }, { "epoch": 0.5503534747232226, "grad_norm": 0.06456583738327026, "learning_rate": 0.00045379731954359425, "loss": 1.4915, "step": 12378 }, { "epoch": 0.550442399181895, "grad_norm": 0.06286507844924927, "learning_rate": 0.00045365175840922893, "loss": 1.4849, "step": 12380 }, { "epoch": 0.5505313236405673, "grad_norm": 0.06443874537944794, "learning_rate": 0.00045350620123691543, "loss": 1.4932, "step": 12382 }, { "epoch": 0.5506202480992397, "grad_norm": 0.06251480430364609, "learning_rate": 0.00045336064803909687, "loss": 1.4924, "step": 12384 }, { "epoch": 0.5507091725579121, "grad_norm": 0.06327617913484573, "learning_rate": 0.00045321509882821566, "loss": 1.4907, "step": 12386 }, { "epoch": 0.5507980970165844, "grad_norm": 0.06285477429628372, "learning_rate": 0.0004530695536167139, "loss": 1.4884, "step": 12388 }, { "epoch": 0.5508870214752568, "grad_norm": 0.06266319751739502, "learning_rate": 0.00045292401241703355, "loss": 1.487, "step": 12390 }, { "epoch": 0.5509759459339292, "grad_norm": 0.06416985392570496, "learning_rate": 0.000452778475241616, "loss": 1.491, "step": 12392 }, { "epoch": 0.5510648703926014, "grad_norm": 0.06277859956026077, "learning_rate": 0.00045263294210290265, "loss": 1.4911, "step": 12394 }, { "epoch": 0.5511537948512738, "grad_norm": 0.06268072873353958, "learning_rate": 0.0004524874130133341, "loss": 1.4863, "step": 12396 }, { "epoch": 0.5512427193099462, "grad_norm": 0.06368980556726456, "learning_rate": 0.0004523418879853507, "loss": 1.4886, "step": 12398 }, { "epoch": 0.5513316437686185, "grad_norm": 0.06306835263967514, "learning_rate": 0.0004521963670313929, "loss": 1.4822, "step": 12400 }, { "epoch": 0.5514205682272909, "grad_norm": 0.06334751844406128, "learning_rate": 0.0004520508501639004, "loss": 1.4873, "step": 12402 }, { "epoch": 0.5515094926859633, "grad_norm": 0.06404084712266922, "learning_rate": 0.0004519053373953122, "loss": 1.4901, "step": 12404 }, { "epoch": 0.5515984171446356, "grad_norm": 0.06552892923355103, "learning_rate": 0.0004517598287380681, "loss": 1.4888, "step": 12406 }, { "epoch": 0.551687341603308, "grad_norm": 0.06249921768903732, "learning_rate": 0.000451614324204606, "loss": 1.487, "step": 12408 }, { "epoch": 0.5517762660619804, "grad_norm": 0.06481725722551346, "learning_rate": 0.0004514688238073651, "loss": 1.4913, "step": 12410 }, { "epoch": 0.5518651905206527, "grad_norm": 0.06446146219968796, "learning_rate": 0.0004513233275587829, "loss": 1.4864, "step": 12412 }, { "epoch": 0.5519541149793251, "grad_norm": 0.0644034743309021, "learning_rate": 0.00045117783547129705, "loss": 1.4838, "step": 12414 }, { "epoch": 0.5520430394379974, "grad_norm": 0.06331180036067963, "learning_rate": 0.0004510323475573451, "loss": 1.4922, "step": 12416 }, { "epoch": 0.5521319638966697, "grad_norm": 0.06320036947727203, "learning_rate": 0.0004508868638293638, "loss": 1.4861, "step": 12418 }, { "epoch": 0.5522208883553421, "grad_norm": 0.06392395496368408, "learning_rate": 0.00045074138429978974, "loss": 1.4894, "step": 12420 }, { "epoch": 0.5523098128140145, "grad_norm": 0.06227342411875725, "learning_rate": 0.00045059590898105935, "loss": 1.4881, "step": 12422 }, { "epoch": 0.5523987372726868, "grad_norm": 0.06373913586139679, "learning_rate": 0.0004504504378856085, "loss": 1.4876, "step": 12424 }, { "epoch": 0.5524876617313592, "grad_norm": 0.06222604960203171, "learning_rate": 0.00045030497102587223, "loss": 1.4854, "step": 12426 }, { "epoch": 0.5525765861900316, "grad_norm": 0.06317233294248581, "learning_rate": 0.00045015950841428636, "loss": 1.4906, "step": 12428 }, { "epoch": 0.552665510648704, "grad_norm": 0.06286189705133438, "learning_rate": 0.00045001405006328505, "loss": 1.487, "step": 12430 }, { "epoch": 0.5527544351073763, "grad_norm": 0.06075560674071312, "learning_rate": 0.00044986859598530334, "loss": 1.4837, "step": 12432 }, { "epoch": 0.5528433595660487, "grad_norm": 0.0642814040184021, "learning_rate": 0.0004497231461927748, "loss": 1.4896, "step": 12434 }, { "epoch": 0.552932284024721, "grad_norm": 0.061715368181467056, "learning_rate": 0.00044957770069813325, "loss": 1.4897, "step": 12436 }, { "epoch": 0.5530212084833933, "grad_norm": 0.06312602758407593, "learning_rate": 0.0004494322595138122, "loss": 1.4886, "step": 12438 }, { "epoch": 0.5531101329420657, "grad_norm": 0.05953274294734001, "learning_rate": 0.00044928682265224436, "loss": 1.4871, "step": 12440 }, { "epoch": 0.553199057400738, "grad_norm": 0.06225527450442314, "learning_rate": 0.0004491413901258623, "loss": 1.4847, "step": 12442 }, { "epoch": 0.5532879818594104, "grad_norm": 0.06316383928060532, "learning_rate": 0.00044899596194709847, "loss": 1.4923, "step": 12444 }, { "epoch": 0.5533769063180828, "grad_norm": 0.06311739236116409, "learning_rate": 0.00044885053812838445, "loss": 1.4821, "step": 12446 }, { "epoch": 0.5534658307767552, "grad_norm": 0.06298642605543137, "learning_rate": 0.000448705118682152, "loss": 1.4868, "step": 12448 }, { "epoch": 0.5535547552354275, "grad_norm": 0.06279413402080536, "learning_rate": 0.00044855970362083197, "loss": 1.4835, "step": 12450 }, { "epoch": 0.5536436796940999, "grad_norm": 0.06067779287695885, "learning_rate": 0.0004484142929568552, "loss": 1.4835, "step": 12452 }, { "epoch": 0.5537326041527723, "grad_norm": 0.06174848973751068, "learning_rate": 0.000448268886702652, "loss": 1.4821, "step": 12454 }, { "epoch": 0.5538215286114446, "grad_norm": 0.06377837806940079, "learning_rate": 0.0004481234848706524, "loss": 1.4846, "step": 12456 }, { "epoch": 0.5539104530701169, "grad_norm": 0.061777420341968536, "learning_rate": 0.0004479780874732857, "loss": 1.4896, "step": 12458 }, { "epoch": 0.5539993775287892, "grad_norm": 0.0632123202085495, "learning_rate": 0.0004478326945229816, "loss": 1.4873, "step": 12460 }, { "epoch": 0.5540883019874616, "grad_norm": 0.06222397834062576, "learning_rate": 0.00044768730603216835, "loss": 1.4761, "step": 12462 }, { "epoch": 0.554177226446134, "grad_norm": 0.06313126534223557, "learning_rate": 0.0004475419220132751, "loss": 1.4866, "step": 12464 }, { "epoch": 0.5542661509048064, "grad_norm": 0.06050386652350426, "learning_rate": 0.00044739654247872933, "loss": 1.4873, "step": 12466 }, { "epoch": 0.5543550753634787, "grad_norm": 0.06345865875482559, "learning_rate": 0.0004472511674409589, "loss": 1.4869, "step": 12468 }, { "epoch": 0.5544439998221511, "grad_norm": 0.06424275785684586, "learning_rate": 0.00044710579691239116, "loss": 1.4879, "step": 12470 }, { "epoch": 0.5545329242808235, "grad_norm": 0.06436572223901749, "learning_rate": 0.0004469604309054531, "loss": 1.4837, "step": 12472 }, { "epoch": 0.5546218487394958, "grad_norm": 0.06335853785276413, "learning_rate": 0.00044681506943257095, "loss": 1.4916, "step": 12474 }, { "epoch": 0.5547107731981682, "grad_norm": 0.06440746039152145, "learning_rate": 0.00044666971250617116, "loss": 1.4837, "step": 12476 }, { "epoch": 0.5547996976568406, "grad_norm": 0.06688931584358215, "learning_rate": 0.0004465243601386795, "loss": 1.4906, "step": 12478 }, { "epoch": 0.5548886221155128, "grad_norm": 0.06155811622738838, "learning_rate": 0.0004463790123425209, "loss": 1.4918, "step": 12480 }, { "epoch": 0.5549775465741852, "grad_norm": 0.06473089009523392, "learning_rate": 0.00044623366913012087, "loss": 1.4893, "step": 12482 }, { "epoch": 0.5550664710328576, "grad_norm": 0.0628729984164238, "learning_rate": 0.00044608833051390344, "loss": 1.4888, "step": 12484 }, { "epoch": 0.5551553954915299, "grad_norm": 0.06283288449048996, "learning_rate": 0.0004459429965062935, "loss": 1.4896, "step": 12486 }, { "epoch": 0.5552443199502023, "grad_norm": 0.061963021755218506, "learning_rate": 0.00044579766711971423, "loss": 1.4856, "step": 12488 }, { "epoch": 0.5553332444088747, "grad_norm": 0.06432387232780457, "learning_rate": 0.0004456523423665892, "loss": 1.4847, "step": 12490 }, { "epoch": 0.555422168867547, "grad_norm": 0.06147773563861847, "learning_rate": 0.00044550702225934155, "loss": 1.4901, "step": 12492 }, { "epoch": 0.5555110933262194, "grad_norm": 0.0623924657702446, "learning_rate": 0.0004453617068103937, "loss": 1.4929, "step": 12494 }, { "epoch": 0.5556000177848918, "grad_norm": 0.06333832442760468, "learning_rate": 0.0004452163960321679, "loss": 1.4814, "step": 12496 }, { "epoch": 0.5556889422435641, "grad_norm": 0.06210675835609436, "learning_rate": 0.0004450710899370861, "loss": 1.4911, "step": 12498 }, { "epoch": 0.5557778667022365, "grad_norm": 0.06140570342540741, "learning_rate": 0.0004449257885375695, "loss": 1.4867, "step": 12500 }, { "epoch": 0.5557778667022365, "eval_loss": 1.47047758102417, "eval_runtime": 12.3971, "eval_samples_per_second": 557.39, "eval_steps_per_second": 69.694, "step": 12500 }, { "epoch": 0.5558667911609088, "grad_norm": 0.06514092534780502, "learning_rate": 0.00044478049184603925, "loss": 1.4915, "step": 12502 }, { "epoch": 0.5559557156195811, "grad_norm": 0.06373412162065506, "learning_rate": 0.00044463519987491607, "loss": 1.4812, "step": 12504 }, { "epoch": 0.5560446400782535, "grad_norm": 0.06132040545344353, "learning_rate": 0.0004444899126366196, "loss": 1.4855, "step": 12506 }, { "epoch": 0.5561335645369259, "grad_norm": 0.062154147773981094, "learning_rate": 0.0004443446301435704, "loss": 1.486, "step": 12508 }, { "epoch": 0.5562224889955982, "grad_norm": 0.0628192275762558, "learning_rate": 0.0004441993524081873, "loss": 1.4891, "step": 12510 }, { "epoch": 0.5563114134542706, "grad_norm": 0.06253869086503983, "learning_rate": 0.00044405407944288943, "loss": 1.4888, "step": 12512 }, { "epoch": 0.556400337912943, "grad_norm": 0.06273260712623596, "learning_rate": 0.0004439088112600955, "loss": 1.4872, "step": 12514 }, { "epoch": 0.5564892623716153, "grad_norm": 0.060649313032627106, "learning_rate": 0.00044376354787222346, "loss": 1.4841, "step": 12516 }, { "epoch": 0.5565781868302877, "grad_norm": 0.06059176102280617, "learning_rate": 0.00044361828929169125, "loss": 1.4883, "step": 12518 }, { "epoch": 0.5566671112889601, "grad_norm": 0.06078102067112923, "learning_rate": 0.00044347303553091616, "loss": 1.4899, "step": 12520 }, { "epoch": 0.5567560357476324, "grad_norm": 0.0650026723742485, "learning_rate": 0.000443327786602315, "loss": 1.4901, "step": 12522 }, { "epoch": 0.5568449602063047, "grad_norm": 0.06332740187644958, "learning_rate": 0.00044318254251830445, "loss": 1.4882, "step": 12524 }, { "epoch": 0.5569338846649771, "grad_norm": 0.06303809583187103, "learning_rate": 0.0004430373032913006, "loss": 1.4893, "step": 12526 }, { "epoch": 0.5570228091236494, "grad_norm": 0.061641015112400055, "learning_rate": 0.00044289206893371894, "loss": 1.4913, "step": 12528 }, { "epoch": 0.5571117335823218, "grad_norm": 0.062258537858724594, "learning_rate": 0.000442746839457975, "loss": 1.4891, "step": 12530 }, { "epoch": 0.5572006580409942, "grad_norm": 0.061097778379917145, "learning_rate": 0.0004426016148764835, "loss": 1.4916, "step": 12532 }, { "epoch": 0.5572895824996665, "grad_norm": 0.06280693411827087, "learning_rate": 0.0004424563952016589, "loss": 1.4904, "step": 12534 }, { "epoch": 0.5573785069583389, "grad_norm": 0.061958327889442444, "learning_rate": 0.00044231118044591545, "loss": 1.4892, "step": 12536 }, { "epoch": 0.5574674314170113, "grad_norm": 0.062417879700660706, "learning_rate": 0.0004421659706216661, "loss": 1.4855, "step": 12538 }, { "epoch": 0.5575563558756836, "grad_norm": 0.0647241473197937, "learning_rate": 0.00044202076574132484, "loss": 1.4862, "step": 12540 }, { "epoch": 0.557645280334356, "grad_norm": 0.06341023743152618, "learning_rate": 0.0004418755658173039, "loss": 1.4923, "step": 12542 }, { "epoch": 0.5577342047930284, "grad_norm": 0.062425922602415085, "learning_rate": 0.0004417303708620156, "loss": 1.4892, "step": 12544 }, { "epoch": 0.5578231292517006, "grad_norm": 0.0629575252532959, "learning_rate": 0.00044158518088787215, "loss": 1.489, "step": 12546 }, { "epoch": 0.557912053710373, "grad_norm": 0.060888003557920456, "learning_rate": 0.0004414399959072848, "loss": 1.4942, "step": 12548 }, { "epoch": 0.5580009781690454, "grad_norm": 0.0610843189060688, "learning_rate": 0.00044129481593266465, "loss": 1.4922, "step": 12550 }, { "epoch": 0.5580899026277177, "grad_norm": 0.06121043115854263, "learning_rate": 0.00044114964097642233, "loss": 1.4876, "step": 12552 }, { "epoch": 0.5581788270863901, "grad_norm": 0.062700554728508, "learning_rate": 0.00044100447105096803, "loss": 1.4922, "step": 12554 }, { "epoch": 0.5582677515450625, "grad_norm": 0.06433205306529999, "learning_rate": 0.00044085930616871163, "loss": 1.4865, "step": 12556 }, { "epoch": 0.5583566760037348, "grad_norm": 0.06155223399400711, "learning_rate": 0.0004407141463420624, "loss": 1.4894, "step": 12558 }, { "epoch": 0.5584456004624072, "grad_norm": 0.0634516254067421, "learning_rate": 0.00044056899158342893, "loss": 1.4841, "step": 12560 }, { "epoch": 0.5585345249210796, "grad_norm": 0.06104006990790367, "learning_rate": 0.0004404238419052203, "loss": 1.4868, "step": 12562 }, { "epoch": 0.558623449379752, "grad_norm": 0.06177747994661331, "learning_rate": 0.0004402786973198441, "loss": 1.4813, "step": 12564 }, { "epoch": 0.5587123738384242, "grad_norm": 0.06346866488456726, "learning_rate": 0.00044013355783970785, "loss": 1.4911, "step": 12566 }, { "epoch": 0.5588012982970966, "grad_norm": 0.06526315212249756, "learning_rate": 0.0004399884234772191, "loss": 1.4937, "step": 12568 }, { "epoch": 0.5588902227557689, "grad_norm": 0.0635879635810852, "learning_rate": 0.00043984329424478423, "loss": 1.4778, "step": 12570 }, { "epoch": 0.5589791472144413, "grad_norm": 0.06199366971850395, "learning_rate": 0.00043969817015480976, "loss": 1.4854, "step": 12572 }, { "epoch": 0.5590680716731137, "grad_norm": 0.06245632842183113, "learning_rate": 0.0004395530512197015, "loss": 1.4814, "step": 12574 }, { "epoch": 0.559156996131786, "grad_norm": 0.0628722682595253, "learning_rate": 0.0004394079374518647, "loss": 1.4936, "step": 12576 }, { "epoch": 0.5592459205904584, "grad_norm": 0.06258208304643631, "learning_rate": 0.00043926282886370445, "loss": 1.485, "step": 12578 }, { "epoch": 0.5593348450491308, "grad_norm": 0.06096351519227028, "learning_rate": 0.0004391177254676253, "loss": 1.4896, "step": 12580 }, { "epoch": 0.5594237695078031, "grad_norm": 0.06193548068404198, "learning_rate": 0.0004389726272760312, "loss": 1.4913, "step": 12582 }, { "epoch": 0.5595126939664755, "grad_norm": 0.06140163913369179, "learning_rate": 0.0004388275343013259, "loss": 1.4898, "step": 12584 }, { "epoch": 0.5596016184251479, "grad_norm": 0.06241277977824211, "learning_rate": 0.0004386824465559125, "loss": 1.4877, "step": 12586 }, { "epoch": 0.5596905428838201, "grad_norm": 0.06165548413991928, "learning_rate": 0.00043853736405219385, "loss": 1.482, "step": 12588 }, { "epoch": 0.5597794673424925, "grad_norm": 0.06252114474773407, "learning_rate": 0.0004383922868025723, "loss": 1.4912, "step": 12590 }, { "epoch": 0.5598683918011649, "grad_norm": 0.061745136976242065, "learning_rate": 0.00043824721481944924, "loss": 1.4839, "step": 12592 }, { "epoch": 0.5599573162598372, "grad_norm": 0.06240737810730934, "learning_rate": 0.00043810214811522674, "loss": 1.4879, "step": 12594 }, { "epoch": 0.5600462407185096, "grad_norm": 0.06228557974100113, "learning_rate": 0.00043795708670230527, "loss": 1.4882, "step": 12596 }, { "epoch": 0.560135165177182, "grad_norm": 0.06238147243857384, "learning_rate": 0.00043781203059308535, "loss": 1.4856, "step": 12598 }, { "epoch": 0.5602240896358543, "grad_norm": 0.06147747486829758, "learning_rate": 0.00043766697979996715, "loss": 1.4861, "step": 12600 }, { "epoch": 0.5603130140945267, "grad_norm": 0.0619262270629406, "learning_rate": 0.00043752193433535026, "loss": 1.4849, "step": 12602 }, { "epoch": 0.5604019385531991, "grad_norm": 0.06108873710036278, "learning_rate": 0.0004373768942116337, "loss": 1.4901, "step": 12604 }, { "epoch": 0.5604908630118715, "grad_norm": 0.06347453594207764, "learning_rate": 0.0004372318594412162, "loss": 1.4785, "step": 12606 }, { "epoch": 0.5605797874705438, "grad_norm": 0.060936614871025085, "learning_rate": 0.0004370868300364959, "loss": 1.488, "step": 12608 }, { "epoch": 0.5606687119292161, "grad_norm": 0.060470595955848694, "learning_rate": 0.00043694180600987076, "loss": 1.4801, "step": 12610 }, { "epoch": 0.5607576363878884, "grad_norm": 0.06342191249132156, "learning_rate": 0.000436796787373738, "loss": 1.4921, "step": 12612 }, { "epoch": 0.5608465608465608, "grad_norm": 0.06200144812464714, "learning_rate": 0.00043665177414049404, "loss": 1.4875, "step": 12614 }, { "epoch": 0.5609354853052332, "grad_norm": 0.06348490715026855, "learning_rate": 0.000436506766322536, "loss": 1.4866, "step": 12616 }, { "epoch": 0.5610244097639056, "grad_norm": 0.06057412549853325, "learning_rate": 0.0004363617639322592, "loss": 1.4826, "step": 12618 }, { "epoch": 0.5611133342225779, "grad_norm": 0.059886761009693146, "learning_rate": 0.0004362167669820592, "loss": 1.4859, "step": 12620 }, { "epoch": 0.5612022586812503, "grad_norm": 0.06247553601861, "learning_rate": 0.00043607177548433105, "loss": 1.4826, "step": 12622 }, { "epoch": 0.5612911831399227, "grad_norm": 0.06425140798091888, "learning_rate": 0.0004359267894514693, "loss": 1.4877, "step": 12624 }, { "epoch": 0.561380107598595, "grad_norm": 0.06306110322475433, "learning_rate": 0.0004357818088958679, "loss": 1.4864, "step": 12626 }, { "epoch": 0.5614690320572674, "grad_norm": 0.06385751813650131, "learning_rate": 0.00043563683382992057, "loss": 1.494, "step": 12628 }, { "epoch": 0.5615579565159398, "grad_norm": 0.0631057620048523, "learning_rate": 0.0004354918642660203, "loss": 1.4885, "step": 12630 }, { "epoch": 0.561646880974612, "grad_norm": 0.06637078523635864, "learning_rate": 0.0004353469002165597, "loss": 1.4927, "step": 12632 }, { "epoch": 0.5617358054332844, "grad_norm": 0.06537287682294846, "learning_rate": 0.0004352019416939311, "loss": 1.4901, "step": 12634 }, { "epoch": 0.5618247298919568, "grad_norm": 0.06411311775445938, "learning_rate": 0.00043505698871052585, "loss": 1.4912, "step": 12636 }, { "epoch": 0.5619136543506291, "grad_norm": 0.06516771763563156, "learning_rate": 0.00043491204127873556, "loss": 1.481, "step": 12638 }, { "epoch": 0.5620025788093015, "grad_norm": 0.06372273713350296, "learning_rate": 0.00043476709941095074, "loss": 1.4882, "step": 12640 }, { "epoch": 0.5620915032679739, "grad_norm": 0.06318344920873642, "learning_rate": 0.0004346221631195618, "loss": 1.4841, "step": 12642 }, { "epoch": 0.5621804277266462, "grad_norm": 0.06348215788602829, "learning_rate": 0.0004344772324169586, "loss": 1.4878, "step": 12644 }, { "epoch": 0.5622693521853186, "grad_norm": 0.0606432780623436, "learning_rate": 0.00043433230731553, "loss": 1.494, "step": 12646 }, { "epoch": 0.562358276643991, "grad_norm": 0.06193801760673523, "learning_rate": 0.00043418738782766554, "loss": 1.4876, "step": 12648 }, { "epoch": 0.5624472011026633, "grad_norm": 0.062378112226724625, "learning_rate": 0.00043404247396575303, "loss": 1.4922, "step": 12650 }, { "epoch": 0.5625361255613357, "grad_norm": 0.06375192105770111, "learning_rate": 0.00043389756574218035, "loss": 1.4903, "step": 12652 }, { "epoch": 0.562625050020008, "grad_norm": 0.06285295635461807, "learning_rate": 0.0004337526631693353, "loss": 1.4871, "step": 12654 }, { "epoch": 0.5627139744786803, "grad_norm": 0.0635739341378212, "learning_rate": 0.0004336077662596043, "loss": 1.4837, "step": 12656 }, { "epoch": 0.5628028989373527, "grad_norm": 0.06320179253816605, "learning_rate": 0.00043346287502537417, "loss": 1.4951, "step": 12658 }, { "epoch": 0.5628918233960251, "grad_norm": 0.06251043826341629, "learning_rate": 0.0004333179894790307, "loss": 1.4853, "step": 12660 }, { "epoch": 0.5629807478546974, "grad_norm": 0.06125814467668533, "learning_rate": 0.00043317310963295925, "loss": 1.4879, "step": 12662 }, { "epoch": 0.5630696723133698, "grad_norm": 0.06437008082866669, "learning_rate": 0.000433028235499545, "loss": 1.4902, "step": 12664 }, { "epoch": 0.5631585967720422, "grad_norm": 0.0625307485461235, "learning_rate": 0.0004328833670911724, "loss": 1.4882, "step": 12666 }, { "epoch": 0.5632475212307145, "grad_norm": 0.06289555132389069, "learning_rate": 0.00043273850442022505, "loss": 1.4853, "step": 12668 }, { "epoch": 0.5633364456893869, "grad_norm": 0.06292538344860077, "learning_rate": 0.00043259364749908706, "loss": 1.4879, "step": 12670 }, { "epoch": 0.5634253701480593, "grad_norm": 0.06323841959238052, "learning_rate": 0.00043244879634014106, "loss": 1.4838, "step": 12672 }, { "epoch": 0.5635142946067316, "grad_norm": 0.06537654995918274, "learning_rate": 0.0004323039509557695, "loss": 1.4864, "step": 12674 }, { "epoch": 0.5636032190654039, "grad_norm": 0.06365048885345459, "learning_rate": 0.00043215911135835466, "loss": 1.4812, "step": 12676 }, { "epoch": 0.5636921435240763, "grad_norm": 0.06092437356710434, "learning_rate": 0.0004320142775602778, "loss": 1.4825, "step": 12678 }, { "epoch": 0.5637810679827486, "grad_norm": 0.06534268707036972, "learning_rate": 0.0004318694495739203, "loss": 1.4839, "step": 12680 }, { "epoch": 0.563869992441421, "grad_norm": 0.06497418135404587, "learning_rate": 0.0004317246274116624, "loss": 1.486, "step": 12682 }, { "epoch": 0.5639589169000934, "grad_norm": 0.06123911216855049, "learning_rate": 0.0004315798110858842, "loss": 1.486, "step": 12684 }, { "epoch": 0.5640478413587657, "grad_norm": 0.062008924782276154, "learning_rate": 0.0004314350006089653, "loss": 1.481, "step": 12686 }, { "epoch": 0.5641367658174381, "grad_norm": 0.06340475380420685, "learning_rate": 0.0004312901959932847, "loss": 1.4898, "step": 12688 }, { "epoch": 0.5642256902761105, "grad_norm": 0.062276601791381836, "learning_rate": 0.00043114539725122087, "loss": 1.4852, "step": 12690 }, { "epoch": 0.5643146147347828, "grad_norm": 0.06282736361026764, "learning_rate": 0.00043100060439515205, "loss": 1.4893, "step": 12692 }, { "epoch": 0.5644035391934552, "grad_norm": 0.06258688867092133, "learning_rate": 0.00043085581743745546, "loss": 1.4856, "step": 12694 }, { "epoch": 0.5644924636521275, "grad_norm": 0.0631018653512001, "learning_rate": 0.00043071103639050846, "loss": 1.4859, "step": 12696 }, { "epoch": 0.5645813881107998, "grad_norm": 0.06103930622339249, "learning_rate": 0.0004305662612666875, "loss": 1.4892, "step": 12698 }, { "epoch": 0.5646703125694722, "grad_norm": 0.06183280423283577, "learning_rate": 0.00043042149207836824, "loss": 1.4848, "step": 12700 }, { "epoch": 0.5647592370281446, "grad_norm": 0.06280186027288437, "learning_rate": 0.0004302767288379267, "loss": 1.4846, "step": 12702 }, { "epoch": 0.5648481614868169, "grad_norm": 0.061180781573057175, "learning_rate": 0.0004301319715577376, "loss": 1.4802, "step": 12704 }, { "epoch": 0.5649370859454893, "grad_norm": 0.06304103881120682, "learning_rate": 0.00042998722025017536, "loss": 1.4853, "step": 12706 }, { "epoch": 0.5650260104041617, "grad_norm": 0.06135190278291702, "learning_rate": 0.0004298424749276141, "loss": 1.489, "step": 12708 }, { "epoch": 0.565114934862834, "grad_norm": 0.061154115945100784, "learning_rate": 0.0004296977356024272, "loss": 1.487, "step": 12710 }, { "epoch": 0.5652038593215064, "grad_norm": 0.06199873611330986, "learning_rate": 0.0004295530022869878, "loss": 1.4856, "step": 12712 }, { "epoch": 0.5652927837801788, "grad_norm": 0.06099383533000946, "learning_rate": 0.0004294082749936682, "loss": 1.4791, "step": 12714 }, { "epoch": 0.5653817082388511, "grad_norm": 0.061755064874887466, "learning_rate": 0.0004292635537348402, "loss": 1.4841, "step": 12716 }, { "epoch": 0.5654706326975234, "grad_norm": 0.0626377984881401, "learning_rate": 0.00042911883852287546, "loss": 1.4911, "step": 12718 }, { "epoch": 0.5655595571561958, "grad_norm": 0.06231605261564255, "learning_rate": 0.00042897412937014496, "loss": 1.4779, "step": 12720 }, { "epoch": 0.5656484816148681, "grad_norm": 0.06421330571174622, "learning_rate": 0.0004288294262890185, "loss": 1.4914, "step": 12722 }, { "epoch": 0.5657374060735405, "grad_norm": 0.06046907603740692, "learning_rate": 0.0004286847292918666, "loss": 1.4845, "step": 12724 }, { "epoch": 0.5658263305322129, "grad_norm": 0.062285587191581726, "learning_rate": 0.00042854003839105806, "loss": 1.4837, "step": 12726 }, { "epoch": 0.5659152549908852, "grad_norm": 0.0627601146697998, "learning_rate": 0.00042839535359896227, "loss": 1.4861, "step": 12728 }, { "epoch": 0.5660041794495576, "grad_norm": 0.06236814707517624, "learning_rate": 0.00042825067492794706, "loss": 1.4876, "step": 12730 }, { "epoch": 0.56609310390823, "grad_norm": 0.06265702098608017, "learning_rate": 0.0004281060023903803, "loss": 1.4797, "step": 12732 }, { "epoch": 0.5661820283669023, "grad_norm": 0.06238122656941414, "learning_rate": 0.0004279613359986293, "loss": 1.4885, "step": 12734 }, { "epoch": 0.5662709528255747, "grad_norm": 0.06290338933467865, "learning_rate": 0.00042781667576506085, "loss": 1.4882, "step": 12736 }, { "epoch": 0.5663598772842471, "grad_norm": 0.06228485703468323, "learning_rate": 0.0004276720217020409, "loss": 1.4914, "step": 12738 }, { "epoch": 0.5664488017429193, "grad_norm": 0.06285703182220459, "learning_rate": 0.0004275273738219354, "loss": 1.4851, "step": 12740 }, { "epoch": 0.5665377262015917, "grad_norm": 0.06270929425954819, "learning_rate": 0.00042738273213710937, "loss": 1.4811, "step": 12742 }, { "epoch": 0.5666266506602641, "grad_norm": 0.06521732360124588, "learning_rate": 0.00042723809665992733, "loss": 1.4876, "step": 12744 }, { "epoch": 0.5667155751189364, "grad_norm": 0.06187422201037407, "learning_rate": 0.00042709346740275346, "loss": 1.4874, "step": 12746 }, { "epoch": 0.5668044995776088, "grad_norm": 0.06465815752744675, "learning_rate": 0.0004269488443779512, "loss": 1.4846, "step": 12748 }, { "epoch": 0.5668934240362812, "grad_norm": 0.0634174719452858, "learning_rate": 0.00042680422759788377, "loss": 1.4852, "step": 12750 }, { "epoch": 0.5669823484949535, "grad_norm": 0.0651431754231453, "learning_rate": 0.0004266596170749137, "loss": 1.4861, "step": 12752 }, { "epoch": 0.5670712729536259, "grad_norm": 0.06035947799682617, "learning_rate": 0.0004265150128214024, "loss": 1.4858, "step": 12754 }, { "epoch": 0.5671601974122983, "grad_norm": 0.06423404812812805, "learning_rate": 0.0004263704148497119, "loss": 1.4907, "step": 12756 }, { "epoch": 0.5672491218709707, "grad_norm": 0.06168157607316971, "learning_rate": 0.0004262258231722028, "loss": 1.4827, "step": 12758 }, { "epoch": 0.567338046329643, "grad_norm": 0.06296733021736145, "learning_rate": 0.0004260812378012353, "loss": 1.4828, "step": 12760 }, { "epoch": 0.5674269707883153, "grad_norm": 0.06025325134396553, "learning_rate": 0.00042593665874916944, "loss": 1.4839, "step": 12762 }, { "epoch": 0.5675158952469876, "grad_norm": 0.062312737107276917, "learning_rate": 0.0004257920860283642, "loss": 1.4904, "step": 12764 }, { "epoch": 0.56760481970566, "grad_norm": 0.06261510401964188, "learning_rate": 0.0004256475196511787, "loss": 1.4859, "step": 12766 }, { "epoch": 0.5676937441643324, "grad_norm": 0.06052310764789581, "learning_rate": 0.00042550295962997083, "loss": 1.4845, "step": 12768 }, { "epoch": 0.5677826686230047, "grad_norm": 0.061324592679739, "learning_rate": 0.00042535840597709816, "loss": 1.4843, "step": 12770 }, { "epoch": 0.5678715930816771, "grad_norm": 0.06264898926019669, "learning_rate": 0.000425213858704918, "loss": 1.4873, "step": 12772 }, { "epoch": 0.5679605175403495, "grad_norm": 0.06049567088484764, "learning_rate": 0.0004250693178257868, "loss": 1.4887, "step": 12774 }, { "epoch": 0.5680494419990219, "grad_norm": 0.06124699115753174, "learning_rate": 0.00042492478335206033, "loss": 1.486, "step": 12776 }, { "epoch": 0.5681383664576942, "grad_norm": 0.06057760491967201, "learning_rate": 0.00042478025529609447, "loss": 1.4847, "step": 12778 }, { "epoch": 0.5682272909163666, "grad_norm": 0.0630040317773819, "learning_rate": 0.0004246357336702436, "loss": 1.4834, "step": 12780 }, { "epoch": 0.568316215375039, "grad_norm": 0.061218783259391785, "learning_rate": 0.0004244912184868626, "loss": 1.4846, "step": 12782 }, { "epoch": 0.5684051398337112, "grad_norm": 0.0640929564833641, "learning_rate": 0.0004243467097583049, "loss": 1.4862, "step": 12784 }, { "epoch": 0.5684940642923836, "grad_norm": 0.061070337891578674, "learning_rate": 0.00042420220749692377, "loss": 1.4816, "step": 12786 }, { "epoch": 0.568582988751056, "grad_norm": 0.06318749487400055, "learning_rate": 0.00042405771171507206, "loss": 1.4888, "step": 12788 }, { "epoch": 0.5686719132097283, "grad_norm": 0.06423699110746384, "learning_rate": 0.0004239132224251018, "loss": 1.4879, "step": 12790 }, { "epoch": 0.5687608376684007, "grad_norm": 0.06260624527931213, "learning_rate": 0.0004237687396393645, "loss": 1.491, "step": 12792 }, { "epoch": 0.568849762127073, "grad_norm": 0.062127046287059784, "learning_rate": 0.00042362426337021143, "loss": 1.483, "step": 12794 }, { "epoch": 0.5689386865857454, "grad_norm": 0.06342507153749466, "learning_rate": 0.00042347979362999293, "loss": 1.486, "step": 12796 }, { "epoch": 0.5690276110444178, "grad_norm": 0.06286569684743881, "learning_rate": 0.00042333533043105865, "loss": 1.4895, "step": 12798 }, { "epoch": 0.5691165355030902, "grad_norm": 0.06699131429195404, "learning_rate": 0.0004231908737857584, "loss": 1.4929, "step": 12800 }, { "epoch": 0.5692054599617625, "grad_norm": 0.06216110289096832, "learning_rate": 0.00042304642370644054, "loss": 1.4828, "step": 12802 }, { "epoch": 0.5692943844204349, "grad_norm": 0.06298429518938065, "learning_rate": 0.00042290198020545375, "loss": 1.482, "step": 12804 }, { "epoch": 0.5693833088791072, "grad_norm": 0.06303789466619492, "learning_rate": 0.0004227575432951453, "loss": 1.4873, "step": 12806 }, { "epoch": 0.5694722333377795, "grad_norm": 0.06614295393228531, "learning_rate": 0.0004226131129878624, "loss": 1.4907, "step": 12808 }, { "epoch": 0.5695611577964519, "grad_norm": 0.06474742293357849, "learning_rate": 0.0004224686892959517, "loss": 1.4831, "step": 12810 }, { "epoch": 0.5696500822551243, "grad_norm": 0.06322574615478516, "learning_rate": 0.0004223242722317592, "loss": 1.4893, "step": 12812 }, { "epoch": 0.5697390067137966, "grad_norm": 0.060625430196523666, "learning_rate": 0.00042217986180763, "loss": 1.477, "step": 12814 }, { "epoch": 0.569827931172469, "grad_norm": 0.06206146627664566, "learning_rate": 0.00042203545803590935, "loss": 1.4866, "step": 12816 }, { "epoch": 0.5699168556311414, "grad_norm": 0.06130099669098854, "learning_rate": 0.0004218910609289412, "loss": 1.485, "step": 12818 }, { "epoch": 0.5700057800898137, "grad_norm": 0.06254599988460541, "learning_rate": 0.00042174667049906943, "loss": 1.4766, "step": 12820 }, { "epoch": 0.5700947045484861, "grad_norm": 0.06300479918718338, "learning_rate": 0.00042160228675863716, "loss": 1.4921, "step": 12822 }, { "epoch": 0.5701836290071585, "grad_norm": 0.06208550184965134, "learning_rate": 0.0004214579097199868, "loss": 1.4773, "step": 12824 }, { "epoch": 0.5702725534658307, "grad_norm": 0.06384430080652237, "learning_rate": 0.0004213135393954605, "loss": 1.4851, "step": 12826 }, { "epoch": 0.5703614779245031, "grad_norm": 0.06255348771810532, "learning_rate": 0.0004211691757973998, "loss": 1.4875, "step": 12828 }, { "epoch": 0.5704504023831755, "grad_norm": 0.06358494609594345, "learning_rate": 0.000421024818938145, "loss": 1.4814, "step": 12830 }, { "epoch": 0.5705393268418478, "grad_norm": 0.0655054897069931, "learning_rate": 0.000420880468830037, "loss": 1.4862, "step": 12832 }, { "epoch": 0.5706282513005202, "grad_norm": 0.06272534281015396, "learning_rate": 0.0004207361254854149, "loss": 1.4882, "step": 12834 }, { "epoch": 0.5707171757591926, "grad_norm": 0.06250984966754913, "learning_rate": 0.00042059178891661845, "loss": 1.4822, "step": 12836 }, { "epoch": 0.5708061002178649, "grad_norm": 0.06336545944213867, "learning_rate": 0.0004204474591359856, "loss": 1.4828, "step": 12838 }, { "epoch": 0.5708950246765373, "grad_norm": 0.06069128215312958, "learning_rate": 0.00042030313615585446, "loss": 1.4854, "step": 12840 }, { "epoch": 0.5709839491352097, "grad_norm": 0.06559595465660095, "learning_rate": 0.00042015881998856263, "loss": 1.4875, "step": 12842 }, { "epoch": 0.571072873593882, "grad_norm": 0.06050030142068863, "learning_rate": 0.0004200145106464467, "loss": 1.4861, "step": 12844 }, { "epoch": 0.5711617980525544, "grad_norm": 0.06325476616621017, "learning_rate": 0.0004198702081418427, "loss": 1.4888, "step": 12846 }, { "epoch": 0.5712507225112267, "grad_norm": 0.06051871180534363, "learning_rate": 0.00041972591248708657, "loss": 1.483, "step": 12848 }, { "epoch": 0.571339646969899, "grad_norm": 0.06114453822374344, "learning_rate": 0.0004195816236945131, "loss": 1.4873, "step": 12850 }, { "epoch": 0.5714285714285714, "grad_norm": 0.06214149668812752, "learning_rate": 0.000419437341776457, "loss": 1.4859, "step": 12852 }, { "epoch": 0.5715174958872438, "grad_norm": 0.060995914041996, "learning_rate": 0.000419293066745252, "loss": 1.4919, "step": 12854 }, { "epoch": 0.5716064203459161, "grad_norm": 0.06173815205693245, "learning_rate": 0.000419148798613231, "loss": 1.4881, "step": 12856 }, { "epoch": 0.5716953448045885, "grad_norm": 0.060767561197280884, "learning_rate": 0.0004190045373927273, "loss": 1.488, "step": 12858 }, { "epoch": 0.5717842692632609, "grad_norm": 0.061665672808885574, "learning_rate": 0.0004188602830960726, "loss": 1.4862, "step": 12860 }, { "epoch": 0.5718731937219332, "grad_norm": 0.06136852502822876, "learning_rate": 0.00041871603573559837, "loss": 1.4881, "step": 12862 }, { "epoch": 0.5719621181806056, "grad_norm": 0.0626179501414299, "learning_rate": 0.0004185717953236357, "loss": 1.4832, "step": 12864 }, { "epoch": 0.572051042639278, "grad_norm": 0.06289484351873398, "learning_rate": 0.0004184275618725148, "loss": 1.4861, "step": 12866 }, { "epoch": 0.5721399670979503, "grad_norm": 0.06334191560745239, "learning_rate": 0.0004182833353945653, "loss": 1.4887, "step": 12868 }, { "epoch": 0.5722288915566226, "grad_norm": 0.061536043882369995, "learning_rate": 0.0004181391159021165, "loss": 1.4828, "step": 12870 }, { "epoch": 0.572317816015295, "grad_norm": 0.06327944248914719, "learning_rate": 0.0004179949034074967, "loss": 1.4843, "step": 12872 }, { "epoch": 0.5724067404739673, "grad_norm": 0.061356909573078156, "learning_rate": 0.0004178506979230341, "loss": 1.4833, "step": 12874 }, { "epoch": 0.5724956649326397, "grad_norm": 0.0625733733177185, "learning_rate": 0.00041770649946105585, "loss": 1.4834, "step": 12876 }, { "epoch": 0.5725845893913121, "grad_norm": 0.06169121339917183, "learning_rate": 0.0004175623080338885, "loss": 1.48, "step": 12878 }, { "epoch": 0.5726735138499844, "grad_norm": 0.06390050053596497, "learning_rate": 0.0004174181236538585, "loss": 1.4884, "step": 12880 }, { "epoch": 0.5727624383086568, "grad_norm": 0.0622803270816803, "learning_rate": 0.00041727394633329137, "loss": 1.4838, "step": 12882 }, { "epoch": 0.5728513627673292, "grad_norm": 0.062278926372528076, "learning_rate": 0.0004171297760845115, "loss": 1.4837, "step": 12884 }, { "epoch": 0.5729402872260015, "grad_norm": 0.06416445970535278, "learning_rate": 0.00041698561291984387, "loss": 1.485, "step": 12886 }, { "epoch": 0.5730292116846739, "grad_norm": 0.06131880730390549, "learning_rate": 0.0004168414568516116, "loss": 1.4871, "step": 12888 }, { "epoch": 0.5731181361433463, "grad_norm": 0.06132744997739792, "learning_rate": 0.0004166973078921384, "loss": 1.4895, "step": 12890 }, { "epoch": 0.5732070606020185, "grad_norm": 0.06064356490969658, "learning_rate": 0.0004165531660537463, "loss": 1.4859, "step": 12892 }, { "epoch": 0.5732959850606909, "grad_norm": 0.060645733028650284, "learning_rate": 0.00041640903134875716, "loss": 1.4801, "step": 12894 }, { "epoch": 0.5733849095193633, "grad_norm": 0.06099729612469673, "learning_rate": 0.00041626490378949253, "loss": 1.4751, "step": 12896 }, { "epoch": 0.5734738339780356, "grad_norm": 0.06112310662865639, "learning_rate": 0.00041612078338827295, "loss": 1.4874, "step": 12898 }, { "epoch": 0.573562758436708, "grad_norm": 0.06212225928902626, "learning_rate": 0.00041597667015741836, "loss": 1.4863, "step": 12900 }, { "epoch": 0.5736516828953804, "grad_norm": 0.06187918782234192, "learning_rate": 0.0004158325641092483, "loss": 1.4783, "step": 12902 }, { "epoch": 0.5737406073540527, "grad_norm": 0.06254956126213074, "learning_rate": 0.00041568846525608154, "loss": 1.4861, "step": 12904 }, { "epoch": 0.5738295318127251, "grad_norm": 0.06175340339541435, "learning_rate": 0.00041554437361023645, "loss": 1.4867, "step": 12906 }, { "epoch": 0.5739184562713975, "grad_norm": 0.06396617740392685, "learning_rate": 0.0004154002891840306, "loss": 1.4828, "step": 12908 }, { "epoch": 0.5740073807300698, "grad_norm": 0.0628194510936737, "learning_rate": 0.0004152562119897804, "loss": 1.4834, "step": 12910 }, { "epoch": 0.5740963051887422, "grad_norm": 0.06112290546298027, "learning_rate": 0.0004151121420398031, "loss": 1.4838, "step": 12912 }, { "epoch": 0.5741852296474145, "grad_norm": 0.06214891001582146, "learning_rate": 0.00041496807934641376, "loss": 1.4872, "step": 12914 }, { "epoch": 0.5742741541060868, "grad_norm": 0.06411154568195343, "learning_rate": 0.00041482402392192775, "loss": 1.4844, "step": 12916 }, { "epoch": 0.5743630785647592, "grad_norm": 0.06268089264631271, "learning_rate": 0.0004146799757786596, "loss": 1.4916, "step": 12918 }, { "epoch": 0.5744520030234316, "grad_norm": 0.06198188662528992, "learning_rate": 0.00041453593492892307, "loss": 1.4868, "step": 12920 }, { "epoch": 0.574540927482104, "grad_norm": 0.06695974618196487, "learning_rate": 0.00041439190138503135, "loss": 1.4801, "step": 12922 }, { "epoch": 0.5746298519407763, "grad_norm": 0.06644386053085327, "learning_rate": 0.0004142478751592973, "loss": 1.494, "step": 12924 }, { "epoch": 0.5747187763994487, "grad_norm": 0.06115036457777023, "learning_rate": 0.0004141038562640327, "loss": 1.4766, "step": 12926 }, { "epoch": 0.574807700858121, "grad_norm": 0.062477122992277145, "learning_rate": 0.000413959844711549, "loss": 1.4887, "step": 12928 }, { "epoch": 0.5748966253167934, "grad_norm": 0.06162472814321518, "learning_rate": 0.0004138158405141571, "loss": 1.4835, "step": 12930 }, { "epoch": 0.5749855497754658, "grad_norm": 0.06390776485204697, "learning_rate": 0.0004136718436841667, "loss": 1.4821, "step": 12932 }, { "epoch": 0.575074474234138, "grad_norm": 0.06126069650053978, "learning_rate": 0.00041352785423388774, "loss": 1.4833, "step": 12934 }, { "epoch": 0.5751633986928104, "grad_norm": 0.06258382648229599, "learning_rate": 0.0004133838721756289, "loss": 1.4808, "step": 12936 }, { "epoch": 0.5752523231514828, "grad_norm": 0.061541568487882614, "learning_rate": 0.0004132398975216981, "loss": 1.4852, "step": 12938 }, { "epoch": 0.5753412476101551, "grad_norm": 0.06053704768419266, "learning_rate": 0.00041309593028440354, "loss": 1.4843, "step": 12940 }, { "epoch": 0.5754301720688275, "grad_norm": 0.060443904250860214, "learning_rate": 0.0004129519704760514, "loss": 1.484, "step": 12942 }, { "epoch": 0.5755190965274999, "grad_norm": 0.06167231872677803, "learning_rate": 0.0004128080181089489, "loss": 1.4891, "step": 12944 }, { "epoch": 0.5756080209861723, "grad_norm": 0.06110317260026932, "learning_rate": 0.00041266407319540105, "loss": 1.4837, "step": 12946 }, { "epoch": 0.5756969454448446, "grad_norm": 0.06018367037177086, "learning_rate": 0.000412520135747713, "loss": 1.4861, "step": 12948 }, { "epoch": 0.575785869903517, "grad_norm": 0.06103086471557617, "learning_rate": 0.0004123762057781894, "loss": 1.4848, "step": 12950 }, { "epoch": 0.5758747943621894, "grad_norm": 0.06104036420583725, "learning_rate": 0.0004122322832991339, "loss": 1.4899, "step": 12952 }, { "epoch": 0.5759637188208617, "grad_norm": 0.061243943870067596, "learning_rate": 0.0004120883683228494, "loss": 1.4895, "step": 12954 }, { "epoch": 0.576052643279534, "grad_norm": 0.062343720346689224, "learning_rate": 0.0004119444608616388, "loss": 1.4847, "step": 12956 }, { "epoch": 0.5761415677382064, "grad_norm": 0.061404332518577576, "learning_rate": 0.00041180056092780354, "loss": 1.4812, "step": 12958 }, { "epoch": 0.5762304921968787, "grad_norm": 0.06235596537590027, "learning_rate": 0.0004116566685336452, "loss": 1.4852, "step": 12960 }, { "epoch": 0.5763194166555511, "grad_norm": 0.061843257397413254, "learning_rate": 0.0004115127836914642, "loss": 1.49, "step": 12962 }, { "epoch": 0.5764083411142235, "grad_norm": 0.05963189899921417, "learning_rate": 0.00041136890641356005, "loss": 1.487, "step": 12964 }, { "epoch": 0.5764972655728958, "grad_norm": 0.06309180706739426, "learning_rate": 0.0004112250367122328, "loss": 1.4832, "step": 12966 }, { "epoch": 0.5765861900315682, "grad_norm": 0.05972014740109444, "learning_rate": 0.0004110811745997804, "loss": 1.4868, "step": 12968 }, { "epoch": 0.5766751144902406, "grad_norm": 0.06337624043226242, "learning_rate": 0.00041093732008850106, "loss": 1.4848, "step": 12970 }, { "epoch": 0.5767640389489129, "grad_norm": 0.06255938112735748, "learning_rate": 0.0004107934731906921, "loss": 1.4831, "step": 12972 }, { "epoch": 0.5768529634075853, "grad_norm": 0.06092438846826553, "learning_rate": 0.0004106496339186501, "loss": 1.4789, "step": 12974 }, { "epoch": 0.5769418878662577, "grad_norm": 0.062436189502477646, "learning_rate": 0.0004105058022846713, "loss": 1.4888, "step": 12976 }, { "epoch": 0.5770308123249299, "grad_norm": 0.0622539147734642, "learning_rate": 0.00041036197830105083, "loss": 1.4801, "step": 12978 }, { "epoch": 0.5771197367836023, "grad_norm": 0.06234341859817505, "learning_rate": 0.0004102181619800834, "loss": 1.4812, "step": 12980 }, { "epoch": 0.5772086612422747, "grad_norm": 0.06356412917375565, "learning_rate": 0.00041007435333406325, "loss": 1.4836, "step": 12982 }, { "epoch": 0.577297585700947, "grad_norm": 0.06135694682598114, "learning_rate": 0.00040993055237528367, "loss": 1.4868, "step": 12984 }, { "epoch": 0.5773865101596194, "grad_norm": 0.06356058269739151, "learning_rate": 0.00040978675911603727, "loss": 1.4841, "step": 12986 }, { "epoch": 0.5774754346182918, "grad_norm": 0.060618288815021515, "learning_rate": 0.00040964297356861634, "loss": 1.4792, "step": 12988 }, { "epoch": 0.5775643590769641, "grad_norm": 0.06027473136782646, "learning_rate": 0.0004094991957453124, "loss": 1.4804, "step": 12990 }, { "epoch": 0.5776532835356365, "grad_norm": 0.06143961101770401, "learning_rate": 0.00040935542565841566, "loss": 1.4767, "step": 12992 }, { "epoch": 0.5777422079943089, "grad_norm": 0.06066564470529556, "learning_rate": 0.00040921166332021693, "loss": 1.4848, "step": 12994 }, { "epoch": 0.5778311324529812, "grad_norm": 0.060524292290210724, "learning_rate": 0.000409067908743005, "loss": 1.4874, "step": 12996 }, { "epoch": 0.5779200569116536, "grad_norm": 0.0610857717692852, "learning_rate": 0.0004089241619390692, "loss": 1.4846, "step": 12998 }, { "epoch": 0.5780089813703259, "grad_norm": 0.06156258285045624, "learning_rate": 0.0004087804229206974, "loss": 1.4799, "step": 13000 }, { "epoch": 0.5780089813703259, "eval_loss": 1.4671134948730469, "eval_runtime": 14.09, "eval_samples_per_second": 490.418, "eval_steps_per_second": 61.32, "step": 13000 }, { "epoch": 0.5780979058289982, "grad_norm": 0.0617251992225647, "learning_rate": 0.00040863669170017694, "loss": 1.4861, "step": 13002 }, { "epoch": 0.5781868302876706, "grad_norm": 0.06050444394350052, "learning_rate": 0.0004084929682897947, "loss": 1.4846, "step": 13004 }, { "epoch": 0.578275754746343, "grad_norm": 0.06325755268335342, "learning_rate": 0.00040834925270183685, "loss": 1.4795, "step": 13006 }, { "epoch": 0.5783646792050153, "grad_norm": 0.06123512238264084, "learning_rate": 0.0004082055449485886, "loss": 1.484, "step": 13008 }, { "epoch": 0.5784536036636877, "grad_norm": 0.061540085822343826, "learning_rate": 0.00040806184504233513, "loss": 1.4841, "step": 13010 }, { "epoch": 0.5785425281223601, "grad_norm": 0.06179589033126831, "learning_rate": 0.00040791815299536004, "loss": 1.4848, "step": 13012 }, { "epoch": 0.5786314525810324, "grad_norm": 0.06283790618181229, "learning_rate": 0.0004077744688199473, "loss": 1.4888, "step": 13014 }, { "epoch": 0.5787203770397048, "grad_norm": 0.06377764791250229, "learning_rate": 0.0004076307925283794, "loss": 1.4902, "step": 13016 }, { "epoch": 0.5788093014983772, "grad_norm": 0.06321964412927628, "learning_rate": 0.0004074871241329381, "loss": 1.4781, "step": 13018 }, { "epoch": 0.5788982259570495, "grad_norm": 0.06205172836780548, "learning_rate": 0.0004073434636459054, "loss": 1.4869, "step": 13020 }, { "epoch": 0.5789871504157218, "grad_norm": 0.06128274276852608, "learning_rate": 0.0004071998110795617, "loss": 1.4815, "step": 13022 }, { "epoch": 0.5790760748743942, "grad_norm": 0.061635617166757584, "learning_rate": 0.000407056166446187, "loss": 1.4746, "step": 13024 }, { "epoch": 0.5791649993330665, "grad_norm": 0.061317794024944305, "learning_rate": 0.0004069125297580609, "loss": 1.4782, "step": 13026 }, { "epoch": 0.5792539237917389, "grad_norm": 0.06503716111183167, "learning_rate": 0.0004067689010274618, "loss": 1.4814, "step": 13028 }, { "epoch": 0.5793428482504113, "grad_norm": 0.06180577725172043, "learning_rate": 0.0004066252802666681, "loss": 1.4876, "step": 13030 }, { "epoch": 0.5794317727090836, "grad_norm": 0.06261365860700607, "learning_rate": 0.0004064816674879568, "loss": 1.4816, "step": 13032 }, { "epoch": 0.579520697167756, "grad_norm": 0.062196627259254456, "learning_rate": 0.0004063380627036047, "loss": 1.4831, "step": 13034 }, { "epoch": 0.5796096216264284, "grad_norm": 0.06358969211578369, "learning_rate": 0.00040619446592588784, "loss": 1.4869, "step": 13036 }, { "epoch": 0.5796985460851007, "grad_norm": 0.061073388904333115, "learning_rate": 0.0004060508771670814, "loss": 1.4873, "step": 13038 }, { "epoch": 0.5797874705437731, "grad_norm": 0.061576247215270996, "learning_rate": 0.0004059072964394599, "loss": 1.4804, "step": 13040 }, { "epoch": 0.5798763950024455, "grad_norm": 0.06221195310354233, "learning_rate": 0.0004057637237552974, "loss": 1.487, "step": 13042 }, { "epoch": 0.5799653194611177, "grad_norm": 0.0631050392985344, "learning_rate": 0.00040562015912686705, "loss": 1.4875, "step": 13044 }, { "epoch": 0.5800542439197901, "grad_norm": 0.062043990939855576, "learning_rate": 0.00040547660256644145, "loss": 1.4873, "step": 13046 }, { "epoch": 0.5801431683784625, "grad_norm": 0.06122414022684097, "learning_rate": 0.00040533305408629264, "loss": 1.4815, "step": 13048 }, { "epoch": 0.5802320928371348, "grad_norm": 0.06327483803033829, "learning_rate": 0.0004051895136986912, "loss": 1.4902, "step": 13050 }, { "epoch": 0.5803210172958072, "grad_norm": 0.06129778176546097, "learning_rate": 0.0004050459814159082, "loss": 1.477, "step": 13052 }, { "epoch": 0.5804099417544796, "grad_norm": 0.0637560561299324, "learning_rate": 0.000404902457250213, "loss": 1.4809, "step": 13054 }, { "epoch": 0.5804988662131519, "grad_norm": 0.06117291748523712, "learning_rate": 0.00040475894121387487, "loss": 1.4849, "step": 13056 }, { "epoch": 0.5805877906718243, "grad_norm": 0.06376662105321884, "learning_rate": 0.00040461543331916217, "loss": 1.4824, "step": 13058 }, { "epoch": 0.5806767151304967, "grad_norm": 0.06137915700674057, "learning_rate": 0.0004044719335783426, "loss": 1.484, "step": 13060 }, { "epoch": 0.580765639589169, "grad_norm": 0.06189562380313873, "learning_rate": 0.000404328442003683, "loss": 1.4871, "step": 13062 }, { "epoch": 0.5808545640478413, "grad_norm": 0.06184488162398338, "learning_rate": 0.00040418495860744996, "loss": 1.4856, "step": 13064 }, { "epoch": 0.5809434885065137, "grad_norm": 0.06308455020189285, "learning_rate": 0.0004040414834019088, "loss": 1.487, "step": 13066 }, { "epoch": 0.581032412965186, "grad_norm": 0.06119653582572937, "learning_rate": 0.0004038980163993245, "loss": 1.4843, "step": 13068 }, { "epoch": 0.5811213374238584, "grad_norm": 0.06232026219367981, "learning_rate": 0.0004037545576119615, "loss": 1.482, "step": 13070 }, { "epoch": 0.5812102618825308, "grad_norm": 0.06109381094574928, "learning_rate": 0.0004036111070520827, "loss": 1.4848, "step": 13072 }, { "epoch": 0.5812991863412031, "grad_norm": 0.0632401630282402, "learning_rate": 0.0004034676647319515, "loss": 1.4796, "step": 13074 }, { "epoch": 0.5813881107998755, "grad_norm": 0.06033066287636757, "learning_rate": 0.0004033242306638297, "loss": 1.48, "step": 13076 }, { "epoch": 0.5814770352585479, "grad_norm": 0.05948418378829956, "learning_rate": 0.0004031808048599785, "loss": 1.4845, "step": 13078 }, { "epoch": 0.5815659597172202, "grad_norm": 0.062302976846694946, "learning_rate": 0.00040303738733265897, "loss": 1.4862, "step": 13080 }, { "epoch": 0.5816548841758926, "grad_norm": 0.061018794775009155, "learning_rate": 0.00040289397809413073, "loss": 1.4819, "step": 13082 }, { "epoch": 0.581743808634565, "grad_norm": 0.061782609671354294, "learning_rate": 0.0004027505771566533, "loss": 1.4811, "step": 13084 }, { "epoch": 0.5818327330932372, "grad_norm": 0.05988406762480736, "learning_rate": 0.0004026071845324851, "loss": 1.4856, "step": 13086 }, { "epoch": 0.5819216575519096, "grad_norm": 0.06019321829080582, "learning_rate": 0.0004024638002338838, "loss": 1.4828, "step": 13088 }, { "epoch": 0.582010582010582, "grad_norm": 0.06038999930024147, "learning_rate": 0.00040232042427310684, "loss": 1.4912, "step": 13090 }, { "epoch": 0.5820995064692543, "grad_norm": 0.06270384043455124, "learning_rate": 0.00040217705666241053, "loss": 1.4808, "step": 13092 }, { "epoch": 0.5821884309279267, "grad_norm": 0.06618236005306244, "learning_rate": 0.0004020336974140502, "loss": 1.4852, "step": 13094 }, { "epoch": 0.5822773553865991, "grad_norm": 0.060933563858270645, "learning_rate": 0.0004018903465402815, "loss": 1.4846, "step": 13096 }, { "epoch": 0.5823662798452715, "grad_norm": 0.06165250390768051, "learning_rate": 0.000401747004053358, "loss": 1.4827, "step": 13098 }, { "epoch": 0.5824552043039438, "grad_norm": 0.06271658837795258, "learning_rate": 0.0004016036699655339, "loss": 1.4864, "step": 13100 }, { "epoch": 0.5825441287626162, "grad_norm": 0.0629393607378006, "learning_rate": 0.0004014603442890616, "loss": 1.476, "step": 13102 }, { "epoch": 0.5826330532212886, "grad_norm": 0.06081317737698555, "learning_rate": 0.0004013170270361932, "loss": 1.4821, "step": 13104 }, { "epoch": 0.5827219776799609, "grad_norm": 0.06229792535305023, "learning_rate": 0.0004011737182191803, "loss": 1.4831, "step": 13106 }, { "epoch": 0.5828109021386332, "grad_norm": 0.0620463564991951, "learning_rate": 0.0004010304178502735, "loss": 1.4862, "step": 13108 }, { "epoch": 0.5828998265973055, "grad_norm": 0.06133623793721199, "learning_rate": 0.00040088712594172265, "loss": 1.4845, "step": 13110 }, { "epoch": 0.5829887510559779, "grad_norm": 0.06249558553099632, "learning_rate": 0.0004007438425057771, "loss": 1.4844, "step": 13112 }, { "epoch": 0.5830776755146503, "grad_norm": 0.06116337701678276, "learning_rate": 0.00040060056755468536, "loss": 1.487, "step": 13114 }, { "epoch": 0.5831665999733227, "grad_norm": 0.06192251667380333, "learning_rate": 0.000400457301100695, "loss": 1.4818, "step": 13116 }, { "epoch": 0.583255524431995, "grad_norm": 0.060988899320364, "learning_rate": 0.00040031404315605336, "loss": 1.4796, "step": 13118 }, { "epoch": 0.5833444488906674, "grad_norm": 0.06281070411205292, "learning_rate": 0.00040017079373300644, "loss": 1.4803, "step": 13120 }, { "epoch": 0.5834333733493398, "grad_norm": 0.06199118122458458, "learning_rate": 0.0004000275528438002, "loss": 1.4867, "step": 13122 }, { "epoch": 0.5835222978080121, "grad_norm": 0.06208673492074013, "learning_rate": 0.00039988432050067936, "loss": 1.4852, "step": 13124 }, { "epoch": 0.5836112222666845, "grad_norm": 0.06448842585086823, "learning_rate": 0.00039974109671588764, "loss": 1.4811, "step": 13126 }, { "epoch": 0.5837001467253569, "grad_norm": 0.06038080155849457, "learning_rate": 0.0003995978815016692, "loss": 1.4787, "step": 13128 }, { "epoch": 0.5837890711840291, "grad_norm": 0.06375820189714432, "learning_rate": 0.00039945467487026616, "loss": 1.4855, "step": 13130 }, { "epoch": 0.5838779956427015, "grad_norm": 0.06250657886266708, "learning_rate": 0.0003993114768339205, "loss": 1.4816, "step": 13132 }, { "epoch": 0.5839669201013739, "grad_norm": 0.061796385794878006, "learning_rate": 0.0003991682874048736, "loss": 1.4791, "step": 13134 }, { "epoch": 0.5840558445600462, "grad_norm": 0.06170998141169548, "learning_rate": 0.0003990251065953658, "loss": 1.4829, "step": 13136 }, { "epoch": 0.5841447690187186, "grad_norm": 0.060505084693431854, "learning_rate": 0.0003988819344176369, "loss": 1.4766, "step": 13138 }, { "epoch": 0.584233693477391, "grad_norm": 0.06287679821252823, "learning_rate": 0.0003987387708839259, "loss": 1.4804, "step": 13140 }, { "epoch": 0.5843226179360633, "grad_norm": 0.061412662267684937, "learning_rate": 0.0003985956160064709, "loss": 1.4776, "step": 13142 }, { "epoch": 0.5844115423947357, "grad_norm": 0.06276515871286392, "learning_rate": 0.0003984524697975096, "loss": 1.4818, "step": 13144 }, { "epoch": 0.5845004668534081, "grad_norm": 0.06057889387011528, "learning_rate": 0.00039830933226927883, "loss": 1.4876, "step": 13146 }, { "epoch": 0.5845893913120804, "grad_norm": 0.061848003417253494, "learning_rate": 0.00039816620343401414, "loss": 1.4808, "step": 13148 }, { "epoch": 0.5846783157707528, "grad_norm": 0.06087247282266617, "learning_rate": 0.00039802308330395147, "loss": 1.4855, "step": 13150 }, { "epoch": 0.5847672402294251, "grad_norm": 0.06292174756526947, "learning_rate": 0.00039787997189132476, "loss": 1.4838, "step": 13152 }, { "epoch": 0.5848561646880974, "grad_norm": 0.06331959366798401, "learning_rate": 0.0003977368692083683, "loss": 1.482, "step": 13154 }, { "epoch": 0.5849450891467698, "grad_norm": 0.06192371994256973, "learning_rate": 0.0003975937752673149, "loss": 1.4817, "step": 13156 }, { "epoch": 0.5850340136054422, "grad_norm": 0.06208168342709541, "learning_rate": 0.0003974506900803967, "loss": 1.4868, "step": 13158 }, { "epoch": 0.5851229380641145, "grad_norm": 0.0600164569914341, "learning_rate": 0.0003973076136598456, "loss": 1.4877, "step": 13160 }, { "epoch": 0.5852118625227869, "grad_norm": 0.06281714886426926, "learning_rate": 0.0003971645460178922, "loss": 1.4813, "step": 13162 }, { "epoch": 0.5853007869814593, "grad_norm": 0.061320312321186066, "learning_rate": 0.0003970214871667665, "loss": 1.4813, "step": 13164 }, { "epoch": 0.5853897114401316, "grad_norm": 0.06139989197254181, "learning_rate": 0.000396878437118698, "loss": 1.4865, "step": 13166 }, { "epoch": 0.585478635898804, "grad_norm": 0.060984056442976, "learning_rate": 0.000396735395885915, "loss": 1.4816, "step": 13168 }, { "epoch": 0.5855675603574764, "grad_norm": 0.06185859069228172, "learning_rate": 0.00039659236348064557, "loss": 1.4809, "step": 13170 }, { "epoch": 0.5856564848161487, "grad_norm": 0.06174325570464134, "learning_rate": 0.0003964493399151166, "loss": 1.4774, "step": 13172 }, { "epoch": 0.585745409274821, "grad_norm": 0.06207715719938278, "learning_rate": 0.0003963063252015543, "loss": 1.4763, "step": 13174 }, { "epoch": 0.5858343337334934, "grad_norm": 0.06227646395564079, "learning_rate": 0.00039616331935218434, "loss": 1.4841, "step": 13176 }, { "epoch": 0.5859232581921657, "grad_norm": 0.06518425047397614, "learning_rate": 0.00039602032237923157, "loss": 1.4859, "step": 13178 }, { "epoch": 0.5860121826508381, "grad_norm": 0.06016527861356735, "learning_rate": 0.0003958773342949196, "loss": 1.4804, "step": 13180 }, { "epoch": 0.5861011071095105, "grad_norm": 0.060563359409570694, "learning_rate": 0.0003957343551114722, "loss": 1.4884, "step": 13182 }, { "epoch": 0.5861900315681828, "grad_norm": 0.060045305639505386, "learning_rate": 0.00039559138484111155, "loss": 1.4795, "step": 13184 }, { "epoch": 0.5862789560268552, "grad_norm": 0.06347208470106125, "learning_rate": 0.0003954484234960593, "loss": 1.4833, "step": 13186 }, { "epoch": 0.5863678804855276, "grad_norm": 0.0632077008485794, "learning_rate": 0.00039530547108853665, "loss": 1.4842, "step": 13188 }, { "epoch": 0.5864568049441999, "grad_norm": 0.061214666813611984, "learning_rate": 0.0003951625276307636, "loss": 1.4823, "step": 13190 }, { "epoch": 0.5865457294028723, "grad_norm": 0.06085453927516937, "learning_rate": 0.00039501959313495986, "loss": 1.4797, "step": 13192 }, { "epoch": 0.5866346538615446, "grad_norm": 0.06207851693034172, "learning_rate": 0.00039487666761334384, "loss": 1.4758, "step": 13194 }, { "epoch": 0.5867235783202169, "grad_norm": 0.061899829655885696, "learning_rate": 0.0003947337510781335, "loss": 1.4801, "step": 13196 }, { "epoch": 0.5868125027788893, "grad_norm": 0.06145618110895157, "learning_rate": 0.0003945908435415461, "loss": 1.4831, "step": 13198 }, { "epoch": 0.5869014272375617, "grad_norm": 0.06099288538098335, "learning_rate": 0.000394447945015798, "loss": 1.4886, "step": 13200 }, { "epoch": 0.586990351696234, "grad_norm": 0.06336424499750137, "learning_rate": 0.0003943050555131044, "loss": 1.4824, "step": 13202 }, { "epoch": 0.5870792761549064, "grad_norm": 0.062214065343141556, "learning_rate": 0.00039416217504568073, "loss": 1.4845, "step": 13204 }, { "epoch": 0.5871682006135788, "grad_norm": 0.0627097636461258, "learning_rate": 0.0003940193036257405, "loss": 1.4835, "step": 13206 }, { "epoch": 0.5872571250722511, "grad_norm": 0.06262244284152985, "learning_rate": 0.0003938764412654975, "loss": 1.4757, "step": 13208 }, { "epoch": 0.5873460495309235, "grad_norm": 0.06010544300079346, "learning_rate": 0.0003937335879771639, "loss": 1.4811, "step": 13210 }, { "epoch": 0.5874349739895959, "grad_norm": 0.06312010437250137, "learning_rate": 0.0003935907437729514, "loss": 1.4795, "step": 13212 }, { "epoch": 0.5875238984482682, "grad_norm": 0.06187771260738373, "learning_rate": 0.00039344790866507114, "loss": 1.4758, "step": 13214 }, { "epoch": 0.5876128229069405, "grad_norm": 0.060956425964832306, "learning_rate": 0.0003933050826657332, "loss": 1.478, "step": 13216 }, { "epoch": 0.5877017473656129, "grad_norm": 0.062204815447330475, "learning_rate": 0.00039316226578714686, "loss": 1.487, "step": 13218 }, { "epoch": 0.5877906718242852, "grad_norm": 0.06293026357889175, "learning_rate": 0.000393019458041521, "loss": 1.4837, "step": 13220 }, { "epoch": 0.5878795962829576, "grad_norm": 0.06233726814389229, "learning_rate": 0.00039287665944106324, "loss": 1.4822, "step": 13222 }, { "epoch": 0.58796852074163, "grad_norm": 0.06225262209773064, "learning_rate": 0.0003927338699979807, "loss": 1.4815, "step": 13224 }, { "epoch": 0.5880574452003023, "grad_norm": 0.06425213813781738, "learning_rate": 0.00039259108972447977, "loss": 1.4861, "step": 13226 }, { "epoch": 0.5881463696589747, "grad_norm": 0.06262674182653427, "learning_rate": 0.0003924483186327656, "loss": 1.4801, "step": 13228 }, { "epoch": 0.5882352941176471, "grad_norm": 0.06313468515872955, "learning_rate": 0.0003923055567350434, "loss": 1.4816, "step": 13230 }, { "epoch": 0.5883242185763194, "grad_norm": 0.06385188549757004, "learning_rate": 0.00039216280404351676, "loss": 1.4834, "step": 13232 }, { "epoch": 0.5884131430349918, "grad_norm": 0.06282700598239899, "learning_rate": 0.00039202006057038863, "loss": 1.4805, "step": 13234 }, { "epoch": 0.5885020674936642, "grad_norm": 0.061191339045763016, "learning_rate": 0.0003918773263278618, "loss": 1.4793, "step": 13236 }, { "epoch": 0.5885909919523364, "grad_norm": 0.059706203639507294, "learning_rate": 0.0003917346013281376, "loss": 1.482, "step": 13238 }, { "epoch": 0.5886799164110088, "grad_norm": 0.06191098317503929, "learning_rate": 0.00039159188558341666, "loss": 1.4848, "step": 13240 }, { "epoch": 0.5887688408696812, "grad_norm": 0.0614837221801281, "learning_rate": 0.0003914491791058991, "loss": 1.4783, "step": 13242 }, { "epoch": 0.5888577653283535, "grad_norm": 0.06151347979903221, "learning_rate": 0.0003913064819077841, "loss": 1.4825, "step": 13244 }, { "epoch": 0.5889466897870259, "grad_norm": 0.06056448817253113, "learning_rate": 0.00039116379400127, "loss": 1.4811, "step": 13246 }, { "epoch": 0.5890356142456983, "grad_norm": 0.06178190931677818, "learning_rate": 0.00039102111539855445, "loss": 1.4794, "step": 13248 }, { "epoch": 0.5891245387043706, "grad_norm": 0.061939485371112823, "learning_rate": 0.0003908784461118341, "loss": 1.4831, "step": 13250 }, { "epoch": 0.589213463163043, "grad_norm": 0.0606502890586853, "learning_rate": 0.0003907357861533052, "loss": 1.4822, "step": 13252 }, { "epoch": 0.5893023876217154, "grad_norm": 0.06153089553117752, "learning_rate": 0.0003905931355351629, "loss": 1.4805, "step": 13254 }, { "epoch": 0.5893913120803878, "grad_norm": 0.05961450934410095, "learning_rate": 0.00039045049426960124, "loss": 1.4787, "step": 13256 }, { "epoch": 0.5894802365390601, "grad_norm": 0.06326043605804443, "learning_rate": 0.0003903078623688143, "loss": 1.4821, "step": 13258 }, { "epoch": 0.5895691609977324, "grad_norm": 0.06015404686331749, "learning_rate": 0.00039016523984499455, "loss": 1.4795, "step": 13260 }, { "epoch": 0.5896580854564047, "grad_norm": 0.06200879067182541, "learning_rate": 0.0003900226267103343, "loss": 1.4836, "step": 13262 }, { "epoch": 0.5897470099150771, "grad_norm": 0.060929592698812485, "learning_rate": 0.0003898800229770245, "loss": 1.4835, "step": 13264 }, { "epoch": 0.5898359343737495, "grad_norm": 0.061469994485378265, "learning_rate": 0.0003897374286572555, "loss": 1.4779, "step": 13266 }, { "epoch": 0.5899248588324219, "grad_norm": 0.06109357252717018, "learning_rate": 0.0003895948437632171, "loss": 1.4852, "step": 13268 }, { "epoch": 0.5900137832910942, "grad_norm": 0.06095615401864052, "learning_rate": 0.0003894522683070981, "loss": 1.4847, "step": 13270 }, { "epoch": 0.5901027077497666, "grad_norm": 0.06231928989291191, "learning_rate": 0.00038930970230108616, "loss": 1.4761, "step": 13272 }, { "epoch": 0.590191632208439, "grad_norm": 0.06076860427856445, "learning_rate": 0.0003891671457573688, "loss": 1.4803, "step": 13274 }, { "epoch": 0.5902805566671113, "grad_norm": 0.06351371854543686, "learning_rate": 0.00038902459868813213, "loss": 1.4779, "step": 13276 }, { "epoch": 0.5903694811257837, "grad_norm": 0.0635959729552269, "learning_rate": 0.00038888206110556197, "loss": 1.4837, "step": 13278 }, { "epoch": 0.5904584055844561, "grad_norm": 0.06213785707950592, "learning_rate": 0.00038873953302184284, "loss": 1.481, "step": 13280 }, { "epoch": 0.5905473300431283, "grad_norm": 0.060719020664691925, "learning_rate": 0.0003885970144491587, "loss": 1.4851, "step": 13282 }, { "epoch": 0.5906362545018007, "grad_norm": 0.06250161677598953, "learning_rate": 0.00038845450539969274, "loss": 1.4812, "step": 13284 }, { "epoch": 0.590725178960473, "grad_norm": 0.061430674046278, "learning_rate": 0.00038831200588562736, "loss": 1.481, "step": 13286 }, { "epoch": 0.5908141034191454, "grad_norm": 0.06262683868408203, "learning_rate": 0.00038816951591914356, "loss": 1.4801, "step": 13288 }, { "epoch": 0.5909030278778178, "grad_norm": 0.062282636761665344, "learning_rate": 0.00038802703551242275, "loss": 1.4776, "step": 13290 }, { "epoch": 0.5909919523364902, "grad_norm": 0.06258291751146317, "learning_rate": 0.000387884564677644, "loss": 1.4808, "step": 13292 }, { "epoch": 0.5910808767951625, "grad_norm": 0.06155584007501602, "learning_rate": 0.00038774210342698714, "loss": 1.4778, "step": 13294 }, { "epoch": 0.5911698012538349, "grad_norm": 0.062455207109451294, "learning_rate": 0.0003875996517726298, "loss": 1.4812, "step": 13296 }, { "epoch": 0.5912587257125073, "grad_norm": 0.06287974864244461, "learning_rate": 0.0003874572097267495, "loss": 1.4872, "step": 13298 }, { "epoch": 0.5913476501711796, "grad_norm": 0.060571350157260895, "learning_rate": 0.00038731477730152294, "loss": 1.4832, "step": 13300 }, { "epoch": 0.5914365746298519, "grad_norm": 0.061406493186950684, "learning_rate": 0.0003871723545091258, "loss": 1.4813, "step": 13302 }, { "epoch": 0.5915254990885243, "grad_norm": 0.060636457055807114, "learning_rate": 0.000387029941361733, "loss": 1.485, "step": 13304 }, { "epoch": 0.5916144235471966, "grad_norm": 0.06447170674800873, "learning_rate": 0.00038688753787151863, "loss": 1.4819, "step": 13306 }, { "epoch": 0.591703348005869, "grad_norm": 0.06106022372841835, "learning_rate": 0.0003867451440506562, "loss": 1.4842, "step": 13308 }, { "epoch": 0.5917922724645414, "grad_norm": 0.062031909823417664, "learning_rate": 0.00038660275991131757, "loss": 1.4892, "step": 13310 }, { "epoch": 0.5918811969232137, "grad_norm": 0.05990834906697273, "learning_rate": 0.0003864603854656751, "loss": 1.4825, "step": 13312 }, { "epoch": 0.5919701213818861, "grad_norm": 0.062491413205862045, "learning_rate": 0.0003863180207258989, "loss": 1.4884, "step": 13314 }, { "epoch": 0.5920590458405585, "grad_norm": 0.06330056488513947, "learning_rate": 0.00038617566570415965, "loss": 1.4825, "step": 13316 }, { "epoch": 0.5921479702992308, "grad_norm": 0.060776226222515106, "learning_rate": 0.0003860333204126259, "loss": 1.4821, "step": 13318 }, { "epoch": 0.5922368947579032, "grad_norm": 0.05994359776377678, "learning_rate": 0.00038589098486346607, "loss": 1.4898, "step": 13320 }, { "epoch": 0.5923258192165756, "grad_norm": 0.0633685290813446, "learning_rate": 0.00038574865906884783, "loss": 1.4819, "step": 13322 }, { "epoch": 0.5924147436752478, "grad_norm": 0.06285269558429718, "learning_rate": 0.0003856063430409377, "loss": 1.4866, "step": 13324 }, { "epoch": 0.5925036681339202, "grad_norm": 0.06075499951839447, "learning_rate": 0.00038546403679190147, "loss": 1.4801, "step": 13326 }, { "epoch": 0.5925925925925926, "grad_norm": 0.062371302396059036, "learning_rate": 0.0003853217403339041, "loss": 1.4821, "step": 13328 }, { "epoch": 0.5926815170512649, "grad_norm": 0.06306248158216476, "learning_rate": 0.00038517945367910967, "loss": 1.4912, "step": 13330 }, { "epoch": 0.5927704415099373, "grad_norm": 0.0632634311914444, "learning_rate": 0.00038503717683968164, "loss": 1.4819, "step": 13332 }, { "epoch": 0.5928593659686097, "grad_norm": 0.06258311122655869, "learning_rate": 0.0003848949098277824, "loss": 1.4828, "step": 13334 }, { "epoch": 0.592948290427282, "grad_norm": 0.06177235394716263, "learning_rate": 0.00038475265265557334, "loss": 1.4772, "step": 13336 }, { "epoch": 0.5930372148859544, "grad_norm": 0.0634724497795105, "learning_rate": 0.0003846104053352156, "loss": 1.4817, "step": 13338 }, { "epoch": 0.5931261393446268, "grad_norm": 0.06223030388355255, "learning_rate": 0.00038446816787886896, "loss": 1.4796, "step": 13340 }, { "epoch": 0.5932150638032991, "grad_norm": 0.06202046200633049, "learning_rate": 0.0003843259402986922, "loss": 1.4812, "step": 13342 }, { "epoch": 0.5933039882619715, "grad_norm": 0.06367559731006622, "learning_rate": 0.00038418372260684406, "loss": 1.4762, "step": 13344 }, { "epoch": 0.5933929127206438, "grad_norm": 0.060448892414569855, "learning_rate": 0.00038404151481548143, "loss": 1.4802, "step": 13346 }, { "epoch": 0.5934818371793161, "grad_norm": 0.06097383797168732, "learning_rate": 0.0003838993169367615, "loss": 1.4802, "step": 13348 }, { "epoch": 0.5935707616379885, "grad_norm": 0.061548784375190735, "learning_rate": 0.0003837571289828394, "loss": 1.4821, "step": 13350 }, { "epoch": 0.5936596860966609, "grad_norm": 0.060715217143297195, "learning_rate": 0.0003836149509658701, "loss": 1.4833, "step": 13352 }, { "epoch": 0.5937486105553332, "grad_norm": 0.062499675899744034, "learning_rate": 0.0003834727828980078, "loss": 1.4768, "step": 13354 }, { "epoch": 0.5938375350140056, "grad_norm": 0.06051585450768471, "learning_rate": 0.00038333062479140555, "loss": 1.4806, "step": 13356 }, { "epoch": 0.593926459472678, "grad_norm": 0.06072830781340599, "learning_rate": 0.00038318847665821547, "loss": 1.4808, "step": 13358 }, { "epoch": 0.5940153839313503, "grad_norm": 0.06030082702636719, "learning_rate": 0.00038304633851058936, "loss": 1.4784, "step": 13360 }, { "epoch": 0.5941043083900227, "grad_norm": 0.06220033019781113, "learning_rate": 0.0003829042103606777, "loss": 1.4826, "step": 13362 }, { "epoch": 0.5941932328486951, "grad_norm": 0.0611334852874279, "learning_rate": 0.0003827620922206299, "loss": 1.4782, "step": 13364 }, { "epoch": 0.5942821573073674, "grad_norm": 0.06260346621274948, "learning_rate": 0.00038261998410259537, "loss": 1.4794, "step": 13366 }, { "epoch": 0.5943710817660397, "grad_norm": 0.06368494778871536, "learning_rate": 0.00038247788601872154, "loss": 1.4802, "step": 13368 }, { "epoch": 0.5944600062247121, "grad_norm": 0.06044352427124977, "learning_rate": 0.00038233579798115623, "loss": 1.4727, "step": 13370 }, { "epoch": 0.5945489306833844, "grad_norm": 0.06313683092594147, "learning_rate": 0.00038219372000204536, "loss": 1.4744, "step": 13372 }, { "epoch": 0.5946378551420568, "grad_norm": 0.06014512851834297, "learning_rate": 0.0003820516520935344, "loss": 1.4856, "step": 13374 }, { "epoch": 0.5947267796007292, "grad_norm": 0.06175994500517845, "learning_rate": 0.00038190959426776807, "loss": 1.4838, "step": 13376 }, { "epoch": 0.5948157040594015, "grad_norm": 0.06352592259645462, "learning_rate": 0.00038176754653689, "loss": 1.4735, "step": 13378 }, { "epoch": 0.5949046285180739, "grad_norm": 0.060022275894880295, "learning_rate": 0.00038162550891304297, "loss": 1.4773, "step": 13380 }, { "epoch": 0.5949935529767463, "grad_norm": 0.06236466392874718, "learning_rate": 0.00038148348140836923, "loss": 1.4835, "step": 13382 }, { "epoch": 0.5950824774354186, "grad_norm": 0.06078318506479263, "learning_rate": 0.0003813414640350097, "loss": 1.4867, "step": 13384 }, { "epoch": 0.595171401894091, "grad_norm": 0.06466275453567505, "learning_rate": 0.0003811994568051048, "loss": 1.4751, "step": 13386 }, { "epoch": 0.5952603263527634, "grad_norm": 0.062311556190252304, "learning_rate": 0.00038105745973079404, "loss": 1.4843, "step": 13388 }, { "epoch": 0.5953492508114356, "grad_norm": 0.06244157627224922, "learning_rate": 0.0003809154728242154, "loss": 1.4692, "step": 13390 }, { "epoch": 0.595438175270108, "grad_norm": 0.0595109798014164, "learning_rate": 0.00038077349609750725, "loss": 1.4827, "step": 13392 }, { "epoch": 0.5955270997287804, "grad_norm": 0.061755720525979996, "learning_rate": 0.000380631529562806, "loss": 1.4748, "step": 13394 }, { "epoch": 0.5956160241874527, "grad_norm": 0.06010547652840614, "learning_rate": 0.00038048957323224755, "loss": 1.4804, "step": 13396 }, { "epoch": 0.5957049486461251, "grad_norm": 0.0607173889875412, "learning_rate": 0.0003803476271179671, "loss": 1.48, "step": 13398 }, { "epoch": 0.5957938731047975, "grad_norm": 0.061135951429605484, "learning_rate": 0.0003802056912320987, "loss": 1.4828, "step": 13400 }, { "epoch": 0.5958827975634698, "grad_norm": 0.06174211576581001, "learning_rate": 0.00038006376558677587, "loss": 1.4777, "step": 13402 }, { "epoch": 0.5959717220221422, "grad_norm": 0.062369443476200104, "learning_rate": 0.00037992185019413083, "loss": 1.4799, "step": 13404 }, { "epoch": 0.5960606464808146, "grad_norm": 0.06260187923908234, "learning_rate": 0.0003797799450662951, "loss": 1.4755, "step": 13406 }, { "epoch": 0.596149570939487, "grad_norm": 0.06085886061191559, "learning_rate": 0.0003796380502153996, "loss": 1.4821, "step": 13408 }, { "epoch": 0.5962384953981593, "grad_norm": 0.061523739248514175, "learning_rate": 0.000379496165653574, "loss": 1.4822, "step": 13410 }, { "epoch": 0.5963274198568316, "grad_norm": 0.06035231426358223, "learning_rate": 0.0003793542913929471, "loss": 1.4808, "step": 13412 }, { "epoch": 0.5964163443155039, "grad_norm": 0.061069756746292114, "learning_rate": 0.0003792124274456471, "loss": 1.4753, "step": 13414 }, { "epoch": 0.5965052687741763, "grad_norm": 0.062064070254564285, "learning_rate": 0.000379070573823801, "loss": 1.482, "step": 13416 }, { "epoch": 0.5965941932328487, "grad_norm": 0.06175654008984566, "learning_rate": 0.0003789287305395352, "loss": 1.4805, "step": 13418 }, { "epoch": 0.596683117691521, "grad_norm": 0.06208856776356697, "learning_rate": 0.0003787868976049753, "loss": 1.4824, "step": 13420 }, { "epoch": 0.5967720421501934, "grad_norm": 0.062433741986751556, "learning_rate": 0.0003786450750322452, "loss": 1.4762, "step": 13422 }, { "epoch": 0.5968609666088658, "grad_norm": 0.061507366597652435, "learning_rate": 0.0003785032628334692, "loss": 1.482, "step": 13424 }, { "epoch": 0.5969498910675382, "grad_norm": 0.061356283724308014, "learning_rate": 0.0003783614610207696, "loss": 1.4781, "step": 13426 }, { "epoch": 0.5970388155262105, "grad_norm": 0.06329002976417542, "learning_rate": 0.00037821966960626817, "loss": 1.4757, "step": 13428 }, { "epoch": 0.5971277399848829, "grad_norm": 0.06240663677453995, "learning_rate": 0.0003780778886020863, "loss": 1.4779, "step": 13430 }, { "epoch": 0.5972166644435551, "grad_norm": 0.06056615337729454, "learning_rate": 0.0003779361180203437, "loss": 1.4722, "step": 13432 }, { "epoch": 0.5973055889022275, "grad_norm": 0.060851022601127625, "learning_rate": 0.0003777943578731596, "loss": 1.4827, "step": 13434 }, { "epoch": 0.5973945133608999, "grad_norm": 0.06265364587306976, "learning_rate": 0.0003776526081726524, "loss": 1.4797, "step": 13436 }, { "epoch": 0.5974834378195722, "grad_norm": 0.06204288452863693, "learning_rate": 0.0003775108689309393, "loss": 1.4828, "step": 13438 }, { "epoch": 0.5975723622782446, "grad_norm": 0.06286075711250305, "learning_rate": 0.00037736914016013714, "loss": 1.4838, "step": 13440 }, { "epoch": 0.597661286736917, "grad_norm": 0.059687890112400055, "learning_rate": 0.00037722742187236126, "loss": 1.4833, "step": 13442 }, { "epoch": 0.5977502111955894, "grad_norm": 0.060804031789302826, "learning_rate": 0.00037708571407972616, "loss": 1.4799, "step": 13444 }, { "epoch": 0.5978391356542617, "grad_norm": 0.061151038855314255, "learning_rate": 0.0003769440167943462, "loss": 1.4771, "step": 13446 }, { "epoch": 0.5979280601129341, "grad_norm": 0.06135578826069832, "learning_rate": 0.0003768023300283338, "loss": 1.4824, "step": 13448 }, { "epoch": 0.5980169845716065, "grad_norm": 0.06151726096868515, "learning_rate": 0.0003766606537938011, "loss": 1.4837, "step": 13450 }, { "epoch": 0.5981059090302788, "grad_norm": 0.06048937514424324, "learning_rate": 0.00037651898810285936, "loss": 1.4808, "step": 13452 }, { "epoch": 0.5981948334889511, "grad_norm": 0.06177039071917534, "learning_rate": 0.00037637733296761854, "loss": 1.4743, "step": 13454 }, { "epoch": 0.5982837579476235, "grad_norm": 0.06174348294734955, "learning_rate": 0.0003762356884001882, "loss": 1.4838, "step": 13456 }, { "epoch": 0.5983726824062958, "grad_norm": 0.06009867042303085, "learning_rate": 0.0003760940544126766, "loss": 1.4775, "step": 13458 }, { "epoch": 0.5984616068649682, "grad_norm": 0.06166113540530205, "learning_rate": 0.0003759524310171912, "loss": 1.4841, "step": 13460 }, { "epoch": 0.5985505313236406, "grad_norm": 0.061972811818122864, "learning_rate": 0.0003758108182258388, "loss": 1.4817, "step": 13462 }, { "epoch": 0.5986394557823129, "grad_norm": 0.059086523950099945, "learning_rate": 0.0003756692160507249, "loss": 1.4769, "step": 13464 }, { "epoch": 0.5987283802409853, "grad_norm": 0.06444913148880005, "learning_rate": 0.0003755276245039542, "loss": 1.4782, "step": 13466 }, { "epoch": 0.5988173046996577, "grad_norm": 0.06151975691318512, "learning_rate": 0.00037538604359763084, "loss": 1.4785, "step": 13468 }, { "epoch": 0.59890622915833, "grad_norm": 0.06152950972318649, "learning_rate": 0.0003752444733438575, "loss": 1.4749, "step": 13470 }, { "epoch": 0.5989951536170024, "grad_norm": 0.060316842049360275, "learning_rate": 0.0003751029137547364, "loss": 1.4807, "step": 13472 }, { "epoch": 0.5990840780756748, "grad_norm": 0.06243583559989929, "learning_rate": 0.00037496136484236887, "loss": 1.4782, "step": 13474 }, { "epoch": 0.599173002534347, "grad_norm": 0.06194531172513962, "learning_rate": 0.0003748198266188545, "loss": 1.481, "step": 13476 }, { "epoch": 0.5992619269930194, "grad_norm": 0.061740942299366, "learning_rate": 0.0003746782990962933, "loss": 1.4746, "step": 13478 }, { "epoch": 0.5993508514516918, "grad_norm": 0.06074299290776253, "learning_rate": 0.00037453678228678333, "loss": 1.4771, "step": 13480 }, { "epoch": 0.5994397759103641, "grad_norm": 0.060749128460884094, "learning_rate": 0.00037439527620242196, "loss": 1.4847, "step": 13482 }, { "epoch": 0.5995287003690365, "grad_norm": 0.06093888357281685, "learning_rate": 0.00037425378085530604, "loss": 1.4798, "step": 13484 }, { "epoch": 0.5996176248277089, "grad_norm": 0.061810996383428574, "learning_rate": 0.0003741122962575309, "loss": 1.4813, "step": 13486 }, { "epoch": 0.5997065492863812, "grad_norm": 0.06119661033153534, "learning_rate": 0.00037397082242119164, "loss": 1.4694, "step": 13488 }, { "epoch": 0.5997954737450536, "grad_norm": 0.0609118789434433, "learning_rate": 0.0003738293593583818, "loss": 1.4833, "step": 13490 }, { "epoch": 0.599884398203726, "grad_norm": 0.06090276688337326, "learning_rate": 0.00037368790708119427, "loss": 1.4821, "step": 13492 }, { "epoch": 0.5999733226623983, "grad_norm": 0.06049749627709389, "learning_rate": 0.0003735464656017212, "loss": 1.4806, "step": 13494 }, { "epoch": 0.6000622471210707, "grad_norm": 0.06154240667819977, "learning_rate": 0.00037340503493205355, "loss": 1.4845, "step": 13496 }, { "epoch": 0.600151171579743, "grad_norm": 0.060583166778087616, "learning_rate": 0.0003732636150842811, "loss": 1.4793, "step": 13498 }, { "epoch": 0.6002400960384153, "grad_norm": 0.06009206175804138, "learning_rate": 0.00037312220607049377, "loss": 1.4784, "step": 13500 }, { "epoch": 0.6002400960384153, "eval_loss": 1.4636259078979492, "eval_runtime": 12.3839, "eval_samples_per_second": 557.984, "eval_steps_per_second": 69.768, "step": 13500 }, { "epoch": 0.6003290204970877, "grad_norm": 0.061223968863487244, "learning_rate": 0.0003729808079027792, "loss": 1.4799, "step": 13502 }, { "epoch": 0.6004179449557601, "grad_norm": 0.06168459728360176, "learning_rate": 0.0003728394205932248, "loss": 1.4809, "step": 13504 }, { "epoch": 0.6005068694144324, "grad_norm": 0.062344878911972046, "learning_rate": 0.00037269804415391726, "loss": 1.4763, "step": 13506 }, { "epoch": 0.6005957938731048, "grad_norm": 0.061719466000795364, "learning_rate": 0.00037255667859694173, "loss": 1.4798, "step": 13508 }, { "epoch": 0.6006847183317772, "grad_norm": 0.06244780495762825, "learning_rate": 0.00037241532393438313, "loss": 1.4775, "step": 13510 }, { "epoch": 0.6007736427904495, "grad_norm": 0.06072971224784851, "learning_rate": 0.00037227398017832485, "loss": 1.4808, "step": 13512 }, { "epoch": 0.6008625672491219, "grad_norm": 0.06084993854165077, "learning_rate": 0.0003721326473408495, "loss": 1.4764, "step": 13514 }, { "epoch": 0.6009514917077943, "grad_norm": 0.06384997069835663, "learning_rate": 0.00037199132543403916, "loss": 1.4758, "step": 13516 }, { "epoch": 0.6010404161664666, "grad_norm": 0.05961621552705765, "learning_rate": 0.0003718500144699743, "loss": 1.4846, "step": 13518 }, { "epoch": 0.6011293406251389, "grad_norm": 0.05951659008860588, "learning_rate": 0.0003717087144607348, "loss": 1.4813, "step": 13520 }, { "epoch": 0.6012182650838113, "grad_norm": 0.06181299686431885, "learning_rate": 0.00037156742541840005, "loss": 1.4786, "step": 13522 }, { "epoch": 0.6013071895424836, "grad_norm": 0.06189553067088127, "learning_rate": 0.00037142614735504755, "loss": 1.4674, "step": 13524 }, { "epoch": 0.601396114001156, "grad_norm": 0.06160914897918701, "learning_rate": 0.00037128488028275467, "loss": 1.4807, "step": 13526 }, { "epoch": 0.6014850384598284, "grad_norm": 0.05913588032126427, "learning_rate": 0.0003711436242135975, "loss": 1.4799, "step": 13528 }, { "epoch": 0.6015739629185007, "grad_norm": 0.060983993113040924, "learning_rate": 0.00037100237915965094, "loss": 1.4758, "step": 13530 }, { "epoch": 0.6016628873771731, "grad_norm": 0.06069378927350044, "learning_rate": 0.0003708611451329898, "loss": 1.4809, "step": 13532 }, { "epoch": 0.6017518118358455, "grad_norm": 0.061977144330739975, "learning_rate": 0.00037071992214568694, "loss": 1.4809, "step": 13534 }, { "epoch": 0.6018407362945178, "grad_norm": 0.062056753784418106, "learning_rate": 0.00037057871020981483, "loss": 1.4725, "step": 13536 }, { "epoch": 0.6019296607531902, "grad_norm": 0.0617976188659668, "learning_rate": 0.00037043750933744493, "loss": 1.4793, "step": 13538 }, { "epoch": 0.6020185852118626, "grad_norm": 0.06159581243991852, "learning_rate": 0.0003702963195406476, "loss": 1.4774, "step": 13540 }, { "epoch": 0.6021075096705348, "grad_norm": 0.0624043270945549, "learning_rate": 0.0003701551408314926, "loss": 1.4865, "step": 13542 }, { "epoch": 0.6021964341292072, "grad_norm": 0.06266829371452332, "learning_rate": 0.00037001397322204834, "loss": 1.4791, "step": 13544 }, { "epoch": 0.6022853585878796, "grad_norm": 0.061347655951976776, "learning_rate": 0.0003698728167243824, "loss": 1.4767, "step": 13546 }, { "epoch": 0.6023742830465519, "grad_norm": 0.05982671678066254, "learning_rate": 0.0003697316713505616, "loss": 1.4754, "step": 13548 }, { "epoch": 0.6024632075052243, "grad_norm": 0.06235219165682793, "learning_rate": 0.00036959053711265176, "loss": 1.4841, "step": 13550 }, { "epoch": 0.6025521319638967, "grad_norm": 0.06052573025226593, "learning_rate": 0.00036944941402271714, "loss": 1.4776, "step": 13552 }, { "epoch": 0.602641056422569, "grad_norm": 0.06316308677196503, "learning_rate": 0.00036930830209282227, "loss": 1.486, "step": 13554 }, { "epoch": 0.6027299808812414, "grad_norm": 0.06305599957704544, "learning_rate": 0.0003691672013350296, "loss": 1.477, "step": 13556 }, { "epoch": 0.6028189053399138, "grad_norm": 0.061367884278297424, "learning_rate": 0.00036902611176140097, "loss": 1.4807, "step": 13558 }, { "epoch": 0.6029078297985861, "grad_norm": 0.06356421858072281, "learning_rate": 0.00036888503338399766, "loss": 1.4872, "step": 13560 }, { "epoch": 0.6029967542572584, "grad_norm": 0.06272092461585999, "learning_rate": 0.0003687439662148793, "loss": 1.4741, "step": 13562 }, { "epoch": 0.6030856787159308, "grad_norm": 0.060393061488866806, "learning_rate": 0.0003686029102661053, "loss": 1.4814, "step": 13564 }, { "epoch": 0.6031746031746031, "grad_norm": 0.06250613927841187, "learning_rate": 0.0003684618655497336, "loss": 1.4819, "step": 13566 }, { "epoch": 0.6032635276332755, "grad_norm": 0.0645843967795372, "learning_rate": 0.00036832083207782114, "loss": 1.482, "step": 13568 }, { "epoch": 0.6033524520919479, "grad_norm": 0.06406543403863907, "learning_rate": 0.0003681798098624244, "loss": 1.4779, "step": 13570 }, { "epoch": 0.6034413765506202, "grad_norm": 0.06072346493601799, "learning_rate": 0.0003680387989155984, "loss": 1.4713, "step": 13572 }, { "epoch": 0.6035303010092926, "grad_norm": 0.06097434088587761, "learning_rate": 0.00036789779924939725, "loss": 1.4776, "step": 13574 }, { "epoch": 0.603619225467965, "grad_norm": 0.06126450002193451, "learning_rate": 0.00036775681087587454, "loss": 1.4791, "step": 13576 }, { "epoch": 0.6037081499266373, "grad_norm": 0.061803415417671204, "learning_rate": 0.0003676158338070823, "loss": 1.4737, "step": 13578 }, { "epoch": 0.6037970743853097, "grad_norm": 0.058394886553287506, "learning_rate": 0.0003674748680550721, "loss": 1.4767, "step": 13580 }, { "epoch": 0.6038859988439821, "grad_norm": 0.061498358845710754, "learning_rate": 0.0003673339136318943, "loss": 1.4785, "step": 13582 }, { "epoch": 0.6039749233026543, "grad_norm": 0.06015564873814583, "learning_rate": 0.0003671929705495979, "loss": 1.4798, "step": 13584 }, { "epoch": 0.6040638477613267, "grad_norm": 0.06150843948125839, "learning_rate": 0.0003670520388202319, "loss": 1.4846, "step": 13586 }, { "epoch": 0.6041527722199991, "grad_norm": 0.06126067414879799, "learning_rate": 0.0003669111184558435, "loss": 1.4828, "step": 13588 }, { "epoch": 0.6042416966786714, "grad_norm": 0.06068529933691025, "learning_rate": 0.00036677020946847895, "loss": 1.4826, "step": 13590 }, { "epoch": 0.6043306211373438, "grad_norm": 0.06273922324180603, "learning_rate": 0.00036662931187018423, "loss": 1.4812, "step": 13592 }, { "epoch": 0.6044195455960162, "grad_norm": 0.06314177811145782, "learning_rate": 0.00036648842567300356, "loss": 1.4776, "step": 13594 }, { "epoch": 0.6045084700546886, "grad_norm": 0.0634959414601326, "learning_rate": 0.00036634755088898064, "loss": 1.482, "step": 13596 }, { "epoch": 0.6045973945133609, "grad_norm": 0.06375706940889359, "learning_rate": 0.0003662066875301582, "loss": 1.4779, "step": 13598 }, { "epoch": 0.6046863189720333, "grad_norm": 0.0621272437274456, "learning_rate": 0.00036606583560857756, "loss": 1.4859, "step": 13600 }, { "epoch": 0.6047752434307057, "grad_norm": 0.0640636533498764, "learning_rate": 0.00036592499513627953, "loss": 1.4766, "step": 13602 }, { "epoch": 0.604864167889378, "grad_norm": 0.06891714781522751, "learning_rate": 0.00036578416612530396, "loss": 1.4821, "step": 13604 }, { "epoch": 0.6049530923480503, "grad_norm": 0.065804123878479, "learning_rate": 0.00036564334858768906, "loss": 1.4774, "step": 13606 }, { "epoch": 0.6050420168067226, "grad_norm": 0.06563190370798111, "learning_rate": 0.0003655025425354731, "loss": 1.4788, "step": 13608 }, { "epoch": 0.605130941265395, "grad_norm": 0.06128527969121933, "learning_rate": 0.00036536174798069217, "loss": 1.4766, "step": 13610 }, { "epoch": 0.6052198657240674, "grad_norm": 0.06118803471326828, "learning_rate": 0.0003652209649353827, "loss": 1.4826, "step": 13612 }, { "epoch": 0.6053087901827398, "grad_norm": 0.06206532567739487, "learning_rate": 0.00036508019341157896, "loss": 1.4774, "step": 13614 }, { "epoch": 0.6053977146414121, "grad_norm": 0.062083590775728226, "learning_rate": 0.00036493943342131473, "loss": 1.4813, "step": 13616 }, { "epoch": 0.6054866391000845, "grad_norm": 0.06275159120559692, "learning_rate": 0.0003647986849766231, "loss": 1.4792, "step": 13618 }, { "epoch": 0.6055755635587569, "grad_norm": 0.06044034659862518, "learning_rate": 0.0003646579480895357, "loss": 1.4774, "step": 13620 }, { "epoch": 0.6056644880174292, "grad_norm": 0.06259037554264069, "learning_rate": 0.00036451722277208314, "loss": 1.482, "step": 13622 }, { "epoch": 0.6057534124761016, "grad_norm": 0.0622219480574131, "learning_rate": 0.0003643765090362956, "loss": 1.4756, "step": 13624 }, { "epoch": 0.605842336934774, "grad_norm": 0.060978252440690994, "learning_rate": 0.0003642358068942017, "loss": 1.4794, "step": 13626 }, { "epoch": 0.6059312613934462, "grad_norm": 0.059897031635046005, "learning_rate": 0.00036409511635782923, "loss": 1.4817, "step": 13628 }, { "epoch": 0.6060201858521186, "grad_norm": 0.06172829493880272, "learning_rate": 0.00036395443743920524, "loss": 1.4788, "step": 13630 }, { "epoch": 0.606109110310791, "grad_norm": 0.06160893663764, "learning_rate": 0.0003638137701503554, "loss": 1.4734, "step": 13632 }, { "epoch": 0.6061980347694633, "grad_norm": 0.06013627350330353, "learning_rate": 0.0003636731145033047, "loss": 1.4766, "step": 13634 }, { "epoch": 0.6062869592281357, "grad_norm": 0.06238262727856636, "learning_rate": 0.0003635324705100771, "loss": 1.483, "step": 13636 }, { "epoch": 0.6063758836868081, "grad_norm": 0.06085238978266716, "learning_rate": 0.0003633918381826949, "loss": 1.479, "step": 13638 }, { "epoch": 0.6064648081454804, "grad_norm": 0.06417615711688995, "learning_rate": 0.0003632512175331808, "loss": 1.4799, "step": 13640 }, { "epoch": 0.6065537326041528, "grad_norm": 0.06320105493068695, "learning_rate": 0.0003631106085735551, "loss": 1.4779, "step": 13642 }, { "epoch": 0.6066426570628252, "grad_norm": 0.062168557196855545, "learning_rate": 0.00036297001131583776, "loss": 1.4769, "step": 13644 }, { "epoch": 0.6067315815214975, "grad_norm": 0.06036188453435898, "learning_rate": 0.00036282942577204783, "loss": 1.4753, "step": 13646 }, { "epoch": 0.6068205059801699, "grad_norm": 0.0608220137655735, "learning_rate": 0.00036268885195420294, "loss": 1.4779, "step": 13648 }, { "epoch": 0.6069094304388422, "grad_norm": 0.060881808400154114, "learning_rate": 0.0003625482898743202, "loss": 1.4739, "step": 13650 }, { "epoch": 0.6069983548975145, "grad_norm": 0.060534775257110596, "learning_rate": 0.0003624077395444154, "loss": 1.4814, "step": 13652 }, { "epoch": 0.6070872793561869, "grad_norm": 0.06239446997642517, "learning_rate": 0.00036226720097650323, "loss": 1.4804, "step": 13654 }, { "epoch": 0.6071762038148593, "grad_norm": 0.06479929387569427, "learning_rate": 0.0003621266741825978, "loss": 1.4788, "step": 13656 }, { "epoch": 0.6072651282735316, "grad_norm": 0.06032795459032059, "learning_rate": 0.000361986159174712, "loss": 1.4804, "step": 13658 }, { "epoch": 0.607354052732204, "grad_norm": 0.06258971989154816, "learning_rate": 0.0003618456559648572, "loss": 1.4784, "step": 13660 }, { "epoch": 0.6074429771908764, "grad_norm": 0.060947444289922714, "learning_rate": 0.0003617051645650448, "loss": 1.4767, "step": 13662 }, { "epoch": 0.6075319016495487, "grad_norm": 0.0637674406170845, "learning_rate": 0.0003615646849872841, "loss": 1.4773, "step": 13664 }, { "epoch": 0.6076208261082211, "grad_norm": 0.05858977884054184, "learning_rate": 0.00036142421724358456, "loss": 1.4761, "step": 13666 }, { "epoch": 0.6077097505668935, "grad_norm": 0.060713764280080795, "learning_rate": 0.0003612837613459534, "loss": 1.4735, "step": 13668 }, { "epoch": 0.6077986750255657, "grad_norm": 0.05963486433029175, "learning_rate": 0.0003611433173063976, "loss": 1.4801, "step": 13670 }, { "epoch": 0.6078875994842381, "grad_norm": 0.06113218143582344, "learning_rate": 0.00036100288513692304, "loss": 1.4722, "step": 13672 }, { "epoch": 0.6079765239429105, "grad_norm": 0.06065460667014122, "learning_rate": 0.00036086246484953445, "loss": 1.4768, "step": 13674 }, { "epoch": 0.6080654484015828, "grad_norm": 0.060706645250320435, "learning_rate": 0.00036072205645623536, "loss": 1.4783, "step": 13676 }, { "epoch": 0.6081543728602552, "grad_norm": 0.06271646171808243, "learning_rate": 0.00036058165996902876, "loss": 1.479, "step": 13678 }, { "epoch": 0.6082432973189276, "grad_norm": 0.06224247068166733, "learning_rate": 0.0003604412753999163, "loss": 1.4785, "step": 13680 }, { "epoch": 0.6083322217775999, "grad_norm": 0.06208979710936546, "learning_rate": 0.0003603009027608985, "loss": 1.4811, "step": 13682 }, { "epoch": 0.6084211462362723, "grad_norm": 0.06194361299276352, "learning_rate": 0.0003601605420639753, "loss": 1.4771, "step": 13684 }, { "epoch": 0.6085100706949447, "grad_norm": 0.06201132759451866, "learning_rate": 0.00036002019332114513, "loss": 1.4754, "step": 13686 }, { "epoch": 0.608598995153617, "grad_norm": 0.06057362258434296, "learning_rate": 0.0003598798565444058, "loss": 1.4791, "step": 13688 }, { "epoch": 0.6086879196122894, "grad_norm": 0.062178537249565125, "learning_rate": 0.0003597395317457538, "loss": 1.476, "step": 13690 }, { "epoch": 0.6087768440709617, "grad_norm": 0.06185256317257881, "learning_rate": 0.0003595992189371846, "loss": 1.4752, "step": 13692 }, { "epoch": 0.608865768529634, "grad_norm": 0.06281464546918869, "learning_rate": 0.0003594589181306931, "loss": 1.478, "step": 13694 }, { "epoch": 0.6089546929883064, "grad_norm": 0.060524832457304, "learning_rate": 0.00035931862933827254, "loss": 1.4829, "step": 13696 }, { "epoch": 0.6090436174469788, "grad_norm": 0.06258238852024078, "learning_rate": 0.0003591783525719154, "loss": 1.4736, "step": 13698 }, { "epoch": 0.6091325419056511, "grad_norm": 0.06145402789115906, "learning_rate": 0.00035903808784361335, "loss": 1.4815, "step": 13700 }, { "epoch": 0.6092214663643235, "grad_norm": 0.06079664081335068, "learning_rate": 0.0003588978351653567, "loss": 1.4756, "step": 13702 }, { "epoch": 0.6093103908229959, "grad_norm": 0.061961762607097626, "learning_rate": 0.000358757594549135, "loss": 1.4807, "step": 13704 }, { "epoch": 0.6093993152816682, "grad_norm": 0.06455682963132858, "learning_rate": 0.0003586173660069365, "loss": 1.4786, "step": 13706 }, { "epoch": 0.6094882397403406, "grad_norm": 0.06444698572158813, "learning_rate": 0.0003584771495507486, "loss": 1.4827, "step": 13708 }, { "epoch": 0.609577164199013, "grad_norm": 0.06271133571863174, "learning_rate": 0.00035833694519255764, "loss": 1.4749, "step": 13710 }, { "epoch": 0.6096660886576853, "grad_norm": 0.06196901574730873, "learning_rate": 0.00035819675294434904, "loss": 1.4734, "step": 13712 }, { "epoch": 0.6097550131163576, "grad_norm": 0.06276630610227585, "learning_rate": 0.0003580565728181066, "loss": 1.4832, "step": 13714 }, { "epoch": 0.60984393757503, "grad_norm": 0.06265407800674438, "learning_rate": 0.0003579164048258141, "loss": 1.4774, "step": 13716 }, { "epoch": 0.6099328620337023, "grad_norm": 0.06209501251578331, "learning_rate": 0.00035777624897945324, "loss": 1.4789, "step": 13718 }, { "epoch": 0.6100217864923747, "grad_norm": 0.06323768943548203, "learning_rate": 0.00035763610529100567, "loss": 1.4795, "step": 13720 }, { "epoch": 0.6101107109510471, "grad_norm": 0.06337448954582214, "learning_rate": 0.000357495973772451, "loss": 1.4814, "step": 13722 }, { "epoch": 0.6101996354097194, "grad_norm": 0.06219518184661865, "learning_rate": 0.00035735585443576846, "loss": 1.4698, "step": 13724 }, { "epoch": 0.6102885598683918, "grad_norm": 0.06129265949130058, "learning_rate": 0.00035721574729293616, "loss": 1.4807, "step": 13726 }, { "epoch": 0.6103774843270642, "grad_norm": 0.0608796663582325, "learning_rate": 0.00035707565235593104, "loss": 1.4743, "step": 13728 }, { "epoch": 0.6104664087857365, "grad_norm": 0.06377924978733063, "learning_rate": 0.0003569355696367289, "loss": 1.4794, "step": 13730 }, { "epoch": 0.6105553332444089, "grad_norm": 0.06089383736252785, "learning_rate": 0.0003567954991473048, "loss": 1.4803, "step": 13732 }, { "epoch": 0.6106442577030813, "grad_norm": 0.0606466569006443, "learning_rate": 0.00035665544089963244, "loss": 1.4818, "step": 13734 }, { "epoch": 0.6107331821617535, "grad_norm": 0.06015947833657265, "learning_rate": 0.0003565153949056849, "loss": 1.4785, "step": 13736 }, { "epoch": 0.6108221066204259, "grad_norm": 0.06213843822479248, "learning_rate": 0.00035637536117743364, "loss": 1.4752, "step": 13738 }, { "epoch": 0.6109110310790983, "grad_norm": 0.061907023191452026, "learning_rate": 0.0003562353397268492, "loss": 1.4774, "step": 13740 }, { "epoch": 0.6109999555377706, "grad_norm": 0.06074616312980652, "learning_rate": 0.0003560953305659018, "loss": 1.4726, "step": 13742 }, { "epoch": 0.611088879996443, "grad_norm": 0.06233498081564903, "learning_rate": 0.0003559553337065596, "loss": 1.4772, "step": 13744 }, { "epoch": 0.6111778044551154, "grad_norm": 0.06261789798736572, "learning_rate": 0.00035581534916079017, "loss": 1.4796, "step": 13746 }, { "epoch": 0.6112667289137877, "grad_norm": 0.06077142059803009, "learning_rate": 0.0003556753769405601, "loss": 1.4757, "step": 13748 }, { "epoch": 0.6113556533724601, "grad_norm": 0.05928738787770271, "learning_rate": 0.0003555354170578349, "loss": 1.4729, "step": 13750 }, { "epoch": 0.6114445778311325, "grad_norm": 0.06241046264767647, "learning_rate": 0.00035539546952457877, "loss": 1.4816, "step": 13752 }, { "epoch": 0.6115335022898049, "grad_norm": 0.06255488097667694, "learning_rate": 0.00035525553435275536, "loss": 1.4826, "step": 13754 }, { "epoch": 0.6116224267484772, "grad_norm": 0.06241544336080551, "learning_rate": 0.0003551156115543265, "loss": 1.4832, "step": 13756 }, { "epoch": 0.6117113512071495, "grad_norm": 0.0616549476981163, "learning_rate": 0.00035497570114125383, "loss": 1.481, "step": 13758 }, { "epoch": 0.6118002756658218, "grad_norm": 0.06216934695839882, "learning_rate": 0.0003548358031254973, "loss": 1.4804, "step": 13760 }, { "epoch": 0.6118892001244942, "grad_norm": 0.06092384457588196, "learning_rate": 0.000354695917519016, "loss": 1.48, "step": 13762 }, { "epoch": 0.6119781245831666, "grad_norm": 0.06199733540415764, "learning_rate": 0.000354556044333768, "loss": 1.4815, "step": 13764 }, { "epoch": 0.612067049041839, "grad_norm": 0.060262758284807205, "learning_rate": 0.00035441618358171046, "loss": 1.4774, "step": 13766 }, { "epoch": 0.6121559735005113, "grad_norm": 0.06175270304083824, "learning_rate": 0.00035427633527479875, "loss": 1.4805, "step": 13768 }, { "epoch": 0.6122448979591837, "grad_norm": 0.06414217501878738, "learning_rate": 0.00035413649942498836, "loss": 1.4717, "step": 13770 }, { "epoch": 0.612333822417856, "grad_norm": 0.06109687313437462, "learning_rate": 0.00035399667604423256, "loss": 1.4784, "step": 13772 }, { "epoch": 0.6124227468765284, "grad_norm": 0.06094755604863167, "learning_rate": 0.0003538568651444847, "loss": 1.481, "step": 13774 }, { "epoch": 0.6125116713352008, "grad_norm": 0.059903766959905624, "learning_rate": 0.00035371706673769575, "loss": 1.4742, "step": 13776 }, { "epoch": 0.6126005957938732, "grad_norm": 0.06042702868580818, "learning_rate": 0.0003535772808358166, "loss": 1.4756, "step": 13778 }, { "epoch": 0.6126895202525454, "grad_norm": 0.05981525033712387, "learning_rate": 0.00035343750745079683, "loss": 1.482, "step": 13780 }, { "epoch": 0.6127784447112178, "grad_norm": 0.060655247420072556, "learning_rate": 0.0003532977465945848, "loss": 1.4782, "step": 13782 }, { "epoch": 0.6128673691698902, "grad_norm": 0.06158040091395378, "learning_rate": 0.00035315799827912776, "loss": 1.4821, "step": 13784 }, { "epoch": 0.6129562936285625, "grad_norm": 0.06122865527868271, "learning_rate": 0.0003530182625163723, "loss": 1.4724, "step": 13786 }, { "epoch": 0.6130452180872349, "grad_norm": 0.061763033270835876, "learning_rate": 0.0003528785393182633, "loss": 1.4795, "step": 13788 }, { "epoch": 0.6131341425459073, "grad_norm": 0.06082763522863388, "learning_rate": 0.0003527388286967453, "loss": 1.4738, "step": 13790 }, { "epoch": 0.6132230670045796, "grad_norm": 0.061021216213703156, "learning_rate": 0.0003525991306637612, "loss": 1.4728, "step": 13792 }, { "epoch": 0.613311991463252, "grad_norm": 0.06356395781040192, "learning_rate": 0.00035245944523125275, "loss": 1.4708, "step": 13794 }, { "epoch": 0.6134009159219244, "grad_norm": 0.05992809683084488, "learning_rate": 0.00035231977241116137, "loss": 1.476, "step": 13796 }, { "epoch": 0.6134898403805967, "grad_norm": 0.06261426210403442, "learning_rate": 0.00035218011221542657, "loss": 1.477, "step": 13798 }, { "epoch": 0.613578764839269, "grad_norm": 0.06266981363296509, "learning_rate": 0.00035204046465598705, "loss": 1.4794, "step": 13800 }, { "epoch": 0.6136676892979414, "grad_norm": 0.06202618405222893, "learning_rate": 0.0003519008297447808, "loss": 1.4745, "step": 13802 }, { "epoch": 0.6137566137566137, "grad_norm": 0.06045003980398178, "learning_rate": 0.00035176120749374416, "loss": 1.4759, "step": 13804 }, { "epoch": 0.6138455382152861, "grad_norm": 0.06169283390045166, "learning_rate": 0.00035162159791481286, "loss": 1.4783, "step": 13806 }, { "epoch": 0.6139344626739585, "grad_norm": 0.06302846968173981, "learning_rate": 0.00035148200101992126, "loss": 1.4796, "step": 13808 }, { "epoch": 0.6140233871326308, "grad_norm": 0.06206449121236801, "learning_rate": 0.00035134241682100267, "loss": 1.4747, "step": 13810 }, { "epoch": 0.6141123115913032, "grad_norm": 0.06087673828005791, "learning_rate": 0.00035120284532998944, "loss": 1.4813, "step": 13812 }, { "epoch": 0.6142012360499756, "grad_norm": 0.06068653240799904, "learning_rate": 0.0003510632865588127, "loss": 1.4734, "step": 13814 }, { "epoch": 0.6142901605086479, "grad_norm": 0.05969279631972313, "learning_rate": 0.00035092374051940247, "loss": 1.4742, "step": 13816 }, { "epoch": 0.6143790849673203, "grad_norm": 0.06131194904446602, "learning_rate": 0.000350784207223688, "loss": 1.4765, "step": 13818 }, { "epoch": 0.6144680094259927, "grad_norm": 0.061875831335783005, "learning_rate": 0.0003506446866835972, "loss": 1.4741, "step": 13820 }, { "epoch": 0.6145569338846649, "grad_norm": 0.061578866094350815, "learning_rate": 0.00035050517891105636, "loss": 1.4751, "step": 13822 }, { "epoch": 0.6146458583433373, "grad_norm": 0.06131550669670105, "learning_rate": 0.000350365683917992, "loss": 1.4812, "step": 13824 }, { "epoch": 0.6147347828020097, "grad_norm": 0.06422258168458939, "learning_rate": 0.00035022620171632813, "loss": 1.4829, "step": 13826 }, { "epoch": 0.614823707260682, "grad_norm": 0.059642113745212555, "learning_rate": 0.0003500867323179888, "loss": 1.4753, "step": 13828 }, { "epoch": 0.6149126317193544, "grad_norm": 0.06245987117290497, "learning_rate": 0.00034994727573489615, "loss": 1.4773, "step": 13830 }, { "epoch": 0.6150015561780268, "grad_norm": 0.06119692325592041, "learning_rate": 0.0003498078319789716, "loss": 1.4806, "step": 13832 }, { "epoch": 0.6150904806366991, "grad_norm": 0.05997243896126747, "learning_rate": 0.0003496684010621354, "loss": 1.4792, "step": 13834 }, { "epoch": 0.6151794050953715, "grad_norm": 0.061276134103536606, "learning_rate": 0.00034952898299630685, "loss": 1.4761, "step": 13836 }, { "epoch": 0.6152683295540439, "grad_norm": 0.06231103837490082, "learning_rate": 0.0003493895777934039, "loss": 1.4709, "step": 13838 }, { "epoch": 0.6153572540127162, "grad_norm": 0.06226308271288872, "learning_rate": 0.0003492501854653436, "loss": 1.4754, "step": 13840 }, { "epoch": 0.6154461784713886, "grad_norm": 0.06134946271777153, "learning_rate": 0.00034911080602404164, "loss": 1.4771, "step": 13842 }, { "epoch": 0.6155351029300609, "grad_norm": 0.06262792646884918, "learning_rate": 0.0003489714394814131, "loss": 1.4744, "step": 13844 }, { "epoch": 0.6156240273887332, "grad_norm": 0.06038961932063103, "learning_rate": 0.00034883208584937154, "loss": 1.4786, "step": 13846 }, { "epoch": 0.6157129518474056, "grad_norm": 0.06001369282603264, "learning_rate": 0.0003486927451398292, "loss": 1.4753, "step": 13848 }, { "epoch": 0.615801876306078, "grad_norm": 0.06046263128519058, "learning_rate": 0.000348553417364698, "loss": 1.4684, "step": 13850 }, { "epoch": 0.6158908007647503, "grad_norm": 0.06046475097537041, "learning_rate": 0.0003484141025358881, "loss": 1.4787, "step": 13852 }, { "epoch": 0.6159797252234227, "grad_norm": 0.06241816654801369, "learning_rate": 0.0003482748006653086, "loss": 1.4803, "step": 13854 }, { "epoch": 0.6160686496820951, "grad_norm": 0.06165211275219917, "learning_rate": 0.0003481355117648678, "loss": 1.4798, "step": 13856 }, { "epoch": 0.6161575741407674, "grad_norm": 0.061339523643255234, "learning_rate": 0.00034799623584647266, "loss": 1.4842, "step": 13858 }, { "epoch": 0.6162464985994398, "grad_norm": 0.060382258147001266, "learning_rate": 0.00034785697292202925, "loss": 1.476, "step": 13860 }, { "epoch": 0.6163354230581122, "grad_norm": 0.05996260046958923, "learning_rate": 0.0003477177230034422, "loss": 1.4739, "step": 13862 }, { "epoch": 0.6164243475167845, "grad_norm": 0.06047176197171211, "learning_rate": 0.0003475784861026152, "loss": 1.4773, "step": 13864 }, { "epoch": 0.6165132719754568, "grad_norm": 0.061495665460824966, "learning_rate": 0.0003474392622314511, "loss": 1.4768, "step": 13866 }, { "epoch": 0.6166021964341292, "grad_norm": 0.06136169657111168, "learning_rate": 0.0003473000514018511, "loss": 1.4779, "step": 13868 }, { "epoch": 0.6166911208928015, "grad_norm": 0.06489419937133789, "learning_rate": 0.0003471608536257155, "loss": 1.476, "step": 13870 }, { "epoch": 0.6167800453514739, "grad_norm": 0.06050872802734375, "learning_rate": 0.0003470216689149438, "loss": 1.4778, "step": 13872 }, { "epoch": 0.6168689698101463, "grad_norm": 0.06324207037687302, "learning_rate": 0.00034688249728143415, "loss": 1.4782, "step": 13874 }, { "epoch": 0.6169578942688186, "grad_norm": 0.059641290456056595, "learning_rate": 0.0003467433387370831, "loss": 1.4749, "step": 13876 }, { "epoch": 0.617046818727491, "grad_norm": 0.06075885146856308, "learning_rate": 0.000346604193293787, "loss": 1.4795, "step": 13878 }, { "epoch": 0.6171357431861634, "grad_norm": 0.062138065695762634, "learning_rate": 0.00034646506096344025, "loss": 1.4735, "step": 13880 }, { "epoch": 0.6172246676448357, "grad_norm": 0.06034298986196518, "learning_rate": 0.000346325941757937, "loss": 1.4779, "step": 13882 }, { "epoch": 0.6173135921035081, "grad_norm": 0.06234195455908775, "learning_rate": 0.00034618683568916935, "loss": 1.4729, "step": 13884 }, { "epoch": 0.6174025165621805, "grad_norm": 0.06217122822999954, "learning_rate": 0.0003460477427690287, "loss": 1.4708, "step": 13886 }, { "epoch": 0.6174914410208527, "grad_norm": 0.05944179743528366, "learning_rate": 0.0003459086630094056, "loss": 1.4764, "step": 13888 }, { "epoch": 0.6175803654795251, "grad_norm": 0.06370831280946732, "learning_rate": 0.000345769596422189, "loss": 1.4839, "step": 13890 }, { "epoch": 0.6176692899381975, "grad_norm": 0.06237664818763733, "learning_rate": 0.0003456305430192669, "loss": 1.4778, "step": 13892 }, { "epoch": 0.6177582143968698, "grad_norm": 0.06287901103496552, "learning_rate": 0.00034549150281252633, "loss": 1.4808, "step": 13894 }, { "epoch": 0.6178471388555422, "grad_norm": 0.0613352507352829, "learning_rate": 0.0003453524758138529, "loss": 1.4807, "step": 13896 }, { "epoch": 0.6179360633142146, "grad_norm": 0.061771195381879807, "learning_rate": 0.0003452134620351315, "loss": 1.474, "step": 13898 }, { "epoch": 0.618024987772887, "grad_norm": 0.0651378482580185, "learning_rate": 0.0003450744614882455, "loss": 1.4709, "step": 13900 }, { "epoch": 0.6181139122315593, "grad_norm": 0.062017977237701416, "learning_rate": 0.00034493547418507704, "loss": 1.4781, "step": 13902 }, { "epoch": 0.6182028366902317, "grad_norm": 0.06174427643418312, "learning_rate": 0.0003447965001375079, "loss": 1.4819, "step": 13904 }, { "epoch": 0.618291761148904, "grad_norm": 0.06139495223760605, "learning_rate": 0.0003446575393574177, "loss": 1.4749, "step": 13906 }, { "epoch": 0.6183806856075764, "grad_norm": 0.05985395982861519, "learning_rate": 0.00034451859185668555, "loss": 1.4709, "step": 13908 }, { "epoch": 0.6184696100662487, "grad_norm": 0.05860113725066185, "learning_rate": 0.0003443796576471895, "loss": 1.4727, "step": 13910 }, { "epoch": 0.618558534524921, "grad_norm": 0.05987655743956566, "learning_rate": 0.0003442407367408059, "loss": 1.4781, "step": 13912 }, { "epoch": 0.6186474589835934, "grad_norm": 0.05981361120939255, "learning_rate": 0.0003441018291494108, "loss": 1.4788, "step": 13914 }, { "epoch": 0.6187363834422658, "grad_norm": 0.061126016080379486, "learning_rate": 0.00034396293488487824, "loss": 1.4694, "step": 13916 }, { "epoch": 0.6188253079009381, "grad_norm": 0.05968809127807617, "learning_rate": 0.00034382405395908157, "loss": 1.4756, "step": 13918 }, { "epoch": 0.6189142323596105, "grad_norm": 0.05997713655233383, "learning_rate": 0.0003436851863838931, "loss": 1.4803, "step": 13920 }, { "epoch": 0.6190031568182829, "grad_norm": 0.06370899826288223, "learning_rate": 0.0003435463321711838, "loss": 1.4788, "step": 13922 }, { "epoch": 0.6190920812769553, "grad_norm": 0.06034357100725174, "learning_rate": 0.00034340749133282333, "loss": 1.4737, "step": 13924 }, { "epoch": 0.6191810057356276, "grad_norm": 0.0625743493437767, "learning_rate": 0.0003432686638806807, "loss": 1.4807, "step": 13926 }, { "epoch": 0.6192699301943, "grad_norm": 0.05968970060348511, "learning_rate": 0.0003431298498266233, "loss": 1.4737, "step": 13928 }, { "epoch": 0.6193588546529722, "grad_norm": 0.0622946061193943, "learning_rate": 0.00034299104918251777, "loss": 1.4767, "step": 13930 }, { "epoch": 0.6194477791116446, "grad_norm": 0.060472749173641205, "learning_rate": 0.0003428522619602293, "loss": 1.4727, "step": 13932 }, { "epoch": 0.619536703570317, "grad_norm": 0.06135667487978935, "learning_rate": 0.00034271348817162175, "loss": 1.4727, "step": 13934 }, { "epoch": 0.6196256280289894, "grad_norm": 0.06154874339699745, "learning_rate": 0.0003425747278285587, "loss": 1.4723, "step": 13936 }, { "epoch": 0.6197145524876617, "grad_norm": 0.061515383422374725, "learning_rate": 0.0003424359809429015, "loss": 1.4735, "step": 13938 }, { "epoch": 0.6198034769463341, "grad_norm": 0.06175931170582771, "learning_rate": 0.00034229724752651093, "loss": 1.4762, "step": 13940 }, { "epoch": 0.6198924014050065, "grad_norm": 0.06058594584465027, "learning_rate": 0.0003421585275912467, "loss": 1.4732, "step": 13942 }, { "epoch": 0.6199813258636788, "grad_norm": 0.06232278421521187, "learning_rate": 0.0003420198211489671, "loss": 1.4754, "step": 13944 }, { "epoch": 0.6200702503223512, "grad_norm": 0.061653584241867065, "learning_rate": 0.0003418811282115294, "loss": 1.4707, "step": 13946 }, { "epoch": 0.6201591747810236, "grad_norm": 0.061312127858400345, "learning_rate": 0.00034174244879078966, "loss": 1.4743, "step": 13948 }, { "epoch": 0.6202480992396959, "grad_norm": 0.06045181304216385, "learning_rate": 0.0003416037828986027, "loss": 1.4723, "step": 13950 }, { "epoch": 0.6203370236983682, "grad_norm": 0.06308136880397797, "learning_rate": 0.0003414651305468226, "loss": 1.4732, "step": 13952 }, { "epoch": 0.6204259481570406, "grad_norm": 0.060169022530317307, "learning_rate": 0.0003413264917473018, "loss": 1.4769, "step": 13954 }, { "epoch": 0.6205148726157129, "grad_norm": 0.0620846189558506, "learning_rate": 0.0003411878665118915, "loss": 1.4735, "step": 13956 }, { "epoch": 0.6206037970743853, "grad_norm": 0.0605727918446064, "learning_rate": 0.00034104925485244254, "loss": 1.4743, "step": 13958 }, { "epoch": 0.6206927215330577, "grad_norm": 0.06262971460819244, "learning_rate": 0.00034091065678080356, "loss": 1.4758, "step": 13960 }, { "epoch": 0.62078164599173, "grad_norm": 0.06452898681163788, "learning_rate": 0.0003407720723088227, "loss": 1.4752, "step": 13962 }, { "epoch": 0.6208705704504024, "grad_norm": 0.05955608934164047, "learning_rate": 0.00034063350144834684, "loss": 1.4758, "step": 13964 }, { "epoch": 0.6209594949090748, "grad_norm": 0.06136883795261383, "learning_rate": 0.0003404949442112215, "loss": 1.4781, "step": 13966 }, { "epoch": 0.6210484193677471, "grad_norm": 0.0601998008787632, "learning_rate": 0.0003403564006092914, "loss": 1.4796, "step": 13968 }, { "epoch": 0.6211373438264195, "grad_norm": 0.060754548758268356, "learning_rate": 0.0003402178706543996, "loss": 1.478, "step": 13970 }, { "epoch": 0.6212262682850919, "grad_norm": 0.06004665046930313, "learning_rate": 0.0003400793543583883, "loss": 1.4739, "step": 13972 }, { "epoch": 0.6213151927437641, "grad_norm": 0.06137534976005554, "learning_rate": 0.0003399408517330987, "loss": 1.4741, "step": 13974 }, { "epoch": 0.6214041172024365, "grad_norm": 0.06013936921954155, "learning_rate": 0.00033980236279037033, "loss": 1.4734, "step": 13976 }, { "epoch": 0.6214930416611089, "grad_norm": 0.06054317578673363, "learning_rate": 0.00033966388754204195, "loss": 1.4776, "step": 13978 }, { "epoch": 0.6215819661197812, "grad_norm": 0.06105973199009895, "learning_rate": 0.00033952542599995116, "loss": 1.4717, "step": 13980 }, { "epoch": 0.6216708905784536, "grad_norm": 0.05983678251504898, "learning_rate": 0.000339386978175934, "loss": 1.4794, "step": 13982 }, { "epoch": 0.621759815037126, "grad_norm": 0.06286296248435974, "learning_rate": 0.0003392485440818259, "loss": 1.4766, "step": 13984 }, { "epoch": 0.6218487394957983, "grad_norm": 0.05888976529240608, "learning_rate": 0.0003391101237294606, "loss": 1.4761, "step": 13986 }, { "epoch": 0.6219376639544707, "grad_norm": 0.061478178948163986, "learning_rate": 0.00033897171713067085, "loss": 1.4737, "step": 13988 }, { "epoch": 0.6220265884131431, "grad_norm": 0.06167341768741608, "learning_rate": 0.0003388333242972885, "loss": 1.4782, "step": 13990 }, { "epoch": 0.6221155128718154, "grad_norm": 0.059394970536231995, "learning_rate": 0.0003386949452411438, "loss": 1.473, "step": 13992 }, { "epoch": 0.6222044373304878, "grad_norm": 0.06255074590444565, "learning_rate": 0.00033855657997406585, "loss": 1.4779, "step": 13994 }, { "epoch": 0.6222933617891601, "grad_norm": 0.05998929962515831, "learning_rate": 0.0003384182285078831, "loss": 1.4753, "step": 13996 }, { "epoch": 0.6223822862478324, "grad_norm": 0.06142527610063553, "learning_rate": 0.0003382798908544221, "loss": 1.4754, "step": 13998 }, { "epoch": 0.6224712107065048, "grad_norm": 0.06191844493150711, "learning_rate": 0.00033814156702550877, "loss": 1.4789, "step": 14000 }, { "epoch": 0.6224712107065048, "eval_loss": 1.460575819015503, "eval_runtime": 12.4458, "eval_samples_per_second": 555.207, "eval_steps_per_second": 69.421, "step": 14000 }, { "epoch": 0.6225601351651772, "grad_norm": 0.061490852385759354, "learning_rate": 0.00033800325703296763, "loss": 1.4737, "step": 14002 }, { "epoch": 0.6226490596238495, "grad_norm": 0.06254355609416962, "learning_rate": 0.0003378649608886218, "loss": 1.4782, "step": 14004 }, { "epoch": 0.6227379840825219, "grad_norm": 0.06114278733730316, "learning_rate": 0.0003377266786042938, "loss": 1.4775, "step": 14006 }, { "epoch": 0.6228269085411943, "grad_norm": 0.06295786052942276, "learning_rate": 0.0003375884101918045, "loss": 1.4736, "step": 14008 }, { "epoch": 0.6229158329998666, "grad_norm": 0.0627705305814743, "learning_rate": 0.00033745015566297333, "loss": 1.4771, "step": 14010 }, { "epoch": 0.623004757458539, "grad_norm": 0.06055225431919098, "learning_rate": 0.0003373119150296195, "loss": 1.4748, "step": 14012 }, { "epoch": 0.6230936819172114, "grad_norm": 0.06257602572441101, "learning_rate": 0.00033717368830356, "loss": 1.4773, "step": 14014 }, { "epoch": 0.6231826063758837, "grad_norm": 0.06255099177360535, "learning_rate": 0.00033703547549661107, "loss": 1.4767, "step": 14016 }, { "epoch": 0.623271530834556, "grad_norm": 0.06176856905221939, "learning_rate": 0.000336897276620588, "loss": 1.4765, "step": 14018 }, { "epoch": 0.6233604552932284, "grad_norm": 0.060302965342998505, "learning_rate": 0.00033675909168730435, "loss": 1.4745, "step": 14020 }, { "epoch": 0.6234493797519007, "grad_norm": 0.05996936559677124, "learning_rate": 0.00033662092070857295, "loss": 1.4733, "step": 14022 }, { "epoch": 0.6235383042105731, "grad_norm": 0.060551125556230545, "learning_rate": 0.0003364827636962053, "loss": 1.4816, "step": 14024 }, { "epoch": 0.6236272286692455, "grad_norm": 0.06198984757065773, "learning_rate": 0.0003363446206620116, "loss": 1.4746, "step": 14026 }, { "epoch": 0.6237161531279178, "grad_norm": 0.060537002980709076, "learning_rate": 0.0003362064916178009, "loss": 1.475, "step": 14028 }, { "epoch": 0.6238050775865902, "grad_norm": 0.05999968200922012, "learning_rate": 0.0003360683765753813, "loss": 1.4746, "step": 14030 }, { "epoch": 0.6238940020452626, "grad_norm": 0.0614023394882679, "learning_rate": 0.00033593027554655895, "loss": 1.4797, "step": 14032 }, { "epoch": 0.6239829265039349, "grad_norm": 0.061192482709884644, "learning_rate": 0.00033579218854314, "loss": 1.4771, "step": 14034 }, { "epoch": 0.6240718509626073, "grad_norm": 0.059598758816719055, "learning_rate": 0.0003356541155769281, "loss": 1.4795, "step": 14036 }, { "epoch": 0.6241607754212796, "grad_norm": 0.061559103429317474, "learning_rate": 0.00033551605665972694, "loss": 1.4822, "step": 14038 }, { "epoch": 0.6242496998799519, "grad_norm": 0.06176281347870827, "learning_rate": 0.00033537801180333804, "loss": 1.4763, "step": 14040 }, { "epoch": 0.6243386243386243, "grad_norm": 0.060048338025808334, "learning_rate": 0.00033523998101956206, "loss": 1.4764, "step": 14042 }, { "epoch": 0.6244275487972967, "grad_norm": 0.06090456247329712, "learning_rate": 0.0003351019643201987, "loss": 1.4757, "step": 14044 }, { "epoch": 0.624516473255969, "grad_norm": 0.061848804354667664, "learning_rate": 0.00033496396171704603, "loss": 1.4783, "step": 14046 }, { "epoch": 0.6246053977146414, "grad_norm": 0.06011731177568436, "learning_rate": 0.0003348259732219011, "loss": 1.4753, "step": 14048 }, { "epoch": 0.6246943221733138, "grad_norm": 0.06071808561682701, "learning_rate": 0.00033468799884656006, "loss": 1.4786, "step": 14050 }, { "epoch": 0.6247832466319861, "grad_norm": 0.06180998310446739, "learning_rate": 0.0003345500386028172, "loss": 1.4742, "step": 14052 }, { "epoch": 0.6248721710906585, "grad_norm": 0.0607328861951828, "learning_rate": 0.00033441209250246627, "loss": 1.4755, "step": 14054 }, { "epoch": 0.6249610955493309, "grad_norm": 0.060531482100486755, "learning_rate": 0.00033427416055729946, "loss": 1.4702, "step": 14056 }, { "epoch": 0.6250500200080032, "grad_norm": 0.06345600634813309, "learning_rate": 0.0003341362427791076, "loss": 1.4709, "step": 14058 }, { "epoch": 0.6251389444666755, "grad_norm": 0.06127747520804405, "learning_rate": 0.00033399833917968077, "loss": 1.4732, "step": 14060 }, { "epoch": 0.6252278689253479, "grad_norm": 0.06288928538560867, "learning_rate": 0.0003338604497708075, "loss": 1.4748, "step": 14062 }, { "epoch": 0.6253167933840202, "grad_norm": 0.06117086485028267, "learning_rate": 0.0003337225745642749, "loss": 1.4751, "step": 14064 }, { "epoch": 0.6254057178426926, "grad_norm": 0.06286124885082245, "learning_rate": 0.0003335847135718696, "loss": 1.4751, "step": 14066 }, { "epoch": 0.625494642301365, "grad_norm": 0.061294663697481155, "learning_rate": 0.00033344686680537633, "loss": 1.4768, "step": 14068 }, { "epoch": 0.6255835667600373, "grad_norm": 0.05968429148197174, "learning_rate": 0.00033330903427657876, "loss": 1.4799, "step": 14070 }, { "epoch": 0.6256724912187097, "grad_norm": 0.06150318309664726, "learning_rate": 0.0003331712159972596, "loss": 1.4739, "step": 14072 }, { "epoch": 0.6257614156773821, "grad_norm": 0.0625072494149208, "learning_rate": 0.00033303341197919994, "loss": 1.4786, "step": 14074 }, { "epoch": 0.6258503401360545, "grad_norm": 0.06237860396504402, "learning_rate": 0.0003328956222341802, "loss": 1.4776, "step": 14076 }, { "epoch": 0.6259392645947268, "grad_norm": 0.0603688582777977, "learning_rate": 0.00033275784677397905, "loss": 1.4719, "step": 14078 }, { "epoch": 0.6260281890533992, "grad_norm": 0.06012796610593796, "learning_rate": 0.00033262008561037404, "loss": 1.4764, "step": 14080 }, { "epoch": 0.6261171135120714, "grad_norm": 0.06131792068481445, "learning_rate": 0.0003324823387551419, "loss": 1.473, "step": 14082 }, { "epoch": 0.6262060379707438, "grad_norm": 0.06047110632061958, "learning_rate": 0.0003323446062200577, "loss": 1.4762, "step": 14084 }, { "epoch": 0.6262949624294162, "grad_norm": 0.06241205334663391, "learning_rate": 0.0003322068880168951, "loss": 1.4782, "step": 14086 }, { "epoch": 0.6263838868880885, "grad_norm": 0.0601295605301857, "learning_rate": 0.0003320691841574275, "loss": 1.4751, "step": 14088 }, { "epoch": 0.6264728113467609, "grad_norm": 0.06591775268316269, "learning_rate": 0.00033193149465342574, "loss": 1.4799, "step": 14090 }, { "epoch": 0.6265617358054333, "grad_norm": 0.06100832670927048, "learning_rate": 0.0003317938195166608, "loss": 1.4682, "step": 14092 }, { "epoch": 0.6266506602641057, "grad_norm": 0.0626838430762291, "learning_rate": 0.0003316561587589013, "loss": 1.4759, "step": 14094 }, { "epoch": 0.626739584722778, "grad_norm": 0.060527194291353226, "learning_rate": 0.00033151851239191513, "loss": 1.4687, "step": 14096 }, { "epoch": 0.6268285091814504, "grad_norm": 0.06166524067521095, "learning_rate": 0.00033138088042746904, "loss": 1.4758, "step": 14098 }, { "epoch": 0.6269174336401228, "grad_norm": 0.06203189119696617, "learning_rate": 0.00033124326287732847, "loss": 1.4757, "step": 14100 }, { "epoch": 0.6270063580987951, "grad_norm": 0.06214692071080208, "learning_rate": 0.00033110565975325734, "loss": 1.4731, "step": 14102 }, { "epoch": 0.6270952825574674, "grad_norm": 0.06142725050449371, "learning_rate": 0.00033096807106701876, "loss": 1.4802, "step": 14104 }, { "epoch": 0.6271842070161397, "grad_norm": 0.06279194355010986, "learning_rate": 0.00033083049683037437, "loss": 1.4776, "step": 14106 }, { "epoch": 0.6272731314748121, "grad_norm": 0.06059839949011803, "learning_rate": 0.0003306929370550846, "loss": 1.4767, "step": 14108 }, { "epoch": 0.6273620559334845, "grad_norm": 0.06027147173881531, "learning_rate": 0.0003305553917529087, "loss": 1.4788, "step": 14110 }, { "epoch": 0.6274509803921569, "grad_norm": 0.06027664244174957, "learning_rate": 0.0003304178609356046, "loss": 1.4724, "step": 14112 }, { "epoch": 0.6275399048508292, "grad_norm": 0.0626441091299057, "learning_rate": 0.00033028034461492917, "loss": 1.4738, "step": 14114 }, { "epoch": 0.6276288293095016, "grad_norm": 0.060072094202041626, "learning_rate": 0.00033014284280263786, "loss": 1.4736, "step": 14116 }, { "epoch": 0.627717753768174, "grad_norm": 0.059987835586071014, "learning_rate": 0.0003300053555104846, "loss": 1.4746, "step": 14118 }, { "epoch": 0.6278066782268463, "grad_norm": 0.06026008725166321, "learning_rate": 0.000329867882750223, "loss": 1.4749, "step": 14120 }, { "epoch": 0.6278956026855187, "grad_norm": 0.060853175818920135, "learning_rate": 0.00032973042453360426, "loss": 1.4706, "step": 14122 }, { "epoch": 0.6279845271441911, "grad_norm": 0.06199175864458084, "learning_rate": 0.00032959298087237944, "loss": 1.4794, "step": 14124 }, { "epoch": 0.6280734516028633, "grad_norm": 0.058771684765815735, "learning_rate": 0.00032945555177829746, "loss": 1.4797, "step": 14126 }, { "epoch": 0.6281623760615357, "grad_norm": 0.05990970879793167, "learning_rate": 0.0003293181372631064, "loss": 1.4769, "step": 14128 }, { "epoch": 0.6282513005202081, "grad_norm": 0.059724360704422, "learning_rate": 0.00032918073733855327, "loss": 1.4791, "step": 14130 }, { "epoch": 0.6283402249788804, "grad_norm": 0.060740843415260315, "learning_rate": 0.0003290433520163835, "loss": 1.4779, "step": 14132 }, { "epoch": 0.6284291494375528, "grad_norm": 0.060699593275785446, "learning_rate": 0.00032890598130834117, "loss": 1.4727, "step": 14134 }, { "epoch": 0.6285180738962252, "grad_norm": 0.061070021241903305, "learning_rate": 0.0003287686252261697, "loss": 1.4799, "step": 14136 }, { "epoch": 0.6286069983548975, "grad_norm": 0.06295224279165268, "learning_rate": 0.0003286312837816109, "loss": 1.4739, "step": 14138 }, { "epoch": 0.6286959228135699, "grad_norm": 0.06164375692605972, "learning_rate": 0.00032849395698640485, "loss": 1.4738, "step": 14140 }, { "epoch": 0.6287848472722423, "grad_norm": 0.06068945676088333, "learning_rate": 0.0003283566448522914, "loss": 1.4754, "step": 14142 }, { "epoch": 0.6288737717309146, "grad_norm": 0.06089947745203972, "learning_rate": 0.0003282193473910081, "loss": 1.4776, "step": 14144 }, { "epoch": 0.628962696189587, "grad_norm": 0.06197715550661087, "learning_rate": 0.00032808206461429226, "loss": 1.4781, "step": 14146 }, { "epoch": 0.6290516206482593, "grad_norm": 0.06002449244260788, "learning_rate": 0.00032794479653387905, "loss": 1.4751, "step": 14148 }, { "epoch": 0.6291405451069316, "grad_norm": 0.061960477381944656, "learning_rate": 0.00032780754316150276, "loss": 1.4709, "step": 14150 }, { "epoch": 0.629229469565604, "grad_norm": 0.06243150681257248, "learning_rate": 0.0003276703045088966, "loss": 1.4719, "step": 14152 }, { "epoch": 0.6293183940242764, "grad_norm": 0.06339258700609207, "learning_rate": 0.0003275330805877923, "loss": 1.4753, "step": 14154 }, { "epoch": 0.6294073184829487, "grad_norm": 0.06161705031991005, "learning_rate": 0.00032739587140992015, "loss": 1.4731, "step": 14156 }, { "epoch": 0.6294962429416211, "grad_norm": 0.06279416382312775, "learning_rate": 0.0003272586769870097, "loss": 1.4711, "step": 14158 }, { "epoch": 0.6295851674002935, "grad_norm": 0.06209520995616913, "learning_rate": 0.0003271214973307887, "loss": 1.4711, "step": 14160 }, { "epoch": 0.6296740918589658, "grad_norm": 0.06087113171815872, "learning_rate": 0.000326984332452984, "loss": 1.4769, "step": 14162 }, { "epoch": 0.6297630163176382, "grad_norm": 0.06049448251724243, "learning_rate": 0.0003268471823653211, "loss": 1.4766, "step": 14164 }, { "epoch": 0.6298519407763106, "grad_norm": 0.06183711439371109, "learning_rate": 0.0003267100470795239, "loss": 1.4719, "step": 14166 }, { "epoch": 0.6299408652349828, "grad_norm": 0.06014047563076019, "learning_rate": 0.0003265729266073157, "loss": 1.4793, "step": 14168 }, { "epoch": 0.6300297896936552, "grad_norm": 0.06054830551147461, "learning_rate": 0.00032643582096041806, "loss": 1.4755, "step": 14170 }, { "epoch": 0.6301187141523276, "grad_norm": 0.05997813493013382, "learning_rate": 0.00032629873015055106, "loss": 1.4747, "step": 14172 }, { "epoch": 0.6302076386109999, "grad_norm": 0.06141214445233345, "learning_rate": 0.0003261616541894344, "loss": 1.4715, "step": 14174 }, { "epoch": 0.6302965630696723, "grad_norm": 0.060470618307590485, "learning_rate": 0.0003260245930887853, "loss": 1.4731, "step": 14176 }, { "epoch": 0.6303854875283447, "grad_norm": 0.06036084517836571, "learning_rate": 0.00032588754686032093, "loss": 1.4774, "step": 14178 }, { "epoch": 0.630474411987017, "grad_norm": 0.06185087189078331, "learning_rate": 0.0003257505155157563, "loss": 1.475, "step": 14180 }, { "epoch": 0.6305633364456894, "grad_norm": 0.0611613430082798, "learning_rate": 0.0003256134990668054, "loss": 1.4794, "step": 14182 }, { "epoch": 0.6306522609043618, "grad_norm": 0.060027047991752625, "learning_rate": 0.00032547649752518115, "loss": 1.4753, "step": 14184 }, { "epoch": 0.6307411853630341, "grad_norm": 0.06171073764562607, "learning_rate": 0.000325339510902595, "loss": 1.4729, "step": 14186 }, { "epoch": 0.6308301098217065, "grad_norm": 0.06028326600790024, "learning_rate": 0.00032520253921075707, "loss": 1.4784, "step": 14188 }, { "epoch": 0.6309190342803788, "grad_norm": 0.06145615503191948, "learning_rate": 0.0003250655824613765, "loss": 1.4791, "step": 14190 }, { "epoch": 0.6310079587390511, "grad_norm": 0.0612344890832901, "learning_rate": 0.00032492864066616095, "loss": 1.4732, "step": 14192 }, { "epoch": 0.6310968831977235, "grad_norm": 0.0602327398955822, "learning_rate": 0.00032479171383681637, "loss": 1.4745, "step": 14194 }, { "epoch": 0.6311858076563959, "grad_norm": 0.062162987887859344, "learning_rate": 0.0003246548019850486, "loss": 1.4775, "step": 14196 }, { "epoch": 0.6312747321150682, "grad_norm": 0.06073742359876633, "learning_rate": 0.00032451790512256075, "loss": 1.4777, "step": 14198 }, { "epoch": 0.6313636565737406, "grad_norm": 0.06158788129687309, "learning_rate": 0.0003243810232610559, "loss": 1.4747, "step": 14200 }, { "epoch": 0.631452581032413, "grad_norm": 0.06189347803592682, "learning_rate": 0.00032424415641223517, "loss": 1.4696, "step": 14202 }, { "epoch": 0.6315415054910853, "grad_norm": 0.06252620369195938, "learning_rate": 0.00032410730458779825, "loss": 1.472, "step": 14204 }, { "epoch": 0.6316304299497577, "grad_norm": 0.06133527681231499, "learning_rate": 0.0003239704677994442, "loss": 1.4734, "step": 14206 }, { "epoch": 0.6317193544084301, "grad_norm": 0.06081013008952141, "learning_rate": 0.00032383364605887034, "loss": 1.4778, "step": 14208 }, { "epoch": 0.6318082788671024, "grad_norm": 0.061107292771339417, "learning_rate": 0.00032369683937777264, "loss": 1.4686, "step": 14210 }, { "epoch": 0.6318972033257747, "grad_norm": 0.059868182986974716, "learning_rate": 0.0003235600477678461, "loss": 1.4787, "step": 14212 }, { "epoch": 0.6319861277844471, "grad_norm": 0.06001732498407364, "learning_rate": 0.00032342327124078416, "loss": 1.4724, "step": 14214 }, { "epoch": 0.6320750522431194, "grad_norm": 0.061543405055999756, "learning_rate": 0.00032328650980827925, "loss": 1.4688, "step": 14216 }, { "epoch": 0.6321639767017918, "grad_norm": 0.05957053229212761, "learning_rate": 0.0003231497634820222, "loss": 1.4732, "step": 14218 }, { "epoch": 0.6322529011604642, "grad_norm": 0.060270920395851135, "learning_rate": 0.0003230130322737027, "loss": 1.4665, "step": 14220 }, { "epoch": 0.6323418256191365, "grad_norm": 0.0641508549451828, "learning_rate": 0.00032287631619500913, "loss": 1.4746, "step": 14222 }, { "epoch": 0.6324307500778089, "grad_norm": 0.060125336050987244, "learning_rate": 0.00032273961525762886, "loss": 1.4736, "step": 14224 }, { "epoch": 0.6325196745364813, "grad_norm": 0.06322526186704636, "learning_rate": 0.00032260292947324697, "loss": 1.473, "step": 14226 }, { "epoch": 0.6326085989951536, "grad_norm": 0.06271185725927353, "learning_rate": 0.0003224662588535489, "loss": 1.4777, "step": 14228 }, { "epoch": 0.632697523453826, "grad_norm": 0.0621628575026989, "learning_rate": 0.00032232960341021697, "loss": 1.474, "step": 14230 }, { "epoch": 0.6327864479124984, "grad_norm": 0.06069038808345795, "learning_rate": 0.00032219296315493385, "loss": 1.4773, "step": 14232 }, { "epoch": 0.6328753723711706, "grad_norm": 0.06033967062830925, "learning_rate": 0.00032205633809937975, "loss": 1.4754, "step": 14234 }, { "epoch": 0.632964296829843, "grad_norm": 0.06296979635953903, "learning_rate": 0.00032191972825523383, "loss": 1.4727, "step": 14236 }, { "epoch": 0.6330532212885154, "grad_norm": 0.061179324984550476, "learning_rate": 0.00032178313363417455, "loss": 1.4744, "step": 14238 }, { "epoch": 0.6331421457471877, "grad_norm": 0.06097666174173355, "learning_rate": 0.0003216465542478783, "loss": 1.4738, "step": 14240 }, { "epoch": 0.6332310702058601, "grad_norm": 0.05940008535981178, "learning_rate": 0.0003215099901080205, "loss": 1.4743, "step": 14242 }, { "epoch": 0.6333199946645325, "grad_norm": 0.06126728653907776, "learning_rate": 0.0003213734412262754, "loss": 1.4739, "step": 14244 }, { "epoch": 0.6334089191232048, "grad_norm": 0.06011746823787689, "learning_rate": 0.00032123690761431565, "loss": 1.4811, "step": 14246 }, { "epoch": 0.6334978435818772, "grad_norm": 0.06307831406593323, "learning_rate": 0.0003211003892838129, "loss": 1.4803, "step": 14248 }, { "epoch": 0.6335867680405496, "grad_norm": 0.061354558914899826, "learning_rate": 0.00032096388624643736, "loss": 1.4758, "step": 14250 }, { "epoch": 0.633675692499222, "grad_norm": 0.05967186391353607, "learning_rate": 0.0003208273985138576, "loss": 1.4686, "step": 14252 }, { "epoch": 0.6337646169578943, "grad_norm": 0.06030327454209328, "learning_rate": 0.00032069092609774157, "loss": 1.4719, "step": 14254 }, { "epoch": 0.6338535414165666, "grad_norm": 0.061803799122571945, "learning_rate": 0.0003205544690097553, "loss": 1.472, "step": 14256 }, { "epoch": 0.633942465875239, "grad_norm": 0.05942728742957115, "learning_rate": 0.00032041802726156377, "loss": 1.4767, "step": 14258 }, { "epoch": 0.6340313903339113, "grad_norm": 0.06224941089749336, "learning_rate": 0.0003202816008648307, "loss": 1.4734, "step": 14260 }, { "epoch": 0.6341203147925837, "grad_norm": 0.06028662249445915, "learning_rate": 0.00032014518983121843, "loss": 1.4727, "step": 14262 }, { "epoch": 0.634209239251256, "grad_norm": 0.061462193727493286, "learning_rate": 0.00032000879417238784, "loss": 1.4763, "step": 14264 }, { "epoch": 0.6342981637099284, "grad_norm": 0.061825983226299286, "learning_rate": 0.00031987241389999876, "loss": 1.4754, "step": 14266 }, { "epoch": 0.6343870881686008, "grad_norm": 0.062059544026851654, "learning_rate": 0.0003197360490257094, "loss": 1.4714, "step": 14268 }, { "epoch": 0.6344760126272732, "grad_norm": 0.06092653051018715, "learning_rate": 0.00031959969956117716, "loss": 1.4772, "step": 14270 }, { "epoch": 0.6345649370859455, "grad_norm": 0.061005737632513046, "learning_rate": 0.0003194633655180575, "loss": 1.4744, "step": 14272 }, { "epoch": 0.6346538615446179, "grad_norm": 0.06450587511062622, "learning_rate": 0.00031932704690800493, "loss": 1.4779, "step": 14274 }, { "epoch": 0.6347427860032903, "grad_norm": 0.0617448128759861, "learning_rate": 0.00031919074374267277, "loss": 1.4732, "step": 14276 }, { "epoch": 0.6348317104619625, "grad_norm": 0.061758507043123245, "learning_rate": 0.0003190544560337126, "loss": 1.4744, "step": 14278 }, { "epoch": 0.6349206349206349, "grad_norm": 0.06205600127577782, "learning_rate": 0.0003189181837927747, "loss": 1.4701, "step": 14280 }, { "epoch": 0.6350095593793073, "grad_norm": 0.0619838610291481, "learning_rate": 0.0003187819270315087, "loss": 1.4782, "step": 14282 }, { "epoch": 0.6350984838379796, "grad_norm": 0.05949383229017258, "learning_rate": 0.000318645685761562, "loss": 1.4753, "step": 14284 }, { "epoch": 0.635187408296652, "grad_norm": 0.061724212020635605, "learning_rate": 0.00031850945999458135, "loss": 1.4738, "step": 14286 }, { "epoch": 0.6352763327553244, "grad_norm": 0.060357339680194855, "learning_rate": 0.00031837324974221193, "loss": 1.472, "step": 14288 }, { "epoch": 0.6353652572139967, "grad_norm": 0.06111651286482811, "learning_rate": 0.0003182370550160974, "loss": 1.4803, "step": 14290 }, { "epoch": 0.6354541816726691, "grad_norm": 0.05993794649839401, "learning_rate": 0.0003181008758278805, "loss": 1.4755, "step": 14292 }, { "epoch": 0.6355431061313415, "grad_norm": 0.06252172589302063, "learning_rate": 0.00031796471218920237, "loss": 1.4704, "step": 14294 }, { "epoch": 0.6356320305900138, "grad_norm": 0.0633573830127716, "learning_rate": 0.0003178285641117027, "loss": 1.4694, "step": 14296 }, { "epoch": 0.6357209550486861, "grad_norm": 0.06102222204208374, "learning_rate": 0.0003176924316070203, "loss": 1.4782, "step": 14298 }, { "epoch": 0.6358098795073585, "grad_norm": 0.06126133352518082, "learning_rate": 0.0003175563146867921, "loss": 1.4734, "step": 14300 }, { "epoch": 0.6358988039660308, "grad_norm": 0.06115219369530678, "learning_rate": 0.0003174202133626543, "loss": 1.4785, "step": 14302 }, { "epoch": 0.6359877284247032, "grad_norm": 0.05963146686553955, "learning_rate": 0.0003172841276462414, "loss": 1.4754, "step": 14304 }, { "epoch": 0.6360766528833756, "grad_norm": 0.06013455241918564, "learning_rate": 0.00031714805754918607, "loss": 1.4751, "step": 14306 }, { "epoch": 0.6361655773420479, "grad_norm": 0.060067515820264816, "learning_rate": 0.00031701200308312087, "loss": 1.4703, "step": 14308 }, { "epoch": 0.6362545018007203, "grad_norm": 0.06043084338307381, "learning_rate": 0.000316875964259676, "loss": 1.4663, "step": 14310 }, { "epoch": 0.6363434262593927, "grad_norm": 0.059209950268268585, "learning_rate": 0.0003167399410904807, "loss": 1.4715, "step": 14312 }, { "epoch": 0.636432350718065, "grad_norm": 0.06119099631905556, "learning_rate": 0.00031660393358716275, "loss": 1.4731, "step": 14314 }, { "epoch": 0.6365212751767374, "grad_norm": 0.059667252004146576, "learning_rate": 0.00031646794176134883, "loss": 1.4765, "step": 14316 }, { "epoch": 0.6366101996354098, "grad_norm": 0.062074486166238785, "learning_rate": 0.000316331965624664, "loss": 1.4751, "step": 14318 }, { "epoch": 0.636699124094082, "grad_norm": 0.05942871421575546, "learning_rate": 0.0003161960051887322, "loss": 1.4739, "step": 14320 }, { "epoch": 0.6367880485527544, "grad_norm": 0.0627705380320549, "learning_rate": 0.0003160600604651758, "loss": 1.4667, "step": 14322 }, { "epoch": 0.6368769730114268, "grad_norm": 0.05913541093468666, "learning_rate": 0.00031592413146561604, "loss": 1.4713, "step": 14324 }, { "epoch": 0.6369658974700991, "grad_norm": 0.06256476044654846, "learning_rate": 0.00031578821820167294, "loss": 1.4733, "step": 14326 }, { "epoch": 0.6370548219287715, "grad_norm": 0.06173428148031235, "learning_rate": 0.0003156523206849644, "loss": 1.4775, "step": 14328 }, { "epoch": 0.6371437463874439, "grad_norm": 0.0602475143969059, "learning_rate": 0.00031551643892710815, "loss": 1.4763, "step": 14330 }, { "epoch": 0.6372326708461162, "grad_norm": 0.06171022728085518, "learning_rate": 0.0003153805729397197, "loss": 1.474, "step": 14332 }, { "epoch": 0.6373215953047886, "grad_norm": 0.06058694049715996, "learning_rate": 0.0003152447227344132, "loss": 1.4785, "step": 14334 }, { "epoch": 0.637410519763461, "grad_norm": 0.06109078973531723, "learning_rate": 0.0003151088883228022, "loss": 1.469, "step": 14336 }, { "epoch": 0.6374994442221333, "grad_norm": 0.06186812371015549, "learning_rate": 0.00031497306971649806, "loss": 1.4757, "step": 14338 }, { "epoch": 0.6375883686808057, "grad_norm": 0.059616927057504654, "learning_rate": 0.00031483726692711143, "loss": 1.473, "step": 14340 }, { "epoch": 0.637677293139478, "grad_norm": 0.06053665280342102, "learning_rate": 0.0003147014799662513, "loss": 1.4735, "step": 14342 }, { "epoch": 0.6377662175981503, "grad_norm": 0.06036613509058952, "learning_rate": 0.000314565708845525, "loss": 1.4742, "step": 14344 }, { "epoch": 0.6378551420568227, "grad_norm": 0.05983301252126694, "learning_rate": 0.00031442995357653926, "loss": 1.4764, "step": 14346 }, { "epoch": 0.6379440665154951, "grad_norm": 0.060548748821020126, "learning_rate": 0.00031429421417089885, "loss": 1.4712, "step": 14348 }, { "epoch": 0.6380329909741674, "grad_norm": 0.059574976563453674, "learning_rate": 0.0003141584906402073, "loss": 1.4698, "step": 14350 }, { "epoch": 0.6381219154328398, "grad_norm": 0.06052010506391525, "learning_rate": 0.0003140227829960671, "loss": 1.4738, "step": 14352 }, { "epoch": 0.6382108398915122, "grad_norm": 0.059967853128910065, "learning_rate": 0.0003138870912500788, "loss": 1.4719, "step": 14354 }, { "epoch": 0.6382997643501845, "grad_norm": 0.06056461110711098, "learning_rate": 0.00031375141541384224, "loss": 1.4771, "step": 14356 }, { "epoch": 0.6383886888088569, "grad_norm": 0.062207210808992386, "learning_rate": 0.0003136157554989556, "loss": 1.4707, "step": 14358 }, { "epoch": 0.6384776132675293, "grad_norm": 0.05935196205973625, "learning_rate": 0.00031348011151701523, "loss": 1.4646, "step": 14360 }, { "epoch": 0.6385665377262016, "grad_norm": 0.06075610965490341, "learning_rate": 0.0003133444834796172, "loss": 1.4748, "step": 14362 }, { "epoch": 0.6386554621848739, "grad_norm": 0.06111859157681465, "learning_rate": 0.00031320887139835515, "loss": 1.4721, "step": 14364 }, { "epoch": 0.6387443866435463, "grad_norm": 0.05913927033543587, "learning_rate": 0.0003130732752848218, "loss": 1.4773, "step": 14366 }, { "epoch": 0.6388333111022186, "grad_norm": 0.06242065876722336, "learning_rate": 0.00031293769515060884, "loss": 1.4778, "step": 14368 }, { "epoch": 0.638922235560891, "grad_norm": 0.060579001903533936, "learning_rate": 0.0003128021310073059, "loss": 1.4749, "step": 14370 }, { "epoch": 0.6390111600195634, "grad_norm": 0.06134400889277458, "learning_rate": 0.0003126665828665018, "loss": 1.4755, "step": 14372 }, { "epoch": 0.6391000844782357, "grad_norm": 0.06002011522650719, "learning_rate": 0.00031253105073978385, "loss": 1.4783, "step": 14374 }, { "epoch": 0.6391890089369081, "grad_norm": 0.06118055060505867, "learning_rate": 0.0003123955346387377, "loss": 1.4711, "step": 14376 }, { "epoch": 0.6392779333955805, "grad_norm": 0.060449860990047455, "learning_rate": 0.0003122600345749481, "loss": 1.4773, "step": 14378 }, { "epoch": 0.6393668578542528, "grad_norm": 0.06100989878177643, "learning_rate": 0.00031212455055999814, "loss": 1.4668, "step": 14380 }, { "epoch": 0.6394557823129252, "grad_norm": 0.06005636230111122, "learning_rate": 0.0003119890826054693, "loss": 1.4761, "step": 14382 }, { "epoch": 0.6395447067715976, "grad_norm": 0.06204495206475258, "learning_rate": 0.00031185363072294247, "loss": 1.4733, "step": 14384 }, { "epoch": 0.6396336312302698, "grad_norm": 0.05984333157539368, "learning_rate": 0.0003117181949239963, "loss": 1.4742, "step": 14386 }, { "epoch": 0.6397225556889422, "grad_norm": 0.06164374575018883, "learning_rate": 0.0003115827752202086, "loss": 1.4814, "step": 14388 }, { "epoch": 0.6398114801476146, "grad_norm": 0.06006844714283943, "learning_rate": 0.00031144737162315555, "loss": 1.4765, "step": 14390 }, { "epoch": 0.6399004046062869, "grad_norm": 0.06163867563009262, "learning_rate": 0.0003113119841444121, "loss": 1.4774, "step": 14392 }, { "epoch": 0.6399893290649593, "grad_norm": 0.06025887280702591, "learning_rate": 0.00031117661279555175, "loss": 1.4702, "step": 14394 }, { "epoch": 0.6400782535236317, "grad_norm": 0.06053690239787102, "learning_rate": 0.00031104125758814674, "loss": 1.4726, "step": 14396 }, { "epoch": 0.640167177982304, "grad_norm": 0.06094555929303169, "learning_rate": 0.0003109059185337676, "loss": 1.4739, "step": 14398 }, { "epoch": 0.6402561024409764, "grad_norm": 0.060711413621902466, "learning_rate": 0.00031077059564398404, "loss": 1.4722, "step": 14400 }, { "epoch": 0.6403450268996488, "grad_norm": 0.05947254225611687, "learning_rate": 0.0003106352889303638, "loss": 1.4736, "step": 14402 }, { "epoch": 0.6404339513583212, "grad_norm": 0.05949154123663902, "learning_rate": 0.00031049999840447346, "loss": 1.4744, "step": 14404 }, { "epoch": 0.6405228758169934, "grad_norm": 0.05873184651136398, "learning_rate": 0.0003103647240778785, "loss": 1.4736, "step": 14406 }, { "epoch": 0.6406118002756658, "grad_norm": 0.05965226888656616, "learning_rate": 0.0003102294659621425, "loss": 1.4766, "step": 14408 }, { "epoch": 0.6407007247343381, "grad_norm": 0.06001347303390503, "learning_rate": 0.0003100942240688281, "loss": 1.4721, "step": 14410 }, { "epoch": 0.6407896491930105, "grad_norm": 0.06033129617571831, "learning_rate": 0.0003099589984094965, "loss": 1.477, "step": 14412 }, { "epoch": 0.6408785736516829, "grad_norm": 0.060869600623846054, "learning_rate": 0.00030982378899570685, "loss": 1.4678, "step": 14414 }, { "epoch": 0.6409674981103552, "grad_norm": 0.060569677501916885, "learning_rate": 0.0003096885958390181, "loss": 1.4698, "step": 14416 }, { "epoch": 0.6410564225690276, "grad_norm": 0.060459256172180176, "learning_rate": 0.0003095534189509867, "loss": 1.4784, "step": 14418 }, { "epoch": 0.6411453470277, "grad_norm": 0.06125481799244881, "learning_rate": 0.00030941825834316825, "loss": 1.4712, "step": 14420 }, { "epoch": 0.6412342714863724, "grad_norm": 0.060924872756004333, "learning_rate": 0.00030928311402711705, "loss": 1.475, "step": 14422 }, { "epoch": 0.6413231959450447, "grad_norm": 0.06078309938311577, "learning_rate": 0.00030914798601438566, "loss": 1.4719, "step": 14424 }, { "epoch": 0.6414121204037171, "grad_norm": 0.06124448776245117, "learning_rate": 0.0003090128743165256, "loss": 1.4706, "step": 14426 }, { "epoch": 0.6415010448623893, "grad_norm": 0.060657963156700134, "learning_rate": 0.0003088777789450866, "loss": 1.4778, "step": 14428 }, { "epoch": 0.6415899693210617, "grad_norm": 0.0634801834821701, "learning_rate": 0.0003087426999116173, "loss": 1.4766, "step": 14430 }, { "epoch": 0.6416788937797341, "grad_norm": 0.06083952635526657, "learning_rate": 0.00030860763722766486, "loss": 1.4667, "step": 14432 }, { "epoch": 0.6417678182384065, "grad_norm": 0.06175751984119415, "learning_rate": 0.0003084725909047752, "loss": 1.475, "step": 14434 }, { "epoch": 0.6418567426970788, "grad_norm": 0.06247575953602791, "learning_rate": 0.00030833756095449217, "loss": 1.4704, "step": 14436 }, { "epoch": 0.6419456671557512, "grad_norm": 0.06103740632534027, "learning_rate": 0.0003082025473883594, "loss": 1.4742, "step": 14438 }, { "epoch": 0.6420345916144236, "grad_norm": 0.06011580675840378, "learning_rate": 0.00030806755021791797, "loss": 1.4773, "step": 14440 }, { "epoch": 0.6421235160730959, "grad_norm": 0.05931413918733597, "learning_rate": 0.0003079325694547081, "loss": 1.4702, "step": 14442 }, { "epoch": 0.6422124405317683, "grad_norm": 0.05985075607895851, "learning_rate": 0.00030779760511026865, "loss": 1.4705, "step": 14444 }, { "epoch": 0.6423013649904407, "grad_norm": 0.06110270693898201, "learning_rate": 0.00030766265719613686, "loss": 1.4728, "step": 14446 }, { "epoch": 0.642390289449113, "grad_norm": 0.061616457998752594, "learning_rate": 0.00030752772572384884, "loss": 1.4699, "step": 14448 }, { "epoch": 0.6424792139077853, "grad_norm": 0.05985260009765625, "learning_rate": 0.000307392810704939, "loss": 1.4706, "step": 14450 }, { "epoch": 0.6425681383664577, "grad_norm": 0.06176939606666565, "learning_rate": 0.00030725791215094046, "loss": 1.4756, "step": 14452 }, { "epoch": 0.64265706282513, "grad_norm": 0.060520898550748825, "learning_rate": 0.000307123030073385, "loss": 1.4789, "step": 14454 }, { "epoch": 0.6427459872838024, "grad_norm": 0.060216981917619705, "learning_rate": 0.00030698816448380294, "loss": 1.4725, "step": 14456 }, { "epoch": 0.6428349117424748, "grad_norm": 0.06227912753820419, "learning_rate": 0.000306853315393723, "loss": 1.4763, "step": 14458 }, { "epoch": 0.6429238362011471, "grad_norm": 0.05990215763449669, "learning_rate": 0.000306718482814673, "loss": 1.4747, "step": 14460 }, { "epoch": 0.6430127606598195, "grad_norm": 0.060426462441682816, "learning_rate": 0.0003065836667581787, "loss": 1.4722, "step": 14462 }, { "epoch": 0.6431016851184919, "grad_norm": 0.061631377786397934, "learning_rate": 0.0003064488672357651, "loss": 1.4755, "step": 14464 }, { "epoch": 0.6431906095771642, "grad_norm": 0.061112042516469955, "learning_rate": 0.00030631408425895536, "loss": 1.4776, "step": 14466 }, { "epoch": 0.6432795340358366, "grad_norm": 0.06276817619800568, "learning_rate": 0.00030617931783927086, "loss": 1.4744, "step": 14468 }, { "epoch": 0.643368458494509, "grad_norm": 0.06161990389227867, "learning_rate": 0.0003060445679882328, "loss": 1.4732, "step": 14470 }, { "epoch": 0.6434573829531812, "grad_norm": 0.06073969602584839, "learning_rate": 0.0003059098347173597, "loss": 1.4772, "step": 14472 }, { "epoch": 0.6435463074118536, "grad_norm": 0.06146194413304329, "learning_rate": 0.0003057751180381691, "loss": 1.4648, "step": 14474 }, { "epoch": 0.643635231870526, "grad_norm": 0.06184494122862816, "learning_rate": 0.00030564041796217744, "loss": 1.4684, "step": 14476 }, { "epoch": 0.6437241563291983, "grad_norm": 0.06379452347755432, "learning_rate": 0.0003055057345008993, "loss": 1.4741, "step": 14478 }, { "epoch": 0.6438130807878707, "grad_norm": 0.06197437271475792, "learning_rate": 0.0003053710676658482, "loss": 1.4728, "step": 14480 }, { "epoch": 0.6439020052465431, "grad_norm": 0.06214113533496857, "learning_rate": 0.00030523641746853594, "loss": 1.4751, "step": 14482 }, { "epoch": 0.6439909297052154, "grad_norm": 0.061672694981098175, "learning_rate": 0.0003051017839204728, "loss": 1.4736, "step": 14484 }, { "epoch": 0.6440798541638878, "grad_norm": 0.06234847381711006, "learning_rate": 0.0003049671670331683, "loss": 1.4706, "step": 14486 }, { "epoch": 0.6441687786225602, "grad_norm": 0.06098306179046631, "learning_rate": 0.00030483256681812987, "loss": 1.4699, "step": 14488 }, { "epoch": 0.6442577030812325, "grad_norm": 0.060952410101890564, "learning_rate": 0.0003046979832868634, "loss": 1.4701, "step": 14490 }, { "epoch": 0.6443466275399049, "grad_norm": 0.061424147337675095, "learning_rate": 0.00030456341645087426, "loss": 1.4764, "step": 14492 }, { "epoch": 0.6444355519985772, "grad_norm": 0.06046923249959946, "learning_rate": 0.0003044288663216653, "loss": 1.4673, "step": 14494 }, { "epoch": 0.6445244764572495, "grad_norm": 0.06237080320715904, "learning_rate": 0.00030429433291073905, "loss": 1.4692, "step": 14496 }, { "epoch": 0.6446134009159219, "grad_norm": 0.0605134479701519, "learning_rate": 0.00030415981622959544, "loss": 1.4683, "step": 14498 }, { "epoch": 0.6447023253745943, "grad_norm": 0.061160653829574585, "learning_rate": 0.00030402531628973375, "loss": 1.4755, "step": 14500 }, { "epoch": 0.6447023253745943, "eval_loss": 1.45697820186615, "eval_runtime": 12.467, "eval_samples_per_second": 554.261, "eval_steps_per_second": 69.303, "step": 14500 }, { "epoch": 0.6447912498332666, "grad_norm": 0.06107184290885925, "learning_rate": 0.00030389083310265177, "loss": 1.4756, "step": 14502 }, { "epoch": 0.644880174291939, "grad_norm": 0.06288035959005356, "learning_rate": 0.0003037563666798456, "loss": 1.4804, "step": 14504 }, { "epoch": 0.6449690987506114, "grad_norm": 0.061453141272068024, "learning_rate": 0.0003036219170328099, "loss": 1.4762, "step": 14506 }, { "epoch": 0.6450580232092837, "grad_norm": 0.059680961072444916, "learning_rate": 0.0003034874841730382, "loss": 1.4776, "step": 14508 }, { "epoch": 0.6451469476679561, "grad_norm": 0.061881791800260544, "learning_rate": 0.00030335306811202245, "loss": 1.4725, "step": 14510 }, { "epoch": 0.6452358721266285, "grad_norm": 0.06059206277132034, "learning_rate": 0.00030321866886125285, "loss": 1.471, "step": 14512 }, { "epoch": 0.6453247965853008, "grad_norm": 0.0622367300093174, "learning_rate": 0.0003030842864322187, "loss": 1.4723, "step": 14514 }, { "epoch": 0.6454137210439731, "grad_norm": 0.06035193055868149, "learning_rate": 0.0003029499208364076, "loss": 1.472, "step": 14516 }, { "epoch": 0.6455026455026455, "grad_norm": 0.06035499647259712, "learning_rate": 0.00030281557208530557, "loss": 1.4712, "step": 14518 }, { "epoch": 0.6455915699613178, "grad_norm": 0.06048353761434555, "learning_rate": 0.00030268124019039753, "loss": 1.4763, "step": 14520 }, { "epoch": 0.6456804944199902, "grad_norm": 0.06177537515759468, "learning_rate": 0.0003025469251631664, "loss": 1.475, "step": 14522 }, { "epoch": 0.6457694188786626, "grad_norm": 0.061068419367074966, "learning_rate": 0.0003024126270150945, "loss": 1.471, "step": 14524 }, { "epoch": 0.6458583433373349, "grad_norm": 0.06005478650331497, "learning_rate": 0.0003022783457576619, "loss": 1.4684, "step": 14526 }, { "epoch": 0.6459472677960073, "grad_norm": 0.06285406649112701, "learning_rate": 0.0003021440814023475, "loss": 1.4752, "step": 14528 }, { "epoch": 0.6460361922546797, "grad_norm": 0.060045741498470306, "learning_rate": 0.00030200983396062907, "loss": 1.4733, "step": 14530 }, { "epoch": 0.646125116713352, "grad_norm": 0.059561315923929214, "learning_rate": 0.00030187560344398245, "loss": 1.4758, "step": 14532 }, { "epoch": 0.6462140411720244, "grad_norm": 0.061188627034425735, "learning_rate": 0.0003017413898638825, "loss": 1.4796, "step": 14534 }, { "epoch": 0.6463029656306967, "grad_norm": 0.06275120377540588, "learning_rate": 0.0003016071932318022, "loss": 1.4765, "step": 14536 }, { "epoch": 0.646391890089369, "grad_norm": 0.06224878132343292, "learning_rate": 0.0003014730135592132, "loss": 1.4726, "step": 14538 }, { "epoch": 0.6464808145480414, "grad_norm": 0.06063244864344597, "learning_rate": 0.00030133885085758595, "loss": 1.4701, "step": 14540 }, { "epoch": 0.6465697390067138, "grad_norm": 0.06237416714429855, "learning_rate": 0.00030120470513838937, "loss": 1.4742, "step": 14542 }, { "epoch": 0.6466586634653861, "grad_norm": 0.06257627159357071, "learning_rate": 0.00030107057641309034, "loss": 1.4741, "step": 14544 }, { "epoch": 0.6467475879240585, "grad_norm": 0.05979495123028755, "learning_rate": 0.0003009364646931554, "loss": 1.4755, "step": 14546 }, { "epoch": 0.6468365123827309, "grad_norm": 0.059078190475702286, "learning_rate": 0.00030080236999004833, "loss": 1.4745, "step": 14548 }, { "epoch": 0.6469254368414032, "grad_norm": 0.059303369373083115, "learning_rate": 0.00030066829231523287, "loss": 1.4728, "step": 14550 }, { "epoch": 0.6470143613000756, "grad_norm": 0.06229490414261818, "learning_rate": 0.00030053423168017014, "loss": 1.4669, "step": 14552 }, { "epoch": 0.647103285758748, "grad_norm": 0.06081756576895714, "learning_rate": 0.00030040018809632013, "loss": 1.4707, "step": 14554 }, { "epoch": 0.6471922102174203, "grad_norm": 0.05991298332810402, "learning_rate": 0.00030026616157514176, "loss": 1.4704, "step": 14556 }, { "epoch": 0.6472811346760926, "grad_norm": 0.0638953223824501, "learning_rate": 0.0003001321521280921, "loss": 1.4678, "step": 14558 }, { "epoch": 0.647370059134765, "grad_norm": 0.062413301318883896, "learning_rate": 0.0002999981597666268, "loss": 1.4718, "step": 14560 }, { "epoch": 0.6474589835934373, "grad_norm": 0.060770995914936066, "learning_rate": 0.0002998641845022003, "loss": 1.4711, "step": 14562 }, { "epoch": 0.6475479080521097, "grad_norm": 0.06420081853866577, "learning_rate": 0.00029973022634626514, "loss": 1.4737, "step": 14564 }, { "epoch": 0.6476368325107821, "grad_norm": 0.059697262942790985, "learning_rate": 0.00029959628531027287, "loss": 1.4752, "step": 14566 }, { "epoch": 0.6477257569694544, "grad_norm": 0.06121120974421501, "learning_rate": 0.0002994623614056733, "loss": 1.4698, "step": 14568 }, { "epoch": 0.6478146814281268, "grad_norm": 0.06134429946541786, "learning_rate": 0.00029932845464391476, "loss": 1.471, "step": 14570 }, { "epoch": 0.6479036058867992, "grad_norm": 0.06118486449122429, "learning_rate": 0.00029919456503644425, "loss": 1.4717, "step": 14572 }, { "epoch": 0.6479925303454716, "grad_norm": 0.06073017045855522, "learning_rate": 0.00029906069259470747, "loss": 1.4767, "step": 14574 }, { "epoch": 0.6480814548041439, "grad_norm": 0.06142178550362587, "learning_rate": 0.0002989268373301478, "loss": 1.4737, "step": 14576 }, { "epoch": 0.6481703792628163, "grad_norm": 0.05868706852197647, "learning_rate": 0.00029879299925420855, "loss": 1.4715, "step": 14578 }, { "epoch": 0.6482593037214885, "grad_norm": 0.06036018207669258, "learning_rate": 0.00029865917837833023, "loss": 1.4641, "step": 14580 }, { "epoch": 0.6483482281801609, "grad_norm": 0.06149023771286011, "learning_rate": 0.0002985253747139526, "loss": 1.4738, "step": 14582 }, { "epoch": 0.6484371526388333, "grad_norm": 0.0594891719520092, "learning_rate": 0.0002983915882725139, "loss": 1.4698, "step": 14584 }, { "epoch": 0.6485260770975056, "grad_norm": 0.06031309440732002, "learning_rate": 0.00029825781906545057, "loss": 1.4683, "step": 14586 }, { "epoch": 0.648615001556178, "grad_norm": 0.06182754039764404, "learning_rate": 0.000298124067104198, "loss": 1.4741, "step": 14588 }, { "epoch": 0.6487039260148504, "grad_norm": 0.06004558876156807, "learning_rate": 0.0002979903324001898, "loss": 1.4774, "step": 14590 }, { "epoch": 0.6487928504735228, "grad_norm": 0.05962319299578667, "learning_rate": 0.00029785661496485816, "loss": 1.4754, "step": 14592 }, { "epoch": 0.6488817749321951, "grad_norm": 0.06000960245728493, "learning_rate": 0.00029772291480963396, "loss": 1.4699, "step": 14594 }, { "epoch": 0.6489706993908675, "grad_norm": 0.059110336005687714, "learning_rate": 0.00029758923194594657, "loss": 1.4747, "step": 14596 }, { "epoch": 0.6490596238495399, "grad_norm": 0.0625082477927208, "learning_rate": 0.0002974555663852232, "loss": 1.4723, "step": 14598 }, { "epoch": 0.6491485483082122, "grad_norm": 0.06030767038464546, "learning_rate": 0.000297321918138891, "loss": 1.4739, "step": 14600 }, { "epoch": 0.6492374727668845, "grad_norm": 0.060511305928230286, "learning_rate": 0.00029718828721837407, "loss": 1.4659, "step": 14602 }, { "epoch": 0.6493263972255569, "grad_norm": 0.060783982276916504, "learning_rate": 0.0002970546736350964, "loss": 1.4677, "step": 14604 }, { "epoch": 0.6494153216842292, "grad_norm": 0.060023460537195206, "learning_rate": 0.0002969210774004796, "loss": 1.4695, "step": 14606 }, { "epoch": 0.6495042461429016, "grad_norm": 0.0627756267786026, "learning_rate": 0.0002967874985259438, "loss": 1.472, "step": 14608 }, { "epoch": 0.649593170601574, "grad_norm": 0.061621058732271194, "learning_rate": 0.00029665393702290834, "loss": 1.4801, "step": 14610 }, { "epoch": 0.6496820950602463, "grad_norm": 0.05914198234677315, "learning_rate": 0.0002965203929027904, "loss": 1.4706, "step": 14612 }, { "epoch": 0.6497710195189187, "grad_norm": 0.06379103660583496, "learning_rate": 0.0002963868661770059, "loss": 1.4757, "step": 14614 }, { "epoch": 0.6498599439775911, "grad_norm": 0.06029783561825752, "learning_rate": 0.00029625335685696955, "loss": 1.4718, "step": 14616 }, { "epoch": 0.6499488684362634, "grad_norm": 0.06065394729375839, "learning_rate": 0.00029611986495409393, "loss": 1.4682, "step": 14618 }, { "epoch": 0.6500377928949358, "grad_norm": 0.06273026019334793, "learning_rate": 0.0002959863904797908, "loss": 1.4757, "step": 14620 }, { "epoch": 0.6501267173536082, "grad_norm": 0.06018160283565521, "learning_rate": 0.00029585293344547026, "loss": 1.4719, "step": 14622 }, { "epoch": 0.6502156418122804, "grad_norm": 0.059954337775707245, "learning_rate": 0.0002957194938625403, "loss": 1.4712, "step": 14624 }, { "epoch": 0.6503045662709528, "grad_norm": 0.06017017737030983, "learning_rate": 0.0002955860717424085, "loss": 1.4692, "step": 14626 }, { "epoch": 0.6503934907296252, "grad_norm": 0.0594681017100811, "learning_rate": 0.00029545266709648003, "loss": 1.4721, "step": 14628 }, { "epoch": 0.6504824151882975, "grad_norm": 0.06060068681836128, "learning_rate": 0.00029531927993615894, "loss": 1.4698, "step": 14630 }, { "epoch": 0.6505713396469699, "grad_norm": 0.061522986739873886, "learning_rate": 0.0002951859102728478, "loss": 1.4723, "step": 14632 }, { "epoch": 0.6506602641056423, "grad_norm": 0.06037484481930733, "learning_rate": 0.0002950525581179477, "loss": 1.4712, "step": 14634 }, { "epoch": 0.6507491885643146, "grad_norm": 0.061230264604091644, "learning_rate": 0.000294919223482858, "loss": 1.4734, "step": 14636 }, { "epoch": 0.650838113022987, "grad_norm": 0.06225361302495003, "learning_rate": 0.00029478590637897696, "loss": 1.4777, "step": 14638 }, { "epoch": 0.6509270374816594, "grad_norm": 0.05939456820487976, "learning_rate": 0.0002946526068177009, "loss": 1.4755, "step": 14640 }, { "epoch": 0.6510159619403317, "grad_norm": 0.059683386236429214, "learning_rate": 0.000294519324810425, "loss": 1.4751, "step": 14642 }, { "epoch": 0.6511048863990041, "grad_norm": 0.062137801200151443, "learning_rate": 0.0002943860603685428, "loss": 1.4733, "step": 14644 }, { "epoch": 0.6511938108576764, "grad_norm": 0.05937831103801727, "learning_rate": 0.0002942528135034461, "loss": 1.4712, "step": 14646 }, { "epoch": 0.6512827353163487, "grad_norm": 0.06024874746799469, "learning_rate": 0.0002941195842265257, "loss": 1.468, "step": 14648 }, { "epoch": 0.6513716597750211, "grad_norm": 0.06173409894108772, "learning_rate": 0.0002939863725491706, "loss": 1.4691, "step": 14650 }, { "epoch": 0.6514605842336935, "grad_norm": 0.060418277978897095, "learning_rate": 0.00029385317848276807, "loss": 1.4726, "step": 14652 }, { "epoch": 0.6515495086923658, "grad_norm": 0.06066744774580002, "learning_rate": 0.0002937200020387045, "loss": 1.4757, "step": 14654 }, { "epoch": 0.6516384331510382, "grad_norm": 0.06171972304582596, "learning_rate": 0.00029358684322836396, "loss": 1.4771, "step": 14656 }, { "epoch": 0.6517273576097106, "grad_norm": 0.06183767318725586, "learning_rate": 0.00029345370206312996, "loss": 1.4738, "step": 14658 }, { "epoch": 0.6518162820683829, "grad_norm": 0.06188305467367172, "learning_rate": 0.0002933205785543836, "loss": 1.4689, "step": 14660 }, { "epoch": 0.6519052065270553, "grad_norm": 0.06321601569652557, "learning_rate": 0.00029318747271350493, "loss": 1.4732, "step": 14662 }, { "epoch": 0.6519941309857277, "grad_norm": 0.06155180558562279, "learning_rate": 0.0002930543845518726, "loss": 1.4707, "step": 14664 }, { "epoch": 0.6520830554443999, "grad_norm": 0.0595284141600132, "learning_rate": 0.00029292131408086345, "loss": 1.4722, "step": 14666 }, { "epoch": 0.6521719799030723, "grad_norm": 0.060346901416778564, "learning_rate": 0.0002927882613118528, "loss": 1.477, "step": 14668 }, { "epoch": 0.6522609043617447, "grad_norm": 0.061850935220718384, "learning_rate": 0.0002926552262562149, "loss": 1.4768, "step": 14670 }, { "epoch": 0.652349828820417, "grad_norm": 0.061827659606933594, "learning_rate": 0.0002925222089253218, "loss": 1.471, "step": 14672 }, { "epoch": 0.6524387532790894, "grad_norm": 0.061195094138383865, "learning_rate": 0.0002923892093305448, "loss": 1.4789, "step": 14674 }, { "epoch": 0.6525276777377618, "grad_norm": 0.0616355836391449, "learning_rate": 0.0002922562274832531, "loss": 1.4651, "step": 14676 }, { "epoch": 0.6526166021964341, "grad_norm": 0.05983952432870865, "learning_rate": 0.0002921232633948143, "loss": 1.4705, "step": 14678 }, { "epoch": 0.6527055266551065, "grad_norm": 0.060748714953660965, "learning_rate": 0.0002919903170765954, "loss": 1.4728, "step": 14680 }, { "epoch": 0.6527944511137789, "grad_norm": 0.06134764477610588, "learning_rate": 0.0002918573885399607, "loss": 1.4724, "step": 14682 }, { "epoch": 0.6528833755724512, "grad_norm": 0.061923548579216, "learning_rate": 0.00029172447779627367, "loss": 1.4699, "step": 14684 }, { "epoch": 0.6529723000311236, "grad_norm": 0.05965703725814819, "learning_rate": 0.00029159158485689594, "loss": 1.4746, "step": 14686 }, { "epoch": 0.6530612244897959, "grad_norm": 0.06172526627779007, "learning_rate": 0.0002914587097331881, "loss": 1.4738, "step": 14688 }, { "epoch": 0.6531501489484682, "grad_norm": 0.06159723922610283, "learning_rate": 0.000291325852436509, "loss": 1.4705, "step": 14690 }, { "epoch": 0.6532390734071406, "grad_norm": 0.059951264411211014, "learning_rate": 0.00029119301297821554, "loss": 1.4704, "step": 14692 }, { "epoch": 0.653327997865813, "grad_norm": 0.05888056382536888, "learning_rate": 0.0002910601913696631, "loss": 1.4688, "step": 14694 }, { "epoch": 0.6534169223244853, "grad_norm": 0.05950944125652313, "learning_rate": 0.0002909273876222067, "loss": 1.4749, "step": 14696 }, { "epoch": 0.6535058467831577, "grad_norm": 0.060837604105472565, "learning_rate": 0.00029079460174719865, "loss": 1.47, "step": 14698 }, { "epoch": 0.6535947712418301, "grad_norm": 0.059955399483442307, "learning_rate": 0.00029066183375598965, "loss": 1.4737, "step": 14700 }, { "epoch": 0.6536836957005024, "grad_norm": 0.06083989143371582, "learning_rate": 0.0002905290836599297, "loss": 1.4775, "step": 14702 }, { "epoch": 0.6537726201591748, "grad_norm": 0.0601428858935833, "learning_rate": 0.000290396351470367, "loss": 1.47, "step": 14704 }, { "epoch": 0.6538615446178472, "grad_norm": 0.06066612899303436, "learning_rate": 0.00029026363719864755, "loss": 1.4735, "step": 14706 }, { "epoch": 0.6539504690765195, "grad_norm": 0.060709454119205475, "learning_rate": 0.0002901309408561168, "loss": 1.471, "step": 14708 }, { "epoch": 0.6540393935351918, "grad_norm": 0.061341624706983566, "learning_rate": 0.00028999826245411803, "loss": 1.4721, "step": 14710 }, { "epoch": 0.6541283179938642, "grad_norm": 0.062061794102191925, "learning_rate": 0.00028986560200399347, "loss": 1.4718, "step": 14712 }, { "epoch": 0.6542172424525365, "grad_norm": 0.06041424348950386, "learning_rate": 0.000289732959517083, "loss": 1.4697, "step": 14714 }, { "epoch": 0.6543061669112089, "grad_norm": 0.0598766915500164, "learning_rate": 0.00028960033500472583, "loss": 1.4718, "step": 14716 }, { "epoch": 0.6543950913698813, "grad_norm": 0.061096884310245514, "learning_rate": 0.00028946772847825944, "loss": 1.4709, "step": 14718 }, { "epoch": 0.6544840158285536, "grad_norm": 0.06066969409584999, "learning_rate": 0.0002893351399490194, "loss": 1.473, "step": 14720 }, { "epoch": 0.654572940287226, "grad_norm": 0.061327192932367325, "learning_rate": 0.00028920256942833956, "loss": 1.4684, "step": 14722 }, { "epoch": 0.6546618647458984, "grad_norm": 0.06126781925559044, "learning_rate": 0.00028907001692755343, "loss": 1.4715, "step": 14724 }, { "epoch": 0.6547507892045707, "grad_norm": 0.062026720494031906, "learning_rate": 0.00028893748245799154, "loss": 1.4663, "step": 14726 }, { "epoch": 0.6548397136632431, "grad_norm": 0.06028749421238899, "learning_rate": 0.00028880496603098403, "loss": 1.4747, "step": 14728 }, { "epoch": 0.6549286381219155, "grad_norm": 0.06083076074719429, "learning_rate": 0.0002886724676578586, "loss": 1.4711, "step": 14730 }, { "epoch": 0.6550175625805877, "grad_norm": 0.06158638745546341, "learning_rate": 0.0002885399873499418, "loss": 1.4754, "step": 14732 }, { "epoch": 0.6551064870392601, "grad_norm": 0.06135280430316925, "learning_rate": 0.000288407525118559, "loss": 1.4757, "step": 14734 }, { "epoch": 0.6551954114979325, "grad_norm": 0.0598473995923996, "learning_rate": 0.0002882750809750332, "loss": 1.4694, "step": 14736 }, { "epoch": 0.6552843359566048, "grad_norm": 0.060813672840595245, "learning_rate": 0.0002881426549306866, "loss": 1.4696, "step": 14738 }, { "epoch": 0.6553732604152772, "grad_norm": 0.0595141276717186, "learning_rate": 0.00028801024699683954, "loss": 1.4734, "step": 14740 }, { "epoch": 0.6554621848739496, "grad_norm": 0.059074971824884415, "learning_rate": 0.0002878778571848107, "loss": 1.4677, "step": 14742 }, { "epoch": 0.655551109332622, "grad_norm": 0.06075437366962433, "learning_rate": 0.0002877454855059174, "loss": 1.4735, "step": 14744 }, { "epoch": 0.6556400337912943, "grad_norm": 0.06068753823637962, "learning_rate": 0.0002876131319714755, "loss": 1.469, "step": 14746 }, { "epoch": 0.6557289582499667, "grad_norm": 0.05928294360637665, "learning_rate": 0.0002874807965927989, "loss": 1.4735, "step": 14748 }, { "epoch": 0.655817882708639, "grad_norm": 0.060513000935316086, "learning_rate": 0.0002873484793812005, "loss": 1.472, "step": 14750 }, { "epoch": 0.6559068071673114, "grad_norm": 0.060318686068058014, "learning_rate": 0.000287216180347991, "loss": 1.4728, "step": 14752 }, { "epoch": 0.6559957316259837, "grad_norm": 0.0590246245265007, "learning_rate": 0.0002870838995044802, "loss": 1.4708, "step": 14754 }, { "epoch": 0.656084656084656, "grad_norm": 0.060487277805805206, "learning_rate": 0.0002869516368619761, "loss": 1.4684, "step": 14756 }, { "epoch": 0.6561735805433284, "grad_norm": 0.060591381043195724, "learning_rate": 0.0002868193924317848, "loss": 1.4745, "step": 14758 }, { "epoch": 0.6562625050020008, "grad_norm": 0.059523772448301315, "learning_rate": 0.00028668716622521126, "loss": 1.4743, "step": 14760 }, { "epoch": 0.6563514294606732, "grad_norm": 0.058641474694013596, "learning_rate": 0.0002865549582535589, "loss": 1.4712, "step": 14762 }, { "epoch": 0.6564403539193455, "grad_norm": 0.06107234209775925, "learning_rate": 0.0002864227685281293, "loss": 1.473, "step": 14764 }, { "epoch": 0.6565292783780179, "grad_norm": 0.059071581810712814, "learning_rate": 0.0002862905970602225, "loss": 1.4703, "step": 14766 }, { "epoch": 0.6566182028366903, "grad_norm": 0.06059209629893303, "learning_rate": 0.0002861584438611375, "loss": 1.4692, "step": 14768 }, { "epoch": 0.6567071272953626, "grad_norm": 0.06286212056875229, "learning_rate": 0.000286026308942171, "loss": 1.4715, "step": 14770 }, { "epoch": 0.656796051754035, "grad_norm": 0.0598897784948349, "learning_rate": 0.0002858941923146187, "loss": 1.4647, "step": 14772 }, { "epoch": 0.6568849762127073, "grad_norm": 0.05993928760290146, "learning_rate": 0.0002857620939897741, "loss": 1.4691, "step": 14774 }, { "epoch": 0.6569739006713796, "grad_norm": 0.05909041315317154, "learning_rate": 0.00028563001397892987, "loss": 1.4722, "step": 14776 }, { "epoch": 0.657062825130052, "grad_norm": 0.061181001365184784, "learning_rate": 0.0002854979522933769, "loss": 1.4705, "step": 14778 }, { "epoch": 0.6571517495887244, "grad_norm": 0.05939111486077309, "learning_rate": 0.0002853659089444041, "loss": 1.4646, "step": 14780 }, { "epoch": 0.6572406740473967, "grad_norm": 0.060132816433906555, "learning_rate": 0.0002852338839432993, "loss": 1.473, "step": 14782 }, { "epoch": 0.6573295985060691, "grad_norm": 0.060787100344896317, "learning_rate": 0.0002851018773013486, "loss": 1.4695, "step": 14784 }, { "epoch": 0.6574185229647415, "grad_norm": 0.06070558354258537, "learning_rate": 0.00028496988902983644, "loss": 1.4726, "step": 14786 }, { "epoch": 0.6575074474234138, "grad_norm": 0.060331154614686966, "learning_rate": 0.0002848379191400456, "loss": 1.4664, "step": 14788 }, { "epoch": 0.6575963718820862, "grad_norm": 0.059469323605298996, "learning_rate": 0.00028470596764325795, "loss": 1.4688, "step": 14790 }, { "epoch": 0.6576852963407586, "grad_norm": 0.06065972149372101, "learning_rate": 0.0002845740345507527, "loss": 1.4715, "step": 14792 }, { "epoch": 0.6577742207994309, "grad_norm": 0.062771275639534, "learning_rate": 0.00028444211987380835, "loss": 1.4711, "step": 14794 }, { "epoch": 0.6578631452581032, "grad_norm": 0.05880487337708473, "learning_rate": 0.0002843102236237015, "loss": 1.4718, "step": 14796 }, { "epoch": 0.6579520697167756, "grad_norm": 0.05996553599834442, "learning_rate": 0.0002841783458117075, "loss": 1.4745, "step": 14798 }, { "epoch": 0.6580409941754479, "grad_norm": 0.06091736629605293, "learning_rate": 0.0002840464864490995, "loss": 1.473, "step": 14800 }, { "epoch": 0.6581299186341203, "grad_norm": 0.059029486030340195, "learning_rate": 0.0002839146455471492, "loss": 1.4726, "step": 14802 }, { "epoch": 0.6582188430927927, "grad_norm": 0.06295252591371536, "learning_rate": 0.0002837828231171277, "loss": 1.4668, "step": 14804 }, { "epoch": 0.658307767551465, "grad_norm": 0.06021355465054512, "learning_rate": 0.00028365101917030315, "loss": 1.4625, "step": 14806 }, { "epoch": 0.6583966920101374, "grad_norm": 0.058032263070344925, "learning_rate": 0.0002835192337179428, "loss": 1.4654, "step": 14808 }, { "epoch": 0.6584856164688098, "grad_norm": 0.05962485074996948, "learning_rate": 0.0002833874667713123, "loss": 1.4726, "step": 14810 }, { "epoch": 0.6585745409274821, "grad_norm": 0.061198100447654724, "learning_rate": 0.00028325571834167585, "loss": 1.4708, "step": 14812 }, { "epoch": 0.6586634653861545, "grad_norm": 0.05982755497097969, "learning_rate": 0.00028312398844029585, "loss": 1.4727, "step": 14814 }, { "epoch": 0.6587523898448269, "grad_norm": 0.06041552871465683, "learning_rate": 0.0002829922770784329, "loss": 1.4748, "step": 14816 }, { "epoch": 0.6588413143034991, "grad_norm": 0.0609116330742836, "learning_rate": 0.0002828605842673464, "loss": 1.4678, "step": 14818 }, { "epoch": 0.6589302387621715, "grad_norm": 0.06071128696203232, "learning_rate": 0.0002827289100182943, "loss": 1.4696, "step": 14820 }, { "epoch": 0.6590191632208439, "grad_norm": 0.060914020985364914, "learning_rate": 0.0002825972543425324, "loss": 1.4741, "step": 14822 }, { "epoch": 0.6591080876795162, "grad_norm": 0.05891665443778038, "learning_rate": 0.0002824656172513149, "loss": 1.4643, "step": 14824 }, { "epoch": 0.6591970121381886, "grad_norm": 0.06179531663656235, "learning_rate": 0.0002823339987558955, "loss": 1.472, "step": 14826 }, { "epoch": 0.659285936596861, "grad_norm": 0.06368506699800491, "learning_rate": 0.000282202398867525, "loss": 1.4742, "step": 14828 }, { "epoch": 0.6593748610555333, "grad_norm": 0.061851486563682556, "learning_rate": 0.0002820708175974531, "loss": 1.4685, "step": 14830 }, { "epoch": 0.6594637855142057, "grad_norm": 0.06155168637633324, "learning_rate": 0.00028193925495692817, "loss": 1.474, "step": 14832 }, { "epoch": 0.6595527099728781, "grad_norm": 0.06134020909667015, "learning_rate": 0.0002818077109571967, "loss": 1.4658, "step": 14834 }, { "epoch": 0.6596416344315504, "grad_norm": 0.060675278306007385, "learning_rate": 0.0002816761856095037, "loss": 1.4737, "step": 14836 }, { "epoch": 0.6597305588902228, "grad_norm": 0.060609932988882065, "learning_rate": 0.0002815446789250924, "loss": 1.4669, "step": 14838 }, { "epoch": 0.6598194833488951, "grad_norm": 0.0593482069671154, "learning_rate": 0.00028141319091520454, "loss": 1.4744, "step": 14840 }, { "epoch": 0.6599084078075674, "grad_norm": 0.061323292553424835, "learning_rate": 0.00028128172159108065, "loss": 1.4715, "step": 14842 }, { "epoch": 0.6599973322662398, "grad_norm": 0.05969291925430298, "learning_rate": 0.0002811502709639589, "loss": 1.4666, "step": 14844 }, { "epoch": 0.6600862567249122, "grad_norm": 0.05991503968834877, "learning_rate": 0.00028101883904507643, "loss": 1.4695, "step": 14846 }, { "epoch": 0.6601751811835845, "grad_norm": 0.06149459630250931, "learning_rate": 0.00028088742584566874, "loss": 1.4683, "step": 14848 }, { "epoch": 0.6602641056422569, "grad_norm": 0.05918089300394058, "learning_rate": 0.0002807560313769694, "loss": 1.4754, "step": 14850 }, { "epoch": 0.6603530301009293, "grad_norm": 0.06039484962821007, "learning_rate": 0.00028062465565021054, "loss": 1.4686, "step": 14852 }, { "epoch": 0.6604419545596016, "grad_norm": 0.060235604643821716, "learning_rate": 0.00028049329867662313, "loss": 1.465, "step": 14854 }, { "epoch": 0.660530879018274, "grad_norm": 0.05938028544187546, "learning_rate": 0.0002803619604674357, "loss": 1.4735, "step": 14856 }, { "epoch": 0.6606198034769464, "grad_norm": 0.05908706784248352, "learning_rate": 0.00028023064103387596, "loss": 1.4733, "step": 14858 }, { "epoch": 0.6607087279356187, "grad_norm": 0.06113129109144211, "learning_rate": 0.00028009934038716937, "loss": 1.4708, "step": 14860 }, { "epoch": 0.660797652394291, "grad_norm": 0.0593777671456337, "learning_rate": 0.00027996805853854024, "loss": 1.4708, "step": 14862 }, { "epoch": 0.6608865768529634, "grad_norm": 0.05915198102593422, "learning_rate": 0.00027983679549921126, "loss": 1.4649, "step": 14864 }, { "epoch": 0.6609755013116357, "grad_norm": 0.061543058604002, "learning_rate": 0.0002797055512804031, "loss": 1.474, "step": 14866 }, { "epoch": 0.6610644257703081, "grad_norm": 0.06172054633498192, "learning_rate": 0.0002795743258933352, "loss": 1.4728, "step": 14868 }, { "epoch": 0.6611533502289805, "grad_norm": 0.06032607704401016, "learning_rate": 0.0002794431193492254, "loss": 1.4695, "step": 14870 }, { "epoch": 0.6612422746876528, "grad_norm": 0.06093483418226242, "learning_rate": 0.0002793119316592897, "loss": 1.4651, "step": 14872 }, { "epoch": 0.6613311991463252, "grad_norm": 0.06096861511468887, "learning_rate": 0.0002791807628347425, "loss": 1.4723, "step": 14874 }, { "epoch": 0.6614201236049976, "grad_norm": 0.06007368490099907, "learning_rate": 0.00027904961288679705, "loss": 1.4758, "step": 14876 }, { "epoch": 0.66150904806367, "grad_norm": 0.06006438657641411, "learning_rate": 0.00027891848182666414, "loss": 1.4672, "step": 14878 }, { "epoch": 0.6615979725223423, "grad_norm": 0.05987627059221268, "learning_rate": 0.00027878736966555393, "loss": 1.4656, "step": 14880 }, { "epoch": 0.6616868969810147, "grad_norm": 0.06110451743006706, "learning_rate": 0.0002786562764146738, "loss": 1.4748, "step": 14882 }, { "epoch": 0.6617758214396869, "grad_norm": 0.060226909816265106, "learning_rate": 0.00027852520208523104, "loss": 1.4724, "step": 14884 }, { "epoch": 0.6618647458983593, "grad_norm": 0.06197149679064751, "learning_rate": 0.00027839414668842996, "loss": 1.4728, "step": 14886 }, { "epoch": 0.6619536703570317, "grad_norm": 0.059934891760349274, "learning_rate": 0.0002782631102354736, "loss": 1.4699, "step": 14888 }, { "epoch": 0.662042594815704, "grad_norm": 0.0607532262802124, "learning_rate": 0.0002781320927375637, "loss": 1.4705, "step": 14890 }, { "epoch": 0.6621315192743764, "grad_norm": 0.060573361814022064, "learning_rate": 0.0002780010942059005, "loss": 1.4681, "step": 14892 }, { "epoch": 0.6622204437330488, "grad_norm": 0.061003051698207855, "learning_rate": 0.0002778701146516819, "loss": 1.4741, "step": 14894 }, { "epoch": 0.6623093681917211, "grad_norm": 0.06043396145105362, "learning_rate": 0.0002777391540861048, "loss": 1.4689, "step": 14896 }, { "epoch": 0.6623982926503935, "grad_norm": 0.06064370647072792, "learning_rate": 0.0002776082125203645, "loss": 1.4736, "step": 14898 }, { "epoch": 0.6624872171090659, "grad_norm": 0.061219021677970886, "learning_rate": 0.000277477289965654, "loss": 1.467, "step": 14900 }, { "epoch": 0.6625761415677383, "grad_norm": 0.06040164828300476, "learning_rate": 0.00027734638643316534, "loss": 1.4677, "step": 14902 }, { "epoch": 0.6626650660264105, "grad_norm": 0.05900850147008896, "learning_rate": 0.00027721550193408884, "loss": 1.4704, "step": 14904 }, { "epoch": 0.6627539904850829, "grad_norm": 0.060347117483615875, "learning_rate": 0.00027708463647961324, "loss": 1.4718, "step": 14906 }, { "epoch": 0.6628429149437552, "grad_norm": 0.060297053307294846, "learning_rate": 0.0002769537900809252, "loss": 1.4679, "step": 14908 }, { "epoch": 0.6629318394024276, "grad_norm": 0.060264792293310165, "learning_rate": 0.0002768229627492097, "loss": 1.4718, "step": 14910 }, { "epoch": 0.6630207638611, "grad_norm": 0.06151338666677475, "learning_rate": 0.00027669215449565134, "loss": 1.466, "step": 14912 }, { "epoch": 0.6631096883197724, "grad_norm": 0.05946485698223114, "learning_rate": 0.0002765613653314316, "loss": 1.4713, "step": 14914 }, { "epoch": 0.6631986127784447, "grad_norm": 0.059629566967487335, "learning_rate": 0.0002764305952677309, "loss": 1.4705, "step": 14916 }, { "epoch": 0.6632875372371171, "grad_norm": 0.061307553201913834, "learning_rate": 0.0002762998443157281, "loss": 1.4723, "step": 14918 }, { "epoch": 0.6633764616957895, "grad_norm": 0.05890590697526932, "learning_rate": 0.0002761691124866005, "loss": 1.4718, "step": 14920 }, { "epoch": 0.6634653861544618, "grad_norm": 0.060055799782276154, "learning_rate": 0.00027603839979152366, "loss": 1.4676, "step": 14922 }, { "epoch": 0.6635543106131342, "grad_norm": 0.05887698009610176, "learning_rate": 0.00027590770624167114, "loss": 1.4651, "step": 14924 }, { "epoch": 0.6636432350718064, "grad_norm": 0.060526877641677856, "learning_rate": 0.0002757770318482156, "loss": 1.469, "step": 14926 }, { "epoch": 0.6637321595304788, "grad_norm": 0.06122924014925957, "learning_rate": 0.00027564637662232756, "loss": 1.4692, "step": 14928 }, { "epoch": 0.6638210839891512, "grad_norm": 0.061400964856147766, "learning_rate": 0.000275515740575176, "loss": 1.4705, "step": 14930 }, { "epoch": 0.6639100084478236, "grad_norm": 0.05991072207689285, "learning_rate": 0.0002753851237179278, "loss": 1.4702, "step": 14932 }, { "epoch": 0.6639989329064959, "grad_norm": 0.05974270775914192, "learning_rate": 0.0002752545260617494, "loss": 1.4725, "step": 14934 }, { "epoch": 0.6640878573651683, "grad_norm": 0.06016663461923599, "learning_rate": 0.00027512394761780443, "loss": 1.4658, "step": 14936 }, { "epoch": 0.6641767818238407, "grad_norm": 0.060294173657894135, "learning_rate": 0.0002749933883972555, "loss": 1.4701, "step": 14938 }, { "epoch": 0.664265706282513, "grad_norm": 0.06034474819898605, "learning_rate": 0.0002748628484112632, "loss": 1.4747, "step": 14940 }, { "epoch": 0.6643546307411854, "grad_norm": 0.0617527961730957, "learning_rate": 0.0002747323276709868, "loss": 1.4662, "step": 14942 }, { "epoch": 0.6644435551998578, "grad_norm": 0.05941234156489372, "learning_rate": 0.0002746018261875839, "loss": 1.4704, "step": 14944 }, { "epoch": 0.6645324796585301, "grad_norm": 0.06100812181830406, "learning_rate": 0.00027447134397220996, "loss": 1.4689, "step": 14946 }, { "epoch": 0.6646214041172024, "grad_norm": 0.06031123921275139, "learning_rate": 0.0002743408810360194, "loss": 1.4724, "step": 14948 }, { "epoch": 0.6647103285758748, "grad_norm": 0.05882007256150246, "learning_rate": 0.00027421043739016504, "loss": 1.4708, "step": 14950 }, { "epoch": 0.6647992530345471, "grad_norm": 0.06024463474750519, "learning_rate": 0.0002740800130457973, "loss": 1.465, "step": 14952 }, { "epoch": 0.6648881774932195, "grad_norm": 0.060123246163129807, "learning_rate": 0.00027394960801406564, "loss": 1.4727, "step": 14954 }, { "epoch": 0.6649771019518919, "grad_norm": 0.059828802943229675, "learning_rate": 0.0002738192223061178, "loss": 1.4666, "step": 14956 }, { "epoch": 0.6650660264105642, "grad_norm": 0.06216980516910553, "learning_rate": 0.0002736888559330994, "loss": 1.4677, "step": 14958 }, { "epoch": 0.6651549508692366, "grad_norm": 0.06128519028425217, "learning_rate": 0.00027355850890615504, "loss": 1.4701, "step": 14960 }, { "epoch": 0.665243875327909, "grad_norm": 0.062251608818769455, "learning_rate": 0.0002734281812364273, "loss": 1.4673, "step": 14962 }, { "epoch": 0.6653327997865813, "grad_norm": 0.06012072041630745, "learning_rate": 0.0002732978729350569, "loss": 1.4749, "step": 14964 }, { "epoch": 0.6654217242452537, "grad_norm": 0.06006190553307533, "learning_rate": 0.0002731675840131836, "loss": 1.4654, "step": 14966 }, { "epoch": 0.6655106487039261, "grad_norm": 0.06113257631659508, "learning_rate": 0.00027303731448194467, "loss": 1.4711, "step": 14968 }, { "epoch": 0.6655995731625983, "grad_norm": 0.06147268787026405, "learning_rate": 0.0002729070643524763, "loss": 1.4706, "step": 14970 }, { "epoch": 0.6656884976212707, "grad_norm": 0.06147456541657448, "learning_rate": 0.000272776833635913, "loss": 1.4646, "step": 14972 }, { "epoch": 0.6657774220799431, "grad_norm": 0.06183717027306557, "learning_rate": 0.0002726466223433871, "loss": 1.473, "step": 14974 }, { "epoch": 0.6658663465386154, "grad_norm": 0.06242965906858444, "learning_rate": 0.0002725164304860298, "loss": 1.4701, "step": 14976 }, { "epoch": 0.6659552709972878, "grad_norm": 0.06013220548629761, "learning_rate": 0.00027238625807497074, "loss": 1.4705, "step": 14978 }, { "epoch": 0.6660441954559602, "grad_norm": 0.06084521859884262, "learning_rate": 0.00027225610512133727, "loss": 1.4733, "step": 14980 }, { "epoch": 0.6661331199146325, "grad_norm": 0.06399312615394592, "learning_rate": 0.00027212597163625546, "loss": 1.4703, "step": 14982 }, { "epoch": 0.6662220443733049, "grad_norm": 0.06068024784326553, "learning_rate": 0.00027199585763085005, "loss": 1.4684, "step": 14984 }, { "epoch": 0.6663109688319773, "grad_norm": 0.05974745377898216, "learning_rate": 0.00027186576311624336, "loss": 1.4707, "step": 14986 }, { "epoch": 0.6663998932906496, "grad_norm": 0.061118923127651215, "learning_rate": 0.0002717356881035567, "loss": 1.4704, "step": 14988 }, { "epoch": 0.666488817749322, "grad_norm": 0.06090380996465683, "learning_rate": 0.000271605632603909, "loss": 1.4672, "step": 14990 }, { "epoch": 0.6665777422079943, "grad_norm": 0.060469526797533035, "learning_rate": 0.0002714755966284187, "loss": 1.4701, "step": 14992 }, { "epoch": 0.6666666666666666, "grad_norm": 0.05882229283452034, "learning_rate": 0.0002713455801882015, "loss": 1.4697, "step": 14994 }, { "epoch": 0.666755591125339, "grad_norm": 0.06075397878885269, "learning_rate": 0.00027121558329437157, "loss": 1.4723, "step": 14996 }, { "epoch": 0.6668445155840114, "grad_norm": 0.05918700248003006, "learning_rate": 0.0002710856059580418, "loss": 1.4664, "step": 14998 }, { "epoch": 0.6669334400426837, "grad_norm": 0.06100982427597046, "learning_rate": 0.0002709556481903234, "loss": 1.4677, "step": 15000 }, { "epoch": 0.6669334400426837, "eval_loss": 1.4535506963729858, "eval_runtime": 12.4672, "eval_samples_per_second": 554.252, "eval_steps_per_second": 69.302, "step": 15000 }, { "epoch": 0.6670223645013561, "grad_norm": 0.05966784805059433, "learning_rate": 0.00027082571000232525, "loss": 1.4677, "step": 15002 }, { "epoch": 0.6671112889600285, "grad_norm": 0.05990458279848099, "learning_rate": 0.0002706957914051555, "loss": 1.4647, "step": 15004 }, { "epoch": 0.6672002134187008, "grad_norm": 0.05977652221918106, "learning_rate": 0.00027056589240991995, "loss": 1.4705, "step": 15006 }, { "epoch": 0.6672891378773732, "grad_norm": 0.05973276495933533, "learning_rate": 0.00027043601302772325, "loss": 1.4662, "step": 15008 }, { "epoch": 0.6673780623360456, "grad_norm": 0.05999859794974327, "learning_rate": 0.00027030615326966757, "loss": 1.4668, "step": 15010 }, { "epoch": 0.6674669867947179, "grad_norm": 0.05877234786748886, "learning_rate": 0.00027017631314685416, "loss": 1.4632, "step": 15012 }, { "epoch": 0.6675559112533902, "grad_norm": 0.060439884662628174, "learning_rate": 0.0002700464926703825, "loss": 1.4734, "step": 15014 }, { "epoch": 0.6676448357120626, "grad_norm": 0.05888623371720314, "learning_rate": 0.00026991669185135, "loss": 1.4699, "step": 15016 }, { "epoch": 0.6677337601707349, "grad_norm": 0.05988156795501709, "learning_rate": 0.0002697869107008522, "loss": 1.4681, "step": 15018 }, { "epoch": 0.6678226846294073, "grad_norm": 0.05992647632956505, "learning_rate": 0.0002696571492299842, "loss": 1.4652, "step": 15020 }, { "epoch": 0.6679116090880797, "grad_norm": 0.06108422204852104, "learning_rate": 0.00026952740744983823, "loss": 1.4671, "step": 15022 }, { "epoch": 0.668000533546752, "grad_norm": 0.06095761060714722, "learning_rate": 0.0002693976853715049, "loss": 1.471, "step": 15024 }, { "epoch": 0.6680894580054244, "grad_norm": 0.061448004096746445, "learning_rate": 0.00026926798300607363, "loss": 1.4727, "step": 15026 }, { "epoch": 0.6681783824640968, "grad_norm": 0.05938434228301048, "learning_rate": 0.00026913830036463197, "loss": 1.4692, "step": 15028 }, { "epoch": 0.6682673069227691, "grad_norm": 0.06313709169626236, "learning_rate": 0.000269008637458266, "loss": 1.4659, "step": 15030 }, { "epoch": 0.6683562313814415, "grad_norm": 0.06244370713829994, "learning_rate": 0.00026887899429805953, "loss": 1.4704, "step": 15032 }, { "epoch": 0.6684451558401138, "grad_norm": 0.06154157221317291, "learning_rate": 0.000268749370895095, "loss": 1.4732, "step": 15034 }, { "epoch": 0.6685340802987861, "grad_norm": 0.06056886166334152, "learning_rate": 0.0002686197672604536, "loss": 1.4795, "step": 15036 }, { "epoch": 0.6686230047574585, "grad_norm": 0.060194727033376694, "learning_rate": 0.00026849018340521417, "loss": 1.4705, "step": 15038 }, { "epoch": 0.6687119292161309, "grad_norm": 0.05959151312708855, "learning_rate": 0.00026836061934045364, "loss": 1.4729, "step": 15040 }, { "epoch": 0.6688008536748032, "grad_norm": 0.06189076974987984, "learning_rate": 0.0002682310750772486, "loss": 1.471, "step": 15042 }, { "epoch": 0.6688897781334756, "grad_norm": 0.06022195145487785, "learning_rate": 0.00026810155062667243, "loss": 1.4695, "step": 15044 }, { "epoch": 0.668978702592148, "grad_norm": 0.059737030416727066, "learning_rate": 0.00026797204599979783, "loss": 1.4644, "step": 15046 }, { "epoch": 0.6690676270508203, "grad_norm": 0.05960986763238907, "learning_rate": 0.00026784256120769513, "loss": 1.4697, "step": 15048 }, { "epoch": 0.6691565515094927, "grad_norm": 0.060930218547582626, "learning_rate": 0.00026771309626143333, "loss": 1.4694, "step": 15050 }, { "epoch": 0.6692454759681651, "grad_norm": 0.05909661948680878, "learning_rate": 0.0002675836511720799, "loss": 1.4655, "step": 15052 }, { "epoch": 0.6693344004268375, "grad_norm": 0.06040756776928902, "learning_rate": 0.0002674542259507, "loss": 1.4687, "step": 15054 }, { "epoch": 0.6694233248855097, "grad_norm": 0.05938173085451126, "learning_rate": 0.0002673248206083577, "loss": 1.4736, "step": 15056 }, { "epoch": 0.6695122493441821, "grad_norm": 0.05951856076717377, "learning_rate": 0.0002671954351561152, "loss": 1.4709, "step": 15058 }, { "epoch": 0.6696011738028544, "grad_norm": 0.060165997594594955, "learning_rate": 0.0002670660696050328, "loss": 1.4699, "step": 15060 }, { "epoch": 0.6696900982615268, "grad_norm": 0.060211215168237686, "learning_rate": 0.0002669367239661692, "loss": 1.4756, "step": 15062 }, { "epoch": 0.6697790227201992, "grad_norm": 0.06052954122424126, "learning_rate": 0.00026680739825058164, "loss": 1.4744, "step": 15064 }, { "epoch": 0.6698679471788715, "grad_norm": 0.05990380421280861, "learning_rate": 0.0002666780924693252, "loss": 1.4674, "step": 15066 }, { "epoch": 0.6699568716375439, "grad_norm": 0.05932345241308212, "learning_rate": 0.00026654880663345385, "loss": 1.4717, "step": 15068 }, { "epoch": 0.6700457960962163, "grad_norm": 0.06290200352668762, "learning_rate": 0.00026641954075401903, "loss": 1.4679, "step": 15070 }, { "epoch": 0.6701347205548887, "grad_norm": 0.05994856730103493, "learning_rate": 0.00026629029484207125, "loss": 1.4655, "step": 15072 }, { "epoch": 0.670223645013561, "grad_norm": 0.06112265959382057, "learning_rate": 0.0002661610689086592, "loss": 1.4684, "step": 15074 }, { "epoch": 0.6703125694722334, "grad_norm": 0.06035906448960304, "learning_rate": 0.0002660318629648293, "loss": 1.4712, "step": 15076 }, { "epoch": 0.6704014939309056, "grad_norm": 0.060564734041690826, "learning_rate": 0.00026590267702162674, "loss": 1.4769, "step": 15078 }, { "epoch": 0.670490418389578, "grad_norm": 0.06007889285683632, "learning_rate": 0.0002657735110900953, "loss": 1.4714, "step": 15080 }, { "epoch": 0.6705793428482504, "grad_norm": 0.06008253991603851, "learning_rate": 0.000265644365181276, "loss": 1.4692, "step": 15082 }, { "epoch": 0.6706682673069227, "grad_norm": 0.05926406383514404, "learning_rate": 0.00026551523930620926, "loss": 1.471, "step": 15084 }, { "epoch": 0.6707571917655951, "grad_norm": 0.059321608394384384, "learning_rate": 0.0002653861334759333, "loss": 1.4656, "step": 15086 }, { "epoch": 0.6708461162242675, "grad_norm": 0.060134414583444595, "learning_rate": 0.0002652570477014844, "loss": 1.464, "step": 15088 }, { "epoch": 0.6709350406829399, "grad_norm": 0.0598330982029438, "learning_rate": 0.0002651279819938976, "loss": 1.468, "step": 15090 }, { "epoch": 0.6710239651416122, "grad_norm": 0.05979737266898155, "learning_rate": 0.0002649989363642061, "loss": 1.4636, "step": 15092 }, { "epoch": 0.6711128896002846, "grad_norm": 0.061708901077508926, "learning_rate": 0.0002648699108234409, "loss": 1.4689, "step": 15094 }, { "epoch": 0.671201814058957, "grad_norm": 0.059828344732522964, "learning_rate": 0.00026474090538263216, "loss": 1.4688, "step": 15096 }, { "epoch": 0.6712907385176293, "grad_norm": 0.060239676386117935, "learning_rate": 0.00026461192005280715, "loss": 1.4696, "step": 15098 }, { "epoch": 0.6713796629763016, "grad_norm": 0.06201807036995888, "learning_rate": 0.00026448295484499297, "loss": 1.4712, "step": 15100 }, { "epoch": 0.671468587434974, "grad_norm": 0.059881266206502914, "learning_rate": 0.0002643540097702136, "loss": 1.4662, "step": 15102 }, { "epoch": 0.6715575118936463, "grad_norm": 0.06092246621847153, "learning_rate": 0.0002642250848394918, "loss": 1.4675, "step": 15104 }, { "epoch": 0.6716464363523187, "grad_norm": 0.06052365154027939, "learning_rate": 0.00026409618006384883, "loss": 1.47, "step": 15106 }, { "epoch": 0.6717353608109911, "grad_norm": 0.06088147312402725, "learning_rate": 0.00026396729545430413, "loss": 1.4677, "step": 15108 }, { "epoch": 0.6718242852696634, "grad_norm": 0.05907963961362839, "learning_rate": 0.000263838431021875, "loss": 1.4646, "step": 15110 }, { "epoch": 0.6719132097283358, "grad_norm": 0.060525842010974884, "learning_rate": 0.00026370958677757753, "loss": 1.4652, "step": 15112 }, { "epoch": 0.6720021341870082, "grad_norm": 0.06022028997540474, "learning_rate": 0.0002635807627324258, "loss": 1.4651, "step": 15114 }, { "epoch": 0.6720910586456805, "grad_norm": 0.05875980481505394, "learning_rate": 0.0002634519588974326, "loss": 1.4643, "step": 15116 }, { "epoch": 0.6721799831043529, "grad_norm": 0.06109503656625748, "learning_rate": 0.00026332317528360836, "loss": 1.468, "step": 15118 }, { "epoch": 0.6722689075630253, "grad_norm": 0.05978439748287201, "learning_rate": 0.0002631944119019617, "loss": 1.4701, "step": 15120 }, { "epoch": 0.6723578320216975, "grad_norm": 0.059926699846982956, "learning_rate": 0.0002630656687635007, "loss": 1.4698, "step": 15122 }, { "epoch": 0.6724467564803699, "grad_norm": 0.06024140492081642, "learning_rate": 0.00026293694587923043, "loss": 1.4632, "step": 15124 }, { "epoch": 0.6725356809390423, "grad_norm": 0.05961621552705765, "learning_rate": 0.0002628082432601546, "loss": 1.4681, "step": 15126 }, { "epoch": 0.6726246053977146, "grad_norm": 0.05957215651869774, "learning_rate": 0.00026267956091727534, "loss": 1.4639, "step": 15128 }, { "epoch": 0.672713529856387, "grad_norm": 0.06190573051571846, "learning_rate": 0.00026255089886159313, "loss": 1.4712, "step": 15130 }, { "epoch": 0.6728024543150594, "grad_norm": 0.060655590146780014, "learning_rate": 0.0002624222571041066, "loss": 1.4714, "step": 15132 }, { "epoch": 0.6728913787737317, "grad_norm": 0.06115647032856941, "learning_rate": 0.0002622936356558123, "loss": 1.4678, "step": 15134 }, { "epoch": 0.6729803032324041, "grad_norm": 0.06026843562722206, "learning_rate": 0.00026216503452770564, "loss": 1.4616, "step": 15136 }, { "epoch": 0.6730692276910765, "grad_norm": 0.058876439929008484, "learning_rate": 0.0002620364537307801, "loss": 1.4623, "step": 15138 }, { "epoch": 0.6731581521497488, "grad_norm": 0.06088225543498993, "learning_rate": 0.00026190789327602696, "loss": 1.4688, "step": 15140 }, { "epoch": 0.6732470766084211, "grad_norm": 0.059225499629974365, "learning_rate": 0.0002617793531744363, "loss": 1.4674, "step": 15142 }, { "epoch": 0.6733360010670935, "grad_norm": 0.05934342369437218, "learning_rate": 0.0002616508334369966, "loss": 1.4642, "step": 15144 }, { "epoch": 0.6734249255257658, "grad_norm": 0.059114716947078705, "learning_rate": 0.000261522334074694, "loss": 1.4685, "step": 15146 }, { "epoch": 0.6735138499844382, "grad_norm": 0.05941977724432945, "learning_rate": 0.00026139385509851284, "loss": 1.4729, "step": 15148 }, { "epoch": 0.6736027744431106, "grad_norm": 0.06069924682378769, "learning_rate": 0.0002612653965194368, "loss": 1.469, "step": 15150 }, { "epoch": 0.6736916989017829, "grad_norm": 0.06208193302154541, "learning_rate": 0.0002611369583484466, "loss": 1.4714, "step": 15152 }, { "epoch": 0.6737806233604553, "grad_norm": 0.058625586330890656, "learning_rate": 0.000261008540596522, "loss": 1.4672, "step": 15154 }, { "epoch": 0.6738695478191277, "grad_norm": 0.06107405945658684, "learning_rate": 0.00026088014327464037, "loss": 1.4675, "step": 15156 }, { "epoch": 0.6739584722778, "grad_norm": 0.06048043444752693, "learning_rate": 0.00026075176639377785, "loss": 1.4662, "step": 15158 }, { "epoch": 0.6740473967364724, "grad_norm": 0.060831520706415176, "learning_rate": 0.00026062340996490884, "loss": 1.4712, "step": 15160 }, { "epoch": 0.6741363211951448, "grad_norm": 0.06307994574308395, "learning_rate": 0.0002604950739990055, "loss": 1.4725, "step": 15162 }, { "epoch": 0.674225245653817, "grad_norm": 0.06147250533103943, "learning_rate": 0.00026036675850703864, "loss": 1.4684, "step": 15164 }, { "epoch": 0.6743141701124894, "grad_norm": 0.05897139012813568, "learning_rate": 0.0002602384634999775, "loss": 1.4698, "step": 15166 }, { "epoch": 0.6744030945711618, "grad_norm": 0.06173459067940712, "learning_rate": 0.0002601101889887889, "loss": 1.472, "step": 15168 }, { "epoch": 0.6744920190298341, "grad_norm": 0.05954406037926674, "learning_rate": 0.00025998193498443856, "loss": 1.4712, "step": 15170 }, { "epoch": 0.6745809434885065, "grad_norm": 0.06112904101610184, "learning_rate": 0.0002598537014978903, "loss": 1.4697, "step": 15172 }, { "epoch": 0.6746698679471789, "grad_norm": 0.06085880100727081, "learning_rate": 0.00025972548854010576, "loss": 1.468, "step": 15174 }, { "epoch": 0.6747587924058512, "grad_norm": 0.061143457889556885, "learning_rate": 0.0002595972961220455, "loss": 1.4706, "step": 15176 }, { "epoch": 0.6748477168645236, "grad_norm": 0.060030248016119, "learning_rate": 0.0002594691242546675, "loss": 1.4674, "step": 15178 }, { "epoch": 0.674936641323196, "grad_norm": 0.05988433212041855, "learning_rate": 0.0002593409729489289, "loss": 1.4637, "step": 15180 }, { "epoch": 0.6750255657818683, "grad_norm": 0.06074235960841179, "learning_rate": 0.00025921284221578456, "loss": 1.4677, "step": 15182 }, { "epoch": 0.6751144902405407, "grad_norm": 0.06032219156622887, "learning_rate": 0.00025908473206618743, "loss": 1.4667, "step": 15184 }, { "epoch": 0.675203414699213, "grad_norm": 0.059529777616262436, "learning_rate": 0.00025895664251108906, "loss": 1.4663, "step": 15186 }, { "epoch": 0.6752923391578853, "grad_norm": 0.05997844040393829, "learning_rate": 0.00025882857356143935, "loss": 1.4708, "step": 15188 }, { "epoch": 0.6753812636165577, "grad_norm": 0.059802982956171036, "learning_rate": 0.00025870052522818576, "loss": 1.4654, "step": 15190 }, { "epoch": 0.6754701880752301, "grad_norm": 0.05979980528354645, "learning_rate": 0.00025857249752227467, "loss": 1.4636, "step": 15192 }, { "epoch": 0.6755591125339024, "grad_norm": 0.06097504124045372, "learning_rate": 0.0002584444904546506, "loss": 1.4648, "step": 15194 }, { "epoch": 0.6756480369925748, "grad_norm": 0.06036361679434776, "learning_rate": 0.00025831650403625585, "loss": 1.4674, "step": 15196 }, { "epoch": 0.6757369614512472, "grad_norm": 0.06019364297389984, "learning_rate": 0.0002581885382780313, "loss": 1.4697, "step": 15198 }, { "epoch": 0.6758258859099195, "grad_norm": 0.06247372180223465, "learning_rate": 0.0002580605931909164, "loss": 1.4712, "step": 15200 }, { "epoch": 0.6759148103685919, "grad_norm": 0.05914263427257538, "learning_rate": 0.000257932668785848, "loss": 1.4626, "step": 15202 }, { "epoch": 0.6760037348272643, "grad_norm": 0.06002722680568695, "learning_rate": 0.000257804765073762, "loss": 1.4704, "step": 15204 }, { "epoch": 0.6760926592859366, "grad_norm": 0.06022915244102478, "learning_rate": 0.0002576768820655916, "loss": 1.4735, "step": 15206 }, { "epoch": 0.6761815837446089, "grad_norm": 0.05858427286148071, "learning_rate": 0.0002575490197722696, "loss": 1.4651, "step": 15208 }, { "epoch": 0.6762705082032813, "grad_norm": 0.06035829707980156, "learning_rate": 0.00025742117820472585, "loss": 1.4657, "step": 15210 }, { "epoch": 0.6763594326619536, "grad_norm": 0.05977611243724823, "learning_rate": 0.0002572933573738886, "loss": 1.4648, "step": 15212 }, { "epoch": 0.676448357120626, "grad_norm": 0.06203882768750191, "learning_rate": 0.0002571655572906847, "loss": 1.4681, "step": 15214 }, { "epoch": 0.6765372815792984, "grad_norm": 0.0590372197329998, "learning_rate": 0.00025703777796603935, "loss": 1.4712, "step": 15216 }, { "epoch": 0.6766262060379707, "grad_norm": 0.06090830639004707, "learning_rate": 0.00025691001941087515, "loss": 1.4722, "step": 15218 }, { "epoch": 0.6767151304966431, "grad_norm": 0.060008611530065536, "learning_rate": 0.00025678228163611384, "loss": 1.467, "step": 15220 }, { "epoch": 0.6768040549553155, "grad_norm": 0.060110870748758316, "learning_rate": 0.0002566545646526749, "loss": 1.4608, "step": 15222 }, { "epoch": 0.6768929794139878, "grad_norm": 0.05973638966679573, "learning_rate": 0.00025652686847147635, "loss": 1.4685, "step": 15224 }, { "epoch": 0.6769819038726602, "grad_norm": 0.05918769910931587, "learning_rate": 0.000256399193103434, "loss": 1.468, "step": 15226 }, { "epoch": 0.6770708283313326, "grad_norm": 0.060597680509090424, "learning_rate": 0.0002562715385594618, "loss": 1.4648, "step": 15228 }, { "epoch": 0.6771597527900048, "grad_norm": 0.06197728216648102, "learning_rate": 0.00025614390485047294, "loss": 1.4742, "step": 15230 }, { "epoch": 0.6772486772486772, "grad_norm": 0.06051739677786827, "learning_rate": 0.00025601629198737777, "loss": 1.4697, "step": 15232 }, { "epoch": 0.6773376017073496, "grad_norm": 0.06158318743109703, "learning_rate": 0.0002558886999810849, "loss": 1.4675, "step": 15234 }, { "epoch": 0.677426526166022, "grad_norm": 0.05960030108690262, "learning_rate": 0.00025576112884250174, "loss": 1.4682, "step": 15236 }, { "epoch": 0.6775154506246943, "grad_norm": 0.059879280626773834, "learning_rate": 0.0002556335785825336, "loss": 1.4612, "step": 15238 }, { "epoch": 0.6776043750833667, "grad_norm": 0.06018189340829849, "learning_rate": 0.0002555060492120843, "loss": 1.4737, "step": 15240 }, { "epoch": 0.677693299542039, "grad_norm": 0.058622147887945175, "learning_rate": 0.00025537854074205514, "loss": 1.4675, "step": 15242 }, { "epoch": 0.6777822240007114, "grad_norm": 0.06036924943327904, "learning_rate": 0.0002552510531833463, "loss": 1.4663, "step": 15244 }, { "epoch": 0.6778711484593838, "grad_norm": 0.060898203402757645, "learning_rate": 0.0002551235865468562, "loss": 1.4729, "step": 15246 }, { "epoch": 0.6779600729180562, "grad_norm": 0.0606052428483963, "learning_rate": 0.0002549961408434809, "loss": 1.4669, "step": 15248 }, { "epoch": 0.6780489973767285, "grad_norm": 0.061549894511699677, "learning_rate": 0.0002548687160841152, "loss": 1.4674, "step": 15250 }, { "epoch": 0.6781379218354008, "grad_norm": 0.061437010765075684, "learning_rate": 0.000254741312279652, "loss": 1.4741, "step": 15252 }, { "epoch": 0.6782268462940731, "grad_norm": 0.06180231273174286, "learning_rate": 0.0002546139294409822, "loss": 1.4658, "step": 15254 }, { "epoch": 0.6783157707527455, "grad_norm": 0.06152801215648651, "learning_rate": 0.000254486567578995, "loss": 1.4682, "step": 15256 }, { "epoch": 0.6784046952114179, "grad_norm": 0.061020851135253906, "learning_rate": 0.00025435922670457813, "loss": 1.4651, "step": 15258 }, { "epoch": 0.6784936196700903, "grad_norm": 0.060287658125162125, "learning_rate": 0.00025423190682861686, "loss": 1.4621, "step": 15260 }, { "epoch": 0.6785825441287626, "grad_norm": 0.060152024030685425, "learning_rate": 0.0002541046079619955, "loss": 1.4688, "step": 15262 }, { "epoch": 0.678671468587435, "grad_norm": 0.06008097529411316, "learning_rate": 0.0002539773301155956, "loss": 1.4642, "step": 15264 }, { "epoch": 0.6787603930461074, "grad_norm": 0.060999296605587006, "learning_rate": 0.00025385007330029787, "loss": 1.4673, "step": 15266 }, { "epoch": 0.6788493175047797, "grad_norm": 0.06069813296198845, "learning_rate": 0.00025372283752698066, "loss": 1.4636, "step": 15268 }, { "epoch": 0.6789382419634521, "grad_norm": 0.060081273317337036, "learning_rate": 0.0002535956228065205, "loss": 1.4647, "step": 15270 }, { "epoch": 0.6790271664221244, "grad_norm": 0.061477821320295334, "learning_rate": 0.00025346842914979233, "loss": 1.4675, "step": 15272 }, { "epoch": 0.6791160908807967, "grad_norm": 0.058304693549871445, "learning_rate": 0.0002533412565676695, "loss": 1.4641, "step": 15274 }, { "epoch": 0.6792050153394691, "grad_norm": 0.0618242472410202, "learning_rate": 0.0002532141050710228, "loss": 1.4667, "step": 15276 }, { "epoch": 0.6792939397981415, "grad_norm": 0.060133617371320724, "learning_rate": 0.0002530869746707219, "loss": 1.464, "step": 15278 }, { "epoch": 0.6793828642568138, "grad_norm": 0.060968294739723206, "learning_rate": 0.00025295986537763487, "loss": 1.4707, "step": 15280 }, { "epoch": 0.6794717887154862, "grad_norm": 0.06188968941569328, "learning_rate": 0.000252832777202627, "loss": 1.4677, "step": 15282 }, { "epoch": 0.6795607131741586, "grad_norm": 0.06084349378943443, "learning_rate": 0.0002527057101565626, "loss": 1.4682, "step": 15284 }, { "epoch": 0.6796496376328309, "grad_norm": 0.06000750884413719, "learning_rate": 0.00025257866425030384, "loss": 1.4678, "step": 15286 }, { "epoch": 0.6797385620915033, "grad_norm": 0.05905289202928543, "learning_rate": 0.00025245163949471115, "loss": 1.4669, "step": 15288 }, { "epoch": 0.6798274865501757, "grad_norm": 0.060218751430511475, "learning_rate": 0.00025232463590064345, "loss": 1.4648, "step": 15290 }, { "epoch": 0.679916411008848, "grad_norm": 0.058075301349163055, "learning_rate": 0.0002521976534789571, "loss": 1.4685, "step": 15292 }, { "epoch": 0.6800053354675203, "grad_norm": 0.06031598150730133, "learning_rate": 0.0002520706922405074, "loss": 1.4672, "step": 15294 }, { "epoch": 0.6800942599261927, "grad_norm": 0.059682924300432205, "learning_rate": 0.00025194375219614773, "loss": 1.4666, "step": 15296 }, { "epoch": 0.680183184384865, "grad_norm": 0.05814158171415329, "learning_rate": 0.00025181683335672903, "loss": 1.4643, "step": 15298 }, { "epoch": 0.6802721088435374, "grad_norm": 0.06083943694829941, "learning_rate": 0.0002516899357331012, "loss": 1.4697, "step": 15300 }, { "epoch": 0.6803610333022098, "grad_norm": 0.06108289211988449, "learning_rate": 0.00025156305933611204, "loss": 1.4643, "step": 15302 }, { "epoch": 0.6804499577608821, "grad_norm": 0.05812264606356621, "learning_rate": 0.00025143620417660716, "loss": 1.4682, "step": 15304 }, { "epoch": 0.6805388822195545, "grad_norm": 0.06007948890328407, "learning_rate": 0.00025130937026543107, "loss": 1.4659, "step": 15306 }, { "epoch": 0.6806278066782269, "grad_norm": 0.062025126069784164, "learning_rate": 0.0002511825576134259, "loss": 1.4717, "step": 15308 }, { "epoch": 0.6807167311368992, "grad_norm": 0.05901065096259117, "learning_rate": 0.00025105576623143234, "loss": 1.4637, "step": 15310 }, { "epoch": 0.6808056555955716, "grad_norm": 0.06109865382313728, "learning_rate": 0.00025092899613028897, "loss": 1.4713, "step": 15312 }, { "epoch": 0.680894580054244, "grad_norm": 0.06021618843078613, "learning_rate": 0.0002508022473208323, "loss": 1.4658, "step": 15314 }, { "epoch": 0.6809835045129162, "grad_norm": 0.0613398440182209, "learning_rate": 0.00025067551981389804, "loss": 1.4682, "step": 15316 }, { "epoch": 0.6810724289715886, "grad_norm": 0.0613284669816494, "learning_rate": 0.00025054881362031914, "loss": 1.4665, "step": 15318 }, { "epoch": 0.681161353430261, "grad_norm": 0.060059648007154465, "learning_rate": 0.0002504221287509267, "loss": 1.4659, "step": 15320 }, { "epoch": 0.6812502778889333, "grad_norm": 0.05834935978055, "learning_rate": 0.00025029546521655055, "loss": 1.4698, "step": 15322 }, { "epoch": 0.6813392023476057, "grad_norm": 0.059527888894081116, "learning_rate": 0.00025016882302801845, "loss": 1.4657, "step": 15324 }, { "epoch": 0.6814281268062781, "grad_norm": 0.06185444816946983, "learning_rate": 0.00025004220219615654, "loss": 1.4731, "step": 15326 }, { "epoch": 0.6815170512649504, "grad_norm": 0.058973994106054306, "learning_rate": 0.0002499156027317885, "loss": 1.4675, "step": 15328 }, { "epoch": 0.6816059757236228, "grad_norm": 0.060899462550878525, "learning_rate": 0.00024978902464573683, "loss": 1.4688, "step": 15330 }, { "epoch": 0.6816949001822952, "grad_norm": 0.06071661040186882, "learning_rate": 0.0002496624679488222, "loss": 1.4631, "step": 15332 }, { "epoch": 0.6817838246409675, "grad_norm": 0.059728339314460754, "learning_rate": 0.00024953593265186297, "loss": 1.4672, "step": 15334 }, { "epoch": 0.6818727490996399, "grad_norm": 0.058348335325717926, "learning_rate": 0.00024940941876567564, "loss": 1.4617, "step": 15336 }, { "epoch": 0.6819616735583122, "grad_norm": 0.05913185328245163, "learning_rate": 0.00024928292630107586, "loss": 1.4696, "step": 15338 }, { "epoch": 0.6820505980169845, "grad_norm": 0.062302857637405396, "learning_rate": 0.00024915645526887636, "loss": 1.47, "step": 15340 }, { "epoch": 0.6821395224756569, "grad_norm": 0.062192488461732864, "learning_rate": 0.0002490300056798884, "loss": 1.4651, "step": 15342 }, { "epoch": 0.6822284469343293, "grad_norm": 0.061173710972070694, "learning_rate": 0.00024890357754492147, "loss": 1.4652, "step": 15344 }, { "epoch": 0.6823173713930016, "grad_norm": 0.06143614277243614, "learning_rate": 0.00024877717087478334, "loss": 1.4693, "step": 15346 }, { "epoch": 0.682406295851674, "grad_norm": 0.05982348322868347, "learning_rate": 0.00024865078568027995, "loss": 1.4709, "step": 15348 }, { "epoch": 0.6824952203103464, "grad_norm": 0.05981658026576042, "learning_rate": 0.0002485244219722148, "loss": 1.467, "step": 15350 }, { "epoch": 0.6825841447690187, "grad_norm": 0.05860120430588722, "learning_rate": 0.0002483980797613903, "loss": 1.4644, "step": 15352 }, { "epoch": 0.6826730692276911, "grad_norm": 0.05877653881907463, "learning_rate": 0.00024827175905860687, "loss": 1.4707, "step": 15354 }, { "epoch": 0.6827619936863635, "grad_norm": 0.061559949070215225, "learning_rate": 0.00024814545987466284, "loss": 1.4666, "step": 15356 }, { "epoch": 0.6828509181450358, "grad_norm": 0.05921272560954094, "learning_rate": 0.00024801918222035446, "loss": 1.4688, "step": 15358 }, { "epoch": 0.6829398426037081, "grad_norm": 0.061051640659570694, "learning_rate": 0.0002478929261064772, "loss": 1.4603, "step": 15360 }, { "epoch": 0.6830287670623805, "grad_norm": 0.059043582528829575, "learning_rate": 0.00024776669154382336, "loss": 1.463, "step": 15362 }, { "epoch": 0.6831176915210528, "grad_norm": 0.059850361198186874, "learning_rate": 0.0002476404785431845, "loss": 1.4664, "step": 15364 }, { "epoch": 0.6832066159797252, "grad_norm": 0.06030626222491264, "learning_rate": 0.00024751428711534946, "loss": 1.4634, "step": 15366 }, { "epoch": 0.6832955404383976, "grad_norm": 0.058735646307468414, "learning_rate": 0.00024738811727110583, "loss": 1.4638, "step": 15368 }, { "epoch": 0.6833844648970699, "grad_norm": 0.05930692330002785, "learning_rate": 0.0002472619690212393, "loss": 1.4693, "step": 15370 }, { "epoch": 0.6834733893557423, "grad_norm": 0.0614553838968277, "learning_rate": 0.0002471358423765333, "loss": 1.4653, "step": 15372 }, { "epoch": 0.6835623138144147, "grad_norm": 0.06112890690565109, "learning_rate": 0.00024700973734776983, "loss": 1.4676, "step": 15374 }, { "epoch": 0.683651238273087, "grad_norm": 0.059310752898454666, "learning_rate": 0.0002468836539457291, "loss": 1.4685, "step": 15376 }, { "epoch": 0.6837401627317594, "grad_norm": 0.06061893329024315, "learning_rate": 0.00024675759218118886, "loss": 1.468, "step": 15378 }, { "epoch": 0.6838290871904318, "grad_norm": 0.060217492282390594, "learning_rate": 0.00024663155206492565, "loss": 1.4595, "step": 15380 }, { "epoch": 0.683918011649104, "grad_norm": 0.059973131865262985, "learning_rate": 0.00024650553360771417, "loss": 1.4674, "step": 15382 }, { "epoch": 0.6840069361077764, "grad_norm": 0.059515953063964844, "learning_rate": 0.00024637953682032655, "loss": 1.4681, "step": 15384 }, { "epoch": 0.6840958605664488, "grad_norm": 0.05842442810535431, "learning_rate": 0.00024625356171353375, "loss": 1.4665, "step": 15386 }, { "epoch": 0.6841847850251211, "grad_norm": 0.06066687032580376, "learning_rate": 0.000246127608298105, "loss": 1.4712, "step": 15388 }, { "epoch": 0.6842737094837935, "grad_norm": 0.061743155121803284, "learning_rate": 0.0002460016765848068, "loss": 1.4652, "step": 15390 }, { "epoch": 0.6843626339424659, "grad_norm": 0.0611640103161335, "learning_rate": 0.00024587576658440484, "loss": 1.4622, "step": 15392 }, { "epoch": 0.6844515584011382, "grad_norm": 0.059736188501119614, "learning_rate": 0.0002457498783076621, "loss": 1.4646, "step": 15394 }, { "epoch": 0.6845404828598106, "grad_norm": 0.060952868312597275, "learning_rate": 0.00024562401176534017, "loss": 1.4665, "step": 15396 }, { "epoch": 0.684629407318483, "grad_norm": 0.060799382627010345, "learning_rate": 0.0002454981669681989, "loss": 1.4682, "step": 15398 }, { "epoch": 0.6847183317771554, "grad_norm": 0.05983178690075874, "learning_rate": 0.00024537234392699566, "loss": 1.4653, "step": 15400 }, { "epoch": 0.6848072562358276, "grad_norm": 0.05988869071006775, "learning_rate": 0.0002452465426524866, "loss": 1.4679, "step": 15402 }, { "epoch": 0.6848961806945, "grad_norm": 0.059868037700653076, "learning_rate": 0.00024512076315542594, "loss": 1.4633, "step": 15404 }, { "epoch": 0.6849851051531723, "grad_norm": 0.0584200955927372, "learning_rate": 0.00024499500544656537, "loss": 1.4682, "step": 15406 }, { "epoch": 0.6850740296118447, "grad_norm": 0.059604521840810776, "learning_rate": 0.0002448692695366556, "loss": 1.4674, "step": 15408 }, { "epoch": 0.6851629540705171, "grad_norm": 0.05988859757781029, "learning_rate": 0.00024474355543644517, "loss": 1.4599, "step": 15410 }, { "epoch": 0.6852518785291895, "grad_norm": 0.06065924093127251, "learning_rate": 0.00024461786315668033, "loss": 1.4673, "step": 15412 }, { "epoch": 0.6853408029878618, "grad_norm": 0.060476917773485184, "learning_rate": 0.0002444921927081061, "loss": 1.4695, "step": 15414 }, { "epoch": 0.6854297274465342, "grad_norm": 0.060220107436180115, "learning_rate": 0.00024436654410146493, "loss": 1.4639, "step": 15416 }, { "epoch": 0.6855186519052066, "grad_norm": 0.059693094342947006, "learning_rate": 0.0002442409173474984, "loss": 1.4677, "step": 15418 }, { "epoch": 0.6856075763638789, "grad_norm": 0.06192352995276451, "learning_rate": 0.00024411531245694545, "loss": 1.4658, "step": 15420 }, { "epoch": 0.6856965008225513, "grad_norm": 0.05954059958457947, "learning_rate": 0.00024398972944054297, "loss": 1.4665, "step": 15422 }, { "epoch": 0.6857854252812235, "grad_norm": 0.05900617316365242, "learning_rate": 0.00024386416830902668, "loss": 1.4659, "step": 15424 }, { "epoch": 0.6858743497398959, "grad_norm": 0.060082681477069855, "learning_rate": 0.00024373862907313017, "loss": 1.466, "step": 15426 }, { "epoch": 0.6859632741985683, "grad_norm": 0.06016187369823456, "learning_rate": 0.0002436131117435848, "loss": 1.4625, "step": 15428 }, { "epoch": 0.6860521986572407, "grad_norm": 0.0597849078476429, "learning_rate": 0.0002434876163311205, "loss": 1.4654, "step": 15430 }, { "epoch": 0.686141123115913, "grad_norm": 0.060573283582925797, "learning_rate": 0.00024336214284646513, "loss": 1.4613, "step": 15432 }, { "epoch": 0.6862300475745854, "grad_norm": 0.05994643270969391, "learning_rate": 0.00024323669130034498, "loss": 1.4657, "step": 15434 }, { "epoch": 0.6863189720332578, "grad_norm": 0.060550957918167114, "learning_rate": 0.0002431112617034838, "loss": 1.4726, "step": 15436 }, { "epoch": 0.6864078964919301, "grad_norm": 0.06180041283369064, "learning_rate": 0.000242985854066604, "loss": 1.4693, "step": 15438 }, { "epoch": 0.6864968209506025, "grad_norm": 0.05975709855556488, "learning_rate": 0.00024286046840042618, "loss": 1.4725, "step": 15440 }, { "epoch": 0.6865857454092749, "grad_norm": 0.06140446290373802, "learning_rate": 0.00024273510471566878, "loss": 1.4673, "step": 15442 }, { "epoch": 0.6866746698679472, "grad_norm": 0.06379693001508713, "learning_rate": 0.00024260976302304792, "loss": 1.4751, "step": 15444 }, { "epoch": 0.6867635943266195, "grad_norm": 0.05907401442527771, "learning_rate": 0.00024248444333327924, "loss": 1.4691, "step": 15446 }, { "epoch": 0.6868525187852919, "grad_norm": 0.05944070219993591, "learning_rate": 0.00024235914565707496, "loss": 1.4702, "step": 15448 }, { "epoch": 0.6869414432439642, "grad_norm": 0.061419710516929626, "learning_rate": 0.00024223387000514647, "loss": 1.4651, "step": 15450 }, { "epoch": 0.6870303677026366, "grad_norm": 0.05813660845160484, "learning_rate": 0.00024210861638820252, "loss": 1.4564, "step": 15452 }, { "epoch": 0.687119292161309, "grad_norm": 0.05996819585561752, "learning_rate": 0.00024198338481695053, "loss": 1.4653, "step": 15454 }, { "epoch": 0.6872082166199813, "grad_norm": 0.060112178325653076, "learning_rate": 0.00024185817530209604, "loss": 1.4666, "step": 15456 }, { "epoch": 0.6872971410786537, "grad_norm": 0.05905492231249809, "learning_rate": 0.00024173298785434217, "loss": 1.4681, "step": 15458 }, { "epoch": 0.6873860655373261, "grad_norm": 0.06022144481539726, "learning_rate": 0.00024160782248439056, "loss": 1.465, "step": 15460 }, { "epoch": 0.6874749899959984, "grad_norm": 0.05972209945321083, "learning_rate": 0.00024148267920294126, "loss": 1.4695, "step": 15462 }, { "epoch": 0.6875639144546708, "grad_norm": 0.060659270733594894, "learning_rate": 0.00024135755802069176, "loss": 1.4673, "step": 15464 }, { "epoch": 0.6876528389133432, "grad_norm": 0.060310158878564835, "learning_rate": 0.00024123245894833756, "loss": 1.4646, "step": 15466 }, { "epoch": 0.6877417633720154, "grad_norm": 0.06041952967643738, "learning_rate": 0.00024110738199657357, "loss": 1.4709, "step": 15468 }, { "epoch": 0.6878306878306878, "grad_norm": 0.06220192089676857, "learning_rate": 0.00024098232717609125, "loss": 1.4675, "step": 15470 }, { "epoch": 0.6879196122893602, "grad_norm": 0.060216281563043594, "learning_rate": 0.00024085729449758127, "loss": 1.4641, "step": 15472 }, { "epoch": 0.6880085367480325, "grad_norm": 0.06173881143331528, "learning_rate": 0.00024073228397173159, "loss": 1.4647, "step": 15474 }, { "epoch": 0.6880974612067049, "grad_norm": 0.06371451169252396, "learning_rate": 0.0002406072956092288, "loss": 1.4726, "step": 15476 }, { "epoch": 0.6881863856653773, "grad_norm": 0.060280535370111465, "learning_rate": 0.00024048232942075766, "loss": 1.4632, "step": 15478 }, { "epoch": 0.6882753101240496, "grad_norm": 0.06460434943437576, "learning_rate": 0.00024035738541700047, "loss": 1.4655, "step": 15480 }, { "epoch": 0.688364234582722, "grad_norm": 0.060472600162029266, "learning_rate": 0.00024023246360863821, "loss": 1.4621, "step": 15482 }, { "epoch": 0.6884531590413944, "grad_norm": 0.060170937329530716, "learning_rate": 0.00024010756400634993, "loss": 1.4619, "step": 15484 }, { "epoch": 0.6885420835000667, "grad_norm": 0.06201303005218506, "learning_rate": 0.00023998268662081212, "loss": 1.467, "step": 15486 }, { "epoch": 0.6886310079587391, "grad_norm": 0.06021099537611008, "learning_rate": 0.00023985783146270018, "loss": 1.465, "step": 15488 }, { "epoch": 0.6887199324174114, "grad_norm": 0.05890370160341263, "learning_rate": 0.00023973299854268743, "loss": 1.4657, "step": 15490 }, { "epoch": 0.6888088568760837, "grad_norm": 0.06088082119822502, "learning_rate": 0.00023960818787144468, "loss": 1.4669, "step": 15492 }, { "epoch": 0.6888977813347561, "grad_norm": 0.06058606877923012, "learning_rate": 0.00023948339945964155, "loss": 1.4647, "step": 15494 }, { "epoch": 0.6889867057934285, "grad_norm": 0.06058656796813011, "learning_rate": 0.00023935863331794567, "loss": 1.4713, "step": 15496 }, { "epoch": 0.6890756302521008, "grad_norm": 0.06112731248140335, "learning_rate": 0.0002392338894570223, "loss": 1.4691, "step": 15498 }, { "epoch": 0.6891645547107732, "grad_norm": 0.0599660649895668, "learning_rate": 0.00023910916788753533, "loss": 1.4686, "step": 15500 }, { "epoch": 0.6891645547107732, "eval_loss": 1.4509516954421997, "eval_runtime": 12.4424, "eval_samples_per_second": 555.36, "eval_steps_per_second": 69.44, "step": 15500 }, { "epoch": 0.6892534791694456, "grad_norm": 0.059089768677949905, "learning_rate": 0.00023898446862014605, "loss": 1.4649, "step": 15502 }, { "epoch": 0.6893424036281179, "grad_norm": 0.05946287512779236, "learning_rate": 0.00023885979166551503, "loss": 1.4652, "step": 15504 }, { "epoch": 0.6894313280867903, "grad_norm": 0.059581682085990906, "learning_rate": 0.00023873513703429983, "loss": 1.4743, "step": 15506 }, { "epoch": 0.6895202525454627, "grad_norm": 0.0592639334499836, "learning_rate": 0.00023861050473715628, "loss": 1.4693, "step": 15508 }, { "epoch": 0.6896091770041349, "grad_norm": 0.06175572797656059, "learning_rate": 0.0002384858947847387, "loss": 1.4681, "step": 15510 }, { "epoch": 0.6896981014628073, "grad_norm": 0.05914521589875221, "learning_rate": 0.00023836130718769943, "loss": 1.4664, "step": 15512 }, { "epoch": 0.6897870259214797, "grad_norm": 0.060408368706703186, "learning_rate": 0.00023823674195668849, "loss": 1.4636, "step": 15514 }, { "epoch": 0.689875950380152, "grad_norm": 0.05882785841822624, "learning_rate": 0.00023811219910235438, "loss": 1.4699, "step": 15516 }, { "epoch": 0.6899648748388244, "grad_norm": 0.061456430703401566, "learning_rate": 0.00023798767863534377, "loss": 1.4662, "step": 15518 }, { "epoch": 0.6900537992974968, "grad_norm": 0.059389494359493256, "learning_rate": 0.00023786318056630092, "loss": 1.4615, "step": 15520 }, { "epoch": 0.6901427237561691, "grad_norm": 0.06065116822719574, "learning_rate": 0.00023773870490586868, "loss": 1.4686, "step": 15522 }, { "epoch": 0.6902316482148415, "grad_norm": 0.06044527515769005, "learning_rate": 0.00023761425166468736, "loss": 1.4611, "step": 15524 }, { "epoch": 0.6903205726735139, "grad_norm": 0.060601379722356796, "learning_rate": 0.00023748982085339647, "loss": 1.4673, "step": 15526 }, { "epoch": 0.6904094971321862, "grad_norm": 0.05913134291768074, "learning_rate": 0.0002373654124826326, "loss": 1.4657, "step": 15528 }, { "epoch": 0.6904984215908586, "grad_norm": 0.06059577316045761, "learning_rate": 0.00023724102656303044, "loss": 1.4623, "step": 15530 }, { "epoch": 0.6905873460495309, "grad_norm": 0.05980486422777176, "learning_rate": 0.00023711666310522324, "loss": 1.4603, "step": 15532 }, { "epoch": 0.6906762705082032, "grad_norm": 0.05991894379258156, "learning_rate": 0.0002369923221198424, "loss": 1.467, "step": 15534 }, { "epoch": 0.6907651949668756, "grad_norm": 0.06080161780118942, "learning_rate": 0.0002368680036175167, "loss": 1.4608, "step": 15536 }, { "epoch": 0.690854119425548, "grad_norm": 0.06057791784405708, "learning_rate": 0.0002367437076088736, "loss": 1.4728, "step": 15538 }, { "epoch": 0.6909430438842203, "grad_norm": 0.05937578156590462, "learning_rate": 0.00023661943410453856, "loss": 1.4685, "step": 15540 }, { "epoch": 0.6910319683428927, "grad_norm": 0.06065383553504944, "learning_rate": 0.00023649518311513512, "loss": 1.4729, "step": 15542 }, { "epoch": 0.6911208928015651, "grad_norm": 0.062062423676252365, "learning_rate": 0.0002363709546512845, "loss": 1.4643, "step": 15544 }, { "epoch": 0.6912098172602374, "grad_norm": 0.06103227660059929, "learning_rate": 0.0002362467487236064, "loss": 1.4647, "step": 15546 }, { "epoch": 0.6912987417189098, "grad_norm": 0.05918658524751663, "learning_rate": 0.00023612256534271875, "loss": 1.4664, "step": 15548 }, { "epoch": 0.6913876661775822, "grad_norm": 0.05857004597783089, "learning_rate": 0.00023599840451923716, "loss": 1.4638, "step": 15550 }, { "epoch": 0.6914765906362546, "grad_norm": 0.06153101101517677, "learning_rate": 0.0002358742662637749, "loss": 1.4681, "step": 15552 }, { "epoch": 0.6915655150949268, "grad_norm": 0.06063772737979889, "learning_rate": 0.0002357501505869447, "loss": 1.4705, "step": 15554 }, { "epoch": 0.6916544395535992, "grad_norm": 0.059646911919116974, "learning_rate": 0.000235626057499356, "loss": 1.4624, "step": 15556 }, { "epoch": 0.6917433640122715, "grad_norm": 0.060654833912849426, "learning_rate": 0.00023550198701161718, "loss": 1.4647, "step": 15558 }, { "epoch": 0.6918322884709439, "grad_norm": 0.06024772301316261, "learning_rate": 0.00023537793913433385, "loss": 1.463, "step": 15560 }, { "epoch": 0.6919212129296163, "grad_norm": 0.060860976576805115, "learning_rate": 0.00023525391387811047, "loss": 1.4673, "step": 15562 }, { "epoch": 0.6920101373882886, "grad_norm": 0.058908648788928986, "learning_rate": 0.00023512991125354944, "loss": 1.4642, "step": 15564 }, { "epoch": 0.692099061846961, "grad_norm": 0.05974110960960388, "learning_rate": 0.0002350059312712506, "loss": 1.4639, "step": 15566 }, { "epoch": 0.6921879863056334, "grad_norm": 0.059412941336631775, "learning_rate": 0.0002348819739418126, "loss": 1.4672, "step": 15568 }, { "epoch": 0.6922769107643058, "grad_norm": 0.06173228472471237, "learning_rate": 0.00023475803927583194, "loss": 1.4662, "step": 15570 }, { "epoch": 0.6923658352229781, "grad_norm": 0.0602116584777832, "learning_rate": 0.00023463412728390282, "loss": 1.4638, "step": 15572 }, { "epoch": 0.6924547596816505, "grad_norm": 0.061301339417696, "learning_rate": 0.0002345102379766179, "loss": 1.4684, "step": 15574 }, { "epoch": 0.6925436841403227, "grad_norm": 0.06103100627660751, "learning_rate": 0.00023438637136456803, "loss": 1.4651, "step": 15576 }, { "epoch": 0.6926326085989951, "grad_norm": 0.06038039177656174, "learning_rate": 0.00023426252745834144, "loss": 1.4656, "step": 15578 }, { "epoch": 0.6927215330576675, "grad_norm": 0.05906345322728157, "learning_rate": 0.0002341387062685253, "loss": 1.4673, "step": 15580 }, { "epoch": 0.6928104575163399, "grad_norm": 0.06025807186961174, "learning_rate": 0.0002340149078057039, "loss": 1.461, "step": 15582 }, { "epoch": 0.6928993819750122, "grad_norm": 0.06042305752635002, "learning_rate": 0.00023389113208046038, "loss": 1.4679, "step": 15584 }, { "epoch": 0.6929883064336846, "grad_norm": 0.05965936928987503, "learning_rate": 0.00023376737910337582, "loss": 1.4676, "step": 15586 }, { "epoch": 0.693077230892357, "grad_norm": 0.06160576641559601, "learning_rate": 0.0002336436488850287, "loss": 1.4613, "step": 15588 }, { "epoch": 0.6931661553510293, "grad_norm": 0.06115745007991791, "learning_rate": 0.0002335199414359963, "loss": 1.4667, "step": 15590 }, { "epoch": 0.6932550798097017, "grad_norm": 0.06310083717107773, "learning_rate": 0.00023339625676685376, "loss": 1.4616, "step": 15592 }, { "epoch": 0.6933440042683741, "grad_norm": 0.06115524098277092, "learning_rate": 0.00023327259488817398, "loss": 1.4633, "step": 15594 }, { "epoch": 0.6934329287270464, "grad_norm": 0.058892738074064255, "learning_rate": 0.0002331489558105282, "loss": 1.4674, "step": 15596 }, { "epoch": 0.6935218531857187, "grad_norm": 0.058157991617918015, "learning_rate": 0.00023302533954448586, "loss": 1.4577, "step": 15598 }, { "epoch": 0.693610777644391, "grad_norm": 0.06278180330991745, "learning_rate": 0.00023290174610061376, "loss": 1.4603, "step": 15600 }, { "epoch": 0.6936997021030634, "grad_norm": 0.06078719347715378, "learning_rate": 0.0002327781754894775, "loss": 1.4642, "step": 15602 }, { "epoch": 0.6937886265617358, "grad_norm": 0.06112992390990257, "learning_rate": 0.0002326546277216406, "loss": 1.4734, "step": 15604 }, { "epoch": 0.6938775510204082, "grad_norm": 0.0634625107049942, "learning_rate": 0.0002325311028076641, "loss": 1.4633, "step": 15606 }, { "epoch": 0.6939664754790805, "grad_norm": 0.06064048036932945, "learning_rate": 0.0002324076007581078, "loss": 1.4626, "step": 15608 }, { "epoch": 0.6940553999377529, "grad_norm": 0.059506576508283615, "learning_rate": 0.00023228412158352862, "loss": 1.4643, "step": 15610 }, { "epoch": 0.6941443243964253, "grad_norm": 0.06171632558107376, "learning_rate": 0.00023216066529448294, "loss": 1.4637, "step": 15612 }, { "epoch": 0.6942332488550976, "grad_norm": 0.06034686788916588, "learning_rate": 0.00023203723190152393, "loss": 1.4683, "step": 15614 }, { "epoch": 0.69432217331377, "grad_norm": 0.061040595173835754, "learning_rate": 0.00023191382141520296, "loss": 1.4659, "step": 15616 }, { "epoch": 0.6944110977724424, "grad_norm": 0.061496566981077194, "learning_rate": 0.00023179043384607002, "loss": 1.4674, "step": 15618 }, { "epoch": 0.6945000222311146, "grad_norm": 0.058754466474056244, "learning_rate": 0.0002316670692046729, "loss": 1.4616, "step": 15620 }, { "epoch": 0.694588946689787, "grad_norm": 0.06135370582342148, "learning_rate": 0.000231543727501557, "loss": 1.4669, "step": 15622 }, { "epoch": 0.6946778711484594, "grad_norm": 0.058612678200006485, "learning_rate": 0.0002314204087472664, "loss": 1.4647, "step": 15624 }, { "epoch": 0.6947667956071317, "grad_norm": 0.06040282920002937, "learning_rate": 0.00023129711295234278, "loss": 1.4625, "step": 15626 }, { "epoch": 0.6948557200658041, "grad_norm": 0.06024818494915962, "learning_rate": 0.00023117384012732629, "loss": 1.4604, "step": 15628 }, { "epoch": 0.6949446445244765, "grad_norm": 0.060510408133268356, "learning_rate": 0.00023105059028275465, "loss": 1.4619, "step": 15630 }, { "epoch": 0.6950335689831488, "grad_norm": 0.06075219810009003, "learning_rate": 0.00023092736342916338, "loss": 1.4687, "step": 15632 }, { "epoch": 0.6951224934418212, "grad_norm": 0.0619949996471405, "learning_rate": 0.00023080415957708723, "loss": 1.4668, "step": 15634 }, { "epoch": 0.6952114179004936, "grad_norm": 0.0615934357047081, "learning_rate": 0.0002306809787370578, "loss": 1.468, "step": 15636 }, { "epoch": 0.6953003423591659, "grad_norm": 0.0594145767390728, "learning_rate": 0.00023055782091960497, "loss": 1.465, "step": 15638 }, { "epoch": 0.6953892668178382, "grad_norm": 0.05926194414496422, "learning_rate": 0.00023043468613525702, "loss": 1.4656, "step": 15640 }, { "epoch": 0.6954781912765106, "grad_norm": 0.06039900332689285, "learning_rate": 0.00023031157439454004, "loss": 1.471, "step": 15642 }, { "epoch": 0.6955671157351829, "grad_norm": 0.05915983021259308, "learning_rate": 0.00023018848570797835, "loss": 1.4628, "step": 15644 }, { "epoch": 0.6956560401938553, "grad_norm": 0.061098966747522354, "learning_rate": 0.0002300654200860937, "loss": 1.4688, "step": 15646 }, { "epoch": 0.6957449646525277, "grad_norm": 0.06001387909054756, "learning_rate": 0.00022994237753940655, "loss": 1.462, "step": 15648 }, { "epoch": 0.6958338891112, "grad_norm": 0.06392589956521988, "learning_rate": 0.00022981935807843523, "loss": 1.4688, "step": 15650 }, { "epoch": 0.6959228135698724, "grad_norm": 0.05922695994377136, "learning_rate": 0.00022969636171369585, "loss": 1.4647, "step": 15652 }, { "epoch": 0.6960117380285448, "grad_norm": 0.06134214252233505, "learning_rate": 0.00022957338845570232, "loss": 1.4697, "step": 15654 }, { "epoch": 0.6961006624872171, "grad_norm": 0.058561984449625015, "learning_rate": 0.0002294504383149676, "loss": 1.4655, "step": 15656 }, { "epoch": 0.6961895869458895, "grad_norm": 0.05878908187150955, "learning_rate": 0.0002293275113020017, "loss": 1.4722, "step": 15658 }, { "epoch": 0.6962785114045619, "grad_norm": 0.061014749109745026, "learning_rate": 0.00022920460742731268, "loss": 1.4645, "step": 15660 }, { "epoch": 0.6963674358632341, "grad_norm": 0.06033896282315254, "learning_rate": 0.0002290817267014073, "loss": 1.4719, "step": 15662 }, { "epoch": 0.6964563603219065, "grad_norm": 0.059221964329481125, "learning_rate": 0.00022895886913478974, "loss": 1.4656, "step": 15664 }, { "epoch": 0.6965452847805789, "grad_norm": 0.062421657145023346, "learning_rate": 0.00022883603473796273, "loss": 1.4704, "step": 15666 }, { "epoch": 0.6966342092392512, "grad_norm": 0.06081133335828781, "learning_rate": 0.0002287132235214262, "loss": 1.4621, "step": 15668 }, { "epoch": 0.6967231336979236, "grad_norm": 0.05905033275485039, "learning_rate": 0.00022859043549567888, "loss": 1.4667, "step": 15670 }, { "epoch": 0.696812058156596, "grad_norm": 0.05814428627490997, "learning_rate": 0.00022846767067121737, "loss": 1.4651, "step": 15672 }, { "epoch": 0.6969009826152683, "grad_norm": 0.06016310676932335, "learning_rate": 0.0002283449290585358, "loss": 1.4621, "step": 15674 }, { "epoch": 0.6969899070739407, "grad_norm": 0.05994448438286781, "learning_rate": 0.00022822221066812686, "loss": 1.4716, "step": 15676 }, { "epoch": 0.6970788315326131, "grad_norm": 0.062008995562791824, "learning_rate": 0.00022809951551048125, "loss": 1.4699, "step": 15678 }, { "epoch": 0.6971677559912854, "grad_norm": 0.057963281869888306, "learning_rate": 0.00022797684359608702, "loss": 1.4699, "step": 15680 }, { "epoch": 0.6972566804499578, "grad_norm": 0.06139107421040535, "learning_rate": 0.00022785419493543101, "loss": 1.4601, "step": 15682 }, { "epoch": 0.6973456049086301, "grad_norm": 0.05990011617541313, "learning_rate": 0.00022773156953899786, "loss": 1.4648, "step": 15684 }, { "epoch": 0.6974345293673024, "grad_norm": 0.059842146933078766, "learning_rate": 0.00022760896741726988, "loss": 1.4699, "step": 15686 }, { "epoch": 0.6975234538259748, "grad_norm": 0.06125682219862938, "learning_rate": 0.0002274863885807279, "loss": 1.4593, "step": 15688 }, { "epoch": 0.6976123782846472, "grad_norm": 0.06033066660165787, "learning_rate": 0.00022736383303985014, "loss": 1.4641, "step": 15690 }, { "epoch": 0.6977013027433195, "grad_norm": 0.06027790158987045, "learning_rate": 0.00022724130080511347, "loss": 1.4641, "step": 15692 }, { "epoch": 0.6977902272019919, "grad_norm": 0.05997991934418678, "learning_rate": 0.0002271187918869926, "loss": 1.4673, "step": 15694 }, { "epoch": 0.6978791516606643, "grad_norm": 0.06118232384324074, "learning_rate": 0.00022699630629595969, "loss": 1.4696, "step": 15696 }, { "epoch": 0.6979680761193366, "grad_norm": 0.06090354546904564, "learning_rate": 0.0002268738440424856, "loss": 1.4667, "step": 15698 }, { "epoch": 0.698057000578009, "grad_norm": 0.059883639216423035, "learning_rate": 0.00022675140513703923, "loss": 1.4629, "step": 15700 }, { "epoch": 0.6981459250366814, "grad_norm": 0.06094551458954811, "learning_rate": 0.00022662898959008665, "loss": 1.4616, "step": 15702 }, { "epoch": 0.6982348494953537, "grad_norm": 0.06020454317331314, "learning_rate": 0.0002265065974120928, "loss": 1.4652, "step": 15704 }, { "epoch": 0.698323773954026, "grad_norm": 0.06263810396194458, "learning_rate": 0.00022638422861352036, "loss": 1.461, "step": 15706 }, { "epoch": 0.6984126984126984, "grad_norm": 0.059346914291381836, "learning_rate": 0.00022626188320482975, "loss": 1.468, "step": 15708 }, { "epoch": 0.6985016228713707, "grad_norm": 0.059429168701171875, "learning_rate": 0.00022613956119647981, "loss": 1.4603, "step": 15710 }, { "epoch": 0.6985905473300431, "grad_norm": 0.0603039413690567, "learning_rate": 0.0002260172625989269, "loss": 1.4664, "step": 15712 }, { "epoch": 0.6986794717887155, "grad_norm": 0.060589831322431564, "learning_rate": 0.00022589498742262576, "loss": 1.4685, "step": 15714 }, { "epoch": 0.6987683962473878, "grad_norm": 0.05926838517189026, "learning_rate": 0.0002257727356780292, "loss": 1.4665, "step": 15716 }, { "epoch": 0.6988573207060602, "grad_norm": 0.0597996711730957, "learning_rate": 0.00022565050737558752, "loss": 1.4617, "step": 15718 }, { "epoch": 0.6989462451647326, "grad_norm": 0.06095963716506958, "learning_rate": 0.00022552830252574953, "loss": 1.4685, "step": 15720 }, { "epoch": 0.699035169623405, "grad_norm": 0.05980850011110306, "learning_rate": 0.00022540612113896192, "loss": 1.4661, "step": 15722 }, { "epoch": 0.6991240940820773, "grad_norm": 0.060585636645555496, "learning_rate": 0.00022528396322566896, "loss": 1.4626, "step": 15724 }, { "epoch": 0.6992130185407497, "grad_norm": 0.060612861067056656, "learning_rate": 0.00022516182879631352, "loss": 1.4685, "step": 15726 }, { "epoch": 0.6993019429994219, "grad_norm": 0.05883822590112686, "learning_rate": 0.0002250397178613363, "loss": 1.4628, "step": 15728 }, { "epoch": 0.6993908674580943, "grad_norm": 0.06021643057465553, "learning_rate": 0.0002249176304311755, "loss": 1.4612, "step": 15730 }, { "epoch": 0.6994797919167667, "grad_norm": 0.058791451156139374, "learning_rate": 0.00022479556651626792, "loss": 1.4669, "step": 15732 }, { "epoch": 0.699568716375439, "grad_norm": 0.06060413271188736, "learning_rate": 0.00022467352612704812, "loss": 1.464, "step": 15734 }, { "epoch": 0.6996576408341114, "grad_norm": 0.05910290777683258, "learning_rate": 0.0002245515092739488, "loss": 1.4708, "step": 15736 }, { "epoch": 0.6997465652927838, "grad_norm": 0.05985668674111366, "learning_rate": 0.0002244295159674003, "loss": 1.4653, "step": 15738 }, { "epoch": 0.6998354897514562, "grad_norm": 0.05992359668016434, "learning_rate": 0.00022430754621783085, "loss": 1.4624, "step": 15740 }, { "epoch": 0.6999244142101285, "grad_norm": 0.058332495391368866, "learning_rate": 0.00022418560003566762, "loss": 1.4663, "step": 15742 }, { "epoch": 0.7000133386688009, "grad_norm": 0.061433497816324234, "learning_rate": 0.00022406367743133475, "loss": 1.4643, "step": 15744 }, { "epoch": 0.7001022631274733, "grad_norm": 0.06030712649226189, "learning_rate": 0.0002239417784152546, "loss": 1.4649, "step": 15746 }, { "epoch": 0.7001911875861456, "grad_norm": 0.060619745403528214, "learning_rate": 0.00022381990299784777, "loss": 1.4661, "step": 15748 }, { "epoch": 0.7002801120448179, "grad_norm": 0.06033237650990486, "learning_rate": 0.00022369805118953274, "loss": 1.4677, "step": 15750 }, { "epoch": 0.7003690365034902, "grad_norm": 0.06091267988085747, "learning_rate": 0.0002235762230007261, "loss": 1.4652, "step": 15752 }, { "epoch": 0.7004579609621626, "grad_norm": 0.06021000072360039, "learning_rate": 0.00022345441844184188, "loss": 1.4668, "step": 15754 }, { "epoch": 0.700546885420835, "grad_norm": 0.05979836732149124, "learning_rate": 0.00022333263752329264, "loss": 1.4638, "step": 15756 }, { "epoch": 0.7006358098795074, "grad_norm": 0.05988521873950958, "learning_rate": 0.00022321088025548902, "loss": 1.4593, "step": 15758 }, { "epoch": 0.7007247343381797, "grad_norm": 0.061469241976737976, "learning_rate": 0.00022308914664883911, "loss": 1.4653, "step": 15760 }, { "epoch": 0.7008136587968521, "grad_norm": 0.05871487781405449, "learning_rate": 0.00022296743671374887, "loss": 1.4624, "step": 15762 }, { "epoch": 0.7009025832555245, "grad_norm": 0.05979468673467636, "learning_rate": 0.00022284575046062333, "loss": 1.4673, "step": 15764 }, { "epoch": 0.7009915077141968, "grad_norm": 0.06295553594827652, "learning_rate": 0.00022272408789986427, "loss": 1.4617, "step": 15766 }, { "epoch": 0.7010804321728692, "grad_norm": 0.0636453926563263, "learning_rate": 0.00022260244904187226, "loss": 1.4601, "step": 15768 }, { "epoch": 0.7011693566315415, "grad_norm": 0.06022004410624504, "learning_rate": 0.00022248083389704516, "loss": 1.4608, "step": 15770 }, { "epoch": 0.7012582810902138, "grad_norm": 0.059936974197626114, "learning_rate": 0.00022235924247577938, "loss": 1.4611, "step": 15772 }, { "epoch": 0.7013472055488862, "grad_norm": 0.059660959988832474, "learning_rate": 0.00022223767478846924, "loss": 1.4592, "step": 15774 }, { "epoch": 0.7014361300075586, "grad_norm": 0.06002466008067131, "learning_rate": 0.0002221161308455066, "loss": 1.4679, "step": 15776 }, { "epoch": 0.7015250544662309, "grad_norm": 0.06080063059926033, "learning_rate": 0.0002219946106572816, "loss": 1.4663, "step": 15778 }, { "epoch": 0.7016139789249033, "grad_norm": 0.059667252004146576, "learning_rate": 0.00022187311423418267, "loss": 1.4672, "step": 15780 }, { "epoch": 0.7017029033835757, "grad_norm": 0.05923038348555565, "learning_rate": 0.0002217516415865954, "loss": 1.4637, "step": 15782 }, { "epoch": 0.701791827842248, "grad_norm": 0.0604298934340477, "learning_rate": 0.00022163019272490397, "loss": 1.4652, "step": 15784 }, { "epoch": 0.7018807523009204, "grad_norm": 0.05981077253818512, "learning_rate": 0.00022150876765949062, "loss": 1.4664, "step": 15786 }, { "epoch": 0.7019696767595928, "grad_norm": 0.058474358171224594, "learning_rate": 0.00022138736640073487, "loss": 1.4659, "step": 15788 }, { "epoch": 0.7020586012182651, "grad_norm": 0.05936760455369949, "learning_rate": 0.00022126598895901484, "loss": 1.4668, "step": 15790 }, { "epoch": 0.7021475256769374, "grad_norm": 0.060833852738142014, "learning_rate": 0.00022114463534470664, "loss": 1.4602, "step": 15792 }, { "epoch": 0.7022364501356098, "grad_norm": 0.06116551160812378, "learning_rate": 0.00022102330556818367, "loss": 1.4691, "step": 15794 }, { "epoch": 0.7023253745942821, "grad_norm": 0.06091858446598053, "learning_rate": 0.00022090199963981815, "loss": 1.4575, "step": 15796 }, { "epoch": 0.7024142990529545, "grad_norm": 0.05815834179520607, "learning_rate": 0.00022078071756997947, "loss": 1.4642, "step": 15798 }, { "epoch": 0.7025032235116269, "grad_norm": 0.05963579937815666, "learning_rate": 0.00022065945936903554, "loss": 1.4623, "step": 15800 }, { "epoch": 0.7025921479702992, "grad_norm": 0.05970877781510353, "learning_rate": 0.00022053822504735222, "loss": 1.4654, "step": 15802 }, { "epoch": 0.7026810724289716, "grad_norm": 0.059889744967222214, "learning_rate": 0.00022041701461529274, "loss": 1.464, "step": 15804 }, { "epoch": 0.702769996887644, "grad_norm": 0.0590536892414093, "learning_rate": 0.00022029582808321896, "loss": 1.4651, "step": 15806 }, { "epoch": 0.7028589213463163, "grad_norm": 0.05903364717960358, "learning_rate": 0.0002201746654614906, "loss": 1.4673, "step": 15808 }, { "epoch": 0.7029478458049887, "grad_norm": 0.05853195860981941, "learning_rate": 0.0002200535267604648, "loss": 1.4652, "step": 15810 }, { "epoch": 0.7030367702636611, "grad_norm": 0.0594969168305397, "learning_rate": 0.00021993241199049724, "loss": 1.4621, "step": 15812 }, { "epoch": 0.7031256947223333, "grad_norm": 0.061632584780454636, "learning_rate": 0.00021981132116194146, "loss": 1.4674, "step": 15814 }, { "epoch": 0.7032146191810057, "grad_norm": 0.05890651419758797, "learning_rate": 0.00021969025428514855, "loss": 1.464, "step": 15816 }, { "epoch": 0.7033035436396781, "grad_norm": 0.05904078111052513, "learning_rate": 0.00021956921137046814, "loss": 1.4609, "step": 15818 }, { "epoch": 0.7033924680983504, "grad_norm": 0.05928374454379082, "learning_rate": 0.00021944819242824705, "loss": 1.4711, "step": 15820 }, { "epoch": 0.7034813925570228, "grad_norm": 0.05989319831132889, "learning_rate": 0.00021932719746883112, "loss": 1.4646, "step": 15822 }, { "epoch": 0.7035703170156952, "grad_norm": 0.05920826643705368, "learning_rate": 0.0002192062265025633, "loss": 1.4723, "step": 15824 }, { "epoch": 0.7036592414743675, "grad_norm": 0.0592825673520565, "learning_rate": 0.0002190852795397844, "loss": 1.464, "step": 15826 }, { "epoch": 0.7037481659330399, "grad_norm": 0.05995682626962662, "learning_rate": 0.0002189643565908338, "loss": 1.4645, "step": 15828 }, { "epoch": 0.7038370903917123, "grad_norm": 0.05902191996574402, "learning_rate": 0.00021884345766604867, "loss": 1.4597, "step": 15830 }, { "epoch": 0.7039260148503846, "grad_norm": 0.060309432446956635, "learning_rate": 0.00021872258277576363, "loss": 1.4737, "step": 15832 }, { "epoch": 0.704014939309057, "grad_norm": 0.06095599755644798, "learning_rate": 0.00021860173193031173, "loss": 1.4623, "step": 15834 }, { "epoch": 0.7041038637677293, "grad_norm": 0.060994766652584076, "learning_rate": 0.00021848090514002406, "loss": 1.461, "step": 15836 }, { "epoch": 0.7041927882264016, "grad_norm": 0.05787515640258789, "learning_rate": 0.000218360102415229, "loss": 1.4625, "step": 15838 }, { "epoch": 0.704281712685074, "grad_norm": 0.05939411744475365, "learning_rate": 0.00021823932376625362, "loss": 1.4617, "step": 15840 }, { "epoch": 0.7043706371437464, "grad_norm": 0.059615593403577805, "learning_rate": 0.0002181185692034225, "loss": 1.4611, "step": 15842 }, { "epoch": 0.7044595616024187, "grad_norm": 0.059437982738018036, "learning_rate": 0.00021799783873705842, "loss": 1.4641, "step": 15844 }, { "epoch": 0.7045484860610911, "grad_norm": 0.06425923109054565, "learning_rate": 0.00021787713237748192, "loss": 1.4658, "step": 15846 }, { "epoch": 0.7046374105197635, "grad_norm": 0.06056644767522812, "learning_rate": 0.000217756450135011, "loss": 1.4645, "step": 15848 }, { "epoch": 0.7047263349784358, "grad_norm": 0.06096633896231651, "learning_rate": 0.00021763579201996286, "loss": 1.4702, "step": 15850 }, { "epoch": 0.7048152594371082, "grad_norm": 0.06033441051840782, "learning_rate": 0.0002175151580426516, "loss": 1.466, "step": 15852 }, { "epoch": 0.7049041838957806, "grad_norm": 0.0599847212433815, "learning_rate": 0.00021739454821338933, "loss": 1.4668, "step": 15854 }, { "epoch": 0.704993108354453, "grad_norm": 0.06081864982843399, "learning_rate": 0.00021727396254248645, "loss": 1.4669, "step": 15856 }, { "epoch": 0.7050820328131252, "grad_norm": 0.06006255000829697, "learning_rate": 0.00021715340104025123, "loss": 1.46, "step": 15858 }, { "epoch": 0.7051709572717976, "grad_norm": 0.059641413390636444, "learning_rate": 0.0002170328637169899, "loss": 1.4586, "step": 15860 }, { "epoch": 0.7052598817304699, "grad_norm": 0.06192189082503319, "learning_rate": 0.0002169123505830062, "loss": 1.4649, "step": 15862 }, { "epoch": 0.7053488061891423, "grad_norm": 0.05896773561835289, "learning_rate": 0.00021679186164860227, "loss": 1.4648, "step": 15864 }, { "epoch": 0.7054377306478147, "grad_norm": 0.060805827379226685, "learning_rate": 0.00021667139692407828, "loss": 1.4672, "step": 15866 }, { "epoch": 0.705526655106487, "grad_norm": 0.06141723319888115, "learning_rate": 0.00021655095641973188, "loss": 1.458, "step": 15868 }, { "epoch": 0.7056155795651594, "grad_norm": 0.061006996780633926, "learning_rate": 0.00021643054014585845, "loss": 1.4693, "step": 15870 }, { "epoch": 0.7057045040238318, "grad_norm": 0.05978891998529434, "learning_rate": 0.00021631014811275247, "loss": 1.4703, "step": 15872 }, { "epoch": 0.7057934284825041, "grad_norm": 0.06066666170954704, "learning_rate": 0.00021618978033070501, "loss": 1.4592, "step": 15874 }, { "epoch": 0.7058823529411765, "grad_norm": 0.05984902381896973, "learning_rate": 0.00021606943681000607, "loss": 1.4589, "step": 15876 }, { "epoch": 0.7059712773998488, "grad_norm": 0.05905771628022194, "learning_rate": 0.0002159491175609427, "loss": 1.464, "step": 15878 }, { "epoch": 0.7060602018585211, "grad_norm": 0.06035059690475464, "learning_rate": 0.00021582882259380055, "loss": 1.4608, "step": 15880 }, { "epoch": 0.7061491263171935, "grad_norm": 0.061551641672849655, "learning_rate": 0.00021570855191886313, "loss": 1.4607, "step": 15882 }, { "epoch": 0.7062380507758659, "grad_norm": 0.05943434312939644, "learning_rate": 0.0002155883055464113, "loss": 1.4696, "step": 15884 }, { "epoch": 0.7063269752345382, "grad_norm": 0.05969441309571266, "learning_rate": 0.00021546808348672452, "loss": 1.4641, "step": 15886 }, { "epoch": 0.7064158996932106, "grad_norm": 0.05943189188838005, "learning_rate": 0.00021534788575008, "loss": 1.4646, "step": 15888 }, { "epoch": 0.706504824151883, "grad_norm": 0.05974946543574333, "learning_rate": 0.00021522771234675249, "loss": 1.4665, "step": 15890 }, { "epoch": 0.7065937486105553, "grad_norm": 0.05903458595275879, "learning_rate": 0.00021510756328701504, "loss": 1.4597, "step": 15892 }, { "epoch": 0.7066826730692277, "grad_norm": 0.061366867274045944, "learning_rate": 0.00021498743858113878, "loss": 1.4645, "step": 15894 }, { "epoch": 0.7067715975279001, "grad_norm": 0.05912630632519722, "learning_rate": 0.00021486733823939202, "loss": 1.4645, "step": 15896 }, { "epoch": 0.7068605219865725, "grad_norm": 0.06042192131280899, "learning_rate": 0.00021474726227204183, "loss": 1.4685, "step": 15898 }, { "epoch": 0.7069494464452447, "grad_norm": 0.05886279419064522, "learning_rate": 0.00021462721068935282, "loss": 1.4635, "step": 15900 }, { "epoch": 0.7070383709039171, "grad_norm": 0.058296773582696915, "learning_rate": 0.0002145071835015873, "loss": 1.4642, "step": 15902 }, { "epoch": 0.7071272953625894, "grad_norm": 0.05788257345557213, "learning_rate": 0.00021438718071900605, "loss": 1.4653, "step": 15904 }, { "epoch": 0.7072162198212618, "grad_norm": 0.05922654643654823, "learning_rate": 0.00021426720235186703, "loss": 1.4671, "step": 15906 }, { "epoch": 0.7073051442799342, "grad_norm": 0.059532783925533295, "learning_rate": 0.00021414724841042676, "loss": 1.4659, "step": 15908 }, { "epoch": 0.7073940687386066, "grad_norm": 0.05968281626701355, "learning_rate": 0.00021402731890493958, "loss": 1.4614, "step": 15910 }, { "epoch": 0.7074829931972789, "grad_norm": 0.06003040820360184, "learning_rate": 0.00021390741384565726, "loss": 1.466, "step": 15912 }, { "epoch": 0.7075719176559513, "grad_norm": 0.059364233165979385, "learning_rate": 0.00021378753324283002, "loss": 1.4609, "step": 15914 }, { "epoch": 0.7076608421146237, "grad_norm": 0.058379728347063065, "learning_rate": 0.0002136676771067058, "loss": 1.4637, "step": 15916 }, { "epoch": 0.707749766573296, "grad_norm": 0.059285663068294525, "learning_rate": 0.00021354784544753026, "loss": 1.4653, "step": 15918 }, { "epoch": 0.7078386910319684, "grad_norm": 0.061500970274209976, "learning_rate": 0.00021342803827554725, "loss": 1.464, "step": 15920 }, { "epoch": 0.7079276154906406, "grad_norm": 0.05940185859799385, "learning_rate": 0.00021330825560099854, "loss": 1.4641, "step": 15922 }, { "epoch": 0.708016539949313, "grad_norm": 0.05833735316991806, "learning_rate": 0.00021318849743412344, "loss": 1.465, "step": 15924 }, { "epoch": 0.7081054644079854, "grad_norm": 0.05976291745901108, "learning_rate": 0.00021306876378515967, "loss": 1.4637, "step": 15926 }, { "epoch": 0.7081943888666578, "grad_norm": 0.058585334569215775, "learning_rate": 0.000212949054664342, "loss": 1.4653, "step": 15928 }, { "epoch": 0.7082833133253301, "grad_norm": 0.061199333518743515, "learning_rate": 0.00021282937008190457, "loss": 1.465, "step": 15930 }, { "epoch": 0.7083722377840025, "grad_norm": 0.05798893794417381, "learning_rate": 0.00021270971004807809, "loss": 1.4624, "step": 15932 }, { "epoch": 0.7084611622426749, "grad_norm": 0.060919709503650665, "learning_rate": 0.00021259007457309149, "loss": 1.4684, "step": 15934 }, { "epoch": 0.7085500867013472, "grad_norm": 0.05985601246356964, "learning_rate": 0.00021247046366717183, "loss": 1.4633, "step": 15936 }, { "epoch": 0.7086390111600196, "grad_norm": 0.059532877057790756, "learning_rate": 0.00021235087734054425, "loss": 1.4666, "step": 15938 }, { "epoch": 0.708727935618692, "grad_norm": 0.060997124761343, "learning_rate": 0.00021223131560343112, "loss": 1.4664, "step": 15940 }, { "epoch": 0.7088168600773643, "grad_norm": 0.061860326677560806, "learning_rate": 0.00021211177846605327, "loss": 1.4628, "step": 15942 }, { "epoch": 0.7089057845360366, "grad_norm": 0.05831504985690117, "learning_rate": 0.0002119922659386293, "loss": 1.465, "step": 15944 }, { "epoch": 0.708994708994709, "grad_norm": 0.05970292538404465, "learning_rate": 0.00021187277803137584, "loss": 1.4616, "step": 15946 }, { "epoch": 0.7090836334533813, "grad_norm": 0.058645959943532944, "learning_rate": 0.00021175331475450703, "loss": 1.463, "step": 15948 }, { "epoch": 0.7091725579120537, "grad_norm": 0.05951127037405968, "learning_rate": 0.00021163387611823482, "loss": 1.4609, "step": 15950 }, { "epoch": 0.7092614823707261, "grad_norm": 0.05968938767910004, "learning_rate": 0.00021151446213276998, "loss": 1.4673, "step": 15952 }, { "epoch": 0.7093504068293984, "grad_norm": 0.0600113719701767, "learning_rate": 0.0002113950728083202, "loss": 1.4608, "step": 15954 }, { "epoch": 0.7094393312880708, "grad_norm": 0.06004900485277176, "learning_rate": 0.00021127570815509134, "loss": 1.462, "step": 15956 }, { "epoch": 0.7095282557467432, "grad_norm": 0.059999480843544006, "learning_rate": 0.00021115636818328727, "loss": 1.4652, "step": 15958 }, { "epoch": 0.7096171802054155, "grad_norm": 0.06003829836845398, "learning_rate": 0.0002110370529031097, "loss": 1.4673, "step": 15960 }, { "epoch": 0.7097061046640879, "grad_norm": 0.058980792760849, "learning_rate": 0.00021091776232475852, "loss": 1.4609, "step": 15962 }, { "epoch": 0.7097950291227603, "grad_norm": 0.05989384278655052, "learning_rate": 0.00021079849645843073, "loss": 1.4604, "step": 15964 }, { "epoch": 0.7098839535814325, "grad_norm": 0.0600026473402977, "learning_rate": 0.00021067925531432198, "loss": 1.4649, "step": 15966 }, { "epoch": 0.7099728780401049, "grad_norm": 0.05990760773420334, "learning_rate": 0.0002105600389026256, "loss": 1.4647, "step": 15968 }, { "epoch": 0.7100618024987773, "grad_norm": 0.0623035728931427, "learning_rate": 0.00021044084723353246, "loss": 1.4708, "step": 15970 }, { "epoch": 0.7101507269574496, "grad_norm": 0.059693653136491776, "learning_rate": 0.00021032168031723174, "loss": 1.4644, "step": 15972 }, { "epoch": 0.710239651416122, "grad_norm": 0.05945916846394539, "learning_rate": 0.00021020253816391056, "loss": 1.4599, "step": 15974 }, { "epoch": 0.7103285758747944, "grad_norm": 0.060173675417900085, "learning_rate": 0.00021008342078375353, "loss": 1.4667, "step": 15976 }, { "epoch": 0.7104175003334667, "grad_norm": 0.0596684031188488, "learning_rate": 0.00020996432818694288, "loss": 1.4599, "step": 15978 }, { "epoch": 0.7105064247921391, "grad_norm": 0.06060690060257912, "learning_rate": 0.00020984526038366003, "loss": 1.4643, "step": 15980 }, { "epoch": 0.7105953492508115, "grad_norm": 0.05914461240172386, "learning_rate": 0.00020972621738408277, "loss": 1.4646, "step": 15982 }, { "epoch": 0.7106842737094838, "grad_norm": 0.06105831637978554, "learning_rate": 0.00020960719919838787, "loss": 1.4666, "step": 15984 }, { "epoch": 0.7107731981681562, "grad_norm": 0.06017794460058212, "learning_rate": 0.0002094882058367491, "loss": 1.4646, "step": 15986 }, { "epoch": 0.7108621226268285, "grad_norm": 0.059006720781326294, "learning_rate": 0.0002093692373093387, "loss": 1.4621, "step": 15988 }, { "epoch": 0.7109510470855008, "grad_norm": 0.05977842956781387, "learning_rate": 0.0002092502936263269, "loss": 1.4631, "step": 15990 }, { "epoch": 0.7110399715441732, "grad_norm": 0.06044214591383934, "learning_rate": 0.00020913137479788109, "loss": 1.4614, "step": 15992 }, { "epoch": 0.7111288960028456, "grad_norm": 0.05967364087700844, "learning_rate": 0.0002090124808341672, "loss": 1.4622, "step": 15994 }, { "epoch": 0.7112178204615179, "grad_norm": 0.05973359942436218, "learning_rate": 0.00020889361174534894, "loss": 1.459, "step": 15996 }, { "epoch": 0.7113067449201903, "grad_norm": 0.05971769616007805, "learning_rate": 0.00020877476754158742, "loss": 1.4605, "step": 15998 }, { "epoch": 0.7113956693788627, "grad_norm": 0.060989778488874435, "learning_rate": 0.0002086559482330422, "loss": 1.46, "step": 16000 }, { "epoch": 0.7113956693788627, "eval_loss": 1.4480788707733154, "eval_runtime": 12.4236, "eval_samples_per_second": 556.2, "eval_steps_per_second": 69.545, "step": 16000 }, { "epoch": 0.711484593837535, "grad_norm": 0.05998692661523819, "learning_rate": 0.00020853715382987054, "loss": 1.4688, "step": 16002 }, { "epoch": 0.7115735182962074, "grad_norm": 0.06045999005436897, "learning_rate": 0.00020841838434222732, "loss": 1.468, "step": 16004 }, { "epoch": 0.7116624427548798, "grad_norm": 0.05892810970544815, "learning_rate": 0.00020829963978026566, "loss": 1.4649, "step": 16006 }, { "epoch": 0.711751367213552, "grad_norm": 0.0596071220934391, "learning_rate": 0.00020818092015413616, "loss": 1.4559, "step": 16008 }, { "epoch": 0.7118402916722244, "grad_norm": 0.061141762882471085, "learning_rate": 0.00020806222547398756, "loss": 1.4602, "step": 16010 }, { "epoch": 0.7119292161308968, "grad_norm": 0.059475455433130264, "learning_rate": 0.0002079435557499667, "loss": 1.4639, "step": 16012 }, { "epoch": 0.7120181405895691, "grad_norm": 0.06107206270098686, "learning_rate": 0.00020782491099221752, "loss": 1.4668, "step": 16014 }, { "epoch": 0.7121070650482415, "grad_norm": 0.060450684279203415, "learning_rate": 0.00020770629121088252, "loss": 1.4543, "step": 16016 }, { "epoch": 0.7121959895069139, "grad_norm": 0.059023890644311905, "learning_rate": 0.00020758769641610202, "loss": 1.4608, "step": 16018 }, { "epoch": 0.7122849139655862, "grad_norm": 0.06070045754313469, "learning_rate": 0.0002074691266180137, "loss": 1.4574, "step": 16020 }, { "epoch": 0.7123738384242586, "grad_norm": 0.0602857731282711, "learning_rate": 0.0002073505818267536, "loss": 1.4639, "step": 16022 }, { "epoch": 0.712462762882931, "grad_norm": 0.05997920036315918, "learning_rate": 0.00020723206205245555, "loss": 1.4654, "step": 16024 }, { "epoch": 0.7125516873416033, "grad_norm": 0.06003386527299881, "learning_rate": 0.00020711356730525082, "loss": 1.4625, "step": 16026 }, { "epoch": 0.7126406118002757, "grad_norm": 0.05993438884615898, "learning_rate": 0.00020699509759526913, "loss": 1.4609, "step": 16028 }, { "epoch": 0.712729536258948, "grad_norm": 0.059374984353780746, "learning_rate": 0.0002068766529326379, "loss": 1.4637, "step": 16030 }, { "epoch": 0.7128184607176203, "grad_norm": 0.05907906964421272, "learning_rate": 0.00020675823332748196, "loss": 1.4645, "step": 16032 }, { "epoch": 0.7129073851762927, "grad_norm": 0.05863776430487633, "learning_rate": 0.00020663983878992464, "loss": 1.4627, "step": 16034 }, { "epoch": 0.7129963096349651, "grad_norm": 0.058749303221702576, "learning_rate": 0.00020652146933008636, "loss": 1.4612, "step": 16036 }, { "epoch": 0.7130852340936374, "grad_norm": 0.0615452416241169, "learning_rate": 0.00020640312495808654, "loss": 1.4709, "step": 16038 }, { "epoch": 0.7131741585523098, "grad_norm": 0.05932776257395744, "learning_rate": 0.0002062848056840415, "loss": 1.4652, "step": 16040 }, { "epoch": 0.7132630830109822, "grad_norm": 0.06059432402253151, "learning_rate": 0.00020616651151806543, "loss": 1.4626, "step": 16042 }, { "epoch": 0.7133520074696545, "grad_norm": 0.06045154482126236, "learning_rate": 0.00020604824247027077, "loss": 1.464, "step": 16044 }, { "epoch": 0.7134409319283269, "grad_norm": 0.06008205935359001, "learning_rate": 0.000205929998550768, "loss": 1.467, "step": 16046 }, { "epoch": 0.7135298563869993, "grad_norm": 0.059912290424108505, "learning_rate": 0.00020581177976966464, "loss": 1.4664, "step": 16048 }, { "epoch": 0.7136187808456717, "grad_norm": 0.061931006610393524, "learning_rate": 0.00020569358613706684, "loss": 1.4685, "step": 16050 }, { "epoch": 0.7137077053043439, "grad_norm": 0.059048622846603394, "learning_rate": 0.00020557541766307825, "loss": 1.4646, "step": 16052 }, { "epoch": 0.7137966297630163, "grad_norm": 0.058656416833400726, "learning_rate": 0.00020545727435780065, "loss": 1.4562, "step": 16054 }, { "epoch": 0.7138855542216886, "grad_norm": 0.05939547345042229, "learning_rate": 0.0002053391562313332, "loss": 1.4609, "step": 16056 }, { "epoch": 0.713974478680361, "grad_norm": 0.060639627277851105, "learning_rate": 0.00020522106329377287, "loss": 1.4673, "step": 16058 }, { "epoch": 0.7140634031390334, "grad_norm": 0.06143512949347496, "learning_rate": 0.00020510299555521544, "loss": 1.4704, "step": 16060 }, { "epoch": 0.7141523275977057, "grad_norm": 0.059509847313165665, "learning_rate": 0.00020498495302575354, "loss": 1.4638, "step": 16062 }, { "epoch": 0.7142412520563781, "grad_norm": 0.05967727676033974, "learning_rate": 0.00020486693571547776, "loss": 1.4603, "step": 16064 }, { "epoch": 0.7143301765150505, "grad_norm": 0.06043224409222603, "learning_rate": 0.00020474894363447694, "loss": 1.4602, "step": 16066 }, { "epoch": 0.7144191009737229, "grad_norm": 0.05880916118621826, "learning_rate": 0.00020463097679283748, "loss": 1.4666, "step": 16068 }, { "epoch": 0.7145080254323952, "grad_norm": 0.06067218258976936, "learning_rate": 0.00020451303520064402, "loss": 1.4597, "step": 16070 }, { "epoch": 0.7145969498910676, "grad_norm": 0.06095737963914871, "learning_rate": 0.0002043951188679783, "loss": 1.4666, "step": 16072 }, { "epoch": 0.7146858743497398, "grad_norm": 0.06045563891530037, "learning_rate": 0.00020427722780492049, "loss": 1.4595, "step": 16074 }, { "epoch": 0.7147747988084122, "grad_norm": 0.05901022255420685, "learning_rate": 0.00020415936202154862, "loss": 1.4609, "step": 16076 }, { "epoch": 0.7148637232670846, "grad_norm": 0.06187785044312477, "learning_rate": 0.00020404152152793804, "loss": 1.4616, "step": 16078 }, { "epoch": 0.714952647725757, "grad_norm": 0.06016907840967178, "learning_rate": 0.00020392370633416247, "loss": 1.4627, "step": 16080 }, { "epoch": 0.7150415721844293, "grad_norm": 0.05958845466375351, "learning_rate": 0.00020380591645029338, "loss": 1.4614, "step": 16082 }, { "epoch": 0.7151304966431017, "grad_norm": 0.06001560017466545, "learning_rate": 0.00020368815188639966, "loss": 1.4599, "step": 16084 }, { "epoch": 0.715219421101774, "grad_norm": 0.05985542759299278, "learning_rate": 0.00020357041265254856, "loss": 1.4627, "step": 16086 }, { "epoch": 0.7153083455604464, "grad_norm": 0.05955475568771362, "learning_rate": 0.00020345269875880507, "loss": 1.4571, "step": 16088 }, { "epoch": 0.7153972700191188, "grad_norm": 0.059921056032180786, "learning_rate": 0.00020333501021523154, "loss": 1.464, "step": 16090 }, { "epoch": 0.7154861944777912, "grad_norm": 0.06080423295497894, "learning_rate": 0.00020321734703188893, "loss": 1.4646, "step": 16092 }, { "epoch": 0.7155751189364635, "grad_norm": 0.05900348350405693, "learning_rate": 0.00020309970921883515, "loss": 1.4668, "step": 16094 }, { "epoch": 0.7156640433951358, "grad_norm": 0.05854041501879692, "learning_rate": 0.0002029820967861266, "loss": 1.4641, "step": 16096 }, { "epoch": 0.7157529678538082, "grad_norm": 0.06043776124715805, "learning_rate": 0.0002028645097438176, "loss": 1.4555, "step": 16098 }, { "epoch": 0.7158418923124805, "grad_norm": 0.06004379689693451, "learning_rate": 0.00020274694810195948, "loss": 1.4663, "step": 16100 }, { "epoch": 0.7159308167711529, "grad_norm": 0.059593565762043, "learning_rate": 0.0002026294118706022, "loss": 1.4625, "step": 16102 }, { "epoch": 0.7160197412298253, "grad_norm": 0.05828922986984253, "learning_rate": 0.00020251190105979346, "loss": 1.4581, "step": 16104 }, { "epoch": 0.7161086656884976, "grad_norm": 0.060029514133930206, "learning_rate": 0.00020239441567957822, "loss": 1.4621, "step": 16106 }, { "epoch": 0.71619759014717, "grad_norm": 0.05973885953426361, "learning_rate": 0.00020227695573999982, "loss": 1.4635, "step": 16108 }, { "epoch": 0.7162865146058424, "grad_norm": 0.05934428423643112, "learning_rate": 0.00020215952125109948, "loss": 1.4608, "step": 16110 }, { "epoch": 0.7163754390645147, "grad_norm": 0.06081746146082878, "learning_rate": 0.00020204211222291557, "loss": 1.4604, "step": 16112 }, { "epoch": 0.7164643635231871, "grad_norm": 0.05820313096046448, "learning_rate": 0.00020192472866548524, "loss": 1.4615, "step": 16114 }, { "epoch": 0.7165532879818595, "grad_norm": 0.05866072699427605, "learning_rate": 0.0002018073705888424, "loss": 1.4571, "step": 16116 }, { "epoch": 0.7166422124405317, "grad_norm": 0.05988702178001404, "learning_rate": 0.00020169003800301965, "loss": 1.4649, "step": 16118 }, { "epoch": 0.7167311368992041, "grad_norm": 0.059877146035432816, "learning_rate": 0.00020157273091804724, "loss": 1.463, "step": 16120 }, { "epoch": 0.7168200613578765, "grad_norm": 0.06032383814454079, "learning_rate": 0.00020145544934395271, "loss": 1.4683, "step": 16122 }, { "epoch": 0.7169089858165488, "grad_norm": 0.05933660268783569, "learning_rate": 0.00020133819329076207, "loss": 1.4627, "step": 16124 }, { "epoch": 0.7169979102752212, "grad_norm": 0.05780529975891113, "learning_rate": 0.00020122096276849899, "loss": 1.4619, "step": 16126 }, { "epoch": 0.7170868347338936, "grad_norm": 0.06042304262518883, "learning_rate": 0.00020110375778718449, "loss": 1.4691, "step": 16128 }, { "epoch": 0.7171757591925659, "grad_norm": 0.05824956297874451, "learning_rate": 0.00020098657835683798, "loss": 1.4589, "step": 16130 }, { "epoch": 0.7172646836512383, "grad_norm": 0.060649555176496506, "learning_rate": 0.00020086942448747664, "loss": 1.4593, "step": 16132 }, { "epoch": 0.7173536081099107, "grad_norm": 0.06005752086639404, "learning_rate": 0.00020075229618911493, "loss": 1.4662, "step": 16134 }, { "epoch": 0.717442532568583, "grad_norm": 0.06136421114206314, "learning_rate": 0.0002006351934717657, "loss": 1.4603, "step": 16136 }, { "epoch": 0.7175314570272553, "grad_norm": 0.06038883700966835, "learning_rate": 0.00020051811634543937, "loss": 1.4623, "step": 16138 }, { "epoch": 0.7176203814859277, "grad_norm": 0.05902881920337677, "learning_rate": 0.00020040106482014443, "loss": 1.4677, "step": 16140 }, { "epoch": 0.7177093059446, "grad_norm": 0.05924457684159279, "learning_rate": 0.00020028403890588677, "loss": 1.4614, "step": 16142 }, { "epoch": 0.7177982304032724, "grad_norm": 0.05954696983098984, "learning_rate": 0.00020016703861266987, "loss": 1.4647, "step": 16144 }, { "epoch": 0.7178871548619448, "grad_norm": 0.05851093307137489, "learning_rate": 0.0002000500639504962, "loss": 1.4644, "step": 16146 }, { "epoch": 0.7179760793206171, "grad_norm": 0.06012677401304245, "learning_rate": 0.0001999331149293649, "loss": 1.4596, "step": 16148 }, { "epoch": 0.7180650037792895, "grad_norm": 0.059234343469142914, "learning_rate": 0.0001998161915592731, "loss": 1.4601, "step": 16150 }, { "epoch": 0.7181539282379619, "grad_norm": 0.06154603511095047, "learning_rate": 0.00019969929385021612, "loss": 1.4609, "step": 16152 }, { "epoch": 0.7182428526966342, "grad_norm": 0.06049703061580658, "learning_rate": 0.0001995824218121871, "loss": 1.4565, "step": 16154 }, { "epoch": 0.7183317771553066, "grad_norm": 0.059687089174985886, "learning_rate": 0.00019946557545517636, "loss": 1.4608, "step": 16156 }, { "epoch": 0.718420701613979, "grad_norm": 0.06014818698167801, "learning_rate": 0.00019934875478917264, "loss": 1.4606, "step": 16158 }, { "epoch": 0.7185096260726512, "grad_norm": 0.061506614089012146, "learning_rate": 0.00019923195982416225, "loss": 1.4618, "step": 16160 }, { "epoch": 0.7185985505313236, "grad_norm": 0.059169650077819824, "learning_rate": 0.00019911519057012962, "loss": 1.4604, "step": 16162 }, { "epoch": 0.718687474989996, "grad_norm": 0.05947177857160568, "learning_rate": 0.00019899844703705644, "loss": 1.4603, "step": 16164 }, { "epoch": 0.7187763994486683, "grad_norm": 0.05948949605226517, "learning_rate": 0.0001988817292349221, "loss": 1.463, "step": 16166 }, { "epoch": 0.7188653239073407, "grad_norm": 0.05881119892001152, "learning_rate": 0.00019876503717370487, "loss": 1.4641, "step": 16168 }, { "epoch": 0.7189542483660131, "grad_norm": 0.061151519417762756, "learning_rate": 0.00019864837086337977, "loss": 1.4674, "step": 16170 }, { "epoch": 0.7190431728246854, "grad_norm": 0.06064696982502937, "learning_rate": 0.0001985317303139198, "loss": 1.4673, "step": 16172 }, { "epoch": 0.7191320972833578, "grad_norm": 0.062243394553661346, "learning_rate": 0.000198415115535296, "loss": 1.4636, "step": 16174 }, { "epoch": 0.7192210217420302, "grad_norm": 0.05873696506023407, "learning_rate": 0.0001982985265374772, "loss": 1.464, "step": 16176 }, { "epoch": 0.7193099462007025, "grad_norm": 0.05942462012171745, "learning_rate": 0.0001981819633304301, "loss": 1.4635, "step": 16178 }, { "epoch": 0.7193988706593749, "grad_norm": 0.0588180348277092, "learning_rate": 0.00019806542592411863, "loss": 1.4619, "step": 16180 }, { "epoch": 0.7194877951180472, "grad_norm": 0.059856392443180084, "learning_rate": 0.00019794891432850514, "loss": 1.4649, "step": 16182 }, { "epoch": 0.7195767195767195, "grad_norm": 0.05942876264452934, "learning_rate": 0.00019783242855354977, "loss": 1.4572, "step": 16184 }, { "epoch": 0.7196656440353919, "grad_norm": 0.060655154287815094, "learning_rate": 0.00019771596860920982, "loss": 1.4618, "step": 16186 }, { "epoch": 0.7197545684940643, "grad_norm": 0.06054103747010231, "learning_rate": 0.00019759953450544098, "loss": 1.4609, "step": 16188 }, { "epoch": 0.7198434929527366, "grad_norm": 0.05870714411139488, "learning_rate": 0.00019748312625219672, "loss": 1.461, "step": 16190 }, { "epoch": 0.719932417411409, "grad_norm": 0.05910949036478996, "learning_rate": 0.0001973667438594278, "loss": 1.4573, "step": 16192 }, { "epoch": 0.7200213418700814, "grad_norm": 0.06156644597649574, "learning_rate": 0.00019725038733708333, "loss": 1.4639, "step": 16194 }, { "epoch": 0.7201102663287537, "grad_norm": 0.05833414942026138, "learning_rate": 0.00019713405669511007, "loss": 1.4545, "step": 16196 }, { "epoch": 0.7201991907874261, "grad_norm": 0.05948293209075928, "learning_rate": 0.00019701775194345218, "loss": 1.457, "step": 16198 }, { "epoch": 0.7202881152460985, "grad_norm": 0.05885034427046776, "learning_rate": 0.0001969014730920522, "loss": 1.4604, "step": 16200 }, { "epoch": 0.7203770397047708, "grad_norm": 0.060151953250169754, "learning_rate": 0.00019678522015084983, "loss": 1.4617, "step": 16202 }, { "epoch": 0.7204659641634431, "grad_norm": 0.06010283902287483, "learning_rate": 0.0001966689931297831, "loss": 1.4669, "step": 16204 }, { "epoch": 0.7205548886221155, "grad_norm": 0.060459204018116, "learning_rate": 0.00019655279203878768, "loss": 1.4593, "step": 16206 }, { "epoch": 0.7206438130807878, "grad_norm": 0.06011422351002693, "learning_rate": 0.00019643661688779673, "loss": 1.4639, "step": 16208 }, { "epoch": 0.7207327375394602, "grad_norm": 0.06095078960061073, "learning_rate": 0.0001963204676867415, "loss": 1.4649, "step": 16210 }, { "epoch": 0.7208216619981326, "grad_norm": 0.058982133865356445, "learning_rate": 0.0001962043444455512, "loss": 1.4607, "step": 16212 }, { "epoch": 0.720910586456805, "grad_norm": 0.05980036035180092, "learning_rate": 0.00019608824717415207, "loss": 1.4636, "step": 16214 }, { "epoch": 0.7209995109154773, "grad_norm": 0.06017722934484482, "learning_rate": 0.0001959721758824689, "loss": 1.4604, "step": 16216 }, { "epoch": 0.7210884353741497, "grad_norm": 0.05889592692255974, "learning_rate": 0.0001958561305804241, "loss": 1.462, "step": 16218 }, { "epoch": 0.721177359832822, "grad_norm": 0.060982596129179, "learning_rate": 0.00019574011127793746, "loss": 1.4658, "step": 16220 }, { "epoch": 0.7212662842914944, "grad_norm": 0.05975702032446861, "learning_rate": 0.00019562411798492701, "loss": 1.4623, "step": 16222 }, { "epoch": 0.7213552087501668, "grad_norm": 0.05935823172330856, "learning_rate": 0.00019550815071130823, "loss": 1.4642, "step": 16224 }, { "epoch": 0.721444133208839, "grad_norm": 0.05940350890159607, "learning_rate": 0.0001953922094669945, "loss": 1.4637, "step": 16226 }, { "epoch": 0.7215330576675114, "grad_norm": 0.058843355625867844, "learning_rate": 0.0001952762942618973, "loss": 1.4622, "step": 16228 }, { "epoch": 0.7216219821261838, "grad_norm": 0.05933767557144165, "learning_rate": 0.00019516040510592515, "loss": 1.4674, "step": 16230 }, { "epoch": 0.7217109065848561, "grad_norm": 0.059428151696920395, "learning_rate": 0.0001950445420089849, "loss": 1.4658, "step": 16232 }, { "epoch": 0.7217998310435285, "grad_norm": 0.05923604965209961, "learning_rate": 0.0001949287049809813, "loss": 1.4643, "step": 16234 }, { "epoch": 0.7218887555022009, "grad_norm": 0.05947105586528778, "learning_rate": 0.00019481289403181623, "loss": 1.4598, "step": 16236 }, { "epoch": 0.7219776799608733, "grad_norm": 0.05970315635204315, "learning_rate": 0.00019469710917138994, "loss": 1.4593, "step": 16238 }, { "epoch": 0.7220666044195456, "grad_norm": 0.05884910002350807, "learning_rate": 0.00019458135040960036, "loss": 1.4637, "step": 16240 }, { "epoch": 0.722155528878218, "grad_norm": 0.05918232724070549, "learning_rate": 0.00019446561775634264, "loss": 1.458, "step": 16242 }, { "epoch": 0.7222444533368904, "grad_norm": 0.05901836231350899, "learning_rate": 0.00019434991122151057, "loss": 1.4594, "step": 16244 }, { "epoch": 0.7223333777955626, "grad_norm": 0.05842548981308937, "learning_rate": 0.0001942342308149947, "loss": 1.466, "step": 16246 }, { "epoch": 0.722422302254235, "grad_norm": 0.05925069749355316, "learning_rate": 0.00019411857654668457, "loss": 1.4643, "step": 16248 }, { "epoch": 0.7225112267129074, "grad_norm": 0.05993708595633507, "learning_rate": 0.0001940029484264665, "loss": 1.4551, "step": 16250 }, { "epoch": 0.7226001511715797, "grad_norm": 0.060077860951423645, "learning_rate": 0.00019388734646422472, "loss": 1.4578, "step": 16252 }, { "epoch": 0.7226890756302521, "grad_norm": 0.05832253396511078, "learning_rate": 0.00019377177066984158, "loss": 1.4647, "step": 16254 }, { "epoch": 0.7227780000889245, "grad_norm": 0.06057137995958328, "learning_rate": 0.00019365622105319713, "loss": 1.4583, "step": 16256 }, { "epoch": 0.7228669245475968, "grad_norm": 0.060661908239126205, "learning_rate": 0.00019354069762416877, "loss": 1.4647, "step": 16258 }, { "epoch": 0.7229558490062692, "grad_norm": 0.058103371411561966, "learning_rate": 0.00019342520039263207, "loss": 1.4619, "step": 16260 }, { "epoch": 0.7230447734649416, "grad_norm": 0.058487582951784134, "learning_rate": 0.0001933097293684603, "loss": 1.4574, "step": 16262 }, { "epoch": 0.7231336979236139, "grad_norm": 0.062292974442243576, "learning_rate": 0.00019319428456152454, "loss": 1.4686, "step": 16264 }, { "epoch": 0.7232226223822863, "grad_norm": 0.05761106684803963, "learning_rate": 0.0001930788659816932, "loss": 1.4576, "step": 16266 }, { "epoch": 0.7233115468409586, "grad_norm": 0.058435920625925064, "learning_rate": 0.00019296347363883303, "loss": 1.4618, "step": 16268 }, { "epoch": 0.7234004712996309, "grad_norm": 0.06008351221680641, "learning_rate": 0.00019284810754280834, "loss": 1.4556, "step": 16270 }, { "epoch": 0.7234893957583033, "grad_norm": 0.05883776769042015, "learning_rate": 0.00019273276770348098, "loss": 1.4617, "step": 16272 }, { "epoch": 0.7235783202169757, "grad_norm": 0.058516763150691986, "learning_rate": 0.00019261745413071035, "loss": 1.4609, "step": 16274 }, { "epoch": 0.723667244675648, "grad_norm": 0.0610274001955986, "learning_rate": 0.00019250216683435473, "loss": 1.4621, "step": 16276 }, { "epoch": 0.7237561691343204, "grad_norm": 0.06014120578765869, "learning_rate": 0.00019238690582426893, "loss": 1.466, "step": 16278 }, { "epoch": 0.7238450935929928, "grad_norm": 0.06050468608736992, "learning_rate": 0.0001922716711103059, "loss": 1.466, "step": 16280 }, { "epoch": 0.7239340180516651, "grad_norm": 0.05953977629542351, "learning_rate": 0.0001921564627023165, "loss": 1.4586, "step": 16282 }, { "epoch": 0.7240229425103375, "grad_norm": 0.059618160128593445, "learning_rate": 0.00019204128061014935, "loss": 1.4582, "step": 16284 }, { "epoch": 0.7241118669690099, "grad_norm": 0.06013248860836029, "learning_rate": 0.0001919261248436508, "loss": 1.4617, "step": 16286 }, { "epoch": 0.7242007914276822, "grad_norm": 0.05888787657022476, "learning_rate": 0.00019181099541266456, "loss": 1.4647, "step": 16288 }, { "epoch": 0.7242897158863545, "grad_norm": 0.05903095752000809, "learning_rate": 0.00019169589232703255, "loss": 1.4615, "step": 16290 }, { "epoch": 0.7243786403450269, "grad_norm": 0.059270888566970825, "learning_rate": 0.00019158081559659453, "loss": 1.4635, "step": 16292 }, { "epoch": 0.7244675648036992, "grad_norm": 0.06252747029066086, "learning_rate": 0.00019146576523118754, "loss": 1.4604, "step": 16294 }, { "epoch": 0.7245564892623716, "grad_norm": 0.060627248138189316, "learning_rate": 0.00019135074124064627, "loss": 1.4606, "step": 16296 }, { "epoch": 0.724645413721044, "grad_norm": 0.06071724370121956, "learning_rate": 0.00019123574363480417, "loss": 1.4627, "step": 16298 }, { "epoch": 0.7247343381797163, "grad_norm": 0.06156003847718239, "learning_rate": 0.0001911207724234912, "loss": 1.4647, "step": 16300 }, { "epoch": 0.7248232626383887, "grad_norm": 0.0580633170902729, "learning_rate": 0.0001910058276165359, "loss": 1.4643, "step": 16302 }, { "epoch": 0.7249121870970611, "grad_norm": 0.060547344386577606, "learning_rate": 0.00019089090922376405, "loss": 1.4638, "step": 16304 }, { "epoch": 0.7250011115557334, "grad_norm": 0.060422513633966446, "learning_rate": 0.00019077601725499944, "loss": 1.456, "step": 16306 }, { "epoch": 0.7250900360144058, "grad_norm": 0.059972185641527176, "learning_rate": 0.00019066115172006377, "loss": 1.4615, "step": 16308 }, { "epoch": 0.7251789604730782, "grad_norm": 0.05940213426947594, "learning_rate": 0.0001905463126287759, "loss": 1.4647, "step": 16310 }, { "epoch": 0.7252678849317504, "grad_norm": 0.06141269952058792, "learning_rate": 0.00019043149999095293, "loss": 1.4681, "step": 16312 }, { "epoch": 0.7253568093904228, "grad_norm": 0.06061433255672455, "learning_rate": 0.00019031671381640976, "loss": 1.4581, "step": 16314 }, { "epoch": 0.7254457338490952, "grad_norm": 0.05905766785144806, "learning_rate": 0.00019020195411495838, "loss": 1.459, "step": 16316 }, { "epoch": 0.7255346583077675, "grad_norm": 0.05946394428610802, "learning_rate": 0.0001900872208964093, "loss": 1.4631, "step": 16318 }, { "epoch": 0.7256235827664399, "grad_norm": 0.060677699744701385, "learning_rate": 0.00018997251417057044, "loss": 1.4648, "step": 16320 }, { "epoch": 0.7257125072251123, "grad_norm": 0.05936264991760254, "learning_rate": 0.00018985783394724715, "loss": 1.4605, "step": 16322 }, { "epoch": 0.7258014316837846, "grad_norm": 0.060269761830568314, "learning_rate": 0.00018974318023624293, "loss": 1.4642, "step": 16324 }, { "epoch": 0.725890356142457, "grad_norm": 0.05980468913912773, "learning_rate": 0.00018962855304735916, "loss": 1.4613, "step": 16326 }, { "epoch": 0.7259792806011294, "grad_norm": 0.05888427793979645, "learning_rate": 0.0001895139523903942, "loss": 1.4571, "step": 16328 }, { "epoch": 0.7260682050598017, "grad_norm": 0.05930918827652931, "learning_rate": 0.00018939937827514507, "loss": 1.4606, "step": 16330 }, { "epoch": 0.7261571295184741, "grad_norm": 0.06069708615541458, "learning_rate": 0.00018928483071140552, "loss": 1.4585, "step": 16332 }, { "epoch": 0.7262460539771464, "grad_norm": 0.05740377679467201, "learning_rate": 0.0001891703097089682, "loss": 1.4629, "step": 16334 }, { "epoch": 0.7263349784358187, "grad_norm": 0.05887899175286293, "learning_rate": 0.00018905581527762266, "loss": 1.4606, "step": 16336 }, { "epoch": 0.7264239028944911, "grad_norm": 0.06021280959248543, "learning_rate": 0.00018894134742715614, "loss": 1.4624, "step": 16338 }, { "epoch": 0.7265128273531635, "grad_norm": 0.06010861694812775, "learning_rate": 0.00018882690616735397, "loss": 1.4649, "step": 16340 }, { "epoch": 0.7266017518118358, "grad_norm": 0.05954209715127945, "learning_rate": 0.00018871249150799934, "loss": 1.4681, "step": 16342 }, { "epoch": 0.7266906762705082, "grad_norm": 0.058117982000112534, "learning_rate": 0.00018859810345887263, "loss": 1.4617, "step": 16344 }, { "epoch": 0.7267796007291806, "grad_norm": 0.05962996557354927, "learning_rate": 0.00018848374202975232, "loss": 1.4578, "step": 16346 }, { "epoch": 0.7268685251878529, "grad_norm": 0.06057076156139374, "learning_rate": 0.00018836940723041467, "loss": 1.4604, "step": 16348 }, { "epoch": 0.7269574496465253, "grad_norm": 0.06041696295142174, "learning_rate": 0.00018825509907063325, "loss": 1.4573, "step": 16350 }, { "epoch": 0.7270463741051977, "grad_norm": 0.059521712362766266, "learning_rate": 0.00018814081756017998, "loss": 1.4587, "step": 16352 }, { "epoch": 0.72713529856387, "grad_norm": 0.05939910560846329, "learning_rate": 0.00018802656270882357, "loss": 1.4637, "step": 16354 }, { "epoch": 0.7272242230225423, "grad_norm": 0.05934542044997215, "learning_rate": 0.00018791233452633178, "loss": 1.4619, "step": 16356 }, { "epoch": 0.7273131474812147, "grad_norm": 0.05895724520087242, "learning_rate": 0.00018779813302246894, "loss": 1.461, "step": 16358 }, { "epoch": 0.727402071939887, "grad_norm": 0.05960603058338165, "learning_rate": 0.00018768395820699735, "loss": 1.4599, "step": 16360 }, { "epoch": 0.7274909963985594, "grad_norm": 0.05965695157647133, "learning_rate": 0.00018756981008967734, "loss": 1.4609, "step": 16362 }, { "epoch": 0.7275799208572318, "grad_norm": 0.06047413870692253, "learning_rate": 0.00018745568868026698, "loss": 1.4629, "step": 16364 }, { "epoch": 0.7276688453159041, "grad_norm": 0.05957818776369095, "learning_rate": 0.00018734159398852148, "loss": 1.4595, "step": 16366 }, { "epoch": 0.7277577697745765, "grad_norm": 0.06116658076643944, "learning_rate": 0.0001872275260241944, "loss": 1.4633, "step": 16368 }, { "epoch": 0.7278466942332489, "grad_norm": 0.058435603976249695, "learning_rate": 0.00018711348479703673, "loss": 1.4567, "step": 16370 }, { "epoch": 0.7279356186919212, "grad_norm": 0.059727638959884644, "learning_rate": 0.00018699947031679736, "loss": 1.465, "step": 16372 }, { "epoch": 0.7280245431505936, "grad_norm": 0.06051727011799812, "learning_rate": 0.00018688548259322252, "loss": 1.4598, "step": 16374 }, { "epoch": 0.7281134676092659, "grad_norm": 0.05949892848730087, "learning_rate": 0.00018677152163605642, "loss": 1.4609, "step": 16376 }, { "epoch": 0.7282023920679382, "grad_norm": 0.060253631323575974, "learning_rate": 0.0001866575874550412, "loss": 1.4575, "step": 16378 }, { "epoch": 0.7282913165266106, "grad_norm": 0.05913444608449936, "learning_rate": 0.0001865436800599163, "loss": 1.4526, "step": 16380 }, { "epoch": 0.728380240985283, "grad_norm": 0.06042875722050667, "learning_rate": 0.00018642979946041855, "loss": 1.4702, "step": 16382 }, { "epoch": 0.7284691654439553, "grad_norm": 0.0590120404958725, "learning_rate": 0.0001863159456662838, "loss": 1.4635, "step": 16384 }, { "epoch": 0.7285580899026277, "grad_norm": 0.058893073350191116, "learning_rate": 0.00018620211868724417, "loss": 1.4607, "step": 16386 }, { "epoch": 0.7286470143613001, "grad_norm": 0.05858619511127472, "learning_rate": 0.00018608831853303054, "loss": 1.4628, "step": 16388 }, { "epoch": 0.7287359388199725, "grad_norm": 0.05882981792092323, "learning_rate": 0.0001859745452133706, "loss": 1.4651, "step": 16390 }, { "epoch": 0.7288248632786448, "grad_norm": 0.05942827835679054, "learning_rate": 0.0001858607987379904, "loss": 1.4644, "step": 16392 }, { "epoch": 0.7289137877373172, "grad_norm": 0.059885479509830475, "learning_rate": 0.00018574707911661365, "loss": 1.4578, "step": 16394 }, { "epoch": 0.7290027121959896, "grad_norm": 0.06129121780395508, "learning_rate": 0.00018563338635896125, "loss": 1.4568, "step": 16396 }, { "epoch": 0.7290916366546618, "grad_norm": 0.060367047786712646, "learning_rate": 0.0001855197204747524, "loss": 1.4601, "step": 16398 }, { "epoch": 0.7291805611133342, "grad_norm": 0.05986754596233368, "learning_rate": 0.00018540608147370386, "loss": 1.4617, "step": 16400 }, { "epoch": 0.7292694855720065, "grad_norm": 0.0603383369743824, "learning_rate": 0.0001852924693655297, "loss": 1.4597, "step": 16402 }, { "epoch": 0.7293584100306789, "grad_norm": 0.059178128838539124, "learning_rate": 0.00018517888415994205, "loss": 1.4629, "step": 16404 }, { "epoch": 0.7294473344893513, "grad_norm": 0.05977354943752289, "learning_rate": 0.00018506532586665098, "loss": 1.4619, "step": 16406 }, { "epoch": 0.7295362589480237, "grad_norm": 0.06036766618490219, "learning_rate": 0.00018495179449536358, "loss": 1.457, "step": 16408 }, { "epoch": 0.729625183406696, "grad_norm": 0.05909400433301926, "learning_rate": 0.0001848382900557853, "loss": 1.4589, "step": 16410 }, { "epoch": 0.7297141078653684, "grad_norm": 0.059279441833496094, "learning_rate": 0.0001847248125576187, "loss": 1.4622, "step": 16412 }, { "epoch": 0.7298030323240408, "grad_norm": 0.06124156713485718, "learning_rate": 0.0001846113620105645, "loss": 1.4588, "step": 16414 }, { "epoch": 0.7298919567827131, "grad_norm": 0.05898246914148331, "learning_rate": 0.0001844979384243211, "loss": 1.4576, "step": 16416 }, { "epoch": 0.7299808812413855, "grad_norm": 0.05937924608588219, "learning_rate": 0.00018438454180858417, "loss": 1.4656, "step": 16418 }, { "epoch": 0.7300698057000578, "grad_norm": 0.05933384969830513, "learning_rate": 0.0001842711721730474, "loss": 1.4565, "step": 16420 }, { "epoch": 0.7301587301587301, "grad_norm": 0.05890939012169838, "learning_rate": 0.00018415782952740245, "loss": 1.4622, "step": 16422 }, { "epoch": 0.7302476546174025, "grad_norm": 0.059906478971242905, "learning_rate": 0.00018404451388133786, "loss": 1.4655, "step": 16424 }, { "epoch": 0.7303365790760749, "grad_norm": 0.05846390128135681, "learning_rate": 0.00018393122524454065, "loss": 1.4616, "step": 16426 }, { "epoch": 0.7304255035347472, "grad_norm": 0.0599554218351841, "learning_rate": 0.00018381796362669533, "loss": 1.4652, "step": 16428 }, { "epoch": 0.7305144279934196, "grad_norm": 0.058733414858579636, "learning_rate": 0.0001837047290374837, "loss": 1.4564, "step": 16430 }, { "epoch": 0.730603352452092, "grad_norm": 0.06102525070309639, "learning_rate": 0.00018359152148658564, "loss": 1.4606, "step": 16432 }, { "epoch": 0.7306922769107643, "grad_norm": 0.05933784320950508, "learning_rate": 0.00018347834098367895, "loss": 1.4593, "step": 16434 }, { "epoch": 0.7307812013694367, "grad_norm": 0.06041508913040161, "learning_rate": 0.0001833651875384383, "loss": 1.4602, "step": 16436 }, { "epoch": 0.7308701258281091, "grad_norm": 0.05959257856011391, "learning_rate": 0.000183252061160537, "loss": 1.4654, "step": 16438 }, { "epoch": 0.7309590502867814, "grad_norm": 0.05934099480509758, "learning_rate": 0.00018313896185964497, "loss": 1.456, "step": 16440 }, { "epoch": 0.7310479747454537, "grad_norm": 0.06000977009534836, "learning_rate": 0.0001830258896454312, "loss": 1.4548, "step": 16442 }, { "epoch": 0.7311368992041261, "grad_norm": 0.05849349498748779, "learning_rate": 0.0001829128445275613, "loss": 1.4563, "step": 16444 }, { "epoch": 0.7312258236627984, "grad_norm": 0.060585957020521164, "learning_rate": 0.0001827998265156986, "loss": 1.4601, "step": 16446 }, { "epoch": 0.7313147481214708, "grad_norm": 0.059966158121824265, "learning_rate": 0.00018268683561950455, "loss": 1.462, "step": 16448 }, { "epoch": 0.7314036725801432, "grad_norm": 0.05956388637423515, "learning_rate": 0.0001825738718486384, "loss": 1.4581, "step": 16450 }, { "epoch": 0.7314925970388155, "grad_norm": 0.05950170010328293, "learning_rate": 0.00018246093521275632, "loss": 1.459, "step": 16452 }, { "epoch": 0.7315815214974879, "grad_norm": 0.05926720052957535, "learning_rate": 0.00018234802572151282, "loss": 1.4568, "step": 16454 }, { "epoch": 0.7316704459561603, "grad_norm": 0.05866430699825287, "learning_rate": 0.00018223514338455994, "loss": 1.4574, "step": 16456 }, { "epoch": 0.7317593704148326, "grad_norm": 0.05895557999610901, "learning_rate": 0.00018212228821154754, "loss": 1.4648, "step": 16458 }, { "epoch": 0.731848294873505, "grad_norm": 0.06050115451216698, "learning_rate": 0.0001820094602121228, "loss": 1.4635, "step": 16460 }, { "epoch": 0.7319372193321774, "grad_norm": 0.06033334881067276, "learning_rate": 0.0001818966593959303, "loss": 1.4609, "step": 16462 }, { "epoch": 0.7320261437908496, "grad_norm": 0.0586191788315773, "learning_rate": 0.00018178388577261363, "loss": 1.456, "step": 16464 }, { "epoch": 0.732115068249522, "grad_norm": 0.06057421490550041, "learning_rate": 0.00018167113935181273, "loss": 1.4593, "step": 16466 }, { "epoch": 0.7322039927081944, "grad_norm": 0.05998922884464264, "learning_rate": 0.00018155842014316547, "loss": 1.4598, "step": 16468 }, { "epoch": 0.7322929171668667, "grad_norm": 0.05810751020908356, "learning_rate": 0.0001814457281563078, "loss": 1.465, "step": 16470 }, { "epoch": 0.7323818416255391, "grad_norm": 0.05943676084280014, "learning_rate": 0.00018133306340087325, "loss": 1.4579, "step": 16472 }, { "epoch": 0.7324707660842115, "grad_norm": 0.05965890362858772, "learning_rate": 0.00018122042588649263, "loss": 1.4541, "step": 16474 }, { "epoch": 0.7325596905428838, "grad_norm": 0.05899742990732193, "learning_rate": 0.00018110781562279477, "loss": 1.4605, "step": 16476 }, { "epoch": 0.7326486150015562, "grad_norm": 0.06035918369889259, "learning_rate": 0.0001809952326194062, "loss": 1.4651, "step": 16478 }, { "epoch": 0.7327375394602286, "grad_norm": 0.05863313376903534, "learning_rate": 0.00018088267688595112, "loss": 1.4603, "step": 16480 }, { "epoch": 0.7328264639189009, "grad_norm": 0.0600309744477272, "learning_rate": 0.00018077014843205093, "loss": 1.4588, "step": 16482 }, { "epoch": 0.7329153883775733, "grad_norm": 0.05916277691721916, "learning_rate": 0.00018065764726732524, "loss": 1.4567, "step": 16484 }, { "epoch": 0.7330043128362456, "grad_norm": 0.05814970284700394, "learning_rate": 0.0001805451734013913, "loss": 1.4591, "step": 16486 }, { "epoch": 0.7330932372949179, "grad_norm": 0.05853576585650444, "learning_rate": 0.0001804327268438638, "loss": 1.4623, "step": 16488 }, { "epoch": 0.7331821617535903, "grad_norm": 0.061256907880306244, "learning_rate": 0.00018032030760435476, "loss": 1.4625, "step": 16490 }, { "epoch": 0.7332710862122627, "grad_norm": 0.05845688655972481, "learning_rate": 0.0001802079156924749, "loss": 1.4617, "step": 16492 }, { "epoch": 0.733360010670935, "grad_norm": 0.058402858674526215, "learning_rate": 0.00018009555111783154, "loss": 1.457, "step": 16494 }, { "epoch": 0.7334489351296074, "grad_norm": 0.0583033561706543, "learning_rate": 0.00017998321389003043, "loss": 1.4627, "step": 16496 }, { "epoch": 0.7335378595882798, "grad_norm": 0.05811697617173195, "learning_rate": 0.00017987090401867423, "loss": 1.4628, "step": 16498 }, { "epoch": 0.7336267840469521, "grad_norm": 0.060463517904281616, "learning_rate": 0.00017975862151336396, "loss": 1.4621, "step": 16500 }, { "epoch": 0.7336267840469521, "eval_loss": 1.4455397129058838, "eval_runtime": 12.4314, "eval_samples_per_second": 555.851, "eval_steps_per_second": 69.501, "step": 16500 }, { "epoch": 0.7337157085056245, "grad_norm": 0.06006486341357231, "learning_rate": 0.0001796463663836982, "loss": 1.4574, "step": 16502 }, { "epoch": 0.7338046329642969, "grad_norm": 0.05787288397550583, "learning_rate": 0.00017953413863927253, "loss": 1.4524, "step": 16504 }, { "epoch": 0.7338935574229691, "grad_norm": 0.058139074593782425, "learning_rate": 0.000179421938289681, "loss": 1.4588, "step": 16506 }, { "epoch": 0.7339824818816415, "grad_norm": 0.05989261716604233, "learning_rate": 0.00017930976534451503, "loss": 1.4555, "step": 16508 }, { "epoch": 0.7340714063403139, "grad_norm": 0.05919159948825836, "learning_rate": 0.00017919761981336347, "loss": 1.4618, "step": 16510 }, { "epoch": 0.7341603307989862, "grad_norm": 0.059001095592975616, "learning_rate": 0.00017908550170581307, "loss": 1.4559, "step": 16512 }, { "epoch": 0.7342492552576586, "grad_norm": 0.058799173682928085, "learning_rate": 0.00017897341103144837, "loss": 1.4628, "step": 16514 }, { "epoch": 0.734338179716331, "grad_norm": 0.06127883121371269, "learning_rate": 0.00017886134779985103, "loss": 1.4597, "step": 16516 }, { "epoch": 0.7344271041750033, "grad_norm": 0.05968368798494339, "learning_rate": 0.00017874931202060108, "loss": 1.4596, "step": 16518 }, { "epoch": 0.7345160286336757, "grad_norm": 0.05886247009038925, "learning_rate": 0.00017863730370327546, "loss": 1.4568, "step": 16520 }, { "epoch": 0.7346049530923481, "grad_norm": 0.057910215109586716, "learning_rate": 0.00017852532285744931, "loss": 1.4605, "step": 16522 }, { "epoch": 0.7346938775510204, "grad_norm": 0.06086508929729462, "learning_rate": 0.0001784133694926955, "loss": 1.4616, "step": 16524 }, { "epoch": 0.7347828020096928, "grad_norm": 0.05869476869702339, "learning_rate": 0.0001783014436185838, "loss": 1.456, "step": 16526 }, { "epoch": 0.7348717264683651, "grad_norm": 0.05936345458030701, "learning_rate": 0.0001781895452446824, "loss": 1.4626, "step": 16528 }, { "epoch": 0.7349606509270374, "grad_norm": 0.05925047770142555, "learning_rate": 0.000178077674380557, "loss": 1.4621, "step": 16530 }, { "epoch": 0.7350495753857098, "grad_norm": 0.05956713482737541, "learning_rate": 0.0001779658310357705, "loss": 1.4588, "step": 16532 }, { "epoch": 0.7351384998443822, "grad_norm": 0.06348378211259842, "learning_rate": 0.00017785401521988391, "loss": 1.4613, "step": 16534 }, { "epoch": 0.7352274243030545, "grad_norm": 0.059166379272937775, "learning_rate": 0.00017774222694245595, "loss": 1.4652, "step": 16536 }, { "epoch": 0.7353163487617269, "grad_norm": 0.0596497543156147, "learning_rate": 0.00017763046621304242, "loss": 1.4576, "step": 16538 }, { "epoch": 0.7354052732203993, "grad_norm": 0.0589081346988678, "learning_rate": 0.00017751873304119742, "loss": 1.4604, "step": 16540 }, { "epoch": 0.7354941976790716, "grad_norm": 0.06031009927392006, "learning_rate": 0.00017740702743647207, "loss": 1.4592, "step": 16542 }, { "epoch": 0.735583122137744, "grad_norm": 0.06029221788048744, "learning_rate": 0.00017729534940841564, "loss": 1.4599, "step": 16544 }, { "epoch": 0.7356720465964164, "grad_norm": 0.059007350355386734, "learning_rate": 0.00017718369896657505, "loss": 1.4602, "step": 16546 }, { "epoch": 0.7357609710550888, "grad_norm": 0.058588165789842606, "learning_rate": 0.0001770720761204943, "loss": 1.4562, "step": 16548 }, { "epoch": 0.735849895513761, "grad_norm": 0.05922502651810646, "learning_rate": 0.00017696048087971556, "loss": 1.4607, "step": 16550 }, { "epoch": 0.7359388199724334, "grad_norm": 0.06027013808488846, "learning_rate": 0.00017684891325377866, "loss": 1.4597, "step": 16552 }, { "epoch": 0.7360277444311057, "grad_norm": 0.05908496677875519, "learning_rate": 0.0001767373732522206, "loss": 1.462, "step": 16554 }, { "epoch": 0.7361166688897781, "grad_norm": 0.05901119112968445, "learning_rate": 0.00017662586088457643, "loss": 1.4629, "step": 16556 }, { "epoch": 0.7362055933484505, "grad_norm": 0.0609251968562603, "learning_rate": 0.0001765143761603789, "loss": 1.4652, "step": 16558 }, { "epoch": 0.7362945178071229, "grad_norm": 0.06070336699485779, "learning_rate": 0.0001764029190891579, "loss": 1.4599, "step": 16560 }, { "epoch": 0.7363834422657952, "grad_norm": 0.06053367629647255, "learning_rate": 0.0001762914896804414, "loss": 1.4651, "step": 16562 }, { "epoch": 0.7364723667244676, "grad_norm": 0.061270855367183685, "learning_rate": 0.0001761800879437549, "loss": 1.4535, "step": 16564 }, { "epoch": 0.73656129118314, "grad_norm": 0.05957317352294922, "learning_rate": 0.00017606871388862173, "loss": 1.4577, "step": 16566 }, { "epoch": 0.7366502156418123, "grad_norm": 0.05948825180530548, "learning_rate": 0.00017595736752456237, "loss": 1.4614, "step": 16568 }, { "epoch": 0.7367391401004847, "grad_norm": 0.060310911387205124, "learning_rate": 0.0001758460488610949, "loss": 1.4581, "step": 16570 }, { "epoch": 0.736828064559157, "grad_norm": 0.06115229055285454, "learning_rate": 0.000175734757907736, "loss": 1.4584, "step": 16572 }, { "epoch": 0.7369169890178293, "grad_norm": 0.059415318071842194, "learning_rate": 0.00017562349467399902, "loss": 1.4579, "step": 16574 }, { "epoch": 0.7370059134765017, "grad_norm": 0.059758272022008896, "learning_rate": 0.000175512259169395, "loss": 1.4589, "step": 16576 }, { "epoch": 0.737094837935174, "grad_norm": 0.059409987181425095, "learning_rate": 0.0001754010514034331, "loss": 1.4623, "step": 16578 }, { "epoch": 0.7371837623938464, "grad_norm": 0.06052602455019951, "learning_rate": 0.0001752898713856197, "loss": 1.4563, "step": 16580 }, { "epoch": 0.7372726868525188, "grad_norm": 0.05821981281042099, "learning_rate": 0.0001751787191254593, "loss": 1.4576, "step": 16582 }, { "epoch": 0.7373616113111912, "grad_norm": 0.06081145256757736, "learning_rate": 0.0001750675946324532, "loss": 1.4663, "step": 16584 }, { "epoch": 0.7374505357698635, "grad_norm": 0.06057305634021759, "learning_rate": 0.00017495649791610102, "loss": 1.4634, "step": 16586 }, { "epoch": 0.7375394602285359, "grad_norm": 0.05856640636920929, "learning_rate": 0.00017484542898590005, "loss": 1.4573, "step": 16588 }, { "epoch": 0.7376283846872083, "grad_norm": 0.0607236847281456, "learning_rate": 0.00017473438785134472, "loss": 1.455, "step": 16590 }, { "epoch": 0.7377173091458806, "grad_norm": 0.060658324509859085, "learning_rate": 0.00017462337452192695, "loss": 1.4624, "step": 16592 }, { "epoch": 0.7378062336045529, "grad_norm": 0.06017633154988289, "learning_rate": 0.00017451238900713733, "loss": 1.4547, "step": 16594 }, { "epoch": 0.7378951580632253, "grad_norm": 0.05868356674909592, "learning_rate": 0.00017440143131646307, "loss": 1.4602, "step": 16596 }, { "epoch": 0.7379840825218976, "grad_norm": 0.05967675894498825, "learning_rate": 0.0001742905014593892, "loss": 1.4572, "step": 16598 }, { "epoch": 0.73807300698057, "grad_norm": 0.06043080985546112, "learning_rate": 0.00017417959944539862, "loss": 1.4637, "step": 16600 }, { "epoch": 0.7381619314392424, "grad_norm": 0.059699367731809616, "learning_rate": 0.00017406872528397173, "loss": 1.4584, "step": 16602 }, { "epoch": 0.7382508558979147, "grad_norm": 0.059518732130527496, "learning_rate": 0.00017395787898458664, "loss": 1.4556, "step": 16604 }, { "epoch": 0.7383397803565871, "grad_norm": 0.059061914682388306, "learning_rate": 0.0001738470605567188, "loss": 1.4565, "step": 16606 }, { "epoch": 0.7384287048152595, "grad_norm": 0.058894891291856766, "learning_rate": 0.00017373627000984144, "loss": 1.4545, "step": 16608 }, { "epoch": 0.7385176292739318, "grad_norm": 0.05930294841527939, "learning_rate": 0.00017362550735342574, "loss": 1.459, "step": 16610 }, { "epoch": 0.7386065537326042, "grad_norm": 0.06055053696036339, "learning_rate": 0.00017351477259693976, "loss": 1.4617, "step": 16612 }, { "epoch": 0.7386954781912765, "grad_norm": 0.05868672579526901, "learning_rate": 0.00017340406574984984, "loss": 1.4601, "step": 16614 }, { "epoch": 0.7387844026499488, "grad_norm": 0.05979511886835098, "learning_rate": 0.00017329338682161978, "loss": 1.4574, "step": 16616 }, { "epoch": 0.7388733271086212, "grad_norm": 0.05988418683409691, "learning_rate": 0.00017318273582171058, "loss": 1.459, "step": 16618 }, { "epoch": 0.7389622515672936, "grad_norm": 0.05983160063624382, "learning_rate": 0.0001730721127595814, "loss": 1.4637, "step": 16620 }, { "epoch": 0.7390511760259659, "grad_norm": 0.05880555137991905, "learning_rate": 0.00017296151764468887, "loss": 1.4618, "step": 16622 }, { "epoch": 0.7391401004846383, "grad_norm": 0.058000050485134125, "learning_rate": 0.00017285095048648685, "loss": 1.4601, "step": 16624 }, { "epoch": 0.7392290249433107, "grad_norm": 0.058923520147800446, "learning_rate": 0.00017274041129442753, "loss": 1.46, "step": 16626 }, { "epoch": 0.739317949401983, "grad_norm": 0.059510573744773865, "learning_rate": 0.00017262990007795975, "loss": 1.4588, "step": 16628 }, { "epoch": 0.7394068738606554, "grad_norm": 0.05785347521305084, "learning_rate": 0.0001725194168465309, "loss": 1.4559, "step": 16630 }, { "epoch": 0.7394957983193278, "grad_norm": 0.05921796336770058, "learning_rate": 0.00017240896160958559, "loss": 1.4559, "step": 16632 }, { "epoch": 0.7395847227780001, "grad_norm": 0.059610430151224136, "learning_rate": 0.00017229853437656568, "loss": 1.4579, "step": 16634 }, { "epoch": 0.7396736472366724, "grad_norm": 0.058959461748600006, "learning_rate": 0.00017218813515691117, "loss": 1.4584, "step": 16636 }, { "epoch": 0.7397625716953448, "grad_norm": 0.05981831252574921, "learning_rate": 0.00017207776396005976, "loss": 1.4602, "step": 16638 }, { "epoch": 0.7398514961540171, "grad_norm": 0.060530826449394226, "learning_rate": 0.0001719674207954459, "loss": 1.4639, "step": 16640 }, { "epoch": 0.7399404206126895, "grad_norm": 0.05915441736578941, "learning_rate": 0.00017185710567250257, "loss": 1.4624, "step": 16642 }, { "epoch": 0.7400293450713619, "grad_norm": 0.05957896262407303, "learning_rate": 0.0001717468186006601, "loss": 1.4577, "step": 16644 }, { "epoch": 0.7401182695300342, "grad_norm": 0.05835951864719391, "learning_rate": 0.000171636559589346, "loss": 1.4562, "step": 16646 }, { "epoch": 0.7402071939887066, "grad_norm": 0.06005425006151199, "learning_rate": 0.00017152632864798594, "loss": 1.4611, "step": 16648 }, { "epoch": 0.740296118447379, "grad_norm": 0.06107817590236664, "learning_rate": 0.00017141612578600252, "loss": 1.4582, "step": 16650 }, { "epoch": 0.7403850429060513, "grad_norm": 0.05926118418574333, "learning_rate": 0.00017130595101281704, "loss": 1.4605, "step": 16652 }, { "epoch": 0.7404739673647237, "grad_norm": 0.06142883375287056, "learning_rate": 0.00017119580433784737, "loss": 1.469, "step": 16654 }, { "epoch": 0.7405628918233961, "grad_norm": 0.059701576828956604, "learning_rate": 0.0001710856857705092, "loss": 1.455, "step": 16656 }, { "epoch": 0.7406518162820683, "grad_norm": 0.059347156435251236, "learning_rate": 0.00017097559532021605, "loss": 1.4636, "step": 16658 }, { "epoch": 0.7407407407407407, "grad_norm": 0.05961744487285614, "learning_rate": 0.00017086553299637913, "loss": 1.4567, "step": 16660 }, { "epoch": 0.7408296651994131, "grad_norm": 0.05953764170408249, "learning_rate": 0.0001707554988084068, "loss": 1.4541, "step": 16662 }, { "epoch": 0.7409185896580854, "grad_norm": 0.060986440628767014, "learning_rate": 0.00017064549276570528, "loss": 1.467, "step": 16664 }, { "epoch": 0.7410075141167578, "grad_norm": 0.05836610123515129, "learning_rate": 0.00017053551487767867, "loss": 1.4574, "step": 16666 }, { "epoch": 0.7410964385754302, "grad_norm": 0.06008746847510338, "learning_rate": 0.00017042556515372797, "loss": 1.4545, "step": 16668 }, { "epoch": 0.7411853630341025, "grad_norm": 0.06051292270421982, "learning_rate": 0.00017031564360325236, "loss": 1.4583, "step": 16670 }, { "epoch": 0.7412742874927749, "grad_norm": 0.060724154114723206, "learning_rate": 0.00017020575023564844, "loss": 1.4589, "step": 16672 }, { "epoch": 0.7413632119514473, "grad_norm": 0.05805688351392746, "learning_rate": 0.00017009588506031043, "loss": 1.4577, "step": 16674 }, { "epoch": 0.7414521364101196, "grad_norm": 0.05907231569290161, "learning_rate": 0.0001699860480866301, "loss": 1.4653, "step": 16676 }, { "epoch": 0.741541060868792, "grad_norm": 0.060612257570028305, "learning_rate": 0.00016987623932399631, "loss": 1.4621, "step": 16678 }, { "epoch": 0.7416299853274643, "grad_norm": 0.059990622103214264, "learning_rate": 0.00016976645878179675, "loss": 1.4614, "step": 16680 }, { "epoch": 0.7417189097861366, "grad_norm": 0.058527737855911255, "learning_rate": 0.00016965670646941556, "loss": 1.4589, "step": 16682 }, { "epoch": 0.741807834244809, "grad_norm": 0.059194039553403854, "learning_rate": 0.00016954698239623472, "loss": 1.4593, "step": 16684 }, { "epoch": 0.7418967587034814, "grad_norm": 0.06000838056206703, "learning_rate": 0.0001694372865716341, "loss": 1.455, "step": 16686 }, { "epoch": 0.7419856831621537, "grad_norm": 0.05965815484523773, "learning_rate": 0.00016932761900499094, "loss": 1.4623, "step": 16688 }, { "epoch": 0.7420746076208261, "grad_norm": 0.05837101861834526, "learning_rate": 0.00016921797970568031, "loss": 1.4601, "step": 16690 }, { "epoch": 0.7421635320794985, "grad_norm": 0.06096314638853073, "learning_rate": 0.00016910836868307434, "loss": 1.4579, "step": 16692 }, { "epoch": 0.7422524565381708, "grad_norm": 0.05966602638363838, "learning_rate": 0.00016899878594654315, "loss": 1.4562, "step": 16694 }, { "epoch": 0.7423413809968432, "grad_norm": 0.059270136058330536, "learning_rate": 0.0001688892315054546, "loss": 1.4612, "step": 16696 }, { "epoch": 0.7424303054555156, "grad_norm": 0.05927656963467598, "learning_rate": 0.00016877970536917375, "loss": 1.455, "step": 16698 }, { "epoch": 0.742519229914188, "grad_norm": 0.06073769927024841, "learning_rate": 0.00016867020754706292, "loss": 1.4615, "step": 16700 }, { "epoch": 0.7426081543728602, "grad_norm": 0.05787941813468933, "learning_rate": 0.00016856073804848328, "loss": 1.4577, "step": 16702 }, { "epoch": 0.7426970788315326, "grad_norm": 0.06029796600341797, "learning_rate": 0.00016845129688279216, "loss": 1.4592, "step": 16704 }, { "epoch": 0.7427860032902049, "grad_norm": 0.05889935791492462, "learning_rate": 0.00016834188405934542, "loss": 1.4618, "step": 16706 }, { "epoch": 0.7428749277488773, "grad_norm": 0.058539580553770065, "learning_rate": 0.00016823249958749581, "loss": 1.4616, "step": 16708 }, { "epoch": 0.7429638522075497, "grad_norm": 0.06004004552960396, "learning_rate": 0.00016812314347659424, "loss": 1.4536, "step": 16710 }, { "epoch": 0.743052776666222, "grad_norm": 0.060982439666986465, "learning_rate": 0.00016801381573598907, "loss": 1.4549, "step": 16712 }, { "epoch": 0.7431417011248944, "grad_norm": 0.05949532985687256, "learning_rate": 0.00016790451637502574, "loss": 1.4583, "step": 16714 }, { "epoch": 0.7432306255835668, "grad_norm": 0.05871553719043732, "learning_rate": 0.00016779524540304786, "loss": 1.4598, "step": 16716 }, { "epoch": 0.7433195500422392, "grad_norm": 0.059430256485939026, "learning_rate": 0.00016768600282939655, "loss": 1.4618, "step": 16718 }, { "epoch": 0.7434084745009115, "grad_norm": 0.059652287513017654, "learning_rate": 0.00016757678866341003, "loss": 1.4654, "step": 16720 }, { "epoch": 0.7434973989595839, "grad_norm": 0.05944843962788582, "learning_rate": 0.00016746760291442452, "loss": 1.4606, "step": 16722 }, { "epoch": 0.7435863234182561, "grad_norm": 0.059113163501024246, "learning_rate": 0.00016735844559177392, "loss": 1.4584, "step": 16724 }, { "epoch": 0.7436752478769285, "grad_norm": 0.05857875198125839, "learning_rate": 0.00016724931670478911, "loss": 1.4563, "step": 16726 }, { "epoch": 0.7437641723356009, "grad_norm": 0.06093486398458481, "learning_rate": 0.00016714021626279912, "loss": 1.4632, "step": 16728 }, { "epoch": 0.7438530967942732, "grad_norm": 0.05900067463517189, "learning_rate": 0.0001670311442751305, "loss": 1.465, "step": 16730 }, { "epoch": 0.7439420212529456, "grad_norm": 0.05907110497355461, "learning_rate": 0.0001669221007511068, "loss": 1.4637, "step": 16732 }, { "epoch": 0.744030945711618, "grad_norm": 0.05955798551440239, "learning_rate": 0.00016681308570004992, "loss": 1.4578, "step": 16734 }, { "epoch": 0.7441198701702904, "grad_norm": 0.05999176949262619, "learning_rate": 0.00016670409913127865, "loss": 1.456, "step": 16736 }, { "epoch": 0.7442087946289627, "grad_norm": 0.05871756374835968, "learning_rate": 0.00016659514105410973, "loss": 1.4558, "step": 16738 }, { "epoch": 0.7442977190876351, "grad_norm": 0.05930541455745697, "learning_rate": 0.00016648621147785764, "loss": 1.4695, "step": 16740 }, { "epoch": 0.7443866435463075, "grad_norm": 0.0586344413459301, "learning_rate": 0.00016637731041183374, "loss": 1.4612, "step": 16742 }, { "epoch": 0.7444755680049797, "grad_norm": 0.05983453243970871, "learning_rate": 0.00016626843786534756, "loss": 1.4609, "step": 16744 }, { "epoch": 0.7445644924636521, "grad_norm": 0.058784399181604385, "learning_rate": 0.00016615959384770623, "loss": 1.4539, "step": 16746 }, { "epoch": 0.7446534169223245, "grad_norm": 0.05940399691462517, "learning_rate": 0.0001660507783682138, "loss": 1.4582, "step": 16748 }, { "epoch": 0.7447423413809968, "grad_norm": 0.05869759991765022, "learning_rate": 0.00016594199143617251, "loss": 1.4617, "step": 16750 }, { "epoch": 0.7448312658396692, "grad_norm": 0.059345047920942307, "learning_rate": 0.0001658332330608821, "loss": 1.4588, "step": 16752 }, { "epoch": 0.7449201902983416, "grad_norm": 0.059553761035203934, "learning_rate": 0.0001657245032516394, "loss": 1.4596, "step": 16754 }, { "epoch": 0.7450091147570139, "grad_norm": 0.05867783725261688, "learning_rate": 0.00016561580201773947, "loss": 1.4613, "step": 16756 }, { "epoch": 0.7450980392156863, "grad_norm": 0.057739853858947754, "learning_rate": 0.00016550712936847395, "loss": 1.4595, "step": 16758 }, { "epoch": 0.7451869636743587, "grad_norm": 0.0593368336558342, "learning_rate": 0.00016539848531313346, "loss": 1.4607, "step": 16760 }, { "epoch": 0.745275888133031, "grad_norm": 0.058571264147758484, "learning_rate": 0.00016528986986100497, "loss": 1.4567, "step": 16762 }, { "epoch": 0.7453648125917034, "grad_norm": 0.06027856841683388, "learning_rate": 0.00016518128302137324, "loss": 1.4572, "step": 16764 }, { "epoch": 0.7454537370503757, "grad_norm": 0.060379598289728165, "learning_rate": 0.000165072724803521, "loss": 1.4616, "step": 16766 }, { "epoch": 0.745542661509048, "grad_norm": 0.06043410673737526, "learning_rate": 0.00016496419521672834, "loss": 1.463, "step": 16768 }, { "epoch": 0.7456315859677204, "grad_norm": 0.06077970936894417, "learning_rate": 0.00016485569427027253, "loss": 1.4633, "step": 16770 }, { "epoch": 0.7457205104263928, "grad_norm": 0.05859680846333504, "learning_rate": 0.00016474722197342895, "loss": 1.4503, "step": 16772 }, { "epoch": 0.7458094348850651, "grad_norm": 0.06022398918867111, "learning_rate": 0.00016463877833547026, "loss": 1.4622, "step": 16774 }, { "epoch": 0.7458983593437375, "grad_norm": 0.05970166251063347, "learning_rate": 0.00016453036336566685, "loss": 1.4621, "step": 16776 }, { "epoch": 0.7459872838024099, "grad_norm": 0.05986715480685234, "learning_rate": 0.00016442197707328615, "loss": 1.4628, "step": 16778 }, { "epoch": 0.7460762082610822, "grad_norm": 0.05800659954547882, "learning_rate": 0.0001643136194675937, "loss": 1.4602, "step": 16780 }, { "epoch": 0.7461651327197546, "grad_norm": 0.05899570882320404, "learning_rate": 0.00016420529055785255, "loss": 1.4611, "step": 16782 }, { "epoch": 0.746254057178427, "grad_norm": 0.05969346687197685, "learning_rate": 0.00016409699035332297, "loss": 1.4599, "step": 16784 }, { "epoch": 0.7463429816370993, "grad_norm": 0.0593080073595047, "learning_rate": 0.00016398871886326262, "loss": 1.4559, "step": 16786 }, { "epoch": 0.7464319060957716, "grad_norm": 0.05878398194909096, "learning_rate": 0.00016388047609692763, "loss": 1.4562, "step": 16788 }, { "epoch": 0.746520830554444, "grad_norm": 0.05969779193401337, "learning_rate": 0.00016377226206357076, "loss": 1.4649, "step": 16790 }, { "epoch": 0.7466097550131163, "grad_norm": 0.059288714081048965, "learning_rate": 0.00016366407677244249, "loss": 1.4623, "step": 16792 }, { "epoch": 0.7466986794717887, "grad_norm": 0.06077198311686516, "learning_rate": 0.00016355592023279113, "loss": 1.459, "step": 16794 }, { "epoch": 0.7467876039304611, "grad_norm": 0.060099512338638306, "learning_rate": 0.00016344779245386237, "loss": 1.4619, "step": 16796 }, { "epoch": 0.7468765283891334, "grad_norm": 0.06000978872179985, "learning_rate": 0.00016333969344489962, "loss": 1.4646, "step": 16798 }, { "epoch": 0.7469654528478058, "grad_norm": 0.05997542664408684, "learning_rate": 0.00016323162321514335, "loss": 1.4584, "step": 16800 }, { "epoch": 0.7470543773064782, "grad_norm": 0.059203166514635086, "learning_rate": 0.00016312358177383195, "loss": 1.461, "step": 16802 }, { "epoch": 0.7471433017651505, "grad_norm": 0.059819675981998444, "learning_rate": 0.00016301556913020154, "loss": 1.4598, "step": 16804 }, { "epoch": 0.7472322262238229, "grad_norm": 0.05942592769861221, "learning_rate": 0.0001629075852934853, "loss": 1.4564, "step": 16806 }, { "epoch": 0.7473211506824953, "grad_norm": 0.05992693081498146, "learning_rate": 0.0001627996302729139, "loss": 1.4544, "step": 16808 }, { "epoch": 0.7474100751411675, "grad_norm": 0.06033800169825554, "learning_rate": 0.00016269170407771645, "loss": 1.4539, "step": 16810 }, { "epoch": 0.7474989995998399, "grad_norm": 0.05810123309493065, "learning_rate": 0.00016258380671711837, "loss": 1.4565, "step": 16812 }, { "epoch": 0.7475879240585123, "grad_norm": 0.059875525534152985, "learning_rate": 0.00016247593820034362, "loss": 1.4554, "step": 16814 }, { "epoch": 0.7476768485171846, "grad_norm": 0.05907121300697327, "learning_rate": 0.00016236809853661295, "loss": 1.4597, "step": 16816 }, { "epoch": 0.747765772975857, "grad_norm": 0.05930914729833603, "learning_rate": 0.00016226028773514513, "loss": 1.4576, "step": 16818 }, { "epoch": 0.7478546974345294, "grad_norm": 0.0589991994202137, "learning_rate": 0.0001621525058051564, "loss": 1.4528, "step": 16820 }, { "epoch": 0.7479436218932017, "grad_norm": 0.05885922908782959, "learning_rate": 0.00016204475275586016, "loss": 1.4642, "step": 16822 }, { "epoch": 0.7480325463518741, "grad_norm": 0.058898232877254486, "learning_rate": 0.0001619370285964678, "loss": 1.4609, "step": 16824 }, { "epoch": 0.7481214708105465, "grad_norm": 0.058432307094335556, "learning_rate": 0.0001618293333361882, "loss": 1.4588, "step": 16826 }, { "epoch": 0.7482103952692188, "grad_norm": 0.059765320271253586, "learning_rate": 0.0001617216669842273, "loss": 1.4541, "step": 16828 }, { "epoch": 0.7482993197278912, "grad_norm": 0.05895643308758736, "learning_rate": 0.000161614029549789, "loss": 1.4602, "step": 16830 }, { "epoch": 0.7483882441865635, "grad_norm": 0.05961558222770691, "learning_rate": 0.00016150642104207486, "loss": 1.4598, "step": 16832 }, { "epoch": 0.7484771686452358, "grad_norm": 0.06014033779501915, "learning_rate": 0.0001613988414702834, "loss": 1.4579, "step": 16834 }, { "epoch": 0.7485660931039082, "grad_norm": 0.06016629934310913, "learning_rate": 0.00016129129084361128, "loss": 1.4579, "step": 16836 }, { "epoch": 0.7486550175625806, "grad_norm": 0.05935119092464447, "learning_rate": 0.0001611837691712521, "loss": 1.4623, "step": 16838 }, { "epoch": 0.7487439420212529, "grad_norm": 0.060207169502973557, "learning_rate": 0.00016107627646239748, "loss": 1.4623, "step": 16840 }, { "epoch": 0.7488328664799253, "grad_norm": 0.05937547609210014, "learning_rate": 0.00016096881272623643, "loss": 1.4519, "step": 16842 }, { "epoch": 0.7489217909385977, "grad_norm": 0.059106145054101944, "learning_rate": 0.0001608613779719552, "loss": 1.4604, "step": 16844 }, { "epoch": 0.74901071539727, "grad_norm": 0.05962870270013809, "learning_rate": 0.00016075397220873794, "loss": 1.4623, "step": 16846 }, { "epoch": 0.7490996398559424, "grad_norm": 0.059200629591941833, "learning_rate": 0.0001606465954457663, "loss": 1.46, "step": 16848 }, { "epoch": 0.7491885643146148, "grad_norm": 0.05879664793610573, "learning_rate": 0.000160539247692219, "loss": 1.4581, "step": 16850 }, { "epoch": 0.7492774887732871, "grad_norm": 0.05944131687283516, "learning_rate": 0.0001604319289572727, "loss": 1.459, "step": 16852 }, { "epoch": 0.7493664132319594, "grad_norm": 0.05925022438168526, "learning_rate": 0.00016032463925010176, "loss": 1.4585, "step": 16854 }, { "epoch": 0.7494553376906318, "grad_norm": 0.06039819493889809, "learning_rate": 0.0001602173785798774, "loss": 1.466, "step": 16856 }, { "epoch": 0.7495442621493041, "grad_norm": 0.05966150015592575, "learning_rate": 0.0001601101469557689, "loss": 1.4556, "step": 16858 }, { "epoch": 0.7496331866079765, "grad_norm": 0.05846158042550087, "learning_rate": 0.000160002944386943, "loss": 1.4595, "step": 16860 }, { "epoch": 0.7497221110666489, "grad_norm": 0.05919952318072319, "learning_rate": 0.00015989577088256363, "loss": 1.4562, "step": 16862 }, { "epoch": 0.7498110355253212, "grad_norm": 0.05901890993118286, "learning_rate": 0.0001597886264517927, "loss": 1.4583, "step": 16864 }, { "epoch": 0.7498999599839936, "grad_norm": 0.058764733374118805, "learning_rate": 0.00015968151110378888, "loss": 1.4553, "step": 16866 }, { "epoch": 0.749988884442666, "grad_norm": 0.05931887403130531, "learning_rate": 0.00015957442484770956, "loss": 1.4591, "step": 16868 }, { "epoch": 0.7500778089013383, "grad_norm": 0.05878930538892746, "learning_rate": 0.00015946736769270863, "loss": 1.4572, "step": 16870 }, { "epoch": 0.7501667333600107, "grad_norm": 0.060362525284290314, "learning_rate": 0.00015936033964793767, "loss": 1.4579, "step": 16872 }, { "epoch": 0.750255657818683, "grad_norm": 0.059850580990314484, "learning_rate": 0.00015925334072254595, "loss": 1.4622, "step": 16874 }, { "epoch": 0.7503445822773553, "grad_norm": 0.05898622050881386, "learning_rate": 0.00015914637092568045, "loss": 1.4627, "step": 16876 }, { "epoch": 0.7504335067360277, "grad_norm": 0.05974709987640381, "learning_rate": 0.00015903943026648515, "loss": 1.4618, "step": 16878 }, { "epoch": 0.7505224311947001, "grad_norm": 0.05845978111028671, "learning_rate": 0.0001589325187541019, "loss": 1.4622, "step": 16880 }, { "epoch": 0.7506113556533724, "grad_norm": 0.059004079550504684, "learning_rate": 0.00015882563639766994, "loss": 1.4616, "step": 16882 }, { "epoch": 0.7507002801120448, "grad_norm": 0.06059623137116432, "learning_rate": 0.0001587187832063263, "loss": 1.4603, "step": 16884 }, { "epoch": 0.7507892045707172, "grad_norm": 0.05917629599571228, "learning_rate": 0.00015861195918920502, "loss": 1.4617, "step": 16886 }, { "epoch": 0.7508781290293896, "grad_norm": 0.0598984956741333, "learning_rate": 0.0001585051643554376, "loss": 1.4587, "step": 16888 }, { "epoch": 0.7509670534880619, "grad_norm": 0.061087653040885925, "learning_rate": 0.000158398398714154, "loss": 1.4605, "step": 16890 }, { "epoch": 0.7510559779467343, "grad_norm": 0.0595669187605381, "learning_rate": 0.00015829166227448066, "loss": 1.4579, "step": 16892 }, { "epoch": 0.7511449024054067, "grad_norm": 0.05967925488948822, "learning_rate": 0.0001581849550455417, "loss": 1.4628, "step": 16894 }, { "epoch": 0.7512338268640789, "grad_norm": 0.060816604644060135, "learning_rate": 0.0001580782770364591, "loss": 1.4586, "step": 16896 }, { "epoch": 0.7513227513227513, "grad_norm": 0.060024797916412354, "learning_rate": 0.00015797162825635218, "loss": 1.4575, "step": 16898 }, { "epoch": 0.7514116757814236, "grad_norm": 0.058512914925813675, "learning_rate": 0.00015786500871433796, "loss": 1.4558, "step": 16900 }, { "epoch": 0.751500600240096, "grad_norm": 0.05933115631341934, "learning_rate": 0.00015775841841953036, "loss": 1.4552, "step": 16902 }, { "epoch": 0.7515895246987684, "grad_norm": 0.059899043291807175, "learning_rate": 0.00015765185738104133, "loss": 1.4588, "step": 16904 }, { "epoch": 0.7516784491574408, "grad_norm": 0.05907595157623291, "learning_rate": 0.0001575453256079804, "loss": 1.4609, "step": 16906 }, { "epoch": 0.7517673736161131, "grad_norm": 0.05995360016822815, "learning_rate": 0.000157438823109454, "loss": 1.4613, "step": 16908 }, { "epoch": 0.7518562980747855, "grad_norm": 0.05959051102399826, "learning_rate": 0.00015733234989456663, "loss": 1.4605, "step": 16910 }, { "epoch": 0.7519452225334579, "grad_norm": 0.06011039391160011, "learning_rate": 0.00015722590597242025, "loss": 1.4554, "step": 16912 }, { "epoch": 0.7520341469921302, "grad_norm": 0.05920354649424553, "learning_rate": 0.00015711949135211394, "loss": 1.4523, "step": 16914 }, { "epoch": 0.7521230714508026, "grad_norm": 0.059563346207141876, "learning_rate": 0.00015701310604274426, "loss": 1.4578, "step": 16916 }, { "epoch": 0.7522119959094749, "grad_norm": 0.05948107689619064, "learning_rate": 0.0001569067500534061, "loss": 1.4571, "step": 16918 }, { "epoch": 0.7523009203681472, "grad_norm": 0.058655984699726105, "learning_rate": 0.00015680042339319072, "loss": 1.4562, "step": 16920 }, { "epoch": 0.7523898448268196, "grad_norm": 0.05892255902290344, "learning_rate": 0.00015669412607118778, "loss": 1.4593, "step": 16922 }, { "epoch": 0.752478769285492, "grad_norm": 0.05882655829191208, "learning_rate": 0.00015658785809648362, "loss": 1.4542, "step": 16924 }, { "epoch": 0.7525676937441643, "grad_norm": 0.058582112193107605, "learning_rate": 0.00015648161947816276, "loss": 1.4666, "step": 16926 }, { "epoch": 0.7526566182028367, "grad_norm": 0.05810071900486946, "learning_rate": 0.00015637541022530706, "loss": 1.4571, "step": 16928 }, { "epoch": 0.7527455426615091, "grad_norm": 0.0615081861615181, "learning_rate": 0.00015626923034699542, "loss": 1.4623, "step": 16930 }, { "epoch": 0.7528344671201814, "grad_norm": 0.0594865120947361, "learning_rate": 0.00015616307985230477, "loss": 1.4576, "step": 16932 }, { "epoch": 0.7529233915788538, "grad_norm": 0.05900559946894646, "learning_rate": 0.0001560569587503094, "loss": 1.4514, "step": 16934 }, { "epoch": 0.7530123160375262, "grad_norm": 0.06001661345362663, "learning_rate": 0.00015595086705008076, "loss": 1.4621, "step": 16936 }, { "epoch": 0.7531012404961985, "grad_norm": 0.05865577980875969, "learning_rate": 0.00015584480476068825, "loss": 1.4546, "step": 16938 }, { "epoch": 0.7531901649548708, "grad_norm": 0.05930325761437416, "learning_rate": 0.00015573877189119855, "loss": 1.462, "step": 16940 }, { "epoch": 0.7532790894135432, "grad_norm": 0.05888302996754646, "learning_rate": 0.00015563276845067565, "loss": 1.4552, "step": 16942 }, { "epoch": 0.7533680138722155, "grad_norm": 0.06017635017633438, "learning_rate": 0.00015552679444818135, "loss": 1.4547, "step": 16944 }, { "epoch": 0.7534569383308879, "grad_norm": 0.06248828396201134, "learning_rate": 0.00015542084989277462, "loss": 1.4556, "step": 16946 }, { "epoch": 0.7535458627895603, "grad_norm": 0.058105915784835815, "learning_rate": 0.0001553149347935121, "loss": 1.4615, "step": 16948 }, { "epoch": 0.7536347872482326, "grad_norm": 0.059730805456638336, "learning_rate": 0.00015520904915944817, "loss": 1.4619, "step": 16950 }, { "epoch": 0.753723711706905, "grad_norm": 0.059080708771944046, "learning_rate": 0.00015510319299963393, "loss": 1.4627, "step": 16952 }, { "epoch": 0.7538126361655774, "grad_norm": 0.06086655706167221, "learning_rate": 0.00015499736632311868, "loss": 1.4642, "step": 16954 }, { "epoch": 0.7539015606242497, "grad_norm": 0.05988055095076561, "learning_rate": 0.0001548915691389491, "loss": 1.4542, "step": 16956 }, { "epoch": 0.7539904850829221, "grad_norm": 0.05994189530611038, "learning_rate": 0.00015478580145616888, "loss": 1.4542, "step": 16958 }, { "epoch": 0.7540794095415945, "grad_norm": 0.06018702685832977, "learning_rate": 0.00015468006328381968, "loss": 1.4618, "step": 16960 }, { "epoch": 0.7541683340002667, "grad_norm": 0.0580444373190403, "learning_rate": 0.00015457435463094054, "loss": 1.4637, "step": 16962 }, { "epoch": 0.7542572584589391, "grad_norm": 0.06005840376019478, "learning_rate": 0.00015446867550656767, "loss": 1.4638, "step": 16964 }, { "epoch": 0.7543461829176115, "grad_norm": 0.061004381626844406, "learning_rate": 0.00015436302591973516, "loss": 1.4629, "step": 16966 }, { "epoch": 0.7544351073762838, "grad_norm": 0.05958648771047592, "learning_rate": 0.00015425740587947435, "loss": 1.4598, "step": 16968 }, { "epoch": 0.7545240318349562, "grad_norm": 0.059469547122716904, "learning_rate": 0.00015415181539481427, "loss": 1.4588, "step": 16970 }, { "epoch": 0.7546129562936286, "grad_norm": 0.05835345759987831, "learning_rate": 0.00015404625447478117, "loss": 1.454, "step": 16972 }, { "epoch": 0.7547018807523009, "grad_norm": 0.058979202061891556, "learning_rate": 0.0001539407231283984, "loss": 1.4601, "step": 16974 }, { "epoch": 0.7547908052109733, "grad_norm": 0.060271427035331726, "learning_rate": 0.00015383522136468803, "loss": 1.4585, "step": 16976 }, { "epoch": 0.7548797296696457, "grad_norm": 0.05911035090684891, "learning_rate": 0.0001537297491926684, "loss": 1.4615, "step": 16978 }, { "epoch": 0.754968654128318, "grad_norm": 0.059652071446180344, "learning_rate": 0.00015362430662135568, "loss": 1.4659, "step": 16980 }, { "epoch": 0.7550575785869903, "grad_norm": 0.05946535989642143, "learning_rate": 0.00015351889365976357, "loss": 1.4537, "step": 16982 }, { "epoch": 0.7551465030456627, "grad_norm": 0.06029531732201576, "learning_rate": 0.00015341351031690354, "loss": 1.4597, "step": 16984 }, { "epoch": 0.755235427504335, "grad_norm": 0.05937948077917099, "learning_rate": 0.00015330815660178382, "loss": 1.4593, "step": 16986 }, { "epoch": 0.7553243519630074, "grad_norm": 0.05890772491693497, "learning_rate": 0.0001532028325234107, "loss": 1.4543, "step": 16988 }, { "epoch": 0.7554132764216798, "grad_norm": 0.06144456937909126, "learning_rate": 0.00015309753809078774, "loss": 1.4574, "step": 16990 }, { "epoch": 0.7555022008803521, "grad_norm": 0.059851065278053284, "learning_rate": 0.00015299227331291615, "loss": 1.4601, "step": 16992 }, { "epoch": 0.7555911253390245, "grad_norm": 0.05949746072292328, "learning_rate": 0.00015288703819879424, "loss": 1.4574, "step": 16994 }, { "epoch": 0.7556800497976969, "grad_norm": 0.05919702351093292, "learning_rate": 0.00015278183275741763, "loss": 1.4577, "step": 16996 }, { "epoch": 0.7557689742563692, "grad_norm": 0.05892585590481758, "learning_rate": 0.0001526766569977805, "loss": 1.4593, "step": 16998 }, { "epoch": 0.7558578987150416, "grad_norm": 0.06064224988222122, "learning_rate": 0.0001525715109288733, "loss": 1.4542, "step": 17000 }, { "epoch": 0.7558578987150416, "eval_loss": 1.443333387374878, "eval_runtime": 12.4431, "eval_samples_per_second": 555.329, "eval_steps_per_second": 69.436, "step": 17000 }, { "epoch": 0.755946823173714, "grad_norm": 0.05818698927760124, "learning_rate": 0.0001524663945596842, "loss": 1.4525, "step": 17002 }, { "epoch": 0.7560357476323862, "grad_norm": 0.05946521461009979, "learning_rate": 0.00015236130789919927, "loss": 1.4631, "step": 17004 }, { "epoch": 0.7561246720910586, "grad_norm": 0.059593699872493744, "learning_rate": 0.0001522562509564017, "loss": 1.4585, "step": 17006 }, { "epoch": 0.756213596549731, "grad_norm": 0.05993930995464325, "learning_rate": 0.00015215122374027246, "loss": 1.4616, "step": 17008 }, { "epoch": 0.7563025210084033, "grad_norm": 0.05795668810606003, "learning_rate": 0.0001520462262597893, "loss": 1.4631, "step": 17010 }, { "epoch": 0.7563914454670757, "grad_norm": 0.05875843018293381, "learning_rate": 0.0001519412585239282, "loss": 1.451, "step": 17012 }, { "epoch": 0.7564803699257481, "grad_norm": 0.057727470993995667, "learning_rate": 0.0001518363205416623, "loss": 1.4582, "step": 17014 }, { "epoch": 0.7565692943844204, "grad_norm": 0.058522436767816544, "learning_rate": 0.0001517314123219618, "loss": 1.4639, "step": 17016 }, { "epoch": 0.7566582188430928, "grad_norm": 0.058987364172935486, "learning_rate": 0.000151626533873795, "loss": 1.4567, "step": 17018 }, { "epoch": 0.7567471433017652, "grad_norm": 0.05821183696389198, "learning_rate": 0.00015152168520612746, "loss": 1.4583, "step": 17020 }, { "epoch": 0.7568360677604375, "grad_norm": 0.06063270568847656, "learning_rate": 0.00015141686632792184, "loss": 1.4641, "step": 17022 }, { "epoch": 0.7569249922191099, "grad_norm": 0.0597229078412056, "learning_rate": 0.00015131207724813862, "loss": 1.4509, "step": 17024 }, { "epoch": 0.7570139166777822, "grad_norm": 0.05786067619919777, "learning_rate": 0.0001512073179757358, "loss": 1.4589, "step": 17026 }, { "epoch": 0.7571028411364545, "grad_norm": 0.062379490584135056, "learning_rate": 0.00015110258851966835, "loss": 1.457, "step": 17028 }, { "epoch": 0.7571917655951269, "grad_norm": 0.058465585112571716, "learning_rate": 0.00015099788888888938, "loss": 1.4601, "step": 17030 }, { "epoch": 0.7572806900537993, "grad_norm": 0.058587927371263504, "learning_rate": 0.00015089321909234865, "loss": 1.4605, "step": 17032 }, { "epoch": 0.7573696145124716, "grad_norm": 0.06187847629189491, "learning_rate": 0.00015078857913899407, "loss": 1.4613, "step": 17034 }, { "epoch": 0.757458538971144, "grad_norm": 0.05908993259072304, "learning_rate": 0.00015068396903777082, "loss": 1.4529, "step": 17036 }, { "epoch": 0.7575474634298164, "grad_norm": 0.05782734975218773, "learning_rate": 0.00015057938879762113, "loss": 1.4518, "step": 17038 }, { "epoch": 0.7576363878884887, "grad_norm": 0.06174134463071823, "learning_rate": 0.0001504748384274851, "loss": 1.4595, "step": 17040 }, { "epoch": 0.7577253123471611, "grad_norm": 0.06178918853402138, "learning_rate": 0.00015037031793630035, "loss": 1.4626, "step": 17042 }, { "epoch": 0.7578142368058335, "grad_norm": 0.05829121544957161, "learning_rate": 0.00015026582733300144, "loss": 1.4541, "step": 17044 }, { "epoch": 0.7579031612645059, "grad_norm": 0.06056341528892517, "learning_rate": 0.00015016136662652086, "loss": 1.4642, "step": 17046 }, { "epoch": 0.7579920857231781, "grad_norm": 0.05929940566420555, "learning_rate": 0.0001500569358257885, "loss": 1.4596, "step": 17048 }, { "epoch": 0.7580810101818505, "grad_norm": 0.05996108427643776, "learning_rate": 0.0001499525349397312, "loss": 1.4619, "step": 17050 }, { "epoch": 0.7581699346405228, "grad_norm": 0.05888962745666504, "learning_rate": 0.000149848163977274, "loss": 1.4585, "step": 17052 }, { "epoch": 0.7582588590991952, "grad_norm": 0.05805379897356033, "learning_rate": 0.00014974382294733873, "loss": 1.4582, "step": 17054 }, { "epoch": 0.7583477835578676, "grad_norm": 0.06010428071022034, "learning_rate": 0.00014963951185884494, "loss": 1.461, "step": 17056 }, { "epoch": 0.75843670801654, "grad_norm": 0.06018170714378357, "learning_rate": 0.00014953523072070985, "loss": 1.4593, "step": 17058 }, { "epoch": 0.7585256324752123, "grad_norm": 0.058989543467760086, "learning_rate": 0.00014943097954184752, "loss": 1.4541, "step": 17060 }, { "epoch": 0.7586145569338847, "grad_norm": 0.060051459819078445, "learning_rate": 0.00014932675833116994, "loss": 1.4549, "step": 17062 }, { "epoch": 0.758703481392557, "grad_norm": 0.06050692871212959, "learning_rate": 0.00014922256709758662, "loss": 1.4664, "step": 17064 }, { "epoch": 0.7587924058512294, "grad_norm": 0.05857968330383301, "learning_rate": 0.00014911840585000392, "loss": 1.4577, "step": 17066 }, { "epoch": 0.7588813303099018, "grad_norm": 0.06070830672979355, "learning_rate": 0.00014901427459732613, "loss": 1.4657, "step": 17068 }, { "epoch": 0.758970254768574, "grad_norm": 0.059025511145591736, "learning_rate": 0.00014891017334845515, "loss": 1.4577, "step": 17070 }, { "epoch": 0.7590591792272464, "grad_norm": 0.060259878635406494, "learning_rate": 0.00014880610211228951, "loss": 1.4623, "step": 17072 }, { "epoch": 0.7591481036859188, "grad_norm": 0.05835847184062004, "learning_rate": 0.00014870206089772592, "loss": 1.4571, "step": 17074 }, { "epoch": 0.7592370281445912, "grad_norm": 0.058684930205345154, "learning_rate": 0.00014859804971365832, "loss": 1.458, "step": 17076 }, { "epoch": 0.7593259526032635, "grad_norm": 0.06079687178134918, "learning_rate": 0.00014849406856897818, "loss": 1.4562, "step": 17078 }, { "epoch": 0.7594148770619359, "grad_norm": 0.059008531272411346, "learning_rate": 0.00014839011747257407, "loss": 1.4605, "step": 17080 }, { "epoch": 0.7595038015206083, "grad_norm": 0.058418843895196915, "learning_rate": 0.00014828619643333192, "loss": 1.4533, "step": 17082 }, { "epoch": 0.7595927259792806, "grad_norm": 0.060562197118997574, "learning_rate": 0.00014818230546013596, "loss": 1.4563, "step": 17084 }, { "epoch": 0.759681650437953, "grad_norm": 0.05741146579384804, "learning_rate": 0.00014807844456186692, "loss": 1.4577, "step": 17086 }, { "epoch": 0.7597705748966254, "grad_norm": 0.058450762182474136, "learning_rate": 0.00014797461374740312, "loss": 1.4551, "step": 17088 }, { "epoch": 0.7598594993552977, "grad_norm": 0.05948064476251602, "learning_rate": 0.0001478708130256207, "loss": 1.4559, "step": 17090 }, { "epoch": 0.75994842381397, "grad_norm": 0.05894921347498894, "learning_rate": 0.00014776704240539296, "loss": 1.455, "step": 17092 }, { "epoch": 0.7600373482726424, "grad_norm": 0.05906720459461212, "learning_rate": 0.00014766330189559085, "loss": 1.4564, "step": 17094 }, { "epoch": 0.7601262727313147, "grad_norm": 0.059796903282403946, "learning_rate": 0.00014755959150508223, "loss": 1.4582, "step": 17096 }, { "epoch": 0.7602151971899871, "grad_norm": 0.060287073254585266, "learning_rate": 0.00014745591124273289, "loss": 1.4608, "step": 17098 }, { "epoch": 0.7603041216486595, "grad_norm": 0.059052977710962296, "learning_rate": 0.00014735226111740603, "loss": 1.456, "step": 17100 }, { "epoch": 0.7603930461073318, "grad_norm": 0.05815219506621361, "learning_rate": 0.0001472486411379619, "loss": 1.4511, "step": 17102 }, { "epoch": 0.7604819705660042, "grad_norm": 0.05960388854146004, "learning_rate": 0.00014714505131325817, "loss": 1.46, "step": 17104 }, { "epoch": 0.7605708950246766, "grad_norm": 0.060582149773836136, "learning_rate": 0.00014704149165215076, "loss": 1.4646, "step": 17106 }, { "epoch": 0.7606598194833489, "grad_norm": 0.058946821838617325, "learning_rate": 0.0001469379621634921, "loss": 1.4587, "step": 17108 }, { "epoch": 0.7607487439420213, "grad_norm": 0.06056873872876167, "learning_rate": 0.00014683446285613212, "loss": 1.4572, "step": 17110 }, { "epoch": 0.7608376684006936, "grad_norm": 0.05924323946237564, "learning_rate": 0.00014673099373891863, "loss": 1.4545, "step": 17112 }, { "epoch": 0.7609265928593659, "grad_norm": 0.05958441272377968, "learning_rate": 0.0001466275548206966, "loss": 1.4507, "step": 17114 }, { "epoch": 0.7610155173180383, "grad_norm": 0.05905461683869362, "learning_rate": 0.00014652414611030862, "loss": 1.4613, "step": 17116 }, { "epoch": 0.7611044417767107, "grad_norm": 0.05967036262154579, "learning_rate": 0.00014642076761659417, "loss": 1.458, "step": 17118 }, { "epoch": 0.761193366235383, "grad_norm": 0.05881161987781525, "learning_rate": 0.00014631741934839065, "loss": 1.459, "step": 17120 }, { "epoch": 0.7612822906940554, "grad_norm": 0.06017891317605972, "learning_rate": 0.00014621410131453294, "loss": 1.4535, "step": 17122 }, { "epoch": 0.7613712151527278, "grad_norm": 0.05896136537194252, "learning_rate": 0.00014611081352385296, "loss": 1.4567, "step": 17124 }, { "epoch": 0.7614601396114001, "grad_norm": 0.05887780338525772, "learning_rate": 0.00014600755598517985, "loss": 1.4566, "step": 17126 }, { "epoch": 0.7615490640700725, "grad_norm": 0.06204426288604736, "learning_rate": 0.00014590432870734116, "loss": 1.4587, "step": 17128 }, { "epoch": 0.7616379885287449, "grad_norm": 0.05948631837964058, "learning_rate": 0.0001458011316991607, "loss": 1.452, "step": 17130 }, { "epoch": 0.7617269129874172, "grad_norm": 0.05867043510079384, "learning_rate": 0.00014569796496946064, "loss": 1.4582, "step": 17132 }, { "epoch": 0.7618158374460895, "grad_norm": 0.0589086189866066, "learning_rate": 0.00014559482852705968, "loss": 1.4573, "step": 17134 }, { "epoch": 0.7619047619047619, "grad_norm": 0.0606837160885334, "learning_rate": 0.00014549172238077456, "loss": 1.4536, "step": 17136 }, { "epoch": 0.7619936863634342, "grad_norm": 0.05865649878978729, "learning_rate": 0.00014538864653941953, "loss": 1.4604, "step": 17138 }, { "epoch": 0.7620826108221066, "grad_norm": 0.058400411158800125, "learning_rate": 0.0001452856010118055, "loss": 1.4619, "step": 17140 }, { "epoch": 0.762171535280779, "grad_norm": 0.05969015881419182, "learning_rate": 0.0001451825858067415, "loss": 1.4589, "step": 17142 }, { "epoch": 0.7622604597394513, "grad_norm": 0.059678588062524796, "learning_rate": 0.0001450796009330338, "loss": 1.454, "step": 17144 }, { "epoch": 0.7623493841981237, "grad_norm": 0.058471839874982834, "learning_rate": 0.00014497664639948578, "loss": 1.4564, "step": 17146 }, { "epoch": 0.7624383086567961, "grad_norm": 0.059258636087179184, "learning_rate": 0.0001448737222148986, "loss": 1.455, "step": 17148 }, { "epoch": 0.7625272331154684, "grad_norm": 0.06029491126537323, "learning_rate": 0.00014477082838807072, "loss": 1.4602, "step": 17150 }, { "epoch": 0.7626161575741408, "grad_norm": 0.05870896950364113, "learning_rate": 0.0001446679649277977, "loss": 1.4544, "step": 17152 }, { "epoch": 0.7627050820328132, "grad_norm": 0.05867839604616165, "learning_rate": 0.000144565131842873, "loss": 1.452, "step": 17154 }, { "epoch": 0.7627940064914854, "grad_norm": 0.058226361870765686, "learning_rate": 0.00014446232914208735, "loss": 1.4586, "step": 17156 }, { "epoch": 0.7628829309501578, "grad_norm": 0.05763303488492966, "learning_rate": 0.0001443595568342284, "loss": 1.4539, "step": 17158 }, { "epoch": 0.7629718554088302, "grad_norm": 0.05882842093706131, "learning_rate": 0.00014425681492808196, "loss": 1.4542, "step": 17160 }, { "epoch": 0.7630607798675025, "grad_norm": 0.05935533344745636, "learning_rate": 0.00014415410343243036, "loss": 1.4603, "step": 17162 }, { "epoch": 0.7631497043261749, "grad_norm": 0.060341961681842804, "learning_rate": 0.0001440514223560545, "loss": 1.4578, "step": 17164 }, { "epoch": 0.7632386287848473, "grad_norm": 0.05959847569465637, "learning_rate": 0.00014394877170773168, "loss": 1.4586, "step": 17166 }, { "epoch": 0.7633275532435196, "grad_norm": 0.058998953551054, "learning_rate": 0.00014384615149623674, "loss": 1.4553, "step": 17168 }, { "epoch": 0.763416477702192, "grad_norm": 0.058729879558086395, "learning_rate": 0.0001437435617303423, "loss": 1.4607, "step": 17170 }, { "epoch": 0.7635054021608644, "grad_norm": 0.05923215672373772, "learning_rate": 0.00014364100241881834, "loss": 1.4544, "step": 17172 }, { "epoch": 0.7635943266195367, "grad_norm": 0.05920840799808502, "learning_rate": 0.00014353847357043175, "loss": 1.4641, "step": 17174 }, { "epoch": 0.7636832510782091, "grad_norm": 0.058832135051488876, "learning_rate": 0.0001434359751939473, "loss": 1.454, "step": 17176 }, { "epoch": 0.7637721755368814, "grad_norm": 0.05877342447638512, "learning_rate": 0.00014333350729812722, "loss": 1.4617, "step": 17178 }, { "epoch": 0.7638610999955537, "grad_norm": 0.05918002128601074, "learning_rate": 0.00014323106989173052, "loss": 1.4572, "step": 17180 }, { "epoch": 0.7639500244542261, "grad_norm": 0.058956753462553024, "learning_rate": 0.00014312866298351435, "loss": 1.4561, "step": 17182 }, { "epoch": 0.7640389489128985, "grad_norm": 0.05958571657538414, "learning_rate": 0.00014302628658223243, "loss": 1.4537, "step": 17184 }, { "epoch": 0.7641278733715708, "grad_norm": 0.058351002633571625, "learning_rate": 0.000142923940696637, "loss": 1.4526, "step": 17186 }, { "epoch": 0.7642167978302432, "grad_norm": 0.0581650584936142, "learning_rate": 0.00014282162533547666, "loss": 1.4606, "step": 17188 }, { "epoch": 0.7643057222889156, "grad_norm": 0.0594148151576519, "learning_rate": 0.00014271934050749774, "loss": 1.4577, "step": 17190 }, { "epoch": 0.764394646747588, "grad_norm": 0.058780916035175323, "learning_rate": 0.00014261708622144404, "loss": 1.4568, "step": 17192 }, { "epoch": 0.7644835712062603, "grad_norm": 0.06111384555697441, "learning_rate": 0.00014251486248605688, "loss": 1.4585, "step": 17194 }, { "epoch": 0.7645724956649327, "grad_norm": 0.060085564851760864, "learning_rate": 0.0001424126693100745, "loss": 1.4593, "step": 17196 }, { "epoch": 0.764661420123605, "grad_norm": 0.05976768955588341, "learning_rate": 0.000142310506702233, "loss": 1.4553, "step": 17198 }, { "epoch": 0.7647503445822773, "grad_norm": 0.057454586029052734, "learning_rate": 0.0001422083746712657, "loss": 1.4578, "step": 17200 }, { "epoch": 0.7648392690409497, "grad_norm": 0.05781802162528038, "learning_rate": 0.0001421062732259034, "loss": 1.4557, "step": 17202 }, { "epoch": 0.764928193499622, "grad_norm": 0.058845892548561096, "learning_rate": 0.00014200420237487388, "loss": 1.4594, "step": 17204 }, { "epoch": 0.7650171179582944, "grad_norm": 0.05837889760732651, "learning_rate": 0.00014190216212690282, "loss": 1.4635, "step": 17206 }, { "epoch": 0.7651060424169668, "grad_norm": 0.057773832231760025, "learning_rate": 0.00014180015249071315, "loss": 1.4523, "step": 17208 }, { "epoch": 0.7651949668756391, "grad_norm": 0.05930472910404205, "learning_rate": 0.000141698173475025, "loss": 1.4551, "step": 17210 }, { "epoch": 0.7652838913343115, "grad_norm": 0.058624934405088425, "learning_rate": 0.00014159622508855564, "loss": 1.4633, "step": 17212 }, { "epoch": 0.7653728157929839, "grad_norm": 0.0588277168571949, "learning_rate": 0.0001414943073400207, "loss": 1.4488, "step": 17214 }, { "epoch": 0.7654617402516563, "grad_norm": 0.06121628358960152, "learning_rate": 0.00014139242023813214, "loss": 1.4541, "step": 17216 }, { "epoch": 0.7655506647103286, "grad_norm": 0.06059684976935387, "learning_rate": 0.00014129056379159994, "loss": 1.458, "step": 17218 }, { "epoch": 0.765639589169001, "grad_norm": 0.059569355100393295, "learning_rate": 0.00014118873800913096, "loss": 1.4564, "step": 17220 }, { "epoch": 0.7657285136276732, "grad_norm": 0.058082085102796555, "learning_rate": 0.0001410869428994299, "loss": 1.4604, "step": 17222 }, { "epoch": 0.7658174380863456, "grad_norm": 0.05903531610965729, "learning_rate": 0.00014098517847119886, "loss": 1.4629, "step": 17224 }, { "epoch": 0.765906362545018, "grad_norm": 0.057636991143226624, "learning_rate": 0.00014088344473313664, "loss": 1.4602, "step": 17226 }, { "epoch": 0.7659952870036904, "grad_norm": 0.05922068655490875, "learning_rate": 0.0001407817416939402, "loss": 1.4566, "step": 17228 }, { "epoch": 0.7660842114623627, "grad_norm": 0.05783383175730705, "learning_rate": 0.00014068006936230365, "loss": 1.4545, "step": 17230 }, { "epoch": 0.7661731359210351, "grad_norm": 0.05895695090293884, "learning_rate": 0.0001405784277469182, "loss": 1.4631, "step": 17232 }, { "epoch": 0.7662620603797075, "grad_norm": 0.05768261104822159, "learning_rate": 0.0001404768168564724, "loss": 1.4533, "step": 17234 }, { "epoch": 0.7663509848383798, "grad_norm": 0.0601133368909359, "learning_rate": 0.00014037523669965297, "loss": 1.4535, "step": 17236 }, { "epoch": 0.7664399092970522, "grad_norm": 0.058861732482910156, "learning_rate": 0.00014027368728514294, "loss": 1.4581, "step": 17238 }, { "epoch": 0.7665288337557246, "grad_norm": 0.05894797295331955, "learning_rate": 0.00014017216862162357, "loss": 1.4537, "step": 17240 }, { "epoch": 0.7666177582143968, "grad_norm": 0.05880829319357872, "learning_rate": 0.00014007068071777274, "loss": 1.4577, "step": 17242 }, { "epoch": 0.7667066826730692, "grad_norm": 0.06195038929581642, "learning_rate": 0.00013996922358226632, "loss": 1.459, "step": 17244 }, { "epoch": 0.7667956071317416, "grad_norm": 0.058604203164577484, "learning_rate": 0.0001398677972237774, "loss": 1.4592, "step": 17246 }, { "epoch": 0.7668845315904139, "grad_norm": 0.059475913643836975, "learning_rate": 0.00013976640165097609, "loss": 1.4559, "step": 17248 }, { "epoch": 0.7669734560490863, "grad_norm": 0.058169715106487274, "learning_rate": 0.00013966503687253023, "loss": 1.4575, "step": 17250 }, { "epoch": 0.7670623805077587, "grad_norm": 0.06019798666238785, "learning_rate": 0.00013956370289710518, "loss": 1.4533, "step": 17252 }, { "epoch": 0.767151304966431, "grad_norm": 0.05990200489759445, "learning_rate": 0.000139462399733363, "loss": 1.4578, "step": 17254 }, { "epoch": 0.7672402294251034, "grad_norm": 0.061555687338113785, "learning_rate": 0.00013936112738996377, "loss": 1.4562, "step": 17256 }, { "epoch": 0.7673291538837758, "grad_norm": 0.05985216051340103, "learning_rate": 0.00013925988587556477, "loss": 1.4601, "step": 17258 }, { "epoch": 0.7674180783424481, "grad_norm": 0.058907974511384964, "learning_rate": 0.00013915867519882026, "loss": 1.4597, "step": 17260 }, { "epoch": 0.7675070028011205, "grad_norm": 0.059296950697898865, "learning_rate": 0.0001390574953683824, "loss": 1.4566, "step": 17262 }, { "epoch": 0.7675959272597928, "grad_norm": 0.058819953352212906, "learning_rate": 0.00013895634639290067, "loss": 1.4568, "step": 17264 }, { "epoch": 0.7676848517184651, "grad_norm": 0.05849660560488701, "learning_rate": 0.00013885522828102132, "loss": 1.4597, "step": 17266 }, { "epoch": 0.7677737761771375, "grad_norm": 0.05969657003879547, "learning_rate": 0.00013875414104138874, "loss": 1.4551, "step": 17268 }, { "epoch": 0.7678627006358099, "grad_norm": 0.059172701090574265, "learning_rate": 0.00013865308468264382, "loss": 1.4562, "step": 17270 }, { "epoch": 0.7679516250944822, "grad_norm": 0.059122659265995026, "learning_rate": 0.0001385520592134259, "loss": 1.4559, "step": 17272 }, { "epoch": 0.7680405495531546, "grad_norm": 0.05972140282392502, "learning_rate": 0.00013845106464237085, "loss": 1.4559, "step": 17274 }, { "epoch": 0.768129474011827, "grad_norm": 0.058670710772275925, "learning_rate": 0.00013835010097811191, "loss": 1.4532, "step": 17276 }, { "epoch": 0.7682183984704993, "grad_norm": 0.05994569510221481, "learning_rate": 0.00013824916822928013, "loss": 1.4585, "step": 17278 }, { "epoch": 0.7683073229291717, "grad_norm": 0.060833681374788284, "learning_rate": 0.00013814826640450373, "loss": 1.4545, "step": 17280 }, { "epoch": 0.7683962473878441, "grad_norm": 0.058985013514757156, "learning_rate": 0.00013804739551240804, "loss": 1.4596, "step": 17282 }, { "epoch": 0.7684851718465164, "grad_norm": 0.05949314683675766, "learning_rate": 0.000137946555561616, "loss": 1.4517, "step": 17284 }, { "epoch": 0.7685740963051887, "grad_norm": 0.0588274747133255, "learning_rate": 0.00013784574656074795, "loss": 1.4546, "step": 17286 }, { "epoch": 0.7686630207638611, "grad_norm": 0.06145096197724342, "learning_rate": 0.00013774496851842155, "loss": 1.4592, "step": 17288 }, { "epoch": 0.7687519452225334, "grad_norm": 0.058883629739284515, "learning_rate": 0.00013764422144325173, "loss": 1.4589, "step": 17290 }, { "epoch": 0.7688408696812058, "grad_norm": 0.06038152053952217, "learning_rate": 0.00013754350534385035, "loss": 1.4592, "step": 17292 }, { "epoch": 0.7689297941398782, "grad_norm": 0.05828770250082016, "learning_rate": 0.00013744282022882775, "loss": 1.456, "step": 17294 }, { "epoch": 0.7690187185985505, "grad_norm": 0.06063667684793472, "learning_rate": 0.00013734216610679055, "loss": 1.4467, "step": 17296 }, { "epoch": 0.7691076430572229, "grad_norm": 0.05999766290187836, "learning_rate": 0.0001372415429863431, "loss": 1.4586, "step": 17298 }, { "epoch": 0.7691965675158953, "grad_norm": 0.05993552505970001, "learning_rate": 0.00013714095087608718, "loss": 1.4532, "step": 17300 }, { "epoch": 0.7692854919745676, "grad_norm": 0.058838147670030594, "learning_rate": 0.00013704038978462195, "loss": 1.4539, "step": 17302 }, { "epoch": 0.76937441643324, "grad_norm": 0.059014104306697845, "learning_rate": 0.00013693985972054363, "loss": 1.4509, "step": 17304 }, { "epoch": 0.7694633408919124, "grad_norm": 0.05862824246287346, "learning_rate": 0.00013683936069244603, "loss": 1.4539, "step": 17306 }, { "epoch": 0.7695522653505846, "grad_norm": 0.059112753719091415, "learning_rate": 0.0001367388927089202, "loss": 1.4498, "step": 17308 }, { "epoch": 0.769641189809257, "grad_norm": 0.05892875790596008, "learning_rate": 0.0001366384557785549, "loss": 1.461, "step": 17310 }, { "epoch": 0.7697301142679294, "grad_norm": 0.061551161110401154, "learning_rate": 0.00013653804990993552, "loss": 1.4524, "step": 17312 }, { "epoch": 0.7698190387266017, "grad_norm": 0.05997699499130249, "learning_rate": 0.0001364376751116453, "loss": 1.456, "step": 17314 }, { "epoch": 0.7699079631852741, "grad_norm": 0.05965949594974518, "learning_rate": 0.00013633733139226494, "loss": 1.4535, "step": 17316 }, { "epoch": 0.7699968876439465, "grad_norm": 0.059972114861011505, "learning_rate": 0.00013623701876037215, "loss": 1.4613, "step": 17318 }, { "epoch": 0.7700858121026188, "grad_norm": 0.06004555895924568, "learning_rate": 0.00013613673722454172, "loss": 1.4589, "step": 17320 }, { "epoch": 0.7701747365612912, "grad_norm": 0.05914655700325966, "learning_rate": 0.00013603648679334679, "loss": 1.4538, "step": 17322 }, { "epoch": 0.7702636610199636, "grad_norm": 0.058920904994010925, "learning_rate": 0.00013593626747535674, "loss": 1.4505, "step": 17324 }, { "epoch": 0.7703525854786359, "grad_norm": 0.05806851387023926, "learning_rate": 0.00013583607927913915, "loss": 1.4613, "step": 17326 }, { "epoch": 0.7704415099373083, "grad_norm": 0.06060624495148659, "learning_rate": 0.0001357359222132581, "loss": 1.4561, "step": 17328 }, { "epoch": 0.7705304343959806, "grad_norm": 0.060192741453647614, "learning_rate": 0.0001356357962862757, "loss": 1.4562, "step": 17330 }, { "epoch": 0.7706193588546529, "grad_norm": 0.05914744362235069, "learning_rate": 0.00013553570150675126, "loss": 1.4527, "step": 17332 }, { "epoch": 0.7707082833133253, "grad_norm": 0.05795003101229668, "learning_rate": 0.00013543563788324104, "loss": 1.4546, "step": 17334 }, { "epoch": 0.7707972077719977, "grad_norm": 0.05958685278892517, "learning_rate": 0.00013533560542429912, "loss": 1.4505, "step": 17336 }, { "epoch": 0.77088613223067, "grad_norm": 0.05854284018278122, "learning_rate": 0.0001352356041384768, "loss": 1.4587, "step": 17338 }, { "epoch": 0.7709750566893424, "grad_norm": 0.05816273018717766, "learning_rate": 0.00013513563403432232, "loss": 1.4561, "step": 17340 }, { "epoch": 0.7710639811480148, "grad_norm": 0.060084130614995956, "learning_rate": 0.00013503569512038184, "loss": 1.4549, "step": 17342 }, { "epoch": 0.7711529056066871, "grad_norm": 0.06050108000636101, "learning_rate": 0.00013493578740519856, "loss": 1.4592, "step": 17344 }, { "epoch": 0.7712418300653595, "grad_norm": 0.05803850293159485, "learning_rate": 0.00013483591089731285, "loss": 1.4543, "step": 17346 }, { "epoch": 0.7713307545240319, "grad_norm": 0.05967843905091286, "learning_rate": 0.0001347360656052628, "loss": 1.454, "step": 17348 }, { "epoch": 0.7714196789827041, "grad_norm": 0.05886238068342209, "learning_rate": 0.00013463625153758342, "loss": 1.4569, "step": 17350 }, { "epoch": 0.7715086034413765, "grad_norm": 0.0592324323952198, "learning_rate": 0.00013453646870280735, "loss": 1.4549, "step": 17352 }, { "epoch": 0.7715975279000489, "grad_norm": 0.059191394597291946, "learning_rate": 0.0001344367171094646, "loss": 1.4547, "step": 17354 }, { "epoch": 0.7716864523587212, "grad_norm": 0.05849238485097885, "learning_rate": 0.00013433699676608214, "loss": 1.4564, "step": 17356 }, { "epoch": 0.7717753768173936, "grad_norm": 0.058643490076065063, "learning_rate": 0.00013423730768118464, "loss": 1.4555, "step": 17358 }, { "epoch": 0.771864301276066, "grad_norm": 0.05843536928296089, "learning_rate": 0.00013413764986329408, "loss": 1.4584, "step": 17360 }, { "epoch": 0.7719532257347383, "grad_norm": 0.05824931710958481, "learning_rate": 0.00013403802332092934, "loss": 1.4561, "step": 17362 }, { "epoch": 0.7720421501934107, "grad_norm": 0.059431836009025574, "learning_rate": 0.00013393842806260715, "loss": 1.4552, "step": 17364 }, { "epoch": 0.7721310746520831, "grad_norm": 0.06101321056485176, "learning_rate": 0.00013383886409684142, "loss": 1.4558, "step": 17366 }, { "epoch": 0.7722199991107555, "grad_norm": 0.05848054587841034, "learning_rate": 0.00013373933143214306, "loss": 1.4496, "step": 17368 }, { "epoch": 0.7723089235694278, "grad_norm": 0.05772422254085541, "learning_rate": 0.00013363983007702075, "loss": 1.4613, "step": 17370 }, { "epoch": 0.7723978480281001, "grad_norm": 0.05888652801513672, "learning_rate": 0.00013354036003998033, "loss": 1.4597, "step": 17372 }, { "epoch": 0.7724867724867724, "grad_norm": 0.058828819543123245, "learning_rate": 0.0001334409213295248, "loss": 1.4565, "step": 17374 }, { "epoch": 0.7725756969454448, "grad_norm": 0.060525476932525635, "learning_rate": 0.00013334151395415477, "loss": 1.4589, "step": 17376 }, { "epoch": 0.7726646214041172, "grad_norm": 0.06004347652196884, "learning_rate": 0.00013324213792236772, "loss": 1.4552, "step": 17378 }, { "epoch": 0.7727535458627895, "grad_norm": 0.05766339972615242, "learning_rate": 0.00013314279324265921, "loss": 1.454, "step": 17380 }, { "epoch": 0.7728424703214619, "grad_norm": 0.05979818478226662, "learning_rate": 0.00013304347992352145, "loss": 1.4531, "step": 17382 }, { "epoch": 0.7729313947801343, "grad_norm": 0.05923622101545334, "learning_rate": 0.00013294419797344397, "loss": 1.4577, "step": 17384 }, { "epoch": 0.7730203192388067, "grad_norm": 0.059375032782554626, "learning_rate": 0.00013284494740091403, "loss": 1.458, "step": 17386 }, { "epoch": 0.773109243697479, "grad_norm": 0.05873361974954605, "learning_rate": 0.0001327457282144161, "loss": 1.4583, "step": 17388 }, { "epoch": 0.7731981681561514, "grad_norm": 0.05951083451509476, "learning_rate": 0.00013264654042243168, "loss": 1.4531, "step": 17390 }, { "epoch": 0.7732870926148238, "grad_norm": 0.059443507343530655, "learning_rate": 0.0001325473840334398, "loss": 1.4564, "step": 17392 }, { "epoch": 0.773376017073496, "grad_norm": 0.05906612053513527, "learning_rate": 0.0001324482590559169, "loss": 1.4523, "step": 17394 }, { "epoch": 0.7734649415321684, "grad_norm": 0.05907057970762253, "learning_rate": 0.00013234916549833664, "loss": 1.4547, "step": 17396 }, { "epoch": 0.7735538659908407, "grad_norm": 0.058966733515262604, "learning_rate": 0.00013225010336916998, "loss": 1.4513, "step": 17398 }, { "epoch": 0.7736427904495131, "grad_norm": 0.06092256307601929, "learning_rate": 0.00013215107267688475, "loss": 1.4487, "step": 17400 }, { "epoch": 0.7737317149081855, "grad_norm": 0.060688234865665436, "learning_rate": 0.0001320520734299472, "loss": 1.4537, "step": 17402 }, { "epoch": 0.7738206393668579, "grad_norm": 0.0587213858962059, "learning_rate": 0.0001319531056368199, "loss": 1.4579, "step": 17404 }, { "epoch": 0.7739095638255302, "grad_norm": 0.05942368134856224, "learning_rate": 0.0001318541693059629, "loss": 1.4543, "step": 17406 }, { "epoch": 0.7739984882842026, "grad_norm": 0.06065099313855171, "learning_rate": 0.0001317552644458339, "loss": 1.4541, "step": 17408 }, { "epoch": 0.774087412742875, "grad_norm": 0.05897912383079529, "learning_rate": 0.00013165639106488768, "loss": 1.4523, "step": 17410 }, { "epoch": 0.7741763372015473, "grad_norm": 0.05976055935025215, "learning_rate": 0.00013155754917157654, "loss": 1.4512, "step": 17412 }, { "epoch": 0.7742652616602197, "grad_norm": 0.05808144807815552, "learning_rate": 0.00013145873877434965, "loss": 1.454, "step": 17414 }, { "epoch": 0.774354186118892, "grad_norm": 0.06036299839615822, "learning_rate": 0.00013135995988165384, "loss": 1.4538, "step": 17416 }, { "epoch": 0.7744431105775643, "grad_norm": 0.058639515191316605, "learning_rate": 0.00013126121250193334, "loss": 1.4528, "step": 17418 }, { "epoch": 0.7745320350362367, "grad_norm": 0.05818388611078262, "learning_rate": 0.00013116249664362945, "loss": 1.4578, "step": 17420 }, { "epoch": 0.7746209594949091, "grad_norm": 0.05927572399377823, "learning_rate": 0.00013106381231518037, "loss": 1.4547, "step": 17422 }, { "epoch": 0.7747098839535814, "grad_norm": 0.05805305764079094, "learning_rate": 0.0001309651595250228, "loss": 1.4548, "step": 17424 }, { "epoch": 0.7747988084122538, "grad_norm": 0.05744117870926857, "learning_rate": 0.00013086653828158973, "loss": 1.4596, "step": 17426 }, { "epoch": 0.7748877328709262, "grad_norm": 0.05873503535985947, "learning_rate": 0.0001307679485933115, "loss": 1.455, "step": 17428 }, { "epoch": 0.7749766573295985, "grad_norm": 0.05853577330708504, "learning_rate": 0.00013066939046861613, "loss": 1.4544, "step": 17430 }, { "epoch": 0.7750655817882709, "grad_norm": 0.05812995135784149, "learning_rate": 0.00013057086391592889, "loss": 1.4523, "step": 17432 }, { "epoch": 0.7751545062469433, "grad_norm": 0.05899371579289436, "learning_rate": 0.00013047236894367237, "loss": 1.4546, "step": 17434 }, { "epoch": 0.7752434307056156, "grad_norm": 0.05924930423498154, "learning_rate": 0.00013037390556026608, "loss": 1.4557, "step": 17436 }, { "epoch": 0.7753323551642879, "grad_norm": 0.059978023171424866, "learning_rate": 0.0001302754737741272, "loss": 1.4537, "step": 17438 }, { "epoch": 0.7754212796229603, "grad_norm": 0.05814290791749954, "learning_rate": 0.00013017707359367026, "loss": 1.4523, "step": 17440 }, { "epoch": 0.7755102040816326, "grad_norm": 0.05937810242176056, "learning_rate": 0.00013007870502730674, "loss": 1.451, "step": 17442 }, { "epoch": 0.775599128540305, "grad_norm": 0.058999329805374146, "learning_rate": 0.00012998036808344572, "loss": 1.4548, "step": 17444 }, { "epoch": 0.7756880529989774, "grad_norm": 0.05975823104381561, "learning_rate": 0.00012988206277049353, "loss": 1.4563, "step": 17446 }, { "epoch": 0.7757769774576497, "grad_norm": 0.058464065194129944, "learning_rate": 0.00012978378909685357, "loss": 1.448, "step": 17448 }, { "epoch": 0.7758659019163221, "grad_norm": 0.06097313389182091, "learning_rate": 0.00012968554707092682, "loss": 1.4552, "step": 17450 }, { "epoch": 0.7759548263749945, "grad_norm": 0.060069650411605835, "learning_rate": 0.00012958733670111156, "loss": 1.4555, "step": 17452 }, { "epoch": 0.7760437508336668, "grad_norm": 0.05999191105365753, "learning_rate": 0.00012948915799580296, "loss": 1.456, "step": 17454 }, { "epoch": 0.7761326752923392, "grad_norm": 0.059085212647914886, "learning_rate": 0.00012939101096339406, "loss": 1.4568, "step": 17456 }, { "epoch": 0.7762215997510116, "grad_norm": 0.06041676923632622, "learning_rate": 0.00012929289561227465, "loss": 1.4599, "step": 17458 }, { "epoch": 0.7763105242096838, "grad_norm": 0.059452079236507416, "learning_rate": 0.00012919481195083215, "loss": 1.4529, "step": 17460 }, { "epoch": 0.7763994486683562, "grad_norm": 0.05959062650799751, "learning_rate": 0.0001290967599874514, "loss": 1.4565, "step": 17462 }, { "epoch": 0.7764883731270286, "grad_norm": 0.059135422110557556, "learning_rate": 0.0001289987397305139, "loss": 1.4584, "step": 17464 }, { "epoch": 0.7765772975857009, "grad_norm": 0.060259103775024414, "learning_rate": 0.00012890075118839918, "loss": 1.451, "step": 17466 }, { "epoch": 0.7766662220443733, "grad_norm": 0.05845576897263527, "learning_rate": 0.00012880279436948368, "loss": 1.4531, "step": 17468 }, { "epoch": 0.7767551465030457, "grad_norm": 0.05925283208489418, "learning_rate": 0.00012870486928214103, "loss": 1.4575, "step": 17470 }, { "epoch": 0.776844070961718, "grad_norm": 0.060372382402420044, "learning_rate": 0.0001286069759347424, "loss": 1.4531, "step": 17472 }, { "epoch": 0.7769329954203904, "grad_norm": 0.059411074966192245, "learning_rate": 0.00012850911433565627, "loss": 1.4571, "step": 17474 }, { "epoch": 0.7770219198790628, "grad_norm": 0.05941460654139519, "learning_rate": 0.000128411284493248, "loss": 1.4575, "step": 17476 }, { "epoch": 0.7771108443377351, "grad_norm": 0.05880104377865791, "learning_rate": 0.00012831348641588087, "loss": 1.4566, "step": 17478 }, { "epoch": 0.7771997687964074, "grad_norm": 0.05832483246922493, "learning_rate": 0.0001282157201119145, "loss": 1.4555, "step": 17480 }, { "epoch": 0.7772886932550798, "grad_norm": 0.059995248913764954, "learning_rate": 0.00012811798558970717, "loss": 1.4586, "step": 17482 }, { "epoch": 0.7773776177137521, "grad_norm": 0.0595800019800663, "learning_rate": 0.00012802028285761318, "loss": 1.4615, "step": 17484 }, { "epoch": 0.7774665421724245, "grad_norm": 0.06008690595626831, "learning_rate": 0.0001279226119239846, "loss": 1.4572, "step": 17486 }, { "epoch": 0.7775554666310969, "grad_norm": 0.058767497539520264, "learning_rate": 0.00012782497279717082, "loss": 1.4608, "step": 17488 }, { "epoch": 0.7776443910897692, "grad_norm": 0.058635737746953964, "learning_rate": 0.00012772736548551862, "loss": 1.4545, "step": 17490 }, { "epoch": 0.7777333155484416, "grad_norm": 0.059888698160648346, "learning_rate": 0.00012762978999737162, "loss": 1.451, "step": 17492 }, { "epoch": 0.777822240007114, "grad_norm": 0.060602057725191116, "learning_rate": 0.00012753224634107118, "loss": 1.4643, "step": 17494 }, { "epoch": 0.7779111644657863, "grad_norm": 0.0588313527405262, "learning_rate": 0.00012743473452495586, "loss": 1.4517, "step": 17496 }, { "epoch": 0.7780000889244587, "grad_norm": 0.05848833918571472, "learning_rate": 0.0001273372545573611, "loss": 1.4569, "step": 17498 }, { "epoch": 0.7780890133831311, "grad_norm": 0.05902280658483505, "learning_rate": 0.00012723980644662014, "loss": 1.4577, "step": 17500 }, { "epoch": 0.7780890133831311, "eval_loss": 1.441103219985962, "eval_runtime": 12.4299, "eval_samples_per_second": 555.918, "eval_steps_per_second": 69.51, "step": 17500 }, { "epoch": 0.7781779378418033, "grad_norm": 0.05878506973385811, "learning_rate": 0.00012714239020106323, "loss": 1.446, "step": 17502 }, { "epoch": 0.7782668623004757, "grad_norm": 0.05805620923638344, "learning_rate": 0.00012704500582901807, "loss": 1.448, "step": 17504 }, { "epoch": 0.7783557867591481, "grad_norm": 0.06127835810184479, "learning_rate": 0.00012694765333880935, "loss": 1.4584, "step": 17506 }, { "epoch": 0.7784447112178204, "grad_norm": 0.059799838811159134, "learning_rate": 0.00012685033273875896, "loss": 1.455, "step": 17508 }, { "epoch": 0.7785336356764928, "grad_norm": 0.0602266825735569, "learning_rate": 0.0001267530440371868, "loss": 1.4574, "step": 17510 }, { "epoch": 0.7786225601351652, "grad_norm": 0.05929147079586983, "learning_rate": 0.00012665578724240927, "loss": 1.4532, "step": 17512 }, { "epoch": 0.7787114845938375, "grad_norm": 0.059397101402282715, "learning_rate": 0.00012655856236274023, "loss": 1.4546, "step": 17514 }, { "epoch": 0.7788004090525099, "grad_norm": 0.059210579842329025, "learning_rate": 0.00012646136940649093, "loss": 1.4494, "step": 17516 }, { "epoch": 0.7788893335111823, "grad_norm": 0.059511348605155945, "learning_rate": 0.00012636420838196995, "loss": 1.4598, "step": 17518 }, { "epoch": 0.7789782579698546, "grad_norm": 0.05979214981198311, "learning_rate": 0.0001262670792974831, "loss": 1.4606, "step": 17520 }, { "epoch": 0.779067182428527, "grad_norm": 0.059690359979867935, "learning_rate": 0.00012616998216133314, "loss": 1.4603, "step": 17522 }, { "epoch": 0.7791561068871993, "grad_norm": 0.059829000383615494, "learning_rate": 0.00012607291698182055, "loss": 1.4563, "step": 17524 }, { "epoch": 0.7792450313458716, "grad_norm": 0.06074198707938194, "learning_rate": 0.000125975883767243, "loss": 1.4569, "step": 17526 }, { "epoch": 0.779333955804544, "grad_norm": 0.058196790516376495, "learning_rate": 0.0001258788825258952, "loss": 1.4577, "step": 17528 }, { "epoch": 0.7794228802632164, "grad_norm": 0.059000782668590546, "learning_rate": 0.0001257819132660689, "loss": 1.4581, "step": 17530 }, { "epoch": 0.7795118047218887, "grad_norm": 0.058559540659189224, "learning_rate": 0.00012568497599605412, "loss": 1.4561, "step": 17532 }, { "epoch": 0.7796007291805611, "grad_norm": 0.057577475905418396, "learning_rate": 0.0001255880707241369, "loss": 1.455, "step": 17534 }, { "epoch": 0.7796896536392335, "grad_norm": 0.06146826967597008, "learning_rate": 0.00012549119745860162, "loss": 1.4585, "step": 17536 }, { "epoch": 0.7797785780979059, "grad_norm": 0.05853557214140892, "learning_rate": 0.00012539435620772898, "loss": 1.4611, "step": 17538 }, { "epoch": 0.7798675025565782, "grad_norm": 0.0580923855304718, "learning_rate": 0.00012529754697979763, "loss": 1.4624, "step": 17540 }, { "epoch": 0.7799564270152506, "grad_norm": 0.058282919228076935, "learning_rate": 0.00012520076978308337, "loss": 1.4516, "step": 17542 }, { "epoch": 0.780045351473923, "grad_norm": 0.06060004606842995, "learning_rate": 0.00012510402462585885, "loss": 1.4515, "step": 17544 }, { "epoch": 0.7801342759325952, "grad_norm": 0.05811069533228874, "learning_rate": 0.0001250073115163944, "loss": 1.4587, "step": 17546 }, { "epoch": 0.7802232003912676, "grad_norm": 0.05835281312465668, "learning_rate": 0.00012491063046295763, "loss": 1.4563, "step": 17548 }, { "epoch": 0.78031212484994, "grad_norm": 0.059804681688547134, "learning_rate": 0.00012481398147381307, "loss": 1.4531, "step": 17550 }, { "epoch": 0.7804010493086123, "grad_norm": 0.05878927931189537, "learning_rate": 0.0001247173645572227, "loss": 1.4627, "step": 17552 }, { "epoch": 0.7804899737672847, "grad_norm": 0.05973034352064133, "learning_rate": 0.00012462077972144597, "loss": 1.457, "step": 17554 }, { "epoch": 0.780578898225957, "grad_norm": 0.057263296097517014, "learning_rate": 0.0001245242269747391, "loss": 1.4613, "step": 17556 }, { "epoch": 0.7806678226846294, "grad_norm": 0.05798415094614029, "learning_rate": 0.00012442770632535598, "loss": 1.4565, "step": 17558 }, { "epoch": 0.7807567471433018, "grad_norm": 0.05924572795629501, "learning_rate": 0.00012433121778154777, "loss": 1.4583, "step": 17560 }, { "epoch": 0.7808456716019742, "grad_norm": 0.05844467505812645, "learning_rate": 0.00012423476135156242, "loss": 1.4604, "step": 17562 }, { "epoch": 0.7809345960606465, "grad_norm": 0.05907626450061798, "learning_rate": 0.00012413833704364574, "loss": 1.4532, "step": 17564 }, { "epoch": 0.7810235205193189, "grad_norm": 0.059602320194244385, "learning_rate": 0.00012404194486604025, "loss": 1.455, "step": 17566 }, { "epoch": 0.7811124449779911, "grad_norm": 0.05900027975440025, "learning_rate": 0.00012394558482698613, "loss": 1.4606, "step": 17568 }, { "epoch": 0.7812013694366635, "grad_norm": 0.058760497719049454, "learning_rate": 0.00012384925693472077, "loss": 1.4548, "step": 17570 }, { "epoch": 0.7812902938953359, "grad_norm": 0.05956209450960159, "learning_rate": 0.0001237529611974784, "loss": 1.4485, "step": 17572 }, { "epoch": 0.7813792183540083, "grad_norm": 0.057924702763557434, "learning_rate": 0.00012365669762349102, "loss": 1.4541, "step": 17574 }, { "epoch": 0.7814681428126806, "grad_norm": 0.059304650872945786, "learning_rate": 0.00012356046622098772, "loss": 1.4603, "step": 17576 }, { "epoch": 0.781557067271353, "grad_norm": 0.05885639041662216, "learning_rate": 0.00012346426699819457, "loss": 1.4611, "step": 17578 }, { "epoch": 0.7816459917300254, "grad_norm": 0.059227973222732544, "learning_rate": 0.0001233680999633352, "loss": 1.4572, "step": 17580 }, { "epoch": 0.7817349161886977, "grad_norm": 0.059174761176109314, "learning_rate": 0.00012327196512463057, "loss": 1.4501, "step": 17582 }, { "epoch": 0.7818238406473701, "grad_norm": 0.05834069103002548, "learning_rate": 0.0001231758624902984, "loss": 1.4517, "step": 17584 }, { "epoch": 0.7819127651060425, "grad_norm": 0.05900967866182327, "learning_rate": 0.0001230797920685542, "loss": 1.4536, "step": 17586 }, { "epoch": 0.7820016895647148, "grad_norm": 0.0585876889526844, "learning_rate": 0.0001229837538676102, "loss": 1.4607, "step": 17588 }, { "epoch": 0.7820906140233871, "grad_norm": 0.0589335672557354, "learning_rate": 0.0001228877478956766, "loss": 1.4593, "step": 17590 }, { "epoch": 0.7821795384820595, "grad_norm": 0.059638556092977524, "learning_rate": 0.00012279177416096022, "loss": 1.454, "step": 17592 }, { "epoch": 0.7822684629407318, "grad_norm": 0.05850829929113388, "learning_rate": 0.00012269583267166513, "loss": 1.4535, "step": 17594 }, { "epoch": 0.7823573873994042, "grad_norm": 0.05961374565958977, "learning_rate": 0.00012259992343599302, "loss": 1.4504, "step": 17596 }, { "epoch": 0.7824463118580766, "grad_norm": 0.05841527134180069, "learning_rate": 0.0001225040464621427, "loss": 1.4527, "step": 17598 }, { "epoch": 0.7825352363167489, "grad_norm": 0.059410110116004944, "learning_rate": 0.00012240820175830997, "loss": 1.4515, "step": 17600 }, { "epoch": 0.7826241607754213, "grad_norm": 0.05802929401397705, "learning_rate": 0.00012231238933268812, "loss": 1.4523, "step": 17602 }, { "epoch": 0.7827130852340937, "grad_norm": 0.05958373472094536, "learning_rate": 0.0001222166091934676, "loss": 1.4586, "step": 17604 }, { "epoch": 0.782802009692766, "grad_norm": 0.059802230447530746, "learning_rate": 0.0001221208613488364, "loss": 1.4544, "step": 17606 }, { "epoch": 0.7828909341514384, "grad_norm": 0.05877742916345596, "learning_rate": 0.00012202514580697904, "loss": 1.4551, "step": 17608 }, { "epoch": 0.7829798586101107, "grad_norm": 0.05887303501367569, "learning_rate": 0.0001219294625760779, "loss": 1.4622, "step": 17610 }, { "epoch": 0.783068783068783, "grad_norm": 0.05912398546934128, "learning_rate": 0.00012183381166431251, "loss": 1.453, "step": 17612 }, { "epoch": 0.7831577075274554, "grad_norm": 0.05929466336965561, "learning_rate": 0.00012173819307985945, "loss": 1.4483, "step": 17614 }, { "epoch": 0.7832466319861278, "grad_norm": 0.05975263938307762, "learning_rate": 0.00012164260683089234, "loss": 1.4542, "step": 17616 }, { "epoch": 0.7833355564448001, "grad_norm": 0.059671733528375626, "learning_rate": 0.00012154705292558283, "loss": 1.4588, "step": 17618 }, { "epoch": 0.7834244809034725, "grad_norm": 0.06014909967780113, "learning_rate": 0.00012145153137209896, "loss": 1.451, "step": 17620 }, { "epoch": 0.7835134053621449, "grad_norm": 0.05790025368332863, "learning_rate": 0.00012135604217860635, "loss": 1.4554, "step": 17622 }, { "epoch": 0.7836023298208172, "grad_norm": 0.05892461538314819, "learning_rate": 0.00012126058535326784, "loss": 1.46, "step": 17624 }, { "epoch": 0.7836912542794896, "grad_norm": 0.05957599729299545, "learning_rate": 0.00012116516090424357, "loss": 1.4584, "step": 17626 }, { "epoch": 0.783780178738162, "grad_norm": 0.059336572885513306, "learning_rate": 0.0001210697688396909, "loss": 1.4471, "step": 17628 }, { "epoch": 0.7838691031968343, "grad_norm": 0.05946873500943184, "learning_rate": 0.0001209744091677642, "loss": 1.456, "step": 17630 }, { "epoch": 0.7839580276555066, "grad_norm": 0.05893925949931145, "learning_rate": 0.00012087908189661528, "loss": 1.4565, "step": 17632 }, { "epoch": 0.784046952114179, "grad_norm": 0.05950813367962837, "learning_rate": 0.0001207837870343933, "loss": 1.4495, "step": 17634 }, { "epoch": 0.7841358765728513, "grad_norm": 0.058802422136068344, "learning_rate": 0.00012068852458924439, "loss": 1.4563, "step": 17636 }, { "epoch": 0.7842248010315237, "grad_norm": 0.05822760984301567, "learning_rate": 0.00012059329456931167, "loss": 1.4588, "step": 17638 }, { "epoch": 0.7843137254901961, "grad_norm": 0.059016041457653046, "learning_rate": 0.00012049809698273645, "loss": 1.4554, "step": 17640 }, { "epoch": 0.7844026499488684, "grad_norm": 0.05806576833128929, "learning_rate": 0.0001204029318376561, "loss": 1.4568, "step": 17642 }, { "epoch": 0.7844915744075408, "grad_norm": 0.058557651937007904, "learning_rate": 0.00012030779914220613, "loss": 1.4526, "step": 17644 }, { "epoch": 0.7845804988662132, "grad_norm": 0.05871611833572388, "learning_rate": 0.00012021269890451863, "loss": 1.4518, "step": 17646 }, { "epoch": 0.7846694233248855, "grad_norm": 0.05869081988930702, "learning_rate": 0.00012011763113272329, "loss": 1.452, "step": 17648 }, { "epoch": 0.7847583477835579, "grad_norm": 0.059065330773591995, "learning_rate": 0.00012002259583494701, "loss": 1.4584, "step": 17650 }, { "epoch": 0.7848472722422303, "grad_norm": 0.05952310189604759, "learning_rate": 0.00011992759301931366, "loss": 1.4574, "step": 17652 }, { "epoch": 0.7849361967009025, "grad_norm": 0.05919057875871658, "learning_rate": 0.00011983262269394457, "loss": 1.4492, "step": 17654 }, { "epoch": 0.7850251211595749, "grad_norm": 0.05960650369524956, "learning_rate": 0.00011973768486695835, "loss": 1.4556, "step": 17656 }, { "epoch": 0.7851140456182473, "grad_norm": 0.0592602901160717, "learning_rate": 0.00011964277954647046, "loss": 1.4503, "step": 17658 }, { "epoch": 0.7852029700769196, "grad_norm": 0.05901474878191948, "learning_rate": 0.00011954790674059401, "loss": 1.4545, "step": 17660 }, { "epoch": 0.785291894535592, "grad_norm": 0.05881018191576004, "learning_rate": 0.0001194530664574392, "loss": 1.4524, "step": 17662 }, { "epoch": 0.7853808189942644, "grad_norm": 0.05886894464492798, "learning_rate": 0.00011935825870511308, "loss": 1.4614, "step": 17664 }, { "epoch": 0.7854697434529367, "grad_norm": 0.059833452105522156, "learning_rate": 0.00011926348349172051, "loss": 1.458, "step": 17666 }, { "epoch": 0.7855586679116091, "grad_norm": 0.06001525744795799, "learning_rate": 0.00011916874082536333, "loss": 1.4564, "step": 17668 }, { "epoch": 0.7856475923702815, "grad_norm": 0.06033634394407272, "learning_rate": 0.00011907403071414036, "loss": 1.4544, "step": 17670 }, { "epoch": 0.7857365168289538, "grad_norm": 0.058471135795116425, "learning_rate": 0.00011897935316614799, "loss": 1.4526, "step": 17672 }, { "epoch": 0.7858254412876262, "grad_norm": 0.06029105186462402, "learning_rate": 0.00011888470818947955, "loss": 1.4565, "step": 17674 }, { "epoch": 0.7859143657462985, "grad_norm": 0.059264685958623886, "learning_rate": 0.00011879009579222577, "loss": 1.4518, "step": 17676 }, { "epoch": 0.7860032902049708, "grad_norm": 0.059138696640729904, "learning_rate": 0.00011869551598247463, "loss": 1.4484, "step": 17678 }, { "epoch": 0.7860922146636432, "grad_norm": 0.05941601097583771, "learning_rate": 0.000118600968768311, "loss": 1.4567, "step": 17680 }, { "epoch": 0.7861811391223156, "grad_norm": 0.05975447967648506, "learning_rate": 0.00011850645415781736, "loss": 1.4537, "step": 17682 }, { "epoch": 0.7862700635809879, "grad_norm": 0.05916404351592064, "learning_rate": 0.00011841197215907335, "loss": 1.4555, "step": 17684 }, { "epoch": 0.7863589880396603, "grad_norm": 0.061422303318977356, "learning_rate": 0.00011831752278015545, "loss": 1.4601, "step": 17686 }, { "epoch": 0.7864479124983327, "grad_norm": 0.05970718339085579, "learning_rate": 0.00011822310602913766, "loss": 1.4566, "step": 17688 }, { "epoch": 0.786536836957005, "grad_norm": 0.05843975022435188, "learning_rate": 0.00011812872191409135, "loss": 1.454, "step": 17690 }, { "epoch": 0.7866257614156774, "grad_norm": 0.05985402688384056, "learning_rate": 0.00011803437044308463, "loss": 1.4532, "step": 17692 }, { "epoch": 0.7867146858743498, "grad_norm": 0.059144627302885056, "learning_rate": 0.00011794005162418336, "loss": 1.4588, "step": 17694 }, { "epoch": 0.7868036103330222, "grad_norm": 0.058817509561777115, "learning_rate": 0.00011784576546544984, "loss": 1.4517, "step": 17696 }, { "epoch": 0.7868925347916944, "grad_norm": 0.059203263372182846, "learning_rate": 0.00011775151197494466, "loss": 1.4543, "step": 17698 }, { "epoch": 0.7869814592503668, "grad_norm": 0.05954177677631378, "learning_rate": 0.00011765729116072476, "loss": 1.4546, "step": 17700 }, { "epoch": 0.7870703837090391, "grad_norm": 0.0586751364171505, "learning_rate": 0.0001175631030308444, "loss": 1.4472, "step": 17702 }, { "epoch": 0.7871593081677115, "grad_norm": 0.058443281799554825, "learning_rate": 0.00011746894759335535, "loss": 1.4547, "step": 17704 }, { "epoch": 0.7872482326263839, "grad_norm": 0.05910266563296318, "learning_rate": 0.00011737482485630657, "loss": 1.4556, "step": 17706 }, { "epoch": 0.7873371570850562, "grad_norm": 0.05853736400604248, "learning_rate": 0.00011728073482774377, "loss": 1.4566, "step": 17708 }, { "epoch": 0.7874260815437286, "grad_norm": 0.0591680109500885, "learning_rate": 0.00011718667751571032, "loss": 1.4587, "step": 17710 }, { "epoch": 0.787515006002401, "grad_norm": 0.05998816341161728, "learning_rate": 0.00011709265292824672, "loss": 1.4581, "step": 17712 }, { "epoch": 0.7876039304610734, "grad_norm": 0.0575900636613369, "learning_rate": 0.00011699866107339068, "loss": 1.4542, "step": 17714 }, { "epoch": 0.7876928549197457, "grad_norm": 0.05857378616929054, "learning_rate": 0.00011690470195917696, "loss": 1.4543, "step": 17716 }, { "epoch": 0.787781779378418, "grad_norm": 0.05935800448060036, "learning_rate": 0.00011681077559363729, "loss": 1.4565, "step": 17718 }, { "epoch": 0.7878707038370903, "grad_norm": 0.058298259973526, "learning_rate": 0.00011671688198480141, "loss": 1.455, "step": 17720 }, { "epoch": 0.7879596282957627, "grad_norm": 0.058936238288879395, "learning_rate": 0.00011662302114069556, "loss": 1.4562, "step": 17722 }, { "epoch": 0.7880485527544351, "grad_norm": 0.05869641527533531, "learning_rate": 0.00011652919306934324, "loss": 1.454, "step": 17724 }, { "epoch": 0.7881374772131075, "grad_norm": 0.058170247822999954, "learning_rate": 0.00011643539777876532, "loss": 1.4549, "step": 17726 }, { "epoch": 0.7882264016717798, "grad_norm": 0.05906420946121216, "learning_rate": 0.00011634163527697999, "loss": 1.4512, "step": 17728 }, { "epoch": 0.7883153261304522, "grad_norm": 0.05926912650465965, "learning_rate": 0.00011624790557200254, "loss": 1.4524, "step": 17730 }, { "epoch": 0.7884042505891246, "grad_norm": 0.05841286852955818, "learning_rate": 0.00011615420867184506, "loss": 1.4534, "step": 17732 }, { "epoch": 0.7884931750477969, "grad_norm": 0.059124939143657684, "learning_rate": 0.00011606054458451742, "loss": 1.4579, "step": 17734 }, { "epoch": 0.7885820995064693, "grad_norm": 0.05994617938995361, "learning_rate": 0.00011596691331802656, "loss": 1.451, "step": 17736 }, { "epoch": 0.7886710239651417, "grad_norm": 0.058965619653463364, "learning_rate": 0.00011587331488037611, "loss": 1.4557, "step": 17738 }, { "epoch": 0.7887599484238139, "grad_norm": 0.059728700667619705, "learning_rate": 0.00011577974927956758, "loss": 1.4493, "step": 17740 }, { "epoch": 0.7888488728824863, "grad_norm": 0.061226364225149155, "learning_rate": 0.00011568621652359935, "loss": 1.4536, "step": 17742 }, { "epoch": 0.7889377973411587, "grad_norm": 0.06103841960430145, "learning_rate": 0.000115592716620467, "loss": 1.4539, "step": 17744 }, { "epoch": 0.789026721799831, "grad_norm": 0.05864092707633972, "learning_rate": 0.00011549924957816293, "loss": 1.4571, "step": 17746 }, { "epoch": 0.7891156462585034, "grad_norm": 0.059012021869421005, "learning_rate": 0.00011540581540467782, "loss": 1.4519, "step": 17748 }, { "epoch": 0.7892045707171758, "grad_norm": 0.0590151883661747, "learning_rate": 0.00011531241410799825, "loss": 1.4522, "step": 17750 }, { "epoch": 0.7892934951758481, "grad_norm": 0.058573827147483826, "learning_rate": 0.00011521904569610892, "loss": 1.4585, "step": 17752 }, { "epoch": 0.7893824196345205, "grad_norm": 0.058176856487989426, "learning_rate": 0.00011512571017699114, "loss": 1.4557, "step": 17754 }, { "epoch": 0.7894713440931929, "grad_norm": 0.05986884981393814, "learning_rate": 0.00011503240755862371, "loss": 1.4505, "step": 17756 }, { "epoch": 0.7895602685518652, "grad_norm": 0.059089165180921555, "learning_rate": 0.0001149391378489827, "loss": 1.4538, "step": 17758 }, { "epoch": 0.7896491930105376, "grad_norm": 0.05954886972904205, "learning_rate": 0.00011484590105604098, "loss": 1.4562, "step": 17760 }, { "epoch": 0.7897381174692099, "grad_norm": 0.05822194367647171, "learning_rate": 0.00011475269718776898, "loss": 1.4533, "step": 17762 }, { "epoch": 0.7898270419278822, "grad_norm": 0.06061677634716034, "learning_rate": 0.00011465952625213428, "loss": 1.4525, "step": 17764 }, { "epoch": 0.7899159663865546, "grad_norm": 0.05945700407028198, "learning_rate": 0.00011456638825710125, "loss": 1.4522, "step": 17766 }, { "epoch": 0.790004890845227, "grad_norm": 0.060659851878881454, "learning_rate": 0.00011447328321063188, "loss": 1.4567, "step": 17768 }, { "epoch": 0.7900938153038993, "grad_norm": 0.05868120491504669, "learning_rate": 0.00011438021112068541, "loss": 1.4555, "step": 17770 }, { "epoch": 0.7901827397625717, "grad_norm": 0.05930672958493233, "learning_rate": 0.00011428717199521771, "loss": 1.4523, "step": 17772 }, { "epoch": 0.7902716642212441, "grad_norm": 0.058497123420238495, "learning_rate": 0.0001141941658421825, "loss": 1.4578, "step": 17774 }, { "epoch": 0.7903605886799164, "grad_norm": 0.05888426676392555, "learning_rate": 0.00011410119266953001, "loss": 1.4535, "step": 17776 }, { "epoch": 0.7904495131385888, "grad_norm": 0.05962875857949257, "learning_rate": 0.00011400825248520818, "loss": 1.4571, "step": 17778 }, { "epoch": 0.7905384375972612, "grad_norm": 0.060313645750284195, "learning_rate": 0.00011391534529716208, "loss": 1.4503, "step": 17780 }, { "epoch": 0.7906273620559335, "grad_norm": 0.059360720217227936, "learning_rate": 0.00011382247111333355, "loss": 1.4556, "step": 17782 }, { "epoch": 0.7907162865146058, "grad_norm": 0.058396752923727036, "learning_rate": 0.00011372962994166203, "loss": 1.4519, "step": 17784 }, { "epoch": 0.7908052109732782, "grad_norm": 0.058448676019907, "learning_rate": 0.00011363682179008406, "loss": 1.4496, "step": 17786 }, { "epoch": 0.7908941354319505, "grad_norm": 0.06033730506896973, "learning_rate": 0.00011354404666653311, "loss": 1.4524, "step": 17788 }, { "epoch": 0.7909830598906229, "grad_norm": 0.059229444712400436, "learning_rate": 0.00011345130457894015, "loss": 1.4524, "step": 17790 }, { "epoch": 0.7910719843492953, "grad_norm": 0.0583866648375988, "learning_rate": 0.0001133585955352332, "loss": 1.4583, "step": 17792 }, { "epoch": 0.7911609088079676, "grad_norm": 0.05953531712293625, "learning_rate": 0.00011326591954333732, "loss": 1.4556, "step": 17794 }, { "epoch": 0.79124983326664, "grad_norm": 0.05893312767148018, "learning_rate": 0.00011317327661117488, "loss": 1.4497, "step": 17796 }, { "epoch": 0.7913387577253124, "grad_norm": 0.057053301483392715, "learning_rate": 0.0001130806667466654, "loss": 1.4489, "step": 17798 }, { "epoch": 0.7914276821839847, "grad_norm": 0.05844390019774437, "learning_rate": 0.00011298808995772581, "loss": 1.4558, "step": 17800 }, { "epoch": 0.7915166066426571, "grad_norm": 0.05953795462846756, "learning_rate": 0.00011289554625226977, "loss": 1.4548, "step": 17802 }, { "epoch": 0.7916055311013295, "grad_norm": 0.05917130783200264, "learning_rate": 0.00011280303563820815, "loss": 1.4518, "step": 17804 }, { "epoch": 0.7916944555600017, "grad_norm": 0.0593201145529747, "learning_rate": 0.00011271055812344955, "loss": 1.4529, "step": 17806 }, { "epoch": 0.7917833800186741, "grad_norm": 0.0592842660844326, "learning_rate": 0.00011261811371589919, "loss": 1.4521, "step": 17808 }, { "epoch": 0.7918723044773465, "grad_norm": 0.059808555990457535, "learning_rate": 0.0001125257024234595, "loss": 1.4525, "step": 17810 }, { "epoch": 0.7919612289360188, "grad_norm": 0.05857331305742264, "learning_rate": 0.0001124333242540303, "loss": 1.4547, "step": 17812 }, { "epoch": 0.7920501533946912, "grad_norm": 0.05948074907064438, "learning_rate": 0.00011234097921550862, "loss": 1.4527, "step": 17814 }, { "epoch": 0.7921390778533636, "grad_norm": 0.05830054357647896, "learning_rate": 0.00011224866731578831, "loss": 1.4561, "step": 17816 }, { "epoch": 0.7922280023120359, "grad_norm": 0.057090599089860916, "learning_rate": 0.00011215638856276061, "loss": 1.4556, "step": 17818 }, { "epoch": 0.7923169267707083, "grad_norm": 0.05876989662647247, "learning_rate": 0.000112064142964314, "loss": 1.4605, "step": 17820 }, { "epoch": 0.7924058512293807, "grad_norm": 0.05962676927447319, "learning_rate": 0.00011197193052833421, "loss": 1.4564, "step": 17822 }, { "epoch": 0.792494775688053, "grad_norm": 0.058956604450941086, "learning_rate": 0.00011187975126270372, "loss": 1.4526, "step": 17824 }, { "epoch": 0.7925837001467254, "grad_norm": 0.06046443060040474, "learning_rate": 0.00011178760517530224, "loss": 1.4493, "step": 17826 }, { "epoch": 0.7926726246053977, "grad_norm": 0.059528037905693054, "learning_rate": 0.00011169549227400733, "loss": 1.4591, "step": 17828 }, { "epoch": 0.79276154906407, "grad_norm": 0.060843151062726974, "learning_rate": 0.00011160341256669299, "loss": 1.4522, "step": 17830 }, { "epoch": 0.7928504735227424, "grad_norm": 0.06023893877863884, "learning_rate": 0.00011151136606123036, "loss": 1.4516, "step": 17832 }, { "epoch": 0.7929393979814148, "grad_norm": 0.05774928256869316, "learning_rate": 0.0001114193527654882, "loss": 1.4534, "step": 17834 }, { "epoch": 0.7930283224400871, "grad_norm": 0.05887966603040695, "learning_rate": 0.00011132737268733217, "loss": 1.4559, "step": 17836 }, { "epoch": 0.7931172468987595, "grad_norm": 0.05917837843298912, "learning_rate": 0.00011123542583462531, "loss": 1.4621, "step": 17838 }, { "epoch": 0.7932061713574319, "grad_norm": 0.05823491886258125, "learning_rate": 0.00011114351221522728, "loss": 1.4513, "step": 17840 }, { "epoch": 0.7932950958161042, "grad_norm": 0.059828367084264755, "learning_rate": 0.00011105163183699552, "loss": 1.4576, "step": 17842 }, { "epoch": 0.7933840202747766, "grad_norm": 0.05933848023414612, "learning_rate": 0.00011095978470778445, "loss": 1.4588, "step": 17844 }, { "epoch": 0.793472944733449, "grad_norm": 0.05880282446742058, "learning_rate": 0.00011086797083544526, "loss": 1.46, "step": 17846 }, { "epoch": 0.7935618691921212, "grad_norm": 0.05894262716174126, "learning_rate": 0.00011077619022782676, "loss": 1.4547, "step": 17848 }, { "epoch": 0.7936507936507936, "grad_norm": 0.05894016847014427, "learning_rate": 0.00011068444289277496, "loss": 1.4554, "step": 17850 }, { "epoch": 0.793739718109466, "grad_norm": 0.06030754745006561, "learning_rate": 0.00011059272883813248, "loss": 1.4538, "step": 17852 }, { "epoch": 0.7938286425681383, "grad_norm": 0.05975908041000366, "learning_rate": 0.0001105010480717396, "loss": 1.4545, "step": 17854 }, { "epoch": 0.7939175670268107, "grad_norm": 0.05732753127813339, "learning_rate": 0.00011040940060143373, "loss": 1.4432, "step": 17856 }, { "epoch": 0.7940064914854831, "grad_norm": 0.05890670418739319, "learning_rate": 0.00011031778643504902, "loss": 1.4489, "step": 17858 }, { "epoch": 0.7940954159441554, "grad_norm": 0.05884822458028793, "learning_rate": 0.00011022620558041735, "loss": 1.4521, "step": 17860 }, { "epoch": 0.7941843404028278, "grad_norm": 0.058066993951797485, "learning_rate": 0.00011013465804536715, "loss": 1.457, "step": 17862 }, { "epoch": 0.7942732648615002, "grad_norm": 0.05799723044037819, "learning_rate": 0.00011004314383772446, "loss": 1.4489, "step": 17864 }, { "epoch": 0.7943621893201726, "grad_norm": 0.058095499873161316, "learning_rate": 0.00010995166296531251, "loss": 1.446, "step": 17866 }, { "epoch": 0.7944511137788449, "grad_norm": 0.05935432016849518, "learning_rate": 0.0001098602154359511, "loss": 1.4607, "step": 17868 }, { "epoch": 0.7945400382375172, "grad_norm": 0.06020105257630348, "learning_rate": 0.0001097688012574578, "loss": 1.4538, "step": 17870 }, { "epoch": 0.7946289626961895, "grad_norm": 0.05931893363595009, "learning_rate": 0.00010967742043764717, "loss": 1.454, "step": 17872 }, { "epoch": 0.7947178871548619, "grad_norm": 0.059080854058265686, "learning_rate": 0.00010958607298433059, "loss": 1.454, "step": 17874 }, { "epoch": 0.7948068116135343, "grad_norm": 0.05938325077295303, "learning_rate": 0.00010949475890531701, "loss": 1.4534, "step": 17876 }, { "epoch": 0.7948957360722066, "grad_norm": 0.059726350009441376, "learning_rate": 0.00010940347820841251, "loss": 1.4464, "step": 17878 }, { "epoch": 0.794984660530879, "grad_norm": 0.05838613212108612, "learning_rate": 0.00010931223090141984, "loss": 1.4567, "step": 17880 }, { "epoch": 0.7950735849895514, "grad_norm": 0.05989585816860199, "learning_rate": 0.00010922101699213948, "loss": 1.4549, "step": 17882 }, { "epoch": 0.7951625094482238, "grad_norm": 0.05945861339569092, "learning_rate": 0.00010912983648836861, "loss": 1.4555, "step": 17884 }, { "epoch": 0.7952514339068961, "grad_norm": 0.05932047963142395, "learning_rate": 0.0001090386893979018, "loss": 1.4556, "step": 17886 }, { "epoch": 0.7953403583655685, "grad_norm": 0.059849612414836884, "learning_rate": 0.00010894757572853086, "loss": 1.4606, "step": 17888 }, { "epoch": 0.7954292828242409, "grad_norm": 0.05838996544480324, "learning_rate": 0.00010885649548804433, "loss": 1.4567, "step": 17890 }, { "epoch": 0.7955182072829131, "grad_norm": 0.05834618955850601, "learning_rate": 0.00010876544868422827, "loss": 1.4574, "step": 17892 }, { "epoch": 0.7956071317415855, "grad_norm": 0.05943123623728752, "learning_rate": 0.00010867443532486587, "loss": 1.4577, "step": 17894 }, { "epoch": 0.7956960562002579, "grad_norm": 0.05874134600162506, "learning_rate": 0.00010858345541773717, "loss": 1.4508, "step": 17896 }, { "epoch": 0.7957849806589302, "grad_norm": 0.0590997152030468, "learning_rate": 0.00010849250897061963, "loss": 1.4554, "step": 17898 }, { "epoch": 0.7958739051176026, "grad_norm": 0.05840156599879265, "learning_rate": 0.00010840159599128785, "loss": 1.4579, "step": 17900 }, { "epoch": 0.795962829576275, "grad_norm": 0.05960099771618843, "learning_rate": 0.00010831071648751322, "loss": 1.4622, "step": 17902 }, { "epoch": 0.7960517540349473, "grad_norm": 0.05900590494275093, "learning_rate": 0.00010821987046706472, "loss": 1.4528, "step": 17904 }, { "epoch": 0.7961406784936197, "grad_norm": 0.05795317143201828, "learning_rate": 0.00010812905793770816, "loss": 1.4526, "step": 17906 }, { "epoch": 0.7962296029522921, "grad_norm": 0.06108953431248665, "learning_rate": 0.00010803827890720675, "loss": 1.4571, "step": 17908 }, { "epoch": 0.7963185274109644, "grad_norm": 0.059297602623701096, "learning_rate": 0.00010794753338332064, "loss": 1.4518, "step": 17910 }, { "epoch": 0.7964074518696368, "grad_norm": 0.059192463755607605, "learning_rate": 0.00010785682137380681, "loss": 1.4542, "step": 17912 }, { "epoch": 0.796496376328309, "grad_norm": 0.05878617987036705, "learning_rate": 0.00010776614288642028, "loss": 1.4563, "step": 17914 }, { "epoch": 0.7965853007869814, "grad_norm": 0.05914062634110451, "learning_rate": 0.00010767549792891235, "loss": 1.4566, "step": 17916 }, { "epoch": 0.7966742252456538, "grad_norm": 0.06022977828979492, "learning_rate": 0.00010758488650903165, "loss": 1.4577, "step": 17918 }, { "epoch": 0.7967631497043262, "grad_norm": 0.05821533128619194, "learning_rate": 0.0001074943086345242, "loss": 1.4544, "step": 17920 }, { "epoch": 0.7968520741629985, "grad_norm": 0.0580059252679348, "learning_rate": 0.00010740376431313293, "loss": 1.4557, "step": 17922 }, { "epoch": 0.7969409986216709, "grad_norm": 0.05985068529844284, "learning_rate": 0.00010731325355259813, "loss": 1.4548, "step": 17924 }, { "epoch": 0.7970299230803433, "grad_norm": 0.05859328433871269, "learning_rate": 0.0001072227763606568, "loss": 1.4509, "step": 17926 }, { "epoch": 0.7971188475390156, "grad_norm": 0.0583089180290699, "learning_rate": 0.00010713233274504348, "loss": 1.4518, "step": 17928 }, { "epoch": 0.797207771997688, "grad_norm": 0.05798826739192009, "learning_rate": 0.00010704192271348978, "loss": 1.4551, "step": 17930 }, { "epoch": 0.7972966964563604, "grad_norm": 0.05828414484858513, "learning_rate": 0.00010695154627372428, "loss": 1.4539, "step": 17932 }, { "epoch": 0.7973856209150327, "grad_norm": 0.05891380086541176, "learning_rate": 0.00010686120343347238, "loss": 1.4568, "step": 17934 }, { "epoch": 0.797474545373705, "grad_norm": 0.059466563165187836, "learning_rate": 0.00010677089420045765, "loss": 1.4491, "step": 17936 }, { "epoch": 0.7975634698323774, "grad_norm": 0.059041835367679596, "learning_rate": 0.00010668061858239974, "loss": 1.4512, "step": 17938 }, { "epoch": 0.7976523942910497, "grad_norm": 0.05893898755311966, "learning_rate": 0.00010659037658701576, "loss": 1.4581, "step": 17940 }, { "epoch": 0.7977413187497221, "grad_norm": 0.05860443785786629, "learning_rate": 0.00010650016822202008, "loss": 1.4536, "step": 17942 }, { "epoch": 0.7978302432083945, "grad_norm": 0.058257170021533966, "learning_rate": 0.00010640999349512409, "loss": 1.4592, "step": 17944 }, { "epoch": 0.7979191676670668, "grad_norm": 0.05957287922501564, "learning_rate": 0.00010631985241403653, "loss": 1.4529, "step": 17946 }, { "epoch": 0.7980080921257392, "grad_norm": 0.05837190896272659, "learning_rate": 0.00010622974498646271, "loss": 1.4488, "step": 17948 }, { "epoch": 0.7980970165844116, "grad_norm": 0.05990975350141525, "learning_rate": 0.00010613967122010553, "loss": 1.4598, "step": 17950 }, { "epoch": 0.7981859410430839, "grad_norm": 0.05874193459749222, "learning_rate": 0.00010604963112266513, "loss": 1.4528, "step": 17952 }, { "epoch": 0.7982748655017563, "grad_norm": 0.05808878690004349, "learning_rate": 0.00010595962470183823, "loss": 1.4524, "step": 17954 }, { "epoch": 0.7983637899604287, "grad_norm": 0.05945536866784096, "learning_rate": 0.00010586965196531905, "loss": 1.4542, "step": 17956 }, { "epoch": 0.7984527144191009, "grad_norm": 0.06018710881471634, "learning_rate": 0.00010577971292079907, "loss": 1.4543, "step": 17958 }, { "epoch": 0.7985416388777733, "grad_norm": 0.05900532007217407, "learning_rate": 0.00010568980757596636, "loss": 1.4469, "step": 17960 }, { "epoch": 0.7986305633364457, "grad_norm": 0.0572827085852623, "learning_rate": 0.00010559993593850654, "loss": 1.4496, "step": 17962 }, { "epoch": 0.798719487795118, "grad_norm": 0.06311970204114914, "learning_rate": 0.0001055100980161024, "loss": 1.4553, "step": 17964 }, { "epoch": 0.7988084122537904, "grad_norm": 0.058263469487428665, "learning_rate": 0.00010542029381643343, "loss": 1.4536, "step": 17966 }, { "epoch": 0.7988973367124628, "grad_norm": 0.06007849797606468, "learning_rate": 0.00010533052334717675, "loss": 1.4567, "step": 17968 }, { "epoch": 0.7989862611711351, "grad_norm": 0.05845961719751358, "learning_rate": 0.000105240786616006, "loss": 1.4598, "step": 17970 }, { "epoch": 0.7990751856298075, "grad_norm": 0.058356136083602905, "learning_rate": 0.0001051510836305925, "loss": 1.4548, "step": 17972 }, { "epoch": 0.7991641100884799, "grad_norm": 0.05997835844755173, "learning_rate": 0.0001050614143986045, "loss": 1.4535, "step": 17974 }, { "epoch": 0.7992530345471522, "grad_norm": 0.06147778779268265, "learning_rate": 0.00010497177892770715, "loss": 1.4545, "step": 17976 }, { "epoch": 0.7993419590058245, "grad_norm": 0.058420371264219284, "learning_rate": 0.00010488217722556292, "loss": 1.4469, "step": 17978 }, { "epoch": 0.7994308834644969, "grad_norm": 0.05859336629509926, "learning_rate": 0.00010479260929983159, "loss": 1.4563, "step": 17980 }, { "epoch": 0.7995198079231692, "grad_norm": 0.059518374502658844, "learning_rate": 0.00010470307515816951, "loss": 1.4526, "step": 17982 }, { "epoch": 0.7996087323818416, "grad_norm": 0.058449771255254745, "learning_rate": 0.00010461357480823052, "loss": 1.4492, "step": 17984 }, { "epoch": 0.799697656840514, "grad_norm": 0.06011267751455307, "learning_rate": 0.00010452410825766579, "loss": 1.4624, "step": 17986 }, { "epoch": 0.7997865812991863, "grad_norm": 0.05873945727944374, "learning_rate": 0.00010443467551412289, "loss": 1.4558, "step": 17988 }, { "epoch": 0.7998755057578587, "grad_norm": 0.06056896224617958, "learning_rate": 0.00010434527658524729, "loss": 1.4524, "step": 17990 }, { "epoch": 0.7999644302165311, "grad_norm": 0.05866500735282898, "learning_rate": 0.00010425591147868086, "loss": 1.4497, "step": 17992 }, { "epoch": 0.8000533546752034, "grad_norm": 0.0592012032866478, "learning_rate": 0.00010416658020206316, "loss": 1.4537, "step": 17994 }, { "epoch": 0.8001422791338758, "grad_norm": 0.06068159639835358, "learning_rate": 0.0001040772827630307, "loss": 1.4533, "step": 17996 }, { "epoch": 0.8002312035925482, "grad_norm": 0.060801684856414795, "learning_rate": 0.00010398801916921669, "loss": 1.4512, "step": 17998 }, { "epoch": 0.8003201280512204, "grad_norm": 0.05898714438080788, "learning_rate": 0.00010389878942825204, "loss": 1.451, "step": 18000 }, { "epoch": 0.8003201280512204, "eval_loss": 1.4389121532440186, "eval_runtime": 12.4447, "eval_samples_per_second": 555.256, "eval_steps_per_second": 69.427, "step": 18000 }, { "epoch": 0.8004090525098928, "grad_norm": 0.05900314450263977, "learning_rate": 0.00010380959354776454, "loss": 1.4508, "step": 18002 }, { "epoch": 0.8004979769685652, "grad_norm": 0.05843342840671539, "learning_rate": 0.0001037204315353788, "loss": 1.4576, "step": 18004 }, { "epoch": 0.8005869014272375, "grad_norm": 0.059655383229255676, "learning_rate": 0.0001036313033987169, "loss": 1.4518, "step": 18006 }, { "epoch": 0.8006758258859099, "grad_norm": 0.05997476726770401, "learning_rate": 0.00010354220914539808, "loss": 1.4529, "step": 18008 }, { "epoch": 0.8007647503445823, "grad_norm": 0.05772737041115761, "learning_rate": 0.00010345314878303824, "loss": 1.4528, "step": 18010 }, { "epoch": 0.8008536748032546, "grad_norm": 0.05794164165854454, "learning_rate": 0.00010336412231925074, "loss": 1.4483, "step": 18012 }, { "epoch": 0.800942599261927, "grad_norm": 0.05926957726478577, "learning_rate": 0.000103275129761646, "loss": 1.4539, "step": 18014 }, { "epoch": 0.8010315237205994, "grad_norm": 0.05818663910031319, "learning_rate": 0.00010318617111783157, "loss": 1.4481, "step": 18016 }, { "epoch": 0.8011204481792717, "grad_norm": 0.060195665806531906, "learning_rate": 0.00010309724639541196, "loss": 1.4547, "step": 18018 }, { "epoch": 0.8012093726379441, "grad_norm": 0.06009439751505852, "learning_rate": 0.00010300835560198873, "loss": 1.4512, "step": 18020 }, { "epoch": 0.8012982970966164, "grad_norm": 0.05936377868056297, "learning_rate": 0.00010291949874516071, "loss": 1.4537, "step": 18022 }, { "epoch": 0.8013872215552887, "grad_norm": 0.05843667313456535, "learning_rate": 0.00010283067583252393, "loss": 1.4567, "step": 18024 }, { "epoch": 0.8014761460139611, "grad_norm": 0.05879830941557884, "learning_rate": 0.00010274188687167112, "loss": 1.4563, "step": 18026 }, { "epoch": 0.8015650704726335, "grad_norm": 0.058328744024038315, "learning_rate": 0.00010265313187019254, "loss": 1.4555, "step": 18028 }, { "epoch": 0.8016539949313058, "grad_norm": 0.05817698314785957, "learning_rate": 0.00010256441083567524, "loss": 1.4539, "step": 18030 }, { "epoch": 0.8017429193899782, "grad_norm": 0.06012728065252304, "learning_rate": 0.00010247572377570369, "loss": 1.454, "step": 18032 }, { "epoch": 0.8018318438486506, "grad_norm": 0.05919637903571129, "learning_rate": 0.00010238707069785897, "loss": 1.4564, "step": 18034 }, { "epoch": 0.801920768307323, "grad_norm": 0.058458033949136734, "learning_rate": 0.00010229845160971967, "loss": 1.4531, "step": 18036 }, { "epoch": 0.8020096927659953, "grad_norm": 0.059698980301618576, "learning_rate": 0.00010220986651886149, "loss": 1.4494, "step": 18038 }, { "epoch": 0.8020986172246677, "grad_norm": 0.05928831920027733, "learning_rate": 0.00010212131543285691, "loss": 1.4512, "step": 18040 }, { "epoch": 0.80218754168334, "grad_norm": 0.05956317111849785, "learning_rate": 0.00010203279835927536, "loss": 1.4557, "step": 18042 }, { "epoch": 0.8022764661420123, "grad_norm": 0.05859542265534401, "learning_rate": 0.00010194431530568432, "loss": 1.4568, "step": 18044 }, { "epoch": 0.8023653906006847, "grad_norm": 0.060236856341362, "learning_rate": 0.00010185586627964727, "loss": 1.4524, "step": 18046 }, { "epoch": 0.802454315059357, "grad_norm": 0.05890626460313797, "learning_rate": 0.00010176745128872544, "loss": 1.448, "step": 18048 }, { "epoch": 0.8025432395180294, "grad_norm": 0.05818956717848778, "learning_rate": 0.00010167907034047669, "loss": 1.4551, "step": 18050 }, { "epoch": 0.8026321639767018, "grad_norm": 0.05896204337477684, "learning_rate": 0.00010159072344245634, "loss": 1.4493, "step": 18052 }, { "epoch": 0.8027210884353742, "grad_norm": 0.0573628768324852, "learning_rate": 0.00010150241060221682, "loss": 1.4554, "step": 18054 }, { "epoch": 0.8028100128940465, "grad_norm": 0.0591515451669693, "learning_rate": 0.00010141413182730724, "loss": 1.4532, "step": 18056 }, { "epoch": 0.8028989373527189, "grad_norm": 0.0614108145236969, "learning_rate": 0.00010132588712527418, "loss": 1.4555, "step": 18058 }, { "epoch": 0.8029878618113913, "grad_norm": 0.05975984036922455, "learning_rate": 0.00010123767650366134, "loss": 1.4472, "step": 18060 }, { "epoch": 0.8030767862700636, "grad_norm": 0.05843626707792282, "learning_rate": 0.00010114949997000916, "loss": 1.4553, "step": 18062 }, { "epoch": 0.803165710728736, "grad_norm": 0.05943736433982849, "learning_rate": 0.0001010613575318552, "loss": 1.4515, "step": 18064 }, { "epoch": 0.8032546351874083, "grad_norm": 0.05971449986100197, "learning_rate": 0.00010097324919673467, "loss": 1.4504, "step": 18066 }, { "epoch": 0.8033435596460806, "grad_norm": 0.05878691002726555, "learning_rate": 0.00010088517497217914, "loss": 1.4576, "step": 18068 }, { "epoch": 0.803432484104753, "grad_norm": 0.059320129454135895, "learning_rate": 0.00010079713486571784, "loss": 1.4577, "step": 18070 }, { "epoch": 0.8035214085634254, "grad_norm": 0.05824529007077217, "learning_rate": 0.00010070912888487649, "loss": 1.4574, "step": 18072 }, { "epoch": 0.8036103330220977, "grad_norm": 0.05816292017698288, "learning_rate": 0.0001006211570371785, "loss": 1.453, "step": 18074 }, { "epoch": 0.8036992574807701, "grad_norm": 0.059266943484544754, "learning_rate": 0.00010053321933014409, "loss": 1.4515, "step": 18076 }, { "epoch": 0.8037881819394425, "grad_norm": 0.05980881303548813, "learning_rate": 0.0001004453157712904, "loss": 1.4508, "step": 18078 }, { "epoch": 0.8038771063981148, "grad_norm": 0.05897355452179909, "learning_rate": 0.00010035744636813187, "loss": 1.4602, "step": 18080 }, { "epoch": 0.8039660308567872, "grad_norm": 0.060474496334791183, "learning_rate": 0.00010026961112818011, "loss": 1.4631, "step": 18082 }, { "epoch": 0.8040549553154596, "grad_norm": 0.05896021053195, "learning_rate": 0.00010018181005894345, "loss": 1.4544, "step": 18084 }, { "epoch": 0.8041438797741318, "grad_norm": 0.05860500782728195, "learning_rate": 0.00010009404316792753, "loss": 1.4476, "step": 18086 }, { "epoch": 0.8042328042328042, "grad_norm": 0.06036067008972168, "learning_rate": 0.00010000631046263537, "loss": 1.4552, "step": 18088 }, { "epoch": 0.8043217286914766, "grad_norm": 0.061282288283109665, "learning_rate": 9.991861195056629e-05, "loss": 1.4558, "step": 18090 }, { "epoch": 0.8044106531501489, "grad_norm": 0.058544036000967026, "learning_rate": 9.98309476392174e-05, "loss": 1.4548, "step": 18092 }, { "epoch": 0.8044995776088213, "grad_norm": 0.05853986367583275, "learning_rate": 9.974331753608274e-05, "loss": 1.4542, "step": 18094 }, { "epoch": 0.8045885020674937, "grad_norm": 0.058233581483364105, "learning_rate": 9.965572164865299e-05, "loss": 1.4565, "step": 18096 }, { "epoch": 0.804677426526166, "grad_norm": 0.05811081454157829, "learning_rate": 9.956815998441659e-05, "loss": 1.4523, "step": 18098 }, { "epoch": 0.8047663509848384, "grad_norm": 0.0591614656150341, "learning_rate": 9.948063255085821e-05, "loss": 1.4454, "step": 18100 }, { "epoch": 0.8048552754435108, "grad_norm": 0.0593455545604229, "learning_rate": 9.939313935546069e-05, "loss": 1.4566, "step": 18102 }, { "epoch": 0.8049441999021831, "grad_norm": 0.059242065995931625, "learning_rate": 9.930568040570304e-05, "loss": 1.453, "step": 18104 }, { "epoch": 0.8050331243608555, "grad_norm": 0.05771373212337494, "learning_rate": 9.921825570906145e-05, "loss": 1.4504, "step": 18106 }, { "epoch": 0.8051220488195278, "grad_norm": 0.05904533714056015, "learning_rate": 9.913086527300962e-05, "loss": 1.4539, "step": 18108 }, { "epoch": 0.8052109732782001, "grad_norm": 0.057896826416254044, "learning_rate": 9.904350910501808e-05, "loss": 1.4482, "step": 18110 }, { "epoch": 0.8052998977368725, "grad_norm": 0.05723195523023605, "learning_rate": 9.895618721255422e-05, "loss": 1.4536, "step": 18112 }, { "epoch": 0.8053888221955449, "grad_norm": 0.05811459571123123, "learning_rate": 9.886889960308281e-05, "loss": 1.4508, "step": 18114 }, { "epoch": 0.8054777466542172, "grad_norm": 0.058095820248126984, "learning_rate": 9.878164628406578e-05, "loss": 1.4489, "step": 18116 }, { "epoch": 0.8055666711128896, "grad_norm": 0.059355780482292175, "learning_rate": 9.869442726296157e-05, "loss": 1.456, "step": 18118 }, { "epoch": 0.805655595571562, "grad_norm": 0.059321098029613495, "learning_rate": 9.860724254722631e-05, "loss": 1.4501, "step": 18120 }, { "epoch": 0.8057445200302343, "grad_norm": 0.05816931650042534, "learning_rate": 9.852009214431262e-05, "loss": 1.4581, "step": 18122 }, { "epoch": 0.8058334444889067, "grad_norm": 0.05967382341623306, "learning_rate": 9.843297606167095e-05, "loss": 1.4558, "step": 18124 }, { "epoch": 0.8059223689475791, "grad_norm": 0.05909135937690735, "learning_rate": 9.834589430674817e-05, "loss": 1.4519, "step": 18126 }, { "epoch": 0.8060112934062514, "grad_norm": 0.05855505168437958, "learning_rate": 9.825884688698822e-05, "loss": 1.4511, "step": 18128 }, { "epoch": 0.8061002178649237, "grad_norm": 0.0576552115380764, "learning_rate": 9.817183380983246e-05, "loss": 1.4466, "step": 18130 }, { "epoch": 0.8061891423235961, "grad_norm": 0.059187132865190506, "learning_rate": 9.808485508271925e-05, "loss": 1.4462, "step": 18132 }, { "epoch": 0.8062780667822684, "grad_norm": 0.059183891862630844, "learning_rate": 9.799791071308373e-05, "loss": 1.4586, "step": 18134 }, { "epoch": 0.8063669912409408, "grad_norm": 0.05837311968207359, "learning_rate": 9.791100070835834e-05, "loss": 1.4525, "step": 18136 }, { "epoch": 0.8064559156996132, "grad_norm": 0.059481870383024216, "learning_rate": 9.782412507597255e-05, "loss": 1.4545, "step": 18138 }, { "epoch": 0.8065448401582855, "grad_norm": 0.05896589532494545, "learning_rate": 9.7737283823353e-05, "loss": 1.4523, "step": 18140 }, { "epoch": 0.8066337646169579, "grad_norm": 0.058587439358234406, "learning_rate": 9.765047695792306e-05, "loss": 1.4515, "step": 18142 }, { "epoch": 0.8067226890756303, "grad_norm": 0.058542508631944656, "learning_rate": 9.756370448710339e-05, "loss": 1.45, "step": 18144 }, { "epoch": 0.8068116135343026, "grad_norm": 0.05747520923614502, "learning_rate": 9.747696641831194e-05, "loss": 1.45, "step": 18146 }, { "epoch": 0.806900537992975, "grad_norm": 0.05875362455844879, "learning_rate": 9.739026275896323e-05, "loss": 1.451, "step": 18148 }, { "epoch": 0.8069894624516474, "grad_norm": 0.06102447956800461, "learning_rate": 9.730359351646884e-05, "loss": 1.4512, "step": 18150 }, { "epoch": 0.8070783869103196, "grad_norm": 0.059680186212062836, "learning_rate": 9.72169586982381e-05, "loss": 1.4508, "step": 18152 }, { "epoch": 0.807167311368992, "grad_norm": 0.05882233753800392, "learning_rate": 9.713035831167672e-05, "loss": 1.4519, "step": 18154 }, { "epoch": 0.8072562358276644, "grad_norm": 0.05874011665582657, "learning_rate": 9.704379236418776e-05, "loss": 1.4537, "step": 18156 }, { "epoch": 0.8073451602863367, "grad_norm": 0.059738658368587494, "learning_rate": 9.695726086317108e-05, "loss": 1.4502, "step": 18158 }, { "epoch": 0.8074340847450091, "grad_norm": 0.05960926041007042, "learning_rate": 9.687076381602388e-05, "loss": 1.4602, "step": 18160 }, { "epoch": 0.8075230092036815, "grad_norm": 0.060264989733695984, "learning_rate": 9.678430123014048e-05, "loss": 1.4487, "step": 18162 }, { "epoch": 0.8076119336623538, "grad_norm": 0.05822618305683136, "learning_rate": 9.669787311291178e-05, "loss": 1.4562, "step": 18164 }, { "epoch": 0.8077008581210262, "grad_norm": 0.05792885646224022, "learning_rate": 9.661147947172616e-05, "loss": 1.4564, "step": 18166 }, { "epoch": 0.8077897825796986, "grad_norm": 0.060065533965826035, "learning_rate": 9.652512031396916e-05, "loss": 1.4594, "step": 18168 }, { "epoch": 0.807878707038371, "grad_norm": 0.05816401168704033, "learning_rate": 9.643879564702269e-05, "loss": 1.4583, "step": 18170 }, { "epoch": 0.8079676314970433, "grad_norm": 0.05799930915236473, "learning_rate": 9.635250547826646e-05, "loss": 1.4538, "step": 18172 }, { "epoch": 0.8080565559557156, "grad_norm": 0.05812399461865425, "learning_rate": 9.626624981507703e-05, "loss": 1.4559, "step": 18174 }, { "epoch": 0.8081454804143879, "grad_norm": 0.05904964357614517, "learning_rate": 9.618002866482761e-05, "loss": 1.4522, "step": 18176 }, { "epoch": 0.8082344048730603, "grad_norm": 0.05795786529779434, "learning_rate": 9.609384203488903e-05, "loss": 1.4508, "step": 18178 }, { "epoch": 0.8083233293317327, "grad_norm": 0.0590299516916275, "learning_rate": 9.60076899326287e-05, "loss": 1.4599, "step": 18180 }, { "epoch": 0.808412253790405, "grad_norm": 0.05843953415751457, "learning_rate": 9.592157236541132e-05, "loss": 1.454, "step": 18182 }, { "epoch": 0.8085011782490774, "grad_norm": 0.05828475579619408, "learning_rate": 9.583548934059882e-05, "loss": 1.4551, "step": 18184 }, { "epoch": 0.8085901027077498, "grad_norm": 0.058298975229263306, "learning_rate": 9.574944086554965e-05, "loss": 1.4501, "step": 18186 }, { "epoch": 0.8086790271664221, "grad_norm": 0.05863676592707634, "learning_rate": 9.566342694761976e-05, "loss": 1.4562, "step": 18188 }, { "epoch": 0.8087679516250945, "grad_norm": 0.05812937393784523, "learning_rate": 9.557744759416209e-05, "loss": 1.4525, "step": 18190 }, { "epoch": 0.8088568760837669, "grad_norm": 0.05726495757699013, "learning_rate": 9.549150281252633e-05, "loss": 1.448, "step": 18192 }, { "epoch": 0.8089458005424393, "grad_norm": 0.06033691018819809, "learning_rate": 9.54055926100595e-05, "loss": 1.4607, "step": 18194 }, { "epoch": 0.8090347250011115, "grad_norm": 0.0588860847055912, "learning_rate": 9.531971699410574e-05, "loss": 1.4526, "step": 18196 }, { "epoch": 0.8091236494597839, "grad_norm": 0.058994125574827194, "learning_rate": 9.523387597200578e-05, "loss": 1.4491, "step": 18198 }, { "epoch": 0.8092125739184562, "grad_norm": 0.05951934680342674, "learning_rate": 9.514806955109789e-05, "loss": 1.4552, "step": 18200 }, { "epoch": 0.8093014983771286, "grad_norm": 0.05920296534895897, "learning_rate": 9.506229773871727e-05, "loss": 1.4548, "step": 18202 }, { "epoch": 0.809390422835801, "grad_norm": 0.05928421393036842, "learning_rate": 9.497656054219578e-05, "loss": 1.4561, "step": 18204 }, { "epoch": 0.8094793472944734, "grad_norm": 0.059079330414533615, "learning_rate": 9.489085796886293e-05, "loss": 1.4492, "step": 18206 }, { "epoch": 0.8095682717531457, "grad_norm": 0.058482855558395386, "learning_rate": 9.480519002604454e-05, "loss": 1.449, "step": 18208 }, { "epoch": 0.8096571962118181, "grad_norm": 0.05823582410812378, "learning_rate": 9.471955672106436e-05, "loss": 1.4564, "step": 18210 }, { "epoch": 0.8097461206704905, "grad_norm": 0.05962125584483147, "learning_rate": 9.463395806124254e-05, "loss": 1.4559, "step": 18212 }, { "epoch": 0.8098350451291628, "grad_norm": 0.05842282250523567, "learning_rate": 9.45483940538962e-05, "loss": 1.4502, "step": 18214 }, { "epoch": 0.8099239695878351, "grad_norm": 0.05836905539035797, "learning_rate": 9.446286470633997e-05, "loss": 1.4528, "step": 18216 }, { "epoch": 0.8100128940465074, "grad_norm": 0.05897628888487816, "learning_rate": 9.437737002588525e-05, "loss": 1.4538, "step": 18218 }, { "epoch": 0.8101018185051798, "grad_norm": 0.05997723713517189, "learning_rate": 9.429191001984039e-05, "loss": 1.4573, "step": 18220 }, { "epoch": 0.8101907429638522, "grad_norm": 0.05981513485312462, "learning_rate": 9.420648469551097e-05, "loss": 1.456, "step": 18222 }, { "epoch": 0.8102796674225246, "grad_norm": 0.059481166303157806, "learning_rate": 9.412109406019948e-05, "loss": 1.4506, "step": 18224 }, { "epoch": 0.8103685918811969, "grad_norm": 0.059796884655952454, "learning_rate": 9.403573812120564e-05, "loss": 1.4566, "step": 18226 }, { "epoch": 0.8104575163398693, "grad_norm": 0.05872061103582382, "learning_rate": 9.395041688582596e-05, "loss": 1.4532, "step": 18228 }, { "epoch": 0.8105464407985417, "grad_norm": 0.060998715460300446, "learning_rate": 9.38651303613538e-05, "loss": 1.4493, "step": 18230 }, { "epoch": 0.810635365257214, "grad_norm": 0.05913839489221573, "learning_rate": 9.37798785550803e-05, "loss": 1.4568, "step": 18232 }, { "epoch": 0.8107242897158864, "grad_norm": 0.05807075276970863, "learning_rate": 9.369466147429296e-05, "loss": 1.4499, "step": 18234 }, { "epoch": 0.8108132141745588, "grad_norm": 0.05896204710006714, "learning_rate": 9.360947912627632e-05, "loss": 1.4542, "step": 18236 }, { "epoch": 0.810902138633231, "grad_norm": 0.05949874222278595, "learning_rate": 9.352433151831236e-05, "loss": 1.4478, "step": 18238 }, { "epoch": 0.8109910630919034, "grad_norm": 0.059194955974817276, "learning_rate": 9.34392186576798e-05, "loss": 1.454, "step": 18240 }, { "epoch": 0.8110799875505758, "grad_norm": 0.05962388962507248, "learning_rate": 9.335414055165459e-05, "loss": 1.4458, "step": 18242 }, { "epoch": 0.8111689120092481, "grad_norm": 0.05830967053771019, "learning_rate": 9.326909720750937e-05, "loss": 1.4562, "step": 18244 }, { "epoch": 0.8112578364679205, "grad_norm": 0.05975262448191643, "learning_rate": 9.31840886325141e-05, "loss": 1.4476, "step": 18246 }, { "epoch": 0.8113467609265929, "grad_norm": 0.058683622628450394, "learning_rate": 9.309911483393586e-05, "loss": 1.4508, "step": 18248 }, { "epoch": 0.8114356853852652, "grad_norm": 0.05931386351585388, "learning_rate": 9.301417581903831e-05, "loss": 1.4536, "step": 18250 }, { "epoch": 0.8115246098439376, "grad_norm": 0.058767132461071014, "learning_rate": 9.292927159508258e-05, "loss": 1.4536, "step": 18252 }, { "epoch": 0.81161353430261, "grad_norm": 0.060294680297374725, "learning_rate": 9.284440216932666e-05, "loss": 1.4611, "step": 18254 }, { "epoch": 0.8117024587612823, "grad_norm": 0.05967991426587105, "learning_rate": 9.27595675490256e-05, "loss": 1.4506, "step": 18256 }, { "epoch": 0.8117913832199547, "grad_norm": 0.05928469076752663, "learning_rate": 9.267476774143102e-05, "loss": 1.4445, "step": 18258 }, { "epoch": 0.811880307678627, "grad_norm": 0.059566475450992584, "learning_rate": 9.259000275379264e-05, "loss": 1.4484, "step": 18260 }, { "epoch": 0.8119692321372993, "grad_norm": 0.06041361764073372, "learning_rate": 9.250527259335606e-05, "loss": 1.4577, "step": 18262 }, { "epoch": 0.8120581565959717, "grad_norm": 0.059527479112148285, "learning_rate": 9.242057726736469e-05, "loss": 1.4552, "step": 18264 }, { "epoch": 0.8121470810546441, "grad_norm": 0.058299604803323746, "learning_rate": 9.233591678305842e-05, "loss": 1.4522, "step": 18266 }, { "epoch": 0.8122360055133164, "grad_norm": 0.05863171070814133, "learning_rate": 9.225129114767445e-05, "loss": 1.4465, "step": 18268 }, { "epoch": 0.8123249299719888, "grad_norm": 0.05925234407186508, "learning_rate": 9.216670036844715e-05, "loss": 1.454, "step": 18270 }, { "epoch": 0.8124138544306612, "grad_norm": 0.05960720032453537, "learning_rate": 9.208214445260749e-05, "loss": 1.4497, "step": 18272 }, { "epoch": 0.8125027788893335, "grad_norm": 0.05938321352005005, "learning_rate": 9.199762340738365e-05, "loss": 1.4473, "step": 18274 }, { "epoch": 0.8125917033480059, "grad_norm": 0.058269064873456955, "learning_rate": 9.191313724000117e-05, "loss": 1.4525, "step": 18276 }, { "epoch": 0.8126806278066783, "grad_norm": 0.05836362764239311, "learning_rate": 9.182868595768196e-05, "loss": 1.4513, "step": 18278 }, { "epoch": 0.8127695522653506, "grad_norm": 0.058995332568883896, "learning_rate": 9.174426956764542e-05, "loss": 1.4535, "step": 18280 }, { "epoch": 0.8128584767240229, "grad_norm": 0.05960138887166977, "learning_rate": 9.165988807710801e-05, "loss": 1.4555, "step": 18282 }, { "epoch": 0.8129474011826953, "grad_norm": 0.06037072464823723, "learning_rate": 9.157554149328267e-05, "loss": 1.4468, "step": 18284 }, { "epoch": 0.8130363256413676, "grad_norm": 0.060436177998781204, "learning_rate": 9.149122982338009e-05, "loss": 1.452, "step": 18286 }, { "epoch": 0.81312525010004, "grad_norm": 0.058294836431741714, "learning_rate": 9.140695307460728e-05, "loss": 1.4551, "step": 18288 }, { "epoch": 0.8132141745587124, "grad_norm": 0.06048420071601868, "learning_rate": 9.132271125416874e-05, "loss": 1.4588, "step": 18290 }, { "epoch": 0.8133030990173847, "grad_norm": 0.060230907052755356, "learning_rate": 9.12385043692659e-05, "loss": 1.4543, "step": 18292 }, { "epoch": 0.8133920234760571, "grad_norm": 0.05797860398888588, "learning_rate": 9.115433242709697e-05, "loss": 1.4504, "step": 18294 }, { "epoch": 0.8134809479347295, "grad_norm": 0.05805959552526474, "learning_rate": 9.107019543485745e-05, "loss": 1.457, "step": 18296 }, { "epoch": 0.8135698723934018, "grad_norm": 0.05792992562055588, "learning_rate": 9.098609339973985e-05, "loss": 1.4444, "step": 18298 }, { "epoch": 0.8136587968520742, "grad_norm": 0.05891840532422066, "learning_rate": 9.09020263289333e-05, "loss": 1.4542, "step": 18300 }, { "epoch": 0.8137477213107466, "grad_norm": 0.05851350352168083, "learning_rate": 9.081799422962434e-05, "loss": 1.4516, "step": 18302 }, { "epoch": 0.8138366457694188, "grad_norm": 0.05911056697368622, "learning_rate": 9.073399710899661e-05, "loss": 1.4541, "step": 18304 }, { "epoch": 0.8139255702280912, "grad_norm": 0.05874814838171005, "learning_rate": 9.065003497423024e-05, "loss": 1.4522, "step": 18306 }, { "epoch": 0.8140144946867636, "grad_norm": 0.058131199330091476, "learning_rate": 9.056610783250286e-05, "loss": 1.4552, "step": 18308 }, { "epoch": 0.8141034191454359, "grad_norm": 0.058527685701847076, "learning_rate": 9.048221569098903e-05, "loss": 1.4523, "step": 18310 }, { "epoch": 0.8141923436041083, "grad_norm": 0.0587623156607151, "learning_rate": 9.03983585568599e-05, "loss": 1.4523, "step": 18312 }, { "epoch": 0.8142812680627807, "grad_norm": 0.05750168114900589, "learning_rate": 9.031453643728433e-05, "loss": 1.456, "step": 18314 }, { "epoch": 0.814370192521453, "grad_norm": 0.058666735887527466, "learning_rate": 9.023074933942743e-05, "loss": 1.4556, "step": 18316 }, { "epoch": 0.8144591169801254, "grad_norm": 0.05856854468584061, "learning_rate": 9.014699727045189e-05, "loss": 1.4595, "step": 18318 }, { "epoch": 0.8145480414387978, "grad_norm": 0.058630138635635376, "learning_rate": 9.006328023751736e-05, "loss": 1.452, "step": 18320 }, { "epoch": 0.8146369658974701, "grad_norm": 0.05786571651697159, "learning_rate": 8.997959824777996e-05, "loss": 1.4531, "step": 18322 }, { "epoch": 0.8147258903561424, "grad_norm": 0.0589289627969265, "learning_rate": 8.989595130839345e-05, "loss": 1.4475, "step": 18324 }, { "epoch": 0.8148148148148148, "grad_norm": 0.05925757810473442, "learning_rate": 8.981233942650841e-05, "loss": 1.4559, "step": 18326 }, { "epoch": 0.8149037392734871, "grad_norm": 0.05926579236984253, "learning_rate": 8.97287626092721e-05, "loss": 1.4516, "step": 18328 }, { "epoch": 0.8149926637321595, "grad_norm": 0.05869268253445625, "learning_rate": 8.96452208638292e-05, "loss": 1.4525, "step": 18330 }, { "epoch": 0.8150815881908319, "grad_norm": 0.05770133063197136, "learning_rate": 8.956171419732118e-05, "loss": 1.4493, "step": 18332 }, { "epoch": 0.8151705126495042, "grad_norm": 0.05826232582330704, "learning_rate": 8.947824261688669e-05, "loss": 1.4566, "step": 18334 }, { "epoch": 0.8152594371081766, "grad_norm": 0.05967103689908981, "learning_rate": 8.939480612966112e-05, "loss": 1.4492, "step": 18336 }, { "epoch": 0.815348361566849, "grad_norm": 0.05853937938809395, "learning_rate": 8.931140474277677e-05, "loss": 1.4557, "step": 18338 }, { "epoch": 0.8154372860255213, "grad_norm": 0.05865015089511871, "learning_rate": 8.922803846336363e-05, "loss": 1.4566, "step": 18340 }, { "epoch": 0.8155262104841937, "grad_norm": 0.0575866624712944, "learning_rate": 8.914470729854802e-05, "loss": 1.4522, "step": 18342 }, { "epoch": 0.8156151349428661, "grad_norm": 0.05788535624742508, "learning_rate": 8.90614112554532e-05, "loss": 1.4507, "step": 18344 }, { "epoch": 0.8157040594015383, "grad_norm": 0.058020517230033875, "learning_rate": 8.897815034119994e-05, "loss": 1.4498, "step": 18346 }, { "epoch": 0.8157929838602107, "grad_norm": 0.061876051127910614, "learning_rate": 8.889492456290571e-05, "loss": 1.4573, "step": 18348 }, { "epoch": 0.8158819083188831, "grad_norm": 0.05803605914115906, "learning_rate": 8.88117339276851e-05, "loss": 1.4507, "step": 18350 }, { "epoch": 0.8159708327775554, "grad_norm": 0.058719929307699203, "learning_rate": 8.872857844264942e-05, "loss": 1.4549, "step": 18352 }, { "epoch": 0.8160597572362278, "grad_norm": 0.0580810084939003, "learning_rate": 8.864545811490731e-05, "loss": 1.4587, "step": 18354 }, { "epoch": 0.8161486816949002, "grad_norm": 0.059138279408216476, "learning_rate": 8.856237295156427e-05, "loss": 1.4546, "step": 18356 }, { "epoch": 0.8162376061535725, "grad_norm": 0.05815105512738228, "learning_rate": 8.847932295972277e-05, "loss": 1.4511, "step": 18358 }, { "epoch": 0.8163265306122449, "grad_norm": 0.05784906446933746, "learning_rate": 8.839630814648203e-05, "loss": 1.4503, "step": 18360 }, { "epoch": 0.8164154550709173, "grad_norm": 0.06052152067422867, "learning_rate": 8.831332851893897e-05, "loss": 1.4514, "step": 18362 }, { "epoch": 0.8165043795295897, "grad_norm": 0.058384090662002563, "learning_rate": 8.823038408418671e-05, "loss": 1.4554, "step": 18364 }, { "epoch": 0.816593303988262, "grad_norm": 0.059240229427814484, "learning_rate": 8.814747484931595e-05, "loss": 1.4546, "step": 18366 }, { "epoch": 0.8166822284469343, "grad_norm": 0.059385642409324646, "learning_rate": 8.806460082141393e-05, "loss": 1.4566, "step": 18368 }, { "epoch": 0.8167711529056066, "grad_norm": 0.05777799338102341, "learning_rate": 8.798176200756513e-05, "loss": 1.453, "step": 18370 }, { "epoch": 0.816860077364279, "grad_norm": 0.05718129873275757, "learning_rate": 8.789895841485118e-05, "loss": 1.4475, "step": 18372 }, { "epoch": 0.8169490018229514, "grad_norm": 0.05819839611649513, "learning_rate": 8.781619005035019e-05, "loss": 1.4509, "step": 18374 }, { "epoch": 0.8170379262816237, "grad_norm": 0.05856141820549965, "learning_rate": 8.773345692113771e-05, "loss": 1.4562, "step": 18376 }, { "epoch": 0.8171268507402961, "grad_norm": 0.05731408670544624, "learning_rate": 8.76507590342862e-05, "loss": 1.4501, "step": 18378 }, { "epoch": 0.8172157751989685, "grad_norm": 0.05817363038659096, "learning_rate": 8.756809639686492e-05, "loss": 1.4516, "step": 18380 }, { "epoch": 0.8173046996576409, "grad_norm": 0.05989028140902519, "learning_rate": 8.748546901594028e-05, "loss": 1.4541, "step": 18382 }, { "epoch": 0.8173936241163132, "grad_norm": 0.058697037398815155, "learning_rate": 8.740287689857574e-05, "loss": 1.4507, "step": 18384 }, { "epoch": 0.8174825485749856, "grad_norm": 0.05862530320882797, "learning_rate": 8.732032005183144e-05, "loss": 1.4558, "step": 18386 }, { "epoch": 0.817571473033658, "grad_norm": 0.058638229966163635, "learning_rate": 8.72377984827648e-05, "loss": 1.4499, "step": 18388 }, { "epoch": 0.8176603974923302, "grad_norm": 0.05860773101449013, "learning_rate": 8.715531219843026e-05, "loss": 1.4585, "step": 18390 }, { "epoch": 0.8177493219510026, "grad_norm": 0.05907618626952171, "learning_rate": 8.707286120587882e-05, "loss": 1.4575, "step": 18392 }, { "epoch": 0.817838246409675, "grad_norm": 0.058161091059446335, "learning_rate": 8.699044551215907e-05, "loss": 1.4474, "step": 18394 }, { "epoch": 0.8179271708683473, "grad_norm": 0.05811753496527672, "learning_rate": 8.690806512431598e-05, "loss": 1.4469, "step": 18396 }, { "epoch": 0.8180160953270197, "grad_norm": 0.058913908898830414, "learning_rate": 8.682572004939188e-05, "loss": 1.4574, "step": 18398 }, { "epoch": 0.8181050197856921, "grad_norm": 0.0583617277443409, "learning_rate": 8.674341029442623e-05, "loss": 1.4558, "step": 18400 }, { "epoch": 0.8181939442443644, "grad_norm": 0.05864092707633972, "learning_rate": 8.666113586645485e-05, "loss": 1.4534, "step": 18402 }, { "epoch": 0.8182828687030368, "grad_norm": 0.057395271956920624, "learning_rate": 8.657889677251113e-05, "loss": 1.4517, "step": 18404 }, { "epoch": 0.8183717931617092, "grad_norm": 0.057702574878931046, "learning_rate": 8.649669301962532e-05, "loss": 1.46, "step": 18406 }, { "epoch": 0.8184607176203815, "grad_norm": 0.05797472596168518, "learning_rate": 8.64145246148243e-05, "loss": 1.4515, "step": 18408 }, { "epoch": 0.8185496420790539, "grad_norm": 0.058402277529239655, "learning_rate": 8.633239156513234e-05, "loss": 1.4523, "step": 18410 }, { "epoch": 0.8186385665377262, "grad_norm": 0.057839684188365936, "learning_rate": 8.625029387757066e-05, "loss": 1.4467, "step": 18412 }, { "epoch": 0.8187274909963985, "grad_norm": 0.0590512678027153, "learning_rate": 8.616823155915704e-05, "loss": 1.451, "step": 18414 }, { "epoch": 0.8188164154550709, "grad_norm": 0.05765726789832115, "learning_rate": 8.608620461690681e-05, "loss": 1.4472, "step": 18416 }, { "epoch": 0.8189053399137433, "grad_norm": 0.058935705572366714, "learning_rate": 8.600421305783168e-05, "loss": 1.4459, "step": 18418 }, { "epoch": 0.8189942643724156, "grad_norm": 0.05756261944770813, "learning_rate": 8.592225688894101e-05, "loss": 1.4481, "step": 18420 }, { "epoch": 0.819083188831088, "grad_norm": 0.05783841758966446, "learning_rate": 8.584033611724063e-05, "loss": 1.4493, "step": 18422 }, { "epoch": 0.8191721132897604, "grad_norm": 0.05876602604985237, "learning_rate": 8.575845074973337e-05, "loss": 1.4499, "step": 18424 }, { "epoch": 0.8192610377484327, "grad_norm": 0.05919130891561508, "learning_rate": 8.567660079341922e-05, "loss": 1.4478, "step": 18426 }, { "epoch": 0.8193499622071051, "grad_norm": 0.058414239436388016, "learning_rate": 8.559478625529521e-05, "loss": 1.4534, "step": 18428 }, { "epoch": 0.8194388866657775, "grad_norm": 0.05952535197138786, "learning_rate": 8.551300714235494e-05, "loss": 1.4541, "step": 18430 }, { "epoch": 0.8195278111244498, "grad_norm": 0.05861512944102287, "learning_rate": 8.543126346158947e-05, "loss": 1.4536, "step": 18432 }, { "epoch": 0.8196167355831221, "grad_norm": 0.059502918273210526, "learning_rate": 8.534955521998666e-05, "loss": 1.4562, "step": 18434 }, { "epoch": 0.8197056600417945, "grad_norm": 0.06032497435808182, "learning_rate": 8.526788242453098e-05, "loss": 1.4523, "step": 18436 }, { "epoch": 0.8197945845004668, "grad_norm": 0.05803174898028374, "learning_rate": 8.518624508220446e-05, "loss": 1.4503, "step": 18438 }, { "epoch": 0.8198835089591392, "grad_norm": 0.058706771582365036, "learning_rate": 8.510464319998568e-05, "loss": 1.4481, "step": 18440 }, { "epoch": 0.8199724334178116, "grad_norm": 0.059511151164770126, "learning_rate": 8.502307678485055e-05, "loss": 1.4465, "step": 18442 }, { "epoch": 0.8200613578764839, "grad_norm": 0.05901079997420311, "learning_rate": 8.494154584377156e-05, "loss": 1.4479, "step": 18444 }, { "epoch": 0.8201502823351563, "grad_norm": 0.05895798280835152, "learning_rate": 8.486005038371803e-05, "loss": 1.4492, "step": 18446 }, { "epoch": 0.8202392067938287, "grad_norm": 0.05760321766138077, "learning_rate": 8.477859041165714e-05, "loss": 1.4478, "step": 18448 }, { "epoch": 0.820328131252501, "grad_norm": 0.05806753411889076, "learning_rate": 8.469716593455218e-05, "loss": 1.4519, "step": 18450 }, { "epoch": 0.8204170557111734, "grad_norm": 0.06014678627252579, "learning_rate": 8.461577695936351e-05, "loss": 1.455, "step": 18452 }, { "epoch": 0.8205059801698457, "grad_norm": 0.05981629341840744, "learning_rate": 8.453442349304879e-05, "loss": 1.4504, "step": 18454 }, { "epoch": 0.820594904628518, "grad_norm": 0.05972183868288994, "learning_rate": 8.445310554256241e-05, "loss": 1.4487, "step": 18456 }, { "epoch": 0.8206838290871904, "grad_norm": 0.05913654714822769, "learning_rate": 8.437182311485597e-05, "loss": 1.4543, "step": 18458 }, { "epoch": 0.8207727535458628, "grad_norm": 0.05821114405989647, "learning_rate": 8.429057621687758e-05, "loss": 1.4494, "step": 18460 }, { "epoch": 0.8208616780045351, "grad_norm": 0.05849185958504677, "learning_rate": 8.420936485557268e-05, "loss": 1.45, "step": 18462 }, { "epoch": 0.8209506024632075, "grad_norm": 0.06026323139667511, "learning_rate": 8.412818903788377e-05, "loss": 1.4534, "step": 18464 }, { "epoch": 0.8210395269218799, "grad_norm": 0.058231696486473083, "learning_rate": 8.404704877074992e-05, "loss": 1.4521, "step": 18466 }, { "epoch": 0.8211284513805522, "grad_norm": 0.05949164554476738, "learning_rate": 8.396594406110713e-05, "loss": 1.4514, "step": 18468 }, { "epoch": 0.8212173758392246, "grad_norm": 0.05902651697397232, "learning_rate": 8.388487491588914e-05, "loss": 1.4489, "step": 18470 }, { "epoch": 0.821306300297897, "grad_norm": 0.05884255841374397, "learning_rate": 8.380384134202563e-05, "loss": 1.4476, "step": 18472 }, { "epoch": 0.8213952247565693, "grad_norm": 0.059297338128089905, "learning_rate": 8.372284334644404e-05, "loss": 1.4528, "step": 18474 }, { "epoch": 0.8214841492152416, "grad_norm": 0.05986732244491577, "learning_rate": 8.364188093606812e-05, "loss": 1.4511, "step": 18476 }, { "epoch": 0.821573073673914, "grad_norm": 0.06006040424108505, "learning_rate": 8.356095411781906e-05, "loss": 1.4573, "step": 18478 }, { "epoch": 0.8216619981325863, "grad_norm": 0.05927327647805214, "learning_rate": 8.348006289861493e-05, "loss": 1.4549, "step": 18480 }, { "epoch": 0.8217509225912587, "grad_norm": 0.05835961177945137, "learning_rate": 8.339920728537049e-05, "loss": 1.4518, "step": 18482 }, { "epoch": 0.8218398470499311, "grad_norm": 0.05824832245707512, "learning_rate": 8.331838728499768e-05, "loss": 1.4575, "step": 18484 }, { "epoch": 0.8219287715086034, "grad_norm": 0.05883040279150009, "learning_rate": 8.323760290440552e-05, "loss": 1.4548, "step": 18486 }, { "epoch": 0.8220176959672758, "grad_norm": 0.058843452483415604, "learning_rate": 8.31568541504995e-05, "loss": 1.4494, "step": 18488 }, { "epoch": 0.8221066204259482, "grad_norm": 0.05943866819143295, "learning_rate": 8.307614103018262e-05, "loss": 1.4532, "step": 18490 }, { "epoch": 0.8221955448846205, "grad_norm": 0.05836620554327965, "learning_rate": 8.299546355035465e-05, "loss": 1.4517, "step": 18492 }, { "epoch": 0.8222844693432929, "grad_norm": 0.05824948847293854, "learning_rate": 8.291482171791198e-05, "loss": 1.4466, "step": 18494 }, { "epoch": 0.8223733938019653, "grad_norm": 0.059235066175460815, "learning_rate": 8.28342155397484e-05, "loss": 1.4545, "step": 18496 }, { "epoch": 0.8224623182606375, "grad_norm": 0.058670178055763245, "learning_rate": 8.275364502275457e-05, "loss": 1.4537, "step": 18498 }, { "epoch": 0.8225512427193099, "grad_norm": 0.060267142951488495, "learning_rate": 8.26731101738178e-05, "loss": 1.4537, "step": 18500 }, { "epoch": 0.8225512427193099, "eval_loss": 1.4376198053359985, "eval_runtime": 12.4249, "eval_samples_per_second": 556.143, "eval_steps_per_second": 69.538, "step": 18500 }, { "epoch": 0.8226401671779823, "grad_norm": 0.058397773653268814, "learning_rate": 8.25926109998228e-05, "loss": 1.4565, "step": 18502 }, { "epoch": 0.8227290916366546, "grad_norm": 0.058602750301361084, "learning_rate": 8.251214750765073e-05, "loss": 1.4461, "step": 18504 }, { "epoch": 0.822818016095327, "grad_norm": 0.058638736605644226, "learning_rate": 8.243171970418012e-05, "loss": 1.4507, "step": 18506 }, { "epoch": 0.8229069405539994, "grad_norm": 0.05909786745905876, "learning_rate": 8.235132759628639e-05, "loss": 1.4492, "step": 18508 }, { "epoch": 0.8229958650126717, "grad_norm": 0.05795243754982948, "learning_rate": 8.22709711908416e-05, "loss": 1.451, "step": 18510 }, { "epoch": 0.8230847894713441, "grad_norm": 0.05899320915341377, "learning_rate": 8.219065049471503e-05, "loss": 1.4544, "step": 18512 }, { "epoch": 0.8231737139300165, "grad_norm": 0.05861734598875046, "learning_rate": 8.211036551477296e-05, "loss": 1.4559, "step": 18514 }, { "epoch": 0.8232626383886888, "grad_norm": 0.057675547897815704, "learning_rate": 8.203011625787838e-05, "loss": 1.4515, "step": 18516 }, { "epoch": 0.8233515628473612, "grad_norm": 0.05739520117640495, "learning_rate": 8.194990273089137e-05, "loss": 1.4536, "step": 18518 }, { "epoch": 0.8234404873060335, "grad_norm": 0.057842835783958435, "learning_rate": 8.186972494066907e-05, "loss": 1.4515, "step": 18520 }, { "epoch": 0.8235294117647058, "grad_norm": 0.05861688032746315, "learning_rate": 8.178958289406518e-05, "loss": 1.4518, "step": 18522 }, { "epoch": 0.8236183362233782, "grad_norm": 0.059118542820215225, "learning_rate": 8.170947659793093e-05, "loss": 1.4575, "step": 18524 }, { "epoch": 0.8237072606820506, "grad_norm": 0.057545505464076996, "learning_rate": 8.162940605911368e-05, "loss": 1.4463, "step": 18526 }, { "epoch": 0.823796185140723, "grad_norm": 0.05849096179008484, "learning_rate": 8.154937128445872e-05, "loss": 1.453, "step": 18528 }, { "epoch": 0.8238851095993953, "grad_norm": 0.05910954251885414, "learning_rate": 8.146937228080758e-05, "loss": 1.4583, "step": 18530 }, { "epoch": 0.8239740340580677, "grad_norm": 0.057638462632894516, "learning_rate": 8.13894090549988e-05, "loss": 1.4574, "step": 18532 }, { "epoch": 0.82406295851674, "grad_norm": 0.05978331342339516, "learning_rate": 8.130948161386808e-05, "loss": 1.4462, "step": 18534 }, { "epoch": 0.8241518829754124, "grad_norm": 0.058321740478277206, "learning_rate": 8.122958996424806e-05, "loss": 1.4468, "step": 18536 }, { "epoch": 0.8242408074340848, "grad_norm": 0.05788661167025566, "learning_rate": 8.11497341129681e-05, "loss": 1.4529, "step": 18538 }, { "epoch": 0.8243297318927572, "grad_norm": 0.0584048330783844, "learning_rate": 8.106991406685466e-05, "loss": 1.4579, "step": 18540 }, { "epoch": 0.8244186563514294, "grad_norm": 0.0584276020526886, "learning_rate": 8.099012983273107e-05, "loss": 1.4532, "step": 18542 }, { "epoch": 0.8245075808101018, "grad_norm": 0.059375640004873276, "learning_rate": 8.091038141741791e-05, "loss": 1.4545, "step": 18544 }, { "epoch": 0.8245965052687741, "grad_norm": 0.05816349759697914, "learning_rate": 8.083066882773205e-05, "loss": 1.4539, "step": 18546 }, { "epoch": 0.8246854297274465, "grad_norm": 0.06018855422735214, "learning_rate": 8.075099207048781e-05, "loss": 1.451, "step": 18548 }, { "epoch": 0.8247743541861189, "grad_norm": 0.05824248492717743, "learning_rate": 8.067135115249646e-05, "loss": 1.4551, "step": 18550 }, { "epoch": 0.8248632786447913, "grad_norm": 0.057999785989522934, "learning_rate": 8.059174608056597e-05, "loss": 1.4548, "step": 18552 }, { "epoch": 0.8249522031034636, "grad_norm": 0.058654844760894775, "learning_rate": 8.051217686150109e-05, "loss": 1.4579, "step": 18554 }, { "epoch": 0.825041127562136, "grad_norm": 0.059156231582164764, "learning_rate": 8.04326435021041e-05, "loss": 1.4535, "step": 18556 }, { "epoch": 0.8251300520208084, "grad_norm": 0.058144066482782364, "learning_rate": 8.035314600917365e-05, "loss": 1.4522, "step": 18558 }, { "epoch": 0.8252189764794807, "grad_norm": 0.057454727590084076, "learning_rate": 8.027368438950577e-05, "loss": 1.4521, "step": 18560 }, { "epoch": 0.8253079009381531, "grad_norm": 0.05804436281323433, "learning_rate": 8.019425864989283e-05, "loss": 1.4533, "step": 18562 }, { "epoch": 0.8253968253968254, "grad_norm": 0.0597645603120327, "learning_rate": 8.011486879712471e-05, "loss": 1.4472, "step": 18564 }, { "epoch": 0.8254857498554977, "grad_norm": 0.05968466028571129, "learning_rate": 8.00355148379881e-05, "loss": 1.4561, "step": 18566 }, { "epoch": 0.8255746743141701, "grad_norm": 0.05988946184515953, "learning_rate": 7.99561967792663e-05, "loss": 1.4529, "step": 18568 }, { "epoch": 0.8256635987728425, "grad_norm": 0.05921318754553795, "learning_rate": 7.987691462773983e-05, "loss": 1.4514, "step": 18570 }, { "epoch": 0.8257525232315148, "grad_norm": 0.05899718776345253, "learning_rate": 7.979766839018627e-05, "loss": 1.4482, "step": 18572 }, { "epoch": 0.8258414476901872, "grad_norm": 0.058463871479034424, "learning_rate": 7.971845807337979e-05, "loss": 1.4549, "step": 18574 }, { "epoch": 0.8259303721488596, "grad_norm": 0.059063419699668884, "learning_rate": 7.96392836840914e-05, "loss": 1.4508, "step": 18576 }, { "epoch": 0.8260192966075319, "grad_norm": 0.05859994888305664, "learning_rate": 7.956014522908972e-05, "loss": 1.4503, "step": 18578 }, { "epoch": 0.8261082210662043, "grad_norm": 0.05792148783802986, "learning_rate": 7.948104271513956e-05, "loss": 1.4562, "step": 18580 }, { "epoch": 0.8261971455248767, "grad_norm": 0.05839909240603447, "learning_rate": 7.940197614900314e-05, "loss": 1.4477, "step": 18582 }, { "epoch": 0.8262860699835489, "grad_norm": 0.05920722335577011, "learning_rate": 7.932294553743918e-05, "loss": 1.4532, "step": 18584 }, { "epoch": 0.8263749944422213, "grad_norm": 0.057403240352869034, "learning_rate": 7.924395088720371e-05, "loss": 1.4469, "step": 18586 }, { "epoch": 0.8264639189008937, "grad_norm": 0.058875951915979385, "learning_rate": 7.916499220504964e-05, "loss": 1.4541, "step": 18588 }, { "epoch": 0.826552843359566, "grad_norm": 0.05903790891170502, "learning_rate": 7.908606949772646e-05, "loss": 1.4532, "step": 18590 }, { "epoch": 0.8266417678182384, "grad_norm": 0.058501675724983215, "learning_rate": 7.900718277198099e-05, "loss": 1.4579, "step": 18592 }, { "epoch": 0.8267306922769108, "grad_norm": 0.06078590452671051, "learning_rate": 7.892833203455695e-05, "loss": 1.453, "step": 18594 }, { "epoch": 0.8268196167355831, "grad_norm": 0.05794868990778923, "learning_rate": 7.884951729219453e-05, "loss": 1.4482, "step": 18596 }, { "epoch": 0.8269085411942555, "grad_norm": 0.05853716656565666, "learning_rate": 7.87707385516313e-05, "loss": 1.454, "step": 18598 }, { "epoch": 0.8269974656529279, "grad_norm": 0.0581338033080101, "learning_rate": 7.86919958196018e-05, "loss": 1.455, "step": 18600 }, { "epoch": 0.8270863901116002, "grad_norm": 0.06062675267457962, "learning_rate": 7.8613289102837e-05, "loss": 1.4577, "step": 18602 }, { "epoch": 0.8271753145702726, "grad_norm": 0.05953441187739372, "learning_rate": 7.853461840806525e-05, "loss": 1.4478, "step": 18604 }, { "epoch": 0.8272642390289449, "grad_norm": 0.05867122486233711, "learning_rate": 7.845598374201174e-05, "loss": 1.4472, "step": 18606 }, { "epoch": 0.8273531634876172, "grad_norm": 0.05937234312295914, "learning_rate": 7.837738511139836e-05, "loss": 1.4522, "step": 18608 }, { "epoch": 0.8274420879462896, "grad_norm": 0.05736441910266876, "learning_rate": 7.829882252294423e-05, "loss": 1.4549, "step": 18610 }, { "epoch": 0.827531012404962, "grad_norm": 0.059804897755384445, "learning_rate": 7.8220295983365e-05, "loss": 1.4554, "step": 18612 }, { "epoch": 0.8276199368636343, "grad_norm": 0.058750297874212265, "learning_rate": 7.81418054993736e-05, "loss": 1.4478, "step": 18614 }, { "epoch": 0.8277088613223067, "grad_norm": 0.05871342122554779, "learning_rate": 7.806335107767981e-05, "loss": 1.446, "step": 18616 }, { "epoch": 0.8277977857809791, "grad_norm": 0.05835486575961113, "learning_rate": 7.798493272499013e-05, "loss": 1.444, "step": 18618 }, { "epoch": 0.8278867102396514, "grad_norm": 0.058702003210783005, "learning_rate": 7.790655044800815e-05, "loss": 1.4525, "step": 18620 }, { "epoch": 0.8279756346983238, "grad_norm": 0.058793142437934875, "learning_rate": 7.78282042534344e-05, "loss": 1.4532, "step": 18622 }, { "epoch": 0.8280645591569962, "grad_norm": 0.0574706606566906, "learning_rate": 7.774989414796612e-05, "loss": 1.4462, "step": 18624 }, { "epoch": 0.8281534836156685, "grad_norm": 0.0592108853161335, "learning_rate": 7.767162013829771e-05, "loss": 1.4489, "step": 18626 }, { "epoch": 0.8282424080743408, "grad_norm": 0.05900189280509949, "learning_rate": 7.759338223112044e-05, "loss": 1.4521, "step": 18628 }, { "epoch": 0.8283313325330132, "grad_norm": 0.058755598962306976, "learning_rate": 7.751518043312228e-05, "loss": 1.4563, "step": 18630 }, { "epoch": 0.8284202569916855, "grad_norm": 0.05981973186135292, "learning_rate": 7.743701475098835e-05, "loss": 1.4524, "step": 18632 }, { "epoch": 0.8285091814503579, "grad_norm": 0.058422692120075226, "learning_rate": 7.735888519140044e-05, "loss": 1.4499, "step": 18634 }, { "epoch": 0.8285981059090303, "grad_norm": 0.058538686484098434, "learning_rate": 7.728079176103769e-05, "loss": 1.4498, "step": 18636 }, { "epoch": 0.8286870303677026, "grad_norm": 0.05935661122202873, "learning_rate": 7.720273446657577e-05, "loss": 1.4577, "step": 18638 }, { "epoch": 0.828775954826375, "grad_norm": 0.0582767128944397, "learning_rate": 7.712471331468717e-05, "loss": 1.4507, "step": 18640 }, { "epoch": 0.8288648792850474, "grad_norm": 0.05873246118426323, "learning_rate": 7.704672831204168e-05, "loss": 1.458, "step": 18642 }, { "epoch": 0.8289538037437197, "grad_norm": 0.05824227258563042, "learning_rate": 7.696877946530584e-05, "loss": 1.4528, "step": 18644 }, { "epoch": 0.8290427282023921, "grad_norm": 0.05939396098256111, "learning_rate": 7.689086678114282e-05, "loss": 1.4556, "step": 18646 }, { "epoch": 0.8291316526610645, "grad_norm": 0.05786508694291115, "learning_rate": 7.681299026621307e-05, "loss": 1.4546, "step": 18648 }, { "epoch": 0.8292205771197367, "grad_norm": 0.059782709926366806, "learning_rate": 7.67351499271739e-05, "loss": 1.4482, "step": 18650 }, { "epoch": 0.8293095015784091, "grad_norm": 0.0591367743909359, "learning_rate": 7.665734577067951e-05, "loss": 1.4528, "step": 18652 }, { "epoch": 0.8293984260370815, "grad_norm": 0.05903034284710884, "learning_rate": 7.657957780338076e-05, "loss": 1.4533, "step": 18654 }, { "epoch": 0.8294873504957538, "grad_norm": 0.05796166509389877, "learning_rate": 7.650184603192544e-05, "loss": 1.4451, "step": 18656 }, { "epoch": 0.8295762749544262, "grad_norm": 0.05834323912858963, "learning_rate": 7.642415046295886e-05, "loss": 1.455, "step": 18658 }, { "epoch": 0.8296651994130986, "grad_norm": 0.05898818001151085, "learning_rate": 7.634649110312253e-05, "loss": 1.4545, "step": 18660 }, { "epoch": 0.8297541238717709, "grad_norm": 0.059623461216688156, "learning_rate": 7.626886795905497e-05, "loss": 1.4535, "step": 18662 }, { "epoch": 0.8298430483304433, "grad_norm": 0.05911053344607353, "learning_rate": 7.619128103739193e-05, "loss": 1.4496, "step": 18664 }, { "epoch": 0.8299319727891157, "grad_norm": 0.05885409191250801, "learning_rate": 7.611373034476587e-05, "loss": 1.452, "step": 18666 }, { "epoch": 0.830020897247788, "grad_norm": 0.05906490609049797, "learning_rate": 7.603621588780624e-05, "loss": 1.4513, "step": 18668 }, { "epoch": 0.8301098217064604, "grad_norm": 0.05751986429095268, "learning_rate": 7.59587376731391e-05, "loss": 1.4457, "step": 18670 }, { "epoch": 0.8301987461651327, "grad_norm": 0.05804996192455292, "learning_rate": 7.588129570738778e-05, "loss": 1.4479, "step": 18672 }, { "epoch": 0.830287670623805, "grad_norm": 0.060301125049591064, "learning_rate": 7.580388999717241e-05, "loss": 1.4481, "step": 18674 }, { "epoch": 0.8303765950824774, "grad_norm": 0.05793362483382225, "learning_rate": 7.572652054910984e-05, "loss": 1.4502, "step": 18676 }, { "epoch": 0.8304655195411498, "grad_norm": 0.059740450233221054, "learning_rate": 7.564918736981397e-05, "loss": 1.4541, "step": 18678 }, { "epoch": 0.8305544439998221, "grad_norm": 0.05822866037487984, "learning_rate": 7.557189046589575e-05, "loss": 1.4532, "step": 18680 }, { "epoch": 0.8306433684584945, "grad_norm": 0.05787384882569313, "learning_rate": 7.549462984396255e-05, "loss": 1.4513, "step": 18682 }, { "epoch": 0.8307322929171669, "grad_norm": 0.06009489670395851, "learning_rate": 7.541740551061916e-05, "loss": 1.453, "step": 18684 }, { "epoch": 0.8308212173758392, "grad_norm": 0.0584663450717926, "learning_rate": 7.534021747246717e-05, "loss": 1.4485, "step": 18686 }, { "epoch": 0.8309101418345116, "grad_norm": 0.05907716229557991, "learning_rate": 7.526306573610464e-05, "loss": 1.4501, "step": 18688 }, { "epoch": 0.830999066293184, "grad_norm": 0.058759596198797226, "learning_rate": 7.518595030812713e-05, "loss": 1.4551, "step": 18690 }, { "epoch": 0.8310879907518562, "grad_norm": 0.05805884301662445, "learning_rate": 7.51088711951266e-05, "loss": 1.4562, "step": 18692 }, { "epoch": 0.8311769152105286, "grad_norm": 0.05766487121582031, "learning_rate": 7.503182840369221e-05, "loss": 1.4534, "step": 18694 }, { "epoch": 0.831265839669201, "grad_norm": 0.05738608166575432, "learning_rate": 7.495482194040998e-05, "loss": 1.447, "step": 18696 }, { "epoch": 0.8313547641278733, "grad_norm": 0.058979667723178864, "learning_rate": 7.487785181186258e-05, "loss": 1.447, "step": 18698 }, { "epoch": 0.8314436885865457, "grad_norm": 0.05860074982047081, "learning_rate": 7.480091802462985e-05, "loss": 1.454, "step": 18700 }, { "epoch": 0.8315326130452181, "grad_norm": 0.05817517265677452, "learning_rate": 7.472402058528854e-05, "loss": 1.4527, "step": 18702 }, { "epoch": 0.8316215375038905, "grad_norm": 0.05893828719854355, "learning_rate": 7.464715950041202e-05, "loss": 1.4522, "step": 18704 }, { "epoch": 0.8317104619625628, "grad_norm": 0.058481235057115555, "learning_rate": 7.457033477657071e-05, "loss": 1.4537, "step": 18706 }, { "epoch": 0.8317993864212352, "grad_norm": 0.059741776436567307, "learning_rate": 7.449354642033212e-05, "loss": 1.455, "step": 18708 }, { "epoch": 0.8318883108799076, "grad_norm": 0.059183038771152496, "learning_rate": 7.441679443826021e-05, "loss": 1.454, "step": 18710 }, { "epoch": 0.8319772353385799, "grad_norm": 0.058914851397275925, "learning_rate": 7.434007883691635e-05, "loss": 1.4553, "step": 18712 }, { "epoch": 0.8320661597972522, "grad_norm": 0.05881735682487488, "learning_rate": 7.426339962285822e-05, "loss": 1.4568, "step": 18714 }, { "epoch": 0.8321550842559245, "grad_norm": 0.05866105481982231, "learning_rate": 7.418675680264087e-05, "loss": 1.4504, "step": 18716 }, { "epoch": 0.8322440087145969, "grad_norm": 0.06024124473333359, "learning_rate": 7.411015038281615e-05, "loss": 1.4505, "step": 18718 }, { "epoch": 0.8323329331732693, "grad_norm": 0.06013770401477814, "learning_rate": 7.403358036993252e-05, "loss": 1.4568, "step": 18720 }, { "epoch": 0.8324218576319417, "grad_norm": 0.058068759739398956, "learning_rate": 7.39570467705356e-05, "loss": 1.4542, "step": 18722 }, { "epoch": 0.832510782090614, "grad_norm": 0.05928467959165573, "learning_rate": 7.388054959116796e-05, "loss": 1.4483, "step": 18724 }, { "epoch": 0.8325997065492864, "grad_norm": 0.05854354798793793, "learning_rate": 7.380408883836864e-05, "loss": 1.4482, "step": 18726 }, { "epoch": 0.8326886310079588, "grad_norm": 0.05878078192472458, "learning_rate": 7.372766451867402e-05, "loss": 1.4453, "step": 18728 }, { "epoch": 0.8327775554666311, "grad_norm": 0.06043083965778351, "learning_rate": 7.365127663861726e-05, "loss": 1.4513, "step": 18730 }, { "epoch": 0.8328664799253035, "grad_norm": 0.05829674378037453, "learning_rate": 7.357492520472814e-05, "loss": 1.4538, "step": 18732 }, { "epoch": 0.8329554043839759, "grad_norm": 0.057941779494285583, "learning_rate": 7.349861022353355e-05, "loss": 1.4487, "step": 18734 }, { "epoch": 0.8330443288426481, "grad_norm": 0.05707291513681412, "learning_rate": 7.342233170155732e-05, "loss": 1.4506, "step": 18736 }, { "epoch": 0.8331332533013205, "grad_norm": 0.058747537434101105, "learning_rate": 7.334608964532019e-05, "loss": 1.4549, "step": 18738 }, { "epoch": 0.8332221777599929, "grad_norm": 0.05869237333536148, "learning_rate": 7.32698840613395e-05, "loss": 1.4558, "step": 18740 }, { "epoch": 0.8333111022186652, "grad_norm": 0.05862243101000786, "learning_rate": 7.319371495612942e-05, "loss": 1.4534, "step": 18742 }, { "epoch": 0.8334000266773376, "grad_norm": 0.059139467775821686, "learning_rate": 7.311758233620169e-05, "loss": 1.4485, "step": 18744 }, { "epoch": 0.83348895113601, "grad_norm": 0.05852329730987549, "learning_rate": 7.304148620806423e-05, "loss": 1.4489, "step": 18746 }, { "epoch": 0.8335778755946823, "grad_norm": 0.057609908282756805, "learning_rate": 7.296542657822197e-05, "loss": 1.4519, "step": 18748 }, { "epoch": 0.8336668000533547, "grad_norm": 0.059576258063316345, "learning_rate": 7.28894034531769e-05, "loss": 1.4488, "step": 18750 }, { "epoch": 0.8337557245120271, "grad_norm": 0.05912381783127785, "learning_rate": 7.281341683942799e-05, "loss": 1.4512, "step": 18752 }, { "epoch": 0.8338446489706994, "grad_norm": 0.05865836516022682, "learning_rate": 7.273746674347059e-05, "loss": 1.4459, "step": 18754 }, { "epoch": 0.8339335734293718, "grad_norm": 0.057585492730140686, "learning_rate": 7.266155317179746e-05, "loss": 1.4505, "step": 18756 }, { "epoch": 0.8340224978880441, "grad_norm": 0.057790596038103104, "learning_rate": 7.2585676130898e-05, "loss": 1.451, "step": 18758 }, { "epoch": 0.8341114223467164, "grad_norm": 0.06023659557104111, "learning_rate": 7.25098356272586e-05, "loss": 1.4554, "step": 18760 }, { "epoch": 0.8342003468053888, "grad_norm": 0.05900289863348007, "learning_rate": 7.243403166736229e-05, "loss": 1.4525, "step": 18762 }, { "epoch": 0.8342892712640612, "grad_norm": 0.059401899576187134, "learning_rate": 7.235826425768899e-05, "loss": 1.4531, "step": 18764 }, { "epoch": 0.8343781957227335, "grad_norm": 0.059451449662446976, "learning_rate": 7.228253340471608e-05, "loss": 1.4482, "step": 18766 }, { "epoch": 0.8344671201814059, "grad_norm": 0.058755919337272644, "learning_rate": 7.220683911491704e-05, "loss": 1.4484, "step": 18768 }, { "epoch": 0.8345560446400783, "grad_norm": 0.06049460545182228, "learning_rate": 7.213118139476255e-05, "loss": 1.4498, "step": 18770 }, { "epoch": 0.8346449690987506, "grad_norm": 0.058131422847509384, "learning_rate": 7.205556025072019e-05, "loss": 1.4494, "step": 18772 }, { "epoch": 0.834733893557423, "grad_norm": 0.057763516902923584, "learning_rate": 7.197997568925446e-05, "loss": 1.4524, "step": 18774 }, { "epoch": 0.8348228180160954, "grad_norm": 0.0582827590405941, "learning_rate": 7.19044277168267e-05, "loss": 1.4584, "step": 18776 }, { "epoch": 0.8349117424747677, "grad_norm": 0.05903775990009308, "learning_rate": 7.182891633989497e-05, "loss": 1.4507, "step": 18778 }, { "epoch": 0.83500066693344, "grad_norm": 0.05882865563035011, "learning_rate": 7.175344156491431e-05, "loss": 1.4472, "step": 18780 }, { "epoch": 0.8350895913921124, "grad_norm": 0.05889016017317772, "learning_rate": 7.16780033983368e-05, "loss": 1.4513, "step": 18782 }, { "epoch": 0.8351785158507847, "grad_norm": 0.05878911539912224, "learning_rate": 7.1602601846611e-05, "loss": 1.4541, "step": 18784 }, { "epoch": 0.8352674403094571, "grad_norm": 0.05862708389759064, "learning_rate": 7.152723691618268e-05, "loss": 1.4547, "step": 18786 }, { "epoch": 0.8353563647681295, "grad_norm": 0.057450249791145325, "learning_rate": 7.145190861349449e-05, "loss": 1.4471, "step": 18788 }, { "epoch": 0.8354452892268018, "grad_norm": 0.0574302151799202, "learning_rate": 7.137661694498559e-05, "loss": 1.4475, "step": 18790 }, { "epoch": 0.8355342136854742, "grad_norm": 0.05855082347989082, "learning_rate": 7.130136191709235e-05, "loss": 1.4506, "step": 18792 }, { "epoch": 0.8356231381441466, "grad_norm": 0.05884725600481033, "learning_rate": 7.122614353624807e-05, "loss": 1.4479, "step": 18794 }, { "epoch": 0.8357120626028189, "grad_norm": 0.0579058937728405, "learning_rate": 7.115096180888242e-05, "loss": 1.453, "step": 18796 }, { "epoch": 0.8358009870614913, "grad_norm": 0.0576477013528347, "learning_rate": 7.107581674142255e-05, "loss": 1.4533, "step": 18798 }, { "epoch": 0.8358899115201637, "grad_norm": 0.05866725370287895, "learning_rate": 7.100070834029198e-05, "loss": 1.4529, "step": 18800 }, { "epoch": 0.8359788359788359, "grad_norm": 0.059061404317617416, "learning_rate": 7.092563661191137e-05, "loss": 1.4497, "step": 18802 }, { "epoch": 0.8360677604375083, "grad_norm": 0.058392900973558426, "learning_rate": 7.085060156269835e-05, "loss": 1.4571, "step": 18804 }, { "epoch": 0.8361566848961807, "grad_norm": 0.05853873863816261, "learning_rate": 7.077560319906695e-05, "loss": 1.4481, "step": 18806 }, { "epoch": 0.836245609354853, "grad_norm": 0.058212365955114365, "learning_rate": 7.070064152742855e-05, "loss": 1.4575, "step": 18808 }, { "epoch": 0.8363345338135254, "grad_norm": 0.05963822454214096, "learning_rate": 7.062571655419132e-05, "loss": 1.449, "step": 18810 }, { "epoch": 0.8364234582721978, "grad_norm": 0.058400917798280716, "learning_rate": 7.055082828575987e-05, "loss": 1.4544, "step": 18812 }, { "epoch": 0.8365123827308701, "grad_norm": 0.058825619518756866, "learning_rate": 7.047597672853618e-05, "loss": 1.4532, "step": 18814 }, { "epoch": 0.8366013071895425, "grad_norm": 0.059138767421245575, "learning_rate": 7.040116188891893e-05, "loss": 1.4483, "step": 18816 }, { "epoch": 0.8366902316482149, "grad_norm": 0.058162230998277664, "learning_rate": 7.032638377330341e-05, "loss": 1.451, "step": 18818 }, { "epoch": 0.8367791561068872, "grad_norm": 0.05852394923567772, "learning_rate": 7.025164238808229e-05, "loss": 1.455, "step": 18820 }, { "epoch": 0.8368680805655595, "grad_norm": 0.058616820722818375, "learning_rate": 7.017693773964445e-05, "loss": 1.4485, "step": 18822 }, { "epoch": 0.8369570050242319, "grad_norm": 0.05944205820560455, "learning_rate": 7.010226983437617e-05, "loss": 1.4441, "step": 18824 }, { "epoch": 0.8370459294829042, "grad_norm": 0.058783505111932755, "learning_rate": 7.002763867866047e-05, "loss": 1.4517, "step": 18826 }, { "epoch": 0.8371348539415766, "grad_norm": 0.059438593685626984, "learning_rate": 6.995304427887688e-05, "loss": 1.4488, "step": 18828 }, { "epoch": 0.837223778400249, "grad_norm": 0.05818268656730652, "learning_rate": 6.987848664140229e-05, "loss": 1.4501, "step": 18830 }, { "epoch": 0.8373127028589213, "grad_norm": 0.05838819593191147, "learning_rate": 6.980396577261022e-05, "loss": 1.4559, "step": 18832 }, { "epoch": 0.8374016273175937, "grad_norm": 0.059087976813316345, "learning_rate": 6.972948167887088e-05, "loss": 1.4521, "step": 18834 }, { "epoch": 0.8374905517762661, "grad_norm": 0.05923278629779816, "learning_rate": 6.965503436655158e-05, "loss": 1.4543, "step": 18836 }, { "epoch": 0.8375794762349384, "grad_norm": 0.060685187578201294, "learning_rate": 6.958062384201652e-05, "loss": 1.4532, "step": 18838 }, { "epoch": 0.8376684006936108, "grad_norm": 0.058887116611003876, "learning_rate": 6.950625011162642e-05, "loss": 1.4534, "step": 18840 }, { "epoch": 0.8377573251522832, "grad_norm": 0.05865625664591789, "learning_rate": 6.943191318173919e-05, "loss": 1.448, "step": 18842 }, { "epoch": 0.8378462496109554, "grad_norm": 0.05893245339393616, "learning_rate": 6.935761305870946e-05, "loss": 1.4504, "step": 18844 }, { "epoch": 0.8379351740696278, "grad_norm": 0.05826593190431595, "learning_rate": 6.928334974888889e-05, "loss": 1.4481, "step": 18846 }, { "epoch": 0.8380240985283002, "grad_norm": 0.05828888714313507, "learning_rate": 6.920912325862566e-05, "loss": 1.445, "step": 18848 }, { "epoch": 0.8381130229869725, "grad_norm": 0.0586436428129673, "learning_rate": 6.913493359426475e-05, "loss": 1.4572, "step": 18850 }, { "epoch": 0.8382019474456449, "grad_norm": 0.05876269191503525, "learning_rate": 6.906078076214877e-05, "loss": 1.4558, "step": 18852 }, { "epoch": 0.8382908719043173, "grad_norm": 0.05837937071919441, "learning_rate": 6.898666476861631e-05, "loss": 1.4541, "step": 18854 }, { "epoch": 0.8383797963629896, "grad_norm": 0.058034468442201614, "learning_rate": 6.891258562000302e-05, "loss": 1.449, "step": 18856 }, { "epoch": 0.838468720821662, "grad_norm": 0.05731835216283798, "learning_rate": 6.883854332264167e-05, "loss": 1.4469, "step": 18858 }, { "epoch": 0.8385576452803344, "grad_norm": 0.05783449858427048, "learning_rate": 6.876453788286174e-05, "loss": 1.45, "step": 18860 }, { "epoch": 0.8386465697390068, "grad_norm": 0.058289241045713425, "learning_rate": 6.869056930698958e-05, "loss": 1.4564, "step": 18862 }, { "epoch": 0.8387354941976791, "grad_norm": 0.05786917731165886, "learning_rate": 6.861663760134818e-05, "loss": 1.4477, "step": 18864 }, { "epoch": 0.8388244186563514, "grad_norm": 0.05944148823618889, "learning_rate": 6.854274277225764e-05, "loss": 1.4545, "step": 18866 }, { "epoch": 0.8389133431150237, "grad_norm": 0.05793730542063713, "learning_rate": 6.846888482603492e-05, "loss": 1.4501, "step": 18868 }, { "epoch": 0.8390022675736961, "grad_norm": 0.05772128701210022, "learning_rate": 6.839506376899362e-05, "loss": 1.4506, "step": 18870 }, { "epoch": 0.8390911920323685, "grad_norm": 0.05777378007769585, "learning_rate": 6.832127960744405e-05, "loss": 1.4493, "step": 18872 }, { "epoch": 0.8391801164910409, "grad_norm": 0.058291662484407425, "learning_rate": 6.82475323476941e-05, "loss": 1.4474, "step": 18874 }, { "epoch": 0.8392690409497132, "grad_norm": 0.058231450617313385, "learning_rate": 6.81738219960476e-05, "loss": 1.4488, "step": 18876 }, { "epoch": 0.8393579654083856, "grad_norm": 0.057656459510326385, "learning_rate": 6.810014855880598e-05, "loss": 1.4476, "step": 18878 }, { "epoch": 0.839446889867058, "grad_norm": 0.057494621723890305, "learning_rate": 6.802651204226678e-05, "loss": 1.4471, "step": 18880 }, { "epoch": 0.8395358143257303, "grad_norm": 0.05767682567238808, "learning_rate": 6.795291245272506e-05, "loss": 1.4493, "step": 18882 }, { "epoch": 0.8396247387844027, "grad_norm": 0.05807575583457947, "learning_rate": 6.787934979647242e-05, "loss": 1.4505, "step": 18884 }, { "epoch": 0.8397136632430751, "grad_norm": 0.05979083850979805, "learning_rate": 6.780582407979712e-05, "loss": 1.4455, "step": 18886 }, { "epoch": 0.8398025877017473, "grad_norm": 0.05773860588669777, "learning_rate": 6.773233530898465e-05, "loss": 1.4442, "step": 18888 }, { "epoch": 0.8398915121604197, "grad_norm": 0.058765195310115814, "learning_rate": 6.765888349031718e-05, "loss": 1.4499, "step": 18890 }, { "epoch": 0.839980436619092, "grad_norm": 0.058183662593364716, "learning_rate": 6.758546863007353e-05, "loss": 1.4457, "step": 18892 }, { "epoch": 0.8400693610777644, "grad_norm": 0.058729760348796844, "learning_rate": 6.751209073452958e-05, "loss": 1.4453, "step": 18894 }, { "epoch": 0.8401582855364368, "grad_norm": 0.05806111916899681, "learning_rate": 6.743874980995813e-05, "loss": 1.4462, "step": 18896 }, { "epoch": 0.8402472099951092, "grad_norm": 0.05843853950500488, "learning_rate": 6.736544586262849e-05, "loss": 1.4506, "step": 18898 }, { "epoch": 0.8403361344537815, "grad_norm": 0.059424083679914474, "learning_rate": 6.729217889880712e-05, "loss": 1.4461, "step": 18900 }, { "epoch": 0.8404250589124539, "grad_norm": 0.058906249701976776, "learning_rate": 6.721894892475727e-05, "loss": 1.4515, "step": 18902 }, { "epoch": 0.8405139833711263, "grad_norm": 0.05890142172574997, "learning_rate": 6.714575594673871e-05, "loss": 1.4516, "step": 18904 }, { "epoch": 0.8406029078297986, "grad_norm": 0.05828310176730156, "learning_rate": 6.707259997100862e-05, "loss": 1.4501, "step": 18906 }, { "epoch": 0.840691832288471, "grad_norm": 0.0580887570977211, "learning_rate": 6.699948100382042e-05, "loss": 1.4513, "step": 18908 }, { "epoch": 0.8407807567471433, "grad_norm": 0.057612862437963486, "learning_rate": 6.692639905142477e-05, "loss": 1.4552, "step": 18910 }, { "epoch": 0.8408696812058156, "grad_norm": 0.06015538424253464, "learning_rate": 6.68533541200691e-05, "loss": 1.4516, "step": 18912 }, { "epoch": 0.840958605664488, "grad_norm": 0.06045129895210266, "learning_rate": 6.678034621599743e-05, "loss": 1.4497, "step": 18914 }, { "epoch": 0.8410475301231604, "grad_norm": 0.059288546442985535, "learning_rate": 6.670737534545097e-05, "loss": 1.4492, "step": 18916 }, { "epoch": 0.8411364545818327, "grad_norm": 0.05912403389811516, "learning_rate": 6.663444151466752e-05, "loss": 1.4524, "step": 18918 }, { "epoch": 0.8412253790405051, "grad_norm": 0.05908604711294174, "learning_rate": 6.656154472988174e-05, "loss": 1.4495, "step": 18920 }, { "epoch": 0.8413143034991775, "grad_norm": 0.05895715579390526, "learning_rate": 6.648868499732524e-05, "loss": 1.4498, "step": 18922 }, { "epoch": 0.8414032279578498, "grad_norm": 0.059338223189115524, "learning_rate": 6.641586232322644e-05, "loss": 1.4547, "step": 18924 }, { "epoch": 0.8414921524165222, "grad_norm": 0.05875730887055397, "learning_rate": 6.63430767138104e-05, "loss": 1.4521, "step": 18926 }, { "epoch": 0.8415810768751946, "grad_norm": 0.057817842811346054, "learning_rate": 6.627032817529926e-05, "loss": 1.4473, "step": 18928 }, { "epoch": 0.8416700013338669, "grad_norm": 0.0594828836619854, "learning_rate": 6.619761671391173e-05, "loss": 1.4581, "step": 18930 }, { "epoch": 0.8417589257925392, "grad_norm": 0.058293212205171585, "learning_rate": 6.61249423358638e-05, "loss": 1.4499, "step": 18932 }, { "epoch": 0.8418478502512116, "grad_norm": 0.05719943344593048, "learning_rate": 6.605230504736782e-05, "loss": 1.4485, "step": 18934 }, { "epoch": 0.8419367747098839, "grad_norm": 0.058276139199733734, "learning_rate": 6.597970485463306e-05, "loss": 1.4467, "step": 18936 }, { "epoch": 0.8420256991685563, "grad_norm": 0.057104870676994324, "learning_rate": 6.590714176386575e-05, "loss": 1.4477, "step": 18938 }, { "epoch": 0.8421146236272287, "grad_norm": 0.058028530329465866, "learning_rate": 6.58346157812691e-05, "loss": 1.4521, "step": 18940 }, { "epoch": 0.842203548085901, "grad_norm": 0.05872227996587753, "learning_rate": 6.576212691304267e-05, "loss": 1.4559, "step": 18942 }, { "epoch": 0.8422924725445734, "grad_norm": 0.05847456306219101, "learning_rate": 6.568967516538326e-05, "loss": 1.4534, "step": 18944 }, { "epoch": 0.8423813970032458, "grad_norm": 0.05918823927640915, "learning_rate": 6.561726054448447e-05, "loss": 1.4535, "step": 18946 }, { "epoch": 0.8424703214619181, "grad_norm": 0.05854053795337677, "learning_rate": 6.554488305653639e-05, "loss": 1.4515, "step": 18948 }, { "epoch": 0.8425592459205905, "grad_norm": 0.05881137028336525, "learning_rate": 6.54725427077264e-05, "loss": 1.4548, "step": 18950 }, { "epoch": 0.8426481703792628, "grad_norm": 0.058693382889032364, "learning_rate": 6.540023950423818e-05, "loss": 1.4498, "step": 18952 }, { "epoch": 0.8427370948379351, "grad_norm": 0.05968313291668892, "learning_rate": 6.532797345225288e-05, "loss": 1.4423, "step": 18954 }, { "epoch": 0.8428260192966075, "grad_norm": 0.05926063284277916, "learning_rate": 6.525574455794797e-05, "loss": 1.4563, "step": 18956 }, { "epoch": 0.8429149437552799, "grad_norm": 0.05805355682969093, "learning_rate": 6.518355282749777e-05, "loss": 1.4526, "step": 18958 }, { "epoch": 0.8430038682139522, "grad_norm": 0.05876849219202995, "learning_rate": 6.511139826707362e-05, "loss": 1.4477, "step": 18960 }, { "epoch": 0.8430927926726246, "grad_norm": 0.058109939098358154, "learning_rate": 6.503928088284377e-05, "loss": 1.4478, "step": 18962 }, { "epoch": 0.843181717131297, "grad_norm": 0.059168893843889236, "learning_rate": 6.496720068097283e-05, "loss": 1.4579, "step": 18964 }, { "epoch": 0.8432706415899693, "grad_norm": 0.05811697244644165, "learning_rate": 6.489515766762277e-05, "loss": 1.4523, "step": 18966 }, { "epoch": 0.8433595660486417, "grad_norm": 0.05842481181025505, "learning_rate": 6.482315184895205e-05, "loss": 1.4528, "step": 18968 }, { "epoch": 0.8434484905073141, "grad_norm": 0.059310369193553925, "learning_rate": 6.475118323111622e-05, "loss": 1.4513, "step": 18970 }, { "epoch": 0.8435374149659864, "grad_norm": 0.05840446799993515, "learning_rate": 6.467925182026718e-05, "loss": 1.4555, "step": 18972 }, { "epoch": 0.8436263394246587, "grad_norm": 0.05892022326588631, "learning_rate": 6.46073576225541e-05, "loss": 1.4511, "step": 18974 }, { "epoch": 0.8437152638833311, "grad_norm": 0.05745990201830864, "learning_rate": 6.45355006441229e-05, "loss": 1.4561, "step": 18976 }, { "epoch": 0.8438041883420034, "grad_norm": 0.058618173003196716, "learning_rate": 6.446368089111615e-05, "loss": 1.4544, "step": 18978 }, { "epoch": 0.8438931128006758, "grad_norm": 0.05895305797457695, "learning_rate": 6.439189836967307e-05, "loss": 1.4508, "step": 18980 }, { "epoch": 0.8439820372593482, "grad_norm": 0.058520711958408356, "learning_rate": 6.432015308593043e-05, "loss": 1.4521, "step": 18982 }, { "epoch": 0.8440709617180205, "grad_norm": 0.05948729068040848, "learning_rate": 6.424844504602095e-05, "loss": 1.4467, "step": 18984 }, { "epoch": 0.8441598861766929, "grad_norm": 0.058824650943279266, "learning_rate": 6.417677425607483e-05, "loss": 1.4447, "step": 18986 }, { "epoch": 0.8442488106353653, "grad_norm": 0.058834102004766464, "learning_rate": 6.410514072221852e-05, "loss": 1.4448, "step": 18988 }, { "epoch": 0.8443377350940376, "grad_norm": 0.05896035209298134, "learning_rate": 6.403354445057569e-05, "loss": 1.4515, "step": 18990 }, { "epoch": 0.84442665955271, "grad_norm": 0.0575827993452549, "learning_rate": 6.396198544726683e-05, "loss": 1.4469, "step": 18992 }, { "epoch": 0.8445155840113824, "grad_norm": 0.05748899281024933, "learning_rate": 6.38904637184089e-05, "loss": 1.4457, "step": 18994 }, { "epoch": 0.8446045084700546, "grad_norm": 0.057869840413331985, "learning_rate": 6.381897927011604e-05, "loss": 1.4436, "step": 18996 }, { "epoch": 0.844693432928727, "grad_norm": 0.05841681361198425, "learning_rate": 6.374753210849909e-05, "loss": 1.4493, "step": 18998 }, { "epoch": 0.8447823573873994, "grad_norm": 0.058826643973588943, "learning_rate": 6.367612223966551e-05, "loss": 1.4515, "step": 19000 }, { "epoch": 0.8447823573873994, "eval_loss": 1.4360281229019165, "eval_runtime": 12.4748, "eval_samples_per_second": 553.917, "eval_steps_per_second": 69.26, "step": 19000 }, { "epoch": 0.8448712818460717, "grad_norm": 0.059161681681871414, "learning_rate": 6.36047496697198e-05, "loss": 1.4485, "step": 19002 }, { "epoch": 0.8449602063047441, "grad_norm": 0.05967250093817711, "learning_rate": 6.353341440476334e-05, "loss": 1.4478, "step": 19004 }, { "epoch": 0.8450491307634165, "grad_norm": 0.05787471681833267, "learning_rate": 6.346211645089394e-05, "loss": 1.4505, "step": 19006 }, { "epoch": 0.8451380552220888, "grad_norm": 0.05900755897164345, "learning_rate": 6.339085581420672e-05, "loss": 1.4497, "step": 19008 }, { "epoch": 0.8452269796807612, "grad_norm": 0.05949883162975311, "learning_rate": 6.33196325007931e-05, "loss": 1.4547, "step": 19010 }, { "epoch": 0.8453159041394336, "grad_norm": 0.05793669447302818, "learning_rate": 6.324844651674171e-05, "loss": 1.4532, "step": 19012 }, { "epoch": 0.845404828598106, "grad_norm": 0.05827039107680321, "learning_rate": 6.317729786813786e-05, "loss": 1.455, "step": 19014 }, { "epoch": 0.8454937530567783, "grad_norm": 0.05869372561573982, "learning_rate": 6.310618656106354e-05, "loss": 1.4442, "step": 19016 }, { "epoch": 0.8455826775154506, "grad_norm": 0.05820537358522415, "learning_rate": 6.303511260159772e-05, "loss": 1.4527, "step": 19018 }, { "epoch": 0.8456716019741229, "grad_norm": 0.05905662849545479, "learning_rate": 6.296407599581622e-05, "loss": 1.4486, "step": 19020 }, { "epoch": 0.8457605264327953, "grad_norm": 0.05853661149740219, "learning_rate": 6.289307674979133e-05, "loss": 1.4539, "step": 19022 }, { "epoch": 0.8458494508914677, "grad_norm": 0.059646617621183395, "learning_rate": 6.282211486959255e-05, "loss": 1.4524, "step": 19024 }, { "epoch": 0.84593837535014, "grad_norm": 0.05904247611761093, "learning_rate": 6.275119036128602e-05, "loss": 1.4483, "step": 19026 }, { "epoch": 0.8460272998088124, "grad_norm": 0.05890771001577377, "learning_rate": 6.268030323093455e-05, "loss": 1.4495, "step": 19028 }, { "epoch": 0.8461162242674848, "grad_norm": 0.0586511567234993, "learning_rate": 6.260945348459801e-05, "loss": 1.4458, "step": 19030 }, { "epoch": 0.8462051487261572, "grad_norm": 0.057969868183135986, "learning_rate": 6.253864112833296e-05, "loss": 1.4514, "step": 19032 }, { "epoch": 0.8462940731848295, "grad_norm": 0.05988781899213791, "learning_rate": 6.246786616819261e-05, "loss": 1.45, "step": 19034 }, { "epoch": 0.8463829976435019, "grad_norm": 0.05796820670366287, "learning_rate": 6.239712861022728e-05, "loss": 1.4498, "step": 19036 }, { "epoch": 0.8464719221021743, "grad_norm": 0.059687115252017975, "learning_rate": 6.232642846048364e-05, "loss": 1.4522, "step": 19038 }, { "epoch": 0.8465608465608465, "grad_norm": 0.05780632048845291, "learning_rate": 6.225576572500591e-05, "loss": 1.4479, "step": 19040 }, { "epoch": 0.8466497710195189, "grad_norm": 0.05791749432682991, "learning_rate": 6.218514040983437e-05, "loss": 1.4547, "step": 19042 }, { "epoch": 0.8467386954781913, "grad_norm": 0.05878786742687225, "learning_rate": 6.211455252100635e-05, "loss": 1.4467, "step": 19044 }, { "epoch": 0.8468276199368636, "grad_norm": 0.058243002742528915, "learning_rate": 6.204400206455601e-05, "loss": 1.4504, "step": 19046 }, { "epoch": 0.846916544395536, "grad_norm": 0.0594029575586319, "learning_rate": 6.197348904651456e-05, "loss": 1.4464, "step": 19048 }, { "epoch": 0.8470054688542084, "grad_norm": 0.05850706622004509, "learning_rate": 6.190301347290944e-05, "loss": 1.4451, "step": 19050 }, { "epoch": 0.8470943933128807, "grad_norm": 0.059582531452178955, "learning_rate": 6.183257534976534e-05, "loss": 1.4492, "step": 19052 }, { "epoch": 0.8471833177715531, "grad_norm": 0.05976349487900734, "learning_rate": 6.176217468310368e-05, "loss": 1.4506, "step": 19054 }, { "epoch": 0.8472722422302255, "grad_norm": 0.05981292203068733, "learning_rate": 6.169181147894265e-05, "loss": 1.4549, "step": 19056 }, { "epoch": 0.8473611666888978, "grad_norm": 0.05852234736084938, "learning_rate": 6.162148574329717e-05, "loss": 1.4511, "step": 19058 }, { "epoch": 0.8474500911475701, "grad_norm": 0.05882321670651436, "learning_rate": 6.155119748217874e-05, "loss": 1.4511, "step": 19060 }, { "epoch": 0.8475390156062425, "grad_norm": 0.05775250121951103, "learning_rate": 6.148094670159632e-05, "loss": 1.4495, "step": 19062 }, { "epoch": 0.8476279400649148, "grad_norm": 0.058737363666296005, "learning_rate": 6.14107334075551e-05, "loss": 1.4487, "step": 19064 }, { "epoch": 0.8477168645235872, "grad_norm": 0.057975850999355316, "learning_rate": 6.134055760605706e-05, "loss": 1.4439, "step": 19066 }, { "epoch": 0.8478057889822596, "grad_norm": 0.05753524228930473, "learning_rate": 6.127041930310129e-05, "loss": 1.4506, "step": 19068 }, { "epoch": 0.8478947134409319, "grad_norm": 0.05863305553793907, "learning_rate": 6.12003185046836e-05, "loss": 1.4506, "step": 19070 }, { "epoch": 0.8479836378996043, "grad_norm": 0.058391932398080826, "learning_rate": 6.113025521679627e-05, "loss": 1.4492, "step": 19072 }, { "epoch": 0.8480725623582767, "grad_norm": 0.057830072939395905, "learning_rate": 6.106022944542878e-05, "loss": 1.4506, "step": 19074 }, { "epoch": 0.848161486816949, "grad_norm": 0.05919577181339264, "learning_rate": 6.0990241196567205e-05, "loss": 1.4546, "step": 19076 }, { "epoch": 0.8482504112756214, "grad_norm": 0.05908111110329628, "learning_rate": 6.092029047619463e-05, "loss": 1.454, "step": 19078 }, { "epoch": 0.8483393357342938, "grad_norm": 0.05926148593425751, "learning_rate": 6.0850377290290415e-05, "loss": 1.4512, "step": 19080 }, { "epoch": 0.848428260192966, "grad_norm": 0.05787745118141174, "learning_rate": 6.078050164483129e-05, "loss": 1.4463, "step": 19082 }, { "epoch": 0.8485171846516384, "grad_norm": 0.059534478932619095, "learning_rate": 6.071066354579047e-05, "loss": 1.4511, "step": 19084 }, { "epoch": 0.8486061091103108, "grad_norm": 0.058711905032396317, "learning_rate": 6.064086299913807e-05, "loss": 1.4487, "step": 19086 }, { "epoch": 0.8486950335689831, "grad_norm": 0.059324994683265686, "learning_rate": 6.057110001084071e-05, "loss": 1.4388, "step": 19088 }, { "epoch": 0.8487839580276555, "grad_norm": 0.05815599113702774, "learning_rate": 6.050137458686239e-05, "loss": 1.4477, "step": 19090 }, { "epoch": 0.8488728824863279, "grad_norm": 0.05865519493818283, "learning_rate": 6.0431686733163294e-05, "loss": 1.4548, "step": 19092 }, { "epoch": 0.8489618069450002, "grad_norm": 0.05810709670186043, "learning_rate": 6.036203645570082e-05, "loss": 1.4453, "step": 19094 }, { "epoch": 0.8490507314036726, "grad_norm": 0.058259494602680206, "learning_rate": 6.029242376042882e-05, "loss": 1.4519, "step": 19096 }, { "epoch": 0.849139655862345, "grad_norm": 0.05749960616230965, "learning_rate": 6.0222848653298135e-05, "loss": 1.4485, "step": 19098 }, { "epoch": 0.8492285803210173, "grad_norm": 0.05866747722029686, "learning_rate": 6.015331114025646e-05, "loss": 1.4471, "step": 19100 }, { "epoch": 0.8493175047796897, "grad_norm": 0.05797070264816284, "learning_rate": 6.008381122724804e-05, "loss": 1.4522, "step": 19102 }, { "epoch": 0.849406429238362, "grad_norm": 0.05796579271554947, "learning_rate": 6.001434892021407e-05, "loss": 1.4536, "step": 19104 }, { "epoch": 0.8494953536970343, "grad_norm": 0.05819402635097504, "learning_rate": 5.994492422509257e-05, "loss": 1.4491, "step": 19106 }, { "epoch": 0.8495842781557067, "grad_norm": 0.058709751814603806, "learning_rate": 5.9875537147818126e-05, "loss": 1.4468, "step": 19108 }, { "epoch": 0.8496732026143791, "grad_norm": 0.05779354274272919, "learning_rate": 5.9806187694322334e-05, "loss": 1.448, "step": 19110 }, { "epoch": 0.8497621270730514, "grad_norm": 0.058577634394168854, "learning_rate": 5.973687587053361e-05, "loss": 1.4512, "step": 19112 }, { "epoch": 0.8498510515317238, "grad_norm": 0.05885601416230202, "learning_rate": 5.966760168237684e-05, "loss": 1.4475, "step": 19114 }, { "epoch": 0.8499399759903962, "grad_norm": 0.05744828283786774, "learning_rate": 5.9598365135773994e-05, "loss": 1.4527, "step": 19116 }, { "epoch": 0.8500289004490685, "grad_norm": 0.05852948874235153, "learning_rate": 5.952916623664362e-05, "loss": 1.4536, "step": 19118 }, { "epoch": 0.8501178249077409, "grad_norm": 0.05883361026644707, "learning_rate": 5.946000499090121e-05, "loss": 1.455, "step": 19120 }, { "epoch": 0.8502067493664133, "grad_norm": 0.05857586860656738, "learning_rate": 5.939088140445903e-05, "loss": 1.4478, "step": 19122 }, { "epoch": 0.8502956738250856, "grad_norm": 0.0588909313082695, "learning_rate": 5.932179548322597e-05, "loss": 1.4553, "step": 19124 }, { "epoch": 0.8503845982837579, "grad_norm": 0.05820483714342117, "learning_rate": 5.925274723310781e-05, "loss": 1.4496, "step": 19126 }, { "epoch": 0.8504735227424303, "grad_norm": 0.0587213970720768, "learning_rate": 5.9183736660007204e-05, "loss": 1.4544, "step": 19128 }, { "epoch": 0.8505624472011026, "grad_norm": 0.05785829946398735, "learning_rate": 5.911476376982333e-05, "loss": 1.4548, "step": 19130 }, { "epoch": 0.850651371659775, "grad_norm": 0.05887683108448982, "learning_rate": 5.904582856845236e-05, "loss": 1.454, "step": 19132 }, { "epoch": 0.8507402961184474, "grad_norm": 0.05940135940909386, "learning_rate": 5.897693106178725e-05, "loss": 1.4566, "step": 19134 }, { "epoch": 0.8508292205771197, "grad_norm": 0.05797107517719269, "learning_rate": 5.8908071255717566e-05, "loss": 1.4525, "step": 19136 }, { "epoch": 0.8509181450357921, "grad_norm": 0.058985110372304916, "learning_rate": 5.8839249156129705e-05, "loss": 1.4494, "step": 19138 }, { "epoch": 0.8510070694944645, "grad_norm": 0.057915061712265015, "learning_rate": 5.8770464768907125e-05, "loss": 1.4463, "step": 19140 }, { "epoch": 0.8510959939531368, "grad_norm": 0.05887880176305771, "learning_rate": 5.870171809992952e-05, "loss": 1.4533, "step": 19142 }, { "epoch": 0.8511849184118092, "grad_norm": 0.057847775518894196, "learning_rate": 5.8633009155073915e-05, "loss": 1.4527, "step": 19144 }, { "epoch": 0.8512738428704816, "grad_norm": 0.05895482003688812, "learning_rate": 5.856433794021349e-05, "loss": 1.4462, "step": 19146 }, { "epoch": 0.8513627673291538, "grad_norm": 0.05755472183227539, "learning_rate": 5.849570446121905e-05, "loss": 1.4506, "step": 19148 }, { "epoch": 0.8514516917878262, "grad_norm": 0.05808274820446968, "learning_rate": 5.8427108723957403e-05, "loss": 1.454, "step": 19150 }, { "epoch": 0.8515406162464986, "grad_norm": 0.06018178537487984, "learning_rate": 5.835855073429236e-05, "loss": 1.4481, "step": 19152 }, { "epoch": 0.8516295407051709, "grad_norm": 0.05914274603128433, "learning_rate": 5.829003049808462e-05, "loss": 1.4529, "step": 19154 }, { "epoch": 0.8517184651638433, "grad_norm": 0.058473363518714905, "learning_rate": 5.8221548021191785e-05, "loss": 1.45, "step": 19156 }, { "epoch": 0.8518073896225157, "grad_norm": 0.05827651917934418, "learning_rate": 5.815310330946777e-05, "loss": 1.4485, "step": 19158 }, { "epoch": 0.851896314081188, "grad_norm": 0.05839327350258827, "learning_rate": 5.8084696368763565e-05, "loss": 1.4478, "step": 19160 }, { "epoch": 0.8519852385398604, "grad_norm": 0.059893328696489334, "learning_rate": 5.8016327204927054e-05, "loss": 1.4493, "step": 19162 }, { "epoch": 0.8520741629985328, "grad_norm": 0.05871112272143364, "learning_rate": 5.7947995823802724e-05, "loss": 1.4493, "step": 19164 }, { "epoch": 0.8521630874572051, "grad_norm": 0.0575588084757328, "learning_rate": 5.787970223123179e-05, "loss": 1.4478, "step": 19166 }, { "epoch": 0.8522520119158775, "grad_norm": 0.058595284819602966, "learning_rate": 5.781144643305203e-05, "loss": 1.4501, "step": 19168 }, { "epoch": 0.8523409363745498, "grad_norm": 0.05944250524044037, "learning_rate": 5.774322843509872e-05, "loss": 1.4566, "step": 19170 }, { "epoch": 0.8524298608332221, "grad_norm": 0.05856909975409508, "learning_rate": 5.767504824320319e-05, "loss": 1.4448, "step": 19172 }, { "epoch": 0.8525187852918945, "grad_norm": 0.0576825849711895, "learning_rate": 5.7606905863193725e-05, "loss": 1.4392, "step": 19174 }, { "epoch": 0.8526077097505669, "grad_norm": 0.05843258276581764, "learning_rate": 5.7538801300895507e-05, "loss": 1.4544, "step": 19176 }, { "epoch": 0.8526966342092392, "grad_norm": 0.05991073697805405, "learning_rate": 5.7470734562130364e-05, "loss": 1.4511, "step": 19178 }, { "epoch": 0.8527855586679116, "grad_norm": 0.05866760015487671, "learning_rate": 5.740270565271716e-05, "loss": 1.4554, "step": 19180 }, { "epoch": 0.852874483126584, "grad_norm": 0.05758367106318474, "learning_rate": 5.7334714578471004e-05, "loss": 1.4499, "step": 19182 }, { "epoch": 0.8529634075852564, "grad_norm": 0.058923859149217606, "learning_rate": 5.72667613452042e-05, "loss": 1.4569, "step": 19184 }, { "epoch": 0.8530523320439287, "grad_norm": 0.05793732777237892, "learning_rate": 5.719884595872582e-05, "loss": 1.4534, "step": 19186 }, { "epoch": 0.8531412565026011, "grad_norm": 0.05803541839122772, "learning_rate": 5.713096842484139e-05, "loss": 1.4461, "step": 19188 }, { "epoch": 0.8532301809612733, "grad_norm": 0.057840730994939804, "learning_rate": 5.7063128749353375e-05, "loss": 1.4472, "step": 19190 }, { "epoch": 0.8533191054199457, "grad_norm": 0.05936199426651001, "learning_rate": 5.69953269380612e-05, "loss": 1.4522, "step": 19192 }, { "epoch": 0.8534080298786181, "grad_norm": 0.059048447757959366, "learning_rate": 5.6927562996760715e-05, "loss": 1.4507, "step": 19194 }, { "epoch": 0.8534969543372904, "grad_norm": 0.057632967829704285, "learning_rate": 5.685983693124452e-05, "loss": 1.4467, "step": 19196 }, { "epoch": 0.8535858787959628, "grad_norm": 0.05861329287290573, "learning_rate": 5.679214874730249e-05, "loss": 1.4473, "step": 19198 }, { "epoch": 0.8536748032546352, "grad_norm": 0.05806206539273262, "learning_rate": 5.6724498450720695e-05, "loss": 1.4405, "step": 19200 }, { "epoch": 0.8537637277133076, "grad_norm": 0.05802812799811363, "learning_rate": 5.6656886047282305e-05, "loss": 1.4478, "step": 19202 }, { "epoch": 0.8538526521719799, "grad_norm": 0.058287039399147034, "learning_rate": 5.658931154276692e-05, "loss": 1.4474, "step": 19204 }, { "epoch": 0.8539415766306523, "grad_norm": 0.05866856500506401, "learning_rate": 5.652177494295124e-05, "loss": 1.4494, "step": 19206 }, { "epoch": 0.8540305010893247, "grad_norm": 0.05808721482753754, "learning_rate": 5.645427625360866e-05, "loss": 1.4472, "step": 19208 }, { "epoch": 0.854119425547997, "grad_norm": 0.05806644633412361, "learning_rate": 5.638681548050917e-05, "loss": 1.4471, "step": 19210 }, { "epoch": 0.8542083500066693, "grad_norm": 0.058878298848867416, "learning_rate": 5.6319392629419544e-05, "loss": 1.4508, "step": 19212 }, { "epoch": 0.8542972744653416, "grad_norm": 0.058544594794511795, "learning_rate": 5.6252007706103614e-05, "loss": 1.4456, "step": 19214 }, { "epoch": 0.854386198924014, "grad_norm": 0.059308234602212906, "learning_rate": 5.61846607163215e-05, "loss": 1.4529, "step": 19216 }, { "epoch": 0.8544751233826864, "grad_norm": 0.05778004601597786, "learning_rate": 5.6117351665830426e-05, "loss": 1.4538, "step": 19218 }, { "epoch": 0.8545640478413588, "grad_norm": 0.05761827155947685, "learning_rate": 5.605008056038435e-05, "loss": 1.4521, "step": 19220 }, { "epoch": 0.8546529723000311, "grad_norm": 0.058810003101825714, "learning_rate": 5.598284740573367e-05, "loss": 1.4511, "step": 19222 }, { "epoch": 0.8547418967587035, "grad_norm": 0.05870932340621948, "learning_rate": 5.5915652207626064e-05, "loss": 1.4459, "step": 19224 }, { "epoch": 0.8548308212173759, "grad_norm": 0.05845349654555321, "learning_rate": 5.584849497180544e-05, "loss": 1.4601, "step": 19226 }, { "epoch": 0.8549197456760482, "grad_norm": 0.05838220566511154, "learning_rate": 5.578137570401271e-05, "loss": 1.4566, "step": 19228 }, { "epoch": 0.8550086701347206, "grad_norm": 0.05828014388680458, "learning_rate": 5.5714294409985766e-05, "loss": 1.4478, "step": 19230 }, { "epoch": 0.855097594593393, "grad_norm": 0.058326125144958496, "learning_rate": 5.5647251095458707e-05, "loss": 1.4441, "step": 19232 }, { "epoch": 0.8551865190520652, "grad_norm": 0.05799197405576706, "learning_rate": 5.558024576616277e-05, "loss": 1.448, "step": 19234 }, { "epoch": 0.8552754435107376, "grad_norm": 0.058170199394226074, "learning_rate": 5.5513278427826105e-05, "loss": 1.451, "step": 19236 }, { "epoch": 0.85536436796941, "grad_norm": 0.05785010755062103, "learning_rate": 5.544634908617302e-05, "loss": 1.4515, "step": 19238 }, { "epoch": 0.8554532924280823, "grad_norm": 0.05972233787178993, "learning_rate": 5.5379457746925155e-05, "loss": 1.4452, "step": 19240 }, { "epoch": 0.8555422168867547, "grad_norm": 0.060089897364377975, "learning_rate": 5.5312604415800716e-05, "loss": 1.4574, "step": 19242 }, { "epoch": 0.8556311413454271, "grad_norm": 0.06087521091103554, "learning_rate": 5.5245789098514406e-05, "loss": 1.4495, "step": 19244 }, { "epoch": 0.8557200658040994, "grad_norm": 0.05851989984512329, "learning_rate": 5.517901180077811e-05, "loss": 1.4488, "step": 19246 }, { "epoch": 0.8558089902627718, "grad_norm": 0.05812680348753929, "learning_rate": 5.5112272528299925e-05, "loss": 1.4514, "step": 19248 }, { "epoch": 0.8558979147214442, "grad_norm": 0.058141183108091354, "learning_rate": 5.50455712867855e-05, "loss": 1.4451, "step": 19250 }, { "epoch": 0.8559868391801165, "grad_norm": 0.05929475277662277, "learning_rate": 5.497890808193645e-05, "loss": 1.4509, "step": 19252 }, { "epoch": 0.8560757636387889, "grad_norm": 0.05837177485227585, "learning_rate": 5.491228291945138e-05, "loss": 1.4526, "step": 19254 }, { "epoch": 0.8561646880974612, "grad_norm": 0.05776189640164375, "learning_rate": 5.4845695805025855e-05, "loss": 1.4493, "step": 19256 }, { "epoch": 0.8562536125561335, "grad_norm": 0.057521410286426544, "learning_rate": 5.4779146744352027e-05, "loss": 1.4415, "step": 19258 }, { "epoch": 0.8563425370148059, "grad_norm": 0.06062265858054161, "learning_rate": 5.4712635743118685e-05, "loss": 1.4444, "step": 19260 }, { "epoch": 0.8564314614734783, "grad_norm": 0.05821528658270836, "learning_rate": 5.464616280701151e-05, "loss": 1.4483, "step": 19262 }, { "epoch": 0.8565203859321506, "grad_norm": 0.05795830488204956, "learning_rate": 5.457972794171312e-05, "loss": 1.4485, "step": 19264 }, { "epoch": 0.856609310390823, "grad_norm": 0.057552520185709, "learning_rate": 5.451333115290236e-05, "loss": 1.4473, "step": 19266 }, { "epoch": 0.8566982348494954, "grad_norm": 0.05941027030348778, "learning_rate": 5.444697244625524e-05, "loss": 1.4486, "step": 19268 }, { "epoch": 0.8567871593081677, "grad_norm": 0.05889924615621567, "learning_rate": 5.4380651827444394e-05, "loss": 1.4476, "step": 19270 }, { "epoch": 0.8568760837668401, "grad_norm": 0.057859137654304504, "learning_rate": 5.431436930213934e-05, "loss": 1.4475, "step": 19272 }, { "epoch": 0.8569650082255125, "grad_norm": 0.058761779218912125, "learning_rate": 5.4248124876006054e-05, "loss": 1.4519, "step": 19274 }, { "epoch": 0.8570539326841848, "grad_norm": 0.05748540908098221, "learning_rate": 5.418191855470722e-05, "loss": 1.4431, "step": 19276 }, { "epoch": 0.8571428571428571, "grad_norm": 0.0578523650765419, "learning_rate": 5.411575034390287e-05, "loss": 1.45, "step": 19278 }, { "epoch": 0.8572317816015295, "grad_norm": 0.05837201699614525, "learning_rate": 5.404962024924909e-05, "loss": 1.4471, "step": 19280 }, { "epoch": 0.8573207060602018, "grad_norm": 0.05988498777151108, "learning_rate": 5.3983528276398966e-05, "loss": 1.4496, "step": 19282 }, { "epoch": 0.8574096305188742, "grad_norm": 0.058663882315158844, "learning_rate": 5.391747443100231e-05, "loss": 1.4462, "step": 19284 }, { "epoch": 0.8574985549775466, "grad_norm": 0.05759890750050545, "learning_rate": 5.385145871870578e-05, "loss": 1.4473, "step": 19286 }, { "epoch": 0.8575874794362189, "grad_norm": 0.05909644067287445, "learning_rate": 5.37854811451528e-05, "loss": 1.4538, "step": 19288 }, { "epoch": 0.8576764038948913, "grad_norm": 0.0579351969063282, "learning_rate": 5.3719541715983145e-05, "loss": 1.454, "step": 19290 }, { "epoch": 0.8577653283535637, "grad_norm": 0.05818416178226471, "learning_rate": 5.36536404368338e-05, "loss": 1.4482, "step": 19292 }, { "epoch": 0.857854252812236, "grad_norm": 0.05801917612552643, "learning_rate": 5.358777731333836e-05, "loss": 1.4485, "step": 19294 }, { "epoch": 0.8579431772709084, "grad_norm": 0.05826589837670326, "learning_rate": 5.3521952351126946e-05, "loss": 1.4471, "step": 19296 }, { "epoch": 0.8580321017295808, "grad_norm": 0.05808262154459953, "learning_rate": 5.3456165555826445e-05, "loss": 1.4514, "step": 19298 }, { "epoch": 0.858121026188253, "grad_norm": 0.058341771364212036, "learning_rate": 5.339041693306096e-05, "loss": 1.4544, "step": 19300 }, { "epoch": 0.8582099506469254, "grad_norm": 0.05911700800061226, "learning_rate": 5.332470648845067e-05, "loss": 1.4494, "step": 19302 }, { "epoch": 0.8582988751055978, "grad_norm": 0.0577431358397007, "learning_rate": 5.3259034227613026e-05, "loss": 1.4517, "step": 19304 }, { "epoch": 0.8583877995642701, "grad_norm": 0.059968046844005585, "learning_rate": 5.319340015616175e-05, "loss": 1.4513, "step": 19306 }, { "epoch": 0.8584767240229425, "grad_norm": 0.05771856755018234, "learning_rate": 5.312780427970765e-05, "loss": 1.4496, "step": 19308 }, { "epoch": 0.8585656484816149, "grad_norm": 0.05939215421676636, "learning_rate": 5.306224660385822e-05, "loss": 1.4509, "step": 19310 }, { "epoch": 0.8586545729402872, "grad_norm": 0.05893797054886818, "learning_rate": 5.299672713421744e-05, "loss": 1.4462, "step": 19312 }, { "epoch": 0.8587434973989596, "grad_norm": 0.05804156884551048, "learning_rate": 5.2931245876386314e-05, "loss": 1.4448, "step": 19314 }, { "epoch": 0.858832421857632, "grad_norm": 0.058172598481178284, "learning_rate": 5.286580283596254e-05, "loss": 1.4499, "step": 19316 }, { "epoch": 0.8589213463163043, "grad_norm": 0.05792252719402313, "learning_rate": 5.280039801854036e-05, "loss": 1.4474, "step": 19318 }, { "epoch": 0.8590102707749766, "grad_norm": 0.05791470780968666, "learning_rate": 5.273503142971081e-05, "loss": 1.4542, "step": 19320 }, { "epoch": 0.859099195233649, "grad_norm": 0.05813904106616974, "learning_rate": 5.266970307506197e-05, "loss": 1.4486, "step": 19322 }, { "epoch": 0.8591881196923213, "grad_norm": 0.057610899209976196, "learning_rate": 5.260441296017815e-05, "loss": 1.4461, "step": 19324 }, { "epoch": 0.8592770441509937, "grad_norm": 0.05858998745679855, "learning_rate": 5.2539161090640664e-05, "loss": 1.4518, "step": 19326 }, { "epoch": 0.8593659686096661, "grad_norm": 0.05812988802790642, "learning_rate": 5.247394747202772e-05, "loss": 1.4471, "step": 19328 }, { "epoch": 0.8594548930683384, "grad_norm": 0.05788353830575943, "learning_rate": 5.2408772109913796e-05, "loss": 1.4433, "step": 19330 }, { "epoch": 0.8595438175270108, "grad_norm": 0.057957880198955536, "learning_rate": 5.2343635009870604e-05, "loss": 1.4528, "step": 19332 }, { "epoch": 0.8596327419856832, "grad_norm": 0.05941891297698021, "learning_rate": 5.22785361774662e-05, "loss": 1.456, "step": 19334 }, { "epoch": 0.8597216664443555, "grad_norm": 0.059053994715213776, "learning_rate": 5.2213475618265505e-05, "loss": 1.4444, "step": 19336 }, { "epoch": 0.8598105909030279, "grad_norm": 0.05831526964902878, "learning_rate": 5.214845333783036e-05, "loss": 1.4422, "step": 19338 }, { "epoch": 0.8598995153617003, "grad_norm": 0.05919569730758667, "learning_rate": 5.208346934171898e-05, "loss": 1.4492, "step": 19340 }, { "epoch": 0.8599884398203725, "grad_norm": 0.057435665279626846, "learning_rate": 5.2018523635486536e-05, "loss": 1.4433, "step": 19342 }, { "epoch": 0.8600773642790449, "grad_norm": 0.059063155204057693, "learning_rate": 5.1953616224684916e-05, "loss": 1.4567, "step": 19344 }, { "epoch": 0.8601662887377173, "grad_norm": 0.0575404167175293, "learning_rate": 5.188874711486263e-05, "loss": 1.4542, "step": 19346 }, { "epoch": 0.8602552131963896, "grad_norm": 0.05753824859857559, "learning_rate": 5.182391631156497e-05, "loss": 1.4458, "step": 19348 }, { "epoch": 0.860344137655062, "grad_norm": 0.05860580503940582, "learning_rate": 5.1759123820334096e-05, "loss": 1.4482, "step": 19350 }, { "epoch": 0.8604330621137344, "grad_norm": 0.057950492948293686, "learning_rate": 5.16943696467086e-05, "loss": 1.44, "step": 19352 }, { "epoch": 0.8605219865724067, "grad_norm": 0.0587628073990345, "learning_rate": 5.162965379622403e-05, "loss": 1.4523, "step": 19354 }, { "epoch": 0.8606109110310791, "grad_norm": 0.05854136124253273, "learning_rate": 5.156497627441242e-05, "loss": 1.4526, "step": 19356 }, { "epoch": 0.8606998354897515, "grad_norm": 0.05751117691397667, "learning_rate": 5.150033708680296e-05, "loss": 1.4459, "step": 19358 }, { "epoch": 0.8607887599484239, "grad_norm": 0.0575980618596077, "learning_rate": 5.1435736238921226e-05, "loss": 1.4479, "step": 19360 }, { "epoch": 0.8608776844070962, "grad_norm": 0.05835995450615883, "learning_rate": 5.137117373628936e-05, "loss": 1.4543, "step": 19362 }, { "epoch": 0.8609666088657685, "grad_norm": 0.057942815124988556, "learning_rate": 5.1306649584426654e-05, "loss": 1.4561, "step": 19364 }, { "epoch": 0.8610555333244408, "grad_norm": 0.05824165791273117, "learning_rate": 5.124216378884894e-05, "loss": 1.4495, "step": 19366 }, { "epoch": 0.8611444577831132, "grad_norm": 0.05746667832136154, "learning_rate": 5.117771635506857e-05, "loss": 1.4532, "step": 19368 }, { "epoch": 0.8612333822417856, "grad_norm": 0.05848689749836922, "learning_rate": 5.111330728859492e-05, "loss": 1.4543, "step": 19370 }, { "epoch": 0.861322306700458, "grad_norm": 0.057919543236494064, "learning_rate": 5.104893659493392e-05, "loss": 1.4482, "step": 19372 }, { "epoch": 0.8614112311591303, "grad_norm": 0.058501534163951874, "learning_rate": 5.0984604279588456e-05, "loss": 1.4445, "step": 19374 }, { "epoch": 0.8615001556178027, "grad_norm": 0.05916326865553856, "learning_rate": 5.092031034805761e-05, "loss": 1.4471, "step": 19376 }, { "epoch": 0.8615890800764751, "grad_norm": 0.05860411003232002, "learning_rate": 5.085605480583766e-05, "loss": 1.4482, "step": 19378 }, { "epoch": 0.8616780045351474, "grad_norm": 0.059459760785102844, "learning_rate": 5.0791837658421544e-05, "loss": 1.4528, "step": 19380 }, { "epoch": 0.8617669289938198, "grad_norm": 0.059411801397800446, "learning_rate": 5.072765891129877e-05, "loss": 1.4544, "step": 19382 }, { "epoch": 0.8618558534524922, "grad_norm": 0.05691593512892723, "learning_rate": 5.066351856995538e-05, "loss": 1.4445, "step": 19384 }, { "epoch": 0.8619447779111644, "grad_norm": 0.05798202380537987, "learning_rate": 5.0599416639874776e-05, "loss": 1.4533, "step": 19386 }, { "epoch": 0.8620337023698368, "grad_norm": 0.058786388486623764, "learning_rate": 5.053535312653651e-05, "loss": 1.4485, "step": 19388 }, { "epoch": 0.8621226268285092, "grad_norm": 0.05767536908388138, "learning_rate": 5.0471328035416876e-05, "loss": 1.4481, "step": 19390 }, { "epoch": 0.8622115512871815, "grad_norm": 0.05754457041621208, "learning_rate": 5.04073413719891e-05, "loss": 1.4473, "step": 19392 }, { "epoch": 0.8623004757458539, "grad_norm": 0.05914188176393509, "learning_rate": 5.034339314172309e-05, "loss": 1.4438, "step": 19394 }, { "epoch": 0.8623894002045263, "grad_norm": 0.058261413127183914, "learning_rate": 5.027948335008553e-05, "loss": 1.453, "step": 19396 }, { "epoch": 0.8624783246631986, "grad_norm": 0.05782920494675636, "learning_rate": 5.021561200253943e-05, "loss": 1.4516, "step": 19398 }, { "epoch": 0.862567249121871, "grad_norm": 0.05883567035198212, "learning_rate": 5.015177910454499e-05, "loss": 1.4472, "step": 19400 }, { "epoch": 0.8626561735805434, "grad_norm": 0.05863523483276367, "learning_rate": 5.0087984661559005e-05, "loss": 1.4518, "step": 19402 }, { "epoch": 0.8627450980392157, "grad_norm": 0.05786546319723129, "learning_rate": 5.0024228679034775e-05, "loss": 1.4483, "step": 19404 }, { "epoch": 0.8628340224978881, "grad_norm": 0.058120548725128174, "learning_rate": 4.996051116242228e-05, "loss": 1.4442, "step": 19406 }, { "epoch": 0.8629229469565604, "grad_norm": 0.05752819404006004, "learning_rate": 4.989683211716872e-05, "loss": 1.4516, "step": 19408 }, { "epoch": 0.8630118714152327, "grad_norm": 0.05808071419596672, "learning_rate": 4.983319154871741e-05, "loss": 1.4465, "step": 19410 }, { "epoch": 0.8631007958739051, "grad_norm": 0.05908680707216263, "learning_rate": 4.9769589462508756e-05, "loss": 1.449, "step": 19412 }, { "epoch": 0.8631897203325775, "grad_norm": 0.0582328736782074, "learning_rate": 4.9706025863979654e-05, "loss": 1.4444, "step": 19414 }, { "epoch": 0.8632786447912498, "grad_norm": 0.058734990656375885, "learning_rate": 4.96425007585638e-05, "loss": 1.4558, "step": 19416 }, { "epoch": 0.8633675692499222, "grad_norm": 0.05865876004099846, "learning_rate": 4.95790141516918e-05, "loss": 1.4477, "step": 19418 }, { "epoch": 0.8634564937085946, "grad_norm": 0.05872330814599991, "learning_rate": 4.9515566048790485e-05, "loss": 1.4483, "step": 19420 }, { "epoch": 0.8635454181672669, "grad_norm": 0.05882033705711365, "learning_rate": 4.945215645528378e-05, "loss": 1.446, "step": 19422 }, { "epoch": 0.8636343426259393, "grad_norm": 0.05818769335746765, "learning_rate": 4.938878537659236e-05, "loss": 1.4462, "step": 19424 }, { "epoch": 0.8637232670846117, "grad_norm": 0.05744488164782524, "learning_rate": 4.932545281813322e-05, "loss": 1.448, "step": 19426 }, { "epoch": 0.8638121915432839, "grad_norm": 0.0585269033908844, "learning_rate": 4.9262158785320474e-05, "loss": 1.4513, "step": 19428 }, { "epoch": 0.8639011160019563, "grad_norm": 0.057957399636507034, "learning_rate": 4.91989032835648e-05, "loss": 1.4463, "step": 19430 }, { "epoch": 0.8639900404606287, "grad_norm": 0.058509595692157745, "learning_rate": 4.913568631827342e-05, "loss": 1.451, "step": 19432 }, { "epoch": 0.864078964919301, "grad_norm": 0.05889742821455002, "learning_rate": 4.9072507894850404e-05, "loss": 1.4461, "step": 19434 }, { "epoch": 0.8641678893779734, "grad_norm": 0.05817759409546852, "learning_rate": 4.9009368018696766e-05, "loss": 1.4507, "step": 19436 }, { "epoch": 0.8642568138366458, "grad_norm": 0.05786820873618126, "learning_rate": 4.8946266695209626e-05, "loss": 1.4559, "step": 19438 }, { "epoch": 0.8643457382953181, "grad_norm": 0.05808315426111221, "learning_rate": 4.8883203929783514e-05, "loss": 1.4505, "step": 19440 }, { "epoch": 0.8644346627539905, "grad_norm": 0.058381590992212296, "learning_rate": 4.8820179727808886e-05, "loss": 1.444, "step": 19442 }, { "epoch": 0.8645235872126629, "grad_norm": 0.05841744318604469, "learning_rate": 4.875719409467377e-05, "loss": 1.4468, "step": 19444 }, { "epoch": 0.8646125116713352, "grad_norm": 0.058670420199632645, "learning_rate": 4.869424703576231e-05, "loss": 1.4511, "step": 19446 }, { "epoch": 0.8647014361300076, "grad_norm": 0.05914199724793434, "learning_rate": 4.8631338556455354e-05, "loss": 1.4477, "step": 19448 }, { "epoch": 0.8647903605886799, "grad_norm": 0.05767590180039406, "learning_rate": 4.856846866213066e-05, "loss": 1.4442, "step": 19450 }, { "epoch": 0.8648792850473522, "grad_norm": 0.05876242741942406, "learning_rate": 4.8505637358162765e-05, "loss": 1.4517, "step": 19452 }, { "epoch": 0.8649682095060246, "grad_norm": 0.0582275465130806, "learning_rate": 4.8442844649922604e-05, "loss": 1.4491, "step": 19454 }, { "epoch": 0.865057133964697, "grad_norm": 0.058131519705057144, "learning_rate": 4.8380090542777974e-05, "loss": 1.4483, "step": 19456 }, { "epoch": 0.8651460584233693, "grad_norm": 0.05899100378155708, "learning_rate": 4.831737504209355e-05, "loss": 1.4476, "step": 19458 }, { "epoch": 0.8652349828820417, "grad_norm": 0.05828023701906204, "learning_rate": 4.8254698153230305e-05, "loss": 1.4483, "step": 19460 }, { "epoch": 0.8653239073407141, "grad_norm": 0.05877317860722542, "learning_rate": 4.8192059881546345e-05, "loss": 1.4492, "step": 19462 }, { "epoch": 0.8654128317993864, "grad_norm": 0.05795292928814888, "learning_rate": 4.812946023239595e-05, "loss": 1.448, "step": 19464 }, { "epoch": 0.8655017562580588, "grad_norm": 0.05792052298784256, "learning_rate": 4.806689921113083e-05, "loss": 1.4499, "step": 19466 }, { "epoch": 0.8655906807167312, "grad_norm": 0.05851710960268974, "learning_rate": 4.800437682309877e-05, "loss": 1.4556, "step": 19468 }, { "epoch": 0.8656796051754035, "grad_norm": 0.057633742690086365, "learning_rate": 4.794189307364433e-05, "loss": 1.4414, "step": 19470 }, { "epoch": 0.8657685296340758, "grad_norm": 0.058146603405475616, "learning_rate": 4.787944796810906e-05, "loss": 1.4447, "step": 19472 }, { "epoch": 0.8658574540927482, "grad_norm": 0.058894164860248566, "learning_rate": 4.7817041511831136e-05, "loss": 1.4552, "step": 19474 }, { "epoch": 0.8659463785514205, "grad_norm": 0.0587955042719841, "learning_rate": 4.775467371014508e-05, "loss": 1.4472, "step": 19476 }, { "epoch": 0.8660353030100929, "grad_norm": 0.05853498354554176, "learning_rate": 4.769234456838245e-05, "loss": 1.4491, "step": 19478 }, { "epoch": 0.8661242274687653, "grad_norm": 0.05728617683053017, "learning_rate": 4.763005409187154e-05, "loss": 1.4472, "step": 19480 }, { "epoch": 0.8662131519274376, "grad_norm": 0.058214664459228516, "learning_rate": 4.7567802285937155e-05, "loss": 1.4534, "step": 19482 }, { "epoch": 0.86630207638611, "grad_norm": 0.05796115845441818, "learning_rate": 4.750558915590081e-05, "loss": 1.4468, "step": 19484 }, { "epoch": 0.8663910008447824, "grad_norm": 0.05892049893736839, "learning_rate": 4.744341470708075e-05, "loss": 1.4474, "step": 19486 }, { "epoch": 0.8664799253034547, "grad_norm": 0.05804290249943733, "learning_rate": 4.738127894479205e-05, "loss": 1.4496, "step": 19488 }, { "epoch": 0.8665688497621271, "grad_norm": 0.05845332145690918, "learning_rate": 4.731918187434625e-05, "loss": 1.4493, "step": 19490 }, { "epoch": 0.8666577742207995, "grad_norm": 0.058075401932001114, "learning_rate": 4.725712350105149e-05, "loss": 1.4496, "step": 19492 }, { "epoch": 0.8667466986794717, "grad_norm": 0.057721007615327835, "learning_rate": 4.719510383021319e-05, "loss": 1.4517, "step": 19494 }, { "epoch": 0.8668356231381441, "grad_norm": 0.05866990238428116, "learning_rate": 4.7133122867132725e-05, "loss": 1.4498, "step": 19496 }, { "epoch": 0.8669245475968165, "grad_norm": 0.05763349309563637, "learning_rate": 4.70711806171088e-05, "loss": 1.4501, "step": 19498 }, { "epoch": 0.8670134720554888, "grad_norm": 0.058722514659166336, "learning_rate": 4.7009277085436176e-05, "loss": 1.4518, "step": 19500 }, { "epoch": 0.8670134720554888, "eval_loss": 1.4349604845046997, "eval_runtime": 12.4467, "eval_samples_per_second": 555.169, "eval_steps_per_second": 69.416, "step": 19500 }, { "epoch": 0.8671023965141612, "grad_norm": 0.058399032801389694, "learning_rate": 4.69474122774069e-05, "loss": 1.4443, "step": 19502 }, { "epoch": 0.8671913209728336, "grad_norm": 0.057792019098997116, "learning_rate": 4.6885586198309406e-05, "loss": 1.4442, "step": 19504 }, { "epoch": 0.867280245431506, "grad_norm": 0.05912178382277489, "learning_rate": 4.682379885342869e-05, "loss": 1.4518, "step": 19506 }, { "epoch": 0.8673691698901783, "grad_norm": 0.05834237486124039, "learning_rate": 4.676205024804675e-05, "loss": 1.4467, "step": 19508 }, { "epoch": 0.8674580943488507, "grad_norm": 0.057614706456661224, "learning_rate": 4.6700340387442255e-05, "loss": 1.4508, "step": 19510 }, { "epoch": 0.867547018807523, "grad_norm": 0.05703110247850418, "learning_rate": 4.663866927689025e-05, "loss": 1.4479, "step": 19512 }, { "epoch": 0.8676359432661954, "grad_norm": 0.05827274173498154, "learning_rate": 4.657703692166248e-05, "loss": 1.4489, "step": 19514 }, { "epoch": 0.8677248677248677, "grad_norm": 0.058673057705163956, "learning_rate": 4.6515443327028006e-05, "loss": 1.4462, "step": 19516 }, { "epoch": 0.86781379218354, "grad_norm": 0.057610198855400085, "learning_rate": 4.6453888498251715e-05, "loss": 1.45, "step": 19518 }, { "epoch": 0.8679027166422124, "grad_norm": 0.05858117714524269, "learning_rate": 4.639237244059591e-05, "loss": 1.4429, "step": 19520 }, { "epoch": 0.8679916411008848, "grad_norm": 0.057463061064481735, "learning_rate": 4.633089515931899e-05, "loss": 1.4498, "step": 19522 }, { "epoch": 0.8680805655595571, "grad_norm": 0.05761371925473213, "learning_rate": 4.6269456659676355e-05, "loss": 1.4479, "step": 19524 }, { "epoch": 0.8681694900182295, "grad_norm": 0.05824030563235283, "learning_rate": 4.6208056946920204e-05, "loss": 1.4496, "step": 19526 }, { "epoch": 0.8682584144769019, "grad_norm": 0.05864088982343674, "learning_rate": 4.6146696026299064e-05, "loss": 1.4461, "step": 19528 }, { "epoch": 0.8683473389355743, "grad_norm": 0.057611025869846344, "learning_rate": 4.608537390305845e-05, "loss": 1.4491, "step": 19530 }, { "epoch": 0.8684362633942466, "grad_norm": 0.05795488879084587, "learning_rate": 4.6024090582440446e-05, "loss": 1.446, "step": 19532 }, { "epoch": 0.868525187852919, "grad_norm": 0.05817627161741257, "learning_rate": 4.5962846069683705e-05, "loss": 1.4447, "step": 19534 }, { "epoch": 0.8686141123115914, "grad_norm": 0.058427758514881134, "learning_rate": 4.590164037002381e-05, "loss": 1.4526, "step": 19536 }, { "epoch": 0.8687030367702636, "grad_norm": 0.05801371484994888, "learning_rate": 4.5840473488692856e-05, "loss": 1.4533, "step": 19538 }, { "epoch": 0.868791961228936, "grad_norm": 0.058494169265031815, "learning_rate": 4.57793454309196e-05, "loss": 1.447, "step": 19540 }, { "epoch": 0.8688808856876084, "grad_norm": 0.058679576963186264, "learning_rate": 4.571825620192965e-05, "loss": 1.4489, "step": 19542 }, { "epoch": 0.8689698101462807, "grad_norm": 0.05957064405083656, "learning_rate": 4.565720580694505e-05, "loss": 1.446, "step": 19544 }, { "epoch": 0.8690587346049531, "grad_norm": 0.05911765247583389, "learning_rate": 4.559619425118472e-05, "loss": 1.4461, "step": 19546 }, { "epoch": 0.8691476590636255, "grad_norm": 0.05717119202017784, "learning_rate": 4.553522153986428e-05, "loss": 1.4417, "step": 19548 }, { "epoch": 0.8692365835222978, "grad_norm": 0.057892777025699615, "learning_rate": 4.547428767819578e-05, "loss": 1.4474, "step": 19550 }, { "epoch": 0.8693255079809702, "grad_norm": 0.05849381908774376, "learning_rate": 4.541339267138816e-05, "loss": 1.4522, "step": 19552 }, { "epoch": 0.8694144324396426, "grad_norm": 0.05854662507772446, "learning_rate": 4.535253652464716e-05, "loss": 1.443, "step": 19554 }, { "epoch": 0.8695033568983149, "grad_norm": 0.05722244083881378, "learning_rate": 4.529171924317482e-05, "loss": 1.4438, "step": 19556 }, { "epoch": 0.8695922813569872, "grad_norm": 0.05876478552818298, "learning_rate": 4.523094083217011e-05, "loss": 1.4451, "step": 19558 }, { "epoch": 0.8696812058156596, "grad_norm": 0.058361224830150604, "learning_rate": 4.5170201296828804e-05, "loss": 1.4514, "step": 19560 }, { "epoch": 0.8697701302743319, "grad_norm": 0.0587281808257103, "learning_rate": 4.510950064234298e-05, "loss": 1.4467, "step": 19562 }, { "epoch": 0.8698590547330043, "grad_norm": 0.05788736417889595, "learning_rate": 4.504883887390165e-05, "loss": 1.4514, "step": 19564 }, { "epoch": 0.8699479791916767, "grad_norm": 0.05839037150144577, "learning_rate": 4.498821599669051e-05, "loss": 1.4457, "step": 19566 }, { "epoch": 0.870036903650349, "grad_norm": 0.059564560651779175, "learning_rate": 4.49276320158919e-05, "loss": 1.4439, "step": 19568 }, { "epoch": 0.8701258281090214, "grad_norm": 0.05748181417584419, "learning_rate": 4.486708693668473e-05, "loss": 1.4528, "step": 19570 }, { "epoch": 0.8702147525676938, "grad_norm": 0.058979492634534836, "learning_rate": 4.480658076424454e-05, "loss": 1.443, "step": 19572 }, { "epoch": 0.8703036770263661, "grad_norm": 0.058040812611579895, "learning_rate": 4.474611350374397e-05, "loss": 1.4517, "step": 19574 }, { "epoch": 0.8703926014850385, "grad_norm": 0.05852758511900902, "learning_rate": 4.468568516035182e-05, "loss": 1.4503, "step": 19576 }, { "epoch": 0.8704815259437109, "grad_norm": 0.05870926007628441, "learning_rate": 4.462529573923374e-05, "loss": 1.4487, "step": 19578 }, { "epoch": 0.8705704504023831, "grad_norm": 0.05752955749630928, "learning_rate": 4.4564945245552155e-05, "loss": 1.4509, "step": 19580 }, { "epoch": 0.8706593748610555, "grad_norm": 0.05794141814112663, "learning_rate": 4.450463368446617e-05, "loss": 1.4525, "step": 19582 }, { "epoch": 0.8707482993197279, "grad_norm": 0.05781754106283188, "learning_rate": 4.4444361061131314e-05, "loss": 1.4514, "step": 19584 }, { "epoch": 0.8708372237784002, "grad_norm": 0.05853389576077461, "learning_rate": 4.438412738070008e-05, "loss": 1.4528, "step": 19586 }, { "epoch": 0.8709261482370726, "grad_norm": 0.05923375114798546, "learning_rate": 4.432393264832141e-05, "loss": 1.4523, "step": 19588 }, { "epoch": 0.871015072695745, "grad_norm": 0.058429352939128876, "learning_rate": 4.426377686914118e-05, "loss": 1.4478, "step": 19590 }, { "epoch": 0.8711039971544173, "grad_norm": 0.05943598598241806, "learning_rate": 4.420366004830173e-05, "loss": 1.4575, "step": 19592 }, { "epoch": 0.8711929216130897, "grad_norm": 0.05901796370744705, "learning_rate": 4.414358219094178e-05, "loss": 1.4495, "step": 19594 }, { "epoch": 0.8712818460717621, "grad_norm": 0.05831238627433777, "learning_rate": 4.408354330219755e-05, "loss": 1.4525, "step": 19596 }, { "epoch": 0.8713707705304344, "grad_norm": 0.059671416878700256, "learning_rate": 4.402354338720122e-05, "loss": 1.4544, "step": 19598 }, { "epoch": 0.8714596949891068, "grad_norm": 0.05828830599784851, "learning_rate": 4.3963582451081675e-05, "loss": 1.4521, "step": 19600 }, { "epoch": 0.8715486194477791, "grad_norm": 0.058360446244478226, "learning_rate": 4.3903660498964824e-05, "loss": 1.451, "step": 19602 }, { "epoch": 0.8716375439064514, "grad_norm": 0.0570736788213253, "learning_rate": 4.384377753597302e-05, "loss": 1.4486, "step": 19604 }, { "epoch": 0.8717264683651238, "grad_norm": 0.05778930336236954, "learning_rate": 4.3783933567225374e-05, "loss": 1.4499, "step": 19606 }, { "epoch": 0.8718153928237962, "grad_norm": 0.0583699494600296, "learning_rate": 4.372412859783753e-05, "loss": 1.4499, "step": 19608 }, { "epoch": 0.8719043172824685, "grad_norm": 0.05747527629137039, "learning_rate": 4.3664362632921894e-05, "loss": 1.4511, "step": 19610 }, { "epoch": 0.8719932417411409, "grad_norm": 0.058236028999090195, "learning_rate": 4.3604635677587653e-05, "loss": 1.4475, "step": 19612 }, { "epoch": 0.8720821661998133, "grad_norm": 0.057254135608673096, "learning_rate": 4.3544947736940286e-05, "loss": 1.4533, "step": 19614 }, { "epoch": 0.8721710906584856, "grad_norm": 0.05867518112063408, "learning_rate": 4.348529881608237e-05, "loss": 1.446, "step": 19616 }, { "epoch": 0.872260015117158, "grad_norm": 0.057885196059942245, "learning_rate": 4.3425688920113006e-05, "loss": 1.4482, "step": 19618 }, { "epoch": 0.8723489395758304, "grad_norm": 0.058469321578741074, "learning_rate": 4.3366118054127656e-05, "loss": 1.4529, "step": 19620 }, { "epoch": 0.8724378640345027, "grad_norm": 0.058005768805742264, "learning_rate": 4.330658622321892e-05, "loss": 1.4456, "step": 19622 }, { "epoch": 0.872526788493175, "grad_norm": 0.05836115777492523, "learning_rate": 4.324709343247585e-05, "loss": 1.4495, "step": 19624 }, { "epoch": 0.8726157129518474, "grad_norm": 0.058056388050317764, "learning_rate": 4.318763968698397e-05, "loss": 1.4488, "step": 19626 }, { "epoch": 0.8727046374105197, "grad_norm": 0.05839812010526657, "learning_rate": 4.312822499182584e-05, "loss": 1.4443, "step": 19628 }, { "epoch": 0.8727935618691921, "grad_norm": 0.05871996283531189, "learning_rate": 4.306884935208033e-05, "loss": 1.4433, "step": 19630 }, { "epoch": 0.8728824863278645, "grad_norm": 0.05736853554844856, "learning_rate": 4.300951277282322e-05, "loss": 1.4489, "step": 19632 }, { "epoch": 0.8729714107865368, "grad_norm": 0.058451537042856216, "learning_rate": 4.29502152591269e-05, "loss": 1.4537, "step": 19634 }, { "epoch": 0.8730603352452092, "grad_norm": 0.05809301510453224, "learning_rate": 4.289095681606031e-05, "loss": 1.4495, "step": 19636 }, { "epoch": 0.8731492597038816, "grad_norm": 0.0578899048268795, "learning_rate": 4.283173744868912e-05, "loss": 1.4448, "step": 19638 }, { "epoch": 0.8732381841625539, "grad_norm": 0.0587192140519619, "learning_rate": 4.277255716207573e-05, "loss": 1.4503, "step": 19640 }, { "epoch": 0.8733271086212263, "grad_norm": 0.05794324725866318, "learning_rate": 4.2713415961279034e-05, "loss": 1.4495, "step": 19642 }, { "epoch": 0.8734160330798987, "grad_norm": 0.057733118534088135, "learning_rate": 4.2654313851354765e-05, "loss": 1.448, "step": 19644 }, { "epoch": 0.8735049575385709, "grad_norm": 0.05751529708504677, "learning_rate": 4.259525083735527e-05, "loss": 1.4532, "step": 19646 }, { "epoch": 0.8735938819972433, "grad_norm": 0.058284077793359756, "learning_rate": 4.2536226924329344e-05, "loss": 1.4469, "step": 19648 }, { "epoch": 0.8736828064559157, "grad_norm": 0.05852330103516579, "learning_rate": 4.247724211732285e-05, "loss": 1.4482, "step": 19650 }, { "epoch": 0.873771730914588, "grad_norm": 0.05764244496822357, "learning_rate": 4.2418296421377786e-05, "loss": 1.4503, "step": 19652 }, { "epoch": 0.8738606553732604, "grad_norm": 0.05952933803200722, "learning_rate": 4.235938984153326e-05, "loss": 1.448, "step": 19654 }, { "epoch": 0.8739495798319328, "grad_norm": 0.058142997324466705, "learning_rate": 4.230052238282495e-05, "loss": 1.4516, "step": 19656 }, { "epoch": 0.8740385042906051, "grad_norm": 0.05889532342553139, "learning_rate": 4.224169405028494e-05, "loss": 1.4509, "step": 19658 }, { "epoch": 0.8741274287492775, "grad_norm": 0.058623868972063065, "learning_rate": 4.218290484894216e-05, "loss": 1.4537, "step": 19660 }, { "epoch": 0.8742163532079499, "grad_norm": 0.05701803043484688, "learning_rate": 4.2124154783822314e-05, "loss": 1.4467, "step": 19662 }, { "epoch": 0.8743052776666222, "grad_norm": 0.05911652743816376, "learning_rate": 4.2065443859947425e-05, "loss": 1.4449, "step": 19664 }, { "epoch": 0.8743942021252946, "grad_norm": 0.05890338122844696, "learning_rate": 4.200677208233644e-05, "loss": 1.4566, "step": 19666 }, { "epoch": 0.8744831265839669, "grad_norm": 0.05840785801410675, "learning_rate": 4.194813945600495e-05, "loss": 1.4515, "step": 19668 }, { "epoch": 0.8745720510426392, "grad_norm": 0.059223078191280365, "learning_rate": 4.188954598596506e-05, "loss": 1.4478, "step": 19670 }, { "epoch": 0.8746609755013116, "grad_norm": 0.05858706682920456, "learning_rate": 4.183099167722554e-05, "loss": 1.4449, "step": 19672 }, { "epoch": 0.874749899959984, "grad_norm": 0.057242508977651596, "learning_rate": 4.177247653479194e-05, "loss": 1.4504, "step": 19674 }, { "epoch": 0.8748388244186563, "grad_norm": 0.058398645371198654, "learning_rate": 4.171400056366648e-05, "loss": 1.4501, "step": 19676 }, { "epoch": 0.8749277488773287, "grad_norm": 0.05813181772828102, "learning_rate": 4.165556376884788e-05, "loss": 1.4488, "step": 19678 }, { "epoch": 0.8750166733360011, "grad_norm": 0.05748685821890831, "learning_rate": 4.1597166155331323e-05, "loss": 1.4515, "step": 19680 }, { "epoch": 0.8751055977946735, "grad_norm": 0.05673710256814957, "learning_rate": 4.153880772810936e-05, "loss": 1.4491, "step": 19682 }, { "epoch": 0.8751945222533458, "grad_norm": 0.05813974887132645, "learning_rate": 4.1480488492170396e-05, "loss": 1.4472, "step": 19684 }, { "epoch": 0.8752834467120182, "grad_norm": 0.05882754176855087, "learning_rate": 4.142220845249989e-05, "loss": 1.4506, "step": 19686 }, { "epoch": 0.8753723711706904, "grad_norm": 0.05838124826550484, "learning_rate": 4.13639676140799e-05, "loss": 1.4483, "step": 19688 }, { "epoch": 0.8754612956293628, "grad_norm": 0.05817935988306999, "learning_rate": 4.130576598188907e-05, "loss": 1.4489, "step": 19690 }, { "epoch": 0.8755502200880352, "grad_norm": 0.05672944337129593, "learning_rate": 4.124760356090285e-05, "loss": 1.4481, "step": 19692 }, { "epoch": 0.8756391445467075, "grad_norm": 0.05868963524699211, "learning_rate": 4.1189480356093045e-05, "loss": 1.4463, "step": 19694 }, { "epoch": 0.8757280690053799, "grad_norm": 0.058534860610961914, "learning_rate": 4.113139637242835e-05, "loss": 1.4481, "step": 19696 }, { "epoch": 0.8758169934640523, "grad_norm": 0.05786217749118805, "learning_rate": 4.107335161487419e-05, "loss": 1.4436, "step": 19698 }, { "epoch": 0.8759059179227247, "grad_norm": 0.057530727237463, "learning_rate": 4.101534608839236e-05, "loss": 1.4428, "step": 19700 }, { "epoch": 0.875994842381397, "grad_norm": 0.05776270851492882, "learning_rate": 4.095737979794123e-05, "loss": 1.4538, "step": 19702 }, { "epoch": 0.8760837668400694, "grad_norm": 0.05870446935296059, "learning_rate": 4.0899452748476394e-05, "loss": 1.4482, "step": 19704 }, { "epoch": 0.8761726912987418, "grad_norm": 0.05895115062594414, "learning_rate": 4.08415649449495e-05, "loss": 1.4544, "step": 19706 }, { "epoch": 0.8762616157574141, "grad_norm": 0.05859552323818207, "learning_rate": 4.0783716392309047e-05, "loss": 1.4508, "step": 19708 }, { "epoch": 0.8763505402160864, "grad_norm": 0.06085187941789627, "learning_rate": 4.072590709550017e-05, "loss": 1.4512, "step": 19710 }, { "epoch": 0.8764394646747588, "grad_norm": 0.057687316089868546, "learning_rate": 4.066813705946476e-05, "loss": 1.4437, "step": 19712 }, { "epoch": 0.8765283891334311, "grad_norm": 0.05841357260942459, "learning_rate": 4.061040628914125e-05, "loss": 1.449, "step": 19714 }, { "epoch": 0.8766173135921035, "grad_norm": 0.058153048157691956, "learning_rate": 4.05527147894646e-05, "loss": 1.4421, "step": 19716 }, { "epoch": 0.8767062380507759, "grad_norm": 0.058645881712436676, "learning_rate": 4.049506256536656e-05, "loss": 1.454, "step": 19718 }, { "epoch": 0.8767951625094482, "grad_norm": 0.057623352855443954, "learning_rate": 4.04374496217757e-05, "loss": 1.4492, "step": 19720 }, { "epoch": 0.8768840869681206, "grad_norm": 0.059648510068655014, "learning_rate": 4.0379875963616754e-05, "loss": 1.4486, "step": 19722 }, { "epoch": 0.876973011426793, "grad_norm": 0.05818026140332222, "learning_rate": 4.03223415958115e-05, "loss": 1.4503, "step": 19724 }, { "epoch": 0.8770619358854653, "grad_norm": 0.05832851305603981, "learning_rate": 4.0264846523278334e-05, "loss": 1.4549, "step": 19726 }, { "epoch": 0.8771508603441377, "grad_norm": 0.05934127792716026, "learning_rate": 4.020739075093194e-05, "loss": 1.449, "step": 19728 }, { "epoch": 0.8772397848028101, "grad_norm": 0.05757347121834755, "learning_rate": 4.0149974283684055e-05, "loss": 1.4422, "step": 19730 }, { "epoch": 0.8773287092614823, "grad_norm": 0.057523492723703384, "learning_rate": 4.009259712644292e-05, "loss": 1.4492, "step": 19732 }, { "epoch": 0.8774176337201547, "grad_norm": 0.0594845674932003, "learning_rate": 4.0035259284113276e-05, "loss": 1.4449, "step": 19734 }, { "epoch": 0.8775065581788271, "grad_norm": 0.05800360441207886, "learning_rate": 3.997796076159671e-05, "loss": 1.4463, "step": 19736 }, { "epoch": 0.8775954826374994, "grad_norm": 0.05898301303386688, "learning_rate": 3.992070156379124e-05, "loss": 1.4462, "step": 19738 }, { "epoch": 0.8776844070961718, "grad_norm": 0.05766226351261139, "learning_rate": 3.986348169559173e-05, "loss": 1.4474, "step": 19740 }, { "epoch": 0.8777733315548442, "grad_norm": 0.05817679688334465, "learning_rate": 3.98063011618896e-05, "loss": 1.4465, "step": 19742 }, { "epoch": 0.8778622560135165, "grad_norm": 0.058511506766080856, "learning_rate": 3.9749159967572834e-05, "loss": 1.4514, "step": 19744 }, { "epoch": 0.8779511804721889, "grad_norm": 0.058822281658649445, "learning_rate": 3.9692058117526076e-05, "loss": 1.4508, "step": 19746 }, { "epoch": 0.8780401049308613, "grad_norm": 0.058104876428842545, "learning_rate": 3.9634995616630806e-05, "loss": 1.4482, "step": 19748 }, { "epoch": 0.8781290293895336, "grad_norm": 0.05765243247151375, "learning_rate": 3.95779724697648e-05, "loss": 1.451, "step": 19750 }, { "epoch": 0.878217953848206, "grad_norm": 0.058590538799762726, "learning_rate": 3.952098868180271e-05, "loss": 1.4445, "step": 19752 }, { "epoch": 0.8783068783068783, "grad_norm": 0.05874716117978096, "learning_rate": 3.946404425761585e-05, "loss": 1.4548, "step": 19754 }, { "epoch": 0.8783958027655506, "grad_norm": 0.05839841812849045, "learning_rate": 3.940713920207195e-05, "loss": 1.4452, "step": 19756 }, { "epoch": 0.878484727224223, "grad_norm": 0.05867445841431618, "learning_rate": 3.9350273520035615e-05, "loss": 1.4459, "step": 19758 }, { "epoch": 0.8785736516828954, "grad_norm": 0.0578501895070076, "learning_rate": 3.929344721636774e-05, "loss": 1.4534, "step": 19760 }, { "epoch": 0.8786625761415677, "grad_norm": 0.05831950530409813, "learning_rate": 3.9236660295926485e-05, "loss": 1.4527, "step": 19762 }, { "epoch": 0.8787515006002401, "grad_norm": 0.05866897478699684, "learning_rate": 3.9179912763565975e-05, "loss": 1.4518, "step": 19764 }, { "epoch": 0.8788404250589125, "grad_norm": 0.059070635586977005, "learning_rate": 3.912320462413721e-05, "loss": 1.4476, "step": 19766 }, { "epoch": 0.8789293495175848, "grad_norm": 0.05861429125070572, "learning_rate": 3.906653588248793e-05, "loss": 1.4478, "step": 19768 }, { "epoch": 0.8790182739762572, "grad_norm": 0.057909827679395676, "learning_rate": 3.9009906543462536e-05, "loss": 1.4473, "step": 19770 }, { "epoch": 0.8791071984349296, "grad_norm": 0.057409241795539856, "learning_rate": 3.895331661190177e-05, "loss": 1.4447, "step": 19772 }, { "epoch": 0.8791961228936019, "grad_norm": 0.05790772661566734, "learning_rate": 3.8896766092643257e-05, "loss": 1.4461, "step": 19774 }, { "epoch": 0.8792850473522742, "grad_norm": 0.05888407304883003, "learning_rate": 3.884025499052124e-05, "loss": 1.446, "step": 19776 }, { "epoch": 0.8793739718109466, "grad_norm": 0.05903869867324829, "learning_rate": 3.878378331036647e-05, "loss": 1.4504, "step": 19778 }, { "epoch": 0.8794628962696189, "grad_norm": 0.057740140706300735, "learning_rate": 3.8727351057006365e-05, "loss": 1.447, "step": 19780 }, { "epoch": 0.8795518207282913, "grad_norm": 0.059584252536296844, "learning_rate": 3.867095823526506e-05, "loss": 1.4504, "step": 19782 }, { "epoch": 0.8796407451869637, "grad_norm": 0.05790328234434128, "learning_rate": 3.8614604849963375e-05, "loss": 1.4489, "step": 19784 }, { "epoch": 0.879729669645636, "grad_norm": 0.058795176446437836, "learning_rate": 3.85582909059185e-05, "loss": 1.4462, "step": 19786 }, { "epoch": 0.8798185941043084, "grad_norm": 0.057657308876514435, "learning_rate": 3.850201640794426e-05, "loss": 1.4487, "step": 19788 }, { "epoch": 0.8799075185629808, "grad_norm": 0.0583043098449707, "learning_rate": 3.8445781360851585e-05, "loss": 1.4394, "step": 19790 }, { "epoch": 0.8799964430216531, "grad_norm": 0.05816715955734253, "learning_rate": 3.838958576944751e-05, "loss": 1.4454, "step": 19792 }, { "epoch": 0.8800853674803255, "grad_norm": 0.05845462530851364, "learning_rate": 3.833342963853587e-05, "loss": 1.4484, "step": 19794 }, { "epoch": 0.8801742919389978, "grad_norm": 0.05977516621351242, "learning_rate": 3.8277312972917086e-05, "loss": 1.4492, "step": 19796 }, { "epoch": 0.8802632163976701, "grad_norm": 0.058808039873838425, "learning_rate": 3.8221235777388396e-05, "loss": 1.4487, "step": 19798 }, { "epoch": 0.8803521408563425, "grad_norm": 0.057808782905340195, "learning_rate": 3.816519805674351e-05, "loss": 1.4443, "step": 19800 }, { "epoch": 0.8804410653150149, "grad_norm": 0.05733674019575119, "learning_rate": 3.8109199815772656e-05, "loss": 1.4458, "step": 19802 }, { "epoch": 0.8805299897736872, "grad_norm": 0.05880142003297806, "learning_rate": 3.8053241059262886e-05, "loss": 1.4473, "step": 19804 }, { "epoch": 0.8806189142323596, "grad_norm": 0.0574544295668602, "learning_rate": 3.799732179199788e-05, "loss": 1.4467, "step": 19806 }, { "epoch": 0.880707838691032, "grad_norm": 0.05944616347551346, "learning_rate": 3.794144201875782e-05, "loss": 1.4481, "step": 19808 }, { "epoch": 0.8807967631497043, "grad_norm": 0.058573972433805466, "learning_rate": 3.788560174431932e-05, "loss": 1.4517, "step": 19810 }, { "epoch": 0.8808856876083767, "grad_norm": 0.05748533084988594, "learning_rate": 3.782980097345623e-05, "loss": 1.4506, "step": 19812 }, { "epoch": 0.8809746120670491, "grad_norm": 0.05795510113239288, "learning_rate": 3.77740397109384e-05, "loss": 1.4546, "step": 19814 }, { "epoch": 0.8810635365257214, "grad_norm": 0.057292576879262924, "learning_rate": 3.771831796153269e-05, "loss": 1.4429, "step": 19816 }, { "epoch": 0.8811524609843937, "grad_norm": 0.05805271118879318, "learning_rate": 3.7662635730002284e-05, "loss": 1.449, "step": 19818 }, { "epoch": 0.8812413854430661, "grad_norm": 0.05669526755809784, "learning_rate": 3.760699302110726e-05, "loss": 1.443, "step": 19820 }, { "epoch": 0.8813303099017384, "grad_norm": 0.05827340483665466, "learning_rate": 3.755138983960421e-05, "loss": 1.4438, "step": 19822 }, { "epoch": 0.8814192343604108, "grad_norm": 0.05783102661371231, "learning_rate": 3.7495826190246275e-05, "loss": 1.4488, "step": 19824 }, { "epoch": 0.8815081588190832, "grad_norm": 0.05988376960158348, "learning_rate": 3.744030207778326e-05, "loss": 1.4502, "step": 19826 }, { "epoch": 0.8815970832777555, "grad_norm": 0.057520363479852676, "learning_rate": 3.738481750696176e-05, "loss": 1.4514, "step": 19828 }, { "epoch": 0.8816860077364279, "grad_norm": 0.057553213089704514, "learning_rate": 3.732937248252472e-05, "loss": 1.4461, "step": 19830 }, { "epoch": 0.8817749321951003, "grad_norm": 0.058701615780591965, "learning_rate": 3.7273967009211706e-05, "loss": 1.4476, "step": 19832 }, { "epoch": 0.8818638566537726, "grad_norm": 0.0582745298743248, "learning_rate": 3.721860109175934e-05, "loss": 1.4501, "step": 19834 }, { "epoch": 0.881952781112445, "grad_norm": 0.057957906275987625, "learning_rate": 3.716327473490022e-05, "loss": 1.4523, "step": 19836 }, { "epoch": 0.8820417055711174, "grad_norm": 0.057991642504930496, "learning_rate": 3.710798794336418e-05, "loss": 1.4489, "step": 19838 }, { "epoch": 0.8821306300297896, "grad_norm": 0.05811452493071556, "learning_rate": 3.705274072187709e-05, "loss": 1.4473, "step": 19840 }, { "epoch": 0.882219554488462, "grad_norm": 0.058310460299253464, "learning_rate": 3.6997533075161905e-05, "loss": 1.4449, "step": 19842 }, { "epoch": 0.8823084789471344, "grad_norm": 0.05879312753677368, "learning_rate": 3.694236500793802e-05, "loss": 1.4473, "step": 19844 }, { "epoch": 0.8823974034058067, "grad_norm": 0.05760977044701576, "learning_rate": 3.6887236524921374e-05, "loss": 1.4455, "step": 19846 }, { "epoch": 0.8824863278644791, "grad_norm": 0.057671599090099335, "learning_rate": 3.68321476308246e-05, "loss": 1.4504, "step": 19848 }, { "epoch": 0.8825752523231515, "grad_norm": 0.058317407965660095, "learning_rate": 3.677709833035703e-05, "loss": 1.4492, "step": 19850 }, { "epoch": 0.8826641767818239, "grad_norm": 0.059517450630664825, "learning_rate": 3.6722088628224346e-05, "loss": 1.4431, "step": 19852 }, { "epoch": 0.8827531012404962, "grad_norm": 0.05849652737379074, "learning_rate": 3.6667118529129186e-05, "loss": 1.4487, "step": 19854 }, { "epoch": 0.8828420256991686, "grad_norm": 0.058554280549287796, "learning_rate": 3.661218803777061e-05, "loss": 1.4479, "step": 19856 }, { "epoch": 0.882930950157841, "grad_norm": 0.05854412168264389, "learning_rate": 3.6557297158844205e-05, "loss": 1.446, "step": 19858 }, { "epoch": 0.8830198746165133, "grad_norm": 0.05787697806954384, "learning_rate": 3.650244589704238e-05, "loss": 1.4494, "step": 19860 }, { "epoch": 0.8831087990751856, "grad_norm": 0.05933505296707153, "learning_rate": 3.6447634257054163e-05, "loss": 1.4462, "step": 19862 }, { "epoch": 0.883197723533858, "grad_norm": 0.059682779014110565, "learning_rate": 3.6392862243564864e-05, "loss": 1.4473, "step": 19864 }, { "epoch": 0.8832866479925303, "grad_norm": 0.05752909556031227, "learning_rate": 3.633812986125679e-05, "loss": 1.4441, "step": 19866 }, { "epoch": 0.8833755724512027, "grad_norm": 0.0576491579413414, "learning_rate": 3.6283437114808526e-05, "loss": 1.4428, "step": 19868 }, { "epoch": 0.883464496909875, "grad_norm": 0.05781255662441254, "learning_rate": 3.622878400889579e-05, "loss": 1.4531, "step": 19870 }, { "epoch": 0.8835534213685474, "grad_norm": 0.058144886046648026, "learning_rate": 3.617417054819039e-05, "loss": 1.4454, "step": 19872 }, { "epoch": 0.8836423458272198, "grad_norm": 0.05706218257546425, "learning_rate": 3.611959673736076e-05, "loss": 1.4469, "step": 19874 }, { "epoch": 0.8837312702858922, "grad_norm": 0.05844343453645706, "learning_rate": 3.6065062581072283e-05, "loss": 1.4444, "step": 19876 }, { "epoch": 0.8838201947445645, "grad_norm": 0.05753406509757042, "learning_rate": 3.6010568083986837e-05, "loss": 1.4454, "step": 19878 }, { "epoch": 0.8839091192032369, "grad_norm": 0.05792682617902756, "learning_rate": 3.595611325076264e-05, "loss": 1.4469, "step": 19880 }, { "epoch": 0.8839980436619093, "grad_norm": 0.059252262115478516, "learning_rate": 3.5901698086054926e-05, "loss": 1.4485, "step": 19882 }, { "epoch": 0.8840869681205815, "grad_norm": 0.05811452120542526, "learning_rate": 3.584732259451523e-05, "loss": 1.4531, "step": 19884 }, { "epoch": 0.8841758925792539, "grad_norm": 0.0585939846932888, "learning_rate": 3.579298678079196e-05, "loss": 1.4519, "step": 19886 }, { "epoch": 0.8842648170379263, "grad_norm": 0.058722611516714096, "learning_rate": 3.573869064952989e-05, "loss": 1.4502, "step": 19888 }, { "epoch": 0.8843537414965986, "grad_norm": 0.05796421319246292, "learning_rate": 3.5684434205370266e-05, "loss": 1.4456, "step": 19890 }, { "epoch": 0.884442665955271, "grad_norm": 0.058497413992881775, "learning_rate": 3.563021745295158e-05, "loss": 1.4431, "step": 19892 }, { "epoch": 0.8845315904139434, "grad_norm": 0.05818541720509529, "learning_rate": 3.557604039690826e-05, "loss": 1.4429, "step": 19894 }, { "epoch": 0.8846205148726157, "grad_norm": 0.05828235670924187, "learning_rate": 3.552190304187164e-05, "loss": 1.4481, "step": 19896 }, { "epoch": 0.8847094393312881, "grad_norm": 0.05807334557175636, "learning_rate": 3.546780539246958e-05, "loss": 1.4478, "step": 19898 }, { "epoch": 0.8847983637899605, "grad_norm": 0.056775208562612534, "learning_rate": 3.541374745332676e-05, "loss": 1.4516, "step": 19900 }, { "epoch": 0.8848872882486328, "grad_norm": 0.05986194312572479, "learning_rate": 3.535972922906411e-05, "loss": 1.4534, "step": 19902 }, { "epoch": 0.8849762127073052, "grad_norm": 0.056858133524656296, "learning_rate": 3.530575072429937e-05, "loss": 1.4486, "step": 19904 }, { "epoch": 0.8850651371659775, "grad_norm": 0.058056510984897614, "learning_rate": 3.5251811943646904e-05, "loss": 1.4485, "step": 19906 }, { "epoch": 0.8851540616246498, "grad_norm": 0.05905826762318611, "learning_rate": 3.5197912891717754e-05, "loss": 1.4528, "step": 19908 }, { "epoch": 0.8852429860833222, "grad_norm": 0.05918426811695099, "learning_rate": 3.514405357311923e-05, "loss": 1.4511, "step": 19910 }, { "epoch": 0.8853319105419946, "grad_norm": 0.05840429291129112, "learning_rate": 3.5090233992455536e-05, "loss": 1.4492, "step": 19912 }, { "epoch": 0.8854208350006669, "grad_norm": 0.058113958686590195, "learning_rate": 3.503645415432749e-05, "loss": 1.4486, "step": 19914 }, { "epoch": 0.8855097594593393, "grad_norm": 0.05794018507003784, "learning_rate": 3.498271406333242e-05, "loss": 1.4522, "step": 19916 }, { "epoch": 0.8855986839180117, "grad_norm": 0.058955464512109756, "learning_rate": 3.492901372406404e-05, "loss": 1.4498, "step": 19918 }, { "epoch": 0.885687608376684, "grad_norm": 0.05927339568734169, "learning_rate": 3.487535314111323e-05, "loss": 1.4484, "step": 19920 }, { "epoch": 0.8857765328353564, "grad_norm": 0.05773244425654411, "learning_rate": 3.4821732319066876e-05, "loss": 1.4503, "step": 19922 }, { "epoch": 0.8858654572940288, "grad_norm": 0.05872263014316559, "learning_rate": 3.476815126250893e-05, "loss": 1.4463, "step": 19924 }, { "epoch": 0.885954381752701, "grad_norm": 0.05775640159845352, "learning_rate": 3.4714609976019496e-05, "loss": 1.4479, "step": 19926 }, { "epoch": 0.8860433062113734, "grad_norm": 0.057550862431526184, "learning_rate": 3.4661108464175696e-05, "loss": 1.4462, "step": 19928 }, { "epoch": 0.8861322306700458, "grad_norm": 0.059137601405382156, "learning_rate": 3.460764673155109e-05, "loss": 1.4451, "step": 19930 }, { "epoch": 0.8862211551287181, "grad_norm": 0.057657770812511444, "learning_rate": 3.4554224782715636e-05, "loss": 1.4474, "step": 19932 }, { "epoch": 0.8863100795873905, "grad_norm": 0.05691711977124214, "learning_rate": 3.450084262223624e-05, "loss": 1.4476, "step": 19934 }, { "epoch": 0.8863990040460629, "grad_norm": 0.05885188281536102, "learning_rate": 3.4447500254676304e-05, "loss": 1.4472, "step": 19936 }, { "epoch": 0.8864879285047352, "grad_norm": 0.05741071701049805, "learning_rate": 3.4394197684595564e-05, "loss": 1.4481, "step": 19938 }, { "epoch": 0.8865768529634076, "grad_norm": 0.058238908648490906, "learning_rate": 3.434093491655066e-05, "loss": 1.4475, "step": 19940 }, { "epoch": 0.88666577742208, "grad_norm": 0.05798852816224098, "learning_rate": 3.428771195509483e-05, "loss": 1.4466, "step": 19942 }, { "epoch": 0.8867547018807523, "grad_norm": 0.05808843672275543, "learning_rate": 3.423452880477762e-05, "loss": 1.45, "step": 19944 }, { "epoch": 0.8868436263394247, "grad_norm": 0.059082068502902985, "learning_rate": 3.4181385470145546e-05, "loss": 1.4458, "step": 19946 }, { "epoch": 0.886932550798097, "grad_norm": 0.05787166953086853, "learning_rate": 3.41282819557413e-05, "loss": 1.449, "step": 19948 }, { "epoch": 0.8870214752567693, "grad_norm": 0.05777514725923538, "learning_rate": 3.40752182661046e-05, "loss": 1.4453, "step": 19950 }, { "epoch": 0.8871103997154417, "grad_norm": 0.059281278401613235, "learning_rate": 3.402219440577159e-05, "loss": 1.4491, "step": 19952 }, { "epoch": 0.8871993241741141, "grad_norm": 0.058436159044504166, "learning_rate": 3.3969210379274805e-05, "loss": 1.4448, "step": 19954 }, { "epoch": 0.8872882486327864, "grad_norm": 0.057971615344285965, "learning_rate": 3.391626619114363e-05, "loss": 1.4542, "step": 19956 }, { "epoch": 0.8873771730914588, "grad_norm": 0.05890185385942459, "learning_rate": 3.386336184590405e-05, "loss": 1.4479, "step": 19958 }, { "epoch": 0.8874660975501312, "grad_norm": 0.05890071764588356, "learning_rate": 3.381049734807845e-05, "loss": 1.4439, "step": 19960 }, { "epoch": 0.8875550220088035, "grad_norm": 0.05897240713238716, "learning_rate": 3.375767270218599e-05, "loss": 1.4473, "step": 19962 }, { "epoch": 0.8876439464674759, "grad_norm": 0.05797523632645607, "learning_rate": 3.370488791274234e-05, "loss": 1.4474, "step": 19964 }, { "epoch": 0.8877328709261483, "grad_norm": 0.05868737772107124, "learning_rate": 3.3652142984259783e-05, "loss": 1.446, "step": 19966 }, { "epoch": 0.8878217953848206, "grad_norm": 0.05914899334311485, "learning_rate": 3.359943792124709e-05, "loss": 1.4449, "step": 19968 }, { "epoch": 0.8879107198434929, "grad_norm": 0.05786535516381264, "learning_rate": 3.354677272820994e-05, "loss": 1.4519, "step": 19970 }, { "epoch": 0.8879996443021653, "grad_norm": 0.05873554199934006, "learning_rate": 3.349414740965012e-05, "loss": 1.4475, "step": 19972 }, { "epoch": 0.8880885687608376, "grad_norm": 0.05829008296132088, "learning_rate": 3.344156197006654e-05, "loss": 1.448, "step": 19974 }, { "epoch": 0.88817749321951, "grad_norm": 0.05718972161412239, "learning_rate": 3.338901641395409e-05, "loss": 1.4437, "step": 19976 }, { "epoch": 0.8882664176781824, "grad_norm": 0.058192022144794464, "learning_rate": 3.333651074580496e-05, "loss": 1.4443, "step": 19978 }, { "epoch": 0.8883553421368547, "grad_norm": 0.05977290868759155, "learning_rate": 3.328404497010745e-05, "loss": 1.4562, "step": 19980 }, { "epoch": 0.8884442665955271, "grad_norm": 0.057841334491968155, "learning_rate": 3.323161909134642e-05, "loss": 1.4467, "step": 19982 }, { "epoch": 0.8885331910541995, "grad_norm": 0.0573255680501461, "learning_rate": 3.317923311400356e-05, "loss": 1.4424, "step": 19984 }, { "epoch": 0.8886221155128718, "grad_norm": 0.057299330830574036, "learning_rate": 3.312688704255717e-05, "loss": 1.4463, "step": 19986 }, { "epoch": 0.8887110399715442, "grad_norm": 0.0575009286403656, "learning_rate": 3.3074580881481854e-05, "loss": 1.4464, "step": 19988 }, { "epoch": 0.8887999644302166, "grad_norm": 0.05899973586201668, "learning_rate": 3.302231463524902e-05, "loss": 1.4517, "step": 19990 }, { "epoch": 0.8888888888888888, "grad_norm": 0.057911746203899384, "learning_rate": 3.29700883083266e-05, "loss": 1.4506, "step": 19992 }, { "epoch": 0.8889778133475612, "grad_norm": 0.057607442140579224, "learning_rate": 3.2917901905179294e-05, "loss": 1.4492, "step": 19994 }, { "epoch": 0.8890667378062336, "grad_norm": 0.05750446021556854, "learning_rate": 3.2865755430268095e-05, "loss": 1.4517, "step": 19996 }, { "epoch": 0.8891556622649059, "grad_norm": 0.05804445222020149, "learning_rate": 3.281364888805055e-05, "loss": 1.4511, "step": 19998 }, { "epoch": 0.8892445867235783, "grad_norm": 0.05784785374999046, "learning_rate": 3.276158228298126e-05, "loss": 1.45, "step": 20000 }, { "epoch": 0.8892445867235783, "eval_loss": 1.4338102340698242, "eval_runtime": 12.4535, "eval_samples_per_second": 554.865, "eval_steps_per_second": 69.378, "step": 20000 }, { "epoch": 0.8893335111822507, "grad_norm": 0.05788677558302879, "learning_rate": 3.2709555619511e-05, "loss": 1.4518, "step": 20002 }, { "epoch": 0.889422435640923, "grad_norm": 0.058794233947992325, "learning_rate": 3.26575689020871e-05, "loss": 1.4477, "step": 20004 }, { "epoch": 0.8895113600995954, "grad_norm": 0.05749224126338959, "learning_rate": 3.260562213515372e-05, "loss": 1.4488, "step": 20006 }, { "epoch": 0.8896002845582678, "grad_norm": 0.05827973410487175, "learning_rate": 3.2553715323151486e-05, "loss": 1.4487, "step": 20008 }, { "epoch": 0.8896892090169402, "grad_norm": 0.05793258920311928, "learning_rate": 3.250184847051779e-05, "loss": 1.4495, "step": 20010 }, { "epoch": 0.8897781334756125, "grad_norm": 0.05844861641526222, "learning_rate": 3.245002158168614e-05, "loss": 1.452, "step": 20012 }, { "epoch": 0.8898670579342848, "grad_norm": 0.05809600278735161, "learning_rate": 3.23982346610871e-05, "loss": 1.4541, "step": 20014 }, { "epoch": 0.8899559823929571, "grad_norm": 0.05752575770020485, "learning_rate": 3.2346487713147684e-05, "loss": 1.4477, "step": 20016 }, { "epoch": 0.8900449068516295, "grad_norm": 0.05790848284959793, "learning_rate": 3.229478074229125e-05, "loss": 1.4473, "step": 20018 }, { "epoch": 0.8901338313103019, "grad_norm": 0.058815497905015945, "learning_rate": 3.224311375293815e-05, "loss": 1.4475, "step": 20020 }, { "epoch": 0.8902227557689742, "grad_norm": 0.05812389776110649, "learning_rate": 3.219148674950506e-05, "loss": 1.4455, "step": 20022 }, { "epoch": 0.8903116802276466, "grad_norm": 0.058545514941215515, "learning_rate": 3.2139899736405245e-05, "loss": 1.4428, "step": 20024 }, { "epoch": 0.890400604686319, "grad_norm": 0.05910952761769295, "learning_rate": 3.208835271804844e-05, "loss": 1.4495, "step": 20026 }, { "epoch": 0.8904895291449914, "grad_norm": 0.05743895471096039, "learning_rate": 3.2036845698841465e-05, "loss": 1.4403, "step": 20028 }, { "epoch": 0.8905784536036637, "grad_norm": 0.059019867330789566, "learning_rate": 3.1985378683187014e-05, "loss": 1.446, "step": 20030 }, { "epoch": 0.8906673780623361, "grad_norm": 0.05902063101530075, "learning_rate": 3.193395167548502e-05, "loss": 1.4457, "step": 20032 }, { "epoch": 0.8907563025210085, "grad_norm": 0.05790397897362709, "learning_rate": 3.18825646801314e-05, "loss": 1.45, "step": 20034 }, { "epoch": 0.8908452269796807, "grad_norm": 0.05794849991798401, "learning_rate": 3.183121770151909e-05, "loss": 1.4494, "step": 20036 }, { "epoch": 0.8909341514383531, "grad_norm": 0.058465514332056046, "learning_rate": 3.1779910744037574e-05, "loss": 1.4499, "step": 20038 }, { "epoch": 0.8910230758970255, "grad_norm": 0.05779614299535751, "learning_rate": 3.172864381207252e-05, "loss": 1.4433, "step": 20040 }, { "epoch": 0.8911120003556978, "grad_norm": 0.058368321508169174, "learning_rate": 3.167741691000664e-05, "loss": 1.4451, "step": 20042 }, { "epoch": 0.8912009248143702, "grad_norm": 0.05803350359201431, "learning_rate": 3.162623004221904e-05, "loss": 1.4465, "step": 20044 }, { "epoch": 0.8912898492730426, "grad_norm": 0.05906941741704941, "learning_rate": 3.1575083213085275e-05, "loss": 1.443, "step": 20046 }, { "epoch": 0.8913787737317149, "grad_norm": 0.058908261358737946, "learning_rate": 3.152397642697774e-05, "loss": 1.4541, "step": 20048 }, { "epoch": 0.8914676981903873, "grad_norm": 0.05919423699378967, "learning_rate": 3.147290968826522e-05, "loss": 1.4473, "step": 20050 }, { "epoch": 0.8915566226490597, "grad_norm": 0.057987507432699203, "learning_rate": 3.142188300131305e-05, "loss": 1.4466, "step": 20052 }, { "epoch": 0.891645547107732, "grad_norm": 0.057514190673828125, "learning_rate": 3.137089637048335e-05, "loss": 1.4462, "step": 20054 }, { "epoch": 0.8917344715664043, "grad_norm": 0.05833049863576889, "learning_rate": 3.131994980013453e-05, "loss": 1.447, "step": 20056 }, { "epoch": 0.8918233960250767, "grad_norm": 0.05808662623167038, "learning_rate": 3.126904329462182e-05, "loss": 1.4454, "step": 20058 }, { "epoch": 0.891912320483749, "grad_norm": 0.05755183845758438, "learning_rate": 3.1218176858296974e-05, "loss": 1.4474, "step": 20060 }, { "epoch": 0.8920012449424214, "grad_norm": 0.05792864039540291, "learning_rate": 3.1167350495508175e-05, "loss": 1.446, "step": 20062 }, { "epoch": 0.8920901694010938, "grad_norm": 0.05887740105390549, "learning_rate": 3.111656421060033e-05, "loss": 1.4454, "step": 20064 }, { "epoch": 0.8921790938597661, "grad_norm": 0.05969681963324547, "learning_rate": 3.106581800791497e-05, "loss": 1.453, "step": 20066 }, { "epoch": 0.8922680183184385, "grad_norm": 0.0589289627969265, "learning_rate": 3.10151118917899e-05, "loss": 1.4504, "step": 20068 }, { "epoch": 0.8923569427771109, "grad_norm": 0.05786190554499626, "learning_rate": 3.096444586655983e-05, "loss": 1.4524, "step": 20070 }, { "epoch": 0.8924458672357832, "grad_norm": 0.05816512554883957, "learning_rate": 3.091381993655601e-05, "loss": 1.4438, "step": 20072 }, { "epoch": 0.8925347916944556, "grad_norm": 0.057929810136556625, "learning_rate": 3.086323410610592e-05, "loss": 1.4494, "step": 20074 }, { "epoch": 0.892623716153128, "grad_norm": 0.05758269876241684, "learning_rate": 3.081268837953405e-05, "loss": 1.4496, "step": 20076 }, { "epoch": 0.8927126406118002, "grad_norm": 0.05764364078640938, "learning_rate": 3.076218276116116e-05, "loss": 1.4491, "step": 20078 }, { "epoch": 0.8928015650704726, "grad_norm": 0.057650718837976456, "learning_rate": 3.0711717255304864e-05, "loss": 1.4495, "step": 20080 }, { "epoch": 0.892890489529145, "grad_norm": 0.057383351027965546, "learning_rate": 3.066129186627909e-05, "loss": 1.4452, "step": 20082 }, { "epoch": 0.8929794139878173, "grad_norm": 0.05764487385749817, "learning_rate": 3.061090659839422e-05, "loss": 1.4497, "step": 20084 }, { "epoch": 0.8930683384464897, "grad_norm": 0.05808642879128456, "learning_rate": 3.05605614559577e-05, "loss": 1.4498, "step": 20086 }, { "epoch": 0.8931572629051621, "grad_norm": 0.05805491656064987, "learning_rate": 3.051025644327321e-05, "loss": 1.4483, "step": 20088 }, { "epoch": 0.8932461873638344, "grad_norm": 0.05781690031290054, "learning_rate": 3.0459991564640855e-05, "loss": 1.4429, "step": 20090 }, { "epoch": 0.8933351118225068, "grad_norm": 0.06007815897464752, "learning_rate": 3.040976682435759e-05, "loss": 1.4486, "step": 20092 }, { "epoch": 0.8934240362811792, "grad_norm": 0.058555953204631805, "learning_rate": 3.0359582226716975e-05, "loss": 1.4454, "step": 20094 }, { "epoch": 0.8935129607398515, "grad_norm": 0.05821076035499573, "learning_rate": 3.0309437776008865e-05, "loss": 1.4527, "step": 20096 }, { "epoch": 0.8936018851985239, "grad_norm": 0.058373384177684784, "learning_rate": 3.0259333476519825e-05, "loss": 1.4439, "step": 20098 }, { "epoch": 0.8936908096571962, "grad_norm": 0.05813969671726227, "learning_rate": 3.0209269332533094e-05, "loss": 1.4469, "step": 20100 }, { "epoch": 0.8937797341158685, "grad_norm": 0.058415379375219345, "learning_rate": 3.0159245348328368e-05, "loss": 1.447, "step": 20102 }, { "epoch": 0.8938686585745409, "grad_norm": 0.05789436027407646, "learning_rate": 3.010926152818183e-05, "loss": 1.4537, "step": 20104 }, { "epoch": 0.8939575830332133, "grad_norm": 0.058166682720184326, "learning_rate": 3.0059317876366286e-05, "loss": 1.4486, "step": 20106 }, { "epoch": 0.8940465074918856, "grad_norm": 0.05696212127804756, "learning_rate": 3.0009414397151326e-05, "loss": 1.4443, "step": 20108 }, { "epoch": 0.894135431950558, "grad_norm": 0.057195693254470825, "learning_rate": 2.995955109480275e-05, "loss": 1.4523, "step": 20110 }, { "epoch": 0.8942243564092304, "grad_norm": 0.058714404702186584, "learning_rate": 2.9909727973583156e-05, "loss": 1.4506, "step": 20112 }, { "epoch": 0.8943132808679027, "grad_norm": 0.05786760151386261, "learning_rate": 2.985994503775158e-05, "loss": 1.4493, "step": 20114 }, { "epoch": 0.8944022053265751, "grad_norm": 0.05894371122121811, "learning_rate": 2.981020229156378e-05, "loss": 1.4467, "step": 20116 }, { "epoch": 0.8944911297852475, "grad_norm": 0.05780460685491562, "learning_rate": 2.9760499739271974e-05, "loss": 1.4471, "step": 20118 }, { "epoch": 0.8945800542439198, "grad_norm": 0.058124516159296036, "learning_rate": 2.9710837385124924e-05, "loss": 1.4449, "step": 20120 }, { "epoch": 0.8946689787025921, "grad_norm": 0.057611338794231415, "learning_rate": 2.9661215233367956e-05, "loss": 1.4518, "step": 20122 }, { "epoch": 0.8947579031612645, "grad_norm": 0.05695406347513199, "learning_rate": 2.961163328824307e-05, "loss": 1.4398, "step": 20124 }, { "epoch": 0.8948468276199368, "grad_norm": 0.057146843522787094, "learning_rate": 2.9562091553988758e-05, "loss": 1.4436, "step": 20126 }, { "epoch": 0.8949357520786092, "grad_norm": 0.05892251059412956, "learning_rate": 2.9512590034839802e-05, "loss": 1.4539, "step": 20128 }, { "epoch": 0.8950246765372816, "grad_norm": 0.05868358537554741, "learning_rate": 2.946312873502821e-05, "loss": 1.4473, "step": 20130 }, { "epoch": 0.8951136009959539, "grad_norm": 0.058262553066015244, "learning_rate": 2.9413707658781876e-05, "loss": 1.4478, "step": 20132 }, { "epoch": 0.8952025254546263, "grad_norm": 0.05769951269030571, "learning_rate": 2.936432681032569e-05, "loss": 1.4514, "step": 20134 }, { "epoch": 0.8952914499132987, "grad_norm": 0.057862915098667145, "learning_rate": 2.9314986193880842e-05, "loss": 1.4489, "step": 20136 }, { "epoch": 0.895380374371971, "grad_norm": 0.05768333002924919, "learning_rate": 2.9265685813665178e-05, "loss": 1.4443, "step": 20138 }, { "epoch": 0.8954692988306434, "grad_norm": 0.05807989463210106, "learning_rate": 2.921642567389321e-05, "loss": 1.4404, "step": 20140 }, { "epoch": 0.8955582232893158, "grad_norm": 0.05821651592850685, "learning_rate": 2.9167205778775795e-05, "loss": 1.4466, "step": 20142 }, { "epoch": 0.895647147747988, "grad_norm": 0.058034975081682205, "learning_rate": 2.9118026132520513e-05, "loss": 1.443, "step": 20144 }, { "epoch": 0.8957360722066604, "grad_norm": 0.057850636541843414, "learning_rate": 2.9068886739331557e-05, "loss": 1.4407, "step": 20146 }, { "epoch": 0.8958249966653328, "grad_norm": 0.05788253992795944, "learning_rate": 2.90197876034094e-05, "loss": 1.4454, "step": 20148 }, { "epoch": 0.8959139211240051, "grad_norm": 0.05744844675064087, "learning_rate": 2.8970728728951347e-05, "loss": 1.4516, "step": 20150 }, { "epoch": 0.8960028455826775, "grad_norm": 0.058373648673295975, "learning_rate": 2.8921710120151212e-05, "loss": 1.4466, "step": 20152 }, { "epoch": 0.8960917700413499, "grad_norm": 0.05803242325782776, "learning_rate": 2.8872731781199257e-05, "loss": 1.4488, "step": 20154 }, { "epoch": 0.8961806945000222, "grad_norm": 0.057802025228738785, "learning_rate": 2.88237937162823e-05, "loss": 1.4517, "step": 20156 }, { "epoch": 0.8962696189586946, "grad_norm": 0.05781133100390434, "learning_rate": 2.877489592958399e-05, "loss": 1.4443, "step": 20158 }, { "epoch": 0.896358543417367, "grad_norm": 0.05783500894904137, "learning_rate": 2.8726038425284096e-05, "loss": 1.4474, "step": 20160 }, { "epoch": 0.8964474678760394, "grad_norm": 0.05782508850097656, "learning_rate": 2.867722120755939e-05, "loss": 1.4438, "step": 20162 }, { "epoch": 0.8965363923347116, "grad_norm": 0.05776134133338928, "learning_rate": 2.8628444280582756e-05, "loss": 1.4513, "step": 20164 }, { "epoch": 0.896625316793384, "grad_norm": 0.0572594478726387, "learning_rate": 2.857970764852397e-05, "loss": 1.4473, "step": 20166 }, { "epoch": 0.8967142412520563, "grad_norm": 0.05813061445951462, "learning_rate": 2.853101131554936e-05, "loss": 1.4434, "step": 20168 }, { "epoch": 0.8968031657107287, "grad_norm": 0.05757516622543335, "learning_rate": 2.8482355285821494e-05, "loss": 1.4455, "step": 20170 }, { "epoch": 0.8968920901694011, "grad_norm": 0.05779808387160301, "learning_rate": 2.8433739563499817e-05, "loss": 1.4454, "step": 20172 }, { "epoch": 0.8969810146280734, "grad_norm": 0.058449193835258484, "learning_rate": 2.8385164152740283e-05, "loss": 1.4442, "step": 20174 }, { "epoch": 0.8970699390867458, "grad_norm": 0.05776485055685043, "learning_rate": 2.833662905769524e-05, "loss": 1.444, "step": 20176 }, { "epoch": 0.8971588635454182, "grad_norm": 0.0574498176574707, "learning_rate": 2.8288134282513644e-05, "loss": 1.4476, "step": 20178 }, { "epoch": 0.8972477880040906, "grad_norm": 0.05783705785870552, "learning_rate": 2.8239679831341126e-05, "loss": 1.4464, "step": 20180 }, { "epoch": 0.8973367124627629, "grad_norm": 0.05792314559221268, "learning_rate": 2.819126570831976e-05, "loss": 1.4502, "step": 20182 }, { "epoch": 0.8974256369214353, "grad_norm": 0.057795315980911255, "learning_rate": 2.814289191758823e-05, "loss": 1.4469, "step": 20184 }, { "epoch": 0.8975145613801075, "grad_norm": 0.05749436840415001, "learning_rate": 2.8094558463281572e-05, "loss": 1.4488, "step": 20186 }, { "epoch": 0.8976034858387799, "grad_norm": 0.05765572935342789, "learning_rate": 2.804626534953181e-05, "loss": 1.4416, "step": 20188 }, { "epoch": 0.8976924102974523, "grad_norm": 0.05794264376163483, "learning_rate": 2.7998012580467137e-05, "loss": 1.446, "step": 20190 }, { "epoch": 0.8977813347561246, "grad_norm": 0.05829514190554619, "learning_rate": 2.7949800160212312e-05, "loss": 1.4488, "step": 20192 }, { "epoch": 0.897870259214797, "grad_norm": 0.05872653052210808, "learning_rate": 2.790162809288882e-05, "loss": 1.4526, "step": 20194 }, { "epoch": 0.8979591836734694, "grad_norm": 0.0570625476539135, "learning_rate": 2.7853496382614695e-05, "loss": 1.4446, "step": 20196 }, { "epoch": 0.8980481081321418, "grad_norm": 0.058061014860868454, "learning_rate": 2.7805405033504317e-05, "loss": 1.4446, "step": 20198 }, { "epoch": 0.8981370325908141, "grad_norm": 0.05758427456021309, "learning_rate": 2.7757354049668783e-05, "loss": 1.4426, "step": 20200 }, { "epoch": 0.8982259570494865, "grad_norm": 0.05844208970665932, "learning_rate": 2.770934343521575e-05, "loss": 1.4509, "step": 20202 }, { "epoch": 0.8983148815081589, "grad_norm": 0.05929677188396454, "learning_rate": 2.766137319424944e-05, "loss": 1.4505, "step": 20204 }, { "epoch": 0.8984038059668312, "grad_norm": 0.05805506557226181, "learning_rate": 2.761344333087046e-05, "loss": 1.4492, "step": 20206 }, { "epoch": 0.8984927304255035, "grad_norm": 0.05872786045074463, "learning_rate": 2.756555384917603e-05, "loss": 1.4478, "step": 20208 }, { "epoch": 0.8985816548841759, "grad_norm": 0.057989608496427536, "learning_rate": 2.751770475326004e-05, "loss": 1.4431, "step": 20210 }, { "epoch": 0.8986705793428482, "grad_norm": 0.05771687999367714, "learning_rate": 2.746989604721284e-05, "loss": 1.4469, "step": 20212 }, { "epoch": 0.8987595038015206, "grad_norm": 0.05826197937130928, "learning_rate": 2.7422127735121206e-05, "loss": 1.4516, "step": 20214 }, { "epoch": 0.898848428260193, "grad_norm": 0.058696992695331573, "learning_rate": 2.7374399821068762e-05, "loss": 1.4488, "step": 20216 }, { "epoch": 0.8989373527188653, "grad_norm": 0.05959450826048851, "learning_rate": 2.7326712309135414e-05, "loss": 1.4471, "step": 20218 }, { "epoch": 0.8990262771775377, "grad_norm": 0.05827097222208977, "learning_rate": 2.7279065203397678e-05, "loss": 1.4493, "step": 20220 }, { "epoch": 0.8991152016362101, "grad_norm": 0.05881284549832344, "learning_rate": 2.723145850792863e-05, "loss": 1.4558, "step": 20222 }, { "epoch": 0.8992041260948824, "grad_norm": 0.057856813073158264, "learning_rate": 2.71838922267979e-05, "loss": 1.4469, "step": 20224 }, { "epoch": 0.8992930505535548, "grad_norm": 0.05925833061337471, "learning_rate": 2.7136366364071797e-05, "loss": 1.4445, "step": 20226 }, { "epoch": 0.8993819750122272, "grad_norm": 0.05858567729592323, "learning_rate": 2.70888809238129e-05, "loss": 1.4451, "step": 20228 }, { "epoch": 0.8994708994708994, "grad_norm": 0.05886606499552727, "learning_rate": 2.704143591008046e-05, "loss": 1.4467, "step": 20230 }, { "epoch": 0.8995598239295718, "grad_norm": 0.05794280394911766, "learning_rate": 2.6994031326930456e-05, "loss": 1.4432, "step": 20232 }, { "epoch": 0.8996487483882442, "grad_norm": 0.05774116888642311, "learning_rate": 2.6946667178415097e-05, "loss": 1.4478, "step": 20234 }, { "epoch": 0.8997376728469165, "grad_norm": 0.05794171988964081, "learning_rate": 2.689934346858314e-05, "loss": 1.4489, "step": 20236 }, { "epoch": 0.8998265973055889, "grad_norm": 0.05861863121390343, "learning_rate": 2.68520602014804e-05, "loss": 1.4459, "step": 20238 }, { "epoch": 0.8999155217642613, "grad_norm": 0.057649560272693634, "learning_rate": 2.6804817381148537e-05, "loss": 1.4465, "step": 20240 }, { "epoch": 0.9000044462229336, "grad_norm": 0.05889919400215149, "learning_rate": 2.6757615011626212e-05, "loss": 1.4519, "step": 20242 }, { "epoch": 0.900093370681606, "grad_norm": 0.058443833142519, "learning_rate": 2.6710453096948473e-05, "loss": 1.4464, "step": 20244 }, { "epoch": 0.9001822951402784, "grad_norm": 0.0582452267408371, "learning_rate": 2.6663331641146872e-05, "loss": 1.4514, "step": 20246 }, { "epoch": 0.9002712195989507, "grad_norm": 0.058483488857746124, "learning_rate": 2.661625064824974e-05, "loss": 1.4484, "step": 20248 }, { "epoch": 0.9003601440576231, "grad_norm": 0.05783051252365112, "learning_rate": 2.656921012228153e-05, "loss": 1.4494, "step": 20250 }, { "epoch": 0.9004490685162954, "grad_norm": 0.05933910235762596, "learning_rate": 2.6522210067263573e-05, "loss": 1.4465, "step": 20252 }, { "epoch": 0.9005379929749677, "grad_norm": 0.0582958348095417, "learning_rate": 2.647525048721372e-05, "loss": 1.4489, "step": 20254 }, { "epoch": 0.9006269174336401, "grad_norm": 0.0582851879298687, "learning_rate": 2.6428331386146143e-05, "loss": 1.4492, "step": 20256 }, { "epoch": 0.9007158418923125, "grad_norm": 0.059080787003040314, "learning_rate": 2.6381452768071747e-05, "loss": 1.4483, "step": 20258 }, { "epoch": 0.9008047663509848, "grad_norm": 0.05819986015558243, "learning_rate": 2.6334614636998045e-05, "loss": 1.4498, "step": 20260 }, { "epoch": 0.9008936908096572, "grad_norm": 0.058520715683698654, "learning_rate": 2.6287816996928783e-05, "loss": 1.4497, "step": 20262 }, { "epoch": 0.9009826152683296, "grad_norm": 0.0575350746512413, "learning_rate": 2.624105985186448e-05, "loss": 1.4402, "step": 20264 }, { "epoch": 0.9010715397270019, "grad_norm": 0.05844678357243538, "learning_rate": 2.6194343205802272e-05, "loss": 1.4483, "step": 20266 }, { "epoch": 0.9011604641856743, "grad_norm": 0.05773119255900383, "learning_rate": 2.6147667062735468e-05, "loss": 1.443, "step": 20268 }, { "epoch": 0.9012493886443467, "grad_norm": 0.05865318328142166, "learning_rate": 2.610103142665443e-05, "loss": 1.4502, "step": 20270 }, { "epoch": 0.901338313103019, "grad_norm": 0.05893605947494507, "learning_rate": 2.6054436301545526e-05, "loss": 1.4491, "step": 20272 }, { "epoch": 0.9014272375616913, "grad_norm": 0.059134792536497116, "learning_rate": 2.600788169139201e-05, "loss": 1.443, "step": 20274 }, { "epoch": 0.9015161620203637, "grad_norm": 0.05810784548521042, "learning_rate": 2.5961367600173646e-05, "loss": 1.4503, "step": 20276 }, { "epoch": 0.901605086479036, "grad_norm": 0.05842362344264984, "learning_rate": 2.5914894031866533e-05, "loss": 1.4463, "step": 20278 }, { "epoch": 0.9016940109377084, "grad_norm": 0.05697064474225044, "learning_rate": 2.5868460990443542e-05, "loss": 1.4439, "step": 20280 }, { "epoch": 0.9017829353963808, "grad_norm": 0.05963601917028427, "learning_rate": 2.582206847987395e-05, "loss": 1.4428, "step": 20282 }, { "epoch": 0.9018718598550531, "grad_norm": 0.05725273862481117, "learning_rate": 2.5775716504123525e-05, "loss": 1.4458, "step": 20284 }, { "epoch": 0.9019607843137255, "grad_norm": 0.05789296701550484, "learning_rate": 2.572940506715471e-05, "loss": 1.4422, "step": 20286 }, { "epoch": 0.9020497087723979, "grad_norm": 0.05861978977918625, "learning_rate": 2.5683134172926446e-05, "loss": 1.451, "step": 20288 }, { "epoch": 0.9021386332310702, "grad_norm": 0.05937764793634415, "learning_rate": 2.5636903825394074e-05, "loss": 1.45, "step": 20290 }, { "epoch": 0.9022275576897426, "grad_norm": 0.05819464474916458, "learning_rate": 2.559071402850971e-05, "loss": 1.4512, "step": 20292 }, { "epoch": 0.9023164821484149, "grad_norm": 0.05875251069664955, "learning_rate": 2.554456478622158e-05, "loss": 1.4478, "step": 20294 }, { "epoch": 0.9024054066070872, "grad_norm": 0.05768872797489166, "learning_rate": 2.549845610247503e-05, "loss": 1.4479, "step": 20296 }, { "epoch": 0.9024943310657596, "grad_norm": 0.05741060525178909, "learning_rate": 2.5452387981211523e-05, "loss": 1.4453, "step": 20298 }, { "epoch": 0.902583255524432, "grad_norm": 0.05832884460687637, "learning_rate": 2.5406360426369123e-05, "loss": 1.4453, "step": 20300 }, { "epoch": 0.9026721799831043, "grad_norm": 0.05834457650780678, "learning_rate": 2.5360373441882467e-05, "loss": 1.4494, "step": 20302 }, { "epoch": 0.9027611044417767, "grad_norm": 0.05867211893200874, "learning_rate": 2.531442703168285e-05, "loss": 1.4489, "step": 20304 }, { "epoch": 0.9028500289004491, "grad_norm": 0.057918209582567215, "learning_rate": 2.5268521199697746e-05, "loss": 1.4454, "step": 20306 }, { "epoch": 0.9029389533591214, "grad_norm": 0.057650770992040634, "learning_rate": 2.5222655949851626e-05, "loss": 1.4484, "step": 20308 }, { "epoch": 0.9030278778177938, "grad_norm": 0.05866733193397522, "learning_rate": 2.517683128606507e-05, "loss": 1.4527, "step": 20310 }, { "epoch": 0.9031168022764662, "grad_norm": 0.057978980243206024, "learning_rate": 2.5131047212255566e-05, "loss": 1.448, "step": 20312 }, { "epoch": 0.9032057267351385, "grad_norm": 0.05772881582379341, "learning_rate": 2.508530373233675e-05, "loss": 1.4488, "step": 20314 }, { "epoch": 0.9032946511938108, "grad_norm": 0.057447269558906555, "learning_rate": 2.5039600850219058e-05, "loss": 1.4449, "step": 20316 }, { "epoch": 0.9033835756524832, "grad_norm": 0.057314515113830566, "learning_rate": 2.4993938569809416e-05, "loss": 1.445, "step": 20318 }, { "epoch": 0.9034725001111555, "grad_norm": 0.058126822113990784, "learning_rate": 2.4948316895011204e-05, "loss": 1.4505, "step": 20320 }, { "epoch": 0.9035614245698279, "grad_norm": 0.05834449455142021, "learning_rate": 2.490273582972419e-05, "loss": 1.4474, "step": 20322 }, { "epoch": 0.9036503490285003, "grad_norm": 0.0587538406252861, "learning_rate": 2.4857195377845144e-05, "loss": 1.4403, "step": 20324 }, { "epoch": 0.9037392734871726, "grad_norm": 0.05739228054881096, "learning_rate": 2.4811695543266842e-05, "loss": 1.4439, "step": 20326 }, { "epoch": 0.903828197945845, "grad_norm": 0.057395629584789276, "learning_rate": 2.476623632987901e-05, "loss": 1.4444, "step": 20328 }, { "epoch": 0.9039171224045174, "grad_norm": 0.05809532478451729, "learning_rate": 2.472081774156748e-05, "loss": 1.4442, "step": 20330 }, { "epoch": 0.9040060468631897, "grad_norm": 0.05822272226214409, "learning_rate": 2.4675439782214924e-05, "loss": 1.4502, "step": 20332 }, { "epoch": 0.9040949713218621, "grad_norm": 0.059671252965927124, "learning_rate": 2.4630102455700577e-05, "loss": 1.444, "step": 20334 }, { "epoch": 0.9041838957805345, "grad_norm": 0.05677417665719986, "learning_rate": 2.4584805765899885e-05, "loss": 1.4455, "step": 20336 }, { "epoch": 0.9042728202392067, "grad_norm": 0.058038029819726944, "learning_rate": 2.4539549716685038e-05, "loss": 1.4463, "step": 20338 }, { "epoch": 0.9043617446978791, "grad_norm": 0.057455874979496, "learning_rate": 2.449433431192488e-05, "loss": 1.4421, "step": 20340 }, { "epoch": 0.9044506691565515, "grad_norm": 0.05858970433473587, "learning_rate": 2.444915955548449e-05, "loss": 1.4471, "step": 20342 }, { "epoch": 0.9045395936152238, "grad_norm": 0.05943557992577553, "learning_rate": 2.4404025451225497e-05, "loss": 1.4534, "step": 20344 }, { "epoch": 0.9046285180738962, "grad_norm": 0.058774013072252274, "learning_rate": 2.4358932003006427e-05, "loss": 1.447, "step": 20346 }, { "epoch": 0.9047174425325686, "grad_norm": 0.05879400297999382, "learning_rate": 2.4313879214681866e-05, "loss": 1.4474, "step": 20348 }, { "epoch": 0.904806366991241, "grad_norm": 0.05844084918498993, "learning_rate": 2.4268867090103232e-05, "loss": 1.4454, "step": 20350 }, { "epoch": 0.9048952914499133, "grad_norm": 0.05857951194047928, "learning_rate": 2.4223895633118276e-05, "loss": 1.4535, "step": 20352 }, { "epoch": 0.9049842159085857, "grad_norm": 0.05743425339460373, "learning_rate": 2.417896484757137e-05, "loss": 1.4417, "step": 20354 }, { "epoch": 0.905073140367258, "grad_norm": 0.056919701397418976, "learning_rate": 2.4134074737303558e-05, "loss": 1.4449, "step": 20356 }, { "epoch": 0.9051620648259304, "grad_norm": 0.05839109793305397, "learning_rate": 2.4089225306151986e-05, "loss": 1.4459, "step": 20358 }, { "epoch": 0.9052509892846027, "grad_norm": 0.05757851153612137, "learning_rate": 2.40444165579507e-05, "loss": 1.4408, "step": 20360 }, { "epoch": 0.905339913743275, "grad_norm": 0.057318564504384995, "learning_rate": 2.3999648496530247e-05, "loss": 1.4473, "step": 20362 }, { "epoch": 0.9054288382019474, "grad_norm": 0.05825290456414223, "learning_rate": 2.3954921125717454e-05, "loss": 1.4484, "step": 20364 }, { "epoch": 0.9055177626606198, "grad_norm": 0.057050399482250214, "learning_rate": 2.391023444933582e-05, "loss": 1.4491, "step": 20366 }, { "epoch": 0.9056066871192922, "grad_norm": 0.058133237063884735, "learning_rate": 2.3865588471205514e-05, "loss": 1.4488, "step": 20368 }, { "epoch": 0.9056956115779645, "grad_norm": 0.0579213872551918, "learning_rate": 2.3820983195142864e-05, "loss": 1.4432, "step": 20370 }, { "epoch": 0.9057845360366369, "grad_norm": 0.05713074654340744, "learning_rate": 2.377641862496105e-05, "loss": 1.4493, "step": 20372 }, { "epoch": 0.9058734604953093, "grad_norm": 0.05779387801885605, "learning_rate": 2.3731894764469687e-05, "loss": 1.4474, "step": 20374 }, { "epoch": 0.9059623849539816, "grad_norm": 0.05732658505439758, "learning_rate": 2.368741161747473e-05, "loss": 1.4462, "step": 20376 }, { "epoch": 0.906051309412654, "grad_norm": 0.058318544179201126, "learning_rate": 2.3642969187778916e-05, "loss": 1.4432, "step": 20378 }, { "epoch": 0.9061402338713264, "grad_norm": 0.057634204626083374, "learning_rate": 2.359856747918121e-05, "loss": 1.4426, "step": 20380 }, { "epoch": 0.9062291583299986, "grad_norm": 0.05897272378206253, "learning_rate": 2.3554206495477515e-05, "loss": 1.4453, "step": 20382 }, { "epoch": 0.906318082788671, "grad_norm": 0.05731482058763504, "learning_rate": 2.350988624045991e-05, "loss": 1.4424, "step": 20384 }, { "epoch": 0.9064070072473434, "grad_norm": 0.05799233168363571, "learning_rate": 2.3465606717916975e-05, "loss": 1.4518, "step": 20386 }, { "epoch": 0.9064959317060157, "grad_norm": 0.0577356293797493, "learning_rate": 2.3421367931633963e-05, "loss": 1.4431, "step": 20388 }, { "epoch": 0.9065848561646881, "grad_norm": 0.060062676668167114, "learning_rate": 2.3377169885392734e-05, "loss": 1.4531, "step": 20390 }, { "epoch": 0.9066737806233605, "grad_norm": 0.05856315419077873, "learning_rate": 2.333301258297138e-05, "loss": 1.4402, "step": 20392 }, { "epoch": 0.9067627050820328, "grad_norm": 0.05849577859044075, "learning_rate": 2.3288896028144657e-05, "loss": 1.4438, "step": 20394 }, { "epoch": 0.9068516295407052, "grad_norm": 0.058690641075372696, "learning_rate": 2.324482022468394e-05, "loss": 1.4547, "step": 20396 }, { "epoch": 0.9069405539993776, "grad_norm": 0.05806424096226692, "learning_rate": 2.3200785176357042e-05, "loss": 1.4501, "step": 20398 }, { "epoch": 0.9070294784580499, "grad_norm": 0.05833980441093445, "learning_rate": 2.3156790886928237e-05, "loss": 1.4476, "step": 20400 }, { "epoch": 0.9071184029167223, "grad_norm": 0.0578826479613781, "learning_rate": 2.3112837360158122e-05, "loss": 1.4482, "step": 20402 }, { "epoch": 0.9072073273753946, "grad_norm": 0.057997167110443115, "learning_rate": 2.3068924599804475e-05, "loss": 1.445, "step": 20404 }, { "epoch": 0.9072962518340669, "grad_norm": 0.057818666100502014, "learning_rate": 2.3025052609620843e-05, "loss": 1.4447, "step": 20406 }, { "epoch": 0.9073851762927393, "grad_norm": 0.0576237328350544, "learning_rate": 2.2981221393357622e-05, "loss": 1.4494, "step": 20408 }, { "epoch": 0.9074741007514117, "grad_norm": 0.05845033749938011, "learning_rate": 2.2937430954761752e-05, "loss": 1.4427, "step": 20410 }, { "epoch": 0.907563025210084, "grad_norm": 0.05697134882211685, "learning_rate": 2.289368129757674e-05, "loss": 1.4486, "step": 20412 }, { "epoch": 0.9076519496687564, "grad_norm": 0.05806869640946388, "learning_rate": 2.2849972425542265e-05, "loss": 1.4464, "step": 20414 }, { "epoch": 0.9077408741274288, "grad_norm": 0.05777909234166145, "learning_rate": 2.280630434239489e-05, "loss": 1.4493, "step": 20416 }, { "epoch": 0.9078297985861011, "grad_norm": 0.05883679911494255, "learning_rate": 2.2762677051867577e-05, "loss": 1.4485, "step": 20418 }, { "epoch": 0.9079187230447735, "grad_norm": 0.05816845968365669, "learning_rate": 2.2719090557689782e-05, "loss": 1.4434, "step": 20420 }, { "epoch": 0.9080076475034459, "grad_norm": 0.05854851007461548, "learning_rate": 2.267554486358747e-05, "loss": 1.4494, "step": 20422 }, { "epoch": 0.9080965719621181, "grad_norm": 0.056842558085918427, "learning_rate": 2.2632039973282947e-05, "loss": 1.4488, "step": 20424 }, { "epoch": 0.9081854964207905, "grad_norm": 0.05853312835097313, "learning_rate": 2.25885758904954e-05, "loss": 1.4544, "step": 20426 }, { "epoch": 0.9082744208794629, "grad_norm": 0.058467112481594086, "learning_rate": 2.2545152618940355e-05, "loss": 1.4481, "step": 20428 }, { "epoch": 0.9083633453381352, "grad_norm": 0.05814722180366516, "learning_rate": 2.2501770162329626e-05, "loss": 1.445, "step": 20430 }, { "epoch": 0.9084522697968076, "grad_norm": 0.05995301157236099, "learning_rate": 2.2458428524371855e-05, "loss": 1.4447, "step": 20432 }, { "epoch": 0.90854119425548, "grad_norm": 0.05826203152537346, "learning_rate": 2.2415127708772077e-05, "loss": 1.4434, "step": 20434 }, { "epoch": 0.9086301187141523, "grad_norm": 0.05822010338306427, "learning_rate": 2.2371867719231943e-05, "loss": 1.4443, "step": 20436 }, { "epoch": 0.9087190431728247, "grad_norm": 0.057935092598199844, "learning_rate": 2.2328648559449273e-05, "loss": 1.4474, "step": 20438 }, { "epoch": 0.9088079676314971, "grad_norm": 0.057203538715839386, "learning_rate": 2.2285470233118778e-05, "loss": 1.4479, "step": 20440 }, { "epoch": 0.9088968920901694, "grad_norm": 0.05857103317975998, "learning_rate": 2.2242332743931558e-05, "loss": 1.4451, "step": 20442 }, { "epoch": 0.9089858165488418, "grad_norm": 0.058484774082899094, "learning_rate": 2.219923609557506e-05, "loss": 1.4552, "step": 20444 }, { "epoch": 0.9090747410075141, "grad_norm": 0.058287981897592545, "learning_rate": 2.2156180291733497e-05, "loss": 1.4511, "step": 20446 }, { "epoch": 0.9091636654661864, "grad_norm": 0.057483308017253876, "learning_rate": 2.2113165336087482e-05, "loss": 1.449, "step": 20448 }, { "epoch": 0.9092525899248588, "grad_norm": 0.05801312252879143, "learning_rate": 2.207019123231402e-05, "loss": 1.4427, "step": 20450 }, { "epoch": 0.9093415143835312, "grad_norm": 0.05689322575926781, "learning_rate": 2.202725798408678e-05, "loss": 1.4482, "step": 20452 }, { "epoch": 0.9094304388422035, "grad_norm": 0.05802132561802864, "learning_rate": 2.1984365595075882e-05, "loss": 1.4516, "step": 20454 }, { "epoch": 0.9095193633008759, "grad_norm": 0.058080337941646576, "learning_rate": 2.1941514068947955e-05, "loss": 1.4435, "step": 20456 }, { "epoch": 0.9096082877595483, "grad_norm": 0.056655969470739365, "learning_rate": 2.1898703409366228e-05, "loss": 1.4453, "step": 20458 }, { "epoch": 0.9096972122182206, "grad_norm": 0.057819753885269165, "learning_rate": 2.1855933619990166e-05, "loss": 1.451, "step": 20460 }, { "epoch": 0.909786136676893, "grad_norm": 0.057411663234233856, "learning_rate": 2.181320470447601e-05, "loss": 1.4467, "step": 20462 }, { "epoch": 0.9098750611355654, "grad_norm": 0.05854671448469162, "learning_rate": 2.1770516666476502e-05, "loss": 1.4534, "step": 20464 }, { "epoch": 0.9099639855942377, "grad_norm": 0.05828188359737396, "learning_rate": 2.172786950964062e-05, "loss": 1.4449, "step": 20466 }, { "epoch": 0.91005291005291, "grad_norm": 0.05763471499085426, "learning_rate": 2.168526323761416e-05, "loss": 1.4474, "step": 20468 }, { "epoch": 0.9101418345115824, "grad_norm": 0.05832042172551155, "learning_rate": 2.1642697854039272e-05, "loss": 1.4473, "step": 20470 }, { "epoch": 0.9102307589702547, "grad_norm": 0.05753203481435776, "learning_rate": 2.1600173362554597e-05, "loss": 1.453, "step": 20472 }, { "epoch": 0.9103196834289271, "grad_norm": 0.05804789811372757, "learning_rate": 2.155768976679534e-05, "loss": 1.4507, "step": 20474 }, { "epoch": 0.9104086078875995, "grad_norm": 0.05770070105791092, "learning_rate": 2.151524707039326e-05, "loss": 1.449, "step": 20476 }, { "epoch": 0.9104975323462718, "grad_norm": 0.05669693648815155, "learning_rate": 2.1472845276976404e-05, "loss": 1.4436, "step": 20478 }, { "epoch": 0.9105864568049442, "grad_norm": 0.05826473981142044, "learning_rate": 2.1430484390169648e-05, "loss": 1.4424, "step": 20480 }, { "epoch": 0.9106753812636166, "grad_norm": 0.05782058462500572, "learning_rate": 2.1388164413593924e-05, "loss": 1.4464, "step": 20482 }, { "epoch": 0.910764305722289, "grad_norm": 0.05837662145495415, "learning_rate": 2.1345885350867123e-05, "loss": 1.4528, "step": 20484 }, { "epoch": 0.9108532301809613, "grad_norm": 0.058443278074264526, "learning_rate": 2.130364720560346e-05, "loss": 1.4433, "step": 20486 }, { "epoch": 0.9109421546396337, "grad_norm": 0.057444412261247635, "learning_rate": 2.126144998141355e-05, "loss": 1.451, "step": 20488 }, { "epoch": 0.9110310790983059, "grad_norm": 0.0574500598013401, "learning_rate": 2.121929368190456e-05, "loss": 1.4479, "step": 20490 }, { "epoch": 0.9111200035569783, "grad_norm": 0.05753646418452263, "learning_rate": 2.117717831068039e-05, "loss": 1.4474, "step": 20492 }, { "epoch": 0.9112089280156507, "grad_norm": 0.058596670627593994, "learning_rate": 2.1135103871341043e-05, "loss": 1.4481, "step": 20494 }, { "epoch": 0.911297852474323, "grad_norm": 0.05796939134597778, "learning_rate": 2.109307036748326e-05, "loss": 1.4488, "step": 20496 }, { "epoch": 0.9113867769329954, "grad_norm": 0.058441001921892166, "learning_rate": 2.1051077802700435e-05, "loss": 1.4437, "step": 20498 }, { "epoch": 0.9114757013916678, "grad_norm": 0.05807716026902199, "learning_rate": 2.100912618058204e-05, "loss": 1.4438, "step": 20500 }, { "epoch": 0.9114757013916678, "eval_loss": 1.433039903640747, "eval_runtime": 12.5537, "eval_samples_per_second": 550.436, "eval_steps_per_second": 68.824, "step": 20500 }, { "epoch": 0.9115646258503401, "grad_norm": 0.05796836316585541, "learning_rate": 2.0967215504714366e-05, "loss": 1.4506, "step": 20502 }, { "epoch": 0.9116535503090125, "grad_norm": 0.05753817781805992, "learning_rate": 2.092534577868016e-05, "loss": 1.4498, "step": 20504 }, { "epoch": 0.9117424747676849, "grad_norm": 0.05816453695297241, "learning_rate": 2.088351700605867e-05, "loss": 1.4505, "step": 20506 }, { "epoch": 0.9118313992263573, "grad_norm": 0.05715755373239517, "learning_rate": 2.084172919042554e-05, "loss": 1.4524, "step": 20508 }, { "epoch": 0.9119203236850296, "grad_norm": 0.05733273923397064, "learning_rate": 2.079998233535285e-05, "loss": 1.4492, "step": 20510 }, { "epoch": 0.9120092481437019, "grad_norm": 0.057934537529945374, "learning_rate": 2.075827644440953e-05, "loss": 1.4469, "step": 20512 }, { "epoch": 0.9120981726023742, "grad_norm": 0.056834906339645386, "learning_rate": 2.0716611521160776e-05, "loss": 1.4416, "step": 20514 }, { "epoch": 0.9121870970610466, "grad_norm": 0.05724672973155975, "learning_rate": 2.067498756916808e-05, "loss": 1.4451, "step": 20516 }, { "epoch": 0.912276021519719, "grad_norm": 0.05750434473156929, "learning_rate": 2.0633404591989757e-05, "loss": 1.4472, "step": 20518 }, { "epoch": 0.9123649459783914, "grad_norm": 0.05747924745082855, "learning_rate": 2.059186259318052e-05, "loss": 1.4487, "step": 20520 }, { "epoch": 0.9124538704370637, "grad_norm": 0.057014673948287964, "learning_rate": 2.0550361576291645e-05, "loss": 1.4518, "step": 20522 }, { "epoch": 0.9125427948957361, "grad_norm": 0.05780208110809326, "learning_rate": 2.050890154487062e-05, "loss": 1.4477, "step": 20524 }, { "epoch": 0.9126317193544085, "grad_norm": 0.0579492561519146, "learning_rate": 2.0467482502461775e-05, "loss": 1.4455, "step": 20526 }, { "epoch": 0.9127206438130808, "grad_norm": 0.058637332171201706, "learning_rate": 2.0426104452605786e-05, "loss": 1.4503, "step": 20528 }, { "epoch": 0.9128095682717532, "grad_norm": 0.05861702188849449, "learning_rate": 2.038476739883982e-05, "loss": 1.447, "step": 20530 }, { "epoch": 0.9128984927304254, "grad_norm": 0.05932488292455673, "learning_rate": 2.034347134469744e-05, "loss": 1.4504, "step": 20532 }, { "epoch": 0.9129874171890978, "grad_norm": 0.05963583663105965, "learning_rate": 2.0302216293708987e-05, "loss": 1.4429, "step": 20534 }, { "epoch": 0.9130763416477702, "grad_norm": 0.05832473933696747, "learning_rate": 2.0261002249401028e-05, "loss": 1.4492, "step": 20536 }, { "epoch": 0.9131652661064426, "grad_norm": 0.058115072548389435, "learning_rate": 2.021982921529675e-05, "loss": 1.4511, "step": 20538 }, { "epoch": 0.9132541905651149, "grad_norm": 0.057589709758758545, "learning_rate": 2.0178697194915717e-05, "loss": 1.4502, "step": 20540 }, { "epoch": 0.9133431150237873, "grad_norm": 0.05809624865651131, "learning_rate": 2.0137606191774183e-05, "loss": 1.4538, "step": 20542 }, { "epoch": 0.9134320394824597, "grad_norm": 0.058290865272283554, "learning_rate": 2.009655620938483e-05, "loss": 1.4441, "step": 20544 }, { "epoch": 0.913520963941132, "grad_norm": 0.05669715628027916, "learning_rate": 2.0055547251256633e-05, "loss": 1.444, "step": 20546 }, { "epoch": 0.9136098883998044, "grad_norm": 0.05922799929976463, "learning_rate": 2.0014579320895344e-05, "loss": 1.4514, "step": 20548 }, { "epoch": 0.9136988128584768, "grad_norm": 0.058014314621686935, "learning_rate": 1.9973652421803045e-05, "loss": 1.4445, "step": 20550 }, { "epoch": 0.9137877373171491, "grad_norm": 0.058082759380340576, "learning_rate": 1.9932766557478333e-05, "loss": 1.4509, "step": 20552 }, { "epoch": 0.9138766617758214, "grad_norm": 0.05879303067922592, "learning_rate": 1.9891921731416297e-05, "loss": 1.4449, "step": 20554 }, { "epoch": 0.9139655862344938, "grad_norm": 0.05874144658446312, "learning_rate": 1.9851117947108643e-05, "loss": 1.4441, "step": 20556 }, { "epoch": 0.9140545106931661, "grad_norm": 0.057445310056209564, "learning_rate": 1.9810355208043306e-05, "loss": 1.4476, "step": 20558 }, { "epoch": 0.9141434351518385, "grad_norm": 0.05829305201768875, "learning_rate": 1.976963351770489e-05, "loss": 1.4493, "step": 20560 }, { "epoch": 0.9142323596105109, "grad_norm": 0.059354912489652634, "learning_rate": 1.9728952879574604e-05, "loss": 1.4551, "step": 20562 }, { "epoch": 0.9143212840691832, "grad_norm": 0.057627931237220764, "learning_rate": 1.968831329712989e-05, "loss": 1.4429, "step": 20564 }, { "epoch": 0.9144102085278556, "grad_norm": 0.0584438256919384, "learning_rate": 1.9647714773844804e-05, "loss": 1.4498, "step": 20566 }, { "epoch": 0.914499132986528, "grad_norm": 0.05829082801938057, "learning_rate": 1.9607157313189904e-05, "loss": 1.4472, "step": 20568 }, { "epoch": 0.9145880574452003, "grad_norm": 0.057573914527893066, "learning_rate": 1.9566640918632194e-05, "loss": 1.4428, "step": 20570 }, { "epoch": 0.9146769819038727, "grad_norm": 0.05751367285847664, "learning_rate": 1.9526165593635235e-05, "loss": 1.4479, "step": 20572 }, { "epoch": 0.9147659063625451, "grad_norm": 0.05755585804581642, "learning_rate": 1.948573134165904e-05, "loss": 1.4433, "step": 20574 }, { "epoch": 0.9148548308212173, "grad_norm": 0.0580437146127224, "learning_rate": 1.9445338166160064e-05, "loss": 1.448, "step": 20576 }, { "epoch": 0.9149437552798897, "grad_norm": 0.057542815804481506, "learning_rate": 1.9404986070591322e-05, "loss": 1.4461, "step": 20578 }, { "epoch": 0.9150326797385621, "grad_norm": 0.05818972736597061, "learning_rate": 1.936467505840228e-05, "loss": 1.4475, "step": 20580 }, { "epoch": 0.9151216041972344, "grad_norm": 0.05846571922302246, "learning_rate": 1.9324405133038902e-05, "loss": 1.4432, "step": 20582 }, { "epoch": 0.9152105286559068, "grad_norm": 0.057439543306827545, "learning_rate": 1.9284176297943712e-05, "loss": 1.4487, "step": 20584 }, { "epoch": 0.9152994531145792, "grad_norm": 0.05826476961374283, "learning_rate": 1.9243988556555515e-05, "loss": 1.4475, "step": 20586 }, { "epoch": 0.9153883775732515, "grad_norm": 0.05997009575366974, "learning_rate": 1.920384191230984e-05, "loss": 1.447, "step": 20588 }, { "epoch": 0.9154773020319239, "grad_norm": 0.05828747898340225, "learning_rate": 1.9163736368638496e-05, "loss": 1.4409, "step": 20590 }, { "epoch": 0.9155662264905963, "grad_norm": 0.057352207601070404, "learning_rate": 1.912367192896991e-05, "loss": 1.4441, "step": 20592 }, { "epoch": 0.9156551509492686, "grad_norm": 0.05746382474899292, "learning_rate": 1.908364859672912e-05, "loss": 1.4532, "step": 20594 }, { "epoch": 0.915744075407941, "grad_norm": 0.05716506391763687, "learning_rate": 1.9043666375337276e-05, "loss": 1.4396, "step": 20596 }, { "epoch": 0.9158329998666133, "grad_norm": 0.05829397961497307, "learning_rate": 1.9003725268212367e-05, "loss": 1.4568, "step": 20598 }, { "epoch": 0.9159219243252856, "grad_norm": 0.058943524956703186, "learning_rate": 1.8963825278768776e-05, "loss": 1.4509, "step": 20600 }, { "epoch": 0.916010848783958, "grad_norm": 0.05764896050095558, "learning_rate": 1.892396641041716e-05, "loss": 1.4512, "step": 20602 }, { "epoch": 0.9160997732426304, "grad_norm": 0.05844266340136528, "learning_rate": 1.888414866656496e-05, "loss": 1.446, "step": 20604 }, { "epoch": 0.9161886977013027, "grad_norm": 0.0566147081553936, "learning_rate": 1.8844372050615953e-05, "loss": 1.4433, "step": 20606 }, { "epoch": 0.9162776221599751, "grad_norm": 0.06036548689007759, "learning_rate": 1.8804636565970422e-05, "loss": 1.4485, "step": 20608 }, { "epoch": 0.9163665466186475, "grad_norm": 0.05751558393239975, "learning_rate": 1.876494221602504e-05, "loss": 1.4479, "step": 20610 }, { "epoch": 0.9164554710773198, "grad_norm": 0.05685598403215408, "learning_rate": 1.8725289004173198e-05, "loss": 1.4439, "step": 20612 }, { "epoch": 0.9165443955359922, "grad_norm": 0.05743536353111267, "learning_rate": 1.8685676933804575e-05, "loss": 1.4509, "step": 20614 }, { "epoch": 0.9166333199946646, "grad_norm": 0.05938420072197914, "learning_rate": 1.8646106008305352e-05, "loss": 1.4504, "step": 20616 }, { "epoch": 0.9167222444533369, "grad_norm": 0.058147843927145004, "learning_rate": 1.860657623105816e-05, "loss": 1.4463, "step": 20618 }, { "epoch": 0.9168111689120092, "grad_norm": 0.059247348457574844, "learning_rate": 1.85670876054424e-05, "loss": 1.45, "step": 20620 }, { "epoch": 0.9169000933706816, "grad_norm": 0.05791396647691727, "learning_rate": 1.85276401348336e-05, "loss": 1.4464, "step": 20622 }, { "epoch": 0.9169890178293539, "grad_norm": 0.05942825973033905, "learning_rate": 1.8488233822603782e-05, "loss": 1.4504, "step": 20624 }, { "epoch": 0.9170779422880263, "grad_norm": 0.05869719013571739, "learning_rate": 1.8448868672121756e-05, "loss": 1.4469, "step": 20626 }, { "epoch": 0.9171668667466987, "grad_norm": 0.058402083814144135, "learning_rate": 1.8409544686752542e-05, "loss": 1.4417, "step": 20628 }, { "epoch": 0.917255791205371, "grad_norm": 0.05802400782704353, "learning_rate": 1.8370261869857798e-05, "loss": 1.4483, "step": 20630 }, { "epoch": 0.9173447156640434, "grad_norm": 0.058382607996463776, "learning_rate": 1.8331020224795493e-05, "loss": 1.445, "step": 20632 }, { "epoch": 0.9174336401227158, "grad_norm": 0.05774873495101929, "learning_rate": 1.8291819754920224e-05, "loss": 1.4541, "step": 20634 }, { "epoch": 0.9175225645813881, "grad_norm": 0.05814613774418831, "learning_rate": 1.8252660463583036e-05, "loss": 1.4459, "step": 20636 }, { "epoch": 0.9176114890400605, "grad_norm": 0.05833296850323677, "learning_rate": 1.8213542354131474e-05, "loss": 1.4463, "step": 20638 }, { "epoch": 0.9177004134987329, "grad_norm": 0.058397140353918076, "learning_rate": 1.8174465429909358e-05, "loss": 1.446, "step": 20640 }, { "epoch": 0.9177893379574051, "grad_norm": 0.05853003263473511, "learning_rate": 1.8135429694257354e-05, "loss": 1.4472, "step": 20642 }, { "epoch": 0.9178782624160775, "grad_norm": 0.05980772152543068, "learning_rate": 1.809643515051229e-05, "loss": 1.4465, "step": 20644 }, { "epoch": 0.9179671868747499, "grad_norm": 0.05789174139499664, "learning_rate": 1.8057481802007724e-05, "loss": 1.4434, "step": 20646 }, { "epoch": 0.9180561113334222, "grad_norm": 0.05812879651784897, "learning_rate": 1.801856965207338e-05, "loss": 1.4429, "step": 20648 }, { "epoch": 0.9181450357920946, "grad_norm": 0.05842747539281845, "learning_rate": 1.7979698704035708e-05, "loss": 1.4446, "step": 20650 }, { "epoch": 0.918233960250767, "grad_norm": 0.0583362840116024, "learning_rate": 1.794086896121766e-05, "loss": 1.452, "step": 20652 }, { "epoch": 0.9183228847094393, "grad_norm": 0.059007447212934494, "learning_rate": 1.7902080426938417e-05, "loss": 1.4548, "step": 20654 }, { "epoch": 0.9184118091681117, "grad_norm": 0.057736873626708984, "learning_rate": 1.7863333104513934e-05, "loss": 1.4478, "step": 20656 }, { "epoch": 0.9185007336267841, "grad_norm": 0.05727485939860344, "learning_rate": 1.7824626997256456e-05, "loss": 1.4414, "step": 20658 }, { "epoch": 0.9185896580854565, "grad_norm": 0.059946656227111816, "learning_rate": 1.7785962108474663e-05, "loss": 1.4492, "step": 20660 }, { "epoch": 0.9186785825441287, "grad_norm": 0.05915560573339462, "learning_rate": 1.7747338441473914e-05, "loss": 1.4452, "step": 20662 }, { "epoch": 0.9187675070028011, "grad_norm": 0.05715799331665039, "learning_rate": 1.770875599955596e-05, "loss": 1.4486, "step": 20664 }, { "epoch": 0.9188564314614734, "grad_norm": 0.05892267823219299, "learning_rate": 1.7670214786018823e-05, "loss": 1.4465, "step": 20666 }, { "epoch": 0.9189453559201458, "grad_norm": 0.05755788832902908, "learning_rate": 1.7631714804157316e-05, "loss": 1.4423, "step": 20668 }, { "epoch": 0.9190342803788182, "grad_norm": 0.0570998415350914, "learning_rate": 1.7593256057262642e-05, "loss": 1.448, "step": 20670 }, { "epoch": 0.9191232048374905, "grad_norm": 0.05825738236308098, "learning_rate": 1.755483854862222e-05, "loss": 1.4507, "step": 20672 }, { "epoch": 0.9192121292961629, "grad_norm": 0.057699088007211685, "learning_rate": 1.7516462281520372e-05, "loss": 1.4468, "step": 20674 }, { "epoch": 0.9193010537548353, "grad_norm": 0.05778341740369797, "learning_rate": 1.7478127259237474e-05, "loss": 1.4453, "step": 20676 }, { "epoch": 0.9193899782135077, "grad_norm": 0.05825795978307724, "learning_rate": 1.7439833485050682e-05, "loss": 1.441, "step": 20678 }, { "epoch": 0.91947890267218, "grad_norm": 0.05925915762782097, "learning_rate": 1.7401580962233597e-05, "loss": 1.4467, "step": 20680 }, { "epoch": 0.9195678271308524, "grad_norm": 0.059423577040433884, "learning_rate": 1.7363369694055997e-05, "loss": 1.4483, "step": 20682 }, { "epoch": 0.9196567515895246, "grad_norm": 0.05769523233175278, "learning_rate": 1.7325199683784486e-05, "loss": 1.4526, "step": 20684 }, { "epoch": 0.919745676048197, "grad_norm": 0.05748085305094719, "learning_rate": 1.7287070934682013e-05, "loss": 1.4489, "step": 20686 }, { "epoch": 0.9198346005068694, "grad_norm": 0.05770343542098999, "learning_rate": 1.7248983450007914e-05, "loss": 1.4407, "step": 20688 }, { "epoch": 0.9199235249655418, "grad_norm": 0.05809416621923447, "learning_rate": 1.7210937233018143e-05, "loss": 1.4469, "step": 20690 }, { "epoch": 0.9200124494242141, "grad_norm": 0.05900378152728081, "learning_rate": 1.7172932286965093e-05, "loss": 1.441, "step": 20692 }, { "epoch": 0.9201013738828865, "grad_norm": 0.05754346400499344, "learning_rate": 1.71349686150975e-05, "loss": 1.4483, "step": 20694 }, { "epoch": 0.9201902983415589, "grad_norm": 0.05826412886381149, "learning_rate": 1.709704622066077e-05, "loss": 1.4425, "step": 20696 }, { "epoch": 0.9202792228002312, "grad_norm": 0.05790907144546509, "learning_rate": 1.7059165106896478e-05, "loss": 1.4424, "step": 20698 }, { "epoch": 0.9203681472589036, "grad_norm": 0.05832275003194809, "learning_rate": 1.702132527704309e-05, "loss": 1.4435, "step": 20700 }, { "epoch": 0.920457071717576, "grad_norm": 0.05824443697929382, "learning_rate": 1.6983526734335287e-05, "loss": 1.4501, "step": 20702 }, { "epoch": 0.9205459961762483, "grad_norm": 0.06036430969834328, "learning_rate": 1.6945769482004104e-05, "loss": 1.4482, "step": 20704 }, { "epoch": 0.9206349206349206, "grad_norm": 0.05813805013895035, "learning_rate": 1.6908053523277344e-05, "loss": 1.4487, "step": 20706 }, { "epoch": 0.920723845093593, "grad_norm": 0.05797953903675079, "learning_rate": 1.68703788613791e-05, "loss": 1.4517, "step": 20708 }, { "epoch": 0.9208127695522653, "grad_norm": 0.05762023478746414, "learning_rate": 1.68327454995299e-05, "loss": 1.4477, "step": 20710 }, { "epoch": 0.9209016940109377, "grad_norm": 0.05841365456581116, "learning_rate": 1.6795153440946897e-05, "loss": 1.4423, "step": 20712 }, { "epoch": 0.9209906184696101, "grad_norm": 0.05898358300328255, "learning_rate": 1.6757602688843577e-05, "loss": 1.4517, "step": 20714 }, { "epoch": 0.9210795429282824, "grad_norm": 0.05752791091799736, "learning_rate": 1.672009324642998e-05, "loss": 1.441, "step": 20716 }, { "epoch": 0.9211684673869548, "grad_norm": 0.05729573592543602, "learning_rate": 1.6682625116912597e-05, "loss": 1.4516, "step": 20718 }, { "epoch": 0.9212573918456272, "grad_norm": 0.060360997915267944, "learning_rate": 1.66451983034942e-05, "loss": 1.451, "step": 20720 }, { "epoch": 0.9213463163042995, "grad_norm": 0.058285191655159, "learning_rate": 1.6607812809374455e-05, "loss": 1.449, "step": 20722 }, { "epoch": 0.9214352407629719, "grad_norm": 0.05837748199701309, "learning_rate": 1.6570468637749083e-05, "loss": 1.4424, "step": 20724 }, { "epoch": 0.9215241652216443, "grad_norm": 0.057116538286209106, "learning_rate": 1.653316579181041e-05, "loss": 1.4439, "step": 20726 }, { "epoch": 0.9216130896803165, "grad_norm": 0.05754510685801506, "learning_rate": 1.649590427474734e-05, "loss": 1.443, "step": 20728 }, { "epoch": 0.9217020141389889, "grad_norm": 0.05777478963136673, "learning_rate": 1.64586840897451e-05, "loss": 1.4462, "step": 20730 }, { "epoch": 0.9217909385976613, "grad_norm": 0.05793027579784393, "learning_rate": 1.6421505239985422e-05, "loss": 1.4449, "step": 20732 }, { "epoch": 0.9218798630563336, "grad_norm": 0.057621315121650696, "learning_rate": 1.6384367728646542e-05, "loss": 1.4483, "step": 20734 }, { "epoch": 0.921968787515006, "grad_norm": 0.05741515010595322, "learning_rate": 1.634727155890309e-05, "loss": 1.4445, "step": 20736 }, { "epoch": 0.9220577119736784, "grad_norm": 0.058283522725105286, "learning_rate": 1.6310216733926353e-05, "loss": 1.4464, "step": 20738 }, { "epoch": 0.9221466364323507, "grad_norm": 0.057298872619867325, "learning_rate": 1.627320325688375e-05, "loss": 1.4422, "step": 20740 }, { "epoch": 0.9222355608910231, "grad_norm": 0.05811677128076553, "learning_rate": 1.6236231130939473e-05, "loss": 1.4502, "step": 20742 }, { "epoch": 0.9223244853496955, "grad_norm": 0.056963641196489334, "learning_rate": 1.61993003592541e-05, "loss": 1.4417, "step": 20744 }, { "epoch": 0.9224134098083678, "grad_norm": 0.05848962813615799, "learning_rate": 1.6162410944984552e-05, "loss": 1.448, "step": 20746 }, { "epoch": 0.9225023342670402, "grad_norm": 0.05890442803502083, "learning_rate": 1.6125562891284252e-05, "loss": 1.4442, "step": 20748 }, { "epoch": 0.9225912587257125, "grad_norm": 0.05794266238808632, "learning_rate": 1.6088756201303235e-05, "loss": 1.4415, "step": 20750 }, { "epoch": 0.9226801831843848, "grad_norm": 0.05748625472187996, "learning_rate": 1.6051990878187874e-05, "loss": 1.4459, "step": 20752 }, { "epoch": 0.9227691076430572, "grad_norm": 0.05740351602435112, "learning_rate": 1.601526692508104e-05, "loss": 1.4488, "step": 20754 }, { "epoch": 0.9228580321017296, "grad_norm": 0.05720726400613785, "learning_rate": 1.5978584345122005e-05, "loss": 1.4466, "step": 20756 }, { "epoch": 0.9229469565604019, "grad_norm": 0.058184560388326645, "learning_rate": 1.5941943141446593e-05, "loss": 1.4507, "step": 20758 }, { "epoch": 0.9230358810190743, "grad_norm": 0.05791355296969414, "learning_rate": 1.5905343317187126e-05, "loss": 1.4435, "step": 20760 }, { "epoch": 0.9231248054777467, "grad_norm": 0.05806044861674309, "learning_rate": 1.5868784875472166e-05, "loss": 1.4496, "step": 20762 }, { "epoch": 0.923213729936419, "grad_norm": 0.05714624375104904, "learning_rate": 1.5832267819426982e-05, "loss": 1.4434, "step": 20764 }, { "epoch": 0.9233026543950914, "grad_norm": 0.057807646691799164, "learning_rate": 1.579579215217325e-05, "loss": 1.4504, "step": 20766 }, { "epoch": 0.9233915788537638, "grad_norm": 0.058163121342659, "learning_rate": 1.5759357876828974e-05, "loss": 1.4481, "step": 20768 }, { "epoch": 0.9234805033124361, "grad_norm": 0.059579599648714066, "learning_rate": 1.572296499650877e-05, "loss": 1.4458, "step": 20770 }, { "epoch": 0.9235694277711084, "grad_norm": 0.05729829519987106, "learning_rate": 1.568661351432371e-05, "loss": 1.4442, "step": 20772 }, { "epoch": 0.9236583522297808, "grad_norm": 0.05755337327718735, "learning_rate": 1.5650303433381195e-05, "loss": 1.445, "step": 20774 }, { "epoch": 0.9237472766884531, "grad_norm": 0.057855114340782166, "learning_rate": 1.5614034756785188e-05, "loss": 1.4457, "step": 20776 }, { "epoch": 0.9238362011471255, "grad_norm": 0.05798733979463577, "learning_rate": 1.5577807487636154e-05, "loss": 1.4439, "step": 20778 }, { "epoch": 0.9239251256057979, "grad_norm": 0.059019967913627625, "learning_rate": 1.5541621629030832e-05, "loss": 1.4456, "step": 20780 }, { "epoch": 0.9240140500644702, "grad_norm": 0.057757288217544556, "learning_rate": 1.55054771840627e-05, "loss": 1.4496, "step": 20782 }, { "epoch": 0.9241029745231426, "grad_norm": 0.057486891746520996, "learning_rate": 1.546937415582145e-05, "loss": 1.4503, "step": 20784 }, { "epoch": 0.924191898981815, "grad_norm": 0.05713411793112755, "learning_rate": 1.543331254739333e-05, "loss": 1.4441, "step": 20786 }, { "epoch": 0.9242808234404873, "grad_norm": 0.058376528322696686, "learning_rate": 1.539729236186116e-05, "loss": 1.4535, "step": 20788 }, { "epoch": 0.9243697478991597, "grad_norm": 0.05829921364784241, "learning_rate": 1.5361313602303916e-05, "loss": 1.4443, "step": 20790 }, { "epoch": 0.924458672357832, "grad_norm": 0.0577683262526989, "learning_rate": 1.5325376271797364e-05, "loss": 1.4487, "step": 20792 }, { "epoch": 0.9245475968165043, "grad_norm": 0.0584307499229908, "learning_rate": 1.5289480373413545e-05, "loss": 1.4399, "step": 20794 }, { "epoch": 0.9246365212751767, "grad_norm": 0.05748804286122322, "learning_rate": 1.5253625910221003e-05, "loss": 1.4444, "step": 20796 }, { "epoch": 0.9247254457338491, "grad_norm": 0.058221444487571716, "learning_rate": 1.5217812885284677e-05, "loss": 1.4441, "step": 20798 }, { "epoch": 0.9248143701925214, "grad_norm": 0.05775991082191467, "learning_rate": 1.5182041301666115e-05, "loss": 1.4454, "step": 20800 }, { "epoch": 0.9249032946511938, "grad_norm": 0.05797930434346199, "learning_rate": 1.5146311162423155e-05, "loss": 1.4487, "step": 20802 }, { "epoch": 0.9249922191098662, "grad_norm": 0.05848506838083267, "learning_rate": 1.5110622470610236e-05, "loss": 1.4563, "step": 20804 }, { "epoch": 0.9250811435685385, "grad_norm": 0.05729779601097107, "learning_rate": 1.5074975229278087e-05, "loss": 1.4438, "step": 20806 }, { "epoch": 0.9251700680272109, "grad_norm": 0.058297332376241684, "learning_rate": 1.5039369441474105e-05, "loss": 1.4454, "step": 20808 }, { "epoch": 0.9252589924858833, "grad_norm": 0.057348962873220444, "learning_rate": 1.5003805110241964e-05, "loss": 1.4499, "step": 20810 }, { "epoch": 0.9253479169445556, "grad_norm": 0.05848599225282669, "learning_rate": 1.4968282238621844e-05, "loss": 1.4407, "step": 20812 }, { "epoch": 0.9254368414032279, "grad_norm": 0.058535609394311905, "learning_rate": 1.4932800829650429e-05, "loss": 1.4499, "step": 20814 }, { "epoch": 0.9255257658619003, "grad_norm": 0.05695459619164467, "learning_rate": 1.48973608863609e-05, "loss": 1.4477, "step": 20816 }, { "epoch": 0.9256146903205726, "grad_norm": 0.057941921055316925, "learning_rate": 1.486196241178267e-05, "loss": 1.4571, "step": 20818 }, { "epoch": 0.925703614779245, "grad_norm": 0.05711508169770241, "learning_rate": 1.4826605408941818e-05, "loss": 1.4482, "step": 20820 }, { "epoch": 0.9257925392379174, "grad_norm": 0.05759931355714798, "learning_rate": 1.4791289880860814e-05, "loss": 1.4482, "step": 20822 }, { "epoch": 0.9258814636965897, "grad_norm": 0.05856931582093239, "learning_rate": 1.4756015830558634e-05, "loss": 1.4511, "step": 20824 }, { "epoch": 0.9259703881552621, "grad_norm": 0.05778467282652855, "learning_rate": 1.4720783261050641e-05, "loss": 1.4477, "step": 20826 }, { "epoch": 0.9260593126139345, "grad_norm": 0.05779944732785225, "learning_rate": 1.468559217534854e-05, "loss": 1.4449, "step": 20828 }, { "epoch": 0.9261482370726069, "grad_norm": 0.057623885571956635, "learning_rate": 1.4650442576460865e-05, "loss": 1.4483, "step": 20830 }, { "epoch": 0.9262371615312792, "grad_norm": 0.058261968195438385, "learning_rate": 1.4615334467392216e-05, "loss": 1.4486, "step": 20832 }, { "epoch": 0.9263260859899516, "grad_norm": 0.057926472276449203, "learning_rate": 1.4580267851143747e-05, "loss": 1.4472, "step": 20834 }, { "epoch": 0.9264150104486238, "grad_norm": 0.05730114132165909, "learning_rate": 1.4545242730713115e-05, "loss": 1.447, "step": 20836 }, { "epoch": 0.9265039349072962, "grad_norm": 0.05763164535164833, "learning_rate": 1.4510259109094481e-05, "loss": 1.4426, "step": 20838 }, { "epoch": 0.9265928593659686, "grad_norm": 0.058550890535116196, "learning_rate": 1.4475316989278453e-05, "loss": 1.4475, "step": 20840 }, { "epoch": 0.926681783824641, "grad_norm": 0.05741726979613304, "learning_rate": 1.444041637425192e-05, "loss": 1.4479, "step": 20842 }, { "epoch": 0.9267707082833133, "grad_norm": 0.05805116891860962, "learning_rate": 1.4405557266998382e-05, "loss": 1.4446, "step": 20844 }, { "epoch": 0.9268596327419857, "grad_norm": 0.05792229250073433, "learning_rate": 1.437073967049779e-05, "loss": 1.4407, "step": 20846 }, { "epoch": 0.926948557200658, "grad_norm": 0.0565202422440052, "learning_rate": 1.4335963587726431e-05, "loss": 1.4491, "step": 20848 }, { "epoch": 0.9270374816593304, "grad_norm": 0.05785936489701271, "learning_rate": 1.4301229021657147e-05, "loss": 1.4506, "step": 20850 }, { "epoch": 0.9271264061180028, "grad_norm": 0.05873408913612366, "learning_rate": 1.4266535975259287e-05, "loss": 1.4455, "step": 20852 }, { "epoch": 0.9272153305766752, "grad_norm": 0.05857241526246071, "learning_rate": 1.4231884451498478e-05, "loss": 1.4426, "step": 20854 }, { "epoch": 0.9273042550353475, "grad_norm": 0.05902234464883804, "learning_rate": 1.4197274453336795e-05, "loss": 1.4501, "step": 20856 }, { "epoch": 0.9273931794940198, "grad_norm": 0.05742455646395683, "learning_rate": 1.4162705983733038e-05, "loss": 1.4464, "step": 20858 }, { "epoch": 0.9274821039526921, "grad_norm": 0.05817548558115959, "learning_rate": 1.412817904564212e-05, "loss": 1.4495, "step": 20860 }, { "epoch": 0.9275710284113645, "grad_norm": 0.0579364076256752, "learning_rate": 1.4093693642015737e-05, "loss": 1.4484, "step": 20862 }, { "epoch": 0.9276599528700369, "grad_norm": 0.05716056376695633, "learning_rate": 1.4059249775801642e-05, "loss": 1.4411, "step": 20864 }, { "epoch": 0.9277488773287093, "grad_norm": 0.05732995644211769, "learning_rate": 1.4024847449944366e-05, "loss": 1.4498, "step": 20866 }, { "epoch": 0.9278378017873816, "grad_norm": 0.05792165920138359, "learning_rate": 1.3990486667384784e-05, "loss": 1.4499, "step": 20868 }, { "epoch": 0.927926726246054, "grad_norm": 0.05896197259426117, "learning_rate": 1.395616743106015e-05, "loss": 1.4476, "step": 20870 }, { "epoch": 0.9280156507047264, "grad_norm": 0.0585983581840992, "learning_rate": 1.3921889743904238e-05, "loss": 1.4504, "step": 20872 }, { "epoch": 0.9281045751633987, "grad_norm": 0.058123402297496796, "learning_rate": 1.3887653608847361e-05, "loss": 1.4508, "step": 20874 }, { "epoch": 0.9281934996220711, "grad_norm": 0.057394903153181076, "learning_rate": 1.3853459028815963e-05, "loss": 1.4494, "step": 20876 }, { "epoch": 0.9282824240807435, "grad_norm": 0.0583951398730278, "learning_rate": 1.3819306006733312e-05, "loss": 1.4535, "step": 20878 }, { "epoch": 0.9283713485394157, "grad_norm": 0.05813184753060341, "learning_rate": 1.3785194545518964e-05, "loss": 1.4472, "step": 20880 }, { "epoch": 0.9284602729980881, "grad_norm": 0.057534657418727875, "learning_rate": 1.3751124648088754e-05, "loss": 1.4378, "step": 20882 }, { "epoch": 0.9285491974567605, "grad_norm": 0.05735687166452408, "learning_rate": 1.371709631735535e-05, "loss": 1.4454, "step": 20884 }, { "epoch": 0.9286381219154328, "grad_norm": 0.05778350308537483, "learning_rate": 1.368310955622748e-05, "loss": 1.4487, "step": 20886 }, { "epoch": 0.9287270463741052, "grad_norm": 0.05846724286675453, "learning_rate": 1.364916436761049e-05, "loss": 1.4464, "step": 20888 }, { "epoch": 0.9288159708327776, "grad_norm": 0.057493094354867935, "learning_rate": 1.3615260754406278e-05, "loss": 1.4436, "step": 20890 }, { "epoch": 0.9289048952914499, "grad_norm": 0.05848955735564232, "learning_rate": 1.3581398719512972e-05, "loss": 1.4501, "step": 20892 }, { "epoch": 0.9289938197501223, "grad_norm": 0.05717703327536583, "learning_rate": 1.3547578265825255e-05, "loss": 1.443, "step": 20894 }, { "epoch": 0.9290827442087947, "grad_norm": 0.057882264256477356, "learning_rate": 1.3513799396234372e-05, "loss": 1.4442, "step": 20896 }, { "epoch": 0.929171668667467, "grad_norm": 0.057226452976465225, "learning_rate": 1.3480062113627679e-05, "loss": 1.4469, "step": 20898 }, { "epoch": 0.9292605931261393, "grad_norm": 0.057946473360061646, "learning_rate": 1.3446366420889366e-05, "loss": 1.4427, "step": 20900 }, { "epoch": 0.9293495175848117, "grad_norm": 0.05745415389537811, "learning_rate": 1.3412712320899856e-05, "loss": 1.4486, "step": 20902 }, { "epoch": 0.929438442043484, "grad_norm": 0.05738044157624245, "learning_rate": 1.33790998165359e-05, "loss": 1.4482, "step": 20904 }, { "epoch": 0.9295273665021564, "grad_norm": 0.05781969055533409, "learning_rate": 1.3345528910671033e-05, "loss": 1.4496, "step": 20906 }, { "epoch": 0.9296162909608288, "grad_norm": 0.058522261679172516, "learning_rate": 1.3311999606175018e-05, "loss": 1.4469, "step": 20908 }, { "epoch": 0.9297052154195011, "grad_norm": 0.058261334896087646, "learning_rate": 1.3278511905913948e-05, "loss": 1.4434, "step": 20910 }, { "epoch": 0.9297941398781735, "grad_norm": 0.05749499797821045, "learning_rate": 1.3245065812750645e-05, "loss": 1.4438, "step": 20912 }, { "epoch": 0.9298830643368459, "grad_norm": 0.058415595442056656, "learning_rate": 1.32116613295441e-05, "loss": 1.4472, "step": 20914 }, { "epoch": 0.9299719887955182, "grad_norm": 0.0577775239944458, "learning_rate": 1.3178298459150084e-05, "loss": 1.4461, "step": 20916 }, { "epoch": 0.9300609132541906, "grad_norm": 0.058564167469739914, "learning_rate": 1.3144977204420427e-05, "loss": 1.4459, "step": 20918 }, { "epoch": 0.930149837712863, "grad_norm": 0.05728235840797424, "learning_rate": 1.3111697568203574e-05, "loss": 1.4465, "step": 20920 }, { "epoch": 0.9302387621715352, "grad_norm": 0.05898916721343994, "learning_rate": 1.307845955334447e-05, "loss": 1.4465, "step": 20922 }, { "epoch": 0.9303276866302076, "grad_norm": 0.05832632631063461, "learning_rate": 1.304526316268445e-05, "loss": 1.4451, "step": 20924 }, { "epoch": 0.93041661108888, "grad_norm": 0.057864874601364136, "learning_rate": 1.301210839906125e-05, "loss": 1.4472, "step": 20926 }, { "epoch": 0.9305055355475523, "grad_norm": 0.05715201422572136, "learning_rate": 1.2978995265309158e-05, "loss": 1.4447, "step": 20928 }, { "epoch": 0.9305944600062247, "grad_norm": 0.05769526958465576, "learning_rate": 1.2945923764258738e-05, "loss": 1.4445, "step": 20930 }, { "epoch": 0.9306833844648971, "grad_norm": 0.05778975784778595, "learning_rate": 1.2912893898737177e-05, "loss": 1.4497, "step": 20932 }, { "epoch": 0.9307723089235694, "grad_norm": 0.05751507729291916, "learning_rate": 1.2879905671567938e-05, "loss": 1.4536, "step": 20934 }, { "epoch": 0.9308612333822418, "grad_norm": 0.05899496749043465, "learning_rate": 1.2846959085570985e-05, "loss": 1.4458, "step": 20936 }, { "epoch": 0.9309501578409142, "grad_norm": 0.05783265456557274, "learning_rate": 1.2814054143562848e-05, "loss": 1.4489, "step": 20938 }, { "epoch": 0.9310390822995865, "grad_norm": 0.05845189839601517, "learning_rate": 1.2781190848356328e-05, "loss": 1.4459, "step": 20940 }, { "epoch": 0.9311280067582589, "grad_norm": 0.05726044625043869, "learning_rate": 1.2748369202760678e-05, "loss": 1.4408, "step": 20942 }, { "epoch": 0.9312169312169312, "grad_norm": 0.058978818356990814, "learning_rate": 1.2715589209581713e-05, "loss": 1.4472, "step": 20944 }, { "epoch": 0.9313058556756035, "grad_norm": 0.05756903067231178, "learning_rate": 1.2682850871621521e-05, "loss": 1.4468, "step": 20946 }, { "epoch": 0.9313947801342759, "grad_norm": 0.05813246965408325, "learning_rate": 1.2650154191678864e-05, "loss": 1.4485, "step": 20948 }, { "epoch": 0.9314837045929483, "grad_norm": 0.05751319229602814, "learning_rate": 1.2617499172548619e-05, "loss": 1.449, "step": 20950 }, { "epoch": 0.9315726290516206, "grad_norm": 0.05843638256192207, "learning_rate": 1.2584885817022385e-05, "loss": 1.4567, "step": 20952 }, { "epoch": 0.931661553510293, "grad_norm": 0.05761188268661499, "learning_rate": 1.2552314127888153e-05, "loss": 1.4446, "step": 20954 }, { "epoch": 0.9317504779689654, "grad_norm": 0.05716591700911522, "learning_rate": 1.2519784107930144e-05, "loss": 1.4493, "step": 20956 }, { "epoch": 0.9318394024276377, "grad_norm": 0.05824635922908783, "learning_rate": 1.248729575992924e-05, "loss": 1.4445, "step": 20958 }, { "epoch": 0.9319283268863101, "grad_norm": 0.05802621319890022, "learning_rate": 1.2454849086662723e-05, "loss": 1.4477, "step": 20960 }, { "epoch": 0.9320172513449825, "grad_norm": 0.05747433751821518, "learning_rate": 1.242244409090426e-05, "loss": 1.4448, "step": 20962 }, { "epoch": 0.9321061758036548, "grad_norm": 0.05807986482977867, "learning_rate": 1.2390080775423917e-05, "loss": 1.4437, "step": 20964 }, { "epoch": 0.9321951002623271, "grad_norm": 0.05921353027224541, "learning_rate": 1.2357759142988367e-05, "loss": 1.4476, "step": 20966 }, { "epoch": 0.9322840247209995, "grad_norm": 0.05769893899559975, "learning_rate": 1.2325479196360457e-05, "loss": 1.4546, "step": 20968 }, { "epoch": 0.9323729491796718, "grad_norm": 0.058186814188957214, "learning_rate": 1.229324093829981e-05, "loss": 1.447, "step": 20970 }, { "epoch": 0.9324618736383442, "grad_norm": 0.057806309312582016, "learning_rate": 1.2261044371562058e-05, "loss": 1.4412, "step": 20972 }, { "epoch": 0.9325507980970166, "grad_norm": 0.05800100043416023, "learning_rate": 1.2228889498899664e-05, "loss": 1.4428, "step": 20974 }, { "epoch": 0.9326397225556889, "grad_norm": 0.057893089950084686, "learning_rate": 1.2196776323061376e-05, "loss": 1.4504, "step": 20976 }, { "epoch": 0.9327286470143613, "grad_norm": 0.05721002444624901, "learning_rate": 1.2164704846792275e-05, "loss": 1.4393, "step": 20978 }, { "epoch": 0.9328175714730337, "grad_norm": 0.05754590034484863, "learning_rate": 1.2132675072834054e-05, "loss": 1.4513, "step": 20980 }, { "epoch": 0.932906495931706, "grad_norm": 0.05840574577450752, "learning_rate": 1.2100687003924748e-05, "loss": 1.4451, "step": 20982 }, { "epoch": 0.9329954203903784, "grad_norm": 0.058282025158405304, "learning_rate": 1.2068740642798782e-05, "loss": 1.4451, "step": 20984 }, { "epoch": 0.9330843448490508, "grad_norm": 0.05762806534767151, "learning_rate": 1.2036835992187078e-05, "loss": 1.4521, "step": 20986 }, { "epoch": 0.933173269307723, "grad_norm": 0.058149222284555435, "learning_rate": 1.2004973054817125e-05, "loss": 1.4418, "step": 20988 }, { "epoch": 0.9332621937663954, "grad_norm": 0.057563796639442444, "learning_rate": 1.1973151833412466e-05, "loss": 1.4417, "step": 20990 }, { "epoch": 0.9333511182250678, "grad_norm": 0.05817318707704544, "learning_rate": 1.194137233069359e-05, "loss": 1.4438, "step": 20992 }, { "epoch": 0.9334400426837401, "grad_norm": 0.05729508027434349, "learning_rate": 1.1909634549376935e-05, "loss": 1.4476, "step": 20994 }, { "epoch": 0.9335289671424125, "grad_norm": 0.05712838098406792, "learning_rate": 1.187793849217561e-05, "loss": 1.4403, "step": 20996 }, { "epoch": 0.9336178916010849, "grad_norm": 0.05793168023228645, "learning_rate": 1.1846284161799281e-05, "loss": 1.4466, "step": 20998 }, { "epoch": 0.9337068160597572, "grad_norm": 0.057333990931510925, "learning_rate": 1.1814671560953783e-05, "loss": 1.4399, "step": 21000 }, { "epoch": 0.9337068160597572, "eval_loss": 1.4323413372039795, "eval_runtime": 12.424, "eval_samples_per_second": 556.179, "eval_steps_per_second": 69.543, "step": 21000 }, { "epoch": 0.9337957405184296, "grad_norm": 0.05756537243723869, "learning_rate": 1.1783100692341508e-05, "loss": 1.4479, "step": 21002 }, { "epoch": 0.933884664977102, "grad_norm": 0.0578317753970623, "learning_rate": 1.17515715586613e-05, "loss": 1.4463, "step": 21004 }, { "epoch": 0.9339735894357744, "grad_norm": 0.056406132876873016, "learning_rate": 1.172008416260839e-05, "loss": 1.4472, "step": 21006 }, { "epoch": 0.9340625138944467, "grad_norm": 0.0582900270819664, "learning_rate": 1.1688638506874405e-05, "loss": 1.452, "step": 21008 }, { "epoch": 0.934151438353119, "grad_norm": 0.05787406116724014, "learning_rate": 1.1657234594147636e-05, "loss": 1.4507, "step": 21010 }, { "epoch": 0.9342403628117913, "grad_norm": 0.05701528117060661, "learning_rate": 1.1625872427112439e-05, "loss": 1.4506, "step": 21012 }, { "epoch": 0.9343292872704637, "grad_norm": 0.05795227363705635, "learning_rate": 1.1594552008449888e-05, "loss": 1.4454, "step": 21014 }, { "epoch": 0.9344182117291361, "grad_norm": 0.05739319324493408, "learning_rate": 1.1563273340837289e-05, "loss": 1.451, "step": 21016 }, { "epoch": 0.9345071361878085, "grad_norm": 0.05824971944093704, "learning_rate": 1.1532036426948667e-05, "loss": 1.4479, "step": 21018 }, { "epoch": 0.9345960606464808, "grad_norm": 0.05758555978536606, "learning_rate": 1.1500841269454166e-05, "loss": 1.4407, "step": 21020 }, { "epoch": 0.9346849851051532, "grad_norm": 0.05768615007400513, "learning_rate": 1.1469687871020428e-05, "loss": 1.4472, "step": 21022 }, { "epoch": 0.9347739095638256, "grad_norm": 0.057626668363809586, "learning_rate": 1.143857623431066e-05, "loss": 1.4431, "step": 21024 }, { "epoch": 0.9348628340224979, "grad_norm": 0.057193391025066376, "learning_rate": 1.1407506361984509e-05, "loss": 1.4451, "step": 21026 }, { "epoch": 0.9349517584811703, "grad_norm": 0.05753851309418678, "learning_rate": 1.1376478256697797e-05, "loss": 1.4413, "step": 21028 }, { "epoch": 0.9350406829398425, "grad_norm": 0.05880241096019745, "learning_rate": 1.1345491921102957e-05, "loss": 1.4487, "step": 21030 }, { "epoch": 0.9351296073985149, "grad_norm": 0.057462744414806366, "learning_rate": 1.1314547357849037e-05, "loss": 1.4432, "step": 21032 }, { "epoch": 0.9352185318571873, "grad_norm": 0.05817759782075882, "learning_rate": 1.1283644569581087e-05, "loss": 1.4497, "step": 21034 }, { "epoch": 0.9353074563158597, "grad_norm": 0.058256879448890686, "learning_rate": 1.1252783558940882e-05, "loss": 1.4495, "step": 21036 }, { "epoch": 0.935396380774532, "grad_norm": 0.05921512097120285, "learning_rate": 1.1221964328566647e-05, "loss": 1.444, "step": 21038 }, { "epoch": 0.9354853052332044, "grad_norm": 0.05717106908559799, "learning_rate": 1.119118688109283e-05, "loss": 1.4431, "step": 21040 }, { "epoch": 0.9355742296918768, "grad_norm": 0.058333754539489746, "learning_rate": 1.1160451219150548e-05, "loss": 1.4481, "step": 21042 }, { "epoch": 0.9356631541505491, "grad_norm": 0.05773552879691124, "learning_rate": 1.112975734536703e-05, "loss": 1.4508, "step": 21044 }, { "epoch": 0.9357520786092215, "grad_norm": 0.058473654091358185, "learning_rate": 1.1099105262366294e-05, "loss": 1.452, "step": 21046 }, { "epoch": 0.9358410030678939, "grad_norm": 0.057561490684747696, "learning_rate": 1.1068494972768572e-05, "loss": 1.4452, "step": 21048 }, { "epoch": 0.9359299275265662, "grad_norm": 0.05912725254893303, "learning_rate": 1.1037926479190497e-05, "loss": 1.4493, "step": 21050 }, { "epoch": 0.9360188519852385, "grad_norm": 0.05738065019249916, "learning_rate": 1.1007399784245308e-05, "loss": 1.4438, "step": 21052 }, { "epoch": 0.9361077764439109, "grad_norm": 0.05802926421165466, "learning_rate": 1.0976914890542422e-05, "loss": 1.4481, "step": 21054 }, { "epoch": 0.9361967009025832, "grad_norm": 0.05792355164885521, "learning_rate": 1.0946471800688029e-05, "loss": 1.4466, "step": 21056 }, { "epoch": 0.9362856253612556, "grad_norm": 0.059775542467832565, "learning_rate": 1.0916070517284327e-05, "loss": 1.4437, "step": 21058 }, { "epoch": 0.936374549819928, "grad_norm": 0.058133359998464584, "learning_rate": 1.0885711042930235e-05, "loss": 1.4453, "step": 21060 }, { "epoch": 0.9364634742786003, "grad_norm": 0.05763556808233261, "learning_rate": 1.0855393380221068e-05, "loss": 1.4554, "step": 21062 }, { "epoch": 0.9365523987372727, "grad_norm": 0.058549582958221436, "learning_rate": 1.0825117531748474e-05, "loss": 1.447, "step": 21064 }, { "epoch": 0.9366413231959451, "grad_norm": 0.057294197380542755, "learning_rate": 1.0794883500100494e-05, "loss": 1.4436, "step": 21066 }, { "epoch": 0.9367302476546174, "grad_norm": 0.05782110616564751, "learning_rate": 1.0764691287861783e-05, "loss": 1.4539, "step": 21068 }, { "epoch": 0.9368191721132898, "grad_norm": 0.05737898498773575, "learning_rate": 1.0734540897613221e-05, "loss": 1.4546, "step": 21070 }, { "epoch": 0.9369080965719622, "grad_norm": 0.05745162069797516, "learning_rate": 1.0704432331932245e-05, "loss": 1.4459, "step": 21072 }, { "epoch": 0.9369970210306344, "grad_norm": 0.05851050093770027, "learning_rate": 1.0674365593392633e-05, "loss": 1.4427, "step": 21074 }, { "epoch": 0.9370859454893068, "grad_norm": 0.058115605264902115, "learning_rate": 1.0644340684564657e-05, "loss": 1.442, "step": 21076 }, { "epoch": 0.9371748699479792, "grad_norm": 0.05693921446800232, "learning_rate": 1.061435760801499e-05, "loss": 1.4476, "step": 21078 }, { "epoch": 0.9372637944066515, "grad_norm": 0.05739055946469307, "learning_rate": 1.0584416366306581e-05, "loss": 1.4492, "step": 21080 }, { "epoch": 0.9373527188653239, "grad_norm": 0.05716922506690025, "learning_rate": 1.0554516961999105e-05, "loss": 1.4438, "step": 21082 }, { "epoch": 0.9374416433239963, "grad_norm": 0.05674619972705841, "learning_rate": 1.0524659397648462e-05, "loss": 1.4482, "step": 21084 }, { "epoch": 0.9375305677826686, "grad_norm": 0.058578867465257645, "learning_rate": 1.0494843675806942e-05, "loss": 1.4419, "step": 21086 }, { "epoch": 0.937619492241341, "grad_norm": 0.05818881839513779, "learning_rate": 1.0465069799023397e-05, "loss": 1.4496, "step": 21088 }, { "epoch": 0.9377084167000134, "grad_norm": 0.058019597083330154, "learning_rate": 1.0435337769843012e-05, "loss": 1.4423, "step": 21090 }, { "epoch": 0.9377973411586857, "grad_norm": 0.05781329795718193, "learning_rate": 1.0405647590807366e-05, "loss": 1.4467, "step": 21092 }, { "epoch": 0.9378862656173581, "grad_norm": 0.05810556560754776, "learning_rate": 1.0375999264454483e-05, "loss": 1.4453, "step": 21094 }, { "epoch": 0.9379751900760304, "grad_norm": 0.05800723284482956, "learning_rate": 1.0346392793319005e-05, "loss": 1.442, "step": 21096 }, { "epoch": 0.9380641145347027, "grad_norm": 0.0583004355430603, "learning_rate": 1.031682817993157e-05, "loss": 1.447, "step": 21098 }, { "epoch": 0.9381530389933751, "grad_norm": 0.05934945121407509, "learning_rate": 1.0287305426819771e-05, "loss": 1.4457, "step": 21100 }, { "epoch": 0.9382419634520475, "grad_norm": 0.05703364312648773, "learning_rate": 1.0257824536507088e-05, "loss": 1.4435, "step": 21102 }, { "epoch": 0.9383308879107198, "grad_norm": 0.057916540652513504, "learning_rate": 1.0228385511513782e-05, "loss": 1.4447, "step": 21104 }, { "epoch": 0.9384198123693922, "grad_norm": 0.05742233991622925, "learning_rate": 1.0198988354356509e-05, "loss": 1.442, "step": 21106 }, { "epoch": 0.9385087368280646, "grad_norm": 0.05896897241473198, "learning_rate": 1.0169633067548145e-05, "loss": 1.4476, "step": 21108 }, { "epoch": 0.9385976612867369, "grad_norm": 0.05869178846478462, "learning_rate": 1.0140319653598185e-05, "loss": 1.4472, "step": 21110 }, { "epoch": 0.9386865857454093, "grad_norm": 0.05712299793958664, "learning_rate": 1.0111048115012456e-05, "loss": 1.4457, "step": 21112 }, { "epoch": 0.9387755102040817, "grad_norm": 0.05764850974082947, "learning_rate": 1.0081818454293124e-05, "loss": 1.4435, "step": 21114 }, { "epoch": 0.938864434662754, "grad_norm": 0.05720280855894089, "learning_rate": 1.0052630673939023e-05, "loss": 1.4491, "step": 21116 }, { "epoch": 0.9389533591214263, "grad_norm": 0.058444440364837646, "learning_rate": 1.0023484776445158e-05, "loss": 1.4496, "step": 21118 }, { "epoch": 0.9390422835800987, "grad_norm": 0.0576569139957428, "learning_rate": 9.994380764303091e-06, "loss": 1.4523, "step": 21120 }, { "epoch": 0.939131208038771, "grad_norm": 0.05701880529522896, "learning_rate": 9.96531864000072e-06, "loss": 1.4496, "step": 21122 }, { "epoch": 0.9392201324974434, "grad_norm": 0.05704936385154724, "learning_rate": 9.936298406022393e-06, "loss": 1.4445, "step": 21124 }, { "epoch": 0.9393090569561158, "grad_norm": 0.05782592296600342, "learning_rate": 9.907320064848956e-06, "loss": 1.4462, "step": 21126 }, { "epoch": 0.9393979814147881, "grad_norm": 0.05813451111316681, "learning_rate": 9.878383618957599e-06, "loss": 1.4475, "step": 21128 }, { "epoch": 0.9394869058734605, "grad_norm": 0.05756333842873573, "learning_rate": 9.849489070821893e-06, "loss": 1.4443, "step": 21130 }, { "epoch": 0.9395758303321329, "grad_norm": 0.05767510086297989, "learning_rate": 9.820636422911811e-06, "loss": 1.4527, "step": 21132 }, { "epoch": 0.9396647547908052, "grad_norm": 0.057892054319381714, "learning_rate": 9.791825677693988e-06, "loss": 1.4458, "step": 21134 }, { "epoch": 0.9397536792494776, "grad_norm": 0.057954948395490646, "learning_rate": 9.763056837631123e-06, "loss": 1.4412, "step": 21136 }, { "epoch": 0.93984260370815, "grad_norm": 0.05756092816591263, "learning_rate": 9.734329905182527e-06, "loss": 1.4492, "step": 21138 }, { "epoch": 0.9399315281668222, "grad_norm": 0.05734538286924362, "learning_rate": 9.705644882803954e-06, "loss": 1.4451, "step": 21140 }, { "epoch": 0.9400204526254946, "grad_norm": 0.057080693542957306, "learning_rate": 9.677001772947614e-06, "loss": 1.4529, "step": 21142 }, { "epoch": 0.940109377084167, "grad_norm": 0.057391393929719925, "learning_rate": 9.648400578061822e-06, "loss": 1.4459, "step": 21144 }, { "epoch": 0.9401983015428393, "grad_norm": 0.0573163703083992, "learning_rate": 9.619841300591736e-06, "loss": 1.4446, "step": 21146 }, { "epoch": 0.9402872260015117, "grad_norm": 0.058304496109485626, "learning_rate": 9.591323942978624e-06, "loss": 1.4483, "step": 21148 }, { "epoch": 0.9403761504601841, "grad_norm": 0.059254664927721024, "learning_rate": 9.562848507660316e-06, "loss": 1.4455, "step": 21150 }, { "epoch": 0.9404650749188564, "grad_norm": 0.05732493847608566, "learning_rate": 9.534414997070973e-06, "loss": 1.4498, "step": 21152 }, { "epoch": 0.9405539993775288, "grad_norm": 0.05808182805776596, "learning_rate": 9.506023413641263e-06, "loss": 1.4464, "step": 21154 }, { "epoch": 0.9406429238362012, "grad_norm": 0.05795228108763695, "learning_rate": 9.477673759798188e-06, "loss": 1.444, "step": 21156 }, { "epoch": 0.9407318482948736, "grad_norm": 0.0579368956387043, "learning_rate": 9.449366037965313e-06, "loss": 1.4475, "step": 21158 }, { "epoch": 0.9408207727535458, "grad_norm": 0.05821996554732323, "learning_rate": 9.421100250562309e-06, "loss": 1.4519, "step": 21160 }, { "epoch": 0.9409096972122182, "grad_norm": 0.058779843151569366, "learning_rate": 9.392876400005579e-06, "loss": 1.4436, "step": 21162 }, { "epoch": 0.9409986216708905, "grad_norm": 0.05720444768667221, "learning_rate": 9.364694488707858e-06, "loss": 1.4545, "step": 21164 }, { "epoch": 0.9410875461295629, "grad_norm": 0.057297226041555405, "learning_rate": 9.336554519078221e-06, "loss": 1.4425, "step": 21166 }, { "epoch": 0.9411764705882353, "grad_norm": 0.057402316480875015, "learning_rate": 9.308456493522133e-06, "loss": 1.4458, "step": 21168 }, { "epoch": 0.9412653950469076, "grad_norm": 0.05686284974217415, "learning_rate": 9.280400414441613e-06, "loss": 1.4437, "step": 21170 }, { "epoch": 0.94135431950558, "grad_norm": 0.05828374624252319, "learning_rate": 9.252386284235026e-06, "loss": 1.4463, "step": 21172 }, { "epoch": 0.9414432439642524, "grad_norm": 0.05804366618394852, "learning_rate": 9.224414105297064e-06, "loss": 1.4455, "step": 21174 }, { "epoch": 0.9415321684229248, "grad_norm": 0.058524325489997864, "learning_rate": 9.19648388001898e-06, "loss": 1.4406, "step": 21176 }, { "epoch": 0.9416210928815971, "grad_norm": 0.05712493881583214, "learning_rate": 9.168595610788365e-06, "loss": 1.445, "step": 21178 }, { "epoch": 0.9417100173402695, "grad_norm": 0.05792125314474106, "learning_rate": 9.140749299989205e-06, "loss": 1.4381, "step": 21180 }, { "epoch": 0.9417989417989417, "grad_norm": 0.05810127779841423, "learning_rate": 9.112944950001978e-06, "loss": 1.4472, "step": 21182 }, { "epoch": 0.9418878662576141, "grad_norm": 0.05826450139284134, "learning_rate": 9.085182563203453e-06, "loss": 1.4468, "step": 21184 }, { "epoch": 0.9419767907162865, "grad_norm": 0.0577828474342823, "learning_rate": 9.057462141966898e-06, "loss": 1.4437, "step": 21186 }, { "epoch": 0.9420657151749589, "grad_norm": 0.05752667412161827, "learning_rate": 9.02978368866203e-06, "loss": 1.4479, "step": 21188 }, { "epoch": 0.9421546396336312, "grad_norm": 0.05732182040810585, "learning_rate": 9.002147205654843e-06, "loss": 1.4476, "step": 21190 }, { "epoch": 0.9422435640923036, "grad_norm": 0.057963550090789795, "learning_rate": 8.974552695307948e-06, "loss": 1.4481, "step": 21192 }, { "epoch": 0.942332488550976, "grad_norm": 0.05983150750398636, "learning_rate": 8.947000159980067e-06, "loss": 1.4485, "step": 21194 }, { "epoch": 0.9424214130096483, "grad_norm": 0.05860733240842819, "learning_rate": 8.919489602026653e-06, "loss": 1.4393, "step": 21196 }, { "epoch": 0.9425103374683207, "grad_norm": 0.05784778296947479, "learning_rate": 8.892021023799435e-06, "loss": 1.4464, "step": 21198 }, { "epoch": 0.9425992619269931, "grad_norm": 0.05762090906500816, "learning_rate": 8.864594427646478e-06, "loss": 1.4431, "step": 21200 }, { "epoch": 0.9426881863856654, "grad_norm": 0.0579993836581707, "learning_rate": 8.837209815912295e-06, "loss": 1.4431, "step": 21202 }, { "epoch": 0.9427771108443377, "grad_norm": 0.0585738830268383, "learning_rate": 8.809867190938014e-06, "loss": 1.4496, "step": 21204 }, { "epoch": 0.94286603530301, "grad_norm": 0.05800570920109749, "learning_rate": 8.782566555060822e-06, "loss": 1.4428, "step": 21206 }, { "epoch": 0.9429549597616824, "grad_norm": 0.058509331196546555, "learning_rate": 8.755307910614574e-06, "loss": 1.4455, "step": 21208 }, { "epoch": 0.9430438842203548, "grad_norm": 0.05728688836097717, "learning_rate": 8.728091259929404e-06, "loss": 1.4459, "step": 21210 }, { "epoch": 0.9431328086790272, "grad_norm": 0.057143598794937134, "learning_rate": 8.70091660533201e-06, "loss": 1.4493, "step": 21212 }, { "epoch": 0.9432217331376995, "grad_norm": 0.05791422724723816, "learning_rate": 8.673783949145364e-06, "loss": 1.4482, "step": 21214 }, { "epoch": 0.9433106575963719, "grad_norm": 0.05727590247988701, "learning_rate": 8.646693293688834e-06, "loss": 1.4514, "step": 21216 }, { "epoch": 0.9433995820550443, "grad_norm": 0.05757603794336319, "learning_rate": 8.61964464127829e-06, "loss": 1.4526, "step": 21218 }, { "epoch": 0.9434885065137166, "grad_norm": 0.057514291256666183, "learning_rate": 8.592637994225994e-06, "loss": 1.4421, "step": 21220 }, { "epoch": 0.943577430972389, "grad_norm": 0.05774332210421562, "learning_rate": 8.565673354840543e-06, "loss": 1.4437, "step": 21222 }, { "epoch": 0.9436663554310614, "grad_norm": 0.05754677578806877, "learning_rate": 8.53875072542698e-06, "loss": 1.4513, "step": 21224 }, { "epoch": 0.9437552798897336, "grad_norm": 0.05838412791490555, "learning_rate": 8.511870108286857e-06, "loss": 1.449, "step": 21226 }, { "epoch": 0.943844204348406, "grad_norm": 0.05649451166391373, "learning_rate": 8.485031505718e-06, "loss": 1.4425, "step": 21228 }, { "epoch": 0.9439331288070784, "grad_norm": 0.05882545933127403, "learning_rate": 8.458234920014684e-06, "loss": 1.4436, "step": 21230 }, { "epoch": 0.9440220532657507, "grad_norm": 0.05716662108898163, "learning_rate": 8.431480353467524e-06, "loss": 1.4402, "step": 21232 }, { "epoch": 0.9441109777244231, "grad_norm": 0.05844126269221306, "learning_rate": 8.404767808363744e-06, "loss": 1.4457, "step": 21234 }, { "epoch": 0.9441999021830955, "grad_norm": 0.05845796316862106, "learning_rate": 8.378097286986852e-06, "loss": 1.4508, "step": 21236 }, { "epoch": 0.9442888266417678, "grad_norm": 0.059185273945331573, "learning_rate": 8.351468791616634e-06, "loss": 1.4452, "step": 21238 }, { "epoch": 0.9443777511004402, "grad_norm": 0.05794282630085945, "learning_rate": 8.324882324529548e-06, "loss": 1.4503, "step": 21240 }, { "epoch": 0.9444666755591126, "grad_norm": 0.05867988243699074, "learning_rate": 8.29833788799822e-06, "loss": 1.4503, "step": 21242 }, { "epoch": 0.9445556000177849, "grad_norm": 0.05754483863711357, "learning_rate": 8.271835484291835e-06, "loss": 1.4445, "step": 21244 }, { "epoch": 0.9446445244764573, "grad_norm": 0.05763980373740196, "learning_rate": 8.245375115675912e-06, "loss": 1.4464, "step": 21246 }, { "epoch": 0.9447334489351296, "grad_norm": 0.05797286331653595, "learning_rate": 8.218956784412479e-06, "loss": 1.448, "step": 21248 }, { "epoch": 0.9448223733938019, "grad_norm": 0.05742442235350609, "learning_rate": 8.192580492759783e-06, "loss": 1.4453, "step": 21250 }, { "epoch": 0.9449112978524743, "grad_norm": 0.05741652473807335, "learning_rate": 8.16624624297263e-06, "loss": 1.4499, "step": 21252 }, { "epoch": 0.9450002223111467, "grad_norm": 0.058388851583004, "learning_rate": 8.139954037302222e-06, "loss": 1.4424, "step": 21254 }, { "epoch": 0.945089146769819, "grad_norm": 0.058313120156526566, "learning_rate": 8.113703877996092e-06, "loss": 1.4426, "step": 21256 }, { "epoch": 0.9451780712284914, "grad_norm": 0.05811810865998268, "learning_rate": 8.087495767298226e-06, "loss": 1.441, "step": 21258 }, { "epoch": 0.9452669956871638, "grad_norm": 0.057785093784332275, "learning_rate": 8.061329707448939e-06, "loss": 1.4496, "step": 21260 }, { "epoch": 0.9453559201458361, "grad_norm": 0.05778678134083748, "learning_rate": 8.035205700685167e-06, "loss": 1.4433, "step": 21262 }, { "epoch": 0.9454448446045085, "grad_norm": 0.057463571429252625, "learning_rate": 8.00912374924001e-06, "loss": 1.4445, "step": 21264 }, { "epoch": 0.9455337690631809, "grad_norm": 0.058068107813596725, "learning_rate": 7.983083855343132e-06, "loss": 1.4452, "step": 21266 }, { "epoch": 0.9456226935218531, "grad_norm": 0.05680304020643234, "learning_rate": 7.957086021220417e-06, "loss": 1.4465, "step": 21268 }, { "epoch": 0.9457116179805255, "grad_norm": 0.057669758796691895, "learning_rate": 7.931130249094365e-06, "loss": 1.4493, "step": 21270 }, { "epoch": 0.9458005424391979, "grad_norm": 0.056808967143297195, "learning_rate": 7.905216541183869e-06, "loss": 1.4448, "step": 21272 }, { "epoch": 0.9458894668978702, "grad_norm": 0.05728383734822273, "learning_rate": 7.879344899703932e-06, "loss": 1.4457, "step": 21274 }, { "epoch": 0.9459783913565426, "grad_norm": 0.05765995383262634, "learning_rate": 7.853515326866345e-06, "loss": 1.4424, "step": 21276 }, { "epoch": 0.946067315815215, "grad_norm": 0.0581035315990448, "learning_rate": 7.827727824879116e-06, "loss": 1.443, "step": 21278 }, { "epoch": 0.9461562402738873, "grad_norm": 0.05767706036567688, "learning_rate": 7.801982395946649e-06, "loss": 1.4423, "step": 21280 }, { "epoch": 0.9462451647325597, "grad_norm": 0.057066384702920914, "learning_rate": 7.776279042269685e-06, "loss": 1.4482, "step": 21282 }, { "epoch": 0.9463340891912321, "grad_norm": 0.058075472712516785, "learning_rate": 7.750617766045688e-06, "loss": 1.4452, "step": 21284 }, { "epoch": 0.9464230136499044, "grad_norm": 0.05739322304725647, "learning_rate": 7.724998569468066e-06, "loss": 1.4477, "step": 21286 }, { "epoch": 0.9465119381085768, "grad_norm": 0.05727776512503624, "learning_rate": 7.699421454726963e-06, "loss": 1.447, "step": 21288 }, { "epoch": 0.9466008625672491, "grad_norm": 0.057564686983823776, "learning_rate": 7.67388642400879e-06, "loss": 1.4455, "step": 21290 }, { "epoch": 0.9466897870259214, "grad_norm": 0.05791555717587471, "learning_rate": 7.648393479496419e-06, "loss": 1.4429, "step": 21292 }, { "epoch": 0.9467787114845938, "grad_norm": 0.057518370449543, "learning_rate": 7.622942623369156e-06, "loss": 1.4435, "step": 21294 }, { "epoch": 0.9468676359432662, "grad_norm": 0.05689391866326332, "learning_rate": 7.597533857802541e-06, "loss": 1.4461, "step": 21296 }, { "epoch": 0.9469565604019385, "grad_norm": 0.058041494339704514, "learning_rate": 7.57216718496867e-06, "loss": 1.4456, "step": 21298 }, { "epoch": 0.9470454848606109, "grad_norm": 0.057841625064611435, "learning_rate": 7.5468426070360865e-06, "loss": 1.4501, "step": 21300 }, { "epoch": 0.9471344093192833, "grad_norm": 0.057104241102933884, "learning_rate": 7.521560126169502e-06, "loss": 1.4433, "step": 21302 }, { "epoch": 0.9472233337779556, "grad_norm": 0.058944664895534515, "learning_rate": 7.4963197445302445e-06, "loss": 1.4469, "step": 21304 }, { "epoch": 0.947312258236628, "grad_norm": 0.05764506757259369, "learning_rate": 7.471121464275976e-06, "loss": 1.4485, "step": 21306 }, { "epoch": 0.9474011826953004, "grad_norm": 0.05761958658695221, "learning_rate": 7.445965287560752e-06, "loss": 1.4491, "step": 21308 }, { "epoch": 0.9474901071539727, "grad_norm": 0.0575152151286602, "learning_rate": 7.420851216535019e-06, "loss": 1.4428, "step": 21310 }, { "epoch": 0.947579031612645, "grad_norm": 0.058228038251399994, "learning_rate": 7.3957792533456695e-06, "loss": 1.4463, "step": 21312 }, { "epoch": 0.9476679560713174, "grad_norm": 0.057448986917734146, "learning_rate": 7.3707494001358785e-06, "loss": 1.4482, "step": 21314 }, { "epoch": 0.9477568805299897, "grad_norm": 0.05910767614841461, "learning_rate": 7.3457616590454916e-06, "loss": 1.445, "step": 21316 }, { "epoch": 0.9478458049886621, "grad_norm": 0.057866428047418594, "learning_rate": 7.320816032210353e-06, "loss": 1.4464, "step": 21318 }, { "epoch": 0.9479347294473345, "grad_norm": 0.05850965157151222, "learning_rate": 7.295912521763038e-06, "loss": 1.4437, "step": 21320 }, { "epoch": 0.9480236539060068, "grad_norm": 0.057498205453157425, "learning_rate": 7.271051129832451e-06, "loss": 1.449, "step": 21322 }, { "epoch": 0.9481125783646792, "grad_norm": 0.057792823761701584, "learning_rate": 7.246231858543784e-06, "loss": 1.4393, "step": 21324 }, { "epoch": 0.9482015028233516, "grad_norm": 0.058098308742046356, "learning_rate": 7.221454710018671e-06, "loss": 1.4419, "step": 21326 }, { "epoch": 0.948290427282024, "grad_norm": 0.058741819113492966, "learning_rate": 7.196719686375308e-06, "loss": 1.4463, "step": 21328 }, { "epoch": 0.9483793517406963, "grad_norm": 0.0579305998980999, "learning_rate": 7.1720267897279475e-06, "loss": 1.4438, "step": 21330 }, { "epoch": 0.9484682761993687, "grad_norm": 0.05758350342512131, "learning_rate": 7.147376022187624e-06, "loss": 1.4539, "step": 21332 }, { "epoch": 0.9485572006580409, "grad_norm": 0.05874762684106827, "learning_rate": 7.122767385861484e-06, "loss": 1.4462, "step": 21334 }, { "epoch": 0.9486461251167133, "grad_norm": 0.05720106512308121, "learning_rate": 7.09820088285329e-06, "loss": 1.4421, "step": 21336 }, { "epoch": 0.9487350495753857, "grad_norm": 0.057676248252391815, "learning_rate": 7.073676515263028e-06, "loss": 1.4495, "step": 21338 }, { "epoch": 0.948823974034058, "grad_norm": 0.05915232002735138, "learning_rate": 7.049194285187077e-06, "loss": 1.4458, "step": 21340 }, { "epoch": 0.9489128984927304, "grad_norm": 0.05813376605510712, "learning_rate": 7.02475419471843e-06, "loss": 1.4398, "step": 21342 }, { "epoch": 0.9490018229514028, "grad_norm": 0.057365693151950836, "learning_rate": 7.000356245946249e-06, "loss": 1.4464, "step": 21344 }, { "epoch": 0.9490907474100752, "grad_norm": 0.05754502862691879, "learning_rate": 6.976000440956198e-06, "loss": 1.4493, "step": 21346 }, { "epoch": 0.9491796718687475, "grad_norm": 0.058054231107234955, "learning_rate": 6.9516867818302796e-06, "loss": 1.4469, "step": 21348 }, { "epoch": 0.9492685963274199, "grad_norm": 0.05699177458882332, "learning_rate": 6.927415270647053e-06, "loss": 1.4509, "step": 21350 }, { "epoch": 0.9493575207860923, "grad_norm": 0.058153219521045685, "learning_rate": 6.903185909481191e-06, "loss": 1.4432, "step": 21352 }, { "epoch": 0.9494464452447646, "grad_norm": 0.05815225467085838, "learning_rate": 6.878998700403982e-06, "loss": 1.439, "step": 21354 }, { "epoch": 0.9495353697034369, "grad_norm": 0.05740995332598686, "learning_rate": 6.854853645483106e-06, "loss": 1.4472, "step": 21356 }, { "epoch": 0.9496242941621093, "grad_norm": 0.05770250782370567, "learning_rate": 6.830750746782633e-06, "loss": 1.4437, "step": 21358 }, { "epoch": 0.9497132186207816, "grad_norm": 0.05776236578822136, "learning_rate": 6.806690006362859e-06, "loss": 1.4447, "step": 21360 }, { "epoch": 0.949802143079454, "grad_norm": 0.057613179087638855, "learning_rate": 6.7826714262806396e-06, "loss": 1.4448, "step": 21362 }, { "epoch": 0.9498910675381264, "grad_norm": 0.0582854337990284, "learning_rate": 6.758695008589221e-06, "loss": 1.4439, "step": 21364 }, { "epoch": 0.9499799919967987, "grad_norm": 0.057772018015384674, "learning_rate": 6.734760755338243e-06, "loss": 1.4434, "step": 21366 }, { "epoch": 0.9500689164554711, "grad_norm": 0.057995546609163284, "learning_rate": 6.7108686685735665e-06, "loss": 1.444, "step": 21368 }, { "epoch": 0.9501578409141435, "grad_norm": 0.05806703120470047, "learning_rate": 6.687018750337726e-06, "loss": 1.4412, "step": 21370 }, { "epoch": 0.9502467653728158, "grad_norm": 0.056987687945365906, "learning_rate": 6.663211002669534e-06, "loss": 1.4426, "step": 21372 }, { "epoch": 0.9503356898314882, "grad_norm": 0.05830015987157822, "learning_rate": 6.639445427604085e-06, "loss": 1.4377, "step": 21374 }, { "epoch": 0.9504246142901606, "grad_norm": 0.057094842195510864, "learning_rate": 6.615722027173032e-06, "loss": 1.4469, "step": 21376 }, { "epoch": 0.9505135387488328, "grad_norm": 0.057993218302726746, "learning_rate": 6.592040803404309e-06, "loss": 1.4415, "step": 21378 }, { "epoch": 0.9506024632075052, "grad_norm": 0.05756436288356781, "learning_rate": 6.5684017583223506e-06, "loss": 1.4454, "step": 21380 }, { "epoch": 0.9506913876661776, "grad_norm": 0.05718113109469414, "learning_rate": 6.544804893947876e-06, "loss": 1.4446, "step": 21382 }, { "epoch": 0.9507803121248499, "grad_norm": 0.05671248957514763, "learning_rate": 6.521250212298046e-06, "loss": 1.4501, "step": 21384 }, { "epoch": 0.9508692365835223, "grad_norm": 0.05659397318959236, "learning_rate": 6.497737715386476e-06, "loss": 1.4458, "step": 21386 }, { "epoch": 0.9509581610421947, "grad_norm": 0.05791900306940079, "learning_rate": 6.474267405223e-06, "loss": 1.4472, "step": 21388 }, { "epoch": 0.951047085500867, "grad_norm": 0.05804454907774925, "learning_rate": 6.450839283814125e-06, "loss": 1.4465, "step": 21390 }, { "epoch": 0.9511360099595394, "grad_norm": 0.05747028812766075, "learning_rate": 6.4274533531624715e-06, "loss": 1.4468, "step": 21392 }, { "epoch": 0.9512249344182118, "grad_norm": 0.058244530111551285, "learning_rate": 6.404109615267218e-06, "loss": 1.4506, "step": 21394 }, { "epoch": 0.9513138588768841, "grad_norm": 0.057229701429605484, "learning_rate": 6.380808072123934e-06, "loss": 1.4488, "step": 21396 }, { "epoch": 0.9514027833355564, "grad_norm": 0.05814434215426445, "learning_rate": 6.357548725724416e-06, "loss": 1.4449, "step": 21398 }, { "epoch": 0.9514917077942288, "grad_norm": 0.057511039078235626, "learning_rate": 6.334331578057018e-06, "loss": 1.4408, "step": 21400 }, { "epoch": 0.9515806322529011, "grad_norm": 0.05873566493391991, "learning_rate": 6.31115663110654e-06, "loss": 1.449, "step": 21402 }, { "epoch": 0.9516695567115735, "grad_norm": 0.05769232288002968, "learning_rate": 6.288023886854011e-06, "loss": 1.4451, "step": 21404 }, { "epoch": 0.9517584811702459, "grad_norm": 0.05827988311648369, "learning_rate": 6.264933347276847e-06, "loss": 1.4442, "step": 21406 }, { "epoch": 0.9518474056289182, "grad_norm": 0.05725441128015518, "learning_rate": 6.2418850143490805e-06, "loss": 1.4455, "step": 21408 }, { "epoch": 0.9519363300875906, "grad_norm": 0.05901114642620087, "learning_rate": 6.218878890040858e-06, "loss": 1.4456, "step": 21410 }, { "epoch": 0.952025254546263, "grad_norm": 0.05790838971734047, "learning_rate": 6.195914976318884e-06, "loss": 1.4424, "step": 21412 }, { "epoch": 0.9521141790049353, "grad_norm": 0.057518161833286285, "learning_rate": 6.17299327514631e-06, "loss": 1.45, "step": 21414 }, { "epoch": 0.9522031034636077, "grad_norm": 0.05830460786819458, "learning_rate": 6.150113788482403e-06, "loss": 1.451, "step": 21416 }, { "epoch": 0.9522920279222801, "grad_norm": 0.057020124047994614, "learning_rate": 6.127276518283153e-06, "loss": 1.4477, "step": 21418 }, { "epoch": 0.9523809523809523, "grad_norm": 0.05747748538851738, "learning_rate": 6.104481466500667e-06, "loss": 1.4502, "step": 21420 }, { "epoch": 0.9524698768396247, "grad_norm": 0.05771753937005997, "learning_rate": 6.081728635083661e-06, "loss": 1.4461, "step": 21422 }, { "epoch": 0.9525588012982971, "grad_norm": 0.057178083807229996, "learning_rate": 6.059018025977137e-06, "loss": 1.4498, "step": 21424 }, { "epoch": 0.9526477257569694, "grad_norm": 0.05818738043308258, "learning_rate": 6.036349641122429e-06, "loss": 1.4535, "step": 21426 }, { "epoch": 0.9527366502156418, "grad_norm": 0.057868827134370804, "learning_rate": 6.013723482457434e-06, "loss": 1.4438, "step": 21428 }, { "epoch": 0.9528255746743142, "grad_norm": 0.05839633569121361, "learning_rate": 5.9911395519162695e-06, "loss": 1.4444, "step": 21430 }, { "epoch": 0.9529144991329865, "grad_norm": 0.05750703439116478, "learning_rate": 5.968597851429502e-06, "loss": 1.4501, "step": 21432 }, { "epoch": 0.9530034235916589, "grad_norm": 0.05813831835985184, "learning_rate": 5.946098382924148e-06, "loss": 1.4435, "step": 21434 }, { "epoch": 0.9530923480503313, "grad_norm": 0.05906336382031441, "learning_rate": 5.9236411483235e-06, "loss": 1.4442, "step": 21436 }, { "epoch": 0.9531812725090036, "grad_norm": 0.05761095881462097, "learning_rate": 5.901226149547356e-06, "loss": 1.4428, "step": 21438 }, { "epoch": 0.953270196967676, "grad_norm": 0.058021754026412964, "learning_rate": 5.8788533885117956e-06, "loss": 1.4458, "step": 21440 }, { "epoch": 0.9533591214263483, "grad_norm": 0.057688046246767044, "learning_rate": 5.856522867129343e-06, "loss": 1.4438, "step": 21442 }, { "epoch": 0.9534480458850206, "grad_norm": 0.05762839689850807, "learning_rate": 5.834234587309028e-06, "loss": 1.4459, "step": 21444 }, { "epoch": 0.953536970343693, "grad_norm": 0.05795801803469658, "learning_rate": 5.811988550955993e-06, "loss": 1.4487, "step": 21446 }, { "epoch": 0.9536258948023654, "grad_norm": 0.0580856017768383, "learning_rate": 5.789784759971994e-06, "loss": 1.4503, "step": 21448 }, { "epoch": 0.9537148192610377, "grad_norm": 0.05720821022987366, "learning_rate": 5.767623216255125e-06, "loss": 1.4477, "step": 21450 }, { "epoch": 0.9538037437197101, "grad_norm": 0.057731300592422485, "learning_rate": 5.745503921699868e-06, "loss": 1.4478, "step": 21452 }, { "epoch": 0.9538926681783825, "grad_norm": 0.05763096734881401, "learning_rate": 5.7234268781969915e-06, "loss": 1.4447, "step": 21454 }, { "epoch": 0.9539815926370548, "grad_norm": 0.057338956743478775, "learning_rate": 5.701392087633761e-06, "loss": 1.445, "step": 21456 }, { "epoch": 0.9540705170957272, "grad_norm": 0.057891324162483215, "learning_rate": 5.679399551893893e-06, "loss": 1.4418, "step": 21458 }, { "epoch": 0.9541594415543996, "grad_norm": 0.05737922713160515, "learning_rate": 5.657449272857385e-06, "loss": 1.4444, "step": 21460 }, { "epoch": 0.954248366013072, "grad_norm": 0.05823858082294464, "learning_rate": 5.635541252400511e-06, "loss": 1.4367, "step": 21462 }, { "epoch": 0.9543372904717442, "grad_norm": 0.05750085040926933, "learning_rate": 5.61367549239622e-06, "loss": 1.4491, "step": 21464 }, { "epoch": 0.9544262149304166, "grad_norm": 0.05752246081829071, "learning_rate": 5.591851994713681e-06, "loss": 1.4429, "step": 21466 }, { "epoch": 0.9545151393890889, "grad_norm": 0.057769421488046646, "learning_rate": 5.5700707612184044e-06, "loss": 1.4498, "step": 21468 }, { "epoch": 0.9546040638477613, "grad_norm": 0.057797472923994064, "learning_rate": 5.548331793772288e-06, "loss": 1.4463, "step": 21470 }, { "epoch": 0.9546929883064337, "grad_norm": 0.057377446442842484, "learning_rate": 5.52663509423379e-06, "loss": 1.4473, "step": 21472 }, { "epoch": 0.954781912765106, "grad_norm": 0.05764816701412201, "learning_rate": 5.504980664457593e-06, "loss": 1.4454, "step": 21474 }, { "epoch": 0.9548708372237784, "grad_norm": 0.05828756093978882, "learning_rate": 5.483368506294828e-06, "loss": 1.4446, "step": 21476 }, { "epoch": 0.9549597616824508, "grad_norm": 0.058148372918367386, "learning_rate": 5.461798621593017e-06, "loss": 1.441, "step": 21478 }, { "epoch": 0.9550486861411231, "grad_norm": 0.05808459222316742, "learning_rate": 5.440271012195963e-06, "loss": 1.4473, "step": 21480 }, { "epoch": 0.9551376105997955, "grad_norm": 0.05690108239650726, "learning_rate": 5.4187856799440275e-06, "loss": 1.4451, "step": 21482 }, { "epoch": 0.9552265350584679, "grad_norm": 0.05813979357481003, "learning_rate": 5.3973426266737955e-06, "loss": 1.4465, "step": 21484 }, { "epoch": 0.9553154595171401, "grad_norm": 0.057522471994161606, "learning_rate": 5.3759418542184134e-06, "loss": 1.4487, "step": 21486 }, { "epoch": 0.9554043839758125, "grad_norm": 0.05860830470919609, "learning_rate": 5.354583364407251e-06, "loss": 1.4444, "step": 21488 }, { "epoch": 0.9554933084344849, "grad_norm": 0.05663066357374191, "learning_rate": 5.333267159066124e-06, "loss": 1.444, "step": 21490 }, { "epoch": 0.9555822328931572, "grad_norm": 0.058597080409526825, "learning_rate": 5.311993240017243e-06, "loss": 1.4472, "step": 21492 }, { "epoch": 0.9556711573518296, "grad_norm": 0.057328660041093826, "learning_rate": 5.290761609079209e-06, "loss": 1.447, "step": 21494 }, { "epoch": 0.955760081810502, "grad_norm": 0.0580228753387928, "learning_rate": 5.269572268066958e-06, "loss": 1.4468, "step": 21496 }, { "epoch": 0.9558490062691744, "grad_norm": 0.05755842849612236, "learning_rate": 5.248425218791874e-06, "loss": 1.4492, "step": 21498 }, { "epoch": 0.9559379307278467, "grad_norm": 0.05705071613192558, "learning_rate": 5.227320463061791e-06, "loss": 1.4469, "step": 21500 }, { "epoch": 0.9559379307278467, "eval_loss": 1.432187557220459, "eval_runtime": 12.4558, "eval_samples_per_second": 554.762, "eval_steps_per_second": 69.365, "step": 21500 }, { "epoch": 0.9560268551865191, "grad_norm": 0.057521793991327286, "learning_rate": 5.206258002680653e-06, "loss": 1.4425, "step": 21502 }, { "epoch": 0.9561157796451915, "grad_norm": 0.05715570226311684, "learning_rate": 5.18523783944913e-06, "loss": 1.4504, "step": 21504 }, { "epoch": 0.9562047041038638, "grad_norm": 0.05779632553458214, "learning_rate": 5.164259975164009e-06, "loss": 1.4429, "step": 21506 }, { "epoch": 0.9562936285625361, "grad_norm": 0.059060875326395035, "learning_rate": 5.143324411618577e-06, "loss": 1.4463, "step": 21508 }, { "epoch": 0.9563825530212084, "grad_norm": 0.05711085721850395, "learning_rate": 5.122431150602624e-06, "loss": 1.4455, "step": 21510 }, { "epoch": 0.9564714774798808, "grad_norm": 0.05797237157821655, "learning_rate": 5.101580193902055e-06, "loss": 1.4448, "step": 21512 }, { "epoch": 0.9565604019385532, "grad_norm": 0.057503245770931244, "learning_rate": 5.080771543299389e-06, "loss": 1.4502, "step": 21514 }, { "epoch": 0.9566493263972256, "grad_norm": 0.05919162929058075, "learning_rate": 5.060005200573425e-06, "loss": 1.4475, "step": 21516 }, { "epoch": 0.9567382508558979, "grad_norm": 0.05886862426996231, "learning_rate": 5.039281167499299e-06, "loss": 1.4526, "step": 21518 }, { "epoch": 0.9568271753145703, "grad_norm": 0.058441340923309326, "learning_rate": 5.018599445848648e-06, "loss": 1.4441, "step": 21520 }, { "epoch": 0.9569160997732427, "grad_norm": 0.05824016034603119, "learning_rate": 4.997960037389449e-06, "loss": 1.4449, "step": 21522 }, { "epoch": 0.957005024231915, "grad_norm": 0.057525213807821274, "learning_rate": 4.97736294388601e-06, "loss": 1.4477, "step": 21524 }, { "epoch": 0.9570939486905874, "grad_norm": 0.058176856487989426, "learning_rate": 4.956808167099092e-06, "loss": 1.4471, "step": 21526 }, { "epoch": 0.9571828731492596, "grad_norm": 0.057681165635585785, "learning_rate": 4.936295708785732e-06, "loss": 1.4497, "step": 21528 }, { "epoch": 0.957271797607932, "grad_norm": 0.05783999338746071, "learning_rate": 4.915825570699584e-06, "loss": 1.4408, "step": 21530 }, { "epoch": 0.9573607220666044, "grad_norm": 0.05777215585112572, "learning_rate": 4.895397754590414e-06, "loss": 1.4461, "step": 21532 }, { "epoch": 0.9574496465252768, "grad_norm": 0.058080073446035385, "learning_rate": 4.875012262204492e-06, "loss": 1.451, "step": 21534 }, { "epoch": 0.9575385709839491, "grad_norm": 0.05700400471687317, "learning_rate": 4.854669095284425e-06, "loss": 1.4511, "step": 21536 }, { "epoch": 0.9576274954426215, "grad_norm": 0.05668550357222557, "learning_rate": 4.834368255569322e-06, "loss": 1.4435, "step": 21538 }, { "epoch": 0.9577164199012939, "grad_norm": 0.05787520855665207, "learning_rate": 4.81410974479457e-06, "loss": 1.4419, "step": 21540 }, { "epoch": 0.9578053443599662, "grad_norm": 0.05773087963461876, "learning_rate": 4.793893564691843e-06, "loss": 1.4458, "step": 21542 }, { "epoch": 0.9578942688186386, "grad_norm": 0.057870473712682724, "learning_rate": 4.773719716989477e-06, "loss": 1.4454, "step": 21544 }, { "epoch": 0.957983193277311, "grad_norm": 0.05800451338291168, "learning_rate": 4.753588203411929e-06, "loss": 1.4423, "step": 21546 }, { "epoch": 0.9580721177359833, "grad_norm": 0.05686897784471512, "learning_rate": 4.7334990256800995e-06, "loss": 1.4462, "step": 21548 }, { "epoch": 0.9581610421946556, "grad_norm": 0.056949593126773834, "learning_rate": 4.7134521855113355e-06, "loss": 1.4418, "step": 21550 }, { "epoch": 0.958249966653328, "grad_norm": 0.05743638426065445, "learning_rate": 4.693447684619379e-06, "loss": 1.4471, "step": 21552 }, { "epoch": 0.9583388911120003, "grad_norm": 0.05902915075421333, "learning_rate": 4.673485524714305e-06, "loss": 1.4437, "step": 21554 }, { "epoch": 0.9584278155706727, "grad_norm": 0.058458685874938965, "learning_rate": 4.653565707502416e-06, "loss": 1.4422, "step": 21556 }, { "epoch": 0.9585167400293451, "grad_norm": 0.05795848369598389, "learning_rate": 4.633688234686739e-06, "loss": 1.4426, "step": 21558 }, { "epoch": 0.9586056644880174, "grad_norm": 0.058213118463754654, "learning_rate": 4.613853107966359e-06, "loss": 1.4477, "step": 21560 }, { "epoch": 0.9586945889466898, "grad_norm": 0.05738683417439461, "learning_rate": 4.59406032903692e-06, "loss": 1.4455, "step": 21562 }, { "epoch": 0.9587835134053622, "grad_norm": 0.057969436049461365, "learning_rate": 4.5743098995903455e-06, "loss": 1.4482, "step": 21564 }, { "epoch": 0.9588724378640345, "grad_norm": 0.05753438174724579, "learning_rate": 4.554601821315063e-06, "loss": 1.4418, "step": 21566 }, { "epoch": 0.9589613623227069, "grad_norm": 0.056868597865104675, "learning_rate": 4.534936095895781e-06, "loss": 1.449, "step": 21568 }, { "epoch": 0.9590502867813793, "grad_norm": 0.057507824152708054, "learning_rate": 4.515312725013598e-06, "loss": 1.4416, "step": 21570 }, { "epoch": 0.9591392112400515, "grad_norm": 0.05689457431435585, "learning_rate": 4.495731710346007e-06, "loss": 1.4416, "step": 21572 }, { "epoch": 0.9592281356987239, "grad_norm": 0.0581645630300045, "learning_rate": 4.47619305356689e-06, "loss": 1.446, "step": 21574 }, { "epoch": 0.9593170601573963, "grad_norm": 0.058234803378582, "learning_rate": 4.4566967563465234e-06, "loss": 1.4436, "step": 21576 }, { "epoch": 0.9594059846160686, "grad_norm": 0.05846071243286133, "learning_rate": 4.4372428203514615e-06, "loss": 1.4399, "step": 21578 }, { "epoch": 0.959494909074741, "grad_norm": 0.057688742876052856, "learning_rate": 4.417831247244819e-06, "loss": 1.4489, "step": 21580 }, { "epoch": 0.9595838335334134, "grad_norm": 0.058130089193582535, "learning_rate": 4.398462038685824e-06, "loss": 1.4435, "step": 21582 }, { "epoch": 0.9596727579920857, "grad_norm": 0.05822238698601723, "learning_rate": 4.3791351963304304e-06, "loss": 1.4503, "step": 21584 }, { "epoch": 0.9597616824507581, "grad_norm": 0.05791193246841431, "learning_rate": 4.3598507218306495e-06, "loss": 1.4482, "step": 21586 }, { "epoch": 0.9598506069094305, "grad_norm": 0.05749112367630005, "learning_rate": 4.340608616835051e-06, "loss": 1.4458, "step": 21588 }, { "epoch": 0.9599395313681028, "grad_norm": 0.0575961209833622, "learning_rate": 4.321408882988542e-06, "loss": 1.4476, "step": 21590 }, { "epoch": 0.9600284558267752, "grad_norm": 0.05781961977481842, "learning_rate": 4.302251521932366e-06, "loss": 1.4448, "step": 21592 }, { "epoch": 0.9601173802854475, "grad_norm": 0.05863596498966217, "learning_rate": 4.283136535304155e-06, "loss": 1.4491, "step": 21594 }, { "epoch": 0.9602063047441198, "grad_norm": 0.057458702474832535, "learning_rate": 4.264063924738104e-06, "loss": 1.4479, "step": 21596 }, { "epoch": 0.9602952292027922, "grad_norm": 0.057083528488874435, "learning_rate": 4.2450336918644085e-06, "loss": 1.4482, "step": 21598 }, { "epoch": 0.9603841536614646, "grad_norm": 0.05720750615000725, "learning_rate": 4.226045838309989e-06, "loss": 1.4473, "step": 21600 }, { "epoch": 0.9604730781201369, "grad_norm": 0.05795248597860336, "learning_rate": 4.207100365697936e-06, "loss": 1.4519, "step": 21602 }, { "epoch": 0.9605620025788093, "grad_norm": 0.057493992149829865, "learning_rate": 4.188197275647898e-06, "loss": 1.4457, "step": 21604 }, { "epoch": 0.9606509270374817, "grad_norm": 0.0582110695540905, "learning_rate": 4.169336569775695e-06, "loss": 1.4448, "step": 21606 }, { "epoch": 0.960739851496154, "grad_norm": 0.05797712132334709, "learning_rate": 4.150518249693647e-06, "loss": 1.4529, "step": 21608 }, { "epoch": 0.9608287759548264, "grad_norm": 0.0583646297454834, "learning_rate": 4.1317423170104675e-06, "loss": 1.4422, "step": 21610 }, { "epoch": 0.9609177004134988, "grad_norm": 0.05701034516096115, "learning_rate": 4.113008773331151e-06, "loss": 1.4434, "step": 21612 }, { "epoch": 0.9610066248721711, "grad_norm": 0.05788358673453331, "learning_rate": 4.094317620257138e-06, "loss": 1.4412, "step": 21614 }, { "epoch": 0.9610955493308434, "grad_norm": 0.05787377804517746, "learning_rate": 4.075668859386261e-06, "loss": 1.4417, "step": 21616 }, { "epoch": 0.9611844737895158, "grad_norm": 0.05681522190570831, "learning_rate": 4.057062492312691e-06, "loss": 1.4483, "step": 21618 }, { "epoch": 0.9612733982481881, "grad_norm": 0.05852191522717476, "learning_rate": 4.038498520626932e-06, "loss": 1.4491, "step": 21620 }, { "epoch": 0.9613623227068605, "grad_norm": 0.05827983468770981, "learning_rate": 4.0199769459159376e-06, "loss": 1.4491, "step": 21622 }, { "epoch": 0.9614512471655329, "grad_norm": 0.05857324227690697, "learning_rate": 4.001497769763107e-06, "loss": 1.4505, "step": 21624 }, { "epoch": 0.9615401716242052, "grad_norm": 0.058631282299757004, "learning_rate": 3.983060993747956e-06, "loss": 1.4475, "step": 21626 }, { "epoch": 0.9616290960828776, "grad_norm": 0.056906893849372864, "learning_rate": 3.964666619446666e-06, "loss": 1.4458, "step": 21628 }, { "epoch": 0.96171802054155, "grad_norm": 0.057671476155519485, "learning_rate": 3.946314648431648e-06, "loss": 1.4407, "step": 21630 }, { "epoch": 0.9618069450002223, "grad_norm": 0.057756319642066956, "learning_rate": 3.928005082271646e-06, "loss": 1.4427, "step": 21632 }, { "epoch": 0.9618958694588947, "grad_norm": 0.05779024213552475, "learning_rate": 3.909737922531909e-06, "loss": 1.4454, "step": 21634 }, { "epoch": 0.961984793917567, "grad_norm": 0.05701223015785217, "learning_rate": 3.891513170773964e-06, "loss": 1.4496, "step": 21636 }, { "epoch": 0.9620737183762393, "grad_norm": 0.05769617483019829, "learning_rate": 3.873330828555788e-06, "loss": 1.443, "step": 21638 }, { "epoch": 0.9621626428349117, "grad_norm": 0.05806345120072365, "learning_rate": 3.855190897431637e-06, "loss": 1.4428, "step": 21640 }, { "epoch": 0.9622515672935841, "grad_norm": 0.0573996864259243, "learning_rate": 3.837093378952217e-06, "loss": 1.4492, "step": 21642 }, { "epoch": 0.9623404917522564, "grad_norm": 0.05671732872724533, "learning_rate": 3.81903827466451e-06, "loss": 1.4445, "step": 21644 }, { "epoch": 0.9624294162109288, "grad_norm": 0.057457756251096725, "learning_rate": 3.801025586112061e-06, "loss": 1.4476, "step": 21646 }, { "epoch": 0.9625183406696012, "grad_norm": 0.05706573650240898, "learning_rate": 3.783055314834638e-06, "loss": 1.4462, "step": 21648 }, { "epoch": 0.9626072651282735, "grad_norm": 0.05761359632015228, "learning_rate": 3.765127462368345e-06, "loss": 1.4483, "step": 21650 }, { "epoch": 0.9626961895869459, "grad_norm": 0.057819437235593796, "learning_rate": 3.7472420302458455e-06, "loss": 1.4464, "step": 21652 }, { "epoch": 0.9627851140456183, "grad_norm": 0.05833171680569649, "learning_rate": 3.729399019996027e-06, "loss": 1.4449, "step": 21654 }, { "epoch": 0.9628740385042907, "grad_norm": 0.057252366095781326, "learning_rate": 3.71159843314417e-06, "loss": 1.4462, "step": 21656 }, { "epoch": 0.9629629629629629, "grad_norm": 0.058833926916122437, "learning_rate": 3.69384027121189e-06, "loss": 1.4441, "step": 21658 }, { "epoch": 0.9630518874216353, "grad_norm": 0.05726613849401474, "learning_rate": 3.676124535717307e-06, "loss": 1.4473, "step": 21660 }, { "epoch": 0.9631408118803076, "grad_norm": 0.05816185846924782, "learning_rate": 3.658451228174875e-06, "loss": 1.4407, "step": 21662 }, { "epoch": 0.96322973633898, "grad_norm": 0.05848447605967522, "learning_rate": 3.640820350095331e-06, "loss": 1.4431, "step": 21664 }, { "epoch": 0.9633186607976524, "grad_norm": 0.05794060230255127, "learning_rate": 3.6232319029858017e-06, "loss": 1.4511, "step": 21666 }, { "epoch": 0.9634075852563247, "grad_norm": 0.05679847672581673, "learning_rate": 3.6056858883499187e-06, "loss": 1.4469, "step": 21668 }, { "epoch": 0.9634965097149971, "grad_norm": 0.05763591080904007, "learning_rate": 3.588182307687482e-06, "loss": 1.4494, "step": 21670 }, { "epoch": 0.9635854341736695, "grad_norm": 0.05757317319512367, "learning_rate": 3.5707211624949055e-06, "loss": 1.4486, "step": 21672 }, { "epoch": 0.9636743586323419, "grad_norm": 0.057650867849588394, "learning_rate": 3.5533024542647174e-06, "loss": 1.4514, "step": 21674 }, { "epoch": 0.9637632830910142, "grad_norm": 0.057070985436439514, "learning_rate": 3.53592618448606e-06, "loss": 1.4482, "step": 21676 }, { "epoch": 0.9638522075496866, "grad_norm": 0.05852733552455902, "learning_rate": 3.5185923546442457e-06, "loss": 1.448, "step": 21678 }, { "epoch": 0.9639411320083588, "grad_norm": 0.058048900216817856, "learning_rate": 3.501300966221088e-06, "loss": 1.4493, "step": 21680 }, { "epoch": 0.9640300564670312, "grad_norm": 0.05750441551208496, "learning_rate": 3.4840520206947392e-06, "loss": 1.4466, "step": 21682 }, { "epoch": 0.9641189809257036, "grad_norm": 0.05819224938750267, "learning_rate": 3.4668455195396854e-06, "loss": 1.4456, "step": 21684 }, { "epoch": 0.964207905384376, "grad_norm": 0.057015545666217804, "learning_rate": 3.449681464226806e-06, "loss": 1.4428, "step": 21686 }, { "epoch": 0.9642968298430483, "grad_norm": 0.056825630366802216, "learning_rate": 3.4325598562234274e-06, "loss": 1.447, "step": 21688 }, { "epoch": 0.9643857543017207, "grad_norm": 0.05699056014418602, "learning_rate": 3.415480696993101e-06, "loss": 1.4432, "step": 21690 }, { "epoch": 0.9644746787603931, "grad_norm": 0.057919371873140335, "learning_rate": 3.3984439879958807e-06, "loss": 1.4484, "step": 21692 }, { "epoch": 0.9645636032190654, "grad_norm": 0.05731486156582832, "learning_rate": 3.381449730688102e-06, "loss": 1.4495, "step": 21694 }, { "epoch": 0.9646525276777378, "grad_norm": 0.05739450827240944, "learning_rate": 3.3644979265225474e-06, "loss": 1.449, "step": 21696 }, { "epoch": 0.9647414521364102, "grad_norm": 0.058767009526491165, "learning_rate": 3.3475885769482796e-06, "loss": 1.4444, "step": 21698 }, { "epoch": 0.9648303765950825, "grad_norm": 0.05814867466688156, "learning_rate": 3.330721683410809e-06, "loss": 1.4522, "step": 21700 }, { "epoch": 0.9649193010537548, "grad_norm": 0.05803316831588745, "learning_rate": 3.313897247352038e-06, "loss": 1.4435, "step": 21702 }, { "epoch": 0.9650082255124272, "grad_norm": 0.05720685422420502, "learning_rate": 3.297115270210149e-06, "loss": 1.4459, "step": 21704 }, { "epoch": 0.9650971499710995, "grad_norm": 0.05773816257715225, "learning_rate": 3.2803757534197175e-06, "loss": 1.4459, "step": 21706 }, { "epoch": 0.9651860744297719, "grad_norm": 0.05913296714425087, "learning_rate": 3.263678698411765e-06, "loss": 1.4439, "step": 21708 }, { "epoch": 0.9652749988884443, "grad_norm": 0.057658907026052475, "learning_rate": 3.2470241066136495e-06, "loss": 1.4482, "step": 21710 }, { "epoch": 0.9653639233471166, "grad_norm": 0.05754270777106285, "learning_rate": 3.230411979448955e-06, "loss": 1.4479, "step": 21712 }, { "epoch": 0.965452847805789, "grad_norm": 0.05791141465306282, "learning_rate": 3.2138423183378785e-06, "loss": 1.4476, "step": 21714 }, { "epoch": 0.9655417722644614, "grad_norm": 0.057228460907936096, "learning_rate": 3.1973151246967868e-06, "loss": 1.4455, "step": 21716 }, { "epoch": 0.9656306967231337, "grad_norm": 0.05856222286820412, "learning_rate": 3.18083039993855e-06, "loss": 1.4421, "step": 21718 }, { "epoch": 0.9657196211818061, "grad_norm": 0.05858870595693588, "learning_rate": 3.1643881454723744e-06, "loss": 1.4433, "step": 21720 }, { "epoch": 0.9658085456404785, "grad_norm": 0.057810429483652115, "learning_rate": 3.1479883627037464e-06, "loss": 1.4414, "step": 21722 }, { "epoch": 0.9658974700991507, "grad_norm": 0.05786995589733124, "learning_rate": 3.131631053034656e-06, "loss": 1.4479, "step": 21724 }, { "epoch": 0.9659863945578231, "grad_norm": 0.05765226110816002, "learning_rate": 3.115316217863373e-06, "loss": 1.444, "step": 21726 }, { "epoch": 0.9660753190164955, "grad_norm": 0.05825477093458176, "learning_rate": 3.0990438585845048e-06, "loss": 1.4505, "step": 21728 }, { "epoch": 0.9661642434751678, "grad_norm": 0.057248134166002274, "learning_rate": 3.082813976589216e-06, "loss": 1.4484, "step": 21730 }, { "epoch": 0.9662531679338402, "grad_norm": 0.057661522179841995, "learning_rate": 3.0666265732647856e-06, "loss": 1.4497, "step": 21732 }, { "epoch": 0.9663420923925126, "grad_norm": 0.05723179504275322, "learning_rate": 3.0504816499950516e-06, "loss": 1.4442, "step": 21734 }, { "epoch": 0.9664310168511849, "grad_norm": 0.05735337361693382, "learning_rate": 3.034379208160076e-06, "loss": 1.4482, "step": 21736 }, { "epoch": 0.9665199413098573, "grad_norm": 0.05809729918837547, "learning_rate": 3.018319249136481e-06, "loss": 1.4436, "step": 21738 }, { "epoch": 0.9666088657685297, "grad_norm": 0.05790387839078903, "learning_rate": 3.002301774297056e-06, "loss": 1.4404, "step": 21740 }, { "epoch": 0.966697790227202, "grad_norm": 0.05688699334859848, "learning_rate": 2.9863267850110955e-06, "loss": 1.449, "step": 21742 }, { "epoch": 0.9667867146858744, "grad_norm": 0.056998882442712784, "learning_rate": 2.9703942826441734e-06, "loss": 1.4472, "step": 21744 }, { "epoch": 0.9668756391445467, "grad_norm": 0.058484215289354324, "learning_rate": 2.9545042685583112e-06, "loss": 1.4486, "step": 21746 }, { "epoch": 0.966964563603219, "grad_norm": 0.05785754323005676, "learning_rate": 2.938656744111812e-06, "loss": 1.4462, "step": 21748 }, { "epoch": 0.9670534880618914, "grad_norm": 0.05740215256810188, "learning_rate": 2.9228517106594244e-06, "loss": 1.4409, "step": 21750 }, { "epoch": 0.9671424125205638, "grad_norm": 0.057988543063402176, "learning_rate": 2.9070891695521796e-06, "loss": 1.4468, "step": 21752 }, { "epoch": 0.9672313369792361, "grad_norm": 0.05775817483663559, "learning_rate": 2.8913691221376102e-06, "loss": 1.4534, "step": 21754 }, { "epoch": 0.9673202614379085, "grad_norm": 0.05813591554760933, "learning_rate": 2.8756915697594756e-06, "loss": 1.4463, "step": 21756 }, { "epoch": 0.9674091858965809, "grad_norm": 0.05779729038476944, "learning_rate": 2.86005651375798e-06, "loss": 1.4475, "step": 21758 }, { "epoch": 0.9674981103552532, "grad_norm": 0.05689067393541336, "learning_rate": 2.844463955469723e-06, "loss": 1.4441, "step": 21760 }, { "epoch": 0.9675870348139256, "grad_norm": 0.05785556882619858, "learning_rate": 2.828913896227525e-06, "loss": 1.446, "step": 21762 }, { "epoch": 0.967675959272598, "grad_norm": 0.0574394054710865, "learning_rate": 2.8134063373607687e-06, "loss": 1.4418, "step": 21764 }, { "epoch": 0.9677648837312702, "grad_norm": 0.05662890523672104, "learning_rate": 2.79794128019506e-06, "loss": 1.4432, "step": 21766 }, { "epoch": 0.9678538081899426, "grad_norm": 0.0582660436630249, "learning_rate": 2.7825187260523966e-06, "loss": 1.4476, "step": 21768 }, { "epoch": 0.967942732648615, "grad_norm": 0.05858684331178665, "learning_rate": 2.767138676251224e-06, "loss": 1.4451, "step": 21770 }, { "epoch": 0.9680316571072873, "grad_norm": 0.056393761187791824, "learning_rate": 2.7518011321062687e-06, "loss": 1.444, "step": 21772 }, { "epoch": 0.9681205815659597, "grad_norm": 0.058356158435344696, "learning_rate": 2.736506094928648e-06, "loss": 1.4424, "step": 21774 }, { "epoch": 0.9682095060246321, "grad_norm": 0.05726497247815132, "learning_rate": 2.7212535660258718e-06, "loss": 1.4445, "step": 21776 }, { "epoch": 0.9682984304833044, "grad_norm": 0.057576339691877365, "learning_rate": 2.7060435467017865e-06, "loss": 1.4466, "step": 21778 }, { "epoch": 0.9683873549419768, "grad_norm": 0.05778598040342331, "learning_rate": 2.6908760382565735e-06, "loss": 1.448, "step": 21780 }, { "epoch": 0.9684762794006492, "grad_norm": 0.056812673807144165, "learning_rate": 2.6757510419868624e-06, "loss": 1.4487, "step": 21782 }, { "epoch": 0.9685652038593215, "grad_norm": 0.05817960575222969, "learning_rate": 2.660668559185564e-06, "loss": 1.4498, "step": 21784 }, { "epoch": 0.9686541283179939, "grad_norm": 0.05673222243785858, "learning_rate": 2.6456285911420353e-06, "loss": 1.4489, "step": 21786 }, { "epoch": 0.9687430527766662, "grad_norm": 0.058771610260009766, "learning_rate": 2.63063113914197e-06, "loss": 1.4485, "step": 21788 }, { "epoch": 0.9688319772353385, "grad_norm": 0.05711560696363449, "learning_rate": 2.615676204467343e-06, "loss": 1.4452, "step": 21790 }, { "epoch": 0.9689209016940109, "grad_norm": 0.057385995984077454, "learning_rate": 2.6007637883966872e-06, "loss": 1.4476, "step": 21792 }, { "epoch": 0.9690098261526833, "grad_norm": 0.05783890560269356, "learning_rate": 2.5858938922046495e-06, "loss": 1.4458, "step": 21794 }, { "epoch": 0.9690987506113556, "grad_norm": 0.05787234753370285, "learning_rate": 2.571066517162435e-06, "loss": 1.4456, "step": 21796 }, { "epoch": 0.969187675070028, "grad_norm": 0.05799291655421257, "learning_rate": 2.556281664537585e-06, "loss": 1.4469, "step": 21798 }, { "epoch": 0.9692765995287004, "grad_norm": 0.05841020122170448, "learning_rate": 2.541539335593923e-06, "loss": 1.4422, "step": 21800 }, { "epoch": 0.9693655239873727, "grad_norm": 0.057031866163015366, "learning_rate": 2.526839531591718e-06, "loss": 1.4509, "step": 21802 }, { "epoch": 0.9694544484460451, "grad_norm": 0.05724103003740311, "learning_rate": 2.5121822537875206e-06, "loss": 1.4438, "step": 21804 }, { "epoch": 0.9695433729047175, "grad_norm": 0.057726915925741196, "learning_rate": 2.4975675034343835e-06, "loss": 1.4444, "step": 21806 }, { "epoch": 0.9696322973633899, "grad_norm": 0.058166489005088806, "learning_rate": 2.482995281781586e-06, "loss": 1.4455, "step": 21808 }, { "epoch": 0.9697212218220621, "grad_norm": 0.058684688061475754, "learning_rate": 2.4684655900748532e-06, "loss": 1.4486, "step": 21810 }, { "epoch": 0.9698101462807345, "grad_norm": 0.05730533227324486, "learning_rate": 2.453978429556247e-06, "loss": 1.4456, "step": 21812 }, { "epoch": 0.9698990707394068, "grad_norm": 0.057590242475271225, "learning_rate": 2.439533801464111e-06, "loss": 1.4445, "step": 21814 }, { "epoch": 0.9699879951980792, "grad_norm": 0.057827651500701904, "learning_rate": 2.425131707033346e-06, "loss": 1.4461, "step": 21816 }, { "epoch": 0.9700769196567516, "grad_norm": 0.05834771320223808, "learning_rate": 2.4107721474950774e-06, "loss": 1.4415, "step": 21818 }, { "epoch": 0.970165844115424, "grad_norm": 0.057678621262311935, "learning_rate": 2.3964551240767684e-06, "loss": 1.4477, "step": 21820 }, { "epoch": 0.9702547685740963, "grad_norm": 0.05750925838947296, "learning_rate": 2.382180638002385e-06, "loss": 1.4441, "step": 21822 }, { "epoch": 0.9703436930327687, "grad_norm": 0.056883055716753006, "learning_rate": 2.3679486904921164e-06, "loss": 1.4452, "step": 21824 }, { "epoch": 0.970432617491441, "grad_norm": 0.057405877858400345, "learning_rate": 2.353759282762602e-06, "loss": 1.4487, "step": 21826 }, { "epoch": 0.9705215419501134, "grad_norm": 0.056650012731552124, "learning_rate": 2.339612416026815e-06, "loss": 1.4426, "step": 21828 }, { "epoch": 0.9706104664087858, "grad_norm": 0.05729759484529495, "learning_rate": 2.3255080914940106e-06, "loss": 1.4483, "step": 21830 }, { "epoch": 0.970699390867458, "grad_norm": 0.05801773816347122, "learning_rate": 2.3114463103700023e-06, "loss": 1.4429, "step": 21832 }, { "epoch": 0.9707883153261304, "grad_norm": 0.057637542486190796, "learning_rate": 2.2974270738567726e-06, "loss": 1.4442, "step": 21834 }, { "epoch": 0.9708772397848028, "grad_norm": 0.058352191001176834, "learning_rate": 2.2834503831528076e-06, "loss": 1.4448, "step": 21836 }, { "epoch": 0.9709661642434751, "grad_norm": 0.057397257536649704, "learning_rate": 2.2695162394528735e-06, "loss": 1.4414, "step": 21838 }, { "epoch": 0.9710550887021475, "grad_norm": 0.05693025141954422, "learning_rate": 2.2556246439481286e-06, "loss": 1.4481, "step": 21840 }, { "epoch": 0.9711440131608199, "grad_norm": 0.05720305070281029, "learning_rate": 2.2417755978260123e-06, "loss": 1.452, "step": 21842 }, { "epoch": 0.9712329376194923, "grad_norm": 0.057424396276474, "learning_rate": 2.2279691022705216e-06, "loss": 1.4438, "step": 21844 }, { "epoch": 0.9713218620781646, "grad_norm": 0.057667020708322525, "learning_rate": 2.2142051584618237e-06, "loss": 1.4478, "step": 21846 }, { "epoch": 0.971410786536837, "grad_norm": 0.058313172310590744, "learning_rate": 2.200483767576589e-06, "loss": 1.4476, "step": 21848 }, { "epoch": 0.9714997109955094, "grad_norm": 0.057696230709552765, "learning_rate": 2.1868049307877113e-06, "loss": 1.4433, "step": 21850 }, { "epoch": 0.9715886354541817, "grad_norm": 0.057298045605421066, "learning_rate": 2.1731686492644787e-06, "loss": 1.447, "step": 21852 }, { "epoch": 0.971677559912854, "grad_norm": 0.05838226154446602, "learning_rate": 2.1595749241726805e-06, "loss": 1.4426, "step": 21854 }, { "epoch": 0.9717664843715264, "grad_norm": 0.057391196489334106, "learning_rate": 2.146023756674331e-06, "loss": 1.4436, "step": 21856 }, { "epoch": 0.9718554088301987, "grad_norm": 0.05735941603779793, "learning_rate": 2.132515147927838e-06, "loss": 1.4449, "step": 21858 }, { "epoch": 0.9719443332888711, "grad_norm": 0.05849364399909973, "learning_rate": 2.1190490990879994e-06, "loss": 1.4509, "step": 21860 }, { "epoch": 0.9720332577475435, "grad_norm": 0.05746670067310333, "learning_rate": 2.10562561130595e-06, "loss": 1.4453, "step": 21862 }, { "epoch": 0.9721221822062158, "grad_norm": 0.05694587528705597, "learning_rate": 2.0922446857291055e-06, "loss": 1.4459, "step": 21864 }, { "epoch": 0.9722111066648882, "grad_norm": 0.05823846161365509, "learning_rate": 2.0789063235014396e-06, "loss": 1.4386, "step": 21866 }, { "epoch": 0.9723000311235606, "grad_norm": 0.05770362168550491, "learning_rate": 2.065610525763095e-06, "loss": 1.4468, "step": 21868 }, { "epoch": 0.9723889555822329, "grad_norm": 0.05771014466881752, "learning_rate": 2.052357293650775e-06, "loss": 1.4437, "step": 21870 }, { "epoch": 0.9724778800409053, "grad_norm": 0.058271683752536774, "learning_rate": 2.039146628297295e-06, "loss": 1.446, "step": 21872 }, { "epoch": 0.9725668044995777, "grad_norm": 0.057467687875032425, "learning_rate": 2.0259785308319176e-06, "loss": 1.4474, "step": 21874 }, { "epoch": 0.9726557289582499, "grad_norm": 0.058236099779605865, "learning_rate": 2.012853002380466e-06, "loss": 1.4466, "step": 21876 }, { "epoch": 0.9727446534169223, "grad_norm": 0.057283271104097366, "learning_rate": 1.9997700440649302e-06, "loss": 1.4443, "step": 21878 }, { "epoch": 0.9728335778755947, "grad_norm": 0.057595402002334595, "learning_rate": 1.9867296570036387e-06, "loss": 1.4457, "step": 21880 }, { "epoch": 0.972922502334267, "grad_norm": 0.05784250423312187, "learning_rate": 1.973731842311366e-06, "loss": 1.4421, "step": 21882 }, { "epoch": 0.9730114267929394, "grad_norm": 0.057085875421762466, "learning_rate": 1.9607766010991677e-06, "loss": 1.4472, "step": 21884 }, { "epoch": 0.9731003512516118, "grad_norm": 0.05725502967834473, "learning_rate": 1.9478639344746585e-06, "loss": 1.4512, "step": 21886 }, { "epoch": 0.9731892757102841, "grad_norm": 0.057032741606235504, "learning_rate": 1.934993843541566e-06, "loss": 1.4412, "step": 21888 }, { "epoch": 0.9732782001689565, "grad_norm": 0.056812822818756104, "learning_rate": 1.9221663294000657e-06, "loss": 1.4454, "step": 21890 }, { "epoch": 0.9733671246276289, "grad_norm": 0.057856421917676926, "learning_rate": 1.9093813931467807e-06, "loss": 1.4446, "step": 21892 }, { "epoch": 0.9734560490863012, "grad_norm": 0.05846314877271652, "learning_rate": 1.8966390358746145e-06, "loss": 1.4441, "step": 21894 }, { "epoch": 0.9735449735449735, "grad_norm": 0.05669744685292244, "learning_rate": 1.883939258672751e-06, "loss": 1.4504, "step": 21896 }, { "epoch": 0.9736338980036459, "grad_norm": 0.05733288824558258, "learning_rate": 1.8712820626268778e-06, "loss": 1.453, "step": 21898 }, { "epoch": 0.9737228224623182, "grad_norm": 0.05750066787004471, "learning_rate": 1.8586674488190736e-06, "loss": 1.4435, "step": 21900 }, { "epoch": 0.9738117469209906, "grad_norm": 0.05767687410116196, "learning_rate": 1.8460954183275314e-06, "loss": 1.4426, "step": 21902 }, { "epoch": 0.973900671379663, "grad_norm": 0.05756646394729614, "learning_rate": 1.8335659722271136e-06, "loss": 1.4368, "step": 21904 }, { "epoch": 0.9739895958383353, "grad_norm": 0.057719554752111435, "learning_rate": 1.8210791115887971e-06, "loss": 1.4422, "step": 21906 }, { "epoch": 0.9740785202970077, "grad_norm": 0.05758875980973244, "learning_rate": 1.8086348374800055e-06, "loss": 1.4432, "step": 21908 }, { "epoch": 0.9741674447556801, "grad_norm": 0.0580807626247406, "learning_rate": 1.7962331509646103e-06, "loss": 1.4417, "step": 21910 }, { "epoch": 0.9742563692143524, "grad_norm": 0.057357918471097946, "learning_rate": 1.7838740531027076e-06, "loss": 1.4442, "step": 21912 }, { "epoch": 0.9743452936730248, "grad_norm": 0.05980123206973076, "learning_rate": 1.7715575449508413e-06, "loss": 1.4484, "step": 21914 }, { "epoch": 0.9744342181316972, "grad_norm": 0.05732397362589836, "learning_rate": 1.7592836275618361e-06, "loss": 1.4435, "step": 21916 }, { "epoch": 0.9745231425903694, "grad_norm": 0.05790344625711441, "learning_rate": 1.7470523019849084e-06, "loss": 1.446, "step": 21918 }, { "epoch": 0.9746120670490418, "grad_norm": 0.0583200603723526, "learning_rate": 1.734863569265721e-06, "loss": 1.4472, "step": 21920 }, { "epoch": 0.9747009915077142, "grad_norm": 0.057706188410520554, "learning_rate": 1.7227174304461635e-06, "loss": 1.4404, "step": 21922 }, { "epoch": 0.9747899159663865, "grad_norm": 0.05923660844564438, "learning_rate": 1.7106138865645716e-06, "loss": 1.4471, "step": 21924 }, { "epoch": 0.9748788404250589, "grad_norm": 0.057506807148456573, "learning_rate": 1.698552938655562e-06, "loss": 1.4454, "step": 21926 }, { "epoch": 0.9749677648837313, "grad_norm": 0.057615265250205994, "learning_rate": 1.6865345877502546e-06, "loss": 1.4465, "step": 21928 }, { "epoch": 0.9750566893424036, "grad_norm": 0.05670270696282387, "learning_rate": 1.6745588348758833e-06, "loss": 1.4418, "step": 21930 }, { "epoch": 0.975145613801076, "grad_norm": 0.057712372392416, "learning_rate": 1.662625681056351e-06, "loss": 1.4462, "step": 21932 }, { "epoch": 0.9752345382597484, "grad_norm": 0.05813715234398842, "learning_rate": 1.65073512731162e-06, "loss": 1.446, "step": 21934 }, { "epoch": 0.9753234627184207, "grad_norm": 0.05662925913929939, "learning_rate": 1.6388871746582102e-06, "loss": 1.4411, "step": 21936 }, { "epoch": 0.9754123871770931, "grad_norm": 0.056773193180561066, "learning_rate": 1.627081824108978e-06, "loss": 1.4469, "step": 21938 }, { "epoch": 0.9755013116357654, "grad_norm": 0.057524967938661575, "learning_rate": 1.6153190766730053e-06, "loss": 1.4463, "step": 21940 }, { "epoch": 0.9755902360944377, "grad_norm": 0.05839113891124725, "learning_rate": 1.6035989333558765e-06, "loss": 1.4537, "step": 21942 }, { "epoch": 0.9756791605531101, "grad_norm": 0.058153193444013596, "learning_rate": 1.5919213951594569e-06, "loss": 1.4476, "step": 21944 }, { "epoch": 0.9757680850117825, "grad_norm": 0.05714964494109154, "learning_rate": 1.5802864630820591e-06, "loss": 1.4424, "step": 21946 }, { "epoch": 0.9758570094704548, "grad_norm": 0.0575794093310833, "learning_rate": 1.5686941381182206e-06, "loss": 1.4466, "step": 21948 }, { "epoch": 0.9759459339291272, "grad_norm": 0.057474132627248764, "learning_rate": 1.5571444212588715e-06, "loss": 1.4455, "step": 21950 }, { "epoch": 0.9760348583877996, "grad_norm": 0.05650651827454567, "learning_rate": 1.5456373134914437e-06, "loss": 1.449, "step": 21952 }, { "epoch": 0.9761237828464719, "grad_norm": 0.05746529996395111, "learning_rate": 1.534172815799484e-06, "loss": 1.4398, "step": 21954 }, { "epoch": 0.9762127073051443, "grad_norm": 0.05808810144662857, "learning_rate": 1.522750929163208e-06, "loss": 1.439, "step": 21956 }, { "epoch": 0.9763016317638167, "grad_norm": 0.05777526646852493, "learning_rate": 1.5113716545588352e-06, "loss": 1.4447, "step": 21958 }, { "epoch": 0.976390556222489, "grad_norm": 0.057585395872592926, "learning_rate": 1.5000349929591982e-06, "loss": 1.4507, "step": 21960 }, { "epoch": 0.9764794806811613, "grad_norm": 0.05766819417476654, "learning_rate": 1.4887409453333555e-06, "loss": 1.4515, "step": 21962 }, { "epoch": 0.9765684051398337, "grad_norm": 0.05719029903411865, "learning_rate": 1.4774895126468125e-06, "loss": 1.4468, "step": 21964 }, { "epoch": 0.976657329598506, "grad_norm": 0.05869666114449501, "learning_rate": 1.4662806958614105e-06, "loss": 1.4479, "step": 21966 }, { "epoch": 0.9767462540571784, "grad_norm": 0.057645734399557114, "learning_rate": 1.4551144959352724e-06, "loss": 1.4539, "step": 21968 }, { "epoch": 0.9768351785158508, "grad_norm": 0.058074142783880234, "learning_rate": 1.443990913822968e-06, "loss": 1.4441, "step": 21970 }, { "epoch": 0.9769241029745231, "grad_norm": 0.057652901858091354, "learning_rate": 1.432909950475403e-06, "loss": 1.4502, "step": 21972 }, { "epoch": 0.9770130274331955, "grad_norm": 0.056621208786964417, "learning_rate": 1.4218716068398196e-06, "loss": 1.4418, "step": 21974 }, { "epoch": 0.9771019518918679, "grad_norm": 0.05749661475419998, "learning_rate": 1.4108758838597969e-06, "loss": 1.4433, "step": 21976 }, { "epoch": 0.9771908763505402, "grad_norm": 0.05827660858631134, "learning_rate": 1.3999227824753047e-06, "loss": 1.4444, "step": 21978 }, { "epoch": 0.9772798008092126, "grad_norm": 0.057929717004299164, "learning_rate": 1.3890123036227054e-06, "loss": 1.4509, "step": 21980 }, { "epoch": 0.977368725267885, "grad_norm": 0.058121420443058014, "learning_rate": 1.3781444482345863e-06, "loss": 1.4477, "step": 21982 }, { "epoch": 0.9774576497265572, "grad_norm": 0.057763341814279556, "learning_rate": 1.3673192172400927e-06, "loss": 1.4427, "step": 21984 }, { "epoch": 0.9775465741852296, "grad_norm": 0.05874239280819893, "learning_rate": 1.3565366115645405e-06, "loss": 1.4478, "step": 21986 }, { "epoch": 0.977635498643902, "grad_norm": 0.05708703398704529, "learning_rate": 1.3457966321296921e-06, "loss": 1.4451, "step": 21988 }, { "epoch": 0.9777244231025743, "grad_norm": 0.05787283182144165, "learning_rate": 1.335099279853591e-06, "loss": 1.4457, "step": 21990 }, { "epoch": 0.9778133475612467, "grad_norm": 0.05789671465754509, "learning_rate": 1.3244445556507834e-06, "loss": 1.445, "step": 21992 }, { "epoch": 0.9779022720199191, "grad_norm": 0.0576728992164135, "learning_rate": 1.3138324604320961e-06, "loss": 1.4467, "step": 21994 }, { "epoch": 0.9779911964785915, "grad_norm": 0.05753236636519432, "learning_rate": 1.3032629951045817e-06, "loss": 1.4464, "step": 21996 }, { "epoch": 0.9780801209372638, "grad_norm": 0.05750956013798714, "learning_rate": 1.2927361605718502e-06, "loss": 1.4436, "step": 21998 }, { "epoch": 0.9781690453959362, "grad_norm": 0.05757934972643852, "learning_rate": 1.2822519577337932e-06, "loss": 1.4457, "step": 22000 }, { "epoch": 0.9781690453959362, "eval_loss": 1.4320533275604248, "eval_runtime": 12.4698, "eval_samples_per_second": 554.141, "eval_steps_per_second": 69.288, "step": 22000 }, { "epoch": 0.9782579698546086, "grad_norm": 0.057963717728853226, "learning_rate": 1.2718103874865827e-06, "loss": 1.4517, "step": 22002 }, { "epoch": 0.9783468943132808, "grad_norm": 0.05782179906964302, "learning_rate": 1.261411450722838e-06, "loss": 1.4397, "step": 22004 }, { "epoch": 0.9784358187719532, "grad_norm": 0.05823444202542305, "learning_rate": 1.2510551483315146e-06, "loss": 1.4472, "step": 22006 }, { "epoch": 0.9785247432306255, "grad_norm": 0.057919129729270935, "learning_rate": 1.2407414811979601e-06, "loss": 1.4441, "step": 22008 }, { "epoch": 0.9786136676892979, "grad_norm": 0.058388952165842056, "learning_rate": 1.2304704502037467e-06, "loss": 1.4497, "step": 22010 }, { "epoch": 0.9787025921479703, "grad_norm": 0.057490069419145584, "learning_rate": 1.2202420562268946e-06, "loss": 1.4493, "step": 22012 }, { "epoch": 0.9787915166066427, "grad_norm": 0.05721807852387428, "learning_rate": 1.2100563001418708e-06, "loss": 1.4464, "step": 22014 }, { "epoch": 0.978880441065315, "grad_norm": 0.057543035596609116, "learning_rate": 1.1999131828192567e-06, "loss": 1.4467, "step": 22016 }, { "epoch": 0.9789693655239874, "grad_norm": 0.057324040681123734, "learning_rate": 1.1898127051262476e-06, "loss": 1.4435, "step": 22018 }, { "epoch": 0.9790582899826598, "grad_norm": 0.05752379819750786, "learning_rate": 1.1797548679262638e-06, "loss": 1.4421, "step": 22020 }, { "epoch": 0.9791472144413321, "grad_norm": 0.05681290104985237, "learning_rate": 1.1697396720790065e-06, "loss": 1.4482, "step": 22022 }, { "epoch": 0.9792361389000045, "grad_norm": 0.05741885304450989, "learning_rate": 1.15976711844068e-06, "loss": 1.4459, "step": 22024 }, { "epoch": 0.9793250633586768, "grad_norm": 0.05764324218034744, "learning_rate": 1.1498372078638243e-06, "loss": 1.4434, "step": 22026 }, { "epoch": 0.9794139878173491, "grad_norm": 0.05784038081765175, "learning_rate": 1.1399499411972048e-06, "loss": 1.4504, "step": 22028 }, { "epoch": 0.9795029122760215, "grad_norm": 0.05776010826230049, "learning_rate": 1.13010531928609e-06, "loss": 1.4494, "step": 22030 }, { "epoch": 0.9795918367346939, "grad_norm": 0.05751819536089897, "learning_rate": 1.1203033429719734e-06, "loss": 1.4483, "step": 22032 }, { "epoch": 0.9796807611933662, "grad_norm": 0.05655774101614952, "learning_rate": 1.1105440130929067e-06, "loss": 1.4474, "step": 22034 }, { "epoch": 0.9797696856520386, "grad_norm": 0.0575784407556057, "learning_rate": 1.1008273304830008e-06, "loss": 1.4497, "step": 22036 }, { "epoch": 0.979858610110711, "grad_norm": 0.05941782891750336, "learning_rate": 1.0911532959729797e-06, "loss": 1.45, "step": 22038 }, { "epoch": 0.9799475345693833, "grad_norm": 0.05766258016228676, "learning_rate": 1.0815219103897933e-06, "loss": 1.4442, "step": 22040 }, { "epoch": 0.9800364590280557, "grad_norm": 0.056954920291900635, "learning_rate": 1.0719331745567828e-06, "loss": 1.4441, "step": 22042 }, { "epoch": 0.9801253834867281, "grad_norm": 0.05810870602726936, "learning_rate": 1.0623870892936261e-06, "loss": 1.4414, "step": 22044 }, { "epoch": 0.9802143079454004, "grad_norm": 0.05758174881339073, "learning_rate": 1.0528836554163368e-06, "loss": 1.4461, "step": 22046 }, { "epoch": 0.9803032324040727, "grad_norm": 0.05731727182865143, "learning_rate": 1.0434228737373764e-06, "loss": 1.4426, "step": 22048 }, { "epoch": 0.9803921568627451, "grad_norm": 0.05725093558430672, "learning_rate": 1.034004745065431e-06, "loss": 1.4474, "step": 22050 }, { "epoch": 0.9804810813214174, "grad_norm": 0.0574093721807003, "learning_rate": 1.0246292702056348e-06, "loss": 1.4473, "step": 22052 }, { "epoch": 0.9805700057800898, "grad_norm": 0.057217229157686234, "learning_rate": 1.0152964499594575e-06, "loss": 1.4416, "step": 22054 }, { "epoch": 0.9806589302387622, "grad_norm": 0.05822160094976425, "learning_rate": 1.0060062851247053e-06, "loss": 1.4464, "step": 22056 }, { "epoch": 0.9807478546974345, "grad_norm": 0.057691022753715515, "learning_rate": 9.967587764955211e-07, "loss": 1.4492, "step": 22058 }, { "epoch": 0.9808367791561069, "grad_norm": 0.056380316615104675, "learning_rate": 9.875539248624388e-07, "loss": 1.4399, "step": 22060 }, { "epoch": 0.9809257036147793, "grad_norm": 0.058779843151569366, "learning_rate": 9.783917310122737e-07, "loss": 1.4478, "step": 22062 }, { "epoch": 0.9810146280734516, "grad_norm": 0.058194488286972046, "learning_rate": 9.692721957283435e-07, "loss": 1.4477, "step": 22064 }, { "epoch": 0.981103552532124, "grad_norm": 0.05809153616428375, "learning_rate": 9.601953197901913e-07, "loss": 1.4381, "step": 22066 }, { "epoch": 0.9811924769907964, "grad_norm": 0.05672544613480568, "learning_rate": 9.511611039737522e-07, "loss": 1.4442, "step": 22068 }, { "epoch": 0.9812814014494686, "grad_norm": 0.058622874319553375, "learning_rate": 9.421695490512416e-07, "loss": 1.4394, "step": 22070 }, { "epoch": 0.981370325908141, "grad_norm": 0.0575464628636837, "learning_rate": 9.332206557914336e-07, "loss": 1.447, "step": 22072 }, { "epoch": 0.9814592503668134, "grad_norm": 0.057317234575748444, "learning_rate": 9.243144249591606e-07, "loss": 1.4467, "step": 22074 }, { "epoch": 0.9815481748254857, "grad_norm": 0.05799569934606552, "learning_rate": 9.154508573158693e-07, "loss": 1.4454, "step": 22076 }, { "epoch": 0.9816370992841581, "grad_norm": 0.05822969973087311, "learning_rate": 9.066299536192313e-07, "loss": 1.4448, "step": 22078 }, { "epoch": 0.9817260237428305, "grad_norm": 0.058205485343933105, "learning_rate": 8.9785171462331e-07, "loss": 1.455, "step": 22080 }, { "epoch": 0.9818149482015028, "grad_norm": 0.057623036205768585, "learning_rate": 8.891161410785053e-07, "loss": 1.4465, "step": 22082 }, { "epoch": 0.9819038726601752, "grad_norm": 0.056971605867147446, "learning_rate": 8.804232337315532e-07, "loss": 1.4439, "step": 22084 }, { "epoch": 0.9819927971188476, "grad_norm": 0.056623686105012894, "learning_rate": 8.717729933255814e-07, "loss": 1.4419, "step": 22086 }, { "epoch": 0.9820817215775199, "grad_norm": 0.05784169211983681, "learning_rate": 8.631654206000539e-07, "loss": 1.4461, "step": 22088 }, { "epoch": 0.9821706460361923, "grad_norm": 0.05700985714793205, "learning_rate": 8.546005162907156e-07, "loss": 1.4379, "step": 22090 }, { "epoch": 0.9822595704948646, "grad_norm": 0.057645298540592194, "learning_rate": 8.46078281129814e-07, "loss": 1.4479, "step": 22092 }, { "epoch": 0.9823484949535369, "grad_norm": 0.057107895612716675, "learning_rate": 8.375987158458775e-07, "loss": 1.448, "step": 22094 }, { "epoch": 0.9824374194122093, "grad_norm": 0.05772538483142853, "learning_rate": 8.291618211637153e-07, "loss": 1.4449, "step": 22096 }, { "epoch": 0.9825263438708817, "grad_norm": 0.05685894191265106, "learning_rate": 8.207675978045281e-07, "loss": 1.4485, "step": 22098 }, { "epoch": 0.982615268329554, "grad_norm": 0.058153510093688965, "learning_rate": 8.124160464859642e-07, "loss": 1.4477, "step": 22100 }, { "epoch": 0.9827041927882264, "grad_norm": 0.057993143796920776, "learning_rate": 8.041071679219525e-07, "loss": 1.4428, "step": 22102 }, { "epoch": 0.9827931172468988, "grad_norm": 0.0569118969142437, "learning_rate": 7.95840962822758e-07, "loss": 1.4464, "step": 22104 }, { "epoch": 0.9828820417055711, "grad_norm": 0.057352546602487564, "learning_rate": 7.876174318949824e-07, "loss": 1.4476, "step": 22106 }, { "epoch": 0.9829709661642435, "grad_norm": 0.05711887776851654, "learning_rate": 7.794365758416188e-07, "loss": 1.443, "step": 22108 }, { "epoch": 0.9830598906229159, "grad_norm": 0.058069389313459396, "learning_rate": 7.712983953619967e-07, "loss": 1.4485, "step": 22110 }, { "epoch": 0.9831488150815882, "grad_norm": 0.05813687667250633, "learning_rate": 7.632028911518374e-07, "loss": 1.4526, "step": 22112 }, { "epoch": 0.9832377395402605, "grad_norm": 0.05850476771593094, "learning_rate": 7.551500639031427e-07, "loss": 1.4463, "step": 22114 }, { "epoch": 0.9833266639989329, "grad_norm": 0.0575152225792408, "learning_rate": 7.471399143043067e-07, "loss": 1.4549, "step": 22116 }, { "epoch": 0.9834155884576052, "grad_norm": 0.05797769874334335, "learning_rate": 7.391724430401148e-07, "loss": 1.449, "step": 22118 }, { "epoch": 0.9835045129162776, "grad_norm": 0.056744713336229324, "learning_rate": 7.312476507916332e-07, "loss": 1.4456, "step": 22120 }, { "epoch": 0.98359343737495, "grad_norm": 0.057250816375017166, "learning_rate": 7.233655382363202e-07, "loss": 1.4396, "step": 22122 }, { "epoch": 0.9836823618336223, "grad_norm": 0.05758194625377655, "learning_rate": 7.155261060479701e-07, "loss": 1.4521, "step": 22124 }, { "epoch": 0.9837712862922947, "grad_norm": 0.05834776535630226, "learning_rate": 7.077293548966579e-07, "loss": 1.4438, "step": 22126 }, { "epoch": 0.9838602107509671, "grad_norm": 0.0584811232984066, "learning_rate": 6.999752854490171e-07, "loss": 1.4453, "step": 22128 }, { "epoch": 0.9839491352096394, "grad_norm": 0.05721158906817436, "learning_rate": 6.922638983677954e-07, "loss": 1.4502, "step": 22130 }, { "epoch": 0.9840380596683118, "grad_norm": 0.05716519057750702, "learning_rate": 6.84595194312243e-07, "loss": 1.4425, "step": 22132 }, { "epoch": 0.9841269841269841, "grad_norm": 0.05842083320021629, "learning_rate": 6.769691739378913e-07, "loss": 1.4459, "step": 22134 }, { "epoch": 0.9842159085856564, "grad_norm": 0.05762701854109764, "learning_rate": 6.693858378967188e-07, "loss": 1.4475, "step": 22136 }, { "epoch": 0.9843048330443288, "grad_norm": 0.0569646880030632, "learning_rate": 6.618451868368736e-07, "loss": 1.4453, "step": 22138 }, { "epoch": 0.9843937575030012, "grad_norm": 0.056726690381765366, "learning_rate": 6.543472214030066e-07, "loss": 1.4471, "step": 22140 }, { "epoch": 0.9844826819616735, "grad_norm": 0.056243959814310074, "learning_rate": 6.468919422361052e-07, "loss": 1.4397, "step": 22142 }, { "epoch": 0.9845716064203459, "grad_norm": 0.058017753064632416, "learning_rate": 6.394793499734375e-07, "loss": 1.4452, "step": 22144 }, { "epoch": 0.9846605308790183, "grad_norm": 0.057117003947496414, "learning_rate": 6.321094452487186e-07, "loss": 1.4457, "step": 22146 }, { "epoch": 0.9847494553376906, "grad_norm": 0.059118859469890594, "learning_rate": 6.247822286918892e-07, "loss": 1.4479, "step": 22148 }, { "epoch": 0.984838379796363, "grad_norm": 0.05744896084070206, "learning_rate": 6.174977009293925e-07, "loss": 1.4502, "step": 22150 }, { "epoch": 0.9849273042550354, "grad_norm": 0.05781065300107002, "learning_rate": 6.102558625838417e-07, "loss": 1.4416, "step": 22152 }, { "epoch": 0.9850162287137078, "grad_norm": 0.057693760842084885, "learning_rate": 6.030567142744081e-07, "loss": 1.4452, "step": 22154 }, { "epoch": 0.98510515317238, "grad_norm": 0.05738704651594162, "learning_rate": 5.959002566164328e-07, "loss": 1.4488, "step": 22156 }, { "epoch": 0.9851940776310524, "grad_norm": 0.05740581080317497, "learning_rate": 5.887864902217044e-07, "loss": 1.4462, "step": 22158 }, { "epoch": 0.9852830020897247, "grad_norm": 0.057811297476291656, "learning_rate": 5.817154156983473e-07, "loss": 1.4463, "step": 22160 }, { "epoch": 0.9853719265483971, "grad_norm": 0.0571342296898365, "learning_rate": 5.746870336508225e-07, "loss": 1.4455, "step": 22162 }, { "epoch": 0.9854608510070695, "grad_norm": 0.05744681507349014, "learning_rate": 5.677013446799828e-07, "loss": 1.4499, "step": 22164 }, { "epoch": 0.9855497754657419, "grad_norm": 0.05767764896154404, "learning_rate": 5.60758349382906e-07, "loss": 1.4425, "step": 22166 }, { "epoch": 0.9856386999244142, "grad_norm": 0.05785730853676796, "learning_rate": 5.53858048353173e-07, "loss": 1.4449, "step": 22168 }, { "epoch": 0.9857276243830866, "grad_norm": 0.058226075023412704, "learning_rate": 5.470004421806452e-07, "loss": 1.4434, "step": 22170 }, { "epoch": 0.985816548841759, "grad_norm": 0.057991281151771545, "learning_rate": 5.401855314515758e-07, "loss": 1.445, "step": 22172 }, { "epoch": 0.9859054733004313, "grad_norm": 0.05784721300005913, "learning_rate": 5.334133167484434e-07, "loss": 1.448, "step": 22174 }, { "epoch": 0.9859943977591037, "grad_norm": 0.05827952176332474, "learning_rate": 5.266837986502294e-07, "loss": 1.4506, "step": 22176 }, { "epoch": 0.986083322217776, "grad_norm": 0.05748755857348442, "learning_rate": 5.199969777321955e-07, "loss": 1.4483, "step": 22178 }, { "epoch": 0.9861722466764483, "grad_norm": 0.057960059493780136, "learning_rate": 5.13352854565996e-07, "loss": 1.4474, "step": 22180 }, { "epoch": 0.9862611711351207, "grad_norm": 0.05742307007312775, "learning_rate": 5.067514297195098e-07, "loss": 1.4512, "step": 22182 }, { "epoch": 0.986350095593793, "grad_norm": 0.05780716612935066, "learning_rate": 5.001927037571186e-07, "loss": 1.4459, "step": 22184 }, { "epoch": 0.9864390200524654, "grad_norm": 0.0577070526778698, "learning_rate": 4.936766772394851e-07, "loss": 1.4409, "step": 22186 }, { "epoch": 0.9865279445111378, "grad_norm": 0.057526715099811554, "learning_rate": 4.872033507236084e-07, "loss": 1.4471, "step": 22188 }, { "epoch": 0.9866168689698102, "grad_norm": 0.05801134556531906, "learning_rate": 4.80772724762879e-07, "loss": 1.4386, "step": 22190 }, { "epoch": 0.9867057934284825, "grad_norm": 0.05754433944821358, "learning_rate": 4.743847999070239e-07, "loss": 1.4458, "step": 22192 }, { "epoch": 0.9867947178871549, "grad_norm": 0.05766000971198082, "learning_rate": 4.680395767021062e-07, "loss": 1.4399, "step": 22194 }, { "epoch": 0.9868836423458273, "grad_norm": 0.0569644533097744, "learning_rate": 4.617370556904699e-07, "loss": 1.4417, "step": 22196 }, { "epoch": 0.9869725668044996, "grad_norm": 0.05827400088310242, "learning_rate": 4.554772374110172e-07, "loss": 1.4557, "step": 22198 }, { "epoch": 0.9870614912631719, "grad_norm": 0.05730379745364189, "learning_rate": 4.4926012239870916e-07, "loss": 1.4435, "step": 22200 }, { "epoch": 0.9871504157218443, "grad_norm": 0.05722169205546379, "learning_rate": 4.4308571118517605e-07, "loss": 1.4427, "step": 22202 }, { "epoch": 0.9872393401805166, "grad_norm": 0.05816062167286873, "learning_rate": 4.369540042981068e-07, "loss": 1.4448, "step": 22204 }, { "epoch": 0.987328264639189, "grad_norm": 0.05683821812272072, "learning_rate": 4.3086500226169334e-07, "loss": 1.445, "step": 22206 }, { "epoch": 0.9874171890978614, "grad_norm": 0.05709170922636986, "learning_rate": 4.248187055965191e-07, "loss": 1.4453, "step": 22208 }, { "epoch": 0.9875061135565337, "grad_norm": 0.05717954784631729, "learning_rate": 4.1881511481939304e-07, "loss": 1.4421, "step": 22210 }, { "epoch": 0.9875950380152061, "grad_norm": 0.05784517154097557, "learning_rate": 4.1285423044351565e-07, "loss": 1.4496, "step": 22212 }, { "epoch": 0.9876839624738785, "grad_norm": 0.057573914527893066, "learning_rate": 4.0693605297842384e-07, "loss": 1.4458, "step": 22214 }, { "epoch": 0.9877728869325508, "grad_norm": 0.056905996054410934, "learning_rate": 4.0106058293015723e-07, "loss": 1.4495, "step": 22216 }, { "epoch": 0.9878618113912232, "grad_norm": 0.05849708244204521, "learning_rate": 3.952278208008697e-07, "loss": 1.4523, "step": 22218 }, { "epoch": 0.9879507358498956, "grad_norm": 0.0573749914765358, "learning_rate": 3.8943776708916247e-07, "loss": 1.4409, "step": 22220 }, { "epoch": 0.9880396603085678, "grad_norm": 0.05719359219074249, "learning_rate": 3.836904222900284e-07, "loss": 1.4437, "step": 22222 }, { "epoch": 0.9881285847672402, "grad_norm": 0.058678288012742996, "learning_rate": 3.779857868947967e-07, "loss": 1.4416, "step": 22224 }, { "epoch": 0.9882175092259126, "grad_norm": 0.058080919086933136, "learning_rate": 3.723238613910773e-07, "loss": 1.4414, "step": 22226 }, { "epoch": 0.9883064336845849, "grad_norm": 0.05763470381498337, "learning_rate": 3.6670464626292754e-07, "loss": 1.4479, "step": 22228 }, { "epoch": 0.9883953581432573, "grad_norm": 0.0569768100976944, "learning_rate": 3.611281419906853e-07, "loss": 1.4533, "step": 22230 }, { "epoch": 0.9884842826019297, "grad_norm": 0.05751827359199524, "learning_rate": 3.5559434905102497e-07, "loss": 1.4468, "step": 22232 }, { "epoch": 0.988573207060602, "grad_norm": 0.057300738990306854, "learning_rate": 3.501032679170124e-07, "loss": 1.4462, "step": 22234 }, { "epoch": 0.9886621315192744, "grad_norm": 0.057894255965948105, "learning_rate": 3.4465489905810555e-07, "loss": 1.444, "step": 22236 }, { "epoch": 0.9887510559779468, "grad_norm": 0.05746915191411972, "learning_rate": 3.392492429399874e-07, "loss": 1.4461, "step": 22238 }, { "epoch": 0.9888399804366191, "grad_norm": 0.05837760120630264, "learning_rate": 3.3388630002473273e-07, "loss": 1.4482, "step": 22240 }, { "epoch": 0.9889289048952915, "grad_norm": 0.05805863440036774, "learning_rate": 3.2856607077086374e-07, "loss": 1.4444, "step": 22242 }, { "epoch": 0.9890178293539638, "grad_norm": 0.056360069662332535, "learning_rate": 3.2328855563318326e-07, "loss": 1.4449, "step": 22244 }, { "epoch": 0.9891067538126361, "grad_norm": 0.05757332220673561, "learning_rate": 3.180537550627749e-07, "loss": 1.4413, "step": 22246 }, { "epoch": 0.9891956782713085, "grad_norm": 0.05767189711332321, "learning_rate": 3.1286166950711405e-07, "loss": 1.4476, "step": 22248 }, { "epoch": 0.9892846027299809, "grad_norm": 0.058686595410108566, "learning_rate": 3.0771229941012337e-07, "loss": 1.446, "step": 22250 }, { "epoch": 0.9893735271886532, "grad_norm": 0.05774107575416565, "learning_rate": 3.026056452119508e-07, "loss": 1.4448, "step": 22252 }, { "epoch": 0.9894624516473256, "grad_norm": 0.05755819380283356, "learning_rate": 2.975417073491915e-07, "loss": 1.4493, "step": 22254 }, { "epoch": 0.989551376105998, "grad_norm": 0.057851407676935196, "learning_rate": 2.9252048625461047e-07, "loss": 1.4421, "step": 22256 }, { "epoch": 0.9896403005646703, "grad_norm": 0.05904214829206467, "learning_rate": 2.8754198235758643e-07, "loss": 1.4491, "step": 22258 }, { "epoch": 0.9897292250233427, "grad_norm": 0.0583941750228405, "learning_rate": 2.826061960836124e-07, "loss": 1.4429, "step": 22260 }, { "epoch": 0.9898181494820151, "grad_norm": 0.05874110758304596, "learning_rate": 2.7771312785462853e-07, "loss": 1.4454, "step": 22262 }, { "epoch": 0.9899070739406873, "grad_norm": 0.05823364108800888, "learning_rate": 2.7286277808891145e-07, "loss": 1.45, "step": 22264 }, { "epoch": 0.9899959983993597, "grad_norm": 0.056944265961647034, "learning_rate": 2.6805514720112946e-07, "loss": 1.4472, "step": 22266 }, { "epoch": 0.9900849228580321, "grad_norm": 0.05789085850119591, "learning_rate": 2.632902356022315e-07, "loss": 1.4496, "step": 22268 }, { "epoch": 0.9901738473167044, "grad_norm": 0.058085713535547256, "learning_rate": 2.5856804369955854e-07, "loss": 1.4492, "step": 22270 }, { "epoch": 0.9902627717753768, "grad_norm": 0.05807514116168022, "learning_rate": 2.538885718967876e-07, "loss": 1.4439, "step": 22272 }, { "epoch": 0.9903516962340492, "grad_norm": 0.057060327380895615, "learning_rate": 2.49251820593932e-07, "loss": 1.4408, "step": 22274 }, { "epoch": 0.9904406206927215, "grad_norm": 0.057622525840997696, "learning_rate": 2.44657790187397e-07, "loss": 1.4477, "step": 22276 }, { "epoch": 0.9905295451513939, "grad_norm": 0.056390151381492615, "learning_rate": 2.401064810698128e-07, "loss": 1.4408, "step": 22278 }, { "epoch": 0.9906184696100663, "grad_norm": 0.05661400407552719, "learning_rate": 2.355978936303127e-07, "loss": 1.4452, "step": 22280 }, { "epoch": 0.9907073940687386, "grad_norm": 0.057079821825027466, "learning_rate": 2.3113202825425505e-07, "loss": 1.4395, "step": 22282 }, { "epoch": 0.990796318527411, "grad_norm": 0.05696665868163109, "learning_rate": 2.267088853235011e-07, "loss": 1.447, "step": 22284 }, { "epoch": 0.9908852429860833, "grad_norm": 0.056844066828489304, "learning_rate": 2.2232846521608175e-07, "loss": 1.4413, "step": 22286 }, { "epoch": 0.9909741674447556, "grad_norm": 0.05780512094497681, "learning_rate": 2.1799076830647525e-07, "loss": 1.4417, "step": 22288 }, { "epoch": 0.991063091903428, "grad_norm": 0.05735721439123154, "learning_rate": 2.1369579496549607e-07, "loss": 1.4478, "step": 22290 }, { "epoch": 0.9911520163621004, "grad_norm": 0.05718429014086723, "learning_rate": 2.0944354556023947e-07, "loss": 1.4469, "step": 22292 }, { "epoch": 0.9912409408207727, "grad_norm": 0.05797044187784195, "learning_rate": 2.0523402045430352e-07, "loss": 1.4446, "step": 22294 }, { "epoch": 0.9913298652794451, "grad_norm": 0.05726039782166481, "learning_rate": 2.0106722000745592e-07, "loss": 1.4418, "step": 22296 }, { "epoch": 0.9914187897381175, "grad_norm": 0.05672242492437363, "learning_rate": 1.969431445759673e-07, "loss": 1.4446, "step": 22298 }, { "epoch": 0.9915077141967898, "grad_norm": 0.057257115840911865, "learning_rate": 1.9286179451227792e-07, "loss": 1.448, "step": 22300 }, { "epoch": 0.9915966386554622, "grad_norm": 0.05845504254102707, "learning_rate": 1.8882317016538642e-07, "loss": 1.4445, "step": 22302 }, { "epoch": 0.9916855631141346, "grad_norm": 0.058910876512527466, "learning_rate": 1.8482727188046112e-07, "loss": 1.4505, "step": 22304 }, { "epoch": 0.991774487572807, "grad_norm": 0.05711057409644127, "learning_rate": 1.8087409999911763e-07, "loss": 1.4427, "step": 22306 }, { "epoch": 0.9918634120314792, "grad_norm": 0.05767577514052391, "learning_rate": 1.7696365485930787e-07, "loss": 1.4478, "step": 22308 }, { "epoch": 0.9919523364901516, "grad_norm": 0.057249389588832855, "learning_rate": 1.7309593679526447e-07, "loss": 1.4401, "step": 22310 }, { "epoch": 0.9920412609488239, "grad_norm": 0.05789843201637268, "learning_rate": 1.6927094613766736e-07, "loss": 1.4432, "step": 22312 }, { "epoch": 0.9921301854074963, "grad_norm": 0.05697624757885933, "learning_rate": 1.654886832134772e-07, "loss": 1.4426, "step": 22314 }, { "epoch": 0.9922191098661687, "grad_norm": 0.058805204927921295, "learning_rate": 1.6174914834599097e-07, "loss": 1.4463, "step": 22316 }, { "epoch": 0.992308034324841, "grad_norm": 0.05726942420005798, "learning_rate": 1.5805234185495287e-07, "loss": 1.4472, "step": 22318 }, { "epoch": 0.9923969587835134, "grad_norm": 0.05799218639731407, "learning_rate": 1.5439826405627688e-07, "loss": 1.4422, "step": 22320 }, { "epoch": 0.9924858832421858, "grad_norm": 0.05797122046351433, "learning_rate": 1.5078691526243526e-07, "loss": 1.4429, "step": 22322 }, { "epoch": 0.9925748077008582, "grad_norm": 0.057014547288417816, "learning_rate": 1.4721829578207002e-07, "loss": 1.4429, "step": 22324 }, { "epoch": 0.9926637321595305, "grad_norm": 0.05727580189704895, "learning_rate": 1.4369240592021493e-07, "loss": 1.4485, "step": 22326 }, { "epoch": 0.9927526566182029, "grad_norm": 0.05705159902572632, "learning_rate": 1.4020924597840656e-07, "loss": 1.4497, "step": 22328 }, { "epoch": 0.9928415810768751, "grad_norm": 0.05742578208446503, "learning_rate": 1.3676881625424021e-07, "loss": 1.4469, "step": 22330 }, { "epoch": 0.9929305055355475, "grad_norm": 0.058664653450250626, "learning_rate": 1.3337111704198047e-07, "loss": 1.4475, "step": 22332 }, { "epoch": 0.9930194299942199, "grad_norm": 0.05729004368185997, "learning_rate": 1.3001614863195066e-07, "loss": 1.4456, "step": 22334 }, { "epoch": 0.9931083544528923, "grad_norm": 0.05801570415496826, "learning_rate": 1.2670391131103242e-07, "loss": 1.4544, "step": 22336 }, { "epoch": 0.9931972789115646, "grad_norm": 0.05761876702308655, "learning_rate": 1.2343440536227714e-07, "loss": 1.4461, "step": 22338 }, { "epoch": 0.993286203370237, "grad_norm": 0.05685674026608467, "learning_rate": 1.2020763106529443e-07, "loss": 1.4403, "step": 22340 }, { "epoch": 0.9933751278289094, "grad_norm": 0.05733815208077431, "learning_rate": 1.170235886958082e-07, "loss": 1.4474, "step": 22342 }, { "epoch": 0.9934640522875817, "grad_norm": 0.057545486837625504, "learning_rate": 1.1388227852610066e-07, "loss": 1.445, "step": 22344 }, { "epoch": 0.9935529767462541, "grad_norm": 0.0574163943529129, "learning_rate": 1.1078370082467926e-07, "loss": 1.4422, "step": 22346 }, { "epoch": 0.9936419012049265, "grad_norm": 0.05813299119472504, "learning_rate": 1.0772785585633215e-07, "loss": 1.4507, "step": 22348 }, { "epoch": 0.9937308256635988, "grad_norm": 0.05734492093324661, "learning_rate": 1.0471474388240587e-07, "loss": 1.4422, "step": 22350 }, { "epoch": 0.9938197501222711, "grad_norm": 0.05803303048014641, "learning_rate": 1.0174436516047214e-07, "loss": 1.4482, "step": 22352 }, { "epoch": 0.9939086745809435, "grad_norm": 0.058048054575920105, "learning_rate": 9.881671994432795e-08, "loss": 1.4479, "step": 22354 }, { "epoch": 0.9939975990396158, "grad_norm": 0.057730190455913544, "learning_rate": 9.59318084843841e-08, "loss": 1.4538, "step": 22356 }, { "epoch": 0.9940865234982882, "grad_norm": 0.05775808170437813, "learning_rate": 9.308963102716561e-08, "loss": 1.442, "step": 22358 }, { "epoch": 0.9941754479569606, "grad_norm": 0.05732406675815582, "learning_rate": 9.029018781570031e-08, "loss": 1.447, "step": 22360 }, { "epoch": 0.9942643724156329, "grad_norm": 0.05907835438847542, "learning_rate": 8.753347908924126e-08, "loss": 1.4477, "step": 22362 }, { "epoch": 0.9943532968743053, "grad_norm": 0.05664451792836189, "learning_rate": 8.481950508343328e-08, "loss": 1.4409, "step": 22364 }, { "epoch": 0.9944422213329777, "grad_norm": 0.05807731673121452, "learning_rate": 8.214826603031301e-08, "loss": 1.4443, "step": 22366 }, { "epoch": 0.99453114579165, "grad_norm": 0.0581711083650589, "learning_rate": 7.951976215825329e-08, "loss": 1.4511, "step": 22368 }, { "epoch": 0.9946200702503224, "grad_norm": 0.057496510446071625, "learning_rate": 7.693399369190779e-08, "loss": 1.4406, "step": 22370 }, { "epoch": 0.9947089947089947, "grad_norm": 0.05667787045240402, "learning_rate": 7.43909608523774e-08, "loss": 1.4445, "step": 22372 }, { "epoch": 0.994797919167667, "grad_norm": 0.05935773253440857, "learning_rate": 7.189066385693276e-08, "loss": 1.4474, "step": 22374 }, { "epoch": 0.9948868436263394, "grad_norm": 0.05779668688774109, "learning_rate": 6.943310291945836e-08, "loss": 1.4459, "step": 22376 }, { "epoch": 0.9949757680850118, "grad_norm": 0.056512489914894104, "learning_rate": 6.701827824989737e-08, "loss": 1.4443, "step": 22378 }, { "epoch": 0.9950646925436841, "grad_norm": 0.058668848127126694, "learning_rate": 6.464619005480676e-08, "loss": 1.4499, "step": 22380 }, { "epoch": 0.9951536170023565, "grad_norm": 0.05988642945885658, "learning_rate": 6.231683853685776e-08, "loss": 1.4459, "step": 22382 }, { "epoch": 0.9952425414610289, "grad_norm": 0.057425398379564285, "learning_rate": 6.003022389522439e-08, "loss": 1.4456, "step": 22384 }, { "epoch": 0.9953314659197012, "grad_norm": 0.05751774087548256, "learning_rate": 5.778634632536139e-08, "loss": 1.4431, "step": 22386 }, { "epoch": 0.9954203903783736, "grad_norm": 0.05865611135959625, "learning_rate": 5.558520601917083e-08, "loss": 1.4444, "step": 22388 }, { "epoch": 0.995509314837046, "grad_norm": 0.05733845382928848, "learning_rate": 5.342680316466897e-08, "loss": 1.4468, "step": 22390 }, { "epoch": 0.9955982392957183, "grad_norm": 0.059069231152534485, "learning_rate": 5.131113794643039e-08, "loss": 1.4488, "step": 22392 }, { "epoch": 0.9956871637543906, "grad_norm": 0.05756256729364395, "learning_rate": 4.923821054536592e-08, "loss": 1.4397, "step": 22394 }, { "epoch": 0.995776088213063, "grad_norm": 0.057630039751529694, "learning_rate": 4.7208021138611666e-08, "loss": 1.4437, "step": 22396 }, { "epoch": 0.9958650126717353, "grad_norm": 0.057779572904109955, "learning_rate": 4.5220569899750984e-08, "loss": 1.4464, "step": 22398 }, { "epoch": 0.9959539371304077, "grad_norm": 0.05961634963750839, "learning_rate": 4.327585699859249e-08, "loss": 1.4437, "step": 22400 }, { "epoch": 0.9960428615890801, "grad_norm": 0.05674600601196289, "learning_rate": 4.137388260155861e-08, "loss": 1.4464, "step": 22402 }, { "epoch": 0.9961317860477524, "grad_norm": 0.056886181235313416, "learning_rate": 3.951464687107498e-08, "loss": 1.445, "step": 22404 }, { "epoch": 0.9962207105064248, "grad_norm": 0.0578632578253746, "learning_rate": 3.769814996612553e-08, "loss": 1.4484, "step": 22406 }, { "epoch": 0.9963096349650972, "grad_norm": 0.057221461087465286, "learning_rate": 3.592439204197495e-08, "loss": 1.445, "step": 22408 }, { "epoch": 0.9963985594237695, "grad_norm": 0.058185044676065445, "learning_rate": 3.4193373250335224e-08, "loss": 1.4412, "step": 22410 }, { "epoch": 0.9964874838824419, "grad_norm": 0.05843387171626091, "learning_rate": 3.250509373908805e-08, "loss": 1.4423, "step": 22412 }, { "epoch": 0.9965764083411143, "grad_norm": 0.05737845599651337, "learning_rate": 3.085955365261794e-08, "loss": 1.4465, "step": 22414 }, { "epoch": 0.9966653327997865, "grad_norm": 0.05840914696455002, "learning_rate": 2.925675313159015e-08, "loss": 1.4438, "step": 22416 }, { "epoch": 0.9967542572584589, "grad_norm": 0.05688779801130295, "learning_rate": 2.769669231295069e-08, "loss": 1.4438, "step": 22418 }, { "epoch": 0.9968431817171313, "grad_norm": 0.05774452164769173, "learning_rate": 2.617937133009285e-08, "loss": 1.4466, "step": 22420 }, { "epoch": 0.9969321061758036, "grad_norm": 0.05797778069972992, "learning_rate": 2.4704790312746194e-08, "loss": 1.4443, "step": 22422 }, { "epoch": 0.997021030634476, "grad_norm": 0.057229943573474884, "learning_rate": 2.327294938697655e-08, "loss": 1.4496, "step": 22424 }, { "epoch": 0.9971099550931484, "grad_norm": 0.05771101638674736, "learning_rate": 2.188384867513049e-08, "loss": 1.4412, "step": 22426 }, { "epoch": 0.9971988795518207, "grad_norm": 0.057632721960544586, "learning_rate": 2.053748829600188e-08, "loss": 1.4515, "step": 22428 }, { "epoch": 0.9972878040104931, "grad_norm": 0.05861422419548035, "learning_rate": 1.9233868364665342e-08, "loss": 1.4463, "step": 22430 }, { "epoch": 0.9973767284691655, "grad_norm": 0.05753085017204285, "learning_rate": 1.797298899258726e-08, "loss": 1.4453, "step": 22432 }, { "epoch": 0.9974656529278378, "grad_norm": 0.057691846042871475, "learning_rate": 1.6754850287459267e-08, "loss": 1.4435, "step": 22434 }, { "epoch": 0.9975545773865102, "grad_norm": 0.05841954052448273, "learning_rate": 1.5579452353531308e-08, "loss": 1.4526, "step": 22436 }, { "epoch": 0.9976435018451825, "grad_norm": 0.05724509432911873, "learning_rate": 1.4446795291223059e-08, "loss": 1.4428, "step": 22438 }, { "epoch": 0.9977324263038548, "grad_norm": 0.05789647623896599, "learning_rate": 1.3356879197401473e-08, "loss": 1.445, "step": 22440 }, { "epoch": 0.9978213507625272, "grad_norm": 0.058276426047086716, "learning_rate": 1.2309704165158753e-08, "loss": 1.4454, "step": 22442 }, { "epoch": 0.9979102752211996, "grad_norm": 0.057980820536613464, "learning_rate": 1.1305270284089897e-08, "loss": 1.4448, "step": 22444 }, { "epoch": 0.9979991996798719, "grad_norm": 0.05673612281680107, "learning_rate": 1.0343577639959634e-08, "loss": 1.446, "step": 22446 }, { "epoch": 0.9980881241385443, "grad_norm": 0.05783254653215408, "learning_rate": 9.424626315091e-09, "loss": 1.4437, "step": 22448 }, { "epoch": 0.9981770485972167, "grad_norm": 0.058315496891736984, "learning_rate": 8.548416388032277e-09, "loss": 1.4446, "step": 22450 }, { "epoch": 0.998265973055889, "grad_norm": 0.05717792361974716, "learning_rate": 7.714947933556982e-09, "loss": 1.4445, "step": 22452 }, { "epoch": 0.9983548975145614, "grad_norm": 0.05846971645951271, "learning_rate": 6.924221023052457e-09, "loss": 1.4477, "step": 22454 }, { "epoch": 0.9984438219732338, "grad_norm": 0.05766315013170242, "learning_rate": 6.176235724075774e-09, "loss": 1.4465, "step": 22456 }, { "epoch": 0.9985327464319061, "grad_norm": 0.058049630373716354, "learning_rate": 5.470992100520267e-09, "loss": 1.4475, "step": 22458 }, { "epoch": 0.9986216708905784, "grad_norm": 0.057937148958444595, "learning_rate": 4.808490212726557e-09, "loss": 1.4404, "step": 22460 }, { "epoch": 0.9987105953492508, "grad_norm": 0.05762643739581108, "learning_rate": 4.18873011731602e-09, "loss": 1.446, "step": 22462 }, { "epoch": 0.9987995198079231, "grad_norm": 0.05884932354092598, "learning_rate": 3.611711867246292e-09, "loss": 1.4505, "step": 22464 }, { "epoch": 0.9988884442665955, "grad_norm": 0.05755854770541191, "learning_rate": 3.0774355119222996e-09, "loss": 1.4517, "step": 22466 }, { "epoch": 0.9989773687252679, "grad_norm": 0.05807171016931534, "learning_rate": 2.585901096863186e-09, "loss": 1.4417, "step": 22468 }, { "epoch": 0.9990662931839402, "grad_norm": 0.057942621409893036, "learning_rate": 2.1371086642574255e-09, "loss": 1.4481, "step": 22470 }, { "epoch": 0.9991552176426126, "grad_norm": 0.05796192213892937, "learning_rate": 1.7310582523522023e-09, "loss": 1.4428, "step": 22472 }, { "epoch": 0.999244142101285, "grad_norm": 0.05767205357551575, "learning_rate": 1.3677498958974966e-09, "loss": 1.448, "step": 22474 }, { "epoch": 0.9993330665599574, "grad_norm": 0.05703622102737427, "learning_rate": 1.047183625924042e-09, "loss": 1.4464, "step": 22476 }, { "epoch": 0.9994219910186297, "grad_norm": 0.05870012938976288, "learning_rate": 7.693594699098582e-10, "loss": 1.4521, "step": 22478 }, { "epoch": 0.9995109154773021, "grad_norm": 0.057245586067438126, "learning_rate": 5.342774515026961e-10, "loss": 1.4448, "step": 22480 }, { "epoch": 0.9995998399359743, "grad_norm": 0.05662744492292404, "learning_rate": 3.419375909086142e-10, "loss": 1.4334, "step": 22482 }, { "epoch": 0.9996887643946467, "grad_norm": 0.05766800418496132, "learning_rate": 1.923399044478913e-10, "loss": 1.4482, "step": 22484 }, { "epoch": 0.9997776888533191, "grad_norm": 0.058209143579006195, "learning_rate": 8.548440505462551e-11, "loss": 1.4435, "step": 22486 }, { "epoch": 0.9998666133119914, "grad_norm": 0.05729079991579056, "learning_rate": 2.1371101721623376e-11, "loss": 1.4459, "step": 22488 }, { "epoch": 0.9999555377706638, "grad_norm": 0.057013168931007385, "learning_rate": 0.0, "loss": 1.4473, "step": 22490 } ], "logging_steps": 2, "max_steps": 22491, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.812454399322358e+19, "train_batch_size": 768, "trial_name": null, "trial_params": null }